From 33a04422a9dc7802e4e969f264523ef09153100b Mon Sep 17 00:00:00 2001 From: sangeethjayaprakash Date: Thu, 13 Jul 2023 21:39:24 +0200 Subject: [PATCH 1/3] add Lix --- docs/source/.Rhistory | 0 docs/source/lix.rst | 25 ++++++++++++++++++++ readability/readability.py | 7 +++++- readability/scorers/__init__.py | 1 + readability/scorers/lix.py | 42 +++++++++++++++++++++++++++++++++ readability/text/analyzer.py | 9 +++++++ test/test_readability.py | 14 +++++++++++ 7 files changed, 97 insertions(+), 1 deletion(-) create mode 100644 docs/source/.Rhistory create mode 100644 docs/source/lix.rst create mode 100644 readability/scorers/lix.py diff --git a/docs/source/.Rhistory b/docs/source/.Rhistory new file mode 100644 index 0000000..e69de29 diff --git a/docs/source/lix.rst b/docs/source/lix.rst new file mode 100644 index 0000000..e375adc --- /dev/null +++ b/docs/source/lix.rst @@ -0,0 +1,25 @@ +Läsbarhetsindex +=============== + +About +^^^^^ + +Readability index for Swedish and other European Languages. [reference]_ + +Usage +^^^^^ + +.. code-block:: python + + r = Readability(text) + + f = r.lix() + + print(f.score) + print(f.ease) + + +References +---------- + +.. 
[reference] `Lix (readability test) <https://en.wikipedia.org/wiki/Lix_(readability_test)>`_ diff --git a/readability/readability.py b/readability/readability.py index 91341b7..b2a944a 100644 --- a/readability/readability.py +++ b/readability/readability.py @@ -1,6 +1,6 @@ from .text import Analyzer from .scorers import ARI, ColemanLiau, DaleChall, Flesch, \ - FleschKincaid, GunningFog, LinsearWrite, Smog, Spache + FleschKincaid, GunningFog, LinsearWrite, Smog, Spache, Lix class Readability: @@ -44,11 +44,16 @@ def smog(self,all_sentences=False): def spache(self): """Spache Index.""" return Spache(self._statistics).score() + + def lix(self): + """Läsbarhetsindex.""" + return Lix(self._statistics).score() def statistics(self): return { 'num_letters': self._statistics.num_letters, 'num_words': self._statistics.num_words, + 'num_long_words': self._statistics.num_long_words, 'num_sentences': self._statistics.num_sentences, 'num_polysyllabic_words': self._statistics.num_poly_syllable_words, 'avg_words_per_sentence': self._statistics.avg_words_per_sentence, diff --git a/readability/scorers/__init__.py b/readability/scorers/__init__.py index df708e8..a18c8a1 100644 --- a/readability/scorers/__init__.py +++ b/readability/scorers/__init__.py @@ -1,5 +1,6 @@ from .flesch import Flesch +from .lix import Lix from .flesch_kincaid import FleschKincaid from .gunning_fog import GunningFog from .coleman_liau import ColemanLiau diff --git a/readability/scorers/lix.py b/readability/scorers/lix.py new file mode 100644 index 0000000..4b4279b --- /dev/null +++ b/readability/scorers/lix.py @@ -0,0 +1,42 @@ +from readability.exceptions import ReadabilityException + + +class Result: + def __init__(self, score, ease): + self.score = score + self.ease = ease + + def __str__(self): + return "score: {}, ease: '{}'". 
\ + format(self.score, self.ease) + + +class Lix: + def __init__(self, stats): + self._stats = stats + if stats.num_words < 100: + raise ReadabilityException('100 words required.') + + def score(self): + score = self._score() + return Result( + score=score, + ease=self._ease(score)) + + def _score(self): + stats = self._stats + words_per_sent = stats.num_words / stats.num_sentences + percentage_long_words = stats.num_long_words / stats.num_words * 100 + return words_per_sent + percentage_long_words + + def _ease(self, score): + if score > 60: + return 'very_difficult' + elif score > 50 and score <= 60: + return 'difficult' + elif score > 40 and score <= 50: + return 'medium difficulty' + elif score > 30 and score <= 40: + return 'easy reading' + else: + return 'very easy' diff --git a/readability/text/analyzer.py b/readability/text/analyzer.py index dce409e..c989864 100644 --- a/readability/text/analyzer.py +++ b/readability/text/analyzer.py @@ -25,6 +25,10 @@ def num_letters(self): def num_words(self): return self.stats['num_words'] + @property + def num_long_words(self): + return self.stats['num_long_words'] + @property def num_sentences(self): return self.stats['num_sentences'] @@ -71,6 +75,7 @@ def _statistics(self, text): syllable_count = 0 poly_syllable_count = 0 word_count = 0 + long_word_count = 0 letters_count = 0 gunning_complex_count = 0 dale_chall_complex_count = 0 @@ -92,11 +97,14 @@ def is_spache_complex(t): for t in tokens: + num_word_letters = 0 if not self._is_punctuation(t): word_count += 1 word_syllable_count = count_syllables(t) syllable_count += word_syllable_count letters_count += len(t) + word_num_letters = len(t) + long_word_count += 1 if word_num_letters > 6 else 0 poly_syllable_count += 1 if word_syllable_count >= 3 else 0 gunning_complex_count += \ 1 if is_gunning_complex(t, word_syllable_count) \ @@ -113,6 +121,7 @@ def is_spache_complex(t): 'num_syllables': syllable_count, 'num_poly_syllable_words': poly_syllable_count, 'num_words': 
word_count, + 'num_long_words': long_word_count, 'num_sentences': sentence_count, 'num_letters': letters_count, 'num_gunning_complex': gunning_complex_count, diff --git a/test/test_readability.py b/test/test_readability.py index 46e0d1b..aa6ae18 100644 --- a/test/test_readability.py +++ b/test/test_readability.py @@ -36,6 +36,16 @@ def test_flesch(self): self.assertEqual(['10', '11', '12'], r.grade_levels) self.assertEqual('fairly_difficult', r.ease) + def test_lix(self): + text = """Läsbarhetsindex (LIX) kan användas för att få uppfattning om hur lätt eller svår en text är att läsa. LIX är baserat på medeltalet ord per mening och andelen långa ord (ord med fler än 6 bokstäver) uttryckt i procent. Det finns flera olika läsbarhetsindex, men i Sverige är LIX det mest använda. LIX utvecklades på 1960-talet av pedagogikforskaren Carl-Hugo Björnsson. + Läsbarhetsindex (LIX) kan användas för att få uppfattning om hur lätt eller svår en text är att läsa. LIX är baserat på medeltalet ord per mening och andelen långa ord (ord med fler än 6 bokstäver) uttryckt i procent. Det finns flera olika läsbarhetsindex, men i Sverige är LIX det mest använda. LIX utvecklades på 1960-talet av pedagogikforskaren Carl-Hugo Björnsson. 
+ """ + readability = Readability(text) + r = readability.lix() + print(r) + self.assertEqual(41.47950819672131, r.score) + self.assertEqual('medium difficulty', r.ease) + def test_flesch_kincaid(self): r = self.readability.flesch_kincaid() print(r) @@ -90,3 +100,7 @@ def test_print_stats(self): self.assertEqual(117, stats['num_words']) self.assertEqual(7, stats['num_sentences']) self.assertEqual(20, stats['num_polysyllabic_words']) + + + + From 00a6775cf5bc1a0a26f820b9749acc5be1f30daa Mon Sep 17 00:00:00 2001 From: sangeethjayaprakash Date: Wed, 3 Apr 2024 14:23:55 +0200 Subject: [PATCH 2/3] update docs for lix --- README.md | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index a2ca0a7..d59ee66 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ ![Travis Build](https://travis-ci.org/cdimascio/py-readability-metrics.svg?branch=master) ![Python](https://img.shields.io/badge/python-3.x-blue.svg) [![Documentation Status](https://readthedocs.org/projects/py-readability-metrics/badge/?version=latest)](https://py-readability-metrics.readthedocs.io/en/latest/?badge=latest) [![wheel](https://img.shields.io/badge/wheel-yes-ff00c9.svg)](https://pypi.org/project/py-readability-metrics/) [![](https://img.shields.io/gitter/room/cdimascio-oss/community?color=%23eb205a)](https://gitter.im/cdimascio-oss/community) [![All Contributors](https://img.shields.io/badge/all_contributors-1-orange.svg?style=flat-square)](#contributors-) [![MIT license](https://img.shields.io/badge/License-MIT-green.svg)](https://lbesson.mit-license.org/) -Score the _readability_ of text using popular readability formulas and metrics including: [Flesch Kincaid Grade Level](#flesch-kincaid-grade-level), [Flesch Reading Ease](#flesch-reading-ease), [Gunning Fog Index](#gunning-fog), [Dale Chall Readability](#dale-chall-readability), [Automated Readability Index (ARI)](#automated-readability-index-ari), [Coleman Liau Index](#coleman-liau-index), 
[Linsear Write](#linsear-write), [SMOG](#smog), and [SPACHE](#spache). 📗 +Score the _readability_ of text using popular readability formulas and metrics including: [Flesch Kincaid Grade Level](#flesch-kincaid-grade-level), [Flesch Reading Ease](#flesch-reading-ease), [Gunning Fog Index](#gunning-fog), [Dale Chall Readability](#dale-chall-readability), [Automated Readability Index (ARI)](#automated-readability-index-ari), [Coleman Liau Index](#coleman-liau-index), [Linsear Write](#linsear-write), [SMOG](#smog), [SPACHE](#spache) and [Lix](#lix). 📗 [![GitHub stars](https://img.shields.io/github/stars/cdimascio/py-readability-metrics.svg?style=social&label=Star&maxAge=2592000)](https://GitHub.com/cdimascio/py-readability-metrics/stargazers/) [![Twitter URL](https://img.shields.io/twitter/url/https/github.com/cdimascio/py-readability-metrics.svg?style=social)](https://twitter.com/intent/tweet?text=Check%20out%20py-readability-metrics%20by%20%40CarmineDiMascio%20https%3A%2F%2Fgithub.com%2Fcdimascio%2Fpy-readability-metrics%20%F0%9F%91%8D) @@ -34,6 +34,7 @@ r.ari() r.linsear_write() r.smog() r.spache() +r.lix() ``` **\*Note:** `text` must contain >= 100 words\* @@ -49,6 +50,7 @@ r.spache() - [SMOG](#smog) - [Spache](#spache) - [Linsear Write](#linsear-write) +- [Lix](#lix) ## Readability Metric Details and Properties @@ -240,6 +242,24 @@ print(lw.score) print(lw.grade_level) ``` +### Lix + +Lix (abbreviation of Swedish läsbarhetsindex, "readability index") is a readability measure for Scandinavian and West European languages developed bu Carl-Hugo Björnsson. It is defined as the sum of average sentence length and the percentage of words with more than six letters. + +**_call:_** + +```python +r.lix() +``` + +**_example:_** + +```python +s = r.lix() +print(s.score) +print(s.ease) +``` + ## [Contributing](CONTRIBUTING.md) Contributions are welcome! 
From 8427e46877c2f37ad9b6c579c9b5cc8504075e0b Mon Sep 17 00:00:00 2001 From: sangeethjayaprakash Date: Wed, 3 Apr 2024 14:25:21 +0200 Subject: [PATCH 3/3] fix typo in Lix docs --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d59ee66..735bce3 100644 --- a/README.md +++ b/README.md @@ -244,7 +244,7 @@ print(lw.grade_level) ### Lix -Lix (abbreviation of Swedish läsbarhetsindex, "readability index") is a readability measure for Scandinavian and West European languages developed bu Carl-Hugo Björnsson. It is defined as the sum of average sentence length and the percentage of words with more than six letters. +Lix (abbreviation of Swedish läsbarhetsindex, "readability index") is a readability measure for Scandinavian and West European languages developed by Carl-Hugo Björnsson. It is defined as the sum of average sentence length and the percentage of words with more than six letters. **_call:_**