cdimascio · Anirbanbhk88 · Aug 11, 2025 · Aug 12, 2025 · Aug 14, 2025 · Aug 19, 2025
diff --git a/.gitignore b/.gitignore
@@ -2,4 +2,5 @@ __pycache__
 .vscode
 py_readability_metrics.egg-info
 dist
-build
+build
+venv
diff --git a/readability/readability.py b/readability/readability.py
@@ -1,13 +1,21 @@
-from .text import Analyzer
-from .scorers import ARI, ColemanLiau, DaleChall, Flesch, \
-    FleschKincaid, GunningFog, LinsearWrite, Smog, Spache
 import warnings
 
+import nltk
+
+from .scorers import (ARI, ColemanLiau, DaleChall, Flesch, FleschKincaid,
+                      GunningFog, LinsearWrite, LixLesbarkeitsIndex,
+                      MiyazakiReadabilityIndex, Smog, Spache,
+                      WienerSachtextformel, Gsmog)
+from .text import Analyzer
+
+nltk.download('punkt_tab')
+
 class Readability:
-    def __init__(self, text, min_words=100):
+    def __init__(self, text, min_words=100, language='en'):
         self._analyzer = Analyzer()
         self._statistics = self._analyzer.analyze(text)
         self._min_words = min_words
+        self._language = language
         if self._min_words < 100:
             warnings.warn(
                 "Documents with fewer than 100 words may affect the accuracy of readability tests"
@@ -27,7 +35,7 @@ def dale_chall(self):
 
     def flesch(self):
         """Calculate Flesch Reading Ease score."""
-        return Flesch(self._statistics, self._min_words).score()
+        return Flesch(self._statistics, self._min_words, self._language).score()
 
     def flesch_kincaid(self):
         """Calculate Flesch-Kincaid Grade Level."""
@@ -46,6 +54,34 @@ def smog(self,all_sentences=False, ignore_length=False):
         `all_sentences` indicates whether SMOG should use a sample of 30 sentences, as described in the original paper, or if it should use all sentences in the text"""
         return Smog(self._statistics, self._analyzer.sentences,
                     all_sentences=all_sentences, ignore_length=ignore_length).score()
+
+    def gsmog(self, ignore_length=False):
+        """GSMOG Index. Measure the SMOG score adapted for German text"""
+        return Gsmog(self._statistics, ignore_length=ignore_length).score()
+
+    def erste_wiener_sachtextformel(self):
+        """erste Wiener Sachtextformel."""
+        return WienerSachtextformel(self._statistics, self._min_words).erste_wiener_sachtextformel_score()
+
+    def zweite_wiener_sachtextformel(self):
+        """zweite Wiener Sachtextformel."""
+        return WienerSachtextformel(self._statistics, self._min_words).zweite_wiener_sachtextformel_score()
+
+    def dritte_wiener_sachtextformel(self):
+        """dritte Wiener Sachtextformel."""
+        return WienerSachtextformel(self._statistics, self._min_words).dritte_wiener_sachtextformel_score()
+
+    def vierte_wiener_sachtextformel(self):
+        """vierte Wiener Sachtextformel."""
+        return WienerSachtextformel(self._statistics, self._min_words).vierte_wiener_sachtextformel_score()
+
+    def lix_lesbarkeits_index(self):
+        """LIX Lesbarkeitsindex."""
+        return LixLesbarkeitsIndex(self._statistics, self._min_words).score()
+
+    def miyazaki_readability_index(self):
+        """Miyazaki Readability Index."""
+        return MiyazakiReadabilityIndex(self._statistics, self._min_words).score()
 
     def spache(self):
         """Spache Index."""
@@ -59,4 +95,6 @@ def statistics(self):
             'num_polysyllabic_words': self._statistics.num_poly_syllable_words,
             'avg_words_per_sentence': self._statistics.avg_words_per_sentence,
             'avg_syllables_per_word': self._statistics.avg_syllables_per_word,
+            'num_six_letter_words': self._statistics.num_six_letter_words,
+            'num_mono_syllable_words': self._statistics.num_mono_syllable_words,
         }
diff --git a/readability/scorers/__init__.py b/readability/scorers/__init__.py
@@ -1,10 +1,14 @@
 
+from .ari import ARI
+from .coleman_liau import ColemanLiau
+from .dale_chall import DaleChall
 from .flesch import Flesch
 from .flesch_kincaid import FleschKincaid
+from .gsmog import Gsmog
 from .gunning_fog import GunningFog
-from .coleman_liau import ColemanLiau
-from .dale_chall import DaleChall
-from .ari import ARI
 from .linsear_write import LinsearWrite
+from .lix import LixLesbarkeitsIndex
+from .miyazaki_readability_index import MiyazakiReadabilityIndex
 from .smog import Smog
 from .spache import Spache
+from .wiener_sachtextformel import WienerSachtextformel
diff --git a/readability/scorers/flesch.py b/readability/scorers/flesch.py
@@ -13,8 +13,9 @@ def __str__(self):
 
 
 class Flesch:
-    def __init__(self, stats, min_words=100):
+    def __init__(self, stats, min_words=100, language='en'):
         self._stats = stats
+        self._language = language
         if stats.num_words < min_words:
             raise ReadabilityException('{} words required.'.format(min_words))
 
@@ -27,38 +28,78 @@ def score(self):
 
     def _score(self):
         stats = self._stats
-        words_per_sent = stats.num_words / stats.num_sentences
-        syllables_per_word = stats.num_syllables / stats.num_words
-        return 206.835 - (1.015 * words_per_sent) - (84.6 * syllables_per_word)
+        if self._language == 'en':
+            words_per_sent = stats.num_words / stats.num_sentences
+            syllables_per_word = stats.num_syllables / stats.num_words
+            return 206.835 - (1.015 * words_per_sent) - (84.6 * syllables_per_word)
+        elif self._language == 'de':
+            words_per_sent = stats.num_words / stats.num_sentences
+            syllables_per_word = stats.num_syllables / stats.num_words
+            return 180 - words_per_sent - (58.5 * syllables_per_word)
+        else:
+            raise ReadabilityException('Unsupported language: {}'.format(self._language))
+
 
     def _ease(self, score):
-        if score >= 90 and score <= 100:
-            return 'very_easy'
-        elif score >= 80 and score < 90:
-            return 'easy'
-        elif score >= 70 and score < 80:
-            return 'fairly_easy'
-        elif score >= 60 and score < 70:
-            return 'standard'
-        elif score >= 50 and score < 60:
-            return 'fairly_difficult'
-        elif score >= 30 and score < 50:
-            return 'difficult'
-        else:
-            return 'very_confusing'
+        if self._language == 'en':
+            if score >= 90 and score <= 100:
+                return 'very_easy'
+            elif score >= 80 and score < 90:
+                return 'easy'
+            elif score >= 70 and score < 80:
+                return 'fairly_easy'
+            elif score >= 60 and score < 70:
+                return 'standard'
+            elif score >= 50 and score < 60:
+                return 'fairly_difficult'
+            elif score >= 30 and score < 50:
+                return 'difficult'
+            else:
+                return 'very_confusing'
+        elif self._language == 'de':
+            if score >= 90 and score <= 100:
+                return 'sehr_leicht'
+            elif score >= 80 and score < 90:
+                return 'leicht'
+            elif score >= 70 and score < 80:
+                return 'mittel_leicht'
+            elif score >= 60 and score < 70:
+                return 'mittel'
+            elif score >= 50 and score < 60:
+                return 'mittel_schwer'
+            elif score >= 30 and score < 50:
+                return 'schwer'
+            else:
+                return 'sehr_schwer'
 
     def _grade_levels(self, score):
-        if score >= 90 and score <= 100:
-            return ['5']
-        elif score >= 80 and score < 90:
-            return ['6']
-        elif score >= 70 and score < 80:
-            return ['7']
-        elif score >= 60 and score < 70:
-            return ['8', '9']
-        elif score >= 50 and score < 60:
-            return ['10', '11', '12']
-        elif score >= 30 and score < 50:
-            return ['college']
-        else:
-            return ['college_graduate']
+        if self._language == 'en':
+            if score >= 90 and score <= 100:
+                return ['5']
+            elif score >= 80 and score < 90:
+                return ['6']
+            elif score >= 70 and score < 80:
+                return ['7']
+            elif score >= 60 and score < 70:
+                return ['8', '9']
+            elif score >= 50 and score < 60:
+                return ['10', '11', '12']
+            elif score >= 30 and score < 50:
+                return ['college']
+            else:
+                return ['college_graduate']
+        elif self._language == 'de':
+            if score >= 90 and score <= 100:
+                return ['11']
+            elif score >= 80 and score < 90:
+                return ['11, 12']
+            elif score >= 70 and score < 80:
+                return ['11, 12']
+            elif score >= 60 and score < 70:
+                return ['13, 14, 15']
+            elif score >= 50 and score < 60:
+                return ['13, 14, 15']
+            elif score >= 30 and score < 50:
+                return ['13, 14, 15']
+            else:
+                return ['Akademikerinnen und Akademiker']
diff --git a/readability/scorers/gsmog.py b/readability/scorers/gsmog.py
@@ -0,0 +1,54 @@
+import math
+import warnings
+
+from readability.exceptions import ReadabilityException
+
+
+class Result:
+    def __init__(self, score, grade_level):
+        self.score = score
+        self.grade_level = grade_level
+
+    def __str__(self):
+        return "score: {}, grade_level: {}". \
+            format(self.score, self.grade_level)
+
+
+class Gsmog:
+    def __init__(self, stats, ignore_length=False):
+        """
+        Bamberger adapted McLaughlin's original formula (Harry McLaughlin, 1969 https://ogg.osu.edu/media/documents/health_lit/WRRSMOG_Readability_Formula_G._Harry_McLaughlin__1969_.pdf)
+        for German-speaking countries. The formula compares the number of multisyllabic words (three or more, see above) to the number of sentences in the entire text. Since the original formula refers to a
+        sample of 30 sentences, the implementation in this class uses 30 sentences as a default if all_sentences is False.
+        """
+        if stats.num_sentences < 30:
+            if not ignore_length:
+                raise ReadabilityException(
+                    'SMOG requires 30 sentences. {} found'
+                    .format(stats.num_sentences))
+            else:
+                warnings.warn(
+                    'SMOG requires 30 sentences. {} found'
+                    .format(stats.num_sentences))
+
+
+        self._stats = stats
+
+
+    def score(self):
+        score = self._score()
+        grade_level = self._grade_level(score)
+        return Result(
+            score=score,
+            grade_level=grade_level
+        )
+
+    def _score(self):
+
+        num_sentences = self._stats.num_sentences
+        num_complex_words = self._stats.num_poly_syllable_words # words with 3 or more syllables
+        return math.sqrt(30 * num_complex_words / num_sentences) - 2
+
+    def _grade_level(self, score):
+        return str(round(score))
+
diff --git a/readability/scorers/lix.py b/readability/scorers/lix.py
@@ -0,0 +1,62 @@
+from readability.exceptions import ReadabilityException
+
+
+class Result:
+    def __init__(self, score, grade_levels, ease):
+        self.score = score
+        self.ease = ease
+        self.grade_levels = grade_levels
+
+    def __str__(self):
+        return "score: {}, ease: '{}', grade_levels: {}". \
+            format(self.score, self.ease, self.grade_levels)
+
+
+
+class LixLesbarkeitsIndex:
+    def __init__(self, stats, min_words=100):
+        self._stats = stats
+        if stats.num_words < min_words:
+            raise ReadabilityException('{} words required.'.format(min_words))
+
+    def score(self):
+        score = self._score()
+        return Result(
+            score=score,
+            ease=self._ease(score),
+            grade_levels=self._grade_levels(score)
+        )
+
+    def _score(self):
+        """
+        Calculates the Lix readability index
+        :param avg_words_per_sentence: mean sentence length
+        :param ratio_long_words: ratio of words with six or more characters
+        :return: Lix index
+        """
+        stats = self._stats
+        return stats.avg_words_per_sentence + stats.avg_num_six_letter_words
+
+    def _ease(self, score):
+        if score >= 4 and score <= 5:
+            return 'very_easy'
+        elif score >=6 and score <=7:
+            return 'easy'
+        elif score >=8 and score <=10:
+            return 'average'
+        elif score >=11 and score <=12:
+            return 'difficult'
+        else:
+            return 'very_difficult'
+
+    def _grade_levels(self, score):
+        if score >= 4 and score <= 5:
+            return [4, 5]
+        elif score >=6 and score <=7:
+            return [6, 7]
+        elif score >=8 and score <=10:
+            return [8, 9, 10]
+        elif score >=11 and score <=12:
+            return [11, 12]
+        else:
+            return ['college level and above']
-Original file line number
+Diff line change
@@ Expand Up / @@ -2,4 +2,5 @@ __pycache__ @@
     .vscode
     py_readability_metrics.egg-info
     dist
-    build
+    build
+    venv