Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@ __pycache__
.vscode
py_readability_metrics.egg-info
dist
build
build
venv
48 changes: 43 additions & 5 deletions readability/readability.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,21 @@
from .text import Analyzer
from .scorers import ARI, ColemanLiau, DaleChall, Flesch, \
FleschKincaid, GunningFog, LinsearWrite, Smog, Spache
import warnings

import nltk

from .scorers import (ARI, ColemanLiau, DaleChall, Flesch, FleschKincaid,
GunningFog, LinsearWrite, LixLesbarkeitsIndex,
MiyazakiReadabilityIndex, Smog, Spache,
WienerSachtextformel, Gsmog)
from .text import Analyzer

nltk.download('punkt_tab')

class Readability:
def __init__(self, text, min_words=100):
def __init__(self, text, min_words=100, language='en'):
self._analyzer = Analyzer()
self._statistics = self._analyzer.analyze(text)
self._min_words = min_words
self._language = language
if self._min_words < 100:
warnings.warn(
"Documents with fewer than 100 words may affect the accuracy of readability tests"
Expand All @@ -27,7 +35,7 @@ def dale_chall(self):

def flesch(self):
"""Calculate Flesch Reading Ease score."""
return Flesch(self._statistics, self._min_words).score()
return Flesch(self._statistics, self._min_words, self._language).score()

def flesch_kincaid(self):
"""Calculate Flesch-Kincaid Grade Level."""
Expand All @@ -46,6 +54,34 @@ def smog(self,all_sentences=False, ignore_length=False):
`all_sentences` indicates whether SMOG should use a sample of 30 sentences, as described in the original paper, or if it should use all sentences in the text"""
return Smog(self._statistics, self._analyzer.sentences,
all_sentences=all_sentences, ignore_length=ignore_length).score()

def gsmog(self, ignore_length=False):
"""GSMOG Index. Measure the SMOG score adapted for German text"""
return Gsmog(self._statistics, ignore_length=ignore_length).score()

def erste_wiener_sachtextformel(self):
"""erste Wiener Sachtextformel."""
return WienerSachtextformel(self._statistics, self._min_words).erste_wiener_sachtextformel_score()

def zweite_wiener_sachtextformel(self):
"""zweite Wiener Sachtextformel."""
return WienerSachtextformel(self._statistics, self._min_words).zweite_wiener_sachtextformel_score()

def dritte_wiener_sachtextformel(self):
"""dritte Wiener Sachtextformel."""
return WienerSachtextformel(self._statistics, self._min_words).dritte_wiener_sachtextformel_score()

def vierte_wiener_sachtextformel(self):
"""vierte Wiener Sachtextformel."""
return WienerSachtextformel(self._statistics, self._min_words).vierte_wiener_sachtextformel_score()

def lix_lesbarkeits_index(self):
"""LIX Lesbarkeitsindex."""
return LixLesbarkeitsIndex(self._statistics, self._min_words).score()

def miyazaki_readability_index(self):
"""Miyazaki Readability Index."""
return MiyazakiReadabilityIndex(self._statistics, self._min_words).score()

def spache(self):
"""Spache Index."""
Expand All @@ -59,4 +95,6 @@ def statistics(self):
'num_polysyllabic_words': self._statistics.num_poly_syllable_words,
'avg_words_per_sentence': self._statistics.avg_words_per_sentence,
'avg_syllables_per_word': self._statistics.avg_syllables_per_word,
'num_six_letter_words': self._statistics.num_six_letter_words,
'num_mono_syllable_words': self._statistics.num_mono_syllable_words,
}
10 changes: 7 additions & 3 deletions readability/scorers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@

from .ari import ARI
from .coleman_liau import ColemanLiau
from .dale_chall import DaleChall
from .flesch import Flesch
from .flesch_kincaid import FleschKincaid
from .gsmog import Gsmog
from .gunning_fog import GunningFog
from .coleman_liau import ColemanLiau
from .dale_chall import DaleChall
from .ari import ARI
from .linsear_write import LinsearWrite
from .lix import LixLesbarkeitsIndex
from .miyazaki_readability_index import MiyazakiReadabilityIndex
from .smog import Smog
from .spache import Spache
from .wiener_sachtextformel import WienerSachtextformel
105 changes: 73 additions & 32 deletions readability/scorers/flesch.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@ def __str__(self):


class Flesch:
def __init__(self, stats, min_words=100):
def __init__(self, stats, min_words=100, language='en'):
self._stats = stats
self._language = language
if stats.num_words < min_words:
raise ReadabilityException('{} words required.'.format(min_words))

Expand All @@ -27,38 +28,78 @@ def score(self):

def _score(self):
stats = self._stats
words_per_sent = stats.num_words / stats.num_sentences
syllables_per_word = stats.num_syllables / stats.num_words
return 206.835 - (1.015 * words_per_sent) - (84.6 * syllables_per_word)
if self._language == 'en':
words_per_sent = stats.num_words / stats.num_sentences
syllables_per_word = stats.num_syllables / stats.num_words
return 206.835 - (1.015 * words_per_sent) - (84.6 * syllables_per_word)
elif self._language == 'de':
words_per_sent = stats.num_words / stats.num_sentences
syllables_per_word = stats.num_syllables / stats.num_words
return 180 - words_per_sent - (58.5 * syllables_per_word)
else:
raise ReadabilityException('Unsupported language: {}'.format(self._language))


def _ease(self, score):
if score >= 90 and score <= 100:
return 'very_easy'
elif score >= 80 and score < 90:
return 'easy'
elif score >= 70 and score < 80:
return 'fairly_easy'
elif score >= 60 and score < 70:
return 'standard'
elif score >= 50 and score < 60:
return 'fairly_difficult'
elif score >= 30 and score < 50:
return 'difficult'
else:
return 'very_confusing'
if self._language == 'en':
if score >= 90 and score <= 100:
return 'very_easy'
elif score >= 80 and score < 90:
return 'easy'
elif score >= 70 and score < 80:
return 'fairly_easy'
elif score >= 60 and score < 70:
return 'standard'
elif score >= 50 and score < 60:
return 'fairly_difficult'
elif score >= 30 and score < 50:
return 'difficult'
else:
return 'very_confusing'
elif self._language == 'de':
if score >= 90 and score <= 100:
return 'sehr_leicht'
elif score >= 80 and score < 90:
return 'leicht'
elif score >= 70 and score < 80:
return 'mittel_leicht'
elif score >= 60 and score < 70:
return 'mittel'
elif score >= 50 and score < 60:
return 'mittel_schwer'
elif score >= 30 and score < 50:
return 'schwer'
else:
return 'sehr_schwer'

def _grade_levels(self, score):
if score >= 90 and score <= 100:
return ['5']
elif score >= 80 and score < 90:
return ['6']
elif score >= 70 and score < 80:
return ['7']
elif score >= 60 and score < 70:
return ['8', '9']
elif score >= 50 and score < 60:
return ['10', '11', '12']
elif score >= 30 and score < 50:
return ['college']
else:
return ['college_graduate']
if self._language == 'en':
if score >= 90 and score <= 100:
return ['5']
elif score >= 80 and score < 90:
return ['6']
elif score >= 70 and score < 80:
return ['7']
elif score >= 60 and score < 70:
return ['8', '9']
elif score >= 50 and score < 60:
return ['10', '11', '12']
elif score >= 30 and score < 50:
return ['college']
else:
return ['college_graduate']
elif self._language == 'de':
if score >= 90 and score <= 100:
return ['11']
elif score >= 80 and score < 90:
return ['11, 12']
elif score >= 70 and score < 80:
return ['11, 12']
elif score >= 60 and score < 70:
return ['13, 14, 15']
elif score >= 50 and score < 60:
return ['13, 14, 15']
elif score >= 30 and score < 50:
return ['13, 14, 15']
else:
return ['Akademikerinnen und Akademiker']
54 changes: 54 additions & 0 deletions readability/scorers/gsmog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import math
import warnings

from readability.exceptions import ReadabilityException


class Result:
    """Value object pairing a GSMOG score with the grade level derived from it."""

    def __init__(self, score, grade_level):
        """Keep the numeric ``score`` and its ``grade_level`` label."""
        self.score, self.grade_level = score, grade_level

    def __str__(self):
        """Render both fields as a single human-readable line."""
        template = "score: {}, grade_level: {}"
        return template.format(self.score, self.grade_level)


class Gsmog:
    """GSMOG scorer: the SMOG readability formula adapted for German text.

    The score is ``sqrt(30 * polysyllabic_words / sentences) - 2``, computed
    from the counts stored in the statistics object handed to the constructor.
    """

    def __init__(self, stats, ignore_length=False):
        """Validate the sentence count and remember the statistics.

        :param stats: text statistics; ``num_sentences`` and
            ``num_poly_syllable_words`` are read from it.
        :param ignore_length: when true, a text with fewer than 30 sentences
            only produces a warning instead of an exception.
        :raises ReadabilityException: if fewer than 30 sentences were found
            and ``ignore_length`` is false.
        """
        if stats.num_sentences < 30:
            message = 'SMOG requires 30 sentences. {} found'.format(
                stats.num_sentences)
            if ignore_length:
                warnings.warn(message)
            else:
                raise ReadabilityException(message)
        self._stats = stats

    def score(self):
        """Return a ``Result`` holding the GSMOG score and its grade level."""
        value = self._score()
        return Result(score=value, grade_level=self._grade_level(value))

    def _score(self):
        """Compute the raw GSMOG value from the stored statistics."""
        stats = self._stats
        # Polysyllabic words are the "complex" words of the formula
        # (words with 3 or more syllables).
        scaled = 30 * stats.num_poly_syllable_words / stats.num_sentences
        return math.sqrt(scaled) - 2

    def _grade_level(self, score):
        """Express the score as a grade level: the rounded value as a string."""
        nearest = round(score)
        return str(nearest)

62 changes: 62 additions & 0 deletions readability/scorers/lix.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
from readability.exceptions import ReadabilityException


class Result:
    """Value object for a LIX computation: score, ease label and grade levels."""

    def __init__(self, score, grade_levels, ease):
        """Keep the numeric ``score``, its ``ease`` label and ``grade_levels``."""
        self.score = score
        self.grade_levels = grade_levels
        self.ease = ease

    def __str__(self):
        """Render all three fields as a single human-readable line."""
        template = "score: {}, ease: '{}', grade_levels: {}"
        return template.format(self.score, self.ease, self.grade_levels)



class LixLesbarkeitsIndex:
    """LIX (Lesbarkeitsindex) readability scorer.

    The score is the sum of two statistics of the analysed text and is mapped
    onto an ease label and a list of grade levels through one shared table of
    contiguous score bands.
    """

    # (exclusive upper bound, ease label, grade levels), ordered easiest first.
    # The bands are half-open so that fractional scores such as 5.5 or 7.5
    # always land in a band instead of falling through to the hardest one.
    # NOTE(review): these bounds (4-12) look like school-grade numbers, while
    # the commonly published LIX scale runs roughly from 20 to 60 -- confirm
    # the intended scale against the statistics that feed `_score`.
    _BANDS = (
        (6, 'very_easy', [4, 5]),
        (8, 'easy', [6, 7]),
        (11, 'average', [8, 9, 10]),
        (13, 'difficult', [11, 12]),
    )
    _HARDEST = ('very_difficult', ['college level and above'])

    def __init__(self, stats, min_words=100):
        """Store the statistics after checking the minimum text length.

        :param stats: text statistics; ``num_words``,
            ``avg_words_per_sentence`` and ``avg_num_six_letter_words`` are
            read from it.
        :param min_words: minimum number of words the text must contain.
        :raises ReadabilityException: if the text has fewer than
            ``min_words`` words.
        """
        self._stats = stats
        if stats.num_words < min_words:
            raise ReadabilityException('{} words required.'.format(min_words))

    def score(self):
        """Return a ``Result`` with the LIX score, ease label and grade levels."""
        score = self._score()
        return Result(
            score=score,
            ease=self._ease(score),
            grade_levels=self._grade_levels(score)
        )

    def _score(self):
        """Calculate the LIX readability index from the stored statistics.

        :return: ``avg_words_per_sentence + avg_num_six_letter_words``
        """
        stats = self._stats
        # NOTE(review): the textbook formula adds the *percentage* of long
        # words; presumably `avg_num_six_letter_words` already carries that
        # scaling -- verify against the statistics implementation.
        return stats.avg_words_per_sentence + stats.avg_num_six_letter_words

    def _band(self, score):
        """Return the ``(ease, grade_levels)`` pair of the band holding ``score``."""
        for upper_bound, ease, grade_levels in self._BANDS:
            if score < upper_bound:
                return ease, grade_levels
        return self._HARDEST

    def _ease(self, score):
        """Return the ease label for ``score``.

        Scores below the first band count as ``'very_easy'``; scores of 13 and
        above count as ``'very_difficult'``.
        """
        return self._band(score)[0]

    def _grade_levels(self, score):
        """Return the grade levels for ``score`` as a fresh list."""
        # Copy so that callers mutating the result cannot corrupt the table.
        return list(self._band(score)[1])
Loading