Skip to content
This repository has been archived by the owner on Dec 18, 2019. It is now read-only.

Commit

Permalink
PEP8
Browse files: browse the repository at this point in the history
Signed-off-by: Jerome Flesch <[email protected]>
  • Loading branch information
jflesch committed Feb 7, 2018
1 parent fab0e5f commit 003690e
Show file tree
Hide file tree
Showing 32 changed files with 307 additions and 236 deletions.
2 changes: 1 addition & 1 deletion paperwork-backend/paperwork_backend/common/doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ def __get_labels(self):
labels = []
try:
with self.fs.open(self.fs.join(self.path, self.LABEL_FILE),
'r') as file_desc:
'r') as file_desc:
for line in file_desc.readlines():
line = line.strip()
(label_name, label_color) = line.split(",", 1)
Expand Down
4 changes: 3 additions & 1 deletion paperwork-backend/paperwork_backend/docimport.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,9 @@ def get_select_mime_types():
]

def __str__(self):
return _("Import all image files in the folder in the current document")
return (
_("Import all image files in the folder in the current document")
)


class ImageImporter(BaseImporter):
Expand Down
3 changes: 2 additions & 1 deletion paperwork-backend/paperwork_backend/docsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,8 @@ def get(self, obj_id):
return self.index.get(obj_id)

def get_doc_from_docid(self, docid, doc_type_name=None, inst=True):
return self.index.get_doc_from_docid(docid, doc_type_name=doc_type_name,
return self.index.get_doc_from_docid(docid,
doc_type_name=doc_type_name,
inst=inst)

def find_documents(self, sentence, limit=None, must_sort=True,
Expand Down
4 changes: 2 additions & 2 deletions paperwork-backend/paperwork_backend/fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,8 @@ def readline(self, size=-1):
raise OSError("readline() not supported on Gio.File objects")

def readlines(self, hint=-1):
logger.warning("readlines() shouldn't be called on a binary file descriptor. This is not cross-platform")
all = self.readall()
logger.warning("readlines() shouldn't be called on a binary file"
" descriptor. This is not cross-platform")
return [(x + b"\n") for x in self.readall().split(b"\n")]

def seek(self, offset, whence=os.SEEK_SET):
Expand Down
23 changes: 13 additions & 10 deletions paperwork-backend/paperwork_backend/shell.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@
gi.require_version('PangoCairo', '1.0')
gi.require_version('Poppler', '0.18')

from . import config
from . import docimport
from . import docsearch
from .labels import Label
from . import fs
from . import config # noqa: E402
from . import docimport # noqa: E402
from . import docsearch # noqa: E402
from .labels import Label # noqa: E402
from . import fs # noqa: E402


FS = fs.GioFileSystem()
Expand All @@ -31,13 +31,13 @@ def is_interactive():

def verbose(txt):
if is_verbose():
print (txt)
print(txt)


def reply(data):
if "status" not in data:
data['status'] = 'ok'
print (json.dumps(
print(json.dumps(
data, indent=4,
separators=(',', ': '),
sort_keys=True
Expand All @@ -60,7 +60,7 @@ def _dump_page(page):
out = ""
for word in line.word_boxes:
out += word.content + " "
print (out.strip())
print(out.strip())


def cmd_add_label(docid, label_name, color=None):
Expand Down Expand Up @@ -98,7 +98,8 @@ def cmd_add_label(docid, label_name, color=None):
break
if not label and not color:
raise Exception(
"Label {} doesn't exist yet, and no color has been provided".format(
"Label {} doesn't exist yet, and no color has been"
" provided".format(
label_name
)
)
Expand Down Expand Up @@ -410,7 +411,8 @@ def _get_importer(fileuris, doc):
return importers[0]
elif not is_interactive():
raise Exception(
"Many way to import {} and running in batch mode. Can't import.\n{}"
"Many way to import {} and running in batch mode."
" Can't import.\n{}"
.format(
fileuris,
", ".join([str(importer) for importer in importers])
Expand Down Expand Up @@ -698,6 +700,7 @@ def cmd_ocr(*args):
"ocr": [page.pageid for page in pages]
})


def cmd_remove_label(docid, label_name):
"""
Arguments: <document_id> <label_name>
Expand Down
3 changes: 2 additions & 1 deletion paperwork-backend/paperwork_backend/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
import threading
import unicodedata

import PIL.Image

try:
import cairo
CAIRO_AVAILABLE = True
Expand All @@ -36,7 +38,6 @@
import enchant.tokenize
import Levenshtein

import PIL.Image

logger = logging.getLogger(__name__)
FORCED_SPLIT_KEYWORDS_REGEX = re.compile("[\n '()]", re.UNICODE)
Expand Down
2 changes: 1 addition & 1 deletion paperwork-backend/scripts/obfuscate.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ def main(src_dir, dst_dir):
print("Words are replaced by pieces of their hash (SHA512)")
print("")
print("Example:")
print(" %s ~/papers/20100730_0000_01 ~/tmp/20100730_0000_01.anonymized"
print(" %s ~/papers/20100730_0000_01 ~/tmp/20100730_0000_01.anon"
% sys.argv[0])
sys.exit(1)
src = sys.argv[1]
Expand Down
24 changes: 13 additions & 11 deletions paperwork-backend/scripts/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,20 +135,22 @@ def main():
print("===")
print("Maximum number of pages in one document: %d" % max_pages)
print("Maximum word length: %d" % max_word_len)
print("Average word length: %f" % (float(total_word_len) / float(nb_words)))
print ("Average number of words per page: %f"
% (float(nb_words) / float(nb_pages)))
print ("Average number of words per document: %f"
% (float(nb_words) / float(nb_docs)))
print ("Average number of pages per document: %f"
% (float(nb_pages) / float(nb_docs)))
print ("Average number of unique words per document: %f"
% (float(total_nb_unique_words_per_doc) / float(nb_docs)))
print("Average word length: %f" % (
float(total_word_len) / float(nb_words)
))
print("Average number of words per page: %f"
% (float(nb_words) / float(nb_pages)))
print("Average number of words per document: %f"
% (float(nb_words) / float(nb_docs)))
print("Average number of pages per document: %f"
% (float(nb_pages) / float(nb_docs)))
print("Average number of unique words per document: %f"
% (float(total_nb_unique_words_per_doc) / float(nb_docs)))
for key in label_keys:
total = total_labels[key]
value = total_label_accuracy[key]
print ("Average accuracy of label prediction (%s): %f%%"
% (key, (100 * value / total)))
print("Average accuracy of label prediction (%s): %f%%"
% (key, (100 * value / total)))


if __name__ == "__main__":
Expand Down
26 changes: 16 additions & 10 deletions paperwork-gtk/nsis/gen_installer_nsi.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# TODO(Jflesch): PEP8 ...
# flake8: noqa

import re
import sys

from paperwork_backend.util import find_language


DOWNLOAD_URI = "https://download.openpaper.work/${PRODUCT_SHORT_VERSION}/paperwork_${PRODUCT_VERSION}_win64.zip"
DOWNLOAD_URI = (
"https://download.openpaper.work/${PRODUCT_SHORT_VERSION}/"
"paperwork_${PRODUCT_VERSION}_win64.zip"
)

ALL_LANGUAGES = [
"eng", # English (always first)
Expand Down Expand Up @@ -110,7 +116,7 @@
"cym", # Welsh
"yid",
]

UNKNOWN_LANGUAGE = {
'download_section': """
Section /o "{long}" SEC_{upper}
Expand Down Expand Up @@ -274,7 +280,7 @@
; CreateShortCut "$DESKTOP.lnk" "$INSTDIR\\paperwork.exe"
; CreateShortCut "$STARTMENU.lnk" "$INSTDIR\\paperwork.exe"
SetOutPath "$INSTDIR\\Tesseract"
CreateDirectory "$INSTDIR\\Tesseract"
nsisunz::UnzipToLog "$PLUGINSDIR\\tesseract.zip" "$INSTDIR"
Expand Down Expand Up @@ -400,7 +406,7 @@ def get_lang_infos(lang_name):
suffix = "" if len(lang) <= 1 else lang[1]

lang = find_language(lang_name)

if not suffix:
long_name = lang.name
else:
Expand All @@ -419,7 +425,7 @@ def main(args):
return

download_uri = DOWNLOAD_URI

if len(args) == 3:
version = short_version = args[1]
download_uri = args[2]
Expand All @@ -446,12 +452,12 @@ def main(args):
txt = txt.format(**get_lang_infos(lang_name))
out_fd.write(txt)
out_fd.write("""
SectionGroupEnd
SectionGroupEnd
""")


out_fd.write(MIDDLE)

for lang_name in ALL_LANGUAGES:
print ("Adding strings section {}".format(lang_name))
lang = UNKNOWN_LANGUAGE
Expand All @@ -460,7 +466,7 @@ def main(args):
txt = lang['lang_strings']
txt = txt.format(**get_lang_infos(lang_name))
out_fd.write(txt)

out_fd.write("""
!insertmacro MUI_FUNCTION_DESCRIPTION_BEGIN
!insertmacro MUI_DESCRIPTION_TEXT ${SEC_PAPERWORK} $(DESC_SEC_PAPERWORK)
Expand All @@ -480,4 +486,4 @@ def main(args):
print ("out.nsi written")

if __name__ == "__main__":
main(sys.argv)
main(sys.argv)
2 changes: 1 addition & 1 deletion paperwork-gtk/pyinstaller/paperwork_launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@
multiprocessing.freeze_support()

from paperwork.paperwork import main
main()
main()
57 changes: 29 additions & 28 deletions paperwork-gtk/scripts/compare_algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,20 @@
gi.require_version('PangoCairo', '1.0')
gi.require_version('Gtk', '3.0')

import enchant
import enchant.tokenize
import Levenshtein
import pillowfight
import pyocr
import enchant # noqa: E402
import enchant.tokenize # noqa: E402
import Levenshtein # noqa: E402
import pillowfight # noqa: E402
import pyocr # noqa: E402

from paperwork_backend import config
from paperwork_backend import docsearch
from paperwork_backend import config # noqa: E402
from paperwork_backend import docsearch # noqa: E402

from paperwork.frontend.util.jobs import Job
from paperwork.frontend.util.jobs import JobFactory
from paperwork.frontend.util.jobs import JobScheduler
from paperwork.frontend.util.jobs import Job # noqa: E402
from paperwork.frontend.util.jobs import JobFactory # noqa: E402
from paperwork.frontend.util.jobs import JobScheduler # noqa: E402

from gi.repository import GObject
from gi.repository import GObject # noqa: E402


"""
Expand Down Expand Up @@ -191,10 +191,10 @@ def _add_score(self, txt, stats):
stats['maybe'] += 1

def _print_stats(self):
print ("-" * 40)
print("-" * 40)
for algo in ALGORITHMS:
stats = algo[2]
print ("{}".format(algo[0]))
print("{}".format(algo[0]))
sys.stdout.write(" ")
for (name, value) in stats.items():
if not name.startswith("nb_"):
Expand All @@ -209,7 +209,7 @@ def _print_stats(self):
name, str(value).rjust(5)
))
sys.stdout.write("\n")
print ("-" * 40)
print("-" * 40)

def do(self):
with LOCK:
Expand All @@ -231,11 +231,11 @@ def do(self):
time_per_document = elapsed_time / stats['nb_pages']
eta = time_per_document * (g_nb_total_pages - stats['nb_pages'])

print ("")
print ("")
print ("")
print ("")
print ("Done: {} ({}/{} = {}% ==> ETA: {})".format(
print("")
print("")
print("")
print("")
print("Done: {} ({}/{} = {}% ==> ETA: {})".format(
self.page_in,
stats['nb_pages'], g_nb_total_pages,
int(stats['nb_pages'] * 100 / g_nb_total_pages),
Expand All @@ -245,6 +245,7 @@ def do(self):

gc.collect()


GObject.type_register(JobImageProcessing)


Expand Down Expand Up @@ -298,9 +299,9 @@ def main():
global g_nb_total_pages
global g_start_time

print ("Will use {} for OCR".format(OCR_TOOL.get_name()))
print("Will use {} for OCR".format(OCR_TOOL.get_name()))

print ("Initializing dictionnary ...")
print("Initializing dictionnary ...")
g_lang = "eng"
if len(sys.argv) > 1:
g_lang = "fra"
Expand All @@ -311,28 +312,28 @@ def main():
except enchant.tokenize.TokenizerNotFoundError as exc:
print("Warning: Falling back to default tokenizer ({})".format(exc))
g_tknzr = enchant.tokenize.get_tokenizer()
print ("Done")
print("Done")

print ("Loading documents list ...")
print("Loading documents list ...")
pconfig = config.PaperworkConfig()
pconfig.read()
work_dir = pconfig.settings['workdir'].value
dsearch = docsearch.DocSearch(work_dir)
dsearch.reload_index()
print ("Documents loaded")
print ("")
print("Documents loaded")
print("")

print ("Initalizing workers ...")
print("Initalizing workers ...")
manager = WorkerManager()
manager.start()

factory = JobFactoryImageProcessing()
print ("Done")
print("Done")

g_start_time = datetime.datetime.now()

try:
print ("Queueing jobs ...")
print("Queueing jobs ...")
nb_docs = 0
nb_pages = 0
for doc in dsearch.docs:
Expand Down
Loading

0 comments on commit 003690e

Please sign in to comment.