diff --git a/.gitignore b/.gitignore index abdd986..bc78a30 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,7 @@ *.swp *.swo build/ +dist/ docs/build +pystdf.egg-info/ +MANIFEST diff --git a/pystdf/__main__.py b/pystdf/__main__.py new file mode 100644 index 0000000..8283944 --- /dev/null +++ b/pystdf/__main__.py @@ -0,0 +1,55 @@ + +""" +Execute the pystdf module: apply a conversion to an STDF file. + +Usage: + python -m pystdf [conversion] [stdf-file] + + Conversion is either txt, xml, xlsx, slice or count. +""" + +import sys + + +def print_help(): + print("""pystdf + +Usage: + python -m pystdf [conversion] [stdf-file] + + Conversion is either txt, xml, xlsx, slice or count. +""") + + +def main(): + if len(sys.argv) < 3: + print_help() + return + + conversion, file = sys.argv[1:3] + args = sys.argv[3:] + + if conversion not in ['txt', 'xml', 'xlsx', 'slice', 'count']: + print_help() + return + + if conversion == 'txt': + from pystdf.script import totext + totext.process_file([file]) + elif conversion == 'xml': + from pystdf.script import toxml + toxml.process_file(file) + elif conversion == 'xlsx': + from pystdf.script import toexcel + toexcel.to_excel(file) + elif conversion == 'slice': + from pystdf.script import slice + start, count = args[:] + slice.text_slice(file, int(start), int(count)) + elif conversion == 'count': + from pystdf.script import count + count.process_file(file) + + +if __name__ == '__main__': + main() diff --git a/pystdf/script/__init__.py b/pystdf/script/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scripts/rec_index b/pystdf/script/count.py similarity index 62% rename from scripts/rec_index rename to pystdf/script/count.py index e6b4e64..fcaffa2 100644 --- a/scripts/rec_index +++ b/pystdf/script/count.py @@ -19,59 +19,51 @@ # from __future__ import print_function -import sys, os, re +import sys +import re try: import gzip - have_gzip = True except ImportError: - have_gzip = False + gzip = None try: import bz2 - have_bz2 = True except ImportError: - have_bz2 = False + bz2 = None from pystdf.IO import Parser from pystdf.Indexing import RecordIndexer -import pystdf.V4 -#def info(type, value, tb): -# import traceback, pdb -# # You are not in interactive mode; print the exception -# traceback.print_exception(type, value, tb) -# print -# # ... then star the debugger in post-mortem mode -# pdb.pm() -#sys.excepthook = info -gzPattern = re.compile('\.g?z', re.I) -bz2Pattern = re.compile('\.bz2', re.I) +GZ_PATTERN = re.compile('\.g?z', re.I) +BZ2_PATTERN = re.compile('\.bz2', re.I) -def process_file(fn): - filename, = sys.argv[1:] +def process_file(file_name): reopen_fn = None - if filename is None: + if file_name is None: f = sys.stdin - elif gzPattern.search(filename): - if not have_gzip: + elif GZ_PATTERN.search(file_name): + if not gzip: print("gzip is not supported on this system", file=sys.stderr) sys.exit(1) - reopen_fn = lambda: gzip.open(filename, 'rb') + reopen_fn = lambda: gzip.open(file_name, 'rb') f = reopen_fn() - elif bz2Pattern.search(filename): - if not have_bz2: + elif BZ2_PATTERN.search(file_name): + if not bz2: print("bz2 is not supported on this system", file=sys.stderr) sys.exit(1) - reopen_fn = lambda: bz2.BZ2File(filename, 'rb') + reopen_fn = lambda: bz2.BZ2File(file_name, 'rb') f = reopen_fn() else: - f = open(filename, 'rb') - p=Parser(inp=f, reopen_fn=reopen_fn) - p.addSink(RecordIndexer()) + f = open(file_name, 'rb') + indexer = RecordIndexer() + p = Parser(inp=f, reopen_fn=reopen_fn) + p.addSink(indexer) p.parse() f.close() + print("Record count: ", indexer.recid) + if __name__ == "__main__": if len(sys.argv) < 2: diff --git a/scripts/stdf_slice b/pystdf/script/slice.py similarity index 72% rename from scripts/stdf_slice rename to pystdf/script/slice.py index 5d4a0d2..b274fc1 100644 --- a/scripts/stdf_slice +++ b/pystdf/script/slice.py @@ -18,20 +18,27 @@ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # +import sys + from pystdf.IO import Parser -from pystdf.Mapping import * -from pystdf.Writers import * +from pystdf.Mapping import StreamMapper +from pystdf.Writers import TextWriter + + +def text_slice(file_name, start_index, record_count): + f = open(file_name, 'rb') + p = Parser(inp=f) + record_mapper = StreamMapper() + p.addSink(record_mapper) + p.parse(count=start_index+record_count) + p.addSink(TextWriter()) + f.seek(record_mapper.indexes[start_index]) + p.parse(count=record_count) + if __name__ == '__main__': filename, start, count = sys.argv[1:4] start = int(start) count = int(count) - f = open(filename, 'rb') - p=Parser(inp=f) - record_mapper = StreamMapper() - p.addSink(record_mapper) - p.parse(count=start+count) - p.addSink(AtdfWriter()) - f.seek(record_mapper.indexes[start]) - p.parse(count=count) + text_slice(filename, start, count) diff --git a/scripts/stdf2excel b/pystdf/script/toexcel.py similarity index 62% rename from scripts/stdf2excel rename to pystdf/script/toexcel.py index 85e1bcc..40c0557 100644 --- a/scripts/stdf2excel +++ b/pystdf/script/toexcel.py @@ -19,36 +19,37 @@ # # Modified: 2017 Minh-Hai Nguyen # -import sys, os + +import sys from pystdf.Importer import STDF2DataFrame -import pystdf.V4 +from pystdf import V4 import pandas as pd -def toExcel(fname,tables): - """ Export the tables from toTables to Excel +def to_excel(stdf_file, xlsx_file=None): + """ + Export the tables from toTables to Excel. """ - writer = pd.ExcelWriter(fname) - for k,v in tables.items(): - # Make sure the order of columns complies the specs - record = [r for r in V4.records if r.__class__.__name__.upper()==k] - if len(record)==0: - print("Ignore exporting table %s: No such record type exists." %k) + if xlsx_file is None: + xlsx_file = stdf_file[:stdf_file.rfind('.')] + ".xlsx" + print("Importing %s" % stdf_file) + tables = STDF2DataFrame(stdf_file) + print("Exporting to %s" % xlsx_file) + + writer = pd.ExcelWriter(xlsx_file) + for k, v in tables.items(): + # Make sure the order of columns complies to the specs + record = [r for r in V4.records if r.__class__.__name__.upper() == k] + if len(record) == 0: + print("Ignore exporting table %s: No such record type exists." % k) else: columns = [field[0] for field in record[0].fieldMap] - v.to_excel(writer,sheet_name=k,columns=columns,index=False,na_rep="N/A") + v.to_excel(writer, sheet_name=k, columns=columns, index=False, na_rep="N/A") writer.save() -if __name__=="__main__": - if len(sys.argv)==1: + +if __name__ == "__main__": + if len(sys.argv) == 1: print("Usage: %s " % (sys.argv[0])) else: - fin = sys.argv[1] - if len(sys.argv)>2: - fout = sys.argv[2] - else: - fout = fin[:fin.rfind('.')]+".xlsx" - print("Importing %s" %fin) - dfs= STDF2DataFrame(fin) - print("Exporting to %s" %fout) - toExcel(fout,dfs) + to_excel(sys.argv[1], sys.argv[2]) diff --git a/scripts/stdf2text b/pystdf/script/totext.py similarity index 78% rename from scripts/stdf2text rename to pystdf/script/totext.py index 229f7cd..2d72151 100644 --- a/scripts/stdf2text +++ b/pystdf/script/totext.py @@ -19,58 +19,63 @@ # from __future__ import print_function -import sys, re +import sys +import re try: import gzip - have_gzip = True except ImportError: - have_gzip = False + gzip = None try: import bz2 - have_bz2 = True except ImportError: - have_bz2 = False + bz2 = None from pystdf.IO import Parser from pystdf.Writers import TextWriter -import pystdf.V4 -gzPattern = re.compile('\.g?z', re.I) -bz2Pattern = re.compile('\.bz2', re.I) -def process_file(fnames): - filename = fnames[0] +GZ_PATTERN = re.compile('\.g?z', re.I) +BZ2_PATTERN = re.compile('\.bz2', re.I) + + +def process_file(file_names): + filename = file_names[0] reopen_fn = None if filename is None: f = sys.stdin - elif gzPattern.search(filename): - if not have_gzip: + elif GZ_PATTERN.search(filename): + if not gzip: print("gzip is not supported on this system", file=sys.stderr) sys.exit(1) reopen_fn = lambda: gzip.open(filename, 'rb') f = reopen_fn() - elif bz2Pattern.search(filename): - if not have_bz2: + elif BZ2_PATTERN.search(filename): + if not bz2: print("bz2 is not supported on this system", file=sys.stderr) sys.exit(1) reopen_fn = lambda: bz2.BZ2File(filename, 'rb') f = reopen_fn() else: f = open(filename, 'rb') - p=Parser(inp=f, reopen_fn=reopen_fn) - if len(fnames)<2: + p = Parser(inp=f, reopen_fn=reopen_fn) + if len(file_names) < 2: p.addSink(TextWriter()) p.parse() else: - with open(fnames[1],'w') as fout: + with open(file_names[1], 'w') as fout: p.addSink(TextWriter(stream=fout)) p.parse() f.close() -if __name__ == "__main__": + +def main(): if len(sys.argv) < 2: print("Usage: %s " % (sys.argv[0])) else: process_file(sys.argv[1:]) + + +if __name__ == "__main__": + main() diff --git a/scripts/stdf2xml b/pystdf/script/toxml.py similarity index 80% rename from scripts/stdf2xml rename to pystdf/script/toxml.py index cc350b3..efdb74e 100644 --- a/scripts/stdf2xml +++ b/pystdf/script/toxml.py @@ -19,51 +19,50 @@ # from __future__ import print_function -import sys, re +import sys +import re try: import gzip - have_gzip = True except ImportError: - have_gzip = False + gzip = None try: import bz2 - have_bz2 = True except ImportError: - have_bz2 = False + bz2 = None from pystdf.IO import Parser from pystdf.Writers import XmlWriter -import pystdf.V4 -gzPattern = re.compile('\.g?z', re.I) -bz2Pattern = re.compile('\.bz2', re.I) -def process_file(fn): - filename, = sys.argv[1:] +GZ_PATTERN = re.compile('\.g?z', re.I) +BZ2_PATTERN = re.compile('\.bz2', re.I) + +def process_file(filename): reopen_fn = None if filename is None: f = sys.stdin - elif gzPattern.search(filename): - if not have_gzip: + elif GZ_PATTERN.search(filename): + if not gzip: print("gzip is not supported on this system", file=sys.stderr) sys.exit(1) reopen_fn = lambda: gzip.open(filename, 'rb') f = reopen_fn() - elif bz2Pattern.search(filename): - if not have_bz2: + elif BZ2_PATTERN.search(filename): + if not bz2: print("bz2 is not supported on this system", file=sys.stderr) sys.exit(1) reopen_fn = lambda: bz2.BZ2File(filename, 'rb') f = reopen_fn() else: f = open(filename, 'rb') - p=Parser(inp=f, reopen_fn=reopen_fn) + p = Parser(inp=f, reopen_fn=reopen_fn) p.addSink(XmlWriter()) p.parse() f.close() + if __name__ == "__main__": if len(sys.argv) < 2: print("Usage: %s " % (sys.argv[0])) diff --git a/setup.py b/setup.py index f5c40bb..3788cae 100644 --- a/setup.py +++ b/setup.py @@ -45,8 +45,8 @@ author_email='casey.marshall@gmail.com', url='http://code.google.com/p/pystdf/', packages=['pystdf','pystdf.explorer'], - scripts=['scripts/stdf_slice', 'scripts/rec_index', 'scripts/stdf2text', - 'scripts/stdf2excel', 'scripts/stdf2xml'], + scripts=['pystdf/script/slice.py', 'pystdf/script/count.py', 'pystdf/script/totext.py', + 'pystdf/script/toexcel.py', 'pystdf/script/toxml.py'], classifiers=[ 'Development Status :: 4 - Beta', 'Environment :: Console',