From 404dc2c9ed1187083ee10a8800c414a7b84a0e02 Mon Sep 17 00:00:00 2001 From: Emily Kellison-Linn <4672118+emilykl@users.noreply.github.com> Date: Wed, 6 Sep 2023 15:57:34 -0400 Subject: [PATCH 1/2] improve performance by calling file.tell() only when needed --- lasio/reader.py | 39 ++++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/lasio/reader.py b/lasio/reader.py index 9255c90..1a66b12 100644 --- a/lasio/reader.py +++ b/lasio/reader.py @@ -300,18 +300,18 @@ def find_sections_in_file(file_obj): file_pos = int(file_obj.tell()) starts = [] ends = [] - line_no = 0 - line = file_obj.readline() - # for i, line in enumerate(file_obj): + line_no = -1 + line = True + while line: + line = file_obj.readline() + line_no = line_no + 1 sline = line.strip().strip("\n") if sline.startswith("~"): + file_pos = int(file_obj.tell()) - full_line_length(line, file_obj) starts.append((file_pos, line_no, sline)) if len(starts) > 1: ends.append(line_no - 1) - file_pos = int(file_obj.tell()) - line = file_obj.readline() - line_no = line_no + 1 ends.append(line_no) section_positions = [] @@ -320,6 +320,18 @@ def find_sections_in_file(file_obj): return section_positions +def full_line_length(line, file_obj): + """Return the full length of the line in characters, adjusted + based on the type of newline separators used ('\\n' vs. '\\r\\n')""" + newlines = ( + file_obj.newlines + if not isinstance(file_obj.newlines, tuple) + else file_obj.newlines[0] + ) + newline_adjust = len(newlines) - 1 if newlines is not None else 0 + return len(line) + newline_adjust + + def determine_section_type(section_title): """Return the type of the LAS section based on its title @@ -454,7 +466,7 @@ def read_data_section_iterative_normal_engine( title = file_obj.readline() def items(f, start_line_no, end_line_no): - for line_no, line in enumerate(f, start=start_line_no+1): + for line_no, line in enumerate(f, start=start_line_no + 1): line = line.strip("\n").strip() if line.startswith(ignore_data_comments): continue @@ -581,10 +593,15 @@ def read_data_section_iterative_numpy_engine(file_obj, line_nos): file_obj.seek(0) # unpack=True transforms the data from an array of rows to an array of columns. - # loose=False will throw an error on non-numerical data, which then sends the + # loose=False will throw an error on non-numerical data, which then sends the # parsing to the 'normal' parser. array = np.genfromtxt( - file_obj, skip_header=first_line, max_rows=max_rows, names=None, unpack=True, loose=False + file_obj, + skip_header=first_line, + max_rows=max_rows, + names=None, + unpack=True, + loose=False, ) # If there is only one data row, np.genfromtxt treats it as one array of @@ -592,7 +609,7 @@ def read_data_section_iterative_numpy_engine(file_obj, line_nos): # converts the single line data array to an array of arrays(column data). if len(array.shape) == 1: arr_len = array.shape[0] - array = array.reshape(arr_len,1) + array = array.reshape(arr_len, 1) return array @@ -1035,7 +1052,7 @@ def configure_metadata_patterns(line, section_name): # 3. double_dots '..' caused by mnemonic abbreviation (with period) # next to the dot delimiter. if ":" in line: - if not "." in line[:line.find(":")]: + if not "." in line[: line.find(":")]: # If there is no period, then we assume that the colon exists and # everything on the left is the name, and everything on the right # is the value - therefore no unit or description field. From 41dc97316ae88a0dcff5c1f90c5bfcb389c7c920 Mon Sep 17 00:00:00 2001 From: Emily Kellison-Linn <4672118+emilykl@users.noreply.github.com> Date: Thu, 7 Sep 2023 14:16:54 -0400 Subject: [PATCH 2/2] handle files with no newline at EOF(?) --- lasio/reader.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/lasio/reader.py b/lasio/reader.py index 1a66b12..8ded9b7 100644 --- a/lasio/reader.py +++ b/lasio/reader.py @@ -297,7 +297,7 @@ def find_sections_in_file(file_obj): or line breaks. """ - file_pos = int(file_obj.tell()) + file_pos = 0 starts = [] ends = [] line_no = -1 @@ -314,9 +314,10 @@ def find_sections_in_file(file_obj): ends.append(line_no - 1) ends.append(line_no) - section_positions = [] - for j, (file_pos, first_line_no, sline) in enumerate(starts): - section_positions.append((file_pos, first_line_no, ends[j], sline)) + section_positions = [ + (file_pos, first_line_no, ends[j], sline) + for j, (file_pos, first_line_no, sline) in enumerate(starts) + ] return section_positions @@ -329,7 +330,11 @@ def full_line_length(line, file_obj): else file_obj.newlines[0] ) newline_adjust = len(newlines) - 1 if newlines is not None else 0 - return len(line) + newline_adjust + if line.endswith("\n"): + length = len(line) + newline_adjust + else: + length = len(line) + return length def determine_section_type(section_title): @@ -602,6 +607,7 @@ def read_data_section_iterative_numpy_engine(file_obj, line_nos): names=None, unpack=True, loose=False, + dtype=float, ) # If there is only one data row, np.genfromtxt treats it as one array of