-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtab_delimit_to_json.py
81 lines (70 loc) · 2.72 KB
/
tab_delimit_to_json.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/usr/bin/env python3
# Code by JAL-code
# Create a json file from tab delimited text file.
# Note this file is stage 2 for updating json formatting.
import os
import re
import sys
import json
# SILENTMODE - intended to control whether the user is told if the file was found.
# NOTE: no code visible in this file reads this flag yet.
SILENTMODE = False
# check if file exists
def file_exists(filename, location):
    """Report whether ``filename`` exists inside ``location``.

    Args:
        filename: Name (or path) of the file to look for.
        location: Folder the file is expected to be in. When it is empty or
            ``None`` the name is resolved against the current working
            directory.

    Returns:
        True when the path exists, False otherwise. A message describing the
        outcome is printed in both cases.
    """
    # Earlier versions ignored ``location`` and only ever looked in the
    # current working directory; join the two so the caller's folder is used.
    candidate = os.path.join(location, filename) if location else filename
    found = os.path.exists(candidate)
    if found:
        print(f"Error, file {filename} already exists!")
    else:
        print(f"File {filename} does not exist!")
    return found
# load the lines of data into a dictionary list
def _split_tab_fields(line):
    """Split one line of the file into its tab-separated fields."""
    # Only spaces and the line terminator are trimmed from the edges. Tabs are
    # kept so that an empty first or last field stays in its own column
    # instead of shifting the remaining values under the wrong names.
    return line.strip(' \r\n').split('\t')


def load_products(file_name):
    """Load a tab-delimited text file into a list of row dictionaries.

    The first line of the file supplies the column names; every following
    non-blank line becomes one dictionary mapping column name to field text.

    Args:
        file_name: Path of the tab-delimited file to read.

    Returns:
        A list with one dict per data line, in file order. An empty file
        yields an empty list. A line with fewer fields than there are
        columns produces a dict holding only the leading columns.

    Raises:
        OSError: If the file cannot be opened.
    """
    print("Loading the data")
    # 'utf-8-sig' reads plain UTF-8 unchanged but drops a leading byte-order
    # mark, which would otherwise end up attached to the first column name.
    with open(file_name, 'r', encoding='utf-8-sig') as data_file:
        header = data_file.readline()
        if not header:
            # Nothing in the file at all: no columns, no rows.
            return []
        columns = _split_tab_fields(header)
        processed_data = []
        for line in data_file:
            # Whitespace-only lines (e.g. a trailing newline at the end of
            # the file) carry no record and are skipped.
            if not line.strip():
                continue
            processed_data.append(dict(zip(columns, _split_tab_fields(line))))
    return processed_data
# save an indent formatted json file. Size is larger.
def save_list_of_items(data, datasave):
    """Write ``data`` to the file ``datasave`` as JSON indented by 4 spaces.

    Args:
        data: The JSON-serializable object to store.
        datasave: Path of the output file; it is overwritten if present.
    """
    with open(datasave, 'w') as json_out:
        json.dump(obj=data, fp=json_out, indent=4)
# save a json file for a compact size.
def save_compact_list_of_items(data, datasave):
    """Write ``data`` to the file ``datasave`` as JSON in its smallest form.

    Args:
        data: The JSON-serializable object to store.
        datasave: Path of the output file; it is overwritten if present.
    """
    with open(datasave, 'w') as file:
        # The default separators are ', ' and ': '. Dropping the spaces is
        # what actually makes the output compact; it parses to the same data.
        json.dump(data, file, separators=(',', ':'))
if __name__ == '__main__':
    # True writes the indented (larger) JSON, False the compact one.
    indent = False
    # The record file is the tab-delimited source data.
    record_file = 'sdr2023g.txt'
    # The working file is the JSON output. If the working file is corrupted,
    # reload the record file to rebuild it.
    working_file = 'sdr2023g_c.json'
    # Both files are looked up in the folder the script is run from.
    working_dir = os.getcwd()
    # os.path.join inserts exactly one platform-appropriate separator, so the
    # paths are valid on any OS and never contain a doubled separator.
    record_path = os.path.join(working_dir, record_file)
    working_path = os.path.join(working_dir, working_file)
    print(record_path)
    # Convert only when the record file is actually there.
    if file_exists(record_file, working_dir):
        print("We can load the data!")
        data = load_products(record_path)
        if indent:
            save_list_of_items(data, working_path)
        else:
            save_compact_list_of_items(data, working_path)
    # No working file at this point means the conversion above did not run.
    if not file_exists(working_file, working_dir):
        print("We must find the record file!")
# Regex101 candidate: \s(?P<column>[c]+[\d]{1,3})\s+|(?P<columntype>[a-zA-Z]+)\s+|(?P<length>[\d]+)|(?P<description>\b[A-Za-z]+\s.*?\.\b)