11import json
22
3+
def extract_keys_from_jsonl(input_file, output_file):
    """
    Extract the 'key' values from a JSONL file and save them to a text file.

    A record is kept only when both of the following hold:

    * its 'ref_text' contains at least 16 whitespace-separated words, and
    * the last two '_'-separated fields of its 'key', read as floats
      ``start`` and ``end``, satisfy ``10 <= end - start <= 30``.

    Blank lines are ignored. Lines that are not valid JSON, records without
    a 'key', and records whose fields cannot be interpreted (missing
    'ref_text', non-string values, a key without two numeric trailing
    fields) are reported on stdout and skipped; they never abort the run.

    Args:
        input_file (str): Path to the input JSONL file
        output_file (str): Path to the output text file (one key per line)

    Returns:
        list: The kept keys in input order, or an empty list when the input
        file is missing or another file-level error occurs.
    """
    keys = []

    try:
        with open(input_file, "r", encoding="utf-8") as f:
            for line_num, line in enumerate(f, 1):
                line = line.strip()
                if not line:  # Skip empty lines
                    continue

                try:
                    data = json.loads(line)
                    ref_length = len(data["ref_text"].split())
                    if ref_length < 16:
                        continue
                    if "key" not in data:
                        print(f"Warning: No 'key' field found in line {line_num}")
                        continue
                    start, end = data["key"].split("_")[-2:]
                    duration = float(end) - float(start)
                except json.JSONDecodeError as e:
                    # Must precede ValueError: JSONDecodeError is a subclass of it.
                    print(f"Error parsing JSON on line {line_num}: {e}")
                    continue
                except (KeyError, TypeError, AttributeError, ValueError) as e:
                    # One malformed record must not discard every key
                    # collected so far, so skip it instead of bailing out.
                    print(
                        f"Warning: Skipping malformed record on line {line_num}: {e!r}"
                    )
                    continue

                if duration > 30 or duration < 10:
                    continue
                keys.append(data["key"])

        # Write keys to output file
        with open(output_file, "w", encoding="utf-8") as f:
            f.writelines(f"{key}\n" for key in keys)

        print(f"Successfully extracted {len(keys)} keys to '{output_file}'")
        return keys

    except FileNotFoundError:
        print(f"Error: Input file '{input_file}' not found")
        return []
    except Exception as e:
        # Best-effort script boundary: report file-level failures (I/O,
        # decoding) and keep the "return a list" contract for callers.
        print(f"Error processing file: {e}")
        return []
5155
56+
5257# Example usage:
5358if __name__ == "__main__" :
5459 # Replace 'input.jsonl' with your actual input file path
5560 # Replace 'keys_only.txt' with your desired output file path
5661 input_filename = "filtered_results/deletion_error_lt_0.05.jsonl"
5762 output_filename = "deletion_error_lt0.05_300h.txt"
58-
63+
5964 extracted_keys = extract_keys_from_jsonl (input_filename , output_filename )
60-
65+
6166 # Optional: Print the first few keys as a preview
6267 if extracted_keys :
6368 print (f"\n First few keys extracted:" )
@@ -66,15 +71,16 @@ def extract_keys_from_jsonl(input_file, output_file):
6671 if len (extracted_keys ) > 5 :
6772 print (f"... and { len (extracted_keys ) - 5 } more keys" )
6873
74+
6975# Alternative one-liner approach using list comprehension:
def extract_keys_one_liner(input_file, output_file):
    """
    Compact variant: copy every record's 'key' from a JSONL file to a text file.

    No filtering is applied. Blank lines are ignored, and the keys are
    written newline-separated with no trailing newline. Any failure
    (missing file, invalid JSON, a record without 'key') is reported on
    stdout instead of being raised.

    Args:
        input_file (str): Path to the input JSONL file
        output_file (str): Path to the output text file
    """
    try:
        # Parse everything before touching the output file, so a bad input
        # line cannot leave an existing output file truncated.
        with open(input_file, "r", encoding="utf-8") as f_in:
            keys = [json.loads(line)["key"] for line in f_in if line.strip()]
        with open(output_file, "w", encoding="utf-8") as f_out:
            f_out.write("\n".join(keys))
        print(f"Extracted {len(keys)} keys using one-liner approach")
    except Exception as e:
        print(f"Error: {e}")
0 commit comments