Skip to content

Commit

Permalink
fix pipeline issues
Browse files Browse the repository at this point in the history
  • Loading branch information
nils-schmitt committed Jan 17, 2024
1 parent 2652a1b commit 680932e
Show file tree
Hide file tree
Showing 9 changed files with 71 additions and 857 deletions.
5 changes: 1 addition & 4 deletions command_line_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,13 @@

from tracex.extraction.prototype import input_inquiry as ii
from tracex.extraction.prototype import input_handling as ih
from tracex.extraction.prototype import output_handling as oh


def main():
"""Main function calling every pipeline step needed to run the program."""
ii.greeting()
input_text = ii.get_input()
ih.convert_text_to_csv(input_text)
oh.get_output()
oh.farewell()
outputpath = ih.convert_text_to_csv(input_text)


main()
19 changes: 3 additions & 16 deletions test.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from tracex.extraction.prototype import prompts as p
from tracex.extraction.prototype import create_xes as x

text = open(u.input_path / "journey_synth_covid_0.txt").read()
""" text = open(u.input_path / "journey_synth_covid_0.txt").read()
df = ih.convert_text_to_bulletpoints(text)
print(df)
df = ih.add_start_dates(text, df)
Expand All @@ -27,19 +27,6 @@
print(df)
ih.convert_dataframe_to_csv(df)
x.create_xes(u.output_path / "single_trace.csv", "test", "event_information")
x.create_xes(u.output_path / "single_trace.csv", "test", "event_information") """

# output = 'The end date for the bulletpoint "Experiencing mild cough and fatigue" and the start date 20220108T0000 is 20230911T0000.'

# fc_message = [
# {"role": "system", "content": p.END_DATE_CONTEXT},
# {
# "role": "user",
# "content": p.END_DATE_FUNCTION_CALL
# + "The text: "
# + output
# },
# ]


# print(u.query_gpt(fc_message, tool_choice={ "type": "function", "function": {"name": "add_end_dates"}}))
oh.get_output(u.output_path / "single_trace.csv")
751 changes: 9 additions & 742 deletions tracex/extraction/content/outputs/all_traces.csv

Large diffs are not rendered by default.

7 changes: 0 additions & 7 deletions tracex/extraction/content/outputs/dataframe.csv

This file was deleted.

9 changes: 9 additions & 0 deletions tracex/extraction/content/outputs/single_trace.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
case_id,event_information,start_date,end_date,duration,event_type,attribute_location
0,"experiencing first Covid-19 symptoms: mild cough, fatigue",20220601T0000,20220608T0000,168:00:00,Symptom Onset,Home
0,brushing off symptoms as common cold,20220601T0000,20220608T0000,168:00:00,Other,Home
0,developing high fever and difficulty breathing,20220601T0000,20220611T0000,240:00:00,Symptom Onset,Hospital
0,deciding to get tested for Covid-19,20220601T0000,20220617T0000,384:00:00,Diagnosis,Doctors
0,going to local testing center,20220617T0000,20220617T0000,00:00:00,Diagnosis,Hospital
0,undergoing PCR test,20220617T0000,20220617T0000,00:00:00,Diagnosis,Home
0,receiving negative test results,20220617T0000,20220619T0000,48:00:00,Diagnosis,Doctors
0,getting infected and testing positive,20220617T0000,20220617T0000,00:00:00,Symptom Onset,Home
2 changes: 1 addition & 1 deletion tracex/extraction/prototype/create_xes.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def get_activity_key():
return get_activity_key()


def create_xes(csv_file, name, key):
def create_xes(csv_file, name="all_traces", key="event_type"):
"""Creates a xes with all traces from the regarding csv."""
dataframe = pd.read_csv(csv_file, sep=",")
dataframe["case_id"] = dataframe["case_id"].astype(str)
Expand Down
54 changes: 47 additions & 7 deletions tracex/extraction/prototype/input_handling.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,39 +10,40 @@ def convert_text_to_csv(text):
"""Converts the input to CSV with intermediate steps."""
steps = str(7)
print("Converting Data: Summarizing the text. (1/" + steps + ")", end="\r")
bulletpoints = convert_text_to_bulletpoints(text)
dataframe = convert_text_to_bulletpoints(text)
print(
"Converting Data: Extracting start date information. (2/" + steps + ")",
end="\r",
)
u.pause_between_queries()
start = add_start_dates(text, bulletpoints)
dataframe = add_start_dates(text, dataframe)
print(
"Converting Data: Extracting end date information. (3/" + steps + ") ",
end="\r",
)
u.pause_between_queries()
end = add_end_dates(text, start)
dataframe = add_end_dates(text, dataframe)
print(
"Converting Data: Extracting duration information. (4/" + steps + ") ", end="\r"
)
u.pause_between_queries()
duration = add_durations(text, end)
dataframe = add_durations(dataframe)
print(
"Converting Data: Extracting event types. (5/" + steps + ") ", end="\r"
)
u.pause_between_queries()
event_type = add_event_types(duration)
dataframe = add_event_types(dataframe)
print(
"Converting Data: Extracting location information. (6/" + steps + ")", end="\r"
)
u.pause_between_queries()
location = add_locations(event_type)
dataframe = add_locations(dataframe)
print(
"Converting Data: Creating output CSV. (7/" + steps + ") ", end="\r"
)
output_path = convert_dataframe_to_csv(location)
output_path = convert_dataframe_to_csv(dataframe)
print("Dataconversion finished. ")
output(dataframe)
return output_path


Expand Down Expand Up @@ -318,3 +319,42 @@ def convert_dataframe_to_csv(df):
path_or_buf=output_path, sep=",", encoding="utf-8", header=True, index=False
)
return output_path


def output(df):
    """Offer to show the extracted trace and to append it to all_traces.csv.

    Asks whether the result should be printed; if the user declines, the
    location of the written CSV file is printed instead. Afterwards asks
    whether the trace should be appended to the CSV containing all traces
    and finally prints the farewell message.

    Args:
        df: The object printed when the user wants to see the output
            (the caller passes the extracted trace dataframe).
    """
    if u.get_decision("Would you like to see the output? (y/n)\n"):
        print(df)
    else:
        # Build the path first and format it into the message. "/" binds
        # tighter than "+", so the former `"..." + u.output_path / "name."`
        # tried to add a str to a path object, and the sentence's full stop
        # ended up inside the file name.
        csv_path = u.output_path / "single_trace.csv"
        print(f"The output can be found at: {csv_path}.")
    if u.get_decision(
        "Would you like to append this trace to all_traces.csv? (y/n)\n"
    ):
        append_csv()
    farewell()


def append_csv():
    """Appends the current trace to the CSV containing all traces.

    Reads the case ids already stored in ``u.CSV_ALL_TRACES`` and appends
    every data row of ``u.CSV_OUTPUT`` (header and blank lines skipped) with
    its case id shifted past the highest id found, so the appended trace
    never reuses an existing case id.
    """
    # Both files are CSVs written as UTF-8, so read/write them the same way
    # instead of relying on the platform's default encoding.
    with open(u.CSV_ALL_TRACES, "r", encoding="utf-8") as f:
        existing_lines = f.readlines()

    # Skip the header and any blank lines; a single existing data row counts
    # as well, otherwise its case id could be handed out a second time.
    existing_rows = [row for row in existing_lines[1:] if row.strip()]
    trace_count = max(
        (int(row.split(",", 1)[0]) for row in existing_rows), default=0
    )

    with open(u.CSV_OUTPUT, "r", encoding="utf-8") as f:
        # Drop blank lines first, then the header row.
        new_rows = [row for row in f if row.strip()][1:]
    if not new_rows:
        return

    # If the existing file does not end with a line break, the first appended
    # row would be glued onto its last line.
    needs_newline = bool(existing_lines) and not existing_lines[-1].endswith("\n")

    with open(u.CSV_ALL_TRACES, "a", encoding="utf-8") as f:
        if needs_newline:
            f.write("\n")
        for row in new_rows:
            # Split off the complete first field: indexing row[0] would only
            # look at the first digit of a multi-digit case id.
            case_id, separator, remainder = row.partition(",")
            shifted_row = f"{int(case_id) + trace_count + 1}{separator}{remainder}"
            if not shifted_row.endswith("\n"):
                shifted_row += "\n"
            f.write(shifted_row)


def farewell():
    """Say goodbye to the user on standard output."""
    closing_message = (
        "-----------------------------------\nThank you for using TracEX!\n\n"
    )
    print(closing_message)
79 changes: 0 additions & 79 deletions tracex/extraction/prototype/output_handling.py

This file was deleted.

2 changes: 1 addition & 1 deletion tracex/extraction/prototype/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
MAX_TOKENS = 1100
TEMPERATURE_SUMMARIZING = 0
TEMPERATURE_CREATION = 1
CSV_OUTPUT = settings.BASE_DIR / "extraction/content/outputs/intermediates/7_output.csv"
CSV_OUTPUT = settings.BASE_DIR / "extraction/content/outputs/single_trace.csv"
CSV_ALL_TRACES = settings.BASE_DIR / "extraction/content/outputs/all_traces.csv"


Expand Down

0 comments on commit 680932e

Please sign in to comment.