Skip to content

Commit

Permalink
added chain of thought for event types and locations for improved acc…
Browse files Browse the repository at this point in the history
…uracy
  • Loading branch information
tkv29 committed Jan 17, 2024
1 parent 1c223bd commit 2652a1b
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 16 deletions.
27 changes: 15 additions & 12 deletions test.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,21 @@
from tracex.extraction.prototype import prompts as p
from tracex.extraction.prototype import create_xes as x

# Run every extraction step on one input text and print `df` after each
# step so intermediate results can be inspected.
# A context manager closes the file handle as soon as the text is read.
with open(u.input_path / "journey_synth_covid_0.txt") as journey_file:
    text = journey_file.read()

df = ih.convert_text_to_bulletpoints(text)
print(df)
df = ih.add_start_dates(text, df)
print(df)
df = ih.add_end_dates(text, df)
print(df)
df = ih.add_durations(df)
print(df)
df = ih.add_event_types(df)
print(df)
df = ih.add_locations(df)
print(df)

ih.convert_dataframe_to_csv(df)
# NOTE(review): presumably "single_trace.csv" is the file written by the call
# above -- confirm against convert_dataframe_to_csv.
x.create_xes(u.output_path / "single_trace.csv", "test", "event_information")

# output = 'The end date for the bulletpoint "Experiencing mild cough and fatigue" and the start date 20220108T0000 is 20230911T0000.'
Expand Down
10 changes: 6 additions & 4 deletions tracex/extraction/prototype/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ def life_circumstances_prompt(sex):
If there is only a month specified, use the first of this month as start date. If there is no date specified in the text conclude 'N/A'.
"""
# Instruction text requesting the start date of a bulletpoint in the
# YYYYMMDDTHHMM format, with "N/A" as the fallback and a step-by-step
# explanation of the conclusion.
START_DATE_PROMPT = """
Here is the text and the bulletpoint for which you should extract the start date in the format YYYYMMDD with the postfix T0000!
In case that you are not able to find a start date return the term "N/A". Only use the format YYYYMMDDTHHMM e.g. 20200401T0000!
Explain step by step your conclusions if the date YYYYMMDDTHHMM is available or N/A.
"""
Expand Down Expand Up @@ -165,7 +165,7 @@ def life_circumstances_prompt(sex):
Only return the date! Nothing else!
"""
# Instruction text requesting the end date of a bulletpoint (given its start
# date) in the YYYYMMDDTHHMM format. When no end date is available, the text
# asks for an estimate: start date plus the activity's average duration.
END_DATE_PROMPT = """
Here is the text and the bulletpoint with the start date for which you should extract the end date in the format YYYYMMDD with the postfix T0000!
In case that you are not able to find a end date return the term "N/A". Only use the format YYYYMMDDTHHMM e.g. 20200401T0000!
Explain step by step your conclusions if the end date YYYYMMDDTHHMM is available, if not calculate the average time of the activity and add this on the start date resulting as the end date.
"""
Expand Down Expand Up @@ -225,7 +225,8 @@ def life_circumstances_prompt(sex):
The only output should be the event type!
"""
# Instruction text requesting a step-by-step explanation and the choice of
# event type for a bulletpoint, picked from the fixed list of labels below.
# NOTE(review): the example in EVENT_TYPE_ANSWER returns 'Doctors Visit' while
# this list spells the label 'Doctor visit' -- confirm which spelling
# downstream code expects.
EVENT_TYPE_PROMPT = """
Here is the bulletpoint for which you should extract the event type.
Explain step by step your conclusions and your choice of event type: 'Symptom Onset', 'Symptom Offset', 'Diagnosis', 'Doctor visit', 'Treatment', 'Hospital stay', 'Medication', 'Lifestyle Change' and 'Feelings'
"""
EVENT_TYPE_ANSWER = """
For example for the bulletpoint 'visiting doctor's' you should return 'Doctors Visit'.
Expand All @@ -250,7 +251,8 @@ def life_circumstances_prompt(sex):
The only output should be the location.
"""
# Instruction text requesting a step-by-step explanation and the choice of
# location for a bulletpoint, restricted to the four labels listed below.
LOCATION_PROMPT = """
Here is the bulletpoint for which you should extract the location.
Explain step by step your conclusions and your choice of location: 'Home' or 'Hospital' or 'Doctors' or 'Other'.
"""
LOCATION_ANSWER = """
For example for the bulletpoints 'visiting doctor's', you should return 'Doctors'.
Expand Down

0 comments on commit 2652a1b

Please sign in to comment.