Skip to content

Commit

Permalink
♻️minor changes to prompts
Browse files Browse the repository at this point in the history
  • Loading branch information
FR-SON committed Jan 29, 2024
1 parent db1c7d8 commit b32b9f6
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 19 deletions.
3 changes: 1 addition & 2 deletions tracex/extraction/logic/modules/module_time_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ def __extract_start_date(self, activity_label):
{"role": "assistant", "content": p.START_DATE_ANSWER},
]
output = u.query_gpt(messages)
print(output + "\n")
fc_message = [
{"role": "system", "content": p.FC_START_DATE_CONTEXT},
{"role": "user", "content": p.FC_START_DATE_PROMPT + "The text: " + output},
Expand All @@ -54,7 +53,7 @@ def __extract_end_date(self, row):
{
"role": "user",
"content": f"{p.END_DATE_PROMPT} \nThe text: {self.patient_journey} \nThe bulletpoint: "
f"{row['event_information']} \nThe start date: {row['start']}",
f"{row['event_information']} \nThe start date: {row['start']}",
},
{"role": "assistant", "content": p.END_DATE_ANSWER},
]
Expand Down
37 changes: 21 additions & 16 deletions tracex/extraction/logic/modules/module_time_extractor_backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@ def __extract_start_date(self, activity_label):
]
start_date = u.query_gpt(messages)
print(start_date + "\n")
assert self.is_valid_date_format(start_date, "%Y%m%dT%H%M") is True, f"Date {start_date} has no valid format."
assert (
self.is_valid_date_format(start_date, "%Y%m%dT%H%M") is True
), f"Date {start_date} has no valid format."

return start_date

Expand All @@ -45,13 +47,15 @@ def __extract_end_date(self, row):
{"role": "system", "content": END_DATE_CONTEXT},
{
"role": "user",
"content": f"\nThe text: {self.patient_journey} \nThe bulletpoint: "
f"{row['event_information']} \nThe start date: {row['start']}",
"content": f"\nThe text: {self.patient_journey} \nThe activity label: "
f"{row['event_information']} \nThe start date: {row['start']}",
},
]
end_date = u.query_gpt(messages)
print(end_date + "\n")
assert self.is_valid_date_format(end_date, "%Y%m%dT%H%M") is True, f"Date {end_date} has no valid format."
assert (
self.is_valid_date_format(end_date, "%Y%m%dT%H%M") is True
), f"Date {end_date} has no valid format."

return end_date

Expand All @@ -75,6 +79,7 @@ def is_valid_date_format(date_string, date_format):
except ValueError:
return False


# START_DATE_CONTEXT = """
# You are an expert in text understanding and your job is to take a given text and given summarizing bulletpoints and to add a start date to every bulletpoint.
# Edit the bulletpoints in a way, that you just take the existing bulletpoints and add a start date at the end of it.
Expand All @@ -92,15 +97,16 @@ def is_valid_date_format(date_string, date_format):
# """

START_DATE_CONTEXT = """
You are provided with a natural language text containing various events. Your task is to identify the start date of
a specific activity mentioned in the text. The activity label will be provided, and it is your job to extract only
the start date associated with this activity from the text.
You are provided with a natural language text containing various events. Your task is to identify the start date of
a specific activity mentioned in the text. The activity label will be provided, and it is your job to extract only
the start date associated with this activity from the text.
Under no circumstances put anything else in the output apart from the extracted start date.
Please follow the following rules:
1. The format of the date should always be YYYYMMDDT0000. For example, 20200101T0000.
2. If only a month in mentioned then the date should always be the first day of the month. For example for March it should be 20200301T0000.
2. If only a month is mentioned then the date should always be the first day of the month. For example for March it should be 20200301T0000.
3. If the date is mentioned in a different format, please convert it to the format mentioned above.
4. Also consider context information from previous activities and their start dates.
4. Translate formulations like "the next day" or "over the following weeks" to the corresponding date.
5. Also consider context information from previous activities and their start dates.
"""

# END_DATE_CONTEXT = """
Expand All @@ -118,16 +124,15 @@ def is_valid_date_format(date_string, date_format):
# """

END_DATE_CONTEXT = """
You are provided with a natural language text containing various events. Your task is to identify the end date of
a specific activity mentioned in the text. The activity label and the corresponding start date will be provided,
and it is your job to extract only the end date associated with this activity from the text.
You are provided with a natural language text containing various events. Your task is to identify the end date of
a specific activity mentioned in the text. The activity label and the corresponding start date will be provided,
and it is your job to extract only the end date associated with this activity from the text.
Under no circumstances put anything else in the output apart from the extracted end date.
Please follow the following rules:
1. The format of the date should always be YYYYMMDDT0000. For example, 20200101T0000.
2. If only a month is mentioned then the date should always be the first day of the month. For example for March it should be 20200301T0000.
3. If the date is mentioned in a different format, please convert it to the format mentioned above.
4. Also consider context information from previous activities and their start dates and end dates. The end dates should
follow logically from the start dates.
5. End dates can not be earlier than the start dates.
4. Translate formulations like "the next day" or "over the following weeks" to the corresponding date.
5. Also consider context information from previous activities and their start dates and end dates.
6. End dates can not be earlier than the start dates.
"""

1 change: 0 additions & 1 deletion tracex/extraction/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,6 @@ def get_context_data(self, **kwargs):
if not (IS_TEST or is_extracted):
orchestrator.run()
single_trace_df = orchestrator.data
print(single_trace_df)
single_trace_df["caseID"] = single_trace_df["caseID"].astype(str)
single_trace_df["start"] = pd.to_datetime(single_trace_df["start"])
single_trace_df["end"] = pd.to_datetime(single_trace_df["end"])
Expand Down

0 comments on commit b32b9f6

Please sign in to comment.