Skip to content

Commit

Permalink
Merge pull request #138 from bptlab/refactor/96-improve-prompts
Browse files Browse the repository at this point in the history
Refactor/96 improve prompts
  • Loading branch information
FR-SON authored May 24, 2024
2 parents 89334d7 + 33f69bb commit 085a70a
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 70 deletions.
Binary file modified tracex_project/db.sqlite3
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def execute(
_input=None,
patient_journey=None,
patient_journey_sentences=None,
cohort=None
cohort=None,
):
"""
Extracts the activity labels from the patient journey with the following steps:
Expand Down Expand Up @@ -76,14 +76,12 @@ def __extract_activities(patient_journey_numbered, condition):
messages.append(
{
"role": "user",
"content": patient_journey_numbered
+ "\n\nConsider all important points regarding the course of the disease of "
+ condition,
"content": f"Focus on those events that are related to the course of the disease of {condition}."
f"\n\n{patient_journey_numbered}",
}
)
else:
messages.append({"role": "user", "content": patient_journey_numbered})
messages.append({"role": "user", "content": patient_journey_numbered})
activity_labels = u.query_gpt(messages).split("\n")
df = pd.DataFrame(activity_labels, columns=[column_name])
df[["activity", "sentence_id"]] = df["activity"].str.split(" #", expand=True)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,81 +24,54 @@ def execute(
self, _input=None, patient_journey=None, patient_journey_sentences=None
):
"""Preprocesses the patient input for better data quality."""
super().execute(_input, patient_journey=patient_journey, patient_journey_sentences=patient_journey_sentences)
preprocessed_text = self.__spellcheck(patient_journey)
preprocessed_text = self.__punctuationcheck(preprocessed_text)
preprocessed_text = self.__identify_timestamps(preprocessed_text)
preprocessed_text = self.__transform_timestamps(preprocessed_text)
preprocessed_text = self.__interpret_timestamps(preprocessed_text)
preprocessed_text = self.__calculate_timestamps(preprocessed_text)
super().execute(
_input,
patient_journey=patient_journey,
patient_journey_sentences=patient_journey_sentences,
)
preprocessed_text = self.__apply_preprocessing_step(
patient_journey, "SPELLCHECK"
)
preprocessed_text = self.__apply_preprocessing_step(
preprocessed_text, "PUNCTUATION"
)
preprocessed_text = self.__apply_preprocessing_step(
preprocessed_text, "TIME_IDENTIFICATION"
)
preprocessed_text = self.__apply_preprocessing_step(
preprocessed_text, "TIME_HOLIDAYS"
)
preprocessed_text = self.__apply_preprocessing_step(
preprocessed_text, "TIME_GENERAL"
)
preprocessed_text = self.__apply_preprocessing_step(
preprocessed_text, "TIME_IDENTIFICATION"
)
preprocessed_text = self.__apply_preprocessing_step(
preprocessed_text, "TIME_RELATIVE"
)
preprocessed_text = self.__apply_preprocessing_step(
preprocessed_text, "TIME_PROPAGATE"
)

patient_journey_sentences = self.__make_sentences(preprocessed_text)

return patient_journey_sentences

@staticmethod
def __make_sentences(text):
"""
Split the input text into a list of sentences.

Line breaks are replaced by single spaces, then the text is split on the
literal separator ". " (period followed by a space). The separator is
consumed by the split, so only a period at the very end of the text
survives in the last list element.

Returns the resulting list of strings.
"""
# Flatten line breaks so a sentence spanning several lines stays in one piece.
text = text.replace("\n", " ")
# Plain substring split: "?" and "!" do not end a sentence here, and any
# ". " inside a sentence (e.g. after an abbreviation) also causes a split.
text = text.split(". ")

return text

@staticmethod
def __spellcheck(text):
"""
Check and correct spelling and grammar in the input.

Loads the stored prompt named "PREPROCESSING_SPELLCHECK", appends the
input text to it as a user message and returns the answer produced by
`u.query_gpt` for that conversation.
"""
# NOTE(review): `.text` is appended to below, so it is presumably a list of
# chat-message dicts stored on the Prompt record — confirm against the model.
messages = Prompt.objects.get(name="PREPROCESSING_SPELLCHECK").text
new_user_message = {"role": "user", "content": text}
messages.append(new_user_message)
preprocessed_text = u.query_gpt(messages)

return preprocessed_text

@staticmethod
def __punctuationcheck(text):
"""
Check and correct punctuation and commas in the input.

Loads the stored prompt named "PREPROCESSING_PUNCTUATION", appends the
input text to it as a user message and returns the answer produced by
`u.query_gpt` for that conversation.
"""
# NOTE(review): `.text` is appended to below, so it is presumably a list of
# chat-message dicts stored on the Prompt record — confirm against the model.
messages = Prompt.objects.get(name="PREPROCESSING_PUNCTUATION").text
new_user_message = {"role": "user", "content": text}
messages.append(new_user_message)
preprocessed_text = u.query_gpt(messages)

return preprocessed_text

@staticmethod
def __identify_timestamps(text):
"""Identifies and formats time specifications in the input."""
messages = Prompt.objects.get(name="PREPROCESSING_IDENTIFY_TIMESTAMPS").text
def __apply_preprocessing_step(text, prompt_name):
"""
Apply a single preprocessing step selected by its prompt name.

`prompt_name` is the suffix of the stored prompt to use: the prompt is
looked up as "PREPROCESSING_<prompt_name>", so passing "SPELLCHECK"
loads the prompt named "PREPROCESSING_SPELLCHECK". The input text is
appended to that prompt as a user message and the answer produced by
`u.query_gpt` is returned.
"""
# NOTE(review): `.text` is appended to below, so it is presumably a list of
# chat-message dicts stored on the Prompt record — confirm against the model.
messages = Prompt.objects.get(name=f"PREPROCESSING_{prompt_name}").text
new_user_message = {"role": "user", "content": text}
messages.append(new_user_message)
preprocessed_text = u.query_gpt(messages)

return preprocessed_text

@staticmethod
def __transform_timestamps(text):
"""
Add a timeline to the input for a better understanding of events.

Loads the stored prompt named "PREPROCESSING_TRANSFORM_TIMESTAMPS",
appends the input text to it as a user message and returns the answer
produced by `u.query_gpt`. The actual transformation is defined by the
stored prompt text, which is not visible here.
"""
# NOTE(review): `.text` is appended to below, so it is presumably a list of
# chat-message dicts stored on the Prompt record — confirm against the model.
messages = Prompt.objects.get(name="PREPROCESSING_TRANSFORM_TIMESTAMPS").text
new_user_message = {"role": "user", "content": text}
messages.append(new_user_message)
preprocessed_text = u.query_gpt(messages)

return preprocessed_text

@staticmethod
def __calculate_timestamps(text):
"""
Calculate timestamps for the input for a better understanding of events.

Loads the stored prompt named "PREPROCESSING_TIME_CALCULATION", appends
the input text to it as a user message and returns the answer produced
by `u.query_gpt`. How timestamps are calculated is defined by the stored
prompt text, which is not visible here.
"""
# NOTE(review): `.text` is appended to below, so it is presumably a list of
# chat-message dicts stored on the Prompt record — confirm against the model.
messages = Prompt.objects.get(name="PREPROCESSING_TIME_CALCULATION").text
new_user_message = {"role": "user", "content": text}
messages.append(new_user_message)
preprocessed_text = u.query_gpt(messages)

return preprocessed_text

@staticmethod
def __interpret_timestamps(text):
"""Interpret a Timestamp to the input for better understanding of events."""
messages = Prompt.objects.get(name="PREPROCESSING_TIME_INTERPRETATION").text
new_user_message = {"role": "user", "content": text}
messages.append(new_user_message)
preprocessed_text = u.query_gpt(messages)
def __make_sentences(text):
"""Splits the input into a list of its sentences."""
text = text.replace("\n", " ")
text = text.split(". ")

return preprocessed_text
return text

0 comments on commit 085a70a

Please sign in to comment.