diff --git a/README.md b/README.md index fbbcb1f3..66a561b7 100644 --- a/README.md +++ b/README.md @@ -72,8 +72,8 @@ The main contributors to the project are the six members of the [2023/24 Bachelo - [Soeren Schubert](https://github.com/soeren227) - [Trung-Kien Vu](https://github.com/tkv29) -These six participants will push the project forward as part of their bachelor's degree until the summer of 2024. -At the same time our commitment to open source means that we are enabling -in fact encouraging- all interested parties to contribute and become part of its developer community. +These six participants will push the project forward as part of their bachelor's degree until the summer of 2024. +At the same time our commitment to open source means that we are enabling -in fact encouraging- all interested parties to contribute and become part of its developer community. ## Project documentation diff --git a/tracex_project/db.sqlite3 b/tracex_project/db.sqlite3 index a509afbe..1cc708e2 100644 Binary files a/tracex_project/db.sqlite3 and b/tracex_project/db.sqlite3 differ diff --git a/tracex_project/db_results/forms.py b/tracex_project/db_results/forms.py index aed83ca9..ebb0a2d5 100644 --- a/tracex_project/db_results/forms.py +++ b/tracex_project/db_results/forms.py @@ -9,9 +9,11 @@ class PatientJourneySelectForm(forms.Form): - """Form for selecting a patient journey.""" + """Form for selecting a Patient Journey.""" - selected_patient_journey = forms.ChoiceField(choices=[]) + selected_patient_journey = forms.ChoiceField( + choices=[], label="Selected Patient Journey:" + ) def __init__(self, *args, **kwargs): """Initializes the PatientJourneySelectForm.""" @@ -20,10 +22,14 @@ def __init__(self, *args, **kwargs): "selected_patient_journey" ].choices = self.get_patient_journey_choices() - def get_patient_journey_choices(self) -> List[Tuple[str, str]]: - """Retrieves the available patient journey choices with existing metrics from the database.""" - patient_journeys = 
PatientJourney.manager.filter( - trace__events__metrics__isnull=False + @staticmethod + def get_patient_journey_choices() -> List[Tuple[str, str]]: + """Retrieves the available Patient Journey choices with existing metrics from the database.""" + patient_journeys: List[PatientJourney] = PatientJourney.manager.filter( + trace__events__metrics__isnull=False, + trace__events__metrics__activity_relevance__isnull=False, + trace__events__metrics__timestamp_correctness__isnull=False, + trace__events__metrics__correctness_confidence__isnull=False, ).distinct() choices = [(pj.name, pj.name) for pj in patient_journeys] diff --git a/tracex_project/db_results/templates/metrics_pj_overview.html b/tracex_project/db_results/templates/metrics_pj_overview.html index ce74a3c3..f1e87c27 100644 --- a/tracex_project/db_results/templates/metrics_pj_overview.html +++ b/tracex_project/db_results/templates/metrics_pj_overview.html @@ -10,7 +10,7 @@

Welcome to the Metrics Dashboard

The Metrics Dashboard is a comprehensive visual interface that displays key metrics tracked by the metrics analyzer module during the extraction of a trace.

-

Please select one patient journey for that you want to see the dashboard.

+

Please select the Patient Journey for which you want to see the dashboard.

{% csrf_token %} diff --git a/tracex_project/db_results/views.py b/tracex_project/db_results/views.py index 49ba8777..6e7c00ca 100644 --- a/tracex_project/db_results/views.py +++ b/tracex_project/db_results/views.py @@ -29,7 +29,7 @@ def get_context_data(self, **kwargs): class MetricsOverviewView(FormView): - """View for selecting a patient journey for showing metrics.""" + """View for selecting a Patient Journey for showing metrics.""" form_class = PatientJourneySelectForm template_name = "metrics_pj_overview.html" @@ -50,11 +50,11 @@ class MetricsDashboardView(TemplateView): def get_context_data(self, **kwargs): """ - Extend the existing context with additional metrics relevant to the patient journey. + Extend the existing context with additional metrics relevant to the Patient Journey. - This method retrieves the patient journey name from the session, fetches the corresponding + This method retrieves the Patient Journey name from the session, fetches the corresponding data frame, and updates the context object with various metrics and visualizations such as - counts, charts, and data tables related to the patient journey. + counts, charts, and data tables related to the Patient Journey. """ context = super().get_context_data(**kwargs) @@ -68,10 +68,10 @@ def get_context_data(self, **kwargs): def get_latest_trace_df(self) -> pd.DataFrame: """ - Fetch the DataFrame for the latest trace of a specific patient journey stored in the session. + Fetch the DataFrame for the latest trace of a specific Patient Journey stored in the session. This method constructs a query to fetch the ID of the latest trace entry related to a - patient journey. It considers only those entries where activity relevance, timestamp correctness, + Patient Journey. It considers only those entries where activity relevance, timestamp correctness, and correctness confidence metrics are not null. It then retrieves the DataFrame for these events. 
""" @@ -90,57 +90,86 @@ def get_latest_trace_df(self) -> pd.DataFrame: return u.DataFrameUtilities.get_events_df(query_last_trace) def update_context_with_counts(self, context, trace_df: pd.DataFrame): - """Update the given context dictionary with count statistics related to patient journeys and traces.""" + """Update the given context dictionary with count statistics related to Patient Journeys and traces.""" patient_journey_name = self.request.session["patient_journey_name"] - context.update({ - "patient_journey_name": patient_journey_name, - "total_patient_journeys": PatientJourney.manager.count(), - "total_traces": Trace.manager.count(), - "total_activities": trace_df.shape[0], - "traces_count": Trace.manager.filter(patient_journey__name=patient_journey_name).count() - }) + context.update( + { + "patient_journey_name": patient_journey_name, + "total_patient_journeys": PatientJourney.manager.count(), + "total_traces": Trace.manager.count(), + "total_activities": trace_df.shape[0], + "traces_count": Trace.manager.filter( + patient_journey__name=patient_journey_name + ).count(), + } + ) def update_context_with_charts(self, context, trace_df: pd.DataFrame): """Update the context dictionary with chart visualizations.""" relevance_counts = trace_df["activity_relevance"].value_counts() timestamp_correctness_counts = trace_df["timestamp_correctness"].value_counts() - context.update({ - "activity_relevance_pie_chart": self.create_pie_chart(relevance_counts), - "timestamp_correctness_pie_chart": self.create_pie_chart(timestamp_correctness_counts), - "activity_relevance_bar_chart": self.create_bar_chart(relevance_counts, "Activity Relevance", "Count"), - "timestamp_correctness_bar_chart": self.create_bar_chart(timestamp_correctness_counts, - "Timestamp Correctness", "Count"), - "most_frequent_category": relevance_counts.index[0], - "most_frequent_category_count": relevance_counts.values[0], - "most_frequent_timestamp_correctness": timestamp_correctness_counts.index[0], - 
"most_frequent_timestamp_correctness_count": timestamp_correctness_counts.values[0], - "average_timestamp_correctness": round(trace_df["correctness_confidence"].mean(), 2) - }) + context.update( + { + "activity_relevance_pie_chart": self.create_pie_chart(relevance_counts), + "timestamp_correctness_pie_chart": self.create_pie_chart( + timestamp_correctness_counts + ), + "activity_relevance_bar_chart": self.create_bar_chart( + relevance_counts, "Activity Relevance", "Count" + ), + "timestamp_correctness_bar_chart": self.create_bar_chart( + timestamp_correctness_counts, "Timestamp Correctness", "Count" + ), + "most_frequent_category": relevance_counts.index[0], + "most_frequent_category_count": relevance_counts.values[0], + "most_frequent_timestamp_correctness": timestamp_correctness_counts.index[ + 0 + ], + "most_frequent_timestamp_correctness_count": timestamp_correctness_counts.values[ + 0 + ], + "average_timestamp_correctness": round( + trace_df["correctness_confidence"].mean(), 2 + ), + } + ) def update_context_with_data_tables(self, context, trace_df: pd.DataFrame): """Format trace data into styled HTML tables and add them to the context.""" # Apply renaming, styling, and convert to HTML, then update the context relevance_columns = ["activity", "activity_relevance"] - timestamp_columns = ["activity", "time:timestamp", "time:end_timestamp", "timestamp_correctness", - "correctness_confidence"] + timestamp_columns = [ + "activity", + "time:timestamp", + "time:end_timestamp", + "timestamp_correctness", + "correctness_confidence", + ] relevance_df = trace_df[relevance_columns] relevance_df = u.Conversion.rename_columns(relevance_df) - relevance_styled = relevance_df.style.set_table_attributes('class="dataframe"').apply(self.color_relevance, - axis=1).hide().to_html() + relevance_styled = ( + relevance_df.style.set_table_attributes('class="dataframe"') + .apply(self.color_relevance, axis=1) + .hide() + .to_html() + ) timestamp_df = trace_df[timestamp_columns] 
timestamp_df = u.Conversion.rename_columns(timestamp_df) - timestamp_styled = timestamp_df.style.set_table_attributes('class="dataframe"').apply( - self.color_timestamp_correctness, axis=1).hide().to_html() + timestamp_styled = ( + timestamp_df.style.set_table_attributes('class="dataframe"') + .apply(self.color_timestamp_correctness, axis=1) + .hide() + .to_html() + ) - context.update({ - "relevance_df": relevance_styled, - "timestamp_df": timestamp_styled - }) + context.update( + {"relevance_df": relevance_styled, "timestamp_df": timestamp_styled} + ) @staticmethod def color_relevance(row: pd.Series) -> List[str]: @@ -169,9 +198,9 @@ def color_timestamp_correctness(row: pd.Series) -> List[str]: styles = ["background-color: tan"] * len(row) if ( - low_confidence_threshold - <= correctness_confidence - <= high_confidence_threshold + low_confidence_threshold + <= correctness_confidence + <= high_confidence_threshold ): styles[confidence_index] = "background-color: orange" elif correctness_confidence < low_confidence_threshold: @@ -244,8 +273,12 @@ def get_context_data(self, **kwargs): cohorts_df = self.get_cohorts_data(traces) if not event_log_df.empty: - event_log_df = self.filter_and_cleanup_event_log(event_log_df, filter_settings) - context.update(self.generate_dfg_and_tables(event_log_df, cohorts_df, filter_settings)) + event_log_df = self.filter_and_cleanup_event_log( + event_log_df, filter_settings + ) + context.update( + self.generate_dfg_and_tables(event_log_df, cohorts_df, filter_settings) + ) context.update({"form": EvaluationForm(initial=filter_settings)}) self.request.session["event_log"] = event_log_df.to_json() @@ -275,14 +308,20 @@ def get_traces_and_events(self) -> Tuple[QuerySet, pd.DataFrame]: def get_cohorts_data(traces: QuerySet) -> pd.DataFrame: """Extract and format cohort data from given traces for further processing and visualization.""" cohorts = Cohort.manager.filter(trace__in=traces) - cohorts_data = list(cohorts.values("trace", "age", 
"sex", "origin", "condition", "preexisting_condition")) + cohorts_data = list( + cohorts.values( + "trace", "age", "sex", "origin", "condition", "preexisting_condition" + ) + ) cohorts_df = pd.DataFrame(cohorts_data) if not cohorts_df.empty: cohorts_df["age"] = cohorts_df["age"].astype(pd.Int64Dtype()) return cohorts_df @staticmethod - def filter_and_cleanup_event_log(event_log_df: pd.DataFrame, filter_settings: dict) -> pd.DataFrame: + def filter_and_cleanup_event_log( + event_log_df: pd.DataFrame, filter_settings: dict + ) -> pd.DataFrame: """Apply user-defined filters to the event log data and clean up unnecessary columns.""" filter_dict = { "event_type": filter_settings.get("event_types"), @@ -290,12 +329,18 @@ def filter_and_cleanup_event_log(event_log_df: pd.DataFrame, filter_settings: di } event_log_df = u.DataFrameUtilities.filter_dataframe(event_log_df, filter_dict) event_log_df = event_log_df.drop( - columns=["activity_relevance", "timestamp_correctness", "correctness_confidence"]) + columns=[ + "activity_relevance", + "timestamp_correctness", + "correctness_confidence", + ] + ) return event_log_df @staticmethod - def generate_dfg_and_tables(event_log_df: pd.DataFrame, cohorts_df: pd.DataFrame, - filter_settings: dict) -> dict: + def generate_dfg_and_tables( + event_log_df: pd.DataFrame, cohorts_df: pd.DataFrame, filter_settings: dict + ) -> dict: """Generate visualizations and HTML tables for the provided event log and cohort data.""" activity_key = filter_settings.get("activity_key") return { diff --git a/tracex_project/extraction/fixtures/prompts_fixture.json b/tracex_project/extraction/fixtures/prompts_fixture.json index d019d880..31b3b57d 100644 --- a/tracex_project/extraction/fixtures/prompts_fixture.json +++ b/tracex_project/extraction/fixtures/prompts_fixture.json @@ -1,897 +1,1073 @@ [ - { +{ "model": "extraction.prompt", "pk": 1, "fields": { - "name": "TEXT_TO_ACTIVITY_MESSAGES", - "category": "few-shot", - "text": [ - { - "role": "system", - 
"content": "You are an expert in text understanding and summarization. Your Job is to take a given text about an illness and convert it into bullet points regarding all important points about the course of the disease. Do not include time dates and use a miximum of 6 words per bullet point. Include the number of the sentence in the text from which you take the bullet point. The related numbers are in front of the sentences. Only include ONE sentence number per bullet point!" - }, - { - "role": "user", - "content": "1: On April 1, 2020, I started experiencing mild symptoms such as a persistent cough, fatigue, and a low-grade fever.\n2: Four days later I went to the doctor and got tested positive for Covid19.\n3: Then I got hospitalized for two weeks." - }, - { - "role": "assistant", - "content": "starting to experience symptoms #1\nvisiting doctor's #2\ntesting positive for Covid19 #2\ngetting admissioned to hospital #3\ngetting discharged from hospital #3" - }, - { - "role": "user", - "content": "8: Concerned about my condition, I contacted my primary care physician via phone.\n9: He advised me to monitor my symptoms and stay at home unless they became severe." - }, - { - "role": "assistant", - "content": "contacting primary care physician #8\nmonitoring symptoms at home #9" - }, - { - "role": "user", - "content": "5: First symptoms on 01/04/2020" - }, - { - "role": "assistant", - "content": "starting to experience symptoms #5" - }, - { - "role": "user", - "content": "1: On July 15, 2022, I started experiencing the first symptoms of Covid-19 for five days.\n2: Initially, I had a mild cough and fatigue." - }, - { - "role": "assistant", - "content": "starting to experience symptoms #1\nending to experience symptoms #1" - } - ] + "name": "TEXT_TO_ACTIVITY_MESSAGES", + "category": "few-shot", + "text": [ + { + "role": "system", + "content": "You are an expert in text understanding and summarization. 
Your Job is to take a given text about a person's course of disease and to convert it into bullet points. If you are asked to focus on a specific condition, follow these steps: First think about which events are typically present in a course of disease with the condition. Then scan the text and determine which of the events you find are relevant to the condition. Finally, summarize only the relevant events in bullet points. Do not include timestamps and use a maximum of 6 words per bullet point. Include the number of the sentence in the text from which you take the bullet point at the end of the sentence, like this '#number'. The related numbers are in front of the sentences." + }, + { + "role": "user", + "content": "Focus on those events that are related to the course of the disease of Covid-19.\n\n1: On April 1, 2020, I started experiencing mild symptoms such as a persistent cough, fatigue, and a low-grade fever.\n2: Four days later I went to the doctor and got tested positive for Covid-19.\n3: Then I got hospitalized for two weeks." + }, + { + "role": "assistant", + "content": "starting to experience symptoms #1\nvisiting doctor's #2\ntesting positive for Covid-19 #2\ngetting admitted to hospital #3\ngetting discharged from hospital #3" + }, + { + "role": "user", + "content": "8: Concerned about my condition, I contacted my primary care physician via phone.\n9: He advised me to monitor my symptoms and stay at home unless they became severe." 
+ }, + { + "role": "assistant", + "content": "contacting primary care physician #8\nmonitoring symptoms at home #9" + }, + { + "role": "user", + "content": "5: First symptoms on 01/04/2020" + }, + { + "role": "assistant", + "content": "starting to experience symptoms #5" + }, + { + "role": "user", + "content": "Focus on those events that are related to the course of the disease of Covid-19.\n\n1: On July 15, 2022, I started experiencing the first symptoms of Covid-19 for five days.\n2: Initially, I had a mild cough and fatigue." + }, + { + "role": "assistant", + "content": "starting to experience symptoms #1\nending to experience symptoms #1" + }, + { + "role": "user", + "content": "5:After surviving Covid-19, I made getting vaccinated a top priority. 6: I received my first dose of the vaccine in early February 2022 and the second dose three weeks later. 7: Despite the challenges I faced during my infection, I remained determined to protect myself and others from the virus by getting vaccinated." + }, + { + "role": "assistant", + "content": "receiving first dose of vaccine #6\nreceiving second dose of vaccine #6" + } + ] } - }, - { +}, +{ "model": "extraction.prompt", "pk": 2, "fields": { - "name": "START_DATE_MESSAGES", - "category": "few-shot", - "text": [ - { - "role": "system", - "content": "You are an expert in text understanding and your job is to take a given text and a given activity label and to extract a start date to this activity label. Only output the extracted start date! Rely also on the context." - }, - { - "role": "user", - "content": "Text: On April 1, 2020, I started experiencing mild symptoms such as a persistent cough, fatigue, and a low-grade fever. Four days later I went to the doctor and got tested positive for Covid19. In June I got infected again. 
After that I had a back pain.\nActivity Label: experiencing mild symptoms" - }, - { - "role": "assistant", - "content": "20200401T0000" - }, - { - "role": "user", - "content": "Text: On April 1, 2020, I started experiencing mild symptoms such as a persistent cough, fatigue, and a low-grade fever. Four days later I went to the doctor and got tested positive for Covid19. In June I got infected again. After that I had a back pain.\nActivity Label: testing positive for Covid19" - }, - { - "role": "assistant", - "content": "20200405T0000" - }, - { - "role": "user", - "content": "Text: On April 1, 2020, I started experiencing mild symptoms such as a persistent cough, fatigue, and a low-grade fever. Four days later I went to the doctor and got tested positive for Covid19. In June I got infected again. After that I had a back pain.\nActivity Label: getting infected again" - }, - { - "role": "assistant", - "content": "20200601T0000" - }, - { - "role": "user", - "content": "Text: On April 1, 2020, I started experiencing mild symptoms such as a persistent cough, fatigue, and a low-grade fever. Four days later I went to the doctor and got tested positive for Covid19. In June I got infected again. After that I had a back pain.\nActivity Label: having back pain" - }, - { - "role": "assistant", - "content": "N/A" - }, - { - "role": "user", - "content": "Text: I started experiencing flu-like symptoms in July 21. I then got tested positive for Covid19. In October I got infected again. Then on the 4th of November I got my first dosage of the vaccine. I had heavy side effects.\nActivity Label: starting to experience symptoms" - }, - { - "role": "assistant", - "content": "20210701T0000" - }, - { - "role": "user", - "content": "Text: I started experiencing flu-like symptoms in July 21. I then got tested positive for Covid19. In October I got infected again. Then on the 4th of November I got my first dosage of the vaccine. 
I had heavy side effects.\nActivity Label: experiencing side effects of vaccination" - }, - { - "role": "assistant", - "content": "20211104T0000" - } - ] + "name": "START_DATE_MESSAGES", + "category": "few-shot", + "text": [ + { + "role": "system", + "content": "You are an expert in text understanding and your job is to take a given text and a given activity label and to extract a start date to this activity label. The text might contain timestamps in the format --YYYYMM/DD--, with two hyphens enclosing the date. Only use these timestamps as backup, in case no other time information is available. Only output the extracted start date! Rely on the context to determine the start date, as it might not be explicitly mentioned." + }, + { + "role": "user", + "content": "Text: On April 1, 2020, I started experiencing mild symptoms such as a persistent cough, fatigue, and a low-grade fever --2020/04/01--. Four days later I went to the doctor and got tested positive for Covid19--2020/04/01--. In June I got infected again--2020/04/01--. After that I had a back pain--2020/04/01--.\nActivity Label: experiencing mild symptoms" + }, + { + "role": "assistant", + "content": "20200401T0000" + }, + { + "role": "user", + "content": "Text: On April 1, 2020, I started experiencing mild symptoms such as a persistent cough, fatigue, and a low-grade fever --2020/04/01--. Four days later I went to the doctor and got tested positive for Covid19--2020/04/01--. In June I got infected again--2020/04/01--. After that I had a back pain--2020/04/01--.\nActivity Label: testing positive for Covid19" + }, + { + "role": "assistant", + "content": "20200405T0000" + }, + { + "role": "user", + "content": "Text: On April 1, 2020, I started experiencing mild symptoms such as a persistent cough, fatigue, and a low-grade fever. Four days later I went to the doctor and got tested positive for Covid19. In June I got infected again. 
After that I had a back pain.\nActivity Label: getting infected again" + }, + { + "role": "assistant", + "content": "20200601T0000" + }, + { + "role": "user", + "content": "Text: On April 1, 2020, I started experiencing mild symptoms such as a persistent cough, fatigue, and a low-grade fever. Four days later I went to the doctor and got tested positive for Covid19. In June I got infected again. After that I had a back pain.\nActivity Label: having back pain" + }, + { + "role": "assistant", + "content": "N/A" + }, + { + "role": "user", + "content": "Text: I started experiencing flu-like symptoms in July 21--2021/07/01--. I then got tested positive for Covid19--2021/07/01--. In October I got infected again--2021/10/01--. Then on the 4th of November I got my first dosage of the vaccine.--2021/11/04-- I had heavy side effects.\nActivity Label: starting to experience symptoms" + }, + { + "role": "assistant", + "content": "20210701T0000" + }, + { + "role": "user", + "content": "Text: I started experiencing flu-like symptoms in July 21. I then got tested positive for Covid19. In October I got infected again. Then on the 4th of November I got my first dosage of the vaccine. I had heavy side effects.\nActivity Label: experiencing side effects of vaccination" + }, + { + "role": "assistant", + "content": "20211104T0000" + } + ] } - }, - { +}, +{ "model": "extraction.prompt", "pk": 3, "fields": { - "name": "END_DATE_MESSAGES", - "category": "few-shot", - "text": [ - { - "role": "system", - "content": "You are an expert in text understanding and your job is to take a given text, a given activity label and a given timestamp for the beginning of this activity and to then extract an end date to this activity label. Only output the extracted start date! Rely also on the context. Use averages if necessary. If there is no information about the end date at all, please state the start date also as the end date." 
- }, - { - "role": "user", - "content": "Text: On April 1, 2020, I started experiencing mild symptoms such as a persistent cough, fatigue, and a low-grade fever. Four days later I went to the doctor and got tested positive for Covid19. In June I got infected again. After that I had a back pain.\nActivity Label: experiencing mild symptoms\nStart Date: 20200401T0000" - }, - { - "role": "assistant", - "content": "20200405T0000" - }, - { - "role": "user", - "content": "Text: On April 1, 2020, I started experiencing mild symptoms such as a persistent cough, fatigue, and a low-grade fever. Four days later I went to the doctor and got tested positive for Covid19. In June I got infected again. After that I had a back pain.\nActivity Label: testing positive for Covid19\nStart Date: 20200405T0000" - }, - { - "role": "assistant", - "content": "20200405T0000" - }, - { - "role": "user", - "content": "Text: On April 1, 2020, I started experiencing mild symptoms such as a persistent cough, fatigue, and a low-grade fever. Four days later I went to the doctor and got tested positive for Covid19. In June I got infected again. After that I had a back pain.\nActivity Label: getting infected again\nStart Date: 20200601T0000" - }, - { - "role": "assistant", - "content": "20200615T0000" - }, - { - "role": "user", - "content": "Text: On April 1, 2020, I started experiencing mild symptoms such as a persistent cough, fatigue, and a low-grade fever. Four days later I went to the doctor and got tested positive for Covid19. In June I got infected again. After that I had a back pain.\nActivity Label: having back pain\nStart Date: N/A" - }, - { - "role": "assistant", - "content": "N/A" - }, - { - "role": "user", - "content": "Text: I started experiencing flu-like symptoms in July 21. I then got tested positive for Covid19. In October I got infected again. Then on the 4th of November I got my first dosage of the vaccine. 
I had heavy side effects.\nActivity Label: experiencing side effects of \nStart Date: 20211104T0000" - }, - { - "role": "assistant", - "content": "20211106T0000" - }, - { - "role": "user", - "content": "Text: Four days after the first april 2020 I went to the doctor and got tested positive for Covid19. I was then hospitalized for two weeks.\nActivity Label: getting hospitalized\nStart Date: 20200405T0000" - }, - { - "role": "assistant", - "content": "20200419T0000" - }, - { - "role": "user", - "content": "Text: In the next time I made sure to improve my mental well being.\nActivity Label: improving mental well being\nStart Date: 20210610T0000" - }, - { - "role": "assistant", - "content": "20210710T0000" - } - ] + "name": "END_DATE_MESSAGES", + "category": "few-shot", + "text": [ + { + "role": "system", + "content": "You are an expert in text understanding and your job is to take a given text, a given activity label and a given timestamp for the beginning of this activity and to then extract an end date to this activity label. The text might contain dates in the format --YYYYMM/DD--, with two hyphens enclosing the date. Only use these as backup, in case no other time information is available. Only output the extracted end date! Rely on the context to determine the end date, as it might not be explicitly mentioned. Use averages if necessary. If there is no information about the end date at all, please state the start date also as the end date." + }, + { + "role": "user", + "content": "Text: On April 1, 2020, I started experiencing mild symptoms such as a persistent cough, fatigue, and a low-grade fever --2020/04/01--. Four days later I went to the doctor and got tested positive for Covid19--2020/04/01--. In June I got infected again--2020/04/01--. 
After that I had a back pain--2020/04/01--.\nActivity Label: experiencing mild symptoms\nStart Date: 20200401T0000" + }, + { + "role": "assistant", + "content": "20200405T0000" + }, + { + "role": "user", + "content": "Text: On April 1, 2020, I started experiencing mild symptoms such as a persistent cough, fatigue, and a low-grade fever --2020/04/01--. Four days later I went to the doctor and got tested positive for Covid19--2020/04/01--. In June I got infected again--2020/04/01--. After that I had a back pain--2020/04/01--..\nActivity Label: testing positive for Covid19\nStart Date: 20200405T0000" + }, + { + "role": "assistant", + "content": "20200405T0000" + }, + { + "role": "user", + "content": "Text: On April 1, 2020, I started experiencing mild symptoms such as a persistent cough, fatigue, and a low-grade fever. Four days later I went to the doctor and got tested positive for Covid19. In June I got infected again. After that I had a back pain.\nActivity Label: getting infected again\nStart Date: 20200601T0000" + }, + { + "role": "assistant", + "content": "20200615T0000" + }, + { + "role": "user", + "content": "Text: On April 1, 2020, I started experiencing mild symptoms such as a persistent cough, fatigue, and a low-grade fever. Four days later I went to the doctor and got tested positive for Covid19. In June I got infected again. After that I had a back pain.\nActivity Label: having back pain\nStart Date: N/A" + }, + { + "role": "assistant", + "content": "N/A" + }, + { + "role": "user", + "content": "Text: I started experiencing flu-like symptoms in July 21--2021/07/21--. I then got tested positive for Covid19--2021/07/21--. In October I got infected again--2021/10/01--. Then on the 4th of November I got my first dosage of the vaccine--2021/11/04--. 
I had heavy side effects--2021/11/04--.\nActivity Label: experiencing side effects \nStart Date: 20211104T0000" + }, + { + "role": "assistant", + "content": "20211106T0000" + }, + { + "role": "user", + "content": "Text: Four days after the first of April 2020 I went to the doctor and got tested positive for Covid19. I was then hospitalized for two weeks.\nActivity Label: getting hospitalized\nStart Date: 20200405T0000" + }, + { + "role": "assistant", + "content": "20200419T0000" + }, + { + "role": "user", + "content": "Text: In the next time I made sure to improve my mental well being.\nActivity Label: improving mental well being\nStart Date: 20210610T0000" + }, + { + "role": "assistant", + "content": "20210710T0000" + } + ] } - }, - { +}, +{ "model": "extraction.prompt", "pk": 4, "fields": { - "name": "EVENT_TYPE_MESSAGES", - "category": "few-shot", - "text": [ - { - "role": "system", - "content": "You are an expert in text categorization and your job is to take a given activity label and to classify it into one of the following event types: 'Symptom Onset', 'Symptom Offset', 'Diagnosis', 'Doctor Visit', 'Treatment', 'Hospital Admission', 'Hospital Discharge', 'Medication', 'Lifestyle Change' and 'Feelings'. Please consider the capitalization." 
- }, - { - "role": "user", - "content": "visiting doctor's" - }, - { - "role": "assistant", - "content": "Doctors Visit" - }, - { - "role": "user", - "content": "testing positive for Covid19" - }, - { - "role": "assistant", - "content": "Diagnosis" - }, - { - "role": "user", - "content": "getting hospitalized" - }, - { - "role": "assistant", - "content": "Hospital Admission" - }, - { - "role": "user", - "content": "isolating at home" - }, - { - "role": "assistant", - "content": "Lifestyle Change" - }, - { - "role": "user", - "content": "prescribed medication for discomfort" - }, - { - "role": "assistant", - "content": "Medication" - }, - { - "role": "user", - "content": "seeking consultation with specialist" - }, - { - "role": "assistant", - "content": "Doctors Visit" - }, - { - "role": "user", - "content": "receiving vaccines to protect against Covid19" - }, - { - "role": "assistant", - "content": "Treatment" - }, - { - "role": "user", - "content": "feeling a sense of relief" - }, - { - "role": "assistant", - "content": "Feeling" - }, - { - "role": "user", - "content": "starting to experience symptoms" - }, - { - "role": "assistant", - "content": "Symptom Onset" - } - ] + "name": "EVENT_TYPE_MESSAGES", + "category": "few-shot", + "text": [ + { + "role": "system", + "content": "You are an expert in text categorization and your job is to take a given activity label and to classify it into one of the following event types: 'Symptom Onset', 'Symptom Offset', 'Diagnosis', 'Doctor Visit', 'Treatment', 'Hospital Admission', 'Hospital Discharge', 'Medication', 'Lifestyle Change' and 'Feelings'. Please consider the capitalization." 
+ }, + { + "role": "user", + "content": "visiting doctor's" + }, + { + "role": "assistant", + "content": "Doctor Visit" + }, + { + "role": "user", + "content": "testing positive for Covid19" + }, + { + "role": "assistant", + "content": "Diagnosis" + }, + { + "role": "user", + "content": "getting hospitalized" + }, + { + "role": "assistant", + "content": "Hospital Admission" + }, + { + "role": "user", + "content": "isolating at home" + }, + { + "role": "assistant", + "content": "Lifestyle Change" + }, + { + "role": "user", + "content": "prescribed medication for discomfort" + }, + { + "role": "assistant", + "content": "Medication" + }, + { + "role": "user", + "content": "seeking consultation with specialist" + }, + { + "role": "assistant", + "content": "Doctor Visit" + }, + { + "role": "user", + "content": "receiving vaccines to protect against Covid19" + }, + { + "role": "assistant", + "content": "Treatment" + }, + { + "role": "user", + "content": "feeling a sense of relief" + }, + { + "role": "assistant", + "content": "Feelings" + }, + { + "role": "user", + "content": "starting to experience symptoms" + }, + { + "role": "assistant", + "content": "Symptom Onset" + } + ] } - }, - { +}, +{ "model": "extraction.prompt", "pk": 5, "fields": { - "name": "LOCATION_MESSAGES", - "category": "few-shot", - "text": [ - { - "role": "system", - "content": "You are an expert in text categorization and your job is to take a given activity label and categorize it into: 'Home', 'Hospital' or 'Doctors'. Use the context to categorize."
- }, - { - "role": "user", - "content": "visiting doctor's" - }, - { - "role": "assistant", - "content": "Doctors" - }, - { - "role": "user", - "content": "consulting doctor over phone" - }, - { - "role": "assistant", - "content": "Home" - }, - { - "role": "user", - "content": "testing positive for Covid19" - }, - { - "role": "assistant", - "content": "Doctors" - }, - { - "role": "user", - "content": "getting hospitalized" - }, - { - "role": "assistant", - "content": "Hospital" - }, - { - "role": "user", - "content": "isolating at home" - }, - { - "role": "assistant", - "content": "Home" - }, - { - "role": "user", - "content": "prescribed medication for discomfort" - }, - { - "role": "assistant", - "content": "Doctors" - }, - { - "role": "user", - "content": "receiving special care with a ventilator" - }, - { - "role": "assistant", - "content": "Hospital" - }, - { - "role": "user", - "content": "receiving vaccines to protect against Covid19" - }, - { - "role": "assistant", - "content": "Doctors" - }, - { - "role": "user", - "content": "feeling a sense of relief" - }, - { - "role": "assistant", - "content": "Home" - }, - { - "role": "user", - "content": "starting to experience symptoms" - }, - { - "role": "assistant", - "content": "Home" - } - ] + "name": "LOCATION_MESSAGES", + "category": "few-shot", + "text": [ + { + "role": "system", + "content": "You are an expert in text categorization and your job is to take a given activity label and categorize it into: 'Home', 'Hospital' or 'Doctors'. Use the context to categorize." 
+ }, + { + "role": "user", + "content": "visiting doctor's" + }, + { + "role": "assistant", + "content": "Doctors" + }, + { + "role": "user", + "content": "consulting doctor over phone" + }, + { + "role": "assistant", + "content": "Home" + }, + { + "role": "user", + "content": "testing positive for Covid19" + }, + { + "role": "assistant", + "content": "Doctors" + }, + { + "role": "user", + "content": "getting hospitalized" + }, + { + "role": "assistant", + "content": "Hospital" + }, + { + "role": "user", + "content": "isolating at home" + }, + { + "role": "assistant", + "content": "Home" + }, + { + "role": "user", + "content": "prescribed medication for discomfort" + }, + { + "role": "assistant", + "content": "Doctors" + }, + { + "role": "user", + "content": "receiving special care with a ventilator" + }, + { + "role": "assistant", + "content": "Hospital" + }, + { + "role": "user", + "content": "receiving vaccines to protect against Covid19" + }, + { + "role": "assistant", + "content": "Doctors" + }, + { + "role": "user", + "content": "feeling a sense of relief" + }, + { + "role": "assistant", + "content": "Home" + }, + { + "role": "user", + "content": "starting to experience symptoms" + }, + { + "role": "assistant", + "content": "Home" + } + ] } - }, - { +}, +{ "model": "extraction.prompt", "pk": 6, "fields": { - "name": "METRIC_ACTIVITY_MESSAGES", - "category": "few-shot", - "text": [ - { - "role": "system", - "content": "You are an expert in text categorization and your job is to take a given bulletpoint and to categorize it into 'No Relevance', 'Low Relevance', 'Moderate Relevance' or 'High Relevance'. It is really important, that that relevance category is correct. Category definition: No Relevance: Events or actions that are not connected to the progression or impact of the disease of the patient in any way. 
Low Relevance: Events or actions that have limited potential to affect the progression of the disease of the patient and hold minimal significance in its course. Moderate Relevance: Events or actions that possess some potential to influence the disease's progression of the patient but may not be critical to its outcome. High Relevance: Events or actions that hold substantial potential to impact the disease's course of the patient and are crucial in understanding its trajectory." - }, - { - "role": "user", - "content": "receiving support from my children" - }, - { - "role": "assistant", - "content": "Low Relevance" - }, - { - "role": "user", - "content": "taking medicine" - }, - { - "role": "assistant", - "content": "High Relevance" - }, - { - "role": "user", - "content": "eating chips" - }, - { - "role": "assistant", - "content": "No Relevance" - }, - { - "role": "user", - "content": "starting to experience symptoms" - }, - { - "role": "assistant", - "content": "High Relevance" - }, - { - "role": "user", - "content": "feeling side effects from vaccination" - }, - { - "role": "assistant", - "content": "Moderate Relevance" - } - ] + "name": "METRIC_ACTIVITY_MESSAGES", + "category": "few-shot", + "text": [ + { + "role": "system", + "content": "You are an expert in text categorization and your job is to take a given bulletpoint and to categorize it into 'No Relevance', 'Low Relevance', 'Moderate Relevance' or 'High Relevance'. It is really important, that that relevance category is correct. Category definition: No Relevance: Events or actions that are not connected to the progression or impact of the disease of the patient in any way. Low Relevance: Events or actions that have limited potential to affect the progression of the disease of the patient and hold minimal significance in its course. Moderate Relevance: Events or actions that possess some potential to influence the disease's progression of the patient but may not be critical to its outcome. 
High Relevance: Events or actions that hold substantial potential to impact the disease's course of the patient and are crucial in understanding its trajectory." + }, + { + "role": "user", + "content": "receiving support from my children" + }, + { + "role": "assistant", + "content": "Low Relevance" + }, + { + "role": "user", + "content": "taking medicine" + }, + { + "role": "assistant", + "content": "High Relevance" + }, + { + "role": "user", + "content": "eating chips" + }, + { + "role": "assistant", + "content": "No Relevance" + }, + { + "role": "user", + "content": "starting to experience symptoms" + }, + { + "role": "assistant", + "content": "High Relevance" + }, + { + "role": "user", + "content": "feeling side effects from vaccination" + }, + { + "role": "assistant", + "content": "Moderate Relevance" + } + ] } - }, - { +}, +{ "model": "extraction.prompt", "pk": 7, "fields": { - "name": "METRIC_TIMESTAMP_MESSAGES", - "category": "few-shot", - "text": [ - { - "role": "system", - "content": "You are an expert in text understanding and your job is to take a given text and to check if a given start date and end date of a given bulletpoint are correct. Correct is a start and end date in the format YYYYMMDDTHHMM if the date is appearing in the patient journey related to bulletpoint. If the start date and end date appearing in the context of the bulletpoint, you should output True. If there is another start or end date in the patient journey, the given timestamps are wrong and you should output False. If the start or end date is not appearing in the patient journey, it could be that the timestamp is estimated. In this case check if the estimation is reasonable and output True if it is and False if it is not." - }, - { - "role": "user", - "content": "Text: I started experiencing flu-like symptoms in July 21. I then got tested positive for Covid19. In October I got infected again. Then on the 4th of November I got my first dosage of the vaccine. 
I had heavy side effects.\nActivity Label: starting to experience symptoms\nStart Date: 20210721T0000\nEnd Date: 20210721T0000" - }, - { - "role": "assistant", - "content": "True" - }, - { - "role": "user", - "content": "Text: I started experiencing flu-like symptoms in July 21. I then got tested positive for Covid19. In October I got infected again. Then on the 4th of November I got my first dosage of the vaccine. I had heavy side effects.\nActivity Label: starting to experience symptoms\nStart Date: 20210721T0000\nEnd Date: 20210724T0000" - }, - { - "role": "assistant", - "content": "True" - }, - { - "role": "user", - "content": "Text: I started experiencing flu-like symptoms in July 21. I then got tested positive for Covid19. In October I got infected again. Then on the 4th of November I got my first dosage of the vaccine. I had heavy side effects.\nActivity Label: starting to experience symptoms\nStart Date: 07/21/2021\nEnd Date: 20210721T0000" - }, - { - "role": "assistant", - "content": "False" - }, - { - "role": "user", - "content": "Text: I started experiencing flu-like symptoms in July 21. I then got tested positive for Covid19. In October I got infected again. Then on the 4th of November I got my first dosage of the vaccine. I had heavy side effects.\nActivity Label: starting to experience symptoms\nStart Date: 20210721T0000\nEnd Date: N/A" - }, - { - "role": "assistant", - "content": "False" - }, - { - "role": "user", - "content": "Text: I started experiencing flu-like symptoms in July 21. I then got tested positive for Covid19. In October I got infected again. Then on the 4th of November I got my first dosage of the vaccine. I had heavy side effects.\nActivity Label: experiencing heavy side effects of vaccination\nStart Date: 20211104T0000\nEnd Date: 20211107T0000" - }, - { - "role": "assistant", - "content": "True" - }, - { - "role": "user", - "content": "Text: I started experiencing flu-like symptoms in July 21. I then got tested positive for Covid19. 
In October I got infected again. Then on the 4th of November I got my first dosage of the vaccine. I had heavy side effects.\nActivity Label: experiencing heavy side effects of vaccination\nStart Date: 20211201T0000\nEnd Date: 20211204T0000" - }, - { - "role": "assistant", - "content": "False" - } - ] + "name": "METRIC_TIMESTAMP_MESSAGES", + "category": "few-shot", + "text": [ + { + "role": "system", + "content": "You are an expert in text understanding and your job is to take a given text and to check if a given start date and end date of a given bulletpoint are correct. Correct is a start and end date in the format YYYYMMDDTHHMM if the date is appearing in the patient journey related to bulletpoint. If the start date and end date appearing in the context of the bulletpoint, you should output True. If there is another start or end date in the patient journey, the given timestamps are wrong and you should output False. If the start or end date is not appearing in the patient journey, it could be that the timestamp is estimated. In this case check if the estimation is reasonable and output True if it is and False if it is not." + }, + { + "role": "user", + "content": "Text: I started experiencing flu-like symptoms in July 21. I then got tested positive for Covid19. In October I got infected again. Then on the 4th of November I got my first dosage of the vaccine. I had heavy side effects.\nActivity Label: starting to experience symptoms\nStart Date: 20210721T0000\nEnd Date: 20210721T0000" + }, + { + "role": "assistant", + "content": "True" + }, + { + "role": "user", + "content": "Text: I started experiencing flu-like symptoms in July 21. I then got tested positive for Covid19. In October I got infected again. Then on the 4th of November I got my first dosage of the vaccine. 
I had heavy side effects.\nActivity Label: starting to experience symptoms\nStart Date: 20210721T0000\nEnd Date: 20210724T0000" + }, + { + "role": "assistant", + "content": "True" + }, + { + "role": "user", + "content": "Text: I started experiencing flu-like symptoms in July 21. I then got tested positive for Covid19. In October I got infected again. Then on the 4th of November I got my first dosage of the vaccine. I had heavy side effects.\nActivity Label: starting to experience symptoms\nStart Date: 07/21/2021\nEnd Date: 20210721T0000" + }, + { + "role": "assistant", + "content": "False" + }, + { + "role": "user", + "content": "Text: I started experiencing flu-like symptoms in July 21. I then got tested positive for Covid19. In October I got infected again. Then on the 4th of November I got my first dosage of the vaccine. I had heavy side effects.\nActivity Label: starting to experience symptoms\nStart Date: 20210721T0000\nEnd Date: N/A" + }, + { + "role": "assistant", + "content": "False" + }, + { + "role": "user", + "content": "Text: I started experiencing flu-like symptoms in July 21. I then got tested positive for Covid19. In October I got infected again. Then on the 4th of November I got my first dosage of the vaccine. I had heavy side effects.\nActivity Label: experiencing heavy side effects of vaccination\nStart Date: 20211104T0000\nEnd Date: 20211107T0000" + }, + { + "role": "assistant", + "content": "True" + }, + { + "role": "user", + "content": "Text: I started experiencing flu-like symptoms in July 21. I then got tested positive for Covid19. In October I got infected again. Then on the 4th of November I got my first dosage of the vaccine. 
I had heavy side effects.\nActivity Label: experiencing heavy side effects of vaccination\nStart Date: 20211201T0000\nEnd Date: 20211204T0000" + }, + { + "role": "assistant", + "content": "False" + } + ] } - }, - { +}, +{ "model": "extraction.prompt", "pk": 8, "fields": { - "name": "COMPARE_MESSAGES", - "category": "few-shot", - "text": [ - { - "role": "system", - "content": " You are an expert in text understanding and your job is to understand the semantical meaning of bulletpoints and compare the semantic to each other. So you take two bulletpoints and check if they are semantically similar. You should return True if you think they are similar and False if you don't." - }, - { - "role": "user", - "content": "First: receiving support from my children\nSecond: taking medicine" - }, - { - "role": "assistant", - "content": "False" - }, - { - "role": "user", - "content": "First: visiting doctor's\nSecond: going to the doctor" - }, - { - "role": "assistant", - "content": "True" - }, - { - "role": "user", - "content": "First: experiencing covid 19 symptoms\nSecond: first symptoms of covid 19" - }, - { - "role": "assistant", - "content": "True" - }, - { - "role": "user", - "content": "First: experiencing first covid 19 symptoms\nSecond: experiencing worse symptoms" - }, - { - "role": "assistant", - "content": "False" - } - ] + "name": "COMPARE_MESSAGES", + "category": "few-shot", + "text": [ + { + "role": "system", + "content": " You are an expert in text understanding and your job is to understand the semantical meaning of bulletpoints and compare the semantic to each other. So you take two bulletpoints and check if they are semantically similar. You should return True if you think they are similar and False if you don't." 
+ }, + { + "role": "user", + "content": "First: receiving support from my children\nSecond: taking medicine" + }, + { + "role": "assistant", + "content": "False" + }, + { + "role": "user", + "content": "First: visiting doctor's\nSecond: going to the doctor" + }, + { + "role": "assistant", + "content": "True" + }, + { + "role": "user", + "content": "First: experiencing covid 19 symptoms\nSecond: first symptoms of covid 19" + }, + { + "role": "assistant", + "content": "True" + }, + { + "role": "user", + "content": "First: experiencing first covid 19 symptoms\nSecond: experiencing worse symptoms" + }, + { + "role": "assistant", + "content": "False" + } + ] } - }, - { +}, +{ "model": "extraction.prompt", "pk": 9, "fields": { - "name": "COHORT_TAG_MESSAGES", - "category": "few-shot", - "text": [ - [ - "condition", - { - "role": "system", - "content": "You are an expert in text understanding and your job is to take a given text about an illness and to extract the illness it is about." - }, - { - "role": "user", - "content": "In July I got infected with Covid-19 which resulted in similar symptoms like a heavy flu." - }, - { - "role": "assistant", - "content": "Covid-19" - }, - { - "role": "user", - "content": "I had a heavy flu in July." - }, - { - "role": "assistant", - "content": "Flu" - }, - { - "role": "user", - "content": "Last year I was feeling really well, when all of a sudden I had severe breathtaking problems and high fever. I thought it was a flu, but it turned out to be Covid-19." - }, - { - "role": "assistant", - "content": "Covid-19" - } - ], - [ - "sex", - { - "role": "system", - "content": "You are an expert in text understanding and your job is to take a given text about an illness and to extract the gender of the author. If the gender isn't clear, you should take the context into account. Only if the context doesn't help, you should return 'N/A'." 
- }, - { - "role": "user", - "content": "I am a 25 year old software engineer living in California with my girlfriend. When I got Covid-19 last year I was really worried about my job and my girlfriend." - }, - { - "role": "assistant", - "content": "male" - }, - { - "role": "user", - "content": "I am a nurse living in Berlin with my boyfriend. When I got Covid-19 last year I was really worried about my job and my boyfriend." - }, - { - "role": "assistant", - "content": "female" - }, - { - "role": "user", - "content": "I got Covid-19 last year and I was really worried about my job. The diesease itself wasn't even that hard but it stressed me out, that I wasn't allowed to go to my job!" - }, - { - "role": "assistant", - "content": "N/A" - }, - { - "role": "user", - "content": "I am a 25 year old software engineer living in California with my girlfriend. When I got Covid-19 that struck me as a mother of two really heavily." - }, - { - "role": "assistant", - "content": "female" - }, - { - "role": "user", - "content": "I am a nurse living in Berlin. When I got Covid-19 I had to stay home what really hit me. As a divorced father I only see my boy once every month and now I couldn't even do that." - }, - { - "role": "assistant", - "content": "male" - } - ], - [ - "age", - { - "role": "system", - "content": "You are an expert in text understanding and your job is to take a given text about an illness and to extract the age of the author. If the gender isn't clear, you should take the context into account. Young means 25, middle aged 50 and old 75. Only if the context doesn't help, you should return 'N/A'." - }, - { - "role": "user", - "content": "I am a 22 year old software engineer living in California with my girlfriend. When I got Covid-19 last year I was really worried about my job and my girlfriend." - }, - { - "role": "assistant", - "content": "22" - }, - { - "role": "user", - "content": "I am a nurse living in Berlin. 
When I got Covid-19 I had to stay home what really hit me. Luckily as a young person I recovered quickly." - }, - { - "role": "assistant", - "content": "25" - }, - { - "role": "user", - "content": "I got Covid-19 last year and I was really worried about my job. The diesease itself wasn't even that hard but it stressed me out, that I wasn't allowed to go to my job!" - }, - { - "role": "assistant", - "content": "N/A" - }, - { - "role": "user", - "content": "I am an old man, so Covid-19 wasn't all easy on me." - }, - { - "role": "assistant", - "content": "75" - } - ], - [ - "origin", - { - "role": "system", - "content": "You are an expert in text understanding and your job is to take a given text about an illness and to extract the origin country of the author. If the origin isn't clear, you should take the context into account. Only if the context doesn't help, you should return 'N/A'." - }, - { - "role": "user", - "content": "I am a 25 year old software engineer living in California with my girlfriend. When I got Covid-19 last year I was really worried about my job and my girlfriend." - }, - { - "role": "assistant", - "content": "United States of America" - }, - { - "role": "user", - "content": "I am a nurse living in Berlin. When I got Covid-19 I had to stay home what really hit me. Luckily as a young person I recovered quickly." - }, - { - "role": "assistant", - "content": "Germany" - }, - { - "role": "user", - "content": "I got Covid-19 last year and I was really worried about my job. The diesease itself wasn't even that hard but it stressed me out, that I wasn't allowed to go to my job!" - }, - { - "role": "assistant", - "content": "N/A" - } - ], - [ - "preexisting_condition", - { - "role": "system", - "content": "You are an expert in text understanding and your job is to take a given text about an illness and to extract previous diseases of the author. These diseases have to be EXPLICITLY MENTIONED. 
And they have to have occured BEFORE the illness the text is about!" - }, - { - "role": "user", - "content": "I got Covid-19 last year, which was hard since I since ever had to fight Asthma." - }, - { - "role": "assistant", - "content": "Asthma" - }, - { - "role": "user", - "content": "I infected me with Covid-19 right after I recovered from a heavy cold." - }, - { - "role": "assistant", - "content": "Cold" - }, - { - "role": "user", - "content": "I got Covid-19 last year and I was really worried about my job. The diesease itself wasn't even that hard but it stressed me out, that I wasn't allowed to go to my job!" - }, - { - "role": "assistant", - "content": "N/A" - }, - { - "role": "user", - "content": "I got Covid-19 last year after I already got it right at the start in 2020." - }, - { - "role": "assistant", - "content": "Covid-19" - } + "name": "COHORT_TAG_MESSAGES", + "category": "few-shot", + "text": [ + [ + "condition", + { + "role": "system", + "content": "You are an expert in text understanding and your job is to take a given text about an illness and to extract the illness it is about. Return only the condition using two words at most." + }, + { + "role": "user", + "content": "In July I got infected with Covid-19 which resulted in similar symptoms like a heavy flu." + }, + { + "role": "assistant", + "content": "Covid-19" + }, + { + "role": "user", + "content": "I had a heavy flu in July." + }, + { + "role": "assistant", + "content": "Flu" + }, + { + "role": "user", + "content": "Last year I was feeling really well, when all of a sudden I had severe breathing problems and high fever. I thought it was a flu, but it turned out to be Covid-19." + }, + { + "role": "assistant", + "content": "Covid-19" + } + ], + [ + "gender", + { + "role": "system", + "content": "You are an expert in text understanding and your job is to take a given text about an illness and to extract the gender of the author.
If the gender isn't clear, you should take the context into account. Only if the context doesn't help, you should return 'N/A'." + }, + { + "role": "user", + "content": "I am a 25 year old software engineer living in California with my girlfriend. When I got Covid-19 last year I was really worried about my job and my girlfriend." + }, + { + "role": "assistant", + "content": "male" + }, + { + "role": "user", + "content": "I am a nurse living in Berlin with my boyfriend. When I got Covid-19 last year I was really worried about my job and my boyfriend." + }, + { + "role": "assistant", + "content": "female" + }, + { + "role": "user", + "content": "I got Covid-19 last year and I was really worried about my job. The disease itself wasn't even that hard but it stressed me out, that I wasn't allowed to go to my job!" + }, + { + "role": "assistant", + "content": "N/A" + }, + { + "role": "user", + "content": "I am a 25 year old software engineer living in California with my girlfriend. When I got Covid-19 that struck me as a mother of two really heavily." + }, + { + "role": "assistant", + "content": "female" + }, + { + "role": "user", + "content": "I am a nurse living in Berlin. When I got Covid-19 I had to stay home what really hit me. As a divorced father I only see my boy once every month and now I couldn't even do that." + }, + { + "role": "assistant", + "content": "male" + } + ], + [ + "age", + { + "role": "system", + "content": "You are an expert in text understanding and your job is to take a given text about an illness and to extract the age of the author. If the age isn't clear, you should take the context into account. Young means 25, middle aged 50 and old 75. Only if the context doesn't help, you should return 'N/A'." + }, + { + "role": "user", + "content": "I am a 22 year old software engineer living in California with my girlfriend. When I got Covid-19 last year I was really worried about my job and my girlfriend."
+ }, + { + "role": "assistant", + "content": "22" + }, + { + "role": "user", + "content": "I am a nurse living in Berlin. When I got Covid-19 I had to stay home what really hit me. Luckily as a young person I recovered quickly." + }, + { + "role": "assistant", + "content": "25" + }, + { + "role": "user", + "content": "I got Covid-19 last year and I was really worried about my job. The disease itself wasn't even that hard but it stressed me out, that I wasn't allowed to go to my job!" + }, + { + "role": "assistant", + "content": "N/A" + }, + { + "role": "user", + "content": "I am an old man, so Covid-19 wasn't all easy on me." + }, + { + "role": "assistant", + "content": "75" + } + ], + [ + "origin", + { + "role": "system", + "content": "You are an expert in text understanding and your job is to take a given text about an illness and to extract the origin country of the author. If the origin isn't clear, you should take the context into account. Only if the context doesn't help, you should return 'N/A'. Return only the country of origin using two words at most." + }, + { + "role": "user", + "content": "I am a 25 year old software engineer living in California with my girlfriend. When I got Covid-19 last year I was really worried about my job and my girlfriend." + }, + { + "role": "assistant", + "content": "United States of America" + }, + { + "role": "user", + "content": "I am a nurse living in Berlin. When I got Covid-19 I had to stay home what really hit me. Luckily as a young person I recovered quickly." + }, + { + "role": "assistant", + "content": "Germany" + }, + { + "role": "user", + "content": "I got Covid-19 last year and I was really worried about my job. The disease itself wasn't even that hard but it stressed me out, that I wasn't allowed to go to my job!"
+ }, + { + "role": "assistant", + "content": "N/A" + } + ], + [ + "preexisting_condition", + { + "role": "system", + "content": "You are an expert in text understanding and your job is to take a given text about an illness and to extract previous diseases of the author. These diseases have to be EXPLICITLY MENTIONED. And they have to have occurred BEFORE the illness the text is about! Return only the preexisting disease using two words at most." + }, + { + "role": "user", + "content": "I got Covid-19 last year, which was hard since I since ever had to fight Asthma." + }, + { + "role": "assistant", + "content": "Asthma" + }, + { + "role": "user", + "content": "I infected me with Covid-19 right after I recovered from a heavy cold." + }, + { + "role": "assistant", + "content": "Cold" + }, + { + "role": "user", + "content": "I got Covid-19 last year and I was really worried about my job. The disease itself wasn't even that hard but it stressed me out, that I wasn't allowed to go to my job!" + }, + { + "role": "assistant", + "content": "N/A" + }, + { + "role": "user", + "content": "I got Covid-19 last year after I already got it right at the start in 2020." + }, + { + "role": "assistant", + "content": "Covid-19" + } + ] ] - ] } - }, - { +}, +{ "model": "extraction.prompt", "pk": 10, "fields": { - "name": "PREPROCESSING_SPELLCHECK", - "category": "few-shot", - "text": [ - { - "role": "system", - "content": "You are an expert in text analysis with a focus on spelling accuracy. Your task is to identify any spelling errors in the provided text and correct them. Ensure the corrected text is accurate and readable. Please make sure to give out the full text without shorten it." - }, - { - "role": "user", - "content": "I remeber the day I first learnt about the importnce of spellchek. It was an eye-opener for me." - }, - { - "role": "assistant", - "content": "I remember the day I first learned about the importance of spellcheck. It was an eye-opener for me."
- } - ] + "name": "PREPROCESSING_SPELLCHECK", + "category": "few-shot", + "text": [ + { + "role": "system", + "content": "You are an expert in text analysis with a focus on spelling accuracy. Your task is to identify any spelling errors in the provided text and correct them. Ensure the corrected text is accurate and readable. Please make sure to give out the full text without shortening it." + }, + { + "role": "user", + "content": "I remeber the day I first learnt about the importnce of spellchek. It was an eye-opener for me." + }, + { + "role": "assistant", + "content": "I remember the day I first learned about the importance of spellcheck. It was an eye-opener for me." + } + ] } - }, - { +}, +{ "model": "extraction.prompt", "pk": 11, "fields": { - "name": "PREPROCESSING_PUNCTUATION", - "category": "few-shot", - "text": [ - { - "role": "system", - "content": "You are an expert in text analysis with a focus on grammatical accuracy, specifically punctuation and comma usage. Your task is to identify any punctuation or comma errors in the provided text and correct them. Ensure the corrected text is accurate, readable, and follows standard punctuation rules. Please make sure to give out the full text without shortening it." - }, - { - "role": "user", - "content": "Despite the rainy weather many people attended the outdoor concert, which, was surprising. The band played hit after hit, and the crowd's enthusiasm, was infectious even the most reserved attendees found themselves dancing." - }, - { - "role": "assistant", - "content": "Despite the rainy weather, many people attended the outdoor concert, which was surprising. The band played hit after hit, and the crowd's enthusiasm was infectious; even the most reserved attendees found themselves dancing." 
- } - ] + "name": "PREPROCESSING_PUNCTUATION", + "category": "few-shot", + "text": [ + { + "role": "system", + "content": "You are an expert in text analysis with a focus on grammatical accuracy, specifically punctuation and comma usage. Your task is to identify any punctuation or comma errors in the provided text and correct them. Ensure the corrected text is accurate, readable, and follows standard punctuation rules. Please make sure to give out the full text without shortening it." + }, + { + "role": "user", + "content": "Despite the rainy weather many people attended the outdoor concert, which, was surprising. The band played hit after hit, and the crowd's enthusiasm, was infectious even the most reserved attendees found themselves dancing." + }, + { + "role": "assistant", + "content": "Despite the rainy weather, many people attended the outdoor concert, which was surprising. The band played hit after hit, and the crowd's enthusiasm was infectious; even the most reserved attendees found themselves dancing." + } + ] } - }, - { +}, +{ "model": "extraction.prompt", "pk": 12, "fields": { - "name": "PREPROCESSING_IDENTIFY_TIMESTAMPS", - "category": "few-shot", - "text": [ - { - "role": "system", - "content": "You are an expert in text analysis. Your task is to identify and extract any timestamps (specific dates, months, years, recognized holidays, timeframes like '12 weeks later', or periods between specific dates) mentioned in the context of an individual experiencing symptoms or being diagnosed with an illness. Highlight these timestamps within the text by surrounding them with $$$ symbols. Ensure the full text is presented without any omissions, and only the timestamps are highlighted in this manner." - }, - { - "role": "user", - "content": "I started feeling unwell around the middle of March 2021. The symptoms were quite severe by the 20th of March, which is when I decided to get tested. The test results came back positive for Covid-19 on March 22nd, 2021." 
- }, - { - "role": "assistant", - "content": "I started feeling unwell around the middle of $$$March 2021$$$. The symptoms were quite severe by the $$$20th of March$$$, which is when I decided to get tested. The test results came back positive for Covid-19 on $$$March 22nd, 2021$$$." - }, - { - "role": "user", - "content": "I started feeling unusually fatigued right before Thanksgiving 2020. The fatigue worsened over the holiday, and by the following Monday, I had developed a fever. I was tested for Covid-19 two days later and received a positive result on November 30th, 2020." - }, - { - "role": "assistant", - "content": "I started feeling unusually fatigued right before $$$Thanksgiving 2020$$$. The fatigue worsened over the holiday, and by the following Monday, I had developed a fever. I was tested for Covid-19 $$$two days later$$$ and received a positive result on $$$November 30th, 2020$$$." - } - ] + "name": "PREPROCESSING_TIME_IDENTIFICATION", + "category": "few-shot", + "text": [ + { + "role": "system", + "content": "You are an expert in text analysis. Your task is to identify timestamps of events in the given text. Those can be: specific dates, months, years, recognized holidays, relative dates like '12 weeks later' or 'as the week progressed'. Highlight these timestamps within the text by surrounding them with $$$ symbols. Return the full text with the highlighted time specifications." + }, + { + "role": "user", + "content": "I started feeling unwell around the middle of March 2021. The symptoms were quite severe by the 20th of March, which is when I decided to get tested. The test results came back positive for Covid-19 on March 22nd, 2021." + }, + { + "role": "assistant", + "content": "I started feeling unwell around the $$$middle of March 2021$$$. The symptoms were quite severe by the $$$20th of March$$$, which is when I decided to get tested. The test results came back positive for Covid-19 on $$$March 22nd, 2021$$$." 
+ }, + { + "role": "user", + "content": "I started feeling unusually fatigued right before Thanksgiving 2020. The fatigue worsened over the holiday, and by the following Monday, I had developed a fever. I was tested for Covid-19 two days later and received a positive result on November 30th, 2020." + }, + { + "role": "assistant", + "content": "I started feeling unusually fatigued right before $$$Thanksgiving 2020$$$. The fatigue worsened over the holiday, and by the $$$following Monday$$$, I had developed a fever. I was tested for Covid-19 $$$two days later$$$ and received a positive result on $$$November 30th, 2020$$$." + }, + { + "role": "user", + "content": "Two days after new years eve, I started feeling sick. In the following week my symptoms worsened." + }, + { + "role": "assistant", + "content": "$$$Two days after$$$ $$$new years eve$$$, I started feeling sick. $$$In the following week$$$ my symptoms worsened." + }, + { + "role": "user", + "content": "I was tested positive for Covid-19 on my birthday, the 12th of June. The day before I already felt light-headed. Two weeks after my birthday, I was admitted to the hospital." + }, + { + "role": "assistant", + "content": "I was tested positive for Covid-19 on my birthday, $$$the 12th of June$$$. $$$The day before$$$ I already felt light-headed. $$$Two weeks after my birthday$$$, I was admitted to the hospital." + }, + { + "role": "user", + "content": "Over the course of the next few days, my symptoms progressed, and I started experiencing high fever. After two weeks of isolation, I finally started feeling better." + }, + { + "role": "assistant", + "content": "$$$Over the course of the next few days$$$, my symptoms progressed, and I started experiencing high fever. $$$After two weeks$$$ of isolation, I finally started feeling better."
+ } + ] } - }, - { +}, +{ "model": "extraction.prompt", "pk": 13, "fields": { - "name": "PREPROCESSING_TRANSFORM_TIMESTAMPS", - "category": "few-shot", - "text": [ - { - "role": "system", - "content": "You are an expert in text analysis and date formatting. Your task is to identify any timestamps related to when an individual experienced symptoms or was diagnosed with an illness. Convert and present these timestamps in the specific format of YYYY/MM/DD. All relevant time specifications are already highlighted with $$$ $$$. To guarantee the completeness of the date you must make assumptions about the year, month and day based on the context. If the time specification is a duration, based on context, you must make assumptions about the start date of the duration. If there is no information about the year, month or day, you are allowed to assume the current year, month or day. Ensure the full text is presented without any omissions, and try to transform every timestamps as concrete as possible. Please make sure to give out the full text without shortening it." - }, - { - "role": "user", - "content": "I noticed the first symptoms shortly after my birthday in $$$April$$$, and exactly $$$12 weeks later$$$, my condition had deteriorated significantly. I was officially diagnosed with Lyme disease on $$$August 7th$$$. In $$$early 2025$$$, it will be gone!" - }, - { - "role": "assistant", - "content": "I noticed the first symptoms shortly after my birthday on 2024/04/01, and exactly 12 weeks later, on 2024/06/24, my condition had deteriorated significantly. I was officially diagnosed with Lyme disease on 2024/08/07. It will be gone on 2025/01/01!" - }, - { - "role": "user", - "content": "During the period between $$$the 01.02 and the 03.02$$$, I felt unusually tired, but I thought it was just stress. However, after attending a large event $$$two weeks later$$$, I developed a fever and was tested positive for the flu." 
- }, - { - "role": "assistant", - "content": "During the period between 2024/02/01 and 2024/02/03, I felt unusually tired, but I thought it was just stress. However, after attending a large event two weeks later, on 2024/02/17, I developed a fever and was tested positive for the flu." - } - ] + "name": "PREPROCESSING_TRANSFORM_TIMESTAMPS", + "category": "few-shot", + "text": [ + { + "role": "system", + "content": "You are an expert in text analysis and date formatting. Convert these timestamps in the specific format of YYYY/MM/DD. Some relevant time specifications are highlighted with $$$ symbols in the text, focus on those. To guarantee the completeness of the date you must make assumptions about the year, month and day based on the context. If the time specification is a duration, based on context, you must make assumptions about the start date of the duration. If there is no information about the year, month or day, you are allowed to assume the current year, month or day. Ensure the full text is presented without any omissions and transform every timestamps as concrete as possible. Please make sure to give out the full text without shortening it." + }, + { + "role": "user", + "content": "I noticed the first symptoms shortly after my birthday in $$$April$$$, and exactly $$$12 weeks later$$$, my condition had deteriorated significantly. I was officially diagnosed with Lyme disease on $$$August 7th$$$. In $$$early 2025$$$, it will be gone!" + }, + { + "role": "assistant", + "content": "I noticed the first symptoms shortly after my birthday on 2024/04/01, and exactly 12 weeks later, on 2024/06/24, my condition had deteriorated significantly. I was officially diagnosed with Lyme disease on 2024/08/07. It will be gone on 2025/01/01!" + }, + { + "role": "user", + "content": "During the period between $$$the 01.02 and the 03.02$$$, I felt unusually tired, but I thought it was just stress. 
However, after attending a large event $$$two weeks later$$$, I developed a fever and was tested positive for the flu." + }, + { + "role": "assistant", + "content": "During the period between 2024/02/01 and 2024/02/03, I felt unusually tired, but I thought it was just stress. However, after attending a large event two weeks later, on 2024/02/17, I developed a fever and was tested positive for the flu." + } + ] } - }, - { +}, +{ "model": "extraction.prompt", "pk": 14, "fields": { - "name": "PREPROCESSING_TIME_CALCULATION", - "category": "few-shot", - "text": [ - { - "role": "system", - "content": "You are an expert in text analysis and date calculations. Your task is to identify timestamps related to health events or diagnoses and convert these into concrete dates in the format of YYYY/MM/DD. For relative timestamps (like 'a few weeks after' or 'months before'), calculate the exact dates based on provided or assumed known dates. Ensure the text is complete without omission, with all relevant timestamps accurately transformed to the specified format. Please make sure to give out the full text without shortening it." - }, - { - "role": "user", - "content": "After experiencing severe headaches starting in mid-$$$March 2022$$$, I went to see a neurologist. The MRI scan scheduled $$$three weeks later$$$ confirmed that I had a benign brain tumor. Post-surgery, I began my recovery phase, which lasted until $$$four months later$$$. During a follow-up visit $$$two months after my recovery$$$, my doctor confirmed that my condition had improved significantly." - }, - { - "role": "assistant", - "content": "After experiencing severe headaches starting on 2022/03/15, I went to see a neurologist. The MRI scan scheduled on 2022/04/05 confirmed that I had a benign brain tumor. Post-surgery, I began my recovery phase, which lasted until 2022/08/05. During a follow-up visit on 2022/10/05, my doctor confirmed that my condition had improved significantly." 
- }, - { - "role": "user", - "content": "Early July 2020, I started experiencing severe coughing and a high fever. It turned out I had contracted Covid-19. And in $$$early August$$$ I had lost my sense of taste." - }, - { - "role": "assistant", - "content": "On the 2020/06/01, I started experiencing severe coughing and a high fever. It turned out I had contracted Covid-19. And on 2020/08/01 I had lost my sense of taste." - } - ] + "name": "PREPROCESSING_TIME_RELATIVE", + "category": "few-shot", + "text": [ + { + "role": "system", + "content": "You are an expert in text analysis and temporal relations. Your task is to convert relative time information (like 'a few weeks after' or 'in the following days') into concrete dates in the format of YYYY/MM/DD. For relative timestamps like 'two days after my birthday' or 'the day before that', calculate the exact dates based on other dates in the text. If this information is missing, make reasonable assumptions. Some relevant time specifications are highlighted with $$$ symbols in the text, focus on those. Return the full text with the transformed timestamps." + }, + { + "role": "user", + "content": "After experiencing severe headaches starting on 2022/03/15, I went to see a neurologist. The MRI scan scheduled $$$three weeks later$$$ confirmed that I had a benign brain tumor. Post-surgery, I began my recovery phase, which lasted until $$$four months later$$$. During a follow-up visit $$$two weeks after my recovery$$$, my doctor confirmed that my condition had improved significantly." + }, + { + "role": "assistant", + "content": "After experiencing severe headaches starting on 2022/03/15, I went to see a neurologist. The MRI scan scheduled on 2022/04/05 confirmed that I had a benign brain tumor. Post-surgery, I began my recovery phase, which lasted until 2022/08/05. During a follow-up visit on 2022/08/19, my doctor confirmed that my condition had improved significantly."
+ }, + { + "role": "user", + "content": "On the 2020/06/01, I started experiencing severe coughing and a high fever. It turned out I had contracted Covid-19. And in $$$the following weeks$$$ I had lost my sense of taste." + }, + { + "role": "assistant", + "content": "On the 2020/06/01, I started experiencing severe coughing and a high fever. It turned out I had contracted Covid-19. And during 2020/06/01 and 2020/06/15 I lost my sense of taste." + }, + { + "role": "user", + "content": "I was diagnosed with diabetes on 2023/05/01. $$$A few days later$$$, I began experiencing extreme fatigue and dizziness." + }, + { + "role": "assistant", + "content": "I was diagnosed with diabetes on 2023/05/01. On 2023/05/04, I began experiencing extreme fatigue and dizziness." + }, + { + "role": "user", + "content": "I started chemotherapy on 2022/09/15. $$$Two weeks after the treatment$$$, my hair started falling out." + }, + { + "role": "assistant", + "content": "I started chemotherapy on 2022/09/15. On 2022/09/29, my hair started falling out." + }, + { + "role": "user", + "content": "I had surgery to remove a tumor on 2023/03/10. $$$In the following week$$$, I developed an infection at the surgical site." + }, + { + "role": "assistant", + "content": "I had surgery to remove a tumor on 2023/03/10. Between 2023/03/10 and 2023/03/17, I developed an infection at the surgical site." + }, + { + "role": "user", + "content": "I began taking new medication for my condition on 2022/07/01. $$$A month after starting the medication$$$, I noticed significant improvements in my symptoms." + }, + { + "role": "assistant", + "content": "I began taking new medication for my condition on 2022/07/01. On 2022/08/01, I noticed significant improvements in my symptoms."
+ } + ] } - }, - { +}, +{ "model": "extraction.prompt", "pk": 15, "fields": { - "name": "PREPROCESSING_TIME_INTERPRETATION", - "category": "few-shot", - "text": [ - { - "role": "system", - "content": "You are an expert in text analysis with a specialization in date formatting and interpretation. Your task is to transform general time references related to health events or any other context into specific dates in the format of YYYY/MM/DD. Specifically, convert 'early [month]' to the 1st of the month, 'mid [month]' to the 15th of the month, and 'end of [month]' to the last day of the month (use 30 for April, June, September, and November; 31 for January, March, May, July, August, October, and December; and 28 or 29 for February, depending on leap years). If the year is not mentioned, assume the current year. Provide the full text without omission, ensuring all general time references are accurately transformed into the specified format. Example transformations: 'early January 2020' becomes 2020/01/01, 'mid January 2020' becomes 2020/01/15, and 'end of January 2020' becomes 2020/01/31." - }, - { - "role": "user", - "content": "In early January, I noticed a persistent cough. By mid January, the cough had worsened, leading me to seek medical advice. A specialist finally saw me at the end of January, diagnosing me with a chronic condition." - }, - { - "role": "assistant", - "content": "In 2020/01/01, I noticed a persistent cough. By 2020/01/15, the cough had worsened, leading me to seek medical advice. A specialist finally saw me at 2020/01/31, diagnosing me with a chronic condition." - }, - { - "role": "user", - "content": "In the middle of 2023 I got a stroke. By the end of the year I had recovered. But in early 2024 I had a relapse." - }, - { - "role": "assistant", - "content": "On 2023/06/01, I got a stroke. By 2023/12/01, I had recovered, On 2024/01/01, I had a relapse." 
- } - ] + "name": "PREPROCESSING_TIME_GENERAL", + "category": "few-shot", + "text": [ + { + "role": "system", + "content": "You are an expert in text analysis and date formatting. Your task is to transform unspecific time references into specific dates in the format of YYYY/MM/DD. Some relevant time specifications are highlighted with $$$ symbols in the text, focus on those. Specifically, convert 'early [month]' to the 1st of the month, 'mid [month]' to the 15th of the month, and 'end of [month]' to the last day of the month (use 30 for April, June, September, and November; 31 for January, March, May, July, August, October, and December; and 28 or 29 for February, depending on leap years). If the year is not mentioned, assume the current year. Return the full text, with the transformed timestamps." + }, + { + "role": "user", + "content": "$$$In early January$$$, I noticed a persistent cough. By $$$mid January$$$, the cough had worsened, leading me to seek medical advice. A specialist finally saw me $$$at the end of January$$$, diagnosing me with a chronic condition." + }, + { + "role": "assistant", + "content": "In 2020/01/01, I noticed a persistent cough. By 2020/01/15, the cough had worsened, leading me to seek medical advice. A specialist finally saw me at 2020/01/31, diagnosing me with a chronic condition." + }, + { + "role": "user", + "content": "In the $$$middle of 2023$$$ I got a stroke. By the $$$end of the year$$$ I had recovered. But in $$$early 2024$$$ I had a relapse." + }, + { + "role": "assistant", + "content": "On 2023/06/01, I got a stroke. By 2023/12/01, I had recovered. On 2024/01/01, I had a relapse." + }, + { + "role": "user", + "content": "I plan to start my new fitness routine in $$$early February 2024$$$. By $$$mid February$$$, I hope to see some initial results. By the $$$end of February$$$, I want to have a consistent habit." + }, + { + "role": "assistant", + "content": "I plan to start my new fitness routine on 2024/02/01. 
By 2024/02/15, I hope to see some initial results. By 2024/02/29, I want to have a consistent habit." + }, + { + "role": "user", + "content": "I usually experience seasonal allergies in $$$early April$$$. By $$$mid April$$$, they become severe, and by the $$$end of May$$$, I need to take stronger medication." + }, + { + "role": "assistant", + "content": "I usually experience seasonal allergies on 2024/04/01. By 2024/04/15, they become severe, and by 2024/05/31, I need to take stronger medication." + }, + { + "role": "user", + "content": "I started feeling unwell around the $$$middle of March 2021$$$. The symptoms were quite severe by the $$$20th of March$$$, which is when I decided to get tested. The test results came back positive for Covid-19 on $$$March 22nd, 2021$$$." + }, + { + "role": "assistant", + "content": "I started feeling unwell around the 2021/03/15. The symptoms were quite severe by the 2021/03/20, which is when I decided to get tested. The test results came back positive for Covid-19 on 2021/03/22." + } + ] } - }, - { +}, +{ "model": "extraction.prompt", "pk": 16, "fields": { - "name": "CREATE_PATIENT_JOURNEY", - "category": "zero-shot", - "text": [ - { - "role": "user", - "content": "Please outline the course of your Covid-19 infection, what you did (and when you did that) because of it and which doctors you may consulted. Please give some information about the time, in a few cases directly as a date and in the other as something in the lines of 'in the next days', 'the week after that' or similar. Give your outline as a continuous text. Also include if you later went for getting a vaccine and if so, how often. You don't have to include deals about who you are. Please include 100 to 400 words, but not more than 400." 
- } - ] + "name": "CREATE_PATIENT_JOURNEY", + "category": "zero-shot", + "text": [ + { + "role": "user", + "content": "Please outline the course of your Covid-19 infection, what you did (and when you did that) because of it and which doctors you may have consulted. Please give some information about the time, in a few cases directly as a date and in the other as something in the lines of 'in the next days', 'the week after that' or similar. Give your outline as a continuous text. Also include if you later went for getting a vaccine and if so, how often. You don't have to include details about who you are. Please include 100 to 400 words, but not more than 400." + } + ] + } +}, +{ + "model": "extraction.prompt", + "pk": 17, + "fields": { + "name": "PREPROCESSING_TIME_HOLIDAYS", + "category": "few-shot", + "text": [ + { + "role": "system", + "content": "You are an expert in text analysis and date formatting. Your task is to identify time information that is related to holidays and to convert them into concrete timestamps. Some relevant time specifications are highlighted with $$$ symbols in the text, focus on those. There are also time specifications highlighted, that are not holidays, ignore them. If no information about the year is given, assume the current year. Use the format YYYY/MM/DD. Only transform the time information that is related to holidays. Keep all other time information highlighted." + }, + { + "role": "user", + "content": "$$$Two days after$$$ $$$new years eve$$$, I started feeling sick." + }, + { + "role": "assistant", + "content": "$$$Two days after$$$ 2023/12/31, I started feeling sick." + }, + { + "role": "user", + "content": "I started feeling unusually fatigued right before $$$Thanksgiving 2020$$$." + }, + { + "role": "assistant", + "content": "I started feeling unusually fatigued right before 2020/11/26." + }, + { + "role": "user", + "content": "I always feel excited on $$$Halloween$$$ and then exhausted the next day." + }, + { + "role": "assistant", + "content": "I always feel excited on 2023/10/31 and then exhausted the next day."
+ }, + { + "role": "user", + "content": "I visited my grandparents $$$two weeks before$$$ $$$Christmas$$$ last year." + }, + { + "role": "assistant", + "content": "I visited my grandparents $$$two weeks before$$$ 2022/12/25 last year." + }, + { + "role": "user", + "content": "We planned a big family reunion for $$$Easter$$$, but it got postponed." + }, + { + "role": "assistant", + "content": "We planned a big family reunion for 2023/04/09, but it got postponed." + }, + { + "role": "user", + "content": "My annual check-up is always scheduled for $$$mid June$$$, just after $$$Flag Day$$$." + }, + { + "role": "assistant", + "content": "My annual check-up is always scheduled for $$$mid June$$$, just after 2023/06/14." + } + ] + } +}, +{ + "model": "extraction.prompt", + "pk": 18, + "fields": { + "name": "PREPROCESSING_TIME_PROPAGATE", + "category": "few-shot", + "text": [ + { + "role": "system", + "content": "You are an expert in text understanding and information extraction. Your task is to propagate temporal information throughout a given text. For each sentence, append the latest known timestamp in the format --YYYY/MM/DD-- before the period. Instructions: 1. Identify the most recent date mentioned up to and including the current sentence. This includes relative dates like 'after two weeks' or 'in the following days'. Translate these specifications into concrete dates, if you encounter them. 2. Only if no new temporal information is given, reuse the added timestamp from the previous sentence. 3. Append this date at the end of the current sentence before the period." + }, + { + "role": "user", + "content": "0: After experiencing the first symptoms of Covid-19 on 2020/09/13, I isolated myself at home.\n1: My symptoms started with a mild fever and worsened over the following days.\n2: By 2020/09/15, I developed difficulty breathing and consulted a doctor via telemedicine.\n3: The doctor advised me to monitor my symptoms closely and prescribed medications."
+ }, + { + "role": "assistant", + "content": "0: After experiencing the first symptoms of Covid-19 on 2020/09/13, I isolated myself at home--2020/09/13--.1: My symptoms started with a mild fever and worsened over the following days--2020/09/13--.2: By 2020/09/15, I developed difficulty breathing and consulted a doctor via telemedicine--2020/09/15--.3: The doctor advised me to monitor my symptoms closely and prescribed medications--2020/09/15--." + }, + { + "role": "user", + "content": "15: After a week in the hospital, my breathing got better, and they let me go home on 2022/01/25. 16: I was super happy to be back in my own bed, but I still felt exhausted." + }, + { + "role": "assistant", + "content": "15: After a week in the hospital, my breathing got better, and they let me go home on 2022/01/25--2022/01/25--.16: I was super happy to be back in my own bed, but I still felt exhausted--2022/01/25--." + } + ] } - } +} ] diff --git a/tracex_project/extraction/forms.py b/tracex_project/extraction/forms.py index 7b3cd92e..41a3d700 100644 --- a/tracex_project/extraction/forms.py +++ b/tracex_project/extraction/forms.py @@ -9,7 +9,7 @@ class JourneyUploadForm(forms.ModelForm): - """Form for uploading your own patient journey.""" + """Form for uploading your own Patient Journey.""" class Meta: """ @@ -32,13 +32,13 @@ class Meta: } widgets = { "name": forms.TextInput( - attrs={"placeholder": "Name for your patient journey"} + attrs={"placeholder": "Name for your Patient Journey"} ), } ALLOWED_FILE_TYPES = ["txt"] file = forms.FileField( - label="Upload your patient journey", + label="Upload your Patient Journey", help_text=f"Please upload a file of type {ALLOWED_FILE_TYPES}.", required=True, ) @@ -46,10 +46,11 @@ class Meta: class JourneySelectForm(forms.Form): - """Django form for selecting a patient journey from available choices in the database.""" + """Django form for selecting a Patient Journey from available choices in the database.""" selected_patient_journey = 
forms.ChoiceField( choices=[], + label="Selected Patient Journey:", widget=forms.Select(attrs={"id": "patient-journey-select"}), ) @@ -62,7 +63,7 @@ def __init__(self, *args, **kwargs): @staticmethod def get_patient_journey_choices() -> List[Tuple[str, str]]: - """Returns a list of tuples containing the names of all patient journeys from the database.""" + """Returns a list of tuples containing the names of all Patient Journeys from the database.""" patient_journeys = PatientJourney.manager.all() choices = [ (patient_journey.name, patient_journey.name) diff --git a/tracex_project/extraction/logic/module.py b/tracex_project/extraction/logic/module.py index f739c81c..d876d9b5 100644 --- a/tracex_project/extraction/logic/module.py +++ b/tracex_project/extraction/logic/module.py @@ -15,7 +15,7 @@ def __init__(self): Initializes a module with the following parameters. name: The name of the module. description: A description of what the module does. - patient_journey: The patient journey most modules operate on. + patient_journey: The Patient Journey most modules operate on. result: The result that the module provides. """ self.name = None @@ -37,8 +37,8 @@ def execute( Keyword arguments: _input -- Any additional input to the module. - patient_journey -- The patient journey as text. - patient_journey_sentences -- The same patient journey as a list of sentences. + patient_journey -- The Patient Journey as text. + patient_journey_sentences -- The same Patient Journey as a list of sentences. """ self.patient_journey = patient_journey self.patient_journey_sentences = patient_journey_sentences @@ -57,8 +57,8 @@ def execute_and_save( Executes the logic of the module and saves the result to the database. Override this to define your own module. Keyword arguments: - patient_journey -- The patient journey as text. - patient_journey_sentences -- The same patient journey as a list of sentences. + patient_journey -- The Patient Journey as text. 
+ patient_journey_sentences -- The same Patient Journey as a list of sentences. """ self.patient_journey = patient_journey self.patient_journey_sentences = patient_journey_sentences diff --git a/tracex_project/extraction/logic/modules/module_activity_labeler.py b/tracex_project/extraction/logic/modules/module_activity_labeler.py index 405fdf00..bc8e97ec 100644 --- a/tracex_project/extraction/logic/modules/module_activity_labeler.py +++ b/tracex_project/extraction/logic/modules/module_activity_labeler.py @@ -1,4 +1,4 @@ -"""This is the module that extracts the activity labels from the patient journey.""" +"""This is the module that extracts the activity labels from the Patient Journey.""" from pathlib import Path from typing import List, Optional import pandas as pd @@ -12,13 +12,13 @@ class ActivityLabeler(Module): """ - This is the module that starts the pipeline with structuring the patient journey in activities. + This is the module that starts the pipeline with structuring the Patient Journey in activities. """ def __init__(self): super().__init__() self.name = "Activity Labeler" - self.description = "Extracts the activity labels from a patient journey." + self.description = "Extracts the activity labels from a Patient Journey." @log_execution_time(Path(settings.BASE_DIR / "tracex/logs/execution_time.log")) def execute( @@ -29,9 +29,9 @@ def execute( cohort=None, ) -> pd.DataFrame: """ - Extracts the activity labels from the patient journey with the following steps: - 1. Number the patient journey sentences to enable selecting a specific range of sentences. - 2. Extract the activity labels from the patient journey using chatgpt. + Extracts the activity labels from the Patient Journey with the following steps: + 1. Number the Patient Journey sentences to enable selecting a specific range of sentences. + 2. Extract the activity labels from the Patient Journey using chatgpt. 
""" super().execute( _input, @@ -45,14 +45,18 @@ def execute( patient_journey_numbered: str = self.__number_patient_journey_sentences( patient_journey_sentences ) - activity_labels: pd.DataFrame = self.__extract_activities(patient_journey_numbered, condition) + activity_labels: pd.DataFrame = self.__extract_activities( + patient_journey_numbered=patient_journey_numbered, + condition=condition, + number_of_sentences=len(patient_journey_sentences), + ) return activity_labels @staticmethod def __number_patient_journey_sentences(patient_journey_sentences: List[str]) -> str: """ - Number the patient journey sentences as one String in the format: + Number the Patient Journey sentences as one String in the format: 1: ... 2: ... And so on. @@ -65,21 +69,31 @@ def __number_patient_journey_sentences(patient_journey_sentences: List[str]) -> return patient_journey_numbered @staticmethod - def __extract_activities(patient_journey_numbered: str, condition: Optional[str]) -> pd.DataFrame: + def __extract_activities( + patient_journey_numbered: str, + condition: Optional[str], + number_of_sentences: int, + ) -> pd.DataFrame: """ - Converts a patient journey, where every sentence is numbered, to a DataFrame with the activity labels by - extracting the activity labels from the patient journey. + Converts a Patient Journey, where every sentence is numbered, to a DataFrame with the activity labels by + extracting the activity labels from the Patient Journey. 
""" column_name = "activity" messages = Prompt.objects.get(name="TEXT_TO_ACTIVITY_MESSAGES").text - user_message: List[str] = patient_journey_numbered + user_message: str = patient_journey_numbered if condition is not None: user_message = f"Focus on those events that are related to the course of the disease of {condition}.\n\n\ {user_message}" messages.append({"role": "user", "content": user_message}) activity_labels = u.query_gpt(messages).split("\n") df = pd.DataFrame(activity_labels, columns=[column_name]) - df[["activity", "sentence_id"]] = df["activity"].str.split(" #", expand=True) + try: + df[["activity", "sentence_id"]] = df["activity"].str.split( + " #", expand=True + ) + except ValueError: + scaling_factor = df.shape[0] / (number_of_sentences - 1) + df["sentence_id"] = int(df.reset_index().index * scaling_factor) return df diff --git a/tracex_project/extraction/logic/modules/module_cohort_tagger.py b/tracex_project/extraction/logic/modules/module_cohort_tagger.py index d461782a..551ada15 100644 --- a/tracex_project/extraction/logic/modules/module_cohort_tagger.py +++ b/tracex_project/extraction/logic/modules/module_cohort_tagger.py @@ -1,4 +1,4 @@ -"""This is the module that cohort tags from the patient journey.""" +"""This is the module that cohort tags from the Patient Journey.""" from pathlib import Path from typing import Dict, List, Optional from django.conf import settings @@ -11,14 +11,14 @@ class CohortTagger(Module): """ - This is the module that extracts the cohort information from the patient journey. + This is the module that extracts the cohort information from the Patient Journey. The cohort tags are condition, age, biological sex, origin and preexisting condition. """ def __init__(self): super().__init__() self.name = "Cohort Tagger" - self.description = "Extracts the cohort tags from a patient journey." + self.description = "Extracts the cohort tags from a Patient Journey." 
@log_execution_time(Path(settings.BASE_DIR / "tracex/logs/execution_time.log")) def execute_and_save( @@ -27,7 +27,7 @@ def execute_and_save( patient_journey: str = None, patient_journey_sentences: List[str] = None, ) -> Optional[Dict[str, str]]: - """Extracts the cohort information from the patient journey and saves the result in the database.""" + """Extracts the cohort information from the Patient Journey and saves the result in the database.""" super().execute_and_save( df, patient_journey=patient_journey, diff --git a/tracex_project/extraction/logic/modules/module_event_type_classifier.py b/tracex_project/extraction/logic/modules/module_event_type_classifier.py index b0b1ccab..728c3577 100644 --- a/tracex_project/extraction/logic/modules/module_event_type_classifier.py +++ b/tracex_project/extraction/logic/modules/module_event_type_classifier.py @@ -14,13 +14,13 @@ class EventTypeClassifier(Module): This module classifies the event types of the activities. The given event types are 'Symptom Onset', 'Symptom Offset', 'Diagnosis', 'Doctor visit', 'Treatment', 'Hospital admission', 'Hospital discharge', 'Medication', 'Lifestyle Change' and 'Feelings'. This is done so that we can extract a standardized set of event - types from the patient journey. This is necessary for the application of process mining algorithms. + types from the Patient Journey. This is necessary for the application of process mining algorithms. """ def __init__(self): super().__init__() self.name = "Event Type Classifier" - self.description = "Classifies the event types for the corresponding activity labels from a patient journey." + self.description = "Classifies the event types for the corresponding activity labels from a Patient Journey." 
@log_execution_time(Path(settings.BASE_DIR / "tracex/logs/execution_time.log")) def execute( diff --git a/tracex_project/extraction/logic/modules/module_location_extractor.py b/tracex_project/extraction/logic/modules/module_location_extractor.py index da100c8d..e9115919 100644 --- a/tracex_project/extraction/logic/modules/module_location_extractor.py +++ b/tracex_project/extraction/logic/modules/module_location_extractor.py @@ -11,14 +11,14 @@ class LocationExtractor(Module): """ - This is the module that extracts the location information from the patient journey to each activity. + This is the module that extracts the location information from the Patient Journey to each activity. This means all activities are classified to the given locations "Home", "Hospital", "Doctors". """ def __init__(self): super().__init__() self.name = "Location Extractor" - self.description = "Extracts the locations for the corresponding activity labels from a patient journey." + self.description = "Extracts the locations for the corresponding activity labels from a Patient Journey." @log_execution_time(Path(settings.BASE_DIR / "tracex/logs/execution_time.log")) def execute( diff --git a/tracex_project/extraction/logic/modules/module_time_extractor.py b/tracex_project/extraction/logic/modules/module_time_extractor.py index 91a73bbc..c8857628 100644 --- a/tracex_project/extraction/logic/modules/module_time_extractor.py +++ b/tracex_project/extraction/logic/modules/module_time_extractor.py @@ -1,4 +1,4 @@ -"""This module extracts the time information from the patient journey.""" +"""This module extracts the time information from the Patient Journey.""" from pathlib import Path from typing import List from django.conf import settings @@ -12,14 +12,14 @@ class TimeExtractor(Module): """ - This is the module that extracts the time information from the patient journey. This includes start dates, + This is the module that extracts the time information from the Patient Journey. 
This includes start dates, end dates and durations. """ def __init__(self): super().__init__() self.name = "Time Extractor" - self.description = "Extracts the timestamps for the corresponding activity labels from a patient journey." + self.description = "Extracts the timestamps for the corresponding activity labels from a Patient Journey." @log_execution_time(Path(settings.BASE_DIR / "tracex/logs/execution_time.log")) def execute( @@ -29,7 +29,7 @@ def execute( patient_journey_sentences: List[str] = None, cohort=None, ) -> pd.DataFrame: - """This function extracts the time information from the patient journey. + """This function extracts the time information from the Patient Journey. For each activity label, the start date, end date and duration are extracted.""" super().execute( df, @@ -100,23 +100,23 @@ def __calculate_duration(row: pd.Series) -> str: def __post_processing(df: pd.DataFrame) -> pd.DataFrame: """Fill missing values for dates with default values.""" - def convert_to_datetime(df: pd.DataFrame, column: pd.Series) -> pd.DataFrame: - df[column] = pd.to_datetime( - df[column], format="%Y%m%dT%H%M", errors="coerce" + def convert_to_datetime(_df: pd.DataFrame, column: str) -> pd.DataFrame: + _df[column] = pd.to_datetime( + _df[column], format="%Y%m%dT%H%M", errors="coerce" ) - return df + return _df - def set_default_date_if_na(df: pd.DataFrame, column: pd.Series) -> pd.DataFrame: - if df[column].isna().all(): - df[column] = df[column].fillna(pd.Timestamp("2020-01-01 00:00")) + def set_default_date_if_na(_df: pd.DataFrame, column: str) -> pd.DataFrame: + if _df[column].isna().all(): + _df[column] = _df[column].fillna(pd.Timestamp("2020-01-01 00:00")) - return df + return _df - def fill_missing_values(df: pd.DataFrame, column: pd.Series) -> pd.DataFrame: - df[column] = df[column].ffill().bfill() + def fill_missing_values(_df: pd.DataFrame, column: str) -> pd.DataFrame: + _df[column] = _df[column].ffill().bfill() - return df + return _df def 
fix_end_dates(row: pd.Series) -> pd.Series: if ( diff --git a/tracex_project/extraction/logic/orchestrator.py b/tracex_project/extraction/logic/orchestrator.py index da4c5891..d94c6332 100644 --- a/tracex_project/extraction/logic/orchestrator.py +++ b/tracex_project/extraction/logic/orchestrator.py @@ -29,7 +29,7 @@ class ExtractionConfiguration: """ Dataclass for the configuration of the orchestrator. This specifies all modules that can be executed, what event types are used to classify the activity labels, what locations are used to classify the activity labels and what the - patient journey is, on which the pipeline is executed. + Patient Journey is, on which the pipeline is executed. Public Methods: update -- Update the configuration with a dictionary mapping its attributes to new values. diff --git a/tracex_project/extraction/models.py b/tracex_project/extraction/models.py index ac551acb..e935b28b 100644 --- a/tracex_project/extraction/models.py +++ b/tracex_project/extraction/models.py @@ -5,11 +5,11 @@ class PatientJourney(models.Model): - """Model for the patient journey input.""" + """Model for the Patient Journey input.""" name = models.CharField( max_length=100, - help_text="The name represents a unique title describing the content of the patient journey.", + help_text="The name represents a unique title describing the content of the Patient Journey.", unique=True, ) patient_journey = models.TextField() @@ -20,7 +20,7 @@ def __str__(self): class Trace(models.Model): - """Model for the trace of a patient journey.""" + """Model for the trace of a Patient Journey.""" patient_journey = models.ForeignKey( PatientJourney, on_delete=models.CASCADE, related_name="trace" @@ -33,7 +33,7 @@ def __str__(self): class Cohort(models.Model): - """Model for the cohort of a patient journey.""" + """Model for the cohort of a Patient Journey.""" trace = models.OneToOneField( Trace, on_delete=models.CASCADE, related_name="cohort", null=True diff --git 
a/tracex_project/extraction/templates/choose_input_method.html b/tracex_project/extraction/templates/choose_input_method.html index 9540cd63..7da83db8 100644 --- a/tracex_project/extraction/templates/choose_input_method.html +++ b/tracex_project/extraction/templates/choose_input_method.html @@ -12,7 +12,7 @@

Choose Input Method

-

You can either upload a patient journey or select an existing patient journey from the database.

+

You can either upload a Patient Journey or select an existing Patient Journey from the database.

diff --git a/tracex_project/extraction/templates/filter_journey.html b/tracex_project/extraction/templates/filter_journey.html index d1337db5..7f59988a 100644 --- a/tracex_project/extraction/templates/filter_journey.html +++ b/tracex_project/extraction/templates/filter_journey.html @@ -332,7 +332,7 @@

DFG activity key

- + diff --git a/tracex_project/extraction/templates/journey_details.html b/tracex_project/extraction/templates/journey_details.html index 70fd9da8..aea158fd 100644 --- a/tracex_project/extraction/templates/journey_details.html +++ b/tracex_project/extraction/templates/journey_details.html @@ -17,7 +17,7 @@

Content of "{{ patient_journey.name }}"

- + diff --git a/tracex_project/extraction/templates/result.html b/tracex_project/extraction/templates/result.html index 7a5f4fb7..e0dfead8 100644 --- a/tracex_project/extraction/templates/result.html +++ b/tracex_project/extraction/templates/result.html @@ -11,7 +11,7 @@

Success! The TracEX pipeline produced the following results:

-

This is the patient journey you provided:

+

This is the Patient Journey you provided:

{{ journey }}

diff --git a/tracex_project/extraction/templates/select_journey.html b/tracex_project/extraction/templates/select_journey.html index 8106cd99..703a0527 100644 --- a/tracex_project/extraction/templates/select_journey.html +++ b/tracex_project/extraction/templates/select_journey.html @@ -10,7 +10,7 @@

Select Patient Journey from Database

-

Please select a patient journey from the database below.

+

Please select a Patient Journey from the database below.

{% csrf_token %} diff --git a/tracex_project/extraction/templates/upload_journey.html b/tracex_project/extraction/templates/upload_journey.html index 52afa4ff..f79a204b 100644 --- a/tracex_project/extraction/templates/upload_journey.html +++ b/tracex_project/extraction/templates/upload_journey.html @@ -9,7 +9,7 @@

Upload Patient Journey

-

Please select the patient journey you want to upload below.

+

Please select the Patient Journey you want to upload below.

{% csrf_token %} @@ -35,6 +35,6 @@

Upload Patient Journey

- + diff --git a/tracex_project/extraction/tests/test_orchestrator.py b/tracex_project/extraction/tests/test_orchestrator.py index 1955713e..caaea5dc 100644 --- a/tracex_project/extraction/tests/test_orchestrator.py +++ b/tracex_project/extraction/tests/test_orchestrator.py @@ -5,7 +5,6 @@ from extraction.logic.orchestrator import ExtractionConfiguration, Orchestrator from extraction.logic.modules import ( - Preprocessor, ActivityLabeler, CohortTagger, ) @@ -109,16 +108,15 @@ def test_initialize_modules(self): def test_run(self): """Test if the run method correctly returns a dataframe. Execution of ActivityLabeler, CohortTagger and - Preprocessor is necessary since the run method makes assumptions on how the patient journey looks like. + Preprocessor is necessary since the run method makes assumptions on how the Patient Journey looks like. """ Orchestrator.reset_instance() configuration = ExtractionConfiguration( - patient_journey="This is a test patient journey. This is some description about how I fell ill and " + patient_journey="This is a test Patient Journey. This is some description about how I fell ill and " "recovered in the end.", ) configuration.update( modules={ - "preprocessing": Preprocessor, "activity_labeling": ActivityLabeler, "cohort_tagging": CohortTagger, } diff --git a/tracex_project/extraction/tests/test_views.py b/tracex_project/extraction/tests/test_views.py index 9b3c3e4e..7375c94c 100644 --- a/tracex_project/extraction/tests/test_views.py +++ b/tracex_project/extraction/tests/test_views.py @@ -9,7 +9,6 @@ JourneyUploadForm, JourneySelectForm, FilterForm, - ResultForm, ) from extraction.models import ( PatientJourney, @@ -20,7 +19,6 @@ JourneySelectView, JourneyDetailView, JourneyFilterView, - ResultView, ) @@ -101,7 +99,7 @@ def test_view_post_valid_form(self): Test that posting a valid form with a file upload successfully creates a new model instance with the uploaded content and redirects to the correct success URL. 
""" - file_content = "This is a test patient journey." + file_content = "This is a test Patient Journey." uploaded_file = SimpleUploadedFile("test.txt", file_content.encode("utf-8")) form_data = {"name": "Test Journey", "file": uploaded_file} @@ -168,10 +166,10 @@ def test_view_context_data(self): def test_view_post_valid_form(self): """ - Test that posting a valid form by selecting an existing patient journey redirects to the correct success URL. + Test that posting a valid form by selecting an existing Patient Journey redirects to the correct success URL. """ mock_journey = PatientJourney.manager.create( - name="Test Journey", patient_journey="This is a test patient journey." + name="Test Journey", patient_journey="This is a test Patient Journey." ) form_data = {"selected_patient_journey": mock_journey.name} response = self.client.post(self.url, data=form_data, format="multipart") @@ -203,10 +201,10 @@ class JourneyDetailViewTests(TestCase): """Test cases for the JourneyDetailView.""" def setUp(self): # pylint: disable=invalid-name - """Set up test client, a mock patient journey and the URL.""" + """Set up test client, a mock Patient Journey and the URL.""" self.client = Client() self.mock_journey = PatientJourney.manager.create( - name="Test Journey", patient_journey="This is a test patient journey." + name="Test Journey", patient_journey="This is a test Patient Journey." 
) self.url = reverse("journey_details", kwargs={"pk": self.mock_journey.pk}) @@ -235,7 +233,7 @@ def test_view_context_data(self): ) def test_view_without_patient_journey(self): - """Test that requesting a patient journey that does not exist returns a 404 error.""" + """Test that requesting a Patient Journey that does not exist returns a 404 error.""" response = self.client.get(reverse("journey_details", kwargs={"pk": 999})) self.assertEqual(response.status_code, 404) @@ -263,10 +261,10 @@ class JourneyFilterViewTests(TestCase): fixtures = ["tracex_project/extraction/fixtures/prompts_fixture.json"] def setUp(self): # pylint: disable=invalid-name - """Set up test client, a mock patient journey, the URL and a factory that sends requests to the view.""" + """Set up test client, a mock Patient Journey, the URL and a factory that sends requests to the view.""" self.client = Client() self.mock_journey = PatientJourney.manager.create( - name="Test Journey", patient_journey="This is a test patient journey." + name="Test Journey", patient_journey="This is a test Patient Journey." 
) self.url = reverse("journey_filter") self.factory = RequestFactory() @@ -301,27 +299,6 @@ def test_get_context_data(self): self.assertIn("is_comparing", context) - # Non-deterministic test since orchestrator is executed - def test_form_valid(self): - """Test that a valid form submission redirects to the correct URL.""" - form_data = { - "modules_required": ["activity_labeling"], - "modules_optional": ["preprocessing", "event_type_classification"], - "event_types": ["Symptom Onset", "Symptom Offset"], - "locations": ["Home", "Hospital", "Doctors", "N/A"], - "activity_key": "event_type", - } - # Set up session data - session = self.client.session - session["is_comparing"] = False - session.save() - - # Submit the form using the test client - response = self.client.post(self.url, data=form_data) - - self.assertEqual(response.status_code, 302) - self.assertRedirects(response, reverse("result")) - def test_get_ajax(self): """ Test the `get` method when an AJAX request is made. @@ -338,70 +315,3 @@ def test_get_ajax(self): self.assertEqual( json.loads(response.content), {"progress": 50, "status": "running"} ) - - -class ResultViewTests(TestCase): - """Test cases for the ResultView.""" - - fixtures = ["tracex_project/tracex/fixtures/dataframe_fixtures.json"] - - def setUp(self): # pylint: disable=invalid-name - """Set up test client, a mock patient journey, session data and the URL.""" - self.client = Client() - self.mock_journey = PatientJourney.manager.create( - name="Test Journey", patient_journey="This is a test patient journey." 
- ) - self.session = self.client.session - self.session["selected_modules"] = ["activity_labeling", "cohort_tagging"] - self.session.save() - self.url = reverse("result") - - def test_view_get_request(self): - """Test that the view URL exists and is accessible by passing a GET request.""" - response = self.client.get(self.url) - resolver = resolve(self.url) - - self.assertEqual(response.status_code, 200) - self.assertEqual(resolver.func.view_class, ResultView) - - def test_uses_correct_template(self): - """Test that the view uses the correct template.""" - response = self.client.get(self.url) - - self.assertTemplateUsed(response, "result.html") - - def test_uses_correct_form(self): - """Test that the view uses the correct form.""" - response = self.client.get(self.url) - - self.assertIsInstance(response.context["form"], ResultForm) - - def test_get_form_kwargs(self): - """Test that correct form kwargs are passed to the form.""" - response = self.client.get(self.url) - - self.assertEqual(response.status_code, 200) - - form = response.context["form"] - - self.assertIsInstance(form, ResultForm) - self.assertEqual( - (form.initial["selected_modules"]), self.session["selected_modules"] - ) - - def test_get_context_data(self): - """Test that the view fetches the correct context data.""" - response = self.client.get(self.url) - - self.assertEqual(response.status_code, 200) - - context = response.context - - self.assertIn("form", context) - self.assertIsInstance(context["form"], ResultForm) - self.assertIn("journey", context) - self.assertEqual(context["journey"], self.mock_journey.patient_journey) - self.assertIn("dfg_img", context) - self.assertIn("trace_table", context) - self.assertIn("all_dfg_img", context) - self.assertIn("event_log_table", context) diff --git a/tracex_project/extraction/views.py b/tracex_project/extraction/views.py index eca6d05f..b9524100 100644 --- a/tracex_project/extraction/views.py +++ b/tracex_project/extraction/views.py @@ -28,13 +28,13 @@ 
class JourneyInputSelectView(generic.TemplateView): - """A Django view that renders a template for the user to choose the patient journey input method.""" + """A Django view that renders a template for the user to choose the Patient Journey input method.""" template_name = "choose_input_method.html" class JourneyUploadView(generic.CreateView): - """A Django view that handles the uploading of a patient journey.""" + """A Django view that handles the uploading of a Patient Journey.""" form_class = JourneyUploadForm template_name = "upload_journey.html" @@ -55,7 +55,7 @@ def get_success_url(self): class JourneySelectView(generic.FormView): - """A Django view that handles the selection of a patient journey from the database.""" + """A Django view that handles the selection of a Patient Journey from the database.""" model = PatientJourney form_class = JourneySelectForm @@ -71,13 +71,13 @@ def form_valid(self, form): class JourneyDetailView(generic.DetailView): - """A Django view that displays the details of a selected patient journey.""" + """A Django view that displays the details of a selected Patient Journey.""" model = PatientJourney template_name = "journey_details.html" def get_context_data(self, **kwargs): - """Overrides the get_context_data method to add the patient journey to the context data.""" + """Overrides the get_context_data method to add the Patient Journey to the context data.""" context = super().get_context_data(**kwargs) patient_journey = self.get_object() context["patient_journey"] = patient_journey diff --git a/tracex_project/patient_journey_generator/apps.py b/tracex_project/patient_journey_generator/apps.py index 3280e08c..4e81b3bc 100644 --- a/tracex_project/patient_journey_generator/apps.py +++ b/tracex_project/patient_journey_generator/apps.py @@ -1,4 +1,4 @@ -"""App configuration for patient journey generator app.""" +"""App configuration for Patient Journey generator app.""" from django.apps import AppConfig diff --git 
a/tracex_project/patient_journey_generator/forms.py b/tracex_project/patient_journey_generator/forms.py index 3a9a9372..8344e84c 100644 --- a/tracex_project/patient_journey_generator/forms.py +++ b/tracex_project/patient_journey_generator/forms.py @@ -1,13 +1,13 @@ -"""Implementation of forms for the patient journey generator app.""" +"""Implementation of forms for the Patient Journey generator app.""" from django import forms from extraction.models import PatientJourney class GenerationOverviewForm(forms.ModelForm): """ - Form for generating a patient journey. + Form for generating a Patient Journey. - By submitting this form, a patient journey is generated and saved in the orchestrator's configuration. + By submitting this form, a Patient Journey is generated and saved in the orchestrator's configuration. """ class Meta: @@ -19,8 +19,9 @@ class Meta: fields -- The fields to include in the form. help_texts -- The help texts for the fields. widgets -- The widgets for the fields. - - "name" - A text input field to name the patient journey. Required, to save patient journey in the database. + - "name" - A text input field to name the Patient Journey. Required, to save Patient Journey in the database. """ + model = PatientJourney fields = ["name"] help_texts = { @@ -28,6 +29,6 @@ class Meta: } widgets = { "name": forms.TextInput( - attrs={"placeholder": "Name for your patient journey"} + attrs={"placeholder": "Name for your Patient Journey"} ), } diff --git a/tracex_project/patient_journey_generator/generator.py b/tracex_project/patient_journey_generator/generator.py index 433639f3..1bf0d36e 100644 --- a/tracex_project/patient_journey_generator/generator.py +++ b/tracex_project/patient_journey_generator/generator.py @@ -1,12 +1,12 @@ """ -Provides functionality to generate a synthetic patient journey by using the OpenAI API. +Provides functionality to generate a synthetic Patient Journey by using the OpenAI API. 
Functions: -generate_patient_journey -- Generates a synthetic patient journey. -create_patient_journey_context -- Creates a context for the synthetic patient journey. +generate_patient_journey -- Generates a synthetic Patient Journey. +create_patient_journey_context -- Creates a context for the synthetic Patient Journey. get_country -- Randomizes a european country. -get_date -- Randomizes a start date for the synthetic patient journey. -get_life_circumstances -- Generates life circumstances for the synthetic patient journey. +get_date -- Randomizes a start date for the synthetic Patient Journey. +get_life_circumstances -- Generates life circumstances for the synthetic Patient Journey. """ from datetime import datetime, timedelta import random @@ -17,7 +17,7 @@ def generate_patient_journey(): - """Generate a synthetic patient journey.""" + """Generate a synthetic Patient Journey.""" messages = Prompt.objects.get(name="CREATE_PATIENT_JOURNEY").text messages.insert(0, {"role": "system", "content": create_patient_journey_context()}) patient_journey = u.query_gpt(messages=messages, temperature=1) @@ -27,7 +27,7 @@ def generate_patient_journey(): def create_patient_journey_context(): """ - Create a context for the patient journey. + Create a context for the Patient Journey. The context includes a random sex, country, date and life circumstances. """ diff --git a/tracex_project/patient_journey_generator/templates/journey_generator_overview.html b/tracex_project/patient_journey_generator/templates/journey_generator_overview.html index c60d653d..fdd25b36 100644 --- a/tracex_project/patient_journey_generator/templates/journey_generator_overview.html +++ b/tracex_project/patient_journey_generator/templates/journey_generator_overview.html @@ -1,12 +1,12 @@ @@ -23,7 +23,7 @@
{% if generated_journey %} -

Generated Journey

+

Generated Patient Journey

{{ generated_journey }}

@@ -47,7 +47,7 @@

Generated Journey

- + {% else %} @@ -60,7 +60,7 @@

Welcome to the Patient Journey Generator of TracEX

{% endif %} - + diff --git a/tracex_project/patient_journey_generator/views.py b/tracex_project/patient_journey_generator/views.py index 364b4f5b..4d4647d8 100644 --- a/tracex_project/patient_journey_generator/views.py +++ b/tracex_project/patient_journey_generator/views.py @@ -1,9 +1,9 @@ """ -Provide class-based views for the patient journey generator app. +Provide class-based views for the Patient Journey generator app. Views: -JourneyGeneratorOverviewView -- View for the landing page of the patient journey generator. -JourneyGenerationView -- View to inspect the generated patient journey. +JourneyGeneratorOverviewView -- View for the landing page of the Patient Journey generator. +JourneyGenerationView -- View to inspect the generated Patient Journey. """ import traceback @@ -18,9 +18,9 @@ class JourneyGeneratorOverviewView(generic.CreateView): """ - View for the landing page of the patient journey generator. + View for the landing page of the Patient Journey generator. - If a generated patient journey exists in the session, this view displays a form to name the patient journey + If a generated Patient Journey exists in the session, this view displays a form to name the Patient Journey and save it in the database. 
""" @@ -29,14 +29,14 @@ class JourneyGeneratorOverviewView(generic.CreateView): success_url = reverse_lazy("journey_filter") def get_context_data(self, **kwargs): - """Add the patient journey to the context to pass to the HTML file.""" + """Add the Patient Journey to the context to pass to the HTML file.""" context = super().get_context_data(**kwargs) context["generated_journey"] = self.request.session.get("generated_journey") return context def form_valid(self, form): - """Create an empty patient journey instance and save the ID in the orchestrator.""" + """Create an empty Patient Journey instance and save the ID in the orchestrator.""" orchestrator = Orchestrator.get_instance() form.instance.patient_journey = orchestrator.get_configuration().patient_journey response = super().form_valid(form) @@ -47,9 +47,9 @@ def form_valid(self, form): class JourneyGenerationView(generic.RedirectView): """ - View to inspect the generated patient journey. + View to inspect the generated Patient Journey. - By passing a GET request to the view, a patient journey is generated and saved in the orchestrator's configuration. + By passing a GET request to the view, a Patient Journey is generated and saved in the orchestrator's configuration. Since the JourneyGenerationView is a RedirectView, the user is redirected back to the JourneyGeneratorOverviewView. Therefore, this view does not render a template. """ @@ -58,17 +58,19 @@ class JourneyGenerationView(generic.RedirectView): def get(self, request, *args, **kwargs): """ - Handle GET requests by generating a patient journey and updating the orchestrator's configuration. + Handle GET requests by generating a Patient Journey and updating the orchestrator's configuration. - The empty patient journey instance from the orchestrator's configuration is modified to contain the generated - patient journey text. 
The generated patient journey is also saved in the session to pass to the HTML file + The empty Patient Journey instance from the orchestrator's configuration is modified to contain the generated + Patient Journey text. The generated Patient Journey is also saved in the session to pass to the HTML file of the JourneyGenerationOverviewView. """ orchestrator = Orchestrator() try: - configuration = ExtractionConfiguration(patient_journey=generate_patient_journey()) + configuration = ExtractionConfiguration( + patient_journey=generate_patient_journey() + ) except Exception as e: # pylint: disable=broad-except orchestrator.reset_instance() self.request.session.flush() @@ -76,10 +78,15 @@ def get(self, request, *args, **kwargs): return render( self.request, "error_page.html", - {"error_type": type(e).__name__, "error_traceback": traceback.format_exc()} + { + "error_type": type(e).__name__, + "error_traceback": traceback.format_exc(), + }, ) orchestrator.set_configuration(configuration) - request.session["generated_journey"] = orchestrator.get_configuration().patient_journey + request.session[ + "generated_journey" + ] = orchestrator.get_configuration().patient_journey return super().get(request, *args, **kwargs) diff --git a/tracex_project/trace_comparator/forms.py b/tracex_project/trace_comparator/forms.py index 955297b6..df8cb18f 100644 --- a/tracex_project/trace_comparator/forms.py +++ b/tracex_project/trace_comparator/forms.py @@ -6,9 +6,11 @@ class PatientJourneySelectForm(forms.Form): - """Form for selecting a patient journey to use in the trace testing environment.""" + """Form for selecting a Patient Journey to use in the trace testing environment.""" - selected_patient_journey = forms.ChoiceField(choices=[]) + selected_patient_journey = forms.ChoiceField( + choices=[], label="Selected Patient Journey:" + ) def __init__(self, *args, **kwargs): """Initializes the PatientJourneySelectForm with available choices.""" @@ -19,7 +21,7 @@ def __init__(self, *args, 
**kwargs): @staticmethod def get_patient_journey_choices() -> List[Tuple[str, str]]: - """Retrieves the available patient journey choices from the database. Available choices are those with a + """Retrieves the available Patient Journey choices from the database. Available choices are those with a saved ground truth.""" patient_journeys = PatientJourney.manager.filter( name__contains="journey_comparison_" diff --git a/tracex_project/trace_comparator/migrations/__init__.py b/tracex_project/trace_comparator/migrations/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tracex_project/trace_comparator/templates/testing_overview.html b/tracex_project/trace_comparator/templates/testing_overview.html index 2acb1e3b..e26983b7 100644 --- a/tracex_project/trace_comparator/templates/testing_overview.html +++ b/tracex_project/trace_comparator/templates/testing_overview.html @@ -9,7 +9,7 @@

Welcome to the Trace Testing Environment

The Trace Testing Environment is a tool that enables you to assess the effectiveness of your current prompts by comparing the results of the extraction pipeline against a manually created trace, which serves as a predefined groundtruth.

-

Please select one patient journey as ground truth to compare it against the extraction pipeline result.

+

Please select one Patient Journey as ground truth to compare it against the extraction pipeline result.

{% csrf_token %} diff --git a/tracex_project/trace_comparator/views.py b/tracex_project/trace_comparator/views.py index 32f314dc..cf5fa7ce 100644 --- a/tracex_project/trace_comparator/views.py +++ b/tracex_project/trace_comparator/views.py @@ -25,7 +25,7 @@ class TraceComparisonMixin(View): def get_first_and_last_trace( patient_journey_name: str, ) -> Tuple[pd.DataFrame, pd.DataFrame]: - """Get the first and last trace of a patient journey from the database.""" + """Get the first and last trace of a Patient Journey from the database.""" query_last_trace = Q( id=Trace.manager.filter(patient_journey__name=patient_journey_name) .latest("last_modified") @@ -45,7 +45,7 @@ def get_first_and_last_trace( class TraceTestingOverviewView(FormView): - """View for selecting a patient journey to use in the Trace Testing Environment.""" + """View for selecting a Patient Journey to use in the Trace Testing Environment.""" form_class = PatientJourneySelectForm template_name = "testing_overview.html" @@ -72,7 +72,7 @@ class TraceTestingComparisonView(TemplateView, TraceComparisonMixin): template_name = "testing_comparison.html" def get_context_data(self, **kwargs): - """Prepare the latest trace of the selected patient journey that is available in the database for display.""" + """Prepare the latest trace of the selected Patient Journey that is available in the database for display.""" context = super().get_context_data(**kwargs) patient_journey_name: str = self.request.session.get("patient_journey_name") patient_journey: str = PatientJourney.manager.get( @@ -114,7 +114,7 @@ def get(self, request, *args, **kwargs): return super().get(request, *args, **kwargs) def post(self, request): - """Compare the newest trace of a patient journey against the ground truth and update session with results.""" + """Compare the newest trace of a Patient Journey against the ground truth and update session with results.""" patient_journey_name: str = self.request.session.get("patient_journey_name") 
ground_truth_df, pipeline_df = self.get_first_and_last_trace( patient_journey_name diff --git a/tracex_project/tracex/fixtures/dataframe_fixtures.json b/tracex_project/tracex/fixtures/dataframe_fixtures.json index 4ef4d184..48070174 100644 Binary files a/tracex_project/tracex/fixtures/dataframe_fixtures.json and b/tracex_project/tracex/fixtures/dataframe_fixtures.json differ diff --git a/tracex_project/tracex/logic/constants.py b/tracex_project/tracex/logic/constants.py index adde72a4..7ada56bd 100644 --- a/tracex_project/tracex/logic/constants.py +++ b/tracex_project/tracex/logic/constants.py @@ -117,7 +117,7 @@ ] MODULES_REQUIRED: Final = [ ("activity_labeling", "Activity Labeler"), - ("cohort_tagging", "Cohort Tagger") + ("cohort_tagging", "Cohort Tagger"), ] SNOMED_CT_API_URL = ( "https://browser.ihtsdotools.org/snowstorm/snomed-ct/browser/MAIN/descriptions" diff --git a/tracex_project/tracex/settings.py b/tracex_project/tracex/settings.py index f76e7bca..a5b0e3d4 100644 --- a/tracex_project/tracex/settings.py +++ b/tracex_project/tracex/settings.py @@ -19,10 +19,8 @@ # Quick-start development settings - unsuitable for production # See https://docs.djangoproject.com/en/4.2/howto/deployment/checklist/ -# SECURITY WARNING: keep the secret key used in production secret! SECRET_KEY = "django-insecure-$u00r=^xd*m1ggjgzwj%2o2$h=34k358#imaxe22w@stk_aptt" -# SECURITY WARNING: don't run with debug turned on in production! DEBUG = True ALLOWED_HOSTS = []