From ac74a88b99598e68fb2e5a2c3e1540a32b34e46f Mon Sep 17 00:00:00 2001 From: "tkv29@yahoo.de" Date: Mon, 17 Jun 2024 11:03:50 +0200 Subject: [PATCH] changes --- tracex_project/extraction/views.py | 13 ++- tracex_project/trace_comparator/comparator.py | 81 +++++++++++++------ 2 files changed, 69 insertions(+), 25 deletions(-) diff --git a/tracex_project/extraction/views.py b/tracex_project/extraction/views.py index 2201854..a487658 100644 --- a/tracex_project/extraction/views.py +++ b/tracex_project/extraction/views.py @@ -249,14 +249,23 @@ def get_context_data(self, **kwargs): } if TEST_MODE: - patient_journey_name = "Synthetic journey 1" + patient_journey_name = "patient journey 1" query_last_trace = Q( id=Trace.manager.filter(patient_journey__name=patient_journey_name) .latest("last_modified") .id ) trace = utils.DataFrameUtilities.get_events_df(query_last_trace) - print(trace) + print(trace.columns) + trace.drop( + columns=[ + "activity_relevance", + "timestamp_correctness", + "correctness_confidence", + ], + axis=1, + inplace=True, + ) else: trace = self.build_trace_df(filter_dict) event_log = self.build_event_log_df(filter_dict, trace) diff --git a/tracex_project/trace_comparator/comparator.py b/tracex_project/trace_comparator/comparator.py index 50319a6..da99cc8 100644 --- a/tracex_project/trace_comparator/comparator.py +++ b/tracex_project/trace_comparator/comparator.py @@ -12,7 +12,7 @@ @log_execution_time(Path(settings.BASE_DIR / "tracex/logs/execution_time.log")) def compare_traces( - view, pipeline_df: pd.DataFrame, ground_truth_df: pd.DataFrame + view, pipeline_df: pd.DataFrame, ground_truth_df: pd.DataFrame ) -> dict: """Executes the trace comparison. @@ -25,12 +25,45 @@ def compare_traces( simulate_progress(view) mapping_pipeline_to_ground_truth = [0, -1, -1, 1, 12, 13, 16] - mapping_ground_truth_to_pipeline = [0, 3, -1, -1, -1, -1, -1, -1, 3, 3, -1, -1, 4, 5, -1, 6, 6, -1] - missing_activities = ['informing family', 'putting loved ones over financial worries', 'experiencing worse symptoms', 'consulting family physician', 'getting tested for covid 19 in local testing center', 'testing positive for covid 19', 'experiencing slow recovery', 'returning to work with precautions', 'remainding optimistic and adhering to safety guidelines', 'feeling thankful for healthcare personal'] - unexpected_activities = ['consulting doctor for worsening symptoms', 'getting tested for Covid-19'] + mapping_ground_truth_to_pipeline = [ + 0, + 3, + -1, + -1, + -1, + -1, + -1, + -1, + 3, + 3, + -1, + -1, + 4, + 5, + -1, + 6, + 6, + -1, + ] + missing_activities = [ + "informing family", + "putting loved ones over financial worries", + "experiencing worse symptoms", + "consulting family physician", + "getting tested for covid 19 in local testing center", + "testing positive for covid 19", + "experiencing slow recovery", + "returning to work with precautions", + "remainding optimistic and adhering to safety guidelines", + "feeling thankful for healthcare personal", + ] + unexpected_activities = [ + "consulting doctor for worsening symptoms", + "getting tested for Covid-19", + ] wrong_orders = [] - matching_percent_ground_truth_to_pipeline = 71 - matching_percent_pipeline_to_ground_truth = 44 + matching_percent_ground_truth_to_pipeline = 44 + matching_percent_pipeline_to_ground_truth = 71 else: pipeline_activities: pd.Series = pipeline_df["activity"] @@ -89,7 +122,7 @@ def simulate_progress(view): def find_activity_mapping( - view, pipeline_activities: pd.Series, ground_truth_activities: pd.Series + view, pipeline_activities: pd.Series, ground_truth_activities: pd.Series ) -> Tuple[List[int], List[int]]: """Create a mapping of activities from the pipeline to the ground truth and vice versa.""" total_steps: int = len(pipeline_activities) + len(ground_truth_activities) @@ -123,12 +156,12 @@ def find_activity_mapping( def compare_activities( - view, - current_step: int, - total_steps: int, - status: str, - input_activities: pd.Series, - comparison_basis_activities: pd.Series, + view, + current_step: int, + total_steps: int, + status: str, + input_activities: pd.Series, + comparison_basis_activities: pd.Series, ) -> List[Tuple[int, float]]: """Compare input activities with ground truth activities.""" mapping_input_to_comparison: List[Tuple[int, float]] = [] @@ -147,10 +180,10 @@ def compare_activities( def find_activity( - activity, - comparison_basis_activities: pd.Series, - activity_index: int, - mapping_input_to_comparison: List[Tuple[int, float]], + activity, + comparison_basis_activities: pd.Series, + activity_index: int, + mapping_input_to_comparison: List[Tuple[int, float]], ) -> None: """Compares an activity against potential matches to identify the best match based on similarity. @@ -171,7 +204,9 @@ def find_activity( "content": f"First: {activity}\nSecond: {second_activity}", } ) - response, linear_probability = u.query_gpt(messages, return_linear_probability=True, top_logprobs=1) + response, linear_probability = u.query_gpt( + messages, return_linear_probability=True, top_logprobs=1 + ) if "True" in response: possible_matches.append((lower + count, linear_probability)) @@ -189,7 +224,7 @@ def find_activity( def postprocess_mappings( - mapping_data_to_ground_truth: List, mapping_ground_truth_to_data: List + mapping_data_to_ground_truth: List, mapping_ground_truth_to_data: List ) -> Tuple[List[int], List[int]]: """Postprocess the mappings between data and ground truth.""" mapping_data_to_ground_truth = fill_mapping( @@ -229,7 +264,7 @@ def remove_probabilities(mapping: List[Tuple[int, float]]) -> List[int]: def find_matching_percentage( - input_activities: pd.Series, mapping_input_to_comparison: list + input_activities: pd.Series, mapping_input_to_comparison: list ) -> int: """Calculate the percentage of matching activities.""" total_matching_activities: int = sum( @@ -252,7 +287,7 @@ def find_unmapped_activities(activities: pd.Series, mapping: list) -> List[str]: def find_wrong_orders( - df_activities: pd.Series, mapping_ground_truth_to_data: List[int] + df_activities: pd.Series, mapping_ground_truth_to_data: List[int] ) -> List[Tuple[str, str]]: """Find the activities that are in the wrong order. @@ -270,8 +305,8 @@ def find_wrong_orders( continue if first_activity_index > second_activity_index: if not any( - pair == (first_activity_index, second_activity_index) - for pair in wrong_orders_indices + pair == (first_activity_index, second_activity_index) + for pair in wrong_orders_indices ): wrong_orders_indices.append( (first_activity_index, second_activity_index)