Commit f736c37 (1 parent: e5e220f)

add metric to pipeline

6 files changed, +31 -12 lines changed

config/config.yaml

Lines changed: 1 addition & 1 deletion
@@ -46,7 +46,7 @@ apis:
 
   local_api:
     name: LocalAPI
-    endpoint: http://127.0.0.1:8025/answer
+    endpoint: http://127.0.0.1:8000/answer
     auth_required: false # Authentication not required
     headers:
       accept: application/json
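
For reference, a minimal sketch of how a client could read this local_api entry and call the updated endpoint. The yaml/requests usage and the request payload are illustrative assumptions, not code from this repository:

import requests
import yaml

# Illustrative only: load the settings under `apis:` and query the local endpoint.
with open("config/config.yaml") as f:
    config = yaml.safe_load(f)

api = config["apis"]["local_api"]
response = requests.post(
    api["endpoint"],            # now http://127.0.0.1:8000/answer
    json={"question": "..."},   # payload shape is a placeholder
    headers=api["headers"],     # accept: application/json
)
print(response.status_code)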

sources/full_pipeline.py

Lines changed: 19 additions & 5 deletions
@@ -33,7 +33,7 @@ def run_pipeline(file):
 
     #-----------------------------------------------------
 
-    # tests.execute(LLMExecutor())
+    tests.execute(LLMExecutor())
     print("Stage 3/5 completed - LLM queries executed and will be stored in results/stage3_execution_result.json")
 
     execution_result_json = tests.to_dict()
@@ -43,20 +43,34 @@ def run_pipeline(file):
 
     #-----------------------------------------------------
 
-    # tests.evaluate_responses()
+    tests.evaluate_responses()
     print("Stage 4/5 completed - Paraphrases generated and will be stored in results/stage4_response_evaluation.json")
 
     response_evaluation_result_json = tests.to_dict()
 
     with open("results/stage4_response_evaluation.json", 'w') as json_file:
         json.dump(response_evaluation_result_json, json_file, indent=4)
 
+
+
+    # file_path = "results/stage5_metric_evaluation.json"
+    # with open(file_path, "r") as json_file:
+    #     test_data = json.load(json_file)
+    # from sources.models.unit_tests import UnitTests
+    # tests = UnitTests.from_json(test_data)
+    # print(tests[0].question)
+
     #----------------------------------------------------- Stage 5
 
-    # tests.evaluate_responses()
+    result_array = tests.get_evaluation_result_as_numpy()
+    print(result_array)
+    from sources.metrics.accuracy import Accuracy
+    metric = Accuracy()
+    metric.get_metric_value(result_array)
+    tests.metrics.append(metric)
     print("Stage 5/5 completed - Metric evaluation completed and will be stored in results/stage5_metric_evaluation.json")
 
-    #----------------------------------------------------- Metadata creation
+    # #----------------------------------------------------- Metadata creation
 
     end_time = time.time()
     execution_time = end_time - start_time
@@ -93,7 +107,7 @@ def run_pipeline(file):
     return tests
 
 # Below is the the code to load the object from JSON. Adapt it according to the stage you want to load the object from.
-# file_path = "results/tests.json"
+# file_path = "results/stage5_metric_evaluation.json"
 # with open(file_path, "r") as json_file:
 #     test_data = json.load(json_file)
 # from sources.models.unit_tests import UnitTests
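
The new Stage 5 block prints that the metric evaluation will be stored in results/stage5_metric_evaluation.json, but the write itself is not part of this hunk. A sketch of what that write could look like, mirroring the Stage 4 pattern above and assuming tests.to_dict() also serializes the appended metrics:

import json

# Sketch only: persist the Stage 5 state the same way Stage 4 is persisted.
metric_evaluation_result_json = tests.to_dict()

with open("results/stage5_metric_evaluation.json", "w") as json_file:
    json.dump(metric_evaluation_result_json, json_file, indent=4)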

sources/metrics/accuracy.py

Lines changed: 3 additions & 2 deletions
@@ -22,6 +22,7 @@ def passed(self):
         raise Exception("Method not implemented")
 
 
-    def to_json(self):
+    def get_metric_value(self, result_array):
 
-        pass
+        self.metric_result = result_array.mean()
+        return self.metric_result
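
The new get_metric_value treats the evaluation results as a numeric array, so the mean doubles as the pass rate. A small usage sketch; the 0/1 array below is a hypothetical stand-in for the output of tests.get_evaluation_result_as_numpy():

import numpy as np

from sources.metrics.accuracy import Accuracy

# Hypothetical results: 1.0 where an atomic test case passed, 0.0 where it failed.
result_array = np.array([1.0, 0.0, 1.0, 1.0])

metric = Accuracy()
print(metric.get_metric_value(result_array))  # 0.75
print(metric.metric_result)                   # the value is also kept on the instance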

sources/metrics/base_metric.py

Lines changed: 5 additions & 3 deletions
@@ -1,4 +1,6 @@
-class BaseMetric:
+from sources.models.common_interface import BaseTest
+
+class BaseMetric(BaseTest):
 
     def __init__(self, metric_name="", threshold=None):
 
@@ -20,9 +22,9 @@ def passed(self):
         raise Exception("Method not implemented")
 
 
-    def to_json(self):
+    # def to_json(self):
 
-        pass
+        # pass
 
 """
 Dimensions guide to accessing numpy results array:
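
With BaseMetric now deriving from BaseTest and no longer defining to_json, new metrics can follow the same shape as Accuracy. A hypothetical subclass (not part of the repository) illustrating that shape; it assumes BaseMetric.__init__ stores threshold on the instance:

import numpy as np

from sources.metrics.base_metric import BaseMetric

# Hypothetical metric: fraction of results above the configured threshold.
class ThresholdPassRate(BaseMetric):

    def __init__(self, threshold=0.5):
        super().__init__(metric_name="threshold_pass_rate", threshold=threshold)

    def get_metric_value(self, result_array):
        # Same convention as Accuracy: store the value and return it.
        self.metric_result = (np.asarray(result_array) > self.threshold).mean()
        return self.metric_result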

sources/models/unit_tests.py

Lines changed: 2 additions & 0 deletions
@@ -6,6 +6,7 @@
 from sources.helpers.paraphrase_helper import paraphrase_question
 from sources.models.unit_tests_result import ParaphrasedQuestion
 from sources.models.metadata import MetaData
+from sources.metrics.base_metric import BaseMetric
 import numpy as np
 
 import os
@@ -111,6 +112,7 @@ def __init__(self, file=None):
         self.file = file
         self.unit_tests: List[UnitTest] = []
         self.metadata: MetaData = None
+        self.metrics: List[BaseMetric] = []
 
     def read_file(self):
         df = pd.read_csv(self.file)
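
Once metrics are appended in Stage 5, the new UnitTests.metrics list collects every computed metric in one place. A hedged sketch of reporting them, assuming each metric carries the metric_name set in BaseMetric.__init__ and the metric_result set by get_metric_value:

# Sketch only: summarise all metrics attached to a UnitTests instance.
for metric in tests.metrics:
    print(f"{metric.metric_name}: {metric.metric_result}")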

sources/models/unit_tests_result.py

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@ def evaluate_responses(self, tests):
         for test in tests:
             # evaluate whether the answer follows the given test
             passed, reason = evaluate_answer_for_test(self.answer, test.test_case)
-            self.test_cases.append(AtomicTestCaseExecutionResult(test, passed, reason))
+            self.test_cases.append(AtomicTestCaseExecutionResult(test.test_case, passed, reason))
 
     def get_evaluation_result_as_numpy(self):
         results = []
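
The context lines show get_evaluation_result_as_numpy starting with an empty results list; the rest of the body is not part of this diff. A sketch of how such a method could produce the array consumed by Accuracy, assuming each stored AtomicTestCaseExecutionResult exposes its passed flag as an attribute:

import numpy as np

def get_evaluation_result_as_numpy(self):
    # Sketch only: one 0/1 entry per recorded atomic test case.
    results = []
    for test_case in self.test_cases:
        results.append(1.0 if test_case.passed else 0.0)
    return np.array(results)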
