quality_control: correct the per-class accuracy formula (#7640)


### Motivation and context
The current formula used to calculate `ConfusionMatrix.accuracy` is, in
fact, not accuracy, but the Jaccard index. Replace it with the correct
formula.

Since the Jaccard index is a useful metric in its own right, calculate
it too, but save it in another attribute of `ConfusionMatrix`.
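The distinction is easiest to see with a small worked example. Below is a minimal NumPy sketch, not the CVAT code itself (`per_class_metrics` is a hypothetical name), computing both quantities from a confusion matrix laid out as in the patch: rows are dataset labels, columns are ground-truth labels, and the diagonal holds matches.

```python
import numpy as np

def per_class_metrics(confusion_matrix: np.ndarray):
    matched = np.diag(confusion_matrix)       # TP per class
    ds_counts = confusion_matrix.sum(axis=1)  # TP + FP per class
    gt_counts = confusion_matrix.sum(axis=0)  # TP + FN per class
    total = confusion_matrix.sum()            # TP + TN + FP + FN

    # Jaccard index: TP / (TP + FP + FN) -- the quantity the old code
    # mislabeled as "accuracy". Guard against classes with no annotations.
    denom = ds_counts + gt_counts - matched
    jaccard = np.divide(matched, denom, out=np.zeros(len(denom)), where=denom != 0)

    # Accuracy: (TP + TN) / (TP + TN + FP + FN), obtained by subtracting
    # FP and FN from the grand total, as in the new formula.
    accuracy = (total - (ds_counts - matched) - (gt_counts - matched)) / (total or 1)
    return accuracy, jaccard

m = np.array([[5, 1, 0],
              [1, 3, 1],
              [0, 0, 2]])
accuracy, jaccard = per_class_metrics(m)
# For class 1: TP=3, FP=2, FN=1, total=13,
# so accuracy = 10/13 while Jaccard = 3/6 = 0.5.
```

The two metrics diverge most for small classes: true negatives inflate accuracy toward 1, while the Jaccard index ignores them entirely.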

### How has this been tested?
Manual testing.

### Checklist
- [x] I submit my changes into the `develop` branch
- [x] I have created a changelog fragment
- ~~[ ] I have updated the documentation accordingly~~
- ~~[ ] I have added tests to cover my changes~~
- ~~[ ] I have linked related issues (see [GitHub docs](https://help.github.com/en/github/managing-your-work-on-github/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword))~~
- ~~[ ] I have increased versions of npm packages if it is necessary ([cvat-canvas](https://github.com/opencv/cvat/tree/develop/cvat-canvas#versioning), [cvat-core](https://github.com/opencv/cvat/tree/develop/cvat-core#versioning), [cvat-data](https://github.com/opencv/cvat/tree/develop/cvat-data#versioning) and [cvat-ui](https://github.com/opencv/cvat/tree/develop/cvat-ui#versioning))~~

### License

- [x] I submit _my code changes_ under the same [MIT License](https://github.com/opencv/cvat/blob/develop/LICENSE) that covers the project.
  Feel free to contact the maintainers if that's a concern.

---------

Co-authored-by: Maxim Zhiltsov <[email protected]>
SpecLad and zhiltsov-max authored Mar 21, 2024
1 parent 1749002 commit fc54c47
Showing 6 changed files with 2,189 additions and 330 deletions.
5 changes: 5 additions & 0 deletions changelog.d/20240319_183656_roman_accuracy_jaccard.md
@@ -0,0 +1,5 @@
+### Fixed
+
+- Corrected the formula for per-class accuracy in quality reports;
+  the old formula is now exposed as the `jaccard_index` key
+  (<https://github.com/opencv/cvat/pull/7640>)
15 changes: 13 additions & 2 deletions cvat/apps/quality_control/quality_reports.py
@@ -218,6 +218,7 @@ class ConfusionMatrix(_Serializable):
     precision: np.array
     recall: np.array
     accuracy: np.array
+    jaccard_index: Optional[np.array]

@property
def axes(self):
@@ -240,6 +241,9 @@ def from_dict(cls, d: dict):
            precision=np.asarray(d["precision"]),
            recall=np.asarray(d["recall"]),
            accuracy=np.asarray(d["accuracy"]),
+            # This field didn't exist at first, so it might not be present
+            # in old serialized instances.
+            jaccard_index=np.asarray(d["jaccard_index"]) if "jaccard_index" in d else None,
        )


@@ -1934,17 +1938,23 @@ def _generate_annotations_summary(
        matched_ann_counts = np.diag(confusion_matrix)
        ds_ann_counts = np.sum(confusion_matrix, axis=1)
        gt_ann_counts = np.sum(confusion_matrix, axis=0)
+        total_annotations_count = np.sum(confusion_matrix)

-        label_accuracies = _arr_div(
+        label_jaccard_indices = _arr_div(
            matched_ann_counts, ds_ann_counts + gt_ann_counts - matched_ann_counts
        )
        label_precisions = _arr_div(matched_ann_counts, ds_ann_counts)
        label_recalls = _arr_div(matched_ann_counts, gt_ann_counts)
+        label_accuracies = (
+            total_annotations_count  # TP + TN + FP + FN
+            - (ds_ann_counts - matched_ann_counts)  # - FP
+            - (gt_ann_counts - matched_ann_counts)  # - FN
+            # ... = TP + TN
+        ) / (total_annotations_count or 1)

        valid_annotations_count = np.sum(matched_ann_counts)
        missing_annotations_count = np.sum(confusion_matrix[cls._UNMATCHED_IDX, :])
        extra_annotations_count = np.sum(confusion_matrix[:, cls._UNMATCHED_IDX])
-        total_annotations_count = np.sum(confusion_matrix)
        ds_annotations_count = np.sum(ds_ann_counts[: cls._UNMATCHED_IDX])
        gt_annotations_count = np.sum(gt_ann_counts[: cls._UNMATCHED_IDX])

Expand All @@ -1961,6 +1971,7 @@ def _generate_annotations_summary(
precision=label_precisions,
recall=label_recalls,
accuracy=label_accuracies,
jaccard_index=label_jaccard_indices,
),
)

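The `from_dict` fallback in the hunk above is a general pattern for fields added after instances were already being serialized. A minimal sketch of that pattern on a plain-dict payload (`load_confusion_matrix_fields` is a made-up helper, not part of the CVAT API):

```python
import numpy as np

def load_confusion_matrix_fields(d: dict) -> dict:
    return {
        "precision": np.asarray(d["precision"]),
        "recall": np.asarray(d["recall"]),
        "accuracy": np.asarray(d["accuracy"]),
        # Reports serialized before the field existed simply lack the key,
        # so deserialize it as None instead of raising a KeyError.
        "jaccard_index": np.asarray(d["jaccard_index"]) if "jaccard_index" in d else None,
    }

old_payload = {"precision": [1.0], "recall": [0.5], "accuracy": [0.9]}
new_payload = {**old_payload, "jaccard_index": [0.5]}
assert load_confusion_matrix_fields(old_payload)["jaccard_index"] is None
assert load_confusion_matrix_fields(new_payload)["jaccard_index"][0] == 0.5
```

Making the attribute `Optional` rather than backfilling a default keeps old reports loadable while letting consumers distinguish "not computed" from a real value, which is exactly what the new test below asserts.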
11 changes: 11 additions & 0 deletions tests/python/rest_api/test_quality_control.py
@@ -1202,3 +1202,14 @@ def test_settings_affect_metrics(

        new_report = self.create_quality_report(admin_user, task_id)
        assert new_report["summary"]["conflict_count"] != old_report["summary"]["conflict_count"]
+
+    def test_old_report_can_be_loaded(self, admin_user, quality_reports):
+        report = min((r for r in quality_reports if r["task_id"]), key=lambda r: r["id"])
+        assert report["created_date"] < "2024"
+
+        with make_api_client(admin_user) as api_client:
+            (report_data, _) = api_client.quality_api.retrieve_report_data(report["id"])
+
+        # This report should have been created before the Jaccard index was included.
+        for d in [report_data["comparison_summary"], *report_data["frame_results"].values()]:
+            assert d["annotations"]["confusion_matrix"]["jaccard_index"] is None
1,716 changes: 1,390 additions & 326 deletions tests/python/shared/assets/cvat_db/data.json

Large diffs are not rendered by default.

