ServiceNow
diff --git a/‎CHANGELOG.md
+1-1 b/‎CHANGELOG.md
+1-1
diff --git a/‎azimuth/app.py
+1-1 b/‎azimuth/app.py
+1-1
diff --git a/‎azimuth/routers/v1/class_overlap.py
+7-7 b/‎azimuth/routers/v1/class_overlap.py
+7-7
diff --git a/‎azimuth/types/class_overlap.py
+3-3 b/‎azimuth/types/class_overlap.py
+3-3
diff --git a/‎docs/docs/_static/images/key-concepts/class-analysis-table.png ‎docs/docs/_static/images/key-concepts/class-overlap-table.png b/‎docs/docs/_static/images/key-concepts/class-analysis-table.png ‎docs/docs/_static/images/key-concepts/class-overlap-table.png
diff --git a/‎docs/docs/key-concepts/class-similarity.md
-73 b/‎docs/docs/key-concepts/class-similarity.md
-73
diff --git a/‎docs/docs/key-concepts/similarity.md
+40-2 b/‎docs/docs/key-concepts/similarity.md
+40-2
diff --git a/‎docs/docs/user-guide/class-analysis.md
-61 b/‎docs/docs/user-guide/class-analysis.md
-61
diff --git a/‎docs/docs/user-guide/class-overlap.md
+80 b/‎docs/docs/user-guide/class-overlap.md
+80
@@ -14,7 +14,7 @@ Released changes are shown in the
 - Azimuth now works on French datasets (and pipelines)! Language can be selected in the config,
   and language-specific defaults for syntax-tagging and behavioral tests (neutral tokens) will be
   set dynamically (or can be altered manually).
-- New class analysis section, with class overlap detection. More details are available in the User Guide and in the Key Concepts.
+- New class overlap section, with class overlap detection. More details are available in the User Guide and in the Key Concepts.
 - Pipeline pre/post-processing steps breakdown in the utterance detail page.
 
 ### Changed
 
@@ -164,7 +164,7 @@ def create_app() -> FastAPI:
     api_router.include_router(admin_router, prefix="/admin")
     api_router.include_router(
         class_overlap_router,
-        prefix="/class_analysis",
+        prefix="/class_overlap",
         dependencies=[Depends(require_application_ready)],
     )
     api_router.include_router(tags_router, prefix="/tags", dependencies=[])
 
@@ -14,10 +14,10 @@
 from azimuth.task_manager import TaskManager
 from azimuth.types import DatasetSplitName, ModuleOptions, SupportedModule
 from azimuth.types.class_overlap import (
-    ClassAnalysisClassPair,
-    ClassAnalysisResponse,
     ClassOverlapPlotResponse,
     ClassOverlapResponse,
+    ClassOverlapTableClassPair,
+    ClassOverlapTableResponse,
 )
 from azimuth.types.model_performance import ConfusionMatrixResponse
 from azimuth.utils.project import similarity_available
@@ -90,15 +90,15 @@ def get_class_overlap_plot(
     summary="Get class overlap table.",
     description="Get data for class overlap, confusion, and related utterance counts.",
     tags=TAGS,
-    response_model=ClassAnalysisResponse,
+    response_model=ClassOverlapTableResponse,
 )
-def get_class_analysis(
+def get_class_overlap(
     task_manager: TaskManager = Depends(get_task_manager),
     dataset_split_managers: Dict[DatasetSplitName, DatasetSplitManager] = Depends(
         get_all_dataset_split_managers
     ),
     pipeline_index: Optional[int] = Depends(query_pipeline_index),
-) -> ClassAnalysisResponse:
+) -> ClassOverlapTableResponse:
     dm = dataset_split_managers[DatasetSplitName.train]
     class_counts_train = dm.class_distribution()
     class_counts_eval = dataset_split_managers[DatasetSplitName.eval].class_distribution()
@@ -133,7 +133,7 @@ def get_class_analysis(
     )
 
     class_pairs_list = [
-        ClassAnalysisClassPair(
+        ClassOverlapTableClassPair(
             source_class=class_names[i],
             target_class=class_names[j],
             overlap_score_train=class_overlap_result.s_matrix[i, j],
@@ -151,6 +151,6 @@ def get_class_analysis(
         )
     ]
 
-    api_result = ClassAnalysisResponse(class_pairs=class_pairs_list)
+    api_result = ClassOverlapTableResponse(class_pairs=class_pairs_list)
 
     return api_result
@@ -30,7 +30,7 @@ class ClassOverlapPlotResponse(AliasModel):
     default_overlap_threshold: float
 
 
-class ClassAnalysisClassPair(AliasModel):
+class ClassOverlapTableClassPair(AliasModel):
     source_class: str = Field(..., title="Source class")
     target_class: str = Field(..., title="Target class")
     overlap_score_train: float = Field(..., title="Overlap score on train")
@@ -46,5 +46,5 @@ class ClassAnalysisClassPair(AliasModel):
     )
 
 
-class ClassAnalysisResponse(AliasModel):
-    class_pairs: List[ClassAnalysisClassPair] = Field(..., title="Class pair overlap data")
+class ClassOverlapTableResponse(AliasModel):
+    class_pairs: List[ClassOverlapTableClassPair] = Field(..., title="Class pair overlap data")
@@ -7,15 +7,21 @@ analysis can be quite powerful given that **no trained ML model is needed**; onl
 be supplied.
 
 Within Azimuth, different similarity analyses are provided to determine how similar utterances are
-within a class, across classes, and so on. This can help indicate whether classes are well-defined,
+within a class, between classes, and so on. This can help indicate whether classes are well-defined,
 or whether changes should be made to improve the dataset, such as by redefining classes, relabeling
 or omitting data, or augmenting the dataset.
 
 ## Where is this used in Azimuth?
 
 In Azimuth, the similarity analysis is used to derive [:material-link: Smart Tags](smart-tags.md),
 and also to show the most similar utterances in both dataset splits on
-the [:material-link: Utterances Details](../user-guide/exploration-space/utterance-details.md).
+the [:material-link: Utterances Details](../user-guide/exploration-space/utterance-details.md)
+(see below).
+
+Similarity is also used for class overlap, which assesses the semantic overlap between pairs of
+classes. Class overlap is presented in the [Class Overlap Dashboard
+Section](../user-guide/index.md#class-overlap) as well as the
+[:material-link: Class Overlap](../user-guide/class-overlap.md) page.
 
 <figure markdown>
   ![Image title](../_static/images/exploration-space/utterance-details-similarity.png)
@@ -62,6 +68,35 @@ and/or `conflicting_neighbors_eval`, based on which dataset split is being exami
 utterance in the test set will be compared to its neighbors in both the training and evaluation
 dataset splits.)
 
+### Class Overlap
+
+#### Class Overlap Value
+
+Class overlap is calculated using utterance embeddings, which are computed as described above.
+
+Class overlap for class *C<sub>i</sub>* (source class) with class *C<sub>j</sub>* (target class) is
+defined as the area of the feature (embedding) space in which an utterance in class
+*C<sub>i</sub>* has a greater probability of being in class *C<sub>j</sub>* than in class
+*C<sub>i</sub>*.
+
+To approximate this probability, we make use of the
+[`spectral-metric`](https://github.com/Dref360/spectral-metric) package
+([Branchaud-Charron, 2019](https://arxiv.org/abs/1905.07299)[^2]). The probability of a sample
+being in a specified class is determined based on the representation of this class in the
+sample's 5 nearest neighbors, as well as the hypervolume containing these neighbors (Parzen
+window). Class overlap for *C<sub>i</sub>* with *C<sub>j</sub>* is calculated as the mean
+probability across all samples in *C<sub>i</sub>*. The similarity matrix *S* from `spectral-metric`
+contains these probabilities for all class pairs. Note that probabilities are normalized by the
+source class, to sum to 1.
+
+#### Samples with overlap
+
+Individual samples from a source class are determined to have overlap with a target class when
+their probability of being in the target class is greater than 0, which is the same as saying
+that at least one of their 5 nearest neighbors is from the target class. This is a
+conservative metric, on which we anticipate iterating in the future.
+
+
 ### Configuration
 
 [:material-link: Similarity Analysis Configuration](../reference/configuration/analyses/similarity.md)
@@ -71,5 +106,8 @@ the smart tags.
 
 [^1]: Reimers, Nils, and Iryna Gurevych. "Sentence-bert: Sentence embeddings using siamese
 bert-networks." arXiv preprint arXiv:1908.10084 (2019).
+[^2]: Branchaud-Charron, Frederic, Andrew Achkar, and Pierre-Marc Jodoin. "Spectral metric for
+dataset complexity assessment." Proceedings of the IEEE/CVF Conference on Computer Vision and
+Pattern Recognition. 2019.
 
 --8<-- "includes/abbreviations.md"
@@ -0,0 +1,80 @@
+# Class Overlap
+
+Class overlap assesses the semantic overlap between pairs of classes. In some cases, high
+overlap may be associated with poor class definitions, mislabelling, and/or model confusion.
+
+Class overlap is determined with a dataset alone, based on the locations of utterances in
+embedding space, as described in
+[:material-link: Similarity Analysis](../key-concepts/similarity.md).
+
+## Class Overlap Plot
+
+The Class Overlap plot shows the extent to which source classes semantically overlap target
+classes, all in the training data. The source class is the class label, and the target class is
+the class that the source class may look like, based on its nearest neighbors. As such, flows
+between class nodes indicate whether samples in a source class are in neighborhoods typified
+by other classes (class overlap) or their own class (self-overlap). For each source class, class
+overlap and self-overlap values sum to 1, unless values are scaled by class size.
+
+Overlap is displayed as flows from source class (nodes on the left) to target classes (right).
+Nodes are ordered with flows for greatest overlap values towards the top, so as to highlight these
+class pairs. Wider flows indicate greater overlap values. Colors group flows from the same
+source class. The plot is interactive, in that nodes can be moved and reordered via dragging.
+
+### Plot options
+
+* **Minimum displayed overlap value**: This value determines which overlap flows will be displayed
+  on the plot. Vary this value to focus on class pairs with greatest overlap, or to see all
+  overlap to better understand the complexity of the dataset. The default value is set to the
+  tenth-highest class overlap value for ease of visualization alone, and will differ across
+  different datasets.
+* **Self-overlap**: This toggle determines whether to show flows for overlap of a class with
+  itself, to get a sense of the relative magnitude (and possibly importance) of class overlap.
+* **Scale by class size**: Overlap values are normalized by source class, such that the sum of
+  all class overlap and self-overlap values for a source class is 1. This toggle multiplies overlap
+  values by class sample sizes, changing node size and flow width accordingly.
+
+### Suggested workflow
+
+The plot options described above allow for exploration of different aspects of class overlap. To
+navigate them, we suggest the following workflow:
+
+#### 1. Default view: `Self-overlap` off, `Scale by class size` on
+
+- Start here. This view shows you the class pairs with the greatest (scaled) semantic overlap
+  scores in the dataset. Vary the `Minimum displayed overlap value` to see all dataset overlap or
+  to focus on the class pairs with the greatest overlap scores.
+- Because `Scale by class size` is on, this view will emphasize overlapping classes with greater
+  sample counts. This is useful if you are less concerned about class overlap from
+  source classes with few samples in the training data. However, if you want to further investigate
+  classes with high overlap values but fewer samples, either for better understanding your dataset
+  or because some classes might have high business value, then you can toggle `Scale by class size`
+  to off, as explained in step 2.
+
+#### 2. Toggle `Scale by class size` off:
+
+- When `Scale by class size` is turned off, the flows from each source class (class overlap and self-overlap) sum to 1.
+  This view emphasizes class pairs with the greatest class overlap scores, regardless of
+  whether the source class has many samples in it.
+- This is useful to further understand class overlap for classes that have relatively fewer
+  samples in them, which might not have been as visible during the analysis at step 1.
+
+#### 3. Toggle `Self-overlap` on:
+
+- For any given class, turning on `Self-overlap` lets you compare the extent to which its samples
+  semantically overlap other classes (class overlap) vs. samples of its own class (self-overlap).
+  For example, if self-overlap is much higher than class overlap, class overlap may be less
+  problematic for this class, and vice versa.
+
+!!! tip
+
+    :material-restart: Click the reset button next to the overlap threshold value to reset to
+    the default threshold.
+
+<figure markdown>
+![](../_static/images/key-concepts/class-overlap-plot.png)
+<figcaption>
+Class Overlap plot on the Class Overlap page, accessed via the Dashboard.
+</figcaption>
+</figure>
+
Original file line number	Diff line number	Diff line change
`@@ -164,7 +164,7 @@ def create_app() -> FastAPI:`
`164`	`164`	`api_router.include_router(admin_router, prefix="/admin")`
`165`	`165`	`api_router.include_router(`
`166`	`166`	`class_overlap_router,`
`167`		`- prefix="/class_analysis",`
	`167`	`+ prefix="/class_overlap",`
`168`	`168`	`dependencies=[Depends(require_application_ready)],`
`169`	`169`	`)`
`170`	`170`	`api_router.include_router(tags_router, prefix="/tags", dependencies=[])`