-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
ac7bdeb
commit bb184d0
Showing
5 changed files
with
112 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
import numpy as np | ||
from scipy.spatial.distance import cosine | ||
from AtlasMl.atlasml.ml.VectorEmbeddings.ModelDimension import ModelDimension | ||
|
||
|
||
def compute_cosine_similarity(embedding_vector, model: ModelDimension, comparison_vector):
    """
    Compute the cosine similarity between two embedding vectors.

    Parameters:
        embedding_vector (iterable): The input vector.
        model (ModelDimension): Enum member naming the embedding model. It is
            accepted for interface compatibility but is not consulted here, so
            the vector length is NOT checked against the model's dimension.
        comparison_vector (iterable): The second embedding vector to compare with.

    Returns:
        float: The cosine similarity, computed as 1 minus SciPy's cosine distance.

    Raises:
        ValueError: If the two vectors differ in shape, or if either vector has
            no non-zero component (the cosine similarity is undefined then, and
            the underlying 0/0 division would otherwise yield NaN).
    """
    # asarray avoids copying inputs that are already float64 arrays, and the
    # float conversion makes the arithmetic independent of the input dtype.
    emb1 = np.asarray(embedding_vector, dtype=float)
    emb2 = np.asarray(comparison_vector, dtype=float)

    # Ensure both vectors have the same shape
    # TODO: Add vector re-shaper
    if emb1.shape != emb2.shape:
        raise ValueError("Both vectors must have the same dimensions.")

    # An all-zero (or empty) vector has zero magnitude: reject it explicitly
    # instead of letting a NaN propagate to the caller.
    if not emb1.any() or not emb2.any():
        raise ValueError("Cosine similarity is undefined for zero-magnitude vectors.")

    # SciPy returns the cosine *distance*; similarity is its complement.
    return float(1.0 - cosine(emb1, emb2))
|
||
|
||
# Sanity check: executed at module level on two small literal vectors.
# Note that `computed_distance` actually holds a similarity value here.
example_vec1, example_vec2 = [1, 2, 3], [4, 5, 6]
computed_distance = compute_cosine_similarity(
    example_vec1,
    ModelDimension.text_embedding_three_small,
    example_vec2,
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
import numpy as np | ||
from scipy.spatial.distance import euclidean | ||
from AtlasMl.atlasml.ml.VectorEmbeddings.ModelDimension import ModelDimension | ||
|
||
|
||
def compute_euclidean_distance(embedding_vector, model: ModelDimension, comparison_vector):
    """
    Compute the Euclidean distance between two embedding vectors.

    Parameters:
        embedding_vector (iterable): The input vector.
        model (ModelDimension): Enum member naming the embedding model. It is
            accepted for interface compatibility but is not consulted here, so
            the vector length is NOT checked against the model's dimension.
        comparison_vector (iterable): The second embedding vector to compare with.

    Returns:
        float: The Euclidean distance between the two vectors.

    Raises:
        ValueError: If the two vectors differ in shape.
    """
    # Convert to float64 up front: with unsigned integer inputs (e.g. uint8
    # quantized embeddings) the element-wise difference would otherwise wrap
    # around and produce a wildly wrong distance. asarray also avoids copying
    # inputs that are already float64 arrays.
    vec1 = np.asarray(embedding_vector, dtype=float)
    vec2 = np.asarray(comparison_vector, dtype=float)

    # Ensure both vectors have the same shape
    # TODO: Add vector re-shaper
    if vec1.shape != vec2.shape:
        raise ValueError("Both vectors must have the same dimensions.")

    return float(euclidean(vec1, vec2))
|
||
|
||
# Sanity check: executed at module level on two small literal vectors.
example_vec1, example_vec2 = [1, 2, 3], [4, 5, 6]
computed_distance = compute_euclidean_distance(
    example_vec1,
    ModelDimension.text_embedding_three_small,
    example_vec2,
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
import numpy as np | ||
from scipy.spatial.distance import jaccard | ||
from AtlasMl.atlasml.ml.VectorEmbeddings.ModelDimension import ModelDimension | ||
|
||
|
||
def compute_jaccard_similarity(embedding_vector, model: ModelDimension, comparison_vector):
    """
    Compute the Jaccard similarity between two vectors.

    Parameters:
        embedding_vector (iterable): The input vector.
        model (ModelDimension): Enum member naming the embedding model. It is
            accepted for interface compatibility but is not consulted here, so
            the vector length is NOT checked against the model's dimension.
        comparison_vector (iterable): The second embedding vector to compare with.

    Returns:
        float: The Jaccard similarity, computed as 1 minus SciPy's Jaccard
            distance.

    Raises:
        ValueError: If the two vectors differ in shape.
    """
    # asarray avoids copying inputs that are already numpy arrays.
    vec1 = np.asarray(embedding_vector)
    vec2 = np.asarray(comparison_vector)

    # Ensure both vectors have the same shape
    # TODO: Add vector re-shaper
    if vec1.shape != vec2.shape:
        raise ValueError("Both vectors must have the same dimensions.")

    # NOTE(review): SciPy's Jaccard distance is defined on boolean vectors; how
    # non-boolean (real-valued) entries are interpreted appears to depend on
    # the installed SciPy version -- confirm before relying on this for dense
    # embeddings.
    distance = jaccard(vec1, vec2)
    return float(1.0 - distance)
|
||
|
||
# Sanity check: executed at module level on two small literal vectors.
# Note that `computed_distance` actually holds a similarity value here.
example_vec1, example_vec2 = [1, 2, 3], [4, 5, 6]
computed_distance = compute_jaccard_similarity(
    example_vec1,
    ModelDimension.text_embedding_three_small,
    example_vec2,
)
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
from enum import Enum | ||
|
||
class ModelDimension(Enum):
    """Supported embedding models, each valued with its vector dimension."""

    text_embedding_three_small = 1536
    text_embedding_three_large = 3072
    # Register further models here together with their vector dimensions.