14
14
"""
15
15
16
16
17
- def jaccard_similarity (set_a , set_b , alternative_union = False ):
17
+ def jaccard_similarity (
18
+ set_a : set [str ] | list [str ] | tuple [str ],
19
+ set_b : set [str ] | list [str ] | tuple [str ],
20
+ alternative_union = False ,
21
+ ):
18
22
"""
19
23
Finds the jaccard similarity between two sets.
20
24
Essentially, its intersection over union.
@@ -37,41 +41,52 @@ def jaccard_similarity(set_a, set_b, alternative_union=False):
37
41
>>> set_b = {'c', 'd', 'e', 'f', 'h', 'i'}
38
42
>>> jaccard_similarity(set_a, set_b)
39
43
0.375
40
-
41
44
>>> jaccard_similarity(set_a, set_a)
42
45
1.0
43
-
44
46
>>> jaccard_similarity(set_a, set_a, True)
45
47
0.5
46
-
47
48
>>> set_a = ['a', 'b', 'c', 'd', 'e']
48
49
>>> set_b = ('c', 'd', 'e', 'f', 'h', 'i')
49
50
>>> jaccard_similarity(set_a, set_b)
50
51
0.375
52
+ >>> set_a = ('c', 'd', 'e', 'f', 'h', 'i')
53
+ >>> set_b = ['a', 'b', 'c', 'd', 'e']
54
+ >>> jaccard_similarity(set_a, set_b)
55
+ 0.375
56
+ >>> set_a = ('c', 'd', 'e', 'f', 'h', 'i')
57
+ >>> set_b = ['a', 'b', 'c', 'd']
58
+ >>> jaccard_similarity(set_a, set_b, True)
59
+ 0.2
60
+ >>> set_a = {'a', 'b'}
61
+ >>> set_b = ['c', 'd']
62
+ >>> jaccard_similarity(set_a, set_b)
63
+ Traceback (most recent call last):
64
+ ...
65
+ ValueError: Set a and b must either both be sets or be either a list or a tuple.
51
66
"""
52
67
53
68
if isinstance (set_a , set ) and isinstance (set_b , set ):
54
- intersection = len (set_a .intersection (set_b ))
69
+ intersection_length = len (set_a .intersection (set_b ))
55
70
56
71
if alternative_union :
57
- union = len (set_a ) + len (set_b )
72
+ union_length = len (set_a ) + len (set_b )
58
73
else :
59
- union = len (set_a .union (set_b ))
74
+ union_length = len (set_a .union (set_b ))
60
75
61
- return intersection / union
76
+ return intersection_length / union_length
62
77
63
- if isinstance (set_a , (list , tuple )) and isinstance (set_b , (list , tuple )):
78
+ elif isinstance (set_a , (list , tuple )) and isinstance (set_b , (list , tuple )):
64
79
intersection = [element for element in set_a if element in set_b ]
65
80
66
81
if alternative_union :
67
- union = len (set_a ) + len (set_b )
68
- return len (intersection ) / union
82
+ return len (intersection ) / (len (set_a ) + len (set_b ))
69
83
else :
70
- union = set_a + [element for element in set_b if element not in set_a ]
84
+ # Cast set_a to list because tuples cannot be mutated
85
+ union = list (set_a ) + [element for element in set_b if element not in set_a ]
71
86
return len (intersection ) / len (union )
72
-
73
- return len ( intersection ) / len ( union )
74
- return None
87
+ raise ValueError (
88
+ "Set a and b must either both be sets or be either a list or a tuple."
89
+ )
75
90
76
91
77
92
if __name__ == "__main__" :
0 commit comments