5
5
from .string_similarity import NormalizedStringSimilarity
6
6
7
7
8
- class Cosine (ShingleBased , NormalizedStringDistance , NormalizedStringSimilarity ):
8
+ class Cosine (ShingleBased , NormalizedStringDistance ,
9
+ NormalizedStringSimilarity ):
9
10
10
11
def __init__ (self , k ):
11
12
super ().__init__ (k )
@@ -24,10 +25,12 @@ def similarity(self, s0, s1):
24
25
return 0.0
25
26
profile0 = self .get_profile (s0 )
26
27
profile1 = self .get_profile (s1 )
27
- return self ._dot_product (profile0 , profile1 ) / (self ._norm (profile0 ) * self ._norm (profile1 ))
28
+ return self ._dot_product (profile0 , profile1 ) / (
29
+ self ._norm (profile0 ) * self ._norm (profile1 ))
28
30
29
31
def similarity_profiles (self , profile0 , profile1 ):
30
- return self ._dot_product (profile0 , profile1 ) / (self ._norm (profile0 ) * self ._norm (profile1 ))
32
+ return self ._dot_product (profile0 , profile1 ) / (
33
+ self ._norm (profile0 ) * self ._norm (profile1 ))
31
34
32
35
@staticmethod
33
36
def _dot_product (profile0 , profile1 ):
@@ -50,13 +53,3 @@ def _norm(profile):
50
53
for k , v in profile .items ():
51
54
agg += 1.0 * v * v
52
55
return math .sqrt (agg )
53
-
54
-
55
- if __name__ == "__main__" :
56
- cosine = Cosine (1 )
57
- str0 = "上海市宝山区 你好"
58
- str1 = "上海浦东新区 你好吗"
59
- d = cosine .distance (str0 , str1 )
60
- s = cosine .similarity (str0 , str1 )
61
- print (d )
62
- print (s )
0 commit comments