forked from xiepaup/dbatools
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstringSimilarity.go
More file actions
121 lines (95 loc) · 2.88 KB
/
stringSimilarity.go
File metadata and controls
121 lines (95 loc) · 2.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
package tools

import "strings"
/*
* use similar text as text compare engine
* add scores with every level words
* Created by vitoxie
* Date 2018-12-30
* Email xiepaup@163.com
*/
// SmartSimilarityText is the receiver type for the string-similarity methods
// in this file. It has no fields, so every value of it is interchangeable.
type SmartSimilarityText struct{}
// NewSmartSimilarityText returns a pointer to a new, ready-to-use
// SmartSimilarityText.
func NewSmartSimilarityText() *SmartSimilarityText {
	return new(SmartSimilarityText)
}
/*
 * Using SmartIdentifyText can give higher performance and accuracy than
 * SimilarText.
 * e.g.:
 *   a := "tdw|112729|20181219|djfsd982lsd289jdksfj0flksadfjsdf2lkfsadh9fasddfyf"
 *   b := "tdw|112729|20190323|9dkfk892o3kd9sdfa9dfakdfj92fklsdahf;as"
 *   SimilarText           ---> output is 40
 *   but SmartIdentifyText ---> output is 74
 */
// SimilarText compares str1 and str2 rune by rune and returns an int in
// [0, 100] that stands for the match level. It returns 0 when either string
// is empty.
func (st *SmartSimilarityText) SimilarText(str1, str2 string) int {
	runes1 := []rune(str1)
	runes2 := []rune(str2)
	if len(runes1) == 0 || len(runes2) == 0 {
		return 0
	}

	// Scale the number of shared runes by the combined length of both inputs:
	// 2*common/(len1+len2) expressed as an integer percentage.
	combined := len(runes1) + len(runes2)
	common := st.similarChar(runes1, runes2)
	return common * 200 / combined
}
// SmartIdentifyText splits s1 and s2 into segments with SmartDevideText,
// scores each pair of segments at the same position with SimilarText, and
// combines the per-segment scores using the weights from smartWeights.
//
// It returns an int in [0, 100] that stands for the match level. It returns 0
// when the two inputs do not split into the same number of segments.
func (st *SmartSimilarityText) SmartIdentifyText(s1, s2 string) int {
	// SmartDevideText is a package-level function, not a method on the
	// receiver, so it must be called without a selector.
	words1 := SmartDevideText(s1)
	words2 := SmartDevideText(s2)
	if len(words1) == 0 || len(words1) != len(words2) {
		// Different segment counts. We could fall back to comparing the
		// original strings with SimilarText, but for speed we report no match.
		return 0
	}

	weights := st.smartWeights(len(words1))
	sim := 0
	for i, w1 := range words1 {
		sim += st.SimilarText(w1, words2[i]) * weights[i]
	}
	score := sim / 100

	// The weights are not guaranteed to sum to exactly 100 (two segments get
	// 51 and 50) and can be negative for the last positions of long inputs,
	// so clamp the result to keep the documented [0, 100] contract.
	if score < 0 {
		return 0
	}
	if score > 100 {
		return 100
	}
	return score
}
// smartWeights returns l integer weights, one per segment position, used to
// combine per-segment similarity scores. It returns nil when l <= 0.
//
// The weight at index i is 100/l + (l/2)*((l/2)-i), so for l >= 2 earlier
// positions get larger weights than later ones. Because of integer division
// and the positional bonus, the weights are only approximately a percentage
// split: they need not sum to 100 (l = 2 yields 51 and 50) and they become
// negative for the last positions when l is large (l = 9 yields -5 at
// index 8).
func (st *SmartSimilarityText) smartWeights(l int) []int {
	if l <= 0 {
		return nil
	}

	base := 100 / l
	middle := l / 2

	// The length is known up front, so allocate once instead of growing the
	// slice through repeated append calls.
	scores := make([]int, l)
	for i := range scores {
		scores[i] = base + middle*(middle-i)
	}
	return scores
}
// similarStr finds the longest run of consecutive runes that appears in both
// str1 and str2. It returns the length of that run followed by its start
// index in str1 and its start index in str2. When several runs share the
// maximum length, the one found first (lowest str1 index, then lowest str2
// index) is reported. With no common rune it returns 0, 0, 0.
func (st *SmartSimilarityText) similarStr(str1 []rune, str2 []rune) (int, int, int) {
	best, bestI, bestJ := 0, 0, 0
	for i := range str1 {
		for j := range str2 {
			// Count how far the two slices agree when aligned at (i, j).
			run := 0
			for i+run < len(str1) && j+run < len(str2) {
				if str1[i+run] != str2[j+run] {
					break
				}
				run++
			}
			// Strictly greater keeps the earliest match on ties.
			if run > best {
				best, bestI, bestJ = run, i, j
			}
		}
	}
	return best, bestI, bestJ
}
// similarChar returns the total number of runes that str1 and str2 share,
// counted by taking the longest common run reported by similarStr and then
// recursing on the parts to its left and on the parts to its right.
func (st *SmartSimilarityText) similarChar(str1 []rune, str2 []rune) int {
	length, start1, start2 := st.similarStr(str1, str2)
	if length == 0 {
		return 0
	}

	matched := length

	// Left side: only worth searching when both inputs have runes before the run.
	if start1 > 0 && start2 > 0 {
		matched += st.similarChar(str1[:start1], str2[:start2])
	}

	// Right side: only worth searching when both inputs have runes after the run.
	end1, end2 := start1+length, start2+length
	if end1 < len(str1) && end2 < len(str2) {
		matched += st.similarChar(str1[end1:], str2[end2:])
	}
	return matched
}
// SmartDevideText splits s into the segments separated by "|" and returns
// them in order. A string without "|" yields a single-element slice holding
// s itself.
func SmartDevideText(s string) []string {
	// TODO: left open by the original author (no details recorded).
	const separator = "|"
	segments := strings.Split(s, separator)
	return segments
}