https://programmers.co.kr/learn/courses/30/lessons/17677
Code
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import re
from collections import Counter
def solution(str1, str2):
# 1. Lower
str1, str2 = str1.lower(), str2.lower()
# 2. Generate substring list
def generate_valid_substrings(str):
# Select only [a-z] substrings
sstr = [str[i:i+2] for i in range(len(str)-1)]
return [ss for ss in sstr if re.match("^[a-z]+$", ss)]
sstr1 = generate_valid_substrings(str1)
sstr2 = generate_valid_substrings(str2)
# 3. Convert to set
union = set(sstr1) | set(sstr2)
inters = set(sstr1) & set(sstr2)
# 3.1 Convert to multiset
cnt1, cnt2 = Counter(sstr1), Counter(sstr2)
union_multi = {k: max(cnt1[k], cnt2[k]) for k in union}
inters_multi = {k: min(cnt1[k], cnt2[k]) for k in inters}
# 4. Compute Jaccard distance
dist = sum(inters_multi.values()) / sum(union_multi.values()) if cnt1 or cnt2 else 1
return int(dist * 65536)
Complexity
$O(|\text{str1}|+|\text{str2}|)$
PREVIOUSEtc