-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathChapter6_Splink_Settings.json
78 lines (78 loc) · 3.42 KB
/
Chapter6_Splink_Settings.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
{
"link_type": "link_only",
"blocking_rules_to_generate_predictions": [
"l.Postcode = r.Postcode",
"l.CompanyName = r.CompanyName"
],
"comparisons": [
{
"output_column_name": "CompanyName",
"comparison_levels": [
{
"sql_condition": "\"CompanyName_l\" IS NULL OR \"CompanyName_r\" IS NULL",
"label_for_charts": "Null",
"is_null_level": true
},
{
"sql_condition": "\"CompanyName_l\" = \"CompanyName_r\"",
"label_for_charts": "Exact match",
"m_probability": 0.41242595303592494,
"u_probability": 2.0716586735169513e-07
},
{
"sql_condition": "jaro_winkler_similarity(\"CompanyName_l\", \"CompanyName_r\") >= 0.9",
"label_for_charts": "Jaro_winkler_similarity >= 0.9",
"m_probability": 0.2215830625864,
"u_probability": 5.1584300970572084e-06
},
{
"sql_condition": "jaro_winkler_similarity(\"CompanyName_l\", \"CompanyName_r\") >= 0.8",
"label_for_charts": "Jaro_winkler_similarity >= 0.8",
"m_probability": 0.36481524661439907,
"u_probability": 0.0002904258294403414
},
{
"sql_condition": "ELSE",
"label_for_charts": "All other comparisons",
"m_probability": 0.0011757377632759672,
"u_probability": 0.9997042085745953
}
],
"comparison_description": "Exact match vs. CompanyName within jaro_winkler_similarity thresholds 0.9, 0.8 vs. anything else"
},
{
"output_column_name": "Stopwords",
"comparison_levels": [
{
"sql_condition": "\"Stopwords_l\" IS NULL OR \"Stopwords_r\" IS NULL",
"label_for_charts": "Null",
"is_null_level": true
},
{
"sql_condition": "\"Stopwords_l\" = \"Stopwords_r\"",
"label_for_charts": "Exact match",
"m_probability": 0.18447271147761657,
"u_probability": 0.2523245907588882
},
{
"sql_condition": "jaro_winkler_similarity(\"Stopwords_l\", \"Stopwords_r\") >= 0.9",
"label_for_charts": "Jaro_winkler_similarity >= 0.9",
"m_probability": 0.17056327661754525,
"u_probability": 0.01687521516420069
},
{
"sql_condition": "ELSE",
"label_for_charts": "All other comparisons",
"m_probability": 0.6449640119048382,
"u_probability": 0.7308001940769111
}
],
"comparison_description": "Exact match vs. Stopwords within jaro_winkler_similarity threshold 0.9 vs. anything else"
}
],
"retain_intermediate_calculation_columns": true,
"retain_matching_columns": true,
"sql_dialect": "duckdb",
"linker_uid": "35wsnscw",
"probability_two_random_records_match": 0.0001
}