-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathcustom.py
105 lines (75 loc) · 4.09 KB
/
custom.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
from dwm import _CollectHistory_, _CollectHistoryAgg_
import re
## Define custom functions here
#################################
## TEMPLATE
def __TEMPLATEFCN__(data, histObj):
    """Skeleton for a user-defined cleaning function; copy and adapt it.

    data is a single record (dict). histObj is handed to
    _CollectHistoryAgg_ as ``contactHist``. Replace the placeholder
    lookup type, values and field name below with the real ones.

    Returns the tuple ``(data, histObj)``.
    """
    # Step 1: apply the transformations to `data` here.

    # Step 2: describe the value change that was made.
    fieldChange = _CollectHistory_(
        lookupType='__TEMPLATEFCN__',
        fromVal='oldvalue',
        toVal='newvalue',
    )

    # Step 3: hand that change to the aggregator for the affected field.
    # Its return value is not used; histObj itself is what gets returned.
    _CollectHistoryAgg_(
        contactHist=histObj,
        fieldHistObj=fieldChange,
        fieldName='fieldThatWasChanged',
    )

    # Step 4: give back the record together with its history.
    return data, histObj
#################################
## CleanZipcodeUS
## This function is for cleaning the zipPostalCode field where country == 'US'
# It gets the first string of numbers ('20194-2245' becomes '20194')
# then strips down to the first 5 digits ('123456788' becomes '12345')
# then restores any leading 0s, which are usually lost to Excel number formatting ('223' becomes '00223')
def CleanZipcodeUS(data, histObj):
    """Normalize the 'zipPostalCode' field of a US record to five digits.

    A record without a 'zipPostalCode' key is returned untouched. When the
    record's 'country' is 'US' and the ZIP is neither '' nor None, the first
    run of digits in the value is taken, cut to at most five digits and
    left-padded with zeros:

        '20194-2245' -> '20194'
        '123456788'  -> '12345'
        '223'        -> '00223'
        'VA 20194'   -> '20194'

    A value that contains no digit at all is left as it is rather than being
    turned into '00000'. A record with no 'country' key is treated as
    non-US. Values that are neither strings nor None still raise TypeError
    from ``re``.

    The old and new values are always passed to _CollectHistory_ and
    _CollectHistoryAgg_ under the lookup type 'UDF-CleanZipcodeUS'.

    Args:
        data: a single record (dict); updated in place.
        histObj: history object for the record, passed to
            _CollectHistoryAgg_ as ``contactHist``.

    Returns:
        The tuple ``(data, histObj)`` - the same two objects passed in.
    """
    if 'zipPostalCode' not in data:
        return data, histObj

    zipOld = data['zipPostalCode']
    zipNew = zipOld

    # .get(): a missing 'country' must not abort the record with KeyError.
    if data.get('country') == 'US' and zipNew not in ('', None):
        # search (not split): splitting on non-digits returns '' as the first
        # piece whenever the value starts with a non-digit (' 20194').
        firstDigits = re.search(r'[0-9]+', zipNew)
        if firstDigits:
            zipNew = firstDigits.group(0)[:5].zfill(5)

    data['zipPostalCode'] = zipNew

    change = _CollectHistory_(lookupType='UDF-CleanZipcodeUS', fromVal=zipOld, toVal=zipNew)

    # NOTE(review): the aggregator's return value is discarded and histObj is
    # returned, as before; this presumably relies on _CollectHistoryAgg_
    # updating histObj in place - confirm against dwm.
    _CollectHistoryAgg_(contactHist=histObj, fieldHistObj=change, fieldName='zipPostalCode')

    return data, histObj
#################################
## CleanAnnualRevenue
# 1. Try converting the string to an int (after removing ',' and '$')
#    - if successful, group it into one of the revenue buckets
# 2. (Not implemented by the function below) Try replacing any words with spaces, then see how many strings of numbers there are
#    - if 2 number strings, round into the closest upper bucket
def CleanAnnualRevenue(data, histObj):
    """Map a numeric 'annualRevenue' value onto its revenue-range label.

    A record without an 'annualRevenue' key is returned untouched. A value
    that is '' or already one of the range labels is kept. Otherwise ','
    and '$' are removed and the rest is parsed with ``int``; on success the
    value is replaced by the label of the range it falls in (lower bound
    inclusive, upper bound exclusive):

        '999,999'    -> 'Less than $1 mil'
        '$1,000,000' -> '$1 mil to less than $5 mil'
        '1000000000' -> '$1 bil and above'

    Anything that cannot be parsed this way (text ranges, decimals,
    non-string values) is left unchanged.

    The old and new values are always passed to _CollectHistory_ and
    _CollectHistoryAgg_ under the lookup type 'UDF-CleanAnnualRevenue'.

    Args:
        data: a single record (dict); updated in place.
        histObj: history object for the record, passed to
            _CollectHistoryAgg_ as ``contactHist``.

    Returns:
        The tuple ``(data, histObj)`` - the same two objects passed in.
    """
    if 'annualRevenue' not in data:
        return data, histObj

    annualRevenueOld = data['annualRevenue']
    annualRevenueNew = annualRevenueOld

    mil = 10 ** 6
    # (exclusive upper bound, label), ascending; the first bound that the
    # amount is below decides the label.
    buckets = (
        (1 * mil, 'Less than $1 mil'),
        (5 * mil, '$1 mil to less than $5 mil'),
        (10 * mil, '$5 mil to less than $10 mil'),
        (25 * mil, '$10 mil to less than $25 mil'),
        (50 * mil, '$25 mil to less than $50 mil'),
        (100 * mil, '$50 mil to less than $100 mil'),
        (250 * mil, '$100 mil to less than $250 mil'),
        (500 * mil, '$250 mil to less than $500 mil'),
        (1000 * mil, '$500 mil to less than $1 bil'),
    )
    topLabel = '$1 bil and above'
    knownLabels = [bucket[1] for bucket in buckets] + [topLabel]

    if annualRevenueOld != '' and annualRevenueOld not in knownLabels:
        try:
            amount = int(annualRevenueOld.replace(',', '').replace('$', ''))
        except (AttributeError, TypeError, ValueError):
            # Best effort: non-strings (no usable .replace) and strings that
            # are not a plain integer keep their original value.
            amount = None

        if amount is not None:
            annualRevenueNew = next(
                (label for limit, label in buckets if amount < limit),
                topLabel,
            )

    data['annualRevenue'] = annualRevenueNew

    change = _CollectHistory_(lookupType='UDF-CleanAnnualRevenue', fromVal=annualRevenueOld, toVal=annualRevenueNew)

    # NOTE(review): the aggregator's return value is discarded and histObj is
    # returned, as before; this presumably relies on _CollectHistoryAgg_
    # updating histObj in place - confirm against dwm.
    _CollectHistoryAgg_(contactHist=histObj, fieldHistObj=change, fieldName='annualRevenue')

    # return data and history
    return data, histObj