This repository has been archived by the owner on Aug 27, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathTransaction.py
133 lines (109 loc) · 4.04 KB
/
Transaction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import exceptions
import csv
import json
import re
import logging
# Module-level logger for the transaction code. The handler is created when
# this module is imported and writes to a log file given by a relative path,
# so the location resolves against the process's current working directory.
logger = logging.getLogger('PricesPaidTrans')
hdlr = logging.FileHandler('../logs/PricesPaidTrans.log')
# Each record is written as "<timestamp> <level name> <message>".
formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
hdlr.setFormatter(formatter)
logger.addHandler(hdlr)
# Only ERROR and more severe records are handled by this logger; the
# logger.info() call in TransactionDirector.findAllMatching is therefore
# dropped at this level.
logger.setLevel(logging.ERROR)
# Names of the fields that are more or less "standard" across data sources.
# PRICE, UNITS and PSC are used elsewhere in this file as keys into a
# transaction's dictionary; presumably the others are used the same way by
# the adapters -- TODO confirm against the callers.
UNITS = "unitsOrdered"
PRICE = "unitPrice"
AGENCY = "contractingAgency"
VENDOR = "vendor"
PSC = "psc"
DESCR = "productDescription"
LONGDESCR = "longDescription"
DATE = "orderDate"
AWARDIDIDV = "awardIdIdv"
DATASOURCE = "dataSource"
# NOTE(review): DATASOURCE is defined above but is not listed in
# STANDARD_FIELDS -- confirm whether that omission is intentional.
STANDARD_FIELDS = [UNITS,PRICE,AGENCY,VENDOR,PSC,DESCR,LONGDESCR,DATE,AWARDIDIDV]
# Semi-standard fields: present for some data sources only.
# TODO: Need to create Semi-standard fields here...
# NOTE(review): unlike the camelCase names above, these values look like
# human-readable column headings -- verify against the data files.
MANUFACTURER_NAME = "Manufacturer Name"
MANUFACTURER_PART_NUMBER = "Manufacturer Part Number"
BUREAU = "Bureau"
CONTRACT_NUMBER = "Contract Number"
TO_ZIP_CODE = "To Zip Code"
FROM_ZIP_CODE = "From Zip Code"
UNIT_OF_ISSUE = "Unit of Issue"
SEMI_STANDARD_FIELDS = [MANUFACTURER_NAME,MANUFACTURER_PART_NUMBER,BUREAU,CONTRACT_NUMBER,TO_ZIP_CODE,FROM_ZIP_CODE,UNIT_OF_ISSUE]
def ensureZipCodeHasFiveDigits(zip):
    """Left-pad a ZIP code with zeros so it is at least five characters long.

    Leading zeros are commonly lost when ZIP codes pass through numeric
    columns (e.g. "00501" becoming "501" or 501); this restores them.

    Args:
        zip: The ZIP code, as a string or a number. Surrounding whitespace
            on strings is ignored so that it is not counted as width.

    Returns:
        The ZIP code as a string of at least five characters. Values that
        are already five characters or longer (e.g. ZIP+4) are returned
        unchanged apart from the whitespace trim.

    Raises:
        TypeError: If ``zip`` is neither string-like nor a number.
    """
    # NOTE: the parameter shadows the builtin `zip`; the name is kept so
    # that existing keyword callers continue to work.
    if hasattr(zip, "zfill"):
        text = zip
    else:
        # Numeric input: render as an integer ("%d" rejects None and other
        # non-numbers with TypeError instead of producing junk like "0None").
        text = "%d" % zip
    return text.strip().zfill(5)
def parseFormatVersion(filename):
    """Extract the version component from a data file name.

    The name is searched for the shape
    ``<word>-pppifver-<word>-<n>-<n>-<n>-<n>-<n>.csv`` where each ``<n>`` is
    a run of digits. The first of those numeric components is returned,
    which (going by the function name) is the input-format version.

    Args:
        filename: File name (or path) to inspect.

    Returns:
        The first numeric component as a string, or None when the name does
        not follow the expected shape.
    """
    # The dot before "csv" is escaped so that only a literal ".csv"
    # extension is accepted; an unescaped "." would match any character.
    match = re.search(
        r"(\w+)-pppifver-(\w+)-(\d+)-(\d+)-(\d+)-(\d+)-(\d+)\.csv",
        filename)
    if match is None:
        return None
    return match.group(3)
def dumpOrReturnWarning(str):
    """Return the JSON encoding of ``str``, or of a warning if it is undecodable.

    Args:
        str: The value to serialize (the name shadows the builtin; it is
            kept because it is part of the function's signature).

    Returns:
        ``json.dumps(str)``; when that raises UnicodeDecodeError, the JSON
        encoding of a fixed "Bad Data" message instead.
    """
    fallback = "Bad Data, this string had non Unicode characters."
    try:
        encoded = json.dumps(str)
    except UnicodeDecodeError:
        encoded = json.dumps(fallback)
    return encoded
# Having to call this function is a significant performance hit.
# It would be better to find the offending data and either remove it
# or fix it in our data files. I may try to do that eventually.
def replaceUndumpableData(str):
    """Return ``str`` unchanged if it can be JSON-encoded, else a warning.

    The value is test-serialized with ``json.dumps`` and the result thrown
    away; only whether the call raises UnicodeDecodeError matters.

    Args:
        str: The value to check (the name shadows the builtin; it is kept
            because it is part of the function's signature).

    Returns:
        The original object when serialization succeeds, otherwise a fixed
        "Bad Data" message string.
    """
    try:
        json.dumps(str)
    except UnicodeDecodeError:
        return "Bad Data, this string had non Unicode characters."
    else:
        return str
class RawTransaction:
    """A single transaction exactly as it was read from a file."""

    def __init__(self, name):
        # `name` is accepted but not stored; a new instance only starts
        # out with an empty `data` slot.
        self.data = None
class BasicTransaction:
    """A dictionary of partially structured transaction data.

    A raw transaction is converted into a dictionary by a caller-supplied
    adapter and then normalized (currency symbol removed from the price,
    surrounding whitespace trimmed from every string value).

    Attributes:
        fields: Always initialized to None here; not otherwise used by
            this class.
        datasource: The data-source identifier passed to the adapter.
        dictionaryAdapter: Callable ``adapter(raw, datasource)`` returning
            the transaction dictionary.
        dict: The cleaned-up transaction dictionary.
    """

    def __init__(self, adapter, raw, datasource):
        """Build the transaction dictionary from ``raw`` using ``adapter``."""
        self.fields = None
        self.datasource = datasource
        self.dictionaryAdapter = adapter
        self.dict = self.getStandardDictionary(raw)

    def getStandardDictionary(self, rawTransaction):
        """Run the adapter on ``rawTransaction`` and clean up its output."""
        xdict = self.dictionaryAdapter(rawTransaction, self.datasource)
        return self.cleanUpData(xdict)

    def cleanUpData(self, qdict):
        """Normalize the values of ``qdict`` in place and return it.

        Removes "$" from the price and strips surrounding whitespace from
        every string value. A missing price and non-string values are left
        alone rather than raising, so that a malformed record can still be
        rejected by isValidTransaction() instead of aborting construction.
        """
        price = qdict.get(PRICE)
        if hasattr(price, "replace"):
            qdict[PRICE] = price.replace("$", "")
        # Iterate over a snapshot: items() exists on both Python 2 and 3
        # (iteritems() is Python 2 only), and the dictionary is being
        # assigned to inside the loop.
        for key, value in list(qdict.items()):
            if hasattr(value, "strip"):
                qdict[key] = value.strip()
        return qdict

    def getJSON(self):
        """Return the transaction dictionary serialized as a JSON string."""
        return json.dumps(self.dict)

    def getSearchMemento(self):
        """Return the text that pattern searches are run against (the JSON form)."""
        return self.getJSON()

    # First check: we have to have a UnitPrice which is a number!
    def isValidTransaction(self):
        """Return True if the price parses as a float and the units as an int.

        A missing or None price/units entry counts as invalid (returns
        False) rather than raising.
        """
        try:
            float(self.dict[PRICE])
            int(self.dict[UNITS])
        except (KeyError, TypeError, ValueError):
            return False
        return True
class TransactionDirector:
    """Operations on the universe of transactions.

    Attributes:
        transactions: List of the transaction objects added so far.
    """

    def __init__(self):
        """Start with an empty collection of transactions."""
        self.transactions = []

    def addTransaction(self, name):
        """Append a transaction object to the collection."""
        self.transactions.append(name)

    def findAllMatching(self, pattern, psc_pattern):
        """Return the transactions that match both search patterns.

        Args:
            pattern: Regular expression searched for in each transaction's
                search memento, or None to accept every transaction.
            psc_pattern: Regular expression searched for in each
                transaction's PSC value, or None to accept every PSC.

        Returns:
            A new list of the matching transactions, in insertion order.
        """
        matches = []
        # Diagnostic output kept from the original. The parenthesised
        # single-argument form prints the same text under the Python 2
        # print statement and the Python 3 print function.
        print("patterns")
        print(pattern)
        print(psc_pattern)

        # Compile once up front instead of looking the pattern up again
        # for every transaction.
        general_re = None if pattern is None else re.compile(pattern)
        psc_re = None if psc_pattern is None else re.compile(psc_pattern)

        for tr in self.transactions:
            matchesGeneral = (
                general_re is None
                or general_re.search(tr.getSearchMemento()) is not None)
            matchesPSC = (
                psc_re is None
                or psc_re.search(tr.dict[PSC]) is not None)
            # Lazy %-style arguments: the message is only formatted when
            # INFO logging is actually enabled, and a non-string PSC value
            # no longer breaks the concatenation.
            logger.info("matchs PSC, matchesGeneral%s.%s|%s",
                        matchesPSC, matchesGeneral, tr.dict[PSC])
            if matchesPSC and matchesGeneral:
                matches.append(tr)
        return matches