-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcross_reference.py
98 lines (87 loc) · 3.36 KB
/
cross_reference.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import csv
from datetime import datetime
def load_node_categories(file_path):
    """Load categorized nodes from a CSV file keyed by node ID.

    The file must have a header row containing the columns 'Node ID',
    'Category', 'Transaction ID' and 'Timestamp'.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A dict mapping each 'Node ID' value to a dict with the keys
        'category', 'tx_id' and 'timestamp'. If a node ID appears more than
        once, the last row wins. An empty dict is returned (and a message is
        printed) when the file does not exist.

    Raises:
        KeyError: If one of the required columns is missing from the header.
    """
    categorized_nodes = {}
    try:
        # newline='' lets the csv module do its own newline handling, so
        # line breaks embedded in quoted fields are preserved verbatim.
        with open(file_path, 'r', newline='') as file:
            for row in csv.DictReader(file):
                categorized_nodes[row['Node ID']] = {
                    'category': row['Category'],
                    'tx_id': row['Transaction ID'],
                    'timestamp': row['Timestamp'],
                }
    except FileNotFoundError:
        print(f"File not found: {file_path}")
    return categorized_nodes
def load_stolen_addresses(file_path):
    """Load the set of stolen addresses from the first column of a CSV file.

    The first row is always treated as a header and skipped. Blank lines are
    ignored.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A set containing the first-column value of every non-empty data row.
        An empty set is returned (and a message is printed) when the file
        does not exist.
    """
    stolen_addresses = set()
    try:
        # newline='' is the csv module's documented way to open input files.
        with open(file_path, 'r', newline='') as file:
            reader = csv.reader(file)
            next(reader, None)  # Skip the header row (no-op on an empty file)
            for row in reader:
                # csv.reader yields [] for blank lines; indexing such a row
                # would raise IndexError, so skip it.
                if not row:
                    continue
                stolen_addresses.add(row[0])  # Address is the first column
    except FileNotFoundError:
        print(f"File not found: {file_path}")
    return stolen_addresses
def cross_reference_addresses():
    """Cross-reference categorized nodes against stolen addresses.

    Reads 'node_categories.csv' and 'csv_drained_node_addresses.csv', then
    writes 'cross_reference_results.csv' with one row per address, labelled
    as a match (present in both inputs), categorized only, or stolen only.
    A summary of the counts is printed afterwards.
    """
    # Load both datasets
    categorized_nodes = load_node_categories('node_categories.csv')
    stolen_addresses = load_stolen_addresses('csv_drained_node_addresses.csv')

    # Split the categorized nodes into matches and non-matches, keeping the
    # order in which they were loaded.
    matches = []
    unmatched_categorized = []
    for node_id, details in categorized_nodes.items():
        if node_id in stolen_addresses:
            matches.append({
                'address': node_id,
                'category': details['category'],
                'tx_id': details['tx_id'],
                'timestamp': details['timestamp'],
            })
        else:
            unmatched_categorized.append(node_id)

    # Set iteration order for strings changes between runs, so sort the
    # stolen-only addresses to make the output file reproducible.
    unmatched_stolen = sorted(stolen_addresses.difference(categorized_nodes))

    # Write results to CSV
    with open('cross_reference_results.csv', 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow([
            'Category', 'Address', 'Original Category',
            'Transaction ID', 'Timestamp',
        ])

        # Addresses present in both inputs
        for match in matches:
            writer.writerow([
                'Match (Found in both lists)',
                match['address'],
                match['category'],
                match['tx_id'],
                match['timestamp'],
            ])

        # Categorized nodes that are not in the stolen list
        for address in unmatched_categorized:
            details = categorized_nodes[address]
            writer.writerow([
                'Categorized Only',
                address,
                details['category'],
                details['tx_id'],
                details['timestamp'],
            ])

        # Stolen addresses with no category, transaction ID or timestamp
        for address in unmatched_stolen:
            writer.writerow(['Stolen Only', address, '', '', ''])

    # Print summary
    print("\nCross-Reference Results:")
    print(f"Total Categorized Nodes: {len(categorized_nodes)}")
    print(f"Total Stolen Addresses: {len(stolen_addresses)}")
    print(f"Matches Found: {len(matches)}")
    print(f"Unmatched Categorized Nodes: {len(unmatched_categorized)}")
    print(f"Unmatched Stolen Addresses: {len(unmatched_stolen)}")
    print("\nDetailed results saved to 'cross_reference_results.csv'")
# Script entry point: run the cross-reference only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    cross_reference_addresses()