-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathadd_names.py
45 lines (40 loc) · 1.24 KB
/
add_names.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import sys
import os
from time import time
import pandas as pd
import csv
# Append an ``item_name`` column to every per-cluster 2-D CSV.
#
# Layout read by this script, relative to the current working directory:
#   data/<i>/names.csv  - one row per item: first field is the item id,
#                         the remaining field(s) make up the item name
#   2d/<i>.csv          - table for cluster <i>; it is rewritten in place
# Clusters are addressed as 0..n-1, where n is the number of entries found
# in data/ (assumes data/ holds nothing but the numbered cluster folders).

# The legacy filesystem-encoding switch only exists on Windows builds of
# CPython (and is deprecated in recent versions), so look it up instead of
# calling it unconditionally, which raises AttributeError elsewhere.
enable_legacy_fs_encoding = getattr(sys, '_enablelegacywindowsfsencoding', None)
if enable_legacy_fs_encoding is not None:
    enable_legacy_fs_encoding()

current_path = os.getcwd()
data_path = os.path.join(current_path, 'data')
print('Path to data folder:', data_path)

# Get number of clusters
n = len(os.listdir(data_path))
print('Number of clusters:', n)

for i in range(n):
    table_path = os.path.join(current_path, '2d', '{}.csv'.format(i))
    names_path = os.path.join(data_path, str(i), 'names.csv')

    df_2d = pd.read_csv(table_path, sep=',')
    # The result is written back over the input file, so a previous run may
    # already have added ``item_name``; drop it to avoid duplicated columns.
    df_2d = df_2d.drop(columns=['item_name'], errors='ignore')
    print(df_2d.shape)

    # newline='' lets the csv module do its own newline handling, as the csv
    # documentation requires for files passed to csv.reader.
    with open(names_path, "r", newline='') as f:
        # An item name may itself contain commas and therefore be split over
        # several fields: re-join everything after the id column.
        # Blank lines are parsed as [] and carry no item, so they are skipped.
        names = [
            ', '.join(field.strip() for field in row[1:])
            for row in csv.reader(f, delimiter=",")
            if row
        ]
    df_names = pd.DataFrame({'item_name': names})
    print(df_names.shape)

    # Both frames use a default positional index, so a column-wise concat
    # pairs row k of the table with the k-th name (NaN-padded if the lengths
    # differ).
    df_new = pd.concat([df_2d, df_names], axis=1)
    print(df_new.head())
    print(df_new.shape)
    df_new.to_csv(table_path, index=False, header=True)