-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_cleaning.py
22 lines (18 loc) · 881 Bytes
/
data_cleaning.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
"""
Filename: data_cleaning.py
Program Description: Script that cleans issues in the UCI dataset for better processing in experiments and generates csv files for the data
Author: Keelin Sekerka-Bajbus, B00739421
References:
[1] https://stackoverflow.com/questions/21147058/pandas-to-csv-output-quoting-issue
"""
import pandas as pd
import csv
# Convert allbp.data and allbp.test into csv files.
def _convert_raw_file(source_path, target_path):
    """Read a '|'-separated raw file, drop its 'no' column and save it as csv.

    The raw file is parsed into two columns: an unnamed one ('') and 'no'.
    Only the unnamed column is kept and written to ``target_path``.

    Args:
        source_path: Path of the raw input file to read.
        target_path: Path of the csv file to write.

    Returns:
        The DataFrame that was written (the 'no' column already removed).
    """
    frame = pd.read_csv(
        source_path,
        sep='|',
        names=['', 'no'],
        encoding='utf-8',
    )
    kept = frame.drop(columns='no')
    # Quoting is switched off and a single space is used as the escape
    # character (see reference [1] in the module docstring).
    # NOTE: index=False is intentionally not passed, so the DataFrame index
    # is written out as well.
    kept.to_csv(
        target_path,
        header=False,
        quoting=csv.QUOTE_NONE,
        quotechar="",
        escapechar=" ",
    )
    return kept


data = _convert_raw_file('original_data/allbp.data', 'allbp_data.csv')
test = _convert_raw_file('original_data/allbp.test', 'allbp_test.csv')