-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathpreproc.py
58 lines (49 loc) · 993 Bytes
/
preproc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import pandas as pd
import numpy as np
import re
# Column labels; "Date", "Close" and "Name" are the ones preproc() reads.
# NOTE(review): presumably this mirrors the CSV header -- confirm against the data file.
cols = [
    "Date",
    "Open", "High", "Low", "Close",
    "Volume",
    "Name",
]

# Symbols expected in the "Name" column (31 entries).
# NOTE(review): these look like stock tickers; not referenced elsewhere in the visible code.
name = [
    "MMM", "AXP", "AAPL", "BA", "CAT", "CVX", "CSCO", "KO",
    "DIS", "XOM", "GE", "GS", "HD", "IBM", "INTC", "JNJ",
    "JPM", "MCD", "MRK", "MSFT", "NKE", "PFE", "PG", "TRV",
    "UTX", "UNH", "VZ", "WMT", "GOOGL", "AMZN", "AABA",
]
# TODO Create a separate folder for Volume correlation network analysis
# TODO Repeat workflow and compute graph similarity between networks.
def preproc(csv_path: str = "all_stocks_2006-01-01_to_2018-01-01.csv") -> pd.DataFrame:
    """Load the long-format stock CSV and reshape it to one Close column per name.

    Args:
        csv_path: Location of the CSV file to read. It must contain at least
            the "Date", "Name" and "Close" columns. Defaults to the file name
            this function originally had hard-coded, so existing zero-argument
            calls keep working.

    Returns:
        A DataFrame indexed by "Date" (as datetimes) whose columns are the
        distinct values of "Name" and whose cells hold the matching "Close"
        value. Combinations absent from the file come out as NaN.

    Raises:
        FileNotFoundError: If ``csv_path`` does not exist.
        KeyError: If one of the required columns is missing.
        ValueError: If the same (Date, Name) pair occurs more than once, since
            ``DataFrame.pivot`` refuses to pick between duplicate entries.
    """
    # Read the raw long-format table (one row per date and name).
    df = pd.read_csv(csv_path)
    # Convert 'Date' to real datetimes so the resulting index sorts and
    # slices chronologically instead of as plain strings.
    df["Date"] = pd.to_datetime(df["Date"])
    # Wide layout for time-series analysis: rows are dates, columns are names.
    return df.pivot(index="Date", columns="Name", values="Close")
# Script entry point: run the preprocessing once when executed directly.
# The returned DataFrame is not stored or printed here.
if __name__ == "__main__":
    preproc()