-
Notifications
You must be signed in to change notification settings - Fork 38
/
Copy pathPredict_NextDay_Open_STOCK_PRICE.py
151 lines (116 loc) · 4.04 KB
/
Predict_NextDay_Open_STOCK_PRICE.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# -*- coding: utf-8 -*-
"""
Created on Sun Dec 17 20:30:01 2017
@author: PUNEETMATHUR
I am creating this script to predict next day Opening Price based on
Today's Closing Price for any given stock
"""
import numpy as np
import pandas as pd
import os
# --- Load the price history, keep the needed columns, order rows by date ---
# Change your directory to wherever your dataset is stored.
os.chdir("E:\\BUSINESS\\APRESS\\ApplicationsOfMachineLearning\\Chapter16\\")
# Load the dataset of the company for which the prediction is required;
# the 'Date' column is parsed into datetimes so it can be sorted and filtered.
df = pd.read_csv("BalmerLawrieColtd.csv", parse_dates=['Date'])
print(df.head(1))
print(df.columns)
# Printed explicitly: a bare expression shows nothing when run as a script.
print(df.shape)
# Select only the columns required for the prediction.
cols = ['Date', 'Open', 'Close']
df = df[cols]
print(df.columns)
print(df.head(5))
# Report missing values and column dtypes so the need for cleaning is visible.
# No cleaning is applied here; add e.g. df.dropna() if the report shows gaps.
print(df.isnull().any())
print(df.dtypes)
# Sort chronologically and renumber the rows 0..n-1. Without the reset the
# original file-order labels survive the sort, so label lookups such as
# df['Open'][i] would not follow date order.
df = df.sort_values(by='Date', ascending=True).reset_index(drop=True)
# --- Plot the closing price: full history, calendar year 2017, late 2017 ---
# What story does it tell?
import matplotlib.pyplot as plt

# Each chart gets its own figure; without plt.figure() successive plt.plot
# calls in a script all draw onto the same axes.
# NOTE: when run non-interactively, call plt.show() to display the figures.
plt.figure()
plt.plot(df['Date'], df['Close'])

# Rows for calendar year 2017. The year accessor includes every 2017 row,
# including any dated exactly 1 January.
mask = df['Date'].dt.year == 2017
print(df.loc[mask])
# .copy() makes an independent frame so columns can be added to it later
# without pandas' SettingWithCopyWarning.
df2017 = df.loc[mask].copy()
print(df2017.head(5))
plt.figure()
plt.plot(df2017['Date'], df2017['Close'])

# Rows after 17 Nov 2017 up to and including 26 Dec 2017.
mask = (df['Date'] > '2017-11-17') & (df['Date'] <= '2017-12-26')
print(df.loc[mask])
dfnovdec2017 = df.loc[mask].copy()
print(dfnovdec2017.head(5))
plt.figure()
# Vertical tick labels keep the dates from overlapping.
plt.xticks(rotation='vertical')
plt.plot(dfnovdec2017['Date'], dfnovdec2017['Close'])
# --- Simple moving averages of the closing price ---
# 2017 data: rolling mean over 20 rows. The first 19 rows have no full
# window and therefore hold NaN.
# assign() returns a new frame, so this works (without a
# SettingWithCopyWarning) even when df2017 is a slice of df.
df2017 = df2017.assign(SMA=df2017['Close'].rolling(window=20).mean())
print(df2017.head(25))
df2017.plot(x='Date', y=['SMA', 'Close'])
# Does the Open and Closing price of the stock follow very well?
df2017.plot(x='Date', y=['Open', 'Close'])
# Correlation between the numeric columns only; 'Date' is left out because
# it is not a numeric column.
print(df2017[['Open', 'Close', 'SMA']].corr())

# Late-2017 data: rolling mean over 2 rows.
dfnovdec2017 = dfnovdec2017.assign(
    SMA=dfnovdec2017['Close'].rolling(window=2).mean()
)
print(dfnovdec2017.head(25))
dfnovdec2017.plot(x='Date', y=['SMA', 'Close'])
# --- Build the NextDayOpen column used as the prediction target ---
lnop = len(df['Open'])
print(lnop)
# shift(-1) moves every Open value up one row, so each row carries the Open
# of the row that follows it (df is sorted by date ascending at this point).
# It is positional, touches every row, and avoids chained assignment.
# The last row has no following row and is left as NaN.
df['NextDayOpen'] = df['Open'].shift(-1)
print(df['NextDayOpen'].head())
# Checking on the new data
print(df[['Open', 'NextDayOpen', 'Close']].head(5))
# Keep only rows that have both values, so the row without a next-day open
# does not reach the correlation or the regression.
dfnew = df[['Close', 'NextDayOpen']].dropna()
print(dfnew.head(5))
# Now checking if there is any correlation
print(dfnew.corr())
# --- Fit a linear regression: today's Close -> next day's Open ---
# train_test_split lives in sklearn.model_selection; the old
# sklearn.cross_validation module no longer exists in current scikit-learn.
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score

# Drop any row lacking either value so the fit never sees NaN.
model_data = dfnew.dropna()
# `price` is the regression target (the next day's opening price) and
# `features` the single predictor (today's closing price), matching the
# script's stated goal and the "Stock Likely to Open at" output below.
price = model_data['NextDayOpen']
print(price)
print(dfnew.columns)
features = model_data[['Close']]
# Shuffle both objects in one call so their rows stay aligned.
features, price = shuffle(features, price, random_state=0)
# Hold out 20% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    features, price, test_size=0.2, random_state=0
)
# Ordinary least-squares linear regression on the stock data
reg = linear_model.LinearRegression()
print(X_train.shape)
reg.fit(X_train, y_train)
y_pred = reg.predict(X_test)
print("Coefficients: ", reg.coef_)
# Mean squared error on the held-out rows
print("mean squared error: ", mean_squared_error(y_test, y_pred))
# R^2 score on the held-out rows
print("Variance score: ", r2_score(y_test, y_pred))
# --- Predict an opening price and print a one-standard-deviation range ---
# Spread of the gap between a day's close and the following open. This is
# the quantity the range below is meant to express; the standard deviation
# of the price series itself would measure long-run price movement instead.
standarddev = (dfnew['NextDayOpen'] - dfnew['Close']).std()
# Price fed to the fitted model. The script's stated goal is to pass today's
# closing price here; change it to the value you want a prediction for.
model_input = 269.05
# predict() expects a 2-D input: one row holding one feature value.
sensexClosePredict = reg.predict([[model_input]])
print("Stock Likely to Open at: ", sensexClosePredict, "(+-STD)")
# Lower bound first, then upper bound.
print("Stock Open between: ", sensexClosePredict - standarddev, " & ", sensexClosePredict + standarddev)