-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmachine learning model.py
61 lines (44 loc) · 2 KB
/
machine learning model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import numpy as np
import pandas as pd
import pickle
# Read the raw records. y is the last column; X is every column before it.
dataset = pd.read_csv('data.csv')
# NOTE: this X is captured before the numeric coercion below, so it still
# holds the raw, un-coerced values.
X, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values

# Force these feature columns to a numeric dtype. errors='coerce' turns any
# entry that cannot be parsed as a number into NaN instead of raising.
NUMERIC_COLUMNS = (
    'ca', 'thal', 'slope', 'chol', 'restecg',
    'thalach', 'fbs', 'exang', 'trestbps',
)
for column_name in NUMERIC_COLUMNS:
    dataset[column_name] = pd.to_numeric(dataset[column_name], errors='coerce')
from sklearn.base import TransformerMixin
class DataFrameImputer(TransformerMixin):
    """Impute missing values in a pandas DataFrame, column by column.

    Columns of dtype object are imputed with the most frequent value
    in the column. Columns of other types are imputed with the mean of
    the column.

    A column that has no non-missing values at all gets NaN as its fill
    value, so ``transform`` leaves it unchanged instead of ``fit`` failing.
    """

    def fit(self, X, y=None):
        """Learn one fill value per column of *X*.

        Args:
            X: DataFrame whose columns are inspected.
            y: Unused; kept so the signature matches the ``fit(X, y)``
                call convention used by ``TransformerMixin.fit_transform``.

        Returns:
            ``self``, with ``self.fill`` set to a Series of fill values
            indexed by ``X.columns``.
        """
        self.fill = pd.Series(
            [self._fill_value(X[c]) for c in X],
            index=X.columns,
        )
        return self

    def transform(self, X, y=None):
        """Return a copy of *X* with missing entries replaced.

        Args:
            X: DataFrame to fill; columns are matched to ``self.fill`` by
                label.
            y: Unused.

        Returns:
            The result of ``X.fillna(self.fill)``.
        """
        return X.fillna(self.fill)

    @staticmethod
    def _fill_value(column):
        """Return the fill value for a single column (a Series)."""
        if column.dtype == np.dtype('O'):
            # value_counts() drops NaN and sorts by frequency, so the first
            # index entry is the most frequent value. An all-missing column
            # yields an empty result; indexing it would raise IndexError.
            counts = column.value_counts()
            return counts.index[0] if len(counts) else np.nan
        return column.mean()
from sklearn.model_selection import train_test_split
# Impute on the DataFrame slice itself rather than on its ``.values``.
# Round-tripping through a NumPy array collapses all columns to one common
# dtype (object as soon as a single column is non-numeric), which would make
# DataFrameImputer fill every column with its most frequent value instead of
# the mean. It also throws away the column labels.
X = dataset.iloc[:, :-1]
# NOTE(review): fill values are learned from all rows before the split, so
# held-out rows influence the imputation. Consider fitting on the training
# partition only.
xt = DataFrameImputer().fit_transform(X)
# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    xt, y, test_size=0.2, random_state=0
)
from sklearn.preprocessing import StandardScaler
# Standardise the features: the scaler's statistics are learned from the
# training rows only and then applied to both partitions.
sc = StandardScaler().fit(x_train)
x_train, x_test = sc.transform(x_train), sc.transform(x_test)
from sklearn.tree import DecisionTreeClassifier
# Decision tree using the entropy split criterion; the fixed seed makes
# training reproducible.
classifier = DecisionTreeClassifier(criterion='entropy', random_state=0)
classifier.fit(x_train, y_train)
# Predictions for the held-out rows (not evaluated further in this script).
y_pred = classifier.predict(x_test)

# NOTE(review): the file name says "diabetes", but feature columns such as
# 'chol' and 'thalach' suggest a different dataset. Name kept unchanged
# because other code may load the model by this path; confirm and rename.
# NOTE(review): only the classifier is saved. It was trained on standardised
# features, so whoever loads it must apply the same scaling before predict().
filename = 'diabetes.pkl'
# Context manager guarantees the handle is flushed and closed even if
# pickling raises; the original left the file object open.
with open(filename, 'wb') as model_file:
    pickle.dump(classifier, model_file)