Skip to content

Commit

Permalink
Major Enhancement(Support Tensorflow Model)
Browse files Browse the repository at this point in the history
  • Loading branch information
Thilakraj1998 committed Nov 17, 2021
1 parent f22d991 commit 62c4389
Show file tree
Hide file tree
Showing 7 changed files with 153 additions and 41 deletions.
36 changes: 26 additions & 10 deletions blobcity/code_gen/Generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,15 +215,16 @@ def modeler(yml_data,key,with_doc,codes="",nb=None):
The function adds code syntax related to the Machine learning model initialization and training.
"""
param=SourceCode.parameters.replace("PARAM", str(yml_data['model']['parameters']))
model=SourceCode.models_init.replace("MODELNAME", str(yml_data['model']['type']))
if yml_data['model']['type'] not in ['TF','tf','Tensorflow']:
param=SourceCode.parameters.replace("PARAM", str(yml_data['model']['parameters']))
model=SourceCode.models_init.replace("MODELNAME", str(yml_data['model']['type']))
else:param,model="\n",SourceCode.tf_load

imports,metaDesc=SourceCode.models[key][yml_data['model']['type']],PyComments.models[key][yml_data['model']['type']]

if nb!=None:
nb['cells'][1]['source']=nb['cells'][1]['source']+imports
if with_doc:
nb['cells'].append(nbf.v4.new_markdown_cell(IpynbComments.models[key][yml_data['model']['type']]))

if with_doc:nb['cells'].append(nbf.v4.new_markdown_cell(IpynbComments.models[key][yml_data['model']['type']]))
nb['cells'].append(nbf.v4.new_code_cell(param+model))
return nb
elif codes!="":
Expand All @@ -233,7 +234,7 @@ def modeler(yml_data,key,with_doc,codes="",nb=None):
codes=codes+"# "+metaDesc
return codes+param+model

def model_metrics(key,codes="",nb=None,with_doc=False):
def model_metrics(yml_data,key,codes="",nb=None,with_doc=False):
"""
param1: dictionary : AutoAI steps data
param2: string : Code syntax
Expand All @@ -246,11 +247,26 @@ def model_metrics(key,codes="",nb=None,with_doc=False):
if with_doc:
if nb!=None: nb['cells'].append(nbf.v4.new_markdown_cell(IpynbComments.procedure['metrics']))
else: codes=codes+PyComments.procedure['metrics']

if nb!=None and codes=="":
nb['cells'].append(nbf.v4.new_code_cell(SourceCode.metric[key]))
if yml_data['model']['type'] not in ['TF','tf','Tensorflow']:
nb['cells'].append(nbf.v4.new_code_cell(SourceCode.metric[key]))
else:
if key == 'Classification':
tf_metric_type=SourceCode.tf_metric[key]['binary'] if yml_data['model']['classification_type']=='binary' else SourceCode.tf_metric[key]['multi']
nb['cells'].append(nbf.v4.new_code_cell(tf_metric_type))
else:nb['cells'].append(nbf.v4.new_code_cell(SourceCode.tf_metric[key]))
return nb
else:
return codes+SourceCode.metric[key]
if yml_data['model']['type'] not in ['TF','tf','Tensorflow']:
return codes+SourceCode.metric[key]
else:
if key == 'Classification':
tf_metric_type=SourceCode.tf_metric[key]['binary'] if yml_data['model']['classification_type']=='binary' else SourceCode.tf_metric[key]['multi']
return codes+tf_metric_type
else:
return codes+SourceCode.tf_metric[key]


def pycoder(yml_data,CGpath,doc=False):
"""
Expand All @@ -270,7 +286,7 @@ def pycoder(yml_data,CGpath,doc=False):
codes=add_corr_matrix(codes=codes,with_doc=doc)
codes=splits(codes=codes,with_doc=doc)
codes=modeler(yml_data,key,doc,codes=codes)
codes=model_metrics(key,codes=codes,with_doc=doc)
codes=model_metrics(yml_data,key,codes=codes,with_doc=doc)
write_pycode(CGpath,codes)

def ipynbcoder(yml_data,CGpath,doc=True):
Expand All @@ -292,7 +308,7 @@ def ipynbcoder(yml_data,CGpath,doc=True):
nb=add_corr_matrix(nb=nb,with_doc=doc)
nb=splits(nb=nb,with_doc=doc)
nb=modeler(yml_data,key,doc,nb=nb)
nb=model_metrics(key,nb=nb,with_doc=doc)
nb=model_metrics(yml_data,key,nb=nb,with_doc=doc)
write_ipynbcode(CGpath,nb)

def code_generator(data,filepath,doc=None):
Expand Down
3 changes: 2 additions & 1 deletion blobcity/code_gen/IpynbMeta.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
class IpynbComments:
models={
'Classification':{
'TF':"""### Neural Network/Deep Learning Model \nDeep learning is a subset of machine learning, which is essentially a neural network with three or more layers. These neural networks attempt to simulate the behavior of the human brain-albeit far from matching its ability-allowing it to 'learn' from large amounts of data. While a neural network with a single layer can still make approximate predictions, additional hidden layers can help to optimize and refine for accuracy.""",
'LinearDiscriminantAnalysis':"""### Model\n A classifier with a linear decision boundary, generated by fitting class conditional densities to the data and using Bayes’ rule.\n The model fits a Gaussian density to each class, assuming that all classes share the same covariance matrix.\n The fitted model can also be used to reduce the dimensionality of the input by projecting it to the most discriminative directions, using the transform method.
1. solver: Solver to use, possible values: {'svd', 'lsqr', 'eigen'}
Expand Down Expand Up @@ -337,6 +338,7 @@ class IpynbComments:
7. **n_iter_no_change** -> Number of iterations with no improvement to wait before early stopping."""
},
'Regression':{
'TF':"""### Neural Network/Deep Learning Model \nDeep learning is a subset of machine learning, which is essentially a neural network with three or more layers. These neural networks attempt to simulate the behavior of the human brain-albeit far from matching its ability-allowing it to 'learn' from large amounts of data. While a neural network with a single layer can still make approximate predictions, additional hidden layers can help to optimize and refine for accuracy.""",
'OrthogonalMatchingPursuit':"""### Model \nOrthogonalMatchingPursuit and orthogonal_mp implements the OMP algorithm for approximating the fit of a linear model with constraints imposed on the number of non-zero coefficients \n
OMP is based on a greedy algorithm that includes at each step the atom most highly correlated with the current residual. It is similar to the simpler matching pursuit (MP) method, but better in that at each iteration, the residual is recomputed using an orthogonal projection on the space of the previously chosen dictionary elements.
Expand Down Expand Up @@ -765,7 +767,6 @@ class IpynbComments:
2. **tol** -> Stopping criterion.
3. **max_iter** -> The maximal number of iterations for the solver.""",
'TF':'Neural Network Model Description'
}
}

Expand Down
2 changes: 2 additions & 0 deletions blobcity/code_gen/PyMeta.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ class PyComments:
'LGBMClassifier':'LightGBM is a gradient boosting framework that uses tree based learning algorithms.\n# It is designed to be distributed and efficiency\r\n',
'PassiveAggressiveClassifier':'The passive-aggressive algorithms are a family of algorithms for large-scale learning.\n# They are similar to the Perceptron in that they do not require a learning rate. However, contrary to the Perceptron,\n# they include a regularization parameter C.\r\n',
'LinearDiscriminantAnalysis':'A classifier with a linear decision boundary, generated by fitting class conditional densities to the data and using Bayes’ rule.\n #The model fits a Gaussian density to each class, assuming that all classes share the same covariance matrix.\n# The fitted model can also be used to reduce the dimensionality of the input by projecting it to the most discriminative directions, using the transform method.\r\n',
'TF':"Deep learning is a subset of machine learning, which is essentially a neural network with three or more layers.\n# These neural networks attempt to simulate the behavior of the human brain—albeit far from matching its ability—allowing it to “learn” from large amounts of data.\n# While a neural network with a single layer can still make approximate predictions,\n# additional hidden layers can help to optimize and refine for accuracy."
},
'Regression':{
'OrthogonalMatchingPursuit':"OrthogonalMatchingPursuit and orthogonal_mp implements the OMP algorithm for approximating\n# the fit of a linear model with constraints imposed on the number of non-zero coefficients\n# OMP is based on a greedy algorithm that includes at each step the atom most highly\n# correlated with the current residual. It is similar to the simpler matching pursuit (MP) method, but better in that at each iteration, the residual is recomputed using an orthogonal projection on the space of the previously chosen dictionary elements.",
Expand Down Expand Up @@ -72,6 +73,7 @@ class PyComments:
'HuberRegressor':"Linear regression model that is robust to outliers.\n# The Huber Regressor optimizes the squared loss for the samples\n# where |(y - X'w) / sigma| < epsilon and the absolute loss for the samples where |(y - X'w) / sigma| > epsilon, where w and sigma are parameters to be optimized.\n# The parameter sigma makes sure that if y is scaled up or down by a certain factor, one does not need to rescale epsilon to achieve the same robustness.\n# Note that this does not take into account the fact that the different features of X may be of different scales.\n# This makes sure that the loss function is not heavily influenced by the outliers while not completely ignoring their effect.\r\n",
'ElasticNet':'Elastic Net first emerged as a result of critique on Lasso, whose variable selection can be too dependent on data and thus unstable.\n# The solution is to combine the penalties of Ridge regression and Lasso to get the best of both worlds.\r\n',
'PoissonRegressor':"Poisson regression is a generalized linear model form of regression used to model count data and contingency tables.\n# It assumes the response variable or target variable Y has a Poisson distribution, and assumes the logarithm of its expected value can be modeled by a linear combination of unknown parameters.\n# It is sometimes known as a log-linear model, especially when used to model contingency tables.\r\n",
'TF':"Deep learning is a subset of machine learning, which is essentially a neural network with three or more layers.\n# These neural networks attempt to simulate the behavior of the human brain—albeit far from matching its ability—allowing it to “learn” from large amounts of data.\n# While a neural network with a single layer can still make approximate predictions,\n# additional hidden layers can help to optimize and refine for accuracy."
}
}

Expand Down
13 changes: 7 additions & 6 deletions blobcity/code_gen/SourceCodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,13 +87,13 @@ class SourceCode:
}

cor_matrix="f,ax = plt.subplots(figsize=(18, 18))\rmatrix = np.triu(X.corr())\rse.heatmap(X.corr(), annot=True, linewidths=.5, fmt= '.1f',ax=ax, mask=matrix)\rplt.show()\n"
tf_model_load="model = tf.keras.models.load_model(./autoaimodel.h5)"
tf_model_metric={
tf_load="model = bc.load('PICKLE FILE PATH')\r#summary\rnn=model.model\rnn.summary()\nnn.fit(X_train,Y_train,epochs=10)"
tf_metric={
'Classification':{
'binary':"y_pred=model.predict(Y_test)\ry_pred=np.round(y_pred)# Classification Report\rprint(classification_report(Y_test,y_pred))\r\n",
'multi':"y_pred=model.predict(Y_test)\ry_pred=np.argmax(y_pred,axis=1)# Classification Report\rprint(classification_report(Y_test,y_pred))\r\n"
'binary':"\ry_pred=nn.predict(X_test)\ry_pred=np.round(y_pred)# Classification Report\rprint(classification_report(Y_test,y_pred))\r\n",
'multi':"\rnn=model.model\ry_pred=nn.predict(test_df)\ry_pred=np.argmax(y_pred,axis=1)# Classification Report\rprint(classification_report(Y_test,y_pred))\r\n"
},
'Regression':"# Metrics\r\ny_pred=model.predict(X_test)\rprint('R2 Score: {:.2f}'.format(r2_score(Y_test,y_pred)))\r"+\
'Regression':"# Metrics\r\ntest_df = pd.DataFrame(X_test,columns = X.columns.to_list())\nnn=model.model\ry_pred=nn.predict(test_df)\rprint('R2 Score: {:.2f}'.format(r2_score(Y_test,y_pred)))\r"+\
"print('Mean Absolute Error {:.2f}'.format(mean_absolute_error(Y_test,y_pred)))\r"+\
"print('Mean Squared Error {:.2f}'.format(mean_squared_error(Y_test,y_pred)))"
}
Expand Down Expand Up @@ -121,6 +121,7 @@ class SourceCode:
'LinearDiscriminantAnalysis':'from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\r\n',
'PassiveAggressiveClassifier':'from sklearn.linear_model import PassiveAggressiveClassifier\r\n',
'LGBMClassifier':'from lightgbm import LGBMClassifier\r\n',
'TF':'import blobcity as bc\r\n'
},
'Regression':{
'OrthogonalMatchingPursuit':'from sklearn.linear_model import OrthogonalMatchingPursuit\r\n',
Expand Down Expand Up @@ -151,6 +152,6 @@ class SourceCode:
'HuberRegressor':'from sklearn.linear_model import HuberRegressor\r\n',
'ElasticNet':'from sklearn.linear_model import ElasticNet\r\n',
'PoissonRegressor':'from sklearn.linear_model import PoissonRegressor\r\n',
'TF':'import tensorflow as tf\r\n'
'TF':'import blobcity as bc\r\n'
}
}
4 changes: 3 additions & 1 deletion blobcity/main/driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ def train(file=None, df=None, target=None,features=None,use_neural=False,accurac
param3: string: target/dependent column name.
param4: float: range[0.1,1.0]
param4: boolean: whether to train tensorflow models
param5: float: range[0.1,1.0]
return: Model Class Object
Performs a model search on the data provided. A yaml file is generated once the best fit model configuration
Expand Down
Loading

0 comments on commit 62c4389

Please sign in to comment.