Major Enhancement(Support Tensorflow Model)

blobcity · Nov 17, 2021 · 62c4389 · 62c4389
1 parent f22d991
commit 62c4389
Show file tree

Hide file tree

Showing 7 changed files with 153 additions and 41 deletions.
diff --git a/blobcity/code_gen/Generator.py b/blobcity/code_gen/Generator.py
@@ -215,15 +215,16 @@ def modeler(yml_data,key,with_doc,codes="",nb=None):
 
     The function adds code syntax related to the Machine learning model initialization and training.
     """
-    param=SourceCode.parameters.replace("PARAM", str(yml_data['model']['parameters']))
-    model=SourceCode.models_init.replace("MODELNAME", str(yml_data['model']['type']))
+    if yml_data['model']['type'] not in ['TF','tf','Tensorflow']:
+        param=SourceCode.parameters.replace("PARAM", str(yml_data['model']['parameters']))
+        model=SourceCode.models_init.replace("MODELNAME", str(yml_data['model']['type']))
+    else:param,model="\n",SourceCode.tf_load
+
     imports,metaDesc=SourceCode.models[key][yml_data['model']['type']],PyComments.models[key][yml_data['model']['type']]
 
     if nb!=None:
         nb['cells'][1]['source']=nb['cells'][1]['source']+imports
-        if with_doc:
-            nb['cells'].append(nbf.v4.new_markdown_cell(IpynbComments.models[key][yml_data['model']['type']]))
-
+        if with_doc:nb['cells'].append(nbf.v4.new_markdown_cell(IpynbComments.models[key][yml_data['model']['type']]))
         nb['cells'].append(nbf.v4.new_code_cell(param+model))
         return nb
     elif codes!="":
@@ -233,7 +234,7 @@ def modeler(yml_data,key,with_doc,codes="",nb=None):
             codes=codes+"# "+metaDesc
         return codes+param+model
 
-def model_metrics(key,codes="",nb=None,with_doc=False):
+def model_metrics(yml_data,key,codes="",nb=None,with_doc=False):
     """
     param1: dictionary : AutoAI steps data
     param2: string : Code syntax
@@ -246,11 +247,26 @@ def model_metrics(key,codes="",nb=None,with_doc=False):
     if with_doc: 
         if nb!=None: nb['cells'].append(nbf.v4.new_markdown_cell(IpynbComments.procedure['metrics']))
         else: codes=codes+PyComments.procedure['metrics']
+
     if nb!=None and codes=="":
-        nb['cells'].append(nbf.v4.new_code_cell(SourceCode.metric[key]))
+        if yml_data['model']['type'] not in ['TF','tf','Tensorflow']:
+            nb['cells'].append(nbf.v4.new_code_cell(SourceCode.metric[key]))
+        else:
+            if key == 'Classification':
+                tf_metric_type=SourceCode.tf_metric[key]['binary'] if yml_data['model']['classification_type']=='binary' else SourceCode.tf_metric[key]['multi']
+                nb['cells'].append(nbf.v4.new_code_cell(tf_metric_type))
+            else:nb['cells'].append(nbf.v4.new_code_cell(SourceCode.tf_metric[key]))
         return nb
     else:
-        return codes+SourceCode.metric[key]
+        if yml_data['model']['type'] not in ['TF','tf','Tensorflow']:
+           return codes+SourceCode.metric[key]
+        else:
+            if key == 'Classification':
+                tf_metric_type=SourceCode.tf_metric[key]['binary'] if yml_data['model']['classification_type']=='binary' else SourceCode.tf_metric[key]['multi']
+                return codes+tf_metric_type
+            else:
+                return codes+SourceCode.tf_metric[key]
+
 
 def pycoder(yml_data,CGpath,doc=False):
     """
@@ -270,7 +286,7 @@ def pycoder(yml_data,CGpath,doc=False):
     codes=add_corr_matrix(codes=codes,with_doc=doc)
     codes=splits(codes=codes,with_doc=doc)
     codes=modeler(yml_data,key,doc,codes=codes)
-    codes=model_metrics(key,codes=codes,with_doc=doc)
+    codes=model_metrics(yml_data,key,codes=codes,with_doc=doc)
     write_pycode(CGpath,codes)
 
 def ipynbcoder(yml_data,CGpath,doc=True):
@@ -292,7 +308,7 @@ def ipynbcoder(yml_data,CGpath,doc=True):
     nb=add_corr_matrix(nb=nb,with_doc=doc)
     nb=splits(nb=nb,with_doc=doc)
     nb=modeler(yml_data,key,doc,nb=nb) 
-    nb=model_metrics(key,nb=nb,with_doc=doc)
+    nb=model_metrics(yml_data,key,nb=nb,with_doc=doc)
     write_ipynbcode(CGpath,nb)
 
 def code_generator(data,filepath,doc=None):

diff --git a/blobcity/code_gen/IpynbMeta.py b/blobcity/code_gen/IpynbMeta.py
@@ -19,6 +19,7 @@
 class IpynbComments:
     models={
         'Classification':{
+            'TF':"""### Neural Network/Deep Learning Model \nDeep learning is a subset of machine learning, which is essentially a neural network with three or more layers. These neural networks attempt to simulate the behavior of the human brain-albeit far from matching its ability-allowing it to 'learn' from large amounts of data. While a neural network with a single layer can still make approximate predictions, additional hidden layers can help to optimize and refine for accuracy.""",
             'LinearDiscriminantAnalysis':"""### Model\n A classifier with a linear decision boundary, generated by fitting class conditional densities to the data and using Bayes’ rule.\n The model fits a Gaussian density to each class, assuming that all classes share the same covariance matrix.\n The fitted model can also be used to reduce the dimensionality of the input by projecting it to the most discriminative directions, using the transform method.
             
             1. solver: Solver to use, possible values: {'svd', 'lsqr', 'eigen'}
@@ -337,6 +338,7 @@ class IpynbComments:
 7. **n_iter_no_change** -> Number of iterations with no improvement to wait before early stopping."""
         },
         'Regression':{
+            'TF':"""### Neural Network/Deep Learning Model \nDeep learning is a subset of machine learning, which is essentially a neural network with three or more layers. These neural networks attempt to simulate the behavior of the human brain-albeit far from matching its ability-allowing it to 'learn' from large amounts of data. While a neural network with a single layer can still make approximate predictions, additional hidden layers can help to optimize and refine for accuracy.""",
             'OrthogonalMatchingPursuit':"""### Model \nOrthogonalMatchingPursuit and orthogonal_mp implements the OMP algorithm for approximating the fit of a linear model with constraints imposed on the number of non-zero coefficients \n
             OMP is based on a greedy algorithm that includes at each step the atom most highly correlated with the current residual. It is similar to the simpler matching pursuit (MP) method, but better in that at each iteration, the residual is recomputed using an orthogonal projection on the space of the previously chosen dictionary elements.
             
@@ -765,7 +767,6 @@ class IpynbComments:
 2. **tol**  -> Stopping criterion.
 
 3. **max_iter** -> The maximal number of iterations for the solver.""",
-'TF':'Neural Network Model Description'
         }
     }
 

diff --git a/blobcity/code_gen/PyMeta.py b/blobcity/code_gen/PyMeta.py
@@ -42,6 +42,7 @@ class PyComments:
             'LGBMClassifier':'LightGBM is a gradient boosting framework that uses tree based learning algorithms.\n# It is designed to be distributed and efficiency\r\n',
             'PassiveAggressiveClassifier':'The passive-aggressive algorithms are a family of algorithms for large-scale learning.\n# They are similar to the Perceptron in that they do not require a learning rate. However, contrary to the Perceptron,\n# they include a regularization parameter C.\r\n',
             'LinearDiscriminantAnalysis':'A classifier with a linear decision boundary, generated by fitting class conditional densities to the data and using Bayes’ rule.\n #The model fits a Gaussian density to each class, assuming that all classes share the same covariance matrix.\n# The fitted model can also be used to reduce the dimensionality of the input by projecting it to the most discriminative directions, using the transform method.\r\n',
+            'TF':"Deep learning is a subset of machine learning, which is essentially a neural network with three or more layers.\n# These neural networks attempt to simulate the behavior of the human brain—albeit far from matching its ability—allowing it to “learn” from large amounts of data.\n# While a neural network with a single layer can still make approximate predictions,\n# additional hidden layers can help to optimize and refine for accuracy."
         },
         'Regression':{
             'OrthogonalMatchingPursuit':"OrthogonalMatchingPursuit and orthogonal_mp implements the OMP algorithm for approximating\n# the fit of a linear model with constraints imposed on the number of non-zero coefficients\n# OMP is based on a greedy algorithm that includes at each step the atom most highly\n# correlated with the current residual. It is similar to the simpler matching pursuit (MP) method, but better in that at each iteration, the residual is recomputed using an orthogonal projection on the space of the previously chosen dictionary elements.",
@@ -72,6 +73,7 @@ class PyComments:
             'HuberRegressor':"Linear regression model that is robust to outliers.\n#  The Huber Regressor optimizes the squared loss for the samples\n#  where |(y - X'w) / sigma| < epsilon and the absolute loss for the samples where |(y - X'w) / sigma| > epsilon, where w and sigma are parameters to be optimized.\n# The parameter sigma makes sure that if y is scaled up or down by a certain factor, one does not need to rescale epsilon to achieve the same robustness.\n# Note that this does not take into account the fact that the different features of X may be of different scales.\n# This makes sure that the loss function is not heavily influenced by the outliers while not completely ignoring their effect.\r\n",
             'ElasticNet':'Elastic Net first emerged as a result of critique on Lasso, whose variable selection can be too dependent on data and thus unstable.\n# The solution is to combine the penalties of Ridge regression and Lasso to get the best of both worlds.\r\n',
             'PoissonRegressor':"Poisson regression is a generalized linear model form of regression used to model count data and contingency tables.\n# It assumes the response variable or target variable Y has a Poisson distribution, and assumes the logarithm of its expected value can be modeled by a linear combination of unknown parameters.\n# It is sometimes known as a log-linear model, especially when used to model contingency tables.\r\n",
+            'TF':"Deep learning is a subset of machine learning, which is essentially a neural network with three or more layers.\n# These neural networks attempt to simulate the behavior of the human brain—albeit far from matching its ability—allowing it to “learn” from large amounts of data.\n# While a neural network with a single layer can still make approximate predictions,\n# additional hidden layers can help to optimize and refine for accuracy."
         }
     }
 

diff --git a/blobcity/code_gen/SourceCodes.py b/blobcity/code_gen/SourceCodes.py
@@ -87,13 +87,13 @@ class SourceCode:
     }
 
     cor_matrix="f,ax = plt.subplots(figsize=(18, 18))\rmatrix = np.triu(X.corr())\rse.heatmap(X.corr(), annot=True, linewidths=.5, fmt= '.1f',ax=ax, mask=matrix)\rplt.show()\n"
-    tf_model_load="model = tf.keras.models.load_model(./autoaimodel.h5)"
-    tf_model_metric={
+    tf_load="model = bc.load('PICKLE FILE PATH')\r#summary\rnn=model.model\rnn.summary()\nnn.fit(X_train,Y_train,epochs=10)"
+    tf_metric={
         'Classification':{
-            'binary':"y_pred=model.predict(Y_test)\ry_pred=np.round(y_pred)# Classification Report\rprint(classification_report(Y_test,y_pred))\r\n",
-            'multi':"y_pred=model.predict(Y_test)\ry_pred=np.argmax(y_pred,axis=1)# Classification Report\rprint(classification_report(Y_test,y_pred))\r\n"
+            'binary':"\ry_pred=nn.predict(X_test)\ry_pred=np.round(y_pred)# Classification Report\rprint(classification_report(Y_test,y_pred))\r\n",
+            'multi':"\rnn=model.model\ry_pred=nn.predict(test_df)\ry_pred=np.argmax(y_pred,axis=1)# Classification Report\rprint(classification_report(Y_test,y_pred))\r\n"
         },
-        'Regression':"# Metrics\r\ny_pred=model.predict(X_test)\rprint('R2 Score: {:.2f}'.format(r2_score(Y_test,y_pred)))\r"+\
+        'Regression':"# Metrics\r\ntest_df = pd.DataFrame(X_test,columns = X.columns.to_list())\nnn=model.model\ry_pred=nn.predict(test_df)\rprint('R2 Score: {:.2f}'.format(r2_score(Y_test,y_pred)))\r"+\
             "print('Mean Absolute Error {:.2f}'.format(mean_absolute_error(Y_test,y_pred)))\r"+\
             "print('Mean Squared Error {:.2f}'.format(mean_squared_error(Y_test,y_pred)))"
     }
@@ -121,6 +121,7 @@ class SourceCode:
             'LinearDiscriminantAnalysis':'from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\r\n',
             'PassiveAggressiveClassifier':'from sklearn.linear_model import PassiveAggressiveClassifier\r\n',
             'LGBMClassifier':'from lightgbm import LGBMClassifier\r\n',
+            'TF':'import blobcity as bc\r\n'
         },
         'Regression':{
             'OrthogonalMatchingPursuit':'from sklearn.linear_model import OrthogonalMatchingPursuit\r\n',
@@ -151,6 +152,6 @@ class SourceCode:
             'HuberRegressor':'from sklearn.linear_model import HuberRegressor\r\n',
             'ElasticNet':'from sklearn.linear_model import ElasticNet\r\n',
             'PoissonRegressor':'from sklearn.linear_model import PoissonRegressor\r\n',
-            'TF':'import tensorflow as tf\r\n'
+            'TF':'import blobcity as bc\r\n'
         }
     }
diff --git a/blobcity/main/driver.py b/blobcity/main/driver.py
@@ -33,7 +33,9 @@ def train(file=None, df=None, target=None,features=None,use_neural=False,accurac
 
     param3: string: target/dependent column name.
 
-    param4: float: range[0.1,1.0] 
+    param4: boolean: whether to train tensorflow models
+
+    param5: float: range[0.1,1.0] 
 
     return: Model Class Object
     Performs a model search on the data provided. A yaml file is generated once the best fit model configuration