KG and Unit tests changes (#137)

* added single tech_summary input for standardization
* fixed test/unit/test_assessment.py
* added first version of clustering
* fixed new .sql dependencies
* sync with main repo
* fixed .sql
* fixed bug in unit tests
* fixed tests
* fixed test_standardization.py

Co-authored-by: mmerler <michele@micheles-mbp.watson.ibm.com>
Co-authored-by: mmerler <michele@micheles-mbp.myfiosgateway.com>
Co-authored-by: mmerler <michele@Micheles-MBP.fritz.box>
konveyor · Jul 25, 2022 · 0466bb3 · 0466bb3
1 parent fcc75e8
commit 0466bb3
Show file tree

Hide file tree

Showing 7 changed files with 13,046 additions and 13,040 deletions.
diff --git a/db/1.0.4.sql b/db/1.0.4.sql
diff --git a/service/clustering.py b/service/clustering.py
@@ -19,6 +19,8 @@
 from collections import OrderedDict
 import logging
 from service.utils import Utils
+import ast
+import numpy as np
 
 import configparser
 
@@ -30,7 +32,7 @@
 
 class Clustering():
     """
-    This class for containerize Clustering
+    This class for Clustering
     """
 
     def __init__(self):
@@ -39,59 +41,21 @@ def __init__(self):
             Setting up the logging level as info and opens logfile in write mode to capture the logs in text file
          """
 
-    def output_to_ui_assessment(self, appL):
-        """
-        output_to_ui assessment methods takes the final assessed data as input and formats it & keeps
-         only required fields and returns it as output assessment response
-
-        """
-        pAppL = []
-        print('ok here!')
-        try :
-            for app in appL:
-
-
-
-                # Order dictionry to fix the order of columns in the output
-                pApp = OrderedDict()
-
-
-                # Raw Fields
-                pApp['Name'] = ''
-                if 'Name' in app:
-                    pApp['application_name'] = app["Name"]
-
-                pApp['application_description'] = ''
-                if 'Desc' in app:
-                    pApp['application_description'] = app["Desc"]
-
-                pApp['component_name'] = ''
-                if 'Cmpt' in app:
-                    pApp['component_name'] = app["Cmpt"]
+        logging.basicConfig(level=logging.INFO)
 
+        # read entities
+        entities_filepath = os.path.join(config['general']['kg_dir'], config['tca']['entities'])
+        if os.path.exists(entities_filepath):
+            with open(entities_filepath, 'r') as f:
+                entities_json = json.load(f)
+                self.entity_names = np.empty(len(entities_json['data']), dtype='object')
+                for i, en in enumerate(entities_json['data']):
+                    self.entity_names[i] = entities_json['data'][en]['entity_name']
+        else:
+            self.entities = {}
+            logging.error(f'entities[{entities_filepath}] is empty or not exists')
 
-                # Curated
-                pApp['OS'] = eval(app["OS"])
-                pApp['Lang'] = eval(app["Lang"])
-                pApp["App Server"] = eval(app["App Server"])
-                pApp["App"] = eval(app["Dependent Apps"])
-                pApp["Runtime"] = eval(app["Runtime"])
-                pApp["Lib"] = eval(app["Libs"])
 
-                pApp['assessment_reason'] = app['Reason']
-
-                try :
-                    pApp["KG Version"] = app["KG Version"]
-                except :
-                    pApp["KG Version"] = 'Not Available'
-
-
-                pAppL.append(pApp)
-
-            return pAppL
-
-        except Exception as e:
-            logging.error(str(e))
 
 
     def output_to_ui_clustering(self, appL):
@@ -100,30 +64,40 @@ def output_to_ui_clustering(self, appL):
          only required fields and returns it as output assessment response
 
         """
-        pAppL = []
-        for app in appL:
 
-            print(app)
+        # initialize tech stack
+        tech_stack = np.zeros((len(appL), self.entity_names.shape[0]), dtype='bool')
+        appL_array = np.array(appL)
+
+        # find unique clusters
+        fields = ['OS', 'Lang', 'App Server', 'Dependent Apps', 'Runtime', 'Libs']
+        for i, app in enumerate(appL):
+            for k in fields:
+                txt = ast.literal_eval(app[k])
+                for t in txt.keys():
+                    entity = list(txt[t].keys())[0]
+
+                    # keep only root of hierarchical entity
+                    if entity.find('|') > 0:
+                        entity = f"{entity.split('|')[0]}|*"
+
+                    tech_stack[i][self.entity_names == entity] = 1
+
+        # find unique clusters
+        clusters, index, counts = np.unique(tech_stack, return_inverse=True, return_counts=True, axis=0)
+
+        # sort clusters by number of apps
+        order = np.argsort(counts)[::-1]
 
-            # Order dictionry to fix the order of columns in the output
-            pApp = OrderedDict()
+        clusters = clusters[order]
+        counts = counts[order]
 
-            # Raw Data
-            pApp['Name'] = ''
-            if 'application_name' in app:
-                pApp['Name'] = app["application_name"]
-            pApp['Desc'] = ''
-            if 'application_description' in app:
-                pApp['Desc'] = app["application_description"]
-            pApp['Cmpt'] = ''
-            if 'component_name' in app:
-                pApp['Cmpt'] = app["component_name"]
+        unique_clusters = []
+        for i in range(clusters.shape[0]):
+            cl = { "id": i, "name": f'unique_tech_stack_{i}',  "type": 'unique', "tech_stack": list(self.entity_names[clusters[i] == 1]),\
+                   "num_elements": counts[i], "apps": list(appL_array[index == order[i]]) }
 
-            # AI Insights
-            pApp["Ref Dockers"] = ""
-            pApp["Confidence"] = 0
+            unique_clusters.append(cl)
 
-            # pAppL['Clusters'].append(pApp)
-            pAppL.append(pApp)
 
-        return pAppL
+        return unique_clusters
diff --git a/service/functions.py b/service/functions.py
@@ -131,14 +131,10 @@ def assessment(self,auth_url,headers,auth_headers,app_data):
             if not is_valid:
                 return resp, code
 
-            print('ok here 1')
             appL = self.standardize.app_standardizer(app_data)
 
-            print('ok here 2')
             appL = self.assess.app_validation(appL)
 
-            print('ok here 3')
-
             # Generate output for UI
             output = self.assess.output_to_ui_assessment(appL)
             logging.info(f'{str(datetime.now())} output assessment num: {str(len(output))} ')
@@ -176,18 +172,15 @@ def planning(self, auth_url, headers, auth_headers, assessment_data, catalog):
     def clustering(self, auth_url, headers, auth_headers, app_data):
         """
         Invokes detect_access_token for accesstoken validation and if it's valid, it will call
-        compose_app for assessment and app_validation for validation the assessed application data
-        and finally call output_to_ui_clustering to return the formatted assessment data
+        output_to_ui_clustering to return the formatted assessment data
         """
         try:
             resp, code, is_valid = self.detect_access_token(auth_url, headers, auth_headers)
             if not is_valid:
                 return resp, code
 
-            appL = self.cluster.output_to_ui_assessment(app_data)
-
             # Generate output for UI
-            clusters = self.cluster.output_to_ui_clustering(appL)
+            clusters = self.cluster.output_to_ui_clustering(app_data)
             logging.info(f'{str(datetime.now())} output clustering num: {str(len(clusters))} ')
             return dict(status=201, message="Clustering completed successfully!", clusters=clusters), 201
         except Exception as e:

diff --git a/service/planning.py b/service/planning.py
@@ -411,10 +411,6 @@ def ui_to_input_assessment(self, assessment_data):
                 # Curated
                 pApp['OS'] = eval(app["OS"])
 
-                print('+++++++++OS++++++++++++')
-                print( pApp['OS'])
-                print('+++++++++END OS++++++++++++')
-
                 pApp['Lang'] = eval(app["Lang"])
                 pApp["App Server"] = eval(app["App Server"])
                 pApp["App"] = eval(app["Dependent Apps"])

diff --git a/service/routes.py b/service/routes.py
@@ -121,13 +121,14 @@
     "Recommend": fields.String(required=False, description='Recommended disposition')
     })
 
-# clustering_model = api.model('Clustering', {
-#     "Name": fields.String(required=True, description='Name of the application'),
-#     "Desc": fields.String(required=True, description='Description of the application'),
-#     "Cmpt": fields.String(required=True, description='Component/Deployment Unit of the application'),
-#     "Ref Dockers": fields.String(required=False, description='Description of the application'),
-#     "Confidence": fields.Float(required=False, description='Confidence of the assessment')
-#     })
+clustering_model = api.model('Clustering', {
+    "id": fields.Integer(required=True, description='Cluster ID'),
+    "name": fields.String(required=True, description='Cluster name'),
+    "type": fields.String(required=True, description='Cluster type'),
+    "tech_stack": fields.List(fields.String, required=True, description='List of tech stack elements'),
+    "num_elements": fields.Integer(required=True, description='Number of elements'),
+    "apps": fields.List(fields.Nested(assessment_model), required=True, description='An array of applications')
+    })
 
 
 output_model_assessment = api.model('Standardization Output', {
@@ -142,11 +143,11 @@
     "containerization": fields.List(fields.Nested(planning_model), required=True, description='An array of containerization planning for application workload')
     })
 
-# output_model_clustering = api.model('Clustering Output', {
-#     "status": fields.Integer(required=True, description='Status of the call'),
-#     "message": fields.String(required=True, description='Status message'),
-#     "clusters": fields.List(fields.Nested(clustering_model), required=True, description='An array of containerization clustering for application workload')
-#     })
+output_model_clustering = api.model('Clustering Output', {
+    "status": fields.Integer(required=True, description='Status of the call'),
+    "message": fields.String(required=True, description='Status message'),
+    "clusters": fields.List(fields.Nested(clustering_model), required=True, description='An array of containerization clustering for application workload')
+    })
 
 # @api.route('/match', strict_slashes=False)
 # class Standardization(Resource):
@@ -228,28 +229,28 @@ def post(self):
 
         return functions.do_planning(auth_url,dict(request.headers),auth_headers,api.payload,catalog)
 
-# @api.route('/clustering', strict_slashes=False)
-# class ContainerizationClustering(Resource):
-#     """
-#     ContainerizationClustering class creates the clustering in the form of clustering_model for the
-#     applications/components details given in the assessment_model
-#     """
-#     @api.doc('create_clustering')
-#     @api.response(201, 'Clustering Completed successfully!')
-#     @api.response(400, 'Input data format doesn\'t match the format expected by TCA')
-#     @api.response(401, 'Unauthorized, missing or invalid access token')
-#     @api.response(500, 'Internal Server Error, missing or wrong config of RBAC access token validation url')
-#     @api.expect([assessment_model])
-#     @api.marshal_with(output_model_clustering)
-#     @api.doc(security='apikey')
-#
-#
-#     def post(self):
-#         """
-#         Returns grouping of apps based on technology stack similarity
-#         """
-#         # Invoke do_clustering method in clustering class to initiate clustering process
-#         return functions.do_clustering(auth_url,dict(request.headers),auth_headers,api.payload)
+@api.route('/clustering', strict_slashes=False)
+class ContainerizationClustering(Resource):
+    """
+    ContainerizationClustering class creates the clustering in the form of clustering_model for the
+    applications/components details given in the assessment_model
+    """
+    @api.doc('create_clustering')
+    @api.response(201, 'Clustering Completed successfully!')
+    @api.response(400, 'Input data format doesn\'t match the format expected by TCA')
+    @api.response(401, 'Unauthorized, missing or invalid access token')
+    @api.response(500, 'Internal Server Error, missing or wrong config of RBAC access token validation url')
+    @api.expect([assessment_model])
+    @api.marshal_with(output_model_clustering)
+    @api.doc(security='apikey')
+
+
+    def post(self):
+        """
+        Returns grouping of apps based on technology stack similarity
+        """
+        # Invoke do_clustering method in clustering class to initiate clustering process
+        return functions.do_clustering(auth_url,dict(request.headers),auth_headers,api.payload)
 
 @api.route('/health_check')
 @api.response(200, 'HTTP OK')

diff --git a/test/unit/test_clustering.py b/test/unit/test_clustering.py
@@ -26,7 +26,7 @@ def test_ui_to_input_assessment(self):
                  'Desc': '',
                  'Cmpt': 'Component 1',
                  'OS': "{'ZOS': {'MVS|z/OS': ('NA_VERSION', 'NA_VERSION')}}",
-                 'Lang': "{'JavaScript': {'JavaScript': ('NA_VERSION', 'ES6')}, 'PL/1': {'PL/I': ('1', '1')}}",
+                 'Lang': "{'JavaScript': {'JavaScript|*': ('NA_VERSION', 'ES6')}, 'PL/1': {'PL/I': ('1', '1')}}",
                  'App Server': '{}',
                  'Dependent Apps': '{}',
                  'Runtime': '{}',
@@ -37,7 +37,7 @@ def test_ui_to_input_assessment(self):
                  'Desc': '',
                  'Cmpt': 'Component 1',
                  'OS': "{'Windows 2016 Standard': {'Windows|Windows Server': ('2016 standard', '2016 standard')}}",
-                 'Lang': "{'JavaScript': {'JavaScript': ('NA_VERSION', 'ES6')}}",
+                 'Lang': "{'JavaScript': {'JavaScript|*': ('NA_VERSION', 'ES6')}}",
                  'App Server': '{}',
                  'Dependent Apps': '{}',
                  'Runtime': '{}',
@@ -70,7 +70,7 @@ def test_ui_to_input_assessment(self):
                  'Desc': '',
                  'Cmpt': 'Component 1',
                  'OS': "{'zOS': {'MVS|z/OS': ('NA_VERSION', 'NA_VERSION')}}",
-                 'Lang': "{'JavaScript': {'JavaScript': ('NA_VERSION', 'ES6')}, 'PL1': {'PL/I': ('1', '1')}}",
+                 'Lang': "{'JavaScript': {'JavaScript|*': ('NA_VERSION', 'ES6')}, 'PL1': {'PL/I': ('1', '1')}}",
                  'App Server': '{}',
                  'Dependent Apps': '{}',
                  'Runtime': '{}',