changed standardize output to be json compatible (#180)

Signed-off-by: mmerler <michele.merler@gmail.com> Signed-off-by: mmerler <michele.merler@gmail.com>
konveyor · Dec 8, 2022 · 1047bd3 · 1047bd3
1 parent 9b31681
commit 1047bd3
Show file tree

Hide file tree

Showing 10 changed files with 563 additions and 310 deletions.
diff --git a/service/clustering.py b/service/clustering.py
@@ -73,9 +73,11 @@ def output_to_ui_clustering(self, appL):
         fields = ['OS', 'Lang', 'App Server', 'Dependent Apps', 'Runtime', 'Libs']
         for i, app in enumerate(appL):
             for k in fields:
-                txt = ast.literal_eval(app[k])
+                # txt = ast.literal_eval(app[k])
+                txt = app[k]
                 for t in txt.keys():
-                    entity = list(txt[t].keys())[0]
+                    # entity = list(txt[t].keys())[0]
+                    entity = txt[t]['standard_name']
 
                     # keep only root of hierarchical entity
                     if entity.find('|') > 0:

diff --git a/service/infer_tech.py b/service/infer_tech.py
@@ -148,56 +148,66 @@ def infer_missing_tech(self, appL):
             ## Infer Language from Lib if language is missing
             ## If a lib supports multiple langages, we do not infer the language
             if app['Lib']:
+
                 for snippet, obj in app['Lib'].items():
                     if not obj:
                         continue
-                    for tech, version in obj.items():
-                        if '|' in tech:
-                            inferred_tech = tech.split('|')[0]
-                            app_lang = Utils.getEntityString(app['Lang']).split(', ')
-                            if inferred_tech not in app_lang:
-                                if snippet not in app['Lang']:
-                                    app['Lang'][snippet] = {}
-                                app['Lang'][snippet][inferred_tech] = version
-                                app['Inferred']['Lang'].append(inferred_tech)
+
+                    tech = obj['standard_name']
+
+                    # for tech, version in obj.items():
+                    if '|' in tech:
+                        inferred_tech = tech.split('|')[0] + '|*'
+
+                        app_lang = Utils.getStandardEntityString(app['Lang']).split(', ')
+
+                        if inferred_tech not in app_lang:
+                            if snippet not in app['Lang']:
+                                app['Lang'][snippet] = {}
+                            app['Lang'][snippet] = {'standard_name': inferred_tech, 'detected_version': 'NA_VERSION', 'latest_known_version': 'NA_VERSION'}
+                            app['Inferred']['Lang'].append(inferred_tech)
 
             # Infer App from Plugin if App is missing
             ## If a plugin supports multiple Apps, we do not infer the App
             if 'Plugin' in app and app['Plugin']:
                 for snippet, obj in app['Plugin'].items():
                     if not obj:
                         continue
-                    for tech, version in obj.items():
-                        if '|' in tech:
-                            inferred_tech = tech.split('|')[0]
-                            app_App = Utils.getEntityString(app['App']).split(', ')
-                            if inferred_tech not in app_App:
-                                if snippet not in app['App']:
-                                    app['App'][snippet] = {}
-                                app['App'][snippet][inferred_tech] = version
-                                app['Inferred']['App'].append(inferred_tech)
+
+                    tech = obj['standard_name']
+                    # for tech, version in obj.items():
+                    if '|' in tech:
+                        inferred_tech = tech.split('|')[0] + '|*'
+                        app_App = Utils.getStandardEntityString(app['App']).split(', ')
+                        if inferred_tech not in app_App:
+                            if snippet not in app['App']:
+                                app['App'][snippet] = {}
+                            app['App'][snippet] = {'standard_name': inferred_tech, 'detected_version': 'NA_VERSION', 'latest_known_version': 'NA_VERSION'}
+                            app['Inferred']['App'].append(inferred_tech)
 
             # Infer App Server and Runtime from Runlib
             ## If a runlib supports multiple App Servers or Runtimes, we do not infer them
             if app['Runlib']:
                 for snippet, obj in app['Runlib'].items():
                     if not obj:
                         continue
-                    for tech, version in obj.items():
-                        if '|' in tech:
-                            inferred_tech = tech.split('|')[0]
-                            app_AppServer = Utils.getEntityString(app['App Server']).split(', ')
-                            app_Runtime = Utils.getEntityString(app['Runtime']).split(', ')
-                            if self.__class_type_mapper['mappings'].get(inferred_tech, 'NA') == 'App Server' and inferred_tech not in app_AppServer:
-                                if snippet not in app['App Server']:
-                                    app['App Server'][snippet] = {}
-                                app['App Server'][snippet][inferred_tech] = version
-                                app['Inferred']['App Server'].append(inferred_tech)
-                            elif self.__class_type_mapper['mappings'].get(inferred_tech, 'NA') == 'Runtime' and inferred_tech not in app_Runtime:
-                                if snippet not in app['Runtime']:
-                                    app['Runtime'][snippet] = {}
-                                app['Runtime'][snippet][inferred_tech] = version
-                                app['Inferred']['Runtime'].append(inferred_tech)
+
+                    tech = obj['standard_name']
+                    # for tech, version in obj.items():
+                    if '|' in tech:
+                        inferred_tech = tech.split('|')[0] + '|*'
+                        app_AppServer = Utils.getStandardEntityString(app['App Server']).split(', ')
+                        app_Runtime = Utils.getStandardEntityString(app['Runtime']).split(', ')
+                        if self.__class_type_mapper['mappings'].get(inferred_tech, 'NA') == 'App Server' and inferred_tech not in app_AppServer:
+                            if snippet not in app['App Server']:
+                                app['App Server'][snippet] = {}
+                            app['App Server'][snippet] = {'standard_name': inferred_tech, 'detected_version': 'NA_VERSION', 'latest_known_version': 'NA_VERSION'}
+                            app['Inferred']['App Server'].append(inferred_tech)
+                        elif self.__class_type_mapper['mappings'].get(inferred_tech, 'NA') == 'Runtime' and inferred_tech not in app_Runtime:
+                            if snippet not in app['Runtime']:
+                                app['Runtime'][snippet] = {}
+                            app['Runtime'][snippet] = {'standard_name': inferred_tech, 'detected_version': 'NA_VERSION', 'latest_known_version': 'NA_VERSION'}
+                            app['Inferred']['Runtime'].append(inferred_tech)
 
             app['Linux'] = {'Lang':[],'App':[], 'App Server':[], 'Runtime':[]}
             app['Windows'] = {'Lang':[],'App':[], 'App Server':[], 'Runtime':[]}
@@ -211,16 +221,19 @@ def infer_missing_tech(self, appL):
             containerize_not_supported = []
             if (app['Lang'] or app['App'] or app['App Server'] or app['Runtime']):
                 is_need_check_compatible = True
-                app_OS = Utils.getEntityString(app['OS']).split(', ')
+
+                #mic
+                app_OS = Utils.getStandardEntityString(app['OS']).split(', ')
+
                 if len(app_OS) == 0:
                     is_need_check_compatible = False
 
                 child_types = ["App Server", "App", "Runtime","Lang"]
                 is_init_recommended_OS = False
                 for child_type in child_types:
                     if app[child_type]:
-                        for child in Utils.getEntityString(app[child_type]).split(', '):
-                            
+                        for child in Utils.getStandardEntityString(app[child_type]).split(', '):
+
                             if child != 'score':
                                 candidate_OS = self.__get_candidate_OS(child)
                                 if not candidate_OS or len(candidate_OS) == 0:

diff --git a/service/planning.py b/service/planning.py
@@ -209,7 +209,7 @@ def __compute_confidence(self, app, catalog = 'dockerhub'):
                                     break
                             scope_image = best_image
                             images_score += scores_dict[child_type]
-                            # print(str(scope_image))
+
                             app['scope_images'][scope_image] = {'Docker_URL': containerimageKG['Container Images'][scope_image][imageurl], 'Status': containerimageKG['Container Images'][scope_image].get('CertOfImageAndPublisher')}
                             app['scope_images_confidence']['mapping'][child] = scope_image
                             if child_type in app_appserver_child_types:
@@ -283,7 +283,6 @@ def __compute_confidence(self, app, catalog = 'dockerhub'):
         if not app['scope_images'] and scope_images:
             # find best for OS
             scope_image = scope_images[0]
-            # print(scope_image)
             app['scope_images'][scope_image] = {'Docker_URL': containerimageKG['Container Images'][scope_image][imageurl], 'Status': containerimageKG['Container Images'][scope_image].get('CertOfImageAndPublisher')}
             # app['scope_images_confidence']['mapping'][child] = scope_image
 
@@ -340,6 +339,7 @@ def __search_docker(self, app, catalog = 'dockerhub'):
         if (app['OS'].split('|')[0] != app['OS']) and app['OS'].split('|')[0] in inverted_containerimageKG:
             parent_os_check_images = inverted_containerimageKG[app['OS'].split('|')[0]]
 
+
         # parent_os_scope_images = []
         child_types = ["App Server", "App", "Runtime","Lang"]
         for child_type in child_types:
@@ -369,7 +369,7 @@ def __find_best_os(self, app, os):
         """
         linux_list = ['Linux|Red Hat Enterprise Linux', 'Linux|Ubuntu', 'Linux|CentOS', 'Linux|Fedora', 'Linux|Debian', '	Linux|Oracle Linux', '	Linux|openSUSE', '	Linux|Amazon Linux']
         result = os
-        for inputOS in Utils.getEntityString(app['OS']).split(', '):
+        for inputOS in Utils.getStandardEntityString(app['OS']).split(', '):
             if inputOS and '|' in inputOS and os == inputOS.split('|')[0]:
                 result = inputOS
                 break
@@ -382,7 +382,7 @@ def __find_best_os(self, app, os):
 
     def ui_to_input_assessment(self, assessment_data):
         """
-        ui_to_input_assessment method takes the assessment ouput and format it to list of application details
+        ui_to_input_assessment method takes the assessment output and format it to list of application details
         which will be further used for planning
 
         :param assessment_data: list of assessment output for each component
@@ -411,12 +411,19 @@ def ui_to_input_assessment(self, assessment_data):
                     pApp['component_name'] = app["Cmpt"]
 
                 # Curated
-                pApp['OS'] = ast.literal_eval(app["OS"])
-                pApp['Lang'] = ast.literal_eval(app["Lang"])
-                pApp["App Server"] = ast.literal_eval(app["App Server"])
-                pApp["App"] = ast.literal_eval(app["Dependent Apps"])
-                pApp["Runtime"] = ast.literal_eval(app["Runtime"])
-                pApp["Lib"] = ast.literal_eval(app["Libs"])
+                # mic
+                # pApp['OS'] = ast.literal_eval(app["OS"])
+                # pApp['Lang'] = ast.literal_eval(app["Lang"])
+                # pApp["App Server"] = ast.literal_eval(app["App Server"])
+                # pApp["App"] = ast.literal_eval(app["Dependent Apps"])
+                # pApp["Runtime"] = ast.literal_eval(app["Runtime"])
+                # pApp["Lib"] = ast.literal_eval(app["Libs"])
+                pApp['OS'] = app["OS"]
+                pApp['Lang'] = app["Lang"]
+                pApp["App Server"] = app["App Server"]
+                pApp["App"] = app["Dependent Apps"]
+                pApp["Runtime"] = app["Runtime"]
+                pApp["Lib"] = app["Libs"]
 
                 pApp['assessment_reason'] = app['Reason']
                 try:
@@ -499,6 +506,7 @@ def map_to_docker(self, appL, catalog = 'dockerhub'):
                                 subapp[child_type] = ', '.join(filter(None, app[os][child_type]))
 
                             subapp = self.__search_docker(subapp, catalog)
+
                             try:
                                 subapp['unknown'] = app['unknown']
                             except Exception :
@@ -598,6 +606,7 @@ def output_to_ui_planning(self, containerL):
                         image_name = image_name + '(' + app['scope_images'][image]['Status'] + ')'
                         docker_url_dict['status'] = app['scope_images'][image]['Status']
                     # mic docker_url_dict[image_name] = app["scope_images"][image]["Docker_URL"]
+
                     docker_url_dict['url'] = app["scope_images"][image]["Docker_URL"]
                     pApp['Ref Dockers'].append(docker_url_dict)  # mic += str(counter) + ". " + str(docker_url_dict) +'\n'
                     counter_list += str(counter) + ','
@@ -609,18 +618,20 @@ def output_to_ui_planning(self, containerL):
                 if app['scope_images_confidence']:
                     pApp["Confidence"] = app['scope_images_confidence']['image_confidence']
                 if 'scope_images_win' in app and app['scope_images_win']:
+
                     counter_list = ''
                     for image in app["scope_images_win"]:
-                        # mic start
+
                         docker_url_dict = {'name': "", 'status': "", 'url': ""}
                         docker_url_dict['name'] = image_name
-                        # mic end
+
                         image_name = image
                         if app['scope_images_win'][image]['Status']:
                             image_name = image_name + '(' + app['scope_images_win'][image]['Status'] + ')'
                             docker_url_dict['status'] = app['scope_images_win'][image]['Status']
 
-                        docker_url_dict['url'] = app["scope_images"][image]["Docker_URL"]
+                        docker_url_dict['url'] = app["scope_images_win"][image]["Docker_URL"]
+
                         pApp['Ref Dockers'].append(docker_url_dict)  # mic += str(counter) + ". " + image_name +'|'+app["scope_images_win"][image]["Docker_URL"]+'\n'
                         counter_list += str(counter) + ','
                         counter += 1

diff --git a/service/standardization.py b/service/standardization.py
@@ -128,6 +128,16 @@ def remove_scores(self, apps):
 
         return apps
 
+    # format mentions metadata (standard entity name, detected version, latest known version)
+    def format_mentions(self, apps):
+        for app in apps:
+            for k in app.keys():
+                if type(app[k]) is dict:
+                    for m in app[k].keys():
+                        app[k][m] = { 'standard_name': list(app[k][m].keys())[0] , 'detected_version': list(app[k][m].values())[0][0], 'latest_known_version': list(app[k][m].values())[0][1] }
+
+        return apps
+
     def entity_standardizer(self, mention_data):
         """
         Invokes detect_access_token for accesstoken validation and if it's valid, it will call
@@ -603,5 +613,7 @@ def app_standardizer(self, app_data):
         # remove redundant mentions and entity matching scores
         app_data = self.remove_redundant_mentions(app_data)
         app_data = self.remove_scores(app_data)
+        app_data = self.format_mentions(app_data)
+
 
         return app_data
diff --git a/service/utils.py b/service/utils.py
@@ -45,6 +45,18 @@ def getEntityString(obj):
                 if x:
                     tech.append(x)
         return ', '.join(filter(None, tech))
+
+    # added to process json compatible format of entities
+    @staticmethod
+    def getStandardEntityString(obj):
+        ## obj: {snippet: {'standard_name': entity, ...}}
+        ## return: entity, entity
+        if not obj:
+            return ''
+        tech = []
+        for x in obj.values():
+            tech.append(x['standard_name'])
+        return ', '.join(filter(None, tech))
 
     @staticmethod
     def mergeDicts(result, app_tech):

diff --git a/setup.sh b/setup.sh
@@ -25,7 +25,7 @@ then
 fi
 
 # Check to make sure python is installed
-if ! command -v python &> /dev/null
+if ! command -v $python &> /dev/null
 then
     echo "**** ERROR: python command could not be found. Cannot continue."
     exit 1

diff --git a/test/unit/test_assessment.py b/test/unit/test_assessment.py
@@ -32,14 +32,29 @@ def test_app_validation(self):
 
     def test_output_assessment(self):
         assessment = Assessment()
-        appL = [{'application_id': 'App ID 0114', 'application_name': 'App Name 0114', 'application_description': 'App Desc 0114', 'component_name': 'Comp 1', 'operating_system': 'RHEL', 'programming_languages': 'Java', 'middleware': 'WebSphere Application Server', 'database': 'db2 10.0', 'integration_services_and_additional_softwares': 'Redis', 'technology_summary':'angularJs,express.js,jenkins', 'versioning_tool_type': '1', 'application_inbound_interfaces': 5, 'application_outbound_interfaces': 1, 'devops_maturity_level': 'Moderate', 'devops_tooling': 'Jenkins, Git, JIRA', 'test_automation_%': '50%', 'performance_testing_enabled': 'No', 'KG Version': '1.0.1', 'App Server': {'WebSphere Application Server': {'Websphere Application Server (WAS)': ''}}, 'Runtime': {}, 'Lang': {'Java': {'Java': ''}}, 'App': {'db2 10.0': {'DB2': '10.0'}, 'Redis': {'Redis': ''}, 'jenkins': {'Jenkins': ''}},'OS': {'RHEL': {'Linux|Red Hat Enterprise Linux': ''}}, 'Lib': {'angularJs': {'JavaScript|AngularJS': ''}, 'express.js': {'JavaScript|Express.js': ''}}, 'assessment_reason': ''}]
+        appL = [
+            {'application_id': 'App ID 0114', 'application_name': 'App Name 0114',
+             'application_description': 'App Desc 0114',
+             'component_name': 'Comp 1', 'operating_system': 'RHEL', 'programming_languages': 'Java',
+             'middleware': 'WebSphere Application Server', 'database': 'db2 10.0',
+             'integration_services_and_additional_softwares': 'Redis',
+             'technology_summary': 'angularJs,express.js,jenkins',
+             'versioning_tool_type': '1', 'application_inbound_interfaces': 5, 'application_outbound_interfaces': 1,
+             'devops_maturity_level': 'Moderate', 'devops_tooling': 'Jenkins, Git, JIRA', 'test_automation_%': '50%',
+             'performance_testing_enabled': 'No', 'KG Version': '1.0.1',
+             'App Server': {'WebSphere Application Server': {'Websphere Application Server (WAS)': ''}}, 'Runtime': {},
+             'Lang': {'Java': {'Java|*': ''}},
+             'App': {'db2 10.0': {'DB2': '10.0'}, 'Redis': {'Redis': ''}, 'jenkins': {'Jenkins': ''}},
+             'OS': {'RHEL': {'Linux|Red Hat Enterprise Linux': ''}},
+             'Lib': {'angularJs': {'JavaScript|AngularJS': ''}, 'express.js': {'JavaScript|Express.js': ''}},
+             'assessment_reason': ''}]
 
         expected = {
             'Name': 'App Name 0114',
             'Desc': 'App Desc 0114',
             'Cmpt': 'Comp 1',
             'OS': {'RHEL': {'Linux|Red Hat Enterprise Linux': ''}},
-            'Lang': {'Java': {'Java': ''}},
+            'Lang': {'Java': {'Java|*': ''}},
             'App Server': {'WebSphere Application Server': {'Websphere Application Server (WAS)': ''}},
             'Dependent Apps': {'db2 10.0': {'DB2': '10.0'}, 'Redis': {'Redis': ''}, 'jenkins': {'Jenkins': ''}},
             'Runtime': {},