Skip to content

Commit

Permalink
KG and Unit tests changes (#137)
Browse files Browse the repository at this point in the history
* added single tech_summary input for standardization

* fixed test/unit/test_assessment.py

* added first version of clustering

* fixed new .sql dependencies

* sync with main repo

* fixed .sql

* fixed bug in unit tests

* fixed tests

* fixed test_standardization.py

Co-authored-by: mmerler <michele@micheles-mbp.watson.ibm.com>
Co-authored-by: mmerler <michele@micheles-mbp.myfiosgateway.com>
Co-authored-by: mmerler <michele@Micheles-MBP.fritz.box>
  • Loading branch information
4 people authored Jul 25, 2022
1 parent fcc75e8 commit 0466bb3
Show file tree
Hide file tree
Showing 7 changed files with 13,046 additions and 13,040 deletions.
25,845 changes: 12,944 additions & 12,901 deletions db/1.0.4.sql

Large diffs are not rendered by default.

118 changes: 46 additions & 72 deletions service/clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
from collections import OrderedDict
import logging
from service.utils import Utils
import ast
import numpy as np

import configparser

Expand All @@ -30,7 +32,7 @@

class Clustering():
"""
This class for containerize Clustering
This class for Clustering
"""

def __init__(self):
Expand All @@ -39,59 +41,21 @@ def __init__(self):
Setting up the logging level as info and opens logfile in write mode to capture the logs in text file
"""

def output_to_ui_assessment(self, appL):
"""
output_to_ui assessment methods takes the final assessed data as input and formats it & keeps
only required fields and returns it as output assessment response
"""
pAppL = []
print('ok here!')
try :
for app in appL:



# Ordered dictionary to fix the order of columns in the output
pApp = OrderedDict()


# Raw Fields
pApp['Name'] = ''
if 'Name' in app:
pApp['application_name'] = app["Name"]

pApp['application_description'] = ''
if 'Desc' in app:
pApp['application_description'] = app["Desc"]

pApp['component_name'] = ''
if 'Cmpt' in app:
pApp['component_name'] = app["Cmpt"]
logging.basicConfig(level=logging.INFO)

# read entities
entities_filepath = os.path.join(config['general']['kg_dir'], config['tca']['entities'])
if os.path.exists(entities_filepath):
with open(entities_filepath, 'r') as f:
entities_json = json.load(f)
self.entity_names = np.empty(len(entities_json['data']), dtype='object')
for i, en in enumerate(entities_json['data']):
self.entity_names[i] = entities_json['data'][en]['entity_name']
else:
self.entities = {}
logging.error(f'entities[{entities_filepath}] is empty or not exists')

# Curated
pApp['OS'] = eval(app["OS"])
pApp['Lang'] = eval(app["Lang"])
pApp["App Server"] = eval(app["App Server"])
pApp["App"] = eval(app["Dependent Apps"])
pApp["Runtime"] = eval(app["Runtime"])
pApp["Lib"] = eval(app["Libs"])

pApp['assessment_reason'] = app['Reason']

try :
pApp["KG Version"] = app["KG Version"]
except :
pApp["KG Version"] = 'Not Available'


pAppL.append(pApp)

return pAppL

except Exception as e:
logging.error(str(e))


def output_to_ui_clustering(self, appL):
Expand All @@ -100,30 +64,40 @@ def output_to_ui_clustering(self, appL):
only required fields and returns it as output assessment response
"""
pAppL = []
for app in appL:

print(app)
# initialize tech stack
tech_stack = np.zeros((len(appL), self.entity_names.shape[0]), dtype='bool')
appL_array = np.array(appL)

# find unique clusters
fields = ['OS', 'Lang', 'App Server', 'Dependent Apps', 'Runtime', 'Libs']
for i, app in enumerate(appL):
for k in fields:
txt = ast.literal_eval(app[k])
for t in txt.keys():
entity = list(txt[t].keys())[0]

# keep only root of hierarchical entity
if entity.find('|') > 0:
entity = f"{entity.split('|')[0]}|*"

tech_stack[i][self.entity_names == entity] = 1

# find unique clusters
clusters, index, counts = np.unique(tech_stack, return_inverse=True, return_counts=True, axis=0)

# sort clusters by number of apps
order = np.argsort(counts)[::-1]

# Ordered dictionary to fix the order of columns in the output
pApp = OrderedDict()
clusters = clusters[order]
counts = counts[order]

# Raw Data
pApp['Name'] = ''
if 'application_name' in app:
pApp['Name'] = app["application_name"]
pApp['Desc'] = ''
if 'application_description' in app:
pApp['Desc'] = app["application_description"]
pApp['Cmpt'] = ''
if 'component_name' in app:
pApp['Cmpt'] = app["component_name"]
unique_clusters = []
for i in range(clusters.shape[0]):
cl = { "id": i, "name": f'unique_tech_stack_{i}', "type": 'unique', "tech_stack": list(self.entity_names[clusters[i] == 1]),\
"num_elements": counts[i], "apps": list(appL_array[index == order[i]]) }

# AI Insights
pApp["Ref Dockers"] = ""
pApp["Confidence"] = 0
unique_clusters.append(cl)

# pAppL['Clusters'].append(pApp)
pAppL.append(pApp)

return pAppL
return unique_clusters
11 changes: 2 additions & 9 deletions service/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,14 +131,10 @@ def assessment(self,auth_url,headers,auth_headers,app_data):
if not is_valid:
return resp, code

print('ok here 1')
appL = self.standardize.app_standardizer(app_data)

print('ok here 2')
appL = self.assess.app_validation(appL)

print('ok here 3')

# Generate output for UI
output = self.assess.output_to_ui_assessment(appL)
logging.info(f'{str(datetime.now())} output assessment num: {str(len(output))} ')
Expand Down Expand Up @@ -176,18 +172,15 @@ def planning(self, auth_url, headers, auth_headers, assessment_data, catalog):
def clustering(self, auth_url, headers, auth_headers, app_data):
"""
Invokes detect_access_token for accesstoken validation and if it's valid, it will call
compose_app for assessment and app_validation for validation the assessed application data
and finally call output_to_ui_clustering to return the formatted assessment data
output_to_ui_clustering to return the formatted assessment data
"""
try:
resp, code, is_valid = self.detect_access_token(auth_url, headers, auth_headers)
if not is_valid:
return resp, code

appL = self.cluster.output_to_ui_assessment(app_data)

# Generate output for UI
clusters = self.cluster.output_to_ui_clustering(appL)
clusters = self.cluster.output_to_ui_clustering(app_data)
logging.info(f'{str(datetime.now())} output clustering num: {str(len(clusters))} ')
return dict(status=201, message="Clustering completed successfully!", clusters=clusters), 201
except Exception as e:
Expand Down
4 changes: 0 additions & 4 deletions service/planning.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,10 +411,6 @@ def ui_to_input_assessment(self, assessment_data):
# Curated
pApp['OS'] = eval(app["OS"])

print('+++++++++OS++++++++++++')
print( pApp['OS'])
print('+++++++++END OS++++++++++++')

pApp['Lang'] = eval(app["Lang"])
pApp["App Server"] = eval(app["App Server"])
pApp["App"] = eval(app["Dependent Apps"])
Expand Down
69 changes: 35 additions & 34 deletions service/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,13 +121,14 @@
"Recommend": fields.String(required=False, description='Recommended disposition')
})

# clustering_model = api.model('Clustering', {
# "Name": fields.String(required=True, description='Name of the application'),
# "Desc": fields.String(required=True, description='Description of the application'),
# "Cmpt": fields.String(required=True, description='Component/Deployment Unit of the application'),
# "Ref Dockers": fields.String(required=False, description='Description of the application'),
# "Confidence": fields.Float(required=False, description='Confidence of the assessment')
# })
clustering_model = api.model('Clustering', {
"id": fields.Integer(required=True, description='Cluster ID'),
"name": fields.String(required=True, description='Cluster name'),
"type": fields.String(required=True, description='Cluster type'),
"tech_stack": fields.List(fields.String, required=True, description='List of tech stack elements'),
"num_elements": fields.Integer(required=True, description='Number of elements'),
"apps": fields.List(fields.Nested(assessment_model), required=True, description='An array of applications')
})


output_model_assessment = api.model('Standardization Output', {
Expand All @@ -142,11 +143,11 @@
"containerization": fields.List(fields.Nested(planning_model), required=True, description='An array of containerization planning for application workload')
})

# output_model_clustering = api.model('Clustering Output', {
# "status": fields.Integer(required=True, description='Status of the call'),
# "message": fields.String(required=True, description='Status message'),
# "clusters": fields.List(fields.Nested(clustering_model), required=True, description='An array of containerization clustering for application workload')
# })
output_model_clustering = api.model('Clustering Output', {
"status": fields.Integer(required=True, description='Status of the call'),
"message": fields.String(required=True, description='Status message'),
"clusters": fields.List(fields.Nested(clustering_model), required=True, description='An array of containerization clustering for application workload')
})

# @api.route('/match', strict_slashes=False)
# class Standardization(Resource):
Expand Down Expand Up @@ -228,28 +229,28 @@ def post(self):

return functions.do_planning(auth_url,dict(request.headers),auth_headers,api.payload,catalog)

# @api.route('/clustering', strict_slashes=False)
# class ContainerizationClustering(Resource):
# """
# ContainerizationClustering class creates the clustering in the form of clustering_model for the
# applications/components details given in the assessment_model
# """
# @api.doc('create_clustering')
# @api.response(201, 'Clustering Completed successfully!')
# @api.response(400, 'Input data format doesn\'t match the format expected by TCA')
# @api.response(401, 'Unauthorized, missing or invalid access token')
# @api.response(500, 'Internal Server Error, missing or wrong config of RBAC access token validation url')
# @api.expect([assessment_model])
# @api.marshal_with(output_model_clustering)
# @api.doc(security='apikey')
#
#
# def post(self):
# """
# Returns grouping of apps based on technology stack similarity
# """
# # Invoke do_clustering method in clustering class to initiate clustering process
# return functions.do_clustering(auth_url,dict(request.headers),auth_headers,api.payload)
@api.route('/clustering', strict_slashes=False)
class ContainerizationClustering(Resource):
"""
ContainerizationClustering class creates the clustering in the form of clustering_model for the
applications/components details given in the assessment_model
"""
@api.doc('create_clustering')
@api.response(201, 'Clustering Completed successfully!')
@api.response(400, 'Input data format doesn\'t match the format expected by TCA')
@api.response(401, 'Unauthorized, missing or invalid access token')
@api.response(500, 'Internal Server Error, missing or wrong config of RBAC access token validation url')
@api.expect([assessment_model])
@api.marshal_with(output_model_clustering)
@api.doc(security='apikey')


def post(self):
"""
Returns grouping of apps based on technology stack similarity
"""
# Invoke do_clustering method in clustering class to initiate clustering process
return functions.do_clustering(auth_url,dict(request.headers),auth_headers,api.payload)

@api.route('/health_check')
@api.response(200, 'HTTP OK')
Expand Down
6 changes: 3 additions & 3 deletions test/unit/test_clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def test_ui_to_input_assessment(self):
'Desc': '',
'Cmpt': 'Component 1',
'OS': "{'ZOS': {'MVS|z/OS': ('NA_VERSION', 'NA_VERSION')}}",
'Lang': "{'JavaScript': {'JavaScript': ('NA_VERSION', 'ES6')}, 'PL/1': {'PL/I': ('1', '1')}}",
'Lang': "{'JavaScript': {'JavaScript|*': ('NA_VERSION', 'ES6')}, 'PL/1': {'PL/I': ('1', '1')}}",
'App Server': '{}',
'Dependent Apps': '{}',
'Runtime': '{}',
Expand All @@ -37,7 +37,7 @@ def test_ui_to_input_assessment(self):
'Desc': '',
'Cmpt': 'Component 1',
'OS': "{'Windows 2016 Standard': {'Windows|Windows Server': ('2016 standard', '2016 standard')}}",
'Lang': "{'JavaScript': {'JavaScript': ('NA_VERSION', 'ES6')}}",
'Lang': "{'JavaScript': {'JavaScript|*': ('NA_VERSION', 'ES6')}}",
'App Server': '{}',
'Dependent Apps': '{}',
'Runtime': '{}',
Expand Down Expand Up @@ -70,7 +70,7 @@ def test_ui_to_input_assessment(self):
'Desc': '',
'Cmpt': 'Component 1',
'OS': "{'zOS': {'MVS|z/OS': ('NA_VERSION', 'NA_VERSION')}}",
'Lang': "{'JavaScript': {'JavaScript': ('NA_VERSION', 'ES6')}, 'PL1': {'PL/I': ('1', '1')}}",
'Lang': "{'JavaScript': {'JavaScript|*': ('NA_VERSION', 'ES6')}, 'PL1': {'PL/I': ('1', '1')}}",
'App Server': '{}',
'Dependent Apps': '{}',
'Runtime': '{}',
Expand Down
Loading

0 comments on commit 0466bb3

Please sign in to comment.