making diff mode more robust

SethHollandsworth · Apr 12, 2024 · 06e6177 · 06e6177
1 parent 362a710
commit 06e6177
Show file tree

Hide file tree

Showing 7 changed files with 251 additions and 62 deletions.
diff --git a/src/confcom/azext_confcom/config.py b/src/confcom/azext_confcom/config.py
@@ -11,6 +11,7 @@
 ACI_FIELD_RESOURCES = "resources"
 ACI_FIELD_RESOURCES_NAME = "name"
 ACI_FIELD_CONTAINERS = "containers"
+ACI_FIELD_CONTAINERS_NAME = "name"
 ACI_FIELD_CONTAINERS_CONTAINERIMAGE = "containerImage"
 ACI_FIELD_CONTAINERS_ENVS = "environmentVariables"
 ACI_FIELD_CONTAINERS_ENVS_NAME = "name"
@@ -74,6 +75,7 @@
 
 # output json values
 POLICY_FIELD_CONTAINERS = "containers"
+POLICY_FIELD_CONTAINERS_NAME = "name"
 POLICY_FIELD_CONTAINERS_ID = "id"
 POLICY_FIELD_CONTAINERS_ELEMENTS = "elements"
 POLICY_FIELD_CONTAINERS_LENGTH = "length"

diff --git a/src/confcom/azext_confcom/container.py b/src/confcom/azext_confcom/container.py
@@ -92,6 +92,10 @@ def extract_id(container_json: Any) -> str:
     return case_insensitive_dict_get(container_json, config.ACI_FIELD_CONTAINERS_ID)
 
 
+def extract_container_name(container_json: Any) -> str:
+    """Return the container name stored in ``container_json``.
+
+    The value is looked up under ``config.ACI_FIELD_CONTAINERS_NAME`` with
+    ``case_insensitive_dict_get`` and returned unchanged.
+    NOTE(review): the result when the key is absent depends on that helper
+    (presumably a falsy value) -- confirm before relying on a ``str`` return.
+    """
+    return case_insensitive_dict_get(container_json, config.ACI_FIELD_CONTAINERS_NAME)
+
+
 def extract_working_dir(container_json: Any) -> str:
     # parse working directory
     workingDir = case_insensitive_dict_get(
@@ -488,6 +492,7 @@ def from_json(
 
         container_image = extract_container_image(container_json)
         id_val = extract_id(container_json)
+        container_name = extract_container_name(container_json)
         environment_rules = extract_env_rules(container_json=container_json)
         command = extract_command(container_json)
         working_dir = extract_working_dir(container_json)
@@ -507,6 +512,7 @@ def from_json(
         allow_privilege_escalation = extract_allow_privilege_escalation(container_json)
         return ContainerImage(
             containerImage=container_image,
+            containerName=container_name,
             environmentRules=environment_rules,
             command=command,
             workingDir=working_dir,
@@ -540,8 +546,10 @@ def __init__(
         allowPrivilegeEscalation: bool = True,
         execProcesses: List = None,
         signals: List = None,
+        containerName: str = ""
     ) -> None:
         self.containerImage = containerImage
+        self.containerName = containerName
         if ":" in containerImage:
             self.base, self.tag = containerImage.split(":", 1)
         else:
@@ -573,6 +581,9 @@ def get_policy_json(self) -> str:
     def get_id(self) -> str:
         return self._identifier
 
+    def get_name(self) -> str:
+        """Return the current value of ``self.containerName``."""
+        return self.containerName
+
     def get_working_dir(self) -> str:
         return self._workingDir
 
@@ -616,6 +627,8 @@ def set_extra_environment_rules(self, rules: Dict) -> None:
     def parse_all_parameters_and_variables(self, params, vars_dict) -> None:
         field_names = [
             "containerImage",
+            "containerName",
+            "_identifier",
             "_environmentRules",
             "_command",
             "_workingDir",
@@ -696,6 +709,7 @@ def _populate_policy_json_elements(self) -> Dict[str, Any]:
 
         elements = {
             config.POLICY_FIELD_CONTAINERS_ID: self._identifier,
+            config.POLICY_FIELD_CONTAINERS_NAME: self.get_name(),
             config.POLICY_FIELD_CONTAINERS_ELEMENTS_LAYERS: self._layers,
             config.POLICY_FIELD_CONTAINERS_ELEMENTS_COMMANDS: self._command,
             config.POLICY_FIELD_CONTAINERS_ELEMENTS_ENVS: self._get_environment_rules(),

diff --git a/src/confcom/azext_confcom/os_util.py b/src/confcom/azext_confcom/os_util.py
@@ -145,23 +145,22 @@ def map_image_from_tar(image_name: str, tar: TarFile, tar_location: str):
     tar_dir = os.path.dirname(tar_location)
     info_file = None
     info_file_name = "manifest.json"
-    # if there's more than one image in the tarball, we need to do some more logic
-    if len(info_file_name) > 0:
-        # extract just the manifest file and see if any of the RepoTags match the image_name we're searching for
-        # the manifest.json should have a list of all the image tags
-        # and what json files they map to to get env vars, startup cmd, etc.
-        tar.extract(info_file_name, path=tar_dir)
-        manifest_path = os.path.join(tar_dir, info_file_name)
-        manifest = load_json_from_file(manifest_path)
+
+    # extract just the manifest file and see if any of the RepoTags match the image_name we're searching for
+    # the manifest.json should have a list of all the image tags
+    # and what json files they map to to get env vars, startup cmd, etc.
+    tar.extract(info_file_name, path=tar_dir)
+    manifest_path = os.path.join(tar_dir, info_file_name)
+    manifest = load_json_from_file(manifest_path)
+    try:
         # if we match a RepoTag to the image, stop searching
         for image in manifest:
             if image_name in image.get("RepoTags"):
                 info_file = image.get("Config")
                 break
+    finally:
         # remove the extracted manifest file to clean up
         os.remove(manifest_path)
-    else:
-        eprint(f"Tarball at {tar_location} contains no images")
 
     if not info_file:
         return None

diff --git a/src/confcom/azext_confcom/security_policy.py b/src/confcom/azext_confcom/security_policy.py
@@ -31,7 +31,8 @@
     extract_probe,
     process_env_vars_from_template,
     get_image_info,
-    get_tar_location_from_mapping
+    get_tar_location_from_mapping,
+    get_diff_size
 )
 from azext_confcom.rootfs_proxy import SecurityPolicyProxy
 
@@ -241,7 +242,15 @@ def validate_sidecars(self) -> Tuple[bool, Dict]:
 
     def validate(self, policy, sidecar_validation=False) -> Tuple[bool, Dict]:
         """Utility method: general method to compare two policies.
-        One being the current object and the other is passed in as a parameter"""
+        One being the current object and the other is passed in as a parameter.
+
+        This is done by comparing first the container names (IDs) and then
+        the contents of the containers with special logic for environment variables,
+        since they can use regular expressions.
+
+        The minimum difference is used to match up the containers in the policy vs
+        the containers in the ARM template. Afterwards, the differences are compiled
+        and returned as a dictionary organized by container name."""
         if not policy:
             eprint("Policy is not in the expected form to validate against")
 
@@ -261,56 +270,75 @@ def validate(self, policy, sidecar_validation=False) -> Tuple[bool, Dict]:
             # see if the IDs match with any container in the policy
 
             id_val = case_insensitive_dict_get(container, config.ACI_FIELD_CONTAINERS_ID)
+            container_name = case_insensitive_dict_get(
+                container,
+                config.ACI_FIELD_CONTAINERS_NAME
+            )
 
-            idx = policy_ids.index(id_val) if id_val in policy_ids else None
+            # idx = policy_ids.index(id_val) if id_val in policy_ids else None
+            idx_arr = [i for i, item in enumerate(policy_ids) if item == id_val]
 
-            if idx is None:
-                reason_list[id_val] = f"{id_val} not found in policy"
+            if idx_arr == []:
+                reason_list[container_name] = f"{id_val} not found in policy"
                 continue
-            matching_policy_container = policy[idx]
-
-            # copy so we can delete fields and not affect the original data
-            # structure
-            container1 = copy.deepcopy(matching_policy_container)
-            container2 = copy.deepcopy(container)
-
-            # the ID does not matter so delete them from comparison
-            container1.pop(config.POLICY_FIELD_CONTAINERS_ID, None)
-            container2.pop(config.POLICY_FIELD_CONTAINERS_ID, None)
-            # env vars will be compared later so delete them from this
-            # comparison
-            container1.pop(config.POLICY_FIELD_CONTAINERS_ELEMENTS_ENVS, None)
-            container2.pop(config.POLICY_FIELD_CONTAINERS_ELEMENTS_ENVS, None)
-
-            container_diff = compare_containers(container1, container2)
-
-            # for sidecar validation, it's fine if the policy has
-            # more things defined than the image, so we can take
-            # those out of the diff because it would not hinder deployment
-            if sidecar_validation:
-                for k in list(container_diff.keys()):
-                    if "removed" in k:
-                        container_diff.pop(k)
-            if container_diff != {}:
-                reason_list[id_val] = container_diff
-
-            env_reason_list = compare_env_vars(
-                id_val,
-                case_insensitive_dict_get(
-                    matching_policy_container,
-                    config.POLICY_FIELD_CONTAINERS_ELEMENTS_ENVS,
-                ),
-                case_insensitive_dict_get(
-                    container, config.POLICY_FIELD_CONTAINERS_ELEMENTS_ENVS
-                ),
-            )
 
-            # merge the output of checking env vars with the original reason
-            # list
-            for key, value in env_reason_list.items():
-                if key not in reason_list:
-                    reason_list[key] = {}
-                reason_list[key].update(value)
+            temp_diff_list = []
+            for idx in idx_arr:
+                temp_diff = {}
+                matching_policy_container = policy[idx]
+
+                # copy so we can delete fields and not affect the original data
+                # structure
+                container1 = copy.deepcopy(matching_policy_container)
+                container2 = copy.deepcopy(container)
+
+                # the ID does not matter so delete them from comparison
+                container1.pop(config.POLICY_FIELD_CONTAINERS_ID, None)
+                container2.pop(config.POLICY_FIELD_CONTAINERS_ID, None)
+                # env vars will be compared later so delete them from this
+                # comparison
+                container1.pop(config.POLICY_FIELD_CONTAINERS_ELEMENTS_ENVS, None)
+                container2.pop(config.POLICY_FIELD_CONTAINERS_ELEMENTS_ENVS, None)
+
+                diff_values = compare_containers(container1, container2)
+                # label the diff with the ID so it can be merged
+                # with the env vars and other container diffs
+                temp_diff[container_name] = diff_values
+                # for sidecar validation, extra policy entries ("removed" diff
+                # keys) don't hinder deployment, so drop them from this container's
+                # diff; temp_diff itself is keyed by container name, not diff type
+                if sidecar_validation:
+                    for k in list(diff_values.keys()):
+                        if "removed" in k:
+                            diff_values.pop(k)
+
+                env_reason_list = compare_env_vars(
+                    container_name,
+                    case_insensitive_dict_get(
+                        matching_policy_container,
+                        config.POLICY_FIELD_CONTAINERS_ELEMENTS_ENVS,
+                    ),
+                    case_insensitive_dict_get(
+                        container, config.POLICY_FIELD_CONTAINERS_ELEMENTS_ENVS
+                    ),
+                )
+
+                # merge the output of checking env vars with the original reason
+                # list
+                for key, value in env_reason_list.items():
+                    if key not in temp_diff:
+                        temp_diff[key] = {}
+                    temp_diff[key].update(value)
+
+                temp_diff_list.append(copy.deepcopy(temp_diff))
+
+            diff_sizes = [get_diff_size(diff) for diff in copy.deepcopy(temp_diff_list)]
+
+            if diff_sizes.count(0) >= 1:
+                continue
+
+            reason_list.update(temp_diff_list[diff_sizes.index(min(diff_sizes))])
+
         is_valid = not bool(reason_list)
         return is_valid, reason_list
 
@@ -440,7 +468,7 @@ def populate_policy_content_for_all_images(
 
                     if (
                         not deepdiff.DeepDiff(image.get_user(), config.DEFAULT_USER, ignore_order=True)
-                        and image_info.get("User") != ""
+                        and (image_info.get("User") != "" and image_info.get("User") is not None)
                     ):
                         # valid values are in the form "user", "user:group", "uid", "uid:gid", "user:gid", "uid:group"
                         # where each entry is either a string or an unsigned integer
@@ -490,6 +518,7 @@ def pull_image(self, image: ContainerImage) -> Any:
         return client.images.pull(image.base, image.tag)
 
 
+# pylint: disable=R0914,
 def load_policy_from_arm_template_str(
     template_data: str,
     parameter_data: str,
@@ -594,6 +623,11 @@ def load_policy_from_arm_template_str(
                 image_properties, config.ACI_FIELD_TEMPLATE_IMAGE
             )
 
+            # this is guaranteed unique for a valid ARM template
+            container_name = case_insensitive_dict_get(
+                container, config.ACI_FIELD_CONTAINERS_NAME
+            )
+
             if not image_name:
                 eprint(
                     f'Field ["{config.ACI_FIELD_TEMPLATE_PARAMETERS}"] is empty or cannot be found'
@@ -606,6 +640,7 @@ def load_policy_from_arm_template_str(
             containers.append(
                 {
                     config.ACI_FIELD_CONTAINERS_ID: image_name,
+                    config.ACI_FIELD_CONTAINERS_NAME: container_name,
                     config.ACI_FIELD_CONTAINERS_CONTAINERIMAGE: image_name,
                     config.ACI_FIELD_CONTAINERS_ENVS: process_env_vars_from_template(
                         AciPolicy.all_params, AciPolicy.all_vars, image_properties, approve_wildcards),
@@ -688,6 +723,7 @@ def load_policy_from_image_name(
 
         # assign image name to ID field
         container[config.ACI_FIELD_CONTAINERS_ID] = image_name
+        container[config.ACI_FIELD_CONTAINERS_NAME] = image_name
 
         container[config.ACI_FIELD_CONTAINERS_CONTAINERIMAGE] = image_name
         container[config.ACI_FIELD_CONTAINERS_ALLOW_STDIO_ACCESS] = not disable_stdio
@@ -779,11 +815,16 @@ def load_policy_from_str(data: str, debug_mode: bool = False) -> AciPolicy:
             container, config.ACI_FIELD_CONTAINERS_CONTAINERIMAGE
         )
 
+        container_name = case_insensitive_dict_get(
+            container, config.ACI_FIELD_CONTAINERS_NAME
+        ) or image_name
+
         if not image_name:
             eprint(
                 f'Field ["{config.ACI_FIELD_CONTAINERS_CONTAINERIMAGE}"] is empty or can not be found.'
             )
         container[config.ACI_FIELD_CONTAINERS_ID] = image_name
+        container[config.ACI_FIELD_CONTAINERS_NAME] = container_name
 
         # set the fields that are present in the container but not in the
         # config

diff --git a/src/confcom/azext_confcom/template_util.py b/src/confcom/azext_confcom/template_util.py
@@ -395,6 +395,29 @@ def change_key_names(dictionary) -> Dict:
     return dictionary
 
 
+def get_diff_size(diff: dict) -> int:
+    """Utility function: total number of entries in a diff dictionary.
+
+    Each top-level value that is a dict is measured with
+    get_diff_size_helper; every other top-level value counts as one.
+    """
+    return sum(
+        get_diff_size_helper(entry) if isinstance(entry, dict) else 1
+        for entry in diff.values()
+    )
+
+
+def get_diff_size_helper(diff: dict) -> int:
+    """Recursively count the entries of a nested diff dictionary.
+
+    Nested dicts are descended into, a list stored under the key
+    "env_rules" contributes one per element, and any other value
+    counts as a single entry.
+    """
+    total = 0
+    for name, entry in diff.items():
+        if isinstance(entry, dict):
+            total += get_diff_size_helper(entry)
+        elif name == "env_rules" and isinstance(entry, list):
+            total += len(entry)
+        else:
+            total += 1
+    return total
+
+
 def replace_params_and_vars(params: dict, vars_dict: dict, attribute):
     out = None
     if isinstance(attribute, (int, float, bool)):