Skip to content

Commit

Permalink
making diff mode more robust
Browse files Browse the repository at this point in the history
  • Loading branch information
SethHollandsworth committed Apr 12, 2024
1 parent 362a710 commit 06e6177
Show file tree
Hide file tree
Showing 7 changed files with 251 additions and 62 deletions.
2 changes: 2 additions & 0 deletions src/confcom/azext_confcom/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
ACI_FIELD_RESOURCES = "resources"
ACI_FIELD_RESOURCES_NAME = "name"
ACI_FIELD_CONTAINERS = "containers"
# key under which a container's name is stored in the container dicts;
# read by container.extract_container_name and written by the policy
# loaders in security_policy.py
ACI_FIELD_CONTAINERS_NAME = "name"
ACI_FIELD_CONTAINERS_CONTAINERIMAGE = "containerImage"
ACI_FIELD_CONTAINERS_ENVS = "environmentVariables"
ACI_FIELD_CONTAINERS_ENVS_NAME = "name"
Expand Down Expand Up @@ -74,6 +75,7 @@

# output json values
POLICY_FIELD_CONTAINERS = "containers"
# key for the container name in the policy elements built by
# _populate_policy_json_elements in container.py
POLICY_FIELD_CONTAINERS_NAME = "name"
POLICY_FIELD_CONTAINERS_ID = "id"
POLICY_FIELD_CONTAINERS_ELEMENTS = "elements"
POLICY_FIELD_CONTAINERS_LENGTH = "length"
Expand Down
14 changes: 14 additions & 0 deletions src/confcom/azext_confcom/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,10 @@ def extract_id(container_json: Any) -> str:
return case_insensitive_dict_get(container_json, config.ACI_FIELD_CONTAINERS_ID)


def extract_container_name(container_json: Any) -> str:
    """Return the value stored under the container-name field.

    The lookup is delegated to ``case_insensitive_dict_get`` with
    ``config.ACI_FIELD_CONTAINERS_NAME`` as the key, so whatever that
    helper returns for a missing key is passed straight through.
    """
    name_field = config.ACI_FIELD_CONTAINERS_NAME
    return case_insensitive_dict_get(container_json, name_field)


def extract_working_dir(container_json: Any) -> str:
# parse working directory
workingDir = case_insensitive_dict_get(
Expand Down Expand Up @@ -488,6 +492,7 @@ def from_json(

container_image = extract_container_image(container_json)
id_val = extract_id(container_json)
container_name = extract_container_name(container_json)
environment_rules = extract_env_rules(container_json=container_json)
command = extract_command(container_json)
working_dir = extract_working_dir(container_json)
Expand All @@ -507,6 +512,7 @@ def from_json(
allow_privilege_escalation = extract_allow_privilege_escalation(container_json)
return ContainerImage(
containerImage=container_image,
containerName=container_name,
environmentRules=environment_rules,
command=command,
workingDir=working_dir,
Expand Down Expand Up @@ -540,8 +546,10 @@ def __init__(
allowPrivilegeEscalation: bool = True,
execProcesses: List = None,
signals: List = None,
containerName: str = ""
) -> None:
self.containerImage = containerImage
self.containerName = containerName
if ":" in containerImage:
self.base, self.tag = containerImage.split(":", 1)
else:
Expand Down Expand Up @@ -573,6 +581,9 @@ def get_policy_json(self) -> str:
def get_id(self) -> str:
    """Return the identifier held in ``self._identifier``."""
    identifier = self._identifier
    return identifier

def get_name(self) -> str:
    """Return the container name held in ``self.containerName``."""
    name = self.containerName
    return name

def get_working_dir(self) -> str:
    """Return the working directory held in ``self._workingDir``."""
    working_dir = self._workingDir
    return working_dir

Expand Down Expand Up @@ -616,6 +627,8 @@ def set_extra_environment_rules(self, rules: Dict) -> None:
def parse_all_parameters_and_variables(self, params, vars_dict) -> None:
field_names = [
"containerImage",
"containerName",
"_identifier",
"_environmentRules",
"_command",
"_workingDir",
Expand Down Expand Up @@ -696,6 +709,7 @@ def _populate_policy_json_elements(self) -> Dict[str, Any]:

elements = {
config.POLICY_FIELD_CONTAINERS_ID: self._identifier,
config.POLICY_FIELD_CONTAINERS_NAME: self.get_name(),
config.POLICY_FIELD_CONTAINERS_ELEMENTS_LAYERS: self._layers,
config.POLICY_FIELD_CONTAINERS_ELEMENTS_COMMANDS: self._command,
config.POLICY_FIELD_CONTAINERS_ELEMENTS_ENVS: self._get_environment_rules(),
Expand Down
19 changes: 9 additions & 10 deletions src/confcom/azext_confcom/os_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,23 +145,22 @@ def map_image_from_tar(image_name: str, tar: TarFile, tar_location: str):
tar_dir = os.path.dirname(tar_location)
info_file = None
info_file_name = "manifest.json"
# if there's more than one image in the tarball, we need to do some more logic
if len(info_file_name) > 0:
# extract just the manifest file and see if any of the RepoTags match the image_name we're searching for
# the manifest.json should have a list of all the image tags
# and what json files they map to to get env vars, startup cmd, etc.
tar.extract(info_file_name, path=tar_dir)
manifest_path = os.path.join(tar_dir, info_file_name)
manifest = load_json_from_file(manifest_path)

# extract just the manifest file and see if any of the RepoTags match the image_name we're searching for
# the manifest.json should have a list of all the image tags
# and what json files they map to to get env vars, startup cmd, etc.
tar.extract(info_file_name, path=tar_dir)
manifest_path = os.path.join(tar_dir, info_file_name)
manifest = load_json_from_file(manifest_path)
try:
# if we match a RepoTag to the image, stop searching
for image in manifest:
if image_name in image.get("RepoTags"):
info_file = image.get("Config")
break
finally:
# remove the extracted manifest file to clean up
os.remove(manifest_path)
else:
eprint(f"Tarball at {tar_location} contains no images")

if not info_file:
return None
Expand Down
139 changes: 90 additions & 49 deletions src/confcom/azext_confcom/security_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@
extract_probe,
process_env_vars_from_template,
get_image_info,
get_tar_location_from_mapping
get_tar_location_from_mapping,
get_diff_size
)
from azext_confcom.rootfs_proxy import SecurityPolicyProxy

Expand Down Expand Up @@ -241,7 +242,15 @@ def validate_sidecars(self) -> Tuple[bool, Dict]:

def validate(self, policy, sidecar_validation=False) -> Tuple[bool, Dict]:
"""Utility method: general method to compare two policies.
One being the current object and the other is passed in as a parameter"""
One being the current object and the other is passed in as a parameter.
This is done by comparing first the container names (IDs) and then
the contents of the containers with special logic for environment variables,
since they can use regular expressions.
The minimum difference is used to match up the containers in the policy vs
the containers in the ARM template. Afterwards, the differences are compiled
and returned as a dictionary organized by container name."""
if not policy:
eprint("Policy is not in the expected form to validate against")

Expand All @@ -261,56 +270,75 @@ def validate(self, policy, sidecar_validation=False) -> Tuple[bool, Dict]:
# see if the IDs match with any container in the policy

id_val = case_insensitive_dict_get(container, config.ACI_FIELD_CONTAINERS_ID)
container_name = case_insensitive_dict_get(
container,
config.ACI_FIELD_CONTAINERS_NAME
)

idx = policy_ids.index(id_val) if id_val in policy_ids else None
# idx = policy_ids.index(id_val) if id_val in policy_ids else None
idx_arr = [i for i, item in enumerate(policy_ids) if item == id_val]

if idx is None:
reason_list[id_val] = f"{id_val} not found in policy"
if idx_arr == []:
reason_list[container_name] = f"{id_val} not found in policy"
continue
matching_policy_container = policy[idx]

# copy so we can delete fields and not affect the original data
# structure
container1 = copy.deepcopy(matching_policy_container)
container2 = copy.deepcopy(container)

# the ID does not matter so delete them from comparison
container1.pop(config.POLICY_FIELD_CONTAINERS_ID, None)
container2.pop(config.POLICY_FIELD_CONTAINERS_ID, None)
# env vars will be compared later so delete them from this
# comparison
container1.pop(config.POLICY_FIELD_CONTAINERS_ELEMENTS_ENVS, None)
container2.pop(config.POLICY_FIELD_CONTAINERS_ELEMENTS_ENVS, None)

container_diff = compare_containers(container1, container2)

# for sidecar validation, it's fine if the policy has
# more things defined than the image, so we can take
# those out of the diff because it would not hinder deployment
if sidecar_validation:
for k in list(container_diff.keys()):
if "removed" in k:
container_diff.pop(k)
if container_diff != {}:
reason_list[id_val] = container_diff

env_reason_list = compare_env_vars(
id_val,
case_insensitive_dict_get(
matching_policy_container,
config.POLICY_FIELD_CONTAINERS_ELEMENTS_ENVS,
),
case_insensitive_dict_get(
container, config.POLICY_FIELD_CONTAINERS_ELEMENTS_ENVS
),
)

# merge the output of checking env vars with the original reason
# list
for key, value in env_reason_list.items():
if key not in reason_list:
reason_list[key] = {}
reason_list[key].update(value)
temp_diff_list = []
for idx in idx_arr:
temp_diff = {}
matching_policy_container = policy[idx]

# copy so we can delete fields and not affect the original data
# structure
container1 = copy.deepcopy(matching_policy_container)
container2 = copy.deepcopy(container)

# the ID does not matter so delete them from comparison
container1.pop(config.POLICY_FIELD_CONTAINERS_ID, None)
container2.pop(config.POLICY_FIELD_CONTAINERS_ID, None)
# env vars will be compared later so delete them from this
# comparison
container1.pop(config.POLICY_FIELD_CONTAINERS_ELEMENTS_ENVS, None)
container2.pop(config.POLICY_FIELD_CONTAINERS_ELEMENTS_ENVS, None)

diff_values = compare_containers(container1, container2)
# label the diff with the ID so it can be merged
# with the env vars and other container diffs
temp_diff[container_name] = diff_values
# for sidecar validation, it's fine if the policy has
# more things defined than the image, so we can take
# those out of the diff because it would not hinder deployment
if sidecar_validation:
for k in list(temp_diff.keys()):
if "removed" in k:
temp_diff.pop(k)

env_reason_list = compare_env_vars(
container_name,
case_insensitive_dict_get(
matching_policy_container,
config.POLICY_FIELD_CONTAINERS_ELEMENTS_ENVS,
),
case_insensitive_dict_get(
container, config.POLICY_FIELD_CONTAINERS_ELEMENTS_ENVS
),
)

# merge the output of checking env vars with the original reason
# list
for key, value in env_reason_list.items():
if key not in temp_diff:
temp_diff[key] = {}
temp_diff[key].update(value)

temp_diff_list.append(copy.deepcopy(temp_diff))

diff_sizes = [get_diff_size(diff) for diff in copy.deepcopy(temp_diff_list)]

if diff_sizes.count(0) >= 1:
continue

reason_list.update(temp_diff_list[diff_sizes.index(min(diff_sizes))])

is_valid = not bool(reason_list)
return is_valid, reason_list

Expand Down Expand Up @@ -440,7 +468,7 @@ def populate_policy_content_for_all_images(

if (
not deepdiff.DeepDiff(image.get_user(), config.DEFAULT_USER, ignore_order=True)
and image_info.get("User") != ""
and (image_info.get("User") != "" and image_info.get("User") is not None)
):
# valid values are in the form "user", "user:group", "uid", "uid:gid", "user:gid", "uid:group"
# where each entry is either a string or an unsigned integer
Expand Down Expand Up @@ -490,6 +518,7 @@ def pull_image(self, image: ContainerImage) -> Any:
return client.images.pull(image.base, image.tag)


# pylint: disable=R0914,
def load_policy_from_arm_template_str(
template_data: str,
parameter_data: str,
Expand Down Expand Up @@ -594,6 +623,11 @@ def load_policy_from_arm_template_str(
image_properties, config.ACI_FIELD_TEMPLATE_IMAGE
)

# this is guaranteed unique for a valid ARM template
container_name = case_insensitive_dict_get(
container, config.ACI_FIELD_CONTAINERS_NAME
)

if not image_name:
eprint(
f'Field ["{config.ACI_FIELD_TEMPLATE_PARAMETERS}"] is empty or cannot be found'
Expand All @@ -606,6 +640,7 @@ def load_policy_from_arm_template_str(
containers.append(
{
config.ACI_FIELD_CONTAINERS_ID: image_name,
config.ACI_FIELD_CONTAINERS_NAME: container_name,
config.ACI_FIELD_CONTAINERS_CONTAINERIMAGE: image_name,
config.ACI_FIELD_CONTAINERS_ENVS: process_env_vars_from_template(
AciPolicy.all_params, AciPolicy.all_vars, image_properties, approve_wildcards),
Expand Down Expand Up @@ -688,6 +723,7 @@ def load_policy_from_image_name(

# assign image name to ID field
container[config.ACI_FIELD_CONTAINERS_ID] = image_name
container[config.ACI_FIELD_CONTAINERS_NAME] = image_name

container[config.ACI_FIELD_CONTAINERS_CONTAINERIMAGE] = image_name
container[config.ACI_FIELD_CONTAINERS_ALLOW_STDIO_ACCESS] = not disable_stdio
Expand Down Expand Up @@ -779,11 +815,16 @@ def load_policy_from_str(data: str, debug_mode: bool = False) -> AciPolicy:
container, config.ACI_FIELD_CONTAINERS_CONTAINERIMAGE
)

container_name = case_insensitive_dict_get(
container, config.ACI_FIELD_CONTAINERS_NAME
) or image_name

if not image_name:
eprint(
f'Field ["{config.ACI_FIELD_CONTAINERS_CONTAINERIMAGE}"] is empty or can not be found.'
)
container[config.ACI_FIELD_CONTAINERS_ID] = image_name
container[config.ACI_FIELD_CONTAINERS_NAME] = container_name

# set the fields that are present in the container but not in the
# config
Expand Down
23 changes: 23 additions & 0 deletions src/confcom/azext_confcom/template_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,29 @@ def change_key_names(dictionary) -> Dict:
return dictionary


def get_diff_size(diff: dict) -> int:
    """Utility function: get the size of the diff dictionary.

    Every top-level value that is itself a dict is measured with
    ``get_diff_size_helper``; any other top-level value counts as one
    difference. An empty ``diff`` therefore has size 0.
    """
    return sum(
        get_diff_size_helper(entry) if isinstance(entry, dict) else 1
        for entry in diff.values()
    )


def get_diff_size_helper(diff: dict) -> int:
    """Count the individual differences contained in a nested diff dict.

    Nested dicts are descended into and contribute the sum of their own
    entries. A list stored under the key ``"env_rules"`` contributes one
    per element; every other value contributes exactly one.
    """
    total = 0
    # explicit work-list instead of recursion; order does not matter
    # because the result is a plain sum
    pending = [diff]
    while pending:
        current = pending.pop()
        for name, value in current.items():
            if isinstance(value, dict):
                pending.append(value)
            elif name == "env_rules" and isinstance(value, list):
                total += len(value)
            else:
                total += 1
    return total


def replace_params_and_vars(params: dict, vars_dict: dict, attribute):
out = None
if isinstance(attribute, (int, float, bool)):
Expand Down
Loading

0 comments on commit 06e6177

Please sign in to comment.