From b79451d4f6b4955569cae15d82fac6387e8cd7d3 Mon Sep 17 00:00:00 2001 From: DJensen94 <79864006+DJensen94@users.noreply.github.com> Date: Mon, 17 Feb 2025 15:50:51 -0700 Subject: [PATCH 1/8] Add WAS dmz sync Add WAS dmz sync --- .../xfd_django/xfd_api/schema_models/scan.py | 8 + .../src/xfd_django/xfd_api/tasks/was_sync.py | 223 ++++++++++++++++++ frontend/src/pages/Scans/ScanTasksView.tsx | 1 + 3 files changed, 232 insertions(+) create mode 100644 backend/src/xfd_django/xfd_api/tasks/was_sync.py diff --git a/backend/src/xfd_django/xfd_api/schema_models/scan.py b/backend/src/xfd_django/xfd_api/schema_models/scan.py index b1f80557..923417c2 100644 --- a/backend/src/xfd_django/xfd_api/schema_models/scan.py +++ b/backend/src/xfd_django/xfd_api/schema_models/scan.py @@ -317,6 +317,14 @@ class GenericMessageResponseModel(BaseModel): memory="16384", description="Loops through all domains and determines if their associated IP can be found in a report Cidr block.", ), + "was_sync": ScanSchema( + type="fargate", + isPassive=True, + global_scan=True, + cpu="1024", + memory="8192", + description="Pull in WAS finding data from commercial mdl", + ), "xpanse_sync": ScanSchema( type="fargate", isPassive=True, diff --git a/backend/src/xfd_django/xfd_api/tasks/was_sync.py b/backend/src/xfd_django/xfd_api/tasks/was_sync.py new file mode 100644 index 00000000..a79f673c --- /dev/null +++ b/backend/src/xfd_django/xfd_api/tasks/was_sync.py @@ -0,0 +1,223 @@ +"""WasSync scan.""" +# Standard Python Libraries +import datetime +import os +import time + +# Third-Party Libraries +import django +import requests +from xfd_mini_dl.models import DataSource, Organization, WasFindings, WasReport + + +# Django setup +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "xfd_django.settings") +os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true" +django.setup() + +# Constants +MAX_RETRIES = 3 # Max retries for failed tasks +TIMEOUT = 60 # Timeout in seconds for waiting on task completion + + +def handler(event): + """Retrieve and save WAS Findings from the DMZ.""" + try: + print('in handler') + main() + return { + "statusCode": 200, + "body": "DMZ WAS Finding sync completed successfully.", + } + except Exception as e: + return {"statusCode": 500, "body": str(e)} + + +def main(): + """Fetch and save DMZ WAS Findings.""" + print('in main') + try: + all_orgs = Organization.objects.all() + print(all_orgs) + + # all_orgs = Organization.objects.filter(acronym__in=['USAGM', 'DHS']) + + + # Step 1: Get the current date and time in UTC + current_time = datetime.datetime.now(datetime.timezone.utc) + # Step 2: Subtract days from the current date + days_ago = current_time - datetime.timedelta(days=15) + # Step 3: Convert to an ISO 8601 string with timezone (e.g., UTC) + since_timestamp_str = days_ago.date().isoformat() + + for org in all_orgs: + print( + "Processing organization: {acronym}, {name}".format( + acronym=org.acronym, name=org.name + ) + ) + done = False + page = 1 + total_pages = 2 + per_page = 200 + retry_count = 0 + + while not done: + data = fetch_dmz_was_findings_task( + org.acronym, page, per_page, since_timestamp_str + ) + print(data) + if not data or data.get("status") != "Processing": + print( + "Failed to start Was Finding sync task for org: {acronym}, {name}".format( + acronym=org.acronym, name=org.name + ) + ) + + retry_count += 1 + + if retry_count >= MAX_RETRIES: + print( + "Max retries reached for org: {acronym}. 
Moving to next organization.".format( + acronym=org.acronym + ) + ) + break # Skip to next organization + + time.sleep(5) + continue + + response = fetch_dmz_was_finding_data(data.get("task_id", None)) + print(response) + while response and response.get("status") == "Pending": + time.sleep(1) + response = fetch_dmz_was_finding_data(data.get("task_id", None)) + + if response and response.get("status") == "Completed": + was_finding_array = ( + response.get("result", {}).get("data", []) + ) + total_pages = response.get("result", {}).get("total_pages", 1) + current_page = response.get("result", {}).get("current_page", 1) + print("findings") + print(was_finding_array) + save_findings_to_db( + was_finding_array, org + ) + + if current_page >= total_pages: + done = True + page += 1 + else: + raise Exception( + "Task error: {error} - Status: {status}".format( + error=response.get("error"), status=response.get("status") + ) + ) + except Exception as e: + print('failed to in main: {error}'.format(error=e)) + +def fetch_dmz_was_findings_task(org_acronym, page, per_page, since_timestamp): + """Fetch Was Finding task id.""" + print( + "Fetching WAS finding task for organization: {acronym}".format( + acronym=org_acronym + ) + ) + headers = { + "X-API-KEY": os.getenv("CF_API_KEY"), + "access_token": os.getenv("PE_API_KEY"), + "Content-Type": "", + } + + data = { + "org_acronym": org_acronym, + "page": page, + "per_page": per_page, + "since_timestamp": since_timestamp, + } + + try: + response = requests.post( + "https://api.staging-cd.crossfeed.cyber.dhs.gov/pe/apiv1/get_mdl_was_findings", + headers=headers, + json=data, + timeout=20, # Timeout in seconds + ) + response.raise_for_status() + return response.json() + except requests.exceptions.RequestException as e: + print("Error fetching DMZ task: {error}".format(error=e)) + return None + + +def fetch_dmz_was_finding_data(task_id): + """Fetch DMZ WAS Finding data for a task.""" + url = "https://api.staging-cd.crossfeed.cyber.dhs.gov/pe/apiv1/get_mdl_was_findings/task/{t_id}".format( + t_id=task_id + ) + headers = { + "X-API-KEY": os.getenv("CF_API_KEY"), + "access_token": os.getenv("PE_API_KEY"), + "Content-Type": "", + } + + try: + response = requests.get(url, headers=headers, timeout=20) + response.raise_for_status() + return response.json() + except requests.exceptions.RequestException as e: + print("Error fetching DMZ Was Finding data: {error}".format(error=e)) + return None + +def convert_timestamp_to_date(timestamp: str) -> str: + """Convert an ISO 8601 timestamp to a date string in YYYY-MM-DD format.""" + if timestamp: + date_object = datetime.datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%S") + formatted_date = date_object.strftime("%Y-%m-%d") + else: + formatted_date = None + return formatted_date + +def save_findings_to_db(was_finding_array, org): + """Save WAS finding data to the mini datalake using Django ORM.""" + if was_finding_array: + for asset in was_finding_array: + try: + + ( + was_finding_object, + created, + ) = WasFindings.objects.update_or_create( + finding_uid= asset.get('finding_uid'), + defaults={ + 'finding_type': asset.get('finding_type'), + 'webapp_id': asset.get('webapp_id'), + 'webapp_url': asset.get('webapp_url'), + 'webapp_name': asset.get('webapp_name'), + 'was_org_id': asset.get('was_org_id'), + 'name': asset.get('name'), + 'owasp_category': asset.get('owasp_category'), + 'severity': asset.get('severity'), + 'times_detected': asset.get('times_detected'), + 'cvss_v3_attack_vector': asset.get('cvss_v3_attack_vector'), + 
'base_score': asset.get('base_score'), + 'temporal_score': asset.get('temporal_score'), + 'fstatus': asset.get('fstatus'), + 'last_detected': convert_timestamp_to_date(asset.get('last_detected')), + 'first_detected': convert_timestamp_to_date(asset.get('first_detected')), + 'potential': asset.get('potential'), + 'cwe_list': asset.get('cwe_list'), + 'wasc_list': asset.get('wasc_list'), + 'last_tested': convert_timestamp_to_date(asset.get('last_tested')), + 'fixed_date': convert_timestamp_to_date(asset.get('fixed_date')), + 'is_ignored': asset.get('is_ignored'), + 'is_remediated': asset.get('is_remediated'), + 'url': asset.get('url'), + 'qid': asset.get('qid'), + 'response': asset.get('response') + }, + ) + + except Exception as e: + print("Error saving Was Finding: {error}".format(error=e)) diff --git a/frontend/src/pages/Scans/ScanTasksView.tsx b/frontend/src/pages/Scans/ScanTasksView.tsx index 1c3eea9a..84eef00d 100644 --- a/frontend/src/pages/Scans/ScanTasksView.tsx +++ b/frontend/src/pages/Scans/ScanTasksView.tsx @@ -282,6 +282,7 @@ export const ScanTasksView: React.FC = () => { 'lookingGlass', 'dnstwist', 'rootDomainSync', + 'was_sync', 'xpanse_sync' ]; From 9eb11de15d131bbae601835fabe23dca64a05e90 Mon Sep 17 00:00:00 2001 From: DJensen94 <79864006+DJensen94@users.noreply.github.com> Date: Tue, 18 Feb 2025 14:47:45 -0700 Subject: [PATCH 2/8] run linter on WAS sync run linter on WAS sync --- .../src/xfd_django/xfd_api/tasks/was_sync.py | 86 +++++++++---------- 1 file changed, 42 insertions(+), 44 deletions(-) diff --git a/backend/src/xfd_django/xfd_api/tasks/was_sync.py b/backend/src/xfd_django/xfd_api/tasks/was_sync.py index a79f673c..f206eb46 100644 --- a/backend/src/xfd_django/xfd_api/tasks/was_sync.py +++ b/backend/src/xfd_django/xfd_api/tasks/was_sync.py @@ -9,7 +9,6 @@ import requests from xfd_mini_dl.models import DataSource, Organization, WasFindings, WasReport - # Django setup os.environ.setdefault("DJANGO_SETTINGS_MODULE", "xfd_django.settings") os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true" @@ -23,7 +22,6 @@ def handler(event): """Retrieve and save WAS Findings from the DMZ.""" try: - print('in handler') main() return { "statusCode": 200, @@ -35,21 +33,17 @@ def handler(event): def main(): """Fetch and save DMZ WAS Findings.""" - print('in main') try: all_orgs = Organization.objects.all() - print(all_orgs) - # all_orgs = Organization.objects.filter(acronym__in=['USAGM', 'DHS']) - # Step 1: Get the current date and time in UTC current_time = datetime.datetime.now(datetime.timezone.utc) # Step 2: Subtract days from the current date days_ago = current_time - datetime.timedelta(days=15) # Step 3: Convert to an ISO 8601 string with timezone (e.g., UTC) since_timestamp_str = days_ago.date().isoformat() - + for org in all_orgs: print( "Processing organization: {acronym}, {name}".format( @@ -66,7 +60,6 @@ def main(): data = fetch_dmz_was_findings_task( org.acronym, page, per_page, since_timestamp_str ) - print(data) if not data or data.get("status") != "Processing": print( "Failed to start Was Finding sync task for org: {acronym}, {name}".format( @@ -88,22 +81,17 @@ def main(): continue response = fetch_dmz_was_finding_data(data.get("task_id", None)) - print(response) while response and response.get("status") == "Pending": time.sleep(1) response = fetch_dmz_was_finding_data(data.get("task_id", None)) if response and response.get("status") == "Completed": - was_finding_array = ( - response.get("result", {}).get("data", []) - ) + was_finding_array = response.get("result", 
{}).get("data", []) total_pages = response.get("result", {}).get("total_pages", 1) current_page = response.get("result", {}).get("current_page", 1) print("findings") print(was_finding_array) - save_findings_to_db( - was_finding_array, org - ) + save_findings_to_db(was_finding_array, org) if current_page >= total_pages: done = True @@ -115,7 +103,8 @@ def main(): ) ) except Exception as e: - print('failed to in main: {error}'.format(error=e)) + print("failed to in main: {error}".format(error=e)) + def fetch_dmz_was_findings_task(org_acronym, page, per_page, since_timestamp): """Fetch Was Finding task id.""" @@ -170,6 +159,7 @@ def fetch_dmz_was_finding_data(task_id): print("Error fetching DMZ Was Finding data: {error}".format(error=e)) return None + def convert_timestamp_to_date(timestamp: str) -> str: """Convert an ISO 8601 timestamp to a date string in YYYY-MM-DD format.""" if timestamp: @@ -179,45 +169,53 @@ def convert_timestamp_to_date(timestamp: str) -> str: formatted_date = None return formatted_date + def save_findings_to_db(was_finding_array, org): """Save WAS finding data to the mini datalake using Django ORM.""" if was_finding_array: for asset in was_finding_array: try: - ( was_finding_object, created, ) = WasFindings.objects.update_or_create( - finding_uid= asset.get('finding_uid'), + finding_uid=asset.get("finding_uid"), defaults={ - 'finding_type': asset.get('finding_type'), - 'webapp_id': asset.get('webapp_id'), - 'webapp_url': asset.get('webapp_url'), - 'webapp_name': asset.get('webapp_name'), - 'was_org_id': asset.get('was_org_id'), - 'name': asset.get('name'), - 'owasp_category': asset.get('owasp_category'), - 'severity': asset.get('severity'), - 'times_detected': asset.get('times_detected'), - 'cvss_v3_attack_vector': asset.get('cvss_v3_attack_vector'), - 'base_score': asset.get('base_score'), - 'temporal_score': asset.get('temporal_score'), - 'fstatus': asset.get('fstatus'), - 'last_detected': convert_timestamp_to_date(asset.get('last_detected')), - 'first_detected': convert_timestamp_to_date(asset.get('first_detected')), - 'potential': asset.get('potential'), - 'cwe_list': asset.get('cwe_list'), - 'wasc_list': asset.get('wasc_list'), - 'last_tested': convert_timestamp_to_date(asset.get('last_tested')), - 'fixed_date': convert_timestamp_to_date(asset.get('fixed_date')), - 'is_ignored': asset.get('is_ignored'), - 'is_remediated': asset.get('is_remediated'), - 'url': asset.get('url'), - 'qid': asset.get('qid'), - 'response': asset.get('response') + "finding_type": asset.get("finding_type"), + "webapp_id": asset.get("webapp_id"), + "webapp_url": asset.get("webapp_url"), + "webapp_name": asset.get("webapp_name"), + "was_org_id": asset.get("was_org_id"), + "name": asset.get("name"), + "owasp_category": asset.get("owasp_category"), + "severity": asset.get("severity"), + "times_detected": asset.get("times_detected"), + "cvss_v3_attack_vector": asset.get("cvss_v3_attack_vector"), + "base_score": asset.get("base_score"), + "temporal_score": asset.get("temporal_score"), + "fstatus": asset.get("fstatus"), + "last_detected": convert_timestamp_to_date( + asset.get("last_detected") + ), + "first_detected": convert_timestamp_to_date( + asset.get("first_detected") + ), + "potential": asset.get("potential"), + "cwe_list": asset.get("cwe_list"), + "wasc_list": asset.get("wasc_list"), + "last_tested": convert_timestamp_to_date( + asset.get("last_tested") + ), + "fixed_date": convert_timestamp_to_date( + asset.get("fixed_date") + ), + "is_ignored": asset.get("is_ignored"), + 
"is_remediated": asset.get("is_remediated"), + "url": asset.get("url"), + "qid": asset.get("qid"), + "response": asset.get("response"), }, ) - + except Exception as e: print("Error saving Was Finding: {error}".format(error=e)) From b3c9ea9f53754ae98ddb40150929e7aa77b70a4b Mon Sep 17 00:00:00 2001 From: DJensen94 <79864006+DJensen94@users.noreply.github.com> Date: Wed, 19 Feb 2025 09:09:20 -0700 Subject: [PATCH 3/8] remove unused imports remove unused model imports --- backend/src/xfd_django/xfd_api/tasks/was_sync.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/src/xfd_django/xfd_api/tasks/was_sync.py b/backend/src/xfd_django/xfd_api/tasks/was_sync.py index f206eb46..a93e2d44 100644 --- a/backend/src/xfd_django/xfd_api/tasks/was_sync.py +++ b/backend/src/xfd_django/xfd_api/tasks/was_sync.py @@ -7,7 +7,7 @@ # Third-Party Libraries import django import requests -from xfd_mini_dl.models import DataSource, Organization, WasFindings, WasReport +from xfd_mini_dl.models import Organization, WasFindings # Django setup os.environ.setdefault("DJANGO_SETTINGS_MODULE", "xfd_django.settings") From fe104b5712a3c45c3a98609b6989b53d6422a394 Mon Sep 17 00:00:00 2001 From: DJensen94 <79864006+DJensen94@users.noreply.github.com> Date: Wed, 19 Feb 2025 11:08:06 -0700 Subject: [PATCH 4/8] lint on unrelated files lint on unrelated files --- .../xfd_django/xfd_api/tasks/credential_sync.py | 6 ++++-- .../src/xfd_django/xfd_api/tasks/shodan_sync.py | 11 ++++++++--- .../src/xfd_django/xfd_api/tasks/xpanse_sync.py | 15 ++++++++++----- 3 files changed, 22 insertions(+), 10 deletions(-) diff --git a/backend/src/xfd_django/xfd_api/tasks/credential_sync.py b/backend/src/xfd_django/xfd_api/tasks/credential_sync.py index 2c5fb4d2..4356c601 100644 --- a/backend/src/xfd_django/xfd_api/tasks/credential_sync.py +++ b/backend/src/xfd_django/xfd_api/tasks/credential_sync.py @@ -64,7 +64,9 @@ def main(): retry_count = 0 while not done: - data = fetch_dmz_cred_task(org.acronym, page, per_page, since_timestamp_str) + data = fetch_dmz_cred_task( + org.acronym, page, per_page, since_timestamp_str + ) if not data or data.get("status") != "Processing": print( "Failed to start Credential Sync task for org: {acronym}, {name}".format( @@ -120,7 +122,7 @@ def main(): ) ) except Exception as e: - print('Scan failed to complete: {error}'.format(error=e)) + print("Scan failed to complete: {error}".format(error=e)) def fetch_dmz_cred_task(org_acronym, page, per_page, since_timestamp): diff --git a/backend/src/xfd_django/xfd_api/tasks/shodan_sync.py b/backend/src/xfd_django/xfd_api/tasks/shodan_sync.py index decf5c5a..4682d35d 100644 --- a/backend/src/xfd_django/xfd_api/tasks/shodan_sync.py +++ b/backend/src/xfd_django/xfd_api/tasks/shodan_sync.py @@ -98,10 +98,14 @@ def main(): if response and response.get("status") == "Completed": shodan_asset_array = ( - response.get("result", {}).get("data", {}).get("shodan_assets", []) + response.get("result", {}) + .get("data", {}) + .get("shodan_assets", []) ) shodan_vuln_array = ( - response.get("result", {}).get("data", {}).get("shodan_vulns", []) + response.get("result", {}) + .get("data", {}) + .get("shodan_vulns", []) ) total_pages = response.get("result", {}).get("total_pages", 1) current_page = response.get("result", {}).get("current_page", 1) @@ -123,7 +127,8 @@ def main(): ) ) except Exception as e: - print('Scan failed to complete: {error}'.format(error=e)) + print("Scan failed to complete: {error}".format(error=e)) + def fetch_dmz_shodan_task(org_acronym, page, 
per_page, since_timestamp): """Fetch shodan task id.""" diff --git a/backend/src/xfd_django/xfd_api/tasks/xpanse_sync.py b/backend/src/xfd_django/xfd_api/tasks/xpanse_sync.py index 13cb37a7..01a7591b 100644 --- a/backend/src/xfd_django/xfd_api/tasks/xpanse_sync.py +++ b/backend/src/xfd_django/xfd_api/tasks/xpanse_sync.py @@ -99,13 +99,18 @@ def main(): else: raise Exception( "Task error: {error} - Status: {status}".format( - error=response.get("error"), status=response.get("status") + error=response.get("error"), + status=response.get("status"), ) ) else: - print('{name} does not have a linked CyHy org'.format(name=business_unit.entity_name)) + print( + "{name} does not have a linked CyHy org".format( + name=business_unit.entity_name + ) + ) except Exception as e: - print('Scan failed to complete: {error}'.format(error=e)) + print("Scan failed to complete: {error}".format(error=e)) def is_bu_pull_day(): @@ -166,8 +171,8 @@ def pull_and_save_business_units(): entity_name=business_unit.get("entity_name"), defaults=mdl_defaults ) bu_list.append(mdl_business_unit_object) - - print('Business Units saved to MDL.') + + print("Business Units saved to MDL.") return bu_list except Exception as e: print("Error fetching DMZ Business Unit pull: {error}".format(error=e)) From a5424219ea04db0a755e0be58018ab01ada4bb26 Mon Sep 17 00:00:00 2001 From: DJensen94 <79864006+DJensen94@users.noreply.github.com> Date: Wed, 19 Feb 2025 11:41:46 -0700 Subject: [PATCH 5/8] update date function to allow None return update date function to allow None return --- backend/src/xfd_django/xfd_api/tasks/was_sync.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/src/xfd_django/xfd_api/tasks/was_sync.py b/backend/src/xfd_django/xfd_api/tasks/was_sync.py index a93e2d44..06b8efd9 100644 --- a/backend/src/xfd_django/xfd_api/tasks/was_sync.py +++ b/backend/src/xfd_django/xfd_api/tasks/was_sync.py @@ -160,12 +160,12 @@ def fetch_dmz_was_finding_data(task_id): return None -def convert_timestamp_to_date(timestamp: str) -> str: +def convert_timestamp_to_date(timestamp: str) -> str | None: """Convert an ISO 8601 timestamp to a date string in YYYY-MM-DD format.""" - if timestamp: + try: date_object = datetime.datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%S") formatted_date = date_object.strftime("%Y-%m-%d") - else: + except Exception: formatted_date = None return formatted_date From 5fa7a724465da3dfb6a524663bedfbe71c791f43 Mon Sep 17 00:00:00 2001 From: DJensen94 <79864006+DJensen94@users.noreply.github.com> Date: Wed, 19 Feb 2025 13:28:02 -0700 Subject: [PATCH 6/8] update scan to pull more often update scan to pull more often --- backend/src/xfd_django/xfd_api/tasks/was_sync.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/backend/src/xfd_django/xfd_api/tasks/was_sync.py b/backend/src/xfd_django/xfd_api/tasks/was_sync.py index 06b8efd9..05551e7c 100644 --- a/backend/src/xfd_django/xfd_api/tasks/was_sync.py +++ b/backend/src/xfd_django/xfd_api/tasks/was_sync.py @@ -40,7 +40,8 @@ def main(): # Step 1: Get the current date and time in UTC current_time = datetime.datetime.now(datetime.timezone.utc) # Step 2: Subtract days from the current date - days_ago = current_time - datetime.timedelta(days=15) + # Adjust based on PE WAS scan cadence + days_ago = current_time - datetime.timedelta(days=5) # Step 3: Convert to an ISO 8601 string with timezone (e.g., UTC) since_timestamp_str = days_ago.date().isoformat() From 5157e2c1debf39de6af56e53166ff185f06a7fa0 Mon Sep 17 00:00:00 2001 From: 
DJensen94 <79864006+DJensen94@users.noreply.github.com> Date: Thu, 20 Feb 2025 11:29:14 -0700 Subject: [PATCH 7/8] remove duplicate code remove duplicate code for headers and date calculations --- .../xfd_api/helpers/date_time_helpers.py | 12 ++++++++++ .../xfd_api/tasks/credential_sync.py | 21 +++++------------- .../xfd_django/xfd_api/tasks/shodan_sync.py | 22 +++++-------------- .../src/xfd_django/xfd_api/tasks/was_sync.py | 22 +++++-------------- .../xfd_django/xfd_api/tasks/xpanse_sync.py | 12 +++++----- backend/src/xfd_django/xfd_django/settings.py | 6 +++++ 6 files changed, 39 insertions(+), 56 deletions(-) create mode 100644 backend/src/xfd_django/xfd_api/helpers/date_time_helpers.py diff --git a/backend/src/xfd_django/xfd_api/helpers/date_time_helpers.py b/backend/src/xfd_django/xfd_api/helpers/date_time_helpers.py new file mode 100644 index 00000000..3ac62c23 --- /dev/null +++ b/backend/src/xfd_django/xfd_api/helpers/date_time_helpers.py @@ -0,0 +1,12 @@ +"""Helper methods to deal with date and time manipulation.""" +import datetime + +def calculate_days_back(days_ago:int): + # Step 1: Get the current date and time in UTC + current_time = datetime.datetime.now(datetime.timezone.utc) + # Step 2: Subtract days from the current date + days_ago = current_time - datetime.timedelta(days=days_ago) + # Step 3: Convert to an ISO 8601 string with timezone (e.g., UTC) + since_timestamp_str = days_ago.isoformat() + + return since_timestamp_str \ No newline at end of file diff --git a/backend/src/xfd_django/xfd_api/tasks/credential_sync.py b/backend/src/xfd_django/xfd_api/tasks/credential_sync.py index 4356c601..7a9bfa02 100644 --- a/backend/src/xfd_django/xfd_api/tasks/credential_sync.py +++ b/backend/src/xfd_django/xfd_api/tasks/credential_sync.py @@ -6,8 +6,10 @@ # Third-Party Libraries import django +from django.conf import settings from django.utils import timezone import requests +from xfd_api.helpers.date_time_helpers import calculate_days_back from xfd_mini_dl.models import ( CredentialBreaches, CredentialExposures, @@ -24,6 +26,8 @@ MAX_RETRIES = 3 # Max retries for failed tasks TIMEOUT = 60 # Timeout in seconds for waiting on task completion +headers = settings.DMZ_API_HEADER + def handler(event): """Retrieve and save credential breaches and exposures from the DMZ.""" @@ -44,12 +48,7 @@ def main(): # For testing # all_orgs = Organization.objects.filter(acronym__in=['USAGM', 'DHS']) - # Step 1: Get the current date and time in UTC - current_time = datetime.datetime.now(datetime.timezone.utc) - # Step 2: Subtract days from the current date - days_ago = current_time - datetime.timedelta(days=15) - # Step 3: Convert to an ISO 8601 string with timezone (e.g., UTC) - since_timestamp_str = days_ago.isoformat() + since_timestamp_str = calculate_days_back(15) for org in all_orgs: print( @@ -132,11 +131,6 @@ def fetch_dmz_cred_task(org_acronym, page, per_page, since_timestamp): acronym=org_acronym ) ) - headers = { - "X-API-KEY": os.getenv("CF_API_KEY"), - "access_token": os.getenv("PE_API_KEY"), - "Content-Type": "", - } data = { "org_acronym": org_acronym, @@ -164,11 +158,6 @@ def fetch_dmz_cred_data(task_id): url = "https://api.staging-cd.crossfeed.cyber.dhs.gov/pe/apiv1/get_mdl_cred_data/task/{t_id}".format( t_id=task_id ) - headers = { - "X-API-KEY": os.getenv("CF_API_KEY"), - "access_token": os.getenv("PE_API_KEY"), - "Content-Type": "", - } try: response = requests.get(url, headers=headers, timeout=20) diff --git a/backend/src/xfd_django/xfd_api/tasks/shodan_sync.py 
b/backend/src/xfd_django/xfd_api/tasks/shodan_sync.py index 4682d35d..d013bcb6 100644 --- a/backend/src/xfd_django/xfd_api/tasks/shodan_sync.py +++ b/backend/src/xfd_django/xfd_api/tasks/shodan_sync.py @@ -1,13 +1,14 @@ """ShodanSync scan.""" # Standard Python Libraries -import datetime import os import time # Third-Party Libraries import django +from django.conf import settings from django.utils import timezone import requests +from xfd_api.helpers.date_time_helpers import calculate_days_back from xfd_mini_dl.models import DataSource, Organization, ShodanAssets, ShodanVulns # Django setup @@ -19,6 +20,8 @@ MAX_RETRIES = 3 # Max retries for failed tasks TIMEOUT = 60 # Timeout in seconds for waiting on task completion +headers = settings.DMZ_API_HEADER + def handler(event): """Retrieve and save shodan vulnerabilities and assets from the DMZ.""" @@ -46,12 +49,7 @@ def main(): }, ) - # Step 1: Get the current date and time in UTC - current_time = datetime.datetime.now(datetime.timezone.utc) - # Step 2: Subtract days from the current date - days_ago = current_time - datetime.timedelta(days=15) - # Step 3: Convert to an ISO 8601 string with timezone (e.g., UTC) - since_timestamp_str = days_ago.isoformat() + since_timestamp_str = calculate_days_back(15) for org in all_orgs: print( @@ -137,11 +135,6 @@ def fetch_dmz_shodan_task(org_acronym, page, per_page, since_timestamp): acronym=org_acronym ) ) - headers = { - "X-API-KEY": os.getenv("CF_API_KEY"), - "access_token": os.getenv("PE_API_KEY"), - "Content-Type": "", - } data = { "org_acronym": org_acronym, @@ -169,11 +162,6 @@ def fetch_dmz_shodan_data(task_id): url = "https://api.staging-cd.crossfeed.cyber.dhs.gov/pe/apiv1/get_mdl_shodan_data/task/{t_id}".format( t_id=task_id ) - headers = { - "X-API-KEY": os.getenv("CF_API_KEY"), - "access_token": os.getenv("PE_API_KEY"), - "Content-Type": "", - } try: response = requests.get(url, headers=headers, timeout=20) diff --git a/backend/src/xfd_django/xfd_api/tasks/was_sync.py b/backend/src/xfd_django/xfd_api/tasks/was_sync.py index 05551e7c..468ef0b8 100644 --- a/backend/src/xfd_django/xfd_api/tasks/was_sync.py +++ b/backend/src/xfd_django/xfd_api/tasks/was_sync.py @@ -6,7 +6,9 @@ # Third-Party Libraries import django +from django.conf import settings import requests +from xfd_api.helpers.date_time_helpers import calculate_days_back from xfd_mini_dl.models import Organization, WasFindings # Django setup @@ -18,6 +20,8 @@ MAX_RETRIES = 3 # Max retries for failed tasks TIMEOUT = 60 # Timeout in seconds for waiting on task completion +headers = settings.DMZ_API_HEADER + def handler(event): """Retrieve and save WAS Findings from the DMZ.""" @@ -37,13 +41,7 @@ def main(): all_orgs = Organization.objects.all() # all_orgs = Organization.objects.filter(acronym__in=['USAGM', 'DHS']) - # Step 1: Get the current date and time in UTC - current_time = datetime.datetime.now(datetime.timezone.utc) - # Step 2: Subtract days from the current date - # Adjust based on PE WAS scan cadence - days_ago = current_time - datetime.timedelta(days=5) - # Step 3: Convert to an ISO 8601 string with timezone (e.g., UTC) - since_timestamp_str = days_ago.date().isoformat() + since_timestamp_str = calculate_days_back(5) for org in all_orgs: print( @@ -114,11 +112,6 @@ def fetch_dmz_was_findings_task(org_acronym, page, per_page, since_timestamp): acronym=org_acronym ) ) - headers = { - "X-API-KEY": os.getenv("CF_API_KEY"), - "access_token": os.getenv("PE_API_KEY"), - "Content-Type": "", - } data = { "org_acronym": org_acronym, @@ 
-146,11 +139,6 @@ def fetch_dmz_was_finding_data(task_id): url = "https://api.staging-cd.crossfeed.cyber.dhs.gov/pe/apiv1/get_mdl_was_findings/task/{t_id}".format( t_id=task_id ) - headers = { - "X-API-KEY": os.getenv("CF_API_KEY"), - "access_token": os.getenv("PE_API_KEY"), - "Content-Type": "", - } try: response = requests.get(url, headers=headers, timeout=20) diff --git a/backend/src/xfd_django/xfd_api/tasks/xpanse_sync.py b/backend/src/xfd_django/xfd_api/tasks/xpanse_sync.py index 01a7591b..89af962c 100644 --- a/backend/src/xfd_django/xfd_api/tasks/xpanse_sync.py +++ b/backend/src/xfd_django/xfd_api/tasks/xpanse_sync.py @@ -7,8 +7,10 @@ # Third-Party Libraries import django +from django.conf import settings from django.core.exceptions import ObjectDoesNotExist import requests +from xfd_api.helpers.date_time_helpers import calculate_days_back from xfd_mini_dl.models import ( Organization, XpanseAlerts, @@ -26,6 +28,8 @@ # Constants MAX_RETRIES = 3 # Max retries for failed tasks +headers = settings.DMZ_API_HEADER + def handler(event): """Retrieve and save Xpanse alerts from the DMZ.""" @@ -42,12 +46,8 @@ def handler(event): def main(): """Fetch and save DMZ Xpanse alerts.""" try: - # Step 1: Get the current date and time in UTC - current_time = datetime.datetime.now(datetime.timezone.utc) - # Step 2: Subtract days from the current date - days_ago = current_time - datetime.timedelta(days=15) - # Step 3: Convert to an ISO 8601 string with timezone (e.g., UTC) - modified_timestamp_str = days_ago.isoformat() + modified_timestamp_str = calculate_days_back(5) + if is_bu_pull_day() or XpanseBusinessUnits.objects.count() == 0: business_units = pull_and_save_business_units() else: diff --git a/backend/src/xfd_django/xfd_django/settings.py b/backend/src/xfd_django/xfd_django/settings.py index 10bbc196..b49d3aa7 100644 --- a/backend/src/xfd_django/xfd_django/settings.py +++ b/backend/src/xfd_django/xfd_django/settings.py @@ -151,6 +151,12 @@ SESSION_COOKIE_SAMESITE = "Lax" CSRF_COOKIE_SAMESITE = "Lax" +DMZ_API_HEADER = { + "X-API-KEY": os.getenv("CF_API_KEY"), + "access_token": os.getenv("PE_API_KEY"), + "Content-Type": "", +} + # SECURITY CONFIGURATION SECURE_HSTS_SECONDS = 31536000 # Enable HSTS for 1 year SECURE_HSTS_PRELOAD = True From 34a437f90ce88b850b20fc2cb60187259d492fba Mon Sep 17 00:00:00 2001 From: DJensen94 <79864006+DJensen94@users.noreply.github.com> Date: Thu, 20 Feb 2025 11:35:42 -0700 Subject: [PATCH 8/8] run linter and update reused variable name run linter and update reused variable name --- .../src/xfd_django/xfd_api/helpers/date_time_helpers.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/backend/src/xfd_django/xfd_api/helpers/date_time_helpers.py b/backend/src/xfd_django/xfd_api/helpers/date_time_helpers.py index 3ac62c23..671323e4 100644 --- a/backend/src/xfd_django/xfd_api/helpers/date_time_helpers.py +++ b/backend/src/xfd_django/xfd_api/helpers/date_time_helpers.py @@ -1,12 +1,15 @@ """Helper methods to deal with date and time manipulation.""" +# Standard Python Libraries import datetime -def calculate_days_back(days_ago:int): + +def calculate_days_back(days_ago_int: int): + """Create a date string of a calculated past date.""" # Step 1: Get the current date and time in UTC current_time = datetime.datetime.now(datetime.timezone.utc) # Step 2: Subtract days from the current date - days_ago = current_time - datetime.timedelta(days=days_ago) + days_ago = current_time - datetime.timedelta(days=days_ago_int) # Step 3: Convert to an ISO 8601 string 
with timezone (e.g., UTC)
     since_timestamp_str = days_ago.isoformat()
 
-    return since_timestamp_str
\ No newline at end of file
+    return since_timestamp_str
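
Below is a minimal usage sketch of the calculate_days_back helper introduced in PATCH 7 and cleaned up in PATCH 8. It is illustrative only and not part of the patch series; the sample value in the comment is an assumed format, not output captured from a real run. Note that the helper returns a full ISO 8601 timestamp (the form credential_sync and shodan_sync already sent), whereas the original was_sync code sent a date-only string via days_ago.date().isoformat(); the DMZ since_timestamp parameter is assumed to accept either form.

    # Standard Python Libraries
    import datetime


    def calculate_days_back(days_ago_int: int) -> str:
        """Sketch of the PATCH 8 helper: ISO 8601 timestamp N days in the past (UTC)."""
        current_time = datetime.datetime.now(datetime.timezone.utc)
        days_ago = current_time - datetime.timedelta(days=days_ago_int)
        return days_ago.isoformat()


    if __name__ == "__main__":
        # Each DMZ sync task passes this value as its "since_timestamp" filter.
        since_timestamp_str = calculate_days_back(5)
        # Example shape (actual values will differ): 2025-02-15T18:29:14.123456+00:00
        print(since_timestamp_str)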