From 699e75c6256d73d22e1c2f00149cf6fd3fb01a3f Mon Sep 17 00:00:00 2001 From: Seth Grover Date: Mon, 18 Mar 2024 12:23:34 -0600 Subject: [PATCH] work for idaholab/Malcolm#445, supporting suricata eve.json rotation --- Dockerfiles/filebeat.Dockerfile | 2 +- Dockerfiles/suricata.Dockerfile | 5 +- config/filebeat.env.example | 6 +++ config/upload-common.env.example | 8 +-- filebeat/filebeat.yml | 4 +- ...ed-folder.py => clean-processed-folder.py} | 49 ++++++++++++------- .../interface/sensor_ctl/control_vars.conf | 1 + .../suricata_config_populate.sh | 5 -- shared/bin/suricata_config_populate.py | 13 ++++- suricata/scripts/eve-clean-logs.sh | 47 ------------------ 10 files changed, 55 insertions(+), 85 deletions(-) rename filebeat/scripts/{filebeat-clean-zeeklogs-processed-folder.py => clean-processed-folder.py} (80%) delete mode 100755 suricata/scripts/eve-clean-logs.sh diff --git a/Dockerfiles/filebeat.Dockerfile b/Dockerfiles/filebeat.Dockerfile index 65b75e4e1..fb73e015b 100644 --- a/Dockerfiles/filebeat.Dockerfile +++ b/Dockerfiles/filebeat.Dockerfile @@ -124,7 +124,7 @@ RUN for INPUT in nginx tcp; do \ chmod 770 /usr/share/filebeat-$INPUT/data; \ done; \ chmod 755 /usr/local/bin/*.sh /usr/local/bin/*.py && \ - (echo "* * * * * /usr/local/bin/filebeat-process-zeek-folder.sh\n*/5 * * * * /usr/local/bin/filebeat-clean-zeeklogs-processed-folder.py" > ${SUPERCRONIC_CRONTAB}) + (echo "* * * * * /usr/local/bin/filebeat-process-zeek-folder.sh\n*/5 * * * * /usr/local/bin/clean-processed-folder.py" > ${SUPERCRONIC_CRONTAB}) ENV AUTO_TAG $AUTO_TAG ENV LOG_CLEANUP_MINUTES $LOG_CLEANUP_MINUTES diff --git a/Dockerfiles/suricata.Dockerfile b/Dockerfiles/suricata.Dockerfile index 6defb77fd..45c89d84b 100644 --- a/Dockerfiles/suricata.Dockerfile +++ b/Dockerfiles/suricata.Dockerfile @@ -114,7 +114,7 @@ RUN sed -i "s/main$/main contrib non-free/g" /etc/apt/sources.list.d/debian.sour useradd -M --uid ${DEFAULT_UID} --gid ${DEFAULT_GID} --home /nonexistant ${PUSER} && \ usermod -a -G tty ${PUSER} && \ ln -sfr /usr/local/bin/pcap_processor.py /usr/local/bin/pcap_suricata_processor.py && \ - (echo "*/5 * * * * /usr/local/bin/eve-clean-logs.sh\n0 */6 * * * /bin/bash /usr/local/bin/suricata-update-rules.sh\n" > ${SUPERCRONIC_CRONTAB}) && \ + (echo "0 */6 * * * /bin/bash /usr/local/bin/suricata-update-rules.sh\n" > ${SUPERCRONIC_CRONTAB}) && \ mkdir -p "$SURICATA_CUSTOM_RULES_DIR" "$SURICATA_DEFAULT_RULES_DIR" "$SURICATA_CUSTOM_CONFIG_DIR" && \ chown -R ${PUSER}:${PGROUP} "$SURICATA_CUSTOM_RULES_DIR" "$SURICATA_DEFAULT_RULES_DIR" "$SURICATA_CUSTOM_CONFIG_DIR" && \ cp "$(dpkg -L suricata-update | grep 'update\.yaml$' | head -n 1)" \ @@ -136,7 +136,6 @@ COPY --chmod=755 shared/bin/pcap_processor.py /usr/local/bin/ COPY --chmod=644 scripts/malcolm_utils.py /usr/local/bin/ COPY --chmod=755 shared/bin/suricata_config_populate.py /usr/local/bin/ COPY --chmod=755 suricata/scripts/docker_entrypoint.sh /usr/local/bin/ -COPY --chmod=755 suricata/scripts/eve-clean-logs.sh /usr/local/bin/ COPY --chmod=755 suricata/scripts/suricata-update-rules.sh /usr/local/bin/ COPY --chmod=u=rwX,go=rX suricata/rules-default/ "$SURICATA_DEFAULT_RULES_DIR"/ @@ -148,7 +147,6 @@ ARG SURICATA_CRON=true ARG SURICATA_AUTO_ANALYZE_PCAP_FILES=false ARG SURICATA_CUSTOM_RULES_ONLY=false ARG SURICATA_AUTO_ANALYZE_PCAP_THREADS=1 -ARG LOG_CLEANUP_MINUTES=30 ARG SURICATA_UPDATE_RULES=false ARG SURICATA_UPDATE_DEBUG=false ARG SURICATA_UPDATE_ETOPEN=true @@ -168,7 +166,6 @@ ENV SURICATA_CRON $SURICATA_CRON ENV SURICATA_AUTO_ANALYZE_PCAP_FILES $SURICATA_AUTO_ANALYZE_PCAP_FILES ENV SURICATA_AUTO_ANALYZE_PCAP_THREADS $SURICATA_AUTO_ANALYZE_PCAP_THREADS ENV SURICATA_CUSTOM_RULES_ONLY $SURICATA_CUSTOM_RULES_ONLY -ENV LOG_CLEANUP_MINUTES $LOG_CLEANUP_MINUTES ENV SURICATA_UPDATE_RULES $SURICATA_UPDATE_RULES ENV SURICATA_UPDATE_DEBUG $SURICATA_UPDATE_DEBUG ENV SURICATA_UPDATE_ETOPEN $SURICATA_UPDATE_ETOPEN diff --git a/config/filebeat.env.example b/config/filebeat.env.example index 3091946ef..9b637a4e0 100644 --- a/config/filebeat.env.example +++ b/config/filebeat.env.example @@ -9,6 +9,12 @@ FILEBEAT_CLOSE_RENAMED=true FILEBEAT_CLOSE_REMOVED=true FILEBEAT_CLOSE_EOF=true FILEBEAT_CLEAN_REMOVED=true +# The age (in minutes) at which already-processed log files containing network traffic metadata should +# be pruned from the filesystem +LOG_CLEANUP_MINUTES=360 +# The age (in minutes) at which the compressed archives containing already-processed log files should +# be pruned from the filesystem +ZIP_CLEANUP_MINUTES=720 # Whether or not to use polling vs. native inotify API to watch for files FILEBEAT_WATCHER_POLLING=false # When polling, seconds of inactivity to assume a file is closed and ready for processing diff --git a/config/upload-common.env.example b/config/upload-common.env.example index ad55df213..f4d38211b 100644 --- a/config/upload-common.env.example +++ b/config/upload-common.env.example @@ -14,10 +14,4 @@ PCAP_PIPELINE_POLLING=false PCAP_PIPELINE_POLLING_ASSUME_CLOSED_SEC=10 # 'pcap-monitor' to match the name of the container providing the uploaded/captured PCAP file # monitoring service -PCAP_MONITOR_HOST=pcap-monitor -# The age (in minutes) at which already-processed log files containing network traffic metadata should -# be pruned from the filesystem -LOG_CLEANUP_MINUTES=360 -# The age (in minutes) at which the compressed archives containing already-processed log files should -# be pruned from the filesystem -ZIP_CLEANUP_MINUTES=720 \ No newline at end of file +PCAP_MONITOR_HOST=pcap-monitor \ No newline at end of file diff --git a/filebeat/filebeat.yml b/filebeat/filebeat.yml index 454a443e4..4645b7918 100644 --- a/filebeat/filebeat.yml +++ b/filebeat/filebeat.yml @@ -72,7 +72,7 @@ filebeat.inputs: #-------------------------- Suricata EVE JSON logs ----------------------------- - type: log paths: - - ${FILEBEAT_SURICATA_LOG_PATH:/suricata}/eve-*.json + - ${FILEBEAT_SURICATA_LOG_PATH:/suricata}/eve*.json symlinks: true fields_under_root: true tags: ["_filebeat_suricata_malcolm_upload"] @@ -88,7 +88,7 @@ filebeat.inputs: - type: log paths: - - ${FILEBEAT_SURICATA_LOG_PATH:/suricata}/live/eve.json + - ${FILEBEAT_SURICATA_LOG_PATH:/suricata}/live/eve*.json symlinks: true fields_under_root: true tags: ["_filebeat_suricata_malcolm_live"] diff --git a/filebeat/scripts/filebeat-clean-zeeklogs-processed-folder.py b/filebeat/scripts/clean-processed-folder.py similarity index 80% rename from filebeat/scripts/filebeat-clean-zeeklogs-processed-folder.py rename to filebeat/scripts/clean-processed-folder.py index 2484f9c39..5f72cda50 100755 --- a/filebeat/scripts/filebeat-clean-zeeklogs-processed-folder.py +++ b/filebeat/scripts/clean-processed-folder.py @@ -16,16 +16,20 @@ from subprocess import Popen, PIPE lockFilename = os.path.join(gettempdir(), '{}.lock'.format(os.path.basename(__file__))) -zeekDir = os.path.join(os.getenv('FILEBEAT_ZEEK_DIR', "/zeek/"), '') cleanLogSeconds = int(os.getenv('LOG_CLEANUP_MINUTES', "30")) * 60 cleanZipSeconds = int(os.getenv('ZIP_CLEANUP_MINUTES', "120")) * 60 fbRegFilename = os.getenv('FILEBEAT_REGISTRY_FILE', "/usr/share/filebeat/data/registry/filebeat/data.json") -currentDir = zeekDir + "current/" -processedDir = zeekDir + "processed/" -liveDir = zeekDir + "live/logs/" + +zeekDir = os.path.join(os.getenv('FILEBEAT_ZEEK_DIR', "/zeek/"), '') +zeekLiveDir = zeekDir + "live/logs/" +zeekCurrentDir = zeekDir + "current/" +zeekProcessedDir = zeekDir + "processed/" + +suricataDir = os.path.join(os.getenv('FILEBEAT_SURICATA_LOG_PATH', "/suricata/"), '') +suricataLiveDir = suricataDir + "live/" nowTime = time.time() -logMimeType = "text/plain" +logMimeTypeRegex = re.compile(r"(text/plain|application/(x-nd)?json)") archiveMimeTypeRegex = re.compile( r"(application/gzip|application/x-gzip|application/x-7z-compressed|application/x-bzip2|application/x-cpio|application/x-lzip|application/x-lzma|application/x-rar-compressed|application/x-tar|application/x-xz|application/zip)" ) @@ -74,7 +78,7 @@ def checkFile(filename, filebeatReg=None, checkLogs=True, checkArchives=True): # get the file type fileType = magic.from_file(filename, mime=True) - if (checkLogs is True) and (cleanLogSeconds > 0) and (fileType == logMimeType): + if (checkLogs is True) and (cleanLogSeconds > 0) and logMimeTypeRegex.match(fileType) is not None: cleanSeconds = cleanLogSeconds elif (checkArchives is True) and (cleanZipSeconds > 0) and archiveMimeTypeRegex.match(fileType) is not None: cleanSeconds = cleanZipSeconds @@ -100,14 +104,16 @@ def pruneFiles(): # disabled, don't do anything return - # look for regular files in the processed/ directory - foundFiles = [ - (os.path.join(root, filename)) for root, dirnames, filenames in os.walk(processedDir) for filename in filenames + # look for regular Zeek files in the processed/ directory + zeekFoundFiles = [ + (os.path.join(root, filename)) + for root, dirnames, filenames in os.walk(zeekProcessedDir) + for filename in filenames ] # look for rotated files from live zeek instance - rotatedFiles = [ - (os.path.join(root, filename)) for root, dirnames, filenames in os.walk(liveDir) for filename in filenames + zeekRotatedFiles = [ + (os.path.join(root, filename)) for root, dirnames, filenames in os.walk(zeekLiveDir) for filename in filenames ] # look up the filebeat registry file and try to read it @@ -117,22 +123,22 @@ def pruneFiles(): fbReg = json.load(f) # see if the files we found are in use and old enough to be pruned - for file in foundFiles: + for file in zeekFoundFiles: checkFile(file, filebeatReg=fbReg, checkLogs=True, checkArchives=True) - for file in rotatedFiles: + for file in zeekRotatedFiles: checkFile(file, filebeatReg=None, checkLogs=False, checkArchives=True) - # clean up any broken symlinks in the current/ directory - for current in os.listdir(currentDir): - currentFileSpec = os.path.join(currentDir, current) + # clean up any broken symlinks in the Zeek current/ directory + for current in os.listdir(zeekCurrentDir): + currentFileSpec = os.path.join(zeekCurrentDir, current) if os.path.islink(currentFileSpec) and not os.path.exists(currentFileSpec): print('removing dead symlink "{}"'.format(currentFileSpec)) silentRemove(currentFileSpec) - # clean up any old and empty directories in processed/ directory + # clean up any old and empty directories in Zeek processed/ directory cleanDirSeconds = min(i for i in (cleanLogSeconds, cleanZipSeconds) if i > 0) candidateDirs = [] - for root, dirs, files in os.walk(processedDir, topdown=False): + for root, dirs, files in os.walk(zeekProcessedDir, topdown=False): if root and dirs: candidateDirs += [os.path.join(root, tmpDir) for tmpDir in dirs] candidateDirs = list(set(candidateDirs)) @@ -148,6 +154,13 @@ def pruneFiles(): except OSError: pass + # check the suricata logs (live and otherwise) as well + for surDir in [suricataDir, suricataLiveDir]: + for eve in os.listdir(surDir): + eveFile = os.path.join(surDir, eve) + if os.path.isfile(eveFile): + checkFile(eveFile, filebeatReg=fbReg, checkLogs=True, checkArchives=False) + def main(): with open(lockFilename, 'w') as lock_file: diff --git a/hedgehog-iso/interface/sensor_ctl/control_vars.conf b/hedgehog-iso/interface/sensor_ctl/control_vars.conf index e1c49578e..a3b2c76ac 100644 --- a/hedgehog-iso/interface/sensor_ctl/control_vars.conf +++ b/hedgehog-iso/interface/sensor_ctl/control_vars.conf @@ -96,6 +96,7 @@ export ZEEK_DISABLE_BEST_GUESS_ICS=true export SURICATA_CUSTOM_RULES_ONLY=false export SURICATA_DISABLE_ICS_ALL=false export SURICATA_RUNMODE=workers +export SURICATA_LIVE_CAPTURE=true export SURICATA_AF_PACKET_BLOCK_SIZE=32768 export SURICATA_AF_PACKET_BLOCK_TIMEOUT=10 export SURICATA_AF_PACKET_BUFFER_SIZE=32768 diff --git a/hedgehog-iso/interface/sensor_ctl/supervisor.init/suricata_config_populate.sh b/hedgehog-iso/interface/sensor_ctl/supervisor.init/suricata_config_populate.sh index 63f020636..bd5746d2a 100644 --- a/hedgehog-iso/interface/sensor_ctl/supervisor.init/suricata_config_populate.sh +++ b/hedgehog-iso/interface/sensor_ctl/supervisor.init/suricata_config_populate.sh @@ -2,11 +2,6 @@ if [[ -n $SUPERVISOR_PATH ]] && [[ -r /usr/local/bin/suricata_config_populate.py ]]; then - # if there's a previous eve.json file, rename it prior to starting up - [[ -n $ZEEK_LOG_PATH ]] && [[ -f "$ZEEK_LOG_PATH"/suricata/eve.json ]] && \ - mv -f "$ZEEK_LOG_PATH/suricata/eve.json" \ - "$ZEEK_LOG_PATH/suricata/eve.json.$(date -d @$(stat -c%Y "$ZEEK_LOG_PATH/suricata/eve.json") +'%Y%m%d%H%M%S')" - # if there's no configuration files to modify, start with the defaults [[ ! -f "$SUPERVISOR_PATH"/suricata/suricata.yaml ]] && cp /etc/suricata/suricata.yaml "$SUPERVISOR_PATH"/suricata/suricata.yaml [[ ! -f "$SUPERVISOR_PATH"/suricata/update.yaml ]] && cp "$(dpkg -L suricata-update | grep 'update\.yaml' | head -n 1)" "$SUPERVISOR_PATH"/suricata/update.yaml diff --git a/shared/bin/suricata_config_populate.py b/shared/bin/suricata_config_populate.py index 718d3e19c..a0f42d761 100755 --- a/shared/bin/suricata_config_populate.py +++ b/shared/bin/suricata_config_populate.py @@ -100,6 +100,9 @@ def __call__(self, repr, data): 'ENIP_ENABLED': True, 'ENIP_EVE_ENABLED': False, 'ENIP_PORTS': 44818, + 'EVE_FILENAME_PATTERN': 'eve-%Y%m%d_%H%M%S.json', + 'EVE_ROTATE_INTERVAL': '300s', + 'EVE_THREADED': False, 'EXTERNAL_NET': '!$HOME_NET', 'FILE_DATA_PORTS': "[$HTTP_PORTS,110,143]", 'FILES_ENABLED': True, @@ -134,6 +137,7 @@ def __call__(self, repr, data): 'IMAP_EVE_ENABLED': False, 'KRB5_ENABLED': True, 'KRB5_EVE_ENABLED': False, + 'LIVE_CAPTURE': False, 'MANAGED_RULES_DIR': '/var/lib/suricata/rules', 'MAX_PENDING_PACKETS': 1024, 'MODBUS_ENABLED': True, @@ -719,6 +723,7 @@ def main(): deep_set(cfg, ['vars', 'port-groups', portKey], DEFAULT_VARS[portKey]) # capture parameters + liveCapture = val2bool(DEFAULT_VARS['LIVE_CAPTURE']) for cfgKey in ( ['capture', 'disable-offloading', 'CAPTURE_DISABLE_OFFLOADING'], ['capture', 'checksum-validation', 'CAPTURE_CHECKSUM_VALIDATION'], @@ -776,7 +781,13 @@ def main(): # enable community-id for easier cross-referencing and pcap-file for # tying back to the original PCAP filename cfg['outputs'][outputIdx][name]['community-id'] = True - cfg['outputs'][outputIdx][name]['pcap-file'] = True + + # some options make sense for live capture but not PCAP processing + cfg['outputs'][outputIdx][name]['pcap-file'] = not liveCapture + if liveCapture: + cfg['outputs'][outputIdx][name]['filename'] = DEFAULT_VARS['EVE_FILENAME_PATTERN'] + cfg['outputs'][outputIdx][name]['threaded'] = DEFAULT_VARS['EVE_THREADED'] + cfg['outputs'][outputIdx][name]['rotate-interval'] = DEFAULT_VARS['EVE_ROTATE_INTERVAL'] # configure the various different output types belonging to eve-log if 'types' in cfg['outputs'][outputIdx][name]: diff --git a/suricata/scripts/eve-clean-logs.sh b/suricata/scripts/eve-clean-logs.sh deleted file mode 100755 index 33b0127e0..000000000 --- a/suricata/scripts/eve-clean-logs.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/bin/bash - -# Copyright (c) 2024 Battelle Energy Alliance, LLC. All rights reserved. - -# Clean up suricata log files that have reached a certain age. If we can -# verify they've been parsed and logged at least one event to the database, -# clean them up. If they haven't logged an event to the database, only clean -# them up if they're even older. - -set -o pipefail - -# for live traffic capture we don't need to do this check -if [[ "${SURICATA_LIVE_CAPTURE:-false}" != "true" ]]; then - - CURRENT_TIME="$(date -u +%s)" - FILE_AGE_MIN=${LOG_CLEANUP_MINUTES:-30} - FILE_AGE_MIN_UNKNOWN=$(( FILE_AGE_MIN * 2 )) - - if (( $FILE_AGE_MIN > 0 )); then - find "${SURICATA_LOG_DIR:-/var/log/suricata}"/ -type f -name "*.json" -mmin +$FILE_AGE_MIN | while read LOGFILE - do - - # query the database to see if any records exist from parsing this log file - DOCUMENT_FOUND=$( - curl -sSL -XPOST \ - -H 'Content-Type: application/json' \ - 'http://api:5000/mapi/document' \ - -d "{\"limit\":1,\"filter\":{\"log.file.path\":\"$(basename $LOGFILE)\"}}" 2>/dev/null \ - | jq '.results | length' 2>/dev/null || echo '0') - - if (( $DOCUMENT_FOUND > 0 )) || (( $(stat --printf='%s' "$LOGFILE" 2>/dev/null || echo -n '1') == 0 )); then - # at least one log document exists in the database (or the file is empty), assume it's safe to clean up now - rm -f "$LOGFILE" - - else - # the document doesn't exist in the database. still clean it up, but only if it's quite a bit older - MODIFY_TIME="$(stat -c %Y "$LOGFILE" 2>/dev/null || echo '0')" - MODIFY_AGE_MINS=$(( (CURRENT_TIME - MODIFY_TIME) / 60)) - if (( $MODIFY_AGE_MINS >= $FILE_AGE_MIN_UNKNOWN )); then - rm -f "$LOGFILE" - fi - fi - - done # loop over found files at least FILE_AGE_MIN old - fi # FILE_AGE_MIN is set (suricata log cleaning is enabled) - -fi \ No newline at end of file