-
Notifications
You must be signed in to change notification settings - Fork 47
/
Copy pathFSEParser_V4.1.py
1594 lines (1379 loc) · 62.2 KB
/
FSEParser_V4.1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/python
# FSEvents Parser Python Script
# ------------------------------------------------------
# Parse FSEvent records from allocated fsevent files and carved gzip files.
# Outputs parsed information to a tab delimited txt file and SQLite database.
# Errors and exceptions are recorded in the exceptions logfile.
# Copyright 2024
# Nicole Ibrahim
#
# Nicole Ibrahim licenses this file to you under the Apache License, Version
# 2.0 (the "License"); you may not use this file except in compliance with the
# License. You may obtain a copy of the License at:
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied. See the License for the specific language governing
# permissions and limitations under the License.
import sys
import os
import struct
import binascii
import gzip
import re
import datetime
import sqlite3
import json
import io
from time import (gmtime, strftime)
from optparse import OptionParser
import contextlib
try:
from dfvfs.analyzer import analyzer
from dfvfs.lib import definitions
from dfvfs.path import factory as path_spec_factory
from dfvfs.volume import tsk_volume_system
from dfvfs.resolver import resolver
from dfvfs.lib import raw
from dfvfs.helpers import source_scanner
DFVFS_IMPORT = True
IMPORT_ERROR = None
except ImportError as exp:
DFVFS_IMPORT = False
IMPORT_ERROR =("\n%s\n\
You have specified the source type as image but DFVFS \n\
is not installed and is required for image support. \n\
To install DFVFS please refer to \n\
http://www.hecfblog.com/2015/12/how-to-install-dfvfs-on-windows-without.html" % (exp))
VERSION = '4.1'
EVENTMASK = {
0x00000000: 'None;',
0x00000001: 'FolderEvent;',
0x00000002: 'Mount;',
0x00000004: 'Unmount;',
0x00000020: 'EndOfTransaction;',
0x00000800: 'LastHardLinkRemoved;',
0x00001000: 'HardLink;',
0x00004000: 'SymbolicLink;',
0x00008000: 'FileEvent;',
0x00010000: 'PermissionChange;',
0x00020000: 'ExtendedAttrModified;',
0x00040000: 'ExtendedAttrRemoved;',
0x00100000: 'DocumentRevisioning;',
0x00400000: 'ItemCloned;', # macOS HighSierra
0x01000000: 'Created;',
0x02000000: 'Removed;',
0x04000000: 'InodeMetaMod;',
0x08000000: 'Renamed;',
0x10000000: 'Modified;',
0x20000000: 'Exchange;',
0x40000000: 'FinderInfoMod;',
0x80000000: 'FolderCreated;',
0x00000008: 'NOT_USED-0x00000008;',
0x00000010: 'NOT_USED-0x00000010;',
0x00000040: 'NOT_USED-0x00000040;',
0x00000080: 'NOT_USED-0x00000080;',
0x00000100: 'NOT_USED-0x00000100;',
0x00000200: 'NOT_USED-0x00000200;',
0x00000400: 'NOT_USED-0x00000400;',
0x00002000: 'NOT_USED-0x00002000;',
0x00080000: 'NOT_USED-0x00080000;',
0x00200000: 'NOT_USED-0x00200000;',
0x00800000: 'NOT_USED-0x00800000;'
}
print('\n==========================================================================')
print(('FSEParser v {} -- provided by G-C Partners, LLC'.format(VERSION)))
print('==========================================================================')
def get_options():
"""
Get needed options for processing
"""
usage = "usage: %prog -s SOURCE -o OUTDIR -t SOURCETYPE [folder|image] [-c CASENAME -q REPORT_QUERIES]"
options = OptionParser(usage=usage)
options.add_option("-s",
action="store",
type="string",
dest="source",
default=False,
help="REQUIRED. The source directory or image containing fsevent files to be parsed")
options.add_option("-o",
action="store",
type="string",
dest="outdir",
default=False,
help="REQUIRED. The destination directory used to store parsed reports")
options.add_option("-t",
action="store",
type="string",
dest="sourcetype",
default=False,
help="REQUIRED. The source type to be parsed. Available options are 'folder' or 'image'")
options.add_option("-c",
action="store",
type="string",
dest="casename",
default=False,
help="OPTIONAL. The name of the current session, \
used for naming standards. Defaults to 'FSE_Reports'")
options.add_option("-q",
action="store",
type="string",
dest="report_queries",
default=False,
help="OPTIONAL. The location of the report_queries.json file \
containing custom report queries to generate targeted reports."
)
# Return options to caller #
return options
def parse_options():
"""
Capture and return command line arguments.
"""
# Get options
options = get_options()
(opts, args) = options.parse_args()
# The meta will store all information about the arguments passed #
meta = {
'casename': opts.casename,
'reportqueries': opts.report_queries,
'sourcetype': opts.sourcetype,
'source': opts.source,
'outdir': opts.outdir
}
# Print help if no options are provided
if len(sys.argv[1:]) == 0:
options.print_help()
sys.exit(1)
# Test required arguments
if meta['source'] is False or meta['outdir'] is False or meta['sourcetype'] is False:
options.error('Unable to proceed. The following parameters '
'are required:\n-s SOURCE\n-o OUTDIR\n-t SOURCETYPE')
if not os.path.exists(meta['source']):
options.error("Unable to proceed. \n\n%s does not exist.\n" % meta['source'])
if not os.path.exists(meta['outdir']):
options.error("Unable to proceed. \n\n%s does not exist.\n" % meta['outdir'])
if meta['reportqueries'] and not os.path.exists(meta['reportqueries']):
options.error("Unable to proceed. \n\n%s does not exist.\n" % meta['reportqueries'])
if meta['sourcetype'].lower() != 'folder' and meta['sourcetype'].lower() != 'image':
options.error(
'Unable to proceed. \n\nIncorrect source type provided: "%s". The following are valid options:\
\n -t folder\n -t image\n' % (meta['sourcetype']))
if meta['sourcetype'] == 'image' and DFVFS_IMPORT is False:
options.error(IMPORT_ERROR)
if meta['reportqueries'] ==False:
print('[Info]: Report queries file not specified using the -q option. Custom reports will not be generated.')
if meta['casename'] is False:
print('[Info]: No casename specified using -c. Defaulting to "FSE_Reports".')
meta['casename'] = 'FSE_Reports'
# Return meta to caller #
return meta
def main():
"""
Call the main processes.
"""
# Process fsevents
FSEventHandler()
# Commit transaction
SQL_CON.commit()
# Close database connection
SQL_CON.close()
def enumerate_flags(flag, f_map):
"""
Iterate through record flag mappings and enumerate.
"""
# Reset string based flags to null
f_type = ''
f_flag = ''
# Iterate through flags
for i in f_map:
if i & flag:
if f_map[i] == 'FolderEvent;' or \
f_map[i] == 'FileEvent;' or \
f_map[i] == 'SymbolicLink;' or \
f_map[i] == 'HardLink;':
f_type = ''.join([f_type, f_map[i]])
else:
f_flag = ''.join([f_flag, f_map[i]])
return f_type, f_flag
def progress(count, total):
"""
Handles the progress bar in the console.
"""
bar_len = 45
filled_len = int(round(bar_len * count / float(total)))
percents = round(100 * count / float(total), 1)
p_bar = '=' * filled_len + '.' * (bar_len - filled_len)
try:
sys.stdout.write(' File {} of {} [{}] {}{}\r'.format(count, total, p_bar, percents, '%'))
except:
pass
sys.stdout.flush()
class FSEventHandler():
"""
FSEventHandler iterates through and parses fsevents.
"""
def __init__(self):
"""
"""
self.meta = parse_options()
if self.meta['reportqueries']:
# Check json file
try:
# Basic json syntax
self.r_queries = json.load(open(self.meta['reportqueries']))
# Check to see if required keys are present
for i in self.r_queries['process_list']:
i['report_name']
i['query']
except Exception as exp:
print(('An error occurred while reading the json file. \n{}'.format(str(exp))))
sys.exit(0)
else:
# if report queries option was not specified
self.r_queries = False
self.path = self.meta['source']
create_sqlite_db(self)
self.files = []
self.pages = []
self.src_fullpath = ''
self.dls_version = 0
# Initialize statistic counters
self.all_records_count = 0
self.all_files_count = 0
self.parsed_file_count = 0
self.error_file_count = 0
# Try to open the output files
try:
# Try to open ouput files
self.l_all_fsevents = open(
os.path.join(self.meta['outdir'], self.meta['casename'], 'All_FSEVENTS.tsv'),
'wb'
)
# Process report queries output files
# if option was specified.
if self.r_queries:
# Try to open custom report query output files
for i in self.r_queries['process_list']:
r_file = os.path.join(self.meta['outdir'], self.meta['casename'], i['report_name'] + '.tsv')
if os.path.exists(r_file):
os.remove(r_file)
setattr(self, 'l_' + i['report_name'], open(r_file, 'wb'))
# Output log file for exceptions
l_file = os.path.join(self.meta['outdir'], self.meta['casename'], 'EXCEPTIONS_LOG.txt')
self.logfile = open(l_file, 'w')
except Exception as exp:
# Print error to command prompt if unable to open files
if 'Permission denied' in str(exp):
print(('{}\nEnsure that you have permissions to write to file '
'\nand output file is not in use by another application.\n'.format(str(exp))))
else:
print(exp)
sys.exit(0)
# Begin FSEvent processing
print(('\n[STARTED] {} UTC Parsing files.'.format(strftime("%m/%d/%Y %H:%M:%S", gmtime()))))
if self.meta['sourcetype'] == 'image':
self._get_fsevent_image_files()
elif self.meta['sourcetype'] == 'folder':
self._get_fsevent_files()
print(('\n All Files Attempted: {}\n All Parsed Files: {}\n Files '
'with Errors: {}\n All Records Parsed: {}'.format(
self.all_files_count,
self.parsed_file_count,
self.error_file_count,
self.all_records_count)))
print(('[FINISHED] {} UTC Parsing files.\n'.format(strftime("%m/%d/%Y %H:%M:%S", gmtime()))))
print(('[STARTED] {} UTC Sorting fsevents table in Database.'.format(strftime("%m/%d/%Y %H:%M:%S", gmtime()))))
row_count = reorder_sqlite_db(self)
if row_count != 0:
print(('[FINISHED] {} UTC Sorting fsevents table in Database.\n'.format(strftime("%m/%d/%Y %H:%M:%S", gmtime()))))
print(('[STARTED] {} UTC Exporting fsevents table from Database.'.format(
strftime("%m/%d/%Y %H:%M:%S", gmtime()))))
self.export_fsevent_report(self.l_all_fsevents, row_count)
print(('[FINISHED] {} UTC Exporting fsevents table from Database.\n'.format(
strftime("%m/%d/%Y %H:%M:%S", gmtime()))))
if self.r_queries:
print(('[STARTED] {} UTC Exporting views from database '
'to TSV files.'.format(strftime("%m/%d/%Y %H:%M:%S", gmtime()))))
for i in self.r_queries['process_list']:
Output.print_columns(getattr(self, 'l_' + i['report_name']))
# Export report views to output files
self.export_sqlite_views()
print(('[FINISHED] {} UTC Exporting views from database '
'to TSV files.\n'.format(strftime("%m/%d/%Y %H:%M:%S", gmtime()))))
print((" Exception log and Reports exported to:\n '{}'\n".format(os.path.join(self.meta['outdir'], self.meta['casename']))))
# Close output files
self.l_all_fsevents.close()
self.logfile.close()
else:
print(('[FINISHED] {} UTC No records were parsed.\n'.format(strftime("%m/%d/%Y %H:%M:%S", gmtime()))))
print('Nothing to export.\n')
@contextlib.contextmanager
def skip_gzip_check(self):
"""
Context manager that replaces gzip.GzipFile._read_eof with a no-op.
This is useful when decompressing partial files, something that won't
work if GzipFile does it's checksum comparison.
stackoverflow.com/questions/1732709/unzipping-part-of-a-gz-file-using-python/18602286
"""
_read_eof = gzip._GzipReader._read_eof
gzip.GzipFile._read_eof = lambda *args, **kwargs: None
yield
gzip.GzipFile._read_eof = _read_eof
def _get_fsevent_files(self):
"""
get_fsevent_files will iterate through each file in the fsevents dir provided,
and attempt to decompress the gzip. If it is unable to decompress,
it will write an entry in the logfile. If successful, the script will
check for a DLS header signature in the decompress gzip. If found, the contents of
the gzip will be placed into a buffer and passed to the next phase of processing.
"""
# Print the header columns to the output files
Output.print_columns(self.l_all_fsevents)
# Total number of files in events dir #
t_files = len(os.listdir(self.path))
for filename in os.listdir(self.path):
if filename == 'fseventsd-uuid':
t_files -= 1
self.time_range_src_mod = []
prev_mod_date = "Unknown"
prev_last_wd = 0
c_last_wd = 0
# Uses file mod dates to generate time ranges by default unless
# files are carved or mod dates lost due to exporting
self.use_file_mod_dates = True
# Run simple test to see if file mod dates
# should be used to generate time ranges
# In some instances fsevent files may not have
# their original mod times preserved on export
# This code will flag true when the same date and hour
# exists for the first file and the last file
# in the provided source fsevents folder
first = os.path.join(self.path, os.listdir(self.path)[0])
last = os.path.join(self.path, os.listdir(self.path)[len(os.listdir(self.path)) - 1])
first = os.path.getmtime(first)
last = os.path.getmtime(last)
first = str(datetime.datetime.utcfromtimestamp(first))[:14]
last = str(datetime.datetime.utcfromtimestamp(last))[:14]
if first == last:
self.use_file_mod_dates = False
# Iterate through each file in supplied fsevents dir
for filename in os.listdir(self.path):
if filename == 'fseventsd-uuid':
continue
# Variables
self.all_files_count += 1
# Call the progress bar which shows parsing stats
progress(self.all_files_count, t_files)
buf = ""
# Full path to source fsevent file
self.src_fullpath = os.path.join(self.path, filename)
# Name of source fsevent file
self.src_filename = filename
# UTC mod date of source fsevent file
self.m_time = os.path.getmtime(self.src_fullpath)
self.m_time = str(datetime.datetime.utcfromtimestamp((self.m_time))) + " [UTC]"
# Regex to match against source fsevent log filename
regexp = re.compile(r'^.*[\][0-9a-fA-F]{16}$')
# Test to see if fsevent file name matches naming standard
# if not, assume this is a carved gzip
if len(self.src_filename) == 16 and regexp.search(filename) is not None:
c_last_wd = int(self.src_filename, 16)
self.time_range_src_mod = prev_last_wd, c_last_wd, prev_mod_date, self.m_time
self.is_carved_gzip = False
else:
self.is_carved_gzip = True
# Attempt to decompress the fsevent archive
try:
with self.skip_gzip_check():
self.files = gzip.GzipFile(self.src_fullpath, "rb")
buf = self.files.read()
except Exception as exp:
# When permission denied is encountered
if "Permission denied" in str(exp) and not os.path.isdir(self.src_fullpath):
print(('\nEnsure that you have permissions to read '
'from {}\n{}\n'.format(self.path, str(exp))))
sys.exit(0)
# Otherwise write error to log file
else:
self.logfile.write(
"%s\tError: Error while decompressing FSEvents file.%s\n" % (
self.src_filename,
str(exp)
)
)
self.error_file_count += 1
continue
# If decompress is success, check for DLS headers in the current file
dls_chk = FSEventHandler.dls_header_search(self, buf, self.src_fullpath)
# If check for DLS returns false, write information to logfile
if dls_chk is False:
self.logfile.write('%s\tInfo: DLS Header Check Failed. Unable to find a '
'DLS header. Unable to parse File.\n' % (self.src_filename))
# Continue to the next file in the fsevents directory
self.error_file_count += 1
continue
self.parsed_file_count += 1
# Accounts for fsevent files that get flushed to disk
# at the same time. Usually the result of a shutdown
# or unmount
if not self.is_carved_gzip and self.use_file_mod_dates:
prev_mod_date = self.m_time
prev_last_wd = int(self.src_filename, 16)
# If DLSs were found, pass the decompressed file to be parsed
FSEventHandler.parse(self, buf)
def _get_fsevent_image_files(self):
"""
get_fsevent_files will iterate through each file in the fsevents dir
and attempt to decompress the gzip. If it is unable to decompress,
it will write an entry in the logfile. If successful, the script will
check for a DLS header signature in the decompress gzip. If found, the contents of
the gzip will be placed into a buffer and passed to the next phase of processing.
"""
# Print the header columns to the output file
Output.print_columns(self.l_all_fsevents)
scan_path_spec = None
scanner = source_scanner.SourceScanner()
scan_context = source_scanner.SourceScannerContext()
scan_context.OpenSourcePath(self.meta['source'])
scanner.Scan(
scan_context,
scan_path_spec=scan_path_spec
)
for file_system_path_spec, file_system_scan_node in list(scan_context._file_system_scan_nodes.items()):
t_files = 0
self.all_files_count = 0
self.error_file_count = 0
self.all_records_count = 0
self.parsed_file_count = 0
try:
location = file_system_path_spec.parent.location
except:
location = file_system_path_spec.location
print(" Processing Volume {}.\n".format(location))
fsevent_locs = ["/.fseventsd","/System/Volumes/Data/.fseventsd"]
for f_loc in fsevent_locs:
fs_event_path_spec = path_spec_factory.Factory.NewPathSpec(
file_system_path_spec.type_indicator,
parent=file_system_path_spec.parent,
location=f_loc
)
file_entry = resolver.Resolver.OpenFileEntry(
fs_event_path_spec
)
if file_entry != None:
t_files = file_entry.number_of_sub_file_entries
for sub_file_entry in file_entry.sub_file_entries:
if sub_file_entry.name == 'fseventsd-uuid':
t_files -= 1
self.time_range_src_mod = []
prev_mod_date = "Unknown"
prev_last_wd = 0
c_last_wd = 0
counter = 0
# Uses file mod dates to generate time ranges by default unless
# files are carved or mod dates lost due to exporting
self.use_file_mod_dates = True
# Iterate through each file in supplied fsevents dir
for sub_file_entry in file_entry.sub_file_entries:
if sub_file_entry.name == 'fseventsd-uuid':
continue
# Variables
counter += 1
self.all_files_count += 1
# Call the progress bar which shows parsing stats
progress(counter, t_files)
buf = ""
# Name of source fsevent file
self.src_filename = sub_file_entry.name
self.src_fullpath = self.meta['source'] + ": " + location + sub_file_entry.path_spec.location
stat_object = sub_file_entry.GetStat()
# UTC mod date of source fsevent file
self.m_time = datetime.datetime.fromtimestamp(
stat_object.mtime).strftime(
'%Y-%m-%d %H:%M:%S') + " [UTC]"
# Regex to match against source fsevent log filename
regexp = re.compile(r'^.*[\][0-9a-fA-F]{16}$')
# Test to see if fsevent file name matches naming standard
# if not, assume this is a carved gzip
if len(self.src_filename) == 16 and regexp.search(self.src_filename) is not None:
c_last_wd = int(self.src_filename, 16)
self.time_range_src_mod = prev_last_wd, c_last_wd, prev_mod_date, self.m_time
self.is_carved_gzip = False
else:
self.is_carved_gzip = True
file_object = sub_file_entry.GetFileObject()
compressedFile = io.StringIO()
compressedFile.write(file_object.read())
compressedFile.seek(0)
# Attempt to decompress the fsevent archive
try:
with self.skip_gzip_check():
self.files = gzip.GzipFile(fileobj=compressedFile, mode='rb')
buf = self.files.read()
except Exception as exp:
self.logfile.write(
"%s\tError: Error while decompressing FSEvents file.%s\n" % (
self.src_filename,
str(exp)
)
)
self.error_file_count += 1
continue
# If decompress is success, check for DLS headers in the current file
dls_chk = FSEventHandler.dls_header_search(self, buf, self.src_filename)
# If check for DLS returns false, write information to logfile
if dls_chk is False:
self.logfile.write('%s\tInfo: DLS Header Check Failed. Unable to find a '
'DLS header. Unable to parse File.\n' % (self.src_filename))
# Continue to the next file in the fsevents directory
self.error_file_count += 1
continue
self.parsed_file_count += 1
# Accounts for fsevent files that get flushed to disk
# at the same time. Usually the result of a shutdown
# or unmount
if not self.is_carved_gzip and self.use_file_mod_dates:
prev_mod_date = self.m_time
prev_last_wd = int(self.src_filename, 16)
# If DLSs were found, pass the decompressed file to be parsed
FSEventHandler.parse(self, buf)
else:
print('Unable to process volume or no fsevent files found')
continue
print(('\n\n All Files Attempted: {}\n All Parsed Files: {}\n Files '
'with Errors: {}\n All Records Parsed: {}'.format(
self.all_files_count,
self.parsed_file_count,
self.error_file_count,
self.all_records_count)))
def dls_header_search(self, buf, f_name):
"""
Search within the unzipped file
for all occurrences of the DLS magic header.
There can be more than one DLS header in an fsevents file.
The start and end offsets are stored and used for parsing
the records contained within each DLS page.
"""
self.file_size = len(buf)
self.my_dls = []
raw_file = buf
dls_count = 0
start_offset = 0
end_offset = 0
while end_offset != self.file_size:
try:
start_offset = end_offset
page_len = struct.unpack("<I", raw_file[start_offset + 8:start_offset + 12])[0]
end_offset = start_offset + page_len
rfh = str(raw_file[start_offset:start_offset + 4])[2:-1]
if rfh == '1SLD' or rfh == '2SLD' or rfh == '3SLD':
self.my_dls.append({'Start Offset': start_offset, 'End Offset': end_offset})
dls_count += 1
else:
self.logfile.write("%s: Error in length of page when finding page headers." % (f_name))
break
except:
self.logfile.write("%s: Error in length of page when finding page headers." % (f_name))
sys.exit(0)
if dls_count == 0:
# Return false to caller so that the next file will be searched
return False
else:
# Return true so that the DLSs found can be parsed
return True
def parse(self, buf):
"""
Parse the decompressed fsevent log. First
finding other dates, then iterating through
eash DLS page found. Then parse records within
each page.
"""
# Initialize variables
pg_count = 0
# Call the date finder for current fsevent file
FSEventHandler.find_date(self, buf)
self.valid_record_check = True
# Iterate through DLS pages found in current fsevent file
for i in self.my_dls:
# Assign current DLS offsets
start_offset = self.my_dls[pg_count]['Start Offset']
end_offset = self.my_dls[pg_count]['End Offset']
# Extract the raw DLS page from the fsevents file
raw_page = buf[start_offset:end_offset]
self.page_offset = start_offset
# Reverse byte stream to match byte order little-endian
m_dls_chk = str(raw_page[0:4])[2:-1]
# Assign DLS version based off magic header in page
if m_dls_chk == "1SLD":
self.dls_version = 1
elif m_dls_chk == "2SLD":
self.dls_version = 2
elif m_dls_chk == "3SLD":
self.dls_version = 3
else:
self.logfile.write("%s: Unknown DLS Version." % (self.src_filename))
break
# Pass the raw page + a start offset to find records within page
FSEventHandler.find_page_records(
self,
raw_page,
start_offset
)
# Increment the DLS page count by 1
pg_count += 1
def find_date(self, raw_file):
"""
Search within current file for names of log files that are created
that store the date as a part of its naming
standard.
"""
# Reset variables
self.time_range = []
# Add previous file's mod timestamp, wd and current file's timestamp, wd
# to time range
if not self.is_carved_gzip and self.use_file_mod_dates:
c_time_1 = str(self.time_range_src_mod[2])[:10].replace("-", ".")
c_time_2 = str(self.time_range_src_mod[3])[:10].replace("-", ".")
self.time_range.append([self.time_range_src_mod[0], c_time_1])
self.time_range.append([self.time_range_src_mod[1], c_time_2])
# Regex's for logs with dates in name
regex_1 = ("private/var/log/asl/[\x30-\x39]{4}[.][\x30-\x39]{2}" +
"[.][\x30-\x39]{2}[.][\x30-\x7a]{2,8}[.]asl")
regex_2 = ("mobile/Library/Logs/CrashReporter/DiagnosticLogs/security[.]log" +
"[.][\x30-\x39]{8}T[\x30-\x39]{6}Z")
regex_3 = ("private/var/log/asl/Logs/aslmanager[.][\x30-\x39]{8}T[\x30-\x39]" +
"{6}[-][\x30-\x39]{2}")
regex_4 = ("private/var/log/DiagnosticMessages/[\x30-\x39]{4}[.][\x30-\x39]{2}" +
"[.][\x30-\x39]{2}[.]asl")
regex_5 = ("private/var/log/com[.]apple[.]clouddocs[.]asl/[\x30-\x39]{4}[.]" +
"[\x30-\x39]{2}[.][\x30-\x39]{2}[.]asl")
regex_6 = ("private/var/log/powermanagement/[\x30-\x39]{4}[.][\x30-\x39]{2}[.]" +
"[\x30-\x39]{2}[.]asl")
regex_7 = ("private/var/log/asl/AUX[.][\x30-\x39]{4}[.][\x30-\x39]{2}[.]" +
"[\x30-\x39]{2}/[0-9]{9}")
regex_8 = "private/var/audit/[\x30-\x39]{14}[.]not_terminated"
# Regex that matches only events with created flag
flag_regex = ("[\x00-\xFF]{9}[\x01|\x11|\x21|\x31|\x41|\x51|\x61|\x05|\x15|" +
"\x25|\x35|\x45|\x55|\x65]")
# Concatenating date, flag matching regexes
# Also grabs working descriptor for record
m_regex = "(" + regex_1 + "|" + regex_2 + "|" + regex_3 + "|" + regex_4 + "|" + regex_5
m_regex = m_regex + "|" + regex_6 + "|" + regex_7 + "|" + regex_8 + ")" + flag_regex
m_regex = m_regex.encode("utf-8")
# Start searching within fsevent file for events that match dates regex
# As the length of each log location is different, create if statements for each
# so that the date can be pulled from the correct location within the fullpath
for match in re.finditer(m_regex, raw_file):
if raw_file[match.regs[0][0]:match.regs[0][0] + 35] == "private/var/log/asl/Logs/aslmanager":
# Clear timestamp temp variable
t_temp = ''
# t_start uses the start offset of the match
t_start = match.regs[0][0] + 36
# The date is 8 chars long in the format of yyyymmdd
t_end = t_start + 8
# Strip the date from the fsevent file
t_temp = raw_file[t_start:t_end]
# Format the date
t_temp = t_temp[:4] + "." + t_temp[4:6] + "." + t_temp[6:8]
wd_temp = struct.unpack("<Q", raw_file[match.regs[0][1] - 9:match.regs[0][1] - 1])[0]
elif raw_file[match.regs[0][0]:match.regs[0][0] + 23] == "private/var/log/asl/AUX":
# Clear timestamp temp variable
t_temp = ''
# t_start uses the start offset of the match
t_start = match.regs[0][0] + 24
# The date is 10 chars long in the format of yyyy.mm.dd
t_end = t_start + 10
# Strip the date from the fsevent file
t_temp = raw_file[t_start:t_end]
wd_temp = struct.unpack("<Q", raw_file[match.regs[0][1] - 9:match.regs[0][1] - 1])[0]
elif raw_file[match.regs[0][0]:match.regs[0][0] + 19] == "private/var/log/asl":
# Clear timestamp temp variable
t_temp = ''
# t_start uses the start offset of the match
t_start = match.regs[0][0] + 20
# The date is 10 chars long in the format of yyyy.mm.dd
t_end = t_start + 10
# Strip the date from the fsevent file
t_temp = raw_file[t_start:t_end]
wd_temp = struct.unpack("<Q", raw_file[match.regs[0][1] - 9:match.regs[0][1] - 1])[0]
elif raw_file[match.regs[0][0]:match.regs[0][0] + 4] == "mobi":
# Clear timestamp temp variable
t_temp = ''
# t_start uses the start offset of the match
t_start = match.regs[0][0] + 62
# The date is 8 chars long in the format of yyyymmdd
t_end = t_start + 8
# Strip the date from the fsevent file
t_temp = raw_file[t_start:t_end]
# Format the date
t_temp = t_temp[:4] + "." + t_temp[4:6] + "." + t_temp[6:8]
wd_temp = struct.unpack("<Q", raw_file[match.regs[0][1] - 9:match.regs[0][1] - 1])[0]
elif raw_file[match.regs[0][0]:match.regs[0][0] + 34] == "private/var/log/DiagnosticMessages":
# Clear timestamp temp variable
t_temp = ''
# t_start uses the start offset of the match
t_start = match.regs[0][0] + 35
# The date is 10 chars long in the format of yyyy.mm.dd
t_end = t_start + 10
# Strip the date from the fsevent file
t_temp = raw_file[t_start:t_end]
wd_temp = struct.unpack("<Q", raw_file[match.regs[0][1] - 9:match.regs[0][1] - 1])[0]
elif raw_file[match.regs[0][0]:match.regs[0][0] + 39] == "private/var/log/com.apple.clouddocs.asl":
# Clear timestamp temp variable
t_temp = ''
# t_start uses the start offset of the match
t_start = match.regs[0][0] + 40
# The date is 10 chars long in the format of yyyy.mm.dd
t_end = t_start + 10
# Strip the date from the fsevent file
t_temp = raw_file[t_start:t_end]
wd_temp = struct.unpack("<Q", raw_file[match.regs[0][1] - 9:match.regs[0][1] - 1])[0]
elif raw_file[match.regs[0][0]:match.regs[0][0] + 31] == "private/var/log/powermanagement":
# Clear timestamp temp variable
t_temp = ''
# t_start uses the start offset of the match
t_start = match.regs[0][0] + 32
# The date is 10 chars long in the format of yyyy.mm.dd
t_end = t_start + 10
# Strip the date from the fsevent file
t_temp = raw_file[t_start:t_end]
wd_temp = struct.unpack("<Q", raw_file[match.regs[0][1] - 9:match.regs[0][1] - 1])[0]
elif raw_file[match.regs[0][0]:match.regs[0][0] + 17] == "private/var/audit":
# Clear timestamp temp variable
t_temp = ''
# t_start uses the start offset of the match
t_start = match.regs[0][0] + 18
# The date is 8 chars long in the format of yyyymmdd
t_end = t_start + 8
# Strip the date from the fsevent file
t_temp = raw_file[t_start:t_end]
# Format the date
t_temp = t_temp[:4] + "." + t_temp[4:6] + "." + t_temp[6:8]
wd_temp = struct.unpack("<Q", raw_file[match.regs[0][1] - 9:match.regs[0][1] - 1])[0]
else:
t_temp = ''
wd_temp = ''
# Append date, wd to time range list
if wd_temp == '' and t_temp == '':
pass
else:
self.time_range.append([wd_temp, t_temp])
# Sort the time range list by wd
self.time_range = sorted(self.time_range, key=self.get_key)
# Call the time range builder to rebuild time range
self.build_time_range()
def get_key(self, item):
"""
Return the key in the time range item provided.
"""
return item[0]
def build_time_range(self):
"""
Rebuilds the time range list to
include the previous and current working descriptor
as well as the previous and current date found
"""
prev_date = '0'
prev_wd = 0
temp = []
# Iterate through each in time range list
for i in self.time_range:
# Len is 7 when prev_date is 'Unknown'
if len(prev_date) == 7:
p_date = 0
c_date = i[1][:10].replace(".", "")
# When current date is 'Unknown'
if len(i[1]) == 7:
p_date = prev_date[:10].replace(".", "")
c_date = 0
# When both dates are known
if len(prev_date) != 7 and len(i[1]) != 7:
p_date = prev_date[:10].replace(".", "")
c_date = i[1][:10].replace(".", "")
# Bypass a date when current date is less than prev date
if int(c_date) < int(p_date):
prev_wd = prev_wd
prev_date = prev_date
else:
# Reassign prev_date to 'Unknown'
if prev_date == '0':
prev_date = 'Unknown'
# Add previous, current wd and previous, current date to temp
temp.append([prev_wd, i[0], prev_date, i[1]])
prev_wd = i[0]
prev_date = i[1]
# Assign temp list to time range list
self.time_range = temp
def find_page_records(self, page_buf, page_start_off):
"""
Input values are starting offset of current page and
end offset of current page within the current fsevent file
find_page_records will identify all records within a given page.
"""
# Initialize variables
fullpath = ''
char = ''
# Start, end offset of first record to be parsed within current DLS page
start_offset = 12
end_offset = 13
len_buf = len(page_buf)
# Call the file header parser for current DLS page
try:
FsEventFileHeader(
page_buf[:13],
self.src_fullpath
)
except:
self.logfile.write(
"%s\tError: Unable to parse file header at offset %d\n" % (
self.src_filename,
page_start_off
)
)
# Account for length of record for different DLS versions
# Prior to HighSierra
if self.dls_version == 1:
bin_len = 13
rbin_len = 12
# HighSierra
elif self.dls_version == 2:
bin_len = 21
rbin_len = 20
# Sonoma
elif self.dls_version == 3:
bin_len = 25
rbin_len = 24
else:
pass
# Iterate through the page.
# Valid record check should be true while parsing.
# If an invalid record is encounted (occurs in carved gzips)
# parsing stops for the current file
while len_buf > start_offset and self.valid_record_check:
# Grab the first char
char = page_buf[start_offset:end_offset].hex()
if char != '00':
# Replace non-printable char with nothing
if str(char).lower() == '0d' or str(char).lower() == '0a':