Skip to content

Commit

Permalink
skip blank rows in source files, #206
Browse files Browse the repository at this point in the history
Skip rows that are completely blank instead of erroring out
  • Loading branch information
ThrawnCA authored Feb 9, 2024
1 parent 64f741d commit b5b99c7
Showing 1 changed file with 4 additions and 2 deletions.
6 changes: 4 additions & 2 deletions ckanext/xloader/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def load_csv(csv_filepath, resource_id, mimetype='text/csv', logger=None):

# Get the list of rows to skip. The rows in the tabulator stream are
# numbered starting with 1.
skip_rows = list(range(1, header_offset + 1))
# Skip every row up to the header offset, plus any completely blank row
# (tabulator's 'blank' preset). list() accepts only one iterable, so the
# preset dict is concatenated rather than passed as a second argument.
skip_rows = list(range(1, header_offset + 1)) + [{'type': 'preset', 'value': 'blank'}]

# Get the delimiter used in the file
delimiter = stream.dialect.get('delimiter')
Expand Down Expand Up @@ -347,12 +347,14 @@ def load_table(table_filepath, resource_id, mimetype='text/csv', logger=None):
try:
file_format = os.path.splitext(table_filepath)[1].strip('.')
with UnknownEncodingStream(table_filepath, file_format, decoding_result,
skip_rows=[{'type': 'preset', 'value': 'blank'}],
post_parse=[TypeConverter().convert_types]) as stream:
header_offset, headers = headers_guess(stream.sample)
except TabulatorException:
try:
file_format = mimetype.lower().split('/')[-1]
with UnknownEncodingStream(table_filepath, file_format, decoding_result,
skip_rows=[{'type': 'preset', 'value': 'blank'}],
post_parse=[TypeConverter().convert_types]) as stream:
header_offset, headers = headers_guess(stream.sample)
except TabulatorException as e:
Expand All @@ -373,7 +375,7 @@ def load_table(table_filepath, resource_id, mimetype='text/csv', logger=None):

# Get the list of rows to skip. The rows in the tabulator stream are
# numbered starting with 1. We also want to skip the header row.
skip_rows = list(range(1, header_offset + 2))
# Skip every row up to and including the header row, plus any completely
# blank row (tabulator's 'blank' preset). list() accepts only one iterable,
# so the preset dict is concatenated rather than passed as a second argument.
skip_rows = list(range(1, header_offset + 2)) + [{'type': 'preset', 'value': 'blank'}]

TYPES, TYPE_MAPPING = get_types()
types = type_guess(stream.sample[1:], types=TYPES, strict=True)
Expand Down

0 comments on commit b5b99c7

Please sign in to comment.