Skip to content

Commit

Permalink
Merge pull request #448 from BrainPad/447
Browse files: browse the repository at this point in the history
Prevent the value from being empty in pandas read_csv
  • Loading branch information
yasuhiro-ohba authored Mar 7, 2024
2 parents 0830d75 + 3538a9d commit 6182b27
Show file tree
Hide file tree
Showing 2 changed files with 281 additions and 5 deletions.
28 changes: 23 additions & 5 deletions cliboa/scenario/transform/csv.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -68,6 +68,7 @@ def _read_csv_func(self, chunksize, fi, fo):
dtype=str,
encoding=self._encoding,
chunksize=chunksize,
+ na_filter=False,
)
for df in tfr:
for c in self._columns:
Expand Down Expand Up @@ -159,6 +160,7 @@ def _read_csv_func(self, chunksize, fi, fo):
dtype=str,
encoding=self._encoding,
chunksize=chunksize,
+ na_filter=False,
)
pattern = re.compile(self._regex_pattern)
for df in tfr:
Expand Down Expand Up @@ -262,6 +264,7 @@ def _read_csv_func(self, chunksize, fi, fo):
dtype=str,
encoding=self._encoding,
chunksize=chunksize,
+ na_filter=False,
)
dest_str = None
for df in tfr:
Expand Down Expand Up @@ -348,7 +351,7 @@ def convert(self, fi, fo):
def _read_csv_func(self, chunksize, fi, fo):
# Used in chunk_size_handling
first_write = True
- tfr = pandas.read_csv(fi, chunksize=chunksize)
+ tfr = pandas.read_csv(fi, chunksize=chunksize, na_filter=False)
for df in tfr:
df = df[~df[self._src_column].isin(self.df_target_list)]
df.to_csv(
Expand Down Expand Up @@ -460,6 +463,7 @@ def _read_csv_func(self, chunksize, target1_files, target2_files):
dtype=str,
encoding=self._encoding,
chunksize=chunksize,
+ na_filter=False,
)
for df in tfr1:
df.to_csv(
Expand Down Expand Up @@ -524,6 +528,7 @@ def _read_csv_func(self, chunksize, fi, fo):
dtype=str,
encoding=self._encoding,
chunksize=chunksize,
+ na_filter=False,
)
for df in tfr:
if set(self._column_order) - set(df.columns.values):
Expand Down Expand Up @@ -584,12 +589,16 @@ def execute(self, *args):

# Create output headers to conform to the concat specification.
file_1 = files[0]
- output_header = pandas.read_csv(file_1, dtype=str, encoding=self._encoding, nrows=0)
+ output_header = pandas.read_csv(
+ file_1, dtype=str, encoding=self._encoding, nrows=0, na_filter=False
+ )
for file in files[1:]:
output_header = pandas.concat(
[
output_header,
- pandas.read_csv(file, dtype=str, encoding=self._encoding, nrows=0),
+ pandas.read_csv(
+ file, dtype=str, encoding=self._encoding, nrows=0, na_filter=False
+ ),
]
)

Expand All @@ -604,6 +613,7 @@ def _read_csv_func(self, chunksize, files, output_header):
dtype=str,
encoding=self._encoding,
chunksize=chunksize,
+ na_filter=False,
)
for df in tfr:
# Change the header order to the one you plan to output.
Expand Down Expand Up @@ -858,7 +868,13 @@ def execute(self, *args):
super().io_files(files, func=self.convert)

def convert(self, fi, fo):
- header = pandas.read_csv(fi, dtype=str, encoding=self._encoding, nrows=0)
+ header = pandas.read_csv(
+ fi,
+ dtype=str,
+ encoding=self._encoding,
+ nrows=0,
+ na_filter=False,
+ )
if self._src_column not in header:
raise KeyError("Copy source column does not exist in file. [%s]" % self._src_column)

Expand All @@ -872,6 +888,7 @@ def _read_csv_func(self, chunksize, fi, fo):
dtype=str,
encoding=self._encoding,
chunksize=chunksize,
+ na_filter=False,
)

for df in tfr:
Expand Down Expand Up @@ -931,7 +948,7 @@ def execute(self, *args):
super().io_files(files, func=self.convert)

def convert(self, fi, fo):
- header = pandas.read_csv(fi, dtype=str, encoding=self._encoding, nrows=0)
+ header = pandas.read_csv(fi, dtype=str, encoding=self._encoding, nrows=0, na_filter=False)
if self._column not in header:
raise KeyError("Replace source column does not exist in file. [%s]" % self._column)

Expand All @@ -945,6 +962,7 @@ def _read_csv_func(self, chunksize, fi, fo):
dtype=str,
encoding=self._encoding,
chunksize=chunksize,
+ na_filter=False,
)

for df in tfr:
Expand Down
Loading

0 comments on commit 6182b27

Please sign in to comment.