Skip to content

Commit

Permalink
Merge pull request #453 from BrainPad/451
Browse files Browse the repository at this point in the history
Allow changing the suffix in the FileDivide class
  • Loading branch information
yasuhiro-ohba authored Jun 13, 2024
2 parents 6182b27 + deee08f commit 83357dc
Show file tree
Hide file tree
Showing 3 changed files with 148 additions and 13 deletions.
8 changes: 6 additions & 2 deletions cliboa/scenario/transform/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,13 +422,17 @@ def __init__(self):
super().__init__()
self._divide_rows = None
self._header = False
self._suffix_pattern = ".%d"

def divide_rows(self, divide_rows):
    """Set the number of rows written to each divided file."""
    self._divide_rows = divide_rows

def header(self, header):
    """Set whether the source file's header row is added to every divided file."""
    self._header = header

def suffix_pattern(self, suffix_pattern):
    """Set the %-format pattern used to build the index suffix of divided files.

    ``execute`` inserts the pattern between the file name and its extension
    and formats it with the integer division index (starting at 1), so the
    pattern needs one conversion specifier such as ``%d`` or ``%02d``.
    The value set in ``__init__`` is ``".%d"``.
    """
    self._suffix_pattern = suffix_pattern

def execute(self, *args):
valid = EssentialParameters(
self.__class__.__name__,
Expand Down Expand Up @@ -459,7 +463,7 @@ def execute(self, *args):
self._header_row = i.readline()

row = self._ifile_reader(file)
newfilename = px + nameonly + ".%s" + ext
newfilename = px + nameonly + self._suffix_pattern + ext

if self._dest_dir:
os.makedirs(self._dest_dir, exist_ok=True)
Expand All @@ -469,7 +473,7 @@ def execute(self, *args):
has_left = True
index = 1
while has_left:
ofile_path = os.path.join(dest_dir, newfilename % str(index))
ofile_path = os.path.join(dest_dir, newfilename % index)
has_left = self._ofile_generator(ofile_path, row)
index = index + 1

Expand Down
93 changes: 93 additions & 0 deletions cliboa/test/scenario/transform/test_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -653,6 +653,99 @@ def test_execute_ok_4(self):
else:
break

def test_execute_ok_5(self):
    """FileDivide names the divided files with a custom ``_%d`` suffix.

    Writes a header plus 100 numbered rows, divides them 10 rows per file
    with ``suffix_pattern="_%d"``, and checks that ``test_1.txt`` ..
    ``test_10.txt`` exist, each start with the header, and together hold
    every row in the original order.
    """
    file1 = os.path.join(self._data_dir, "test.txt")
    with open(file1, mode="w", encoding="utf-8") as f:
        f.write("idx\n")
        for i in range(100):
            f.write("%s\n" % str(i))

    instance = FileDivide()
    Helper.set_property(instance, "logger", LisboaLog.get_logger(__name__))
    Helper.set_property(instance, "src_dir", self._data_dir)
    Helper.set_property(instance, "src_pattern", r"test\.txt")
    Helper.set_property(instance, "dest_dir", self._out_dir)
    Helper.set_property(instance, "divide_rows", 10)
    Helper.set_property(instance, "header", True)
    Helper.set_property(instance, "suffix_pattern", "_%d")
    instance.execute()

    row_index = 0
    for i in range(1, 11):
        file = os.path.join(self._out_dir, "test_%s.txt" % i)
        assert os.path.exists(file)
        with open(file, "r", encoding="utf-8", newline="") as f:
            assert f.readline() == "idx\n"
            for line in f:
                assert str(row_index) == line.splitlines()[0]
                row_index += 1

    # Without this check, output files holding only the header would pass.
    assert row_index == 100

def test_execute_ok_6(self):
    """FileDivide honours a zero-padded ``.%02d`` suffix pattern.

    Writes a header plus 100 numbered rows, divides them 10 rows per file
    with ``suffix_pattern=".%02d"``, and checks that ``test.01.txt`` ..
    ``test.10.txt`` exist, each start with the header, and together hold
    every row in the original order.
    """
    file1 = os.path.join(self._data_dir, "test.txt")
    with open(file1, mode="w", encoding="utf-8") as f:
        f.write("idx\n")
        for i in range(100):
            f.write("%s\n" % str(i))

    instance = FileDivide()
    Helper.set_property(instance, "logger", LisboaLog.get_logger(__name__))
    Helper.set_property(instance, "src_dir", self._data_dir)
    Helper.set_property(instance, "src_pattern", r"test\.txt")
    Helper.set_property(instance, "dest_dir", self._out_dir)
    Helper.set_property(instance, "divide_rows", 10)
    Helper.set_property(instance, "header", True)
    Helper.set_property(instance, "suffix_pattern", ".%02d")
    instance.execute()

    row_index = 0
    for i in range(1, 11):
        # Expected names are spelled with the same zero-padded width.
        file = os.path.join(self._out_dir, "test.%02d.txt" % i)
        assert os.path.exists(file)
        with open(file, "r", encoding="utf-8", newline="") as f:
            assert f.readline() == "idx\n"
            for line in f:
                assert str(row_index) == line.splitlines()[0]
                row_index += 1

    # Without this check, output files holding only the header would pass.
    assert row_index == 100

def test_execute_ok_7(self):
    """FileDivide lets a ``.%1d`` suffix grow past its minimum width.

    Writes a header plus 100 numbered rows, divides them one row per file
    with ``suffix_pattern=".%1d"``, and checks that ``test.1.txt`` ..
    ``test.100.txt`` exist, each start with the header, and together hold
    every row in the original order.
    """
    file1 = os.path.join(self._data_dir, "test.txt")
    with open(file1, mode="w", encoding="utf-8") as f:
        f.write("idx\n")
        for i in range(100):
            f.write("%s\n" % str(i))

    instance = FileDivide()
    Helper.set_property(instance, "logger", LisboaLog.get_logger(__name__))
    Helper.set_property(instance, "src_dir", self._data_dir)
    Helper.set_property(instance, "src_pattern", r"test\.txt")
    Helper.set_property(instance, "dest_dir", self._out_dir)
    Helper.set_property(instance, "divide_rows", 1)
    Helper.set_property(instance, "header", True)
    Helper.set_property(instance, "suffix_pattern", ".%1d")
    instance.execute()

    row_index = 0
    for i in range(1, 101):
        file = os.path.join(self._out_dir, "test.%s.txt" % i)
        assert os.path.exists(file)
        with open(file, "r", encoding="utf-8", newline="") as f:
            assert f.readline() == "idx\n"
            for line in f:
                assert str(row_index) == line.splitlines()[0]
                row_index += 1

    # Without this check, output files holding only the header would pass.
    assert row_index == 100


class TestFileRename(TestFileTransform):
def test_execute_ok(self):
Expand Down
60 changes: 49 additions & 11 deletions docs/modules/file_divide.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,18 @@ Either way index number of divided count will be added for the suffix of the new
Ex. foo.txt -> [ foo.1.txt, foo.2.txt, foo.3.txt ... ]

# Parameters
|Parameters|Explanation|Required|Default|Remarks|
|----------|-----------|--------|-------|-------|
|src_dir|Path of the directory which target files are placed.|Yes|None||
|src_pattern|Regex which is to find target files.|Yes|None||
|dest_dir|Path of the directory which is for output files.|No|None|If this parameter is not set, the file is created in the same directory as the processing file. If a non-existent directory path is specified, the directory is automatically created.|
|divide_rows|Number of the rows of individual files after divided|Yes|None||
|header|Whether if header is added to the divided files|No|False|If True, Original file's header will be added to the all divided files.|
|encoding|Character encoding|No|utf-8|||
|nonfile_error|Whether an error is thrown when files are not found in src_dir.|No|False||

# Examples
| Parameters | Explanation | Required | Default | Remarks |
|----------------|-----------------------------------------------------------------|----------|---------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| src_dir | Path of the directory which target files are placed. | Yes | None | |
| src_pattern | Regex which is to find target files. | Yes | None | |
| dest_dir | Path of the directory which is for output files. | No | None | If this parameter is not set, the file is created in the same directory as the processing file. If a non-existent directory path is specified, the directory is automatically created. |
| divide_rows | Number of the rows of individual files after divided. | Yes | None | |
| header         | Whether the header is added to each divided file.               | No       | False   | If True, the original file's header is added to every divided file. |
| encoding | Character encoding. | No | utf-8 | |
| nonfile_error | Whether an error is thrown when files are not found in src_dir. | No | False | |
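| suffix_pattern | printf-style pattern for the index suffix of each divided file. | No       | .%d     | Formatted with the division index (starting at 1) and inserted between the file name and its extension, so it needs one conversion specifier such as `%d` or `%02d`. Ex. `_%02d`: foo.txt -> [ foo_01.txt, foo_02.txt ... ] |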

# Example1
```
scenario:
- step:
Expand Down Expand Up @@ -51,3 +52,40 @@ id, name
id, name
5, five
```

# Example2
```
scenario:
- step:
class: FileDivide
arguments:
src_dir: /in
src_pattern: test\.csv
dest_dir: /out
divide_rows: 2
header: True
suffix_pattern: _%02d
Input: /in/test.csv
id, name
1, one
2, two
3, three
4, four
5, five
Output:
/out/test_01.csv
id, name
1, one
2, two
/out/test_02.csv
id, name
3, three
4, four
/out/test_03.csv
id, name
5, five
```

0 comments on commit 83357dc

Please sign in to comment.