diff --git a/CHANGELOG.md b/CHANGELOG.md index 5d16bef309..1b715bdfcf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -73,6 +73,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 () - Remove deprecated MediaManager () +- Fix explore command without project + () ## Jan. 2024 Release 1.5.2 ### Enhancements diff --git a/docs/source/docs/command-reference/context_free/explorer.md b/docs/source/docs/command-reference/context_free/explorer.md index dbddbcec22..ebeda16233 100644 --- a/docs/source/docs/command-reference/context_free/explorer.md +++ b/docs/source/docs/command-reference/context_free/explorer.md @@ -14,7 +14,7 @@ Usage: ```console datum explore [target] [--query-img-path ] [--query-item-id --query-item-subset ] - [--query-str ] [-topk TOPK] [-p PROJECT_DIR] [-s SAVE] [--stage STAGE] + [--query-str ] [-topk TOPK] [-p PROJECT_DIR] [-s SAVE] [-o DST_DIR] [--stage STAGE] ``` Parameters: @@ -27,6 +27,7 @@ Parameters: - `-topk` (int) - Number how much you want to find similar data. - `-p, --project` (string) - Directory of the project to operate on (default: current directory). - `-s, --save` (bool) - Save explorer result files on explore_result folder. +- `-o, --output-dir` (string) - Directory to save the explore results in when `-s` is set. By default, the `explore_result` folder inside the project directory is used. An existing directory at this path is removed before the results are saved. - `--stage` (bool) - Include this action as a project build step. If true, this operation will be saved in the project build tree, allowing to reproduce the resulting dataset later. 
diff --git a/docs/source/docs/level-up/intermediate_skills/10_data_exploration.rst b/docs/source/docs/level-up/intermediate_skills/10_data_exploration.rst index c43280ad25..232a7a9f76 100644 --- a/docs/source/docs/level-up/intermediate_skills/10_data_exploration.rst +++ b/docs/source/docs/level-up/intermediate_skills/10_data_exploration.rst @@ -65,6 +65,12 @@ The Python example for the usage of explorer is described in :doc:`here <../../j ``QUERY_STR`` could be text description or list of them + .. code-block:: bash + + datum explore --query-str QUERY_STR -topk TOPK_NUM -s -o DST_DIR + + To save the result, specify the output directory as ``DST_DIR`` + .. tab-item:: ProjectCLI With the project-based CLI, we first require to ``create`` a project by diff --git a/src/datumaro/cli/commands/explore.py b/src/datumaro/cli/commands/explore.py index 536dfd190b..8469f9b626 100644 --- a/src/datumaro/cli/commands/explore.py +++ b/src/datumaro/cli/commands/explore.py @@ -5,7 +5,6 @@ import argparse import logging as log import os -import os.path as osp import shutil import uuid @@ -88,6 +87,13 @@ def build_parser(parser_ctor=argparse.ArgumentParser): default=False, help="Save explorer result files on explore_result folder", ) + parser.add_argument( + "-o", + "--output-dir", + dest="dst_dir", + default=None, + help="Directory to save explore results " "(default: generate automatically)", + ) parser.add_argument( "--stage", type=str_to_bool, @@ -112,6 +118,7 @@ def get_sensitive_args(): "target", "topk", "project_dir", + "dst_dir", ] } @@ -130,22 +137,20 @@ def explore_command(args): else: targets = list(project.working_tree.sources) - source_datasets = [] - for target in targets: - target_dataset, _ = parse_full_revpath(target, project) - source_datasets.append(target_dataset) + source_datasets = [parse_full_revpath(target, project)[0] for target in targets] explorer_args = {"save_hashkey": True} - build_tree = project.working_tree.clone() - for target in targets: - 
build_tree.build_targets.add_explore_stage(target, params=explorer_args) + if project: + build_tree = project.working_tree.clone() + for target in targets: + build_tree.build_targets.add_explore_stage(target, params=explorer_args) explorer = Explorer(*source_datasets) for dataset in source_datasets: dst_dir = dataset.data_path dataset.save(dst_dir, save_media=True, save_hashkey_meta=True) - if args.stage: + if args.stage and project: project.working_tree.config.update(build_tree.config) project.working_tree.save() @@ -179,14 +184,14 @@ def explore_command(args): log.info(f"id: {result.id} | subset: {result.subset} | path : {path}") if args.save: - saved_result_path = osp.join(args.project_dir, "explore_result") - if osp.exists(saved_result_path): + saved_result_path = args.dst_dir or os.path.join(args.project_dir, "explore_result") + if os.path.exists(saved_result_path): shutil.rmtree(saved_result_path) os.makedirs(saved_result_path) for result in results: - saved_subset_path = osp.join(saved_result_path, result.subset) - if not osp.exists(saved_subset_path): + saved_subset_path = os.path.join(saved_result_path, result.subset) + if not os.path.exists(saved_subset_path): os.makedirs(saved_subset_path) - shutil.copyfile(path, osp.join(saved_subset_path, result.id + ".jpg")) + shutil.copyfile(path, os.path.join(saved_subset_path, result.id + ".jpg")) return 0 diff --git a/tests/integration/cli/test_explore.py b/tests/integration/cli/test_explore.py index 6899d11207..0ad72fd54a 100644 --- a/tests/integration/cli/test_explore.py +++ b/tests/integration/cli/test_explore.py @@ -169,6 +169,34 @@ def test_can_explore_dataset_w_query_str(self): self.assertIn(osp.join(saved_result_path, "train", "1.jpg"), results) + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + @scoped + def test_can_explore_dataset_w_target(self): + test_dir = scope_add(TestDir()) + proj_dir = osp.join(test_dir, "proj") + dataset_url = osp.join(test_dir, "dataset") + train_image_path = 
osp.join(dataset_url, "images", "train", "1.jpg") + saved_result_path = osp.join(proj_dir, "explore_result") + + self.test_dataset.export(dataset_url, "datumaro", save_media=True) + + run( + self, + "explore", + dataset_url, + "--query-img-path", + train_image_path, + "-topk", + "2", + "-s", + "-o", + saved_result_path, + ) + + results = glob(osp.join(saved_result_path, "**", "*"), recursive=True) + + self.assertIn(osp.join(saved_result_path, "train", "1.jpg"), results) + @mark_requirement(Requirements.DATUM_GENERAL_REQ) @scoped def test_can_explore_dataset_wo_target(self):