diff --git a/README.md b/README.md index 0d43dfe..1b1b427 100644 --- a/README.md +++ b/README.md @@ -3,3 +3,27 @@ Extension to linkml-runtime for converting between instances of LinkML models and CSVs. This may involve selective normalization/denormalization, plus serialization of selected elements as JSON/YAML This builds on [json-flattener](https://github.com/cmungall/json-flattener) + +## Command Line Usage + +Denormalizing conversion from YAML instance data to TSV + +``` +lconvert \ + -s examples/bookshop.schema.yaml \ + -C Shop \ + -S all_book_series \ + -o examples/shop1.instance.tsv \ + examples/shop1.instance.yaml +``` + +Converting back to YAML/JSON: + +``` +lconvert \ + -s examples/bookshop.schema.yaml \ + -C Shop \ + -S all_book_series \ + -o examples/shop1-troundtrip.instance.json \ + examples/shop1.instance.tsv +``` diff --git a/examples/bookshop.schema.yaml b/examples/bookshop.schema.yaml new file mode 100644 index 0000000..0348bf5 --- /dev/null +++ b/examples/bookshop.schema.yaml @@ -0,0 +1,112 @@ +id: https://w3id.org/example +name: example +description: example +imports: +- linkml:types +prefixes: + linkml: https://w3id.org/linkml/ + example: https://w3id.org/example +default_prefix: example +types: {} +classes: + creative work: + slots: + - id + - name + - genres + - creator + - genres + - summary + - reviews + book: + is_a: creative work + slots: + - price + - inStock + book series: + is_a: creative work + slots: + - books + - genres + - price + author: + slots: + - name + - genres + - from_country + shop: + slots: + - all_book_series + country: + slots: + - name + slot_usage: + name: + identifier: true + review: + slots: + - creator + - rating + - review_text + + +slots: + id: + range: string + identifier: true + examples: + - value: '123' + book_category: + range: string + examples: + - value: book + multivalued: true + name: + range: string + examples: + - value: Consider Phlebas + price: + range: float + examples: + - value: '7.99' + 
inStock: + range: string + examples: + - value: 'true' + creator: + range: author + examples: + - value: Ian M Banks + genres: + #range: genre_enum + range: string + multivalued: true + from_country: + range: country + books: + range: book + multivalued: true + inlined: true + inlined_as_list: true + all_book_series: + range: book series + multivalued: true + inlined: true + inlined_as_list: true + summary: + reviews: + multivalued: true + range: review + rating: + range: integer + review_text: + +enums: + genre_enum: + permissible_values: + scifi: + fantasy: + western: + romance: + modern: + diff --git a/examples/shop1-troundtrip.instance.json b/examples/shop1-troundtrip.instance.json new file mode 100644 index 0000000..7d5488c --- /dev/null +++ b/examples/shop1-troundtrip.instance.json @@ -0,0 +1,116 @@ +{ + "all_book_series": [ + { + "id": "S001", + "name": "Lord of the Rings", + "genres": [ + "fantasy" + ], + "creator": { + "name": "JRR Tolkein", + "from_country": "England" + }, + "books": [ + { + "id": "S001.1", + "name": "Fellowship of the Ring", + "summary": "Hobbits", + "price": 5.99 + }, + { + "id": "S001.2", + "name": "The Two Towers", + "summary": "More hobbits", + "price": 5.99 + }, + { + "id": "S001.3", + "name": "Return of the King", + "summary": "Yet more hobbits", + "price": 6.99 + } + ] + }, + { + "id": "S002", + "name": "The Culture Series", + "genres": [ + "scifi" + ], + "creator": { + "name": "Ian M Banks", + "from_country": "Scotland" + }, + "books": [ + { + "id": "S002.1", + "name": "Consider Phlebas", + "price": 5.99 + }, + { + "id": "S002.2", + "name": "Player of Games", + "price": 5.99 + } + ] + }, + { + "id": "S003", + "name": "Book of the New Sun", + "genres": [ + "scifi", + "fantasy" + ], + "creator": { + "name": "Gene Wolfe", + "genres": [ + "scifi", + "fantasy" + ], + "from_country": "USA" + }, + "books": [ + { + "id": "S003.1", + "name": "Shadow of the Torturer" + }, + { + "id": "S003.2", + "name": "Claw of the Conciliator", + 
"price": 6.99 + } + ] + }, + { + "id": "S004", + "name": "Example with single book", + "creator": { + "name": "Ms Writer", + "genres": [ + "romance" + ], + "from_country": "USA" + }, + "books": [ + { + "id": "S004.1", + "name": "Blah" + } + ] + }, + { + "id": "S005", + "name": "Example with no books", + "creator": { + "name": "Mr Unproductive", + "genres": [ + "romance", + "scifi", + "fantasy" + ], + "from_country": "USA" + } + } + ], + "@type": "Shop" +} \ No newline at end of file diff --git a/examples/shop1.instance.tsv b/examples/shop1.instance.tsv new file mode 100644 index 0000000..c1004bd --- /dev/null +++ b/examples/shop1.instance.tsv @@ -0,0 +1,6 @@ +id name genres creator_json creator_name creator_from_country books_json books_summary books_price books_name books_id creator_genres +S001 Lord of the Rings [fantasy] {\"name\": \"JRR Tolkein\", \"from_country\": \"England\"} JRR Tolkein England [{\"id\": \"S001.1\", \"name\": \"Fellowship of the Ring\", \"summary\": \"Hobbits\", \"price\": 5.99}, {\"id\": \"S001.2\", \"name\": \"The Two Towers\", \"summary\": \"More hobbits\", \"price\": 5.99}, {\"id\": \"S001.3\", \"name\": \"Return of the King\", \"summary\": \"Yet more hobbits\", \"price\": 6.99}] [Hobbits|More hobbits|Yet more hobbits] [5.99|5.99|6.99] [Fellowship of the Ring|The Two Towers|Return of the King] [S001.1|S001.2|S001.3] +S002 The Culture Series [scifi] {\"name\": \"Ian M Banks\", \"from_country\": \"Scotland\"} Ian M Banks Scotland [{\"id\": \"S002.1\", \"name\": \"Consider Phlebas\", \"price\": 5.99}, {\"id\": \"S002.2\", \"name\": \"Player of Games\", \"price\": 5.99}] [5.99|5.99] [Consider Phlebas|Player of Games] [S002.1|S002.2] +S003 Book of the New Sun [scifi|fantasy] {\"name\": \"Gene Wolfe\", \"genres\": [\"scifi\", \"fantasy\"], \"from_country\": \"USA\"} Gene Wolfe USA [{\"id\": \"S003.1\", \"name\": \"Shadow of the Torturer\"}, {\"id\": \"S003.2\", \"name\": \"Claw of the Conciliator\", \"price\": 6.99}] [|6.99] [Shadow of the 
Torturer|Claw of the Conciliator] [S003.1|S003.2] [scifi|fantasy] +S004 Example with single book {\"name\": \"Ms Writer\", \"genres\": [\"romance\"], \"from_country\": \"USA\"} Ms Writer USA [{\"id\": \"S004.1\", \"name\": \"Blah\"}] [Blah] [S004.1] [romance] +S005 Example with no books {\"name\": \"Mr Unproductive\", \"genres\": [\"romance\", \"scifi\", \"fantasy\"], \"from_country\": \"USA\"} Mr Unproductive USA [romance|scifi|fantasy] diff --git a/examples/shop1.instance.yaml b/examples/shop1.instance.yaml new file mode 100644 index 0000000..cf202bf --- /dev/null +++ b/examples/shop1.instance.yaml @@ -0,0 +1,73 @@ +all_book_series: +- id: S001 + name: Lord of the Rings + genres: + - fantasy + creator: + name: JRR Tolkein + from_country: England + books: + - id: S001.1 + name: Fellowship of the Ring + price: 5.99 + summary: Hobbits + - id: S001.2 + name: The Two Towers + price: 5.99 + summary: More hobbits + - id: S001.3 + name: Return of the King + price: 6.99 + summary: Yet more hobbits +- id: S002 + name: The Culture Series + genres: + - scifi + creator: + name: Ian M Banks + from_country: Scotland + books: + - id: S002.1 + name: Consider Phlebas + price: 5.99 + - id: S002.2 + name: Player of Games + price: 5.99 +- id: S003 + name: Book of the New Sun + genres: + - scifi + - fantasy + creator: + name: Gene Wolfe + from_country: USA + genres: + - scifi + - fantasy + books: + - id: S003.1 + name: Shadow of the Torturer + # deliberately omit price information + - id: S003.2 + name: Claw of the Conciliator + price: 6.99 +- id: S004 + name: Example with single book + creator: + name: Ms Writer + from_country: USA + genres: + - romance + books: + - id: S004.1 + name: Blah +- id: S005 + name: Example with no books + creator: + name: Mr Unproductive + from_country: USA + genres: + - romance + - scifi + - fantasy + diff --git a/linkml_csv/utils/converter.py b/linkml_csv/utils/converter.py index 924861e..c9bbc5a 100644 --- a/linkml_csv/utils/converter.py +++ 
b/linkml_csv/utils/converter.py
@@ -1,31 +1,112 @@
 import os
 import re
 import sys
+from types import ModuleType
 
 import click
+from linkml.generators.yamlgen import YAMLGenerator
+from linkml.generators.pythongen import PythonGenerator
+from linkml_runtime.utils.compile_python import compile_python
 
-def compare_files(file: str, target: str, comments: str = r'^\s+#.*\n') -> int:
-    def filtr(txt: str) -> str:
-        return re.sub(comments, '', txt, flags=re.MULTILINE).strip()
 
-    if os.path.exists(target):
-        with open(target) as oldfile:
-            oldtext = filtr(oldfile.read())
-    else:
-        oldtext = ""
+from linkml_csv.dumpers.csv_dumper import CSVDumper
+from linkml_runtime.dumpers.yaml_dumper import YAMLDumper
+from linkml_runtime.dumpers.json_dumper import JSONDumper
+from linkml_runtime.dumpers.dumper_root import Dumper
+from linkml_csv.loaders.csv_loader import CSVLoader
+from linkml_runtime.loaders.yaml_loader import YAMLLoader
+from linkml_runtime.loaders.json_loader import JSONLoader
+from linkml_runtime.loaders.loader_root import Loader
+
+# Registry keyed by lower-case format name (also the file suffix): (dumper class, loader class).
+dumpers_loaders = {
+    'tsv': (CSVDumper, CSVLoader),
+    'csv': (CSVDumper, CSVLoader),
+    'yaml': (YAMLDumper, YAMLLoader),
+    'json': (JSONDumper, JSONLoader),
+}
+
+def make_python(schema) -> ModuleType:
+    """
+    Generate Python source for a schema with PythonGenerator and compile it into a module.
+
+    Note: if you change the yaml schema and associated test instance objects,
+    you may need to run this twice
+
+    :param schema: schema source passed straight to PythonGenerator
+    :return: module produced by compile_python from the generated source
+    """
+    pstr = str(PythonGenerator(schema, mergeimports=True).serialize())
+    return compile_python(pstr)
+
+def _get_format(input: str, input_format: str = None):
+    """
+    Determine the serialization format to use for a path.
+
+    :param input: file path; its suffix is used when input_format is None
+    :param input_format: explicit format name, takes precedence over the suffix
+    :return: lower-cased format name
+    :raises Exception: no explicit format and the path is missing or has no suffix
+    """
+    if input_format is None:
+        # os.path.splitext() yields '' (never None) for a suffix-less path and
+        # raises TypeError on a None path, so test for emptiness rather than None.
+        ext = os.path.splitext(input)[1] if input else ''
+        if not ext:
+            raise Exception(f'Must pass format option OR use known file suffix: {input}')
+        input_format = ext.replace('.', '')
+    return input_format.lower()
 
-    with open(file) as newfile:
-        newtext = filtr(newfile.read())
-    return int(oldtext == newtext)
+def _is_xsv(fmt: str) -> bool:
+    """Return True if fmt is a delimited-text format ('csv' or 'tsv')."""
+    return fmt in ('csv', 'tsv')
+
+def get_loader(fmt: str) -> Loader:
+    """Instantiate the loader registered for fmt in dumpers_loaders (KeyError if fmt is unknown)."""
+    return dumpers_loaders[fmt][1]()
+
+def get_dumper(fmt: str) -> Dumper:
+    """Instantiate the dumper registered for fmt in dumpers_loaders (KeyError if fmt is unknown)."""
+    return dumpers_loaders[fmt][0]()
 
 
 @click.command()
-@click.argument("file1", type=click.Path(exists=True, dir_okay=False))
-@click.argument("file2", type=click.Path(dir_okay=False))
-@click.option("-c", "--comments", help="Comments regexp", default="^#.*$", show_default=True)
-def cli(file1, file2, comments) -> None:
-    """ Compare file1 to file2 using a filter """
-    sys.exit(compare_files(file1, file2, comments))
+@click.option("--output", "-o")
+@click.option("--input-format", "-f")
+@click.option("--output-format", "-t")
+@click.option("--schema", "-s", required=True)
+@click.option("--index-slot", "-S", required=True)
+@click.option("--target-class", "-C", required=True)
+@click.argument("input")
+def cli(input, output=None, input_format=None, output_format=None, index_slot=None, schema=None, target_class=None) -> None:
+    """
+    Converts to/from TSV to rich LinkML instance format (JSON/YAML/RDF)
+    """
+    print(f'IN={input}')
+    # Resolve both formats first so a bad or missing suffix fails before the schema is compiled.
+    input_format = _get_format(input, input_format)
+    output_format = _get_format(output, output_format)
+    loader = get_loader(input_format)
+    dumper = get_dumper(output_format)
+
+    python_module = make_python(schema)
+    try:
+        target_class = python_module.__dict__[target_class]
+    except KeyError:
+        raise click.UsageError(f'Class {target_class} not found in Python generated from {schema}') from None
+    # From here on `schema` is the `.schema` attribute of YAMLGenerator, not the command-line value.
+    schema = YAMLGenerator(schema).schema
+
+    if _is_xsv(input_format):
+        obj = loader.load(source=input, target_class=target_class, schema=schema, index_slot=index_slot)
+    else:
+        obj = loader.load(source=input, target_class=target_class)
+    print(f'Obj={obj}')
+    # dump() is called for its effect on `output`; nothing here uses a return value.
+    if _is_xsv(output_format):
+        dumper.dump(obj, output, schema=schema, index_slot=index_slot)
+    else:
+        dumper.dump(obj, output)
 
 
 if __name__ == '__main__':
diff --git a/setup.cfg b/setup.cfg
index 6ad664c..1d49a16 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -34,4 +34,4 @@ packages =
 
 [entry_points]
 console_scripts =
-    comparefiles = linkml_csv.utils.converter:cli
+    lconvert = linkml_csv.utils.converter:cli