diff --git a/notebooks/provide_test_data.ipynb b/notebooks/provide_test_data.ipynb deleted file mode 100644 index 1795abd..0000000 --- a/notebooks/provide_test_data.ipynb +++ /dev/null @@ -1,108 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "%run base.ipynb\n", - "import json\n", - "from pathlib import Path\n", - "\n", - "from dataland_backend.models.company_associated_data_nuclear_and_gas_data import ( # type: ignore\n", - " CompanyAssociatedDataNuclearAndGasData,\n", - ")\n", - "\n", - "from dataland_qa_lab.utils import config\n", - "\n", - "pdf_path = Path(\"../data/pdfs/\")\n", - "json_path = Path(\"../data/jsons/\")\n", - "\n", - "conf = config.get_config()\n", - "dataland_client = conf.dataland_client\n", - "\n", - "# list of companies to test\n", - "companies = [\"concordia\", \"covestro\", \"deka\", \"enbw\", \"enel\", \"eon\", \"iberdrola\", \"munichre\", \"rwe\", \"total\"]\n", - "# list of ids of corresponding pdfs\n", - "pdfs = [\n", - " \"0a8eebb9e32d3c0a32a1083699352018afcbbe39458ab8441cd0c8985a466a59\",\n", - " \"ebff9ec3cf12e715cb6ee1c55a1295656a87e1716a9b536b4fbf2a1b9312260c\",\n", - " \"b31dfa1143e9e518cfdacd95b2d4f6c531e50bc33c0dabbbe35cccfe14dd83f3\",\n", - " \"9c0a555a29683aedd2cd50ff7e837181a7fbb2d1c567d336897e2356fc17a595\",\n", - " \"a58354fd0d2969d7c3161d6ba273c9ba4814866c0fc8ec0e220dc4ee6e87753c\",\n", - " \"4abdfd0764559831fdd2e972abab0f34bc7300c650f6f789beea10ecb7d20251\",\n", - " \"3305bd49f340b73919de891d166f7492cd61f59a9efdc1b84a0720db1f846fc2\",\n", - " \"e974e3f3675386f17b67af4a5b03ee5a0a313c4d0b07d719c2cf5cb715ccbeb3\",\n", - " \"eb119227edc8c66d672785619522cd6045b2faf37e63796207799c0e40fa66be\",\n", - " \"dba48e9f5e7e6fc9862dd95159960eb2a270d6975f2457f443ca422e7449e7d6\",\n", - "]\n", - "for company, pdf_id in zip(companies, pdfs, strict=False):\n", - "\n", - " # if needed upload pdf file to dataland\n", - " if not dataland_client.documents_api.get_document(document_id=pdf_id):\n", - " pdf_file_path = pdf_path / f\"{company}.pdf\"\n", - " pdf_content = pdf_file_path.read_bytes()\n", - "\n", - " print(dataland_client.documents_api.post_document(document=pdf_content))\n", - "\n", - " # get companyIDs of company to test\n", - " if company == \"eon\":\n", - " dataset = dataland_client.company_api.get_companies_by_search_string(search_string=\"E.ON SE\", result_limit=1)\n", - " elif company == \"munichre\":\n", - " dataset = dataland_client.company_api.get_companies_by_search_string(\n", - " search_string=\"Münchener Rückversicherungs-Gesellschaft Aktiengesellschaft in München\", result_limit=1\n", - " )\n", - " else:\n", - " dataset = dataland_client.company_api.get_companies_by_search_string(search_string=company, result_limit=1)\n", - "\n", - " company_id = dataset[0].company_id\n", - "\n", - " # change companyID in json file\n", - " json_file_path = json_path / f\"{company}.json\"\n", - "\n", - " with json_file_path.open(encoding=\"utf-8\") as f:\n", - " json_data = json.load(f)\n", - " json_data[\"companyId\"] = company_id\n", - " json_str = json.dumps(json_data, indent=4)\n", - " json_file_path.write_text(json_str, encoding=\"utf-8\")\n", - " written_data = json_file_path.read_text(encoding=\"utf-8\")\n", - "\n", - " # if needed upload document\n", - " if not dataland_client.eu_taxonomy_nuclear_and_gas_api.get_all_company_nuclear_and_gas_data(company_id=company_id):\n", - " nuclear_and_gas_data = CompanyAssociatedDataNuclearAndGasData.from_json(json_str)\n", - " print(\n", - " dataland_client.eu_taxonomy_nuclear_and_gas_api.post_company_associated_nuclear_and_gas_data(\n", - " company_associated_data_nuclear_and_gas_data=nuclear_and_gas_data\n", - " )\n", - " )" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.13.0" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/src/dataland_qa_lab/dataland/dataland_client.py b/src/dataland_qa_lab/dataland/dataland_client.py index 7454cef..be8a4e9 100644 --- a/src/dataland_qa_lab/dataland/dataland_client.py +++ b/src/dataland_qa_lab/dataland/dataland_client.py @@ -44,6 +44,11 @@ def eu_taxonomy_nf_api(self) -> dataland_backend.EutaxonomyNonFinancialsDataCont """Function to run the eu-taxonomy-non-financials-data-controller API.""" return dataland_backend.EutaxonomyNonFinancialsDataControllerApi(self.backend_client) + @property + def eu_taxonomy_nuclear_and_gas_api(self) -> dataland_backend.NuclearAndGasDataControllerApi: + """Function to run the eu-taxonomy-nuclear-and-gas-data-controller API.""" + return dataland_backend.NuclearAndGasDataControllerApi(self.backend_client) + @property def documents_client(self) -> dataland_documents.ApiClient: """Retrieves the client for accessing the documents API.""" diff --git a/src/dataland_qa_lab/dataland/upload_test_data.py b/src/dataland_qa_lab/dataland/upload_test_data.py new file mode 100644 index 0000000..8f7d918 --- /dev/null +++ b/src/dataland_qa_lab/dataland/upload_test_data.py @@ -0,0 +1,73 @@ +import json +from pathlib import Path + +from dataland_backend.models.company_associated_data_nuclear_and_gas_data import ( + CompanyAssociatedDataNuclearAndGasData, +) + +from dataland_qa_lab.utils import config + + +def upload_test_data() -> bool: + """Function to upload 10 test cases for EU Taxonomy Nuclear and Gas to Dataland.""" + pdf_path = Path("../data/pdfs/") + json_path = Path("../data/jsons/") + + conf = config.get_config() + dataland_client = conf.dataland_client + + # list of companies to test + companies = ["concordia", "covestro", "deka", "enbw", "enel", "eon", "iberdrola", "munichre", "rwe", "total"] + # list of ids of corresponding pdfs + pdfs = [ + "0a8eebb9e32d3c0a32a1083699352018afcbbe39458ab8441cd0c8985a466a59", + "ebff9ec3cf12e715cb6ee1c55a1295656a87e1716a9b536b4fbf2a1b9312260c", + "b31dfa1143e9e518cfdacd95b2d4f6c531e50bc33c0dabbbe35cccfe14dd83f3", + "9c0a555a29683aedd2cd50ff7e837181a7fbb2d1c567d336897e2356fc17a595", + "a58354fd0d2969d7c3161d6ba273c9ba4814866c0fc8ec0e220dc4ee6e87753c", + "4abdfd0764559831fdd2e972abab0f34bc7300c650f6f789beea10ecb7d20251", + "3305bd49f340b73919de891d166f7492cd61f59a9efdc1b84a0720db1f846fc2", + "e974e3f3675386f17b67af4a5b03ee5a0a313c4d0b07d719c2cf5cb715ccbeb3", + "eb119227edc8c66d672785619522cd6045b2faf37e63796207799c0e40fa66be", + "dba48e9f5e7e6fc9862dd95159960eb2a270d6975f2457f443ca422e7449e7d6", + ] + for company, pdf_id in zip(companies, pdfs, strict=False): + # if needed upload pdf file to dataland + if not dataland_client.documents_api.get_document(document_id=pdf_id): + pdf_file_path = pdf_path / f"{company}.pdf" + pdf_content = pdf_file_path.read_bytes() + + dataland_client.documents_api.post_document(document=pdf_content) + + # get companyIDs of company to test + if company == "eon": + dataset = dataland_client.company_api.get_companies_by_search_string( + search_string="E.ON SE", result_limit=1 + ) + elif company == "munichre": + dataset = dataland_client.company_api.get_companies_by_search_string( + search_string="Münchener Rückversicherungs-Gesellschaft Aktiengesellschaft in München", result_limit=1 + ) + else: + dataset = dataland_client.company_api.get_companies_by_search_string(search_string=company, result_limit=1) + + company_id = dataset[0].company_id + + # change companyID in json file + json_file_path = json_path / f"{company}.json" + + with json_file_path.open(encoding="utf-8") as f: + json_data = json.load(f) + json_data["companyId"] = company_id + json_str = json.dumps(json_data, indent=4) + json_file_path.write_text(json_str, encoding="utf-8") + + # if needed upload document + if not dataland_client.eu_taxonomy_nuclear_and_gas_api.get_all_company_nuclear_and_gas_data( + company_id=company_id + ): + nuclear_and_gas_data = CompanyAssociatedDataNuclearAndGasData.from_json(json_str) + dataland_client.eu_taxonomy_nuclear_and_gas_api.post_company_associated_nuclear_and_gas_data( + company_associated_data_nuclear_and_gas_data=nuclear_and_gas_data, bypass_qa=True + ) + return True diff --git a/tests/dataland/test_upload_test_data.py b/tests/dataland/test_upload_test_data.py new file mode 100644 index 0000000..0db28cf --- /dev/null +++ b/tests/dataland/test_upload_test_data.py @@ -0,0 +1,6 @@ +from src.dataland_qa_lab.dataland import upload_test_data + + +def test_upload_test_data() -> None: + uploaded_test_data = upload_test_data() + assert uploaded_test_data is True