From 972f2fc03b2169619b29c66237dcb0be245812f2 Mon Sep 17 00:00:00 2001 From: JonathanFrey2003 <186038973+JonathanFrey2003@users.noreply.github.com> Date: Thu, 21 Nov 2024 09:27:17 +0100 Subject: [PATCH] Revert "Merge remote-tracking branch 'origin/DF-24-Notebook-DatalandAPI' into DF-29-provide-test-data" This reverts commit eaaa88f7e0782ddc00aa240aca08db00070a1973, reversing changes made to 06e633b2fea61e168686c4f08f3cf06754572399. --- notebooks/DF-24_Notebook-API.ipynb | 222 ------------------ .../dataland/dataland_client.py | 5 - src/dataland_qa_lab/dataland/get_data.py | 19 -- 3 files changed, 246 deletions(-) delete mode 100644 notebooks/DF-24_Notebook-API.ipynb delete mode 100644 src/dataland_qa_lab/dataland/get_data.py diff --git a/notebooks/DF-24_Notebook-API.ipynb b/notebooks/DF-24_Notebook-API.ipynb deleted file mode 100644 index 46df6cd..0000000 --- a/notebooks/DF-24_Notebook-API.ipynb +++ /dev/null @@ -1,222 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "ac4a575e", - "metadata": {}, - "outputs": [], - "source": [ - "%run base.ipynb" - ] - }, - { - "cell_type": "markdown", - "id": "ec394145", - "metadata": {}, - "source": [ - "# EPIC 1 - User Story DF-24\n", - "## Erstellung eines Notebooks und aufrufen der Daten per Dataland-API\n", - "\n", - "Als QA-Lab-Team möchten wir ein Notebook erstellen, mit dem wir eine Anfrage an die Dataland-API senden, sodass wir anhand der Berichts-ID den Unternehmensbericht erhalten.\n", - "\n", - "Beschreibung:
\n", - "Nutzen von Dataland Endpoint /data/nuclear-and-gas/companies/{companyId} im Nuclear & Gas Controller. Die Funktionalität wird außerhalb des Notebooks implementiert.\n", - "\n", - "Akzeptanzkriterien:
\n", - " - Notebook ist auf jedem Laptop ausführbar
\n", - " - Anfrage an Dataland gibt Daten zurück" - ] - }, - { - "cell_type": "markdown", - "id": "6b79b78e", - "metadata": {}, - "source": [ - "## 1.Schritt: Dataset mit Hilfe der Company_ID über die Dataland_API aufrufen\n", - "Mithilfe der Company_ID wird die Dataland_API aufgerufen. Nachdem Eintragen aus welcher Periode das Dataset sein soll, ist es möglich den Wert 1 zu erlangen." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cf87363e", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "No\n" - ] - } - ], - "source": [ - "import dataland_qa_lab.dataland.get_data as qa\n", - "\n", - "company_id = \"9f6f6cea-7316-4101-aba9-170023c811e8\"\n", - "year = \"2020\"\n", - "\n", - "value1 = qa._get_data_set_by_year_(company_id=company_id, year=year)\n", - "print(value1)" - ] - }, - { - "cell_type": "markdown", - "id": "c42cdbaa", - "metadata": {}, - "source": [ - "## 2.Schritt: Laden aller Daten\n", - "Hier werden die Daten von Dataland geladen und alle 6 Werte gesondert gespeichert." - ] - }, - { - "cell_type": "markdown", - "id": "a4031931", - "metadata": {}, - "source": [ - "from dataland_qa_lab.utils import config\n", - "\n", - "conf = config.get_config()\n", - "dataland_client = conf.dataland_client\n", - "\n", - "api = dataland_client.eu_taxonomy_nuclear_and_gas_api\n", - "dataset = api.get_all_company_nuclear_and_gas_data(company_id=company_id)\n", - "\n", - "# Eintragen aus welcher Periode man das Dataset haben will -> In diesem Fall 2024\n", - "data_id = \"test\"\n", - "for t in range(len(dataset)):\n", - " if (dataset[t].meta_info.reporting_period == year):\n", - " data_id = dataset[t].meta_info.data_id\n", - " break\n", - "\n", - "data = dataland_client.eu_taxonomy_nuclear_and_gas_api.get_company_associated_nuclear_and_gas_data(data_id=data_id)\n", - "\n", - "wert1 = data.data.general.general.nuclear_energy_related_activities_section426\n", - "wert2 = data.data.general.general.nuclear_energy_related_activities_section427\n", - "wert3 = data.data.general.general.nuclear_energy_related_activities_section428\n", - "wert4 = data.data.general.general.fossil_gas_related_activities_section429\n", - "wert5 = data.data.general.general.fossil_gas_related_activities_section430\n", - "wert6 = data.data.general.general.fossil_gas_related_activities_section431\n", - "print(data)\n", - "print(wert1)\n", - "print(wert2)\n", - "print(wert3)\n", - "print(wert4)\n", - "print(wert5)\n", - "print(wert6)" - ] - }, - { - "cell_type": "markdown", - "id": "fe1a2009", - "metadata": {}, - "source": [ - "## 3.Schritt: Datenquelle laden\n", - "In diesem Fall wird die Dateireferenz von wert1 genutzt um das dazugehörige Dokument zu bekommen." - ] - }, - { - "cell_type": "markdown", - "id": "c9cf6625", - "metadata": {}, - "source": [ - "# datenQuelle = wert1.data_source.file_reference\n", - "# print(datenQuelle)\n", - "\n", - "document_bytes = dataland_client.documents_api.get_document(wert1.data_source.file_reference)" - ] - }, - { - "cell_type": "markdown", - "id": "13580b00", - "metadata": {}, - "source": [ - "## 4.Schritt: Daten aus dem Dokument extrahieren und in Text umwandeln" - ] - }, - { - "cell_type": "markdown", - "id": "a9846d68", - "metadata": {}, - "source": [ - "import io\n", - "\n", - "import pypdf\n", - "\n", - "full_document_byte_stream = io.BytesIO(document_bytes)\n", - "full_pdf = pypdf.PdfReader(full_document_byte_stream)\n", - "\n", - "partial_document_byte_stream = io.BytesIO()\n", - "partial_pdf = pypdf.PdfWriter()\n", - "\n", - "partial_pdf.add_page(full_pdf.get_page(int(wert1.data_source.page) - 1)) # Correct for 0 offset\n", - "partial_pdf.write(partial_document_byte_stream)\n", - "partial_document_byte_stream.seek(0)\n", - "None" - ] - }, - { - "cell_type": "markdown", - "id": "2da4ca74", - "metadata": {}, - "source": [ - "from azure.ai.documentintelligence import DocumentIntelligenceClient\n", - "from azure.ai.documentintelligence.models import AnalyzeResult, ContentFormat\n", - "from azure.core.credentials import AzureKeyCredential\n", - "\n", - "docintel_cred = AzureKeyCredential(conf.azure_docintel_api_key)\n", - "document_intelligence_client = DocumentIntelligenceClient(\n", - " endpoint=conf.azure_docintel_endpoint, credential=docintel_cred\n", - ")\n", - "\n", - "poller = document_intelligence_client.begin_analyze_document(\n", - " \"prebuilt-layout\",\n", - " analyze_request=partial_document_byte_stream,\n", - " content_type=\"application/octet-stream\",\n", - " output_content_format=ContentFormat.MARKDOWN,\n", - ")\n", - "result: AnalyzeResult = poller.result()" - ] - }, - { - "cell_type": "markdown", - "id": "4d28e7c3", - "metadata": {}, - "source": [ - "#### Das Ergebnis wird als Markdown direkt im Notebook wiedergegeben" - ] - }, - { - "cell_type": "markdown", - "id": "f3ba48bf", - "metadata": {}, - "source": [ - "from IPython.display import Markdown, display\n", - "\n", - "display(Markdown(result.content))" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.3" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/src/dataland_qa_lab/dataland/dataland_client.py b/src/dataland_qa_lab/dataland/dataland_client.py index be8a4e9..7454cef 100644 --- a/src/dataland_qa_lab/dataland/dataland_client.py +++ b/src/dataland_qa_lab/dataland/dataland_client.py @@ -44,11 +44,6 @@ def eu_taxonomy_nf_api(self) -> dataland_backend.EutaxonomyNonFinancialsDataCont """Function to run the eu-taxonomy-non-financials-data-controller API.""" return dataland_backend.EutaxonomyNonFinancialsDataControllerApi(self.backend_client) - @property - def eu_taxonomy_nuclear_and_gas_api(self) -> dataland_backend.NuclearAndGasDataControllerApi: - """Function to run the eu-taxonomy-nuclear-and-gas-data-controller API.""" - return dataland_backend.NuclearAndGasDataControllerApi(self.backend_client) - @property def documents_client(self) -> dataland_documents.ApiClient: """Retrieves the client for accessing the documents API.""" diff --git a/src/dataland_qa_lab/dataland/get_data.py b/src/dataland_qa_lab/dataland/get_data.py deleted file mode 100644 index 7486009..0000000 --- a/src/dataland_qa_lab/dataland/get_data.py +++ /dev/null @@ -1,19 +0,0 @@ -from dataland_qa_lab.utils import config - - -def _get_data_set_by_year_(company_id: str, year: str) -> str: - conf = config.get_config() - dataland_client = conf.dataland_client - - api = dataland_client.eu_taxonomy_nuclear_and_gas_api - dataset = api.get_all_company_nuclear_and_gas_data(company_id=company_id) - - # Eintragen aus welcher Periode man das Dataset haben will -> In diesem Fall 2024 - data_id = "test" - for t in range(len(dataset)): - if dataset[t].meta_info.reporting_period == year: - data_id = dataset[t].meta_info.data_id - break - data = dataland_client.eu_taxonomy_nuclear_and_gas_api.get_company_associated_nuclear_and_gas_data(data_id=data_id) - value1 = data.data.general.general.nuclear_energy_related_activities_section426.value.value - return value1