From 972f2fc03b2169619b29c66237dcb0be245812f2 Mon Sep 17 00:00:00 2001
From: JonathanFrey2003 <186038973+JonathanFrey2003@users.noreply.github.com>
Date: Thu, 21 Nov 2024 09:27:17 +0100
Subject: [PATCH] Revert "Merge remote-tracking branch
'origin/DF-24-Notebook-DatalandAPI' into DF-29-provide-test-data"
This reverts commit eaaa88f7e0782ddc00aa240aca08db00070a1973, reversing
changes made to 06e633b2fea61e168686c4f08f3cf06754572399.
---
notebooks/DF-24_Notebook-API.ipynb | 222 ------------------
.../dataland/dataland_client.py | 5 -
src/dataland_qa_lab/dataland/get_data.py | 19 --
3 files changed, 246 deletions(-)
delete mode 100644 notebooks/DF-24_Notebook-API.ipynb
delete mode 100644 src/dataland_qa_lab/dataland/get_data.py
diff --git a/notebooks/DF-24_Notebook-API.ipynb b/notebooks/DF-24_Notebook-API.ipynb
deleted file mode 100644
index 46df6cd..0000000
--- a/notebooks/DF-24_Notebook-API.ipynb
+++ /dev/null
@@ -1,222 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "id": "ac4a575e",
- "metadata": {},
- "outputs": [],
- "source": [
- "%run base.ipynb"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "ec394145",
- "metadata": {},
- "source": [
- "# EPIC 1 - User Story DF-24\n",
- "## Erstellung eines Notebooks und aufrufen der Daten per Dataland-API\n",
- "\n",
- "Als QA-Lab-Team möchten wir ein Notebook erstellen, mit dem wir eine Anfrage an die Dataland-API senden, sodass wir anhand der Berichts-ID den Unternehmensbericht erhalten.\n",
- "\n",
- "Beschreibung:
\n",
- "Nutzen von Dataland Endpoint /data/nuclear-and-gas/companies/{companyId} im Nuclear & Gas Controller. Die Funktionalität wird außerhalb des Notebooks implementiert.\n",
- "\n",
- "Akzeptanzkriterien:
\n",
- " - Notebook ist auf jedem Laptop ausführbar
\n",
- " - Anfrage an Dataland gibt Daten zurück"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "6b79b78e",
- "metadata": {},
- "source": [
- "## 1.Schritt: Dataset mit Hilfe der Company_ID über die Dataland_API aufrufen\n",
- "Mithilfe der Company_ID wird die Dataland_API aufgerufen. Nachdem Eintragen aus welcher Periode das Dataset sein soll, ist es möglich den Wert 1 zu erlangen."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "cf87363e",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "No\n"
- ]
- }
- ],
- "source": [
- "import dataland_qa_lab.dataland.get_data as qa\n",
- "\n",
- "company_id = \"9f6f6cea-7316-4101-aba9-170023c811e8\"\n",
- "year = \"2020\"\n",
- "\n",
- "value1 = qa._get_data_set_by_year_(company_id=company_id, year=year)\n",
- "print(value1)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "c42cdbaa",
- "metadata": {},
- "source": [
- "## 2.Schritt: Laden aller Daten\n",
- "Hier werden die Daten von Dataland geladen und alle 6 Werte gesondert gespeichert."
- ]
- },
- {
- "cell_type": "markdown",
- "id": "a4031931",
- "metadata": {},
- "source": [
- "from dataland_qa_lab.utils import config\n",
- "\n",
- "conf = config.get_config()\n",
- "dataland_client = conf.dataland_client\n",
- "\n",
- "api = dataland_client.eu_taxonomy_nuclear_and_gas_api\n",
- "dataset = api.get_all_company_nuclear_and_gas_data(company_id=company_id)\n",
- "\n",
- "# Eintragen aus welcher Periode man das Dataset haben will -> In diesem Fall 2024\n",
- "data_id = \"test\"\n",
- "for t in range(len(dataset)):\n",
- " if (dataset[t].meta_info.reporting_period == year):\n",
- " data_id = dataset[t].meta_info.data_id\n",
- " break\n",
- "\n",
- "data = dataland_client.eu_taxonomy_nuclear_and_gas_api.get_company_associated_nuclear_and_gas_data(data_id=data_id)\n",
- "\n",
- "wert1 = data.data.general.general.nuclear_energy_related_activities_section426\n",
- "wert2 = data.data.general.general.nuclear_energy_related_activities_section427\n",
- "wert3 = data.data.general.general.nuclear_energy_related_activities_section428\n",
- "wert4 = data.data.general.general.fossil_gas_related_activities_section429\n",
- "wert5 = data.data.general.general.fossil_gas_related_activities_section430\n",
- "wert6 = data.data.general.general.fossil_gas_related_activities_section431\n",
- "print(data)\n",
- "print(wert1)\n",
- "print(wert2)\n",
- "print(wert3)\n",
- "print(wert4)\n",
- "print(wert5)\n",
- "print(wert6)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "fe1a2009",
- "metadata": {},
- "source": [
- "## 3.Schritt: Datenquelle laden\n",
- "In diesem Fall wird die Dateireferenz von wert1 genutzt um das dazugehörige Dokument zu bekommen."
- ]
- },
- {
- "cell_type": "markdown",
- "id": "c9cf6625",
- "metadata": {},
- "source": [
- "# datenQuelle = wert1.data_source.file_reference\n",
- "# print(datenQuelle)\n",
- "\n",
- "document_bytes = dataland_client.documents_api.get_document(wert1.data_source.file_reference)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "13580b00",
- "metadata": {},
- "source": [
- "## 4.Schritt: Daten aus dem Dokument extrahieren und in Text umwandeln"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "a9846d68",
- "metadata": {},
- "source": [
- "import io\n",
- "\n",
- "import pypdf\n",
- "\n",
- "full_document_byte_stream = io.BytesIO(document_bytes)\n",
- "full_pdf = pypdf.PdfReader(full_document_byte_stream)\n",
- "\n",
- "partial_document_byte_stream = io.BytesIO()\n",
- "partial_pdf = pypdf.PdfWriter()\n",
- "\n",
- "partial_pdf.add_page(full_pdf.get_page(int(wert1.data_source.page) - 1)) # Correct for 0 offset\n",
- "partial_pdf.write(partial_document_byte_stream)\n",
- "partial_document_byte_stream.seek(0)\n",
- "None"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "2da4ca74",
- "metadata": {},
- "source": [
- "from azure.ai.documentintelligence import DocumentIntelligenceClient\n",
- "from azure.ai.documentintelligence.models import AnalyzeResult, ContentFormat\n",
- "from azure.core.credentials import AzureKeyCredential\n",
- "\n",
- "docintel_cred = AzureKeyCredential(conf.azure_docintel_api_key)\n",
- "document_intelligence_client = DocumentIntelligenceClient(\n",
- " endpoint=conf.azure_docintel_endpoint, credential=docintel_cred\n",
- ")\n",
- "\n",
- "poller = document_intelligence_client.begin_analyze_document(\n",
- " \"prebuilt-layout\",\n",
- " analyze_request=partial_document_byte_stream,\n",
- " content_type=\"application/octet-stream\",\n",
- " output_content_format=ContentFormat.MARKDOWN,\n",
- ")\n",
- "result: AnalyzeResult = poller.result()"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "4d28e7c3",
- "metadata": {},
- "source": [
- "#### Das Ergebnis wird als Markdown direkt im Notebook wiedergegeben"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "f3ba48bf",
- "metadata": {},
- "source": [
- "from IPython.display import Markdown, display\n",
- "\n",
- "display(Markdown(result.content))"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": ".venv",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.12.3"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/src/dataland_qa_lab/dataland/dataland_client.py b/src/dataland_qa_lab/dataland/dataland_client.py
index be8a4e9..7454cef 100644
--- a/src/dataland_qa_lab/dataland/dataland_client.py
+++ b/src/dataland_qa_lab/dataland/dataland_client.py
@@ -44,11 +44,6 @@ def eu_taxonomy_nf_api(self) -> dataland_backend.EutaxonomyNonFinancialsDataCont
"""Function to run the eu-taxonomy-non-financials-data-controller API."""
return dataland_backend.EutaxonomyNonFinancialsDataControllerApi(self.backend_client)
- @property
- def eu_taxonomy_nuclear_and_gas_api(self) -> dataland_backend.NuclearAndGasDataControllerApi:
- """Function to run the eu-taxonomy-nuclear-and-gas-data-controller API."""
- return dataland_backend.NuclearAndGasDataControllerApi(self.backend_client)
-
@property
def documents_client(self) -> dataland_documents.ApiClient:
"""Retrieves the client for accessing the documents API."""
diff --git a/src/dataland_qa_lab/dataland/get_data.py b/src/dataland_qa_lab/dataland/get_data.py
deleted file mode 100644
index 7486009..0000000
--- a/src/dataland_qa_lab/dataland/get_data.py
+++ /dev/null
@@ -1,19 +0,0 @@
-from dataland_qa_lab.utils import config
-
-
-def _get_data_set_by_year_(company_id: str, year: str) -> str:
- conf = config.get_config()
- dataland_client = conf.dataland_client
-
- api = dataland_client.eu_taxonomy_nuclear_and_gas_api
- dataset = api.get_all_company_nuclear_and_gas_data(company_id=company_id)
-
- # Eintragen aus welcher Periode man das Dataset haben will -> In diesem Fall 2024
- data_id = "test"
- for t in range(len(dataset)):
- if dataset[t].meta_info.reporting_period == year:
- data_id = dataset[t].meta_info.data_id
- break
- data = dataland_client.eu_taxonomy_nuclear_and_gas_api.get_company_associated_nuclear_and_gas_data(data_id=data_id)
- value1 = data.data.general.general.nuclear_energy_related_activities_section426.value.value
- return value1