Skip to content

Commit

Permalink
refactor(ETL): Utiliser QuerySet et Serializer pour publier les cantin…
Browse files Browse the repository at this point in the history
…es dans Metabase (#4884)

Co-authored-by: Raphaël Odini <raphodn@users.noreply.github.com>
  • Loading branch information
qloridant and raphodn authored Jan 16, 2025
1 parent ee09a82 commit fe674ff
Show file tree
Hide file tree
Showing 8 changed files with 165 additions and 38 deletions.
1 change: 1 addition & 0 deletions api/serializers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
ElectedCanteenSerializer,
MinimalCanteenSerializer,
CanteenSummarySerializer,
CanteenMetabaseSerializer,
)
from .diagnostic import ( # noqa: F401
ManagerDiagnosticSerializer,
Expand Down
128 changes: 128 additions & 0 deletions api/serializers/canteen.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
from drf_base64.fields import Base64ImageField
from rest_framework import serializers

from data.department_choices import get_lib_department_from_code
from data.models import Canteen, CanteenImage, Diagnostic, Sector
from data.region_choices import get_lib_region_from_code
from macantine.etl.utils import SECTEURS_SPE

from .diagnostic import (
ApproDiagnosticSerializer,
Expand Down Expand Up @@ -580,3 +583,128 @@ class Meta:
"yearly_meal_count",
"sectors",
)


class CanteenMetabaseSerializer(serializers.ModelSerializer):
    """
    Read-only, flattened representation of a ``Canteen`` using French column names.

    Every column except ``id`` and ``siret`` is computed by a
    ``SerializerMethodField`` that renames or formats a model attribute.
    """

    nom = serializers.SerializerMethodField()
    code_insee_commune = serializers.SerializerMethodField()
    libelle_commune = serializers.SerializerMethodField()
    departement = serializers.SerializerMethodField()
    departement_lib = serializers.SerializerMethodField()
    region = serializers.SerializerMethodField()
    region_lib = serializers.SerializerMethodField()
    nbre_repas_jour = serializers.SerializerMethodField()
    nbre_repas_an = serializers.SerializerMethodField()
    modele_economique = serializers.SerializerMethodField()
    type_gestion = serializers.SerializerMethodField()
    type_production = serializers.SerializerMethodField()
    nombre_satellites = serializers.SerializerMethodField()
    siret_cuisine_centrale = serializers.SerializerMethodField()
    ministere_tutelle = serializers.SerializerMethodField()
    secteur = serializers.SerializerMethodField()
    categorie = serializers.SerializerMethodField()
    spe = serializers.SerializerMethodField()
    date_creation = serializers.SerializerMethodField()
    date_modification = serializers.SerializerMethodField()

    class Meta:
        model = Canteen
        fields = (
            "id",
            "nom",
            "siret",
            "code_insee_commune",
            "libelle_commune",
            "departement",
            "departement_lib",
            "region",
            "region_lib",
            "date_creation",
            "date_modification",
            "nbre_repas_jour",
            "nbre_repas_an",
            "modele_economique",
            "type_gestion",
            "type_production",
            "nombre_satellites",
            "siret_cuisine_centrale",
            "ministere_tutelle",
            "secteur",
            "categorie",
            "spe",
        )
        read_only_fields = fields

    def get_nom(self, obj):
        return obj.name

    def get_code_insee_commune(self, obj):
        return obj.city_insee_code

    def get_libelle_commune(self, obj):
        return obj.city

    def get_departement(self, obj):
        return obj.department

    def get_departement_lib(self, obj):
        """Return the department name, or None when the canteen has no department code."""
        # The enum lookup behind the helper raises ValueError on an empty code,
        # which would abort the serialization of the whole list.
        if not obj.department:
            return None
        return get_lib_department_from_code(obj.department)

    def get_region(self, obj):
        return obj.region

    def get_region_lib(self, obj):
        """Return the region name, or None when the canteen has no region code."""
        # Same guard as for the department: skip the enum lookup on an empty code.
        if not obj.region:
            return None
        return get_lib_region_from_code(obj.region)

    def get_date_creation(self, obj):
        # NOTE(review): the format string embeds literal double quotes, so the
        # value is emitted as '"YYYY-MM-DD"' — confirm this is what consumers expect.
        return obj.creation_date.strftime('"%Y-%m-%d"')

    def get_date_modification(self, obj):
        # NOTE(review): same quoted date format as date_creation — confirm.
        return obj.modification_date.strftime('"%Y-%m-%d"')

    def get_nbre_repas_jour(self, obj):
        return obj.daily_meal_count

    def get_nbre_repas_an(self, obj):
        return obj.yearly_meal_count

    def get_modele_economique(self, obj):
        """Return the label of the economic model, or "inconnu" when it is not set."""
        if obj.economic_model:
            return Canteen.EconomicModel(obj.economic_model).label
        return "inconnu"

    def get_type_gestion(self, obj):
        """Return the label of the management type, or "inconnu" when it is not set."""
        if obj.management_type:
            return Canteen.ManagementType(obj.management_type).label
        return "inconnu"

    def get_type_production(self, obj):
        """Return the label of the production type, or "inconnu" when it is not set."""
        if obj.production_type:
            return Canteen.ProductionType(obj.production_type).label
        return "inconnu"

    def get_nombre_satellites(self, obj):
        return obj.satellite_canteens_count

    def get_siret_cuisine_centrale(self, obj):
        return obj.central_producer_siret

    def get_ministere_tutelle(self, obj):
        return obj.line_ministry

    def get_secteur(self, obj):
        """Return the canteen's sector names joined with commas."""
        return ",".join(sector.name for sector in obj.sectors.all())

    def get_categorie(self, obj):
        """Return the categories of the canteen's sectors joined with commas."""
        # NOTE(review): str.join raises TypeError on None; this assumes every
        # sector has a category — confirm against the Sector model.
        return ",".join(sector.category for sector in obj.sectors.all())

    def get_spe(self, obj):
        """Return "Oui" when at least one sector of the canteen is in SECTEURS_SPE, else "Non"."""
        # exists() asks the database for a boolean instead of loading the
        # matching sector rows only to test their truthiness.
        if obj.sectors.filter(pk__in=SECTEURS_SPE).exists():
            return "Oui"
        return "Non"
10 changes: 10 additions & 0 deletions api/views/canteen.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
)
from api.serializers import (
CanteenActionsSerializer,
CanteenMetabaseSerializer,
CanteenPreviewSerializer,
CanteenStatusSerializer,
CanteenSummarySerializer,
Expand Down Expand Up @@ -1332,3 +1333,12 @@ def get(self, request, format=None):
}
)
return Response(ministries)


class MetabaseListView(ListAPIView):
    """
    List every canteen in the flat format produced by ``CanteenMetabaseSerializer``.
    """

    serializer_class = CanteenMetabaseSerializer
    filter_backends = [django_filters.DjangoFilterBackend]
    # NOTE(review): `ordering_fields` is read by DRF's OrderingFilter, which is
    # not listed in `filter_backends` — confirm whether ordering was meant to be enabled.
    ordering_fields = ["creation_date"]

    def get_queryset(self):
        """Return all canteens with their sectors prefetched."""
        # The serializer iterates `obj.sectors.all()` for several columns;
        # prefetching loads the sectors once instead of querying per canteen.
        return Canteen.objects.prefetch_related("sectors")
4 changes: 4 additions & 0 deletions data/department_choices.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
from django.db import models


def get_lib_department_from_code(department: str) -> "str | None":
    """
    Return the name of a department from its code.

    ``Department`` labels have the form ``"<code> - <name>"`` (e.g. ``"01 - Ain"``);
    only the name part is returned.

    Returns None when ``department`` is empty or None.
    Raises ValueError when the code is not a member of ``Department``.
    """
    if not department:
        return None
    # maxsplit=1 keeps the full name even if it contains " - " itself.
    return Department(department).label.split(" - ", 1)[1]


class Department(models.TextChoices):
ain = "01", "01 - Ain"
aisne = "02", "02 - Aisne"
Expand Down
4 changes: 4 additions & 0 deletions data/region_choices.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
from django.db import models


def get_lib_region_from_code(region: str) -> "str | None":
    """
    Return the name of a region from its code.

    ``Region`` labels have the form ``"<code> - <name>"`` (e.g. ``"01 - Guadeloupe"``);
    only the name part is returned.

    Returns None when ``region`` is empty or None.
    Raises ValueError when the code is not a member of ``Region``.
    """
    if not region:
        return None
    # maxsplit=1 keeps the full name even if it contains " - " itself.
    return Region(region).label.split(" - ", 1)[1]


class Region(models.TextChoices):
guadeloupe = "01", "01 - Guadeloupe"
martinique = "02", "02 - Martinique"
Expand Down
50 changes: 14 additions & 36 deletions macantine/etl/analysis.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import json
import logging
import re
import time

import numpy as np
import pandas as pd

from api.views.canteen import MetabaseListView
from macantine.etl import etl, utils
from macantine.etl.data_ware_house import DataWareHouse
from macantine.utils import CAMPAIGN_DATES
Expand Down Expand Up @@ -315,45 +317,21 @@ class ETL_ANALYSIS_CANTEEN(etl.CANTEENS, ANALYSIS):

def __init__(self):
    """
    Set the extracted table name, the warehouse handle, the JSON schema and
    the column mapper used for the canteens analysis dataset.
    """
    super().__init__()

    self.extracted_table_name = "canteens"
    self.warehouse = DataWareHouse()
    # Read the schema inside a context manager so the file handle is closed
    # right after parsing instead of being left to the garbage collector.
    with open("data/schemas/schema_analysis_cantines.json") as schema_file:
        self.schema = json.load(schema_file)

    # The following mapper is used for renaming columns and for selecting the columns to extract from db
    self.columns_mapper = {
        "id": "id",
        "name": "nom",
        "siret": "siret",
        "city_insee_code": "code_insee_commune",
        "city": "libelle_commune",
        "department": "departement",
        "region": "region",
        "creation_date": "date_creation",
        "modification_date": "date_modification",
        "daily_meal_count": "nbre_repas_jour",
        "yearly_meal_count": "nbre_repas_an",
        "economic_model": "modele_economique",
        "management_type": "type_gestion",
        "production_type": "type_production",
        "satellite_canteens_count": "nombre_satellites",
        "central_producer_siret": "siret_cuisine_centrale",
        "line_ministry": "ministere_tutelle",
        "sectors": "secteur",
    }
    self.columns = self.columns_mapper.keys()
def extract_dataset(self):
    """
    Load the canteens into ``self.df``.

    The queryset and serializer class are taken from ``MetabaseListView``;
    the serialized rows are wrapped in a pandas DataFrame and the elapsed
    time is logged.
    """
    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments the way time.time() can.
    start = time.perf_counter()
    metabase_view = MetabaseListView()
    queryset = metabase_view.get_queryset()
    serializer_class = metabase_view.get_serializer_class()
    canteens = serializer_class(queryset, many=True).data
    self.df = pd.DataFrame(canteens)
    elapsed = time.perf_counter() - start
    logger.info("Time spent on canteens extraction : %s", elapsed)

def transform_dataset(self):
# NOTE(review): this span comes from a rendered diff whose +/- markers and
# indentation were lost. The statements down to `self._clean_dataset()` look
# like the pre-refactor body, and the trailing comments + final log call like
# its replacement — confirm against the repository before relying on either.
logger.info("Filling geo names")
self.fill_geo_names()
# Register the extra column so the rename below also maps it to its French name.
self.columns_mapper["department_lib"] = "departement_lib"

# Extract the sector names and categories
logger.info("Canteens : Extract sectors and SPE...")
self.df = utils.extract_sectors(self.df, extract_spe=True, split_category_and_sector=True, only_one_value=True)

# "categories" is renamed on its own because it is not a key of columns_mapper.
self.df = self.df.rename(columns={"categories": "categorie"})
self.df = self.df.rename(columns=self.columns_mapper)

logger.info("Canteens : Clean dataset")
self._clean_dataset()
# Calling this method is still needed to respect the structure of the code
# TODO : Make it possible to stop calling transform_dataset()
logger.info("No more transformation needed here !")
4 changes: 3 additions & 1 deletion macantine/etl/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import pandas as pd
import requests

from api.serializers import SectorSerializer
from data.models import Sector, Teledeclaration
from macantine.utils import CAMPAIGN_DATES

Expand Down Expand Up @@ -205,6 +204,9 @@ def map_sectors():
"""
Populate the details of a sector, given its id
"""

from api.serializers import SectorSerializer # avoid circular import

sectors = Sector.objects.all()
sectors_mapper = {}
for sector in sectors:
Expand Down
2 changes: 1 addition & 1 deletion macantine/tests/test_etl_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def test_transformed_dataset_match_schema(self):
self.assertEqual(first_canteen["departement_lib"], "Finistère")
self.assertEqual(first_canteen["region_lib"], "Bretagne")
self.assertEqual(first_canteen["secteur"], "Sector factory")
self.assertEqual(first_canteen["spe"], False)
self.assertEqual(first_canteen["spe"], "Non")


class TestETLAnalysisTD(TestCase):
Expand Down

0 comments on commit fe674ff

Please sign in to comment.