Skip to content

Commit

Permalink
improve(Imports de masse): Rendre le header obligatoire pour les achats (#4794)
Browse files Browse the repository at this point in the history
  • Loading branch information
qloridant authored Dec 20, 2024
1 parent eb53535 commit cdc88ef
Show file tree
Hide file tree
Showing 18 changed files with 166 additions and 33 deletions.
2 changes: 1 addition & 1 deletion api/tests/files/bad_purchase_import.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
canteen_siret,description,provider,date,price_ht,famille,characteristics,local_definition
siret,description,fournisseur,date,prix_ht,famille_produits,caracteristiques,definition_local
,Empty siret,Le bon traiteur,2022-05-02,90.11,PRODUITS_LAITIERS,"BIO,LOCAL",DEPARTMENT
86180597100897,Non-existent canteen,Le bon traiteur,2022-05-02,90.11,PRODUITS_LAITIERS,"BIO,LOCAL",DEPARTMENT
36462492895701,Don't manage canteen,Le bon traiteur,2022-05-02,90.11,PRODUITS_LAITIERS,"BIO,LOCAL",DEPARTMENT
Expand Down
2 changes: 1 addition & 1 deletion api/tests/files/comma_separated_purchase_import.csv
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
canteen siret,description,fournisseur,date,prix HT,famille,caractéristiques,definition de local
siret,description,fournisseur,date,prix_ht,famille_produits,caracteristiques,definition_local
82399356058716,"Pommes, rouges",Le bon traiteur ,2022-05-02,"90,11", PRODUITS_LAITIERS ,"BIO, LOCAL", DEPARTMENT
2 changes: 1 addition & 1 deletion api/tests/files/corrupt_purchase_import.csv
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
"cantine SIRET,""description"",""fournisseur"",""date"",""prix HT"",""famille"",""caractéristiques"",""definition de local"""
"siret,""description"",""fournisseur"",""date"",""prix_ht"",""famille_produits"",""caracteristiques"",""definition_local"""
"82399356058716,""weird formatting"",""Le bon traiteur"",""2022-05-02"",""90.11"",""PRODUITS_LAITIERS"",""LOCAL"",""REGION"""
2 changes: 1 addition & 1 deletion api/tests/files/good_purchase_import.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
canteen siret,description,fournisseur,date,prix HT,famille,caractéristiques,definition de local
siret,description,fournisseur,date,prix_ht,famille_produits,caracteristiques,definition_local
82399356058716,"Pommes, rouges",Le bon traiteur ,2022-05-02,90.11, PRODUITS_LAITIERS ,"BIO, LOCAL", DEPARTMENT
82399356058716,"Pommes, vertes",Le bon traiteur ,2022-05-03,910.11, PRODUITS_LAITIERS ,"BIO, LOCAL", DEPARTMENT
2 changes: 1 addition & 1 deletion api/tests/files/good_purchase_import_no_local_def.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
canteen siret,description,fournisseur,date,prix HT,famille,caractéristiques
siret,description,fournisseur,date,prix_ht,famille_produits,caracteristiques,definition_local
82399356058716,"Pommes, rouges",Le bon traiteur ,2022-05-02,90.11, PRODUITS_LAITIERS ,"BIO"
82399356058716,"Pommes, vertes",Le bon traiteur ,2022-05-03,910.11, PRODUITS_LAITIERS ,"BIO"
2 changes: 2 additions & 0 deletions api/tests/files/no_header_purchase_import.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
82399356058716,"Pommes, rouges",Le bon traiteur ,2022-05-02,90.11, PRODUITS_LAITIERS ,"BIO, LOCAL", DEPARTMENT
82399356058716,"Pommes, vertes",Le bon traiteur ,2022-05-03,910.11, PRODUITS_LAITIERS ,"BIO, LOCAL", DEPARTMENT
2 changes: 1 addition & 1 deletion api/tests/files/purchase_encoding_iso-8859-1.csv
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
canteen siret,description,fournisseur,date,prix HT,famille,caracteristiques,definition de local
siret,description,fournisseur,date,prix_ht,famille_produits,caracteristiques,definition_local
82399356058716,deuxi�me pomme,Le bon traiteur ,2022-05-02,90.11, PRODUITS_LAITIERS ,"BIO, LOCAL", DEPARTMENT
2 changes: 1 addition & 1 deletion api/tests/files/purchase_import_delimiter_semicolon.csv
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
canteen siret; description;fournisseur; date; prix HT;famille; caractéristiques; definition de local
siret;description;fournisseur;date;prix_ht;famille_produits;caracteristiques;definition_local
82399356058716;"Pommes vertes"; Le bon traiteur;2022-05-02; 90.11; PRODUITS_LAITIERS;"BIO, LOCAL"; DEPARTMENT
4 changes: 2 additions & 2 deletions api/tests/files/purchase_import_delimiter_tab.tsv
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
canteen siret description fournisseur date prix HT famille caractéristiques definition de local
82399356058716 Pommes vertes Le bon traiteur 2022-05-02 90.11 PRODUITS_LAITIERS BIO, LOCAL DEPARTMENT
siret description fournisseur date prix_ht famille_produits caracteristiques definition_local
82399356058716 Pommes vertes Le bon traiteur 2022-05-02 90.11 PRODUITS_LAITIERS BIO, LOCAL DEPARTMENT
2 changes: 1 addition & 1 deletion api/tests/files/purchase_many_errors.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
canteen_siret,description,provider,date,price_ht,famille,characteristics,local_definition
siret,description,fournisseur,date,prix_ht,famille_produits,caracteristiques,definition_local
,Empty siret,Le bon traiteur,2022-05-02,90.11,PRODUITS_LAITIERS,"BIO,LOCAL",DEPARTMENT
86180597100897,Non-existent canteen,Le bon traiteur,2022-05-02,90.11,PRODUITS_LAITIERS,"BIO,LOCAL",DEPARTMENT
36462492895701,Don't manage canteen,Le bon traiteur,2022-05-02,90.11,PRODUITS_LAITIERS,"BIO,LOCAL",DEPARTMENT
Expand Down
2 changes: 1 addition & 1 deletion api/tests/files/purchases_floating_number.csv
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
canteen siret,description,fournisseur,date,prix HT,famille,caractéristiques,definition de local
siret,description,fournisseur,date,prix_ht,famille_produits,caracteristiques,definition_local
82399356058716,"Pommes, rouges",Le bon traiteur ,2022-05-02,90.11000000002, PRODUITS_LAITIERS ,"BIO, LOCAL", DEPARTMENT
26 changes: 24 additions & 2 deletions api/tests/test_import_purchases.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def test_import_batch_purchases(self, _process_chunk_mock):

with open("./api/tests/files/good_purchase_import.csv") as purchase_file:
_ = self.client.post(reverse("import_purchases"), {"file": purchase_file})
self.assertEqual(_process_chunk_mock.call_count, 3)
self.assertEqual(_process_chunk_mock.call_count, 2)

@authenticate
@override_settings(CSV_IMPORT_MAX_SIZE=10)
Expand Down Expand Up @@ -192,7 +192,9 @@ def test_import_corrupt_purchases_file(self):
self.assertEqual(response.status_code, status.HTTP_200_OK)
self.assertEqual(Purchase.objects.count(), 0)
errors = response.json()["errors"]
self.assertEqual(errors.pop(0)["message"], "Format fichier : 7-8 colonnes attendues, 1 trouvées.")
self.assertEqual(
errors.pop(0)["message"], "La première ligne du fichier doit contenir les bon noms de colonnes"
)

@authenticate
def test_warn_duplicate_file(self):
Expand Down Expand Up @@ -297,3 +299,23 @@ def test_fail_import_bad_format(self):
first_error["message"],
"Ce fichier est au format application/vnd.oasis.opendocument.spreadsheet, merci d'exporter votre fichier au format CSV et réessayer.",
)

@authenticate
def test_no_header(self):
    """
    Posting a purchases CSV that has no header line must be rejected.

    The endpoint still answers HTTP 200, but the response reports a count of 0
    and exactly one error carrying the "invalid header" message.
    """
    managed_canteen = CanteenFactory.create(siret="82399356058716")
    managed_canteen.managers.add(authenticate.user)
    # Sanity check: start from an empty purchases table.
    self.assertEqual(Purchase.objects.count(), 0)

    with open("./api/tests/files/no_header_purchase_import.csv", "rb") as purchase_file:
        import_url = reverse("import_purchases")
        response = self.client.post(import_url, {"file": purchase_file})

    self.assertEqual(response.status_code, status.HTTP_200_OK)
    payload = response.json()
    self.assertEqual(payload["count"], 0)
    reported_errors = payload["errors"]
    self.assertEqual(len(reported_errors), 1)
    self.assertEqual(
        reported_errors[0]["message"],
        "La première ligne du fichier doit contenir les bon noms de colonnes",
    )
38 changes: 23 additions & 15 deletions api/views/purchaseimport.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import csv
import hashlib
import io
import json
import logging
import time
import uuid
Expand Down Expand Up @@ -40,6 +41,8 @@ def __init__(self, **kwargs):
self.is_duplicate_file = False
self.duplicate_purchases = []
self.duplicate_purchase_count = 0
self.data_schema = json.load(open("data/schemas/imports/achats.json"))
self.expected_header = [field["name"] for field in self.data_schema["fields"]]
super().__init__(**kwargs)

def post(self, request):
Expand Down Expand Up @@ -99,24 +102,31 @@ def _log_error(self, message, level="warning"):
def _process_file(self):
file_hash = hashlib.md5()
chunk = []
read_header = True
row_count = 1
for row in self.file:
# Sniffing header
if self.dialect is None:
file_hash.update(row)

# Sniffing 1st line
if read_header:
# decode header, discarding encoding result that might not be accurate without more data
(decoded_row, _) = decode_bytes(row)
self.dialect = csv.Sniffer().sniff(decoded_row)

file_hash.update(row)

# Split into chunks
chunk.append(row)

# Process full chunk
if row_count == settings.CSV_PURCHASE_CHUNK_LINES:
self._process_chunk(chunk)
chunk = []
csvreader = csv.reader(io.StringIO("".join(decoded_row)), self.dialect)
for header in csvreader:
if header != self.expected_header:
raise ValidationError("La première ligne du fichier doit contenir les bon noms de colonnes")
read_header = False
row_count = 0
else:
# Split into chunks
chunk.append(row)

# Process full chunk
if row_count == settings.CSV_PURCHASE_CHUNK_LINES:
self._process_chunk(chunk)
chunk = []
row_count = 0
row_count += 1

# Process the last chunk
Expand Down Expand Up @@ -152,9 +162,7 @@ def _process_chunk(self, chunk):
csvreader = csv.reader(io.StringIO("".join(decoded_chunk)), self.dialect)
for row_number, row in enumerate(csvreader, start=1):
siret = None
# If header, pass
if row_number == 1 and row[0].lower().__contains__("siret"):
continue

try:
# first check that the number of columns is good
# to throw error if badly formatted early on.
Expand Down
93 changes: 93 additions & 0 deletions data/schemas/imports/achats.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
{
"encoding": "utf-8",
"fields": [
{
"constraints": {
"required": true
},
"description": "Le SIRET de la cantine ayant réalisé l'achat",
"name": "siret",
"pattern": "^[0-9]{14}$",
"type": "string"
},
{
"constraints": {
"required": true
},
"description": "Une description de l'achat",
"name": "description",
"type": "string"
},
{
"constraints": {
"required": true
},
"description": "Le nom du fournisseur",
"name": "fournisseur",
"type": "string"
},
{
"constraints": {
"required": true
},
"description": "La date de l'achat",
"format": "%Y-%m-%d",
"name": "date",
"type": "date"
},
{
"constraints": {
"required": true
},
"description": "Le prix HT de l'achat",
"name": "prix_ht",
"type": "number"
},
{
"constraints": {
"required": true
},
"description": "La famille de produits de l'achat",
"enum": [
"VIANDES_VOLAILLES",
"CHARCUTERIE",
"PRODUITS_DE_LA_MER",
"FRUITS_ET_LEGUMES",
"PRODUITS_LAITIERS",
"BOULANGERIE",
"BOISSONS",
"AUTRES"
],
"name": "famille_produits",
"type": "string"
},
{
"constraints": {
"pattern": "(?:(?:^|;)(BIO|LABEL_ROUGE|AOCAOP|IGP|STG|HVE|PECHE_DURABLE|RUP|COMMERCE_EQUITABLE|FERMIER|EXTERNALITES|PERFORMANCE|FRANCE|SHORT_DISTRIBUTION|LOCAL))+$",
"required": false
},
"description": "Les caractéristiques de l'achat",
"name": "caracteristiques",
"type": "string"
},
{
"constraints": {
"required": true
},
"description": "La définition de local si l'achat a la caractéristique de LOCAL",
"enum": [
"AUTOUR_SERVICE",
"DEPARTMENT",
"REGION",
"AUTRE"
],
"name": "definition_local",
"type": "string"
}
],
"format": "csv",
"mediatype": "text/csv",
"scheme": "file",
"title": "Schéma import des achats",
"type": "table"
}
16 changes: 12 additions & 4 deletions frontend/src/views/PurchasesImporter.vue
Original file line number Diff line number Diff line change
Expand Up @@ -92,15 +92,15 @@
</div>
<h2 class="my-6">Format du fichier</h2>
<p>
Le fichier CSV doit contenir un achat par ligne.
Le fichier CSV doit commencer par une ligne en-tête avec le nom des colonnes exactement comme listé ci-dessous
dans "Titre". Il doit ensuite contenir un achat par ligne.
</p>
<p>Les données doivent être présentées dans l'ordre indiqué ci-dessous.</p>
<h3 class="my-6">Colonnes</h3>
<v-simple-table class="my-6">
<template v-slot:default>
<thead>
<tr>
<th>Colonne</th>
<th>Titre</th>
<th>Champ</th>
<th>Description</th>
<th>Type</th>
Expand All @@ -110,7 +110,7 @@
</thead>
<tbody>
<tr v-for="(field, idx) in documentation" :key="idx">
<td class="text-center">{{ idx + 1 }}</td>
<td>{{ field.title }}</td>
<td>{{ field.name }}</td>
<td v-html="field.description"></td>
<td style="min-width: 150px;">{{ field.type }}</td>
Expand Down Expand Up @@ -171,33 +171,39 @@ export default {
duplicatePurchases: null,
documentation: [
{
title: "siret",
name: "SIRET de la cantine ayant réalisé l'achat",
description: "La cantine avec ce SIRET doit être déjà enregistrée sur notre plateforme.",
type: "14 chiffres, avec ou sans espaces",
example: "000 000 000 00000",
},
{
title: "description",
name: "Description de l'achat",
example: "Pommes de terre",
type: "Texte libre",
},
{
title: "fournisseur",
name: "Fournisseur",
example: "Le traiteur du village",
type: "Texte libre",
},
{
title: "date",
name: "Date d'achat",
type: "Date en format AAAA-MM-JJ",
example: "2022-01-30",
},
{
title: "prix_ht",
name: "Prix HT",
description: numberFormatExample,
type: "Chiffre",
example: "3290.23",
},
{
title: "famille_produits",
name: "Famille de produits",
description: `Options acceptées : ${Object.keys(Constants.ProductFamilies).map(
(x) => " <code>" + x + "</code>"
Expand All @@ -206,6 +212,7 @@ export default {
example: `${Object.keys(Constants.ProductFamilies)[0]}`,
},
{
title: "caracteristiques",
name: "Caractéristiques",
description: `Options acceptées : ${Object.keys(Constants.Characteristics).map(
(x) => " <code>" + x + "</code>"
Expand All @@ -214,6 +221,7 @@ export default {
example: `${Object.keys(Constants.Characteristics)[0]},${Object.keys(Constants.Characteristics)[3]}`,
},
{
title: "definition_local",
name: "Définition de local",
description: `Obligatoire si l'achat a la caractéristique de LOCAL. Options acceptées : ${Object.keys(
Constants.LocalDefinitions
Expand Down
2 changes: 1 addition & 1 deletion web/static/documents/achats_fichier_exemple_ma_cantine.csv
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
canteen siret,description,fournisseur,date,prix HT,famille,caractéristiques,definition de local
siret,description,fournisseur,date,prix_ht,famille_produits,caracteristiques,definition_local
82399356058716,"Pommes, rouges",Le bon traiteur,2022-05-02,90.11,AUTRES,"BIO,LOCAL",DEPARTMENT
Binary file modified web/static/documents/achats_fichier_exemple_ma_cantine.ods
Binary file not shown.
Binary file modified web/static/documents/achats_fichier_exemple_ma_cantine.xlsx
Binary file not shown.

0 comments on commit cdc88ef

Please sign in to comment.