Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Mise à jour des fixtures Validata en préparation de changements cassants #4141

Draft
wants to merge 7 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 22 additions & 22 deletions apps/shared/lib/validation/tableschema_validator.ex
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,13 @@ defmodule Shared.Validation.TableSchemaValidator do
"""
import Transport.Shared.Schemas
@behaviour Shared.Validation.TableSchemaValidator.Wrapper

@timeout 180_000
@max_nb_errors 100
@validata_web_url URI.parse("https://validata.fr/table-schema")
@validata_api_url URI.parse("https://api.validata.etalab.studio/validate")
# https://git.opendatafrance.net/validata/validata-core/-/blob/75ee5258010fc43b6a164122eff2579c2adc01a7/validata_core/helpers.py#L152
@structure_tags ["#head", "#structure"]
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

J'ai fusionné les tags "#head" et "#header", et conservé uniquement "#header" qui est déjà présent dans frictionless

# https://gitlab.com/validata-table/validata-table/-/blob/main/src/validata_core/domain/helpers.py#L57
@structure_tags MapSet.new(["#structure", "#header"])

@impl true
def validate(schema_name, url, schema_version \\ "latest") when is_binary(schema_name) and is_binary(url) do
Expand Down Expand Up @@ -70,33 +72,23 @@ defmodule Shared.Validation.TableSchemaValidator do
|> URI.to_string()
end

defp build_report(
%{"report" => %{"tasks" => tasks}, "_meta" => %{"validata-table-version" => validata_version}} = payload
) do
if Enum.count(tasks) != 1 do
raise "tasks should have a length of 1 for response #{payload}"
end

raw_errors = hd(tasks)["errors"]
# We count the errors on our side, because the error count given by the report can be wrong
# see https://git.opendatafrance.net/validata/validata-core/-/issues/37
nb_errors = Enum.count(raw_errors)
defp build_report(%{
"report" => %{"valid" => valid, "stats" => %{"errors" => nb_errors}, "errors" => errors},
"version" => validata_version
}) do
{structure_errors, row_errors} = Enum.split_with(errors, &structure_error?/1)

{row_errors, structure_errors} =
raw_errors |> Enum.split_with(&MapSet.disjoint?(MapSet.new(&1["tags"]), MapSet.new(@structure_tags)))

structure_errors = structure_errors |> Enum.map(&~s(#{&1["name"]} : #{&1["message"]}))
structure_errors = Enum.map(structure_errors, & &1["message"])

row_errors =
row_errors
|> Enum.map(fn row ->
~s(#{row["name"]} : colonne #{row["fieldName"]}, ligne #{row["rowPosition"]}. #{row["message"]})
Enum.map(row_errors, fn row ->
~s(#{row["message"]} Colonne `#{row["fieldName"]}`, ligne #{row["rowNumber"]}.)
end)

errors = (structure_errors ++ row_errors) |> Enum.take(100)
errors = (structure_errors ++ row_errors) |> Enum.take(@max_nb_errors)

%{
"has_errors" => nb_errors > 0,
"has_errors" => not valid,
"errors_count" => nb_errors,
"errors" => errors,
"validator" => __MODULE__,
Expand All @@ -106,6 +98,14 @@ defmodule Shared.Validation.TableSchemaValidator do

defp build_report(_), do: nil

# Tells whether a Validata error map counts as a structure error.
# An error qualifies when at least one of its "tags" is in @structure_tags,
# or when its "type" is "check-error".
# Only matches maps that carry both the "tags" and "type" keys.
defp structure_error?(%{"tags" => error_tags, "type" => error_type}) do
  tagged_as_structure =
    not (error_tags
         |> MapSet.new()
         |> MapSet.disjoint?(@structure_tags))

  # May not need to rely on error type in the future.
  # https://gitlab.com/validata-table/validata-table/-/issues/154
  tagged_as_structure or error_type == "check-error"
end

defp ensure_schema_is_tableschema!(schema_name) do
unless Enum.member?(tableschema_names(), schema_name) do
raise "#{schema_name} is not a tableschema"
Expand Down

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion apps/shared/test/fixtures/validata_with_errors.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion apps/shared/test/fixtures/validata_with_file_error.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"_meta":{"args":{"schema":"https:\/\/schema.data.gouv.fr\/schemas\/etalab\/schema-irve\/latest\/schema.json","url":"https:\/\/www.data.gouv.fr\/fr\/datasets\/r\/099eb6ff-bcf4-42be-bda7-61dfe1ca4c9f"},"validata-table-version":"0.6.1","validata-core-version":"0.8.3"},"error":{"message":"impossible de lire le contenu","name":"source-error"}}
{"schema":"https://schema.data.gouv.fr/schemas/etalab/schema-irve/latest/schema.json","url":"https://www.data.gouv.fr/fr/datasets/r/099eb6ff-bcf4-42be-bda7-61dfe1ca4c9f","options":{"ignore_header_case":"false"},"error":{"message":"JSON non valide ; Expecting value: line 1 column 1 (char 0)","type":"json-format-error"}}
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Je réalise qu'il manque ici "version" et "date", alors que j'avais l'intention de les ajouter. Ça sera le premier patch pour la v0.12.1 !

2 changes: 1 addition & 1 deletion apps/shared/test/fixtures/validata_with_no_errors.json

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"_meta":{"args":{"schema":"https:\/\/schema.data.gouv.fr\/schemas\/etalab\/schema-lieux-covoiturage\/0.2.4\/schema.json","url":"https:\/\/gist.githubusercontent.com\/AntoineAugusti\/59889a3e82ecfcdcc40cddaf8bfbd8cc\/raw\/4cd50116e4b8232be0a86136f57153992f737306\/covoit_invalide.csv"},"validata-table-version":"0.6.1","validata-core-version":"0.8.4"},"report":{"date":"2022-04-28T13:08:04.054599+00:00","errors":[],"stats":{"errors":1,"tasks":1},"tasks":[{"errors":[{"cell":"lundi \u00e0 dimanche","cells":["76217-C-001","None","Gare SNCF de Dieppe","2 Boulevard Georges Clemenceau","Dieppe","76217","Parking","2019-06-25","True","217602176","1.081183","49.921823","20","2","None","lundi \u00e0 dimanche","Ville de Dieppe","True","Correspondance avec la ligne TER Dieppe-Rouen"],"code":"opening-hours-value","description":"","fieldName":"horaires","fieldNumber":16,"fieldPosition":16,"message":"La valeur 'lundi \u00e0 dimanche' n'est pas une d\u00e9finition d'horaire d'ouverture correcte.\n\n Celle-ci doit respecter la sp\u00e9cification [OpenStreetMap](https:\/\/wiki.openstreetmap.org\/wiki\/Key:opening_hours) de description d'horaires d'ouverture.","name":"Horaires d'ouverture incorrects","note":"","rowNumber":1,"rowPosition":2,"tags":["#body"]}],"partial":false,"resource":{"data":[["id_lieu","id_local","nom_lieu","ad_lieu","com_lieu","insee","type","date_maj","ouvert","source","Xlong","Ylat","nbre_pl","nbre_pmr","duree","horaires","proprio","lumiere","comm"],["76217-C-001","","Gare SNCF de Dieppe","2 Boulevard Georges Clemenceau","Dieppe","76217","Parking","2019-06-25","true","217602176","1.081183","49.921823","20","2","","lundi \u00e0 dimanche","Ville de Dieppe","true","Correspondance avec la ligne TER Dieppe-Rouen"]],"format":"inline","hashing":"md5","layout":{"limitRows":100000},"name":"memory","profile":"tabular-data-resource","schema":{"$schema":"https:\/\/frictionlessdata.io\/schemas\/table-schema.json","author":"Antoine Augusti pour 
Etalab","contact":"contact@transport.beta.gouv.fr","contributors":[{"email":"contact@transport.beta.gouv.fr","organisation":"transport.data.gouv.fr","role":"contributor","title":"Miryad Ali et Francis Chabouis"},{"email":"ishan@beta.gouv.fr","organisation":"transport.data.gouv.fr","role":"contributor","title":"Ishan Bhojwani"},{"email":"antoine.augusti@data.gouv.fr","organisation":"Etalab","role":"contributor","title":"Antoine Augusti"},{"email":"loic.hay@gmail.com ","organisation":"Open Data France","role":"contributor","title":"Lo\u00efc Hay"},{"email":"sophie.raspail@ct-corse.fr","organisation":"Agence d'Am\u00e9nagement et d'Urbanisme de Corse","role":"contributor","title":"Sophie Raspail"},{"email":"","organisation":"","role":"contributor","title":"Tristan Roussel"},{"email":"johan.richer@jailbreak.paris","organisation":"Jailbreak","role":"contributor","title":"Johan Richer"}],"countryCode":"FR","created":"2019-06-25","custom_checks":[{"name":"french-siren-value","params":{"column":"source"}},{"name":"opening-hours-value","params":{"column":"horaires"}}],"description":"Sp\u00e9cification des lieux permettant le covoiturage","example":"https:\/\/github.com\/etalab\/schema-lieux-covoiturage\/raw\/v0.2.4\/exemple-valide.csv","fields":[{"constraints":{"pattern":"^([013-9]\\d|2[AB1-9])\\d{3}-C-\\d{3}$","required":true},"description":"Identifiant du lieu de covoiturage, d\u00e9livr\u00e9 par le point d'acc\u00e8s national selon la r\u00e8gle INSEE-C-XXX o\u00f9 INSEE est le code INSEE de la commune et XXX est le num\u00e9ro d\u2019ordre d'arriv\u00e9e dans la base sur 3 chiffres, commen\u00e7ant par 001","example":"35238-C-001 pour la premi\u00e8re aire r\u00e9f\u00e9renc\u00e9e dans la commune de code INSEE 35238","name":"id_lieu","type":"string"},{"constraints":{"required":false},"description":"Identifiant du lieu de covoiturage fix\u00e9 par le producteur de la donn\u00e9e pour son propre 
usage","example":"23X01","name":"id_local","type":"string"},{"constraints":{"required":true},"description":"Le nom du lieu de covoiturage. Recommandation : inutile de r\u00e9p\u00e9ter la nature du type de covoiturage","example":"Les Romains","name":"nom_lieu","type":"string"},{"constraints":{"required":false},"description":"L'adresse du lieu compr\u00e9hensible par le grand public pour assurer la coordination entre le passager et le conducteur. Exemple : \"3, rue de la gare\" ; pour les lieux proches des sorties d'autoroute ou de nationale : \"A11 sortie 7 Le Mans Nord\" ; pour les zones rurales sans adresse : \"croisement de route 1 - route 2\" ou \"le long de route X apr\u00e8s le passage \u00e0 niveau\"","example":"3, rue de la Gare","name":"ad_lieu","type":"string"},{"constraints":{"required":false},"description":"La commune \/ le lieu-dit du covoiturage","example":"Rouen","name":"com_lieu","type":"string"},{"constraints":{"pattern":"^([013-9]\\d|2[AB1-9])\\d{3}$","required":true},"description":"Le code INSEE de la commune d'implantation","example":"76540","name":"insee","type":"string"},{"constraints":{"enum":["Aire de covoiturage","Sortie d'autoroute","Parking","Supermarch\u00e9","Parking relais","D\u00e9laiss\u00e9 routier","Auto-stop"],"required":true},"description":"Le type de lieu de covoiturage","example":"Parking","name":"type","type":"string"},{"constraints":{"required":true},"description":"Date de derni\u00e8re mise \u00e0 jour des donn\u00e9es. 
Notation ISO 8601, format AAAA-MM-DD","example":"2016-10-31","format":"%Y-%m-%d","name":"date_maj","type":"date"},{"constraints":{"required":true},"description":"Le lieu est il actuellement accessible (actif ou inactif)","example":"true","name":"ouvert","type":"boolean"},{"constraints":{"pattern":"^\\d{9}$","required":false},"description":"SIREN de l'entit\u00e9 ayant fourni la donn\u00e9e","example":"225300011","name":"source","type":"string"},{"constraints":{"maximum":180,"minimum":-180,"required":true},"description":"La longitude en degr\u00e9s d\u00e9cimaux (point comme s\u00e9parateur d\u00e9cimal, avec au moins 4 chiffres apr\u00e8s le point d\u00e9cimal) de la localisation de l\u2019entr\u00e9e du lieu de covoiturage exprim\u00e9e dans le syst\u00e8me de coordonn\u00e9es WGS84","example":"1.452323","name":"Xlong","type":"number"},{"constraints":{"maximum":90,"minimum":-90,"required":true},"description":"La latitude en degr\u00e9s d\u00e9cimaux (point comme s\u00e9parateur d\u00e9cimal, avec au moins 4 chiffres apr\u00e8s le point d\u00e9cimal) de la localisation de l\u2019entr\u00e9e du lieu de covoiturage exprim\u00e9e dans le syst\u00e8me de coordonn\u00e9es WGS84","example":"46.59698","name":"Ylat","type":"number"},{"constraints":{"minimum":0,"required":false},"description":"Le nombre de places r\u00e9serv\u00e9es au stationnement disponibles","example":"42","name":"nbre_pl","type":"integer"},{"constraints":{"minimum":0,"required":false},"description":"Le nombre de places PMR disponibles","example":"3","name":"nbre_pmr","type":"integer"},{"constraints":{"minimum":0,"required":false},"description":"S'il existe une restriction sur la dur\u00e9e de stationnement autoris\u00e9e, la dur\u00e9e maximale de stationnement autoris\u00e9e exprim\u00e9e en minutes","example":"60","name":"duree","type":"integer"},{"constraints":{"required":false},"description":"Ce champ permet de renseigner, si l'information est connue, les jours et horaires d'ouverture de 
l'\u00e9quipement","example":"Mo-Fr 08:00-20:00","name":"horaires","type":"string"},{"constraints":{"required":false},"description":"Le nom de l'am\u00e9nageur, c'est-\u00e0-dire de l'entit\u00e9 publique ou priv\u00e9e propri\u00e9taire des infrastructures","example":"D\u00e9partement","name":"proprio","type":"string"},{"constraints":{"required":false},"description":"Un \u00e9clairage nocturne est-il pr\u00e9sent","example":false,"name":"lumiere","type":"boolean"},{"constraints":{"required":false},"description":"Commentaires \u00e9ventuels sur les commodit\u00e9s mises \u00e0 disposition du grand public comme : le num\u00e9ro de t\u00e9l\u00e9phone unique qui indique les services disponibles au moment de l'arriv\u00e9e sur l'aire pour r\u00e9aliser le dernier kilom\u00e8tre ; la pr\u00e9sence de prises 220V ou USB ; acc\u00e8s \u00e0 du r\u00e9seau (t\u00e9l\u00e9com, WiFi) ; sanitaires ; intermodalit\u00e9 en transports","example":"Pr\u00e9sence de sanitaires et acc\u00e8s \u00e0 de l'eau courante","name":"comm","type":"string"}],"homepage":"https:\/\/github.com\/etalab\/schema-lieux-covoiturage","keywords":["covoiturage","transport","mobilit\u00e9"],"licenses":[{"name":"etalab-2.0","path":"https:\/\/www.etalab.gouv.fr\/licence-ouverte-open-licence","title":"Licence Ouverte"}],"missingValues":[""],"primaryKey":"id_lieu","resources":[{"name":"exemple-valide","path":"https:\/\/github.com\/etalab\/schema-lieux-covoiturage\/raw\/v0.2.2\/exemple-valide.csv","title":"Ressource valide"},{"name":"exemple-invalide","path":"https:\/\/github.com\/etalab\/schema-lieux-covoiturage\/raw\/v0.2.2\/exemple-invalide.csv","title":"Ressource invalide"}],"title":"Lieux de 
covoiturage","updated":"2022-04-13","uri":"https:\/\/github.com\/etalab\/schema-lieux-covoiturage\/raw\/v0.2.4\/schema.json","version":"0.2.4"},"scheme":"","stats":{"bytes":0,"fields":19,"hash":"","rows":1}},"scope":["hash-count-error","byte-count-error","field-count-error","row-count-error","blank-header","extra-label","missing-label","blank-label","duplicate-label","incorrect-label","blank-row","primary-key-error","foreign-key-error","extra-cell","missing-cell","type-error","constraint-error","unique-error"],"stats":{"errors":1},"structure_warnings":[],"time":0.026,"valid":false}],"time":0.026,"valid":false,"version":"4.18.2"}}
{"schema":"https://schema.data.gouv.fr/schemas/etalab/schema-lieux-covoiturage/0.2.4/schema.json","url":"https://gist.githubusercontent.com/AntoineAugusti/59889a3e82ecfcdcc40cddaf8bfbd8cc/raw/4cd50116e4b8232be0a86136f57153992f737306/covoit_invalide.csv","options":{"ignore_header_case":"false"},"date":"2024-08-23T12:02:37.487948+00:00","version":"0.12.0","report":{"valid":false,"stats":{"errors":1,"warnings":0,"seconds":0.04,"fields":19,"rows":1,"rows_processed":1},"warnings":[],"errors":[{"message":"La valeur 'lundi à dimanche' n'est pas une définition d'horaire d'ouverture correcte.\n\n Celle-ci doit respecter la spécification [OpenStreetMap](https://wiki.openstreetmap.org/wiki/Key:opening_hours) de description d'horaires d'ouverture.","type":"opening-hours-value","tags":["#body"],"rowNumber":2,"fieldName":"horaires","fieldNumber":16,"cell":"lundi à dimanche"}]}}
Loading