Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Outil amélioré de comptage de l'évolution des points de charge IRVE #4335

Draft
wants to merge 8 commits into
base: master
Choose a base branch
from
Draft
111 changes: 111 additions & 0 deletions livebook/irve-count-faster.livemd
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
# Suivi du nombre d'IRVE (version accélérée)

```elixir
# Standalone dependency setup for this Livebook.
Mix.install([
# HTTP client used to download the history CSV and each resource version
{:req, "~> 0.5.7"},
# CSV parsing (used by Req's automatic CSV decoding)
{:nimble_csv, "~> 1.2"},
# Livebook widgets (tables, inputs)
{:kino, "~> 0.14.2"},
# DataFrame support
{:explorer, "~> 0.10.0"},
# VegaLite charting + Kino integration
{:kino_vega_lite, "~> 0.1.11"},
{:kino_explorer, "~> 0.1.20"}
])
```

## Analyse

Je pars de la ressource [Fichier consolidé des bornes de recharge pour véhicule électrique](https://transport.data.gouv.fr/datasets/fichier-consolide-des-bornes-de-recharge-pour-vehicules-electriques).

Je récupère l'identifiant du dataset (`118`) et je m'appuie sur l'url qui retourne l'historique.

Ressource pour filtrer :

* https://transport.data.gouv.fr/resources/81623

```elixir
Code.require_file(__DIR__ <> "/../apps/shared/lib/req_custom_cache.ex")

require Explorer.DataFrame

defmodule HTTPQuery do
  # Fetches `url` and returns the decoded response body.
  # Raises (via MatchError) on any non-200 status.
  def get!(url) do
    %{status: 200, body: body} = Req.get!(url)
    body
  end

  # On-disk cache directory, shared across Livebook runs.
  def cache_dir, do: Path.join(__DIR__, "../cache-dir")

  # Cached GET returning the full `Req.Response`.
  # The body is deliberately left undecoded (`decode_body: false`)
  # for much faster processing of the large CSV payloads.
  def cached_get!(url) do
    Req.new()
    |> Transport.Shared.ReqCustomCache.attach()
    |> Req.get!(url: url, receive_timeout: 100_000, custom_cache_dir: cache_dir(), decode_body: false)
  end
end

defmodule Stats do
  # Reduces the raw resources-history CSV (headers + rows) down to one row
  # per month: the earliest non-JSON version of each month, with the
  # "inserted_at" field parsed into a `Date`.
  def get_versions_data(headers, rows) do
    rows
    |> build_list_of_maps(headers)
    |> remove_json_rows()
    |> prepare_date_field()
    |> pick_first_row_by_month()
  end

  # Zips each CSV row with the header names, producing a list of maps.
  def build_list_of_maps(rows, headers) do
    Enum.map(rows, fn row -> headers |> Enum.zip(row) |> Map.new() end)
  end

  # Drops versions published as JSON; only CSV versions are counted.
  def remove_json_rows(rows) do
    Enum.reject(rows, fn row -> row["permanent_url"] =~ ~r/\.json$/ end)
  end

  # Parses the leading "YYYY-MM-DD" of the "inserted_at" timestamp
  # into a `Date` struct.
  def prepare_date_field(rows) do
    Enum.map(rows, fn row ->
      Map.update!(row, "inserted_at", fn timestamp ->
        timestamp |> String.slice(0..9) |> Date.from_iso8601!()
      end)
    end)
  end

  # Keeps the earliest row of each "YYYY-MM" month.
  #
  # NOTE: comparing `Date` structs needs the `Date` sorter module — the
  # default structural comparison orders struct fields alphabetically
  # (:calendar, :day, :month, :year), i.e. by day before month/year,
  # which is not chronological. The original `Enum.sort_by/2` only worked
  # by accident because rows were already grouped per month.
  def pick_first_row_by_month(rows) do
    rows
    |> Enum.group_by(fn row -> row |> Map.fetch!("inserted_at") |> Calendar.strftime("%Y-%m") end)
    |> Enum.map(fn {_month, month_rows} -> Enum.min_by(month_rows, & &1["inserted_at"], Date) end)
  end
end

# Download the full resources history; Req decodes the CSV into a list of
# lists, so the first element is the header row.
history_url = "https://transport.data.gouv.fr/datasets/118/resources_history_csv"
[headers | rows] = HTTPQuery.get!(history_url)
data = Stats.get_versions_data(headers, rows)

# For each monthly version, download the (cached) raw CSV and count its
# charge points — one data line per "point de charge".
data =
  data
  |> Task.async_stream(
    fn row ->
      try do
        %{status: 200, body: body} = HTTPQuery.cached_get!(row["permanent_url"])
        # `trim: true` drops the empty string produced by a trailing
        # newline, which would otherwise inflate the count by one.
        [file_headers | file_rows] = String.split(body, "\n", trim: true)
        # Sanity check: only count files that follow the IRVE schema.
        true = "id_pdc_itinerance" in String.split(file_headers, ",")

        row
        |> Map.put("pdc_count", length(file_rows))
        |> Map.drop(["payload", "permanent_url"])
      rescue
        # Best-effort: skip versions that fail to download or parse.
        _error -> nil
      end
    end,
    timeout: 100_000,
    max_concurrency: 10
  )
  |> Enum.map(fn {:ok, row} -> row end)
  |> Enum.reject(&is_nil/1)

:ok

```

```elixir
# Tabular view of the monthly counts, most recent month first.
data
|> Enum.map(&Map.take(&1, ["inserted_at", "pdc_count"]))
|> Enum.sort_by(fn row -> to_string(row["inserted_at"]) end, :desc)
|> Kino.DataTable.new()
```

```elixir
# Area chart of the charge-point count over time, bucketed by month.
VegaLite.new(width: 750, height: 500)
|> VegaLite.data_from_values(data, only: ["inserted_at", "pdc_count"])
|> VegaLite.mark(:area)
|> VegaLite.encode_field(:x, "inserted_at", type: :temporal, time_unit: "yearmonth", axis: [format: "%Y-%m", label_angle: -45])
|> VegaLite.encode_field(:y, "pdc_count", type: :quantitative)
Loading