From 64d9cb07f8d0e08ce9e10a51d3135e44b2a4b4d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Rodrigo?= Date: Thu, 3 Oct 2024 22:26:28 +0200 Subject: [PATCH] Use CSV to load bz2 file --- Gemfile | 1 + Gemfile.lock | 34 +++++++++++------------ datasources/schemas/tags/base.i18n.json | 10 +++++++ datasources/schemas/tags/base.schema.json | 6 +++- datasources/sources/csv.rb | 17 ++++++++---- 5 files changed, 45 insertions(+), 23 deletions(-) diff --git a/Gemfile b/Gemfile index 4967318..774b6c9 100644 --- a/Gemfile +++ b/Gemfile @@ -7,6 +7,7 @@ git_source(:github) { |repo| "https://github.com/#{repo}.git" } ruby '>= 3' gem 'activesupport' +gem 'bzip2-ffi' gem 'http' gem 'json' gem 'jsonpath' diff --git a/Gemfile.lock b/Gemfile.lock index 5768e74..cac30e8 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -24,13 +24,15 @@ GEM ast (2.4.2) base64 (0.2.0) bigdecimal (3.1.7) + bzip2-ffi (1.1.1) + ffi (~> 1.0) commander (4.6.0) highline (~> 2.0.0) concurrent-ruby (1.2.3) connection_pool (2.4.1) domain_name (0.6.20240107) drb (2.2.1) - erubi (1.12.0) + erubi (1.13.0) ffi (1.16.3) ffi-compiler (1.3.2) ffi (>= 1.15.5) @@ -79,8 +81,7 @@ GEM racc polyfill (1.9.0) power_assert (2.0.3) - prettier_print (1.2.1) - prism (0.24.0) + prism (1.1.0) psych (5.1.2) stringio public_suffix (5.0.4) @@ -88,8 +89,8 @@ GEM rack (3.0.9.1) rainbow (3.1.1) rake (13.1.0) - rbi (0.1.9) - prism (>= 0.18.0, < 0.25) + rbi (0.2.1) + prism (~> 1.0) sorbet-runtime (>= 0.5.9204) regexp_parser (2.9.0) rexml (3.2.6) @@ -141,40 +142,39 @@ GEM sorbet-runtime (= 0.5.11295) sorbet-struct-comparable (1.3.0) sorbet-runtime (>= 0.5) - spoom (1.2.4) + spoom (1.5.0) erubi (>= 1.10.0) + prism (>= 0.28.0) sorbet-static-and-runtime (>= 0.5.10187) - syntax_tree (>= 6.1.1) thor (>= 0.19.2) stringio (3.1.0) - syntax_tree (6.2.0) - prettier_print (>= 1.2.0) - tapioca (0.12.0) + tapioca (0.16.3) bundler (>= 2.2.25) netrc (>= 0.11.0) parallel (>= 1.21.0) - rbi (>= 0.1.4, < 0.2) - sorbet-static-and-runtime (>= 0.5.10820) - spoom (~> 1.2.0, >= 1.2.0) + rbi (~> 0.2) + sorbet-static-and-runtime (>= 0.5.11087) + spoom (>= 1.2.0) thor (>= 1.2.0) yard-sorbet test-unit (3.6.2) power_assert - thor (1.3.1) + thor (1.3.2) tzinfo (2.0.6) concurrent-ruby (~> 1.0) unicode-display_width (2.5.0) yaml (0.3.0) yard (0.9.36) - yard-sorbet (0.8.1) - sorbet-runtime (>= 0.5) - yard (>= 0.9) + yard-sorbet (0.9.0) + sorbet-runtime + yard PLATFORMS x86_64-linux DEPENDENCIES activesupport + bzip2-ffi http json json-schema diff --git a/datasources/schemas/tags/base.i18n.json b/datasources/schemas/tags/base.i18n.json index 37c6e22..aee5262 100644 --- a/datasources/schemas/tags/base.i18n.json +++ b/datasources/schemas/tags/base.i18n.json @@ -165,7 +165,17 @@ "@default:full": { "fr": "wifi" } + }, + "no": { + "@default:full": { + "fr": "pas d'accès à internet" + } } } + }, + "brand": { + "@default": { + "fr": "enseigne" + } } } diff --git a/datasources/schemas/tags/base.schema.json b/datasources/schemas/tags/base.schema.json index c87861f..2a42b35 100644 --- a/datasources/schemas/tags/base.schema.json +++ b/datasources/schemas/tags/base.schema.json @@ -119,8 +119,12 @@ }, "internet_access": { "enum": [ - "wlan" + "wlan", + "no" ] + }, + "brand": { + "type": "string" } }, "$defs": { diff --git a/datasources/sources/csv.rb b/datasources/sources/csv.rb index 5852718..0821e27 100644 --- a/datasources/sources/csv.rb +++ b/datasources/sources/csv.rb @@ -3,6 +3,7 @@ require 'csv' require 'http' +require 'bzip2/ffi' require 'active_support/all' require 'sorbet-runtime' @@ -13,8 +14,9 @@ class CsvSource < Source class Settings < Source::SourceSettings const :url, String - const :col_sep, String - const :id, String + const :uncompress, T.nilable(String) + const :col_sep, String, default: ',' + const :id, T::Array[String] const :lon, String const :lat, String const :timestamp, String @@ -29,7 +31,12 @@ def fetch(url, col_sep) raise [url, resp].inspect end - CSV.parse(resp.body.to_s, headers: true, col_sep: col_sep, quote_char: nil).each(&:to_h) + reader = resp.body.to_s + if @settings.uncompress == 'bz2' + reader = Bzip2::FFI::Reader.read(StringIO.new(reader)) + end + + CSV.parse(reader, headers: true, col_sep: col_sep, quote_char: nil).each(&:to_h) end def each @@ -37,11 +44,11 @@ def each end def map_id(feat) - feat[@settings.id].to_i + @settings.id.collect{ |id| feat[id] }.join(',') end def map_updated_at(feat) - feat[@settings.timestamp] + feat[@settings.timestamp] || '1970-01-01' end def map_geometry(feat)