Skip to content

Commit

Permalink
Use CSV to load bz2 file
Browse files Browse the repository at this point in the history
  • Loading branch information
frodrigo committed Oct 3, 2024
1 parent f0c4282 commit 64d9cb0
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 23 deletions.
1 change: 1 addition & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ git_source(:github) { |repo| "https://github.com/#{repo}.git" }
ruby '>= 3'

gem 'activesupport'
gem 'bzip2-ffi'
gem 'http'
gem 'json'
gem 'jsonpath'
Expand Down
34 changes: 17 additions & 17 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,15 @@ GEM
ast (2.4.2)
base64 (0.2.0)
bigdecimal (3.1.7)
bzip2-ffi (1.1.1)
ffi (~> 1.0)
commander (4.6.0)
highline (~> 2.0.0)
concurrent-ruby (1.2.3)
connection_pool (2.4.1)
domain_name (0.6.20240107)
drb (2.2.1)
erubi (1.12.0)
erubi (1.13.0)
ffi (1.16.3)
ffi-compiler (1.3.2)
ffi (>= 1.15.5)
Expand Down Expand Up @@ -79,17 +81,16 @@ GEM
racc
polyfill (1.9.0)
power_assert (2.0.3)
prettier_print (1.2.1)
prism (0.24.0)
prism (1.1.0)
psych (5.1.2)
stringio
public_suffix (5.0.4)
racc (1.7.3)
rack (3.0.9.1)
rainbow (3.1.1)
rake (13.1.0)
rbi (0.1.9)
prism (>= 0.18.0, < 0.25)
rbi (0.2.1)
prism (~> 1.0)
sorbet-runtime (>= 0.5.9204)
regexp_parser (2.9.0)
rexml (3.2.6)
Expand Down Expand Up @@ -141,40 +142,39 @@ GEM
sorbet-runtime (= 0.5.11295)
sorbet-struct-comparable (1.3.0)
sorbet-runtime (>= 0.5)
spoom (1.2.4)
spoom (1.5.0)
erubi (>= 1.10.0)
prism (>= 0.28.0)
sorbet-static-and-runtime (>= 0.5.10187)
syntax_tree (>= 6.1.1)
thor (>= 0.19.2)
stringio (3.1.0)
syntax_tree (6.2.0)
prettier_print (>= 1.2.0)
tapioca (0.12.0)
tapioca (0.16.3)
bundler (>= 2.2.25)
netrc (>= 0.11.0)
parallel (>= 1.21.0)
rbi (>= 0.1.4, < 0.2)
sorbet-static-and-runtime (>= 0.5.10820)
spoom (~> 1.2.0, >= 1.2.0)
rbi (~> 0.2)
sorbet-static-and-runtime (>= 0.5.11087)
spoom (>= 1.2.0)
thor (>= 1.2.0)
yard-sorbet
test-unit (3.6.2)
power_assert
thor (1.3.1)
thor (1.3.2)
tzinfo (2.0.6)
concurrent-ruby (~> 1.0)
unicode-display_width (2.5.0)
yaml (0.3.0)
yard (0.9.36)
yard-sorbet (0.8.1)
sorbet-runtime (>= 0.5)
yard (>= 0.9)
yard-sorbet (0.9.0)
sorbet-runtime
yard

PLATFORMS
x86_64-linux

DEPENDENCIES
activesupport
bzip2-ffi
http
json
json-schema
Expand Down
10 changes: 10 additions & 0 deletions datasources/schemas/tags/base.i18n.json
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,17 @@
"@default:full": {
"fr": "wifi"
}
},
"no": {
"@default:full": {
"fr": "pas d'accès à internet"
}
}
}
},
"brand": {
"@default": {
"fr": "enseigne"
}
}
}
6 changes: 5 additions & 1 deletion datasources/schemas/tags/base.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,12 @@
},
"internet_access": {
"enum": [
"wlan"
"wlan",
"no"
]
},
"brand": {
"type": "string"
}
},
"$defs": {
Expand Down
17 changes: 12 additions & 5 deletions datasources/sources/csv.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

require 'csv'
require 'http'
require 'bzip2/ffi'
require 'active_support/all'

require 'sorbet-runtime'
Expand All @@ -13,8 +14,9 @@
class CsvSource < Source
class Settings < Source::SourceSettings
const :url, String
const :col_sep, String
const :id, String
const :uncompress, T.nilable(String)
const :col_sep, String, default: ','
const :id, T::Array[String]
const :lon, String
const :lat, String
const :timestamp, String
Expand All @@ -29,19 +31,24 @@ def fetch(url, col_sep)
raise [url, resp].inspect
end

CSV.parse(resp.body.to_s, headers: true, col_sep: col_sep, quote_char: nil).each(&:to_h)
reader = resp.body.to_s
if @settings.uncompress == 'bz2'
reader = Bzip2::FFI::Reader.read(StringIO.new(reader))
end

CSV.parse(reader, headers: true, col_sep: col_sep, quote_char: nil).each(&:to_h)
end

# Iterates over the rows of this source by delegating to the parent `each`.
#
# The parent receives an empty list when the NO_DATA environment variable is
# set; otherwise it receives whatever `fetch` returns for the configured URL
# and column separator. Any block given by the caller is forwarded by `super`.
def each
  rows = ENV['NO_DATA'] ? [] : fetch(@settings.url, @settings.col_sep)
  super(rows)
end

# Builds the feature identifier from the configured id columns.
#
# `@settings.id` is a list of column names; the value of each column is read
# from the row and the values are joined with ','. A column missing from the
# row contributes an empty string (nil joins as '').
#
# @param feat [#[]] row-like object indexed by column name
# @return [String] comma-joined values of the id columns
def map_id(feat)
  @settings.id.map { |column| feat[column] }.join(',')
end

# Returns the last-update value of a row.
#
# Reads the column named by `@settings.timestamp`; when that value is nil
# (or false) the fixed fallback '1970-01-01' is returned instead.
#
# @param feat [#[]] row-like object indexed by column name
# @return [Object] the timestamp column value, or '1970-01-01' when absent
def map_updated_at(feat)
  feat[@settings.timestamp] || '1970-01-01'
end

def map_geometry(feat)
Expand Down

0 comments on commit 64d9cb0

Please sign in to comment.