From 0bd31f8e49b3ad837c2a450cfa83a8b8aabad9a0 Mon Sep 17 00:00:00 2001 From: Brandon Taylor Date: Mon, 9 Dec 2019 11:18:08 -0500 Subject: [PATCH] inlining, CSV updates --- .travis.yml | 4 +-- Project.toml | 1 - docs/src/index.md | 69 +++++--------------------------------- src/LightQuery.jl | 5 ++- src/columns.jl | 54 +++++++++++------------------ src/{Index.jl => index.jl} | 0 src/macros.jl | 15 ++------- 7 files changed, 35 insertions(+), 113 deletions(-) rename src/{Index.jl => index.jl} (100%) diff --git a/.travis.yml b/.travis.yml index 100f047..48def48 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,7 +3,7 @@ os: - linux julia: - 1.0 - - 1.2 + - 1.3 - nightly matrix: allow_failures: @@ -15,6 +15,6 @@ after_success: julia --project=coverage/ coverage/submit.jl jobs: include: - stage: "Documentation" - julia: 1.2 + julia: 1.3 script: julia --project=docs/ docs/document.jl after_success: skip diff --git a/Project.toml b/Project.toml index 158f048..6116c49 100644 --- a/Project.toml +++ b/Project.toml @@ -10,7 +10,6 @@ Compat = "34da2185-b29b-5c13-b0c7-acf172513d20" IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e" MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a" -Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" [compat] julia = "1.0" diff --git a/docs/src/index.md b/docs/src/index.md index 0e8b66f..79d61a9 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -50,28 +50,15 @@ Use [`CSV.File`](http://juliadata.github.io/CSV.jl/stable/#CSV.File) to import t ```jldoctest dplyr julia> import CSV -julia> airports_file = CSV.File("airports.csv", missingstrings = ["", "\\N"]) -CSV.File("airports.csv"): -Size: 1458 x 8 -Tables.Schema: - :faa String - :name String - :lat Float64 - :lon Float64 - :alt Int64 - :tz Int64 - :dst String - :tzone Union{Missing, String} +julia> airports_file = CSV.File("airports.csv", missingstrings = ["", "\\N"]); ``` For this package, I made [`named_tuple`](@ref)s to replace `NamedTuple`s. Use [`@name`](@ref) to work with them. -Convert the `schema` to [`row_info`](@ref). +Get [`row_info`](@ref). ```jldoctest dplyr -julia> using Tables: schema - -julia> const Airport = row_info(schema(airports_file)); +julia> const Airport = row_info(airports_file); ``` Read the first row. @@ -197,35 +184,13 @@ julia> indexed_airports["JFK"] Use [`CSV.File`](http://juliadata.github.io/CSV.jl/stable/#CSV.File) to import the flights data. ```jldoctest dplyr -julia> flights_file = CSV.File("flights.csv") -CSV.File("flights.csv"): -Size: 336776 x 19 -Tables.Schema: - :year Int64 - :month Int64 - :day Int64 - :dep_time Union{Missing, Int64} - :sched_dep_time Int64 - :dep_delay Union{Missing, Int64} - :arr_time Union{Missing, Int64} - :sched_arr_time Int64 - :arr_delay Union{Missing, Int64} - :carrier String - :flight Int64 - :tailnum Union{Missing, String} - :origin String - :dest String - :air_time Union{Missing, Int64} - :distance Int64 - :hour Int64 - :minute Int64 - :time_hour String +julia> flights_file = CSV.File("flights.csv"); ``` Get the first flight, [`rename`](@ref), [`remove`](@ref), and [`transform`](@ref) to add units. ```jldoctest dplyr -julia> const Flight = row_info(schema(flights_file)); +julia> const Flight = row_info(flights_file); julia> flight = @name @> flights_file |> @@ -489,27 +454,9 @@ Showing at most 4 rows Import weather data. Get the first row, [`rename`](@ref), [`remove`](@ref), and [`transform`](@ref) to add units. ```jldoctest dplyr -julia> weathers_file = CSV.File("weather.csv") -CSV.File("weather.csv"): -Size: 26115 x 15 -Tables.Schema: - :origin String - :year Int64 - :month Int64 - :day Int64 - :hour Int64 - :temp Union{Missing, Float64} - :dewp Union{Missing, Float64} - :humid Union{Missing, Float64} - :wind_dir Union{Missing, Int64} - :wind_speed Union{Missing, Float64} - :wind_gust Union{Missing, Float64} - :precip Float64 - :pressure Union{Missing, Float64} - :visib Float64 - :time_hour String - -julia> const Weather = row_info(schema(weathers_file)); +julia> weathers_file = CSV.File("weather.csv"); + +julia> const Weather = row_info(weathers_file); julia> function get_weather(indexed_airports, row) @name @> row |> diff --git a/src/LightQuery.jl b/src/LightQuery.jl index fc8de68..3a5bd5b 100644 --- a/src/LightQuery.jl +++ b/src/LightQuery.jl @@ -14,17 +14,16 @@ import Base.Iterators: take using Base.Iterators: Filter using Base.Meta: quot using Compat: hasproperty -using CSV: getcell, getfile, getrow, Row +using CSV: getcolumn, Column, getrow, Row, File using IterTools: @ifsomething import MacroTools using MacroTools: @capture using Markdown: MD, Table -using Tables: Schema include("utilities.jl") include("macros.jl") include("columns.jl") -include("Index.jl") +include("index.jl") include("rows.jl") include("make_columns.jl") include("pivot.jl") diff --git a/src/columns.jl b/src/columns.jl index 15c2cb9..c9b0ad1 100644 --- a/src/columns.jl +++ b/src/columns.jl @@ -360,30 +360,26 @@ export Apply map_unrolled(tuple, apply.names, them) end -struct InRow{name, type, position} +struct InRow{name, AColumn} + column::AColumn end - -@pure function InRow(name, type, position) - InRow{name, type, position}() -end -@inline function (::InRow{name, type, position})(row::Row) where {name, type, position} - getcell(getfile(row), type, position, getrow(row))::type +@inline function InRow{name}(column::AColumn) where {name, AColumn} + InRow{name, AColumn}(column) end -@inline function get_pair(row::Row, column::InRow{name}) where {name} - name, column(row) +@inline function (in_row::InRow)(row::Row) + in_row.column[getrow(row)] end -@inline function (columns::Some{InRow})(data) - partial_map(get_pair, data, columns) +@inline function get_pair(row::Row, in_row::InRow{name}) where {name} + Name{name}(), in_row(row) end - -@inline function InRow_at(::Schema{Names, Types}, index) where {Names, Types} - InRow(Name{Names[index]}(), fieldtype(Types, index), index) +@inline function (in_rows::Some{InRow})(data) + partial_map(get_pair, data, in_rows) end """ - row_info(::Tables.Schema) + row_info(::CSV.File) -Get row info for the schema. Can be used as a type stable selector function. +Get row info for the CSV file. Can be used as a type stable selector function. ```jldoctest julia> using LightQuery @@ -392,30 +388,20 @@ julia> using Test: @inferred julia> using CSV: File -julia> using Tables: schema - -julia> test = File("test.csv") -CSV.File("test.csv"): -Size: 1 x 6 -Tables.Schema: - :a Int64 - :b Float64 - :c Int64 - :d Float64 - :e Int64 - :f Float64 +julia> test = File("test.csv"); -julia> template = @inferred row_info(schema(test)) -(LightQuery.InRow{`a`,Int64,1}(), LightQuery.InRow{`b`,Float64,2}(), LightQuery.InRow{`c`,Int64,3}(), LightQuery.InRow{`d`,Float64,4}(), LightQuery.InRow{`e`,Int64,5}(), LightQuery.InRow{`f`,Float64,6}()) +julia> template = row_info(test) +(LightQuery.InRow{:a,CSV.Column{Int64,Int64}}([1]), LightQuery.InRow{:b,CSV.Column{Float64,Float64}}([1.0]), LightQuery.InRow{:c,CSV.Column{Int64,Int64}}([1]), LightQuery.InRow{:d,CSV.Column{Float64,Float64}}([1.0]), LightQuery.InRow{:e,CSV.Column{Int64,Int64}}([1]), LightQuery.InRow{:f,CSV.Column{Float64,Float64}}([1.0])) julia> @inferred template(first(test)) ((`a`, 1), (`b`, 1.0), (`c`, 1), (`d`, 1.0), (`e`, 1), (`f`, 1.0)) ``` """ -@inline function row_info(a_schema::Schema{Names}) where {Names} - ntuple(let a_schema = a_schema - @inline InRow_at_capture(index) = InRow_at(a_schema, index) - end, Val{length(Names)}()) +@noinline function row_info(file::File) + (( + InRow{name}(getcolumn(file, name)) + for name in propertynames(file) + )...,) end export row_info diff --git a/src/Index.jl b/src/index.jl similarity index 100% rename from src/Index.jl rename to src/index.jl diff --git a/src/macros.jl b/src/macros.jl index 4158953..ddf8922 100644 --- a/src/macros.jl +++ b/src/macros.jl @@ -35,18 +35,9 @@ function substitute_underscores!(underscores_to_gensyms, meta_level, code::Expr) else meta_level end - Expr(head, map( - let underscores_to_gensyms = underscores_to_gensyms, - new_meta_level = new_meta_level - function substitute_underscores!_capture(code) - substitute_underscores!( - underscores_to_gensyms, - new_meta_level, - code - ) - end - end, - expanded_code.args + Expr(head, ( + substitute_underscores!(underscores_to_gensyms, new_meta_level, code) + for code in expanded_code.args )...) end