diff --git a/docs/make.jl b/docs/make.jl
index 7332766786..653108241c 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -1,6 +1,7 @@
 using Documenter, Flux, NNlib, Functors, MLUtils
 
 DocMeta.setdocmeta!(Flux, :DocTestSetup, :(using Flux); recursive = true)
+
 makedocs(modules = [Flux, NNlib, Functors, MLUtils],
          doctest = false,
          sitename = "Flux",
diff --git a/docs/src/models/advanced.md b/docs/src/models/advanced.md
index 8757828594..1453da95cc 100644
--- a/docs/src/models/advanced.md
+++ b/docs/src/models/advanced.md
@@ -213,6 +213,7 @@ model(gpu(rand(10)))
 A custom loss function for the multiple outputs may look like this:
 ```julia
 using Statistics
+using Flux.Losses: mse
 
 # assuming model returns the output of a Split
 # x is a single input
@@ -220,6 +221,6 @@ using Statistics
 function loss(x, ys, model)
   # rms over all the mse
   ŷs = model(x)
-  return sqrt(mean(Flux.mse(y, ŷ) for (y, ŷ) in zip(ys, ŷs)))
+  return sqrt(mean(mse(y, ŷ) for (y, ŷ) in zip(ys, ŷs)))
 end
 ```
diff --git a/docs/src/models/losses.md b/docs/src/models/losses.md
index 39703ec365..9fe9e6c15b 100644
--- a/docs/src/models/losses.md
+++ b/docs/src/models/losses.md
@@ -3,43 +3,36 @@
 Flux provides a large number of common loss functions used for training machine learning models.
 They are grouped together in the `Flux.Losses` module.
 
-Loss functions for supervised learning typically expect as inputs a target `y`, and a prediction `ŷ`.
-In Flux's convention, the order of the arguments is the following
+As an example, the crossentropy function for multi-class classification that takes logit predictions (i.e. not [`softmax`](@ref)ed)
+can be imported with
+
+```julia
+using Flux.Losses: logitcrossentropy
+```
+
+Loss functions for supervised learning typically expect as inputs a true target `y` and a prediction `ŷ`.
+In Flux's convention, the order of the arguments is the following:
 
 ```julia
 loss(ŷ, y)
 ```
 
-Most loss functions in Flux have an optional argument `agg`, denoting the type of aggregation performed over the
-batch:
+They are commonly passed as arrays of size `num_target_features x num_examples_in_batch`.
+
+Most losses in Flux have an optional argument `agg` accepting a function to be used as
+a final aggregation:
 
 ```julia
-loss(ŷ, y)                         # defaults to `mean`
-loss(ŷ, y, agg=sum)                # use `sum` for reduction
-loss(ŷ, y, agg=x->sum(x, dims=2))  # partial reduction
-loss(ŷ, y, agg=x->mean(w .* x))    # weighted mean
-loss(ŷ, y, agg=identity)           # no aggregation.
+loss(ŷ, y)                             # defaults to `mean`
+loss(ŷ, y, agg = sum)                  # use `sum` for reduction
+loss(ŷ, y, agg = x -> sum(x, dims=2))  # partial reduction
+loss(ŷ, y, agg = x -> mean(w .* x))    # weighted mean
+loss(ŷ, y, agg = identity)             # no aggregation.
``` ## Losses Reference -```@docs -Flux.Losses.mae -Flux.Losses.mse -Flux.Losses.msle -Flux.Losses.huber_loss -Flux.Losses.label_smoothing -Flux.Losses.crossentropy -Flux.Losses.logitcrossentropy -Flux.Losses.binarycrossentropy -Flux.Losses.logitbinarycrossentropy -Flux.Losses.kldivergence -Flux.Losses.poisson_loss -Flux.Losses.hinge_loss -Flux.Losses.squared_hinge_loss -Flux.Losses.dice_coeff_loss -Flux.Losses.tversky_loss -Flux.Losses.binary_focal_loss -Flux.Losses.focal_loss -Flux.Losses.siamese_contrastive_loss +```@autodocs +Modules = [Flux.Losses] +Pages = ["functions.jl"] ``` diff --git a/src/Flux.jl b/src/Flux.jl index aa3f021595..963b0dee05 100644 --- a/src/Flux.jl +++ b/src/Flux.jl @@ -51,9 +51,8 @@ include("outputsize.jl") include("data/Data.jl") using .Data - include("losses/Losses.jl") -using .Losses # TODO: stop importing Losses in Flux's namespace in v0.12 +using .Losses # TODO: stop importing Losses in Flux's namespace in v0.14? include("deprecations.jl") diff --git a/src/losses/functions.jl b/src/losses/functions.jl index 8b036e0fb4..fe858cb231 100644 --- a/src/losses/functions.jl +++ b/src/losses/functions.jl @@ -10,11 +10,14 @@ Return the loss corresponding to mean absolute error: agg(abs.(ŷ .- y)) -# Example +# Examples + ```jldoctest +julia> using Flux.Losses: mae + julia> y_model = [1.1, 1.9, 3.1]; -julia> Flux.mae(y_model, 1:3) +julia> mae(y_model, 1:3) 0.10000000000000009 ``` """ @@ -32,13 +35,16 @@ Return the loss corresponding to mean square error: See also: [`mae`](@ref), [`msle`](@ref), [`crossentropy`](@ref). -# Example +# Examples + ```jldoctest +julia> using Flux.Losses: mse + julia> y_model = [1.1, 1.9, 3.1]; julia> y_true = 1:3; -julia> Flux.mse(y_model, y_true) +julia> mse(y_model, y_true) 0.010000000000000018 ``` """ @@ -57,12 +63,15 @@ The loss corresponding to mean squared logarithmic errors, calculated as The `ϵ` term provides numerical stability. Penalizes an under-estimation more than an over-estimatation. -# Example +# Examples + ```jldoctest -julia> Flux.msle(Float32[1.1, 2.2, 3.3], 1:3) +julia> using Flux.Losses: msle + +julia> msle(Float32[1.1, 2.2, 3.3], 1:3) 0.009084041f0 -julia> Flux.msle(Float32[0.9, 1.8, 2.7], 1:3) +julia> msle(Float32[0.9, 1.8, 2.7], 1:3) 0.011100831f0 ``` """ @@ -112,14 +121,17 @@ value of α larger the smoothing of `y`. `dims` denotes the one-hot dimension, unless `dims=0` which denotes the application of label smoothing to binary distributions encoded in a single number. -# Example +# Examples + ```jldoctest +julia> using Flux.Losses: label_smoothing, crossentropy + julia> y = Flux.onehotbatch([1, 1, 1, 0, 1, 0], 0:1) 2×6 OneHotMatrix(::Vector{UInt32}) with eltype Bool: ⋅ ⋅ ⋅ 1 ⋅ 1 1 1 1 ⋅ 1 ⋅ -julia> y_smoothed = Flux.label_smoothing(y, 0.2f0) +julia> y_smoothed = label_smoothing(y, 0.2f0) 2×6 Matrix{Float32}: 0.1 0.1 0.1 0.9 0.1 0.9 0.9 0.9 0.9 0.1 0.9 0.1 @@ -134,10 +146,10 @@ julia> y_dis = vcat(y_sim[2,:]', y_sim[1,:]') 0.666667 0.666667 0.666667 0.333333 0.666667 0.333333 0.333333 0.333333 0.333333 0.666667 0.333333 0.666667 -julia> Flux.crossentropy(y_sim, y) < Flux.crossentropy(y_sim, y_smoothed) +julia> crossentropy(y_sim, y) < crossentropy(y_sim, y_smoothed) true -julia> Flux.crossentropy(y_dis, y) > Flux.crossentropy(y_dis, y_smoothed) +julia> crossentropy(y_dis, y) > crossentropy(y_dis, y_smoothed) true ``` """ @@ -177,8 +189,11 @@ computing the loss. See also: [`logitcrossentropy`](@ref), [`binarycrossentropy`](@ref), [`logitbinarycrossentropy`](@ref). 
-# Example +# Examples + ```jldoctest +julia> using Flux.Losses: label_smoothing, crossentropy + julia> y_label = Flux.onehotbatch([0, 1, 2, 1, 0], 0:2) 3×5 OneHotMatrix(::Vector{UInt32}) with eltype Bool: 1 ⋅ ⋅ ⋅ 1 @@ -195,19 +210,19 @@ julia> sum(y_model; dims=1) 1×5 Matrix{Float32}: 1.0 1.0 1.0 1.0 1.0 -julia> Flux.crossentropy(y_model, y_label) +julia> crossentropy(y_model, y_label) 1.6076053f0 -julia> 5 * ans ≈ Flux.crossentropy(y_model, y_label; agg=sum) +julia> 5 * ans ≈ crossentropy(y_model, y_label; agg=sum) true -julia> y_smooth = Flux.label_smoothing(y_label, 0.15f0) +julia> y_smooth = label_smoothing(y_label, 0.15f0) 3×5 Matrix{Float32}: 0.9 0.05 0.05 0.05 0.9 0.05 0.9 0.05 0.9 0.05 0.05 0.05 0.9 0.05 0.05 -julia> Flux.crossentropy(y_model, y_smooth) +julia> crossentropy(y_model, y_smooth) 1.5776052f0 ``` """ @@ -229,8 +244,11 @@ and [`softmax`](@ref) separately. See also: [`binarycrossentropy`](@ref), [`logitbinarycrossentropy`](@ref), [`label_smoothing`](@ref). -# Example +# Examples + ```jldoctest +julia> using Flux.Losses: crossentropy, logitcrossentropy + julia> y_label = Flux.onehotbatch(collect("abcabaa"), 'a':'c') 3×7 OneHotMatrix(::Vector{UInt32}) with eltype Bool: 1 ⋅ ⋅ 1 ⋅ 1 1 @@ -243,10 +261,10 @@ julia> y_model = reshape(vcat(-9:0, 0:9, 7.5f0), 3, 7) -8.0 -5.0 -2.0 0.0 3.0 6.0 9.0 -7.0 -4.0 -1.0 1.0 4.0 7.0 7.5 -julia> Flux.logitcrossentropy(y_model, y_label) +julia> logitcrossentropy(y_model, y_label) 1.5791205f0 -julia> Flux.crossentropy(softmax(y_model), y_label) +julia> crossentropy(softmax(y_model), y_label) 1.5791197f0 ``` """ @@ -272,7 +290,10 @@ computing the loss. See also: [`crossentropy`](@ref), [`logitcrossentropy`](@ref). # Examples + ```jldoctest +julia> using Flux.Losses: binarycrossentropy, crossentropy + julia> y_bin = Bool[1,0,1] 3-element Vector{Bool}: 1 @@ -284,7 +305,7 @@ julia> y_prob = softmax(reshape(vcat(1:3, 3:5), 2, 3) .* 1f0) 0.268941 0.5 0.268941 0.731059 0.5 0.731059 -julia> Flux.binarycrossentropy(y_prob[2,:], y_bin) +julia> binarycrossentropy(y_prob[2,:], y_bin) 0.43989f0 julia> all(p -> 0 < p < 1, y_prob[2,:]) # else DomainError @@ -295,7 +316,7 @@ julia> y_hot = Flux.onehotbatch(y_bin, 0:1) ⋅ 1 ⋅ 1 ⋅ 1 -julia> Flux.crossentropy(y_prob, y_hot) +julia> crossentropy(y_prob, y_hot) 0.43989f0 ``` """ @@ -313,7 +334,10 @@ Mathematically equivalent to See also: [`crossentropy`](@ref), [`logitcrossentropy`](@ref). # Examples + ```jldoctest +julia> using Flux.Losses: binarycrossentropy, logitbinarycrossentropy + julia> y_bin = Bool[1,0,1]; julia> y_model = Float32[2, -1, pi] @@ -322,10 +346,10 @@ julia> y_model = Float32[2, -1, pi] -1.0 3.1415927 -julia> Flux.logitbinarycrossentropy(y_model, y_bin) +julia> logitbinarycrossentropy(y_model, y_bin) 0.160832f0 -julia> Flux.binarycrossentropy(sigmoid.(y_model), y_bin) +julia> binarycrossentropy(sigmoid.(y_model), y_bin) 0.16083185f0 ``` """ @@ -344,8 +368,11 @@ between the given probability distributions. The KL divergence is a measure of how much one probability distribution is different from the other. It is always non-negative, and zero only when both the distributions are equal. 
-# Example +# Examples + ```jldoctest +julia> using Flux.Losses: kldivergence + julia> p1 = [1 0; 0 1] 2×2 Matrix{Int64}: 1 0 @@ -356,16 +383,16 @@ julia> p2 = fill(0.5, 2, 2) 0.5 0.5 0.5 0.5 -julia> Flux.kldivergence(p2, p1) ≈ log(2) +julia> kldivergence(p2, p1) ≈ log(2) true -julia> Flux.kldivergence(p2, p1; agg = sum) ≈ 2log(2) +julia> kldivergence(p2, p1; agg = sum) ≈ 2log(2) true -julia> Flux.kldivergence(p2, p2; ϵ = 0) # about -2e-16 with the regulator +julia> kldivergence(p2, p2; ϵ = 0) # about -2e-16 with the regulator 0.0 -julia> Flux.kldivergence(p1, p2; ϵ = 0) # about 17.3 with the regulator +julia> kldivergence(p1, p2; ϵ = 0) # about 17.3 with the regulator Inf ``` """ @@ -377,12 +404,14 @@ function kldivergence(ŷ, y; dims = 1, agg = mean, ϵ = epseltype(ŷ)) end """ - poisson_loss(ŷ, y) + poisson_loss(ŷ, y; agg = mean) + +Return how much the predicted distribution `ŷ` diverges from the expected Poisson +distribution `y`. Calculated as -# Return how much the predicted distribution `ŷ` diverges from the expected Poisson -# distribution `y`; calculated as `sum(ŷ .- y .* log.(ŷ)) / size(y, 2)`. + agg(ŷ .- y .* log.(ŷ)) -[More information.](https://peltarion.com/knowledge-center/documentation/modeling-view/build-an-ai-model/loss-functions/poisson). +[More information](https://peltarion.com/knowledge-center/documentation/modeling-view/build-an-ai-model/loss-functions/poisson). """ function poisson_loss(ŷ, y; agg = mean) _check_sizes(ŷ, y) @@ -393,10 +422,11 @@ end hinge_loss(ŷ, y; agg = mean) Return the [hinge_loss loss](https://en.wikipedia.org/wiki/Hinge_loss) given the -prediction `ŷ` and true labels `y` (containing 1 or -1); calculated as -`sum(max.(0, 1 .- ŷ .* y)) / size(y, 2)`. +prediction `ŷ` and true labels `y` (containing 1 or -1). Calculated as -See also: [`squared_hinge_loss`](@ref) + agg(max.(0, 1 .- ŷ .* y)) + +See also: [`squared_hinge_loss`](@ref). """ function hinge_loss(ŷ, y; agg = mean) _check_sizes(ŷ, y) @@ -407,9 +437,11 @@ end squared_hinge_loss(ŷ, y) Return the squared hinge_loss loss given the prediction `ŷ` and true labels `y` -(containing 1 or -1); calculated as `sum((max.(0, 1 .- ŷ .* y)).^2) / size(y, 2)`. +(containing 1 or -1). Calculated as + + agg((max.(0, 1 .- ŷ .* y)).^2) -See also: [`hinge_loss`](@ref) +See also [`hinge_loss`](@ref). """ function squared_hinge_loss(ŷ, y; agg = mean) _check_sizes(ŷ, y) @@ -438,6 +470,7 @@ Return the [Tversky loss](https://arxiv.org/abs/1706.05721). Used with imbalanced data to give more weight to false negatives. Larger β weigh recall more than precision (by placing more emphasis on false negatives) Calculated as: + 1 - sum(|y .* ŷ| + 1) / (sum(y .* ŷ + β*(1 .- y) .* ŷ + (1 - β)*y .* (1 .- ŷ)) + 1) """ function tversky_loss(ŷ, y; β = ofeltype(ŷ, 0.7)) @@ -452,12 +485,15 @@ end binary_focal_loss(ŷ, y; agg=mean, γ=2, ϵ=eps(ŷ)) Return the [binary_focal_loss](https://arxiv.org/pdf/1708.02002.pdf) -The input, 'ŷ', is expected to be normalized (i.e. [`softmax`](@ref) output). +The input `ŷ` is expected to be normalized (i.e. [`softmax`](@ref) output). + +For `γ == 0`, the loss is mathematically equivalent to [`binarycrossentropy`](@ref). -For `γ == 0`, the loss is mathematically equivalent to [`Losses.binarycrossentropy`](@ref). 
+# Examples -# Example ```jldoctest +julia> using Flux.Losses: binary_focal_loss + julia> y = [0 1 0 1 0 1] 2×3 Matrix{Int64}: @@ -470,12 +506,11 @@ julia> ŷ = [0.268941 0.5 0.268941 0.268941 0.5 0.268941 0.731059 0.5 0.731059 -julia> Flux.binary_focal_loss(ŷ, y) ≈ 0.0728675615927385 +julia> binary_focal_loss(ŷ, y) ≈ 0.0728675615927385 true ``` -See also: [`Losses.focal_loss`](@ref) for multi-class setting - +See also: [`focal_loss`](@ref) for multi-class setting. """ function binary_focal_loss(ŷ, y; agg=mean, γ=2, ϵ=epseltype(ŷ)) _check_sizes(ŷ, y) @@ -496,10 +531,13 @@ It down-weights well-classified examples and focuses on hard examples. The input, 'ŷ', is expected to be normalized (i.e. [`softmax`](@ref) output). The modulating factor, `γ`, controls the down-weighting strength. -For `γ == 0`, the loss is mathematically equivalent to [`Losses.crossentropy`](@ref). +For `γ == 0`, the loss is mathematically equivalent to [`crossentropy`](@ref). + +# Examples -# Example ```jldoctest +julia> using Flux.Losses: focal_loss + julia> y = [1 0 0 0 1 0 1 0 1 0 0 0 1 0 0] @@ -514,11 +552,11 @@ julia> ŷ = softmax(reshape(-7:7, 3, 5) .* 1f0) 0.244728 0.244728 0.244728 0.244728 0.244728 0.665241 0.665241 0.665241 0.665241 0.665241 -julia> Flux.focal_loss(ŷ, y) ≈ 1.1277571935622628 +julia> focal_loss(ŷ, y) ≈ 1.1277571935622628 true ``` -See also: [`Losses.binary_focal_loss`](@ref) for binary (not one-hot) labels +See also: [`binary_focal_loss`](@ref) for binary (not one-hot) labels """ function focal_loss(ŷ, y; dims=1, agg=mean, γ=2, ϵ=epseltype(ŷ)) @@ -535,15 +573,15 @@ which can be useful for training Siamese Networks. It is given by agg(@. (1 - y) * ŷ^2 + y * max(0, margin - ŷ)^2) -Specify `margin` to set the baseline for distance at which pairs are dissimilar. - +Specify `margin` to set the baseline for distance at which pairs are dissimilar. """ -function siamese_contrastive_loss(ŷ, y; agg = mean, margin::Real = 1) - _check_sizes(ŷ, y) +function siamese_contrastive_loss(ŷ, y; agg = mean, margin::Real = 1) + _check_sizes(ŷ, y) margin < 0 && throw(DomainError(margin, "Margin must be non-negative")) return agg(@. 
(1 - y) * ŷ^2 + y * max(0, margin - ŷ)^2) end + ```@meta DocTestFilters = nothing -``` +``` \ No newline at end of file diff --git a/test/losses.jl b/test/losses.jl index 2ca697a657..78fa2f5404 100644 --- a/test/losses.jl +++ b/test/losses.jl @@ -1,20 +1,20 @@ using Test -using Flux: onehotbatch, σ +using Flux: onehotbatch, sigmoid -using Flux.Losses: mse, label_smoothing, crossentropy, logitcrossentropy, binarycrossentropy, logitbinarycrossentropy +using Flux.Losses using Flux.Losses: xlogx, xlogy # group here all losses, used in tests -const ALL_LOSSES = [Flux.Losses.mse, Flux.Losses.mae, Flux.Losses.msle, - Flux.Losses.crossentropy, Flux.Losses.logitcrossentropy, - Flux.Losses.binarycrossentropy, Flux.Losses.logitbinarycrossentropy, - Flux.Losses.kldivergence, - Flux.Losses.huber_loss, - Flux.Losses.tversky_loss, - Flux.Losses.dice_coeff_loss, - Flux.Losses.poisson_loss, - Flux.Losses.hinge_loss, Flux.Losses.squared_hinge_loss, - Flux.Losses.binary_focal_loss, Flux.Losses.focal_loss, Flux.Losses.siamese_contrastive_loss] +const ALL_LOSSES = [mse, mae, msle, + crossentropy, logitcrossentropy, + binarycrossentropy, logitbinarycrossentropy, + kldivergence, + huber_loss, + tversky_loss, + dice_coeff_loss, + poisson_loss, + hinge_loss, squared_hinge_loss, + binary_focal_loss, focal_loss, siamese_contrastive_loss] @testset "xlogx & xlogy" begin @@ -45,17 +45,17 @@ y = [1, 1, 0, 0] end @testset "mae" begin - @test Flux.mae(ŷ, y) ≈ 1/2 + @test mae(ŷ, y) ≈ 1/2 end @testset "huber_loss" begin - @test Flux.huber_loss(ŷ, y) ≈ 0.20500000000000002 + @test huber_loss(ŷ, y) ≈ 0.20500000000000002 end y = [123.0,456.0,789.0] ŷ = [345.0,332.0,789.0] @testset "msle" begin - @test Flux.msle(ŷ, y) ≈ 0.38813985859136585 + @test msle(ŷ, y) ≈ 0.38813985859136585 end # Now onehot y's @@ -104,15 +104,15 @@ logŷ, y = randn(3), rand(3) yls = y.*(1-2sf).+sf @testset "binarycrossentropy" begin - @test binarycrossentropy.(σ.(logŷ), label_smoothing(y, 2sf; dims=0); ϵ=0) ≈ -yls.*log.(σ.(logŷ)) - (1 .- yls).*log.(1 .- σ.(logŷ)) - @test binarycrossentropy(σ.(logŷ), y; ϵ=0) ≈ mean(-y.*log.(σ.(logŷ)) - (1 .- y).*log.(1 .- σ.(logŷ))) - @test binarycrossentropy(σ.(logŷ), y) ≈ mean(-y.*log.(σ.(logŷ) .+ eps.(σ.(logŷ))) - (1 .- y).*log.(1 .- σ.(logŷ) .+ eps.(σ.(logŷ)))) + @test binarycrossentropy.(sigmoid.(logŷ), label_smoothing(y, 2sf; dims=0); ϵ=0) ≈ -yls.*log.(sigmoid.(logŷ)) - (1 .- yls).*log.(1 .- sigmoid.(logŷ)) + @test binarycrossentropy(sigmoid.(logŷ), y; ϵ=0) ≈ mean(-y.*log.(sigmoid.(logŷ)) - (1 .- y).*log.(1 .- sigmoid.(logŷ))) + @test binarycrossentropy(sigmoid.(logŷ), y) ≈ mean(-y.*log.(sigmoid.(logŷ) .+ eps.(sigmoid.(logŷ))) - (1 .- y).*log.(1 .- sigmoid.(logŷ) .+ eps.(sigmoid.(logŷ)))) @test binarycrossentropy([0.1,0.2,0.9], 1) ≈ -mean(log, [0.1,0.2,0.9]) # constant label end @testset "logitbinarycrossentropy" begin - @test logitbinarycrossentropy.(logŷ, label_smoothing(y, 0.2)) ≈ binarycrossentropy.(σ.(logŷ), label_smoothing(y, 0.2); ϵ=0) - @test logitbinarycrossentropy(logŷ, y) ≈ binarycrossentropy(σ.(logŷ), y; ϵ=0) + @test logitbinarycrossentropy.(logŷ, label_smoothing(y, 0.2)) ≈ binarycrossentropy.(sigmoid.(logŷ), label_smoothing(y, 0.2); ϵ=0) + @test logitbinarycrossentropy(logŷ, y) ≈ binarycrossentropy(sigmoid.(logŷ), y; ϵ=0) end y = onehotbatch([1], 0:1) @@ -128,44 +128,44 @@ y = [1 2 3] ŷ = [4.0 5.0 6.0] @testset "kldivergence" begin - @test Flux.kldivergence([0.1,0.0,0.9], [0.1,0.0,0.9]) ≈ Flux.kldivergence([0.1,0.9], [0.1,0.9]) - @test Flux.kldivergence(ŷ, y) ≈ -1.7661057888493457 - @test 
Flux.kldivergence(y, y) ≈ 0 + @test kldivergence([0.1,0.0,0.9], [0.1,0.0,0.9]) ≈ kldivergence([0.1,0.9], [0.1,0.9]) + @test kldivergence(ŷ, y) ≈ -1.7661057888493457 + @test kldivergence(y, y) ≈ 0 end y = [1 2 3 4] ŷ = [5.0 6.0 7.0 8.0] @testset "hinge_loss" begin - @test Flux.hinge_loss(ŷ, y) ≈ 0 - @test Flux.hinge_loss(y, 0.5 .* y) ≈ 0.125 + @test hinge_loss(ŷ, y) ≈ 0 + @test hinge_loss(y, 0.5 .* y) ≈ 0.125 end @testset "squared_hinge_loss" begin - @test Flux.squared_hinge_loss(ŷ, y) ≈ 0 - @test Flux.squared_hinge_loss(y, 0.5 .* y) ≈ 0.0625 + @test squared_hinge_loss(ŷ, y) ≈ 0 + @test squared_hinge_loss(y, 0.5 .* y) ≈ 0.0625 end y = [0.1 0.2 0.3] ŷ = [0.4 0.5 0.6] @testset "poisson_loss" begin - @test Flux.poisson_loss(ŷ, y) ≈ 0.6278353988097339 - @test Flux.poisson_loss(y, y) ≈ 0.5044459776946685 + @test poisson_loss(ŷ, y) ≈ 0.6278353988097339 + @test poisson_loss(y, y) ≈ 0.5044459776946685 end y = [1.0 0.5 0.3 2.4] ŷ = [0 1.4 0.5 1.2] @testset "dice_coeff_loss" begin - @test Flux.dice_coeff_loss(ŷ, y) ≈ 0.2799999999999999 - @test Flux.dice_coeff_loss(y, y) ≈ 0.0 + @test dice_coeff_loss(ŷ, y) ≈ 0.2799999999999999 + @test dice_coeff_loss(y, y) ≈ 0.0 end @testset "tversky_loss" begin - @test Flux.tversky_loss(ŷ, y) ≈ -0.06772009029345383 - @test Flux.tversky_loss(ŷ, y, β=0.8) ≈ -0.09490740740740744 - @test Flux.tversky_loss(y, y) ≈ -0.5576923076923075 + @test tversky_loss(ŷ, y) ≈ -0.06772009029345383 + @test tversky_loss(ŷ, y, β=0.8) ≈ -0.09490740740740744 + @test tversky_loss(y, y) ≈ -0.5576923076923075 end @testset "no spurious promotions" begin @@ -173,7 +173,7 @@ end y = rand(T, 2) ŷ = rand(T, 2) for f in ALL_LOSSES - fwd, back = Flux.pullback(f, ŷ, y) + fwd, back = pullback(f, ŷ, y) @test fwd isa T @test eltype(back(one(T))[1]) == T end @@ -190,9 +190,9 @@ end 0 1] ŷ1 = [0.6 0.3 0.4 0.7] - @test Flux.binary_focal_loss(ŷ, y) ≈ 0.0728675615927385 - @test Flux.binary_focal_loss(ŷ1, y1) ≈ 0.05691642237852222 - @test Flux.binary_focal_loss(ŷ, y; γ=0.0) ≈ Flux.binarycrossentropy(ŷ, y) + @test binary_focal_loss(ŷ, y) ≈ 0.0728675615927385 + @test binary_focal_loss(ŷ1, y1) ≈ 0.05691642237852222 + @test binary_focal_loss(ŷ, y; γ=0.0) ≈ binarycrossentropy(ŷ, y) end @testset "focal_loss" begin @@ -206,9 +206,9 @@ end ŷ1 = [0.4 0.2 0.5 0.5 0.1 0.3] - @test Flux.focal_loss(ŷ, y) ≈ 1.1277571935622628 - @test Flux.focal_loss(ŷ1, y1) ≈ 0.45990566879720157 - @test Flux.focal_loss(ŷ, y; γ=0.0) ≈ Flux.crossentropy(ŷ, y) + @test focal_loss(ŷ, y) ≈ 1.1277571935622628 + @test focal_loss(ŷ1, y1) ≈ 0.45990566879720157 + @test focal_loss(ŷ, y; γ=0.0) ≈ crossentropy(ŷ, y) end @testset "siamese_contrastive_loss" begin @@ -232,19 +232,19 @@ end 0.1 0.2 0.7] - @test Flux.siamese_contrastive_loss(ŷ, y) ≈ 0.2333333333333333 - @test Flux.siamese_contrastive_loss(ŷ, y, margin = 0.5f0) ≈ 0.10000000000000002 - @test Flux.siamese_contrastive_loss(ŷ, y, margin = 1.5f0) ≈ 0.5333333333333333 - @test Flux.siamese_contrastive_loss(ŷ1, y1) ≈ 0.32554644f0 - @test Flux.siamese_contrastive_loss(ŷ1, y1, margin = 0.5f0) ≈ 0.16271012f0 - @test Flux.siamese_contrastive_loss(ŷ1, y1, margin = 1.5f0) ≈ 0.6532292f0 - @test Flux.siamese_contrastive_loss(ŷ, y, margin = 1) ≈ Flux.siamese_contrastive_loss(ŷ, y) - @test Flux.siamese_contrastive_loss(y, y) ≈ 0.0 - @test Flux.siamese_contrastive_loss(y1, y1) ≈ 0.0 - @test Flux.siamese_contrastive_loss(ŷ, y, margin = 0) ≈ 0.09166666666666667 - @test Flux.siamese_contrastive_loss(ŷ1, y1, margin = 0) ≈ 0.13161165f0 - @test Flux.siamese_contrastive_loss(ŷ2, y2) ≈ 0.21200000000000005 - @test 
Flux.siamese_contrastive_loss(ŷ2, ŷ2) ≈ 0.18800000000000003 - @test_throws DomainError(-0.5, "Margin must be non-negative") Flux.siamese_contrastive_loss(ŷ1, y1, margin = -0.5) - @test_throws DomainError(-1, "Margin must be non-negative") Flux.siamese_contrastive_loss(ŷ, y, margin = -1) + @test siamese_contrastive_loss(ŷ, y) ≈ 0.2333333333333333 + @test siamese_contrastive_loss(ŷ, y, margin = 0.5f0) ≈ 0.10000000000000002 + @test siamese_contrastive_loss(ŷ, y, margin = 1.5f0) ≈ 0.5333333333333333 + @test siamese_contrastive_loss(ŷ1, y1) ≈ 0.32554644f0 + @test siamese_contrastive_loss(ŷ1, y1, margin = 0.5f0) ≈ 0.16271012f0 + @test siamese_contrastive_loss(ŷ1, y1, margin = 1.5f0) ≈ 0.6532292f0 + @test siamese_contrastive_loss(ŷ, y, margin = 1) ≈ siamese_contrastive_loss(ŷ, y) + @test siamese_contrastive_loss(y, y) ≈ 0.0 + @test siamese_contrastive_loss(y1, y1) ≈ 0.0 + @test siamese_contrastive_loss(ŷ, y, margin = 0) ≈ 0.09166666666666667 + @test siamese_contrastive_loss(ŷ1, y1, margin = 0) ≈ 0.13161165f0 + @test siamese_contrastive_loss(ŷ2, y2) ≈ 0.21200000000000005 + @test siamese_contrastive_loss(ŷ2, ŷ2) ≈ 0.18800000000000003 + @test_throws DomainError(-0.5, "Margin must be non-negative") siamese_contrastive_loss(ŷ1, y1, margin = -0.5) + @test_throws DomainError(-1, "Margin must be non-negative") siamese_contrastive_loss(ŷ, y, margin = -1) end
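
For context, a minimal, self-contained sketch of the usage pattern the docs and tests above converge on: importing losses directly from `Flux.Losses` and controlling reduction with the `agg` keyword. It is illustrative only and not part of the patch; the data is made up, and it assumes nothing beyond the `Flux.Losses` exports and `Flux.onehotbatch` already used in this diff.

```julia
using Flux
using Flux.Losses: mse, logitcrossentropy

# Targets and predictions are arrays of size num_target_features × num_examples_in_batch,
# passed in the documented order loss(ŷ, y).
y = Flux.onehotbatch([1, 2, 3, 2, 1], 1:3)   # 3×5 one-hot targets
ŷ = randn(Float32, 3, 5)                     # raw model outputs (logits, not softmaxed)

logitcrossentropy(ŷ, y)              # default aggregation: mean over the batch
logitcrossentropy(ŷ, y; agg = sum)   # summed instead of averaged

mse(Float32[1.1, 1.9, 3.1], 1:3)     # same call as in the `mse` docstring example
```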