Commit 1a0b519

bors[bot] and mcabbott authored
Merge #1660
1660: Printing & docstrings for `onehot` / `onehotbatch` r=mcabbott a=mcabbott

Right now, the printing of `OneHotArray` lies about its type parameters. That's pretty confusing. The standard way to hide messy details, the way `view` and `adjoint` do, is via `Base.showarg`, so I did that. Then I also re-used the dots which LinearAlgebra's sparse matrix printing uses:

```
julia> Flux.onehotbatch(collect("foo"), 'a':'z')  # before
26×3 Flux.OneHotArray{26,2,Vector{UInt32}}:
 0  0  0
 0  0  0
 0  0  0
 0  0  0
 0  0  0
 1  0  0
 0  0  0
 ...

julia> typeof(ans)
Flux.OneHotArray{UInt32, 26, 1, 2, Vector{UInt32}}

julia> Flux.onehotbatch(collect("foo"), 'a':'z')  # after
26×3 OneHotMatrix(::Vector{UInt32}) with eltype Bool:
 ⋅  ⋅  ⋅
 ⋅  ⋅  ⋅
 ⋅  ⋅  ⋅
 ⋅  ⋅  ⋅
 ⋅  ⋅  ⋅
 1  ⋅  ⋅
 ⋅  ⋅  ⋅
```

I've also tried to tidy up things I thought were unclear in the docstrings. E.g. it looked as if `unk...` was indicating that you could specify multiple defaults, but in fact the splat is just an implementation trick. And, following `Base.get`, perhaps it's better called `default`.

Should have no functional changes at all.

Co-authored-by: Michael Abbott <[email protected]>
2 parents 1a14301 + 499cf27 commit 1a0b519
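The display style adopted in this commit (structural zeros shown as centered dots, stored ones as `1`) can be mimicked outside Julia. This toy Python renderer is an illustration only, not how `Base.replace_in_print_matrix` actually works; the name `render_onehot` is mine:

```python
# Toy rendering of a one-hot matrix in the style this commit adopts:
# structural zeros print as centered dots, the single stored 1 per column as "1".

def render_onehot(hot_rows, nrows, ncols):
    """hot_rows[k] is the hot row index of column k (the only data stored)."""
    lines = []
    for i in range(nrows):
        cells = ["1" if hot_rows[j] == i else "⋅" for j in range(ncols)]
        lines.append(" " + "  ".join(cells))
    return "\n".join(lines)

# Same data as onehotbatch([:b, :a, :b], [:a, :b, :c]) in the docs below.
print(render_onehot([1, 0, 1], nrows=3, ncols=3))
```

Note that, as in the real printing, only the hot indices are consulted; no dense Bool matrix is materialised.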

File tree

3 files changed: +123 −86 lines changed

docs/src/data/onehot.md
+16 −16

````diff
@@ -6,15 +6,15 @@ It's common to encode categorical variables (like `true`, `false` or `cat`, `dog
 julia> using Flux: onehot, onecold
 
 julia> onehot(:b, [:a, :b, :c])
-3-element Flux.OneHotVector{3,UInt32}:
- 0
+3-element OneHotVector(::UInt32) with eltype Bool:
+ ⋅
  1
- 0
+ ⋅
 
 julia> onehot(:c, [:a, :b, :c])
-3-element Flux.OneHotVector{3,UInt32}:
- 0
- 0
+3-element OneHotVector(::UInt32) with eltype Bool:
+ ⋅
+ ⋅
  1
 ```
 
@@ -44,16 +44,16 @@ Flux.onecold
 julia> using Flux: onehotbatch
 
 julia> onehotbatch([:b, :a, :b], [:a, :b, :c])
-3×3 Flux.OneHotArray{3,2,Vector{UInt32}}:
- 0  1  0
- 1  0  1
- 0  0  0
-
-julia> onecold(ans, [:a, :b, :c])
-3-element Vector{Symbol}:
- :b
- :a
- :b
+3×3 OneHotMatrix(::Vector{UInt32}) with eltype Bool:
+ ⋅  1  ⋅
+ 1  ⋅  1
+ ⋅  ⋅  ⋅
+
+julia> onecold(ans, [:a, :b, :c])
+3-element Vector{Symbol}:
+ :b
+ :a
+ :b
 ```
 
 Note that these operations returned `OneHotVector` and `OneHotMatrix` rather than `Array`s. `OneHotVector`s behave like normal vectors but avoid any unnecessary cost compared to using an integer index directly. For example, multiplying a matrix with a one-hot vector simply slices out the relevant row of the matrix under the hood.
````

src/losses/functions.jl
+14 −14

````diff
@@ -115,9 +115,9 @@ of label smoothing to binary distributions encoded in a single number.
 # Example
 ```jldoctest
 julia> y = Flux.onehotbatch([1, 1, 1, 0, 1, 0], 0:1)
-2×6 Flux.OneHotArray{2,2,Vector{UInt32}}:
- 0  0  0  1  0  1
- 1  1  1  0  1  0
+2×6 OneHotMatrix(::Vector{UInt32}) with eltype Bool:
+ ⋅  ⋅  ⋅  1  ⋅  1
+ 1  1  1  ⋅  1  ⋅
 
 julia> y_smoothed = Flux.label_smoothing(y, 0.2f0)
 2×6 Matrix{Float32}:
@@ -180,10 +180,10 @@ See also: [`logitcrossentropy`](@ref), [`binarycrossentropy`](@ref), [`logitbina
 # Example
 ```jldoctest
 julia> y_label = Flux.onehotbatch([0, 1, 2, 1, 0], 0:2)
-3×5 Flux.OneHotArray{3,2,Vector{UInt32}}:
- 1  0  0  0  1
- 0  1  0  1  0
- 0  0  1  0  0
+3×5 OneHotMatrix(::Vector{UInt32}) with eltype Bool:
+ 1  ⋅  ⋅  ⋅  1
+ ⋅  1  ⋅  1  ⋅
+ ⋅  ⋅  1  ⋅  ⋅
 
 julia> y_model = softmax(reshape(-7:7, 3, 5) .* 1f0)
 3×5 Matrix{Float32}:
@@ -232,10 +232,10 @@ See also: [`binarycrossentropy`](@ref), [`logitbinarycrossentropy`](@ref), [`lab
 # Example
 ```jldoctest
 julia> y_label = Flux.onehotbatch(collect("abcabaa"), 'a':'c')
-3×7 Flux.OneHotArray{3,2,Vector{UInt32}}:
- 1  0  0  1  0  1  1
- 0  1  0  0  1  0  0
- 0  0  1  0  0  0  0
+3×7 OneHotMatrix(::Vector{UInt32}) with eltype Bool:
+ 1  ⋅  ⋅  1  ⋅  1  1
+ ⋅  1  ⋅  ⋅  1  ⋅  ⋅
+ ⋅  ⋅  1  ⋅  ⋅  ⋅  ⋅
 
 julia> y_model = reshape(vcat(-9:0, 0:9, 7.5f0), 3, 7)
 3×7 Matrix{Float32}:
@@ -291,9 +291,9 @@ julia> all(p -> 0 < p < 1, y_prob[2,:]) # else DomainError
 true
 
 julia> y_hot = Flux.onehotbatch(y_bin, 0:1)
-2×3 Flux.OneHotArray{2,2,Vector{UInt32}}:
- 0  1  0
- 1  0  1
+2×3 OneHotMatrix(::Vector{UInt32}) with eltype Bool:
+ ⋅  1  ⋅
+ 1  ⋅  1
 
 julia> Flux.crossentropy(y_prob, y_hot)
 0.43989f0
````
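As a side note on the doctests above: with one-hot targets, the cross-entropy sum collapses to picking out the log-probability of the hot class in each column. A rough Python sketch of that reduction (my own simplification, not Flux's actual implementation):

```python
import math

# With one-hot labels, crossentropy(probs, labels) averages the negative
# log-probability assigned to the true class of each column.

def crossentropy_onehot(columns, hot_indices):
    """columns: list of probability distributions; hot_indices: true class per column."""
    return -sum(math.log(col[i]) for col, i in zip(columns, hot_indices)) / len(columns)

cols = [[0.2, 0.8], [0.7, 0.3]]   # two columns over classes 0 and 1
hot = [1, 0]                      # true class per column
loss = crossentropy_onehot(cols, hot)
```

This is why storing only the hot indices (a `Vector{UInt32}`) loses nothing for this loss.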

src/onehot.jl
+93 −56

````diff
@@ -1,6 +1,12 @@
 import Adapt
 import .CUDA
 
+"""
+    OneHotArray{T,L,N,M,I} <: AbstractArray{Bool,M}
+
+These are constructed by [`onehot`](@ref) and [`onehotbatch`](@ref).
+Parameter `I` is the type of the underlying storage, and `T` its eltype.
+"""
 struct OneHotArray{T<:Integer, L, N, var"N+1", I<:Union{T, AbstractArray{T, N}}} <: AbstractArray{Bool, var"N+1"}
   indices::I
 end
@@ -15,31 +21,11 @@ _indices(x::Base.ReshapedArray{<: Any, <: Any, <: OneHotArray}) =
 const OneHotVector{T, L} = OneHotArray{T, L, 0, 1, T}
 const OneHotMatrix{T, L, I} = OneHotArray{T, L, 1, 2, I}
 
+@doc @doc(OneHotArray)
 OneHotVector(idx, L) = OneHotArray(idx, L)
+@doc @doc(OneHotArray)
 OneHotMatrix(indices, L) = OneHotArray(indices, L)
 
-function _show_elements(x::OneHotArray)
-  xbool = convert(Array{Bool}, cpu(x))
-  xrepr = join(split(repr(MIME("text/plain"), xbool; context= :limit => true), "\n")[2:end], "\n")
-
-  return xrepr
-end
-
-function Base.show(io::IO, ::MIME"text/plain", x::OneHotArray{<:Any, L, <:Any, N, I}) where {L, N, I}
-  join(io, string.(size(x)), "×")
-  print(io, " Flux.OneHotArray{")
-  join(io, string.([L, N, I]), ",")
-  println(io, "}:")
-  print(io, _show_elements(x))
-end
-
-function Base.show(io::IO, ::MIME"text/plain", x::OneHotVector{T, L}) where {T, L}
-  print(io, string.(length(x)))
-  print(io, "-element Flux.OneHotVector{")
-  join(io, string.([L, T]), ",")
-  println(io, "}:")
-  print(io, _show_elements(x))
-end
-
 # use this type so reshaped arrays hit fast paths
 # e.g. argmax
 const OneHotLike{T, L, N, var"N+1", I} =
@@ -61,6 +47,25 @@ Base.getindex(x::OneHotArray{<:Any, L}, ::Colon, I...) where L = OneHotArray(x.i
 Base.getindex(x::OneHotArray{<:Any, <:Any, <:Any, N}, ::Vararg{Colon, N}) where N = x
 Base.getindex(x::OneHotArray, I::CartesianIndex{N}) where N = x[I[1], Tuple(I)[2:N]...]
 
+function Base.showarg(io::IO, x::OneHotArray, toplevel)
+  print(io, ndims(x) == 1 ? "OneHotVector(" : ndims(x) == 2 ? "OneHotMatrix(" : "OneHotArray(")
+  Base.showarg(io, x.indices, false)
+  print(io, ')')
+  toplevel && print(io, " with eltype Bool")
+  return nothing
+end
+
+# this is from /LinearAlgebra/src/diagonal.jl, official way to print the dots:
+function Base.replace_in_print_matrix(x::OneHotLike, i::Integer, j::Integer, s::AbstractString)
+  x[i,j] ? s : _isonehot(x) ? Base.replace_with_centered_mark(s) : s
+end
+
+# copy CuArray versions back before trying to print them:
+Base.print_array(io::IO, X::OneHotLike{T, L, N, var"N+1", <:CuArray}) where {T, L, N, var"N+1"} =
+  Base.print_array(io, cpu(X))
+Base.print_array(io::IO, X::LinearAlgebra.AdjOrTrans{Bool, <:OneHotLike{T, L, N, var"N+1", <:CuArray}}) where {T, L, N, var"N+1"} =
+  Base.print_array(io, cpu(X))
+
 _onehot_bool_type(x::OneHotLike{<:Any, <:Any, <:Any, N, <:Union{Integer, AbstractArray}}) where N = Array{Bool, N}
 _onehot_bool_type(x::OneHotLike{<:Any, <:Any, <:Any, N, <:CuArray}) where N = CuArray{Bool, N}
 
@@ -95,72 +100,104 @@ Base.argmax(x::OneHotLike; dims = Colon()) =
   invoke(argmax, Tuple{AbstractArray}, x; dims = dims)
 
 """
-    onehot(l, labels[, unk])
+    onehot(x, labels, [default])
+
+Return a `OneHotVector` which is roughly a sparse representation of `x .== labels`.
 
-Return a `OneHotVector` where only first occourence of `l` in `labels` is `1` and
-all other elements are `0`.
+Instead of storing say `Vector{Bool}`, it stores the index of the first occurrence
+of `x` in `labels`. If `x` is not found in `labels`, then it either returns `onehot(default, labels)`,
+or gives an error if no default is given.
 
-If `l` is not found in labels and `unk` is present, the function returns
-`onehot(unk, labels)`; otherwise the function raises an error.
+See also [`onehotbatch`](@ref) to apply this to many `x`s,
+and [`onecold`](@ref) to reverse either of these, as well as to generalise `argmax`.
 
 # Examples
 ```jldoctest
-julia> Flux.onehot(:b, [:a, :b, :c])
-3-element Flux.OneHotVector{3,UInt32}:
- 0
+julia> β = Flux.onehot(:b, [:a, :b, :c])
+3-element OneHotVector(::UInt32) with eltype Bool:
+ ⋅
  1
- 0
+ ⋅
 
-julia> Flux.onehot(:c, [:a, :b, :c])
-3-element Flux.OneHotVector{3,UInt32}:
- 0
- 0
- 1
+julia> αβγ = (Flux.onehot(0, 0:2), β, Flux.onehot(:z, [:a, :b, :c], :c)) # uses default
+(Bool[1, 0, 0], Bool[0, 1, 0], Bool[0, 0, 1])
+
+julia> hcat(αβγ...) # preserves sparsity
+3×3 OneHotMatrix(::Vector{UInt32}) with eltype Bool:
+ 1  ⋅  ⋅
+ ⋅  1  ⋅
+ ⋅  ⋅  1
 ```
 """
-function onehot(l, labels)
-  i = something(findfirst(isequal(l), labels), 0)
-  i > 0 || error("Value $l is not in labels")
+function onehot(x, labels)
+  i = something(findfirst(isequal(x), labels), 0)
+  i > 0 || error("Value $x is not in labels")
   OneHotVector{UInt32, length(labels)}(i)
 end
 
-function onehot(l, labels, unk)
-  i = something(findfirst(isequal(l), labels), 0)
-  i > 0 || return onehot(unk, labels)
+function onehot(x, labels, default)
+  i = something(findfirst(isequal(x), labels), 0)
+  i > 0 || return onehot(default, labels)
   OneHotVector{UInt32, length(labels)}(i)
 end
 
 """
-    onehotbatch(ls, labels[, unk...])
+    onehotbatch(xs, labels, [default])
+
+Returns a `OneHotMatrix` where `k`th column of the matrix is [`onehot(xs[k], labels)`](@ref onehot).
+This is a sparse matrix, which stores just a `Vector{UInt32}` containing the indices of the
+nonzero elements.
 
-Return a `OneHotMatrix` where `k`th column of the matrix is `onehot(ls[k], labels)`.
+If one of the inputs in `xs` is not found in `labels`, that column is `onehot(default, labels)`
+if `default` is given, else an error.
 
-If one of the input labels `ls` is not found in `labels` and `unk` is given,
-return [`onehot(unk, labels)`](@ref) ; otherwise the function will raise an error.
+If `xs` has more dimensions, `M = ndims(xs) > 1`, then the result is an
+`AbstractArray{Bool, M+1}` which is one-hot along the first dimension,
+i.e. `result[:, k...] == onehot(xs[k...], labels)`.
 
 # Examples
 ```jldoctest
-julia> Flux.onehotbatch([:b, :a, :b], [:a, :b, :c])
-3×3 Flux.OneHotArray{3,2,Vector{UInt32}}:
- 0  1  0
- 1  0  1
- 0  0  0
+julia> oh = Flux.onehotbatch(collect("abracadabra"), 'a':'e', 'e')
+5×11 OneHotMatrix(::Vector{UInt32}) with eltype Bool:
+ 1  ⋅  ⋅  1  ⋅  1  ⋅  1  ⋅  ⋅  1
+ ⋅  1  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  1  ⋅  ⋅
+ ⋅  ⋅  ⋅  ⋅  1  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅
+ ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  1  ⋅  ⋅  ⋅  ⋅
+ ⋅  ⋅  1  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  1  ⋅
+
+julia> reshape(1:15, 3, 5) * oh  # this matrix multiplication is done efficiently
+3×11 Matrix{Int64}:
+ 1  4  13  1  7  1  10  1  4  13  1
+ 2  5  14  2  8  2  11  2  5  14  2
+ 3  6  15  3  9  3  12  3  6  15  3
 ```
 """
-onehotbatch(ls, labels, unk...) = batch([onehot(l, labels, unk...) for l in ls])
+onehotbatch(ls, labels, default...) = batch([onehot(l, labels, default...) for l in ls])
 
 """
-    onecold(y[, labels = 1:length(y)])
+    onecold(y::AbstractArray, labels = 1:size(y,1))
 
-Inverse operations of [`onehot`](@ref).
+Roughly the inverse operation of [`onehot`](@ref) or [`onehotbatch`](@ref):
+This finds the index of the largest element of `y`, or each column of `y`,
+and looks them up in `labels`.
+
+If `labels` are not specified, the default is integers `1:size(y,1)` --
+the same operation as `argmax(y, dims=1)` but sometimes a different return type.
 
 # Examples
 ```jldoctest
-julia> Flux.onecold([true, false, false], [:a, :b, :c])
-:a
+julia> Flux.onecold([false, true, false])
+2
 
 julia> Flux.onecold([0.3, 0.2, 0.5], [:a, :b, :c])
 :c
+
+julia> Flux.onecold([ 1 0 0 1 0 1 0 1 0 0 1
+                      0 1 0 0 0 0 0 0 1 0 0
+                      0 0 0 0 1 0 0 0 0 0 0
+                      0 0 0 0 0 0 1 0 0 0 0
+                      0 0 1 0 0 0 0 0 0 1 0 ], 'a':'e') |> String
+"abeacadabea"
 ```
 """
 onecold(y::AbstractVector, labels = 1:length(y)) = labels[argmax(y)]
````
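The `onehot` / `onecold` round trip described in those docstrings can be sketched in Python (an illustration of the semantics only; Flux's real types store `UInt32` indices rather than dense Bool arrays, and these helper names are mine):

```python
# onehotbatch: rows indexed by labels, one column per input, 1 where they match.
# onecold: per column, take the argmax and look the index up in labels.

def onehotbatch(xs, labels):
    return [[int(l == x) for x in xs] for l in labels]

def onecold(matrix, labels):
    ncols = len(matrix[0])
    out = []
    for k in range(ncols):
        col = [row[k] for row in matrix]
        out.append(labels[col.index(max(col))])  # argmax, then label lookup
    return out

oh = onehotbatch("foo", "fo")        # labels 'f' and 'o'
print(onecold(oh, "fo"))             # recovers the inputs
```

As the docstring says, the inverse is only "rough": `onecold` happily accepts any numeric array, not just one-hot data, which is what lets it generalise `argmax`.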
