From 0461bc1935b99da4e5ea89e0e38aab0faf2d3879 Mon Sep 17 00:00:00 2001
From: George Datseris <datseris.george@gmail.com>
Date: Sat, 14 Jan 2023 10:37:31 +0200
Subject: [PATCH 01/29] allow bin width to be given to fixed rect bin directly

---
 .../rectangular_binning.jl                    | 35 +++++++++++++------
 1 file changed, 24 insertions(+), 11 deletions(-)

diff --git a/src/encoding_implementations/rectangular_binning.jl b/src/encoding_implementations/rectangular_binning.jl
index 25d90a5b1..2e8231e7e 100644
--- a/src/encoding_implementations/rectangular_binning.jl
+++ b/src/encoding_implementations/rectangular_binning.jl
@@ -39,27 +39,32 @@ const ValidFixedBinInputs = Union{Number, NTuple}
 
 """
     FixedRectangularBinning <: AbstractBinning
-    FixedRectangularBinning(ϵmin::NTuple, ϵmax::NTuple, N::Int)
+    FixedRectangularBinning(ϵmin::NTuple, ϵmax::NTuple, z::Union{Real, Int})
 
 Rectangular box partition of state space where the extent of the grid is explicitly
-specified by `ϵmin` and `emax`, and along each dimension, the grid is subdivided into `N`
-subintervals. Points falling outside the partition do not contribute to probabilities.
+specified by `ϵmin` and `emax`, and along each dimension.
+
+The third argument `subdiv` decides how the grid is subdivided:
+if it is an integer, the grid is subdivided into that many subintervals.
+Otherwise it is a real, and it is taken as the edge width.
+
+Points falling outside the partition do not contribute to probabilities.
 This binning type leads to a well-defined outcome space without knowledge of input data,
 see [`ValueHistogram`](@ref).
 
 `ϵmin`/`emax` must be `NTuple{D, <:Real}` for input of `D`-dimensional data.
 
-    FixedRectangularBinning(ϵmin::Real, ϵmax::Real, N::Int, D::Int = 1)
+    FixedRectangularBinning(ϵmin::Real, ϵmax::Real, subdiv, D::Int = 1)
 
 This is a convenience method where each dimension of the binning has the same extent
 and the input data are `D` dimensional, which defaults to 1 (timeseries).
 """
-struct FixedRectangularBinning{D,T<:Real} <: AbstractBinning
+struct FixedRectangularBinning{D,T<:Real,S<:Union{<:Real,Int}} <: AbstractBinning
     ϵmin::NTuple{D,T}
     ϵmax::NTuple{D,T}
-    N::Int
+    subdiv::S
 end
-function FixedRectangularBinning(ϵmin::Real, ϵmax::Real, N::Int, D::Int = 1)
+function FixedRectangularBinning(ϵmin::Real, ϵmax::Real, N, D::Int = 1)
     FixedRectangularBinning(ntuple(x->ϵmin, D), ntuple(x->ϵmax, D), N)
 end
 
@@ -76,7 +81,9 @@ The second signature does not need `x` because (1) the binning is fixed, and the
 size of `x` doesn't matter, and (2) because the binning contains the dimensionality
 information as `ϵmin/max` is already an `NTuple`.
 
-Due to roundoff error when computing bin edges, the computed bin widths
+When specifying `n::Int` amount of bins instead of directly providing an
+edge with in the `binning`, to roundoff errors typically occur when computing bin edges.
+In this case, the computed bin widths
 are increased to their `nextfloat` `n_eps` times
 to ensure the correct number of bins is produced.
 
@@ -175,9 +182,15 @@ function RectangularBinEncoding(b::FixedRectangularBinning{D, T}; n_eps = 2) whe
     ϵmin, ϵmax = b.ϵmin, b.ϵmax
     mini = SVector{D, T}(ϵmin)
     maxi = SVector{D, T}(ϵmax)
-    edgelengths_nonadjusted = @. (maxi .- mini) / b.N
-    edgelengths = nextfloat.(edgelengths_nonadjusted, n_eps)
-    histsize = SVector{D,Int}(fill(b.N, D))
+    if b.subdiv isa Real
+        edgelengths = one(SVector{D, T}) .* b.subdiv
+        histsize = ceil.(Int, (maxi .- mini) ./ edgelengths)
+    elseif b.subdiv isa Int
+        N = b.subdiv
+        edgelengths_nonadjusted = @. (maxi .- mini) / N
+        edgelengths = nextfloat.(edgelengths_nonadjusted, n_eps)
+        histsize = SVector{D,Int}(fill(N, D))
+    end
     ci = CartesianIndices(Tuple(histsize))
     li = LinearIndices(ci)
     RectangularBinEncoding(b, typeof(edgelengths)(mini), edgelengths, histsize, ci, li)

From e0edcdf9533d164904a529a72c4ad0c0cfeacd53 Mon Sep 17 00:00:00 2001
From: George Datseris <datseris.george@gmail.com>
Date: Sat, 14 Jan 2023 10:38:10 +0200
Subject: [PATCH 02/29] remove incorrect edge application of nextfloat for
 given edge

---
 src/encoding_implementations/rectangular_binning.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/encoding_implementations/rectangular_binning.jl b/src/encoding_implementations/rectangular_binning.jl
index 2e8231e7e..870978bf0 100644
--- a/src/encoding_implementations/rectangular_binning.jl
+++ b/src/encoding_implementations/rectangular_binning.jl
@@ -146,7 +146,7 @@ function RectangularBinEncoding(b::RectangularBinning, x; n_eps = 2)
     v = ones(SVector{D,T})
     if ϵ isa Float64 || ϵ isa AbstractVector{<:AbstractFloat}
         edgelengths = SVector{D,T}(ϵ .* v)
-        histsize = ceil.(Int, nextfloat.((maxi .- mini), n_eps) ./ edgelengths)
+        histsize = ceil.(Int, (maxi .- mini) ./ edgelengths)
     elseif ϵ isa Int || ϵ isa Vector{Int}
         edgeslengths_nonadjusted = @. (maxi - mini)/ϵ
         # Just taking nextfloat once here isn't enough for bins to cover data when using

From 37514e88c5a43bd24024a3f78792adf9ac411b38 Mon Sep 17 00:00:00 2001
From: George Datseris <datseris.george@gmail.com>
Date: Sat, 14 Jan 2023 10:40:31 +0200
Subject: [PATCH 03/29] better doc for fixed rect bin

---
 src/encoding_implementations/rectangular_binning.jl | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/encoding_implementations/rectangular_binning.jl b/src/encoding_implementations/rectangular_binning.jl
index 870978bf0..5e4471a5c 100644
--- a/src/encoding_implementations/rectangular_binning.jl
+++ b/src/encoding_implementations/rectangular_binning.jl
@@ -39,14 +39,15 @@ const ValidFixedBinInputs = Union{Number, NTuple}
 
 """
     FixedRectangularBinning <: AbstractBinning
-    FixedRectangularBinning(ϵmin::NTuple, ϵmax::NTuple, z::Union{Real, Int})
+    FixedRectangularBinning(xmin::NTuple, xmax::NTuple, subdiv::Union{Real, Int})
 
 Rectangular box partition of state space where the extent of the grid is explicitly
-specified by `ϵmin` and `emax`, and along each dimension.
+specified by by providing the minium `xmin` and maximum `xmax`
+values along each dimension.
 
 The third argument `subdiv` decides how the grid is subdivided:
-if it is an integer, the grid is subdivided into that many subintervals.
-Otherwise it is a real, and it is taken as the edge width.
+if it is an integer, each dimension is subdivided into that many subintervals.
+Otherwise it is a real, and it is taken as the edge width for each dimension.
 
 Points falling outside the partition do not contribute to probabilities.
 This binning type leads to a well-defined outcome space without knowledge of input data,
@@ -54,7 +55,7 @@ see [`ValueHistogram`](@ref).
 
 `ϵmin`/`emax` must be `NTuple{D, <:Real}` for input of `D`-dimensional data.
 
-    FixedRectangularBinning(ϵmin::Real, ϵmax::Real, subdiv, D::Int = 1)
+    FixedRectangularBinning(xmin::Real, xmax::Real, subdiv, D::Int = 1)
 
 This is a convenience method where each dimension of the binning has the same extent
 and the input data are `D` dimensional, which defaults to 1 (timeseries).

From 9933870263319ca64a52d8ddcd18c3770b032d98 Mon Sep 17 00:00:00 2001
From: George Datseris <datseris.george@gmail.com>
Date: Sat, 14 Jan 2023 11:21:16 +0200
Subject: [PATCH 04/29] better docuemnt and clarify bins

---
 src/encoding_implementations/rectangular_binning.jl | 10 +++++++---
 src/probabilities_estimators/value_histogram.jl     | 11 ++++++++---
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/src/encoding_implementations/rectangular_binning.jl b/src/encoding_implementations/rectangular_binning.jl
index 5e4471a5c..bce8f4a19 100644
--- a/src/encoding_implementations/rectangular_binning.jl
+++ b/src/encoding_implementations/rectangular_binning.jl
@@ -50,8 +50,12 @@ if it is an integer, each dimension is subdivided into that many subintervals.
 Otherwise it is a real, and it is taken as the edge width for each dimension.
 
 Points falling outside the partition do not contribute to probabilities.
-This binning type leads to a well-defined outcome space without knowledge of input data,
-see [`ValueHistogram`](@ref).
+Since bins are always left-closed-right-open `[a, b)`, keep this in mind
+when specifying bin width directly. I.e., if `xmax = 1`,
+a data point with value `1` does not fall into any bins!
+
+`FixedRectangularBinning` leads to a well-defined outcome space without knowledge of input
+data, see [`ValueHistogram`](@ref).
 
 `ϵmin`/`emax` must be `NTuple{D, <:Real}` for input of `D`-dimensional data.
 
@@ -184,7 +188,7 @@ function RectangularBinEncoding(b::FixedRectangularBinning{D, T}; n_eps = 2) whe
     mini = SVector{D, T}(ϵmin)
     maxi = SVector{D, T}(ϵmax)
     if b.subdiv isa Real
-        edgelengths = one(SVector{D, T}) .* b.subdiv
+        edgelengths = ones(SVector{D, T}) .* b.subdiv
         histsize = ceil.(Int, (maxi .- mini) ./ edgelengths)
     elseif b.subdiv isa Int
         N = b.subdiv
diff --git a/src/probabilities_estimators/value_histogram.jl b/src/probabilities_estimators/value_histogram.jl
index cbddb4063..240427229 100644
--- a/src/probabilities_estimators/value_histogram.jl
+++ b/src/probabilities_estimators/value_histogram.jl
@@ -27,11 +27,12 @@ and initializes this binning directly.
 ## Outcomes
 
 The outcome space for `ValueHistogram` is the unique bins constructed
-from `b`. Each bin is identified by its left (lowest-value) corner.
+from `b`. Each bin is identified by its left (lowest-value) corner,
+because bins are always left-closed-right-open intervals `[a, b)`.
 The bins are in data units, not integer (cartesian indices units), and
 are returned as `SVector`s.
-For [`FixedRectangularBinning`](@ref) this is well-defined from the binning, but for
-[`RectangularBinning`](@ref) input `x` is needed for a well-defined [`outcome_space`](@ref).
+For [`FixedRectangularBinning`](@ref) the [`outcome_space`](@ref) is well-defined from the
+binning, but for [`RectangularBinning`](@ref) input `x` is needed as well.
 """
 struct ValueHistogram{B<:AbstractBinning} <: ProbabilitiesEstimator
     binning::B
@@ -63,3 +64,7 @@ function probabilities_and_outcomes(est::ValueHistogram, x)
 end
 
 outcome_space(est::ValueHistogram, x) = outcome_space(RectangularBinEncoding(est.binning, x))
+
+function outcome_space(est::ValueHistogram{<:FixedRectangularBinning})
+    return outcome_space(RectangularBinEncoding(est.binning))
+end
\ No newline at end of file

From a180cd0ffbdda7e1f6cd4f577eab9a4798e51f14 Mon Sep 17 00:00:00 2001
From: George Datseris <datseris.george@gmail.com>
Date: Sat, 14 Jan 2023 12:43:13 +0200
Subject: [PATCH 05/29] compelte rewrite of binnings based on ranges

---
 .../rectangular_binning.jl                    | 202 ++++++++++--------
 .../estimators/value_histogram.jl             |   2 +-
 2 files changed, 116 insertions(+), 88 deletions(-)

diff --git a/src/encoding_implementations/rectangular_binning.jl b/src/encoding_implementations/rectangular_binning.jl
index bce8f4a19..5a9ed3819 100644
--- a/src/encoding_implementations/rectangular_binning.jl
+++ b/src/encoding_implementations/rectangular_binning.jl
@@ -19,6 +19,8 @@ Rectangular box partition of state space using the scheme `ϵ`,
 deducing the coordinates of the grid axis minima from the input data.
 Generally it is preferred to use [`FixedRectangularBinning`](@ref) instead,
 as it has a well defined outcome space without knowledge of input data.
+`RectangularBinning` is re-cast into [`FixedRectangularBinning`](@ref)
+once the data are provided, so see that docstring for info on the bin calculation.
 
 Binning instructions are deduced from the type of `ϵ` as follows:
 
@@ -35,114 +37,112 @@ struct RectangularBinning{E} <: AbstractBinning
     ϵ::E
 end
 
-const ValidFixedBinInputs = Union{Number, NTuple}
-
 """
     FixedRectangularBinning <: AbstractBinning
-    FixedRectangularBinning(xmin::NTuple, xmax::NTuple, subdiv::Union{Real, Int})
+    FixedRectangularBinning(ranges::Tuple{<:AbstractRange...}, precise = false)
 
-Rectangular box partition of state space where the extent of the grid is explicitly
-specified by by providing the minium `xmin` and maximum `xmax`
-values along each dimension.
+Rectangular box partition of state space where the partition along each dimension
+is explicitly given by each range `ranges`, which is a tuple of `AbstractRange` subtypes.
+Typically, each range is the output of the `range` Base function, e.g.,
+`ranges = (0:0.1:1, range(0, 1; length = 101), range(2.1, 3.2; step = 0.33))`.
+All ranges must be sorted.
 
-The third argument `subdiv` decides how the grid is subdivided:
-if it is an integer, each dimension is subdivided into that many subintervals.
-Otherwise it is a real, and it is taken as the edge width for each dimension.
+The optional second argument `precise` dictates whether Julia Base's `TwicePrecision`
+is used for when searching where a point falls into the range.
+Only useful for edge cases of points being almost exactly on the bin edges,
+and comes with a performance downside, so by default it is `false`.
 
 Points falling outside the partition do not contribute to probabilities.
-Since bins are always left-closed-right-open `[a, b)`, keep this in mind
-when specifying bin width directly. I.e., if `xmax = 1`,
-a data point with value `1` does not fall into any bins!
+Bins are always left-closed-right-open `[a, b)`.
+**This means that the last value of
+each of the ranges dictates the last right-closing value.**
+This value does _not_ belong to the histogram!
+E.g., if given a range `r = range(0, 1; length = 11)`, with `r[end] = 1`,
+the value `1` is outside the partition and would not attribute any
+increase of the probability corresponding to the last bin (here `[0.9, 1)`)!
+
+Equivalently, the size of the histogram is
+`histsize = map(r -> length(r)-1, ranges)`.
 
 `FixedRectangularBinning` leads to a well-defined outcome space without knowledge of input
 data, see [`ValueHistogram`](@ref).
 
-`ϵmin`/`emax` must be `NTuple{D, <:Real}` for input of `D`-dimensional data.
+    FixedRectangularBinning(range::AbstractRange, D::Int = 1, precise = false)
 
-    FixedRectangularBinning(xmin::Real, xmax::Real, subdiv, D::Int = 1)
-
-This is a convenience method where each dimension of the binning has the same extent
+This is a convenience method where each dimension of the binning has the same range
 and the input data are `D` dimensional, which defaults to 1 (timeseries).
 """
-struct FixedRectangularBinning{D,T<:Real,S<:Union{<:Real,Int}} <: AbstractBinning
-    ϵmin::NTuple{D,T}
-    ϵmax::NTuple{D,T}
-    subdiv::S
+struct FixedRectangularBinning{R<:Tuple} <: AbstractBinning
+    ranges::R
+    precise::Bool
+end
+function FixedRectangularBinning(ranges::R, precise = false) where {R}
+    if any(r -> !issorted(r), ranges)
+        throw(ArgumentError("All input ranges must be sorted!"))
+    end
+    return FixedRectangularBinning{R}(ranges, precise)
+end
+# Convenience
+function FixedRectangularBinning(r::AbstractRange, D::Int = 1, precise = false)
+    FixedRectangularBinning(ntuple(x->r, D), precise)
+end
+
+# Deprecations
+function FixedRectangularBinning(ϵmin::NTuple{D,T}, ϵmax::NTuple{D,T}, N::Int)
+    FixedRectangularBinning(ntuple(x->range(ϵmin,ϵmax;length=N), D))
 end
 function FixedRectangularBinning(ϵmin::Real, ϵmax::Real, N, D::Int = 1)
-    FixedRectangularBinning(ntuple(x->ϵmin, D), ntuple(x->ϵmax, D), N)
+    if N isa Int
+        FixedRectangularBinning(ntuple(x-> range(ϵmin, ϵmax; length = N), D))
+    else
+        FixedRectangularBinning(ntuple(x-> range(ϵmin, ϵmax; step = N), D))
+    end
 end
 
 """
     RectangularBinEncoding <: Encoding
-    RectangularBinEncoding(binning::RectangularBinning, x; n_eps = 2)
-    RectangularBinEncoding(binning::FixedRectangularBinning; n_eps = 2)
+    RectangularBinEncoding(binning::RectangularBinning, x)
+    RectangularBinEncoding(binning::FixedRectangularBinning)
 
 An encoding scheme that [`encode`](@ref)s points `χ ∈ x` into their histogram bins.
-It finds the minima along each dimension, and computes appropriate
-edge lengths for each dimension of `x` given a rectangular binning.
 
-The second signature does not need `x` because (1) the binning is fixed, and the
-size of `x` doesn't matter, and (2) because the binning contains the dimensionality
-information as `ϵmin/max` is already an `NTuple`.
+The first call signature simply initializes a [`FixedRectangularBinning`](@ref)
+and then calls the second call signature.
 
-When specifying `n::Int` amount of bins instead of directly providing an
-edge with in the `binning`, to roundoff errors typically occur when computing bin edges.
-In this case, the computed bin widths
-are increased to their `nextfloat` `n_eps` times
-to ensure the correct number of bins is produced.
-
-See also: [`RectangularBinning`](@ref), [`FixedRectangularBinning`](@ref).
+See [`FixedRectangularBinning`](@ref) for info on mapping points to bins.
 """
-struct RectangularBinEncoding{B, D, T, C, L} <: HistogramEncoding
-    binning::B # either RectangularBinning or FixedRectangularBinning
-    mini::SVector{D,T} # fields are either static vectors or numbers
-    edgelengths::SVector{D,T}
-    histsize::SVector{D,Int}
+struct RectangularBinEncoding{R<:Tuple, C, L} <: HistogramEncoding
+    ranges::R
+    precise::Bool
+    histsize::NTuple{D,Int}
     ci::C # cartesian indices
     li::L # linear indices
 end
 
 function Base.show(io::IO, x::RectangularBinEncoding)
     return print(io, "RectangularBinEncoding\n" *
-        "  box corners: $(x.mini)\n" *
-        "  edgelengths: $(x.edgelengths)\n" *
+        "  ranges: $(x.ranges)\n" *
         "  histogram size: $(x.histsize)"
     )
 end
 
-function encode(e::RectangularBinEncoding, point)
-    (; mini, edgelengths) = e
-    # Map a data point to its bin edge (plus one because indexing starts from 1)
-    bin = floor.(Int, (point .- mini) ./ edgelengths) .+ 1
-    cartidx = CartesianIndex(Tuple(bin))
-    # We have decided on the arbitrary convention that out of bound points
-    # will get the special symbol `-1`. Erroring doesn't make sense as it is expected
-    # that for fixed histograms there may be points outside of them.
-    if checkbounds(Bool, e.li, cartidx)
-        return @inbounds e.li[cartidx]
-    else
-        return -1
-    end
-end
+##################################################################
+# Initialization of encoding
+##################################################################
+# input data not needed here
+RectangularBinEncoding(b::FixedRectangularBinning, x) =
+RectangularBinEncoding(b::FixedRectangularBinning)
 
-function decode(e::RectangularBinEncoding{B, D, T}, bin::Int) where {B, D, T}
-    V = SVector{D,T}
-    if checkbounds(Bool, e.ci, bin)
-        @inbounds cartesian = e.ci[bin]
-    else
-        error("Cannot decode integer $(bin): out of bounds of underlying histogram.")
-    end
-    (; mini, edgelengths) = e
-    # Remove one because we want lowest value corner, and we get indices starting from 1
-    return (V(Tuple(cartesian)) .- 1) .* edgelengths .+ mini
+function RectangularBinEncoding(b::FixedRectangularBinning)
+    ranges = b.binning.ranges
+    histsize = map(r -> length(r)-1, ranges)
+    ci = CartesianIndices(Tuple(histsize))
+    li = LinearIndices(ci)
+    RectangularBinEncoding(ranges, b.precise, histsize, ci, li)
 end
 
-##################################################################
-# Initialization of encodings
-##################################################################
-# Data-controlled grid
-function RectangularBinEncoding(b::RectangularBinning, x; n_eps = 2)
+# Data-controlled grid: just cast into FixesRectangularBinning
+function RectangularBinEncoding(b::RectangularBinning, x)
     # This function always returns static vectors and is type stable
     D = dimension(x)
     T = eltype(x)
@@ -150,26 +150,17 @@ function RectangularBinEncoding(b::RectangularBinning, x; n_eps = 2)
     mini, maxi = minmaxima(x)
     v = ones(SVector{D,T})
     if ϵ isa Float64 || ϵ isa AbstractVector{<:AbstractFloat}
-        edgelengths = SVector{D,T}(ϵ .* v)
-        histsize = ceil.(Int, (maxi .- mini) ./ edgelengths)
+        widths = SVector{D,T}(ϵ .* v)
+        ranges = ntuple(range(mini[i], maxi[i]; step = widths[i]), D)
     elseif ϵ isa Int || ϵ isa Vector{Int}
-        edgeslengths_nonadjusted = @. (maxi - mini)/ϵ
-        # Just taking nextfloat once here isn't enough for bins to cover data when using
-        # `encode_as_bin` later, because subtraction and division leads to loss
-        # of precision. We need a slightly bigger number, so apply nextfloat twice.
-        edgelengths = SVector{D,T}(nextfloat.(edgeslengths_nonadjusted, n_eps))
-        if ϵ isa Vector{Int}
-            histsize = SVector{D, Int}(ϵ)
-        else
-            histsize = SVector{D, Int}(fill(ϵ, D))
-        end
+        lengths = ϵ .* ones(SVector{D,Int})
+        ranges = ntuple(range(mini[i], maxi[i]; length = lengths[i]), D)
     else
         error("Invalid ϵ for binning of a dataset")
     end
-    # Cartesian indices of the underlying histogram
-    ci = CartesianIndices(Tuple(histsize))
-    li = LinearIndices(ci)
-    RectangularBinEncoding(b, mini, edgelengths, histsize, ci, li)
+    # By default we have the imprecise version here;
+    # use `Fixed` if you want precise
+    return RectangularBinEncoding(FixedRectangularBinning(ranges))
 end
 
 # fixed grid
@@ -201,6 +192,43 @@ function RectangularBinEncoding(b::FixedRectangularBinning{D, T}; n_eps = 2) whe
     RectangularBinEncoding(b, typeof(edgelengths)(mini), edgelengths, histsize, ci, li)
 end
 
+##################################################################
+# encode/decode
+##################################################################
+function encode(e::RectangularBinEncoding, point)
+    ranges = e.ranges
+    if e.precise
+        cartidx = CartesianIndex(map(searchsortedlast, ranges, Tuple(point)))
+    else
+        # These are extracted at compile time and are `Tuple`s
+        edgelengths = map(step, ranges)
+        mini = map(minimum, range)
+        # Map a data point to its bin edge (plus one because indexing starts from 1)
+        bin = floor.(Int, (point .- mini) ./ edgelengths) .+ 1
+        cartidx = CartesianIndex(bin)
+    end
+    # We have decided on the arbitrary convention that out of bound points
+    # will get the special symbol `-1`. Erroring doesn't make sense as it is expected
+    # that for fixed histograms there may be points outside of them.
+    if checkbounds(Bool, e.li, cartidx)
+        return @inbounds e.li[cartidx]
+    else
+        return -1
+    end
+end
+
+function decode(e::RectangularBinEncoding{R}, bin::Int) where {R}
+    V = SVector{length(R), eltype(first(e.ranges))}
+    if checkbounds(Bool, e.ci, bin)
+        @inbounds cartesian = e.ci[bin]
+    else
+        error("Cannot decode integer $(bin): out of bounds of underlying histogram.")
+    end
+    (; mini, edgelengths) = e
+    # Remove one because we want lowest value corner, and we get indices starting from 1
+    return (V(Tuple(cartesian)) .- 1) .* edgelengths .+ mini
+end
+
 ##################################################################
 # Outcomes / total outcomes
 ##################################################################
diff --git a/test/probabilities/estimators/value_histogram.jl b/test/probabilities/estimators/value_histogram.jl
index 2d743f4cd..0f2ef49bd 100644
--- a/test/probabilities/estimators/value_histogram.jl
+++ b/test/probabilities/estimators/value_histogram.jl
@@ -88,7 +88,7 @@ end
     n = 10 # boxes cover 0 - 1 in steps of slightly more than 0.1
     ε = nextfloat(0.1, 2) # this guarantees that we get the same as the `n` above!
 
-    bin = FixedRectangularBinning(0, 1, n, 2)
+    bin = FixedRectangularBinning(0.0, 1.0, n, 2)
 
     est = ValueHistogram(bin)
     p = probabilities(est, x)

From 1aa1547fc8e2e86881f2ac15ac40378eeb92005e Mon Sep 17 00:00:00 2001
From: George Datseris <datseris.george@gmail.com>
Date: Sat, 14 Jan 2023 12:52:32 +0200
Subject: [PATCH 06/29] make compilable

---
 .../rectangular_binning.jl                    | 56 ++++++-------------
 1 file changed, 16 insertions(+), 40 deletions(-)

diff --git a/src/encoding_implementations/rectangular_binning.jl b/src/encoding_implementations/rectangular_binning.jl
index 5a9ed3819..58bc054cd 100644
--- a/src/encoding_implementations/rectangular_binning.jl
+++ b/src/encoding_implementations/rectangular_binning.jl
@@ -53,7 +53,7 @@ Only useful for edge cases of points being almost exactly on the bin edges,
 and comes with a performance downside, so by default it is `false`.
 
 Points falling outside the partition do not contribute to probabilities.
-Bins are always left-closed-right-open `[a, b)`.
+Bins are always left-closed-right-open: `[a, b)`.
 **This means that the last value of
 each of the ranges dictates the last right-closing value.**
 This value does _not_ belong to the histogram!
@@ -61,8 +61,8 @@ E.g., if given a range `r = range(0, 1; length = 11)`, with `r[end] = 1`,
 the value `1` is outside the partition and would not attribute any
 increase of the probability corresponding to the last bin (here `[0.9, 1)`)!
 
-Equivalently, the size of the histogram is
-`histsize = map(r -> length(r)-1, ranges)`.
+**Equivalently, the size of the histogram is
+`histsize = map(r -> length(r)-1, ranges)`!**
 
 `FixedRectangularBinning` leads to a well-defined outcome space without knowledge of input
 data, see [`ValueHistogram`](@ref).
@@ -88,8 +88,8 @@ function FixedRectangularBinning(r::AbstractRange, D::Int = 1, precise = false)
 end
 
 # Deprecations
-function FixedRectangularBinning(ϵmin::NTuple{D,T}, ϵmax::NTuple{D,T}, N::Int)
-    FixedRectangularBinning(ntuple(x->range(ϵmin,ϵmax;length=N), D))
+function FixedRectangularBinning(ϵmin::NTuple{D,T}, ϵmax::NTuple{D,T}, N::Int) where {D, T}
+    FixedRectangularBinning(ntuple(x->range(ϵmin[i],ϵmax[i];length=N), D))
 end
 function FixedRectangularBinning(ϵmin::Real, ϵmax::Real, N, D::Int = 1)
     if N isa Int
@@ -111,7 +111,7 @@ and then calls the second call signature.
 
 See [`FixedRectangularBinning`](@ref) for info on mapping points to bins.
 """
-struct RectangularBinEncoding{R<:Tuple, C, L} <: HistogramEncoding
+struct RectangularBinEncoding{R<:Tuple, D, C, L} <: HistogramEncoding
     ranges::R
     precise::Bool
     histsize::NTuple{D,Int}
@@ -129,17 +129,22 @@ end
 ##################################################################
 # Initialization of encoding
 ##################################################################
-# input data not needed here
-RectangularBinEncoding(b::FixedRectangularBinning, x) =
-RectangularBinEncoding(b::FixedRectangularBinning)
-
+# Fixed grid
 function RectangularBinEncoding(b::FixedRectangularBinning)
-    ranges = b.binning.ranges
+    ranges = b.ranges
     histsize = map(r -> length(r)-1, ranges)
     ci = CartesianIndices(Tuple(histsize))
     li = LinearIndices(ci)
     RectangularBinEncoding(ranges, b.precise, histsize, ci, li)
 end
+function RectangularBinEncoding(b::FixedRectangularBinning, x)
+    if length(e.ranges) != dimension(x)
+        throw(ArgumentError("""
+        The dimensionality of the `FixedRectangularBinning` and input `x` do not match.
+        Got $(e.ranges) and $(dimension(x))."""))
+    end
+    return RectangularBinEncoding(b)
+end
 
 # Data-controlled grid: just cast into FixesRectangularBinning
 function RectangularBinEncoding(b::RectangularBinning, x)
@@ -163,35 +168,6 @@ function RectangularBinEncoding(b::RectangularBinning, x)
     return RectangularBinEncoding(FixedRectangularBinning(ranges))
 end
 
-# fixed grid
-function RectangularBinEncoding(b::FixedRectangularBinning{D}, x; n_eps = 2) where {D}
-    if D != dimension(x)
-        throw(ArgumentError("""
-        The dimensionality of the `FixedRectangularBinning` and input `x` do not match.
-        Got $(D) and $(dimension(x))."""))
-    end
-    return RectangularBinEncoding(b; n_eps)
-end
-
-function RectangularBinEncoding(b::FixedRectangularBinning{D, T}; n_eps = 2) where {D,T}
-    # This function always returns static vectors and is type stable
-    ϵmin, ϵmax = b.ϵmin, b.ϵmax
-    mini = SVector{D, T}(ϵmin)
-    maxi = SVector{D, T}(ϵmax)
-    if b.subdiv isa Real
-        edgelengths = ones(SVector{D, T}) .* b.subdiv
-        histsize = ceil.(Int, (maxi .- mini) ./ edgelengths)
-    elseif b.subdiv isa Int
-        N = b.subdiv
-        edgelengths_nonadjusted = @. (maxi .- mini) / N
-        edgelengths = nextfloat.(edgelengths_nonadjusted, n_eps)
-        histsize = SVector{D,Int}(fill(N, D))
-    end
-    ci = CartesianIndices(Tuple(histsize))
-    li = LinearIndices(ci)
-    RectangularBinEncoding(b, typeof(edgelengths)(mini), edgelengths, histsize, ci, li)
-end
-
 ##################################################################
 # encode/decode
 ##################################################################

From 8c988c0b2429b3ed7a00babdf0ad9451f15a0f2b Mon Sep 17 00:00:00 2001
From: George Datseris <datseris.george@gmail.com>
Date: Sat, 14 Jan 2023 13:03:21 +0200
Subject: [PATCH 07/29] super simple outcome space

---
 .../rectangular_binning.jl                    | 25 +++++++++++--------
 .../value_histogram.jl                        |  5 +++-
 2 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/src/encoding_implementations/rectangular_binning.jl b/src/encoding_implementations/rectangular_binning.jl
index 58bc054cd..035ce64fd 100644
--- a/src/encoding_implementations/rectangular_binning.jl
+++ b/src/encoding_implementations/rectangular_binning.jl
@@ -148,7 +148,6 @@ end
 
 # Data-controlled grid: just cast into FixesRectangularBinning
 function RectangularBinEncoding(b::RectangularBinning, x)
-    # This function always returns static vectors and is type stable
     D = dimension(x)
     T = eltype(x)
     ϵ = b.ϵ
@@ -177,10 +176,10 @@ function encode(e::RectangularBinEncoding, point)
         cartidx = CartesianIndex(map(searchsortedlast, ranges, Tuple(point)))
     else
         # These are extracted at compile time and are `Tuple`s
-        edgelengths = map(step, ranges)
-        mini = map(minimum, range)
+        widths = map(step, ranges)
+        mini = map(minimum, ranges)
         # Map a data point to its bin edge (plus one because indexing starts from 1)
-        bin = floor.(Int, (point .- mini) ./ edgelengths) .+ 1
+        bin = floor.(Int, (point .- mini) ./ widths) .+ 1
         cartidx = CartesianIndex(bin)
     end
     # We have decided on the arbitrary convention that out of bound points
@@ -193,16 +192,18 @@ function encode(e::RectangularBinEncoding, point)
     end
 end
 
-function decode(e::RectangularBinEncoding{R}, bin::Int) where {R}
-    V = SVector{length(R), eltype(first(e.ranges))}
+function decode(e::RectangularBinEncoding, bin::Int)
     if checkbounds(Bool, e.ci, bin)
         @inbounds cartesian = e.ci[bin]
     else
         error("Cannot decode integer $(bin): out of bounds of underlying histogram.")
     end
-    (; mini, edgelengths) = e
+    ranges = e.ranges
+    V = SVector{length(ranges), eltype(float(first(ranges)))}
+    widths = map(step, ranges)
+    mini = map(minimum, ranges)
     # Remove one because we want lowest value corner, and we get indices starting from 1
-    return (V(Tuple(cartesian)) .- 1) .* edgelengths .+ mini
+    return (V(Tuple(cartesian)) .- 1) .* widths .+ mini
 end
 
 ##################################################################
@@ -211,9 +212,13 @@ end
 total_outcomes(e::RectangularBinEncoding) = prod(e.histsize)
 
 function outcome_space(e::RectangularBinEncoding)
-    # this is super simple :P could be optimized but its not a frequent operation
-    return [decode(e, i) for i in 1:total_outcomes(e)]
+    # This is super simple thanks to using ranges :)
+    # (and super performant)
+    reduced_ranges = map(r -> r[1:end-1], e.ranges)
+    return collect(Iterators.product(reduced_ranges...))
 end
+outcome_space(b::AbstractBinning, args...) =
+outcome_space(RectangularBinEncoding(b, args...))
 
 ##################################################################
 # low level histogram call
diff --git a/src/probabilities_estimators/value_histogram.jl b/src/probabilities_estimators/value_histogram.jl
index 240427229..df74104ac 100644
--- a/src/probabilities_estimators/value_histogram.jl
+++ b/src/probabilities_estimators/value_histogram.jl
@@ -30,7 +30,10 @@ The outcome space for `ValueHistogram` is the unique bins constructed
 from `b`. Each bin is identified by its left (lowest-value) corner,
 because bins are always left-closed-right-open intervals `[a, b)`.
 The bins are in data units, not integer (cartesian indices units), and
-are returned as `SVector`s.
+are returned as `Tuple`s. For convenience, [`outcome_space`](@ref)
+returns the outcomes in the same array format as the histogram
+(e.g., `Matrix` for 2D input).
+
 For [`FixedRectangularBinning`](@ref) the [`outcome_space`](@ref) is well-defined from the
 binning, but for [`RectangularBinning`](@ref) input `x` is needed as well.
 """

From 6c3f14eab13788010fb6c893f766ba307be71a9b Mon Sep 17 00:00:00 2001
From: George Datseris <datseris.george@gmail.com>
Date: Sat, 14 Jan 2023 13:04:03 +0200
Subject: [PATCH 08/29] move deprecations to their appropriate file

---
 src/deprecations.jl                                 | 13 +++++++++++++
 src/encoding_implementations/rectangular_binning.jl | 12 ------------
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/src/deprecations.jl b/src/deprecations.jl
index 73476c099..cd590d0a8 100644
--- a/src/deprecations.jl
+++ b/src/deprecations.jl
@@ -1,3 +1,16 @@
+# from before histogram-from-ranges rework
+function FixedRectangularBinning(ϵmin::NTuple{D,T}, ϵmax::NTuple{D,T}, N::Int) where {D, T}
+    FixedRectangularBinning(ntuple(x->range(ϵmin[i],ϵmax[i];length=N), D))
+end
+function FixedRectangularBinning(ϵmin::Real, ϵmax::Real, N, D::Int = 1)
+    if N isa Int
+        FixedRectangularBinning(ntuple(x-> range(ϵmin, ϵmax; length = N), D))
+    else
+        FixedRectangularBinning(ntuple(x-> range(ϵmin, ϵmax; step = N), D))
+    end
+end
+
+
 # from before https://github.com/JuliaDynamics/ComplexityMeasures.jl/pull/239
 function entropy(e::EntropyDefinition, est::DiffEntropyEst, x)
     if e isa Shannon
diff --git a/src/encoding_implementations/rectangular_binning.jl b/src/encoding_implementations/rectangular_binning.jl
index 035ce64fd..a1829099c 100644
--- a/src/encoding_implementations/rectangular_binning.jl
+++ b/src/encoding_implementations/rectangular_binning.jl
@@ -87,18 +87,6 @@ function FixedRectangularBinning(r::AbstractRange, D::Int = 1, precise = false)
     FixedRectangularBinning(ntuple(x->r, D), precise)
 end
 
-# Deprecations
-function FixedRectangularBinning(ϵmin::NTuple{D,T}, ϵmax::NTuple{D,T}, N::Int) where {D, T}
-    FixedRectangularBinning(ntuple(x->range(ϵmin[i],ϵmax[i];length=N), D))
-end
-function FixedRectangularBinning(ϵmin::Real, ϵmax::Real, N, D::Int = 1)
-    if N isa Int
-        FixedRectangularBinning(ntuple(x-> range(ϵmin, ϵmax; length = N), D))
-    else
-        FixedRectangularBinning(ntuple(x-> range(ϵmin, ϵmax; step = N), D))
-    end
-end
-
 """
     RectangularBinEncoding <: Encoding
     RectangularBinEncoding(binning::RectangularBinning, x)

From 119ce689edd84ba0fc1ffaf32e5eb3a26bc7bd6b Mon Sep 17 00:00:00 2001
From: George Datseris <datseris.george@gmail.com>
Date: Sat, 14 Jan 2023 13:06:40 +0200
Subject: [PATCH 09/29] more clear docs for rect bin

---
 .../rectangular_binning.jl                      | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/src/encoding_implementations/rectangular_binning.jl b/src/encoding_implementations/rectangular_binning.jl
index a1829099c..7df603744 100644
--- a/src/encoding_implementations/rectangular_binning.jl
+++ b/src/encoding_implementations/rectangular_binning.jl
@@ -7,20 +7,19 @@ abstract type HistogramEncoding <: Encoding end
 ##################################################################
 # Structs and docstrings
 ##################################################################
-# Notice that the binning types are intermediate structs that are not used directly
-# in the source code. Their only purpose is instructions of how to create a
-# `RectangularBinEncoder`. All actual source code functionality of `ValueHistogram`
-# is implemented based on `RectangularBinEncoder`.
+# The binning types are intermediate structs whose only purpose is
+# instructions for initializing the `RectangularBinEncoding`
 
 """
     RectangularBinning(ϵ) <: AbstractBinning
 
 Rectangular box partition of state space using the scheme `ϵ`,
-deducing the coordinates of the grid axis minima from the input data.
-Generally it is preferred to use [`FixedRectangularBinning`](@ref) instead,
-as it has a well defined outcome space without knowledge of input data.
-`RectangularBinning` is re-cast into [`FixedRectangularBinning`](@ref)
-once the data are provided, so see that docstring for info on the bin calculation.
+deducing the histogram extent and bin width from the input data.
+
+`RectangularBinning` is a convenience struct.
+It is re-cast into [`FixedRectangularBinning`](@ref)
+once the data are provided, so see that docstring for info on the bin calculation
+and for the possibility of more precision during the histogram calculation.
 
 Binning instructions are deduced from the type of `ϵ` as follows:
 

From 9ae366795e499990dcce542f36ca46cf84c9d8e1 Mon Sep 17 00:00:00 2001
From: George Datseris <datseris.george@gmail.com>
Date: Sat, 14 Jan 2023 13:16:05 +0200
Subject: [PATCH 10/29] move deprecations to file

---
 src/ComplexityMeasures.jl                          |  4 +++-
 .../rectangular_binning.jl                         | 14 ++++++++------
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/src/ComplexityMeasures.jl b/src/ComplexityMeasures.jl
index 19968dba2..01e019376 100644
--- a/src/ComplexityMeasures.jl
+++ b/src/ComplexityMeasures.jl
@@ -21,7 +21,6 @@ include("encodings.jl")
 include("complexity.jl")
 include("multiscale.jl")
 include("convenience.jl")
-include("deprecations.jl")
 
 # Library implementations (files include other files)
 include("encoding_implementations/encoding_implementations.jl")
@@ -30,6 +29,9 @@ include("entropies_definitions/entropies_definitions.jl")
 include("entropies_estimators/entropies_estimators.jl")
 include("complexity_measures/complexity_measures.jl")
 
+# deprecations (must be after all declarations)
+include("deprecations.jl")
+
 # Update messages:
 using Scratch
 display_update = false
diff --git a/src/encoding_implementations/rectangular_binning.jl b/src/encoding_implementations/rectangular_binning.jl
index 7df603744..eac7ae1c0 100644
--- a/src/encoding_implementations/rectangular_binning.jl
+++ b/src/encoding_implementations/rectangular_binning.jl
@@ -65,23 +65,25 @@ increase of the probability corresponding to the last bin (here `[0.9, 1)`)!
 
 `FixedRectangularBinning` leads to a well-defined outcome space without knowledge of input
 data, see [`ValueHistogram`](@ref).
-
-    FixedRectangularBinning(range::AbstractRange, D::Int = 1, precise = false)
-
-This is a convenience method where each dimension of the binning has the same range
-and the input data are `D` dimensional, which defaults to 1 (timeseries).
 """
 struct FixedRectangularBinning{R<:Tuple} <: AbstractBinning
     ranges::R
     precise::Bool
 end
+
 function FixedRectangularBinning(ranges::R, precise = false) where {R}
     if any(r -> !issorted(r), ranges)
         throw(ArgumentError("All input ranges must be sorted!"))
     end
     return FixedRectangularBinning{R}(ranges, precise)
 end
-# Convenience
+
+"""
+    FixedRectangularBinning(range::AbstractRange, D::Int = 1, precise = false)
+
+This is a convenience method where each dimension of the binning has the same range
+and the input data are `D` dimensional, which defaults to 1 (timeseries).
+"""
 function FixedRectangularBinning(r::AbstractRange, D::Int = 1, precise = false)
     FixedRectangularBinning(ntuple(x->r, D), precise)
 end

From c89427b7d9ff38dfbd2c1b910079c8886f2bd0c4 Mon Sep 17 00:00:00 2001
From: George Datseris <datseris.george@gmail.com>
Date: Sat, 14 Jan 2023 13:20:15 +0200
Subject: [PATCH 11/29] allow geting FRB from RB

---
 .../rectangular_binning.jl                         | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/encoding_implementations/rectangular_binning.jl b/src/encoding_implementations/rectangular_binning.jl
index eac7ae1c0..aeba450b7 100644
--- a/src/encoding_implementations/rectangular_binning.jl
+++ b/src/encoding_implementations/rectangular_binning.jl
@@ -18,8 +18,7 @@ deducing the histogram extent and bin width from the input data.
 
 `RectangularBinning` is a convenience struct.
 It is re-cast into [`FixedRectangularBinning`](@ref)
-once the data are provided, so see that docstring for info on the bin calculation
-and for the possibility of more precision during the histogram calculation.
+once the data are provided, so see that docstring for info on the bin calculation.
 
 Binning instructions are deduced from the type of `ϵ` as follows:
 
@@ -31,6 +30,9 @@ Binning instructions are deduced from the type of `ϵ` as follows:
     intervals that cover all data.
 4. `ϵ::Vector{Float64}` divides the i-th coordinate axis into intervals of fixed size
     `ϵ[i]`, starting from the axis minima until the data is completely covered by boxes.
+
+To ensure all data are covered, the `nextfloat` of the data maximum value is used
+as the maximum along each dimension.
 """
 struct RectangularBinning{E} <: AbstractBinning
     ϵ::E
@@ -137,13 +139,19 @@ end
 
 # Data-controlled grid: just cast into FixesRectangularBinning
 function RectangularBinEncoding(b::RectangularBinning, x)
+    RectangularBinEncoding(FixedRectangularBinning(b, x))
+end
+function FixedRectangularBinning(b::RectangularBinning, x)
     D = dimension(x)
     T = eltype(x)
     ϵ = b.ϵ
     mini, maxi = minmaxima(x)
+    # use `nextfloat` to ensure all data are covered!
+    maxi = nextfloat.(maxi)
     v = ones(SVector{D,T})
     if ϵ isa Float64 || ϵ isa AbstractVector{<:AbstractFloat}
         widths = SVector{D,T}(ϵ .* v)
+        # use `nextfloat` to ensure all data are covered!
         ranges = ntuple(range(mini[i], maxi[i]; step = widths[i]), D)
     elseif ϵ isa Int || ϵ isa Vector{Int}
         lengths = ϵ .* ones(SVector{D,Int})
@@ -153,7 +161,7 @@ function RectangularBinEncoding(b::RectangularBinning, x)
     end
     # By default we have the imprecise version here;
     # use `Fixed` if you want precise
-    return RectangularBinEncoding(FixedRectangularBinning(ranges))
+    return FixedRectangularBinning(ranges)
 end
 
 ##################################################################

From ac40e535ee0a81fc17afae46fa42f86e0ebebc18 Mon Sep 17 00:00:00 2001
From: George Datseris <datseris.george@gmail.com>
Date: Sat, 14 Jan 2023 13:27:00 +0200
Subject: [PATCH 12/29] use `nextfloat` to ensure promise

---
 src/encoding_implementations/rectangular_binning.jl | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/encoding_implementations/rectangular_binning.jl b/src/encoding_implementations/rectangular_binning.jl
index aeba450b7..707382aae 100644
--- a/src/encoding_implementations/rectangular_binning.jl
+++ b/src/encoding_implementations/rectangular_binning.jl
@@ -129,7 +129,7 @@ function RectangularBinEncoding(b::FixedRectangularBinning)
     RectangularBinEncoding(ranges, b.precise, histsize, ci, li)
 end
 function RectangularBinEncoding(b::FixedRectangularBinning, x)
-    if length(e.ranges) != dimension(x)
+    if length(b.ranges) != dimension(x)
         throw(ArgumentError("""
         The dimensionality of the `FixedRectangularBinning` and input `x` do not match.
         Got $(e.ranges) and $(dimension(x))."""))
@@ -176,7 +176,9 @@ function encode(e::RectangularBinEncoding, point)
         widths = map(step, ranges)
         mini = map(minimum, ranges)
         # Map a data point to its bin edge (plus one because indexing starts from 1)
-        bin = floor.(Int, (point .- mini) ./ widths) .+ 1
+        # we also use `Tuple` because `point` is `SVector` and the broadcast
+        # below results in  `Vector` if you mix `Tuple` with `SVector`.
+        bin = floor.(Int, (Tuple(point) .- mini) ./ widths) .+ 1
         cartidx = CartesianIndex(bin)
     end
     # We have decided on the arbitrary convention that out of bound points

From c4fd1def9a0e535aa6e2c699a67bd7224de894ef Mon Sep 17 00:00:00 2001
From: George Datseris <datseris.george@gmail.com>
Date: Sat, 14 Jan 2023 13:47:30 +0200
Subject: [PATCH 13/29] increase performance of inprecise version

---
 .../rectangular_binning.jl                    | 27 ++++++++++---------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/src/encoding_implementations/rectangular_binning.jl b/src/encoding_implementations/rectangular_binning.jl
index 707382aae..63e1237d5 100644
--- a/src/encoding_implementations/rectangular_binning.jl
+++ b/src/encoding_implementations/rectangular_binning.jl
@@ -50,8 +50,8 @@ All ranges must be sorted.
 
 The optional second argument `precise` dictates whether Julia Base's `TwicePrecision`
 is used for when searching where a point falls into the range.
-Only useful for edge cases of points being almost exactly on the bin edges,
-and comes with a performance downside, so by default it is `false`.
+Useful for edge cases of points being almost exactly on the bin edges,
+and is exactly twice as slow, so by default it is `false`.
 
 Points falling outside the partition do not contribute to probabilities.
 Bins are always left-closed-right-open: `[a, b)`.
@@ -102,10 +102,12 @@ and then calls the second call signature.
 
 See [`FixedRectangularBinning`](@ref) for info on mapping points to bins.
 """
-struct RectangularBinEncoding{R<:Tuple, D, C, L} <: HistogramEncoding
+struct RectangularBinEncoding{R<:Tuple, D, T, C, L} <: HistogramEncoding
     ranges::R
     precise::Bool
-    histsize::NTuple{D,Int}
+    mini::SVector{D, T}
+    widths::SVector{D, T}
+    histsize::NTuple{D, Int}
     ci::C # cartesian indices
     li::L # linear indices
 end
@@ -124,9 +126,13 @@ end
 function RectangularBinEncoding(b::FixedRectangularBinning)
     ranges = b.ranges
     histsize = map(r -> length(r)-1, ranges)
+    D = length(ranges)
+    T = float(eltype(first(ranges)))
+    mini = SVector{D,T}(map(minimum, ranges))
+    widths = SVector{D,T}(map(step, ranges))
     ci = CartesianIndices(Tuple(histsize))
     li = LinearIndices(ci)
-    RectangularBinEncoding(ranges, b.precise, histsize, ci, li)
+    RectangularBinEncoding(ranges, b.precise, mini, widths, histsize, ci, li)
 end
 function RectangularBinEncoding(b::FixedRectangularBinning, x)
     if length(b.ranges) != dimension(x)
@@ -170,16 +176,11 @@ end
 function encode(e::RectangularBinEncoding, point)
     ranges = e.ranges
     if e.precise
+        # Don't know how to make this faster unfurtunately...
         cartidx = CartesianIndex(map(searchsortedlast, ranges, Tuple(point)))
     else
-        # These are extracted at compile time and are `Tuple`s
-        widths = map(step, ranges)
-        mini = map(minimum, ranges)
-        # Map a data point to its bin edge (plus one because indexing starts from 1)
-        # we also use `Tuple` because `point` is `SVector` and the broadcast
-        # below results in  `Vector` if you mix `Tuple` with `SVector`.
-        bin = floor.(Int, (Tuple(point) .- mini) ./ widths) .+ 1
-        cartidx = CartesianIndex(bin)
+        bin = floor.(Int, (point .- e.mini) ./ e.widths) .+ 1
+        cartidx = CartesianIndex(Tuple(bin))
     end
     # We have decided on the arbitrary convention that out of bound points
     # will get the special symbol `-1`. Erroring doesn't make sense as it is expected

From 053a905ebe08ee5ceada8d9f17b994b6bab0a8e2 Mon Sep 17 00:00:00 2001
From: George Datseris <datseris.george@gmail.com>
Date: Sat, 14 Jan 2023 13:58:10 +0200
Subject: [PATCH 14/29] correct extraction of length of range

---
 src/encoding_implementations/rectangular_binning.jl | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/encoding_implementations/rectangular_binning.jl b/src/encoding_implementations/rectangular_binning.jl
index 63e1237d5..5a9c9fa3f 100644
--- a/src/encoding_implementations/rectangular_binning.jl
+++ b/src/encoding_implementations/rectangular_binning.jl
@@ -158,10 +158,12 @@ function FixedRectangularBinning(b::RectangularBinning, x)
     if ϵ isa Float64 || ϵ isa AbstractVector{<:AbstractFloat}
         widths = SVector{D,T}(ϵ .* v)
         # use `nextfloat` to ensure all data are covered!
-        ranges = ntuple(range(mini[i], maxi[i]; step = widths[i]), D)
+        ranges = ntuple(i -> range(mini[i], maxi[i]; step = widths[i]), D)
     elseif ϵ isa Int || ϵ isa Vector{Int}
-        lengths = ϵ .* ones(SVector{D,Int})
-        ranges = ntuple(range(mini[i], maxi[i]; length = lengths[i]), D)
+        # We add one, because the user input specifies the number of bins,
+        # and the number of bins is the range length - 1
+        lengths = ϵ .* ones(SVector{D,Int}) .+ 1
+        ranges = ntuple(i -> range(mini[i], maxi[i]; length = lengths[i]), D)
     else
         error("Invalid ϵ for binning of a dataset")
     end

From e3e082dad1969431142b09d7ac72ba188655115e Mon Sep 17 00:00:00 2001
From: George Datseris <datseris.george@gmail.com>
Date: Sat, 14 Jan 2023 14:46:49 +0200
Subject: [PATCH 15/29] re-write tests

---
 .../rectangular_binning.jl                    |  37 ++++--
 .../value_histogram.jl                        |   6 +-
 .../estimators/value_histogram.jl             | 121 +++++++++---------
 3 files changed, 92 insertions(+), 72 deletions(-)

diff --git a/src/encoding_implementations/rectangular_binning.jl b/src/encoding_implementations/rectangular_binning.jl
index 5a9c9fa3f..118630fad 100644
--- a/src/encoding_implementations/rectangular_binning.jl
+++ b/src/encoding_implementations/rectangular_binning.jl
@@ -11,14 +11,15 @@ abstract type HistogramEncoding <: Encoding end
 # instructions for initializing the `RectangularBinEncoding`
 
 """
-    RectangularBinning(ϵ) <: AbstractBinning
+    RectangularBinning(ϵ, precise = false) <: AbstractBinning
 
 Rectangular box partition of state space using the scheme `ϵ`,
 deducing the histogram extent and bin width from the input data.
 
 `RectangularBinning` is a convenience struct.
 It is re-cast into [`FixedRectangularBinning`](@ref)
-once the data are provided, so see that docstring for info on the bin calculation.
+once the data are provided, so see that docstring for info on the bin calculation
+and the meaning of `precise`.
 
 Binning instructions are deduced from the type of `ϵ` as follows:
 
@@ -32,11 +33,14 @@ Binning instructions are deduced from the type of `ϵ` as follows:
     `ϵ[i]`, starting from the axis minima until the data is completely covered by boxes.
 
 To ensure all data are covered, the `nextfloat` of the data maximum value is used
-as the maximum along each dimension.
+as the maximum along each dimension. However this will lead to 100% accurate results
+only when `precise = true`.
 """
 struct RectangularBinning{E} <: AbstractBinning
     ϵ::E
+    precise::Bool
 end
+RectangularBinning(ε) = RectangularBinning(ε, false)
 
 """
     FixedRectangularBinning <: AbstractBinning
@@ -51,7 +55,7 @@ All ranges must be sorted.
 The optional second argument `precise` dictates whether Julia Base's `TwicePrecision`
 is used for when searching where a point falls into the range.
 Useful for edge cases of points being almost exactly on the bin edges,
-and is exactly twice as slow, so by default it is `false`.
+but it is exactly four times as slow, so by default it is `false`.
 
 Points falling outside the partition do not contribute to probabilities.
 Bins are always left-closed-right-open: `[a, b)`.
@@ -157,7 +161,6 @@ function FixedRectangularBinning(b::RectangularBinning, x)
     v = ones(SVector{D,T})
     if ϵ isa Float64 || ϵ isa AbstractVector{<:AbstractFloat}
         widths = SVector{D,T}(ϵ .* v)
-        # use `nextfloat` to ensure all data are covered!
         ranges = ntuple(i -> range(mini[i], maxi[i]; step = widths[i]), D)
     elseif ϵ isa Int || ϵ isa Vector{Int}
         # We add one, because the user input specifies the number of bins,
@@ -169,7 +172,7 @@ function FixedRectangularBinning(b::RectangularBinning, x)
     end
     # By default we have the imprecise version here;
     # use `Fixed` if you want precise
-    return FixedRectangularBinning(ranges)
+    return FixedRectangularBinning(ranges, b.precise)
 end
 
 ##################################################################
@@ -198,14 +201,20 @@ function decode(e::RectangularBinEncoding, bin::Int)
     if checkbounds(Bool, e.ci, bin)
         @inbounds cartesian = e.ci[bin]
     else
-        error("Cannot decode integer $(bin): out of bounds of underlying histogram.")
+        throw(ArgumentError(
+            "Cannot decode integer $(bin): out of bounds of underlying binning."
+        ))
     end
+    # The decoding step is rather trivial here; we just index the ranges at the index
     ranges = e.ranges
-    V = SVector{length(ranges), eltype(float(first(ranges)))}
-    widths = map(step, ranges)
-    mini = map(minimum, ranges)
+    D = length(ranges)
+    left_edge = @inbounds ntuple(i -> ranges[i][cartesian[i]], D)
+    return SVector{D}(left_edge)
+    # widths = e.widths
+    # mini = e.mini
+    # V = SVector{length(ranges), eltype(float(first(ranges)))}
     # Remove one because we want lowest value corner, and we get indices starting from 1
-    return (V(Tuple(cartesian)) .- 1) .* widths .+ mini
+    # return (V(Tuple(cartesian)) .- 1) .* widths .+ mini
 end
 
 ##################################################################
@@ -215,9 +224,11 @@ total_outcomes(e::RectangularBinEncoding) = prod(e.histsize)
 
 function outcome_space(e::RectangularBinEncoding)
     # This is super simple thanks to using ranges :)
-    # (and super performant)
     reduced_ranges = map(r -> r[1:end-1], e.ranges)
-    return collect(Iterators.product(reduced_ranges...))
+    iter = Iterators.product(reduced_ranges...)
+    # Convert to `SVector` because that's the agreed outcome space type
+    V = SVector{length(e.ranges), eltype(float(first(e.ranges)))}
+    return V.(iter)
 end
 outcome_space(b::AbstractBinning, args...) =
 outcome_space(RectangularBinEncoding(b, args...))
diff --git a/src/probabilities_estimators/value_histogram.jl b/src/probabilities_estimators/value_histogram.jl
index df74104ac..59ff304b4 100644
--- a/src/probabilities_estimators/value_histogram.jl
+++ b/src/probabilities_estimators/value_histogram.jl
@@ -30,8 +30,10 @@ The outcome space for `ValueHistogram` is the unique bins constructed
 from `b`. Each bin is identified by its left (lowest-value) corner,
 because bins are always left-closed-right-open intervals `[a, b)`.
 The bins are in data units, not integer (cartesian indices units), and
-are returned as `Tuple`s. For convenience, [`outcome_space`](@ref)
-returns the outcomes in the same array format as the histogram
+are returned as `SVector`s, i.e., same type as input data.
+
+For convenience, [`outcome_space`](@ref)
+returns the outcomes in the same array format as the underlying binning
 (e.g., `Matrix` for 2D input).
 
 For [`FixedRectangularBinning`](@ref) the [`outcome_space`](@ref) is well-defined from the
diff --git a/test/probabilities/estimators/value_histogram.jl b/test/probabilities/estimators/value_histogram.jl
index 0f2ef49bd..f12a1b9d9 100644
--- a/test/probabilities/estimators/value_histogram.jl
+++ b/test/probabilities/estimators/value_histogram.jl
@@ -2,29 +2,43 @@ using ComplexityMeasures
 using Test
 using Random
 
-@testset "Rectangular binning" begin
+@testset "Standard ranges binning" begin
 
     x = Dataset(rand(Random.MersenneTwister(1234), 100_000, 2))
     push!(x, SVector(0, 0)) # ensure both 0 and 1 have values in, exactly.
     push!(x, SVector(1, 1))
-    # All these binnings should give approximately same probabilities
-    n = 10 # boxes cover 0 - 1 in steps of slightly more than 0.1
-    ε = nextfloat(0.1, 2) # this guarantees that we get the same as the `n` above!
 
+    # All these binnings give exactly the same ranges because of the 0-1 values
+    # The value 1 is always included, due to the call of `nextfloat` when making
+    # ranges for the `FixedRectangularBinning`
+    n = 10
+    ε = nextfloat(0.1)
+
+    # All following binnings are equivalent
     binnings = [
-        n,
-        ε,
         RectangularBinning(n),
+        RectangularBinning(n, true),
         RectangularBinning(ε),
         RectangularBinning([n, n]),
-        RectangularBinning([ε, ε])
+        RectangularBinning([ε, ε]),
+        FixedRectangularBinning(range(0, nextfloat(1.0); length = n+1), 2),
+        FixedRectangularBinning(range(0, nextfloat(1.0); length = n+1), 2, true),
+        FixedRectangularBinning(
+            (range(0, nextfloat(1.0); step = ε), range(0, nextfloat(1.0); step = ε))
+        ),
+
     ]
 
     for bin in binnings
-        @testset "bin isa $(typeof(bin))" begin
+        @testset "bin isa $(nameof(typeof(bin)))" begin
             est = ValueHistogram(bin)
+            out = outcome_space(est, x)
+            @test length(out) == n^2
+
             p = probabilities(est, x)
+            # all bins are covered due to random data
             @test length(p) == 100
+            # ensure uniform coverage since input is uniformly random
             @test all(e -> 0.009 ≤ e ≤ 0.011, p)
 
             p2, o = probabilities_and_outcomes(est, x)
@@ -34,14 +48,16 @@ using Random
             @test all(x -> x < 1, maximum(o))
             o2 = outcomes(est, x)
             @test o2 == o
-        end
-    end
 
-    @testset "Check rogue 1s" begin
-        b = RectangularBinning(0.1) # no `nextfloat` here, so the rogue (1, 1) is in extra bin!
-        p = probabilities(ValueHistogram(b), x)
-        @test length(p) == 100 + 1
-        @test p[end] ≈ 1/100_000 atol = 1e-5
+            ospace = outcome_space(est, x)
+            @test ospace isa Matrix{SVector{2, Float64}}
+            @test size(ospace) == (n,n)
+            @test SVector(0.0, 0.0) ∈ ospace
+
+            # ensure 1 is included, and must also be in the last bin
+            rbe = RectangularBinEncoding(bin, x)
+            @test encode(rbe, SVector(1.0, 1.0)) == n^2
+        end
     end
 
     @testset "vector" begin
@@ -56,51 +72,42 @@ using Random
         end
     end
 
-    # An extra bin might appear due to roundoff error after using nextfloat when
-    # constructing `RectangularBinEncoding`s.
-    # The following tests ensure with *some* certainty that this does not occur.
-    @testset "Rogue extra bins" begin
-        # Concrete examples where a rogue extra bin has appeared.
-        x1 = [0.5213236385155418, 0.03516318860292644, 0.5437726723245310, 0.52598710966469610, 0.34199879802511246, 0.6017129426606275, 0.6972844365031351, 0.89163995617220900, 0.39009862510518045, 0.06296038912844315, 0.9897176284081909, 0.7935001082966890, 0.890198448900077700, 0.11762640519877565, 0.7849413168095061, 0.13768932585886573, 0.50869900547793430, 0.18042178201388548, 0.28507312391861270, 0.96480406570924970]
-        x2 = [0.4125754262679051, 0.52844411982339560, 0.4535277505543355, 0.25502420827802674, 0.77862522996085940, 0.6081939026664078, 0.2628674795466387, 0.18846258495465185, 0.93320375283233840, 0.40093871561247874, 0.8032730760974603, 0.3531608285217499, 0.018436525139752136, 0.55541857934068420, 0.9907521337888632, 0.15382361136212420, 0.01774321666660561, 0.67569337507728300, 0.06130971689608822, 0.31417161558476836]
-        N = 10
-        b = RectangularBinning(N)
-        rb1 = RectangularBinEncoding(b, x1; n_eps = 1)
-        rb2 = RectangularBinEncoding(b, x1; n_eps = 2)
-        @test encode(rb1, maximum(x1)) == -1 # shouldn't occur, but does when tolerance is too low
-        @test encode(rb2, maximum(x1)) == 10
-
-        rb1 = RectangularBinEncoding(b, x2; n_eps = 1)
-        rb2 = RectangularBinEncoding(b, x2; n_eps = 2)
-        @test encode(rb1, maximum(x2)) == -1 # shouldn't occur, but does when tolerance is too low
-        @test encode(rb2, maximum(x2)) == 10
+    @testset "convenience" begin
+        @test ValueHistogram(n) == ValueHistogram(RectangularBinning(n))
+        @test ValueHistogram(ε) == ValueHistogram(RectangularBinning(ε))
     end
 
 end
 
+@testset "Encodings, edge cases" begin
+    # Encoding has been practically tested fully in the codes above.
+    # here we just ensure the API works as expected and edge cases also
+    # work as expected.
+    # Concrete examples where a rogue extra bin has appeared.
+    x1 = [0.5213236385155418, 0.03516318860292644, 0.5437726723245310, 0.52598710966469610, 0.34199879802511246, 0.6017129426606275, 0.6972844365031351, 0.89163995617220900, 0.39009862510518045, 0.06296038912844315, 0.9897176284081909, 0.7935001082966890, 0.890198448900077700, 0.11762640519877565, 0.7849413168095061, 0.13768932585886573, 0.50869900547793430, 0.18042178201388548, 0.28507312391861270, 0.96480406570924970]
+    N = 10
+
+    b1 = RectangularBinning(N)
+    rb1 = RectangularBinEncoding(RectangularBinning(N, false), x1)
+    rb2 = RectangularBinEncoding(RectangularBinning(N, true), x1)
+
+    # With low accuracy we get this rounding error
+    @test encode(rb1, maximum(x1)) == -1
+    @test encode(rb2, maximum(x1)) == 10
+
+    x2 = [0.4125754262679051, 0.52844411982339560, 0.4535277505543355, 0.25502420827802674, 0.77862522996085940, 0.6081939026664078, 0.2628674795466387, 0.18846258495465185, 0.93320375283233840, 0.40093871561247874, 0.8032730760974603, 0.3531608285217499, 0.018436525139752136, 0.55541857934068420, 0.9907521337888632, 0.15382361136212420, 0.01774321666660561, 0.67569337507728300, 0.06130971689608822, 0.31417161558476836]
+    rb1 = RectangularBinEncoding(RectangularBinning(N, false), x2)
+    rb2 = RectangularBinEncoding(RectangularBinning(N, true), x2)
+    @test encode(rb1, maximum(x2)) == -1
+    @test encode(rb2, maximum(x2)) == 10
+
+    # and a final analytic test with decode
+    bin = FixedRectangularBinning(range(0, 1.0; length = 11), 2, true)
+    rbc = RectangularBinEncoding(bin)
+    @test encode(rbc, SVector(0.0, 0.0)) == 1
+    @test encode(rbc, SVector(1.0, 1.0)) == -1
+    @test decode(rbc, 1) == SVector(0.0, 0.0)
+    @test decode(rbc, 100) == SVector(0.9, 0.9)
+end
 
-@testset "Fixed Rectangular binning" begin
-
-    x = Dataset(rand(Random.MersenneTwister(1234), 100_000, 2))
-    push!(x, SVector(0, 0)) # ensure both 0 and 1 have values in, exactly.
-    push!(x, SVector(1, 1))
-    # All these binnings should give approximately same probabilities
-    n = 10 # boxes cover 0 - 1 in steps of slightly more than 0.1
-    ε = nextfloat(0.1, 2) # this guarantees that we get the same as the `n` above!
-
-    bin = FixedRectangularBinning(0.0, 1.0, n, 2)
-
-    est = ValueHistogram(bin)
-    p = probabilities(est, x)
-    @test length(p) == 100
-    @test all(e -> 0.009 ≤ e ≤ 0.011, p)
-
-    p2, o = probabilities_and_outcomes(est, x)
-    @test p2 == p
-    @test o isa Vector{SVector{2, Float64}}
-    @test length(o) == length(p)
-    @test all(x -> x < 1, maximum(o))
-    o2 = outcomes(est, x)
-    @test o2 == o
 
-end

From dd73c8c9ecb4808dbf8d433d5284ad4fa2d8baf4 Mon Sep 17 00:00:00 2001
From: George Datseris <datseris.george@gmail.com>
Date: Sat, 14 Jan 2023 15:03:56 +0200
Subject: [PATCH 16/29] more clarity in tests

---
 test/probabilities/estimators/value_histogram.jl | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/test/probabilities/estimators/value_histogram.jl b/test/probabilities/estimators/value_histogram.jl
index f12a1b9d9..aac1d37f1 100644
--- a/test/probabilities/estimators/value_histogram.jl
+++ b/test/probabilities/estimators/value_histogram.jl
@@ -12,9 +12,10 @@ using Random
     # The value 1 is always included, due to the call of `nextfloat` when making
     # ranges for the `FixedRectangularBinning`
     n = 10
-    ε = nextfloat(0.1)
+    ε = nextfloat(0.1) # when dividing 0-nextfloat(1) into 10, you get this width
 
     # All following binnings are equivalent
+    # (`nextfloat` is necessary in Fixed, due to the promise given in the regular)
     binnings = [
         RectangularBinning(n),
         RectangularBinning(n, true),
@@ -26,8 +27,13 @@ using Random
         FixedRectangularBinning(
             (range(0, nextfloat(1.0); step = ε), range(0, nextfloat(1.0); step = ε))
         ),
-
     ]
+    # all reduce to these ranges (due to demanding ALL data to be in
+    # the histogram, i.e., also the `SVector(1,1)`)
+    casted_ranges = (
+        0.0:0.10000000000000002:1.0000000000000002,
+        0.0:0.10000000000000002:1.0000000000000002
+    )
 
     for bin in binnings
         @testset "bin isa $(nameof(typeof(bin)))" begin
@@ -57,6 +63,8 @@ using Random
             # ensure 1 is included, and must also be in the last bin
             rbe = RectangularBinEncoding(bin, x)
             @test encode(rbe, SVector(1.0, 1.0)) == n^2
+
+            @test rbe.ranges == casted_ranges
         end
     end
 

From 56df9dee476698cf682191eb00dcac06991572f4 Mon Sep 17 00:00:00 2001
From: George Datseris <datseris.george@gmail.com>
Date: Sat, 14 Jan 2023 16:37:25 +0200
Subject: [PATCH 17/29] fix diversityy

---
 src/deprecations.jl                       | 4 ++--
 src/probabilities_estimators/diversity.jl | 6 ++++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/deprecations.jl b/src/deprecations.jl
index cd590d0a8..6f2c81472 100644
--- a/src/deprecations.jl
+++ b/src/deprecations.jl
@@ -4,9 +4,9 @@ function FixedRectangularBinning(ϵmin::NTuple{D,T}, ϵmax::NTuple{D,T}, N::Int)
 end
 function FixedRectangularBinning(ϵmin::Real, ϵmax::Real, N, D::Int = 1)
     if N isa Int
-        FixedRectangularBinning(ntuple(x-> range(ϵmin, ϵmax; length = N), D))
+        FixedRectangularBinning(ntuple(x-> range(ϵmin, nextfloat(ϵmax); length = N), D))
     else
-        FixedRectangularBinning(ntuple(x-> range(ϵmin, ϵmax; step = N), D))
+        FixedRectangularBinning(ntuple(x-> range(ϵmin, nextfloat(ϵmax); step = N), D))
     end
 end
 
diff --git a/src/probabilities_estimators/diversity.jl b/src/probabilities_estimators/diversity.jl
index 3304d1c51..7d2c1039c 100644
--- a/src/probabilities_estimators/diversity.jl
+++ b/src/probabilities_estimators/diversity.jl
@@ -21,7 +21,8 @@ Diversity probabilities are computed as follows.
 2. Compute ``D = \\{d(\\bf x_t, \\bf x_{t+1}) \\}_{t=1}^{N-mτ-1}``,
     where ``d(\\cdot, \\cdot)`` is the cosine similarity between two `m`-dimensional
     vectors in the embedding.
-3. Divide the interval `[-1, 1]` into `nbins` equally sized subintervals.
+3. Divide the interval `[-1, 1]` into `nbins` equally sized subintervals
+   (including the value `+1`).
 4. Construct a histogram of cosine similarities ``d \\in D`` over those subintervals.
 5. Sum-normalize the histogram to obtain probabilities.
 
@@ -70,5 +71,6 @@ end
 cosine_similarity(xᵢ, xⱼ) = sum(xᵢ .* xⱼ) / (sqrt(sum(xᵢ .^ 2)) * sqrt(sum(xⱼ .^ 2)))
 
 function encoding_for_diversity(nbins::Int)
-    RectangularBinEncoding(FixedRectangularBinning(-1.0, 1.0, nbins))
+    binning = FixedRectangularBinning((range(-1.0, nextfloat(1.0); length = nbins),))
+    return RectangularBinEncoding(binning)
 end

From bc9fa6af02828c405910e74f2e57d5711f051a42 Mon Sep 17 00:00:00 2001
From: George Datseris <datseris.george@gmail.com>
Date: Sat, 14 Jan 2023 16:37:35 +0200
Subject: [PATCH 18/29] remove `transferoperatorencoder` in favor of
 `RectangularBinEncoding`

---
 .../transfer_operator/transfer_operator.jl        | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

diff --git a/src/probabilities_estimators/transfer_operator/transfer_operator.jl b/src/probabilities_estimators/transfer_operator/transfer_operator.jl
index 9ca53b850..ed30d4b2a 100644
--- a/src/probabilities_estimators/transfer_operator/transfer_operator.jl
+++ b/src/probabilities_estimators/transfer_operator/transfer_operator.jl
@@ -179,17 +179,6 @@ struct TransferOperatorApproximationRectangular{
     visitors::Vector{Vector{Int}}
 end
 
-function transferoperatorencoder(
-    binning::Union{FixedRectangularBinning, RectangularBinning}, x::AbstractDataset
-)
-    if binning isa FixedRectangularBinning
-        return RectangularBinEncoding(binning)
-    elseif binning isa RectangularBinning
-        return RectangularBinEncoding(binning, x)
-    else
-        throw(ArgumentError("Binning $(typeof(binning)) not supported for transfer operator"))
-    end
-end
 """
     transferoperator(pts::AbstractDataset,
         binning::RectangularBinning) → TransferOperatorApproximationRectangular
@@ -217,7 +206,7 @@ function transferoperator(pts::AbstractDataset{D, T},
         boundary_condition = :circular) where {D, T<:Real}
 
     L = length(pts)
-    encoder = transferoperatorencoder(binning, pts)
+    encoder = RectangularBinEncoding(binning, pts)
 
     # The L points visits a total of L bins, which are the following bins (referenced
     # here as cartesian coordinates, not absolute bins):
@@ -479,7 +468,7 @@ function transfermatrix(iv::InvariantMeasure)
 end
 
 function probabilities_and_outcomes(est::TransferOperator, x::Array_or_Dataset)
-    encoder = transferoperatorencoder(est.binning, x)
+    encoder = RectangularBinEncoding(est.binning, x)
     to = transferoperator(x, encoder.binning)
     probs = invariantmeasure(to).ρ
 

From 90f654153318037f85cf5fccb59f8c5815b9a6b0 Mon Sep 17 00:00:00 2001
From: George Datseris <datseris.george@gmail.com>
Date: Sat, 14 Jan 2023 16:42:55 +0200
Subject: [PATCH 19/29] add catesian bin index internal function

---
 .../rectangular_binning.jl                    | 29 ++++++++++++++-----
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/src/encoding_implementations/rectangular_binning.jl b/src/encoding_implementations/rectangular_binning.jl
index 118630fad..8c4178b58 100644
--- a/src/encoding_implementations/rectangular_binning.jl
+++ b/src/encoding_implementations/rectangular_binning.jl
@@ -179,14 +179,7 @@ end
 # encode/decode
 ##################################################################
 function encode(e::RectangularBinEncoding, point)
-    ranges = e.ranges
-    if e.precise
-        # Don't know how to make this faster unfurtunately...
-        cartidx = CartesianIndex(map(searchsortedlast, ranges, Tuple(point)))
-    else
-        bin = floor.(Int, (point .- e.mini) ./ e.widths) .+ 1
-        cartidx = CartesianIndex(Tuple(bin))
-    end
+
     # We have decided on the arbitrary convention that out of bound points
     # will get the special symbol `-1`. Erroring doesn't make sense as it is expected
     # that for fixed histograms there may be points outside of them.
@@ -197,6 +190,26 @@ function encode(e::RectangularBinEncoding, point)
     end
 end
 
+"""
+    cartesian_bin_index(e::RectangularBinEncoding, point::SVector)
+
+Internal function called by `encode`. Returns the cartesian index of
+the given `point` within the binning encapsulated in `e`.
+"""
+function cartesian_bin_index(e::RectangularBinEncoding, point)
+    ranges = e.ranges
+    if e.precise
+        # Don't know how to make this faster unfurtunately...
+        cartidx = CartesianIndex(map(searchsortedlast, ranges, Tuple(point)))
+    else
+        bin = floor.(Int, (point .- e.mini) ./ e.widths) .+ 1
+        cartidx = CartesianIndex(Tuple(bin))
+    end
+    return cartidx
+end
+
+
+
 function decode(e::RectangularBinEncoding, bin::Int)
     if checkbounds(Bool, e.ci, bin)
         @inbounds cartesian = e.ci[bin]

From 95557852b6cbd91e38e976ab18306812207382d4 Mon Sep 17 00:00:00 2001
From: George Datseris <datseris.george@gmail.com>
Date: Sat, 14 Jan 2023 16:43:51 +0200
Subject: [PATCH 20/29] actually call the function

---
 src/encoding_implementations/rectangular_binning.jl | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/encoding_implementations/rectangular_binning.jl b/src/encoding_implementations/rectangular_binning.jl
index 8c4178b58..18d83d885 100644
--- a/src/encoding_implementations/rectangular_binning.jl
+++ b/src/encoding_implementations/rectangular_binning.jl
@@ -179,7 +179,7 @@ end
 # encode/decode
 ##################################################################
 function encode(e::RectangularBinEncoding, point)
-
+    cartidx = cartesian_bin_index(e, point)
     # We have decided on the arbitrary convention that out of bound points
     # will get the special symbol `-1`. Erroring doesn't make sense as it is expected
     # that for fixed histograms there may be points outside of them.
@@ -193,8 +193,9 @@ end
 """
     cartesian_bin_index(e::RectangularBinEncoding, point::SVector)
 
-Internal function called by `encode`. Returns the cartesian index of
+Return the cartesian index of
 the given `point` within the binning encapsulated in `e`.
+Internal function called by `encode`.
 """
 function cartesian_bin_index(e::RectangularBinEncoding, point)
     ranges = e.ranges
@@ -208,8 +209,6 @@ function cartesian_bin_index(e::RectangularBinEncoding, point)
     return cartidx
 end
 
-
-
 function decode(e::RectangularBinEncoding, bin::Int)
     if checkbounds(Bool, e.ci, bin)
         @inbounds cartesian = e.ci[bin]

From 976ee77d3b11b9fed6fd9646d65a3b9af7fc5602 Mon Sep 17 00:00:00 2001
From: George Datseris <datseris.george@gmail.com>
Date: Sat, 14 Jan 2023 16:47:25 +0200
Subject: [PATCH 21/29] fix diversity: given bins + 1 to the range

---
 src/deprecations.jl                       | 4 ++--
 src/probabilities_estimators/diversity.jl | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/deprecations.jl b/src/deprecations.jl
index 6f2c81472..8a89fbcb4 100644
--- a/src/deprecations.jl
+++ b/src/deprecations.jl
@@ -4,9 +4,9 @@ function FixedRectangularBinning(ϵmin::NTuple{D,T}, ϵmax::NTuple{D,T}, N::Int)
 end
 function FixedRectangularBinning(ϵmin::Real, ϵmax::Real, N, D::Int = 1)
     if N isa Int
-        FixedRectangularBinning(ntuple(x-> range(ϵmin, nextfloat(ϵmax); length = N), D))
+        FixedRectangularBinning(ntuple(x-> range(ϵmin, nextfloat(float(ϵmax)); length = N), D))
     else
-        FixedRectangularBinning(ntuple(x-> range(ϵmin, nextfloat(ϵmax); step = N), D))
+        FixedRectangularBinning(ntuple(x-> range(ϵmin, nextfloat(float(ϵmax)); step = N), D))
     end
 end
 
diff --git a/src/probabilities_estimators/diversity.jl b/src/probabilities_estimators/diversity.jl
index 7d2c1039c..c74c4840c 100644
--- a/src/probabilities_estimators/diversity.jl
+++ b/src/probabilities_estimators/diversity.jl
@@ -49,7 +49,7 @@ end
 
 function probabilities_and_outcomes(est::Diversity, x::AbstractVector{T}) where T <: Real
     ds, rbc = similarities_and_binning(est, x)
-    return probabilities_and_outcomes(ValueHistogram(rbc.binning), ds)
+    return probabilities_and_outcomes(rbc, ds)
 end
 
 outcome_space(est::Diversity) = outcome_space(encoding_for_diversity(est.nbins))
@@ -71,6 +71,6 @@ end
 cosine_similarity(xᵢ, xⱼ) = sum(xᵢ .* xⱼ) / (sqrt(sum(xᵢ .^ 2)) * sqrt(sum(xⱼ .^ 2)))
 
 function encoding_for_diversity(nbins::Int)
-    binning = FixedRectangularBinning((range(-1.0, nextfloat(1.0); length = nbins),))
+    binning = FixedRectangularBinning((range(-1.0, nextfloat(1.0); length = nbins+1),))
     return RectangularBinEncoding(binning)
 end

From 14e534aa608822f13b40e29f4d9e23a633320423 Mon Sep 17 00:00:00 2001
From: George Datseris <datseris.george@gmail.com>
Date: Sat, 14 Jan 2023 16:47:33 +0200
Subject: [PATCH 22/29] allow dispatch to encoding

---
 src/probabilities_estimators/value_histogram.jl | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/probabilities_estimators/value_histogram.jl b/src/probabilities_estimators/value_histogram.jl
index 59ff304b4..be4b45ffb 100644
--- a/src/probabilities_estimators/value_histogram.jl
+++ b/src/probabilities_estimators/value_histogram.jl
@@ -61,6 +61,9 @@ end
 
 function probabilities_and_outcomes(est::ValueHistogram, x)
     encoding = RectangularBinEncoding(est.binning, x)
+    return probabilities_and_outcomes(encoding, x)
+end
+function probabilities_and_outcomes(encoding::RectangularBinEncoding, x)
     probs, bins = fasthist(encoding, x) # bins are integers here
     unique!(bins) # `bins` is already sorted from `fasthist!`
     # Here we transfor the cartesian coordinate based bins into data unit bins:

From 65faafd9074efc892538c96a153565f57689fbec Mon Sep 17 00:00:00 2001
From: George Datseris <datseris.george@gmail.com>
Date: Sat, 14 Jan 2023 16:50:53 +0200
Subject: [PATCH 23/29] fix incorrect definition of outcome_space in TrOp

---
 .../transfer_operator/transfer_operator.jl                     | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/probabilities_estimators/transfer_operator/transfer_operator.jl b/src/probabilities_estimators/transfer_operator/transfer_operator.jl
index ed30d4b2a..df30f3ede 100644
--- a/src/probabilities_estimators/transfer_operator/transfer_operator.jl
+++ b/src/probabilities_estimators/transfer_operator/transfer_operator.jl
@@ -481,5 +481,4 @@ function probabilities_and_outcomes(est::TransferOperator, x::Array_or_Dataset)
     return probs, outcomes
 end
 
-outcome_space(x, est::TransferOperator) = outcome_space(x, est.binning)
-total_outcomes(x, est::TransferOperator) = total_outcomes(x, est.binning)
+outcome_space(est::TransferOperator, x) = outcome_space(est.encoder, x)

From 77bb880b9d9ddad445bd5edf05016f3967f724dd Mon Sep 17 00:00:00 2001
From: George Datseris <datseris.george@gmail.com>
Date: Sat, 14 Jan 2023 16:53:28 +0200
Subject: [PATCH 24/29] fix some tests of TrOp...

---
 .../transfer_operator/transfer_operator.jl                     | 3 +--
 test/probabilities/estimators/transfer_operator.jl             | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/probabilities_estimators/transfer_operator/transfer_operator.jl b/src/probabilities_estimators/transfer_operator/transfer_operator.jl
index df30f3ede..13b9a0f5e 100644
--- a/src/probabilities_estimators/transfer_operator/transfer_operator.jl
+++ b/src/probabilities_estimators/transfer_operator/transfer_operator.jl
@@ -468,8 +468,7 @@ function transfermatrix(iv::InvariantMeasure)
 end
 
 function probabilities_and_outcomes(est::TransferOperator, x::Array_or_Dataset)
-    encoder = RectangularBinEncoding(est.binning, x)
-    to = transferoperator(x, encoder.binning)
+    to = transferoperator(x, est.binning)
     probs = invariantmeasure(to).ρ
 
     # Note: bins are *not* sorted. They occur in the order of first appearance, according
diff --git a/test/probabilities/estimators/transfer_operator.jl b/test/probabilities/estimators/transfer_operator.jl
index 06c061f07..933c11581 100644
--- a/test/probabilities/estimators/transfer_operator.jl
+++ b/test/probabilities/estimators/transfer_operator.jl
@@ -9,7 +9,7 @@ binnings = [
     RectangularBinning(0.2),
     RectangularBinning([2, 3]),
     RectangularBinning([0.2, 0.3]),
-    FixedRectangularBinning(0, 1, 5, 2)
+    FixedRectangularBinning(rage(0, 1; length = 5), 2)
 ]
 
 # There's not easy way of constructing an analytical example for the resulting

From 201f5042cb371f6faa8d6102240b37dd74405ca6 Mon Sep 17 00:00:00 2001
From: George Datseris <datseris.george@gmail.com>
Date: Sat, 14 Jan 2023 16:56:10 +0200
Subject: [PATCH 25/29] remove completely unused computations from function in
 TrOp...

---
 .../transfer_operator/transfer_operator.jl                    | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/src/probabilities_estimators/transfer_operator/transfer_operator.jl b/src/probabilities_estimators/transfer_operator/transfer_operator.jl
index 13b9a0f5e..11b2928b2 100644
--- a/src/probabilities_estimators/transfer_operator/transfer_operator.jl
+++ b/src/probabilities_estimators/transfer_operator/transfer_operator.jl
@@ -439,10 +439,6 @@ function invariantmeasure(to::TransferOperatorApproximationRectangular;
         distribution = distribution ./ colsum_distribution
     end
 
-    # Find partition elements with strictly positive measure.
-    δ = tolerance/size(to.transfermatrix, 1)
-    inds_nonzero = findall(distribution .> δ)
-
     # Extract the elements of the invariant measure corresponding to these indices
     return InvariantMeasure(to, Probabilities(distribution))
 end

From bb0477473c5b2e342d7c20492da475515e0602bb Mon Sep 17 00:00:00 2001
From: George Datseris <datseris.george@gmail.com>
Date: Sat, 14 Jan 2023 16:58:03 +0200
Subject: [PATCH 26/29] remove more completely unused codfe from TrOp...

---
 .../transfer_operator/transfer_operator.jl                      | 2 --
 test/probabilities/estimators/transfer_operator.jl              | 2 +-
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/probabilities_estimators/transfer_operator/transfer_operator.jl b/src/probabilities_estimators/transfer_operator/transfer_operator.jl
index 11b2928b2..b776e79b8 100644
--- a/src/probabilities_estimators/transfer_operator/transfer_operator.jl
+++ b/src/probabilities_estimators/transfer_operator/transfer_operator.jl
@@ -285,12 +285,10 @@ function transferoperator(pts::AbstractDataset{D, T},
             # To which boxes do each of the visitors to bᵢ jump in the next
             # time step?
             target_bins = visits_whichbin[timeindices_visiting_pts .+ 1]
-            unique_target_bins = unique(target_bins)
 
             # Count how many points jump from the i-th bin to each of
             # the unique target bins, and use that to calculate the transition
             # probability from bᵢ to bⱼ.
-            #for j in 1:length(unique_target_bins)
             for (j, bᵤ) in enumerate(unique(target_bins))
                 n_transitions_i_to_j = sum(target_bins .== bᵤ)
 
diff --git a/test/probabilities/estimators/transfer_operator.jl b/test/probabilities/estimators/transfer_operator.jl
index 933c11581..0fe02fc7b 100644
--- a/test/probabilities/estimators/transfer_operator.jl
+++ b/test/probabilities/estimators/transfer_operator.jl
@@ -9,7 +9,7 @@ binnings = [
     RectangularBinning(0.2),
     RectangularBinning([2, 3]),
     RectangularBinning([0.2, 0.3]),
-    FixedRectangularBinning(rage(0, 1; length = 5), 2)
+    FixedRectangularBinning(range(0, 1; length = 5), 2)
 ]
 
 # There's not easy way of constructing an analytical example for the resulting

From e15374176a2f5cff1d45bd29fec55accf0c30479 Mon Sep 17 00:00:00 2001
From: George Datseris <datseris.george@gmail.com>
Date: Sat, 14 Jan 2023 17:00:46 +0200
Subject: [PATCH 27/29] remove more unused code and comments

---
 .../transfer_operator/transfer_operator.jl                   | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/probabilities_estimators/transfer_operator/transfer_operator.jl b/src/probabilities_estimators/transfer_operator/transfer_operator.jl
index b776e79b8..8be7c8f5e 100644
--- a/src/probabilities_estimators/transfer_operator/transfer_operator.jl
+++ b/src/probabilities_estimators/transfer_operator/transfer_operator.jl
@@ -117,8 +117,7 @@ end
 # for datasets of >100 000+ points
 function inds_in_terms_of_unique_sorted(x) # assumes sorted
     @assert issorted(x)
-    U = unique(x)
-    N, Nu = length(x), length(U)
+    N = length(x)
     prev = view(x, 1)
     inds = zeros(Int, N)
     uidx = 1
@@ -236,10 +235,8 @@ function transferoperator(pts::AbstractDataset{D, T},
     n_visitsᵢ::Int = 0
 
     if boundary_condition == :circular
-        #warn("Using circular boundary condition")
         append!(visits_whichbin, [1])
     elseif boundary_condition == :random
-        #warn("Using random circular boundary condition")
         append!(visits_whichbin, [rand(1:length(visits_whichbin))])
     else
         error("Boundary condition $(boundary_condition) not implemented")

From 937973c944222cd7f0fcbd29fd21bd9eb6f086cf Mon Sep 17 00:00:00 2001
From: George Datseris <datseris.george@gmail.com>
Date: Sat, 14 Jan 2023 17:24:26 +0200
Subject: [PATCH 28/29] ensre ranges are large enough to cover data in fixed
 witdth

---
 .../rectangular_binning.jl                    | 24 ++++++++++++-------
 .../estimators/value_histogram.jl             | 14 +++++++++++
 2 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/src/encoding_implementations/rectangular_binning.jl b/src/encoding_implementations/rectangular_binning.jl
index 18d83d885..de161f430 100644
--- a/src/encoding_implementations/rectangular_binning.jl
+++ b/src/encoding_implementations/rectangular_binning.jl
@@ -32,9 +32,8 @@ Binning instructions are deduced from the type of `ϵ` as follows:
 4. `ϵ::Vector{Float64}` divides the i-th coordinate axis into intervals of fixed size
     `ϵ[i]`, starting from the axis minima until the data is completely covered by boxes.
 
-To ensure all data are covered, the `nextfloat` of the data maximum value is used
-as the maximum along each dimension. However this will lead to 100% accurate results
-only when `precise = true`.
+`RectangularBinning` ensures all input data are covered by extending the created
+ranges if need be.
 """
 struct RectangularBinning{E} <: AbstractBinning
     ϵ::E
@@ -156,13 +155,22 @@ function FixedRectangularBinning(b::RectangularBinning, x)
     T = eltype(x)
     ϵ = b.ϵ
     mini, maxi = minmaxima(x)
-    # use `nextfloat` to ensure all data are covered!
-    maxi = nextfloat.(maxi)
-    v = ones(SVector{D,T})
     if ϵ isa Float64 || ϵ isa AbstractVector{<:AbstractFloat}
-        widths = SVector{D,T}(ϵ .* v)
-        ranges = ntuple(i -> range(mini[i], maxi[i]; step = widths[i]), D)
+        widths = SVector{D,T}(ϵ .* ones(SVector{D,T}))
+        # To ensure all points are guaranteed to be covered, we add the width
+        # to the max, if the max isn't included in the resulting range
+        function covering_range(i)
+            r = range(mini[i], maxi[i]; step = widths[i])
+            if maxi[i] ∉ r
+                return range(mini[i], maxi[i] + widths[i]; step = widths[i])
+            else
+                return r
+            end
+        end
+        ranges = ntuple(i -> covering_range(i), D)
     elseif ϵ isa Int || ϵ isa Vector{Int}
+        # use `nextfloat` to ensure all data are covered!
+        maxi = nextfloat.(maxi)
         # We add one, because the user input specifies the number of bins,
         # and the number of bins is the range length - 1
         lengths = ϵ .* ones(SVector{D,Int}) .+ 1
diff --git a/test/probabilities/estimators/value_histogram.jl b/test/probabilities/estimators/value_histogram.jl
index aac1d37f1..ea6fda248 100644
--- a/test/probabilities/estimators/value_histogram.jl
+++ b/test/probabilities/estimators/value_histogram.jl
@@ -118,4 +118,18 @@ end
     @test decode(rbc, 100) == SVector(0.9, 0.9)
 end
 
+@testset "All points covered" begin
+    x = Dataset(rand(100, 2))
+    binnings = [
+        RectangularBinning(5, true),
+        RectangularBinning(0.2, true),
+        RectangularBinning([2, 4], true),
+        RectangularBinning([0.5, 0.25], true),
+    ]
 
+    for bin in binnings
+        rbe = RectangularBinEncoding(bin, x)
+        visited_bins = map(pᵢ -> encode(rbe, pᵢ), x)
+        @test -1 ∉ visited_bins
+    end
+end

From 68e759f304d8523413f4a404d0a3f07f604c17d5 Mon Sep 17 00:00:00 2001
From: George Datseris <datseris.george@gmail.com>
Date: Sat, 14 Jan 2023 17:27:31 +0200
Subject: [PATCH 29/29] add changelog entry

---
 CHANGELOG.md | 4 ++++
 Project.toml | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e1776a4a2..bda10d584 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,10 @@
 
 Changelog is kept with respect to version 0.11 of Entropies.jl. From version v2.0 onwards, this package has been renamed to ComplexityMeasures.jl.
 
+## 2.4
+- Rectangular binnings have been reformed to operate based on ranges. This leads to much more intuitive bin sizes and edges. For `RectangularBinning` nothing changes, while for `FixedRectangularBinning` the ranges should be given explicitly. Backwards compatible deprecations have been added.
+- This also allows for a new `precise` option that utilizes Base Julia `TwinPrecision` to make more accurate mapping of points to bins at the cost of performance.
+
 ## 2.3
 - Like differential entropies, discrete entropies now also have their own estimator type.
 - The approach of giving both an entropy definition, and an entropy estimator to `entropy` has been dropped. Now the entropy estimators know what definitions they are applied for. This change is a deprecation, i.e., backwards compatible.
diff --git a/Project.toml b/Project.toml
index 710bd61fe..b79e58b70 100644
--- a/Project.toml
+++ b/Project.toml
@@ -2,7 +2,7 @@ name = "ComplexityMeasures"
 uuid = "ab4b797d-85ee-42ba-b621-05d793b346a2"
 authors = "Kristian Agasøster Haaga <kahaaga@gmail.com>, George Datseries <datseris.george@gmail.com>"
 repo = "https://github.com/juliadynamics/ComplexityMeasures.jl.git"
-version = "2.3.2"
+version = "2.4.0"
 
 [deps]
 Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa"