From 10178cb697a1aac4060e7b1c5455ef790652dd37 Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Thu, 1 Sep 2022 00:03:31 +0200 Subject: [PATCH 1/6] Symbolic permutation 2D Add random to deps Separate probabilities and entropy estimators --- Project.toml | 1 + src/Entropies.jl | 1 + src/api.jl | 55 +++++++++ .../2d/square/SymbolicPermutation2D.jl | 68 +++++++++++ .../2d/square/permutation_2d_square.jl | 107 ++++++++++++++++++ src/symbolic/2d/symbolic_2d.jl | 2 + src/symbolic/symbolic.jl | 1 + src/symbolic/test.jl | 30 +++++ test/runtests.jl | 22 ++++ 9 files changed, 287 insertions(+) create mode 100644 src/api.jl create mode 100644 src/symbolic/2d/square/SymbolicPermutation2D.jl create mode 100644 src/symbolic/2d/square/permutation_2d_square.jl create mode 100644 src/symbolic/2d/symbolic_2d.jl create mode 100644 src/symbolic/test.jl diff --git a/Project.toml b/Project.toml index 512e22252..56149c162 100644 --- a/Project.toml +++ b/Project.toml @@ -10,6 +10,7 @@ Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Neighborhood = "645ca80c-8b79-4109-87ea-e1f58159d116" QuadGK = "1fd47b50-473d-5c70-9696-f719f8f3bcdc" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" diff --git a/src/Entropies.jl b/src/Entropies.jl index 854ae31d0..240362f67 100644 --- a/src/Entropies.jl +++ b/src/Entropies.jl @@ -1,5 +1,6 @@ module Entropies include("core.jl") + include("api.jl") include("histogram_estimation.jl") include("counting_based/CountOccurrences.jl") include("symbolic/symbolic.jl") diff --git a/src/api.jl b/src/api.jl new file mode 100644 index 000000000..35481d590 --- /dev/null +++ b/src/api.jl @@ -0,0 +1,55 @@ +using Random +export EntropyGenerator, ProbabilityGenerator +export entropygenerator, probabilitygenerator + +struct ProbabilityGenerator{S <: Union{ProbabilitiesEstimator}, X, A, R <: AbstractRNG} + method::S # method with its input parameters + x::X # input data + init::A # pre-initialized things that speed up probability estimation + rng::R # random number generator object +end + +struct EntropyGenerator{S, PG, X, A, R <: AbstractRNG} + + method::S # method with its input parameters + # A probability estimator can be supplied if the probability estimation is separate + # from the entropy computation step. This may not be the case for all methods, so + # it defaults to nothing. + probability_generator::PG + + x::X # input data + init::A # pre-initialized things that speed up entropy estimation + rng::R # random number generator object + +end + + +""" + entropygenerator(x, method::EntropyEstimator[, rng]) → sg::EntropyGenerator + +Initialize a generator that computes entropies of `x` on demand, based on the given `method`. +This is efficient, because for most methods some things can be initialized and reused +for every computation. Optionally you can provide an `rng::AbstractRNG` object that will +control random number generation and hence establish reproducibility of the +generated entropy values, if they rely on random number generation. By default +`Random.default_rng()` is used. + +Note: not all entropy estimators have this functionality enabled yet. The documentation +strings for individual methods indicate whether entropy generators are available. +To compute entropy using a generator, call `eg` as a function with the optional `base` +argument, e.g. + +```julia +eg = entropygenerator(x, method) +h = eg(; base = 2) +``` +""" +function entropygenerator end + +""" + probabilitiesgenerator(x, method, [, rng]) → sg::ProbabilitiesGenerator + +The same as [`entropygenerator`](@ref), but initializes a `ProbabilitiesGenerator` that +can be used for repeated computation of probabilities. +""" +function probabilitygenerator end diff --git a/src/symbolic/2d/square/SymbolicPermutation2D.jl b/src/symbolic/2d/square/SymbolicPermutation2D.jl new file mode 100644 index 000000000..0ef688fb4 --- /dev/null +++ b/src/symbolic/2d/square/SymbolicPermutation2D.jl @@ -0,0 +1,68 @@ +export SymbolicPermutation2D + +""" + SymbolicPermutation2D(m::Int = 2) + +A symbolic permutation entropy estimator for 2D arrays that symbolizes square sub-matrices +of size `m * m` to estimate probabilities. + +The estimator is based on Riberio et al. (2012)[^Ribeiro2012], but is here extended to +generalized entropies of order `q`. + +# Parameter requirements + +According to Riberio et al. (2012), `m` should be picked such that `(m*m)! << Nc*Nr`, +where `Nc` is the number of columns and `Nr` is the number of rows in the 2D array +for which estimation is targeted. + + genentropy(x::AbstractArray{T, 2}, est::SymbolicPermutation2D; + q = 1, base = MathConstants.e, normalize = true) where T + +Compute the generalized order-`q` permutation entropy of a **pre-symbolized** 2D array, +with arbitrary element type `T`. If `normalize == true`, then the entropy is normalized +to the number of possible states. + +## Example + +```julia +using Entropies +# A pre-symbolized 10000-pixel image, where each pixel now is represented by an +# integer from 1 to 5. +x = rand(1:5, 100, 100) + +# Estimate permutation entropy by considering `3 * 3`-sized square pixel blocks, +# using logarithms to base 2 +genentropy(x, SymbolicPermutation2D(m = 3), base = 2) +``` + + entropygenerator(x::AbstractArray{T, 2}, + method::SymbolicPermutation2D) → eg::EntropyGenerator{<:SymbolicPermutation2D} + +Create an `EntropyGenerator`, using `x` as a template, that efficiently computes the +generalized order-`q` permutation entropy of 2D arrays that has the same size as `x`. + +The generator must be called with a 2D array (of same size as `x`) as input, optionally +with keywords `base`, `q` and `normalize` (which have meanings as described above). + +## Example + +```julia +using Entropies + +# 50 images where each of the 10000 pixel take on binary values ("dark" or "light") +images = [rand(["dark", "light"], 100, 100) for i = 1:50] + +# Create a generator using the first image as a template. All images are the same size, +# so we can re-use the generator. Use pixel blocks of size `2*2`. +est = SymbolicPermutation2D(m = 2) +eg = entropygenerator(first(images), est) + +# The generalized order-`1` (Shannon) normalized permutation entropy to base 2 of each image +[eg(img, base = 2, q = 1, normalize = true) for img in images] +``` + +[^Ribeiro2012]: Ribeiro, H. V., Zunino, L., Lenzi, E. K., Santoro, P. A., & Mendes, R. S. (2012). Complexity-entropy causality plane as a complexity measure for two-dimensional patterns. +""" +@Base.kwdef struct SymbolicPermutation2D <: ProbabilitiesEstimator + m::Int = 2 +end diff --git a/src/symbolic/2d/square/permutation_2d_square.jl b/src/symbolic/2d/square/permutation_2d_square.jl new file mode 100644 index 000000000..b81d82d9d --- /dev/null +++ b/src/symbolic/2d/square/permutation_2d_square.jl @@ -0,0 +1,107 @@ +using StaticArrays + +function update_x̂!(x̂, X, ntot, i, j, d) + Q = view(X, j:j+d-1, i:i+d-1) + + for i in 1:ntot + x̂[i] = Q[i] + end +end + +function update_motif!(motifs, k, motif) + motifs[k] = motif +end + +function update_motifs!(motifs, x̂, x̂_sorted, X, d) + ny, nx = size(X) + ntot = d * d + + rx = 1:nx-d+1 # columns + ry = 1:ny-d+1 # rows + k = 1 + for i in rx + for j in ry + update_x̂!(x̂, X, ntot, i, j, d) + sortperm!(x̂_sorted, x̂) + update_motif!(motifs, k, Entropies.encode_motif(x̂_sorted)) + k += 1 + end + end + return motifs +end + +function probabilitygenerator(x::AbstractArray{T, 2}, method::SymbolicPermutation2D, + rng = Random.default_rng()) where T + d = method.m + ntot = d * d + # Picking the first `ntot` elements of x to infer type from `x`, so that we can + # use non-numeric types as input too. + x̂ = x[1:ntot] + x̂_sorted = [i for i = 1:ntot] + + # Pre-allocate motif vector. + ny, nx = size(x) + + # Picking sub-matrices means there are fewer than `ny`/`nx` rows/columns + # to pick from. + nr = length(1:nx-d+1) # rows + nc = length(1:ny-d+1) # columns + motifs = zeros(Int, nr * nc) + + init = ( + x̂ = x̂, + x̂_sorted = x̂_sorted, + motifs = motifs, + ) + return ProbabilityGenerator(method, x, init, rng) +end + +function entropygenerator(x::AbstractArray{T, 2}, method::SymbolicPermutation2D, + rng = Random.default_rng()) where T + pg = probabilitygenerator(x, method, rng) + init = () + return EntropyGenerator(method, pg, x, init, rng) +end + +function (eg::ProbabilityGenerator{<:SymbolicPermutation2D})(; kwargs...) + throw(ArgumentError("Missing 2D array input to probability generator")) +end + +function (eg::ProbabilityGenerator{<:SymbolicPermutation2D})(X::AbstractArray{T, 2}) where T + d = eg.method.m + x̂, x̂_sorted, motifs = getfield.( + Ref(eg.init), + (:x̂, :x̂_sorted, :motifs) + ) + # Loop through the d*d-sized submatrices and symbolize them, one by one, into + # the pre-allocated `motifs` vector. + update_motifs!(motifs, x̂, x̂_sorted, X, d) + + return Probabilities(_non0hist(motifs) ./ length(motifs)) +end + + +function (eg::EntropyGenerator{<:SymbolicPermutation2D, PROBGEN})(X::AbstractArray{T, 2}; + base = MathConstants.e, normalize = true, q = 1) where {PROBGEN, T} + + ps = eg.probability_generator(X) + d = eg.probability_generator.method.m + if normalize + # For `d > 4`, we need BigInt to compute the factorial of the product. + nc = convert(Float64, log(base, factorial(BigInt(d) * BigInt(d)))) + return float(genentropy(ps, base = base, q = q) / nc) + else + return genentropy(ps, base = base, q = q) + end +end + +function probabilities(x::AbstractArray{T, 2}, est::SymbolicPermutation2D) where T + pg = probabilitygenerator(x, est) + return pg(x) +end + +function genentropy(x::AbstractArray{T, 2}, est::SymbolicPermutation2D; + q = 1, base = MathConstants.e, normalize = true) where T + eg = entropygenerator(x, est) + eg(x, base = base, q = q, normalize = normalize) +end diff --git a/src/symbolic/2d/symbolic_2d.jl b/src/symbolic/2d/symbolic_2d.jl new file mode 100644 index 000000000..b732817b1 --- /dev/null +++ b/src/symbolic/2d/symbolic_2d.jl @@ -0,0 +1,2 @@ +include("square/SymbolicPermutation2D.jl") +include("square/permutation_2d_square.jl") diff --git a/src/symbolic/symbolic.jl b/src/symbolic/symbolic.jl index 99be6dbf1..f84c254e1 100644 --- a/src/symbolic/symbolic.jl +++ b/src/symbolic/symbolic.jl @@ -10,6 +10,7 @@ include("utils.jl") include("SymbolicPermutation.jl") include("SymbolicWeightedPermutation.jl") include("SymbolicAmplitudeAware.jl") +include("2d/symbolic_2d.jl") """ permentropy(x; τ = 1, m = 3, base = MathConstants.e) diff --git a/src/symbolic/test.jl b/src/symbolic/test.jl new file mode 100644 index 000000000..35951dfc3 --- /dev/null +++ b/src/symbolic/test.jl @@ -0,0 +1,30 @@ + +N = 100; +X = rand(1:5, N, N); +est = SymbolicPermutation2D(m = 2) +gen = entropygenerator(X, est) + +@time timeit(gen) +# using BenchmarkTools +# @btime gen($X); +# Y = rand(1:5, N, N); +# @btime gen($Y); +using Entropies +using Entropies + +# 50 images where each of the 10000 pixel take on binary values ("dark" or "light") +images = [rand(["dark", "light"], 100, 100) for i = 1:50] + +# Create a generator using the first image as a template. All images are the same size, +# so we can re-use the generator. Use pixel blocks of size `2*2`. +est = SymbolicPermutation2D(m = 2) +eg = entropygenerator(first(images), est) + +# The generalized order-`1` (Shannon) normalized permutation entropy to base 2 of each image +[eg(img, base = 2, q = 1, normalize = true) for img in images] + +x = [1 2 1; 8 3 4; 6 7 5] +genentropy(x, SymbolicPermutation2D(m = 2), base = 2) + +ps = [0.5, 0.25, 0.25] +genentropy(Probabilities(ps), base = 2) / log(2, factorial(2*2)) diff --git a/test/runtests.jl b/test/runtests.jl index a9392b602..beaa77d14 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -376,4 +376,26 @@ end @test typeof(de) <: Real @test de >= 0.0 end + + @testset "Symbolic 2D" begin + x = [1 2 1; 8 3 4; 6 7 5] + est = SymbolicPermutation2D(m = 2) + + # Generic tests + ps = probabilities(x, est) + @test ps isa Probabilities + + probgen = probabilitygenerator(x, est) + @test probgen isa ProbabilityGenerator + @test probgen(x) isa Probabilities + + hgen = entropygenerator(x, est) + @test hgen isa EntropyGenerator + @test hgen(x, normalize = true) isa T where T <: Real + @test hgen(x, normalize = false) isa T where T <: Real + + # test case from Ribeiro et al, 2012 + h = genentropy(x, est, base = MathConstants.e) + @test round(h, digits = 2) ≈ 0.33 + end end From d67ca4ca42d08147d918f93dbd445c9b16cae1c8 Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Thu, 1 Sep 2022 11:29:04 +0200 Subject: [PATCH 2/6] Stencil approach --- .../2d/square/SymbolicPermutation2D.jl | 68 ------- .../2d/square/permutation_2d_square.jl | 107 ----------- src/symbolic/2d/symbolic_2d.jl | 2 - .../higher_dim/stencil/PermutationStencil.jl | 176 ++++++++++++++++++ src/symbolic/higher_dim/stencil/stencil_2d.jl | 115 ++++++++++++ src/symbolic/higher_dim/symbolic_higherdim.jl | 3 + src/symbolic/higher_dim/utils.jl | 4 + src/symbolic/symbolic.jl | 2 +- test/runtests.jl | 8 +- 9 files changed, 305 insertions(+), 180 deletions(-) delete mode 100644 src/symbolic/2d/square/SymbolicPermutation2D.jl delete mode 100644 src/symbolic/2d/square/permutation_2d_square.jl delete mode 100644 src/symbolic/2d/symbolic_2d.jl create mode 100644 src/symbolic/higher_dim/stencil/PermutationStencil.jl create mode 100644 src/symbolic/higher_dim/stencil/stencil_2d.jl create mode 100644 src/symbolic/higher_dim/symbolic_higherdim.jl create mode 100644 src/symbolic/higher_dim/utils.jl diff --git a/src/symbolic/2d/square/SymbolicPermutation2D.jl b/src/symbolic/2d/square/SymbolicPermutation2D.jl deleted file mode 100644 index 0ef688fb4..000000000 --- a/src/symbolic/2d/square/SymbolicPermutation2D.jl +++ /dev/null @@ -1,68 +0,0 @@ -export SymbolicPermutation2D - -""" - SymbolicPermutation2D(m::Int = 2) - -A symbolic permutation entropy estimator for 2D arrays that symbolizes square sub-matrices -of size `m * m` to estimate probabilities. - -The estimator is based on Riberio et al. (2012)[^Ribeiro2012], but is here extended to -generalized entropies of order `q`. - -# Parameter requirements - -According to Riberio et al. (2012), `m` should be picked such that `(m*m)! << Nc*Nr`, -where `Nc` is the number of columns and `Nr` is the number of rows in the 2D array -for which estimation is targeted. - - genentropy(x::AbstractArray{T, 2}, est::SymbolicPermutation2D; - q = 1, base = MathConstants.e, normalize = true) where T - -Compute the generalized order-`q` permutation entropy of a **pre-symbolized** 2D array, -with arbitrary element type `T`. If `normalize == true`, then the entropy is normalized -to the number of possible states. - -## Example - -```julia -using Entropies -# A pre-symbolized 10000-pixel image, where each pixel now is represented by an -# integer from 1 to 5. -x = rand(1:5, 100, 100) - -# Estimate permutation entropy by considering `3 * 3`-sized square pixel blocks, -# using logarithms to base 2 -genentropy(x, SymbolicPermutation2D(m = 3), base = 2) -``` - - entropygenerator(x::AbstractArray{T, 2}, - method::SymbolicPermutation2D) → eg::EntropyGenerator{<:SymbolicPermutation2D} - -Create an `EntropyGenerator`, using `x` as a template, that efficiently computes the -generalized order-`q` permutation entropy of 2D arrays that has the same size as `x`. - -The generator must be called with a 2D array (of same size as `x`) as input, optionally -with keywords `base`, `q` and `normalize` (which have meanings as described above). - -## Example - -```julia -using Entropies - -# 50 images where each of the 10000 pixel take on binary values ("dark" or "light") -images = [rand(["dark", "light"], 100, 100) for i = 1:50] - -# Create a generator using the first image as a template. All images are the same size, -# so we can re-use the generator. Use pixel blocks of size `2*2`. -est = SymbolicPermutation2D(m = 2) -eg = entropygenerator(first(images), est) - -# The generalized order-`1` (Shannon) normalized permutation entropy to base 2 of each image -[eg(img, base = 2, q = 1, normalize = true) for img in images] -``` - -[^Ribeiro2012]: Ribeiro, H. V., Zunino, L., Lenzi, E. K., Santoro, P. A., & Mendes, R. S. (2012). Complexity-entropy causality plane as a complexity measure for two-dimensional patterns. -""" -@Base.kwdef struct SymbolicPermutation2D <: ProbabilitiesEstimator - m::Int = 2 -end diff --git a/src/symbolic/2d/square/permutation_2d_square.jl b/src/symbolic/2d/square/permutation_2d_square.jl deleted file mode 100644 index b81d82d9d..000000000 --- a/src/symbolic/2d/square/permutation_2d_square.jl +++ /dev/null @@ -1,107 +0,0 @@ -using StaticArrays - -function update_x̂!(x̂, X, ntot, i, j, d) - Q = view(X, j:j+d-1, i:i+d-1) - - for i in 1:ntot - x̂[i] = Q[i] - end -end - -function update_motif!(motifs, k, motif) - motifs[k] = motif -end - -function update_motifs!(motifs, x̂, x̂_sorted, X, d) - ny, nx = size(X) - ntot = d * d - - rx = 1:nx-d+1 # columns - ry = 1:ny-d+1 # rows - k = 1 - for i in rx - for j in ry - update_x̂!(x̂, X, ntot, i, j, d) - sortperm!(x̂_sorted, x̂) - update_motif!(motifs, k, Entropies.encode_motif(x̂_sorted)) - k += 1 - end - end - return motifs -end - -function probabilitygenerator(x::AbstractArray{T, 2}, method::SymbolicPermutation2D, - rng = Random.default_rng()) where T - d = method.m - ntot = d * d - # Picking the first `ntot` elements of x to infer type from `x`, so that we can - # use non-numeric types as input too. - x̂ = x[1:ntot] - x̂_sorted = [i for i = 1:ntot] - - # Pre-allocate motif vector. - ny, nx = size(x) - - # Picking sub-matrices means there are fewer than `ny`/`nx` rows/columns - # to pick from. - nr = length(1:nx-d+1) # rows - nc = length(1:ny-d+1) # columns - motifs = zeros(Int, nr * nc) - - init = ( - x̂ = x̂, - x̂_sorted = x̂_sorted, - motifs = motifs, - ) - return ProbabilityGenerator(method, x, init, rng) -end - -function entropygenerator(x::AbstractArray{T, 2}, method::SymbolicPermutation2D, - rng = Random.default_rng()) where T - pg = probabilitygenerator(x, method, rng) - init = () - return EntropyGenerator(method, pg, x, init, rng) -end - -function (eg::ProbabilityGenerator{<:SymbolicPermutation2D})(; kwargs...) - throw(ArgumentError("Missing 2D array input to probability generator")) -end - -function (eg::ProbabilityGenerator{<:SymbolicPermutation2D})(X::AbstractArray{T, 2}) where T - d = eg.method.m - x̂, x̂_sorted, motifs = getfield.( - Ref(eg.init), - (:x̂, :x̂_sorted, :motifs) - ) - # Loop through the d*d-sized submatrices and symbolize them, one by one, into - # the pre-allocated `motifs` vector. - update_motifs!(motifs, x̂, x̂_sorted, X, d) - - return Probabilities(_non0hist(motifs) ./ length(motifs)) -end - - -function (eg::EntropyGenerator{<:SymbolicPermutation2D, PROBGEN})(X::AbstractArray{T, 2}; - base = MathConstants.e, normalize = true, q = 1) where {PROBGEN, T} - - ps = eg.probability_generator(X) - d = eg.probability_generator.method.m - if normalize - # For `d > 4`, we need BigInt to compute the factorial of the product. - nc = convert(Float64, log(base, factorial(BigInt(d) * BigInt(d)))) - return float(genentropy(ps, base = base, q = q) / nc) - else - return genentropy(ps, base = base, q = q) - end -end - -function probabilities(x::AbstractArray{T, 2}, est::SymbolicPermutation2D) where T - pg = probabilitygenerator(x, est) - return pg(x) -end - -function genentropy(x::AbstractArray{T, 2}, est::SymbolicPermutation2D; - q = 1, base = MathConstants.e, normalize = true) where T - eg = entropygenerator(x, est) - eg(x, base = base, q = q, normalize = normalize) -end diff --git a/src/symbolic/2d/symbolic_2d.jl b/src/symbolic/2d/symbolic_2d.jl deleted file mode 100644 index b732817b1..000000000 --- a/src/symbolic/2d/symbolic_2d.jl +++ /dev/null @@ -1,2 +0,0 @@ -include("square/SymbolicPermutation2D.jl") -include("square/permutation_2d_square.jl") diff --git a/src/symbolic/higher_dim/stencil/PermutationStencil.jl b/src/symbolic/higher_dim/stencil/PermutationStencil.jl new file mode 100644 index 000000000..c95e80547 --- /dev/null +++ b/src/symbolic/higher_dim/stencil/PermutationStencil.jl @@ -0,0 +1,176 @@ +export PermutationStencil + +""" + PermutationStencil(stencil::AbstractArray{T, N}) <: ProbabilitiesEstimator + +Estimate permutation entropies of `N`-dimensional matrices using the provided +`stencil` array, which can be any rectangular shape, and whose elements have +type `T` that can be passed to `Base.isone` (i.e. numbers or booleans). + +The size of `stencil` determines the dimension of the rectangular submatrices for +which symbols are generated, and `stencil[i]` determines if the `i`-th +element of the submatrices (using linear indexing) is considered as part of the +ordinal pattern. + +# Input requirements + +The dimensions `dx, dy, ...` of the `stencil` should be much smaller than the +array to which it is applied. If `nx, ny, ...` are the dimensions of the target +array, it is required that `factorial(dx* dy * ...) << nx * ny * ...`. + +## Example + +```jldoctest +julia> img = reshape(1:25, 5, 5) +5×5 reshape(::UnitRange{Int64}, 5, 5) with eltype Int64: + 1 6 11 16 21 + 2 7 12 17 22 + 3 8 13 18 23 + 4 9 14 19 24 + 5 10 15 20 25 + +julia> stencil = [1 0 1; 0 1 0; 1 0 1] +3×3 Matrix{Int64}: + 1 0 1 + 0 1 0 + 1 0 1 +``` + +Then, the upper left `3*3`-submatrix of `img` would be represented by the vector +`[1, 3, 7, 11, 13]`, whose ordinal pattern is converted to a permutation symbol +(integer). Repeated application to all `3*3` submatrices yield a distribution of +integer symbols, from which a probability distribution can be estimated, from +which entropy can be computed. + + PermutationStencil(m::Int, D::Int) <: ProbabilitiesEstimator + +Convenience constructor for (hyper)square stencils in dimension `D` where +all elements are true. For the 2D case, this recovers the approach in +Riberio et al. (2012)[^Ribeiro2012]. + +## Parameter requirements + +As for the stencil-based approach, it is required `factorial(d*d) << nx*ny`, where +`ny` and `nx` are the number of rows and columns in the target array. + +### Example + +The following two approaches are equivalent. + +```jldoctest +julia> using Entropies + +julia> PermutationStencil(2, 2) +PermutationStencil{2}(Bool[1 1; 1 1]) + +julia> PermutationStencil([1 1; 1 1]) +PermutationStencil{2}(Bool[1 1; 1 1]) +``` + +# Computing entropies + + genentropy(x::AbstractArray{T, 2}, est::SymbolicPermutation2D; + q = 1, base = MathConstants.e, normalize = true) where T + +Compute the generalized order-`q` permutation entropy of a **pre-symbolized** 2D array, +with arbitrary element type `T`. If `normalize == true`, then the entropy is normalized +to the number of possible states. + +## Example + +```julia +using Entropies +# A pre-symbolized 10000-pixel image, where each pixel now is represented by an integer. +x = rand(1:5, 100, 100) + +# Estimate permutation entropy by considering `3 * 3`-sized square pixel blocks, +# using logarithms to base 2 +m, N = 3, 2 +genentropy(x, PermutationStencil(m, N), base = 2) +``` + + entropygenerator(x::AbstractArray{T, 2}, + method::PermutationStencil{2}) → eg::EntropyGenerator + +Create an `EntropyGenerator`, using `x` as a template, that efficiently computes the +generalized order-`q` permutation entropy of 2D arrays that has the same size as `x`. +The generator must be called with a 2D array (of same size as `x`) as input, optionally +with keywords `base`, `q` and `normalize` (which have meanings as described above). + +## Example + +```julia +using Entropies + +# 50 images where each of the 10000 pixel take on binary values ("dark" or "light") +images = [rand(["dark", "light"], 100, 100) for i = 1:50] + +# Create a generator using the first image as a template. All images are the same size, +# so we can re-use the generator. Use pixel blocks of size `2*2`. +m, dim = 2, 2 +est = PermutationStencil(m, dim) +eg = entropygenerator(first(images), est) + +# The generalized order-`1` (Shannon) normalized permutation entropy to base 2 of each image +[eg(img, base = 2, q = 1, normalize = true) for img in images] +``` + +# Computing probabilities + + probabilities(x::AbstractArray{T, 2}, est::PermutationStencil{2}) → Probabilities + +The same as above, but instead of computing the entropy, directly return the estimated +probabilities. + + probabilitygenerator(x::AbstractArray{T, 2}, + method::PermutationStencil{2}) → ProbabilityGenerator + +The same as above, but returns a `ProbabilityGenerator` instead of a `EntropyGenerator`. + +[^Ribeiro2012]: Ribeiro, H. V., Zunino, L., Lenzi, E. K., Santoro, P. A., & Mendes, R. S. (2012). Complexity-entropy causality plane as a complexity measure for two-dimensional patterns. +""" +struct PermutationStencil{N} <: ProbabilitiesEstimator + stencil::AbstractArray{Bool, N} + + function PermutationStencil(s::AbstractArray{Bool, N}) where N + return new{N}(s) + end + + function PermutationStencil(s::AbstractArray{T, N}) where {T <: Number, N} + return new{N}(isone.(s)) + end + + # Square blocks of size `blocksize*blocksize*...` in `N` dimensions. + function PermutationStencil(blocksize::Int, N::Int = 2) + return PermutationStencil(ones(Int, tuple(repeat([m], N)...))) + end +end + +function (eg::ProbabilityGenerator{<:PermutationStencil{N}})(; kwargs...) where N + throw(ArgumentError("Probability generator cannot be called without an argument. Please provide an `$N`-dimensional array for which probabilities should be computed, e.g `probgen(x)`" )) +end + +function (eg::EntropyGenerator{<:PermutationStencil{N}})(; kwargs...) where N + throw(ArgumentError("Entropy generator cannot be called without an argument. Please provide an `$N`-dimensional array for which probabilities should be computed, e.g `probgen(x)`" )) +end + +function probabilities(x::AbstractArray{T, N}, est::PermutationStencil{N}) where {T, N<:Integer} + throw(ArgumentError("Probability estimation using `$N`-dimensional stencils is not yet implemented." )) +end + + +function genentropy(x::AbstractArray{T, N}, est::PermutationStencil{N}; kwargs...) where {T, N} + throw(ArgumentError("Entropy estimation using `$N`-dimensional stencils is not yet implemented." )) +end + + +function entropygenerator(x::AbstractArray{T, N}, method::PermutationStencil{N}, + rng = Random.default_rng()) where {T, N <: Integer} + + pg = probabilitygenerator(x, method, rng) + init = () + return EntropyGenerator(method, pg, x, init, rng) +end + +D = 2 +m = 2 diff --git a/src/symbolic/higher_dim/stencil/stencil_2d.jl b/src/symbolic/higher_dim/stencil/stencil_2d.jl new file mode 100644 index 000000000..680fd65d3 --- /dev/null +++ b/src/symbolic/higher_dim/stencil/stencil_2d.jl @@ -0,0 +1,115 @@ + + +function probabilitygenerator(x::AbstractArray{T, 2}, method::PermutationStencil{2}, + rng = Random.default_rng()) where T + + ny, nx = size(x) + + # From a submatrix of size `ny`*`nx`, what are the linear indices that select + # the correct elements, given `method.stencil`? + stencil = method.stencil + stencil_submatrix_idxs = LinearIndices(stencil)[findall(stencil .== true)] + + # Picking the first `L` elements of x to infer type from `x`, so that we can + # use non-numeric types as input too. + L = count(stencil .== true) + x̂ = x[1:L] + x̂_sorted = [i for i = 1:L] + + # Pre-allocate motif vector. Picking sub-matrices means there are fewer + # than `ny`/`nx` rows/columns + # to pick from. + dy, dx = size(method.stencil) + nr = length(1:nx-dy+1) # rows + nc = length(1:ny-dx+1) # columns + motifs = zeros(Int, nr * nc) + + init = ( + x̂ = x̂, + x̂_sorted = x̂_sorted, + motifs = motifs, + stencil_submatrix_idxs = stencil_submatrix_idxs, + dx = dx, + dy = dy, + ) + return ProbabilityGenerator(method, x, init, rng) +end + +function update_x̂_from_stencil!(x̂, X::AbstractArray{T, 2}, i, j, dx, dy, + stencil_submatrix_idxs::Vector{Int}) where T + + Q = view(X, j:j+dy-1, i:i+dx-1) + + k = 1 + for i in stencil_submatrix_idxs + x̂[k] = Q[i] + k += 1 + end +end + +function update_motifs_from_stencil!(motifs, x̂, x̂_sorted, X::AbstractArray{T, 2}, dx, dy, + stencil_submatrix_idxs) where T + + ny, nx = size(X) + rx = 1:nx-dx+1 # columns + ry = 1:ny-dy+1 # rows + k = 1 + for i in rx + for j in ry + update_x̂_from_stencil!(x̂, X, i, j, dx, dy, stencil_submatrix_idxs) + sortperm!(x̂_sorted, x̂) + update_motif!(motifs, k, Entropies.encode_motif(x̂_sorted)) + k += 1 + end + end + return motifs +end + +function (eg::ProbabilityGenerator{<:PermutationStencil{2}})(X::AbstractArray{T, 2}) where T + x̂, x̂_sorted, motifs, stencil_submatrix_idxs, dx, dy = getfield.( + Ref(eg.init), + (:x̂, :x̂_sorted, :motifs, :stencil_submatrix_idxs, :dx, :dy) + ) + # Loop through the d*d-sized submatrices and symbolize them, one by one, into + # the pre-allocated `motifs` vector. + update_motifs_from_stencil!(motifs, x̂, x̂_sorted, X, dx, dy, stencil_submatrix_idxs) + + return Probabilities(_non0hist(motifs) ./ length(motifs)) +end + +function probabilities(x::AbstractArray{T, 2}, + est::PermutationStencil{2}) where {T} + + pg = probabilitygenerator(x, est) + return pg(x) +end + +function (eg::EntropyGenerator{<:PermutationStencil{2}})(X::AbstractArray{T, 2}; + base = MathConstants.e, normalize = true, q = 1) where T + + ps = eg.probability_generator(X) + if normalize + # For `d > 4`, we need BigInt to compute the factorial of the product. + dx = eg.probability_generator.init.dx + dy = eg.probability_generator.init.dy + nc = convert(Float64, log(base, factorial(BigInt(dx) * BigInt(dy)))) + return float(genentropy(ps, base = base, q = q) / nc) + else + return genentropy(ps, base = base, q = q) + end +end + +function entropygenerator(x::AbstractArray{T, 2}, method::PermutationStencil{2}, + rng = Random.default_rng()) where T + + pg = probabilitygenerator(x, method, rng) + init = () + return EntropyGenerator(method, pg, x, init, rng) +end + + +function genentropy(x::AbstractArray{T, 2}, est::PermutationStencil{2}; + q = 1, base = MathConstants.e, normalize = true) where {T} + eg = entropygenerator(x, est) + eg(x, base = base, q = q, normalize = normalize) +end diff --git a/src/symbolic/higher_dim/symbolic_higherdim.jl b/src/symbolic/higher_dim/symbolic_higherdim.jl new file mode 100644 index 000000000..a071d6bca --- /dev/null +++ b/src/symbolic/higher_dim/symbolic_higherdim.jl @@ -0,0 +1,3 @@ +include("utils.jl") +include("stencil/PermutationStencil.jl") +include("stencil/stencil_2d.jl") diff --git a/src/symbolic/higher_dim/utils.jl b/src/symbolic/higher_dim/utils.jl new file mode 100644 index 000000000..8b8694b7f --- /dev/null +++ b/src/symbolic/higher_dim/utils.jl @@ -0,0 +1,4 @@ + +function update_motif!(motifs, k, motif) + motifs[k] = motif +end diff --git a/src/symbolic/symbolic.jl b/src/symbolic/symbolic.jl index f84c254e1..d6b95b4bf 100644 --- a/src/symbolic/symbolic.jl +++ b/src/symbolic/symbolic.jl @@ -10,7 +10,7 @@ include("utils.jl") include("SymbolicPermutation.jl") include("SymbolicWeightedPermutation.jl") include("SymbolicAmplitudeAware.jl") -include("2d/symbolic_2d.jl") +include("higher_dim/symbolic_higherdim.jl") """ permentropy(x; τ = 1, m = 3, base = MathConstants.e) diff --git a/test/runtests.jl b/test/runtests.jl index beaa77d14..e1c710959 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -377,9 +377,11 @@ end @test de >= 0.0 end - @testset "Symbolic 2D" begin + @testset "Permutation stencil 2D" begin x = [1 2 1; 8 3 4; 6 7 5] - est = SymbolicPermutation2D(m = 2) + # Re-create the Ribeiro et al, 2012 using stencil instead of specifying m + stencil = [true true; true true] + est = PermutationStencil(stencil) # Generic tests ps = probabilities(x, est) @@ -388,11 +390,13 @@ end probgen = probabilitygenerator(x, est) @test probgen isa ProbabilityGenerator @test probgen(x) isa Probabilities + @test_throws ArgumentError probgen() hgen = entropygenerator(x, est) @test hgen isa EntropyGenerator @test hgen(x, normalize = true) isa T where T <: Real @test hgen(x, normalize = false) isa T where T <: Real + @test_throws ArgumentError hgen() # test case from Ribeiro et al, 2012 h = genentropy(x, est, base = MathConstants.e) From be7eaa17021666c7c80741a6f1ce9b119e6ba78e Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Sat, 3 Sep 2022 12:22:13 +0200 Subject: [PATCH 3/6] Fix erroneously hard-coded value --- src/symbolic/higher_dim/stencil/PermutationStencil.jl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/symbolic/higher_dim/stencil/PermutationStencil.jl b/src/symbolic/higher_dim/stencil/PermutationStencil.jl index c95e80547..dab6ea587 100644 --- a/src/symbolic/higher_dim/stencil/PermutationStencil.jl +++ b/src/symbolic/higher_dim/stencil/PermutationStencil.jl @@ -142,7 +142,7 @@ struct PermutationStencil{N} <: ProbabilitiesEstimator # Square blocks of size `blocksize*blocksize*...` in `N` dimensions. function PermutationStencil(blocksize::Int, N::Int = 2) - return PermutationStencil(ones(Int, tuple(repeat([m], N)...))) + return PermutationStencil(ones(Int, tuple(repeat([blocksize], N)...))) end end @@ -172,5 +172,4 @@ function entropygenerator(x::AbstractArray{T, N}, method::PermutationStencil{N}, return EntropyGenerator(method, pg, x, init, rng) end -D = 2 -m = 2 +include("stencil_2d.jl") From 85190bca13e8b4d0e2e612e137a9e770a2fe4c1c Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Sat, 3 Sep 2022 12:26:24 +0200 Subject: [PATCH 4/6] Fix double import --- src/symbolic/higher_dim/symbolic_higherdim.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/symbolic/higher_dim/symbolic_higherdim.jl b/src/symbolic/higher_dim/symbolic_higherdim.jl index a071d6bca..e5af4bd98 100644 --- a/src/symbolic/higher_dim/symbolic_higherdim.jl +++ b/src/symbolic/higher_dim/symbolic_higherdim.jl @@ -1,3 +1,2 @@ include("utils.jl") include("stencil/PermutationStencil.jl") -include("stencil/stencil_2d.jl") From 435937b78d641b53044d9b93acf3f2c847ac70e1 Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Sat, 3 Sep 2022 18:34:49 +0200 Subject: [PATCH 5/6] remove default value --- src/symbolic/higher_dim/stencil/PermutationStencil.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/symbolic/higher_dim/stencil/PermutationStencil.jl b/src/symbolic/higher_dim/stencil/PermutationStencil.jl index dab6ea587..f3e7b30f0 100644 --- a/src/symbolic/higher_dim/stencil/PermutationStencil.jl +++ b/src/symbolic/higher_dim/stencil/PermutationStencil.jl @@ -141,7 +141,7 @@ struct PermutationStencil{N} <: ProbabilitiesEstimator end # Square blocks of size `blocksize*blocksize*...` in `N` dimensions. - function PermutationStencil(blocksize::Int, N::Int = 2) + function PermutationStencil(blocksize::Int, N::Int) return PermutationStencil(ones(Int, tuple(repeat([blocksize], N)...))) end end From f4bd4d27b66c9c3ca6f020cc92819d0ed8c8109f Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Sat, 3 Sep 2022 22:26:55 +0200 Subject: [PATCH 6/6] Disequilibrium --- src/Entropies.jl | 3 ++ src/disequilibrium/disequilibrium.jl | 48 +++++++++++++++++++ src/disequilibrium/generator.jl | 30 ++++++++++++ src/symbolic/higher_dim/stencil/stencil_2d.jl | 24 +++++++++- 4 files changed, 104 insertions(+), 1 deletion(-) create mode 100644 src/disequilibrium/disequilibrium.jl create mode 100644 src/disequilibrium/generator.jl diff --git a/src/Entropies.jl b/src/Entropies.jl index 240362f67..460970686 100644 --- a/src/Entropies.jl +++ b/src/Entropies.jl @@ -1,6 +1,9 @@ module Entropies include("core.jl") include("api.jl") + + include("disequilibrium/disequilibrium.jl") + include("histogram_estimation.jl") include("counting_based/CountOccurrences.jl") include("symbolic/symbolic.jl") diff --git a/src/disequilibrium/disequilibrium.jl b/src/disequilibrium/disequilibrium.jl new file mode 100644 index 000000000..e5d6744a3 --- /dev/null +++ b/src/disequilibrium/disequilibrium.jl @@ -0,0 +1,48 @@ +export disequilibrium + +""" + disequilibrium(x, est) + +Compute the disequilibrium ``Q_J`` of `x` using the given estimator `est`. + +# Definition + +```math +Q_J[P, P_e] =\\dfrac{ S[\\dfrac{(P + P_e)}{2}] - \\dfrac{S[P]}{2}] - \\dfrac{S[P_e]}{2}] }{Q_{max}}, +``` + +where + +```math +Q_{max} = - \\dfrac{1}{2} \\left{ \\dfrac{(d_x d_y)! + 1}{d_x d_y} \\log [(d_x d_y)! + 1] - 2 \\log [2(d_x d_y)!] + \\log[(d_x d_y)!] \\right} +```` + +# Meaning + +The disequilibrium quantifies the information contained in correlational structures beyond +that which is captured by permutation entropy. + + +""" +function disequilibrium end + +# For disequilibrium, we are taking the Shannon entropy of distributions which don't sum +# to 1 (𝐏 .+ 𝐏ₑ). Since `genentropy` is only defined for `Probabilities` instances, we +# define this (only internally used) function. +_shannon_ent(p; base = 2) = -sum(x * log(base, x) for x in p) + +function _compute_q(𝐏::AbstractVector{T}, 𝐏ₑ::AbstractVector{T}; + base = MathConstants.e) where T <: Real + _shannon_ent((𝐏 .+ 𝐏ₑ) ./ 2, base = base) - + _shannon_ent(𝐏, base = base) / 2 - + _shannon_ent(𝐏ₑ, base = base) / 2 +end + +function _compute_q(𝐏::AbstractVector{T}; base = MathConstants.e) where T <: Real + Pₑ = repeat([1/length(P)], length(P)) + _shannon_ent((𝐏 .+ 𝐏ₑ) ./ 2, base = base) - + _shannon_ent(𝐏, base = base) / 2 - + _shannon_ent(𝐏ₑ, base = base) / 2 +end + +include("generator.jl") diff --git a/src/disequilibrium/generator.jl b/src/disequilibrium/generator.jl new file mode 100644 index 000000000..2130ed1bd --- /dev/null +++ b/src/disequilibrium/generator.jl @@ -0,0 +1,30 @@ +""" + DisequilibriumGenerator(probability_generator, x, init, rng) + +A generator for repeated computation of [`disequilibrium`](@ref). + +# Fields + +- A probability generator `probability_generator`(see [`ProbabilityGenerator`](@ref)). +- Input data `x`. +- Initialization values `init`, given as a named tuple. +- A random number generator `rng` for reproducibility, which is applied if relevant. +""" +struct DisequilibriumGenerator{M, PG, X, A, R <: AbstractRNG} + method::M + probability_generator::PG + x::X # input data + init::A # pre-initialized things that speed up estimation + rng::R # random number generator object +end + +""" + disequilibriumgenerator(x, method, rng = Random.default_rng()) + +Generate a [`DisequilibriumGenerator`](@ref) for `x` using the given estimation `method`. +""" +function disequilibriumgenerator(x, method, rng = Random.default_rng()) + pg = probabilitygenerator(x, method, rng) + init = () + return DisequilibriumGenerator(method, pg, x, init, rng) +end diff --git a/src/symbolic/higher_dim/stencil/stencil_2d.jl b/src/symbolic/higher_dim/stencil/stencil_2d.jl index 680fd65d3..bd83ecf5d 100644 --- a/src/symbolic/higher_dim/stencil/stencil_2d.jl +++ b/src/symbolic/higher_dim/stencil/stencil_2d.jl @@ -1,4 +1,4 @@ - +export disequilibriumgenerator function probabilitygenerator(x::AbstractArray{T, 2}, method::PermutationStencil{2}, rng = Random.default_rng()) where T @@ -113,3 +113,25 @@ function genentropy(x::AbstractArray{T, 2}, est::PermutationStencil{2}; eg = entropygenerator(x, est) eg(x, base = base, q = q, normalize = normalize) end + + +function _qmax(dx, dy; base = MathConstants.e) + dxb, dyb = BigInt(dx), BigInt(dy) + f = factorial(dxb*dyb) + qmax = -0.5 * ( + (f + 1)/f * log(base, f + 1) - + 2 * log(base, 2 * f) + log(base, f) + ) + return convert(Float64, qmax) +end + +function disequilibrium(x::AbstractArray{T, 2}, est::PermutationStencil{2}; + base = MathConstants.e) where T + + dy, dx = size(est.stencil) + diseq_gen = disequilibriumgenerator(x, est) + 𝐏 = diseq_gen.probability_generator(x) + N = length(𝐏) + 𝐏ₑ = [1/N for i = 1:N] + return _compute_q(𝐏, 𝐏ₑ, base = base) / _qmax(dx, dy, base = base) +end