192
192
193
193
# Factor out equivalence classes given by different roots
194
194
function Base. hash (t:: ColoredRootedTree , h:: UInt )
195
- # TODO : ColoredRootedTree. Use a fast path if possible
195
+ # Use a fast path if possible
196
+ if UInt == UInt64 && t isa BicoloredRootedTree && order (t) <= 12
197
+ return simple_hash (t, h)
198
+ end
199
+
196
200
isempty (t. level_sequence) && return h
197
201
root = first (t. level_sequence)
198
202
for (l, c) in zip (t. level_sequence, t. color_sequence)
@@ -202,21 +206,154 @@ function Base.hash(t::ColoredRootedTree, h::UInt)
202
206
return h
203
207
end
204
208
209
+ # Map the level sequence to an unsigned integer by concatenating the bit
210
+ # representations of level sequence differences. If the level sequence increases
211
+ # from one vertex to the next, it can increase at most by unity. Since we want
212
+ # to use simple bits representations, we measure the decrease compared to the
213
+ # maximal possible increase.
214
+ # The maximal drop in the level sequence is
215
+ # maximal_drop = length(t.level_sequence) - 3
216
+ # We need at most
217
+ # number_of_bits = trunc(Int, log2(maximal_drop)) + 1
218
+ # bits to represent this. Thus, 64 bits allow us to compute unique hashes for
219
+ # level sequences up to length 16 in the following simple way; 64 bits result
220
+ # in `number_of_bits = 4` for `maximal_drop = 16 - 3 = 13`.
221
+ # For 32 bits, we could use a maximal length of 10 with `number_of_bits = 3`.
222
+ # However, most user systems should use 64 bit by default, so we only implement
223
+ # this option for simplicity.
224
+ # The binary color sequence is mapped to an unsigned integer by interpreting
225
+ # the Boolean colors as bits of an unsigned integer. Thus, we need one
226
+ # additional bit per level to store also the color information. Thus, we
227
+ # can use this simple version with 64 bits up to a maximal length of 12
228
+ # (maximal_drop = 9; number_of_bits = 4; max_length * (number_of_bits + 1) = 60)
229
"""
    simple_hash(t::BicoloredRootedTree, h_base::UInt64)

Compute a hash of `t` by packing its level and color sequences into a single
`UInt64` and mixing the result into `h_base` via `hash`.

Each entry of `t.level_sequence` contributes 4 bits holding `l_prev + 1 - l`,
i.e., how far the level falls short of the largest possible step (an increase
by one) relative to the previous entry. Only differences of levels are used,
so the value of the root level itself does not enter the result. Afterwards,
each entry of `t.color_sequence` contributes one further bit.

An empty tree returns `h_base` unchanged.

Packing uses `4 + 1 = 5` bits per vertex. Older bits are shifted out once more
than 64 bits are needed; the dispatching `Base.hash` method only takes this
path for `order(t) <= 12`, i.e., at most 60 bits.
"""
@inline function simple_hash(t::BicoloredRootedTree, h_base::UInt64)
    isempty(t.level_sequence) && return h_base

    # Accumulate the packed representation in an integer of the same type as
    # `h_base`.
    h = zero(h_base)

    # Level sequence: 4 bits per vertex. For the first entry, `l_prev == l`,
    # so the stored value is always 1.
    l_prev = first(t.level_sequence)
    for l in t.level_sequence
        h = (h << 4) | (l_prev + 1 - l)
        l_prev = l
    end

    # Color sequence: one bit per vertex.
    for c in t.color_sequence
        h = (h << 1) | c
    end

    return hash(h, h_base)
end
242
+
205
243
206
244
# generation and canonical representation
207
- # TODO : ColoredRootedTree. Performance improvements possible using in-place sort
208
- function canonical_representation! (t:: ColoredRootedTree )
209
- subtr = subtrees (t)
210
- for i in eachindex (subtr)
211
- canonical_representation! (subtr[i])
245
+ # A very simple implementation of `canonical_representation!` could read as
246
+ # follows.
247
+ # function canonical_representation!(t::ColoredRootedTree)
248
+ # subtr = subtrees(t)
249
+ # for i in eachindex(subtr)
250
+ # canonical_representation!(subtr[i])
251
+ # end
252
+ # sort!(subtr, rev=true)
253
+
254
+ # i = 2
255
+ # for τ in subtr
256
+ # t.level_sequence[i:i+order(τ)-1] = τ.level_sequence
257
+ # t.color_sequence[i:i+order(τ)-1] = τ.color_sequence
258
+ # i += order(τ)
259
+ # end
260
+
261
+ # ColoredRootedTree(t.level_sequence, t.color_sequence, true)
262
+ # end
263
+ # However, this would create a lot of intermediate allocations, which make it
264
+ # rather slow. Since most trees in use are relatively small, we can use a
265
+ # non-allocating sorting algorithm instead - although bubble sort is slower in
266
+ # general when comparing the complexity with quicksort etc., it will be faster
267
+ # here since we can avoid allocations.
268
+ function canonical_representation! (t:: ColoredRootedTree ,
269
+ buffer_level= similar (t. level_sequence),
270
+ buffer_color= similar (t. color_sequence))
271
+ # Since we use a recursive implementation, it is useful to exit early for
272
+ # small trees. If there are at most 3 vertices in a valid rooted tree, its
273
+ # level sequence must already be in canonical representation. However, the
274
+ # color sequence of the bushy tree may be wrong. Thus, we can only skip the
275
+ # sorting for colored trees with at most two nodes.
276
+ if order (t) <= 2
277
+ return ColoredRootedTree (t. level_sequence, t. color_sequence, true )
212
278
end
213
- sort! (subtr, rev= true )
214
279
215
- i = 2
216
- for τ in subtr
217
- t. level_sequence[i: i+ order (τ)- 1 ] = τ. level_sequence
218
- t. color_sequence[i: i+ order (τ)- 1 ] = τ. color_sequence
219
- i += order (τ)
280
+ # First, sort all subtrees recursively. Here, we use `view`s to avoid memory
281
+ # allocations.
282
+ # TODO : Assume 1-based indexing in the following
283
+ subtree_root_index = 2
284
+ number_of_subtrees = 0
285
+
286
+ while subtree_root_index <= order (t)
287
+ subtree_last_index = _subtree_last_index (subtree_root_index, t. level_sequence)
288
+
289
+ # We found a complete subtree
290
+ idx_subtree = subtree_root_index: subtree_last_index
291
+ subtree = ColoredRootedTree (view (t. level_sequence, idx_subtree),
292
+ view (t. color_sequence, idx_subtree))
293
+ canonical_representation! (subtree,
294
+ view (buffer_level, idx_subtree),
295
+ view (buffer_color, idx_subtree))
296
+
297
+ subtree_root_index = subtree_last_index + 1
298
+ number_of_subtrees += 1
299
+ end
300
+
301
+ # Next, we need to sort the subtrees of `t` (in lexicographically decreasing
302
+ # order of the level sequences).
303
+ if number_of_subtrees > 1
304
+ # Simple bubble sort that can act in-place, avoiding allocations
305
+ # We keep track of the last index of the last subtree that we need to sort
306
+ # since we know that the last `n` subtrees are already sorted after `n`
307
+ # iterations.
308
+ subtree_last_index_to_sort = order (t)
309
+ swapped = true
310
+ while swapped
311
+ swapped = false
312
+
313
+ # Search the first complete subtree
314
+ subtree1_root_index = 2
315
+ subtree1_last_index = 0
316
+ subtree2_last_index = 0
317
+ while subtree1_root_index <= subtree_last_index_to_sort
318
+ subtree1_last_index = _subtree_last_index (subtree1_root_index, t. level_sequence)
319
+ subtree2_last_index = subtree1_last_index
320
+
321
+ # Search the next complete subtree
322
+ subtree1_last_index == subtree_last_index_to_sort && break
323
+
324
+ subtree2_root_index = subtree1_last_index + 1
325
+ subtree2_last_index = _subtree_last_index (subtree2_root_index, t. level_sequence)
326
+
327
+ # Swap the subtrees if they are not sorted correctly
328
+ subtree1_idx = subtree1_root_index: subtree1_last_index
329
+ subtree1 = ColoredRootedTree (view (t. level_sequence, subtree1_idx),
330
+ view (t. color_sequence, subtree1_idx))
331
+ subtree2_idx = subtree2_root_index: subtree2_last_index
332
+ subtree2 = ColoredRootedTree (view (t. level_sequence, subtree2_idx),
333
+ view (t. color_sequence, subtree2_idx))
334
+ if isless (subtree1, subtree2)
335
+ copyto! (buffer_level, 1 , t. level_sequence, subtree1_root_index, order (subtree1) + order (subtree2))
336
+ copyto! (t. level_sequence, subtree1_root_index, buffer_level, order (subtree1) + 1 , order (subtree2))
337
+ copyto! (t. level_sequence, subtree1_root_index + order (subtree2), buffer_level, 1 , order (subtree1))
338
+
339
+ copyto! (buffer_color, 1 , t. color_sequence, subtree1_root_index, order (subtree1) + order (subtree2))
340
+ copyto! (t. color_sequence, subtree1_root_index, buffer_color, order (subtree1) + 1 , order (subtree2))
341
+ copyto! (t. color_sequence, subtree1_root_index + order (subtree2), buffer_color, 1 , order (subtree1))
342
+
343
+ # `subtree1_root_index` will be updated below using `subtree1_last_index`.
344
+ # Thus, we need to adapt this variable here.
345
+ subtree1_last_index = subtree1_root_index + order (subtree2) - 1
346
+ swapped = true
347
+ end
348
+
349
+ # Move on to the next pair of subtrees
350
+ subtree2_last_index == subtree_last_index_to_sort && break
351
+ subtree1_root_index = subtree1_last_index + 1
352
+ end
353
+
354
+ # Update the last subtree we need to look at
355
+ subtree_last_index_to_sort = min (subtree1_last_index, subtree2_last_index)
356
+ end
220
357
end
221
358
222
359
ColoredRootedTree (t. level_sequence, t. color_sequence, true )
0 commit comments