1 /** 2 Copyright: Copyright (c) 2016- Alexander Orlov. All rights reserved. 3 License: $(LINK2 https://opensource.org/licenses/MIT, MIT License). 4 Authors: Alexander Orlov, $(LINK2 mailto:sascha.orlov@gmail.com, sascha.orlov@gmail.com) 5 */ 6 7 /** 8 This module implements a Van Emde Boas tree container. 9 All corrections, bug findings pull requests and comments are welcome. 10 The main idea of the container is, to restrict the capacity of the tree by the next power of two universe size, 11 given an arbitrary size at the initialization. 12 */ 13 14 /** 15 The main advantage of the Van Emde Boas tree appears on a large amount of elements, as the provided standard 16 operations of the tree are constant in time and of order O(lg2(lg2(U))), where U is the capacity of the tree, constant 17 after creation. For small amount of elements the overhead coming along with the structure take over. However, if the 18 universe size becomes bigger, the tree performance becomes better. 19 */ 20 21 /** 22 Be aware, the current container is intended to be used with keys. This implies, that the capacity, fixed on its 23 initialization has two meanings. As usual, it shows the maximum amount of elements the instanciated tree can keep. 24 But also, it states, that no value bigger then capacity - 1 exists in the tree. This, and the fact, that only 25 non-negative values can be used are infered from the term "key". 26 */ 27 28 /** 29 See_also: Thomas H. Cormen, Clifford Stein, Ronald L. Rivest, and Charles E. Leiserson. 2001. <em>Introduction to 30 Algorithms</em> (2nd ed.). McGraw-Hill Higher Education. 31 the idea of using bit operations was reused from the C++ implementation found at 32 http://www.keithschwarz.com/interesting/code/van-emde-boas-tree/ 33 */ 34 35 module vebtree; 36 import core.bitop; 37 import std.traits : ReturnType, isIterable, arity; 38 import std.typecons : Flag, Yes, No; 39 import std.math : nextPow2; 40 import core.stdc.limits : CHAR_BIT; 41 42 debug import std.format : format; 43 44 version (unittest) 45 { 46 import std.parallelism : parallel; 47 import std.conv : to; 48 import core.stdc.stdio : printf; 49 import std.container.rbtree : redBlackTree; 50 51 import std.range; 52 import std.random; 53 import std.format; 54 import std.container; // red black tree may be used in unittests for comparison. 55 import std.math : sqrt; 56 57 // helping function for output a given value in binary representation 58 void bin(size_t n) 59 { 60 /* step 1 */ 61 if (n > 1) 62 bin(n / 2); 63 /* step 2 */ 64 65 printf("%d", n % 2); 66 } 67 68 /// precalculated powers of two table for unit testing 69 import std.range : iota; 70 import std.algorithm.iteration : map; 71 72 enum powersOfTwo = (CHAR_BIT * size_t.sizeof).iota.map!(a => size_t(1) << a); 73 enum testMultiplier = 1; //16 74 75 auto generateVEBtree(size_t baseSize)(uint currentSeed, size_t front, size_t back, ref size_t M) 76 { 77 static assert(baseSize > 1); 78 static assert((baseSize & (baseSize - 1)) == 0); 79 assert(front >= 2); 80 rndGen.seed(currentSeed); //initialize the random generator 81 M = uniform(front, back + 1); // parameter for construction 82 return vebRoot!baseSize(M); 83 } 84 string generateDebugString(string identifier1, size_t identifier2, size_t baseSize, uint currentSeed, size_t M) 85 { 86 return format!"%s: %d baseSize: %d; seed: %d M: %d"(identifier1, identifier2, baseSize, currentSeed, M); 87 } 88 } 89 90 static foreach (_; 1 .. size_t.sizeof - 1) 91 { 92 /// 93 unittest 94 { 95 enum baseSize = 1 << _; 96 foreach (b; (CHAR_BIT * size_t.sizeof * testMultiplier).iota.parallel) 97 { 98 auto currentSeed = unpredictableSeed(); 99 size_t M; 100 101 auto vT = generateVEBtree!(1 << _)(currentSeed, 2UL, baseSize, M); 102 assert(vT.universe == M); 103 const errorString = generateDebugString("UT: node test: ", b, baseSize, currentSeed, M); 104 105 assert(vT.value_ == 0, errorString); 106 assert(vT.ptr_ is null, errorString); 107 assert(vT.capacity == baseSize, errorString); 108 assert(vT.empty == true, errorString); 109 assert(vT.front == NIL, errorString); 110 assert(vT.back == NIL, errorString); 111 assert(vT[].front == 0, errorString); 112 assert(vT[].back == vT.universe, errorString); 113 assert(vT().front == NIL, errorString); 114 assert(vT().back == NIL, errorString); 115 assert(vT.length == 0, errorString); 116 assert(vT.universe == M, errorString); 117 118 size_t N = uniform(0UL, 2 * M); // independent parameter for testing 119 // make an array of length N 120 size_t[] testArray, cacheArray; 121 testArray = new size_t[N]; 122 cacheArray.reserve(N); 123 // fill the array with all possible values 124 foreach (ref el; testArray) 125 { 126 el = (2 * M).iota.choice; 127 } 128 129 foreach (testNumber; testArray) 130 { 131 assert(vT.universe == M, errorString); 132 const insertResult = vT.insert(testNumber); 133 134 if (insertResult) 135 { 136 assert(!vT.empty, errorString); 137 cacheArray ~= testNumber; 138 } 139 } 140 141 import std.algorithm.sorting : sort; 142 143 cacheArray.sort; 144 145 if (cacheArray.empty) 146 { 147 assert(vT.empty, errorString); 148 } 149 else 150 { 151 assert(!vT.empty, errorString); 152 } 153 154 foreach (el; cacheArray) 155 { 156 assert(bt(&vT.value_, el), errorString); 157 } 158 import std.algorithm.iteration : uniq; 159 import std.algorithm.searching : count; 160 161 assert(vT.length == cacheArray.uniq.count, errorString); 162 assert(vT.universe == M, errorString); 163 if (cacheArray.length) 164 { 165 assert(vT.front == cacheArray.front, errorString); 166 assert(vT.back == cacheArray.back, errorString); 167 } 168 else 169 { 170 assert(vT.front == NIL, errorString); 171 assert(vT.back == NIL, errorString); 172 } 173 174 auto currElement = vT.front; 175 foreach (el; cacheArray.uniq) 176 { 177 assert(currElement == el, errorString); 178 currElement = vT.next(currElement); 179 } 180 currElement = vT.back; 181 foreach (el; cacheArray.uniq.array.retro) 182 { 183 assert(currElement == el, errorString); 184 currElement = vT.prev(currElement); 185 } 186 187 foreach (key; 0 .. vT.universe) 188 { 189 import std.algorithm.searching : canFind; 190 191 if (cacheArray.uniq.array.canFind(key)) 192 { 193 assert(key in vT, errorString); 194 } 195 else 196 { 197 assert(!(key in vT), errorString); 198 } 199 } 200 auto deepCopy = vT.dup; 201 202 assert(deepCopy.value_ == vT.value_, errorString); 203 assert(vT == cacheArray.uniq, errorString); 204 assert(vT.prev(vT.front) == NIL, errorString); 205 assert(vT.next(vT.back) == NIL, errorString); 206 assert(vT == deepCopy, errorString); 207 assert(vT == deepCopy(), errorString); 208 209 if (cacheArray.length) 210 { 211 auto val = cacheArray.uniq.array.randomCover.front; 212 vT.remove(val); 213 assert((deepCopy.value_ ^ vT.value_) == (size_t(1) << val), errorString); 214 import std.algorithm.iteration : each; 215 import std.algorithm.searching : count, find; 216 import std.algorithm.mutation : remove; 217 218 cacheArray.count(val).iota.each!(i => cacheArray = cacheArray.remove( 219 cacheArray.length - cacheArray.find(val).length)); 220 } 221 else 222 { 223 assert((deepCopy.value_ ^ vT.value_) == 0, errorString); 224 } 225 226 foreach (key; 0 .. vT.capacity) 227 { 228 import std.algorithm.searching : canFind; 229 230 if (cacheArray.uniq.array.canFind(key)) 231 { 232 assert(vT.remove(key), errorString); 233 } 234 else 235 { 236 assert(!(vT.remove(key)), errorString); 237 } 238 } 239 assert(vT.value_ == 0, errorString); 240 assert(vT.empty, errorString); 241 } 242 } 243 } 244 245 static foreach (_; 1 .. size_t.sizeof - 1) 246 { 247 /// 248 unittest 249 { 250 import std.range : iota; 251 foreach (b; (CHAR_BIT * size_t.sizeof * testMultiplier).iota.parallel) 252 { 253 auto currentSeed = unpredictableSeed(); 254 size_t M; 255 auto vT = generateVEBtree!(1 << _) 256 (currentSeed, CHAR_BIT * size_t.sizeof, CHAR_BIT * size_t.sizeof * CHAR_BIT * size_t.sizeof, M); 257 const errorString = 258 generateDebugString("UT: tree test of capacity and universe: ", b, 1 << _, currentSeed, M); 259 260 assert(vT.universe == M, errorString); 261 assert(vT.capacity == (vT.universe - 1).nextPow2, 262 to!string("vT.capacity: " ~ to!string( 263 vT.capacity) ~ " vT.universe: " ~ to!string(vT.universe))); 264 assert(!(vT.ptr_ is null), errorString); 265 assert(vT.capacity == (vT.universe - 1).nextPow2, errorString); 266 } 267 } 268 } 269 270 static foreach (_; 1 .. size_t.sizeof - 1) 271 { 272 /// 273 unittest 274 { 275 import std.range : iota; 276 foreach (b; (CHAR_BIT * size_t.sizeof * testMultiplier).iota.parallel) 277 { 278 auto currentSeed = unpredictableSeed(); 279 size_t M; 280 auto vT = generateVEBtree!(1 << _) 281 (currentSeed, CHAR_BIT * size_t.sizeof, CHAR_BIT * size_t.sizeof * CHAR_BIT * size_t.sizeof, M); 282 const errorString = 283 generateDebugString("UT: tree test, general case: ", b, 1 << _, currentSeed, M); 284 size_t N = uniform(0UL, 2 * M); // independent parameter for testing 285 286 // make an array of length N 287 size_t[] testArray, cacheArray; 288 testArray = new size_t[N]; 289 cacheArray.reserve(N); 290 // fill the array with all possible values 291 foreach (ref el; testArray) 292 el = (2 * M).iota.choice; 293 294 auto rbt = redBlackTree!size_t(); 295 296 foreach (val; testArray) 297 { 298 assert(vT.universe == M, errorString); 299 assert(vT.length == rbt.length, errorString); 300 301 bool insertExpectation; 302 if (val < vT.capacity && !(val in vT)) 303 { 304 insertExpectation = true; 305 } 306 const insertResult = vT.insert(val); 307 308 assert(insertResult == insertExpectation, errorString); 309 310 if (insertResult) 311 { 312 313 assert(val in vT, errorString); 314 assert(!vT.empty, errorString); 315 rbt.insert(val); 316 assert(vT.front == rbt.front, errorString); 317 assert(vT.back == rbt.back, 318 "val:" ~ to!string(val) ~ " vT.back: " ~ to!string( 319 vT.back) ~ " rbt.back: " ~ to!string(rbt.back)); 320 321 cacheArray ~= val; 322 } 323 else 324 { 325 if (!(val in rbt)) 326 { 327 assert(!(val in vT), errorString); 328 } 329 else 330 { 331 assert(val in vT, errorString); 332 } 333 } 334 } 335 336 import std.algorithm.sorting : sort; 337 cacheArray.sort; 338 339 foreach (i, el; cacheArray) 340 { 341 assert(el in vT, errorString); 342 if (i + 1 != cacheArray.length) 343 { 344 assert(vT.next(el) == cacheArray[i + 1],errorString); 345 } 346 else 347 { 348 assert(vT.next(el) == NIL, errorString); 349 } 350 } 351 352 foreach (i, el; vT) 353 assert(el == cacheArray[i], errorString); 354 355 assert(vT == cacheArray, errorString); 356 357 auto vT2 = vT.dup; 358 assert(vT == vT2); 359 360 if(cacheArray.length) 361 { 362 auto rndNum = cacheArray.choice; 363 vT2.remove(rndNum); 364 assert(!(rndNum in vT2)); 365 assert(rndNum in vT); 366 assert(vT != vT2); 367 rndNum = uniform(0UL, vT2.capacity); 368 if(!(rndNum in vT)) 369 { 370 assert(!(rndNum in vT), errorString ~ format!"rndNum: %d"(rndNum)); 371 assert(vT2.insert(rndNum), errorString); 372 } 373 assert(vT != vT2); 374 } 375 376 const rangeExclusive = vT(); 377 assert(vT == rangeExclusive); 378 379 auto rangeInclusive = vT[]; 380 import std.algorithm.comparison : equal; 381 import std.algorithm.iteration : uniq; 382 assert(equal(uniq(rangeInclusive), rangeInclusive)); 383 import std.range : enumerate; 384 foreach(i, el; rangeInclusive.enumerate) 385 { 386 if(i == 0) 387 { 388 if(!(0 in vT)) 389 { 390 continue; 391 } 392 } 393 else if(i + 1 != rangeInclusive.length) 394 { 395 assert(el in vT, errorString ~ format!" el: %d"(el)); 396 } 397 else if(i + 1 == rangeInclusive.length) 398 { 399 assert(el == vT.universe || el == vT.capacity); 400 if(el == vT.universe) 401 { 402 assert(vT.back <= vT.universe || vT.back == NIL, errorString ~ format!" length: %d"(vT.length)); 403 } 404 else 405 { 406 assert(vT.back > vT.universe, errorString); 407 assert(vT.back < vT.capacity, errorString); 408 } 409 } 410 else 411 { 412 assert(0); 413 } 414 } 415 416 import std.range : retro, enumerate; 417 foreach (i, el; cacheArray.retro.enumerate) 418 { 419 assert(el in vT, errorString); 420 if (i + 1 != cacheArray.length) 421 { 422 assert(vT.prev(el) == cacheArray[($ - 1) - (i + 1)], errorString); 423 } 424 else 425 { 426 assert(vT.prev(el) == NIL, errorString); 427 } 428 } 429 430 foreach (val; testArray) 431 { 432 assert(vT.length == rbt.length, errorString); 433 if (val in rbt) 434 { 435 assert(val in vT, errorString); 436 rbt.removeKey(val); 437 assert(vT.remove(val), errorString); 438 } 439 else 440 { 441 assert(!(val in vT), errorString); 442 assert(!vT.remove(val), errorString); 443 } 444 assert(!(val in rbt), errorString); 445 assert(!(val in vT), errorString); 446 } 447 assert(vT.length == 0, errorString); 448 assert(rbt.length == 0, errorString); 449 } 450 } 451 } 452 453 /** 454 define the absence of a key to be -1. 455 */ 456 enum NIL = ptrdiff_t(-1); 457 458 /** 459 The tree creator function. Optionally, the base size can be provided at compile time, however, the best results are 460 achieved with the default base size of CHAR_BIT * size_t.sizeof 461 */ 462 auto vebRoot(size_t baseSize = CHAR_BIT * size_t.sizeof)(size_t universe) 463 { 464 /** 465 Two parameters are provided: 466 - the base size is the maximal amount bits can be stored in a single node without branching (generating children) 467 - the universe is the user provided input, providing the expected amount of keys, going to be stored in the tree 468 */ 469 return VEBroot!baseSize(universe); 470 } 471 472 /** 473 A van Emde Boas node implementation 474 */ 475 struct VEBroot(size_t baseSize) if((baseSize & (baseSize - 1)) == 0) 476 { 477 /** 478 yields a deep copy of the node. I. e. copies all data in children and allocates another tree 479 */ 480 typeof(this) dup() 481 { 482 auto retVal = typeof(this)(universe); 483 foreach (el; opCall()) 484 retVal.insert(el); 485 return retVal; 486 } 487 488 /** 489 []-slicing. Yields a "random access range" with the content of the tree, always containing zero and the key after 490 the maximum element as keys. The key after the maximal key is the universe, if the tree is empty or the maximal 491 contained key is lesser then empty, otherwise the capacity of the tree. 492 */ 493 auto opIndex() @nogc 494 { 495 return vebTree!(Yes.inclusive)(this); 496 } 497 498 /** 499 slicing operation. recieves the opSlice!0(x,y) object from x .. y input. 500 as used with [] brackets, works as slicing with inclusion. 501 502 Note: This operation is not natural to the vebtree, as the indicies of keys are not known in general. 503 However, notably for the case of inclusive slicing (and indeed only working for this kind) one can iterate, e.g. 504 over all but some indicies from any end of the tree. This method is provided for such iterations. 505 506 During preparation, an inclusive slice is created and popFront (according popBack) is called corresponding times, 507 to remove the approprate number of elements from the front and from the back of the returned range. 508 */ 509 auto opIndex(size_t[2] input) @nogc 510 in(input[1] - input[0] <= this.length) 511 { 512 auto retVal = vebTree!(Yes.inclusive)(this); 513 514 import std.range : iota; 515 import std.algorithm.iteration : each; 516 517 input[0].iota.each!(i => retVal.popFront); 518 (this.length - input[1]).iota.each!(i => retVal.popBack); 519 520 return retVal; 521 } 522 523 /// opDollar works as "intended" and returns the length of the underlying array 524 @property size_t opDollar(size_t dim : 0)() @nogc 525 { 526 return this.length; 527 } 528 529 /// opSlice for slicing operation returns the static array of two numbers, for opIndex interop 530 size_t[2] opSlice(size_t dim : 0)(size_t x, size_t y) @nogc 531 { 532 import std.array : staticArray; 533 return [x, y].staticArray!size_t; 534 } 535 536 /** 537 ()-slicing. Yields a "random access range" with the content of the tree. Keys can be NIL. 538 */ 539 auto opCall() @nogc 540 { 541 return vebTree!(No.inclusive)(this); 542 } 543 544 /** 545 Equality operator checks for any iterable, whether in contains the same values, as the current tree. 546 */ 547 bool opEquals(T)(auto ref T input) const if (isIterable!T) 548 { 549 import std.range : hasLength; 550 static if (hasLength!T) 551 if (length != input.length) 552 return false; 553 554 size_t currentElem = this.front; 555 556 foreach (el; input) 557 { 558 if (el != currentElem) 559 return false; 560 currentElem = this.next(currentElem); 561 } 562 563 return true; 564 } 565 566 /** 567 member method for the case universe size < base size. Overloads by passing only one parameter, which is 568 the bit number of interest. Returns whether the appropriate bit inside the bitvector is set. 569 */ 570 bool opBinaryRight(string op)(size_t key) @nogc const if (op == "in") 571 { 572 if (key >= this.capacity) 573 return false; 574 575 if (this.empty) // if an empty intermediate node is found, nothing is stored below it. 576 return false; 577 578 if (this.isLeaf) 579 return bt(&value_, key) != 0; 580 else 581 { 582 // case of a single valued range. 583 if (key == this.front || key == this.back) 584 return true; 585 586 /* 587 else we have to descend, using the recursive indexing: 588 1. take the high(value, uS)-th child and 589 2. ask it about the reduced low(value, uS) value 590 3. use the lSR(uS) universe size of the childe node. 591 */ 592 return low(key) in ptr_[high(key)]; 593 } 594 } 595 596 /** 597 the opApply method grants the correct foreach behavior, nogc version 598 */ 599 int opApply(scope int delegate(ref size_t) @nogc operations) const @nogc 600 { 601 return opApplyImpl(operations); 602 } 603 604 /** 605 ditto 606 */ 607 int opApply(scope int delegate(ref size_t, ref size_t) @nogc operations) const @nogc 608 { 609 return opApplyImpl(operations); 610 } 611 612 /** 613 ditto 614 */ 615 int opApply(scope int delegate(ref size_t) operations) const 616 { 617 return opApplyImpl(operations); 618 } 619 620 /** 621 ditto 622 */ 623 int opApply(scope int delegate(ref size_t, ref size_t) operations) const 624 { 625 return opApplyImpl(operations); 626 } 627 628 /** 629 Node constructor. A universe size provided, if the size is below the cutoff there is nothing to be done, as the 630 underlying value created and set to zero by default. 631 If otherwise create an array of children. This array has to be (according to Cormen) of size of higher square 632 root of the current universe size + 1. The extra place is reserved for the summary. 633 For each just created child call its constructor. 634 For the summary with the universe size of the higher square root of the current universe size. 635 For each other child with the universe size of the lower square root of the currennt universe size. 636 Then, assign the fully initialized children array to the pointer in the current node, doing approprate steps to 637 show, that this node is an intermediate node, not containing any values yet. 638 The knowledge of the current universe size is lost at this moment. As this keeps every build up node smaller 639 (and there could be a lot of them). This is why, the VEBtree class continues to hold the global universe size, 640 which is passed on every call to the root node. In this way this, extern saved value has the role of being 641 outsourced array size for each (!) node in the tree, as its size is reconstructed during the access to them. 642 */ 643 644 @disable this(this); 645 646 /** 647 It is allowed to construct the root of the van Emde Boas tree directly, without the convenience method. 648 Params: 649 val = Expected universe size. The tree is generated so that every index below the universe size can be put 650 inside. 651 */ 652 this(size_t val) 653 in(val >= 2) 654 { 655 universe = val; 656 setEmpty; 657 658 assert(!length_ == this.empty); 659 660 if (!isLeaf) 661 { 662 assert(this.capacity == (universe - 1).nextPow2); 663 const arrSize = this.capacity.hSR + 1; 664 665 // reserve enough place for the summary and the children cluster 666 ptr_ = (new typeof(this)[arrSize]).ptr; 667 668 // add higher square root children with lower square root universe each. 669 foreach (i, ref el; cluster) 670 el = typeof(this)(this.capacity.lSR); 671 672 // add the summary with its universe of higher squaure root of the current universe 673 summary = typeof(this)(this.capacity.hSR); 674 } 675 assert(!length_ == this.empty); 676 } 677 678 /** 679 This tree has a length notion: it is the current number of inserted elements. 680 Returns: The current amount of contained keys. 681 */ 682 size_t length() const @nogc 683 { 684 return length_; 685 } 686 687 /** 688 the empty method to inform of an empty state of the tree. 689 Returns: Whether the tree is currently empty 690 */ 691 bool empty() const @nogc 692 { 693 return isLeaf ? value_ == 0 : value_ == -NIL; 694 } 695 696 /** 697 This yields whether the node is a leaf node. 698 Returns: Whether the node is a leaf. 699 */ 700 bool isLeaf() const @nogc 701 { 702 return universe <= baseSize; 703 } 704 705 /** 706 The minimal contained key in the van Emde Boas tree 707 Returns: The minimal contained element of the tree 708 */ 709 size_t front() @nogc const 710 { 711 if (empty) // do not proceed at all, if the root is empty 712 return NIL; 713 if (isLeaf) // pass control to the node 714 return bsf(value_); 715 return value_ & lowerMask; 716 } 717 718 /** 719 The maximal contained key in the van Emde Boas tree 720 Returns: The maximal contained element of the tree 721 */ 722 size_t back() @nogc const 723 { 724 if (empty) // do not proceed at all, if the root is empty 725 return NIL; 726 if (isLeaf) // pass control to the node 727 return bsr(value_); 728 return (value_ & higherMask) >> (CHAR_BIT * size_t.sizeof / 2); 729 } 730 731 /** 732 As a usual container, van Emde Boas tree provides the notion of capacity 733 Returns: The overall capacity of the tree. It is at least as high as the universe size provided on creation. 734 */ 735 size_t capacity() @nogc const 736 { 737 return isLeaf ? baseSize : (universe - 1).nextPow2; 738 } 739 740 /** 741 remove method of the van Emde Boas tree 742 Params: 743 val = The key to remove 744 Returns: Whether the key was removed. It is true, when the key was present, false otherwise 745 */ 746 bool remove(size_t val) @nogc 747 { 748 if (val >= capacity) // do not proceed at all, if the value can't be in the tree 749 return false; 750 if (empty) // do not proceed at all, if the root is empty 751 return false; 752 if (isLeaf) // pass control to the node 753 return length(length - (btr(&value_, val) != 0)); 754 if (front == back) // if the current node contains only a single value 755 { 756 assert(length == 1); 757 if (front != val) 758 return false; // do nothing if the given value is not the stored one 759 assert(length == 1); 760 return length(length - 1); 761 } 762 763 if (val == front) // if we met the minimum of a node 764 { 765 auto treeOffset = summary.front; // calculate an offset from the summary to continue with 766 if (treeOffset == NIL) // if the offset is invalid, then there is no further hierarchy and we are going to 767 { 768 front = back; // store a single value in this node. 769 assert(length == 2); 770 return length(length - 1); 771 } 772 auto m = cluster[treeOffset].front; // otherwise we get the minimum from the offset child 773 // remove it from the child 774 cluster[treeOffset].remove(m); 775 if (cluster[treeOffset].empty) 776 summary.remove(treeOffset); 777 //anyway, the new front of the current node become the restored value of the calculated offset. 778 front = index(treeOffset, m); 779 assert(length); 780 return length(length - 1); 781 } 782 783 if (val == back) // if we met the maximum of a node 784 { 785 // calculate an offset from the summary to contiue with 786 auto treeOffset = summary.back; 787 // if the offset is invalid, then there is no further hierarchy and we are going to 788 if (treeOffset == NIL) 789 { 790 // store a single value in this node. 791 back = front; 792 assert(length == 2); 793 return length(length - 1); 794 } 795 // otherwise we get maximum from the offset child 796 auto m = cluster[treeOffset].back; 797 // remove it from the child 798 cluster[treeOffset].remove(m); 799 if (cluster[treeOffset].empty) 800 summary.remove(treeOffset); 801 // anyway, the new back of the current node become the restored value of the calculated offset. 802 back = index(treeOffset, m); 803 assert(length); 804 return length(length - 1); 805 } 806 // if no condition was met we have to descend deeper. We get the offset by reducing the value to high(value, uS) 807 auto treeOffset = high(val); 808 auto res = length(length - cluster[treeOffset].remove(low(val))); 809 if (cluster[treeOffset].empty) 810 summary.remove(treeOffset); 811 return res; 812 } 813 814 /** 815 The successor search method of the van Emde Boas tree. 816 Params: 817 val = The key the next greater neighbor of which is looked for. 818 Returns: Ditto. If the next greater neighbor is missing a number out of bounds of the tree is returned. 819 */ 820 size_t next(size_t val) @nogc const 821 { 822 if (empty) // do not proceed at all, if the root is empty 823 return NIL; 824 if (isLeaf) // pass control to the node 825 { 826 if (val + 1 >= baseSize) // all vals are reduced by one. 827 return NIL; 828 829 // create a mask, which hides all lower bits of the stored value up to the given bit number, then apply 830 // bit search from the lowest bit. 831 auto maskedVal = value_ & ~((size_t(1) << (val + 1)) - 1); 832 833 if (maskedVal != 0) 834 return maskedVal.bsf; 835 836 return NIL; 837 } 838 // if given value is less then the front, return the front as successor 839 if (val < front) 840 return front; 841 // if given value is greater then the back, no predecessor exists 842 if (val >= back) 843 return NIL; 844 // if none of the break conditions was met, we have to descent further into the tree. 845 // calculate the child index by high(value, uS) 846 const childIndex = high(val); 847 // look into the child for its maximum 848 const maxlow = cluster[childIndex].back; 849 // if the maximum exists and the lowered given value is less then the child's maximum 850 if ((maxlow != NIL) && (low(val) < maxlow)) 851 { 852 auto offset = cluster[childIndex].next(low(val)); 853 // the result is given by reconstruction of the answer 854 return index(childIndex, offset); 855 } 856 else // otherwise we can not use the maximum of the child 857 { 858 auto succcluster = summary.next(childIndex); 859 // if the successor cluster is null 860 if (succcluster == NIL) 861 return back; 862 assert(succcluster != NIL); 863 assert(cluster[succcluster].front != NIL); 864 // if the successor cluster exists, the offset is given by its minimum 865 // and the result by the reconstruction of the offset. 866 return index(succcluster, cluster[succcluster].front); 867 } 868 } 869 870 /** 871 The predecessor search method of the van Emde Boas tree. 872 Params: 873 val = The key the next smaller neighbor of which is looked for. 874 Returns: Ditto. If the next smaller neighbor is missing a number out of bounds of the tree is returned. 875 */ 876 size_t prev(size_t val) @nogc const 877 { 878 if (empty) // do not proceed at all, if the root is empty 879 return NIL; 880 if (isLeaf) // pass control to the node 881 { 882 if (val != 0) 883 { 884 /* 885 create a mask, which hides all higher bits of the stored value down to the given bit number, then apply 886 bit search from the highest bit. 887 */ 888 auto maskedVal = value_ & ((size_t(1) << val) - 1); 889 890 if (maskedVal != 0) 891 return typeof(return)(maskedVal.bsr); 892 } 893 return NIL; 894 } 895 // if given value is greater then the stored back, the predecessor is back 896 if (val > back) 897 return back; 898 // if given value is less then the front, no predecessor exists. 899 if (val <= front) 900 return NIL; 901 // if none of the break conditions was met we have to descend further into the tree. 902 auto childIndex = high(val); // calculate the child index by high(value, uS) 903 const minlow = cluster[childIndex].front; // look into the child for its minimum 904 // if the minimum exists and the lowered given value is greater then the child's minimum 905 if ((minlow != NIL) && (low(val) > minlow)) 906 { 907 auto offset = cluster[childIndex].prev(low(val)); 908 // the result is given by reconstruction of the answer. 909 return index(childIndex, offset); 910 } 911 else // otherwise we can not use the minimum of the child 912 { 913 auto predcluster = summary.prev(childIndex); 914 // if the predecessor cluster is null return the current front, as this is the last remaining value 915 if (predcluster == NIL) 916 return front; 917 // if the predecessor cluster exists, the offset is given by its maximum 918 // and the result by the reconstruction of the offset. 919 return index(predcluster, cluster[predcluster].back); 920 } 921 } 922 923 /** 924 The insertion method of the van Emde Boas tree. 925 Params: 926 val = The key to insert 927 Returns: Whether the key was inserted. It is true, when the key was inserted, false otherwise 928 */ 929 bool insert(size_t val) @nogc 930 { 931 if (val >= capacity) // do not proceed at all, if the value won't fit into the tree 932 return false; 933 if (isLeaf) // pass control to the node 934 return length(length + (bts(&value_, val) == 0)); 935 if (empty) // if the current node does not contain anything put the value inside. 936 { 937 assert(empty); 938 front = val; 939 back = val; 940 assert(front == val); 941 assert(!empty); 942 assert(front == back); 943 assert(!empty); 944 return length(length + 1); 945 } 946 947 assert(!empty); 948 assert(front != NIL); 949 assert(back != NIL); 950 951 if (val == front || val == back) // if the value coincides with existing values, return 952 return false; 953 // if the node contains a single value only, expand the node to a range and leave. 954 if (front == back) 955 { 956 if (front > val) 957 front = val; 958 if (back < val) 959 back = val; 960 return length(length + 1); 961 } 962 /* 963 if none of the cases above was true (all of them are break conditions) we have to compare the given value 964 with the values present and adapt the range limits. This replaces the value we want to insert. 965 */ 966 967 // a swap can not be used here, as front is itself a (property) method 968 if (val < front) 969 { 970 const tmpKey = val; 971 val = front; 972 front = tmpKey; 973 assert(front == tmpKey); 974 } 975 976 // a swap can not be used here, as back is itself a (property) method 977 if (val > back) 978 { 979 const tmpKey = val; 980 val = back; 981 back = tmpKey; 982 assert(back == tmpKey); 983 } 984 985 // calculate the index of the children cluster by high(value, uS) we want to descent to. 986 const nextTreeIndex = high(val); 987 if (cluster[nextTreeIndex].empty) 988 summary.insert(nextTreeIndex); 989 return length(length + cluster[nextTreeIndex].insert(low(val))); 990 } 991 992 /** 993 The cached value of the universe, provided on creation 994 Returns: The cached input, provided on creation 995 */ 996 size_t universe() @nogc const 997 { 998 return universe_; 999 } 1000 1001 private: 1002 1003 size_t toHash() @nogc const nothrow { assert(0); } 1004 1005 bool front(size_t val) @nogc 1006 { 1007 if (isLeaf) // pass control to the node 1008 return insert(val); 1009 value_ = value_ & higherMask; // otherwise, set the lower part of the value, keeping the higher bits 1010 const retVal = ((value_ & lowerMask) == val) ? false : true; 1011 value_ = value_ | val; 1012 return retVal; // this is a bug! 1013 } 1014 1015 bool back(size_t val) @nogc 1016 { 1017 if (isLeaf) // pass control to the node 1018 return insert(val); 1019 value_ = value_ & lowerMask; // otherwise, set the higher part of the value, keeping the lower bits 1020 const retVal = (value_ & higherMask) == (val << (CHAR_BIT * size_t.sizeof / 2)) ? false : true; 1021 value_ = value_ | (val << (CHAR_BIT * size_t.sizeof / 2)); 1022 return retVal; // this is a bug! 1023 } 1024 1025 bool length(size_t input) @nogc 1026 in 1027 { 1028 assert(input <= this.capacity); 1029 1030 if (input != length) 1031 { 1032 input > length ? assert(input - length == 1) : assert(length - input == 1); 1033 } 1034 } 1035 do 1036 { 1037 const retVal = length != input; 1038 1039 length_ = input; 1040 1041 if (!length_) 1042 setEmpty; 1043 1044 return retVal; 1045 } 1046 1047 size_t index(size_t x, size_t y) const @nogc 1048 { 1049 return .index(this.capacity, x, y); 1050 } 1051 1052 size_t low(size_t val) const @nogc 1053 { 1054 return .low(this.capacity, val); 1055 } 1056 1057 size_t high(size_t val) const @nogc 1058 { 1059 return .high(this.capacity, val); 1060 } 1061 1062 void universe(size_t val) @nogc 1063 { 1064 universe_ = val; 1065 } 1066 1067 size_t value_; 1068 size_t universe_; 1069 size_t length_; 1070 typeof(this)* ptr_; 1071 1072 ref summary() inout @nogc 1073 in(!isLeaf) 1074 { // return the last element of the array of children, stored in the node. 1075 return ptr_[capacity.hSR]; 1076 } 1077 1078 auto cluster() inout @nogc 1079 in(!isLeaf) 1080 { // return all of the children in the stored array, but the last element 1081 return ptr_[0 .. capacity.hSR]; 1082 } 1083 1084 // The empty setter of a node. This function is kept for consistency in this module. 1085 void setEmpty() @nogc 1086 { 1087 value_ = isLeaf ? 0 : -NIL; 1088 } 1089 1090 // with the trick of https://forum.dlang.org/thread/erznqknpyxzxqivawnix@forum.dlang.org 1091 int opApplyImpl(O)(O operations) const 1092 { 1093 int result; 1094 size_t leading = this.front; 1095 1096 //for(size_t leading = front; leading < back; leading = this.next(leading)) 1097 1098 for (size_t i = 0; i < length; ++i) 1099 { 1100 static if (arity!operations == 1) 1101 result = operations(leading); 1102 else static if (arity!operations == 2) 1103 result = operations(i, leading); 1104 else 1105 assert(0); 1106 1107 if (result) 1108 break; 1109 1110 leading = this.next(leading); 1111 } 1112 1113 return result; 1114 } 1115 } 1116 1117 private: 1118 struct VEBtree(Flag!"inclusive" inclusive, T) 1119 { 1120 auto opBinaryRight(string op)(size_t key) @nogc if (op == "in") 1121 { 1122 return key in root; 1123 } 1124 1125 static if (inclusive) 1126 { 1127 size_t frontKey; 1128 size_t backKey; 1129 } 1130 else 1131 { 1132 ptrdiff_t frontKey; 1133 ptrdiff_t backKey; 1134 } 1135 1136 size_t length; 1137 1138 typeof(frontKey) front() @nogc 1139 { 1140 return frontKey; 1141 } 1142 1143 void popFront() @nogc 1144 in(!empty) 1145 { 1146 --length; 1147 frontKey = next(frontKey); 1148 } 1149 1150 typeof(backKey) back() @nogc 1151 { 1152 return backKey; 1153 } 1154 1155 void popBack() @nogc 1156 in(!empty) 1157 { 1158 --length; 1159 backKey = prev(backKey); 1160 } 1161 1162 auto prev(size_t key) @nogc 1163 { 1164 const pred = root.prev(key); 1165 static if (inclusive) 1166 return pred == NIL ? 0 : pred; 1167 else 1168 return pred; 1169 } 1170 1171 auto next(size_t key) @nogc 1172 { 1173 const succ = root.next(key); 1174 1175 static if(inclusive) 1176 debug 1177 if (succ == NIL) 1178 assert(length <= 1, format!"key: %d, length: %d\n"(key, length)); 1179 1180 static if (inclusive) 1181 if (succ == NIL) 1182 return backKey; 1183 else 1184 return succ; 1185 else 1186 return succ; 1187 } 1188 1189 bool empty() @nogc 1190 { 1191 return !length; 1192 } 1193 1194 auto save() const @nogc 1195 { 1196 return vebTree!(inclusive)(*root_, frontKey, backKey, length); 1197 } 1198 1199 size_t toHash() @nogc const nothrow { assert(0); } 1200 1201 /** 1202 for comparison with an iterable, the iterable will be iterated, as the current object. 1203 */ 1204 bool opEquals(T)(auto ref T input) const if (isIterable!T) 1205 { 1206 static if (is(T == typeof(this))) 1207 return root == input.root; 1208 1209 static if (hasLength!T) 1210 if (length != input.length) 1211 return false; 1212 1213 auto copy = this.save; 1214 1215 foreach (el; input) 1216 { 1217 if (el != copy.front) 1218 return false; 1219 copy.popFront(); 1220 } 1221 1222 return true; 1223 } 1224 1225 @disable this(); 1226 1227 private: 1228 T* root_; 1229 ref T root() { return *root_; } 1230 1231 this(T, Args...)(ref T _root, Args args) @nogc 1232 { 1233 root_ = &_root; 1234 1235 static if(Args.length) 1236 { 1237 frontKey = args[0]; 1238 backKey = args[1]; 1239 length = args[2]; 1240 } 1241 else 1242 { 1243 length = root.length; 1244 static if (inclusive) 1245 { 1246 if(!length) 1247 { 1248 backKey = root.universe; 1249 length = 2; 1250 } 1251 else 1252 { 1253 if(root.front > 0) 1254 ++length; 1255 1256 if(root.back <= root.universe) 1257 backKey = root.universe; 1258 else if(root.back <= root.capacity) 1259 backKey = root.capacity; 1260 1261 if(root.back < backKey) 1262 ++length; 1263 } 1264 } 1265 else 1266 { 1267 frontKey = root.front; 1268 backKey = root.back; 1269 } 1270 } 1271 } 1272 } 1273 1274 // bit mask representing uint.max. 1275 enum size_t lowerMask = size_t.max >> (size_t.sizeof * CHAR_BIT / 2); 1276 // bit mask representing size_t.back without uint.max. 1277 enum size_t higherMask = size_t.max ^ lowerMask; 1278 1279 /* 1280 This function returns the higher square root of the given input. It is needed in the initialization step 1281 of the VEB tree to calculate the number of children of a given layer. And this is the universe size of the 1282 summary of a node. The upper square root is defined by 2^{\lceil(\lg u)/2\rceil} 1283 */ 1284 size_t hSR(size_t val) @nogc 1285 { 1286 return size_t(1) << (bsr(val) / 2 + ((val.bsr & 1) || ((val != 0) && (val & (val - 1))))); 1287 } 1288 // 1289 unittest 1290 { 1291 1292 auto currentSeed = unpredictableSeed(); 1293 const errorString = format!"UT: hSR. seed: %d"(currentSeed); 1294 rndGen.seed(currentSeed); //initialize the random generator 1295 size_t M = uniform(1UL, uint.max); //set universe size to some integer. 1296 auto hSR = hSR(M); 1297 assert((hSR & (hSR - 1)) == 0, errorString); 1298 import std.range : array; 1299 import std.algorithm.searching : until; 1300 1301 auto check = powersOfTwo.until(hSR).array; 1302 assert((check[$ - 1]) * (check[$ - 1]) < M, errorString); 1303 } 1304 1305 /* 1306 This function returns the lower square root of the given input. It is needed by the indexing functions 1307 high(x), low(x) and index(x,y) of elements in the tree. Also, this is the universe size of a child of a node. The 1308 lower square root is defined by 2^{\lfloor(\lgu)/2\rfloor} 1309 */ 1310 size_t lSR(size_t val) @nogc 1311 { 1312 return size_t(1) << (bsr(val) / 2); 1313 } 1314 // 1315 unittest 1316 { 1317 auto currentSeed = unpredictableSeed(); 1318 const errorString = format!"UT: lSR seed: %d"(currentSeed); 1319 rndGen.seed(currentSeed); //initialize the random generator 1320 const M = uniform(1UL, uint.max); //set universe size to some integer. 1321 auto lSR = M.lSR; 1322 1323 assert((lSR & (lSR - 1)) == 0, errorString); 1324 assert(lSR * lSR < M, errorString); 1325 import std.algorithm.searching : find; 1326 1327 assert(!powersOfTwo.find(lSR).empty); 1328 } 1329 1330 /* 1331 This is an index function defined as \lfloor x/lSR(u)\rfloor. It is needed to find the appropriate cluster 1332 of a element in the tree. It is a part of the ideal indexing function. 1333 */ 1334 size_t high(size_t universe, size_t val) @nogc 1335 out (result; result == val / universe.lSR) // bithacks = keithschwarz 1336 { 1337 return val >> (bsr(universe) / 2); 1338 } 1339 // 1340 unittest 1341 { 1342 auto currentSeed = unpredictableSeed(); 1343 const errorString = format!"UT: high seed: %d"(currentSeed); 1344 rndGen.seed(currentSeed); //initialize the random generator 1345 const M = uniform(1UL, uint.max); //set universe size to some integer. 1346 assert(M, errorString); 1347 size_t U = M.nextPow2; 1348 assert(U, errorString); 1349 auto x = uniform(0UL, U); 1350 assert(high(U, x) == x / U.lSR, errorString); 1351 } 1352 1353 /* 1354 This is an index function defined as fmod(value, lSR(universe)). It is needed to find the appropriate 1355 value inside a cluster. It is part of the ideal indexing function 1356 */ 1357 size_t low(size_t universe, size_t val) @nogc 1358 out (retVal; retVal == (val & ((size_t(1) << (bsr(universe) / 2)) - 1))) 1359 { 1360 return val % universe.lSR; 1361 } 1362 // 1363 unittest 1364 { 1365 auto currentSeed = unpredictableSeed(); 1366 const errorString = format!"UT: low seed: %d"(currentSeed); 1367 rndGen.seed(currentSeed); //initialize the random generator 1368 size_t M = uniform(1UL, uint.max); //set universe size to some integer. 1369 size_t U = nextPow2(M); 1370 auto x = uniform(0UL, U); 1371 assert(low(U, x) == (x & (U.lSR - 1)), errorString); 1372 } 1373 1374 /* 1375 This is an index function to retain the searched value. It is defined as x * lSR(u) + y. Beyond this, the 1376 relation holds: x = index(high(x), x.low). This is the ideal indexing function of the tree. 1377 */ 1378 size_t index(size_t universe, size_t x, size_t y) @nogc 1379 { 1380 return (x * universe.lSR + y); 1381 } 1382 // 1383 unittest 1384 { 1385 auto currentSeed = unpredictableSeed(); 1386 const errorString = format!"UT: index seed: %d"(currentSeed); 1387 rndGen.seed(currentSeed); //initialize the random generator 1388 const M = uniform(0UL, uint.max); //set universe size to some integer. 1389 size_t U = M.nextPow2; 1390 auto x = uniform(0UL, U); 1391 assert(index(U, U.high(x), U.low(x)) == x, errorString); 1392 } 1393 1394 auto vebTree(Flag!"inclusive" inclusive, T, Args...)(ref T root, Args args) 1395 { 1396 return VEBtree!(inclusive, T)(root, args); 1397 }