1 /** 2 Copyright: Copyright (c) 2016- Alexander Orlov. All rights reserved. 3 License: $(LINK2 https://opensource.org/licenses/MIT, MIT License). 4 Authors: Alexander Orlov, $(LINK2 mailto:sascha.orlov@gmail.com, sascha.orlov@gmail.com) 5 */ 6 7 /** 8 This module implements a Van Emde Boas tree container. 9 All corrections, bug findings pull requests and comments are welcome. 10 The main idea of the container is, to restrict the capacity of the tree by the next power of two universe size, 11 given an arbitrary size at the initialization. 12 */ 13 14 /** 15 The main advantage of the Van Emde Boas tree appears on a large amount of elements, as the provided standard 16 operations of the tree are constant in time and of order O(lg2(lg2(U))), where U is the capacity of the tree, constant 17 after creation. For small amount of elements the overhead coming along with the structure take over. However, if the 18 universe size becomes bigger, the tree performance becomes better. 19 */ 20 21 /** 22 Be aware, the current container is intended to be used with keys. This implies, that the capacity, fixed on its 23 initialization has two meanings. As usual, it shows the maximum amount of elements the instanciated tree can keep. 24 But also, it states, that no value bigger then capacity - 1 exists in the tree. This, and the fact, that only 25 non-negative values can be used are infered from the term "key". 26 */ 27 28 /** 29 See_also: Thomas H. Cormen, Clifford Stein, Ronald L. Rivest, and Charles E. Leiserson. 2001. <em>Introduction to 30 Algorithms</em> (2nd ed.). McGraw-Hill Higher Education. 31 the idea of using bit operations was reused from the C++ implementation found at 32 http://www.keithschwarz.com/interesting/code/van-emde-boas-tree/ 33 */ 34 35 module vebtree; 36 import core.bitop; 37 import std.traits : ReturnType, isIterable, arity; 38 import std.typecons : Flag, Yes, No; 39 import std.math : nextPow2; 40 import core.stdc.limits : CHAR_BIT; 41 42 debug import std.format : format; 43 44 version (unittest) 45 { 46 import std.parallelism : parallel; 47 import std.conv : to; 48 import core.stdc.stdio : printf; 49 import std.container.rbtree : redBlackTree; 50 51 import std.range; 52 import std.random; 53 import std.format; 54 import std.container; // red black tree may be used in unittests for comparison. 55 import std.math : sqrt; 56 57 // helping function for output a given value in binary representation 58 void bin(size_t n) pure 59 { 60 /* step 1 */ 61 if (n > 1) 62 bin(n / 2); 63 /* step 2 */ 64 65 //printf("%d", n % 2); 66 } 67 68 /// precalculated powers of two table for unit testing 69 import std.range : iota; 70 import std.algorithm.iteration : map; 71 72 enum powersOfTwo = (CHAR_BIT * size_t.sizeof).iota.map!(a => size_t(1) << a); 73 enum testMultiplier = 1; //16 74 75 auto generateVEBtree(size_t baseSize)(uint currentSeed, size_t front, size_t back, ref size_t M) 76 { 77 static assert(baseSize > 1); 78 static assert((baseSize & (baseSize - 1)) == 0); 79 assert(!!front != front); assert(front >= 2); 80 rndGen.seed(currentSeed); //initialize the random generator 81 M = uniform(front, back + 1); // parameter for construction 82 return vebRoot!baseSize(M); 83 } 84 string generateDebugString(string identifier1, size_t identifier2, size_t baseSize, uint currentSeed, size_t M) 85 { 86 return format!"%s: %d baseSize: %d; seed: %d M: %d"(identifier1, identifier2, baseSize, currentSeed, M); 87 } 88 } 89 90 static foreach (_; 1 .. size_t.sizeof - 1) 91 { 92 /// 93 unittest 94 { 95 enum baseSize = 1 << _; 96 foreach (b; (CHAR_BIT * size_t.sizeof * testMultiplier).iota.parallel) 97 { 98 auto currentSeed = unpredictableSeed(); 99 size_t M; 100 101 auto vT = generateVEBtree!(1 << _)(currentSeed, 2UL, baseSize, M); 102 assert(vT.universe == M); 103 const errorString = generateDebugString("UT: node test: ", b, baseSize, currentSeed, M); 104 105 assert(vT.value_ == 0, errorString); 106 assert(vT.ptr_ is null, errorString); 107 assert(vT.capacity == baseSize, errorString); 108 assert(vT.empty == true, errorString); 109 assert(vT.front == NIL, errorString); 110 assert(vT.back == NIL, errorString); 111 assert(vT[].front == 0, errorString); 112 assert(vT[].back == vT.universe, errorString); 113 assert(vT().front == NIL, errorString); 114 assert(vT().back == NIL, errorString); 115 assert(vT.length == 0, errorString); 116 assert(vT.universe == M, errorString); 117 118 size_t N = uniform(0UL, 2 * M); // independent parameter for testing 119 // make an array of length N 120 size_t[] testArray, cacheArray; 121 testArray = new size_t[N]; 122 cacheArray.reserve(N); 123 // fill the array with all possible values 124 foreach (ref el; testArray) 125 { 126 el = (2 * M).iota.choice; 127 } 128 129 foreach (testNumber; testArray) 130 { 131 assert(vT.universe == M, errorString); 132 const insertResult = vT.insert(testNumber); 133 134 if (insertResult) 135 { 136 assert(!vT.empty, errorString); 137 cacheArray ~= testNumber; 138 } 139 } 140 141 import std.algorithm.sorting : sort; 142 143 cacheArray.sort; 144 145 if (cacheArray.empty) 146 { 147 assert(vT.empty, errorString); 148 } 149 else 150 { 151 assert(!vT.empty, errorString); 152 } 153 154 foreach (el; cacheArray) 155 { 156 assert(bt(&vT.value_, el), errorString); 157 } 158 import std.algorithm.iteration : uniq; 159 import std.algorithm.searching : count; 160 161 assert(vT.length == cacheArray.uniq.count, errorString); 162 assert(vT.universe == M, errorString); 163 if (cacheArray.length) 164 { 165 assert(vT.front == cacheArray.front, errorString); 166 assert(vT.back == cacheArray.back, errorString); 167 } 168 else 169 { 170 assert(vT.front == NIL, errorString); 171 assert(vT.back == NIL, errorString); 172 } 173 174 auto currElement = vT.front; 175 foreach (el; cacheArray.uniq) 176 { 177 assert(currElement == el, errorString); 178 currElement = vT.next(currElement); 179 } 180 currElement = vT.back; 181 foreach (el; cacheArray.uniq.array.retro) 182 { 183 assert(currElement == el, errorString); 184 currElement = vT.prev(currElement); 185 } 186 187 foreach (key; 0 .. vT.universe) 188 { 189 import std.algorithm.searching : canFind; 190 191 if (cacheArray.uniq.array.canFind(key)) 192 { 193 assert(key in vT, errorString); 194 } 195 else 196 { 197 assert(!(key in vT), errorString); 198 } 199 } 200 auto deepCopy = vT.dup; 201 202 assert(deepCopy.value_ == vT.value_, errorString); 203 assert(vT == cacheArray.uniq, errorString); 204 assert(vT.prev(vT.front) == NIL, errorString); 205 assert(vT.next(vT.back) == NIL, errorString); 206 assert(vT == deepCopy, errorString); 207 assert(vT == deepCopy(), errorString); 208 209 if (cacheArray.length) 210 { 211 auto val = cacheArray.uniq.array.randomCover.front; 212 vT.remove(val); 213 assert((deepCopy.value_ ^ vT.value_) == (size_t(1) << val), errorString); 214 import std.algorithm.iteration : each; 215 import std.algorithm.searching : count, find; 216 import std.algorithm.mutation : remove; 217 218 cacheArray.count(val).iota.each!(i => cacheArray = cacheArray.remove( 219 cacheArray.length - cacheArray.find(val).length)); 220 } 221 else 222 { 223 assert((deepCopy.value_ ^ vT.value_) == 0, errorString); 224 } 225 226 foreach (key; 0 .. vT.capacity) 227 { 228 import std.algorithm.searching : canFind; 229 230 if (cacheArray.uniq.array.canFind(key)) 231 { 232 assert(vT.remove(key), errorString); 233 } 234 else 235 { 236 assert(!(vT.remove(key)), errorString); 237 } 238 } 239 assert(vT.value_ == 0, errorString); 240 assert(vT.empty, errorString); 241 } 242 } 243 } 244 245 static foreach (_; 1 .. size_t.sizeof - 1) 246 { 247 /// 248 unittest 249 { 250 import std.range : iota; 251 foreach (b; (CHAR_BIT * size_t.sizeof * testMultiplier).iota.parallel) 252 { 253 auto currentSeed = unpredictableSeed(); 254 size_t M; 255 auto vT = generateVEBtree!(1 << _) 256 (currentSeed, CHAR_BIT * size_t.sizeof, CHAR_BIT * size_t.sizeof * CHAR_BIT * size_t.sizeof, M); 257 const errorString = 258 generateDebugString("UT: tree test of capacity and universe: ", b, 1 << _, currentSeed, M); 259 260 assert(vT.universe == M, errorString); 261 assert(vT.capacity == (vT.universe - 1).nextPow2, 262 to!string("vT.capacity: " ~ to!string( 263 vT.capacity) ~ " vT.universe: " ~ to!string(vT.universe))); 264 assert(!(vT.ptr_ is null), errorString); 265 assert(vT.capacity == (vT.universe - 1).nextPow2, errorString); 266 } 267 } 268 } 269 270 static foreach (_; 1 .. size_t.sizeof - 1) 271 { 272 /// 273 unittest 274 { 275 import std.range : iota; 276 foreach (b; (CHAR_BIT * size_t.sizeof * testMultiplier).iota.parallel) 277 { 278 auto currentSeed = unpredictableSeed(); 279 size_t M; 280 auto vT = generateVEBtree!(1 << _) 281 (currentSeed, CHAR_BIT * size_t.sizeof, CHAR_BIT * size_t.sizeof * CHAR_BIT * size_t.sizeof, M); 282 const errorString = 283 generateDebugString("UT: tree test, general case: ", b, 1 << _, currentSeed, M); 284 size_t N = uniform(0UL, 2 * M); // independent parameter for testing 285 286 // make an array of length N 287 size_t[] testArray, cacheArray; 288 testArray = new size_t[N]; 289 cacheArray.reserve(N); 290 // fill the array with all possible values 291 foreach (ref el; testArray) 292 el = (2 * M).iota.choice; 293 294 auto rbt = redBlackTree!size_t(); 295 296 foreach (val; testArray) 297 { 298 assert(vT.universe == M, errorString); 299 assert(vT.length == rbt.length, errorString); 300 301 bool insertExpectation; 302 if (val < vT.capacity && !(val in vT)) 303 { 304 insertExpectation = true; 305 } 306 const insertResult = vT.insert(val); 307 308 assert(insertResult == insertExpectation, errorString); 309 310 if (insertResult) 311 { 312 313 assert(val in vT, errorString); 314 assert(!vT.empty, errorString); 315 rbt.insert(val); 316 assert(vT.front == rbt.front, errorString); 317 assert(vT.back == rbt.back, 318 "val:" ~ to!string(val) ~ " vT.back: " ~ to!string( 319 vT.back) ~ " rbt.back: " ~ to!string(rbt.back)); 320 321 cacheArray ~= val; 322 } 323 else 324 { 325 if (!(val in rbt)) 326 { 327 assert(!(val in vT), errorString); 328 } 329 else 330 { 331 assert(val in vT, errorString); 332 } 333 } 334 } 335 336 import std.algorithm.sorting : sort; 337 cacheArray.sort; 338 339 foreach (i, el; cacheArray) 340 { 341 assert(el in vT, errorString); 342 if (i + 1 != cacheArray.length) 343 { 344 assert(vT.next(el) == cacheArray[i + 1],errorString); 345 } 346 else 347 { 348 assert(vT.next(el) == NIL, errorString); 349 } 350 } 351 352 foreach (i, el; vT) 353 assert(el == cacheArray[i], errorString); 354 355 assert(vT == cacheArray, errorString); 356 357 auto vT2 = vT.dup; 358 assert(vT == vT2); 359 360 if(cacheArray.length) 361 { 362 auto rndNum = cacheArray.choice; 363 vT2.remove(rndNum); 364 assert(!(rndNum in vT2)); 365 assert(rndNum in vT); 366 assert(vT != vT2); 367 rndNum = uniform(0UL, vT2.capacity); 368 if(!(rndNum in vT)) 369 { 370 assert(!(rndNum in vT), errorString ~ format!"rndNum: %d"(rndNum)); 371 assert(vT2.insert(rndNum), errorString); 372 } 373 assert(vT != vT2); 374 } 375 376 const rangeExclusive = vT(); 377 assert(vT == rangeExclusive); 378 379 auto rangeInclusive = vT[]; 380 import std.algorithm.comparison : equal; 381 import std.algorithm.iteration : uniq; 382 assert(equal(uniq(rangeInclusive), rangeInclusive)); 383 import std.range : enumerate; 384 foreach(i, el; rangeInclusive.enumerate) 385 { 386 if(i == 0) 387 { 388 if(!(0 in vT)) 389 { 390 continue; 391 } 392 } 393 else if(i + 1 != rangeInclusive.length) 394 { 395 assert(el in vT, errorString ~ format!" el: %d"(el)); 396 } 397 else if(i + 1 == rangeInclusive.length) 398 { 399 assert(el == vT.universe || el == vT.capacity); 400 if(el == vT.universe) 401 { 402 assert(vT.back <= vT.universe || vT.back == NIL, errorString ~ format!" length: %d"(vT.length)); 403 } 404 else 405 { 406 assert(vT.back > vT.universe, errorString); 407 assert(vT.back < vT.capacity, errorString); 408 } 409 } 410 else 411 { 412 assert(0); 413 } 414 } 415 416 import std.range : retro, enumerate; 417 foreach (i, el; cacheArray.retro.enumerate) 418 { 419 assert(el in vT, errorString); 420 if (i + 1 != cacheArray.length) 421 { 422 assert(vT.prev(el) == cacheArray[($ - 1) - (i + 1)], errorString); 423 } 424 else 425 { 426 assert(vT.prev(el) == NIL, errorString); 427 } 428 } 429 430 foreach (val; testArray) 431 { 432 assert(vT.length == rbt.length, errorString); 433 if (val in rbt) 434 { 435 assert(val in vT, errorString); 436 rbt.removeKey(val); 437 assert(vT.remove(val), errorString); 438 } 439 else 440 { 441 assert(!(val in vT), errorString); 442 assert(!vT.remove(val), errorString); 443 } 444 assert(!(val in rbt), errorString); 445 assert(!(val in vT), errorString); 446 } 447 assert(vT.length == 0, errorString); 448 assert(rbt.length == 0, errorString); 449 } 450 } 451 } 452 453 /** 454 define the absence of a key to be -1. 455 */ 456 enum NIL = ptrdiff_t(-1); 457 458 /** 459 The tree creator function. Optionally, the base size can be provided at compile time, however, the best results are 460 achieved with the default base size of CHAR_BIT * size_t.sizeof 461 */ 462 auto vebRoot(size_t baseSize = CHAR_BIT * size_t.sizeof)(size_t universe) 463 { 464 /** 465 Two parameters are provided: 466 - the base size is the maximal amount bits can be stored in a single node without branching (generating children) 467 - the universe is the user provided input, providing the expected amount of keys, going to be stored in the tree 468 */ 469 return VEBroot!baseSize(universe); 470 } 471 472 /** 473 A van Emde Boas node implementation 474 */ 475 struct VEBroot(size_t baseSize) if((baseSize & (baseSize - 1)) == 0) 476 { 477 /** 478 yields a deep copy of the node. I. e. copies all data in children and allocates another tree 479 */ 480 typeof(this) dup() pure 481 { 482 auto retVal = typeof(this)(universe); 483 foreach (el; opCall()) 484 retVal.insert(el); 485 return retVal; 486 } 487 488 /** 489 []-slicing. Yields a "random access range" with the content of the tree, always containing zero and the key after 490 the maximum element as keys. The key after the maximal key is the universe, if the tree is empty or the maximal 491 contained key is lesser then empty, otherwise the capacity of the tree. 492 */ 493 auto opIndex() @nogc pure 494 { 495 return vebTree!(Yes.inclusive)(this); 496 } 497 498 /** 499 slicing operation. recieves the opSlice!0(x,y) object from x .. y input. 500 as used with [] brackets, works as slicing with inclusion. 501 502 Note: This operation is not natural to the vebtree, as the indicies of keys are not known in general. 503 However, notably for the case of inclusive slicing (and indeed only working for this kind) one can iterate, e.g. 504 over all but some indicies from any end of the tree. This method is provided for such iterations. 505 506 During preparation, an inclusive slice is created and popFront (according popBack) is called corresponding times, 507 to remove the approprate number of elements from the front and from the back of the returned range. 508 */ 509 auto opIndex(size_t[2] input) @nogc pure 510 in(input[1] - input[0] <= this.length) 511 { 512 auto retVal = vebTree!(Yes.inclusive)(this); 513 514 import std.range : iota; 515 import std.algorithm.iteration : each; 516 517 input[0].iota.each!(i => retVal.popFront); 518 (this.length - input[1]).iota.each!(i => retVal.popBack); 519 520 return retVal; 521 } 522 523 /// opDollar works as "intended" and returns the length of the underlying array 524 @property size_t opDollar(size_t dim : 0)() @nogc pure 525 { 526 return this.length; 527 } 528 529 /// opSlice for slicing operation returns the static array of two numbers, for opIndex interop 530 size_t[2] opSlice(size_t dim : 0)(size_t x, size_t y) @nogc 531 { 532 import std.array : staticArray; 533 return [x, y].staticArray!size_t; 534 } 535 536 /** 537 ()-slicing. Yields a "random access range" with the content of the tree. Keys can be NIL. 538 */ 539 auto opCall() @nogc pure 540 { 541 return vebTree!(No.inclusive)(this); 542 } 543 544 /** 545 Equality operator checks for any iterable, whether in contains the same values, as the current tree. 546 */ 547 bool opEquals(T)(auto ref T input) const if (isIterable!T) 548 { 549 import std.range : hasLength; 550 static if (hasLength!T) 551 if (length != input.length) 552 return false; 553 554 size_t currentElem = this.front; 555 556 /* The rewrite 557 for (auto __rangeCopy = input; !__rangeCopy.empty; __rangeCopy.popFront) 558 does not work, because at least this very type is not copyable. However, without this rewrite, 559 the delegate of opApply cannot be pure: 560 https://forum.dlang.org/post/ebcihkybyafilzruimxn@forum.dlang.org 561 */ 562 563 foreach (el; input) 564 { 565 if (el != currentElem) 566 return false; 567 currentElem = this.next(currentElem); 568 } 569 570 return true; 571 } 572 573 /** 574 member method for the case universe size < base size. Overloads by passing only one parameter, which is 575 the bit number of interest. Returns whether the appropriate bit inside the bitvector is set. 576 */ 577 bool opBinaryRight(string op)(size_t key) @nogc const if (op == "in") 578 { 579 if (key >= this.capacity) 580 return false; 581 582 if (this.empty) // if an empty intermediate node is found, nothing is stored below it. 583 return false; 584 585 if (this.isLeaf) 586 return bt(&value_, key) != 0; 587 else 588 { 589 // case of a single valued range. 590 if (key == this.front || key == this.back) 591 return true; 592 593 /* 594 else we have to descend, using the recursive indexing: 595 1. take the high(value, uS)-th child and 596 2. ask it about the reduced low(value, uS) value 597 3. use the lSR(uS) universe size of the childe node. 598 */ 599 return low(key) in ptr_[high(key)]; 600 } 601 } 602 603 /** 604 the opApply method grants the correct foreach behavior, nogc version 605 */ 606 int opApply(scope int delegate(ref size_t) @nogc operations) const @nogc 607 { 608 return opApplyImpl(operations); 609 } 610 611 /** 612 ditto 613 */ 614 int opApply(scope int delegate(ref size_t, ref size_t) @nogc operations) const @nogc 615 { 616 return opApplyImpl(operations); 617 } 618 619 /** 620 ditto 621 */ 622 int opApply(scope int delegate(ref size_t) operations) const 623 { 624 return opApplyImpl(operations); 625 } 626 627 /** 628 ditto 629 */ 630 int opApply(scope int delegate(ref size_t, ref size_t) operations) const 631 { 632 return opApplyImpl(operations); 633 } 634 635 /** 636 Node constructor. A universe size provided, if the size is below the cutoff there is nothing to be done, as the 637 underlying value created and set to zero by default. 638 If otherwise create an array of children. This array has to be (according to Cormen) of size of higher square 639 root of the current universe size + 1. The extra place is reserved for the summary. 640 For each just created child call its constructor. 641 For the summary with the universe size of the higher square root of the current universe size. 642 For each other child with the universe size of the lower square root of the currennt universe size. 643 Then, assign the fully initialized children array to the pointer in the current node, doing approprate steps to 644 show, that this node is an intermediate node, not containing any values yet. 645 The knowledge of the current universe size is lost at this moment. As this keeps every build up node smaller 646 (and there could be a lot of them). This is why, the VEBtree class continues to hold the global universe size, 647 which is passed on every call to the root node. In this way this, extern saved value has the role of being 648 outsourced array size for each (!) node in the tree, as its size is reconstructed during the access to them. 649 */ 650 651 @disable this(this); 652 653 /** 654 It is allowed to construct the root of the van Emde Boas tree directly, without the convenience method. 655 Params: 656 val = Expected universe size. The tree is generated so that every index below the universe size can be put 657 inside. 658 */ 659 this(size_t val) pure 660 in(!!val != val) //the same as: val >= 2 661 { 662 // the restriction is somewhat technical: if a tree is created, then the universe size is at least 2, 663 // it cannot be less. 664 universe = val; 665 setEmpty; 666 667 assert(!length_ == this.empty); 668 669 if (!isLeaf) 670 { 671 assert(this.capacity == (universe - 1).nextPow2); 672 const arrSize = this.capacity.hSR + 1; 673 674 // reserve enough place for the summary and the children cluster 675 ptr_ = (new typeof(this)[arrSize]).ptr; 676 677 // add higher square root children with lower square root universe each. 678 foreach (i, ref el; cluster) 679 el = typeof(this)(this.capacity.lSR); 680 681 // add the summary with its universe of higher squaure root of the current universe 682 summary = typeof(this)(this.capacity.hSR); 683 } 684 assert(!length_ == this.empty); 685 } 686 687 /** 688 This tree has a length notion: it is the current number of inserted elements. 689 Returns: The current amount of contained keys. 690 */ 691 size_t length() const @nogc pure 692 { 693 return length_; 694 } 695 696 /** 697 the empty method to inform of an empty state of the tree. 698 Returns: Whether the tree is currently empty 699 */ 700 bool empty() const @nogc pure 701 { 702 return isLeaf ? value_ == 0 : value_ == -NIL; 703 } 704 705 /** 706 This yields whether the node is a leaf node. 707 Returns: Whether the node is a leaf. 708 */ 709 bool isLeaf() const @nogc pure 710 { 711 return universe <= baseSize; 712 } 713 714 /** 715 The minimal contained key in the van Emde Boas tree 716 Returns: The minimal contained element of the tree 717 */ 718 size_t front() @nogc const pure 719 { 720 if (empty) // do not proceed at all, if the root is empty 721 return NIL; 722 if (isLeaf) // pass control to the node 723 return bsf(value_); 724 return value_ & lowerMask; 725 } 726 727 /** 728 The maximal contained key in the van Emde Boas tree 729 Returns: The maximal contained element of the tree 730 */ 731 size_t back() @nogc const pure 732 { 733 if (empty) // do not proceed at all, if the root is empty 734 return NIL; 735 if (isLeaf) // pass control to the node 736 return bsr(value_); 737 return (value_ & higherMask) >> (CHAR_BIT * size_t.sizeof / 2); 738 } 739 740 /** 741 As a usual container, van Emde Boas tree provides the notion of capacity 742 Returns: The overall capacity of the tree. It is at least as high as the universe size provided on creation. 743 */ 744 size_t capacity() @nogc const pure 745 { 746 return isLeaf ? baseSize : (universe - 1).nextPow2; 747 } 748 749 /** 750 remove method of the van Emde Boas tree 751 Params: 752 val = The key to remove 753 Returns: Whether the key was removed. It is true, when the key was present, false otherwise 754 */ 755 bool remove(size_t val) @nogc pure 756 { 757 if (val >= capacity) // do not proceed at all, if the value can't be in the tree 758 return false; 759 if (empty) // do not proceed at all, if the root is empty 760 return false; 761 if (isLeaf) // pass control to the node 762 return length(length - (btr(&value_, val) != 0)); 763 if (front == back) // if the current node contains only a single value 764 { 765 assert(length == 1); 766 if (front != val) 767 return false; // do nothing if the given value is not the stored one 768 assert(length == 1); 769 return length(length - 1); 770 } 771 772 if (val == front) // if we met the minimum of a node 773 { 774 auto treeOffset = summary.front; // calculate an offset from the summary to continue with 775 if (treeOffset == NIL) // if the offset is invalid, then there is no further hierarchy and we are going to 776 { 777 front = back; // store a single value in this node. 778 assert(length == 2); 779 return length(length - 1); 780 } 781 auto m = cluster[treeOffset].front; // otherwise we get the minimum from the offset child 782 // remove it from the child 783 cluster[treeOffset].remove(m); 784 if (cluster[treeOffset].empty) 785 summary.remove(treeOffset); 786 //anyway, the new front of the current node become the restored value of the calculated offset. 787 front = index(treeOffset, m); 788 assert(length); 789 return length(length - 1); 790 } 791 792 if (val == back) // if we met the maximum of a node 793 { 794 // calculate an offset from the summary to contiue with 795 auto treeOffset = summary.back; 796 // if the offset is invalid, then there is no further hierarchy and we are going to 797 if (treeOffset == NIL) 798 { 799 // store a single value in this node. 800 back = front; 801 assert(length == 2); 802 return length(length - 1); 803 } 804 // otherwise we get maximum from the offset child 805 auto m = cluster[treeOffset].back; 806 // remove it from the child 807 cluster[treeOffset].remove(m); 808 if (cluster[treeOffset].empty) 809 summary.remove(treeOffset); 810 // anyway, the new back of the current node become the restored value of the calculated offset. 811 back = index(treeOffset, m); 812 assert(length); 813 return length(length - 1); 814 } 815 // if no condition was met we have to descend deeper. We get the offset by reducing the value to high(value, uS) 816 auto treeOffset = high(val); 817 auto res = length(length - cluster[treeOffset].remove(low(val))); 818 if (cluster[treeOffset].empty) 819 summary.remove(treeOffset); 820 return res; 821 } 822 823 /** 824 The successor search method of the van Emde Boas tree. 825 Params: 826 val = The key the next greater neighbor of which is looked for. 827 Returns: Ditto. If the next greater neighbor is missing a number out of bounds of the tree is returned. 828 */ 829 size_t next(size_t val) @nogc const pure 830 { 831 if (empty) // do not proceed at all, if the root is empty 832 return NIL; 833 if (isLeaf) // pass control to the node 834 { 835 if (val + 1 >= baseSize) // all vals are reduced by one. 836 return NIL; 837 838 // create a mask, which hides all lower bits of the stored value up to the given bit number, then apply 839 // bit search from the lowest bit. 840 auto maskedVal = value_ & ~((size_t(1) << (val + 1)) - 1); 841 842 if (maskedVal != 0) 843 return maskedVal.bsf; 844 845 return NIL; 846 } 847 // if given value is less then the front, return the front as successor 848 if (val < front) 849 return front; 850 // if given value is greater then the back, no predecessor exists 851 if (val >= back) 852 return NIL; 853 // if none of the break conditions was met, we have to descent further into the tree. 854 // calculate the child index by high(value, uS) 855 const childIndex = high(val); 856 // look into the child for its maximum 857 const maxlow = cluster[childIndex].back; 858 // if the maximum exists and the lowered given value is less then the child's maximum 859 if ((maxlow != NIL) && (low(val) < maxlow)) 860 { 861 auto offset = cluster[childIndex].next(low(val)); 862 // the result is given by reconstruction of the answer 863 return index(childIndex, offset); 864 } 865 else // otherwise we can not use the maximum of the child 866 { 867 auto succcluster = summary.next(childIndex); 868 // if the successor cluster is null 869 if (succcluster == NIL) 870 return back; 871 assert(succcluster != NIL); 872 assert(cluster[succcluster].front != NIL); 873 // if the successor cluster exists, the offset is given by its minimum 874 // and the result by the reconstruction of the offset. 875 return index(succcluster, cluster[succcluster].front); 876 } 877 } 878 879 /** 880 The predecessor search method of the van Emde Boas tree. 881 Params: 882 val = The key the next smaller neighbor of which is looked for. 883 Returns: Ditto. If the next smaller neighbor is missing a number out of bounds of the tree is returned. 884 */ 885 size_t prev(size_t val) @nogc const pure 886 { 887 if (empty) // do not proceed at all, if the root is empty 888 return NIL; 889 if (isLeaf) // pass control to the node 890 { 891 if (val != 0) 892 { 893 /* 894 create a mask, which hides all higher bits of the stored value down to the given bit number, then apply 895 bit search from the highest bit. 896 */ 897 auto maskedVal = value_ & ((size_t(1) << val) - 1); 898 899 if (maskedVal != 0) 900 return typeof(return)(maskedVal.bsr); 901 } 902 return NIL; 903 } 904 // if given value is greater then the stored back, the predecessor is back 905 if (val > back) 906 return back; 907 // if given value is less then the front, no predecessor exists. 908 if (val <= front) 909 return NIL; 910 // if none of the break conditions was met we have to descend further into the tree. 911 auto childIndex = high(val); // calculate the child index by high(value, uS) 912 const minlow = cluster[childIndex].front; // look into the child for its minimum 913 // if the minimum exists and the lowered given value is greater then the child's minimum 914 if ((minlow != NIL) && (low(val) > minlow)) 915 { 916 auto offset = cluster[childIndex].prev(low(val)); 917 // the result is given by reconstruction of the answer. 918 return index(childIndex, offset); 919 } 920 else // otherwise we can not use the minimum of the child 921 { 922 auto predcluster = summary.prev(childIndex); 923 // if the predecessor cluster is null return the current front, as this is the last remaining value 924 if (predcluster == NIL) 925 return front; 926 // if the predecessor cluster exists, the offset is given by its maximum 927 // and the result by the reconstruction of the offset. 928 return index(predcluster, cluster[predcluster].back); 929 } 930 } 931 932 /** 933 The insertion method of the van Emde Boas tree. 934 Params: 935 val = The key to insert 936 Returns: Whether the key was inserted. It is true, when the key was inserted, false otherwise 937 */ 938 bool insert(size_t val) @nogc pure 939 { 940 if (val >= capacity) // do not proceed at all, if the value won't fit into the tree 941 return false; 942 if (isLeaf) // pass control to the node 943 return length(length + (bts(&value_, val) == 0)); 944 if (empty) // if the current node does not contain anything put the value inside. 945 { 946 assert(empty); 947 front = val; 948 back = val; 949 assert(front == val); 950 assert(!empty); 951 assert(front == back); 952 assert(!empty); 953 return length(length + 1); 954 } 955 956 assert(!empty); 957 assert(front != NIL); 958 assert(back != NIL); 959 960 if (val == front || val == back) // if the value coincides with existing values, return 961 return false; 962 // if the node contains a single value only, expand the node to a range and leave. 963 if (front == back) 964 { 965 if (front > val) 966 front = val; 967 if (back < val) 968 back = val; 969 return length(length + 1); 970 } 971 /* 972 if none of the cases above was true (all of them are break conditions) we have to compare the given value 973 with the values present and adapt the range limits. This replaces the value we want to insert. 974 */ 975 976 // a swap can not be used here, as front is itself a (property) method 977 if (val < front) 978 { 979 const tmpKey = val; 980 val = front; 981 front = tmpKey; 982 assert(front == tmpKey); 983 } 984 985 // a swap can not be used here, as back is itself a (property) method 986 if (val > back) 987 { 988 const tmpKey = val; 989 val = back; 990 back = tmpKey; 991 assert(back == tmpKey); 992 } 993 994 // calculate the index of the children cluster by high(value, uS) we want to descent to. 995 const nextTreeIndex = high(val); 996 if (cluster[nextTreeIndex].empty) 997 summary.insert(nextTreeIndex); 998 return length(length + cluster[nextTreeIndex].insert(low(val))); 999 } 1000 1001 /** 1002 The cached value of the universe, provided on creation 1003 Returns: The cached input, provided on creation 1004 */ 1005 size_t universe() @nogc const pure 1006 { 1007 return universe_; 1008 } 1009 1010 private: 1011 1012 size_t toHash() @nogc const nothrow pure { assert(0); } 1013 1014 bool front(size_t val) @nogc pure 1015 { 1016 if (isLeaf) // pass control to the node 1017 return insert(val); 1018 value_ = value_ & higherMask; // otherwise, set the lower part of the value, keeping the higher bits 1019 const retVal = ((value_ & lowerMask) == val) ? false : true; 1020 value_ = value_ | val; 1021 return retVal; // this is a bug! 1022 } 1023 1024 bool back(size_t val) @nogc pure 1025 { 1026 if (isLeaf) // pass control to the node 1027 return insert(val); 1028 value_ = value_ & lowerMask; // otherwise, set the higher part of the value, keeping the lower bits 1029 const retVal = (value_ & higherMask) == (val << (CHAR_BIT * size_t.sizeof / 2)) ? false : true; 1030 value_ = value_ | (val << (CHAR_BIT * size_t.sizeof / 2)); 1031 return retVal; // this is a bug! 1032 } 1033 1034 bool length(size_t input) @nogc pure 1035 in 1036 { 1037 assert(input <= this.capacity); 1038 1039 if (input != length) 1040 { 1041 input > length ? assert(input - length == 1) : assert(length - input == 1); 1042 } 1043 } 1044 do 1045 { 1046 const retVal = length != input; 1047 1048 length_ = input; 1049 1050 if (!length_) 1051 setEmpty; 1052 1053 return retVal; 1054 } 1055 1056 size_t index(size_t x, size_t y) const @nogc pure 1057 { 1058 return .index(this.capacity, x, y); 1059 } 1060 1061 size_t low(size_t val) const @nogc pure 1062 { 1063 return .low(this.capacity, val); 1064 } 1065 1066 size_t high(size_t val) const @nogc pure 1067 { 1068 return .high(this.capacity, val); 1069 } 1070 1071 void universe(size_t val) @nogc pure 1072 { 1073 universe_ = val; 1074 } 1075 1076 size_t value_; 1077 size_t universe_; 1078 size_t length_; 1079 typeof(this)* ptr_; 1080 1081 ref summary() inout @nogc pure 1082 in(!isLeaf) 1083 { // return the last element of the array of children, stored in the node. 1084 return ptr_[capacity.hSR]; 1085 } 1086 1087 auto cluster() inout @nogc pure 1088 in(!isLeaf) 1089 { // return all of the children in the stored array, but the last element 1090 return ptr_[0 .. capacity.hSR]; 1091 } 1092 1093 // The empty setter of a node. This function is kept for consistency in this module. 1094 void setEmpty() @nogc pure 1095 { 1096 value_ = isLeaf ? 0 : -NIL; 1097 } 1098 1099 // with the trick of https://forum.dlang.org/thread/erznqknpyxzxqivawnix@forum.dlang.org 1100 int opApplyImpl(O)(O operations) const 1101 { 1102 int result; 1103 size_t leading = this.front; 1104 1105 //for(size_t leading = front; leading < back; leading = this.next(leading)) 1106 1107 for (size_t i = 0; i < length; ++i) 1108 { 1109 static if (arity!operations == 1) 1110 result = operations(leading); 1111 else static if (arity!operations == 2) 1112 result = operations(i, leading); 1113 else 1114 assert(0); 1115 1116 if (result) 1117 break; 1118 1119 leading = this.next(leading); 1120 } 1121 1122 return result; 1123 } 1124 } 1125 1126 private: 1127 struct VEBtree(Flag!"inclusive" inclusive, T) 1128 { 1129 auto opBinaryRight(string op)(size_t key) @nogc if (op == "in") 1130 { 1131 return key in root; 1132 } 1133 1134 static if (inclusive) 1135 { 1136 size_t frontKey; 1137 size_t backKey; 1138 } 1139 else 1140 { 1141 ptrdiff_t frontKey; 1142 ptrdiff_t backKey; 1143 } 1144 1145 size_t length; 1146 1147 typeof(frontKey) front() @nogc pure 1148 { 1149 return frontKey; 1150 } 1151 1152 void popFront() @nogc pure 1153 in(!empty) 1154 { 1155 --length; 1156 frontKey = next(frontKey); 1157 } 1158 1159 typeof(backKey) back() @nogc pure 1160 { 1161 return backKey; 1162 } 1163 1164 void popBack() @nogc pure 1165 in(!empty) 1166 { 1167 --length; 1168 backKey = prev(backKey); 1169 } 1170 1171 auto prev(size_t key) @nogc pure 1172 { 1173 const pred = root.prev(key); 1174 static if (inclusive) 1175 return pred == NIL ? 0 : pred; 1176 else 1177 return pred; 1178 } 1179 1180 auto next(size_t key) @nogc pure 1181 { 1182 const succ = root.next(key); 1183 1184 static if(inclusive) 1185 debug 1186 if (succ == NIL) 1187 assert(length <= 1, format!"key: %d, length: %d\n"(key, length)); 1188 1189 static if (inclusive) 1190 if (succ == NIL) 1191 return backKey; 1192 else 1193 return succ; 1194 else 1195 return succ; 1196 } 1197 1198 bool empty() @nogc pure 1199 { 1200 return !length; 1201 } 1202 1203 auto save() const @nogc pure 1204 { 1205 return vebTree!(inclusive)(*root_, frontKey, backKey, length); 1206 } 1207 1208 size_t toHash() @nogc const nothrow pure { assert(0); } 1209 1210 /** 1211 for comparison with an iterable, the iterable will be iterated, as the current object. 1212 */ 1213 bool opEquals(T)(auto ref T input) const if (isIterable!T) 1214 { 1215 static if (is(T == typeof(this))) 1216 return root == input.root; 1217 1218 static if (hasLength!T) 1219 if (length != input.length) 1220 return false; 1221 1222 auto copy = this.save; 1223 1224 /* The rewrite 1225 for (auto __rangeCopy = input; !__rangeCopy.empty; __rangeCopy.popFront) 1226 does not work, because at least this very type is not copyable. However, without this rewrite, 1227 the delegate of opApply cannot be pure: 1228 https://forum.dlang.org/post/ebcihkybyafilzruimxn@forum.dlang.org 1229 */ 1230 foreach (el; input) 1231 { 1232 if (el != copy.front) 1233 return false; 1234 copy.popFront(); 1235 } 1236 1237 return true; 1238 } 1239 1240 @disable this(); 1241 1242 private: 1243 T* root_; 1244 ref T root() pure { return *root_; } 1245 1246 this(T, Args...)(ref T _root, Args args) @nogc 1247 { 1248 root_ = &_root; 1249 1250 static if(Args.length) 1251 { 1252 frontKey = args[0]; 1253 backKey = args[1]; 1254 length = args[2]; 1255 } 1256 else 1257 { 1258 length = root.length; 1259 static if (inclusive) 1260 { 1261 if(!length) 1262 { 1263 backKey = root.universe; 1264 length = 2; 1265 } 1266 else 1267 { 1268 if(root.front > 0) 1269 ++length; 1270 1271 if(root.back <= root.universe) 1272 backKey = root.universe; 1273 else if(root.back <= root.capacity) 1274 backKey = root.capacity; 1275 1276 if(root.back < backKey) 1277 ++length; 1278 } 1279 } 1280 else 1281 { 1282 frontKey = root.front; 1283 backKey = root.back; 1284 } 1285 } 1286 } 1287 } 1288 1289 // bit mask representing uint.max. 1290 enum size_t lowerMask = size_t.max >> (size_t.sizeof * CHAR_BIT / 2); 1291 // bit mask representing size_t.back without uint.max. 1292 enum size_t higherMask = size_t.max ^ lowerMask; 1293 1294 /* 1295 This function returns the higher square root of the given input. It is needed in the initialization step 1296 of the VEB tree to calculate the number of children of a given layer. And this is the universe size of the 1297 summary of a node. The upper square root is defined by 2^{\lceil(\lg u)/2\rceil} 1298 */ 1299 size_t hSR(size_t val) @nogc pure 1300 { 1301 return size_t(1) << (bsr(val) / 2 + ((val.bsr & 1) || ((val != 0) && (val & (val - 1))))); 1302 } 1303 // 1304 unittest 1305 { 1306 1307 auto currentSeed = unpredictableSeed(); 1308 const errorString = format!"UT: hSR. seed: %d"(currentSeed); 1309 rndGen.seed(currentSeed); //initialize the random generator 1310 size_t M = uniform(1UL, uint.max); //set universe size to some integer. 1311 auto hSR = hSR(M); 1312 assert((hSR & (hSR - 1)) == 0, errorString); 1313 import std.range : array; 1314 import std.algorithm.searching : until; 1315 1316 auto check = powersOfTwo.until(hSR).array; 1317 assert((check[$ - 1]) * (check[$ - 1]) < M, errorString); 1318 } 1319 1320 /* 1321 This function returns the lower square root of the given input. It is needed by the indexing functions 1322 high(x), low(x) and index(x,y) of elements in the tree. Also, this is the universe size of a child of a node. The 1323 lower square root is defined by 2^{\lfloor(\lgu)/2\rfloor} 1324 */ 1325 size_t lSR(size_t val) @nogc pure 1326 { 1327 return size_t(1) << (bsr(val) / 2); 1328 } 1329 // 1330 unittest 1331 { 1332 auto currentSeed = unpredictableSeed(); 1333 const errorString = format!"UT: lSR seed: %d"(currentSeed); 1334 rndGen.seed(currentSeed); //initialize the random generator 1335 const M = uniform(1UL, uint.max); //set universe size to some integer. 1336 auto lSR = M.lSR; 1337 1338 assert((lSR & (lSR - 1)) == 0, errorString); 1339 assert(lSR * lSR < M, errorString); 1340 import std.algorithm.searching : find; 1341 1342 assert(!powersOfTwo.find(lSR).empty); 1343 } 1344 1345 /* 1346 This is an index function defined as \lfloor x/lSR(u)\rfloor. It is needed to find the appropriate cluster 1347 of a element in the tree. It is a part of the ideal indexing function. 1348 */ 1349 size_t high(size_t universe, size_t val) @nogc pure 1350 out (result; result == val / universe.lSR) // bithacks = keithschwarz 1351 { 1352 return val >> (bsr(universe) / 2); 1353 } 1354 // 1355 unittest 1356 { 1357 auto currentSeed = unpredictableSeed(); 1358 const errorString = format!"UT: high seed: %d"(currentSeed); 1359 rndGen.seed(currentSeed); //initialize the random generator 1360 const M = uniform(1UL, uint.max); //set universe size to some integer. 1361 assert(M, errorString); 1362 size_t U = M.nextPow2; 1363 assert(U, errorString); 1364 auto x = uniform(0UL, U); 1365 assert(high(U, x) == x / U.lSR, errorString); 1366 } 1367 1368 /* 1369 This is an index function defined as fmod(value, lSR(universe)). It is needed to find the appropriate 1370 value inside a cluster. It is part of the ideal indexing function 1371 */ 1372 size_t low(size_t universe, size_t val) @nogc pure 1373 out (retVal; retVal == (val & ((size_t(1) << (bsr(universe) / 2)) - 1))) 1374 { 1375 return val % universe.lSR; 1376 } 1377 // 1378 unittest 1379 { 1380 auto currentSeed = unpredictableSeed(); 1381 const errorString = format!"UT: low seed: %d"(currentSeed); 1382 rndGen.seed(currentSeed); //initialize the random generator 1383 size_t M = uniform(1UL, uint.max); //set universe size to some integer. 1384 size_t U = nextPow2(M); 1385 auto x = uniform(0UL, U); 1386 assert(low(U, x) == (x & (U.lSR - 1)), errorString); 1387 } 1388 1389 /* 1390 This is an index function to retain the searched value. It is defined as x * lSR(u) + y. Beyond this, the 1391 relation holds: x = index(high(x), x.low). This is the ideal indexing function of the tree. 1392 */ 1393 size_t index(size_t universe, size_t x, size_t y) @nogc pure 1394 { 1395 return (x * universe.lSR + y); 1396 } 1397 // 1398 unittest 1399 { 1400 auto currentSeed = unpredictableSeed(); 1401 const errorString = format!"UT: index seed: %d"(currentSeed); 1402 rndGen.seed(currentSeed); //initialize the random generator 1403 const M = uniform(0UL, uint.max); //set universe size to some integer. 1404 size_t U = M.nextPow2; 1405 auto x = uniform(0UL, U); 1406 assert(index(U, U.high(x), U.low(x)) == x, errorString); 1407 } 1408 1409 auto vebTree(Flag!"inclusive" inclusive, T, Args...)(ref T root, Args args) 1410 { 1411 return VEBtree!(inclusive, T)(root, args); 1412 }