1 /** 2 Copyright: Copyright (c) 2016- Alexander Orlov. All rights reserved. 3 License: $(LINK2 https://opensource.org/licenses/MIT, MIT License). 4 Authors: Alexander Orlov, $(LINK2 mailto:sascha.orlov@gmail.com, sascha.orlov@gmail.com) 5 */ 6 7 /** 8 This module implements a Van Emde Boas tree container. 9 All corrections, bug findings pull requests and comments are welcome. 10 The main idea of the container is, to restrict the capacity of the tree by the next power of two universe size, 11 given an arbitrary size at the initialization. 12 */ 13 14 /** 15 The main advantage of the Van Emde Boas tree appears on a large amount of elements, as the provided standard 16 operations of the tree are constant in time and of order O(lg2(lg2(U))), where U is the capacity of the tree, constant 17 after creation. For small amount of elements the overhead coming along with the structure take over. However, if the 18 universe size becomes bigger, the tree performance becomes better. 19 */ 20 21 /** 22 Be aware, the current container is intended to be used with keys. This implies, that the capacity, fixed on its 23 initialization has two meanings. As usual, it shows the maximum amount of elements the instanciated tree can keep. 24 But also, it states, that no value bigger then capacity - 1 exists in the tree. This, and the fact, that only 25 non-negative values can be used are infered from the term "key". 26 */ 27 28 /** 29 See_also: Thomas H. Cormen, Clifford Stein, Ronald L. Rivest, and Charles E. Leiserson. 2001. <em>Introduction to 30 Algorithms</em> (2nd ed.). McGraw-Hill Higher Education. 31 the idea of using bit operations was reused from the C++ implementation found at 32 http://www.keithschwarz.com/interesting/code/van-emde-boas-tree/ 33 */ 34 35 module vebtree; 36 import core.bitop; 37 import std.traits : ReturnType, isIterable, arity; 38 import std.typecons : Flag, Yes, No; 39 import std.math : nextPow2; 40 import core.stdc.limits : CHAR_BIT; 41 42 debug import std.format : format; 43 44 version (unittest) 45 { 46 import std.parallelism : parallel; 47 import std.conv : to; 48 import core.stdc.stdio : printf; 49 import std.container.rbtree : redBlackTree; 50 51 import std.range; 52 import std.random; 53 import std.format; 54 import std.container; // red black tree may be used in unittests for comparison. 55 import std.math : sqrt; 56 57 // helping function for output a given value in binary representation 58 void bin(size_t n) 59 { 60 /* step 1 */ 61 if (n > 1) 62 bin(n / 2); 63 /* step 2 */ 64 65 printf("%d", n % 2); 66 } 67 68 /// precalculated powers of two table for unit testing 69 import std.range : iota; 70 import std.algorithm.iteration : map; 71 72 enum powersOfTwo = (CHAR_BIT * size_t.sizeof).iota.map!(a => size_t(1) << a); 73 enum testMultiplier = 1; //16 74 75 auto generateVEBtree(size_t baseSize)(uint currentSeed, size_t front, size_t back, ref size_t M) 76 { 77 static assert(baseSize > 1); 78 static assert((baseSize & (baseSize - 1)) == 0); 79 assert(front >= 2); 80 rndGen.seed(currentSeed); //initialize the random generator 81 M = uniform(front, back + 1); // parameter for construction 82 return vebRoot!baseSize(M); 83 } 84 string generateDebugString(string identifier1, size_t identifier2, size_t baseSize, uint currentSeed, size_t M) 85 { 86 return format!"%s: %d baseSize: %d; seed: %d M: %d"(identifier1, identifier2, baseSize, currentSeed, M); 87 } 88 } 89 90 static foreach (_; 1 .. size_t.sizeof - 1) 91 { 92 /// 93 unittest 94 { 95 enum baseSize = 1 << _; 96 foreach (b; (CHAR_BIT * size_t.sizeof * testMultiplier).iota.parallel) 97 { 98 auto currentSeed = unpredictableSeed(); 99 size_t M; 100 101 auto vT = generateVEBtree!(1 << _)(currentSeed, 2UL, baseSize, M); 102 assert(vT.universe == M); 103 const errorString = generateDebugString("UT: node test: ", b, baseSize, currentSeed, M); 104 105 assert(vT.value_ == 0, errorString); 106 assert(vT.ptr_ is null, errorString); 107 assert(vT.capacity == baseSize, errorString); 108 assert(vT.empty == true, errorString); 109 assert(vT.front == NIL, errorString); 110 assert(vT.back == NIL, errorString); 111 assert(vT[].front == 0, errorString); 112 assert(vT[].back == vT.universe, errorString); 113 assert(vT().front == NIL, errorString); 114 assert(vT().back == NIL, errorString); 115 assert(vT.length == 0, errorString); 116 assert(vT.universe == M, errorString); 117 118 size_t N = uniform(0UL, 2 * M); // independent parameter for testing 119 // make an array of length N 120 size_t[] testArray, cacheArray; 121 testArray = new size_t[N]; 122 cacheArray.reserve(N); 123 // fill the array with all possible values 124 foreach (ref el; testArray) 125 { 126 el = (2 * M).iota.choice; 127 } 128 129 foreach (testNumber; testArray) 130 { 131 assert(vT.universe == M, errorString); 132 const insertResult = vT.insert(testNumber); 133 134 if (insertResult) 135 { 136 assert(!vT.empty, errorString); 137 cacheArray ~= testNumber; 138 } 139 } 140 141 import std.algorithm.sorting : sort; 142 143 cacheArray.sort; 144 145 if (cacheArray.empty) 146 { 147 assert(vT.empty, errorString); 148 } 149 else 150 { 151 assert(!vT.empty, errorString); 152 } 153 154 foreach (el; cacheArray) 155 { 156 assert(bt(&vT.value_, el), errorString); 157 } 158 import std.algorithm.iteration : uniq; 159 import std.algorithm.searching : count; 160 161 assert(vT.length == cacheArray.uniq.count, errorString); 162 assert(vT.universe == M, errorString); 163 if (cacheArray.length) 164 { 165 assert(vT.front == cacheArray.front, errorString); 166 assert(vT.back == cacheArray.back, errorString); 167 } 168 else 169 { 170 assert(vT.front == NIL, errorString); 171 assert(vT.back == NIL, errorString); 172 } 173 174 auto currElement = vT.front; 175 foreach (el; cacheArray.uniq) 176 { 177 assert(currElement == el, errorString); 178 currElement = vT.next(currElement); 179 } 180 currElement = vT.back; 181 foreach (el; cacheArray.uniq.array.retro) 182 { 183 assert(currElement == el, errorString); 184 currElement = vT.prev(currElement); 185 } 186 187 foreach (key; 0 .. vT.universe) 188 { 189 import std.algorithm.searching : canFind; 190 191 if (cacheArray.uniq.array.canFind(key)) 192 { 193 assert(key in vT, errorString); 194 } 195 else 196 { 197 assert(!(key in vT), errorString); 198 } 199 } 200 auto deepCopy = vT.dup; 201 202 assert(deepCopy.value_ == vT.value_, errorString); 203 assert(vT == cacheArray.uniq, errorString); 204 assert(vT.prev(vT.front) == NIL, errorString); 205 assert(vT.next(vT.back) == NIL, errorString); 206 assert(vT == deepCopy, errorString); 207 assert(vT == deepCopy(), errorString); 208 209 if (cacheArray.length) 210 { 211 auto val = cacheArray.uniq.array.randomCover.front; 212 vT.remove(val); 213 assert((deepCopy.value_ ^ vT.value_) == (size_t(1) << val), errorString); 214 import std.algorithm.iteration : each; 215 import std.algorithm.searching : count, find; 216 import std.algorithm.mutation : remove; 217 218 cacheArray.count(val).iota.each!(i => cacheArray = cacheArray.remove( 219 cacheArray.length - cacheArray.find(val).length)); 220 } 221 else 222 { 223 assert((deepCopy.value_ ^ vT.value_) == 0, errorString); 224 } 225 226 foreach (key; 0 .. vT.capacity) 227 { 228 import std.algorithm.searching : canFind; 229 230 if (cacheArray.uniq.array.canFind(key)) 231 { 232 assert(vT.remove(key), errorString); 233 } 234 else 235 { 236 assert(!(vT.remove(key)), errorString); 237 } 238 } 239 assert(vT.value_ == 0, errorString); 240 assert(vT.empty, errorString); 241 } 242 } 243 } 244 245 static foreach (_; 1 .. size_t.sizeof - 1) 246 { 247 /// 248 unittest 249 { 250 import std.range : iota; 251 foreach (b; (CHAR_BIT * size_t.sizeof * testMultiplier).iota.parallel) 252 { 253 auto currentSeed = unpredictableSeed(); 254 size_t M; 255 auto vT = generateVEBtree!(1 << _) 256 (currentSeed, CHAR_BIT * size_t.sizeof, CHAR_BIT * size_t.sizeof * CHAR_BIT * size_t.sizeof, M); 257 const errorString = 258 generateDebugString("UT: tree test of capacity and universe: ", b, 1 << _, currentSeed, M); 259 260 assert(vT.universe == M, errorString); 261 assert(vT.capacity == (vT.universe - 1).nextPow2, 262 to!string("vT.capacity: " ~ to!string( 263 vT.capacity) ~ " vT.universe: " ~ to!string(vT.universe))); 264 assert(!(vT.ptr_ is null), errorString); 265 assert(vT.capacity == (vT.universe - 1).nextPow2, errorString); 266 } 267 } 268 } 269 270 static foreach (_; 1 .. size_t.sizeof - 1) 271 { 272 /// 273 unittest 274 { 275 import std.range : iota; 276 foreach (b; (CHAR_BIT * size_t.sizeof * testMultiplier).iota.parallel) 277 { 278 auto currentSeed = unpredictableSeed(); 279 size_t M; 280 auto vT = generateVEBtree!(1 << _) 281 (currentSeed, CHAR_BIT * size_t.sizeof, CHAR_BIT * size_t.sizeof * CHAR_BIT * size_t.sizeof, M); 282 const errorString = 283 generateDebugString("UT: tree test, general case: ", b, 1 << _, currentSeed, M); 284 size_t N = uniform(0UL, 2 * M); // independent parameter for testing 285 286 // make an array of length N 287 size_t[] testArray, cacheArray; 288 testArray = new size_t[N]; 289 cacheArray.reserve(N); 290 // fill the array with all possible values 291 foreach (ref el; testArray) 292 el = (2 * M).iota.choice; 293 294 auto rbt = redBlackTree!size_t(); 295 296 foreach (val; testArray) 297 { 298 assert(vT.universe == M, errorString); 299 assert(vT.length == rbt.length, errorString); 300 301 bool insertExpectation; 302 if (val < vT.capacity && !(val in vT)) 303 { 304 insertExpectation = true; 305 } 306 const insertResult = vT.insert(val); 307 308 assert(insertResult == insertExpectation, errorString); 309 310 if (insertResult) 311 { 312 313 assert(val in vT, errorString); 314 assert(!vT.empty, errorString); 315 rbt.insert(val); 316 assert(vT.front == rbt.front, errorString); 317 assert(vT.back == rbt.back, 318 "val:" ~ to!string(val) ~ " vT.back: " ~ to!string( 319 vT.back) ~ " rbt.back: " ~ to!string(rbt.back)); 320 321 cacheArray ~= val; 322 } 323 else 324 { 325 if (!(val in rbt)) 326 { 327 assert(!(val in vT), errorString); 328 } 329 else 330 { 331 assert(val in vT, errorString); 332 } 333 } 334 } 335 336 import std.algorithm.sorting : sort; 337 cacheArray.sort; 338 339 foreach (i, el; cacheArray) 340 { 341 assert(el in vT, errorString); 342 if (i + 1 != cacheArray.length) 343 { 344 assert(vT.next(el) == cacheArray[i + 1],errorString); 345 } 346 else 347 { 348 assert(vT.next(el) == NIL, errorString); 349 } 350 } 351 352 foreach (i, el; vT) 353 assert(el == cacheArray[i], errorString); 354 355 assert(vT == cacheArray, errorString); 356 357 auto vT2 = vT.dup; 358 assert(vT == vT2); 359 360 if(cacheArray.length) 361 { 362 auto rndNum = cacheArray.choice; 363 vT2.remove(rndNum); 364 assert(!(rndNum in vT2)); 365 assert(rndNum in vT); 366 assert(vT != vT2); 367 rndNum = uniform(0UL, vT2.capacity); 368 if(!(rndNum in vT)) 369 { 370 assert(!(rndNum in vT), errorString ~ format!"rndNum: %d"(rndNum)); 371 assert(vT2.insert(rndNum), errorString); 372 } 373 assert(vT != vT2); 374 //auto vT3 = vT2; 375 //vT3.insert(rndNum); 376 //assert(rndNum in vT3); 377 //assert(rndNum in vT2); 378 //assert(vT2.length == vT3.length); 379 } 380 381 const rangeExclusive = vT(); 382 assert(vT == rangeExclusive); 383 384 auto rangeInclusive = vT[]; 385 import std.range : enumerate; 386 foreach(i, el; rangeInclusive.enumerate) 387 { 388 if(i == 0) 389 { 390 if(!(0 in vT)) 391 { 392 continue; 393 } 394 } 395 else if(i + 1 != rangeInclusive.length) 396 { 397 assert(el in vT, errorString ~ format!" el: %d"(el)); 398 } 399 else if(i + 1 == rangeInclusive.length) 400 { 401 assert(el == vT.universe || el == vT.capacity); 402 if(el == vT.universe) 403 { 404 assert(vT.back <= vT.universe || vT.back == NIL, errorString ~ format!" length: %d"(vT.length)); 405 } 406 else 407 { 408 assert(vT.back > vT.universe, errorString); 409 assert(vT.back < vT.capacity, errorString); 410 } 411 } 412 else 413 { 414 assert(0); 415 } 416 } 417 418 import std.range : retro, enumerate; 419 foreach (i, el; cacheArray.retro.enumerate) 420 { 421 assert(el in vT, errorString); 422 if (i + 1 != cacheArray.length) 423 { 424 assert(vT.prev(el) == cacheArray[($ - 1) - (i + 1)], errorString); 425 } 426 else 427 { 428 assert(vT.prev(el) == NIL, errorString); 429 } 430 } 431 432 foreach (val; testArray) 433 { 434 assert(vT.length == rbt.length, errorString); 435 if (val in rbt) 436 { 437 assert(val in vT, errorString); 438 rbt.removeKey(val); 439 assert(vT.remove(val), errorString); 440 } 441 else 442 { 443 assert(!(val in vT), errorString); 444 assert(!vT.remove(val), errorString); 445 } 446 assert(!(val in rbt), errorString); 447 assert(!(val in vT), errorString); 448 } 449 assert(vT.length == 0, errorString); 450 assert(rbt.length == 0, errorString); 451 } 452 } 453 } 454 455 /** 456 define the absence of a key to be -1. 457 */ 458 enum NIL = ptrdiff_t(-1); 459 460 /** 461 The tree creator function. Optionally, the base size can be provided at compile time, however, the best results are 462 achieved with the default base size of CHAR_BIT * size_t.sizeof 463 */ 464 auto vebRoot(size_t baseSize = CHAR_BIT * size_t.sizeof)(size_t universe) 465 { 466 /** 467 Two parameters are provided: 468 - the base size is the maximal amount bits can be stored in a single node without branching (generating children) 469 - the universe is the user provided input, providing the expected amount of keys, going to be stored in the tree 470 */ 471 return VEBroot!baseSize(universe); 472 } 473 474 /** 475 A van Emde Boas node implementation 476 */ 477 struct VEBroot(size_t baseSize) if((baseSize & (baseSize - 1)) == 0) 478 { 479 /** 480 yields a deep copy of the node. I. e. copies all data in children and allocates another tree 481 */ 482 typeof(this) dup() 483 { 484 auto retVal = typeof(this)(universe); 485 foreach (el; opCall()) 486 retVal.insert(el); 487 return retVal; 488 } 489 490 /** 491 []-slicing. Yields a "random access range" with the content of the tree, always containing zero and the key after 492 the maximum element as keys. The key after the maximal key is the universe, if the tree is empty or the maximal 493 contained key is lesser then empty, otherwise the capacity of the tree. 494 */ 495 auto opIndex() @nogc 496 { 497 return vebTree!(Yes.inclusive)(this); 498 } 499 500 /** 501 ()-slicing. Yields a "random access range" with the content of the tree. Keys can be NIL. 502 */ 503 auto opCall() @nogc 504 { 505 return vebTree!(No.inclusive)(this); 506 } 507 508 /** 509 Equality operator checks for any iterable, whether in contains the same values, as the current tree. 510 */ 511 bool opEquals(T)(auto ref T input) const if (isIterable!T) 512 { 513 import std.range : hasLength; 514 static if (hasLength!T) 515 if (length != input.length) 516 return false; 517 518 size_t currentElem = this.front; 519 520 foreach (el; input) 521 { 522 if (el != currentElem) 523 return false; 524 currentElem = this.next(currentElem); 525 } 526 527 return true; 528 } 529 530 /** 531 member method for the case universe size < base size. Overloads by passing only one parameter, which is 532 the bit number of interest. Returns whether the appropriate bit inside the bitvector is set. 533 */ 534 bool opBinaryRight(string op)(size_t key) @nogc const if (op == "in") 535 { 536 if (key >= this.capacity) 537 return false; 538 539 if (this.empty) // if an empty intermediate node is found, nothing is stored below it. 540 return false; 541 542 if (this.isLeaf) 543 return bt(&value_, key) != 0; 544 else 545 { 546 // case of a single valued range. 547 if (key == this.front || key == this.back) 548 return true; 549 550 /* 551 else we have to descend, using the recursive indexing: 552 1. take the high(value, uS)-th child and 553 2. ask it about the reduced low(value, uS) value 554 3. use the lSR(uS) universe size of the childe node. 555 */ 556 return low(key) in ptr_[high(key)]; 557 } 558 } 559 560 /** 561 the opApply method grants the correct foreach behavior, nogc version 562 */ 563 int opApply(scope int delegate(ref size_t) @nogc operations) const @nogc 564 { 565 return opApplyImpl(operations); 566 } 567 568 /** 569 ditto 570 */ 571 int opApply(scope int delegate(ref size_t, ref size_t) @nogc operations) const @nogc 572 { 573 return opApplyImpl(operations); 574 } 575 576 /** 577 ditto 578 */ 579 int opApply(scope int delegate(ref size_t) operations) const 580 { 581 return opApplyImpl(operations); 582 } 583 584 /** 585 ditto 586 */ 587 int opApply(scope int delegate(ref size_t, ref size_t) operations) const 588 { 589 return opApplyImpl(operations); 590 } 591 592 /** 593 Node constructor. A universe size provided, if the size is below the cutoff there is nothing to be done, as the 594 underlying value created and set to zero by default. 595 If otherwise create an array of children. This array has to be (according to Cormen) of size of higher square 596 root of the current universe size + 1. The extra place is reserved for the summary. 597 For each just created child call its constructor. 598 For the summary with the universe size of the higher square root of the current universe size. 599 For each other child with the universe size of the lower square root of the currennt universe size. 600 Then, assign the fully initialized children array to the pointer in the current node, doing approprate steps to 601 show, that this node is an intermediate node, not containing any values yet. 602 The knowledge of the current universe size is lost at this moment. As this keeps every build up node smaller 603 (and there could be a lot of them). This is why, the VEBtree class continues to hold the global universe size, 604 which is passed on every call to the root node. In this way this, extern saved value has the role of being 605 outsourced array size for each (!) node in the tree, as its size is reconstructed during the access to them. 606 */ 607 608 @disable this(this); 609 610 /** 611 It is allowed to construct the root of the van Emde Boas tree directly, without the convenience method. 612 Params: 613 val = Expected universe size. The tree is generated so that every index below the universe size can be put 614 inside. 615 */ 616 this(size_t val) 617 in(val >= 2) 618 { 619 universe = val; 620 setEmpty; 621 622 assert(!length_ == this.empty); 623 624 if (!isLeaf) 625 { 626 assert(this.capacity == (universe - 1).nextPow2); 627 const arrSize = this.capacity.hSR + 1; 628 629 // reserve enough place for the summary and the children cluster 630 ptr_ = (new typeof(this)[arrSize]).ptr; 631 632 // add higher square root children with lower square root universe each. 633 foreach (i, ref el; cluster) 634 el = typeof(this)(this.capacity.lSR); 635 636 // add the summary with its universe of higher squaure root of the current universe 637 summary = typeof(this)(this.capacity.hSR); 638 } 639 assert(!length_ == this.empty); 640 } 641 642 /** 643 This tree has a length notion: it is the current number of inserted elements. 644 Returns: The current amount of contained keys. 645 */ 646 size_t length() const @nogc 647 { 648 return length_; 649 } 650 651 /** 652 the empty method to inform of an empty state of the tree. 653 Returns: Whether the tree is currently empty 654 */ 655 bool empty() const @nogc 656 { 657 return isLeaf ? value_ == 0 : value_ == -NIL; 658 } 659 660 /** 661 This yields whether the node is a leaf node. 662 Returns: Whether the node is a leaf. 663 */ 664 bool isLeaf() const @nogc 665 { 666 return universe <= baseSize; 667 } 668 669 /** 670 The minimal contained key in the van Emde Boas tree 671 Returns: The minimal contained element of the tree 672 */ 673 size_t front() @nogc const 674 { 675 if (empty) // do not proceed at all, if the root is empty 676 return NIL; 677 if (isLeaf) // pass control to the node 678 return bsf(value_); 679 return value_ & lowerMask; 680 } 681 682 /** 683 The maximal contained key in the van Emde Boas tree 684 Returns: The maximal contained element of the tree 685 */ 686 size_t back() @nogc const 687 { 688 if (empty) // do not proceed at all, if the root is empty 689 return NIL; 690 if (isLeaf) // pass control to the node 691 return bsr(value_); 692 return (value_ & higherMask) >> (CHAR_BIT * size_t.sizeof / 2); 693 } 694 695 /** 696 As a usual container, van Emde Boas tree provides the notion of capacity 697 Returns: The overall capacity of the tree. It is at least as high as the universe size provided on creation. 698 */ 699 size_t capacity() @nogc const 700 { 701 return isLeaf ? baseSize : (universe - 1).nextPow2; 702 } 703 704 /** 705 remove method of the van Emde Boas tree 706 Params: 707 val = The key to remove 708 Returns: Whether the key was removed. It is true, when the key was present, false otherwise 709 */ 710 bool remove(size_t val) @nogc 711 { 712 if (val >= capacity) // do not proceed at all, if the value can't be in the tree 713 return false; 714 if (empty) // do not proceed at all, if the root is empty 715 return false; 716 if (isLeaf) // pass control to the node 717 return length(length - (btr(&value_, val) != 0)); 718 if (front == back) // if the current node contains only a single value 719 { 720 assert(length == 1); 721 if (front != val) 722 return false; // do nothing if the given value is not the stored one 723 assert(length == 1); 724 return length(length - 1); 725 } 726 727 if (val == front) // if we met the minimum of a node 728 { 729 auto treeOffset = summary.front; // calculate an offset from the summary to continue with 730 if (treeOffset == NIL) // if the offset is invalid, then there is no further hierarchy and we are going to 731 { 732 front = back; // store a single value in this node. 733 assert(length == 2); 734 return length(length - 1); 735 } 736 auto m = cluster[treeOffset].front; // otherwise we get the minimum from the offset child 737 // remove it from the child 738 cluster[treeOffset].remove(m); 739 if (cluster[treeOffset].empty) 740 summary.remove(treeOffset); 741 //anyway, the new front of the current node become the restored value of the calculated offset. 742 front = index(treeOffset, m); 743 assert(length); 744 return length(length - 1); 745 } 746 747 if (val == back) // if we met the maximum of a node 748 { 749 // calculate an offset from the summary to contiue with 750 auto treeOffset = summary.back; 751 // if the offset is invalid, then there is no further hierarchy and we are going to 752 if (treeOffset == NIL) 753 { 754 // store a single value in this node. 755 back = front; 756 assert(length == 2); 757 return length(length - 1); 758 } 759 // otherwise we get maximum from the offset child 760 auto m = cluster[treeOffset].back; 761 // remove it from the child 762 cluster[treeOffset].remove(m); 763 if (cluster[treeOffset].empty) 764 summary.remove(treeOffset); 765 // anyway, the new back of the current node become the restored value of the calculated offset. 766 back = index(treeOffset, m); 767 assert(length); 768 return length(length - 1); 769 } 770 // if no condition was met we have to descend deeper. We get the offset by reducing the value to high(value, uS) 771 auto treeOffset = high(val); 772 auto res = length(length - cluster[treeOffset].remove(low(val))); 773 if (cluster[treeOffset].empty) 774 summary.remove(treeOffset); 775 return res; 776 } 777 778 /** 779 The successor search method of the van Emde Boas tree. 780 Params: 781 val = The key the next greater neighbor of which is looked for. 782 Returns: Ditto. If the next greater neighbor is missing a number out of bounds of the tree is returned. 783 */ 784 size_t next(size_t val) @nogc const 785 { 786 if (empty) // do not proceed at all, if the root is empty 787 return NIL; 788 if (isLeaf) // pass control to the node 789 { 790 if (val + 1 >= baseSize) // all vals are reduced by one. 791 return NIL; 792 793 // create a mask, which hides all lower bits of the stored value up to the given bit number, then apply 794 // bit search from the lowest bit. 795 auto maskedVal = value_ & ~((size_t(1) << (val + 1)) - 1); 796 797 if (maskedVal != 0) 798 return maskedVal.bsf; 799 800 return NIL; 801 } 802 // if given value is less then the front, return the front as successor 803 if (val < front) 804 return front; 805 // if given value is greater then the back, no predecessor exists 806 if (val >= back) 807 return NIL; 808 // if none of the break conditions was met, we have to descent further into the tree. 809 // calculate the child index by high(value, uS) 810 const childIndex = high(val); 811 // look into the child for its maximum 812 const maxlow = cluster[childIndex].back; 813 // if the maximum exists and the lowered given value is less then the child's maximum 814 if ((maxlow != NIL) && (low(val) < maxlow)) 815 { 816 auto offset = cluster[childIndex].next(low(val)); 817 // the result is given by reconstruction of the answer 818 return index(childIndex, offset); 819 } 820 else // otherwise we can not use the maximum of the child 821 { 822 auto succcluster = summary.next(childIndex); 823 // if the successor cluster is null 824 if (succcluster == NIL) 825 return back; 826 assert(succcluster != NIL); 827 assert(cluster[succcluster].front != NIL); 828 // if the successor cluster exists, the offset is given by its minimum 829 // and the result by the reconstruction of the offset. 830 return index(succcluster, cluster[succcluster].front); 831 } 832 } 833 834 /** 835 The predecessor search method of the van Emde Boas tree. 836 Params: 837 val = The key the next smaller neighbor of which is looked for. 838 Returns: Ditto. If the next smaller neighbor is missing a number out of bounds of the tree is returned. 839 */ 840 size_t prev(size_t val) @nogc const 841 { 842 if (empty) // do not proceed at all, if the root is empty 843 return NIL; 844 if (isLeaf) // pass control to the node 845 { 846 if (val != 0) 847 { 848 /* 849 create a mask, which hides all higher bits of the stored value down to the given bit number, then apply 850 bit search from the highest bit. 851 */ 852 auto maskedVal = value_ & ((size_t(1) << val) - 1); 853 854 if (maskedVal != 0) 855 return typeof(return)(maskedVal.bsr); 856 } 857 return NIL; 858 } 859 // if given value is greater then the stored back, the predecessor is back 860 if (val > back) 861 return back; 862 // if given value is less then the front, no predecessor exists. 863 if (val <= front) 864 return NIL; 865 // if none of the break conditions was met we have to descend further into the tree. 866 auto childIndex = high(val); // calculate the child index by high(value, uS) 867 const minlow = cluster[childIndex].front; // look into the child for its minimum 868 // if the minimum exists and the lowered given value is greater then the child's minimum 869 if ((minlow != NIL) && (low(val) > minlow)) 870 { 871 auto offset = cluster[childIndex].prev(low(val)); 872 // the result is given by reconstruction of the answer. 873 return index(childIndex, offset); 874 } 875 else // otherwise we can not use the minimum of the child 876 { 877 auto predcluster = summary.prev(childIndex); 878 // if the predecessor cluster is null return the current front, as this is the last remaining value 879 if (predcluster == NIL) 880 return front; 881 // if the predecessor cluster exists, the offset is given by its maximum 882 // and the result by the reconstruction of the offset. 883 return index(predcluster, cluster[predcluster].back); 884 } 885 } 886 887 /** 888 The insertion method of the van Emde Boas tree. 889 Params: 890 val = The key to insert 891 Returns: Whether the key was inserted. It is true, when the key was inserted, false otherwise 892 */ 893 bool insert(size_t val) @nogc 894 { 895 if (val >= capacity) // do not proceed at all, if the value won't fit into the tree 896 return false; 897 if (isLeaf) // pass control to the node 898 return length(length + (bts(&value_, val) == 0)); 899 if (empty) // if the current node does not contain anything put the value inside. 900 { 901 assert(empty); 902 front = val; 903 back = val; 904 assert(front == val); 905 assert(!empty); 906 assert(front == back); 907 assert(!empty); 908 return length(length + 1); 909 } 910 911 assert(!empty); 912 assert(front != NIL); 913 assert(back != NIL); 914 915 if (val == front || val == back) // if the value coincides with existing values, return 916 return false; 917 // if the node contains a single value only, expand the node to a range and leave. 918 if (front == back) 919 { 920 if (front > val) 921 front = val; 922 if (back < val) 923 back = val; 924 return length(length + 1); 925 } 926 /* 927 if none of the cases above was true (all of them are break conditions) we have to compare the given value 928 with the values present and adapt the range limits. This replaces the value we want to insert. 929 */ 930 931 // a swap can not be used here, as front is itself a (property) method 932 if (val < front) 933 { 934 const tmpKey = val; 935 val = front; 936 front = tmpKey; 937 assert(front == tmpKey); 938 } 939 940 // a swap can not be used here, as back is itself a (property) method 941 if (val > back) 942 { 943 const tmpKey = val; 944 val = back; 945 back = tmpKey; 946 assert(back == tmpKey); 947 } 948 949 // calculate the index of the children cluster by high(value, uS) we want to descent to. 950 const nextTreeIndex = high(val); 951 if (cluster[nextTreeIndex].empty) 952 summary.insert(nextTreeIndex); 953 return length(length + cluster[nextTreeIndex].insert(low(val))); 954 } 955 956 /** 957 The cached value of the universe, provided on creation 958 Returns: The cached input, provided on creation 959 */ 960 size_t universe() @nogc const 961 { 962 return universe_; 963 } 964 965 private: 966 967 size_t toHash() @nogc const nothrow { assert(0); } 968 969 bool front(size_t val) @nogc 970 { 971 if (isLeaf) // pass control to the node 972 return insert(val); 973 value_ = value_ & higherMask; // otherwise, set the lower part of the value, keeping the higher bits 974 const retVal = ((value_ & lowerMask) == val) ? false : true; 975 value_ = value_ | val; 976 return retVal; // this is a bug! 977 } 978 979 bool back(size_t val) @nogc 980 { 981 if (isLeaf) // pass control to the node 982 return insert(val); 983 value_ = value_ & lowerMask; // otherwise, set the higher part of the value, keeping the lower bits 984 const retVal = (value_ & higherMask) == (val << (CHAR_BIT * size_t.sizeof / 2)) ? false : true; 985 value_ = value_ | (val << (CHAR_BIT * size_t.sizeof / 2)); 986 return retVal; // this is a bug! 987 } 988 989 bool length(size_t input) @nogc 990 in 991 { 992 assert(input <= this.capacity); 993 994 if (input != length) 995 { 996 input > length ? assert(input - length == 1) : assert(length - input == 1); 997 } 998 } 999 do 1000 { 1001 const retVal = length != input; 1002 1003 length_ = input; 1004 1005 if (!length_) 1006 setEmpty; 1007 1008 return retVal; 1009 } 1010 1011 size_t index(size_t x, size_t y) const @nogc 1012 { 1013 return .index(this.capacity, x, y); 1014 } 1015 1016 size_t low(size_t val) const @nogc 1017 { 1018 return .low(this.capacity, val); 1019 } 1020 1021 size_t high(size_t val) const @nogc 1022 { 1023 return .high(this.capacity, val); 1024 } 1025 1026 void universe(size_t val) @nogc 1027 { 1028 universe_ = val; 1029 } 1030 1031 size_t value_; 1032 size_t universe_; 1033 size_t length_; 1034 typeof(this)* ptr_; 1035 1036 ref summary() inout @nogc 1037 in(!isLeaf) 1038 { // return the last element of the array of children, stored in the node. 1039 return ptr_[capacity.hSR]; 1040 } 1041 1042 auto cluster() inout @nogc 1043 in(!isLeaf) 1044 { // return all of the children in the stored array, but the last element 1045 return ptr_[0 .. capacity.hSR]; 1046 } 1047 1048 // The empty setter of a node. This function is kept for consistency in this module. 1049 void setEmpty() @nogc 1050 { 1051 value_ = isLeaf ? 0 : -NIL; 1052 } 1053 1054 // with the trick of https://forum.dlang.org/thread/erznqknpyxzxqivawnix@forum.dlang.org 1055 int opApplyImpl(O)(O operations) const 1056 { 1057 int result; 1058 size_t leading = this.front; 1059 1060 //for(size_t leading = front; leading < back; leading = this.next(leading)) 1061 1062 for (size_t i = 0; i < length; ++i) 1063 { 1064 static if (arity!operations == 1) 1065 result = operations(leading); 1066 else static if (arity!operations == 2) 1067 result = operations(i, leading); 1068 else 1069 assert(0); 1070 1071 if (result) 1072 break; 1073 1074 leading = this.next(leading); 1075 } 1076 1077 return result; 1078 } 1079 } 1080 1081 private: 1082 struct VEBtree(Flag!"inclusive" inclusive, T) 1083 { 1084 auto opBinaryRight(string op)(size_t key) @nogc if (op == "in") 1085 { 1086 return key in root; 1087 } 1088 1089 static if (inclusive) 1090 { 1091 size_t frontKey; 1092 size_t backKey; 1093 } 1094 else 1095 { 1096 ptrdiff_t frontKey; 1097 ptrdiff_t backKey; 1098 } 1099 1100 size_t length; 1101 1102 typeof(frontKey) front() @nogc 1103 { 1104 return frontKey; 1105 } 1106 1107 void popFront() @nogc 1108 in(!empty) 1109 { 1110 --length; 1111 frontKey = next(frontKey); 1112 } 1113 1114 typeof(backKey) back() @nogc 1115 { 1116 return backKey; 1117 } 1118 1119 void popBack() @nogc 1120 in(!empty) 1121 { 1122 --length; 1123 backKey = prev(backKey); 1124 } 1125 1126 auto prev(size_t key) @nogc 1127 { 1128 const pred = root.prev(key); 1129 static if (inclusive) 1130 return pred == NIL ? 0 : pred; 1131 else 1132 return pred; 1133 } 1134 1135 auto next(size_t key) @nogc 1136 { 1137 const succ = root.next(key); 1138 1139 static if(inclusive) 1140 debug 1141 if (succ == NIL) 1142 assert(length <= 1, format!"key: %d, length: %d\n"(key, length)); 1143 1144 static if (inclusive) 1145 if (succ == NIL) 1146 return backKey; 1147 else 1148 return succ; 1149 else 1150 return succ; 1151 } 1152 1153 bool empty() @nogc 1154 { 1155 return !length; 1156 } 1157 1158 auto save() const @nogc 1159 { 1160 return vebTree!(inclusive)(*root_, frontKey, backKey, length); 1161 } 1162 1163 size_t toHash() @nogc const nothrow { assert(0); } 1164 1165 /** 1166 for comparison with an iterable, the iterable will be iterated, as the current object. 1167 */ 1168 bool opEquals(T)(auto ref T input) const if (isIterable!T) 1169 { 1170 static if (is(T == typeof(this))) 1171 return root == input.root; 1172 1173 static if (hasLength!T) 1174 if (length != input.length) 1175 return false; 1176 1177 auto copy = this.save; 1178 1179 foreach (el; input) 1180 { 1181 if (el != copy.front) 1182 return false; 1183 copy.popFront(); 1184 } 1185 1186 return true; 1187 } 1188 1189 @disable this(); 1190 1191 private: 1192 T* root_; 1193 ref T root() { return *root_; } 1194 1195 this(T, Args...)(ref T _root, Args args) @nogc 1196 { 1197 root_ = &_root; 1198 1199 static if(Args.length) 1200 { 1201 frontKey = args[0]; 1202 backKey = args[1]; 1203 length = args[2]; 1204 } 1205 else 1206 { 1207 length = root.length; 1208 static if (inclusive) 1209 { 1210 if(!length) 1211 { 1212 backKey = root.universe; 1213 length = 2; 1214 } 1215 else 1216 { 1217 if(root.front > 0) 1218 { 1219 ++length; 1220 } 1221 1222 if(root.back <= root.universe) 1223 { 1224 backKey = root.universe; 1225 ++length; 1226 } 1227 else if(root.back <= root.capacity) 1228 { 1229 backKey = root.capacity; 1230 ++length; 1231 } 1232 else 1233 { 1234 debug 1235 { 1236 assert(root.back == root.universe || root.back == -1, format!"back: %d\n"(root.back)); 1237 } 1238 else 1239 { 1240 assert(0); 1241 } 1242 } 1243 } 1244 } 1245 else 1246 { 1247 frontKey = root.front; 1248 backKey = root.back; 1249 } 1250 } 1251 } 1252 } 1253 1254 // bit mask representing uint.max. 1255 enum size_t lowerMask = size_t.max >> (size_t.sizeof * CHAR_BIT / 2); 1256 // bit mask representing size_t.back without uint.max. 1257 enum size_t higherMask = size_t.max ^ lowerMask; 1258 1259 /* 1260 This function returns the higher square root of the given input. It is needed in the initialization step 1261 of the VEB tree to calculate the number of children of a given layer. And this is the universe size of the 1262 summary of a node. The upper square root is defined by 2^{\lceil(\lg u)/2\rceil} 1263 */ 1264 size_t hSR(size_t val) @nogc 1265 { 1266 return size_t(1) << (bsr(val) / 2 + ((val.bsr & 1) || ((val != 0) && (val & (val - 1))))); 1267 } 1268 // 1269 unittest 1270 { 1271 1272 auto currentSeed = unpredictableSeed(); 1273 const errorString = format!"UT: hSR. seed: %d"(currentSeed); 1274 rndGen.seed(currentSeed); //initialize the random generator 1275 size_t M = uniform(1UL, uint.max); //set universe size to some integer. 1276 auto hSR = hSR(M); 1277 assert((hSR & (hSR - 1)) == 0, errorString); 1278 import std.range : array; 1279 import std.algorithm.searching : until; 1280 1281 auto check = powersOfTwo.until(hSR).array; 1282 assert((check[$ - 1]) * (check[$ - 1]) < M, errorString); 1283 } 1284 1285 /* 1286 This function returns the lower square root of the given input. It is needed by the indexing functions 1287 high(x), low(x) and index(x,y) of elements in the tree. Also, this is the universe size of a child of a node. The 1288 lower square root is defined by 2^{\lfloor(\lgu)/2\rfloor} 1289 */ 1290 size_t lSR(size_t val) @nogc 1291 { 1292 return size_t(1) << (bsr(val) / 2); 1293 } 1294 // 1295 unittest 1296 { 1297 auto currentSeed = unpredictableSeed(); 1298 const errorString = format!"UT: lSR seed: %d"(currentSeed); 1299 rndGen.seed(currentSeed); //initialize the random generator 1300 const M = uniform(1UL, uint.max); //set universe size to some integer. 1301 auto lSR = M.lSR; 1302 1303 assert((lSR & (lSR - 1)) == 0, errorString); 1304 assert(lSR * lSR < M, errorString); 1305 import std.algorithm.searching : find; 1306 1307 assert(!powersOfTwo.find(lSR).empty); 1308 } 1309 1310 /* 1311 This is an index function defined as \lfloor x/lSR(u)\rfloor. It is needed to find the appropriate cluster 1312 of a element in the tree. It is a part of the ideal indexing function. 1313 */ 1314 size_t high(size_t universe, size_t val) @nogc 1315 out (result; result == val / universe.lSR) // bithacks = keithschwarz 1316 { 1317 return val >> (bsr(universe) / 2); 1318 } 1319 // 1320 unittest 1321 { 1322 auto currentSeed = unpredictableSeed(); 1323 const errorString = format!"UT: high seed: %d"(currentSeed); 1324 rndGen.seed(currentSeed); //initialize the random generator 1325 const M = uniform(1UL, uint.max); //set universe size to some integer. 1326 assert(M, errorString); 1327 size_t U = M.nextPow2; 1328 assert(U, errorString); 1329 auto x = uniform(0UL, U); 1330 assert(high(U, x) == x / U.lSR, errorString); 1331 } 1332 1333 /* 1334 This is an index function defined as fmod(value, lSR(universe)). It is needed to find the appropriate 1335 value inside a cluster. It is part of the ideal indexing function 1336 */ 1337 size_t low(size_t universe, size_t val) @nogc 1338 out (retVal; retVal == (val & ((size_t(1) << (bsr(universe) / 2)) - 1))) 1339 { 1340 return val % universe.lSR; 1341 } 1342 // 1343 unittest 1344 { 1345 auto currentSeed = unpredictableSeed(); 1346 const errorString = format!"UT: low seed: %d"(currentSeed); 1347 rndGen.seed(currentSeed); //initialize the random generator 1348 size_t M = uniform(1UL, uint.max); //set universe size to some integer. 1349 size_t U = nextPow2(M); 1350 auto x = uniform(0UL, U); 1351 assert(low(U, x) == (x & (U.lSR - 1)), errorString); 1352 } 1353 1354 /* 1355 This is an index function to retain the searched value. It is defined as x * lSR(u) + y. Beyond this, the 1356 relation holds: x = index(high(x), x.low). This is the ideal indexing function of the tree. 1357 */ 1358 size_t index(size_t universe, size_t x, size_t y) @nogc 1359 { 1360 return (x * universe.lSR + y); 1361 } 1362 // 1363 unittest 1364 { 1365 auto currentSeed = unpredictableSeed(); 1366 const errorString = format!"UT: index seed: %d"(currentSeed); 1367 rndGen.seed(currentSeed); //initialize the random generator 1368 const M = uniform(0UL, uint.max); //set universe size to some integer. 1369 size_t U = M.nextPow2; 1370 auto x = uniform(0UL, U); 1371 assert(index(U, U.high(x), U.low(x)) == x, errorString); 1372 } 1373 1374 auto vebTree(Flag!"inclusive" inclusive, T, Args...)(ref T root, Args args) 1375 { 1376 return VEBtree!(inclusive, T)(root, args); 1377 }