1 /** 2 Copyright: Copyright (c) 2016- Alexander Orlov. All rights reserved. 3 License: $(LINK2 https://opensource.org/licenses/MIT, MIT License). 4 Authors: Alexander Orlov, $(LINK2 mailto:sascha.orlov@gmail.com, sascha.orlov@gmail.com) 5 */ 6 7 /** 8 This module implements a Van Emde Boas tree container. 9 All corrections, bug findings pull requests and comments are welcome. 10 The main idea of the container is, to restrict the capacity of the tree by the next power of two universe size, 11 given an arbitrary size at the initialization. 12 */ 13 14 /** 15 The main advantage of the Van Emde Boas tree appears on a large amount of elements, as the provided standard 16 operations of the tree are constant in time and of order O(lg2(lg2(U))), where U is the capacity of the tree, constant 17 after creation. For small amount of elements the overhead coming along with the structure take over. However, if the 18 universe size becomes bigger, the tree performance becomes better. 19 */ 20 21 /** 22 Be aware, the current container is intended to be used with keys. This implies, that the capacity, fixed on its 23 initialization has two meanings. As usual, it shows the maximum amount of elements the instanciated tree can keep. 24 But also, it states, that no value bigger then capacity - 1 exists in the tree. This, and the fact, that only 25 non-negative values can be used are infered from the term "key". 26 */ 27 28 /** 29 See_also: Thomas H. Cormen, Clifford Stein, Ronald L. Rivest, and Charles E. Leiserson. 2001. <em>Introduction to 30 Algorithms</em> (2nd ed.). McGraw-Hill Higher Education. 31 the idea of using bit operations was reused from the C++ implementation found at 32 http://www.keithschwarz.com/interesting/code/van-emde-boas-tree/ 33 */ 34 35 module vebtree; 36 import core.bitop; 37 import std.traits : ReturnType, isIterable, arity; 38 import std.typecons : Flag, Yes, No; 39 import std.math : nextPow2; 40 import core.stdc.limits : CHAR_BIT; 41 42 debug import std.format : format; 43 44 version (unittest) 45 { 46 import std.parallelism : parallel; 47 import std.conv : to; 48 import core.stdc.stdio : printf; 49 import std.container.rbtree : redBlackTree; 50 51 import std.range; 52 import std.random; 53 import std.format; 54 import std.container; // red black tree may be used in unittests for comparison. 55 import std.math : sqrt; 56 57 // helping function for output a given value in binary representation 58 void bin(size_t n) 59 { 60 /* step 1 */ 61 if (n > 1) 62 bin(n / 2); 63 /* step 2 */ 64 65 printf("%d", n % 2); 66 } 67 68 /// precalculated powers of two table for unit testing 69 import std.range : iota; 70 import std.algorithm.iteration : map; 71 72 enum powersOfTwo = (CHAR_BIT * size_t.sizeof).iota.map!(a => size_t(1) << a); 73 enum testMultiplier = 1; //16 74 75 auto generateVEBtree(size_t baseSize)(uint currentSeed, size_t front, size_t back, ref size_t M) 76 { 77 static assert(baseSize > 1); 78 static assert((baseSize & (baseSize - 1)) == 0); 79 assert(front >= 2); 80 rndGen.seed(currentSeed); //initialize the random generator 81 M = uniform(front, back + 1); // parameter for construction 82 return vebRoot!baseSize(M); 83 } 84 string generateDebugString(string identifier1, size_t identifier2, size_t baseSize, uint currentSeed, size_t M) 85 { 86 return format!"%s: %d baseSize: %d; seed: %d M: %d"(identifier1, identifier2, baseSize, currentSeed, M); 87 } 88 } 89 90 static foreach (_; 1 .. size_t.sizeof - 1) 91 { 92 /// 93 unittest 94 { 95 enum baseSize = 1 << _; 96 foreach (b; (CHAR_BIT * size_t.sizeof * testMultiplier).iota.parallel) 97 { 98 auto currentSeed = unpredictableSeed(); 99 size_t M; 100 101 auto vT = generateVEBtree!(1 << _)(currentSeed, 2UL, baseSize, M); 102 assert(vT.universe == M); 103 const errorString = generateDebugString("UT: node test: ", b, baseSize, currentSeed, M); 104 105 assert(vT.value_ == 0, errorString); 106 assert(vT.ptr_ is null, errorString); 107 assert(vT.capacity == baseSize, errorString); 108 assert(vT.empty == true, errorString); 109 assert(vT.front == NIL, errorString); 110 assert(vT.back == NIL, errorString); 111 assert(vT[].front == 0, errorString); 112 assert(vT[].back == vT.universe, errorString); 113 assert(vT().front == NIL, errorString); 114 assert(vT().back == NIL, errorString); 115 assert(vT.length == 0, errorString); 116 assert(vT.universe == M, errorString); 117 118 size_t N = uniform(0UL, 2 * M); // independent parameter for testing 119 // make an array of length N 120 size_t[] testArray, cacheArray; 121 testArray = new size_t[N]; 122 cacheArray.reserve(N); 123 // fill the array with all possible values 124 foreach (ref el; testArray) 125 { 126 el = (2 * M).iota.choice; 127 } 128 129 foreach (testNumber; testArray) 130 { 131 assert(vT.universe == M, errorString); 132 const insertResult = vT.insert(testNumber); 133 134 if (insertResult) 135 { 136 assert(!vT.empty, errorString); 137 cacheArray ~= testNumber; 138 } 139 } 140 141 import std.algorithm.sorting : sort; 142 143 cacheArray.sort; 144 145 if (cacheArray.empty) 146 { 147 assert(vT.empty, errorString); 148 } 149 else 150 { 151 assert(!vT.empty, errorString); 152 } 153 154 foreach (el; cacheArray) 155 { 156 assert(bt(&vT.value_, el), errorString); 157 } 158 import std.algorithm.iteration : uniq; 159 import std.algorithm.searching : count; 160 161 assert(vT.length == cacheArray.uniq.count, errorString); 162 assert(vT.universe == M, errorString); 163 if (cacheArray.length) 164 { 165 assert(vT.front == cacheArray.front, errorString); 166 assert(vT.back == cacheArray.back, errorString); 167 } 168 else 169 { 170 assert(vT.front == NIL, errorString); 171 assert(vT.back == NIL, errorString); 172 } 173 174 auto currElement = vT.front; 175 foreach (el; cacheArray.uniq) 176 { 177 assert(currElement == el, errorString); 178 currElement = vT.next(currElement); 179 } 180 currElement = vT.back; 181 foreach (el; cacheArray.uniq.array.retro) 182 { 183 assert(currElement == el, errorString); 184 currElement = vT.prev(currElement); 185 } 186 187 foreach (key; 0 .. vT.universe) 188 { 189 import std.algorithm.searching : canFind; 190 191 if (cacheArray.uniq.array.canFind(key)) 192 { 193 assert(key in vT, errorString); 194 } 195 else 196 { 197 assert(!(key in vT), errorString); 198 } 199 } 200 auto deepCopy = vT.dup; 201 202 assert(deepCopy.value_ == vT.value_, errorString); 203 assert(vT == cacheArray.uniq, errorString); 204 assert(vT.prev(vT.front) == NIL, errorString); 205 assert(vT.next(vT.back) == NIL, errorString); 206 assert(vT == deepCopy, errorString); 207 assert(vT == deepCopy(), errorString); 208 209 if (cacheArray.length) 210 { 211 auto val = cacheArray.uniq.array.randomCover.front; 212 vT.remove(val); 213 assert((deepCopy.value_ ^ vT.value_) == (size_t(1) << val), errorString); 214 import std.algorithm.iteration : each; 215 import std.algorithm.searching : count, find; 216 import std.algorithm.mutation : remove; 217 218 cacheArray.count(val).iota.each!(i => cacheArray = cacheArray.remove( 219 cacheArray.length - cacheArray.find(val).length)); 220 } 221 else 222 { 223 assert((deepCopy.value_ ^ vT.value_) == 0, errorString); 224 } 225 226 foreach (key; 0 .. vT.capacity) 227 { 228 import std.algorithm.searching : canFind; 229 230 if (cacheArray.uniq.array.canFind(key)) 231 { 232 assert(vT.remove(key), errorString); 233 } 234 else 235 { 236 assert(!(vT.remove(key)), errorString); 237 } 238 } 239 assert(vT.value_ == 0, errorString); 240 assert(vT.empty, errorString); 241 } 242 } 243 } 244 245 static foreach (_; 1 .. size_t.sizeof - 1) 246 { 247 /// 248 unittest 249 { 250 import std.range : iota; 251 foreach (b; (CHAR_BIT * size_t.sizeof * testMultiplier).iota.parallel) 252 { 253 auto currentSeed = unpredictableSeed(); 254 size_t M; 255 auto vT = generateVEBtree!(1 << _) 256 (currentSeed, CHAR_BIT * size_t.sizeof, CHAR_BIT * size_t.sizeof * CHAR_BIT * size_t.sizeof, M); 257 const errorString = 258 generateDebugString("UT: tree test of capacity and universe: ", b, 1 << _, currentSeed, M); 259 260 assert(vT.universe == M, errorString); 261 assert(vT.capacity == (vT.universe - 1).nextPow2, 262 to!string("vT.capacity: " ~ to!string( 263 vT.capacity) ~ " vT.universe: " ~ to!string(vT.universe))); 264 assert(!(vT.ptr_ is null), errorString); 265 assert(vT.capacity == (vT.universe - 1).nextPow2, errorString); 266 } 267 } 268 } 269 270 static foreach (_; 1 .. size_t.sizeof - 1) 271 { 272 /// 273 unittest 274 { 275 import std.range : iota; 276 foreach (b; (CHAR_BIT * size_t.sizeof * testMultiplier).iota.parallel) 277 { 278 auto currentSeed = unpredictableSeed(); 279 size_t M; 280 auto vT = generateVEBtree!(1 << _) 281 (currentSeed, CHAR_BIT * size_t.sizeof, CHAR_BIT * size_t.sizeof * CHAR_BIT * size_t.sizeof, M); 282 const errorString = 283 generateDebugString("UT: tree test, general case: ", b, 1 << _, currentSeed, M); 284 size_t N = uniform(0UL, 2 * M); // independent parameter for testing 285 286 // make an array of length N 287 size_t[] testArray, cacheArray; 288 testArray = new size_t[N]; 289 cacheArray.reserve(N); 290 // fill the array with all possible values 291 foreach (ref el; testArray) 292 el = (2 * M).iota.choice; 293 294 auto rbt = redBlackTree!size_t(); 295 296 foreach (val; testArray) 297 { 298 assert(vT.universe == M, errorString); 299 assert(vT.length == rbt.length, errorString); 300 301 bool insertExpectation; 302 if (val < vT.capacity && !(val in vT)) 303 { 304 insertExpectation = true; 305 } 306 const insertResult = vT.insert(val); 307 308 assert(insertResult == insertExpectation, errorString); 309 310 if (insertResult) 311 { 312 313 assert(val in vT, errorString); 314 assert(!vT.empty, errorString); 315 rbt.insert(val); 316 assert(vT.front == rbt.front, errorString); 317 assert(vT.back == rbt.back, 318 "val:" ~ to!string(val) ~ " vT.back: " ~ to!string( 319 vT.back) ~ " rbt.back: " ~ to!string(rbt.back)); 320 321 cacheArray ~= val; 322 } 323 else 324 { 325 if (!(val in rbt)) 326 { 327 assert(!(val in vT), errorString); 328 } 329 else 330 { 331 assert(val in vT, errorString); 332 } 333 } 334 } 335 336 import std.algorithm.sorting : sort; 337 cacheArray.sort; 338 339 foreach (i, el; cacheArray) 340 { 341 assert(el in vT, errorString); 342 if (i + 1 != cacheArray.length) 343 { 344 assert(vT.next(el) == cacheArray[i + 1],errorString); 345 } 346 else 347 { 348 assert(vT.next(el) == NIL, errorString); 349 } 350 } 351 352 foreach (i, el; vT) 353 assert(el == cacheArray[i], errorString); 354 355 assert(vT == cacheArray, errorString); 356 357 auto vT2 = vT.dup; 358 assert(vT == vT2); 359 360 if(cacheArray.length) 361 { 362 auto rndNum = cacheArray.choice; 363 vT2.remove(rndNum); 364 assert(!(rndNum in vT2)); 365 assert(rndNum in vT); 366 assert(vT != vT2); 367 rndNum = uniform(0UL, vT2.capacity); 368 if(!(rndNum in vT)) 369 { 370 assert(!(rndNum in vT), errorString ~ format!"rndNum: %d"(rndNum)); 371 assert(vT2.insert(rndNum), errorString); 372 } 373 assert(vT != vT2); 374 } 375 376 const rangeExclusive = vT(); 377 assert(vT == rangeExclusive); 378 379 auto rangeInclusive = vT[]; 380 import std.algorithm.comparison : equal; 381 import std.algorithm.iteration : uniq; 382 assert(equal(uniq(rangeInclusive), rangeInclusive)); 383 import std.range : enumerate; 384 foreach(i, el; rangeInclusive.enumerate) 385 { 386 if(i == 0) 387 { 388 if(!(0 in vT)) 389 { 390 continue; 391 } 392 } 393 else if(i + 1 != rangeInclusive.length) 394 { 395 assert(el in vT, errorString ~ format!" el: %d"(el)); 396 } 397 else if(i + 1 == rangeInclusive.length) 398 { 399 assert(el == vT.universe || el == vT.capacity); 400 if(el == vT.universe) 401 { 402 assert(vT.back <= vT.universe || vT.back == NIL, errorString ~ format!" length: %d"(vT.length)); 403 } 404 else 405 { 406 assert(vT.back > vT.universe, errorString); 407 assert(vT.back < vT.capacity, errorString); 408 } 409 } 410 else 411 { 412 assert(0); 413 } 414 } 415 416 import std.range : retro, enumerate; 417 foreach (i, el; cacheArray.retro.enumerate) 418 { 419 assert(el in vT, errorString); 420 if (i + 1 != cacheArray.length) 421 { 422 assert(vT.prev(el) == cacheArray[($ - 1) - (i + 1)], errorString); 423 } 424 else 425 { 426 assert(vT.prev(el) == NIL, errorString); 427 } 428 } 429 430 foreach (val; testArray) 431 { 432 assert(vT.length == rbt.length, errorString); 433 if (val in rbt) 434 { 435 assert(val in vT, errorString); 436 rbt.removeKey(val); 437 assert(vT.remove(val), errorString); 438 } 439 else 440 { 441 assert(!(val in vT), errorString); 442 assert(!vT.remove(val), errorString); 443 } 444 assert(!(val in rbt), errorString); 445 assert(!(val in vT), errorString); 446 } 447 assert(vT.length == 0, errorString); 448 assert(rbt.length == 0, errorString); 449 } 450 } 451 } 452 453 /** 454 define the absence of a key to be -1. 455 */ 456 enum NIL = ptrdiff_t(-1); 457 458 /** 459 The tree creator function. Optionally, the base size can be provided at compile time, however, the best results are 460 achieved with the default base size of CHAR_BIT * size_t.sizeof 461 */ 462 auto vebRoot(size_t baseSize = CHAR_BIT * size_t.sizeof)(size_t universe) 463 { 464 /** 465 Two parameters are provided: 466 - the base size is the maximal amount bits can be stored in a single node without branching (generating children) 467 - the universe is the user provided input, providing the expected amount of keys, going to be stored in the tree 468 */ 469 return VEBroot!baseSize(universe); 470 } 471 472 /** 473 A van Emde Boas node implementation 474 */ 475 struct VEBroot(size_t baseSize) if((baseSize & (baseSize - 1)) == 0) 476 { 477 /** 478 yields a deep copy of the node. I. e. copies all data in children and allocates another tree 479 */ 480 typeof(this) dup() 481 { 482 auto retVal = typeof(this)(universe); 483 foreach (el; opCall()) 484 retVal.insert(el); 485 return retVal; 486 } 487 488 /** 489 []-slicing. Yields a "random access range" with the content of the tree, always containing zero and the key after 490 the maximum element as keys. The key after the maximal key is the universe, if the tree is empty or the maximal 491 contained key is lesser then empty, otherwise the capacity of the tree. 492 */ 493 auto opIndex() @nogc 494 { 495 return vebTree!(Yes.inclusive)(this); 496 } 497 498 /** 499 ()-slicing. Yields a "random access range" with the content of the tree. Keys can be NIL. 500 */ 501 auto opCall() @nogc 502 { 503 return vebTree!(No.inclusive)(this); 504 } 505 506 /** 507 Equality operator checks for any iterable, whether in contains the same values, as the current tree. 508 */ 509 bool opEquals(T)(auto ref T input) const if (isIterable!T) 510 { 511 import std.range : hasLength; 512 static if (hasLength!T) 513 if (length != input.length) 514 return false; 515 516 size_t currentElem = this.front; 517 518 foreach (el; input) 519 { 520 if (el != currentElem) 521 return false; 522 currentElem = this.next(currentElem); 523 } 524 525 return true; 526 } 527 528 /** 529 member method for the case universe size < base size. Overloads by passing only one parameter, which is 530 the bit number of interest. Returns whether the appropriate bit inside the bitvector is set. 531 */ 532 bool opBinaryRight(string op)(size_t key) @nogc const if (op == "in") 533 { 534 if (key >= this.capacity) 535 return false; 536 537 if (this.empty) // if an empty intermediate node is found, nothing is stored below it. 538 return false; 539 540 if (this.isLeaf) 541 return bt(&value_, key) != 0; 542 else 543 { 544 // case of a single valued range. 545 if (key == this.front || key == this.back) 546 return true; 547 548 /* 549 else we have to descend, using the recursive indexing: 550 1. take the high(value, uS)-th child and 551 2. ask it about the reduced low(value, uS) value 552 3. use the lSR(uS) universe size of the childe node. 553 */ 554 return low(key) in ptr_[high(key)]; 555 } 556 } 557 558 /** 559 the opApply method grants the correct foreach behavior, nogc version 560 */ 561 int opApply(scope int delegate(ref size_t) @nogc operations) const @nogc 562 { 563 return opApplyImpl(operations); 564 } 565 566 /** 567 ditto 568 */ 569 int opApply(scope int delegate(ref size_t, ref size_t) @nogc operations) const @nogc 570 { 571 return opApplyImpl(operations); 572 } 573 574 /** 575 ditto 576 */ 577 int opApply(scope int delegate(ref size_t) operations) const 578 { 579 return opApplyImpl(operations); 580 } 581 582 /** 583 ditto 584 */ 585 int opApply(scope int delegate(ref size_t, ref size_t) operations) const 586 { 587 return opApplyImpl(operations); 588 } 589 590 /** 591 Node constructor. A universe size provided, if the size is below the cutoff there is nothing to be done, as the 592 underlying value created and set to zero by default. 593 If otherwise create an array of children. This array has to be (according to Cormen) of size of higher square 594 root of the current universe size + 1. The extra place is reserved for the summary. 595 For each just created child call its constructor. 596 For the summary with the universe size of the higher square root of the current universe size. 597 For each other child with the universe size of the lower square root of the currennt universe size. 598 Then, assign the fully initialized children array to the pointer in the current node, doing approprate steps to 599 show, that this node is an intermediate node, not containing any values yet. 600 The knowledge of the current universe size is lost at this moment. As this keeps every build up node smaller 601 (and there could be a lot of them). This is why, the VEBtree class continues to hold the global universe size, 602 which is passed on every call to the root node. In this way this, extern saved value has the role of being 603 outsourced array size for each (!) node in the tree, as its size is reconstructed during the access to them. 604 */ 605 606 @disable this(this); 607 608 /** 609 It is allowed to construct the root of the van Emde Boas tree directly, without the convenience method. 610 Params: 611 val = Expected universe size. The tree is generated so that every index below the universe size can be put 612 inside. 613 */ 614 this(size_t val) 615 in(val >= 2) 616 { 617 universe = val; 618 setEmpty; 619 620 assert(!length_ == this.empty); 621 622 if (!isLeaf) 623 { 624 assert(this.capacity == (universe - 1).nextPow2); 625 const arrSize = this.capacity.hSR + 1; 626 627 // reserve enough place for the summary and the children cluster 628 ptr_ = (new typeof(this)[arrSize]).ptr; 629 630 // add higher square root children with lower square root universe each. 631 foreach (i, ref el; cluster) 632 el = typeof(this)(this.capacity.lSR); 633 634 // add the summary with its universe of higher squaure root of the current universe 635 summary = typeof(this)(this.capacity.hSR); 636 } 637 assert(!length_ == this.empty); 638 } 639 640 /** 641 This tree has a length notion: it is the current number of inserted elements. 642 Returns: The current amount of contained keys. 643 */ 644 size_t length() const @nogc 645 { 646 return length_; 647 } 648 649 /** 650 the empty method to inform of an empty state of the tree. 651 Returns: Whether the tree is currently empty 652 */ 653 bool empty() const @nogc 654 { 655 return isLeaf ? value_ == 0 : value_ == -NIL; 656 } 657 658 /** 659 This yields whether the node is a leaf node. 660 Returns: Whether the node is a leaf. 661 */ 662 bool isLeaf() const @nogc 663 { 664 return universe <= baseSize; 665 } 666 667 /** 668 The minimal contained key in the van Emde Boas tree 669 Returns: The minimal contained element of the tree 670 */ 671 size_t front() @nogc const 672 { 673 if (empty) // do not proceed at all, if the root is empty 674 return NIL; 675 if (isLeaf) // pass control to the node 676 return bsf(value_); 677 return value_ & lowerMask; 678 } 679 680 /** 681 The maximal contained key in the van Emde Boas tree 682 Returns: The maximal contained element of the tree 683 */ 684 size_t back() @nogc const 685 { 686 if (empty) // do not proceed at all, if the root is empty 687 return NIL; 688 if (isLeaf) // pass control to the node 689 return bsr(value_); 690 return (value_ & higherMask) >> (CHAR_BIT * size_t.sizeof / 2); 691 } 692 693 /** 694 As a usual container, van Emde Boas tree provides the notion of capacity 695 Returns: The overall capacity of the tree. It is at least as high as the universe size provided on creation. 696 */ 697 size_t capacity() @nogc const 698 { 699 return isLeaf ? baseSize : (universe - 1).nextPow2; 700 } 701 702 /** 703 remove method of the van Emde Boas tree 704 Params: 705 val = The key to remove 706 Returns: Whether the key was removed. It is true, when the key was present, false otherwise 707 */ 708 bool remove(size_t val) @nogc 709 { 710 if (val >= capacity) // do not proceed at all, if the value can't be in the tree 711 return false; 712 if (empty) // do not proceed at all, if the root is empty 713 return false; 714 if (isLeaf) // pass control to the node 715 return length(length - (btr(&value_, val) != 0)); 716 if (front == back) // if the current node contains only a single value 717 { 718 assert(length == 1); 719 if (front != val) 720 return false; // do nothing if the given value is not the stored one 721 assert(length == 1); 722 return length(length - 1); 723 } 724 725 if (val == front) // if we met the minimum of a node 726 { 727 auto treeOffset = summary.front; // calculate an offset from the summary to continue with 728 if (treeOffset == NIL) // if the offset is invalid, then there is no further hierarchy and we are going to 729 { 730 front = back; // store a single value in this node. 731 assert(length == 2); 732 return length(length - 1); 733 } 734 auto m = cluster[treeOffset].front; // otherwise we get the minimum from the offset child 735 // remove it from the child 736 cluster[treeOffset].remove(m); 737 if (cluster[treeOffset].empty) 738 summary.remove(treeOffset); 739 //anyway, the new front of the current node become the restored value of the calculated offset. 740 front = index(treeOffset, m); 741 assert(length); 742 return length(length - 1); 743 } 744 745 if (val == back) // if we met the maximum of a node 746 { 747 // calculate an offset from the summary to contiue with 748 auto treeOffset = summary.back; 749 // if the offset is invalid, then there is no further hierarchy and we are going to 750 if (treeOffset == NIL) 751 { 752 // store a single value in this node. 753 back = front; 754 assert(length == 2); 755 return length(length - 1); 756 } 757 // otherwise we get maximum from the offset child 758 auto m = cluster[treeOffset].back; 759 // remove it from the child 760 cluster[treeOffset].remove(m); 761 if (cluster[treeOffset].empty) 762 summary.remove(treeOffset); 763 // anyway, the new back of the current node become the restored value of the calculated offset. 764 back = index(treeOffset, m); 765 assert(length); 766 return length(length - 1); 767 } 768 // if no condition was met we have to descend deeper. We get the offset by reducing the value to high(value, uS) 769 auto treeOffset = high(val); 770 auto res = length(length - cluster[treeOffset].remove(low(val))); 771 if (cluster[treeOffset].empty) 772 summary.remove(treeOffset); 773 return res; 774 } 775 776 /** 777 The successor search method of the van Emde Boas tree. 778 Params: 779 val = The key the next greater neighbor of which is looked for. 780 Returns: Ditto. If the next greater neighbor is missing a number out of bounds of the tree is returned. 781 */ 782 size_t next(size_t val) @nogc const 783 { 784 if (empty) // do not proceed at all, if the root is empty 785 return NIL; 786 if (isLeaf) // pass control to the node 787 { 788 if (val + 1 >= baseSize) // all vals are reduced by one. 789 return NIL; 790 791 // create a mask, which hides all lower bits of the stored value up to the given bit number, then apply 792 // bit search from the lowest bit. 793 auto maskedVal = value_ & ~((size_t(1) << (val + 1)) - 1); 794 795 if (maskedVal != 0) 796 return maskedVal.bsf; 797 798 return NIL; 799 } 800 // if given value is less then the front, return the front as successor 801 if (val < front) 802 return front; 803 // if given value is greater then the back, no predecessor exists 804 if (val >= back) 805 return NIL; 806 // if none of the break conditions was met, we have to descent further into the tree. 807 // calculate the child index by high(value, uS) 808 const childIndex = high(val); 809 // look into the child for its maximum 810 const maxlow = cluster[childIndex].back; 811 // if the maximum exists and the lowered given value is less then the child's maximum 812 if ((maxlow != NIL) && (low(val) < maxlow)) 813 { 814 auto offset = cluster[childIndex].next(low(val)); 815 // the result is given by reconstruction of the answer 816 return index(childIndex, offset); 817 } 818 else // otherwise we can not use the maximum of the child 819 { 820 auto succcluster = summary.next(childIndex); 821 // if the successor cluster is null 822 if (succcluster == NIL) 823 return back; 824 assert(succcluster != NIL); 825 assert(cluster[succcluster].front != NIL); 826 // if the successor cluster exists, the offset is given by its minimum 827 // and the result by the reconstruction of the offset. 828 return index(succcluster, cluster[succcluster].front); 829 } 830 } 831 832 /** 833 The predecessor search method of the van Emde Boas tree. 834 Params: 835 val = The key the next smaller neighbor of which is looked for. 836 Returns: Ditto. If the next smaller neighbor is missing a number out of bounds of the tree is returned. 837 */ 838 size_t prev(size_t val) @nogc const 839 { 840 if (empty) // do not proceed at all, if the root is empty 841 return NIL; 842 if (isLeaf) // pass control to the node 843 { 844 if (val != 0) 845 { 846 /* 847 create a mask, which hides all higher bits of the stored value down to the given bit number, then apply 848 bit search from the highest bit. 849 */ 850 auto maskedVal = value_ & ((size_t(1) << val) - 1); 851 852 if (maskedVal != 0) 853 return typeof(return)(maskedVal.bsr); 854 } 855 return NIL; 856 } 857 // if given value is greater then the stored back, the predecessor is back 858 if (val > back) 859 return back; 860 // if given value is less then the front, no predecessor exists. 861 if (val <= front) 862 return NIL; 863 // if none of the break conditions was met we have to descend further into the tree. 864 auto childIndex = high(val); // calculate the child index by high(value, uS) 865 const minlow = cluster[childIndex].front; // look into the child for its minimum 866 // if the minimum exists and the lowered given value is greater then the child's minimum 867 if ((minlow != NIL) && (low(val) > minlow)) 868 { 869 auto offset = cluster[childIndex].prev(low(val)); 870 // the result is given by reconstruction of the answer. 871 return index(childIndex, offset); 872 } 873 else // otherwise we can not use the minimum of the child 874 { 875 auto predcluster = summary.prev(childIndex); 876 // if the predecessor cluster is null return the current front, as this is the last remaining value 877 if (predcluster == NIL) 878 return front; 879 // if the predecessor cluster exists, the offset is given by its maximum 880 // and the result by the reconstruction of the offset. 881 return index(predcluster, cluster[predcluster].back); 882 } 883 } 884 885 /** 886 The insertion method of the van Emde Boas tree. 887 Params: 888 val = The key to insert 889 Returns: Whether the key was inserted. It is true, when the key was inserted, false otherwise 890 */ 891 bool insert(size_t val) @nogc 892 { 893 if (val >= capacity) // do not proceed at all, if the value won't fit into the tree 894 return false; 895 if (isLeaf) // pass control to the node 896 return length(length + (bts(&value_, val) == 0)); 897 if (empty) // if the current node does not contain anything put the value inside. 898 { 899 assert(empty); 900 front = val; 901 back = val; 902 assert(front == val); 903 assert(!empty); 904 assert(front == back); 905 assert(!empty); 906 return length(length + 1); 907 } 908 909 assert(!empty); 910 assert(front != NIL); 911 assert(back != NIL); 912 913 if (val == front || val == back) // if the value coincides with existing values, return 914 return false; 915 // if the node contains a single value only, expand the node to a range and leave. 916 if (front == back) 917 { 918 if (front > val) 919 front = val; 920 if (back < val) 921 back = val; 922 return length(length + 1); 923 } 924 /* 925 if none of the cases above was true (all of them are break conditions) we have to compare the given value 926 with the values present and adapt the range limits. This replaces the value we want to insert. 927 */ 928 929 // a swap can not be used here, as front is itself a (property) method 930 if (val < front) 931 { 932 const tmpKey = val; 933 val = front; 934 front = tmpKey; 935 assert(front == tmpKey); 936 } 937 938 // a swap can not be used here, as back is itself a (property) method 939 if (val > back) 940 { 941 const tmpKey = val; 942 val = back; 943 back = tmpKey; 944 assert(back == tmpKey); 945 } 946 947 // calculate the index of the children cluster by high(value, uS) we want to descent to. 948 const nextTreeIndex = high(val); 949 if (cluster[nextTreeIndex].empty) 950 summary.insert(nextTreeIndex); 951 return length(length + cluster[nextTreeIndex].insert(low(val))); 952 } 953 954 /** 955 The cached value of the universe, provided on creation 956 Returns: The cached input, provided on creation 957 */ 958 size_t universe() @nogc const 959 { 960 return universe_; 961 } 962 963 private: 964 965 size_t toHash() @nogc const nothrow { assert(0); } 966 967 bool front(size_t val) @nogc 968 { 969 if (isLeaf) // pass control to the node 970 return insert(val); 971 value_ = value_ & higherMask; // otherwise, set the lower part of the value, keeping the higher bits 972 const retVal = ((value_ & lowerMask) == val) ? false : true; 973 value_ = value_ | val; 974 return retVal; // this is a bug! 975 } 976 977 bool back(size_t val) @nogc 978 { 979 if (isLeaf) // pass control to the node 980 return insert(val); 981 value_ = value_ & lowerMask; // otherwise, set the higher part of the value, keeping the lower bits 982 const retVal = (value_ & higherMask) == (val << (CHAR_BIT * size_t.sizeof / 2)) ? false : true; 983 value_ = value_ | (val << (CHAR_BIT * size_t.sizeof / 2)); 984 return retVal; // this is a bug! 985 } 986 987 bool length(size_t input) @nogc 988 in 989 { 990 assert(input <= this.capacity); 991 992 if (input != length) 993 { 994 input > length ? assert(input - length == 1) : assert(length - input == 1); 995 } 996 } 997 do 998 { 999 const retVal = length != input; 1000 1001 length_ = input; 1002 1003 if (!length_) 1004 setEmpty; 1005 1006 return retVal; 1007 } 1008 1009 size_t index(size_t x, size_t y) const @nogc 1010 { 1011 return .index(this.capacity, x, y); 1012 } 1013 1014 size_t low(size_t val) const @nogc 1015 { 1016 return .low(this.capacity, val); 1017 } 1018 1019 size_t high(size_t val) const @nogc 1020 { 1021 return .high(this.capacity, val); 1022 } 1023 1024 void universe(size_t val) @nogc 1025 { 1026 universe_ = val; 1027 } 1028 1029 size_t value_; 1030 size_t universe_; 1031 size_t length_; 1032 typeof(this)* ptr_; 1033 1034 ref summary() inout @nogc 1035 in(!isLeaf) 1036 { // return the last element of the array of children, stored in the node. 1037 return ptr_[capacity.hSR]; 1038 } 1039 1040 auto cluster() inout @nogc 1041 in(!isLeaf) 1042 { // return all of the children in the stored array, but the last element 1043 return ptr_[0 .. capacity.hSR]; 1044 } 1045 1046 // The empty setter of a node. This function is kept for consistency in this module. 1047 void setEmpty() @nogc 1048 { 1049 value_ = isLeaf ? 0 : -NIL; 1050 } 1051 1052 // with the trick of https://forum.dlang.org/thread/erznqknpyxzxqivawnix@forum.dlang.org 1053 int opApplyImpl(O)(O operations) const 1054 { 1055 int result; 1056 size_t leading = this.front; 1057 1058 //for(size_t leading = front; leading < back; leading = this.next(leading)) 1059 1060 for (size_t i = 0; i < length; ++i) 1061 { 1062 static if (arity!operations == 1) 1063 result = operations(leading); 1064 else static if (arity!operations == 2) 1065 result = operations(i, leading); 1066 else 1067 assert(0); 1068 1069 if (result) 1070 break; 1071 1072 leading = this.next(leading); 1073 } 1074 1075 return result; 1076 } 1077 } 1078 1079 private: 1080 struct VEBtree(Flag!"inclusive" inclusive, T) 1081 { 1082 auto opBinaryRight(string op)(size_t key) @nogc if (op == "in") 1083 { 1084 return key in root; 1085 } 1086 1087 static if (inclusive) 1088 { 1089 size_t frontKey; 1090 size_t backKey; 1091 } 1092 else 1093 { 1094 ptrdiff_t frontKey; 1095 ptrdiff_t backKey; 1096 } 1097 1098 size_t length; 1099 1100 typeof(frontKey) front() @nogc 1101 { 1102 return frontKey; 1103 } 1104 1105 void popFront() @nogc 1106 in(!empty) 1107 { 1108 --length; 1109 frontKey = next(frontKey); 1110 } 1111 1112 typeof(backKey) back() @nogc 1113 { 1114 return backKey; 1115 } 1116 1117 void popBack() @nogc 1118 in(!empty) 1119 { 1120 --length; 1121 backKey = prev(backKey); 1122 } 1123 1124 auto prev(size_t key) @nogc 1125 { 1126 const pred = root.prev(key); 1127 static if (inclusive) 1128 return pred == NIL ? 0 : pred; 1129 else 1130 return pred; 1131 } 1132 1133 auto next(size_t key) @nogc 1134 { 1135 const succ = root.next(key); 1136 1137 static if(inclusive) 1138 debug 1139 if (succ == NIL) 1140 assert(length <= 1, format!"key: %d, length: %d\n"(key, length)); 1141 1142 static if (inclusive) 1143 if (succ == NIL) 1144 return backKey; 1145 else 1146 return succ; 1147 else 1148 return succ; 1149 } 1150 1151 bool empty() @nogc 1152 { 1153 return !length; 1154 } 1155 1156 auto save() const @nogc 1157 { 1158 return vebTree!(inclusive)(*root_, frontKey, backKey, length); 1159 } 1160 1161 size_t toHash() @nogc const nothrow { assert(0); } 1162 1163 /** 1164 for comparison with an iterable, the iterable will be iterated, as the current object. 1165 */ 1166 bool opEquals(T)(auto ref T input) const if (isIterable!T) 1167 { 1168 static if (is(T == typeof(this))) 1169 return root == input.root; 1170 1171 static if (hasLength!T) 1172 if (length != input.length) 1173 return false; 1174 1175 auto copy = this.save; 1176 1177 foreach (el; input) 1178 { 1179 if (el != copy.front) 1180 return false; 1181 copy.popFront(); 1182 } 1183 1184 return true; 1185 } 1186 1187 @disable this(); 1188 1189 private: 1190 T* root_; 1191 ref T root() { return *root_; } 1192 1193 this(T, Args...)(ref T _root, Args args) @nogc 1194 { 1195 root_ = &_root; 1196 1197 static if(Args.length) 1198 { 1199 frontKey = args[0]; 1200 backKey = args[1]; 1201 length = args[2]; 1202 } 1203 else 1204 { 1205 length = root.length; 1206 static if (inclusive) 1207 { 1208 if(!length) 1209 { 1210 backKey = root.universe; 1211 length = 2; 1212 } 1213 else 1214 { 1215 if(root.front > 0) 1216 ++length; 1217 1218 if(root.back <= root.universe) 1219 backKey = root.universe; 1220 else if(root.back <= root.capacity) 1221 backKey = root.capacity; 1222 1223 if(root.back < backKey) 1224 ++length; 1225 } 1226 } 1227 else 1228 { 1229 frontKey = root.front; 1230 backKey = root.back; 1231 } 1232 } 1233 } 1234 } 1235 1236 // bit mask representing uint.max. 1237 enum size_t lowerMask = size_t.max >> (size_t.sizeof * CHAR_BIT / 2); 1238 // bit mask representing size_t.back without uint.max. 1239 enum size_t higherMask = size_t.max ^ lowerMask; 1240 1241 /* 1242 This function returns the higher square root of the given input. It is needed in the initialization step 1243 of the VEB tree to calculate the number of children of a given layer. And this is the universe size of the 1244 summary of a node. The upper square root is defined by 2^{\lceil(\lg u)/2\rceil} 1245 */ 1246 size_t hSR(size_t val) @nogc 1247 { 1248 return size_t(1) << (bsr(val) / 2 + ((val.bsr & 1) || ((val != 0) && (val & (val - 1))))); 1249 } 1250 // 1251 unittest 1252 { 1253 1254 auto currentSeed = unpredictableSeed(); 1255 const errorString = format!"UT: hSR. seed: %d"(currentSeed); 1256 rndGen.seed(currentSeed); //initialize the random generator 1257 size_t M = uniform(1UL, uint.max); //set universe size to some integer. 1258 auto hSR = hSR(M); 1259 assert((hSR & (hSR - 1)) == 0, errorString); 1260 import std.range : array; 1261 import std.algorithm.searching : until; 1262 1263 auto check = powersOfTwo.until(hSR).array; 1264 assert((check[$ - 1]) * (check[$ - 1]) < M, errorString); 1265 } 1266 1267 /* 1268 This function returns the lower square root of the given input. It is needed by the indexing functions 1269 high(x), low(x) and index(x,y) of elements in the tree. Also, this is the universe size of a child of a node. The 1270 lower square root is defined by 2^{\lfloor(\lgu)/2\rfloor} 1271 */ 1272 size_t lSR(size_t val) @nogc 1273 { 1274 return size_t(1) << (bsr(val) / 2); 1275 } 1276 // 1277 unittest 1278 { 1279 auto currentSeed = unpredictableSeed(); 1280 const errorString = format!"UT: lSR seed: %d"(currentSeed); 1281 rndGen.seed(currentSeed); //initialize the random generator 1282 const M = uniform(1UL, uint.max); //set universe size to some integer. 1283 auto lSR = M.lSR; 1284 1285 assert((lSR & (lSR - 1)) == 0, errorString); 1286 assert(lSR * lSR < M, errorString); 1287 import std.algorithm.searching : find; 1288 1289 assert(!powersOfTwo.find(lSR).empty); 1290 } 1291 1292 /* 1293 This is an index function defined as \lfloor x/lSR(u)\rfloor. It is needed to find the appropriate cluster 1294 of a element in the tree. It is a part of the ideal indexing function. 1295 */ 1296 size_t high(size_t universe, size_t val) @nogc 1297 out (result; result == val / universe.lSR) // bithacks = keithschwarz 1298 { 1299 return val >> (bsr(universe) / 2); 1300 } 1301 // 1302 unittest 1303 { 1304 auto currentSeed = unpredictableSeed(); 1305 const errorString = format!"UT: high seed: %d"(currentSeed); 1306 rndGen.seed(currentSeed); //initialize the random generator 1307 const M = uniform(1UL, uint.max); //set universe size to some integer. 1308 assert(M, errorString); 1309 size_t U = M.nextPow2; 1310 assert(U, errorString); 1311 auto x = uniform(0UL, U); 1312 assert(high(U, x) == x / U.lSR, errorString); 1313 } 1314 1315 /* 1316 This is an index function defined as fmod(value, lSR(universe)). It is needed to find the appropriate 1317 value inside a cluster. It is part of the ideal indexing function 1318 */ 1319 size_t low(size_t universe, size_t val) @nogc 1320 out (retVal; retVal == (val & ((size_t(1) << (bsr(universe) / 2)) - 1))) 1321 { 1322 return val % universe.lSR; 1323 } 1324 // 1325 unittest 1326 { 1327 auto currentSeed = unpredictableSeed(); 1328 const errorString = format!"UT: low seed: %d"(currentSeed); 1329 rndGen.seed(currentSeed); //initialize the random generator 1330 size_t M = uniform(1UL, uint.max); //set universe size to some integer. 1331 size_t U = nextPow2(M); 1332 auto x = uniform(0UL, U); 1333 assert(low(U, x) == (x & (U.lSR - 1)), errorString); 1334 } 1335 1336 /* 1337 This is an index function to retain the searched value. It is defined as x * lSR(u) + y. Beyond this, the 1338 relation holds: x = index(high(x), x.low). This is the ideal indexing function of the tree. 1339 */ 1340 size_t index(size_t universe, size_t x, size_t y) @nogc 1341 { 1342 return (x * universe.lSR + y); 1343 } 1344 // 1345 unittest 1346 { 1347 auto currentSeed = unpredictableSeed(); 1348 const errorString = format!"UT: index seed: %d"(currentSeed); 1349 rndGen.seed(currentSeed); //initialize the random generator 1350 const M = uniform(0UL, uint.max); //set universe size to some integer. 1351 size_t U = M.nextPow2; 1352 auto x = uniform(0UL, U); 1353 assert(index(U, U.high(x), U.low(x)) == x, errorString); 1354 } 1355 1356 auto vebTree(Flag!"inclusive" inclusive, T, Args...)(ref T root, Args args) 1357 { 1358 return VEBtree!(inclusive, T)(root, args); 1359 }