1 /* 2 * Hunt - A refined core library for D programming language. 3 * 4 * Copyright (C) 2018-2019 HuntLabs 5 * 6 * Website: https://www.huntlabs.net/ 7 * 8 * Licensed under the Apache-2.0 License. 9 * 10 */ 11 module hunt.cache.Radix; 12 13 import std.stdio; 14 import core.memory; 15 import core.stdc.string; 16 import core.stdc.stdlib; 17 18 private: 19 20 void debug_log(A...)(A args) 21 { 22 return; 23 } 24 25 alias log_info = debug_log; 26 alias log_error = debug_log; 27 28 struct raxNode 29 { 30 size_t args; 31 // 1 iskey 32 // 1 isnull don't store it 33 // 1 iscompr 34 // 29 size 35 36 //void *data; 37 38 // node is not compr 39 // [abc][a-ptr][b-ptr][c-ptr](value-ptr?) 40 // 41 // node is compr 42 // [xyz][z-ptr](value-ptr?) 43 44 pragma(inline, true): 45 46 @property char* str() 47 { 48 return cast(char*)(&this + 1); 49 } 50 51 @property bool iskey() 52 { 53 return cast(bool)(args & 0x8000_0000UL); 54 } 55 56 @property bool iskey(bool value) 57 { 58 if (value) 59 args = args | 0x8000_0000UL; 60 else 61 args = args & (~0x8000_0000UL); 62 63 return value; 64 } 65 66 @property bool isnull() 67 { 68 return cast(bool)(args & 0x4000_0000UL); 69 } 70 71 @property bool isnull(bool value) 72 { 73 if (value) 74 args = args | 0x4000_0000UL; 75 else 76 args = args & (~0x4000_0000UL); 77 78 return value; 79 } 80 81 @property bool iscompr() 82 { 83 return cast(bool)(args & 0x2000_0000UL); 84 } 85 86 @property bool iscompr(bool value) 87 { 88 if (value) 89 args = args | 0x2000_0000UL; 90 else 91 args = args & (~0x2000_0000UL); 92 93 return value; 94 } 95 96 @property size_t size() 97 { 98 return args & 0x1FFF_FFFFUL; 99 } 100 101 @property size_t size(size_t value) 102 { 103 size_t v = args & (~0x1FFF_FFFFUL); 104 v += value; 105 args = v; 106 107 return value; 108 } 109 110 @property raxNode** orgin() 111 { 112 return cast(raxNode**)(str + size); 113 } 114 115 @property raxNode* next() 116 { 117 return *orgin; 118 } 119 120 @property raxNode* next(raxNode* n) 121 { 122 *orgin = n; 123 return n; 124 } 125 126 @property raxNode* nextChild(size_t index) 127 { 128 assert(index < size); 129 return orgin[index]; 130 } 131 132 @property raxNode* nextChild(size_t index, raxNode* n) 133 { 134 orgin[index] = n; 135 return n; 136 } 137 138 @property void* value() 139 { 140 if (iscompr) 141 return orgin[1]; 142 else 143 return orgin[size]; 144 } 145 146 @property void* value(void* v) 147 { 148 if (iscompr) 149 orgin[1] = cast(raxNode*) v; 150 else 151 orgin[size] = cast(raxNode*) v; 152 return v; 153 } 154 155 pragma(inline, false): 156 157 //alloc non-compr node 158 static raxNode* New(size_t children, bool hasdata) 159 { 160 size_t nodesize = raxNode.sizeof + children + (raxNode*).sizeof * children; 161 if (hasdata) 162 nodesize += (void*).sizeof; 163 164 raxNode* n = cast(raxNode*) malloc(nodesize); 165 if (n is null) 166 return null; 167 memset(n, 0, nodesize); 168 169 n.iskey = false; 170 n.isnull = false; 171 n.iscompr = false; 172 n.size = children; 173 174 return n; 175 } 176 177 static raxNode* NewComp(size_t length, bool hasdata) 178 { 179 size_t nodesize = raxNode.sizeof + length + (raxNode*).sizeof; 180 if (hasdata) 181 nodesize += (void*).sizeof; 182 183 raxNode* n = cast(raxNode*) malloc(nodesize); 184 if (n is null) 185 return null; 186 memset(n, 0, nodesize); 187 188 n.iskey = false; 189 n.isnull = false; 190 n.iscompr = true; 191 n.size = length; 192 193 return n; 194 } 195 196 static raxNode* Renew(raxNode* n, size_t children, bool hasdata) 197 { 198 size_t nodesize = raxNode.sizeof + children + (raxNode*).sizeof * children; 199 if (hasdata) 200 nodesize += (void*).sizeof; 201 202 auto node = cast(raxNode*) realloc(n, nodesize); 203 if (node is null) 204 return null; 205 memset(node, 0, nodesize); 206 207 node.iscompr = false; 208 return node; 209 } 210 211 static raxNode* RenewComp(raxNode* n, size_t length, bool hasdata) 212 { 213 size_t nodesize = raxNode.sizeof + length + (raxNode*).sizeof * length; 214 if (hasdata) 215 nodesize += (void*).sizeof; 216 217 auto node = cast(raxNode*) realloc(n, nodesize); 218 if (node is null) 219 return null; 220 memset(node, 0, nodesize); 221 222 node.iscompr = true; 223 return node; 224 } 225 226 static void Free(raxNode* n) 227 { 228 free(n); 229 } 230 } 231 232 struct raxItem 233 { 234 raxNode* n; 235 size_t index; 236 } 237 238 public: 239 240 struct rax 241 { 242 protected raxNode* head; 243 protected size_t numnodes; 244 size_t numele; 245 246 // 247 // api New 248 // 249 static rax* New() 250 { 251 rax* r = cast(rax*) malloc(rax.sizeof); 252 if (r is null) 253 return null; 254 memset(r, 0, rax.sizeof); 255 256 r.numele = 0; 257 r.numnodes = 1; 258 r.head = raxNode.NewComp(0, false); 259 260 if (r.head is null) 261 { 262 Free(r); 263 return null; 264 } 265 else 266 { 267 r.head.args = 0; 268 return r; 269 } 270 } 271 272 // 273 // api Free 274 // 275 static void Free(rax* r) 276 { 277 r.RecursiveFree(r.head); 278 free(r); 279 } 280 281 // 282 // api Clear 283 // 284 void Clear() 285 { 286 RecursiveFree(head); 287 288 numele = 0; 289 numnodes = 1; 290 head = raxNode.NewComp(0, false); 291 } 292 293 // 294 // api Remove 295 // 296 bool Remove(const ubyte[] s) 297 { 298 raxNode* h = head; 299 raxNode* p = head; 300 raxItem[] ts; 301 size_t index = 0; 302 size_t splitpos = 0; 303 size_t last = find(s, h, p, index, splitpos, ts); 304 305 if (last > 0) 306 { 307 log_error("remove ", cast(string) s, " ", last); 308 return false; 309 } 310 else 311 { 312 if (h.iskey) 313 { 314 numele--; 315 h.iskey = false; 316 if (h.size == 0) 317 { 318 if (p.iscompr) 319 { 320 //#1 最后一个节点为空 父节点压缩节点 且是key 则去除父节点 321 // (x) 322 // | - 'test' (x) 323 // ('test') -------------> | 324 // | () 325 // () 326 // 327 if (p.iskey) 328 { 329 h.iskey = true; 330 h.value = p.value; 331 if (p == head) 332 { 333 head = h; 334 } 335 else 336 { 337 raxItem item = ts[$ - 2]; 338 item.n.nextChild(item.index, h); 339 } 340 numnodes -= 1; 341 raxNode.Free(p); 342 log_info("#####r1"); 343 } 344 //#2 最后一个节点为空 父节点是压缩节点 不是key 父父节点必须是非压缩节点 345 // (t) 346 // | 347 // (A) 348 // | 349 // ['xyz'] 350 // / \ - 'test' 351 // (B) ('test') ----------------> 352 // | | 353 // (C) () 354 // 355 // 356 // #1 当['xy'] size == 2 357 // #1 当['xy']不是key,A为压缩节点 && 当B为压缩节点 且不是key,合并三项 358 // (t) 359 // | 360 // (A) 361 // | 362 // ['xy'] 363 // / \ - 'test' (t) 364 // (B) ('test') ----------------> | 365 // | | (A + 'x' + B) 366 // (C) () | 367 // (C) 368 // 369 // #2 当['xy']不是key,A为压缩节点 , 合并两项 370 // (t) 371 // | 372 // (A) 373 // | 374 // ['xy'] 375 // / \ - 'test' (t) 376 // (B) ('test') ----------------> | 377 // | | ( A + 'x') 378 // (C) () | 379 // (B) 380 // | 381 // (C) 382 // 383 // #3 当B为压缩节点 且不是key , 合并两项 384 // (t) 385 // | 386 // (A) 387 // | (t) 388 // ['xy'] | 389 // / \ - 'test' (A) 390 // (B) ('test') ----------------> | 391 // | | ( 'x' + B) 392 // (C) () | 393 // (C) 394 // 395 // #4 当都不能合并时 396 // (t) 397 // | 398 // (A) 399 // | (t) 400 // ['xy'] | 401 // / \ - 'test' (A) 402 // (B) ('test') ----------------> | 403 // | | ( 'x') 404 // (C) () | 405 // (B) 406 // | 407 // (C) 408 else // pp exist. & pp is non compr 409 { 410 //pp 411 if (p == head) 412 { 413 head = h; 414 numnodes -= 1; 415 log_info("#####r2"); 416 } 417 else 418 { 419 raxItem t1 = ts[$ - 2]; 420 raxNode* r1 = ts[$ - 2].n; 421 if (r1.size == 2) 422 { 423 raxNode* pp = null; 424 if (ts.length >= 3) 425 pp = ts[$ - 3].n; 426 bool ppCombine = pp && pp.iscompr && !r1.iskey; 427 raxNode* nh = r1.nextChild(r1.size - 1 - t1.index); 428 bool nhCombie = nh.iscompr && !nh.iskey; 429 430 if (ppCombine && nhCombie) 431 { 432 bool hasdata = pp.iskey && !pp.isnull; 433 raxNode* u = raxNode.NewComp(pp.size + nh.size + 1, hasdata); 434 memcpy(u.str, pp.str, pp.size); 435 memcpy(u.str + pp.size, r1.str + r1.size - 1 - t1.index, 1); 436 memcpy(u.str + pp.size + 1, nh.str, nh.size); 437 438 u.iskey = pp.iskey; 439 if (hasdata) 440 { 441 u.value = pp.value; 442 } 443 u.next(nh.next); 444 if (pp == head) 445 { 446 head = u; 447 } 448 else 449 { 450 raxItem item = ts[$ - 4]; 451 item.n.nextChild(item.index, u); 452 } 453 raxNode.Free(nh); 454 raxNode.Free(pp); 455 raxNode.Free(p); 456 raxNode.Free(h); 457 raxNode.Free(r1); 458 numnodes -= 4; 459 log_info("#####r211"); 460 } 461 else if (ppCombine) 462 { 463 bool hasdata = pp.iskey && !pp.isnull; 464 raxNode* u = raxNode.NewComp(pp.size + 1, hasdata); 465 memcpy(u.str, pp.str, pp.size); 466 memcpy(u.str + pp.size, r1.str + r1.size - 1 - t1.index, 1); 467 u.next(nh); 468 u.iskey = pp.iskey; 469 if (hasdata) 470 { 471 u.value = pp.value; 472 } 473 474 if (pp == head) 475 { 476 head = u; 477 } 478 else 479 { 480 raxItem item = ts[$ - 4]; 481 item.n.nextChild(item.index, u); 482 } 483 raxNode.Free(pp); 484 raxNode.Free(p); 485 raxNode.Free(h); 486 raxNode.Free(r1); 487 numnodes -= 3; 488 489 log_info("#####r212"); 490 } 491 else if (nhCombie) 492 { 493 bool hasdata = r1.iskey && !r1.isnull; 494 raxNode* u = raxNode.NewComp(1 + nh.size, hasdata); 495 memcpy(u.str, r1.str + r1.size - 1 - t1.index, 1); 496 memcpy(u.str + 1, nh.str, nh.size); 497 u.iskey = r1.iskey; 498 499 if (hasdata) 500 { 501 u.value = r1.value; 502 } 503 504 u.next(nh.next); 505 506 if (r1 == head) 507 { 508 head = u; 509 } 510 else 511 { 512 raxItem item = ts[$ - 3]; 513 log_info(getStr(item.n)); 514 item.n.nextChild(item.index, u); 515 } 516 raxNode.Free(nh); 517 raxNode.Free(p); 518 raxNode.Free(h); 519 raxNode.Free(r1); 520 numnodes -= 3; 521 log_info("#####r213"); 522 } 523 else 524 { 525 bool hasdata = r1.iskey && !r1.isnull; 526 raxNode* n = raxNode.NewComp(1, hasdata); 527 n.iskey = r1.iskey; 528 if (hasdata) 529 n.value = r1.value; 530 n.str[0] = r1.str[r1.size - 1 - t1.index]; 531 n.next(r1.nextChild(r1.size - 1 - t1.index)); 532 533 if (r1 == head) 534 { 535 head = n; 536 } 537 else 538 { 539 raxItem item = ts[$ - 3]; 540 item.n.nextChild(item.index, n); 541 } 542 543 raxNode.Free(h); 544 raxNode.Free(p); 545 raxNode.Free(r1); 546 numnodes -= 2; 547 log_info("#####r214"); 548 } 549 } 550 // #1 当['xyz'] 的size > 2 551 // 552 // (t) (t) 553 // | | 554 // (A) (A) 555 // | | 556 // ['xyz'] ['xz'] 557 // / \ \ - 'test' / \ 558 // (B)('test') (D) ----------------> ('B') (D) 559 // | | 560 // (C) () 561 // 562 else if (r1.size > 2) 563 { 564 bool hasdata = r1.iskey && !r1.isnull; 565 raxNode* u = raxNode.New(r1.size - 1, hasdata); 566 u.iskey = r1.iskey; 567 if (hasdata) 568 { 569 u.value = r1.value; 570 } 571 572 log_info("index ", t1.index, " ", r1.size); 573 574 if (t1.index == 0) 575 { 576 memcpy(u.str, r1.str + 1, r1.size - 1); 577 578 } 579 else if (t1.index == r1.size - 1) 580 { 581 memcpy(u.str, r1.str, r1.size - 1); 582 } 583 else 584 { 585 memcpy(u.str, r1.str, t1.index); 586 memcpy(u.str + t1.index, r1.str + t1.index + 1, 587 r1.size - t1.index - 1); 588 } 589 590 log_info(getStr(u)); 591 592 for (size_t i, j = 0; i < r1.size;) 593 { 594 if (i != t1.index) 595 u.orgin[j++] = r1.orgin[i++]; 596 else 597 i++; 598 } 599 600 //raxNode *test = null; 601 602 if (r1 == head) 603 { 604 head = u; 605 } 606 else 607 { 608 raxItem i = ts[$ - 3]; 609 610 i.n.nextChild(i.index, u); 611 612 } 613 614 raxNode.Free(r1); 615 raxNode.Free(h); 616 raxNode.Free(p); 617 618 numnodes -= 2; 619 log_info("####r22"); 620 } 621 else 622 { 623 log_error("####r23 none exist"); 624 } 625 } 626 } 627 } 628 // #3 当父节点为非压缩节点 629 // 630 // 631 // (A) 632 // | A+'y' 633 // ['xyz'] -----------> 634 // / | \ 635 // (C) () (D) 636 // 637 // 638 // 639 // #1 当['xy'] 的size == 2时 640 // 641 // 当#1 ['xy']非key,且(C)非key , 合并三项 642 // (t) 643 // | 644 // (A) 645 // | A+'y' (t) 646 // ['xy'] -----------> | 647 // / | (A + 'x' + C) 648 // (C) () | 649 // | (D) 650 // (D) 651 // 652 // 653 // 654 // 当#2 ['xy']非key , 合并两项 655 // (t) 656 // | 657 // (A) 658 // | A+'y' (t) 659 // ['xy'] -----------> | 660 // / | (A + 'x' ) 661 // (C) () | 662 // | (C) 663 // (D) | 664 // (D) 665 // 当#3 (C)非key , 合并两项 666 // (t) 667 // | (t) 668 // (A) | 669 // | A+'y' (A) 670 // ['xy'] -----------> | 671 // / | ('x' + C) 672 // (C) () | 673 // | (D) 674 // (D) 675 // 676 // 当#4 无合并 677 // 678 // (t) 679 // | (t) 680 // (A) | 681 // | A+'y' (A) 682 // ['xy'] -----------> | 683 // / | ('x') 684 // (C) () | 685 // | (C) 686 // (D) | 687 // (D) 688 else if (!p.iscompr) 689 { 690 // noncompr to compr 691 log_info("p ", getStr(p)); 692 if (p.size == 2) 693 { 694 raxNode* pp = null; 695 if (ts.length >= 2) 696 pp = ts[$ - 2].n; 697 bool ppCombine = pp && pp.iscompr && !p.iskey; 698 raxNode* nh = p.nextChild(p.size - 1 - index); 699 700 log_info("nh ", getStr(nh)); 701 bool nhCombie = nh.iscompr && !nh.iskey; 702 703 log_info(ppCombine, " ", nhCombie); 704 705 // #1 合并3个 706 if (ppCombine && nhCombie) 707 { 708 bool hasdata = pp.iskey && !pp.isnull; 709 raxNode* u = raxNode.NewComp(pp.size + nh.size + 1, hasdata); 710 memcpy(u.str, pp.str, pp.size); 711 memcpy(u.str + pp.size, p.str + p.size - 1 - index, 1); 712 memcpy(u.str + pp.size + 1, nh.str, nh.size); 713 714 u.iskey = pp.iskey; 715 if (hasdata) 716 u.value = pp.value; 717 718 u.next(nh.next); 719 if (pp == head) 720 { 721 head = u; 722 } 723 else 724 { 725 raxItem item = ts[$ - 3]; 726 item.n.nextChild(item.index, u); 727 } 728 raxNode.Free(nh); 729 raxNode.Free(pp); 730 raxNode.Free(p); 731 raxNode.Free(h); 732 733 numnodes -= 3; 734 735 log_info("#####r311"); 736 } 737 // #2 738 else if (ppCombine) 739 { 740 bool hasdata = pp.iskey && !pp.isnull; 741 raxNode* u = raxNode.NewComp(pp.size + 1, hasdata); 742 memcpy(u.str, pp.str, pp.size); 743 memcpy(u.str + pp.size, p.str + p.size - 1 - index, 1); 744 u.next(nh); 745 u.iskey = pp.iskey; 746 if (hasdata) 747 u.value = pp.value; 748 749 if (pp == head) 750 { 751 head = u; 752 } 753 else 754 { 755 raxItem item = ts[$ - 3]; 756 item.n.nextChild(item.index, u); 757 } 758 raxNode.Free(pp); 759 raxNode.Free(p); 760 raxNode.Free(h); 761 numnodes -= 2; 762 763 log_info("#####r312"); 764 } 765 else if (nhCombie) 766 { 767 bool hasdata = p.iskey && !p.isnull; 768 raxNode* u = raxNode.NewComp(1 + nh.size, hasdata); 769 memcpy(u.str, p.str + p.size - 1 - index, 1); 770 memcpy(u.str + 1, nh.str, nh.size); 771 u.iskey = p.iskey; 772 u.next(nh.next); 773 if (hasdata) 774 u.value = p.value; 775 if (p == head) 776 { 777 head = u; 778 } 779 else 780 { 781 raxItem item = ts[$ - 2]; 782 item.n.nextChild(item.index, u); 783 } 784 raxNode.Free(nh); 785 raxNode.Free(p); 786 raxNode.Free(h); 787 numnodes -= 2; 788 log_info("#####r313"); 789 } 790 // p.iskey or no combine. 791 else 792 { 793 bool hasdata = p.iskey && !p.isnull; 794 raxNode* n = raxNode.NewComp(1, hasdata); 795 n.iskey = p.iskey; 796 if (hasdata) 797 n.value = p.value; 798 n.str[0] = p.str[p.size - 1 - index]; 799 n.next(p.nextChild(p.size - 1 - index)); 800 801 if (p == head) 802 { 803 head = n; 804 } 805 else 806 { 807 raxItem item = ts[$ - 2]; 808 item.n.nextChild(item.index, n); 809 } 810 811 raxNode.Free(h); 812 raxNode.Free(p); 813 numnodes -= 1; 814 log_info("#####r314"); 815 } 816 } 817 // #2 当['xyz'] 的size > 2时 818 // (A) (A) 819 // | A+'y' | 820 // ['xyz'] -----------> ['xz'] 821 // / | \ / \ 822 // (C) () (D) (C) (D) 823 // 824 // 825 // 826 else if (p.size > 2) 827 { 828 bool hasdata = p.iskey && !p.isnull; 829 raxNode* u = raxNode.New(p.size - 1, hasdata); 830 u.iskey = p.iskey; 831 if (hasdata) 832 { 833 u.value = p.value; 834 } 835 836 log_info("index ", index, " ", p.size); 837 838 if (index == 0) 839 { 840 memcpy(u.str, p.str + 1, p.size - 1); 841 } 842 else if (index == p.size - 1) 843 { 844 memcpy(u.str, p.str, p.size - 1); 845 } 846 else 847 { 848 memcpy(u.str, p.str, index); 849 memcpy(u.str + index, p.str + index + 1, p.size - index - 1); 850 } 851 852 for (size_t i, j = 0; i < p.size;) 853 { 854 if (i != index) 855 u.orgin[j++] = p.orgin[i++]; 856 else 857 i++; 858 } 859 860 if (p == head) 861 { 862 head = u; 863 } 864 else 865 { 866 raxItem item = ts[$ - 2]; 867 item.n.nextChild(item.index, u); 868 } 869 870 raxNode.Free(h); 871 raxNode.Free(p); 872 numnodes--; 873 log_info("#####r32"); 874 } 875 } 876 } 877 // h.size > 0 878 else 879 { 880 // #4 节点是压缩节点 , 则合并 881 // (A) (A + 'test') 882 // | | 883 // ('test') - 'test' (B) 884 // | 885 // (B) -----------> 886 // 887 // 888 // #5 只是去掉一个值。 889 890 if (h.iscompr && p.iscompr) 891 { 892 bool hasdata = p.iskey && !p.isnull; 893 raxNode* u = raxNode.NewComp(p.size + h.size, hasdata); 894 u.iskey = p.iskey; 895 if (hasdata) 896 { 897 u.value = p.value; 898 } 899 900 memcpy(u.str, p.str, p.size); 901 memcpy(u.str + p.size, h.str, h.size); 902 u.next(h.next); 903 if (p == head) 904 { 905 head = u; 906 } 907 else 908 { 909 raxItem item = ts[$ - 2]; 910 item.n.nextChild(item.index, u); 911 } 912 numnodes--; 913 raxNode.Free(p); 914 raxNode.Free(h); 915 log_info("#####r4"); 916 } 917 else 918 { 919 log_info("#####r5"); 920 } 921 } 922 return true; 923 } 924 else 925 { 926 log_error(cast(string) s, " is not key ", getStr(h)); 927 return false; 928 } 929 } 930 } 931 932 // 933 // api Insert 934 // 935 bool Insert(const ubyte[] s, void* data) 936 { 937 raxNode* h = head; 938 raxNode* p = head; 939 raxItem[] ts; 940 size_t index = 0; 941 size_t splitpos = 0; 942 numele++; 943 944 size_t last = find(s, h, p, index, splitpos, ts); 945 946 log_info("find ", cast(string) s, " last ", last, " split ", splitpos, " index ", index); 947 948 //没有找到该s. 949 if (last > 0) 950 { 951 // #1 如果该树是空树. 952 // 953 // 'test' 954 // () ----------->('test') 955 // | 956 // () 957 // 958 if (p.size == 0) 959 { 960 raxNode* n = raxNode.NewComp(s.length, false); 961 memcpy(n.str, s.ptr, s.length); 962 963 p = raxNode.RenewComp(p, 0, true); 964 p.args = 0; 965 p.iskey = true; 966 p.value = data; 967 968 n.next = p; 969 head = n; 970 971 numnodes++; 972 973 log_info("####1"); 974 return true; 975 } 976 else 977 { 978 // #2 直到匹配到叶子节点,都没有匹配到,必须往该叶子节点后面加剩余的字符。 979 // 'tester' 980 // ("test") --------> ("test") 981 // | | 982 // () ("er") 983 // | 984 // () 985 if (h.size == 0) 986 { 987 //1 new comp node 988 raxNode* n = raxNode.NewComp(last, true); 989 memcpy(n.str, s[$ - last .. $].ptr, last); 990 n.iskey = true; 991 n.value = h.value; 992 993 h.value = data; 994 995 n.next = h; 996 p.nextChild(index, n); 997 998 numnodes++; 999 1000 log_info("####2"); 1001 return true; 1002 } 1003 // #3 匹配到压缩节点,1 必须截断前部分。2 取原字符及压缩节点匹配字符构成 两个字符的 非压缩节点。 1004 // 3 非压缩节点 两个子节点 分别指向 截断后半部分 及 原字符后半部分 1005 // 1006 // 'teacher' 1007 // ('test')---------------->('te') 1008 // | | 1009 // (x) ['as'] u2 1010 // / \ 1011 // u4 ('cher') ('t') u3 1012 // / \ 1013 // u5 () (x) 1014 // 1015 else if (h.iscompr) 1016 { 1017 raxNode* u1; 1018 1019 bool hasvalue = h.iskey && !h.isnull; 1020 auto u2 = raxNode.New(2, hasvalue && splitpos <= 0); 1021 u2.str[0] = s[$ - last]; 1022 u2.str[1] = h.str[splitpos]; 1023 numnodes++; 1024 1025 if (splitpos > 0) 1026 { 1027 u1 = raxNode.NewComp(splitpos, hasvalue); 1028 memcpy(u1.str, h.str, splitpos); 1029 u1.iskey = h.iskey; 1030 if (hasvalue) 1031 u1.value = h.value; 1032 numnodes++; 1033 } 1034 else 1035 { 1036 u1 = u2; 1037 u1.iskey = h.iskey; 1038 if (hasvalue) 1039 u1.value = h.value; 1040 } 1041 1042 size_t u3_len = h.size - splitpos - 1; 1043 raxNode* u3; 1044 bool bcombine = false; 1045 if (u3_len > 0) 1046 { 1047 //combin 1048 if (h.next.size > 0 && h.next.iscompr && !h.next.iskey) 1049 { 1050 u3 = raxNode.NewComp(u3_len + h.next.size, h.next.iskey && !h.next.isnull); 1051 memcpy(u3.str, h.str + splitpos + 1, h.size - splitpos - 1); 1052 memcpy(u3.str + h.size - splitpos - 1, h.next.str, h.next.size); 1053 numnodes++; 1054 bcombine = true; 1055 } 1056 else 1057 { 1058 u3 = raxNode.NewComp(h.size - splitpos - 1, false); 1059 memcpy(u3.str, h.str + splitpos + 1, h.size - splitpos - 1); 1060 numnodes++; 1061 } 1062 } 1063 else 1064 { 1065 u3 = h.next; 1066 } 1067 1068 //4 1069 size_t u4_len = last - 1; 1070 raxNode* u4; 1071 1072 //5 1073 auto u5 = raxNode.NewComp(0, true); 1074 u5.iskey = true; 1075 u5.value = data; 1076 numnodes++; 1077 1078 if (u4_len > 0) 1079 { 1080 u4 = raxNode.NewComp(last - 1, false); 1081 memcpy(u4.str, s.ptr + s.length - last + 1, last - 1); 1082 numnodes++; 1083 } 1084 else 1085 { 1086 u4 = u5; 1087 } 1088 1089 //relation 1090 if (u4_len > 0) 1091 u4.next = u5; 1092 1093 if (bcombine) 1094 { 1095 u3.next = h.next.next; 1096 raxNode.Free(h.next); 1097 numnodes--; 1098 } 1099 else if (u3_len > 0) 1100 { 1101 u3.next = h.next; 1102 } 1103 1104 u2.nextChild(0, u4); 1105 u2.nextChild(1, u3); 1106 1107 if (splitpos > 0) 1108 u1.next = u2; 1109 1110 p.nextChild(index, u1); 1111 1112 if (h == head) 1113 head = u1; 1114 1115 raxNode.Free(h); 1116 numnodes--; 1117 1118 log_info("####3"); 1119 return true; 1120 } 1121 // #4 都不匹配非压缩节点的任何子节点 1 增加该字符 2 截断原字符 1122 // 1123 // 'beer' 1124 // ["tes"] ---------> ['tesb'] 1125 // / / \ / / \ \ 1126 // () () () () () () ('eer') 1127 // \ 1128 // () 1129 else 1130 { 1131 bool hasdata = !h.isnull && h.iskey; 1132 auto i = raxNode.New(h.size + 1, hasdata); 1133 i.iskey = h.iskey; 1134 if (hasdata) 1135 { 1136 i.value = h.value; //modify 1137 } 1138 1139 numnodes++; 1140 memcpy(i.str, h.str, h.size); 1141 i.str[h.size] = s[$ - last]; 1142 memcpy(i.str + i.size, h.str + h.size, h.size * (raxNode*).sizeof); 1143 1144 auto u1_len = last - 1; 1145 raxNode* u1; 1146 1147 auto u2 = raxNode.NewComp(0, true); 1148 u2.value = data; 1149 u2.iskey = true; 1150 numnodes++; 1151 if (u1_len > 0) 1152 { 1153 u1 = raxNode.NewComp(u1_len, false); 1154 memcpy(u1.str, s.ptr + s.length - last + 1, u1_len); 1155 numnodes++; 1156 u1.next = u2; 1157 } 1158 else 1159 { 1160 u1 = u2; 1161 } 1162 1163 i.nextChild(h.size, u1); 1164 p.nextChild(index, i); 1165 1166 if (h == head) 1167 head = i; 1168 raxNode.Free(h); 1169 numnodes--; 1170 log_info("####4"); 1171 return true; 1172 } 1173 } 1174 } 1175 else 1176 { 1177 // #5 完全匹配,只要改个值 即可。 1178 // 'te' 1179 // ('te') -------------> the same 1180 // | 1181 // ['as'] 1182 // / \ 1183 // ('cher') ('t') 1184 // | | 1185 // () () 1186 if (splitpos == 0) 1187 { 1188 bool hasdata = (h.iskey && !h.isnull); 1189 if (hasdata) 1190 { 1191 h.value = data; 1192 if (h.iskey) //replaced 1193 numele--; 1194 else 1195 assert(0); 1196 1197 log_info("####50"); 1198 return false; 1199 } 1200 else 1201 { 1202 raxNode* u; 1203 if (h.iscompr) 1204 { 1205 u = raxNode.RenewComp(h, h.size, true); 1206 u.args = 0; 1207 } 1208 else 1209 { 1210 u = raxNode.Renew(h, h.size, true); 1211 } 1212 1213 u.value = data; 1214 u.iskey = true; 1215 p.nextChild(index, u); 1216 1217 log_info("####51"); 1218 return true; 1219 } 1220 } 1221 // #6 完全匹配压缩节点前半部分。 分割即可。 1222 // 'te' 1223 // ('test') ---------> ('te') 1224 // | | 1225 // (x) ('st') 1226 // | 1227 // (x) 1228 // 1229 else if (h.iscompr) 1230 { 1231 bool hasdata = (h.iskey && !h.isnull); 1232 auto u1 = raxNode.NewComp(splitpos, hasdata); 1233 memcpy(u1.str, h.str, splitpos); 1234 u1.iskey = h.iskey; 1235 if (hasdata) 1236 u1.value = h.value; 1237 numnodes++; 1238 1239 auto u2 = raxNode.NewComp(h.size - splitpos, true); 1240 memcpy(u2.str, h.str + splitpos, h.size - splitpos); 1241 u2.iskey = true; 1242 u2.value = data; 1243 numnodes++; 1244 u2.next = h.next; 1245 1246 u1.next = u2; 1247 1248 raxNode.Free(h); 1249 numnodes--; 1250 if (h == head) 1251 { 1252 head = u1; 1253 } 1254 else 1255 { 1256 p.nextChild(index, u1); 1257 } 1258 1259 log_info("####6"); 1260 return true; 1261 } 1262 else 1263 { 1264 writeln("assert"); 1265 assert(0); 1266 } 1267 } 1268 1269 } 1270 1271 // 1272 // api find 1273 // 1274 bool find(const ubyte[] s, out void* data) 1275 { 1276 raxNode* h = head; 1277 raxNode* p = head; 1278 raxItem[] ts; 1279 size_t index = 0; 1280 size_t splitpos = 0; 1281 size_t last = find(s, h, p, index, splitpos, ts); 1282 if (last == 0 && splitpos == 0 && h.iskey) 1283 { 1284 data = h.value; 1285 return true; 1286 } 1287 1288 return false; 1289 } 1290 1291 private: 1292 1293 void RecursiveFree(raxNode* n) 1294 { 1295 size_t numchildren = 0; 1296 if (n.iscompr) 1297 { 1298 numchildren = n.size > 0 ? 1 : 0; 1299 } 1300 else 1301 { 1302 numchildren = n.size; 1303 } 1304 while (numchildren--) 1305 { 1306 RecursiveFree(n.nextChild(numchildren)); 1307 } 1308 raxNode.Free(n); 1309 numnodes--; 1310 } 1311 1312 //find 1313 size_t find(const ubyte[] s, ref raxNode* r, ref raxNode* pr, ref size_t index, 1314 ref size_t splitpos, ref raxItem[] ts) 1315 { 1316 //find it 1317 1318 if (s.length == 0) 1319 { 1320 return 0; 1321 } 1322 1323 if (r.size == 0) 1324 { 1325 return s.length; 1326 } 1327 1328 if (r.iscompr) //is compr 1329 { 1330 char* p = r.str; 1331 size_t i = 0; 1332 for (i = 0; i < r.size && i < s.length; i++) 1333 { 1334 if (p[i] != s[i]) 1335 break; 1336 } 1337 1338 if (i == r.size) 1339 { 1340 pr = r; 1341 r = r.next; 1342 index = 0; 1343 raxItem item; 1344 item.n = pr; 1345 item.index = index; 1346 ts ~= item; 1347 return find(s[(*pr).size .. $], r, pr, index, splitpos, ts); 1348 } 1349 else 1350 { 1351 splitpos = i; 1352 return s.length - i; 1353 } 1354 } 1355 else 1356 { 1357 char* p = r.str; 1358 char* end = r.str + r.size; 1359 while (p != end) 1360 { 1361 if (*p == s[0]) 1362 break; 1363 p++; 1364 } 1365 1366 size_t i = p - r.str; 1367 if (p == end) 1368 { 1369 splitpos = i; 1370 return s.length; 1371 } 1372 else 1373 { 1374 pr = r; 1375 index = i; 1376 r = r.nextChild(index); 1377 raxItem item; 1378 item.n = pr; 1379 item.index = index; 1380 ts ~= item; 1381 return find(s[1 .. $], r, pr, index, splitpos, ts); 1382 } 1383 } 1384 } 1385 1386 string getStr(raxNode* h) 1387 { 1388 string str; 1389 for (size_t i = 0; i < h.size; i++) 1390 str ~= h.str[i]; 1391 return str; 1392 } 1393 1394 void Recursiveshow(raxNode* n, size_t level) 1395 { 1396 show(n, level); 1397 1398 if (n.size == 0) 1399 return; 1400 1401 if (n.iscompr) 1402 { 1403 Recursiveshow(n.next, ++level); 1404 } 1405 else 1406 { 1407 ++level; 1408 for (size_t i = 0; i < n.size; i++) 1409 { 1410 Recursiveshow(n.nextChild(i), level); 1411 } 1412 } 1413 } 1414 1415 void show(raxNode* n, size_t level) 1416 { 1417 for (size_t i = 0; i < level; i++) 1418 write("\t"); 1419 write(" key:", n.iskey, n.iscompr ? " (" : " ["); 1420 1421 for (size_t i = 0; i < n.size; i++) 1422 write(n.str[i]); 1423 1424 write(n.iscompr ? ") " : "] ", (n.iskey && !n.isnull) ? n.value : null, "\n"); 1425 } 1426 1427 void show() 1428 { 1429 raxNode* p = head; 1430 writef("numele:%d numnodes:%d\n", numele, numnodes); 1431 1432 Recursiveshow(p, 0); 1433 1434 writef("\n"); 1435 } 1436 }; 1437 1438 unittest 1439 { 1440 void test1() 1441 { 1442 string[] toadd = [ 1443 "alligator", "alien", "baloon", "chromodynamic", "romane", "romanus", 1444 "romulus", "rubens", "ruber", "rubicon", "rubicundus", "all", "rub", "ba" 1445 ]; 1446 rax* r = rax.New(); 1447 foreach (i, s; toadd) 1448 { 1449 r.Insert(cast(ubyte[]) s, cast(void*) i); 1450 } 1451 1452 foreach (s; toadd) 1453 { 1454 r.Remove(cast(ubyte[]) s); 1455 } 1456 r.show(); 1457 } 1458 1459 void test2() 1460 { 1461 string origin = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" ~ "abcdefghijklmnopqrstuvwxyz" ~ "0123456789"; 1462 import std.random; 1463 1464 string[] keys; 1465 size_t num = 1000; 1466 1467 for (size_t j = 0; j < num; j++) 1468 { 1469 size_t len = uniform(1, 16); 1470 string key; 1471 for (size_t i = 0; i < len; i++) 1472 { 1473 size_t index = uniform(0, origin.length); 1474 key ~= origin[index]; 1475 } 1476 keys ~= key; 1477 } 1478 1479 rax* r = rax.New(); 1480 foreach (i, k; keys) 1481 { 1482 r.Insert(cast(ubyte[]) k, cast(void*) i); 1483 } 1484 1485 foreach (k; keys) 1486 { 1487 r.Remove(cast(ubyte[]) k); 1488 } 1489 1490 r.show(); 1491 //assert(r.numele == 0); There are still problems: inaccurate calculations of numele and numnodes. 1492 } 1493 1494 test1(); 1495 test2(); 1496 }