1 /** 2 * Fast, non-allocating string functions. 3 * 4 * Authors: 5 * $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise) 6 * 7 * Copyright: 8 * © 2017-2023 $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise), $(LINK2 mailto:etienne@cimons.com, Etienne Cimon) 9 * 10 * License: 11 * $(LINK2 https://mit-license.org/, The MIT License (MIT)) 12 */ 13 module fast..string; 14 15 import core.bitop; 16 import core.simd; 17 18 //import core.stdc.stdlib; 19 20 version (GNU) import gcc.attribute; 21 22 import std.range; 23 import std.traits; 24 25 import fast.buffer; 26 27 /** 28 * Splits a string in two around one or more compile-time known code units. 29 * 30 * Params: 31 * match = An expression that matches all characters around which a split should occur. 32 * str = The string to scan. 33 * before = The part before the split is stored here. If no character in $(D match) is found, the original string is returned here. 34 * after = The part after the split is stored here. If no character in $(D match) is found, $(D null) is returned here. 35 * splitter = If not $(D null), this pointer will receive a copy of the splitting char. 36 * 37 * Returns: 38 * $(D true), iff a split occured. 39 */ 40 bool split(string match)(scope inout(char[]) str, ref inout(char)[] before, ref inout(char)[] after, char* splitter = null) 41 { 42 immutable pos = min(str.length, SimdMatcher!match.find(str.ptr, str.ptr + str.length)); 43 before = str[0 .. pos]; 44 if (pos < str.length) 45 { 46 after = str[pos + 1 .. $]; 47 if (splitter) 48 *splitter = str[pos]; 49 return true; 50 } 51 after = null; 52 return false; 53 } 54 55 /** 56 * Similar to the overload for strings, this function works a little faster as it lacks boundary checks. 57 * It assumes that one of the characters in $(D match) is actually contained in the string. 58 * 59 * Params: 60 * match = An expression that matches all characters around which a split should occur. 61 * ptr = The string to scan. 62 * before = The part before the split is stored here. If no character in $(D match) is found, the original string is returned here. 63 * after = The pointer to the part after the split is stored here. 64 * 65 * Returns: 66 * The char that caused the split. (From $(D match).) 67 */ 68 char split(string match)(scope inout(char*) ptr, ref inout(char)[] before, ref inout(char)* after) 69 { 70 auto pos = SimdMatcher!match.find(ptr); 71 before = ptr[0 .. pos - ptr]; 72 after = pos + 1; 73 return *pos; 74 } 75 76 /******************************************************************************* 77 * 78 * Finds the first occurrence of a set of compile-time known code units in a 79 * string. While the algorithm is `O(n)` in relation to the count of given code 80 * units, the overhead when using it on short strings weights more for only 1 or 81 * 2 code units. 82 * 83 * Params: 84 * match = An expression that matches all characters around which a split 85 * should occur. 86 * str = The string to search for a code unit. 87 * 88 * Returns: 89 * If a match is found, the index into the string is returned. 90 * Otherwise an invalid index is returned. Check with 91 * `if (result < str.length)`. 92 * 93 * See_Also: 94 * split, 95 * $(LINK2 http://mischasan.wordpress.com/2011/11/09/the-generic-sse2-loop/, 96 * The Generic SSE2 Loop) 97 * 98 * Example: 99 * --- 100 * // Check if there is a '/' or '\' in the string 101 * auto pos = str.find!(`or(=/,=\)`); 102 * if (pos < str.length) { } 103 * --- 104 **************************************/ 105 size_t find(string match)(in char[] str) pure nothrow 106 { 107 return SimdMatcher!match.find(str.ptr, str.ptr + str.length); 108 } 109 110 /******************************************************************************* 111 * 112 * Same as the overload for strings, but with only a char*, making it faster as 113 * it cannot do a boundary check. 114 * 115 * Sometimes when looking for a character it is helpful to append it as a 116 * sentinel to the char buffer and then use this function instead of the slower 117 * one that checks the boundary constantly. 118 * 119 * Example: 120 * --- 121 * // Find a ']' in a buffer of 1024 bytes using an additional sentinel. 122 * size_t length = 1024; 123 * char[] buffer = new char[](length+1); 124 * buffer[length] = ']'; 125 * auto pos = buffer.ptr.find!("=]"); 126 * if (pos < length) { // was an actual find before the sentinel } 127 * --- 128 **************************************/ 129 inout(char)* find(string match)(inout(char*) ptr) pure nothrow 130 { 131 return SimdMatcher!match.find(ptr); 132 } 133 134 bool keyword1(string key)(in char[] str, 135 scope bool function(ref immutable(char)* key, ref const(char)* str) mismatcher = null) 136 { 137 auto strPtr = str.ptr; 138 auto keyPtr = key.ptr; 139 auto keyEnd = keyPtr + key.length; 140 141 while (keyPtr !is keyEnd) 142 { 143 while (*strPtr == '\\') 144 if (!mismatcher(keyPtr, strPtr)) 145 return false; 146 147 if (*strPtr == '"' || *strPtr != *keyPtr) 148 return false; 149 150 strPtr++; 151 keyPtr++; 152 } 153 return true; 154 } 155 156 size_t equalLength(scope inout(char[]) a, scope inout(char[]) b) 157 { 158 return 0; 159 } 160 161 /******************************************************************************* 162 * 163 * Concatenates a series of strings. 164 * 165 * Params: 166 * Strs = a series of string symbols or literals to be concatenated 167 * buffer = optional buffer, implicitly allocated 168 * 169 * Returns: 170 * A $(D TempBuffer!char) containing the concatenated string. It is kept alive 171 * for as long as it is in scope. 172 * 173 **************************************/ 174 nothrow @nogc 175 template concat(Strs...) 176 { 177 //import core.stdc.string : memcpy; 178 import fast.internal.helpers; 179 180 enum allocExpr = ctfeJoin!(Strs.length)("Strs[%s].length", "+") ~ "+1"; 181 182 auto concat(void* buffer = (mixin(allocExpr) <= allocaLimit) ? alloca(mixin(allocExpr)) : null) 183 { 184 immutable length = mixin(allocExpr); 185 auto result = TempBuffer!char( 186 (cast(char*)(buffer is null ? malloc(length) : buffer))[0 .. length - 1], 187 buffer is null); 188 189 import llvm.intrinsics; 190 191 char* p = result.ptr; 192 foreach (const(char[]) str; Strs) 193 { 194 llvm_memcpy(p, str.ptr, str.length); 195 p += str.length; 196 } 197 *p = '\0'; 198 199 return result; 200 } 201 } 202 203 private: 204 bool isWhite(dchar c) @safe pure nothrow @nogc 205 { 206 return c == ' ' || (c >= 0x09 && c <= 0x0D); 207 } 208 209 template SimdMatcher(string match) 210 { 211 import core.simd; 212 import fast.internal.sysdef; 213 214 static if (match != strip(match)) 215 { 216 // Reinstanciate the template with any whitespace stripped from the match string. 217 alias SimdMatcher = SimdMatcher!(strip(match)); 218 } 219 else 220 { 221 /* For SSE in DMD I am blocked by: 222 * https://d.puremagic.com/issues/show_bug.cgi?id=8047 223 * https://d.puremagic.com/issues/show_bug.cgi?id=11585 224 */ 225 enum isUsingSSE = hasSSE2 && (isLDC || isGDC); 226 enum isSingleChar = match.length == 2 && match[0] == '='; 227 static if (isSingleChar) 228 enum singleChar = match[1]; 229 static if (isUsingSSE) 230 { 231 // Using MOVMSKB we get one boolean per bit in a 16-bit value. 232 alias Word = ubyte16; 233 alias Mask = uint; 234 enum sparseness = 1; 235 } 236 else 237 { 238 // The fallback is to work with machine words and tricky bit-twiddling algorithms. 239 // As a result we get machine words where matching bytes have the high bit set. 240 alias Word = size_t; 241 alias Mask = size_t; 242 enum sparseness = 8; 243 } 244 enum matchCode = genMatchCode!isUsingSSE("*wp"); 245 // Used in generic comparison code 246 enum lows = size_t.max / 0xFF; 247 enum highs = lows * 0x80; 248 249 enum betterUseTables = (isDMD && matchCode.complexity >= 4) 250 || (isGDC && matchCode.complexity >= 18) 251 || (isLDC && matchCode.complexity >= 18); 252 253 static if (betterUseTables) 254 { 255 immutable matchTable = genMatchTable(); 256 257 size_t find(scope inout(char*) b, scope inout(char*) e) pure nothrow @nogc 258 { 259 //import core.stdc.string; 260 import fast.internal.helpers; 261 262 // catch "strlen" and "memchr" like calls, that are highly optimized compiler built-ins. 263 static if (isSingleChar) 264 { 265 return memchr(b, singleChar, e - b) - b; 266 } 267 else 268 { 269 if (b >= e) 270 return 0; 271 272 size_t off = cast(size_t) b % ushort.sizeof; 273 ushort* wp = cast(ushort*)(b - off); 274 ushort* we = cast(ushort*) alignPtrNext(e, ushort.sizeof); 275 if (off) 276 { 277 // Throw away bytes from before start of the string 278 if (auto mask = matchTable[*wp] >> off) 279 return bsf(mask); 280 if (++wp is we) 281 return size_t.max; 282 } 283 284 do 285 { 286 if (auto mask = matchTable[*wp]) 287 return bsf(mask) + (cast(char*) wp - b); 288 } 289 while (++wp !is we); 290 return size_t.max; 291 } 292 } 293 294 inout(char)* find(scope inout(char*) b) pure nothrow @nogc 295 { 296 //import core.stdc.string; 297 // catch "strlen" and "memchr" like calls, that are highly optimized compiler built-ins. 298 static if (isSingleChar && singleChar == '\0') 299 { 300 return strlen(b) + b; 301 } 302 else static if (isSingleChar && isDMD) 303 { // DMD is better off using optimized C library code. 304 return memchr(b, singleChar, e - b) - b; 305 } 306 else 307 { 308 size_t off = cast(size_t) b % ushort.sizeof; 309 ushort* wp = cast(ushort*)(b - off); 310 if (off) 311 { 312 // Throw away bytes from before start of the string 313 if (auto mask = matchTable[*wp] >> off) 314 return b + bsf(mask); 315 } 316 317 do 318 { 319 if (auto mask = matchTable[*wp]) 320 return cast(inout(char)*) wp + bsf(mask); 321 } 322 while (true); 323 } 324 } 325 } 326 else 327 { 328 //import core.stdc.string, core.simd; 329 import std.simd; 330 import fast.internal.helpers; 331 332 version (LDC) 333 { 334 import ldc.gccbuiltins_x86; 335 } 336 else version (GNU) 337 { 338 import gcc.builtins; 339 } 340 341 size_t find(scope inout(char*) b, scope inout(char*) e) pure nothrow 342 { 343 // catch "strlen" and "memchr" like calls, that are highly optimized compiler built-ins. 344 static if (isSingleChar) 345 { 346 return (cast(inout(char*)) memchr(b, singleChar, e - b)) - b; 347 } 348 else 349 { 350 if (b >= e) 351 return 0; 352 353 size_t off = cast(size_t) b % Word.sizeof; 354 Word* wp = cast(Word*)(b - off); 355 Word* we = cast(Word*) alignPtrNext(e, Word.sizeof); 356 if (off) 357 { 358 // Throw away bytes from before start of the string 359 if (auto mask = (mixin(matchCode.code)) >> (off * sparseness)) 360 return bsf(mask) / sparseness; 361 if (++wp is we) 362 return size_t.max; 363 } 364 365 do 366 { 367 if (auto mask = mixin(matchCode.code)) 368 return bsf(mask) / sparseness + (cast(char*) wp - b); 369 } 370 while (++wp !is we); 371 return size_t.max; 372 } 373 } 374 375 inout(char)* find(scope inout(char*) b) pure nothrow 376 { 377 // catch "strlen" and "memchr" like calls, that are highly optimized compiler built-ins. 378 static if (isSingleChar && singleChar == '\0') 379 { 380 return strlen(b) + b; 381 } 382 else static if (isSingleChar && isDMD) 383 { // DMD is better off using optimized C library code. 384 return cast(inout(char*)) memchr(b, singleChar, size_t.max); 385 } 386 else 387 { 388 size_t off = cast(size_t) b % Word.sizeof; 389 Word* wp = cast(Word*)(b - off); 390 if (off) 391 { 392 // Throw away bytes from before start of the string 393 if (auto mask = (mixin(matchCode.code)) >> (off * sparseness)) 394 return b + bsf(mask) / sparseness; 395 ++wp; 396 } 397 398 do 399 { 400 if (auto mask = mixin(matchCode.code)) 401 return cast(inout(char)*) wp + bsf(mask) / sparseness; 402 ++wp; 403 } 404 while (true); 405 } 406 } 407 } 408 409 enum genMatchCode(bool sse)(string var) 410 { 411 import std.exception; 412 413 struct Code 414 { 415 string code; 416 size_t complexity = 1; 417 } 418 419 Code result; 420 string[] nesting; 421 422 with (result) 423 { 424 for (size_t i = 0; i < match.length;) 425 { 426 string handleChar() 427 { 428 char c = match[i + 1]; 429 switch (c) 430 { 431 case 0: 432 return `'\0'`; 433 case '\\': 434 return `'\\'`; 435 case "'"[0]: 436 return `'\''`; 437 case '\t': 438 return `'\t'`; 439 case '\r': 440 return `'\r'`; 441 case '\n': 442 return `'\n'`; 443 default: 444 return `'` ~ c ~ `'`; 445 } 446 } 447 448 if (match[i] == '=') 449 { 450 static if (sse) 451 { 452 code ~= "maskEqual(" ~ var ~ ", SIMDFromScalar!(ubyte16, " ~ handleChar() ~ "))"; 453 } 454 else if (match[i + 1] == 0) 455 { 456 code ~= "" ~ var ~ " - lows & ~" ~ var; 457 } 458 else 459 { 460 code ~= "(" ~ var ~ " ^ lows * " ~ handleChar() ~ ") - lows & ~(" ~ var ~ " ^ lows * " ~ handleChar() ~ ")"; 461 } 462 i += 2; 463 } 464 else if (match[i] == '!') 465 { 466 static if (sse) 467 { 468 code ~= "maskNotEqual(" ~ var ~ ", SIMDFromScalar!(ubyte16, " ~ handleChar() ~ "))"; 469 } 470 else if (match[i + 1] == 0) 471 { 472 code ~= "(~(" ~ var ~ " - lows) | " ~ var ~ ")"; 473 } 474 else 475 { 476 code ~= "(~((" ~ var ~ " ^ lows * " ~ handleChar() ~ ") - lows) | (" ~ var ~ " ^ lows * " ~ handleChar() ~ "))"; 477 } 478 i += 2; 479 } 480 else if (match[i] == '<') 481 { 482 static if (sse) 483 code ~= "maskGreater(SIMDFromScalar!(ubyte16, " ~ handleChar() ~ "), " ~ var ~ ")"; 484 else 485 code ~= "maskLessGeneric!" ~ handleChar() ~ "(" ~ var ~ ")"; 486 i += 2; 487 } 488 else if (match[i] == '>') 489 { 490 static if (sse) 491 code ~= "maskGreater(" ~ var ~ ", SIMDFromScalar!(ubyte16, " ~ handleChar() ~ "))"; 492 else 493 code ~= "maskGreaterGeneric!" ~ handleChar() ~ "(" ~ var ~ ")"; 494 i += 2; 495 } 496 else if (match[i .. $].startsWith("or(")) 497 { 498 static if (sse) 499 { 500 nesting ~= ", "; 501 code ~= "or("; 502 } 503 else 504 { 505 nesting ~= " | "; 506 } 507 complexity++; 508 i += 3; 509 } 510 else if (match[i .. $].startsWith("and(")) 511 { 512 static if (sse) 513 { 514 nesting ~= ", "; 515 code ~= "and("; 516 } 517 else 518 { 519 nesting ~= " & "; 520 } 521 complexity++; 522 i += 4; 523 } 524 else if (match[i] == ',') 525 { 526 enforce(nesting.length, "',' on top level"); 527 code ~= nesting[$ - 1]; 528 i++; 529 } 530 else if (match[i] == ')') 531 { 532 enforce(nesting.length, "Unbalanced closing parenthesis"); 533 nesting.length--; 534 static if (sse) 535 { 536 code ~= ")"; 537 } 538 i++; 539 } 540 else if (match[i]) 541 { 542 i++; 543 } 544 else 545 { 546 throw new Exception(format("Unexpected character at index %s: 0x%02x", i, match[i])); 547 } 548 } 549 static if (sse) 550 { 551 code = "__builtin_ia32_pmovmskb128(" ~ code ~ ")"; 552 } 553 else 554 { 555 code = "(" ~ code ~ ") & highs"; 556 } 557 } 558 return result; 559 } 560 561 enum genMatchTable() 562 { 563 ubyte[1 << 16] table; 564 ubyte[256] lut; 565 foreach (uint i; 0 .. 256) 566 { 567 lut[i] = (mixin(genMatchCode!false("i").code) >> 7) & 1; 568 } 569 foreach (i; 0 .. 256) 570 foreach (k; 0 .. 256) 571 { 572 table[i * 256 + k] = cast(ubyte)(lut[i] << 1 | lut[k]); 573 } 574 return table; 575 } 576 } 577 } 578 579 /** 580 * Template for searching a fixed value in a word sized memory block (i.e. 1, 2, 4 or 8 bytes). 581 * 582 * Params: 583 * value = The value you are looking for. 584 * word = The data word to search for the value. 585 * 586 * Returns: 587 * non-zero, iff the value is contained in the data word. 588 * Specifically it returns 0x80 for every byte of the word that was a match and 0x00 for others. 589 * 590 * See_Also: 591 * http://graphics.stanford.edu/~seander/bithacks.html#ValueInWord 592 */ 593 T maskEqualGeneric(ubyte value, T)(T word) @safe pure nothrow 594 if (isUnsigned!T) 595 { 596 // This value results in 0x01 for each byte of a T value. 597 enum lows = T.max / 0xFF; 598 static if (value == 0) 599 { 600 enum highs = lows * 0x80; 601 return (word - lows) & ~word & highs; 602 } 603 else 604 { 605 enum xor = lows * value; 606 return maskEqualGeneric!0(word ^ xor); 607 } 608 } 609 610 T maskLessGeneric(ubyte value, T)(T word) @safe pure nothrow 611 if (isUnsigned!T && value <= 128) 612 { 613 enum lows = T.max / 0xFF; 614 enum highs = lows * 0x80; 615 return (word - lows * value) & ~word & highs; 616 } 617 618 T maskGreaterGeneric(ubyte value, T)(T word) @safe pure nothrow 619 if (isUnsigned!T && value <= 127) 620 { 621 enum lows = T.max / 0xFF; 622 enum highs = lows * 0x80; 623 return (word + lows * (127 - value) | word) & highs; 624 } 625 626 T orGeneric(T)(T a, T b) @safe pure nothrow 627 if (isUnsigned!T) 628 { 629 return a | b; 630 }