1 /**
2  * Fast, non-allocating string functions.
3  *
4  * Authors:
5  *   $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
6  *
7  * Copyright:
8  *   © 2017-2023 $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise), $(LINK2 mailto:etienne@cimons.com, Etienne Cimon)
9  *
10  * License:
11  *   $(LINK2 https://mit-license.org/, The MIT License (MIT))
12  */
13 module fast..string;
14 
15 import core.bitop;
16 import core.simd;
17 
18 //import core.stdc.stdlib;
19 
20 version (GNU) import gcc.attribute;
21 
22 import std.range;
23 import std.traits;
24 
25 import fast.buffer;
26 
27 /**
28  * Splits a string in two around one or more compile-time known code units.
29  *
30  * Params:
31  *   match = An expression that matches all characters around which a split should occur.
32  *   str = The string to scan.
33  *   before = The part before the split is stored here. If no character in $(D match) is found, the original string is returned here.
34  *   after = The part after the split is stored here. If no character in $(D match) is found, $(D null) is returned here.
35  *   splitter = If not $(D null), this pointer will receive a copy of the splitting char.
36  *
37  * Returns:
38  *   $(D true), iff a split occured.
39  */
40 bool split(string match)(scope inout(char[]) str, ref inout(char)[] before, ref inout(char)[] after, char* splitter = null)
41 {
42 	immutable pos = min(str.length, SimdMatcher!match.find(str.ptr, str.ptr + str.length));
43 	before = str[0 .. pos];
44 	if (pos < str.length)
45 	{
46 		after = str[pos + 1 .. $];
47 		if (splitter)
48 			*splitter = str[pos];
49 		return true;
50 	}
51 	after = null;
52 	return false;
53 }
54 
55 /**
56  * Similar to the overload for strings, this function works a little faster as it lacks boundary checks.
57  * It assumes that one of the characters in $(D match) is actually contained in the string.
58  *
59  * Params:
60  *   match = An expression that matches all characters around which a split should occur.
61  *   ptr = The string to scan.
62  *   before = The part before the split is stored here. If no character in $(D match) is found, the original string is returned here.
63  *   after = The pointer to the part after the split is stored here.
64  * 
65  * Returns:
66  *   The char that caused the split. (From $(D match).)
67  */
68 char split(string match)(scope inout(char*) ptr, ref inout(char)[] before, ref inout(char)* after)
69 {
70 	auto pos = SimdMatcher!match.find(ptr);
71 	before = ptr[0 .. pos - ptr];
72 	after = pos + 1;
73 	return *pos;
74 }
75 
76 /*******************************************************************************
77  * 
78  * Finds the first occurrence of a set of compile-time known code units in a
79  * string. While the algorithm is `O(n)` in relation to the count of given code
80  * units, the overhead when using it on short strings weights more for only 1 or
81  * 2 code units.
82  *
83  * Params:
84  *   match = An expression that matches all characters around which a split
85  *           should occur.
86  *   str = The string to search for a code unit.
87  *
88  * Returns:
89  *   If a match is found, the index into the string is returned.
90  *   Otherwise an invalid index is returned. Check with
91  *   `if (result &lt; str.length)`.
92  *
93  * See_Also:
94  *   split,
95  *   $(LINK2 http://mischasan.wordpress.com/2011/11/09/the-generic-sse2-loop/,
96  *           The Generic SSE2 Loop)
97  *
98  * Example:
99  * ---
100  * // Check if there is a '/' or '\' in the string
101  * auto pos = str.find!(`or(=/,=\)`);
102  * if (pos < str.length) { }
103  * ---
104  **************************************/
105 size_t find(string match)(in char[] str) pure nothrow
106 {
107 	return SimdMatcher!match.find(str.ptr, str.ptr + str.length);
108 }
109 
110 /*******************************************************************************
111  * 
112  * Same as the overload for strings, but with only a char*, making it faster as
113  * it cannot do a boundary check.
114  *
115  * Sometimes when looking for a character it is helpful to append it as a
116  * sentinel to the char buffer and then use this function instead of the slower
117  * one that checks the boundary constantly.
118  *
119  * Example:
120  * ---
121  * // Find a ']' in a buffer of 1024 bytes using an additional sentinel.
122  * size_t length = 1024;
123  * char[] buffer = new char[](length+1);
124  * buffer[length] = ']';
125  * auto pos = buffer.ptr.find!("=]");
126  * if (pos < length) { // was an actual find before the sentinel }
127  * ---
128  **************************************/
129 inout(char)* find(string match)(inout(char*) ptr) pure nothrow
130 {
131 	return SimdMatcher!match.find(ptr);
132 }
133 
134 bool keyword1(string key)(in char[] str,
135 scope bool function(ref immutable(char)* key, ref const(char)* str) mismatcher = null)
136 {
137 	auto strPtr = str.ptr;
138 	auto keyPtr = key.ptr;
139 	auto keyEnd = keyPtr + key.length;
140 
141 	while (keyPtr !is keyEnd)
142 	{
143 		while (*strPtr == '\\')
144 			if (!mismatcher(keyPtr, strPtr))
145 				return false;
146 
147 		if (*strPtr == '"' || *strPtr != *keyPtr)
148 			return false;
149 
150 		strPtr++;
151 		keyPtr++;
152 	}
153 	return true;
154 }
155 
156 size_t equalLength(scope inout(char[]) a, scope inout(char[]) b)
157 {
158 	return 0;
159 }
160 
161 /*******************************************************************************
162  * 
163  * Concatenates a series of strings.
164  *
165  * Params:
166  *   Strs = a series of string symbols or literals to be concatenated
167  *   buffer = optional buffer, implicitly allocated
168  *
169  * Returns:
170  *   A $(D TempBuffer!char) containing the concatenated string. It is kept alive
171  *   for as long as it is in scope.
172  *
173  **************************************/
174 nothrow @nogc
175 template concat(Strs...)
176 {
177 	//import core.stdc.string : memcpy;
178 	import fast.internal.helpers;
179 
180 	enum allocExpr = ctfeJoin!(Strs.length)("Strs[%s].length", "+") ~ "+1";
181 
182 	auto concat(void* buffer = (mixin(allocExpr) <= allocaLimit) ? alloca(mixin(allocExpr)) : null)
183 	{
184 		immutable length = mixin(allocExpr);
185 		auto result = TempBuffer!char(
186 			(cast(char*)(buffer is null ? malloc(length) : buffer))[0 .. length - 1],
187 			buffer is null);
188 
189 		import llvm.intrinsics;
190 
191 		char* p = result.ptr;
192 		foreach (const(char[]) str; Strs)
193 		{
194 			llvm_memcpy(p, str.ptr, str.length);
195 			p += str.length;
196 		}
197 		*p = '\0';
198 
199 		return result;
200 	}
201 }
202 
203 private:
204 bool isWhite(dchar c) @safe pure nothrow @nogc
205 {
206 	return c == ' ' || (c >= 0x09 && c <= 0x0D);
207 }
208 
209 template SimdMatcher(string match)
210 {
211 	import core.simd;
212 	import fast.internal.sysdef;
213 
214 	static if (match != strip(match))
215 	{
216 		// Reinstanciate the template with any whitespace stripped from the match string.
217 		alias SimdMatcher = SimdMatcher!(strip(match));
218 	}
219 	else
220 	{
221 		/* For SSE in DMD I am blocked by:
222 		 * https://d.puremagic.com/issues/show_bug.cgi?id=8047
223 		 * https://d.puremagic.com/issues/show_bug.cgi?id=11585
224 		 */
225 		enum isUsingSSE = hasSSE2 && (isLDC || isGDC);
226 		enum isSingleChar = match.length == 2 && match[0] == '=';
227 		static if (isSingleChar)
228 			enum singleChar = match[1];
229 		static if (isUsingSSE)
230 		{
231 			// Using MOVMSKB we get one boolean per bit in a 16-bit value.
232 			alias Word = ubyte16;
233 			alias Mask = uint;
234 			enum sparseness = 1;
235 		}
236 		else
237 		{
238 			// The fallback is to work with machine words and tricky bit-twiddling algorithms.
239 			// As a result we get machine words where matching bytes have the high bit set.
240 			alias Word = size_t;
241 			alias Mask = size_t;
242 			enum sparseness = 8;
243 		}
244 		enum matchCode = genMatchCode!isUsingSSE("*wp");
245 		// Used in generic comparison code
246 		enum lows = size_t.max / 0xFF;
247 		enum highs = lows * 0x80;
248 
249 		enum betterUseTables = (isDMD && matchCode.complexity >= 4)
250 			|| (isGDC && matchCode.complexity >= 18)
251 			|| (isLDC && matchCode.complexity >= 18);
252 
253 		static if (betterUseTables)
254 		{
255 			immutable matchTable = genMatchTable();
256 
257 			size_t find(scope inout(char*) b, scope inout(char*) e) pure nothrow @nogc
258 			{
259 				//import core.stdc.string;
260 				import fast.internal.helpers;
261 
262 				// catch "strlen" and "memchr" like calls, that are highly optimized compiler built-ins.
263 				static if (isSingleChar)
264 				{
265 					return memchr(b, singleChar, e - b) - b;
266 				}
267 				else
268 				{
269 					if (b >= e)
270 						return 0;
271 
272 					size_t off = cast(size_t) b % ushort.sizeof;
273 					ushort* wp = cast(ushort*)(b - off);
274 					ushort* we = cast(ushort*) alignPtrNext(e, ushort.sizeof);
275 					if (off)
276 					{
277 						// Throw away bytes from before start of the string
278 						if (auto mask = matchTable[*wp] >> off)
279 							return bsf(mask);
280 						if (++wp is we)
281 							return size_t.max;
282 					}
283 
284 					do
285 					{
286 						if (auto mask = matchTable[*wp])
287 							return bsf(mask) + (cast(char*) wp - b);
288 					}
289 					while (++wp !is we);
290 					return size_t.max;
291 				}
292 			}
293 
294 			inout(char)* find(scope inout(char*) b) pure nothrow @nogc
295 			{
296 				//import core.stdc.string;
297 				// catch "strlen" and "memchr" like calls, that are highly optimized compiler built-ins.
298 				static if (isSingleChar && singleChar == '\0')
299 				{
300 					return strlen(b) + b;
301 				}
302 				else static if (isSingleChar && isDMD)
303 				{ // DMD is better off using optimized C library code.
304 					return memchr(b, singleChar, e - b) - b;
305 				}
306 				else
307 				{
308 					size_t off = cast(size_t) b % ushort.sizeof;
309 					ushort* wp = cast(ushort*)(b - off);
310 					if (off)
311 					{
312 						// Throw away bytes from before start of the string
313 						if (auto mask = matchTable[*wp] >> off)
314 							return b + bsf(mask);
315 					}
316 
317 					do
318 					{
319 						if (auto mask = matchTable[*wp])
320 							return cast(inout(char)*) wp + bsf(mask);
321 					}
322 					while (true);
323 				}
324 			}
325 		}
326 		else
327 		{
328 			//import core.stdc.string, core.simd;
329 			import std.simd;
330 			import fast.internal.helpers;
331 
332 			version (LDC)
333 			{
334 				import ldc.gccbuiltins_x86;
335 			}
336 			else version (GNU)
337 			{
338 				import gcc.builtins;
339 			}
340 
341 			size_t find(scope inout(char*) b, scope inout(char*) e) pure nothrow
342 			{
343 				// catch "strlen" and "memchr" like calls, that are highly optimized compiler built-ins.
344 				static if (isSingleChar)
345 				{
346 					return (cast(inout(char*)) memchr(b, singleChar, e - b)) - b;
347 				}
348 				else
349 				{
350 					if (b >= e)
351 						return 0;
352 
353 					size_t off = cast(size_t) b % Word.sizeof;
354 					Word* wp = cast(Word*)(b - off);
355 					Word* we = cast(Word*) alignPtrNext(e, Word.sizeof);
356 					if (off)
357 					{
358 						// Throw away bytes from before start of the string
359 						if (auto mask = (mixin(matchCode.code)) >> (off * sparseness))
360 							return bsf(mask) / sparseness;
361 						if (++wp is we)
362 							return size_t.max;
363 					}
364 
365 					do
366 					{
367 						if (auto mask = mixin(matchCode.code))
368 							return bsf(mask) / sparseness + (cast(char*) wp - b);
369 					}
370 					while (++wp !is we);
371 					return size_t.max;
372 				}
373 			}
374 
375 			inout(char)* find(scope inout(char*) b) pure nothrow
376 			{
377 				// catch "strlen" and "memchr" like calls, that are highly optimized compiler built-ins.
378 				static if (isSingleChar && singleChar == '\0')
379 				{
380 					return strlen(b) + b;
381 				}
382 				else static if (isSingleChar && isDMD)
383 				{ // DMD is better off using optimized C library code.
384 					return cast(inout(char*)) memchr(b, singleChar, size_t.max);
385 				}
386 				else
387 				{
388 					size_t off = cast(size_t) b % Word.sizeof;
389 					Word* wp = cast(Word*)(b - off);
390 					if (off)
391 					{
392 						// Throw away bytes from before start of the string
393 						if (auto mask = (mixin(matchCode.code)) >> (off * sparseness))
394 							return b + bsf(mask) / sparseness;
395 						++wp;
396 					}
397 
398 					do
399 					{
400 						if (auto mask = mixin(matchCode.code))
401 							return cast(inout(char)*) wp + bsf(mask) / sparseness;
402 						++wp;
403 					}
404 					while (true);
405 				}
406 			}
407 		}
408 
409 		enum genMatchCode(bool sse)(string var)
410 		{
411 			import std.exception;
412 
413 			struct Code
414 			{
415 				string code;
416 				size_t complexity = 1;
417 			}
418 
419 			Code result;
420 			string[] nesting;
421 
422 			with (result)
423 			{
424 				for (size_t i = 0; i < match.length;)
425 				{
426 					string handleChar()
427 					{
428 						char c = match[i + 1];
429 						switch (c)
430 						{
431 						case 0:
432 							return `'\0'`;
433 						case '\\':
434 							return `'\\'`;
435 						case "'"[0]:
436 							return `'\''`;
437 						case '\t':
438 							return `'\t'`;
439 						case '\r':
440 							return `'\r'`;
441 						case '\n':
442 							return `'\n'`;
443 						default:
444 							return `'` ~ c ~ `'`;
445 						}
446 					}
447 
448 					if (match[i] == '=')
449 					{
450 						static if (sse)
451 						{
452 							code ~= "maskEqual(" ~ var ~ ", SIMDFromScalar!(ubyte16, " ~ handleChar() ~ "))";
453 						}
454 						else if (match[i + 1] == 0)
455 						{
456 							code ~= "" ~ var ~ " - lows & ~" ~ var;
457 						}
458 						else
459 						{
460 							code ~= "(" ~ var ~ " ^ lows * " ~ handleChar() ~ ") - lows & ~(" ~ var ~ " ^ lows * " ~ handleChar() ~ ")";
461 						}
462 						i += 2;
463 					}
464 					else if (match[i] == '!')
465 					{
466 						static if (sse)
467 						{
468 							code ~= "maskNotEqual(" ~ var ~ ", SIMDFromScalar!(ubyte16, " ~ handleChar() ~ "))";
469 						}
470 						else if (match[i + 1] == 0)
471 						{
472 							code ~= "(~(" ~ var ~ " - lows) | " ~ var ~ ")";
473 						}
474 						else
475 						{
476 							code ~= "(~((" ~ var ~ " ^ lows * " ~ handleChar() ~ ") - lows) | (" ~ var ~ " ^ lows * " ~ handleChar() ~ "))";
477 						}
478 						i += 2;
479 					}
480 					else if (match[i] == '<')
481 					{
482 						static if (sse)
483 							code ~= "maskGreater(SIMDFromScalar!(ubyte16, " ~ handleChar() ~ "), " ~ var ~ ")";
484 						else
485 							code ~= "maskLessGeneric!" ~ handleChar() ~ "(" ~ var ~ ")";
486 						i += 2;
487 					}
488 					else if (match[i] == '>')
489 					{
490 						static if (sse)
491 							code ~= "maskGreater(" ~ var ~ ", SIMDFromScalar!(ubyte16, " ~ handleChar() ~ "))";
492 						else
493 							code ~= "maskGreaterGeneric!" ~ handleChar() ~ "(" ~ var ~ ")";
494 						i += 2;
495 					}
496 					else if (match[i .. $].startsWith("or("))
497 					{
498 						static if (sse)
499 						{
500 							nesting ~= ", ";
501 							code ~= "or(";
502 						}
503 						else
504 						{
505 							nesting ~= " | ";
506 						}
507 						complexity++;
508 						i += 3;
509 					}
510 					else if (match[i .. $].startsWith("and("))
511 					{
512 						static if (sse)
513 						{
514 							nesting ~= ", ";
515 							code ~= "and(";
516 						}
517 						else
518 						{
519 							nesting ~= " & ";
520 						}
521 						complexity++;
522 						i += 4;
523 					}
524 					else if (match[i] == ',')
525 					{
526 						enforce(nesting.length, "',' on top level");
527 						code ~= nesting[$ - 1];
528 						i++;
529 					}
530 					else if (match[i] == ')')
531 					{
532 						enforce(nesting.length, "Unbalanced closing parenthesis");
533 						nesting.length--;
534 						static if (sse)
535 						{
536 							code ~= ")";
537 						}
538 						i++;
539 					}
540 					else if (match[i])
541 					{
542 						i++;
543 					}
544 					else
545 					{
546 						throw new Exception(format("Unexpected character at index %s: 0x%02x", i, match[i]));
547 					}
548 				}
549 				static if (sse)
550 				{
551 					code = "__builtin_ia32_pmovmskb128(" ~ code ~ ")";
552 				}
553 				else
554 				{
555 					code = "(" ~ code ~ ") & highs";
556 				}
557 			}
558 			return result;
559 		}
560 
561 		enum genMatchTable()
562 		{
563 			ubyte[1 << 16] table;
564 			ubyte[256] lut;
565 			foreach (uint i; 0 .. 256)
566 			{
567 				lut[i] = (mixin(genMatchCode!false("i").code) >> 7) & 1;
568 			}
569 			foreach (i; 0 .. 256)
570 				foreach (k; 0 .. 256)
571 				{
572 					table[i * 256 + k] = cast(ubyte)(lut[i] << 1 | lut[k]);
573 				}
574 			return table;
575 		}
576 	}
577 }
578 
579 /**
580  * Template for searching a fixed value in a word sized memory block (i.e. 1, 2, 4 or 8 bytes).
581  *
582  * Params:
583  *   value = The value you are looking for.
584  *   word = The data word to search for the value.
585  *
586  * Returns:
587  *   non-zero, iff the value is contained in the data word.
588  *   Specifically it returns 0x80 for every byte of the word that was a match and 0x00 for others.
589  *
590  * See_Also:
591  *   http://graphics.stanford.edu/~seander/bithacks.html#ValueInWord
592  */
593 T maskEqualGeneric(ubyte value, T)(T word) @safe pure nothrow
594 if (isUnsigned!T)
595 {
596 	// This value results in 0x01 for each byte of a T value.
597 	enum lows = T.max / 0xFF;
598 	static if (value == 0)
599 	{
600 		enum highs = lows * 0x80;
601 		return (word - lows) & ~word & highs;
602 	}
603 	else
604 	{
605 		enum xor = lows * value;
606 		return maskEqualGeneric!0(word ^ xor);
607 	}
608 }
609 
610 T maskLessGeneric(ubyte value, T)(T word) @safe pure nothrow
611 if (isUnsigned!T && value <= 128)
612 {
613 	enum lows = T.max / 0xFF;
614 	enum highs = lows * 0x80;
615 	return (word - lows * value) & ~word & highs;
616 }
617 
618 T maskGreaterGeneric(ubyte value, T)(T word) @safe pure nothrow
619 if (isUnsigned!T && value <= 127)
620 {
621 	enum lows = T.max / 0xFF;
622 	enum highs = lows * 0x80;
623 	return (word + lows * (127 - value) | word) & highs;
624 }
625 
626 T orGeneric(T)(T a, T b) @safe pure nothrow
627 if (isUnsigned!T)
628 {
629 	return a | b;
630 }