1 /*******************************************************************************
2  * 
3  * Functions for formatting data into strings and back.
4  * 
5  * Authors:
6  *   $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
7  * 
8  * Copyright:
9  *   © 2017-2023 $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise), $(LINK2 mailto:etienne@cimons.com, Etienne Cimon)
10  * 
11  * License:
12  *   $(LINK2 https://mit-license.org/, The MIT License (MIT))
13  * 
14  **************************************/
15 module fast.format;
16 
17 //import core.stdc.stdlib;
18 //import core.stdc.string;
19 import core.bitop;
20 import std.traits;
21 import memutils.ct;
22 import std.typetuple;
23 import fast.internal.helpers;
24 
25 /+
26  ╔══════════════════════════════════════════════════════════════════════════════
27  ║ ⚑ Hex String
28  ╚══════════════════════════════════════════════════════════════════════════════
29  +/
30 
/**
 * Renders an unsigned integer as a zero-padded hexadecimal string using
 * lower-case letters.
 *
 * The result always has two characters per byte of $(D U) (e.g. 8 characters
 * for $(D uint)), most significant nibble first.
 *
 * Params:
 *   n = the number to convert
 *
 * Returns:
 *   hexadecimal representation of $(D n), lower-case letters
 */
@safe pure nothrow @nogc
char[2 * U.sizeof] hexStrLower(U)(Unqual!U n) if (isUnsigned!U)
{
	static immutable char[16] digits = "0123456789abcdef";
	enum size_t width = 2 * U.sizeof;

	char[width] hex = void;
	// Fill from the least significant nibble (last character) backwards.
	for (size_t pos = width; pos-- > 0;)
	{
		hex[pos] = digits[cast(size_t)(n & 0xF)];
		n >>= 4;
	}
	return hex;
}
52 
/**
 * Renders an unsigned integer as a zero-padded hexadecimal string using
 * upper-case letters.
 *
 * The result always has two characters per byte of $(D U) (e.g. 8 characters
 * for $(D uint)), most significant nibble first.
 *
 * Params:
 *   n = the number to convert
 *
 * Returns:
 *   hexadecimal representation of $(D n), upper-case letters
 */
@safe pure nothrow @nogc
char[2 * U.sizeof] hexStrUpper(U)(U n) if (isUnsigned!U)
{
	static immutable char[16] digits = "0123456789ABCDEF";
	enum size_t width = 2 * U.sizeof;

	char[width] hex = void;
	// Fill from the least significant nibble (last character) backwards.
	for (size_t pos = width; pos-- > 0;)
	{
		hex[pos] = digits[cast(size_t)(n & 0xF)];
		n >>= 4;
	}
	return hex;
}
74 
75 /+
76  ╔══════════════════════════════════════════════════════════════════════════════
77  ║ ⚑ Decimal String
78  ╚══════════════════════════════════════════════════════════════════════════════
79  +/
80 
/**
 * Maximum number of decimal digits (sign excluded) needed to print any value
 * of the integral type $(D T).
 *
 * The value is selected by size and signedness rather than by exact type
 * identity, so qualified types such as $(D const(int)) or
 * $(D immutable(ulong)) are handled like their unqualified counterparts.
 *
 * Params:
 *   T = an integral type of 1, 2, 4 or 8 bytes
 */
template decDigits(T) if (isIntegral!T)
{
	static if (T.sizeof == 8)
		// long.max has 19 digits, ulong.max has 20.
		enum decDigits = isSigned!T ? 19 : 20;
	else static if (T.sizeof == 4)
		enum decDigits = 10;
	else static if (T.sizeof == 2)
		enum decDigits = 5;
	else static if (T.sizeof == 1)
		enum decDigits = 3;
	else
		static assert(0, "decDigits: unsupported integral type " ~ T.stringof);
}
94 
/**
 * Digit budget used when formatting floating point values: 9 for $(D float)
 * and 18 for $(D double). No value is defined for any other floating point
 * type.
 */
template decDigits(T) if (isFloatingPoint!T)
{
	static if (is(T == double))
		enum decDigits = 18;
	else static if (is(T == float))
		enum decDigits = 9;
}
102 
/**
 * Number of characters reserved for the decimal representation of a $(D T):
 * $(D decDigits!T) plus one extra character for the sign when $(D T) is a
 * signed type.
 */
template decChars(T)
{
	static if (isSigned!T)
		enum decChars = decDigits!T + 1;
	else
		enum decChars = decDigits!T;
}
104 
/**
 * Counts the characters needed to print the integral value $(D v) in decimal,
 * including a leading minus sign for negative values.
 *
 * Params:
 *   v = the value to measure
 *
 * Returns:
 *   the character count, never more than $(D decChars!T)
 */
@safe pure nothrow size_t decCharsVal(T)(T v)
		if (isIntegral!T && !isFloatingPoint!T)
{
	enum size_t cap = decChars!T;

	size_t count = 1;
	if (v < 0)
	{
		// One extra character for the sign; continue with the magnitude.
		v *= -1;
		count = 2;
	}

	// Grow the power-of-ten bound until the value fits below it or the
	// type's maximum width is reached.
	for (ulong bound = 10; count < cap; bound *= 10, count++)
	{
		if (v < bound)
			return count;
	}
	return cap;
}
128 
/**
 * Counts the characters needed to print the floating point value $(D v) by
 * formatting it with $(D decStr) and measuring the result.
 *
 * Params:
 *   v = the value to measure
 *
 * Returns:
 *   length of $(D decStr(v))
 */
@safe nothrow size_t decCharsVal(T)(T v) if (isFloatingPoint!T)
{
	auto rendered = decStr(v);
	return rendered.length;
}
164 
/**
 * Formats a floating point value as a plain decimal string
 * (optional '-', integer part, optional '.' and fractional digits).
 *
 * The string is built back to front in a $(D RevFillStr). Trailing zeros of
 * the fraction are dropped; if no non-zero fractional digit is produced within
 * the digit budget, neither the fraction nor the decimal point is emitted.
 *
 * The integer part is obtained with $(D cast(uint)), so only magnitudes that
 * fit a $(D uint) are representable.
 * NOTE(review): NaN, infinity and magnitudes of 2^32 or more are not handled
 * specially here - confirm callers never pass them.
 *
 * Params:
 *   i = the value to convert
 *
 * Returns:
 *   the decimal representation of $(D i)
 */
@safe pure nothrow @nogc
RevFillStr!(decChars!I) decStr(I)(I i) if (isFloatingPoint!I)
{
	RevFillStr!(decChars!I) str;

	immutable bool signed = i < 0;
	uint u = cast(uint)(signed ? -i : i);
	I dec = (signed ? -i : i) - u;

	// Characters occupied by the sign and the integer part (capped at the
	// width of a uint), used to budget the fractional digits.
	size_t digits = signed ? 2 : 1;
	for (ulong bound = 10; digits < decChars!uint && u >= bound; bound *= 10)
		digits++;

	if (dec != 0)
	{
		enum size_t maxDecimals = decDigits!I - 3;
		char[maxDecimals] decimals = '0';

		// At least one fractional digit is always produced. The subtraction
		// is guarded so it cannot wrap around when the integer part already
		// uses up the whole budget.
		immutable size_t limit = digits < maxDecimals ? maxDecimals - digits : 1;

		size_t count = 0;
		do
		{
			dec *= 10;
			immutable uint val = cast(uint) dec;
			decimals[count++] = char('0' + val % 10);
			dec -= val;
		}
		while (dec > 0 && count < limit);

		// Strip trailing zeros.
		size_t keep = count;
		while (keep > 0 && decimals[keep - 1] == '0')
			keep--;

		// Only emit the decimal point when there is something after it.
		if (keep > 0)
		{
			foreach_reverse (d; decimals[0 .. keep])
				str ~= d;
			str ~= '.';
		}
	}

	do
	{
		str ~= char('0' + u % 10);
		u /= 10;
	}
	while (u > 0);

	static if (isSigned!I)
		if (signed)
			str ~= '-';

	return str;
}
228 
/**
 * Formats an integral value as a decimal string, with a leading '-' for
 * negative values.
 *
 * The string is built back to front in a $(D RevFillStr) sized for the
 * widest possible value of $(D I).
 *
 * Params:
 *   i = the value to convert
 *
 * Returns:
 *   the decimal representation of $(D i)
 */
@safe pure nothrow @nogc
RevFillStr!(decChars!I) decStr(I)(I i) if (isIntegral!I)
{
	RevFillStr!(decChars!I) str;

	static if (isSigned!I)
	{
		immutable bool signed = i < 0;
		// Unary minus on byte/short yields an int, which does not convert
		// implicitly to the narrow unsigned type; cast explicitly. The cast
		// also gives the correct magnitude for I.min, whose negation does not
		// fit the signed type.
		UnsignedOf!I u = cast(UnsignedOf!I)(signed ? -i : i);
	}
	else
		alias u = i;

	// Emit digits least significant first; do-while so that 0 prints as "0".
	do
	{
		str ~= char('0' + u % 10);
		u /= 10;
	}
	while (u);

	static if (isSigned!I)
		if (signed)
			str ~= '-';

	return str;
}
255 
256 /+
257  ╔══════════════════════════════════════════════════════════════════════════════
258  ║ ⚑ Formatting
259  ╚══════════════════════════════════════════════════════════════════════════════
260  +/
261 
/**
 * Evaluates to $(D true) for types whose formatted size is bounded at compile
 * time, i.e. integral and pointer types, and to $(D false) otherwise.
 */
template hasKnownSpaceRequirement(T)
{
	enum bool hasKnownSpaceRequirement = isPointer!T || isIntegral!T;
}
269 
/**
 * Upper bound on the number of characters needed to write a $(D T) with the
 * given format specifier.
 *
 * Integral types accept "%s", "%d" and "%S" (decimal, $(D decChars!T)
 * characters); unsigned integral types additionally accept "%x" and "%X"
 * (two hex digits per byte). Pointers accept "%s", "%p" and "%S" (two hex
 * digits per byte). Any other combination is rejected at compile time.
 *
 * Params:
 *   format = a single format specifier
 *   T = the argument type
 */
template spaceRequirement(string format, T) if (hasKnownSpaceRequirement!T)
{
	static if (isPointer!T)
	{
		static if (format == "%p" || format == "%s" || format == "%S")
			enum spaceRequirement = 2 * T.sizeof;
		else
			static assert(0, "Don't know how to handle " ~ T.stringof ~ " as " ~ format);
	}
	else static if (isIntegral!T)
	{
		static if (format == "%d" || format == "%s" || format == "%S")
			enum spaceRequirement = decChars!T;
		else static if ((format == "%x" || format == "%X") && isUnsigned!T)
			enum spaceRequirement = 2 * T.sizeof;
		else
			static assert(0, "Don't know how to handle " ~ T.stringof ~ " as " ~ format);
	}
	else
		static assert(0, "Don't know how to handle " ~ T.stringof);
}
291 
/**
 * Sums the space needed to write the whole format string: literal text
 * contributes its length, each specifier contributes the
 * $(D spaceRequirement) of its argument type.
 *
 * $(D tokenizedFormatString) yields a fixed-size table, so slots past the
 * last real token hold no text. They are skipped here using the same
 * empty-text guard that $(D formattedWrite) applies; without it such a slot
 * would be treated as a specifier for argument 0 with an empty format.
 *
 * Params:
 *   format = the format string
 *   Args = the argument types, all satisfying $(D hasKnownSpaceRequirement)
 *
 * Returns:
 *   an upper bound on the number of characters written for $(D format)
 */
enum spaceRequirements(string format, Args...)()
			if (allSatisfy!(hasKnownSpaceRequirement, Args))
{
	size_t sum = 0;

	alias parts = tokenizedFormatString!format;
	foreach (i; staticIota!(0, parts.length))
	{
		static if (parts[i][0] != null && parts[i][1] == size_t.max)
			// Literal text between specifiers.
			sum += parts[i][0].length;
		else static if (parts[i][0] != null)
			// Format specifier bound to argument parts[i][1].
			sum += spaceRequirement!(parts[i][0], Args[parts[i][1]]);
	}

	return sum;
}
308 
/**
 * Finds the first element of $(D s) that equals any character in $(D arr).
 *
 * Params:
 *   s = the sequence to search
 *   arr = the set of characters to look for
 *
 * Returns:
 *   the zero-based position of the first match, or -1 if there is none
 */
ptrdiff_t indexOf(T)(T s, string arr) pure nothrow
{
	ptrdiff_t pos = 0;
	foreach (const candidate; s)
	{
		bool hit = false;
		foreach (immutable wanted; arr)
		{
			if (wanted == candidate)
			{
				hit = true;
				break;
			}
		}
		if (hit)
			return pos;
		pos++;
	}
	return -1;
}
323 
/**
 * Finds the first element of $(D s) that equals $(D c).
 *
 * Params:
 *   s = the sequence to search
 *   c = the character to look for
 *
 * Returns:
 *   the zero-based position of the first match, or -1 if there is none
 */
ptrdiff_t indexOf(T)(T s, char c) pure nothrow
{
	ptrdiff_t pos = 0;
	foreach (const elem; s)
	{
		if (elem == c)
			return pos;
		pos++;
	}
	return -1;
}
337 
/**
 * Splits a format string at compile time into a table of
 * (text, argument index) pairs.
 *
 * Literal text is stored with an index of $(D size_t.max). Every '%' starts a
 * two-character specifier which is stored together with the running argument
 * index. The table has a fixed capacity of 8 entries; entries past the last
 * token are left at their initial value.
 *
 * A format string that needs more than 8 entries, or that ends in a lone '%',
 * is rejected with an explicit message instead of an index error inside the
 * compile-time evaluation.
 *
 * Params:
 *   fmt = the format string to tokenize
 */
template tokenizedFormatString(string fmt)
{
	enum impl()
	{
		Tuple!(string, size_t)[8] parts;
		size_t i = 0;
		size_t j = 0;
		string rest = fmt;
		while (1)
		{
			ptrdiff_t markerPos = indexOf(rest, '%');
			if (markerPos < 0)
			{
				// No more specifiers: the remainder, if any, is literal text.
				if (rest.length)
				{
					assert(j < parts.length, "Too many parts in format string: " ~ fmt);
					parts[j++] = tuple(rest, size_t.max);
				}
				return parts;
			}
			if (markerPos)
			{
				// Literal text in front of the next specifier.
				assert(j < parts.length, "Too many parts in format string: " ~ fmt);
				parts[j++] = tuple(rest[0 .. markerPos], size_t.max);
				rest = rest[markerPos .. $];
			}

			// TODO: more complex formats
			assert(rest.length >= 2, "Incomplete format specifier at end of format string: " ~ fmt);
			assert(j < parts.length, "Too many parts in format string: " ~ fmt);
			parts[j++] = tuple(rest[0 .. 2], i++);
			rest = rest[2 .. $];
		}

	}

	enum result = impl();
	static immutable Tuple!(string, size_t)[result.length] tokenizedFormatString = result;
}
371 
/**
 * Writes $(D args) into $(D buffer) according to the compile-time format
 * string and returns the written portion of the buffer.
 *
 * No bounds are checked in this function; the caller must supply a buffer
 * that is large enough for the formatted output.
 *
 * Params:
 *   format = the format string, tokenized at compile time
 *   buffer = destination of the formatted text
 *   args = the values referenced by the format specifiers, in order
 *
 * Returns:
 *   the slice of $(D buffer) that was filled
 */
char[] formattedWrite(string format, Args...)(char* buffer, Args args)
{
	import ldc.intrinsics;

	alias parts = tokenizedFormatString!format;
	char* cursor = buffer;

	foreach (i; staticIota!(0, parts.length))
	{
		// Slots without text carry nothing to write.
		static if (parts[i][0] != null)
		{
			static if (parts[i][1] == size_t.max)
			{
				// Literal text: copy it verbatim (memcpy is unavailable
				// during compile-time evaluation).
				if (__ctfe)
					cursor[0 .. parts[i][0].length] = parts[i][0].ptr[0 .. parts[i][0].length];
				else
					llvm_memcpy(cursor, parts[i][0].ptr, parts[i][0].length);
				cursor += parts[i][0].length;
			}
			else
			{
				// Specifier: the item writer advances the cursor itself.
				formattedWriteItem!(parts[i][0])(cursor, args[parts[i][1]]);
			}
		}
	}

	return buffer[0 .. cursor - buffer];
}
401 
/**
 * Writes an unsigned integer as fixed-width lower-case hex ("%x") and
 * advances $(D buffer) past the written characters.
 *
 * Params:
 *   buffer = write position, moved forward by two characters per byte of T
 *   t = the value to write
 */
pure nothrow
void formattedWriteItem(string format, T)(ref char* buffer, T t)
		if (isUnsigned!T && format == "%x")
{
	alias Digits = ReturnType!(hexStrLower!T);

	// Store the whole fixed-size digit array in one assignment.
	Digits* dest = cast(Digits*) buffer;
	*dest = hexStrLower!T(t);
	buffer += Digits.length;
}
410 
/**
 * Writes an unsigned integer as fixed-width upper-case hex ("%X") and
 * advances $(D buffer) past the written characters.
 *
 * Params:
 *   buffer = write position, moved forward by two characters per byte of T
 *   t = the value to write
 */
pure nothrow
void formattedWriteItem(string format, T)(ref char* buffer, T t)
		if (isUnsigned!T && format == "%X")
{
	alias Digits = ReturnType!(hexStrUpper!T);

	// Store the whole fixed-size digit array in one assignment.
	Digits* dest = cast(Digits*) buffer;
	*dest = hexStrUpper!T(t);
	buffer += Digits.length;
}
419 
/**
 * Writes one value for the "%s", "%d", "%f" or "%S" specifier and advances
 * $(D buffer) past the written characters.
 *
 * Integral and floating point values are rendered with $(D decStr), character
 * values are written as a single $(D char), and anything else is treated as
 * an array via its $(D ptr) and $(D length).
 *
 * With "%S" the text is escaped while copying: tab, backspace, newline,
 * carriage return, double quote and backslash become a backslash followed by
 * `t`, `b`, `n`, `r`, `"` and `\` respectively, and a NUL character becomes
 * a single `?`.
 *
 * Params:
 *   buffer = write position, moved forward by the number of characters written
 *   t = the value to write
 */
nothrow
void formattedWriteItem(string format, T)(ref char* buffer, T t)
		if (format == "%s" || format == "%d" || format == "%f" || format == "%S")
{
	import ldc.intrinsics;

	static if (isIntegral!T || isFloatingPoint!T)
		auto str = decStr(t);
	else static if (isSomeChar!T)
		auto str = t;
	else
		auto str = t.ptr[0 .. t.length];

	static if (is(typeof(str) : char) || isSomeChar!T)
	{
		// Single character (memcpy is unavailable during CTFE).
		if (__ctfe)
			buffer[0] = str;
		else
			llvm_memcpy(buffer, &str, char.sizeof);
		buffer += char.sizeof;
	}
	else static if (format == "%S")
	{
		auto src = str.ptr;
		size_t left = str.length;

		// `run` is the number of plain characters in front of the next
		// character that needs escaping, or -1 when none is left.
		for (ptrdiff_t run = str.indexOf("\"\t\r\n\\\b\0"); run > -1;
			run = indexOf(src[0 .. left], "\"\t\r\n\\\b\0"))
		{
			if (__ctfe)
				buffer[0 .. run] = src[0 .. run];
			else
				llvm_memcpy(buffer, src, run);
			buffer += run;
			src += run;
			left -= run;

			immutable char c = *src;
			if (c == 0x00)
			{
				// NUL has no escape sequence here; it is replaced.
				src++;
				left--;
				*(buffer++) = '?';
			}
			else
			{
				char mnemonic;
				switch (c)
				{
				case '\t':
					mnemonic = 't';
					break;
				case '\b':
					mnemonic = 'b';
					break;
				case '\n':
					mnemonic = 'n';
					break;
				case '\r':
					mnemonic = 'r';
					break;
				case '"':
					mnemonic = '"';
					break;
				case '\\':
					mnemonic = '\\';
					break;
				default:
					mnemonic = 0;
					break;
				}
				if (mnemonic)
				{
					src++;
					left--;
					*(buffer++) = '\\';
					*(buffer++) = mnemonic;
				}
			}
		}

		// Tail without any characters to escape.
		if (__ctfe)
			buffer[0 .. left] = src[0 .. left];
		else
			llvm_memcpy(buffer, src, left);
		buffer += left;
	}
	else
	{
		// Unescaped copy of the whole text.
		auto src = str.ptr;
		size_t left = str.length;
		if (__ctfe)
			buffer[0 .. left] = src[0 .. left];
		else
			llvm_memcpy(buffer, src, left);
		buffer += left;
	}
}
542 
/**
 * Computes the buffer space needed to write $(D str) with escaping: the
 * length of the string plus one extra character for every occurrence of
 * double quote, tab, carriage return, newline, backslash, backspace or NUL.
 *
 * Each such character is counted exactly once, regardless of what follows it.
 *
 * Params:
 *   str = the text to measure
 *
 * Returns:
 *   $(D str.length) plus the number of characters from the set above
 */
nothrow size_t escapedLength(string str)
{
	size_t sz = str.length;
	foreach (immutable char c; str)
	{
		switch (c)
		{
		case '"', '\t', '\r', '\n', '\\', '\b', '\0':
			sz++;
			break;
		default:
			break;
		}
	}
	return sz;
}
560 
/**
 * Writes a pointer for the "%s", "%p" or "%S" specifier by formatting its
 * address as a $(D size_t) with the "%X" writer, and advances $(D buffer)
 * accordingly.
 *
 * Params:
 *   buffer = write position, moved forward by the "%X" writer
 *   p = the pointer whose address is written
 */
pure nothrow
void formattedWriteItem(string format)(ref char* buffer, void* p)
		if (format == "%s" || format == "%p" || format == "%S")
{
	immutable size_t address = cast(size_t) p;
	formattedWriteItem!"%X"(buffer, address);
}
567 
568 /+
569  ╔══════════════════════════════════════════════════════════════════════════════
570  ║ ⚑ Helper Structs
571  ╚══════════════════════════════════════════════════════════════════════════════
572  +/
573 
/**
 * Fixed-capacity string of at most $(D n) characters that is filled from the
 * back towards the front.
 *
 * Appending with $(D ~=) stores the character in front of the current
 * content, which suits digit-by-digit number formatting where the least
 * significant digit is produced first. The struct converts implicitly to a
 * slice of its content.
 */
struct RevFillStr(size_t n)
{
private:

	// Index of the first used character; equals n while the string is empty.
	size_t offset = n;
	char[n] buffer = '\0';

public:

	alias opSlice this;

	/// Prepends $(D ch) to the content. The string must not be full.
	@safe pure nothrow @nogc
	void opOpAssign(string op : "~")(char ch)
	in
	{
		assert(offset > 0);
	}
	do
	{
		buffer[--offset] = ch;
	}

	/// Returns: the current content as a slice of the internal buffer.
	@safe pure nothrow @nogc
	@property inout(char)[] opSlice() inout
	{
		return buffer[offset .. n];
	}

	/**
	 * Returns: a pointer to the first character of the content. For an empty
	 * string this is the one-past-the-end position of the buffer and must
	 * not be dereferenced.
	 */
	@trusted pure nothrow @nogc
	@property inout(char)* ptr() inout
	{
		// offset never exceeds n, so the result stays within, or exactly one
		// past, the buffer. Indexing buffer[offset] instead would fail the
		// bounds check for an empty string.
		return buffer.ptr + offset;
	}

	/// Returns: the number of characters stored.
	@safe pure nothrow @nogc
	@property size_t length() const
	{
		return n - offset;
	}
}
614 
/**
 * Tells whether $(D c) is a valid Unicode code point: at most 0x10FFFF and
 * outside the range 0xD800 .. 0xDFFF.
 *
 * Params:
 *   c = the code point to check
 *
 * Returns:
 *   $(D true) if $(D c) is valid, $(D false) otherwise
 */
bool isValidDchar(dchar c) pure nothrow @safe @nogc
{
	if (c >= 0xD800 && c <= 0xDFFF)
		return false;
	return c <= 0x10FFFF;
}