memutils.dictionarylist source code

1 /**
2     Defines a string based dictionary list with conserved insertion order.
3 
4     Copyright: © 2012-2014 RejectedSoftware e.K.
5     License: Subject to the terms of the MIT license, as written in the included LICENSE file.
6     Authors: Sönke Ludwig
7 */
8 module memutils.dictionarylist;
9 
10 import memutils.helpers;
11 import memutils.allocators;
12 import memutils.refcounted;
13 import memutils.utils;
14 import memutils.vector;
15 
16 nothrow:
17 @trusted:
18 alias DictionaryListRef(KEY, VALUE, ALLOC = ThreadMem, bool case_sensitive = true, size_t NUM_STATIC_FIELDS = 8) = RefCounted!(DictionaryList!(KEY, VALUE, ALLOC, case_sensitive, NUM_STATIC_FIELDS), ALLOC);
19 
20 /**
21  * 
22     Behaves similar to $(D VALUE[string]) but the insertion order is not changed
23     and multiple values per key are supported.
24     
25     Note that despite case not being relevant for matching keys, iterating
26     over the list will yield the original case of the key that was put in.
27 
28     Insertion and lookup has O(n) complexity.
29 */
30 struct DictionaryList(KEY, VALUE, ALLOC = ThreadMem, bool case_sensitive = true, size_t NUM_STATIC_FIELDS = 8) {
31 	nothrow:
32 	@trusted:
33 	@disable this(this);
34 
35 	import memutils.ct;
36 	
37 	private {
38 		static struct Field { uint keyCheckSum; KEY key; VALUE value; }
39 		Field[NUM_STATIC_FIELDS] m_fields;
40 		size_t m_fieldCount;
41 		Field[] m_extendedFields;
42 		size_t m_extendedFieldCount;
43 	}
44 
45 	~this() {
46 		if (m_extendedFields) {
47 			auto sz = m_extendedFields.length;
48 			freeArray!(Field, ALLOC)(m_extendedFields.ptr[0 .. m_extendedFieldCount], sz);
49 		}
50 	}
51 	
52 	alias KeyType = KEY;
53 	alias ValueType = VALUE;
54 	
55 	struct FieldTuple { KeyType key; ValueType value; }
56 
57 	@property bool empty() const { return length == 0; }
58 
59 	/** The number of fields present in the map.
60     */
61 	@property size_t length() const { return m_fieldCount + m_extendedFields.length; }
62 	
63 	/** Removes the first field that matches the given key.
64     */
65 	void remove(KeyType key)
66 	{
67 		auto keysum = computeCheckSumI(key);
68 		auto idx = getIndex(m_fields[0 .. m_fieldCount], key, keysum);
69 		if( idx >= 0 ){
70 			auto slice = m_fields[0 .. m_fieldCount];
71 			removeFromArrayIdx(slice, idx);
72 			m_fieldCount--;
73 		} else {
74 			idx = getIndex(m_extendedFields, key, keysum);
75 			if (idx < 0) return;
76 			removeFromArrayIdx(m_extendedFields, idx);
77 		}
78 	}
79 	
80 	/** Removes all fields that matches the given key.
81     */
82 	void removeAll(KeyType key)
83 	{
84 		auto keysum = computeCheckSumI(key);
85 		for (size_t i = 0; i < m_fieldCount;) {
86 			if (m_fields[i].keyCheckSum == keysum && matches(m_fields[i].key, key)) {
87 				auto slice = m_fields[0 .. m_fieldCount];
88 				removeFromArrayIdx(slice, i);
89 				m_fieldCount--;
90 			} else i++;
91 		}
92 		
93 		for (size_t i = 0; i < m_extendedFields.length;) {
94 			if (m_extendedFields[i].keyCheckSum == keysum && matches(m_extendedFields[i].key, key))
95 				removeFromArrayIdx(m_extendedFields, i);
96 			else i++;
97 		}
98 	}
99 	
100 	/** Adds a new field to the map.
101 
102         The new field will be added regardless of any existing fields that
103         have the same key, possibly resulting in duplicates. Use opIndexAssign
104         if you want to avoid duplicates.
105     */
106 	void insert()(auto const ref KeyType key, ValueType value)
107 	{
108 		auto keysum = computeCheckSumI(key);
109 		if (m_fieldCount < m_fields.length) {
110 			m_fields[m_fieldCount++] = Field(keysum, *cast(KeyType*) &key, value);
111 		}
112 		else {
113 			grow(1);
114 			m_extendedFields[$-1] = Field(keysum, *cast(KeyType*) &key, value);
115 		}
116 	}
117 	
118 	/** Returns the first field that matches the given key.
119 
120         If no field is found, def_val is returned.
121     */
122 	ValueType get(KeyType key, ValueType def_val = ValueType.init) {
123 		if (auto pv = key in this) return *pv;
124 		return def_val;
125 	}
126 	
127 	const(ValueType) get(in KeyType key, const(ValueType) def_val = const(ValueType).init) const
128 	{
129 		if (auto pv = key in this) return *pv;
130 		return def_val;
131 	}
132 	
133 	/** Returns all values matching the given key.
134 
135         Note that the version returning an array will allocate using the same allocator for each call.
136     */
137 	Vector!(ValueType, ALLOC) getValuesAt()(auto const ref KeyType key)
138 	const {
139 		auto ret = Vector!(ValueType, ALLOC)(0);
140 		this.opApply( (k, const ref v) {
141 				// static if (is(ValueType == string)) logTrace("Checking field: ", v);
142 				//logTrace("Looping ", k, " => ", v);
143 				if (matches(key, k)) {
144 					//logTrace("Appending: ", v);
145 					ret ~= v;
146 				}
147 				return 0;
148 			});
149 		//logTrace("Finished getValuesAt with: ", ret[]);
150 		return ret.move();
151 	}
152 	
153 	/// ditto
154 	void getValuesAt(in KeyType key, scope void delegate(const(ValueType)) nothrow del)
155 	const {
156 		uint keysum = computeCheckSumI(key);
157 		foreach (ref f; m_fields[0 .. m_fieldCount]) {
158 			if (f == Field.init) continue;
159 			if (f.keyCheckSum != keysum) continue;
160 			if (matches(f.key, key)) del(f.value);
161 		}
162 		foreach (ref f; m_extendedFields) {
163 			if (f.keyCheckSum != keysum) continue;
164 			if (matches(f.key, key)) del(f.value);
165 		}
166 	}
167 	/** Returns the first value matching the given key.
168     */
169 	inout(ValueType) opIndex(KeyType key)
170 	inout {
171 		auto pitm = key in this;
172 		if (pitm is null) return "?";
173 		return *pitm;
174 	}
175 	
176 	/** Adds or replaces the given field with a new value.
177     */
178 	ValueType opIndexAssign(ValueType val, KeyType key)
179 	{
180 		auto pitm = key in this;
181 		if( pitm ) *pitm = val;
182 		else if( m_fieldCount < m_fields.length ) m_fields[m_fieldCount++] = Field(computeCheckSumI(key), key, val);
183 		else {
184 			grow(1);
185 			m_extendedFields[$-1] = Field(computeCheckSumI(key), key, val);
186 		}
187 		return val;
188 	}
189 	
190 	/** Returns a pointer to the first field that matches the given key.
191     */
192 	inout(ValueType)* opBinaryRight(string op)(in KeyType key) inout if(op == "in") {
193 		return find(key);
194 	}
195 	
196 	inout(ValueType)* find(in KeyType key) inout 
197 	{
198 		uint keysum = computeCheckSumI(key);
199 		auto idx = getIndex(m_fields[0 .. m_fieldCount], key, keysum);
200 		if( idx >= 0 ) return &m_fields[idx].value;
201 		idx = getIndex(m_extendedFields, key, keysum);
202 		if( idx >= 0 ) return &m_extendedFields[idx].value;
203 		return null;
204 	}
205 	
206 	/// ditto
207 	bool opBinaryRight(string op)(KeyType key) inout if(op == "!in") {
208 		return !(key in this);
209 	}
210 	
211 	/** Iterates over all fields, including duplicates.
212     */
213 	int opApply(int delegate(KeyType key, ref ValueType val) nothrow del)
214 	{
215 		foreach (ref kv; m_fields[0 .. m_fieldCount]) {
216 			if (kv == Field.init) return 0;
217 			if (auto ret = del(kv.key, kv.value))
218 				return ret;
219 		}
220 		foreach (ref kv; m_extendedFields) {
221 			if (auto ret = del(kv.key, kv.value))
222 				return ret;
223 		}
224 		return 0;
225 	}
226 	
227 	int opApply(int delegate(const ref KeyType key, const ref ValueType val) nothrow del) const
228 	{
229 		foreach (ref kv; m_fields[0 .. m_fieldCount]) {
230 			if (kv == Field.init) return 0;
231 			if (auto ret = del(kv.key, kv.value))
232 				return ret;
233 		}
234 		foreach (ref kv; m_extendedFields) {
235 			if (auto ret = del(kv.key, kv.value))
236 				return ret;
237 		}
238 		return 0;
239 	}
240 	
241 	/// ditto
242 	int opApply(int delegate(ref ValueType val) nothrow del)
243 	{
244 		return this.opApply((KeyType key, ref ValueType val) { return del(val); });
245 	}
246 	
247 	/// ditto
248 	int opApply(int delegate(KeyType key, ref const(ValueType) val) del) const
249 	{
250 		return (cast() this).opApply(cast(int delegate(KeyType, ref ValueType) nothrow) del);
251 	}
252 	
253 	/// ditto
254 	int opApply(int delegate(ref const(ValueType) val) del) const
255 	{
256 		return (cast() this).opApply(cast(int delegate(ref ValueType) nothrow) del);
257 	}
258 
259 	bool opEquals(const ref DictionaryList!(KEY, VALUE, ALLOC) other) const
260 	{
261 		foreach (const ref KeyType key, const ref ValueType val; this)
262 		{
263 			bool found;
264 			other.getValuesAt(key, (const ValueType oval) {
265 					if (oval == val) {
266 						found = true;
267 						return;
268 					}
269 				});
270 			if (!found)
271 				return false;
272 		}
273 		
274 		return true;
275 	}
276 
277 	private void grow(size_t n) {
278 		if (m_extendedFields.length + n < m_extendedFieldCount) {
279 			m_extendedFields = m_extendedFields.ptr[0 .. m_extendedFields.length + n];
280 			return;
281 		}
282 		if (m_extendedFields.length > 0)
283 		{
284 			size_t oldsz = m_extendedFields.length;
285 			m_extendedFields = m_extendedFields.ptr[0 .. m_extendedFieldCount];
286 			m_extendedFieldCount = (m_extendedFieldCount + n)*3/2;
287 			m_extendedFields = reallocArray!(Field, ALLOC)(m_extendedFields, m_extendedFieldCount)[0 .. oldsz + n];
288 			memset(m_extendedFields.ptr + oldsz, 0, (m_extendedFieldCount-oldsz)*Field.sizeof);
289 		}
290 		else {
291 			m_extendedFieldCount = 16;
292 			m_extendedFields = allocArray!(Field, ALLOC)(16).ptr[0 .. n];
293 			memset(m_extendedFields.ptr, 0, m_extendedFieldCount*Field.sizeof);
294 		}
295 	}
296 
297 	private ptrdiff_t getIndex(in Field[] map, in KeyType key, uint keysum)
298 	const {
299 		foreach (i, ref const(Field) entry; map) {
300 			if (entry.keyCheckSum != keysum) continue;
301 			if (matches(entry.key, key)) return i;
302 		}
303 		return -1;
304 	}
305 	
306 	private static bool matches(in KeyType a, in KeyType b)
307 	{
308 		static if (case_sensitive) return a == b;
309 		else static if (is (KeyType == string)) return icmp2(a, b) == 0;
310 		else return a == b;
311 	}
312 	
313 	// very simple check sum function with a good chance to match
314 	// strings with different case equal
315 	private static uint computeCheckSumI(string s)
316 	@trusted {
317 		uint csum = 0;
318 		immutable(char)* pc = s.ptr, pe = s.ptr + s.length;
319 		for (; pc != pe; pc++) {
320 			static if (case_sensitive) csum ^= *pc;
321 			else csum ^= *pc & 0x1101_1111;
322 			csum = (csum << 1) | (csum >> 31);
323 		}
324 		return csum;
325 	}
326 	
327 	private static uint computeCheckSumI(T)(ref T obj)
328 	@trusted {
329 		return cast(uint)hashOf(obj, 0);
330 	}
331 }
332 
333 private:
334 nothrow:
335 void removeFromArrayIdx(T)(ref T[] array, size_t idx)
336 {
337 	foreach( j; idx+1 .. array.length) { 
338 		array[j-1] = array[j];
339 	}
340 	array[array.length-1].destructRecurse();
341 	array = array.ptr[0 .. array.length-1];
342 }
343 /* =================== Decode ======================= */
344 
345 /***************
346  * Decodes and returns character starting at s[idx]. idx is advanced past the
347  * decoded character. If the character is not well formed, a UtfException is
348  * thrown and idx remains unchanged.
349  */
350  @safe @nogc pure nothrow
351 bool isValidDchar(dchar c)
352 {
353     /* Note: FFFE and FFFF are specifically permitted by the
354      * Unicode standard for application internal use, but are not
355      * allowed for interchange.
356      * (thanks to Arcane Jill)
357      */
358 
359     return c < 0xD800 ||
360         (c > 0xDFFF && c <= 0x10FFFF /*&& c != 0xFFFE && c != 0xFFFF*/);
361 }
362 
363 @safe pure nothrow
364 dchar decode(const scope char[] s, ref size_t idx)
365     in
366     {
367         assert(idx >= 0 && idx < s.length);
368     }
369     out (result)
370     {
371         assert(isValidDchar(result));
372     }
373     do
374     {
375         size_t len = s.length;
376         dchar V;
377         size_t i = idx;
378         char u = s[i];
379 
380         if (u & 0x80)
381         {   uint n;
382             char u2;
383 
384             /* The following encodings are valid, except for the 5 and 6 byte
385              * combinations:
386              *  0xxxxxxx
387              *  110xxxxx 10xxxxxx
388              *  1110xxxx 10xxxxxx 10xxxxxx
389              *  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
390              *  111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
391              *  1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
392              */
393             for (n = 1; ; n++)
394             {
395                 if (n > 4)
396                     goto Lerr;          // only do the first 4 of 6 encodings
397                 if (((u << n) & 0x80) == 0)
398                 {
399                     if (n == 1)
400                         goto Lerr;
401                     break;
402                 }
403             }
404 
405             // Pick off (7 - n) significant bits of B from first byte of octet
406             V = cast(dchar)(u & ((1 << (7 - n)) - 1));
407 
408             if (i + (n - 1) >= len)
409                 goto Lerr;                      // off end of string
410 
411             /* The following combinations are overlong, and illegal:
412              *  1100000x (10xxxxxx)
413              *  11100000 100xxxxx (10xxxxxx)
414              *  11110000 1000xxxx (10xxxxxx 10xxxxxx)
415              *  11111000 10000xxx (10xxxxxx 10xxxxxx 10xxxxxx)
416              *  11111100 100000xx (10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx)
417              */
418             u2 = s[i + 1];
419             if ((u & 0xFE) == 0xC0 ||
420                 (u == 0xE0 && (u2 & 0xE0) == 0x80) ||
421                 (u == 0xF0 && (u2 & 0xF0) == 0x80) ||
422                 (u == 0xF8 && (u2 & 0xF8) == 0x80) ||
423                 (u == 0xFC && (u2 & 0xFC) == 0x80))
424                 goto Lerr;                      // overlong combination
425 
426             for (uint j = 1; j != n; j++)
427             {
428                 u = s[i + j];
429                 if ((u & 0xC0) != 0x80)
430                     goto Lerr;                  // trailing bytes are 10xxxxxx
431                 V = (V << 6) | (u & 0x3F);
432             }
433             if (!isValidDchar(V))
434                 goto Lerr;
435             i += n;
436         }
437         else
438         {
439             V = cast(dchar) u;
440             i++;
441         }
442 
443         idx = i;
444         return V;
445 
446       Lerr:
447       return '?';
448     //return V; // dummy return
449     }
450 	
451 /// Special version of icmp() with optimization for ASCII characters
452 int icmp2(in string a, in string b)
453 @safe pure {
454 	size_t i = 0, j = 0;
455 	
456 	// fast skip equal prefix
457 	size_t min_len = min(a.length, b.length);
458 	while ( i < min_len && a[i] == b[i] ) i++;
459 	if( i > 0 && (a[i-1] & 0x80) ) i--; // don't stop half-way in a UTF-8 sequence
460 	j = i;
461 	
462 	// compare the differing character and the rest of the string
463 	while (i < a.length && j < b.length){
464 		uint ac = cast(uint)a[i];
465 		uint bc = cast(uint)b[j];
466 		if( !((ac | bc) & 0x80) ){
467 			i++;
468 			j++;
469 			if( ac >= 'A' && ac <= 'Z' ) ac += 'a' - 'A';
470 			if( bc >= 'A' && bc <= 'Z' ) bc += 'a' - 'A';
471 			if( ac < bc ) return -1;
472 			else if( ac > bc ) return 1;
473 		} else {
474 			dchar acp = decode(a, i);
475 			dchar bcp = decode(b, j);
476 			if( acp != bcp ){					
477 				if(acp >= 'A' && acp <= 'Z')
478 				{
479 					acp += ('a' - 'A');
480 				}
481 				if(bcp >= 'A' && bcp <= 'Z')
482 				{
483 					bcp += ('a' - 'A');
484 				}
485 				if( acp < bcp ) return -1;
486 				else if( acp > bcp ) return 1;
487 			}
488 		}
489 	}
490 	
491 	if( i < a.length ) return 1;
492 	else if( j < b.length ) return -1;
493 	
494 	assert(i == a.length || j == b.length, "Strings equal but we didn't fully compare them!?");
495 	return 0;
496 }