1 /** 2 Defines a string based dictionary list with conserved insertion order. 3 4 Copyright: © 2012-2014 RejectedSoftware e.K. 5 License: Subject to the terms of the MIT license, as written in the included LICENSE file. 6 Authors: Sönke Ludwig 7 */ 8 module memutils.dictionarylist; 9 10 import memutils.helpers; 11 import memutils.allocators; 12 import memutils.refcounted; 13 import memutils.utils; 14 import memutils.vector; 15 16 nothrow: 17 @trusted: 18 alias DictionaryListRef(KEY, VALUE, ALLOC = ThreadMem, bool case_sensitive = true, size_t NUM_STATIC_FIELDS = 8) = RefCounted!(DictionaryList!(KEY, VALUE, ALLOC, case_sensitive, NUM_STATIC_FIELDS), ALLOC); 19 20 /** 21 * 22 Behaves similar to $(D VALUE[string]) but the insertion order is not changed 23 and multiple values per key are supported. 24 25 Note that despite case not being relevant for matching keys, iterating 26 over the list will yield the original case of the key that was put in. 27 28 Insertion and lookup has O(n) complexity. 29 */ 30 struct DictionaryList(KEY, VALUE, ALLOC = ThreadMem, bool case_sensitive = true, size_t NUM_STATIC_FIELDS = 8) { 31 nothrow: 32 @trusted: 33 @disable this(this); 34 35 import memutils.ct; 36 37 private { 38 static struct Field { uint keyCheckSum; KEY key; VALUE value; } 39 Field[NUM_STATIC_FIELDS] m_fields; 40 size_t m_fieldCount; 41 Field[] m_extendedFields; 42 size_t m_extendedFieldCount; 43 } 44 45 ~this() { 46 if (m_extendedFields) { 47 auto sz = m_extendedFields.length; 48 freeArray!(Field, ALLOC)(m_extendedFields.ptr[0 .. m_extendedFieldCount], sz); 49 } 50 } 51 52 alias KeyType = KEY; 53 alias ValueType = VALUE; 54 55 struct FieldTuple { KeyType key; ValueType value; } 56 57 @property bool empty() const { return length == 0; } 58 59 /** The number of fields present in the map. 60 */ 61 @property size_t length() const { return m_fieldCount + m_extendedFields.length; } 62 63 /** Removes the first field that matches the given key. 64 */ 65 void remove(KeyType key) 66 { 67 auto keysum = computeCheckSumI(key); 68 auto idx = getIndex(m_fields[0 .. m_fieldCount], key, keysum); 69 if( idx >= 0 ){ 70 auto slice = m_fields[0 .. m_fieldCount]; 71 removeFromArrayIdx(slice, idx); 72 m_fieldCount--; 73 } else { 74 idx = getIndex(m_extendedFields, key, keysum); 75 if (idx < 0) return; 76 removeFromArrayIdx(m_extendedFields, idx); 77 } 78 } 79 80 /** Removes all fields that matches the given key. 81 */ 82 void removeAll(KeyType key) 83 { 84 auto keysum = computeCheckSumI(key); 85 for (size_t i = 0; i < m_fieldCount;) { 86 if (m_fields[i].keyCheckSum == keysum && matches(m_fields[i].key, key)) { 87 auto slice = m_fields[0 .. m_fieldCount]; 88 removeFromArrayIdx(slice, i); 89 m_fieldCount--; 90 } else i++; 91 } 92 93 for (size_t i = 0; i < m_extendedFields.length;) { 94 if (m_extendedFields[i].keyCheckSum == keysum && matches(m_extendedFields[i].key, key)) 95 removeFromArrayIdx(m_extendedFields, i); 96 else i++; 97 } 98 } 99 100 /** Adds a new field to the map. 101 102 The new field will be added regardless of any existing fields that 103 have the same key, possibly resulting in duplicates. Use opIndexAssign 104 if you want to avoid duplicates. 105 */ 106 void insert()(auto const ref KeyType key, ValueType value) 107 { 108 auto keysum = computeCheckSumI(key); 109 if (m_fieldCount < m_fields.length) { 110 m_fields[m_fieldCount++] = Field(keysum, *cast(KeyType*) &key, value); 111 } 112 else { 113 grow(1); 114 m_extendedFields[$-1] = Field(keysum, *cast(KeyType*) &key, value); 115 } 116 } 117 118 /** Returns the first field that matches the given key. 119 120 If no field is found, def_val is returned. 121 */ 122 ValueType get(KeyType key, ValueType def_val = ValueType.init) { 123 if (auto pv = key in this) return *pv; 124 return def_val; 125 } 126 127 const(ValueType) get(in KeyType key, const(ValueType) def_val = const(ValueType).init) const 128 { 129 if (auto pv = key in this) return *pv; 130 return def_val; 131 } 132 133 /** Returns all values matching the given key. 134 135 Note that the version returning an array will allocate using the same allocator for each call. 136 */ 137 Vector!(ValueType, ALLOC) getValuesAt()(auto const ref KeyType key) 138 const { 139 auto ret = Vector!(ValueType, ALLOC)(0); 140 this.opApply( (k, const ref v) { 141 // static if (is(ValueType == string)) logTrace("Checking field: ", v); 142 //logTrace("Looping ", k, " => ", v); 143 if (matches(key, k)) { 144 //logTrace("Appending: ", v); 145 ret ~= v; 146 } 147 return 0; 148 }); 149 //logTrace("Finished getValuesAt with: ", ret[]); 150 return ret.move(); 151 } 152 153 /// ditto 154 void getValuesAt(in KeyType key, scope void delegate(const(ValueType)) nothrow del) 155 const { 156 uint keysum = computeCheckSumI(key); 157 foreach (ref f; m_fields[0 .. m_fieldCount]) { 158 if (f == Field.init) continue; 159 if (f.keyCheckSum != keysum) continue; 160 if (matches(f.key, key)) del(f.value); 161 } 162 foreach (ref f; m_extendedFields) { 163 if (f.keyCheckSum != keysum) continue; 164 if (matches(f.key, key)) del(f.value); 165 } 166 } 167 /** Returns the first value matching the given key. 168 */ 169 inout(ValueType) opIndex(KeyType key) 170 inout { 171 auto pitm = key in this; 172 if (pitm is null) return "?"; 173 return *pitm; 174 } 175 176 /** Adds or replaces the given field with a new value. 177 */ 178 ValueType opIndexAssign(ValueType val, KeyType key) 179 { 180 auto pitm = key in this; 181 if( pitm ) *pitm = val; 182 else if( m_fieldCount < m_fields.length ) m_fields[m_fieldCount++] = Field(computeCheckSumI(key), key, val); 183 else { 184 grow(1); 185 m_extendedFields[$-1] = Field(computeCheckSumI(key), key, val); 186 } 187 return val; 188 } 189 190 /** Returns a pointer to the first field that matches the given key. 191 */ 192 inout(ValueType)* opBinaryRight(string op)(in KeyType key) inout if(op == "in") { 193 return find(key); 194 } 195 196 inout(ValueType)* find(in KeyType key) inout 197 { 198 uint keysum = computeCheckSumI(key); 199 auto idx = getIndex(m_fields[0 .. m_fieldCount], key, keysum); 200 if( idx >= 0 ) return &m_fields[idx].value; 201 idx = getIndex(m_extendedFields, key, keysum); 202 if( idx >= 0 ) return &m_extendedFields[idx].value; 203 return null; 204 } 205 206 /// ditto 207 bool opBinaryRight(string op)(KeyType key) inout if(op == "!in") { 208 return !(key in this); 209 } 210 211 /** Iterates over all fields, including duplicates. 212 */ 213 int opApply(int delegate(KeyType key, ref ValueType val) nothrow del) 214 { 215 foreach (ref kv; m_fields[0 .. m_fieldCount]) { 216 if (kv == Field.init) return 0; 217 if (auto ret = del(kv.key, kv.value)) 218 return ret; 219 } 220 foreach (ref kv; m_extendedFields) { 221 if (auto ret = del(kv.key, kv.value)) 222 return ret; 223 } 224 return 0; 225 } 226 227 int opApply(int delegate(const ref KeyType key, const ref ValueType val) nothrow del) const 228 { 229 foreach (ref kv; m_fields[0 .. m_fieldCount]) { 230 if (kv == Field.init) return 0; 231 if (auto ret = del(kv.key, kv.value)) 232 return ret; 233 } 234 foreach (ref kv; m_extendedFields) { 235 if (auto ret = del(kv.key, kv.value)) 236 return ret; 237 } 238 return 0; 239 } 240 241 /// ditto 242 int opApply(int delegate(ref ValueType val) nothrow del) 243 { 244 return this.opApply((KeyType key, ref ValueType val) { return del(val); }); 245 } 246 247 /// ditto 248 int opApply(int delegate(KeyType key, ref const(ValueType) val) del) const 249 { 250 return (cast() this).opApply(cast(int delegate(KeyType, ref ValueType) nothrow) del); 251 } 252 253 /// ditto 254 int opApply(int delegate(ref const(ValueType) val) del) const 255 { 256 return (cast() this).opApply(cast(int delegate(ref ValueType) nothrow) del); 257 } 258 259 bool opEquals(const ref DictionaryList!(KEY, VALUE, ALLOC) other) const 260 { 261 foreach (const ref KeyType key, const ref ValueType val; this) 262 { 263 bool found; 264 other.getValuesAt(key, (const ValueType oval) { 265 if (oval == val) { 266 found = true; 267 return; 268 } 269 }); 270 if (!found) 271 return false; 272 } 273 274 return true; 275 } 276 277 private void grow(size_t n) { 278 if (m_extendedFields.length + n < m_extendedFieldCount) { 279 m_extendedFields = m_extendedFields.ptr[0 .. m_extendedFields.length + n]; 280 return; 281 } 282 if (m_extendedFields.length > 0) 283 { 284 size_t oldsz = m_extendedFields.length; 285 m_extendedFields = m_extendedFields.ptr[0 .. m_extendedFieldCount]; 286 m_extendedFieldCount = (m_extendedFieldCount + n)*3/2; 287 m_extendedFields = reallocArray!(Field, ALLOC)(m_extendedFields, m_extendedFieldCount)[0 .. oldsz + n]; 288 memset(m_extendedFields.ptr + oldsz, 0, (m_extendedFieldCount-oldsz)*Field.sizeof); 289 } 290 else { 291 m_extendedFieldCount = 16; 292 m_extendedFields = allocArray!(Field, ALLOC)(16).ptr[0 .. n]; 293 memset(m_extendedFields.ptr, 0, m_extendedFieldCount*Field.sizeof); 294 } 295 } 296 297 private ptrdiff_t getIndex(in Field[] map, in KeyType key, uint keysum) 298 const { 299 foreach (i, ref const(Field) entry; map) { 300 if (entry.keyCheckSum != keysum) continue; 301 if (matches(entry.key, key)) return i; 302 } 303 return -1; 304 } 305 306 private static bool matches(in KeyType a, in KeyType b) 307 { 308 static if (case_sensitive) return a == b; 309 else static if (is (KeyType == string)) return icmp2(a, b) == 0; 310 else return a == b; 311 } 312 313 // very simple check sum function with a good chance to match 314 // strings with different case equal 315 private static uint computeCheckSumI(string s) 316 @trusted { 317 uint csum = 0; 318 immutable(char)* pc = s.ptr, pe = s.ptr + s.length; 319 for (; pc != pe; pc++) { 320 static if (case_sensitive) csum ^= *pc; 321 else csum ^= *pc & 0x1101_1111; 322 csum = (csum << 1) | (csum >> 31); 323 } 324 return csum; 325 } 326 327 private static uint computeCheckSumI(T)(ref T obj) 328 @trusted { 329 return cast(uint)hashOf(obj, 0); 330 } 331 } 332 333 private: 334 nothrow: 335 void removeFromArrayIdx(T)(ref T[] array, size_t idx) 336 { 337 foreach( j; idx+1 .. array.length) { 338 array[j-1] = array[j]; 339 } 340 array[array.length-1].destructRecurse(); 341 array = array.ptr[0 .. array.length-1]; 342 } 343 /* =================== Decode ======================= */ 344 345 /*************** 346 * Decodes and returns character starting at s[idx]. idx is advanced past the 347 * decoded character. If the character is not well formed, a UtfException is 348 * thrown and idx remains unchanged. 349 */ 350 @safe @nogc pure nothrow 351 bool isValidDchar(dchar c) 352 { 353 /* Note: FFFE and FFFF are specifically permitted by the 354 * Unicode standard for application internal use, but are not 355 * allowed for interchange. 356 * (thanks to Arcane Jill) 357 */ 358 359 return c < 0xD800 || 360 (c > 0xDFFF && c <= 0x10FFFF /*&& c != 0xFFFE && c != 0xFFFF*/); 361 } 362 363 @safe pure nothrow 364 dchar decode(const scope char[] s, ref size_t idx) 365 in 366 { 367 assert(idx >= 0 && idx < s.length); 368 } 369 out (result) 370 { 371 assert(isValidDchar(result)); 372 } 373 do 374 { 375 size_t len = s.length; 376 dchar V; 377 size_t i = idx; 378 char u = s[i]; 379 380 if (u & 0x80) 381 { uint n; 382 char u2; 383 384 /* The following encodings are valid, except for the 5 and 6 byte 385 * combinations: 386 * 0xxxxxxx 387 * 110xxxxx 10xxxxxx 388 * 1110xxxx 10xxxxxx 10xxxxxx 389 * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 390 * 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 391 * 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 392 */ 393 for (n = 1; ; n++) 394 { 395 if (n > 4) 396 goto Lerr; // only do the first 4 of 6 encodings 397 if (((u << n) & 0x80) == 0) 398 { 399 if (n == 1) 400 goto Lerr; 401 break; 402 } 403 } 404 405 // Pick off (7 - n) significant bits of B from first byte of octet 406 V = cast(dchar)(u & ((1 << (7 - n)) - 1)); 407 408 if (i + (n - 1) >= len) 409 goto Lerr; // off end of string 410 411 /* The following combinations are overlong, and illegal: 412 * 1100000x (10xxxxxx) 413 * 11100000 100xxxxx (10xxxxxx) 414 * 11110000 1000xxxx (10xxxxxx 10xxxxxx) 415 * 11111000 10000xxx (10xxxxxx 10xxxxxx 10xxxxxx) 416 * 11111100 100000xx (10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx) 417 */ 418 u2 = s[i + 1]; 419 if ((u & 0xFE) == 0xC0 || 420 (u == 0xE0 && (u2 & 0xE0) == 0x80) || 421 (u == 0xF0 && (u2 & 0xF0) == 0x80) || 422 (u == 0xF8 && (u2 & 0xF8) == 0x80) || 423 (u == 0xFC && (u2 & 0xFC) == 0x80)) 424 goto Lerr; // overlong combination 425 426 for (uint j = 1; j != n; j++) 427 { 428 u = s[i + j]; 429 if ((u & 0xC0) != 0x80) 430 goto Lerr; // trailing bytes are 10xxxxxx 431 V = (V << 6) | (u & 0x3F); 432 } 433 if (!isValidDchar(V)) 434 goto Lerr; 435 i += n; 436 } 437 else 438 { 439 V = cast(dchar) u; 440 i++; 441 } 442 443 idx = i; 444 return V; 445 446 Lerr: 447 return '?'; 448 //return V; // dummy return 449 } 450 451 /// Special version of icmp() with optimization for ASCII characters 452 int icmp2(in string a, in string b) 453 @safe pure { 454 size_t i = 0, j = 0; 455 456 // fast skip equal prefix 457 size_t min_len = min(a.length, b.length); 458 while ( i < min_len && a[i] == b[i] ) i++; 459 if( i > 0 && (a[i-1] & 0x80) ) i--; // don't stop half-way in a UTF-8 sequence 460 j = i; 461 462 // compare the differing character and the rest of the string 463 while (i < a.length && j < b.length){ 464 uint ac = cast(uint)a[i]; 465 uint bc = cast(uint)b[j]; 466 if( !((ac | bc) & 0x80) ){ 467 i++; 468 j++; 469 if( ac >= 'A' && ac <= 'Z' ) ac += 'a' - 'A'; 470 if( bc >= 'A' && bc <= 'Z' ) bc += 'a' - 'A'; 471 if( ac < bc ) return -1; 472 else if( ac > bc ) return 1; 473 } else { 474 dchar acp = decode(a, i); 475 dchar bcp = decode(b, j); 476 if( acp != bcp ){ 477 if(acp >= 'A' && acp <= 'Z') 478 { 479 acp += ('a' - 'A'); 480 } 481 if(bcp >= 'A' && bcp <= 'Z') 482 { 483 bcp += ('a' - 'A'); 484 } 485 if( acp < bcp ) return -1; 486 else if( acp > bcp ) return 1; 487 } 488 } 489 } 490 491 if( i < a.length ) return 1; 492 else if( j < b.length ) return -1; 493 494 assert(i == a.length || j == b.length, "Strings equal but we didn't fully compare them!?"); 495 return 0; 496 }