本文是對c#中Dictionary內部實現原理進行簡單的剖析。如有表述錯誤,歡迎指正。 主要對照源碼來解析,目前對照源碼的版本是.NET Framework 4.8,源碼地址。 1. 關鍵的欄位和Entry結構 2. 添加鍵值(Add) 2.1 數組entries和buckets初始化 2.2 添加鍵 ...
目錄
- 關鍵的欄位和Entry結構
- 添加鍵值(Add)
- 取鍵值(Find)
- 移除鍵值(Remove)
- 再插入鍵值
本文是對c#中Dictionary內部實現原理進行簡單的剖析。如有表述錯誤,歡迎指正。
主要對照源碼來解析,目前對照源碼的版本是.NET Framework 4.8,源碼地址。
1. 關鍵的欄位和Entry結構
// One slot of the entries array. Slots whose keys map to the same bucket are
// linked together through `next`, forming a singly linked chain.
struct Entry
{
    public int hashCode;    // hash code of the key & 0x7FFFFFFF; Remove sets it to -1 when the slot is freed
    public int next;        // entries index of the next element in the chain; -1 for the last element
    public TKey key;
    public TValue value;
}

Entry[] entries;    // stores the key/value pairs
int[] buckets;      // buckets[hashCode % buckets.Length] holds the entries index of the element most recently
                    // added to that chain. Example: if a pair is stored in entries[1] and
                    // hashCode % buckets.Length == 2, then buckets[2] == 1
int count = 0;      // number of entries slots handed out so far; Insert increments it, Remove does not decrement it
int version;        // version counter, used to detect the collection being modified during enumeration
IEqualityComparer<TKey> comparer;   // same field that Insert / FindEntry / Remove use for hashing and equality
int freeList;       // entries index of the most recently freed slot (head of the free chain); -1 when there is none
int freeCount;      // number of freed slots in entries
2. 添加鍵值(Add)
// Adds a new key/value pair. Delegates to Insert with add == true, so an
// already existing key is reported through ThrowHelper.ThrowArgumentException.
public void Add(TKey key, TValue value) {
    Insert(key, value, true);
}

// Stores `value` under `key`.
//   add == true  : an existing key is an error (Argument_AddingDuplicate).
//   add == false : an existing key has its value overwritten.
// A null key is reported through ThrowHelper.ThrowArgumentNullException.
private void Insert(TKey key, TValue value, bool add) {

    if( key == null ) {
        ThrowHelper.ThrowArgumentNullException(ExceptionArgument.key);
    }

    if (buckets == null) Initialize(0);
    int hashCode = comparer.GetHashCode(key) & 0x7FFFFFFF;
    // the bucket is chosen by taking the hash code modulo the bucket count
    int targetBucket = hashCode % buckets.Length;

#if FEATURE_RANDOMIZED_STRING_HASHING
    int collisionCount = 0;
#endif

    // walk the chain of this bucket looking for an equal key
    for (int i = buckets[targetBucket]; i >= 0; i = entries[i].next) {
        if (entries[i].hashCode == hashCode && comparer.Equals(entries[i].key, key)) {
            if (add) {
                ThrowHelper.ThrowArgumentException(ExceptionResource.Argument_AddingDuplicate);
            }
            // the key already exists: overwrite its value
            entries[i].value = value;
            version++;
            return;
        }

#if FEATURE_RANDOMIZED_STRING_HASHING
        collisionCount++;
#endif
    }

    int index;
    if (freeCount > 0) {
        // entries contains freed slots: reuse the one at the head of the free chain
        index = freeList;
        freeList = entries[index].next;
        freeCount--;
    }
    else {
        if (count == entries.Length)
        {
            // grow: Resize() asks HashHelpers.ExpandPrime(count) for the new length of
            // entries and buckets; the bucket index must be recomputed afterwards
            Resize();
            targetBucket = hashCode % buckets.Length;
        }
        index = count;
        count++;
    }

    entries[index].hashCode = hashCode;
    entries[index].next = buckets[targetBucket];
    entries[index].key = key;
    entries[index].value = value;
    // the bucket stores the head of the chain, i.e. the entries index of the element
    // stored last, so that a lookup starts from the head (see FindEntry(TKey key))
    buckets[targetBucket] = index;
    version++;

#if FEATURE_RANDOMIZED_STRING_HASHING

#if FEATURE_CORECLR
    // In case we hit the collision threshold we'll need to switch to the comparer which is using randomized string hashing
    // in this case will be EqualityComparer<string>.Default.
    // Note, randomized string hashing is turned on by default on coreclr so EqualityComparer<string>.Default will
    // be using randomized string hashing

    if (collisionCount > HashHelpers.HashCollisionThreshold && comparer == NonRandomizedStringEqualityComparer.Default)
    {
        comparer = (IEqualityComparer<TKey>) EqualityComparer<string>.Default;
        Resize(entries.Length, true);
    }
#else
    if(collisionCount > HashHelpers.HashCollisionThreshold && HashHelpers.IsWellKnownEqualityComparer(comparer))
    {
        // the chain walked above was longer than the collision threshold: switch to a
        // randomized comparer and rebuild the chains. The size passed to Resize is the
        // current entries.Length, so this re-hashes (forceNewHashCodes == true) without growing.
        comparer = (IEqualityComparer<TKey>) HashHelpers.GetRandomizedEqualityComparer(comparer);
        Resize(entries.Length, true);
    }
#endif // FEATURE_CORECLR

#endif

}

// ******************************************************************************************
// Usage example referred to by the walkthrough that follows.

static void Foo()
{
    var dicData = new Dictionary<int, int>();
    // add the key/value pairs
    new List<int> { 1, 2, 4 }.ForEach(item => Add(item, dicData));
    new List<int> { 22, 29, 36, 20 }.ForEach(item => Add(item, dicData));
}

// Adds `key` to the dictionary with the key itself as the value.
static void Add(int key, Dictionary<int, int> dicData)
{
    dicData.Add(key, key);
}
2.1 數組entries和buckets初始化
// Allocates buckets and entries with the same prime length and marks every bucket
// as empty (-1). The free chain starts empty as well (freeList = -1).
private void Initialize(int capacity) {
    // smallest prime that is not less than capacity (GetPrime tests prime >= min);
    // Initialize(0) therefore yields 3, the first value of the primes table
    int size = HashHelpers.GetPrime(capacity);
    buckets = new int[size];
    for (int i = 0; i < buckets.Length; i++) buckets[i] = -1;
    entries = new Entry[size];
    freeList = -1;
}

// ****************************************************
// Excerpt of HashHelpers; members not relevant here are elided.

internal static class HashHelpers
{
    // ......
    public const int HashCollisionThreshold = 100;  // collision threshold
    // ......
    // table of precomputed primes
    public static readonly int[] primes = {
        3, 7, 11, 17, 23, 29, 37, 47, 59, 71, 89, 107, 131, 163, 197, 239, 293, 353, 431, 521, 631, 761, 919,
        1103, 1327, 1597, 1931, 2333, 2801, 3371, 4049, 4861, 5839, 7013, 8419, 10103, 12143, 14591,
        17519, 21023, 25229, 30293, 36353, 43627, 52361, 62851, 75431, 90523, 108631, 130363, 156437,
        187751, 225307, 270371, 324449, 389357, 467237, 560689, 672827, 807403, 968897, 1162687, 1395263,
        1674319, 2009191, 2411033, 2893249, 3471899, 4166287, 4999559, 5999471, 7199369};
    // ......

    // Returns the first prime >= min: taken from the primes table when possible,
    // computed otherwise. A negative min throws ArgumentException.
    public static int GetPrime(int min)
    {
        if (min < 0)
            throw new ArgumentException(Environment.GetResourceString("Arg_HTCapacityOverflow"));
        Contract.EndContractBlock();

        // look for a suitable prime in the precomputed table
        for (int i = 0; i < primes.Length; i++)
        {
            int prime = primes[i];
            if (prime >= min) return prime;
        }

        //outside of our predefined table.
        //compute the hard way.
        // no table entry is large enough: test odd candidates one by one, skipping
        // those for which (i - 1) is a multiple of Hashtable.HashPrime
        for (int i = (min | 1); i < Int32.MaxValue;i+=2)
        {
            if (IsPrime(i) && ((i - 1) % Hashtable.HashPrime != 0))
                return i;
        }
        return min;
    }
}
2.2 添加鍵值{1,1},則
hashCode = 1;
targetBucket = hashCode % buckets.Length; //targetBucket = 1
next = buckets[targetBucket]; //next = -1
buckets[targetBucket] = index; //buckets[1] = 0
2.3 添加鍵值{2,2},則
hashCode = 2;
targetBucket = hashCode % buckets.Length; //targetBucket = 2
next = buckets[targetBucket]; //next = -1
buckets[targetBucket] = index; //buckets[2] = 1
2.4 添加鍵值{4,4},則
hashCode = 4;
targetBucket = hashCode % buckets.Length; //targetBucket = 1
next = buckets[targetBucket]; //next = 0
buckets[targetBucket] = index; //buckets[1] = 2
接下來將entries數組以單鏈表的形式呈現(即entries數組橫向);
2.5 在繼續添加鍵值之前,需要先進行擴容操作,因為entries數組長度為3且都已有元素。擴容後需要對buckets以及entries中每個元素的next重新賦值;
// Grows the dictionary to the size chosen by HashHelpers.ExpandPrime(count).
private void Resize() {
    // new size: the first prime >= (current count * 2), capped at MaxPrimeArrayLength.
    // Example: count = 3 -> ExpandPrime(3) = GetPrime(6) = 7
    Resize(HashHelpers.ExpandPrime(count), false);
}

// Rebuilds buckets and entries with length newSize. When forceNewHashCodes is true
// the stored hash codes are recomputed with the current comparer first.
private void Resize(int newSize, bool forceNewHashCodes) {
    Contract.Assert(newSize >= entries.Length);
    // create the new buckets and mark every one of them as empty (-1)
    int[] newBuckets = new int[newSize];
    for (int i = 0; i < newBuckets.Length; i++) newBuckets[i] = -1;
    Entry[] newEntries = new Entry[newSize];
    Array.Copy(entries, 0, newEntries, 0, count);
    // resize triggered by hash collisions: recompute the hash codes with the new
    // comparer, leaving freed slots (hashCode == -1) untouched
    if(forceNewHashCodes) {
        for (int i = 0; i < count; i++) {
            if(newEntries[i].hashCode != -1) {
                newEntries[i].hashCode = (comparer.GetHashCode(newEntries[i].key) & 0x7FFFFFFF);
            }
        }
    }
    // rebuild the chains
    for (int i = 0; i < count; i++) {
        if (newEntries[i].hashCode >= 0) {
            // recompute the bucket by modulo, then relink next and the bucket head
            int bucket = newEntries[i].hashCode % newSize;
            newEntries[i].next = newBuckets[bucket];
            newBuckets[bucket] = i;
        }
    }
    buckets = newBuckets;
    entries = newEntries;
}

// *******************************************************************
// Excerpt of HashHelpers; members not relevant here are elided.

internal static class HashHelpers
{
    // ......
    // table of precomputed primes
    public static readonly int[] primes = {
        3, 7, 11, 17, 23, 29, 37, 47, 59, 71, 89, 107, 131, 163, 197, 239, 293, 353, 431, 521, 631, 761, 919,
        1103, 1327, 1597, 1931, 2333, 2801, 3371, 4049, 4861, 5839, 7013, 8419, 10103, 12143, 14591,
        17519, 21023, 25229, 30293, 36353, 43627, 52361, 62851, 75431, 90523, 108631, 130363, 156437,
        187751, 225307, 270371, 324449, 389357, 467237, 560689, 672827, 807403, 968897, 1162687, 1395263,
        1674319, 2009191, 2411033, 2893249, 3471899, 4166287, 4999559, 5999471, 7199369};
    // ......

    // This is the maximum prime smaller than Array.MaxArrayLength
    // (i.e. the largest prime below the maximum array length, not the smallest one)
    public const int MaxPrimeArrayLength = 0x7FEFFFFD;

    // Returns the size to grow to from oldSize: the first prime >= 2 * oldSize,
    // capped at MaxPrimeArrayLength.
    public static int ExpandPrime(int oldSize)
    {
        // double the size
        int newSize = 2 * oldSize;

        // Allow the hashtables to grow to maximum possible size (~2G elements) before encoutering capacity overflow.
        // Note that this check works even when _items.Length overflowed thanks to the (uint) cast
        // the doubled size must not exceed MaxPrimeArrayLength
        if ((uint)newSize > MaxPrimeArrayLength && MaxPrimeArrayLength > oldSize)
        {
            Contract.Assert( MaxPrimeArrayLength == GetPrime(MaxPrimeArrayLength), "Invalid MaxPrimeArrayLength");
            return MaxPrimeArrayLength;
        }
        // first prime that is not less than the doubled size
        return GetPrime(newSize);
    }

    // Returns the first prime >= min: taken from the primes table when possible,
    // computed otherwise. A negative min throws ArgumentException.
    public static int GetPrime(int min)
    {
        if (min < 0)
            throw new ArgumentException(Environment.GetResourceString("Arg_HTCapacityOverflow"));
        Contract.EndContractBlock();

        // look for a suitable prime in the precomputed table
        for (int i = 0; i < primes.Length; i++)
        {
            int prime = primes[i];
            if (prime >= min) return prime;
        }

        //outside of our predefined table.
        //compute the hard way.
        // no table entry is large enough: test odd candidates one by one, skipping
        // those for which (i - 1) is a multiple of Hashtable.HashPrime
        for (int i = (min | 1); i < Int32.MaxValue;i+=2)
        {
            if (IsPrime(i) && ((i - 1) % Hashtable.HashPrime != 0))
                return i;
        }
        return min;
    }
}
2.6 繼續添加鍵值{22,22},{29,29},{36,36},{20,20},添加完後其內部存儲結果如下
3. 取鍵值(Find)
// Indexer. The getter looks the key up with FindEntry and reports a missing key
// through ThrowHelper.ThrowKeyNotFoundException; the setter adds or overwrites.
public TValue this[TKey key] {
    get {
        // entries index of the slot holding this key, or -1
        int i = FindEntry(key);
        if (i >= 0) return entries[i].value;
        ThrowHelper.ThrowKeyNotFoundException();
        // NOTE(review): presumably unreachable and present only to satisfy the compiler - confirm
        return default(TValue);
    }
    set {
        // add the key or update the value of an existing key (add == false)
        Insert(key, value, false);
    }
}

// Returns the entries index of `key`, or -1 when the key is absent or the
// dictionary has not been initialized yet (buckets == null).
// A null key is reported through ThrowHelper.ThrowArgumentNullException.
private int FindEntry(TKey key) {
    if( key == null) {
        ThrowHelper.ThrowArgumentNullException(ExceptionArgument.key);
    }

    if (buckets != null) {
        int hashCode = comparer.GetHashCode(key) & 0x7FFFFFFF;
        // walk the chain of the bucket, starting from its head
        for (int i = buckets[hashCode % buckets.Length]; i >= 0; i = entries[i].next) {
            if (entries[i].hashCode == hashCode && comparer.Equals(entries[i].key, key)) return i;
        }
    }
    return -1;
}

// *********************************************************************************************
// Usage example.

static void Foo()
{
    // ......
    // read the value stored under Key = 22
    var val =dicData[22];
}
簡化取Key對應值的代碼
// Simplified lookup of Key = 22 (same steps as FindEntry).
var hashCode =comparer.GetHashCode(key) & 0x7FFFFFFF;  // 22
var targetBucket = hashCode % buckets.Length;           // modulo: 22 % 7 = 1
var i = buckets[targetBucket];                          // entries index of the chain head: buckets[1] = 5
// walk the chain until an entry with the same hash code and an equal key is found
for (; i >= 0; i = entries[i].next)
{
    if (entries[i].hashCode == hashCode && comparer.Equals(entries[i].key, key)) return i;
}
4. 移除鍵值(Remove)
// Removes `key`. Returns true when the key was found and removed, false otherwise
// (including when the dictionary has not been initialized yet).
// A null key is reported through ThrowHelper.ThrowArgumentNullException.
public bool Remove(TKey key) {
    if(key == null) {
        ThrowHelper.ThrowArgumentNullException(ExceptionArgument.key);
    }

    if (buckets != null) {
        int hashCode = comparer.GetHashCode(key) & 0x7FFFFFFF;
        int bucket = hashCode % buckets.Length;
        int last = -1;
        // locate the entry first, then record its slot as free (freeList) and
        // bump the number of free slots (freeCount); `last` trails one step behind `i`
        for (int i = buckets[bucket]; i >= 0; last = i, i = entries[i].next) {
            if (entries[i].hashCode == hashCode && comparer.Equals(entries[i].key, key)) {
                if (last < 0) {
                    // the entry is the head of the chain: the bucket now points at its successor
                    buckets[bucket] = entries[i].next;
                }
                else {
                    // unlink the entry from the middle or the end of the chain
                    entries[last].next = entries[i].next;
                }
                entries[i].hashCode = -1;
                // link the slot into the free chain
                entries[i].next = freeList;
                entries[i].key = default(TKey);
                entries[i].value = default(TValue);
                // remember the index of the freed slot so that the next inserted pair
                // is stored there, which avoids wasting space in entries
                freeList = i;
                // one more free slot
                freeCount++;
                version++;
                return true;
            }
        }
    }
    return false;
}

// *******************************************************************
// Usage example: removes the keys 22 and 36, matching the walkthrough that follows.

static void Foo()
{
    // ......
    // remove
    new List<int> { 22, 36 }.ForEach(item => dicData.Remove(item));
}
4.1 移除Key=22後,freeList = 3, freeCount = 1,
4.2 移除Key=36後,freeList = 5, freeCount = 2,
5. 再插入鍵值
如上圖,當移除掉{36,36}後,會發現又誕生一個含有兩個元素的“新鏈表”(上圖灰色框)。這個作用就是為了插入新鍵值時,按照“新鏈表”記錄的索引順序插入到entries數組中。 例:添加鍵值{22,22},{25,25},此時freeList = 5,freeCount = 2;
- 給entries[5]賦值,freeList = 3, freeCount = 1;
- 給entries[3]賦值,freeList = -1, freeCount = 0;
希望此文能夠讓你對於Dictionary內部實現有所認識。