Golang Map 分析

【導讀】這篇文章是作者的學習筆記，主要關注 golang map 的底層結構、get、set、del 和擴容方面的細節。

底層結構

// A header for a Go map.
type hmap struct {
 // Note: the format of the hmap is also encoded in cmd/compile/internal/gc/reflect.go.
 // Make sure this stays in sync with the compiler's definition.
 count     int // # live cells == size of map.  Must be first (used by len() builtin)
 flags     uint8
 B         uint8  // log_2 of # of buckets (can hold up to loadFactor * 2^B items)
 noverflow uint16 // approximate number of overflow buckets; see incrnoverflow for details
 hash0     uint32 // hash seed

 buckets    unsafe.Pointer // array of 2^B Buckets. may be nil if count==0.
 oldbuckets unsafe.Pointer // previous bucket array of half the size, non-nil only when growing
 nevacuate  uintptr        // progress counter for evacuation (buckets less than this have been evacuated)

 extra *mapextra // optional fields
}

count：map 集合中元素的個數，len 函數的返回值
B：buckets 數組大小的對數
buckets：底層數組，大小爲 2^B
oldbuckets：擴容時使用的數組，用於保存擴容前的 buckets 數組，大小爲新 buckets 的一半

type bmap struct {
    topbits  [8]uint8
    keys     [8]keytype
    values   [8]valuetype
    pad      uintptr
    overflow uintptr
}

buckets 數組內部存的就是 bmap 的數組。

topbits：存儲內部 8 個 k-v 鍵值對的 key 的高 8 位
keys：保存所有 key 的數組
values：保存所有 value 的數組，將 keys 和 values 分開數組存放是爲了內存對齊
overflow：指向溢出 bmap，即當放入當前 bmap 的 k-v 超出 8 個時，就會創建新的 bmap，然後 overflow 指向新的 bmap

get 操作

對 key 做 hash 運算
對結果取後 B 位，用於定位 buckets 數組中該 k-v 所在的 bmap 的位置
遍歷當前 bmap 中的 tophash 數組，比較 hash 結果的高 8 位於 tophash 的結果
如果 tophash 與 hash 結果高 8 位相同，再比較 key 和 bmap 中 keys 對應位置的 key，這裏可以看出 tophash 是不能直接進行定位的，只是起到一個比較的結果，當然 tophash 還有其他特殊值
如果 key 也相等，則定位到 values 數組中對應位置的 value
如果在當前 bmap 中遍歷完了所有的 tophash 和 keys 都沒有對應值，則遍歷 bmap 中的溢出鏈，尋找下一個 bmap，回到步驟 3
如果 bmap 及其所有溢出 bmap 都遍歷完了都沒有，則返回 value 的零值，而不是返回 nil

// mapaccess1 returns a pointer to h[key].  Never returns nil, instead
// it will return a reference to the zero object for the elem type if
// the key is not in the map.
// NOTE: The returned pointer may keep the whole map live, so don't
// hold onto it for very long.
func mapaccess1(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer {
 // 如果 map 爲 nil 或者沒有元素，直接返回 value 零值
 if h == nil || h.count == 0 {
  if t.hashMightPanic() {
   t.hasher(key, 0) // see issue 23734
  }
  return unsafe.Pointer(&zeroVal[0])
 }
 // 讀寫衝突檢查
 if h.flags&hashWriting != 0 {
  throw("concurrent map read and map write")
 }
 // 計算 hash 值
 hash := t.hasher(key, uintptr(h.hash0))
 // 計算 2^B-1
 m := bucketMask(h.B)
 // hash&m 計算出後 B 爲，定位 bucket 位置
 // 這裏的運算應該是對 hash%buckets 的長度 來定位，這裏使用&運算來代替%運算加速。
 //    hash % len(buckets)
 // => hash % 2^B
 // => hash & (2^B-1)
 // => hash & m
 b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize)))
 // old 不爲空，說明在擴容，因此需要判斷是否去 oldbucket 中查找
 if c := h.oldbuckets; c != nil {
  // 如果不是等量擴容則需要將 m/2，因爲擴容時新 buckets 的容量是 old 的兩倍
  if !h.sameSizeGrow() {
   // There used to be half as many buckets; mask down one more power of two.
   m >>= 1
  }
  oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize)))
      // oldb 未被擴容搬運，表示需要去 oldbucket 中查找，則將 b 指向 old
  if !evacuated(oldb) {
   b = oldb
  }
 }
 // 取 hash 值的高 8 位
 top := tophash(hash)
bucketloop:
 // 遍歷 bmap 及其溢出鏈
 for ; b != nil; b = b.overflow(t) {
  // bucketCntBits = 3
  // bucketCnt     = 1 << bucketCntBits
  // 一個 bmap 最多存 8 個 k-v
  for i := uintptr(0); i < bucketCnt; i++ {
   if b.tophash[i] != top {
    // emptyRest 表示當前 tophash 對應的 k-v 爲空，並且其後都爲空，因此不用繼續找下去了
    if b.tophash[i] == emptyRest {
     break bucketloop
    }
    continue
   }
   // 找到 tophash 對應值的 key，定位到 key 的位置
   k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))
   if t.indirectkey() {
    k = *((*unsafe.Pointer)(k))
   }
   // 比較 key，如果相等則表示找到了對應的 k-v
   if t.key.equal(key, k) {
    e := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.elemsize))
    if t.indirectelem() {
     e = *((*unsafe.Pointer)(e))
    }
    return e
   }
  }
 }
 // 所有的 bmap 都找過了，都沒有，則返回 value 的零值
 return unsafe.Pointer(&zeroVal[0])
}

用到的其他函數代碼

// 取後 h.B 位
// m := uintptr(1) << (h.B & (sys.PtrSize*8 - 1)) - 1
m := bucketMask(h.B)
func bucketMask(b uint8) uintptr {
 return bucketShift(b) - 1
}
func bucketShift(b uint8) uintptr {
 // Masking the shift amount allows overflow checks to be elided.
 return uintptr(1) << (b & (sys.PtrSize*8 - 1))
}

// 計算 hash 的高 8 位
func tophash(hash uintptr) uint8 {
 top := uint8(hash >> (sys.PtrSize*8 - 8))
 // minTopHash 及之前的一些值，是 map 預定義用來表示狀態的一些值
 // 如果計算的高 8 位小於需要加上 minTopHash 則需要加上 minTopHash，以區別正常 hash 值和狀態值
 if top < minTopHash {
  top += minTopHash
 }
 return top
}

get 操作還有返回 bool 值表示 k-v 是否存在的操作。實際由兩個函數來實現。

實現細節相同，mapaccess2 在返回值上多添加一個 bool

func mapaccess1(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer
func mapaccess2(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, bool)

set 操作

與 get 操作類似，底層使用mapassign函數實現

// Like mapaccess, but allocates a slot for the key if it is not present in the map.
func mapassign(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer {
 if h.flags&hashWriting != 0 {
  throw("concurrent map writes")
 }
 hash := t.hasher(key, uintptr(h.hash0))
 h.flags ^= hashWriting

 // bucket 爲空，分配第一個新的 bucket
 if h.buckets == nil {
  h.buckets = newobject(t.bucket) // newarray(t.bucket, 1)
 }

again:
 bucket := hash & bucketMask(h.B)
   // 如果是擴容狀態，則優先擴容當前 bucket
 if h.growing() {
  growWork(t, h, bucket)
 }
 b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize)))
 top := tophash(hash)

 // inserti -> 表示新 k-v 在 tophash 數組中的位置
 // insertk -> 表示新 k-v 的 key 在 keys 中的位置
 // elem    -> 表示新 k-v 的 value 的地址
 var inserti *uint8
 var insertk unsafe.Pointer
 var elem unsafe.Pointer
bucketloop:
 // 與 mapaccess 類似。遍歷 bmap 及其溢出鏈
 for {
  for i := uintptr(0); i < bucketCnt; i++ {
   // 判斷新 k-v 是否在 bucket 中已經存在
   if b.tophash[i] != top {
    // 如果當前 tophash 位置不是新 k-v 的位置，則判斷他是否爲空位
    // 在插入新空位時，應該儘可能的在前面的空位插入，因此需要做 inserti == nil 判斷，
    // inserti != nil 表示已經找到了更前的空位，不使用新位置。
    if isEmpty(b.tophash[i]) && inserti == nil {
     inserti = &b.tophash[i]
     insertk = add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))
     elem = add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.elemsize))
    }
    if b.tophash[i] == emptyRest {
     break bucketloop
    }
    continue
   }
   // tophash 值與新 k-v 的高 8 位相同，則判斷 key 是否相同
   k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))
   if t.indirectkey() {
    k = *((*unsafe.Pointer)(k))
   }
   // key 不相同則，則跳過，尋找下一個
   if !t.key.equal(key, k) {
    continue
   }
   // key 相同，則更新即可
   if t.needkeyupdate() {
    typedmemmove(t.key, k, key)
   }
   elem = add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.elemsize))
   goto done
  }
  // 遍歷溢出鏈
  ovf := b.overflow(t)
  if ovf == nil {
   break
  }
  b = ovf
 }

 // 沒有在 map 找到 key，需要分配新的位置
 // 如果 map 中鍵值對數量觸發了擴容，或者 bmap 溢出過多，則需要擴容
 if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) {
  hashGrow(t, h)
  // 觸發了擴容，調整了 map 結構，因此需要重新查找合適的位置
  goto again // Growing the table invalidates everything, so try again
 }

 // 表示在 bmap 及其溢出鏈都沒有合適的位置，即都滿了，則需要創建新的溢出 bmap，將新 k-v 分配到新的 bmap 中
 if inserti == nil {
  newb := h.newoverflow(t, b)
  inserti = &newb.tophash[0]
  insertk = add(unsafe.Pointer(newb), dataOffset)
  elem = add(insertk, bucketCnt*uintptr(t.keysize))
 }

 // 在對應位置插入 key 和 value
 if t.indirectkey() {
  kmem := newobject(t.key)
  *(*unsafe.Pointer)(insertk) = kmem
  insertk = kmem
 }
 if t.indirectelem() {
  vmem := newobject(t.elem)
  *(*unsafe.Pointer)(elem) = vmem
 }
 typedmemmove(t.key, insertk, key)
 *inserti = top
 h.count++

done:
 if h.flags&hashWriting == 0 {
  throw("concurrent map writes")
 }
 h.flags &^= hashWriting
 if t.indirectelem() {
  elem = *((*unsafe.Pointer)(elem))
 }
 return elem
}

mapassign函數最後的返回值爲 value 的地址，需要通過彙編將實際的 value 值賦值到 value 的地址上。

https://github.com/cch123/golang-notes/blob/master/map.md

這裏有件比較奇怪的事情，mapassign 並沒有把用戶 m[k] = v 時的 v 寫入到 map 的 value 區域，而是直接返回了這個值所應該在的內存地址。那麼把 v 拷貝到該內存區域的操作是在哪裏做的呢？
var a = make(map[int]int, 7)
for i := 0; i < 1000; i++ {
  a[i] = 99999
}
看看生成的彙編部分：
0x003f 00063 (m.go:9)    MOVQ    DX, (SP) // 第一個參數
0x0043 00067 (m.go:9)    MOVQ    AX, 8(SP) // 第二個參數
0x0048 00072 (m.go:9)    MOVQ    CX, 16(SP) // 第三個參數
0x004d 00077 (m.go:9)    PCDATA    $0, $1 // GC 相關
0x004d 00077 (m.go:9)    CALL    runtime.mapassign_fast64(SB) // 調用函數
0x0052 00082 (m.go:9)    MOVQ    24(SP), AX // 返回值，即 value 應該存放的內存地址
0x0057 00087 (m.go:9)    MOVQ    $99999, (AX) // 把 99999 放入該地址中
賦值的最後一步實際上是編譯器額外生成的彙編指令來完成的，可見靠 runtime 有些工作是沒有做完的。這裏和 go 在函數調用時插入 prologue 和 epilogue 是類似的。編譯器和 runtime 配合，才能完成一些複雜的工作。

del 操作

底層使用mapdelete函數實現，首先查找到 cell 對應的位置，再將 key 和 value 內存清空，查找過程與 get 操作類似。

在清空內存後，還需要調整 bmap 及其溢出鏈的 tophash 的狀態值，將一些 emptyOne 調整爲 emptyReset

func mapdelete(t *maptype, h *hmap, key unsafe.Pointer) {
 // map 爲空，或者沒有元素，則直接返回
 if h == nil || h.count == 0 {
  if t.hashMightPanic() {
   t.hasher(key, 0) // see issue 23734
  }
  return
 }
 if h.flags&hashWriting != 0 {
  throw("concurrent map writes")
 }

 hash := t.hasher(key, uintptr(h.hash0))

 // Set hashWriting after calling t.hasher, since t.hasher may panic,
 // in which case we have not actually done a write (delete).
 h.flags ^= hashWriting

 bucket := hash & bucketMask(h.B)
 // 如果正常擴容，則優先做當前 bucket 的擴容處理
 if h.growing() {
  growWork(t, h, bucket)
 }
 // 定位到對應的 bucket 上
 b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize)))
 // 記錄對應 bucket 的第一個 bmap，後續的 reset 需要用到
 bOrig := b
 top := tophash(hash)
search:
 // 與賦值類似，遍歷 bmap 及其溢出鏈
 for ; b != nil; b = b.overflow(t) {
  for i := uintptr(0); i < bucketCnt; i++ {
   if b.tophash[i] != top {
    if b.tophash[i] == emptyRest {
     break search
    }
    continue
   }
   // 定位到對應的 key 的位置
   k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))
   k2 := k
   if t.indirectkey() {
    k2 = *((*unsafe.Pointer)(k2))
   }
   if !t.key.equal(key, k2) {
    continue
   }
   // 清空 key 的內存
   if t.indirectkey() {
    *(*unsafe.Pointer)(k) = nil
   } else if t.key.ptrdata != 0 {
    memclrHasPointers(k, t.key.size)
   }
   // 定位 value 的位置，清空 value 的內存
   e := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.elemsize))
   if t.indirectelem() {
    *(*unsafe.Pointer)(e) = nil
   } else if t.elem.ptrdata != 0 {
    memclrHasPointers(e, t.elem.size)
   } else {
    memclrNoHeapPointers(e, t.elem.size)
   }
   // 將當前位置對應的 tophash 設置爲 emptyOne 狀態，表示當前 cell 爲空
   b.tophash[i] = emptyOne

   // 調整當前 bucket 中 cell 的狀態，將一些 emptyOne 設置爲 emptyRest
   // emptyOne 表示當前 cell 爲空，emptyRest 表示當前 cell 及其後都爲空，因此 emptyRest 可以避免多餘的遍歷操作

   // 如果當前刪除的 k-v 爲 bmap 中的最後一個，即第 7 位
   if i == bucketCnt-1 {
    // 當前 bmap 後面還有溢出鏈，並且溢出鏈中有內容，則跳到收尾處理
    if b.overflow(t) != nil && b.overflow(t).tophash[0] != emptyRest {
     goto notLast
    }
   } else {
    // 當前刪除的 k-v 不是最後一個，則需要判斷他的下一位是否爲 emptyRest，
    // 如果下一位狀態不 emptyRest，則表示其後還有內存，則跳到收尾處理
    if b.tophash[i+1] != emptyRest {
     goto notLast
    }
   }
   // 如果能到這裏，表示當前刪除的 k-v 的後面所有 cell 狀態都爲 emptyRest，
   // 因此需要調整當前 cell 爲 emptyRest，並且繼續向前調整
   for {
    b.tophash[i] = emptyRest
    // 如果當前 cell 爲 bmap 中的首位，則需要找溢出鏈的前一個 bmap
    if i == 0 {
     // 表示當前已經是首個 bmap，因此調整完了。
     if b == bOrig {
      break
     }
     // 類似單向鏈表查找前一項，找到前一個 bmap
     c := b
     for b = bOrig; b.overflow(t) != c; b = b.overflow(t) {
     }
     // 此時 b 指向了前一個的 bmap。將 i 調整爲 bmap 的最後一位，即 7
     i = bucketCnt - 1
    } else { // 如果當前 cell 不是首位，則直接 i--往前找 cell
     i--
    }
    // 如果往前找到的 cell 位置狀態都爲 emptyOne，則需要將他們置爲 emptyRest，
    // 直到找到一個 cell 位置不爲空
    if b.tophash[i] != emptyOne {
     break
    }
   }
  notLast:
   // 最後的收尾處理，將 map 元素數量減 1
   h.count--
   break search
  }
 }

 if h.flags&hashWriting == 0 {
  throw("concurrent map writes")
 }
 h.flags &^= hashWriting
}

擴容

擴容觸發時機，在mapassign函數中，當需要分配新 bucket 時會檢查是否需要擴容

if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) {
    // 擴容函數
    hashGrow(t, h)
    goto again // Growing the table invalidates everything, so try again
}

擴容的兩個判斷條件：

元素數量超過擴容界限，即元素數量 > 6.5*2^B，此時 map 查詢效率降低，需要增量擴容來提高查找效率
bucket 溢出鏈過長，當某些元素落入同一 bucket 時，頻繁的插入和刪除，可能導致許多 bucket 中並沒有填滿 8 個，因此需要等量擴容，來緊縮這些內存空間

// overLoadFactor reports whether count items placed in 1<<B buckets is over loadFactor.
func overLoadFactor(count int, B uint8) bool {
 return count > bucketCnt && uintptr(count) > loadFactorNum*(bucketShift(B)/loadFactorDen)
}
// tooManyOverflowBuckets reports whether noverflow buckets is too many for a map with 1<<B buckets.
// Note that most of these overflow buckets must be in sparse use;
// if use was dense, then we'd have already triggered regular map growth.
func tooManyOverflowBuckets(noverflow uint16, B uint8) bool {
 // If the threshold is too low, we do extraneous work.
 // If the threshold is too high, maps that grow and shrink can hold on to lots of unused memory.
 // "too many" means (approximately) as many overflow buckets as regular buckets.
 // See incrnoverflow for more details.
 if B > 15 {
  B = 15
 }
 // The compiler doesn't see here that B < 16; mask B to generate shorter shift code.
 return noverflow >= uint16(1)<<(B&15)
}

擴容函數，擴容前的設置在hashGrow函數中

func hashGrow(t *maptype, h *hmap) {
 // 判斷是否爲超過擴容因子觸發的擴容
 bigger := uint8(1)
 if !overLoadFactor(h.count+1, h.B) {
  bigger = 0
  h.flags |= sameSizeGrow
 }
 // 將 buckets 掛載到 old 上
 oldbuckets := h.buckets
 // 創建大小爲 2^(h.B+bigger) 的新 buckets 數組
 newbuckets, nextOverflow := makeBucketArray(t, h.B+bigger, nil)

 flags := h.flags &^ (iterator | oldIterator)
 if h.flags&iterator != 0 {
  flags |= oldIterator
 }
 // commit the grow (atomic wrt gc)
 h.B += bigger
 h.flags = flags
 h.oldbuckets = oldbuckets
 h.buckets = newbuckets
 h.nevacuate = 0
 h.noverflow = 0

 if h.extra != nil && h.extra.overflow != nil {
  // Promote current overflow buckets to the old generation.
  if h.extra.oldoverflow != nil {
   throw("oldoverflow is not nil")
  }
  h.extra.oldoverflow = h.extra.overflow
  h.extra.overflow = nil
 }
 if nextOverflow != nil {
  if h.extra == nil {
   h.extra = new(mapextra)
  }
  h.extra.nextOverflow = nextOverflow
 }
 // the actual copying of the hash table data is done incrementally
 // by growWork() and evacuate().
}

map 中 buckets 的重新分配調整在growWork函數中，因此mapassign函數中，是先通過判斷overLoadFactor函數和tooManyOverflowBuckets函數來檢測是否需要擴容，如果需要擴容，則調用hashGrow函數來設置擴容狀態，最後再跳回如何代碼段中，調用 growWork() 來擴容

if h.growing() {
    growWork(t, h, bucket)
}

擴容函數

func growWork(t *maptype, h *hmap, bucket uintptr) {
 // 確保搬運的 bucket 是正在使用的
 evacuate(t, h, bucket&h.oldbucketmask())

 // 如果還是擴容狀態，則再搬運一個
 if h.growing() {
  evacuate(t, h, h.nevacuate)
 }
}

搬運函數evacuate，該函數一次只能搬運一個 bucket

func evacuate(t *maptype, h *hmap, oldbucket uintptr) {
 // 定位當前 bucket 的位置
 b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)))
 // 記錄擴容前 map 的容量大小，即擴容前的 2^B
 newbit := h.noldbuckets()
 // 判斷當前 bucket 是否已經完成擴容
 if !evacuated(b) {

  // x 和 y 指向擴容後在新 buckets 中的位置
  var xy [2]evacDst
  x := &xy[0]
  // 定位目標 bmap 地址
  x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize)))
  // 定位目標 bmap 的 keys 地址
  x.k = add(unsafe.Pointer(x.b), dataOffset)
  // 定位目標 bmap 的 values 地址
  x.e = add(x.k, bucketCnt*uintptr(t.keysize))

  // 如果是增量擴容，則 y 表示擴容後的高位地址，在增量擴容時，由於是兩倍放大，因此目標 bmap 有兩個，一個低位一個高位
  if !h.sameSizeGrow() {
   // Only calculate y pointers if we're growing bigger.
   // Otherwise GC can see bad pointers.
   y := &xy[1]
   y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize)))
   y.k = add(unsafe.Pointer(y.b), dataOffset)
   y.e = add(y.k, bucketCnt*uintptr(t.keysize))
  }

  // 遍歷 old 中的該 bmap 及其溢出鏈
  for ; b != nil; b = b.overflow(t) {
   k := add(unsafe.Pointer(b), dataOffset)
   e := add(k, bucketCnt*uintptr(t.keysize))
   // 遍歷搬運所有 cell
   for i := 0; i < bucketCnt; i, k, e = i+1, add(k, uintptr(t.keysize)), add(e, uintptr(t.elemsize)) {
    top := b.tophash[i]
    // 如果該位爲空，則將其置爲已經搬運過的狀態
    if isEmpty(top) {
     b.tophash[i] = evacuatedEmpty
     continue
    }
    // 異常情況，未被搬運的只能是空或者 tophash 正常值的 cell
    if top < minTopHash {
     throw("bad map state")
    }
    k2 := k
    if t.indirectkey() {
     k2 = *((*unsafe.Pointer)(k2))
    }
    var useY uint8
    if !h.sameSizeGrow() {
     // 對需要搬運的 cell 的 key 做一次 hash 運算，決定需要搬運到 x 位置還是 y 位置
     hash := t.hasher(k2, uintptr(h.hash0))
     if h.flags&iterator != 0 && !t.reflexivekey() && !t.key.equal(k2, k2) {
      // 針對 key 爲 NaN 的特殊處理
      useY = top & 1
      top = tophash(hash)
     } else {
      // 對 hash 判斷新的高位是否爲 1 來決定搬運到 x 位置還是 y 位置
      // 例如對於 oldhash 我們關注後 3 位，因此增量擴容後，則需要關注後 4 位
      // 因此如果第 4 位爲 0，則表示還是在地位，如果爲 1，則表示搬運到高位
      if hash&newbit != 0 {
       useY = 1
      }
     }
    }

    if evacuatedX+1 != evacuatedY || evacuatedX^1 != evacuatedY {
     throw("bad evacuatedN")
    }

    // evacuatedX + 1 == evacuatedY
    // evacuatedX 表示搬運到地位，evacuatedY 表示搬運到高位，與上面算的 useY 做加表示新狀態
    b.tophash[i] = evacuatedX + useY
    // 目的 bmap 的地址
    dst := &xy[useY]

    // 如果目標 bmap 已經滿了，則創建溢出 bmap
    if dst.i == bucketCnt {
     dst.b = h.newoverflow(t, dst.b)
     dst.i = 0
     dst.k = add(unsafe.Pointer(dst.b), dataOffset)
     dst.e = add(dst.k, bucketCnt*uintptr(t.keysize))
    }
    // 設置目標 bmap 的 tophash
    dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check
    // 搬運 key
    if t.indirectkey() {
     *(*unsafe.Pointer)(dst.k) = k2 // copy pointer
    } else {
     typedmemmove(t.key, dst.k, k) // copy elem
    }
    // 搬運 value
    if t.indirectelem() {
     *(*unsafe.Pointer)(dst.e) = *(*unsafe.Pointer)(e)
    } else {
     typedmemmove(t.elem, dst.e, e)
    }
    // 目標 bmap 中的 cell 數量增加
    dst.i++
    // 將 dst 的 k 指針和 e 指針後移一個單位，指向新的空位
    dst.k = add(dst.k, uintptr(t.keysize))
    dst.e = add(dst.e, uintptr(t.elemsize))
   }
  }
  // 清除 old bucket
  if h.flags&oldIterator == 0 && t.bucket.ptrdata != 0 {
   b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))
   // 只清除 keys 和 values，保留 tophash 來表示狀態
   ptr := add(b, dataOffset)
   n := uintptr(t.bucketsize) - dataOffset
   memclrHasPointers(ptr, n)
  }
 }

 if oldbucket == h.nevacuate {
  // 更新搬運進度
  advanceEvacuationMark(h, t, newbit)
 }
}

搬運的後續處理函數

func advanceEvacuationMark(h *hmap, t *maptype, newbit uintptr) {
 // 搬運進度+1，nevacuate 表示在其之前的 bucket 都已經搬運完成了
 h.nevacuate++
 // Experiments suggest that 1024 is overkill by at least an order of magnitude.
 // Put it in there as a safeguard anyway, to ensure O(1) behavior.
 stop := h.nevacuate + 1024
 if stop > newbit {
  stop = newbit
 }
 // 尋找還未搬運的 bucket
 for h.nevacuate != stop && bucketEvacuated(t, h, h.nevacuate) {
  h.nevacuate++
 }
 // 搬運進度等於 old buckets 的長度，則說明整個 old buckets 已經搬運完成了
 if h.nevacuate == newbit { // newbit == # of oldbuckets
  // 清除 old buckets，函數根據 h.oldbuckets 是否爲 nil 來表示是否正在擴容
  h.oldbuckets = nil
  // 清除 old overflow bucket
  if h.extra != nil {
   h.extra.oldoverflow = nil
  }
  h.flags &^= sameSizeGrow
 }
}

調用一次evacuate函數只能搬運一個 bucket，而且調用evacuate函數的地方只在growWork函數中，而growWork函數的調用只存在mapassign函數和mapdelete函數中，因此好像只有主動觸發了這兩個方法纔會使得擴容繼續進行。對於擴容邊界的地方，再增添一個新的 cell，觸發擴容，此時查看 hmap 的 oldbuckets 爲非 nil，表示處於擴容狀態，推測需要後續手動觸發增加擴容進度。這樣的設計佔用 oldbuckets 和 newbuckets 兩份內存不會浪費嗎？

本文由 Readfog 進行 AMP 轉碼，版權歸原作者所有。
來源：https://mp.weixin.qq.com/s/5AYPbXgnAlp0Z3aEOGsyhw

底層結構

get 操作

set 操作

del 操作

擴容

猜你喜歡