@eric1989
2017-01-15T06:21:03.000000Z
字数 11818
阅读 1293
JDK8版本的ConcurrentHashMap与前代的思路完全不同,不再采取分段锁的办法来提高并发,而是将并发粒度细化到了每一个槽位.由于槽位的数量远多于分段锁的数量(也就是锁的粒度细得多),因此很有效的降低了并发冲突.
简单的说,并发竞争发生在Node数组的每一个槽位上.不同的槽位之间并没有并发竞争关系.而在一个槽位上执行读取操作是没有冲突的,数据的可见性通过volatile修饰符来达成.而同一个槽位上的写操作冲突通过对首节点进行synchronized加锁来完成互斥(还挺简单方便的).由于在同一个槽位上的链表元素都只能添加到末尾,因此锁住首节点就成为了一个非常方便的互斥手段.
在并发上的冲突降低主要就是依靠将冲突降低到了槽位的粒度来达成.由于没有分段锁这个概念,因此扩容的时候也可以多线程进行扩容(对比之前的版本,一个分段锁内只有一个线程在执行扩容操作).具体操作方式简单说,就是从当前Node数组的最大下标开始,每一个扩容的线程首先都通过cas获得一段属于该线程自身的下标范围,在这个下标范围内,这个线程的数据转移不会和其他的线程冲突.由于扩容是按2倍的方式进行,假设原容量为n,那么原来在i槽位上的节点,扩容后或者仍在i槽位,或者在i+n槽位.因此只要每个线程的下标范围不重叠,就不会出现线程转移数据冲突的问题.也就是可以进行多线程协助扩容.
/**
 * Inserts or updates the mapping for {@code key}.
 *
 * @param key          key to insert; must not be null
 * @param value        value to associate; must not be null
 * @param onlyIfAbsent when true, an existing value is left untouched
 * @return the value previously stored for {@code key}, or null when a new
 *         mapping was added
 * @throws NullPointerException if {@code key} or {@code value} is null
 */
final V putVal(K key, V value, boolean onlyIfAbsent) {
    if (key == null || value == null) throw new NullPointerException();
    int hash = spread(key.hashCode());
    int binCount = 0;
    for (Node<K,V>[] tab = table;;) {
        Node<K,V> f; int n, i, fh;
        // Table missing or empty: initialize it, then retry the loop.
        if (tab == null || (n = tab.length) == 0)
            tab = initTable();
        // Target bin is empty: try to publish the new node with a single CAS.
        else if ((f = tabAt(tab, i = (n - 1) & hash)) == null) {
            // On CAS failure we fall through and retry the outer loop.
            if (casTabAt(tab, i, null,
                         new Node<K,V>(hash, key, value, null)))
                break;                   // no lock when adding to empty bin
        }
        // Head node is marked MOVED: join the transfer via helpTransfer and
        // continue with the table it returns.
        else if ((fh = f.hash) == MOVED)
            tab = helpTransfer(tab, f);
        else {
            V oldVal = null;
            // Mutations of a non-empty bin are serialized on its head node.
            synchronized (f) {
                // Re-check under the lock that f is still the head of bin i;
                // if not, nothing is done here and the outer loop retries.
                if (tabAt(tab, i) == f) {
                    // Non-negative head hash: handled as a linked list.
                    if (fh >= 0) {
                        // binCount counts the nodes visited in this bin.
                        binCount = 1;
                        // Walk the list: update on a key match, otherwise
                        // append a new node after the last one.
                        for (Node<K,V> e = f;; ++binCount) {
                            K ek;
                            if (e.hash == hash &&
                                ((ek = e.key) == key ||
                                 (ek != null && key.equals(ek)))) {
                                oldVal = e.val;
                                if (!onlyIfAbsent)
                                    e.val = value;
                                break;
                            }
                            Node<K,V> pred = e;
                            if ((e = e.next) == null) {
                                pred.next = new Node<K,V>(hash, key,
                                                          value, null);
                                break;
                            }
                        }
                    }
                    // Head is a TreeBin: delegate the insert to the tree.
                    else if (f instanceof TreeBin) {
                        Node<K,V> p;
                        // Fixed at 2: non-zero so the post-lock branch runs.
                        // NOTE(review): presumably also chosen to stay below
                        // TREEIFY_THRESHOLD; its value is not visible here.
                        binCount = 2;
                        // A non-null result is the existing node for this key.
                        if ((p = ((TreeBin<K,V>)f).putTreeVal(hash, key,
                                                       value)) != null) {
                            oldVal = p.val;
                            if (!onlyIfAbsent)
                                p.val = value;
                        }
                    }
                }
            }
            // binCount == 0 means the locked re-check failed; retry.
            if (binCount != 0) {
                // Long list: hand the bin to treeifyBin
                // (presumably converts it to a tree; not visible here).
                if (binCount >= TREEIFY_THRESHOLD)
                    treeifyBin(tab, i);
                if (oldVal != null)
                    return oldVal;
                break;
            }
        }
    }
    // Reached only when a new mapping was added: bump the element count.
    addCount(1L, binCount);
    return null;
}
在看下面这段代码之前,首先需要关注类的一个控制属性
//这个属性用来完成对table的初始化或者扩容的控制权的归属争夺.主要有以下三种状态//1. -1意味着当前正在执行table的初始化//2. -(1+n)意味着当前有n个线程在执行扩容//3. 如果是正数,则是扩容阀值.意味着总体容量到达该数字的时候需要进行扩容private transient volatile int sizeCtl;
/**
 * Creates the table if no other thread has done so yet, using sizeCtl as
 * the arbitration word.
 *
 * @return the table observed or created by this call
 */
private final Node<K,V>[] initTable() {
    Node<K,V>[] tab; int sc;
    while ((tab = table) == null || tab.length == 0) {
        // Negative sizeCtl: another thread holds the init/resize claim;
        // yield and re-check.
        if ((sc = sizeCtl) < 0)
            Thread.yield(); // lost initialization race; just spin
        // Claim initialization by swapping sizeCtl to -1.
        else if (U.compareAndSwapInt(this, SIZECTL, sc, -1)) {
            try {
                // Re-check after winning the CAS; the table may already exist.
                if ((tab = table) == null || tab.length == 0) {
                    // A positive sizeCtl is used as the requested capacity.
                    int n = (sc > 0) ? sc : DEFAULT_CAPACITY;
                    @SuppressWarnings("unchecked")
                    Node<K,V>[] nt = (Node<K,V>[])new Node<?,?>[n];
                    table = tab = nt;
                    // Next resize threshold: n - n/4, i.e. 75% of capacity.
                    sc = n - (n >>> 2);
                }
            } finally {
                // Always release the claim: writes the new threshold, or the
                // old value if allocation threw.
                sizeCtl = sc;
            }
            break;
        }
    }
    return tab;
}
看这个方法前,需要了解下,在jdk8中为了解决高并发下的原子统计,在以前的AtomicInteger/AtomicLong之外新增了LongAdder这类计数器.它通过空间换时间的方式提升了高并发下的性能,具体的原理可以看另外一篇文章
以下的三个参数合并在一起完成了统计的作用(作用类似LongAdder,不太明白为什么不直接使用)
//这个参数用于CounterCell的扩容权争夺private transient volatile int cellsBusy;private transient volatile long baseCount;private transient volatile CounterCell[] counterCells;@sun.misc.Contended static final class CounterCell {volatile long value;CounterCell(long x) { value = x; }}
private final void addCount(long x, int check) {CounterCell[] as; long b, s;//如果counterCells存在意味着存在竞争,//如果直接cas增加总数失败意味着存在竞争if ((as = counterCells) != null ||!U.compareAndSwapLong(this, BASECOUNT, b = baseCount, s = b + x)) {CounterCell a; long v; int m;boolean uncontended = true;//如果无法取得某一个counterCeller,或者在counterCeller上的cas直接增加失败,则进入到完整的增加总数代码中,也就是fullAddCountif (as == null || (m = as.length - 1) < 0 ||(a = as[ThreadLocalRandom.getProbe() & m]) == null ||!(uncontended =U.compareAndSwapLong(a, CELLVALUE, v = a.value, v + x))) {fullAddCount(x, uncontended);return;}if (check <= 1)return;//统计下当前线程能看到的总数(在很高并发的情况下,可以认为任何线程看到的总数都是滞后的)s = sumCount();}if (check >= 0) {Node<K,V>[] tab, nt; int n, sc;while (s >= (long)(sc = sizeCtl) && (tab = table) != null &&(n = tab.length) < MAXIMUM_CAPACITY) {//这个方法就一句,Integer.numberOfLeadingZeros(n) | (1 << (RESIZE_STAMP_BITS - 1))//具体的作用就是通过位运算的方式得到一个当前table的长度的一种"表示数字"int rs = resizeStamp(n);if (sc < 0) {if ((sc >>> RESIZE_STAMP_SHIFT) != rs || sc == rs + 1 ||sc == rs + MAX_RESIZERS || (nt = nextTable) == null ||transferIndex <= 0)break;if (U.compareAndSwapInt(this, SIZECTL, sc, sc + 1))transfer(tab, nt);}else if (U.compareAndSwapInt(this, SIZECTL, sc,(rs << RESIZE_STAMP_SHIFT) + 2))transfer(tab, null);s = sumCount();}}}
以下的代码核心思路就是通过空间换时间的方式来提高高并发下的性能.如果使用一个long变量来统计,多cpu核心并行的cas必然导致大量的失败,那么只要将这些cas尽可能的分散到不同的地方,就可以减少失败进而提高性能.因此除了一个基本的long变量用于cas争夺,还需要一组额外的变量(数组会按需扩容,长度达到cpu核心数后不再扩容)用于分散cas争夺.同时由于这些额外变量会在不同的核心共享,为了避免伪共享发生,还需要进行缓存行填充(这一点,jdk8通过内部的注解@sun.misc.Contended由jvm自动的完成了)
/**
 * Slow path of addCount: retries until {@code x} has been added to a
 * counter cell or to baseCount, creating or growing the cell array as
 * needed.
 *
 * @param x              the delta to add
 * @param wasUncontended false if the caller's cell CAS already failed
 */
private final void fullAddCount(long x, boolean wasUncontended) {
    int h;
    // A probe of 0 means the thread-local probe is not set up yet.
    if ((h = ThreadLocalRandom.getProbe()) == 0) {
        ThreadLocalRandom.localInit();      // force initialization
        h = ThreadLocalRandom.getProbe();
        wasUncontended = true;
    }
    boolean collide = false;                // True if last slot nonempty
    for (;;) {
        CounterCell[] as; CounterCell a; int n; long v;
        if ((as = counterCells) != null && (n = as.length) > 0) {
            // Case 1: the probed slot is empty; try to install a new cell.
            if ((a = as[(n - 1) & h]) == null) {
                if (cellsBusy == 0) {            // Try to attach new Cell
                    // Allocate before taking the guard.
                    CounterCell r = new CounterCell(x); // Optimistic create
                    if (cellsBusy == 0 &&
                        U.compareAndSwapInt(this, CELLSBUSY, 0, 1)) {
                        boolean created = false;
                        try {               // Recheck under lock
                            CounterCell[] rs; int m, j;
                            if ((rs = counterCells) != null &&
                                (m = rs.length) > 0 &&
                                rs[j = (m - 1) & h] == null) {
                                rs[j] = r;
                                created = true;
                            }
                        } finally {
                            // Release the guard.
                            cellsBusy = 0;
                        }
                        if (created)
                            break;
                        continue;           // Slot is now non-empty
                    }
                }
                collide = false;
            }
            // Case 2: the caller's CAS failed; move to a new probe first.
            else if (!wasUncontended)       // CAS already known to fail
                wasUncontended = true;      // Continue after rehash
            // Case 3: try to add directly to the existing cell.
            else if (U.compareAndSwapLong(a, CELLVALUE, v = a.value, v + x))
                break;
            // Array replaced meanwhile, or already at least NCPU long:
            // do not grow.
            else if (counterCells != as || n >= NCPU)
                collide = false;            // At max size or stale
            // First failed CAS on this array: note it and retry on a new probe.
            else if (!collide)
                collide = true;
            // Failed again: take the guard and double the array.
            else if (cellsBusy == 0 &&
                     U.compareAndSwapInt(this, CELLSBUSY, 0, 1)) {
                try {
                    if (counterCells == as) {// Expand table unless stale
                        CounterCell[] rs = new CounterCell[n << 1];
                        for (int i = 0; i < n; ++i)
                            rs[i] = as[i];
                        counterCells = rs;
                    }
                } finally {
                    cellsBusy = 0;
                }
                collide = false;
                continue;                   // Retry with expanded table
            }
            // Pick a different probe before the next attempt.
            h = ThreadLocalRandom.advanceProbe(h);
        }
        // Cell array is null or empty: try to take the guard and create it.
        else if (cellsBusy == 0 && counterCells == as &&
                 U.compareAndSwapInt(this, CELLSBUSY, 0, 1)) {
            boolean init = false;
            try {                           // Initialize table
                // Re-check under the guard that nobody else created it.
                if (counterCells == as) {
                    // Two slots; only the one selected by the low probe bit
                    // is filled, and it carries x.
                    CounterCell[] rs = new CounterCell[2];
                    rs[h & 1] = new CounterCell(x);
                    counterCells = rs;
                    init = true;
                }
            } finally {
                cellsBusy = 0;
            }
            if (init)
                break;
        }
        // No array and the guard is taken: try baseCount.
        else if (U.compareAndSwapLong(this, BASECOUNT, v = baseCount, v + x))
            break;                          // Fall back on using base
    }
}
/**
 * Moves the bins of {@code tab} into a table of twice the size. Threads
 * claim index ranges through a CAS on transferIndex and move the bins in
 * their range.
 *
 * @param tab     the table being resized
 * @param nextTab the destination table, or null when this call initiates
 *                the resize and must allocate it
 */
private final void transfer(Node<K,V>[] tab, Node<K,V>[] nextTab) {
    int n = tab.length, stride;
    // Slots claimed per CAS: (n / 8) / NCPU on multi-CPU machines, otherwise
    // n, and never less than MIN_TRANSFER_STRIDE.
    if ((stride = (NCPU > 1) ? (n >>> 3) / NCPU : n) < MIN_TRANSFER_STRIDE)
        stride = MIN_TRANSFER_STRIDE; // subdivide range
    if (nextTab == null) {            // initiating
        try {
            @SuppressWarnings("unchecked")
            Node<K,V>[] nt = (Node<K,V>[])new Node<?,?>[n << 1];
            nextTab = nt;
        } catch (Throwable ex) {      // try to cope with OOME
            // Allocation failed: set the threshold to Integer.MAX_VALUE.
            sizeCtl = Integer.MAX_VALUE;
            return;
        }
        nextTable = nextTab;
        // Ranges are handed out downwards, starting from index n.
        transferIndex = n;
    }
    int nextn = nextTab.length;
    ForwardingNode<K,V> fwd = new ForwardingNode<K,V>(nextTab);
    boolean advance = true;
    boolean finishing = false; // to ensure sweep before committing nextTab
    for (int i = 0, bound = 0;;) {
        Node<K,V> f; int fh;
        // Pick the next slot: step down inside the claimed range, or claim a
        // new range from transferIndex.
        while (advance) {
            int nextIndex, nextBound;
            // Still inside the claimed range, or in the final sweep.
            if (--i >= bound || finishing)
                advance = false;
            // Nothing left to claim.
            else if ((nextIndex = transferIndex) <= 0) {
                i = -1;
                advance = false;
            }
            // Claim [nextBound, nextIndex) by lowering transferIndex.
            else if (U.compareAndSwapInt
                     (this, TRANSFERINDEX, nextIndex,
                      nextBound = (nextIndex > stride ?
                                   nextIndex - stride : 0))) {
                bound = nextBound;
                // i then runs from nextIndex - 1 down to bound.
                i = nextIndex - 1;
                advance = false;
            }
        }
        // i < 0: no more work for this thread.
        // NOTE(review): how i >= n or i + n >= nextn can arise is not evident
        // from this block; they look like defensive bounds checks.
        if (i < 0 || i >= n || i + n >= nextn) {
            int sc;
            if (finishing) {
                // Final sweep done: publish the new table; the threshold
                // becomes 2n - n/2, i.e. 75% of the new capacity.
                nextTable = null;
                table = nextTab;
                sizeCtl = (n << 1) - (n >>> 1);
                return;
            }
            // Deregister this thread from the resize.
            if (U.compareAndSwapInt(this, SIZECTL, sc = sizeCtl, sc - 1)) {
                // Only the last thread to leave saw the initiator's value,
                // (stamp << SHIFT) + 2; every other thread just returns.
                if ((sc - 2) != resizeStamp(n) << RESIZE_STAMP_SHIFT)
                    return;
                // The last thread re-scans every slot from n - 1 down to 0
                // (finishing keeps the loop stepping) before committing.
                finishing = advance = true;
                i = n; // recheck before commit
            }
        }
        // Empty slot: mark it with the forwarding node.
        else if ((f = tabAt(tab, i)) == null)
            advance = casTabAt(tab, i, null, fwd);
        else if ((fh = f.hash) == MOVED)
            advance = true; // already processed
        else {
            synchronized (f) {
                // Re-check under the lock that f is still the head of bin i.
                if (tabAt(tab, i) == f) {
                    // ln goes to index i of the new table, hn to i + n; a node
                    // goes to hn when (hash & n) != 0.
                    Node<K,V> ln, hn;
                    // Non-negative head hash: linked-list bin.
                    if (fh >= 0) {
                        int runBit = fh & n;
                        Node<K,V> lastRun = f;
                        // Find lastRun, the first node of the trailing run in
                        // which every node has the same (hash & n) bit.
                        for (Node<K,V> p = f.next; p != null; p = p.next) {
                            int b = p.hash & n;
                            if (b != runBit) {
                                runBit = b;
                                lastRun = p;
                            }
                        }
                        // The trailing run is reused as-is as the tail of
                        // the matching list.
                        if (runBit == 0) {
                            ln = lastRun;
                            hn = null;
                        }
                        else {
                            hn = lastRun;
                            ln = null;
                        }
                        // Nodes before lastRun are copied and prepended to
                        // ln or hn.
                        for (Node<K,V> p = f; p != lastRun; p = p.next) {
                            int ph = p.hash; K pk = p.key; V pv = p.val;
                            if ((ph & n) == 0)
                                ln = new Node<K,V>(ph, pk, pv, ln);
                            else
                                hn = new Node<K,V>(ph, pk, pv, hn);
                        }
                        // Publish both halves, then mark the old bin.
                        setTabAt(nextTab, i, ln);
                        setTabAt(nextTab, i + n, hn);
                        setTabAt(tab, i, fwd);
                        advance = true;
                    }
                    // Tree bin: split into a low and a high doubly linked
                    // list of fresh TreeNodes, counting each side.
                    else if (f instanceof TreeBin) {
                        TreeBin<K,V> t = (TreeBin<K,V>)f;
                        TreeNode<K,V> lo = null, loTail = null;
                        TreeNode<K,V> hi = null, hiTail = null;
                        int lc = 0, hc = 0;
                        for (Node<K,V> e = t.first; e != null; e = e.next) {
                            int h = e.hash;
                            TreeNode<K,V> p = new TreeNode<K,V>
                                (h, e.key, e.val, null, null);
                            if ((h & n) == 0) {
                                if ((p.prev = loTail) == null)
                                    lo = p;
                                else
                                    loTail.next = p;
                                loTail = p;
                                ++lc;
                            }
                            else {
                                if ((p.prev = hiTail) == null)
                                    hi = p;
                                else
                                    hiTail.next = p;
                                hiTail = p;
                                ++hc;
                            }
                        }
                        // Small halves go through untreeify; if the other
                        // half is empty the original TreeBin is reused.
                        ln = (lc <= UNTREEIFY_THRESHOLD) ? untreeify(lo) :
                            (hc != 0) ? new TreeBin<K,V>(lo) : t;
                        hn = (hc <= UNTREEIFY_THRESHOLD) ? untreeify(hi) :
                            (lc != 0) ? new TreeBin<K,V>(hi) : t;
                        setTabAt(nextTab, i, ln);
                        setTabAt(nextTab, i + n, hn);
                        setTabAt(tab, i, fwd);
                        advance = true;
                    }
                }
            }
        }
    }
}
final Node<K,V>[] helpTransfer(Node<K,V>[] tab, Node<K,V> f) {Node<K,V>[] nextTab; int sc;//明确符合帮助扩容的基础条件后,尝试将自身线程加入到扩容线程中if (tab != null && (f instanceof ForwardingNode) &&(nextTab = ((ForwardingNode<K,V>)f).nextTable) != null) {int rs = resizeStamp(tab.length);while (nextTab == nextTable && table == tab &&(sc = sizeCtl) < 0) {//(sc >>> RESIZE_STAMP_SHIFT) != rs意味着tab已经变化了//sc == rs + 1意味着扩容已经结束,最后一个要退出的线程正在执行检查流程if ((sc >>> RESIZE_STAMP_SHIFT) != rs || sc == rs + 1 ||sc == rs + MAX_RESIZERS || transferIndex <= 0)break;if (U.compareAndSwapInt(this, SIZECTL, sc, sc + 1)) {transfer(tab, nextTab);break;}}return nextTab;}return table;}
获取元素的方式和之前的版本相差不多,都是通过hash定位到对应的槽位上进行操作.与之前相比就是现在的槽位上可能是一个链表元素,或者一个树元素,还有可能是一种代表正在扩容的特殊Node.
public V get(Object key) {Node<K,V>[] tab; Node<K,V> e, p; int n, eh; K ek;int h = spread(key.hashCode());if ((tab = table) != null && (n = tab.length) > 0 &&(e = tabAt(tab, (n - 1) & h)) != null) {if ((eh = e.hash) == h) {if ((ek = e.key) == key || (ek != null && key.equals(ek)))return e.val;}else if (eh < 0)//需要注意的就是这个地方,槽位上不是链表的时候,通过特殊节点的find方法来进行对应key的寻找,下面以扩容节点来作为说明return (p = e.find(h, key)) != null ? p.val : null;while ((e = e.next) != null) {if (e.hash == h &&((ek = e.key) == key || (ek != null && key.equals(ek))))return e.val;}}return null;}
扩容节点的find方法如下
Node<K,V> find(int h, Object k) {// loop to avoid arbitrarily deep recursion on forwarding nodes//这里的nextTable是类属性,所以必然是有值,代表的下一个转移数据的table,寻找数据的思路和普通的tab没有区别,只不过在寻找的过程也可能发现tab又扩容了,此时碰到ForwandingNode的话,则重新开始循环,而不是直接调用其节点的find方法.根据jdk的注解,这个是为了避免过深的方法嵌套.outer: for (Node<K,V>[] tab = nextTable;;) {Node<K,V> e; int n;if (k == null || tab == null || (n = tab.length) == 0 ||(e = tabAt(tab, (n - 1) & h)) == null)return null;for (;;) {int eh; K ek;if ((eh = e.hash) == h &&((ek = e.key) == k || (ek != null && k.equals(ek))))return e;if (eh < 0) {if (e instanceof ForwardingNode) {tab = ((ForwardingNode<K,V>)e).nextTable;continue outer;}elsereturn e.find(h, k);}if ((e = e.next) == null)return null;}}}