@@ -14,19 +14,19 @@ Using 'nulls'
1414=============
1515
1616Using special markers (called 'nulls') is a convenient way
17- to solve following problem :
17+ to solve the following problem.
1818
19- A typical RCU linked list managing objects which are
20- allocated with SLAB_TYPESAFE_BY_RCU kmem_cache can
21- use following algos :
19+ Without 'nulls', a typical RCU linked list managing objects which are
20+ allocated with SLAB_TYPESAFE_BY_RCU kmem_cache can use the following
21+ algorithms :
2222
23- 1) Lookup algo
24- --------------
23+ 1) Lookup algorithm
24+ -------------------
2525
2626::
2727
28- rcu_read_lock()
2928 begin:
29+ rcu_read_lock()
3030 obj = lockless_lookup(key);
3131 if (obj) {
3232 if (!try_get_ref(obj)) // might fail for free objects
@@ -38,6 +38,7 @@ use following algos :
3838 */
3939 if (obj->key != key) { // not the object we expected
4040 put_ref(obj);
41+ rcu_read_unlock();
4142 goto begin;
4243 }
4344 }
@@ -52,9 +53,9 @@ but a version with an additional memory barrier (smp_rmb())
5253 {
5354 struct hlist_node *node, *next;
5455 for (pos = rcu_dereference((head)->first);
55- pos && ({ next = pos->next; smp_rmb(); prefetch(next); 1; }) &&
56- ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });
57- pos = rcu_dereference(next))
56+ pos && ({ next = pos->next; smp_rmb(); prefetch(next); 1; }) &&
57+ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });
58+ pos = rcu_dereference(next))
5859 if (obj->key == key)
5960 return obj;
6061 return NULL;
@@ -64,9 +65,9 @@ And note the traditional hlist_for_each_entry_rcu() misses this smp_rmb()::
6465
6566 struct hlist_node *node;
6667 for (pos = rcu_dereference((head)->first);
67- pos && ({ prefetch(pos->next); 1; }) &&
68- ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });
69- pos = rcu_dereference(pos->next))
68+ pos && ({ prefetch(pos->next); 1; }) &&
69+ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });
70+ pos = rcu_dereference(pos->next))
7071 if (obj->key == key)
7172 return obj;
7273 return NULL;
@@ -82,36 +83,32 @@ Quoting Corey Minyard::
8283 solved by pre-fetching the "next" field (with proper barriers) before
8384 checking the key."
8485
85- 2) Insert algo
86- --------------
86+ 2) Insertion algorithm
87+ ----------------------
8788
8889We need to make sure a reader cannot read the new 'obj->obj_next' value
89- and previous value of 'obj->key'. Or else , an item could be deleted
90+ and previous value of 'obj->key'. Otherwise, an item could be deleted
9091from a chain, and inserted into another chain. If new chain was empty
91- before the move, 'next' pointer is NULL, and lockless reader can
92- not detect it missed following items in original chain.
92+ before the move, 'next' pointer is NULL, and lockless reader can not
93+ detect the fact that it missed following items in original chain.
9394
9495::
9596
9697 /*
97- * Please note that new inserts are done at the head of list,
98- * not in the middle or end.
99- */
98+ * Please note that new inserts are done at the head of list,
99+ * not in the middle or end.
100+ */
100101 obj = kmem_cache_alloc(...);
101102 lock_chain(); // typically a spin_lock()
102103 obj->key = key;
103- /*
104- * we need to make sure obj->key is updated before obj->next
105- * or obj->refcnt
106- */
107- smp_wmb();
108- atomic_set(&obj->refcnt, 1);
104+ atomic_set_release(&obj->refcnt, 1); // key before refcnt
109105 hlist_add_head_rcu(&obj->obj_node, list);
110106 unlock_chain(); // typically a spin_unlock()
111107
112108
113- 3) Remove algo
114- --------------
109+ 3) Removal algorithm
110+ --------------------
111+
115112Nothing special here, we can use a standard RCU hlist deletion.
116113But thanks to SLAB_TYPESAFE_BY_RCU, beware a deleted object can be reused
117114very very fast (before the end of RCU grace period)
@@ -133,7 +130,7 @@ Avoiding extra smp_rmb()
133130========================
134131
135132With hlist_nulls we can avoid extra smp_rmb() in lockless_lookup()
136- and extra smp_wmb () in insert function.
133+ and extra _release () in insert function.
137134
138135For example, if we choose to store the slot number as the 'nulls'
139136end-of-list marker for each slot of the hash table, we can detect
@@ -142,59 +139,61 @@ to another chain) checking the final 'nulls' value if
142139the lookup met the end of chain. If final 'nulls' value
143140is not the slot number, then we must restart the lookup at
144141the beginning. If the object was moved to the same chain,
145- then the reader doesn't care : It might eventually
142+ then the reader doesn't care: It might occasionally
146143scan the list again without harm.
147144
148145
149- 1) lookup algo
150- --------------
146+ 1) lookup algorithm
147+ -------------------
151148
152149::
153150
154151 head = &table[slot];
155- rcu_read_lock();
156152 begin:
153+ rcu_read_lock();
157154 hlist_nulls_for_each_entry_rcu(obj, node, head, member) {
158155 if (obj->key == key) {
159- if (!try_get_ref(obj)) // might fail for free objects
156+ if (!try_get_ref(obj)) { // might fail for free objects
157+ rcu_read_unlock();
160158 goto begin;
159+ }
161160 if (obj->key != key) { // not the object we expected
162161 put_ref(obj);
162+ rcu_read_unlock();
163163 goto begin;
164164 }
165- goto out;
165+ goto out;
166+ }
167+ }
168+
169+ // If the nulls value we got at the end of this lookup is
170+ // not the expected one, we must restart lookup.
171+ // We probably met an item that was moved to another chain.
172+ if (get_nulls_value(node) != slot) {
173+ put_ref(obj);
174+ rcu_read_unlock();
175+ goto begin;
166176 }
167- /*
168- * if the nulls value we got at the end of this lookup is
169- * not the expected one, we must restart lookup.
170- * We probably met an item that was moved to another chain.
171- */
172- if (get_nulls_value(node) != slot)
173- goto begin;
174177 obj = NULL;
175178
176179 out:
177180 rcu_read_unlock();
178181
179- 2) Insert function
180- ------------------
182+ 2) Insert algorithm
183+ -------------------
181184
182185::
183186
184187 /*
185- * Please note that new inserts are done at the head of list,
186- * not in the middle or end.
187- */
188+ * Please note that new inserts are done at the head of list,
189+ * not in the middle or end.
190+ */
188191 obj = kmem_cache_alloc(cachep);
189192 lock_chain(); // typically a spin_lock()
190193 obj->key = key;
194+ atomic_set_release(&obj->refcnt, 1); // key before refcnt
191195 /*
192- * changes to obj->key must be visible before refcnt one
193- */
194- smp_wmb();
195- atomic_set(&obj->refcnt, 1);
196- /*
197- * insert obj in RCU way (readers might be traversing chain)
198- */
196+ * insert obj in RCU way (readers might be traversing chain)
197+ */
199198 hlist_nulls_add_head_rcu(&obj->obj_node, list);
200199 unlock_chain(); // typically a spin_unlock()
0 commit comments