@@ -483,38 +483,20 @@ void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i)
483483
484484int drbd_al_begin_io_nonblock (struct drbd_device * device , struct drbd_interval * i )
485485{
486- struct lru_cache * al = device -> act_log ;
487486 /* for bios crossing activity log extent boundaries,
488487 * we may need to activate two extents in one go */
489488 unsigned first = i -> sector >> (AL_EXTENT_SHIFT - 9 );
490489 unsigned last = i -> size == 0 ? first : (i -> sector + (i -> size >> 9 ) - 1 ) >> (AL_EXTENT_SHIFT - 9 );
491- unsigned nr_al_extents ;
492- unsigned available_update_slots ;
493490 unsigned enr ;
494491
495- D_ASSERT (device , first <= last );
496-
497- nr_al_extents = 1 + last - first ; /* worst case: all touched extends are cold. */
498- available_update_slots = min (al -> nr_elements - al -> used ,
499- al -> max_pending_changes - al -> pending_changes );
500-
501- /* We want all necessary updates for a given request within the same transaction
502- * We could first check how many updates are *actually* needed,
503- * and use that instead of the worst-case nr_al_extents */
504- if (available_update_slots < nr_al_extents ) {
505- /* Too many activity log extents are currently "hot".
506- *
507- * If we have accumulated pending changes already,
508- * we made progress.
509- *
510- * If we cannot get even a single pending change through,
511- * stop the fast path until we made some progress,
512- * or requests to "cold" extents could be starved. */
513- if (!al -> pending_changes )
514- __set_bit (__LC_STARVING , & device -> act_log -> flags );
515- return - ENOBUFS ;
492+ if (i -> partially_in_al_next_enr ) {
493+ D_ASSERT (device , first < i -> partially_in_al_next_enr );
494+ D_ASSERT (device , last >= i -> partially_in_al_next_enr );
495+ first = i -> partially_in_al_next_enr ;
516496 }
517497
498+ D_ASSERT (device , first <= last );
499+
518500 /* Is resync active in this area? */
519501 for (enr = first ; enr <= last ; enr ++ ) {
520502 struct lc_element * tmp ;
@@ -529,14 +511,21 @@ int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *
529511 }
530512 }
531513
532- /* Checkout the refcounts.
533- * Given that we checked for available elements and update slots above,
534- * this has to be successful. */
514+ /* Try to checkout the refcounts. */
535515 for (enr = first ; enr <= last ; enr ++ ) {
536516 struct lc_element * al_ext ;
537517 al_ext = lc_get_cumulative (device -> act_log , enr );
538- if (!al_ext )
539- drbd_info (device , "LOGIC BUG for enr=%u\n" , enr );
518+
519+ if (!al_ext ) {
520+ /* Did not work. We may have exhausted the possible
521+ * changes per transaction. Or raced with someone
522+ * "locking" it against changes.
523+ * Remember where to continue from.
524+ */
525+ if (enr > first )
526+ i -> partially_in_al_next_enr = enr ;
527+ return - ENOBUFS ;
528+ }
540529 }
541530 return 0 ;
542531}
@@ -556,7 +545,11 @@ void drbd_al_complete_io(struct drbd_device *device, struct drbd_interval *i)
556545
557546 for (enr = first ; enr <= last ; enr ++ ) {
558547 extent = lc_find (device -> act_log , enr );
559- if (!extent ) {
548+ /* Yes, this masks a bug elsewhere. However, during normal
549+ * operation this is harmless, so no need to crash the kernel
550+ * by the BUG_ON(refcount == 0) in lc_put().
551+ */
552+ if (!extent || extent -> refcnt == 0 ) {
560553 drbd_err (device , "al_complete_io() called on inactive extent %u\n" , enr );
561554 continue ;
562555 }
0 commit comments