@@ -427,6 +427,14 @@ static inline xfarray_idx_t *xfarray_sortinfo_hi(struct xfarray_sortinfo *si)
427427 return xfarray_sortinfo_lo (si ) + si -> max_stack_depth ;
428428}
429429
430+ /* Size of each element in the quicksort pivot array. */
431+ static inline size_t
432+ xfarray_pivot_rec_sz (
433+ struct xfarray * array )
434+ {
435+ return round_up (array -> obj_size , 8 ) + sizeof (xfarray_idx_t );
436+ }
437+
430438/* Allocate memory to handle the sort. */
431439static inline int
432440xfarray_sortinfo_alloc (
@@ -437,8 +445,16 @@ xfarray_sortinfo_alloc(
437445{
438446 struct xfarray_sortinfo * si ;
439447 size_t nr_bytes = sizeof (struct xfarray_sortinfo );
448+ size_t pivot_rec_sz = xfarray_pivot_rec_sz (array );
440449 int max_stack_depth ;
441450
451+ /*
452+ * The median-of-nine pivot algorithm doesn't work if a subset has
453+ * fewer than 9 items. Make sure the in-memory sort will always take
454+ * over for subsets where this wouldn't be the case.
455+ */
456+ BUILD_BUG_ON (XFARRAY_QSORT_PIVOT_NR >= XFARRAY_ISORT_NR );
457+
442458 /*
443459 * Tail-call recursion during the partitioning phase means that
444460 * quicksort will never recurse more than log2(nr) times. We need one
@@ -453,8 +469,10 @@ xfarray_sortinfo_alloc(
453469 /* Each level of quicksort uses a lo and a hi index */
454470 nr_bytes += max_stack_depth * sizeof (xfarray_idx_t ) * 2 ;
455471
456- /* Scratchpad for in-memory sort, or one record for the pivot */
457- nr_bytes += (XFARRAY_ISORT_NR * array -> obj_size );
472+ /* Scratchpad for in-memory sort, or finding the pivot */
473+ nr_bytes += max_t (size_t ,
474+ (XFARRAY_QSORT_PIVOT_NR + 1 ) * pivot_rec_sz ,
475+ XFARRAY_ISORT_NR * array -> obj_size );
458476
459477 si = kvzalloc (nr_bytes , XCHK_GFP_FLAGS );
460478 if (!si )
@@ -632,91 +650,143 @@ static inline void *xfarray_sortinfo_pivot(struct xfarray_sortinfo *si)
632650 return xfarray_sortinfo_hi (si ) + si -> max_stack_depth ;
633651}
634652
653+ /* Return a pointer to the start of the pivot array. */
654+ static inline void *
655+ xfarray_sortinfo_pivot_array (
656+ struct xfarray_sortinfo * si )
657+ {
658+ return xfarray_sortinfo_pivot (si ) + si -> array -> obj_size ;
659+ }
660+
/*
 * Return the address of pivot array element @pa_idx, given the array base
 * @pa and the per-element size @pa_recsz.  The xfarray record is stored at
 * the start of each pivot array element, so this is also the address of that
 * element's record.
 */
static inline void *
xfarray_pivot_array_rec(
	void			*pa,
	size_t			pa_recsz,
	unsigned int		pa_idx)
{
	size_t			byte_off = pa_recsz * pa_idx;

	return (char *)pa + byte_off;
}
670+
671+ /* The xfarray index is stored at the end of each pivot array element. */
672+ static inline xfarray_idx_t *
673+ xfarray_pivot_array_idx (
674+ void * pa ,
675+ size_t pa_recsz ,
676+ unsigned int pa_idx )
677+ {
678+ return xfarray_pivot_array_rec (pa , pa_recsz , pa_idx + 1 ) -
679+ sizeof (xfarray_idx_t );
680+ }
681+
635682/*
636683 * Find a pivot value for quicksort partitioning, swap it with a[lo], and save
637684 * the cached pivot record for the next step.
638685 *
639- * Select the median value from a[lo], a[mid], and a[hi]. Put the median in
640- * a[lo], the lowest in a[mid], and the highest in a[hi] . Using the median of
641- * the three reduces the chances that we pick the worst case pivot value, since
642- * it's likely that our array values are nearly sorted.
686+ * Load evenly-spaced records within the given range into memory, sort them,
687+ * and choose the pivot from the median record. Using multiple points will
688+ * improve the quality of the pivot selection, and hopefully avoid the worst
689+ * quicksort behavior, since our array values are nearly always evenly sorted.
 *
 * The samples are taken at a[lo], a[hi], and XFARRAY_QSORT_PIVOT_NR - 2
 * indexes spaced (hi - lo) / (XFARRAY_QSORT_PIVOT_NR - 1) slots apart in
 * between. Each pivot array element also carries the xfarray index selected
 * for that sample, so that the chosen median can be swapped with a[lo] once
 * the samples have been sorted.
 *
 * Returns 0 on success, the error from a failed record load or store, or
 * -EFSCORRUPTED if the cached copy of a[lo] cannot be found in the pivot
 * array.
643690 */
644691STATIC int
645692xfarray_qsort_pivot (
646693 struct xfarray_sortinfo * si ,
647694 xfarray_idx_t lo ,
648695 xfarray_idx_t hi )
649696{
650- void * a = xfarray_sortinfo_pivot (si );
651- void * b = xfarray_scratch (si -> array );
652- xfarray_idx_t mid = lo + ((hi - lo ) / 2 );
697+ void * pivot = xfarray_sortinfo_pivot (si );
698+ void * parray = xfarray_sortinfo_pivot_array (si );
699+ void * recp ;
700+ xfarray_idx_t * idxp ;
701+ xfarray_idx_t step = (hi - lo ) / (XFARRAY_QSORT_PIVOT_NR - 1 );
702+ size_t pivot_rec_sz = xfarray_pivot_rec_sz (si -> array );
703+ int i , j ;
653704 int error ;
654705
655- /* if a[mid] < a[lo], swap a[mid] and a[lo]. */
656- error = xfarray_sort_load (si , mid , a );
657- if (error )
658- return error ;
659- error = xfarray_sort_load (si , lo , b );
660- if (error )
661- return error ;
662- if (xfarray_sort_cmp (si , a , b ) < 0 ) {
663- error = xfarray_sort_store (si , lo , a );
664- if (error )
665- return error ;
666- error = xfarray_sort_store (si , mid , b );
667- if (error )
668- return error ;
669- }
 /*
  * A zero step would make the interior sample indexes all equal lo, so the
  * range must span at least XFARRAY_QSORT_PIVOT_NR - 1 slots.
  */
706+ ASSERT (step > 0 );
670707
671- /* if a[hi] < a[mid], swap a[mid] and a[hi]. */
672- error = xfarray_sort_load (si , hi , a );
673- if (error )
674- return error ;
675- error = xfarray_sort_load (si , mid , b );
676- if (error )
677- return error ;
678- if (xfarray_sort_cmp (si , a , b ) < 0 ) {
679- error = xfarray_sort_store (si , mid , a );
680- if (error )
681- return error ;
682- error = xfarray_sort_store (si , hi , b );
683- if (error )
684- return error ;
685- } else {
686- goto move_front ;
708+ /*
709+ * Load the xfarray indexes of the records we intend to sample into the
710+ * pivot array: lo first, hi last, and multiples of step in between.
711+ */
712+ idxp = xfarray_pivot_array_idx (parray , pivot_rec_sz , 0 );
713+ * idxp = lo ;
714+ for (i = 1 ; i < XFARRAY_QSORT_PIVOT_NR - 1 ; i ++ ) {
715+ idxp = xfarray_pivot_array_idx (parray , pivot_rec_sz , i );
716+ * idxp = lo + (i * step );
687717 }
718+ idxp = xfarray_pivot_array_idx (parray , pivot_rec_sz ,
719+ XFARRAY_QSORT_PIVOT_NR - 1 );
720+ * idxp = hi ;
688721
689- /* if a[mid] < a[lo], swap a[mid] and a[lo]. */
690- error = xfarray_sort_load (si , mid , a );
691- if (error )
692- return error ;
693- error = xfarray_sort_load (si , lo , b );
694- if (error )
695- return error ;
696- if (xfarray_sort_cmp (si , a , b ) < 0 ) {
697- error = xfarray_sort_store (si , lo , a );
698- if (error )
699- return error ;
700- error = xfarray_sort_store (si , mid , b );
722+ /* Load the selected xfarray records into the pivot array. */
723+ for (i = 0 ; i < XFARRAY_QSORT_PIVOT_NR ; i ++ ) {
724+ xfarray_idx_t idx ;
725+
726+ recp = xfarray_pivot_array_rec (parray , pivot_rec_sz , i );
727+ idxp = xfarray_pivot_array_idx (parray , pivot_rec_sz , i );
728+
729+ /* No unset slots in the xfarray; load a[*idxp] directly into the array. */
730+ if (likely (si -> array -> unset_slots == 0 )) {
731+ error = xfarray_sort_load (si , * idxp , recp );
732+ if (error )
733+ return error ;
734+ continue ;
735+ }
736+
737+ /*
738+ * Load non-null records into the scratchpad without changing
739+ * the xfarray_idx_t in the pivot array: xfarray_load_next is handed
 * a local copy of the index, so only that copy can be modified.
740+ */
741+ idx = * idxp ;
742+ xfarray_sort_bump_loads (si );
743+ error = xfarray_load_next (si -> array , & idx , recp );
701744 if (error )
702745 return error ;
703746 }
704747
705- move_front :
 /* Sort whole pivot array elements, i.e. each record with its index. */
748+ xfarray_sort_bump_heapsorts (si );
749+ sort (parray , XFARRAY_QSORT_PIVOT_NR , pivot_rec_sz , si -> cmp_fn , NULL );
750+
706751 /*
707- * Move our selected pivot to a[lo]. Recall that a == si->pivot, so
708- * this leaves us with the pivot cached in the sortinfo structure.
752+ * We sorted the pivot array records (which includes the xfarray
753+ * indices) in xfarray record order. The median element of the pivot
754+ * array contains the xfarray record that we will use as the pivot.
755+ * Copy that xfarray record to the designated space.
709756 */
710- error = xfarray_sort_load (si , lo , b );
711- if (error )
712- return error ;
713- error = xfarray_sort_load (si , mid , a );
714- if (error )
715- return error ;
716- error = xfarray_sort_store (si , mid , b );
757+ recp = xfarray_pivot_array_rec (parray , pivot_rec_sz ,
758+ XFARRAY_QSORT_PIVOT_NR / 2 );
759+ memcpy (pivot , recp , si -> array -> obj_size );
760+
761+ /* If the pivot record we chose was already in a[lo] then we're done. */
762+ idxp = xfarray_pivot_array_idx (parray , pivot_rec_sz ,
763+ XFARRAY_QSORT_PIVOT_NR / 2 );
764+ if (* idxp == lo )
765+ return 0 ;
766+
767+ /*
768+ * Find the cached copy of a[lo] in the pivot array so that we can swap
769+ * a[lo] and a[pivot]. j stays -1 if no element carries index lo.
770+ */
771+ for (i = 0 , j = -1 ; i < XFARRAY_QSORT_PIVOT_NR ; i ++ ) {
772+ idxp = xfarray_pivot_array_idx (parray , pivot_rec_sz , i );
773+ if (* idxp == lo )
774+ j = i ;
775+ }
776+ if (j < 0 ) {
777+ ASSERT (j >= 0 );
778+ return - EFSCORRUPTED ;
779+ }
780+
781+ /* Swap a[lo] and a[pivot]: the pivot record goes to a[lo] first. */
782+ error = xfarray_sort_store (si , lo , pivot );
717783 if (error )
718784 return error ;
719- return xfarray_sort_store (si , lo , a );
785+
 /* Then the cached a[lo] record goes to the median's saved index. */
786+ recp = xfarray_pivot_array_rec (parray , pivot_rec_sz , j );
787+ idxp = xfarray_pivot_array_idx (parray , pivot_rec_sz ,
788+ XFARRAY_QSORT_PIVOT_NR / 2 );
789+ return xfarray_sort_store (si , * idxp , recp );
720790}
721791
722792/*
@@ -828,7 +898,7 @@ xfarray_sort_load_cached(
828898 * particularly expensive in the kernel.
829899 *
830900 * 2. For arrays with records in arbitrary or user-controlled order, choose the
831- * pivot element using a median-of-three decision tree. This reduces the
901+ * pivot element by sorting nine sampled records and taking the median. This reduces the
832902 * probability of selecting a bad pivot value which causes worst case
833903 * behavior (i.e. partition sizes of 1).
834904 *
0 commit comments