@@ -3112,13 +3112,146 @@ u64 cxl_calculate_hpa_offset(u64 dpa_offset, int pos, u8 eiw, u16 eig)
31123112}
31133113EXPORT_SYMBOL_FOR_MODULES (cxl_calculate_hpa_offset , "cxl_translate" );
31143114
static int decode_pos(int region_ways, int hb_ways, int pos, int *pos_port,
		      int *pos_hb)
{
	int eps_per_hb;

	/*
	 * Only the 3-6-12 way interleaves defined in the CXL Spec 4.0
	 * 9.13.1.1 Legal Interleaving Configurations are decodable
	 * here: hb_ways must be 3, 6, or 12 and region_ways must be a
	 * 3-6-12 multiple of it. Region creation should already reject
	 * anything else, but re-check to avoid a silent bad decode.
	 */
	if (hb_ways != 3 && hb_ways != 6 && hb_ways != 12)
		return -EINVAL;
	if (region_ways != 3 && region_ways != 6 && region_ways != 12)
		return -EINVAL;
	if (region_ways % hb_ways)
		return -EINVAL;

	/*
	 * Endpoints are laid out contiguously per host bridge, with
	 * each host bridge contributing an equal share. Division picks
	 * the host bridge position, modulo the port within it.
	 */
	eps_per_hb = region_ways / hb_ways;
	*pos_hb = pos / eps_per_hb;
	*pos_port = pos % eps_per_hb;

	return 0;
}
3154+
3155+ /*
3156+ * restore_parent() reconstruct the address in parent
3157+ *
3158+ * This math, specifically the bitmask creation 'mask = gran - 1' relies
3159+ * on the CXL Spec requirement that interleave granularity is always a
3160+ * power of two.
3161+ *
3162+ * [mask] isolate the offset with the granularity
3163+ * [addr & ~mask] remove the offset leaving the aligned portion
3164+ * [* ways] distribute across all interleave ways
3165+ * [+ (pos * gran)] add the positional offset
3166+ * [+ (addr & mask)] restore the masked offset
3167+ */
3168+ static u64 restore_parent (u64 addr , u64 pos , u64 gran , u64 ways )
3169+ {
3170+ u64 mask = gran - 1 ;
3171+
3172+ return ((addr & ~mask ) * ways ) + (pos * gran ) + (addr & mask );
3173+ }
3174+
3175+ /*
3176+ * unaligned_dpa_to_hpa() translates a DPA to HPA when the region resource
3177+ * start address is not aligned at Host Bridge Interleave Ways * 256MB.
3178+ *
3179+ * Unaligned start addresses only occur with MOD3 interleaves. All power-
3180+ * of-two interleaves are guaranteed aligned.
3181+ */
3182+ static u64 unaligned_dpa_to_hpa (struct cxl_decoder * cxld ,
3183+ struct cxl_region_params * p , int pos , u64 dpa )
3184+ {
3185+ int ways_port = p -> interleave_ways / cxld -> interleave_ways ;
3186+ int gran_port = p -> interleave_granularity ;
3187+ int gran_hb = cxld -> interleave_granularity ;
3188+ int ways_hb = cxld -> interleave_ways ;
3189+ int pos_port , pos_hb , gran_shift ;
3190+ u64 hpa_port = 0 ;
3191+
3192+ /* Decode an endpoint 'pos' into port and host-bridge components */
3193+ if (decode_pos (p -> interleave_ways , ways_hb , pos , & pos_port , & pos_hb )) {
3194+ dev_dbg (& cxld -> dev , "not supported for region ways:%d\n" ,
3195+ p -> interleave_ways );
3196+ return ULLONG_MAX ;
3197+ }
3198+
3199+ /* Restore the port parent address if needed */
3200+ if (gran_hb != gran_port )
3201+ hpa_port = restore_parent (dpa , pos_port , gran_port , ways_port );
3202+ else
3203+ hpa_port = dpa ;
3204+
3205+ /*
3206+ * Complete the HPA reconstruction by restoring the address as if
3207+ * each HB position is a candidate. Test against expected pos_hb
3208+ * to confirm match.
3209+ */
3210+ gran_shift = ilog2 (gran_hb );
3211+ for (int position = 0 ; position < ways_hb ; position ++ ) {
3212+ u64 shifted , hpa ;
3213+
3214+ hpa = restore_parent (hpa_port , position , gran_hb , ways_hb );
3215+ hpa += p -> res -> start ;
3216+
3217+ shifted = hpa >> gran_shift ;
3218+ if (do_div (shifted , ways_hb ) == pos_hb )
3219+ return hpa ;
3220+ }
3221+
3222+ dev_dbg (& cxld -> dev , "fail dpa:%#llx region:%pr pos:%d\n" , dpa , p -> res ,
3223+ pos );
3224+ dev_dbg (& cxld -> dev , " port-w/g/p:%d/%d/%d hb-w/g/p:%d/%d/%d\n" ,
3225+ ways_port , gran_port , pos_port , ways_hb , gran_hb , pos_hb );
3226+
3227+ return ULLONG_MAX ;
3228+ }
3229+
3230+ static bool region_is_unaligned_mod3 (struct cxl_region * cxlr )
3231+ {
3232+ struct cxl_root_decoder * cxlrd = to_cxl_root_decoder (cxlr -> dev .parent );
3233+ struct cxl_decoder * cxld = & cxlrd -> cxlsd .cxld ;
3234+ struct cxl_region_params * p = & cxlr -> params ;
3235+ int hbiw = cxld -> interleave_ways ;
3236+ u64 rem ;
3237+
3238+ if (is_power_of_2 (hbiw ))
3239+ return false;
3240+
3241+ div64_u64_rem (p -> res -> start , (u64 )hbiw * SZ_256M , & rem );
3242+
3243+ return (rem != 0 );
3244+ }
3245+
31153246u64 cxl_dpa_to_hpa (struct cxl_region * cxlr , const struct cxl_memdev * cxlmd ,
31163247 u64 dpa )
31173248{
31183249 struct cxl_root_decoder * cxlrd = to_cxl_root_decoder (cxlr -> dev .parent );
3250+ struct cxl_decoder * cxld = & cxlrd -> cxlsd .cxld ;
31193251 struct cxl_region_params * p = & cxlr -> params ;
31203252 struct cxl_endpoint_decoder * cxled = NULL ;
31213253 u64 dpa_offset , hpa_offset , hpa ;
3254+ bool unaligned = false;
31223255 u16 eig = 0 ;
31233256 u8 eiw = 0 ;
31243257 int pos ;
@@ -3132,15 +3265,32 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
31323265 if (!cxled )
31333266 return ULLONG_MAX ;
31343267
3268+ dpa_offset = dpa - cxl_dpa_resource_start (cxled );
3269+
3270+ /* Unaligned calc for MOD3 interleaves not hbiw * 256MB aligned */
3271+ unaligned = region_is_unaligned_mod3 (cxlr );
3272+ if (unaligned ) {
3273+ hpa = unaligned_dpa_to_hpa (cxld , p , cxled -> pos , dpa_offset );
3274+ if (hpa == ULLONG_MAX )
3275+ return ULLONG_MAX ;
3276+
3277+ goto skip_aligned ;
3278+ }
3279+ /*
3280+ * Aligned calc for all power-of-2 interleaves and for MOD3
3281+ * interleaves that are aligned at hbiw * 256MB
3282+ */
31353283 pos = cxled -> pos ;
31363284 ways_to_eiw (p -> interleave_ways , & eiw );
31373285 granularity_to_eig (p -> interleave_granularity , & eig );
31383286
3139- dpa_offset = dpa - cxl_dpa_resource_start (cxled );
31403287 hpa_offset = cxl_calculate_hpa_offset (dpa_offset , pos , eiw , eig );
31413288
31423289 /* Apply the hpa_offset to the region base address */
3143- hpa = hpa_offset + p -> res -> start + p -> cache_size ;
3290+ hpa = hpa_offset + p -> res -> start ;
3291+
3292+ skip_aligned :
3293+ hpa += p -> cache_size ;
31443294
31453295 /* Root decoder translation overrides typical modulo decode */
31463296 if (cxlrd -> ops .hpa_to_spa )
@@ -3151,9 +3301,9 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
31513301 "Addr trans fail: hpa 0x%llx not in region\n" , hpa );
31523302 return ULLONG_MAX ;
31533303 }
3154-
3155- /* Simple chunk check, by pos & gran, only applies to modulo decodes */
3156- if (! cxlrd -> ops . hpa_to_spa && !cxl_is_hpa_in_chunk (hpa , cxlr , pos ))
3304+ /* Chunk check applies to aligned modulo decodes only */
3305+ if (! unaligned && ! cxlrd -> ops . hpa_to_spa &&
3306+ !cxl_is_hpa_in_chunk (hpa , cxlr , pos ))
31573307 return ULLONG_MAX ;
31583308
31593309 return hpa ;
0 commit comments