@@ -1113,6 +1113,19 @@ __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
11131113 if (new_start == lower_32_bits (* prev_start ))
11141114 return ;
11151115
1116+ /*
1117+ * When gt is unparked, we update the gt timestamp and start the ping
1118+ * worker that updates the gt_stamp every POLL_TIME_CLKS. As long as gt
1119+ * is unparked, all switched in contexts will have a start time that is
1120+ * within +/- POLL_TIME_CLKS of the most recent gt_stamp.
1121+ *
1122+ * If neither gt_stamp nor new_start has rolled over, then the
1123+ * gt_stamp_hi does not need to be adjusted, however if one of them has
1124+ * rolled over, we need to adjust gt_stamp_hi accordingly.
1125+ *
1126+ * The below conditions address the cases of new_start rollover and
1127+ * gt_stamp_last rollover respectively.
1128+ */
11161129 if (new_start < gt_stamp_last &&
11171130 (new_start - gt_stamp_last ) <= POLL_TIME_CLKS )
11181131 gt_stamp_hi ++ ;
@@ -1124,17 +1137,45 @@ __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
11241137 * prev_start = ((u64 )gt_stamp_hi << 32 ) | new_start ;
11251138}
11261139
1127- static void guc_update_engine_gt_clks (struct intel_engine_cs * engine )
1140+ /*
1141+ * GuC updates shared memory and KMD reads it. Since this is not synchronized,
1142+ * we run into a race where the value read is inconsistent. Sometimes the
1143+ * inconsistency is in reading the upper MSB bytes of the last_in value when
1144+ * this race occurs. 2 types of cases are seen - upper 8 bits are zero and upper
1145+ 24 bits are zero. Since the truncated values are still non-zero, it is non-trivial
1146+ * determine validity of these values. Instead we read the values multiple times
1147+ until they are consistent. In test runs, 3 attempts result in consistent
1148+ * values. The upper bound is set to 6 attempts and may need to be tuned as per
1149+ any new occurrences.
1150+ */
1151+ static void __get_engine_usage_record (struct intel_engine_cs * engine ,
1152+ u32 * last_in , u32 * id , u32 * total )
11281153{
11291154 struct guc_engine_usage_record * rec = intel_guc_engine_usage (engine );
1155+ int i = 0 ;
1156+
1157+ do {
1158+ * last_in = READ_ONCE (rec -> last_switch_in_stamp );
1159+ * id = READ_ONCE (rec -> current_context_index );
1160+ * total = READ_ONCE (rec -> total_runtime );
1161+
1162+ if (READ_ONCE (rec -> last_switch_in_stamp ) == * last_in &&
1163+ READ_ONCE (rec -> current_context_index ) == * id &&
1164+ READ_ONCE (rec -> total_runtime ) == * total )
1165+ break ;
1166+ } while (++ i < 6 );
1167+ }
1168+
1169+ static void guc_update_engine_gt_clks (struct intel_engine_cs * engine )
1170+ {
11301171 struct intel_engine_guc_stats * stats = & engine -> stats .guc ;
11311172 struct intel_guc * guc = & engine -> gt -> uc .guc ;
1132- u32 last_switch = rec -> last_switch_in_stamp ;
1133- u32 ctx_id = rec -> current_context_index ;
1134- u32 total = rec -> total_runtime ;
1173+ u32 last_switch , ctx_id , total ;
11351174
11361175 lockdep_assert_held (& guc -> timestamp .lock );
11371176
1177+ __get_engine_usage_record (engine , & last_switch , & ctx_id , & total );
1178+
11381179 stats -> running = ctx_id != ~0U && last_switch ;
11391180 if (stats -> running )
11401181 __extend_last_switch (guc , & stats -> start_gt_clk , last_switch );
@@ -1149,23 +1190,51 @@ static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
11491190 }
11501191}
11511192
1152- static void guc_update_pm_timestamp (struct intel_guc * guc ,
1153- struct intel_engine_cs * engine ,
1154- ktime_t * now )
1193+ static u32 gpm_timestamp_shift (struct intel_gt * gt )
1194+ {
1195+ intel_wakeref_t wakeref ;
1196+ u32 reg , shift ;
1197+
1198+ with_intel_runtime_pm (gt -> uncore -> rpm , wakeref )
1199+ reg = intel_uncore_read (gt -> uncore , RPM_CONFIG0 );
1200+
1201+ shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK ) >>
1202+ GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT ;
1203+
1204+ return 3 - shift ;
1205+ }
1206+
1207+ static u64 gpm_timestamp (struct intel_gt * gt )
1208+ {
1209+ u32 lo , hi , old_hi , loop = 0 ;
1210+
1211+ hi = intel_uncore_read (gt -> uncore , MISC_STATUS1 );
1212+ do {
1213+ lo = intel_uncore_read (gt -> uncore , MISC_STATUS0 );
1214+ old_hi = hi ;
1215+ hi = intel_uncore_read (gt -> uncore , MISC_STATUS1 );
1216+ } while (old_hi != hi && loop ++ < 2 );
1217+
1218+ return ((u64 )hi << 32 ) | lo ;
1219+ }
1220+
1221+ static void guc_update_pm_timestamp (struct intel_guc * guc , ktime_t * now )
11551222{
1156- u32 gt_stamp_now , gt_stamp_hi ;
1223+ struct intel_gt * gt = guc_to_gt (guc );
1224+ u32 gt_stamp_lo , gt_stamp_hi ;
1225+ u64 gpm_ts ;
11571226
11581227 lockdep_assert_held (& guc -> timestamp .lock );
11591228
11601229 gt_stamp_hi = upper_32_bits (guc -> timestamp .gt_stamp );
1161- gt_stamp_now = intel_uncore_read ( engine -> uncore ,
1162- RING_TIMESTAMP ( engine -> mmio_base ) );
1230+ gpm_ts = gpm_timestamp ( gt ) >> guc -> timestamp . shift ;
1231+ gt_stamp_lo = lower_32_bits ( gpm_ts );
11631232 * now = ktime_get ();
11641233
1165- if (gt_stamp_now < lower_32_bits (guc -> timestamp .gt_stamp ))
1234+ if (gt_stamp_lo < lower_32_bits (guc -> timestamp .gt_stamp ))
11661235 gt_stamp_hi ++ ;
11671236
1168- guc -> timestamp .gt_stamp = ((u64 )gt_stamp_hi << 32 ) | gt_stamp_now ;
1237+ guc -> timestamp .gt_stamp = ((u64 )gt_stamp_hi << 32 ) | gt_stamp_lo ;
11691238}
11701239
11711240/*
@@ -1208,8 +1277,12 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
12081277 if (!in_reset && intel_gt_pm_get_if_awake (gt )) {
12091278 stats_saved = * stats ;
12101279 gt_stamp_saved = guc -> timestamp .gt_stamp ;
1280+ /*
1281+ * Update gt_clks, then gt timestamp to simplify the 'gt_stamp -
1282+ * start_gt_clk' calculation below for active engines.
1283+ */
12111284 guc_update_engine_gt_clks (engine );
1212- guc_update_pm_timestamp (guc , engine , now );
1285+ guc_update_pm_timestamp (guc , now );
12131286 intel_gt_pm_put_async (gt );
12141287 if (i915_reset_count (gpu_error ) != reset_count ) {
12151288 * stats = stats_saved ;
@@ -1241,8 +1314,8 @@ static void __reset_guc_busyness_stats(struct intel_guc *guc)
12411314
12421315 spin_lock_irqsave (& guc -> timestamp .lock , flags );
12431316
1317+ guc_update_pm_timestamp (guc , & unused );
12441318 for_each_engine (engine , gt , id ) {
1245- guc_update_pm_timestamp (guc , engine , & unused );
12461319 guc_update_engine_gt_clks (engine );
12471320 engine -> stats .guc .prev_total = 0 ;
12481321 }
@@ -1259,10 +1332,11 @@ static void __update_guc_busyness_stats(struct intel_guc *guc)
12591332 ktime_t unused ;
12601333
12611334 spin_lock_irqsave (& guc -> timestamp .lock , flags );
1262- for_each_engine (engine , gt , id ) {
1263- guc_update_pm_timestamp (guc , engine , & unused );
1335+
1336+ guc_update_pm_timestamp (guc , & unused );
1337+ for_each_engine (engine , gt , id )
12641338 guc_update_engine_gt_clks (engine );
1265- }
1339+
12661340 spin_unlock_irqrestore (& guc -> timestamp .lock , flags );
12671341}
12681342
@@ -1335,10 +1409,15 @@ void intel_guc_busyness_park(struct intel_gt *gt)
13351409void intel_guc_busyness_unpark (struct intel_gt * gt )
13361410{
13371411 struct intel_guc * guc = & gt -> uc .guc ;
1412+ unsigned long flags ;
1413+ ktime_t unused ;
13381414
13391415 if (!guc_submission_initialized (guc ))
13401416 return ;
13411417
1418+ spin_lock_irqsave (& guc -> timestamp .lock , flags );
1419+ guc_update_pm_timestamp (guc , & unused );
1420+ spin_unlock_irqrestore (& guc -> timestamp .lock , flags );
13421421 mod_delayed_work (system_highpri_wq , & guc -> timestamp .work ,
13431422 guc -> timestamp .ping_delay );
13441423}
@@ -1783,6 +1862,7 @@ int intel_guc_submission_init(struct intel_guc *guc)
17831862 spin_lock_init (& guc -> timestamp .lock );
17841863 INIT_DELAYED_WORK (& guc -> timestamp .work , guc_timestamp_ping );
17851864 guc -> timestamp .ping_delay = (POLL_TIME_CLKS / gt -> clock_frequency + 1 ) * HZ ;
1865+ guc -> timestamp .shift = gpm_timestamp_shift (gt );
17861866
17871867 return 0 ;
17881868}
0 commit comments