Skip to content

Commit 9af8f2b

Browse files
author
Jocelyn Falempe
committed
drm/panic: Add a u64 divide by 10 for arm32
On 32-bit ARM, a u64 divided by a constant is not optimized into a multiply by inverse by the compiler [1]. So do the multiply by inverse explicitly for this architecture. Link: llvm/llvm-project#37280 [1] Reported-by: Andrei Lalaev <andrey.lalaev@gmail.com> Closes: https://lore.kernel.org/dri-devel/c0a2771c-f3f5-4d4c-aa82-d673b3c5cb46@gmail.com/ Fixes: 675008f ("drm/panic: Use a decimal fifo to avoid u64 by u64 divide") Reviewed-by: Alice Ryhl <aliceryhl@google.com> Signed-off-by: Jocelyn Falempe <jfalempe@redhat.com>
1 parent 3600772 commit 9af8f2b

1 file changed

Lines changed: 21 additions & 1 deletion

File tree

drivers/gpu/drm/drm_panic_qr.rs

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -381,6 +381,26 @@ struct DecFifo {
381381
len: usize,
382382
}
383383

384+
// On arm32 architecture, dividing a `u64` by a constant will generate a call
// to `__aeabi_uldivmod` which is not present in the kernel.
// So use the multiply by inverse method for this architecture.
//
// Returns `val / 10` (rounded towards zero) for every `u64` input.
fn div10(val: u64) -> u64 {
    if cfg!(target_arch = "arm") {
        div10_mul_inverse(val)
    } else {
        val / 10
    }
}

// Divides `val` by 10 using only 32x32 -> 64 bit multiplies, adds and shifts.
//
// Computes `(val * M) >> 67` with `M = 0xCCCC_CCCC_CCCC_CCCD = ceil(2^67 / 10)`.
// Because `10 * M = 2^67 + 2`, the computed value exceeds the exact quotient
// `val / 10` by `val / (5 * 2^67)`, which is below 1/40 for any `u64`. The
// fractional part of `val / 10` is at most 0.9, so the excess can never push
// the floor to the next integer: the result is exact over the whole range.
//
// The signed magic number `0x6666_6666_6666_6667` (shift 66) must not be used
// here: `10 * 0x6666_6666_6666_6667 = 2^66 + 6`, so it returns a quotient that
// is one too large for values `>= 2^66 / 6` whose last decimal digit is 9.
fn div10_mul_inverse(val: u64) -> u64 {
    // High and low 32-bit halves of the multiplier `M`.
    const M_H: u64 = 0xCCCC_CCCC;
    const M_L: u64 = 0xCCCC_CCCD;

    let val_h = val >> 32;
    let val_l = val & 0xFFFF_FFFF;

    // Schoolbook multiplication keeping only the upper 64 bits of the 128-bit
    // product. Each intermediate sum is a 32x32 product plus a value below
    // 2^32, so none of them can overflow a `u64`.
    let tmp1 = val_h * M_L + ((val_l * M_L) >> 32);
    let tmp2 = val_l * M_H + (tmp1 & 0xFFFF_FFFF);
    let tmp3 = val_h * M_H + (tmp1 >> 32) + (tmp2 >> 32);

    // `tmp3` is `(val * M) >> 64`; the remaining shift brings the total to 67.
    tmp3 >> 3
}
403+
384404
impl DecFifo {
385405
fn push(&mut self, data: u64, len: usize) {
386406
let mut chunk = data;
@@ -389,7 +409,7 @@ impl DecFifo {
389409
}
390410
for i in 0..len {
391411
self.decimals[i] = (chunk % 10) as u8;
392-
chunk /= 10;
412+
chunk = div10(chunk);
393413
}
394414
self.len += len;
395415
}

0 commit comments

Comments
 (0)