|
19 | 19 | #include <asm/cacheflush.h> |
20 | 20 | #include <asm/cpufeature.h> |
21 | 21 | #include <asm/hwcap.h> |
| 22 | +#include <asm/hwprobe.h> |
22 | 23 | #include <asm/patch.h> |
23 | 24 | #include <asm/processor.h> |
24 | 25 | #include <asm/vector.h> |
25 | 26 |
|
| 27 | +#include "copy-unaligned.h" |
| 28 | + |
26 | 29 | #define NUM_ALPHA_EXTS ('z' - 'a' + 1) |
27 | 30 |
|
/* log2 of the number of jiffies each timed measurement loop runs for. */
#define MISALIGNED_ACCESS_JIFFIES_LG2 1
/* Size of the buffer allocated for the copy benchmark (split into dst/src halves). */
#define MISALIGNED_BUFFER_SIZE 0x4000
/*
 * Bytes copied per iteration: half the buffer, minus 0x80 of headroom so the
 * deliberately misaligned dst/src pointers never run past the allocation.
 */
#define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80)
28 | 35 | unsigned long elf_hwcap __read_mostly; |
29 | 36 |
|
30 | 37 | /* Host ISA bitmap */ |
@@ -396,6 +403,103 @@ unsigned long riscv_get_elf_hwcap(void) |
396 | 403 | return hwcap; |
397 | 404 | } |
398 | 405 |
|
| 406 | +void check_unaligned_access(int cpu) |
| 407 | +{ |
| 408 | + u64 start_cycles, end_cycles; |
| 409 | + u64 word_cycles; |
| 410 | + u64 byte_cycles; |
| 411 | + int ratio; |
| 412 | + unsigned long start_jiffies, now; |
| 413 | + struct page *page; |
| 414 | + void *dst; |
| 415 | + void *src; |
| 416 | + long speed = RISCV_HWPROBE_MISALIGNED_SLOW; |
| 417 | + |
| 418 | + page = alloc_pages(GFP_NOWAIT, get_order(MISALIGNED_BUFFER_SIZE)); |
| 419 | + if (!page) { |
| 420 | + pr_warn("Can't alloc pages to measure memcpy performance"); |
| 421 | + return; |
| 422 | + } |
| 423 | + |
| 424 | + /* Make an unaligned destination buffer. */ |
| 425 | + dst = (void *)((unsigned long)page_address(page) | 0x1); |
| 426 | + /* Unalign src as well, but differently (off by 1 + 2 = 3). */ |
| 427 | + src = dst + (MISALIGNED_BUFFER_SIZE / 2); |
| 428 | + src += 2; |
| 429 | + word_cycles = -1ULL; |
| 430 | + /* Do a warmup. */ |
| 431 | + __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE); |
| 432 | + preempt_disable(); |
| 433 | + start_jiffies = jiffies; |
| 434 | + while ((now = jiffies) == start_jiffies) |
| 435 | + cpu_relax(); |
| 436 | + |
| 437 | + /* |
| 438 | + * For a fixed amount of time, repeatedly try the function, and take |
| 439 | + * the best time in cycles as the measurement. |
| 440 | + */ |
| 441 | + while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) { |
| 442 | + start_cycles = get_cycles64(); |
| 443 | + /* Ensure the CSR read can't reorder WRT to the copy. */ |
| 444 | + mb(); |
| 445 | + __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE); |
| 446 | + /* Ensure the copy ends before the end time is snapped. */ |
| 447 | + mb(); |
| 448 | + end_cycles = get_cycles64(); |
| 449 | + if ((end_cycles - start_cycles) < word_cycles) |
| 450 | + word_cycles = end_cycles - start_cycles; |
| 451 | + } |
| 452 | + |
| 453 | + byte_cycles = -1ULL; |
| 454 | + __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE); |
| 455 | + start_jiffies = jiffies; |
| 456 | + while ((now = jiffies) == start_jiffies) |
| 457 | + cpu_relax(); |
| 458 | + |
| 459 | + while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) { |
| 460 | + start_cycles = get_cycles64(); |
| 461 | + mb(); |
| 462 | + __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE); |
| 463 | + mb(); |
| 464 | + end_cycles = get_cycles64(); |
| 465 | + if ((end_cycles - start_cycles) < byte_cycles) |
| 466 | + byte_cycles = end_cycles - start_cycles; |
| 467 | + } |
| 468 | + |
| 469 | + preempt_enable(); |
| 470 | + |
| 471 | + /* Don't divide by zero. */ |
| 472 | + if (!word_cycles || !byte_cycles) { |
| 473 | + pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned access speed\n", |
| 474 | + cpu); |
| 475 | + |
| 476 | + goto out; |
| 477 | + } |
| 478 | + |
| 479 | + if (word_cycles < byte_cycles) |
| 480 | + speed = RISCV_HWPROBE_MISALIGNED_FAST; |
| 481 | + |
| 482 | + ratio = div_u64((byte_cycles * 100), word_cycles); |
| 483 | + pr_info("cpu%d: Ratio of byte access time to unaligned word access is %d.%02d, unaligned accesses are %s\n", |
| 484 | + cpu, |
| 485 | + ratio / 100, |
| 486 | + ratio % 100, |
| 487 | + (speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow"); |
| 488 | + |
| 489 | + per_cpu(misaligned_access_speed, cpu) = speed; |
| 490 | + |
| 491 | +out: |
| 492 | + __free_pages(page, get_order(MISALIGNED_BUFFER_SIZE)); |
| 493 | +} |
| 494 | + |
| 495 | +static int check_unaligned_access_boot_cpu(void) |
| 496 | +{ |
| 497 | + check_unaligned_access(0); |
| 498 | + return 0; |
| 499 | +} |
| 500 | + |
| 501 | +arch_initcall(check_unaligned_access_boot_cpu); |
| 502 | + |
399 | 503 | #ifdef CONFIG_RISCV_ALTERNATIVE |
400 | 504 | /* |
401 | 505 | * Alternative patch sites consider 48 bits when determining when to patch |
|
0 commit comments