|
18 | 18 | #include <asm/cacheflush.h> |
19 | 19 | #include <asm/cpufeature.h> |
20 | 20 | #include <asm/hwcap.h> |
| 21 | +#include <asm/hwprobe.h> |
21 | 22 | #include <asm/patch.h> |
22 | 23 | #include <asm/processor.h> |
23 | 24 | #include <asm/vector.h> |
24 | 25 |
|
| 26 | +#include "copy-unaligned.h" |
| 27 | + |
25 | 28 | #define NUM_ALPHA_EXTS ('z' - 'a' + 1) |
26 | 29 |
|
| 30 | +#define MISALIGNED_ACCESS_JIFFIES_LG2 1 |
| 31 | +#define MISALIGNED_BUFFER_SIZE 0x4000 |
| 32 | +#define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80) |
| 33 | + |
27 | 34 | unsigned long elf_hwcap __read_mostly; |
28 | 35 |
|
29 | 36 | /* Host ISA bitmap */ |
@@ -549,6 +556,103 @@ unsigned long riscv_get_elf_hwcap(void) |
549 | 556 | return hwcap; |
550 | 557 | } |
551 | 558 |
|
| 559 | +void check_unaligned_access(int cpu) |
| 560 | +{ |
| 561 | + u64 start_cycles, end_cycles; |
| 562 | + u64 word_cycles; |
| 563 | + u64 byte_cycles; |
| 564 | + int ratio; |
| 565 | + unsigned long start_jiffies, now; |
| 566 | + struct page *page; |
| 567 | + void *dst; |
| 568 | + void *src; |
| 569 | + long speed = RISCV_HWPROBE_MISALIGNED_SLOW; |
| 570 | + |
| 571 | + page = alloc_pages(GFP_NOWAIT, get_order(MISALIGNED_BUFFER_SIZE)); |
| 572 | + if (!page) { |
| 573 | + pr_warn("Can't alloc pages to measure memcpy performance\n"); |
| 574 | + return; |
| 575 | + } |
| 576 | + |
| 577 | + /* Make an unaligned destination buffer. */ |
| 578 | + dst = (void *)((unsigned long)page_address(page) | 0x1); |
| 579 | + /* Unalign src as well, but differently (off by 1 + 2 = 3). */ |
| 580 | + src = dst + (MISALIGNED_BUFFER_SIZE / 2); |
| 581 | + src += 2; |
| 582 | + word_cycles = -1ULL; |
| 583 | + /* Do a warmup. */ |
| 584 | + __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE); |
| 585 | + preempt_disable(); |
| 586 | + start_jiffies = jiffies; |
| 587 | + while ((now = jiffies) == start_jiffies) |
| 588 | + cpu_relax(); |
| 589 | + |
| 590 | + /* |
| 591 | + * For a fixed amount of time, repeatedly try the function, and take |
| 592 | + * the best time in cycles as the measurement. |
| 593 | + */ |
| 594 | + while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) { |
| 595 | + start_cycles = get_cycles64(); |
| 597 | + /* Ensure the CSR read can't reorder WRT the copy. */ |
| 597 | + mb(); |
| 598 | + __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE); |
| 599 | + /* Ensure the copy ends before the end time is snapped. */ |
| 600 | + mb(); |
| 601 | + end_cycles = get_cycles64(); |
| 602 | + if ((end_cycles - start_cycles) < word_cycles) |
| 603 | + word_cycles = end_cycles - start_cycles; |
| 604 | + } |
| 605 | + |
| 606 | + byte_cycles = -1ULL; |
| 607 | + __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE); |
| 608 | + start_jiffies = jiffies; |
| 609 | + while ((now = jiffies) == start_jiffies) |
| 610 | + cpu_relax(); |
| 611 | + |
| 612 | + while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) { |
| 613 | + start_cycles = get_cycles64(); |
| 614 | + mb(); |
| 615 | + __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE); |
| 616 | + mb(); |
| 617 | + end_cycles = get_cycles64(); |
| 618 | + if ((end_cycles - start_cycles) < byte_cycles) |
| 619 | + byte_cycles = end_cycles - start_cycles; |
| 620 | + } |
| 621 | + |
| 622 | + preempt_enable(); |
| 623 | + |
| 624 | + /* Don't divide by zero. */ |
| 625 | + if (!word_cycles || !byte_cycles) { |
| 626 | + pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned access speed\n", |
| 627 | + cpu); |
| 628 | + |
| 629 | + goto out; |
| 630 | + } |
| 631 | + |
| 632 | + if (word_cycles < byte_cycles) |
| 633 | + speed = RISCV_HWPROBE_MISALIGNED_FAST; |
| 634 | + |
| 635 | + ratio = div_u64((byte_cycles * 100), word_cycles); |
| 636 | + pr_info("cpu%d: Ratio of byte access time to unaligned word access is %d.%02d, unaligned accesses are %s\n", |
| 637 | + cpu, |
| 638 | + ratio / 100, |
| 639 | + ratio % 100, |
| 640 | + (speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow"); |
| 641 | + |
| 642 | + per_cpu(misaligned_access_speed, cpu) = speed; |
| 643 | + |
| 644 | +out: |
| 645 | + __free_pages(page, get_order(MISALIGNED_BUFFER_SIZE)); |
| 646 | +} |
| 647 | + |
| 648 | +static int check_unaligned_access_boot_cpu(void) |
| 649 | +{ |
| 650 | + check_unaligned_access(0); |
| 651 | + return 0; |
| 652 | +} |
| 653 | + |
| 654 | +arch_initcall(check_unaligned_access_boot_cpu); |
| 655 | + |
552 | 656 | #ifdef CONFIG_RISCV_ALTERNATIVE |
553 | 657 | /* |
554 | 658 | * Alternative patch sites consider 48 bits when determining when to patch |
|
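The two copy routines timed above come from the new copy-unaligned.h header included at the top of this change; the routines themselves are assembly implementations added elsewhere in this series, so only their call sites appear in this diff. Inferred from those call sites, the declarations the header would carry look like this (a sketch, not the literal header contents):

    /*
     * Assumed prototypes, inferred from the call sites in
     * check_unaligned_access(); the real routines are assembly
     * implementations added elsewhere in this series.
     */
    void __riscv_copy_words_unaligned(void *dst, const void *src, size_t size);
    void __riscv_copy_bytes_unaligned(void *dst, const void *src, size_t size);

Each call copies MISALIGNED_COPY_SIZE bytes (0x4000 / 2 - 0x80 = 8064), and each measurement loop first spins to a fresh jiffies edge and then runs for 1 << MISALIGNED_ACCESS_JIFFIES_LG2 = 2 jiffies, keeping the minimum cycle count observed, so a stray interrupt can only inflate samples that get discarded.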
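The per-CPU misaligned_access_speed value stored above is what the riscv_hwprobe() syscall reports to userspace under the RISCV_HWPROBE_KEY_CPUPERF_0 key, which is why this file now pulls in <asm/hwprobe.h>. Below is a minimal sketch of a userspace consumer, assuming the exported uapi <asm/hwprobe.h> header and headers that define __NR_riscv_hwprobe:

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/syscall.h>  /* syscall() */
    #include <asm/hwprobe.h>  /* struct riscv_hwprobe, RISCV_HWPROBE_* (uapi) */
    #include <asm/unistd.h>   /* __NR_riscv_hwprobe */

    int main(void)
    {
        struct riscv_hwprobe pair = { .key = RISCV_HWPROBE_KEY_CPUPERF_0 };

        /*
         * pair_count = 1; cpu_count = 0 with cpus = NULL asks for a
         * value that holds across all online CPUs.
         */
        if (syscall(__NR_riscv_hwprobe, &pair, 1UL, 0UL, NULL, 0U)) {
            perror("riscv_hwprobe");
            return 1;
        }

        switch (pair.value & RISCV_HWPROBE_MISALIGNED_MASK) {
        case RISCV_HWPROBE_MISALIGNED_FAST:
            puts("unaligned accesses are fast");
            break;
        case RISCV_HWPROBE_MISALIGNED_SLOW:
            puts("unaligned accesses are slow");
            break;
        case RISCV_HWPROBE_MISALIGNED_EMULATED:
            puts("unaligned accesses are emulated");
            break;
        default:
            puts("unaligned access speed not known");
            break;
        }
        return 0;
    }

The arch_initcall() registered above covers only the boot CPU; check_unaligned_access() is deliberately non-static, so secondary CPUs are presumably probed from a call site outside this hunk. Until a CPU has been probed, its per-CPU slot keeps its prior contents (zero-initialized, i.e. RISCV_HWPROBE_MISALIGNED_UNKNOWN, unless something else such as the misaligned trap handler has already set it).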