Skip to content

Commit 529676c

Browse files
edumazet authored and hansendc committed
x86/lib: Inline csum_ipv6_magic()
Inline this small helper.

It has been observed to consume up to 0.75%, which is significant for such a small function.

This should reduce register pressure, as saddr and daddr are often back to back in memory.

For instance, code inlined in tcp6_gro_receive() will look like:

 55a: 48 03 73 28          add    0x28(%rbx),%rsi
 55e: 8b 43 70             mov    0x70(%rbx),%eax
 561: 29 f8                sub    %edi,%eax
 563: 0f c8                bswap  %eax
 565: 89 c0                mov    %eax,%eax
 567: 48 05 00 06 00 00    add    $0x600,%rax
 56d: 48 03 46 08          add    0x8(%rsi),%rax
 571: 48 13 46 10          adc    0x10(%rsi),%rax
 575: 48 13 46 18          adc    0x18(%rsi),%rax
 579: 48 13 46 20          adc    0x20(%rsi),%rax
 57d: 48 83 d0 00          adc    $0x0,%rax
 581: 48 89 c6             mov    %rax,%rsi
 584: 48 c1 ee 20          shr    $0x20,%rsi
 588: 01 f0                add    %esi,%eax
 58a: 83 d0 00             adc    $0x0,%eax
 58d: 89 c6                mov    %eax,%esi
 58f: 66 31 c0             xor    %ax,%ax

Surprisingly, this inlining does not seem to bloat kernel text size. In at least two cases[1], it either has no effect or results in a slightly smaller kernel.

1. https://lore.kernel.org/all/CANn89iJzcb_XO9oCApKYfRxsMMmg7BHukRDqWTca3ZLQ8HT0iQ@mail.gmail.com/

[ dhansen: add justification and note about lack of kernel bloat ]

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
Link: https://patch.msgid.link/20251113154545.594580-1-edumazet@google.com
1 parent 9ace475 commit 529676c

2 files changed

Lines changed: 33 additions & 34 deletions

File tree

arch/x86/include/asm/checksum_64.h

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
*/
1010

1111
#include <linux/compiler.h>
12+
#include <linux/in6.h>
1213
#include <asm/byteorder.h>
1314

1415
/**
@@ -145,6 +146,17 @@ extern __wsum csum_partial_copy_nocheck(const void *src, void *dst, int len);
145146
*/
146147
extern __sum16 ip_compute_csum(const void *buff, int len);
147148

149+
/*
 * add32_with_carry - add two 32-bit values, folding the carry back in.
 * @a: first addend; also the register that receives the result
 * @b: second addend; may be a register or a memory operand ("rm")
 *
 * "addl" adds @b into @a, then "adcl $0" adds the carry flag produced by
 * that addition back into the result, so a carry out of bit 31 is not lost.
 * The "0" input constraint ties the input @a to the same register as the
 * "=r" output.
 *
 * Returns the 32-bit sum including the folded-in carry.
 *
 * In this diff the helper is only moved: these "+" lines place it ahead of
 * the csum_ipv6_magic() definition that calls it.
 */
static inline unsigned add32_with_carry(unsigned a, unsigned b)
150+
{
151+
asm("addl %2,%0\n\t"
152+
"adcl $0,%0"
153+
: "=r" (a)
154+
: "0" (a), "rm" (b));
155+
return a;
156+
}
157+
158+
#define _HAVE_ARCH_IPV6_CSUM 1
159+
148160
/**
149161
* csum_ipv6_magic - Compute checksum of an IPv6 pseudo header.
150162
* @saddr: source address
@@ -158,20 +170,29 @@ extern __sum16 ip_compute_csum(const void *buff, int len);
158170
* Returns the unfolded 32bit checksum.
159171
*/
160172

161-
struct in6_addr;
173+
/*
 * Inline definition of csum_ipv6_magic() added by this commit.
 *
 * Reading note: this is a rendered diff. Lines following a "NNN+" gutter
 * are the new function; lines following a "NNN-" gutter (the old extern
 * prototype, the old _HAVE_ARCH_IPV6_CSUM define and the old position of
 * add32_with_carry()) are deletions that happen to be interleaved here.
 *
 * What the added code does:
 *  - Views _saddr and _daddr as arrays of unsigned long and reads elements
 *    [0] and [1] of each.
 *    NOTE(review): that covers a whole address only if unsigned long is
 *    8 bytes and struct in6_addr is 16 bytes -- presumably true for this
 *    64-bit header; confirm.
 *  - Seeds a 64-bit accumulator with htonl(len) + htons(proto) + sum.
 *  - The asm adds the four address words with one addq followed by adcq
 *    instructions, so each add consumes the carry of the previous one; the
 *    trailing "adcq $0" adds the final carry back into the accumulator.
 *    The accumulator is a read-write register operand ("+r") and the four
 *    words are passed as memory operands ("m") rather than as pointers
 *    held in registers.
 *  - The low and high 32-bit halves of the accumulator are combined with
 *    add32_with_carry(), and that value is handed to csum_fold(), whose
 *    result is returned as __sum16.
 */
static inline __sum16 csum_ipv6_magic(
174+
const struct in6_addr *_saddr, const struct in6_addr *_daddr,
175+
__u32 len, __u8 proto, __wsum sum)
176+
{
177+
const unsigned long *saddr = (const unsigned long *)_saddr;
178+
const unsigned long *daddr = (const unsigned long *)_daddr;
179+
__u64 sum64;
162180

163-
#define _HAVE_ARCH_IPV6_CSUM 1
164-
extern __sum16
165-
csum_ipv6_magic(const struct in6_addr *saddr, const struct in6_addr *daddr,
166-
__u32 len, __u8 proto, __wsum sum);
181+
sum64 = (__force __u64)htonl(len) + (__force __u64)htons(proto) +
182+
(__force __u64)sum;
167183

168-
static inline unsigned add32_with_carry(unsigned a, unsigned b)
169-
{
170-
asm("addl %2,%0\n\t"
171-
"adcl $0,%0"
172-
: "=r" (a)
173-
: "0" (a), "rm" (b));
174-
return a;
184+
asm(" addq %1,%[sum64]\n"
185+
" adcq %2,%[sum64]\n"
186+
" adcq %3,%[sum64]\n"
187+
" adcq %4,%[sum64]\n"
188+
" adcq $0,%[sum64]\n"
189+
190+
: [sum64] "+r" (sum64)
191+
: "m" (saddr[0]), "m" (saddr[1]),
192+
"m" (daddr[0]), "m" (daddr[1]));
193+
194+
return csum_fold(
195+
(__force __wsum)add32_with_carry(sum64 & 0xffffffff, sum64>>32));
175196
}
176197

177198
#define HAVE_ARCH_CSUM_ADD

arch/x86/lib/csum-wrappers_64.c

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -68,25 +68,3 @@ csum_partial_copy_nocheck(const void *src, void *dst, int len)
6868
}
6969
EXPORT_SYMBOL(csum_partial_copy_nocheck);
7070

71-
/*
 * Out-of-line csum_ipv6_magic() deleted by this commit: every line of this
 * hunk carries a "-" gutter, including the EXPORT_SYMBOL() at the end.
 *
 * It computes the same sum as the inline replacement: "rest" holds
 * htonl(len) + htons(proto) + sum, the asm adds the 8-byte words at
 * offsets 0 and 8 from saddr and from daddr with an addq/adcq carry chain,
 * "adcq $0" adds the last carry back in, and the two 32-bit halves are
 * combined with add32_with_carry() before csum_fold().
 *
 * Differences from the inline version visible in the operands:
 *  - saddr and daddr are passed as pointers in registers ("r") and
 *    dereferenced inside the asm template, instead of being passed as
 *    "m" memory operands.
 *  - The accumulator is a separate "=r" output (sum64) tied to the input
 *    "rest" through the "[sum]" matching constraint, instead of a single
 *    "+r" operand.
 */
__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
72-
const struct in6_addr *daddr,
73-
__u32 len, __u8 proto, __wsum sum)
74-
{
75-
__u64 rest, sum64;
76-
77-
rest = (__force __u64)htonl(len) + (__force __u64)htons(proto) +
78-
(__force __u64)sum;
79-
80-
asm(" addq (%[saddr]),%[sum]\n"
81-
" adcq 8(%[saddr]),%[sum]\n"
82-
" adcq (%[daddr]),%[sum]\n"
83-
" adcq 8(%[daddr]),%[sum]\n"
84-
" adcq $0,%[sum]\n"
85-
86-
: [sum] "=r" (sum64)
87-
: "[sum]" (rest), [saddr] "r" (saddr), [daddr] "r" (daddr));
88-
89-
return csum_fold(
90-
(__force __wsum)add32_with_carry(sum64 & 0xffffffff, sum64>>32));
91-
}
92-
EXPORT_SYMBOL(csum_ipv6_magic);

0 commit comments

Comments
 (0)