2626#include <xen/events.h>
2727#endif
2828
29+ #include <asm/apic.h>
2930#include <asm/desc.h>
3031#include <asm/traps.h>
3132#include <asm/vdso.h>
@@ -167,7 +168,96 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr)
167168 }
168169}
169170
170- /* Handles int $0x80 */
171+ #ifdef CONFIG_IA32_EMULATION
172+ static __always_inline bool int80_is_external (void )
173+ {
174+ const unsigned int offs = (0x80 / 32 ) * 0x10 ;
175+ const u32 bit = BIT (0x80 % 32 );
176+
177+ /* The local APIC on XENPV guests is fake */
178+ if (cpu_feature_enabled (X86_FEATURE_XENPV ))
179+ return false;
180+
181+ /*
182+ * If vector 0x80 is set in the APIC ISR then this is an external
183+ * interrupt. Either from broken hardware or injected by a VMM.
184+ *
185+ * Note: In guest mode this is only valid for secure guests where
186+ * the secure module fully controls the vAPIC exposed to the guest.
187+ */
188+ return apic_read (APIC_ISR + offs ) & bit ;
189+ }
190+
191+ /**
192+ * int80_emulation - 32-bit legacy syscall entry
193+ *
194+ * This entry point can be used by 32-bit and 64-bit programs to perform
195+ * 32-bit system calls. Instances of INT $0x80 can be found inline in
196+ * various programs and libraries. It is also used by the vDSO's
197+ * __kernel_vsyscall fallback for hardware that doesn't support a faster
198+ * entry method. Restarted 32-bit system calls also fall back to INT
199+ * $0x80 regardless of what instruction was originally used to do the
200+ * system call.
201+ *
202+ * This is considered a slow path. It is not used by most libc
203+ * implementations on modern hardware except during process startup.
204+ *
205+ * The arguments for the INT $0x80 based syscall are on stack in the
206+ * pt_regs structure:
207+ * eax: system call number
208+ * ebx, ecx, edx, esi, edi, ebp: arg1 - arg 6
209+ */
210+ DEFINE_IDTENTRY_RAW (int80_emulation )
211+ {
212+ int nr ;
213+
214+ /* Kernel does not use INT $0x80! */
215+ if (unlikely (!user_mode (regs ))) {
216+ irqentry_enter (regs );
217+ instrumentation_begin ();
218+ panic ("Unexpected external interrupt 0x80\n" );
219+ }
220+
221+ /*
222+ * Establish kernel context for instrumentation, including for
223+ * int80_is_external() below which calls into the APIC driver.
224+ * Identical for soft and external interrupts.
225+ */
226+ enter_from_user_mode (regs );
227+
228+ instrumentation_begin ();
229+ add_random_kstack_offset ();
230+
231+ /* Validate that this is a soft interrupt to the extent possible */
232+ if (unlikely (int80_is_external ()))
233+ panic ("Unexpected external interrupt 0x80\n" );
234+
235+ /*
236+ * The low level idtentry code pushed -1 into regs::orig_ax
237+ * and regs::ax contains the syscall number.
238+ *
239+ * User tracing code (ptrace or signal handlers) might assume
240+ * that the regs::orig_ax contains a 32-bit number on invoking
241+ * a 32-bit syscall.
242+ *
243+ * Establish the syscall convention by saving the 32bit truncated
244+ * syscall number in regs::orig_ax and by invalidating regs::ax.
245+ */
246+ regs -> orig_ax = regs -> ax & GENMASK (31 , 0 );
247+ regs -> ax = - ENOSYS ;
248+
249+ nr = syscall_32_enter (regs );
250+
251+ local_irq_enable ();
252+ nr = syscall_enter_from_user_mode_work (regs , nr );
253+ do_syscall_32_irqs_on (regs , nr );
254+
255+ instrumentation_end ();
256+ syscall_exit_to_user_mode (regs );
257+ }
258+ #else /* CONFIG_IA32_EMULATION */
259+
260+ /* Handles int $0x80 on a 32bit kernel */
171261__visible noinstr void do_int80_syscall_32 (struct pt_regs * regs )
172262{
173263 int nr = syscall_32_enter (regs );
@@ -186,6 +276,7 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
186276 instrumentation_end ();
187277 syscall_exit_to_user_mode (regs );
188278}
279+ #endif /* !CONFIG_IA32_EMULATION */
189280
190281static noinstr bool __do_fast_syscall_32 (struct pt_regs * regs )
191282{
0 commit comments