Skip to content

Commit fcc1d0d

Browse files
rrendec authored and KAGA-KOKO committed
genirq: Add interrupt redirection infrastructure
Add infrastructure to redirect interrupt handler execution to a different CPU when the current CPU is not part of the interrupt's CPU affinity mask. This is primarily aimed at (de)multiplexed interrupts, where the child interrupt handler runs in the context of the parent interrupt handler, and therefore CPU affinity control for the child interrupt is typically not available. With the new infrastructure, the child interrupt is allowed to freely change its affinity setting, independently of the parent. If the interrupt handler happens to be triggered on an "incompatible" CPU (a CPU that's not part of the child interrupt's affinity mask), the handler is redirected and runs in IRQ work context on a "compatible" CPU. No functional change is being made to any existing irqchip driver, and irqchip drivers must be explicitly modified to use the newly added infrastructure to support interrupt redirection. Originally-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Radu Rendec <rrendec@redhat.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Link: https://lore.kernel.org/linux-pci/878qpg4o4t.ffs@tglx/ Link: https://patch.msgid.link/20251128212055.1409093-2-rrendec@redhat.com
1 parent 0317e0a commit fcc1d0d

5 files changed

Lines changed: 144 additions & 6 deletions

File tree

include/linux/irq.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -459,6 +459,8 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
459459
* checks against the supplied affinity mask are not
460460
* required. This is used for CPU hotplug where the
461461
* target CPU is not yet set in the cpu_online_mask.
462+
* @irq_pre_redirect: Optional function to be invoked before redirecting
463+
* an interrupt via irq_work. Called only on CONFIG_SMP.
462464
* @irq_retrigger: resend an IRQ to the CPU
463465
* @irq_set_type: set the flow type (IRQ_TYPE_LEVEL/etc.) of an IRQ
464466
* @irq_set_wake: enable/disable power-management wake-on of an IRQ
@@ -503,6 +505,7 @@ struct irq_chip {
503505
void (*irq_eoi)(struct irq_data *data);
504506

505507
int (*irq_set_affinity)(struct irq_data *data, const struct cpumask *dest, bool force);
508+
void (*irq_pre_redirect)(struct irq_data *data);
506509
int (*irq_retrigger)(struct irq_data *data);
507510
int (*irq_set_type)(struct irq_data *data, unsigned int flow_type);
508511
int (*irq_set_wake)(struct irq_data *data, unsigned int on);
@@ -687,6 +690,13 @@ extern int irq_chip_set_vcpu_affinity_parent(struct irq_data *data,
687690
extern int irq_chip_set_type_parent(struct irq_data *data, unsigned int type);
688691
extern int irq_chip_request_resources_parent(struct irq_data *data);
689692
extern void irq_chip_release_resources_parent(struct irq_data *data);
693+
#ifdef CONFIG_SMP
694+
void irq_chip_pre_redirect_parent(struct irq_data *data);
695+
#endif
696+
#endif
697+
698+
#ifdef CONFIG_SMP
699+
int irq_chip_redirect_set_affinity(struct irq_data *data, const struct cpumask *dest, bool force);
690700
#endif
691701

692702
/* Disable or mask interrupts during a kernel kexec */

include/linux/irqdesc.h

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,10 @@
22
#ifndef _LINUX_IRQDESC_H
33
#define _LINUX_IRQDESC_H
44

5-
#include <linux/rcupdate.h>
5+
#include <linux/irq_work.h>
66
#include <linux/kobject.h>
77
#include <linux/mutex.h>
8+
#include <linux/rcupdate.h>
89

910
/*
1011
* Core internal functions to deal with irq descriptors
@@ -29,6 +30,17 @@ struct irqstat {
2930
#endif
3031
};
3132

33+
/**
 * struct irq_redirect - interrupt redirection metadata
 * @work:	Hard irq_work item for handler execution on a different CPU
 * @target_cpu:	CPU to run irq handler on in case the current CPU is not part
 *		of the irq affinity mask
 */
struct irq_redirect {
	struct irq_work		work;
	unsigned int		target_cpu;
};
43+
3244
/**
3345
* struct irq_desc - interrupt descriptor
3446
* @irq_common_data: per irq and chip data passed down to chip functions
@@ -46,6 +58,7 @@ struct irqstat {
4658
* @threads_handled: stats field for deferred spurious detection of threaded handlers
4759
* @threads_handled_last: comparator field for deferred spurious detection of threaded handlers
4860
* @lock: locking for SMP
61+
* @redirect: Facility for redirecting interrupts via irq_work
4962
* @affinity_hint: hint to user space for preferred irq affinity
5063
* @affinity_notify: context for notification of affinity changes
5164
* @pending_mask: pending rebalanced interrupts
@@ -83,6 +96,7 @@ struct irq_desc {
8396
raw_spinlock_t lock;
8497
struct cpumask *percpu_enabled;
8598
#ifdef CONFIG_SMP
99+
struct irq_redirect redirect;
86100
const struct cpumask *affinity_hint;
87101
struct irq_affinity_notify *affinity_notify;
88102
#ifdef CONFIG_GENERIC_PENDING_IRQ
@@ -185,6 +199,7 @@ int generic_handle_irq_safe(unsigned int irq);
185199
int generic_handle_domain_irq(struct irq_domain *domain, irq_hw_number_t hwirq);
186200
int generic_handle_domain_irq_safe(struct irq_domain *domain, irq_hw_number_t hwirq);
187201
int generic_handle_domain_nmi(struct irq_domain *domain, irq_hw_number_t hwirq);
202+
bool generic_handle_demux_domain_irq(struct irq_domain *domain, irq_hw_number_t hwirq);
188203
#endif
189204

190205
/* Test to see if a driver has successfully requested an irq */

kernel/irq/chip.c

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1122,7 +1122,7 @@ void irq_cpu_offline(void)
11221122
}
11231123
#endif
11241124

1125-
#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
1125+
#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
11261126

11271127
#ifdef CONFIG_IRQ_FASTEOI_HIERARCHY_HANDLERS
11281128
/**
@@ -1194,6 +1194,15 @@ EXPORT_SYMBOL_GPL(handle_fasteoi_mask_irq);
11941194

11951195
#endif /* CONFIG_IRQ_FASTEOI_HIERARCHY_HANDLERS */
11961196

1197+
#ifdef CONFIG_SMP
/**
 * irq_chip_pre_redirect_parent - Invoke the parent's irq_pre_redirect callback
 * @data:	Pointer to interrupt specific data
 *
 * Forwards the pre-redirect hook one level up in a hierarchical irq domain.
 * The parent chip is expected to implement irq_pre_redirect.
 */
void irq_chip_pre_redirect_parent(struct irq_data *data)
{
	struct irq_data *parent = data->parent_data;

	parent->chip->irq_pre_redirect(parent);
}
EXPORT_SYMBOL_GPL(irq_chip_pre_redirect_parent);
#endif
1205+
11971206
/**
11981207
* irq_chip_set_parent_state - set the state of a parent interrupt.
11991208
*
@@ -1476,6 +1485,17 @@ void irq_chip_release_resources_parent(struct irq_data *data)
14761485
data->chip->irq_release_resources(data);
14771486
}
14781487
EXPORT_SYMBOL_GPL(irq_chip_release_resources_parent);
1488+
#endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */
1489+
1490+
#ifdef CONFIG_SMP
/**
 * irq_chip_redirect_set_affinity - Affinity setter for redirected interrupts
 * @data:	Pointer to interrupt specific data
 * @dest:	The requested affinity mask
 * @force:	Flag to enforce the request (not evaluated here)
 *
 * Does not touch any hardware. Simply records the first CPU of @dest as the
 * target CPU for subsequent handler redirection via irq_work.
 *
 * Returns: IRQ_SET_MASK_OK unconditionally
 */
int irq_chip_redirect_set_affinity(struct irq_data *data, const struct cpumask *dest, bool force)
{
	struct irq_desc *desc = irq_data_to_desc(data);

	/* Paired with READ_ONCE() on the redirect fast path */
	WRITE_ONCE(desc->redirect.target_cpu, cpumask_first(dest));
	return IRQ_SET_MASK_OK;
}
EXPORT_SYMBOL_GPL(irq_chip_redirect_set_affinity);
#endif
14801500

14811501
/**

kernel/irq/irqdesc.c

Lines changed: 84 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,12 @@ static int alloc_masks(struct irq_desc *desc, int node)
7878
return 0;
7979
}
8080

81-
static void desc_smp_init(struct irq_desc *desc, int node,
82-
const struct cpumask *affinity)
81+
static void irq_redirect_work(struct irq_work *work)
82+
{
83+
handle_irq_desc(container_of(work, struct irq_desc, redirect.work));
84+
}
85+
86+
static void desc_smp_init(struct irq_desc *desc, int node, const struct cpumask *affinity)
8387
{
8488
if (!affinity)
8589
affinity = irq_default_affinity;
@@ -91,6 +95,7 @@ static void desc_smp_init(struct irq_desc *desc, int node,
9195
#ifdef CONFIG_NUMA
9296
desc->irq_common_data.node = node;
9397
#endif
98+
desc->redirect.work = IRQ_WORK_INIT_HARD(irq_redirect_work);
9499
}
95100

96101
static void free_masks(struct irq_desc *desc)
@@ -766,6 +771,83 @@ int generic_handle_domain_nmi(struct irq_domain *domain, irq_hw_number_t hwirq)
766771
WARN_ON_ONCE(!in_nmi());
767772
return handle_irq_desc(irq_resolve_mapping(domain, hwirq));
768773
}
774+
775+
#ifdef CONFIG_SMP
/*
 * Check whether the interrupt must be redirected to a different CPU and, if
 * so, queue the per-descriptor redirect irq_work on the target CPU.
 *
 * Returns true when the caller must NOT run the handler locally (either the
 * work was queued remotely, or the interrupt was shut down and is dropped).
 * Returns false when the handler may run on the current CPU.
 *
 * Runs in hard interrupt context; desc->lock serializes against affinity
 * changes and __free_irq().
 */
static bool demux_redirect_remote(struct irq_desc *desc)
{
	guard(raw_spinlock)(&desc->lock);
	const struct cpumask *m = irq_data_get_effective_affinity_mask(&desc->irq_data);
	unsigned int target_cpu = READ_ONCE(desc->redirect.target_cpu);

	/*
	 * Optional chip hook. NOTE(review): invoked on every demuxed
	 * interrupt, i.e. also when no redirection ends up happening below —
	 * confirm chips implementing irq_pre_redirect expect that.
	 */
	if (desc->irq_data.chip->irq_pre_redirect)
		desc->irq_data.chip->irq_pre_redirect(&desc->irq_data);

	/*
	 * If the interrupt handler is already running on a CPU that's included
	 * in the interrupt's affinity mask, redirection is not necessary.
	 */
	if (cpumask_test_cpu(smp_processor_id(), m))
		return false;

	/*
	 * The desc->action check protects against IRQ shutdown: __free_irq() sets
	 * desc->action to NULL while holding desc->lock, which we also hold.
	 *
	 * Calling irq_work_queue_on() here is safe w.r.t. CPU unplugging:
	 * - takedown_cpu() schedules multi_cpu_stop() on all active CPUs,
	 *   including the one that's taken down.
	 * - multi_cpu_stop() acts like a barrier, which means all active
	 *   CPUs go through MULTI_STOP_DISABLE_IRQ and disable hard IRQs
	 *   *before* the dying CPU runs take_cpu_down() in MULTI_STOP_RUN.
	 * - Hard IRQs are re-enabled at the end of multi_cpu_stop(), *after*
	 *   the dying CPU has run take_cpu_down() in MULTI_STOP_RUN.
	 * - Since we run in hard IRQ context, we run either before or after
	 *   take_cpu_down() but never concurrently.
	 * - If we run before take_cpu_down(), the dying CPU hasn't been marked
	 *   offline yet (it's marked via take_cpu_down() -> __cpu_disable()),
	 *   so the WARN in irq_work_queue_on() can't occur.
	 * - Furthermore, the work item we queue will be flushed later via
	 *   take_cpu_down() -> cpuhp_invoke_callback_range_nofail() ->
	 *   smpcfd_dying_cpu() -> irq_work_run().
	 * - If we run after take_cpu_down(), target_cpu has been already
	 *   updated via take_cpu_down() -> __cpu_disable(), which eventually
	 *   calls irq_do_set_affinity() during IRQ migration. So, target_cpu
	 *   no longer points to the dying CPU in this case.
	 */
	if (desc->action)
		irq_work_queue_on(&desc->redirect.work, target_cpu);

	return true;
}
#else /* CONFIG_SMP */
/* On UP there is no other CPU to redirect to: always run locally. */
static bool demux_redirect_remote(struct irq_desc *desc)
{
	return false;
}
#endif
828+
829+
/**
830+
* generic_handle_demux_domain_irq - Invoke the handler for a hardware interrupt
831+
* of a demultiplexing domain.
832+
* @domain: The domain where to perform the lookup
833+
* @hwirq: The hardware interrupt number to convert to a logical one
834+
*
835+
* Returns: True on success, or false if lookup has failed
836+
*/
837+
bool generic_handle_demux_domain_irq(struct irq_domain *domain, irq_hw_number_t hwirq)
838+
{
839+
struct irq_desc *desc = irq_resolve_mapping(domain, hwirq);
840+
841+
if (unlikely(!desc))
842+
return false;
843+
844+
if (demux_redirect_remote(desc))
845+
return true;
846+
847+
return !handle_irq_desc(desc);
848+
}
849+
EXPORT_SYMBOL_GPL(generic_handle_demux_domain_irq);
850+
769851
#endif
770852

771853
/* Dynamic interrupt handling */

kernel/irq/manage.c

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,16 @@ static int __init setup_forced_irqthreads(char *arg)
3535
early_param("threadirqs", setup_forced_irqthreads);
3636
#endif
3737

38+
#ifdef CONFIG_SMP
/* Wait for a pending or in-flight redirect irq_work of @desc to complete. */
static inline void synchronize_irqwork(struct irq_desc *desc)
{
	/* Synchronize pending or on the fly redirect work */
	irq_work_sync(&desc->redirect.work);
}
#else
/* No redirection infrastructure on UP: nothing to synchronize. */
static inline void synchronize_irqwork(struct irq_desc *desc) { }
#endif
47+
3848
static int __irq_get_irqchip_state(struct irq_data *d, enum irqchip_irq_state which, bool *state);
3949

4050
static void __synchronize_hardirq(struct irq_desc *desc, bool sync_chip)
@@ -107,7 +117,9 @@ EXPORT_SYMBOL(synchronize_hardirq);
107117

108118
static void __synchronize_irq(struct irq_desc *desc)
109119
{
120+
synchronize_irqwork(desc);
110121
__synchronize_hardirq(desc, true);
122+
111123
/*
112124
* We made sure that no hardirq handler is running. Now verify that no
113125
* threaded handlers are active.
@@ -217,8 +229,7 @@ static inline void irq_validate_effective_affinity(struct irq_data *data) { }
217229

218230
static DEFINE_PER_CPU(struct cpumask, __tmp_mask);
219231

220-
int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
221-
bool force)
232+
int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
222233
{
223234
struct cpumask *tmp_mask = this_cpu_ptr(&__tmp_mask);
224235
struct irq_desc *desc = irq_data_to_desc(data);

0 commit comments

Comments
 (0)