From 6bf9041ddcd4613ec0c958ecb44b49cf785e483c Mon Sep 17 00:00:00 2001
From: ma_yulong
Date: Tue, 10 Sep 2024 18:04:04 +0800
Subject: [PATCH] Support kernel EAS energy-aware scheduling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: ma_yulong
---
 include/linux/sched/sysctl.h     |   8 +
 include/trace/events/eas_sched.h |  76 +++++++++
 include/trace/events/sched.h     |   4 +
 init/Kconfig                     |  21 +++
 kernel/sched/core.c              |  12 +-
 kernel/sched/fair.c              | 170 ++++++++++++++++++-
 kernel/sched/rt.c                | 281 ++++++++++++++++++++++++++++++-
 kernel/sched/sched.h             |  50 +++++-
 kernel/sched/topology.c          |  14 ++
 kernel/sysctl.c                  |  18 ++
 10 files changed, 647 insertions(+), 7 deletions(-)
 create mode 100755 include/trace/events/eas_sched.h

diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 0e098c5a0ff3..546e750f71ee 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -24,6 +24,14 @@ extern int
 sysctl_sched_walt_init_task_load_pct_sysctl_handler(struct ctl_table *table,
 	int write, void __user *buffer, size_t *length, loff_t *ppos);
 #endif
+
+#ifdef CONFIG_SCHED_RT_CAS
+extern unsigned int sysctl_sched_enable_rt_cas;
+#endif
+#ifdef CONFIG_SCHED_RT_ACTIVE_LB
+extern unsigned int sysctl_sched_enable_rt_active_lb;
+#endif
+
 enum sched_tunable_scaling {
 	SCHED_TUNABLESCALING_NONE,
 	SCHED_TUNABLESCALING_LOG,
diff --git a/include/trace/events/eas_sched.h b/include/trace/events/eas_sched.h
new file mode 100755
index 000000000000..d015a3cf493d
--- /dev/null
+++ b/include/trace/events/eas_sched.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifdef CONFIG_SCHED_RT_CAS
+TRACE_EVENT(sched_find_cas_cpu_each,
+
+	TP_PROTO(struct task_struct *task, int cpu, int target_cpu,
+		int isolated, int idle, unsigned long task_util,
+		unsigned long cpu_util, int cpu_cap),
+
+	TP_ARGS(task, cpu, target_cpu, isolated, idle, task_util, cpu_util, cpu_cap),
+
+	TP_STRUCT__entry(
+		__array(char, comm, TASK_COMM_LEN)
+		__field(pid_t, pid)
+		__field(int, prio)
+		__field(int, cpu)
+		__field(int, target_cpu)
+		__field(int, isolated)
+		__field(unsigned long, idle)
+		__field(unsigned long, task_util)
+		__field(unsigned long, cpu_util)
+		__field(unsigned long, cpu_cap)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, task->comm, TASK_COMM_LEN);
+		__entry->pid = task->pid;
+		__entry->prio = task->prio;
+		__entry->cpu = cpu;
+		__entry->target_cpu = target_cpu;
+		__entry->isolated = isolated;
+		__entry->idle = idle;
+		__entry->task_util = task_util;
+		__entry->cpu_util = cpu_util;
+		__entry->cpu_cap = cpu_cap;
+	),
+
+	TP_printk("comm=%s pid=%d prio=%d cpu=%d target_cpu=%d isolated=%d idle=%lu task_util=%lu cpu_util=%lu cpu_cap=%lu",
+		__entry->comm, __entry->pid, __entry->prio,
+		__entry->cpu, __entry->target_cpu, __entry->isolated,
+		__entry->idle, __entry->task_util,
+		__entry->cpu_util, __entry->cpu_cap)
+);
+
+TRACE_EVENT(sched_find_cas_cpu,
+
+	TP_PROTO(struct task_struct *task, struct cpumask *lowest_mask,
+		unsigned long tutil, int prev_cpu, int target_cpu),
+
+	TP_ARGS(task, lowest_mask, tutil, prev_cpu, target_cpu),
+
+	TP_STRUCT__entry(
+		__array(char, comm, TASK_COMM_LEN)
+		__field(pid_t, pid)
+		__field(unsigned int, prio)
+		__bitmask(lowest, num_possible_cpus())
+		__field(unsigned long, tutil)
+		__field(int, prev_cpu)
+		__field(int, target_cpu)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, task->comm, TASK_COMM_LEN);
+		__entry->pid = task->pid;
+
__entry->prio = task->prio; + __assign_bitmask(lowest, cpumask_bits(lowest_mask), num_possible_cpus()); + __entry->tutil = tutil; + __entry->prev_cpu = prev_cpu; + __entry->target_cpu = target_cpu; + ), + + TP_printk("comm=%s pid=%d prio=%d lowest_mask=%s tutil=%lu prev=%d target=%d ", + __entry->comm, __entry->pid, __entry->prio, + __get_bitmask(lowest), __entry->tutil, + __entry->prev_cpu, __entry->target_cpu) +); +#endif /* CONFIG_SCHED_RT_CAS */ diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index df0ace51c578..8a3c787e0cff 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -11,6 +11,10 @@ #include #include +#ifdef CONFIG_SCHED_RT_CAS +#include "eas_sched.h" +#endif + /* * Tracepoint for calling kthread_stop, performed to end a kthread: */ diff --git a/init/Kconfig b/init/Kconfig index da8d4664f1a4..aba34750256f 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -890,6 +890,27 @@ config SCHED_LATENCY_NICE This feature use latency nice priority to decide if a cfs task can preempt the current running task. + +config SCHED_EAS + bool "EAS scheduler optimization" + default n + help + Check and migrate the CFS process to a more suitable CPU in the tick. + +config SCHED_RT_CAS + bool "rt-cas optimization" + depends on SCHED_EAS + default n + help + RT task detects capacity during CPU selection + +config SCHED_RT_ACTIVE_LB + bool "RT Capacity Aware Misfit Task" + depends on SCHED_EAS + default n + help + Check and migrate the RT process to a more suitable CPU in the tick. + endmenu # diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 4fb4e967cd12..dc3ee249258b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5832,6 +5832,11 @@ void scheduler_tick(void) #ifdef CONFIG_SMP rq->idle_balance = idle_cpu(cpu); trigger_load_balance(rq); + +#ifdef CONFIG_SCHED_EAS + if (curr->sched_class->check_for_migration) + curr->sched_class->check_for_migration(rq, curr); +#endif #endif } @@ -9964,14 +9969,14 @@ static void clear_eas_migration_request(int cpu) if (rq->push_task) { struct task_struct *push_task = NULL; - raw_spin_lock_irqsave(&rq->lock, flags); + raw_spin_lock_irqsave(&rq->__lock, flags); if (rq->push_task) { clear_reserved(rq->push_cpu); push_task = rq->push_task; rq->push_task = NULL; } rq->active_balance = 0; - raw_spin_unlock_irqrestore(&rq->lock, flags); + raw_spin_unlock_irqrestore(&rq->__lock, flags); if (push_task) put_task_struct(push_task); } @@ -10423,6 +10428,7 @@ int sched_cpu_starting(unsigned int cpu) sched_core_cpu_starting(cpu); sched_rq_cpu_starting(cpu); sched_tick_start(cpu); + clear_eas_migration_request(cpu); return 0; } @@ -10496,6 +10502,8 @@ int sched_cpu_dying(unsigned int cpu) } rq_unlock_irqrestore(rq, &rf); + clear_eas_migration_request(cpu); + calc_load_migrate(rq); update_max_interval(); hrtick_clear(rq); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 075e006a0df9..0c0680a89052 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4804,6 +4804,34 @@ static inline unsigned long task_util_est(struct task_struct *p) return max(task_util(p), _task_util_est(p)); } +#ifdef CONFIG_UCLAMP_TASK +#ifdef CONFIG_SCHED_RT_CAS +unsigned long uclamp_task_util(struct task_struct *p, + unsigned long uclamp_min, + unsigned long uclamp_max) +#else +static inline unsigned long uclamp_task_util(struct task_struct *p, + unsigned long uclamp_min, + unsigned long uclamp_max) +#endif +{ + return clamp(task_util_est(p), uclamp_min, uclamp_max); +} +#else +#ifdef CONFIG_SCHED_RT_CAS +unsigned 
long uclamp_task_util(struct task_struct *p, + unsigned long uclamp_min, + unsigned long uclamp_max) +#else +static inline unsigned long uclamp_task_util(struct task_struct *p, + unsigned long uclamp_min, + unsigned long uclamp_max) +#endif +{ + return task_util_est(p); +} +#endif + static inline void util_est_enqueue(struct cfs_rq *cfs_rq, struct task_struct *p) { @@ -11775,9 +11803,13 @@ static int active_load_balance_cpu_stop(void *data) int busiest_cpu = cpu_of(busiest_rq); int target_cpu = busiest_rq->push_cpu; struct rq *target_rq = cpu_rq(target_cpu); - struct sched_domain *sd; + struct sched_domain *sd = NULL; struct task_struct *p = NULL; struct rq_flags rf; +#ifdef CONFIG_SCHED_EAS + struct task_struct *push_task; + int push_task_detached = 0; +#endif rq_lock_irq(busiest_rq, &rf); /* @@ -11803,6 +11835,31 @@ static int active_load_balance_cpu_stop(void *data) * Bjorn Helgaas on a 128-CPU setup. */ WARN_ON_ONCE(busiest_rq == target_rq); +#ifdef CONFIG_SCHED_EAS + push_task = busiest_rq->push_task; + target_cpu = busiest_rq->push_cpu; + if (push_task) { + struct lb_env env = { + .sd = sd, + .dst_cpu = target_cpu, + .dst_rq = target_rq, + .src_cpu = busiest_rq->cpu, + .src_rq = busiest_rq, + .idle = CPU_IDLE, + .flags = 0, + .loop = 0, + }; + if (task_on_rq_queued(push_task) && + push_task->__state == TASK_RUNNING && + task_cpu(push_task) == busiest_cpu && + cpu_online(target_cpu)) { + update_rq_clock(busiest_rq); + detach_task(push_task, &env); + push_task_detached = 1; + } + goto out_unlock; + } +#endif /* Search for an sd spanning us and the target CPU. */ rcu_read_lock(); @@ -11837,8 +11894,23 @@ static int active_load_balance_cpu_stop(void *data) rcu_read_unlock(); out_unlock: busiest_rq->active_balance = 0; + +#ifdef CONFIG_SCHED_EAS + push_task = busiest_rq->push_task; + if (push_task) + busiest_rq->push_task = NULL; +#endif rq_unlock(busiest_rq, &rf); +#ifdef CONFIG_SCHED_EAS + if (push_task) { + if (push_task_detached) + attach_one_task(target_rq, push_task); + + put_task_struct(push_task); + } +#endif + if (p) attach_one_task(target_rq, p); @@ -12697,6 +12769,98 @@ static void rq_offline_fair(struct rq *rq) unthrottle_offline_cfs_rqs(rq); } +#ifdef CONFIG_SCHED_EAS +static inline int +kick_active_balance(struct rq *rq, struct task_struct *p, int new_cpu) +{ + unsigned long flags; + int rc = 0; + + if (cpu_of(rq) == new_cpu) + return rc; + + /* Invoke active balance to force migrate currently running task */ + raw_spin_lock_irqsave(&rq->__lock, flags); + if (!rq->active_balance) { + rq->active_balance = 1; + rq->push_cpu = new_cpu; + get_task_struct(p); + rq->push_task = p; + rc = 1; + } + raw_spin_unlock_irqrestore(&rq->__lock, flags); + return rc; +} + +DEFINE_RAW_SPINLOCK(migration_lock); +static void check_for_migration_fair(struct rq *rq, struct task_struct *p) +{ + int active_balance; + int new_cpu = -1; + int prev_cpu = task_cpu(p); + int ret; + +#ifdef CONFIG_SCHED_RTG + bool need_down_migrate = false; + struct cpumask *rtg_target = find_rtg_target(p); + + if (rtg_target && + (capacity_orig_of(prev_cpu) > + capacity_orig_of(cpumask_first(rtg_target)))) + need_down_migrate = true; +#endif + + if (rq->misfit_task_load) { + if (rq->curr->__state != TASK_RUNNING || + rq->curr->nr_cpus_allowed == 1) + return; + + raw_spin_lock(&migration_lock); +#ifdef CONFIG_SCHED_RTG + if (rtg_target) { + new_cpu = find_rtg_cpu(p); + + if (new_cpu != -1 && need_down_migrate && + cpumask_test_cpu(new_cpu, rtg_target) && + idle_cpu(new_cpu)) + goto do_active_balance; + + if (new_cpu != 
-1 && + capacity_orig_of(new_cpu) > capacity_orig_of(prev_cpu)) + goto do_active_balance; + + goto out_unlock; + } +#endif + rcu_read_lock(); + new_cpu = find_energy_efficient_cpu(p, prev_cpu); + rcu_read_unlock(); + + if (new_cpu == -1 || + capacity_orig_of(new_cpu) <= capacity_orig_of(prev_cpu)) + goto out_unlock; +#ifdef CONFIG_SCHED_RTG +do_active_balance: +#endif + active_balance = kick_active_balance(rq, p, new_cpu); + if (active_balance) { + mark_reserved(new_cpu); + raw_spin_unlock(&migration_lock); + ret = stop_one_cpu_nowait(prev_cpu, + active_load_balance_cpu_stop, rq, + &rq->active_balance_work); + if (!ret) + clear_reserved(new_cpu); + else + wake_up_if_idle(new_cpu); + return; + } +out_unlock: + raw_spin_unlock(&migration_lock); + } +} +#endif /* CONFIG_SCHED_EAS */ + #endif /* CONFIG_SMP */ #ifdef CONFIG_SCHED_CORE @@ -13403,7 +13567,9 @@ DEFINE_SCHED_CLASS(fair) = { #ifdef CONFIG_SCHED_WALT .fixup_walt_sched_stats = walt_fixup_sched_stats_fair, #endif - +#ifdef CONFIG_SCHED_EAS + .check_for_migration = check_for_migration_fair, +#endif }; #ifdef CONFIG_SCHED_DEBUG diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 48b93b020e75..3f281c4ed821 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -14,6 +14,14 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun); struct rt_bandwidth def_rt_bandwidth; +#ifdef CONFIG_SCHED_RT_CAS +unsigned int sysctl_sched_enable_rt_cas = 1; +#endif + +#ifdef CONFIG_SCHED_RT_ACTIVE_LB +unsigned int sysctl_sched_enable_rt_active_lb = 1; +#endif + /* * period over which we measure -rt task CPU usage in us. * default: 1s @@ -1649,6 +1657,9 @@ select_task_rq_rt(struct task_struct *p, int cpu, int flags) test = curr && unlikely(rt_task(curr)) && (curr->nr_cpus_allowed < 2 || curr->prio <= p->prio); +#ifdef CONFIG_SCHED_RT_CAS + test |= sysctl_sched_enable_rt_cas; +#endif if (test || !rt_task_fits_capacity(p, cpu)) { int target = find_lowest_rq(p); @@ -1664,8 +1675,11 @@ select_task_rq_rt(struct task_struct *p, int cpu, int flags) * Don't bother moving it if the destination CPU is * not running a lower priority task. 
*/ - if (target != -1 && - p->prio < cpu_rq(target)->rt.highest_prio.curr) + if (target != -1 && ( +#ifdef CONFIG_SCHED_RT_CAS + sysctl_sched_enable_rt_cas || +#endif + p->prio < cpu_rq(target)->rt.highest_prio.curr)) cpu = target; } @@ -1884,6 +1898,170 @@ static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu) return NULL; } +#ifdef CONFIG_SCHED_RT_CAS +static int find_cas_cpu(struct sched_domain *sd, + struct task_struct *task, struct cpumask *lowest_mask) +{ + struct root_domain *rd = cpu_rq(smp_processor_id())->rd; + struct sched_group *sg = NULL; + struct sched_group *sg_target = NULL; + struct sched_group *sg_backup = NULL; + struct cpumask search_cpu, backup_search_cpu; + int cpu = -1; + int target_cpu = -1; + unsigned long cpu_capacity; + unsigned long boosted_tutil = uclamp_task_util(task, uclamp_eff_value(task, UCLAMP_MIN), uclamp_eff_value(task, UCLAMP_MAX)); + unsigned long target_capacity = ULONG_MAX; + unsigned long util; + unsigned long target_cpu_util = ULONG_MAX; + int prev_cpu = task_cpu(task); +#ifdef CONFIG_SCHED_RTG + struct cpumask *rtg_target = NULL; +#endif + bool boosted = uclamp_boosted(task); + + if (!sysctl_sched_enable_rt_cas) + return -1; + + rcu_read_lock(); + +#ifdef CONFIG_SCHED_RTG + rtg_target = find_rtg_target(task); +#endif + + sd = rcu_dereference(per_cpu(sd_asym_cpucapacity, 0)); + if (!sd) { + rcu_read_unlock(); + return -1; + } + + sg = sd->groups; + do { + if (!cpumask_intersects(lowest_mask, sched_group_span(sg))) + continue; + + if (boosted) { + if (cpumask_test_cpu(rd->max_cap_orig_cpu, + sched_group_span(sg))) { + sg_target = sg; + break; + } + } + + cpu = group_first_cpu(sg); +#ifdef CONFIG_SCHED_RTG + /* honor the rtg tasks */ + if (rtg_target) { + if (cpumask_test_cpu(cpu, rtg_target)) { + sg_target = sg; + break; + } + + /* active LB or big_task favor cpus with more capacity */ + if (task->state == TASK_RUNNING || boosted) { + if (capacity_orig_of(cpu) > + capacity_orig_of(cpumask_any(rtg_target))) { + sg_target = sg; + break; + } + + sg_backup = sg; + continue; + } + } +#endif + /* + * 1. add margin to support task migration + * 2. 
if task_util is higher than the capacity of all cpus, make sure the
+		 * sg_backup with the most powerful cpus is selected
+		 */
+		if (!rt_task_fits_capacity(task, cpu)) {
+			sg_backup = sg;
+			continue;
+		}
+
+		/* support task boost */
+		cpu_capacity = capacity_orig_of(cpu);
+		if (boosted_tutil > cpu_capacity) {
+			sg_backup = sg;
+			continue;
+		}
+
+		/* sg_target: select the sg with smaller capacity */
+		if (cpu_capacity < target_capacity) {
+			target_capacity = cpu_capacity;
+			sg_target = sg;
+		}
+	} while (sg = sg->next, sg != sd->groups);
+
+	if (!sg_target)
+		sg_target = sg_backup;
+
+	if (sg_target) {
+		cpumask_and(&search_cpu, lowest_mask, sched_group_span(sg_target));
+		cpumask_copy(&backup_search_cpu, lowest_mask);
+		cpumask_andnot(&backup_search_cpu, &backup_search_cpu, &search_cpu);
+	} else {
+		cpumask_copy(&search_cpu, lowest_mask);
+		cpumask_clear(&backup_search_cpu);
+	}
+
+retry:
+	cpu = cpumask_first(&search_cpu);
+	do {
+		trace_sched_find_cas_cpu_each(task, cpu, target_cpu,
+			cpu_isolated(cpu),
+			idle_cpu(cpu), boosted_tutil, cpu_util_cfs(cpu),
+			capacity_orig_of(cpu));
+
+		if (cpu_isolated(cpu))
+			continue;
+
+		if (!cpumask_test_cpu(cpu, task->cpus_ptr))
+			continue;
+
+		/* find best cpu with smallest max_capacity */
+		if (target_cpu != -1 &&
+		    capacity_orig_of(cpu) > capacity_orig_of(target_cpu))
+			continue;
+
+		util = cpu_util_cfs(cpu);
+
+		/* Find the least loaded CPU */
+		if (util > target_cpu_util)
+			continue;
+
+		/*
+		 * If the previous CPU has same load, keep it as
+		 * target_cpu
+		 */
+		if (target_cpu_util == util && target_cpu == prev_cpu)
+			continue;
+
+		/*
+		 * If candidate CPU is the previous CPU, select it.
+		 * If all above conditions are same, select the least
+		 * cumulative window demand CPU.
+		 */
+		target_cpu_util = util;
+		target_cpu = cpu;
+	} while ((cpu = cpumask_next(cpu, &search_cpu)) < nr_cpu_ids);
+
+	if (target_cpu != -1 && cpumask_test_cpu(target_cpu, lowest_mask)) {
+		goto done;
+	} else if (!cpumask_empty(&backup_search_cpu)) {
+		cpumask_copy(&search_cpu, &backup_search_cpu);
+		cpumask_clear(&backup_search_cpu);
+		goto retry;
+	}
+
+done:
+	trace_sched_find_cas_cpu(task, lowest_mask, boosted_tutil, prev_cpu, target_cpu);
+	rcu_read_unlock();
+	return target_cpu;
+}
+#endif
+
 static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
 
 static int find_lowest_rq(struct task_struct *task)
@@ -1893,6 +2071,9 @@ static int find_lowest_rq(struct task_struct *task)
 	int this_cpu = smp_processor_id();
 	int cpu = task_cpu(task);
 	int ret;
+#ifdef CONFIG_SCHED_RT_CAS
+	int cas_cpu;
+#endif
 
 	/* Make sure the mask is initialized first */
 	if (unlikely(!lowest_mask))
@@ -1919,6 +2100,12 @@ static int find_lowest_rq(struct task_struct *task)
 	if (!ret)
 		return -1; /* No targets found */
 
+#ifdef CONFIG_SCHED_RT_CAS
+	cas_cpu = find_cas_cpu(sd, task, lowest_mask);
+	if (cas_cpu != -1)
+		return cas_cpu;
+#endif
+
 	/*
	 * At this point we have built a mask of CPUs representing the
	 * lowest priority tasks in the system.
Now we want to elect @@ -2684,6 +2871,93 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued) } } +#ifdef CONFIG_SCHED_RT_ACTIVE_LB +static int rt_active_load_balance_cpu_stop(void *data) +{ + struct rq *busiest_rq = data; + struct task_struct *next_task = busiest_rq->rt_push_task; + struct rq *lowest_rq = NULL; + unsigned long flags; + + raw_spin_lock_irqsave(&busiest_rq->__lock, flags); + busiest_rq->rt_active_balance = 0; + + if (!task_on_rq_queued(next_task) || + task_cpu(next_task) != cpu_of(busiest_rq)) + goto out; + + /* find_lock_lowest_rq locks the rq if found */ + lowest_rq = find_lock_lowest_rq(next_task, busiest_rq); + if (!lowest_rq) + goto out; + + if (capacity_orig_of(cpu_of(lowest_rq)) <= capacity_orig_of(task_cpu(next_task))) + goto unlock; + + deactivate_task(busiest_rq, next_task, 0); + set_task_cpu(next_task, lowest_rq->cpu); + activate_task(lowest_rq, next_task, 0); + + resched_curr(lowest_rq); +unlock: + double_unlock_balance(busiest_rq, lowest_rq); +out: + put_task_struct(next_task); + raw_spin_unlock_irqrestore(&busiest_rq->__lock, flags); + + return 0; +} + +static void check_for_migration_rt(struct rq *rq, struct task_struct *p) +{ + bool need_actvie_lb = false; + bool misfit_task = false; + int cpu = task_cpu(p); + unsigned long cpu_orig_cap; +#ifdef CONFIG_SCHED_RTG + struct cpumask *rtg_target = NULL; +#endif + + if (!sysctl_sched_enable_rt_active_lb) + return; + + if (p->nr_cpus_allowed == 1) + return; + + cpu_orig_cap = capacity_orig_of(cpu); + /* cpu has max capacity, no need to do balance */ + if (cpu_orig_cap == rq->rd->max_cpu_capacity) + return; + +#ifdef CONFIG_SCHED_RTG + rtg_target = find_rtg_target(p); + if (rtg_target) + misfit_task = capacity_orig_of(cpumask_first(rtg_target)) > + cpu_orig_cap; + else + misfit_task = !rt_task_fits_capacity(p, cpu); +#else + misfit_task = !rt_task_fits_capacity(p, cpu); +#endif + + if (misfit_task) { + raw_spin_lock(&rq->__lock); + if (!rq->active_balance && !rq->rt_active_balance) { + rq->rt_active_balance = 1; + rq->rt_push_task = p; + get_task_struct(p); + need_actvie_lb = true; + } + raw_spin_unlock(&rq->__lock); + + if (need_actvie_lb) + stop_one_cpu_nowait(task_cpu(p), + rt_active_load_balance_cpu_stop, + rq, &rq->rt_active_balance_work); + } +} +#endif + static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task) { /* @@ -2753,6 +3027,9 @@ DEFINE_SCHED_CLASS(rt) = { #ifdef CONFIG_SCHED_WALT .fixup_walt_sched_stats = fixup_walt_sched_stats_common, #endif +#ifdef CONFIG_SCHED_RT_ACTIVE_LB + .check_for_migration = check_for_migration_rt, +#endif }; #ifdef CONFIG_RT_GROUP_SCHED diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 484a34ddc821..e17bf7cb98d7 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -101,6 +101,12 @@ struct rq; struct cpuidle_state; +#ifdef CONFIG_SCHED_RT_CAS +extern unsigned long uclamp_task_util(struct task_struct *p, + unsigned long uclamp_min, + unsigned long uclamp_max); +#endif + #ifdef CONFIG_SCHED_WALT extern unsigned int sched_ravg_window; extern unsigned int walt_cpu_util_freq_divisor; @@ -970,6 +976,9 @@ struct root_domain { * CPUs of the rd. Protected by RCU. 
*/ struct perf_domain __rcu *pd; +#ifdef CONFIG_SCHED_RT_CAS + int max_cap_orig_cpu; +#endif }; extern void init_defrootdomain(void); @@ -1141,8 +1150,18 @@ struct rq { /* For active balancing */ int active_balance; int push_cpu; +#ifdef CONFIG_SCHED_EAS + struct task_struct *push_task; +#endif struct cpu_stop_work active_balance_work; + /* For rt active balancing */ +#ifdef CONFIG_SCHED_RT_ACTIVE_LB + int rt_active_balance; + struct task_struct *rt_push_task; + struct cpu_stop_work rt_active_balance_work; +#endif + /* CPU of this runqueue: */ int cpu; int online; @@ -2041,6 +2060,15 @@ static inline struct cpumask *group_balance_mask(struct sched_group *sg) return to_cpumask(sg->sgc->cpumask); } +/** + * group_first_cpu - Returns the first CPU in the cpumask of a sched_group. + * @group: The group whose first CPU is to be returned. + */ +static inline unsigned int group_first_cpu(struct sched_group *group) +{ + return cpumask_first(sched_group_span(group)); +} + extern int group_balance_cpu(struct sched_group *sg); #ifdef CONFIG_SCHED_DEBUG @@ -2403,7 +2431,9 @@ struct sched_class { void (*fixup_walt_sched_stats)(struct rq *rq, struct task_struct *p, u16 updated_demand_scaled); #endif - +#ifdef CONFIG_SCHED_EAS + void (*check_for_migration)(struct rq *rq, struct task_struct *p); +#endif #ifdef CONFIG_SCHED_CORE int (*task_is_throttled)(struct task_struct *p, int cpu); #endif @@ -3249,6 +3279,11 @@ unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util, return clamp(util, min_util, max_util); } +static inline bool uclamp_boosted(struct task_struct *p) +{ + return uclamp_eff_value(p, UCLAMP_MIN) > 0; +} + /* Is the rq being capped/throttled by uclamp_max? */ static inline bool uclamp_rq_is_capped(struct rq *rq) { @@ -3293,6 +3328,11 @@ unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util, return util; } +static inline bool uclamp_boosted(struct task_struct *p) +{ + return false; +} + static inline bool uclamp_rq_is_capped(struct rq *rq) { return false; } static inline bool uclamp_is_used(void) @@ -3412,6 +3452,14 @@ static inline bool is_per_cpu_kthread(struct task_struct *p) extern void swake_up_all_locked(struct swait_queue_head *q); extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); +#ifdef CONFIG_SCHED_RTG +extern bool task_fits_max(struct task_struct *p, int cpu); +extern unsigned long capacity_spare_without(int cpu, struct task_struct *p); +extern int update_preferred_cluster(struct related_thread_group *grp, + struct task_struct *p, u32 old_load, bool from_tick); +extern struct cpumask *find_rtg_target(struct task_struct *p); +#endif + extern int try_to_wake_up(struct task_struct *tsk, unsigned int state, int wake_flags); #ifdef CONFIG_PREEMPT_DYNAMIC diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index c530731a755c..c46367544fc1 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -558,6 +558,10 @@ static int init_rootdomain(struct root_domain *rd) if (cpupri_init(&rd->cpupri) != 0) goto free_cpudl; + +#ifdef CONFIG_SCHED_RT_CAS + rd->max_cap_orig_cpu = -1; +#endif return 0; free_cpudl: @@ -2491,9 +2495,19 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att /* Attach the domains */ rcu_read_lock(); for_each_cpu(i, cpu_map) { +#ifdef CONFIG_SCHED_RT_CAS + int max_cpu = READ_ONCE(d.rd->max_cap_orig_cpu); +#endif + rq = cpu_rq(i); sd = *per_cpu_ptr(d.sd, i); +#ifdef CONFIG_SCHED_RT_CAS + if (max_cpu < 0 || arch_scale_cpu_capacity(i) > + 
arch_scale_cpu_capacity(max_cpu)) + WRITE_ONCE(d.rd->max_cap_orig_cpu, i); +#endif + /* Use READ_ONCE()/WRITE_ONCE() to avoid load/store tearing: */ if (rq->cpu_capacity_orig > READ_ONCE(d.rd->max_cpu_capacity)) WRITE_ONCE(d.rd->max_cpu_capacity, rq->cpu_capacity_orig); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index b0151dbbd162..3cacf8cfaa59 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1623,6 +1623,24 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, +#ifdef CONFIG_SCHED_RT_CAS + { + .procname = "sched_enable_rt_cas", + .data = &sysctl_sched_enable_rt_cas, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif +#ifdef CONFIG_SCHED_RT_ACTIVE_LB + { + .procname = "sched_enable_rt_active_lb", + .data = &sysctl_sched_enable_rt_active_lb, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif #ifdef CONFIG_SCHED_WALT { .procname = "sched_use_walt_cpu_util", -- Gitee
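
Usage note: the two switches added to kern_table above are plain proc_dointvec integers, so on a kernel built with CONFIG_SCHED_RT_CAS and CONFIG_SCHED_RT_ACTIVE_LB they can be flipped at run time. The sketch below is illustrative only and is not part of the patch; it assumes the entries surface under /proc/sys/kernel/, which is where kern_table entries are normally exposed.

/* toggle_eas_knobs.c - illustrative userspace sketch, not from the patch.
 * Paths assume the sysctls registered in kern_table appear under
 * /proc/sys/kernel/.
 */
#include <stdio.h>

static int write_knob(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		/* Knob absent, e.g. kernel built without the config option. */
		perror(path);
		return -1;
	}
	fputs(val, f);
	fclose(f);
	return 0;
}

int main(void)
{
	/* Capacity-aware CPU selection for RT tasks (find_cas_cpu()). */
	write_knob("/proc/sys/kernel/sched_enable_rt_cas", "1");
	/* Tick-driven RT misfit migration (check_for_migration_rt()). */
	write_knob("/proc/sys/kernel/sched_enable_rt_active_lb", "1");
	return 0;
}

Both knobs default to 1 per the initializers in kernel/sched/rt.c, so writing 0 is how the corresponding paths are switched off again.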