From 6bf9041ddcd4613ec0c958ecb44b49cf785e483c Mon Sep 17 00:00:00 2001
From: ma_yulong
Date: Tue, 10 Sep 2024 18:04:04 +0800
Subject: [PATCH] Support kernel EAS energy-aware scheduling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: ma_yulong
---
 include/linux/sched/sysctl.h     |   8 +
 include/trace/events/eas_sched.h |  76 +++++++++
 include/trace/events/sched.h     |   4 +
 init/Kconfig                     |  21 +++
 kernel/sched/core.c              |  12 +-
 kernel/sched/fair.c              | 170 ++++++++++++++++++-
 kernel/sched/rt.c                | 281 ++++++++++++++++++++++++++++++-
 kernel/sched/sched.h             |  50 +++++-
 kernel/sched/topology.c          |  14 ++
 kernel/sysctl.c                  |  18 ++
 10 files changed, 647 insertions(+), 7 deletions(-)
 create mode 100755 include/trace/events/eas_sched.h

diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 0e098c5a0ff3..546e750f71ee 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -24,6 +24,14 @@ extern int
 sysctl_sched_walt_init_task_load_pct_sysctl_handler(struct ctl_table *table,
 	int write, void __user *buffer, size_t *length, loff_t *ppos);
 #endif
+
+#ifdef CONFIG_SCHED_RT_CAS
+extern unsigned int sysctl_sched_enable_rt_cas;
+#endif
+#ifdef CONFIG_SCHED_RT_ACTIVE_LB
+extern unsigned int sysctl_sched_enable_rt_active_lb;
+#endif
+
 enum sched_tunable_scaling {
 	SCHED_TUNABLESCALING_NONE,
 	SCHED_TUNABLESCALING_LOG,
diff --git a/include/trace/events/eas_sched.h b/include/trace/events/eas_sched.h
new file mode 100755
index 000000000000..d015a3cf493d
--- /dev/null
+++ b/include/trace/events/eas_sched.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifdef CONFIG_SCHED_RT_CAS
+TRACE_EVENT(sched_find_cas_cpu_each,
+
+	TP_PROTO(struct task_struct *task, int cpu, int target_cpu,
+		int isolated, int idle, unsigned long task_util,
+		unsigned long cpu_util, int cpu_cap),
+
+	TP_ARGS(task, cpu, target_cpu, isolated, idle, task_util, cpu_util, cpu_cap),
+
+	TP_STRUCT__entry(
+		__array(char, comm, TASK_COMM_LEN)
+		__field(pid_t, pid)
+		__field(int, prio)
+		__field(int, cpu)
+		__field(int, target_cpu)
+		__field(int, isolated)
+		__field(unsigned long, idle)
+		__field(unsigned long, task_util)
+		__field(unsigned long, cpu_util)
+		__field(unsigned long, cpu_cap)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, task->comm, TASK_COMM_LEN);
+		__entry->pid = task->pid;
+		__entry->prio = task->prio;
+		__entry->cpu = cpu;
+		__entry->target_cpu = target_cpu;
+		__entry->isolated = isolated;
+		__entry->idle = idle;
+		__entry->task_util = task_util;
+		__entry->cpu_util = cpu_util;
+		__entry->cpu_cap = cpu_cap;
+	),
+
+	TP_printk("comm=%s pid=%d prio=%d cpu=%d target_cpu=%d isolated=%d idle=%lu task_util=%lu cpu_util=%lu cpu_cap=%lu",
+		__entry->comm, __entry->pid, __entry->prio,
+		__entry->cpu, __entry->target_cpu, __entry->isolated,
+		__entry->idle, __entry->task_util,
+		__entry->cpu_util, __entry->cpu_cap)
+);
+
+TRACE_EVENT(sched_find_cas_cpu,
+
+	TP_PROTO(struct task_struct *task, struct cpumask *lowest_mask,
+		unsigned long tutil, int prev_cpu, int target_cpu),
+
+	TP_ARGS(task, lowest_mask, tutil, prev_cpu, target_cpu),
+
+	TP_STRUCT__entry(
+		__array(char, comm, TASK_COMM_LEN)
+		__field(pid_t, pid)
+		__field(unsigned int, prio)
+		__bitmask(lowest, num_possible_cpus())
+		__field(unsigned long, tutil)
+		__field(int, prev_cpu)
+		__field(int, target_cpu)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, task->comm, TASK_COMM_LEN);
+		__entry->pid = task->pid;
+
__entry->prio = task->prio; + __assign_bitmask(lowest, cpumask_bits(lowest_mask), num_possible_cpus()); + __entry->tutil = tutil; + __entry->prev_cpu = prev_cpu; + __entry->target_cpu = target_cpu; + ), + + TP_printk("comm=%s pid=%d prio=%d lowest_mask=%s tutil=%lu prev=%d target=%d ", + __entry->comm, __entry->pid, __entry->prio, + __get_bitmask(lowest), __entry->tutil, + __entry->prev_cpu, __entry->target_cpu) +); +#endif /* CONFIG_SCHED_RT_CAS */ diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index df0ace51c578..8a3c787e0cff 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -11,6 +11,10 @@ #include #include +#ifdef CONFIG_SCHED_RT_CAS +#include "eas_sched.h" +#endif + /* * Tracepoint for calling kthread_stop, performed to end a kthread: */ diff --git a/init/Kconfig b/init/Kconfig index da8d4664f1a4..aba34750256f 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -890,6 +890,27 @@ config SCHED_LATENCY_NICE This feature use latency nice priority to decide if a cfs task can preempt the current running task. + +config SCHED_EAS + bool "EAS scheduler optimization" + default n + help + Check and migrate the CFS process to a more suitable CPU in the tick. + +config SCHED_RT_CAS + bool "rt-cas optimization" + depends on SCHED_EAS + default n + help + RT task detects capacity during CPU selection + +config SCHED_RT_ACTIVE_LB + bool "RT Capacity Aware Misfit Task" + depends on SCHED_EAS + default n + help + Check and migrate the RT process to a more suitable CPU in the tick. + endmenu # diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 4fb4e967cd12..dc3ee249258b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5832,6 +5832,11 @@ void scheduler_tick(void) #ifdef CONFIG_SMP rq->idle_balance = idle_cpu(cpu); trigger_load_balance(rq); + +#ifdef CONFIG_SCHED_EAS + if (curr->sched_class->check_for_migration) + curr->sched_class->check_for_migration(rq, curr); +#endif #endif } @@ -9964,14 +9969,14 @@ static void clear_eas_migration_request(int cpu) if (rq->push_task) { struct task_struct *push_task = NULL; - raw_spin_lock_irqsave(&rq->lock, flags); + raw_spin_lock_irqsave(&rq->__lock, flags); if (rq->push_task) { clear_reserved(rq->push_cpu); push_task = rq->push_task; rq->push_task = NULL; } rq->active_balance = 0; - raw_spin_unlock_irqrestore(&rq->lock, flags); + raw_spin_unlock_irqrestore(&rq->__lock, flags); if (push_task) put_task_struct(push_task); } @@ -10423,6 +10428,7 @@ int sched_cpu_starting(unsigned int cpu) sched_core_cpu_starting(cpu); sched_rq_cpu_starting(cpu); sched_tick_start(cpu); + clear_eas_migration_request(cpu); return 0; } @@ -10496,6 +10502,8 @@ int sched_cpu_dying(unsigned int cpu) } rq_unlock_irqrestore(rq, &rf); + clear_eas_migration_request(cpu); + calc_load_migrate(rq); update_max_interval(); hrtick_clear(rq); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 075e006a0df9..0c0680a89052 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4804,6 +4804,34 @@ static inline unsigned long task_util_est(struct task_struct *p) return max(task_util(p), _task_util_est(p)); } +#ifdef CONFIG_UCLAMP_TASK +#ifdef CONFIG_SCHED_RT_CAS +unsigned long uclamp_task_util(struct task_struct *p, + unsigned long uclamp_min, + unsigned long uclamp_max) +#else +static inline unsigned long uclamp_task_util(struct task_struct *p, + unsigned long uclamp_min, + unsigned long uclamp_max) +#endif +{ + return clamp(task_util_est(p), uclamp_min, uclamp_max); +} +#else +#ifdef CONFIG_SCHED_RT_CAS +unsigned 
long uclamp_task_util(struct task_struct *p, + unsigned long uclamp_min, + unsigned long uclamp_max) +#else +static inline unsigned long uclamp_task_util(struct task_struct *p, + unsigned long uclamp_min, + unsigned long uclamp_max) +#endif +{ + return task_util_est(p); +} +#endif + static inline void util_est_enqueue(struct cfs_rq *cfs_rq, struct task_struct *p) { @@ -11775,9 +11803,13 @@ static int active_load_balance_cpu_stop(void *data) int busiest_cpu = cpu_of(busiest_rq); int target_cpu = busiest_rq->push_cpu; struct rq *target_rq = cpu_rq(target_cpu); - struct sched_domain *sd; + struct sched_domain *sd = NULL; struct task_struct *p = NULL; struct rq_flags rf; +#ifdef CONFIG_SCHED_EAS + struct task_struct *push_task; + int push_task_detached = 0; +#endif rq_lock_irq(busiest_rq, &rf); /* @@ -11803,6 +11835,31 @@ static int active_load_balance_cpu_stop(void *data) * Bjorn Helgaas on a 128-CPU setup. */ WARN_ON_ONCE(busiest_rq == target_rq); +#ifdef CONFIG_SCHED_EAS + push_task = busiest_rq->push_task; + target_cpu = busiest_rq->push_cpu; + if (push_task) { + struct lb_env env = { + .sd = sd, + .dst_cpu = target_cpu, + .dst_rq = target_rq, + .src_cpu = busiest_rq->cpu, + .src_rq = busiest_rq, + .idle = CPU_IDLE, + .flags = 0, + .loop = 0, + }; + if (task_on_rq_queued(push_task) && + push_task->__state == TASK_RUNNING && + task_cpu(push_task) == busiest_cpu && + cpu_online(target_cpu)) { + update_rq_clock(busiest_rq); + detach_task(push_task, &env); + push_task_detached = 1; + } + goto out_unlock; + } +#endif /* Search for an sd spanning us and the target CPU. */ rcu_read_lock(); @@ -11837,8 +11894,23 @@ static int active_load_balance_cpu_stop(void *data) rcu_read_unlock(); out_unlock: busiest_rq->active_balance = 0; + +#ifdef CONFIG_SCHED_EAS + push_task = busiest_rq->push_task; + if (push_task) + busiest_rq->push_task = NULL; +#endif rq_unlock(busiest_rq, &rf); +#ifdef CONFIG_SCHED_EAS + if (push_task) { + if (push_task_detached) + attach_one_task(target_rq, push_task); + + put_task_struct(push_task); + } +#endif + if (p) attach_one_task(target_rq, p); @@ -12697,6 +12769,98 @@ static void rq_offline_fair(struct rq *rq) unthrottle_offline_cfs_rqs(rq); } +#ifdef CONFIG_SCHED_EAS +static inline int +kick_active_balance(struct rq *rq, struct task_struct *p, int new_cpu) +{ + unsigned long flags; + int rc = 0; + + if (cpu_of(rq) == new_cpu) + return rc; + + /* Invoke active balance to force migrate currently running task */ + raw_spin_lock_irqsave(&rq->__lock, flags); + if (!rq->active_balance) { + rq->active_balance = 1; + rq->push_cpu = new_cpu; + get_task_struct(p); + rq->push_task = p; + rc = 1; + } + raw_spin_unlock_irqrestore(&rq->__lock, flags); + return rc; +} + +DEFINE_RAW_SPINLOCK(migration_lock); +static void check_for_migration_fair(struct rq *rq, struct task_struct *p) +{ + int active_balance; + int new_cpu = -1; + int prev_cpu = task_cpu(p); + int ret; + +#ifdef CONFIG_SCHED_RTG + bool need_down_migrate = false; + struct cpumask *rtg_target = find_rtg_target(p); + + if (rtg_target && + (capacity_orig_of(prev_cpu) > + capacity_orig_of(cpumask_first(rtg_target)))) + need_down_migrate = true; +#endif + + if (rq->misfit_task_load) { + if (rq->curr->__state != TASK_RUNNING || + rq->curr->nr_cpus_allowed == 1) + return; + + raw_spin_lock(&migration_lock); +#ifdef CONFIG_SCHED_RTG + if (rtg_target) { + new_cpu = find_rtg_cpu(p); + + if (new_cpu != -1 && need_down_migrate && + cpumask_test_cpu(new_cpu, rtg_target) && + idle_cpu(new_cpu)) + goto do_active_balance; + + if (new_cpu != 
-1 && + capacity_orig_of(new_cpu) > capacity_orig_of(prev_cpu)) + goto do_active_balance; + + goto out_unlock; + } +#endif + rcu_read_lock(); + new_cpu = find_energy_efficient_cpu(p, prev_cpu); + rcu_read_unlock(); + + if (new_cpu == -1 || + capacity_orig_of(new_cpu) <= capacity_orig_of(prev_cpu)) + goto out_unlock; +#ifdef CONFIG_SCHED_RTG +do_active_balance: +#endif + active_balance = kick_active_balance(rq, p, new_cpu); + if (active_balance) { + mark_reserved(new_cpu); + raw_spin_unlock(&migration_lock); + ret = stop_one_cpu_nowait(prev_cpu, + active_load_balance_cpu_stop, rq, + &rq->active_balance_work); + if (!ret) + clear_reserved(new_cpu); + else + wake_up_if_idle(new_cpu); + return; + } +out_unlock: + raw_spin_unlock(&migration_lock); + } +} +#endif /* CONFIG_SCHED_EAS */ + #endif /* CONFIG_SMP */ #ifdef CONFIG_SCHED_CORE @@ -13403,7 +13567,9 @@ DEFINE_SCHED_CLASS(fair) = { #ifdef CONFIG_SCHED_WALT .fixup_walt_sched_stats = walt_fixup_sched_stats_fair, #endif - +#ifdef CONFIG_SCHED_EAS + .check_for_migration = check_for_migration_fair, +#endif }; #ifdef CONFIG_SCHED_DEBUG diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 48b93b020e75..3f281c4ed821 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -14,6 +14,14 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun); struct rt_bandwidth def_rt_bandwidth; +#ifdef CONFIG_SCHED_RT_CAS +unsigned int sysctl_sched_enable_rt_cas = 1; +#endif + +#ifdef CONFIG_SCHED_RT_ACTIVE_LB +unsigned int sysctl_sched_enable_rt_active_lb = 1; +#endif + /* * period over which we measure -rt task CPU usage in us. * default: 1s @@ -1649,6 +1657,9 @@ select_task_rq_rt(struct task_struct *p, int cpu, int flags) test = curr && unlikely(rt_task(curr)) && (curr->nr_cpus_allowed < 2 || curr->prio <= p->prio); +#ifdef CONFIG_SCHED_RT_CAS + test |= sysctl_sched_enable_rt_cas; +#endif if (test || !rt_task_fits_capacity(p, cpu)) { int target = find_lowest_rq(p); @@ -1664,8 +1675,11 @@ select_task_rq_rt(struct task_struct *p, int cpu, int flags) * Don't bother moving it if the destination CPU is * not running a lower priority task. 
*/ - if (target != -1 && - p->prio < cpu_rq(target)->rt.highest_prio.curr) + if (target != -1 && ( +#ifdef CONFIG_SCHED_RT_CAS + sysctl_sched_enable_rt_cas || +#endif + p->prio < cpu_rq(target)->rt.highest_prio.curr)) cpu = target; } @@ -1884,6 +1898,170 @@ static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu) return NULL; } +#ifdef CONFIG_SCHED_RT_CAS +static int find_cas_cpu(struct sched_domain *sd, + struct task_struct *task, struct cpumask *lowest_mask) +{ + struct root_domain *rd = cpu_rq(smp_processor_id())->rd; + struct sched_group *sg = NULL; + struct sched_group *sg_target = NULL; + struct sched_group *sg_backup = NULL; + struct cpumask search_cpu, backup_search_cpu; + int cpu = -1; + int target_cpu = -1; + unsigned long cpu_capacity; + unsigned long boosted_tutil = uclamp_task_util(task, uclamp_eff_value(task, UCLAMP_MIN), uclamp_eff_value(task, UCLAMP_MAX)); + unsigned long target_capacity = ULONG_MAX; + unsigned long util; + unsigned long target_cpu_util = ULONG_MAX; + int prev_cpu = task_cpu(task); +#ifdef CONFIG_SCHED_RTG + struct cpumask *rtg_target = NULL; +#endif + bool boosted = uclamp_boosted(task); + + if (!sysctl_sched_enable_rt_cas) + return -1; + + rcu_read_lock(); + +#ifdef CONFIG_SCHED_RTG + rtg_target = find_rtg_target(task); +#endif + + sd = rcu_dereference(per_cpu(sd_asym_cpucapacity, 0)); + if (!sd) { + rcu_read_unlock(); + return -1; + } + + sg = sd->groups; + do { + if (!cpumask_intersects(lowest_mask, sched_group_span(sg))) + continue; + + if (boosted) { + if (cpumask_test_cpu(rd->max_cap_orig_cpu, + sched_group_span(sg))) { + sg_target = sg; + break; + } + } + + cpu = group_first_cpu(sg); +#ifdef CONFIG_SCHED_RTG + /* honor the rtg tasks */ + if (rtg_target) { + if (cpumask_test_cpu(cpu, rtg_target)) { + sg_target = sg; + break; + } + + /* active LB or big_task favor cpus with more capacity */ + if (task->state == TASK_RUNNING || boosted) { + if (capacity_orig_of(cpu) > + capacity_orig_of(cpumask_any(rtg_target))) { + sg_target = sg; + break; + } + + sg_backup = sg; + continue; + } + } +#endif + /* + * 1. add margin to support task migration + * 2. 
if task_util is higher than the capacity of all cpus, make sure the
+		 * sg_backup with the most powerful cpus is selected
+		 */
+		if (!rt_task_fits_capacity(task, cpu)) {
+			sg_backup = sg;
+			continue;
+		}
+
+		/* support task boost */
+		cpu_capacity = capacity_orig_of(cpu);
+		if (boosted_tutil > cpu_capacity) {
+			sg_backup = sg;
+			continue;
+		}
+
+		/* sg_target: select the sg with smaller capacity */
+		if (cpu_capacity < target_capacity) {
+			target_capacity = cpu_capacity;
+			sg_target = sg;
+		}
+	} while (sg = sg->next, sg != sd->groups);
+
+	if (!sg_target)
+		sg_target = sg_backup;
+
+	if (sg_target) {
+		cpumask_and(&search_cpu, lowest_mask, sched_group_span(sg_target));
+		cpumask_copy(&backup_search_cpu, lowest_mask);
+		cpumask_andnot(&backup_search_cpu, &backup_search_cpu, &search_cpu);
+	} else {
+		cpumask_copy(&search_cpu, lowest_mask);
+		cpumask_clear(&backup_search_cpu);
+	}
+
+retry:
+	cpu = cpumask_first(&search_cpu);
+	do {
+		trace_sched_find_cas_cpu_each(task, cpu, target_cpu,
+			cpu_isolated(cpu),
+			idle_cpu(cpu), boosted_tutil, cpu_util_cfs(cpu),
+			capacity_orig_of(cpu));
+
+		if (cpu_isolated(cpu))
+			continue;
+
+		if (!cpumask_test_cpu(cpu, task->cpus_ptr))
+			continue;
+
+		/* find best cpu with smallest max_capacity */
+		if (target_cpu != -1 &&
+		    capacity_orig_of(cpu) > capacity_orig_of(target_cpu))
+			continue;
+
+		util = cpu_util_cfs(cpu);
+
+		/* Find the least loaded CPU */
+		if (util > target_cpu_util)
+			continue;
+
+		/*
+		 * If the previous CPU has same load, keep it as
+		 * target_cpu
+		 */
+		if (target_cpu_util == util && target_cpu == prev_cpu)
+			continue;
+
+		/*
+		 * If candidate CPU is the previous CPU, select it.
+		 * If all above conditions are same, select the least
+		 * cumulative window demand CPU.
+		 */
+		target_cpu_util = util;
+		target_cpu = cpu;
+	} while ((cpu = cpumask_next(cpu, &search_cpu)) < nr_cpu_ids);
+
+	if (target_cpu != -1 && cpumask_test_cpu(target_cpu, lowest_mask)) {
+		goto done;
+	} else if (!cpumask_empty(&backup_search_cpu)) {
+		cpumask_copy(&search_cpu, &backup_search_cpu);
+		cpumask_clear(&backup_search_cpu);
+		goto retry;
+	}
+
+done:
+	trace_sched_find_cas_cpu(task, lowest_mask, boosted_tutil, prev_cpu, target_cpu);
+	rcu_read_unlock();
+	return target_cpu;
+}
+#endif
+
 static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
 
 static int find_lowest_rq(struct task_struct *task)
@@ -1893,6 +2071,9 @@ static int find_lowest_rq(struct task_struct *task)
 	int this_cpu = smp_processor_id();
 	int cpu = task_cpu(task);
 	int ret;
+#ifdef CONFIG_SCHED_RT_CAS
+	int cas_cpu;
+#endif
 
 	/* Make sure the mask is initialized first */
 	if (unlikely(!lowest_mask))
@@ -1919,6 +2100,12 @@ static int find_lowest_rq(struct task_struct *task)
 	if (!ret)
 		return -1; /* No targets found */
 
+#ifdef CONFIG_SCHED_RT_CAS
+	cas_cpu = find_cas_cpu(sd, task, lowest_mask);
+	if (cas_cpu != -1)
+		return cas_cpu;
+#endif
+
 	/*
	 * At this point we have built a mask of CPUs representing the
	 * lowest priority tasks in the system.
Now we want to elect @@ -2684,6 +2871,93 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued) } } +#ifdef CONFIG_SCHED_RT_ACTIVE_LB +static int rt_active_load_balance_cpu_stop(void *data) +{ + struct rq *busiest_rq = data; + struct task_struct *next_task = busiest_rq->rt_push_task; + struct rq *lowest_rq = NULL; + unsigned long flags; + + raw_spin_lock_irqsave(&busiest_rq->__lock, flags); + busiest_rq->rt_active_balance = 0; + + if (!task_on_rq_queued(next_task) || + task_cpu(next_task) != cpu_of(busiest_rq)) + goto out; + + /* find_lock_lowest_rq locks the rq if found */ + lowest_rq = find_lock_lowest_rq(next_task, busiest_rq); + if (!lowest_rq) + goto out; + + if (capacity_orig_of(cpu_of(lowest_rq)) <= capacity_orig_of(task_cpu(next_task))) + goto unlock; + + deactivate_task(busiest_rq, next_task, 0); + set_task_cpu(next_task, lowest_rq->cpu); + activate_task(lowest_rq, next_task, 0); + + resched_curr(lowest_rq); +unlock: + double_unlock_balance(busiest_rq, lowest_rq); +out: + put_task_struct(next_task); + raw_spin_unlock_irqrestore(&busiest_rq->__lock, flags); + + return 0; +} + +static void check_for_migration_rt(struct rq *rq, struct task_struct *p) +{ + bool need_actvie_lb = false; + bool misfit_task = false; + int cpu = task_cpu(p); + unsigned long cpu_orig_cap; +#ifdef CONFIG_SCHED_RTG + struct cpumask *rtg_target = NULL; +#endif + + if (!sysctl_sched_enable_rt_active_lb) + return; + + if (p->nr_cpus_allowed == 1) + return; + + cpu_orig_cap = capacity_orig_of(cpu); + /* cpu has max capacity, no need to do balance */ + if (cpu_orig_cap == rq->rd->max_cpu_capacity) + return; + +#ifdef CONFIG_SCHED_RTG + rtg_target = find_rtg_target(p); + if (rtg_target) + misfit_task = capacity_orig_of(cpumask_first(rtg_target)) > + cpu_orig_cap; + else + misfit_task = !rt_task_fits_capacity(p, cpu); +#else + misfit_task = !rt_task_fits_capacity(p, cpu); +#endif + + if (misfit_task) { + raw_spin_lock(&rq->__lock); + if (!rq->active_balance && !rq->rt_active_balance) { + rq->rt_active_balance = 1; + rq->rt_push_task = p; + get_task_struct(p); + need_actvie_lb = true; + } + raw_spin_unlock(&rq->__lock); + + if (need_actvie_lb) + stop_one_cpu_nowait(task_cpu(p), + rt_active_load_balance_cpu_stop, + rq, &rq->rt_active_balance_work); + } +} +#endif + static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task) { /* @@ -2753,6 +3027,9 @@ DEFINE_SCHED_CLASS(rt) = { #ifdef CONFIG_SCHED_WALT .fixup_walt_sched_stats = fixup_walt_sched_stats_common, #endif +#ifdef CONFIG_SCHED_RT_ACTIVE_LB + .check_for_migration = check_for_migration_rt, +#endif }; #ifdef CONFIG_RT_GROUP_SCHED diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 484a34ddc821..e17bf7cb98d7 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -101,6 +101,12 @@ struct rq; struct cpuidle_state; +#ifdef CONFIG_SCHED_RT_CAS +extern unsigned long uclamp_task_util(struct task_struct *p, + unsigned long uclamp_min, + unsigned long uclamp_max); +#endif + #ifdef CONFIG_SCHED_WALT extern unsigned int sched_ravg_window; extern unsigned int walt_cpu_util_freq_divisor; @@ -970,6 +976,9 @@ struct root_domain { * CPUs of the rd. Protected by RCU. 
*/ struct perf_domain __rcu *pd; +#ifdef CONFIG_SCHED_RT_CAS + int max_cap_orig_cpu; +#endif }; extern void init_defrootdomain(void); @@ -1141,8 +1150,18 @@ struct rq { /* For active balancing */ int active_balance; int push_cpu; +#ifdef CONFIG_SCHED_EAS + struct task_struct *push_task; +#endif struct cpu_stop_work active_balance_work; + /* For rt active balancing */ +#ifdef CONFIG_SCHED_RT_ACTIVE_LB + int rt_active_balance; + struct task_struct *rt_push_task; + struct cpu_stop_work rt_active_balance_work; +#endif + /* CPU of this runqueue: */ int cpu; int online; @@ -2041,6 +2060,15 @@ static inline struct cpumask *group_balance_mask(struct sched_group *sg) return to_cpumask(sg->sgc->cpumask); } +/** + * group_first_cpu - Returns the first CPU in the cpumask of a sched_group. + * @group: The group whose first CPU is to be returned. + */ +static inline unsigned int group_first_cpu(struct sched_group *group) +{ + return cpumask_first(sched_group_span(group)); +} + extern int group_balance_cpu(struct sched_group *sg); #ifdef CONFIG_SCHED_DEBUG @@ -2403,7 +2431,9 @@ struct sched_class { void (*fixup_walt_sched_stats)(struct rq *rq, struct task_struct *p, u16 updated_demand_scaled); #endif - +#ifdef CONFIG_SCHED_EAS + void (*check_for_migration)(struct rq *rq, struct task_struct *p); +#endif #ifdef CONFIG_SCHED_CORE int (*task_is_throttled)(struct task_struct *p, int cpu); #endif @@ -3249,6 +3279,11 @@ unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util, return clamp(util, min_util, max_util); } +static inline bool uclamp_boosted(struct task_struct *p) +{ + return uclamp_eff_value(p, UCLAMP_MIN) > 0; +} + /* Is the rq being capped/throttled by uclamp_max? */ static inline bool uclamp_rq_is_capped(struct rq *rq) { @@ -3293,6 +3328,11 @@ unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util, return util; } +static inline bool uclamp_boosted(struct task_struct *p) +{ + return false; +} + static inline bool uclamp_rq_is_capped(struct rq *rq) { return false; } static inline bool uclamp_is_used(void) @@ -3412,6 +3452,14 @@ static inline bool is_per_cpu_kthread(struct task_struct *p) extern void swake_up_all_locked(struct swait_queue_head *q); extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); +#ifdef CONFIG_SCHED_RTG +extern bool task_fits_max(struct task_struct *p, int cpu); +extern unsigned long capacity_spare_without(int cpu, struct task_struct *p); +extern int update_preferred_cluster(struct related_thread_group *grp, + struct task_struct *p, u32 old_load, bool from_tick); +extern struct cpumask *find_rtg_target(struct task_struct *p); +#endif + extern int try_to_wake_up(struct task_struct *tsk, unsigned int state, int wake_flags); #ifdef CONFIG_PREEMPT_DYNAMIC diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index c530731a755c..c46367544fc1 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -558,6 +558,10 @@ static int init_rootdomain(struct root_domain *rd) if (cpupri_init(&rd->cpupri) != 0) goto free_cpudl; + +#ifdef CONFIG_SCHED_RT_CAS + rd->max_cap_orig_cpu = -1; +#endif return 0; free_cpudl: @@ -2491,9 +2495,19 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att /* Attach the domains */ rcu_read_lock(); for_each_cpu(i, cpu_map) { +#ifdef CONFIG_SCHED_RT_CAS + int max_cpu = READ_ONCE(d.rd->max_cap_orig_cpu); +#endif + rq = cpu_rq(i); sd = *per_cpu_ptr(d.sd, i); +#ifdef CONFIG_SCHED_RT_CAS + if (max_cpu < 0 || arch_scale_cpu_capacity(i) > + 
arch_scale_cpu_capacity(max_cpu)) + WRITE_ONCE(d.rd->max_cap_orig_cpu, i); +#endif + /* Use READ_ONCE()/WRITE_ONCE() to avoid load/store tearing: */ if (rq->cpu_capacity_orig > READ_ONCE(d.rd->max_cpu_capacity)) WRITE_ONCE(d.rd->max_cpu_capacity, rq->cpu_capacity_orig); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index b0151dbbd162..3cacf8cfaa59 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1623,6 +1623,24 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, +#ifdef CONFIG_SCHED_RT_CAS + { + .procname = "sched_enable_rt_cas", + .data = &sysctl_sched_enable_rt_cas, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif +#ifdef CONFIG_SCHED_RT_ACTIVE_LB + { + .procname = "sched_enable_rt_active_lb", + .data = &sysctl_sched_enable_rt_active_lb, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif #ifdef CONFIG_SCHED_WALT { .procname = "sched_use_walt_cpu_util", -- Gitee
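
Usage note: the two switches added to kern_table above are plain proc_dointvec integers, so on a kernel built with CONFIG_SCHED_RT_CAS and CONFIG_SCHED_RT_ACTIVE_LB they can be flipped at run time. The sketch below is illustrative only and is not part of the patch; it assumes the entries surface under /proc/sys/kernel/, which is where kern_table entries are normally exposed.

/* toggle_eas_knobs.c - illustrative userspace sketch, not from the patch.
 * Paths assume the sysctls registered in kern_table appear under
 * /proc/sys/kernel/.
 */
#include <stdio.h>

static int write_knob(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		/* Knob absent, e.g. kernel built without the config option. */
		perror(path);
		return -1;
	}
	fputs(val, f);
	fclose(f);
	return 0;
}

int main(void)
{
	/* Capacity-aware CPU selection for RT tasks (find_cas_cpu()). */
	write_knob("/proc/sys/kernel/sched_enable_rt_cas", "1");
	/* Tick-driven RT misfit migration (check_for_migration_rt()). */
	write_knob("/proc/sys/kernel/sched_enable_rt_active_lb", "1");
	return 0;
}

Both knobs default to 1 per the initializers in kernel/sched/rt.c, so writing 0 is how the corresponding paths are switched off again.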