From b61876ed122f816660fe49e0de1b7ee4891deaa2 Mon Sep 17 00:00:00 2001 From: Patrick Bellasi Date: Tue, 18 Dec 2018 10:31:30 +0000 Subject: [PATCH 01/15] ANDROID: sched/fair: EAS: Add uclamp support to find_energy_efficient_cpu() Utilization clamping can be used to boost the utilization of small tasks or cap that of big tasks. Thus, one of its possible usages is to bias task placement to "promote" small tasks on higher capacity (less energy efficient) CPUs or "constrain" big tasks on small capacity (more energy efficient) CPUs. When the Energy Aware Scheduler (EAS) looks for the most energy efficient CPU to run a task on, it currently considers only the effective utilization estimated for a task. Fix this by adding an additional check to skip CPUs whose capacity is smaller than the task's clamped utilization. Change-Id: I43fa6fa27e27c1eb5272c6a45d1a6a5b0faae1aa Signed-off-by: Patrick Bellasi Cc: Ingo Molnar Cc: Peter Zijlstra Signed-off-by: Quentin Perret --- kernel/sched/fair.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 83ab35e2374f..c80cf449f9fb 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6026,6 +6026,19 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) return target; } +static unsigned int uclamp_task_util(struct task_struct *p) +{ +#ifdef CONFIG_UCLAMP_TASK + unsigned int min_util = uclamp_eff_value(p, UCLAMP_MIN); + unsigned int max_util = uclamp_eff_value(p, UCLAMP_MAX); + unsigned int est_util = task_util_est(p); + + return clamp(est_util, min_util, max_util); +#else + return task_util_est(p); +#endif +} + /** * Amount of capacity of a CPU that is (estimated to be) used by CFS tasks * @cpu: the CPU to get the utilization of @@ -6370,6 +6383,10 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) if (!fits_capacity(util, cpu_cap)) continue; + /* Skip CPUs which do not fit task requirements */ + if (cpu_cap < uclamp_task_util(p)) + continue; + /* Always use prev_cpu as a candidate. */ if (cpu == prev_cpu) { prev_delta = compute_energy(p, prev_cpu, pd); From 79e3a4a27ee502ffed05bb8011d9540867a66719 Mon Sep 17 00:00:00 2001 From: Chris Redpath Date: Wed, 27 Mar 2019 17:15:17 +0000 Subject: [PATCH 02/15] ANDROID: sched: Unconditionally honor sync flag for energy-aware wakeups Since we don't do energy-aware wakeups when we are overutilized, always honoring sync wakeups in this state does not prevent wake-wide mechanics overruling the flag as normal. This patch is based upon previous work to build EAS for android products. sync-hint code taken from commit 4a5e890ec60d "sched/fair: add tunable to force selection at cpu granularity" written by Juri Lelli Change-Id: I4b3d79141fc8e53dc51cd63ac11096c2e3cb10f5 Signed-off-by: Chris Redpath (cherry-picked from commit f1ec666a62dec1083ed52fe1ddef093b84373aaf) [ Moved the feature to find_energy_efficient_cpu() and removed the sysctl knob ] Signed-off-by: Quentin Perret --- kernel/sched/fair.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index c80cf449f9fb..baefb5ebc85e 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6336,7 +6336,7 @@ compute_energy(struct task_struct *p, int dst_cpu, struct perf_domain *pd) * other use-cases too. So, until someone finds a better way to solve this, * let's keep things simple by re-using the existing slow path.
*/ -static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) +static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu, int sync) { unsigned long prev_delta = ULONG_MAX, best_delta = ULONG_MAX; struct root_domain *rd = cpu_rq(smp_processor_id())->rd; @@ -6350,6 +6350,12 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) if (!pd || READ_ONCE(rd->overutilized)) goto fail; + cpu = smp_processor_id(); + if (sync && cpumask_test_cpu(cpu, p->cpus_ptr)) { + rcu_read_unlock(); + return cpu; + } + /* * Energy-aware wake-up happens on the lowest sched_domain starting * from sd_asym_cpucapacity spanning over this_cpu and prev_cpu. @@ -6461,7 +6467,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f record_wakee(p); if (sched_energy_enabled()) { - new_cpu = find_energy_efficient_cpu(p, prev_cpu); + new_cpu = find_energy_efficient_cpu(p, prev_cpu, sync); if (new_cpu >= 0) return new_cpu; new_cpu = prev_cpu; From 27ad2abc032bff8761bd0dd03f6e90d0bbd9e3e3 Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Thu, 10 May 2018 15:48:06 +0100 Subject: [PATCH 03/15] ANDROID: sched/fair: add arch scaling function for max frequency capping To be able to scale the cpu capacity by this factor introduce a call to the new arch scaling function arch_scale_max_freq_capacity() in update_cpu_capacity() and provide a default implementation which returns SCHED_CAPACITY_SCALE. Another subsystem (e.g. cpufreq) or architectural or platform specific code can overwrite this default implementation, exactly as for frequency and cpu invariance. It has to be enabled by the arch by defining arch_scale_max_freq_capacity to the actual implementation. Change-Id: I770a8b1f4f7340e9e314f71c64a765bf880f4b4d Signed-off-by: Ionela Voinescu Signed-off-by: Dietmar Eggemann ( Fixed conflict with scaling against the PELT-based scale_rt_capacity ) Signed-off-by: Quentin Perret --- kernel/sched/fair.c | 12 ++++++++---- kernel/sched/sched.h | 9 +++++++++ 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index baefb5ebc85e..fee99f8cb193 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -7725,10 +7725,9 @@ static inline void init_sd_lb_stats(struct sd_lb_stats *sds) }; } -static unsigned long scale_rt_capacity(struct sched_domain *sd, int cpu) +static unsigned long scale_rt_capacity(int cpu, unsigned long max) { struct rq *rq = cpu_rq(cpu); - unsigned long max = arch_scale_cpu_capacity(cpu); unsigned long used, free; unsigned long irq; @@ -7750,10 +7749,15 @@ static unsigned long scale_rt_capacity(struct sched_domain *sd, int cpu) static void update_cpu_capacity(struct sched_domain *sd, int cpu) { - unsigned long capacity = scale_rt_capacity(sd, cpu); + unsigned long capacity = arch_scale_cpu_capacity(cpu); struct sched_group *sdg = sd->groups; - cpu_rq(cpu)->cpu_capacity_orig = arch_scale_cpu_capacity(cpu); + cpu_rq(cpu)->cpu_capacity_orig = capacity; + + capacity *= arch_scale_max_freq_capacity(sd, cpu); + capacity >>= SCHED_CAPACITY_SHIFT; + + capacity = scale_rt_capacity(cpu, capacity); if (!capacity) capacity = 1; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 0db2c1b3361e..f0b6e3ab2c96 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1961,6 +1961,15 @@ unsigned long arch_scale_freq_capacity(int cpu) } #endif +#ifndef arch_scale_max_freq_capacity +struct sched_domain; +static __always_inline +unsigned long arch_scale_max_freq_capacity(struct sched_domain 
*sd, int cpu) +{ + return SCHED_CAPACITY_SCALE; +} +#endif + #ifdef CONFIG_SMP #ifdef CONFIG_PREEMPTION From 0cfe39fe403ea6dbe2fdfb38edd36022b35d4d66 Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Thu, 10 May 2018 16:52:33 +0100 Subject: [PATCH 04/15] ANDROID: cpufreq: arch_topology: implement max frequency capping Implements the Max Frequency Capping Engine (MFCE) getter function topology_get_max_freq_scale() to provide the scheduler with a maximum frequency scaling correction factor for more accurate cpu capacity handling by being able to deal with max frequency capping. This scaling factor describes the influence of running a cpu with a current maximum frequency (policy) lower than the maximum possible frequency (cpuinfo). The factor is: policy_max_freq(cpu) << SCHED_CAPACITY_SHIFT / cpuinfo_max_freq(cpu) It also implements the MFCE setter function arch_set_max_freq_scale() which is called from cpufreq_set_policy(). Change-Id: I59e52861ee260755ab0518fe1f7183a2e4e3d0fc Signed-off-by: Ionela Voinescu Signed-off-by: Dietmar Eggemann [Trivial cherry-pick issue in cpufreq.c] Signed-off-by: Quentin Perret --- drivers/base/arch_topology.c | 25 ++++++++++++++++++++++++- drivers/cpufreq/cpufreq.c | 8 ++++++++ include/linux/arch_topology.h | 8 ++++++++ include/linux/cpufreq.h | 2 ++ 4 files changed, 42 insertions(+), 1 deletion(-) diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 1eb81f113786..c0cf9ef5c2f5 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -22,6 +22,8 @@ #include DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE; +DEFINE_PER_CPU(unsigned long, max_cpu_freq); +DEFINE_PER_CPU(unsigned long, max_freq_scale) = SCHED_CAPACITY_SCALE; void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq, unsigned long max_freq) @@ -31,8 +33,29 @@ void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq, scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq; - for_each_cpu(i, cpus) + for_each_cpu(i, cpus) { per_cpu(freq_scale, i) = scale; + per_cpu(max_cpu_freq, i) = max_freq; + } +} + +void arch_set_max_freq_scale(struct cpumask *cpus, + unsigned long policy_max_freq) +{ + unsigned long scale, max_freq; + int cpu = cpumask_first(cpus); + + if (cpu > nr_cpu_ids) + return; + + max_freq = per_cpu(max_cpu_freq, cpu); + if (!max_freq) + return; + + scale = (policy_max_freq << SCHED_CAPACITY_SHIFT) / max_freq; + + for_each_cpu(cpu, cpus) + per_cpu(max_freq_scale, cpu) = scale; } DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE; diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index c52d6fa32aac..cf118aa0a83e 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -152,6 +152,12 @@ __weak void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq, } EXPORT_SYMBOL_GPL(arch_set_freq_scale); +__weak void arch_set_max_freq_scale(struct cpumask *cpus, + unsigned long policy_max_freq) +{ +} +EXPORT_SYMBOL_GPL(arch_set_max_freq_scale); + /* * This is a generic cpufreq init() routine which can be used by cpufreq * drivers of SMP systems. 
It will do following: @@ -2398,6 +2404,8 @@ int cpufreq_set_policy(struct cpufreq_policy *policy, policy->max = new_policy->max; trace_cpu_frequency_limits(policy); + arch_set_max_freq_scale(policy->cpus, policy->max); + policy->cached_target_freq = UINT_MAX; pr_debug("new min and max freqs are %u - %u kHz\n", diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index 42f2b5126094..5402bc0e2b1e 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -33,6 +33,14 @@ unsigned long topology_get_freq_scale(int cpu) return per_cpu(freq_scale, cpu); } +DECLARE_PER_CPU(unsigned long, max_freq_scale); + +static inline +unsigned long topology_get_max_freq_scale(struct sched_domain *sd, int cpu) +{ + return per_cpu(max_freq_scale, cpu); +} + struct cpu_topology { int thread_id; int core_id; diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index c57e88e85c41..fca45a2749d4 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -984,6 +984,8 @@ extern unsigned int arch_freq_get_on_cpu(int cpu); extern void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq, unsigned long max_freq); +extern void arch_set_max_freq_scale(struct cpumask *cpus, + unsigned long policy_max_freq); /* the following are really really optional */ extern struct freq_attr cpufreq_freq_attr_scaling_available_freqs; From a273512dafbf6322b134716873d50d1af803972f Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Thu, 10 May 2018 16:54:16 +0100 Subject: [PATCH 05/15] ANDROID: arm64: enable max frequency capping Defines arch_scale_max_freq_capacity() to use the topology driver scale function. Change-Id: If7565747ec862e42ac55196240522ef8d22ca67d Signed-off-by: Ionela Voinescu Signed-off-by: Dietmar Eggemann Signed-off-by: Quentin Perret --- arch/arm64/include/asm/topology.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h index a4d945db95a2..70697177d6ec 100644 --- a/arch/arm64/include/asm/topology.h +++ b/arch/arm64/include/asm/topology.h @@ -19,6 +19,9 @@ int pcibus_to_node(struct pci_bus *bus); /* Replace task scheduler's default frequency-invariant accounting */ #define arch_scale_freq_capacity topology_get_freq_scale +/* Replace task scheduler's default max-frequency-invariant accounting */ +#define arch_scale_max_freq_capacity topology_get_max_freq_scale + /* Replace task scheduler's default cpu-invariant accounting */ #define arch_scale_cpu_capacity topology_get_cpu_scale From 150b7ce48a29566d996bda293e8857ef45298bdc Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Thu, 10 May 2018 16:58:04 +0100 Subject: [PATCH 06/15] ANDROID: arm: enable max frequency capping Defines arch_scale_max_freq_capacity() to use the topology driver scale function. 
Signed-off-by: Ionela Voinescu Signed-off-by: Dietmar Eggemann Signed-off-by: Quentin Perret Change-Id: I79f444399ea3b2948364fde80ccee52a9ece5b9a --- arch/arm/include/asm/topology.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h index 8a0fae94d45e..a2edacb56459 100644 --- a/arch/arm/include/asm/topology.h +++ b/arch/arm/include/asm/topology.h @@ -10,6 +10,9 @@ /* Replace task scheduler's default frequency-invariant accounting */ #define arch_scale_freq_capacity topology_get_freq_scale +/* Replace task scheduler's default max-frequency-invariant accounting */ +#define arch_scale_max_freq_capacity topology_get_max_freq_scale + /* Replace task scheduler's default cpu-invariant accounting */ #define arch_scale_cpu_capacity topology_get_cpu_scale From 24c44377976ea7812e321072d6bb366a648f7356 Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Sat, 26 Sep 2015 18:19:54 +0100 Subject: [PATCH 07/15] ANDROID: sched: Update max cpu capacity in case of max frequency constraints Wakeup balancing uses cpu capacity awareness and needs to know the system-wide maximum cpu capacity. Patch "sched: Store system-wide maximum cpu capacity in root domain" finds the system-wide maximum cpu capacity during scheduler domain hierarchy setup. This is sufficient as long as maximum frequency invariance is not enabled. If it is enabled, the system-wide maximum cpu capacity can change between scheduler domain hierarchy setups due to frequency capping. The cpu capacity is changed in update_cpu_capacity() which is called in load balance on the lowest scheduler domain hierarchy level. To be able to know if a change in cpu capacity for a certain cpu also has an effect on the system-wide maximum cpu capacity it is normally necessary to iterate over all cpus. This would be way too costly. That's why this patch follows a different approach. The unsigned long max_cpu_capacity value in struct root_domain is replaced with a struct max_cpu_capacity, containing value (the max_cpu_capacity) and cpu (the cpu index of the cpu providing the maximum cpu_capacity). Changes to the system-wide maximum cpu capacity and the cpu index are made if: 1 System-wide maximum cpu capacity < cpu capacity 2 System-wide maximum cpu capacity > cpu capacity and cpu index == cpu There are no changes to the system-wide maximum cpu capacity in all other cases. Atomic read and write access to the pair (max_cpu_capacity.val, max_cpu_capacity.cpu) is enforced by max_cpu_capacity.lock. The access to max_cpu_capacity.val in task_fits_max() is still performed without taking the max_cpu_capacity.lock. The code to set max cpu capacity in build_sched_domains() has been removed because the whole functionality is now provided by update_cpu_capacity() instead. This approach can introduce errors temporarily, e.g. in case the cpu currently providing the max cpu capacity has its cpu capacity lowered due to frequency capping and calls update_cpu_capacity() before any cpu which might provide the max cpu now. 
Change-Id: I5063befab088fbf49e5d5e484ce0c6ee6165283a Signed-off-by: Ionela Voinescu * Signed-off-by: Dietmar Eggemann (- Fixed cherry-pick issues, and conflict with a0fe2cf086ae "sched/fair: Tune down misfit NOHZ kicks" which makes use of max_cpu_capacity - Squashed "sched/fair: remove printk while schedule is in progress" fix from Caesar Wang ) Signed-off-by: Quentin Perret --- kernel/sched/fair.c | 34 ++++++++++++++++++++++++++++++++-- kernel/sched/sched.h | 10 +++++++++- kernel/sched/topology.c | 15 +++------------ 3 files changed, 44 insertions(+), 15 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index fee99f8cb193..b4314e918c29 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6195,7 +6195,7 @@ static int wake_cap(struct task_struct *p, int cpu, int prev_cpu) return 0; min_cap = min(capacity_orig_of(prev_cpu), capacity_orig_of(cpu)); - max_cap = cpu_rq(cpu)->rd->max_cpu_capacity; + max_cap = cpu_rq(cpu)->rd->max_cpu_capacity.val; /* Minimum capacity is close to max, no need to abort wake_affine */ if (max_cap - min_cap < max_cap >> 3) @@ -7747,16 +7747,46 @@ static unsigned long scale_rt_capacity(int cpu, unsigned long max) return scale_irq_capacity(free, irq, max); } +void init_max_cpu_capacity(struct max_cpu_capacity *mcc) { + raw_spin_lock_init(&mcc->lock); + mcc->val = 0; + mcc->cpu = -1; +} + static void update_cpu_capacity(struct sched_domain *sd, int cpu) { unsigned long capacity = arch_scale_cpu_capacity(cpu); struct sched_group *sdg = sd->groups; + struct max_cpu_capacity *mcc; + unsigned long max_capacity; + int max_cap_cpu; + unsigned long flags; cpu_rq(cpu)->cpu_capacity_orig = capacity; capacity *= arch_scale_max_freq_capacity(sd, cpu); capacity >>= SCHED_CAPACITY_SHIFT; + mcc = &cpu_rq(cpu)->rd->max_cpu_capacity; + + raw_spin_lock_irqsave(&mcc->lock, flags); + max_capacity = mcc->val; + max_cap_cpu = mcc->cpu; + + if ((max_capacity > capacity && max_cap_cpu == cpu) || + (max_capacity < capacity)) { + mcc->val = capacity; + mcc->cpu = cpu; +#ifdef CONFIG_SCHED_DEBUG + raw_spin_unlock_irqrestore(&mcc->lock, flags); + printk_deferred(KERN_INFO "CPU%d: update max cpu_capacity %lu\n", + cpu, capacity); + goto skip_unlock; +#endif + } + raw_spin_unlock_irqrestore(&mcc->lock, flags); + +skip_unlock: __attribute__ ((unused)); capacity = scale_rt_capacity(cpu, capacity); if (!capacity) @@ -7861,7 +7891,7 @@ check_cpu_capacity(struct rq *rq, struct sched_domain *sd) static inline int check_misfit_status(struct rq *rq, struct sched_domain *sd) { return rq->misfit_task_load && - (rq->cpu_capacity_orig < rq->rd->max_cpu_capacity || + (rq->cpu_capacity_orig < rq->rd->max_cpu_capacity.val || check_cpu_capacity(rq, sd)); } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index f0b6e3ab2c96..9015c687de19 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -717,6 +717,12 @@ struct perf_domain { struct rcu_head rcu; }; +struct max_cpu_capacity { + raw_spinlock_t lock; + unsigned long val; + int cpu; +}; + /* Scheduling group status flags */ #define SG_OVERLOAD 0x1 /* More than one runnable task on a CPU. */ #define SG_OVERUTILIZED 0x2 /* One or more CPUs are over-utilized. */ @@ -775,7 +781,8 @@ struct root_domain { cpumask_var_t rto_mask; struct cpupri cpupri; - unsigned long max_cpu_capacity; + /* Maximum cpu capacity in the system. 
*/ + struct max_cpu_capacity max_cpu_capacity; /* * NULL-terminated list of performance domains intersecting with the @@ -785,6 +792,7 @@ struct root_domain { }; extern void init_defrootdomain(void); +extern void init_max_cpu_capacity(struct max_cpu_capacity *mcc); extern int sched_init_domains(const struct cpumask *cpu_map); extern void rq_attach_root(struct rq *rq, struct root_domain *rd); extern void sched_get_rd(struct root_domain *rd); diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index b5667a273bf6..4ad1e5194555 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -510,6 +510,9 @@ static int init_rootdomain(struct root_domain *rd) if (cpupri_init(&rd->cpupri) != 0) goto free_cpudl; + + init_max_cpu_capacity(&rd->max_cpu_capacity); + return 0; free_cpudl: @@ -1951,7 +1954,6 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att enum s_alloc alloc_state; struct sched_domain *sd; struct s_data d; - struct rq *rq = NULL; int i, ret = -ENOMEM; struct sched_domain_topology_level *tl_asym; bool has_asym = false; @@ -2014,13 +2016,7 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att /* Attach the domains */ rcu_read_lock(); for_each_cpu(i, cpu_map) { - rq = cpu_rq(i); sd = *per_cpu_ptr(d.sd, i); - - /* Use READ_ONCE()/WRITE_ONCE() to avoid load/store tearing: */ - if (rq->cpu_capacity_orig > READ_ONCE(d.rd->max_cpu_capacity)) - WRITE_ONCE(d.rd->max_cpu_capacity, rq->cpu_capacity_orig); - cpu_attach_domain(sd, d.rd, i); } rcu_read_unlock(); @@ -2028,11 +2024,6 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att if (has_asym) static_branch_enable_cpuslocked(&sched_asym_cpucapacity); - if (rq && sched_debug_enabled) { - pr_info("root domain span: %*pbl (max cpu_capacity = %lu)\n", - cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity); - } - ret = 0; error: __free_domain_allocs(&d, alloc_state, cpu_map); From 65b8ddef4ef06f78b29207ef5ec508fb58ad807f Mon Sep 17 00:00:00 2001 From: Morten Rasmussen Date: Thu, 2 Jul 2015 17:16:34 +0100 Subject: [PATCH 08/15] ANDROID: sched: Prevent unnecessary active balance of single task in sched group Scenarios with the busiest group having just one task and the local being idle on topologies with sched groups with different numbers of cpus manage to dodge all load-balance bailout conditions resulting the nr_balance_failed counter to be incremented. This eventually causes a pointless active migration of the task. This patch prevents this by not incrementing the counter when the busiest group only has one task. ASYM_PACKING migrations and migrations due to reduced capacity should still take place as these are explicitly captured by need_active_balance(). A better solution would be to not attempt the load-balance in the first place, but that requires significant changes to the order of bailout conditions and statistics gathering. 
Change-Id: I28f69c72febe0211decbe77b7bc3e48839d3d7b3 cc: Ingo Molnar cc: Peter Zijlstra Signed-off-by: Morten Rasmussen Signed-off-by: Quentin Perret --- kernel/sched/fair.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index b4314e918c29..df279e422001 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -7112,6 +7112,7 @@ struct lb_env { int new_dst_cpu; enum cpu_idle_type idle; long imbalance; + unsigned int src_grp_nr_running; /* The set of CPUs under consideration for load-balancing */ struct cpumask *cpus; @@ -8294,6 +8295,8 @@ next_group: if (env->sd->flags & SD_NUMA) env->fbq_type = fbq_classify_group(&sds->busiest_stat); + env->src_grp_nr_running = sds->busiest_stat.sum_nr_running; + if (!env->sd->parent) { struct root_domain *rd = env->dst_rq->rd; @@ -8988,7 +8991,8 @@ more_balance: * excessive cache_hot migrations and active balances. */ if (idle != CPU_NEWLY_IDLE) - sd->nr_balance_failed++; + if (env.src_grp_nr_running > 1) + sd->nr_balance_failed++; if (need_active_balance(&env)) { unsigned long flags; From f351885fc72a3c13e5a713fcc37097f7c2eceee8 Mon Sep 17 00:00:00 2001 From: Chris Redpath Date: Fri, 1 Jun 2018 20:34:10 +0100 Subject: [PATCH 09/15] ANDROID: sched/fair: Attempt to improve throughput for asym cap systems In some systems the capacity and group weights line up to defeat all the small imbalance correction conditions in fix_small_imbalance, which can cause bad task placement. Add a new condition if the existing code can't see anything to fix: If we have asymmetric capacity, and there are more tasks than CPUs in the busiest group *and* there are less tasks than CPUs in the local group then we try to pull something. There could be transient small tasks which prevent this from working, but on the whole it is beneficial for those systems with inconvenient capacity/cluster size relationships. Change-Id: Icf81cde215c082a61f816534b7990ccb70aee409 Signed-off-by: Chris Redpath Signed-off-by: Quentin Perret --- kernel/sched/fair.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index df279e422001..1962e68fcf60 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -8425,7 +8425,22 @@ void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds) capa_move /= SCHED_CAPACITY_SCALE; /* Move if we gain throughput */ - if (capa_move > capa_now) + if (capa_move > capa_now) { + env->imbalance = busiest->load_per_task; + return; + } + + /* We can't see throughput improvement with the load-based + * method, but it is possible depending upon group size and + * capacity range that there might still be an underutilized + * cpu available in an asymmetric capacity system. Do one last + * check just in case. + */ + if (env->sd->flags & SD_ASYM_CPUCAPACITY && + busiest->group_type == group_overloaded && + busiest->sum_nr_running > busiest->group_weight && + local->sum_nr_running < local->group_weight && + local->group_capacity < busiest->group_capacity) env->imbalance = busiest->load_per_task; } From a7455f8123479ddcad3ff7b366ce143cb78a9ae3 Mon Sep 17 00:00:00 2001 From: Chris Redpath Date: Tue, 5 Jun 2018 11:47:57 +0100 Subject: [PATCH 10/15] ANDROID: sched/fair: Don't balance misfits if it would overload local group When load balancing in a system with misfit tasks present, if we always pull a misfit task to the local group this can lead to pulling a running task from a smaller capacity CPUs to a bigger CPU which is busy. 
In this situation, the pulled task is likely not to get a chance to run before an idle balance on another small CPU pulls it back. This penalises the pulled task as it is stopped for a short amount of time and then likely relocated to a different CPU (since the original CPU just did a NEWLY_IDLE balance and reset the periodic interval). If we only do this unconditionally for NEWLY_IDLE balance, we can be sure that any tasks and load which are present on the local group are related to short-running tasks which we are happy to displace for a longer running task in a system with misfit tasks present. However, other balance types should only pull a task if we think that the local group is underutilized - checking the number of tasks gives us a conservative estimate here since if they were short tasks we would have been doing NEWLY_IDLE balances instead. Change-Id: I710add1ab1139482620b6addc8370ad194791beb Signed-off-by: Chris Redpath Signed-off-by: Quentin Perret --- kernel/sched/fair.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 1962e68fcf60..af23c63eca32 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -8509,8 +8509,18 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s (sds->avg_load - local->avg_load) * local->group_capacity ) / SCHED_CAPACITY_SCALE; - /* Boost imbalance to allow misfit task to be balanced. */ - if (busiest->group_type == group_misfit_task) { + /* Boost imbalance to allow misfit task to be balanced. + * Always do this if we are doing a NEWLY_IDLE balance + * on the assumption that any tasks we have must not be + * long-running (and hence we cannot rely upon load). + * However if we are not idle, we should assume the tasks + * we have are longer running and not override load-based + * calculations above unless we are sure that the local + * group is underutilized. + */ + if (busiest->group_type == group_misfit_task && + (env->idle == CPU_NEWLY_IDLE || + local->sum_nr_running < local->group_weight)) { env->imbalance = max_t(long, env->imbalance, busiest->group_misfit_task_load); } From 44ab1611a8fa4ee1dc59a80b656f8b2e6aa4e57d Mon Sep 17 00:00:00 2001 From: Chris Redpath Date: Tue, 5 Jun 2018 12:21:33 +0100 Subject: [PATCH 11/15] ANDROID: sched/fair: Also do misfit in overloaded groups If we can classify the group as overloaded, that overrides any classification as misfit but we may still have misfit tasks present. Check the rq we're looking at to see if this is the case. Change-Id: Ida8eb66aa625e34de3fe2ee1b0dd8a78926273d8 Signed-off-by: Chris Redpath [Removed stray reference to rq_has_misfit] Signed-off-by: Valentin Schneider Signed-off-by: Quentin Perret --- kernel/sched/fair.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index af23c63eca32..4012266f4f64 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -8798,6 +8798,9 @@ static int need_active_balance(struct lb_env *env) if (voluntary_active_balance(env)) return 1; + if (env->src_grp_type == group_overloaded && env->src_rq->misfit_task_load) + return 1; + return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2); } From 588085feb44fa553aec7eb98545a570e189eabad Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Wed, 3 Jul 2019 10:48:14 +0100 Subject: [PATCH 12/15] ANDROID: arm64: defconfig: Enable EAS by default Use schedutil as default cpufreq governor so EAS can start. 
Also, enable util-clamp to enable frequency selection biasing. Change-Id: Iec9098f27c0353dabc23bd98efbca6479de41796 Signed-off-by: Quentin Perret --- arch/arm64/configs/defconfig | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 8e05c39eab08..2c029af5d7e0 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -13,10 +13,12 @@ CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y +CONFIG_UCLAMP_TASK=y CONFIG_NUMA_BALANCING=y CONFIG_MEMCG=y CONFIG_MEMCG_SWAP=y CONFIG_BLK_CGROUP=y +CONFIG_UCLAMP_TASK_GROUP=y CONFIG_CGROUP_PIDS=y CONFIG_CGROUP_HUGETLB=y CONFIG_CPUSETS=y @@ -71,10 +73,12 @@ CONFIG_COMPAT=y CONFIG_RANDOMIZE_BASE=y CONFIG_HIBERNATION=y CONFIG_WQ_POWER_EFFICIENT_DEFAULT=y +CONFIG_ENERGY_MODEL=y CONFIG_ARM_CPUIDLE=y CONFIG_ARM_PSCI_CPUIDLE=y CONFIG_CPU_FREQ=y CONFIG_CPU_FREQ_STAT=y +CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL=y CONFIG_CPU_FREQ_GOV_POWERSAVE=m CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_CPU_FREQ_GOV_ONDEMAND=y From c28f9d3945f10f655983cd4fb0e71d972efa747b Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Tue, 30 Jul 2019 13:54:00 +0100 Subject: [PATCH 13/15] ANDROID: sched/core: Add a latency-sensitive flag to uclamp Add a 'latency_sensitive' flag to uclamp in order to express the need for some tasks to find a CPU where they can wake-up quickly. This is not expected to be used without cgroup support, so add solely a cgroup interface for it. As this flag represents a boolean attribute and not an amount of resources to be shared, it is not clear what the delegation logic should be. As such, it is kept simple: every new cgroup starts with latency_sensitive set to false, regardless of the parent. In essence, this is similar to SchedTune's prefer-idle flag which was used in android-4.19 and prior. 
Change-Id: I722d8ecabb428bb7b95a5b54bc70a87f182dde2a Signed-off-by: Quentin Perret --- kernel/sched/core.c | 33 +++++++++++++++++++++++++++++++++ kernel/sched/sched.h | 2 ++ 2 files changed, 35 insertions(+) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 7880f4f64d0e..7571fc49af89 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7356,6 +7356,27 @@ static int cpu_uclamp_max_show(struct seq_file *sf, void *v) cpu_uclamp_print(sf, UCLAMP_MAX); return 0; } + +static int cpu_uclamp_ls_write_u64(struct cgroup_subsys_state *css, + struct cftype *cftype, u64 ls) +{ + struct task_group *tg; + + if (ls > 1) + return -EINVAL; + tg = css_tg(css); + tg->latency_sensitive = (unsigned int) ls; + + return 0; +} + +static u64 cpu_uclamp_ls_read_u64(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + struct task_group *tg = css_tg(css); + + return (u64) tg->latency_sensitive; +} #endif /* CONFIG_UCLAMP_TASK_GROUP */ #ifdef CONFIG_FAIR_GROUP_SCHED @@ -7716,6 +7737,12 @@ static struct cftype cpu_legacy_files[] = { .seq_show = cpu_uclamp_max_show, .write = cpu_uclamp_max_write, }, + { + .name = "uclamp.latency_sensitive", + .flags = CFTYPE_NOT_ON_ROOT, + .read_u64 = cpu_uclamp_ls_read_u64, + .write_u64 = cpu_uclamp_ls_write_u64, + }, #endif { } /* Terminate */ }; @@ -7897,6 +7924,12 @@ static struct cftype cpu_files[] = { .seq_show = cpu_uclamp_max_show, .write = cpu_uclamp_max_write, }, + { + .name = "uclamp.latency_sensitive", + .flags = CFTYPE_NOT_ON_ROOT, + .read_u64 = cpu_uclamp_ls_read_u64, + .write_u64 = cpu_uclamp_ls_write_u64, + }, #endif { } /* terminate */ }; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 9015c687de19..7de9f16cb838 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -399,6 +399,8 @@ struct task_group { struct uclamp_se uclamp_req[UCLAMP_CNT]; /* Effective clamp values used for a task group */ struct uclamp_se uclamp[UCLAMP_CNT]; + /* Latency-sensitive flag used for a task group */ + unsigned int latency_sensitive; #endif }; From 0e00b6f9dd78f16d214959bb4b1af7ec4e62f9e4 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Tue, 30 Jul 2019 13:58:29 +0100 Subject: [PATCH 14/15] ANDROID: sched: Introduce uclamp latency and boost wrapper Introduce a simple helper to read the latency_sensitive flag from a task. It is called uclamp_latency_sensitive() to match the API proposed by Patrick. While at it, introduce uclamp_boosted() which returns true only when a task has a non-null min-clamp. 
Change-Id: I5fc747da8b58625257a6604a3c88487b657fbe7a Suggested-by: Patrick Bellasi Signed-off-by: Quentin Perret --- kernel/sched/sched.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 7de9f16cb838..4f0d1668c283 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2333,6 +2333,11 @@ static inline unsigned int uclamp_util(struct rq *rq, unsigned int util) { return uclamp_util_with(rq, util, NULL); } + +static inline bool uclamp_boosted(struct task_struct *p) +{ + return uclamp_eff_value(p, UCLAMP_MIN) > 0; +} #else /* CONFIG_UCLAMP_TASK */ static inline unsigned int uclamp_util_with(struct rq *rq, unsigned int util, struct task_struct *p) @@ -2343,8 +2348,31 @@ static inline unsigned int uclamp_util(struct rq *rq, unsigned int util) { return util; } +static inline bool uclamp_boosted(struct task_struct *p) +{ + return false; +} #endif /* CONFIG_UCLAMP_TASK */ +#ifdef CONFIG_UCLAMP_TASK_GROUP +static inline bool uclamp_latency_sensitive(struct task_struct *p) +{ + struct cgroup_subsys_state *css = task_css(p, cpu_cgrp_id); + struct task_group *tg; + + if (!css) + return false; + tg = container_of(css, struct task_group, css); + + return tg->latency_sensitive; +} +#else +static inline bool uclamp_latency_sensitive(struct task_struct *p) +{ + return false; +} +#endif /* CONFIG_UCLAMP_TASK_GROUP */ + #ifdef arch_scale_freq_capacity # ifndef arch_scale_freq_invariant # define arch_scale_freq_invariant() true From 760b82c9b88d2c8125abfc5f732cc3cd460b2a54 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Wed, 27 Feb 2019 11:21:24 +0000 Subject: [PATCH 15/15] ANDROID: sched/fair: Bias EAS placement for latency Add to find_energy_efficient_cpu() a latency-sensitive case which mimics what was done for prefer-idle in android-4.19 and before (see [1] for reference). This isn't strictly equivalent to the legacy algorithm but comes real close, and isn't very invasive. Overall, the idea is to select the biggest idle CPU we can find for latency-sensitive boosted tasks, and the smallest CPU where they can fit for latency-sensitive non-boosted tasks. The main differences with the legacy behaviour are the following: 1. the policy for 'prefer idle' when there isn't a single idle CPU in the system is simpler now. We just pick the CPU with the highest spare capacity; 2. the cstate awareness is implemented by minimizing the exit latency rather than the idle state index. This is how it is done in the slow path (find_idlest_group_cpu()), it doesn't require us to keep hooks into CPUIdle, and should actually be better because what we want is a CPU that can wake up quickly; 3. non-latency-sensitive tasks just use the standard mainline energy-aware wake-up path, which decides the placement using the Energy Model; 4. the 'boosted' and 'latency_sensitive' attributes of a task come from util_clamp (which now replaces schedtune).
[1] https://android.googlesource.com/kernel/common.git/+/c27c56105dcaaae54ecc39ef33fbfac87a1486fc Change-Id: Ia58516906e9cb5abe08385a8cd088097043d8703 Signed-off-by: Quentin Perret --- kernel/sched/fair.c | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 4012266f4f64..9b43520dc248 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6340,8 +6340,13 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu, int sy { unsigned long prev_delta = ULONG_MAX, best_delta = ULONG_MAX; struct root_domain *rd = cpu_rq(smp_processor_id())->rd; + int max_spare_cap_cpu_ls = prev_cpu, best_idle_cpu = -1; + unsigned long max_spare_cap_ls = 0, target_cap; unsigned long cpu_cap, util, base_energy = 0; + bool boosted, latency_sensitive = false; + unsigned int min_exit_lat = UINT_MAX; int cpu, best_energy_cpu = prev_cpu; + struct cpuidle_state *idle; struct sched_domain *sd; struct perf_domain *pd; @@ -6370,6 +6375,10 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu, int sy if (!task_util_est(p)) goto unlock; + latency_sensitive = uclamp_latency_sensitive(p); + boosted = uclamp_boosted(p); + target_cap = boosted ? 0 : ULONG_MAX; + for (; pd; pd = pd->next) { unsigned long cur_delta, spare_cap, max_spare_cap = 0; unsigned long base_energy_pd; @@ -6394,7 +6403,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu, int sy continue; /* Always use prev_cpu as a candidate. */ - if (cpu == prev_cpu) { + if (!latency_sensitive && cpu == prev_cpu) { prev_delta = compute_energy(p, prev_cpu, pd); prev_delta -= base_energy_pd; best_delta = min(best_delta, prev_delta); @@ -6409,10 +6418,34 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu, int sy max_spare_cap = spare_cap; max_spare_cap_cpu = cpu; } + + if (!latency_sensitive) + continue; + + if (idle_cpu(cpu)) { + cpu_cap = capacity_orig_of(cpu); + if (boosted && cpu_cap < target_cap) + continue; + if (!boosted && cpu_cap > target_cap) + continue; + idle = idle_get_state(cpu_rq(cpu)); + if (idle && idle->exit_latency > min_exit_lat && + cpu_cap == target_cap) + continue; + + if (idle) + min_exit_lat = idle->exit_latency; + target_cap = cpu_cap; + best_idle_cpu = cpu; + } else if (spare_cap > max_spare_cap_ls) { + max_spare_cap_ls = spare_cap; + max_spare_cap_cpu_ls = cpu; + } } /* Evaluate the energy impact of using this CPU. */ - if (max_spare_cap_cpu >= 0 && max_spare_cap_cpu != prev_cpu) { + if (!latency_sensitive && max_spare_cap_cpu >= 0 && + max_spare_cap_cpu != prev_cpu) { cur_delta = compute_energy(p, max_spare_cap_cpu, pd); cur_delta -= base_energy_pd; if (cur_delta < best_delta) { @@ -6424,6 +6457,9 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu, int sy unlock: rcu_read_unlock(); + if (latency_sensitive) + return best_idle_cpu >= 0 ? best_idle_cpu : max_spare_cap_cpu_ls; + /* * Pick the best CPU if prev_cpu cannot be used, or if it saves at * least 6% of the energy used by prev_cpu.