Merge branch 'eas-dev' into android-mainline

Update to latest version of EAS

Change-Id: I760cf5c0b7b869abf0564da2e4e944ee0654c88c
Todd Kjos committed 2019-10-02 15:30:40 -07:00
11 changed files with 275 additions and 28 deletions


@@ -10,6 +10,9 @@
/* Replace task scheduler's default frequency-invariant accounting */
#define arch_scale_freq_capacity topology_get_freq_scale
/* Replace task scheduler's default max-frequency-invariant accounting */
#define arch_scale_max_freq_capacity topology_get_max_freq_scale
/* Replace task scheduler's default cpu-invariant accounting */
#define arch_scale_cpu_capacity topology_get_cpu_scale
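These three overrides are what make the scheduler's utilization signal invariant: time run at a lower frequency, or on a smaller CPU, counts for proportionally less. A minimal userspace sketch of that composition, assuming the kernel's 1024-based fixed point (SCHED_CAPACITY_SHIFT = 10); the helper name and the numbers are made up for illustration, this is not kernel code:

#include <stdio.h>

#define SCHED_CAPACITY_SHIFT 10	/* scales are in 1/1024 units */

/*
 * Illustrative composition of the two scale factors the arch hooks
 * provide: freq_scale = cur_freq/max_freq and cpu_scale = relative
 * micro-architectural capacity.
 */
static unsigned long invariant_delta(unsigned long wall_delta,
				     unsigned long freq_scale,
				     unsigned long cpu_scale)
{
	return (wall_delta * freq_scale >> SCHED_CAPACITY_SHIFT)
		* cpu_scale >> SCHED_CAPACITY_SHIFT;
}

int main(void)
{
	/* Little CPU at 50% capacity, running at 75% of its max freq. */
	unsigned long cpu_scale = 512, freq_scale = 768;

	printf("1000us of runtime accrues as %luus of invariant util\n",
	       invariant_delta(1000, freq_scale, cpu_scale));
	return 0;
}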


@@ -13,10 +13,12 @@ CONFIG_TASK_XACCT=y
CONFIG_TASK_IO_ACCOUNTING=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_UCLAMP_TASK=y
CONFIG_NUMA_BALANCING=y
CONFIG_MEMCG=y
CONFIG_MEMCG_SWAP=y
CONFIG_BLK_CGROUP=y
CONFIG_UCLAMP_TASK_GROUP=y
CONFIG_CGROUP_PIDS=y
CONFIG_CGROUP_HUGETLB=y
CONFIG_CPUSETS=y
@@ -71,10 +73,12 @@ CONFIG_COMPAT=y
CONFIG_RANDOMIZE_BASE=y
CONFIG_HIBERNATION=y
CONFIG_WQ_POWER_EFFICIENT_DEFAULT=y
CONFIG_ENERGY_MODEL=y
CONFIG_ARM_CPUIDLE=y
CONFIG_ARM_PSCI_CPUIDLE=y
CONFIG_CPU_FREQ=y
CONFIG_CPU_FREQ_STAT=y
CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL=y
CONFIG_CPU_FREQ_GOV_POWERSAVE=m
CONFIG_CPU_FREQ_GOV_USERSPACE=y
CONFIG_CPU_FREQ_GOV_ONDEMAND=y


@@ -19,6 +19,9 @@ int pcibus_to_node(struct pci_bus *bus);
/* Replace task scheduler's default frequency-invariant accounting */
#define arch_scale_freq_capacity topology_get_freq_scale
/* Replace task scheduler's default max-frequency-invariant accounting */
#define arch_scale_max_freq_capacity topology_get_max_freq_scale
/* Replace task scheduler's default cpu-invariant accounting */
#define arch_scale_cpu_capacity topology_get_cpu_scale


@@ -22,6 +22,8 @@
#include <linux/smp.h>
DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE;
DEFINE_PER_CPU(unsigned long, max_cpu_freq);
DEFINE_PER_CPU(unsigned long, max_freq_scale) = SCHED_CAPACITY_SCALE;
void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq,
unsigned long max_freq)
@@ -31,8 +33,29 @@ void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq,
scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq;
-for_each_cpu(i, cpus)
+for_each_cpu(i, cpus) {
per_cpu(freq_scale, i) = scale;
per_cpu(max_cpu_freq, i) = max_freq;
}
}
void arch_set_max_freq_scale(struct cpumask *cpus,
unsigned long policy_max_freq)
{
unsigned long scale, max_freq;
int cpu = cpumask_first(cpus);
if (cpu > nr_cpu_ids)
return;
max_freq = per_cpu(max_cpu_freq, cpu);
if (!max_freq)
return;
scale = (policy_max_freq << SCHED_CAPACITY_SHIFT) / max_freq;
for_each_cpu(cpu, cpus)
per_cpu(max_freq_scale, cpu) = scale;
}
DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;
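The scale values stored above are plain fixed-point ratios against the hardware maximum frequency. A standalone sketch of the same arithmetic with hypothetical frequencies (the kHz values are chosen only for illustration):

#include <stdio.h>

#define SCHED_CAPACITY_SHIFT 10

int main(void)
{
	unsigned long max_hw_freq = 2400000;	/* kHz, hardware maximum */
	unsigned long cur_freq    = 1800000;	/* kHz, current OPP      */
	unsigned long policy_max  = 2000000;	/* kHz, user/thermal cap */

	/* What arch_set_freq_scale() stores in per-CPU freq_scale. */
	unsigned long freq_scale =
		(cur_freq << SCHED_CAPACITY_SHIFT) / max_hw_freq;

	/* What arch_set_max_freq_scale() stores in per-CPU max_freq_scale. */
	unsigned long max_freq_scale =
		(policy_max << SCHED_CAPACITY_SHIFT) / max_hw_freq;

	printf("freq_scale = %lu/1024, max_freq_scale = %lu/1024\n",
	       freq_scale, max_freq_scale);
	return 0;
}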


@@ -153,6 +153,12 @@ __weak void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq,
}
EXPORT_SYMBOL_GPL(arch_set_freq_scale);
__weak void arch_set_max_freq_scale(struct cpumask *cpus,
unsigned long policy_max_freq)
{
}
EXPORT_SYMBOL_GPL(arch_set_max_freq_scale);
/*
* This is a generic cpufreq init() routine which can be used by cpufreq
* drivers of SMP systems. It will do following:
@@ -2407,6 +2413,8 @@ int cpufreq_set_policy(struct cpufreq_policy *policy,
policy->max = new_policy->max;
trace_cpu_frequency_limits(policy);
arch_set_max_freq_scale(policy->cpus, policy->max);
policy->cached_target_freq = UINT_MAX;
pr_debug("new min and max freqs are %u - %u kHz\n",
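The empty __weak stub lets cpufreq_set_policy() call arch_set_max_freq_scale() unconditionally; an architecture that implements it (arch_topology.c above) overrides the stub with a strong definition at link time. A tiny single-file illustration of the weak-symbol pattern, assuming GCC/Clang on an ELF target; the _demo suffix marks the function as a stand-in, and the real override lives in a separate object file, which a one-file sketch cannot show:

#include <stdio.h>

/*
 * Weak no-op-style default, like the stub added to cpufreq.c: callers may
 * invoke it even when no architecture provides anything better. A strong
 * definition in another object file would silently replace it at link time.
 */
__attribute__((weak)) void arch_set_max_freq_scale_demo(unsigned long policy_max_freq)
{
	printf("weak default: ignoring policy max %lu kHz\n", policy_max_freq);
}

int main(void)
{
	/* Mirrors the new call site in cpufreq_set_policy(). */
	unsigned long new_policy_max = 2000000;

	arch_set_max_freq_scale_demo(new_policy_max);
	return 0;
}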


@@ -33,6 +33,14 @@ unsigned long topology_get_freq_scale(int cpu)
return per_cpu(freq_scale, cpu);
}
DECLARE_PER_CPU(unsigned long, max_freq_scale);
static inline
unsigned long topology_get_max_freq_scale(struct sched_domain *sd, int cpu)
{
return per_cpu(max_freq_scale, cpu);
}
struct cpu_topology {
int thread_id;
int core_id;


@@ -984,6 +984,8 @@ extern unsigned int arch_freq_get_on_cpu(int cpu);
extern void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq,
unsigned long max_freq);
extern void arch_set_max_freq_scale(struct cpumask *cpus,
unsigned long policy_max_freq);
/* the following are really really optional */
extern struct freq_attr cpufreq_freq_attr_scaling_available_freqs;


@@ -7356,6 +7356,27 @@ static int cpu_uclamp_max_show(struct seq_file *sf, void *v)
cpu_uclamp_print(sf, UCLAMP_MAX);
return 0;
}
static int cpu_uclamp_ls_write_u64(struct cgroup_subsys_state *css,
struct cftype *cftype, u64 ls)
{
struct task_group *tg;
if (ls > 1)
return -EINVAL;
tg = css_tg(css);
tg->latency_sensitive = (unsigned int) ls;
return 0;
}
static u64 cpu_uclamp_ls_read_u64(struct cgroup_subsys_state *css,
struct cftype *cft)
{
struct task_group *tg = css_tg(css);
return (u64) tg->latency_sensitive;
}
#endif /* CONFIG_UCLAMP_TASK_GROUP */
#ifdef CONFIG_FAIR_GROUP_SCHED
@@ -7716,6 +7737,12 @@ static struct cftype cpu_legacy_files[] = {
.seq_show = cpu_uclamp_max_show,
.write = cpu_uclamp_max_write,
},
{
.name = "uclamp.latency_sensitive",
.flags = CFTYPE_NOT_ON_ROOT,
.read_u64 = cpu_uclamp_ls_read_u64,
.write_u64 = cpu_uclamp_ls_write_u64,
},
#endif
{ } /* Terminate */
};
@@ -7897,6 +7924,12 @@ static struct cftype cpu_files[] = {
.seq_show = cpu_uclamp_max_show,
.write = cpu_uclamp_max_write,
},
{
.name = "uclamp.latency_sensitive",
.flags = CFTYPE_NOT_ON_ROOT,
.read_u64 = cpu_uclamp_ls_read_u64,
.write_u64 = cpu_uclamp_ls_write_u64,
},
#endif
{ } /* terminate */
};
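Both hierarchies therefore gain a cpu.uclamp.latency_sensitive file on non-root groups, accepting 0 or 1. A minimal user-space sketch of setting it; the cgroup mount point and group name below are assumptions and will differ per system:

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	/* Hypothetical v2 hierarchy path; adjust for the real mount/group. */
	const char *attr =
		"/sys/fs/cgroup/foreground/cpu.uclamp.latency_sensitive";
	int fd = open(attr, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* Only "0" or "1" are accepted; anything else returns -EINVAL. */
	if (write(fd, "1", 1) != 1)
		perror("write");
	close(fd);
	return 0;
}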


@@ -6026,6 +6026,19 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
return target;
}
static unsigned int uclamp_task_util(struct task_struct *p)
{
#ifdef CONFIG_UCLAMP_TASK
unsigned int min_util = uclamp_eff_value(p, UCLAMP_MIN);
unsigned int max_util = uclamp_eff_value(p, UCLAMP_MAX);
unsigned int est_util = task_util_est(p);
return clamp(est_util, min_util, max_util);
#else
return task_util_est(p);
#endif
}
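uclamp_task_util() just clamps the task's estimated utilization into its effective uclamp window, so the capacity-fit check added below sees the boosted or capped value. A standalone model with invented numbers:

#include <stdio.h>

/* clamp() as in include/linux/kernel.h, reduced to unsigned ints. */
static unsigned int clampu(unsigned int v, unsigned int lo, unsigned int hi)
{
	return v < lo ? lo : (v > hi ? hi : v);
}

int main(void)
{
	unsigned int est_util = 120;	/* task_util_est(): tiny task       */
	unsigned int min_util = 300;	/* uclamp_eff_value(p, UCLAMP_MIN)  */
	unsigned int max_util = 1024;	/* uclamp_eff_value(p, UCLAMP_MAX)  */

	/* The boosted task is treated as needing at least 300/1024 capacity. */
	printf("uclamp_task_util = %u\n", clampu(est_util, min_util, max_util));
	return 0;
}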
/**
* Amount of capacity of a CPU that is (estimated to be) used by CFS tasks
* @cpu: the CPU to get the utilization of
@@ -6182,7 +6195,7 @@ static int wake_cap(struct task_struct *p, int cpu, int prev_cpu)
return 0;
min_cap = min(capacity_orig_of(prev_cpu), capacity_orig_of(cpu));
-max_cap = cpu_rq(cpu)->rd->max_cpu_capacity;
+max_cap = cpu_rq(cpu)->rd->max_cpu_capacity.val;
/* Minimum capacity is close to max, no need to abort wake_affine */
if (max_cap - min_cap < max_cap >> 3)
@@ -6323,12 +6336,17 @@ compute_energy(struct task_struct *p, int dst_cpu, struct perf_domain *pd)
* other use-cases too. So, until someone finds a better way to solve this,
* let's keep things simple by re-using the existing slow path.
*/
-static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
+static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu, int sync)
{
unsigned long prev_delta = ULONG_MAX, best_delta = ULONG_MAX;
struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
int max_spare_cap_cpu_ls = prev_cpu, best_idle_cpu = -1;
unsigned long max_spare_cap_ls = 0, target_cap;
unsigned long cpu_cap, util, base_energy = 0;
bool boosted, latency_sensitive = false;
unsigned int min_exit_lat = UINT_MAX;
int cpu, best_energy_cpu = prev_cpu;
struct cpuidle_state *idle;
struct sched_domain *sd;
struct perf_domain *pd;
@@ -6337,6 +6355,12 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
if (!pd || READ_ONCE(rd->overutilized))
goto fail;
cpu = smp_processor_id();
if (sync && cpumask_test_cpu(cpu, p->cpus_ptr)) {
rcu_read_unlock();
return cpu;
}
/*
* Energy-aware wake-up happens on the lowest sched_domain starting
* from sd_asym_cpucapacity spanning over this_cpu and prev_cpu.
@@ -6351,6 +6375,10 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
if (!task_util_est(p))
goto unlock;
latency_sensitive = uclamp_latency_sensitive(p);
boosted = uclamp_boosted(p);
target_cap = boosted ? 0 : ULONG_MAX;
for (; pd; pd = pd->next) {
unsigned long cur_delta, spare_cap, max_spare_cap = 0;
unsigned long base_energy_pd;
@@ -6370,8 +6398,12 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
if (!fits_capacity(util, cpu_cap))
continue;
/* Skip CPUs which do not fit task requirements */
if (cpu_cap < uclamp_task_util(p))
continue;
/* Always use prev_cpu as a candidate. */
-if (cpu == prev_cpu) {
+if (!latency_sensitive && cpu == prev_cpu) {
prev_delta = compute_energy(p, prev_cpu, pd);
prev_delta -= base_energy_pd;
best_delta = min(best_delta, prev_delta);
@@ -6386,10 +6418,34 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
max_spare_cap = spare_cap;
max_spare_cap_cpu = cpu;
}
if (!latency_sensitive)
continue;
if (idle_cpu(cpu)) {
cpu_cap = capacity_orig_of(cpu);
if (boosted && cpu_cap < target_cap)
continue;
if (!boosted && cpu_cap > target_cap)
continue;
idle = idle_get_state(cpu_rq(cpu));
if (idle && idle->exit_latency > min_exit_lat &&
cpu_cap == target_cap)
continue;
if (idle)
min_exit_lat = idle->exit_latency;
target_cap = cpu_cap;
best_idle_cpu = cpu;
} else if (spare_cap > max_spare_cap_ls) {
max_spare_cap_ls = spare_cap;
max_spare_cap_cpu_ls = cpu;
}
}
/* Evaluate the energy impact of using this CPU. */
-if (max_spare_cap_cpu >= 0 && max_spare_cap_cpu != prev_cpu) {
+if (!latency_sensitive && max_spare_cap_cpu >= 0 &&
+    max_spare_cap_cpu != prev_cpu) {
cur_delta = compute_energy(p, max_spare_cap_cpu, pd);
cur_delta -= base_energy_pd;
if (cur_delta < best_delta) {
@@ -6401,6 +6457,9 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
unlock:
rcu_read_unlock();
if (latency_sensitive)
return best_idle_cpu >= 0 ? best_idle_cpu : max_spare_cap_cpu_ls;
/*
* Pick the best CPU if prev_cpu cannot be used, or if it saves at
* least 6% of the energy used by prev_cpu.
@@ -6444,7 +6503,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
record_wakee(p);
if (sched_energy_enabled()) {
-new_cpu = find_energy_efficient_cpu(p, prev_cpu);
+new_cpu = find_energy_efficient_cpu(p, prev_cpu, sync);
if (new_cpu >= 0)
return new_cpu;
new_cpu = prev_cpu;
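Taken together, the find_energy_efficient_cpu() changes add two shortcuts: a sync wake may simply stay on the waking CPU, and a latency-sensitive task skips the energy comparison, preferring an idle CPU (largest capacity if the task is uclamp-boosted, smallest otherwise, with idle-exit latency as a tie break) and falling back to the CPU with the most spare capacity. Below is a compressed userspace model of that latency-sensitive leg; the per-perf-domain iteration, the energy estimates, and the CPU table are all simplified or invented for illustration:

#include <stdio.h>
#include <limits.h>

struct cpu_state {
	unsigned long cap_orig;   /* capacity_orig_of()            */
	unsigned long util;       /* current utilization           */
	int idle;                 /* idle_cpu()                    */
	unsigned int exit_lat;    /* idle state exit latency (us)  */
};

/* Latency-sensitive leg of find_energy_efficient_cpu(), simplified. */
static int pick_latency_sensitive(const struct cpu_state *c, int n,
				  int prev_cpu, int boosted)
{
	unsigned long target_cap = boosted ? 0 : ULONG_MAX;
	unsigned long max_spare = 0;
	unsigned int min_exit_lat = UINT_MAX;
	int best_idle = -1, max_spare_cpu = prev_cpu;
	int cpu;

	for (cpu = 0; cpu < n; cpu++) {
		unsigned long spare = c[cpu].cap_orig - c[cpu].util;

		if (c[cpu].idle) {
			/* Boosted tasks want big CPUs, others want small. */
			if (boosted && c[cpu].cap_orig < target_cap)
				continue;
			if (!boosted && c[cpu].cap_orig > target_cap)
				continue;
			/* Same capacity: prefer the shallower idle state. */
			if (c[cpu].exit_lat > min_exit_lat &&
			    c[cpu].cap_orig == target_cap)
				continue;
			min_exit_lat = c[cpu].exit_lat;
			target_cap = c[cpu].cap_orig;
			best_idle = cpu;
		} else if (spare > max_spare) {
			max_spare = spare;
			max_spare_cpu = cpu;
		}
	}
	return best_idle >= 0 ? best_idle : max_spare_cpu;
}

int main(void)
{
	struct cpu_state cpus[] = {
		{  512, 100, 1,  50 },	/* little, idle, shallow C-state */
		{  512, 400, 0,   0 },	/* little, busy                  */
		{ 1024, 200, 1, 500 },	/* big, idle, deep C-state       */
		{ 1024, 900, 0,   0 },	/* big, busy                     */
	};

	printf("boosted   -> CPU%d\n", pick_latency_sensitive(cpus, 4, 1, 1));
	printf("unboosted -> CPU%d\n", pick_latency_sensitive(cpus, 4, 1, 0));
	return 0;
}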
@@ -7089,6 +7148,7 @@ struct lb_env {
int new_dst_cpu;
enum cpu_idle_type idle;
long imbalance;
unsigned int src_grp_nr_running;
/* The set of CPUs under consideration for load-balancing */
struct cpumask *cpus;
@@ -7702,10 +7762,9 @@ static inline void init_sd_lb_stats(struct sd_lb_stats *sds)
};
}
-static unsigned long scale_rt_capacity(struct sched_domain *sd, int cpu)
+static unsigned long scale_rt_capacity(int cpu, unsigned long max)
{
struct rq *rq = cpu_rq(cpu);
-unsigned long max = arch_scale_cpu_capacity(cpu);
unsigned long used, free;
unsigned long irq;
@@ -7725,12 +7784,47 @@ static unsigned long scale_rt_capacity(struct sched_domain *sd, int cpu)
return scale_irq_capacity(free, irq, max);
}
void init_max_cpu_capacity(struct max_cpu_capacity *mcc) {
raw_spin_lock_init(&mcc->lock);
mcc->val = 0;
mcc->cpu = -1;
}
static void update_cpu_capacity(struct sched_domain *sd, int cpu)
{
-unsigned long capacity = scale_rt_capacity(sd, cpu);
+unsigned long capacity = arch_scale_cpu_capacity(cpu);
struct sched_group *sdg = sd->groups;
struct max_cpu_capacity *mcc;
unsigned long max_capacity;
int max_cap_cpu;
unsigned long flags;
-cpu_rq(cpu)->cpu_capacity_orig = arch_scale_cpu_capacity(cpu);
+cpu_rq(cpu)->cpu_capacity_orig = capacity;
capacity *= arch_scale_max_freq_capacity(sd, cpu);
capacity >>= SCHED_CAPACITY_SHIFT;
mcc = &cpu_rq(cpu)->rd->max_cpu_capacity;
raw_spin_lock_irqsave(&mcc->lock, flags);
max_capacity = mcc->val;
max_cap_cpu = mcc->cpu;
if ((max_capacity > capacity && max_cap_cpu == cpu) ||
(max_capacity < capacity)) {
mcc->val = capacity;
mcc->cpu = cpu;
#ifdef CONFIG_SCHED_DEBUG
raw_spin_unlock_irqrestore(&mcc->lock, flags);
printk_deferred(KERN_INFO "CPU%d: update max cpu_capacity %lu\n",
cpu, capacity);
goto skip_unlock;
#endif
}
raw_spin_unlock_irqrestore(&mcc->lock, flags);
skip_unlock: __attribute__ ((unused));
capacity = scale_rt_capacity(cpu, capacity);
if (!capacity)
capacity = 1;
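update_cpu_capacity() now starts from arch_scale_cpu_capacity(), shrinks it by max_freq_scale (so a frequency-capped CPU advertises less capacity), records the largest result in the root domain's max_cpu_capacity, and only then subtracts RT/DL/IRQ pressure via scale_rt_capacity(). A small model of that ordering with invented numbers; the mcc->lock serialization is deliberately left out of this sketch:

#include <stdio.h>

#define SCHED_CAPACITY_SHIFT 10

struct max_cpu_capacity { unsigned long val; int cpu; };

static unsigned long update_capacity(int cpu, unsigned long cpu_scale,
				     unsigned long max_freq_scale,
				     unsigned long rt_irq_pressure,
				     struct max_cpu_capacity *mcc)
{
	/* Start from the micro-architectural capacity ... */
	unsigned long capacity = cpu_scale;

	/* ... shrink it by the current policy cap (max_freq_scale) ... */
	capacity = capacity * max_freq_scale >> SCHED_CAPACITY_SHIFT;

	/* ... track the system-wide maximum (locking elided here) ... */
	if ((mcc->val > capacity && mcc->cpu == cpu) || mcc->val < capacity) {
		mcc->val = capacity;
		mcc->cpu = cpu;
	}

	/* ... then remove RT/DL/IRQ pressure, as scale_rt_capacity() does. */
	return capacity > rt_irq_pressure ? capacity - rt_irq_pressure : 1;
}

int main(void)
{
	struct max_cpu_capacity mcc = { 0, -1 };
	unsigned long cfs0, cfs1;

	/* Little CPU, uncapped, light pressure. */
	cfs0 = update_capacity(0, 512, 1024, 20, &mcc);
	/* Big CPU thermally capped to ~83% of its max frequency. */
	cfs1 = update_capacity(1, 1024, 853, 100, &mcc);

	printf("cfs capacity: cpu0=%lu cpu1=%lu, max_cpu_capacity=%lu on cpu%d\n",
	       cfs0, cfs1, mcc.val, mcc.cpu);
	return 0;
}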
@@ -7834,7 +7928,7 @@ check_cpu_capacity(struct rq *rq, struct sched_domain *sd)
static inline int check_misfit_status(struct rq *rq, struct sched_domain *sd)
{
return rq->misfit_task_load &&
-(rq->cpu_capacity_orig < rq->rd->max_cpu_capacity ||
+(rq->cpu_capacity_orig < rq->rd->max_cpu_capacity.val ||
check_cpu_capacity(rq, sd));
}
@@ -8237,6 +8331,8 @@ next_group:
if (env->sd->flags & SD_NUMA)
env->fbq_type = fbq_classify_group(&sds->busiest_stat);
env->src_grp_nr_running = sds->busiest_stat.sum_nr_running;
if (!env->sd->parent) {
struct root_domain *rd = env->dst_rq->rd;
@@ -8365,7 +8461,22 @@ void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds)
capa_move /= SCHED_CAPACITY_SCALE;
/* Move if we gain throughput */
-if (capa_move > capa_now)
+if (capa_move > capa_now) {
env->imbalance = busiest->load_per_task;
return;
}
/* We can't see throughput improvement with the load-based
* method, but it is possible depending upon group size and
* capacity range that there might still be an underutilized
* cpu available in an asymmetric capacity system. Do one last
* check just in case.
*/
if (env->sd->flags & SD_ASYM_CPUCAPACITY &&
busiest->group_type == group_overloaded &&
busiest->sum_nr_running > busiest->group_weight &&
local->sum_nr_running < local->group_weight &&
local->group_capacity < busiest->group_capacity)
env->imbalance = busiest->load_per_task;
}
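The new tail of fix_small_imbalance() targets asymmetric-capacity systems: even when the throughput estimate says moving load will not help, an imbalance is still set if the busiest group is overloaded (more runnable tasks than CPUs) while the smaller-capacity local group has idle room. A sketch of just that predicate with hypothetical group statistics:

#include <stdio.h>
#include <stdbool.h>

struct sg_stats {
	unsigned int sum_nr_running;
	unsigned int group_weight;	/* number of CPUs in the group */
	unsigned long group_capacity;
	bool overloaded;		/* group_type == group_overloaded */
};

/* The condition added at the end of fix_small_imbalance(), reduced. */
static bool asym_last_chance(bool sd_asym_cpucapacity,
			     const struct sg_stats *busiest,
			     const struct sg_stats *local)
{
	return sd_asym_cpucapacity &&
	       busiest->overloaded &&
	       busiest->sum_nr_running > busiest->group_weight &&
	       local->sum_nr_running < local->group_weight &&
	       local->group_capacity < busiest->group_capacity;
}

int main(void)
{
	/* Big cluster: 4 CPUs, 6 runnable tasks, overloaded. */
	struct sg_stats busiest = { 6, 4, 4096, true };
	/* Little cluster: 4 CPUs, 2 runnable tasks, smaller capacity. */
	struct sg_stats local   = { 2, 4, 2048, false };

	printf("pull one task to the little cluster: %s\n",
	       asym_last_chance(true, &busiest, &local) ? "yes" : "no");
	return 0;
}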
@@ -8434,8 +8545,18 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
(sds->avg_load - local->avg_load) * local->group_capacity
) / SCHED_CAPACITY_SCALE;
-/* Boost imbalance to allow misfit task to be balanced. */
-if (busiest->group_type == group_misfit_task) {
+/* Boost imbalance to allow misfit task to be balanced.
+ * Always do this if we are doing a NEWLY_IDLE balance
+ * on the assumption that any tasks we have must not be
+ * long-running (and hence we cannot rely upon load).
+ * However if we are not idle, we should assume the tasks
+ * we have are longer running and not override load-based
+ * calculations above unless we are sure that the local
+ * group is underutilized.
+ */
+if (busiest->group_type == group_misfit_task &&
+    (env->idle == CPU_NEWLY_IDLE ||
+     local->sum_nr_running < local->group_weight)) {
env->imbalance = max_t(long, env->imbalance,
busiest->group_misfit_task_load);
}
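The reworked condition only lets a misfit task inflate the imbalance when the pull is clearly justified: the local CPU is newly idle (its load history says little) or the local group still runs fewer tasks than it has CPUs. A short model of the resulting imbalance under invented inputs, assuming the busiest group was already classified group_misfit_task:

#include <stdio.h>

enum idle_type { CPU_NOT_IDLE, CPU_NEWLY_IDLE };

static long misfit_boost(long imbalance, long misfit_load,
			 enum idle_type idle,
			 unsigned int local_running, unsigned int local_weight)
{
	/* Mirrors the new guard in calculate_imbalance(). */
	if (idle == CPU_NEWLY_IDLE || local_running < local_weight)
		return imbalance > misfit_load ? imbalance : misfit_load;
	return imbalance;
}

int main(void)
{
	/* A 600-unit misfit task, load-based imbalance only 150. */
	printf("newly idle:       %ld\n",
	       misfit_boost(150, 600, CPU_NEWLY_IDLE, 4, 4));
	printf("busy local group: %ld\n",
	       misfit_boost(150, 600, CPU_NOT_IDLE, 4, 4));
	printf("idle room local:  %ld\n",
	       misfit_boost(150, 600, CPU_NOT_IDLE, 3, 4));
	return 0;
}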
@@ -8713,6 +8834,9 @@ static int need_active_balance(struct lb_env *env)
if (voluntary_active_balance(env))
return 1;
if (env->src_grp_type == group_overloaded && env->src_rq->misfit_task_load)
return 1;
return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2);
}
@@ -8931,7 +9055,8 @@ more_balance:
* excessive cache_hot migrations and active balances.
*/
if (idle != CPU_NEWLY_IDLE)
-sd->nr_balance_failed++;
+if (env.src_grp_nr_running > 1)
+	sd->nr_balance_failed++;
if (need_active_balance(&env)) {
unsigned long flags;
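Two related bookkeeping tweaks in this hunk and the need_active_balance() one above: a failed balance only bumps nr_balance_failed when the busiest group actually had more than one running task, and an overloaded source group carrying a misfit task now also triggers an active balance. A compact sketch of both guards; the struct and field names below are simplified stand-ins for lb_env and the sched_group statistics:

#include <stdio.h>
#include <stdbool.h>

struct lb_stats {
	unsigned int src_grp_nr_running;  /* new lb_env field               */
	bool src_grp_overloaded;          /* group_type == group_overloaded */
	unsigned long src_misfit_load;    /* src_rq->misfit_task_load       */
};

/* Only count a failure if there was actually something to pull. */
static unsigned int account_failure(unsigned int nr_balance_failed,
				    const struct lb_stats *s)
{
	if (s->src_grp_nr_running > 1)
		nr_balance_failed++;
	return nr_balance_failed;
}

/* The extra trigger added to need_active_balance(), reduced. */
static bool want_active_balance(const struct lb_stats *s)
{
	return s->src_grp_overloaded && s->src_misfit_load;
}

int main(void)
{
	struct lb_stats lone_task = { 1, false, 0 };
	struct lb_stats misfit    = { 3, true, 600 };

	printf("lone task:  failures=%u active=%d\n",
	       account_failure(0, &lone_task), want_active_balance(&lone_task));
	printf("overloaded: failures=%u active=%d\n",
	       account_failure(0, &misfit), want_active_balance(&misfit));
	return 0;
}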


@@ -399,6 +399,8 @@ struct task_group {
struct uclamp_se uclamp_req[UCLAMP_CNT];
/* Effective clamp values used for a task group */
struct uclamp_se uclamp[UCLAMP_CNT];
/* Latency-sensitive flag used for a task group */
unsigned int latency_sensitive;
#endif
};
@@ -717,6 +719,12 @@ struct perf_domain {
struct rcu_head rcu;
};
struct max_cpu_capacity {
raw_spinlock_t lock;
unsigned long val;
int cpu;
};
/* Scheduling group status flags */
#define SG_OVERLOAD 0x1 /* More than one runnable task on a CPU. */
#define SG_OVERUTILIZED 0x2 /* One or more CPUs are over-utilized. */
@@ -775,7 +783,8 @@ struct root_domain {
cpumask_var_t rto_mask;
struct cpupri cpupri;
-unsigned long max_cpu_capacity;
+/* Maximum cpu capacity in the system. */
+struct max_cpu_capacity max_cpu_capacity;
/*
* NULL-terminated list of performance domains intersecting with the
@@ -785,6 +794,7 @@ struct root_domain {
};
extern void init_defrootdomain(void);
extern void init_max_cpu_capacity(struct max_cpu_capacity *mcc);
extern int sched_init_domains(const struct cpumask *cpu_map);
extern void rq_attach_root(struct rq *rq, struct root_domain *rd);
extern void sched_get_rd(struct root_domain *rd);
@@ -1961,6 +1971,15 @@ unsigned long arch_scale_freq_capacity(int cpu)
}
#endif
#ifndef arch_scale_max_freq_capacity
struct sched_domain;
static __always_inline
unsigned long arch_scale_max_freq_capacity(struct sched_domain *sd, int cpu)
{
return SCHED_CAPACITY_SCALE;
}
#endif
#ifdef CONFIG_SMP
#ifdef CONFIG_PREEMPTION
@@ -2314,6 +2333,11 @@ static inline unsigned int uclamp_util(struct rq *rq, unsigned int util)
{
return uclamp_util_with(rq, util, NULL);
}
static inline bool uclamp_boosted(struct task_struct *p)
{
return uclamp_eff_value(p, UCLAMP_MIN) > 0;
}
#else /* CONFIG_UCLAMP_TASK */
static inline unsigned int uclamp_util_with(struct rq *rq, unsigned int util,
struct task_struct *p)
@@ -2324,8 +2348,31 @@ static inline unsigned int uclamp_util(struct rq *rq, unsigned int util)
{
return util;
}
static inline bool uclamp_boosted(struct task_struct *p)
{
return false;
}
#endif /* CONFIG_UCLAMP_TASK */
#ifdef CONFIG_UCLAMP_TASK_GROUP
static inline bool uclamp_latency_sensitive(struct task_struct *p)
{
struct cgroup_subsys_state *css = task_css(p, cpu_cgrp_id);
struct task_group *tg;
if (!css)
return false;
tg = container_of(css, struct task_group, css);
return tg->latency_sensitive;
}
#else
static inline bool uclamp_latency_sensitive(struct task_struct *p)
{
return false;
}
#endif /* CONFIG_UCLAMP_TASK_GROUP */
#ifdef arch_scale_freq_capacity
# ifndef arch_scale_freq_invariant
# define arch_scale_freq_invariant() true


@@ -510,6 +510,9 @@ static int init_rootdomain(struct root_domain *rd)
if (cpupri_init(&rd->cpupri) != 0)
goto free_cpudl;
init_max_cpu_capacity(&rd->max_cpu_capacity);
return 0;
free_cpudl:
@@ -1951,7 +1954,6 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
enum s_alloc alloc_state;
struct sched_domain *sd;
struct s_data d;
-struct rq *rq = NULL;
int i, ret = -ENOMEM;
struct sched_domain_topology_level *tl_asym;
bool has_asym = false;
@@ -2014,13 +2016,7 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
/* Attach the domains */
rcu_read_lock();
for_each_cpu(i, cpu_map) {
-rq = cpu_rq(i);
sd = *per_cpu_ptr(d.sd, i);
-/* Use READ_ONCE()/WRITE_ONCE() to avoid load/store tearing: */
-if (rq->cpu_capacity_orig > READ_ONCE(d.rd->max_cpu_capacity))
-	WRITE_ONCE(d.rd->max_cpu_capacity, rq->cpu_capacity_orig);
cpu_attach_domain(sd, d.rd, i);
}
rcu_read_unlock();
@@ -2028,11 +2024,6 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
if (has_asym)
static_branch_enable_cpuslocked(&sched_asym_cpucapacity);
-if (rq && sched_debug_enabled) {
-	pr_info("root domain span: %*pbl (max cpu_capacity = %lu)\n",
-		cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity);
-}
ret = 0;
error:
__free_domain_allocs(&d, alloc_state, cpu_map);