kernel/sched_fair.c

   1 /*
   2  * Completely Fair Scheduling (CFS) Class (SCHED_NORMAL/SCHED_BATCH)
   3  *
   4  *  Copyright (C) 2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
   5  *
   6  *  Interactivity improvements by Mike Galbraith
   7  *  (C) 2007 Mike Galbraith <efault@gmx.de>
   8  *
   9  *  Various enhancements by Dmitry Adamushko.
  10  *  (C) 2007 Dmitry Adamushko <dmitry.adamushko@gmail.com>
  11  *
  12  *  Group scheduling enhancements by Srivatsa Vaddagiri
  13  *  Copyright IBM Corporation, 2007
  14  *  Author: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
  15  *
  16  *  Scaled math optimizations by Thomas Gleixner
  17  *  Copyright (C) 2007, Thomas Gleixner <tglx@linutronix.de>
  18  *
  19  *  Adaptive scheduling granularity, math enhancements by Peter Zijlstra
  20  *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
  21  */
  22
  23 #include <linux/latencytop.h>
  24
  25 /*
  26  * Targeted preemption latency for CPU-bound tasks:
  27  * (default: 20ms * (1 + ilog(ncpus)), units: nanoseconds)
  28  *
  29  * NOTE: this latency value is not the same as the concept of
  30  * 'timeslice length' - timeslices in CFS are of variable length
  31  * and have no persistent notion like in traditional, time-slice
  32  * based scheduling concepts.
  33  *
  34  * (to see the precise effective timeslice length of your workload,
  35  *  run vmstat and monitor the context-switches (cs) field)
  36  */
  37 unsigned int sysctl_sched_latency = 20000000ULL;
  38
  39 /*
  40  * Minimal preemption granularity for CPU-bound tasks:
  41  * (default: 4 msec * (1 + ilog(ncpus)), units: nanoseconds)
  42  */
  43 unsigned int sysctl_sched_min_granularity = 4000000ULL;
  44
  45 /*
  46  * sched_nr_latency is kept at sysctl_sched_latency / sysctl_sched_min_granularity
  47  */
  48 static unsigned int sched_nr_latency = 5;
  49
  50 /*
  51  * After fork, child runs first. (default) If set to 0 then
  52  * parent will (try to) run first.
  53  */
  54 const_debug unsigned int sysctl_sched_child_runs_first = 1;
  55
  56 /*
  57  * sys_sched_yield() compat mode
  58  *
  59  * This option switches the aggressive yield implementation of the
  60  * old scheduler back on.
  61  */
  62 unsigned int __read_mostly sysctl_sched_compat_yield;
  63
  64 /*
  65  * SCHED_BATCH wake-up granularity.
  66  * (default: 10 msec * (1 + ilog(ncpus)), units: nanoseconds)
  67  *
  68  * This option delays the preemption effects of decoupled workloads
  69  * and reduces their over-scheduling. Synchronous workloads will still
  70  * have immediate wakeup/sleep latencies.
  71  */
  72 unsigned int sysctl_sched_batch_wakeup_granularity = 10000000UL;
  73
  74 /*
  75  * SCHED_OTHER wake-up granularity.
  76 <<<<<<< HEAD:kernel/sched_fair.c
  77  * (default: 10 msec * (1 + ilog(ncpus)), units: nanoseconds)
  78 =======
  79  * (default: 5 msec * (1 + ilog(ncpus)), units: nanoseconds)
  80 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
  81  *
  82  * This option delays the preemption effects of decoupled workloads
  83  * and reduces their over-scheduling. Synchronous workloads will still
  84  * have immediate wakeup/sleep latencies.
  85  */
  86 <<<<<<< HEAD:kernel/sched_fair.c
  87 unsigned int sysctl_sched_wakeup_granularity = 10000000UL;
  88 =======
  89 unsigned int sysctl_sched_wakeup_granularity = 5000000UL;
  90 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
  91
  92 const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
  93
  94 /**************************************************************
  95  * CFS operations on generic schedulable entities:
  96  */
  97
  98 #ifdef CONFIG_FAIR_GROUP_SCHED
  99
 100 /* cpu runqueue to which this cfs_rq is attached */
 101 static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
 102 {
 103         return cfs_rq->rq;
 104 }
 105
 106 /* An entity is a task if it doesn't "own" a runqueue */
 107 #define entity_is_task(se)      (!se->my_q)
 108
 109 #else   /* CONFIG_FAIR_GROUP_SCHED */
 110
 111 static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
 112 {
 113         return container_of(cfs_rq, struct rq, cfs);
 114 }
 115
 116 #define entity_is_task(se)      1
 117
 118 #endif  /* CONFIG_FAIR_GROUP_SCHED */
 119
 120 static inline struct task_struct *task_of(struct sched_entity *se)
 121 {
 122         return container_of(se, struct task_struct, se);
 123 }
 124
 125
 126 /**************************************************************
 127  * Scheduling class tree data structure manipulation methods:
 128  */
 129
 130 static inline u64 max_vruntime(u64 min_vruntime, u64 vruntime)
 131 {
 132         s64 delta = (s64)(vruntime - min_vruntime);
 133         if (delta > 0)
 134                 min_vruntime = vruntime;
 135
 136         return min_vruntime;
 137 }
 138
 139 static inline u64 min_vruntime(u64 min_vruntime, u64 vruntime)
 140 {
 141         s64 delta = (s64)(vruntime - min_vruntime);
 142         if (delta < 0)
 143                 min_vruntime = vruntime;
 144
 145         return min_vruntime;
 146 }
 147
 148 static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
 149 {
 150         return se->vruntime - cfs_rq->min_vruntime;
 151 }
 152
 153 /*
 154  * Enqueue an entity into the rb-tree:
 155  */
 156 static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 157 {
 158         struct rb_node **link = &cfs_rq->tasks_timeline.rb_node;
 159         struct rb_node *parent = NULL;
 160         struct sched_entity *entry;
 161         s64 key = entity_key(cfs_rq, se);
 162         int leftmost = 1;
 163
 164         /*
 165          * Find the right place in the rbtree:
 166          */
 167         while (*link) {
 168                 parent = *link;
 169                 entry = rb_entry(parent, struct sched_entity, run_node);
 170                 /*
 171                  * We dont care about collisions. Nodes with
 172                  * the same key stay together.
 173                  */
 174                 if (key < entity_key(cfs_rq, entry)) {
 175                         link = &parent->rb_left;
 176                 } else {
 177                         link = &parent->rb_right;
 178                         leftmost = 0;
 179                 }
 180         }
 181
 182         /*
 183          * Maintain a cache of leftmost tree entries (it is frequently
 184          * used):
 185          */
 186 <<<<<<< HEAD:kernel/sched_fair.c
 187         if (leftmost)
 188 =======
 189         if (leftmost) {
 190 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
 191                 cfs_rq->rb_leftmost = &se->run_node;
 192 <<<<<<< HEAD:kernel/sched_fair.c
 193 =======
 194                 /*
 195                  * maintain cfs_rq->min_vruntime to be a monotonic increasing
 196                  * value tracking the leftmost vruntime in the tree.
 197                  */
 198                 cfs_rq->min_vruntime =
 199                         max_vruntime(cfs_rq->min_vruntime, se->vruntime);
 200         }
 201 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
 202
 203         rb_link_node(&se->run_node, parent, link);
 204         rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline);
 205 }
 206
 207 static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 208 {
 209 <<<<<<< HEAD:kernel/sched_fair.c
 210         if (cfs_rq->rb_leftmost == &se->run_node)
 211                 cfs_rq->rb_leftmost = rb_next(&se->run_node);
 212 =======
 213         if (cfs_rq->rb_leftmost == &se->run_node) {
 214                 struct rb_node *next_node;
 215                 struct sched_entity *next;
 216
 217                 next_node = rb_next(&se->run_node);
 218                 cfs_rq->rb_leftmost = next_node;
 219
 220                 if (next_node) {
 221                         next = rb_entry(next_node,
 222                                         struct sched_entity, run_node);
 223                         cfs_rq->min_vruntime =
 224                                 max_vruntime(cfs_rq->min_vruntime,
 225                                              next->vruntime);
 226                 }
 227         }
 228
 229         if (cfs_rq->next == se)
 230                 cfs_rq->next = NULL;
 231 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
 232
 233         rb_erase(&se->run_node, &cfs_rq->tasks_timeline);
 234 }
 235
 236 static inline struct rb_node *first_fair(struct cfs_rq *cfs_rq)
 237 {
 238         return cfs_rq->rb_leftmost;
 239 }
 240
 241 static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq)
 242 {
 243         return rb_entry(first_fair(cfs_rq), struct sched_entity, run_node);
 244 }
 245
 246 static inline struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
 247 {
 248 <<<<<<< HEAD:kernel/sched_fair.c
 249         struct rb_node **link = &cfs_rq->tasks_timeline.rb_node;
 250         struct sched_entity *se = NULL;
 251         struct rb_node *parent;
 252 =======
 253         struct rb_node *last = rb_last(&cfs_rq->tasks_timeline);
 254 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
 255
 256 <<<<<<< HEAD:kernel/sched_fair.c
 257         while (*link) {
 258                 parent = *link;
 259                 se = rb_entry(parent, struct sched_entity, run_node);
 260                 link = &parent->rb_right;
 261         }
 262 =======
 263         if (!last)
 264                 return NULL;
 265 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
 266
 267 <<<<<<< HEAD:kernel/sched_fair.c
 268         return se;
 269 =======
 270         return rb_entry(last, struct sched_entity, run_node);
 271 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
 272 }
 273
 274 /**************************************************************
 275  * Scheduling class statistics methods:
 276  */
 277
 278 #ifdef CONFIG_SCHED_DEBUG
 279 int sched_nr_latency_handler(struct ctl_table *table, int write,
 280                 struct file *filp, void __user *buffer, size_t *lenp,
 281                 loff_t *ppos)
 282 {
 283         int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
 284
 285         if (ret || !write)
 286                 return ret;
 287
 288         sched_nr_latency = DIV_ROUND_UP(sysctl_sched_latency,
 289                                         sysctl_sched_min_granularity);
 290
 291         return 0;
 292 }
 293 #endif
 294
 295 /*
 296  * The idea is to set a period in which each task runs once.
 297  *
 298  * When there are too many tasks (sysctl_sched_nr_latency) we have to stretch
 299  * this period because otherwise the slices get too small.
 300  *
 301  * p = (nr <= nl) ? l : l*nr/nl
 302  */
 303 static u64 __sched_period(unsigned long nr_running)
 304 {
 305         u64 period = sysctl_sched_latency;
 306         unsigned long nr_latency = sched_nr_latency;
 307
 308         if (unlikely(nr_running > nr_latency)) {
 309                 period = sysctl_sched_min_granularity;
 310                 period *= nr_running;
 311         }
 312
 313         return period;
 314 }
 315
 316 /*
 317  * We calculate the wall-time slice from the period by taking a part
 318  * proportional to the weight.
 319  *
 320  * s = p*w/rw
 321  */
 322 static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
 323 {
 324 <<<<<<< HEAD:kernel/sched_fair.c
 325         u64 slice = __sched_period(cfs_rq->nr_running);
 326
 327         slice *= se->load.weight;
 328         do_div(slice, cfs_rq->load.weight);
 329
 330         return slice;
 331 =======
 332         return calc_delta_mine(__sched_period(cfs_rq->nr_running),
 333                                se->load.weight, &cfs_rq->load);
 334 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
 335 }
 336
 337 /*
 338  * We calculate the vruntime slice.
 339  *
 340  * vs = s/w = p/rw
 341  */
 342 static u64 __sched_vslice(unsigned long rq_weight, unsigned long nr_running)
 343 {
 344         u64 vslice = __sched_period(nr_running);
 345
 346         vslice *= NICE_0_LOAD;
 347         do_div(vslice, rq_weight);
 348
 349         return vslice;
 350 }
 351
 352 static u64 sched_vslice(struct cfs_rq *cfs_rq)
 353 {
 354         return __sched_vslice(cfs_rq->load.weight, cfs_rq->nr_running);
 355 }
 356
 357 static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
 358 {
 359         return __sched_vslice(cfs_rq->load.weight + se->load.weight,
 360                         cfs_rq->nr_running + 1);
 361 }
 362
 363 /*
 364  * Update the current task's runtime statistics. Skip current tasks that
 365  * are not in our scheduling class.
 366  */
 367 static inline void
 368 __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
 369               unsigned long delta_exec)
 370 {
 371         unsigned long delta_exec_weighted;
 372 <<<<<<< HEAD:kernel/sched_fair.c
 373         u64 vruntime;
 374 =======
 375 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
 376
 377         schedstat_set(curr->exec_max, max((u64)delta_exec, curr->exec_max));
 378
 379         curr->sum_exec_runtime += delta_exec;
 380         schedstat_add(cfs_rq, exec_clock, delta_exec);
 381         delta_exec_weighted = delta_exec;
 382         if (unlikely(curr->load.weight != NICE_0_LOAD)) {
 383                 delta_exec_weighted = calc_delta_fair(delta_exec_weighted,
 384                                                         &curr->load);
 385         }
 386         curr->vruntime += delta_exec_weighted;
 387 <<<<<<< HEAD:kernel/sched_fair.c
 388
 389         /*
 390          * maintain cfs_rq->min_vruntime to be a monotonic increasing
 391          * value tracking the leftmost vruntime in the tree.
 392          */
 393         if (first_fair(cfs_rq)) {
 394                 vruntime = min_vruntime(curr->vruntime,
 395                                 __pick_next_entity(cfs_rq)->vruntime);
 396         } else
 397                 vruntime = curr->vruntime;
 398
 399         cfs_rq->min_vruntime =
 400                 max_vruntime(cfs_rq->min_vruntime, vruntime);
 401 =======
 402 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
 403 }
 404
 405 static void update_curr(struct cfs_rq *cfs_rq)
 406 {
 407         struct sched_entity *curr = cfs_rq->curr;
 408         u64 now = rq_of(cfs_rq)->clock;
 409         unsigned long delta_exec;
 410
 411         if (unlikely(!curr))
 412                 return;
 413
 414         /*
 415          * Get the amount of time the current task was running
 416          * since the last time we changed load (this cannot
 417          * overflow on 32 bits):
 418          */
 419         delta_exec = (unsigned long)(now - curr->exec_start);
 420
 421         __update_curr(cfs_rq, curr, delta_exec);
 422         curr->exec_start = now;
 423
 424         if (entity_is_task(curr)) {
 425                 struct task_struct *curtask = task_of(curr);
 426
 427                 cpuacct_charge(curtask, delta_exec);
 428         }
 429 }
 430
 431 static inline void
 432 update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
 433 {
 434         schedstat_set(se->wait_start, rq_of(cfs_rq)->clock);
 435 }
 436
 437 /*
 438  * Task is being enqueued - update stats:
 439  */
 440 static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 441 {
 442         /*
 443          * Are we enqueueing a waiting task? (for current tasks
 444          * a dequeue/enqueue event is a NOP)
 445          */
 446         if (se != cfs_rq->curr)
 447                 update_stats_wait_start(cfs_rq, se);
 448 }
 449
 450 static void
 451 update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
 452 {
 453         schedstat_set(se->wait_max, max(se->wait_max,
 454                         rq_of(cfs_rq)->clock - se->wait_start));
 455         schedstat_set(se->wait_count, se->wait_count + 1);
 456         schedstat_set(se->wait_sum, se->wait_sum +
 457                         rq_of(cfs_rq)->clock - se->wait_start);
 458         schedstat_set(se->wait_start, 0);
 459 }
 460
 461 static inline void
 462 update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 463 {
 464         /*
 465          * Mark the end of the wait period if dequeueing a
 466          * waiting task:
 467          */
 468         if (se != cfs_rq->curr)
 469                 update_stats_wait_end(cfs_rq, se);
 470 }
 471
 472 /*
 473  * We are picking a new current task - update its stats:
 474  */
 475 static inline void
 476 update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
 477 {
 478         /*
 479          * We are starting a new run period:
 480          */
 481         se->exec_start = rq_of(cfs_rq)->clock;
 482 }
 483
 484 /**************************************************
 485  * Scheduling class queueing methods:
 486  */
 487
 488 static void
 489 account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 490 {
 491         update_load_add(&cfs_rq->load, se->load.weight);
 492         cfs_rq->nr_running++;
 493         se->on_rq = 1;
 494 }
 495
 496 static void
 497 account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 498 {
 499         update_load_sub(&cfs_rq->load, se->load.weight);
 500         cfs_rq->nr_running--;
 501         se->on_rq = 0;
 502 }
 503
 504 static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 505 {
 506 #ifdef CONFIG_SCHEDSTATS
 507         if (se->sleep_start) {
 508                 u64 delta = rq_of(cfs_rq)->clock - se->sleep_start;
 509                 struct task_struct *tsk = task_of(se);
 510
 511                 if ((s64)delta < 0)
 512                         delta = 0;
 513
 514                 if (unlikely(delta > se->sleep_max))
 515                         se->sleep_max = delta;
 516
 517                 se->sleep_start = 0;
 518                 se->sum_sleep_runtime += delta;
 519
 520                 account_scheduler_latency(tsk, delta >> 10, 1);
 521         }
 522         if (se->block_start) {
 523                 u64 delta = rq_of(cfs_rq)->clock - se->block_start;
 524                 struct task_struct *tsk = task_of(se);
 525
 526                 if ((s64)delta < 0)
 527                         delta = 0;
 528
 529                 if (unlikely(delta > se->block_max))
 530                         se->block_max = delta;
 531
 532                 se->block_start = 0;
 533                 se->sum_sleep_runtime += delta;
 534
 535                 /*
 536                  * Blocking time is in units of nanosecs, so shift by 20 to
 537                  * get a milliseconds-range estimation of the amount of
 538                  * time that the task spent sleeping:
 539                  */
 540                 if (unlikely(prof_on == SLEEP_PROFILING)) {
 541
 542                         profile_hits(SLEEP_PROFILING, (void *)get_wchan(tsk),
 543                                      delta >> 20);
 544                 }
 545                 account_scheduler_latency(tsk, delta >> 10, 0);
 546         }
 547 #endif
 548 }
 549
 550 static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
 551 {
 552 #ifdef CONFIG_SCHED_DEBUG
 553         s64 d = se->vruntime - cfs_rq->min_vruntime;
 554
 555         if (d < 0)
 556                 d = -d;
 557
 558         if (d > 3*sysctl_sched_latency)
 559                 schedstat_inc(cfs_rq, nr_spread_over);
 560 #endif
 561 }
 562
 563 static void
 564 place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 565 {
 566         u64 vruntime;
 567
 568 <<<<<<< HEAD:kernel/sched_fair.c
 569         vruntime = cfs_rq->min_vruntime;
 570 =======
 571         if (first_fair(cfs_rq)) {
 572                 vruntime = min_vruntime(cfs_rq->min_vruntime,
 573                                 __pick_next_entity(cfs_rq)->vruntime);
 574         } else
 575                 vruntime = cfs_rq->min_vruntime;
 576 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
 577
 578         if (sched_feat(TREE_AVG)) {
 579                 struct sched_entity *last = __pick_last_entity(cfs_rq);
 580                 if (last) {
 581                         vruntime += last->vruntime;
 582                         vruntime >>= 1;
 583                 }
 584         } else if (sched_feat(APPROX_AVG) && cfs_rq->nr_running)
 585                 vruntime += sched_vslice(cfs_rq)/2;
 586
 587         /*
 588          * The 'current' period is already promised to the current tasks,
 589          * however the extra weight of the new task will slow them down a
 590          * little, place the new task so that it fits in the slot that
 591          * stays open at the end.
 592          */
 593         if (initial && sched_feat(START_DEBIT))
 594                 vruntime += sched_vslice_add(cfs_rq, se);
 595
 596         if (!initial) {
 597                 /* sleeps upto a single latency don't count. */
 598 <<<<<<< HEAD:kernel/sched_fair.c
 599                 if (sched_feat(NEW_FAIR_SLEEPERS))
 600                         vruntime -= sysctl_sched_latency;
 601 =======
 602                 if (sched_feat(NEW_FAIR_SLEEPERS)) {
 603                         vruntime -= calc_delta_fair(sysctl_sched_latency,
 604                                                     &cfs_rq->load);
 605                 }
 606 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
 607
 608                 /* ensure we never gain time by being placed backwards. */
 609                 vruntime = max_vruntime(se->vruntime, vruntime);
 610         }
 611
 612         se->vruntime = vruntime;
 613 }
 614
 615 static void
 616 enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
 617 {
 618         /*
 619          * Update run-time statistics of the 'current'.
 620          */
 621         update_curr(cfs_rq);
 622
 623         if (wakeup) {
 624                 place_entity(cfs_rq, se, 0);
 625                 enqueue_sleeper(cfs_rq, se);
 626         }
 627
 628         update_stats_enqueue(cfs_rq, se);
 629         check_spread(cfs_rq, se);
 630         if (se != cfs_rq->curr)
 631                 __enqueue_entity(cfs_rq, se);
 632         account_entity_enqueue(cfs_rq, se);
 633 }
 634
 635 <<<<<<< HEAD:kernel/sched_fair.c
 636 =======
 637 static void update_avg(u64 *avg, u64 sample)
 638 {
 639         s64 diff = sample - *avg;
 640         *avg += diff >> 3;
 641 }
 642
 643 static void update_avg_stats(struct cfs_rq *cfs_rq, struct sched_entity *se)
 644 {
 645         if (!se->last_wakeup)
 646                 return;
 647
 648         update_avg(&se->avg_overlap, se->sum_exec_runtime - se->last_wakeup);
 649         se->last_wakeup = 0;
 650 }
 651
 652 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
 653 static void
 654 dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
 655 {
 656         /*
 657          * Update run-time statistics of the 'current'.
 658          */
 659         update_curr(cfs_rq);
 660
 661         update_stats_dequeue(cfs_rq, se);
 662         if (sleep) {
 663 <<<<<<< HEAD:kernel/sched_fair.c
 664 =======
 665                 update_avg_stats(cfs_rq, se);
 666 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
 667 #ifdef CONFIG_SCHEDSTATS
 668                 if (entity_is_task(se)) {
 669                         struct task_struct *tsk = task_of(se);
 670
 671                         if (tsk->state & TASK_INTERRUPTIBLE)
 672                                 se->sleep_start = rq_of(cfs_rq)->clock;
 673                         if (tsk->state & TASK_UNINTERRUPTIBLE)
 674                                 se->block_start = rq_of(cfs_rq)->clock;
 675                 }
 676 #endif
 677         }
 678
 679         if (se != cfs_rq->curr)
 680                 __dequeue_entity(cfs_rq, se);
 681         account_entity_dequeue(cfs_rq, se);
 682 }
 683
 684 /*
 685  * Preempt the current task with a newly woken task if needed:
 686  */
 687 static void
 688 check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 689 {
 690         unsigned long ideal_runtime, delta_exec;
 691
 692         ideal_runtime = sched_slice(cfs_rq, curr);
 693         delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
 694         if (delta_exec > ideal_runtime)
 695                 resched_task(rq_of(cfs_rq)->curr);
 696 }
 697
 698 static void
 699 set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 700 {
 701         /* 'current' is not kept within the tree. */
 702         if (se->on_rq) {
 703                 /*
 704                  * Any task has to be enqueued before it get to execute on
 705                  * a CPU. So account for the time it spent waiting on the
 706                  * runqueue.
 707                  */
 708                 update_stats_wait_end(cfs_rq, se);
 709                 __dequeue_entity(cfs_rq, se);
 710         }
 711
 712         update_stats_curr_start(cfs_rq, se);
 713         cfs_rq->curr = se;
 714 #ifdef CONFIG_SCHEDSTATS
 715         /*
 716          * Track our maximum slice length, if the CPU's load is at
 717          * least twice that of our own weight (i.e. dont track it
 718          * when there are only lesser-weight tasks around):
 719          */
 720         if (rq_of(cfs_rq)->load.weight >= 2*se->load.weight) {
 721                 se->slice_max = max(se->slice_max,
 722                         se->sum_exec_runtime - se->prev_sum_exec_runtime);
 723         }
 724 #endif
 725         se->prev_sum_exec_runtime = se->sum_exec_runtime;
 726 }
 727
 728 <<<<<<< HEAD:kernel/sched_fair.c
 729 =======
 730 static struct sched_entity *
 731 pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se)
 732 {
 733         s64 diff, gran;
 734
 735         if (!cfs_rq->next)
 736                 return se;
 737
 738         diff = cfs_rq->next->vruntime - se->vruntime;
 739         if (diff < 0)
 740                 return se;
 741
 742         gran = calc_delta_fair(sysctl_sched_wakeup_granularity, &cfs_rq->load);
 743         if (diff > gran)
 744                 return se;
 745
 746         return cfs_rq->next;
 747 }
 748
 749 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
 750 static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
 751 {
 752         struct sched_entity *se = NULL;
 753
 754         if (first_fair(cfs_rq)) {
 755                 se = __pick_next_entity(cfs_rq);
 756 <<<<<<< HEAD:kernel/sched_fair.c
 757 =======
 758                 se = pick_next(cfs_rq, se);
 759 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
 760                 set_next_entity(cfs_rq, se);
 761         }
 762
 763         return se;
 764 }
 765
 766 static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
 767 {
 768         /*
 769          * If still on the runqueue then deactivate_task()
 770          * was not called and update_curr() has to be done:
 771          */
 772         if (prev->on_rq)
 773                 update_curr(cfs_rq);
 774
 775         check_spread(cfs_rq, prev);
 776         if (prev->on_rq) {
 777                 update_stats_wait_start(cfs_rq, prev);
 778                 /* Put 'current' back into the tree. */
 779                 __enqueue_entity(cfs_rq, prev);
 780         }
 781         cfs_rq->curr = NULL;
 782 }
 783
 784 static void
 785 entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
 786 {
 787         /*
 788          * Update run-time statistics of the 'current'.
 789          */
 790         update_curr(cfs_rq);
 791
 792 #ifdef CONFIG_SCHED_HRTICK
 793         /*
 794          * queued ticks are scheduled to match the slice, so don't bother
 795          * validating it and just reschedule.
 796          */
 797         if (queued)
 798                 return resched_task(rq_of(cfs_rq)->curr);
 799         /*
 800          * don't let the period tick interfere with the hrtick preemption
 801          */
 802         if (!sched_feat(DOUBLE_TICK) &&
 803                         hrtimer_active(&rq_of(cfs_rq)->hrtick_timer))
 804                 return;
 805 #endif
 806
 807         if (cfs_rq->nr_running > 1 || !sched_feat(WAKEUP_PREEMPT))
 808                 check_preempt_tick(cfs_rq, curr);
 809 }
 810
 811 /**************************************************
 812  * CFS operations on tasks:
 813  */
 814
 815 #ifdef CONFIG_FAIR_GROUP_SCHED
 816
 817 /* Walk up scheduling entities hierarchy */
 818 #define for_each_sched_entity(se) \
 819                 for (; se; se = se->parent)
 820
 821 static inline struct cfs_rq *task_cfs_rq(struct task_struct *p)
 822 {
 823         return p->se.cfs_rq;
 824 }
 825
 826 /* runqueue on which this entity is (to be) queued */
 827 static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se)
 828 {
 829         return se->cfs_rq;
 830 }
 831
 832 /* runqueue "owned" by this group */
 833 static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
 834 {
 835         return grp->my_q;
 836 }
 837
 838 /* Given a group's cfs_rq on one cpu, return its corresponding cfs_rq on
 839  * another cpu ('this_cpu')
 840  */
 841 static inline struct cfs_rq *cpu_cfs_rq(struct cfs_rq *cfs_rq, int this_cpu)
 842 {
 843         return cfs_rq->tg->cfs_rq[this_cpu];
 844 }
 845
 846 /* Iterate thr' all leaf cfs_rq's on a runqueue */
 847 #define for_each_leaf_cfs_rq(rq, cfs_rq) \
 848         list_for_each_entry_rcu(cfs_rq, &rq->leaf_cfs_rq_list, leaf_cfs_rq_list)
 849
 850 /* Do the two (enqueued) entities belong to the same group ? */
 851 static inline int
 852 is_same_group(struct sched_entity *se, struct sched_entity *pse)
 853 {
 854         if (se->cfs_rq == pse->cfs_rq)
 855                 return 1;
 856
 857         return 0;
 858 }
 859
 860 static inline struct sched_entity *parent_entity(struct sched_entity *se)
 861 {
 862         return se->parent;
 863 }
 864
 865 <<<<<<< HEAD:kernel/sched_fair.c
 866 #define GROUP_IMBALANCE_PCT     20
 867
 868 =======
 869 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
 870 #else   /* CONFIG_FAIR_GROUP_SCHED */
 871
 872 #define for_each_sched_entity(se) \
 873                 for (; se; se = NULL)
 874
 875 static inline struct cfs_rq *task_cfs_rq(struct task_struct *p)
 876 {
 877         return &task_rq(p)->cfs;
 878 }
 879
 880 static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se)
 881 {
 882         struct task_struct *p = task_of(se);
 883         struct rq *rq = task_rq(p);
 884
 885         return &rq->cfs;
 886 }
 887
 888 /* runqueue "owned" by this group */
 889 static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
 890 {
 891         return NULL;
 892 }
 893
 894 static inline struct cfs_rq *cpu_cfs_rq(struct cfs_rq *cfs_rq, int this_cpu)
 895 {
 896         return &cpu_rq(this_cpu)->cfs;
 897 }
 898
 899 #define for_each_leaf_cfs_rq(rq, cfs_rq) \
 900                 for (cfs_rq = &rq->cfs; cfs_rq; cfs_rq = NULL)
 901
 902 static inline int
 903 is_same_group(struct sched_entity *se, struct sched_entity *pse)
 904 {
 905         return 1;
 906 }
 907
 908 static inline struct sched_entity *parent_entity(struct sched_entity *se)
 909 {
 910         return NULL;
 911 }
 912
 913 #endif  /* CONFIG_FAIR_GROUP_SCHED */
 914
 915 #ifdef CONFIG_SCHED_HRTICK
 916 static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
 917 {
 918         int requeue = rq->curr == p;
 919         struct sched_entity *se = &p->se;
 920         struct cfs_rq *cfs_rq = cfs_rq_of(se);
 921
 922         WARN_ON(task_rq(p) != rq);
 923
 924         if (hrtick_enabled(rq) && cfs_rq->nr_running > 1) {
 925                 u64 slice = sched_slice(cfs_rq, se);
 926                 u64 ran = se->sum_exec_runtime - se->prev_sum_exec_runtime;
 927                 s64 delta = slice - ran;
 928
 929                 if (delta < 0) {
 930                         if (rq->curr == p)
 931                                 resched_task(p);
 932                         return;
 933                 }
 934
 935                 /*
 936                  * Don't schedule slices shorter than 10000ns, that just
 937                  * doesn't make sense. Rely on vruntime for fairness.
 938                  */
 939                 if (!requeue)
 940                         delta = max(10000LL, delta);
 941
 942                 hrtick_start(rq, delta, requeue);
 943         }
 944 }
 945 #else
 946 static inline void
 947 hrtick_start_fair(struct rq *rq, struct task_struct *p)
 948 {
 949 }
 950 #endif
 951
 952 /*
 953  * The enqueue_task method is called before nr_running is
 954  * increased. Here we update the fair scheduling stats and
 955  * then put the task into the rbtree:
 956  */
 957 static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
 958 {
 959         struct cfs_rq *cfs_rq;
 960 <<<<<<< HEAD:kernel/sched_fair.c
 961         struct sched_entity *se = &p->se,
 962                             *topse = NULL;      /* Highest schedulable entity */
 963         int incload = 1;
 964 =======
 965         struct sched_entity *se = &p->se;
 966 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
 967
 968         for_each_sched_entity(se) {
 969 <<<<<<< HEAD:kernel/sched_fair.c
 970                 topse = se;
 971                 if (se->on_rq) {
 972                         incload = 0;
 973 =======
 974                 if (se->on_rq)
 975 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
 976                         break;
 977 <<<<<<< HEAD:kernel/sched_fair.c
 978                 }
 979 =======
 980 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
 981                 cfs_rq = cfs_rq_of(se);
 982                 enqueue_entity(cfs_rq, se, wakeup);
 983                 wakeup = 1;
 984         }
 985 <<<<<<< HEAD:kernel/sched_fair.c
 986         /* Increment cpu load if we just enqueued the first task of a group on
 987          * 'rq->cpu'. 'topse' represents the group to which task 'p' belongs
 988          * at the highest grouping level.
 989          */
 990         if (incload)
 991                 inc_cpu_load(rq, topse->load.weight);
 992 =======
 993 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
 994
 995         hrtick_start_fair(rq, rq->curr);
 996 }
 997
 998 /*
 999  * The dequeue_task method is called before nr_running is
1000  * decreased. We remove the task from the rbtree and
1001  * update the fair scheduling stats:
1002  */
1003 static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep)
1004 {
1005         struct cfs_rq *cfs_rq;
1006 <<<<<<< HEAD:kernel/sched_fair.c
1007         struct sched_entity *se = &p->se,
1008                             *topse = NULL;      /* Highest schedulable entity */
1009         int decload = 1;
1010 =======
1011         struct sched_entity *se = &p->se;
1012 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
1013
1014         for_each_sched_entity(se) {
1015 <<<<<<< HEAD:kernel/sched_fair.c
1016                 topse = se;
1017 =======
1018 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
1019                 cfs_rq = cfs_rq_of(se);
1020                 dequeue_entity(cfs_rq, se, sleep);
1021                 /* Don't dequeue parent if it has other entities besides us */
1022 <<<<<<< HEAD:kernel/sched_fair.c
1023                 if (cfs_rq->load.weight) {
1024                         if (parent_entity(se))
1025                                 decload = 0;
1026 =======
1027                 if (cfs_rq->load.weight)
1028 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
1029                         break;
1030 <<<<<<< HEAD:kernel/sched_fair.c
1031                 }
1032 =======
1033 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
1034                 sleep = 1;
1035         }
1036 <<<<<<< HEAD:kernel/sched_fair.c
1037         /* Decrement cpu load if we just dequeued the last task of a group on
1038          * 'rq->cpu'. 'topse' represents the group to which task 'p' belongs
1039          * at the highest grouping level.
1040          */
1041         if (decload)
1042                 dec_cpu_load(rq, topse->load.weight);
1043 =======
1044 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
1045
1046         hrtick_start_fair(rq, rq->curr);
1047 }
1048
1049 /*
1050  * sched_yield() support is very simple - we dequeue and enqueue.
1051  *
1052  * If compat_yield is turned on then we requeue to the end of the tree.
1053  */
1054 static void yield_task_fair(struct rq *rq)
1055 {
1056         struct task_struct *curr = rq->curr;
1057         struct cfs_rq *cfs_rq = task_cfs_rq(curr);
1058         struct sched_entity *rightmost, *se = &curr->se;
1059
1060         /*
1061          * Are we the only task in the tree?
1062          */
1063         if (unlikely(cfs_rq->nr_running == 1))
1064                 return;
1065
1066         if (likely(!sysctl_sched_compat_yield) && curr->policy != SCHED_BATCH) {
1067                 __update_rq_clock(rq);
1068                 /*
1069                  * Update run-time statistics of the 'current'.
1070                  */
1071                 update_curr(cfs_rq);
1072
1073                 return;
1074         }
1075         /*
1076          * Find the rightmost entry in the rbtree:
1077          */
1078         rightmost = __pick_last_entity(cfs_rq);
1079         /*
1080          * Already in the rightmost position?
1081          */
1082         if (unlikely(rightmost->vruntime < se->vruntime))
1083                 return;
1084
1085         /*
1086          * Minimally necessary key value to be last in the tree:
1087          * Upon rescheduling, sched_class::put_prev_task() will place
1088          * 'current' within the tree based on its new key value.
1089          */
1090         se->vruntime = rightmost->vruntime + 1;
1091 }
1092
1093 /*
1094  * wake_idle() will wake a task on an idle cpu if task->cpu is
1095  * not idle and an idle cpu is available.  The span of cpus to
1096  * search starts with cpus closest then further out as needed,
1097  * so we always favor a closer, idle cpu.
1098  *
1099  * Returns the CPU we should wake onto.
1100  */
1101 #if defined(ARCH_HAS_SCHED_WAKE_IDLE)
1102 static int wake_idle(int cpu, struct task_struct *p)
1103 {
1104         cpumask_t tmp;
1105         struct sched_domain *sd;
1106         int i;
1107
1108         /*
1109          * If it is idle, then it is the best cpu to run this task.
1110          *
1111          * This cpu is also the best, if it has more than one task already.
1112          * Siblings must be also busy(in most cases) as they didn't already
1113          * pickup the extra load from this cpu and hence we need not check
1114          * sibling runqueue info. This will avoid the checks and cache miss
1115          * penalities associated with that.
1116          */
1117         if (idle_cpu(cpu) || cpu_rq(cpu)->nr_running > 1)
1118                 return cpu;
1119
1120         for_each_domain(cpu, sd) {
1121                 if (sd->flags & SD_WAKE_IDLE) {
1122                         cpus_and(tmp, sd->span, p->cpus_allowed);
1123                         for_each_cpu_mask(i, tmp) {
1124                                 if (idle_cpu(i)) {
1125                                         if (i != task_cpu(p)) {
1126                                                 schedstat_inc(p,
1127                                                        se.nr_wakeups_idle);
1128                                         }
1129                                         return i;
1130                                 }
1131                         }
1132                 } else {
1133                         break;
1134                 }
1135         }
1136         return cpu;
1137 }
1138 #else
/* No ARCH_HAS_SCHED_WAKE_IDLE: the proposed cpu is returned unchanged. */
static inline int
wake_idle(int cpu, struct task_struct *p)
{
        return cpu;
}
1143 #endif
1144
1145 #ifdef CONFIG_SMP
1146 <<<<<<< HEAD:kernel/sched_fair.c
1147 static int select_task_rq_fair(struct task_struct *p, int sync)
1148 =======
1149
1150 static const struct sched_class fair_sched_class;
1151
1152 static int
1153 wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq,
1154             struct task_struct *p, int prev_cpu, int this_cpu, int sync,
1155             int idx, unsigned long load, unsigned long this_load,
1156             unsigned int imbalance)
1157 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
1158 {
1159 <<<<<<< HEAD:kernel/sched_fair.c
1160         int cpu, this_cpu;
1161         struct rq *rq;
1162         struct sched_domain *sd, *this_sd = NULL;
1163         int new_cpu;
1164 =======
1165         struct task_struct *curr = this_rq->curr;
1166         unsigned long tl = this_load;
1167         unsigned long tl_per_task;
1168
1169         if (!(this_sd->flags & SD_WAKE_AFFINE))
1170                 return 0;
1171
1172         /*
1173          * If the currently running task will sleep within
1174          * a reasonable amount of time then attract this newly
1175          * woken task:
1176          */
1177         if (sync && curr->sched_class == &fair_sched_class) {
1178                 if (curr->se.avg_overlap < sysctl_sched_migration_cost &&
1179                                 p->se.avg_overlap < sysctl_sched_migration_cost)
1180                         return 1;
1181         }
1182
1183         schedstat_inc(p, se.nr_wakeups_affine_attempts);
1184         tl_per_task = cpu_avg_load_per_task(this_cpu);
1185
1186         /*
1187          * If sync wakeup then subtract the (maximum possible)
1188          * effect of the currently running task from the load
1189          * of the current CPU:
1190          */
1191         if (sync)
1192                 tl -= current->se.load.weight;
1193
1194         if ((tl <= load && tl + target_load(prev_cpu, idx) <= tl_per_task) ||
1195                         100*(tl + p->se.load.weight) <= imbalance*load) {
1196                 /*
1197                  * This domain has SD_WAKE_AFFINE and
1198                  * p is cache cold in this domain, and
1199                  * there is no bad imbalance.
1200                  */
1201                 schedstat_inc(this_sd, ttwu_move_affine);
1202                 schedstat_inc(p, se.nr_wakeups_affine);
1203 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
1204
1205 <<<<<<< HEAD:kernel/sched_fair.c
1206         cpu      = task_cpu(p);
1207         rq       = task_rq(p);
1208         this_cpu = smp_processor_id();
1209         new_cpu  = cpu;
1210 =======
1211                 return 1;
1212         }
1213         return 0;
1214 }
1215 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
1216
1217 <<<<<<< HEAD:kernel/sched_fair.c
1218         if (cpu == this_cpu)
1219                 goto out_set_cpu;
1220 =======
1221 static int select_task_rq_fair(struct task_struct *p, int sync)
1222 {
1223         struct sched_domain *sd, *this_sd = NULL;
1224         int prev_cpu, this_cpu, new_cpu;
1225         unsigned long load, this_load;
1226         struct rq *rq, *this_rq;
1227         unsigned int imbalance;
1228         int idx;
1229
1230         prev_cpu        = task_cpu(p);
1231         rq              = task_rq(p);
1232         this_cpu        = smp_processor_id();
1233         this_rq         = cpu_rq(this_cpu);
1234         new_cpu         = prev_cpu;
1235 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
1236
1237 <<<<<<< HEAD:kernel/sched_fair.c
1238 =======
1239         /*
1240          * 'this_sd' is the first domain that both
1241          * this_cpu and prev_cpu are present in:
1242          */
1243 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
1244         for_each_domain(this_cpu, sd) {
1245 <<<<<<< HEAD:kernel/sched_fair.c
1246                 if (cpu_isset(cpu, sd->span)) {
1247 =======
1248                 if (cpu_isset(prev_cpu, sd->span)) {
1249 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
1250                         this_sd = sd;
1251                         break;
1252                 }
1253         }
1254
1255         if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed)))
1256 <<<<<<< HEAD:kernel/sched_fair.c
1257                 goto out_set_cpu;
1258 =======
1259                 goto out;
1260 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
1261
1262         /*
1263          * Check for affine wakeup and passive balancing possibilities.
1264          */
1265 <<<<<<< HEAD:kernel/sched_fair.c
1266         if (this_sd) {
1267                 int idx = this_sd->wake_idx;
1268                 unsigned int imbalance;
1269                 unsigned long load, this_load;
1270
1271                 imbalance = 100 + (this_sd->imbalance_pct - 100) / 2;
1272
1273                 load = source_load(cpu, idx);
1274                 this_load = target_load(this_cpu, idx);
1275
1276                 new_cpu = this_cpu; /* Wake to this CPU if we can */
1277
1278                 if (this_sd->flags & SD_WAKE_AFFINE) {
1279                         unsigned long tl = this_load;
1280                         unsigned long tl_per_task;
1281
1282                         /*
1283                          * Attract cache-cold tasks on sync wakeups:
1284                          */
1285                         if (sync && !task_hot(p, rq->clock, this_sd))
1286                                 goto out_set_cpu;
1287
1288                         schedstat_inc(p, se.nr_wakeups_affine_attempts);
1289                         tl_per_task = cpu_avg_load_per_task(this_cpu);
1290
1291                         /*
1292                          * If sync wakeup then subtract the (maximum possible)
1293                          * effect of the currently running task from the load
1294                          * of the current CPU:
1295                          */
1296                         if (sync)
1297                                 tl -= current->se.load.weight;
1298
1299                         if ((tl <= load &&
1300                                 tl + target_load(cpu, idx) <= tl_per_task) ||
1301                                100*(tl + p->se.load.weight) <= imbalance*load) {
1302                                 /*
1303                                  * This domain has SD_WAKE_AFFINE and
1304                                  * p is cache cold in this domain, and
1305                                  * there is no bad imbalance.
1306                                  */
1307                                 schedstat_inc(this_sd, ttwu_move_affine);
1308                                 schedstat_inc(p, se.nr_wakeups_affine);
1309                                 goto out_set_cpu;
1310                         }
1311                 }
1312 =======
1313         if (!this_sd)
1314                 goto out;
1315 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
1316
1317 <<<<<<< HEAD:kernel/sched_fair.c
1318                 /*
1319                  * Start passive balancing when half the imbalance_pct
1320                  * limit is reached.
1321                  */
1322                 if (this_sd->flags & SD_WAKE_BALANCE) {
1323                         if (imbalance*this_load <= 100*load) {
1324                                 schedstat_inc(this_sd, ttwu_move_balance);
1325                                 schedstat_inc(p, se.nr_wakeups_passive);
1326                                 goto out_set_cpu;
1327                         }
1328 =======
1329         idx = this_sd->wake_idx;
1330
1331         imbalance = 100 + (this_sd->imbalance_pct - 100) / 2;
1332
1333         load = source_load(prev_cpu, idx);
1334         this_load = target_load(this_cpu, idx);
1335
1336         if (wake_affine(rq, this_sd, this_rq, p, prev_cpu, this_cpu, sync, idx,
1337                                      load, this_load, imbalance))
1338                 return this_cpu;
1339
1340         if (prev_cpu == this_cpu)
1341                 goto out;
1342
1343         /*
1344          * Start passive balancing when half the imbalance_pct
1345          * limit is reached.
1346          */
1347         if (this_sd->flags & SD_WAKE_BALANCE) {
1348                 if (imbalance*this_load <= 100*load) {
1349                         schedstat_inc(this_sd, ttwu_move_balance);
1350                         schedstat_inc(p, se.nr_wakeups_passive);
1351                         return this_cpu;
1352 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
1353                 }
1354         }
1355
1356 <<<<<<< HEAD:kernel/sched_fair.c
1357         new_cpu = cpu; /* Could not wake to this_cpu. Wake to cpu instead */
1358 out_set_cpu:
1359 =======
1360 out:
1361 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
1362         return wake_idle(new_cpu, p);
1363 }
1364 #endif /* CONFIG_SMP */
1365
1366
1367 /*
1368  * Preempt the current task with a newly woken task if needed:
1369  */
1370 static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
1371 {
1372         struct task_struct *curr = rq->curr;
1373         struct cfs_rq *cfs_rq = task_cfs_rq(curr);
1374         struct sched_entity *se = &curr->se, *pse = &p->se;
1375         unsigned long gran;
1376
1377         if (unlikely(rt_prio(p->prio))) {
1378                 update_rq_clock(rq);
1379                 update_curr(cfs_rq);
1380                 resched_task(curr);
1381                 return;
1382         }
1383 <<<<<<< HEAD:kernel/sched_fair.c
1384 =======
1385
1386         se->last_wakeup = se->sum_exec_runtime;
1387         if (unlikely(se == pse))
1388                 return;
1389
1390         cfs_rq_of(pse)->next = pse;
1391
1392 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
1393         /*
1394          * Batch tasks do not preempt (their preemption is driven by
1395          * the tick):
1396          */
1397         if (unlikely(p->policy == SCHED_BATCH))
1398                 return;
1399
1400         if (!sched_feat(WAKEUP_PREEMPT))
1401                 return;
1402
1403         while (!is_same_group(se, pse)) {
1404                 se = parent_entity(se);
1405                 pse = parent_entity(pse);
1406         }
1407
1408         gran = sysctl_sched_wakeup_granularity;
1409         /*
1410          * More easily preempt - nice tasks, while not making
1411          * it harder for + nice tasks.
1412          */
1413         if (unlikely(se->load.weight > NICE_0_LOAD))
1414                 gran = calc_delta_fair(gran, &se->load);
1415
1416         if (pse->vruntime + gran < se->vruntime)
1417                 resched_task(curr);
1418 }
1419
1420 static struct task_struct *pick_next_task_fair(struct rq *rq)
1421 {
1422         struct task_struct *p;
1423         struct cfs_rq *cfs_rq = &rq->cfs;
1424         struct sched_entity *se;
1425
1426         if (unlikely(!cfs_rq->nr_running))
1427                 return NULL;
1428
1429         do {
1430                 se = pick_next_entity(cfs_rq);
1431                 cfs_rq = group_cfs_rq(se);
1432         } while (cfs_rq);
1433
1434         p = task_of(se);
1435         hrtick_start_fair(rq, p);
1436
1437         return p;
1438 }
1439
1440 /*
1441  * Account for a descheduled task:
1442  */
1443 static void put_prev_task_fair(struct rq *rq, struct task_struct *prev)
1444 {
1445         struct sched_entity *se = &prev->se;
1446         struct cfs_rq *cfs_rq;
1447
1448         for_each_sched_entity(se) {
1449                 cfs_rq = cfs_rq_of(se);
1450                 put_prev_entity(cfs_rq, se);
1451         }
1452 }
1453
1454 #ifdef CONFIG_SMP
1455 /**************************************************
1456  * Fair scheduling class load-balancing methods:
1457  */
1458
1459 /*
1460  * Load-balancing iterator. Note: while the runqueue stays locked
1461  * during the whole iteration, the current task might be
1462  * dequeued so the iterator has to be dequeue-safe. Here we
1463  * achieve that by always pre-iterating before returning
1464  * the current task:
1465  */
1466 static struct task_struct *
1467 __load_balance_iterator(struct cfs_rq *cfs_rq, struct rb_node *curr)
1468 {
1469         struct task_struct *p;
1470
1471         if (!curr)
1472                 return NULL;
1473
1474         p = rb_entry(curr, struct task_struct, se.run_node);
1475         cfs_rq->rb_load_balance_curr = rb_next(curr);
1476
1477         return p;
1478 }
1479
/* rq_iterator 'start' hook: 'arg' is the cfs_rq; begin at first_fair(). */
static struct task_struct *load_balance_start_fair(void *arg)
{
        struct cfs_rq *busy = arg;
        struct rb_node *first = first_fair(busy);

        return __load_balance_iterator(busy, first);
}
1486
1487 static struct task_struct *load_balance_next_fair(void *arg)
1488 {
1489         struct cfs_rq *cfs_rq = arg;
1490
1491         return __load_balance_iterator(cfs_rq, cfs_rq->rb_load_balance_curr);
1492 }
1493
1494 <<<<<<< HEAD:kernel/sched_fair.c
1495 =======
1496 #ifdef CONFIG_FAIR_GROUP_SCHED
1497 static int cfs_rq_best_prio(struct cfs_rq *cfs_rq)
1498 {
1499         struct sched_entity *curr;
1500         struct task_struct *p;
1501
1502         if (!cfs_rq->nr_running || !first_fair(cfs_rq))
1503                 return MAX_PRIO;
1504
1505         curr = cfs_rq->curr;
1506         if (!curr)
1507                 curr = __pick_next_entity(cfs_rq);
1508
1509         p = task_of(curr);
1510
1511         return p->prio;
1512 }
1513 #endif
1514
1515 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
1516 static unsigned long
1517 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
1518                   unsigned long max_load_move,
1519                   struct sched_domain *sd, enum cpu_idle_type idle,
1520                   int *all_pinned, int *this_best_prio)
1521 {
1522         struct cfs_rq *busy_cfs_rq;
1523         long rem_load_move = max_load_move;
1524         struct rq_iterator cfs_rq_iterator;
1525 <<<<<<< HEAD:kernel/sched_fair.c
1526         unsigned long load_moved;
1527 =======
1528 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
1529
1530         cfs_rq_iterator.start = load_balance_start_fair;
1531         cfs_rq_iterator.next = load_balance_next_fair;
1532
1533         for_each_leaf_cfs_rq(busiest, busy_cfs_rq) {
1534 #ifdef CONFIG_FAIR_GROUP_SCHED
1535 <<<<<<< HEAD:kernel/sched_fair.c
1536                 struct cfs_rq *this_cfs_rq = busy_cfs_rq->tg->cfs_rq[this_cpu];
1537                 unsigned long maxload, task_load, group_weight;
1538                 unsigned long thisload, per_task_load;
1539                 struct sched_entity *se = busy_cfs_rq->tg->se[busiest->cpu];
1540
1541                 task_load = busy_cfs_rq->load.weight;
1542                 group_weight = se->load.weight;
1543 =======
1544                 struct cfs_rq *this_cfs_rq;
1545                 long imbalance;
1546                 unsigned long maxload;
1547 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
1548
1549 <<<<<<< HEAD:kernel/sched_fair.c
1550                 /*
1551                  * 'group_weight' is contributed by tasks of total weight
1552                  * 'task_load'. To move 'rem_load_move' worth of weight only,
1553                  * we need to move a maximum task load of:
1554                  *
1555                  *      maxload = (remload / group_weight) * task_load;
1556                  */
1557                 maxload = (rem_load_move * task_load) / group_weight;
1558 =======
1559                 this_cfs_rq = cpu_cfs_rq(busy_cfs_rq, this_cpu);
1560 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
1561
1562 <<<<<<< HEAD:kernel/sched_fair.c
1563                 if (!maxload || !task_load)
1564 =======
1565                 imbalance = busy_cfs_rq->load.weight - this_cfs_rq->load.weight;
1566                 /* Don't pull if this_cfs_rq has more load than busy_cfs_rq */
1567                 if (imbalance <= 0)
1568 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
1569                         continue;
1570
1571 <<<<<<< HEAD:kernel/sched_fair.c
1572                 per_task_load = task_load / busy_cfs_rq->nr_running;
1573                 /*
1574                  * balance_tasks will try to forcibly move atleast one task if
1575                  * possible (because of SCHED_LOAD_SCALE_FUZZ). Avoid that if
1576                  * maxload is less than GROUP_IMBALANCE_FUZZ% the per_task_load.
1577                  */
1578                  if (100 * maxload < GROUP_IMBALANCE_PCT * per_task_load)
1579                         continue;
1580 =======
1581                 /* Don't pull more than imbalance/2 */
1582                 imbalance /= 2;
1583                 maxload = min(rem_load_move, imbalance);
1584 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
1585
1586 <<<<<<< HEAD:kernel/sched_fair.c
1587                 /* Disable priority-based load balance */
1588                 *this_best_prio = 0;
1589                 thisload = this_cfs_rq->load.weight;
1590 =======
1591                 *this_best_prio = cfs_rq_best_prio(this_cfs_rq);
1592 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
1593 #else
1594 # define maxload rem_load_move
1595 #endif
1596                 /*
1597                  * pass busy_cfs_rq argument into
1598                  * load_balance_[start|next]_fair iterators
1599                  */
1600                 cfs_rq_iterator.arg = busy_cfs_rq;
1601 <<<<<<< HEAD:kernel/sched_fair.c
1602                 load_moved = balance_tasks(this_rq, this_cpu, busiest,
1603 =======
1604                 rem_load_move -= balance_tasks(this_rq, this_cpu, busiest,
1605 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
1606                                                maxload, sd, idle, all_pinned,
1607                                                this_best_prio,
1608                                                &cfs_rq_iterator);
1609
1610 <<<<<<< HEAD:kernel/sched_fair.c
1611 #ifdef CONFIG_FAIR_GROUP_SCHED
1612                 /*
1613                  * load_moved holds the task load that was moved. The
1614                  * effective (group) weight moved would be:
1615                  *      load_moved_eff = load_moved/task_load * group_weight;
1616                  */
1617                 load_moved = (group_weight * load_moved) / task_load;
1618
1619                 /* Adjust shares on both cpus to reflect load_moved */
1620                 group_weight -= load_moved;
1621                 set_se_shares(se, group_weight);
1622
1623                 se = busy_cfs_rq->tg->se[this_cpu];
1624                 if (!thisload)
1625                         group_weight = load_moved;
1626                 else
1627                         group_weight = se->load.weight + load_moved;
1628                 set_se_shares(se, group_weight);
1629 #endif
1630
1631                 rem_load_move -= load_moved;
1632
1633 =======
1634 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
1635                 if (rem_load_move <= 0)
1636                         break;
1637         }
1638
1639         return max_load_move - rem_load_move;
1640 }
1641
1642 static int
1643 move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
1644                    struct sched_domain *sd, enum cpu_idle_type idle)
1645 {
1646         struct cfs_rq *busy_cfs_rq;
1647         struct rq_iterator cfs_rq_iterator;
1648
1649         cfs_rq_iterator.start = load_balance_start_fair;
1650         cfs_rq_iterator.next = load_balance_next_fair;
1651
1652         for_each_leaf_cfs_rq(busiest, busy_cfs_rq) {
1653                 /*
1654                  * pass busy_cfs_rq argument into
1655                  * load_balance_[start|next]_fair iterators
1656                  */
1657                 cfs_rq_iterator.arg = busy_cfs_rq;
1658                 if (iter_move_one_task(this_rq, this_cpu, busiest, sd, idle,
1659                                        &cfs_rq_iterator))
1660                     return 1;
1661         }
1662
1663         return 0;
1664 }
1665 #endif
1666
1667 /*
1668  * scheduler tick hitting a task of our scheduling class:
1669  */
1670 static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
1671 {
1672         struct cfs_rq *cfs_rq;
1673         struct sched_entity *se = &curr->se;
1674
1675         for_each_sched_entity(se) {
1676                 cfs_rq = cfs_rq_of(se);
1677                 entity_tick(cfs_rq, se, queued);
1678         }
1679 }
1680
/*
 * Exchange the values of two lvalues of the same type. The temporary is
 * named __tmp rather than tmp so that swap(tmp, x) in a caller does not
 * expand to a self-initialized temporary.
 */
#define swap(a, b) \
        do { __typeof__(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
1682
1683 /*
1684  * Share the fairness runtime between parent and child, thus the
1685  * total amount of pressure for CPU stays equal - new tasks
1686  * get a chance to run but frequent forkers are not allowed to
1687  * monopolize the CPU. Note: the parent runqueue is locked,
1688  * the child is not running yet.
1689  */
1690 static void task_new_fair(struct rq *rq, struct task_struct *p)
1691 {
1692         struct cfs_rq *cfs_rq = task_cfs_rq(p);
1693         struct sched_entity *se = &p->se, *curr = cfs_rq->curr;
1694         int this_cpu = smp_processor_id();
1695
1696         sched_info_queued(p);
1697
1698         update_curr(cfs_rq);
1699         place_entity(cfs_rq, se, 1);
1700
1701         /* 'curr' will be NULL if the child belongs to a different group */
1702         if (sysctl_sched_child_runs_first && this_cpu == task_cpu(p) &&
1703                         curr && curr->vruntime < se->vruntime) {
1704                 /*
1705                  * Upon rescheduling, sched_class::put_prev_task() will place
1706                  * 'current' within the tree based on its new key value.
1707                  */
1708                 swap(curr->vruntime, se->vruntime);
1709         }
1710
1711         enqueue_task_fair(rq, p, 0);
1712         resched_task(rq->curr);
1713 }
1714
1715 /*
1716  * Priority of the task has changed. Check to see if we preempt
1717  * the current task.
1718  */
1719 static void prio_changed_fair(struct rq *rq, struct task_struct *p,
1720                               int oldprio, int running)
1721 {
1722         /*
1723          * Reschedule if we are currently running on this runqueue and
1724          * our priority decreased, or if we are not currently running on
1725          * this runqueue and our priority is higher than the current's
1726          */
1727         if (running) {
1728                 if (p->prio > oldprio)
1729                         resched_task(rq->curr);
1730         } else
1731                 check_preempt_curr(rq, p);
1732 }
1733
1734 /*
1735  * We switched to the sched_fair class.
1736  */
1737 static void switched_to_fair(struct rq *rq, struct task_struct *p,
1738                              int running)
1739 {
1740         /*
1741          * We were most likely switched from sched_rt, so
1742          * kick off the schedule if running, otherwise just see
1743          * if we can still preempt the current task.
1744          */
1745         if (running)
1746                 resched_task(rq->curr);
1747         else
1748                 check_preempt_curr(rq, p);
1749 }
1750
1751 /* Account for a task changing its policy or group.
1752  *
1753  * This routine is mostly called to set cfs_rq->curr field when a task
1754  * migrates between groups/classes.
1755  */
1756 static void set_curr_task_fair(struct rq *rq)
1757 {
1758         struct sched_entity *se = &rq->curr->se;
1759
1760         for_each_sched_entity(se)
1761                 set_next_entity(cfs_rq_of(se), se);
1762 }
1763
1764 <<<<<<< HEAD:kernel/sched_fair.c
1765 =======
1766 #ifdef CONFIG_FAIR_GROUP_SCHED
1767 static void moved_group_fair(struct task_struct *p)
1768 {
1769         struct cfs_rq *cfs_rq = task_cfs_rq(p);
1770
1771         update_curr(cfs_rq);
1772         place_entity(cfs_rq, &p->se, 1);
1773 }
1774 #endif
1775
1776 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
1777 /*
1778  * All the scheduling class methods:
1779  */
1780 static const struct sched_class fair_sched_class = {
1781         .next                   = &idle_sched_class,
1782         .enqueue_task           = enqueue_task_fair,
1783         .dequeue_task           = dequeue_task_fair,
1784         .yield_task             = yield_task_fair,
1785 #ifdef CONFIG_SMP
1786         .select_task_rq         = select_task_rq_fair,
1787 #endif /* CONFIG_SMP */
1788
1789         .check_preempt_curr     = check_preempt_wakeup,
1790
1791         .pick_next_task         = pick_next_task_fair,
1792         .put_prev_task          = put_prev_task_fair,
1793
1794 #ifdef CONFIG_SMP
1795         .load_balance           = load_balance_fair,
1796         .move_one_task          = move_one_task_fair,
1797 #endif
1798
1799         .set_curr_task          = set_curr_task_fair,
1800         .task_tick              = task_tick_fair,
1801         .task_new               = task_new_fair,
1802
1803         .prio_changed           = prio_changed_fair,
1804         .switched_to            = switched_to_fair,
1805 <<<<<<< HEAD:kernel/sched_fair.c
1806 =======
1807
1808 #ifdef CONFIG_FAIR_GROUP_SCHED
1809         .moved_group            = moved_group_fair,
1810 #endif
1811 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/sched_fair.c
1812 };
1813
#ifdef CONFIG_SCHED_DEBUG
/*
 * Print the CFS runqueue statistics of @cpu into the seq_file @m.
 *
 * With CONFIG_FAIR_GROUP_SCHED the runqueue's own ->cfs is printed
 * first. After that, every cfs_rq yielded by for_each_leaf_cfs_rq() is
 * printed, with the walk done inside an RCU read-side critical section.
 */
static void print_cfs_stats(struct seq_file *m, int cpu)
{
        struct rq *rq = cpu_rq(cpu);
        struct cfs_rq *leaf;

#ifdef CONFIG_FAIR_GROUP_SCHED
        print_cfs_rq(m, cpu, &rq->cfs);
#endif
        rcu_read_lock();
        for_each_leaf_cfs_rq(rq, leaf) {
                print_cfs_rq(m, cpu, leaf);
        }
        rcu_read_unlock();
}
#endif