Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fixes from Ingo Molnar:
 "Debug info and other statistics fixes and related enhancements"

* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/numa: Fix numa balancing stats in /proc/pid/sched
  sched/numa: Show numa_group ID in /proc/sched_debug task listings
  sched/debug: Move print_cfs_rq() declaration to kernel/sched/sched.h
  sched/stat: Expose /proc/pid/schedstat if CONFIG_SCHED_INFO=y
  sched/stat: Simplify the sched_info accounting dependency
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 1d540b3..87782e8 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -491,14 +491,17 @@
 }
 #endif
 
-#ifdef CONFIG_SCHEDSTATS
+#ifdef CONFIG_SCHED_INFO
 /*
  * Provides /proc/PID/schedstat
  */
 static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns,
 			      struct pid *pid, struct task_struct *task)
 {
-	seq_printf(m, "%llu %llu %lu\n",
+	if (unlikely(!sched_info_on()))
+		seq_printf(m, "0 0 0\n");
+	else
+		seq_printf(m, "%llu %llu %lu\n",
 		   (unsigned long long)task->se.sum_exec_runtime,
 		   (unsigned long long)task->sched_info.run_delay,
 		   task->sched_info.pcount);
@@ -2787,7 +2790,7 @@
 #ifdef CONFIG_STACKTRACE
 	ONE("stack",      S_IRUSR, proc_pid_stack),
 #endif
-#ifdef CONFIG_SCHEDSTATS
+#ifdef CONFIG_SCHED_INFO
 	ONE("schedstat",  S_IRUGO, proc_pid_schedstat),
 #endif
 #ifdef CONFIG_LATENCYTOP
@@ -3135,7 +3138,7 @@
 #ifdef CONFIG_STACKTRACE
 	ONE("stack",      S_IRUSR, proc_pid_stack),
 #endif
-#ifdef CONFIG_SCHEDSTATS
+#ifdef CONFIG_SCHED_INFO
 	ONE("schedstat", S_IRUGO, proc_pid_schedstat),
 #endif
 #ifdef CONFIG_LATENCYTOP
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8aa4a25..ae21f15 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -192,8 +192,6 @@
 #ifdef CONFIG_SCHED_DEBUG
 extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m);
 extern void proc_sched_set_task(struct task_struct *p);
-extern void
-print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
 #endif
 
 /*
@@ -838,7 +836,7 @@
 struct backing_dev_info;
 struct reclaim_state;
 
-#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
+#ifdef CONFIG_SCHED_INFO
 struct sched_info {
 	/* cumulative counters */
 	unsigned long pcount;	      /* # of times run on this cpu */
@@ -848,7 +846,7 @@
 	unsigned long long last_arrival,/* when we last ran on a cpu */
 			   last_queued;	/* when we were last queued to run */
 };
-#endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */
+#endif /* CONFIG_SCHED_INFO */
 
 #ifdef CONFIG_TASK_DELAY_ACCT
 struct task_delay_info {
@@ -1397,7 +1395,7 @@
 	int rcu_tasks_idle_cpu;
 #endif /* #ifdef CONFIG_TASKS_RCU */
 
-#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
+#ifdef CONFIG_SCHED_INFO
 	struct sched_info sched_info;
 #endif
 
diff --git a/init/Kconfig b/init/Kconfig
index 53c41a8..af09b4f 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -435,6 +435,7 @@
 config TASK_DELAY_ACCT
 	bool "Enable per-task delay accounting"
 	depends on TASKSTATS
+	select SCHED_INFO
 	help
 	  Collect information on time spent by a task waiting for system
 	  resources like cpu, synchronous block I/O completion and swapping
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b803e1b..5caa029 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2164,7 +2164,7 @@
 	set_task_cpu(p, cpu);
 	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 
-#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
+#ifdef CONFIG_SCHED_INFO
 	if (likely(sched_info_on()))
 		memset(&p->sched_info, 0, sizeof(p->sched_info));
 #endif
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 315c68e..4222ec5 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -142,7 +142,7 @@
 		0LL, 0L);
 #endif
 #ifdef CONFIG_NUMA_BALANCING
-	SEQ_printf(m, " %d", task_node(p));
+	SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
 #endif
 #ifdef CONFIG_CGROUP_SCHED
 	SEQ_printf(m, " %s", task_group_path(task_group(p)));
@@ -517,11 +517,21 @@
 	SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))
 
 
+#ifdef CONFIG_NUMA_BALANCING
+void print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
+		unsigned long tpf, unsigned long gsf, unsigned long gpf)
+{
+	SEQ_printf(m, "numa_faults node=%d ", node);
+	SEQ_printf(m, "task_private=%lu task_shared=%lu ", tsf, tpf);
+	SEQ_printf(m, "group_private=%lu group_shared=%lu\n", gsf, gpf);
+}
+#endif
+
+
 static void sched_show_numa(struct task_struct *p, struct seq_file *m)
 {
 #ifdef CONFIG_NUMA_BALANCING
 	struct mempolicy *pol;
-	int node, i;
 
 	if (p->mm)
 		P(mm->numa_scan_seq);
@@ -533,26 +543,12 @@
 	mpol_get(pol);
 	task_unlock(p);
 
-	SEQ_printf(m, "numa_migrations, %ld\n", xchg(&p->numa_pages_migrated, 0));
-
-	for_each_online_node(node) {
-		for (i = 0; i < 2; i++) {
-			unsigned long nr_faults = -1;
-			int cpu_current, home_node;
-
-			if (p->numa_faults)
-				nr_faults = p->numa_faults[2*node + i];
-
-			cpu_current = !i ? (task_node(p) == node) :
-				(pol && node_isset(node, pol->v.nodes));
-
-			home_node = (p->numa_preferred_nid == node);
-
-			SEQ_printf(m, "numa_faults_memory, %d, %d, %d, %d, %ld\n",
-				i, node, cpu_current, home_node, nr_faults);
-		}
-	}
-
+	P(numa_pages_migrated);
+	P(numa_preferred_nid);
+	P(total_numa_faults);
+	SEQ_printf(m, "current_node=%d, numa_group_id=%d\n",
+			task_node(p), task_numa_group_id(p));
+	show_numa_stats(p, m);
 	mpol_put(pol);
 #endif
 }
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3d57cc0..65c8f3e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8473,7 +8473,27 @@
 		print_cfs_rq(m, cpu, cfs_rq);
 	rcu_read_unlock();
 }
-#endif
+
+#ifdef CONFIG_NUMA_BALANCING
+void show_numa_stats(struct task_struct *p, struct seq_file *m)
+{
+	int node;
+	unsigned long tsf = 0, tpf = 0, gsf = 0, gpf = 0;
+
+	for_each_online_node(node) {
+		if (p->numa_faults) {
+			tsf = p->numa_faults[task_faults_idx(NUMA_MEM, node, 0)];
+			tpf = p->numa_faults[task_faults_idx(NUMA_MEM, node, 1)];
+		}
+		if (p->numa_group) {
+			gsf = p->numa_group->faults[task_faults_idx(NUMA_MEM, node, 0)],
+			gpf = p->numa_group->faults[task_faults_idx(NUMA_MEM, node, 1)];
+		}
+		print_numa_stats(m, node, tsf, tpf, gsf, gpf);
+	}
+}
+#endif /* CONFIG_NUMA_BALANCING */
+#endif /* CONFIG_SCHED_DEBUG */
 
 __init void init_sched_fair_class(void)
 {
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 8858891..84d4879 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1689,9 +1689,22 @@
 
 extern struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq);
 extern struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq);
+
+#ifdef	CONFIG_SCHED_DEBUG
 extern void print_cfs_stats(struct seq_file *m, int cpu);
 extern void print_rt_stats(struct seq_file *m, int cpu);
 extern void print_dl_stats(struct seq_file *m, int cpu);
+extern void
+print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
+
+#ifdef CONFIG_NUMA_BALANCING
+extern void
+show_numa_stats(struct task_struct *p, struct seq_file *m);
+extern void
+print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
+	unsigned long tpf, unsigned long gsf, unsigned long gpf);
+#endif /* CONFIG_NUMA_BALANCING */
+#endif /* CONFIG_SCHED_DEBUG */
 
 extern void init_cfs_rq(struct cfs_rq *cfs_rq);
 extern void init_rt_rq(struct rt_rq *rt_rq);
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index 077ebbd..b0fbc76 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -47,7 +47,7 @@
 # define schedstat_set(var, val)	do { } while (0)
 #endif
 
-#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
+#ifdef CONFIG_SCHED_INFO
 static inline void sched_info_reset_dequeued(struct task_struct *t)
 {
 	t->sched_info.last_queued = 0;
@@ -156,7 +156,7 @@
 #define sched_info_depart(rq, t)		do { } while (0)
 #define sched_info_arrive(rq, next)		do { } while (0)
 #define sched_info_switch(rq, t, next)		do { } while (0)
-#endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */
+#endif /* CONFIG_SCHED_INFO */
 
 /*
  * The following are functions that support scheduler-internal time accounting.
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index b908048..e2894b2 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -841,9 +841,14 @@
 	  that can help debug the scheduler. The runtime overhead of this
 	  option is minimal.
 
+config SCHED_INFO
+	bool
+	default n
+
 config SCHEDSTATS
 	bool "Collect scheduler statistics"
 	depends on DEBUG_KERNEL && PROC_FS
+	select SCHED_INFO
 	help
 	  If you say Y here, additional code will be inserted into the
 	  scheduler and related routines to collect statistics about