printk_persist: flush the printk buffer all the way to RAM.

Previously, the buffer was getting stuck in L2 cache, which is lost on CPU
reset, so the last kernel messages before a reboot tended to be lost.  This
was mitigated somewhat since switching to userspace seems to rapidly
evict (write back) most/all of the L2 cache, but usually the last kernel messages
before a reboot would be missing.  This was especially true in the case of
kernel panics, unfortunately, which means we have probably been missing a
lot of kernel panics.

Performance effect on mindspeed c2000 CPU: for a tight loop printk'ing 1000
messages, flushing cache after each iteration:
	original code: 8ms
	flush_range on the entire 8MB printk_buffer: 7201ms
	flush_range twice on the buffer: 14388ms
	flush_all (no range specified): 138ms
	flush_all twice: 253ms
	flush_range only the logbits array: 8ms
	flush_range logbits + only changed section of log_buf: 7ms

So:
- flushing twice takes twice as long.
- flushing a large range is slower than flushing everything.
- flushing just the right part makes it *faster*, which is a bit weird
  (but the results were consistent across several runs)

Anyway, at least this patch doesn't make it slower.

Change-Id: I3b263be1816de2dbb4aecb806501ad36e23f47fe
diff --git a/kernel/printk.c b/kernel/printk.c
index 40a766c..d6e99fb 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -43,6 +43,7 @@
 #include <linux/rculist.h>
 
 #include <asm/uaccess.h>
+#include <asm/cacheflush.h>
 
 /*
  * Architectures can override it:
@@ -1019,12 +1020,28 @@
 	}
 }
 
+static inline void flush_persist(void *ptr, size_t len)
+{
+#ifdef CONFIG_PRINTK_PERSIST
+	/*
+	 * Push [ptr, ptr + len) through the CPU and outer caches to RAM so
+	 * it survives a sudden reset (eg. by a watchdog); skip empty ranges.
+	 */
+	if (!len)
+		return;
+	if (cpu_cache.flush_kern_dcache_area)
+		cpu_cache.flush_kern_dcache_area(ptr, len);
+	outer_flush_range(virt_to_phys(ptr), virt_to_phys(ptr) + len);
+#endif
+}
+
 asmlinkage int vprintk(const char *fmt, va_list args)
 {
 	int printed_len = 0;
 	int current_log_level = default_message_loglevel;
 	unsigned long flags;
 	int this_cpu;
+	unsigned orig_end = log_end & LOG_BUF_MASK, new_end;
 	char *p;
 	size_t plen;
 	char special;
@@ -1151,6 +1168,14 @@
 
 	lockdep_on();
 out_restore_irqs:
+	new_end = log_end & LOG_BUF_MASK;
+	if (new_end >= orig_end) {
+		flush_persist(log_buf + orig_end, new_end - orig_end);
+	} else {
+		flush_persist(log_buf + orig_end, log_buf_len - orig_end);
+		flush_persist(log_buf, new_end);
+	}
+	flush_persist(logbits, sizeof(*logbits));
 	raw_local_irq_restore(flags);
 
 	preempt_enable();