Make BBSI driver callback oriented

We previously used spi_sync to access the SPI bus. This turned out to be
very slow. spi_sync kicks off a SPI transaction and then puts our thread
to sleep. It is woken up as soon as the SPI transaction has finished. At
least for read operations, the old design transferred only four bytes at
a time, which meant that our thread had to be woken up and scheduled for
every four bytes, which turned out to be time-consuming.

The new design is callback oriented and uses spi_async to kick off SPI
transactions. The callback routine which is invoked on completion
reloads the next SPI transaction immediately without the need to wake up
a different kernel thread.

To improve performance even further, we exploit the full 32 bytes of the
FIFO in our SPI controller and read multiple words (4 bytes each) in one
SPI transaction.

Change-Id: I12dbe9d9e901db052c7a7a74eb43d02e0ac75cb9
diff --git a/bbsi.h b/bbsi.h
index 7907864..afd051f 100644
--- a/bbsi.h
+++ b/bbsi.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013 Google Inc.
+ * Copyright (C) 2013-2014 Google Inc.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -17,9 +17,14 @@
 #ifndef __BBSI_H
 #define __BBSI_H
 
+/*
+ * Resources:
+ * BCM6803 Data Sheet (Document ID: 6803-DS205-R), Chapter 5: Host Interface and CPU
+ * BBVLSI Serial Interface (BBSI) (Document ID: BBSI-UM200-R)
+ */
+
 #include <linux/netdevice.h>
 #include <linux/spi/spi.h>
-#include <linux/vmalloc.h>
 
 #ifndef KSEG1
 #define KSEG1 0  // just to appease non-MIPS CPUs. Not really used.
@@ -27,6 +32,47 @@
 
 #define BP_MOCA_MAX_NUM 1
 
+#define SPI_FIFO_LEN 32
+
+#define BBSI_COMMAND_BYTE 0x80
+#define STATUS_REGISTER_ADDR 0x06
+#define CONFIG_REGISTER_ADDR 0x07
+#define DATA0_REGISTER_ADDR 0x0c
+
+#define READ_RBUS (1<<0)
+#define SPECULATIVE_READ_EN (1<<1)
+#define NO_RBUS_ADDR_INC (1<<2)
+
+#define STATUS_BUSY (1<<4)
+
+enum bbsi_operation_state {
+	START,			/* nothing submitted yet */
+	COMMAND_STATE,		/* read/write command message submitted */
+	DATA_STATE,		/* data transfer message submitted */
+	POLLSTATUS_STATE,	/* status poll submitted (write path only) */
+	DONE,			/* finished successfully */
+	FAIL,			/* aborted by bbsi_op_fail() */
+};
+
+struct bbsi_operation {
+	enum bbsi_operation_state state;	/* current step of the state machine */
+	struct spi_device *spi;			/* device the messages are submitted to */
+	struct spi_message sm;			/* re-initialized for every SPI message */
+	struct spi_transfer st[2];		/* transfers that make up sm */
+	/* address on MoCA chip we want to read from/write to */
+	uint32_t addr;
+	/* len must be a multiple of 4 */
+	size_t len;
+	uint8_t *data;				/* caller's buffer: read destination or write source */
+	/* number of bytes already read or written. data_len <= len */
+	size_t data_len;
+	uint8_t tx_buf[8];			/* command bytes sent to the chip */
+	uint8_t rx_buf[SPI_FIFO_LEN - 2];	/* status/data bytes received from the chip */
+	/* See interpret_data() for a definition of rx_state */
+	int rx_state;
+	struct completion done;			/* completed once state is DONE or FAIL */
+};
+
 /*
  * The exact values here don't matter, as they're translated into "real"
  * values before talking to mocad.  This is just for the device registration
@@ -58,9 +104,6 @@
 	}
 }
 
-static uint32_t _spi_read32(struct spi_device *spi, uint32_t addr);
-
-
 // TODO(apenwarr): don't make this global.
 //   Or fix the driver to just only enable/disable interrupts at the right
 //   times.
@@ -80,141 +123,325 @@
 	}
 }
 
-static uint8_t __pollstatus(struct spi_device *spi) {
-	uint8_t wclear[] = { 0x80, 0x06 };
-	uint8_t rdata[1] = { 0 };
-	struct spi_transfer t[2] = {
-		{ .tx_buf = wclear, .len = sizeof(wclear) },
-		{ .rx_buf = rdata, .len = sizeof(rdata) },
-	};
-	struct spi_message m;
-	int i;
+static void state_machine_read(void *context);
+static void state_machine_write(void *context);
 
-	spi_message_init(&m);
-	spi_message_add_tail(&t[0], &m);
-	spi_message_add_tail(&t[1], &m);
-
-	for (i = 0; i < 10; i++) {
-		if (spi_sync_locked(spi, &m) < 0) {
-			pr_warn("spi _pollstatus: SPI error\n");
-			return 0x01; // error code
-		}
-		if (rdata[0] & 0x01) {
-			pr_warn("spi _pollstatus: rbus error: %02X\n", rdata[0]);
-			return 0x01; // error result; stop polling now
-		}
-		if (!(rdata[0] & 0x10)) return 0;   // transaction finished
-	}
-	// if we get here, the transaction still isn't finished: weird
-	pr_warn("spi _pollstatus: still busy: %02X\n", rdata[0]);
-	return rdata[0];
+static void bbsi_op_fail(struct bbsi_operation *t) {
+	t->state = FAIL;
+	complete(&t->done);
 }
 
-static uint32_t __spi_read32a(struct spi_device *spi, uint32_t addr,
-				int speculative) {
-	uint8_t waddr[] = {
-		0x81, 0x07,
-		0x01 | (speculative ? 0x02 : 0),
-		0, 0, 0, 0 };
-	struct spi_transfer addrt[1] = {
-		{ .tx_buf = waddr, .len = sizeof(waddr) },
-	};
-	struct spi_message addrm;
-	int j, st;
-
-	spi_message_init(&addrm);
-	spi_message_add_tail(&addrt[0], &addrm);
-
-	__pollstatus(spi);
-	for (j = 0; j < 10; j++) {
-		// write address reg, which triggers the read
-		writel(cpu_to_be32(addr), waddr + sizeof(waddr) - 4);
-		if (spi_sync_locked(spi, &addrm) < 0) {
-			pr_warn("spi_read_addr: error\n");
-		}
-		st = __pollstatus(spi);
-		if (!st) break;
+static int send_readcmd(struct bbsi_operation *t, int burst) {
+	int ret;
+	spi_message_init(&t->sm);
+	t->sm.complete = state_machine_read;
+	t->sm.context = t;
+	t->tx_buf[0] = BBSI_COMMAND_BYTE | 0x01;
+	t->tx_buf[1] = CONFIG_REGISTER_ADDR;
+	t->tx_buf[2] = READ_RBUS |
+		(burst ? SPECULATIVE_READ_EN : NO_RBUS_ADDR_INC);
+	writel(cpu_to_be32(t->addr), t->tx_buf + 3);
+	t->st[0] = ((struct spi_transfer) { .tx_buf = t->tx_buf, .len = 7 });
+	spi_message_add_tail(&t->st[0], &t->sm);
+	ret = spi_async_locked(t->spi, &t->sm);
+	if (ret) {
+		pr_err("%s: spi_async failed with %d\n", __func__, ret);
+		bbsi_op_fail(t);
 	}
-	return st;
+	return ret;
 }
 
-static uint32_t __spi_read32d_noswap(struct spi_device *spi) {
-	uint8_t wdata[] = { 0x80, 0x0c };
-	uint8_t rdata[4];
-	struct spi_transfer datat[2] = {
-		{ .tx_buf = wdata, .len = sizeof(wdata) },
-		{ .rx_buf = rdata, .len = sizeof(rdata) },
-	};
-	struct spi_message datam;
-
-	spi_message_init(&datam);
-	spi_message_add_tail(&datat[0], &datam);
-	spi_message_add_tail(&datat[1], &datam);
-
-	// retrieve actual data bits
-	if (spi_sync_locked(spi, &datam) < 0) {
-		pr_warn("spi_read_data: error\n");
+static int send_writecmd(struct bbsi_operation *t) {
+	int ret;
+	spi_message_init(&t->sm);
+	t->sm.complete = state_machine_write;
+	t->sm.context = t;
+	t->tx_buf[0] = BBSI_COMMAND_BYTE | 0x01;
+	t->tx_buf[1] = CONFIG_REGISTER_ADDR;
+	t->tx_buf[2] = 0;
+	writel(cpu_to_be32(t->addr), t->tx_buf + 3);
+	t->st[0] = ((struct spi_transfer) { .tx_buf = t->tx_buf, .len = 7 });
+	spi_message_add_tail(&t->st[0], &t->sm);
+	ret = spi_async_locked(t->spi, &t->sm);
+	if (ret) {
+		pr_err("%s: spi_async failed with %d\n", __func__, ret);
+		bbsi_op_fail(t);
 	}
-	return readl(rdata);
+	return ret;
+}
+
+/* Queue a read of one status-register byte into t->rx_buf[0]; `complete` is the message callback. */
+static int send_pollstatuscmd(struct bbsi_operation *t, void (*complete)(void *context)) {
+	int ret;
+	spi_message_init(&t->sm);
+	t->sm.context = t;
+	t->sm.complete = complete;
+	t->tx_buf[0] = BBSI_COMMAND_BYTE;
+	t->tx_buf[1] = STATUS_REGISTER_ADDR;
+	t->st[0] = ((struct spi_transfer) { .tx_buf = t->tx_buf, .len = 2 });
+	t->st[1] = ((struct spi_transfer) { .rx_buf = t->rx_buf, .len = 1 });
+	spi_message_add_tail(&t->st[0], &t->sm);
+	spi_message_add_tail(&t->st[1], &t->sm);
+	ret = spi_async_locked(t->spi, &t->sm);
+	if (!ret) return 0;
+	pr_err("%s: spi_async failed with %d\n", __func__, ret);
+	bbsi_op_fail(t);
+	return ret;
+}
+
+/* Queue a read of sizeof(t->rx_buf) register bytes; state_machine_read() is the
+ * message callback. Returns spi_async_locked()'s result; a failed submission
+ * also fails the operation. */
+static int read_data(struct bbsi_operation *t) {
+	int ret;
+	spi_message_init(&t->sm);
+	t->sm.context = t;
+	t->sm.complete = state_machine_read;
+	t->tx_buf[0] = BBSI_COMMAND_BYTE;
+	/* Resume at the register interpret_data() expects next: rx_state 0 is
+	 * STATUS, rx_state 1..4 are DATA0..DATA3. */
+	t->tx_buf[1] = (t->rx_state == 0) ? STATUS_REGISTER_ADDR :
+		DATA0_REGISTER_ADDR + t->rx_state - 1;
+	t->st[0] = ((struct spi_transfer) { .tx_buf = t->tx_buf, .len = 2 });
+	t->st[1] = ((struct spi_transfer) { .rx_buf = t->rx_buf,
+			.len = sizeof(t->rx_buf)});
+	spi_message_add_tail(&t->st[0], &t->sm);
+	spi_message_add_tail(&t->st[1], &t->sm);
+	ret = spi_async_locked(t->spi, &t->sm);
+	if (ret) {
+		pr_err("%s: spi_async failed with %d\n", __func__, ret);
+		bbsi_op_fail(t);
+	}
+	return ret;
+}
+
+/* Queue the next chunk (at most sizeof(t->rx_buf) bytes) of t->data for writing. */
+static int write_data(struct bbsi_operation *t) {
+	int ret;
+	size_t len = min(t->len - t->data_len, sizeof(t->rx_buf));
+	spi_message_init(&t->sm);
+	t->sm.complete = state_machine_write;
+	t->sm.context = t;
+	t->tx_buf[0] = BBSI_COMMAND_BYTE | 0x01;
+	t->tx_buf[1] = DATA0_REGISTER_ADDR + (t->data_len & 3);
+	t->st[0] = ((struct spi_transfer) { .tx_buf = t->tx_buf, .len = 2 });
+	t->st[1] = ((struct spi_transfer) { .tx_buf = t->data + t->data_len, .len = len });
+	spi_message_add_tail(&t->st[0], &t->sm);
+	spi_message_add_tail(&t->st[1], &t->sm);
+	/* Advance before submitting: afterwards the callback, or the waiter's kfree(t), may run at any time. */
+	t->data_len += len;
+	ret = spi_async_locked(t->spi, &t->sm);
+	if (ret) {
+		pr_err("%s: spi_async failed with %d\n", __func__, ret);
+		bbsi_op_fail(t);
+	}
+	return ret;
+}
+
+static int interpret_data(struct bbsi_operation *t) {
+	/* The bytes we are receiving over the SPI bus are the values of
+	 * registers in the BBSI interface on the BCM6803. The register csrAddr
+	 * points to the register that we are going to read next. It cycles
+	 * through the following registers:
+	 *  - STATUS (t->rx_state==0)
+	 *  - DATA0 (t->rx_state==1)
+	 *  - DATA1 (t->rx_state==2)
+	 *  - DATA2 (t->rx_state==3)
+	 *  - DATA3 (t->rx_state==4)
+	 *
+	 * If csrAddr points to the status register, csrAddr is not incremented
+	 * if the busy bit in the status register is set. As a result, we might
+	 * read the status register multiple times before we get to DATA0. We
+	 * therefore might have to skip over multiple status bytes.
+	 *
+	 * t->rx_state defines the register that we are going to read next: 0
+	 * stands for STATUS, 1 stands for DATA0, 2 for DATA1 and so on.
+	 *
+	 * Returns 0 once rx_buf is consumed or t->len bytes are collected, and
+	 * -1 after an rbus error (the operation has then already been failed).
+	 */
+	uint8_t *r;
+	for(r = t->rx_buf;r < t->rx_buf + sizeof(t->rx_buf);r++) {
+		if (t->rx_state == 0) {
+			if (*r & STATUS_BUSY)
+				/* t->rx_state stays at 0 because the next byte
+				 * will be another status byte */
+				continue;
+			else if (*r) {
+				pr_err("rbus error 0x%02x while trying to read %zu "
+						"bytes from 0x%08x\n",
+						(unsigned) *r, t->len, t->addr);
+				bbsi_op_fail(t);
+				return -1;
+			} else
+				/* The busy bit is not set which means that the next byte will be DATA0. */
+				t->rx_state = 1;
+		} else {
+			BUG_ON(t->data_len >= t->len);
+			t->data[t->data_len++] = *r;
+			/* t->rx_state==4 means that we just read DATA3. The
+			 * next byte will be a status byte (t->rx_state==0) */
+			t->rx_state++;
+			t->rx_state%=5;
+			if (t->data_len == t->len) return 0;
+		}
+	}
+	return 0;
+}
+
+/* Read state machine: called by bbsi_read() to start, then as the completion callback of each SPI message. */
+static void state_machine_read(void *context) {
+	struct bbsi_operation *t = (struct bbsi_operation *) context;
+	int ret;
+	BUG_ON(t->len&3);
+
+	switch (t->state) {
+		case START: /* first call: send the read command */
+			t->state = COMMAND_STATE;
+			ret = send_readcmd(t, t->len>4);
+			if (ret) return;
+			break;
+		case COMMAND_STATE: /* the read command message has completed */
+			if (t->sm.status) {
+				pr_err("readcmd returned bad status %d\n", t->sm.status);
+				bbsi_op_fail(t);
+				return;
+			}
+			t->state = DATA_STATE;
+			ret = read_data(t);
+			if (ret) return;
+			break;
+		case DATA_STATE: /* a chunk of status/data bytes is in rx_buf */
+			if (t->sm.status) {
+				pr_err("read_data returned bad status %d\n", t->sm.status);
+				bbsi_op_fail(t);
+				return;
+			}
+			if (interpret_data(t)) return; /* rbus error: operation already failed */
+			if (t->data_len == t->len) {
+				t->state = DONE;
+				complete(&t->done);
+			} else {
+				/* Stay in this state */
+				ret = read_data(t);
+				if (ret) return;
+			}
+			break;
+		default:
+			BUG();
+			break;
+	}
+}
+
+/* Write state machine: called by bbsi_write() to start, then as the completion callback of each SPI message. */
+static void state_machine_write(void *context) {
+	struct bbsi_operation *t = (struct bbsi_operation *) context;
+	int ret;
+	BUG_ON(t->len&3);
+
+	switch (t->state) {
+		case START:
+			t->state = COMMAND_STATE;
+			ret = send_writecmd(t);
+			if (ret) return;
+			break;
+		case COMMAND_STATE:
+			if (t->sm.status) {
+				pr_err("writecmd returned bad status %d\n", t->sm.status);
+				bbsi_op_fail(t);
+				return;
+			}
+			t->state = DATA_STATE;
+			ret = write_data(t);
+			if (ret) return;
+			break;
+		case DATA_STATE:
+			if (t->sm.status) {
+				pr_err("write_data returned bad status %d\n", t->sm.status);
+				bbsi_op_fail(t);
+				return;
+			}
+			if (t->data_len == t->len) {
+				t->state = POLLSTATUS_STATE;
+				ret = send_pollstatuscmd(t, state_machine_write);
+				if (ret) return;
+			} else {
+				/* Stay in this state */
+				ret = write_data(t);
+				if (ret) return;
+			}
+			break;
+		case POLLSTATUS_STATE:
+			if (t->sm.status) {
+				pr_err("pollstatuscmd returned bad status %d\n", t->sm.status);
+				bbsi_op_fail(t);
+				return;
+			}
+			if (*t->rx_buf & STATUS_BUSY) {
+				/* Stay in this state */
+				ret = send_pollstatuscmd(t, state_machine_write);
+				if (ret) return;
+			} else if (*t->rx_buf) {
+				pr_err("rbus error 0x%02x while trying to write %zu "
+						"bytes to 0x%08x\n",
+						(unsigned) *t->rx_buf, t->len, t->addr);
+				bbsi_op_fail(t);
+			} else {
+				t->state = DONE;
+				complete(&t->done);
+			}
+			break;
+		default:
+			BUG();
+			break;
+	}
+}
+
+/* Blocking read of len bytes (a multiple of 4) from addr into dst; returns 0 on success, 1 on failure. */
+static uint32_t bbsi_read(struct spi_device *spi, uint32_t addr, void *dst, size_t len) {
+	uint32_t failed;
+	struct bbsi_operation *t = kmalloc(sizeof(*t), GFP_KERNEL);
+	if (!t) return 1;
+	/* members not named here (data_len, rx_state, ...) start out as zero */
+	*t = (struct bbsi_operation) { .state = START, .spi = spi, .addr = addr,
+			.len = len, .data = dst };
+	init_completion(&t->done);
+	state_machine_read((void*) t);
+	wait_for_completion(&t->done);
+	/* the state machine ends in DONE or FAIL; sample it before freeing t */
+	failed = (t->state != DONE);
+	kfree(t);
+	return failed;
+}
+
+/* Blocking write of len bytes (a multiple of 4) from src to addr; returns 0 on success, 1 on failure. */
+static uint32_t bbsi_write(struct spi_device *spi, uint32_t addr, const void *src, size_t len) {
+	uint32_t failed;
+	struct bbsi_operation *t = kmalloc(sizeof(*t), GFP_KERNEL);
+	if (!t) return 1;
+	*t = (struct bbsi_operation) { .state = START, .spi = spi, .addr = addr,
+			.len = len, .data = (void*) src };
+	init_completion(&t->done);
+	state_machine_write((void*) t);
+	wait_for_completion(&t->done);
+	/* the state machine ends in DONE or FAIL; sample it before freeing t */
+	failed = (t->state != DONE);
+	kfree(t);
+	return failed;
+}
 
 static uint32_t _spi_read32(struct spi_device *spi, uint32_t addr) {
-	int st;
-	uint32_t retval;
+	uint32_t retval = 0;
 
 	spi_bus_lock(spi->master);
-
-	st = __spi_read32a(spi, addr, 0);
-	if (st) {
-		retval = 0x00000000; // error
-	} else {
-		retval = be32_to_cpu(__spi_read32d_noswap(spi));
-	}
+	bbsi_read(spi, addr, &retval, sizeof(retval));
 	spi_bus_unlock(spi->master);
-	return retval;
+	return be32_to_cpu(retval);
 }
 
-static void __spi_write32a(struct spi_device *spi, uint32_t addr) {
-	uint8_t waddr[] = { 0x81, 0x07, 0x00, 0, 0, 0, 0  };
-	struct spi_transfer t[1] = {
-		{ .tx_buf = waddr, .len = sizeof(waddr) },
-	};
-	struct spi_message m;
-
-	spi_message_init(&m);
-	spi_message_add_tail(&t[0], &m);
-
-	// write address reg
-	writel(cpu_to_be32(addr), waddr + sizeof(waddr) - 4);
-	if (spi_sync_locked(spi, &m) < 0) {
-		pr_warn("spi_write: error\n");
-	}
-}
-
-static void __spi_write32d_noswap(struct spi_device *spi, uint32_t value) {
-	uint8_t wdata[] = { 0x81, 0x0c, 0, 0, 0, 0 };
-	struct spi_transfer t[1] = {
-		{ .tx_buf = wdata, .len = sizeof(wdata) },
-	};
-	struct spi_message m;
-
-	spi_message_init(&m);
-	spi_message_add_tail(&t[0], &m);
-
-	// write data reg
-	writel(value, wdata + sizeof(wdata) - 4);
-	if (spi_sync_locked(spi, &m) < 0) {
-		pr_warn("spi_write: error\n");
-	}
-}
-
-
 static void _spi_write32(struct spi_device *spi, uint32_t addr, uint32_t value) {
 	spi_bus_lock(spi->master);
-	__pollstatus(spi);
-	__spi_write32a(spi, addr);
-	__spi_write32d_noswap(spi, cpu_to_be32(value));
-	__pollstatus(spi);
+	value = cpu_to_be32(value);
+	bbsi_write(spi, addr, &value, sizeof(value));
 	spi_bus_unlock(spi->master);
 }
 
@@ -227,72 +454,32 @@
 }
 
 static void kerSysBcmSpiSlaveReadBuf(struct spi_device *spi, uint32_t addr, void *dst, int len, int wordsize) {
-	int i;
-	uint32_t *buf = dst;
-
-	spi_bus_lock(spi->master);
-
+	if (len <= 0 || len&3) { /* state_machine_read() has BUG_ON(len&3); reject bad lengths here */
+		pr_warn("spi readbuf: buffer size %d invalid\n", len);
+		return;
+	}
 	if (wordsize != 4) {
 		pr_info("SPI readbuf: only word size == 4 bytes is supported!\n");
 		return;
 	}
-	__spi_read32a(spi, addr, 1);
-	for (i = 0; i < len; i += wordsize) {
-		buf[i/4] = __spi_read32d_noswap(spi);
-		__pollstatus(spi);
-	}
-
+	spi_bus_lock(spi->master);
+	bbsi_read(spi, addr, dst, len);
 	spi_bus_unlock(spi->master);
 }
 
 static void kerSysBcmSpiSlaveWriteBuf(struct spi_device *spi, uint32_t addr, const void *src, int len, int wordsize) {
-	int i, nelems = len/4;
-	const uint32_t *buf = src;
-	uint8_t wdata[] = { 0x81, 0x0c };
-	struct spi_transfer *t, *tp;
-	struct spi_message m;
-
 	if (len > 8192) {
 		pr_warn("spi writebuf: buffer size %d is too large\n", len);
 		return;
 	}
-	if (wordsize != 4) {
+	if (wordsize != 4 || len&3) {
 		pr_err("SPI writebuf: only word size == 4 bytes is supported!\n");
 		return;
 	}
 
-	t = vmalloc(nelems * sizeof(struct spi_transfer) * 2);
-	if (!t) {
-		pr_warn("spi writebuf: out of memory\n");
-		return;
-	}
-
-	memset(t, 0, nelems * sizeof(struct spi_transfer) * 2);
-	spi_message_init(&m);
-
-	for (i = 0, tp = t; i < nelems; i++) {
-		tp->tx_buf = wdata;
-		tp->len = sizeof(wdata);
-		spi_message_add_tail(tp, &m);
-		tp++;
-
-		tp->tx_buf = &buf[i];
-		tp->len = 4;
-		tp->cs_change = 1;
-		spi_message_add_tail(tp, &m);
-		tp++;
-	}
-
 	spi_bus_lock(spi->master);
-
-	__pollstatus(spi);
-	writel(cpu_to_be32(addr), wdata + 2);
-	__spi_write32a(spi, addr);
-	spi_sync_locked(spi, &m);
-	__pollstatus(spi);
-
+	bbsi_write(spi, addr, src, len);
 	spi_bus_unlock(spi->master);
-	vfree(t);
 }
 
 #endif // __BBSI_H