diff --git a/bbsi.h b/bbsi.h
index 7907864..afd051f 100644
--- a/bbsi.h
+++ b/bbsi.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013 Google Inc.
+ * Copyright (C) 2013-2014 Google Inc.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -17,9 +17,14 @@
 #ifndef __BBSI_H
 #define __BBSI_H
 
+/*
+ * Resources:
+ * BCM6803 Data Sheet (Document ID: 6803-DS205-R), Chapter 5: Host Interface and CPU
+ * BBVLSI Serial Interface (BBSI) (Document ID: BBSI-UM200-R)
+ */
+
 #include <linux/netdevice.h>
 #include <linux/spi/spi.h>
-#include <linux/vmalloc.h>
 
 #ifndef KSEG1
 #define KSEG1 0  // just to appease non-MIPS CPUs. Not really used.
@@ -27,6 +32,47 @@
 
 #define BP_MOCA_MAX_NUM 1
 
+#define SPI_FIFO_LEN 32  /* rx_buf in struct bbsi_operation holds SPI_FIFO_LEN - 2 bytes */
+
+#define BBSI_COMMAND_BYTE 0x80  /* first byte of every transfer; callers OR in 0x01 when writing a register */
+#define STATUS_REGISTER_ADDR 0x06  /* status register: STATUS_BUSY plus error bits */
+#define CONFIG_REGISTER_ADDR 0x07  /* config register; written together with the 4 address bytes that follow */
+#define DATA0_REGISTER_ADDR 0x0c  /* first of the four data registers DATA0..DATA3 */
+
+#define READ_RBUS (1<<0)  /* config register bit: set by send_readcmd(), clear in send_writecmd() */
+#define SPECULATIVE_READ_EN (1<<1)  /* config register bit: used for reads longer than 4 bytes */
+#define NO_RBUS_ADDR_INC (1<<2)  /* config register bit: used for single 4-byte reads */
+
+#define STATUS_BUSY (1<<4)  /* status register bit: the chip is still busy */
+
+enum bbsi_operation_state {  /* progress of one struct bbsi_operation */
+	START,  /* set up by bbsi_read()/bbsi_write(); nothing sent yet */
+	COMMAND_STATE,  /* config/address command queued, waiting for its completion */
+	DATA_STATE,  /* data transfer queued, waiting for its completion */
+	POLLSTATUS_STATE,  /* write path only: status register poll queued */
+	DONE,  /* operation finished successfully */
+	FAIL,  /* operation aborted, see bbsi_op_fail() */
+};
+
+struct bbsi_operation {  /* bookkeeping for one asynchronous BBSI read or write */
+	enum bbsi_operation_state state;  /* current step of the state machine */
+	struct spi_device *spi;  /* device the SPI messages are submitted to */
+	struct spi_message sm;  /* re-initialised and reused for every message */
+	struct spi_transfer st[2];  /* st[0]: command bytes, st[1]: optional payload */
+	/* address on the MoCA chip we want to read from/write to */
+	uint32_t addr;
+	/* total number of bytes to transfer; must be a multiple of 4 */
+	size_t len;
+	uint8_t *data;  /* caller's buffer: destination of a read, source of a write */
+	/* number of bytes already read or handed to the SPI layer; data_len <= len */
+	size_t data_len;
+	uint8_t tx_buf[8];  /* command bytes sent in st[0] */
+	uint8_t rx_buf[SPI_FIFO_LEN - 2];  /* raw bytes received by one transfer */
+	/* next register expected in the read stream: 0 = STATUS, 1..4 = DATA0..DATA3 */
+	int rx_state;
+	struct completion done;  /* completed when state becomes DONE or FAIL */
+};
+
 /*
  * The exact values here don't matter, as they're translated into "real"
  * values before talking to mocad.  This is just for the device registration
@@ -58,9 +104,6 @@
 	}
 }
 
-static uint32_t _spi_read32(struct spi_device *spi, uint32_t addr);
-
-
 // TODO(apenwarr): don't make this global.
 //   Or fix the driver to just only enable/disable interrupts at the right
 //   times.
@@ -80,141 +123,325 @@
 	}
 }
 
-static uint8_t __pollstatus(struct spi_device *spi) {
-	uint8_t wclear[] = { 0x80, 0x06 };
-	uint8_t rdata[1] = { 0 };
-	struct spi_transfer t[2] = {
-		{ .tx_buf = wclear, .len = sizeof(wclear) },
-		{ .rx_buf = rdata, .len = sizeof(rdata) },
-	};
-	struct spi_message m;
-	int i;
+static void state_machine_read(void *context);  /* spi_message completion callback for reads */
+static void state_machine_write(void *context);  /* spi_message completion callback for writes */
 
-	spi_message_init(&m);
-	spi_message_add_tail(&t[0], &m);
-	spi_message_add_tail(&t[1], &m);
-
-	for (i = 0; i < 10; i++) {
-		if (spi_sync_locked(spi, &m) < 0) {
-			pr_warn("spi _pollstatus: SPI error\n");
-			return 0x01; // error code
-		}
-		if (rdata[0] & 0x01) {
-			pr_warn("spi _pollstatus: rbus error: %02X\n", rdata[0]);
-			return 0x01; // error result; stop polling now
-		}
-		if (!(rdata[0] & 0x10)) return 0;   // transaction finished
-	}
-	// if we get here, the transaction still isn't finished: weird
-	pr_warn("spi _pollstatus: still busy: %02X\n", rdata[0]);
-	return rdata[0];
+static void bbsi_op_fail(struct bbsi_operation *t) {  /* abort the operation and wake its waiter */
+	t->state = FAIL;  /* set before complete(): the waiter frees t once it wakes up */
+	complete(&t->done);
 }
 
-static uint32_t __spi_read32a(struct spi_device *spi, uint32_t addr,
-				int speculative) {
-	uint8_t waddr[] = {
-		0x81, 0x07,
-		0x01 | (speculative ? 0x02 : 0),
-		0, 0, 0, 0 };
-	struct spi_transfer addrt[1] = {
-		{ .tx_buf = waddr, .len = sizeof(waddr) },
-	};
-	struct spi_message addrm;
-	int j, st;
-
-	spi_message_init(&addrm);
-	spi_message_add_tail(&addrt[0], &addrm);
-
-	__pollstatus(spi);
-	for (j = 0; j < 10; j++) {
-		// write address reg, which triggers the read
-		writel(cpu_to_be32(addr), waddr + sizeof(waddr) - 4);
-		if (spi_sync_locked(spi, &addrm) < 0) {
-			pr_warn("spi_read_addr: error\n");
-		}
-		st = __pollstatus(spi);
-		if (!st) break;
+/* Queue the config-register write that carries t->addr and requests an rbus read (burst != 0: speculative). */
+static int send_readcmd(struct bbsi_operation *t, int burst) {
+	int err;
+	t->tx_buf[0] = BBSI_COMMAND_BYTE | 0x01;
+	t->tx_buf[1] = CONFIG_REGISTER_ADDR;
+	t->tx_buf[2] = READ_RBUS | (burst ? SPECULATIVE_READ_EN : NO_RBUS_ADDR_INC);
+	writel(cpu_to_be32(t->addr), t->tx_buf + 3);
+	t->st[0] = ((struct spi_transfer) { .tx_buf = t->tx_buf, .len = 7 });
+	spi_message_init(&t->sm);
+	t->sm.complete = state_machine_read;
+	t->sm.context = t;
+	spi_message_add_tail(&t->st[0], &t->sm);
+	err = spi_async_locked(t->spi, &t->sm);
+	if (err) {
+		pr_err("%s: spi_async failed with %d\n", __func__, err);
+		bbsi_op_fail(t);
	}
-	return st;
+	return err;
 }
 
-static uint32_t __spi_read32d_noswap(struct spi_device *spi) {
-	uint8_t wdata[] = { 0x80, 0x0c };
-	uint8_t rdata[4];
-	struct spi_transfer datat[2] = {
-		{ .tx_buf = wdata, .len = sizeof(wdata) },
-		{ .rx_buf = rdata, .len = sizeof(rdata) },
-	};
-	struct spi_message datam;
-
-	spi_message_init(&datam);
-	spi_message_add_tail(&datat[0], &datam);
-	spi_message_add_tail(&datat[1], &datam);
-
-	// retrieve actual data bits
-	if (spi_sync_locked(spi, &datam) < 0) {
-		pr_warn("spi_read_data: error\n");
+static int send_writecmd(struct bbsi_operation *t) {  /* queue the address setup that precedes a write */
+	int err;
+	t->tx_buf[0] = BBSI_COMMAND_BYTE | 0x01;
+	t->tx_buf[1] = CONFIG_REGISTER_ADDR;
+	t->tx_buf[2] = 0;  /* config byte without READ_RBUS */
+	writel(cpu_to_be32(t->addr), t->tx_buf + 3);
+	t->st[0] = ((struct spi_transfer) { .tx_buf = t->tx_buf, .len = 7 });
+	spi_message_init(&t->sm);
+	t->sm.complete = state_machine_write;
+	t->sm.context = t;
+	spi_message_add_tail(&t->st[0], &t->sm);
+	err = spi_async_locked(t->spi, &t->sm);
+	if (err) {
+		pr_err("%s: spi_async failed with %d\n", __func__, err);
+		bbsi_op_fail(t);
	}
-	return readl(rdata);
+	return err;
+}
+
+static int send_pollstatuscmd(struct bbsi_operation *t, void (*complete)(void *context)) {  /* queue a 1-byte status read */
+	int err;
+	t->tx_buf[0] = BBSI_COMMAND_BYTE;
+	t->tx_buf[1] = STATUS_REGISTER_ADDR;
+	t->st[0] = ((struct spi_transfer) { .tx_buf = t->tx_buf, .len = 2 });
+	t->st[1] = ((struct spi_transfer) { .rx_buf = t->rx_buf, .len = 1 });  /* status byte lands in rx_buf[0] */
+	spi_message_init(&t->sm);
+	t->sm.complete = complete;  /* invoked with t as its context once the message is done */
+	t->sm.context = t;
+	spi_message_add_tail(&t->st[0], &t->sm);
+	spi_message_add_tail(&t->st[1], &t->sm);
+	err = spi_async_locked(t->spi, &t->sm);
+	if (err) {
+		pr_err("%s: spi_async failed with %d\n", __func__, err);
+		bbsi_op_fail(t);
+	}
+	return err;
+}
+
+/*
+ * Queue one read transfer that fills t->rx_buf, resuming at the register where
+ * the previous one stopped. Returns 0, or the submit error after failing t.
+ */
+static int read_data(struct bbsi_operation *t) {
+	int err;
+	t->tx_buf[0] = BBSI_COMMAND_BYTE;
+	/* rx_state 0 resumes at STATUS, rx_state 1..4 at DATA0..DATA3 (see interpret_data()) */
+	t->tx_buf[1] = t->rx_state == 0 ? STATUS_REGISTER_ADDR :
+		DATA0_REGISTER_ADDR + t->rx_state - 1;
+	t->st[0] = ((struct spi_transfer) { .tx_buf = t->tx_buf, .len = 2 });
+	t->st[1] = ((struct spi_transfer) { .rx_buf = t->rx_buf,
+			.len = sizeof(t->rx_buf)});
+	spi_message_init(&t->sm);
+	t->sm.complete = state_machine_read;
+	t->sm.context = t;
+	spi_message_add_tail(&t->st[0], &t->sm);
+	spi_message_add_tail(&t->st[1], &t->sm);
+	err = spi_async_locked(t->spi, &t->sm);
+	if (err) {
+		pr_err("%s: spi_async failed with %d\n", __func__, err);
+		bbsi_op_fail(t);
+	}
+	return err;
+}
+
+/* Queue the next chunk of t->data for writing. Returns 0, or the submit error after failing t. */
+static int write_data(struct bbsi_operation *t) {
+	int ret;
+	size_t len = min(t->len - t->data_len, sizeof(t->rx_buf));
+	spi_message_init(&t->sm);
+	t->sm.complete = state_machine_write;
+	t->sm.context = t;
+	t->tx_buf[0] = BBSI_COMMAND_BYTE | 0x01;
+	t->tx_buf[1] = DATA0_REGISTER_ADDR + (t->data_len & 3);
+	t->st[0] = ((struct spi_transfer) { .tx_buf = t->tx_buf, .len = 2 });
+	t->st[1] = ((struct spi_transfer) { .tx_buf = t->data + t->data_len, .len = len });
+	spi_message_add_tail(&t->st[0], &t->sm);
+	spi_message_add_tail(&t->st[1], &t->sm);
+	/* Count the chunk BEFORE submitting: afterwards the callback may run and the waiter may free t. */
+	t->data_len += len;
+	ret = spi_async_locked(t->spi, &t->sm);
+	if (ret) {
+		pr_err("%s: spi_async failed with %d\n", __func__, ret);
+		bbsi_op_fail(t);
+	}
+	return ret;
+}
+
+static int interpret_data(struct bbsi_operation *t) {
+	/* The bytes we are receiving over the SPI bus are the values of
+	 * registers in the BBSI interface on the BCM6803. The register csrAddr
+	 * points to the register that we are going to read next. It cycles
+	 * through the following registers:
+	 *  - STATUS (t->rx_state==0)
+	 *  - DATA0 (t->rx_state==1)
+	 *  - DATA1 (t->rx_state==2)
+	 *  - DATA2 (t->rx_state==3)
+	 *  - DATA3 (t->rx_state==4)
+	 *
+	 * If csrAddr points to the status register, csrAddr is not incremented
+	 * if the busy bit in the status register is set. As a result, we might
+	 * read the status register multiple times before we get to DATA0. We
+	 * therefore might have to skip over multiple status bytes.
+	 *
+	 * t->rx_state defines the register that we are going to read next: 0
+	 * stands for STATUS, 1 stands for DATA0, 2 for DATA1 and so on.
+	 *
+	 * Returns 0 once t->rx_buf is processed (stopping early when t->len data
+	 * bytes are stored in t->data), or -1 after failing t on a bad status byte.
+	 */
+	uint8_t *r;
+	for (r = t->rx_buf; r < t->rx_buf + sizeof(t->rx_buf); r++) {
+		if (t->rx_state == 0) {
+			if (*r & STATUS_BUSY)
+				/* t->rx_state stays at 0 because the next byte
+				 * will be another status byte */
+				continue;
+			else if (*r) {
+				pr_err("rbus error 0x%02x while trying to read %zu "
+						"bytes from 0x%08x\n",
+						(unsigned) *r, t->len, t->addr);
+				bbsi_op_fail(t);
+				return -1;
+			} else
+				/* The busy bit is not set which means that the next byte will be DATA0. */
+				t->rx_state = 1;
+		} else {
+			BUG_ON(t->data_len >= t->len);
+			t->data[t->data_len++] = *r;
+			/* t->rx_state==4 means that we just read DATA3. The
+			 * next byte will be a status byte (t->rx_state==0) */
+			t->rx_state = (t->rx_state + 1) % 5;
+			if (t->data_len == t->len) return 0;
+		}
+	}
+	return 0;
+}
+
+/* Advance a read by one step. Runs once from bbsi_read() (state START) and
+ * afterwards as the completion callback of each SPI message it queued. */
+static void state_machine_read(void *context) {
+	struct bbsi_operation *op = context;
+
+	BUG_ON(op->len & 3);
+
+	switch (op->state) {
+	case START:
+		/* Nothing sent yet: send the address and request the read */
+		op->state = COMMAND_STATE;
+		send_readcmd(op, op->len > 4);  /* on error the helper has already failed op */
+		return;
+	case COMMAND_STATE:
+		if (op->sm.status) {
+			pr_err("readcmd returned bad status %d\n", op->sm.status);
+			break;
+		}
+		/* Command went out: fetch the first chunk of data */
+		op->state = DATA_STATE;
+		read_data(op);
+		return;
+	case DATA_STATE:
+		if (op->sm.status) {
+			pr_err("read_data returned bad status %d\n", op->sm.status);
+			break;
+		}
+		if (interpret_data(op))
+			return;  /* interpret_data() already failed the operation */
+		if (op->data_len != op->len) {
+			read_data(op);  /* bytes still missing: stay in this state */
+			return;
+		}
+		op->state = DONE;
+		complete(&op->done);
+		return;
+	default:
+		BUG();
+		return;
+	}
+	/* Reached only via break: an SPI message completed with an error status */
+	bbsi_op_fail(op);
+}
+
+/* Advance a write by one step. Runs once from bbsi_write() (state START) and
+ * afterwards as the completion callback of each SPI message it queued. */
+static void state_machine_write(void *context) {
+	struct bbsi_operation *t = (struct bbsi_operation *) context;
+
+	BUG_ON(t->len&3);
+
+	switch (t->state) {
+		case START:
+			t->state = COMMAND_STATE;
+			send_writecmd(t);  /* on error the helper has already failed t */
+			return;
+		case COMMAND_STATE:
+			if (t->sm.status) {
+				pr_err("writecmd returned bad status %d\n", t->sm.status);
+				break;
+			}
+			t->state = DATA_STATE;
+			write_data(t);
+			return;
+		case DATA_STATE:
+			if (t->sm.status) {
+				pr_err("write_data returned bad status %d\n", t->sm.status);
+				break;
+			}
+			if (t->data_len == t->len) {
+				/* Everything was sent: poll until the chip is no longer busy */
+				t->state = POLLSTATUS_STATE;
+				send_pollstatuscmd(t, state_machine_write);
+			} else {
+				/* Stay in this state */
+				write_data(t);
+			}
+			return;
+		case POLLSTATUS_STATE:
+			if (t->sm.status) {
+				pr_err("pollstatuscmd returned bad status %d\n", t->sm.status);
+				break;
+			}
+			if (*t->rx_buf & STATUS_BUSY) {
+				/* Stay in this state.
+				 * NOTE(review): there is no retry limit, so a chip that stays
+				 * busy keeps the caller waiting forever - confirm intended. */
+				send_pollstatuscmd(t, state_machine_write);
+				return;
+			}
+			if (*t->rx_buf) {
+				/* Any other non-zero status byte is reported as an rbus error */
+				pr_err("rbus error 0x%02x while trying to write %zu "
+						"bytes to 0x%08x\n",
+						(unsigned) *t->rx_buf, t->len, t->addr);
+				break;
+			}
+			t->state = DONE;
+			complete(&t->done);
+			return;
+		default:
+			BUG();
+			return;
+	}
+	/* Reached only via break: the operation failed and the reason was logged */
+	bbsi_op_fail(t);
+}
+
+/* Blocking read of len bytes (a nonzero multiple of 4) from chip address addr into
+ * dst. Returns 0 on success, nonzero if the read failed or no memory was available. */
+static uint32_t bbsi_read(struct spi_device *spi, uint32_t addr, void *dst, size_t len) {
+	uint32_t failed;
+	struct bbsi_operation *t = kmalloc(sizeof(*t), GFP_KERNEL);
+	if (!t) return 1;
+	/* Members not named here (data_len, rx_state, ...) start out as zero */
+	*t = (struct bbsi_operation) { .state = START, .spi = spi, .addr = addr,
+		.len = len, .data = dst };
+	init_completion(&t->done);
+	state_machine_read((void*) t);
+	wait_for_completion(&t->done);
+	failed = (t->state != DONE);  /* read the outcome before t is freed */
+	kfree(t);
+	return failed;
+}
+
+/* Blocking write of len bytes (multiple of 4) from src to chip address addr; returns 0 on success, else nonzero. */
+static uint32_t bbsi_write(struct spi_device *spi, uint32_t addr, const void *src, size_t len) {
+	uint32_t failed;
+	struct bbsi_operation *t = kmalloc(sizeof(*t), GFP_KERNEL);
+	if (!t) return 1;
+	/* Members not named here (data_len, ...) start out as zero */
+	*t = (struct bbsi_operation) { .state = START, .spi = spi, .addr = addr,
+		.len = len, .data = (void*) src };
+	init_completion(&t->done);
+	state_machine_write((void*) t);
+	wait_for_completion(&t->done);
+	failed = (t->state != DONE);  /* read the outcome before t is freed */
+	kfree(t);
+	return failed;
 }
 
 static uint32_t _spi_read32(struct spi_device *spi, uint32_t addr) {
-	int st;
-	uint32_t retval;
+	uint32_t retval = 0;  /* stays 0 if bbsi_read() stores nothing */
 
	spi_bus_lock(spi->master);
-
-	st = __spi_read32a(spi, addr, 0);
-	if (st) {
-		retval = 0x00000000; // error
-	} else {
-		retval = be32_to_cpu(__spi_read32d_noswap(spi));
-	}
+	bbsi_read(spi, addr, &retval, sizeof(retval));  /* stores the 4 data bytes in the order received */
	spi_bus_unlock(spi->master);
-	return retval;
+	return be32_to_cpu(retval);  /* the received bytes are treated as big-endian */
 }
 
-static void __spi_write32a(struct spi_device *spi, uint32_t addr) {
-	uint8_t waddr[] = { 0x81, 0x07, 0x00, 0, 0, 0, 0  };
-	struct spi_transfer t[1] = {
-		{ .tx_buf = waddr, .len = sizeof(waddr) },
-	};
-	struct spi_message m;
-
-	spi_message_init(&m);
-	spi_message_add_tail(&t[0], &m);
-
-	// write address reg
-	writel(cpu_to_be32(addr), waddr + sizeof(waddr) - 4);
-	if (spi_sync_locked(spi, &m) < 0) {
-		pr_warn("spi_write: error\n");
-	}
-}
-
-static void __spi_write32d_noswap(struct spi_device *spi, uint32_t value) {
-	uint8_t wdata[] = { 0x81, 0x0c, 0, 0, 0, 0 };
-	struct spi_transfer t[1] = {
-		{ .tx_buf = wdata, .len = sizeof(wdata) },
-	};
-	struct spi_message m;
-
-	spi_message_init(&m);
-	spi_message_add_tail(&t[0], &m);
-
-	// write data reg
-	writel(value, wdata + sizeof(wdata) - 4);
-	if (spi_sync_locked(spi, &m) < 0) {
-		pr_warn("spi_write: error\n");
-	}
-}
-
-
 static void _spi_write32(struct spi_device *spi, uint32_t addr, uint32_t value) {
	spi_bus_lock(spi->master);
-	__pollstatus(spi);
-	__spi_write32a(spi, addr);
-	__spi_write32d_noswap(spi, cpu_to_be32(value));
-	__pollstatus(spi);
+	value = cpu_to_be32(value);  /* the bytes go out in memory order, so store them big-endian first */
+	bbsi_write(spi, addr, &value, sizeof(value));  /* NOTE(review): &value (stack) becomes an SPI tx_buf - confirm this is DMA-safe */
	spi_bus_unlock(spi->master);
 }
 
@@ -227,72 +454,32 @@
 }
 
 static void kerSysBcmSpiSlaveReadBuf(struct spi_device *spi, uint32_t addr, void *dst, int len, int wordsize) {
-	int i;
-	uint32_t *buf = dst;
-
-	spi_bus_lock(spi->master);
-
+	if (len <= 0 || (len & 3)) {  /* bbsi_read() needs a positive multiple of 4 and would BUG() otherwise */
+		pr_warn("spi readbuf: buffer size %d invalid\n", len);
+		return;
+	}
	if (wordsize != 4) {
		pr_info("SPI readbuf: only word size == 4 bytes is supported!\n");
		return;
	}
-	__spi_read32a(spi, addr, 1);
-	for (i = 0; i < len; i += wordsize) {
-		buf[i/4] = __spi_read32d_noswap(spi);
-		__pollstatus(spi);
-	}
-
+	spi_bus_lock(spi->master);  /* taken only after validation, so no early return leaves the bus locked */
+	bbsi_read(spi, addr, dst, len);  /* fills dst with len bytes in the order received */
	spi_bus_unlock(spi->master);
 }
 
 static void kerSysBcmSpiSlaveWriteBuf(struct spi_device *spi, uint32_t addr, const void *src, int len, int wordsize) {
-	int i, nelems = len/4;
-	const uint32_t *buf = src;
-	uint8_t wdata[] = { 0x81, 0x0c };
-	struct spi_transfer *t, *tp;
-	struct spi_message m;
-
	if (len > 8192) {
		pr_warn("spi writebuf: buffer size %d is too large\n", len);
		return;
	}
-	if (wordsize != 4) {
+	if (wordsize != 4 || len&3) {  /* bbsi_write() BUG()s on lengths that are not a multiple of 4 */
		pr_err("SPI writebuf: only word size == 4 bytes is supported!\n");
		return;
	}
 
-	t = vmalloc(nelems * sizeof(struct spi_transfer) * 2);
-	if (!t) {
-		pr_warn("spi writebuf: out of memory\n");
-		return;
-	}
-
-	memset(t, 0, nelems * sizeof(struct spi_transfer) * 2);
-	spi_message_init(&m);
-
-	for (i = 0, tp = t; i < nelems; i++) {
-		tp->tx_buf = wdata;
-		tp->len = sizeof(wdata);
-		spi_message_add_tail(tp, &m);
-		tp++;
-
-		tp->tx_buf = &buf[i];
-		tp->len = 4;
-		tp->cs_change = 1;
-		spi_message_add_tail(tp, &m);
-		tp++;
-	}
-
	spi_bus_lock(spi->master);
-
-	__pollstatus(spi);
-	writel(cpu_to_be32(addr), wdata + 2);
-	__spi_write32a(spi, addr);
-	spi_sync_locked(spi, &m);
-	__pollstatus(spi);
-
+	bbsi_write(spi, addr, src, len);  /* blocks until all len bytes are written or the operation fails */
	spi_bus_unlock(spi->master);
-	vfree(t);
 }
 
 #endif // __BBSI_H
