bcmgenet: don't panic the kernel just because of an skb alloc failure.
It's normal (though rather rare) to run out of memory in an interrupt handler;
that is definitely not worthy of a kernel BUG() call. We're getting several
of these crashes per day across GFiberTV. In our tests, this change
eliminates the crashes entirely. (Note that if we fail to allocate memory,
the packet is still lost; we just no longer crash the kernel over it.)
In case of a memory allocation failure, we keep the device ring buffer full
by discarding the packet that *was* waiting in the ring buffer, then re-use
the already-allocated skb.
b/8522311
Change-Id: I7e2e6b160c635264c0806db83f4bc5f7bde1d9f3
diff --git a/drivers/net/bcmgenet/bcmgenet.c b/drivers/net/bcmgenet/bcmgenet.c
index 92f9855..39fd6f0 100755
--- a/drivers/net/bcmgenet/bcmgenet.c
+++ b/drivers/net/bcmgenet/bcmgenet.c
@@ -59,6 +59,8 @@
#include "bcmgenet.h"
#include "if_net.h"
+#define MY_BUG_ON(c) do { if ((c)) { printk(KERN_EMERG "MY_BUG_ON(%s) at %s:%d\n", #c, __FILE__, __LINE__); BUG(); } } while (0)
+
#ifdef CONFIG_NET_SCH_MULTIQ
#if CONFIG_BRCM_GENET_VERSION == 1
@@ -866,7 +868,7 @@
-------------------------------------------------------------------------- */
static void bcmgenet_timeout(struct net_device *dev)
{
- BUG_ON(dev == NULL);
+ MY_BUG_ON(dev == NULL);
TRACE(("%s: bcmgenet_timeout\n", dev->name));
@@ -2207,7 +2209,7 @@
struct BcmEnet_devctrl *pDevCtrl = ptr;
struct net_device *dev = pDevCtrl->dev;
struct Enet_CB *cb;
- struct sk_buff *skb;
+ struct sk_buff *skb, *new_skb;
unsigned long dmaFlag;
int len, discard_cnt = 0;
unsigned int rxpktprocessed = 0, rxpkttoprocess = 0;
@@ -2215,6 +2217,8 @@
unsigned long start_addr, end_addr;
volatile struct rDmaRingRegs *rDma_desc;
+ MY_BUG_ON(pDevCtrl->num_new_skbs != 0);
+
rDma_desc = &pDevCtrl->rxDma->rDmaRings[index];
p_index = rDma_desc->rdma_producer_index;
@@ -2254,7 +2258,8 @@
cb = &pDevCtrl->rxCbs[read_ptr];
skb = cb->skb;
- BUG_ON(skb == NULL);
+ MY_BUG_ON(skb == NULL);
+ cb->skb = NULL;
dma_unmap_single(&dev->dev, cb->dma_addr,
pDevCtrl->rxBufLen, DMA_FROM_DEVICE);
@@ -2274,8 +2279,8 @@
pDevCtrl->rxBds[read_ptr].length_status);
dev->stats.rx_dropped++;
dev->stats.rx_errors++;
- dev_kfree_skb_any(cb->skb);
- cb->skb = NULL;
+ MY_BUG_ON(pDevCtrl->num_new_skbs >= TOTAL_DESC * 2);
+ pDevCtrl->new_skbs[pDevCtrl->num_new_skbs++] = skb;
continue;
}
/* report errors */
@@ -2297,11 +2302,25 @@
dev->stats.rx_errors++;
/* discard the packet and advance consumer index.*/
- dev_kfree_skb_any(cb->skb);
- cb->skb = NULL;
+ MY_BUG_ON(pDevCtrl->num_new_skbs >= TOTAL_DESC * 2);
+ pDevCtrl->new_skbs[pDevCtrl->num_new_skbs++] = skb;
continue;
} /* error packet */
+ MY_BUG_ON(pDevCtrl->num_new_skbs >= TOTAL_DESC * 2);
+ new_skb = netdev_alloc_skb(pDevCtrl->dev,
+ pDevCtrl->rxBufLen + SKB_ALIGNMENT);
+ if (!new_skb) {
+ pr_err_ratelimited("%s: failed to allocate skb, "
+ "dropping old packet.\n", dev->name);
+ pDevCtrl->new_skbs[pDevCtrl->num_new_skbs++] = skb;
+ dev->stats.rx_over_errors++;
+ dev->stats.rx_dropped++;
+ continue;
+ }
+ handleAlignment(pDevCtrl, new_skb);
+ pDevCtrl->new_skbs[pDevCtrl->num_new_skbs++] = new_skb;
+
skb_put(skb, len);
if (pDevCtrl->rbuf->rbuf_ctrl & RBUF_64B_EN) {
struct status_64 *status;
@@ -2346,7 +2365,6 @@
#else
netif_receive_skb(skb);
#endif
- cb->skb = NULL;
TRACE(("pushed up to kernel\n"));
}
@@ -2356,6 +2374,7 @@
* rdma_read_pointer so do not update it until after
* assign_rx_buffers_for_queue has been called.
*/
+ MY_BUG_ON(rxpktprocessed != pDevCtrl->num_new_skbs);
assign_rx_buffers_for_queue(pDevCtrl, index);
rDma_desc->rdma_read_pointer = (read_ptr << 1) &
DMA_RW_POINTER_MASK;
@@ -2441,13 +2460,21 @@
read_ptr = (read_pointer & DMA_RW_POINTER_MASK) >> 1;
while (pDevCtrl->rxBds[read_ptr].address == 0) {
cb = &pDevCtrl->rxCbs[read_ptr];
- skb = netdev_alloc_skb(pDevCtrl->dev,
- pDevCtrl->rxBufLen + SKB_ALIGNMENT);
- if (!skb) {
- printk(KERN_ERR " failed to allocate skb for rx\n");
- break;
+ if (pDevCtrl->num_new_skbs > 0) {
+ skb = pDevCtrl->new_skbs[--pDevCtrl->num_new_skbs];
+ pDevCtrl->new_skbs[pDevCtrl->num_new_skbs] = NULL;
+ MY_BUG_ON(!skb);
+ } else {
+ skb = netdev_alloc_skb(pDevCtrl->dev,
+ pDevCtrl->rxBufLen + SKB_ALIGNMENT);
+ if (!skb) {
+ pr_err_ratelimited(
+ "%s: failed to allocate skb for rx\n",
+ pDevCtrl->dev->name);
+ break;
+ }
+ handleAlignment(pDevCtrl, skb);
}
- handleAlignment(pDevCtrl, skb);
/* keep count of any BD's we refill */
bdsfilled++;
@@ -2608,7 +2635,7 @@
rDma_desc->rdma_producer_index = 0;
rDma_desc->rdma_consumer_index = 0;
/* Initialize default queue. */
- BUG_ON(GENET_RX_TOTAL_MQ_BD > TOTAL_DESC);
+ MY_BUG_ON(GENET_RX_TOTAL_MQ_BD > TOTAL_DESC);
rDma_desc->rdma_ring_buf_size = ((GENET_RX_DEFAULT_BD_CNT <<
DMA_RING_SIZE_SHIFT) | RX_BUF_LENGTH);
rDma_desc->rdma_start_addr = 2 * GENET_RX_TOTAL_MQ_BD;
@@ -3260,8 +3287,8 @@
volatile unsigned long *addr =
&pDevCtrl->hfb[filter * filter_size];
- BUG_ON(PCP_START < 0 || PCP_START >= PCP_COUNT);
- BUG_ON(PCP_END < 0 || PCP_END >= PCP_COUNT);
+ MY_BUG_ON(PCP_START < 0 || PCP_START >= PCP_COUNT);
+ MY_BUG_ON(PCP_END < 0 || PCP_END >= PCP_COUNT);
/*
* Mask the first 12 bytes (destination mac address, source mac
* address. This involves setting the first 24 bytes (NOT 12!)
diff --git a/drivers/net/bcmgenet/bcmgenet.h b/drivers/net/bcmgenet/bcmgenet.h
index 74bce21..3685d56 100755
--- a/drivers/net/bcmgenet/bcmgenet.h
+++ b/drivers/net/bcmgenet/bcmgenet.h
@@ -117,6 +117,8 @@
unsigned int rxRingCIndex[GENET_RX_RING_COUNT]; /* consumer index for each ring */
unsigned int rxRingDiscCnt[GENET_RX_RING_COUNT]; /* # of discarded pkt for each ring */
unsigned char *rxRingStart[GENET_RX_RING_COUNT]; /* ring buffer start addr.*/
+ struct sk_buff *new_skbs[TOTAL_DESC * 2]; /* skbs to put in queue */
+ int num_new_skbs; /* number of slots used in new_skbs */
/* other misc variables */
int irq0; /* regular IRQ */