Add support for DMA coherent skb heads

Allocate skb heads from DMA coherent memory, which is mapped
non-cacheable. This requires a proprietary change in the kernel that adds
the dma_coherent field to struct sk_buff.

Using DMA coherent (non-cacheable) memory only makes sense when using the
fast forwarding path for the NXP (formerly Freescale) QorIQ LS1024A
(formerly Mindspeed Comcerto 2000) SoC.

See Documentation/networking/ls1024a-fastforwarding.txt in our 4.1
kernel for more information.

It should be noted that the DMA coherent approach is different from the
ZONE_DMA_NCNB approach that was used for the 3.2 kernel. In the 3.2
kernel, they repurposed ZONE_DMA as ZONE_DMA_NCNB. They mapped the
entire zone as non-cacheable (but bufferable) and hacked the zone
allocator to not fall back from ZONE_NORMAL to ZONE_DMA (ZONE_DMA_NCNB)
under memory pressure. This allowed them to use kmalloc() and friends
on the non-cacheable memory area.

Change-Id: If536c91ed0d393d0bc0969e66556fcf8fbccda65
diff --git a/.local-symbols b/.local-symbols
index a558d06..4837225 100644
--- a/.local-symbols
+++ b/.local-symbols
@@ -184,6 +184,7 @@
 ATH10K_USE_NCNB_DESCR=
 ATH10K_USE_NCNB_SKB=
 ATH10K_USE_COMCERTO_WIFI_FASTPATH=
+ATH10K_USE_DMA_COHERENT_SKB=
 WCN36XX=
 WCN36XX_DEBUGFS=
 B43=
diff --git a/drivers/net/wireless/ath/ath10k/Kconfig b/drivers/net/wireless/ath/ath10k/Kconfig
index bf01fad..47d564a 100644
--- a/drivers/net/wireless/ath/ath10k/Kconfig
+++ b/drivers/net/wireless/ath/ath10k/Kconfig
@@ -52,6 +52,7 @@
 config ATH10K_USE_NCNB_DESCR
 	bool "Atheros support for GFP_DMA_NCNB for descriptors"
 	depends on ATH10K
+	depends on COMCERTO_ZONE_DMA_NCNB
 	default n
 	---help---
 	  This option enables use of GFP_DMA_NCNB for descriptors
@@ -61,11 +62,24 @@
 config ATH10K_USE_NCNB_SKB
 	bool "Atheros support for GFP_DMA_NCNB for sk_buffs"
 	depends on ATH10K
+	depends on COMCERTO_ZONE_DMA_NCNB
 	default n
 	---help---
 	  This option enables use of GFP_DMA_NCNB for sk_buffs on
 	  platforms like the Mindspeed Comcerto C2000.
 
+config ATH10K_USE_DMA_COHERENT_SKB
+	bool "Atheros support for DMA coherent skb heads"
+	depends on ATH10K
+	depends on COMCERTO_DMA_COHERENT_SKB
+	depends on ATH10K_USE_COMCERTO_WIFI_FASTPATH
+	default n
+	---help---
+	  This option enables use of DMA coherent skb heads (data buffers) on
+	  platforms like the NXP (formerly Freescale) QorIQ LS1024A
+	  (f.k.a. Mindspeed Comcerto 2000). This requires a custom modification
+	  to the kernel that adds the dma_coherent field to struct sk_buff.
+
 config ATH10K_USE_COMCERTO_WIFI_FASTPATH
 	bool "Atheros support for comcerto_wifi_rx_fastpath"
 	depends on ATH10K
diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c
index a12e399..179dc38 100644
--- a/drivers/net/wireless/ath/ath10k/htt_rx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
@@ -103,6 +103,9 @@
 #ifdef CPTCFG_ATH10K_USE_NCNB_SKB
 		skb = __dev_alloc_skb(HTT_RX_BUF_SIZE + HTT_RX_DESC_ALIGN,
 		                      GFP_DMA_NCNB | GFP_ATOMIC);
+#elif defined(CPTCFG_ATH10K_USE_DMA_COHERENT_SKB)
+		skb = alloc_dma_coherent_skb(HTT_RX_BUF_SIZE +
+				HTT_RX_DESC_ALIGN);
 #else
 		skb = dev_alloc_skb(HTT_RX_BUF_SIZE + HTT_RX_DESC_ALIGN);
 #endif
diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c
index 2b9f355..fa4c5a7 100644
--- a/drivers/net/wireless/ath/ath10k/pci.c
+++ b/drivers/net/wireless/ath/ath10k/pci.c
@@ -398,6 +398,8 @@
 #ifdef CPTCFG_ATH10K_USE_NCNB_SKB
 	skb = __dev_alloc_skb(pipe->buf_sz,
 	                      GFP_DMA_NCNB | GFP_ATOMIC);
+#elif defined(CPTCFG_ATH10K_USE_DMA_COHERENT_SKB)
+	skb = alloc_dma_coherent_skb(HTT_RX_BUF_SIZE + HTT_RX_DESC_ALIGN);
 #else
 	skb = dev_alloc_skb(pipe->buf_sz);
 #endif
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 6da1ca5..5e726ff 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2113,6 +2113,41 @@
 			/* fastpath handled this packet */
 			slowpath = 0;
 		}
+#ifdef CPTCFG_ATH10K_USE_DMA_COHERENT_SKB
+		else {
+			if (skb->dma_coherent) {
+				/*
+				 * We must never inject DMA coherent skb heads
+				 * into the kernel which is why we copy the skb
+				 * into normal memory. The reason we must avoid
+				 * DMA coherent memory is that struct
+				 * skb_shared_info is stored at the end of the
+				 * skb head. This struct has a field named
+				 * dataref which is defined as atomic_t, and
+				 * ARM prohibits atomic operations on memory
+				 * that is not normal. In practice,
+				 * __skb_clone() hangs at
+				 * atomic_inc(&(skb_shinfo(skb)->dataref));
+				 * when it tries to operate on DMA coherent
+				 * (non-cacheable) memory.
+				 *
+				 * One of the situations where this code path
+				 * is executed is when we receive multicast
+				 * frames. comcerto_wifi_rx_fastpath() always
+				 * rejects multicast frames which is why we end
+				 * up here. The bridge code might have to clone
+				 * these frames because they need to be copied
+				 * to multiple output ports. Hence, we need to
+				 * move the data buffer into normal memory. */
+				struct sk_buff *n = skb_copy(skb, GFP_ATOMIC);
+				consume_skb(skb);
+				if (!n)
+					slowpath = 0;
+				else
+					skb = n;
+			}
+		}
+#endif
 		if (slowpath)
 #endif
 		{