| /* |
 * This file implements the Chelsio DDP (direct data placement) buffer
 * management and page pod allocation.
| * |
| * Copyright (C) 2006-2009 Chelsio Communications. All rights reserved. |
| * |
| * Written by Dimitris Michailidis (dm@chelsio.com) |
| * |
| * This program is distributed in the hope that it will be useful, but WITHOUT |
| * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this |
| * release for licensing terms and conditions. |
| */ |
| |
| #include <linux/mm.h> |
| #include <linux/pci.h> |
| #include <linux/highmem.h> |
| #ifndef LINUX_2_4 |
| #include <linux/dma-mapping.h> |
| #endif /* LINUX_2_4 */ |
| #include "defs.h" |
| #include "tom.h" |
| #include "t3_ddp.h" |
| #include "tcb.h" |
| #include "trace.h" |
| |
| /* |
| * Return the # of page pods needed to accommodate a # of pages. |
| */ |
| static inline unsigned int pages2ppods(unsigned int pages) |
| { |
| return (pages + PPOD_PAGES - 1) / PPOD_PAGES + NUM_SENTINEL_PPODS; |
| } |
| |
| /** |
| * t3_pin_pages - pin a user memory range and prepare it for DDP |
 * @pdev: the PCI device used for DMA mapping
 * @addr: the starting address
 * @len: the length of the range
 * @newgl: returns the gather list describing the pinned range
 * @gl: an existing gather list, may be %NULL
 *
 * Pins the pages in the user-space memory range [addr, addr + len) and
 * maps them for DMA.  Returns a gather list with the pinned pages and
 * their physical addresses.  If @gl is non-%NULL the pages it describes
 * are compared against the pages for [addr, addr + len), and if the
 * existing gather list already covers the range a new list is not
 * allocated.  Returns 0 on success, or a negative errno.  On success, if
 * a new gather list was allocated it is returned in @newgl.
| */ |
| int t3_pin_pages(struct pci_dev *pdev, unsigned long addr, size_t len, |
| struct ddp_gather_list **newgl, |
| const struct ddp_gather_list *gl) |
| { |
| int i, err; |
| size_t pg_off; |
| unsigned int npages; |
| struct ddp_gather_list *p; |
| |
| if (segment_eq(get_fs(), KERNEL_DS) || !len) |
| return -EINVAL; |
| if (!access_ok(VERIFY_WRITE, addr, len)) |
| return -EFAULT; |
| |
| pg_off = addr & ~PAGE_MASK; |
| npages = (pg_off + len + PAGE_SIZE - 1) >> PAGE_SHIFT; |
| p = kmalloc(sizeof(struct ddp_gather_list) + |
| npages * (sizeof(dma_addr_t) + sizeof(struct page *)), |
| GFP_KERNEL); |
| if (!p) |
| return -ENOMEM; |
| |
| p->type = DDP_TYPE_USER; |
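	/*
	 * The gather list is allocated as a single block: the ddp_gather_list
	 * header, then npages DMA addresses, then npages page pointers, so
	 * p->pages points just past the end of the phys_addr[] array.
	 */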
| p->pages = (struct page **)&p->phys_addr[npages]; |
| down_read(¤t->mm->mmap_sem); |
| /* |
| * get_user_pages() will mark the pages dirty so we don't need to do it |
| * later. See how get_user_pages() uses FOLL_TOUCH | FOLL_WRITE. |
| */ |
| err = get_user_pages(current, current->mm, addr, npages, 1, 0, |
| p->pages, NULL); |
| up_read(¤t->mm->mmap_sem); |
| if (err != npages) { |
| if (err < 0) |
| goto free_gl; |
| npages = err; |
| err = -EFAULT; |
| goto unpin; |
| } |
| |
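	/*
	 * If the caller's existing gather list already describes exactly
	 * these pages, drop the references we just took and return success
	 * without allocating a new list (*newgl is left NULL).
	 */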
| if (gl && gl->offset == pg_off && gl->nelem >= npages && |
| gl->length >= len) { |
| for (i = 0; i < npages; ++i) |
| if (p->pages[i] != gl->pages[i]) |
| goto different_gl; |
| err = 0; |
| goto unpin; |
| } |
| |
| different_gl: |
| p->length = len; |
| p->offset = pg_off; |
| p->nelem = npages; |
| p->phys_addr[0] = pci_map_page(pdev, p->pages[0], pg_off, |
| PAGE_SIZE - pg_off, |
| PCI_DMA_FROMDEVICE) - pg_off; |
| if (unlikely(t3_pci_dma_mapping_error(pdev, p->phys_addr[0]))) { |
| err = -ENOMEM; |
| goto unpin; |
| } |
| for (i = 1; i < npages; ++i) { |
| p->phys_addr[i] = pci_map_page(pdev, p->pages[i], 0, PAGE_SIZE, |
| PCI_DMA_FROMDEVICE); |
| if (unlikely(t3_pci_dma_mapping_error(pdev, p->phys_addr[i]))) { |
| err = -ENOMEM; |
| goto unpin; |
| } |
| } |
| |
| *newgl = p; |
| return 0; |
| unpin: |
| for (i = 0; i < npages; ++i) |
| put_page(p->pages[i]); |
| free_gl: |
| kfree(p); |
| *newgl = NULL; |
| return err; |
| } |
| |
| /** |
| * t3_map_pages - map a kernel memory range and prepare it for DDP |
| * and assumes caller handles page refcounting. |
| * In all other respects same as t3_pin_pages. |
| * @addr - the starting address |
| * @len - the length of the range |
| * @newgl - contains the pages and physical addresses of the range |
| * @gl - an existing gather list, may be %NULL |
| */ |
| |
| int t3_map_pages(struct pci_dev *pdev, unsigned long addr, size_t len, |
| struct ddp_gather_list **newgl, |
| const struct ddp_gather_list *gl) |
| { |
| int i, err; |
| size_t pg_off; |
| unsigned int npages; |
| struct ddp_gather_list *p; |
| |
| if (!len) |
| return -EINVAL; |
| |
| pg_off = addr & ~PAGE_MASK; |
| npages = (pg_off + len + PAGE_SIZE - 1) >> PAGE_SHIFT; |
| p = kmalloc(sizeof(struct ddp_gather_list) + |
| npages * (sizeof(dma_addr_t) + sizeof(struct page *)), |
| GFP_KERNEL); |
| if (!p) |
| return -ENOMEM; |
| |
| p->type = DDP_TYPE_KERNEL; |
| p->pages = (struct page **)&p->phys_addr[npages]; |
| |
	for (i = 0; i < npages; i++) {
| if ((addr < VMALLOC_START) || (addr >= VMALLOC_END)) |
| p->pages[i] = virt_to_page((void *)addr); |
| else |
| p->pages[i] = vmalloc_to_page((void *)addr); |
| addr += PAGE_SIZE; |
| } |
| |
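	/*
	 * As in t3_pin_pages(): if the existing gather list already covers
	 * this range, free the temporary list and report success without
	 * returning a new one.
	 */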
| if (gl && gl->offset == pg_off && gl->nelem >= npages && |
| gl->length >= len) { |
		for (i = 0; i < npages; ++i)
			if (p->pages[i] != gl->pages[i])
				goto different_gl;
| err = 0; |
| goto free_gl; |
| } |
| |
| different_gl: |
| p->length = len; |
| p->offset = pg_off; |
| p->nelem = npages; |
| p->phys_addr[0] = pci_map_page(pdev, p->pages[0], pg_off, |
| PAGE_SIZE - pg_off, |
| PCI_DMA_FROMDEVICE) - pg_off; |
| if (unlikely(t3_pci_dma_mapping_error(pdev, p->phys_addr[0]))) { |
| err = -ENOMEM; |
| goto free_gl; |
| } |
| for (i = 1; i < npages; ++i) { |
| p->phys_addr[i] = pci_map_page(pdev, p->pages[i], 0, PAGE_SIZE, |
| PCI_DMA_FROMDEVICE); |
| if (unlikely(t3_pci_dma_mapping_error(pdev, p->phys_addr[i]))) { |
| err = -ENOMEM; |
| goto free_gl; |
| } |
| } |
| |
| *newgl = p; |
| return 0; |
| free_gl: |
| kfree(p); |
| *newgl = NULL; |
| return err; |
| } |
| |
| static void unmap_ddp_gl(struct pci_dev *pdev, const struct ddp_gather_list *gl) |
| { |
| int i; |
| |
| if (!gl->nelem) |
| return; |
| |
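	/*
	 * The first page was mapped starting at gl->offset and its stored
	 * DMA address had the offset subtracted out (see t3_pin_pages()),
	 * so add it back before unmapping.
	 */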
| pci_unmap_page(pdev, gl->phys_addr[0] + gl->offset, |
| PAGE_SIZE - gl->offset, PCI_DMA_FROMDEVICE); |
| for (i = 1; i < gl->nelem; ++i) |
| pci_unmap_page(pdev, gl->phys_addr[i], PAGE_SIZE, |
| PCI_DMA_FROMDEVICE); |
| } |
| |
| static void ddp_gl_free_pages(struct ddp_gather_list *gl) |
| { |
| int i; |
| |
| for (i = 0; i < gl->nelem; ++i) |
| put_page(gl->pages[i]); |
| } |
| |
| void t3_free_ddp_gl(struct pci_dev *pdev, struct ddp_gather_list *gl) |
| { |
| unmap_ddp_gl(pdev, gl); |
| if (gl->type == DDP_TYPE_USER) { |
| ddp_gl_free_pages(gl); |
| } |
| kfree(gl); |
| } |
| |
| /* Max # of page pods for a buffer, enough for 1MB buffer at 4KB page size */ |
| #define MAX_PPODS 64U |
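/* Assuming PPOD_PAGES == 4 and ignoring sentinel pods, that is
 * 64 pods * 4 pages/pod * 4KB/page = 1MB. */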
| |
| /* |
| * Allocate page pods for DDP buffer 1 (the user buffer) and set up the tag in |
| * the TCB. We allocate page pods in multiples of PPOD_CLUSTER_SIZE. First we |
| * try to allocate enough page pods to accommodate the whole buffer, subject to |
| * the MAX_PPODS limit. If that fails we try to allocate PPOD_CLUSTER_SIZE page |
| * pods before failing entirely. |
| */ |
| static int alloc_buf1_ppods(struct sock *sk, struct ddp_state *p, |
| unsigned long addr, unsigned int len) |
| { |
| int tag, npages, nppods; |
| struct tom_data *d = TOM_DATA(CPL_IO_STATE(sk)->toedev); |
| |
| npages = ((addr & ~PAGE_MASK) + len + PAGE_SIZE - 1) >> PAGE_SHIFT; |
| nppods = min(pages2ppods(npages), MAX_PPODS); |
| nppods = ALIGN(nppods, PPOD_CLUSTER_SIZE); |
| tag = t3_alloc_ppods(d, nppods); |
| if (tag < 0 && nppods > PPOD_CLUSTER_SIZE) { |
| nppods = PPOD_CLUSTER_SIZE; |
| tag = t3_alloc_ppods(d, nppods); |
| } |
| if (tag < 0) |
| return -ENOMEM; |
| |
| p->ubuf_nppods = nppods; |
| p->ubuf_tag = tag; |
| #if NUM_DDP_KBUF == 1 |
| t3_set_ddp_tag(sk, 1, tag << 6); |
| #endif |
| return 0; |
| } |
| |
| /* |
| * Starting offset for the user DDP buffer. A non-0 value ensures a DDP flush |
| * won't block indefinitely if there's nothing to place (which should be rare). |
| */ |
| #define UBUF_OFFSET 1 |
| |
| static inline unsigned long select_ddp_flags(const struct sock *sk, int buf_idx, |
| int nonblock, int rcv_flags) |
| { |
| struct toedev *tdev = CPL_IO_STATE(sk)->toedev; |
| |
| if (buf_idx == 1) { |
| if (unlikely(rcv_flags & MSG_WAITALL)) |
| return V_TF_DDP_PUSH_DISABLE_1(1); |
| |
| if (nonblock) |
| return V_TF_DDP_BUF1_FLUSH(1); |
| |
| return V_TF_DDP_BUF1_FLUSH(!TOM_TUNABLE(tdev, ddp_push_wait)); |
| } |
| |
| if (unlikely(rcv_flags & MSG_WAITALL)) |
| return V_TF_DDP_PUSH_DISABLE_0(1); |
| |
| if (nonblock) |
| return V_TF_DDP_BUF0_FLUSH(1); |
| |
| return V_TF_DDP_BUF0_FLUSH(!TOM_TUNABLE(tdev, ddp_push_wait)); |
| } |
| |
| /* |
| * Reposts the kernel DDP buffer after it has been previously become full and |
| * invalidated. We just need to reset the offset and adjust the DDP flags. |
| * Conveniently, we can set the flags and the offset with a single message. |
| * Note that this function does not set the buffer length. Again conveniently |
| * our kernel buffer is of fixed size. If the length needs to be changed it |
| * needs to be done separately. |
| */ |
| void t3_repost_kbuf(struct sock *sk, unsigned int bufidx, int modulate, |
| int activate, int nonblock) |
| { |
| struct ddp_state *p = DDP_STATE(sk); |
| unsigned long flags; |
| |
| p->buf_state[bufidx].cur_offset = p->kbuf[bufidx]->offset; |
| p->buf_state[bufidx].flags = p->kbuf_noinval ? DDP_BF_NOINVAL : 0; |
| p->buf_state[bufidx].gl = p->kbuf[bufidx]; |
| p->cur_buf = bufidx; |
| p->kbuf_idx = bufidx; |
| |
| flags = select_ddp_flags(sk, bufidx, nonblock, 0); |
| |
| if (!bufidx) |
| t3_setup_ddpbufs(sk, 0, 0, 0, 0, flags | |
| V_TF_DDP_PSH_NO_INVALIDATE0(p->kbuf_noinval) | |
| V_TF_DDP_PSH_NO_INVALIDATE1(p->kbuf_noinval) | |
| V_TF_DDP_BUF0_VALID(1), |
| V_TF_DDP_BUF0_FLUSH(1) | |
| V_TF_DDP_PSH_NO_INVALIDATE0(1) | |
| V_TF_DDP_PSH_NO_INVALIDATE1(1) | V_TF_DDP_OFF(1) | |
| V_TF_DDP_BUF0_VALID(1) | |
| V_TF_DDP_ACTIVE_BUF(activate), modulate); |
| else |
| t3_setup_ddpbufs(sk, 0, 0, 0, 0, flags | |
| V_TF_DDP_PSH_NO_INVALIDATE0(p->kbuf_noinval) | |
| V_TF_DDP_PSH_NO_INVALIDATE1(p->kbuf_noinval) | |
| V_TF_DDP_BUF1_VALID(1) | |
| V_TF_DDP_ACTIVE_BUF(activate), |
| V_TF_DDP_BUF1_FLUSH(1) | |
| V_TF_DDP_PSH_NO_INVALIDATE0(1) | |
| V_TF_DDP_PSH_NO_INVALIDATE1(1) | V_TF_DDP_OFF(1) | |
| V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1), |
| modulate); |
| |
| } |
| |
| /** |
| * setup_iovec_ppods - setup HW page pods for a user iovec |
| * @sk: the associated socket |
| * @iov: the iovec |
| * @oft: additional bytes to map before the start of the buffer |
| * |
| * Pins a user iovec and sets up HW page pods for DDP into it. We allocate |
| * page pods for user buffers on the first call per socket. Afterwards we |
| * limit the buffer length to whatever the existing page pods can accommodate. |
| * Returns a negative error code or the length of the mapped buffer. |
| * |
| * The current implementation handles iovecs with only one entry. |
| */ |
| static int setup_iovec_ppods(struct sock *sk, const struct iovec *iov, int oft) |
| { |
| int err; |
| unsigned int len; |
| struct ddp_gather_list *gl; |
| struct ddp_state *p = DDP_STATE(sk); |
| unsigned long addr = (unsigned long)iov->iov_base - oft; |
| |
| if (unlikely(!p->ubuf_nppods)) { |
| err = alloc_buf1_ppods(sk, p, addr, iov->iov_len + oft); |
| if (err) |
| return err; |
| } |
| |
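	/*
	 * Cap the mapped length by, in order: what the allocated page pods
	 * can hold, the maximum the TCB length field can express, the
	 * receive window less 32KB of slack, and the iovec itself.
	 */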
| len = (p->ubuf_nppods - NUM_SENTINEL_PPODS) * PPOD_PAGES * PAGE_SIZE; |
| len -= addr & ~PAGE_MASK; |
| if (len > M_TCB_RX_DDP_BUF0_LEN) |
| len = M_TCB_RX_DDP_BUF0_LEN; |
| len = min(len, tcp_sk(sk)->rcv_wnd - 32768); |
| len = min_t(int, len, iov->iov_len + oft); |
| |
| if (len <= p->kbuf[0]->length) |
| return -EINVAL; |
| |
| if (!segment_eq(get_fs(), KERNEL_DS)) |
| err = t3_pin_pages(p->pdev, addr, len, &gl, p->ubuf); |
| else |
| err = t3_map_pages(p->pdev, addr, len, &gl, p->ubuf); |
| if (err < 0) |
| return err; |
| if (gl) { |
| if (p->ubuf) |
| t3_free_ddp_gl(p->pdev, p->ubuf); |
| p->ubuf = gl; |
| t3_setup_ppods(sk, gl, pages2ppods(gl->nelem), p->ubuf_tag, len, |
| gl->offset, 0); |
| } |
| return len; |
| } |
| #if 0 |
| /* |
| * Post a user buffer as DDP buffer 1. |
| */ |
| int t3_post_ubuf(struct sock *sk, const struct iovec *iov, |
| int nonblock, int rcv_flags, int modulate, int post_kbuf) |
| { |
| int len; |
| unsigned long flags; |
| struct ddp_state *p = DDP_STATE(sk); |
| |
| len = setup_iovec_ppods(sk, iov, UBUF_OFFSET); |
| if (len < 0) |
| return len; |
| |
| p->buf_state[1].cur_offset = UBUF_OFFSET; |
| p->buf_state[1].flags = DDP_BF_NOCOPY; |
| p->buf_state[1].gl = p->ubuf; |
| p->cur_buf = 1; |
| |
| flags = select_ddp_flags(sk, 1, nonblock, rcv_flags); |
| |
| if (post_kbuf) { |
| /* kbuf_noinval must be 0 for concurrent posting */ |
| p->buf_state[0].cur_offset = p->kbuf.offset; |
| p->buf_state[0].flags = 0; |
| p->buf_state[0].gl = &p->kbuf; |
| flags |= V_TF_DDP_BUF0_VALID(1); |
| } |
| |
| /* |
| * Do not disable DDP off here, HW may have turned it off due to memory |
| * exhaustion and we don't want to reenable it for this connection. |
| */ |
| t3_setup_ddpbufs(sk, 0, 0, len, UBUF_OFFSET, V_TF_DDP_BUF1_VALID(1) | |
| V_TF_DDP_ACTIVE_BUF(1) | flags, |
| V_TF_DDP_PSH_NO_INVALIDATE(1) | |
| V_TF_DDP_BUF1_FLUSH(1) | V_TF_DDP_PUSH_DISABLE_1(1) | |
| V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_BUF0_VALID(1) | |
| V_TF_DDP_ACTIVE_BUF(1) | V_TF_DDP_INDICATE_OUT(1) | |
| (M_TCB_RX_DDP_BUF0_OFFSET << |
| (S_TCB_RX_DDP_BUF0_OFFSET + 32)), modulate); |
| return 0; |
| } |
| #endif |
| |
| |
| /* |
| * |
| */ |
| void t3_cancel_ubuf(struct sock *sk, long *timeo) |
| { |
| struct ddp_state *p = DDP_STATE(sk); |
| int rc; |
| int ubuf_pending = t3_ddp_ubuf_pending(sk); |
| long gettcbtimeo; |
| int canceled=0; |
| int norcv=0; |
| |
| #ifdef LINUX_2_4 |
| DECLARE_WAITQUEUE(wait, current); |
| #else |
| DEFINE_WAIT(wait); |
| #endif /* LINUX_2_4 */ |
| |
| |
| if (!p->ddp_setup || !p->pdev) |
| return; |
| |
| gettcbtimeo = max_t(long, msecs_to_jiffies(1), *timeo); |
| p->cancel_ubuf = 1; |
| |
| while (ubuf_pending && !norcv) { |
| #ifdef T3_TRACE |
| T3_TRACE3(TIDTB(sk), |
| "t3_cancel_ubuf: flags0 0x%x flags1 0x%x get_tcb_count %d", |
| p->buf_state[0].flags & (DDP_BF_NOFLIP | DDP_BF_NOCOPY), |
| p->buf_state[1].flags & (DDP_BF_NOFLIP | DDP_BF_NOCOPY), |
| p->get_tcb_count); |
| #endif |
| if (!canceled && !p->get_tcb_count) { |
| canceled = 1; |
| t3_cancel_ddpbuf(sk, p->cur_buf); |
| } |
| |
| #ifdef LINUX_2_4 |
| add_wait_queue(sk->sleep, &wait); |
| #endif /* LINUX_2_4 */ |
| do { |
| #ifdef LINUX_2_4 |
| set_current_state(TASK_INTERRUPTIBLE); |
| #else |
| prepare_to_wait(sk_sleep(sk), &wait, |
| TASK_INTERRUPTIBLE); |
| #endif /* LINUX_2_4 */ |
| rc = sk_wait_event(sk, &gettcbtimeo, |
| !(DDP_STATE(sk)->ddp_setup ? DDP_STATE(sk)->get_tcb_count : 0) && |
| !(sk->sk_shutdown & RCV_SHUTDOWN)); |
| p = DDP_STATE(sk); |
| |
| #ifndef LINUX_2_4 |
| finish_wait(sk_sleep(sk), &wait); |
| #endif /* LINUX_2_4 */ |
| if (signal_pending(current)) |
| break; |
| |
| gettcbtimeo = max_t(long, gettcbtimeo << 1, *timeo); |
| norcv = (sk->sk_err == ECONNRESET) || (sk->sk_shutdown & RCV_SHUTDOWN); |
| } while ((p->ddp_setup ? p->get_tcb_count : 0) && !norcv); |
| |
| #ifdef LINUX_2_4 |
| set_current_state(TASK_RUNNING); |
| remove_wait_queue(sk->sleep, &wait); |
| #endif /* LINUX_2_4 */ |
| |
| ubuf_pending = t3_ddp_ubuf_pending(sk); |
| |
| if (signal_pending(current)) |
| break; |
| } |
| |
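	/*
	 * If a signal cut the interruptible wait above short while the HW
	 * may still own the buffer, we must not return yet: poll
	 * uninterruptibly, dropping the socket lock around each sleep, until
	 * the outstanding GET_TCB completes or the connection can no longer
	 * receive data.
	 */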
| while (t3_ddp_ubuf_pending(sk) && !norcv) { |
| if (!canceled && !p->get_tcb_count) { |
| canceled=1; |
| t3_cancel_ddpbuf(sk, p->cur_buf); |
| } |
| |
| do { |
| release_sock(sk); |
| gettcbtimeo = (net_random() % (HZ / 2)) + 2; |
| __set_current_state(TASK_UNINTERRUPTIBLE); |
| schedule_timeout(gettcbtimeo); |
| lock_sock(sk); |
| p = DDP_STATE(sk); |
| norcv = (sk->sk_err == ECONNRESET) || (sk->sk_shutdown & RCV_SHUTDOWN); |
| } while ((p->ddp_setup ? p->get_tcb_count : 0) && !norcv); |
| } |
| |
| if (p->ddp_setup) |
| p->cancel_ubuf = 0; |
| |
| return; |
| } |
| |
| #define OVERLAY_MASK (V_TF_DDP_PSH_NO_INVALIDATE0(1) | \ |
| V_TF_DDP_PSH_NO_INVALIDATE1(1) | \ |
| V_TF_DDP_BUF1_FLUSH(1) | \ |
| V_TF_DDP_BUF0_FLUSH(1) | \ |
| V_TF_DDP_PUSH_DISABLE_1(1) | \ |
| V_TF_DDP_PUSH_DISABLE_0(1) | \ |
| V_TF_DDP_INDICATE_OUT(1)) |
| |
| /* |
| * Post a user buffer as an overlay on top of the current kernel buffer. |
| */ |
| int t3_overlay_ubuf(struct sock *sk, const struct iovec *iov, |
| int nonblock, int rcv_flags, int modulate, int post_kbuf) |
| { |
| int len, ubuf_idx; |
| unsigned long flags; |
| struct ddp_state *p = DDP_STATE(sk); |
| |
| if (!p->ddp_setup || !p->pdev) |
| return -1; |
| |
| len = setup_iovec_ppods(sk, iov, 0); |
| if (len < 0) |
| return len; |
| |
| ubuf_idx = p->kbuf_idx; |
| p->buf_state[ubuf_idx].flags = DDP_BF_NOFLIP; |
| /* Use existing offset */ |
| /* Don't need to update .gl, user buffer isn't copied. */ |
| p->cur_buf = ubuf_idx; |
| |
| flags = select_ddp_flags(sk, ubuf_idx, nonblock, rcv_flags); |
| |
| if (post_kbuf) { |
| struct ddp_buf_state *dbs = &p->buf_state[ubuf_idx ^ 1]; |
| |
| dbs->cur_offset = 0; |
| dbs->flags = 0; |
| dbs->gl = p->kbuf[ubuf_idx ^ 1]; |
| p->kbuf_idx ^= 1; |
| flags |= p->kbuf_idx ? |
| V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_PUSH_DISABLE_1(0) : |
| V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_PUSH_DISABLE_0(0); |
| } |
| |
| if (ubuf_idx == 0) { |
| t3_overlay_ddpbuf(sk, 0, p->ubuf_tag << 6, p->kbuf_tag[1] << 6, |
| len); |
| t3_setup_ddpbufs(sk, 0, 0, p->kbuf[1]->length, 0, |
| flags, |
| OVERLAY_MASK | flags, 1); |
| } else { |
| t3_overlay_ddpbuf(sk, 1, p->kbuf_tag[0] << 6, p->ubuf_tag << 6, |
| len); |
| t3_setup_ddpbufs(sk, p->kbuf[0]->length, 0, 0, 0, |
| flags, |
| OVERLAY_MASK | flags, 1); |
| } |
| #ifdef T3_TRACE |
| T3_TRACE5(TIDTB(sk), |
| "t3_overlay_ubuf: tag %u flags 0x%x mask 0x%x ubuf_idx %d " |
| " kbuf_idx %d", |
| p->ubuf_tag, flags, OVERLAY_MASK, ubuf_idx, p->kbuf_idx); |
| #endif |
| return 0; |
| } |
| |
| /* |
| * Clean up DDP state that needs to survive until socket close time, such as the |
| * DDP buffers. The buffers are already unmapped at this point as unmapping |
| * needs the PCI device and a socket may close long after the device is removed. |
| */ |
| void t3_cleanup_ddp(struct sock *sk) |
| { |
| struct ddp_state *p = DDP_STATE(sk); |
| int idx; |
| |
| if (!p->ddp_setup) |
| return; |
| |
| for (idx = 0; idx < NUM_DDP_KBUF; idx++) |
| if (p->kbuf[idx]) { |
| ddp_gl_free_pages(p->kbuf[idx]); |
| kfree(p->kbuf[idx]); |
| } |
| |
| if (p->ubuf) { |
| ddp_gl_free_pages(p->ubuf); |
| kfree(p->ubuf); |
| } |
| p->ddp_setup = 0; |
| } |
| |
| /* |
| * This is a companion to t3_cleanup_ddp() and releases the HW resources |
| * associated with a connection's DDP state, such as the page pods. |
| * It's called when HW is done with a connection. The rest of the state |
| * remains available until both HW and the app are done with the connection. |
| */ |
| void t3_release_ddp_resources(struct sock *sk) |
| { |
| struct ddp_state *p = DDP_STATE(sk); |
| |
| if (p->ddp_setup) { |
| struct tom_data *d = TOM_DATA(CPL_IO_STATE(sk)->toedev); |
| int idx; |
| |
| for (idx = 0; idx < NUM_DDP_KBUF; idx++) { |
| t3_free_ppods(d, p->kbuf_tag[idx], |
| p->kbuf_nppods[idx]); |
| unmap_ddp_gl(p->pdev, p->kbuf[idx]); |
| } |
| |
| if (p->ubuf_nppods) { |
| t3_free_ppods(d, p->ubuf_tag, p->ubuf_nppods); |
| p->ubuf_nppods = 0; |
| } |
| if (p->ubuf) |
| unmap_ddp_gl(p->pdev, p->ubuf); |
| |
| p->pdev = NULL; |
| } |
| } |
| #if 0 |
| void t3_post_kbuf(struct sock *sk, int modulate) |
| { |
| struct ddp_state *p = DDP_STATE(sk); |
| |
| t3_set_ddp_tag(sk, 0, p->kbuf_tag[0] << 6); |
| t3_set_ddp_buf(sk, 0, 0, p->kbuf[0]->length); |
| t3_repost_kbuf(sk, modulate, 1); |
| |
| #ifdef T3_TRACE |
| T3_TRACE1(TIDTB(sk), |
| "t3_post_kbuf: cur_buf = kbuf_idx = %u ", p->cur_buf); |
| #endif |
| } |
| #else |
| void t3_post_kbuf(struct sock *sk, int modulate, int nonblock) |
| { |
| struct ddp_state *p = DDP_STATE(sk); |
| |
| t3_set_ddp_tag(sk, p->cur_buf, p->kbuf_tag[p->cur_buf] << 6); |
| t3_set_ddp_buf(sk, p->cur_buf, 0, p->kbuf[p->cur_buf]->length); |
| t3_repost_kbuf(sk, p->cur_buf, modulate, 1, nonblock); |
| |
| |
| #ifdef T3_TRACE |
| T3_TRACE1(TIDTB(sk), |
| "t3_post_kbuf: cur_buf = kbuf_idx = %u ", p->cur_buf); |
| #endif |
| } |
| #endif |
| |
| /* |
| * Prepare a socket for DDP. Must be called when the socket is known to be |
| * open. |
| */ |
| int t3_enter_ddp(struct sock *sk, unsigned int kbuf_size, unsigned int waitall, int nonblock) |
| { |
| int i, err = -ENOMEM; |
| unsigned int nppods, kbuf_pages, idx, dack_mode = 0; |
| struct ddp_state *p = DDP_STATE(sk); |
| struct toedev *tdev = CPL_IO_STATE(sk)->toedev; |
| struct tom_data *d = TOM_DATA(tdev); |
| |
| if (kbuf_size > M_TCB_RX_DDP_BUF0_LEN) |
| return -EINVAL; |
| |
| kbuf_pages = (kbuf_size + PAGE_SIZE - 1) >> PAGE_SHIFT; |
| nppods = pages2ppods(kbuf_pages); |
| |
| /* start the initialization of DDP state */ |
| BUG_ON(p->ddp_setup); |
| memset((void *)p, 0, sizeof *p); |
| p->ddp_setup = 1; |
| |
| p->pdev = d->pdev; |
| p->kbuf_noinval = !!waitall; |
| |
| p->kbuf_tag[NUM_DDP_KBUF - 1] = -1; |
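	/*
	 * For each kernel DDP buffer: allocate the gather-list metadata,
	 * reserve page pods, allocate and DMA-map the buffer pages, and
	 * finally write the page pods to the HW.
	 */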
| for (idx = 0; idx < NUM_DDP_KBUF; idx++) { |
| p->kbuf[idx] = |
| kmalloc(sizeof (struct ddp_gather_list) + kbuf_pages * |
| (sizeof(dma_addr_t) + sizeof(struct page *)), |
| GFP_KERNEL); |
| if (!p->kbuf[idx]) |
| goto err; |
| |
| p->kbuf_tag[idx] = t3_alloc_ppods(d, nppods); |
| if (p->kbuf_tag[idx] < 0) |
| goto err; |
| |
| p->kbuf_nppods[idx] = nppods; |
| p->kbuf[idx]->length = kbuf_size; |
| p->kbuf[idx]->offset = 0; |
| p->kbuf[idx]->nelem = kbuf_pages; |
| p->kbuf[idx]->pages = |
| (struct page **)&p->kbuf[idx]->phys_addr[kbuf_pages]; |
| |
| for (i = 0; i < kbuf_pages; ++i) { |
| p->kbuf[idx]->pages[i] = alloc_page(sk->sk_allocation); |
| if (!p->kbuf[idx]->pages[i]) { |
| p->kbuf[idx]->nelem = i; |
| goto err; |
| } |
| } |
| |
| for (i = 0; i < kbuf_pages; ++i) { |
| p->kbuf[idx]->phys_addr[i] = |
| pci_map_page(p->pdev, p->kbuf[idx]->pages[i], |
| 0, PAGE_SIZE, PCI_DMA_FROMDEVICE); |
| if (unlikely(t3_pci_dma_mapping_error(p->pdev, |
| p->kbuf[idx]->phys_addr[i]))) { |
| err = -ENOMEM; |
| goto err; |
| } |
| } |
| t3_setup_ppods(sk, p->kbuf[idx], nppods, p->kbuf_tag[idx], |
| p->kbuf[idx]->length, 0, 0); |
| } |
| t3_set_ddp_tag(sk, 0, p->kbuf_tag[0] << 6); |
| t3_set_ddp_buf(sk, 0, 0, p->kbuf[0]->length); |
| t3_repost_kbuf(sk, 0, 0, 1, nonblock); |
| |
| dack_mode = t3_select_delack(sk); |
| |
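	/*
	 * Program delayed ACK according to the mode chosen by
	 * t3_select_delack(): mode 1 sets the DACK flag, mode 2 the DACK_MSS
	 * flag, and mode 3 both.  Receive coalescing is taken from the
	 * ddp_rcvcoalesce tunable in all cases.
	 */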
| if (dack_mode == 1) { |
| t3_set_tcb_field(sk, W_TCB_T_FLAGS1, V_TF_RCV_COALESCE_ENABLE(1ULL)| |
| V_TF_DACK_MSS(1ULL)| |
| V_TF_DACK(1ULL), |
| (unsigned long long)TOM_TUNABLE(tdev,ddp_rcvcoalesce)| |
| V_TF_DACK(1ULL)); |
| } else if (dack_mode == 2) { |
| t3_set_tcb_field(sk, W_TCB_T_FLAGS1, V_TF_RCV_COALESCE_ENABLE(1ULL)| |
| V_TF_DACK_MSS(1ULL)| |
| V_TF_DACK(1ULL), |
| (unsigned long long)TOM_TUNABLE(tdev,ddp_rcvcoalesce)| |
| V_TF_DACK_MSS(1ULL)); |
	} else if (dack_mode == 3) {
		t3_set_tcb_field(sk, W_TCB_T_FLAGS1, V_TF_RCV_COALESCE_ENABLE(1ULL)|
				 V_TF_DACK_MSS(1ULL)|
				 V_TF_DACK(1ULL),
				 (unsigned long long)TOM_TUNABLE(tdev,ddp_rcvcoalesce)|
				 V_TF_DACK_MSS(1ULL)|
				 V_TF_DACK(1ULL));
| } |
| |
| #ifdef T3_TRACE |
| T3_TRACE4(TIDTB(sk), |
| "t3_enter_ddp: kbuf_size %u waitall %u tag0 %d tag1 %d", |
| kbuf_size, waitall, p->kbuf_tag[0], p->kbuf_tag[1]); |
| #endif |
| |
| return 0; |
| |
| err: |
| t3_release_ddp_resources(sk); |
| t3_cleanup_ddp(sk); |
| return err; |
| } |
| |
| int t3_ddp_copy(const struct sk_buff *skb, int offset, struct iovec *to, |
| int len) |
| { |
| int err, page_no, page_off; |
| struct ddp_gather_list *gl = skb_gl(skb); |
| |
| if (!gl->pages) { |
| dump_stack(); |
		BUG();
| } |
| |
| offset += gl->offset + skb_ulp_ddp_offset(skb); |
| page_no = offset >> PAGE_SHIFT; |
| page_off = offset & ~PAGE_MASK; |
| |
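	/*
	 * Walk the gather-list pages, copying up to a page at a time into
	 * the user iovec; only the first page starts at a non-zero offset.
	 */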
| while (len) { |
| int copy = min_t(int, len, PAGE_SIZE - page_off); |
| |
| err = memcpy_toiovec(to, page_address(gl->pages[page_no]) + |
| page_off, copy); |
| if (err) |
| return -EFAULT; |
| page_no++; |
| page_off = 0; |
| len -= copy; |
| } |
| return 0; |
| } |
| |
| /* Pagepod allocator */ |
| |
| /* |
| * Allocate n page pods. Returns -1 on failure or the page pod tag. |
| */ |
| int t3_alloc_ppods(struct tom_data *td, unsigned int n) |
| { |
| unsigned int i, j; |
| |
| if (unlikely(!td->ppod_map)) |
| return -1; |
| |
| spin_lock_bh(&td->ppod_map_lock); |
| /* |
| * Look for n consecutive available page pods. |
| * Make sure to guard from scanning beyond the table. |
| */ |
| for (i = 0; i + n - 1 < td->nppods; ) { |
| for (j = 0; j < n; ++j) /* scan ppod_map[i..i+n-1] */ |
| if (td->ppod_map[i + j]) { |
| i = i + j + 1; |
| goto next; |
| } |
| |
| memset(&td->ppod_map[i], 1, n); /* allocate range */ |
| spin_unlock_bh(&td->ppod_map_lock); |
| return i; |
| next: ; |
| } |
| spin_unlock_bh(&td->ppod_map_lock); |
| return -1; |
| } |
| |
| void t3_free_ppods(struct tom_data *td, unsigned int tag, unsigned int n) |
| { |
	/* No need to take ppod_map_lock here */
| memset(&td->ppod_map[tag], 0, n); |
| } |