| /* |
| * TCP offload support. |
| * |
| * Copyright (C) 2003-2009 Chelsio Communications. All rights reserved. |
| * |
| * Written by Dimitris Michailidis (dm@chelsio.com) |
| * |
| * This program is distributed in the hope that it will be useful, but WITHOUT |
| * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this |
| * release for licensing terms and conditions. |
| */ |
| |
| #ifdef LINUX_2_4 |
| #include <linux/stddef.h> |
| #include <linux/netdevice.h> |
| #endif /* LINUX_2_4 */ |
| #include <linux/module.h> |
| #include <linux/notifier.h> |
| #include <linux/toedev.h> |
| #include <net/sock.h> |
| #include <net/tcp.h> |
| #include <net/offload.h> |
| #include <linux/if_vlan.h> |
| |
| #include "toe_compat.h" |
| |
| #ifndef RAW_NOTIFIER_HEAD |
| # define RAW_NOTIFIER_HEAD(name) struct notifier_block *name |
| # define raw_notifier_call_chain notifier_call_chain |
| # define raw_notifier_chain_register notifier_chain_register |
| # define raw_notifier_chain_unregister notifier_chain_unregister |
| #endif |
| |
| static DEFINE_MUTEX(notify_mutex); |
| static RAW_NOTIFIER_HEAD(listen_offload_notify_list); |
| |
| int register_listen_offload_notifier(struct notifier_block *nb) |
| { |
| int err; |
| |
	mutex_lock(&notify_mutex);
	err = raw_notifier_chain_register(&listen_offload_notify_list, nb);
	mutex_unlock(&notify_mutex);
| return err; |
| } |
| EXPORT_SYMBOL(register_listen_offload_notifier); |
| |
| int unregister_listen_offload_notifier(struct notifier_block *nb) |
| { |
| int err; |
| |
	mutex_lock(&notify_mutex);
	err = raw_notifier_chain_unregister(&listen_offload_notify_list, nb);
	mutex_unlock(&notify_mutex);
| return err; |
| } |
| EXPORT_SYMBOL(unregister_listen_offload_notifier); |
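
/*
 * Example (illustrative sketch only): a TOE driver interested in listen
 * events would register a notifier block such as the one below.  The
 * callback name and body are hypothetical; OFFLOAD_LISTEN_START/STOP are
 * the events published by start_listen_offload()/stop_listen_offload().
 *
 *	static int my_listen_event(struct notifier_block *nb,
 *				   unsigned long event, void *data)
 *	{
 *		struct sock *sk = data;
 *
 *		switch (event) {
 *		case OFFLOAD_LISTEN_START:
 *			(create an offloaded server entry for sk)
 *			break;
 *		case OFFLOAD_LISTEN_STOP:
 *			(tear that entry down)
 *			break;
 *		}
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block my_listen_nb = {
 *		.notifier_call = my_listen_event,
 *	};
 *
 *	register_listen_offload_notifier(&my_listen_nb);
 */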
| |
| #if defined(CONFIG_TCP_OFFLOAD) || \ |
| (defined(CONFIG_TCP_OFFLOAD_MODULE) && defined(MODULE)) |
| /* |
| * Called when an active open has been requested through connect(2). Decides |
| * if the connection may be offloaded based on the system's offload policies |
| * and the capabilities of the egress interface. |
| * |
| * Returns 1 if the connection is offloaded and 0 otherwise. |
| */ |
| int tcp_connect_offload(struct sock *sk) |
| { |
| struct net_device *netdev = __sk_dst_get(sk)->dev; |
| |
| if (netdev_is_offload(netdev)) { |
| struct toedev *dev = TOEDEV(netdev); |
| |
| if (!dev || !dev->can_offload(dev, sk)) |
| return 0; |
| if (dev->connect(dev, sk, netdev) == 0) { |
| offload_socket_ops(sk); |
| return 1; |
| } |
| } |
| return 0; |
| } |
| EXPORT_SYMBOL(tcp_connect_offload); |
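
/*
 * The stack-side hook for tcp_connect_offload() comes from the TOE kernel
 * patch; its call site in tcp_v4_connect() is expected to look roughly
 * like the sketch below (exact placement varies between kernel versions):
 *
 *	(route resolved, sk's dst entry set)
 *	if (tcp_connect_offload(sk))
 *		return 0;	(the TOE now owns the connection)
 *	(otherwise fall through to the normal software SYN transmit path)
 */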
| |
| /* |
 * TOE-capable backlog handler. It is installed on offloaded listening
 * sockets so they can handle non-IP (TOE) packets queued in their backlogs.
 * TOE packets are easily told apart from IP packets because they lack
 * network headers; such packets are fed to a TOE-specific backlog handler.
| */ |
| static int listen_backlog_rcv(struct sock *sk, struct sk_buff *skb) |
| { |
| if (likely(skb->transport_header != skb->network_header)) |
| return tcp_v4_do_rcv(sk, skb); |
| BLOG_SKB_CB(skb)->backlog_rcv(sk, skb); |
| return 0; |
| } |
| |
/* True if the socket is bound to a loopback address. */
static int locally_bound_v4(const struct sock *sk)
{
| #ifdef LINUX_2_4 |
| return LOOPBACK(sk->inet_rcv_saddr) ? 1 : 0; |
| #else |
| return ipv4_is_loopback(inet_sk(sk)->inet_rcv_saddr) ? 1 : 0; |
| #endif /* LINUX_2_4 */ |
| } |
| |
| /* |
 * Called when the SW stack has transitioned a socket to the listen state.
 * We filter out sockets that cannot be offloaded (non-TCP or bound to a
 * loopback address) and publish an OFFLOAD_LISTEN_START event for the rest.
| */ |
| int start_listen_offload(struct sock *sk) |
| { |
| if (sk->sk_protocol != IPPROTO_TCP) |
| return -EPROTONOSUPPORT; |
| |
	/* filter out loopback listens */
	if (locally_bound_v4(sk))
		return -EADDRNOTAVAIL;

	/* install a TOE-capable backlog handler */
	sk->sk_backlog_rcv = listen_backlog_rcv;

	/* if needed, install offload-capable socket ops */
	offload_socket_ops(sk);
| |
	mutex_lock(&notify_mutex);
	raw_notifier_call_chain(&listen_offload_notify_list,
				OFFLOAD_LISTEN_START, sk);
	mutex_unlock(&notify_mutex);
| return 0; |
| } |
| EXPORT_SYMBOL(start_listen_offload); |
| |
| /* |
| * Called when the SW stack is preparing to close an existing listening socket. |
| * We publish an OFFLOAD_LISTEN_STOP event. |
| */ |
| int stop_listen_offload(struct sock *sk) |
| { |
| if (sk->sk_protocol != IPPROTO_TCP) |
| return -EPROTONOSUPPORT; |
| |
	mutex_lock(&notify_mutex);
	raw_notifier_call_chain(&listen_offload_notify_list,
				OFFLOAD_LISTEN_STOP, sk);
	mutex_unlock(&notify_mutex);
| return 0; |
| } |
| EXPORT_SYMBOL(stop_listen_offload); |
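
/*
 * Like tcp_connect_offload(), the two listen hooks above are invoked from
 * the patched software stack, roughly as follows (the exact call sites
 * depend on the kernel patch):
 *
 *	(socket has just entered TCP_LISTEN)
 *	start_listen_offload(sk);
 *
 *	(listener is about to be torn down)
 *	stop_listen_offload(sk);
 */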
| |
| void walk_listens(void *handle, int (*func)(void *handle, struct sock *sk)) |
| { |
| #ifdef CONFIG_CHELSIO_OFFLOAD_EXISTING_LISTENERS |
| /* |
| * Offloading existing listeners doesn't work in all configurations. |
	 * Rather than confuse customers by describing exactly when it does
	 * work, we disable this code by default and tell customers to
	 * restart any services they want offloaded _after_ the offload
	 * driver is installed.
| */ |
| int i; |
| struct sock *sk; |
| |
| #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,28) |
| struct inet_listen_hashbucket *ilb; |
| struct hlist_nulls_node *node; |
| |
	mutex_lock(&notify_mutex);
| |
| for (i = 0; i < INET_LHTABLE_SIZE; i++) { |
| ilb = &tcp_hashinfo.listening_hash[i]; |
| spin_lock(&ilb->lock); |
| sk_nulls_for_each(sk, node, &ilb->head) { |
| if (sk->sk_family == PF_INET && locally_bound_v4(sk)) |
| continue; |
			if (func(handle, sk) < 0) {
				spin_unlock(&ilb->lock);
				goto out;
			}
		}
| spin_unlock(&ilb->lock); |
| } |
| out: |
| #elif LINUX_VERSION_CODE > KERNEL_VERSION(2,6,13) |
| struct hlist_node *node; |
| |
	mutex_lock(&notify_mutex);
| inet_listen_lock(&tcp_hashinfo); |
| |
| for (i = 0; i < INET_LHTABLE_SIZE; i++) |
| sk_for_each(sk, node, &tcp_hashinfo.listening_hash[i]) { |
| if (sk->sk_family == PF_INET && locally_bound_v4(sk)) |
| continue; |
| if (func(handle, sk) < 0) |
| goto out; |
| } |
| |
| out: inet_listen_unlock(&tcp_hashinfo); |
| #else |
| struct hlist_node *node; |
| |
	mutex_lock(&notify_mutex);
| tcp_listen_lock(); |
| |
| for (i = 0; i < TCP_LHTABLE_SIZE; i++) |
| sk_for_each(sk, node, &tcp_listening_hash[i]) { |
| if (sk->sk_family == PF_INET && locally_bound_v4(sk)) |
| continue; |
			if (func(handle, sk) < 0)
| goto out; |
| } |
| |
| out: tcp_listen_unlock(); |
| #endif |
	mutex_unlock(&notify_mutex);
| #endif /* CONFIG_CHELSIO_OFFLOAD_EXISTING_LISTENERS */ |
| } |
| EXPORT_SYMBOL(walk_listens); |
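
/*
 * The optimized classifier program interpreted below is a sequence of
 * variable-length instructions, each made up of u32 words:
 *
 *	word 0, bits 15:0   offset (in u32 words) within the offload_req of
 *			    the field to examine
 *	word 0, bits 31:16  number of match values following word 3
 *	word 1              branch offset taken when no value matches
 *	word 2              branch offset taken on a match
 *	word 3              mask ANDed with the request field before comparing
 *	words 4..           the match values themselves
 *
 * Branch offsets are in u32 words relative to the current instruction; an
 * offset <= 0 terminates the program, and its negation is the index of the
 * matched rule in the policy's settings array.
 */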
| |
| static int run_opt_classifier(const struct offload_policy *h, |
| const struct offload_req *req) |
| { |
| const u32 *r = (const u32 *)req; |
| const u32 *ip = h->opt_prog_start; /* instruction pointer */ |
| |
| while (1) { |
| int off = ip[0] & 0xffff; |
| u32 data = r[off] & ip[3]; |
| const u32 *vals = ip + 4; |
| |
| for (off = ip[0] >> 16; off; off--, vals++) |
| if (*vals == data) { |
| off = ip[2]; |
| goto next; |
| } |
| off = ip[1]; |
| next: |
| if (off <= 0) |
| return -off; |
| ip += off; |
| } |
| } |
| |
| /* |
 * Note that the caller is responsible for calling rcu_read_unlock() once
 * it is done with the returned settings.
 * linux-2.4: the caller is responsible for calling read_unlock().
| */ |
| const struct offload_settings *lookup_ofld_policy(const struct toedev *dev, |
| const struct offload_req *req, |
| int cop_managed_offloading) |
| { |
| static struct offload_settings no_cop_default_settings = { |
| 1, -1, -1, -1, -1, -1 |
| }; |
| static struct offload_settings cop_default_settings = { |
| 0, -1, -1, -1, -1, -1 |
| }; |
| |
| int match; |
| const struct offload_policy *policy; |
| |
| #ifndef LINUX_2_4 |
| rcu_read_lock(); |
| policy = rcu_dereference(dev->policy); |
| #else |
| read_lock(&dev->policy_lock); |
| policy = dev->policy; |
| #endif |
| if (!policy) |
| return (cop_managed_offloading |
| ? &cop_default_settings |
| : &no_cop_default_settings); |
| if (policy->match_all >= 0) |
| match = policy->match_all; |
| else |
| match = run_opt_classifier(policy, req); |
	printk(KERN_DEBUG "offload policy match = %d\n", match);
| return &policy->settings[match]; |
| } |
| EXPORT_SYMBOL(lookup_ofld_policy); |
| |
| void offload_req_from_sk(struct offload_req *req, struct sock *sk, int otype) |
| { |
| const struct dst_entry *dst; |
| const struct net_device *ndev; |
| |
| #ifndef LINUX_2_4 |
| req->sip[0] = inet_sk(sk)->inet_rcv_saddr; |
| #else |
| req->sip[0] = sk->inet_rcv_saddr; |
| #endif |
| req->sip[1] = req->sip[2] = req->sip[3] = 0; |
| #ifndef LINUX_2_4 |
| req->dip[0] = inet_sk(sk)->inet_daddr; |
| #else |
| req->dip[0] = sk->inet_daddr; |
| #endif |
| req->dip[1] = req->dip[2] = req->dip[3] = 0; |
| #ifndef LINUX_2_4 |
| req->sport = inet_sk(sk)->inet_sport; |
| req->dport = inet_sk(sk)->inet_dport; |
| #else |
| req->sport = sk->inet_sport; |
| req->dport = sk->inet_dport; |
| #endif |
| req->ipvers_opentype = (otype << 4) | (sk->sk_family == AF_INET ? 4:6); |
| req->tos = inet_sk(sk)->tos; |
| |
| dst = __sk_dst_get(sk); |
| ndev = dst ? dst->neighbour->dev : NULL; |
| if (dst && (ndev->priv_flags & IFF_802_1Q_VLAN)) |
| req->vlan = htons(vlan_dev_vlan_id(ndev) & VLAN_VID_MASK); |
| else |
| req->vlan = htons(0xfff); |
| #ifdef SO_MARK |
| req->mark = sk->sk_mark; |
| #else |
| req->mark = 0; |
| #endif |
| } |
| EXPORT_SYMBOL(offload_req_from_sk); |
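
/*
 * Putting the two functions above together, a policy lookup might look
 * like the sketch below.  OPEN_TYPE_ACTIVE is a placeholder for whatever
 * open-type code the policy file defines, and "offload" is assumed to be
 * the enable flag leading struct offload_settings (cf. the default
 * settings in lookup_ofld_policy()).
 *
 *	struct offload_req req;
 *	const struct offload_settings *s;
 *	int allow;
 *
 *	offload_req_from_sk(&req, sk, OPEN_TYPE_ACTIVE);
 *	s = lookup_ofld_policy(dev, &req, cop_managed_offloading);
 *	allow = s->offload;
 *	rcu_read_unlock();	(read_unlock(&dev->policy_lock) on 2.4)
 *	return allow;
 */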
| |
| #ifndef LINUX_2_4 |
| static void rcu_free_policy(struct rcu_head *h) |
| { |
| kfree(container_of(h, struct offload_policy, rcu_head)); |
| } |
| |
| static inline void free_policy(struct offload_policy *policy) |
| { |
| if (policy) |
| call_rcu(&policy->rcu_head, rcu_free_policy); |
| } |
| #endif |
| |
| int set_offload_policy(struct toedev *dev, const struct ofld_policy_file *f) |
| { |
| unsigned int len; |
| struct offload_policy *p = NULL, *oldpolicy; |
| |
| if (f) { |
| len = (f->nrules + 1) * sizeof(struct offload_settings) + |
| f->prog_size * sizeof(struct ofld_prog_inst) + |
| f->opt_prog_size * sizeof(u32); |
| |
| p = kmalloc(len + sizeof(*p), GFP_KERNEL); |
| if (!p) |
| return -ENOMEM; |
| |
| #ifndef LINUX_2_4 |
| INIT_RCU_HEAD(&p->rcu_head); |
| #endif |
| p->match_all = f->output_everything; |
| p->use_opt = 1; |
| memcpy(p->prog, f->prog, len); |
| p->opt_prog_start = (const u32 *)&p->prog[f->prog_size]; |
| p->settings = (void *)&p->opt_prog_start[f->opt_prog_size]; |
| } |
#ifndef LINUX_2_4
	oldpolicy = dev->policy;
	rcu_assign_pointer(dev->policy, p);
	free_policy(oldpolicy);
#else
	write_lock(&dev->policy_lock);
	oldpolicy = dev->policy;
	dev->policy = p;
	write_unlock(&dev->policy_lock);
	kfree(oldpolicy);	/* no reader can still hold the old policy */
#endif
| return 0; |
| } |
| EXPORT_SYMBOL(set_offload_policy); |
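
/*
 * Passing a NULL policy file removes any installed policy, e.g. when the
 * control program detaches:
 *
 *	set_offload_policy(toedev, NULL);
 *
 * Readers already holding the old policy keep using it until they drop
 * rcu_read_lock(); the memory is then reclaimed by free_policy() after a
 * grace period (on 2.4 it is freed as soon as the write lock guarantees
 * no readers remain).
 */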
| |
| #if defined(CONFIG_TCP_OFFLOAD) |
| /* If modular there's a separate definition in module_support.c */ |
| void security_inet_conn_estab(struct sock *sk, struct sk_buff *skb) |
| { |
| #ifdef CONFIG_SECURITY_NETWORK |
| security_inet_conn_established(sk, skb); |
| #endif |
| } |
| EXPORT_SYMBOL(security_inet_conn_estab); |
| |
| #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26) |
| EXPORT_SYMBOL(skb_splice_bits); |
| #endif |
| #endif |
| |
| #endif |