/* ***************************************************************
*
* (C) 2004-13 - ntop.org
*
* This code includes contributions courtesy of
* - Amit D. Chaudhary <amit_ml@rajgad.com>
* - Andrew Gallatin <gallatyn@myri.com>
* - Brad Doctor <brad@stillsecure.com>
* - Felipe Huici <felipe.huici@nw.neclab.eu>
* - Francesco Fusco <fusco@ntop.org> (IP defrag)
* - Helmut Manck <helmut.manck@secunet.com>
* - Hitoshi Irino <irino@sfc.wide.ad.jp> (IPv6 support)
* - Jakov Haron <jyh@cabel.net>
* - Jeff Randall <jrandall@nexvu.com>
* - Kevin Wormington <kworm@sofnet.com>
* - Mahdi Dashtbozorgi <rdfm2000@gmail.com>
* - Marketakis Yannis <marketak@ics.forth.gr>
* - Matthew J. Roth <mroth@imminc.com>
* - Michael Stiller <ms@2scale.net> (VM memory support)
* - Noam Dev <noamdev@gmail.com>
* - Siva Kollipara <siva@cs.arizona.edu>
* - Vincent Carrier <vicarrier@wanadoo.fr>
* - Eugene Bogush <b_eugene@ukr.net>
* - Samir Chang <coobyhb@gmail.com>
* - Ury Stankevich <urykhy@gmail.com>
* - Raja Mukerji <raja@mukerji.com>
* - Davide Viti <zinosat@tiscali.it>
* - Will Metcalf <william.metcalf@gmail.com>
* - Godbach <nylzhaowei@gmail.com>
* - Nicola Bonelli <bonelli@antifork.org>
* - Jan Alsenz
* - valxdater@seznam.cz
* - Vito Piserchia <vpiserchia@metatype.it>
* - Guo Chen <johncg1983@gmail.com>
* - Dan Kruchinin <dkruchinin@acm.org>
* - Andreas Tsopelas <tsopelas@kth.se>
* - Alex Aronson <alexa@silicom.co.il>
* - Piotr Romanus <promanus@crossbeamsys.com>
* - Lior Okman <lior.okman@insightix.com>
* - Fedor Sakharov <fedor.sakharov@gmail.com>
* - Daniel Christopher <Chris.Daniel@visualnetworksystems.com>
* - Martin Holste <mcholste@gmail.com>
* - Eric Leblond <eric@regit.org>
* - Momina Khan <momina.azam@gmail.com>
* - XTao <xutao881001@gmail.com>
* - James Juran <james.juran@mandiant.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
*/
#include <linux/version.h>
#if(LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18))
#error **********************************************************************
#error * PF_RING works on kernel 2.6.18 or newer. Please update your kernel *
#error **********************************************************************
#endif
#if(LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,18))
#if(LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,33))
#include <generated/autoconf.h>
#else
#include <linux/autoconf.h>
#endif
#else
#include <linux/config.h>
#endif
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/kernel.h>
#include <linux/socket.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/in6.h>
#include <linux/init.h>
#include <linux/filter.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/list.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/proc_fs.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <net/xfrm.h>
#include <net/sock.h>
#include <asm/io.h> /* needed for virt_to_phys() */
#ifdef CONFIG_INET
#include <net/inet_common.h>
#endif
#include <net/ip.h>
#include <net/ipv6.h>
#include <linux/pci.h>
#include <asm/shmparam.h>
#if(LINUX_VERSION_CODE > KERNEL_VERSION(2,6,30))
#include <linux/eventfd.h>
#define VPFRING_SUPPORT
#endif
#if(LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31))
#define I82599_HW_FILTERING_SUPPORT
#endif
#include <linux/pf_ring.h>
#ifndef SVN_REV
#define SVN_REV ""
#endif
/* ************************************************* */
#define TH_FIN_MULTIPLIER 0x01
#define TH_SYN_MULTIPLIER 0x02
#define TH_RST_MULTIPLIER 0x04
#define TH_PUSH_MULTIPLIER 0x08
#define TH_ACK_MULTIPLIER 0x10
#define TH_URG_MULTIPLIER 0x20
/* ************************************************* */
#define PROC_INFO "info"
#define PROC_DEV "dev"
#define PROC_STATS "stats"
#define PROC_RULES "rules"
#define PROC_PLUGINS_INFO "plugins_info"
/* ************************************************* */
const static ip_addr ip_zero = { IN6ADDR_ANY_INIT };
static u_int8_t pfring_enabled = 1;
/* Dummy 'any' device */
static ring_device_element any_device_element, none_device_element;
/* List of all ring sockets. */
static lockless_list ring_table;
static u_int ring_table_size;
/*
List where we store pointers that we need to remove in
a delayed fashion when we're done with all operations
*/
static lockless_list delayed_memory_table;
/* Protocol hook */
static struct packet_type prot_hook;
/*
For each device, pf_ring keeps a list of the number of
available ring socket slots. So that a caller knows in advance whether
there are slots available (for rings bound to such device)
that can potentially host the packet
*/
static struct list_head device_ring_list[MAX_NUM_DEVICES];
/* List of virtual filtering devices */
static struct list_head virtual_filtering_devices_list;
static rwlock_t virtual_filtering_lock =
#if(LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39))
RW_LOCK_UNLOCKED
#else
(rwlock_t)__RW_LOCK_UNLOCKED(virtual_filtering_lock)
#endif
;
/* List of all clusters */
static lockless_list ring_cluster_list;
/* List of all devices on which PF_RING has been registered */
static struct list_head ring_aware_device_list; /* List of ring_device_element */
/* quick mode <if_index, channel> to <ring> table */
static struct pf_ring_socket* device_rings[MAX_NUM_IFIDX][MAX_NUM_RX_CHANNELS] = { { NULL } };
/* Keep track of number of rings per device (plus any) */
static u_int8_t num_rings_per_device[MAX_NUM_IFIDX] = { 0 };
static u_int8_t num_any_rings = 0;
/*
Fragment handling for clusters
As in a cluster packet fragments cannot be hashed, we have a cache where we can keep
the association between the IP packet identifier and the balanced application.
*/
static u_int16_t num_cluster_fragments = 0;
static u_int32_t num_cluster_discarded_fragments = 0;
static unsigned long next_fragment_purge_jiffies = 0;
static struct hash_fragment_node *cluster_fragment_hash[NUM_FRAGMENTS_HASH_SLOTS] = { NULL };
static rwlock_t cluster_fragments_lock =
#if(LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39))
RW_LOCK_UNLOCKED
#else
(rwlock_t)__RW_LOCK_UNLOCKED(cluster_fragments_lock)
#endif
;
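/*
  Illustrative sketch (not module code) of how the fragment cache above is
  meant to be used when balancing cluster traffic: only the first fragment
  carries the L4 ports, so its <src, dst, ip_id> triple is remembered and
  later fragments are steered to the same application. The helper names
  match the forward declarations below; hash, num_cluster_apps, ip_src,
  ip_dst and ip_id are placeholders.

    u_int8_t app_id;

    if(first_fragment) {
      app_id = hash % num_cluster_apps;                  // normal balancing
      add_fragment_app_id(ip_src, ip_dst, ip_id, app_id);
    } else if(second_fragment) {
      int cached = get_fragment_app_id(ip_src, ip_dst, ip_id);
      app_id = (cached != -1) ? (u_int8_t)cached : 0;    // expired: fall back
    }
*/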
/* List of all DNA (Direct NIC Access) devices */
static struct list_head ring_dna_devices_list;
static u_int dna_devices_list_size = 0;
/* List of all plugins */
static u_int plugin_registration_size = 0;
static struct pfring_plugin_registration *plugin_registration[MAX_PLUGIN_ID] = { NULL };
static u_short max_registered_plugin_id = 0;
/* List of userspace rings */
static struct list_head userspace_ring_list;
static rwlock_t userspace_ring_lock =
#if(LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39))
RW_LOCK_UNLOCKED
#else
(rwlock_t) __RW_LOCK_UNLOCKED(userspace_ring_lock)
#endif
;
/* List of DNA clusters */
static struct list_head dna_cluster_list;
static rwlock_t dna_cluster_lock =
#if(LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39))
RW_LOCK_UNLOCKED
#else
(rwlock_t) __RW_LOCK_UNLOCKED(dna_cluster_lock)
#endif
;
/* List of generic cluster referees */
static struct list_head cluster_referee_list;
static rwlock_t cluster_referee_lock =
#if(LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39))
RW_LOCK_UNLOCKED
#else
(rwlock_t) __RW_LOCK_UNLOCKED(cluster_referee_lock)
#endif
;
/* Dummy buffer used for loopback_test */
u_int32_t loobpack_test_buffer_len = 4*1024*1024;
u_char *loobpack_test_buffer = NULL;
/* ********************************** */
/* /proc entry for ring module */
struct proc_dir_entry *ring_proc_dir = NULL, *ring_proc_dev_dir = NULL, *ring_proc_stats_dir = NULL;
struct proc_dir_entry *ring_proc = NULL;
struct proc_dir_entry *ring_proc_plugins_info = NULL;
static int ring_proc_get_info(char *, char **, off_t, int, int *, void *);
static int ring_proc_get_plugin_info(char *, char **, off_t, int, int *,
void *);
static void ring_proc_add(struct pf_ring_socket *pfr);
static void ring_proc_remove(struct pf_ring_socket *pfr);
static void ring_proc_init(void);
static void ring_proc_term(void);
static int reflect_packet(struct sk_buff *skb,
struct pf_ring_socket *pfr,
struct net_device *reflector_dev,
int displ, rule_action_behaviour behaviour,
u_int8_t do_clone_skb);
static void purge_idle_fragment_cache(void);
/* ********************************** */
static rwlock_t ring_mgmt_lock;
static inline void init_ring_readers(void) {
ring_mgmt_lock =
#if(LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39))
RW_LOCK_UNLOCKED
#else
(rwlock_t) __RW_LOCK_UNLOCKED(ring_mgmt_lock)
#endif
;
}
static inline void ring_write_lock(void) { write_lock_bh(&ring_mgmt_lock); }
static inline void ring_write_unlock(void) { write_unlock_bh(&ring_mgmt_lock); }
/* use ring_read_lock/ring_read_unlock in process context (a bottom half may use write_lock) */
static inline void ring_read_lock(void) { read_lock_bh(&ring_mgmt_lock); }
static inline void ring_read_unlock(void) { read_unlock_bh(&ring_mgmt_lock); }
/* use ring_read_lock_inbh/ring_read_unlock_inbh in bottom half context */
static inline void ring_read_lock_inbh(void) { read_lock(&ring_mgmt_lock); }
static inline void ring_read_unlock_inbh(void) { read_unlock(&ring_mgmt_lock); }
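/*
  Usage sketch (illustrative only): readers in process context take the
  _bh variants so a bottom half taking the write lock cannot deadlock with
  them, while code already running in a bottom half uses the plain read lock.

    // process context (e.g. a setsockopt handler)
    ring_read_lock();
    // ... walk ring_table ...
    ring_read_unlock();

    // bottom half (e.g. the packet hook)
    ring_read_lock_inbh();
    // ... deliver the packet ...
    ring_read_unlock_inbh();
*/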
/* ********************************** */
/*
Caveat
[http://lists.metaprl.org/pipermail/cs134-labs/2002-October/000025.html]
GFP_ATOMIC means roughly "make the allocation operation atomic". This
means that the kernel will try to find the memory using a pile of free
memory set aside for urgent allocation. If that pile doesn't have
enough free pages, the operation will fail. This flag is useful for
allocation within interrupt handlers.
GFP_KERNEL will try a little harder to find memory. There's a
possibility that the call to kmalloc() will sleep while the kernel is
trying to find memory (thus making it unsuitable for interrupt
handlers). It's much rarer for an allocation with GFP_KERNEL to
fail than with GFP_ATOMIC.
In all cases, kmalloc() should only be used for allocating small amounts of
memory (a few KB). vmalloc() is better for larger amounts.
Also note that in lab 1 and lab 2, it would have been arguably better to
use GFP_KERNEL instead of GFP_ATOMIC. GFP_ATOMIC should be saved for
those instances in which a sleep would be totally unacceptable.
*/
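/*
  Minimal example of the distinction described above (illustrative, not
  module code): the flag is chosen from the calling context.

    void *p;

    // interrupt / bottom-half context: must not sleep
    p = kmalloc(256, GFP_ATOMIC);

    // process context (e.g. during a setsockopt call): may sleep
    p = kmalloc(256, GFP_KERNEL);

    // large buffers (hundreds of KB or more): prefer vmalloc()
    p = vmalloc(4 * 1024 * 1024);
*/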
/* ********************************** */
/* Forward */
static struct proto_ops ring_ops;
#if(LINUX_VERSION_CODE > KERNEL_VERSION(2,6,11))
static struct proto ring_proto;
#endif
static int skb_ring_handler(struct sk_buff *skb, u_char recv_packet,
u_int8_t real_skb, u_int8_t *skb_reference_in_use,
u_int32_t channel_id, u_int32_t num_rx_channels);
static int buffer_ring_handler(struct net_device *dev, char *data, int len);
static int remove_from_cluster(struct sock *sock, struct pf_ring_socket *pfr);
static int ring_map_dna_device(struct pf_ring_socket *pfr,
dna_device_mapping * mapping);
static int get_fragment_app_id(u_int32_t ipv4_src_host, u_int32_t ipv4_dst_host, u_int16_t fragment_id);
static void add_fragment_app_id(u_int32_t ipv4_src_host, u_int32_t ipv4_dst_host, u_int16_t fragment_id, u_int8_t app_id);
/* Extern */
extern
#if(LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23))
struct sk_buff *
#else
int
#endif
ip_defrag(struct sk_buff *skb, u32 user);
/* ********************************** */
/* Defaults */
static unsigned int min_num_slots = 4096;
static unsigned int perfect_rules_hash_size = DEFAULT_RING_HASH_SIZE;
static unsigned int enable_tx_capture = 1;
static unsigned int enable_ip_defrag = 0;
static unsigned int quick_mode = 0;
static unsigned int enable_debug = 0;
static unsigned int transparent_mode = standard_linux_path;
static atomic_t ring_id_serial = ATOMIC_INIT(0);
#ifdef REDBORDER_PATCH
char *bypass_interfaces[MAX_NUM_DEVICES] = { 0 };
#endif
#if defined(RHEL_RELEASE_CODE)
#if(RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(4,8))
#define REDHAT_PATCHED_KERNEL
#endif
#endif
#if(LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)) || defined(REDHAT_PATCHED_KERNEL)
module_param(min_num_slots, uint, 0644);
module_param(perfect_rules_hash_size, uint, 0644);
module_param(transparent_mode, uint, 0644);
module_param(enable_debug, uint, 0644);
module_param(enable_tx_capture, uint, 0644);
module_param(enable_ip_defrag, uint, 0644);
module_param(quick_mode, uint, 0644);
#ifdef REDBORDER_PATCH
module_param_array(bypass_interfaces, charp, NULL, 0444);
#endif
#else
MODULE_PARM(min_num_slots, "i");
MODULE_PARM(perfect_rules_hash_size, "i");
MODULE_PARM(transparent_mode, "i");
MODULE_PARM(enable_debug, "i");
MODULE_PARM(enable_tx_capture, "i");
MODULE_PARM(enable_ip_defrag, "i");
MODULE_PARM(quick_mode, "i");
#endif
MODULE_PARM_DESC(min_num_slots, "Min number of ring slots");
MODULE_PARM_DESC(perfect_rules_hash_size, "Perfect rules hash size");
MODULE_PARM_DESC(transparent_mode,
"0=standard Linux, 1=direct2pfring+transparent, 2=direct2pfring+non transparent"
"For 1 and 2 you need to use a PF_RING aware driver");
MODULE_PARM_DESC(enable_debug, "Set to 1 to enable PF_RING debug tracing into the syslog");
MODULE_PARM_DESC(enable_tx_capture, "Set to 1 to capture outgoing packets");
MODULE_PARM_DESC(enable_ip_defrag,
"Set to 1 to enable IP defragmentation"
"(only rx traffic is defragmentead)");
MODULE_PARM_DESC(quick_mode,
"Set to 1 to run at full speed but with up"
"to one socket per interface");
#ifdef REDBORDER_PATCH
MODULE_PARM_DESC(bypass_interfaces,
"Comma separated list of interfaces where bypass"
"will be enabled on link down");
#endif
/* ********************************** */
#define MIN_QUEUED_PKTS 64
#define MAX_QUEUE_LOOPS 64
#define ring_sk_datatype(__sk) ((struct pf_ring_socket *)__sk)
#define ring_sk(__sk) ((__sk)->sk_protinfo)
#define _rdtsc() ({ uint64_t x; asm volatile("rdtsc" : "=A" (x)); x; })
/* ***************** Legacy code ************************ */
u_int get_num_rx_queues(struct net_device *dev) {
#if(LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30))
return(1);
#else
#if(LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,38)) && defined(CONFIG_RPS)
return(min_val(dev->real_num_rx_queues, dev->real_num_tx_queues));
#else
return(dev->real_num_tx_queues);
// return(1);
#endif
#endif
}
#if defined(RHEL_MAJOR) && (RHEL_MAJOR == 5) && (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,18))
/* Redhat backports these functions to 2.6.18 so do nothing */
#else
#if(LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23))
static inline void skb_reset_network_header(struct sk_buff *skb) {
/* skb->network_header = skb->data - skb->head; */
}
static inline void skb_reset_transport_header(struct sk_buff *skb) {
/* skb->transport_header = skb->data - skb->head; */
}
static inline void skb_set_network_header(struct sk_buff *skb, const int offset) {
skb_reset_network_header(skb);
/* skb->network_header += offset; */
}
#endif /* KERNEL_VERSION */
#endif /* RH_MAJOR */
#if(LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16)) || (defined(RHEL_MAJOR) && (RHEL_MAJOR == 5) && (RHEL_MINOR < 2))
static inline struct iphdr *ip_hdr(const struct sk_buff *skb)
{
return(struct iphdr *)skb->nh.iph;
}
#if(!defined(REDHAT_PATCHED_KERNEL)) || ((RHEL_MAJOR == 5) && (RHEL_MINOR < 2))
static inline void skb_set_network_header(struct sk_buff *skb, const int offset)
{
skb->nh.iph = (struct iphdr *)(skb->data + offset);
}
static inline void skb_reset_network_header(struct sk_buff *skb)
{
;
}
static inline void skb_reset_transport_header(struct sk_buff *skb)
{
;
}
#endif
#endif
/* ************************************************** */
#if defined(REDHAT_PATCHED_KERNEL)
/* Always the same RH crap */
#if((RHEL_MAJOR == 5) && (RHEL_MINOR <= 8 /* 5 */))
void msleep(unsigned int msecs)
{
unsigned long timeout = msecs_to_jiffies(msecs) + 1;
while (timeout)
timeout = schedule_timeout_uninterruptible(timeout);
}
#endif
#endif
/* ************************************************** */
void init_lockless_list(lockless_list *l) {
memset(l, 0, sizeof(lockless_list));
l->list_lock =
#if(LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39))
RW_LOCK_UNLOCKED
#else
(rwlock_t) __RW_LOCK_UNLOCKED(l->list_lock)
#endif
;
}
/* ************************************************** */
/* Return the index where the element has been added, or -1 if there is no room left */
int lockless_list_add(lockless_list *l, void *elem) {
int i;
if(unlikely(enable_debug))
printk("[PF_RING] -> BEGIN %s() [total=%u]\n", __FUNCTION__, l->num_elements);
if(l->num_elements >= MAX_NUM_LIST_ELEMENTS) {
printk("[PF_RING] Exceeded the maximum number of list items\n");
return(-1); /* Too many */
}
/* I could avoid mutexes but ... */
write_lock_bh(&l->list_lock);
for(i=0; i<MAX_NUM_LIST_ELEMENTS; i++) {
void *old_slot_value;
/* Set l->list_elements[i] = elem if l->list_elements[i] == NULL */
old_slot_value = cmpxchg(&l->list_elements[i], NULL, elem);
if(old_slot_value == NULL)
break; /* We succeeded */
}
#if 0
/* 2 - Set the ring table bit */
if(l->list_elements[first_bit_unset] != NULL) {
/* Purge old element first, that was stored previously */
kfree(l->list_elements[first_bit_unset]);
}
#endif
if(l->top_element_id < i)
l->top_element_id = i;
l->num_elements++;
if(unlikely(enable_debug)) {
printk("[PF_RING] -> END %s() [total=%u][id=%u][top_element_id=%u]\n",
__FUNCTION__, l->num_elements, i, l->top_element_id);
for(i=0; i<MAX_NUM_LIST_ELEMENTS; i++) {
if(l->list_elements[i])
printk("[PF_RING] -> %s() [slot %u is full]\n", __FUNCTION__, i);
}
}
write_unlock_bh(&l->list_lock);
return(i);
}
/* ************************************************** */
/* http://community.topcoder.com/tc?module=Static&d1=tutorials&d2=bitManipulation */
/*
Return the index of the slot from which the element has been removed, or -1
if the element to be removed was not found
NOTE: NO MEMORY IS FREED
*/
int lockless_list_remove(lockless_list *l, void *elem) {
int i, old_full_slot = -1;
if(unlikely(enable_debug))
printk("[PF_RING] -> BEGIN %s() [total=%u]\n", __FUNCTION__, l->num_elements);
if(l->num_elements == 0) return(-1); /* Not found */
write_lock_bh(&l->list_lock);
for(i=0; i<MAX_NUM_LIST_ELEMENTS; i++) {
if(l->list_elements[i] == elem) {
(void)xchg(&l->list_elements[i], NULL);
while((l->top_element_id > 0) && (l->list_elements[l->top_element_id] == NULL))
l->top_element_id--;
l->num_elements--, old_full_slot = i;
break;
}
}
if(unlikely(enable_debug)) {
printk("[PF_RING] -> END %s() [total=%u][top_element_id=%u]\n", __FUNCTION__, l->num_elements, l->top_element_id);
for(i=0; i<MAX_NUM_LIST_ELEMENTS; i++) {
if(l->list_elements[i])
printk("[PF_RING] -> %s() [slot %u is full]\n", __FUNCTION__, i);
}
}
write_unlock_bh(&l->list_lock);
wmb();
return(old_full_slot);
}
/* ************************************************** */
void* lockless_list_get_next(lockless_list *l, u_int32_t *last_idx) {
while(*last_idx <= l->top_element_id) {
void *elem;
elem = l->list_elements[*last_idx];
(*last_idx)++;
if(elem != NULL)
return(elem);
}
return(NULL);
}
/* ************************************************** */
void* lockless_list_get_first(lockless_list *l, u_int32_t *last_idx) {
*last_idx = 0;
return(lockless_list_get_next(l, last_idx));
}
/* ************************************************** */
void lockless_list_empty(lockless_list *l, u_int8_t free_memory) {
int i;
if(free_memory) {
write_lock_bh(&l->list_lock);
for(i=0; i<MAX_NUM_LIST_ELEMENTS; i++) {
if(l->list_elements[i] != NULL) {
kfree(l->list_elements[i]);
l->list_elements[i] = NULL;
}
}
l->num_elements = 0;
write_unlock_bh(&l->list_lock);
wmb();
}
}
/* ************************************************** */
void term_lockless_list(lockless_list *l, u_int8_t free_memory) {
lockless_list_empty(l, free_memory);
}
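/*
  Usage sketch for the lockless_list API defined above (illustrative only;
  my_list and obj are placeholders):

    lockless_list my_list;
    u_int32_t idx;
    void *elem = obj;   // obj: placeholder for some long-lived object
    void *it;

    init_lockless_list(&my_list);
    if(lockless_list_add(&my_list, elem) == -1)
      printk("[PF_RING] list full\n");

    // iteration: get_first()/get_next() skip empty slots
    it = lockless_list_get_first(&my_list, &idx);
    while(it != NULL)
      it = lockless_list_get_next(&my_list, &idx);

    lockless_list_remove(&my_list, elem);   // memory is NOT freed
    term_lockless_list(&my_list, 0);
*/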
/* ************************************************** */
static inline char* get_slot(struct pf_ring_socket *pfr, u_int32_t off) { return(&(pfr->ring_slots[off])); }
/* ********************************** */
static inline int get_next_slot_offset(struct pf_ring_socket *pfr, u_int32_t off)
{
struct pfring_pkthdr *hdr;
u_int32_t real_slot_size;
// smp_rmb();
hdr = (struct pfring_pkthdr*)get_slot(pfr, off);
real_slot_size = pfr->slot_header_len + hdr->caplen;
if(pfr->header_len == long_pkt_header)
real_slot_size += hdr->extended_hdr.parsed_header_len;
/* padding at the end of the packet (magic number added on insert) */
real_slot_size += sizeof(u_int16_t); /* RING_MAGIC_VALUE */
/* Align slot size to 64 bit */
real_slot_size = ALIGN(real_slot_size, sizeof(u_int64_t));
if((off + real_slot_size + pfr->slots_info->slot_len) > (pfr->slots_info->tot_mem - sizeof(FlowSlotInfo))) {
return 0;
}
return (off + real_slot_size);
}
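/*
  Worked example of the slot size computed above (hypothetical numbers):
  assume a 32-byte slot header (short headers on a 64-bit kernel, so no
  parsed_header_len is added) and caplen = 1514:

    real_slot_size = 32 + 1514 + 2 (RING_MAGIC_VALUE) = 1548
    ALIGN(1548, 8)                                     = 1552 bytes

  If off + 1552 + slot_len would step past tot_mem - sizeof(FlowSlotInfo),
  the function returns 0 and the next insert wraps to the beginning.
*/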
/* ********************************** */
static inline u_int64_t num_queued_pkts(struct pf_ring_socket *pfr)
{
// smp_rmb();
if(pfr->ring_slots != NULL) {
/* 64-bit counters, no need to handle wrap
u_int64_t tot_insert = pfr->slots_info->tot_insert, tot_read = pfr->slots_info->tot_read;
if(tot_insert >= tot_read) {
return(tot_insert - tot_read);
} else {
return(((u_int64_t) - 1) + tot_insert - tot_read);
}
if(unlikely(enable_debug)) {
printk("[PF_RING] -> [tot_insert=%llu][tot_read=%llu]\n",
tot_insert, tot_read);
}
*/
return pfr->slots_info->tot_insert - pfr->slots_info->tot_read;
} else
return(0);
}
/* ************************************* */
static inline u_int64_t num_kernel_queued_pkts(struct pf_ring_socket *pfr)
{
if(pfr->ring_slots != NULL) {
return pfr->slots_info->tot_insert - pfr->slots_info->kernel_tot_read;
} else
return(0);
}
/* ************************************* */
static inline u_int64_t get_num_ring_free_slots(struct pf_ring_socket * pfr)
{
u_int64_t nqpkts = num_queued_pkts(pfr);
if(nqpkts < (pfr->slots_info->min_num_slots))
return(pfr->slots_info->min_num_slots - nqpkts);
else
return(0);
}
/* ********************************** */
/*
Consume packets that have been read by userland but not yet by the kernel
*/
static void consume_pending_pkts(struct pf_ring_socket *pfr, u_int8_t synchronized)
{
while(pfr->slots_info->kernel_remove_off != pfr->slots_info->remove_off &&
/* one slot back (pfring_mod_send_last_rx_packet is called after pfring_recv has updated remove_off) */
(synchronized || pfr->slots_info->remove_off != get_next_slot_offset(pfr, pfr->slots_info->kernel_remove_off))) {
struct pfring_pkthdr *hdr = (struct pfring_pkthdr*) &pfr->ring_slots[pfr->slots_info->kernel_remove_off];
if(unlikely(enable_debug))
printk("[PF_RING] Original offset [kernel_remove_off=%u][remove_off=%u][skb=%p]\n",
pfr->slots_info->kernel_remove_off,
pfr->slots_info->remove_off,
hdr->extended_hdr.tx.reserved);
if(hdr->extended_hdr.tx.reserved != NULL) {
/* Can't forward the packet on the same interface it has been received */
if(hdr->extended_hdr.tx.bounce_interface == pfr->ring_netdev->dev->ifindex) {
hdr->extended_hdr.tx.bounce_interface = UNKNOWN_INTERFACE;
}
if(hdr->extended_hdr.tx.bounce_interface != UNKNOWN_INTERFACE) {
/* Let's check if the last used device is still the preferred one */
if(pfr->tx.last_tx_dev_idx != hdr->extended_hdr.tx.bounce_interface) {
if(pfr->tx.last_tx_dev != NULL) {
dev_put(pfr->tx.last_tx_dev); /* Release device */
}
/* Reset all */
pfr->tx.last_tx_dev = NULL, pfr->tx.last_tx_dev_idx = UNKNOWN_INTERFACE;
pfr->tx.last_tx_dev = __dev_get_by_index(
#if(LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24))
&init_net,
#endif
hdr->extended_hdr.tx.bounce_interface);
if(pfr->tx.last_tx_dev != NULL) {
/* We have found the device */
pfr->tx.last_tx_dev_idx = hdr->extended_hdr.tx.bounce_interface;
dev_hold(pfr->tx.last_tx_dev); /* Prevent it from being freed */
}
}
if(pfr->tx.last_tx_dev) {
if(unlikely(enable_debug))
printk("[PF_RING] Bouncing packet to interface %d/%s\n",
hdr->extended_hdr.tx.bounce_interface,
pfr->tx.last_tx_dev->name);
reflect_packet(hdr->extended_hdr.tx.reserved, pfr,
pfr->tx.last_tx_dev, 0 /* displ */,
forward_packet_and_stop_rule_evaluation,
0 /* don't clone skb */);
} else {
kfree_skb(hdr->extended_hdr.tx.reserved); /* Free memory */
}
} else {
if(unlikely(enable_debug))
printk("[PF_RING] Freeing cloned (unforwarded) packet\n");
kfree_skb(hdr->extended_hdr.tx.reserved); /* Free memory */
}
}
hdr->extended_hdr.tx.reserved = NULL;
hdr->extended_hdr.tx.bounce_interface = UNKNOWN_INTERFACE;
pfr->slots_info->kernel_remove_off = get_next_slot_offset(pfr, pfr->slots_info->kernel_remove_off);
pfr->slots_info->kernel_tot_read++;
if(unlikely(enable_debug))
printk("[PF_RING] New offset [kernel_remove_off=%u][remove_off=%u]\n",
pfr->slots_info->kernel_remove_off,
pfr->slots_info->remove_off);
}
}
/* ********************************** */
static inline int check_free_ring_slot(struct pf_ring_socket *pfr)
{
u_int32_t remove_off;
// smp_rmb();
if(pfr->tx.enable_tx_with_bounce && pfr->header_len == long_pkt_header) /* fast-tx enabled */
remove_off = pfr->slots_info->kernel_remove_off;
else
remove_off = pfr->slots_info->remove_off;
if(pfr->slots_info->insert_off == remove_off) {
u_int64_t queued_pkts;
/*
Both insert and remove offset are set on the same slot.
We need to find out whether the memory is full or empty
*/
if(pfr->tx.enable_tx_with_bounce && pfr->header_len == long_pkt_header)
queued_pkts = num_kernel_queued_pkts(pfr);
else
queued_pkts = num_queued_pkts(pfr);
if(queued_pkts >= pfr->slots_info->min_num_slots)
return(0); /* Memory is full */
} else {
/* There are packets in the ring. We have to check whether we have
enough space to accommodate a new packet */
if(pfr->slots_info->insert_off < remove_off) {
/* Zero-copy recv: this prevents overwriting packets while apps are processing them */
if((remove_off - pfr->slots_info->insert_off) < (2 * pfr->slots_info->slot_len))
return(0);
} else {
/* We have enough room for the incoming packet as after we insert a packet, the insert_off
offset is wrapped to the beginning in case the space remaining is less than slot_len
(i.e. the memory needed to accommodate a packet)
*/
/* Zero-copy recv: this prevents overwriting packets while apps are processing them */
if((pfr->slots_info->tot_mem - sizeof(FlowSlotInfo) - pfr->slots_info->insert_off) < (2 * pfr->slots_info->slot_len) &&
remove_off == 0)
return(0);
}
}
return(1);
}
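/*
  Worked example of the test above (hypothetical values, slot_len = 1600):

    insert_off = 10000, remove_off = 12000
      -> gap = 2000 < 2 * slot_len, treated as full (return 0)

    insert_off = 12000, remove_off = 10000, tail space >= 2 * slot_len
      -> room available (return 1); only when the tail space is smaller
         than 2 * slot_len AND remove_off == 0 would the wrapped insert
         collide with the reader, so 0 is returned in that case instead.
*/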
/* ********************************** */
#define IP_DEFRAG_RING 1234
/* Returns new sk_buff, or NULL */
static struct sk_buff *ring_gather_frags(struct sk_buff *skb)
{
#if(LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23))
skb
#else
int status
#endif
= ip_defrag(skb, IP_DEFRAG_RING);
if(
#if(LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23))
skb == NULL
#else
status
#endif
)
skb = NULL;
else
ip_send_check(ip_hdr(skb));
return(skb);
}
/* ********************************** */
static void ring_sock_destruct(struct sock *sk)
{
struct pf_ring_socket *pfr;
skb_queue_purge(&sk->sk_receive_queue);
if(!sock_flag(sk, SOCK_DEAD)) {
if(unlikely(enable_debug)) {
printk("[PF_RING] Attempt to release alive ring socket: %p\n", sk);
}
return;
}
pfr = ring_sk(sk);
if(pfr)
kfree(pfr);
}
/* ********************************** */
static void ring_proc_add(struct pf_ring_socket *pfr)
{
if((ring_proc_dir != NULL)
&& (pfr->sock_proc_name[0] == '\0')) {
snprintf(pfr->sock_proc_name, sizeof(pfr->sock_proc_name),
"%d-%s.%d", pfr->ring_pid,
pfr->ring_netdev->dev->name, pfr->ring_id);
create_proc_read_entry(pfr->sock_proc_name, 0 /* read-only */,
ring_proc_dir,
ring_proc_get_info, pfr);
if(unlikely(enable_debug))
printk("[PF_RING] Added /proc/net/pf_ring/%s\n", pfr->sock_proc_name);
ring_table_size++;
}
}
/* ********************************** */
static void ring_proc_remove(struct pf_ring_socket *pfr)
{
if((ring_proc_dir != NULL)
&& (pfr->sock_proc_name[0] != '\0')) {
if(unlikely(enable_debug))
printk("[PF_RING] Removing /proc/net/pf_ring/%s\n", pfr->sock_proc_name);
remove_proc_entry(pfr->sock_proc_name, ring_proc_dir);
if(unlikely(enable_debug))
printk("[PF_RING] Removed /proc/net/pf_ring/%s\n", pfr->sock_proc_name);
pfr->sock_proc_name[0] = '\0';
if(pfr->sock_proc_stats_name[0] != '\0') {
if(unlikely(enable_debug))
printk("[PF_RING] Removing /proc/net/pf_ring/stats/%s\n", pfr->sock_proc_stats_name);
remove_proc_entry(pfr->sock_proc_stats_name, ring_proc_stats_dir);
if(unlikely(enable_debug))
printk("[PF_RING] Removed /proc/net/pf_ring/stats/%s\n", pfr->sock_proc_stats_name);
pfr->sock_proc_stats_name[0] = '\0';
}
ring_table_size--;
}
}
/* ********************************** */
static int ring_proc_dev_get_info(char *buf, char **start, off_t offset,
int len, int *unused, void *data)
{
int rlen = 0;
if(data != NULL) {
ring_device_element *dev_ptr = (ring_device_element*)data;
struct net_device *dev = dev_ptr->dev;
char dev_buf[16] = { 0 }, *dev_family = "???";
if(dev_ptr->is_dna_device) {
switch(dev_ptr->dna_device_model) {
case intel_e1000:
dev_family = "Intel e1000"; break;
case intel_e1000e:
dev_family = "Intel e1000e"; break;
case intel_igb:
dev_family = "Intel igb"; break;
case intel_igb_82580:
dev_family = "Intel igb 82580/i350 HW TS"; break;
case intel_ixgbe:
dev_family = "Intel ixgbe"; break;
case intel_ixgbe_82598:
dev_family = "Intel ixgbe 82598"; break;
case intel_ixgbe_82599:
dev_family = "Intel ixgbe 82599"; break;
case intel_ixgbe_82599_ts:
dev_family = "Silicom ixgbe 82599 HW TS"; break;
}
} else {
switch(dev_ptr->device_type) {
case standard_nic_family: dev_family = "Standard NIC"; break;
case intel_82599_family: dev_family = "Intel 82599"; break;
}
}
rlen = sprintf(buf, "Name: %s\n", dev->name);
rlen += sprintf(buf+rlen, "Index: %d\n", dev->ifindex);
rlen += sprintf(buf+rlen, "Address: %02X:%02X:%02X:%02X:%02X:%02X\n",
dev->perm_addr[0], dev->perm_addr[1], dev->perm_addr[2],
dev->perm_addr[3], dev->perm_addr[4], dev->perm_addr[5]);
rlen += sprintf(buf+rlen, "Polling Mode: %s\n", dev_ptr->is_dna_device ? "DNA" : "NAPI/TNAPI");
switch(dev->type) {
case 1: strcpy(dev_buf, "Ethernet"); break;
case 772: strcpy(dev_buf, "Loopback"); break;
default: sprintf(dev_buf, "%d", dev->type); break;
}
rlen += sprintf(buf+rlen, "Type: %s\n", dev_buf);
rlen += sprintf(buf+rlen, "Family: %s\n", dev_family);
if(!dev_ptr->is_dna_device) {
if(dev->ifindex < MAX_NUM_IFIDX) {
rlen += sprintf(buf+rlen, "# Bound Sockets: %d\n",
num_rings_per_device[dev->ifindex]);
}
}
#if(LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31))
rlen += sprintf(buf+rlen, "Max # TX Queues: %d\n", dev->real_num_tx_queues);
#endif
rlen += sprintf(buf+rlen, "# Used RX Queues: %d\n",
dev_ptr->is_dna_device ? dev_ptr->num_dna_rx_queues : get_num_rx_queues(dev));
}
return rlen;
}
/* **************** 82599 ****************** */
static int i82599_generic_handler(struct pf_ring_socket *pfr,
hw_filtering_rule *rule, hw_filtering_rule_command request) {
int rc = -1;
#ifdef I82599_HW_FILTERING_SUPPORT
struct net_device *dev = pfr->ring_netdev->dev;
intel_82599_five_tuple_filter_hw_rule *ftfq_rule;
intel_82599_perfect_filter_hw_rule *perfect_rule;
struct ethtool_rxnfc cmd;
struct ethtool_rx_flow_spec *fsp = (struct ethtool_rx_flow_spec *) &cmd.fs;
if(dev == NULL) return(-1);
if((dev->ethtool_ops == NULL) || (dev->ethtool_ops->set_rxnfc == NULL)) return(-1);
if(unlikely(enable_debug))
printk("[PF_RING] hw_filtering_rule[%s][request=%d][%p]\n",
dev->name, request, dev->ethtool_ops->set_rxnfc);
memset(&cmd, 0, sizeof(struct ethtool_rxnfc));
switch (rule->rule_family_type) {
case intel_82599_five_tuple_rule:
ftfq_rule = &rule->rule_family.five_tuple_rule;
fsp->h_u.tcp_ip4_spec.ip4src = ftfq_rule->s_addr;
fsp->h_u.tcp_ip4_spec.psrc = ftfq_rule->s_port;
fsp->h_u.tcp_ip4_spec.ip4dst = ftfq_rule->d_addr;
fsp->h_u.tcp_ip4_spec.pdst = ftfq_rule->d_port;
fsp->flow_type = ftfq_rule->proto;
fsp->ring_cookie = ftfq_rule->queue_id;
fsp->location = rule->rule_id;
cmd.cmd = (request == add_hw_rule ? ETHTOOL_PFRING_SRXFTRLINS : ETHTOOL_PFRING_SRXFTRLDEL);
break;
case intel_82599_perfect_filter_rule:
perfect_rule = &rule->rule_family.perfect_rule;
fsp->ring_cookie = perfect_rule->queue_id;
fsp->location = rule->rule_id;
if(perfect_rule->s_addr) {
fsp->h_u.tcp_ip4_spec.ip4src = htonl(perfect_rule->s_addr);
fsp->m_u.tcp_ip4_spec.ip4src = 0xFFFFFFFF;
}
if(perfect_rule->d_addr) {
fsp->h_u.tcp_ip4_spec.ip4dst = htonl(perfect_rule->d_addr);
fsp->m_u.tcp_ip4_spec.ip4dst = 0xFFFFFFFF;
}
if(perfect_rule->s_port) {
fsp->h_u.tcp_ip4_spec.psrc = htons(perfect_rule->s_port);
fsp->m_u.tcp_ip4_spec.psrc = 0xFFFF;
}
if(perfect_rule->d_port) {
fsp->h_u.tcp_ip4_spec.pdst = htons(perfect_rule->d_port);
fsp->m_u.tcp_ip4_spec.pdst = 0xFFFF;
}
if(perfect_rule->vlan_id) {
fsp->h_ext.vlan_tci = perfect_rule->vlan_id;
fsp->m_ext.vlan_tci = 0xFFF; // VLANID meaningful, VLAN priority ignored
/* fsp->h_ext.vlan_etype
* fsp->m_ext.vlan_etype */
fsp->flow_type |= FLOW_EXT;
}
switch (perfect_rule->proto) {
case 6: /* TCP */
fsp->flow_type = TCP_V4_FLOW;
break;
case 132: /* SCTP */
fsp->flow_type = SCTP_V4_FLOW;
break;
case 17: /* UDP */
fsp->flow_type = UDP_V4_FLOW;
break;
default: /* * */
fsp->flow_type = IP_USER_FLOW;
break;
}
cmd.cmd = (request == add_hw_rule ? ETHTOOL_SRXCLSRLINS : ETHTOOL_SRXCLSRLDEL);
break;
default:
break;
}
if(cmd.cmd) {
rc = dev->ethtool_ops->set_rxnfc(dev, &cmd);
if(unlikely(enable_debug)
&& rule->rule_family_type == intel_82599_perfect_filter_rule
&& rc < 0) {
intel_82599_perfect_filter_hw_rule *perfect_rule = &rule->rule_family.perfect_rule;
printk("[DNA][DEBUG] %s() ixgbe_set_rxnfc(%d.%d.%d.%d:%d -> %d.%d.%d.%d:%d) returned %d\n",
__FUNCTION__,
perfect_rule->s_addr >> 24 & 0xFF, perfect_rule->s_addr >> 16 & 0xFF,
perfect_rule->s_addr >> 8 & 0xFF, perfect_rule->s_addr >> 0 & 0xFF,
perfect_rule->s_port & 0xFFFF,
perfect_rule->d_addr >> 24 & 0xFF, perfect_rule->d_addr >> 16 & 0xFF,
perfect_rule->d_addr >> 8 & 0xFF, perfect_rule->d_addr >> 0 & 0xFF,
perfect_rule->d_port & 0xFFFF,
rc);
}
}
#endif
return(rc);
}
/* ************************************* */
static int handle_hw_filtering_rule(struct pf_ring_socket *pfr,
hw_filtering_rule *rule,
hw_filtering_rule_command command) {
if(unlikely(enable_debug))
printk("[PF_RING] --> handle_hw_filtering_rule(command=%d)\n", command);
switch(rule->rule_family_type) {
case intel_82599_five_tuple_rule:
if(pfr->ring_netdev->hw_filters.filter_handlers.five_tuple_handler == NULL)
return(-EINVAL);
else
return(i82599_generic_handler(pfr, rule, command));
break;
case intel_82599_perfect_filter_rule:
if(pfr->ring_netdev->hw_filters.filter_handlers.perfect_filter_handler == NULL)
return(-EINVAL);
else
return(i82599_generic_handler(pfr, rule, command));
break;
case silicom_redirector_rule:
return(-EINVAL); /* handled in userland */
break;
}
return(-EINVAL);
}
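/*
  Illustrative sketch (not module code): how a caller could fill a
  hw_filtering_rule for the 82599 "perfect filter" path handled above.
  The field names come from the structures used in i82599_generic_handler();
  the concrete values are placeholders and pfr is assumed to be a socket
  already bound to an 82599-backed device.

    hw_filtering_rule r;

    memset(&r, 0, sizeof(r));
    r.rule_id          = 1;
    r.rule_family_type = intel_82599_perfect_filter_rule;
    r.rule_family.perfect_rule.proto    = 6;           // TCP
    r.rule_family.perfect_rule.s_addr   = 0xC0A8000A;  // 192.168.0.10, host order
    r.rule_family.perfect_rule.d_port   = 80;          // host order
    r.rule_family.perfect_rule.queue_id = 2;           // steer to RX queue 2

    handle_hw_filtering_rule(pfr, &r, add_hw_rule);
*/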
/* ***************************************** */
#ifdef ENABLE_PROC_WRITE_RULE
#if(LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31))
static int ring_proc_dev_rule_read(char *buf, char **start, off_t offset,
int len, int *unused, void *data)
{
int rlen = 0;
if(data != NULL) {
ring_device_element *dev_ptr = (ring_device_element*)data;
struct net_device *dev = dev_ptr->dev;
rlen = sprintf(buf, "Name: %s\n", dev->name);
rlen += sprintf(buf+rlen, "# Filters: %d\n", dev_ptr->hw_filters.num_filters);
rlen += sprintf(buf+rlen, "\nFiltering Rules:\n"
"[perfect rule] +|-(rule_id,queue_id,vlan,tcp|udp,src_ip/mask,src_port,dst_ip/mask,dst_port)\n"
"Example:\t+(1,-1,0,tcp,192.168.0.10/32,25,10.6.0.0/16,0) (queue_id = -1 => drop)\n\n"
"[5 tuple rule] +|-(rule_id,queue_id,tcp|udp,src_ip,src_port,dst_ip,dst_port)\n"
"Example:\t+(1,-1,tcp,192.168.0.10,25,0.0.0.0,0)\n\n"
"Note:\n\t- queue_id = -1 => drop\n\t- 0 = ignore value\n");
}
return rlen;
}
#endif
/* ********************************** */
#ifdef ENABLE_PROC_WRITE_RULE
static void init_intel_82599_five_tuple_filter_hw_rule(u_int8_t queue_id, u_int8_t proto,
u_int32_t s_addr, u_int32_t d_addr,
u_int16_t s_port, u_int16_t d_port,
intel_82599_five_tuple_filter_hw_rule *rule) {
/* printk("init_intel_82599_five_tuple_filter_hw_rule()\n"); */
memset(rule, 0, sizeof(intel_82599_five_tuple_filter_hw_rule));
rule->queue_id = queue_id, rule->proto = proto;
rule->s_addr = s_addr, rule->d_addr = d_addr;
rule->s_port = s_port, rule->d_port = d_port;
}
/* ********************************** */
static void init_intel_82599_perfect_filter_hw_rule(u_int8_t queue_id,
u_int8_t proto, u_int16_t vlan,
u_int32_t s_addr, u_int8_t s_mask,
u_int32_t d_addr, u_int8_t d_mask,
u_int16_t s_port, u_int16_t d_port,
intel_82599_perfect_filter_hw_rule *rule) {
u_int32_t netmask;
/* printk("init_intel_82599_perfect_filter_hw_rule()\n"); */
memset(rule, 0, sizeof(intel_82599_perfect_filter_hw_rule));
rule->queue_id = queue_id, rule->vlan_id = vlan, rule->proto = proto;
rule->s_addr = s_addr;
if(s_mask == 32) netmask = 0xFFFFFFFF; else netmask = ~(0xFFFFFFFF >> s_mask);
rule->s_addr &= netmask;
rule->d_addr = d_addr;
if(d_mask == 32) netmask = 0xFFFFFFFF; else netmask = ~(0xFFFFFFFF >> d_mask);
rule->d_addr &= netmask;
rule->s_port = s_port, rule->d_port = d_port;
}
#endif /* ENABLE_PROC_WRITE_RULE */
/* ********************************** */
#ifdef ENABLE_PROC_WRITE_RULE
static int ring_proc_dev_rule_write(struct file *file,
const char __user *buffer,
unsigned long count, void *data)
{
char buf[128], add, proto[4] = { 0 };
ring_device_element *dev_ptr = (ring_device_element*)data;
int num, queue_id, vlan, rc, rule_id, protocol;
int s_a, s_b, s_c, s_d, s_mask, s_port;
int d_a, d_b, d_c, d_d, d_mask, d_port;
hw_filtering_rule_request rule;
u_int8_t found = 0;
int debug = 0;
if(data == NULL) return(0);
if(count > (sizeof(buf)-1)) count = sizeof(buf) - 1;
if(copy_from_user(buf, buffer, count)) return(-EFAULT);
buf[sizeof(buf)-1] = '\0', buf[count] = '\0';
if(unlikely(enable_debug)) printk("[PF_RING] ring_proc_dev_rule_write(%s)\n", buf);
num = sscanf(buf, "%c(%d,%d,%d,%c%c%c,%d.%d.%d.%d/%d,%d,%d.%d.%d.%d/%d,%d)",
&add, &rule_id, &queue_id, &vlan,
&proto[0], &proto[1], &proto[2],
&s_a, &s_b, &s_c, &s_d, &s_mask, &s_port,
&d_a, &d_b, &d_c, &d_d, &d_mask, &d_port);
if(unlikely(enable_debug))
printk("[PF_RING] ring_proc_dev_rule_write(%s): num=%d (1)\n", buf, num);
if(num == 19) {
if(proto[0] == 't')
protocol = 6; /* TCP */
else /* if(proto[0] == 'u') */
protocol = 17; /* UDP */
rule.rule.rule_id = rule_id;
init_intel_82599_perfect_filter_hw_rule(queue_id, protocol, vlan,
((s_a & 0xff) << 24) + ((s_b & 0xff) << 16) + ((s_c & 0xff) << 8) + (s_d & 0xff), s_mask,
((d_a & 0xff) << 24) + ((d_b & 0xff) << 16) + ((d_c & 0xff) << 8) + (d_d & 0xff), d_mask,
s_port, d_port, &rule.rule.rule_family.perfect_rule);
rule.rule.rule_family_type = intel_82599_perfect_filter_rule;
found = 1;
}
if(!found) {
num = sscanf(buf, "%c(%d,%d,%c%c%c,%d.%d.%d.%d,%d,%d.%d.%d.%d,%d)",
&add, &rule_id, &queue_id,
&proto[0], &proto[1], &proto[2],
&s_a, &s_b, &s_c, &s_d, &s_port,
&d_a, &d_b, &d_c, &d_d, &d_port);
if(unlikely(enable_debug))
printk("[PF_RING] ring_proc_dev_rule_write(%s): num=%d (2)\n", buf, num);
if(num == 16) {
if(proto[0] == 't')
protocol = 6; /* TCP */
else if(proto[0] == 'u')
protocol = 17; /* UDP */
else
protocol = 0; /* any */
rule.rule.rule_id = rule_id;
init_intel_82599_five_tuple_filter_hw_rule(queue_id, protocol,
((s_a & 0xff) << 24) + ((s_b & 0xff) << 16) + ((s_c & 0xff) << 8) + (s_d & 0xff),
((d_a & 0xff) << 24) + ((d_b & 0xff) << 16) + ((d_c & 0xff) << 8) + (d_d & 0xff),
s_port, d_port, &rule.rule.rule_family.five_tuple_rule);
rule.rule.rule_family_type = intel_82599_five_tuple_rule;
found = 1;
}
}
if(!found)
return(-1);
rule.command = (add == '+') ? add_hw_rule : remove_hw_rule;
rc = handle_hw_filtering_rule(dev_ptr->dev, &rule);
if(rc != -1) {
/* Rule programmed successfully */
if(add == '+')
dev_ptr->hw_filters.num_filters++, pfr->num_hw_filtering_rules++;
else {
if(dev_ptr->hw_filters.num_filters > 0)
dev_ptr->hw_filters.num_filters--;
pfr->num_hw_filtering_rules--;
}
}
return((int)count);
}
#endif
#endif
/* ********************************** */
static char* direction2string(packet_direction d) {
switch(d) {
case rx_and_tx_direction: return("RX+TX");
case rx_only_direction: return("RX only");
case tx_only_direction: return("TX only");
}
return("???");
}
/* ********************************** */
static char* sockmode2string(socket_mode m) {
switch(m) {
case send_and_recv_mode: return("RX+TX");
case recv_only_mode: return("RX only");
case send_only_mode: return("TX only");
}
return("???");
}
/* ********************************** */
static int ring_proc_get_info(char *buf, char **start, off_t offset,
int len, int *unused, void *data)
{
int rlen = 0;
FlowSlotInfo *fsi;
if(data == NULL) {
/* /proc/net/pf_ring/info */
rlen = sprintf(buf, "PF_RING Version : %s ($Revision: %s$)\n", RING_VERSION, SVN_REV);
rlen += sprintf(buf + rlen, "Total rings : %d\n", ring_table_size);
rlen += sprintf(buf + rlen, "\nStandard (non DNA) Options\n");
rlen += sprintf(buf + rlen, "Ring slots : %d\n", min_num_slots);
rlen += sprintf(buf + rlen, "Slot version : %d\n", RING_FLOWSLOT_VERSION);
rlen += sprintf(buf + rlen, "Capture TX : %s\n", enable_tx_capture ? "Yes [RX+TX]" : "No [RX only]");
rlen += sprintf(buf + rlen, "IP Defragment : %s\n", enable_ip_defrag ? "Yes" : "No");
rlen += sprintf(buf + rlen, "Socket Mode : %s\n", quick_mode ? "Quick" : "Standard");
rlen += sprintf(buf + rlen, "Transparent mode : %s\n",
(transparent_mode == standard_linux_path ? "Yes [mode 0]" :
(transparent_mode == driver2pf_ring_transparent ? "Yes [mode 1]" : "No [mode 2]")));
rlen += sprintf(buf + rlen, "Total plugins : %d\n", plugin_registration_size);
purge_idle_fragment_cache();
rlen += sprintf(buf + rlen, "Cluster Fragment Queue : %u\n", num_cluster_fragments);
rlen += sprintf(buf + rlen, "Cluster Fragment Discard : %u\n", num_cluster_discarded_fragments);
} else {
/* Detailed statistics about a PF_RING */
struct pf_ring_socket *pfr = (struct pf_ring_socket *)data;
if(pfr) {
int num = 0;
struct list_head *ptr, *tmp_ptr;
fsi = pfr->slots_info;
rlen = sprintf(buf, "Bound Device(s) : ");
list_for_each_safe(ptr, tmp_ptr, &ring_aware_device_list) {
ring_device_element *dev_ptr = list_entry(ptr, ring_device_element, device_list);
if(test_bit(dev_ptr->dev->ifindex, pfr->netdev_mask)) {
rlen += sprintf(buf + rlen, "%s%s", (num > 0) ? "," : "", dev_ptr->dev->name);
num++;
}
}
rlen += sprintf(buf + rlen, "\n");
rlen += sprintf(buf + rlen, "Active : %d\n", pfr->ring_active || pfr->dna_cluster);
rlen += sprintf(buf + rlen, "Breed : %s\n", (pfr->dna_device_entry != NULL) ? "DNA" : "Non-DNA");
rlen += sprintf(buf + rlen, "Sampling Rate : %d\n", pfr->sample_rate);
rlen += sprintf(buf + rlen, "Capture Direction : %s\n", direction2string(pfr->direction));
rlen += sprintf(buf + rlen, "Socket Mode : %s\n", sockmode2string(pfr->mode));
rlen += sprintf(buf + rlen, "Appl. Name : %s\n", pfr->appl_name ? pfr->appl_name : "<unknown>");
rlen += sprintf(buf + rlen, "IP Defragment : %s\n", enable_ip_defrag ? "Yes" : "No");
rlen += sprintf(buf + rlen, "BPF Filtering : %s\n", pfr->bpfFilter ? "Enabled" : "Disabled");
rlen += sprintf(buf + rlen, "# Sw Filt. Rules : %d\n", pfr->num_sw_filtering_rules);
rlen += sprintf(buf + rlen, "# Hw Filt. Rules : %d\n", pfr->num_hw_filtering_rules);
rlen += sprintf(buf + rlen, "Poll Pkt Watermark : %d\n", pfr->poll_num_pkts_watermark);
rlen += sprintf(buf + rlen, "Num Poll Calls : %u\n", pfr->num_poll_calls);
if(pfr->dna_device_entry != NULL) {
/* DNA */
rlen += sprintf(buf + rlen, "Channel Id : %d\n", pfr->dna_device_entry->dev.channel_id);
rlen += sprintf(buf + rlen, "Num RX Slots : %d\n", pfr->dna_device_entry->dev.mem_info.rx.packet_memory_num_slots);
rlen += sprintf(buf + rlen, "Num TX Slots : %d\n", pfr->dna_device_entry->dev.mem_info.tx.packet_memory_num_slots);
rlen += sprintf(buf + rlen, "Tot Memory : %u bytes\n",
( pfr->dna_device_entry->dev.mem_info.rx.packet_memory_num_chunks *
pfr->dna_device_entry->dev.mem_info.rx.packet_memory_chunk_len )
+(pfr->dna_device_entry->dev.mem_info.tx.packet_memory_num_chunks *
pfr->dna_device_entry->dev.mem_info.tx.packet_memory_chunk_len )
+ pfr->dna_device_entry->dev.mem_info.rx.descr_packet_memory_tot_len
+ pfr->dna_device_entry->dev.mem_info.tx.descr_packet_memory_tot_len);
if(pfr->dna_cluster && pfr->dna_cluster_type == cluster_master && pfr->dna_cluster->stats) {
rlen += sprintf(buf + rlen, "Cluster: Tot Recvd : %lu\n", (unsigned long)pfr->dna_cluster->stats->tot_rx_packets);
rlen += sprintf(buf + rlen, "Cluster: Tot Sent : %lu\n", (unsigned long)pfr->dna_cluster->stats->tot_tx_packets);
}
} else if(fsi != NULL) {
/* Standard PF_RING */
rlen += sprintf(buf + rlen, "Channel Id Mask : 0x%08X\n", pfr->channel_id_mask);
rlen += sprintf(buf + rlen, "Cluster Id : %d\n", pfr->cluster_id);
rlen += sprintf(buf + rlen, "Slot Version : %d [%s]\n", fsi->version, RING_VERSION);
rlen += sprintf(buf + rlen, "Min Num Slots : %d\n", fsi->min_num_slots);
rlen += sprintf(buf + rlen, "Bucket Len : %d\n", fsi->data_len);
rlen += sprintf(buf + rlen, "Slot Len : %d [bucket+header]\n", fsi->slot_len);
rlen += sprintf(buf + rlen, "Tot Memory : %d\n", fsi->tot_mem);
rlen += sprintf(buf + rlen, "Tot Packets : %lu\n", (unsigned long)fsi->tot_pkts);
rlen += sprintf(buf + rlen, "Tot Pkt Lost : %lu\n", (unsigned long)fsi->tot_lost);
rlen += sprintf(buf + rlen, "Tot Insert : %lu\n", (unsigned long)fsi->tot_insert);
rlen += sprintf(buf + rlen, "Tot Read : %lu\n", (unsigned long)fsi->tot_read);
rlen += sprintf(buf + rlen, "Insert Offset : %lu\n", (unsigned long)fsi->insert_off);
rlen += sprintf(buf + rlen, "Remove Offset : %lu\n", (unsigned long)fsi->remove_off);
rlen += sprintf(buf + rlen, "TX: Send Ok : %lu\n", (unsigned long)fsi->good_pkt_sent);
rlen += sprintf(buf + rlen, "TX: Send Errors : %lu\n", (unsigned long)fsi->pkt_send_error);
rlen += sprintf(buf + rlen, "Reflect: Fwd Ok : %lu\n", (unsigned long)fsi->tot_fwd_ok);
rlen += sprintf(buf + rlen, "Reflect: Fwd Errors: %lu\n", (unsigned long)fsi->tot_fwd_notok);
rlen += sprintf(buf + rlen, "Num Free Slots : %lu\n", (unsigned long)get_num_ring_free_slots(pfr));
}
} else
rlen = sprintf(buf, "WARNING data == NULL\n");
}
return rlen;
}
/* ********************************** */
static int ring_proc_get_plugin_info(char *buf, char **start, off_t offset,
int len, int *unused, void *data)
{
int rlen = 0, i = 0;
struct pfring_plugin_registration *tmp = NULL;
/* FIXME: I should know the number of plugins registered */
if(!plugin_registration_size)
return rlen;
/* plugins_info */
rlen += sprintf(buf + rlen, "ID\tPlugin\n");
for(i = 0; i < MAX_PLUGIN_ID; i++) {
tmp = plugin_registration[i];
if(tmp) {
rlen += sprintf(buf + rlen, "%d\t%s [%s]\n",
tmp->plugin_id, tmp->name,
tmp->description);
}
}
return rlen;
}
/* ********************************** */
static void ring_proc_init(void)
{
ring_proc_dir = proc_mkdir("pf_ring",
#if(LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24))
init_net.
#endif
proc_net);
if(ring_proc_dir) {
#if(LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30))
ring_proc_dir->owner = THIS_MODULE;
#endif
ring_proc_dev_dir = proc_mkdir(PROC_DEV, ring_proc_dir);
ring_proc_stats_dir = proc_mkdir(PROC_STATS, ring_proc_dir);
ring_proc = create_proc_read_entry(PROC_INFO, 0 /* read-only */,
ring_proc_dir,
ring_proc_get_info, NULL);
ring_proc_plugins_info =
create_proc_read_entry(PROC_PLUGINS_INFO, 0 /* read-only */,
ring_proc_dir,
ring_proc_get_plugin_info, NULL);
if(!ring_proc || !ring_proc_plugins_info)
printk("[PF_RING] unable to register proc file\n");
else {
#if(LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30))
ring_proc->owner = THIS_MODULE;
ring_proc_plugins_info->owner = THIS_MODULE;
#endif
printk("[PF_RING] registered /proc/net/pf_ring/\n");
}
} else
printk("[PF_RING] unable to create /proc/net/pf_ring\n");
}
/* ********************************** */
static void ring_proc_term(void)
{
if(ring_proc != NULL) {
remove_proc_entry(PROC_INFO, ring_proc_dir);
if(unlikely(enable_debug)) printk("[PF_RING] removed /proc/net/pf_ring/%s\n", PROC_INFO);
remove_proc_entry(PROC_PLUGINS_INFO, ring_proc_dir);
if(unlikely(enable_debug)) printk("[PF_RING] removed /proc/net/pf_ring/%s\n", PROC_PLUGINS_INFO);
remove_proc_entry(PROC_STATS, ring_proc_dir);
remove_proc_entry(PROC_DEV, ring_proc_dir);
if(ring_proc_dir != NULL) {
remove_proc_entry("pf_ring",
#if(LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24))
init_net.
#endif
proc_net);
if(unlikely(enable_debug)) printk("[PF_RING] deregistered /proc/net/pf_ring\n");
}
}
}
/* ********************************** */
static char *allocate_shared_memory(u_int32_t *mem_len)
{
u_int32_t tot_mem = *mem_len;
char *shared_mem;
tot_mem = PAGE_ALIGN(tot_mem);
/* Alignment necessary on ARM platforms */
tot_mem += SHMLBA - (tot_mem % SHMLBA);
/* rounding size to the next power of 2 (needed by vPFRing) */
tot_mem--;
tot_mem |= tot_mem >> 1;
tot_mem |= tot_mem >> 2;
tot_mem |= tot_mem >> 4;
tot_mem |= tot_mem >> 8;
tot_mem |= tot_mem >> 16;
tot_mem++;
/* Memory is already zeroed */
shared_mem = vmalloc_user(tot_mem);
*mem_len = tot_mem;
return shared_mem;
}
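/*
  Worked example of the rounding above (hypothetical request): asking for
  9 MB (9437184 bytes) gives

    PAGE_ALIGN(9437184)                = 9437184   (already page aligned)
    + SHMLBA padding (arch dependent)  = 9441280   (with SHMLBA = 4096)
    bit cascade: next power of two     = 16777216  (16 MB)

  so the caller must always use the value written back into *mem_len
  (e.g. when mmap()ing the region), not the size it originally asked for.
*/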
/*
* Allocate ring memory used later on for
* mapping it to userland
*/
static int ring_alloc_mem(struct sock *sk)
{
u_int the_slot_len;
u_int32_t tot_mem;
struct pf_ring_socket *pfr = ring_sk(sk);
/* Userspace RING
* - producer attaching to a ring
* - or consumer re-opening an old ring already attached */
if(pfr->userspace_ring != NULL
&& (pfr->userspace_ring_type == userspace_ring_producer
|| (pfr->userspace_ring_type == userspace_ring_consumer
&& pfr->userspace_ring->ring_memory != NULL))) {
if(pfr->userspace_ring->ring_memory == NULL)
return (-1); /* Consumer ring memory has not yet been allocated */
pfr->slot_header_len = pfr->userspace_ring->slot_header_len;
pfr->bucket_len = pfr->userspace_ring->bucket_len;
pfr->ring_memory = pfr->userspace_ring->ring_memory;
pfr->slots_info = (FlowSlotInfo *) pfr->ring_memory;
pfr->ring_slots = (char *) (pfr->ring_memory + sizeof(FlowSlotInfo));
pfr->insert_page_id = 1, pfr->insert_slot_id = 0;
pfr->sw_filtering_rules_default_accept_policy = 1;
pfr->num_sw_filtering_rules = pfr->num_hw_filtering_rules = 0;
}
/* Check if the memory has already been allocated */
if(pfr->ring_memory != NULL) return(0);
if(unlikely(enable_debug))
printk("[PF_RING] ring_alloc_mem(bucket_len=%d)\n", pfr->bucket_len);
/* **********************************************
* *************************************
* * *
* * FlowSlotInfo *
* * *
* ************************************* <-+
* * FlowSlot * |
* ************************************* |
* * FlowSlot * |
* ************************************* +- >= min_num_slots
* * FlowSlot * |
* ************************************* |
* * FlowSlot * |
* ************************************* <-+
*
* ********************************************** */
if(pfr->header_len == short_pkt_header)
pfr->slot_header_len = sizeof(struct timeval) + sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(u_int64_t) /* ts+caplen+len+timestamp_ns */;
else
pfr->slot_header_len = sizeof(struct pfring_pkthdr);
the_slot_len = pfr->slot_header_len + pfr->bucket_len;
the_slot_len = ALIGN(the_slot_len + sizeof(u_int16_t) /* RING_MAGIC_VALUE */, sizeof(u_int64_t));
if(unlikely((UINT_MAX - sizeof(FlowSlotInfo)) / the_slot_len < min_num_slots)) {
printk("[PF_RING] ERROR: min_num_slots (%u, slot len = %u) causes memory size to wrap\n", min_num_slots, the_slot_len);
return(-1);
}
tot_mem = sizeof(FlowSlotInfo) + (min_num_slots * the_slot_len);
/* Memory is already zeroed */
pfr->ring_memory = allocate_shared_memory(&tot_mem);
if(pfr->ring_memory != NULL) {
if(unlikely(enable_debug))
printk("[PF_RING] successfully allocated %lu bytes at 0x%08lx\n",
(unsigned long)tot_mem, (unsigned long)pfr->ring_memory);
} else {
printk("[PF_RING] ERROR: not enough memory for ring\n");
return(-1);
}
pfr->slots_info = (FlowSlotInfo *) pfr->ring_memory;
pfr->ring_slots = (char *)(pfr->ring_memory + sizeof(FlowSlotInfo));
pfr->slots_info->version = RING_FLOWSLOT_VERSION;
pfr->slots_info->slot_len = the_slot_len;
pfr->slots_info->data_len = pfr->bucket_len;
pfr->slots_info->min_num_slots = (tot_mem - sizeof(FlowSlotInfo)) / the_slot_len;
pfr->slots_info->tot_mem = tot_mem;
pfr->slots_info->sample_rate = 1;
if(unlikely(enable_debug))
printk("[PF_RING] allocated %d slots [slot_len=%d][tot_mem=%u]\n",
pfr->slots_info->min_num_slots, pfr->slots_info->slot_len,
pfr->slots_info->tot_mem);
pfr->insert_page_id = 1, pfr->insert_slot_id = 0;
pfr->sw_filtering_rules_default_accept_policy = 1;
pfr->num_sw_filtering_rules = pfr->num_hw_filtering_rules = 0;
/* UserSpace RING
* - consumer creating a new ring */
if((pfr->userspace_ring != NULL)
&& (pfr->userspace_ring_type == userspace_ring_consumer)) {
pfr->userspace_ring->slot_header_len = pfr->slot_header_len;
pfr->userspace_ring->bucket_len = pfr->bucket_len;
pfr->userspace_ring->tot_mem = pfr->slots_info->tot_mem;
pfr->userspace_ring->ring_memory = pfr->ring_memory;
}
return(0);
}
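/*
  Worked example of the sizing above (hypothetical values): with
  bucket_len = 1536, long headers (assume sizeof(struct pfring_pkthdr)
  is 160 bytes) and min_num_slots = 4096:

    the_slot_len = ALIGN(160 + 1536 + 2, 8)            = 1704
    requested    = sizeof(FlowSlotInfo) + 4096 * 1704  ~ 6.7 MB

  allocate_shared_memory() then rounds this up to the next power of two
  (8 MB), and slots_info->min_num_slots is recomputed from the final size,
  so the extra memory simply yields more slots (roughly 4900 here).
*/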
/* ********************************** */
/*
* ring_insert()
*
* store the sk in a new element and add it
* to the head of the list.
*/
static inline int ring_insert(struct sock *sk)
{
struct pf_ring_socket *pfr;
if(unlikely(enable_debug))
printk("[PF_RING] ring_insert()\n");
if(lockless_list_add(&ring_table, sk) == -1)
return -1;
pfr = (struct pf_ring_socket *)ring_sk(sk);
pfr->ring_pid = current->pid;
bitmap_zero(pfr->netdev_mask, MAX_NUM_DEVICES_ID), pfr->num_bound_devices = 0;
return 0;
}
/* ********************************** */
/*
* ring_remove()
*
* For each of the elements in the list:
* - check if this is the element we want to delete
* - if it is, remove it from the list, and free it.
*
* stop when we find the one we're looking for(break),
* or when we reach the end of the list.
*/
static inline void ring_remove(struct sock *sk_to_delete)
{
struct pf_ring_socket *pfr_to_delete = ring_sk(sk_to_delete);
u_int8_t master_found = 0, socket_found = 0;
u_int32_t last_list_idx;
struct sock *sk;
if(unlikely(enable_debug))
printk("[PF_RING] ring_remove()\n");
sk = (struct sock*)lockless_list_get_first(&ring_table, &last_list_idx);
while(sk != NULL) {
struct pf_ring_socket *pfr;
pfr = ring_sk(sk);
if(pfr->master_ring == pfr_to_delete) {
if(unlikely(enable_debug))
printk("[PF_RING] Removing master ring\n");
pfr->master_ring = NULL, master_found = 1;
} else if(sk == sk_to_delete) {
if(unlikely(enable_debug))
printk("[PF_RING] Found socket to remove\n");
socket_found = 1;
}
if(master_found && socket_found)
break;
else
sk = (struct sock*)lockless_list_get_next(&ring_table, &last_list_idx);
}
if(socket_found) {
lockless_list_remove(&ring_table, sk_to_delete);
} else
printk("[PF_RING] WARNING: Unable to find socket to remove!!!\n");
if(unlikely(enable_debug))
printk("[PF_RING] leaving ring_remove()\n");
}
/* ********************************** */
static inline u_int32_t hash_pkt(u_int16_t vlan_id, u_int8_t proto,
ip_addr host_peer_a, ip_addr host_peer_b,
u_int16_t port_peer_a, u_int16_t port_peer_b)
{
if(unlikely(enable_debug))
printk("[PF_RING] hash_pkt(vlan_id=%u, proto=%u, port_peer_a=%u, port_peer_b=%u)\n",
vlan_id,proto, port_peer_a, port_peer_b);
return(vlan_id+proto+
host_peer_a.v6.s6_addr32[0]+host_peer_a.v6.s6_addr32[1]+
host_peer_a.v6.s6_addr32[2]+host_peer_a.v6.s6_addr32[3]+
host_peer_b.v6.s6_addr32[0]+host_peer_b.v6.s6_addr32[1]+
host_peer_b.v6.s6_addr32[2]+host_peer_b.v6.s6_addr32[3]+
port_peer_a+port_peer_b);
}
/* ********************************** */
#define HASH_PKT_HDR_RECOMPUTE 1<<0
#define HASH_PKT_HDR_MASK_SRC 1<<1
#define HASH_PKT_HDR_MASK_DST 1<<2
#define HASH_PKT_HDR_MASK_PORT 1<<3
#define HASH_PKT_HDR_MASK_PROTO 1<<4
#define HASH_PKT_HDR_MASK_VLAN 1<<5
static inline u_int32_t hash_pkt_header(struct pfring_pkthdr * hdr, u_int32_t flags)
{
if(hdr->extended_hdr.pkt_hash == 0 || flags & HASH_PKT_HDR_RECOMPUTE) {
u_int8_t use_tunneled_peers = hdr->extended_hdr.parsed_pkt.tunnel.tunnel_id == NO_TUNNEL_ID ? 0 : 1;
hdr->extended_hdr.pkt_hash = hash_pkt(
(flags & HASH_PKT_HDR_MASK_VLAN) ? 0 : hdr->extended_hdr.parsed_pkt.vlan_id,
(flags & HASH_PKT_HDR_MASK_PROTO) ? 0 :
(use_tunneled_peers ? hdr->extended_hdr.parsed_pkt.tunnel.tunneled_proto
: hdr->extended_hdr.parsed_pkt.l3_proto),
(flags & HASH_PKT_HDR_MASK_SRC) ? ip_zero :
(use_tunneled_peers ? hdr->extended_hdr.parsed_pkt.tunnel.tunneled_ip_src
: hdr->extended_hdr.parsed_pkt.ip_src),
(flags & HASH_PKT_HDR_MASK_DST) ? ip_zero :
(use_tunneled_peers ? hdr->extended_hdr.parsed_pkt.tunnel.tunneled_ip_dst
: hdr->extended_hdr.parsed_pkt.ip_dst),
(flags & (HASH_PKT_HDR_MASK_SRC | HASH_PKT_HDR_MASK_PORT)) ? 0 :
(use_tunneled_peers ? hdr->extended_hdr.parsed_pkt.tunnel.tunneled_l4_src_port
: hdr->extended_hdr.parsed_pkt.l4_src_port),
(flags & (HASH_PKT_HDR_MASK_DST | HASH_PKT_HDR_MASK_PORT)) ? 0 :
(use_tunneled_peers ? hdr->extended_hdr.parsed_pkt.tunnel.tunneled_l4_dst_port
: hdr->extended_hdr.parsed_pkt.l4_dst_port));
}
return(hdr->extended_hdr.pkt_hash);
}
/* ******************************************************* */
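/*
 * parse_raw_pkt()
 *
 * Dissect a raw Ethernet frame into hdr->extended_hdr.parsed_pkt:
 * 802.1Q VLAN tag(s), IPv4/IPv6 (walking the IPv6 extension headers),
 * TCP/UDP ports, and GTP/GRE tunnels when present. ip_id and the two
 * fragment flags are filled from the IPv4 header for defragmentation.
 * Returns 1 if an IP packet was (at least partially) parsed, 0 otherwise.
 */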
static int parse_raw_pkt(u_char *data, u_int data_len,
struct pfring_pkthdr *hdr,
u_int16_t *ip_id,
u_int8_t *first_fragment,
u_int8_t *second_fragment)
{
struct ethhdr *eh = (struct ethhdr *)data;
u_int16_t displ = 0, ip_len, fragment_offset = 0, tunnel_offset = 0;
memset(&hdr->extended_hdr.parsed_pkt, 0, sizeof(hdr->extended_hdr.parsed_pkt));
/* Default */
hdr->extended_hdr.parsed_pkt.tunnel.tunnel_id = NO_TUNNEL_ID;
*ip_id = 0, *first_fragment = 0, *second_fragment = 0;
if(data_len < sizeof(struct ethhdr)) return(0);
/* MAC address */
memcpy(&hdr->extended_hdr.parsed_pkt.dmac, eh->h_dest, sizeof(eh->h_dest));
memcpy(&hdr->extended_hdr.parsed_pkt.smac, eh->h_source, sizeof(eh->h_source));
hdr->extended_hdr.parsed_pkt.eth_type = ntohs(eh->h_proto);
hdr->extended_hdr.parsed_pkt.offset.eth_offset = 0;
hdr->extended_hdr.parsed_pkt.offset.vlan_offset = 0;
hdr->extended_hdr.parsed_pkt.vlan_id = 0; /* Any VLAN */
if(hdr->extended_hdr.parsed_pkt.eth_type == ETH_P_8021Q /* 802.1q (VLAN) */) {
struct eth_vlan_hdr *vh;
hdr->extended_hdr.parsed_pkt.offset.vlan_offset = sizeof(struct ethhdr) - sizeof(struct eth_vlan_hdr);
while (hdr->extended_hdr.parsed_pkt.eth_type == ETH_P_8021Q /* 802.1q (VLAN) */) {
hdr->extended_hdr.parsed_pkt.offset.vlan_offset += sizeof(struct eth_vlan_hdr);
vh = (struct eth_vlan_hdr *) &data[hdr->extended_hdr.parsed_pkt.offset.vlan_offset];
hdr->extended_hdr.parsed_pkt.vlan_id = ntohs(vh->h_vlan_id) & 0x0fff;
hdr->extended_hdr.parsed_pkt.eth_type = ntohs(vh->h_proto);
displ += sizeof(struct eth_vlan_hdr);
}
}
hdr->extended_hdr.parsed_pkt.offset.l3_offset = hdr->extended_hdr.parsed_pkt.offset.eth_offset + displ + sizeof(struct ethhdr);
if(unlikely(enable_debug))
printk("[PF_RING] [eth_type=%04X]\n", hdr->extended_hdr.parsed_pkt.eth_type);
if(hdr->extended_hdr.parsed_pkt.eth_type == ETH_P_IP /* IPv4 */) {
struct iphdr *ip;
u_int16_t frag_off;
hdr->extended_hdr.parsed_pkt.ip_version = 4;
if(data_len < hdr->extended_hdr.parsed_pkt.offset.l3_offset + sizeof(struct iphdr)) return(0);
ip = (struct iphdr *)(&data[hdr->extended_hdr.parsed_pkt.offset.l3_offset]);
*ip_id = ip->id, frag_off = ntohs(ip->frag_off);
if(unlikely(enable_debug)) printk("[PF_RING] frag_off=%04X\n", frag_off);
if(frag_off & 0x1FFF /* Fragment offset */)
*second_fragment = 1; /* Packet offset > 0 */
else if(frag_off & 0x2000 /* More Fragments set */)
*first_fragment = 1;
hdr->extended_hdr.parsed_pkt.ipv4_src = ntohl(ip->saddr);
hdr->extended_hdr.parsed_pkt.ipv4_dst = ntohl(ip->daddr);
hdr->extended_hdr.parsed_pkt.l3_proto = ip->protocol;
hdr->extended_hdr.parsed_pkt.ipv4_tos = ip->tos;
fragment_offset = ip->frag_off & htons(IP_OFFSET); /* fragment, but not the first */
ip_len = ip->ihl*4;
} else if(hdr->extended_hdr.parsed_pkt.eth_type == ETH_P_IPV6 /* IPv6 */) {
struct ipv6hdr *ipv6;
hdr->extended_hdr.parsed_pkt.ip_version = 6;
if(data_len < hdr->extended_hdr.parsed_pkt.offset.l3_offset + sizeof(struct ipv6hdr)) return(0);
ipv6 = (struct ipv6hdr*)(&data[hdr->extended_hdr.parsed_pkt.offset.l3_offset]);
ip_len = sizeof(struct ipv6hdr);
/* IPv6 addresses are stored in network byte order */
memcpy(&hdr->extended_hdr.parsed_pkt.ip_src.v6, &ipv6->saddr, sizeof(ipv6->saddr));
memcpy(&hdr->extended_hdr.parsed_pkt.ip_dst.v6, &ipv6->daddr, sizeof(ipv6->daddr));
hdr->extended_hdr.parsed_pkt.l3_proto = ipv6->nexthdr;
hdr->extended_hdr.parsed_pkt.ipv6_tos = ipv6->priority; /* IPv6 class of service */
/*
RFC2460 4.1 Extension Header Order
IPv6 header
Hop-by-Hop Options header
Destination Options header
Routing header
Fragment header
Authentication header
Encapsulating Security Payload header
Destination Options header
upper-layer header
*/
while(hdr->extended_hdr.parsed_pkt.l3_proto == NEXTHDR_HOP ||
hdr->extended_hdr.parsed_pkt.l3_proto == NEXTHDR_DEST ||
hdr->extended_hdr.parsed_pkt.l3_proto == NEXTHDR_ROUTING ||
hdr->extended_hdr.parsed_pkt.l3_proto == NEXTHDR_AUTH ||
hdr->extended_hdr.parsed_pkt.l3_proto == NEXTHDR_ESP ||
hdr->extended_hdr.parsed_pkt.l3_proto == NEXTHDR_FRAGMENT) {
struct ipv6_opt_hdr *ipv6_opt;
if(data_len < (hdr->extended_hdr.parsed_pkt.offset.l3_offset+ip_len+sizeof(struct ipv6_opt_hdr))) return(1);
ipv6_opt = (struct ipv6_opt_hdr *)(&data[hdr->extended_hdr.parsed_pkt.offset.l3_offset+ip_len]);
ip_len += sizeof(struct ipv6_opt_hdr);
if(hdr->extended_hdr.parsed_pkt.l3_proto == NEXTHDR_AUTH)
/*
RFC4302 2.2. Payload Length: This 8-bit field specifies the
length of AH in 32-bit words (4-byte units), minus "2".
*/
ip_len += ipv6_opt->hdrlen * 4;
else if(hdr->extended_hdr.parsed_pkt.l3_proto != NEXTHDR_FRAGMENT)
ip_len += ipv6_opt->hdrlen;
hdr->extended_hdr.parsed_pkt.l3_proto = ipv6_opt->nexthdr;
}
} else {
hdr->extended_hdr.parsed_pkt.l3_proto = 0;
return(0); /* No IP */
}
if(unlikely(enable_debug))
printk("[PF_RING] [l3_proto=%d]\n", hdr->extended_hdr.parsed_pkt.l3_proto);
hdr->extended_hdr.parsed_pkt.offset.l4_offset = hdr->extended_hdr.parsed_pkt.offset.l3_offset+ip_len;
if(((hdr->extended_hdr.parsed_pkt.l3_proto == IPPROTO_TCP)
|| (hdr->extended_hdr.parsed_pkt.l3_proto == IPPROTO_GRE)
|| (hdr->extended_hdr.parsed_pkt.l3_proto == IPPROTO_UDP))
&& (!fragment_offset)) {
if(hdr->extended_hdr.parsed_pkt.l3_proto == IPPROTO_TCP) {
struct tcphdr *tcp;
if(data_len < hdr->extended_hdr.parsed_pkt.offset.l4_offset + sizeof(struct tcphdr)) return(1);
tcp = (struct tcphdr *)(&data[hdr->extended_hdr.parsed_pkt.offset.l4_offset]);
hdr->extended_hdr.parsed_pkt.l4_src_port = ntohs(tcp->source);
hdr->extended_hdr.parsed_pkt.l4_dst_port = ntohs(tcp->dest);
hdr->extended_hdr.parsed_pkt.offset.payload_offset = hdr->extended_hdr.parsed_pkt.offset.l4_offset + (tcp->doff * 4);
hdr->extended_hdr.parsed_pkt.tcp.seq_num = ntohl(tcp->seq);
hdr->extended_hdr.parsed_pkt.tcp.ack_num = ntohl(tcp->ack_seq);
hdr->extended_hdr.parsed_pkt.tcp.flags = (tcp->fin * TH_FIN_MULTIPLIER) + (tcp->syn * TH_SYN_MULTIPLIER) +
(tcp->rst * TH_RST_MULTIPLIER) + (tcp->psh * TH_PUSH_MULTIPLIER) +
(tcp->ack * TH_ACK_MULTIPLIER) + (tcp->urg * TH_URG_MULTIPLIER);
} else if(hdr->extended_hdr.parsed_pkt.l3_proto == IPPROTO_UDP) {
struct udphdr *udp;
if(data_len < hdr->extended_hdr.parsed_pkt.offset.l4_offset + sizeof(struct udphdr)) return(1);
udp = (struct udphdr *)(&data[hdr->extended_hdr.parsed_pkt.offset.l4_offset]);
hdr->extended_hdr.parsed_pkt.l4_src_port = ntohs(udp->source), hdr->extended_hdr.parsed_pkt.l4_dst_port = ntohs(udp->dest);
hdr->extended_hdr.parsed_pkt.offset.payload_offset = hdr->extended_hdr.parsed_pkt.offset.l4_offset + sizeof(struct udphdr);
/* GTP */
if((hdr->extended_hdr.parsed_pkt.l4_src_port == GTP_SIGNALING_PORT)
|| (hdr->extended_hdr.parsed_pkt.l4_dst_port == GTP_SIGNALING_PORT)
|| (hdr->extended_hdr.parsed_pkt.l4_src_port == GTP_U_DATA_PORT)
|| (hdr->extended_hdr.parsed_pkt.l4_dst_port == GTP_U_DATA_PORT)) {
struct gtp_v1_hdr *gtp;
u_int16_t gtp_len;
if(data_len < (hdr->extended_hdr.parsed_pkt.offset.payload_offset+sizeof(struct gtp_v1_hdr))) return(1);
gtp = (struct gtp_v1_hdr *) (&data[hdr->extended_hdr.parsed_pkt.offset.payload_offset]);
gtp_len = sizeof(struct gtp_v1_hdr);
if(((gtp->flags & GTP_FLAGS_VERSION) >> GTP_FLAGS_VERSION_SHIFT) == GTP_VERSION_1) {
struct iphdr *tunneled_ip;
hdr->extended_hdr.parsed_pkt.tunnel.tunnel_id = ntohl(gtp->teid);
if(unlikely(enable_debug))
printk("[PF_RING] GTPv1 [%04X]\n", hdr->extended_hdr.parsed_pkt.tunnel.tunnel_id);
if((hdr->extended_hdr.parsed_pkt.l4_src_port == GTP_U_DATA_PORT)
|| (hdr->extended_hdr.parsed_pkt.l4_dst_port == GTP_U_DATA_PORT)) {
if(gtp->flags & (GTP_FLAGS_EXTENSION | GTP_FLAGS_SEQ_NUM | GTP_FLAGS_NPDU_NUM)) {
struct gtp_v1_opt_hdr *gtpopt;
if(data_len < (hdr->extended_hdr.parsed_pkt.offset.payload_offset+gtp_len+sizeof(struct gtp_v1_opt_hdr)))
return(1);
gtpopt = (struct gtp_v1_opt_hdr *) (&data[hdr->extended_hdr.parsed_pkt.offset.payload_offset + gtp_len]);
gtp_len += sizeof(struct gtp_v1_opt_hdr);
if((gtp->flags & GTP_FLAGS_EXTENSION) && gtpopt->next_ext_hdr) {
struct gtp_v1_ext_hdr *gtpext;
u_int8_t *next_ext_hdr = NULL;
do {
if(data_len < (hdr->extended_hdr.parsed_pkt.offset.payload_offset+gtp_len+1 /* 8 bit len field */)) return(1);
gtpext = (struct gtp_v1_ext_hdr *) (&data[hdr->extended_hdr.parsed_pkt.offset.payload_offset+gtp_len]);
gtp_len += (gtpext->len * GTP_EXT_HDR_LEN_UNIT_BYTES);
if((gtpext->len == 0) || (data_len < (hdr->extended_hdr.parsed_pkt.offset.payload_offset+gtp_len))) return(1);
next_ext_hdr = (u_int8_t *) (&data[hdr->extended_hdr.parsed_pkt.offset.payload_offset+gtp_len-1 /* 8 bit next_ext_hdr field */]);
} while(*next_ext_hdr != 0);
}
}
if(data_len < (hdr->extended_hdr.parsed_pkt.offset.payload_offset+gtp_len+sizeof(struct iphdr))) return(1);
tunneled_ip = (struct iphdr *) (&data[hdr->extended_hdr.parsed_pkt.offset.payload_offset + gtp_len]);
if(tunneled_ip->version == 4 /* IPv4 */) {
hdr->extended_hdr.parsed_pkt.tunnel.tunneled_proto = tunneled_ip->protocol;
hdr->extended_hdr.parsed_pkt.tunnel.tunneled_ip_src.v4 = ntohl(tunneled_ip->saddr);
hdr->extended_hdr.parsed_pkt.tunnel.tunneled_ip_dst.v4 = ntohl(tunneled_ip->daddr);
fragment_offset = tunneled_ip->frag_off & htons(IP_OFFSET); /* fragment, but not the first */
ip_len = tunneled_ip->ihl*4;
tunnel_offset = hdr->extended_hdr.parsed_pkt.offset.payload_offset+gtp_len+ip_len;
} else if(tunneled_ip->version == 6 /* IPv6 */) {
struct ipv6hdr* tunneled_ipv6;
if(data_len < (hdr->extended_hdr.parsed_pkt.offset.payload_offset+gtp_len+sizeof(struct ipv6hdr))) return(1);
tunneled_ipv6 = (struct ipv6hdr *) (&data[hdr->extended_hdr.parsed_pkt.offset.payload_offset + gtp_len]);
hdr->extended_hdr.parsed_pkt.tunnel.tunneled_proto = tunneled_ipv6->nexthdr;
/* IPv6 addresses are stored in network byte order */
memcpy(&hdr->extended_hdr.parsed_pkt.tunnel.tunneled_ip_src.v6, &tunneled_ipv6->saddr, sizeof(tunneled_ipv6->saddr));
memcpy(&hdr->extended_hdr.parsed_pkt.tunnel.tunneled_ip_dst.v6, &tunneled_ipv6->daddr, sizeof(tunneled_ipv6->daddr));
ip_len = sizeof(struct ipv6hdr), tunnel_offset = hdr->extended_hdr.parsed_pkt.offset.payload_offset+gtp_len;
while(hdr->extended_hdr.parsed_pkt.tunnel.tunneled_proto == NEXTHDR_HOP
|| hdr->extended_hdr.parsed_pkt.tunnel.tunneled_proto == NEXTHDR_DEST
|| hdr->extended_hdr.parsed_pkt.tunnel.tunneled_proto == NEXTHDR_ROUTING
|| hdr->extended_hdr.parsed_pkt.tunnel.tunneled_proto == NEXTHDR_AUTH
|| hdr->extended_hdr.parsed_pkt.tunnel.tunneled_proto == NEXTHDR_ESP
|| hdr->extended_hdr.parsed_pkt.tunnel.tunneled_proto == NEXTHDR_FRAGMENT) {
struct ipv6_opt_hdr *ipv6_opt;
if(data_len < (tunnel_offset+ip_len+sizeof(struct ipv6_opt_hdr))) return(1);
ipv6_opt = (struct ipv6_opt_hdr *)(&data[tunnel_offset+ip_len]);
ip_len += sizeof(struct ipv6_opt_hdr), fragment_offset = 0;
if(hdr->extended_hdr.parsed_pkt.tunnel.tunneled_proto == NEXTHDR_AUTH)
/*
RFC4302 2.2. Payload Length: This 8-bit field specifies the
length of AH in 32-bit words (4-byte units), minus "2".
*/
ip_len += ipv6_opt->hdrlen * 4;
else if(hdr->extended_hdr.parsed_pkt.tunnel.tunneled_proto != NEXTHDR_FRAGMENT)
ip_len += ipv6_opt->hdrlen;
hdr->extended_hdr.parsed_pkt.tunnel.tunneled_proto = ipv6_opt->nexthdr;
} /* while */
tunnel_offset += ip_len;
} else
return(1);
parse_tunneled_packet:
if(!fragment_offset) {
if(hdr->extended_hdr.parsed_pkt.tunnel.tunneled_proto == IPPROTO_TCP) {
struct tcphdr *tcp;
if(data_len < tunnel_offset + sizeof(struct tcphdr)) return(1);
tcp = (struct tcphdr *)(&data[tunnel_offset]);
hdr->extended_hdr.parsed_pkt.tunnel.tunneled_l4_src_port = ntohs(tcp->source),
hdr->extended_hdr.parsed_pkt.tunnel.tunneled_l4_dst_port = ntohs(tcp->dest);
} else if(hdr->extended_hdr.parsed_pkt.tunnel.tunneled_proto == IPPROTO_UDP) {
struct udphdr *udp;
if(data_len < tunnel_offset + sizeof(struct udphdr)) return(1);
udp = (struct udphdr *)(&data[tunnel_offset]);
hdr->extended_hdr.parsed_pkt.tunnel.tunneled_l4_src_port = ntohs(udp->source),
hdr->extended_hdr.parsed_pkt.tunnel.tunneled_l4_dst_port = ntohs(udp->dest);
if((hdr->extended_hdr.parsed_pkt.tunnel.tunneled_l4_src_port == MOBILE_IP_PORT)
|| (hdr->extended_hdr.parsed_pkt.tunnel.tunneled_l4_dst_port == MOBILE_IP_PORT)) {
/* FIX: missing implementation (TODO) */
}
}
}
}
}
} else if((hdr->extended_hdr.parsed_pkt.l4_src_port == MOBILE_IP_PORT)
|| (hdr->extended_hdr.parsed_pkt.l4_dst_port == MOBILE_IP_PORT)) {
/* FIX: missing implementation (TODO) */
}
} else if(hdr->extended_hdr.parsed_pkt.l3_proto == IPPROTO_GRE /* 0x47 */) {
struct gre_header *gre = (struct gre_header*)(&data[hdr->extended_hdr.parsed_pkt.offset.l4_offset]);
int gre_offset;
gre->flags_and_version = ntohs(gre->flags_and_version);
gre->proto = ntohs(gre->proto);
gre_offset = sizeof(struct gre_header);
if((gre->flags_and_version & GRE_HEADER_VERSION) == 0) {
if(gre->flags_and_version & (GRE_HEADER_CHECKSUM | GRE_HEADER_ROUTING)) gre_offset += 4;
if(gre->flags_and_version & GRE_HEADER_KEY) {
u_int32_t *tunnel_id = (u_int32_t*)(&data[hdr->extended_hdr.parsed_pkt.offset.l4_offset+gre_offset]);
gre_offset += 4;
hdr->extended_hdr.parsed_pkt.tunnel.tunnel_id = ntohl(*tunnel_id);
}
if(gre->flags_and_version & GRE_HEADER_SEQ_NUM) gre_offset += 4;
hdr->extended_hdr.parsed_pkt.offset.payload_offset = hdr->extended_hdr.parsed_pkt.offset.l4_offset + gre_offset;
if(gre->proto == ETH_P_IP /* IPv4 */) {
struct iphdr *tunneled_ip;
if(data_len < (hdr->extended_hdr.parsed_pkt.offset.payload_offset+sizeof(struct iphdr))) return(1);
tunneled_ip = (struct iphdr *)(&data[hdr->extended_hdr.parsed_pkt.offset.payload_offset]);
hdr->extended_hdr.parsed_pkt.tunnel.tunneled_proto = tunneled_ip->protocol;
hdr->extended_hdr.parsed_pkt.tunnel.tunneled_ip_src.v4 = ntohl(tunneled_ip->saddr);
hdr->extended_hdr.parsed_pkt.tunnel.tunneled_ip_dst.v4 = ntohl(tunneled_ip->daddr);
fragment_offset = tunneled_ip->frag_off & htons(IP_OFFSET); /* fragment, but not the first */
ip_len = tunneled_ip->ihl*4;
tunnel_offset = hdr->extended_hdr.parsed_pkt.offset.payload_offset + ip_len;
} else if(gre->proto == ETH_P_IPV6 /* IPv6 */) {
struct ipv6hdr* tunneled_ipv6;
if(data_len < (hdr->extended_hdr.parsed_pkt.offset.payload_offset+sizeof(struct ipv6hdr))) return(1);
tunneled_ipv6 = (struct ipv6hdr *)(&data[hdr->extended_hdr.parsed_pkt.offset.payload_offset]);
hdr->extended_hdr.parsed_pkt.tunnel.tunneled_proto = tunneled_ipv6->nexthdr;
/* IPv6 addresses are stored in network byte order */
memcpy(&hdr->extended_hdr.parsed_pkt.tunnel.tunneled_ip_src.v6, &tunneled_ipv6->saddr, sizeof(tunneled_ipv6->saddr));
memcpy(&hdr->extended_hdr.parsed_pkt.tunnel.tunneled_ip_dst.v6, &tunneled_ipv6->daddr, sizeof(tunneled_ipv6->daddr));
ip_len = sizeof(struct ipv6hdr), tunnel_offset = hdr->extended_hdr.parsed_pkt.offset.payload_offset;
while(hdr->extended_hdr.parsed_pkt.tunnel.tunneled_proto == NEXTHDR_HOP
|| hdr->extended_hdr.parsed_pkt.tunnel.tunneled_proto == NEXTHDR_DEST
|| hdr->extended_hdr.parsed_pkt.tunnel.tunneled_proto == NEXTHDR_ROUTING
|| hdr->extended_hdr.parsed_pkt.tunnel.tunneled_proto == NEXTHDR_AUTH
|| hdr->extended_hdr.parsed_pkt.tunnel.tunneled_proto == NEXTHDR_ESP
|| hdr->extended_hdr.parsed_pkt.tunnel.tunneled_proto == NEXTHDR_FRAGMENT) {
struct ipv6_opt_hdr *ipv6_opt;
if(data_len < (tunnel_offset+ip_len+sizeof(struct ipv6_opt_hdr))) return(1);
ipv6_opt = (struct ipv6_opt_hdr *)(&data[tunnel_offset+ip_len]);
ip_len += sizeof(struct ipv6_opt_hdr), fragment_offset = 0;
if(hdr->extended_hdr.parsed_pkt.tunnel.tunneled_proto == NEXTHDR_AUTH)
ip_len += ipv6_opt->hdrlen * 4;
else if(hdr->extended_hdr.parsed_pkt.tunnel.tunneled_proto != NEXTHDR_FRAGMENT)
ip_len += ipv6_opt->hdrlen;
hdr->extended_hdr.parsed_pkt.tunnel.tunneled_proto = ipv6_opt->nexthdr;
} /* while */
tunnel_offset += ip_len;
} else
return(1);
goto parse_tunneled_packet; /* Parse tunneled ports */
} else { /* TODO handle other GRE versions */
hdr->extended_hdr.parsed_pkt.offset.payload_offset = hdr->extended_hdr.parsed_pkt.offset.l4_offset;
}
} else
hdr->extended_hdr.parsed_pkt.offset.payload_offset = hdr->extended_hdr.parsed_pkt.offset.l4_offset;
if(unlikely(enable_debug))
printk("[PF_RING] [l4_offset=%d][l4_src_port/l4_dst_port=%d/%d]\n",
hdr->extended_hdr.parsed_pkt.offset.l4_offset,
hdr->extended_hdr.parsed_pkt.l4_src_port,
hdr->extended_hdr.parsed_pkt.l4_dst_port);
} else
hdr->extended_hdr.parsed_pkt.l4_src_port = hdr->extended_hdr.parsed_pkt.l4_dst_port = 0;
hash_pkt_header(hdr, 0);
return(1); /* IP */
}
/* ********************************** */
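/*
 * parse_pkt()
 *
 * Copy the initial bytes of the skb (up to 128, enough for the headers
 * we care about) into a stack buffer and hand it to parse_raw_pkt();
 * eth_offset is then set to -skb_displ so that offsets stay relative
 * to skb->data.
 */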
static int parse_pkt(struct sk_buff *skb,
u_int8_t real_skb,
u_int16_t skb_displ,
struct pfring_pkthdr *hdr,
u_int16_t *ip_id,
u_int8_t *first_fragment,
u_int8_t *second_fragment) {
int rc;
u_char buffer[128]; /* Enough for standard and tunneled headers */
u_int16_t data_len = min((u_int16_t)(skb->len + skb_displ), (u_int16_t)sizeof(buffer));
skb_copy_bits(skb, -skb_displ, buffer, data_len);
rc = parse_raw_pkt(buffer, data_len, hdr, ip_id, first_fragment, second_fragment);
hdr->extended_hdr.parsed_pkt.offset.eth_offset = -skb_displ;
return(rc);
}
/* ********************************** */
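/*
 * hash_bucket_match()
 *
 * Check whether a parsed packet matches a perfect (hash) rule. The rule
 * is matched bidirectionally (peer_a/peer_b may correspond to src/dst in
 * either order); mask_src/mask_dst make the corresponding packet fields
 * compare as zero. For IPv6 packets the full 128-bit addresses are
 * compared as well.
 */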
static int hash_bucket_match(sw_filtering_hash_bucket * hash_bucket,
struct pfring_pkthdr *hdr,
u_char mask_src, u_char mask_dst)
{
#if 0
printk("[PF_RING] hash_bucket_match() (%u,%d,%d.%d.%d.%d:%u,%d.%d.%d.%d:%u) "
"(%u,%d,%d.%d.%d.%d:%u,%d.%d.%d.%d:%u)\n",
hash_bucket->rule.vlan_id, hash_bucket->rule.proto,
((hash_bucket->rule.host4_peer_a >> 24) & 0xff),
((hash_bucket->rule.host4_peer_a >> 16) & 0xff),
((hash_bucket->rule.host4_peer_a >> 8) & 0xff),
((hash_bucket->rule.host4_peer_a >> 0) & 0xff),
hash_bucket->rule.port_peer_a,
((hash_bucket->rule.host4_peer_b >> 24) & 0xff),
((hash_bucket->rule.host4_peer_b >> 16) & 0xff),
((hash_bucket->rule.host4_peer_b >> 8) & 0xff),
((hash_bucket->rule.host4_peer_b >> 0) & 0xff),
hash_bucket->rule.port_peer_b,
hdr->extended_hdr.parsed_pkt.vlan_id,
hdr->extended_hdr.parsed_pkt.l3_proto,
((hdr->extended_hdr.parsed_pkt.ipv4_src >> 24) & 0xff),
((hdr->extended_hdr.parsed_pkt.ipv4_src >> 16) & 0xff),
((hdr->extended_hdr.parsed_pkt.ipv4_src >> 8) & 0xff),
((hdr->extended_hdr.parsed_pkt.ipv4_src >> 0) & 0xff),
hdr->extended_hdr.parsed_pkt.l4_src_port,
((hdr->extended_hdr.parsed_pkt.ipv4_dst >> 24) & 0xff),
((hdr->extended_hdr.parsed_pkt.ipv4_dst >> 16) & 0xff),
((hdr->extended_hdr.parsed_pkt.ipv4_dst >> 8) & 0xff),
((hdr->extended_hdr.parsed_pkt.ipv4_dst >> 0) & 0xff),
hdr->extended_hdr.parsed_pkt.l4_dst_port);
#endif
/*
  When the host_peer protocol is IPv4, s6_addr32[0] contains the IPv4
  address and the other elements of s6_addr32 are 0.
*/
if((hash_bucket->rule.proto == hdr->extended_hdr.parsed_pkt.l3_proto)
&& (hash_bucket->rule.vlan_id == hdr->extended_hdr.parsed_pkt.vlan_id)
&& (((hash_bucket->rule.host4_peer_a == (mask_src ? 0 : hdr->extended_hdr.parsed_pkt.ipv4_src))
&& (hash_bucket->rule.host4_peer_b == (mask_dst ? 0 : hdr->extended_hdr.parsed_pkt.ipv4_dst))
&& (hash_bucket->rule.port_peer_a == (mask_src ? 0 : hdr->extended_hdr.parsed_pkt.l4_src_port))
&& (hash_bucket->rule.port_peer_b == (mask_dst ? 0 : hdr->extended_hdr.parsed_pkt.l4_dst_port)))
||
((hash_bucket->rule.host4_peer_a == (mask_dst ? 0 : hdr->extended_hdr.parsed_pkt.ipv4_dst))
&& (hash_bucket->rule.host4_peer_b == (mask_src ? 0 : hdr->extended_hdr.parsed_pkt.ipv4_src))
&& (hash_bucket->rule.port_peer_a == (mask_dst ? 0 : hdr->extended_hdr.parsed_pkt.l4_dst_port))
&& (hash_bucket->rule.port_peer_b == (mask_src ? 0 : hdr->extended_hdr.parsed_pkt.l4_src_port)))))
{
if(hdr->extended_hdr.parsed_pkt.ip_version == 6) {
if(((memcmp(&hash_bucket->rule.host6_peer_a,
(mask_src ? &ip_zero.v6 : &hdr->extended_hdr.parsed_pkt.ipv6_src),
sizeof(ip_addr)) == 0)
&& (memcmp(&hash_bucket->rule.host6_peer_b,
(mask_dst ? &ip_zero.v6 : &hdr->extended_hdr.parsed_pkt.ipv6_dst),
sizeof(ip_addr)) == 0))
||
((memcmp(&hash_bucket->rule.host6_peer_a,
(mask_src ? &ip_zero.v6 : &hdr->extended_hdr.parsed_pkt.ipv6_dst),
sizeof(ip_addr)) == 0)
&& (memcmp(&hash_bucket->rule.host6_peer_b,
(mask_dst ? &ip_zero.v6 : &hdr->extended_hdr.parsed_pkt.ipv6_src),
sizeof(ip_addr)) == 0))) {
return(1);
} else {
return(0);
}
} else {
return(1);
}
} else {
return(0);
}
}
/* ********************************** */
static inline int hash_bucket_match_rule(sw_filtering_hash_bucket * hash_bucket,
hash_filtering_rule * rule)
{
if(unlikely(enable_debug))
printk("[PF_RING] (%u,%d,%d.%d.%d.%d:%u,%d.%d.%d.%d:%u) "
"(%u,%d,%d.%d.%d.%d:%u,%d.%d.%d.%d:%u)\n",
hash_bucket->rule.vlan_id, hash_bucket->rule.proto,
((hash_bucket->rule.host4_peer_a >> 24) & 0xff),
((hash_bucket->rule.host4_peer_a >> 16) & 0xff),
((hash_bucket->rule.host4_peer_a >> 8) & 0xff),
((hash_bucket->rule.host4_peer_a >> 0) & 0xff),
hash_bucket->rule.port_peer_a,
((hash_bucket->rule.host4_peer_b >> 24) & 0xff),
((hash_bucket->rule.host4_peer_b >> 16) & 0xff),
((hash_bucket->rule.host4_peer_b >> 8) & 0xff),
((hash_bucket->rule.host4_peer_b >> 0) & 0xff),
hash_bucket->rule.port_peer_b,
rule->vlan_id, rule->proto,
((rule->host4_peer_a >> 24) & 0xff),
((rule->host4_peer_a >> 16) & 0xff),
((rule->host4_peer_a >> 8) & 0xff),
((rule->host4_peer_a >> 0) & 0xff),
rule->port_peer_a,
((rule->host4_peer_b >> 24) & 0xff),
((rule->host4_peer_b >> 16) & 0xff),
((rule->host4_peer_b >> 8) & 0xff),
((rule->host4_peer_b >> 0) & 0xff), rule->port_peer_b);
if((hash_bucket->rule.proto == rule->proto)
&& (hash_bucket->rule.vlan_id == rule->vlan_id)
&& (((hash_bucket->rule.host4_peer_a == rule->host4_peer_a)
&& (hash_bucket->rule.host4_peer_b == rule->host4_peer_b)
&& (hash_bucket->rule.port_peer_a == rule->port_peer_a)
&& (hash_bucket->rule.port_peer_b == rule->port_peer_b))
|| ((hash_bucket->rule.host4_peer_a == rule->host4_peer_b)
&& (hash_bucket->rule.host4_peer_b == rule->host4_peer_a)
&& (hash_bucket->rule.port_peer_a == rule->port_peer_b)
&& (hash_bucket->rule.port_peer_b == rule->port_peer_a)))) {
hash_bucket->rule.internals.jiffies_last_match = jiffies;
return(1);
} else
return(0);
}
/* ********************************** */
static inline int hash_filtering_rule_match(hash_filtering_rule * a,
hash_filtering_rule * b)
{
if(unlikely(enable_debug))
printk("[PF_RING] (%u,%d,%d.%d.%d.%d:%u,%d.%d.%d.%d:%u) "
"(%u,%d,%d.%d.%d.%d:%u,%d.%d.%d.%d:%u)\n",
a->vlan_id, a->proto,
((a->host4_peer_a >> 24) & 0xff),
((a->host4_peer_a >> 16) & 0xff),
((a->host4_peer_a >> 8) & 0xff),
((a->host4_peer_a >> 0) & 0xff),
a->port_peer_a,
((a->host4_peer_b >> 24) & 0xff),
((a->host4_peer_b >> 16) & 0xff),
((a->host4_peer_b >> 8) & 0xff),
((a->host4_peer_b >> 0) & 0xff),
a->port_peer_b,
b->vlan_id, b->proto,
((b->host4_peer_a >> 24) & 0xff),
((b->host4_peer_a >> 16) & 0xff),
((b->host4_peer_a >> 8) & 0xff),
((b->host4_peer_a >> 0) & 0xff),
b->port_peer_a,
((b->host4_peer_b >> 24) & 0xff),
((b->host4_peer_b >> 16) & 0xff),
((b->host4_peer_b >> 8) & 0xff),
((b->host4_peer_b >> 0) & 0xff), b->port_peer_b);
if((a->proto == b->proto)
&& (a->vlan_id == b->vlan_id)
&& (((a->host4_peer_a == b->host4_peer_a)
&& (a->host4_peer_b == b->host4_peer_b)
&& (a->port_peer_a == b->port_peer_a)
&& (a->port_peer_b == b->port_peer_b))
|| ((a->host4_peer_a == b->host4_peer_b)
&& (a->host4_peer_b == b->host4_peer_a)
&& (a->port_peer_a == b->port_peer_b)
&& (a->port_peer_b == b->port_peer_a)))) {
return(1);
} else
return(0);
}
/* ********************************** */
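/* Compare addr word by word against rule_addr under rule_mask; a zero
   first word in rule_mask is treated as a wildcard (anything matches). */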
static inline int match_ipv6(ip_addr *addr, ip_addr *rule_addr, ip_addr *rule_mask) {
int i;
if(rule_mask->v6.s6_addr32[0] != 0)
for(i=0; i<4; i++)
if((addr->v6.s6_addr32[i] & rule_mask->v6.s6_addr32[i]) != rule_addr->v6.s6_addr32[i])
return(0);
return(1);
}
/* ********************************** */
/* 0 = no match, 1 = match */
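/*
 * match_filtering_rule()
 *
 * Evaluate a wildcard rule against a parsed packet: vlan, protocol,
 * tunnel id, MAC addresses, IP addresses/masks and port ranges are
 * checked first in the rule direction and, for bidirectional rules,
 * in the reverse direction as well. Then the optional steps follow:
 * balancing pool, payload pattern search (CONFIG_TEXTSEARCH), filter
 * plugin and action plugin. On match, *behaviour tells the caller
 * what to do with the packet.
 */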
static int match_filtering_rule(struct pf_ring_socket *pfr,
sw_filtering_rule_element * rule,
struct pfring_pkthdr *hdr,
struct sk_buff *skb,
int displ,
struct parse_buffer *parse_memory_buffer[],
u_int8_t *free_parse_mem,
u_int *last_matched_plugin,
rule_action_behaviour *behaviour)
{
u_int8_t empty_mac[ETH_ALEN] = { 0 }; /* NULL MAC address */
if(unlikely(enable_debug)) printk("[PF_RING] %s()\n", __FUNCTION__);
*behaviour = rule->rule.rule_action;
if((rule->rule.core_fields.vlan_id > 0)
&& (hdr->extended_hdr.parsed_pkt.vlan_id != rule->rule.core_fields.vlan_id))
return(0);
if((rule->rule.core_fields.proto > 0)
&& (hdr->extended_hdr.parsed_pkt.l3_proto != rule->rule.core_fields.proto))
return(0);
if((rule->rule.extended_fields.optional_fields & FILTER_TUNNEL_ID_FLAG)
&& (hdr->extended_hdr.parsed_pkt.tunnel.tunnel_id != rule->rule.extended_fields.tunnel.tunnel_id))
return(0);
if(hdr->extended_hdr.parsed_pkt.ip_version == 6) {
/* IPv6 */
if(!match_ipv6(&hdr->extended_hdr.parsed_pkt.ip_src, &rule->rule.extended_fields.tunnel.dhost_mask,
&rule->rule.extended_fields.tunnel.dhost)
|| !match_ipv6(&hdr->extended_hdr.parsed_pkt.ip_dst, &rule->rule.extended_fields.tunnel.shost_mask,
&rule->rule.extended_fields.tunnel.shost))
return(0);
} else {
/* IPv4 */
if((hdr->extended_hdr.parsed_pkt.ip_src.v4 & rule->rule.extended_fields.tunnel.dhost_mask.v4) != rule->rule.extended_fields.tunnel.dhost.v4
|| (hdr->extended_hdr.parsed_pkt.ip_dst.v4 & rule->rule.extended_fields.tunnel.shost_mask.v4) != rule->rule.extended_fields.tunnel.shost.v4)
return(0);
}
if((memcmp(rule->rule.core_fields.dmac, empty_mac, ETH_ALEN) != 0)
&& (memcmp(hdr->extended_hdr.parsed_pkt.dmac, rule->rule.core_fields.dmac, ETH_ALEN) != 0))
goto swap_direction;
if((memcmp(rule->rule.core_fields.smac, empty_mac, ETH_ALEN) != 0)
&& (memcmp(hdr->extended_hdr.parsed_pkt.smac, rule->rule.core_fields.smac, ETH_ALEN) != 0))
goto swap_direction;
if(hdr->extended_hdr.parsed_pkt.ip_version == 6) {
/* IPv6 */
if(!match_ipv6(&hdr->extended_hdr.parsed_pkt.ip_src, &rule->rule.core_fields.shost_mask,
&rule->rule.core_fields.shost)
|| !match_ipv6(&hdr->extended_hdr.parsed_pkt.ip_dst, &rule->rule.core_fields.dhost_mask,
&rule->rule.core_fields.dhost))
goto swap_direction;
} else {
/* IPv4 */
if((hdr->extended_hdr.parsed_pkt.ip_src.v4 & rule->rule.core_fields.shost_mask.v4) != rule->rule.core_fields.shost.v4
|| (hdr->extended_hdr.parsed_pkt.ip_dst.v4 & rule->rule.core_fields.dhost_mask.v4) != rule->rule.core_fields.dhost.v4)
goto swap_direction;
}
if((rule->rule.core_fields.sport_high != 0)
&& ((hdr->extended_hdr.parsed_pkt.l4_src_port < rule->rule.core_fields.sport_low)
|| (hdr->extended_hdr.parsed_pkt.l4_src_port > rule->rule.core_fields.sport_high)))
goto swap_direction;
if((rule->rule.core_fields.dport_high != 0)
&& ((hdr->extended_hdr.parsed_pkt.l4_dst_port < rule->rule.core_fields.dport_low)
|| (hdr->extended_hdr.parsed_pkt.l4_dst_port > rule->rule.core_fields.dport_high)))
goto swap_direction;
goto success;
swap_direction:
if(!rule->rule.bidirectional)
return(0);
if((memcmp(rule->rule.core_fields.dmac, empty_mac, ETH_ALEN) != 0)
&& (memcmp(hdr->extended_hdr.parsed_pkt.smac, rule->rule.core_fields.dmac, ETH_ALEN) != 0))
return(0);
if((memcmp(rule->rule.core_fields.smac, empty_mac, ETH_ALEN) != 0)
&& (memcmp(hdr->extended_hdr.parsed_pkt.dmac, rule->rule.core_fields.smac, ETH_ALEN) != 0))
return(0);
if(hdr->extended_hdr.parsed_pkt.ip_version == 6) {
/* IPv6 */
if(!match_ipv6(&hdr->extended_hdr.parsed_pkt.ip_src, &rule->rule.core_fields.dhost_mask,
&rule->rule.core_fields.dhost)
|| !match_ipv6(&hdr->extended_hdr.parsed_pkt.ip_dst, &rule->rule.core_fields.shost_mask,
&rule->rule.core_fields.shost))
return(0);
} else {
/* IPv4 */
if((hdr->extended_hdr.parsed_pkt.ip_src.v4 & rule->rule.core_fields.dhost_mask.v4) != rule->rule.core_fields.dhost.v4
|| (hdr->extended_hdr.parsed_pkt.ip_dst.v4 & rule->rule.core_fields.shost_mask.v4) != rule->rule.core_fields.shost.v4)
return(0);
}
if((rule->rule.core_fields.sport_high != 0)
&& ((hdr->extended_hdr.parsed_pkt.l4_dst_port < rule->rule.core_fields.sport_low)
|| (hdr->extended_hdr.parsed_pkt.l4_dst_port > rule->rule.core_fields.sport_high)))
return(0);
if((rule->rule.core_fields.dport_high != 0)
&& ((hdr->extended_hdr.parsed_pkt.l4_src_port < rule->rule.core_fields.dport_low)
|| (hdr->extended_hdr.parsed_pkt.l4_src_port > rule->rule.core_fields.dport_high)))
return(0);
success:
if(rule->rule.balance_pool > 0) {
u_int32_t balance_hash = hash_pkt_header(hdr, 0) % rule->rule.balance_pool;
if(balance_hash != rule->rule.balance_id)
return(0);
}
#ifdef CONFIG_TEXTSEARCH
if(rule->pattern[0] != NULL) {
if(unlikely(enable_debug))
printk("[PF_RING] pattern\n");
if((hdr->extended_hdr.parsed_pkt.offset.payload_offset > 0)
&& (hdr->caplen > hdr->extended_hdr.parsed_pkt.offset.payload_offset)) {
char *payload = (char *)&(skb->data[hdr->extended_hdr.parsed_pkt.offset.payload_offset /* -displ */ ]);
int rc = 0, payload_len =
hdr->caplen - hdr->extended_hdr.parsed_pkt.offset.payload_offset - displ;
if(payload_len > 0) {
int i;
struct ts_state state;
if(unlikely(enable_debug)) {
printk("[PF_RING] Trying to match pattern [caplen=%d][len=%d][displ=%d][payload_offset=%d][",
hdr->caplen, payload_len, displ,
hdr->extended_hdr.parsed_pkt.offset.payload_offset);
for(i = 0; i < payload_len; i++)
printk("[%d/%c]", i, payload[i] & 0xFF);
printk("]\n");
}
payload[payload_len] = '\0';
if(unlikely(enable_debug))
printk("[PF_RING] Attempt to match [%s]\n", payload);
for(i = 0; (i < MAX_NUM_PATTERN) && (rule->pattern[i] != NULL); i++) {
if(unlikely(enable_debug))
printk("[PF_RING] Attempt to match pattern %d\n", i);
rc = (textsearch_find_continuous
(rule->pattern[i], &state,
payload, payload_len) != UINT_MAX) ? 1 : 0;
if(rc == 1)
break;
}
if(unlikely(enable_debug))
printk("[PF_RING] Match returned: %d [payload_len=%d][%s]\n",
rc, payload_len, payload);
if(rc == 0)
return(0); /* No match */
} else
return(0); /* No payload data */
} else
return(0); /* No payload data */
}
#endif
/* Step 1 - Filter (optional) */
if((rule->rule.extended_fields.filter_plugin_id > 0)
&& (rule->rule.extended_fields.filter_plugin_id < MAX_PLUGIN_ID)
&& (plugin_registration[rule->rule.extended_fields.filter_plugin_id] != NULL)
&& (plugin_registration[rule->rule.extended_fields.filter_plugin_id]->pfring_plugin_filter_skb != NULL)
) {
int rc;
if(unlikely(enable_debug))
printk("[PF_RING] rule->plugin_id [rule_id=%d]"
"[filter_plugin_id=%d][plugin_action=%d][ptr=%p]\n",
rule->rule.rule_id,
rule->rule.extended_fields.filter_plugin_id,
rule->rule.plugin_action.plugin_id,
plugin_registration[rule->rule.plugin_action.plugin_id]);
rc = plugin_registration[rule->rule.extended_fields.filter_plugin_id]->pfring_plugin_filter_skb
(pfr, rule, hdr, skb, displ, &parse_memory_buffer[rule->rule.extended_fields.filter_plugin_id]);
if(parse_memory_buffer[rule->rule.extended_fields.filter_plugin_id])
*free_parse_mem = 1;
if(rc <= 0) {
return(0); /* No match */
} else {
*last_matched_plugin = rule->rule.extended_fields.filter_plugin_id;
hdr->extended_hdr.parsed_pkt.last_matched_plugin_id =
rule->rule.extended_fields.filter_plugin_id;
if(unlikely(enable_debug))
printk("[PF_RING] [last_matched_plugin = %d][buffer=%p][len=%d]\n",
*last_matched_plugin,
parse_memory_buffer[rule->rule.extended_fields.filter_plugin_id],
parse_memory_buffer[rule->rule.extended_fields.filter_plugin_id] ?
parse_memory_buffer[rule->rule.extended_fields.filter_plugin_id]->mem_len : 0);
}
}
/* Step 2 - Handle skb */
/* Action to be performed in case of match */
if((rule->rule.plugin_action.plugin_id != NO_PLUGIN_ID)
&& (rule->rule.plugin_action.plugin_id < MAX_PLUGIN_ID)
&& (plugin_registration[rule->rule.plugin_action.plugin_id] != NULL)
&& (plugin_registration[rule->rule.plugin_action.plugin_id]->pfring_plugin_handle_skb != NULL)
) {
int rc;
if(unlikely(enable_debug))
printk("[PF_RING] Calling pfring_plugin_handle_skb(pluginId=%d)\n",
rule->rule.plugin_action.plugin_id);
rc = plugin_registration[rule->rule.plugin_action.plugin_id]
->pfring_plugin_handle_skb(pfr, rule, NULL, hdr, skb, displ,
rule->rule.extended_fields.filter_plugin_id,
&parse_memory_buffer[rule->rule.plugin_action.plugin_id],
behaviour);
if(rc <= 0)
return(0); /* No match */
if(*last_matched_plugin == 0)
*last_matched_plugin = rule->rule.plugin_action.plugin_id;
if(parse_memory_buffer[rule->rule.plugin_action.plugin_id])
*free_parse_mem = 1;
} else {
if(unlikely(enable_debug))
printk("[PF_RING] Skipping pfring_plugin_handle_skb(plugin_action=%d)\n",
rule->rule.plugin_action.plugin_id);
*behaviour = rule->rule.rule_action;
if(unlikely(enable_debug))
printk("[PF_RING] Rule %d behaviour: %d\n",
rule->rule.rule_id, rule->rule.rule_action);
}
if(unlikely(enable_debug)) {
printk("[PF_RING] MATCH: %s(vlan=%u, proto=%u, sip=%u, sport=%u, dip=%u, dport=%u)\n"
" [rule(vlan=%u, proto=%u, ip=%u:%u, port=%u:%u-%u:%u)(behaviour=%d)]\n",
__FUNCTION__,
hdr->extended_hdr.parsed_pkt.vlan_id, hdr->extended_hdr.parsed_pkt.l3_proto,
hdr->extended_hdr.parsed_pkt.ipv4_src, hdr->extended_hdr.parsed_pkt.l4_src_port,
hdr->extended_hdr.parsed_pkt.ipv4_dst, hdr->extended_hdr.parsed_pkt.l4_dst_port,
rule->rule.core_fields.vlan_id,
rule->rule.core_fields.proto,
rule->rule.core_fields.shost.v4,
rule->rule.core_fields.dhost.v4,
rule->rule.core_fields.sport_low, rule->rule.core_fields.sport_high,
rule->rule.core_fields.dport_low, rule->rule.core_fields.dport_high,
*behaviour);
}
rule->rule.internals.jiffies_last_match = jiffies;
return(1); /* match */
}
/* ********************************** */
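/*
 * set_skb_time()
 *
 * Fill hdr->ts (and, on recent kernels, hdr->extended_hdr.timestamp_ns)
 * from the skb timestamp, stamping the skb first if no timestamp is
 * present. The branches below track the timestamping API changes across
 * 2.6.x kernels; hardware timestamps are used when available, with a
 * fallback to the software timestamp.
 */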
static inline void set_skb_time(struct sk_buff *skb, struct pfring_pkthdr *hdr) {
/* BD - API changed for time keeping */
#if(LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14))
if(skb->stamp.tv_sec == 0)
do_gettimeofday(&skb->stamp); /* If timestamp is missing add it */
hdr->ts.tv_sec = skb->stamp.tv_sec, hdr->ts.tv_usec = skb->stamp.tv_usec;
hdr->extended_hdr.timestamp_ns = 0; /* No nsec for old kernels */
#elif(LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22))
if(skb->tstamp.off_sec == 0)
__net_timestamp(skb); /* If timestamp is missing add it */
hdr->ts.tv_sec = skb->tstamp.off_sec, hdr->ts.tv_usec = skb->tstamp.off_usec;
hdr->extended_hdr.timestamp_ns = 0; /* No nsec for old kernels */
#else /* 2.6.22 and above */
if(skb->tstamp.tv64 == 0)
__net_timestamp(skb); /* If timestamp is missing add it */
hdr->ts = ktime_to_timeval(skb->tstamp);
#if(LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,30))
{
/* Use hardware timestamps when present. If not, just use software timestamps */
hdr->extended_hdr.timestamp_ns = ktime_to_ns(skb_hwtstamps(skb)->hwtstamp);
if(unlikely(enable_debug))
printk("[PF_RING] hwts=%llu/dev=%s\n",
hdr->extended_hdr.timestamp_ns,
skb->dev ? skb->dev->name : "???");
}
#endif
if(hdr->extended_hdr.timestamp_ns == 0)
hdr->extended_hdr.timestamp_ns = ktime_to_ns(skb->tstamp);
#endif
}
/* ********************************** */
/*
Generic function for copying either a skb or a raw
memory block to the ring buffer
Return:
- 0 = packet was not copied (e.g. slot was full)
- 1 = the packet was copied (i.e. there was room for it)
*/
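/*
  NOTE: pfr->ring_index_lock is taken only when several writers can
  actually race on this ring (tx+rx capture, more than one bound device
  or channel, RSS rehashing, or clustering); in the common single-writer
  case the lock is skipped (see do_lock below).
*/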
static inline int copy_data_to_ring(struct sk_buff *skb,
struct pf_ring_socket *pfr,
struct pfring_pkthdr *hdr,
int displ, int offset, void *plugin_mem,
void *raw_data, uint raw_data_len,
int *clone_id) {
char *ring_bucket;
u_int32_t off;
u_short do_lock = (
(enable_tx_capture && pfr->direction == rx_and_tx_direction) ||
(pfr->num_bound_devices > 1) ||
(pfr->num_channels_per_ring > 1) ||
(pfr->rehash_rss != NULL && get_num_rx_queues(skb->dev) > 1) ||
(pfr->cluster_id != 0)
);
if(pfr->ring_slots == NULL) return(0);
if(unlikely(enable_debug))
printk("[PF_RING] do_lock=%d [num_channels_per_ring=%d][num_bound_devices=%d]\n",
do_lock, pfr->num_channels_per_ring, pfr->num_bound_devices);
/* We need to lock as two ksoftirqd threads might put data onto the same ring */
if(do_lock) write_lock(&pfr->ring_index_lock);
// smp_rmb();
if(pfr->tx.enable_tx_with_bounce && pfr->header_len == long_pkt_header
&& pfr->slots_info->kernel_remove_off != pfr->slots_info->remove_off /* optimization to avoid too many locks */
&& pfr->slots_info->remove_off != get_next_slot_offset(pfr, pfr->slots_info->kernel_remove_off)) {
write_lock(&pfr->tx.consume_tx_packets_lock);
consume_pending_pkts(pfr, 0);
write_unlock(&pfr->tx.consume_tx_packets_lock);
}
off = pfr->slots_info->insert_off;
pfr->slots_info->tot_pkts++;
if(!check_free_ring_slot(pfr)) /* Full */ {
/* No room left */
pfr->slots_info->tot_lost++;
if(unlikely(enable_debug))
printk("[PF_RING] ==> slot(off=%d) is full [insert_off=%u][remove_off=%u][slot_len=%u][num_queued_pkts=%llu]\n",
off, pfr->slots_info->insert_off, pfr->slots_info->remove_off, pfr->slots_info->slot_len, num_queued_pkts(pfr));
if(do_lock) write_unlock(&pfr->ring_index_lock);
return(0);
}
ring_bucket = get_slot(pfr, off);
if(skb != NULL) {
/* Copy skb data */
hdr->caplen = min_val(hdr->caplen, pfr->bucket_len - offset);
if(hdr->ts.tv_sec == 0)
set_skb_time(skb, hdr);
if(pfr->header_len == long_pkt_header) {
if((plugin_mem != NULL) && (offset > 0))
memcpy(&ring_bucket[pfr->slot_header_len], plugin_mem, offset);
}
if(hdr->caplen > 0) {
u16 vlan_tci = 0;
if(unlikely(enable_debug))
printk("[PF_RING] --> [caplen=%d][len=%d][displ=%d][extended_hdr.parsed_header_len=%d][bucket_len=%d][sizeof=%d]\n",
hdr->caplen, hdr->len, displ, hdr->extended_hdr.parsed_header_len, pfr->bucket_len,
pfr->slot_header_len);
if((vlan_get_tag(skb, &vlan_tci) == 0) /* The packet is tagged... */
&& (hdr->extended_hdr.parsed_pkt.offset.vlan_offset == 0) /* but we have seen no tag -> it has been stripped */) {
/* VLAN-tagged packet with stripped VLAN tag */
u_int16_t *b;
struct vlan_ethhdr *v = vlan_eth_hdr(skb);
hdr->extended_hdr.parsed_pkt.vlan_id = vlan_tci, hdr->extended_hdr.parsed_pkt.offset.vlan_offset += sizeof(struct ethhdr),
hdr->extended_hdr.parsed_pkt.offset.l3_offset += sizeof(struct eth_vlan_hdr);
if(hdr->extended_hdr.parsed_pkt.offset.l4_offset) hdr->extended_hdr.parsed_pkt.offset.l4_offset += sizeof(struct eth_vlan_hdr);
if(hdr->extended_hdr.parsed_pkt.offset.payload_offset) hdr->extended_hdr.parsed_pkt.offset.payload_offset += sizeof(struct eth_vlan_hdr);
skb_copy_bits(skb, -displ, &ring_bucket[pfr->slot_header_len + offset], displ);
b = (u_int16_t*)&ring_bucket[pfr->slot_header_len + offset+12];
b[0] = ntohs(ETH_P_8021Q), b[1] = ntohs(vlan_tci), b[2] = v->h_vlan_proto;
if(skb_copy_bits(skb, 0, &ring_bucket[pfr->slot_header_len + offset + 18], hdr->caplen-14) < 0)
printk("[PF_RING] --> FAULT [skb->len=%u][len=%u]\n", skb->len, hdr->caplen-16);
hdr->len += sizeof(struct eth_vlan_hdr);
hdr->caplen = min_val(pfr->bucket_len - offset, hdr->caplen + sizeof(struct eth_vlan_hdr));
} else
skb_copy_bits(skb, -displ, &ring_bucket[pfr->slot_header_len + offset], hdr->caplen);
} else {
if(hdr->extended_hdr.parsed_header_len >= pfr->bucket_len) {
static u_char print_once = 0;
if(!print_once) {
printk("[PF_RING] WARNING: the bucket len is [%d] shorter than the plugin parsed header [%d]\n",
pfr->bucket_len, hdr->extended_hdr.parsed_header_len);
print_once = 1;
}
}
}
if(pfr->tx.enable_tx_with_bounce
&& (pfr->header_len == long_pkt_header)
&& (skb != NULL)
&& (clone_id != NULL) /* Just to be on the safe side */
) {
struct sk_buff *cloned;
/*
The TX transmission is supported only with long_pkt_header
where we can read the id of the output interface
*/
if(((*clone_id)++ == 0)
&& (transparent_mode != driver2pf_ring_transparent /* mode=1 */))
hdr->extended_hdr.tx.reserved = skb;
else {
cloned = skb_clone(skb, GFP_ATOMIC);
hdr->extended_hdr.tx.reserved = cloned;
}
if(displ > 0) skb_push(hdr->extended_hdr.tx.reserved, displ);
/* printk("[PF_RING] copy_data_to_ring(): clone_id=%d\n", *clone_id); */
}
} else {
/* Copy Raw data */
hdr->len = raw_data_len;
hdr->caplen = min_val(raw_data_len, pfr->bucket_len);
memcpy(&ring_bucket[pfr->slot_header_len], raw_data, hdr->caplen);
if(pfr->header_len == long_pkt_header)
hdr->extended_hdr.if_index = FAKE_PACKET;
/* printk("[PF_RING] Copied raw data at slot with offset %d [len=%d, caplen=%d]\n", off, hdr->len, hdr->caplen); */
}
/* Copy extended packet header */
memcpy(ring_bucket, hdr, pfr->slot_header_len);
/* Set Magic value */
memset(&ring_bucket[pfr->slot_header_len + offset + hdr->caplen], RING_MAGIC_VALUE, sizeof(u_int16_t));
/* Update insert offset */
pfr->slots_info->insert_off = get_next_slot_offset(pfr, off);
if(unlikely(enable_debug))
printk("[PF_RING] ==> insert_off=%d\n", pfr->slots_info->insert_off);
/* NOTE: smp_* barriers are _compiler_ barriers on UP, mandatory barriers on SMP
* a consumer _must_ see the new value of tot_insert only after the buffer update completes */
smp_mb(); //wmb();
pfr->slots_info->tot_insert++;
if(do_lock) write_unlock(&pfr->ring_index_lock);
if(num_queued_pkts(pfr) >= pfr->poll_num_pkts_watermark)
wake_up_interruptible(&pfr->ring_slots_waitqueue);
#ifdef VPFRING_SUPPORT
if(pfr->vpfring_host_eventfd_ctx && !(pfr->slots_info->vpfring_guest_flags & VPFRING_GUEST_NO_INTERRUPT))
eventfd_signal(pfr->vpfring_host_eventfd_ctx, 1);
#endif
return(1);
}
/* ********************************** */
static inline int copy_raw_data_to_ring(struct pf_ring_socket *pfr,
struct pfring_pkthdr *dummy_hdr,
void *raw_data, uint raw_data_len) {
return(copy_data_to_ring(NULL, pfr, dummy_hdr, 0, 0, NULL, raw_data, raw_data_len, NULL));
}
/* ********************************** */
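/*
 * add_pkt_to_ring()
 *
 * Deliver one packet to a ring: resolve the master ring (if any), check
 * that the ring is active and that the packet channel is accepted, hand
 * the packet to the kernel consumer plugin when one is registered, and
 * otherwise copy it into the ring via copy_data_to_ring() (or
 * copy_raw_data_to_ring() when the skb is not a real one).
 */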
static inline int add_pkt_to_ring(struct sk_buff *skb,
u_int8_t real_skb,
struct pf_ring_socket *_pfr,
struct pfring_pkthdr *hdr,
int displ, u_int32_t channel_id,
int offset, void *plugin_mem,
int *clone_id)
{
struct pf_ring_socket *pfr = (_pfr->master_ring != NULL) ? _pfr->master_ring : _pfr;
u_int32_t the_bit = 1 << channel_id;
if(unlikely(enable_debug))
printk("[PF_RING] --> add_pkt_to_ring(len=%d) [pfr->channel_id_mask=%08X][channel_id=%d][real_skb=%u]\n",
hdr->len, pfr->channel_id_mask, channel_id, real_skb);
if((!pfr->ring_active) || (!skb))
return(0);
if((pfr->channel_id_mask != RING_ANY_CHANNEL)
&& (channel_id != RING_ANY_CHANNEL)
&& (!(pfr->channel_id_mask & the_bit)))
return(0); /* Wrong channel */
if(pfr->kernel_consumer_plugin_id
&& plugin_registration[pfr->kernel_consumer_plugin_id]->pfring_packet_reader) {
write_lock(&pfr->ring_index_lock); /* Serialize */
plugin_registration[pfr->kernel_consumer_plugin_id]->pfring_packet_reader(pfr, skb, channel_id, hdr, displ);
pfr->slots_info->tot_pkts++;
write_unlock(&pfr->ring_index_lock);
return(0);
}
if(real_skb)
return(copy_data_to_ring(skb, pfr, hdr, displ, offset, plugin_mem, NULL, 0, clone_id));
else
return(copy_raw_data_to_ring(pfr, hdr, skb->data, hdr->len));
}
/* ********************************** */
static int add_packet_to_ring(struct pf_ring_socket *pfr,
u_int8_t real_skb,
struct pfring_pkthdr *hdr,
struct sk_buff *skb,
int displ, u_int8_t parse_pkt_first)
{
if(parse_pkt_first) {
u_int16_t ip_id;
u_int8_t first_fragment, second_fragment;
parse_pkt(skb, real_skb, displ, hdr, &ip_id, &first_fragment, &second_fragment);
}
ring_read_lock();
add_pkt_to_ring(skb, real_skb, pfr, hdr, 0, RING_ANY_CHANNEL, displ, NULL, NULL);
ring_read_unlock();
return(0);
}
/* ********************************** */
static int add_raw_packet_to_ring(struct pf_ring_socket *pfr, struct pfring_pkthdr *hdr,
u_char *data, u_int data_len,
u_int8_t parse_pkt_first)
{
int rc;
if(parse_pkt_first) {
u_int16_t ip_id;
u_int8_t first_fragment, second_fragment;
parse_raw_pkt(data, data_len, hdr, &ip_id, &first_fragment, &second_fragment);
}
ring_read_lock();
rc = copy_raw_data_to_ring(pfr, hdr, data, data_len);
ring_read_unlock();
return(rc == 1 ? 0 : -1);
}
/* ********************************** */
static int add_hdr_to_ring(struct pf_ring_socket *pfr,
u_int8_t real_skb,
struct pfring_pkthdr *hdr)
{
return(add_packet_to_ring(pfr, real_skb, hdr, NULL, 0, 0));
}
/* ********************************** */
/* Free filtering placeholders */
static void free_parse_memory(struct parse_buffer *parse_memory_buffer[])
{
int i;
for(i = 1; i <= max_registered_plugin_id; i++)
if(parse_memory_buffer[i]) {
if(parse_memory_buffer[i]->mem != NULL) {
kfree(parse_memory_buffer[i]->mem);
}
kfree(parse_memory_buffer[i]);
}
}
/* ************************************* */
static void free_filtering_rule(sw_filtering_rule_element * entry, u_int8_t freeing_ring)
{
#ifdef CONFIG_TEXTSEARCH
int i;
#endif
if(entry->rule.plugin_action.plugin_id > 0
&& plugin_registration[entry->rule.plugin_action.plugin_id]
&& plugin_registration[entry->rule.plugin_action.plugin_id]->pfring_plugin_free_rule_mem) {
/* "Freeing rule" callback.
* Note: if you are freeing rule->plugin_data_ptr within this callback, please set it to NULL. */
plugin_registration[entry->rule.plugin_action.plugin_id]->pfring_plugin_free_rule_mem(entry);
}
if(freeing_ring) { /* tell the plugin to free global data structures */
if(entry->rule.plugin_action.plugin_id > 0
&& plugin_registration[entry->rule.plugin_action.plugin_id]
&& plugin_registration[entry->rule.plugin_action.plugin_id]->pfring_plugin_free_ring_mem) {
/* "Freeing ring" callback.
* Note: if you are freeing rule->plugin_data_ptr within this callback, please set it to NULL. */
plugin_registration[entry->rule.plugin_action.plugin_id]->pfring_plugin_free_ring_mem(entry);
}
}
#ifdef CONFIG_TEXTSEARCH
for(i = 0; (i < MAX_NUM_PATTERN) && (entry->pattern[i] != NULL); i++)
textsearch_destroy(entry->pattern[i]);
#endif
if(entry->plugin_data_ptr != NULL) {
kfree(entry->plugin_data_ptr);
entry->plugin_data_ptr = NULL;
}
if(entry->rule.internals.reflector_dev != NULL)
dev_put(entry->rule.internals.reflector_dev); /* Release device */
if(entry->rule.extended_fields.filter_plugin_id > 0) {
if(plugin_registration[entry->rule.extended_fields.filter_plugin_id]->pfring_plugin_register)
plugin_registration[entry->rule.extended_fields.filter_plugin_id]->pfring_plugin_register(0);
}
if(entry->rule.plugin_action.plugin_id > 0) {
if(plugin_registration[entry->rule.plugin_action.plugin_id]->pfring_plugin_register)
plugin_registration[entry->rule.plugin_action.plugin_id]->pfring_plugin_register(0);
}
}
/* ************************************* */
static void free_sw_filtering_hash_bucket(sw_filtering_hash_bucket * bucket)
{
if(bucket->plugin_data_ptr != NULL) {
kfree(bucket->plugin_data_ptr);
bucket->plugin_data_ptr = NULL;
}
if(bucket->rule.internals.reflector_dev != NULL)
dev_put(bucket->rule.internals.reflector_dev); /* Release device */
if(bucket->rule.plugin_action.plugin_id > 0) {
if(plugin_registration[bucket->rule.plugin_action.plugin_id]->pfring_plugin_register)
plugin_registration[bucket->rule.plugin_action.plugin_id]->pfring_plugin_register(0);
}
}
/*
  NOTE
  We hijack the get_coalesce/set_eeprom fields for our own purposes
  until hardware filtering support is part of the kernel.
*/
/* ************************************* */
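/*
 * handle_sw_filtering_hash_bucket()
 *
 * Add (add_rule != 0) or remove a perfect filtering rule in the
 * per-socket hash. The bucket index is hash_pkt() modulo
 * perfect_rules_hash_size and collisions are resolved by chaining.
 * On add, the action plugin and the reflector device are validated,
 * the hash is allocated on first use and duplicates are rejected
 * with -EEXIST.
 */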
static int handle_sw_filtering_hash_bucket(struct pf_ring_socket *pfr,
sw_filtering_hash_bucket * rule,
u_char add_rule)
{
int rc = -1;
u_int32_t hash_value = hash_pkt(rule->rule.vlan_id, rule->rule.proto,
rule->rule.host_peer_a, rule->rule.host_peer_b,
rule->rule.port_peer_a, rule->rule.port_peer_b)
% perfect_rules_hash_size;
if(unlikely(enable_debug))
printk("[PF_RING] %s(vlan=%u, proto=%u, "
"sip=%d.%d.%d.%d, sport=%u, dip=%d.%d.%d.%d, dport=%u, "
"hash_value=%u, add_rule=%d) called\n",
__FUNCTION__,
rule->rule.vlan_id,
rule->rule.proto, ((rule->rule.host4_peer_a >> 24) & 0xff),
((rule->rule.host4_peer_a >> 16) & 0xff),
((rule->rule.host4_peer_a >> 8) & 0xff),
((rule->rule.host4_peer_a >> 0) & 0xff),
rule->rule.port_peer_a,
((rule->rule.host4_peer_b >> 24) & 0xff),
((rule->rule.host4_peer_b >> 16) & 0xff),
((rule->rule.host4_peer_b >> 8) & 0xff),
((rule->rule.host4_peer_b >> 0) & 0xff),
rule->rule.port_peer_b, hash_value, add_rule);
if(add_rule) {
/* Checking plugins */
if(rule->rule.plugin_action.plugin_id != NO_PLUGIN_ID) {
int ret = 0;
if(rule->rule.plugin_action.plugin_id >= MAX_PLUGIN_ID)
ret = -EFAULT;
else if(plugin_registration[rule->rule.plugin_action.plugin_id] == NULL)
ret = -EFAULT;
if(ret != 0) {
if(unlikely(enable_debug))
printk("[PF_RING] Invalid action plugin [id=%d]\n",
rule->rule.plugin_action.plugin_id);
return(ret);
}
}
/* Checking reflector device */
if(rule->rule.reflector_device_name[0] != '\0') {
if((pfr->ring_netdev->dev != NULL) &&
rule->rule.rule_action != bounce_packet_and_stop_rule_evaluation &&
rule->rule.rule_action != bounce_packet_and_continue_rule_evaluation &&
(strcmp(rule->rule.reflector_device_name, pfr->ring_netdev->dev->name) == 0)) {
if(unlikely(enable_debug))
printk("[PF_RING] You cannot use as reflection device the same device on "
"which this ring is bound\n");
return(-EFAULT);
}
rule->rule.internals.reflector_dev = dev_get_by_name(
#if(LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24))
&init_net,
#endif
rule->rule.reflector_device_name);
if(rule->rule.internals.reflector_dev == NULL) {
printk("[PF_RING] Unable to find device %s\n",
rule->rule.reflector_device_name);
return(-EFAULT);
}
} else
rule->rule.internals.reflector_dev = NULL;
/* Initialize the hash table on first use */
if(pfr->sw_filtering_hash == NULL) {
pfr->sw_filtering_hash = (sw_filtering_hash_bucket **)
kcalloc(perfect_rules_hash_size, sizeof(sw_filtering_hash_bucket *), GFP_ATOMIC);
if(pfr->sw_filtering_hash == NULL) {
if(unlikely(enable_debug))
printk("[PF_RING] %s() returned %d [0]\n", __FUNCTION__, -EFAULT);
return(-EFAULT);
}
if(unlikely(enable_debug))
printk("[PF_RING] %s() allocated memory\n", __FUNCTION__);
}
}
if(pfr->sw_filtering_hash == NULL) {
/* We're trying to delete a hash rule from an empty hash */
return(-EFAULT);
}
if(pfr->sw_filtering_hash[hash_value] == NULL) {
if(add_rule) {
rule->next = NULL;
pfr->sw_filtering_hash[hash_value] = rule;
rc = 0;
} else {
if(unlikely(enable_debug))
printk("[PF_RING] %s() returned %d [1]\n", __FUNCTION__, -1);
return(-1); /* Unable to find the specified rule */
}
} else {
sw_filtering_hash_bucket *prev = NULL, *bucket = pfr->sw_filtering_hash[hash_value];
while(bucket != NULL) {
if(hash_filtering_rule_match(&bucket->rule, &rule->rule)) {
if(add_rule) {
if(unlikely(enable_debug))
printk("[PF_RING] Duplicate found while adding rule: discarded\n");
return(-EEXIST);
} else {
/* We've found the bucket to delete */
if(unlikely(enable_debug))
printk("[PF_RING] %s() found a bucket to delete: removing it\n", __FUNCTION__);
if(prev == NULL)
pfr->sw_filtering_hash[hash_value] = bucket->next;
else
prev->next = bucket->next;
free_sw_filtering_hash_bucket(bucket);
kfree(bucket);
pfr->num_sw_filtering_rules--;
if(unlikely(enable_debug))
printk("[PF_RING] %s() returned %d [2]\n", __FUNCTION__, 0);
return(0);
}
} else {
prev = bucket;
bucket = bucket->next;
}
}
if(add_rule) {
/* If we got this far, no duplicate was found: this rule is unique */
if(unlikely(enable_debug))
printk("[PF_RING] %s() no duplicate rule found: adding the rule\n", __FUNCTION__);
rule->next = pfr->sw_filtering_hash[hash_value];
pfr->sw_filtering_hash[hash_value] = rule;
rc = 0;
} else {
/* The rule we searched for has not been found */
rc = -1;
}
}
if(add_rule && rc == 0) {
pfr->num_sw_filtering_rules++;
/* Avoid immediate rule purging */
rule->rule.internals.jiffies_last_match = jiffies;
if(rule->rule.plugin_action.plugin_id > 0) {
if(plugin_registration[rule->rule.plugin_action.plugin_id]->pfring_plugin_register)
plugin_registration[rule->rule.plugin_action.plugin_id]->pfring_plugin_register(1);
}
}
if(unlikely(enable_debug))
printk("[PF_RING] %s() returned %d [3]\n", __FUNCTION__, rc);
return(rc);
}
/* ************************************* */
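/*
 * add_sw_filtering_rule_element()
 *
 * Insert a wildcard rule into the per-socket list, keeping the list
 * ordered by rule_id (a duplicate id is rejected with -EEXIST). The
 * filter/action plugin ids and the reflector device are validated, and
 * any '|'-separated payload patterns are compiled for textsearch before
 * the rule is linked in.
 */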
static int add_sw_filtering_rule_element(struct pf_ring_socket *pfr, sw_filtering_rule_element *rule)
{
struct list_head *ptr;
int idx = 0;
sw_filtering_rule_element *entry;
struct list_head *prev = NULL;
/* Ordered insert scanning backwards (rule ids are usually added in increasing order) */
prev = &pfr->sw_filtering_rules;
list_for_each_prev(ptr, &pfr->sw_filtering_rules) {
entry = list_entry(ptr, sw_filtering_rule_element, list);
if(entry->rule.rule_id == rule->rule.rule_id)
return(-EEXIST);
if(entry->rule.rule_id < rule->rule.rule_id)
break;
prev = ptr; /* position where to insert the new entry after checks */
}
/* Rule checks */
if(rule->rule.extended_fields.filter_plugin_id != NO_PLUGIN_ID) {
if(rule->rule.extended_fields.filter_plugin_id >= MAX_PLUGIN_ID
|| plugin_registration[rule->rule.extended_fields.filter_plugin_id] == NULL) {
if(unlikely(enable_debug))
printk("[PF_RING] Invalid filtering plugin [id=%d]\n",
rule->rule.extended_fields.filter_plugin_id);
return(-EFAULT);
}
}
if(rule->rule.plugin_action.plugin_id != NO_PLUGIN_ID) {
if(rule->rule.plugin_action.plugin_id >= MAX_PLUGIN_ID
|| plugin_registration[rule->rule.plugin_action.plugin_id] == NULL) {
if(unlikely(enable_debug))
printk("[PF_RING] Invalid action plugin [id=%d]\n",
rule->rule.plugin_action.plugin_id);
return(-EFAULT);
}
}
if(rule->rule.reflector_device_name[0] != '\0') {
if((pfr->ring_netdev->dev != NULL) &&
rule->rule.rule_action != bounce_packet_and_stop_rule_evaluation &&
rule->rule.rule_action != bounce_packet_and_continue_rule_evaluation &&
(strcmp(rule->rule.reflector_device_name, pfr->ring_netdev->dev->name) == 0)) {
if(unlikely(enable_debug))
printk("[PF_RING] You cannot use as reflection device the same device on which this ring is bound\n");
return(-EFAULT);
}
rule->rule.internals.reflector_dev = dev_get_by_name(
#if(LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24))
&init_net,
#endif
rule->rule.reflector_device_name);
if(rule->rule.internals.reflector_dev == NULL) {
printk("[PF_RING] Unable to find device %s\n", rule->rule.reflector_device_name);
return(-EFAULT);
}
} else
rule->rule.internals.reflector_dev = NULL;
if(unlikely(enable_debug))
printk("[PF_RING] SO_ADD_FILTERING_RULE: About to add rule %d\n",
rule->rule.rule_id);
/* Compile pattern if present */
if(strlen(rule->rule.extended_fields.payload_pattern) > 0) {
char *pattern = rule->rule.extended_fields.payload_pattern;
printk("[PF_RING] About to compile pattern '%s'\n", pattern);
while(pattern && (idx < MAX_NUM_PATTERN)) {
char *pipe = strchr(pattern, '|');
if(pipe)
pipe[0] = '\0';
#ifdef CONFIG_TEXTSEARCH
rule->pattern[idx] = textsearch_prepare("bm" /* Boyer-Moore */
/* "kmp" = Knuth-Morris-Pratt */
, pattern, strlen(pattern),
GFP_KERNEL,
TS_AUTOLOAD
#ifdef TS_IGNORECASE
| TS_IGNORECASE
#endif
);
if(rule->pattern[idx])
printk("[PF_RING] Compiled pattern '%s' [idx=%d]\n", pattern, idx);
#endif
if(pipe)
pattern = &pipe[1], idx++;
else
break;
}
} else {
#ifdef CONFIG_TEXTSEARCH
rule->pattern[0] = NULL;
#endif
}
list_add_tail(&rule->list, prev);
pfr->num_sw_filtering_rules++;
rule->rule.internals.jiffies_last_match = jiffies; /* Avoid immediate rule purging */
if(rule->rule.extended_fields.filter_plugin_id > 0) {
if(plugin_registration[rule->rule.extended_fields.filter_plugin_id]->pfring_plugin_register)
plugin_registration[rule->rule.extended_fields.filter_plugin_id]->pfring_plugin_register(1);
}
if(rule->rule.plugin_action.plugin_id > 0) {
if(plugin_registration[rule->rule.plugin_action.plugin_id]->pfring_plugin_register)
plugin_registration[rule->rule.plugin_action.plugin_id]->pfring_plugin_register(1);
}
return(0);
}
/* ************************************* */
static int remove_sw_filtering_rule_element(struct pf_ring_socket *pfr, u_int16_t rule_id)
{
int rule_found = 0;
struct list_head *ptr, *tmp_ptr;
list_for_each_safe(ptr, tmp_ptr, &pfr->sw_filtering_rules) {
sw_filtering_rule_element *entry;
entry = list_entry(ptr, sw_filtering_rule_element, list);
if(entry->rule.rule_id == rule_id) {
list_del(ptr);
free_filtering_rule(entry, 0);
kfree(entry);
pfr->num_sw_filtering_rules--;
if(unlikely(enable_debug))
printk("[PF_RING] SO_REMOVE_FILTERING_RULE: rule %d has been removed\n", rule_id);
rule_found = 1;
break;
}
} /* for */
return(rule_found);
}
/* ********************************** */
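/*
 * reflect_packet()
 *
 * Transmit the skb (or a clone of it) on the given reflector device,
 * provided the device is up. For bounce actions the source and
 * destination MAC addresses are swapped before transmission. Forwarding
 * counters are updated; returns 0 on success, -ENOMEM or -ENETDOWN on
 * failure.
 */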
static int reflect_packet(struct sk_buff *skb,
struct pf_ring_socket *pfr,
struct net_device *reflector_dev,
int displ,
rule_action_behaviour behaviour,
u_int8_t do_clone_skb)
{
if(unlikely(enable_debug))
printk("[PF_RING] reflect_packet(%s) called\n", reflector_dev->name);
if((reflector_dev != NULL)
&& (reflector_dev->flags & IFF_UP) /* Interface is up */) {
int ret;
struct sk_buff *cloned;
if(do_clone_skb) {
if((cloned = skb_clone(skb, GFP_ATOMIC)) == NULL)
return -ENOMEM;
} else
cloned = skb;
cloned->pkt_type = PACKET_OUTGOING;
cloned->dev = reflector_dev;
if(displ > 0) skb_push(cloned, displ);
skb_reset_network_header(skb);
if(behaviour == bounce_packet_and_stop_rule_evaluation ||
behaviour == bounce_packet_and_continue_rule_evaluation) {
char dst_mac[6];
/* Swap mac addresses (be aware that data is also forwarded to userspace) */
memcpy(dst_mac, cloned->data, 6);
memcpy(cloned->data, &cloned->data[6], 6);
memcpy(&cloned->data[6], dst_mac, 6);
}
/*
NOTE
dev_queue_xmit() must be called with interrupts enabled
which means it can't be called with spinlocks held.
*/
ret = dev_queue_xmit(cloned);
if(ret == NETDEV_TX_OK)
pfr->slots_info->tot_fwd_ok++;
else
pfr->slots_info->tot_fwd_notok++;
if(unlikely(enable_debug))
printk("[PF_RING] dev_queue_xmit(%s) returned %d\n", reflector_dev->name, ret);
/* yield(); */
return(ret == NETDEV_TX_OK ? 0 : -ENETDOWN);
} else
pfr->slots_info->tot_fwd_notok++;
return(-ENETDOWN);
}
/* ********************************** */
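/*
  Descriptive note (added for clarity): look up the packet in the socket's
  perfect (hash-based) rule table. On a match the rule action, or the plugin
  handler bound to the rule, decides whether the packet is forwarded to
  userspace (*fwd_pkt), possibly reflecting/bouncing it.
  Returns 1 when a hash rule matched and rule evaluation should stop,
  0 when the wildcard rule list should be evaluated as well.
*/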
int check_perfect_rules(struct sk_buff *skb,
struct pf_ring_socket *pfr,
struct pfring_pkthdr *hdr,
int *fwd_pkt,
u_int8_t *free_parse_mem,
struct parse_buffer *parse_memory_buffer[MAX_PLUGIN_ID],
int displ, u_int *last_matched_plugin)
{
u_int hash_idx;
sw_filtering_hash_bucket *hash_bucket;
u_int8_t hash_found = 0;
hash_idx = hash_pkt_header(hdr, 0) % perfect_rules_hash_size;
hash_bucket = pfr->sw_filtering_hash[hash_idx];
while(hash_bucket != NULL) {
if(hash_bucket_match(hash_bucket, hdr, 0, 0)) {
hash_found = 1;
break;
} else
hash_bucket = hash_bucket->next;
} /* while */
if(hash_found) {
rule_action_behaviour behaviour = forward_packet_and_stop_rule_evaluation;
if((hash_bucket->rule.plugin_action.plugin_id != NO_PLUGIN_ID)
&& (hash_bucket->rule.plugin_action.plugin_id < MAX_PLUGIN_ID)
&& (plugin_registration[hash_bucket->rule.plugin_action.plugin_id] != NULL)
&& (plugin_registration[hash_bucket->rule.plugin_action.plugin_id]->
pfring_plugin_handle_skb != NULL)
) {
plugin_registration[hash_bucket->rule.plugin_action.plugin_id]
->pfring_plugin_handle_skb(pfr, NULL, hash_bucket, hdr, skb, displ, 0, /* no plugin */
&parse_memory_buffer[hash_bucket->rule.plugin_action.plugin_id],
&behaviour);
if(parse_memory_buffer[hash_bucket->rule.plugin_action.plugin_id])
*free_parse_mem = 1;
*last_matched_plugin = hash_bucket->rule.plugin_action.plugin_id;
hdr->extended_hdr.parsed_pkt.last_matched_plugin_id = hash_bucket->rule.plugin_action.plugin_id;
} else
behaviour = hash_bucket->rule.rule_action;
switch(behaviour) {
case forward_packet_and_stop_rule_evaluation:
*fwd_pkt = 1;
break;
case dont_forward_packet_and_stop_rule_evaluation:
*fwd_pkt = 0;
break;
case execute_action_and_stop_rule_evaluation:
*fwd_pkt = 0;
break;
case execute_action_and_continue_rule_evaluation:
*fwd_pkt = 0;
hash_found = 0; /* This way we also evaluate the list of rules */
break;
case forward_packet_add_rule_and_stop_rule_evaluation:
*fwd_pkt = 1;
break;
case forward_packet_del_rule_and_stop_rule_evaluation:
*fwd_pkt = 1;
break;
case reflect_packet_and_stop_rule_evaluation:
case bounce_packet_and_stop_rule_evaluation:
*fwd_pkt = 0;
reflect_packet(skb, pfr, hash_bucket->rule.internals.reflector_dev, displ, behaviour, 1);
break;
case reflect_packet_and_continue_rule_evaluation:
case bounce_packet_and_continue_rule_evaluation:
*fwd_pkt = 0;
reflect_packet(skb, pfr, hash_bucket->rule.internals.reflector_dev, displ, behaviour, 1);
hash_found = 0; /* This way we also evaluate the list of rules */
break;
}
} else {
/* printk("[PF_RING] Packet not found\n"); */
}
return(hash_found);
}
/* ********************************** */
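/*
  Descriptive note (added for clarity): walk the socket's wildcard rule list
  (with ring_rules_lock held for reading) and apply the first matching rule.
  The *_add_rule_* and *_del_rule_* behaviours temporarily drop the read lock
  and re-acquire it for writing in order to update the rule set, hence they
  return immediately instead of falling through to the final read_unlock().
  Always returns 0, as negative values are not handled by the caller.
*/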
int check_wildcard_rules(struct sk_buff *skb,
struct pf_ring_socket *pfr,
struct pfring_pkthdr *hdr,
int *fwd_pkt,
u_int8_t *free_parse_mem,
struct parse_buffer *parse_memory_buffer[MAX_PLUGIN_ID],
int displ, u_int *last_matched_plugin)
{
struct list_head *ptr, *tmp_ptr;
if(unlikely(enable_debug))
printk("[PF_RING] Entered check_wildcard_rules()\n");
read_lock(&pfr->ring_rules_lock);
list_for_each_safe(ptr, tmp_ptr, &pfr->sw_filtering_rules) {
sw_filtering_rule_element *entry;
rule_action_behaviour behaviour = forward_packet_and_stop_rule_evaluation;
entry = list_entry(ptr, sw_filtering_rule_element, list);
if(unlikely(enable_debug))
printk("[PF_RING] Checking rule %d\n", entry->rule.rule_id);
if(match_filtering_rule(pfr, entry, hdr, skb, displ,
parse_memory_buffer, free_parse_mem,
last_matched_plugin, &behaviour)) {
if(unlikely(enable_debug))
printk("[PF_RING] Packet MATCH\n");
if(unlikely(enable_debug))
printk("[PF_RING] rule_id=%d behaviour=%d\n", entry->rule.rule_id, behaviour);
hdr->extended_hdr.parsed_pkt.last_matched_rule_id = entry->rule.rule_id;
if(behaviour == forward_packet_and_stop_rule_evaluation) {
*fwd_pkt = 1;
break;
} else if(behaviour == forward_packet_add_rule_and_stop_rule_evaluation) {
sw_filtering_rule_element *rule_element = NULL;
sw_filtering_hash_bucket *hash_bucket = NULL;
u_int16_t free_rule_element_id;
int rc = 0;
*fwd_pkt = 1;
/* we are done with rule evaluation,
* now we need a write_lock to add rules */
read_unlock(&pfr->ring_rules_lock);
if(*last_matched_plugin
&& plugin_registration[*last_matched_plugin] != NULL
&& plugin_registration[*last_matched_plugin]->pfring_plugin_add_rule != NULL) {
write_lock(&pfr->ring_rules_lock);
/* retrieving the first free rule id (rules are ordered).
* (we can reuse entry, ptr, tmp_ptr because we will stop rule evaluation) */
free_rule_element_id = 0;
list_for_each_safe(ptr, tmp_ptr, &pfr->sw_filtering_rules) {
sw_filtering_rule_element *tmp_entry = list_entry(ptr, sw_filtering_rule_element, list);
if(tmp_entry->rule.rule_id == free_rule_element_id)
free_rule_element_id++;
else break; /* we found a hole */
}
/* safety check to make sure nothing has changed since the read_unlock() */
if(plugin_registration[*last_matched_plugin] != NULL
&& plugin_registration[*last_matched_plugin]->pfring_plugin_add_rule != NULL) {
rc = plugin_registration[*last_matched_plugin]->pfring_plugin_add_rule(
entry, hdr, free_rule_element_id, &rule_element, &hash_bucket,
*last_matched_plugin, &parse_memory_buffer[*last_matched_plugin]);
if(unlikely(enable_debug))
printk("pfring_plugin_add_rule() returned %d\n", rc);
if(rc == 0) {
if(hash_bucket != NULL) {
rc = handle_sw_filtering_hash_bucket(pfr, hash_bucket, 1 /* add_rule_from_plugin */);
if(rc != 0) {
kfree(hash_bucket);
hash_bucket = NULL;
}
}
if(rule_element != NULL) {
rc = add_sw_filtering_rule_element(pfr, rule_element);
if(rc != 0) {
kfree(rule_element);
rule_element = NULL;
}
}
}
}
write_unlock(&pfr->ring_rules_lock);
} else { /* No plugin defined, creating a hash rule from packet headers */
hash_bucket = (sw_filtering_hash_bucket *)kcalloc(1, sizeof(sw_filtering_hash_bucket), GFP_ATOMIC);
if(hash_bucket != NULL) {
hash_bucket->rule.vlan_id = hdr->extended_hdr.parsed_pkt.vlan_id;
hash_bucket->rule.proto = hdr->extended_hdr.parsed_pkt.l3_proto;
hash_bucket->rule.host4_peer_a = hdr->extended_hdr.parsed_pkt.ipv4_src;
hash_bucket->rule.host4_peer_b = hdr->extended_hdr.parsed_pkt.ipv4_dst;
hash_bucket->rule.port_peer_a = hdr->extended_hdr.parsed_pkt.l4_src_port;
hash_bucket->rule.port_peer_b = hdr->extended_hdr.parsed_pkt.l4_dst_port;
hash_bucket->rule.rule_action = forward_packet_and_stop_rule_evaluation;
hash_bucket->rule.reflector_device_name[0] = '\0';
hash_bucket->rule.internals.reflector_dev = NULL;
hash_bucket->rule.plugin_action.plugin_id = NO_PLUGIN_ID;
write_lock(&pfr->ring_rules_lock);
rc = handle_sw_filtering_hash_bucket(pfr, hash_bucket, 1 /* add_rule_from_plugin */);
write_unlock(&pfr->ring_rules_lock);
if(rc != 0) {
kfree(hash_bucket);
hash_bucket = NULL;
} else {
if(unlikely(enable_debug))
printk("[PF_RING] Added rule: [%d.%d.%d.%d:%d <-> %d.%d.%d.%d:%d][tot_rules=%d]\n",
((hash_bucket->rule.host4_peer_a >> 24) & 0xff), ((hash_bucket->rule.host4_peer_a >> 16) & 0xff),
((hash_bucket->rule.host4_peer_a >> 8) & 0xff), ((hash_bucket->rule.host4_peer_a >> 0) & 0xff),
hash_bucket->rule.port_peer_a, ((hash_bucket->rule.host4_peer_b >> 24) & 0xff),
((hash_bucket->rule.host4_peer_b >> 16) & 0xff), ((hash_bucket->rule.host4_peer_b >> 8) & 0xff),
((hash_bucket->rule.host4_peer_b >> 0) & 0xff), hash_bucket->rule.port_peer_b, pfr->num_sw_filtering_rules);
}
}
}
/* Negative return values are not handled by the caller, so it is better to always return 0.
* Note: be careful with the unlock code when moving this */
return(0);
break;
} else if(behaviour == forward_packet_del_rule_and_stop_rule_evaluation) {
u_int16_t rule_element_id;
sw_filtering_hash_bucket hash_bucket;
int rc = 0;
*fwd_pkt = 1;
if(*last_matched_plugin
&& plugin_registration[*last_matched_plugin] != NULL
&& plugin_registration[*last_matched_plugin]->pfring_plugin_del_rule != NULL) {
rc = plugin_registration[*last_matched_plugin]->pfring_plugin_del_rule(
entry, hdr, &rule_element_id, &hash_bucket,
*last_matched_plugin, &parse_memory_buffer[*last_matched_plugin]);
if(unlikely(enable_debug))
printk("pfring_plugin_del_rule() returned %d\n", rc);
if(rc > 0) {
/* we are done with rule evaluation,
* now we need a write_lock to delete rules */
read_unlock(&pfr->ring_rules_lock);
if(rc & 1) {
write_lock(&pfr->ring_rules_lock);
handle_sw_filtering_hash_bucket(pfr, &hash_bucket, 0 /* del */);
write_unlock(&pfr->ring_rules_lock);
}
if(rc & 2) {
write_lock(&pfr->ring_rules_lock);
remove_sw_filtering_rule_element(pfr, rule_element_id);
write_unlock(&pfr->ring_rules_lock);
}
/* Note: be careful with unlock code when moving this */
return(0);
}
}
break;
} else if(behaviour == dont_forward_packet_and_stop_rule_evaluation) {
*fwd_pkt = 0;
break;
}
if(entry->rule.rule_action == forward_packet_and_stop_rule_evaluation) {
*fwd_pkt = 1;
break;
} else if(entry->rule.rule_action == dont_forward_packet_and_stop_rule_evaluation) {
*fwd_pkt = 0;
break;
} else if(entry->rule.rule_action == execute_action_and_stop_rule_evaluation) {
printk("[PF_RING] *** execute_action_and_stop_rule_evaluation\n");
break;
} else if(entry->rule.rule_action == execute_action_and_continue_rule_evaluation) {
/* The action has already been performed inside match_filtering_rule()
hence instead of stopping rule evaluation, the next rule
will be evaluated */
} else if((entry->rule.rule_action == reflect_packet_and_stop_rule_evaluation)
|| (entry->rule.rule_action == bounce_packet_and_stop_rule_evaluation)) {
*fwd_pkt = 0;
reflect_packet(skb, pfr, entry->rule.internals.reflector_dev, displ, entry->rule.rule_action, 1);
break;
} else if((entry->rule.rule_action == reflect_packet_and_continue_rule_evaluation)
|| (entry->rule.rule_action == bounce_packet_and_continue_rule_evaluation)) {
*fwd_pkt = 1;
reflect_packet(skb, pfr, entry->rule.internals.reflector_dev, displ, entry->rule.rule_action, 1);
}
} else {
if(unlikely(enable_debug))
printk("[PF_RING] Packet not matched\n");
}
} /* for */
read_unlock(&pfr->ring_rules_lock);
return(0);
}
/* ********************************** */
/*
This code has been partially copied from af_packet.c
Return code
1: pass the filter
0: this packet has to be dropped
*/
int bpf_filter_skb(struct sk_buff *skb,
struct pf_ring_socket *pfr,
int displ) {
if(pfr->bpfFilter != NULL) {
unsigned res = 1;
u8 *skb_head = skb->data;
int skb_len = skb->len;
if(displ > 0) {
/*
Move the data pointer back by displ bytes (we temporarily modify
the packet for the sake of filtering), thus we need to restore it
later on.
NOTE: displ is either 0 or skb_network_offset(skb)
*/
skb_push(skb, displ);
}
rcu_read_lock_bh();
res = sk_run_filter(skb, pfr->bpfFilter->insns
#if(LINUX_VERSION_CODE < KERNEL_VERSION(2,6,38))
, pfr->bpfFilter->len
#endif
);
rcu_read_unlock_bh();
/* Restore */
if(displ > 0)
skb->data = skb_head, skb->len = skb_len;
if(res == 0) {
/* Filter failed */
if(unlikely(enable_debug))
printk("[PF_RING] %s(skb): Filter failed [len=%d][tot=%llu]"
"[insert_off=%d][pkt_type=%d][cloned=%d]\n", __FUNCTION__,
(int)skb->len, pfr->slots_info->tot_pkts,
pfr->slots_info->insert_off, skb->pkt_type,
skb->cloned);
return(0);
}
}
return(1);
}
/* ********************************** */
u_int32_t default_rehash_rss_func(struct sk_buff *skb, struct pfring_pkthdr *hdr) {
return hash_pkt_header(hdr, 0);
}
/* ********************************** */
/*
* Add the specified skb to the ring so that userland apps/plugins
* can use the packet.
*
* Return code:
* 0 packet successfully processed but no room in the ring
* 1 packet successfully processed and available room in the ring
* -1 processing error (e.g. the packet has been discarded by
* filter, ring not active...)
*
*/
static int add_skb_to_ring(struct sk_buff *skb,
u_int8_t real_skb,
struct pf_ring_socket *pfr,
struct pfring_pkthdr *hdr,
int is_ip_pkt, int displ,
u_int8_t channel_id,
u_int8_t num_rx_channels,
int *clone_id)
{
int fwd_pkt = 0, rc = 0;
struct parse_buffer *parse_memory_buffer[MAX_PLUGIN_ID] = { NULL };
u_int8_t free_parse_mem = 0;
u_int last_matched_plugin = 0;
u_int8_t hash_found = 0;
if(pfr && pfr->rehash_rss != NULL && skb->dev)
channel_id = pfr->rehash_rss(skb, hdr) % get_num_rx_queues(skb->dev);
/* This is a memory holder for storing parsed packet information
that will then be freed when the packet has been handled
*/
if(unlikely(enable_debug))
printk("[PF_RING] --> %s(len=%d) [channel_id=%d/%d][active=%d][%s]\n",
__FUNCTION__,
hdr->len, channel_id, num_rx_channels,
pfr->ring_active, pfr->ring_netdev->dev->name);
if((!pfring_enabled) || ((!pfr->ring_active) && (pfr->master_ring == NULL)))
return(-1);
pfr->num_rx_channels = num_rx_channels; /* Constantly updated */
hdr->extended_hdr.parsed_pkt.last_matched_rule_id = (u_int16_t)-1;
atomic_inc(&pfr->num_ring_users);
/* [1] BPF Filtering */
if(pfr->bpfFilter != NULL) {
if(bpf_filter_skb(skb, pfr, displ) == 0) {
atomic_dec(&pfr->num_ring_users);
return(-1);
}
}
if(unlikely(enable_debug)) {
printk("[PF_RING] %s: [%s][displ=%d][len=%d][caplen=%d]"
"[is_ip_pkt=%d][%d -> %d][%p/%p]\n", __FUNCTION__,
(skb->dev != NULL) ? skb->dev->name : "<NULL>",
displ, hdr->len, hdr->caplen,
is_ip_pkt, hdr->extended_hdr.parsed_pkt.l4_src_port,
hdr->extended_hdr.parsed_pkt.l4_dst_port, skb->dev,
pfr->ring_netdev);
}
/* Extensions */
fwd_pkt = pfr->sw_filtering_rules_default_accept_policy;
/* printk("[PF_RING] rules_default_accept_policy: [fwd_pkt=%d]\n", fwd_pkt); */
/* ************************** */
/* [2] Filter packet according to rules */
/* [2.1] Search the hash */
if(pfr->sw_filtering_hash != NULL)
hash_found = check_perfect_rules(skb, pfr, hdr, &fwd_pkt, &free_parse_mem,
parse_memory_buffer, displ, &last_matched_plugin);
if(unlikely(enable_debug))
printk("[PF_RING] check_perfect_rules() returned %d\n", hash_found);
/* [2.2] Search rules list */
if((!hash_found) && (pfr->num_sw_filtering_rules > 0)) {
if(check_wildcard_rules(skb, pfr, hdr, &fwd_pkt, &free_parse_mem,
parse_memory_buffer, displ, &last_matched_plugin) != 0)
fwd_pkt = 0;
if(unlikely(enable_debug))
printk("[PF_RING] check_wildcard_rules() completed: fwd_pkt=%d\n", fwd_pkt);
}
if(unlikely(enable_debug))
printk("[PF_RING] %s() verdict: fwd_pkt=%d [default=%u]\n", __FUNCTION__,
fwd_pkt, pfr->sw_filtering_rules_default_accept_policy);
if(fwd_pkt) {
/* We accept the packet: it needs to be queued */
if(unlikely(enable_debug))
printk("[PF_RING] Forwarding packet to userland\n");
/* [3] Packet sampling */
if(pfr->sample_rate > 1) {
write_lock(&pfr->ring_index_lock);
pfr->slots_info->tot_pkts++;
if(pfr->pktToSample <= 1) {
pfr->pktToSample = pfr->sample_rate;
} else {
pfr->pktToSample--;
if(unlikely(enable_debug))
printk("[PF_RING] %s(skb): sampled packet [len=%d]"
"[tot=%llu][insert_off=%d][pkt_type=%d][cloned=%d]\n",
__FUNCTION__,
(int)skb->len, pfr->slots_info->tot_pkts,
pfr->slots_info->insert_off, skb->pkt_type,
skb->cloned);
write_unlock(&pfr->ring_index_lock);
if(free_parse_mem)
free_parse_memory(parse_memory_buffer);
atomic_dec(&pfr->num_ring_users);
return(-1);
}
write_unlock(&pfr->ring_index_lock);
}
if(hdr->caplen > 0) {
/* Copy the packet into the bucket */
int offset;
void *mem;
if((last_matched_plugin > 0)
&& (parse_memory_buffer[last_matched_plugin] != NULL)) {
offset = hdr->extended_hdr.parsed_header_len = parse_memory_buffer[last_matched_plugin]->mem_len;
hdr->extended_hdr.parsed_pkt.last_matched_plugin_id = last_matched_plugin;
if(unlikely(enable_debug))
printk("[PF_RING] --> [last_matched_plugin = %d][extended_hdr.parsed_header_len=%d]\n",
last_matched_plugin, hdr->extended_hdr.parsed_header_len);
if(offset > pfr->bucket_len)
offset = hdr->extended_hdr.parsed_header_len = pfr->bucket_len;
mem = parse_memory_buffer[last_matched_plugin]->mem;
} else
offset = 0, hdr->extended_hdr.parsed_header_len = 0, mem = NULL;
rc = add_pkt_to_ring(skb, real_skb, pfr, hdr, displ, channel_id, offset, mem, clone_id);
}
}
if(unlikely(enable_debug))
printk("[PF_RING] [pfr->slots_info->insert_off=%d]\n",
pfr->slots_info->insert_off);
if(free_parse_mem)
free_parse_memory(parse_memory_buffer);
if(unlikely(enable_debug))
printk("[PF_RING] %s() returned %d\n", __FUNCTION__, rc);
atomic_dec(&pfr->num_ring_users);
return(rc);
}
/* ********************************** */
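/*
  Descriptive note (added for clarity): compute the cluster element a packet
  belongs to (index modulo num_cluster_elements) according to the cluster
  hashing mode. Judging by the masks passed to hash_pkt_header(), 2-tuple
  hashes the IP addresses only, 4-tuple adds the L4 ports, 5-tuple adds the
  L4 protocol as well; tcp_5_tuple uses the 5-tuple for TCP and the 2-tuple
  for everything else. For IP fragments the application id is cached on the
  first fragment (add_fragment_app_id) and looked up for the following ones
  (get_fragment_app_id), so all fragments of a datagram land on the same
  cluster element; a negative return value means the fragment could not be
  associated with a previous one.
*/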
static int hash_pkt_cluster(ring_cluster_element *cluster_ptr,
struct pfring_pkthdr *hdr,
u_int16_t ip_id, u_int8_t first_fragment, u_int8_t second_fragment)
{
int idx;
if(unlikely(enable_debug))
printk("[PF_RING] %s(ip_id=%02X, first_fragment=%d, second_fragment=%d)\n",
__FUNCTION__, ip_id, first_fragment, second_fragment);
if(second_fragment) {
if((idx = get_fragment_app_id(hdr->extended_hdr.parsed_pkt.ipv4_src,
hdr->extended_hdr.parsed_pkt.ipv4_dst,
ip_id)) < 0)
return(idx);
} else {
switch(cluster_ptr->cluster.hashing_mode) {
case cluster_round_robin:
idx = cluster_ptr->cluster.hashing_id++;
break;
case cluster_per_flow_2_tuple:
idx = hash_pkt_header(hdr, HASH_PKT_HDR_RECOMPUTE | HASH_PKT_HDR_MASK_PORT | HASH_PKT_HDR_MASK_PROTO | HASH_PKT_HDR_MASK_VLAN);
break;
case cluster_per_flow_4_tuple:
idx = hash_pkt_header(hdr, HASH_PKT_HDR_RECOMPUTE | HASH_PKT_HDR_MASK_PROTO | HASH_PKT_HDR_MASK_VLAN);
break;
case cluster_per_flow_tcp_5_tuple:
if(((hdr->extended_hdr.parsed_pkt.tunnel.tunnel_id == NO_TUNNEL_ID) ?
hdr->extended_hdr.parsed_pkt.l3_proto : hdr->extended_hdr.parsed_pkt.tunnel.tunneled_proto) == IPPROTO_TCP)
idx = hash_pkt_header(hdr, HASH_PKT_HDR_RECOMPUTE | HASH_PKT_HDR_MASK_VLAN); /* 5 tuple for TCP */
else
idx = hash_pkt_header(hdr, HASH_PKT_HDR_RECOMPUTE | HASH_PKT_HDR_MASK_PORT | HASH_PKT_HDR_MASK_PROTO | HASH_PKT_HDR_MASK_VLAN); /* 2 tuple for non-TCP */
break;
case cluster_per_flow_5_tuple:
idx = hash_pkt_header(hdr, HASH_PKT_HDR_RECOMPUTE | HASH_PKT_HDR_MASK_VLAN);
break;
case cluster_per_flow:
default:
idx = hash_pkt_header(hdr, 0);
break;
}
if (idx < 0) idx = -idx; /* idx must be positive */
if(first_fragment) {
add_fragment_app_id(hdr->extended_hdr.parsed_pkt.ipv4_src,
hdr->extended_hdr.parsed_pkt.ipv4_dst,
ip_id, idx);
}
}
return(idx % cluster_ptr->cluster.num_cluster_elements);
}
/* ********************************** */
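/*
  Descriptive note (added for clarity): register a packet-processing plugin
  in the plugin_registration table. Fails with -EINVAL if the plugin id is 0,
  out of range, or already in use. On success the module reference count is
  incremented so PF_RING cannot be unloaded while plugins are registered.
*/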
static int register_plugin(struct pfring_plugin_registration *reg)
{
if(reg == NULL)
return(-1);
if(unlikely(enable_debug))
printk("[PF_RING] --> register_plugin(%d)\n", reg->plugin_id);
if((reg->plugin_id >= MAX_PLUGIN_ID) || (reg->plugin_id == 0))
return(-EINVAL);
if(plugin_registration[reg->plugin_id] != NULL)
return(-EINVAL); /* plugin already registered */
if(reg->pfring_plugin_register == NULL)
printk("[PF_RING] WARNING: plugin %d does not implement handle pfring_plugin_register: please fix it\n",
reg->plugin_id);
plugin_registration[reg->plugin_id] = reg;
plugin_registration_size++;
max_registered_plugin_id = max_val(max_registered_plugin_id, reg->plugin_id);
printk("[PF_RING] registered plugin [id=%d][max=%d][%p]\n",
reg->plugin_id, max_registered_plugin_id,
plugin_registration[reg->plugin_id]);
try_module_get(THIS_MODULE); /* Increment usage count */
return(0);
}
/* ********************************** */
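/*
  Descriptive note (added for clarity): unregister a plugin and detach it from
  every socket: all wildcard rules bound to the plugin are reset to
  NO_PLUGIN_ID and their private plugin data is released.
  max_registered_plugin_id is recomputed and the module reference count is
  dropped.
*/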
int unregister_plugin(u_int16_t pfring_plugin_id)
{
int i;
if(pfring_plugin_id >= MAX_PLUGIN_ID)
return(-EINVAL);
if(plugin_registration[pfring_plugin_id] == NULL)
return(-EINVAL); /* plugin not registered */
else {
struct sock *sk;
u_int32_t last_list_idx;
struct pfring_plugin_registration *reg = plugin_registration[pfring_plugin_id];
plugin_registration[pfring_plugin_id] = NULL;
plugin_registration_size--;
sk = (struct sock*)lockless_list_get_first(&ring_table, &last_list_idx);
while(sk != NULL) {
struct pf_ring_socket *pfr;
struct list_head *ptr, *tmp_ptr;
pfr = ring_sk(sk);
list_for_each_safe(ptr, tmp_ptr, &pfr->sw_filtering_rules) {
sw_filtering_rule_element *rule;
rule = list_entry(ptr, sw_filtering_rule_element, list);
if(rule->rule.plugin_action.plugin_id == pfring_plugin_id) {
ring_read_lock();
rule->rule.plugin_action.plugin_id = NO_PLUGIN_ID;
if(reg && reg->pfring_plugin_free_ring_mem) {
/* Custom free function (the global registration entry has already been cleared) */
reg->pfring_plugin_free_ring_mem(rule);
}
if(rule->plugin_data_ptr != NULL) {
kfree(rule->plugin_data_ptr);
rule->plugin_data_ptr = NULL;
}
ring_read_unlock();
}
}
sk = (struct sock*)lockless_list_get_next(&ring_table, &last_list_idx);
}
for(i = MAX_PLUGIN_ID - 1; i > 0; i--) {
if(plugin_registration[i] != NULL) {
max_registered_plugin_id = i;
break;
}
}
printk("[PF_RING] unregistered plugin [id=%d][max=%d]\n",
pfring_plugin_id, max_registered_plugin_id);
module_put(THIS_MODULE); /* Decrement usage count */
return(0);
}
}
/* ********************************** */
static inline int is_valid_skb_direction(packet_direction direction, u_char recv_packet) {
switch(direction) {
case rx_and_tx_direction:
return(1);
case rx_only_direction:
if(recv_packet) return(1);
break;
case tx_only_direction:
if(!recv_packet) return(1);
break;
}
return(0);
}
/* ********************************** */
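/*
  Descriptive note (added for clarity): IPv4 defragmentation helper. When the
  skb is a fragment, a clone is handed to ring_gather_frags(): if the datagram
  is now complete the reassembled skb is returned and *defragmented_skb is set
  so the caller knows it has to free it; if more fragments are needed, NULL is
  returned and the fragment is masked from the caller. IPv6 reassembly is
  intentionally not implemented.
*/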
static struct sk_buff* defrag_skb(struct sk_buff *skb,
u_int16_t displ,
struct pfring_pkthdr *hdr,
int *defragmented_skb) {
struct sk_buff *cloned = NULL;
struct iphdr *iphdr = NULL;
struct sk_buff *skk = NULL;
skb_set_network_header(skb, hdr->extended_hdr.parsed_pkt.offset.l3_offset - displ);
skb_reset_transport_header(skb);
iphdr = ip_hdr(skb);
if(iphdr && (iphdr->version == 4)) {
if(unlikely(enable_debug))
printk("[PF_RING] [version=%d] %X -> %X\n",
iphdr->version, iphdr->saddr, iphdr->daddr);
if(iphdr->frag_off & htons(IP_MF | IP_OFFSET)) {
if((cloned = skb_clone(skb, GFP_ATOMIC)) != NULL) {
int vlan_offset = 0;
if(displ && (hdr->extended_hdr.parsed_pkt.offset.l3_offset - displ) /*VLAN*/) {
vlan_offset = 4;
skb_pull(cloned, vlan_offset);
displ += vlan_offset;
}
skb_set_network_header(cloned, hdr->extended_hdr.parsed_pkt.offset.l3_offset - displ);
skb_reset_transport_header(cloned);
iphdr = ip_hdr(cloned);
if(unlikely(enable_debug)) {
int ihl, end;
int offset = ntohs(iphdr->frag_off);
offset &= IP_OFFSET;
offset <<= 3;
ihl = iphdr->ihl * 4;
end = offset + cloned->len - ihl;
printk("[PF_RING] There is a fragment to handle [proto=%d][frag_off=%u]"
"[ip_id=%u][ip_hdr_len=%d][end=%d][network_header=%d][displ=%d]\n",
iphdr->protocol, offset,
ntohs(iphdr->id),
ihl, end,
hdr->extended_hdr.parsed_pkt.offset.l3_offset - displ, displ);
}
skk = ring_gather_frags(cloned);
if(skk != NULL) {
u_int16_t ip_id;
u_int8_t first_fragment, second_fragment;
if(unlikely(enable_debug)) {
unsigned char *c;
printk("[PF_RING] IP reasm on new skb [skb_len=%d]"
"[head_len=%d][nr_frags=%d][frag_list=%p]\n",
(int)skk->len,
skb_headlen(skk),
skb_shinfo(skk)->nr_frags,
skb_shinfo(skk)->frag_list);
c = skb_network_header(skk);
printk("[PF_RING] IP header "
"%X %X %X %X %X %X %X %X %X %X %X %X %X %X %X %X %X %X %X %X\n",
c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7], c[8], c[9],
c[10], c[11], c[12], c[13], c[14], c[15], c[16], c[17], c[18], c[19]);
c -= displ;
printk("[PF_RING] L2 header "
"%X %X %X %X %X %X %X %X %X %X %X %X %X %X %X %X %X %X\n",
c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7], c[8], c[9],
c[10], c[11], c[12], c[13], c[14], c[15], c[16], c[17]);
}
if(vlan_offset > 0) {
skb_push(skk, vlan_offset);
displ -= vlan_offset;
}
skb = skk;
*defragmented_skb = 1;
hdr->len = hdr->caplen = skb->len + displ;
parse_pkt(skb, 1, displ, hdr, &ip_id, &first_fragment, &second_fragment);
} else {
//printk("[PF_RING] Fragment queued \n");
return(NULL); /* mask rcvd fragments */
}
}
} else {
if(unlikely(enable_debug))
printk("[PF_RING] Do not seems to be a fragmented ip_pkt[iphdr=%p]\n",
iphdr);
}
} else if(iphdr && iphdr->version == 6) {
/* Re-assembling fragmented IPv6 packets has not been
implemented. Probability of observing fragmented IPv6
packets is extremely low. */
if(unlikely(enable_debug))
printk("[PF_RING] Re-assembling fragmented IPv6 packet hs not been implemented\n");
}
return(skb);
}
/* ********************************** */
/*
PF_RING main entry point
Return code
0 - Packet not handled
1 - Packet handled successfully
2 - Packet handled successfully but unable to copy it into
the ring due to lack of available space
*/
static int skb_ring_handler(struct sk_buff *skb,
u_int8_t recv_packet,
u_int8_t real_skb /* 1=real skb, 0=faked skb */,
u_int8_t *skb_reference_in_use, /* This return value is set to 1 in case
the input skb is in use by PF_RING and thus
the caller should NOT free it */
u_int32_t channel_id,
u_int32_t num_rx_channels)
{
struct sock *skElement;
int rc = 0, is_ip_pkt = 0, room_available = 0, clone_id = 0;
struct pfring_pkthdr hdr;
int displ;
int defragmented_skb = 0;
struct sk_buff *skk = NULL;
struct sk_buff *orig_skb = skb;
u_int32_t last_list_idx;
struct sock *sk;
struct pf_ring_socket *pfr;
ring_cluster_element *cluster_ptr;
u_int16_t ip_id = 0;
u_int8_t first_fragment = 0, second_fragment = 0;
*skb_reference_in_use = 0;
/* Check if there's at least one PF_RING ring defined that
could receive the packet: if none just stop here */
if(ring_table_size == 0)
return(0);
// prefetch(skb->data);
if(recv_packet) {
if(real_skb)
displ = skb->dev->hard_header_len;
else
displ = 0;
} else
displ = 0;
#if 0
if(unlikely(enable_debug)) {
if(skb->dev && (skb->dev->ifindex < MAX_NUM_IFIDX))
printk("[PF_RING] (1) %s(): [%d rings on %s (idx=%d), %d 'any' rings]\n", __FUNCTION__
num_rings_per_device[skb->dev->ifindex], skb->dev->name, skb->dev->ifindex, num_any_rings);
}
#endif
if((num_any_rings == 0)
&& (skb->dev
&& (skb->dev->ifindex < MAX_NUM_IFIDX)
&& (num_rings_per_device[skb->dev->ifindex] == 0))) {
return(0);
}
#ifdef PROFILING
uint64_t rdt = _rdtsc(), rdt1, rdt2;
#endif
#if(LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,30))
if(channel_id == UNKNOWN_RX_CHANNEL)
channel_id = skb_get_rx_queue(skb);
#endif
if(channel_id > MAX_NUM_RX_CHANNELS) channel_id = 0 /* MAX_NUM_RX_CHANNELS */;
if((!skb) /* Invalid skb */ ||((!enable_tx_capture) && (!recv_packet))) {
/*
An outgoing packet is about to be sent out
but we decided not to handle transmitted
packets.
*/
return(0);
}
if(unlikely(enable_debug)) {
struct timeval tv;
skb_get_timestamp(skb, &tv);
printk("[PF_RING] %s(): [skb=%p][%u.%u][len=%d][dev=%s][csum=%u]\n", __FUNCTION__,
skb, (unsigned int)tv.tv_sec, (unsigned int)tv.tv_usec,
skb->len, skb->dev == NULL ? "<NULL>" : skb->dev->name,
skb->csum);
}
#ifdef PROFILING
rdt1 = _rdtsc();
#endif
memset(&hdr, 0, sizeof(hdr));
hdr.ts.tv_sec = 0;
hdr.len = hdr.caplen = skb->len + displ;
#if 0 /* safety check (this leads to wrong numbers with GSO) */
hdr.len = hdr.caplen = min(skb->len + displ,
skb->dev->mtu /* 1500 */ + skb->dev->hard_header_len /* 14 */ + 4 /* VLAN header */);
#endif
if(quick_mode) {
pfr = device_rings[skb->dev->ifindex][channel_id];
hdr.extended_hdr.parsed_header_len = 0;
if(pfr && pfr->rehash_rss != NULL && skb->dev) {
parse_pkt(skb, real_skb, displ, &hdr, &ip_id, &first_fragment, &second_fragment);
channel_id = pfr->rehash_rss(skb, &hdr) % get_num_rx_queues(skb->dev);
pfr = device_rings[skb->dev->ifindex][channel_id];
}
if(unlikely(enable_debug)) printk("[PF_RING] Expecting channel %d [%p]\n", channel_id, pfr);
if((pfr != NULL) && is_valid_skb_direction(pfr->direction, recv_packet)) {
/* printk("==>>> [%d][%d]\n", skb->dev->ifindex, channel_id); */
rc = 1;
room_available |= copy_data_to_ring(real_skb ? skb : NULL, pfr, &hdr,
displ, 0, NULL, NULL, 0, real_skb ? &clone_id : NULL);
}
} else {
is_ip_pkt = parse_pkt(skb, real_skb, displ, &hdr,
&ip_id, &first_fragment, &second_fragment);
if(enable_ip_defrag) {
if(real_skb
&& is_ip_pkt
&& recv_packet) {
skb = skk = defrag_skb(skb, displ, &hdr, &defragmented_skb);
if(skb == NULL) {
return(0);
}
}
}
if(skb->dev)
hdr.extended_hdr.if_index = skb->dev->ifindex;
else
hdr.extended_hdr.if_index = UNKNOWN_INTERFACE;
hdr.extended_hdr.tx.bounce_interface = UNKNOWN_INTERFACE;
hdr.extended_hdr.tx.reserved = NULL;
hdr.extended_hdr.rx_direction = recv_packet;
/* [1] Check unclustered sockets */
sk = (struct sock*)lockless_list_get_first(&ring_table, &last_list_idx);
if(unlikely(enable_debug))
printk("[PF_RING] -> lockless_list_get_first=%p [num elements=%u][last_list_idx=%u]\n",
sk, ring_table.num_elements, (unsigned int)last_list_idx);
while(sk != NULL) {
pfr = ring_sk(sk);
if((pfr != NULL)
&& (
test_bit(skb->dev->ifindex, pfr->netdev_mask)
|| (pfr->ring_netdev == &any_device_element) /* Socket bound to 'any' */
|| ((skb->dev->flags & IFF_SLAVE) && (pfr->ring_netdev->dev == skb->dev->master)))
&& (pfr->ring_netdev != &none_device_element) /* Not a dummy socket bound to "none" */
&& (pfr->cluster_id == 0 /* No cluster */ )
&& (pfr->ring_slots != NULL)
&& is_valid_skb_direction(pfr->direction, recv_packet)
) {
/* We've found the ring where the packet can be stored */
int old_len = hdr.len, old_caplen = hdr.caplen; /* Keep old length */
room_available |= add_skb_to_ring(skb, real_skb, pfr, &hdr, is_ip_pkt,
displ, channel_id, num_rx_channels, &clone_id);
hdr.len = old_len, hdr.caplen = old_caplen;
rc = 1; /* Ring found: we've done our job */
}
sk = (struct sock*)lockless_list_get_next(&ring_table, &last_list_idx);
if(unlikely(enable_debug))
printk("[PF_RING] -> lockless_list_get_next=%p [num elements=%u][last_list_idx=%u]\n",
sk, ring_table.num_elements, (unsigned int)last_list_idx);
}
cluster_ptr = (ring_cluster_element*)lockless_list_get_first(&ring_cluster_list, &last_list_idx);
/* [2] Check socket clusters */
while(cluster_ptr != NULL) {
struct pf_ring_socket *pfr;
if(cluster_ptr->cluster.num_cluster_elements > 0) {
u_short num_iterations;
int skb_hash = hash_pkt_cluster(cluster_ptr, &hdr,
ip_id, first_fragment, second_fragment);
/*
If the hashing value is negative, then this is a fragment that we are
not able to reassemble and thus we discard it, as the application
would have no idea what to do with it
*/
if(skb_hash >= 0) {
/*
We try to add the packet to the right cluster
element, but if we're working in round-robin and this
element is full, we try to add this to the next available
element. If none with at least a free slot can be found
then we give up :-(
*/
for(num_iterations = 0;
num_iterations < cluster_ptr->cluster.num_cluster_elements;
num_iterations++) {
skElement = cluster_ptr->cluster.sk[skb_hash];
if(skElement != NULL) {
pfr = ring_sk(skElement);
if((pfr != NULL)
&& (pfr->ring_slots != NULL)
&& (test_bit(skb->dev->ifindex, pfr->netdev_mask)
|| ((skb->dev->flags & IFF_SLAVE)
&& (pfr->ring_netdev->dev == skb->dev->master)))
&& is_valid_skb_direction(pfr->direction, recv_packet)
) {
if(check_free_ring_slot(pfr) /* Not full */) {
/* We've found the ring where the packet can be stored */
int old_len = hdr.len, old_caplen = hdr.caplen; /* Keep old length */
room_available |= add_skb_to_ring(skb, real_skb, pfr, &hdr, is_ip_pkt,
displ, channel_id, num_rx_channels, &clone_id);
hdr.len = old_len, hdr.caplen = old_caplen;
rc = 1; /* Ring found: we've done our job */
break;
} else if((cluster_ptr->cluster.hashing_mode != cluster_round_robin)
/* We're the last element of the cluster so no further cluster element to check */
|| ((num_iterations + 1) > cluster_ptr->cluster.num_cluster_elements)) {
pfr->slots_info->tot_pkts++, pfr->slots_info->tot_lost++;
}
}
}
if(cluster_ptr->cluster.hashing_mode != cluster_round_robin)
break;
else
skb_hash = (skb_hash + 1) % cluster_ptr->cluster.num_cluster_elements;
}
} else
num_cluster_discarded_fragments++;
}
cluster_ptr = (ring_cluster_element*)lockless_list_get_next(&ring_cluster_list, &last_list_idx);
} /* Clustering */
#ifdef PROFILING
rdt1 = _rdtsc() - rdt1;
rdt2 = _rdtsc();
#endif
/* Fragment handling */
if(skk != NULL && defragmented_skb)
kfree_skb(skk);
}
if(clone_id > 0)
*skb_reference_in_use = 1;
if(rc == 1 /* Ring found */) {
if(transparent_mode == driver2pf_ring_non_transparent /* 2 */) {
/* transparent mode = 2 */
if(recv_packet && real_skb) {
if(unlikely(enable_debug))
printk("[PF_RING] kfree_skb()\n");
if(clone_id == 0) /* We have not used the orig_skb */
kfree_skb(orig_skb); /* Free memory */
}
}
// else { /* transparent mode = 0 or 1 */
// CHECK: I commented the line below as I have no idea why it has been put there
// rc = 0;
//}
#if 0
printk("[PF_RING] %s() [clone_id=%d][recv_packet=%d][real_skb=%d]\n",
__FUNCTION__, clone_id, recv_packet, real_skb);
#endif
}
#ifdef PROFILING
rdt2 = _rdtsc() - rdt2;
rdt = _rdtsc() - rdt;
if(unlikely(enable_debug))
printk("[PF_RING] # cycles: %d [lock costed %d %d%%][free costed %d %d%%]\n",
(int)rdt, rdt - rdt1,
(int)((float)((rdt - rdt1) * 100) / (float)rdt), rdt2,
(int)((float)(rdt2 * 100) / (float)rdt));
#endif
if((rc == 1) && (room_available == 0))
rc = 2;
if(unlikely(enable_debug)) printk("[PF_RING] (4) %s(): returned %d\n", __FUNCTION__, rc);
return(rc); /* 0 = packet not handled */
}
/* ********************************** */
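/*
  Descriptive note (added for clarity): buffer_ring_handler() wraps a raw
  buffer into this single, file-scope sk_buff (passed to skb_ring_handler()
  as a fake skb, real_skb=0) so it can be processed like a regular packet.
  Being one shared structure, concurrent callers would race on it.
*/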
struct sk_buff skb;
static int buffer_ring_handler(struct net_device *dev, char *data, int len)
{
u_int8_t skb_reference_in_use;
if(unlikely(enable_debug))
printk("[PF_RING] buffer_ring_handler: [dev=%s][len=%d]\n",
dev->name == NULL ? "<NULL>" : dev->name, len);
skb.dev = dev, skb.len = len, skb.data = data, skb.data_len = len;
/* BD - API changed for time keeping */
#if(LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14))
skb.stamp.tv_sec = 0;
#elif(LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22))
skb.tstamp.off_sec = 0;
#else
skb.tstamp.tv64 = 0;
#endif
return(skb_ring_handler(&skb, 1, 0 /* fake skb */,
&skb_reference_in_use,
UNKNOWN_RX_CHANNEL,
UNKNOWN_NUM_RX_CHANNELS));
}
/* ********************************** */
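/*
  Descriptive note (added for clarity): protocol handler registered via
  dev_add_pack(). It feeds every captured skb (except loopback packets) into
  skb_ring_handler() when running in standard_linux_path transparent mode or
  when the packet is outgoing, then frees the skb unless PF_RING kept a
  reference to it.
*/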
static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt
#if(LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16))
, struct net_device *orig_dev
#endif
)
{
int rc;
u_int8_t skb_reference_in_use = 0;
if(skb->pkt_type != PACKET_LOOPBACK
&& (transparent_mode == standard_linux_path || skb->pkt_type == PACKET_OUTGOING)) {
rc = skb_ring_handler(skb,
(skb->pkt_type == PACKET_OUTGOING) ? 0 : 1,
1 /* real_skb */, &skb_reference_in_use,
UNKNOWN_RX_CHANNEL, UNKNOWN_NUM_RX_CHANNELS);
} else
rc = 0;
if(!skb_reference_in_use) {
/*
This packet has been received by Linux through its standard
mechanisms (no PF_RING transparent/TNAPI)
*/
kfree_skb(skb);
}
return(rc);
}
/* ********************************** */
void register_device_handler(void) {
if(transparent_mode != standard_linux_path && !enable_tx_capture)
return;
prot_hook.func = packet_rcv;
prot_hook.type = htons(ETH_P_ALL);
dev_add_pack(&prot_hook);
}
/* ********************************** */
void unregister_device_handler(void) {
if(transparent_mode != standard_linux_path && !enable_tx_capture)
return;
dev_remove_pack(&prot_hook); /* Remove protocol hook */
}
/* ********************************** */
static int ring_create(
#if(LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24))
struct net *net,
#endif
struct socket *sock, int protocol
#if((LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,33)) || ((LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,32)) && defined(REDHAT_PATCHED_KERNEL)))
, int kern
#endif
)
{
struct sock *sk;
struct pf_ring_socket *pfr;
int err = -ENOMEM;
if(unlikely(enable_debug))
printk("[PF_RING] %s()\n", __FUNCTION__);
/* Only root / CAP_NET_ADMIN-capable users may create PF_RING sockets */
if(!capable(CAP_NET_ADMIN))
return -EPERM;
if(sock->type != SOCK_RAW)
return -ESOCKTNOSUPPORT;
if(protocol != htons(ETH_P_ALL))
return -EPROTONOSUPPORT;
#if(LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,11))
sk = sk_alloc(PF_RING, GFP_KERNEL, 1, NULL);
#else
#if(LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24))
// BD: API changed in 2.6.12, ref:
// http://svn.clkao.org/svnweb/linux/revision/?rev=28201
sk = sk_alloc(PF_RING, GFP_ATOMIC, &ring_proto, 1);
#else
sk = sk_alloc(net, PF_INET, GFP_KERNEL, &ring_proto);
#endif
#endif
if(sk == NULL)
goto out;
sock->ops = &ring_ops;
sock_init_data(sock, sk);
#if(LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,11))
sk_set_owner(sk, THIS_MODULE);
#endif
ring_sk(sk) = ring_sk_datatype(kmalloc(sizeof(*pfr), GFP_KERNEL));
if(!(pfr = ring_sk(sk)))
goto free_sk;
memset(pfr, 0, sizeof(*pfr));
pfr->ring_shutdown = 0;
pfr->ring_active = 0; /* We activate as soon as somebody waits for packets */
pfr->num_rx_channels = UNKNOWN_NUM_RX_CHANNELS;
pfr->channel_id_mask = RING_ANY_CHANNEL;
pfr->bucket_len = DEFAULT_BUCKET_LEN;
pfr->poll_num_pkts_watermark = DEFAULT_MIN_PKT_QUEUED;
pfr->add_packet_to_ring = add_packet_to_ring;
pfr->add_raw_packet_to_ring = add_raw_packet_to_ring;
pfr->header_len = quick_mode ? short_pkt_header : long_pkt_header;
init_waitqueue_head(&pfr->ring_slots_waitqueue);
rwlock_init(&pfr->ring_index_lock);
rwlock_init(&pfr->ring_rules_lock);
atomic_set(&pfr->num_ring_users, 0);
INIT_LIST_HEAD(&pfr->sw_filtering_rules);
INIT_LIST_HEAD(&pfr->hw_filtering_rules);
INIT_LIST_HEAD(&pfr->locked_objects_list);
pfr->master_ring = NULL;
pfr->ring_netdev = &none_device_element; /* Unbound socket */
pfr->sample_rate = 1; /* No sampling */
sk->sk_family = PF_RING;
sk->sk_destruct = ring_sock_destruct;
pfr->ring_id = atomic_inc_return(&ring_id_serial);
rwlock_init(&pfr->tx.consume_tx_packets_lock);
pfr->tx.enable_tx_with_bounce = 0;
pfr->tx.last_tx_dev_idx = UNKNOWN_INTERFACE, pfr->tx.last_tx_dev = NULL;
if(ring_insert(sk) == -1)
goto free_pfr;
ring_proc_add(pfr);
if(unlikely(enable_debug))
printk("[PF_RING] %s(): created\n", __FUNCTION__);
return(0);
free_pfr:
kfree(ring_sk(sk));
free_sk:
sk_free(sk);
out:
return err;
}
/* ************************************* */
static int ring_proc_virtual_filtering_dev_get_info(char *buf, char **start, off_t offset,
int len, int *unused, void *data)
{
int rlen = 0;
if(data != NULL) {
virtual_filtering_device_info *info = (virtual_filtering_device_info*)data;
char *dev_family = "???";
switch(info->device_type) {
case standard_nic_family: dev_family = "Standard NIC"; break;
case intel_82599_family: dev_family = "Intel 82599"; break;
}
rlen = sprintf(buf, "Name: %s\n", info->device_name);
rlen += sprintf(buf+rlen, "Family: %s\n", dev_family);
}
return rlen;
}
/* ************************************* */
static virtual_filtering_device_element* add_virtual_filtering_device(struct sock *sock,
virtual_filtering_device_info *info)
{
virtual_filtering_device_element *elem;
struct list_head *ptr, *tmp_ptr;
if(unlikely(enable_debug))
printk("[PF_RING] --> add_virtual_filtering_device(%s)\n", info->device_name);
if(info == NULL)
return(NULL);
/* Check if the same entry is already present */
write_lock(&virtual_filtering_lock);
list_for_each_safe(ptr, tmp_ptr, &virtual_filtering_devices_list) {
virtual_filtering_device_element *filtering_ptr = list_entry(ptr, virtual_filtering_device_element, list);
if(strcmp(filtering_ptr->info.device_name, info->device_name) == 0) {
write_unlock(&virtual_filtering_lock);
return(NULL); /* Entry already present */
}
}
elem = kmalloc(sizeof(virtual_filtering_device_element), GFP_KERNEL);
if(elem == NULL)
return(NULL);
else {
memcpy(&elem->info, info, sizeof(virtual_filtering_device_info));
INIT_LIST_HEAD(&elem->list);
}
list_add(&elem->list, &virtual_filtering_devices_list); /* Add as first entry */
write_unlock(&virtual_filtering_lock);
/* Add /proc entry */
elem->info.proc_entry = proc_mkdir(elem->info.device_name, ring_proc_dev_dir);
create_proc_read_entry(PROC_INFO, 0 /* read-only */,
elem->info.proc_entry,
ring_proc_virtual_filtering_dev_get_info /* read */,
(void*)&elem->info);
return(elem);
}
/* ************************************* */
static int remove_virtual_filtering_device(struct sock *sock, char *device_name)
{
struct list_head *ptr, *tmp_ptr;
if(unlikely(enable_debug))
printk("[PF_RING] --> remove_virtual_filtering_device(%s)\n", device_name);
write_lock(&virtual_filtering_lock);
list_for_each_safe(ptr, tmp_ptr, &virtual_filtering_devices_list) {
virtual_filtering_device_element *filtering_ptr;
filtering_ptr = list_entry(ptr, virtual_filtering_device_element, list);
if(strcmp(filtering_ptr->info.device_name, device_name) == 0) {
/* Remove /proc entry */
remove_proc_entry(PROC_INFO, filtering_ptr->info.proc_entry);
remove_proc_entry(filtering_ptr->info.device_name, ring_proc_dev_dir);
list_del(ptr);
write_unlock(&virtual_filtering_lock);
kfree(filtering_ptr);
return(0);
}
}
write_unlock(&virtual_filtering_lock);
return(-EINVAL); /* Not found */
}
/* ********************************** */
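/*
  Descriptive note (added for clarity): look up or create the userspace ring
  named "usrN". Only a consumer may create a new entry; a producer can only
  attach to an existing one (but may keep it alive if the consumer dies).
  At most one consumer and one producer may be attached at any time.
*/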
static struct pf_userspace_ring* userspace_ring_create(char *u_dev_name, userspace_ring_client_type type,
wait_queue_head_t *consumer_ring_slots_waitqueue) {
char *c_p;
long id;
struct list_head *ptr, *tmp_ptr;
struct pf_userspace_ring *entry;
struct pf_userspace_ring *usr = NULL;
if(strncmp(u_dev_name, "usr", 3) != 0) {
if(unlikely(enable_debug))
printk("[PF_RING] %s(%s) failed (1)\n", __FUNCTION__, u_dev_name);
return NULL;
}
id = simple_strtol(&u_dev_name[3], &c_p, 10);
write_lock(&userspace_ring_lock);
/* checking if the userspace ring already exists */
list_for_each_safe(ptr, tmp_ptr, &userspace_ring_list) {
entry = list_entry(ptr, struct pf_userspace_ring, list);
if(unlikely(enable_debug))
printk("[PF_RING] %s(%d) vs %lu [users: %u][type: %s]\n",
__FUNCTION__,
entry->id, id, atomic_read(&entry->users[type]),
(type == userspace_ring_producer) ? "producer" : "consumer");
if(entry->id == id) {
if(atomic_read(&entry->users[type]) > 0)
goto unlock;
usr = entry;
break;
}
}
/* creating a new userspace ring */
if(usr == NULL) {
/* Note: a userspace ring can be created by a consumer only,
* however a producer can keep it if the consumer dies */
if(unlikely(enable_debug))
printk("[PF_RING] %s(%s): attempting to create ring\n", __FUNCTION__, u_dev_name);
if(type == userspace_ring_producer) {
if(unlikely(enable_debug))
printk("[PF_RING] %s(%s) failed (2)\n", __FUNCTION__, u_dev_name);
goto unlock;
}
usr = kcalloc(1, sizeof(struct pf_userspace_ring), GFP_KERNEL);
if(usr == NULL) {
if(unlikely(enable_debug))
printk("[PF_RING] %s(%s) failed (3)\n", __FUNCTION__, u_dev_name);
goto unlock;
}
usr->id = id;
atomic_set(&usr->users[userspace_ring_consumer], 0);
atomic_set(&usr->users[userspace_ring_producer], 0);
list_add(&usr->list, &userspace_ring_list);
}
atomic_inc(&usr->users[type]);
if(unlikely(enable_debug))
printk("[PF_RING] %s(%lu) just created [users: %u][type: %s]\n",
__FUNCTION__, id, atomic_read(&usr->users[type]),
(type == userspace_ring_producer) ? "producer" : "consumer");
if(type == userspace_ring_consumer)
usr->consumer_ring_slots_waitqueue = consumer_ring_slots_waitqueue;
unlock:
write_unlock(&userspace_ring_lock);
if(unlikely(enable_debug)) {
if(usr != NULL)
printk("[PF_RING] %s() Userspace ring found or created.\n", __FUNCTION__);
else
printk("[PF_RING] %s(): NULL ring returned.\n", __FUNCTION__);
}
return usr;
}
/* ********************************** */
static int userspace_ring_remove(struct pf_userspace_ring *usr,
userspace_ring_client_type type)
{
struct list_head *ptr, *tmp_ptr;
struct pf_userspace_ring *entry;
int ret = 0;
write_lock(&userspace_ring_lock);
list_for_each_safe(ptr, tmp_ptr, &userspace_ring_list) {
entry = list_entry(ptr, struct pf_userspace_ring, list);
if(entry == usr) {
if(atomic_read(&usr->users[type]) > 0)
atomic_dec(&usr->users[type]);
if(type == userspace_ring_consumer)
usr->consumer_ring_slots_waitqueue = NULL;
if(atomic_read(&usr->users[userspace_ring_consumer]) == 0
&& atomic_read(&usr->users[userspace_ring_producer]) == 0) {
ret = 1; /* ring memory can be freed */
list_del(ptr);
kfree(entry);
}
break;
}
}
write_unlock(&userspace_ring_lock);
if(ret == 1) {
if(unlikely(enable_debug))
printk("[PF_RING] userspace_ring_remove() Ring can be freed.\n");
}
return ret;
}
/* ************************************* */
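/*
  Descriptive note (added for clarity): helpers for physically contiguous
  ring memory. The backing pages are marked reserved before the area is
  handed out and unreserved again before being returned to the page
  allocator (marking pages reserved is typically needed when the area is
  later remapped to user space). alloc_contiguous_memory() first tries the
  requested NUMA node and then falls back to any node.
*/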
void reserve_memory(unsigned long base, unsigned long mem_len)
{
struct page *page, *page_end;
page_end = virt_to_page(base + mem_len - 1);
for(page = virt_to_page(base); page <= page_end; page++)
SetPageReserved(page);
}
void unreserve_memory(unsigned long base, unsigned long mem_len)
{
struct page *page, *page_end;
page_end = virt_to_page(base + mem_len - 1);
for(page = virt_to_page(base); page <= page_end; page++)
ClearPageReserved(page);
}
static void free_contiguous_memory(unsigned long mem, u_int mem_len)
{
if(mem != 0) {
unreserve_memory(mem, mem_len);
free_pages(mem, get_order(mem_len));
}
}
static unsigned long __get_free_pages_node(int nid, gfp_t gfp_mask, unsigned int order) {
struct page *page;
/* Just remember not to use the highmem flag:
* VM_BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0); */
page = alloc_pages_node(nid, gfp_mask, order);
if(!page)
return 0;
return (unsigned long) page_address(page);
}
static unsigned long alloc_contiguous_memory(u_int mem_len, int node)
{
unsigned long mem = 0;
/* trying to allocate memory on the selected numa node */
mem = __get_free_pages_node(node, GFP_KERNEL, get_order(mem_len));
if(!mem)
mem = __get_free_pages(GFP_KERNEL, get_order(mem_len)); /* fallback on any node */
if(mem)
reserve_memory(mem, mem_len);
else
if(unlikely(enable_debug))
printk("[PF_RING] %s() Failure (len=%d, order=%d)\n", __FUNCTION__, mem_len, get_order(mem_len));
return(mem);
}
/* ************************************* */
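/*
  Descriptive note (added for clarity): allocate DMA-capable memory for extra
  packet slots as an array of physically contiguous chunks, preferably on the
  NUMA node of the device, then pci_map_single() every slot. num_chunks is
  the ceiling of num_slots / (chunk_len / slot_len); purely as an
  illustration, with num_slots=4096, slot_len=2048 and chunk_len=65536 there
  would be 32 slots per chunk and 128 chunks. On partial failure num_chunks
  and num_slots are shrunk to what was actually allocated and mapped.
*/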
static struct dma_memory_info *allocate_extra_dma_memory(struct device *hwdev,
u_int32_t num_slots, u_int32_t slot_len, u_int32_t chunk_len)
{
u_int i, num_slots_per_chunk, num_chunks;
struct dma_memory_info *dma_memory;
int numa_node =
#if(LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26)) && defined(CONFIG_NUMA)
dev_to_node(hwdev)
#else
-1
#endif
;
/* Note: this function allocates up to num_slots slots. You can check the exact number by ... */
num_slots_per_chunk = chunk_len / slot_len;
num_chunks = (num_slots + num_slots_per_chunk-1) / num_slots_per_chunk;
if(num_chunks == 0)
return NULL;
if((dma_memory = kcalloc(1, sizeof(struct dma_memory_info), GFP_KERNEL)) == NULL)
return NULL;
dma_memory->chunk_len = chunk_len;
dma_memory->num_slots = num_slots;
dma_memory->slot_len = slot_len;
dma_memory->hwdev = hwdev;
dma_memory->num_chunks = num_chunks;
if((dma_memory->virtual_addr = kcalloc(1, sizeof(unsigned long) * dma_memory->num_chunks, GFP_KERNEL)) == NULL) {
kfree(dma_memory);
return NULL;
}
if((dma_memory->dma_addr = kcalloc(1, sizeof(u_int64_t) * dma_memory->num_slots, GFP_KERNEL)) == NULL) {
kfree(dma_memory->virtual_addr);
kfree(dma_memory);
return NULL;
}
if(numa_node == -1) {
if(unlikely(enable_debug))
printk("[PF_RING] %s() device node not set, selecting current node\n", __FUNCTION__);
numa_node = numa_node_id(); /* using current node if not set */
}
if(unlikely(enable_debug))
printk("[PF_RING] Allocating %d DMA chunks of %d bytes on node %d [slots per chunk=%d]\n",
dma_memory->num_chunks, dma_memory->chunk_len, numa_node, num_slots_per_chunk);
/* Allocating memory chunks */
for(i=0; i < dma_memory->num_chunks; i++) {
dma_memory->virtual_addr[i] = alloc_contiguous_memory(dma_memory->chunk_len, numa_node);
if(!dma_memory->virtual_addr[i]) {
printk("[PF_RING] %s() Warning: no more free memory available! Allocated %d of %d chunks.\n",
__FUNCTION__, i, dma_memory->num_chunks);
dma_memory->num_chunks = i;
dma_memory->num_slots = dma_memory->num_chunks * num_slots_per_chunk;
break;
}
}
/* Mapping DMA slots */
for(i=0; i < dma_memory->num_slots; i++) {
u_int chunk_id = i / num_slots_per_chunk;
u_int offset = (i % num_slots_per_chunk) * dma_memory->slot_len;
char *slot;
if(!dma_memory->virtual_addr[chunk_id])
break;
slot = (char *) (dma_memory->virtual_addr[chunk_id] + offset);
if(unlikely(enable_debug))
printk("[PF_RING] %s() Mapping DMA slot %d of %d [slot addr=%p][offset=%u]\n",
__FUNCTION__, i + 1, dma_memory->num_slots, slot, offset);
dma_memory->dma_addr[i] = cpu_to_le64(
pci_map_single(to_pci_dev(dma_memory->hwdev), slot,
dma_memory->slot_len,
PCI_DMA_BIDIRECTIONAL));
if(dma_mapping_error(
#if(LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26))
dma_memory->hwdev,
#endif
dma_memory->dma_addr[i])) {
printk("[PF_RING] %s() Error mapping DMA slot %d of %d \n", __FUNCTION__, i + 1, dma_memory->num_slots);
dma_memory->dma_addr[i] = 0;
dma_memory->num_slots = i;
break;
}
}
return dma_memory;
}
static void free_extra_dma_memory(struct dma_memory_info *dma_memory)
{
u_int i;
/* Unmapping DMA addresses */
if(dma_memory->dma_addr) {
for(i=0; i < dma_memory->num_slots; i++) {
if(dma_memory->dma_addr[i]) {
dma_unmap_single(dma_memory->hwdev, dma_memory->dma_addr[i],
dma_memory->slot_len,
PCI_DMA_BIDIRECTIONAL);
}
}
kfree(dma_memory->dma_addr);
}
/* Freeing memory */
if(dma_memory->virtual_addr) {
for(i=0; i < dma_memory->num_chunks; i++) {
if(dma_memory->virtual_addr[i]) {
if(unlikely(enable_debug))
printk("[PF_RING] %s() Freeing chunk %d of %d\n", __FUNCTION__, i, dma_memory->num_chunks);
free_contiguous_memory(dma_memory->virtual_addr[i], dma_memory->chunk_len);
}
}
kfree(dma_memory->virtual_addr);
}
kfree(dma_memory);
}
/* ********************************** */
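/*
  Descriptive note (added for clarity): create (or recover) the DNA cluster
  identified by dna_cluster_id. Only one master per cluster is allowed; when
  the cluster already exists without a running master it is recovered,
  provided that its geometry (number of slaves, memory sizes, mode,
  hugepages/DMA settings) matches the request, and *recovered is set
  accordingly. Unless hugepages are used, the extra DMA memory and the
  per-slave shared memory are allocated here.
*/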
static struct dna_cluster* dna_cluster_create(u_int32_t dna_cluster_id, u_int32_t num_slots,
u_int32_t num_slaves, u_int32_t slave_mem_len,
u_int32_t master_persistent_mem_len, socket_mode mode,
u_int32_t options, char *hugepages_dir,
struct device *hwdev, u_int32_t slot_len, u_int32_t chunk_len,
u_int32_t *recovered)
{
struct list_head *ptr, *tmp_ptr;
struct dna_cluster *entry;
struct dna_cluster *dnac = NULL;
u_int32_t shared_mem_size;
int i;
write_lock(&dna_cluster_lock);
/* checking if the dna cluster already exists */
list_for_each_safe(ptr, tmp_ptr, &dna_cluster_list) {
entry = list_entry(ptr, struct dna_cluster, list);
if(entry->id == dna_cluster_id) {
if(unlikely(enable_debug))
printk("[PF_RING] %s(%u) cluster already exists [master: %u]\n",
__FUNCTION__, dna_cluster_id, entry->master);
/* Note: a dna cluster can be created by a master only,
* however one/more slaves can keep it if the master dies */
if(entry->master > 0)
goto unlock;
dnac = entry;
break;
}
}
/* Creating a new dna cluster */
if(dnac == NULL) {
if(unlikely(enable_debug))
printk("[PF_RING] %s(%u): attempting to create a dna cluster\n", __FUNCTION__, dna_cluster_id);
dnac = kcalloc(1, sizeof(struct dna_cluster), GFP_KERNEL);
if(dnac == NULL) {
if(unlikely(enable_debug))
printk("[PF_RING] %s(%u) failed\n", __FUNCTION__, dna_cluster_id);
goto unlock;
}
dnac->id = dna_cluster_id;
dnac->num_slaves = num_slaves;
dnac->mode = mode;
dnac->options = options;
memcpy(dnac->hugepages_dir, hugepages_dir, DNA_CLUSTER_MAX_HP_DIR_LEN);
dnac->master = 0;
for (i = 0; i < dnac->num_slaves; i++)
dnac->active_slaves[i] = 0;
if(!(dnac->options & DNA_CLUSTER_OPT_HUGEPAGES)) {
if((dnac->extra_dma_memory = allocate_extra_dma_memory(hwdev, num_slots, slot_len, chunk_len)) == NULL) {
kfree(dnac);
dnac = NULL;
goto unlock;
}
if(dnac->extra_dma_memory->num_slots < num_slots) {
/* DNA cluster requires exactly num_slots, they are not used as an auxiliary "tank" */
free_extra_dma_memory(dnac->extra_dma_memory);
kfree(dnac);
dnac = NULL;
goto unlock;
}
}
if(num_slaves > 0 /* when using direct forwarding num_slaves could also be 0 */) {
dnac->slave_shared_memory_len = PAGE_ALIGN(slave_mem_len);
if (!(dnac->options & DNA_CLUSTER_OPT_HUGEPAGES)) {
shared_mem_size = dnac->slave_shared_memory_len * num_slaves;
if((dnac->shared_memory = allocate_shared_memory(&shared_mem_size)) == NULL) {
printk("[PF_RING] %s() ERROR: not enough memory for DNA Cluster shared memory\n", __FUNCTION__);
if(!(dnac->options & DNA_CLUSTER_OPT_HUGEPAGES))
free_extra_dma_memory(dnac->extra_dma_memory);
kfree(dnac);
dnac = NULL;
goto unlock;
}
}
}
dnac->master_persistent_memory_len = PAGE_ALIGN(master_persistent_mem_len);
shared_mem_size = dnac->master_persistent_memory_len;
if((dnac->master_persistent_memory = allocate_shared_memory(&shared_mem_size)) == NULL) {
printk("[PF_RING] %s() ERROR: not enough memory for DNA Cluster persistent memory\n", __FUNCTION__);
if (dnac->shared_memory != NULL)
vfree(dnac->shared_memory);
if(!(dnac->options & DNA_CLUSTER_OPT_HUGEPAGES))
free_extra_dma_memory(dnac->extra_dma_memory);
kfree(dnac);
dnac = NULL;
goto unlock;
}
/* global stats are at the beginning of the persistent memory */
dnac->stats = (struct dna_cluster_global_stats *) dnac->master_persistent_memory;
list_add(&dnac->list, &dna_cluster_list);
if(unlikely(enable_debug))
printk("[PF_RING] %s(%u) New DNA cluster created\n", __FUNCTION__, dna_cluster_id);
*recovered = 0;
} else {
/* recovering an old cluster */
/* checking cluster parameters */
if(dnac->num_slaves != num_slaves
|| (num_slaves > 0 && dnac->slave_shared_memory_len != PAGE_ALIGN(slave_mem_len))
|| dnac->master_persistent_memory_len != PAGE_ALIGN(master_persistent_mem_len)
|| dnac->mode != mode
|| (!(dnac->options & DNA_CLUSTER_OPT_HUGEPAGES) && (!dnac->extra_dma_memory || dnac->extra_dma_memory->num_slots != num_slots))
|| ((dnac->options & DNA_CLUSTER_OPT_HUGEPAGES) && dnac->extra_dma_memory && dnac->extra_dma_memory->num_slots > 0)) {
dnac = NULL;
goto unlock;
}
*recovered = 1;
}
dnac->master = 1;
unlock:
write_unlock(&dna_cluster_lock);
if(dnac != NULL) {
if(unlikely(enable_debug))
printk("[PF_RING] %s(%u) DNA cluster found or created [master: %u]\n",
__FUNCTION__, dna_cluster_id, dnac->master);
} else
printk("[PF_RING] %s() error\n", __FUNCTION__);
return dnac;
}
static void dna_cluster_remove(struct dna_cluster *dnac, cluster_client_type type, u_int32_t slave_id)
{
struct list_head *ptr, *tmp_ptr;
struct dna_cluster *entry;
int i, active_users = 0;
write_lock(&dna_cluster_lock);
list_for_each_safe(ptr, tmp_ptr, &dna_cluster_list) {
entry = list_entry(ptr, struct dna_cluster, list);
if(entry == dnac) {
if(type == cluster_master)
dnac->master = 0;
else if(type == cluster_slave) {
dnac->slave_waitqueue[slave_id] = NULL;
dnac->active_slaves[slave_id] = 0;
}
if(dnac->master)
active_users++;
for (i = 0; i < dnac->num_slaves; i++)
if(dnac->active_slaves[i])
active_users++;
if(active_users == 0) {
list_del(ptr);
if(entry->extra_dma_memory)
free_extra_dma_memory(entry->extra_dma_memory);
vfree(entry->master_persistent_memory);
if(entry->shared_memory != NULL)
vfree(entry->shared_memory);
kfree(entry);
if(unlikely(enable_debug))
printk("[PF_RING] %s() success\n", __FUNCTION__);
}
break;
}
}
write_unlock(&dna_cluster_lock);
}
static struct dna_cluster* dna_cluster_attach(u_int32_t dna_cluster_id, u_int32_t *slave_id, u_int32_t auto_slave_id,
wait_queue_head_t *slave_waitqueue, u_int32_t *mode, u_int32_t *options,
u_int32_t *slave_mem_len, char *hugepages_dir)
{
struct list_head *ptr, *tmp_ptr;
struct dna_cluster *entry;
struct dna_cluster *dnac = NULL;
int i, free_id_found = 0;
write_lock(&dna_cluster_lock);
list_for_each_safe(ptr, tmp_ptr, &dna_cluster_list) {
entry = list_entry(ptr, struct dna_cluster, list);
if(entry->id == dna_cluster_id) {
if(unlikely(enable_debug))
printk("[PF_RING] %s() cluster %u found\n",
__FUNCTION__, dna_cluster_id);
dnac = entry;
if(auto_slave_id) {
for (i = 0; i < dnac->num_slaves; i++) {
if(!dnac->active_slaves[i]) {
dnac->active_slaves[i] = 1;
*slave_id = i;
free_id_found = 1;
break;
}
}
if(!free_id_found) {
dnac = NULL;
goto unlock;
}
} else {
if(*slave_id >= dnac->num_slaves) {
printk("[PF_RING] %s() slave id is %u, max slave id is %u\n", __FUNCTION__, *slave_id, dnac->num_slaves - 1);
dnac = NULL;
goto unlock;
}
if(dnac->active_slaves[*slave_id]) {
printk("[PF_RING] %s() slave %u@%u already running\n", __FUNCTION__, *slave_id, dna_cluster_id);
dnac = NULL;
goto unlock;
} else {
dnac->active_slaves[*slave_id] = 1;
}
}
dnac->slave_waitqueue[*slave_id] = slave_waitqueue;
*mode = dnac->mode;
*options = dnac->options;
*slave_mem_len = dnac->slave_shared_memory_len;
memcpy(hugepages_dir, dnac->hugepages_dir, DNA_CLUSTER_MAX_HP_DIR_LEN);
break;
}
}
unlock:
write_unlock(&dna_cluster_lock);
if(unlikely(enable_debug)) {
if(dnac != NULL)
printk("[PF_RING] %s(%u) attached to DNA cluster\n",
__FUNCTION__, dna_cluster_id);
else
printk("[PF_RING] %s() error\n", __FUNCTION__);
}
return dnac;
}
/* ********************************** */
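/*
  Descriptive note (added for clarity): attach the socket to the cluster
  referee for cluster_id as its master, creating the referee if needed.
  Only one running master per cluster is allowed; *recovered tells the
  caller whether an existing referee was reused.
*/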
static int create_cluster_referee(struct pf_ring_socket *pfr, u_int32_t cluster_id, u_int32_t *recovered)
{
struct list_head *ptr, *tmp_ptr;
struct cluster_referee *entry;
struct cluster_referee *cr = NULL;
if(pfr->cluster_referee) /* already called */
return -1;
write_lock(&cluster_referee_lock);
/* checking if the cluster referee already exists */
list_for_each_safe(ptr, tmp_ptr, &cluster_referee_list) {
entry = list_entry(ptr, struct cluster_referee, list);
if(entry->id == cluster_id) {
if(unlikely(enable_debug))
printk("[PF_RING] %s: cluster %u already exists [users: %u]\n",
__FUNCTION__, cluster_id, entry->users);
if(entry->master_running) /* multiple masters not allowed */
goto unlock;
cr = entry;
break;
}
}
/* Creating a new cluster referee */
if(cr == NULL) {
if(unlikely(enable_debug))
printk("[PF_RING] %s: attempting to create a referee for cluster %u\n", __FUNCTION__, cluster_id);
cr = kcalloc(1, sizeof(struct cluster_referee), GFP_KERNEL);
if(cr == NULL) {
if(unlikely(enable_debug))
printk("[PF_RING] %s: failure [cluster: %u]\n", __FUNCTION__, cluster_id);
goto unlock;
}
cr->id = cluster_id;
INIT_LIST_HEAD(&cr->objects_list);
list_add(&cr->list, &cluster_referee_list);
if(unlikely(enable_debug))
printk("[PF_RING] %s: new cluster referee created for cluster %u\n", __FUNCTION__, cluster_id);
*recovered = 0;
} else {
*recovered = 1;
}
pfr->cluster_role = cluster_master;
pfr->cluster_referee = cr;
cr->users++;
cr->master_running = 1;
unlock:
write_unlock(&cluster_referee_lock);
if(cr == NULL) {
if(unlikely(enable_debug))
printk("[PF_RING] %s: error\n", __FUNCTION__);
return -1;
} else {
if(unlikely(enable_debug))
printk("[PF_RING] %s: cluster %u found or created\n", __FUNCTION__, cluster_id);
}
return 0;
}
static void remove_cluster_referee(struct pf_ring_socket *pfr)
{
struct list_head *ptr, *tmp_ptr;
struct cluster_referee *entry;
struct list_head *obj_ptr, *obj_tmp_ptr, *c_obj_ptr, *c_obj_tmp_ptr;
cluster_object *obj_entry = NULL, *c_obj_entry = NULL;
write_lock(&cluster_referee_lock);
list_for_each_safe(ptr, tmp_ptr, &cluster_referee_list) {
entry = list_entry(ptr, struct cluster_referee, list);
if(entry == pfr->cluster_referee) {
/* removing locked objects from socket */
list_for_each_safe(obj_ptr, obj_tmp_ptr, &pfr->locked_objects_list) {
obj_entry = list_entry(obj_ptr, cluster_object, list);
/* removing locks on current object from cluster */
list_for_each_safe(c_obj_ptr, c_obj_tmp_ptr, &entry->objects_list) {
c_obj_entry = list_entry(c_obj_ptr, cluster_object, list);
if(c_obj_entry->object_type == obj_entry->object_type && c_obj_entry->object_id == obj_entry->object_id) {
c_obj_entry->lock_bitmap &= ~obj_entry->lock_bitmap;
c_obj_entry->references -= obj_entry->references;
}
}
list_del(obj_ptr);
kfree(obj_entry);
}
if (pfr->cluster_role == cluster_master)
entry->master_running = 0;
      if (!entry->master_running) { /* when no master is running, objects without references get removed */
list_for_each_safe(c_obj_ptr, c_obj_tmp_ptr, &entry->objects_list) {
c_obj_entry = list_entry(c_obj_ptr, cluster_object, list);
if (c_obj_entry->references == 0) {
list_del(c_obj_ptr);
kfree(c_obj_entry);
}
}
}
entry->users--;
if(entry->users == 0) {
/* removing all objects from cluster */
list_for_each_safe(c_obj_ptr, c_obj_tmp_ptr, &entry->objects_list) {
c_obj_entry = list_entry(c_obj_ptr, cluster_object, list);
list_del(c_obj_ptr);
kfree(c_obj_entry);
}
list_del(ptr);
kfree(entry);
}
break;
}
}
write_unlock(&cluster_referee_lock);
pfr->cluster_referee = NULL;
}
static int publish_cluster_object(struct pf_ring_socket *pfr, u_int32_t cluster_id,
u_int32_t object_type, u_int32_t object_id, u_int32_t *references)
{
struct list_head *ptr, *tmp_ptr;
struct cluster_referee *entry, *cr = NULL;
struct list_head *obj_ptr, *obj_tmp_ptr;
cluster_object *obj_entry, *c_obj = NULL;
int rc = -1;
write_lock(&cluster_referee_lock);
list_for_each_safe(ptr, tmp_ptr, &cluster_referee_list) {
entry = list_entry(ptr, struct cluster_referee, list);
if(entry->id == cluster_id) {
cr = entry;
break;
}
}
if (cr == NULL) {
if(unlikely(enable_debug))
printk("[PF_RING] %s: cluster %u not found\n", __FUNCTION__, cluster_id);
goto unlock;
}
list_for_each_safe(obj_ptr, obj_tmp_ptr, &cr->objects_list) {
obj_entry = list_entry(obj_ptr, cluster_object, list);
if(obj_entry->object_type == object_type && obj_entry->object_id == object_id) {
/* already published or with references (recovery) */
c_obj = obj_entry;
break;
}
}
if(c_obj == NULL) {
c_obj = kcalloc(1, sizeof(cluster_object), GFP_KERNEL);
if(c_obj == NULL) {
if(unlikely(enable_debug))
printk("[PF_RING] %s: memory allocation failure\n", __FUNCTION__);
goto unlock;
}
c_obj->object_type = object_type;
c_obj->object_id = object_id;
list_add(&c_obj->list, &cr->objects_list);
}
if(unlikely(enable_debug))
printk("[PF_RING] %s: object %u.%u published in cluster %u\n", __FUNCTION__, object_type, object_id, cluster_id);
*references = c_obj->references;
rc = 0;
unlock:
write_unlock(&cluster_referee_lock);
return rc;
}
static int lock_cluster_object(struct pf_ring_socket *pfr, u_int32_t cluster_id,
u_int32_t object_type, u_int32_t object_id, u_int32_t lock_mask)
{
struct list_head *ptr, *tmp_ptr;
struct cluster_referee *entry, *cr = NULL;
struct list_head *obj_ptr, *obj_tmp_ptr;
cluster_object *obj_entry, *obj = NULL, *c_obj = NULL;
int rc = -1;
write_lock(&cluster_referee_lock);
list_for_each_safe(ptr, tmp_ptr, &cluster_referee_list) {
entry = list_entry(ptr, struct cluster_referee, list);
if(entry->id == cluster_id) {
cr = entry;
break;
}
}
if (cr == NULL) {
if(unlikely(enable_debug))
printk("[PF_RING] %s: cluster %u not found\n", __FUNCTION__, cluster_id);
goto unlock;
}
if (!cr->master_running) {
if(unlikely(enable_debug))
printk("[PF_RING] %s: cluster %u not running, new locks are not allowed\n", __FUNCTION__, cluster_id);
goto unlock;
}
/* adding locked objects to the cluster */
list_for_each_safe(obj_ptr, obj_tmp_ptr, &cr->objects_list) {
obj_entry = list_entry(obj_ptr, cluster_object, list);
if(unlikely(enable_debug))
printk("[PF_RING] %s: obj %u.%u\n", __FUNCTION__, obj_entry->object_type, obj_entry->object_id);
if(obj_entry->object_type == object_type && obj_entry->object_id == object_id) {
c_obj = obj_entry;
if (c_obj->lock_bitmap & lock_mask) {
if(unlikely(enable_debug))
printk("[PF_RING] %s: trying to lock already-locked features on cluster %u\n", __FUNCTION__, cluster_id);
goto unlock;
}
break;
}
}
if(c_obj == NULL) {
if(unlikely(enable_debug))
printk("[PF_RING] %s: object %u.%u not in the public list of cluster %u\n", __FUNCTION__, object_type, object_id, cluster_id);
goto unlock;
}
/* adding locked objects to the socket */
list_for_each_safe(obj_ptr, obj_tmp_ptr, &pfr->locked_objects_list) {
obj_entry = list_entry(obj_ptr, cluster_object, list);
if(obj_entry->object_type == object_type && obj_entry->object_id == object_id) {
obj = obj_entry;
break;
}
}
if(obj == NULL) {
obj = kcalloc(1, sizeof(cluster_object), GFP_KERNEL);
if(obj == NULL) {
if(unlikely(enable_debug))
printk("[PF_RING] %s: memory allocation failure\n", __FUNCTION__);
goto unlock;
}
obj->object_type = object_type;
obj->object_id = object_id;
list_add(&obj->list, &pfr->locked_objects_list);
}
c_obj->lock_bitmap |= lock_mask;
c_obj->references++;
obj->lock_bitmap |= lock_mask;
obj->references++;
if(unlikely(enable_debug))
printk("[PF_RING] %s: new object lock on cluster %u\n", __FUNCTION__, cluster_id);
if (pfr->cluster_referee == NULL) {
pfr->cluster_referee = cr;
cr->users++;
}
rc = 0;
unlock:
write_unlock(&cluster_referee_lock);
return rc;
}
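/*
 * Lifecycle of cluster objects handled by the two functions above
 * (descriptive comment only): a master publishes an object with
 * publish_cluster_object(), then slaves call lock_cluster_object() with a
 * lock_mask whose bits identify the features they want exclusively; a lock
 * attempt fails if any requested bit is already set in the object's
 * lock_bitmap. For illustration (the bit meanings are an assumption, the
 * code above only manipulates the mask):
 *
 *   u_int32_t lock_mask = (1 << 0)   // e.g. RX side
 *                       | (1 << 1);  // e.g. TX side
 *   rc = lock_cluster_object(pfr, cluster_id, object_type, object_id, lock_mask);
 *
 * Locks are released in remove_cluster_referee() when the socket is closed.
 */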
/* *********************************************** */
static int ring_release(struct socket *sock)
{
struct sock *sk = sock->sk;
struct pf_ring_socket *pfr = ring_sk(sk);
struct list_head *ptr, *tmp_ptr;
void *ring_memory_ptr;
int free_ring_memory = 1;
if(!sk)
return 0;
else
pfr->ring_active = 0;
/* Notify the consumer that we're shutting down */
if(pfr->kernel_consumer_plugin_id
&& plugin_registration[pfr->kernel_consumer_plugin_id]->pfring_packet_term) {
plugin_registration[pfr->kernel_consumer_plugin_id]->pfring_packet_term(pfr);
}
  /* Wait until nobody is using the ring anymore... */
while(atomic_read(&pfr->num_ring_users) > 0) {
schedule();
}
if(unlikely(enable_debug))
printk("[PF_RING] called ring_release(%s)\n", pfr->ring_netdev->dev->name);
if(pfr->kernel_consumer_options) kfree(pfr->kernel_consumer_options);
/*
The calls below must be placed outside the
write_lock...write_unlock block.
*/
sock_orphan(sk);
ring_proc_remove(pfr);
if(pfr->tx.last_tx_dev != NULL)
dev_put(pfr->tx.last_tx_dev); /* Release device */
ring_write_lock();
if(pfr->ring_netdev->dev && pfr->ring_netdev == &any_device_element)
num_any_rings--;
else {
if(pfr->ring_netdev
&& (pfr->ring_netdev->dev->ifindex < MAX_NUM_IFIDX)) {
int i;
if(num_rings_per_device[pfr->ring_netdev->dev->ifindex] > 0)
num_rings_per_device[pfr->ring_netdev->dev->ifindex]--;
for(i=0; i<MAX_NUM_RX_CHANNELS; i++) {
u_int32_t the_bit = 1 << i;
if(pfr->channel_id_mask & the_bit) {
if(device_rings[pfr->ring_netdev->dev->ifindex][i] == pfr) {
	    /*
	      Make sure this entry really belongs to us and was not left
	      behind by another ring (e.g. after a failed bind)
	    */
device_rings[pfr->ring_netdev->dev->ifindex][i] = NULL;
}
}
}
}
}
if(pfr->ring_netdev != &none_device_element) {
if(pfr->cluster_id != 0)
remove_from_cluster(sk, pfr);
}
ring_remove(sk);
sock->sk = NULL;
/* Free rules */
if(pfr->ring_netdev != &none_device_element) {
list_for_each_safe(ptr, tmp_ptr, &pfr->sw_filtering_rules) {
sw_filtering_rule_element *rule;
rule = list_entry(ptr, sw_filtering_rule_element, list);
list_del(ptr);
free_filtering_rule(rule, 1);
kfree(rule);
}
/* Filtering hash rules */
if(pfr->sw_filtering_hash) {
int i;
for(i = 0; i < perfect_rules_hash_size; i++) {
if(pfr->sw_filtering_hash[i] != NULL) {
sw_filtering_hash_bucket *scan = pfr->sw_filtering_hash[i], *next;
while(scan != NULL) {
next = scan->next;
free_sw_filtering_hash_bucket(scan);
kfree(scan);
scan = next;
}
}
}
kfree(pfr->sw_filtering_hash);
}
/* printk("[PF_RING] --> num_hw_filtering_rules=%d\n", pfr->num_hw_filtering_rules); */
/* Free Hw Filtering Rules */
if(pfr->num_hw_filtering_rules > 0) {
list_for_each_safe(ptr, tmp_ptr, &pfr->hw_filtering_rules) {
hw_filtering_rule_element *hw_rule = list_entry(ptr, hw_filtering_rule_element, list);
/* Remove hw rule */
handle_hw_filtering_rule(pfr, &hw_rule->rule, remove_hw_rule);
list_del(ptr);
kfree(hw_rule);
}
}
}
if(pfr->v_filtering_dev != NULL) {
remove_virtual_filtering_device(sk, pfr->v_filtering_dev->info.device_name);
pfr->v_filtering_dev = NULL;
/* pfr->v_filtering_dev has been freed by remove_virtual_filtering_device() */
}
/* Free the ring buffer later, vfree needs interrupts enabled */
ring_memory_ptr = pfr->ring_memory;
ring_sk(sk) = NULL;
skb_queue_purge(&sk->sk_write_queue);
sock_put(sk);
ring_write_unlock();
#ifdef VPFRING_SUPPORT
if(pfr->vpfring_host_eventfd_ctx)
eventfd_ctx_put(pfr->vpfring_host_eventfd_ctx);
#endif //VPFRING_SUPPORT
if(pfr->appl_name != NULL)
kfree(pfr->appl_name);
/* Removing userspace ring if there are no other consumer/producer */
if(pfr->userspace_ring != NULL)
free_ring_memory = userspace_ring_remove(pfr->userspace_ring, pfr->userspace_ring_type);
if(ring_memory_ptr != NULL && free_ring_memory)
vfree(ring_memory_ptr);
if(pfr->dna_cluster != NULL)
dna_cluster_remove(pfr->dna_cluster, pfr->dna_cluster_type, pfr->dna_cluster_slave_id);
if(pfr->cluster_referee != NULL)
remove_cluster_referee(pfr);
if(pfr->dna_device_entry != NULL) {
dna_device_mapping mapping;
mapping.operation = remove_device_mapping;
snprintf(mapping.device_name, sizeof(mapping.device_name), "%s", pfr->dna_device_entry->dev.netdev->name);
mapping.channel_id = pfr->dna_device_entry->dev.channel_id;
ring_map_dna_device(pfr, &mapping);
}
if(pfr->extra_dma_memory != NULL) {
free_extra_dma_memory(pfr->extra_dma_memory);
pfr->extra_dma_memory = NULL;
}
/*
Wait long enough so that other threads using ring_table
have finished referencing the socket pointer that
we will be deleting
*/
wmb();
msleep(100 /* 100 msec */);
kfree(pfr); /* Time to free */
if(unlikely(enable_debug))
printk("[PF_RING] ring_release: done\n");
/* Some housekeeping tasks */
lockless_list_empty(&delayed_memory_table, 1 /* free memory */);
return 0;
}
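/*
 * Teardown order used above (descriptive comment only): the socket is
 * first marked inactive and orphaned, its /proc entry removed and device
 * references dropped, then rings/clusters/DNA mappings are detached while
 * holding the ring write lock where required; the ring buffer itself is
 * vfree()d only after the lock is released (vfree needs interrupts
 * enabled), and the pf_ring_socket is kfree()d after a short grace period
 * so that readers still walking ring_table do not dereference freed memory.
 */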
/* ********************************** */
/*
* We create a ring for this socket and bind it to the specified device
*/
static int packet_ring_bind(struct sock *sk, char *dev_name)
{
struct pf_ring_socket *pfr = ring_sk(sk);
struct list_head *ptr, *tmp_ptr;
ring_device_element *dev = NULL;
if(dev_name == NULL)
return(-EINVAL);
  /* UserSpace RING.
   * Note: with userspace rings we expect mmap() to follow a single bind() */
if(pfr->userspace_ring != NULL) {
if(unlikely(enable_debug))
printk("[PF_RING] packet_ring_bind(): userspace_ring != NULL, failure\n");
return(-EINVAL); /* TODO bind() already called on a userspace ring */
}
if(strncmp(dev_name, "usr", 3) == 0) {
if(pfr->ring_memory != NULL) {
if(unlikely(enable_debug))
printk("[PF_RING] packet_ring_bind(): ring_memory != NULL, failure\n");
return(-EINVAL); /* TODO mmap() already called */
}
pfr->userspace_ring = userspace_ring_create(dev_name, userspace_ring_consumer,
&pfr->ring_slots_waitqueue);
if(pfr->userspace_ring == NULL)
return(-EINVAL);
pfr->userspace_ring_type = userspace_ring_consumer;
dev = &none_device_element;
} else {
list_for_each_safe(ptr, tmp_ptr, &ring_aware_device_list) {
ring_device_element *dev_ptr = list_entry(ptr, ring_device_element, device_list);
if(strcmp(dev_ptr->dev->name, dev_name) == 0) {
dev = dev_ptr;
break;
}
}
}
if((dev == NULL) || (dev->dev->type != ARPHRD_ETHER))
return(-EINVAL);
if(dev->dev->ifindex >= MAX_NUM_IFIDX)
return(-EINVAL);
if(strcmp(dev->dev->name, "none") != 0
&& strcmp(dev->dev->name, "any") != 0
&& (!(dev->dev->flags & IFF_UP))) {
if(unlikely(enable_debug))
printk("[PF_RING] packet_ring_bind(%s): down\n", dev->dev->name);
return(-ENETDOWN);
}
if(unlikely(enable_debug))
printk("[PF_RING] packet_ring_bind(%s, bucket_len=%d) called\n",
dev->dev->name, pfr->bucket_len);
/* Set for all devices */
set_bit(dev->dev->ifindex, pfr->netdev_mask), pfr->num_bound_devices++;
/* We set the master device only when we have not yet set a device */
if(pfr->ring_netdev == &none_device_element) {
/* Remove old binding (by default binding to none)
BEFORE binding to a new device
*/
ring_proc_remove(pfr);
    /*
      IMPORTANT
      Leave this as the last statement: as soon as
      ring_netdev != &none_device_element the socket is ready to be used.
    */
pfr->ring_netdev = dev, pfr->channel_id_mask = RING_ANY_CHANNEL;
/* Time to rebind to a new device */
ring_proc_add(pfr);
}
  /*
    As 'struct net_device' does not expose the number of RX queues,
    we assume it matches the number of TX queues. Once the adapter has
    received its first packet, the number of RX queues is updated with
    the real value.
  */
#if(LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31))
pfr->num_rx_channels = pfr->ring_netdev->dev->real_num_tx_queues;
#else
pfr->num_rx_channels = 1;
#endif
if((dev == &any_device_element) && (!quick_mode)) {
num_any_rings++;
} else {
if(dev->dev->ifindex < MAX_NUM_IFIDX) {
num_rings_per_device[dev->dev->ifindex]++;
} else
printk("[PF_RING] INTERNAL ERROR: ifindex %d for %s is > than MAX_NUM_IFIDX\n",
dev->dev->ifindex, dev->dev->name);
}
return(0);
}
/* ************************************* */
/* Bind to a device */
static int ring_bind(struct socket *sock, struct sockaddr *sa, int addr_len)
{
struct sock *sk = sock->sk;
if(unlikely(enable_debug))
printk("[PF_RING] ring_bind() called\n");
/*
* Check legality
*/
if(addr_len != sizeof(struct sockaddr))
return(-EINVAL);
if(sa->sa_family != PF_RING)
return(-EINVAL);
if(sa->sa_data == NULL)
return(-EINVAL);
/* Safety check: add trailing zero if missing */
sa->sa_data[sizeof(sa->sa_data) - 1] = '\0';
if(unlikely(enable_debug))
printk("[PF_RING] searching device %s\n", sa->sa_data);
#if 0
if(strcmp(sa->sa_data, "any") == 0)
dev = &any_dev;
else {
if((dev = __dev_get_by_name(
#if(LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24))
&init_net,
#endif
sa->sa_data)) == NULL) {
if(unlikely(enable_debug))
printk("[PF_RING] search failed\n");
return(-EINVAL);
}
}
#endif
return(packet_ring_bind(sk, sa->sa_data));
}
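/*
 * Illustrative userspace sketch (comment only, not compiled into the
 * module). Based on the checks in ring_bind() above, a hypothetical
 * caller passes the interface name in sa_data with sa_family = PF_RING:
 *
 *   int fd = socket(PF_RING, SOCK_RAW, htons(ETH_P_ALL));
 *   struct sockaddr sa;
 *   memset(&sa, 0, sizeof(sa));
 *   sa.sa_family = PF_RING;
 *   snprintf(sa.sa_data, sizeof(sa.sa_data), "%s", "eth1");  // device name is an example
 *   bind(fd, &sa, sizeof(sa));
 *
 * The socket() protocol value and error handling are assumptions of this
 * sketch; the authoritative checks are those in ring_bind()/packet_ring_bind().
 */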
/* ************************************* */
#if(LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11))
/*
* rvmalloc / rvfree / kvirt_to_pa copied from usbvideo.c
*/
unsigned long kvirt_to_pa(unsigned long adr)
{
unsigned long kva, ret;
kva = (unsigned long)page_address(vmalloc_to_page((void *)adr));
kva |= adr & (PAGE_SIZE - 1); /* restore the offset */
ret = __pa(kva);
return ret;
}
#endif
/* ************************************* */
static int do_memory_mmap(struct vm_area_struct *vma, unsigned long start_off, unsigned long size,
char *ptr, u_int ptr_pg_off, u_int flags, int mode)
{
unsigned long start;
/* we do not want to have this area swapped out, lock it */
vma->vm_flags |= flags;
start = vma->vm_start + start_off;
if(unlikely(enable_debug))
printk("[PF_RING] %s(mode=%d, size=%lu, ptr=%p)\n", __FUNCTION__, mode, size, ptr);
while(size > 0) {
int rc;
if(mode == 0) {
#if(LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11))
rc = remap_vmalloc_range(vma, ptr, ptr_pg_off);
break; /* Do not iterate */
#else
rc = remap_pfn_range(vma, start, kvirt_to_pa((unsigned long)ptr), PAGE_SIZE, PAGE_SHARED);
#endif
} else if(mode == 1) {
rc = remap_pfn_range(vma, start, __pa(ptr) >> PAGE_SHIFT, PAGE_SIZE, PAGE_SHARED);
} else {
rc = remap_pfn_range(vma, start, ((unsigned long)ptr) >> PAGE_SHIFT, PAGE_SIZE, PAGE_SHARED);
}
if(rc) {
if(unlikely(enable_debug))
printk("[PF_RING] remap_pfn_range() failed\n");
return(-EAGAIN);
}
start += PAGE_SIZE;
ptr += PAGE_SIZE;
if(size > PAGE_SIZE) {
size -= PAGE_SIZE;
} else {
size = 0;
}
}
return(0);
}
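/*
 * Notes on the mode argument of do_memory_mmap() above (descriptive
 * comment only):
 *   mode 0 - vmalloc()'d memory (the ring itself), remapped with
 *            remap_vmalloc_range() on >= 2.6.11, page by page otherwise;
 *   mode 1 - physically contiguous kernel memory, remapped page by page
 *            via __pa(ptr) >> PAGE_SHIFT;
 *   mode 2 - a raw physical/IO address (e.g. card memory), where ptr is
 *            already a physical address converted into a pfn.
 */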
/* ************************************* */
static int ring_mmap(struct file *file,
struct socket *sock, struct vm_area_struct *vma)
{
struct sock *sk = sock->sk;
struct pf_ring_socket *pfr = ring_sk(sk);
int i, rc;
unsigned long mem_id = vma->vm_pgoff; /* using vm_pgoff as memory id */
unsigned long size = (unsigned long)(vma->vm_end - vma->vm_start);
if(unlikely(enable_debug))
printk("[PF_RING] %s() called\n", __FUNCTION__);
if(size % PAGE_SIZE) {
if(unlikely(enable_debug))
printk("[PF_RING] %s() failed: len is not multiple of PAGE_SIZE\n", __FUNCTION__);
return(-EINVAL);
}
if(unlikely(enable_debug))
printk("[PF_RING] %s() called, size: %ld bytes [bucket_len=%d]\n",
__FUNCTION__, size, pfr->bucket_len);
/* Trick for mapping DNA chunks */
if(mem_id >= 100) {
mem_id -= 100;
if(pfr->dna_device) {
if(mem_id < pfr->dna_device->mem_info.rx.packet_memory_num_chunks) {
/* DNA: RX packet memory */
if((rc = do_memory_mmap(vma, 0, size, (void *)pfr->dna_device->rx_packet_memory[mem_id], 0, VM_LOCKED, 1)) < 0)
return(rc);
return(0);
} else if(mem_id < pfr->dna_device->mem_info.rx.packet_memory_num_chunks +
pfr->dna_device->mem_info.tx.packet_memory_num_chunks) {
/* DNA: TX packet memory */
mem_id -= pfr->dna_device->mem_info.rx.packet_memory_num_chunks;
if((rc = do_memory_mmap(vma, 0, size, (void *)pfr->dna_device->tx_packet_memory[mem_id], 0, VM_LOCKED, 1)) < 0)
return(rc);
return(0);
} else if(pfr->extra_dma_memory && mem_id < pfr->dna_device->mem_info.rx.packet_memory_num_chunks +
pfr->dna_device->mem_info.tx.packet_memory_num_chunks +
pfr->extra_dma_memory->num_chunks) {
/* Extra DMA memory */
mem_id -= pfr->dna_device->mem_info.rx.packet_memory_num_chunks;
mem_id -= pfr->dna_device->mem_info.tx.packet_memory_num_chunks;
if(pfr->extra_dma_memory->virtual_addr == NULL)
return(-EINVAL);
if((rc = do_memory_mmap(vma, 0, size, (void *)pfr->extra_dma_memory->virtual_addr[mem_id], 0, VM_LOCKED, 1)) < 0)
return(rc);
return(0);
}
}
if(pfr->dna_cluster) {
/* DNA cluster extra DMA memory */
if(pfr->dna_device) {
mem_id -= pfr->dna_device->mem_info.rx.packet_memory_num_chunks;
mem_id -= pfr->dna_device->mem_info.tx.packet_memory_num_chunks;
if(pfr->extra_dma_memory)
mem_id -= pfr->extra_dma_memory->num_chunks;
}
if (pfr->dna_cluster->options & DNA_CLUSTER_OPT_HUGEPAGES)
return(-EINVAL);
if(pfr->dna_cluster->extra_dma_memory == NULL || pfr->dna_cluster->extra_dma_memory->virtual_addr == NULL)
return(-EINVAL);
if(mem_id >= pfr->dna_cluster->extra_dma_memory->num_chunks)
return(-EINVAL);
if (size < pfr->dna_cluster->extra_dma_memory->num_chunks * pfr->dna_cluster->extra_dma_memory->chunk_len)
return(-EINVAL);
for (i = 0; i < pfr->dna_cluster->extra_dma_memory->num_chunks; i++) {
if((rc = do_memory_mmap(vma, i * pfr->dna_cluster->extra_dma_memory->chunk_len,
pfr->dna_cluster->extra_dma_memory->chunk_len,
(void *)pfr->dna_cluster->extra_dma_memory->virtual_addr[i],
0, VM_LOCKED, 1)) < 0)
return(rc);
}
return(0);
}
printk("[PF_RING] %s() failed: not DNA nor DNA cluster\n", __FUNCTION__);
return(-EINVAL);
}
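  /*
   * Memory ids handled by the switch below (descriptive comment only):
   *   0 - the packet ring itself (allocated on demand)
   *   1 - DNA RX packet descriptors
   *   2 - DNA physical card memory
   *   3 - DNA TX packet descriptors
   *   4 - DNA cluster shared memory (master)
   *   5 - DNA cluster shared memory (slave)
   *   6 - DNA cluster persistent memory (master)
   *   >= 100 (handled above) - DNA packet memory chunks and extra DMA memory
   */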
switch(mem_id) {
/* RING */
case 0:
if(pfr->dna_device != NULL || pfr->dna_cluster != NULL) {
printk("[PF_RING] %s(): trying to map ring memory on DNA socket\n", __FUNCTION__);
return(-EINVAL);
}
if(pfr->ring_memory == NULL) {
if(ring_alloc_mem(sk) != 0) {
printk("[PF_RING] %s(): unable to allocate memory\n", __FUNCTION__);
return(-EINVAL);
}
}
/* If userspace tries to mmap beyond end of our buffer, then fail */
if(size > pfr->slots_info->tot_mem) {
if(unlikely(enable_debug))
printk("[PF_RING] %s() failed: area too large [%ld > %d]\n", __FUNCTION__, size, pfr->slots_info->tot_mem);
return(-EINVAL);
}
if(unlikely(enable_debug))
printk("[PF_RING] mmap [slot_len=%d][tot_slots=%d] for ring on device %s\n",
pfr->slots_info->slot_len, pfr->slots_info->min_num_slots, pfr->ring_netdev->dev->name);
if((rc = do_memory_mmap(vma, 0, size, pfr->ring_memory, 0, VM_LOCKED, 0)) < 0)
return(rc);
break;
case 1:
/* DNA: RX packet descriptors */
if(pfr->dna_device == NULL) {
if(unlikely(enable_debug))
printk("[PF_RING] %s() failed: operation for DNA only", __FUNCTION__);
return(-EINVAL);
}
if((rc = do_memory_mmap(vma, 0, size, (void *)pfr->dna_device->rx_descr_packet_memory, 0, VM_LOCKED, 1)) < 0)
return(rc);
break;
case 2:
/* DNA: Physical card memory */
if(pfr->dna_device == NULL) {
if(unlikely(enable_debug))
printk("[PF_RING] %s() failed: operation for DNA only", __FUNCTION__);
return(-EINVAL);
}
if((rc = do_memory_mmap(vma, 0, size, (void *)pfr->dna_device->phys_card_memory, 0, (
#if(LINUX_VERSION_CODE < KERNEL_VERSION(3,7,0))
VM_IO | VM_RESERVED
#else
VM_IO | VM_DONTEXPAND | VM_DONTDUMP
#endif
), 2)) < 0)
return(rc);
break;
case 3:
/* DNA: TX packet descriptors */
if(pfr->dna_device == NULL) {
if(unlikely(enable_debug))
printk("[PF_RING] %s() failed: operation for DNA only", __FUNCTION__);
return(-EINVAL);
}
if((rc = do_memory_mmap(vma, 0, size, (void *)pfr->dna_device->tx_descr_packet_memory, 0, VM_LOCKED, 1)) < 0)
return(rc);
break;
case 4:
/* DNA cluster shared memory (master) */
if(pfr->dna_cluster == NULL || pfr->dna_cluster_type != cluster_master) {
if(unlikely(enable_debug))
printk("[PF_RING] %s() failed: operation for DNA cluster master only", __FUNCTION__);
return(-EINVAL);
}
if (pfr->dna_cluster->options & DNA_CLUSTER_OPT_HUGEPAGES) {
if(unlikely(enable_debug))
printk("[PF_RING] %s() failed: trying to allocate kernel memory when using hugepages", __FUNCTION__);
return(-EINVAL);
}
if(size > (pfr->dna_cluster->slave_shared_memory_len * pfr->dna_cluster->num_slaves)) {
if(unlikely(enable_debug))
printk("[PF_RING] %s() failed: area too large [%ld > %d]\n",
__FUNCTION__, size, pfr->dna_cluster->slave_shared_memory_len * pfr->dna_cluster->num_slaves);
return(-EINVAL);
}
if((rc = do_memory_mmap(vma, 0, size, pfr->dna_cluster->shared_memory, 0, VM_LOCKED, 0)) < 0)
return(rc);
break;
case 5:
/* DNA cluster shared memory (slave) */
if(pfr->dna_cluster == NULL || pfr->dna_cluster_type != cluster_slave) {
if(unlikely(enable_debug))
printk("[PF_RING] %s() failed: operation for DNA cluster slave only", __FUNCTION__);
return(-EINVAL);
}
if (pfr->dna_cluster->options & DNA_CLUSTER_OPT_HUGEPAGES) {
if(unlikely(enable_debug))
printk("[PF_RING] %s() failed: trying to allocate kernel memory when using hugepages", __FUNCTION__);
return(-EINVAL);
}
      if(size > pfr->dna_cluster->slave_shared_memory_len) {
        if(unlikely(enable_debug))
          printk("[PF_RING] %s() failed: area too large [%ld > %d]\n",
	         __FUNCTION__, size, pfr->dna_cluster->slave_shared_memory_len);
return(-EINVAL);
}
if((rc = do_memory_mmap(vma, 0, size, pfr->dna_cluster->shared_memory,
(pfr->dna_cluster->slave_shared_memory_len / PAGE_SIZE) * pfr->dna_cluster_slave_id,
VM_LOCKED, 0)) < 0)
return(rc);
break;
case 6:
/* DNA cluster persistent memory (master) */
if(pfr->dna_cluster == NULL || pfr->dna_cluster_type != cluster_master) {
if(unlikely(enable_debug))
printk("[PF_RING] %s() failed: operation for DNA cluster master only", __FUNCTION__);
return(-EINVAL);
}
if(size > pfr->dna_cluster->master_persistent_memory_len) {
if(unlikely(enable_debug))
printk("[PF_RING] %s() failed: area too large [%ld > %d]\n",
__FUNCTION__, size, pfr->dna_cluster->master_persistent_memory_len);
return(-EINVAL);
}
if((rc = do_memory_mmap(vma, 0, size, pfr->dna_cluster->master_persistent_memory, 0, VM_LOCKED, 0)) < 0)
return(rc);
break;
default:
return(-EAGAIN);
}
if(unlikely(enable_debug))
printk("[PF_RING] %s succeeded\n", __FUNCTION__);
return 0;
}
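/*
 * Illustrative userspace sketch (comment only): since ring_mmap() uses
 * vm_pgoff as the memory id, a hypothetical caller selects a region by
 * encoding the id in the mmap() offset as id * PAGE_SIZE:
 *
 *   long page = sysconf(_SC_PAGESIZE);
 *   void *ring = mmap(NULL, ring_len, PROT_READ | PROT_WRITE,
 *                     MAP_SHARED, fd, 0 * page);   // mem_id 0: the ring
 *   void *rx_d = mmap(NULL, descr_len, PROT_READ | PROT_WRITE,
 *                     MAP_SHARED, fd, 1 * page);   // mem_id 1: DNA RX descriptors
 *
 * ring_len/descr_len are placeholders; lengths must be page-aligned and
 * within the limits checked in ring_mmap() above.
 */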
/* ************************************* */
static int ring_recvmsg(struct kiocb *iocb, struct socket *sock,
struct msghdr *msg, size_t len, int flags)
{
struct pf_ring_socket *pfr = ring_sk(sock->sk);
u_int32_t queued_pkts, num_loops = 0;
if(unlikely(enable_debug))
printk("[PF_RING] ring_recvmsg called\n");
pfr->ring_active = 1;
while((queued_pkts = num_queued_pkts(pfr)) < MIN_QUEUED_PKTS) {
wait_event_interruptible(pfr->ring_slots_waitqueue, 1);
if(unlikely(enable_debug))
printk("[PF_RING] -> ring_recvmsg "
"[queued_pkts=%d][num_loops=%d]\n",
queued_pkts, num_loops);
if(queued_pkts > 0) {
if(num_loops++ > MAX_QUEUE_LOOPS)
break;
}
}
return(queued_pkts);
}
/* ************************************* */
static int pf_ring_inject_packet_to_stack(struct net_device *netdev, struct msghdr *msg, size_t len) {
int err = 0;
struct sk_buff *skb = __netdev_alloc_skb(netdev, len, GFP_KERNEL);
if(skb == NULL)
return -ENOBUFS;
  err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
  if(err) {
    kfree_skb(skb); /* do not leak the skb if the copy from userspace fails */
    return err;
  }
skb->protocol = eth_type_trans(skb, netdev);
err = netif_rx_ni(skb);
if (unlikely(enable_debug && err == NET_RX_SUCCESS))
printk("[PF_RING] Packet injected into the linux kernel!\n");
return err;
}
/* ************************************* */
/* This code is mostly coming from af_packet.c */
static int ring_sendmsg(struct kiocb *iocb, struct socket *sock,
struct msghdr *msg, size_t len)
{
struct pf_ring_socket *pfr = ring_sk(sock->sk);
struct sockaddr_pkt *saddr;
struct sk_buff *skb;
__be16 proto = 0;
int err = 0;
/* Userspace RING: Waking up the ring consumer */
if(pfr->userspace_ring != NULL) {
if(pfr->userspace_ring->consumer_ring_slots_waitqueue != NULL
&& !(pfr->slots_info->userspace_ring_flags & USERSPACE_RING_NO_INTERRUPT)) {
pfr->slots_info->userspace_ring_flags |= USERSPACE_RING_NO_INTERRUPT;
wake_up_interruptible(pfr->userspace_ring->consumer_ring_slots_waitqueue);
}
return(len);
}
/*
* Get and verify the address.
*/
saddr = (struct sockaddr_pkt *)msg->msg_name;
  if(saddr != NULL) {
if(msg->msg_namelen < sizeof(struct sockaddr)) {
err = -EINVAL;
goto out;
}
if(msg->msg_namelen == sizeof(struct sockaddr_pkt))
proto = saddr->spkt_protocol;
} else {
err = -ENOTCONN; /* SOCK_PACKET must be sent giving an address */
goto out;
}
/*
* Find the device first to size check it
*/
  err = -ENODEV;
  if(pfr->ring_netdev->dev == NULL)
    goto out;
err = -ENETDOWN;
if(!(pfr->ring_netdev->dev->flags & IFF_UP))
goto out;
/*
* You may not queue a frame bigger than the mtu. This is the lowest level
* raw protocol and you must do your own fragmentation at this level.
*/
err = -EMSGSIZE;
if(len > pfr->ring_netdev->dev->mtu + pfr->ring_netdev->dev->hard_header_len)
goto out;
if (pfr->stack_injection_mode) {
err = pf_ring_inject_packet_to_stack(pfr->ring_netdev->dev, msg, len);
goto out;
}
err = -ENOBUFS;
skb = sock_wmalloc(sock->sk, len + LL_RESERVED_SPACE(pfr->ring_netdev->dev), 0, GFP_KERNEL);
/*
* If the write buffer is full, then tough. At this level the user gets to
* deal with the problem - do your own algorithmic backoffs. That's far
* more flexible.
*/
if(skb == NULL)
goto out;
/*
* Fill it in
*/
/* FIXME: Save some space for broken drivers that write a
* hard header at transmission time by themselves. PPP is the
* notable one here. This should really be fixed at the driver level.
*/
skb_reserve(skb, LL_RESERVED_SPACE(pfr->ring_netdev->dev));
skb_reset_network_header(skb);
/* Try to align data part correctly */
#if(LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23))
if(pfr->ring_netdev->dev->header_ops) {
skb->data -= pfr->ring_netdev->dev->hard_header_len;
skb->tail -= pfr->ring_netdev->dev->hard_header_len;
if(len < pfr->ring_netdev->dev->hard_header_len)
skb_reset_network_header(skb);
}
#else
if(pfr->ring_netdev->dev->hard_header) {
skb->data -= pfr->ring_netdev->dev->hard_header_len;
skb->tail -= pfr->ring_netdev->dev->hard_header_len;
if(len < pfr->ring_netdev->dev->hard_header_len) {
#if(LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18))
skb_reset_network_header(skb);
#else
skb->nh.raw = skb->data;
#endif
}
}
#endif
/* Returns -EFAULT on error */
err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
skb->protocol = proto;
skb->dev = pfr->ring_netdev->dev;
skb->priority = sock->sk->sk_priority;
if(err)
goto out_free;
/*
* Now send it
*/
if(dev_queue_xmit(skb) != NETDEV_TX_OK) {
err = -ENETDOWN; /* Probably we need a better error here */
goto out;
}
pfr->slots_info->good_pkt_sent++;
return(len);
out_free:
kfree_skb(skb);
out:
if(pfr->slots_info) {
if(err == 0)
pfr->slots_info->good_pkt_sent++;
else
pfr->slots_info->pkt_send_error++;
}
return err;
}
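/*
 * Illustrative userspace transmit sketch (comment only): ring_sendmsg()
 * above reads the destination address as a struct sockaddr_pkt (the
 * protocol is taken from spkt_protocol when msg_namelen matches) and
 * transmits on the device the socket is bound to. A hypothetical call:
 *
 *   struct sockaddr_pkt spkt;
 *   memset(&spkt, 0, sizeof(spkt));
 *   spkt.spkt_family = PF_RING;
 *   spkt.spkt_protocol = htons(ETH_P_IP);   // example protocol
 *   sendto(fd, frame, frame_len, 0, (struct sockaddr *)&spkt, sizeof(spkt));
 *
 * frame/frame_len are placeholders; frames larger than MTU + hard header
 * length are rejected with -EMSGSIZE as enforced above.
 */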
/* ************************************* */
unsigned int ring_poll(struct file *file,
struct socket *sock, poll_table * wait)
{
struct pf_ring_socket *pfr = ring_sk(sock->sk);
int rc, mask = 0;
if(unlikely(enable_debug))
printk("[PF_RING] -- poll called [DNA: %p][%s]\n", pfr->dna_device,
pfr->ring_netdev->dev->name ? pfr->ring_netdev->dev->name : "???");
pfr->num_poll_calls++;
if(unlikely(pfr->ring_shutdown))
return(mask);
if(pfr->dna_device == NULL) {
/* PF_RING mode (No DNA) */
/* if(unlikely(enable_debug))
printk("[PF_RING] poll called (non DNA device)\n"); */
pfr->ring_active = 1;
// smp_rmb();
/* DNA cluster */
if(pfr->dna_cluster != NULL && pfr->dna_cluster_type == cluster_slave) {
poll_wait(file, &pfr->ring_slots_waitqueue, wait);
// if(1) /* queued packets info not available */
mask |= POLLIN | POLLRDNORM;
return(mask);
}
if(pfr->tx.enable_tx_with_bounce && pfr->header_len == long_pkt_header) {
write_lock_bh(&pfr->tx.consume_tx_packets_lock);
consume_pending_pkts(pfr, 1);
write_unlock_bh(&pfr->tx.consume_tx_packets_lock);
}
/* printk("Before [num_queued_pkts(pfr)=%u]\n", num_queued_pkts(pfr)); */
if(num_queued_pkts(pfr) < pfr->poll_num_pkts_watermark /* || pfr->num_poll_calls == 1 */) {
poll_wait(file, &pfr->ring_slots_waitqueue, wait);
// smp_mb();
}
/* printk("After [num_queued_pkts(pfr)=%u]\n", num_queued_pkts(pfr)); */
if(num_queued_pkts(pfr) >= pfr->poll_num_pkts_watermark)
mask |= POLLIN | POLLRDNORM;
return(mask);
} else {
/* DNA mode */
/* enable_debug = 1; */
if(unlikely(enable_debug))
printk("[PF_RING] poll called on DNA device [%d]\n",
*pfr->dna_device->interrupt_received);
if(pfr->dna_device->wait_packet_function_ptr == NULL) {
if(unlikely(enable_debug))
printk("[PF_RING] wait_packet_function_ptr is NULL: returning to caller\n");
return(0);
}
rc = pfr->dna_device->wait_packet_function_ptr(pfr->dna_device->rx_adapter_ptr, 1);
if(unlikely(enable_debug))
printk("[PF_RING] wait_packet_function_ptr(1) returned %d\n", rc);
if(rc == 0) {
if(unlikely(enable_debug))
printk("[PF_RING] calling poll_wait()\n");
/* No packet arrived yet */
poll_wait(file, pfr->dna_device->packet_waitqueue, wait);
if(unlikely(enable_debug))
printk("[PF_RING] poll_wait() just returned\n");
} else
rc = pfr->dna_device->wait_packet_function_ptr(pfr->dna_device->rx_adapter_ptr, 0);
if(unlikely(enable_debug))
printk("[PF_RING] wait_packet_function_ptr(0) returned %d\n", rc);
if(unlikely(enable_debug))
printk("[PF_RING] poll %s return [%d]\n",
pfr->ring_netdev->dev->name,
*pfr->dna_device->interrupt_received);
if(*pfr->dna_device->interrupt_received) {
return(POLLIN | POLLRDNORM);
} else {
return(0);
}
}
}
/* ************************************* */
int add_sock_to_cluster_list(ring_cluster_element * el, struct sock *sock)
{
if(el->cluster.num_cluster_elements == CLUSTER_LEN)
return(-1); /* Cluster full */
ring_sk_datatype(ring_sk(sock))->cluster_id = el->cluster.cluster_id;
el->cluster.sk[el->cluster.num_cluster_elements] = sock;
el->cluster.num_cluster_elements++;
return(0);
}
/* ************************************* */
int remove_from_cluster_list(struct ring_cluster *el, struct sock *sock)
{
int i, j;
for(i = 0; i < CLUSTER_LEN; i++)
if(el->sk[i] == sock) {
el->num_cluster_elements--;
if(el->num_cluster_elements > 0) {
/* The cluster contains other elements */
for(j = i; j < CLUSTER_LEN - 1; j++)
el->sk[j] = el->sk[j + 1];
el->sk[CLUSTER_LEN - 1] = NULL;
} else {
/* Empty cluster */
memset(el->sk, 0, sizeof(el->sk));
}
return(0);
}
return(-1); /* Not found */
}
/* ************************************* */
static int remove_from_cluster(struct sock *sock, struct pf_ring_socket *pfr)
{
ring_cluster_element *cluster_ptr;
u_int32_t last_list_idx;
if(unlikely(enable_debug))
printk("[PF_RING] --> remove_from_cluster(%d)\n", pfr->cluster_id);
if(pfr->cluster_id == 0 /* 0 = No Cluster */ )
return(0); /* Nothing to do */
cluster_ptr = (ring_cluster_element*)lockless_list_get_first(&ring_cluster_list, &last_list_idx);
while(cluster_ptr != NULL) {
if(cluster_ptr->cluster.cluster_id == pfr->cluster_id) {
int ret = remove_from_cluster_list(&cluster_ptr->cluster, sock);
if(cluster_ptr->cluster.num_cluster_elements == 0) {
lockless_list_remove(&ring_cluster_list, cluster_ptr);
lockless_list_add(&delayed_memory_table, cluster_ptr); /* Free later */
}
return ret;
}
cluster_ptr = (ring_cluster_element*)lockless_list_get_next(&ring_cluster_list, &last_list_idx);
}
return(-EINVAL); /* Not found */
}
/* ************************************* */
static int set_master_ring(struct sock *sock,
struct pf_ring_socket *pfr,
u_int32_t master_socket_id)
{
int rc = -1;
u_int32_t last_list_idx;
struct sock *sk;
if(unlikely(enable_debug))
printk("[PF_RING] set_master_ring(%s=%d)\n",
pfr->ring_netdev->dev ? pfr->ring_netdev->dev->name : "none",
master_socket_id);
sk = (struct sock*)lockless_list_get_first(&ring_table, &last_list_idx);
while(sk != NULL) {
    struct pf_ring_socket *sk_pfr = ring_sk(sk);

    if((sk_pfr != NULL) && (sk_pfr->ring_id == master_socket_id)) {
      /* Set the caller's master ring to the socket we just found
         (do not shadow the caller's pfr) */
      pfr->master_ring = sk_pfr;

      if(unlikely(enable_debug))
	printk("[PF_RING] Found set_master_ring(%s) -> %s\n",
	       pfr->ring_netdev->dev ? pfr->ring_netdev->dev->name : "none",
	       pfr->master_ring->ring_netdev->dev->name);

      rc = 0;
      break;
    } else if(sk_pfr != NULL) {
      if(unlikely(enable_debug))
	printk("[PF_RING] Skipping socket(%s)=%d\n",
	       sk_pfr->ring_netdev->dev ? sk_pfr->ring_netdev->dev->name : "none",
	       sk_pfr->ring_id);
    }
sk = (struct sock*)lockless_list_get_next(&ring_table, &last_list_idx);
}
if(unlikely(enable_debug))
printk("[PF_RING] set_master_ring(%s, socket_id=%d) = %d\n",
pfr->ring_netdev->dev ? pfr->ring_netdev->dev->name : "none",
master_socket_id, rc);
return(rc);
}
/* ************************************* */
static int add_sock_to_cluster(struct sock *sock,
struct pf_ring_socket *pfr,
struct add_to_cluster *cluster)
{
ring_cluster_element *cluster_ptr;
u_int32_t last_list_idx;
if(unlikely(enable_debug))
printk("[PF_RING] --> add_sock_to_cluster(%d)\n", cluster->clusterId);
if(cluster->clusterId == 0 /* 0 = No Cluster */ )
return(-EINVAL);
if(pfr->cluster_id != 0)
remove_from_cluster(sock, pfr);
cluster_ptr = (ring_cluster_element*)lockless_list_get_first(&ring_cluster_list, &last_list_idx);
while(cluster_ptr != NULL) {
if(cluster_ptr->cluster.cluster_id == cluster->clusterId) {
return(add_sock_to_cluster_list(cluster_ptr, sock));
}
cluster_ptr = (ring_cluster_element*)lockless_list_get_next(&ring_cluster_list, &last_list_idx);
}
/* There's no existing cluster. We need to create one */
if((cluster_ptr = kmalloc(sizeof(ring_cluster_element), GFP_KERNEL)) == NULL)
return(-ENOMEM);
INIT_LIST_HEAD(&cluster_ptr->list);
cluster_ptr->cluster.cluster_id = cluster->clusterId;
cluster_ptr->cluster.num_cluster_elements = 1;
cluster_ptr->cluster.hashing_mode = cluster->the_type; /* Default */
cluster_ptr->cluster.hashing_id = 0;
memset(cluster_ptr->cluster.sk, 0, sizeof(cluster_ptr->cluster.sk));
cluster_ptr->cluster.sk[0] = sock;
pfr->cluster_id = cluster->clusterId;
lockless_list_add(&ring_cluster_list, cluster_ptr);
return(0); /* 0 = OK */
}
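/*
 * Illustrative userspace sketch (comment only): based on the
 * SO_ADD_TO_CLUSTER handling in ring_setsockopt() below, a hypothetical
 * caller fills a struct add_to_cluster (field names as used above):
 *
 *   struct add_to_cluster c;
 *   memset(&c, 0, sizeof(c));
 *   c.clusterId = 7;                    // example cluster id, must be != 0
 *   c.the_type  = cluster_per_flow;     // hashing mode; exact enum value is an assumption
 *   setsockopt(fd, 0, SO_ADD_TO_CLUSTER, &c, sizeof(c));
 *
 * The setsockopt() level (0 here) is an assumption of this sketch; the
 * kernel side only validates optname and optlen.
 */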
/* ************************************* */
static int ring_map_dna_device(struct pf_ring_socket *pfr,
dna_device_mapping *mapping)
{
struct list_head *ptr, *tmp_ptr;
if(unlikely(enable_debug))
printk("[PF_RING] ring_map_dna_device(%s@%d): %s\n",
mapping->device_name,
mapping->channel_id,
(mapping->operation == remove_device_mapping) ? "remove" : "add");
if(mapping->operation == remove_device_mapping) {
/* Unlock driver */
u8 found = 0;
list_for_each_safe(ptr, tmp_ptr, &ring_dna_devices_list) {
dna_device_list *entry = list_entry(ptr, dna_device_list, list);
if((!strcmp(entry->dev.netdev->name, mapping->device_name))
&& (entry->dev.channel_id == mapping->channel_id)
&& entry->num_bound_sockets) {
int i;
for(i=0; i<MAX_NUM_DNA_BOUND_SOCKETS; i++)
if(entry->bound_sockets[i] == pfr) {
entry->bound_sockets[i] = NULL;
found = 1;
break;
}
if(!found) {
if(unlikely(enable_debug))
printk("[PF_RING] ring_map_dna_device(remove_device_mapping, %s, %u): something got wrong\n",
mapping->device_name, mapping->channel_id);
return(-1); /* Something got wrong */
}
entry->num_bound_sockets--;
if(pfr->dna_device != NULL) {
if(unlikely(enable_debug))
printk("[PF_RING] ring_map_dna_device(%s): removed mapping [num_bound_sockets=%u]\n",
mapping->device_name, entry->num_bound_sockets);
pfr->dna_device->usage_notification(pfr->dna_device->rx_adapter_ptr,
pfr->dna_device->tx_adapter_ptr,
0 /* unlock */);
// pfr->dna_device = NULL;
}
/* Continue for all devices: no break */
}
}
if(unlikely(enable_debug))
printk("[PF_RING] ring_map_dna_device(%s): removed mapping\n", mapping->device_name);
return(0);
} else {
ring_proc_remove(pfr);
list_for_each_safe(ptr, tmp_ptr, &ring_dna_devices_list) {
dna_device_list *entry = list_entry(ptr, dna_device_list, list);
if((!strcmp(entry->dev.netdev->name, mapping->device_name))
&& (entry->dev.channel_id == mapping->channel_id)) {
int i, found = 0;
if(unlikely(enable_debug))
printk("[PF_RING] ==>> %s@%d [num_bound_sockets=%d][%p]\n",
entry->dev.netdev->name, mapping->channel_id,
entry->num_bound_sockets, entry);
for(i=0; i<MAX_NUM_DNA_BOUND_SOCKETS; i++)
if(entry->bound_sockets[i] == NULL) {
entry->bound_sockets[i] = pfr;
found = 1;
break;
}
if(!found) {
if(unlikely(enable_debug))
printk("[PF_RING] %s(add_device_mapping, %s, %u, %s): "
"something got wrong (too many DNA devices open)\n", __FUNCTION__,
mapping->device_name, mapping->channel_id, direction2string(pfr->mode));
return(-1); /* Something got wrong: too many mappings */
}
entry->num_bound_sockets++, pfr->dna_device_entry = entry;
pfr->dna_device = &entry->dev;
if(unlikely(enable_debug))
printk("[PF_RING] ring_map_dna_device(%s, %u): added mapping\n",
mapping->device_name, mapping->channel_id);
	  /* Now let's set the real ring_netdev device */
found = 0;
list_for_each_safe(ptr, tmp_ptr, &ring_aware_device_list) {
ring_device_element *dev_ptr = list_entry(ptr, ring_device_element, device_list);
if(!strcmp(dev_ptr->dev->name, mapping->device_name)) {
if(unlikely(enable_debug))
printk("[PF_RING] ==>> %s [%p]\n", dev_ptr->dev->name, dev_ptr);
pfr->ring_netdev = dev_ptr;
found = 1;
break;
}
}
if(!found) {
printk("[PF_RING] %s(add_device_mapping, %s, %u, %s): "
"something got wrong (device not found)\n", __FUNCTION__,
mapping->device_name, mapping->channel_id, direction2string(pfr->mode));
return(-1); /* Something got wrong */
}
/* Lock driver */
if(unlikely(enable_debug))
printk("[PF_RING] ===> ring_map_dna_device(%s): added mapping [num_bound_sockets=%u]\n",
mapping->device_name, entry->num_bound_sockets);
pfr->dna_device->usage_notification(pfr->dna_device->rx_adapter_ptr,
pfr->dna_device->tx_adapter_ptr,
1 /* lock */);
ring_proc_add(pfr);
return(0);
}
}
}
if(unlikely(enable_debug))
printk("[PF_RING] ring_map_dna_device(%s, %u): mapping failed or not a dna device\n",
mapping->device_name, mapping->channel_id);
return(-1);
}
/* ************************************* */
static int get_fragment_app_id(u_int32_t ipv4_src_host, u_int32_t ipv4_dst_host,
u_int16_t fragment_id) {
u_int hash_id = fragment_id % NUM_FRAGMENTS_HASH_SLOTS;
struct hash_fragment_node *head, *prev, *next;
u_int8_t app_id;
if(unlikely(enable_debug))
printk("[PF_RING] %s(fragment_id=%d) [num_cluster_fragments=%d]\n",
__FUNCTION__, fragment_id, num_cluster_fragments);
if(cluster_fragment_hash[hash_id] == NULL)
return(-1); /* Not found */
write_lock(&cluster_fragments_lock);
head = cluster_fragment_hash[hash_id], prev = NULL;
while(head != NULL) {
next = head->next;
if((head->ip_fragment_id == fragment_id)
&& (head->ipv4_src_host == ipv4_src_host)
&& (head->ipv4_dst_host == ipv4_dst_host)) {
/* Found: 1) return queue_id and 2) delete this entry */
app_id = head->cluster_app_id;
if(prev == NULL)
cluster_fragment_hash[hash_id] = next;
else
prev->next = next;
kfree(head), num_cluster_fragments--;
if(unlikely(enable_debug))
printk("[PF_RING] %s(fragment_id=%d): found %d [num_cluster_fragments=%d]\n",
__FUNCTION__, fragment_id, app_id, num_cluster_fragments);
write_unlock(&cluster_fragments_lock);
return(app_id);
}
prev = head;
head = next;
} /* while */
write_unlock(&cluster_fragments_lock);
return(-1); /* Not found */
}
/* ************************************* */
static void purge_idle_fragment_cache(void)
{
if(likely(num_cluster_fragments == 0))
return;
else if((next_fragment_purge_jiffies < jiffies)
|| (num_cluster_fragments > (5*NUM_FRAGMENTS_HASH_SLOTS))) {
int i;
if(unlikely(enable_debug))
printk("[PF_RING] %s() [num_cluster_fragments=%d]\n", __FUNCTION__, num_cluster_fragments);
for(i=0; i<NUM_FRAGMENTS_HASH_SLOTS; i++) {
if(cluster_fragment_hash[i] != NULL) {
struct hash_fragment_node *next, *head, *prev;
head = cluster_fragment_hash[i], prev = NULL;
while(head != NULL) {
next = head->next;
if(head->expire_jiffies < jiffies) {
kfree(head), num_cluster_fragments--;
if(prev == NULL)
cluster_fragment_hash[i] = next;
else
prev->next = next;
}
head = next;
}
}
} /* for */
next_fragment_purge_jiffies = jiffies + 5*HZ /* 5 seconds in jiffies */;
}
}
/* ************************************* */
static void add_fragment_app_id(u_int32_t ipv4_src_host, u_int32_t ipv4_dst_host,
u_int16_t fragment_id, u_int8_t app_id) {
u_int hash_id = fragment_id % NUM_FRAGMENTS_HASH_SLOTS;
struct hash_fragment_node *node;
if(num_cluster_fragments > MAX_CLUSTER_FRAGMENTS_LEN) {
/* Avoid filling up all memory */
return;
}
if(unlikely(enable_debug))
printk("[PF_RING] %s(fragment_id=%d, app_id=%d) [num_cluster_fragments=%d]\n",
__FUNCTION__, fragment_id, app_id, num_cluster_fragments);
write_lock(&cluster_fragments_lock);
  /* 1. Check if the same entry is already in the cache */
node = cluster_fragment_hash[hash_id];
while(node != NULL) {
if((node->ip_fragment_id == fragment_id)
&& (node->ipv4_src_host == ipv4_src_host)
&& (node->ipv4_dst_host == ipv4_dst_host)) {
/* Duplicate found */
node->cluster_app_id = app_id, node->expire_jiffies = jiffies + 5*HZ;
write_unlock(&cluster_fragments_lock);
return;
} else
node = node->next;
}
/* 2. Not found, let's add it */
if((node = kmalloc(sizeof(struct hash_fragment_node), GFP_ATOMIC)) == NULL) {
printk("[PF_RING] Out of memory\n");
write_unlock(&cluster_fragments_lock);
return;
}
node->ip_fragment_id = fragment_id, node->ipv4_src_host = ipv4_src_host,
node->ipv4_dst_host = ipv4_dst_host, node->cluster_app_id = app_id,
node->expire_jiffies = jiffies + 5*HZ;
node->next = cluster_fragment_hash[hash_id];
cluster_fragment_hash[hash_id] = node;
num_cluster_fragments++, next_fragment_purge_jiffies = node->expire_jiffies;
purge_idle_fragment_cache(); /* Just in case there are too many elements */
write_unlock(&cluster_fragments_lock);
}
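/*
 * Fragment cache used by the two functions above (descriptive comment
 * only): entries are hashed by fragment_id % NUM_FRAGMENTS_HASH_SLOTS and
 * keyed by (ipv4_src_host, ipv4_dst_host, ip_fragment_id). add_* stores
 * the cluster application id of the first fragment with a ~5 second
 * expiry; get_* returns that id for subsequent fragments and removes the
 * entry, so non-first fragments can be steered to the same cluster slot.
 */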
/* ************************************* */
static void purge_idle_hash_rules(struct pf_ring_socket *pfr,
u_int16_t rule_inactivity)
{
int i, num_purged_rules = 0;
unsigned long expire_jiffies =
jiffies - msecs_to_jiffies(1000 * rule_inactivity);
if(unlikely(enable_debug))
printk("[PF_RING] purge_idle_hash_rules(rule_inactivity=%d)\n",
rule_inactivity);
/* Free filtering hash rules inactive for more than rule_inactivity seconds */
if(pfr->sw_filtering_hash != NULL) {
for(i = 0; i < perfect_rules_hash_size; i++) {
if(pfr->sw_filtering_hash[i] != NULL) {
sw_filtering_hash_bucket *scan = pfr->sw_filtering_hash[i], *next, *prev = NULL;
while(scan != NULL) {
int rc = 0;
next = scan->next;
if(scan->rule.plugin_action.plugin_id > 0
&& plugin_registration[scan->rule.plugin_action.plugin_id]
&& plugin_registration[scan->rule.plugin_action.plugin_id]->pfring_plugin_purge_idle)
rc = plugin_registration[scan->rule.plugin_action.plugin_id]->
pfring_plugin_purge_idle(pfr, NULL, scan, rule_inactivity);
if(scan->rule.internals.jiffies_last_match < expire_jiffies || rc > 0) {
/* Expired rule: free it */
if(unlikely(enable_debug))
printk ("[PF_RING] Purging hash rule "
/* "[last_match=%u][expire_jiffies=%u]" */
"[%d.%d.%d.%d:%d <-> %d.%d.%d.%d:%d][purged=%d][tot_rules=%d]\n",
/*
(unsigned int)scan->rule.internals.jiffies_last_match,
(unsigned int)expire_jiffies,
*/
((scan->rule.host4_peer_a >> 24) & 0xff),
((scan->rule.host4_peer_a >> 16) & 0xff),
((scan->rule.host4_peer_a >> 8) & 0xff),
((scan->rule.host4_peer_a >> 0) & 0xff),
scan->rule.port_peer_a,
((scan->rule.host4_peer_b >> 24) & 0xff),
((scan->rule.host4_peer_b >> 16) & 0xff),
((scan->rule.host4_peer_b >> 8) & 0xff),
((scan->rule.host4_peer_b >> 0) & 0xff),
scan->rule.port_peer_b,
num_purged_rules,
pfr->num_sw_filtering_rules);
free_sw_filtering_hash_bucket(scan);
kfree(scan);
if(prev == NULL)
pfr->sw_filtering_hash[i] = next;
else
prev->next = next;
pfr->num_sw_filtering_rules--;
num_purged_rules++;
} else
prev = scan;
scan = next;
}
}
}
}
if(unlikely(enable_debug))
printk("[PF_RING] Purged %d hash rules [tot_rules=%d]\n",
num_purged_rules, pfr->num_sw_filtering_rules);
}
/* ************************************* */
static void purge_idle_rules(struct pf_ring_socket *pfr,
u_int16_t rule_inactivity)
{
struct list_head *ptr, *tmp_ptr;
int num_purged_rules = 0;
unsigned long expire_jiffies =
jiffies - msecs_to_jiffies(1000 * rule_inactivity);
if(unlikely(enable_debug))
printk("[PF_RING] %s(rule_inactivity=%d) [num_sw_filtering_rules=%d]\n",
__FUNCTION__, rule_inactivity, pfr->num_sw_filtering_rules);
/* Free filtering rules inactive for more than rule_inactivity seconds */
if(pfr->num_sw_filtering_rules > 0) {
list_for_each_safe(ptr, tmp_ptr, &pfr->sw_filtering_rules) {
int rc = 0;
sw_filtering_rule_element *entry;
entry = list_entry(ptr, sw_filtering_rule_element, list);
/* Plugin callback is evaluated even if the rule has the "locked" field set. */
if(entry->rule.plugin_action.plugin_id > 0
&& plugin_registration[entry->rule.plugin_action.plugin_id]
&& plugin_registration[entry->rule.plugin_action.plugin_id]->pfring_plugin_purge_idle)
rc = plugin_registration[entry->rule.plugin_action.plugin_id]->
pfring_plugin_purge_idle(pfr, entry, NULL, rule_inactivity);
if((!entry->rule.locked && entry->rule.internals.jiffies_last_match < expire_jiffies) || rc > 0) {
/* Expired rule: free it */
if(unlikely(enable_debug))
printk ("[PF_RING] Purging rule "
// "[last_match=%u][expire_jiffies=%u]"
"[%d.%d.%d.%d:%d -> %d.%d.%d.%d:%d][purged=%d][tot_rules=%d]\n",
//(unsigned int) entry->rule.internals.jiffies_last_match,
//(unsigned int) expire_jiffies,
((entry->rule.core_fields.shost.v4 >> 24) & 0xff),
((entry->rule.core_fields.shost.v4 >> 16) & 0xff),
((entry->rule.core_fields.shost.v4 >> 8) & 0xff),
((entry->rule.core_fields.shost.v4 >> 0) & 0xff),
entry->rule.core_fields.sport_low,
((entry->rule.core_fields.dhost.v4 >> 24) & 0xff),
((entry->rule.core_fields.dhost.v4 >> 16) & 0xff),
((entry->rule.core_fields.dhost.v4 >> 8) & 0xff),
((entry->rule.core_fields.dhost.v4 >> 0) & 0xff),
entry->rule.core_fields.dport_low,
num_purged_rules,
pfr->num_sw_filtering_rules);
list_del(ptr);
free_filtering_rule(entry, 0);
kfree(entry);
pfr->num_sw_filtering_rules--;
num_purged_rules++;
}
}
}
if(unlikely(enable_debug))
printk("[PF_RING] Purged %d rules [tot_rules=%d]\n",
num_purged_rules, pfr->num_sw_filtering_rules);
}
/* ************************************* */
static int ring_proc_stats_read(char *buf, char **start, off_t offset,
int len, int *unused, void *data)
{
int rlen = 0;
if(data != NULL) {
struct pf_ring_socket *s = (struct pf_ring_socket*)data;
rlen = sprintf(buf, "%s\n", s->statsString);
}
return(rlen);
}
/* ************************************* */
int setSocketStats(struct pf_ring_socket *s, char *statsString) {
/* 1 - Check if the /proc entry exists otherwise create it */
if((ring_proc_stats_dir != NULL)
&& (s->sock_proc_stats_name[0] == '\0')) {
struct proc_dir_entry *entry;
snprintf(s->sock_proc_stats_name, sizeof(s->sock_proc_stats_name),
"%d-%s.%d", s->ring_pid,
s->ring_netdev->dev->name, s->ring_id);
if((entry = create_proc_read_entry(s->sock_proc_stats_name,
0 /* ro */,
ring_proc_stats_dir,
ring_proc_stats_read, s)) == NULL) {
s->sock_proc_stats_name[0] = '\0';
return(-1);
}
}
/* 2 - Set stats string */
strncpy(s->statsString, statsString, sizeof(s->statsString)-1);
return(0);
}
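/*
 * Descriptive comment only: the per-socket stats entry created above is
 * named "<pid>-<device>.<ring_id>" under the PF_RING stats /proc
 * directory, and ring_proc_stats_read() simply dumps the statsString set
 * by the last setSocketStats() call, so a userspace application may
 * publish its own counters through this channel.
 */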
/* ************************************* */
/* Code taken/inspired from core/sock.c */
static int ring_setsockopt(struct socket *sock,
int level, int optname,
char __user * optval,
#if(LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31))
unsigned
#endif
int optlen)
{
struct pf_ring_socket *pfr = ring_sk(sock->sk);
int val, found, ret = 0 /* OK */, i;
u_int32_t ring_id;
struct add_to_cluster cluster;
u_int32_t channel_id_mask;
char applName[32 + 1] = { 0 };
char statsString[256 + 1] = { 0 };
u_int16_t rule_id, rule_inactivity;
packet_direction direction;
socket_mode sockmode;
hw_filtering_rule hw_rule;
struct list_head *ptr, *tmp_ptr;
#ifdef VPFRING_SUPPORT
struct vpfring_eventfd_info eventfd_i;
struct file *eventfp;
#endif //VPFRING_SUPPORT
if(pfr == NULL)
return(-EINVAL);
if(get_user(val, (int *)optval))
return(-EFAULT);
found = 1;
if(unlikely(enable_debug))
printk("[PF_RING] --> ring_setsockopt(optname=%u)\n", optname);
switch(optname) {
case SO_ATTACH_FILTER:
ret = -EINVAL;
if(unlikely(enable_debug))
printk("[PF_RING] BPF filter (%d)\n", 0);
if(optlen == sizeof(struct sock_fprog)) {
unsigned int fsize;
struct sock_fprog fprog;
struct sk_filter *filter, *old_filter;
ret = -EFAULT;
if(unlikely(enable_debug))
printk("[PF_RING] BPF filter (%d)\n", 1);
      /*
	NOTE
	Do not call copy_from_user while holding a
	spinlock (e.g. ring_mgmt_lock) as this caused
	problems when certain debugging was enabled under
	2.6.5 -- including hard lockups of the machine.
      */
if(copy_from_user(&fprog, optval, sizeof(fprog)))
break;
/* Fix below courtesy of Noam Dev <noamdev@gmail.com> */
fsize = sizeof(struct sock_filter) * fprog.len;
filter = kmalloc(fsize + sizeof(struct sk_filter), GFP_KERNEL);
if(filter == NULL) {
ret = -ENOMEM;
break;
}
if(copy_from_user(filter->insns, fprog.filter, fsize)) {
kfree(filter);
break;
}
filter->len = fprog.len;
if(sk_chk_filter(filter->insns, filter->len) != 0) {
/* Bad filter specified */
kfree(filter);
break;
}
old_filter = pfr->bpfFilter;
/* get the lock, set the filter, release the lock */
write_lock_bh(&pfr->ring_rules_lock);
pfr->bpfFilter = filter;
write_unlock_bh(&pfr->ring_rules_lock);
if(old_filter != NULL)
kfree(old_filter);
ret = 0;
if(unlikely(enable_debug))
printk("[PF_RING] BPF filter attached successfully [len=%d]\n",
filter->len);
}
break;
case SO_DETACH_FILTER:
if(unlikely(enable_debug))
printk("[PF_RING] Removing BPF filter [%p]\n", pfr->bpfFilter);
write_lock_bh(&pfr->ring_rules_lock);
found = 1;
if(pfr->bpfFilter != NULL) {
kfree(pfr->bpfFilter);
pfr->bpfFilter = NULL;
} else
ret = -ENONET;
write_unlock_bh(&pfr->ring_rules_lock);
break;
case SO_ADD_TO_CLUSTER:
if(optlen != sizeof(cluster))
return(-EINVAL);
if(copy_from_user(&cluster, optval, sizeof(cluster)))
return(-EFAULT);
write_lock_bh(&pfr->ring_rules_lock);
ret = add_sock_to_cluster(sock->sk, pfr, &cluster);
write_unlock_bh(&pfr->ring_rules_lock);
break;
case SO_REMOVE_FROM_CLUSTER:
write_lock_bh(&pfr->ring_rules_lock);
ret = remove_from_cluster(sock->sk, pfr);
write_unlock_bh(&pfr->ring_rules_lock);
break;
case SO_SET_CHANNEL_ID:
if(optlen != sizeof(channel_id_mask))
return(-EINVAL);
if(copy_from_user(&channel_id_mask, optval, sizeof(channel_id_mask)))
return(-EFAULT);
pfr->num_channels_per_ring = 0;
/*
We need to set the device_rings[] for all channels set
in channel_id_mask
*/
if(quick_mode) {
for(i=0; i<pfr->num_rx_channels; i++) {
u_int32_t the_bit = 1 << i;
if(channel_id_mask & the_bit) {
if(device_rings[pfr->ring_netdev->dev->ifindex][i] != NULL)
return(-EINVAL); /* Socket already bound on this device */
}
}
}
/* Everything seems to work thus let's set the values */
for(i=0; i<pfr->num_rx_channels; i++) {
u_int32_t the_bit = 1 << i;
if(channel_id_mask & the_bit) {
if(unlikely(enable_debug)) printk("[PF_RING] Setting channel %d\n", i);
if(quick_mode) {
device_rings[pfr->ring_netdev->dev->ifindex][i] = pfr;
}
pfr->num_channels_per_ring++;
}
}
pfr->channel_id_mask = channel_id_mask;
if(unlikely(enable_debug))
printk("[PF_RING] [pfr->channel_id_mask=%08X][channel_id_mask=%08X]\n",
pfr->channel_id_mask, channel_id_mask);
ret = 0;
break;
case SO_SET_APPL_NAME:
if(optlen >
sizeof(applName) /* Names should not be too long */ )
return(-EINVAL);
if(copy_from_user(&applName, optval, optlen))
return(-EFAULT);
if(pfr->appl_name != NULL)
kfree(pfr->appl_name);
pfr->appl_name = (char *)kmalloc(optlen + 1, GFP_ATOMIC);
if(pfr->appl_name != NULL) {
memcpy(pfr->appl_name, applName, optlen);
pfr->appl_name[optlen] = '\0';
}
ret = 0;
break;
case SO_SET_PACKET_DIRECTION:
if(optlen != sizeof(direction))
return(-EINVAL);
if(copy_from_user(&direction, optval, sizeof(direction)))
return(-EFAULT);
pfr->direction = direction;
if(unlikely(enable_debug))
printk("[PF_RING] SO_SET_PACKET_DIRECTION [pfr->direction=%s][direction=%s]\n",
direction2string(pfr->direction), direction2string(direction));
ret = 0;
break;
case SO_SET_SOCKET_MODE:
if(optlen != sizeof(sockmode))
return(-EINVAL);
if(copy_from_user(&sockmode, optval, sizeof(sockmode)))
return(-EFAULT);
pfr->mode = sockmode;
if(unlikely(enable_debug))
printk("[PF_RING] SO_SET_LINK_DIRECTION [pfr->mode=%s][mode=%s]\n",
sockmode2string(pfr->mode), sockmode2string(sockmode));
ret = 0;
break;
case SO_PURGE_IDLE_HASH_RULES:
if(optlen != sizeof(rule_inactivity))
return(-EINVAL);
if(copy_from_user(&rule_inactivity, optval, sizeof(rule_inactivity)))
return(-EFAULT);
else {
write_lock_bh(&pfr->ring_rules_lock);
purge_idle_hash_rules(pfr, rule_inactivity);
write_unlock_bh(&pfr->ring_rules_lock);
ret = 0;
}
break;
case SO_PURGE_IDLE_RULES:
if(optlen != sizeof(rule_inactivity))
return(-EINVAL);
if(copy_from_user(&rule_inactivity, optval, sizeof(rule_inactivity)))
return(-EFAULT);
else {
write_lock_bh(&pfr->ring_rules_lock);
purge_idle_rules(pfr, rule_inactivity);
write_unlock_bh(&pfr->ring_rules_lock);
ret = 0;
}
break;
case SO_TOGGLE_FILTER_POLICY:
if(optlen != sizeof(u_int8_t))
return(-EINVAL);
else {
u_int8_t new_policy;
if(copy_from_user(&new_policy, optval, optlen))
return(-EFAULT);
write_lock_bh(&pfr->ring_rules_lock);
pfr->sw_filtering_rules_default_accept_policy = new_policy;
write_unlock_bh(&pfr->ring_rules_lock);
/*
if(unlikely(enable_debug))
printk("[PF_RING] SO_TOGGLE_FILTER_POLICY: default policy is %s\n",
pfr->sw_filtering_rules_default_accept_policy ? "accept" : "drop");
*/
}
break;
case SO_ADD_FILTERING_RULE:
if(unlikely(enable_debug))
printk("[PF_RING] +++ SO_ADD_FILTERING_RULE(len=%d)(len=%u)\n",
optlen, (unsigned int)sizeof(ip_addr));
if(pfr->ring_netdev == &none_device_element)
return(-EFAULT);
if(optlen == sizeof(filtering_rule)) {
int ret;
sw_filtering_rule_element *rule;
if(unlikely(enable_debug))
printk("[PF_RING] Allocating memory [filtering_rule]\n");
      rule = (sw_filtering_rule_element *)
	kcalloc(1, sizeof(sw_filtering_rule_element), GFP_KERNEL);

      if(rule == NULL)
	return(-EFAULT);

      if(copy_from_user(&rule->rule, optval, optlen)) {
	kfree(rule); /* do not leak the rule on copy failure */
	return(-EFAULT);
      }
INIT_LIST_HEAD(&rule->list);
write_lock_bh(&pfr->ring_rules_lock);
ret = add_sw_filtering_rule_element(pfr, rule);
write_unlock_bh(&pfr->ring_rules_lock);
if(ret != 0) { /* even if rc == -EEXIST */
kfree(rule);
return(ret);
}
} else if(optlen == sizeof(hash_filtering_rule)) {
/* This is a hash rule */
int ret;
sw_filtering_hash_bucket *rule;
      rule = (sw_filtering_hash_bucket *)
	kcalloc(1, sizeof(sw_filtering_hash_bucket), GFP_KERNEL);

      if(rule == NULL)
	return(-EFAULT);

      if(copy_from_user(&rule->rule, optval, optlen)) {
	kfree(rule); /* do not leak the bucket on copy failure */
	return(-EFAULT);
      }
write_lock_bh(&pfr->ring_rules_lock);
ret = handle_sw_filtering_hash_bucket(pfr, rule, 1 /* add */);
write_unlock_bh(&pfr->ring_rules_lock);
if(ret != 0) { /* even if rc == -EEXIST */
kfree(rule);
return(ret);
}
} else {
printk("[PF_RING] Bad rule length (%d): discarded\n", optlen);
return(-EFAULT);
}
break;
case SO_REMOVE_FILTERING_RULE:
if(pfr->ring_netdev == &none_device_element) return(-EFAULT);
if(optlen == sizeof(u_int16_t /* rule_id */ )) {
/* This is a list rule */
int rc;
if(copy_from_user(&rule_id, optval, optlen))
return(-EFAULT);
write_lock_bh(&pfr->ring_rules_lock);
rc = remove_sw_filtering_rule_element(pfr, rule_id);
write_unlock_bh(&pfr->ring_rules_lock);
if(rc == 0) {
if(unlikely(enable_debug))
printk("[PF_RING] SO_REMOVE_FILTERING_RULE: rule %d does not exist\n", rule_id);
return(-EFAULT); /* Rule not found */
}
} else if(optlen == sizeof(hash_filtering_rule)) {
/* This is a hash rule */
sw_filtering_hash_bucket rule;
int rc;
if(copy_from_user(&rule.rule, optval, optlen))
return(-EFAULT);
write_lock_bh(&pfr->ring_rules_lock);
rc = handle_sw_filtering_hash_bucket(pfr, &rule, 0 /* delete */ );
write_unlock_bh(&pfr->ring_rules_lock);
if(rc != 0)
return(rc);
} else
return(-EFAULT);
break;
case SO_SET_SAMPLING_RATE:
if(optlen != sizeof(pfr->sample_rate))
return(-EINVAL);
if(copy_from_user(&pfr->sample_rate, optval, sizeof(pfr->sample_rate)))
return(-EFAULT);
break;
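/* SO_ACTIVATE_RING: before activating a DNA socket, make sure no other socket
bound to the same DNA device is already active with a conflicting mode/direction
(only one active DNA socket per interface and link direction). */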
case SO_ACTIVATE_RING:
if(unlikely(enable_debug))
printk("[PF_RING] * SO_ACTIVATE_RING *\n");
if(pfr->dna_device_entry != NULL && !pfr->ring_active /* skip the check if this socket is already active */) {
int i;
for(i=0; i<MAX_NUM_DNA_BOUND_SOCKETS; i++) {
if((pfr->dna_device_entry->bound_sockets[i] != NULL)
&& pfr->dna_device_entry->bound_sockets[i]->ring_active) {
if( pfr->dna_device_entry->bound_sockets[i]->mode == pfr->mode
|| pfr->dna_device_entry->bound_sockets[i]->mode == send_and_recv_mode
|| pfr->mode == send_and_recv_mode) {
printk("[PF_RING] Unable to activate two or more DNA sockets on the same interface %s/link direction\n",
pfr->ring_netdev->dev->name);
return(-EFAULT); /* No way: we can't have two sockets that are doing the same thing with DNA */
}
} /* if */
} /* for */
}
found = 1, pfr->ring_active = 1;
break;
case SO_DEACTIVATE_RING:
if(unlikely(enable_debug))
printk("[PF_RING] * SO_DEACTIVATE_RING *\n");
found = 1, pfr->ring_active = 0;
break;
case SO_SET_POLL_WATERMARK:
if(optlen != sizeof(u_int16_t))
return(-EINVAL);
else {
u_int16_t threshold;
if(pfr->slots_info != NULL)
threshold = pfr->slots_info->min_num_slots/2;
else
threshold = min_num_slots;
if(copy_from_user(&pfr->poll_num_pkts_watermark, optval, optlen))
return(-EFAULT);
if(pfr->poll_num_pkts_watermark > threshold)
pfr->poll_num_pkts_watermark = threshold;
if(pfr->poll_num_pkts_watermark == 0)
pfr->poll_num_pkts_watermark = 1;
if(unlikely(enable_debug))
printk("[PF_RING] --> SO_SET_POLL_WATERMARK=%d\n", pfr->poll_num_pkts_watermark);
found = 1;
}
break;
case SO_RING_BUCKET_LEN:
if(optlen != sizeof(u_int32_t))
return(-EINVAL);
else {
if(copy_from_user(&pfr->bucket_len, optval, optlen))
return(-EFAULT);
if(unlikely(enable_debug))
printk("[PF_RING] --> SO_RING_BUCKET_LEN=%d\n", pfr->bucket_len);
found = 1;
}
break;
case SO_MAP_DNA_DEVICE:
if(optlen != sizeof(dna_device_mapping))
return(-EINVAL);
else {
dna_device_mapping mapping;
if(copy_from_user(&mapping, optval, optlen))
return(-EFAULT);
else
ret = ring_map_dna_device(pfr, &mapping), found = 1;
}
break;
case SO_SET_MASTER_RING:
/* Avoid using master sockets with bound rings */
if(pfr->ring_netdev == &none_device_element)
return(-EFAULT);
if(optlen != sizeof(ring_id))
return(-EINVAL);
if(copy_from_user(&ring_id, optval, sizeof(ring_id)))
return(-EFAULT);
write_lock_bh(&pfr->ring_rules_lock);
ret = set_master_ring(sock->sk, pfr, ring_id);
write_unlock_bh(&pfr->ring_rules_lock);
break;
case SO_ADD_HW_FILTERING_RULE:
if(optlen != sizeof(hw_filtering_rule))
return(-EINVAL);
if(copy_from_user(&hw_rule, optval, sizeof(hw_rule)))
return(-EFAULT);
/* Check if a rule with the same id exists */
list_for_each_safe(ptr, tmp_ptr, &pfr->hw_filtering_rules) {
hw_filtering_rule_element *rule = list_entry(ptr, hw_filtering_rule_element, list);
if(rule->rule.rule_id == hw_rule.rule_id) {
/* There's already a rule with the same id: failure */
printk("[PF_RING] Warning: duplicated hw rule id %d\n", hw_rule.rule_id);
return(-EINVAL);
}
}
ret = handle_hw_filtering_rule(pfr, &hw_rule, add_hw_rule);
if(ret != -1) {
hw_filtering_rule_element *rule;
if(unlikely(enable_debug))
printk("[PF_RING] New hw filtering rule [id=%d]\n", hw_rule.rule_id);
/* Add the hw rule to the socket hw rule list */
rule = kmalloc(sizeof(hw_filtering_rule_element), GFP_ATOMIC);
if(rule != NULL) {
INIT_LIST_HEAD(&rule->list);
memcpy(&rule->rule, &hw_rule, sizeof(hw_rule));
list_add(&rule->list, &pfr->hw_filtering_rules); /* Add as first entry */
pfr->num_hw_filtering_rules++;
} else
printk("[PF_RING] Out of memory\n");
/* Increase the number of device hw rules */
list_for_each_safe(ptr, tmp_ptr, &ring_aware_device_list) {
ring_device_element *dev_ptr = list_entry(ptr, ring_device_element, device_list);
if(dev_ptr->dev == pfr->ring_netdev->dev) {
dev_ptr->hw_filters.num_filters++;
break;
}
}
}
found = 1;
break;
case SO_DEL_HW_FILTERING_RULE:
if(optlen != sizeof(u_int16_t))
return(-EINVAL);
if(copy_from_user(&rule_id, optval, sizeof(u_int16_t)))
return(-EFAULT);
/* Check if the rule we want to remove exists */
found = 0;
list_for_each_safe(ptr, tmp_ptr, &pfr->hw_filtering_rules) {
hw_filtering_rule_element *rule = list_entry(ptr, hw_filtering_rule_element, list);
if(rule->rule.rule_id == rule_id) {
/* There's already a rule with the same id: good */
memcpy(&hw_rule, &rule->rule, sizeof(hw_filtering_rule));
list_del(ptr);
kfree(rule);
found = 1;
break;
}
}
if(!found) return(-EINVAL);
ret = handle_hw_filtering_rule(pfr, &hw_rule, remove_hw_rule);
if(ret != -1) {
struct list_head *ptr, *tmp_ptr;
pfr->num_hw_filtering_rules--;
list_for_each_safe(ptr, tmp_ptr, &ring_aware_device_list) {
ring_device_element *dev_ptr = list_entry(ptr, ring_device_element, device_list);
if(dev_ptr->dev == pfr->ring_netdev->dev) {
if(dev_ptr->hw_filters.num_filters > 0)
dev_ptr->hw_filters.num_filters--;
break;
}
}
}
break;
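/* SO_SET_PACKET_CONSUMER_MODE: optval carries the kernel consumer plugin id,
optionally followed by plugin-specific options; if the plugin is registered
and the ring is not yet active, its packet consumer is started right away. */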
case SO_SET_PACKET_CONSUMER_MODE:
{
u_int diff;
if(optlen < sizeof(pfr->kernel_consumer_plugin_id))
return(-EINVAL);
diff = optlen-sizeof(pfr->kernel_consumer_plugin_id);
/* Copy the pluginId */
if(copy_from_user(&pfr->kernel_consumer_plugin_id, optval,
sizeof(pfr->kernel_consumer_plugin_id)))
return(-EFAULT);
#if 0
printk("[PF_RING] SO_SET_PACKET_CONSUMER_MODE=%d [diff=%d]\n",
pfr->kernel_consumer_plugin_id, diff);
#endif
if(diff > 0) {
pfr->kernel_consumer_options = kmalloc(diff, GFP_KERNEL);
if(pfr->kernel_consumer_options != NULL) {
if(copy_from_user(pfr->kernel_consumer_options,
&optval[sizeof(pfr->kernel_consumer_plugin_id)], diff))
return(-EFAULT);
} else
return(-EFAULT);
}
/* Notify the consumer that we're ready to start */
if(pfr->kernel_consumer_plugin_id
&& ((pfr->kernel_consumer_plugin_id >= MAX_PLUGIN_ID)
|| (plugin_registration[pfr->kernel_consumer_plugin_id] == NULL))) {
if(unlikely(enable_debug))
printk("[PF_RING] Plugin %d is unknown\n", pfr->kernel_consumer_plugin_id);
pfr->kernel_consumer_plugin_id = 0;
if(pfr->kernel_consumer_options != NULL) {
kfree(pfr->kernel_consumer_options);
pfr->kernel_consumer_options = NULL;
}
return(-EFAULT);
} else if(pfr->kernel_consumer_plugin_id) { /* plugin id 0 means no kernel consumer */
if(plugin_registration[pfr->kernel_consumer_plugin_id]->pfring_packet_start
&& (!pfr->ring_active)) {
plugin_registration[pfr->kernel_consumer_plugin_id]->
pfring_packet_start(pfr, copy_raw_data_to_ring);
}
}
}
break;
case SO_SET_VIRTUAL_FILTERING_DEVICE:
{
virtual_filtering_device_info elem;
if(optlen != sizeof(elem))
return(-EINVAL);
if(copy_from_user(&elem, optval, sizeof(elem)))
return(-EFAULT);
if((pfr->v_filtering_dev = add_virtual_filtering_device(sock->sk, &elem)) == NULL)
return(-EFAULT);
}
break;
case SO_REHASH_RSS_PACKET:
if(unlikely(enable_debug))
printk("[PF_RING] * SO_REHASH_RSS_PACKET *\n");
pfr->rehash_rss = default_rehash_rss_func;
found = 1;
break;
#ifdef VPFRING_SUPPORT
case SO_SET_VPFRING_HOST_EVENTFD:
if(optlen != sizeof(eventfd_i))
return(-EINVAL);
if(copy_from_user(&eventfd_i, optval, sizeof(eventfd_i)))
return(-EFAULT);
if(IS_ERR(eventfp = eventfd_fget(eventfd_i.fd)))
return(-EFAULT);
/* We don't need to check the id (we have only one event)
* eventfd_i.id == VPFRING_HOST_EVENT_RX_INT */
pfr->vpfring_host_eventfd_ctx = eventfd_ctx_fileget(eventfp);
break;
case SO_SET_VPFRING_GUEST_EVENTFD:
return(-EINVAL); /* (unused) */
break;
case SO_SET_VPFRING_CLEAN_EVENTFDS:
if(pfr->vpfring_host_eventfd_ctx)
eventfd_ctx_put(pfr->vpfring_host_eventfd_ctx);
pfr->vpfring_host_eventfd_ctx = NULL;
break;
#endif //VPFRING_SUPPORT
case SO_ATTACH_USERSPACE_RING:
{
char u_dev_name[32+1];
if(copy_from_user(u_dev_name, optval, sizeof(u_dev_name) - 1))
return(-EFAULT);
u_dev_name[sizeof(u_dev_name) - 1] = '\0';
if(pfr->ring_memory != NULL) {
if(unlikely(enable_debug))
printk("[PF_RING] SO_ATTACH_USERSPACE_RING (1) [%s]\n", u_dev_name);
return(-EINVAL); /* TODO mmap() already called */
}
/* Checks if the userspace ring exists */
pfr->userspace_ring = userspace_ring_create(u_dev_name, userspace_ring_producer, NULL);
if(pfr->userspace_ring == NULL) {
if(unlikely(enable_debug))
printk("[PF_RING] SO_ATTACH_USERSPACE_RING (2) [%s]\n", u_dev_name);
return(-EINVAL);
}
pfr->userspace_ring_type = userspace_ring_producer;
if(unlikely(enable_debug))
printk("[PF_RING] SO_ATTACH_USERSPACE_RING done [%s]\n", u_dev_name);
}
found = 1;
break;
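/* SO_CREATE_DNA_CLUSTER (cluster master): when hugepages are not used, optval
must also provide room for num_slots DMA addresses, which are copied back to
userspace right after the create_dna_cluster_info struct. */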
case SO_CREATE_DNA_CLUSTER:
{
struct create_dna_cluster_info cdnaci;
if(optlen < sizeof(cdnaci))
return(-EINVAL);
if(copy_from_user(&cdnaci, optval, sizeof(cdnaci)))
return(-EFAULT);
if(cdnaci.slave_mem_len == 0 || cdnaci.num_slaves > DNA_CLUSTER_MAX_NUM_SLAVES)
return(-EINVAL);
if(pfr->dna_device == NULL || pfr->dna_device->hwdev == NULL)
return(-EINVAL);
if (!(cdnaci.options & DNA_CLUSTER_OPT_HUGEPAGES)) {
if(optlen < (sizeof(cdnaci) + sizeof(u_int64_t) * cdnaci.num_slots))
return(-EINVAL);
}
if(pfr->dna_cluster) /* already called */
return(-EINVAL);
pfr->dna_cluster = dna_cluster_create(cdnaci.cluster_id, cdnaci.num_slots, cdnaci.num_slaves,
cdnaci.slave_mem_len, cdnaci.master_persistent_mem_len,
cdnaci.mode, cdnaci.options, cdnaci.hugepages_dir,
pfr->dna_device->hwdev,
pfr->dna_device->mem_info.rx.packet_memory_slot_len,
pfr->dna_device->mem_info.rx.packet_memory_chunk_len,
&cdnaci.recovered);
if(pfr->dna_cluster == NULL) {
if(unlikely(enable_debug))
printk("[PF_RING] SO_CREATE_DNA_CLUSTER [%u]\n", cdnaci.cluster_id);
return(-EINVAL);
}
pfr->dna_cluster_type = cluster_master;
/* copying back the structure (actually we need cdnaci.recovered only) */
if(copy_to_user(optval, &cdnaci, sizeof(cdnaci))) {
dna_cluster_remove(pfr->dna_cluster, pfr->dna_cluster_type, 0);
pfr->dna_cluster = NULL;
return(-EFAULT);
}
/* copying dma addresses to userspace at the end of the structure */
if(!(cdnaci.options & DNA_CLUSTER_OPT_HUGEPAGES) && cdnaci.num_slots > 0) {
if(copy_to_user(optval + sizeof(cdnaci), pfr->dna_cluster->extra_dma_memory->dma_addr,
sizeof(u_int64_t) * cdnaci.num_slots)) {
dna_cluster_remove(pfr->dna_cluster, pfr->dna_cluster_type, 0);
pfr->dna_cluster = NULL;
return(-EFAULT);
}
}
if(unlikely(enable_debug))
printk("[PF_RING] SO_CREATE_DNA_CLUSTER done [%u]\n", cdnaci.cluster_id);
}
found = 1;
break;
case SO_ATTACH_DNA_CLUSTER:
{
struct attach_dna_cluster_info adnaci;
if(copy_from_user(&adnaci, optval, sizeof(adnaci)))
return(-EFAULT);
pfr->dna_cluster = dna_cluster_attach(adnaci.cluster_id, &adnaci.slave_id, adnaci.auto_slave_id,
&pfr->ring_slots_waitqueue, &adnaci.mode, &adnaci.options, &adnaci.slave_mem_len, adnaci.hugepages_dir);
if(pfr->dna_cluster == NULL) {
if(unlikely(enable_debug))
printk("[PF_RING] SO_ATTACH_DNA_CLUSTER [%u@%u]\n", adnaci.slave_id, adnaci.cluster_id);
return(-EINVAL);
}
pfr->dna_cluster_slave_id = adnaci.slave_id;
pfr->dna_cluster_type = cluster_slave;
if(copy_to_user(optval, &adnaci, sizeof(adnaci))) { /* copying back values (return adnaci.mode) */
dna_cluster_remove(pfr->dna_cluster, pfr->dna_cluster_type, pfr->dna_cluster_slave_id);
pfr->dna_cluster = NULL;
return(-EFAULT);
}
if(unlikely(enable_debug))
printk("[PF_RING] SO_ATTACH_USERSPACE_RING done [%u]\n", adnaci.cluster_id);
}
found = 1;
break;
case SO_WAKE_UP_DNA_CLUSTER_SLAVE:
{
u_int32_t slave_id;
if(copy_from_user(&slave_id, optval, sizeof(slave_id)))
return(-EFAULT);
if(pfr->dna_cluster && slave_id < pfr->dna_cluster->num_slaves && pfr->dna_cluster->slave_waitqueue[slave_id])
wake_up_interruptible(pfr->dna_cluster->slave_waitqueue[slave_id]);
}
break;
case SO_CREATE_CLUSTER_REFEREE:
{
struct create_cluster_referee_info ccri;
if(optlen < sizeof(ccri))
return(-EINVAL);
if(copy_from_user(&ccri, optval, sizeof(ccri)))
return(-EFAULT);
if (create_cluster_referee(pfr, ccri.cluster_id, &ccri.recovered) < 0)
return(-EINVAL);
/* copying back the structure (actually we need ccri.recovered only) */
if(copy_to_user(optval, &ccri, sizeof(ccri))) {
remove_cluster_referee(pfr);
return(-EFAULT);
}
if(unlikely(enable_debug))
printk("[PF_RING] SO_CREATE_CLUSTER_REFEREE done [%u]\n", ccri.cluster_id);
}
found = 1;
break;
case SO_PUBLISH_CLUSTER_OBJECT:
{
struct public_cluster_object_info pcoi;
if(copy_from_user(&pcoi, optval, sizeof(pcoi)))
return(-EFAULT);
if (publish_cluster_object(pfr, pcoi.cluster_id, pcoi.object_type, pcoi.object_id, &pcoi.references) < 0)
return(-EINVAL);
/* copying back the structure (actually we need pcoi.references only) */
if(copy_to_user(optval, &pcoi, sizeof(pcoi))) {
return(-EFAULT);
}
if(unlikely(enable_debug))
printk("[PF_RING] SO_PUBLISH_CLUSTER_OBJECT done [%u.%u@%u]\n", pcoi.object_type, pcoi.object_id, pcoi.cluster_id);
}
found = 1;
break;
case SO_LOCK_CLUSTER_OBJECT:
{
struct lock_cluster_object_info lcoi;
if(copy_from_user(&lcoi, optval, sizeof(lcoi)))
return(-EFAULT);
if (lock_cluster_object(pfr, lcoi.cluster_id, lcoi.object_type, lcoi.object_id, lcoi.lock_mask) < 0)
return(-EINVAL);
if(unlikely(enable_debug))
printk("[PF_RING] SO_LOCK_CLUSTER_OBJECT done [%u.%u@%u]\n", lcoi.object_type, lcoi.object_id, lcoi.cluster_id);
}
found = 1;
break;
case SO_SHUTDOWN_RING:
found = 1, pfr->ring_active = 0, pfr->ring_shutdown = 1;
wake_up_interruptible(&pfr->ring_slots_waitqueue);
break;
case SO_USE_SHORT_PKT_HEADER:
found = 1, pfr->header_len = short_pkt_header;
break;
case SO_ENABLE_RX_PACKET_BOUNCE:
found = 1, pfr->tx.enable_tx_with_bounce = 1;
break;
case SO_SEND_MSG_TO_PLUGIN:
{
struct send_msg_to_plugin_info smtpi;
u_char *msg_data;
if(optlen < sizeof(smtpi))
return(-EINVAL);
if(copy_from_user(&smtpi, optval, sizeof(smtpi)))
return(-EFAULT);
if(optlen < (sizeof(smtpi) + smtpi.data_len))
return(-EINVAL);
msg_data = kmalloc(smtpi.data_len, GFP_KERNEL);
if(msg_data == NULL)
return -ENOMEM;
if(copy_from_user(msg_data, &optval[sizeof(smtpi)], smtpi.data_len)) {
kfree(msg_data);
return(-EFAULT);
}
if(smtpi.plugin_id < MAX_PLUGIN_ID
&& (plugin_registration[smtpi.plugin_id] != NULL)
&& (plugin_registration[smtpi.plugin_id]->pfring_plugin_handle_msg != NULL)) {
ret = plugin_registration[smtpi.plugin_id]->pfring_plugin_handle_msg(pfr, msg_data, smtpi.data_len);
/* copying back msg (it can be used as return value) */
if(copy_to_user(&optval[sizeof(smtpi)], msg_data, smtpi.data_len))
ret = -EFAULT;
} else {
printk("[PF_RING] Error: no plugin with id=%u or handle_msg method not implemented\n", smtpi.plugin_id);
ret = -EFAULT;
}
kfree(msg_data);
}
found = 1;
break;
case SO_SET_APPL_STATS:
if(optlen > sizeof(statsString) /* Names should not be too long */ )
return(-EINVAL);
if(copy_from_user(&statsString, optval, optlen))
return(-EFAULT);
ret = setSocketStats(pfr, statsString);
found = 1;
break;
case SO_SET_STACK_INJECTION_MODE:
pfr->stack_injection_mode = 1;
found = 1;
break;
default:
found = 0;
break;
}
if(found)
return(ret);
else
return(sock_setsockopt(sock, level, optname, optval, optlen));
}
/* ************************************* */
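/*
ring_getsockopt() handles the PF_RING-specific getsockopt() calls used to read
ring parameters and statistics back into userspace. A minimal userspace sketch
(assuming a PF_RING socket created with socket(PF_RING, SOCK_RAW, htons(ETH_P_ALL))):

u_int32_t version;
socklen_t len = sizeof(version);
if(getsockopt(fd, 0, SO_GET_RING_VERSION, &version, &len) == 0)
printf("PF_RING version %08x\n", version);
*/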
static int ring_getsockopt(struct socket *sock,
int level, int optname,
char __user *optval,
int __user *optlen)
{
int len;
struct pf_ring_socket *pfr = ring_sk(sock->sk);
if(pfr == NULL)
return(-EINVAL);
if(get_user(len, optlen))
return(-EFAULT);
if(len < 0)
return(-EINVAL);
if(unlikely(enable_debug))
printk("[PF_RING] --> getsockopt(%d)\n", optname);
switch (optname) {
case SO_GET_RING_VERSION:
{
u_int32_t version = RING_VERSION_NUM;
if(len < sizeof(u_int32_t))
return(-EINVAL);
else if(copy_to_user(optval, &version, sizeof(version)))
return(-EFAULT);
}
break;
case PACKET_STATISTICS:
{
struct tpacket_stats st;
if(len < sizeof(struct tpacket_stats))
return(-EINVAL);
if(pfr->slots_info == NULL) /* ring memory not allocated yet */
return(-EINVAL);
st.tp_packets = pfr->slots_info->tot_insert;
st.tp_drops = pfr->slots_info->tot_lost;
if(copy_to_user(optval, &st, sizeof(st))) /* copy the struct only, not len bytes */
return(-EFAULT);
break;
}
case SO_GET_HASH_FILTERING_RULE_STATS:
{
int rc = -EFAULT;
if(len >= sizeof(hash_filtering_rule)) {
hash_filtering_rule rule;
u_int hash_idx;
if(pfr->sw_filtering_hash == NULL) {
printk("[PF_RING] so_get_hash_filtering_rule_stats(): no hash failure\n");
return(-EFAULT);
}
if(copy_from_user(&rule, optval, sizeof(rule))) {
printk("[PF_RING] so_get_hash_filtering_rule_stats: copy_from_user() failure\n");
return(-EFAULT);
}
if(unlikely(enable_debug))
printk("[PF_RING] so_get_hash_filtering_rule_stats"
"(vlan=%u, proto=%u, sip=%u, sport=%u, dip=%u, dport=%u)\n",
rule.vlan_id, rule.proto,
rule.host4_peer_a, rule.port_peer_a,
rule.host4_peer_b,
rule.port_peer_b);
hash_idx = hash_pkt(rule.vlan_id, rule.proto,
rule.host_peer_a, rule.host_peer_b,
rule.port_peer_a, rule.port_peer_b) % perfect_rules_hash_size;
if(pfr->sw_filtering_hash[hash_idx] != NULL) {
sw_filtering_hash_bucket *bucket;
read_lock_bh(&pfr->ring_rules_lock);
bucket = pfr->sw_filtering_hash[hash_idx];
if(unlikely(enable_debug))
printk("[PF_RING] so_get_hash_filtering_rule_stats(): bucket=%p\n",
bucket);
while(bucket != NULL) {
if(hash_bucket_match_rule(bucket, &rule)) {
u_char *buffer = kmalloc(len, GFP_ATOMIC);
if(buffer == NULL) {
printk("[PF_RING] so_get_hash_filtering_rule_stats() no memory failure\n");
rc = -EFAULT;
} else {
if((plugin_registration[rule.plugin_action.plugin_id] == NULL)
||
(plugin_registration[rule.plugin_action.plugin_id]->pfring_plugin_get_stats == NULL)) {
printk("[PF_RING] Found rule but pluginId %d is not registered\n",
rule.plugin_action.plugin_id);
rc = -EFAULT;
} else
rc = plugin_registration[rule.plugin_action.plugin_id]->
pfring_plugin_get_stats(pfr, NULL, bucket, buffer, len);
if(rc > 0) {
if(copy_to_user(optval, buffer, rc)) {
printk("[PF_RING] copy_to_user() failure\n");
rc = -EFAULT;
}
}
kfree(buffer); /* release the temporary stats buffer */
}
break;
} else
bucket = bucket->next;
} /* while */
read_unlock_bh(&pfr->ring_rules_lock);
} else {
if(unlikely(enable_debug))
printk("[PF_RING] so_get_hash_filtering_rule_stats(): entry not found [hash_idx=%d]\n",
hash_idx);
}
}
return(rc);
break;
}
case SO_GET_FILTERING_RULE_STATS:
{
char *buffer = NULL;
int rc = -EFAULT;
struct list_head *ptr, *tmp_ptr;
u_int16_t rule_id;
if(len < sizeof(rule_id))
return(-EINVAL);
if(copy_from_user(&rule_id, optval, sizeof(rule_id)))
return(-EFAULT);
if(unlikely(enable_debug))
printk("[PF_RING] SO_GET_FILTERING_RULE_STATS: rule_id=%d\n",
rule_id);
read_lock_bh(&pfr->ring_rules_lock);
list_for_each_safe(ptr, tmp_ptr, &pfr->sw_filtering_rules) {
sw_filtering_rule_element *rule;
rule = list_entry(ptr, sw_filtering_rule_element, list);
if(rule->rule.rule_id == rule_id) {
buffer = kmalloc(len, GFP_ATOMIC);
if(buffer == NULL)
rc = -EFAULT;
else {
if((plugin_registration[rule->rule.plugin_action.plugin_id] == NULL)
||
(plugin_registration[rule->rule.plugin_action.plugin_id]->pfring_plugin_get_stats == NULL)) {
printk("[PF_RING] Found rule %d but pluginId %d is not registered\n",
rule_id, rule->rule.plugin_action.plugin_id);
rc = -EFAULT;
} else
rc = plugin_registration[rule->rule.plugin_action.plugin_id]->
pfring_plugin_get_stats(pfr, rule, NULL, buffer, len);
if(rc > 0) {
if(copy_to_user(optval, buffer, rc)) {
rc = -EFAULT;
}
}
}
break;
}
}
read_unlock_bh(&pfr->ring_rules_lock);
if(buffer != NULL)
kfree(buffer);
/* printk("[PF_RING] SO_GET_FILTERING_RULE_STATS *END*\n"); */
return(rc);
break;
}
case SO_GET_MAPPED_DNA_DEVICE:
{
if((pfr->dna_device == NULL) || (len < sizeof(dna_memory_slots)))
return(-EFAULT);
if(copy_to_user(optval, &pfr->dna_device->mem_info, sizeof(dna_memory_slots)))
return(-EFAULT);
break;
}
case SO_GET_EXTRA_DMA_MEMORY:
{
u_int64_t num_slots, slot_len, chunk_len;
if(pfr->dna_device == NULL || pfr->dna_device->hwdev == NULL)
return(-EINVAL);
if(len < (3 * sizeof(u_int64_t)))
return(-EINVAL);
if(copy_from_user(&num_slots, optval, sizeof(num_slots)))
return(-EFAULT);
if(copy_from_user(&slot_len, optval+sizeof(num_slots), sizeof(slot_len)))
return(-EFAULT);
if(copy_from_user(&chunk_len, optval+sizeof(num_slots)+sizeof(slot_len), sizeof(chunk_len)))
return(-EFAULT);
//if(num_slots > MAX_EXTRA_DMA_SLOTS)
// num_slots = MAX_EXTRA_DMA_SLOTS;
if(len < (sizeof(u_int64_t) * num_slots))
return(-EINVAL);
if(pfr->extra_dma_memory) /* already called */
return(-EINVAL);
if((pfr->extra_dma_memory = allocate_extra_dma_memory(pfr->dna_device->hwdev,
num_slots, slot_len, chunk_len)) == NULL)
return(-EFAULT);
if(copy_to_user(optval, pfr->extra_dma_memory->dma_addr, (sizeof(u_int64_t) * num_slots))) {
free_extra_dma_memory(pfr->extra_dma_memory);
pfr->extra_dma_memory = NULL;
return(-EFAULT);
}
break;
}
case SO_GET_NUM_RX_CHANNELS:
{
u_int8_t num_rx_channels;
if(pfr->ring_netdev == &none_device_element) {
/* Device not yet bound */
num_rx_channels = UNKNOWN_NUM_RX_CHANNELS;
} else {
if(pfr->ring_netdev->is_dna_device)
num_rx_channels = pfr->ring_netdev->num_dna_rx_queues;
else
num_rx_channels = max_val(pfr->num_rx_channels, get_num_rx_queues(pfr->ring_netdev->dev));
}
if(unlikely(enable_debug))
printk("[PF_RING] --> SO_GET_NUM_RX_CHANNELS[%s]=%d [dna=%d/dns_rx_channels=%d][%p]\n",
pfr->ring_netdev->dev->name, num_rx_channels,
pfr->ring_netdev->is_dna_device,
pfr->ring_netdev->num_dna_rx_queues,
pfr->ring_netdev);
if(copy_to_user(optval, &num_rx_channels, sizeof(num_rx_channels)))
return(-EFAULT);
}
break;
case SO_GET_RING_ID:
if(len < sizeof(pfr->ring_id))
return(-EINVAL);
if(unlikely(enable_debug))
printk("[PF_RING] --> SO_GET_RING_ID=%d\n", pfr->ring_id);
if(copy_to_user(optval, &pfr->ring_id, sizeof(pfr->ring_id)))
return(-EFAULT);
break;
case SO_GET_PACKET_CONSUMER_MODE:
if(len < sizeof(pfr->kernel_consumer_plugin_id))
return(-EINVAL);
if(unlikely(enable_debug))
printk("[PF_RING] --> SO_GET_PACKET_CONSUMER_MODE=%d\n",
pfr->kernel_consumer_plugin_id);
if(copy_to_user(optval, &pfr->kernel_consumer_plugin_id,
sizeof(pfr->kernel_consumer_plugin_id)))
return(-EFAULT);
break;
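/* SO_GET_BOUND_DEVICE_ADDRESS: if userspace passes a MAC filled with
RING_MAGIC_VALUE, return the permanent MAC of the lowest-numbered ethX
device; otherwise return the address of the bound device. */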
case SO_GET_BOUND_DEVICE_ADDRESS:
if(len < ETH_ALEN) return(-EINVAL);
if(pfr->dna_device != NULL) {
if(copy_to_user(optval, pfr->dna_device->device_address, ETH_ALEN))
return(-EFAULT);
} else if((pfr->ring_netdev != NULL)
&& (pfr->ring_netdev->dev != NULL)) {
char lowest_if_mac[ETH_ALEN] = { 0 };
char magic_if_mac[ETH_ALEN];
memset(magic_if_mac, RING_MAGIC_VALUE, sizeof(magic_if_mac));
/* Read input buffer */
if(copy_from_user(&lowest_if_mac, optval, ETH_ALEN))
return(-EFAULT);
if(!memcmp(lowest_if_mac, magic_if_mac, ETH_ALEN)) {
struct list_head *ptr, *tmp_ptr;
long lowest_id = -1;
/* Return the MAC address of the lowest X of ethX */
list_for_each_safe(ptr, tmp_ptr, &ring_aware_device_list) {
ring_device_element *entry = list_entry(ptr, ring_device_element, device_list);
char *eptr;
long id = simple_strtol(&entry->dev->name[3], &eptr, 10);
if((lowest_id == -1) || (id < lowest_id)) {
lowest_id = id, memcpy(lowest_if_mac, entry->dev->perm_addr, ETH_ALEN);
}
}
if(copy_to_user(optval, lowest_if_mac, ETH_ALEN))
return(-EFAULT);
} else {
if(copy_to_user(optval, pfr->ring_netdev->dev->dev_addr, ETH_ALEN))
return(-EFAULT);
}
} else
return(-EFAULT);
break;
case SO_GET_BOUND_DEVICE_IFINDEX:
if((len < sizeof(int))
|| (pfr->ring_netdev == NULL))
return(-EINVAL);
if(copy_to_user(optval, &pfr->ring_netdev->dev->ifindex, sizeof(int)))
return(-EFAULT);
break;
case SO_GET_NUM_QUEUED_PKTS:
{
u_int32_t num_queued = num_queued_pkts(pfr);
if(len < sizeof(num_queued))
return(-EINVAL);
if(copy_to_user(optval, &num_queued, sizeof(num_queued)))
return(-EFAULT);
}
break;
case SO_GET_PKT_HEADER_LEN:
if(len < sizeof(pfr->slot_header_len))
return(-EINVAL);
if(copy_to_user(optval, &pfr->slot_header_len, sizeof(pfr->slot_header_len)))
return(-EFAULT);
break;
case SO_GET_BUCKET_LEN:
if(len < sizeof(pfr->bucket_len))
return(-EINVAL);
if(copy_to_user(optval, &pfr->bucket_len, sizeof(pfr->bucket_len)))
return(-EFAULT);
break;
case SO_GET_LOOPBACK_TEST:
/* Used for testing purposes only */
{
/* printk("SO_GET_LOOPBACK_TEST (len=%d)\n", len); */
if(len > 0) {
if(len > loobpack_test_buffer_len) return(-EFAULT);
if(loobpack_test_buffer == NULL) {
loobpack_test_buffer = kmalloc(loobpack_test_buffer_len, GFP_ATOMIC);
if(loobpack_test_buffer == NULL)
return(-EFAULT); /* Not enough memory */
}
{
u_int i;
for(i=0; i<len; i++) loobpack_test_buffer[i] = i;
}
if(copy_to_user(optval, loobpack_test_buffer, len))
return(-EFAULT);
}
}
break;
case SO_GET_DEVICE_TYPE:
if(len < sizeof(pfring_device_type))
return(-EINVAL);
if(pfr->ring_netdev == NULL)
return(-EFAULT);
if(copy_to_user(optval, &pfr->ring_netdev->device_type, sizeof(pfring_device_type)))
return(-EFAULT);
break;
case SO_GET_DEVICE_IFINDEX:
{
struct list_head *ptr, *tmp_ptr;
char dev_name[32];
int ifindex_found = 0;
if(len < sizeof(int) || len > sizeof(dev_name))
return(-EINVAL);
if(copy_from_user(&dev_name, optval, len))
return(-EFAULT);
dev_name[sizeof(dev_name)-1] = 0;
list_for_each_safe(ptr, tmp_ptr, &ring_aware_device_list) {
ring_device_element *dev_ptr = list_entry(ptr, ring_device_element, device_list);
if(strcmp(dev_ptr->dev->name, dev_name) == 0) {
ifindex_found = 1;
if(copy_to_user(optval, &dev_ptr->dev->ifindex, sizeof(int)))
return(-EFAULT);
break;
}
}
if(!ifindex_found)
return(-EINVAL);
}
break;
case SO_GET_APPL_STATS_FILE_NAME:
{
char path[255];
u_int slen;
snprintf(path, sizeof(path)-1,
"/proc/net/pf_ring/stats/%s", pfr->sock_proc_stats_name);
slen = strlen(path);
if(len < (slen+1))
return(-EINVAL);
if(copy_to_user(optval, path, slen+1 /* include the trailing NUL */))
return(-EFAULT);
}
break;
default:
return -ENOPROTOOPT;
}
if(put_user(len, optlen))
return(-EFAULT);
else
return(0);
}
/* ************************************* */
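/*
dna_device_handler() is invoked by DNA-aware drivers to (un)register a
device/channel mapping. On add_device_mapping it allocates a dna_device_list
entry, records the RX/TX/PHYS memory layout and callbacks, bumps the module
reference count and marks the corresponding ring_device_element as a DNA
device; on removal it drops the entry and the reference count.
*/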
void dna_device_handler(dna_device_operation operation,
dna_version version,
mem_ring_info *rx_info,
mem_ring_info *tx_info,
unsigned long rx_packet_memory[DNA_MAX_NUM_CHUNKS],
void *rx_descr_packet_memory,
unsigned long tx_packet_memory[DNA_MAX_NUM_CHUNKS],
void *tx_descr_packet_memory,
void *phys_card_memory,
u_int phys_card_memory_len,
u_int channel_id,
struct net_device *netdev,
struct device *hwdev,
dna_device_model device_model,
u_char *device_address,
wait_queue_head_t *packet_waitqueue,
u_int8_t *interrupt_received,
void *rx_adapter_ptr, void *tx_adapter_ptr,
dna_wait_packet wait_packet_function_ptr,
dna_device_notify dev_notify_function_ptr)
{
if(unlikely(enable_debug)) {
printk("[PF_RING] dna_device_handler(%s@%u [operation=%s])\n",
netdev->name, channel_id,
operation == add_device_mapping ? "add_device_mapping" : "remove_device_mapping");
printk("[PF_RING] RX=%u/TX=%u\n", rx_info->packet_memory_num_chunks, tx_info->packet_memory_num_chunks);
}
if(operation == add_device_mapping) {
dna_device_list *next;
next = kmalloc(sizeof(dna_device_list), GFP_ATOMIC);
if(next != NULL) {
memset(next, 0, sizeof(dna_device_list));
next->num_bound_sockets = 0, next->dev.mem_info.version = version;
//printk("[PF_RING] [rx_slots=%u/num_rx_pages=%u/memory_tot_len=%u]][tx_slots=%u/num_tx_pages=%u]\n",
// packet_memory_num_slots, num_rx_pages, packet_memory_tot_len,
// num_tx_slots, num_tx_pages);
/* RX */
if(rx_info != NULL)
memcpy(&next->dev.mem_info.rx, rx_info, sizeof(next->dev.mem_info.rx));
if(rx_packet_memory != NULL)
memcpy(&next->dev.rx_packet_memory, rx_packet_memory, sizeof(next->dev.rx_packet_memory));
next->dev.rx_descr_packet_memory = rx_descr_packet_memory;
/* TX */
if(tx_info != NULL)
memcpy(&next->dev.mem_info.tx, tx_info, sizeof(next->dev.mem_info.tx));
if(tx_packet_memory != NULL)
memcpy(&next->dev.tx_packet_memory, tx_packet_memory, sizeof(next->dev.tx_packet_memory));
next->dev.tx_descr_packet_memory = tx_descr_packet_memory;
/* PHYS */
next->dev.phys_card_memory = phys_card_memory;
next->dev.mem_info.phys_card_memory_len = phys_card_memory_len;
next->dev.channel_id = channel_id;
next->dev.netdev = netdev;
next->dev.hwdev = hwdev;
next->dev.mem_info.device_model = device_model;
memcpy(next->dev.device_address, device_address, 6);
next->dev.packet_waitqueue = packet_waitqueue;
next->dev.interrupt_received = interrupt_received;
next->dev.rx_adapter_ptr = rx_adapter_ptr;
next->dev.tx_adapter_ptr = tx_adapter_ptr;
next->dev.wait_packet_function_ptr = wait_packet_function_ptr;
next->dev.usage_notification = dev_notify_function_ptr;
list_add(&next->list, &ring_dna_devices_list);
dna_devices_list_size++;
/* Increment usage count to avoid unloading it while DNA modules are in use */
try_module_get(THIS_MODULE);
/* We now have to update the device list */
{
struct list_head *ptr, *tmp_ptr;
list_for_each_safe(ptr, tmp_ptr, &ring_aware_device_list) {
ring_device_element *dev_ptr = list_entry(ptr, ring_device_element, device_list);
if(strcmp(dev_ptr->dev->name, netdev->name) == 0) {
dev_ptr->num_dna_rx_queues = max_val(dev_ptr->num_dna_rx_queues, channel_id+1);
dev_ptr->is_dna_device = 1, dev_ptr->dna_device_model = device_model;
if(unlikely(enable_debug))
printk("[PF_RING] ==>> Updating DNA %s [num_dna_rx_queues=%d][%p]\n",
dev_ptr->dev->name, dev_ptr->num_dna_rx_queues, dev_ptr);
break;
}
}
}
} else {
printk("[PF_RING] Could not kmalloc slot!!\n");
}
} else {
struct list_head *ptr, *tmp_ptr;
dna_device_list *entry;
list_for_each_safe(ptr, tmp_ptr, &ring_dna_devices_list) {
entry = list_entry(ptr, dna_device_list, list);
if((entry->dev.netdev == netdev)
&& (entry->dev.channel_id == channel_id)) {
list_del(ptr);
kfree(entry);
dna_devices_list_size--;
/* Decrement usage count for DNA devices */
module_put(THIS_MODULE);
break;
}
}
}
if(unlikely(enable_debug))
printk("[PF_RING] dna_device_handler(%s): [dna_devices_list_size=%d]\n",
netdev->name, dna_devices_list_size);
}
/* ************************************* */
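/* bpctl_notifier() (REDBORDER_PATCH only): when a monitored interface listed in
bypass_interfaces loses its carrier, enable hardware bypass on the master
interface of the pair via the bpctl kernel ioctl. */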
#ifdef REDBORDER_PATCH
static void bpctl_notifier(char *if_name) {
struct bpctl_cmd bpctl_cmd;
int i = 0, rc;
while (i < MAX_NUM_DEVICES && bypass_interfaces[i] != NULL) {
if(strcmp(if_name, bypass_interfaces[i]) == 0) {
struct list_head *ptr, *tmp_ptr;
list_for_each_safe(ptr, tmp_ptr, &ring_aware_device_list) {
ring_device_element *dev_ptr = list_entry(ptr, ring_device_element, device_list);
if(strcmp(dev_ptr->dev->name, bypass_interfaces[i & ~0x1]) == 0) { /* master found */
memset(&bpctl_cmd, 0, sizeof(bpctl_cmd));
bpctl_cmd.in_param[1] = dev_ptr->dev->ifindex;
bpctl_cmd.in_param[2] = 1; /* on */
if((rc = bpctl_kernel_ioctl(BPCTL_IOCTL_TX_MSG(SET_BYPASS), &bpctl_cmd)) < 0) {
printk("[PF_RING][%s] %s interface is not a bypass device.\n",
__FUNCTION__, dev_ptr->dev->name);
return;
}
if((rc == 0) && (bpctl_cmd.status == 0))
printk("[PF_RING][%s] bypass enabled on %s.\n", __FUNCTION__, if_name);
else
printk("[PF_RING][%s] %s is a slave interface or doesn't support bypass.\n",
__FUNCTION__, dev_ptr->dev->name);
}
}
break;
}
i++;
}
}
#endif
/* ************************************* */
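/* ring_ioctl(): PF_RING sockets only forward the standard interface ioctls to
inet_dgram_ops; everything else is rejected with -ENOIOCTLCMD. */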
static int ring_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
switch (cmd) {
#ifdef CONFIG_INET
case SIOCGIFFLAGS:
case SIOCSIFFLAGS:
case SIOCGIFCONF:
case SIOCGIFMETRIC:
case SIOCSIFMETRIC:
case SIOCGIFMEM:
case SIOCSIFMEM:
case SIOCGIFMTU:
case SIOCSIFMTU:
case SIOCSIFLINK:
case SIOCGIFHWADDR:
case SIOCSIFHWADDR:
case SIOCSIFMAP:
case SIOCGIFMAP:
case SIOCSIFSLAVE:
case SIOCGIFSLAVE:
case SIOCGIFINDEX:
case SIOCGIFNAME:
case SIOCGIFCOUNT:
case SIOCSIFHWBROADCAST:
return(inet_dgram_ops.ioctl(sock, cmd, arg));
#endif
default:
return -ENOIOCTLCMD;
}
return 0;
}
/* ************************************* */
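/* Socket operations exposed by PF_RING: connection-oriented calls are stubbed
out with the sock_no_* helpers, while bind/mmap/poll/setsockopt/getsockopt/
ioctl/recvmsg/sendmsg are implemented by the ring_* functions above. */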
static struct proto_ops ring_ops = {
.family = PF_RING,
.owner = THIS_MODULE,
/* Operations that make no sense on ring sockets. */
.connect = sock_no_connect,
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = sock_no_getname,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.sendpage = sock_no_sendpage,
/* Now the operations that really occur. */
.release = ring_release,
.bind = ring_bind,
.mmap = ring_mmap,
.poll = ring_poll,
.setsockopt = ring_setsockopt,
.getsockopt = ring_getsockopt,
.ioctl = ring_ioctl,
.recvmsg = ring_recvmsg,
.sendmsg = ring_sendmsg,
};
/* ************************************ */
static struct net_proto_family ring_family_ops = {
.family = PF_RING,
.create = ring_create,
.owner = THIS_MODULE,
};
// BD: API changed in 2.6.12, ref:
// http://svn.clkao.org/svnweb/linux/revision/?rev=28201
#if(LINUX_VERSION_CODE > KERNEL_VERSION(2,6,11))
static struct proto ring_proto = {
.name = "PF_RING",
.owner = THIS_MODULE,
.obj_size = sizeof(struct ring_sock),
};
#endif
/* ************************************ */
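/* Hooks handed to NIC drivers through dev->pfring_ptr (see the netdevice
notifier below): they let drivers push packets into PF_RING and let DNA
drivers register their devices. */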
static struct pfring_hooks ring_hooks = {
.magic = PF_RING,
.transparent_mode = &transparent_mode,
.ring_handler = skb_ring_handler,
.buffer_ring_handler = buffer_ring_handler,
.buffer_add_hdr_to_ring = add_hdr_to_ring,
.pfring_registration = register_plugin,
.pfring_unregistration = unregister_plugin,
.ring_dna_device_handler = dna_device_handler,
};
/* ************************************ */
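/* Remove a device from the PF_RING device list: drop its /proc entries,
re-bind any socket still pointing to it to the 'none' pseudo-device, then
free the ring_device_element. */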
void remove_device_from_ring_list(struct net_device *dev) {
struct list_head *ptr, *tmp_ptr;
u_int32_t last_list_idx;
struct sock *sk;
list_for_each_safe(ptr, tmp_ptr, &ring_aware_device_list) {
ring_device_element *dev_ptr = list_entry(ptr, ring_device_element, device_list);
if(dev_ptr->dev == dev) {
if(dev_ptr->proc_entry) {
#ifdef ENABLE_PROC_WRITE_RULE
if(dev_ptr->device_type != standard_nic_family)
remove_proc_entry(PROC_RULES, dev_ptr->proc_entry);
#endif
remove_proc_entry(PROC_INFO, dev_ptr->proc_entry);
remove_proc_entry(dev_ptr->dev->name, ring_proc_dev_dir);
}
/* We now have to "un-bind" existing sockets */
sk = (struct sock*)lockless_list_get_first(&ring_table, &last_list_idx);
while(sk != NULL) {
struct pf_ring_socket *pfr;
pfr = ring_sk(sk);
if(pfr->ring_netdev == dev_ptr)
pfr->ring_netdev = &none_device_element; /* Unbinding socket */
sk = (struct sock*)lockless_list_get_next(&ring_table, &last_list_idx);
}
list_del(ptr);
kfree(dev_ptr);
break;
}
}
}
/* ************************************ */
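/* Register a new device with PF_RING: allocate a ring_device_element, create
its /proc entries and probe (via a private ethtool set_rxnfc command) whether
the NIC is an Intel 82599 supporting hardware filters. */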
int add_device_to_ring_list(struct net_device *dev) {
ring_device_element *dev_ptr;
if((dev_ptr = kmalloc(sizeof(ring_device_element), GFP_KERNEL)) == NULL)
return(-ENOMEM);
memset(dev_ptr, 0, sizeof(ring_device_element));
INIT_LIST_HEAD(&dev_ptr->device_list);
dev_ptr->dev = dev;
dev_ptr->proc_entry = proc_mkdir(dev_ptr->dev->name, ring_proc_dev_dir);
dev_ptr->device_type = standard_nic_family; /* Default */
create_proc_read_entry(PROC_INFO, 0 /* read-only */,
dev_ptr->proc_entry,
ring_proc_dev_get_info /* read */,
dev_ptr);
#if(LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31))
/* Dirty trick used to discover Intel 82599 interfaces, to be fixed at some point: FIXME */
if((dev_ptr->dev->ethtool_ops != NULL) && (dev_ptr->dev->ethtool_ops->set_rxnfc != NULL)) {
struct ethtool_rxnfc cmd;
int rc;
cmd.cmd = ETHTOOL_PFRING_SRXFTCHECK /* check */;
rc = dev_ptr->dev->ethtool_ops->set_rxnfc(dev_ptr->dev, &cmd);
if(unlikely(enable_debug))
printk("[PF_RING] set_rxnfc returned %d\n", rc);
if(rc == RING_MAGIC_VALUE) {
/* This device supports hardware filtering */
dev_ptr->device_type = intel_82599_family;
/* Setup handlers */
dev_ptr->hw_filters.filter_handlers.five_tuple_handler = i82599_generic_handler;
dev_ptr->hw_filters.filter_handlers.perfect_filter_handler = i82599_generic_handler;
#ifdef ENABLE_PROC_WRITE_RULE
{
struct proc_dir_entry *entry;
entry = create_proc_read_entry(PROC_RULES, 0666 /* rw */,
dev_ptr->proc_entry,
ring_proc_dev_rule_read, dev_ptr);
if(entry) {
entry->write_proc = ring_proc_dev_rule_write;
if(unlikely(enable_debug)) printk("[PF_RING] Device %s (Intel 82599) DOES support hardware packet filtering\n", dev->name);
} else {
if(unlikely(enable_debug)) printk("[PF_RING] Error while creating /proc entry 'rules' for device %s\n", dev->name);
}
}
#endif
} else {
if(unlikely(enable_debug)) printk("[PF_RING] Device %s does NOT support hardware packet filtering [1]\n", dev->name);
}
} else {
if(unlikely(enable_debug)) printk("[PF_RING] Device %s does NOT support hardware packet filtering [2]\n", dev->name);
}
#endif
list_add(&dev_ptr->device_list, &ring_aware_device_list);
return(0);
}
/* ************************************ */
void pf_ring_add_module_dependency(void) {
/* Don't actually do anything */
}
EXPORT_SYMBOL(pf_ring_add_module_dependency);
/* ************************************ */
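/*
Inject a raw packet into the ring(s) bound to the given interface/channel
(exported so other kernel modules can feed packets into PF_RING).
Returns 0 on success, -1 if the destination ring has no room, -2 if no
matching ring was found. A minimal sketch from another module, using channel 0
and assuming the caller fills the pfring_pkthdr (e.g. caplen/len):

struct pfring_pkthdr hdr;
memset(&hdr, 0, sizeof(hdr));
hdr.len = hdr.caplen = data_len;
pf_ring_inject_packet_to_ring(dev->ifindex, 0, data, data_len, &hdr);
*/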
int pf_ring_inject_packet_to_ring(int if_index, int channel_id, char *data, int data_len, struct pfring_pkthdr *hdr) {
struct sock* sk = NULL;
u_int32_t last_list_idx;
struct pf_ring_socket *pfr;
u_int32_t the_bit = 1 << channel_id;
int rc = -2; /* -2 == socket not found */
if(quick_mode) {
if(if_index < MAX_NUM_IFIDX && channel_id < MAX_NUM_RX_CHANNELS && device_rings[if_index][channel_id] != NULL)
/* 0 == success, -1 == no room available */
rc = add_raw_packet_to_ring(device_rings[if_index][channel_id], hdr, data, data_len, 0);
} else {
sk = (struct sock*)lockless_list_get_first(&ring_table, &last_list_idx);
while (NULL != sk) {
pfr = ring_sk(sk);
if(pfr != NULL
&& (test_bit(if_index, pfr->netdev_mask) /* || pfr->ring_netdev == &any_device_element */ )
&& ((pfr->channel_id_mask & the_bit) || channel_id == RING_ANY_CHANNEL)) {
/* 0 == success, -1 == no room available */
rc = add_raw_packet_to_ring(pfr, hdr, data, data_len, 0);
}
sk = (struct sock*)lockless_list_get_next(&ring_table, &last_list_idx);
}
}
if(unlikely(enable_debug) && rc == -2)
printk("[PF_RING] %s() Error: no ring found for if_index=%d, channel_id=%d\n",
__FUNCTION__, if_index, channel_id);
return rc;
}
EXPORT_SYMBOL(pf_ring_inject_packet_to_ring);
/* ************************************ */
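/* Netdevice notifier: hooks/unhooks PF_RING on ethernet (plus wifi and bonding)
interfaces as they are registered, unregistered or renamed, keeping the device
list and the /proc entries in sync. */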
static int ring_notifier(struct notifier_block *this, unsigned long msg, void *data)
{
struct net_device *dev = data;
struct pfring_hooks *hook;
if(dev != NULL) {
if(unlikely(enable_debug))
printk("[PF_RING] packet_notifier(%lu) [%s][%d]\n", msg, dev->name, dev->type);
/* Skip non ethernet interfaces */
if(
(dev->type != ARPHRD_ETHER) /* Ethernet */
/* Wifi */
&& (dev->type != ARPHRD_IEEE80211)
&& (dev->type != ARPHRD_IEEE80211_PRISM)
&& (dev->type != ARPHRD_IEEE80211_RADIOTAP)
&& strncmp(dev->name, "bond", 4)) {
if(unlikely(enable_debug)) printk("[PF_RING] packet_notifier(%s): skipping non ethernet device\n", dev->name);
return NOTIFY_DONE;
}
if(dev->ifindex >= MAX_NUM_IFIDX) {
if(unlikely(enable_debug))
printk("[PF_RING] packet_notifier(%s): interface index %d > max index %d\n",
dev->name, dev->ifindex, MAX_NUM_IFIDX);
return NOTIFY_DONE;
}
switch(msg) {
case NETDEV_PRE_UP:
case NETDEV_UP:
case NETDEV_DOWN:
break;
case NETDEV_REGISTER:
if(unlikely(enable_debug))
printk("[PF_RING] packet_notifier(%s) [REGISTER][pfring_ptr=%p][hook=%p]\n",
dev->name, dev->pfring_ptr, &ring_hooks);
if(dev->pfring_ptr == NULL) {
dev->pfring_ptr = &ring_hooks;
if(add_device_to_ring_list(dev) != 0) {
printk("[PF_RING] Error in add_device_to_ring_list(%s)\n", dev->name);
}
}
break;
case NETDEV_UNREGISTER:
if(unlikely(enable_debug))
printk("[PF_RING] packet_notifier(%s) [UNREGISTER][pfring_ptr=%p]\n",
dev->name, dev->pfring_ptr);
hook = (struct pfring_hooks*)dev->pfring_ptr;
if(hook && (hook->magic == PF_RING)) {
remove_device_from_ring_list(dev);
dev->pfring_ptr = NULL;
}
/* We don't have to worry about updating rules that might have used this
(just removed) device as a reflection device: whenever we set a rule
with reflection we do dev_put(), so the device stays busy until the
rule is removed.
*/
break;
case NETDEV_CHANGE: /* Interface state change */
#ifdef REDBORDER_PATCH
if(test_bit(__LINK_STATE_NOCARRIER, &dev->state))
bpctl_notifier(dev->name);
#endif
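/* fall through to NETDEV_CHANGEADDR */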
case NETDEV_CHANGEADDR: /* Interface address changed (e.g. during device probing) */
break;
case NETDEV_CHANGENAME: /* Rename interface ethX -> ethY */
{
struct list_head *ptr, *tmp_ptr;
if(unlikely(enable_debug)) printk("[PF_RING] Device change name %s\n", dev->name);
list_for_each_safe(ptr, tmp_ptr, &ring_aware_device_list) {
ring_device_element *dev_ptr = list_entry(ptr, ring_device_element, device_list);
if(dev_ptr->dev == dev) {
if(unlikely(enable_debug))
printk("[PF_RING] ==>> FOUND device change name %s -> %s\n",
dev_ptr->proc_entry->name, dev->name);
/* Remove old entry */
#ifdef ENABLE_PROC_WRITE_RULE
if(dev_ptr->device_type != standard_nic_family)
remove_proc_entry(PROC_RULES, dev_ptr->proc_entry);
#endif
remove_proc_entry(PROC_INFO, dev_ptr->proc_entry);
remove_proc_entry(dev_ptr->proc_entry->name, ring_proc_dev_dir);
/* Add new entry */
dev_ptr->proc_entry = proc_mkdir(dev_ptr->dev->name, ring_proc_dev_dir);
create_proc_read_entry(PROC_INFO, 0 /* read-only */,
dev_ptr->proc_entry,
ring_proc_dev_get_info /* read */,
dev_ptr);
#ifdef ENABLE_PROC_WRITE_RULE
#if(LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31))
if(dev_ptr->device_type != standard_nic_family) {
struct proc_dir_entry *entry;
entry= create_proc_read_entry(PROC_RULES, 0666 /* rw */,
dev_ptr->proc_entry,
ring_proc_dev_rule_read,
dev_ptr);
if(entry)
entry->write_proc = ring_proc_dev_rule_write;
}
#endif
#endif
#if(LINUX_VERSION_CODE >= KERNEL_VERSION(3,1,0))
strncpy(dev_ptr->proc_entry->name, dev->name, dev_ptr->proc_entry->namelen);
dev_ptr->proc_entry->name[dev_ptr->proc_entry->namelen /* size is namelen+1 */] = '\0';
#else
dev_ptr->proc_entry->name = dev->name;
#endif
break;
}
}
}
break;
default:
if(unlikely(enable_debug))
printk("[PF_RING] packet_notifier(%s): unhandled message [msg=%lu][pfring_ptr=%p]\n",
dev->name, msg, dev->pfring_ptr);
break;
}
}
return NOTIFY_DONE;
}
/* ************************************ */
static struct notifier_block ring_netdev_notifier = {
.notifier_call = ring_notifier,
};
/* ************************************ */
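/* Module unload: unhook every device, free cluster fragments, rings and DNA
device entries, then unregister the protocol family and the netdev notifier. */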
static void __exit ring_exit(void)
{
struct list_head *ptr, *tmp_ptr;
struct pfring_hooks *hook;
pfring_enabled = 0;
unregister_device_handler();
list_del(&any_device_element.device_list);
list_for_each_safe(ptr, tmp_ptr, &ring_aware_device_list) {
ring_device_element *dev_ptr;
dev_ptr = list_entry(ptr, ring_device_element, device_list);
hook = (struct pfring_hooks*)dev_ptr->dev->pfring_ptr;
#ifdef ENABLE_PROC_WRITE_RULE
/* Remove /proc entry for the selected device */
if(dev_ptr->device_type != standard_nic_family)
remove_proc_entry(PROC_RULES, dev_ptr->proc_entry);
#endif
remove_proc_entry(PROC_INFO, dev_ptr->proc_entry);
remove_proc_entry(dev_ptr->dev->name, ring_proc_dev_dir);
if(hook != NULL && hook->magic == PF_RING) {
if(unlikely(enable_debug)) printk("[PF_RING] Unregister hook for %s\n", dev_ptr->dev->name);
dev_ptr->dev->pfring_ptr = NULL; /* Unhook PF_RING */
}
list_del(ptr);
kfree(dev_ptr);
}
if(num_cluster_fragments > 0) {
int i;
for(i=0; i<NUM_FRAGMENTS_HASH_SLOTS; i++) {
if(cluster_fragment_hash[i] != NULL) {
struct hash_fragment_node *next, *head;
head = cluster_fragment_hash[i];
while(head != NULL) {
next = head->next;
kfree(head);
head = next;
}
}
}
}
term_lockless_list(&ring_table, 1 /* free memory */);
term_lockless_list(&ring_cluster_list, 1 /* free memory */);
term_lockless_list(&delayed_memory_table, 1 /* free memory */);
list_for_each_safe(ptr, tmp_ptr, &ring_dna_devices_list) {
dna_device_list *elem;
elem = list_entry(ptr, dna_device_list, list);
list_del(ptr);
kfree(elem);
}
sock_unregister(PF_RING);
#if(LINUX_VERSION_CODE > KERNEL_VERSION(2,6,11))
proto_unregister(&ring_proto);
#endif
unregister_netdevice_notifier(&ring_netdev_notifier);
ring_proc_term();
if(loobpack_test_buffer != NULL)
kfree(loobpack_test_buffer);
printk("[PF_RING] Module unloaded\n");
}
/* ************************************ */
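/* Module load: register the protocol, initialize the global lists and the
'any'/'none' pseudo-devices, create the /proc tree and install the netdevice
notifier before enabling packet capture. */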
static int __init ring_init(void)
{
static struct net_device any_dev, none_dev;
int i;
#if(LINUX_VERSION_CODE > KERNEL_VERSION(2,6,11))
int rc;
#endif
printk("[PF_RING] Welcome to PF_RING %s ($Revision: %s$)\n"
"(C) 2004-13 ntop.org\n",
RING_VERSION, SVN_REV);
#if(LINUX_VERSION_CODE > KERNEL_VERSION(2,6,11))
if((rc = proto_register(&ring_proto, 0)) != 0)
return(rc);
#endif
init_lockless_list(&ring_table);
init_lockless_list(&ring_cluster_list);
init_lockless_list(&delayed_memory_table);
INIT_LIST_HEAD(&virtual_filtering_devices_list);
INIT_LIST_HEAD(&ring_aware_device_list);
INIT_LIST_HEAD(&ring_dna_devices_list);
INIT_LIST_HEAD(&userspace_ring_list);
INIT_LIST_HEAD(&dna_cluster_list);
INIT_LIST_HEAD(&cluster_referee_list);
for(i = 0; i < MAX_NUM_DEVICES; i++)
INIT_LIST_HEAD(&device_ring_list[i]);
init_ring_readers();
memset(&any_dev, 0, sizeof(any_dev));
strcpy(any_dev.name, "any");
any_dev.ifindex = MAX_NUM_IFIDX-1, any_dev.type = ARPHRD_ETHER;
memset(&any_device_element, 0, sizeof(any_device_element));
any_device_element.dev = &any_dev, any_device_element.device_type = standard_nic_family;
INIT_LIST_HEAD(&any_device_element.device_list);
list_add(&any_device_element.device_list, &ring_aware_device_list);
memset(&none_dev, 0, sizeof(none_dev));
strcpy(none_dev.name, "none");
none_dev.ifindex = MAX_NUM_IFIDX-2, none_dev.type = ARPHRD_ETHER;
memset(&none_device_element, 0, sizeof(none_device_element));
none_device_element.dev = &none_dev, none_device_element.device_type = standard_nic_family;
ring_proc_init();
sock_register(&ring_family_ops);
register_netdevice_notifier(&ring_netdev_notifier);
/* Sanity check */
if(transparent_mode > driver2pf_ring_non_transparent)
transparent_mode = standard_linux_path;
printk("[PF_RING] Min # ring slots %d\n", min_num_slots);
printk("[PF_RING] Slot version %d\n",
RING_FLOWSLOT_VERSION);
printk("[PF_RING] Capture TX %s\n",
enable_tx_capture ? "Yes [RX+TX]" : "No [RX only]");
printk("[PF_RING] Transparent Mode %d\n",
transparent_mode);
printk("[PF_RING] IP Defragment %s\n",
enable_ip_defrag ? "Yes" : "No");
printk("[PF_RING] Initialized correctly\n");
register_device_handler();
pfring_enabled = 1;
return 0;
}
module_init(ring_init);
module_exit(ring_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Luca Deri <deri@ntop.org>");
MODULE_DESCRIPTION("Packet capture acceleration and analysis");
MODULE_ALIAS_NETPROTO(PF_RING);