net: Add Open vSwitch kernel components.

Open vSwitch is a multilayer Ethernet switch targeted at virtualized
environments.  In addition to supporting a variety of features
expected in a traditional hardware switch, it enables fine-grained
programmatic extension and flow-based control of the network.
This control is useful in a wide variety of applications but is
particularly important in multi-server virtualization deployments,
which are often characterized by highly dynamic endpoints and the need
to maintain logical abstractions for multiple tenants.

The Open vSwitch datapath provides an in-kernel fast path for packet
forwarding.  It is complemented by a userspace daemon, ovs-vswitchd,
which is able to accept configuration from a variety of sources and
translate it into packet processing rules.

See http://openvswitch.org for more information and userspace
utilities.

Signed-off-by: Jesse Gross <jesse@nicira.com>
This commit is contained in:
Jesse Gross 2011-10-25 19:26:31 -07:00
parent 75f2811c64
commit ccb1352e76
20 changed files with 5874 additions and 0 deletions

28
net/openvswitch/Kconfig Normal file
View file

@ -0,0 +1,28 @@
#
# Open vSwitch
#
config OPENVSWITCH
tristate "Open vSwitch"
---help---
Open vSwitch is a multilayer Ethernet switch targeted at virtualized
environments. In addition to supporting a variety of features
expected in a traditional hardware switch, it enables fine-grained
programmatic extension and flow-based control of the network. This
control is useful in a wide variety of applications but is
particularly important in multi-server virtualization deployments,
which are often characterized by highly dynamic endpoints and the
need to maintain logical abstractions for multiple tenants.
The Open vSwitch datapath provides an in-kernel fast path for packet
forwarding. It is complemented by a userspace daemon, ovs-vswitchd,
which is able to accept configuration from a variety of sources and
translate it into packet processing rules.
See http://openvswitch.org for more information and userspace
utilities.
To compile this code as a module, choose M here: the module will be
called openvswitch.
If unsure, say N.

14
net/openvswitch/Makefile Normal file
View file

@ -0,0 +1,14 @@
#
# Makefile for Open vSwitch.
#
obj-$(CONFIG_OPENVSWITCH) += openvswitch.o
openvswitch-y := \
actions.o \
datapath.o \
dp_notify.o \
flow.o \
vport.o \
vport-internal_dev.o \
vport-netdev.o \

415
net/openvswitch/actions.c Normal file
View file

@ -0,0 +1,415 @@
/*
* Copyright (c) 2007-2011 Nicira Networks.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/skbuff.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/openvswitch.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/in6.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <net/ip.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include "datapath.h"
#include "vport.h"
static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
const struct nlattr *attr, int len, bool keep_skb);
static int make_writable(struct sk_buff *skb, int write_len)
{
if (!skb_cloned(skb) || skb_clone_writable(skb, write_len))
return 0;
return pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
}
/* remove VLAN header from packet and update csum accrodingly. */
static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci)
{
struct vlan_hdr *vhdr;
int err;
err = make_writable(skb, VLAN_ETH_HLEN);
if (unlikely(err))
return err;
if (skb->ip_summed == CHECKSUM_COMPLETE)
skb->csum = csum_sub(skb->csum, csum_partial(skb->data
+ ETH_HLEN, VLAN_HLEN, 0));
vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN);
*current_tci = vhdr->h_vlan_TCI;
memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN);
__skb_pull(skb, VLAN_HLEN);
vlan_set_encap_proto(skb, vhdr);
skb->mac_header += VLAN_HLEN;
skb_reset_mac_len(skb);
return 0;
}
static int pop_vlan(struct sk_buff *skb)
{
__be16 tci;
int err;
if (likely(vlan_tx_tag_present(skb))) {
skb->vlan_tci = 0;
} else {
if (unlikely(skb->protocol != htons(ETH_P_8021Q) ||
skb->len < VLAN_ETH_HLEN))
return 0;
err = __pop_vlan_tci(skb, &tci);
if (err)
return err;
}
/* move next vlan tag to hw accel tag */
if (likely(skb->protocol != htons(ETH_P_8021Q) ||
skb->len < VLAN_ETH_HLEN))
return 0;
err = __pop_vlan_tci(skb, &tci);
if (unlikely(err))
return err;
__vlan_hwaccel_put_tag(skb, ntohs(tci));
return 0;
}
static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vlan)
{
if (unlikely(vlan_tx_tag_present(skb))) {
u16 current_tag;
/* push down current VLAN tag */
current_tag = vlan_tx_tag_get(skb);
if (!__vlan_put_tag(skb, current_tag))
return -ENOMEM;
if (skb->ip_summed == CHECKSUM_COMPLETE)
skb->csum = csum_add(skb->csum, csum_partial(skb->data
+ ETH_HLEN, VLAN_HLEN, 0));
}
__vlan_hwaccel_put_tag(skb, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
return 0;
}
static int set_eth_addr(struct sk_buff *skb,
const struct ovs_key_ethernet *eth_key)
{
int err;
err = make_writable(skb, ETH_HLEN);
if (unlikely(err))
return err;
memcpy(eth_hdr(skb)->h_source, eth_key->eth_src, ETH_ALEN);
memcpy(eth_hdr(skb)->h_dest, eth_key->eth_dst, ETH_ALEN);
return 0;
}
static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
__be32 *addr, __be32 new_addr)
{
int transport_len = skb->len - skb_transport_offset(skb);
if (nh->protocol == IPPROTO_TCP) {
if (likely(transport_len >= sizeof(struct tcphdr)))
inet_proto_csum_replace4(&tcp_hdr(skb)->check, skb,
*addr, new_addr, 1);
} else if (nh->protocol == IPPROTO_UDP) {
if (likely(transport_len >= sizeof(struct udphdr)))
inet_proto_csum_replace4(&udp_hdr(skb)->check, skb,
*addr, new_addr, 1);
}
csum_replace4(&nh->check, *addr, new_addr);
skb->rxhash = 0;
*addr = new_addr;
}
static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl)
{
csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8));
nh->ttl = new_ttl;
}
static int set_ipv4(struct sk_buff *skb, const struct ovs_key_ipv4 *ipv4_key)
{
struct iphdr *nh;
int err;
err = make_writable(skb, skb_network_offset(skb) +
sizeof(struct iphdr));
if (unlikely(err))
return err;
nh = ip_hdr(skb);
if (ipv4_key->ipv4_src != nh->saddr)
set_ip_addr(skb, nh, &nh->saddr, ipv4_key->ipv4_src);
if (ipv4_key->ipv4_dst != nh->daddr)
set_ip_addr(skb, nh, &nh->daddr, ipv4_key->ipv4_dst);
if (ipv4_key->ipv4_tos != nh->tos)
ipv4_change_dsfield(nh, 0, ipv4_key->ipv4_tos);
if (ipv4_key->ipv4_ttl != nh->ttl)
set_ip_ttl(skb, nh, ipv4_key->ipv4_ttl);
return 0;
}
/* Must follow make_writable() since that can move the skb data. */
static void set_tp_port(struct sk_buff *skb, __be16 *port,
__be16 new_port, __sum16 *check)
{
inet_proto_csum_replace2(check, skb, *port, new_port, 0);
*port = new_port;
skb->rxhash = 0;
}
static int set_udp_port(struct sk_buff *skb,
const struct ovs_key_udp *udp_port_key)
{
struct udphdr *uh;
int err;
err = make_writable(skb, skb_transport_offset(skb) +
sizeof(struct udphdr));
if (unlikely(err))
return err;
uh = udp_hdr(skb);
if (udp_port_key->udp_src != uh->source)
set_tp_port(skb, &uh->source, udp_port_key->udp_src, &uh->check);
if (udp_port_key->udp_dst != uh->dest)
set_tp_port(skb, &uh->dest, udp_port_key->udp_dst, &uh->check);
return 0;
}
static int set_tcp_port(struct sk_buff *skb,
const struct ovs_key_tcp *tcp_port_key)
{
struct tcphdr *th;
int err;
err = make_writable(skb, skb_transport_offset(skb) +
sizeof(struct tcphdr));
if (unlikely(err))
return err;
th = tcp_hdr(skb);
if (tcp_port_key->tcp_src != th->source)
set_tp_port(skb, &th->source, tcp_port_key->tcp_src, &th->check);
if (tcp_port_key->tcp_dst != th->dest)
set_tp_port(skb, &th->dest, tcp_port_key->tcp_dst, &th->check);
return 0;
}
static int do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
{
struct vport *vport;
if (unlikely(!skb))
return -ENOMEM;
vport = rcu_dereference(dp->ports[out_port]);
if (unlikely(!vport)) {
kfree_skb(skb);
return -ENODEV;
}
ovs_vport_send(vport, skb);
return 0;
}
static int output_userspace(struct datapath *dp, struct sk_buff *skb,
const struct nlattr *attr)
{
struct dp_upcall_info upcall;
const struct nlattr *a;
int rem;
upcall.cmd = OVS_PACKET_CMD_ACTION;
upcall.key = &OVS_CB(skb)->flow->key;
upcall.userdata = NULL;
upcall.pid = 0;
for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
a = nla_next(a, &rem)) {
switch (nla_type(a)) {
case OVS_USERSPACE_ATTR_USERDATA:
upcall.userdata = a;
break;
case OVS_USERSPACE_ATTR_PID:
upcall.pid = nla_get_u32(a);
break;
}
}
return ovs_dp_upcall(dp, skb, &upcall);
}
static int sample(struct datapath *dp, struct sk_buff *skb,
const struct nlattr *attr)
{
const struct nlattr *acts_list = NULL;
const struct nlattr *a;
int rem;
for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
a = nla_next(a, &rem)) {
switch (nla_type(a)) {
case OVS_SAMPLE_ATTR_PROBABILITY:
if (net_random() >= nla_get_u32(a))
return 0;
break;
case OVS_SAMPLE_ATTR_ACTIONS:
acts_list = a;
break;
}
}
return do_execute_actions(dp, skb, nla_data(acts_list),
nla_len(acts_list), true);
}
static int execute_set_action(struct sk_buff *skb,
const struct nlattr *nested_attr)
{
int err = 0;
switch (nla_type(nested_attr)) {
case OVS_KEY_ATTR_PRIORITY:
skb->priority = nla_get_u32(nested_attr);
break;
case OVS_KEY_ATTR_ETHERNET:
err = set_eth_addr(skb, nla_data(nested_attr));
break;
case OVS_KEY_ATTR_IPV4:
err = set_ipv4(skb, nla_data(nested_attr));
break;
case OVS_KEY_ATTR_TCP:
err = set_tcp_port(skb, nla_data(nested_attr));
break;
case OVS_KEY_ATTR_UDP:
err = set_udp_port(skb, nla_data(nested_attr));
break;
}
return err;
}
/* Execute a list of actions against 'skb'. */
static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
const struct nlattr *attr, int len, bool keep_skb)
{
/* Every output action needs a separate clone of 'skb', but the common
* case is just a single output action, so that doing a clone and
* then freeing the original skbuff is wasteful. So the following code
* is slightly obscure just to avoid that. */
int prev_port = -1;
const struct nlattr *a;
int rem;
for (a = attr, rem = len; rem > 0;
a = nla_next(a, &rem)) {
int err = 0;
if (prev_port != -1) {
do_output(dp, skb_clone(skb, GFP_ATOMIC), prev_port);
prev_port = -1;
}
switch (nla_type(a)) {
case OVS_ACTION_ATTR_OUTPUT:
prev_port = nla_get_u32(a);
break;
case OVS_ACTION_ATTR_USERSPACE:
output_userspace(dp, skb, a);
break;
case OVS_ACTION_ATTR_PUSH_VLAN:
err = push_vlan(skb, nla_data(a));
if (unlikely(err)) /* skb already freed. */
return err;
break;
case OVS_ACTION_ATTR_POP_VLAN:
err = pop_vlan(skb);
break;
case OVS_ACTION_ATTR_SET:
err = execute_set_action(skb, nla_data(a));
break;
case OVS_ACTION_ATTR_SAMPLE:
err = sample(dp, skb, a);
break;
}
if (unlikely(err)) {
kfree_skb(skb);
return err;
}
}
if (prev_port != -1) {
if (keep_skb)
skb = skb_clone(skb, GFP_ATOMIC);
do_output(dp, skb, prev_port);
} else if (!keep_skb)
consume_skb(skb);
return 0;
}
/* Execute a list of actions against 'skb'. */
int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb)
{
struct sw_flow_actions *acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);
return do_execute_actions(dp, skb, acts->actions,
acts->actions_len, false);
}

1912
net/openvswitch/datapath.c Normal file

File diff suppressed because it is too large Load diff

125
net/openvswitch/datapath.h Normal file
View file

@ -0,0 +1,125 @@
/*
* Copyright (c) 2007-2011 Nicira Networks.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA
*/
#ifndef DATAPATH_H
#define DATAPATH_H 1
#include <asm/page.h>
#include <linux/kernel.h>
#include <linux/mutex.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/u64_stats_sync.h>
#include <linux/version.h>
#include "flow.h"
struct vport;
#define DP_MAX_PORTS 1024
#define SAMPLE_ACTION_DEPTH 3
/**
* struct dp_stats_percpu - per-cpu packet processing statistics for a given
* datapath.
* @n_hit: Number of received packets for which a matching flow was found in
* the flow table.
* @n_miss: Number of received packets that had no matching flow in the flow
* table. The sum of @n_hit and @n_miss is the number of packets that have
* been received by the datapath.
* @n_lost: Number of received packets that had no matching flow in the flow
* table that could not be sent to userspace (normally due to an overflow in
* one of the datapath's queues).
*/
struct dp_stats_percpu {
u64 n_hit;
u64 n_missed;
u64 n_lost;
struct u64_stats_sync sync;
};
/**
* struct datapath - datapath for flow-based packet switching
* @rcu: RCU callback head for deferred destruction.
* @list_node: Element in global 'dps' list.
* @n_flows: Number of flows currently in flow table.
* @table: Current flow table. Protected by genl_lock and RCU.
* @ports: Map from port number to &struct vport. %OVSP_LOCAL port
* always exists, other ports may be %NULL. Protected by RTNL and RCU.
* @port_list: List of all ports in @ports in arbitrary order. RTNL required
* to iterate or modify.
* @stats_percpu: Per-CPU datapath statistics.
*
* Context: See the comment on locking at the top of datapath.c for additional
* locking information.
*/
struct datapath {
struct rcu_head rcu;
struct list_head list_node;
/* Flow table. */
struct flow_table __rcu *table;
/* Switch ports. */
struct vport __rcu *ports[DP_MAX_PORTS];
struct list_head port_list;
/* Stats. */
struct dp_stats_percpu __percpu *stats_percpu;
};
/**
* struct ovs_skb_cb - OVS data in skb CB
* @flow: The flow associated with this packet. May be %NULL if no flow.
*/
struct ovs_skb_cb {
struct sw_flow *flow;
};
#define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
/**
* struct dp_upcall - metadata to include with a packet to send to userspace
* @cmd: One of %OVS_PACKET_CMD_*.
* @key: Becomes %OVS_PACKET_ATTR_KEY. Must be nonnull.
* @userdata: If nonnull, its u64 value is extracted and passed to userspace as
* %OVS_PACKET_ATTR_USERDATA.
* @pid: Netlink PID to which packet should be sent. If @pid is 0 then no
* packet is sent and the packet is accounted in the datapath's @n_lost
* counter.
*/
struct dp_upcall_info {
u8 cmd;
const struct sw_flow_key *key;
const struct nlattr *userdata;
u32 pid;
};
extern struct notifier_block ovs_dp_device_notifier;
extern struct genl_multicast_group ovs_dp_vport_multicast_group;
void ovs_dp_process_received_packet(struct vport *, struct sk_buff *);
void ovs_dp_detach_port(struct vport *);
int ovs_dp_upcall(struct datapath *, struct sk_buff *,
const struct dp_upcall_info *);
const char *ovs_dp_name(const struct datapath *dp);
struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq,
u8 cmd);
int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb);
#endif /* datapath.h */

View file

@ -0,0 +1,66 @@
/*
* Copyright (c) 2007-2011 Nicira Networks.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA
*/
#include <linux/netdevice.h>
#include <net/genetlink.h>
#include "datapath.h"
#include "vport-internal_dev.h"
#include "vport-netdev.h"
static int dp_device_event(struct notifier_block *unused, unsigned long event,
void *ptr)
{
struct net_device *dev = ptr;
struct vport *vport;
if (ovs_is_internal_dev(dev))
vport = ovs_internal_dev_get_vport(dev);
else
vport = ovs_netdev_get_vport(dev);
if (!vport)
return NOTIFY_DONE;
switch (event) {
case NETDEV_UNREGISTER:
if (!ovs_is_internal_dev(dev)) {
struct sk_buff *notify;
notify = ovs_vport_cmd_build_info(vport, 0, 0,
OVS_VPORT_CMD_DEL);
ovs_dp_detach_port(vport);
if (IS_ERR(notify)) {
netlink_set_err(init_net.genl_sock, 0,
ovs_dp_vport_multicast_group.id,
PTR_ERR(notify));
break;
}
genlmsg_multicast(notify, 0, ovs_dp_vport_multicast_group.id,
GFP_KERNEL);
}
break;
}
return NOTIFY_DONE;
}
struct notifier_block ovs_dp_device_notifier = {
.notifier_call = dp_device_event
};

1346
net/openvswitch/flow.c Normal file

File diff suppressed because it is too large Load diff

199
net/openvswitch/flow.h Normal file
View file

@ -0,0 +1,199 @@
/*
* Copyright (c) 2007-2011 Nicira Networks.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA
*/
#ifndef FLOW_H
#define FLOW_H 1
#include <linux/kernel.h>
#include <linux/netlink.h>
#include <linux/openvswitch.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/rcupdate.h>
#include <linux/if_ether.h>
#include <linux/in6.h>
#include <linux/jiffies.h>
#include <linux/time.h>
#include <linux/flex_array.h>
#include <net/inet_ecn.h>
struct sk_buff;
struct sw_flow_actions {
struct rcu_head rcu;
u32 actions_len;
struct nlattr actions[];
};
struct sw_flow_key {
struct {
u32 priority; /* Packet QoS priority. */
u16 in_port; /* Input switch port (or USHRT_MAX). */
} phy;
struct {
u8 src[ETH_ALEN]; /* Ethernet source address. */
u8 dst[ETH_ALEN]; /* Ethernet destination address. */
__be16 tci; /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */
__be16 type; /* Ethernet frame type. */
} eth;
struct {
u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */
u8 tos; /* IP ToS. */
u8 ttl; /* IP TTL/hop limit. */
u8 frag; /* One of OVS_FRAG_TYPE_*. */
} ip;
union {
struct {
struct {
__be32 src; /* IP source address. */
__be32 dst; /* IP destination address. */
} addr;
union {
struct {
__be16 src; /* TCP/UDP source port. */
__be16 dst; /* TCP/UDP destination port. */
} tp;
struct {
u8 sha[ETH_ALEN]; /* ARP source hardware address. */
u8 tha[ETH_ALEN]; /* ARP target hardware address. */
} arp;
};
} ipv4;
struct {
struct {
struct in6_addr src; /* IPv6 source address. */
struct in6_addr dst; /* IPv6 destination address. */
} addr;
__be32 label; /* IPv6 flow label. */
struct {
__be16 src; /* TCP/UDP source port. */
__be16 dst; /* TCP/UDP destination port. */
} tp;
struct {
struct in6_addr target; /* ND target address. */
u8 sll[ETH_ALEN]; /* ND source link layer address. */
u8 tll[ETH_ALEN]; /* ND target link layer address. */
} nd;
} ipv6;
};
};
struct sw_flow {
struct rcu_head rcu;
struct hlist_node hash_node[2];
u32 hash;
struct sw_flow_key key;
struct sw_flow_actions __rcu *sf_acts;
spinlock_t lock; /* Lock for values below. */
unsigned long used; /* Last used time (in jiffies). */
u64 packet_count; /* Number of packets matched. */
u64 byte_count; /* Number of bytes matched. */
u8 tcp_flags; /* Union of seen TCP flags. */
};
struct arp_eth_header {
__be16 ar_hrd; /* format of hardware address */
__be16 ar_pro; /* format of protocol address */
unsigned char ar_hln; /* length of hardware address */
unsigned char ar_pln; /* length of protocol address */
__be16 ar_op; /* ARP opcode (command) */
/* Ethernet+IPv4 specific members. */
unsigned char ar_sha[ETH_ALEN]; /* sender hardware address */
unsigned char ar_sip[4]; /* sender IP address */
unsigned char ar_tha[ETH_ALEN]; /* target hardware address */
unsigned char ar_tip[4]; /* target IP address */
} __packed;
int ovs_flow_init(void);
void ovs_flow_exit(void);
struct sw_flow *ovs_flow_alloc(void);
void ovs_flow_deferred_free(struct sw_flow *);
void ovs_flow_free(struct sw_flow *flow);
struct sw_flow_actions *ovs_flow_actions_alloc(const struct nlattr *);
void ovs_flow_deferred_free_acts(struct sw_flow_actions *);
int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *,
int *key_lenp);
void ovs_flow_used(struct sw_flow *, struct sk_buff *);
u64 ovs_flow_used_time(unsigned long flow_jiffies);
/* Upper bound on the length of a nlattr-formatted flow key. The longest
* nlattr-formatted flow key would be:
*
* struct pad nl hdr total
* ------ --- ------ -----
* OVS_KEY_ATTR_PRIORITY 4 -- 4 8
* OVS_KEY_ATTR_IN_PORT 4 -- 4 8
* OVS_KEY_ATTR_ETHERNET 12 -- 4 16
* OVS_KEY_ATTR_8021Q 4 -- 4 8
* OVS_KEY_ATTR_ETHERTYPE 2 2 4 8
* OVS_KEY_ATTR_IPV6 40 -- 4 44
* OVS_KEY_ATTR_ICMPV6 2 2 4 8
* OVS_KEY_ATTR_ND 28 -- 4 32
* -------------------------------------------------
* total 132
*/
#define FLOW_BUFSIZE 132
int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *);
int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
const struct nlattr *);
int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port,
const struct nlattr *);
#define TBL_MIN_BUCKETS 1024
struct flow_table {
struct flex_array *buckets;
unsigned int count, n_buckets;
struct rcu_head rcu;
int node_ver;
u32 hash_seed;
bool keep_flows;
};
static inline int ovs_flow_tbl_count(struct flow_table *table)
{
return table->count;
}
static inline int ovs_flow_tbl_need_to_expand(struct flow_table *table)
{
return (table->count > table->n_buckets);
}
struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table,
struct sw_flow_key *key, int len);
void ovs_flow_tbl_destroy(struct flow_table *table);
void ovs_flow_tbl_deferred_destroy(struct flow_table *table);
struct flow_table *ovs_flow_tbl_alloc(int new_size);
struct flow_table *ovs_flow_tbl_expand(struct flow_table *table);
struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table);
void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow);
void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow);
u32 ovs_flow_hash(const struct sw_flow_key *key, int key_len);
struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *idx);
extern const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1];
#endif /* flow.h */

View file

@ -0,0 +1,241 @@
/*
* Copyright (c) 2007-2011 Nicira Networks.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA
*/
#include <linux/hardirq.h>
#include <linux/if_vlan.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/skbuff.h>
#include <linux/version.h>
#include "datapath.h"
#include "vport-internal_dev.h"
#include "vport-netdev.h"
struct internal_dev {
struct vport *vport;
};
static struct internal_dev *internal_dev_priv(struct net_device *netdev)
{
return netdev_priv(netdev);
}
/* This function is only called by the kernel network layer.*/
static struct rtnl_link_stats64 *internal_dev_get_stats(struct net_device *netdev,
struct rtnl_link_stats64 *stats)
{
struct vport *vport = ovs_internal_dev_get_vport(netdev);
struct ovs_vport_stats vport_stats;
ovs_vport_get_stats(vport, &vport_stats);
/* The tx and rx stats need to be swapped because the
* switch and host OS have opposite perspectives. */
stats->rx_packets = vport_stats.tx_packets;
stats->tx_packets = vport_stats.rx_packets;
stats->rx_bytes = vport_stats.tx_bytes;
stats->tx_bytes = vport_stats.rx_bytes;
stats->rx_errors = vport_stats.tx_errors;
stats->tx_errors = vport_stats.rx_errors;
stats->rx_dropped = vport_stats.tx_dropped;
stats->tx_dropped = vport_stats.rx_dropped;
return stats;
}
static int internal_dev_mac_addr(struct net_device *dev, void *p)
{
struct sockaddr *addr = p;
if (!is_valid_ether_addr(addr->sa_data))
return -EADDRNOTAVAIL;
memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
return 0;
}
/* Called with rcu_read_lock_bh. */
static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev)
{
rcu_read_lock();
ovs_vport_receive(internal_dev_priv(netdev)->vport, skb);
rcu_read_unlock();
return 0;
}
static int internal_dev_open(struct net_device *netdev)
{
netif_start_queue(netdev);
return 0;
}
static int internal_dev_stop(struct net_device *netdev)
{
netif_stop_queue(netdev);
return 0;
}
static void internal_dev_getinfo(struct net_device *netdev,
struct ethtool_drvinfo *info)
{
strcpy(info->driver, "openvswitch");
}
static const struct ethtool_ops internal_dev_ethtool_ops = {
.get_drvinfo = internal_dev_getinfo,
.get_link = ethtool_op_get_link,
};
static int internal_dev_change_mtu(struct net_device *netdev, int new_mtu)
{
if (new_mtu < 68)
return -EINVAL;
netdev->mtu = new_mtu;
return 0;
}
static void internal_dev_destructor(struct net_device *dev)
{
struct vport *vport = ovs_internal_dev_get_vport(dev);
ovs_vport_free(vport);
free_netdev(dev);
}
static const struct net_device_ops internal_dev_netdev_ops = {
.ndo_open = internal_dev_open,
.ndo_stop = internal_dev_stop,
.ndo_start_xmit = internal_dev_xmit,
.ndo_set_mac_address = internal_dev_mac_addr,
.ndo_change_mtu = internal_dev_change_mtu,
.ndo_get_stats64 = internal_dev_get_stats,
};
static void do_setup(struct net_device *netdev)
{
ether_setup(netdev);
netdev->netdev_ops = &internal_dev_netdev_ops;
netdev->priv_flags &= ~IFF_TX_SKB_SHARING;
netdev->destructor = internal_dev_destructor;
SET_ETHTOOL_OPS(netdev, &internal_dev_ethtool_ops);
netdev->tx_queue_len = 0;
netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST |
NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | NETIF_F_TSO;
netdev->vlan_features = netdev->features;
netdev->features |= NETIF_F_HW_VLAN_TX;
netdev->hw_features = netdev->features & ~NETIF_F_LLTX;
random_ether_addr(netdev->dev_addr);
}
static struct vport *internal_dev_create(const struct vport_parms *parms)
{
struct vport *vport;
struct netdev_vport *netdev_vport;
struct internal_dev *internal_dev;
int err;
vport = ovs_vport_alloc(sizeof(struct netdev_vport),
&ovs_internal_vport_ops, parms);
if (IS_ERR(vport)) {
err = PTR_ERR(vport);
goto error;
}
netdev_vport = netdev_vport_priv(vport);
netdev_vport->dev = alloc_netdev(sizeof(struct internal_dev),
parms->name, do_setup);
if (!netdev_vport->dev) {
err = -ENOMEM;
goto error_free_vport;
}
internal_dev = internal_dev_priv(netdev_vport->dev);
internal_dev->vport = vport;
err = register_netdevice(netdev_vport->dev);
if (err)
goto error_free_netdev;
dev_set_promiscuity(netdev_vport->dev, 1);
netif_start_queue(netdev_vport->dev);
return vport;
error_free_netdev:
free_netdev(netdev_vport->dev);
error_free_vport:
ovs_vport_free(vport);
error:
return ERR_PTR(err);
}
static void internal_dev_destroy(struct vport *vport)
{
struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
netif_stop_queue(netdev_vport->dev);
dev_set_promiscuity(netdev_vport->dev, -1);
/* unregister_netdevice() waits for an RCU grace period. */
unregister_netdevice(netdev_vport->dev);
}
static int internal_dev_recv(struct vport *vport, struct sk_buff *skb)
{
struct net_device *netdev = netdev_vport_priv(vport)->dev;
int len;
len = skb->len;
skb->dev = netdev;
skb->pkt_type = PACKET_HOST;
skb->protocol = eth_type_trans(skb, netdev);
netif_rx(skb);
return len;
}
const struct vport_ops ovs_internal_vport_ops = {
.type = OVS_VPORT_TYPE_INTERNAL,
.create = internal_dev_create,
.destroy = internal_dev_destroy,
.get_name = ovs_netdev_get_name,
.get_ifindex = ovs_netdev_get_ifindex,
.send = internal_dev_recv,
};
int ovs_is_internal_dev(const struct net_device *netdev)
{
return netdev->netdev_ops == &internal_dev_netdev_ops;
}
struct vport *ovs_internal_dev_get_vport(struct net_device *netdev)
{
if (!ovs_is_internal_dev(netdev))
return NULL;
return internal_dev_priv(netdev)->vport;
}

View file

@ -0,0 +1,28 @@
/*
* Copyright (c) 2007-2011 Nicira Networks.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA
*/
#ifndef VPORT_INTERNAL_DEV_H
#define VPORT_INTERNAL_DEV_H 1
#include "datapath.h"
#include "vport.h"
int ovs_is_internal_dev(const struct net_device *);
struct vport *ovs_internal_dev_get_vport(struct net_device *);
#endif /* vport-internal_dev.h */

View file

@ -0,0 +1,198 @@
/*
* Copyright (c) 2007-2011 Nicira Networks.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/if_arp.h>
#include <linux/if_bridge.h>
#include <linux/if_vlan.h>
#include <linux/kernel.h>
#include <linux/llc.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
#include <net/llc.h>
#include "datapath.h"
#include "vport-internal_dev.h"
#include "vport-netdev.h"
/* Must be called with rcu_read_lock. */
static void netdev_port_receive(struct vport *vport, struct sk_buff *skb)
{
if (unlikely(!vport)) {
kfree_skb(skb);
return;
}
/* Make our own copy of the packet. Otherwise we will mangle the
* packet for anyone who came before us (e.g. tcpdump via AF_PACKET).
* (No one comes after us, since we tell handle_bridge() that we took
* the packet.) */
skb = skb_share_check(skb, GFP_ATOMIC);
if (unlikely(!skb))
return;
skb_push(skb, ETH_HLEN);
ovs_vport_receive(vport, skb);
}
/* Called with rcu_read_lock and bottom-halves disabled. */
static rx_handler_result_t netdev_frame_hook(struct sk_buff **pskb)
{
struct sk_buff *skb = *pskb;
struct vport *vport;
if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
return RX_HANDLER_PASS;
vport = ovs_netdev_get_vport(skb->dev);
netdev_port_receive(vport, skb);
return RX_HANDLER_CONSUMED;
}
static struct vport *netdev_create(const struct vport_parms *parms)
{
struct vport *vport;
struct netdev_vport *netdev_vport;
int err;
vport = ovs_vport_alloc(sizeof(struct netdev_vport),
&ovs_netdev_vport_ops, parms);
if (IS_ERR(vport)) {
err = PTR_ERR(vport);
goto error;
}
netdev_vport = netdev_vport_priv(vport);
netdev_vport->dev = dev_get_by_name(&init_net, parms->name);
if (!netdev_vport->dev) {
err = -ENODEV;
goto error_free_vport;
}
if (netdev_vport->dev->flags & IFF_LOOPBACK ||
netdev_vport->dev->type != ARPHRD_ETHER ||
ovs_is_internal_dev(netdev_vport->dev)) {
err = -EINVAL;
goto error_put;
}
err = netdev_rx_handler_register(netdev_vport->dev, netdev_frame_hook,
vport);
if (err)
goto error_put;
dev_set_promiscuity(netdev_vport->dev, 1);
netdev_vport->dev->priv_flags |= IFF_OVS_DATAPATH;
return vport;
error_put:
dev_put(netdev_vport->dev);
error_free_vport:
ovs_vport_free(vport);
error:
return ERR_PTR(err);
}
static void netdev_destroy(struct vport *vport)
{
struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
netdev_vport->dev->priv_flags &= ~IFF_OVS_DATAPATH;
netdev_rx_handler_unregister(netdev_vport->dev);
dev_set_promiscuity(netdev_vport->dev, -1);
synchronize_rcu();
dev_put(netdev_vport->dev);
ovs_vport_free(vport);
}
const char *ovs_netdev_get_name(const struct vport *vport)
{
const struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
return netdev_vport->dev->name;
}
int ovs_netdev_get_ifindex(const struct vport *vport)
{
const struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
return netdev_vport->dev->ifindex;
}
static unsigned packet_length(const struct sk_buff *skb)
{
unsigned length = skb->len - ETH_HLEN;
if (skb->protocol == htons(ETH_P_8021Q))
length -= VLAN_HLEN;
return length;
}
static int netdev_send(struct vport *vport, struct sk_buff *skb)
{
struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
int mtu = netdev_vport->dev->mtu;
int len;
if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) {
if (net_ratelimit())
pr_warn("%s: dropped over-mtu packet: %d > %d\n",
ovs_dp_name(vport->dp), packet_length(skb), mtu);
goto error;
}
if (unlikely(skb_warn_if_lro(skb)))
goto error;
skb->dev = netdev_vport->dev;
len = skb->len;
dev_queue_xmit(skb);
return len;
error:
kfree_skb(skb);
ovs_vport_record_error(vport, VPORT_E_TX_DROPPED);
return 0;
}
/* Returns null if this device is not attached to a datapath. */
struct vport *ovs_netdev_get_vport(struct net_device *dev)
{
if (likely(dev->priv_flags & IFF_OVS_DATAPATH))
return (struct vport *)
rcu_dereference_rtnl(dev->rx_handler_data);
else
return NULL;
}
const struct vport_ops ovs_netdev_vport_ops = {
.type = OVS_VPORT_TYPE_NETDEV,
.create = netdev_create,
.destroy = netdev_destroy,
.get_name = ovs_netdev_get_name,
.get_ifindex = ovs_netdev_get_ifindex,
.send = netdev_send,
};

View file

@ -0,0 +1,42 @@
/*
* Copyright (c) 2007-2011 Nicira Networks.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA
*/
#ifndef VPORT_NETDEV_H
#define VPORT_NETDEV_H 1
#include <linux/netdevice.h>
#include "vport.h"
struct vport *ovs_netdev_get_vport(struct net_device *dev);
struct netdev_vport {
struct net_device *dev;
};
static inline struct netdev_vport *
netdev_vport_priv(const struct vport *vport)
{
return vport_priv(vport);
}
const char *ovs_netdev_get_name(const struct vport *);
const char *ovs_netdev_get_config(const struct vport *);
int ovs_netdev_get_ifindex(const struct vport *);
#endif /* vport_netdev.h */

396
net/openvswitch/vport.c Normal file
View file

@ -0,0 +1,396 @@
/*
* Copyright (c) 2007-2011 Nicira Networks.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA
*/
#include <linux/dcache.h>
#include <linux/etherdevice.h>
#include <linux/if.h>
#include <linux/if_vlan.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/rtnetlink.h>
#include <linux/compat.h>
#include <linux/version.h>
#include "vport.h"
#include "vport-internal_dev.h"
/* List of statically compiled vport implementations. Don't forget to also
* add yours to the list at the bottom of vport.h. */
static const struct vport_ops *vport_ops_list[] = {
&ovs_netdev_vport_ops,
&ovs_internal_vport_ops,
};
/* Protected by RCU read lock for reading, RTNL lock for writing. */
static struct hlist_head *dev_table;
#define VPORT_HASH_BUCKETS 1024
/**
* ovs_vport_init - initialize vport subsystem
*
* Called at module load time to initialize the vport subsystem.
*/
int ovs_vport_init(void)
{
dev_table = kzalloc(VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
GFP_KERNEL);
if (!dev_table)
return -ENOMEM;
return 0;
}
/**
* ovs_vport_exit - shutdown vport subsystem
*
* Called at module exit time to shutdown the vport subsystem.
*/
void ovs_vport_exit(void)
{
kfree(dev_table);
}
static struct hlist_head *hash_bucket(const char *name)
{
unsigned int hash = full_name_hash(name, strlen(name));
return &dev_table[hash & (VPORT_HASH_BUCKETS - 1)];
}
/**
* ovs_vport_locate - find a port that has already been created
*
* @name: name of port to find
*
* Must be called with RTNL or RCU read lock.
*/
struct vport *ovs_vport_locate(const char *name)
{
struct hlist_head *bucket = hash_bucket(name);
struct vport *vport;
struct hlist_node *node;
hlist_for_each_entry_rcu(vport, node, bucket, hash_node)
if (!strcmp(name, vport->ops->get_name(vport)))
return vport;
return NULL;
}
/**
* ovs_vport_alloc - allocate and initialize new vport
*
* @priv_size: Size of private data area to allocate.
* @ops: vport device ops
*
* Allocate and initialize a new vport defined by @ops. The vport will contain
* a private data area of size @priv_size that can be accessed using
* vport_priv(). vports that are no longer needed should be released with
* vport_free().
*/
struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
const struct vport_parms *parms)
{
struct vport *vport;
size_t alloc_size;
alloc_size = sizeof(struct vport);
if (priv_size) {
alloc_size = ALIGN(alloc_size, VPORT_ALIGN);
alloc_size += priv_size;
}
vport = kzalloc(alloc_size, GFP_KERNEL);
if (!vport)
return ERR_PTR(-ENOMEM);
vport->dp = parms->dp;
vport->port_no = parms->port_no;
vport->upcall_pid = parms->upcall_pid;
vport->ops = ops;
vport->percpu_stats = alloc_percpu(struct vport_percpu_stats);
if (!vport->percpu_stats)
return ERR_PTR(-ENOMEM);
spin_lock_init(&vport->stats_lock);
return vport;
}
/**
* ovs_vport_free - uninitialize and free vport
*
* @vport: vport to free
*
* Frees a vport allocated with vport_alloc() when it is no longer needed.
*
* The caller must ensure that an RCU grace period has passed since the last
* time @vport was in a datapath.
*/
void ovs_vport_free(struct vport *vport)
{
free_percpu(vport->percpu_stats);
kfree(vport);
}
/**
* ovs_vport_add - add vport device (for kernel callers)
*
* @parms: Information about new vport.
*
* Creates a new vport with the specified configuration (which is dependent on
* device type). RTNL lock must be held.
*/
struct vport *ovs_vport_add(const struct vport_parms *parms)
{
struct vport *vport;
int err = 0;
int i;
ASSERT_RTNL();
for (i = 0; i < ARRAY_SIZE(vport_ops_list); i++) {
if (vport_ops_list[i]->type == parms->type) {
vport = vport_ops_list[i]->create(parms);
if (IS_ERR(vport)) {
err = PTR_ERR(vport);
goto out;
}
hlist_add_head_rcu(&vport->hash_node,
hash_bucket(vport->ops->get_name(vport)));
return vport;
}
}
err = -EAFNOSUPPORT;
out:
return ERR_PTR(err);
}
/**
* ovs_vport_set_options - modify existing vport device (for kernel callers)
*
* @vport: vport to modify.
* @port: New configuration.
*
* Modifies an existing device with the specified configuration (which is
* dependent on device type). RTNL lock must be held.
*/
int ovs_vport_set_options(struct vport *vport, struct nlattr *options)
{
ASSERT_RTNL();
if (!vport->ops->set_options)
return -EOPNOTSUPP;
return vport->ops->set_options(vport, options);
}
/**
* ovs_vport_del - delete existing vport device
*
* @vport: vport to delete.
*
* Detaches @vport from its datapath and destroys it. It is possible to fail
* for reasons such as lack of memory. RTNL lock must be held.
*/
void ovs_vport_del(struct vport *vport)
{
ASSERT_RTNL();
hlist_del_rcu(&vport->hash_node);
vport->ops->destroy(vport);
}
/**
* ovs_vport_get_stats - retrieve device stats
*
* @vport: vport from which to retrieve the stats
* @stats: location to store stats
*
* Retrieves transmit, receive, and error stats for the given device.
*
* Must be called with RTNL lock or rcu_read_lock.
*/
void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)
{
int i;
memset(stats, 0, sizeof(*stats));
/* We potentially have 2 sources of stats that need to be combined:
* those we have collected (split into err_stats and percpu_stats) from
* set_stats() and device error stats from netdev->get_stats() (for
* errors that happen downstream and therefore aren't reported through
* our vport_record_error() function).
* Stats from first source are reported by ovs (OVS_VPORT_ATTR_STATS).
* netdev-stats can be directly read over netlink-ioctl.
*/
spin_lock_bh(&vport->stats_lock);
stats->rx_errors = vport->err_stats.rx_errors;
stats->tx_errors = vport->err_stats.tx_errors;
stats->tx_dropped = vport->err_stats.tx_dropped;
stats->rx_dropped = vport->err_stats.rx_dropped;
spin_unlock_bh(&vport->stats_lock);
for_each_possible_cpu(i) {
const struct vport_percpu_stats *percpu_stats;
struct vport_percpu_stats local_stats;
unsigned int start;
percpu_stats = per_cpu_ptr(vport->percpu_stats, i);
do {
start = u64_stats_fetch_begin_bh(&percpu_stats->sync);
local_stats = *percpu_stats;
} while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start));
stats->rx_bytes += local_stats.rx_bytes;
stats->rx_packets += local_stats.rx_packets;
stats->tx_bytes += local_stats.tx_bytes;
stats->tx_packets += local_stats.tx_packets;
}
}
/**
* ovs_vport_get_options - retrieve device options
*
* @vport: vport from which to retrieve the options.
* @skb: sk_buff where options should be appended.
*
* Retrieves the configuration of the given device, appending an
* %OVS_VPORT_ATTR_OPTIONS attribute that in turn contains nested
* vport-specific attributes to @skb.
*
* Returns 0 if successful, -EMSGSIZE if @skb has insufficient room, or another
* negative error code if a real error occurred. If an error occurs, @skb is
* left unmodified.
*
* Must be called with RTNL lock or rcu_read_lock.
*/
int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb)
{
struct nlattr *nla;
nla = nla_nest_start(skb, OVS_VPORT_ATTR_OPTIONS);
if (!nla)
return -EMSGSIZE;
if (vport->ops->get_options) {
int err = vport->ops->get_options(vport, skb);
if (err) {
nla_nest_cancel(skb, nla);
return err;
}
}
nla_nest_end(skb, nla);
return 0;
}
/**
* ovs_vport_receive - pass up received packet to the datapath for processing
*
* @vport: vport that received the packet
* @skb: skb that was received
*
* Must be called with rcu_read_lock. The packet cannot be shared and
* skb->data should point to the Ethernet header. The caller must have already
* called compute_ip_summed() to initialize the checksumming fields.
*/
void ovs_vport_receive(struct vport *vport, struct sk_buff *skb)
{
struct vport_percpu_stats *stats;
stats = per_cpu_ptr(vport->percpu_stats, smp_processor_id());
u64_stats_update_begin(&stats->sync);
stats->rx_packets++;
stats->rx_bytes += skb->len;
u64_stats_update_end(&stats->sync);
ovs_dp_process_received_packet(vport, skb);
}
/**
* ovs_vport_send - send a packet on a device
*
* @vport: vport on which to send the packet
* @skb: skb to send
*
* Sends the given packet and returns the length of data sent. Either RTNL
* lock or rcu_read_lock must be held.
*/
int ovs_vport_send(struct vport *vport, struct sk_buff *skb)
{
int sent = vport->ops->send(vport, skb);
if (likely(sent)) {
struct vport_percpu_stats *stats;
stats = per_cpu_ptr(vport->percpu_stats, smp_processor_id());
u64_stats_update_begin(&stats->sync);
stats->tx_packets++;
stats->tx_bytes += sent;
u64_stats_update_end(&stats->sync);
}
return sent;
}
/**
* ovs_vport_record_error - indicate device error to generic stats layer
*
* @vport: vport that encountered the error
* @err_type: one of enum vport_err_type types to indicate the error type
*
* If using the vport generic stats layer indicate that an error of the given
* type has occured.
*/
void ovs_vport_record_error(struct vport *vport, enum vport_err_type err_type)
{
spin_lock(&vport->stats_lock);
switch (err_type) {
case VPORT_E_RX_DROPPED:
vport->err_stats.rx_dropped++;
break;
case VPORT_E_RX_ERROR:
vport->err_stats.rx_errors++;
break;
case VPORT_E_TX_DROPPED:
vport->err_stats.tx_dropped++;
break;
case VPORT_E_TX_ERROR:
vport->err_stats.tx_errors++;
break;
};
spin_unlock(&vport->stats_lock);
}

205
net/openvswitch/vport.h Normal file
View file

@ -0,0 +1,205 @@
/*
* Copyright (c) 2007-2011 Nicira Networks.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA
*/
#ifndef VPORT_H
#define VPORT_H 1
#include <linux/list.h>
#include <linux/openvswitch.h>
#include <linux/skbuff.h>
#include <linux/spinlock.h>
#include <linux/u64_stats_sync.h>
#include "datapath.h"
struct vport;
struct vport_parms;
/* The following definitions are for users of the vport subsytem: */
int ovs_vport_init(void);
void ovs_vport_exit(void);
struct vport *ovs_vport_add(const struct vport_parms *);
void ovs_vport_del(struct vport *);
struct vport *ovs_vport_locate(const char *name);
void ovs_vport_get_stats(struct vport *, struct ovs_vport_stats *);
int ovs_vport_set_options(struct vport *, struct nlattr *options);
int ovs_vport_get_options(const struct vport *, struct sk_buff *);
int ovs_vport_send(struct vport *, struct sk_buff *);
/* The following definitions are for implementers of vport devices: */
struct vport_percpu_stats {
u64 rx_bytes;
u64 rx_packets;
u64 tx_bytes;
u64 tx_packets;
struct u64_stats_sync sync;
};
struct vport_err_stats {
u64 rx_dropped;
u64 rx_errors;
u64 tx_dropped;
u64 tx_errors;
};
/**
* struct vport - one port within a datapath
* @rcu: RCU callback head for deferred destruction.
* @port_no: Index into @dp's @ports array.
* @dp: Datapath to which this port belongs.
* @node: Element in @dp's @port_list.
* @upcall_pid: The Netlink port to use for packets received on this port that
* miss the flow table.
* @hash_node: Element in @dev_table hash table in vport.c.
* @ops: Class structure.
* @percpu_stats: Points to per-CPU statistics used and maintained by vport
* @stats_lock: Protects @err_stats;
* @err_stats: Points to error statistics used and maintained by vport
*/
struct vport {
struct rcu_head rcu;
u16 port_no;
struct datapath *dp;
struct list_head node;
u32 upcall_pid;
struct hlist_node hash_node;
const struct vport_ops *ops;
struct vport_percpu_stats __percpu *percpu_stats;
spinlock_t stats_lock;
struct vport_err_stats err_stats;
};
/**
* struct vport_parms - parameters for creating a new vport
*
* @name: New vport's name.
* @type: New vport's type.
* @options: %OVS_VPORT_ATTR_OPTIONS attribute from Netlink message, %NULL if
* none was supplied.
* @dp: New vport's datapath.
* @port_no: New vport's port number.
*/
struct vport_parms {
const char *name;
enum ovs_vport_type type;
struct nlattr *options;
/* For ovs_vport_alloc(). */
struct datapath *dp;
u16 port_no;
u32 upcall_pid;
};
/**
* struct vport_ops - definition of a type of virtual port
*
* @type: %OVS_VPORT_TYPE_* value for this type of virtual port.
* @create: Create a new vport configured as specified. On success returns
* a new vport allocated with ovs_vport_alloc(), otherwise an ERR_PTR() value.
* @destroy: Destroys a vport. Must call vport_free() on the vport but not
* before an RCU grace period has elapsed.
* @set_options: Modify the configuration of an existing vport. May be %NULL
* if modification is not supported.
* @get_options: Appends vport-specific attributes for the configuration of an
* existing vport to a &struct sk_buff. May be %NULL for a vport that does not
* have any configuration.
* @get_name: Get the device's name.
* @get_config: Get the device's configuration.
* @get_ifindex: Get the system interface index associated with the device.
* May be null if the device does not have an ifindex.
* @send: Send a packet on the device. Returns the length of the packet sent.
*/
struct vport_ops {
enum ovs_vport_type type;
/* Called with RTNL lock. */
struct vport *(*create)(const struct vport_parms *);
void (*destroy)(struct vport *);
int (*set_options)(struct vport *, struct nlattr *);
int (*get_options)(const struct vport *, struct sk_buff *);
/* Called with rcu_read_lock or RTNL lock. */
const char *(*get_name)(const struct vport *);
void (*get_config)(const struct vport *, void *);
int (*get_ifindex)(const struct vport *);
int (*send)(struct vport *, struct sk_buff *);
};
enum vport_err_type {
VPORT_E_RX_DROPPED,
VPORT_E_RX_ERROR,
VPORT_E_TX_DROPPED,
VPORT_E_TX_ERROR,
};
struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *,
const struct vport_parms *);
void ovs_vport_free(struct vport *);
#define VPORT_ALIGN 8
/**
* vport_priv - access private data area of vport
*
* @vport: vport to access
*
* If a nonzero size was passed in priv_size of vport_alloc() a private data
* area was allocated on creation. This allows that area to be accessed and
* used for any purpose needed by the vport implementer.
*/
static inline void *vport_priv(const struct vport *vport)
{
return (u8 *)vport + ALIGN(sizeof(struct vport), VPORT_ALIGN);
}
/**
* vport_from_priv - lookup vport from private data pointer
*
* @priv: Start of private data area.
*
* It is sometimes useful to translate from a pointer to the private data
* area to the vport, such as in the case where the private data pointer is
* the result of a hash table lookup. @priv must point to the start of the
* private data area.
*/
static inline struct vport *vport_from_priv(const void *priv)
{
return (struct vport *)(priv - ALIGN(sizeof(struct vport), VPORT_ALIGN));
}
void ovs_vport_receive(struct vport *, struct sk_buff *);
void ovs_vport_record_error(struct vport *, enum vport_err_type err_type);
/* List of statically compiled vport implementations. Don't forget to also
* add yours to the list at the top of vport.c. */
extern const struct vport_ops ovs_netdev_vport_ops;
extern const struct vport_ops ovs_internal_vport_ops;
#endif /* vport.h */