版权声明:如有需要,可供转载,但请注明出处:https://blog.csdn.net/City_of_skey/article/details/85240141

目录

 

1、网桥简介

 

2、网桥初始化

2.1 struct net_bridge

2.2 struct net_bridge_port

2.3 struct net_bridge_fdb_entry

2.4 br_init

3、新建网桥

3.1、br_add_bridge

3.2、new_bridge_dev

3.3、br_dev_setup

4、新增端口

4.1、add_del_if

4.2、br_add_if


1、网桥简介

Linux下的网桥是一个虚拟设备,本质上是一个高层次的二层虚拟设备,它把其他从设备虚拟为一个个port。当把一个网卡设备加入网桥后,网桥的mac地址就是下面所有网卡中最小的那个mac地址,然后所有的网卡共享网桥的ip。网卡接收、发送数据包都交给网桥决策。网桥工作在链路层。

网桥发包流程

如下图所示网桥收包流程图:

网桥收包流程

 

2、网桥初始化

2.1 struct net_bridge

struct net_bridge是描述网桥的结构体,其中比较重要的成员是hash:这个哈希表保存了mac地址与网络端口的对应关系,也就是mac地址学习的结果,网桥转发时要根据mac地址在这个哈希表中查找应该从哪个端口转发出去。

/*
 * State of one bridge instance.
 *
 * The structure lives in the private area of the bridge's own net_device:
 * new_bridge_dev() allocates the device with sizeof(struct net_bridge) of
 * private data and obtains this structure through netdev_priv().
 */
struct net_bridge
{
	spinlock_t			lock;		/* spinlock; br_add_if() holds it while recomputing the bridge id/features */
	struct list_head		port_list;		/* list of ports attached to this bridge (linked via net_bridge_port.list) */
	struct net_device		*dev;		/* the bridge's own net_device instance */

	struct br_cpu_netstats __percpu *stats;	/* per-CPU rx/tx packet and byte counters */
	spinlock_t			hash_lock;	/* NOTE(review): presumably protects hash[] below - confirm */
	struct hlist_head		hash[BR_HASH_SIZE]; /* MAC learning table; buckets chain struct net_bridge_fdb_entry */
	unsigned long			feature_mask;
#ifdef CONFIG_BRIDGE_NETFILTER
	struct rtable 			fake_rtable;
#endif
	unsigned long			flags;
#define BR_SET_MAC_ADDR		0x00000001

	/* STP */
	bridge_id			designated_root;
	bridge_id			bridge_id;
	u32				root_path_cost;
	unsigned long			max_age;
	unsigned long			hello_time;
	unsigned long			forward_delay;
	unsigned long			bridge_max_age;
	unsigned long			ageing_time;
	unsigned long			bridge_hello_time;
	unsigned long			bridge_forward_delay;

	u8				group_addr[ETH_ALEN];
	u16				root_port;

	/* which STP implementation (if any) is in use */
	enum {
		BR_NO_STP, 		/* no spanning tree */
		BR_KERNEL_STP,		/* old STP in kernel */
		BR_USER_STP,		/* new RSTP in userspace */
	} stp_enabled;

	unsigned char			topology_change;
	unsigned char			topology_change_detected;

#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
	unsigned char			multicast_router;

	u8				multicast_disabled:1;

	u32				hash_elasticity;
	u32				hash_max;

	u32				multicast_last_member_count;
	u32				multicast_startup_queries_sent;
	u32				multicast_startup_query_count;

	unsigned long			multicast_last_member_interval;
	unsigned long			multicast_membership_interval;
	unsigned long			multicast_querier_interval;
	unsigned long			multicast_query_interval;
	unsigned long			multicast_query_response_interval;
	unsigned long			multicast_startup_query_interval;

	spinlock_t			multicast_lock;
	struct net_bridge_mdb_htable	*mdb;
	struct hlist_head		router_list;
	struct hlist_head		mglist;

	/* multicast timers */
	struct timer_list		multicast_router_timer;
	struct timer_list		multicast_querier_timer;
	struct timer_list		multicast_query_timer;
#endif

	/* bridge-wide timers (not compiled out by the IGMP snooping option) */
	struct timer_list		hello_timer;
	struct timer_list		tcn_timer;
	struct timer_list		topology_change_timer;
	struct timer_list		gc_timer;
	struct kobject			*ifobj;
};

2.2 struct net_bridge_port

struct net_bridge_port描述网桥下的网卡端口。

/*
 * One port of a bridge, i.e. a network interface that has been attached to
 * the bridge. br_add_if() creates it through new_nbp(), publishes it in
 * dev->br_port and links it into the owning bridge's port_list.
 */
struct net_bridge_port
{
	struct net_bridge		*br;			/* bridge this port belongs to */
	struct net_device		*dev;		/* net_device of the attached interface */
	struct list_head		list;			/* linkage in the bridge's port_list */

	/* STP */
	u8				priority;
	u8				state;
	u16				port_no;
	unsigned char			topology_change_ack;
	unsigned char			config_pending;
	port_id				port_id;
	port_id				designated_port;
	bridge_id			designated_root;
	bridge_id			designated_bridge;
	u32				path_cost;
	u32				designated_cost;

	/* per-port timers */
	struct timer_list		forward_delay_timer;
	struct timer_list		hold_timer;
	struct timer_list		message_age_timer;
	struct kobject			kobj;		/* sysfs object; br_add_if() registers it under the device's kobject */
	struct rcu_head			rcu;

	unsigned long 			flags;
#define BR_HAIRPIN_MODE		0x00000001

#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
	u32				multicast_startup_queries_sent;
	unsigned char			multicast_router;
	struct timer_list		multicast_router_timer;
	struct timer_list		multicast_query_timer;
	struct hlist_head		mglist;
	struct hlist_node		rlist;
#endif

#ifdef CONFIG_SYSFS
	char				sysfs_name[IFNAMSIZ];
#endif
};

/*
 * Receive/transmit packet and byte counters. struct net_bridge keeps a
 * per-CPU instance of this structure in its 'stats' member.
 */
struct br_cpu_netstats {
	unsigned long	rx_packets;
	unsigned long	rx_bytes;
	unsigned long	tx_packets;
	unsigned long	tx_bytes;
};

2.3 struct net_bridge_fdb_entry

struct net_bridge_fdb_entry是mac地址和网络端口对应表中的一个表项,最主要的两个成员是dst和addr,表示目的mac地址为addr的数据包应该从哪个网络端口(dst)转发出去。

/*
 * One forwarding-database entry: associates a MAC address (addr) with the
 * bridge port (dst) it belongs to.
 */
struct net_bridge_fdb_entry
{
	struct hlist_node		hlist;		/* hash-chain linkage */
	struct net_bridge_port		*dst;	/* bridge port associated with addr */

	struct rcu_head			rcu;
	unsigned long			ageing_timer;
	mac_addr			addr;				/* MAC address */
	unsigned char			is_local;			/* non-zero if this is one of the local machine's MAC addresses */
	unsigned char			is_static;			/* non-zero if this is a static MAC entry */
};

2.4 br_init

网桥在内核中是以模块的形式实现的,模块的初始化函数是br_init,该函数位于net/bridge/br.c文件中。

br_init主要做以下几件事情:

(1)STP协议注册

(2)MAC学习表的初始化

(3)网桥在netfilter上钩子函数注册

(4)网桥的ioctl设置,提供给应用层的操作接口

/*
 * Module initialisation for the bridge.
 *
 * Registers, in order: the STP protocol handler, the forwarding database,
 * the per-network-namespace operations, the netfilter hooks, the netdevice
 * notifier and the netlink interface; then installs the ioctl stub and the
 * frame-handling hook.
 *
 * Returns 0 on success. If a step fails, everything registered before it is
 * torn down in reverse order through the err_out* labels and the error code
 * of the failing step is returned.
 */
static int __init br_init(void)
{
	int err;

	/* register the STP protocol handler */
	err = stp_proto_register(&br_stp_proto);
	if (err < 0) {
		pr_err("bridge: can't register sap for STP\n");
		return err;
	}
	/* initialise the forwarding database (port <-> MAC table) */
	err = br_fdb_init();
	if (err)
		goto err_out;

	err = register_pernet_subsys(&br_net_ops);
	if (err)
		goto err_out1;

	/* register the netfilter hook functions */
	err = br_netfilter_init();
	if (err)
		goto err_out2;

	/* subscribe to the netdevice notifier chain */
	err = register_netdevice_notifier(&br_device_notifier);
	if (err)
		goto err_out3;
	/* netlink initialisation */
	err = br_netlink_init();
	if (err)
		goto err_out4;

	/* install the bridge ioctl handler, the interface offered to user space */
	brioctl_set(br_ioctl_deviceless_stub);
	
	/* install br_handle_frame as the bridge frame-handling hook */
	br_handle_frame_hook = br_handle_frame;

#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
	br_fdb_test_addr_hook = br_fdb_test_addr;
#endif

	return 0;
	/* each label undoes the step that succeeded just before the failing one */
err_out4:
	unregister_netdevice_notifier(&br_device_notifier);
err_out3:
	br_netfilter_fini();
err_out2:
	unregister_pernet_subsys(&br_net_ops);
err_out1:
	br_fdb_fini();
err_out:
	stp_proto_unregister(&br_stp_proto);
	return err;
}

网桥在netfilter框架中主要注册了7个钩子函数,接下来会详细介绍。

/*
 * Netfilter hooks installed by the bridge: seven entries in total.
 * Five are attached to the PF_BRIDGE family (pre-routing, local-in, two on
 * forward, post-routing); the remaining two attach ip_sabotage_in to the
 * PRE_ROUTING hook of PF_INET and PF_INET6 respectively.
 *
 * For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because
 * br_dev_queue_push_xmit is called afterwards */
static struct nf_hook_ops br_nf_ops[] __read_mostly = {
	{
		.hook = br_nf_pre_routing,
		.owner = THIS_MODULE,
		.pf = PF_BRIDGE,
		.hooknum = NF_BR_PRE_ROUTING,
		.priority = NF_BR_PRI_BRNF,
	},
	{
		.hook = br_nf_local_in,
		.owner = THIS_MODULE,
		.pf = PF_BRIDGE,
		.hooknum = NF_BR_LOCAL_IN,
		.priority = NF_BR_PRI_BRNF,
	},
	{
		/* same hook point as br_nf_forward_arp below, priority value one lower */
		.hook = br_nf_forward_ip,
		.owner = THIS_MODULE,
		.pf = PF_BRIDGE,
		.hooknum = NF_BR_FORWARD,
		.priority = NF_BR_PRI_BRNF - 1,
	},
	{
		.hook = br_nf_forward_arp,
		.owner = THIS_MODULE,
		.pf = PF_BRIDGE,
		.hooknum = NF_BR_FORWARD,
		.priority = NF_BR_PRI_BRNF,
	},
	{
		.hook = br_nf_post_routing,
		.owner = THIS_MODULE,
		.pf = PF_BRIDGE,
		.hooknum = NF_BR_POST_ROUTING,
		.priority = NF_BR_PRI_LAST,
	},
	{
		/* IPv4 pre-routing */
		.hook = ip_sabotage_in,
		.owner = THIS_MODULE,
		.pf = PF_INET,
		.hooknum = NF_INET_PRE_ROUTING,
		.priority = NF_IP_PRI_FIRST,
	},
	{
		/* IPv6 pre-routing, same handler as the IPv4 entry */
		.hook = ip_sabotage_in,
		.owner = THIS_MODULE,
		.pf = PF_INET6,
		.hooknum = NF_INET_PRE_ROUTING,
		.priority = NF_IP6_PRI_FIRST,
	},
};

3、新建网桥

我们通过brctl addbr br0命令新建一个网桥br0,内核提供的ioctl接口由函数br_ioctl_deviceless_stub实现,新建网桥调用br_add_bridge,删除网桥调用br_del_bridge。

/*
 * Handler for bridge ioctls that are not addressed to a particular bridge
 * device (br_init() installs it through brioctl_set()).
 *
 * @net:  network namespace the request applies to
 * @cmd:  ioctl command
 * @uarg: user-space argument; for SIOCBRADDBR/SIOCBRDELBR it points to a
 *        bridge name of at most IFNAMSIZ bytes
 *
 * SIOCGIFBR/SIOCSIFBR are passed on to old_deviceless().
 * SIOCBRADDBR/SIOCBRDELBR create or delete the named bridge.
 *
 * Returns the result of the dispatched helper, -EPERM when the caller lacks
 * CAP_NET_ADMIN, -EFAULT when the name cannot be copied from user space, or
 * -EOPNOTSUPP for any other command.
 */
int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uarg)
{
	char name[IFNAMSIZ];

	if (cmd == SIOCGIFBR || cmd == SIOCSIFBR)
		return old_deviceless(net, uarg);

	if (cmd != SIOCBRADDBR && cmd != SIOCBRDELBR)
		return -EOPNOTSUPP;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	/* fetch the bridge name from the caller's address space */
	if (copy_from_user(name, uarg, IFNAMSIZ))
		return -EFAULT;

	/* user space is not trusted to have terminated the string */
	name[IFNAMSIZ - 1] = 0;

	if (cmd == SIOCBRDELBR)
		return br_del_bridge(net, name);

	return br_add_bridge(net, name);
}

3.1、br_add_bridge

br_add_bridge主要做三件事情:

1、调用new_bridge_dev根据网桥名字新建一个网桥设备

2、网桥设备也是一个以太设备所以要调用register_netdevice注册这个网桥设备的struct net_device

3、初始化网桥设备在sysfs中的相关信息,便于管理查找

int br_add_bridge(struct net *net, const char *name)
{
	struct net_device *dev;
	int ret;

	/*新建一个网桥设备,name是网桥的名字*/
	dev = new_bridge_dev(net, name);
	if (!dev)
		return -ENOMEM;

	rtnl_lock();
	if (strchr(dev->name, '%')) {
		ret = dev_alloc_name(dev, dev->name);
		if (ret < 0)
			goto out_free;
	}

	SET_NETDEV_DEVTYPE(dev, &br_type);

	/*注册网桥虚拟设备*/
	ret = register_netdevice(dev);
	if (ret)
		goto out_free;
	/*在sysfs中建立相关信息,便于查找和管理*/
	ret = br_sysfs_addbr(dev);
	if (ret)
		unregister_netdevice(dev);
 out:
	rtnl_unlock();
	return ret;

out_free:
	free_netdev(dev);
	goto out;
}

3.2、new_bridge_dev

网桥设备也是一个虚拟的以太设备,所以调用alloc_netdev新建一个以太设备,并执行网桥初始化函数br_dev_setup,net_device的私有数据指向网桥。

/*
 * Allocate a bridge net_device and fill in the default bridge state.
 * The device is NOT registered here; that is the caller's job.
 *
 * @net:  network namespace the new device is assigned to
 * @name: interface name handed to alloc_netdev()
 *
 * Returns the new device, or NULL if either the device or its per-CPU
 * statistics cannot be allocated.
 */
static struct net_device *new_bridge_dev(struct net *net, const char *name)
{
	struct net_device *ndev;
	struct net_bridge *bridge;

	/*
	 * The private area is sized to hold a struct net_bridge and
	 * br_dev_setup is passed as the device setup callback.
	 */
	ndev = alloc_netdev(sizeof(struct net_bridge), name, br_dev_setup);
	if (ndev == NULL)
		return NULL;

	dev_net_set(ndev, net);

	/* the bridge state is the device's private data */
	bridge = netdev_priv(ndev);
	bridge->dev = ndev;

	bridge->stats = alloc_percpu(struct br_cpu_netstats);
	if (bridge->stats == NULL) {
		free_netdev(ndev);
		return NULL;
	}

	spin_lock_init(&bridge->lock);
	spin_lock_init(&bridge->hash_lock);
	INIT_LIST_HEAD(&bridge->port_list);

	/* bridge id priority bytes: 0x80 0x00 */
	bridge->bridge_id.prio[0] = 0x80;
	bridge->bridge_id.prio[1] = 0x00;

	memcpy(bridge->group_addr, br_group_address, ETH_ALEN);
	bridge->feature_mask = ndev->features;

	/* STP is off by default; the designated root starts as our own id */
	bridge->stp_enabled = BR_NO_STP;
	bridge->designated_root = bridge->bridge_id;
	bridge->root_path_cost = 0;
	bridge->root_port = 0;
	bridge->topology_change = 0;
	bridge->topology_change_detected = 0;

	/* default timer values, expressed in jiffies */
	bridge->max_age = 20 * HZ;
	bridge->bridge_max_age = bridge->max_age;
	bridge->hello_time = 2 * HZ;
	bridge->bridge_hello_time = bridge->hello_time;
	bridge->forward_delay = 15 * HZ;
	bridge->bridge_forward_delay = bridge->forward_delay;
	bridge->ageing_time = 300 * HZ;

	/* remaining sub-systems: netfilter route table, STP timers, multicast */
	br_netfilter_rtable_init(bridge);
	br_stp_timer_init(bridge);
	br_multicast_init(bridge);

	return ndev;
}

3.3、br_dev_setup

这个函数主要是初始化网桥

(1)初始化网桥的MAC

(2)对网桥进行以太网初始化

(3)设置以太设备的操作函数指针br_netdev_ops

(4)设置以太设备管理函数指向br_ethtool_ops。

/*
 * Setup callback for a bridge net_device; new_bridge_dev() passes it to
 * alloc_netdev().
 *
 * Gives the device a random MAC address, applies the generic Ethernet
 * defaults, then installs the bridge's netdev/ethtool operation tables,
 * its destructor, a zero tx queue length, the IFF_EBRIDGE private flag and
 * the initial feature set.
 */
void br_dev_setup(struct net_device *dev)
{
	/* start with a random MAC address */
	random_ether_addr(dev->dev_addr);
	/* a bridge is an Ethernet device, so apply the Ethernet defaults */
	ether_setup(dev);

	/* device operations */
	dev->netdev_ops = &br_netdev_ops;
	dev->destructor = br_dev_free;
	/* ethtool (management) operations */
	SET_ETHTOOL_OPS(dev, &br_ethtool_ops);
	dev->tx_queue_len = 0;
	/* mark the device as a bridge in its private flags */
	dev->priv_flags = IFF_EBRIDGE;

	dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
			NETIF_F_GSO_MASK | NETIF_F_NO_CSUM | NETIF_F_LLTX |
			NETIF_F_NETNS_LOCAL | NETIF_F_GSO;
}

br_netdev_ops:

/*
 * net_device operations of a bridge device; br_dev_setup() installs this
 * table in dev->netdev_ops.
 */
static const struct net_device_ops br_netdev_ops = {
	.ndo_open		 = br_dev_open,			/* open the device */
	.ndo_stop		 = br_dev_stop,			/* close the device */
	.ndo_start_xmit		 = br_dev_xmit,		/* transmit a packet */
	.ndo_get_stats		 = br_get_stats,
	.ndo_set_mac_address	 = br_set_mac_address,	/* set the MAC address */
	.ndo_set_multicast_list	 = br_dev_set_multicast_list,
	.ndo_change_mtu		 = br_change_mtu,		/* set the MTU */
	.ndo_do_ioctl		 = br_dev_ioctl,				/* per-device ioctl: add/remove ports */
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_netpoll_cleanup	 = br_netpoll_cleanup,
#endif
};

br_ethtool_ops:

/*
 * ethtool operations of a bridge device; br_dev_setup() installs this table
 * with SET_ETHTOOL_OPS(). The getters and set_ufo use the generic
 * ethtool_op_* helpers, while get_drvinfo and the tx-csum/sg/tso setters are
 * bridge-specific (br_*) handlers.
 */
static const struct ethtool_ops br_ethtool_ops = {
	.get_drvinfo    = br_getinfo,
	.get_link	= ethtool_op_get_link,
	.get_tx_csum	= ethtool_op_get_tx_csum,
	.set_tx_csum 	= br_set_tx_csum,
	.get_sg		= ethtool_op_get_sg,
	.set_sg		= br_set_sg,
	.get_tso	= ethtool_op_get_tso,
	.set_tso	= br_set_tso,
	.get_ufo	= ethtool_op_get_ufo,
	.set_ufo	= ethtool_op_set_ufo,
	.get_flags	= ethtool_op_get_flags,
};

4、新增端口

应用层通过brctl addif br0 eth0将eth0添加到网桥br0下面,内核的实现函数是br_dev_ioctl,它依据cmd调用add_del_if来添加或者删除端口。

/*
 * ioctl entry point of a bridge device (the .ndo_do_ioctl member of
 * br_netdev_ops).
 *
 * @dev: the bridge device the ioctl was issued on
 * @rq:  interface request; ifr_ifindex names the port for add/remove
 * @cmd: ioctl command
 *
 * SIOCDEVPRIVATE is handed to old_dev_ioctl(); SIOCBRADDIF/SIOCBRDELIF add or
 * remove the interface identified by rq->ifr_ifindex. Any other command is
 * logged and rejected with -EOPNOTSUPP.
 */
int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
{
	struct net_bridge *br = netdev_priv(dev);

	if (cmd == SIOCDEVPRIVATE)
		return old_dev_ioctl(dev, rq, cmd);

	/* port add / port remove share one helper, selected by its last argument */
	if (cmd == SIOCBRADDIF || cmd == SIOCBRDELIF)
		return add_del_if(br, rq->ifr_ifindex, cmd == SIOCBRADDIF);

	br_debug(br, "Bridge does not support ioctl 0x%x\n", cmd);
	return -EOPNOTSUPP;
}

4.1、add_del_if

/*
 * Add an interface to, or remove it from, a bridge.
 *
 * @br:      the bridge
 * @ifindex: interface index of the device to add/remove; it is looked up in
 *           the network namespace of the bridge device
 * @isadd:   non-zero to add the port, zero to remove it
 *
 * Returns -EPERM without CAP_NET_ADMIN, -EINVAL if no device has that index,
 * otherwise the result of br_add_if()/br_del_if().
 *
 * called with RTNL
 */
static int add_del_if(struct net_bridge *br, int ifindex, int isadd)
{
	struct net_device *port_dev;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	/* resolve the interface that is to become (or stop being) a port */
	port_dev = __dev_get_by_index(dev_net(br->dev), ifindex);
	if (!port_dev)
		return -EINVAL;

	return isadd ? br_add_if(br, port_dev) : br_del_if(br, port_dev);
}

4.2、br_add_if

这个函数是网桥添加端口的关键,主要做以下事情

(1)、首先对要添加的设备进行检查:回环设备、非以太网设备直接返回;设备本身是网桥、已经添加到其他网桥下面或者设置了IFF_DONT_BRIDGE标志的,也直接返回

(2)、调用new_nbp新建一个网桥端口结构体struct net_bridge_port

(3)、将设备设置为混杂模式

(4)、将新增的端口添加到端口-MAC表中

(5)、将新增的端口p添加到sysfs文件系统中

(6)、将新增的端口p添加到端口链表port_list中

/*
 * Attach a network device to a bridge as a new port.
 *
 * @br:  bridge to attach to
 * @dev: device that becomes the port
 *
 * After validating the device, creates the port structure, puts the device
 * into promiscuous mode, registers the port's kobject, inserts the device's
 * own MAC address into the forwarding database, creates the sysfs entries,
 * publishes the port in dev->br_port and in br->port_list, and finally
 * recomputes the bridge id, features and MTU.
 *
 * Returns 0 on success; -EINVAL, -ELOOP, -EBUSY or -EOPNOTSUPP when the
 * device is not eligible; otherwise the error of the failing step, after
 * undoing the steps already performed.
 *
 * called with RTNL
 */
int br_add_if(struct net_bridge *br, struct net_device *dev)
{
	struct net_bridge_port *p;
	int err = 0;

	/* loopback devices and non-Ethernet devices are rejected */
	/* Don't allow bridging non-ethernet like devices */
	if ((dev->flags & IFF_LOOPBACK) ||
	    dev->type != ARPHRD_ETHER || dev->addr_len != ETH_ALEN)
		return -EINVAL;
	/* the device is itself a bridge (detected by its xmit handler) */
	/* No bridging of bridges */
	if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit)
		return -ELOOP;

	/* the device already is a port of some bridge */
	/* Device is already being bridged */
	if (dev->br_port != NULL)
		return -EBUSY;

	/* the device carries the "do not bridge" flag */
	/* No bridging devices that dislike that (e.g. wireless) */
	if (dev->priv_flags & IFF_DONT_BRIDGE)
		return -EOPNOTSUPP;

	/* create the port structure */
	p = new_nbp(br, dev);
	if (IS_ERR(p))
		return PTR_ERR(p);

	/* put the device into promiscuous mode */
	err = dev_set_promiscuity(dev, 1);
	if (err)
		goto put_back;

	/*
	 * NOTE(review): on failure this jumps to err0 and p is released with
	 * kfree() rather than kobject_put(); check against the kobject
	 * documentation for kobject_init_and_add() failure handling.
	 */
	err = kobject_init_and_add(&p->kobj, &brport_ktype, &(dev->dev.kobj),
				   SYSFS_BRIDGE_PORT_ATTR);
	if (err)
		goto err0;

	/* enter the device's own MAC address into the port <-> MAC table */
	err = br_fdb_insert(br, p, dev->dev_addr);
	if (err)
		goto err1;
	/* create the sysfs entries for the new port */
	err = br_sysfs_addif(p);
	if (err)
		goto err2;

	rcu_assign_pointer(dev->br_port, p);
	dev_disable_lro(dev);

	/* link the port into the bridge's port_list */
	list_add_rcu(&p->list, &br->port_list);

	spin_lock_bh(&br->lock);
	br_stp_recalculate_bridge_id(br);
	br_features_recompute(br);

	/* enable STP on the port only if port device, its carrier and the bridge are all up */
	if ((dev->flags & IFF_UP) && netif_carrier_ok(dev) &&
	    (br->dev->flags & IFF_UP))
		br_stp_enable_port(p);
	spin_unlock_bh(&br->lock);

	br_ifinfo_notify(RTM_NEWLINK, p);

	dev_set_mtu(br->dev, br_min_mtu(br));

	kobject_uevent(&p->kobj, KOBJ_ADD);

	br_netpoll_enable(br, dev);

	return 0;
	/* error unwinding: each label undoes the steps completed before the failure */
err2:
	br_fdb_delete_by_port(br, p, 1);
err1:
	kobject_put(&p->kobj);
	p = NULL; /* kobject_put frees */
err0:
	dev_set_promiscuity(dev, -1);
put_back:
	/* NOTE(review): presumably balances a dev_hold() taken in new_nbp() - confirm */
	dev_put(dev);
	/* p is NULL here when kobject_put() above already released it */
	kfree(p);
	return err;
}

 

GitHub 加速计划 / li / linux-dash
6
1
下载
A beautiful web dashboard for Linux
最近提交(Master分支:3 个月前 )
186a802e added ecosystem file for PM2 4 年前
5def40a3 Add host customization support for the NodeJS version 4 年前
Logo

旨在为数千万中国开发者提供一个无缝且高效的云端环境,以支持学习、使用和贡献开源项目。

更多推荐