0
点赞
收藏
分享

微信扫一扫

邻居子系统1.5 neigh output

老王420 2022-06-24 阅读 22

1.5.1

当邻居项不处于 NUD_CONNECTED 状态时,不允许使用快速路径发送报文。函数 neigh_resolve_output 用于慢而安全的输出,通常在初始化 neigh_ops 结构时

用来实例化 output 函数指针。当邻居从 NUD_CONNECTED 转到非 NUD_CONNECTED 状态时,使用 neigh_suspect 将 output 设置为 neigh->ops->output(对 arp_generic_ops 而言即 neigh_resolve_output)。

/* Neighbour state is suspicious;
disable fast path.

Called with write_locked neigh.
*/
static void neigh_suspect(struct neighbour *neigh)
{
NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);

neigh->output = neigh->ops->output;
}

/* Neighbour state is OK;
enable fast path.

Called with write_locked neigh.
*/
static void neigh_connect(struct neighbour *neigh)
{
NEIGH_PRINTK2("neigh %p is connected.\n", neigh);

neigh->output = neigh->ops->connected_output;
}
/*
 * Neighbour operations table for AF_INET (ARP) entries.  The two output
 * members are the handlers that neigh_suspect()/neigh_connect() copy into
 * neigh->output.
 */
static const struct neigh_ops arp_generic_ops = {
	.family			= AF_INET,
	.solicit		= arp_solicit,
	.error_report		= arp_error_report,
	.output			= neigh_resolve_output,		/* state-checking path */
	.connected_output	= neigh_connected_output,	/* no state check */
};

neigh_resolve_output 的大致含义:如果邻居项输出设备的 header_ops 提供了 cache 回调,并且二层首部缓存尚未建立(hh_len 为 0),

则先调用 neigh_hh_init 为该邻居项建立硬件首部缓存;随后通过 dev_hard_header 在输出报文前添加二层硬件首部,添加成功后交给 dev_queue_xmit 发送,失败则丢弃报文。

/*
 * Slow, state-checking output path.
 *
 * Returns 0 when neigh_event_send() reports that the packet must not be
 * sent right now, the result of dev_queue_xmit() when the link-layer header
 * was built, and -EINVAL (after freeing @skb) when the skb has no dst or the
 * header could not be built.
 */
int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev;
	unsigned int seq;
	int err;

	if (!dst) {
		NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
			      dst, neigh);
		goto drop;
	}

	/* Non-zero means the neighbour is not usable yet; nothing to send. */
	if (neigh_event_send(neigh, skb))
		return 0;

	dev = neigh->dev;

	/* Fill the header cache once if the device provides a cache hook. */
	if (dev->header_ops->cache && !neigh->hh.hh_len)
		neigh_hh_init(neigh, dst);

	/* Build the link-layer header; retry if neigh->ha changed meanwhile. */
	do {
		__skb_pull(skb, skb_network_offset(skb));
		seq = read_seqbegin(&neigh->ha_lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      neigh->ha, NULL, skb->len);
	} while (read_seqretry(&neigh->ha_lock, seq));

	if (err >= 0)
		return dev_queue_xmit(skb);

drop:
	kfree_skb(skb);
	return -EINVAL;
}

 

/*
 * Record that the neighbour was just used and decide whether the caller may
 * transmit immediately.
 *
 * Returns 0 when the state is one of CONNECTED/DELAY/PROBE; otherwise
 * returns whatever __neigh_event_send() decides.
 */
static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	const unsigned long stamp = jiffies;

	/* Only write when the value actually changes. */
	if (neigh->used != stamp)
		neigh->used = stamp;

	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		return 0;

	return __neigh_event_send(neigh, skb);
}
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
int rc;
bool immediate_probe = false;

write_lock_bh(&neigh->lock);

rc = 0;
if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
goto out_unlock_bh;
/*
去掉NUD_CONNECT NUD_DELAY NUD_PROBE 状态
那么就只剩下 NUD_STALE NUD_INCOMPLETE NUD_NONE NUD_FAILD
*/
if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) { //NUD_NONE状态
if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
//如果允许发送广播请求或者应用程序发送请求解析neigh地址
unsigned long next, now = jiffies;

atomic_set(&neigh->probes, neigh->parms->ucast_probes);
neigh->nud_state = NUD_INCOMPLETE;
neigh->updated = now;
next = now + max(neigh->parms->retrans_time, HZ/2);
neigh_add_timer(neigh, next); //启动定时器
immediate_probe = true; //发送arp 请求(ipv4) 请求邻居表项
} else {
neigh->nud_state = NUD_FAILED;//邻居无效 不能输出
neigh->updated = jiffies;
write_unlock_bh(&neigh->lock);

kfree_skb(skb);
return 1;
}
} else if (neigh->nud_state & NUD_STALE) {
NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
neigh->nud_state = NUD_DELAY;//转变为delay 状态
neigh->updated = jiffies;
neigh_add_timer(neigh,
jiffies + neigh->parms->delay_probe_time);
}

if (neigh->nud_state == NUD_INCOMPLETE) {//说明之前有报文发送
if (skb) {
while (neigh->arp_queue_len_bytes + skb->truesize >
neigh->parms->queue_len_bytes) {//如果请求报文已经满了,但还没有收到应答。
struct sk_buff *buff;//如果缓存队列还没有达到上限,则将报文加入到输出缓存队列中
//否者 丢弃队列中最早加入的报文然后加入队列
//但是返回值都是1 即 不能立即发送
buff = __skb_dequeue(&neigh->arp_queue);
if (!buff)
break;
neigh->arp_queue_len_bytes -= buff->truesize;
kfree_skb(buff);
NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
}
skb_dst_force(skb);
__skb_queue_tail(&neigh->arp_queue, skb);
neigh->arp_queue_len_bytes += skb->truesize;
}
rc = 1;
}
out_unlock_bh:
if (immediate_probe)
neigh_probe(neigh); 发出邻居项请求 solict报文 (arp请求等)
else
write_unlock(&neigh->lock);
local_bh_enable();
return rc;
}

 

neigh_hh_init :缓存二层头,以eth为例:就是缓存二层mac

邻居子系统1.5 neigh output_sed邻居子系统1.5 neigh output_缓存_02

/**
 * ether_setup - apply Ethernet defaults to a network device
 * @dev: device to initialise
 *
 * Installs eth_header_ops as the device's header operations and sets the
 * link type, header and address lengths, MTU, TX queue length, interface
 * flags and an all-ones broadcast address.
 */
void ether_setup(struct net_device *dev)
{
	/* Link type and header handling. */
	dev->type = ARPHRD_ETHER;
	dev->header_ops = &eth_header_ops;

	/* Frame geometry. */
	dev->addr_len = ETH_ALEN;
	dev->hard_header_len = ETH_HLEN;
	dev->mtu = ETH_DATA_LEN;

	/* Ethernet wants good queues. */
	dev->tx_queue_len = 1000;

	/* Capabilities: flags is overwritten, priv_flags is only extended. */
	dev->flags = IFF_BROADCAST | IFF_MULTICAST;
	dev->priv_flags |= IFF_TX_SKB_SHARING;

	/* Broadcast address is all 0xFF bytes. */
	memset(dev->broadcast, 0xFF, ETH_ALEN);
}
/*
 * Link-layer header callbacks for Ethernet devices; ether_setup() installs
 * this table as dev->header_ops.
 */
const struct header_ops eth_header_ops ____cacheline_aligned = {
	.create		= eth_header,
	.parse		= eth_header_parse,
	.rebuild	= eth_rebuild_header,
	.cache		= eth_header_cache,		/* fills a neighbour's hh_cache */
	.cache_update	= eth_header_cache_update,
};

/**
 * eth_header_cache - build a cached Ethernet header for a neighbour
 * @neigh: neighbour supplying the destination address and the device
 * @hh: header cache entry to fill
 * @type: value stored in the header's protocol field
 *
 * Writes a header template (protocol, device address as source, neighbour
 * address as destination) into @hh and records its length.
 *
 * Return: 0 on success, -1 when @type is ETH_P_802_3 (nothing is written).
 */
int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh, __be16 type)
{
	const struct net_device *dev = neigh->dev;
	struct ethhdr *eth;
	u8 *slot;

	if (type == htons(ETH_P_802_3))
		return -1;

	/* The template lives at the offset HH_DATA_OFF() yields for this header size. */
	slot = (u8 *)hh->hh_data + HH_DATA_OFF(sizeof(struct ethhdr));
	eth = (struct ethhdr *)slot;

	eth->h_proto = type;
	memcpy(eth->h_source, dev->dev_addr, ETH_ALEN);
	memcpy(eth->h_dest, neigh->ha, ETH_ALEN);

	hh->hh_len = ETH_HLEN;
	return 0;
}

/*
 * Fill the neighbour's hardware-header cache through the device's
 * header_ops->cache hook, unless it has already been filled.
 *
 * Takes n->lock for writing itself, so that only one thread initialises
 * the hh_cache entry.
 */
static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
{
	struct hh_cache *cache = &n->hh;
	__be16 proto = dst->ops->protocol;
	struct net_device *dev = dst->dev;

	write_lock_bh(&n->lock);

	/* A non-zero length means the entry is already initialised. */
	if (cache->hh_len == 0)
		dev->header_ops->cache(n, cache, proto);

	write_unlock_bh(&n->lock);
}
//根据代码可以看出 直接拷贝二层头

View Code

 其实在创建网卡虚拟接口ethx的时候, 以ixgbe驱动为例! ixgbe驱动加载后match 到设备info,执行ixgbe_probe,其会创建net_device,

netdev = alloc_etherdev_mq(sizeof(struct ixgbe_adapter), indices);

 

其最后会调用 ether_setup,设置 dev->header_ops = &eth_header_ops 等 net_device 的 hook 函数,并设置 ethtool_ops 回调接口的实现;

最后设置 netdev->netdev_ops = &ixgbe_netdev_ops; 设置 报文发送的驱动函数接口实现 ,比如ndo_start_xmit 的实现

快速发送:

/*
 * Transmit @skb to neighbour @n (the original notes say this is called from
 * ip_finish_output2).
 *
 * A pending confirmation on @dst is first transferred to the neighbour.
 * The cached-header path is taken only when the neighbour is connected and
 * a cached header exists; otherwise the neighbour's own output handler runs
 * (e.g. neigh_resolve_output).
 */
static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n,
				   struct sk_buff *skb)
{
	struct hh_cache *cached = &n->hh;

	if (unlikely(dst->pending_confirm)) {
		n->confirmed = jiffies;
		dst->pending_confirm = 0;
	}

	/* Not connected, or no cached header yet: use the handler in n->output. */
	if (!(n->nud_state & NUD_CONNECTED) || !cached->hh_len)
		return n->output(n, skb);

	return neigh_hh_output(cached, skb);
}
/*
 * Copy the cached link-layer header in front of the packet data and hand
 * the packet to dev_queue_xmit().
 */
static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb)
{
	unsigned int hh_len;
	unsigned int seq;

	/* Repeat the copy if the cache was updated while we were reading it. */
	do {
		seq = read_seqbegin(&hh->hh_lock);
		hh_len = hh->hh_len;

		if (unlikely(hh_len > HH_DATA_MOD)) {
			/* Long header: copy its HH_DATA_ALIGN()-rounded length. */
			unsigned int hh_alen = HH_DATA_ALIGN(hh_len);

			memcpy(skb->data - hh_alen, hh->hh_data, hh_alen);
		} else {
			/* Header fits in HH_DATA_MOD bytes: fixed-size copy. */
			memcpy(skb->data - HH_DATA_MOD, hh->hh_data, HH_DATA_MOD);
		}
	} while (read_seqretry(&hh->hh_lock, seq));

	/* Expose only the real header length in front of the payload. */
	skb_push(skb, hh_len);

	return dev_queue_xmit(skb);
}

 

neigh_hh_output-缓存输出,直接拷贝二层头部,然后输出;

neigh_connected_output-快速输出,用于连接状态的输出;需要重新构建二层头部,然后输出;

neigh_resolve_output-慢速输出,用于非连接状态的输出;需要对邻居项状态进行检查,然后重新构造二层头部,最后输出;

neigh_direct_output-直接输出,用于没有二层头部时的输出;

/*
 * Output handler for the connected state.  It performs no state check, but
 * unlike neigh_hh_output() it rebuilds the link-layer header for every
 * packet instead of copying a cached one.
 *
 * Returns the result of dev_queue_xmit(), or -EINVAL (after freeing @skb)
 * when the header could not be built.
 */
int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct net_device *dev = neigh->dev;
	unsigned int seq;
	int hdr_rc;

	/* Build the header; retry if neigh->ha changed meanwhile. */
	do {
		__skb_pull(skb, skb_network_offset(skb));
		seq = read_seqbegin(&neigh->ha_lock);
		hdr_rc = dev_hard_header(skb, dev, ntohs(skb->protocol),
					 neigh->ha, NULL, skb->len);
	} while (read_seqretry(&neigh->ha_lock, seq));

	if (hdr_rc < 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	return dev_queue_xmit(skb);
}

 

http代理服务器(3-4-7层代理)-网络事件库公共组件、内核kernel驱动 摄像头驱动 tcpip网络协议栈、netfilter、bridge 好像看过!!!! 但行好事 莫问前程 --身高体重180的胖子

举报

相关推荐

0 条评论