e100网卡驱动分析

e100网卡驱动
在模块的初始化函数中调用pci_register_driver(&e100_driver)向内核注册e100驱动
2911 static struct pci_driver e100_driver = {
2912     .name =         DRV_NAME,
2913     .id_table =     e100_id_table,
2914     .probe =        e100_probe,
2915     .remove =       __devexit_p(e100_remove),
2916 #ifdef CONFIG_PM
2917     /* Power Management hooks */
2918     .suspend =      e100_suspend,
2919     .resume =       e100_resume,
2920 #endif
2921     .shutdown =     e100_shutdown,
2922     .err_handler = &e100_err_handler,
2923 };
内核检测到网卡后(vid,pid在idtable中),会调用e100_probe函数,probe函数创建netdevice结构和e100私有的nic结构,将其初始化,并通过register_netdev注册对应的netdevice结构,具体流程如下:
//为其分配内存
netdev = alloc_etherdev(sizeof(struct nic))

//初始化对应的函数指针
2627     netdev->open = e100_open;
2628     netdev->stop = e100_close;
2629     netdev->hard_start_xmit = e100_xmit_frame;
2630     netdev->set_multicast_list = e100_set_multicast_list;
2631     netdev->set_mac_address = e100_set_mac_address;
2632     netdev->change_mtu = e100_change_mtu;
2633     netdev->do_ioctl = e100_do_ioctl;
2634     SET_ETHTOOL_OPS(netdev, &e100_ethtool_ops);
2635     netdev->tx_timeout = e100_tx_timeout;
2636     netdev->watchdog_timeo = E100_WATCHDOG_PERIOD;

//napi支持,分配napi结构,初始化,并挂在netdevice的napi_list上
2642     nic = netdev_priv(netdev);
2643     netif_napi_add(netdev, &nic->napi, e100_poll, E100_NAPI_WEIGHT);

//nic的初始化
2702     init_timer(&nic->watchdog);
2703     nic->watchdog.function = e100_watchdog;
2704     nic->watchdog.data = (unsigned long)nic;
2705     init_timer(&nic->blink_timer);
2706     nic->blink_timer.function = e100_blink_led;
2707     nic->blink_timer.data = (unsigned long)nic;
2708 
2709     INIT_WORK(&nic->tx_timeout_task, e100_tx_timeout_task);

//向内核注册
register_netdev(netdev)

当驱动激活的时候调用e100_open()函数
//初始化和分配rxs数据接收链表
2124     if((err = e100_rx_alloc_list(nic)))
2125         return err;
//分配和初始化cbs命令链表,数据的发送通过这个链表完成
2126     if((err = e100_alloc_cbs(nic)))
2127         goto err_rx_clean_list;
2128     if((err = e100_hw_init(nic)))
2129         goto err_clean_cbs;
2130     e100_set_multicast_list(nic->netdev);
2131     e100_start_receiver(nic, NULL);
2132     mod_timer(&nic->watchdog, jiffies);
2133     if((err = request_irq(nic->pdev->irq, e100_intr, IRQF_SHARED,
2134         nic->netdev->name, nic->netdev)))
2135         goto err_no_irq;
2136     netif_wake_queue(nic->netdev);
2137     napi_enable(&nic->napi);
数据的发送
static int e100_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
1637 {
1650     err = e100_exec_cb(nic, skb, e100_xmit_prepare);
1651     
1652     switch(err) {
1653     case -ENOSPC:
1654         /* We queued the skb, but now we're out of space. */
1655         DPRINTK(TX_ERR, DEBUG, "No space for CB\n");
1656         netif_stop_queue(netdev);
1657         break;
1658     case -ENOMEM:
1659         /* This is a hard error - log it. */
1660         DPRINTK(TX_ERR, DEBUG, "Out of Tx resources, returning skb\n");
1661         netif_stop_queue(netdev);
1662         return 1;
1663     }
1664 
1665     netdev->trans_start = jiffies;
1666     return 0;
1667 }
通过e100_exec_cb()发送数据,并对发送的错误进行处理:如果发送失败,则对net_device的发送队列设置相应的标志,参见netif_stop_queue。
其中 e100_xmit_prepare对cb作一些设置,e100_exec_cb才发送数据.
1619 static void e100_xmit_prepare(struct nic *nic, struct cb *cb,
1620     struct sk_buff *skb)
1621 {
1622     cb->command = nic->tx_command;
1623     /* interrupt every 16 packets regardless of delay */
1624     if((nic->cbs_avail & ~15) == nic->cbs_avail)
1625         cb->command |= cpu_to_le16(cb_i);
1626     cb->u.tcb.tbd_array = cb->dma_addr + offsetof(struct cb, u.tcb.tbd);
1627     cb->u.tcb.tcb_byte_count = 0;
1628     cb->u.tcb.threshold = nic->tx_threshold;
1629     cb->u.tcb.tbd_count = 1;
1630     cb->u.tcb.tbd.buf_addr = cpu_to_le32(pci_map_single(nic->pdev,
1631         skb->data, skb->len, PCI_DMA_TODEVICE));
1632     /* check for mapping failure? */
1633     cb->u.tcb.tbd.size = cpu_to_le16(skb->len);
1634 }

e100_exec_cb()则对nic上的cb链表进行处理
 837 static int e100_exec_cb(struct nic *nic, struct sk_buff *skb,
 838     void (*cb_prepare)(struct nic *, struct cb *, struct sk_buff *))
 839 {
//使用一个空闲cb结构
 851     cb = nic->cb_to_use;
 852     nic->cb_to_use = cb->next;
 853     nic->cbs_avail--;
 854     cb->skb = skb;
 //调用e100_xmit_prepare对cb进行设置
 859     cb_prepare(nic, cb, skb);

//对链表上的cb进行处理
 867     while(nic->cb_to_send != nic->cb_to_use) {
 868         if(unlikely(e100_exec_cmd(nic, nic->cuc_cmd,
 869             nic->cb_to_send->dma_addr))) {
 870             /* Ok, here's where things get sticky.  It's
 871              * possible that we can't schedule the command
 872              * because the controller is too busy, so
 873              * let's just queue the command and try again
 874              * when another command is scheduled. */
 875             if(err == -ENOSPC) {
 876                 //request a reset
 877                 schedule_work(&nic->tx_timeout_task);
 878             }
 879             break;
 880         } else {
 881             nic->cuc_cmd = cuc_resume;
 882             nic->cb_to_send = nic->cb_to_send->next;
 883         }
 884     }

e100_exec_cmd执行具体的操作。到这儿涉及很多硬件相关的细节,还不太清楚:有哪些cmd?DMA起了什么作用?


数据的接收
注册的中断函数  e100_intr,经典的中断处理例程
2032 static irqreturn_t e100_intr(int irq, void *dev_id)
2033 {
2034     struct net_device *netdev = dev_id;
2035     struct nic *nic = netdev_priv(netdev);
//读取寄存器,判断中断是我们产生的还是其他设备产生的
2036     u8 stat_ack = ioread8(&nic->csr->scb.stat_ack);
2037 
2038     DPRINTK(INTR, DEBUG, "stat_ack = 0x%02X\n", stat_ack);
2039 
2040     if(stat_ack == stat_ack_not_ours || /* Not our interrupt */
2041        stat_ack == stat_ack_not_present)    /* Hardware is ejected */
2042         return IRQ_NONE;
2043 
//ack 我们的设备
2044     /* Ack interrupt(s) */
2045     iowrite8(stat_ack, &nic->csr->scb.stat_ack);
2046 
2047     /* We hit Receive No Resource (RNR); restart RU after cleaning */
2048     if(stat_ack & stat_ack_rnr)
2049         nic->ru_running = RU_SUSPENDED;
2050
//把设备的poll放到softnet_data的poll_list上 
2051     if(likely(netif_rx_schedule_prep(netdev, &nic->napi))) {
2052         e100_disable_irq(nic);
2053         __netif_rx_schedule(netdev, &nic->napi);
2054     }
2055 
2056     return IRQ_HANDLED;
2057 }
数据的接收用的是napi方法,netif_rx_schedule()最终会调用napi_schedule把设备的poll方法挂在poll_list上。
2359 void __napi_schedule(struct napi_struct *n)
2360 {                    
2361     unsigned long flags;
2362     
2363     local_irq_save(flags);
2364     list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
2365     __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2366     local_irq_restore(flags);
2367 }
设备的poll方法
2059 static int e100_poll(struct napi_struct *napi, int budget)
2060 {
2061     struct nic *nic = container_of(napi, struct nic, napi);
2062     struct net_device *netdev = nic->netdev;
2063     unsigned int work_done = 0;
2064 
2065     e100_rx_clean(nic, &work_done, budget);
2066     e100_tx_clean(nic);
2067 
2068     /* If budget not fully consumed, exit the polling mode */
2069     if (work_done < budget) {
2070         netif_rx_complete(netdev, napi);
2071         e100_enable_irq(nic);
2072     }
2073 
2074     return work_done;
2075 }
最终调用netif_receive_skb将数据包提交到上层。具体到硬件还是不太懂啊.:(

内核对数据的接收
有两种模式:中断和NAPI。
先说核心的数据结构
 996 /*
 997  * Incoming packets are placed on per-cpu queues so that
 998  * no locking is needed.
 999  */
1000 struct softnet_data
1001 {
1002     struct Qdisc        *output_queue;
1003     struct sk_buff_head input_pkt_queue;
1004     struct list_head    poll_list;
1005     struct sk_buff      *completion_queue;
1006 
1007     struct napi_struct  backlog;
1008 #ifdef CONFIG_NET_DMA
1009     struct dma_chan     *net_dma;
1010 #endif
1011 };
正如注释所说,每个CPU都有一个,当涉及它的操作时,先关中断,所以不需要锁的保护
普通中断模式下,中断通过netif_rx把数据提交给input_pkt_queue,如果backlog没有添加到poll_list中,添加之。
而在NAPI下只是调用netif_rx_schedule判断是否需要把设备的poll方法添加到poll_list中.
添加后raise_softirq(NET_RX_SOFTIRQ),触发net_rx_action软中断处理函数进行后续的处理;
net_rx_action遍历poll_list链表,调用其poll方法,对非NAPI设备来说就是通过process_backlog通用的poll方法把input_pkt_queue上的数据提交到上层,而对NAPI来说调用自身的poll方法。其中包含流量控制和对poll调用次数的限制。
process_backlog通过netif_receive_skb处理桥接,数据包监听,最后ip_rcv传给ip层
以e100_poll为例,其遍历rxs接收数据链表,对缓冲区的数据作unmap操作,然后调用netif_receive_skb传给上层。至于在哪儿禁用对应设备的中断:从上面e100_intr的代码看,在netif_rx_schedule_prep成功后即调用e100_disable_irq禁用了设备中断,poll结束(work_done < budget)时再由e100_enable_irq重新使能。

数据的发送:
两种方式:
1,dev_queue_xmit,会经过内核的流量控制子系统qdisc_run,最终调用hard_start_xmit
2,直接调用hard_start_xmit()发送数据
net_tx_action()作用
1,释放完成队列(completion_queue)上的内存;2,调用qdisc_run对output队列上的数据进行发送处理

作者: sandflee   发布时间: 2010-10-15