Linux 的softirq機制是與SMP緊密不可分的。為此,整個softirq機制的設計與實現中自始至終都貫徹了一個思想:“誰觸發,誰執行”(Who marks,Who runs),也即觸發軟中斷的那個CPU負責執行它所觸發的軟中斷,而且每個CPU都有它自己的軟中斷觸發與控制機制。這個設計思想也使得softirq 機制充分利用了SMP系統的性能和特點。 多個softirq可以並行執行,甚至同一個softirq可以在多個processor上同時執行。

一、softirq的實現
     每個softirq在內核中通過struct softirq_action來表示,另外,通過全局數組softirq_vec標識當前內核支持的所有的softirq。
/* softirq mask and active fields moved to irq_cpustat_t in
 * asm/hardirq.h to get better cache usage. KAO
 */

/*
 * Descriptor for one softirq. It holds only the handler callback, which
 * takes a pointer to a struct softirq_action.
 */
struct softirq_action
{
    void    (*action)(struct softirq_action *);
};

/* Table of softirq descriptors with NR_SOFTIRQS slots. */
static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
Linux內核最多可以支持32個softirq(思考:為什麽是32個?),但當前只實現了10個,如下:
/*
 * Softirq numbers, starting at 0. NR_SOFTIRQS is not a softirq itself: it
 * terminates the list, so its value equals the number of entries above it
 * (it is the size used for softirq_vec).
 */
enum
{
    HI_SOFTIRQ=0,
    TIMER_SOFTIRQ,
    NET_TX_SOFTIRQ,
    NET_RX_SOFTIRQ,
    BLOCK_SOFTIRQ,
    BLOCK_IOPOLL_SOFTIRQ,
    TASKLET_SOFTIRQ,
    SCHED_SOFTIRQ,
    HRTIMER_SOFTIRQ,
    RCU_SOFTIRQ,    /* Preferable RCU should always be the last softirq */

    NR_SOFTIRQS
};
二、softirq處理函數

    struct softirq_action結構體中,只有一個函數指針成員action,即指向用戶定義的softirq處理函數。當執行時,可以通過如下代碼:
                     softirq_vec[i].action(&softirq_vec[i]);
    一個註冊的softirq在執行之前必須被激活,術語稱為"raise the softirq"。被激活的softirq通常並不會立即執行,一般會在之後的某個時刻檢查當前系統中是否有被pending的softirq,如果有就去執行,Linux內核中檢查是否有softirq掛起的檢查點主要有以下三類:
(1)硬件中斷代碼返回的時候
/*
 * Exit an interrupt context. Process softirqs if needed and possible:
 */
void irq_exit(void)
{
    account_system_vtime(current);
    trace_hardirq_exit();
    sub_preempt_count(IRQ_EXIT_OFFSET);
    /*
     * Softirqs are run from here only when no interrupt-context bits
     * remain in preempt_count() and this CPU has softirqs pending.
     */
    if (!in_interrupt() && local_softirq_pending())
        invoke_softirq();

    rcu_irq_exit();
#ifdef CONFIG_NO_HZ
    /* Make sure that timer wheel updates are propagated */
    if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched())
        tick_nohz_stop_sched_tick(0);
#endif
    preempt_enable_no_resched();
}
(2)ksoftirqd內核服務線程運行的時候
/*
 * Thread function of ksoftirqd (excerpt: the "... ..." lines mark code the
 * article left out, including the enclosing loop and the wait_to_die label).
 * __bind_cpu carries the CPU number this thread serves, passed as a pointer
 * and cast back to long where it is used.
 */
static int run_ksoftirqd(void * __bind_cpu)
{
    ... ...
        /* Keep processing for as long as this CPU has softirqs pending. */
        while (local_softirq_pending()) {
            /* Preempt disable stops cpu going offline.
             If already offline, we'll be on wrong CPU:
             don't process */
            if (cpu_is_offline((long)__bind_cpu))
                goto wait_to_die;
            do_softirq();
            /* Open a preemption window around cond_resched(), then close it again. */
            preempt_enable_no_resched();
            cond_resched();
            preempt_disable();
            rcu_note_context_switch((long)__bind_cpu);
        }
        preempt_enable();
        set_current_state(TASK_INTERRUPTIBLE);
    }
    __set_current_state(TASK_RUNNING);
    return 0;
... ...
}
(3)在一些內核子系統中顯式地去檢查掛起的softirq
/*
 * Hand skb to netif_rx() and then, still with preemption disabled, run any
 * softirqs that are pending on this CPU before returning.
 *
 * Returns the value returned by netif_rx().
 */
int netif_rx_ni(struct sk_buff *skb)
{
    int err;

    preempt_disable();
    err = netif_rx(skb);
    /* Explicit check point: process pending softirqs right away. */
    if (local_softirq_pending())
        do_softirq();
    preempt_enable();

    return err;
}
下面重點分析一下do_softirq(),了解Linux內核到底是怎麽來處理softirq的。
/*
 * Entry point for softirq processing. Returns immediately when called from
 * any interrupt context; otherwise, with local interrupts saved/disabled,
 * it runs __do_softirq() via call_on_stack() if softirqs are pending.
 */
asmlinkage void do_softirq(void)
{
    unsigned long flags;
    struct thread_info *curctx;
    union irq_ctx *irqctx;
    u32 *isp;

    if (in_interrupt()) /* this check needs to be understood carefully??? */
        return;

    local_irq_save(flags);

    if (local_softirq_pending()) {
        /* Record the current task and stack pointer in the softirq context. */
        curctx = current_thread_info();
        irqctx = __get_cpu_var(softirq_ctx);
        irqctx->tinfo.task = curctx->task;
        irqctx->tinfo.previous_esp = current_stack_pointer;

        /* build the stack frame on the softirq stack */
        /* isp is the address just past the end of *irqctx */
        isp = (u32 *) ((char *)irqctx + sizeof(*irqctx));

        call_on_stack(__do_softirq, isp);
        /*
         * Shouldnt happen, we returned above if in_interrupt():
         */
        WARN_ON_ONCE(softirq_count());
    }

    local_irq_restore(flags);
}
do_softirq主要是完成了以下幾個功能:
(1)檢查當前processor上是否有pending的softirq
(2)如果有pending的softirq,為softirq的處理建立新的堆棧,即建立新的軟中斷上下文環境
(3)處理軟中斷__do_softirq
這裏需要重點分析一下in_interrupt()函數的含義。在linux內核中,為了方便判斷當前執行路徑在哪個上下文環境中,定義了幾個接口:
/*
 * Each helper masks one field (or a union of fields) out of preempt_count();
 * a non-zero result means the corresponding counter or flag is set.
 */
#define hardirq_count() (preempt_count() & HARDIRQ_MASK)
#define softirq_count() (preempt_count() & SOFTIRQ_MASK)
#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
     | NMI_MASK))
/*
 * Are we doing bottom half or hardware interrupt processing?
 * Are we in a softirq context? Interrupt context?
 */
#define in_irq() (hardirq_count())
#define in_softirq() (softirq_count())
#define in_interrupt() (irq_count())
/*
 * Are we in NMI context?
 */
#define in_nmi() (preempt_count() & NMI_MASK)
從註釋可以看出包括:硬件中斷上下文,軟件中斷上下文,不可屏蔽中斷(NMI)上下文等。在這些宏中,都涉及到了preempt_count()這個宏,這是一個比較重要的宏,在Linux源碼中對其做了詳細的註釋:
/*
 * We put the hardirq and softirq counter into the preemption
 * counter. The bitmask has the following meaning:
 *
 * - bits 0-7 are the preemption count (max preemption depth: 256)
 * - bits 8-15 are the softirq count (max # of softirqs: 256)
 *
 * The hardirq count can in theory reach the same as NR_IRQS.
 * In reality, the number of nested IRQS is limited to the stack
 * size as well. For archs with over 1000 IRQS it is not practical
 * to expect that they will all nest. We give a max of 10 bits for
 * hardirq nesting. An arch may choose to give less than 10 bits.
 * m68k expects it to be 8.
 *
 * - bits 16-25 are the hardirq count (max # of nested hardirqs: 1024)
 * - bit 26 is the NMI_MASK
 * - bit 28 is the PREEMPT_ACTIVE flag
 *
 * PREEMPT_MASK: 0x000000ff
 * SOFTIRQ_MASK: 0x0000ff00
 * HARDIRQ_MASK: 0x03ff0000
 * NMI_MASK: 0x04000000
 */
從註釋可以看出,preempt_count各個bit位的含義:
(1)bit0~7位表示搶占計數,即支持最大的搶占深度為256
(2)bit8~15位表示軟中斷計數,即支持最大的軟中斷的個數為256。需要註意的是,由於軟中斷的掛起(pending)狀態保存在一個32位的變量中,因此實際最大只能支持32個軟中斷。
(3)bit16~25位表示硬件中斷嵌套層數,即最大可支持的嵌套層次為1024,實際情況下這是不可能的,因為中斷的嵌套層數還受制於中斷處理的棧空間的大小。
    介紹了這麽多,現在來重點分析一下上面提到的in_interrupt到底表示什麽意思。
/* Non-zero when any hardirq, softirq or NMI bits are set in preempt_count(). */
#define in_interrupt() (irq_count())

/*
 * The backslash continuation must be followed directly by the rest of the
 * expression: a blank line after it would end the macro early.
 */
#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
     | NMI_MASK))
從其宏定義可以看出,in_interrupt宏的值是從preempt_count中按位取出的硬件中斷嵌套計數、軟中斷計數以及不可屏蔽中斷(NMI)標誌這三部分。回到do_softirq的代碼中,如果in_interrupt的值不為0,就不會處理軟中斷,意思是當有硬件中斷嵌套,其他軟中斷以及不可屏蔽中斷的情況下,不會去處理軟中斷。對於中斷的嵌套層數以及不可屏蔽中斷是比較好理解的,對於軟中斷,應該去分析一下,在什麽地方軟中斷的計數會增加:
__local_bh_disable((unsigned long)__builtin_return_address(0));
/*
 * Raise the softirq field of the preempt counter by SOFTIRQ_OFFSET.
 * The ip argument is not used in this version of the function.
 */
static inline void __local_bh_disable(unsigned long ip)
{
    add_preempt_count(SOFTIRQ_OFFSET);
    barrier();
}
/* Adds val directly to the value designated by preempt_count(). */
# define add_preempt_count(val)    do { preempt_count() += (val); } while (0)
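從代碼可以看出,禁止中斷下半部的函數__local_bh_disable會增加軟中斷的計數,而__do_softirq在開始處理軟中斷之前也會調用它。那麽,當do_softirq正在處理軟中斷時,如果此時被硬件中斷打斷,而且在硬件中斷中又激活了優先級更高的軟中斷,當硬件中斷退出、再去執行do_softirq時,此時in_interrupt > 0,豈不是死鎖了?其實不會:此時irq_exit不會調用invoke_softirq,do_softirq也會直接返回,這只是避免在同一個CPU上重入軟中斷處理;新激活的軟中斷仍然記錄在pending位圖中,被打斷的__do_softirq在處理完本輪之後會重新讀取local_softirq_pending(),發現還有pending就goto restart繼續處理,重試次數用完之後則喚醒ksoftirqd去處理。希望大家指教。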
實際的處理函數為__do_softirq:
asmlinkage void __do_softirq(void)
{
    struct softirq_action *h;
    __u32 pending;
    int max_restart = MAX_SOFTIRQ_RESTART; /*不啟動ksoftirqd之前,最大的處理softirq的次數,經驗值*/
    int cpu;
    /*取得當前被掛起的softirq,同時這裏也解釋了為什麽Linux內核最多支持32個softirq,因為pending只有32bit*/
    pending = local_softirq_pending();
    account_system_vtime(current);

    __local_bh_disable((unsigned long)__builtin_return_address(0));
    lockdep_softirq_enter();

    cpu = smp_processor_id();
restart:
    /* Reset the pending bitmask before enabling irqs */
    set_softirq_pending(0);/*獲取了pending的softirq之後,清空所有pending的softirq的標誌*/

    local_irq_enable();

    h = softirq_vec;

    do {
        if (pending & 1) { /*從最低位開始,循環右移逐位處理pending的softirq*/
            int prev_count = preempt_count();
            kstat_incr_softirqs_this_cpu(h - softirq_vec);

            trace_softirq_entry(h, softirq_vec);
            h->action(h); /*執行softirq的處理函數*/
            trace_softirq_exit(h, softirq_vec);
            if (unlikely(prev_count != preempt_count())) {
                printk(KERN_ERR "huh, entered softirq %td %s %p"
                 "with preempt_count %08x,"
                 " exited with %08x?\n", h - softirq_vec,
                 softirq_to_name[h - softirq_vec],
                 h->action, prev_count, preempt_count());
                preempt_count() = prev_count;
            }

            rcu_bh_qs(cpu);
        }
        h++;
        pending >>= 1;  /*循環右移*/
    } while (pending);

    local_irq_disable();

    pending = local_softirq_pending();
    if (pending && --max_restart)  /*啟動ksoftirqd的閾值*/
        goto restart;

    if (pending)  /*啟動ksoftirqd去處理softirq,此時說明pending的softirq比較多,比較頻繁,上面的處理過程中,又不斷有softirq被pending*/
        wakeup_softirqd();

    lockdep_softirq_exit();

    account_system_vtime(current);
    _local_bh_enable();

三、使用softirq
     softirq一般用在對實時性要求比較強的地方,當前的Linux內核中,只有兩個子系統直接使用了softirq:網絡子系統和塊設備子系統。另外,增加新的softirq需要重新編譯內核,因此,除非確實需要,最好先考慮tasklet和kernel timer是否適合當前需要。
     如果確實需要使用softirq,那麽需要考慮的一個重要的問題就是新增加的softirq的優先級,默認情況下,softirq的數值越小優先級越高,根據實際經驗,新增加的softirq最好在BLOCK_SOFTIRQ和TASKLET_SOFTIRQ之間。
     softirq的處理函數通過open_softirq進行註冊,此函數接收兩個參數,一個是softirq的整數索引,另一個是該softirq對應的處理函數。例如在網絡子系統中,註冊了如下兩個softirq及其處理函數:
open_softirq(NET_TX_SOFTIRQ, net_tx_action);
open_softirq(NET_RX_SOFTIRQ, net_rx_action);
    前面提到,軟中斷處理函數註冊後,還需要將該軟中斷激活,此軟中斷才能被執行,激活操作是通過raise_softirq函數來實現,在網絡子系統中激活代碼如下:
/* Called with irq disabled */
static inline void ____napi_schedule(struct softnet_data *sd,
                 struct napi_struct *napi)
{
    list_add_tail(&napi->poll_list, &sd->poll_list);
    __raise_softirq_irqoff(NET_RX_SOFTIRQ);
}
這裏的__raise_softirq_irqoff和raise_softirq的區別是,前者在事先已經關中斷的情況下可以被使用,後者自己完成中斷的關閉和恢復。

arrow
arrow
    文章標籤
    Linux 源碼 開發 編程
    全站熱搜
    創作者介紹
    創作者 成功运行 的頭像
    成功运行

    成功运行的部落格

    成功运行 發表在 痞客邦 留言(0) 人氣()