Linux 内核 schedule时的preemption notify机制

内核进行进程切换时,先调用了__schedule,在关抢占后调用context_switch:
static void __sched __schedule(void)
{
    struct task_struct *prev, *next;
    unsigned long *switch_count;
    struct rq *rq;
    int cpu;

need_resched:
    preempt_disable();
    cpu = smp_processor_id();
    rq = cpu_rq(cpu);
...
raw_spin_lock_irq(&rq->lock);
...

context_switch(rq, prev, next);

...

sched_preempt_enable_no_resched();
static inline void
context_switch(struct rq *rq, struct task_struct *prev,
           struct task_struct *next)
{
    struct mm_struct *mm, *oldmm;

    prepare_task_switch(rq, prev, next);

    mm = next->mm;
    oldmm = prev->active_mm;
…

    finish_task_switch(this_rq(), prev);
context_switch一开始调用的prepare_task_switch里面会调用fire_sched_out_preempt_notifiers,进而调用prev进程注册的sched_out操作:
/*
 * prepare_task_switch - hooks run before switching from @prev to @next.
 * @rq: runqueue handed on to prepare_lock_switch()
 * @prev: task being switched away from
 * @next: task being switched to
 *
 * Called at the top of context_switch(). Runs, in this order, the
 * tracing, sched-info and perf "sched out" hooks, then fires the
 * sched_out preempt notifiers for @prev, and finally the lock and
 * arch specific prepare hooks.
 */
static inline void
prepare_task_switch(struct rq *rq, struct task_struct *prev,
            struct task_struct *next)
{
    trace_sched_switch(prev, next);
    sched_info_switch(prev, next);
    perf_event_task_sched_out(prev, next);
    /* Invokes the sched_out callback of the notifiers registered for prev. */
    fire_sched_out_preempt_notifiers(prev, next);
    prepare_lock_switch(rq, next);
    prepare_arch_switch(next);
}
这里有一个分支:如果被调度的是新创建的进程,它第一次运行时要调用schedule_tail(其中同样会调用finish_task_switch):
/**
 * schedule_tail - first thing a freshly forked thread must call.
 * @prev: the thread we just switched away from.
 *
 * Finishes the task switch on the current CPU's runqueue via
 * finish_task_switch(), runs post_schedule(), and then, if
 * current->set_child_tid is set, writes task_pid_vnr(current) to
 * that user-space address.
 */
asmlinkage void schedule_tail(struct task_struct *prev)
    __releases(rq->lock)
{
    struct rq *rq = this_rq();

    /* Same completion step that context_switch() performs after a switch. */
    finish_task_switch(rq, prev);

    /*
     * FIXME: do we need to worry about rq being invalidated by the
     * task_switch?
     */
    post_schedule(rq);

#ifdef __ARCH_WANT_UNLOCKED_CTXSW
    /* In this case, finish_task_switch does not reenable preemption */
    preempt_enable();
#endif
    /* Store the value of task_pid_vnr(current) through the user pointer, if any. */
    if (current->set_child_tid)
        put_user(task_pid_vnr(current), current->set_child_tid);
}
回到主题:finish_task_switch这里会调用fire_sched_in_preempt_notifiers:
/*
 * fire_sched_in_preempt_notifiers - run the sched_in callbacks of @curr.
 * @curr: task whose preempt_notifiers list is walked
 *
 * Iterates over every preempt_notifier linked on curr->preempt_notifiers
 * and calls its ops->sched_in() hook, passing the notifier itself and the
 * value of raw_smp_processor_id().
 */
static void fire_sched_in_preempt_notifiers(struct task_struct *curr)
{
    struct preempt_notifier *notifier;
 
    hlist_for_each_entry(notifier, &curr->preempt_notifiers, link)
        notifier->ops->sched_in(notifier, raw_smp_processor_id());
}
preempt_notifiers在哪里注册的呢,对kvm来说是这里:
static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
{
    int r;
    struct kvm_vcpu *vcpu, *v;
 
    if (id >= KVM_MAX_VCPUS)
        return -EINVAL;
 
    vcpu = kvm_arch_vcpu_create(kvm, id);
    if (IS_ERR(vcpu))
        return PTR_ERR(vcpu);
 
    preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops);
 
    r = kvm_arch_vcpu_setup(vcpu);

kvm模块加载时,vmx_init->kvm_init里面初始化了kvm_preempt_ops:

    kvm_preempt_ops.sched_in = kvm_sched_in;
    kvm_preempt_ops.sched_out = kvm_sched_out;
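所以kvm_sched_out和kvm_sched_in都是经由context_switch被调用的,而context_switch位于__schedule的preempt_disable()与sched_preempt_enable_no_resched()之间,因此它们被调用时的上下文是关抢占的。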

Leave a Reply

Your email address will not be published. Required fields are marked *