返回列表 发帖

Linux子系统之进程研究

Linux子系统之进程研究

Linux子系统之进程研究

mcuos.com站长osboy 原创


1.
进程与线程定义进程:
进程是程序执行时的一个实例。

进程创建的时候,几乎与父进程相同,与父进程有相同的程序代码,但是有各自独立的数据拷贝堆和栈。子进程对一个内存单元的修改,父进程是不可见的,反之亦然。

多线程应用程序:
拥有许多执行流的用户程序,共享应用程序的大部分数据结构。一个进程由几个用户线程组成,每个线程都代表进程的一个执行流。

Linux早期版本的多线程支持:
多线程程序的多个执行流的创建,处理和调度都是在用户层完成的,从内核的观点来看,多线程组成的程序仅仅是一个普通的进程。Linux内核对其的调度,会影响每一个组成的线程。所以不能实现一个线程阻塞,而另一个线程执行其他的事情,因为内核对进程的调度会影响组成该进程的所有线程。

Linux轻量级进程:
多个轻量级进程可以共享一些资源,但是又可以由内核独立调度,一个睡眠另一个是可以运行的。轻量级进程和线程关联起来就是线程的概念。

线程组:
实现了多线程应用的一组轻量级进程。
2.
进程描述符包含了一个进程相关的所有信息。用它来管理进程,内核必须对每个进程所做的事情进行清楚的描述。
Include/linux/sched.h中有:
struct task_struct {

volatile long state;
/* -1 unrunnable, 0 runnable, >0 stopped */

注释:进程当前所处的状态
#define TASK_RUNNING
0 //
要么在cpu上运行,要么准备运行
#define TASK_INTERRUPTIBLE
1 //
进程被挂起,直到硬件中断释放进程等待的资源,或者产生一个信号都可以把进程状态设置为TASK_RUNNING
#define TASK_UNINTERRUPTIBLE
2 //
同上,但是信号不能唤醒他。


#define __TASK_STOPPED
4//
进程的执行被暂停。
#define __TASK_TRACED
8//
进程的执行被debugger暂停。


void *stack;


atomic_t usage;


unsigned int flags;
/* per process flags, defined below */


unsigned int ptrace;



int lock_depth;
/* BKL lock depth */


#ifdef CONFIG_SMP
#ifdef __ARCH_WANT_UNLOCKED_CTXSW

int oncpu;

#endif
#endif


int prio, static_prio, normal_prio;


unsigned int rt_priority;


const struct sched_class *sched_class;


struct sched_entity se;


struct sched_rt_entity rt;


#ifdef CONFIG_PREEMPT_NOTIFIERS

/* list of struct preempt_notifier: */


struct hlist_head preempt_notifiers;

#endif


/*



* fpu_counter contains the number of consecutive context switches



* that the FPU is used. If this is over a threshold, the lazy fpu



* saving becomes unlazy to save the trap. This is an unsigned char



* so that after 256 times the counter wraps and the behavior turns



* lazy again; this to deal with bursty apps that only use FPU for



* a short time



*/


unsigned char fpu_counter;

#ifdef CONFIG_BLK_DEV_IO_TRACE

unsigned int btrace_seq;

#endif


unsigned int policy;


cpumask_t cpus_allowed;


#ifdef CONFIG_TREE_PREEMPT_RCU

int rcu_read_lock_nesting;


char rcu_read_unlock_special;


struct rcu_node *rcu_blocked_node;


struct list_head rcu_node_entry;

#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */

#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)

struct sched_info sched_info;

#endif


struct list_head tasks;//
进程链表,链接所有进程的描述符。

struct plist_node pushable_tasks;



struct mm_struct *mm, *active_mm;

#if defined(SPLIT_RSS_COUNTING)

struct task_rss_stat
rss_stat;

#endif
/* task state */

int exit_state;

注释:
/* in tsk->exit_state */
#define EXIT_ZOMBIE
16 //
进程僵死状态。进程的执行被禁止,但是还没发布wait4()或者waitpid()系统调用来返回有关死亡进程的信息。在调用wait之前内核不能丢弃包含在死进程描述符数据,因为父进程可能需要它。
#define EXIT_DEAD
32 //
进程僵死撤销状态。执行了wait后,把进程从EXIT_ZOMBIE状态变为EXIT_DEAD状态。

设置任务的运行状态的linux API
#define __set_task_state(tsk, state_value)
\


do { (tsk)->state = (state_value); } while (0)

#define set_task_state(tsk, state_value)
\//
严格顺序执行,内存屏蔽,防止执行顺序被优化。

set_mb((tsk)->state, (state_value))


#define __set_current_state(state_value)
\


do { current->state = (state_value); } while (0)

#define set_current_state(state_value)
\


set_mb(current->state, (state_value))



int exit_code, exit_signal;


int pdeath_signal;
/*
The signal sent when the parent dies
*/


/* ??? */


unsigned int personality;


unsigned did_exec:1;


unsigned in_execve:1;
/* Tell the LSMs that the process is doing an



* execve */


unsigned in_iowait:1;




/* Revert to default priority/policy when forking */


unsigned sched_reset_on_fork:1;



pid_t pid;//
标识一个线程,pid最大为32768,一个页面正好是4k字节,共32768bits所以linuxpidmap-array位图来表示当前已分配或者闲置的PID号,pidmap-array位图存放在一个页面上。

pid_t tgid;//
线程组的pid

#ifdef CONFIG_CC_STACKPROTECTOR

/* Canary value for the -fstack-protector gcc feature */


unsigned long stack_canary;

#endif


/*



* pointers to (original) parent process, youngest child, younger sibling,



* older sibling, respectively.
(p->father can be replaced with



* p->real_parent->pid)



*/


struct task_struct *real_parent; /* real parent process */


struct task_struct *parent; /* recipient of SIGCHLD, wait4() reports */


/*



* children/sibling forms the list of my natural children



*/


struct list_head children;
/* list of my children */


struct list_head sibling;
/* linkage in my parent's children list */


struct task_struct *group_leader;
/* threadgroup leader */



/*



* ptraced is the list of tasks this task is using ptrace on.



* This includes both natural children and PTRACE_ATTACH targets.



* p->ptrace_entry is p's link on the p->parent->ptraced list.



*/


struct list_head ptraced;


struct list_head ptrace_entry;



/* PID/PID hash table linkage. */


struct pid_link pids[PIDTYPE_MAX];


struct list_head thread_group;



struct completion *vfork_done;
/* for vfork() */


int __user *set_child_tid;
/* CLONE_CHILD_SETTID */


int __user *clear_child_tid;
/* CLONE_CHILD_CLEARTID */



cputime_t utime, stime, utimescaled, stimescaled;


cputime_t gtime;

#ifndef CONFIG_VIRT_CPU_ACCOUNTING

cputime_t prev_utime, prev_stime;

#endif

unsigned long nvcsw, nivcsw; /* context switch counts */


struct timespec start_time;
/* monotonic time */


struct timespec real_start_time;
/* boot based time */

/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */

unsigned long min_flt, maj_flt;



struct task_cputime cputime_expires;


struct list_head cpu_timers[3];


/* process credentials */

const struct cred *real_cred;
/* objective and real subjective task



* credentials (COW) */


const struct cred *cred;
/* effective (overridable) subjective task



* credentials (COW) */


struct mutex cred_guard_mutex;
/* guard against foreign influences on



* credential calculations



* (notably. ptrace) */


struct cred *replacement_session_keyring; /* for KEYCTL_SESSION_TO_PARENT */



char comm[TASK_COMM_LEN]; /* executable name excluding path



- access with [gs]et_task_comm (which lock



it with task_lock())



- initialized normally by setup_new_exec */

/* file system info */

int link_count, total_link_count;

#ifdef CONFIG_SYSVIPC
/* ipc stuff */

struct sysv_sem sysvsem;

#endif
#ifdef CONFIG_DETECT_HUNG_TASK
/* hung task detection */

unsigned long last_switch_count;

#endif
/* CPU-specific state of this task */

struct thread_struct thread;

/* filesystem information */

struct fs_struct *fs;

/* open file information */

struct files_struct *files;

/* namespaces */

struct nsproxy *nsproxy;

/* signal handlers */

struct signal_struct *signal;


struct sighand_struct *sighand;



sigset_t blocked, real_blocked;


sigset_t saved_sigmask;
/* restored if set_restore_sigmask() was used */


struct sigpending pending;



unsigned long sas_ss_sp;


size_t sas_ss_size;


int (*notifier)(void *priv);


void *notifier_data;


sigset_t *notifier_mask;


struct audit_context *audit_context;

#ifdef CONFIG_AUDITSYSCALL

uid_t loginuid;


unsigned int sessionid;

#endif

seccomp_t seccomp;


/* Thread group tracking */


u32 parent_exec_id;



u32 self_exec_id;

/* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed,

* mempolicy */


spinlock_t alloc_lock;


#ifdef CONFIG_GENERIC_HARDIRQS

/* IRQ handler threads */


struct irqaction *irqaction;

#endif


/* Protection of the PI data structures: */


raw_spinlock_t pi_lock;


#ifdef CONFIG_RT_MUTEXES

/* PI waiters blocked on a rt_mutex held by this task */


struct plist_head pi_waiters;


/* Deadlock detection and priority inheritance handling */


struct rt_mutex_waiter *pi_blocked_on;

#endif

#ifdef CONFIG_DEBUG_MUTEXES

/* mutex deadlock detection */


struct mutex_waiter *blocked_on;

#endif
#ifdef CONFIG_TRACE_IRQFLAGS

unsigned int irq_events;


unsigned long hardirq_enable_ip;


unsigned long hardirq_disable_ip;


unsigned int hardirq_enable_event;


unsigned int hardirq_disable_event;


int hardirqs_enabled;


int hardirq_context;


unsigned long softirq_disable_ip;


unsigned long softirq_enable_ip;


unsigned int softirq_disable_event;


unsigned int softirq_enable_event;


int softirqs_enabled;


int softirq_context;

#endif
#ifdef CONFIG_LOCKDEP
# define MAX_LOCK_DEPTH 48UL

u64 curr_chain_key;


int lockdep_depth;


unsigned int lockdep_recursion;


struct held_lock held_locks[MAX_LOCK_DEPTH];


gfp_t lockdep_reclaim_gfp;

#endif

/* journalling filesystem info */

void *journal_info;


/* stacked block device info */

struct bio_list *bio_list;


/* VM state */

struct reclaim_state *reclaim_state;



struct backing_dev_info *backing_dev_info;



struct io_context *io_context;



unsigned long ptrace_message;


siginfo_t *last_siginfo; /* For ptrace use.
*/


struct task_io_accounting ioac;

#if defined(CONFIG_TASK_XACCT)

u64 acct_rss_mem1;
/* accumulated rss usage */


u64 acct_vm_mem1;
/* accumulated virtual memory usage */


cputime_t acct_timexpd;
/* stime + utime since last update */

#endif
#ifdef CONFIG_CPUSETS

nodemask_t mems_allowed;
/* Protected by alloc_lock */


int mems_allowed_change_disable;


int cpuset_mem_spread_rotor;


int cpuset_slab_spread_rotor;

#endif
#ifdef CONFIG_CGROUPS

/* Control Group info protected by css_set_lock */


struct css_set *cgroups;


/* cg_list protected by css_set_lock and tsk->alloc_lock */


struct list_head cg_list;

#endif
#ifdef CONFIG_FUTEX

struct robust_list_head __user *robust_list;

#ifdef CONFIG_COMPAT

struct compat_robust_list_head __user *compat_robust_list;

#endif

struct list_head pi_state_list;


struct futex_pi_state *pi_state_cache;

#endif
#ifdef CONFIG_PERF_EVENTS

struct perf_event_context *perf_event_ctxp;


struct mutex perf_event_mutex;


struct list_head perf_event_list;

#endif
#ifdef CONFIG_NUMA

struct mempolicy *mempolicy;
/* Protected by alloc_lock */


short il_next;

#endif

atomic_t fs_excl;
/* holding fs exclusive resources */


struct rcu_head rcu;



/*



* cache last used pipe for splice



*/


struct pipe_inode_info *splice_pipe;

#ifdef
CONFIG_TASK_DELAY_ACCT


struct task_delay_info *delays;

#endif
#ifdef CONFIG_FAULT_INJECTION

int make_it_fail;

#endif

struct prop_local_single dirties;

#ifdef CONFIG_LATENCYTOP

int latency_record_count;


struct latency_record latency_record[LT_SAVECOUNT];

#endif

/*



* time slack values; these are used to round up poll() and



* select() etc timeout values. These are in nanoseconds.



*/


unsigned long timer_slack_ns;


unsigned long default_timer_slack_ns;



struct list_head
*scm_work_list;

#ifdef CONFIG_FUNCTION_GRAPH_TRACER

/* Index of current stored address in ret_stack */


int curr_ret_stack;


/* Stack of return addresses for return function tracing */


struct ftrace_ret_stack
*ret_stack;


/* time stamp for last schedule */


unsigned long long ftrace_timestamp;


/*



* Number of functions that haven't been traced



* because of depth overrun.



*/


atomic_t trace_overrun;


/* Pause for the tracing */


atomic_t tracing_graph_pause;

#endif
#ifdef CONFIG_TRACING

/* state flags for use by tracers */


unsigned long trace;


/* bitmask of trace recursion */


unsigned long trace_recursion;

#endif /* CONFIG_TRACING */
#ifdef CONFIG_CGROUP_MEM_RES_CTLR /* memcg uses this to do batch job */

struct memcg_batch_info {


int do_batch;
/* incremented when batch uncharge started */


struct mem_cgroup *memcg; /* target memcg of uncharge */


unsigned long bytes;
/* uncharged usage */


unsigned long memsw_bytes; /* uncharged mem+swap usage */


} memcg_batch;

#endif
};

3.
进程描述符处理:Linux把两个不同的数据结构紧凑地放在一个单独为进程分配的存储区域内:一个是内核态的进程堆栈,另一个是线程描述符thread_info结构。

Include/linux/sched.h

union thread_union {

struct thread_info thread_info;//
线程描述符

unsigned long stack[THREAD_SIZE/sizeof(long)];//
内核态的进程堆栈
};

struct thread_info {
	unsigned long		flags;		/* low level flags */
	int			preempt_count;	/* 0 => preemptable, <0 => bug */
	mm_segment_t		addr_limit;	/* address limit */
	struct task_struct	*task;		/* main task structure */
	struct exec_domain	*exec_domain;	/* execution domain */
	__u32			cpu;		/* cpu */
	__u32			cpu_domain;	/* cpu domain */
	struct cpu_context_save	cpu_context;	/* cpu context */
	__u32			syscall;	/* syscall number */
	__u8			used_cp[16];	/* thread used copro */
	unsigned long		tp_value;
	struct crunch_state	crunchstate;
	union fp_state		fpstate __attribute__((aligned(8)));
	union vfp_state		vfpstate;
#ifdef CONFIG_ARM_THUMBEE
	unsigned long		thumbee_state;	/* ThumbEE Handler Base register */
#endif
	struct restart_block	restart_block;
};

该段存储区域的示意图为:



#define THREAD_SIZE	8192	/* size of the thread_union region (thread_info + kernel stack) */

ARM的sp寄存器是CPU的栈指针,用来存放栈顶单元的地址。
从用户态刚刚切换到内核态的时候,进程的内核栈总是空的,栈起始与这段内存区的末端,并朝开始的方向增长。如何通过当前堆栈的sp的值获得当前这段内存区的起始地址,见下面的代码:

/*
 * The thread_info and the kernel stack share one THREAD_SIZE-aligned
 * region, so masking the low bits off the current stack pointer (the
 * ARM sp register) yields the start of that region — the thread_info.
 */
static inline struct thread_info *current_thread_info(void)
{
	register unsigned long sp asm ("sp");
	return (struct thread_info *)(sp & ~(THREAD_SIZE - 1));
}

通过上面的代码很容易通过sp获得thread_info在内存中的起始地址。
进程常用的是进程描述符地址而不是thread_info,所以为了获得在当前CPU上运行的描述符指针,可以使用:
Linux的current.h中有这段代码:

/* Descriptor of the process currently running on this CPU, recovered
 * purely from the kernel stack via current_thread_info(). */
static inline struct task_struct *get_current(void)
{
	return current_thread_info()->task;
}

#define current (get_current())

所以仅仅通过检查内核栈,就能够获得当前正确的进程。
4.
内核双向链表进程链表:
/* Descriptor of the process that follows p in the process list. */
#define next_task(p) \
	list_entry_rcu((p)->tasks.next, struct task_struct, tasks)

/* Walk every process descriptor, starting and ending at init_task (PID 0). */
#define for_each_process(p) \
	for (p = &init_task ; (p = next_task(p)) != &init_task ; )


进程的链表把所有的进程的描述符链接起来。

这里init_task是进程0,它是所有进程的祖先,也叫Idle进程或者swapper进程。它是在Linux初始化阶段从无到有创建的一个内核线程,使用如下静态分配的数据结构:

/*
 *  INIT_TASK is used to set up the first task table, touch at
 * your own risk!. Base=0, limit=0x1fffff (=2MB)
 */
#define INIT_TASK(tsk)							\
{									\
	.state		= 0,						\
	.stack		= &init_thread_info,				\
	.usage		= ATOMIC_INIT(2),				\
	.flags		= PF_KTHREAD,					\
	.lock_depth	= -1,						\
	.prio		= MAX_PRIO-20,					\
	.static_prio	= MAX_PRIO-20,					\
	.normal_prio	= MAX_PRIO-20,					\
	.policy		= SCHED_NORMAL,					\
	.cpus_allowed	= CPU_MASK_ALL,					\
	.mm		= NULL,						\
	.active_mm	= &init_mm,					\
	.se		= {						\
		.group_node	= LIST_HEAD_INIT(tsk.se.group_node),	\
	},								\
	.rt		= {						\
		.run_list	= LIST_HEAD_INIT(tsk.rt.run_list),	\
		.time_slice	= HZ,					\
		.nr_cpus_allowed = NR_CPUS,				\
	},								\
	.tasks		= LIST_HEAD_INIT(tsk.tasks),			\
	.pushable_tasks = PLIST_NODE_INIT(tsk.pushable_tasks, MAX_PRIO), \
	.ptraced	= LIST_HEAD_INIT(tsk.ptraced),			\
	.ptrace_entry	= LIST_HEAD_INIT(tsk.ptrace_entry),		\
	.real_parent	= &tsk,						\
	.parent		= &tsk,						\
	.children	= LIST_HEAD_INIT(tsk.children),			\
	.sibling	= LIST_HEAD_INIT(tsk.sibling),			\
	.group_leader	= &tsk,						\
	.real_cred	= &init_cred,					\
	.cred		= &init_cred,					\
	.cred_guard_mutex =						\
		 __MUTEX_INITIALIZER(tsk.cred_guard_mutex),		\
	.comm		= "swapper",					\
	.thread		= INIT_THREAD,					\
	.fs		= &init_fs,					\
	.files		= &init_files,					\
	.signal		= &init_signals,				\
	.sighand	= &init_sighand,				\
	.nsproxy	= &init_nsproxy,				\
	.pending	= {						\
		.list = LIST_HEAD_INIT(tsk.pending.list),		\
		.signal = {{0}}},					\
	.blocked	= {{0}},					\
	.alloc_lock	= __SPIN_LOCK_UNLOCKED(tsk.alloc_lock),		\
	.journal_info	= NULL,						\
	.cpu_timers	= INIT_CPU_TIMERS(tsk.cpu_timers),		\
	.fs_excl	= ATOMIC_INIT(0),				\
	.pi_lock	= __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock),	\
	.timer_slack_ns = 50000, /* 50 usec default slack */		\
	.pids = {							\
		[PIDTYPE_PID]  = INIT_PID_LINK(PIDTYPE_PID),		\
		[PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID),		\
		[PIDTYPE_SID]  = INIT_PID_LINK(PIDTYPE_SID),		\
	},								\
	.thread_group	= LIST_HEAD_INIT(tsk.thread_group),		\
	.dirties = INIT_PROP_LOCAL_SINGLE(dirties),			\
	INIT_IDS							\
	INIT_PERF_EVENTS(tsk)						\
	INIT_TRACE_IRQFLAGS						\
	INIT_LOCKDEP							\
	INIT_FTRACE_GRAPH						\
	INIT_TRACE_RECURSION						\
	INIT_TASK_RCU_PREEMPT(tsk)					\
}


static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);

/*
 * Initial thread structure.
 *
 * We need to make sure that this is 8192-byte aligned due to the
 * way process stacks are handled. This is done by making sure
 * the linker maps this in the .text segment right after head.S,
 * and making head.S ensure the proper alignment.
 *
 * The things we do for performance..
 */
union thread_union init_thread_union __init_task_data =
	{ INIT_THREAD_INFO(init_task) };

/*
 * Initial task structure.
 *
 * All other task structs will be allocated on slabs in fork.c
 */
struct task_struct init_task = INIT_TASK(init_task);

#define init_thread_info	(init_thread_union.thread_info)


以上就是进程0的创建。

进程1的概念就是在start_kernel里面有一个:
kernel_thread(kernel_init, NULL, CLONE_FS | CLONE_SIGHAND);
调用。

/* Body of process 1: finishes kernel initialization, then (in init_post)
 * execs the first user-space program. */
static int __init kernel_init(void * unused)
{
	/*
	 * Wait until kthreadd is all set-up.
	 */
	wait_for_completion(&kthreadd_done);
	lock_kernel();

	/*
	 * init can allocate pages on any node
	 */
	set_mems_allowed(node_states[N_HIGH_MEMORY]);
	/*
	 * init can run on any cpu.
	 */
	set_cpus_allowed_ptr(current, cpu_all_mask);
	/*
	 * Tell the world that we're going to be the grim
	 * reaper of innocent orphaned children.
	 *
	 * We don't want people to have to make incorrect
	 * assumptions about where in the task array this
	 * can be found.
	 */
	init_pid_ns.child_reaper = current;

	cad_pid = task_pid(current);

	smp_prepare_cpus(setup_max_cpus);

	do_pre_smp_initcalls();
	start_boot_trace();

	smp_init();
	sched_init_smp();

	do_basic_setup();

	/* Open the /dev/console on the rootfs, this should never fail */
	if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0)
		printk(KERN_WARNING "Warning: unable to open an initial console.\n");

	(void) sys_dup(0);
	(void) sys_dup(0);
	/*
	 * check if there is an early userspace init.  If yes, let it do all
	 * the work
	 */

	if (!ramdisk_execute_command)
		ramdisk_execute_command = "/init";

	if (sys_access((const char __user *) ramdisk_execute_command, 0) != 0) {
		ramdisk_execute_command = NULL;
		prepare_namespace();
	}

	/*
	 * Ok, we have completed the initial bootup, and
	 * we're essentially up and running. Get rid of the
	 * initmem segments and start the user-mode stuff..
	 */

	init_post();
	return 0;
}
这个函数就是进程1要执行的程序片段,继续完成内核初始化。

在调用了:

run_init_process("/sbin/init");


run_init_process("/etc/init");


run_init_process("/bin/init");


run_init_process("/bin/sh");

程序片段后,装载第一个可执行应用程序init后,内核线程变成为一个普通的应用进程。

内核的其他线程:

Keventd
kapmd :处理与高级电源管理相关的事件
kswapd:执行内存回收
其他请参见深入理解linux内核书。
5.
进程间的关系程序创建进程具有父子关系,如果一个进程创建多个子进程,则子进程之间具有兄弟关系。
假设有一个进程p,则他的task_struct中有如下字段:

Real_parent:指向了创建了进程p的父进程的描述符。

Parent :指向了p的当前的父进程。

Children :链表的头部,链表中所有元素都是p创建的子进程。

Sibling :指向兄弟进程中,下一个或者前一个Sibling元素的指针。

6.
进程等待队列



我们在写驱动的时候,等待某个条件成立时经常需要阻塞一个进程,这时通常会使用一些Linux API,比如:

/*
 * Sleep uninterruptibly on wait queue wq until condition becomes true.
 * The condition is tested first, so a caller whose condition already
 * holds never sleeps at all.
 */
#define wait_event(wq, condition)					\
do {									\
	if (condition)							\
		break;							\
	__wait_event(wq, condition);					\
} while (0)

#define __wait_event(wq, condition)					\
do {									\
	DEFINE_WAIT(__wait);						\
									\
	for (;;) {							\
		prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE);	\
		if (condition)						\
			break;						\
		schedule();						\
	}								\
	finish_wait(&wq, &__wait);					\
} while (0)


finish_wait把进程的状态再次设置为TASK_RUNNING状态,仅发生在调用schedule()之前唤醒条件为真的情况下。
DEFINE_WAIT(__wait);初始化一个叫wait_queue_t的等待队列元素,该等待队列结构原型为:

struct __wait_queue {

unsigned int flags;

#define WQ_FLAG_EXCLUSIVE
0x01


void *private;


wait_queue_func_t func;


struct list_head task_list;

};

/* Declare and initialize a wait-queue element bound to the current task. */
#define DEFINE_WAIT_FUNC(name, function)				\
	wait_queue_t name = {						\
		.private	= current,				\
		.func		= function,				\
		.task_list	= LIST_HEAD_INIT((name).task_list),	\
	}

#define DEFINE_WAIT(name) DEFINE_WAIT_FUNC(name, autoremove_wake_function)
该宏把当前进程的task_struct指针赋值给private指针变量,并把唤醒回调设置为autoremove_wake_function。从而如上图所示,一个完整的等待队列元素初始化完毕,并且和相应的进程关联起来。

下面这个函数prepare_to_wait把上面初始化好了的__wait等待队列元素,添加到以wq作为等待队列头的进程等待队列中。Wq的初始化为:
DECLARE_WAIT_QUEUE_HEAD(wq);

DECLARE_WAIT_QUEUE_HEAD是个宏定义:

#define __WAIT_QUEUE_HEAD_INITIALIZER(name) {				\
	.lock		= __SPIN_LOCK_UNLOCKED(name.lock),		\
	.task_list	= { &(name).task_list, &(name).task_list } }

/* Declare and fully initialize a wait-queue head. */
#define DECLARE_WAIT_QUEUE_HEAD(name) \
	wait_queue_head_t name = __WAIT_QUEUE_HEAD_INITIALIZER(name)

完成等待队列头的初始化。

/*
 * Queue the wait element on q and set the caller's task state.
 *
 * NOTE (author's annotation, translated): WQ_FLAG_EXCLUSIVE is always
 * cleared here, so prepare_to_wait() inserts only NON-exclusive waiters;
 * the prepare_to_wait_exclusive() variant is the one that inserts
 * exclusive waiters (e.g. for contending on a critical resource).
 */
void
prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
{
	unsigned long flags;

	wait->flags &= ~WQ_FLAG_EXCLUSIVE;
	spin_lock_irqsave(&q->lock, flags);
	if (list_empty(&wait->task_list))
		__add_wait_queue(q, wait);
	set_current_state(state);
	spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(prepare_to_wait);


struct __wait_queue_head {
	spinlock_t lock;		/* protects task_list */
	struct list_head task_list;	/* the queued wait elements */
};
typedef struct __wait_queue_head wait_queue_head_t;

/* Link a wait element at the front of the queue; the caller is expected
 * to hold head->lock (as prepare_to_wait() does). */
static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new)
{
	list_add(&new->task_list, &head->task_list);
}

7.
进程创建Linux应用程序中,clone(),vfork(),fork()的系统调用分别为:

/* Fork a new task - this creates a new program thread.
 * This is called indirectly via a small wrapper
 */
asmlinkage int sys_fork(struct pt_regs *regs)
{
#ifdef CONFIG_MMU
	return do_fork(SIGCHLD, regs->ARM_sp, regs, 0, NULL, NULL);
#else
	/* can not support in nommu mode */
	return(-EINVAL);
#endif
}

/* Clone a task - this clones the calling program thread.
 * This is called indirectly via a small wrapper
 */
asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp,
			 int __user *parent_tidptr, int tls_val,
			 int __user *child_tidptr, struct pt_regs *regs)
{
	if (!newsp)
		newsp = regs->ARM_sp;	/* default to the caller's stack pointer */

	return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr);
}


asmlinkage int sys_vfork(struct pt_regs *regs)
{
	/* vfork: share the address space (CLONE_VM) and block the parent
	 * until the child releases it (CLONE_VFORK). */
	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->ARM_sp, regs, 0, NULL, NULL);
}

从上面的三个函数可知,他们都是最终调用do_fork函数,来实现进程的创建,只不过他们传进去的参数不同而已。

do_fork函数原型在kernel/fork.c中:
/*
 * do_fork(): the common backend of fork/vfork/clone.
 *
 * Parameters (author's annotation, translated):
 *  clone_flags   - low byte: the signal sent to the parent when the
 *                  child exits; higher bits: CLONE_* sharing flags
 *  stack_start   - user-mode stack pointer for the child
 *  regs          - pointer to the saved general-purpose registers
 *  stack_size    - size of the stack (unused here)
 *  parent_tidptr - user address for CLONE_PARENT_SETTID
 *  child_tidptr  - user address for CLONE_CHILD_SETTID/CLEARTID
 */
long do_fork(unsigned long clone_flags,
	      unsigned long stack_start,
	      struct pt_regs *regs,
	      unsigned long stack_size,
	      int __user *parent_tidptr,
	      int __user *child_tidptr)
{
	struct task_struct *p;
	int trace = 0;
	long nr;

	/*
	 * Do some preliminary argument and permissions checking before we
	 * actually start allocating stuff
	 */
	if (clone_flags & CLONE_NEWUSER) {
		if (clone_flags & CLONE_THREAD)
			return -EINVAL;
		/* hopefully this check will go away when userns support is
		 * complete
		 */
		if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) ||
				!capable(CAP_SETGID))
			return -EPERM;
	}

	/*
	 * We hope to recycle these flags after 2.6.26
	 */
	if (unlikely(clone_flags & CLONE_STOPPED)) {
		static int __read_mostly count = 100;

		if (count > 0 && printk_ratelimit()) {
			char comm[TASK_COMM_LEN];

			count--;
			printk(KERN_INFO "fork(): process `%s' used deprecated "
					"clone flags 0x%lx\n",
				get_task_comm(comm, current),
				clone_flags & CLONE_STOPPED);
		}
	}

	/*
	 * When called from kernel_thread, don't do user tracing stuff.
	 */
	if (likely(user_mode(regs)))
		trace = tracehook_prepare_clone(clone_flags);

	p = copy_process(clone_flags, stack_start, regs, stack_size,
			 child_tidptr, NULL, trace);
	/*
	 * Do this prior waking up the new thread - the thread pointer
	 * might get invalid after that point, if the thread exits quickly.
	 */
	if (!IS_ERR(p)) {
		struct completion vfork;

		trace_sched_process_fork(current, p);

		nr = task_pid_vnr(p);

		if (clone_flags & CLONE_PARENT_SETTID)
			put_user(nr, parent_tidptr);

		if (clone_flags & CLONE_VFORK) {
			p->vfork_done = &vfork;
			init_completion(&vfork);
		}

		audit_finish_fork(p);
		tracehook_report_clone(regs, clone_flags, nr, p);

		/*
		 * We set PF_STARTING at creation in case tracing wants to
		 * use this to distinguish a fully live task from one that
		 * hasn't gotten to tracehook_report_clone() yet.  Now we
		 * clear it and set the child going.
		 */
		p->flags &= ~PF_STARTING;

		if (unlikely(clone_flags & CLONE_STOPPED)) {
			/*
			 * We'll start up with an immediate SIGSTOP.
			 */
			sigaddset(&p->pending.signal, SIGSTOP);
			set_tsk_thread_flag(p, TIF_SIGPENDING);
			__set_task_state(p, TASK_STOPPED);
		} else {
			wake_up_new_task(p, clone_flags);
		}

		tracehook_report_clone_complete(trace, regs,
						clone_flags, nr, p);

		if (clone_flags & CLONE_VFORK) {
			freezer_do_not_count();
			wait_for_completion(&vfork);
			freezer_count();
			tracehook_report_vfork_done(p, nr);
		}
	} else {
		nr = PTR_ERR(p);
	}
	return nr;
}


附件: 您需要登录才可以下载或查看附件。没有帐号?本站只开放邀请码注册,QQ:82475491,索要邀请码
分享到: QQ空间QQ空间 腾讯微博腾讯微博 腾讯朋友腾讯朋友

返回列表
网页右侧QQ悬浮滚动在线客服
网页右侧QQ悬浮滚动在线客服