Linux Kernel Study, Week 6: Process Description and Process Creation

1. The task_struct data structure

1235struct task_struct {
1236    volatile long state;    /* -1 unrunnable, 0 runnable, >0 stopped */
1237    void *stack;
1238    atomic_t usage;
1239    unsigned int flags;    /* per process flags, defined below */
1240    unsigned int ptrace;
1241
1242#ifdef CONFIG_SMP
1243    struct llist_node wake_entry;
1244    int on_cpu;
1245    struct task_struct *last_wakee;
1246    unsigned long wakee_flips;
1247    unsigned long wakee_flip_decay_ts;
1248
1249    int wake_cpu;
1250#endif
1251    int on_rq;
1252
1253    int prio, static_prio, normal_prio;
1254    unsigned int rt_priority;
1255    const struct sched_class *sched_class;
1256    struct sched_entity se;
1257    struct sched_rt_entity rt;
1258#ifdef CONFIG_CGROUP_SCHED
1259    struct task_group *sched_task_group;
1260#endif
1261    struct sched_dl_entity dl;
1262
1263#ifdef CONFIG_PREEMPT_NOTIFIERS
1264    /* list of struct preempt_notifier: */
1265    struct hlist_head preempt_notifiers;
1266#endif
1267
1268#ifdef CONFIG_BLK_DEV_IO_TRACE
1269    unsigned int btrace_seq;
1270#endif
1271
1272    unsigned int policy;
1273    int nr_cpus_allowed;
1274    cpumask_t cpus_allowed;
1275
1276#ifdef CONFIG_PREEMPT_RCU
1277    int rcu_read_lock_nesting;
1278    union rcu_special rcu_read_unlock_special;
1279    struct list_head rcu_node_entry;
1280#endif /* #ifdef CONFIG_PREEMPT_RCU */
1281#ifdef CONFIG_TREE_PREEMPT_RCU
1282    struct rcu_node *rcu_blocked_node;
1283#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
1284#ifdef CONFIG_TASKS_RCU
1285    unsigned long rcu_tasks_nvcsw;
1286    bool rcu_tasks_holdout;
1287    struct list_head rcu_tasks_holdout_list;
1288    int rcu_tasks_idle_cpu;
1289#endif /* #ifdef CONFIG_TASKS_RCU */
1290
1291#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
1292    struct sched_info sched_info;
1293#endif
1294
1295    struct list_head tasks;
1296#ifdef CONFIG_SMP
1297    struct plist_node pushable_tasks;
1298    struct rb_node pushable_dl_tasks;
1299#endif
1300
1301    struct mm_struct *mm, *active_mm;
1302#ifdef CONFIG_COMPAT_BRK
1303    unsigned brk_randomized:1;
1304#endif
1305    /* per-thread vma caching */
1306    u32 vmacache_seqnum;
1307    struct vm_area_struct *vmacache[VMACACHE_SIZE];
1308#if defined(SPLIT_RSS_COUNTING)
1309    struct task_rss_stat    rss_stat;
1310#endif
1311/* task state */
1312    int exit_state;
1313    int exit_code, exit_signal;
1314    int pdeath_signal;  /*  The signal sent when the parent dies  */
1315    unsigned int jobctl;    /* JOBCTL_*, siglock protected */
1316
1317    /* Used for emulating ABI behavior of previous Linux versions */
1318    unsigned int personality;
1319
1320    unsigned in_execve:1;    /* Tell the LSMs that the process is doing an
1321                 * execve */
1322    unsigned in_iowait:1;
1323
1324    /* Revert to default priority/policy when forking */
1325    unsigned sched_reset_on_fork:1;
1326    unsigned sched_contributes_to_load:1;
1327
1328    unsigned long atomic_flags; /* Flags needing atomic access. */
1329
1330    pid_t pid;
1331    pid_t tgid;
1332
1333#ifdef CONFIG_CC_STACKPROTECTOR
1334    /* Canary value for the -fstack-protector gcc feature */
1335    unsigned long stack_canary;
1336#endif
1337    /*
1338     * pointers to (original) parent process, youngest child, younger sibling,
1339     * older sibling, respectively.  (p->father can be replaced with
1340     * p->real_parent->pid)
1341     */
1342    struct task_struct __rcu *real_parent; /* real parent process */
1343    struct task_struct __rcu *parent; /* recipient of SIGCHLD, wait4() reports */
1344    /*
1345     * children/sibling forms the list of my natural children
1346     */
1347    struct list_head children;    /* list of my children */
1348    struct list_head sibling;    /* linkage in my parent's children list */
1349    struct task_struct *group_leader;    /* threadgroup leader */
1350
1351    /*
1352     * ptraced is the list of tasks this task is using ptrace on.
1353     * This includes both natural children and PTRACE_ATTACH targets.
1354     * p->ptrace_entry is p's link on the p->parent->ptraced list.
1355     */
1356    struct list_head ptraced;
1357    struct list_head ptrace_entry;
1358
1359    /* PID/PID hash table linkage. */
1360    struct pid_link pids[PIDTYPE_MAX];
1361    struct list_head thread_group;
1362    struct list_head thread_node;
1363
1364    struct completion *vfork_done;        /* for vfork() */
1365    int __user *set_child_tid;        /* CLONE_CHILD_SETTID */
1366    int __user *clear_child_tid;        /* CLONE_CHILD_CLEARTID */
1367
1368    cputime_t utime, stime, utimescaled, stimescaled;
1369    cputime_t gtime;
1370#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
1371    struct cputime prev_cputime;
1372#endif
1373#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
1374    seqlock_t vtime_seqlock;
1375    unsigned long long vtime_snap;
1376    enum {
1377        VTIME_SLEEPING = 0,
1378        VTIME_USER,
1379        VTIME_SYS,
1380    } vtime_snap_whence;
1381#endif
1382    unsigned long nvcsw, nivcsw; /* context switch counts */
1383    u64 start_time;        /* monotonic time in nsec */
1384    u64 real_start_time;    /* boot based time in nsec */
1385/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
1386    unsigned long min_flt, maj_flt;
1387
1388    struct task_cputime cputime_expires;
1389    struct list_head cpu_timers[3];
1390
1391/* process credentials */
1392    const struct cred __rcu *real_cred; /* objective and real subjective task
1393                     * credentials (COW) */
1394    const struct cred __rcu *cred;    /* effective (overridable) subjective task
1395                     * credentials (COW) */
1396    char comm[TASK_COMM_LEN]; /* executable name excluding path
1397                     - access with [gs]et_task_comm (which lock
1398                       it with task_lock())
1399                     - initialized normally by setup_new_exec */
1400/* file system info */
1401    int link_count, total_link_count;
1402#ifdef CONFIG_SYSVIPC
1403/* ipc stuff */
1404    struct sysv_sem sysvsem;
1405    struct sysv_shm sysvshm;
1406#endif
1407#ifdef CONFIG_DETECT_HUNG_TASK
1408/* hung task detection */
1409    unsigned long last_switch_count;
1410#endif
1411/* CPU-specific state of this task */
1412    struct thread_struct thread;
1413/* filesystem information */
1414    struct fs_struct *fs;
1415/* open file information */
1416    struct files_struct *files;
1417/* namespaces */
1418    struct nsproxy *nsproxy;
1419/* signal handlers */
1420    struct signal_struct *signal;
1421    struct sighand_struct *sighand;
1422
1423    sigset_t blocked, real_blocked;
1424    sigset_t saved_sigmask;    /* restored if set_restore_sigmask() was used */
1425    struct sigpending pending;
1426
1427    unsigned long sas_ss_sp;
1428    size_t sas_ss_size;
1429    int (*notifier)(void *priv);
1430    void *notifier_data;
1431    sigset_t *notifier_mask;
1432    struct callback_head *task_works;
1433
1434    struct audit_context *audit_context;
1435#ifdef CONFIG_AUDITSYSCALL
1436    kuid_t loginuid;
1437    unsigned int sessionid;
1438#endif
1439    struct seccomp seccomp;
1440
1441/* Thread group tracking */
1442       u32 parent_exec_id;
1443       u32 self_exec_id;
1444/* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed,
1445 * mempolicy */
1446    spinlock_t alloc_lock;
1447
1448    /* Protection of the PI data structures: */
1449    raw_spinlock_t pi_lock;
1450
1451#ifdef CONFIG_RT_MUTEXES
1452    /* PI waiters blocked on a rt_mutex held by this task */
1453    struct rb_root pi_waiters;
1454    struct rb_node *pi_waiters_leftmost;
1455    /* Deadlock detection and priority inheritance handling */
1456    struct rt_mutex_waiter *pi_blocked_on;
1457#endif
1458
1459#ifdef CONFIG_DEBUG_MUTEXES
1460    /* mutex deadlock detection */
1461    struct mutex_waiter *blocked_on;
1462#endif
1463#ifdef CONFIG_TRACE_IRQFLAGS
1464    unsigned int irq_events;
1465    unsigned long hardirq_enable_ip;
1466    unsigned long hardirq_disable_ip;
1467    unsigned int hardirq_enable_event;
1468    unsigned int hardirq_disable_event;
1469    int hardirqs_enabled;
1470    int hardirq_context;
1471    unsigned long softirq_disable_ip;
1472    unsigned long softirq_enable_ip;
1473    unsigned int softirq_disable_event;
1474    unsigned int softirq_enable_event;
1475    int softirqs_enabled;
1476    int softirq_context;
1477#endif
1478#ifdef CONFIG_LOCKDEP
1479# define MAX_LOCK_DEPTH 48UL
1480    u64 curr_chain_key;
1481    int lockdep_depth;
1482    unsigned int lockdep_recursion;
1483    struct held_lock held_locks[MAX_LOCK_DEPTH];
1484    gfp_t lockdep_reclaim_gfp;
1485#endif
1486
1487/* journalling filesystem info */
1488    void *journal_info;
1489
1490/* stacked block device info */
1491    struct bio_list *bio_list;
1492
1493#ifdef CONFIG_BLOCK
1494/* stack plugging */
1495    struct blk_plug *plug;
1496#endif
1497
1498/* VM state */
1499    struct reclaim_state *reclaim_state;
1500
1501    struct backing_dev_info *backing_dev_info;
1502
1503    struct io_context *io_context;
1504
1505    unsigned long ptrace_message;
1506    siginfo_t *last_siginfo; /* For ptrace use.  */
1507    struct task_io_accounting ioac;
1508#if defined(CONFIG_TASK_XACCT)
1509    u64 acct_rss_mem1;    /* accumulated rss usage */
1510    u64 acct_vm_mem1;    /* accumulated virtual memory usage */
1511    cputime_t acct_timexpd;    /* stime + utime since last update */
1512#endif
1513#ifdef CONFIG_CPUSETS
1514    nodemask_t mems_allowed;    /* Protected by alloc_lock */
1515    seqcount_t mems_allowed_seq;    /* Seqence no to catch updates */
1516    int cpuset_mem_spread_rotor;
1517    int cpuset_slab_spread_rotor;
1518#endif
1519#ifdef CONFIG_CGROUPS
1520    /* Control Group info protected by css_set_lock */
1521    struct css_set __rcu *cgroups;
1522    /* cg_list protected by css_set_lock and tsk->alloc_lock */
1523    struct list_head cg_list;
1524#endif
1525#ifdef CONFIG_FUTEX
1526    struct robust_list_head __user *robust_list;
1527#ifdef CONFIG_COMPAT
1528    struct compat_robust_list_head __user *compat_robust_list;
1529#endif
1530    struct list_head pi_state_list;
1531    struct futex_pi_state *pi_state_cache;
1532#endif
1533#ifdef CONFIG_PERF_EVENTS
1534    struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
1535    struct mutex perf_event_mutex;
1536    struct list_head perf_event_list;
1537#endif
1538#ifdef CONFIG_DEBUG_PREEMPT
1539    unsigned long preempt_disable_ip;
1540#endif
1541#ifdef CONFIG_NUMA
1542    struct mempolicy *mempolicy;    /* Protected by alloc_lock */
1543    short il_next;
1544    short pref_node_fork;
1545#endif
1546#ifdef CONFIG_NUMA_BALANCING
1547    int numa_scan_seq;
1548    unsigned int numa_scan_period;
1549    unsigned int numa_scan_period_max;
1550    int numa_preferred_nid;
1551    unsigned long numa_migrate_retry;
1552    u64 node_stamp;            /* migration stamp  */
1553    u64 last_task_numa_placement;
1554    u64 last_sum_exec_runtime;
1555    struct callback_head numa_work;
1556
1557    struct list_head numa_entry;
1558    struct numa_group *numa_group;
1559
1560    /*
1561     * Exponential decaying average of faults on a per-node basis.
1562     * Scheduling placement decisions are made based on the these counts.
1563     * The values remain static for the duration of a PTE scan
1564     */
1565    unsigned long *numa_faults_memory;
1566    unsigned long total_numa_faults;
1567
1568    /*
1569     * numa_faults_buffer records faults per node during the current
1570     * scan window. When the scan completes, the counts in
1571     * numa_faults_memory decay and these values are copied.
1572     */
1573    unsigned long *numa_faults_buffer_memory;
1574
1575    /*
1576     * Track the nodes the process was running on when a NUMA hinting
1577     * fault was incurred.
1578     */
1579    unsigned long *numa_faults_cpu;
1580    unsigned long *numa_faults_buffer_cpu;
1581
1582    /*
1583     * numa_faults_locality tracks if faults recorded during the last
1584     * scan window were remote/local. The task scan period is adapted
1585     * based on the locality of the faults with different weights
1586     * depending on whether they were shared or private faults
1587     */
1588    unsigned long numa_faults_locality[2];
1589
1590    unsigned long numa_pages_migrated;
1591#endif /* CONFIG_NUMA_BALANCING */
1592
1593    struct rcu_head rcu;
1594
1595    /*
1596     * cache last used pipe for splice
1597     */
1598    struct pipe_inode_info *splice_pipe;
1599
1600    struct page_frag task_frag;
1601
1602#ifdef    CONFIG_TASK_DELAY_ACCT
1603    struct task_delay_info *delays;
1604#endif
1605#ifdef CONFIG_FAULT_INJECTION
1606    int make_it_fail;
1607#endif
1608    /*
1609     * when (nr_dirtied >= nr_dirtied_pause), it's time to call
1610     * balance_dirty_pages() for some dirty throttling pause
1611     */
1612    int nr_dirtied;
1613    int nr_dirtied_pause;
1614    unsigned long dirty_paused_when; /* start of a write-and-pause period */
1615
1616#ifdef CONFIG_LATENCYTOP
1617    int latency_record_count;
1618    struct latency_record latency_record[LT_SAVECOUNT];
1619#endif
1620    /*
1621     * time slack values; these are used to round up poll() and
1622     * select() etc timeout values. These are in nanoseconds.
1623     */
1624    unsigned long timer_slack_ns;
1625    unsigned long default_timer_slack_ns;
1626
1627#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1628    /* Index of current stored address in ret_stack */
1629    int curr_ret_stack;
1630    /* Stack of return addresses for return function tracing */
1631    struct ftrace_ret_stack    *ret_stack;
1632    /* time stamp for last schedule */
1633    unsigned long long ftrace_timestamp;
1634    /*
1635     * Number of functions that haven't been traced
1636     * because of depth overrun.
1637     */
1638    atomic_t trace_overrun;
1639    /* Pause for the tracing */
1640    atomic_t tracing_graph_pause;
1641#endif
1642#ifdef CONFIG_TRACING
1643    /* state flags for use by tracers */
1644    unsigned long trace;
1645    /* bitmask and counter of trace recursion */
1646    unsigned long trace_recursion;
1647#endif /* CONFIG_TRACING */
1648#ifdef CONFIG_MEMCG /* memcg uses this to do batch job */
1649    unsigned int memcg_kmem_skip_account;
1650    struct memcg_oom_info {
1651        struct mem_cgroup *memcg;
1652        gfp_t gfp_mask;
1653        int order;
1654        unsigned int may_oom:1;
1655    } memcg_oom;
1656#endif
1657#ifdef CONFIG_UPROBES
1658    struct uprobe_task *utask;
1659#endif
1660#if defined(CONFIG_BCACHE) || defined(CONFIG_BCACHE_MODULE)
1661    unsigned int    sequential_io;
1662    unsigned int    sequential_io_avg;
1663#endif
1664};

(Figure: task_struct overview diagram)

2. Process creation

In Linux, the fork, vfork, and clone functions can all be used to create a new process. Their corresponding system-call entry points are shown below:

SYSCALL_DEFINE0(fork)
{
  return do_fork(SIGCHLD, 0, 0, NULL, NULL);
}

SYSCALL_DEFINE0(vfork)
{
  return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, 0, NULL, NULL);
}

SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
                int __user *, parent_tidptr, int __user *, child_tidptr, int, tls_val)
{
  return do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr);
}

As the code shows, the system calls behind fork, vfork, and clone are all implemented on top of the same do_fork function.
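
For context, here is a trimmed-down sketch of do_fork based on a 3.18-era kernel. Error handling, ptrace notifications, and reference-count bookkeeping are omitted, so treat it as illustrative rather than the exact implementation:

long do_fork(unsigned long clone_flags, unsigned long stack_start,
             unsigned long stack_size, int __user *parent_tidptr,
             int __user *child_tidptr)
{
    struct task_struct *p;
    struct completion vfork;
    long nr;

    /* duplicate the current task into a fresh task_struct */
    p = copy_process(clone_flags, stack_start, stack_size,
                     child_tidptr, NULL, 0);
    if (IS_ERR(p))
        return PTR_ERR(p);

    /* pid value that the parent will see as the return value */
    nr = pid_vnr(get_task_pid(p, PIDTYPE_PID));

    if (clone_flags & CLONE_VFORK) {
        p->vfork_done = &vfork;
        init_completion(&vfork);
    }

    /* put the child on a run queue so the scheduler can pick it up */
    wake_up_new_task(p);

    /* vfork(): block the parent until the child calls exec() or exits */
    if (clone_flags & CLONE_VFORK)
        wait_for_vfork_done(p, &vfork);

    return nr;
}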



Inside do_fork, the real duplication work is done by copy_process. Its key calls (the indented lines are made from within copy_process), with brief annotations:

p = copy_process(clone_flags, stack_start, stack_size, child_tidptr, NULL, trace);
    p = dup_task_struct(current);               /* allocate the new task_struct and its kernel stack */
    retval = security_task_create(clone_flags); /* LSM security check for task creation */
    retval = sched_fork(clone_flags, p);        /* scheduler-related setup so the CPU can later schedule this task */
    retval = copy_thread(clone_flags, stack_start, stack_size, p); /* copy the parent's saved registers on the kernel stack into the child's stack */

Among these calls, the statement p->thread.ip = (unsigned long) ret_from_fork inside copy_thread determines the address of the first instruction the new process will execute.
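
The relevant part of the x86-32 copy_thread() looks roughly like the following (a simplified sketch of a 3.18-era kernel; kernel-thread and I/O-bitmap handling are left out):

int copy_thread(unsigned long clone_flags, unsigned long sp,
                unsigned long arg, struct task_struct *p)
{
    /* the pt_regs frame sits at the top of the child's kernel stack */
    struct pt_regs *childregs = task_pt_regs(p);

    p->thread.sp  = (unsigned long) childregs;
    p->thread.sp0 = (unsigned long) (childregs + 1);

    /* copy the parent's saved user-mode registers into the child */
    *childregs = *current_pt_regs();

    /* make fork()/clone() return 0 in the child */
    childregs->ax = 0;
    if (sp)
        childregs->sp = sp;   /* clone() may supply a new user stack */

    /* first kernel instruction the child executes when first scheduled */
    p->thread.ip = (unsigned long) ret_from_fork;

    return 0;
}

Because childregs->ax is forced to 0, the child observes a return value of 0 from fork(), while the parent receives the child's pid from do_fork.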

The stack-creating function dup_task_struct performs the following key steps:

tsk = alloc_task_struct_node(node);     /* allocate memory for the new task_struct */
ti = alloc_thread_info_node(tsk, node); /* ti points to the new thread_info, which is also the base of the two consecutive pages allocated for the new process's kernel stack */
err = arch_dup_task_struct(tsk, orig);  /* copy the parent's task_struct into the new one */
tsk->stack = ti;                        /* attach the kernel stack to the task */
setup_thread_stack(tsk, orig);          /* initialize the thread_info structure */
set_task_stack_end_magic(tsk);          /* write the magic value that marks the end of the stack */
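
The reason ti is simultaneously the address of thread_info and the base of the freshly allocated pages is that the kernel stack and thread_info share a single allocation. A sketch of the kernel's thread_union (THREAD_SIZE is 8 KB, i.e. two pages, on x86-32):

union thread_union {
    struct thread_info thread_info;                 /* lives at the low end of the block */
    unsigned long stack[THREAD_SIZE/sizeof(long)];  /* the kernel stack grows down from the high end */
};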

3. Where the new process starts executing

As discussed above, copy_thread(), called from copy_process, is what determines where the child continues after returning from the system call. The statement p->thread.ip = (unsigned long) ret_from_fork; sets the child's saved instruction pointer to the start of ret_from_fork, so that is where the new process begins executing once it is scheduled for the first time.

4. How the execution start point stays consistent with the kernel stack

  • Before ret_from_fork runs, i.e. inside copy_thread(), the statement *childregs = *current_pt_regs(); copies the parent's register frame onto the child's kernel stack.
  • *childregs has type struct pt_regs, which holds the registers that SAVE_ALL pushed when the parent entered the kernel (see the sketch after this list).
  • The later RESTORE_ALL therefore finds a complete, consistent frame on the child's stack and can return to user space correctly.
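
For reference, the x86-32 struct pt_regs looks roughly like this (the layout corresponds to what the hardware and SAVE_ALL push on kernel entry; the exact definition is version- and architecture-dependent):

struct pt_regs {
    unsigned long bx;
    unsigned long cx;
    unsigned long dx;
    unsigned long si;
    unsigned long di;
    unsigned long bp;
    unsigned long ax;
    unsigned long ds;
    unsigned long es;
    unsigned long fs;
    unsigned long gs;
    unsigned long orig_ax;   /* system call number on entry */
    unsigned long ip;        /* user-mode return address */
    unsigned long cs;
    unsigned long flags;
    unsigned long sp;        /* user-mode stack pointer */
    unsigned long ss;
};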

5. Summary

Linux creates a child process by copying its parent: for every newly created process the kernel dynamically allocates a task_struct. fork is called once but returns twice, once in the parent and once in the child.
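
A minimal user-space demonstration of the "called once, returns twice" behaviour (a hypothetical example, not part of the original notes):

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>

int main(void)
{
    pid_t pid = fork();                 /* one call ... */

    if (pid < 0) {
        perror("fork");
        exit(EXIT_FAILURE);
    } else if (pid == 0) {
        /* ... one return in the child: fork() yields 0
         * (childregs->ax was set to 0 in copy_thread) */
        printf("child:  pid=%d\n", getpid());
    } else {
        /* ... one return in the parent: fork() yields the child's pid */
        printf("parent: pid=%d, child=%d\n", getpid(), pid);
        wait(NULL);                     /* reap the child */
    }
    return 0;
}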
