/* kernel/ptrace.c */

/* ptrace.c */
/* By Ross Biro 1/23/92 */
/* edited by Linus Torvalds */

#include <linux/head.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/errno.h>
#include <linux/ptrace.h>
#include <linux/user.h>

#include <asm/segment.h>
#include <asm/system.h>
#include <linux/debugreg.h>

/*
 * does not yet catch signals sent when the child dies,
 * in exit.c or in signal.c.
 */
// Note: signals raised from exit.c and signal.c while the child is exiting are still not caught.
/* determines which flags the user has access to. */
/* 1 = access 0 = no access */
#define FLAG_MASK 0x00044dd5

/* sets the trap flag. */
#define TRAP_FLAG 0x100

/*
 * this is the number to subtract from the top of the stack to find
 * the local frame.
 */
#define MAGICNUMBER 68

/* change a pid into a task struct. */
//获取指定pid的任务
static inline struct task_struct * get_task(int pid)
{
    int i;
    //遍历所有任务进程,查找指定进程号的进程
    for (i = 1; i < NR_TASKS; i++) {
        if (task[i] != NULL && (task[i]->pid == pid))
            return task[i];
    }
    return NULL;
}

/*
 * Fetch one word from a process's privileged stack.
 *
 * offset is a byte displacement from the stack base recorded in the
 * TSS (task->tss.esp0).  The int stored at that location is returned.
 * This routine assumes that all the privileged stacks are in our
 * data space, so the location can be dereferenced directly.
 */
static inline int get_stack_long(struct task_struct *task, int offset)
{
    /* base taken from the TSS, displaced by offset bytes */
    int *slot = (int *) ((unsigned char *) task->tss.esp0 + offset);

    return *slot;
}

/*
 * Store one word on a process's privileged stack.
 *
 * offset is a byte displacement from the stack base recorded in the
 * TSS (task->tss.esp0); data is written there as an unsigned long.
 * This routine assumes that all the privileged stacks are in our
 * data space.  Always returns 0.
 */
static inline int put_stack_long(struct task_struct *task, int offset,
    unsigned long data)
{
    /* base taken from the TSS, displaced by offset bytes */
    unsigned long *slot =
        (unsigned long *) ((unsigned char *) task->tss.esp0 + offset);

    *slot = data;
    return 0;
}

/*
 * Read a long from an arbitrary process's address space by walking
 * its page tables by hand.
 *
 * NOTE! The caller must make sure the long does not straddle a page
 * boundary and that addr lies inside the task area: nothing is
 * checked here.
 *
 * NOTE2! This uses "tsk->tss.cr3" even though we know it's currently
 * always zero. This routine shouldn't have to change when we make a
 * better mm.
 *
 * Whenever the directory entry or the page-table entry is not marked
 * present, do_no_page() is called and the walk starts over.  Returns
 * the long found at addr, or 0 when the page-table entry value is at
 * or above high_memory.
 */
static unsigned long get_long(struct task_struct * tsk,
    unsigned long addr)
{
    unsigned long entry;

    for (;;) {
        /* start from the page-directory entry covering addr */
        entry = *PAGE_DIR_OFFSET(tsk->tss.cr3,addr);
        if (entry & PAGE_PRESENT) {
            unsigned long *pte;

            /* step down to the page-table entry for addr */
            entry &= PAGE_MASK;
            entry += PAGE_PTR(addr);
            pte = (unsigned long *) entry;
            entry = *pte;
        }
        if (entry & PAGE_PRESENT)
            break;
        /* not mapped yet: have it faulted in, then walk again */
        do_no_page(0,addr,tsk,0);
    }

    /* this is a hack for non-kernel-mapped video buffers and similar */
    if (entry >= high_memory)
        return 0;

    /* page frame base plus the offset of addr within its page */
    entry &= PAGE_MASK;
    entry += addr & ~PAGE_MASK;
    return *(unsigned long *) entry;
}

/*
 * This routine puts a long into any process space by following the page
 * tables. NOTE! You should check that the long isn't on a page boundary,
 * and that it is in the task area before calling this: this routine does
 * no checking.
 *
 * Now keeps R/W state of page so that a text page stays readonly
 * even if a debugger scribbles breakpoints into it.  -M.U-
 *
 * tsk:  task whose page tables (rooted at tsk->tss.cr3) are walked
 * addr: address inside that task where the long is stored
 * data: value to store
 *
 * Returns nothing; the write is silently skipped when the page-table
 * entry value is at or above high_memory.
 */
static void put_long(struct task_struct * tsk, unsigned long addr,
    unsigned long data)
{
    /* page: current directory/table entry value; pte: address of the
       page-table entry, remembered so its bits can be edited later */
    unsigned long page, pte = 0;
    /* set once the page is found non-writable and not PAGE_COW; it keeps
       its value across the "goto repeat" passes below */
    int readonly = 0;

repeat:
    /* fetch the page-directory entry covering addr */
    page = *PAGE_DIR_OFFSET(tsk->tss.cr3,addr);
    if (page & PAGE_PRESENT) {
        /* descend to the page-table entry and load it */
        page &= PAGE_MASK;
        page += PAGE_PTR(addr);
        pte = page;
        page = *((unsigned long *) page);
    }
    /* directory or table entry not present: fault the page in and retry */
    if (!(page & PAGE_PRESENT)) {
        do_no_page(0 /* PAGE_RW */ ,addr,tsk,0);
        goto repeat;
    }
    /* page not writable: note whether it was plain read-only (not COW),
       have do_wp_page() deal with it, then walk the tables again */
    if (!(page & PAGE_RW)) {
        if(!(page & PAGE_COW))
            readonly = 1;
        do_wp_page(PAGE_RW | PAGE_PRESENT,addr,tsk,0);
        goto repeat;
    }
/* this is a hack for non-kernel-mapped video buffers and similar */
    if (page >= high_memory)
        return;
/* we're bypassing pagetables, so we have to set the dirty bit ourselves */
    *(unsigned long *) pte |= (PAGE_DIRTY|PAGE_COW);
    /* page frame base plus offset of addr within the page, then store */
    page &= PAGE_MASK;
    page += addr & ~PAGE_MASK;
    *(unsigned long *) page = data;
    /* the page started out read-only: strip PAGE_RW and PAGE_COW from its
       entry again and call invalidate() */
    if(readonly) {
        *(unsigned long *) pte &=~ (PAGE_RW|PAGE_COW);
        invalidate();
    }
}

/*
 * Checked read of one long from a task.
 *
 * Rejects addresses above TASK_SIZE - sizeof(long) with -EIO.  When
 * the long would run past the end of its page, the two aligned longs
 * around it are fetched with get_long() and spliced together according
 * to the byte misalignment of addr; otherwise a single get_long() is
 * enough.  The value is stored through result and 0 is returned.
 */
static int read_long(struct task_struct * tsk, unsigned long addr,
    unsigned long * result)
{
    unsigned long first, second, misalign;

    if (addr > TASK_SIZE-sizeof(long))
        return -EIO;

    /* common case: the long lies entirely inside one page */
    if ((addr & ~PAGE_MASK) <= PAGE_SIZE-sizeof(long)) {
        *result = get_long(tsk,addr);
        return 0;
    }

    /* the long crosses a page boundary: read both aligned halves */
    first = get_long(tsk,addr & ~(sizeof(long)-1));
    second = get_long(tsk,(addr+sizeof(long)) & ~(sizeof(long)-1));

    misalign = addr & (sizeof(long)-1);
    if (misalign == 1) {
        first >>= 8;
        first |= second << 24;
    } else if (misalign == 2) {
        first >>= 16;
        first |= second << 16;
    } else if (misalign == 3) {
        first >>= 24;
        first |= second << 8;
    }
    *result = first;
    return 0;
}

/*
 * Checked write of one long into a task.
 *
 * Rejects addresses above TASK_SIZE - sizeof(long) with -EIO.  When
 * the long would run past the end of its page, the two aligned longs
 * around it are read with get_long(), the bytes of data are merged in
 * according to the byte misalignment of addr, and both longs are
 * written back with put_long(); otherwise a single put_long() is
 * enough.  Returns 0 once the write has been issued.
 */
static int write_long(struct task_struct * tsk, unsigned long addr,
    unsigned long data)
{
    unsigned long first, second, misalign;

    if (addr > TASK_SIZE-sizeof(long))
        return -EIO;

    /* common case: the long lies entirely inside one page */
    if ((addr & ~PAGE_MASK) <= PAGE_SIZE-sizeof(long)) {
        put_long(tsk,addr,data);
        return 0;
    }

    /* the long crosses a page boundary: read-modify-write both halves */
    first = get_long(tsk,addr & ~(sizeof(long)-1));
    second = get_long(tsk,(addr+sizeof(long)) & ~(sizeof(long)-1));

    misalign = addr & (sizeof(long)-1);
    if (misalign == 0) {
        /* shouldn't happen, but safety first */
        first = data;
    } else if (misalign == 1) {
        first &= 0x000000ff;
        first |= data << 8;
        second &= ~0xff;
        second |= data >> 24;
    } else if (misalign == 2) {
        first &= 0x0000ffff;
        first |= data << 16;
        second &= ~0xffff;
        second |= data >> 16;
    } else if (misalign == 3) {
        first &= 0x00ffffff;
        first |= data << 24;
        second &= ~0xffffff;
        second |= data >> 8;
    }

    put_long(tsk,addr & ~(sizeof(long)-1),first);
    put_long(tsk,(addr+sizeof(long)) & ~(sizeof(long)-1),second);
    return 0;
}

asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
{
    struct task_struct *child;
    struct user * dummy;
    int i;

dummy = NULL;

if (request == PTRACE_TRACEME) {
        /* are we already being traced? */
        if (current->flags & PF_PTRACED)
            return -EPERM;
        /* set the ptrace bit in the proccess flags. */
        current->flags |= PF_PTRACED;
        return 0;
    }
    if (pid == 1)        /* you may not mess with init */
        return -EPERM;
    if (!(child = get_task(pid)))
        return -ESRCH;
    if (request == PTRACE_ATTACH) {
        if (child == current)
            return -EPERM;
        if ((!child->dumpable || (current->uid != child->euid) ||
             (current->gid != child->egid)) && !suser())
            return -EPERM;
        /* the same process cannot be attached many times */
        if (child->flags & PF_PTRACED)
            return -EPERM;
        child->flags |= PF_PTRACED;
        if (child->p_pptr != current) {
            REMOVE_LINKS(child);
            child->p_pptr = current;
            SET_LINKS(child);
        }
        send_sig(SIGSTOP, child, 1);
        return 0;
    }
    if (!(child->flags & PF_PTRACED))
        return -ESRCH;
    if (child->state != TASK_STOPPED) {
        if (request != PTRACE_KILL)
            return -ESRCH;
    }
    if (child->p_pptr != current)
        return -ESRCH;

switch (request) {
    /* when I and D space are seperate, these will need to be fixed. */
        case PTRACE_PEEKTEXT: /* read word at location addr. */
        case PTRACE_PEEKDATA: {
            unsigned long tmp;
            int res;

res = read_long(child, addr, &tmp);
            if (res < 0)
                return res;
            res = verify_area(VERIFY_WRITE, (void *) data, sizeof(long));
            if (!res)
                put_fs_long(tmp,(unsigned long *) data);
            return res;
        }

/* read the word at location addr in the USER area. */
        case PTRACE_PEEKUSR: {
            unsigned long tmp;
            int res;

if ((addr & 3) || addr < 0 ||
                addr > sizeof(struct user) - 3)
                return -EIO;

res = verify_area(VERIFY_WRITE, (void *) data, sizeof(long));
            if (res)
                return res;
            tmp = 0;  /* Default return condition */
            if(addr < 17*sizeof(long)) {
              addr = addr >> 2; /* temporary hack. */

tmp = get_stack_long(child, sizeof(long)*addr - MAGICNUMBER);
              if (addr == DS || addr == ES ||
                  addr == FS || addr == GS ||
                  addr == CS || addr == SS)
                tmp &= 0xffff;
            };
            if(addr >= (long) &dummy->u_debugreg[0] &&
               addr <= (long) &dummy->u_debugreg[7]){
                addr -= (long) &dummy->u_debugreg[0];
                addr = addr >> 2;
                tmp = child->debugreg[addr];
            };
            put_fs_long(tmp,(unsigned long *) data);
            return 0;
        }

/* when I and D space are seperate, this will have to be fixed. */
        case PTRACE_POKETEXT: /* write the word at location addr. */
        case PTRACE_POKEDATA:
            return write_long(child,addr,data);

case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
            if ((addr & 3) || addr < 0 ||
                addr > sizeof(struct user) - 3)
                return -EIO;

addr = addr >> 2; /* temproary hack. */

if (addr == ORIG_EAX)
                return -EIO;
            if (addr == DS || addr == ES ||
                addr == FS || addr == GS ||
                addr == CS || addr == SS) {
                    data &= 0xffff;
                    if (data && (data & 3) != 3)
                    return -EIO;
            }
            if (addr == EFL) {   /* flags. */
                data &= FLAG_MASK;
                data |= get_stack_long(child, EFL*sizeof(long)-MAGICNUMBER)  & ~FLAG_MASK;
            }
          /* Do not allow the user to set the debug register for kernel
             address space */
          if(addr < 17){
              if (put_stack_long(child, sizeof(long)*addr-MAGICNUMBER, data))
                return -EIO;
            return 0;
            };

/* We need to be very careful here.  We implicitly
             want to modify a portion of the task_struct, and we
             have to be selective about what portions we allow someone
             to modify. */

addr = addr << 2;  /* Convert back again */
          if(addr >= (long) &dummy->u_debugreg[0] &&
             addr <= (long) &dummy->u_debugreg[7]){

if(addr == (long) &dummy->u_debugreg[4]) return -EIO;
              if(addr == (long) &dummy->u_debugreg[5]) return -EIO;
              if(addr < (long) &dummy->u_debugreg[4] &&
                 ((unsigned long) data) >= 0xbffffffd) return -EIO;
              
              if(addr == (long) &dummy->u_debugreg[7]) {
                  data &= ~DR_CONTROL_RESERVED;
                  for(i=0; i<4; i++)
                      if ((0x5f54 >> ((data >> (16 + 4*i)) & 0xf)) & 1)
                          return -EIO;
              };

addr -= (long) &dummy->u_debugreg;
              addr = addr >> 2;
              child->debugreg[addr] = data;
              return 0;
          };
          return -EIO;

case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
        case PTRACE_CONT: { /* restart after signal. */
            long tmp;

if ((unsigned long) data > NSIG)
                return -EIO;
            if (request == PTRACE_SYSCALL)
                child->flags |= PF_TRACESYS;
            else
                child->flags &= ~PF_TRACESYS;
            child->exit_code = data;
            child->state = TASK_RUNNING;
    /* make sure the single step bit is not set. */
            tmp = get_stack_long(child, sizeof(long)*EFL-MAGICNUMBER) & ~TRAP_FLAG;
            put_stack_long(child, sizeof(long)*EFL-MAGICNUMBER,tmp);
            return 0;
        }

/*
 * make the child exit.  Best I can do is send it a sigkill.
 * perhaps it should be put in the status that it want‘s to
 * exit.
 */
        case PTRACE_KILL: {
            long tmp;

child->state = TASK_RUNNING;
            child->exit_code = SIGKILL;
    /* make sure the single step bit is not set. */
            tmp = get_stack_long(child, sizeof(long)*EFL-MAGICNUMBER) & ~TRAP_FLAG;
            put_stack_long(child, sizeof(long)*EFL-MAGICNUMBER,tmp);
            return 0;
        }

case PTRACE_SINGLESTEP: {  /* set the trap flag. */
            long tmp;

if ((unsigned long) data > NSIG)
                return -EIO;
            child->flags &= ~PF_TRACESYS;
            tmp = get_stack_long(child, sizeof(long)*EFL-MAGICNUMBER) | TRAP_FLAG;
            put_stack_long(child, sizeof(long)*EFL-MAGICNUMBER,tmp);
            child->state = TASK_RUNNING;
            child->exit_code = data;
    /* give it a chance to run. */
            return 0;
        }

case PTRACE_DETACH: { /* detach a process that was attached. */
            long tmp;

if ((unsigned long) data > NSIG)
                return -EIO;
            child->flags &= ~(PF_PTRACED|PF_TRACESYS);
            child->state = TASK_RUNNING;
            child->exit_code = data;
            REMOVE_LINKS(child);
            child->p_pptr = child->p_opptr;
            SET_LINKS(child);
            /* make sure the single step bit is not set. */
            tmp = get_stack_long(child, sizeof(long)*EFL-MAGICNUMBER) & ~TRAP_FLAG;
            put_stack_long(child, sizeof(long)*EFL-MAGICNUMBER,tmp);
            return 0;
        }

default:
            return -EIO;
    }
}

/*
 * Stop point used for system-call tracing.
 *
 * Does nothing unless the current task has both PF_PTRACED and
 * PF_TRACESYS set.  Otherwise the task records SIGTRAP as its exit
 * code, marks itself TASK_STOPPED, notifies its parent and calls
 * schedule().  Once it runs again, a non-zero exit_code is turned into
 * the corresponding pending-signal bit and exit_code is cleared.
 */
asmlinkage void syscall_trace(void)
{
    int sig;

    if ((current->flags & (PF_PTRACED|PF_TRACESYS))
            == (PF_PTRACED|PF_TRACESYS)) {
        current->exit_code = SIGTRAP;
        current->state = TASK_STOPPED;
        notify_parent(current);
        schedule();
        /*
         * this isn't the same as continuing with a signal, but it will do
         * for normal use.  strace only continues with a signal if the
         * stopping signal is not SIGTRAP.  -brl
         */
        sig = current->exit_code;
        if (sig != 0)
            current->signal |= (1 << (sig - 1));
        current->exit_code = 0;
    }
}

kernel/ptrace.c

时间: 2024-10-17 17:58:37

kernel/ptrace.c的相关文章

玩转ptrace(转)

下面是转帖的内容,写的很详细.但是不同的linux发行版中头文件的路径和名称并不相同.如在某些发行版中<linux/user.h>就不存在,其中定义的变量出现在<asm/ptrace-abi.h>和<sys/user.h>中. ================================================================================================== by Pradeep Padala Create

ptrace函数深入分析

ptrace函数:进程跟踪. 形式:#include<sys/ptrace.h> Int ptrace(int request,int pid,int addr,int data); 概述: 父进程控制子进程运行,检查和改变它的核心Image.Ptrace主要用来实现断点调试.当进程被中止,通知父进程,进程的内存空间可以被读写,父进程可以选择是子进程继续执行,还是中止. 根据ptrace的函数原形 int ptrace(int request, int pid, int addr, int d

Process Kill Technology && Process Protection Against In Linux

目录 0. 引言 1. Kill Process By Kill Command 2. Kill Process By Resource Limits 3. Kill Process By Code Injection Into Running Process Via GDB 4. Kill Process By Using Cross Process Virtual Memory Modify 5. Kill Process By Using ptrace To Inject .so 6. P

Linux内核漏洞利用-环境配置(转)

实验环境: Ubuntu-14.04.1 x86 linux-2.6.32.1 busybox-1.27.2 qemu 0x00 安装qemu sudo apt-get install qemu qemu-system 0x01 Linux内核编译 $ wget https://www.kernel.org/pub/linux/kernel/v2.6/linux-2.6.32.tar.gz $ tar zxvf linux-2.6.32.tar.gz $ cd linux-2.6.32 $ su

内核升级 2.6.18 升级到 2.6.32 装systemtap

安装步骤: 1.下载装源代码: https://www.kernel.org/ 找到 https://www.kernel.org/pub/  链接 查找linux->kernel->v2.6->linux-2.6.32.tar.bz2 2.拖动虚拟机 放到目录 /usr/src/kernels/下解压 为    linux -3.2.26. 3 .配制内核 1.  yum install ncurses-devel.x86_64 2.  打补丁utrace,  (补丁下载      h

Ubuntu 11.10 (Oneiric)上编译带utrace补丁的内核 转

Ubuntu 11.10 (Oneiric)上编译带utrace补丁的内核 首先准备linux内核编译环境: sudo apt-get install fakeroot build-essential crash kexec-tools makedumpfile kernel-wedge kernel-package sudo apt-get build-dep linux sudo apt-get install git-core libncurses5 libncurses5-dev lib

最详细的黑客教程

最详细的黑客教程-- 最详细的黑客入门法则! (目前为止) 第一章------关于找软件 第二章------关于被入侵 第三章------基础知识和入侵步骤 第四章------关于命令的使用  第五章------关于windows98  第六章------关于破解邮箱  第七章------关于解除网吧.网页限制 第八章------关于流光 第九章------关于字典 第十章------关于ipc$.空连接和默认共享 第十一章------关于扫描出的漏洞 第十二章------关于提升权限 第十三章

linux系统调用表(system call table)

系统调用号 函数名 入口点 源码 0 read sys_read fs/read_write.c 1 write sys_write fs/read_write.c 2 open sys_open fs/open.c 3 close sys_close fs/open.c 4 stat sys_newstat fs/stat.c 5 fstat sys_newfstat fs/stat.c 6 lstat sys_newlstat fs/stat.c 7 poll sys_poll fs/sel

Linux System Call Table

Linux System Call Table(2.2) %eax Name Source %ebx %ecx %edx %esx %edi 1 sys_exit kernel/exit.c int - - - - 2 sys_fork arch/i386/kernel/process.c struct pt_regs - - - - 3 sys_read fs/read_write.c unsigned int char * size_t - - 4 sys_write fs/read_wri