crash log具体流程概述
当某服务或者native code程序crash并产生调试信息后,这些信息有两种去向:
1.写入到logcat:
这种信息可以通过adb shell中的logcat查看到。
2.写入到系统的/data/tombstones/文件夹中:
创建tombstone_xx文件后写入信息,xx从00开始,最大支持49个tombstone_xx文件,超出后会从00开始重新写入,覆盖之前的文件。
当某一进程crash以后会向系统发送信号,信号在某个地方会被拦截下来,交给android的处理函数debugger_signal_handler。此函数通过socket把信息发送给守护进程debuggerd,由debuggerd来处理相关操作。
一)拦截信号单元: debugger (common\bionic\linker\debugger.c)
debugger_init()函数如下:
1 void debugger_init() 2 { 3 struct sigaction act; 4 memset(&act, 0, sizeof(act)); 5 act.sa_sigaction = debugger_signal_handler; 6 act.sa_flags = SA_RESTART | SA_SIGINFO; 7 sigemptyset(&act.sa_mask); 8 9 sigaction(SIGILL, &act, NULL); 10 sigaction(SIGABRT, &act, NULL); 11 sigaction(SIGBUS, &act, NULL); 12 sigaction(SIGFPE, &act, NULL); 13 sigaction(SIGSEGV, &act, NULL); 14 sigaction(SIGSTKFLT, &act, NULL); 15 sigaction(SIGPIPE, &act, NULL); 16 }
从如上代码可以看出,此函数的功能是注册信号处理函数:收到这些信号后会执行debugger_signal_handler。下面要把这个初始化函数放到启动的进程中。
当某进程创建后,在main()之前首先调用__start函数,然后调用__linker_init()函数,在__linker_init()中会调用debugger_init()。
这样当进程出现问题后会发信号给系统,在系统处理之前会首先跳到debugger_signal_handler做处理。
/*
 * Signal handler: connects to the debugger socket, sends a
 * DEBUGGER_ACTION_CRASH message carrying this thread's tid, then waits for
 * a one-byte reply before notifying gdb of the loaded libraries.
 *
 * This is an abridged listing; the places where the original article elided
 * code are marked below.
 */
void debugger_signal_handler(int n, siginfo_t* info, void* unused)
{
    pid_t tid;
    int s;

    /* ... (code elided in the original listing) ... */

    tid = gettid();
    s = socket_abstract_client(DEBUGGER_SOCKET_NAME, SOCK_STREAM);
    if (s >= 0) {
        int ret;
        debugger_msg_t msg;

        msg.action = DEBUGGER_ACTION_CRASH;
        msg.tid = tid;
        /* Send the request, including this thread's tid, to the server. */
        RETRY_ON_EINTR(ret, write(s, &msg, sizeof(msg)));
        if (ret == sizeof(msg)) {
            /* Blocks until one byte arrives on the socket. */
            RETRY_ON_EINTR(ret, read(s, &tid, 1));
            /* Preserve the read()'s errno across the notification call. */
            int savedErrno = errno;
            notify_gdb_of_libraries();
            errno = savedErrno;
        }

        /* ... (code elided in the original listing) ... */
    }
}
如上代码是将tid发送给服务端,服务端就是debuggerd进程,这时debuggerd会接收到客户端的数据,里面包含crash线程的tid。read(s, &tid, 1);这个函数是等待socket发来的数据,它会一直阻塞在这里,直到读到数据为止。
二)守护进程 debuggerd (common\system\core\debuggerd\debuggerd.c)
此进程在init.rc中可以找到跟随其他服务一起启动.在debuggerd的main函数中会执行do_server()函数
do_server()函数如下:
static int do_server() { //do sth s = socket_local_server(DEBUGGER_SOCKET_NAME, ANDROID_SOCKET_NAMESPACE_ABSTRACT, SOCK_STREAM); if(s < 0) return 1; fcntl(s, F_SETFD, FD_CLOEXEC); LOG("debuggerd: " __DATE__ " " __TIME__ "\n"); for(;;) { struct sockaddr addr; socklen_t alen; int fd; alen = sizeof(addr); XLOG("waiting for connection\n"); fd = accept(s, &addr, &alen); if(fd < 0) { XLOG("accept failed: %s\n", strerror(errno)); continue; } fcntl(fd, F_SETFD, FD_CLOEXEC); handle_request(fd); } return 0;
从如上代码可以看出,debuggerd创建完服务端以后会一直在for(;;)中等待客户端的连接,当检测到有连接到达后会执行handle_request函数:
/*
 * handle_request(fd): serve one client connection.
 *
 * Flow shown by this listing:
 *   1. read_request() fills `request` from fd.
 *   2. ptrace(PTRACE_ATTACH) attaches to request.tid.
 *   3. A single "\0" byte is written back to fd.
 *   4. Loop on wait_for_signal(request.tid, ...); a negative result ends
 *      the loop.
 *      - SIGSTOP: depending on request.action, write a tombstone
 *        (engrave_tombstone), dump a backtrace to fd (dump_backtrace) or
 *        to the log (dump_backtrace_for_thread); for any other action,
 *        resume the thread with PTRACE_CONT and wait again.
 *      - SIGILL/SIGABRT/SIGBUS/SIGFPE/SIGSEGV/SIGPIPE/SIGSTKFLT: send
 *        SIGSTOP to the whole process, then call engrave_tombstone();
 *        sibling threads are dumped only when !attach_gdb.
 *   5. Send SIGCONT to request.pid; if detach_failed is set, debuggerd
 *      kills itself with SIGKILL.
 *
 * NOTE(review): this is an abridged excerpt collapsed onto one line and is
 * not compilable as written: the `//` markers comment out the rest of the
 * line, the `TEMP_FAILURE_RETRY(...) != 1)` check has lost its enclosing
 * `if (`, and `request`, `detach_failed`, `attach_gdb` and `tombstone_path`
 * are not declared in the excerpt. Confirm details against the real
 * debuggerd.c before relying on it.
 */
static void handle_request(int fd) { int status = read_request(fd, &request); ptrace(PTRACE_ATTACH, request.tid, 0, 0); TEMP_FAILURE_RETRY(write(fd, "\0", 1)) != 1) int total_sleep_time_usec = 0; for (;;) { int signal = wait_for_signal(request.tid, &total_sleep_time_usec); if (signal < 0) { break; } // switch (signal) { case SIGSTOP: if (request.action == DEBUGGER_ACTION_DUMP_TOMBSTONE) { XLOG("stopped -- dumping to tombstone\n"); tombstone_path = engrave_tombstone(request.pid, request.tid, signal, true, true, &detach_failed, &total_sleep_time_usec); } else if (request.action == DEBUGGER_ACTION_DUMP_BACKTRACE) { XLOG("stopped -- dumping to fd\n"); dump_backtrace(fd, request.pid, request.tid, &detach_failed, &total_sleep_time_usec); } else if (request.action == DEBUGGER_ACTION_DUMP_BACKTRACE_TO_LOG) { XLOG("stopped -- dumping to log\n"); dump_backtrace_for_thread(fd, request.pid, request.tid, &detach_failed, &total_sleep_time_usec); } else { XLOG("stopped -- continuing\n"); status = ptrace(PTRACE_CONT, request.tid, 0, 0); if (status) { LOG("ptrace continue failed: %s\n", strerror(errno)); } LOG("handle_request:loop again"); continue; /* loop again */ } break; case SIGILL: case SIGABRT: case SIGBUS: case SIGFPE: case SIGSEGV: case SIGPIPE: case SIGSTKFLT: { XLOG("stopped -- fatal signal\n"); /* * Send a SIGSTOP to the process to make all of * the non-signaled threads stop moving. Without * this we get a lot of "ptrace detach failed: * No such process". */ kill(request.pid, SIGSTOP); /* don‘t dump sibling threads when attaching to GDB because it * makes the process less reliable, apparently... */ tombstone_path = engrave_tombstone(request.pid, request.tid, signal, !attach_gdb, false, &detach_failed, &total_sleep_time_usec); break; } } //.. kill(request.pid, SIGCONT); if (detach_failed) { LOG("debuggerd committing suicide to free the zombie!\n"); kill(getpid(), SIGKILL); } }
通过read_request读到了进程的pid、uid等信息后,通过ptrace(PTRACE_ATTACH, request.tid, 0, 0)将debuggerd挂在了crash进程上,这样就可以控制crash进程、读取它的信息了。PTRACE_ATTACH会向被调试进程发送SIGSTOP,crash进程停止。
TEMP_FAILURE_RETRY(write(fd, "\0", 1)) != 1
会向客户端发送数据,但是这时crash进程已经停止,所以暂时无法接收数据。
此时debuggerd进程等待crash进程的信号。首先进入case SIGSTOP,由于request.action是DEBUGGER_ACTION_CRASH,前面三个分支都不匹配,因此执行最后的else分支,代码如下:
/* Fragment of handle_request()'s SIGSTOP case (final else branch): resume the
 * traced thread with PTRACE_CONT, log a failure if ptrace returns non-zero,
 * then `continue` to wait for the next signal. */
XLOG("stopped -- continuing\n"); status = ptrace(PTRACE_CONT, request.tid, 0, 0); if (status) { LOG("ptrace continue failed: %s\n", strerror(errno)); } LOG("handle_request:loop again"); continue; /* loop again */
此时ptrace(PTRACE_CONT)会将crash进程激活继续执行。激活后crash进程接收到了数据,继续执行后将发送SIGSEGV(由crash的进程发送此信号),该信号再次被debuggerd接收到后进入switch,开始执行engrave_tombstone函数。
三)debuggerd的核心处理 (common\system\core\debuggerd\tombstone.c)
engrave_tombstone()这个函数的主要工作是在/data/tombstones/下创建tombstone_xx文件,然后将寄存器信息、堆栈信息写入进去。这些信息通过dump_crash函数写入:
/*
 * Write the full crash report for thread `tid` of process `pid` to `log`:
 * a timestamp line, the separator banner, build info, thread info, fault
 * address, the crashing thread's dump, then maps, smaps, status and logs.
 * When dump_sibling_threads is true the other threads are reported too.
 *
 * Returns the value produced by dump_sibling_thread_report(), or false when
 * sibling threads were not dumped.
 */
static bool dump_crash(log_t* log, pid_t pid, pid_t tid, int signal,
        bool dump_sibling_threads, int* total_sleep_time_usec, int index)
{
    /* Was used without a declaration; must start false so the function has a
     * defined result when sibling threads are not dumped. */
    bool detach_failed = false;
    char date[80];
    time_t now = time(NULL);
    struct tm *tm_now = localtime(&now);

    /* localtime() may return NULL, and strftime() leaves the buffer
     * unspecified when it returns 0, so fall back to an empty string. */
    if (tm_now == NULL ||
        strftime(date, sizeof(date), "%Y-%m-%d %H:%M:%S", tm_now) == 0) {
        date[0] = '\0';
    }

    _LOG(log, false, "debuggerd: %s\n", date);
    _LOG(log, false,
         "*** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***\n");
    dump_build_info(log);
    dump_thread_info(log, pid, tid, true, index, signal);
    dump_fault_addr(log, tid, signal);

    ptrace_context_t* context = load_ptrace_context(tid);
    dump_thread(context, log, tid, true, total_sleep_time_usec);
    dump_maps(log, pid);
    dump_smaps(log, pid);
    dump_status(log, pid);
    dump_logs(log, pid, true);

    if (dump_sibling_threads) {
        detach_failed = dump_sibling_thread_report(context, log, pid, tid,
                                                   total_sleep_time_usec);
    }

    free_ptrace_context(context);
    return detach_failed;
}
以上dump_xxx函数中会调用_LOG(log, xxxxx);函数,此函数会通过log参数中的文件句柄(log->tfd)执行write操作,将log写入文件。_LOG函数如下:
/*
 * printf-style logger used by the dump_* routines.
 *
 * If `log` is non-NULL and log->tfd is a valid descriptor, the formatted
 * message (truncated to the 512-byte buffer) is written to that descriptor.
 * Unless in_tombstone_only is set, or log->quiet is set, the message is also
 * sent to the Android log with tag "DEBUG" at INFO priority.
 */
void _LOG(log_t* log, bool in_tombstone_only, const char *fmt, ...)
{
    char buf[512];
    va_list ap;

    va_start(ap, fmt);

    if (log && log->tfd >= 0) {
        va_list ap_copy;
        int len;

        /* vsnprintf() leaves its va_list indeterminate; format from a copy
         * so `ap` is still valid for __android_log_vprint() below. */
        va_copy(ap_copy, ap);
        vsnprintf(buf, sizeof(buf), fmt, ap_copy);
        va_end(ap_copy);

        len = strlen(buf);
        /* Best-effort write: a failure here is not reported. */
        write(log->tfd, buf, len);
    }

    if (!in_tombstone_only && (!log || !log->quiet)) {
        __android_log_vprint(ANDROID_LOG_INFO, "DEBUG", fmt, ap);
    }
    va_end(ap);
}
所以可以通过in_tombstone_only来决定是否执行__android_log_vprint,进而决定是否输出到logcat。