撰写不易,转载需注明出处:http://blog.csdn.net/jscese/article/details/46699117 ,本文来自【jscese】的博客!


kernel支持:

coredump 由 Linux 内核支持:进程崩溃时记录并存储堆栈空间、寄存器等相关内容,保留致命现场数据,便于分析查找根源。
编译项:

Symbol: COREDUMP [=y] 
Type  : boolean
Prompt: Enable core dump support
  Location:
(1) -> Userspace binary formats
 Defined at fs/Kconfig.binfmt:182  

/kernel/signal.c中的信号处理函数:get_signal_to_deliver中有这么一段:

        if (sig_kernel_coredump(signr)) {
            if (print_fatal_signals)
                print_fatal_signal(info->si_signo);
            proc_coredump_connector(current);
            /*
             * If it was able to dump core, this kills all
             * other threads in the group and synchronizes with
             * their demise.  If we lost the race with another
             * thread getting here, it set group_exit_code
             * first and our do_group_exit call below will use
             * that value and ignore the one we pass it.
             */
            do_coredump(info);
        }

展开宏如下:

(((signr) < 32) && ((1UL << ((signr) - 1)) & ((\
        (1UL << ((3) - 1))   |  (1UL << ((4) - 1))    | \
    (1UL << ((5) - 1))   |  (1UL << ((6) - 1))   | \
        (1UL << ((8) - 1))    |  (1UL << ((11) - 1))   | \
    (1UL << ((10) - 1))    |  (1UL << ((12) - 1))    | \
        (1UL << ((24) - 1))   |  (1UL << ((25) - 1))   | \
    (1UL << ((7) - 1)) ))))

可对照signal列表查看各编号对应的信号。

do_coredump函数实现在/fs/coredump.c

void do_coredump(siginfo_t *siginfo)
{
...
struct mm_struct *mm = current->mm;
struct linux_binfmt * binfmt;
...
    struct coredump_params cprm = {
        .siginfo = siginfo,
        .regs = signal_pt_regs(),
        .limit = rlimit(RLIMIT_CORE),  //获取当前进程的rlimit
        /*
         * We must use the same mm->flags while dumping core to avoid
         * inconsistency of bit flags, since this flag is not protected
         * by any locks.
         */
        .mm_flags = mm->flags,
    };
...
binfmt = mm->binfmt;
...
    if (cprm.limit < binfmt->min_coredump)    //判断rlimit 必须大于定的一个最小值: 
            goto fail_unlock;
...
}
这个最小值定义在binfmt_elf.c中
static struct linux_binfmt elf_format = {
    .module     = THIS_MODULE,
    .load_binary    = load_elf_binary,
    .load_shlib = load_elf_library,
    .core_dump  = elf_core_dump,
    .min_coredump   = ELF_EXEC_PAGESIZE,
};

#define ELF_EXEC_PAGESIZE 4096

kernel中的就先记录这些准备信息,如有需要可再深入查看coredump时抓取存储的细节


Android native 层面配置

上patch:

diff --git a/init.{ro.hardware}.rc b/init.{ro.hardware}.rc
index 8571839..a161071 100755
--- a/init.{ro.hardware}.rc
+++ b/init.{ro.hardware}.rc
@@ -106,6 +106,10 @@ on fs
mount debugfs none /sys/kernel/debug
+    # config coredump
+   mkdir /data/coredump 0777 system system
+   checkenable /data/coredump/enable
+   # end


diff --git a/init/builtins.c b/init/builtins.c
index 81f9b2c..c21a192 100755
--- a/init/builtins.c
+++ b/init/builtins.c
@@ -949,3 +949,60 @@ int do_wait(int nargs, char **args)
     } else
         return -1;
 }
+
+/*
+ * checkenable <file>: init builtin that treats <file> as an on/off switch.
+ * The file is created empty if missing; content that atoi() parses as 1
+ * means "enabled". For /data/coredump/enable this raises RLIMIT_CORE of the
+ * calling process and points core_pattern at /data/coredump. Returns 0 on
+ * success or when the switch is off, non-zero on bad args or a failed step.
+ */
+int do_checkenable(int nargs, char **args) {
+    int fd, len;
+    char buf[10];
+
+    ERROR("do_checkenable in init");
+    if (nargs != 2)
+        return -1;
+
+    /* O_CREAT: make sure the switch file exists so it can be edited later. */
+    if ((fd = open(args[1], O_RDONLY|O_CREAT, 0664)) < 0)
+        return -1;
+
+    /* Leave room for the terminator: reading sizeof buf bytes would make */
+    /* buf[len] below write one byte past the end of the array. */
+    len = read(fd, buf, sizeof buf - 1);
+    close(fd);
+    if (len < 0)
+        return -1;
+    buf[len] = '\0';
+
+    if (atoi(buf) != 1) /* switch is off */
+        return 0;
+
+    if (strcmp(args[1], "/data/coredump/enable") == 0)
+    {
+        struct rlimit coredump;
+        int pattern_ret, dumpable_ret;
+
+        memset(&coredump, 0, sizeof(struct rlimit));
+        coredump.rlim_cur = RLIM_INFINITY;
+        coredump.rlim_max = RLIM_INFINITY;
+        if (setrlimit(RLIMIT_CORE, &coredump) != 0)
+        {
+            ERROR("setrlimit unlimit fail");
+            return -1;
+        }
+        /* Casts keep the arguments in step with the format specifiers. */
+        NOTICE("in init command do_checkenable coredump cur==%lu , max==%lu pid==%d\n",
+               (unsigned long)coredump.rlim_cur, (unsigned long)coredump.rlim_max, (int)getpid());
+
+        /* Attempt both writes, then report the first failure (if any). */
+        pattern_ret = write_file("/proc/sys/kernel/core_pattern","/data/coredump/core.%e.%p.%s");
+        dumpable_ret = write_file("/proc/sys/fs/suid_dumpable","1");
+        return pattern_ret ? pattern_ret : dumpable_ret;
+    }
+
+    return 0;
+}
+/*end*/
diff --git a/init/init_parser.c b/init/init_parser.c
old mode 100644
new mode 100755
index 6466db2..569b910
--- a/init/init_parser.c
+++ b/init/init_parser.c
@@ -89,6 +89,7 @@ static int lookup_keyword(const char *s)
         if (!strcmp(s, "hown")) return K_chown;
         if (!strcmp(s, "hmod")) return K_chmod;
         if (!strcmp(s, "ritical")) return K_critical;
+        if (!strcmp(s, "heckenable")) return K_checkenable;/*jiangbin add for checkenable*/
         break;
     case 'd':
         if (!strcmp(s, "isabled")) return K_disabled;
diff --git a/init/keywords.h b/init/keywords.h
old mode 100644
new mode 100755
index 2d97e5b..cf8792d
--- a/init/keywords.h
+++ b/init/keywords.h
@@ -41,6 +41,7 @@ int do_loglevel(int nargs, char **args);
 int do_load_persist_props(int nargs, char **args);
 int do_load_all_props(int nargs, char **args);
 int do_wait(int nargs, char **args);
+int do_checkenable(int nargs, char **args);
 #define __MAKE_KEYWORD_ENUM__
 #define KEYWORD(symbol, flags, nargs, func) K_##symbol,
 enum {
@@ -104,6 +105,7 @@ enum {
     KEYWORD(load_persist_props,    COMMAND, 0, do_load_persist_props)
     KEYWORD(load_all_props,        COMMAND, 0, do_load_all_props)
     KEYWORD(ioprio,      OPTION,  0, 0)
+    KEYWORD(checkenable,        COMMAND, 1, do_checkenable)
 #ifdef __MAKE_KEYWORD_ENUM__
     KEYWORD_COUNT,
 };


作为开关添加了一个checkenable command 方便修改操作.

最终生成core文件 /data/coredump/core.%e.%p.%s 含义:

%p 出Core进程的PID
%u 出Core进程的UID
%s 造成Core的signal号
%t 出Core的时间,从1970-01-01 00:00:00开始的秒数
%e 出Core进程对应的可执行文件名

可使用 ulimit -c 查看当前的 core 文件大小限制(unlimited 表示不限制)


Android Application 层配置:

按道理init进程按照上面那样设置之后,它的子进程zygote应该也是具备coredump能力的,自然fork的app进程也是
但是发现在zygote fork进程之后进行的:

    private static void callPostForkChildHooks(int debugFlags, String instructionSet) {
        long startTime = SystemClock.elapsedRealtime();
        VM_HOOKS.postForkChild(debugFlags, instructionSet);
        checkTime(startTime, "Zygote.callPostForkChildHooks");
    }

中调用到 dalvik.system.ZygoteHooks 中进一步初始化
libcore/dalvik/src/main/java/dalvik/system/ZygoteHooks.java
native:
/art/runtime/native/dalvik_system_ZygoteHooks.cc

调用逻辑不多描述,其中会进入这个函数:

static void EnableDebugger() {
  // To let a non-privileged gdbserver attach to this
  // process, we must set our dumpable flag.
#if defined(HAVE_PRCTL)
  if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) == -1) {
    PLOG(ERROR) << "prctl(PR_SET_DUMPABLE) failed for pid " << getpid();
  }
#endif
  // We don't want core dumps, though, so set the core dump size to 0.
  rlimit rl;
  rl.rlim_cur = 0;
  rl.rlim_max = RLIM_INFINITY;
  if (setrlimit(RLIMIT_CORE, &rl) == -1) {
    PLOG(ERROR) << "setrlimit(RLIMIT_CORE) failed for pid " << getpid();
  }
}

所以zygote fork出来的process 最后都是current rlimit_core 为0
导致无法coredump

解决办法patch如下:

diff --git a/core/jni/com_android_internal_os_Zygote.cpp b/core/jni/com_android_internal_os_Zygote.cpp
old mode 100644
new mode 100755
index 4f5e08b..f9782c3
--- a/core/jni/com_android_internal_os_Zygote.cpp
+++ b/core/jni/com_android_internal_os_Zygote.cpp
@@ -35,6 +35,7 @@
 #include <sys/utsname.h>
 #include <sys/wait.h>

+#include <cutils/properties.h>

 #include <cutils/fs.h>
 #include <cutils/multiuser.h>
@@ -165,6 +166,26 @@ static void SetGids(JNIEnv* env, jintArray javaGids) {
   }
 }
-       delete se_name; 

+
+/*actions_code(jiangbin): raise RLIMIT_CORE (soft and hard) to RLIM_INFINITY for
+ * the calling process so it may dump core; the outcome is only logged.*/
+static void Zygote_nativesetrlimit()
+{
+    struct rlimit coredump;
+    memset(&coredump, 0, sizeof(struct rlimit));
+    coredump.rlim_cur = RLIM_INFINITY;
+    coredump.rlim_max = RLIM_INFINITY;
+    if (setrlimit(RLIMIT_CORE, &coredump) != 0) {
+        ALOGE("setrlimit unlimit fail in zygotejni");
+        return;
+    }
+    /* pid_t is a signed integer type: print it with %d, not %lu. */
+    ALOGD("in zygotejni setunlimit cur==%lu , max==%lu pid==%d\n", (unsigned long)coredump.rlim_cur, (unsigned long)coredump.rlim_max, (int)getpid());
+}
+/*end*/
+
+
+
 // Sets the resource limits via setrlimit(2) for the values in the
 // two-dimensional array of integers that's passed in. The second dimension
 // contains a tuple of length 3: (resource, rlim_cur, rlim_max). NULL is
@@ -577,6 +598,17 @@ static pid_t ForkAndSpecializeCommon(JNIEnv* env, uid_t uid, gid_t gid, jintArra
       ALOGE("Error calling post fork hooks.");
       RuntimeAbort(env);
     }
+
+    /*actions_code(jiangbin:to setrlimit for app process:coredump after PostForkChildHooks)*/
+    char propcoredump[PROPERTY_VALUE_MAX];
+    property_get("persist.sys.zygotedump", propcoredump, "");
+   if(strstr(propcoredump,se_name_c_str)!=NULL)
+   {
+      Zygote_nativesetrlimit();
+   }
+   /*end*/
+       delete se_name; 
+
   } else if (pid > 0) {
     // the parent process
   }

同样留有开关用于控制~
下篇记录 coredump 文件解析流程

GitHub 加速计划 / li / linux-dash
10.39 K
1.2 K
下载
A beautiful web dashboard for Linux
最近提交(Master分支:2 个月前 )
186a802e added ecosystem file for PM2 4 年前
5def40a3 Add host customization support for the NodeJS version 4 年前
Logo

旨在为数千万中国开发者提供一个无缝且高效的云端环境,以支持学习、使用和贡献开源项目。

更多推荐