探测softlockup的原理 2017-06-10

本文以2.6.18内核来说明.

1. 检测

每隔HZ个jiffies(即1秒), tick会唤醒watchdog线程, 由watchdog线程来喂狗(touch).

tick                         watchdog
|                              |
|           wakeup             |
|- - - - -  - - -  - - - >     |
|check                         | touch
|                              |
|                              |
|           wakeup             |
|- - - - -  - - -  - - - >     |
|check                         | touch
|                              |
|                              |

若watchdog线程迟迟没有被调度到, 则导致watchdog没有touch, 这样, tick 时钟会检查到该问题, 报 soft lockup .

为何中断handler中可以使用current 2017-05-13

本博文仅提出问题, 不给出答案. 祝玩得开心.

问题: 我们知道 当前task可以通过current来获得. ARM上, 其实现如下:

/* `current` expands to the `task` field of the thread_info returned by current_thread_info(). */
#define current (get_current()) 
#define get_current() (current_thread_info()->task)
/*
 * Compute the thread_info pointer from the stack pointer by clearing the low
 * bits of sp with the mask ~(THREAD_SIZE - 1).
 * NOTE(review): this presumably relies on thread_info sitting at the base of a
 * THREAD_SIZE-aligned stack; that layout is not shown in this snippet - confirm.
 */
static inline struct thread_info *current_thread_info(void)
{
    /* Bind the local variable to the "sp" register so its value is the current stack pointer. */
    register unsigned long sp asm ("sp");
    return (struct thread_info *)(sp & ~(THREAD_SIZE - 1));
}

对于ARM, 其sp是banked的, irq模式和SVC模式的sp不是同一个. 这样, 中断handler还能通过sp来获得当前task吗?

以下内容是本问题的提示.
为了避免干扰各位思考, 已设置背景色为黑色, 若要查看文字, 可通过按ctrl+a等方法查看.

中断handler在SVC模式下执行的.

信号 2017-02-26

1. 发送

以tkill为例来说明. linux-3.10.86/kernel/signal.c 假定不是发送给组的, 即__send_signal()的@group为0

do_tkill -> do_send_specific
|--find_task_by_vpid
|--check_kill_permission
|--do_send_sig_info -> send_signal
|   |--__send_signal


__send_signal
|--q=__sigqueue_alloc //new sigqueue instance
|--list_add_tail(&q->list, &pending->list);
|--设置q->info的各域
|   |--q->info.si_signo =
|   |--q->info.si_pid =
|--complete_signal
|   |--signal_wake_up -> signal_wake_up_state
|   |   |--set_tsk_thread_flag(t, TIF_SIGPENDING);
|   |   |--wake_up_state(, |TASK_INTERRUPTIBLE) -> try_to_wake_up

fixup_exception 2017-02-25

1. 什么情况下会调用fixup_exception

linux-3.10.86/arch/arm/mm/fault.c

/*
 * Abridged excerpt of the ARM page-fault handler: only the path that leads to
 * __do_kernel_fault() is kept. The rest of the body (including the
 * declaration of `mm`) is omitted in this listing.
 */
static int __kprobes
do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{

    /* Only a fault that was NOT taken from user mode jumps to no_context. */
    if (!user_mode(regs))
        goto no_context;


no_context:
    /* Hand the fault over to the kernel-fault path, then return 0. */
    __do_kernel_fault(mm, addr, fsr, regs);
    return 0;
}

所以, 用户态(user_mode(regs))发起的 读或写 , copy to/from user等*不会*调用到 __do_kernel_fault, 也就不会调用fixup_exception的. (fixup_exception 仅会被 __do_kernel_fault调用.)

mmap和fault handler 2017-02-24

1. 问题引入

我们想知道: 通过mmap映射文件之后再去读取映射的内存时, 内核是如何把文件内容读出来的, 尤其是其中与fault handler相关的流程.

2. ftrace

#include <stdlib.h>
#include <stdio.h>
#include <strings.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>

/*
 * Length in bytes of the mapping. The expansion is parenthesized so the macro
 * behaves as a single value inside larger expressions (e.g. x / MEMSIZE).
 */
#define MEMSIZE (1024*4)
/* File that is mmap()ed and read while tracing is active. */
#define MPFILE "./.ash_history"

//#define ON_OFF //tracing_on

/*
 * Write `val` into an ftrace control file and echo the value read back.
 *
 * path: file name relative to /sys/kernel/debug/tracing/ (e.g. "tracing_on").
 * val:  NUL-terminated string to write.
 *
 * The file is opened with O_RDWR|O_TRUNC. For the "trace" file nothing is
 * read back after the write; for every other file up to 19 bytes are read
 * from offset 0 and printed.
 *
 * Calls exit(1) when the full path does not fit the local buffer or when
 * open()/write() fails. A failed read-back is only reported.
 */
void write_ftrace(char *path, char *val)
{
    char full_path[128];

    /* Bounded formatting instead of strcat() into a fixed-size array. */
    int n = snprintf(full_path, sizeof(full_path), "%s%s",
                     "/sys/kernel/debug/tracing/", path);
    if (n < 0 || (size_t)n >= sizeof(full_path)) {
        printf("path too long:%s\n", path);
        exit(1);
    }

    int fd_trace = open(full_path, O_RDWR|O_TRUNC);
    if (fd_trace < 0) {
        printf("open %s:%s\n",path,strerror(errno));
        exit(1);
    }

    printf("to write:%s %s\n",path,val);

    if (write(fd_trace, val, strlen(val)) < 0) {
        printf("write %s %s\n",path,strerror(errno));
        exit(1);
    }

    /* The "trace" file is only cleared by the write; skip the read-back. */
    if (strcmp(path, "trace") == 0) {
        close(fd_trace);
        return;
    }

    /* Read one byte less than the buffer so the result is always terminated. */
    char val_read[20] = {0};
    if (pread(fd_trace, val_read, sizeof(val_read) - 1, 0) < 0)
        printf("read %s %s\n",path,strerror(errno));
    else
        printf("read val:%s\n",val_read);

    close(fd_trace);
}

/*
 * Write `val` to the file at `path`, then read the file back from offset 0
 * and print what was read.
 *
 * path: path of an existing file, opened with O_RDWR.
 * val:  NUL-terminated string to write.
 *
 * Calls exit(1) when open() or write() fails. A failed read-back is only
 * reported. At most 19 bytes are read back so the buffer stays terminated.
 */
void write_val(char *path, char *val)
{
    int fd = open(path, O_RDWR);
    if (fd < 0) {
        printf("open %s:%s\n",path,strerror(errno));
        exit(1);
    }

    printf("to write:%s %s\n",path, val);
    if (write(fd, val, strlen(val)) < 0) {
        printf("write %s %s\n",path,strerror(errno));
        exit(1);
    }

    /* Leave room for the terminating NUL: printf("%s") needs it. */
    char val_read[20] = {0};
    if (pread(fd, val_read, sizeof(val_read) - 1, 0) < 0)
        printf("read %s %s\n",path,strerror(errno));
    else
        printf("read val:%s\n",val_read);

    close(fd);
}


/*
 * Trace the page fault triggered by the first read of an mmap()ed file.
 *
 * Steps: map MPFILE, ask the kernel to drop its cached pages, configure the
 * ftrace function tracer for this pid, read one byte through the mapping,
 * stop tracing, and dump the trace buffer into ./mmap.ftrace.
 *
 * Returns 0 on success. Exits with status 1 when the file cannot be opened or
 * mapped (the ftrace helpers also exit(1) on their own errors).
 */
int main()
{
    char *ptr;
    int fd;
    int i=-1;

    fd = open(MPFILE, O_RDWR);
    if (fd < 0) {
        perror("open()");
        exit(1);
    }

    /* mmap() reports failure with MAP_FAILED, not NULL. */
    ptr = mmap(NULL, MEMSIZE, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (ptr == MAP_FAILED) {
        perror("mmap()");
        close(fd);
        exit(1);
    }

    /*
     * Keep the page cache from hiding the fault path we want to observe.
     * posix_fadvise() returns the error number directly; it does not set errno.
     */
    int adv = posix_fadvise(fd, 0, MEMSIZE, POSIX_FADV_DONTNEED);
    if (adv != 0) {
        printf("Cache FADV_DONTNEED failed, %s\n",strerror(adv));
    }
    else {
        printf("Cache FADV_DONTNEED done\n");
    }

    /* A non-zero status is not fatal here: debugfs may already be mounted. */
    if (system("mount -t debugfs nodev /sys/kernel/debug/") != 0)
        printf("mount debugfs returned non-zero (already mounted?)\n");

    /* Shell equivalent: echo 1 > /proc/sys/kernel/ftrace_enabled */
    write_val("/proc/sys/kernel/ftrace_enabled", "1");

    /* Restrict tracing to this process: /sys/kernel/debug/tracing/set_ftrace_pid */
    int pid_cur = getpid();
    printf("pid:%d\n",pid_cur);
    char str_pid[25];
    snprintf(str_pid, sizeof(str_pid), "%d", pid_cur);
    write_ftrace("set_ftrace_pid", str_pid);

    write_ftrace("tracing_on", "1");

    /* Shell equivalent: echo function > ${ROOT_FTRACE}/current_tracer */
    write_ftrace("current_tracer", "function");

    /* Clear the trace buffer (echo 0 > ${ROOT_FTRACE}/trace). */
    write_ftrace("trace", "0");

    /* Optional marker: write_ftrace("trace_marker", "start_trace_marker"); */

    /* This read through the mapping is the access we want to trace. */
    i = *ptr;

    /* Optional marker: write_ftrace("trace_marker", "end_trace_marker"); */

    /* Stop recording into the buffer (echo 0 > ${ROOT_FTRACE}/tracing_on). */
    write_ftrace("tracing_on", "0");
    if (system("cat /sys/kernel/debug/tracing/trace > mmap.ftrace") != 0)
        printf("saving trace to mmap.ftrace failed\n");

    printf("%x\n", i);

    munmap(ptr, MEMSIZE);
    close(fd);

    /* Report success; the original exited with status 1 even when everything worked. */
    return 0;
}