信号 2017-02-26
1. 发送
以tkill为例来说明.
linux-3.10.86/kernel/signal.c
假定不是发送给组的, 即__send_signal()
的@group为0
do_tkill -> do_send_specific
|--find_task_by_vpid
|--check_kill_permission
|--do_send_sig_info -> send_signal
| |--__send_signal
__send_signal
|--q=__sigqueue_alloc //new sigqueue instance
|--list_add_tail(&q->list, &pending->list);
|--设置q->info的各域
| |--q->info.si_signo =
| |--q->info.si_pid =
|--complete_signal
| |--signal_wake_up -> signal_wake_up_state
| | |--set_tsk_thread_flag(t, TIF_SIGPENDING);
| | |--wake_up_state(, |TASK_INTERRUPTIBLE) -> try_to_wake_up
fixup_exception 2017-02-25
1. 什么情况下会调用fixup_exception
linux-3.10.86/arch/arm/mm/fault.c
/*
 * Abridged excerpt of do_page_fault(); only the lines relevant to the
 * question "when is fixup_exception reached" are kept.
 *
 * What the excerpt shows: a fault that was not taken in user mode jumps to
 * no_context, where __do_kernel_fault() is called and 0 is returned.
 */
static int __kprobes
do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
/* Not a user-mode fault: go straight to the kernel-fault path. */
if (!user_mode(regs))
goto no_context;
no_context:
/* NOTE(review): `mm` is presumably declared in the elided part of the function. */
__do_kernel_fault(mm, addr, fsr, regs);
return 0;
}
所以, 用户态(user_mode(regs))发起的 读或写 , copy to/from user等不会调用到 __do_kernel_fault, 也就不会调用fixup_exception的. (fixup_exception 仅会被 __do_kernel_fault调用.)
do_fork和COW 2017-02-24
1. 问题引入
摘自APUE:
由于在fork之后经常跟随着exec, 所以现在的很多实现并不执行一个父进程数据段, 栈和堆的完全复制. 作为替代, 使用了COW技术. 这些区域由父子进程共享, 而且内核将它们的访问权限改变为只读的. 如果父子进程中的任一个试图修改这些区域, 则内核只为修改区域的那块内存制作一个副本, 通常是虚拟存储系统中的一"页".
问题:哪里设置只读, 如何知道只读?
2. 解
快速tips:
PTE entry is marked as un-writeable.
But VMA is marked as writeable.
copy_one_pte()会调用 ptep_set_wrprotect()
还是先从dup_mm()开始看吧.
linux-3.10.86/kernel/fork.c
mmap和fault handler 2017-02-24
1. 问题引入
我们想知道, 通过mmap映射文件, 然后读取, 是如何读取到文件的, 其中的fault handler相关的内容.
2. ftrace
#include <stdlib.h>
#include <stdio.h>
#include <strings.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
/* Length in bytes of the mapping created by main(). Parenthesized so the
 * macro stays a single operand inside larger expressions (e.g. x / MEMSIZE). */
#define MEMSIZE (1024*4)
/* File that main() opens and maps. */
#define MPFILE "./.ash_history"
//#define ON_OFF //tracing_on
/*
 * Write `val` to an ftrace control file and echo the file's content back.
 *
 * path: file name relative to /sys/kernel/debug/tracing/ (e.g. "tracing_on").
 * val:  NUL-terminated string written to that file.
 *
 * The file is opened with O_RDWR|O_TRUNC. After a successful write, up to
 * 19 bytes are read back from offset 0 and printed, except for the file
 * named exactly "trace", which is closed without being read back.
 *
 * A path that does not fit the buffer, or a failing open()/write(), is
 * reported on stdout and ends the process with exit status 1.
 */
void write_ftrace(char *path, char *val)
{
    static const char ftrace_root[] = "/sys/kernel/debug/tracing/";
    char full_path[256];

    /* Bounded formatting instead of strcat() into a fixed-size array. */
    int n = snprintf(full_path, sizeof(full_path), "%s%s", ftrace_root, path);
    if (n < 0 || (size_t)n >= sizeof(full_path)) {
        printf("open %s:%s\n", path, strerror(ENAMETOOLONG));
        exit(1);
    }

    int fd_trace = open(full_path, O_RDWR | O_TRUNC);
    if (fd_trace < 0) {
        printf("open %s:%s\n", path, strerror(errno));
        exit(1);
    }

    printf("to write:%s %s\n", path, val);
    if (write(fd_trace, val, strlen(val)) < 0) {
        printf("write %s %s\n", path, strerror(errno));
        exit(1);
    }

    /* NOTE(review): the read-back is skipped for "trace", presumably because
     * dumping the trace buffer here is not wanted -- confirm. */
    if (strcmp(path, "trace") == 0) {
        close(fd_trace);
        return;
    }

    /* Read one byte less than the buffer so it can always be terminated. */
    char val_read[20];
    ssize_t got = pread(fd_trace, val_read, sizeof(val_read) - 1, 0);
    if (got < 0) {
        printf("read %s %s\n", path, strerror(errno));
        got = 0;
    }
    val_read[got] = '\0';
    printf("read val:%s\n", val_read);

    close(fd_trace);
}
/*
 * Write `val` to the file at `path`, then read the file back and print it.
 *
 * path: full path of the file, opened with O_RDWR.
 * val:  NUL-terminated string written to the file.
 *
 * A failing open() or write() is reported on stdout and ends the process
 * with exit status 1. Up to 19 bytes are read back from offset 0; a failing
 * read is reported and treated as an empty value.
 */
void write_val(char *path, char *val)
{
    int fd = open(path, O_RDWR);
    if (fd < 0) {
        printf("open %s:%s\n", path, strerror(errno));
        exit(1);
    }

    printf("to write:%s %s\n", path, val);
    if (write(fd, val, strlen(val)) < 0) {
        printf("write %s %s\n", path, strerror(errno));
        exit(1);
    }

    /* Read one byte less than the buffer so it can always be terminated. */
    char val_read[20];
    ssize_t got = pread(fd, val_read, sizeof(val_read) - 1, 0);
    if (got < 0) {
        printf("read %s %s\n", path, strerror(errno));
        got = 0;
    }
    val_read[got] = '\0';
    printf("read val:%s\n", val_read);

    close(fd);
}
/*
 * Capture an ftrace function trace around the first read of an mmap'ed file.
 *
 * Steps: open and map MPFILE, advise the kernel to drop the file's first
 * 4096 bytes from the page cache, mount debugfs, configure ftrace for this
 * PID, read one byte through the mapping, stop tracing and copy the trace
 * buffer to ./mmap.ftrace.
 *
 * Returns 0 when the whole sequence ran; exits with status 1 if MPFILE
 * cannot be opened or mapped (the write_* helpers also exit with 1 on error).
 */
int main(void)
{
    int fd = open(MPFILE, O_RDWR);
    if (fd < 0) {
        perror("open()");
        exit(1);
    }

    /* mmap() reports failure with MAP_FAILED, not NULL. */
    char *ptr = mmap(NULL, MEMSIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (ptr == MAP_FAILED) {
        perror("mmap()");
        exit(1);
    }

    /* Avoid interference from the page cache. posix_fadvise() returns the
     * error number directly and does not set errno. */
    int adv_err = posix_fadvise(fd, 0, 4096, POSIX_FADV_DONTNEED);
    if (adv_err != 0) {
        printf("Cache FADV_DONTNEED failed, %s\n", strerror(adv_err));
    } else {
        printf("Cache FADV_DONTNEED done\n");
    }

    /* Best effort: the mount fails harmlessly if debugfs is already mounted. */
    system("mount -t debugfs nodev /sys/kernel/debug/");

    /* Equivalent of: echo 1 > /proc/sys/kernel/ftrace_enabled */
    write_val("/proc/sys/kernel/ftrace_enabled", "1");

    int pid_cur = getpid();
    printf("pid:%d\n", pid_cur);

    /* Restrict tracing to this process via tracing/set_ftrace_pid. */
    char str_pid[25];
    snprintf(str_pid, sizeof(str_pid), "%d", pid_cur);
    write_ftrace("set_ftrace_pid", str_pid);

    write_ftrace("tracing_on", "1");

    /* Equivalent of: echo function > ${ROOT_FTRACE}/current_tracer
     * (other filters could be set here). */
    write_ftrace("current_tracer", "function");

    /* Equivalent of: echo 0 > ${ROOT_FTRACE}/trace */
    write_ftrace("trace", "0");

    /* Optional marker: write_ftrace("trace_marker", "start_trace_marker"); */

    /* This access through the mapping is what we want to trace. */
    int i = *ptr;

    /* Optional marker: write_ftrace("trace_marker", "end_trace_marker"); */

    /* Stop recording into the trace buffer, then save it. */
    write_ftrace("tracing_on", "0");
    system("cat /sys/kernel/debug/tracing/trace > mmap.ftrace");

    printf("%x\n", i);

    munmap(ptr, MEMSIZE);
    close(fd);

    /* The run completed, so report success instead of exit status 1. */
    return 0;
}
关于THREAD_START_SP 2017-02-23
1. 问题引入
https://awakening-fong.github.io/posts/arm/arm_qemu_02 中说到 start_kernel 前设置 sp 为 init_thread_union + THREAD_START_SP
#define THREAD_START_SP (THREAD_SIZE - 8)
为何要-8?
2. 解
/*
 * Return the thread_info pointer derived from the current stack pointer.
 *
 * The low bits of sp are cleared with ~(THREAD_SIZE - 1), i.e. sp is rounded
 * down to a THREAD_SIZE boundary (assumes THREAD_SIZE is a power of two --
 * TODO confirm), and the result is returned as a struct thread_info pointer.
 */
static inline struct thread_info *current_thread_info(void)
{
/* Bind the local variable to the "sp" register so its value can be read. */
register unsigned long sp asm ("sp");
return (struct thread_info *)(sp & ~(THREAD_SIZE - 1));
}
(图示中A点到B点 大小是8字节)
如果sp是SP0, 那么, (sp & ~(THREAD_SIZE - 1)) 后还是指向SP0 (A点);
如果sp是SP1, 那么, (sp & ~(THREAD_SIZE - 1)) 后指向的是thread_info (C点).
所以, 需要-8.