回写路径
2017-01-01
方式1. 前期tag dirty
linux-3.10.86/mm/page-writeback.c
write_cache_pages
{
while (...) {
nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
if (nr_pages == 0)
break;
...
lock_page(page);
...
/*常见的有__mpage_writepage 或者 __writepage*/
ret = (*writepage)(page, wbc, data);
}
__filemap_fdatawrite_range
| |--do_writepages
| | |--mapping->a_ops->writepages or generic_writepages
| | | |--write_cache_pages
比如fsync
do_fsync -> vfs_fsync -> vfs_fsync_range -> file->f_op->fsync -> ext2_fsync
-> generic_file_fsync
|--filemap_write_and_wait_range -> __filemap_fdatawrite_range
|--sync_mapping_buffers
|--sync_inode_metadata
| |--struct writeback_control wbc ....
比如 linux-3.10.86/fs/sync.c
sync_file_range -> filemap_fdatawrite_range -> __filemap_fdatawrite_range
方式2. BDI回写
2.1 触发回写
linux-3.10.86/mm/backing-dev.c
default_bdi_init
|--- bdi_wq = alloc_workqueue("writeback", WQ_MEM_RECLAIM | WQ_FREEZABLE |
WQ_UNBOUND | WQ_SYSFS, 0);
bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
|--INIT_DELAYED_WORK(&wb->dwork, bdi_writeback_workfn);
linux-3.10.86/fs/fs-writeback.c
bdi_writeback_workfn(struct work_struct *work)
{
struct bdi_writeback *wb = container_of(to_delayed_work(work),
struct bdi_writeback, dwork);
...
/*任务还没完成, 让本函数(bdi_writeback_workfn)再跑跑*/
if (!list_empty(&bdi->work_list))
mod_delayed_work(bdi_wq, &wb->dwork, 0);
...
}
通过以下函数可以让 bdi_writeback_workfn() 跑起来:
flush_delayed_work
mod_delayed_work_on
mod_delayed_work
queue_delayed_work_on
queue_delayed_work
...
常用的封装过的函数是:
linux-3.10.86/fs/fs-writeback.c
bdi_queue_work(struct backing_dev_info *bdi,
struct wb_writeback_work *work)
{
...
list_add_tail(&work->list, &bdi->work_list);
mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
}
调用 bdi_queue_work 的有:
__bdi_start_writeback()
writeback_inodes_sb_nr()
sync_inodes_sb()
wakeup_flusher_threads -> __bdi_start_writeback
linux-3.10.86/fs/sync.c
sync -> wakeup_flusher_threads
linux-3.10.86/fs/buffer.c
free_more_memory -> wakeup_flusher_threads
linux-3.10.86/mm/vmscan.c
do_try_to_free_pages -> wakeup_flusher_threads
linux-3.10.86/fs/sync.c
syncfs -> sync_filesystem -> __sync_filesystem -> writeback_inodes_sb -> writeback_inodes_sb_nr
所以, 大体上是两类, 一类是内存不足触发的, 一类是同步文件系统触发的.
2.2 kupdate
__mark_inode_dirty -> bdi_wakeup_thread_delayed -> queue_delayed_work
2.3 前期dirty inode
在触发回写线程之前, b_dirty, b_io, b_more_io 等链表还是要非空的,
否则回写线程无事可做.
__set_page_dirty
|-- __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
| |--list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
generic_write_end
|--if (i_size_changed) mark_inode_dirty(inode);
| |--__mark_inode_dirty(inode, I_DIRTY);
generic_file_direct_write
|--i_size_write
|--mark_inode_dirty
| |--__mark_inode_dirty(inode, I_DIRTY);
...
struct bdi_writeback {
...
struct list_head b_dirty; /* dirty inodes */
/*
这个是常见的套路: 一次性把b_dirty中合适的(过期的)inode挪到b_io中,
然后其他人可以继续往链表b_dirty中添加, 我们只需处理链表b_io上的inode, 可以减少锁的overhead.
另一方面次要的原因是: 可以避免有人一直往链表b_dirty中添加,
导致写个不停, 不过这个也可以通过限制每次写的数量来避免.
*/
struct list_head b_io; /* parked for writeback */
/*
writeback_sb_inodes
|--若inode已处于I_SYNC(正在被回写), 而本次回写控制并非WB_SYNC_ALL, 则放入b_more_io
requeue_inode
{
if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) {
if (wbc->nr_to_write <= 0) {
requeue_io(inode, wb);
}
...
}
}
还有page要写, 不过配额用光了, 则放入b_more_io.
*/
struct list_head b_more_io; /* parked for more writeback */
}
本文地址: https://awakening-fong.github.io/posts/io/writeback_routine
转载请注明出处: https://awakening-fong.github.io
若无法评论, 请打开JavaScript, 并通过proxy.
blog comments powered by Disqus