(kernel 2.6.24 )
page cache:是块设备(硬盘等)与vfs之间的一个缓存。由于cpu访问内存的速度远高于访问硬盘等外设的速度。读取设备读取设备上一个文件时候预读到内存,可以提高系统I/O速度.管理page cache的数据结构:address_space通过radix_tree管理cache,在radix_tree的帮助下可以通过index高效的查找cache。
file:include/linux/fs.h
497 struct address_space {
498 struct inode *host; /* owner: inode, block_device */
499 struct radix_tree_root page_tree; /* radix tree of all pages */
500 rwlock_t tree_lock; /* and rwlock protecting it */
501 unsigned int i_mmap_writable;/* count VM_SHARED mappings */
502 struct prio_tree_root i_mmap; /* tree of private and shared mappings */
503 struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */
504 spinlock_t i_mmap_lock; /* protect tree, count, list */
505 unsigned int truncate_count; /* Cover race condition with truncate */
506 unsigned long nrpages; /* number of total pages */
507 pgoff_t writeback_index;/* writeback starts here */
508 const struct address_space_operations *a_ops; /* 操作函数methods */
509 unsigned long flags; /* error bits/gfp mask */
510 struct backing_dev_info *backing_dev_info; /* device readahead, etc */
511 spinlock_t private_lock; /* for use by the address_space */
512 struct list_head private_list; /* ditto */
513 struct address_space *assoc_mapping; /* ditto */
514 } __attribute__((aligned(sizeof(long))));
file:include/linux/fs.h
442 struct address_space_operations {
443 int (*writepage)(struct page *page, struct writeback_control *wbc);
444 int (*readpage)(struct file *, struct page *);
445 void (*sync_page)(struct page *);
446
447 /* Write back some dirty pages from this mapping. */
448 int (*writepages)(struct address_space *, struct writeback_control *);
449
450 /* Set a page dirty. Return true if this dirtied it */
451 int (*set_page_dirty)(struct page *page);
452
453 int (*readpages)(struct file *filp, struct address_space *mapping,
454 struct list_head *pages, unsigned nr_pages);
455
456 /*
457 * ext3 requires that a successful prepare_write() call be followed
458 * by a commit_write() call - they must be balanced
459 */
460 int (*prepare_write)(struct file *, struct page *, unsigned, unsigned);
461 int (*commit_write)(struct file *, struct page *, unsigned, unsigned);
462
463 int (*write_begin)(struct file *, struct address_space *mapping,
464 loff_t pos, unsigned len, unsigned flags,
465 struct page **pagep, void **fsdata);
466 int (*write_end)(struct file *, struct address_space *mapping,
467 loff_t pos, unsigned len, unsigned copied,
468 struct page *page, void *fsdata);
469
470 /* Unfortunately this kludge is needed for FIBMAP. Don't use it */
471 sector_t (*bmap)(struct address_space *, sector_t);
472 void (*invalidatepage) (struct page *, unsigned long);
473 int (*releasepage) (struct page *, gfp_t);
474 ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
475 loff_t offset, unsigned long nr_segs);
476 struct page* (*get_xip_page)(struct address_space *, sector_t,
477 int);
478 /* migrate the contents of a page to the specified target */
479 int (*migratepage) (struct address_space *,
480 struct page *, struct page *);
481 int (*launder_page) (struct page *);
482 };
index:是数据在外设中的偏移量(或者理解为地址)
radix_tree:
file:include/linux/radix-tree.h
60 /* root tags are stored in gfp_mask, shifted by __GFP_BITS_SHIFT */
61 struct radix_tree_root {
62 unsigned int height; /*整棵数高度*/
63 gfp_t gfp_mask; /*新建radix_tree_node时候的mask*/
64 struct radix_tree_node *rnode;
65 };
file:include/linux/radix-tree.h
49 struct radix_tree_node {
50 unsigned int height; /* Height from the bottom */
51 unsigned int count; /* slots中非NULL项个数*/
52 struct rcu_head rcu_head;
/**
* 通过index位移得到offset,通过offset访问slots
* 有三种结果:NULL,page*,radix_tree_node *.
*/
53 void *slots[RADIX_TREE_MAP_SIZE]; /*存放page指针或者下级node指针*/
/*存放两个标志dirty,write_back,如果对应dirty标志设置,说明对应节点或者子树有脏页*/
54 unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];/*存放flag*/
55 };
--------------------------------------------------------------------------------------
37 #ifdef __KERNEL__
//index中每次的偏移量(一般设置为6)
38 #define RADIX_TREE_MAP_SHIFT (CONFIG_BASE_SMALL ? 4 : 6)
39 #else
40 #define RADIX_TREE_MAP_SHIFT 3 /* For more stressful testing */
41 #endif
42//每个radix_tree_node中slots数组的数据个数
43 #define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT)
//偏移mask
44 #define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1)
45//按long对齐,每个slot对应一个bit,计算需要多少个long
46 #define RADIX_TREE_TAG_LONGS \
47 ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG)
_______________________________________________________________________________
| count |
|-------------------------------------------------------------------------------|
| * | * |NULL| | | | | | |..... .....| | | | | | | | | | | |slots (三种可能的值
|-------------------------------------------------------------------------------|
|_________________32bit___________________|______________32bit__________________|tags
|_________________32bit___________________|______________32bit__________________|
file:lib/radix-tree.c
1090 void __init radix_tree_init(void)
1091 {
//在slab中建立cache
//radix_tree_node存放在slab中
1092 radix_tree_node_cachep = kmem_cache_create("radix_tree_node",
1093 sizeof(struct radix_tree_node), 0,
1094 SLAB_PANIC, radix_tree_node_ctor);
1095 radix_tree_init_maxindex();
1096 hotcpu_notifier(radix_tree_callback, 0);
1097 }
file:mm/filemap.c
591 /**
592 * find_get_page - find and get a page reference
593 * @mapping: the address_space to search
594 * @offset: the page index
595 *
596 * Is there a pagecache struct page at the given (mapping, offset) tuple?
597 * If yes, increment its refcount and return it; if no, return NULL.
598 */
599 struct page * find_get_page(struct address_space *mapping, pgoff_t offset)
600 {
601 struct page *page;
602
603 read_lock_irq(&mapping->tree_lock);
604 page = radix_tree_lookup(&mapping->page_tree, offset);
605 if (page)
606 page_cache_get(page);/*增加count*/
607 read_unlock_irq(&mapping->tree_lock);
608 return page;
609 }
610 EXPORT_SYMBOL(find_get_page);
file:lib/radix-tree.c
391 /**
392 * radix_tree_lookup - perform lookup operation on a radix tree
393 * @root: radix tree root
394 * @index: index key
395 *
396 * Lookup the item at the position @index in the radix tree @root.
397 *
398 * This function can be called under rcu_read_lock, however the caller
399 * must manage lifetimes of leaf nodes (eg. RCU may also be used to free
400 * them safely). No RCU barriers are required to access or modify the
401 * returned item, however.
402 */
403 void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
404 {
405 unsigned int height, shift;
406 struct radix_tree_node *node, **slot;
407
408 node = rcu_dereference(root->rnode);/*rcu保护*/
409 if (node == NULL)
410 return NULL;
411
412 if (!radix_tree_is_indirect_ptr(node)) {
413 if (index > 0)
414 return NULL;
415 return node;
416 }
417 node = radix_tree_indirect_to_ptr(node);
418
419 height = node->height;
420 if (index > radix_tree_maxindex(height))
421 return NULL;
422
423 shift = (height-1) * RADIX_TREE_MAP_SHIFT;
424 /*通过index查找slots得到node*/
425 do {
426 slot = (struct radix_tree_node **)
427 (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK));
428 node = rcu_dereference(*slot);
429 if (node == NULL) /*说明没有在内存中*/
430 return NULL;
431
432 shift -= RADIX_TREE_MAP_SHIFT;
433 height--;
434 } while (height > 0);
435
436 return node;
437 }
438 EXPORT_SYMBOL(radix_tree_lookup);
Trackback: http://tb.donews.net/TrackBack.aspx?PostId=1226349