来源:http://bbs.chinaunix.net/archiver/?tid-1925448.html
Btrfs的磁盘结构
Btrfs超级块磁盘结构
超级块记录着一个分区的整体信息。
Btrfs磁盘super_block结构用btrfs_super_block来描述,其定义为:
---------------------------------------------------------------------
fs/btrfs/ctree.h
330 struct btrfs_super_block {
331 u8 csum[BTRFS_CSUM_SIZE];
332 /* the first 4 fields must match struct btrfs_header */
333 u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
334 __le64 bytenr; /* this block number */
335 __le64 flags;
336
337 /* allowed to be different from the btrfs_header from here own down */
338 __le64 magic;
339 __le64 generation;
340 __le64 root;
341 __le64 chunk_root;
342 __le64 log_root;
343
344 /* this will help find the new super based on the log root */
345 __le64 log_root_transid;
346 __le64 total_bytes;
347 __le64 bytes_used;
348 __le64 root_dir_objectid;
349 __le64 num_devices;
350 __le32 sectorsize;
351 __le32 nodesize;
352 __le32 leafsize;
353 __le32 stripesize;
354 __le32 sys_chunk_array_size;
355 __le64 chunk_root_generation;
356 __le64 compat_flags;
357 __le64 compat_ro_flags;
358 __le64 incompat_flags;
359 __le16 csum_type;
360 u8 root_level;
361 u8 chunk_root_level;
362 u8 log_root_level;
363 struct btrfs_dev_item dev_item;
364
365 char label[BTRFS_LABEL_SIZE];
366
367 /* future expansion */
368 __le64 reserved[32];
369 u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
370 } __attribute__ ((__packed__));
---------------------------------------------------------------------
每个磁盘超级块中都包含一个设备item,其定义如下:
---------------------------------------------------------------------
fs/btrfs/ctree.h
184 struct btrfs_dev_item {
185 /* the internal btrfs device id */
186 __le64 devid;
187
188 /* size of the device */
189 __le64 total_bytes;
190
191 /* bytes used */
192 __le64 bytes_used;
193
194 /* optimal io alignment for this device */
195 __le32 io_align;
196
197 /* optimal io width for this device */
198 __le32 io_width;
199
200 /* minimal io size for this device */
201 __le32 sector_size;
202
203 /* type and info about this device */
204 __le64 type;
205
206 /* expected generation for this device */
207 __le64 generation;
208
209 /*
210 * starting byte of this partition on the device,
211 * to allow for stripe alignment in the future
212 */
213 __le64 start_offset;
214
215 /* grouping information for allocation decisions */
216 __le32 dev_group;
217
218 /* seek speed 0-100 where 100 is fastest */
219 u8 seek_speed;
220
221 /* bandwidth 0-100 where 100 is fastest */
222 u8 bandwidth;
223
224 /* btrfs generated uuid for this device */
225 u8 uuid[BTRFS_UUID_SIZE];
226
227 /* uuid of FS who owns this device */
228 u8 fsid[BTRFS_UUID_SIZE];
229 } __attribute__ ((__packed__));
---------------------------------------------------------------------
关于磁盘超级块,还可以看到如下的一些宏:
---------------------------------------------------------------------
fs/btrfs/disk-io.h
22 #define BTRFS_SUPER_INFO_OFFSET (64 * 1024)
23 #define BTRFS_SUPER_INFO_SIZE 4096
24
25 #define BTRFS_SUPER_MIRROR_MAX 3
26 #define BTRFS_SUPER_MIRROR_SHIFT 12
---------------------------------------------------------------------
我们可以看到磁盘超级块在块设备上的偏移为64 * 1024,其大小为4096,并且在磁盘上有3个镜像。
得先了解一下btrfs管理元数据的方式。Btrfs 内部所有的元数据都采用 BTree 管理,拥有良好的可扩展性。 btrfs 内部不同的元数据由不同的 Tree 管理。在 superblock 中,有指针指向这些 BTree 的根。
FS Tree 管理文件相关的元数据,如 inode,dir等; Chunk tree管理设备,每一个磁盘设备都在Chunk Tree中有一个item;Extent Tree管理磁盘空间分配,btrfs每分配一段磁盘空间,便将该磁盘空间的信息插入到Extent tree。查询Extent Tree将得到空闲的磁盘空间信息;checksum Tree 保存数据块的校验和;Tree of tree root保存很多 BTree 的根节点。比如用户每建立一个快照,btrfs 便会创建一个FS Tree。
为了管理所有的树,btrfs 采用 Tree of tree root来保存所有树的根节点,(super_block->s_fs_info = tree_root(struct btrfs_root), tree_root-> fs_info = fs_info(struct btrfs_fs_info)),也就是btrfs_fs_info结构,其定义为:
---------------------------------------------------------------------
fs/btrfs/ctree.h
802 struct btrfs_fs_info {
803 u8 fsid[BTRFS_FSID_SIZE];
804 u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
805 struct btrfs_root *extent_root;
806 struct btrfs_root *tree_root;
807 struct btrfs_root *chunk_root;
808 struct btrfs_root *dev_root;
809 struct btrfs_root *fs_root;
810 struct btrfs_root *csum_root;
811
812 /* the log root tree is a directory of all the other log roots */
813 struct btrfs_root *log_root_tree;
814
815 spinlock_t fs_roots_radix_lock;
816 struct radix_tree_root fs_roots_radix;
817
818 /* block group cache stuff */
819 spinlock_t block_group_cache_lock;
820 struct rb_root block_group_cache_tree;
821
822 struct extent_io_tree freed_extents[2];
823 struct extent_io_tree *pinned_extents;
824
825 /* logical->physical extent mapping */
826 struct btrfs_mapping_tree mapping_tree;
827
828 u64 generation;
829 u64 last_trans_committed;
988
989 void *bdev_holder;
990 };
---------------------------------------------------------------------
既然说到了Btree,就不能不提btrfs中的一些Btree设施,先来看下extent_buffer。顾名思义,它是extent在内存中的缓冲,是btrfs文件系统磁盘空间管理的核心。btrfs通过btree来管理各种元数据,比如inode、目录项等。这些B+树的每一个节点(包括叶子节点和上层节点)都存储在一个单位的extent中,每次要读取元数据或者要向磁盘写入元数据,通常先将数据读入extent_buffer或者向extent_buffer写入数据。
---------------------------------------------------------------------
fs/btrfs/extent-io.h
struct extent_buffer {
u64 start;
unsigned long len;
char *map_token;
char *kaddr;
unsigned long map_start;
unsigned long map_len;
struct page *first_page;
unsigned long bflags;
atomic_t refs;
struct list_head leak_list;
struct rb_node rb_node;
spinlock_t lock;
wait_queue_head_t lock_wq;
};
---------------------------------------------------------------------
接下来,来看这一个个的节点的构成,包括上层节点和叶子节点。
每一个树块(叶子或节点)都以header开始。其定义为:
---------------------------------------------------------------------
fs/btrfs/ctree.h
291 struct btrfs_header {
292 /* these first four must match the super block */
293 u8 csum[BTRFS_CSUM_SIZE];
294 u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
295 __le64 bytenr; /* which block this node is supposed to live in */
296 __le64 flags;
297
298 /* allowed to be different from the super from here on down */
299 u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
300 __le64 generation;
301 __le64 owner;
302 __le32 nritems;
303 u8 level;
304 } __attribute__ ((__packed__));
---------------------------------------------------------------------
而一个上层节点则由一个结构表示,其定义为:
---------------------------------------------------------------------
fs/btrfs/ctree.h
411 struct btrfs_key_ptr {
412 struct btrfs_disk_key key;
413 __le64 blockptr;
414 __le64 generation;
415 } __attribute__ ((__packed__));
416
417 struct btrfs_node {
418 struct btrfs_header header;
419 struct btrfs_key_ptr ptrs[];
420 } __attribute__ ((__packed__));
---------------------------------------------------------------------
用于管理btrfs各种元数据的各种B+树最大的不同显然就在这个btrfs_leaf的内容了。叶子节点的定义如下:
---------------------------------------------------------------------
fs/btrfs/ctree.h
389 struct btrfs_item {
390 struct btrfs_disk_key key;
391 __le32 offset;
392 __le32 size;
393 } __attribute__ ((__packed__));
394
402 struct btrfs_leaf {
403 struct btrfs_header header;
404 struct btrfs_item items[];
405 } __attribute__ ((__packed__));
---------------------------------------------------------------------
代码的注释中也有说明,叶子节点的结构大致如下图:
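(原文此处的示意图已丢失。按ctree.h中的注释,叶子节点由item区和data区组成,布局为:)
[header] [item0, item1 ... itemN] [free space] [dataN ... data1, data0]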
最主要区别就在于Data部分的内容,这部分内容则因树的类型的不同而不同,比如,如果是inode的btree,那么data部分的内容就是btrfs_inode_item结构,等等。
Btrfs磁盘inode结构
Btrfs磁盘上的inode结构用btrfs_inode_item结构来表示,其定义为:
---------------------------------------------------------------------
fs/btrfs/ctree.h
535 struct btrfs_inode_item {
536 /* nfs style generation number */
537 __le64 generation;
538 /* transid that last touched this inode */
539 __le64 transid;
540 __le64 size;
541 __le64 nbytes;
542 __le64 block_group;
543 __le32 nlink;
544 __le32 uid;
545 __le32 gid;
546 __le32 mode;
547 __le64 rdev;
548 __le64 flags;
549
550 /* modification sequence number for NFS */
551 __le64 sequence;
557 __le64 reserved[4];
558 struct btrfs_timespec atime;
559 struct btrfs_timespec ctime;
560 struct btrfs_timespec mtime;
561 struct btrfs_timespec otime;
562 } __attribute__ ((__packed__));
---------------------------------------------------------------------
上面atime、 ctime、mtime分别记录访问时间、改变时间和修改时间,其类型定义如下:
524 struct btrfs_timespec {
525 __le64 sec;
526 __le32 nsec;
527 } __attribute__ ((__packed__));
Btrfs磁盘目录项结构
Btrfs磁盘目录项使用一个btrfs_dir_item来表示,在磁盘上作为某个Btree的叶子节点的data项而存在,其定义为:
---------------------------------------------------------------------
fs/btrfs/ctree.h
568 struct btrfs_dir_item {
569 struct btrfs_disk_key location;
570 __le64 transid;
571 __le16 data_len;
572 __le16 name_len;
573 u8 type;
574 } __attribute__ ((__packed__));
---------------------------------------------------------------------
上面的location字段,顾名思义,指的应该是文件的位置,可为什么会是btrfs_disk_key,其实它指的是关联的文件在inode Btree中的关键字。Ext2 等文件系统中采用固定分配inode,磁盘目录项中含有文件索引节点号,而btrfs则使用关键字来定位文件。
上面的btrfs_disk_key定义如下:
---------------------------------------------------------------------
fs/btrfs/ctree.h
167 struct btrfs_disk_key {
168 __le64 objectid;
169 u8 type;
170 __le64 offset;
171 } __attribute__ ((__packed__));
172
173 struct btrfs_key {
174 u64 objectid;
175 u8 type;
176 u64 offset;
177 } __attribute__ ((__packed__));
---------------------------------------------------------------------
Btrfs使用一棵chunk树来管理磁盘设备,相关的磁盘结构如下:
---------------------------------------------------------------------
fs/btrfs/ctree.h
237 struct btrfs_chunk {
238 /* size of this chunk in bytes */
239 __le64 length;
240
241 /* objectid of the root referencing this chunk */
242 __le64 owner;
243
244 __le64 stripe_len;
245 __le64 type;
246
247 /* optimal io alignment for this chunk */
248 __le32 io_align;
249
250 /* optimal io width for this chunk */
251 __le32 io_width;
252
253 /* minimal io size for this chunk */
254 __le32 sector_size;
255
259 __le16 num_stripes;
260
261 /* sub stripes only matter for raid10 */
262 __le16 sub_stripes;
263 struct btrfs_stripe stripe;
264 /* additional stripes go here */
265 } __attribute__ ((__packed__));
231 struct btrfs_stripe {
232 __le64 devid;
233 __le64 offset;
234 u8 dev_uuid[BTRFS_UUID_SIZE];
235 } __attribute__ ((__packed__));
---------------------------------------------------------------------
在内核代码中还可以见到下面这些磁盘存储结构,和计算机屏幕对视n久,也没太搞明白它们是干什么的,比如,它们是哪棵树的叶子节点的数据部分等。诸位高人,如果有明白的,还请多多指教。谢谢了。
---------------------------------------------------------------------
fs/btrfs/ctree.h
455 struct btrfs_extent_item {
456 __le64 refs;
457 __le64 generation;
458 __le64 flags;
459 } __attribute__ ((__packed__));
---------------------------------------------------------------------
---------------------------------------------------------------------
fs/btrfs/ctree.h
461 struct btrfs_extent_item_v0 {
462 __le32 refs;
463 } __attribute__ ((__packed__));
476 struct btrfs_tree_block_info {
477 struct btrfs_disk_key key;
478 u8 level;
479 } __attribute__ ((__packed__));
480
481 struct btrfs_extent_data_ref {
482 __le64 root;
483 __le64 objectid;
484 __le64 offset;
485 __le32 count;
486 } __attribute__ ((__packed__));
487
488 struct btrfs_shared_data_ref {
489 __le32 count;
490 } __attribute__ ((__packed__));
491
492 struct btrfs_extent_inline_ref {
493 u8 type;
494 __le64 offset;
495 } __attribute__ ((__packed__));
---------------------------------------------------------------------
---------------------------------------------------------------------
fs/btrfs/ctree.h
498 struct btrfs_extent_ref_v0 {
499 __le64 root;
500 __le64 generation;
501 __le64 objectid;
502 __le32 count;
503 } __attribute__ ((__packed__));
---------------------------------------------------------------------
---------------------------------------------------------------------
fs/btrfs/ctree.h
510 struct btrfs_dev_extent {
511 __le64 chunk_tree;
512 __le64 chunk_objectid;
513 __le64 chunk_offset;
514 __le64 length;
515 u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
516 } __attribute__ ((__packed__));
---------------------------------------------------------------------
---------------------------------------------------------------------
fs/btrfs/ctree.h
518 struct btrfs_inode_ref {
519 __le64 index;
520 __le16 name_len;
521 /* name goes here */
522 } __attribute__ ((__packed__));
---------------------------------------------------------------------
---------------------------------------------------------------------
fs/btrfs/ctree.h
564 struct btrfs_dir_log_item {
565 __le64 end;
566 } __attribute__ ((__packed__));
---------------------------------------------------------------------
---------------------------------------------------------------------
fs/btrfs/ctree.h
594 struct btrfs_root_ref {
595 __le64 dirid;
596 __le64 sequence;
597 __le16 name_len;
598 } __attribute__ ((__packed__));
---------------------------------------------------------------------
---------------------------------------------------------------------
fs/btrfs/ctree.h
604 struct btrfs_file_extent_item {
608 __le64 generation;
616 __le64 ram_bytes;
617
625 u8 compression;
626 u8 encryption;
627 __le16 other_encoding; /* spare for later use */
630 u8 type;
631
636 __le64 disk_bytenr;
637 __le64 disk_num_bytes;
645 __le64 offset;
650 __le64 num_bytes;
651
652 } __attribute__ ((__packed__));
---------------------------------------------------------------------