Linux内核源代码情景分析-从路径名到目标节点

先看外包装,__user_walk,假设name为/usr/local/hello.c

int __user_walk(const char *name, unsigned flags, struct nameidata *nd)
{
	char *tmp;
	int err;

	tmp = getname(name);//在系统空间分配一个页面,并从用户空间把文件名复制到这个页面
	err = PTR_ERR(tmp);
	if (!IS_ERR(tmp)) {
		err = 0;
		if (path_init(tmp, flags, nd))
			err = path_walk(tmp, nd);
		putname(tmp);
	}
	return err;
}

第一个参数name为路径名。

第二个参数flags可以为以下的标志:

#define LOOKUP_FOLLOW		(1) //如果找到的目标只是"符号连接"到其它文件或者目录的一个目录项,则要顺着连接链一直找到终点
#define LOOKUP_DIRECTORY	(2) //表示要寻找的目标必须是个目录
#define LOOKUP_CONTINUE		(4)
#define LOOKUP_POSITIVE		(8)
#define LOOKUP_PARENT		(16) //找到父节点
#define LOOKUP_NOALT		(32)

第三个参数nd是个结构指针,数据结构nameidata的定义如下:

struct nameidata {
	struct dentry *dentry;
	struct vfsmount *mnt;
	struct qstr last;
	unsigned int flags;
	int last_type;
};

path_init,代码如下:

int path_init(const char *name, unsigned int flags, struct nameidata *nd)
{
	nd->last_type = LAST_ROOT; /* if there are only slashes... */
	nd->flags = flags;
	if (*name==‘/‘)
		return walk_init_root(name,nd);//如果路径名是以"/"开头的绝对路径,那就要通过这个函数从根目录开始查找
	read_lock(&current->fs->lock);
	nd->mnt = mntget(current->fs->pwdmnt);//如果路径名不以"/"开头,那就当前目录开始
	nd->dentry = dget(current->fs->pwd);
	read_unlock(&current->fs->lock);
	return 1;
}
static inline int
walk_init_root(const char *name, struct nameidata *nd)
{
	read_lock(&current->fs->lock);
	if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
		nd->mnt = mntget(current->fs->altrootmnt);
		nd->dentry = dget(current->fs->altroot);
		read_unlock(&current->fs->lock);
		if (__emul_lookup_dentry(name,nd))
			return 0;
		read_lock(&current->fs->lock);
	}
	nd->mnt = mntget(current->fs->rootmnt);
	nd->dentry = dget(current->fs->root);
	read_unlock(&current->fs->lock);
	return 1;
}

返回到__user_walk,继续执行path_walk,代码如下:

int path_walk(const char * name, struct nameidata *nd)//name为/usr/local/hello.c
{
	struct dentry *dentry;
	struct inode *inode;
	int err;
	unsigned int lookup_flags = nd->flags;

	while (*name==‘/‘)//跳过/usr/local/hello.c最开始的‘/‘
		name++;
	if (!*name)//如果只有‘/‘,那么直接退出
		goto return_base;

	inode = nd->dentry->d_inode;//根节点的indode结构
	if (current->link_count)
		lookup_flags = LOOKUP_FOLLOW;//如果找到的目标只是"符号连接"到其它文件或者目录的一个目录项,则要顺着连接链一直找到终点

	/* At this point we know we have a real path component. */
	for(;;) {//第一次循环
		unsigned long hash;
		struct qstr this;
		unsigned int c;

		err = permission(inode, MAY_EXEC);
		dentry = ERR_PTR(err);
 		if (err)
			break;

		this.name = name;//已经指向了usr的首字符‘u‘
		c = *(const unsigned char *)name;

		hash = init_name_hash();
		do {
			name++;
			hash = partial_name_hash(c, hash);
			c = *(const unsigned char *)name;
		} while (c && (c != ‘/‘));//如果字符c为空或者遇到了‘/‘则退出循环,本例中c指向的usr后面的‘/‘
		this.len = name - (const char *) this.name;//usr的长度
		this.hash = end_name_hash(hash);

		/* remove trailing slashes? */
		if (!c)//如果字符c为空,那么说明是最后一个节点,且最后一个节点是文件
			goto last_component;
		while (*++name == ‘/‘);//跳过‘/‘
		if (!*name)//如果‘/‘后面为空,那么说明这也是最后一个节点,且最后一个节点是目录
			goto last_with_slashes;

		/*
		 * "." and ".." are special - ".." especially so because it has
		 * to be able to know about the current root directory and
		 * parent relationships.
		 */
		if (this.name[0] == ‘.‘) switch (this.len) {//执行到这里,this代表中间节点,中间节点一定是目录
			default:
				break;
			case 2:
				if (this.name[1] != ‘.‘)//如果当前节点是..,那么要表示上一级目录
					break;
				follow_dotdot(nd);
				inode = nd->dentry->d_inode;
				/* fallthrough */
			case 1://如果当前的节点是 . ,表示当前目录
				continue;
		}
		/*
		 * See if the low-level filesystem might want
		 * to use its own hash..
		 */
		if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {//如果有d_hash这个函数
			err = nd->dentry->d_op->d_hash(nd->dentry, &this);//使用这个函数,重新计算hash值
			if (err < 0)
				break;
		}
		/* This does the actual lookups.. */
		dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE);//在内存中寻找该节点业已建立的dentry结构
		if (!dentry) {//如果没有找到
			dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE);//那么就要建立该节点的dentry结构
			err = PTR_ERR(dentry);
			if (IS_ERR(dentry))
				break;
		}
		/* Check mountpoints.. */
		while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))//检查是否是挂载点
			;

		err = -ENOENT;
		inode = dentry->d_inode;
		if (!inode)
			goto out_dput;
		err = -ENOTDIR;
		if (!inode->i_op)
			goto out_dput;

		if (inode->i_op->follow_link) {//看看这个指针是否为NULL,这个指针是在ext2_read_inode中设置的
			err = do_follow_link(dentry, nd);
			dput(dentry);
			if (err)
				goto return_err;
			err = -ENOENT;
			inode = nd->dentry->d_inode;
			if (!inode)
				break;
			err = -ENOTDIR;
			if (!inode->i_op)
				break;
		} else {
			dput(nd->dentry);
			nd->dentry = dentry;//如果没有,直接附给nd->dentry
		}
		err = -ENOTDIR;
		if (!inode->i_op->lookup)//iop指针也是ext2_read_inode中设置的为ext2_dir_inode_operations,因为当前/usr为目录节点,所以一定要有ext2_lookup这个指针
			break;
		continue;
		/* here ends the main loop */

last_with_slashes:
		......
last_component:
		......
}
struct qstr {
	const unsigned char * name;
	unsigned int len;
	unsigned int hash;
};

cached_lookup,在内存中寻找该节点业已建立的dentry结构,代码如下:

static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags)
{
	struct dentry * dentry = d_lookup(parent, name);

	if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
		if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) {
			dput(dentry);
			dentry = NULL;
		}
	}
	return dentry;
}
struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
{
	unsigned int len = name->len;
	unsigned int hash = name->hash;
	const unsigned char *str = name->name;
	struct list_head *head = d_hash(parent,hash);
	struct list_head *tmp;

	spin_lock(&dcache_lock);
	tmp = head->next;
	for (;;) {
		struct dentry * dentry = list_entry(tmp, struct dentry, d_hash);
		if (tmp == head)
			break;
		tmp = tmp->next;
		if (dentry->d_name.hash != hash)//比较hash值
			continue;
		if (dentry->d_parent != parent)//比较父节点的dentry结构是否一样
			continue;
		if (parent->d_op && parent->d_op->d_compare) {
			if (parent->d_op->d_compare(parent, &dentry->d_name, name))
				continue;
		} else {
			if (dentry->d_name.len != len)//比较长度
				continue;
			if (memcmp(dentry->d_name.name, str, len))//比较名字
				continue;
		}
		__dget_locked(dentry);
		dentry->d_flags |= DCACHE_REFERENCED;
		spin_unlock(&dcache_lock);
		return dentry;//如果找到则返回节点的dentry结构
	}
	spin_unlock(&dcache_lock);
	return NULL;//如果没有找到,返回NULL
}

内核中有个杂凑表dentry_hashtable,是一个list_head指针数组,一旦在内存中建立起一个目录节点的dentry结构,就根据其节点名的杂凑值挂入杂凑表中的某个队列,需要寻找时则还是根据杂凑值从杂凑表查找。d_hash就是根据杂凑值找到杂凑表中的某一个队列头。

static inline struct list_head * d_hash(struct dentry * parent, unsigned long hash)
{
	hash += (unsigned long) parent / L1_CACHE_BYTES;
	hash = hash ^ (hash >> D_HASHBITS) ^ (hash >> D_HASHBITS*2);
	return dentry_hashtable + (hash & D_HASHMASK);
}

根据name在队列里中寻找,是否已经存在这个dentry结构。

如果没有找到,那么要建立该节点的dentry结构,real_lookup代码如下:

static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags)
{
	struct dentry * result;
	struct inode *dir = parent->d_inode;

	down(&dir->i_sem);
	/*
	 * First re-do the cached lookup just in case it was created
	 * while we waited for the directory semaphore..
	 *
	 * FIXME! This could use version numbering or similar to
	 * avoid unnecessary cache lookups.
	 */
	result = d_lookup(parent, name);//再一次在内存中寻找该节点业已建立的dentry结构
	if (!result) {//如果还是没有找到
		struct dentry * dentry = d_alloc(parent, name);//分配该节点的dentry结构
		result = ERR_PTR(-ENOMEM);
		if (dentry) {
			lock_kernel();
			result = dir->i_op->lookup(dir, dentry);//分配该节点的inode结构,并挂在dentry结构上
			unlock_kernel();
			if (result)
				dput(dentry);
			else
				result = dentry;
		}
		up(&dir->i_sem);
		return result;
	}

	/*
	 * Uhhuh! Nasty case: the cache was re-populated while
	 * we waited on the semaphore. Need to revalidate.
	 */
	up(&dir->i_sem);
	if (result->d_op && result->d_op->d_revalidate) {
		if (!result->d_op->d_revalidate(result, flags) && !d_invalidate(result)) {
			dput(result);
			result = ERR_PTR(-ENOENT);
		}
	}
	return result;
}

d_alloc,分配该节点的dentry结构,代码如下:

struct dentry * d_alloc(struct dentry * parent, const struct qstr *name)
{
	char * str;
	struct dentry *dentry;

	dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL); //分配了dentry结构
	if (!dentry)
		return NULL;

	if (name->len > DNAME_INLINE_LEN-1) {
		str = kmalloc(NAME_ALLOC_LEN(name->len), GFP_KERNEL);
		if (!str) {
			kmem_cache_free(dentry_cache, dentry);
			return NULL;
		}
	} else
		str = dentry->d_iname; 

	memcpy(str, name->name, name->len);
	str[name->len] = 0;

	atomic_set(&dentry->d_count, 1);
	dentry->d_flags = 0;
	dentry->d_inode = NULL;
	dentry->d_parent = NULL;
	dentry->d_sb = NULL;
	dentry->d_name.name = str;//名字
	dentry->d_name.len = name->len;//长度
	dentry->d_name.hash = name->hash;//hash值
	dentry->d_op = NULL;
	dentry->d_fsdata = NULL;
	INIT_LIST_HEAD(&dentry->d_vfsmnt);//见下面关于dentry的说明
	INIT_LIST_HEAD(&dentry->d_hash);
	INIT_LIST_HEAD(&dentry->d_lru);
	INIT_LIST_HEAD(&dentry->d_subdirs);
	INIT_LIST_HEAD(&dentry->d_alias);
	if (parent) {
		dentry->d_parent = dget(parent);
		dentry->d_sb = parent->d_sb;
		spin_lock(&dcache_lock);
		list_add(&dentry->d_child, &parent->d_subdirs);
		spin_unlock(&dcache_lock);
	} else
		INIT_LIST_HEAD(&dentry->d_child);

	dentry_stat.nr_dentry++;
	return dentry;
}

其中dentry结构,如下:

struct dentry {
	atomic_t d_count;
	unsigned int d_flags;
	struct inode  * d_inode;	/* Where the name belongs to - NULL is negative */
	struct dentry * d_parent;	/* parent directory */
	struct list_head d_vfsmnt;
	struct list_head d_hash;	/* lookup hash list */
	struct list_head d_lru;		/* d_count = 0 LRU list */
	struct list_head d_child;	/* child of parent list */
	struct list_head d_subdirs;	/* our children */
	struct list_head d_alias;	/* inode alias list */
	struct qstr d_name;
	unsigned long d_time;		/* used by d_revalidate */
	struct dentry_operations  *d_op;
	struct super_block * d_sb;	/* The root of the dentry tree */
	unsigned long d_reftime;	/* last time referenced */
	void * d_fsdata;		/* fs-specific data */
	unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */
};

1、每个dentry结构都通过队列头d_hash链入杂凑表dentry_hashtable中的某个队列里。

2、共享计数为0的dentry结构都通过队列头d_lru链入LRU队列dentry_unused。在队列中等待释放或者"东山再起"。

3、每个dentry结构都通过指针d_inode指向一个inode数据结构。但是多个dentry结构可以指向同一个indoe数据结构。

4、指向同一个inode数据结构的dentry结构都通过队列头d_alias链接在一起,都在该inode结构的i_dentry队列中。

5、每个dentry结构都通过指针d_parent指向其父目录节点的dentry结构,并通过队列头d_child跟同一目录中的其它节点的dentry结构链接在一起,都在父目录节点的d_subdirs队列中。

6、每个dentry结构都通过指针d_sb指向一个super_block数据结构。

7、每个dentry结构都通过指针d_op指向一个dentry_operations数据结构。

8、每个dentry结构都有个队列头d_vfsmnt,用于文件系统的安装。

返回到real_lookup中,如果该节点的dentry结构分配成功,那么调用dir->i_op->lookup(dir, dentry)来分配该节点的inode结构,并挂在dentry结构上。其中dir是父目录节点的inode结构。

对于ext2文件系统来说,lookup为ext2_lookup。代码如下:

static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry)
{
	struct inode * inode;
	struct ext2_dir_entry_2 * de;
	struct buffer_head * bh;

	if (dentry->d_name.len > EXT2_NAME_LEN)
		return ERR_PTR(-ENAMETOOLONG);

	bh = ext2_find_entry (dir, dentry->d_name.name, dentry->d_name.len, &de);//根据父节点的inode结构中inode->u.ext2_i.i_data找到对应的目录项
	inode = NULL;
	if (bh) {
		unsigned long ino = le32_to_cpu(de->inode);//得到该节点inode结构的节点号
		brelse (bh);
		inode = iget(dir->i_sb, ino);//通过这个节点号寻找该节点的inode结构

		if (!inode)
			return ERR_PTR(-EACCES);
	}
	d_add(dentry, inode);//把该节点的inode结构和dentry连接在一起
	return NULL;
}

有关文件系统的细节,请看文件系统,用这篇文章的术语,ext2_find_entry根据父节点的inode结构中inode->u.ext2_i.i_data找到对应的目录项,这个目录项的结构是ext2_dir_entry_2结构,读者在前面已经看过dentry结构,它们的区别是ext2_dir_entry_2结构对应硬盘上的页目录项结构,而dentry是动态建立的。

struct ext2_dir_entry_2 {
	__u32	inode;			/* Inode number */
	__u16	rec_len;		/* Directory entry length */
	__u8	name_len;		/* Name length */
	__u8	file_type;
	char	name[EXT2_NAME_LEN];	/* File name */
};

ext2_find_entry找到的数据块通常有许多目录项,根据dentry->d_name.name, dentry->d_name.len找到对应的目录项结构ext2_dir_entry_2。

然后根据de->inode,得到该节点inode结构的节点号,通过这个节点号寻找该节点的inode结构,iget如下:

static inline struct inode *iget(struct super_block *sb, unsigned long ino)
{
	return iget4(sb, ino, NULL, NULL);
}
struct inode *iget4(struct super_block *sb, unsigned long ino, find_inode_t find_actor, void *opaque)
{
	struct list_head * head = inode_hashtable + hash(sb,ino);
	struct inode * inode;

	spin_lock(&inode_lock);
	inode = find_inode(sb, ino, head, find_actor, opaque);//在杂凑表队列中寻找
	if (inode) {//找到了
		__iget(inode);//递增计数
		spin_unlock(&inode_lock);
		wait_on_inode(inode);
		return inode;
	}
	spin_unlock(&inode_lock);

	/*
	 * get_new_inode() will do the right thing, re-trying the search
	 * in case it had to block at any point.
	 */
	return get_new_inode(sb, ino, head, find_actor, opaque);//分配一个新的该节点的inode结构
}

inode结构也有个杂凑表inode_hashtable,已经建立的inode结构都通过结构中的i_hash(也是一个list_head)挂在该杂凑表的某一队列中,所以首先要通过find_inode()在杂凑表队列中寻找。

static struct inode * find_inode(struct super_block * sb, unsigned long ino, struct list_head *head, find_inode_t find_actor, void *opaque)
{
	struct list_head *tmp;
	struct inode * inode;

	tmp = head;
	for (;;) {
		tmp = tmp->next;
		inode = NULL;
		if (tmp == head)
			break;
		inode = list_entry(tmp, struct inode, i_hash);
		if (inode->i_ino != ino)//对比节点号
			continue;
		if (inode->i_sb != sb)//和超级块结构
			continue;
		if (find_actor && !find_actor(inode, ino, opaque))
			continue;
		break;
	}
	return inode;
}

如果没有找到,get_new_inode分配一个新的该节点的inode结构,代码如下:

static struct inode * get_new_inode(struct super_block *sb, unsigned long ino, struct list_head *head, find_inode_t find_actor, void *opaque)
{
	struct inode * inode;

	inode = alloc_inode();
	if (inode) {
		struct inode * old;

		spin_lock(&inode_lock);
		/* We released the lock, so.. */
		old = find_inode(sb, ino, head, find_actor, opaque);//再一次在杂凑表队列中寻找
		if (!old) {//如果没有找到
			inodes_stat.nr_inodes++;
			list_add(&inode->i_list, &inode_in_use);
			list_add(&inode->i_hash, head);//加入到对应的hash表
			inode->i_sb = sb;//超级块结构
			inode->i_dev = sb->s_dev;//设备号
			inode->i_ino = ino;//节点号
			inode->i_flags = 0;
			atomic_set(&inode->i_count, 1);
			inode->i_state = I_LOCK;
			spin_unlock(&inode_lock);

			clean_inode(inode);
			sb->s_op->read_inode(inode);

			/*
			 * This is special!  We do not need the spinlock
			 * when clearing I_LOCK, because we‘re guaranteed
			 * that nobody else tries to do anything about the
			 * state of the inode when it is locked, as we
			 * just created it (so there can be no old holders
			 * that haven‘t tested I_LOCK).
			 */
			inode->i_state &= ~I_LOCK;
			wake_up(&inode->i_wait);

			return inode;
		}

		/*
		 * Uhhuh, somebody else created the same inode under
		 * us. Use the old inode instead of the one we just
		 * allocated.
		 */
		__iget(old);//如果找到了inode结构
		spin_unlock(&inode_lock);
		destroy_inode(inode);
		inode = old;//使用找到的inode结构
		wait_on_inode(inode);
	}
	return inode;
}

alloc_inode,分配一个新的inode结构,代码如下:

#define alloc_inode() 	 ((struct inode *) kmem_cache_alloc(inode_cachep, SLAB_KERNEL))

sb->s_op->read_inode,把硬盘上的inode结构读入,并合理的赋值给内存中这个动态的inode。下面我们分别介绍下动态的inode结构和硬盘上的ext2_inode结构。

动态的inode结构:

struct inode {
	struct list_head	i_hash;
	struct list_head	i_list;
	struct list_head	i_dentry;

	struct list_head	i_dirty_buffers;

	unsigned long		i_ino;
	atomic_t		i_count;
	kdev_t			i_dev;
	umode_t			i_mode;
	nlink_t			i_nlink;
	uid_t			i_uid;
	gid_t			i_gid;
	kdev_t			i_rdev;
	loff_t			i_size;
	time_t			i_atime;
	time_t			i_mtime;
	time_t			i_ctime;
	unsigned long		i_blksize;
	unsigned long		i_blocks;
	unsigned long		i_version;
	struct semaphore	i_sem;
	struct semaphore	i_zombie;
	struct inode_operations	*i_op;
	struct file_operations	*i_fop;	/* former ->i_op->default_file_ops */
	struct super_block	*i_sb;
	wait_queue_head_t	i_wait;
	struct file_lock	*i_flock;
	struct address_space	*i_mapping;
	struct address_space	i_data;
	struct dquot		*i_dquot[MAXQUOTAS];
	struct pipe_inode_info	*i_pipe;
	struct block_device	*i_bdev;

	unsigned long		i_dnotify_mask; /* Directory notify events */
	struct dnotify_struct	*i_dnotify; /* for directory notifications */

	unsigned long		i_state;

	unsigned int		i_flags;
	unsigned char		i_sock;

	atomic_t		i_writecount;
	unsigned int		i_attr_flags;
	__u32			i_generation;
	union {
		struct minix_inode_info		minix_i;
		struct ext2_inode_info		ext2_i;
		struct hpfs_inode_info		hpfs_i;
		struct ntfs_inode_info		ntfs_i;
		struct msdos_inode_info		msdos_i;
		struct umsdos_inode_info	umsdos_i;
		struct iso_inode_info		isofs_i;
		struct nfs_inode_info		nfs_i;
		struct sysv_inode_info		sysv_i;
		struct affs_inode_info		affs_i;
		struct ufs_inode_info		ufs_i;
		struct efs_inode_info		efs_i;
		struct romfs_inode_info		romfs_i;
		struct shmem_inode_info		shmem_i;
		struct coda_inode_info		coda_i;
		struct smb_inode_info		smbfs_i;
		struct hfs_inode_info		hfs_i;
		struct adfs_inode_info		adfs_i;
		struct qnx4_inode_info		qnx4_i;
		struct bfs_inode_info		bfs_i;
		struct udf_inode_info		udf_i;
		struct ncp_inode_info		ncpfs_i;
		struct proc_inode_info		proc_i;
		struct socket			socket_i;
		struct usbdev_inode_info        usbdev_i;
		void				*generic_ip;
	} u;
}

硬盘上的ext2_inode结构:

struct ext2_inode {
	__u16	i_mode;		/* File mode */
	__u16	i_uid;		/* Low 16 bits of Owner Uid */
	__u32	i_size;		/* Size in bytes */
	__u32	i_atime;	/* Access time */
	__u32	i_ctime;	/* Creation time */
	__u32	i_mtime;	/* Modification time */
	__u32	i_dtime;	/* Deletion Time */
	__u16	i_gid;		/* Low 16 bits of Group Id */
	__u16	i_links_count;	/* Links count */
	__u32	i_blocks;	/* Blocks count */
	__u32	i_flags;	/* File flags */
	union {
		struct {
			__u32  l_i_reserved1;
		} linux1;
		struct {
			__u32  h_i_translator;
		} hurd1;
		struct {
			__u32  m_i_reserved1;
		} masix1;
	} osd1;				/* OS dependent 1 */
	__u32	i_block[EXT2_N_BLOCKS];/* Pointers to blocks */
	__u32	i_generation;	/* File version (for NFS) */
	__u32	i_file_acl;	/* File ACL */
	__u32	i_dir_acl;	/* Directory ACL */
	__u32	i_faddr;	/* Fragment address */
	union {
		struct {
			__u8	l_i_frag;	/* Fragment number */
			__u8	l_i_fsize;	/* Fragment size */
			__u16	i_pad1;
			__u16	l_i_uid_high;	/* these 2 fields    */
			__u16	l_i_gid_high;	/* were reserved2[0] */
			__u32	l_i_reserved2;
		} linux2;
		struct {
			__u8	h_i_frag;	/* Fragment number */
			__u8	h_i_fsize;	/* Fragment size */
			__u16	h_i_mode_high;
			__u16	h_i_uid_high;
			__u16	h_i_gid_high;
			__u32	h_i_author;
		} hurd2;
		struct {
			__u8	m_i_frag;	/* Fragment number */
			__u8	m_i_fsize;	/* Fragment size */
			__u16	m_pad1;
			__u32	m_i_reserved2[2];
		} masix2;
	} osd2;				/* OS dependent 2 */
}

sb->s_op->read_inode,把硬盘上的inode结构读入,并合理的赋值给内存中这个动态的inode。

void ext2_read_inode (struct inode * inode)
{
	struct buffer_head * bh;
	struct ext2_inode * raw_inode;
	unsigned long block_group;
	unsigned long group_desc;
	unsigned long desc;
	unsigned long block;
	unsigned long offset;
	struct ext2_group_desc * gdp;

	if ((inode->i_ino != EXT2_ROOT_INO && inode->i_ino != EXT2_ACL_IDX_INO &&
	     inode->i_ino != EXT2_ACL_DATA_INO &&
	     inode->i_ino < EXT2_FIRST_INO(inode->i_sb)) ||
	    inode->i_ino > le32_to_cpu(inode->i_sb->u.ext2_sb.s_es->s_inodes_count)) {
		ext2_error (inode->i_sb, "ext2_read_inode",
			    "bad inode number: %lu", inode->i_ino);
		goto bad_inode;
	}
	block_group = (inode->i_ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb);
	if (block_group >= inode->i_sb->u.ext2_sb.s_groups_count) {
		ext2_error (inode->i_sb, "ext2_read_inode",
			    "group >= groups count");
		goto bad_inode;
	}
	group_desc = block_group >> EXT2_DESC_PER_BLOCK_BITS(inode->i_sb);
	desc = block_group & (EXT2_DESC_PER_BLOCK(inode->i_sb) - 1);
	bh = inode->i_sb->u.ext2_sb.s_group_desc[group_desc];
	if (!bh) {
		ext2_error (inode->i_sb, "ext2_read_inode",
			    "Descriptor not loaded");
		goto bad_inode;
	}

	gdp = (struct ext2_group_desc *) bh->b_data;
	/*
	 * Figure out the offset within the block group inode table
	 */
	offset = ((inode->i_ino - 1) % EXT2_INODES_PER_GROUP(inode->i_sb)) *
		EXT2_INODE_SIZE(inode->i_sb);
	block = le32_to_cpu(gdp[desc].bg_inode_table) +
		(offset >> EXT2_BLOCK_SIZE_BITS(inode->i_sb));
	if (!(bh = bread (inode->i_dev, block, inode->i_sb->s_blocksize))) {
		ext2_error (inode->i_sb, "ext2_read_inode",
			    "unable to read inode block - "
			    "inode=%lu, block=%lu", inode->i_ino, block);
		goto bad_inode;
	}
	offset &= (EXT2_BLOCK_SIZE(inode->i_sb) - 1);
	raw_inode = (struct ext2_inode *) (bh->b_data + offset);//根据节点号,找到硬盘上的ext2_inode结构

	inode->i_mode = le16_to_cpu(raw_inode->i_mode);//mode也来来源于ext2_inode结构
	inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
	inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
	if(!(test_opt (inode->i_sb, NO_UID32))) {
		inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
		inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
	}
	inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
	inode->i_size = le32_to_cpu(raw_inode->i_size);
	inode->i_atime = le32_to_cpu(raw_inode->i_atime);
	inode->i_ctime = le32_to_cpu(raw_inode->i_ctime);
	inode->i_mtime = le32_to_cpu(raw_inode->i_mtime);
	inode->u.ext2_i.i_dtime = le32_to_cpu(raw_inode->i_dtime);
	/* We now have enough fields to check if the inode was active or not.
	 * This is needed because nfsd might try to access dead inodes
	 * the test is that same one that e2fsck uses
	 * NeilBrown 1999oct15
	 */
	if (inode->i_nlink == 0 && (inode->i_mode == 0 || inode->u.ext2_i.i_dtime)) {
		/* this inode is deleted */
		brelse (bh);
		goto bad_inode;
	}
	inode->i_blksize = PAGE_SIZE;	/* This is the optimal IO size (for stat), not the fs block size */
	inode->i_blocks = le32_to_cpu(raw_inode->i_blocks);
	inode->i_version = ++event;
	inode->u.ext2_i.i_flags = le32_to_cpu(raw_inode->i_flags);//联合体的ext2_i被复制
	inode->u.ext2_i.i_faddr = le32_to_cpu(raw_inode->i_faddr);
	inode->u.ext2_i.i_frag_no = raw_inode->i_frag;
	inode->u.ext2_i.i_frag_size = raw_inode->i_fsize;
	inode->u.ext2_i.i_file_acl = le32_to_cpu(raw_inode->i_file_acl);
	if (S_ISDIR(inode->i_mode))
		inode->u.ext2_i.i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl);
	else {
		inode->u.ext2_i.i_high_size = le32_to_cpu(raw_inode->i_size_high);
		inode->i_size |= ((__u64)le32_to_cpu(raw_inode->i_size_high)) << 32;
	}
	inode->i_generation = le32_to_cpu(raw_inode->i_generation);
	inode->u.ext2_i.i_block_group = block_group;

	/*
	 * NOTE! The in-memory inode i_data array is in little-endian order
	 * even on big-endian machines: we do NOT byteswap the block numbers!
	 */
	for (block = 0; block < EXT2_N_BLOCKS; block++)
		inode->u.ext2_i.i_data[block] = raw_inode->i_block[block];

	if (inode->i_ino == EXT2_ACL_IDX_INO ||
	    inode->i_ino == EXT2_ACL_DATA_INO)
		/* Nothing to do */ ;
	else if (S_ISREG(inode->i_mode)) {//根据不同的该节点的inode结构不同状态,设置不同的指针
		inode->i_op = &ext2_file_inode_operations;
		inode->i_fop = &ext2_file_operations;
		inode->i_mapping->a_ops = &ext2_aops;
	} else if (S_ISDIR(inode->i_mode)) {//目前是目录
		inode->i_op = &ext2_dir_inode_operations;
		inode->i_fop = &ext2_dir_operations;
	} else if (S_ISLNK(inode->i_mode)) {
		if (!inode->i_blocks)
			inode->i_op = &ext2_fast_symlink_inode_operations;
		else {
			inode->i_op = &page_symlink_inode_operations;
			inode->i_mapping->a_ops = &ext2_aops;
		}
	} else
		init_special_inode(inode, inode->i_mode,
				   le32_to_cpu(raw_inode->i_block[0]));
	brelse (bh);
	inode->i_attr_flags = 0;
	if (inode->u.ext2_i.i_flags & EXT2_SYNC_FL) {
		inode->i_attr_flags |= ATTR_FLAG_SYNCRONOUS;
		inode->i_flags |= S_SYNC;
	}
	if (inode->u.ext2_i.i_flags & EXT2_APPEND_FL) {
		inode->i_attr_flags |= ATTR_FLAG_APPEND;
		inode->i_flags |= S_APPEND;
	}
	if (inode->u.ext2_i.i_flags & EXT2_IMMUTABLE_FL) {
		inode->i_attr_flags |= ATTR_FLAG_IMMUTABLE;
		inode->i_flags |= S_IMMUTABLE;
	}
	if (inode->u.ext2_i.i_flags & EXT2_NOATIME_FL) {
		inode->i_attr_flags |= ATTR_FLAG_NOATIME;
		inode->i_flags |= S_NOATIME;
	}
	return;

bad_inode:
	make_bad_inode(inode);
	return;
}

其中ext2_dir_inode_operations如下:

struct inode_operations ext2_dir_inode_operations = {
	create:		ext2_create,
	lookup:		ext2_lookup,
	link:		ext2_link,
	unlink:		ext2_unlink,
	symlink:	ext2_symlink,
	mkdir:		ext2_mkdir,
	rmdir:		ext2_rmdir,
	mknod:		ext2_mknod,
	rename:		ext2_rename,
};

iget获取到该节点的inode结构后,就返回ext2_lookup,继续执行d_add(dentry, inode),把该节点的inode结构和dentry连接在一起。

static __inline__ void d_add(struct dentry * entry, struct inode * inode)
{
	d_instantiate(entry, inode);//该节点的inode结构和dentry连接在一起
	d_rehash(entry);//把dentry结构放入杂凑表的某一个队里中
}
void d_instantiate(struct dentry *entry, struct inode * inode)
{
	spin_lock(&dcache_lock);
	if (inode)
		list_add(&entry->d_alias, &inode->i_dentry);//可能一个inode结构对应多个entry结构
	entry->d_inode = inode;//链接在一起了
	spin_unlock(&dcache_lock);
}
void d_rehash(struct dentry * entry)
{
	struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash);
	spin_lock(&dcache_lock);
	list_add(&entry->d_hash, list);
	spin_unlock(&dcache_lock);
}

而inode结构放入杂凑表的某个队列是在get_new_inode里面:

list_add(&inode->i_hash, head);

real_lookup返回该节点的dentry结构。然后检查是否是挂在点,如果不是nd->dentry被赋值为dentry。由于是目录节点,所有lookup指针不能为NULL。

第一轮循环,执行到这里,continue开始第二轮循环。

		if (!inode->i_op->lookup)
			break;
		continue;

第二轮循环,如下:

int path_walk(const char * name, struct nameidata *nd)
{
	struct dentry *dentry;
	struct inode *inode;
	int err;
	unsigned int lookup_flags = nd->flags;

	while (*name==‘/‘)
		name++;
	if (!*name)
		goto return_base;

	inode = nd->dentry->d_inode;
	if (current->link_count)
		lookup_flags = LOOKUP_FOLLOW;

	/* At this point we know we have a real path component. */
	for(;;) {
		unsigned long hash;
		struct qstr this;
		unsigned int c;

		err = permission(inode, MAY_EXEC);
		dentry = ERR_PTR(err);
 		if (err)
			break;

		this.name = name;//指向local的第一个字符‘l‘
		c = *(const unsigned char *)name;

		hash = init_name_hash();
		do {
			name++;
			hash = partial_name_hash(c, hash);
			c = *(const unsigned char *)name;
		} while (c && (c != ‘/‘));
		this.len = name - (const char *) this.name;//local的长度
		this.hash = end_name_hash(hash);//hash值

		/* remove trailing slashes? */
		if (!c)
			goto last_component;
		while (*++name == ‘/‘);
		if (!*name)
			goto last_with_slashes;

		/*
		 * "." and ".." are special - ".." especially so because it has
		 * to be able to know about the current root directory and
		 * parent relationships.
		 */
		if (this.name[0] == ‘.‘) switch (this.len) {
			default:
				break;
			case 2:
				if (this.name[1] != ‘.‘)
					break;
				follow_dotdot(nd);
				inode = nd->dentry->d_inode;
				/* fallthrough */
			case 1:
				continue;
		}
		/*
		 * See if the low-level filesystem might want
		 * to use its own hash..
		 */
		if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
			err = nd->dentry->d_op->d_hash(nd->dentry, &this);
			if (err < 0)
				break;
		}
		/* This does the actual lookups.. */
		dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
		if (!dentry) {
			dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
			err = PTR_ERR(dentry);
			if (IS_ERR(dentry))
				break;
		}
		/* Check mountpoints.. */
		while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
			;

		err = -ENOENT;
		inode = dentry->d_inode;
		if (!inode)
			goto out_dput;
		err = -ENOTDIR;
		if (!inode->i_op)
			goto out_dput;

		if (inode->i_op->follow_link) {
			err = do_follow_link(dentry, nd);
			dput(dentry);
			if (err)
				goto return_err;
			err = -ENOENT;
			inode = nd->dentry->d_inode;
			if (!inode)
				break;
			err = -ENOTDIR;
			if (!inode->i_op)
				break;
		} else {
			dput(nd->dentry);
			nd->dentry = dentry;
		}
		err = -ENOTDIR;
		if (!inode->i_op->lookup)
			break;
		continue;
		/* here ends the main loop */

last_with_slashes:
		......
last_component:
		......
}

除上面注释外,其他的流程和和第一次循环都一样。

第二轮循环,也是执行到这里,continue开始第三轮循环:

		if (!inode->i_op->lookup)//由于/usr/local/hello.c,local仍然是目录节点
			break;
		continue;

第三轮循环,如下:

int path_walk(const char * name, struct nameidata *nd)
{
	struct dentry *dentry;
	struct inode *inode;
	int err;
	unsigned int lookup_flags = nd->flags;

	while (*name==‘/‘)
		name++;
	if (!*name)
		goto return_base;

	inode = nd->dentry->d_inode;
	if (current->link_count)
		lookup_flags = LOOKUP_FOLLOW;

	/* At this point we know we have a real path component. */
	for(;;) {
		unsigned long hash;
		struct qstr this;
		unsigned int c;

		err = permission(inode, MAY_EXEC);
		dentry = ERR_PTR(err);
 		if (err)
			break;

		this.name = name;//指向了hello.c的第一个字符‘h‘
		c = *(const unsigned char *)name;

		hash = init_name_hash();
		do {
			name++;
			hash = partial_name_hash(c, hash);
			c = *(const unsigned char *)name;
		} while (c && (c != ‘/‘));
		this.len = name - (const char *) this.name;//hello.c的长度
		this.hash = end_name_hash(hash);

		/* remove trailing slashes? */
		if (!c)//c为NULL了
			goto last_component;
		while (*++name == ‘/‘);
		if (!*name)
			goto last_with_slashes;

		.....

last_with_slashes:
		lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;//如果最后一个节点是目录,那么要加上这两个标志位
last_component:
		if (lookup_flags & LOOKUP_PARENT)//这个标志位表示是要寻找父节点
			goto lookup_parent;
		if (this.name[0] == ‘.‘) switch (this.len) {//如果最后一个节点是目录,且是dot或者dotdot
			default:
				break;
			case 2:
				if (this.name[1] != ‘.‘)
					break;
				follow_dotdot(nd);
				inode = nd->dentry->d_inode;
				/* fallthrough */
			case 1:
				goto return_base;
		}
		if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
			err = nd->dentry->d_op->d_hash(nd->dentry, &this);
			if (err < 0)
				break;
		}
		dentry = cached_lookup(nd->dentry, &this, 0);
		if (!dentry) {
			dentry = real_lookup(nd->dentry, &this, 0);//本次寻找的节点是文件
			err = PTR_ERR(dentry);
			if (IS_ERR(dentry))
				break;
		}
		while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))//是否是挂载点
			;
		inode = dentry->d_inode;
		if ((lookup_flags & LOOKUP_FOLLOW)//和第一次和第二次循环不同,必须LOOKUP_FOLLOW标志位置1
		    && inode && inode->i_op && inode->i_op->follow_link) {
			err = do_follow_link(dentry, nd);
			dput(dentry);
			if (err)
				goto return_err;
			inode = nd->dentry->d_inode;
		} else {
			dput(nd->dentry);
			nd->dentry = dentry;//赋值给nd->dentry
		}
		err = -ENOENT;
		if (!inode)
			goto no_inode;
		if (lookup_flags & LOOKUP_DIRECTORY) {//如果最后一个节点是目录,也就是这个标志为置1的时候,还要检查指针是否为NULL
			err = -ENOTDIR;
			if (!inode->i_op || !inode->i_op->lookup)
				break;
		}
		goto return_base;
no_inode:
		err = -ENOENT;
		if (lookup_flags & (LOOKUP_POSITIVE|LOOKUP_DIRECTORY))
			break;
		goto return_base;
lookup_parent:
		nd->last = this;//nd->dentry是父节点的dentry结构
		nd->last_type = LAST_NORM;
		if (this.name[0] != ‘.‘)
			goto return_base;
		if (this.len == 1)
			nd->last_type = LAST_DOT;
		else if (this.len == 2 && this.name[1] == ‘.‘)
			nd->last_type = LAST_DOTDOT;
return_base:
		return 0;
out_dput:
		dput(dentry);
		break;
	}
	path_release(nd);
return_err:
	return err;
}

real_lookup由于hello.c节点是文件,所以执行如下代码段:

if (inode->i_ino == EXT2_ACL_IDX_INO ||
	    inode->i_ino == EXT2_ACL_DATA_INO)
		/* Nothing to do */ ;
	else if (S_ISREG(inode->i_mode)) {//hello.c是文件,所以执行这段代码
		inode->i_op = &ext2_file_inode_operations;
		inode->i_fop = &ext2_file_operations;
		inode->i_mapping->a_ops = &ext2_aops;
	} else if (S_ISDIR(inode->i_mode)) {
		inode->i_op = &ext2_dir_inode_operations;
		inode->i_fop = &ext2_dir_operations;
	} else if (S_ISLNK(inode->i_mode)) {
		if (!inode->i_blocks)
			inode->i_op = &ext2_fast_symlink_inode_operations;
		else {
			inode->i_op = &page_symlink_inode_operations;
			inode->i_mapping->a_ops = &ext2_aops;
		}
	} else
		init_special_inode(inode, inode->i_mode,
				   le32_to_cpu(raw_inode->i_block[0]));

至此,path_walk的三层循环,都分析完了,nd->dentry最终指向了/usr/local/hello.c这个文件节点的dentry结构,其中的d_inode指向了该节点的inode结构。

时间: 2024-11-05 16:36:38

Linux内核源代码情景分析-从路径名到目标节点的相关文章

Linux内核源代码情景分析-文件系统的安装

执行sudo mount -t ext2 /dev/sdb1 /mnt/sdb,将文件系统挂在到/mnt/sdb上.系统调用mount,映射到内核层执行的是sys_mount.假设/dev/sdb1和/mnt/sdb都位于ext2文件系统中. asmlinkage long sys_mount(char * dev_name, char * dir_name, char * type, unsigned long flags, void * data)//dev_name指向了"/dev/sdb

Linux内核源代码情景分析-访问权限与文件安全性

在Linux内核源代码情景分析-从路径名到目标节点,一文中path_walk代码中,err = permission(inode, MAY_EXEC)当前进程是否可以访问这个节点,代码如下: int permission(struct inode * inode,int mask) { if (inode->i_op && inode->i_op->permission) { int retval; lock_kernel(); retval = inode->i_

Linux内核源代码情景分析-特殊文件系统/proc-对/proc/self/cwd的访问

继上篇文章Linux内核源代码情景分析-特殊文件系统/proc,我们对/proc/loadavg访问后,这篇文章是对/proc/self/cwd的访问. int __user_walk(const char *name, unsigned flags, struct nameidata *nd) { char *tmp; int err; tmp = getname(name);//在系统空间分配一个页面,并从用户空间把文件名复制到这个页面 err = PTR_ERR(tmp); if (!IS

Linux内核源代码情景分析-虚拟文件系统

我们先来看两张图: 第一张是VFS与具体文件系统的关系示意图: 第二张是Linux文件系统的层次结构: 特殊文件:用来实现"管道"的文件,特别是"命名管道"的FIFO文件,还有Unix域的socket,也都属于特殊文件:还有在/proc目录下的一系列文件. 磁盘文件:就是存在硬盘上的文件. 设备文件:sudo mount -t ext2 /dev/sdb1 /mnt/sdb,这里的/dev/sdb1就是设备文件.如果硬盘上的节点raw_inode->i_blo

Linux内核源代码情景分析-进程间通信-命名管道

建立命名管道,mknod mypipe p.命名管道存在硬盘上,而管道不是. 通过open打开这个命名管道,在内核中通过sys_open()实现,filename是"***/mypipe ". 相关部分,请参考Linux内核源代码情景分析-文件的打开. sys_open进入filp_open,然后在open_namei中调用一个函数path_walk(),根据文件的路径名在文件系统中找到代表这个文件的inode.在将磁盘上的inode读入内存时,要根据文件的类型(FIFO文件的S_IF

Linux内核源代码情景分析-内存管理之slab-回收

在上一篇文章Linux内核源代码情景分析-内存管理之slab-分配与释放,最后形成了如下图的结构: 图 1 我们看到空闲slab块占用的若干页面,不会自己释放:我们是通过kmem_cache_reap和kmem_cache_shrink来回收的.他们的区别是: 1.我们先看kmem_cache_shrink,代码如下: int kmem_cache_shrink(kmem_cache_t *cachep) { if (!cachep || in_interrupt() || !is_chaine

Linux内核源代码情景分析-系统调用mknod

普通文件可以用open或者create创建,FIFO文件可以用pipe创建,mknod主要用于设备文件的创建. 在内核中,mknod是由sys_mknod实现的,代码如下: asmlinkage long sys_mknod(const char * filename, int mode, dev_t dev) //比如filename为/tmp/server_socket,dev是设备号 { int error = 0; char * tmp; struct dentry * dentry;

Linux内核源代码情景分析-文件系统安装后的访问

在Linux内核源代码情景分析-文件系统的安装,一文中,已经调用sudo mount -t ext2 /dev/sdb1 /mnt/sdb,在/mnt/sdb节点上挂载了文件系统,那么我们接下来访问/mnt/sdb/hello.c节点.我们来看一下path_walk的执行有什么不同? int path_walk(const char * name, struct nameidata *nd) { struct dentry *dentry; struct inode *inode; int er

Linux内核源代码情景分析-内存管理

用户空间的页面有下面几种: 1.普通的用户空间页面,包括进程的代码段.数据段.堆栈段.以及动态分配的"存储堆". 2.通过系统调用mmap()映射到用户空间的已打开文件的内容. 3.进程间的共享内存区. 这些页面的的周转有两方面的意思. 1.页面的分配,使用,回收.如进程压栈时新申请的页面,这类页面不进行盘区交换,不使用时释放得以回收. 这部分通过一个场景来解释: Linux内核源代码情景分析-内存管理之用户堆栈的扩展. 2.盘区交换.如要执行硬盘上的对应代码段.把硬盘上的代码段换入内