/* Number of minor numbers reserved for each sbull device. */
#define SBULL_MINORS 16
/*
 * The kernel always expresses block I/O in 512-byte sectors; this local
 * constant is used to convert between kernel sectors and the device's
 * real (hardware) sector size.
 */
#define KERNEL_SECTOR_SIZE 512
/*
 * Idle time, in jiffies, before the simulated media removal fires.
 * Parenthesized so the macro expands correctly inside larger expressions
 * (e.g. x / INVALIDATE_DELAY or x * INVALIDATE_DELAY).
 */
#define INVALIDATE_DELAY (30 * HZ)
块设备驱动的核心数据结构 sbull_dev(即设备在驱动内部的表示,the internal representation of our device):
/*
 * The internal representation of one sbull device.
 */
struct sbull_dev {
	int size;                       /* Device size in bytes (setup_device stores nsectors * hardsect_size) */
	u8 *data;                       /* The data array backing the device */
	short users;                    /* How many users currently have the device open */
	short media_change;             /* Non-zero when a (simulated) media change is pending */
	spinlock_t lock;                /* For mutual exclusion; also handed to the request queue */
	struct request_queue *queue;    /* The device request queue */
	struct gendisk *gd;             /* The gendisk structure */
	struct timer_list timer;        /* For simulated media changes */
};
/* Module-wide state. Statics without an initializer start out as NULL / 0. */
static struct sbull_dev *Devices;       /* Array of ndevices device structures, allocated in sbull_init */
static int sbull_major;                 /* Block major number; 0 asks for dynamic allocation */
static int ndevices = 1;                /* Number of sbull devices to create */
static int nsectors = 25600;            /* Number of hardware sectors per device */
static int hardsect_size = 512;         /* Hardware sector size in bytes */
/*
 * The different "request modes" we can use; setup_device picks the
 * queue setup according to request_mode.
 */
enum {
	RM_SIMPLE,      /* 0: request queue served by sbull_request */
	RM_FULL,        /* 1: request queue served by sbull_full_request */
	RM_NOQUEUE,     /* 2: bios handed directly to sbull_make_request */
};

/* Mode used by this build of the driver. */
static int request_mode = RM_FULL;
块设备驱动的加载总是从模块的 init 函数开始,所以我们从 sbull_init() 开始分析:
/*
 * Module initialization: register the block major number, allocate the
 * array of sbull_dev structures and set each device up.
 *
 * Returns 0 on success, -EBUSY if no major number could be obtained, or
 * -ENOMEM if the device array could not be allocated.
 */
static int __init sbull_init(void)
{
	int i;

	/* First argument is the major number (0 = allocate dynamically),
	 * second is the device name. */
	sbull_major = register_blkdev(sbull_major, "sbull");
	if (sbull_major <= 0) {
		printk(KERN_WARNING "sbull:unable to get major number\n");
		return -EBUSY;
	}

	/* Allocate the array of core device structures. */
	Devices = kmalloc(ndevices * sizeof(*Devices), GFP_KERNEL);
	if (Devices == NULL)
		goto out_unregister;

	/* Initialize every sbull_dev in the array. */
	for (i = 0; i < ndevices; i++)
		setup_device(Devices + i, i);
	return 0;

out_unregister:
	/* Must unregister under the same name that was registered above. */
	unregister_blkdev(sbull_major, "sbull");
	return -ENOMEM;
}
/*
 * Initialize one sbull_dev structure: backing memory, lock, media-change
 * timer, request queue and gendisk.
 *
 * dev   - the device structure to set up (its previous contents are wiped)
 * which - index of the device; selects the first minor number and the
 *         letter appended to the disk name
 *
 * On failure the function returns early. The backing memory is released
 * and dev->data is reset to NULL; a queue that was already allocated stays
 * in dev->queue, where sbull_exit() releases it.
 */
static void setup_device(struct sbull_dev *dev, int which)
{
	memset(dev, 0, sizeof(struct sbull_dev));

	/*
	 * Set up the lock and the timer before anything that can fail:
	 * the lock must be initialized before the request queue is allocated,
	 * and sbull_exit() calls del_timer_sync() on every device regardless
	 * of whether the rest of the setup succeeded.
	 */
	spin_lock_init(&dev->lock);
	init_timer(&dev->timer);
	dev->timer.data = (unsigned long)dev;       /* argument passed to the timer function */
	dev->timer.function = sbull_invalidate;     /* runs when the timer expires */

	/* Backing store for the device, sized in bytes. */
	dev->size = nsectors * hardsect_size;
	dev->data = vmalloc(dev->size);
	if (dev->data == NULL) {
		printk(KERN_NOTICE "vmalloc failure.\n");
		return;
	}

	/*
	 * The I/O queue, depending on whether we are using our own
	 * make_request function or not.
	 */
	switch (request_mode) {
	case RM_NOQUEUE:
		dev->queue = blk_alloc_queue(GFP_KERNEL);
		if (dev->queue == NULL)
			goto out_vfree;
		blk_queue_make_request(dev->queue, sbull_make_request);
		break;
	case RM_FULL:
		dev->queue = blk_init_queue(sbull_full_request, &dev->lock);
		if (dev->queue == NULL)
			goto out_vfree;
		break;
	default:
		printk(KERN_NOTICE "Bad request mode %d,using simple\n", request_mode);
		/* fall through: an unknown mode really does use the simple queue */
	case RM_SIMPLE:
		dev->queue = blk_init_queue(sbull_request, &dev->lock);
		if (dev->queue == NULL)
			goto out_vfree;
		break;
	}
	blk_queue_hardsect_size(dev->queue, hardsect_size);    /* hardware sector size */
	dev->queue->queuedata = dev;

	/* Allocate and fill in the gendisk structure. */
	dev->gd = alloc_disk(SBULL_MINORS);
	if (!dev->gd) {
		printk(KERN_NOTICE "alloc_disk failure\n");
		goto out_vfree;
	}
	dev->gd->major = sbull_major;                       /* major number */
	dev->gd->first_minor = which * SBULL_MINORS;        /* first minor number */
	dev->gd->fops = &sbull_ops;                         /* block device operations */
	dev->gd->queue = dev->queue;                        /* request queue */
	dev->gd->private_data = dev;                        /* back pointer to our device */
	snprintf(dev->gd->disk_name, sizeof(dev->gd->disk_name),
		 "sbull%c", which + 'a');

	/*
	 * The kernel always counts capacity in 512-byte sectors, so convert
	 * from hardware sectors.
	 */
	set_capacity(dev->gd, nsectors * (hardsect_size / KERNEL_SECTOR_SIZE));

	/* Everything is ready: make the disk visible to the system. */
	add_disk(dev->gd);
	return;

out_vfree:
	vfree(dev->data);
	/* sbull_exit() frees dev->data when it is non-NULL; avoid a double free. */
	dev->data = NULL;
}
下面对上述初始化过程中涉及到的各个函数逐一进行分析:
/*
 * Block device operations table installed into each gendisk by
 * setup_device (gd->fops).
 */
static struct block_device_operations sbull_ops = {
	.owner           = THIS_MODULE,
	.open            = sbull_open,
	.release         = sbull_release,
	.ioctl           = sbull_ioctl,
	.getgeo          = sbull_getgeo,          /* report the (made-up) geometry */
	.media_changed   = sbull_media_changed,   /* has the media been changed? */
	.revalidate_disk = sbull_revalidate,      /* make the media valid again */
};
/*
 * Timer callback that simulates removal of the media.
 *
 * ldev is the sbull_dev pointer stored in timer.data by setup_device.
 * Under the device lock, the media_change flag is raised if the device
 * has no users and has backing memory; otherwise a warning is logged.
 */
void sbull_invalidate(unsigned long ldev)
{
	struct sbull_dev *sdev = (struct sbull_dev *)ldev;

	spin_lock(&sdev->lock);
	if (!sdev->users && sdev->data)
		sdev->media_change = 1;
	else
		printk(KERN_WARNING "sbull:timer sanity check failed\n");
	spin_unlock(&sdev->lock);
}
/*
 * The direct make_request version: transfers the bio immediately and
 * completes it, bypassing request-queue processing.
 *
 * q->queuedata holds the sbull_dev (stored there by setup_device).
 * Always returns 0.
 */
static int sbull_make_request(request_queue_t *q, struct bio *bio)
{
	struct sbull_dev *sdev = q->queuedata;
	const int result = sbull_xfer_bio(sdev, bio);

	/* Tell the block layer that the whole bio has been processed. */
	bio_endio(bio, bio->bi_size, result);
	return 0;
}
/*
 * Smarter request function that "handles clustering": each request is
 * transferred bio by bio via sbull_xfer_request.
 *
 * q->queuedata holds the sbull_dev (stored there by setup_device).
 */
static void sbull_full_request(request_queue_t *q)
{
	struct request *req;
	int sectors_xferred;
	struct sbull_dev *dev = q->queuedata;

	/* elv_next_request() returns the first unfinished request on the queue. */
	while ((req = elv_next_request(q)) != NULL) {
		if (!blk_fs_request(req)) {
			/* Not a filesystem (data-moving) request: fail it. */
			printk(KERN_NOTICE "Skip non-fs request\n");
			end_request(req, 0);    /* 0 = failure, 1 = success */
			continue;
		}
		sectors_xferred = sbull_xfer_request(dev, req);
		/*
		 * Report sectors_xferred sectors as successfully transferred.
		 * A return value of 0 means every sector of the request has
		 * been transferred and the request is finished.
		 */
		if (!end_that_request_first(req, 1, sectors_xferred)) {
			blkdev_dequeue_request(req);    /* remove it from the queue */
			/*
			 * The second argument is the "uptodate" status (1 =
			 * success), not a sector count; pass the same status
			 * given to end_that_request_first() above.
			 */
			end_that_request_last(req, 1);
		}
	}
}
/*
 * Transfer a full request.
 *
 * Walks every bio attached to req, hands each one to sbull_xfer_bio, and
 * returns the total number of 512-byte sectors covered by those bios.
 */
static int sbull_xfer_request(struct sbull_dev *dev, struct request *req)
{
	struct bio *cur;
	int total = 0;

	rq_for_each_bio(cur, req) {
		sbull_xfer_bio(dev, cur);
		/* bytes in this bio divided by the sector size = sectors */
		total += cur->bi_size / KERNEL_SECTOR_SIZE;
	}
	return total;
}
/*
 * Transfer a single bio.
 *
 * Each segment of the bio is mapped to a kernel virtual address, copied
 * to or from the device memory with sbull_transfer, and unmapped again.
 * Always returns 0.
 */
static int sbull_xfer_bio(struct sbull_dev *dev, struct bio *bio)
{
	int i;
	struct bio_vec *bvec;
	sector_t sector = bio->bi_sector;   /* first sector to transfer */

	bio_for_each_segment(bvec, bio, i) {
		/* Kernel virtual address of the i-th segment's buffer. */
		char *buffer = __bio_kmap_atomic(bio, i, KM_USER0);
		/*
		 * Size of THIS segment. bio_cur_sectors(bio) would describe the
		 * bio's current segment, which does not follow the loop index.
		 */
		unsigned long nsect = bvec->bv_len / KERNEL_SECTOR_SIZE;

		sbull_transfer(dev,
			       sector,                      /* index of the starting sector */
			       nsect,                       /* number of sectors to transfer */
			       buffer,                      /* data buffer */
			       bio_data_dir(bio) == WRITE); /* non-zero: write to the device */
		sector += nsect;
		/* Unmap using the address returned by __bio_kmap_atomic(). */
		__bio_kunmap_atomic(buffer, KM_USER0);
	}
	return 0;
}
/*
 * Handle an I/O request: copy data between the caller's buffer and the
 * device memory.
 *
 * dev    - device whose data array is accessed
 * sector - starting position, in 512-byte kernel sectors
 * nsect  - number of 512-byte sectors to copy
 * buffer - source (write) or destination (read) buffer
 * write  - non-zero copies buffer into the device, zero copies out of it
 *
 * A transfer that would run past the end of the device is logged and
 * dropped.
 */
static void sbull_transfer(struct sbull_dev *dev, unsigned long sector,
			   unsigned long nsect, char *buffer, int write)
{
	unsigned long size = dev->size;     /* device size in bytes */
	unsigned long offset = sector * KERNEL_SECTOR_SIZE;
	unsigned long nbytes = nsect * KERNEL_SECTOR_SIZE;

	/*
	 * Equivalent to "offset + nbytes > size", but written so that neither
	 * the sector-to-byte multiplication nor the addition can wrap around
	 * and slip past the check.
	 */
	if (sector > size / KERNEL_SECTOR_SIZE ||
	    nsect > (size - offset) / KERNEL_SECTOR_SIZE) {
		printk(KERN_NOTICE "Beyond-end write (%lu %lu)\n", offset, nbytes);
		return;
	}
	if (write)
		memcpy(dev->data + offset, buffer, nbytes);
	else
		memcpy(buffer, dev->data + offset, nbytes);
}
/*
 * The simple form of the request function: handles the current chunk of
 * each request with a single sbull_transfer call.
 */
static void sbull_request(request_queue_t *q)
{
	struct request *rq;

	/* Keep pulling the first unfinished request until the queue is empty. */
	for (rq = elv_next_request(q); rq != NULL; rq = elv_next_request(q)) {
		struct sbull_dev *sdev = rq->rq_disk->private_data;

		if (blk_fs_request(rq)) {
			sbull_transfer(sdev, rq->sector, rq->current_nr_sectors,
				       rq->buffer, rq_data_dir(rq));
			end_request(rq, 1);     /* 1 = success */
		} else {
			/* Only filesystem requests are served. */
			printk(KERN_NOTICE "Skip non-fs request\n");
			end_request(rq, 0);     /* 0 = failure */
		}
	}
}
/*
 * open(): cancel the pending "media removal" timer, remember the device in
 * filp->private_data and count the new user. On the first open the kernel
 * is asked to check for a media change.
 *
 * Always returns 0.
 */
static int sbull_open(struct inode *inode, struct file *filp)
{
	struct sbull_dev *dev = inode->i_bdev->bd_disk->private_data;
	int first_open;

	del_timer_sync(&dev->timer);    /* cancel the "media removal" timer */
	filp->private_data = dev;

	spin_lock(&dev->lock);
	first_open = (dev->users == 0);
	dev->users++;
	spin_unlock(&dev->lock);

	/*
	 * check_disk_change() may sleep and calls back into this driver's
	 * media_changed/revalidate_disk methods, so it must not run while
	 * the spinlock is held. Only the "is this the first user" decision
	 * needs the lock.
	 */
	if (first_open)
		check_disk_change(inode->i_bdev);
	return 0;
}
/*
 * release(): drop one user; when the last user goes away, arm the
 * "media removal" timer.
 *
 * Always returns 0.
 */
static int sbull_release(struct inode *inode, struct file *filp)
{
	struct sbull_dev *sdev = inode->i_bdev->bd_disk->private_data;

	spin_lock(&sdev->lock);
	if (--sdev->users == 0) {
		/*
		 * If the device is not reopened within INVALIDATE_DELAY, the
		 * timer fires and the media is treated as removed.
		 */
		sdev->timer.expires = jiffies + INVALIDATE_DELAY;
		add_timer(&sdev->timer);
	}
	spin_unlock(&sdev->lock);
	return 0;
}
/*
 * Look for a (simulated) media change.
 *
 * Returns the device's media_change flag; the device is found through
 * gd->private_data.
 */
int sbull_media_changed(struct gendisk *gd)
{
	struct sbull_dev *sdev = gd->private_data;

	return sdev->media_change;
}
/*
 * Revalidate. We do not take the lock here, for fear of deadlocking with
 * open. That needs to be reevaluated.
 *
 * If a media change is pending, clear the flag and zero the device memory
 * to simulate the insertion of a blank disk. Always returns 0.
 */
int sbull_revalidate(struct gendisk *gd)
{
	struct sbull_dev *sdev = gd->private_data;

	if (!sdev->media_change)
		return 0;

	sdev->media_change = 0;
	memset(sdev->data, 0, sdev->size);
	return 0;
}
/*
 * The ioctl() implementation.
 *
 * Only one command is handled: HDIO_GETGEO, a query for the device's
 * physical geometry. Since this is a virtual device, invented values are
 * reported.
 *
 * Returns 0 on success, -EFAULT if the result cannot be copied to user
 * space, or -ENOTTY for any other command.
 */
int sbull_ioctl(struct inode *inode, struct file *filp,
		unsigned int cmd, unsigned long arg)
{
	long size;
	struct hd_geometry geo;
	struct sbull_dev *dev = filp->private_data;    /* stored by sbull_open */

	switch (cmd) {
	case HDIO_GETGEO:
		/*
		 * Get geometry: since we are a virtual device, we have to make
		 * up something plausible. So we claim 16 sectors, four heads,
		 * and calculate the corresponding number of cylinders. We set the
		 * start of data at sector four.
		 */
		/* Zero first so that no uninitialized stack bytes (e.g. any
		 * structure padding) are copied to user space. */
		memset(&geo, 0, sizeof(geo));
		/* dev->size is in bytes; geometry is counted in 512-byte sectors. */
		size = dev->size / KERNEL_SECTOR_SIZE;
		/* 4 heads * 16 sectors = 64 sectors per cylinder. */
		geo.cylinders = (size & ~0x3f) >> 6;
		geo.heads = 4;
		geo.sectors = 16;
		geo.start = 4;
		if (copy_to_user((void __user *)arg, &geo, sizeof(geo)))
			return -EFAULT;
		return 0;
	}
	return -ENOTTY;     /* unknown command */
}
static int sbull_getgeo(struct block_device *bdev,struct hd_geometry *geo)
{
unsigned long size;
struct sbull_dev *pdev = bdev->bd_disk->private_data;
size = pdev->size;
geo->cylinders = (size & ~ox3f) >> 6;
geo->heads = 4;
geo->sectors = 16;
geo->start = 4;
return 0;
}
/*
 * Module unload: tear down every device, then give back the major number
 * and the device array.
 */
static void sbull_exit(void)
{
	struct sbull_dev *sdev;
	int idx;

	for (idx = 0; idx < ndevices; idx++) {
		sdev = &Devices[idx];

		/* Make sure the "media removal" timer is no longer pending. */
		del_timer_sync(&sdev->timer);

		if (sdev->gd) {
			del_gendisk(sdev->gd);  /* remove the disk */
			put_disk(sdev->gd);     /* drop our reference to the gendisk */
		}

		if (sdev->queue) {
			/* The queue is released according to how it was set up. */
			if (request_mode != RM_NOQUEUE)
				blk_cleanup_queue(sdev->queue);
			else
				blk_put_queue(sdev->queue);
		}

		if (sdev->data)
			vfree(sdev->data);
	}

	unregister_blkdev(sbull_major, "sbull");
	kfree(Devices);
}
/* Register sbull_init and sbull_exit as the module's init and exit functions. */
module_init(sbull_init);
module_exit(sbull_exit);