os_aio_slot_t {
ibool is_read; /*!< TRUE if a read operation */
ulint pos; // position of this slot in the os_aio_array_t array
ibool reserved; // TRUE if the slot is already reserved by an IO request
time_t reservation_time; // time when the slot was reserved
ulint len; // length of the IO request
byte* buf; // buffer to read into or write from; usually points to a buffer pool page, with special handling for compressed pages
ulint type; /* request type, i.e. read or write IO */
os_offset_t offset; /*!< file offset in bytes */
os_file_t file; /*!< file where to read or write */
const char* name; /*!< name and path of the file to read or write */
ibool io_already_done; /* TRUE if the IO has already completed */
fil_node_t* message1; /* the InnoDB file node (fil_node_t) of this aio operation */
void* message2; /* records the buffer pool bpage that the IO request belongs to */
#ifdef WIN_ASYNC_IO
HANDLE handle; /*!< handle object we need in the
OVERLAPPED struct */
OVERLAPPED control; /*!< Windows control block for the
aio request */
#elif defined(LINUX_NATIVE_AIO)
struct iocb control; /* the aio request control block (iocb) used by this slot */
int n_bytes; /* bytes read or written */
int ret; /* AIO return code */
#endif /* WIN_ASYNC_IO */
}
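Before a slot can be dispatched, it is reserved and its fields are filled in. A condensed sketch of that step, based on os_aio_array_reserve_slot (finding a free slot, waiting when the array is full, and error handling are all omitted, so treat the details as an approximation rather than the verbatim code):

// Sketch: populating a freshly reserved slot (simplified from
// os_aio_array_reserve_slot).
slot->reserved         = TRUE;      /* the slot is now taken */
slot->reservation_time = ut_time(); /* lets monitoring spot long-pending IOs */
slot->message1 = message1;          /* fil_node_t of the file */
slot->message2 = message2;          /* bpage to hand back on completion */
slot->file     = file;
slot->name     = name;
slot->len      = len;
slot->type     = type;
slot->buf      = buf;
slot->offset   = offset;
slot->io_already_done = FALSE;
#ifdef LINUX_NATIVE_AIO
// Prepare the iocb so the slot can later be handed to io_submit().
if (type == OS_FILE_READ) {
    io_prep_pread(&slot->control, file, buf, len, (off_t) offset);
} else {
    io_prep_pwrite(&slot->control, file, buf, len, (off_t) offset);
}
slot->control.data = slot; /* lets the collector map an event back to its slot */
#endif

Once reserved, the slot is handed to the dispatch routine below.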
static
ibool
os_aio_linux_dispatch(
/*==================*/
os_aio_array_t* array, /* the IO request array */
os_aio_slot_t* slot, /* an already reserved slot */
ibool should_buffer) // whether to buffer the aio request; this mainly matters for read-ahead
{
...
/* Find out what we are going to work with.
The iocb struct is directly in the slot.
The io_context is one per segment. */
// number of slots per segment; on Linux each segment contains 256 slots
slots_per_segment = array->n_slots / array->n_segments;
iocb = &slot->control;
io_ctx_index = slot->pos / slots_per_segment;
if (should_buffer) {
/* note that aio request buffering therefore only applies to read requests */
ut_ad(array == os_aio_read_array);
ulint n;
ulint count;
os_mutex_enter(array->mutex);
/* There are array->n_slots elements in array->pending, which is divided into
* array->n_segments areas of equal size. The iocbs of each segment are
* buffered in its corresponding area in the pending array consecutively as
* they come. array->count[i] records the number of buffered aio requests in
* the ith segment. */
n = io_ctx_index * slots_per_segment
+ array->count[io_ctx_index];
array->pending[n] = iocb;
array->count[io_ctx_index] ++;
count = array->count[io_ctx_index];
os_mutex_exit(array->mutex);
// if all slots of the current segment are occupied, submit the buffered aio requests in one batch
if (count == slots_per_segment) {
os_aio_linux_dispatch_read_array_submit();
}
// otherwise just return
return (TRUE);
}
// submit the IO request directly to the kernel
ret = io_submit(array->aio_ctx[io_ctx_index], 1, &iocb);
...
}
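os_aio_linux_dispatch_read_array_submit itself is not shown in the excerpt. A minimal sketch of what it presumably does, assuming only the pending[]/count[] bookkeeping described in the comment above (the loop body and error handling here are an inference, not the actual implementation):

// Hedged sketch: flush each segment's buffered iocbs to the kernel in one
// io_submit() call per io_context, then reset the counters. Here array is
// os_aio_read_array, since buffering only applies to reads.
ulint slots_per_segment = array->n_slots / array->n_segments;
ulint i;

os_mutex_enter(array->mutex);
for (i = 0; i < array->n_segments; i++) {
    if (array->count[i] > 0) {
        // The iocbs of segment i sit consecutively starting at
        // pending[i * slots_per_segment], so one batched submit
        // covers all of them.
        io_submit(array->aio_ctx[i], array->count[i],
                  &array->pending[i * slots_per_segment]);
        array->count[i] = 0;
    }
}
os_mutex_exit(array->mutex);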
- fil_aio_wait: the main function in which an IO thread monitors aio requests
- os_aio_linux_handle: the function in which an IO thread handles native IO requests
ibool
os_aio_linux_handle(
ulint global_seg, // which global segment this thread serves
fil_node_t** message1, /* OUT: the InnoDB file node (fil_node_t) of the aio operation */
void** message2, /* OUT: the buffer pool bpage that the completed IO request belongs to */
ulint* type) /* OUT: type of the completed request */
{
...
segment = os_aio_get_array_and_local_segment(&array, global_seg);
n = array->n_slots / array->n_segments; // the number of io events (slots) one thread monitors
/* Loop until we have found a completed request. */
for (;;) {
ibool any_reserved = FALSE;
os_mutex_enter(array->mutex);
for (i = 0; i < n; ++i) { // scan every aio request this thread is responsible for
slot = os_aio_array_get_nth_slot(
array, i + segment * n);
if (!slot->reserved) { // is this slot in use?
continue;
} else if (slot->io_already_done) { // the IO has completed; we can return the data to the caller
/* Something for us to work on. */
goto found;
} else {
any_reserved = TRUE;
}
}
os_mutex_exit(array->mutex);
// getting here means no completed io was found, so go collect more events
os_aio_linux_collect(array, segment, n);
found: // a completed io was found; hand its contents back
*message1 = slot->message1;
*message2 = slot->message2; // the bpage the completed IO belongs to
*type = slot->type;
if (slot->ret == 0 && slot->n_bytes == (long) slot->len) {
if (slot->page_encrypt
&& slot->type == OS_FILE_READ) {
os_decrypt_page(slot->buf, slot->len, slot->page_size, FALSE);
}
ret = TRUE;
} else {
errno = -slot->ret;
/* os_file_handle_error does tell us if we should retry
this IO. As it stands now, we don't do this retry when
reaping requests from a different context than
the dispatcher. This non-retry logic is the same for
windows and linux native AIO.
We should probably look into this to transparently
re-submit the IO. */
os_file_handle_error(slot->name, "Linux aio");
ret = FALSE;
}
os_mutex_exit(array->mutex);
os_aio_array_free_slot(array, slot);
}
- os_aio_linux_collect: waits for native IO requests to complete
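The excerpt ends before the collect step. In outline, os_aio_linux_collect reaps completed events for one segment's io_context with io_getevents and marks the matching slots as done, so that os_aio_linux_handle can find them on its next scan. A condensed sketch along the lines of the 5.6-era code (the timeout value and all error and retry paths are simplified):

// Sketch of os_aio_linux_collect(array, segment, seg_size), simplified.
struct io_event events[256]; /* sized to the slots per segment (256 on Linux, per the note above) */
struct timespec timeout;
int ret;
int i;

timeout.tv_sec = 0;
timeout.tv_nsec = 500000000; /* 0.5 s, so the thread can periodically check for shutdown */

// Wait until at least one request on this segment's io_context completes.
ret = io_getevents(array->aio_ctx[segment], 1, seg_size, events, &timeout);

for (i = 0; i < ret; i++) {
    // iocb->data was set to the slot when the request was prepared,
    // so each event maps straight back to its slot.
    struct iocb* control = (struct iocb*) events[i].obj;
    os_aio_slot_t* slot = (os_aio_slot_t*) control->data;

    os_mutex_enter(array->mutex);
    slot->n_bytes = events[i].res;  /* bytes actually transferred */
    slot->ret     = events[i].res2; /* error code, 0 on success */
    slot->io_already_done = TRUE;   /* os_aio_linux_handle will now find it */
    os_mutex_exit(array->mutex);
}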