os_aio_slot_t {
ibool is_read; /*!< TRUE if a read operation */
ulint pos; // position of this slot in the os_aio_array_t array
ibool reserved; // TRUE if the slot is already reserved by an IO request
time_t reservation_time; // time when the slot was reserved
ulint len; // length of the IO request
byte* buf; // buffer to read into or write from; usually points to a buffer pool page, with special handling for compressed pages
ulint type; /* request type, i.e. read or write IO */
os_offset_t offset; /*!< file offset in bytes */
os_file_t file; /*!< file where to read or write */
const char* name; /*!< name and path of the file to read or write */
ibool io_already_done; /* TRUE if the IO has already completed */
fil_node_t* message1; /* the InnoDB file node (fil_node_t) of this aio operation */
void* message2; /* records the buffer pool bpage that the IO request belongs to */
#ifdef WIN_ASYNC_IO
HANDLE handle; /*!< handle object we need in the
OVERLAPPED struct */
OVERLAPPED control; /*!< Windows control block for the
aio request */
#elif defined(LINUX_NATIVE_AIO)
struct iocb control; /* the aio request control block (iocb) used by this slot */
int n_bytes; /* bytes read or written */
int ret; /* AIO return code */
#endif /* WIN_ASYNC_IO */
}
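Before a slot can be dispatched, it is reserved and its fields are filled in. A condensed sketch of that step, based on os_aio_array_reserve_slot (finding a free slot, waiting when the array is full, and error handling are all omitted, so treat the details as an approximation rather than the verbatim code):

// Sketch: populating a freshly reserved slot (simplified from
// os_aio_array_reserve_slot).
slot->reserved         = TRUE;      /* the slot is now taken */
slot->reservation_time = ut_time(); /* lets monitoring spot long-pending IOs */
slot->message1 = message1;          /* fil_node_t of the file */
slot->message2 = message2;          /* bpage to hand back on completion */
slot->file     = file;
slot->name     = name;
slot->len      = len;
slot->type     = type;
slot->buf      = buf;
slot->offset   = offset;
slot->io_already_done = FALSE;
#ifdef LINUX_NATIVE_AIO
// Prepare the iocb so the slot can later be handed to io_submit().
if (type == OS_FILE_READ) {
    io_prep_pread(&slot->control, file, buf, len, (off_t) offset);
} else {
    io_prep_pwrite(&slot->control, file, buf, len, (off_t) offset);
}
slot->control.data = slot; /* lets the collector map an event back to its slot */
#endif

Once reserved, the slot is handed to the dispatch routine below.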
static
ibool
os_aio_linux_dispatch(
/*==================*/
os_aio_array_t* array, /* the IO request array */
os_aio_slot_t* slot, /* an already reserved slot */
ibool should_buffer) // whether to buffer the aio request; this mainly matters for read-ahead
{
...
/* Find out what we are going to work with.
The iocb struct is directly in the slot.
The io_context is one per segment. */
// number of slots per segment; on Linux each segment contains 256 slots
slots_per_segment = array->n_slots / array->n_segments;
iocb = &slot->control;
io_ctx_index = slot->pos / slots_per_segment;
if (should_buffer) {
/* note that aio request buffering therefore only applies to read requests */
ut_ad(array == os_aio_read_array);
ulint n;
ulint count;
os_mutex_enter(array->mutex);
/* There are array->n_slots elements in array->pending, which is divided into
* array->n_segments areas of equal size. The iocbs of each segment are
* buffered in its corresponding area in the pending array consecutively as
* they come. array->count[i] records the number of buffered aio requests in
* the ith segment. */
n = io_ctx_index * slots_per_segment
+ array->count[io_ctx_index];
array->pending[n] = iocb;
array->count[io_ctx_index] ++;
count = array->count[io_ctx_index];
os_mutex_exit(array->mutex);
// if all slots of the current segment are occupied, submit the buffered aio requests in one batch
if (count == slots_per_segment) {
os_aio_linux_dispatch_read_array_submit();
}
// otherwise just return
return (TRUE);
}
// submit the IO request directly to the kernel
ret = io_submit(array->aio_ctx[io_ctx_index], 1, &iocb);
...
}
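os_aio_linux_dispatch_read_array_submit itself is not shown in the excerpt. A minimal sketch of what it presumably does, assuming only the pending[]/count[] bookkeeping described in the comment above (the loop body and error handling here are an inference, not the actual implementation):

// Hedged sketch: flush each segment's buffered iocbs to the kernel in one
// io_submit() call per io_context, then reset the counters. Here array is
// os_aio_read_array, since buffering only applies to reads.
ulint slots_per_segment = array->n_slots / array->n_segments;
ulint i;

os_mutex_enter(array->mutex);
for (i = 0; i < array->n_segments; i++) {
    if (array->count[i] > 0) {
        // The iocbs of segment i sit consecutively starting at
        // pending[i * slots_per_segment], so one batched submit
        // covers all of them.
        io_submit(array->aio_ctx[i], array->count[i],
                  &array->pending[i * slots_per_segment]);
        array->count[i] = 0;
    }
}
os_mutex_exit(array->mutex);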
- fil_aio_wait: the main function in which an IO thread monitors aio requests
- os_aio_linux_handle: the function in which an IO thread handles native IO requests
ibool
os_aio_linux_handle(
ulint global_seg, // which global segment this thread serves
fil_node_t** message1, /* OUT: the InnoDB file node (fil_node_t) of the aio operation */
void** message2, /* OUT: the buffer pool bpage that the completed IO request belongs to */
ulint* type) /* OUT: type of the completed request */
{
...
segment = os_aio_get_array_and_local_segment(&array, global_seg);
n = array->n_slots / array->n_segments; // the number of io events (slots) one thread monitors
/* Loop until we have found a completed request. */
for (;;) {
ibool any_reserved = FALSE;
os_mutex_enter(array->mutex);
for (i = 0; i < n; ++i) { // scan every aio request this thread is responsible for
slot = os_aio_array_get_nth_slot(
array, i + segment * n);
if (!slot->reserved) { // is this slot in use?
continue;
} else if (slot->io_already_done) { // the IO has completed; we can return the data to the caller
/* Something for us to work on. */
goto found;
} else {
any_reserved = TRUE;
}
}
os_mutex_exit(array->mutex);
// getting here means no completed io was found, so go collect more events
os_aio_linux_collect(array, segment, n);
found: // a completed io was found; hand its contents back
*message1 = slot->message1;
*message2 = slot->message2; // the bpage the completed IO belongs to
*type = slot->type;
if (slot->ret == 0 && slot->n_bytes == (long) slot->len) {
if (slot->page_encrypt
&& slot->type == OS_FILE_READ) {
os_decrypt_page(slot->buf, slot->len, slot->page_size, FALSE);
}
ret = TRUE;
} else {
errno = -slot->ret;
/* os_file_handle_error does tell us if we should retry
this IO. As it stands now, we don't do this retry when
reaping requests from a different context than
the dispatcher. This non-retry logic is the same for
windows and linux native AIO.
We should probably look into this to transparently
re-submit the IO. */
os_file_handle_error(slot->name, "Linux aio");
ret = FALSE;
}
os_mutex_exit(array->mutex);
os_aio_array_free_slot(array, slot);
}
- os_aio_linux_collect: waits for native IO requests to complete
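The excerpt ends before the collect step. In outline, os_aio_linux_collect reaps completed events for one segment's io_context with io_getevents and marks the matching slots as done, so that os_aio_linux_handle can find them on its next scan. A condensed sketch along the lines of the 5.6-era code (the timeout value and all error and retry paths are simplified):

// Sketch of os_aio_linux_collect(array, segment, seg_size), simplified.
struct io_event events[256]; /* sized to the slots per segment (256 on Linux, per the note above) */
struct timespec timeout;
int ret;
int i;

timeout.tv_sec = 0;
timeout.tv_nsec = 500000000; /* 0.5 s, so the thread can periodically check for shutdown */

// Wait until at least one request on this segment's io_context completes.
ret = io_getevents(array->aio_ctx[segment], 1, seg_size, events, &timeout);

for (i = 0; i < ret; i++) {
    // iocb->data was set to the slot when the request was prepared,
    // so each event maps straight back to its slot.
    struct iocb* control = (struct iocb*) events[i].obj;
    os_aio_slot_t* slot = (os_aio_slot_t*) control->data;

    os_mutex_enter(array->mutex);
    slot->n_bytes = events[i].res;  /* bytes actually transferred */
    slot->ret     = events[i].res2; /* error code, 0 on success */
    slot->io_already_done = TRUE;   /* os_aio_linux_handle will now find it */
    os_mutex_exit(array->mutex);
}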