Linux Aio System Call

io_submit、io_setup和io_getevents示例

出处:http://blog.csdn.net/aquester/article/details/7635578 虽然标为原创,不过我还是找到了一个更像出处的地方 http://www.hadoopor.com/thread-956-1-1.html

io_submit、io_setup和io_getevents是LINUX上的AIO系统调用。这里有一个需要特别注意的地方——传递给io_setup的aio_context参数必须初始化为0,在它的man手册里其实有说明,但容易被忽视,man说明如下:
ctxp must not point to an AIO context that already exists, and must be initialized to 0 prior to the call

完整示例如下:

// 包含必须头文件
#ifndef _GNU_SOURCE
#define _GNU_SOURCE  /* <fcntl.h> only exposes O_DIRECT when this is defined */
#endif
#include <errno.h>
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <libaio.h>
 
int main()
{
        io_context_t ctx;
        unsigned nr_events = 10;
        memset(&ctx, 0, sizeof(ctx));  // It's necessary,这里一定要的
        int errcode = io_setup(nr_events, &ctx);
        if (errcode == 0)
                printf("io_setup success\n");
        else
                printf("io_setup error: :%d:%s\n", errcode, strerror(-errcode));
 
        // 如果不指定O_DIRECT,则io_submit操作和普通的read/write操作没有什么区别了,将来的LINUX可能
        // 可以支持不指定O_DIRECT标志
        int fd = open("./direct.txt", O_CREAT|O_DIRECT|O_WRONLY, S_IRWXU|S_IRWXG|S_IROTH);
        printf("open: %s\n", strerror(errno));
 
        char* buf;
        errcode = posix_memalign((void**)&buf, sysconf(_SC_PAGESIZE), sysconf(_SC_PAGESIZE));
        printf("posix_memalign: %s\n", strerror(errcode));
 
        strcpy(buf, "hello xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx");
 
        struct iocb *iocbpp = (struct iocb *)malloc(sizeof(struct iocb));
        memset(iocbpp, 0, sizeof(struct iocb));
 
        iocbpp[0].data           = buf;
        iocbpp[0].aio_lio_opcode = IO_CMD_PWRITE;
        iocbpp[0].aio_reqprio    = 0;
        iocbpp[0].aio_fildes     = fd;
 
        iocbpp[0].u.c.buf    = buf;
        iocbpp[0].u.c.nbytes = page_size;//strlen(buf); // 这个值必须按512字节对齐
        iocbpp[0].u.c.offset = 0; // 这个值必须按512字节对齐
 
        // 提交异步操作,异步写磁盘
        int n = io_submit(ctx, 1, &iocbpp);
        printf("==io_submit==: %d:%s\n", n, strerror(-n));
 
        struct io_event events[10];
        struct timespec timeout = {1, 100};
        // 检查写磁盘情况,类似于epoll_wait或select
        n = io_getevents(ctx, 1, 10, events, &timeout);
        printf("io_getevents: %d:%s\n", n, strerror(-n));
 
        close(fd);
        io_destroy(ctx);
        return 0;
}

测试环境:Linux 2.6.16,SUSE Linux Enterprise Server 10 (x86_64)

struct iocb {
       /* these are internal to the kernel/libc. */
       __u64   aio_data;       /* data to be returned in event\'s data */用来返回异步IO事件信息的空间,类似于epoll中的ptr。
       __u32   PADDED(aio_key, aio_reserved1); /* the kernel sets aio_key to the req # */
       /* common fields */
       __u16   aio_lio_opcode; /* see IOCB_CMD_ above */
       __s16   aio_reqprio;      // 请求的优先级
       __u32   aio_fildes;        //  文件描述符
       __u64   aio_buf;           // 用户态缓冲区
       __u64   aio_nbytes;      // 文件操作的字节数
       __s64   aio_offset;       // 文件操作的偏移量

       /* extra parameters */
       __u64   aio_reserved2;  /* TODO: use this for a (struct sigevent *) */
       __u64   aio_reserved3;
}; /* 64 bytes */

/* Event record describing the outcome of one iocb (see the per-field comments). */
struct io_event {
       __u64           data;          /* the data field from the iocb */ // similar to ptr in epoll_event
       __u64           obj;            /* what iocb this event came from */ // pointer to the corresponding user-space iocb structure
       __s64           res;            /* result code for this event */ // result of the operation, similar to the return value of read/write
       __s64           res2;          /* secondary result */
};

系统调用功能原型

  • io_setup为当前进程初始化一个异步IO上下文 int io_setup(unsigned nr_events,aio_context_t *ctxp);
  • io_submit提交一个或者多个异步IO操作 int io_submit(aio_context_t ctx_id,long nr, struct iocb **iocbpp);
  • io_getevents从完成队列中读取已完成的异步IO操作的事件(结果) int io_getevents(aio_context_t ctx_id, long min_nr, long nr, struct io_event *events, struct timespec *timeout);
  • io_cancel 取消一个未完成的异步IO操作 int io_cancel(aio_context_t ctx_id, struct iocb *iocb, struct io_event *result);
  • io_destroy 从当前进程删除一个异步IO上下文 int io_destroy(aio_context_t ctx);

See Also:
Linux epoll
Nginx AIO