Redis源码剖析和注释(十六)—- Redis输入输出的抽象(rio)

撸了今年阿里、腾讯和美团的面试,我有一个重要发现…….

作者:men_wen

出处:https://blog.csdn.net/men_wen/column/info/15428


1. 概述

rio是Redis对IO操作的一个抽象,可以面向不同的输入输出设备,例如一个缓冲区IO、文件IO和socket IO。

一个rio对象提供一下四个方法:

  • read:读操作
  • write:写操作
  • tell:读写的偏移量
  • flush:冲洗缓冲区操作

使用C语言,实现面向对象的思想。

2. rio对象的抽象

rio结构体如下:rio.c 和 rio.h 文件详细注释

    struct _rio {
        /* Backend functions.
         * Since this functions do not tolerate short writes or reads the return
         * value is simplified to: zero on error, non zero on complete success. */
        // 读,写,读写偏移量、冲洗操作的函数指针,非0表示成功
        size_t (*read)(struct _rio *, void *buf, size_t len);
        size_t (*write)(struct _rio *, const void *buf, size_t len);
        off_t (*tell)(struct _rio *);
        int (*flush)(struct _rio *);
        /* The update_cksum method if not NULL is used to compute the checksum of
         * all the data that was read or written so far. The method should be
         * designed so that can be called with the current checksum, and the buf
         * and len fields pointing to the new block of data to add to the checksum
         * computation. */
        // 计算和校验函数
        void (*update_cksum)(struct _rio *, const void *buf, size_t len);

        /* The current checksum */
        // 当前校验和
        uint64_t cksum;

        /* number of bytes read or written */
        // 读或写的字节数
        size_t processed_bytes;

        /* maximum single read or write chunk size */
        // 每次读或写的最大字节数
        size_t max_processing_chunk;

        /* Backend-specific vars. */
        // 读写的各种对象
        union {
            /*内存缓冲区 In-memory buffer target. */
            struct {
                sds ptr;    //缓冲区的指针,本质是char *
                off_t pos;  //缓冲区的偏移量
            } buffer;

            /*标准文件IO Stdio file pointer target. */
            struct {
                FILE *fp;       // 文件指针,指向被打开的文件
                off_t buffered; /* 最近一次同步之后所写的字节数 Bytes written since last fsync. */
                off_t autosync; /* 写入设置的autosync字节后,会执行fsync()同步 fsync after 'autosync' bytes written. */
            } file;

            /*文件描述符 Multiple FDs target (used to write to N sockets). */
            struct {
                int *fds;       /*文件描述符数组 File descriptors. */
                int *state;     /*每一个fd所对应的errno  Error state of each fd. 0 (if ok) or errno. */
                int numfds;     // 数组长度,文件描述符个数
                off_t pos;      // 偏移量
                sds buf;        // 缓冲区
            } fdset;
        } io;
    };

    typedef struct _rio rio;

下面的函数是流对象的接口。

2.1 读操作

    static inline size_t rioRead(rio *r, void *buf, size_t len) {
        while (len) {
            // 读的字节长度,不能超过每次读或写的最大字节数max_processing_chunk
            size_t bytes_to_read = (r->max_processing_chunk && r->max_processing_chunk < len) ? r->max_processing_chunk : len;
            // 调用自身的read方法读到buf中
            if (r->read(r,buf,bytes_to_read) == 0)
                return 0;
            // 更新和校验
            if (r->update_cksum) r->update_cksum(r,buf,bytes_to_read);
            // 更新偏移量,指向下一个读的位置
            buf = (char*)buf + bytes_to_read;
            // 计算剩余要读的长度
            len -= bytes_to_read;
            // 更新读或写的字节数
            r->processed_bytes += bytes_to_read;
        }
        return 1;
    }

2.2 写操作

    static inline size_t rioWrite(rio *r, const void *buf, size_t len) {
        while (len) {
            // 写的字节长度,不能超过每次读或写的最大字节数max_processing_chunk
            size_t bytes_to_write = (r->max_processing_chunk && r->max_processing_chunk < len) ? r->max_processing_chunk : len;
            // 更新和校验
            if (r->update_cksum) r->update_cksum(r,buf,bytes_to_write);
            // 调用自身的write方法写入
            if (r->write(r,buf,bytes_to_write) == 0)
                return 0;
            // 更新偏移量,指向下一个写的位置
            buf = (char*)buf + bytes_to_writ;
            // 计算剩余写入的长度
            len -= bytes_to_write;
            // 更新读或写的字节数
            r->processed_bytes += bytes_to_write;
        }
        return 1;
    }

2.3 返回当前偏移量

    static inline off_t rioTell(rio *r) {
        return r->tell(r);
    }

2.4 缓冲区冲洗函数

    static inline int rioFlush(rio *r) {
        return r->flush(r);
    }

3. rio对象的实现

对rio抽象的结构体中,使用了一个共用体(union),它可能是三种不同的对象,分别是:

  • 缓冲区 IO(Buffer I/O)
  • 标准输入输出 IO(Stdio file pointer)
  • 文件描述符集合(File descriptors set)

下面给出所有对象的实现:rio.c 和 rio.h 文件详细注释

3.1 缓冲区IO

    /* ------------------------- Buffer I/O implementation ----------------------- */
    // 缓冲区IO实现

    /* Returns 1 or 0 for success/failure. */
    // 将len长的buf写到一个缓冲区对象r中
    static size_t rioBufferWrite(rio *r, const void *buf, size_t len) {
        r->io.buffer.ptr = sdscatlen(r->io.buffer.ptr,(char*)buf,len);  //追加操作
        r->io.buffer.pos += len;    //更新偏移量
        return 1;
    }

    /* Returns 1 or 0 for success/failure. */
    // 讲缓冲区对象r读到buf中,读len长
    static size_t rioBufferRead(rio *r, void *buf, size_t len) {
        // 缓冲区对象的长度小于len,不够读,返回0
        if (sdslen(r->io.buffer.ptr)-r->io.buffer.pos < len)
            return 0; /* not enough buffer to return len bytes. */
        // 读到buf中
        memcpy(buf,r->io.buffer.ptr+r->io.buffer.pos,len);
        // 更新偏移量
        r->io.buffer.pos += len;
        return 1;
    }

    /* Returns read/write position in buffer. */
    // 返回缓冲区对象r当前的偏移量
    static off_t rioBufferTell(rio *r) {
        return r->io.buffer.pos;
    }

    /* Flushes any buffer to target device if applicable. Returns 1 on success
     * and 0 on failures. */
    // 清洗缓冲区
    static int rioBufferFlush(rio *r) {
        UNUSED(r);  //void r,强转成void类型对象,缓冲区就相当于释放
        return 1; /* Nothing to do, our write just appends to the buffer. */
    }

    // 定义一个缓冲区对象并初始化方法和成员
    static const rio rioBufferIO = {
        rioBufferRead,
        rioBufferWrite,
        rioBufferTell,
        rioBufferFlush,
        NULL,           /* update_checksum */
        0,              /* current checksum */
        0,              /* bytes read or written */
        0,              /* read/write chunk size */
        { { NULL, 0 } } /* union for io-specific vars */
    };

    // 初始化缓冲区对象r并设置缓冲区的地址
    void rioInitWithBuffer(rio *r, sds s) {
        *r = rioBufferIO;
        r->io.buffer.ptr = s;
        r->io.buffer.pos = 0;
    }

3.2 标准输入输出 IO

    /* --------------------- Stdio file pointer implementation ------------------- */
    // 标准文件IO实现
    /* Returns 1 or 0 for success/failure. */
    // 将len长的buf写入一个文件流对象
    static size_t rioFileWrite(rio *r, const void *buf, size_t len) {
        size_t retval;

        // 调用底层库函数
        retval = fwrite(buf,len,1,r->io.file.fp);
        // 更新写入的长度
        r->io.file.buffered += len;

        // 如果已经达到自动的同步autosync所设置的字节数
        if (r->io.file.autosync &&
            r->io.file.buffered >= r->io.file.autosync)
        {
            // 冲洗键盘缓冲区中的数据到文件中
            fflush(r->io.file.fp);
            // 同步操作
            aof_fsync(fileno(r->io.file.fp));
            // 长度初始化为0
            r->io.file.buffered = 0;
        }
        return retval;
    }

    /* Returns 1 or 0 for success/failure. */
    // 从文件流对象r中读出len长度的字节到buf中
    static size_t rioFileRead(rio *r, void *buf, size_t len) {
        return fread(buf,len,1,r->io.file.fp);
    }

    /* Returns read/write position in file. */
    // 返回文件流对象的偏移量
    static off_t rioFileTell(rio *r) {
        return ftello(r->io.file.fp);
    }

    /* Flushes any buffer to target device if applicable. Returns 1 on success
     * and 0 on failures. */
    // 清洗文件流
    static int rioFileFlush(rio *r) {
        return (fflush(r->io.file.fp) == 0) ? 1 : 0;
    }

    // 初始化一个文件流对象
    static const rio rioFileIO = {
        rioFileRead,
        rioFileWrite,
        rioFileTell,
        rioFileFlush,
        NULL,           /* update_checksum */
        0,              /* current checksum */
        0,              /* bytes read or written */
        0,              /* read/write chunk size */
        { { NULL, 0 } } /* union for io-specific vars */
    };

    // 初始化一个文件流对象且设置对应文件
    void rioInitWithFile(rio *r, FILE *fp) {
        *r = rioFileIO;
        r->io.file.fp = fp;
        r->io.file.buffered = 0;
        r->io.file.autosync = 0;
    }

3.3 文件描述符集合

    /* ------------------- File descriptors set implementation ------------------- */
    // 文件描述符合集合实现
    /* Returns 1 or 0 for success/failure.
     * The function returns success as long as we are able to correctly write
     * to at least one file descriptor.
     *
     * When buf is NULL and len is 0, the function performs a flush operation
     * if there is some pending buffer, so this function is also used in order
     * to implement rioFdsetFlush(). */
    // 将buf写入文件描述符集合对象
    static size_t rioFdsetWrite(rio *r, const void *buf, size_t len) {
        ssize_t retval;
        int j;
        unsigned char *p = (unsigned char*) buf;
        int doflush = (buf == NULL && len == 0);    //如果buf为空且len为0,相当于flush操作

        /* To start we always append to our buffer. If it gets larger than
         * a given size, we actually write to the sockets. */
        // 将buf中的内容写到文件描述符集合对象的缓冲区中
        if (len) {
            r->io.fdset.buf = sdscatlen(r->io.fdset.buf,buf,len);
            // 设置写完的标志
            len = 0; /* Prevent entering the while below if we don't flush. */
            if (sdslen(r->io.fdset.buf) > PROTO_IOBUF_LEN) doflush = 1; //如果缓冲区太大需要冲刷到socket中
        }

        // 冲洗文件描述符集合对象,设置集合缓冲区长度和集合缓冲区地址
        if (doflush) {
            p = (unsigned char*) r->io.fdset.buf;
            len = sdslen(r->io.fdset.buf);
        }

        /* Write in little chunchs so that when there are big writes we
         * parallelize while the kernel is sending data in background to
         * the TCP socket. */
        // 一次可能无法冲洗完,需要循环多次
        while(len) {
            // 一次最多冲洗1M字节
            size_t count = len < 1024 ? len : 1024;
            int broken = 0;
            for (j = 0; j < r->io.fdset.numfds; j++) {
                // errno为0表示ok,记录不为0的文件描述符个数
                if (r->io.fdset.state[j] != 0) {
                    /* Skip FDs alraedy in error. */
                    broken++;
                    continue;
                }

                /* Make sure to write 'count' bytes to the socket regardless
                 * of short writes. */
                size_t nwritten = 0;
                // 新写的数据一次或多次写够count个字节往第一个文件描述符fd
                while(nwritten != count) {
                    retval = write(r->io.fdset.fds[j],p+nwritten,count-nwritten);
                    // 写失败,判断是不是写阻塞,是则设置超时
                    if (retval <= 0) {
                        /* With blocking sockets, which is the sole user of this
                         * rio target, EWOULDBLOCK is returned only because of
                         * the SO_SNDTIMEO socket option, so we translate the error
                         * into one more recognizable by the user. */
                        if (retval == -1 && errno == EWOULDBLOCK) errno = ETIMEDOUT;
                        break;
                    }
                    nwritten += retval; //每次加上写成功的字节数
                }

                // 如果刚才写失败的情况,则将当前的文件描述符状态设置为错误的标记码
                if (nwritten != count) {
                    /* Mark this FD as broken. */
                    r->io.fdset.state[j] = errno;
                    if (r->io.fdset.state[j] == 0) r->io.fdset.state[j] = EIO;
                }
            }
            // 所有的文件描述符都出错返回0
            if (broken == r->io.fdset.numfds) return 0; /* All the FDs in error. */
            // 更新下次要写入的地址和长度
            p += count;
            len -= count;
            r->io.fdset.pos += count;   //已写入的偏移量
        }

        if (doflush) sdsclear(r->io.fdset.buf); //释放集合缓冲区
        return 1;
    }

    /* Returns 1 or 0 for success/failure. */
    // 文件描述符集合对象不支持读,直接返回0
    static size_t rioFdsetRead(rio *r, void *buf, size_t len) {
        UNUSED(r);
        UNUSED(buf);
        UNUSED(len);
        return 0; /* Error, this target does not support reading. */
    }

    /* Returns read/write position in file. */
    // 获取偏移量
    static off_t rioFdsetTell(rio *r) {
        return r->io.fdset.pos;
    }

    /* Flushes any buffer to target device if applicable. Returns 1 on success
     * and 0 on failures. */
    // 清洗缓冲区的值
    static int rioFdsetFlush(rio *r) {
        /* Our flush is implemented by the write method, that recognizes a
         * buffer set to NULL with a count of zero as a flush request. */
        return rioFdsetWrite(r,NULL,0);
    }

    // 初始化一个文件描述符集合对象
    static const rio rioFdsetIO = {
        rioFdsetRead,
        rioFdsetWrite,
        rioFdsetTell,
        rioFdsetFlush,
        NULL,           /* update_checksum */
        0,              /* current checksum */
        0,              /* bytes read or written */
        0,              /* read/write chunk size */
        { { NULL, 0 } } /* union for io-specific vars */
    };

    // 初始化一个文件描述符集合对象并设置成员变量
    void rioInitWithFdset(rio *r, int *fds, int numfds) {
        int j;

        *r = rioFdsetIO;
        r->io.fdset.fds = zmalloc(sizeof(int)*numfds);
        r->io.fdset.state = zmalloc(sizeof(int)*numfds);
        memcpy(r->io.fdset.fds,fds,sizeof(int)*numfds);
        for (j = 0; j < numfds; j++) r->io.fdset.state[j] = 0;
        r->io.fdset.numfds = numfds;
        r->io.fdset.pos = 0;
        r->io.fdset.buf = sdsempty();
    }

    /* release the rio stream. */
    // 释放文件描述符集合流对象
    void rioFreeFdset(rio *r) {
        zfree(r->io.fdset.fds);
        zfree(r->io.fdset.state);
        sdsfree(r->io.fdset.buf);
    }

3.4 三种对象的共同实现

  • 校验和函数
    /* ---------------------------- Generic functions ---------------------------- */
    // 通用函数
    /* This function can be installed both in memory and file streams when checksum
     * computation is needed. */
    // 根据CRC64算法进行校验和
    void rioGenericUpdateChecksum(rio *r, const void *buf, size_t len) {
        r->cksum = crc64(r->cksum,buf,len);
    }
  • 设置自动同步的限制字节
    /* Set the file-based rio object to auto-fsync every 'bytes' file written.
     * By default this is set to zero that means no automatic file sync is
     * performed.
     *
     * This feature is useful in a few contexts since when we rely on OS write
     * buffers sometimes the OS buffers way too much, resulting in too many
     * disk I/O concentrated in very little time. When we fsync in an explicit
     * way instead the I/O pressure is more distributed across time. */
    // 设置自动同步的字节数限制,如果bytes为0,则意味着不执行
    void rioSetAutoSync(rio *r, off_t bytes) {
        serverAssert(r->read == rioFileIO.read);    //限制为文件流对象,不对其他对象设置限制
        r->io.file.autosync = bytes;
    }

4. 利用rio生成AOF协议

    // 以"*<count>\r\n"格式为写如一个int整型的count
    size_t rioWriteBulkCount(rio *r, char prefix, int count) {
        char cbuf[128];
        int clen;

        // 构建一个 "*<count>\r\n"
        cbuf[0] = prefix;
        clen = 1+ll2string(cbuf+1,sizeof(cbuf)-1,count);
        cbuf[clen++] = '\r';
        cbuf[clen++] = '\n';
        // 调用rio的接口,将cbuf写如r中
        if (rioWrite(r,cbuf,clen) == 0) return 0;
        return clen;
    }

    /* Write binary-safe string in the format: "$<count>\r\n<payload>\r\n". */
    // 以"$<count>\r\n<payload>\r\n"为格式写入一个字符串
    size_t rioWriteBulkString(rio *r, const char *buf, size_t len) {
        size_t nwritten;

        // 写入"$<len>\r\n"
        if ((nwritten = rioWriteBulkCount(r,'$',len)) == 0) return 0;
        // 追加写入一个buf,也就是<payload>部分
        if (len > 0 && rioWrite(r,buf,len) == 0) return 0;
        // 追加"\r\n"
        if (rioWrite(r,"\r\n",2) == 0) return 0;
        return nwritten+len+2;  //返回长度
    }

    /* Write a long long value in format: "$<count>\r\n<payload>\r\n". */
    // 以"$<count>\r\n<payload>\r\n"为格式写入一个longlong 值
    size_t rioWriteBulkLongLong(rio *r, long long l) {
        char lbuf[32];
        unsigned int llen;

        // 将longlong转为字符串,按字符串的格式写入
        llen = ll2string(lbuf,sizeof(lbuf),l);
        return rioWriteBulkString(r,lbuf,llen);
    }

    /* Write a double value in the format: "$<count>\r\n<payload>\r\n" */
    // 以"$<count>\r\n<payload>\r\n"为格式写入一个 double 值
    size_t rioWriteBulkDouble(rio *r, double d) {
        char dbuf[128];
        unsigned int dlen;

        //以宽度为17位的方式写到dbuf中,17位的double双精度浮点数的长度最短且无损
        dlen = snprintf(dbuf,sizeof(dbuf),"%.17g",d);
        return rioWriteBulkString(r,dbuf,dlen);
    }
赞(0) 打赏

如未加特殊说明,此网站文章均为原创,转载必须注明出处。Java 技术驿站 » Redis源码剖析和注释(十六)—- Redis输入输出的抽象(rio)
分享到: 更多 (0)

评论 抢沙发

  • 昵称 (必填)
  • 邮箱 (必填)
  • 网址

关注【Java 技术驿站】公众号,每天早上 8:10 为你推送一篇技术文章

扫描二维码关注我!


关注【Java 技术驿站】公众号 回复 “VIP”,获取 VIP 地址永久关闭弹出窗口

免费获取资源

觉得文章有用就打赏一下文章作者

支付宝扫一扫打赏

微信扫一扫打赏