0

0

硬盘写满后redis的处理机制

php中文网

php中文网

发布时间:2016-06-07 16:09:50

|

1642人浏览过

|

来源于php中文网

原创

前些天一台redis机器硬盘写满了,主要是由于程序bug导致备份量激增,而恰好监控程序的通知机制也罢工了,于是第一次体验到了redis的罢工(只读不写)。 现在我们来看下在磁盘写满后redis的处理机制: save流程:serverCron-rdbSaveBackground-rdbSave save后

前些天一台redis机器硬盘写满了,主要是由于程序bug导致备份量激增,而恰好监控程序的通知机制也罢工了,于是第一次体验到了redis的罢工(只读不写)。

现在我们来看下在磁盘写满后redis的处理机制:

save流程:serverCron->rdbSaveBackground->rdbSave
save后流程:serverCron->backgroundSaveDoneHandler
上述流程产生的结果就是server.lastbgsave_status = REDIS_ERR,

受其影响,processCommand和luaRedisGenericCommand中判断如果是写操作,则直接返回REDIS_OK,而没有实际写入

1.rdbSave所有的写出错都会返回REDIS_ERR

超级简历WonderCV
超级简历WonderCV

免费求职简历模版下载制作,应届生职场人必备简历制作神器

下载
int rdbSave(char *filename) {
    dictIterator *di = NULL;
    dictEntry *de;
    char tmpfile[256];
    char magic[10];
    int j;
    long long now = mstime();
    FILE *fp;
    rio rdb;
    uint64_t cksum;

    snprintf(tmpfile,256,"temp-%d.rdb", (int) getpid());
    fp = fopen(tmpfile,"w");
    if (!fp) {
        redisLog(REDIS_WARNING, "Failed opening .rdb for saving: %s",
            strerror(errno));
        return REDIS_ERR;
    }

    rioInitWithFile(&rdb,fp);
    if (server.rdb_checksum)
        rdb.update_cksum = rioGenericUpdateChecksum;
    snprintf(magic,sizeof(magic),"REDIS%04d",REDIS_RDB_VERSION);
    if (rdbWriteRaw(&rdb,magic,9) == -1) goto werr;

    for (j = 0; j < server.dbnum; j++) {
        redisDb *db = server.db+j;
        dict *d = db->dict;
        if (dictSize(d) == 0) continue;
        di = dictGetSafeIterator(d);
        if (!di) {
            fclose(fp);
            return REDIS_ERR;
        }

        /* Write the SELECT DB opcode */
        if (rdbSaveType(&rdb,REDIS_RDB_OPCODE_SELECTDB) == -1) goto werr;
        if (rdbSaveLen(&rdb,j) == -1) goto werr;

        /* Iterate this DB writing every entry */
        while((de = dictNext(di)) != NULL) {
            sds keystr = dictGetKey(de);
            robj key, *o = dictGetVal(de);
            long long expire;

            initStaticStringObject(key,keystr);
            expire = getExpire(db,&key);
            if (rdbSaveKeyValuePair(&rdb,&key,o,expire,now) == -1) goto werr;
        }
        dictReleaseIterator(di);
    }
    di = NULL; /* So that we don't release it again on error. */

    /* EOF opcode */
    if (rdbSaveType(&rdb,REDIS_RDB_OPCODE_EOF) == -1) goto werr;

    /* CRC64 checksum. It will be zero if checksum computation is disabled, the
     * loading code skips the check in this case. */
    cksum = rdb.cksum;
    memrev64ifbe(&cksum);
    if (rioWrite(&rdb,&cksum,8) == 0) goto werr;

    /* Make sure data will not remain on the OS's output buffers */
    if (fflush(fp) == EOF) goto werr;
    if (fsync(fileno(fp)) == -1) goto werr;
    if (fclose(fp) == EOF) goto werr;

    /* Use RENAME to make sure the DB file is changed atomically only
     * if the generate DB file is ok. */
    if (rename(tmpfile,filename) == -1) {
        redisLog(REDIS_WARNING,"Error moving temp DB file on the final destination: %s", strerror(errno));
        unlink(tmpfile);
        return REDIS_ERR;
    }
    redisLog(REDIS_NOTICE,"DB saved on disk");
    server.dirty = 0;
    server.lastsave = time(NULL);
    server.lastbgsave_status = REDIS_OK;
    return REDIS_OK;

werr:
    fclose(fp);
    unlink(tmpfile);
    redisLog(REDIS_WARNING,"Write error saving DB on disk: %s", strerror(errno));
    if (di) dictReleaseIterator(di);
    return REDIS_ERR;
}

2.rdbSaveBackground中,如果子进程调用rdbsave返回REDIS_ERR,那么子进程exit(1)

int rdbSaveBackground(char *filename) {
    pid_t childpid;
    long long start;

    if (server.rdb_child_pid != -1) return REDIS_ERR;

    server.dirty_before_bgsave = server.dirty;
    server.lastbgsave_try = time(NULL);

    start = ustime();
    if ((childpid = fork()) == 0) {
        int retval;

        /* Child */
        closeListeningSockets(0);
        redisSetProcTitle("redis-rdb-bgsave");
        retval = rdbSave(filename);
        if (retval == REDIS_OK) {
            size_t private_dirty = zmalloc_get_private_dirty();

            if (private_dirty) {
                redisLog(REDIS_NOTICE,
                    "RDB: %zu MB of memory used by copy-on-write",
                    private_dirty/(1024*1024));
            }
        }
        exitFromChild((retval == REDIS_OK) ? 0 : 1);       //进程退出时返回0/1
    } else {
        /* Parent */
        server.stat_fork_time = ustime()-start;
        if (childpid == -1) {
            server.lastbgsave_status = REDIS_ERR;
            redisLog(REDIS_WARNING,"Can't save in background: fork: %s",
                strerror(errno));
            return REDIS_ERR;
        }
        redisLog(REDIS_NOTICE,"Background saving started by pid %d",childpid);
        server.rdb_save_time_start = time(NULL);
        server.rdb_child_pid = childpid;
        updateDictResizePolicy();
        return REDIS_OK;
    }
    return REDIS_OK; /* unreached */
}
3.bgsave完成后,serverCron中得到bgsave子进程的返回码进行后续处理
    /* Check if a background saving or AOF rewrite in progress terminated. */
    if (server.rdb_child_pid != -1 || server.aof_child_pid != -1) {
        int statloc;
        pid_t pid;

        if ((pid = wait3(&statloc,WNOHANG,NULL)) != 0) {
            int exitcode = WEXITSTATUS(statloc);
            int bysignal = 0;

            if (WIFSIGNALED(statloc)) bysignal = WTERMSIG(statloc);

            if (pid == server.rdb_child_pid) {
                backgroundSaveDoneHandler(exitcode,bysignal);    //根据bgsave子进程的exitcode以及是否由信号结束的标签进行后续处理
            } else if (pid == server.aof_child_pid) {
                backgroundRewriteDoneHandler(exitcode,bysignal);
            } else {
                redisLog(REDIS_WARNING,
                    "Warning, detected child with unmatched pid: %ld",
                    (long)pid);
            }
            updateDictResizePolicy();
        }
    }
4.如果子进程非信号结束,并且exitcode非0,那么设置bgsave状态为REDIS_ERR
void backgroundSaveDoneHandler(int exitcode, int bysignal) {
    if (!bysignal && exitcode == 0) {
        redisLog(REDIS_NOTICE,
            "Background saving terminated with success");
        server.dirty = server.dirty - server.dirty_before_bgsave;
        server.lastsave = time(NULL);
        server.lastbgsave_status = REDIS_OK;
    } else if (!bysignal && exitcode != 0) {
        redisLog(REDIS_WARNING, "Background saving error");
        server.lastbgsave_status = REDIS_ERR;      //状态转换
    } else {
        mstime_t latency;

        redisLog(REDIS_WARNING,
            "Background saving terminated by signal %d", bysignal);
        latencyStartMonitor(latency);
        rdbRemoveTempFile(server.rdb_child_pid);
        latencyEndMonitor(latency);
        latencyAddSampleIfNeeded("rdb-unlink-temp-file",latency);
        /* SIGUSR1 is whitelisted, so we have a way to kill a child without
         * tirggering an error conditon. */
        if (bysignal != SIGUSR1)
            server.lastbgsave_status = REDIS_ERR;
    }
    server.rdb_child_pid = -1;
    server.rdb_save_time_last = time(NULL)-server.rdb_save_time_start;
    server.rdb_save_time_start = -1;
    /* Possibly there are slaves waiting for a BGSAVE in order to be served
     * (the first stage of SYNC is a bulk transfer of dump.rdb) */
    updateSlavesWaitingBgsave((!bysignal && exitcode == 0) ? REDIS_OK : REDIS_ERR);
}
5.processCommand中判定cmd是写操作的话,直接返回REDIS_OK
    /* Don't accept write commands if there are problems persisting on disk
     * and if this is a master instance. */
    if (((server.stop_writes_on_bgsave_err &&
          server.saveparamslen > 0 &&
          server.lastbgsave_status == REDIS_ERR) ||
          server.aof_last_write_status == REDIS_ERR) &&
        server.masterhost == NULL &&
        (c->cmd->flags & REDIS_CMD_WRITE ||
         c->cmd->proc == pingCommand))
    {
        flagTransaction(c);
        if (server.aof_last_write_status == REDIS_OK)
            addReply(c, shared.bgsaveerr);
        else
            addReplySds(c,
                sdscatprintf(sdsempty(),
                "-MISCONF Errors writing to the AOF file: %s\r\n",
                strerror(server.aof_last_write_errno)));
        return REDIS_OK;
    }
6.luaRedisGenericCommand中判定cmd是写操作的话,屏蔽
    /* Write commands are forbidden against read-only slaves, or if a
     * command marked as non-deterministic was already called in the context
     * of this script. */
    if (cmd->flags & REDIS_CMD_WRITE) {
        if (server.lua_random_dirty) {
            luaPushError(lua,
                "Write commands not allowed after non deterministic commands");
            goto cleanup;
        } else if (server.masterhost && server.repl_slave_ro &&
                   !server.loading &&
                   !(server.lua_caller->flags & REDIS_MASTER))
        {
            luaPushError(lua, shared.roslaveerr->ptr);
            goto cleanup;
        } else if (server.stop_writes_on_bgsave_err &&
                   server.saveparamslen > 0 &&
                   server.lastbgsave_status == REDIS_ERR)
        {
            luaPushError(lua, shared.bgsaveerr->ptr);
            goto cleanup;
        }
    }

cleanup:
    /* Clean up. Command code may have changed argv/argc so we use the
     * argv/argc of the client instead of the local variables. */
    for (j = 0; j < c->argc; j++) {
        robj *o = c->argv[j];

        /* Try to cache the object in the cached_objects array.
         * The object must be small, SDS-encoded, and with refcount = 1
         * (we must be the only owner) for us to cache it. */
        if (j < LUA_CMD_OBJCACHE_SIZE &&
            o->refcount == 1 &&
            o->encoding == REDIS_ENCODING_RAW &&
            sdslen(o->ptr) <= LUA_CMD_OBJCACHE_MAX_LEN)
        {
            struct sdshdr *sh = (void*)(((char*)(o->ptr))-(sizeof(struct sdshdr)));

            if (cached_objects[j]) decrRefCount(cached_objects[j]);
            cached_objects[j] = o;
            cached_objects_len[j] = sh->free + sh->len;
        } else {
            decrRefCount(o);
        }
    }

    if (c->argv != argv) {
        zfree(c->argv);
        argv = NULL;
    }

    if (raise_error) {
        /* If we are here we should have an error in the stack, in the
         * form of a table with an "err" field. Extract the string to
         * return the plain error. */
        lua_pushstring(lua,"err");
        lua_gettable(lua,-2);
        return lua_error(lua);
    }
    return 1;  

相关专题

更多
php源码安装教程大全
php源码安装教程大全

本专题整合了php源码安装教程,阅读专题下面的文章了解更多详细内容。

61

2025.12.31

php网站源码教程大全
php网站源码教程大全

本专题整合了php网站源码相关教程,阅读专题下面的文章了解更多详细内容。

41

2025.12.31

视频文件格式
视频文件格式

本专题整合了视频文件格式相关内容,阅读专题下面的文章了解更多详细内容。

32

2025.12.31

不受国内限制的浏览器大全
不受国内限制的浏览器大全

想找真正自由、无限制的上网体验?本合集精选2025年最开放、隐私强、访问无阻的浏览器App,涵盖Tor、Brave、Via、X浏览器、Mullvad等高自由度工具。支持自定义搜索引擎、广告拦截、隐身模式及全球网站无障碍访问,部分更具备防追踪、去谷歌化、双内核切换等高级功能。无论日常浏览、隐私保护还是突破地域限制,总有一款适合你!

41

2025.12.31

出现404解决方法大全
出现404解决方法大全

本专题整合了404错误解决方法大全,阅读专题下面的文章了解更多详细内容。

198

2025.12.31

html5怎么播放视频
html5怎么播放视频

想让网页流畅播放视频?本合集详解HTML5视频播放核心方法!涵盖<video>标签基础用法、多格式兼容(MP4/WebM/OGV)、自定义播放控件、响应式适配及常见浏览器兼容问题解决方案。无需插件,纯前端实现高清视频嵌入,助你快速打造现代化网页视频体验。

9

2025.12.31

关闭win10系统自动更新教程大全
关闭win10系统自动更新教程大全

本专题整合了关闭win10系统自动更新教程大全,阅读专题下面的文章了解更多详细内容。

8

2025.12.31

阻止电脑自动安装软件教程
阻止电脑自动安装软件教程

本专题整合了阻止电脑自动安装软件教程,阅读专题下面的文章了解更多详细教程。

3

2025.12.31

html5怎么使用
html5怎么使用

想快速上手HTML5开发?本合集为你整理最实用的HTML5使用指南!涵盖HTML5基础语法、主流框架(如Bootstrap、Vue、React)集成方法,以及无需安装、直接在线编辑运行的平台推荐(如CodePen、JSFiddle)。无论你是新手还是进阶开发者,都能轻松掌握HTML5网页制作、响应式布局与交互功能开发,零配置开启高效前端编程之旅!

2

2025.12.31

热门下载

更多
网站特效
/
网站源码
/
网站素材
/
前端模板

精品课程

更多
相关推荐
/
热门推荐
/
最新课程
计算机系统从应用层到底层
计算机系统从应用层到底层

共6课时 | 0.3万人学习

开源物联网开发实例
开源物联网开发实例

共6课时 | 0.4万人学习

关于我们 免责申明 举报中心 意见反馈 讲师合作 广告合作 最新更新
php中文网:公益在线php培训,帮助PHP学习者快速成长!
关注服务号 技术交流群
PHP中文网订阅号
每天精选资源文章推送

Copyright 2014-2026 https://www.php.cn/ All Rights Reserved | php.cn | 湘ICP备2023035733号