From 8775e63ca44c86c1519fe69243db36195dd8ea4f Mon Sep 17 00:00:00 2001 From: raffertyyu Date: Mon, 20 Oct 2025 17:10:19 +0800 Subject: [PATCH] avoid mostly harmless integer overflow in cjson and fix CVE-2025-46819 --- src/thirdparty/lua/src/llex.c | 34 +++++++++++++--------- src/thirdparty/lua/src/lua_cjson.c | 3 +- src/thirdparty/lua/src/strbuf.c | 3 +- tests/rr_unit/scripting.tcl | 46 ++++++++++++++++++++++++++++++ 4 files changed, 69 insertions(+), 17 deletions(-) diff --git a/src/thirdparty/lua/src/llex.c b/src/thirdparty/lua/src/llex.c index 88c6790c..efad7092 100644 --- a/src/thirdparty/lua/src/llex.c +++ b/src/thirdparty/lua/src/llex.c @@ -138,6 +138,7 @@ static void inclinenumber (LexState *ls) { void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source) { + ls->t.token = 0; ls->decpoint = '.'; ls->L = L; ls->lookahead.token = TK_EOS; /* no look-ahead token */ @@ -206,9 +207,13 @@ static void read_numeral (LexState *ls, SemInfo *seminfo) { trydecpoint(ls, seminfo); /* try to update decimal point separator */ } - -static int skip_sep (LexState *ls) { - int count = 0; +/* +** reads a sequence '[=*[' or ']=*]', leaving the last bracket. +** If a sequence is well-formed, return its number of '='s + 2; otherwise, +** return 1 if there is no '='s or 0 otherwise (an unfinished '[==...'). +*/ +static size_t skip_sep (LexState *ls) { + size_t count = 0; int s = ls->current; lua_assert(s == '[' || s == ']'); save_and_next(ls); @@ -216,11 +221,13 @@ static int skip_sep (LexState *ls) { save_and_next(ls); count++; } - return (ls->current == s) ? count : (-count) - 1; + return (ls->current == s) ? count + 2 + : (count == 0) ? 1 + : 0; } -static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) { +static void read_long_string (LexState *ls, SemInfo *seminfo, size_t sep) { int cont = 0; (void)(cont); /* avoid warnings when `cont' is not used */ save_and_next(ls); /* skip 2nd `[' */ @@ -270,8 +277,8 @@ static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) { } } endloop: if (seminfo) - seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + (2 + sep), - luaZ_bufflen(ls->buff) - 2*(2 + sep)); + seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + sep, + luaZ_bufflen(ls->buff) - 2 * sep); } @@ -346,9 +353,9 @@ static int llex (LexState *ls, SemInfo *seminfo) { /* else is a comment */ next(ls); if (ls->current == '[') { - int sep = skip_sep(ls); + size_t sep = skip_sep(ls); luaZ_resetbuffer(ls->buff); /* `skip_sep' may dirty the buffer */ - if (sep >= 0) { + if (sep >= 2) { read_long_string(ls, NULL, sep); /* long comment */ luaZ_resetbuffer(ls->buff); continue; @@ -360,13 +367,14 @@ static int llex (LexState *ls, SemInfo *seminfo) { continue; } case '[': { - int sep = skip_sep(ls); - if (sep >= 0) { + size_t sep = skip_sep(ls); + if (sep >= 2) { read_long_string(ls, seminfo, sep); return TK_STRING; } - else if (sep == -1) return '['; - else luaX_lexerror(ls, "invalid long string delimiter", TK_STRING); + else if (sep == 0) /* '[=...' missing second bracket */ + luaX_lexerror(ls, "invalid long string delimiter", TK_STRING); + return '['; } case '=': { next(ls); diff --git a/src/thirdparty/lua/src/lua_cjson.c b/src/thirdparty/lua/src/lua_cjson.c index 1d507ea0..d91fc0bd 100644 --- a/src/thirdparty/lua/src/lua_cjson.c +++ b/src/thirdparty/lua/src/lua_cjson.c @@ -464,9 +464,8 @@ static void json_encode_exception(lua_State *l, json_config_t *cfg, strbuf_t *js static void json_append_string(lua_State *l, strbuf_t *json, int lindex) { const char *escstr; - int i; const char *str; - size_t len; + size_t i, len; str = lua_tolstring(l, lindex, &len); diff --git a/src/thirdparty/lua/src/strbuf.c b/src/thirdparty/lua/src/strbuf.c index 775e8baf..97ee940c 100644 --- a/src/thirdparty/lua/src/strbuf.c +++ b/src/thirdparty/lua/src/strbuf.c @@ -176,8 +176,7 @@ void strbuf_resize(strbuf_t *s, size_t len) void strbuf_append_string(strbuf_t *s, const char *str) { - int i; - size_t space; + size_t i, space; space = strbuf_empty_length(s); diff --git a/tests/rr_unit/scripting.tcl b/tests/rr_unit/scripting.tcl index 01fa559a..2de8f41c 100644 --- a/tests/rr_unit/scripting.tcl +++ b/tests/rr_unit/scripting.tcl @@ -614,6 +614,52 @@ start_server {tags {"scripting"}} { } } +# start a new server to test the large-memory tests +start_server {tags {"scripting external:skip large-memory"}} { + test {EVAL - JSON string encoding a string larger than 2GB} { + run_script { + local s = string.rep("a", 1024 * 1024 * 1024) + return #cjson.encode(s..s..s) + } 0 + } {3221225474} ;# length includes two double quotes at both ends + + test {EVAL - Test long escape sequences for strings} { + run_script { + -- Generate 1gb '==...==' separator + local s = string.rep('=', 1024 * 1024) + local t = {} for i=1,1024 do t[i] = s end + local sep = table.concat(t) + collectgarbage('collect') + + local code = table.concat({'return [',sep,'[x]',sep,']'}) + collectgarbage('collect') + + -- Load the code and run it. Script will return the string length. + -- Escape sequence: [=....=[ to ]=...=] will be ignored + -- Actual string is a single character: 'x'. Script will return 1 + local func = loadstring(code) + return #func() + } 0 + } {1} + + test {EVAL - Lua can parse string with too many new lines} { + # Create a long string consisting only of newline characters. When Lua + # fails to parse a string, it typically includes a snippet like + # "... near ..." in the error message to indicate the last recognizable + # token. In this test, since the input contains only newlines, there + # should be no identifiable token, so the error message should contain + # only the actual error, without a near clause. + + run_script { + local s = string.rep('\n', 1024 * 1024) + local t = {} for i=1,2048 do t[#t+1] = s end + local lines = table.concat(t) + local fn, err = loadstring(lines) + return err + } 0 + } {*chunk has too many lines} +} + start_server {tags {"scripting repl"}} { start_server {} { test {Before the slave connects we issue two EVAL commands} {