From 10551622687a9b8e5d63417088fba18ca1b15725 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Sun, 11 Jan 2026 09:04:55 -0600 Subject: [PATCH 1/8] s/CXX17/CXX20/g --- r/DESCRIPTION | 2 +- r/README.md | 2 +- r/configure | 16 ++++++++-------- r/configure.win | 2 +- r/src/Makevars.in | 2 +- r/src/Makevars.ucrt | 2 +- r/tools/nixlibs.R | 13 ++++++++----- r/vignettes/install.Rmd | 4 ++-- 8 files changed, 23 insertions(+), 20 deletions(-) diff --git a/r/DESCRIPTION b/r/DESCRIPTION index 86ca441263e..34d1f5a9f46 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -28,7 +28,7 @@ URL: https://github.com/apache/arrow/, https://arrow.apache.org/docs/r/ BugReports: https://github.com/apache/arrow/issues Encoding: UTF-8 Language: en-US -SystemRequirements: C++17; for AWS S3 support on Linux, libcurl and openssl (optional); +SystemRequirements: C++20; for AWS S3 support on Linux, libcurl and openssl (optional); cmake >= 3.26 (build-time only, and only for full source build) Biarch: true Imports: diff --git a/r/README.md b/r/README.md index 1ab9206f119..c5cc647ec6b 100644 --- a/r/README.md +++ b/r/README.md @@ -44,7 +44,7 @@ There are some special cases to note: - On Linux the installation process can sometimes be more involved because CRAN does not host binaries for Linux. For more information please see the [installation guide](https://arrow.apache.org/docs/r/articles/install.html). -- If you are compiling arrow from source, please note that as of version 10.0.0, arrow requires C++17 to build. This has implications on Windows and CentOS 7. For Windows users it means you need to be running an R version of 4.0 or later. On CentOS 7, it means you need to install a newer compiler than the default system compiler gcc. See the [installation details article](https://arrow.apache.org/docs/r/articles/developers/install_details.html) for guidance. +- If you are compiling arrow from source, please note that as of version 22.0.0, arrow requires C++20 to build. 
This has implications on Windows and CentOS 7. For Windows users it means you need to be running an R version of 4.1 or later. See the [installation details article](https://arrow.apache.org/docs/r/articles/developers/install_details.html) for guidance. - Development versions of arrow are released nightly. For information on how to install nightly builds please see the [installing nightly builds](https://arrow.apache.org/docs/r/articles/install_nightly.html) article. diff --git a/r/configure b/r/configure index f64a3673f97..74f3f359d83 100755 --- a/r/configure +++ b/r/configure @@ -86,10 +86,10 @@ if [ "$ARROW_R_DEV" = "true" ] && [ -f "data-raw/codegen.R" ]; then ${R_HOME}/bin/Rscript data-raw/codegen.R fi -# Arrow requires C++17, so check for it -if [ ! "`${R_HOME}/bin/R CMD config CXX17`" ]; then +# Arrow requires C++20, so check for it +if [ ! "`${R_HOME}/bin/R CMD config CXX20`" ]; then echo "------------------------- NOTE ---------------------------" - echo "Cannot install arrow: a C++17 compiler is required." + echo "Cannot install arrow: a C++20 compiler is required." echo "See https://arrow.apache.org/docs/r/articles/install.html" echo "---------------------------------------------------------" exit 1 @@ -265,7 +265,7 @@ set_pkg_vars () { # match the substring. 
However, expr always outputs the number of matched characters # to stdout, to avoid noise in the log we redirect the output to /dev/null if [ "$UNAME" = "Darwin" ] && expr $(sw_vers -productVersion) : '10\.13' >/dev/null 2>&1; then - # avoid C++17 availability warnings on macOS < 11 + # avoid C++20 availability warnings on macOS < 11 PKG_CFLAGS="$PKG_CFLAGS -D_LIBCPP_DISABLE_AVAILABILITY" fi } @@ -408,11 +408,11 @@ else fi # Test that we can compile something with those flags -CXX17="`${R_HOME}/bin/R CMD config CXX17` -E" -CXX17FLAGS=`"${R_HOME}"/bin/R CMD config CXX17FLAGS` -CXX17STD=`"${R_HOME}"/bin/R CMD config CXX17STD` +CXX20="`${R_HOME}/bin/R CMD config CXX20` -E" +CXX20FLAGS=`"${R_HOME}"/bin/R CMD config CXX20FLAGS` +CXX20STD=`"${R_HOME}"/bin/R CMD config CXX20STD` CPPFLAGS=`"${R_HOME}"/bin/R CMD config CPPFLAGS` -TEST_CMD="${CXX17} ${CPPFLAGS} ${PKG_CFLAGS} ${CXX17FLAGS} ${CXX17STD} -xc++ -" +TEST_CMD="${CXX20} ${CPPFLAGS} ${PKG_CFLAGS} ${CXX20FLAGS} ${CXX20STD} -xc++ -" TEST_ERROR=$(echo "#include $PKG_TEST_HEADER" | ${TEST_CMD} -o /dev/null 2>&1) if [ $? -eq 0 ]; then diff --git a/r/configure.win b/r/configure.win index 433ef28439a..50d3e70081d 100755 --- a/r/configure.win +++ b/r/configure.win @@ -122,7 +122,7 @@ set_pkg_vars () { # match the substring. 
However, expr always outputs the number of matched characters # to stdout, to avoid noise in the log we redirect the output to /dev/null if [ "$UNAME" = "Darwin" ] && expr $(sw_vers -productVersion) : '10\.13' >/dev/null 2>&1; then - # avoid C++17 availability warnings on macOS < 11 + # avoid C++20 availability warnings on macOS < 11 PKG_CFLAGS="$PKG_CFLAGS -D_LIBCPP_DISABLE_AVAILABILITY" fi } diff --git a/r/src/Makevars.in b/r/src/Makevars.in index af0826faacb..1b7ad08e1cb 100644 --- a/r/src/Makevars.in +++ b/r/src/Makevars.in @@ -25,7 +25,7 @@ PKG_CPPFLAGS=@cflags@ # https://bugs.llvm.org/show_bug.cgi?id=39191 # https://www.mail-archive.com/gcc-bugs@gcc.gnu.org/msg534862.html # PKG_CXXFLAGS=$(CXX_VISIBILITY) -CXX_STD=CXX17 +CXX_STD=CXX20 PKG_LIBS=@libs@ all: $(SHLIB) purify diff --git a/r/src/Makevars.ucrt b/r/src/Makevars.ucrt index a91dedc2d55..b72ed64d98e 100644 --- a/r/src/Makevars.ucrt +++ b/r/src/Makevars.ucrt @@ -19,4 +19,4 @@ CRT=-ucrt include Makevars.win # XXX for some reason, this variable doesn't seem propagated from Makevars.win -CXX_STD=CXX17 +CXX_STD=CXX20 diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index 9d0a2604682..f4ccb4956a8 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -310,11 +310,11 @@ compile_test_program <- function(code) { openssl_dir <- paste0("-I", openssl_root_dir, "/include") } runner <- paste( - R_CMD_config("CXX17"), + R_CMD_config("CXX20"), openssl_dir, R_CMD_config("CPPFLAGS"), - R_CMD_config("CXX17FLAGS"), - R_CMD_config("CXX17STD"), + R_CMD_config("CXX20FLAGS"), + R_CMD_config("CXX20STD"), "-E", "-xc++" ) @@ -565,8 +565,11 @@ build_libarrow <- function(src_dir, dst_dir) { # is found, it will be used by the libarrow build, and this does # not affect how R compiles the arrow bindings. 
CC = sub("^.*ccache", "", R_CMD_config("CC")), - CXX = paste(sub("^.*ccache", "", R_CMD_config("CXX17")), R_CMD_config("CXX17STD")), - # CXXFLAGS = R_CMD_config("CXX17FLAGS"), # We don't want the same debug symbols + CXX = paste( + sub("^.*ccache", "", R_CMD_config("CXX20")), + R_CMD_config("CXX20STD") + ), + # CXXFLAGS = R_CMD_config("CXX20FLAGS"), # We don't want the same debug symbols LDFLAGS = R_CMD_config("LDFLAGS"), N_JOBS = ncores ) diff --git a/r/vignettes/install.Rmd b/r/vignettes/install.Rmd index 69780bd64df..d9cdcc3885c 100644 --- a/r/vignettes/install.Rmd +++ b/r/vignettes/install.Rmd @@ -23,8 +23,8 @@ but there are a few things to note. ### Compilers -As of version 10.0.0, arrow requires a C++17 compiler to build. -For `gcc`, this generally means version 7 or newer. Most contemporary Linux +As of version 22.0.0, arrow requires a C++20 compiler to build. +For `gcc`, this generally means version 10 or newer. Most contemporary Linux distributions have a new enough compiler; however, CentOS 7 is a notable exception, as it ships with gcc 4.8. From c9b9544d3ff72495fbed51ec710a2c6c000a1ac8 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Tue, 13 Jan 2026 21:26:45 -0600 Subject: [PATCH 2/8] Windows fix? --- r/src/compute.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/r/src/compute.cpp b/r/src/compute.cpp index 0777ca8bc72..2dd7085b77f 100644 --- a/r/src/compute.cpp +++ b/r/src/compute.cpp @@ -163,10 +163,20 @@ std::shared_ptr make_compute_options( // cpp11 does not support bool here so use int auto orders = cpp11::as_cpp>(options["orders"]); std::vector keys; +// GCC 14+ with C++20 raises a false positive -Wmaybe-uninitialized warning +// due to std::variant move operations in arrow::FieldRef. This is a known +// GCC issue with variant's move constructor analysis. 
+#if defined(__GNUC__) && __GNUC__ >= 14 +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" +#endif for (size_t i = 0; i < names.size(); i++) { keys.push_back( Key(names[i], (orders[i] > 0) ? Order::Descending : Order::Ascending)); } +#if defined(__GNUC__) && __GNUC__ >= 14 +#pragma GCC diagnostic pop +#endif auto out = std::make_shared(Options(keys)); return out; } From c8e6f3d87a923724aad17ef22f9ad1f1597b8be7 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Tue, 13 Jan 2026 22:04:31 -0600 Subject: [PATCH 3/8] try a different valgrind image --- compose.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compose.yaml b/compose.yaml index 84481e1af76..2bd38a381e8 100644 --- a/compose.yaml +++ b/compose.yaml @@ -1718,9 +1718,9 @@ services: cache_from: - ${REPO}:amd64-ubuntu-r-valgrind args: - base: wch1/r-debug:latest + base: rhub/valgrind:latest cmake: ${CMAKE} - r_bin: RDvalgrind + r_bin: R tz: ${TZ} environment: <<: [*common, *ccache, *sccache] From 57abbb9ffde1ef88d34e775140ef9c4b585cac74 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Tue, 13 Jan 2026 22:12:52 -0600 Subject: [PATCH 4/8] R CMD check pragma shift --- r/src/Makevars.in | 5 +++++ r/src/compute.cpp | 10 ---------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/r/src/Makevars.in b/r/src/Makevars.in index 1b7ad08e1cb..7af78fe87bd 100644 --- a/r/src/Makevars.in +++ b/r/src/Makevars.in @@ -25,6 +25,11 @@ PKG_CPPFLAGS=@cflags@ # https://bugs.llvm.org/show_bug.cgi?id=39191 # https://www.mail-archive.com/gcc-bugs@gcc.gnu.org/msg534862.html # PKG_CXXFLAGS=$(CXX_VISIBILITY) + +# GCC 14+ with C++20 raises false positive -Wmaybe-uninitialized warnings +# due to std::variant move operations in arrow::FieldRef. 
+# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80635 +PKG_CXXFLAGS=-Wno-maybe-uninitialized CXX_STD=CXX20 PKG_LIBS=@libs@ diff --git a/r/src/compute.cpp b/r/src/compute.cpp index 2dd7085b77f..0777ca8bc72 100644 --- a/r/src/compute.cpp +++ b/r/src/compute.cpp @@ -163,20 +163,10 @@ std::shared_ptr make_compute_options( // cpp11 does not support bool here so use int auto orders = cpp11::as_cpp>(options["orders"]); std::vector keys; -// GCC 14+ with C++20 raises a false positive -Wmaybe-uninitialized warning -// due to std::variant move operations in arrow::FieldRef. This is a known -// GCC issue with variant's move constructor analysis. -#if defined(__GNUC__) && __GNUC__ >= 14 -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" -#endif for (size_t i = 0; i < names.size(); i++) { keys.push_back( Key(names[i], (orders[i] > 0) ? Order::Descending : Order::Ascending)); } -#if defined(__GNUC__) && __GNUC__ >= 14 -#pragma GCC diagnostic pop -#endif auto out = std::make_shared(Options(keys)); return out; } From a5f02739467e84172d63a39142301e489e6ffa78 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Tue, 13 Jan 2026 22:58:55 -0600 Subject: [PATCH 5/8] Try again --- r/configure | 8 -------- r/configure.win | 13 +++++-------- r/src/Makevars.in | 5 ----- 3 files changed, 5 insertions(+), 21 deletions(-) diff --git a/r/configure b/r/configure index 74f3f359d83..9e92eb6b47f 100755 --- a/r/configure +++ b/r/configure @@ -260,14 +260,6 @@ set_pkg_vars () { if [ "$ARROW_R_CXXFLAGS" ]; then PKG_CFLAGS="$PKG_CFLAGS $ARROW_R_CXXFLAGS" fi - - # We use expr because the product version returns more than just 10.13 and we want to - # match the substring. 
However, expr always outputs the number of matched characters - # to stdout, to avoid noise in the log we redirect the output to /dev/null - if [ "$UNAME" = "Darwin" ] && expr $(sw_vers -productVersion) : '10\.13' >/dev/null 2>&1; then - # avoid C++20 availability warnings on macOS < 11 - PKG_CFLAGS="$PKG_CFLAGS -D_LIBCPP_DISABLE_AVAILABILITY" - fi } # If we have pkg-config, it will tell us what libarrow needs diff --git a/r/configure.win b/r/configure.win index 50d3e70081d..365b56a2862 100755 --- a/r/configure.win +++ b/r/configure.win @@ -117,14 +117,6 @@ set_pkg_vars () { if [ "$ARROW_R_CXXFLAGS" ]; then PKG_CFLAGS="$PKG_CFLAGS $ARROW_R_CXXFLAGS" fi - - # We use expr because the product version returns more than just 10.13 and we want to - # match the substring. However, expr always outputs the number of matched characters - # to stdout, to avoid noise in the log we redirect the output to /dev/null - if [ "$UNAME" = "Darwin" ] && expr $(sw_vers -productVersion) : '10\.13' >/dev/null 2>&1; then - # avoid C++20 availability warnings on macOS < 11 - PKG_CFLAGS="$PKG_CFLAGS -D_LIBCPP_DISABLE_AVAILABILITY" - fi } # If we have pkg-config, it will tell us what libarrow needs @@ -328,6 +320,11 @@ if [ "$ARROW_R_CXXFLAGS" ]; then PKG_CFLAGS="$PKG_CFLAGS $ARROW_R_CXXFLAGS" fi +# GCC 14+ with C++20 raises false positive -Wmaybe-uninitialized warnings +# due to std::variant move operations in arrow::FieldRef. 
+# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80635 +PKG_CFLAGS="$PKG_CFLAGS -Wno-maybe-uninitialized" + echo "*** Writing $(pwd)/src/Makevars.win" sed -e "s|@cflags@|$PKG_CFLAGS|" -e "s|@libs@|$PKG_LIBS|" src/Makevars.in > src/Makevars.win diff --git a/r/src/Makevars.in b/r/src/Makevars.in index 7af78fe87bd..1b7ad08e1cb 100644 --- a/r/src/Makevars.in +++ b/r/src/Makevars.in @@ -25,11 +25,6 @@ PKG_CPPFLAGS=@cflags@ # https://bugs.llvm.org/show_bug.cgi?id=39191 # https://www.mail-archive.com/gcc-bugs@gcc.gnu.org/msg534862.html # PKG_CXXFLAGS=$(CXX_VISIBILITY) - -# GCC 14+ with C++20 raises false positive -Wmaybe-uninitialized warnings -# due to std::variant move operations in arrow::FieldRef. -# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80635 -PKG_CXXFLAGS=-Wno-maybe-uninitialized CXX_STD=CXX20 PKG_LIBS=@libs@ From 6ef56a53fe669792224963d1a4fad35aed843704 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Wed, 14 Jan 2026 07:51:40 -0600 Subject: [PATCH 6/8] A different windows fix --- r/configure.win | 5 ----- r/src/compute.cpp | 5 +++-- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/r/configure.win b/r/configure.win index 365b56a2862..16c5ec1bee8 100755 --- a/r/configure.win +++ b/r/configure.win @@ -320,11 +320,6 @@ if [ "$ARROW_R_CXXFLAGS" ]; then PKG_CFLAGS="$PKG_CFLAGS $ARROW_R_CXXFLAGS" fi -# GCC 14+ with C++20 raises false positive -Wmaybe-uninitialized warnings -# due to std::variant move operations in arrow::FieldRef. 
-# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80635 -PKG_CFLAGS="$PKG_CFLAGS -Wno-maybe-uninitialized" - echo "*** Writing $(pwd)/src/Makevars.win" sed -e "s|@cflags@|$PKG_CFLAGS|" -e "s|@libs@|$PKG_LIBS|" src/Makevars.in > src/Makevars.win diff --git a/r/src/compute.cpp b/r/src/compute.cpp index 0777ca8bc72..180990e6320 100644 --- a/r/src/compute.cpp +++ b/r/src/compute.cpp @@ -163,9 +163,10 @@ std::shared_ptr make_compute_options( // cpp11 does not support bool here so use int auto orders = cpp11::as_cpp>(options["orders"]); std::vector keys; + keys.reserve(names.size()); for (size_t i = 0; i < names.size(); i++) { - keys.push_back( - Key(names[i], (orders[i] > 0) ? Order::Descending : Order::Ascending)); + Order order = (orders[i] > 0) ? Order::Descending : Order::Ascending; + keys.emplace_back(names[i], order); } auto out = std::make_shared(Options(keys)); return out; From 0a010315d305868f958dc5adc55067e0114b9154 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Wed, 14 Jan 2026 08:19:51 -0600 Subject: [PATCH 7/8] Another, different approach --- r/src/compute.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/r/src/compute.cpp b/r/src/compute.cpp index 180990e6320..5d1fb5af658 100644 --- a/r/src/compute.cpp +++ b/r/src/compute.cpp @@ -155,20 +155,18 @@ std::shared_ptr make_compute_options( } if (func_name == "sort_indices") { - using Key = arrow::compute::SortKey; using Order = arrow::compute::SortOrder; using Options = arrow::compute::SortOptions; auto names = cpp11::as_cpp>(options["names"]); // false means descending, true means ascending // cpp11 does not support bool here so use int auto orders = cpp11::as_cpp>(options["orders"]); - std::vector keys; - keys.reserve(names.size()); + auto out = std::make_shared(); + out->sort_keys.reserve(names.size()); for (size_t i = 0; i < names.size(); i++) { - Order order = (orders[i] > 0) ? 
Order::Descending : Order::Ascending; - keys.emplace_back(names[i], order); + out->sort_keys.emplace_back(names[i], + (orders[i] > 0) ? Order::Descending : Order::Ascending); } - auto out = std::make_shared(Options(keys)); return out; } From 60fb68784ecbb0fa46b0b04dd0a5d039479b313d Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Wed, 14 Jan 2026 09:33:46 -0600 Subject: [PATCH 8/8] Another try --- r/src/compute.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/r/src/compute.cpp b/r/src/compute.cpp index 5d1fb5af658..c8aa903bf06 100644 --- a/r/src/compute.cpp +++ b/r/src/compute.cpp @@ -155,18 +155,20 @@ std::shared_ptr make_compute_options( } if (func_name == "sort_indices") { + using Key = arrow::compute::SortKey; using Order = arrow::compute::SortOrder; using Options = arrow::compute::SortOptions; auto names = cpp11::as_cpp>(options["names"]); // false means descending, true means ascending // cpp11 does not support bool here so use int auto orders = cpp11::as_cpp>(options["orders"]); - auto out = std::make_shared(); - out->sort_keys.reserve(names.size()); + // Pre-size the vector, then assign, to avoid growth operations that trigger + // false positive -Wmaybe-uninitialized warnings in GCC 14 with std::variant + std::vector keys(names.size(), Key("", Order::Ascending)); for (size_t i = 0; i < names.size(); i++) { - out->sort_keys.emplace_back(names[i], - (orders[i] > 0) ? Order::Descending : Order::Ascending); + keys[i] = Key(names[i], (orders[i] > 0) ? Order::Descending : Order::Ascending); } + auto out = std::make_shared(std::move(keys)); return out; }