From 686d20ad53852bc199afbd4da1a3faa6ed9bbb16 Mon Sep 17 00:00:00 2001 From: AJIOB Date: Sun, 4 Jan 2026 11:39:25 +0300 Subject: [PATCH 1/2] Add support for C preprocessor output --- src/compiler/c.rs | 303 +++++++++++++++++++++------------------ src/compiler/clang.rs | 5 + src/compiler/compiler.rs | 96 ++++++++++++- src/compiler/gcc.rs | 5 + 4 files changed, 266 insertions(+), 143 deletions(-) diff --git a/src/compiler/c.rs b/src/compiler/c.rs index 8db84d265..1090039ae 100644 --- a/src/compiler/c.rs +++ b/src/compiler/c.rs @@ -405,7 +405,9 @@ where ); } - let use_preprocessor_cache_mode = { + let needs_preprocessing = self.parsed_args.language.needs_c_preprocessing(); + + let use_preprocessor_cache_mode = if needs_preprocessing { let can_use_preprocessor_cache_mode = preprocessor_cache_mode_config .use_preprocessor_cache_mode && !too_hard_for_preprocessor_cache_mode; @@ -432,6 +434,12 @@ where } use_preprocessor_cache_mode + } else { + debug!( + "parse_arguments: Disabling preprocessor cache because {} language doesn't need C preprocessing", + self.parsed_args.language.as_str() + ); + false }; let mut preprocessor_key = if use_preprocessor_cache_mode { @@ -448,156 +456,167 @@ where } else { None }; - if let Some(preprocessor_key) = &preprocessor_key { - if cache_control == CacheControl::Default { - if let Some(mut seekable) = storage - .get_preprocessor_cache_entry(preprocessor_key) - .await? - { - let mut buf = vec![]; - seekable.read_to_end(&mut buf)?; - let mut preprocessor_cache_entry = PreprocessorCacheEntry::read(&buf)?; - let mut updated = false; - let hit = preprocessor_cache_entry - .lookup_result_digest(preprocessor_cache_mode_config, &mut updated); - - let mut update_failed = false; - if updated { - // Time macros have been found, we need to update - // the preprocessor cache entry. See [`PreprocessorCacheEntry::result_matches`]. - debug!( - "Preprocessor cache updated because of time macros: {preprocessor_key}" - ); - if let Err(e) = storage - .put_preprocessor_cache_entry( - preprocessor_key, - preprocessor_cache_entry, - ) - .await - { - debug!("Failed to update preprocessor cache: {}", e); - update_failed = true; + let (preprocessor_output, include_files) = if needs_preprocessing { + if let Some(preprocessor_key) = &preprocessor_key { + if cache_control == CacheControl::Default { + if let Some(mut seekable) = storage + .get_preprocessor_cache_entry(preprocessor_key) + .await? + { + let mut buf = vec![]; + seekable.read_to_end(&mut buf)?; + let mut preprocessor_cache_entry = PreprocessorCacheEntry::read(&buf)?; + let mut updated = false; + let hit = preprocessor_cache_entry + .lookup_result_digest(preprocessor_cache_mode_config, &mut updated); + + let mut update_failed = false; + if updated { + // Time macros have been found, we need to update + // the preprocessor cache entry. See [`PreprocessorCacheEntry::result_matches`]. + debug!( + "Preprocessor cache updated because of time macros: {preprocessor_key}" + ); + + if let Err(e) = storage + .put_preprocessor_cache_entry( + preprocessor_key, + preprocessor_cache_entry, + ) + .await + { + debug!("Failed to update preprocessor cache: {}", e); + update_failed = true; + } } - } - if !update_failed { - if let Some(key) = hit { - debug!("Preprocessor cache hit: {preprocessor_key}"); - // A compiler binary may be a symlink to another and - // so has the same digest, but that means - // the toolchain will not contain the correct path - // to invoke the compiler! Add the compiler - // executable path to try and prevent this - let weak_toolchain_key = format!( - "{}-{}", - self.executable.to_string_lossy(), - self.executable_digest - ); - return Ok(HashResult { - key, - compilation: Box::new(CCompilation { - parsed_args: self.parsed_args.to_owned(), - is_locally_preprocessed: false, - #[cfg(feature = "dist-client")] - preprocessed_input: PREPROCESSING_SKIPPED_COMPILE_POISON - .to_vec(), - executable: self.executable.to_owned(), - compiler: self.compiler.to_owned(), - cwd: cwd.to_owned(), - env_vars: env_vars.to_owned(), - }), - weak_toolchain_key, - }); - } else { - debug!("Preprocessor cache miss: {preprocessor_key}"); + if !update_failed { + if let Some(key) = hit { + debug!("Preprocessor cache hit: {preprocessor_key}"); + // A compiler binary may be a symlink to another and + // so has the same digest, but that means + // the toolchain will not contain the correct path + // to invoke the compiler! Add the compiler + // executable path to try and prevent this + let weak_toolchain_key = format!( + "{}-{}", + self.executable.to_string_lossy(), + self.executable_digest + ); + return Ok(HashResult { + key, + compilation: Box::new(CCompilation { + parsed_args: self.parsed_args.to_owned(), + is_locally_preprocessed: false, + #[cfg(feature = "dist-client")] + preprocessed_input: PREPROCESSING_SKIPPED_COMPILE_POISON + .to_vec(), + executable: self.executable.to_owned(), + compiler: self.compiler.to_owned(), + cwd: cwd.to_owned(), + env_vars: env_vars.to_owned(), + }), + weak_toolchain_key, + }); + } else { + debug!("Preprocessor cache miss: {preprocessor_key}"); + } } } } } - } - let result = self - .compiler - .preprocess( - creator, - &self.executable, - &self.parsed_args, - &cwd, - &env_vars, - may_dist, - rewrite_includes_only, - use_preprocessor_cache_mode, - ) - .await; - let out_pretty = self.parsed_args.output_pretty().into_owned(); - let result = result.map_err(|e| { - debug!("[{}]: preprocessor failed: {:?}", out_pretty, e); - e - }); + let result = self + .compiler + .preprocess( + creator, + &self.executable, + &self.parsed_args, + &cwd, + &env_vars, + may_dist, + rewrite_includes_only, + use_preprocessor_cache_mode, + ) + .await; + let out_pretty = self.parsed_args.output_pretty().into_owned(); + let result = result.map_err(|e| { + debug!("[{}]: preprocessor failed: {:?}", out_pretty, e); + e + }); + + let outputs = self.parsed_args.outputs.clone(); + let args_cwd = cwd.clone(); + + let mut preprocessor_result = result.or_else(move |err| { + // Errors remove all traces of potential output. + debug!("removing files {:?}", &outputs); + + let v: std::result::Result<(), std::io::Error> = + outputs.values().try_for_each(|output| { + let mut path = args_cwd.clone(); + path.push(&output.path); + match fs::metadata(&path) { + // File exists, remove it. + Ok(_) => fs::remove_file(&path), + _ => Ok(()), + } + }); + if v.is_err() { + warn!("Could not remove files after preprocessing failed!"); + } - let outputs = self.parsed_args.outputs.clone(); - let args_cwd = cwd.clone(); - - let mut preprocessor_result = result.or_else(move |err| { - // Errors remove all traces of potential output. - debug!("removing files {:?}", &outputs); - - let v: std::result::Result<(), std::io::Error> = - outputs.values().try_for_each(|output| { - let mut path = args_cwd.clone(); - path.push(&output.path); - match fs::metadata(&path) { - // File exists, remove it. - Ok(_) => fs::remove_file(&path), - _ => Ok(()), + match err.downcast::() { + Ok(ProcessError(output)) => { + debug!( + "[{}]: preprocessor returned error status {:?}", + out_pretty, + output.status.code() + ); + // Drop the stdout since it's the preprocessor output, + // just hand back stderr and the exit status. + bail!(ProcessError(process::Output { + stdout: vec!(), + ..output + })) } - }); - if v.is_err() { - warn!("Could not remove files after preprocessing failed!"); - } + Err(err) => Err(err), + } + })?; - match err.downcast::() { - Ok(ProcessError(output)) => { - debug!( - "[{}]: preprocessor returned error status {:?}", - out_pretty, - output.status.code() - ); - // Drop the stdout since it's the preprocessor output, - // just hand back stderr and the exit status. - bail!(ProcessError(process::Output { - stdout: vec!(), - ..output - })) + // Remember include files needed in this preprocessing step + let mut include_files = HashMap::new(); + if preprocessor_key.is_some() { + // TODO how to propagate stats and which stats? + if !process_preprocessed_file( + &absolute_input_path, + &cwd, + &mut preprocessor_result.stdout, + &mut include_files, + preprocessor_cache_mode_config, + start_of_compilation, + StandardFsAbstraction, + )? { + debug!("Disabling preprocessor cache mode"); + preprocessor_key = None; } - Err(err) => Err(err), } - })?; - // Remember include files needed in this preprocessing step - let mut include_files = HashMap::new(); - if preprocessor_key.is_some() { - // TODO how to propagate stats and which stats? - if !process_preprocessed_file( - &absolute_input_path, - &cwd, - &mut preprocessor_result.stdout, - &mut include_files, - preprocessor_cache_mode_config, - start_of_compilation, - StandardFsAbstraction, - )? { - debug!("Disabling preprocessor cache mode"); - preprocessor_key = None; - } - } + trace!( + "[{}]: Preprocessor output is {} bytes", + self.parsed_args.output_pretty(), + preprocessor_result.stdout.len() + ); - trace!( - "[{}]: Preprocessor output is {} bytes", - self.parsed_args.output_pretty(), - preprocessor_result.stdout.len() - ); + (preprocessor_result.stdout, include_files) + } else { + // No preprocessing is supported - input is already preprocessed + ( + std::fs::read(absolute_input_path.as_path())?, + HashMap::new(), + ) + }; // Create an argument vector containing both common and arch args, to // use in creating a hash key @@ -611,7 +630,7 @@ where &common_and_arch_args, &extra_hashes, &env_vars, - &preprocessor_result.stdout, + &preprocessor_output, self.compiler.plusplus(), ) }; @@ -650,7 +669,7 @@ where parsed_args: self.parsed_args.clone(), is_locally_preprocessed: true, #[cfg(feature = "dist-client")] - preprocessed_input: preprocessor_result.stdout, + preprocessed_input: preprocessor_output, executable: self.executable.clone(), compiler: self.compiler.clone(), cwd, @@ -1640,6 +1659,8 @@ mod test { t("c", Language::C); + t("i", Language::CPreprocessed); + t("C", Language::Cxx); t("cc", Language::Cxx); t("cp", Language::Cxx); @@ -1648,6 +1669,8 @@ mod test { t("cxx", Language::Cxx); t("c++", Language::Cxx); + t("ii", Language::CxxPreprocessed); + t("h", Language::GenericHeader); t("hh", Language::CxxHeader); @@ -1661,9 +1684,13 @@ mod test { t("m", Language::ObjectiveC); + t("mi", Language::ObjectiveCPreprocessed); + t("M", Language::ObjectiveCxx); t("mm", Language::ObjectiveCxx); + t("mii", Language::ObjectiveCxxPreprocessed); + t("cu", Language::Cuda); t("hip", Language::Hip); } diff --git a/src/compiler/clang.rs b/src/compiler/clang.rs index 1910b77a7..e5304d852 100644 --- a/src/compiler/clang.rs +++ b/src/compiler/clang.rs @@ -180,13 +180,18 @@ impl CCompilerImpl for Clang { pub fn language_to_clang_arg(lang: Language) -> Option<&'static str> { match lang { + // https://github.com/llvm/llvm-project/blob/main/clang/include/clang/Driver/Types.def Language::C => Some("c"), Language::CHeader => Some("c-header"), + Language::CPreprocessed => Some("cpp-output"), Language::Cxx => Some("c++"), Language::CxxHeader => Some("c++-header"), + Language::CxxPreprocessed => Some("c++-cpp-output"), Language::ObjectiveC => Some("objective-c"), + Language::ObjectiveCPreprocessed => Some("objective-c-cpp-output"), Language::ObjectiveCxx => Some("objective-c++"), Language::ObjectiveCxxHeader => Some("objective-c++-header"), + Language::ObjectiveCxxPreprocessed => Some("objective-c++-cpp-output"), Language::Cuda => Some("cuda"), Language::CudaFE => None, Language::Ptx => None, diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index bdab84e1e..49024993e 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -218,9 +218,13 @@ pub enum Language { Cxx, GenericHeader, CHeader, + CPreprocessed, CxxHeader, + CxxPreprocessed, ObjectiveC, + ObjectiveCPreprocessed, ObjectiveCxx, + ObjectiveCxxPreprocessed, ObjectiveCxxHeader, Cuda, CudaFE, @@ -237,16 +241,16 @@ impl Language { Some("c") => Some(Language::C), // Could be C or C++ Some("h") => Some(Language::GenericHeader), - // TODO i + Some("i") => Some(Language::CPreprocessed), Some("C") | Some("cc") | Some("cp") | Some("cpp") | Some("CPP") | Some("cxx") | Some("c++") => Some(Language::Cxx), - // TODO ii + Some("ii") => Some(Language::CxxPreprocessed), Some("H") | Some("hh") | Some("hp") | Some("hpp") | Some("HPP") | Some("hxx") | Some("h++") | Some("tcc") => Some(Language::CxxHeader), Some("m") => Some(Language::ObjectiveC), - // TODO mi + Some("mi") => Some(Language::ObjectiveCPreprocessed), Some("M") | Some("mm") => Some(Language::ObjectiveCxx), - // TODO mii + Some("mii") => Some(Language::ObjectiveCxxPreprocessed), Some("cu") => Some(Language::Cuda), Some("ptx") => Some(Language::Ptx), Some("cubin") => Some(Language::Cubin), @@ -264,11 +268,15 @@ impl Language { match self { Language::C => "c", Language::CHeader => "cHeader", + Language::CPreprocessed => "cPreprocessed", Language::Cxx => "c++", Language::CxxHeader => "c++Header", + Language::CxxPreprocessed => "c++Preprocessed", Language::GenericHeader => "c/c++", Language::ObjectiveC => "objc", + Language::ObjectiveCPreprocessed => "objcPreprocessed", Language::ObjectiveCxx | Language::ObjectiveCxxHeader => "objc++", + Language::ObjectiveCxxPreprocessed => "objc++Preprocessed", Language::Cuda => "cuda", Language::CudaFE => "cuda", Language::Ptx => "ptx", @@ -277,6 +285,17 @@ impl Language { Language::Hip => "hip", } } + + pub fn needs_c_preprocessing(self) -> bool { + !matches!( + self, + Language::CPreprocessed + | Language::CxxPreprocessed + | Language::ObjectiveCPreprocessed + | Language::ObjectiveCxxPreprocessed + | Language::Rust + ) + } } impl CompilerKind { @@ -284,12 +303,16 @@ impl CompilerKind { match lang { Language::C | Language::CHeader + | Language::CPreprocessed | Language::Cxx | Language::CxxHeader + | Language::CxxPreprocessed | Language::GenericHeader | Language::ObjectiveC + | Language::ObjectiveCPreprocessed | Language::ObjectiveCxx - | Language::ObjectiveCxxHeader => "C/C++", + | Language::ObjectiveCxxHeader + | Language::ObjectiveCxxPreprocessed => "C/C++", Language::Cuda => "CUDA", Language::CudaFE => "CUDA (Device code)", Language::Ptx => "PTX", @@ -2276,6 +2299,69 @@ LLVM version: 6.0", assert_ne!(results[0].key, results[2].key); } + #[test_case(true ; "with preprocessor cache")] + #[test_case(false ; "without preprocessor cache")] + fn test_preprocessed_file_works_without_preprocessor_call(preprocessor_cache_mode: bool) { + let f = TestFixture::new(); + let clang = f.mk_bin("clang").unwrap(); + let creator = new_creator(); + let runtime = single_threaded_runtime(); + let pool = runtime.handle(); + let output = "compiler_id=clang\ncompiler_version=\"16.0.0\""; + let cwd = f.tempdir.path(); + + let results: Vec<_> = ["foo.c", "foo.i"] + .iter() + .map(|file| { + let arguments = ovec!["-c", file, "-o", "foo.o"]; + + // Write a dummy input file so the preprocessor cache mode can work + std::fs::write(f.tempdir.path().join(file), "int foo(void) { return 0; }").unwrap(); + + next_command(&creator, Ok(MockChild::new(exit_status(0), output, ""))); + let c = detect_compiler( + creator.clone(), + &clang, + f.tempdir.path(), + &[], + &[], + pool, + None, + ) + .wait() + .unwrap() + .0; + + // Only run the preprocessor on the non-preprocessed file + if !file.ends_with("i") { + next_command( + &creator, + Ok(MockChild::new(exit_status(0), "preprocessor output", "")), + ); + } + let mut hasher = match c.parse_arguments(&arguments, ".".as_ref(), &[]) { + CompilerArguments::Ok(h) => h, + o => panic!("Bad result from parse_arguments: {:?}", o), + }; + hasher + .generate_hash_key( + &creator, + cwd.to_path_buf(), + vec![], + false, + pool, + false, + Arc::new(MockStorage::new(None, preprocessor_cache_mode)), + CacheControl::Default, + ) + .wait() + .unwrap() + }) + .collect(); + assert_eq!(results.len(), 2); + assert_ne!(results[0].key, results[1].key); + } + #[test] fn test_get_compiler_info() { let creator = new_creator(); diff --git a/src/compiler/gcc.rs b/src/compiler/gcc.rs index e4c17efe5..1a66a94d3 100644 --- a/src/compiler/gcc.rs +++ b/src/compiler/gcc.rs @@ -701,13 +701,18 @@ where pub fn language_to_gcc_arg(lang: Language) -> Option<&'static str> { match lang { + // https://gcc.gnu.org/onlinedocs/gcc/Overall-Options.html Language::C => Some("c"), Language::CHeader => Some("c-header"), + Language::CPreprocessed => Some("cpp-output"), Language::Cxx => Some("c++"), Language::CxxHeader => Some("c++-header"), + Language::CxxPreprocessed => Some("c++-cpp-output"), Language::ObjectiveC => Some("objective-c"), + Language::ObjectiveCPreprocessed => Some("objective-c-cpp-output"), Language::ObjectiveCxx => Some("objective-c++"), Language::ObjectiveCxxHeader => Some("objective-c++-header"), + Language::ObjectiveCxxPreprocessed => Some("objective-c++-cpp-output"), Language::Cuda => Some("cu"), Language::CudaFE => None, Language::Ptx => None, From ff781bd97bffc5869cdcb454e93958101b2e59a7 Mon Sep 17 00:00:00 2001 From: AJIOB Date: Mon, 5 Jan 2026 11:30:07 +0300 Subject: [PATCH 2/2] Fix hash logging prefix --- src/compiler/c.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/compiler/c.rs b/src/compiler/c.rs index 1090039ae..12d9f3d0c 100644 --- a/src/compiler/c.rs +++ b/src/compiler/c.rs @@ -400,7 +400,7 @@ where .is_some(); if let Some(arg) = &self.parsed_args.too_hard_for_preprocessor_cache_mode { debug!( - "parse_arguments: Cannot use preprocessor cache because of {:?}", + "generate_hash_key: Cannot use preprocessor cache because of {:?}", arg ); } @@ -429,14 +429,14 @@ where if can_use_preprocessor_cache_mode && !use_preprocessor_cache_mode { debug!( - "parse_arguments: Disabling preprocessor cache because SCCACHE_DIRECT=false" + "generate_hash_key: Disabling preprocessor cache because SCCACHE_DIRECT=false" ); } use_preprocessor_cache_mode } else { debug!( - "parse_arguments: Disabling preprocessor cache because {} language doesn't need C preprocessing", + "generate_hash_key: Disabling preprocessor cache because {} language doesn't need C preprocessing", self.parsed_args.language.as_str() ); false