From a7334d3256c67763ecc84dba1804e621af178241 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 11 Jan 2026 02:26:28 +0000 Subject: [PATCH 1/2] feat: add baseline support for tracking and suppressing existing violations Baselines allow you to track existing violations in a codebase and only report new violations. This enables: - Adopting sqruff in existing projects without fixing all violations first - Preventing regressions while allowing legacy issues to be addressed over time - Enabling incremental improvement of code quality Usage: 1. Generate a baseline: sqruff baseline -o .sqruff-baseline 2. Lint using the baseline (only report new violations): sqruff lint --baseline .sqruff-baseline The baseline uses a count-based approach (similar to elm-review and ESLint), tracking how many violations of each rule type exist per file. This is more stable than line-number-based approaches because it doesn't get invalidated when unrelated code changes shift line numbers. --- Cargo.lock | 1 + crates/cli-lib/src/baseline.rs | 309 ++++++++++++++++++++++++ crates/cli-lib/src/commands.rs | 17 ++ crates/cli-lib/src/commands_baseline.rs | 106 ++++++++ crates/cli-lib/src/commands_lint.rs | 243 ++++++++++++++++++- crates/cli-lib/src/lib.rs | 19 +- crates/cli/Cargo.toml | 5 + crates/cli/tests/baseline.rs | 231 ++++++++++++++++++ 8 files changed, 924 insertions(+), 7 deletions(-) create mode 100644 crates/cli-lib/src/baseline.rs create mode 100644 crates/cli-lib/src/commands_baseline.rs create mode 100644 crates/cli/tests/baseline.rs diff --git a/Cargo.lock b/Cargo.lock index a52f625b5..e5a581b5a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1478,6 +1478,7 @@ dependencies = [ "expect-test", "mimalloc", "pyo3", + "serde_json", "sqruff-cli-lib", "sqruff-lib", "tempfile", diff --git a/crates/cli-lib/src/baseline.rs b/crates/cli-lib/src/baseline.rs new file mode 100644 index 000000000..ec7a2c186 --- /dev/null +++ b/crates/cli-lib/src/baseline.rs @@ -0,0 +1,309 @@ +use serde::{Deserialize, 
Serialize}; +use sqruff_lib::core::linter::linted_file::LintedFile; +use std::collections::BTreeMap; +use std::io::{self, Write}; +use std::path::Path; + +/// The current baseline format version. +const BASELINE_VERSION: &str = "1"; + +/// Default baseline filename. +pub const DEFAULT_BASELINE_FILENAME: &str = ".sqruff-baseline"; + +/// Represents a baseline of known violations. +/// +/// The baseline uses a count-based approach similar to elm-review and ESLint's +/// native implementation. This is more stable than line-number-based approaches +/// because it doesn't get invalidated by unrelated code edits. +/// +/// For each file, we track the count of violations per rule code. When comparing +/// against a baseline, we allow up to that many violations of each rule type +/// per file before reporting them as new issues. +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct Baseline { + /// The version of the baseline format. + version: String, + /// Map of file paths to their violation counts per rule. + /// Structure: { file_path: { rule_code: count } } + files: BTreeMap>, +} + +/// Statistics about a baseline comparison. +#[derive(Debug, Default)] +pub struct BaselineStats { + /// Number of violations that were in the baseline (suppressed). + pub suppressed: usize, + /// Number of violations that are new (not in baseline). + pub new_violations: usize, + /// Number of violations that were fixed (in baseline but not in current). + pub fixed: usize, +} + +impl Baseline { + /// Creates a new empty baseline. + pub fn new() -> Self { + Self { + version: BASELINE_VERSION.to_string(), + files: BTreeMap::new(), + } + } + + /// Creates a baseline from linted files. 
+ pub fn from_linted_files<'a>(files: impl IntoIterator) -> Self { + let mut baseline = Self::new(); + + for file in files { + let path = normalize_path(file.path()); + let violations = file.violations(); + + if violations.is_empty() { + continue; + } + + let rule_counts = baseline.files.entry(path).or_default(); + + for violation in violations { + let rule_code = violation.rule_code().to_string(); + *rule_counts.entry(rule_code).or_insert(0) += 1; + } + } + + baseline + } + + /// Loads a baseline from a file path. + pub fn load(path: &Path) -> io::Result { + let content = std::fs::read_to_string(path)?; + let baseline: Baseline = serde_json::from_str(&content).map_err(|e| { + io::Error::new( + io::ErrorKind::InvalidData, + format!("Failed to parse baseline file: {}", e), + ) + })?; + + // Version check + if baseline.version != BASELINE_VERSION { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + format!( + "Unsupported baseline version '{}'. Expected '{}'.", + baseline.version, BASELINE_VERSION + ), + )); + } + + Ok(baseline) + } + + /// Saves the baseline to a file path. + pub fn save(&self, path: &Path) -> io::Result<()> { + let content = serde_json::to_string_pretty(self)?; + let mut file = std::fs::File::create(path)?; + file.write_all(content.as_bytes())?; + file.write_all(b"\n")?; + Ok(()) + } + + /// Writes the baseline to stdout. + pub fn write_to_stdout(&self) -> io::Result<()> { + let content = serde_json::to_string_pretty(self)?; + println!("{}", content); + Ok(()) + } + + /// Gets the violation count for a specific file and rule. + pub fn get_count(&self, file_path: &str, rule_code: &str) -> usize { + let normalized = normalize_path(file_path); + self.files + .get(&normalized) + .and_then(|rules| rules.get(rule_code)) + .copied() + .unwrap_or(0) + } + + /// Returns the total number of violations in the baseline. 
+ pub fn total_violations(&self) -> usize { + self.files.values().flat_map(|rules| rules.values()).sum() + } + + /// Returns the number of files in the baseline. + pub fn file_count(&self) -> usize { + self.files.len() + } + + /// Returns an iterator over all files in the baseline. + pub fn files(&self) -> impl Iterator { + self.files.keys() + } + + /// Checks if the baseline is empty. + pub fn is_empty(&self) -> bool { + self.files.is_empty() + } +} + +/// Represents counts of violations by rule for filtering. +#[derive(Debug, Default)] +struct RuleViolationCounts { + counts: BTreeMap, +} + +impl RuleViolationCounts { + /// Try to consume a violation. Returns true if the violation is within + /// the baseline allowance (should be suppressed), false if it's a new violation. + fn try_consume(&mut self, rule_code: &str) -> bool { + if let Some(count) = self.counts.get_mut(rule_code) { + if *count > 0 { + *count -= 1; + return true; + } + } + false + } +} + +/// Result of filtering violations against a baseline. +pub struct FilteredViolations { + /// Violations that are new (not in baseline). + pub new_violations: Vec, + /// Statistics about the filtering. + pub stats: BaselineStats, +} + +/// Filters violations from a linted file against a baseline. 
+/// +/// This function implements the count-based filtering logic: +/// - For each file/rule combination, we allow up to `baseline_count` violations +/// - Violations beyond that count are considered new +/// - Violations are processed in order (line number, then column) +pub fn filter_violations_against_baseline( + file: &LintedFile, + baseline: &Baseline, +) -> FilteredViolations { + let path = normalize_path(file.path()); + let violations = file.violations(); + + // Get the baseline counts for this file + let baseline_rules = baseline.files.get(&path); + + let mut rule_counts = RuleViolationCounts::default(); + if let Some(rules) = baseline_rules { + rule_counts.counts = rules.clone(); + } + + let mut new_violations = Vec::new(); + let mut suppressed = 0; + + // Process violations in order + for violation in violations { + let rule_code = violation.rule_code(); + if rule_counts.try_consume(rule_code) { + suppressed += 1; + } else { + new_violations.push(violation.clone()); + } + } + + // Calculate how many baseline violations were fixed + // (remaining counts in baseline that weren't consumed) + let fixed: usize = rule_counts.counts.values().sum(); + let new_violation_count = new_violations.len(); + + FilteredViolations { + new_violations, + stats: BaselineStats { + suppressed, + new_violations: new_violation_count, + fixed, + }, + } +} + +/// Normalizes a file path for consistent comparison. 
+/// Converts backslashes to forward slashes and removes leading "./" +fn normalize_path(path: &str) -> String { + let normalized = path.replace('\\', "/"); + normalized + .strip_prefix("./") + .unwrap_or(&normalized) + .to_string() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_baseline_new() { + let baseline = Baseline::new(); + assert_eq!(baseline.version, BASELINE_VERSION); + assert!(baseline.is_empty()); + } + + #[test] + fn test_baseline_serialization() { + let mut baseline = Baseline::new(); + baseline + .files + .entry("test.sql".to_string()) + .or_default() + .insert("AL01".to_string(), 2); + baseline + .files + .entry("test.sql".to_string()) + .or_default() + .insert("CP01".to_string(), 1); + + let json = serde_json::to_string_pretty(&baseline).unwrap(); + let parsed: Baseline = serde_json::from_str(&json).unwrap(); + + assert_eq!(parsed.version, BASELINE_VERSION); + assert_eq!(parsed.get_count("test.sql", "AL01"), 2); + assert_eq!(parsed.get_count("test.sql", "CP01"), 1); + assert_eq!(parsed.get_count("test.sql", "XX99"), 0); + } + + #[test] + fn test_normalize_path() { + assert_eq!(normalize_path("./foo/bar.sql"), "foo/bar.sql"); + assert_eq!(normalize_path("foo\\bar.sql"), "foo/bar.sql"); + assert_eq!(normalize_path("foo/bar.sql"), "foo/bar.sql"); + } + + #[test] + fn test_rule_violation_counts() { + let mut counts = RuleViolationCounts::default(); + counts.counts.insert("AL01".to_string(), 2); + + // First two consumptions should succeed + assert!(counts.try_consume("AL01")); + assert!(counts.try_consume("AL01")); + // Third should fail (exceeded baseline) + assert!(!counts.try_consume("AL01")); + // Unknown rule should fail + assert!(!counts.try_consume("XX99")); + } + + #[test] + fn test_total_violations() { + let mut baseline = Baseline::new(); + baseline + .files + .entry("a.sql".to_string()) + .or_default() + .insert("AL01".to_string(), 2); + baseline + .files + .entry("a.sql".to_string()) + .or_default() + 
.insert("CP01".to_string(), 1); + baseline + .files + .entry("b.sql".to_string()) + .or_default() + .insert("AL01".to_string(), 3); + + assert_eq!(baseline.total_violations(), 6); + assert_eq!(baseline.file_count(), 2); + } +} diff --git a/crates/cli-lib/src/commands.rs b/crates/cli-lib/src/commands.rs index 27ed6a276..5356edaa6 100644 --- a/crates/cli-lib/src/commands.rs +++ b/crates/cli-lib/src/commands.rs @@ -48,6 +48,11 @@ pub enum Commands { Dialects, #[command(name = "templaters", about = "List available templaters")] Templaters, + #[command( + name = "baseline", + about = "Generate a baseline of existing violations" + )] + Baseline(BaselineArgs), #[cfg(feature = "parser")] #[command( name = "parse", @@ -62,6 +67,18 @@ pub struct LintArgs { pub paths: Vec, #[arg(default_value_t, short, long)] pub format: Format, + /// Path to a baseline file. Only violations not in the baseline will be reported. + #[arg(long)] + pub baseline: Option, +} + +#[derive(Debug, Parser)] +pub struct BaselineArgs { + /// Files or directories to scan for creating a baseline. Use `-` to read from stdin. + pub paths: Vec, + /// Output file for the baseline. If not specified, outputs to stdout. + #[arg(short, long)] + pub output: Option, } #[derive(Debug, Parser)] diff --git a/crates/cli-lib/src/commands_baseline.rs b/crates/cli-lib/src/commands_baseline.rs new file mode 100644 index 000000000..5dff3b1cd --- /dev/null +++ b/crates/cli-lib/src/commands_baseline.rs @@ -0,0 +1,106 @@ +use crate::baseline::Baseline; +use crate::commands::BaselineArgs; +use sqruff_lib::core::config::FluffConfig; +use sqruff_lib::core::linter::core::Linter; +use std::path::Path; + +/// Run the baseline generation command. +/// +/// This scans the specified paths for SQL files, lints them, and generates +/// a baseline file containing all current violations. The baseline can then +/// be used with `sqruff lint --baseline` to only report new violations. 
+pub(crate) fn run_baseline( + args: BaselineArgs, + config: FluffConfig, + ignorer: impl Fn(&Path) -> bool + Send + Sync, + collect_parse_errors: bool, +) -> i32 { + let BaselineArgs { paths, output } = args; + + // Create a linter WITHOUT a formatter (we don't want to output violations during baseline generation) + let mut linter = Linter::new(config, None, None, collect_parse_errors); + + eprintln!("Scanning files to generate baseline..."); + + // Lint the paths + let result = linter.lint_paths(paths, false, &ignorer); + + // Collect all linted files + let files: Vec<_> = result.into_iter().collect(); + + // Create baseline from violations + let baseline = Baseline::from_linted_files(files.iter()); + + // Output summary + let file_count = baseline.file_count(); + let violation_count = baseline.total_violations(); + + if baseline.is_empty() { + eprintln!("No violations found. Baseline is empty."); + } else { + eprintln!( + "Found {} violation(s) across {} file(s).", + violation_count, file_count + ); + } + + // Save or output the baseline + match output { + Some(path) => { + if let Err(e) = baseline.save(&path) { + eprintln!("Error saving baseline to {}: {}", path.display(), e); + return 1; + } + eprintln!("Baseline saved to: {}", path.display()); + } + None => { + if let Err(e) = baseline.write_to_stdout() { + eprintln!("Error writing baseline: {}", e); + return 1; + } + } + } + + 0 +} + +/// Run baseline generation from stdin. 
+pub(crate) fn run_baseline_stdin(config: FluffConfig, output: Option) -> i32 { + let read_in = match crate::stdin::read_std_in() { + Ok(content) => content, + Err(e) => { + eprintln!("Error reading from stdin: {}", e); + return 1; + } + }; + + // Create a linter WITHOUT a formatter (we don't want to output violations during baseline generation) + let linter = Linter::new(config, None, None, false); + let result = linter.lint_string(&read_in, None, false); + + // Create baseline from the single linted file + let baseline = Baseline::from_linted_files(std::iter::once(&result)); + + // Output summary + let violation_count = baseline.total_violations(); + eprintln!("Found {} violation(s) in stdin.", violation_count); + + // Save or output the baseline + match output { + Some(path) => { + if let Err(e) = baseline.save(&path) { + eprintln!("Error saving baseline to {}: {}", path.display(), e); + return 1; + } + eprintln!("Baseline saved to: {}", path.display()); + } + None => { + if let Err(e) = baseline.write_to_stdout() { + eprintln!("Error writing baseline: {}", e); + return 1; + } + } + } + + 0 +} diff --git a/crates/cli-lib/src/commands_lint.rs b/crates/cli-lib/src/commands_lint.rs index a1177df6d..ee4b59cf2 100644 --- a/crates/cli-lib/src/commands_lint.rs +++ b/crates/cli-lib/src/commands_lint.rs @@ -1,7 +1,14 @@ +use crate::baseline::{Baseline, BaselineStats, filter_violations_against_baseline}; use crate::commands::{Format, LintArgs}; -use crate::linter; +use crate::formatters::OutputStreamFormatter; +use crate::formatters::github_annotation_native_formatter::GithubAnnotationNativeFormatter; +use crate::formatters::json::JsonFormatter; +use sqruff_lib::Formatter; use sqruff_lib::core::config::FluffConfig; -use std::path::Path; +use sqruff_lib::core::linter::core::Linter; +use sqruff_lib::core::linter::linted_file::LintedFile; +use std::path::{Path, PathBuf}; +use std::sync::Arc; pub(crate) fn run_lint( args: LintArgs, @@ -9,8 +16,33 @@ pub(crate) fn run_lint( 
ignorer: impl Fn(&Path) -> bool + Send + Sync, collect_parse_errors: bool, ) -> i32 { - let LintArgs { paths, format } = args; - let mut linter = linter(config, format, collect_parse_errors); + let LintArgs { + paths, + format, + baseline, + } = args; + + match baseline { + Some(baseline_path) => run_lint_with_baseline( + paths, + format, + baseline_path, + config, + ignorer, + collect_parse_errors, + ), + None => run_lint_without_baseline(paths, format, config, ignorer, collect_parse_errors), + } +} + +fn run_lint_without_baseline( + paths: Vec, + format: Format, + config: FluffConfig, + ignorer: impl Fn(&Path) -> bool + Send + Sync, + collect_parse_errors: bool, +) -> i32 { + let mut linter = crate::linter(config, format, collect_parse_errors); let result = linter.lint_paths(paths, false, &ignorer); linter.formatter().unwrap().completion_message(result.len()); @@ -18,17 +50,216 @@ pub(crate) fn run_lint( result.has_violations() as i32 } +fn run_lint_with_baseline( + paths: Vec, + format: Format, + baseline_path: PathBuf, + config: FluffConfig, + ignorer: impl Fn(&Path) -> bool + Send + Sync, + collect_parse_errors: bool, +) -> i32 { + // Load the baseline + let baseline = match Baseline::load(&baseline_path) { + Ok(b) => b, + Err(e) => { + eprintln!( + "Error loading baseline from {}: {}", + baseline_path.display(), + e + ); + return 1; + } + }; + + eprintln!( + "Using baseline: {} ({} violations in {} files)", + baseline_path.display(), + baseline.total_violations(), + baseline.file_count() + ); + + // Create a linter WITHOUT a formatter (we'll dispatch manually after filtering) + let mut linter = Linter::new(config.clone(), None, None, collect_parse_errors); + let result = linter.lint_paths(paths, false, &ignorer); + + // Create the formatter + let formatter: Arc = create_formatter(format, &config); + + // Track aggregate statistics + let mut total_stats = BaselineStats::default(); + let mut files_with_new_violations = 0; + let file_count = result.len(); + + // 
Process each file, filter violations against baseline, and dispatch + for file in result { + let filtered = filter_violations_against_baseline(&file, &baseline); + + total_stats.suppressed += filtered.stats.suppressed; + total_stats.new_violations += filtered.stats.new_violations; + total_stats.fixed += filtered.stats.fixed; + + if !filtered.new_violations.is_empty() { + files_with_new_violations += 1; + + // Create a new LintedFile with only the new violations + let filtered_file = create_filtered_linted_file(&file, filtered.new_violations); + formatter.dispatch_file_violations(&filtered_file); + } + } + + // Output completion message + formatter.completion_message(file_count); + + // Output baseline summary + print_baseline_summary(&total_stats, files_with_new_violations); + + // Return non-zero if there are new violations + (total_stats.new_violations > 0) as i32 +} + pub(crate) fn run_lint_stdin( config: FluffConfig, format: Format, + baseline: Option, collect_parse_errors: bool, ) -> i32 { let read_in = crate::stdin::read_std_in().unwrap(); - let linter = linter(config, format, collect_parse_errors); - let result = linter.lint_string(&read_in, None, false); + match baseline { + Some(baseline_path) => run_lint_stdin_with_baseline( + &read_in, + format, + baseline_path, + config, + collect_parse_errors, + ), + None => run_lint_stdin_without_baseline(&read_in, format, config, collect_parse_errors), + } +} + +fn run_lint_stdin_without_baseline( + sql: &str, + format: Format, + config: FluffConfig, + collect_parse_errors: bool, +) -> i32 { + let linter = crate::linter(config, format, collect_parse_errors); + let result = linter.lint_string(sql, None, false); linter.formatter().unwrap().completion_message(1); result.has_violations() as i32 } + +fn run_lint_stdin_with_baseline( + sql: &str, + format: Format, + baseline_path: PathBuf, + config: FluffConfig, + collect_parse_errors: bool, +) -> i32 { + // Load the baseline + let baseline = match 
Baseline::load(&baseline_path) { + Ok(b) => b, + Err(e) => { + eprintln!( + "Error loading baseline from {}: {}", + baseline_path.display(), + e + ); + return 1; + } + }; + + // Create a linter WITHOUT a formatter + let linter = Linter::new(config.clone(), None, None, collect_parse_errors); + let file = linter.lint_string(sql, None, false); + + // Create the formatter + let formatter: Arc = create_formatter(format, &config); + + // Filter violations + let filtered = filter_violations_against_baseline(&file, &baseline); + + if !filtered.new_violations.is_empty() { + let filtered_file = create_filtered_linted_file(&file, filtered.new_violations); + formatter.dispatch_file_violations(&filtered_file); + } + + formatter.completion_message(1); + + // Output baseline summary + print_baseline_summary( + &filtered.stats, + (filtered.stats.new_violations > 0) as usize, + ); + + (filtered.stats.new_violations > 0) as i32 +} + +fn create_formatter(format: Format, config: &FluffConfig) -> Arc { + match format { + Format::Human => { + let output_stream = std::io::stderr().into(); + let formatter = OutputStreamFormatter::new( + output_stream, + config.get("nocolor", "core").as_bool().unwrap_or_default(), + config.get("verbose", "core").as_int().unwrap_or_default(), + ); + Arc::new(formatter) + } + Format::GithubAnnotationNative => { + let output_stream = std::io::stderr(); + let formatter = GithubAnnotationNativeFormatter::new(output_stream); + Arc::new(formatter) + } + Format::Json => { + let formatter = JsonFormatter::default(); + Arc::new(formatter) + } + } +} + +fn create_filtered_linted_file( + original: &LintedFile, + new_violations: Vec, +) -> LintedFile { + // We need to create a new LintedFile with only the filtered violations. + // Since LintedFile::new requires a TemplatedFile which we can't easily clone, + // we'll use a workaround by creating a simple mock. + // + // For display purposes, we only need path and violations, so we create + // a minimal LintedFile. 
+ LintedFile::new( + original.path().to_string(), + Vec::new(), // No patches for linting (not fixing) + sqruff_lib_core::templaters::TemplatedFile::from(original.path()), + new_violations, + None, + ) +} + +fn print_baseline_summary(stats: &BaselineStats, files_with_new_violations: usize) { + // Only print if we actually used the baseline + if stats.suppressed > 0 || stats.fixed > 0 || stats.new_violations > 0 { + eprintln!(); + eprintln!("Baseline summary:"); + + if stats.suppressed > 0 { + eprintln!(" {} violation(s) suppressed by baseline", stats.suppressed); + } + + if stats.new_violations > 0 { + eprintln!( + " {} new violation(s) in {} file(s)", + stats.new_violations, files_with_new_violations + ); + } else { + eprintln!(" No new violations introduced"); + } + + if stats.fixed > 0 { + eprintln!(" {} baseline violation(s) have been fixed", stats.fixed); + } + } +} diff --git a/crates/cli-lib/src/lib.rs b/crates/cli-lib/src/lib.rs index 27783812b..4d2ce4a40 100644 --- a/crates/cli-lib/src/lib.rs +++ b/crates/cli-lib/src/lib.rs @@ -14,7 +14,9 @@ use crate::formatters::OutputStreamFormatter; use crate::formatters::github_annotation_native_formatter::GithubAnnotationNativeFormatter; use crate::formatters::json::JsonFormatter; +pub mod baseline; pub mod commands; +mod commands_baseline; mod commands_dialects; mod commands_fix; mod commands_info; @@ -92,7 +94,12 @@ where 1 } Ok(false) => commands_lint::run_lint(args, config, ignorer, collect_parse_errors), - Ok(true) => commands_lint::run_lint_stdin(config, args.format, collect_parse_errors), + Ok(true) => commands_lint::run_lint_stdin( + config, + args.format, + args.baseline, + collect_parse_errors, + ), }, Commands::Fix(args) => match is_std_in_flag_input(&args.paths) { Err(e) => { @@ -122,6 +129,16 @@ where commands_templaters::templaters(); 0 } + Commands::Baseline(args) => match is_std_in_flag_input(&args.paths) { + Err(e) => { + eprintln!("{e}"); + 1 + } + Ok(false) => { + 
commands_baseline::run_baseline(args, config, ignorer, collect_parse_errors) + } + Ok(true) => commands_baseline::run_baseline_stdin(config, args.output), + }, #[cfg(feature = "parser")] Commands::Parse(args) => commands_parse::run_parse(args, config), } diff --git a/crates/cli/Cargo.toml b/crates/cli/Cargo.toml index 2b8830dc5..58d1193a9 100644 --- a/crates/cli/Cargo.toml +++ b/crates/cli/Cargo.toml @@ -67,6 +67,10 @@ harness = false name = "dialect_override" harness = false +[[test]] +name = "baseline" +harness = false + [package.metadata.cargo-machete] ignored = ["pyo3"] @@ -82,4 +86,5 @@ mimalloc = { version = "0.1", optional = true } assert_cmd = "2.0.16" expect-test = "1.5.0" tempfile = "3.23.0" +serde_json = "1.0" sqruff-lib.workspace = true diff --git a/crates/cli/tests/baseline.rs b/crates/cli/tests/baseline.rs new file mode 100644 index 000000000..4bfaf0e48 --- /dev/null +++ b/crates/cli/tests/baseline.rs @@ -0,0 +1,231 @@ +use core::str; +use std::path::{Path, PathBuf}; + +use assert_cmd::Command; +use tempfile::TempDir; + +fn main() { + test_baseline_generation(); + test_baseline_lint_suppresses_existing(); + test_baseline_lint_detects_new_violations(); + test_baseline_lint_reports_fixed_violations(); + test_baseline_file_not_found(); +} + +fn get_sqruff_path() -> PathBuf { + let profile = if cfg!(debug_assertions) { + "debug" + } else { + "release" + }; + + let cargo_folder = Path::new(env!("CARGO_MANIFEST_DIR")); + let mut sqruff_path = PathBuf::from(cargo_folder); + sqruff_path.push(format!("../../target/{}/sqruff", profile)); + sqruff_path +} + +/// Test that baseline generation works and creates valid JSON. 
+fn test_baseline_generation() { + let sqruff_path = get_sqruff_path(); + let cargo_folder = Path::new(env!("CARGO_MANIFEST_DIR")); + + // Create a temporary directory with a SQL file that has violations + let temp_dir = TempDir::new().unwrap(); + let sql_file = temp_dir.path().join("test.sql"); + std::fs::write(&sql_file, "select a,b from foo").unwrap(); + + let mut cmd = Command::new(sqruff_path); + cmd.env("HOME", PathBuf::from(cargo_folder)); + cmd.arg("baseline").arg(temp_dir.path()); + cmd.current_dir(cargo_folder); + + let assert = cmd.assert(); + let output = assert.get_output(); + + // Should exit with 0 + assert_eq!(output.status.code().unwrap(), 0); + + // stdout should contain valid JSON + let stdout_str = str::from_utf8(&output.stdout).unwrap(); + let baseline: serde_json::Value = serde_json::from_str(stdout_str).expect("Should be valid JSON"); + + // Check baseline structure + assert_eq!(baseline["version"], "1"); + assert!(baseline["files"].is_object()); + + // Should have exactly one file with LT01 violation + let files = baseline["files"].as_object().unwrap(); + assert_eq!(files.len(), 1); + + // Check violation count + for (_path, violations) in files { + assert!(violations["LT01"].as_u64().unwrap() >= 1); + } +} + +/// Test that linting with a baseline suppresses existing violations. 
+fn test_baseline_lint_suppresses_existing() { + let sqruff_path = get_sqruff_path(); + let cargo_folder = Path::new(env!("CARGO_MANIFEST_DIR")); + + // Create a temporary directory + let temp_dir = TempDir::new().unwrap(); + + // Create a SQL file with violations + let sql_file = temp_dir.path().join("test.sql"); + std::fs::write(&sql_file, "select a,b from foo").unwrap(); + + // Generate baseline + let baseline_file = temp_dir.path().join(".sqruff-baseline"); + let mut cmd = Command::new(sqruff_path.clone()); + cmd.env("HOME", PathBuf::from(cargo_folder)); + cmd.arg("baseline") + .arg("-o") + .arg(&baseline_file) + .arg(temp_dir.path()); + cmd.current_dir(cargo_folder); + cmd.assert().success(); + + // Lint with baseline - should pass (exit 0) since all violations are baselined + let mut cmd = Command::new(sqruff_path); + cmd.env("HOME", PathBuf::from(cargo_folder)); + cmd.arg("lint") + .arg("--baseline") + .arg(&baseline_file) + .arg(temp_dir.path()); + cmd.current_dir(cargo_folder); + + let assert = cmd.assert(); + let output = assert.get_output(); + + // Should exit with 0 (no new violations) + assert_eq!(output.status.code().unwrap(), 0); + + // stderr should mention suppressed violations + let stderr_str = str::from_utf8(&output.stderr).unwrap(); + assert!(stderr_str.contains("suppressed by baseline")); + assert!(stderr_str.contains("No new violations")); +} + +/// Test that linting with a baseline detects new violations. 
+fn test_baseline_lint_detects_new_violations() { + let sqruff_path = get_sqruff_path(); + let cargo_folder = Path::new(env!("CARGO_MANIFEST_DIR")); + + // Create a temporary directory + let temp_dir = TempDir::new().unwrap(); + + // Create a SQL file with one violation + let sql_file = temp_dir.path().join("test.sql"); + std::fs::write(&sql_file, "select a,b from foo").unwrap(); + + // Generate baseline + let baseline_file = temp_dir.path().join(".sqruff-baseline"); + let mut cmd = Command::new(sqruff_path.clone()); + cmd.env("HOME", PathBuf::from(cargo_folder)); + cmd.arg("baseline") + .arg("-o") + .arg(&baseline_file) + .arg(temp_dir.path()); + cmd.current_dir(cargo_folder); + cmd.assert().success(); + + // Add a new violation to the file + std::fs::write(&sql_file, "select a,b,c from foo").unwrap(); + + // Lint with baseline - should fail (exit 1) due to new violation + let mut cmd = Command::new(sqruff_path); + cmd.env("HOME", PathBuf::from(cargo_folder)); + cmd.arg("lint") + .arg("--baseline") + .arg(&baseline_file) + .arg(temp_dir.path()); + cmd.current_dir(cargo_folder); + + let assert = cmd.assert(); + let output = assert.get_output(); + + // Should exit with 1 (new violations) + assert_eq!(output.status.code().unwrap(), 1); + + // stderr should mention new violations + let stderr_str = str::from_utf8(&output.stderr).unwrap(); + assert!(stderr_str.contains("new violation")); +} + +/// Test that linting with a baseline reports fixed violations. 
+fn test_baseline_lint_reports_fixed_violations() { + let sqruff_path = get_sqruff_path(); + let cargo_folder = Path::new(env!("CARGO_MANIFEST_DIR")); + + // Create a temporary directory + let temp_dir = TempDir::new().unwrap(); + + // Create a SQL file with violations + let sql_file = temp_dir.path().join("test.sql"); + std::fs::write(&sql_file, "select a,b from foo").unwrap(); + + // Generate baseline + let baseline_file = temp_dir.path().join(".sqruff-baseline"); + let mut cmd = Command::new(sqruff_path.clone()); + cmd.env("HOME", PathBuf::from(cargo_folder)); + cmd.arg("baseline") + .arg("-o") + .arg(&baseline_file) + .arg(temp_dir.path()); + cmd.current_dir(cargo_folder); + cmd.assert().success(); + + // Fix the violation in the file + std::fs::write(&sql_file, "SELECT a, b FROM foo").unwrap(); + + // Lint with baseline - should pass and report fixed violations + let mut cmd = Command::new(sqruff_path); + cmd.env("HOME", PathBuf::from(cargo_folder)); + cmd.arg("lint") + .arg("--baseline") + .arg(&baseline_file) + .arg(temp_dir.path()); + cmd.current_dir(cargo_folder); + + let assert = cmd.assert(); + let output = assert.get_output(); + + // Should exit with 0 + assert_eq!(output.status.code().unwrap(), 0); + + // stderr should mention fixed violations + let stderr_str = str::from_utf8(&output.stderr).unwrap(); + assert!(stderr_str.contains("have been fixed")); +} + +/// Test that linting with a non-existent baseline file errors appropriately. 
+fn test_baseline_file_not_found() { + let sqruff_path = get_sqruff_path(); + let cargo_folder = Path::new(env!("CARGO_MANIFEST_DIR")); + + // Create a temporary directory with a SQL file + let temp_dir = TempDir::new().unwrap(); + let sql_file = temp_dir.path().join("test.sql"); + std::fs::write(&sql_file, "select a,b from foo").unwrap(); + + // Try to lint with non-existent baseline + let mut cmd = Command::new(sqruff_path); + cmd.env("HOME", PathBuf::from(cargo_folder)); + cmd.arg("lint") + .arg("--baseline") + .arg(temp_dir.path().join("nonexistent.baseline")) + .arg(temp_dir.path()); + cmd.current_dir(cargo_folder); + + let assert = cmd.assert(); + let output = assert.get_output(); + + // Should exit with 1 (error) + assert_eq!(output.status.code().unwrap(), 1); + + // stderr should mention error loading baseline + let stderr_str = str::from_utf8(&output.stderr).unwrap(); + assert!(stderr_str.contains("Error loading baseline")); +} From 83063e780f21f57f83cdc86f1f63d2ca84e8e228 Mon Sep 17 00:00:00 2001 From: benfdking <9087625+benfdking@users.noreply.github.com> Date: Sun, 11 Jan 2026 02:32:20 +0000 Subject: [PATCH 2/2] Apply automatic changes --- docs/cli.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/docs/cli.md b/docs/cli.md index ea22a4982..12de46fd0 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -12,6 +12,7 @@ This document contains the help content for the `sqruff` command-line program. 
* [`sqruff rules`↴](#sqruff-rules) * [`sqruff dialects`↴](#sqruff-dialects) * [`sqruff templaters`↴](#sqruff-templaters) +* [`sqruff baseline`↴](#sqruff-baseline) ## `sqruff` @@ -28,6 +29,7 @@ sqruff is a sql formatter and linter * `rules` — Explain the available rules * `dialects` — List available dialects * `templaters` — List available templaters +* `baseline` — Generate a baseline of existing violations ###### **Options:** @@ -57,6 +59,7 @@ Lint SQL files via passing a list of files or using stdin Possible values: `human`, `github-annotation-native`, `json` +* `--baseline <BASELINE>` — Path to a baseline file. Only violations not in the baseline will be reported @@ -121,6 +124,22 @@ List available templaters +## `sqruff baseline` + +Generate a baseline of existing violations + +**Usage:** `sqruff baseline [OPTIONS] [PATHS]...` + +###### **Arguments:** + +* `<PATHS>` — Files or directories to scan for creating a baseline. Use `-` to read from stdin + +###### **Options:** + +* `-o`, `--output <OUTPUT>` — Output file for the baseline. If not specified, outputs to stdout + + +