From 77c63e64985305dd37a8d086f6fd276f38fe8de0 Mon Sep 17 00:00:00 2001 From: b4prog Date: Sun, 28 Jun 2026 13:57:24 +0200 Subject: [PATCH 1/8] [chore] bump CodeM8 version to 0.7.4 --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8771335..7fa0d9c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -150,7 +150,7 @@ checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" [[package]] name = "codem8" -version = "0.7.3" +version = "0.7.4" dependencies = [ "clap", "ignore", diff --git a/Cargo.toml b/Cargo.toml index 26f2b81..01b8d32 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "codem8" -version = "0.7.3" +version = "0.7.4" edition = "2021" rust-version = "1.85" license = "MIT" From b65780370c68b49cc63931515f7a3567049c5d5c Mon Sep 17 00:00:00 2001 From: b4prog Date: Sun, 28 Jun 2026 14:38:18 +0200 Subject: [PATCH 2/8] [feat] add strict git branch report scoping --- README.md | 2 +- src/cli/args.rs | 64 +++++- src/cli/help.rs | 9 + src/cli/mod.rs | 1 + src/discovery/git.rs | 185 +++++++++++++++ src/discovery/mod.rs | 2 +- src/lib.rs | 358 +++++++++++++++++++++++++++--- src/model.rs | 18 ++ src/report/complexity_renderer.rs | 64 +++++- src/report/duplicate_renderer.rs | 50 ++++- 10 files changed, 703 insertions(+), 50 deletions(-) diff --git a/README.md b/README.md index 0b1e388..b1b3301 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ codem8 --report-complexity Install `codem8` from the GitHub source with Cargo: ```bash -cargo install --git https://github.com/b4prog/CodeM8 codem8 +cargo install --locked --git https://github.com/b4prog/CodeM8 codem8 ``` Build from a local checkout with Cargo: diff --git a/src/cli/args.rs b/src/cli/args.rs index c8ee3f4..92e0159 100644 --- a/src/cli/args.rs +++ b/src/cli/args.rs @@ -20,6 +20,8 @@ struct ClapCli { verbose: u8, #[arg(long = "codem8-git-branch", action = ArgAction::Count)] git_branch: u8, + 
#[arg(long = "codem8-git-branch-strict", action = ArgAction::Count)] + git_branch_strict: u8, #[arg( long = "codem8-file-extension", value_name = "extensions", @@ -62,7 +64,9 @@ where let report = selected_report(&parsed)?; validate_repeated_options(&parsed)?; let git_branch = parsed.git_branch != 0; - let files = selected_files(&parsed, git_branch)?; + let git_branch_strict = parsed.git_branch_strict != 0; + let files = selected_files(&parsed, git_branch || git_branch_strict)?; + validate_git_branch_modes(git_branch, git_branch_strict)?; validate_complexity_limits(report, &parsed)?; Ok(CliConfig { report, @@ -70,6 +74,7 @@ where file_extensions: selected_file_extensions(&parsed), files, git_branch, + git_branch_strict, max_cognitive_complexity: parsed .max_cognitive_complexity .unwrap_or(DEFAULT_MAX_COGNITIVE_COMPLEXITY), @@ -109,6 +114,11 @@ fn validate_repeated_options(parsed: &ClapCli) -> Result<()> { "git branch mode was provided more than once", )); } + if parsed.git_branch_strict > 1 { + return Err(CodeM8Error::new( + "strict git branch mode was provided more than once", + )); + } if parsed.file_extensions.len() > 1 { return Err(CodeM8Error::new( "file extensions were provided more than once", @@ -122,6 +132,15 @@ fn validate_repeated_options(parsed: &ClapCli) -> Result<()> { Ok(()) } +fn validate_git_branch_modes(git_branch: bool, git_branch_strict: bool) -> Result<()> { + if git_branch && git_branch_strict { + return Err(CodeM8Error::new( + "git branch mode and strict git branch mode are mutually exclusive", + )); + } + Ok(()) +} + fn selected_files(parsed: &ClapCli, git_branch: bool) -> Result>> { let files = parsed.files.first().cloned(); if git_branch && files.is_some() { @@ -260,6 +279,8 @@ fn normalized_clap_arg(arg: String) -> Result { Ok("--codem8-verbose".to_owned()) } else if arg == "-git-branch" { Ok("--codem8-git-branch".to_owned()) + } else if arg == "-git-branch-strict" { + Ok("--codem8-git-branch-strict".to_owned()) } else if let Some(value) 
= arg.strip_prefix("-file-extension=") { Ok(format!("--codem8-file-extension={value}")) } else if let Some(value) = arg.strip_prefix("-files=") { @@ -289,6 +310,7 @@ mod tests { assert_eq!(config.file_extensions, supported_file_extensions()); assert_eq!(config.files, None); assert!(!config.git_branch); + assert!(!config.git_branch_strict); assert_eq!( config.max_cognitive_complexity, DEFAULT_MAX_COGNITIVE_COMPLEXITY @@ -336,6 +358,16 @@ mod tests { fn parses_git_branch_duplicate_report_config() { let config = parse_args(["--report-duplicate", "-git-branch"]).expect("config parses"); assert!(config.git_branch); + assert!(!config.git_branch_strict); + assert_eq!(config.files, None); + } + + #[test] + fn parses_strict_git_branch_duplicate_report_config() { + let config = + parse_args(["--report-duplicate", "-git-branch-strict"]).expect("config parses"); + assert!(!config.git_branch); + assert!(config.git_branch_strict); assert_eq!(config.files, None); } @@ -387,6 +419,7 @@ mod tests { "--file-extension=js", "--files=src/a.ts", "--git-branch", + "--git-branch-strict", "--max-cognitive-complexity=20", "--max-cyclomatic-complexity=12", ] { @@ -461,6 +494,26 @@ mod tests { .contains("git branch mode was provided more than once")); } + #[test] + fn rejects_repeated_strict_git_branch_arguments() { + let error = parse_args([ + "--report-duplicate", + "-git-branch-strict", + "-git-branch-strict", + ]) + .expect_err("repeated strict git branch mode fails"); + assert!(error + .to_string() + .contains("strict git branch mode was provided more than once")); + } + + #[test] + fn rejects_git_branch_with_strict_git_branch() { + let error = parse_args(["--report-duplicate", "-git-branch", "-git-branch-strict"]) + .expect_err("exclusive git branch modes fail"); + assert!(error.to_string().contains("mutually exclusive")); + } + #[test] fn rejects_git_branch_with_explicit_files() { let error = parse_args(["--report-duplicate", "-git-branch", "-files=a.ts"]) @@ -470,6 +523,15 @@ mod 
tests { .contains("git branch mode cannot be combined with explicit files")); } + #[test] + fn rejects_strict_git_branch_with_explicit_files() { + let error = parse_args(["--report-duplicate", "-git-branch-strict", "-files=a.ts"]) + .expect_err("exclusive strict file modes fail"); + assert!(error + .to_string() + .contains("git branch mode cannot be combined with explicit files")); + } + #[test] fn parses_explicit_file_list() { let files = parse_file_list("src/a.ts, ./src/b.ts").expect("files parse"); diff --git a/src/cli/help.rs b/src/cli/help.rs index f5fd547..c375783 100644 --- a/src/cli/help.rs +++ b/src/cli/help.rs @@ -37,6 +37,11 @@ OPTIONS: Search only in files changed on the current local Git branch. Cannot be combined with -files. + -git-branch-strict + Search only in files changed on the current local Git branch, and report + only duplicate blocks or complex functions that overlap changed lines. + Cannot be combined with -files or -git-branch. + -max-cognitive-complexity= Maximum allowed cognitive complexity for --report-complexity. Defaults to 15. @@ -47,6 +52,7 @@ OPTIONS: -verbose Include analyzed files and timings in report output, plus duplicate block details. + In -git-branch-strict mode, analyzed files include changed line ranges. 
DUPLICATE REPORT PURPOSE: The duplicate report helps you find repeated code that may be worth @@ -64,6 +70,7 @@ EXAMPLES: codem8 --report-duplicate -file-extension=ts,tsx,js,jsx codem8 --report-duplicate -file-extension=ts,js -files=\"src/a.ts,src/b.js\" codem8 --report-duplicate -git-branch + codem8 --report-duplicate -git-branch-strict codem8 --report-complexity -file-extension=rs -max-cognitive-complexity=12 "; @@ -109,6 +116,7 @@ mod tests { assert!(help.contains("-file-extension=")); assert!(help.contains("-files=")); assert!(help.contains("-git-branch")); + assert!(help.contains("-git-branch-strict")); assert!(help.contains("-max-cognitive-complexity=")); assert!(help.contains("-max-cyclomatic-complexity=")); } @@ -118,6 +126,7 @@ mod tests { assert!(!help.contains("--file-extension=")); assert!(!help.contains("--files=")); assert!(!help.contains("--git-branch")); + assert!(!help.contains("--git-branch-strict")); assert!(!help.contains("--max-cognitive-complexity=")); assert!(!help.contains("--max-cyclomatic-complexity=")); } diff --git a/src/cli/mod.rs b/src/cli/mod.rs index 31455ad..beaf6e2 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -28,6 +28,7 @@ pub struct CliConfig { pub file_extensions: Vec, pub files: Option>, pub git_branch: bool, + pub git_branch_strict: bool, pub max_cognitive_complexity: u32, pub max_cyclomatic_complexity: u32, } diff --git a/src/discovery/git.rs b/src/discovery/git.rs index 965f0c9..2569ccd 100644 --- a/src/discovery/git.rs +++ b/src/discovery/git.rs @@ -4,6 +4,7 @@ use std::path::{Path, PathBuf}; use std::process::{Command, Output}; use crate::error::{CodeM8Error, Result}; +use crate::model::{ChangedFileLines, LineRange}; /// Lists files changed on the current branch compared to the origin base branch. /// @@ -60,6 +61,62 @@ pub fn changed_files_against_origin(current_dir: &Path) -> Result> .collect()) } +/// Lists changed lines on the current branch compared to the origin base branch. 
+/// +/// # Errors +/// +/// Returns an error when Git metadata cannot be resolved or diff output cannot +/// be parsed. +pub fn changed_lines_against_origin(current_dir: &Path) -> Result> { + let repo_root = repo_root(current_dir)?; + ensure_named_branch(&repo_root)?; + let origin_ref = origin_base_ref(&repo_root)?; + let merge_base = run_git_text( + &repo_root, + &["merge-base", &origin_ref, "HEAD"], + "find merge base with origin base branch", + )?; + let mut changed_files = Vec::new(); + extend_changed_lines( + &repo_root, + current_dir, + &[ + "diff", + "--unified=0", + "--no-color", + "--diff-filter=ACMRTUXB", + merge_base.trim(), + "HEAD", + ], + &mut changed_files, + )?; + extend_changed_lines( + &repo_root, + current_dir, + &[ + "diff", + "--unified=0", + "--no-color", + "--cached", + "--diff-filter=ACMRTUXB", + ], + &mut changed_files, + )?; + extend_changed_lines( + &repo_root, + current_dir, + &[ + "diff", + "--unified=0", + "--no-color", + "--diff-filter=ACMRTUXB", + ], + &mut changed_files, + )?; + extend_untracked_changed_lines(&repo_root, current_dir, &mut changed_files)?; + Ok(changed_files) +} + fn repo_root(current_dir: &Path) -> Result { let output = run_git_output( current_dir, @@ -120,6 +177,134 @@ fn collect_nul_paths(repo_root: &Path, args: &[&str], paths: &mut BTreeSet, +) -> Result<()> { + let output = run_git_output(repo_root, args, "list changed git lines")?; + let stdout = ensure_git_success(output, "list changed git lines")?; + let text = output_text(stdout, "parse changed git lines")?; + for changed_file in parse_changed_lines(&text)? 
{ + if let Some(path) = existing_file_path(repo_root, current_dir, &changed_file.path) { + merge_changed_file(changed_files, path, changed_file.lines); + } + } + Ok(()) +} + +fn extend_untracked_changed_lines( + repo_root: &Path, + current_dir: &Path, + changed_files: &mut Vec, +) -> Result<()> { + let output = run_git_output( + repo_root, + &["ls-files", "--others", "--exclude-standard", "-z"], + "list untracked git files", + )?; + let stdout = ensure_git_success(output, "list untracked git files")?; + for path in nul_paths(&stdout) { + if let Some(display_path) = existing_file_path(repo_root, current_dir, &path) { + let line_count = count_lines(&repo_root.join(path), &display_path)?; + let lines = (line_count != 0) + .then_some(vec![LineRange { + start: 1, + end: line_count, + }]) + .unwrap_or_default(); + merge_changed_file(changed_files, display_path, lines); + } + } + Ok(()) +} + +fn parse_changed_lines(text: &str) -> Result> { + let mut files = Vec::new(); + let mut current_path = None::; + for line in text.lines() { + if let Some(path) = line.strip_prefix("+++ b/") { + current_path = Some(PathBuf::from(path)); + } else if line == "+++ /dev/null" { + current_path = None; + } else if line.starts_with("@@ ") { + let path = current_path.clone().ok_or_else(|| { + CodeM8Error::new("could not parse changed git lines: missing file") + })?; + let range = parse_hunk_range(line)?; + push_parsed_range(&mut files, path, range); + } + } + Ok(files) +} + +fn parse_hunk_range(line: &str) -> Result> { + let added = line + .split_whitespace() + .find(|part| part.starts_with('+')) + .ok_or_else(|| CodeM8Error::new(format!("could not parse changed git hunk: {line}")))?; + let added = added.trim_start_matches('+'); + let (start, count) = added + .split_once(',') + .map_or((added, "1"), |(start, count)| (start, count)); + let start = start + .parse::() + .map_err(|_| CodeM8Error::new(format!("could not parse changed git hunk: {line}")))?; + let count = count + .parse::() + 
.map_err(|_| CodeM8Error::new(format!("could not parse changed git hunk: {line}")))?; + Ok((count != 0).then_some(LineRange { + start, + end: start + count - 1, + })) +} + +fn push_parsed_range(files: &mut Vec, path: PathBuf, range: Option) { + if let Some(range) = range { + merge_changed_file(files, path, vec![range]); + } +} + +fn merge_changed_file( + changed_files: &mut Vec, + path: PathBuf, + lines: Vec, +) { + if let Some(changed_file) = changed_files.iter_mut().find(|file| file.path == path) { + changed_file.lines.extend(lines); + changed_file.lines = merged_ranges(&changed_file.lines); + } else { + changed_files.push(ChangedFileLines { + path, + lines: merged_ranges(&lines), + }); + changed_files.sort_by(|left, right| left.path.cmp(&right.path)); + } +} + +fn merged_ranges(lines: &[LineRange]) -> Vec { + let mut ranges = lines.to_vec(); + ranges.sort_by_key(|range| (range.start, range.end)); + let mut merged = Vec::::new(); + for range in ranges { + if let Some(last) = merged.last_mut() { + if range.start <= last.end + 1 { + last.end = last.end.max(range.end); + continue; + } + } + merged.push(range); + } + merged +} + +fn count_lines(path: &Path, display_path: &Path) -> Result { + let contents = fs::read_to_string(path) + .map_err(|error| CodeM8Error::io(display_path, "read file", &error))?; + Ok(contents.lines().count()) +} + fn existing_file_path(repo_root: &Path, current_dir: &Path, path: &Path) -> Option { let absolute = repo_root.join(path); let metadata = fs::symlink_metadata(&absolute).ok()?; diff --git a/src/discovery/mod.rs b/src/discovery/mod.rs index 70e2a1c..e7384e3 100644 --- a/src/discovery/mod.rs +++ b/src/discovery/mod.rs @@ -4,7 +4,7 @@ mod explicit; mod git; mod recursive; -pub(crate) use git::changed_files_against_origin; +pub(crate) use git::{changed_files_against_origin, changed_lines_against_origin}; use crate::error::Result; use crate::model::SourceFile; diff --git a/src/lib.rs b/src/lib.rs index 4803d08..1c6d9fe 100644 --- 
a/src/lib.rs +++ b/src/lib.rs @@ -11,14 +11,33 @@ pub mod report; use std::collections::HashSet; use std::io::Write; -use std::path::Path; +use std::path::{Path, PathBuf}; use std::time::{Duration, Instant}; use crate::error::{CodeM8Error, Result}; use crate::model::SourceFile; -use crate::model::{DuplicateBlock, ProcessedFile}; +use crate::model::{ + AnalyzedFile, ChangedFileLines, DuplicateBlock, DuplicateOccurrence, FunctionComplexity, + LineRange, ProcessedFile, +}; use crate::paths::format_path; +struct BranchScope { + files: Option>, + lines: Option>, + strict_file_paths: Option>, +} + +impl BranchScope { + fn files(&self) -> Option<&[PathBuf]> { + self.files.as_deref().or(self.strict_file_paths.as_deref()) + } + + fn lines(&self) -> Option<&[ChangedFileLines]> { + self.lines.as_deref() + } +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum RunStatus { Success, @@ -76,39 +95,26 @@ fn run_duplicate_report( current_dir: &Path, writer: &mut W, ) -> Result { - let git_branch_files = changed_git_branch_files(config, current_dir)?; + let branch_scope = changed_branch_scope(config, current_dir)?; let (source_files, discovery_duration) = discover_report_files( config.verbose, current_dir, &config.file_extensions, - if config.git_branch { - None - } else { - config.files.as_deref() - }, + duplicate_discovery_files(config), )?; let (processed_files, file_processing_duration) = time_result(config.verbose, || line::process_source_files(&source_files))?; - let analyzed_source_files = git_branch_files.as_deref().map_or_else( - || processed_files.clone(), - |git_branch_files| filtered_processed_files(&processed_files, git_branch_files), - ); + let analyzed_source_files = filtered_processed_files_for_scope(&processed_files, &branch_scope); let (duplicate_blocks, duplicate_detection_duration) = time_value(config.verbose, || { report::detect_duplicate_blocks(&processed_files) }); - let duplicate_blocks = match git_branch_files.as_deref() { - Some(git_branch_files) => 
filtered_duplicate_blocks(duplicate_blocks, git_branch_files), - None => duplicate_blocks, - }; + let duplicate_blocks = filtered_duplicate_blocks_for_scope(duplicate_blocks, &branch_scope); let report = report::DuplicateReport { analyzed_files: analyzed_source_files.len(), analyzed_extensions: config.file_extensions.clone(), - analyzed_file_paths: config.verbose.then(|| { - analyzed_source_files - .iter() - .map(|processed_file| processed_file.source.display_path.clone()) - .collect() - }), + analyzed_file_paths: config + .verbose + .then(|| analyzed_processed_files(&analyzed_source_files, branch_scope.lines())), timings: duplicate_timings( discovery_duration, file_processing_duration, @@ -127,13 +133,13 @@ fn run_complexity_report( current_dir: &Path, writer: &mut W, ) -> Result { - let git_branch_files = changed_git_branch_files(config, current_dir)?; + let branch_scope = changed_branch_scope(config, current_dir)?; let analyzed_extensions = report::complexity_supported_file_extensions(&config.file_extensions); let (complexity_source_files, discovery_duration) = discover_report_files( config.verbose, current_dir, &analyzed_extensions, - git_branch_files.as_deref().or(config.files.as_deref()), + branch_scope.files().or(config.files.as_deref()), )?; let (functions, complexity_analysis_duration) = time_result(config.verbose, || { report::detect_complex_functions( @@ -142,15 +148,16 @@ fn run_complexity_report( config.max_cyclomatic_complexity, ) })?; + let functions = match branch_scope.lines() { + Some(git_branch_lines) => filtered_strict_complex_functions(functions, git_branch_lines), + None => functions, + }; let report = report::ComplexityReport { analyzed_files: complexity_source_files.len(), analyzed_extensions, - analyzed_file_paths: config.verbose.then(|| { - complexity_source_files - .iter() - .map(|source_file| source_file.display_path.clone()) - .collect() - }), + analyzed_file_paths: config + .verbose + .then(|| 
analyzed_source_file_paths(&complexity_source_files, branch_scope.lines())), max_cognitive_complexity: config.max_cognitive_complexity, max_cyclomatic_complexity: config.max_cyclomatic_complexity, timings: complexity_timings(discovery_duration, complexity_analysis_duration), @@ -162,10 +169,21 @@ fn run_complexity_report( Ok(status) } +fn changed_branch_scope(config: &cli::CliConfig, current_dir: &Path) -> Result { + let files = changed_git_branch_files(config, current_dir)?; + let lines = changed_git_branch_lines(config, current_dir)?; + let strict_file_paths = lines.as_ref().map(|lines| changed_line_paths(lines)); + Ok(BranchScope { + files, + lines, + strict_file_paths, + }) +} + fn changed_git_branch_files( config: &cli::CliConfig, current_dir: &Path, -) -> Result>> { +) -> Result>> { if config.git_branch { discovery::changed_files_against_origin(current_dir).map(Some) } else { @@ -173,6 +191,25 @@ fn changed_git_branch_files( } } +fn changed_git_branch_lines( + config: &cli::CliConfig, + current_dir: &Path, +) -> Result>> { + if config.git_branch_strict { + discovery::changed_lines_against_origin(current_dir).map(Some) + } else { + Ok(None) + } +} + +fn duplicate_discovery_files(config: &cli::CliConfig) -> Option<&[PathBuf]> { + if config.git_branch || config.git_branch_strict { + None + } else { + config.files.as_deref() + } +} + fn discover_report_files( verbose: bool, current_dir: &Path, @@ -237,7 +274,7 @@ fn time_value(enabled: bool, operation: impl FnOnce() -> T) -> (T, Option Vec { let selected_files = selected_files .iter() @@ -252,9 +289,19 @@ fn filtered_processed_files( .collect() } +fn filtered_processed_files_for_scope( + processed_files: &[ProcessedFile], + branch_scope: &BranchScope, +) -> Vec { + branch_scope.files().map_or_else( + || processed_files.to_vec(), + |files| filtered_processed_files(processed_files, files), + ) +} + fn filtered_duplicate_blocks( duplicate_blocks: Vec, - selected_files: &[std::path::PathBuf], + selected_files: 
&[PathBuf], ) -> Vec { let selected_files = selected_files .iter() @@ -271,6 +318,121 @@ fn filtered_duplicate_blocks( .collect() } +fn filtered_duplicate_blocks_for_scope( + duplicate_blocks: Vec, + branch_scope: &BranchScope, +) -> Vec { + let duplicate_blocks = match branch_scope.files() { + Some(files) => filtered_duplicate_blocks(duplicate_blocks, files), + None => duplicate_blocks, + }; + match branch_scope.lines() { + Some(lines) => filtered_strict_duplicate_blocks(duplicate_blocks, lines), + None => duplicate_blocks, + } +} + +fn changed_line_paths(changed_lines: &[ChangedFileLines]) -> Vec { + changed_lines + .iter() + .map(|changed_file| changed_file.path.clone()) + .collect() +} + +fn analyzed_processed_files( + processed_files: &[ProcessedFile], + changed_lines: Option<&[ChangedFileLines]>, +) -> Vec { + processed_files + .iter() + .map(|processed_file| analyzed_file(&processed_file.source.display_path, changed_lines)) + .collect() +} + +fn analyzed_source_file_paths( + source_files: &[SourceFile], + changed_lines: Option<&[ChangedFileLines]>, +) -> Vec { + source_files + .iter() + .map(|source_file| analyzed_file(&source_file.display_path, changed_lines)) + .collect() +} + +fn analyzed_file(path: &Path, changed_lines: Option<&[ChangedFileLines]>) -> AnalyzedFile { + AnalyzedFile { + path: path.to_path_buf(), + changed_lines: changed_lines + .and_then(|changed_lines| changed_lines_for_path(path, changed_lines)), + } +} + +fn changed_lines_for_path( + path: &Path, + changed_lines: &[ChangedFileLines], +) -> Option> { + let formatted_path = format_path(path); + changed_lines + .iter() + .find(|changed_file| format_path(&changed_file.path) == formatted_path) + .map(|changed_file| changed_file.lines.clone()) +} + +fn filtered_strict_duplicate_blocks( + duplicate_blocks: Vec, + changed_lines: &[ChangedFileLines], +) -> Vec { + duplicate_blocks + .into_iter() + .filter(|duplicate_block| { + duplicate_block_applies_to_changed_lines(duplicate_block, 
changed_lines) + }) + .collect() +} + +fn duplicate_block_applies_to_changed_lines( + duplicate_block: &DuplicateBlock, + changed_lines: &[ChangedFileLines], +) -> bool { + duplicate_block + .occurrences + .iter() + .any(|occurrence| occurrence_applies_to_changed_lines(occurrence, changed_lines)) +} + +fn occurrence_applies_to_changed_lines( + occurrence: &DuplicateOccurrence, + changed_lines: &[ChangedFileLines], +) -> bool { + changed_lines_for_path(&occurrence.file_path, changed_lines).is_some_and(|lines| { + ranges_overlap_lines(occurrence.start_line, occurrence.end_line, &lines) + }) +} + +fn filtered_strict_complex_functions( + functions: Vec, + changed_lines: &[ChangedFileLines], +) -> Vec { + functions + .into_iter() + .filter(|function| function_applies_to_changed_lines(function, changed_lines)) + .collect() +} + +fn function_applies_to_changed_lines( + function: &FunctionComplexity, + changed_lines: &[ChangedFileLines], +) -> bool { + changed_lines_for_path(&function.file_path, changed_lines) + .is_some_and(|lines| ranges_overlap_lines(function.start_line, function.end_line, &lines)) +} + +fn ranges_overlap_lines(start: usize, end: usize, lines: &[LineRange]) -> bool { + lines + .iter() + .any(|line_range| start <= line_range.end && end >= line_range.start) +} + #[cfg(test)] mod tests { use std::fs; @@ -587,6 +749,59 @@ mod tests { assert!(output.contains("Duplicate blocks found: 0")); } + #[test] + fn strict_git_branch_mode_reports_duplicates_only_on_changed_lines() { + if !git_is_available() { + return; + } + let project = TempGitRepo::new("strict-duplicate-lines"); + project.git(&["init"]); + project.write("src/a.ts", "const shared = 1;\nconst branch = 1;\n"); + project.write("src/b.ts", "const shared = 1;\n"); + project.commit("initial"); + project.git(&["update-ref", "refs/remotes/origin/main", "HEAD"]); + project.git(&["branch", "-M", "feature"]); + project.write("src/a.ts", "const shared = 1;\nconst branch = 2;\n"); + let output = run_in(&project, 
&["--report-duplicate", "-git-branch-strict"]) + .expect("report succeeds"); + assert!(output.contains("Number of files analyzed: 1")); + assert!(output.contains("Duplicate blocks found: 0")); + project.write("src/a.ts", "const changed = 1;\nconst branch = 2;\n"); + project.commit("branch change"); + project.write("src/b.ts", "const changed = 1;\n"); + let output = run_in(&project, &["--report-duplicate", "-git-branch-strict"]) + .expect("report succeeds"); + assert!(output.contains("Duplicate blocks found: 1")); + assert!(output.contains("- src/a.ts:1-1")); + assert!(output.contains("- src/b.ts:1-1")); + } + + #[test] + fn verbose_strict_git_branch_report_lists_changed_line_ranges() { + if !git_is_available() { + return; + } + let project = TempGitRepo::new("strict-verbose-ranges"); + project.git(&["init"]); + project.write( + "src/a.ts", + "const one = 1;\nconst two = 2;\nconst three = 3;\n", + ); + project.commit("initial"); + project.git(&["update-ref", "refs/remotes/origin/main", "HEAD"]); + project.git(&["branch", "-M", "feature"]); + project.write( + "src/a.ts", + "const one = 1;\nconst two = 20;\nconst three = 30;\n", + ); + let output = run_in( + &project, + &["--report-duplicate", "-git-branch-strict", "-verbose"], + ) + .expect("report succeeds"); + assert!(output.contains("Files analyzed:\n- src/a.ts (2-3)\n")); + } + #[test] fn complexity_report_lists_functions_over_limits() { let project = TempProject::new("complexity"); @@ -722,6 +937,83 @@ mod tests { assert!(output.contains("Functions exceeding limits: 0")); } + #[test] + fn strict_git_branch_mode_reports_complexity_only_for_changed_functions() { + if !git_is_available() { + return; + } + let project = TempGitRepo::new("strict-complexity-lines"); + project.git(&["init"]); + project.write( + "src/lib.rs", + "fn risky(value: i32) -> i32 {\n\ + if value > 10 {\n\ + return 10;\n\ + }\n\ + if value > 5 {\n\ + return 5;\n\ + }\n\ + 0\n\ + }\n\ + const VALUE: i32 = 1;\n", + ); + project.commit("initial"); 
/// An inclusive range of 1-based line numbers within a single file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct LineRange {
    /// First line covered by the range.
    pub start: usize,
    /// Last line covered by the range (inclusive).
    pub end: usize,
}

/// The line ranges that changed in one file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ChangedFileLines {
    /// Path of the changed file.
    pub path: PathBuf,
    /// Changed line ranges of the file.
    pub lines: Vec<LineRange>,
}

/// A file listed in the verbose "files analyzed" section of a report.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AnalyzedFile {
    /// Path of the analyzed file.
    pub path: PathBuf,
    /// Changed line ranges shown next to the path; `None` when the report is
    /// not scoped to changed lines or the file has no recorded changes.
    pub changed_lines: Option<Vec<LineRange>>,
}
a/src/report/complexity_renderer.rs +++ b/src/report/complexity_renderer.rs @@ -1,15 +1,14 @@ use std::fmt::Write as _; -use std::path::PathBuf; use std::time::Duration; -use crate::model::FunctionComplexity; +use crate::model::{AnalyzedFile, FunctionComplexity, LineRange}; use crate::paths::format_path; #[derive(Debug, Clone, PartialEq)] pub struct ComplexityReport { pub analyzed_files: usize, pub analyzed_extensions: Vec, - pub analyzed_file_paths: Option>, + pub analyzed_file_paths: Option>, pub max_cognitive_complexity: u32, pub max_cyclomatic_complexity: u32, pub timings: Option, @@ -83,15 +82,44 @@ pub fn render_complexity_report(report: &ComplexityReport, verbose: bool) -> Str output } -fn render_analyzed_files(output: &mut String, analyzed_file_paths: Option<&[PathBuf]>) { +fn render_analyzed_files(output: &mut String, analyzed_file_paths: Option<&[AnalyzedFile]>) { if let Some(analyzed_file_paths) = analyzed_file_paths { output.push_str("Files analyzed:\n"); for file in analyzed_file_paths { - let _ = writeln!(output, "- {}", format_path(file)); + let _ = writeln!(output, "- {}", format_analyzed_file(file)); } } } +fn format_analyzed_file(file: &AnalyzedFile) -> String { + match file.changed_lines.as_deref() { + Some(lines) if !lines.is_empty() => { + format!( + "{} ({})", + format_path(&file.path), + format_line_ranges(lines) + ) + } + Some(_) | None => format_path(&file.path), + } +} + +fn format_line_ranges(lines: &[LineRange]) -> String { + lines + .iter() + .map(format_line_range) + .collect::>() + .join(",") +} + +fn format_line_range(range: &LineRange) -> String { + if range.start == range.end { + range.start.to_string() + } else { + format!("{}-{}", range.start, range.end) + } +} + fn render_timings(output: &mut String, timings: Option) { if let Some(timings) = timings { output.push_str("Timings:\n"); @@ -191,7 +219,10 @@ mod tests { let report = ComplexityReport { analyzed_files: 1, analyzed_extensions: vec!["rs".to_string()], - 
analyzed_file_paths: Some(vec![PathBuf::from("src/lib.rs")]), + analyzed_file_paths: Some(vec![AnalyzedFile { + path: PathBuf::from("src/lib.rs"), + changed_lines: None, + }]), max_cognitive_complexity: 15, max_cyclomatic_complexity: 10, timings: Some(ComplexityReportTimings { @@ -205,4 +236,25 @@ mod tests { assert!(output.contains("- Discovery: 1.234 ms\n")); assert!(output.contains("- Complexity analysis: 12.345 ms\n")); } + + #[test] + fn renders_verbose_changed_line_ranges() { + let report = ComplexityReport { + analyzed_files: 1, + analyzed_extensions: vec!["rs".to_string()], + analyzed_file_paths: Some(vec![AnalyzedFile { + path: PathBuf::from("src/lib.rs"), + changed_lines: Some(vec![ + LineRange { start: 3, end: 17 }, + LineRange { start: 21, end: 21 }, + ]), + }]), + max_cognitive_complexity: 15, + max_cyclomatic_complexity: 10, + timings: None, + functions: Vec::new(), + }; + let output = render_complexity_report(&report, true); + assert!(output.contains("Files analyzed:\n- src/lib.rs (3-17,21)\n")); + } } diff --git a/src/report/duplicate_renderer.rs b/src/report/duplicate_renderer.rs index 3f3129a..9f56a9d 100644 --- a/src/report/duplicate_renderer.rs +++ b/src/report/duplicate_renderer.rs @@ -1,15 +1,14 @@ use std::fmt::Write as _; -use std::path::PathBuf; use std::time::Duration; -use crate::model::DuplicateBlock; +use crate::model::{AnalyzedFile, DuplicateBlock, LineRange}; use crate::paths::format_path; #[derive(Debug, Clone, PartialEq, Eq)] pub struct DuplicateReport { pub analyzed_files: usize, pub analyzed_extensions: Vec, - pub analyzed_file_paths: Option>, + pub analyzed_file_paths: Option>, pub timings: Option, pub duplicate_blocks: Vec, } @@ -61,9 +60,38 @@ fn render_report_summary(output: &mut String, report: &DuplicateReport, verbose: } } -fn render_analyzed_files(output: &mut String, analyzed_file_paths: &[PathBuf]) { +fn render_analyzed_files(output: &mut String, analyzed_file_paths: &[AnalyzedFile]) { for file in analyzed_file_paths { - 
let _ = writeln!(output, "- {}", format_path(file)); + let _ = writeln!(output, "- {}", format_analyzed_file(file)); + } +} + +fn format_analyzed_file(file: &AnalyzedFile) -> String { + match file.changed_lines.as_deref() { + Some(lines) if !lines.is_empty() => { + format!( + "{} ({})", + format_path(&file.path), + format_line_ranges(lines) + ) + } + Some(_) | None => format_path(&file.path), + } +} + +fn format_line_ranges(lines: &[LineRange]) -> String { + lines + .iter() + .map(format_line_range) + .collect::>() + .join(",") +} + +fn format_line_range(range: &LineRange) -> String { + if range.start == range.end { + range.start.to_string() + } else { + format!("{}-{}", range.start, range.end) } } @@ -143,7 +171,7 @@ mod tests { use std::path::PathBuf; use std::time::Duration; - use crate::model::{DuplicateBlock, DuplicateOccurrence}; + use crate::model::{AnalyzedFile, DuplicateBlock, DuplicateOccurrence}; use super::*; @@ -257,8 +285,14 @@ mod tests { analyzed_files: 2, analyzed_extensions: vec!["ts".to_string()], analyzed_file_paths: Some(vec![ - PathBuf::from("src/a.ts"), - PathBuf::from("src/nested/b.ts"), + AnalyzedFile { + path: PathBuf::from("src/a.ts"), + changed_lines: None, + }, + AnalyzedFile { + path: PathBuf::from("src/nested/b.ts"), + changed_lines: None, + }, ]), timings: None, duplicate_blocks: Vec::new(), From a6f5ac632f3069f7ec0805da71799f336e159f45 Mon Sep 17 00:00:00 2001 From: b4prog Date: Sun, 28 Jun 2026 14:53:41 +0200 Subject: [PATCH 3/8] [docs] prioritize complexity report in documentation --- README.md | 72 ++++++++++++++++++++++++------------------------- src/cli/help.rs | 49 ++++++++++++++++++++++++++------- 2 files changed, 75 insertions(+), 46 deletions(-) diff --git a/README.md b/README.md index b1b3301..efce9e6 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,14 @@ # CodeM8 CodeM8 is a Rust command-line application for deterministic source code reports. 
-It can detect duplicated line-based code blocks in a repository: +It can report functions whose cognitive or cyclomatic complexity exceeds +configurable limits: + +```bash +codem8 --report-complexity +``` + +CodeM8 can also detect duplicated line-based code blocks in a repository: ```bash codem8 --report-duplicate @@ -12,13 +19,6 @@ trims source lines, ignores empty lines, hashes normalized lines with XXH3 128-bit, classifies syntax-only lines as block-only, groups repeated blocks, and prints a stable plain-text report sorted by duplicate weight. -CodeM8 can also report functions whose cognitive or cyclomatic complexity -exceeds configurable limits: - -```bash -codem8 --report-complexity -``` - ## Installation Install `codem8` from the GitHub source with Cargo: @@ -42,21 +42,21 @@ cargo install --locked --path . Run from the local checkout without installing: ```bash -cargo run -- --report-duplicate +cargo run -- --report-complexity ``` ## Usage -Analyze supported source files from the current directory: +Analyze function complexity for languages supported by `rust-code-analysis`: ```bash -codem8 --report-duplicate +codem8 --report-complexity ``` -Analyze function complexity for languages supported by `rust-code-analysis`: +Analyze supported source files from the current directory for duplicate code: ```bash -codem8 --report-complexity +codem8 --report-duplicate ``` Restrict analysis to specific extensions: @@ -78,20 +78,20 @@ Analyze files changed on the current local Git branch compared to the origin base branch: ```bash -codem8 --report-duplicate -git-branch +codem8 --report-complexity -git-branch ``` -The duplicate and complexity reports are mutually exclusive; run one report per +The complexity and duplicate reports are mutually exclusive; run one report per command. -Reports exit with a non-zero status when they detect issues: duplicate blocks -for `--report-duplicate`, or functions above the configured limits for -`--report-complexity`. 
+Reports exit with a non-zero status when they detect issues: functions above the +configured limits for `--report-complexity`, or duplicate blocks for +`--report-duplicate`. Include analyzed files, report metrics, and timing information: ```bash -codem8 --report-duplicate -verbose +codem8 --report-complexity -verbose ``` Set complexity thresholds: @@ -100,6 +100,23 @@ Set complexity thresholds: codem8 --report-complexity -max-cognitive-complexity=15 -max-cyclomatic-complexity=10 ``` +## Complexity Report + +The complexity report uses `rust-code-analysis` and only applies to file +extensions supported by that crate. It reports `SpaceKind::Function` entries +whose cognitive complexity exceeds the configured cognitive limit or whose +cyclomatic complexity exceeds the configured cyclomatic limit. + +The default maximum cognitive complexity is 15, and the default maximum +cyclomatic complexity is 10. Use `-max-cognitive-complexity=` and +`-max-cyclomatic-complexity=` to adjust them. + +Use `-git-branch` to analyze complexity only in supported files changed on the +current local branch. The same origin branch resolution and `-files` exclusion +rules used by the duplicate report apply. + +Use `-verbose` to list analyzed files and timing information. + ## Duplicate Report By default, CodeM8 analyzes all registered source file extensions. Recursive @@ -133,23 +150,6 @@ occurrence count, and timings for discovery, file processing, and duplicate detection. Character counts are used internally for scoring and sorting, but are not printed. -## Complexity Report - -The complexity report uses `rust-code-analysis` and only applies to file -extensions supported by that crate. It reports `SpaceKind::Function` entries -whose cognitive complexity exceeds the configured cognitive limit or whose -cyclomatic complexity exceeds the configured cyclomatic limit. - -The default maximum cognitive complexity is 15, and the default maximum -cyclomatic complexity is 10. 
Use `-max-cognitive-complexity=` and -`-max-cyclomatic-complexity=` to adjust them. - -Use `-git-branch` to analyze complexity only in supported files changed on the -current local branch. The same origin branch resolution and `-files` exclusion -rules used by the duplicate report apply. - -Use `-verbose` to list analyzed files and timing information. - ## Development Run the full local verification set: diff --git a/src/cli/help.rs b/src/cli/help.rs index c375783..cace464 100644 --- a/src/cli/help.rs +++ b/src/cli/help.rs @@ -6,8 +6,8 @@ const HELP_TEXT_BODY: &str = "\ USAGE: codem8 help codem8 -h - codem8 --report-duplicate [OPTIONS] codem8 --report-complexity [OPTIONS] + codem8 --report-duplicate [OPTIONS] COMMANDS: help @@ -15,13 +15,14 @@ COMMANDS: Display this detailed documentation. REQUIRED REPORT SWITCHES: - --report-duplicate - Analyze source files and print a duplicate code report. - --report-complexity Analyze supported source files and print a function complexity report. Cannot be combined with --report-duplicate. + --report-duplicate + Analyze source files and print a duplicate code report. + Cannot be combined with --report-complexity. + OPTIONS: -file-extension= Comma-separated source file extensions to analyze. @@ -54,24 +55,27 @@ OPTIONS: Include analyzed files and timings in report output, plus duplicate block details. In -git-branch-strict mode, analyzed files include changed line ranges. +COMPLEXITY REPORT PURPOSE: + The complexity report helps you find functions whose cognitive or cyclomatic + complexity exceeds the configured limits. It lists each function with its + location and both computed complexity values. + DUPLICATE REPORT PURPOSE: The duplicate report helps you find repeated code that may be worth refactoring, reviewing, or consolidating. It lists each duplicated block with the files and line ranges where it appears, making it easier to compare the repeated code and decide whether it should stay duplicated. 
-COMPLEXITY REPORT PURPOSE: - The complexity report helps you find functions whose cognitive or cyclomatic - complexity exceeds the configured limits. It lists each function with its - location and both computed complexity values. - EXAMPLES: + codem8 --report-complexity + codem8 --report-complexity -file-extension=rs -max-cognitive-complexity=12 + codem8 --report-complexity -git-branch + codem8 --report-complexity -git-branch-strict codem8 --report-duplicate codem8 --report-duplicate -file-extension=ts,tsx,js,jsx codem8 --report-duplicate -file-extension=ts,js -files=\"src/a.ts,src/b.js\" codem8 --report-duplicate -git-branch codem8 --report-duplicate -git-branch-strict - codem8 --report-complexity -file-extension=rs -max-cognitive-complexity=12 "; #[must_use] @@ -98,6 +102,7 @@ mod tests { assert_help_includes_expected_sections(&help); assert_help_includes_single_dash_options(&help); assert_help_excludes_double_dash_options(&help); + assert_help_mentions_complexity_before_duplicate(&help); } fn assert_help_includes_expected_sections(help: &str) { @@ -131,6 +136,30 @@ mod tests { assert!(!help.contains("--max-cyclomatic-complexity=")); } + fn assert_help_mentions_complexity_before_duplicate(help: &str) { + assert!( + help.find("codem8 --report-complexity [OPTIONS]") + .expect("complexity usage exists") + < help + .find("codem8 --report-duplicate [OPTIONS]") + .expect("duplicate usage exists") + ); + assert!( + help.find("COMPLEXITY REPORT PURPOSE:") + .expect("complexity purpose exists") + < help + .find("DUPLICATE REPORT PURPOSE:") + .expect("duplicate purpose exists") + ); + assert!( + help.find("codem8 --report-complexity\n") + .expect("complexity example exists") + < help + .find("codem8 --report-duplicate\n") + .expect("duplicate example exists") + ); + } + #[test] fn help_text_includes_version_from_cargo_lock() { let version = codem8_version_from_cargo_lock().expect("codem8 version exists"); From ac5fa110ac151a8bc5fb4af20e1613fb5d180de0 Mon Sep 17 00:00:00 
2001 From: b4prog Date: Sun, 28 Jun 2026 15:13:08 +0200 Subject: [PATCH 4/8] [docs] clarify git branch report help text --- src/cli/help.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/cli/help.rs b/src/cli/help.rs index cace464..28dcb8d 100644 --- a/src/cli/help.rs +++ b/src/cli/help.rs @@ -39,8 +39,7 @@ OPTIONS: branch. Cannot be combined with -files. -git-branch-strict - Search only in files changed on the current local Git branch, and report - only duplicate blocks or complex functions that overlap changed lines. + Limit the report to lines changed on the current git branch. Cannot be combined with -files or -git-branch. -max-cognitive-complexity= From 0967aa5405c340be6c32f96471514b08702c7c32 Mon Sep 17 00:00:00 2001 From: b4prog Date: Sun, 28 Jun 2026 15:20:25 +0200 Subject: [PATCH 5/8] [fix] normalize changed line ranges to worktree coordinates --- src/discovery/git.rs | 62 +++++++++++++++++++++++++++----------------- 1 file changed, 38 insertions(+), 24 deletions(-) diff --git a/src/discovery/git.rs b/src/discovery/git.rs index 2569ccd..1da2d6e 100644 --- a/src/discovery/git.rs +++ b/src/discovery/git.rs @@ -86,30 +86,6 @@ pub fn changed_lines_against_origin(current_dir: &Path) -> Result Date: Sun, 28 Jun 2026 15:25:59 +0200 Subject: [PATCH 6/8] [fix] handle non-UTF-8 untracked files in changed line discovery --- src/discovery/git.rs | 46 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/src/discovery/git.rs b/src/discovery/git.rs index 1da2d6e..f9c767d 100644 --- a/src/discovery/git.rs +++ b/src/discovery/git.rs @@ -276,9 +276,12 @@ fn merged_ranges(lines: &[LineRange]) -> Vec { } fn count_lines(path: &Path, display_path: &Path) -> Result { - let contents = fs::read_to_string(path) - .map_err(|error| CodeM8Error::io(display_path, "read file", &error))?; - Ok(contents.lines().count()) + let contents = + fs::read(path).map_err(|error| CodeM8Error::io(display_path, "read file", 
&error))?; + if contents.is_empty() { + return Ok(0); + } + Ok(contents.split_inclusive(|byte| *byte == b'\n').count()) } fn existing_file_path(repo_root: &Path, current_dir: &Path, path: &Path) -> Option { @@ -367,6 +370,14 @@ mod tests { fs::write(path, contents).expect("write test file"); } + fn write_bytes(&self, relative_path: &str, contents: &[u8]) { + let path = self.path.join(relative_path); + if let Some(parent) = path.parent() { + fs::create_dir_all(parent).expect("create parent directory"); + } + fs::write(path, contents).expect("write test file"); + } + fn git(&self, args: &[&str]) { let status = Command::new("git") .arg("-C") @@ -480,4 +491,33 @@ mod tests { }] ); } + + #[test] + fn ignores_non_utf8_untracked_files_when_collecting_changed_lines() { + if !git_is_available() { + return; + } + let repo = TempGitRepo::new("non-utf8-untracked"); + repo.git(&["init"]); + repo.write("src/base.ts", "const value = base;\n"); + repo.commit("initial"); + repo.git(&["update-ref", "refs/remotes/origin/main", "HEAD"]); + repo.git(&["branch", "-M", "feature"]); + repo.write("src/untracked.ts", "first line\nsecond line\n"); + repo.write_bytes("assets/image.bin", &[0xFF, 0xFE, 0x00, b'\n', 0x80]); + let files = changed_lines_against_origin(repo.path()).expect("list changed lines"); + assert_eq!( + files, + [ + ChangedFileLines { + path: PathBuf::from("assets/image.bin"), + lines: vec![LineRange { start: 1, end: 2 }], + }, + ChangedFileLines { + path: PathBuf::from("src/untracked.ts"), + lines: vec![LineRange { start: 1, end: 2 }], + }, + ] + ); + } } From 7c69cb71ea75e322457ebe9a75f94052f7838913 Mon Sep 17 00:00:00 2001 From: b4prog Date: Sun, 28 Jun 2026 15:32:28 +0200 Subject: [PATCH 7/8] [fix] handle quoted git diff headers when parsing changed lines --- src/discovery/git.rs | 133 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 128 insertions(+), 5 deletions(-) diff --git a/src/discovery/git.rs b/src/discovery/git.rs index f9c767d..71360fb 100644 --- 
a/src/discovery/git.rs +++ b/src/discovery/git.rs @@ -200,21 +200,126 @@ fn parse_changed_lines(text: &str) -> Result> { let mut files = Vec::new(); let mut current_path = None::; for line in text.lines() { - if let Some(path) = line.strip_prefix("+++ b/") { - current_path = Some(PathBuf::from(path)); - } else if line == "+++ /dev/null" { - current_path = None; - } else if line.starts_with("@@ ") { + if line.starts_with("@@ ") { let path = current_path.clone().ok_or_else(|| { CodeM8Error::new("could not parse changed git lines: missing file") })?; let range = parse_hunk_range(line)?; push_parsed_range(&mut files, path, range); + continue; + } + match parse_changed_file_header(line)? { + ParsedChangedFileHeader::NotHeader => {} + ParsedChangedFileHeader::DevNull => current_path = None, + ParsedChangedFileHeader::Path(path) => current_path = Some(path), } } Ok(files) } +enum ParsedChangedFileHeader { + NotHeader, + DevNull, + Path(PathBuf), +} + +fn parse_changed_file_header(line: &str) -> Result { + let Some(path) = line.strip_prefix("+++ ") else { + return Ok(ParsedChangedFileHeader::NotHeader); + }; + if path == "/dev/null" { + return Ok(ParsedChangedFileHeader::DevNull); + } + let path = if let Some(path) = path.strip_prefix("b/") { + path.to_owned() + } else if path.starts_with('"') { + let path = parse_quoted_diff_path(path)?; + path.strip_prefix("b/") + .ok_or_else(|| CodeM8Error::new(format!("could not parse changed git header: {line}")))? 
+ .to_owned() + } else { + return Err(CodeM8Error::new(format!( + "could not parse changed git header: {line}" + ))); + }; + Ok(ParsedChangedFileHeader::Path(PathBuf::from(path))) +} + +fn parse_quoted_diff_path(path: &str) -> Result { + let Some(quoted) = path + .strip_prefix('"') + .and_then(|value| value.strip_suffix('"')) + else { + return Err(CodeM8Error::new(format!( + "could not parse changed git header: +++ {path}" + ))); + }; + let mut parsed = String::new(); + let mut chars = quoted.chars(); + while let Some(ch) = chars.next() { + if ch != '\\' { + parsed.push(ch); + continue; + } + parsed.push(parse_diff_escape(&mut chars, path)?); + } + Ok(parsed) +} + +fn parse_diff_escape(chars: &mut std::str::Chars<'_>, path: &str) -> Result { + let escaped = chars.next().ok_or_else(|| { + CodeM8Error::new(format!("could not parse changed git header: +++ {path}")) + })?; + let parsed = if let Some(parsed) = simple_diff_escape(escaped) { + parsed + } else if matches!(escaped, '0'..='7') { + parse_diff_octal_escape(chars, escaped, path)? 
+ } else { + return Err(CodeM8Error::new(format!( + "could not parse changed git header: +++ {path}" + ))); + }; + Ok(parsed) +} + +fn simple_diff_escape(escaped: char) -> Option { + [ + ('\\', '\\'), + ('"', '"'), + ('a', '\u{0007}'), + ('b', '\u{0008}'), + ('f', '\u{000C}'), + ('n', '\n'), + ('r', '\r'), + ('t', '\t'), + ('v', '\u{000B}'), + ] + .into_iter() + .find_map(|(pattern, value)| (escaped == pattern).then_some(value)) +} + +fn parse_diff_octal_escape( + chars: &mut std::str::Chars<'_>, + first: char, + path: &str, +) -> Result { + let mut octal = String::from(first); + while octal.len() < 3 { + let Some(next) = chars.clone().next() else { + break; + }; + if !matches!(next, '0'..='7') { + break; + } + if let Some(digit) = chars.next() { + octal.push(digit); + } + } + let value = u8::from_str_radix(&octal, 8) + .map_err(|_| CodeM8Error::new(format!("could not parse changed git header: +++ {path}")))?; + Ok(char::from(value)) +} + fn parse_hunk_range(line: &str) -> Result> { let added = line .split_whitespace() @@ -492,6 +597,24 @@ mod tests { ); } + #[test] + fn parses_changed_lines_for_quoted_diff_paths() { + let diff = concat!( + "diff --git \"a/src/space file.ts\" \"b/src/space file.ts\"\n", + "--- \"a/src/space file.ts\"\n", + "+++ \"b/src/space file.ts\"\n", + "@@ -0,0 +1 @@\n", + ); + let files = parse_changed_lines(diff).expect("parse quoted diff"); + assert_eq!( + files, + [ChangedFileLines { + path: PathBuf::from("src/space file.ts"), + lines: vec![LineRange { start: 1, end: 1 }], + }] + ); + } + #[test] fn ignores_non_utf8_untracked_files_when_collecting_changed_lines() { if !git_is_available() { From 999e1a7022ac47d6684a92eb851730354ed0950e Mon Sep 17 00:00:00 2001 From: b4prog Date: Sun, 28 Jun 2026 16:15:18 +0200 Subject: [PATCH 8/8] [fix] decode quoted git diff paths as UTF-8 bytes --- src/discovery/git.rs | 56 ++++++++++++++++++++++++++++---------------- 1 file changed, 36 insertions(+), 20 deletions(-) diff --git a/src/discovery/git.rs 
b/src/discovery/git.rs index 71360fb..14a815c 100644 --- a/src/discovery/git.rs +++ b/src/discovery/git.rs @@ -254,19 +254,21 @@ fn parse_quoted_diff_path(path: &str) -> Result { "could not parse changed git header: +++ {path}" ))); }; - let mut parsed = String::new(); + let mut parsed = Vec::new(); let mut chars = quoted.chars(); while let Some(ch) = chars.next() { if ch != '\\' { - parsed.push(ch); + let mut buffer = [0_u8; 4]; + parsed.extend_from_slice(ch.encode_utf8(&mut buffer).as_bytes()); continue; } parsed.push(parse_diff_escape(&mut chars, path)?); } - Ok(parsed) + String::from_utf8(parsed) + .map_err(|_| CodeM8Error::new(format!("could not parse changed git header: +++ {path}"))) } -fn parse_diff_escape(chars: &mut std::str::Chars<'_>, path: &str) -> Result { +fn parse_diff_escape(chars: &mut std::str::Chars<'_>, path: &str) -> Result { let escaped = chars.next().ok_or_else(|| { CodeM8Error::new(format!("could not parse changed git header: +++ {path}")) })?; @@ -282,27 +284,23 @@ fn parse_diff_escape(chars: &mut std::str::Chars<'_>, path: &str) -> Result Option { +fn simple_diff_escape(escaped: char) -> Option { [ - ('\\', '\\'), - ('"', '"'), - ('a', '\u{0007}'), - ('b', '\u{0008}'), - ('f', '\u{000C}'), - ('n', '\n'), - ('r', '\r'), - ('t', '\t'), - ('v', '\u{000B}'), + ('\\', b'\\'), + ('"', b'"'), + ('a', 0x07), + ('b', 0x08), + ('f', 0x0C), + ('n', b'\n'), + ('r', b'\r'), + ('t', b'\t'), + ('v', 0x0B), ] .into_iter() .find_map(|(pattern, value)| (escaped == pattern).then_some(value)) } -fn parse_diff_octal_escape( - chars: &mut std::str::Chars<'_>, - first: char, - path: &str, -) -> Result { +fn parse_diff_octal_escape(chars: &mut std::str::Chars<'_>, first: char, path: &str) -> Result { let mut octal = String::from(first); while octal.len() < 3 { let Some(next) = chars.clone().next() else { @@ -317,7 +315,7 @@ fn parse_diff_octal_escape( } let value = u8::from_str_radix(&octal, 8) .map_err(|_| CodeM8Error::new(format!("could not parse changed git 
header: +++ {path}")))?; - Ok(char::from(value)) + Ok(value) } fn parse_hunk_range(line: &str) -> Result> { @@ -615,6 +613,24 @@ mod tests { ); } + #[test] + fn parses_changed_lines_for_non_ascii_quoted_diff_paths() { + let diff = concat!( + "diff --git \"a/src/caf\\303\\251.ts\" \"b/src/caf\\303\\251.ts\"\n", + "--- \"a/src/caf\\303\\251.ts\"\n", + "+++ \"b/src/caf\\303\\251.ts\"\n", + "@@ -0,0 +1 @@\n", + ); + let files = parse_changed_lines(diff).expect("parse non-ascii quoted diff"); + assert_eq!( + files, + [ChangedFileLines { + path: PathBuf::from("src/caf\u{00E9}.ts"), + lines: vec![LineRange { start: 1, end: 1 }], + }] + ); + } + #[test] fn ignores_non_utf8_untracked_files_when_collecting_changed_lines() { if !git_is_available() {