From d845aa459b739561eb10652213af64c0bd558f5e Mon Sep 17 00:00:00 2001 From: daladim Date: Thu, 16 Mar 2023 15:23:58 +0100 Subject: [PATCH 1/7] [minor] Renamed internal functions This will be useful to disambiguate when adding support for 3-way merging of arbitrary types --- src/diff/mod.rs | 8 ++++---- src/merge/mod.rs | 12 ++++++------ src/utils.rs | 19 ++++++++++++++++--- 3 files changed, 26 insertions(+), 13 deletions(-) diff --git a/src/diff/mod.rs b/src/diff/mod.rs index a456c41..d6f2842 100644 --- a/src/diff/mod.rs +++ b/src/diff/mod.rs @@ -96,8 +96,8 @@ impl DiffOptions { /// Produce a Patch between two texts based on the configured options pub fn create_patch<'a>(&self, original: &'a str, modified: &'a str) -> Patch<'a, str> { let mut classifier = Classifier::default(); - let (old_lines, old_ids) = classifier.classify_lines(original); - let (new_lines, new_ids) = classifier.classify_lines(modified); + let (old_lines, old_ids) = classifier.classify_text(original); + let (new_lines, new_ids) = classifier.classify_text(modified); let solution = self.diff_slice(&old_ids, &new_ids); @@ -112,8 +112,8 @@ impl DiffOptions { modified: &'a [u8], ) -> Patch<'a, [u8]> { let mut classifier = Classifier::default(); - let (old_lines, old_ids) = classifier.classify_lines(original); - let (new_lines, new_ids) = classifier.classify_lines(modified); + let (old_lines, old_ids) = classifier.classify_text(original); + let (new_lines, new_ids) = classifier.classify_text(modified); let solution = self.diff_slice(&old_ids, &new_ids); diff --git a/src/merge/mod.rs b/src/merge/mod.rs index 83b99fb..0c0703c 100644 --- a/src/merge/mod.rs +++ b/src/merge/mod.rs @@ -152,9 +152,9 @@ impl MergeOptions { theirs: &'a str, ) -> Result { let mut classifier = Classifier::default(); - let (ancestor_lines, ancestor_ids) = classifier.classify_lines(ancestor); - let (our_lines, our_ids) = classifier.classify_lines(ours); - let (their_lines, their_ids) = classifier.classify_lines(theirs); + let 
(ancestor_lines, ancestor_ids) = classifier.classify_text(ancestor); + let (our_lines, our_ids) = classifier.classify_text(ours); + let (their_lines, their_ids) = classifier.classify_text(theirs); let opts = DiffOptions::default(); let our_solution = opts.diff_slice(&ancestor_ids, &our_ids); @@ -183,9 +183,9 @@ impl MergeOptions { theirs: &'a [u8], ) -> Result, Vec> { let mut classifier = Classifier::default(); - let (ancestor_lines, ancestor_ids) = classifier.classify_lines(ancestor); - let (our_lines, our_ids) = classifier.classify_lines(ours); - let (their_lines, their_ids) = classifier.classify_lines(theirs); + let (ancestor_lines, ancestor_ids) = classifier.classify_text(ancestor); + let (our_lines, our_ids) = classifier.classify_text(ours); + let (their_lines, their_ids) = classifier.classify_text(theirs); let opts = DiffOptions::default(); let our_solution = opts.diff_slice(&ancestor_ids, &our_ids); diff --git a/src/utils.rs b/src/utils.rs index 9b3e70d..14f96ef 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -12,7 +12,7 @@ pub struct Classifier<'a, T: ?Sized> { } impl<'a, T: ?Sized + Eq + Hash> Classifier<'a, T> { - fn classify(&mut self, record: &'a T) -> u64 { + fn classify_item(&mut self, record: &'a T) -> u64 { match self.unique_ids.entry(record) { Entry::Occupied(o) => *o.get(), Entry::Vacant(v) => { @@ -25,9 +25,9 @@ impl<'a, T: ?Sized + Eq + Hash> Classifier<'a, T> { } impl<'a, T: ?Sized + Text> Classifier<'a, T> { - pub fn classify_lines(&mut self, text: &'a T) -> (Vec<&'a T>, Vec) { + pub fn classify_text(&mut self, text: &'a T) -> (Vec<&'a T>, Vec) { LineIter::new(text) - .map(|line| (line, self.classify(line))) + .map(|line| (line, self.classify_item(line))) .unzip() } } @@ -227,3 +227,16 @@ fn find_bytes(haystack: &[u8], needle: &[u8]) -> Option { fn find_byte(haystack: &[u8], byte: u8) -> Option { haystack.iter().position(|&b| b == byte) } + +#[cfg(test)] +mod test { + use super::Classifier; + + #[test] + fn classify_string() { + let input = 
"abc\ndef"; + let mut classifier = Classifier::default(); + let (lines, _ids) = classifier.classify_text(input); + assert_eq!(lines, vec!["abc\n", "def"]); + } +} From 52255272e6491ea841f22d2701a0e83137293d0d Mon Sep 17 00:00:00 2001 From: daladim Date: Thu, 16 Mar 2023 15:32:44 +0100 Subject: [PATCH 2/7] [minor] Removed useless generic type Because MergeRange are actually always MergeRange<[u64]> --- src/merge/mod.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/merge/mod.rs b/src/merge/mod.rs index 0c0703c..dc465e6 100644 --- a/src/merge/mod.rs +++ b/src/merge/mod.rs @@ -490,11 +490,11 @@ fn cleanup_conflicts<'ancestor, 'ours, 'theirs, T: ?Sized + SliceLike + PartialE } } -fn output_result<'a, T: ?Sized>( +fn output_result<'a>( ancestor: &[&'a str], ours: &[&'a str], theirs: &[&'a str], - merge: &[MergeRange], + merge: &[MergeRange<[u64]>], marker_len: usize, style: ConflictStyle, ) -> Result { @@ -556,11 +556,11 @@ fn add_conflict_marker( output.push('\n'); } -fn output_result_bytes<'a, T: ?Sized>( +fn output_result_bytes<'a>( ancestor: &[&'a [u8]], ours: &[&'a [u8]], theirs: &[&'a [u8]], - merge: &[MergeRange], + merge: &[MergeRange<[u64]>], marker_len: usize, style: ConflictStyle, ) -> Result, Vec> { From 6321dcea3df9830055b0e8dfabb1f59760c63c9b Mon Sep 17 00:00:00 2001 From: daladim Date: Thu, 16 Mar 2023 16:02:14 +0100 Subject: [PATCH 3/7] Ability to merge any list of values that implement Eq + Hash This makes this crate useful to apply 3-way merges on arbitrary data --- src/lib.rs | 2 +- src/merge/mod.rs | 89 ++++++++++++++++++++++++++++++++++++++++++++++ src/merge/tests.rs | 12 +++++++ src/utils.rs | 16 +++++++++ 4 files changed, 118 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 2d4b0dc..e716220 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -221,5 +221,5 @@ mod utils; pub use apply::{apply, apply_bytes, ApplyError}; pub use diff::{create_patch, create_patch_bytes, DiffOptions}; -pub use merge::{merge, 
merge_bytes, ConflictStyle, MergeOptions}; +pub use merge::{merge, merge_bytes, merge_custom, ConflictStyle, MergeOptions, MergeConflicts}; pub use patch::{Hunk, HunkRange, Line, ParsePatchError, Patch, PatchFormatter}; diff --git a/src/merge/mod.rs b/src/merge/mod.rs index dc465e6..2a1ee85 100644 --- a/src/merge/mod.rs +++ b/src/merge/mod.rs @@ -4,6 +4,7 @@ use crate::{ utils::Classifier, }; use std::{cmp, fmt}; +use std::hash::Hash; #[cfg(test)] mod tests; @@ -205,6 +206,34 @@ impl MergeOptions { self.style, ) } + + pub fn merge_custom<'a, T: Eq + Hash>( + &self, + ancestor: &'a [T], + ours: &'a [T], + theirs: &'a [T], + ) -> Result, MergeConflicts> { + let mut classifier = Classifier::default(); + let (ancestor_lines, ancestor_ids) = classifier.classify(ancestor); + let (our_lines, our_ids) = classifier.classify(ours); + let (their_lines, their_ids) = classifier.classify(theirs); + + let opts = DiffOptions::default(); + let our_solution = opts.diff_slice(&ancestor_ids, &our_ids); + let their_solution = opts.diff_slice(&ancestor_ids, &their_ids); + + let merged = merge_solutions(&our_solution, &their_solution); + let mut merge = diff3_range_to_merge_range(&merged); + + cleanup_conflicts(&mut merge); + + output_result_custom( + &ancestor_lines, + &our_lines, + &their_lines, + &merge, + ) + } } impl Default for MergeOptions { @@ -277,6 +306,30 @@ pub fn merge_bytes<'a>( MergeOptions::default().merge_bytes(ancestor, ours, theirs) } +/// Infos about a merge that went wrong +#[derive(Debug, Ord, PartialOrd, Eq, PartialEq)] +pub struct MergeConflicts { + /// How many conflicts have occurred + pub count: usize +} + +impl std::fmt::Display for MergeConflicts { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { + write!(f, "{} merge conflicts", self.count) + } +} + +impl std::error::Error for MergeConflicts {} + +/// Perform a 3-way merge between any list of values that support it +pub fn merge_custom<'a, T: Eq + Hash>( + ancestor: &'a [T], 
+ ours: &'a [T], + theirs: &'a [T], +) -> Result, MergeConflicts> { + MergeOptions::default().merge_custom(ancestor, ours, theirs) +} + fn merge_solutions<'ancestor, 'ours, 'theirs, T: ?Sized + SliceLike>( our_solution: &[DiffRange<'ancestor, 'ours, T>], their_solution: &[DiffRange<'ancestor, 'theirs, T>], @@ -635,3 +688,39 @@ fn add_conflict_marker_bytes( } output.push(b'\n'); } + +fn output_result_custom<'a, T: Eq + Hash>( + ancestor: &[&'a T], + ours: &[&'a T], + theirs: &[&'a T], + merge: &[MergeRange<[u64]>], +) -> Result, MergeConflicts> { + let mut conflicts = 0; + let mut output = Vec::new(); + + for merge_range in merge { + match merge_range { + MergeRange::Equal(range, ..) => { + output.extend(ancestor[range.range()].iter().copied()); + } + MergeRange::Conflict(_ancestor_range, _ours_range, _theirs_range) => { + conflicts += 1; + } + MergeRange::Ours(range) => { + output.extend(ours[range.range()].iter().copied()); + } + MergeRange::Theirs(range) => { + output.extend(theirs[range.range()].iter().copied()); + } + MergeRange::Both(range, _) => { + output.extend(ours[range.range()].iter().copied()); + } + } + } + + if conflicts != 0 { + Err(MergeConflicts { count: conflicts }) + } else { + Ok(output) + } +} diff --git a/src/merge/tests.rs b/src/merge/tests.rs index ff40860..65a1bb6 100644 --- a/src/merge/tests.rs +++ b/src/merge/tests.rs @@ -215,6 +215,18 @@ salt ); } +#[test] +fn test_merge_arbitrary_type() { + let original = [1,2,3,4,5, 6]; + let ours = [1,2,3,4,5,100,6]; + let theirs = [1, 3,4,5, 6]; + let expected = [1, 3,4,5,100,6]; + + let result = merge_custom(&original, &ours, &theirs).unwrap(); + let result_owned: Vec = result.iter().map(|r| **r).collect(); + assert_eq!(result_owned, expected); +} + #[test] fn myers_diffy_vs_git() { let original = "\ diff --git a/src/utils.rs b/src/utils.rs index 14f96ef..aa6d69c 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -32,6 +32,14 @@ impl<'a, T: ?Sized + Text> Classifier<'a, T> { } } +impl<'a, T: Eq + Hash> 
Classifier<'a, T> { + pub fn classify(&mut self, data: &'a [T]) -> (Vec<&'a T>, Vec) { + data.iter() + .map(|item| (item, self.classify_item(item))) + .unzip() + } +} + impl Default for Classifier<'_, T> { fn default() -> Self { Self { @@ -232,6 +240,14 @@ fn find_byte(haystack: &[u8], byte: u8) -> Option { mod test { use super::Classifier; + #[test] + fn classify() { + let input = vec![10, 11, 12, 13]; + let mut classifier = Classifier::default(); + let (lines, _ids) = classifier.classify(&input); + assert_eq!(lines, vec![&10, &11, &12, &13]); + } + #[test] fn classify_string() { let input = "abc\ndef"; From 136741e2450b2d9385a10ebfbbf8af1648a89f21 Mon Sep 17 00:00:00 2001 From: daladim Date: Thu, 16 Mar 2023 16:22:11 +0100 Subject: [PATCH 4/7] [doc] Added references to 3-way merging of arbitrary data --- src/lib.rs | 38 +++++++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index e716220..513f6b8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,15 +1,27 @@ //! Tools for finding and manipulating differences between files //! -//! ## Overview +//! # Overview //! //! This library is intended to be a collection of tools used to find and -//! manipulate differences between files inspired by [LibXDiff] and [GNU -//! Diffutils]. Version control systems like [Git] and [Mercurial] generally -//! communicate differences between two versions of a file using a `diff` or -//! `patch`. +//! manipulate differences between files or arbitrary data inspired by +//! [LibXDiff] and [GNU Diffutils]. Version control systems like [Git] and +//! [Mercurial] generallycommunicate differences between two versions of a +//! file using a `diff` or `patch`. //! //! The current diff implementation is based on the [Myers' diff algorithm]. //! +//! ## Supported features +//! +//! | Feature | UTF-8 strings | non-UTF-8 string | Arbitrary types (that implement `Eq + Hash`) | +//! 
|------------------|---------------|------------------|----------------------------------------------| +//! | Creating a patch | ✅ | ✅ | | +//! | Applying a patch | ✅ | ✅ | | +//! | 3-way merge | ✅ | ✅ | ✅ | +//! +//! "Arbitrary types" means "any type that implements `Eq + Hash`".
+//! Supporting patches for arbitrary types would not be very helpful, since +//! there is no standardized way of formatting them. +//! //! ## UTF-8 and Non-UTF-8 //! //! This library has support for working with both utf8 and non-utf8 texts. @@ -198,6 +210,22 @@ //! assert_eq!(merge(original, a, b).unwrap_err(), expected); //! ``` //! +//! It is possible to perform 3-way merges between collections of arbitrary +//! types `T` as long as `T: Eq + Hash`. +//! ``` +//! use diffy::merge_custom; +//! +//! let original = [1,2,3,4,5, 6]; +//! let a = [1,2,3,4,5,100,6]; +//! let b = [1, 3,4,5, 6]; +//! let expected = [1, 3,4,5,100,6]; +//! +//! let result = merge_custom(&original, &a, &b).unwrap(); +//! let result_owned: Vec = result.iter().map(|r| **r).collect(); +//! assert_eq!(result_owned, expected); +//! ``` +//! +//! //! [LibXDiff]: http://www.xmailserver.org/xdiff-lib.html //! [Myers' diff algorithm]: http://www.xmailserver.org/diff2.pdf //! [GNU Diffutils]: https://www.gnu.org/software/diffutils/ From b1b0cae23aeb4d51779ee1d60c297b8ec8c3c52f Mon Sep 17 00:00:00 2001 From: daladim Date: Thu, 16 Mar 2023 16:22:28 +0100 Subject: [PATCH 5/7] More categories and keywords in Cargo.toml --- Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8f8df90..9b83536 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,8 +7,8 @@ description = "Tools for finding and manipulating differences between files" documentation = "https://docs.rs/diffy" repository = "https://github.com/bmwill/diffy" readme = "README.md" -keywords = ["diff", "patch", "merge"] -categories = ["text-processing"] +keywords = ["diff", "patch", "merge", "3-way", "myers"] +categories = ["text-processing", "algorithms"] rust-version = "1.62.1" edition = "2021" From a9cee93990cfffc854f4a33d52ae06e006fdad03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Froissart?= Date: Mon, 8 Jul 2024 23:19:22 +0100 Subject: [PATCH 6/7] [minor] Typo --- src/lib.rs | 2 
+- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 513f6b8..54cbf70 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,7 +5,7 @@ //! This library is intended to be a collection of tools used to find and //! manipulate differences between files or arbitrary data inspired by //! [LibXDiff] and [GNU Diffutils]. Version control systems like [Git] and -//! [Mercurial] generallycommunicate differences between two versions of a +//! [Mercurial] generally communicate differences between two versions of a //! file using a `diff` or `patch`. //! //! The current diff implementation is based on the [Myers' diff algorithm]. From 6f75fc527b657c218a655287126da6aa694f5b8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Froissart?= Date: Mon, 8 Jul 2024 23:25:43 +0100 Subject: [PATCH 7/7] Format --- src/lib.rs | 2 +- src/merge/mod.rs | 11 +++-------- src/merge/tests.rs | 1 + 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 54cbf70..d10afa2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -249,5 +249,5 @@ mod utils; pub use apply::{apply, apply_bytes, ApplyError}; pub use diff::{create_patch, create_patch_bytes, DiffOptions}; -pub use merge::{merge, merge_bytes, merge_custom, ConflictStyle, MergeOptions, MergeConflicts}; +pub use merge::{merge, merge_bytes, merge_custom, ConflictStyle, MergeConflicts, MergeOptions}; pub use patch::{Hunk, HunkRange, Line, ParsePatchError, Patch, PatchFormatter}; diff --git a/src/merge/mod.rs b/src/merge/mod.rs index 2a1ee85..4ffc12c 100644 --- a/src/merge/mod.rs +++ b/src/merge/mod.rs @@ -3,8 +3,8 @@ use crate::{ range::{DiffRange, Range, SliceLike}, utils::Classifier, }; -use std::{cmp, fmt}; use std::hash::Hash; +use std::{cmp, fmt}; #[cfg(test)] mod tests; @@ -227,12 +227,7 @@ impl MergeOptions { cleanup_conflicts(&mut merge); - output_result_custom( - &ancestor_lines, - &our_lines, - &their_lines, - &merge, - ) + output_result_custom(&ancestor_lines, 
&our_lines, &their_lines, &merge) } } @@ -310,7 +305,7 @@ pub fn merge_bytes<'a>( #[derive(Debug, Ord, PartialOrd, Eq, PartialEq)] pub struct MergeConflicts { /// How many conflicts have occurred - pub count: usize + pub count: usize, } impl std::fmt::Display for MergeConflicts { diff --git a/src/merge/tests.rs b/src/merge/tests.rs index 65a1bb6..e447e8f 100644 --- a/src/merge/tests.rs +++ b/src/merge/tests.rs @@ -216,6 +216,7 @@ salt } #[test] +#[rustfmt::skip] fn test_merge_arbitrary_type() { let original = [1,2,3,4,5, 6]; let ours = [1,2,3,4,5,100,6];