Skip to content
3 changes: 3 additions & 0 deletions src/Data/Algorithm/Diff.hs
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,9 @@ getDiffBy eq a b = markup a b . reverse $ ses eq a b
markup _ _ _ = []

-- | Like 'getGroupedDiff' but accepts a custom equality predicate.
--
-- Postcondition: the output list is guaranteed to be /chunked/. i.e. no two adjacent
-- elements share the same constructor.
getGroupedDiffBy :: (a -> b -> Bool) -> [a] -> [b] -> [PolyDiff [a] [b]]
getGroupedDiffBy eq a b = go $ getDiffBy eq a b
where go (First x : xs) = let (fs, rest) = goFirsts xs in First (x:fs) : go rest
Expand Down
89 changes: 67 additions & 22 deletions src/Data/Algorithm/DiffContext.hs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
-- Portability : portable
-- Author : David Fox (ddssff at the email service from google)
--
-- Generates a grouped diff with merged runs, and outputs them in the manner of diff -u
-- Generates a grouped diff with merged runs, and outputs them in the manner of @diff -u@.
-----------------------------------------------------------------------------
module Data.Algorithm.DiffContext
( ContextDiff, Hunk
Expand All @@ -22,19 +22,27 @@ module Data.Algorithm.DiffContext
) where

import Data.Algorithm.Diff (PolyDiff(..), Diff, getGroupedDiff)
-- import Data.List (groupBy)
import Data.Bifunctor
import Text.PrettyPrint (Doc, text, empty, hcat)

-- | A diff consisting of disjoint 'Hunk's.
type ContextDiff c = [Hunk c]

-- | A 'Hunk' is a list of adjacent 'Diff's.
Comment thread
ninioArtillero marked this conversation as resolved.
--
-- No two consecutive elements in a 'Hunk' are both applications
-- of 'First', 'Second', or 'Both', i.e. the list does not stutter
-- on 'Diff' constructors.
type Hunk c = [Diff [c]]

-- | A version of 'groupBy' that does not assume the argument function
-- is transitive. This is used to partition the 'Diff' list into
-- segments that begin and end with matching ('Both') text, with and
-- have non-matching ('First' and 'Second') text in the middle.

-- | Groups elements so that consecutive elements in a group satisfy the predicate.
-- This is unlike 'Data.List.groupBy' where grouped elements are only guaranteed to
-- satisfy the predicate w.r.t. the first element of the group.
--
-- > let notBoth1 a b = not (a == 1 || b == 1) in
-- For instance, to split the input where there are two consecutive `1`s:
--
-- > let notBoth1 a b = not (a == 1 && b == 1) in
-- >
-- > groupBy' notBoth1 [1,1,2,3,1,1,4,5,6,1]
-- > [[1],[1,2,3,1],[1,4,5,6,1]]
Expand Down Expand Up @@ -83,46 +91,83 @@ unnumber (Numbered _ a) = a
-- > -k
getContextDiff ::
Eq a
=> Maybe Int -- ^ Number of context elements, Nothing means infinite
=> Maybe Int -- ^ Context size. 'Nothing' means returning a whole-diff 'Hunk'.
-> [a]
-> [a]
-> ContextDiff (Numbered a)
getContextDiff context a b =
getContextDiffNumbered context (numbered a) (numbered b)
getContextDiff contextSize a b =
getContextDiffNumbered contextSize (numbered a) (numbered b)

-- | If for some reason you need the line numbers stripped from the
-- result of getContextDiff for backwards compatibility.
-- result of 'getContextDiff' for backwards compatibility.
unNumberContextDiff :: ContextDiff (Numbered a) -> ContextDiff a
unNumberContextDiff = fmap (fmap (bimap (fmap unnumber) (fmap unnumber)))

-- | Create a diff of separate 'Hunk's, each containing a sequence
-- of differing elements surrounded by common elements for context.
--
-- The context size determines when to merge adjacent hunks:
-- two hunks are merged when the number of common elements between them does not
-- exceed twice the context size. Furthermore, if @contextSize@ is 'Nothing'
-- a single hunk with the whole diff is produced.
getContextDiffNumbered ::
Eq a
=> Maybe Int -- ^ Number of context elements, Nothing means infinite
=> Maybe Int -- ^ Context size. 'Nothing' means returning a whole-diff 'Hunk'.
-> [Numbered a]
-> [Numbered a]
-> ContextDiff (Numbered a)
getContextDiffNumbered context a0 b0 =
getContextDiffNumbered contextSize a0 b0 =
-- The 'Diff' list is grouped into 'Hunks' that begin and end
-- with matching ('Both') text, having non-matching ('First' and 'Second')
-- text in the middle. Note that a non-trivial partition can only happen after
-- the matching text has been reduced to become consecutive 'Both' values
-- corresponding to a hunk's suffix and the following hunk prefix.
groupBy' (\a b -> not (isBoth a && isBoth b)) $ doPrefix $ getGroupedDiff a0 b0
where
isBoth (Both _ _) = True
isBoth _ = False
-- Handle the common text leading up to a diff.
-- | Handle the common text leading up to a diff.
--
-- The @a@ elements in @doPrefix h@ are a subset of those in @h@,
-- in the same order. Additionaly, 'First' and 'Second' diffs
-- are identical in both lists.
--
-- The difference between input and output is that some 'Both' diffs might
-- be split into two other 'Both' diffs. This hapṕens when their contents
-- are too large compared with the contex size, resulting in some @a@
-- elements being dropped.
doPrefix :: Hunk a -> Hunk a
Comment thread
ninioArtillero marked this conversation as resolved.
doPrefix [] = []
-- Trailing common elements are no prefix.
-- This case corresponds to when both input lists are identical, so the
-- resulting 'ContextDiff' is empty.
doPrefix [Both _ _] = []
-- Do the prefix proper.
doPrefix (Both xs ys : more) =
Both (maybe xs (\n -> drop (max 0 (length xs - n)) xs) context)
(maybe ys (\n -> drop (max 0 (length ys - n)) ys) context) : doSuffix more
-- Prefix finished, do the diff then the following suffix
Both (maybe xs (\n -> drop (max 0 (length xs - n)) xs) contextSize)
(maybe ys (\n -> drop (max 0 (length ys - n)) ys) contextSize) : doSuffix more
-- Prefix finished, do the diff then the following suffix.
doPrefix (d : ds) = doSuffix (d : ds)
-- Handle the common text following a diff.
-- | Handle the common text following a diff.
--
-- Precondition: The input does not start with a 'Both' diff. Otherwise,
-- it behaves like @doPrefix@.
doSuffix :: Hunk a -> Hunk a
Comment thread
ninioArtillero marked this conversation as resolved.
doSuffix [] = []
doSuffix [Both xs ys] = [Both (maybe xs (\n -> take n xs) context) (maybe ys (\n -> take n ys) context)]
-- A trailing suffix.
doSuffix [Both xs ys] = [Both (maybe xs (\n -> take n xs) contextSize) (maybe ys (\n -> take n ys) contextSize)]
-- Either whole context or common text is too short to split.
doSuffix (Both xs ys : more)
| maybe True (\n -> length xs <= n * 2) context =
| maybe True (\n -> length xs <= n * 2) contextSize =
Both xs ys : doPrefix more
-- If the common text long enough, split it into a suffix and prefix
-- (resulting in some elements excluded from the diff in the middle).
doSuffix (Both xs ys : more) =
Both (maybe xs (\n -> take n xs) context) (maybe ys (\n -> take n ys) context)
: doPrefix (Both (maybe mempty (\n -> drop n xs) context) (maybe mempty (\n -> drop n ys) context) : more)
-- NOTE: the guard above ensures that the following 'maybe's
-- default values are unreachable and result in non-empty lists.
Both (maybe xs (\n -> take n xs) contextSize) (maybe ys (\n -> take n ys) contextSize)
: doPrefix (Both (maybe mempty (\n -> drop n xs) contextSize) (maybe mempty (\n -> drop n ys) contextSize) : more)
-- Diff elements are preserved.
doSuffix (d : ds) = d : doSuffix ds

-- | Pretty print a ContextDiff in the manner of diff -u.
Expand Down
88 changes: 71 additions & 17 deletions src/Data/Algorithm/DiffOutput.hs
Original file line number Diff line number Diff line change
Expand Up @@ -8,42 +8,58 @@
-- Portability : portable
-- Author : Stephan Wehr (wehr@factisresearch.com) and JP Moresmau (jp@moresmau.fr)
--
-- Generates a string output that is similar to diff normal mode
-- Generates a string output that is similar to diff normal mode.
-----------------------------------------------------------------------------
module Data.Algorithm.DiffOutput where
import Data.Algorithm.Diff
import Text.PrettyPrint hiding ((<>))
import Data.Char
import Data.List

-- | Converts Diffs to DiffOperations
-- | Converts 'Diff's to 'DiffOperation's. 'First' and 'Second'
-- ocurrances are converted to 'Addition' and 'Deletion', respectively, while
-- consecutive ocurrances of them are replaced by a 'Change'.
diffToLineRanges :: [Diff [String]] -> [DiffOperation LineRange]
diffToLineRanges = toLineRange 1 1
where
-- | In @toLineRange x y ds@, @x@ is the index of the current string in the
-- left input of the diff @ds@, and @y@ is the index of the corresponding
-- string in the right input of the diff @ds@.
toLineRange :: Int -> Int -> [Diff [String]] -> [DiffOperation LineRange]
Comment thread
ninioArtillero marked this conversation as resolved.
toLineRange _ _ []=[]
-- If the lines are the same, we just move forward.
toLineRange leftLine rightLine (Both ls _:rs)=
let lins=length ls
in toLineRange (leftLine+lins) (rightLine+lins) rs
-- A 'Change' is introduced when an addition is followed by a deletion, or vice versa.
toLineRange leftLine rightLine (Second lsS:First lsF:rs)=
toChange leftLine rightLine lsF lsS rs
toLineRange leftLine rightLine (First lsF:Second lsS:rs)=
toChange leftLine rightLine lsF lsS rs
-- Introduce 'Addition's.
toLineRange leftLine rightLine (Second lsS:rs)=
let linesS=length lsS
diff=Addition (LineRange (rightLine,rightLine+linesS-1) lsS) (leftLine-1)
in diff : toLineRange leftLine (rightLine+linesS) rs
-- Introduce 'Deletion's.
toLineRange leftLine rightLine (First lsF:rs)=
let linesF=length lsF
diff=Deletion (LineRange (leftLine,leftLine+linesF-1) lsF) (rightLine-1)
in diff: toLineRange(leftLine+linesF) rightLine rs
-- | Build 'Change's from adjacent additions and deletions.
toChange :: Int -- ^ Current left line number.
-> Int -- ^ Current right line number.
-> [String] -- ^ Lines from the 'First' list (corresponding to deletions).
-> [String] -- ^ Lines from the 'Second' list (corresponding to additions).
-> [Diff [String]] -- ^ Remaining 'Diff's.
-> [DiffOperation LineRange]
toChange leftLine rightLine lsF lsS rs=
let linesS=length lsS
linesF=length lsF
in Change (LineRange (leftLine,leftLine+linesF-1) lsF) (LineRange (rightLine,rightLine+linesS-1) lsS)
: toLineRange (leftLine+linesF) (rightLine+linesS) rs

-- | pretty print the differences. The output is similar to the output of the diff utility
-- | Pretty print the differences. The output is similar to the output of the @diff@ utility.
--
-- > > putStr (ppDiff (getGroupedDiff ["a","b","c","d","e"] ["a","c","d","f"]))
-- > 2d1
Expand All @@ -59,7 +75,7 @@ ppDiff gdiff =
render (prettyDiffs diffLineRanges) ++ "\n"


-- | pretty print of diff operations
-- | Pretty print of diff operations.
prettyDiffs :: [DiffOperation LineRange] -> Doc
prettyDiffs [] = empty
prettyDiffs (d : rest) = prettyDiff d $$ prettyDiffs rest
Expand All @@ -80,59 +96,97 @@ prettyDiffs (d : rest) = prettyDiff d $$ prettyDiffs rest
prettyLines start lins =
vcat (map (\l -> char start <+> text l) lins)

-- | Parse pretty printed Diffs as DiffOperations
-- | Parse pretty printed 'Diff's as 'DiffOperation's.
parsePrettyDiffs :: String -> [DiffOperation LineRange]
parsePrettyDiffs = reverse . doParse [] . lines
where
doParse diffs [] = diffs
doParse diffs s =
-- | Parsing entry point that iteratively accumulates 'DiffOperation's
-- until the input is exhausted.
doParse :: [DiffOperation LineRange] -> [String] -> [DiffOperation LineRange]
-- NOTE: Incorrectly formatted lines are ignored.
doParse acc [] = acc
doParse acc s =
let (mnd,r) = parseDiff s
in case mnd of
Just nd -> doParse (nd:diffs) r
_ -> doParse diffs r
Just nd -> doParse (nd:acc) r
_ -> doParse acc r

parseDiff :: [String] -> (Maybe (DiffOperation LineRange), [String])
parseDiff [] = (Nothing,[])
parseDiff (h:rs) = let
(r1,hrs1) = parseRange h
in case hrs1 of
-- In each case, we pass the left line range,
-- the remaining string after the type character,
-- which must contain the right line range,
-- and the remaining lines to parse.
('d':hrs2) -> parseDel r1 hrs2 rs
('a':hrs2) -> parseAdd r1 hrs2 rs
('c':hrs2) -> parseChange r1 hrs2 rs
_ -> (Nothing,rs)

parseDel :: (LineNo, LineNo) -> String -> [String] -> (Maybe (DiffOperation LineRange), [String])
parseDel r1 hrs2 rs = let
-- NOTE: the wildcard should correspond to the end of line,
-- but is ignored for simplicity.
(r2,_) = parseRange hrs2
(ls,rs2) = span (isPrefixOf "<") rs
in (Just $ Deletion (LineRange r1 (map (drop 2) ls)) (fst r2), rs2)

parseAdd :: (LineNo, LineNo) -> String -> [String] -> (Maybe (DiffOperation LineRange), [String])
parseAdd r1 hrs2 rs = let
-- NOTE: the wildcard should correspond to the end of line,
-- but is ignored for simplicity.
(r2,_) = parseRange hrs2
(ls,rs2) = span (isPrefixOf ">") rs
in (Just $ Addition (LineRange r2 (map (drop 2) ls)) (fst r1), rs2)

parseChange :: (LineNo, LineNo) -> String -> [String] -> (Maybe (DiffOperation LineRange), [String])
parseChange r1 hrs2 rs = let
-- NOTE: the wildcard should correspond to the end of line,
-- but is ignored for simplicity.
(r2,_) = parseRange hrs2
(ls1,rs2) = span (isPrefixOf "<") rs
in case rs2 of
-- The left and right diff of a 'Change' are separated by a "---" line.
("---":rs3) -> let
(ls2,rs4) = span (isPrefixOf ">") rs3
in (Just $ Change (LineRange r1 (map (drop 2) ls1)) (LineRange r2 (map (drop 2) ls2)), rs4)
_ -> (Nothing,rs2)

parseRange :: String -> ((LineNo, LineNo),String)
parseRange l = let
(fstLine,rs) = span isDigit l
(sndLine,rs3) = case rs of
-- The comma is used to separate
-- the start and end line numbers in a range,
-- but is omitted if they are the same.
-- i.e. the range is a single line.
(',':rs2) -> span isDigit rs2
_ -> (fstLine,rs)
in ((read fstLine,read sndLine),rs3)

-- | Line number alias
-- | Line number alias. Always non-negative.
type LineNo = Int

-- | Line Range: start, end and contents
-- | Line Range: start, end and contents.
Comment thread
ninioArtillero marked this conversation as resolved.
--
-- The following invariants hold:
--
-- > snd lrNumbers >= fst lrNumbers
-- > snd lrNumbers - fst lrNumbers + 1 == length lrContents
--
-- which imply @lrContents@ cannot be empty.
data LineRange = LineRange { lrNumbers :: (LineNo, LineNo)
, lrContents :: [String]
}
deriving (Show,Read,Eq,Ord)
deriving (Show, Read, Eq, Ord)

-- | Diff Operation representing changes to apply
data DiffOperation a = Deletion a LineNo
| Addition a LineNo
| Change a a
deriving (Show,Read,Eq,Ord)
-- | Diff operation representing changes to apply.
data DiffOperation a
= Deletion a LineNo -- ^ Element deleted on the left input, line number
-- preceding the deleted lines in the right input.
| Addition a LineNo -- ^ Element added from the right input, line number
-- preceding the added lines in the left input.
| Change a a -- ^ Element changed from the left input to the right input.
deriving (Show,Read,Eq,Ord)
Loading