Skip to content

Commit 344e6f0

Browse files
Add and fix haddocks in DiffContext and DiffOutput
1 parent b843602 commit 344e6f0

2 files changed

Lines changed: 91 additions & 26 deletions

File tree

src/Data/Algorithm/DiffContext.hs

Lines changed: 39 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
-- Portability : portable
99
-- Author : David Fox (ddssff at the email service from google)
1010
--
11-
-- Generates a grouped diff with merged runs, and outputs them in the manner of diff -u
11+
-- Generates a grouped diff with merged runs, and outputs them in the manner of @diff -u@.
1212
-----------------------------------------------------------------------------
1313
module Data.Algorithm.DiffContext
1414
( ContextDiff, Hunk
@@ -22,19 +22,23 @@ module Data.Algorithm.DiffContext
2222
) where
2323

2424
import Data.Algorithm.Diff (PolyDiff(..), Diff, getGroupedDiff)
25-
-- import Data.List (groupBy)
2625
import Data.Bifunctor
2726
import Text.PrettyPrint (Doc, text, empty, hcat)
2827

28+
-- | A diff consisting of disjoint 'Hunk's.
2929
type ContextDiff c = [Hunk c]
30+
31+
-- | A 'Hunk' is a list of adjacent 'Diff's.
3032
type Hunk c = [Diff [c]]
3133

32-
-- | A version of 'groupBy' that does not assume the argument function
33-
-- is transitive. This is used to partition the 'Diff' list into
34-
-- segments that begin and end with matching ('Both') text, with and
35-
-- have non-matching ('First' and 'Second') text in the middle.
34+
35+
-- | Groups elements so that consecutive elements in a group satisfy the predicate.
36+
-- This is unlike 'Data.List.groupBy' where grouped elements are only guaranteed to
37+
-- satisfy the predicate w.r.t. the first element of the group.
38+
--
39+
-- For instance, to split the input where there are two consecutive `1`s:
3640
--
37-
-- > let notBoth1 a b = not (a == 1 || b == 1) in
41+
-- > let notBoth1 a b = not (a == 1 && b == 1) in
3842
-- >
3943
-- > groupBy' notBoth1 [1,1,2,3,1,1,4,5,6,1]
4044
-- > [[1],[1,2,3,1],[1,4,5,6,1]]
@@ -83,46 +87,68 @@ unnumber (Numbered _ a) = a
8387
-- > -k
8488
getContextDiff ::
8589
Eq a
86-
=> Maybe Int -- ^ Number of context elements, Nothing means infinite
90+
=> Maybe Int -- ^ Number of context elements, 'Nothing' means returning a whole-diff 'Hunk'.
8791
-> [a]
8892
-> [a]
8993
-> ContextDiff (Numbered a)
9094
getContextDiff context a b =
9195
getContextDiffNumbered context (numbered a) (numbered b)
9296

9397
-- | If for some reason you need the line numbers stripped from the
94-
-- result of getContextDiff for backwards compatibility.
98+
-- result of 'getContextDiff' for backwards compatibility.
9599
unNumberContextDiff :: ContextDiff (Numbered a) -> ContextDiff a
96100
unNumberContextDiff = fmap (fmap (bimap (fmap unnumber) (fmap unnumber)))
97101

102+
-- | Create a diff made of separate 'Hunk's by reducing the lists of common
103+
-- elements surrounding each sequence of differing elements to the specified
104+
-- @context@ number. Adjancent hunks end up merged if the list of common elements
105+
-- between them is shorter than twice the @context@.
106+
-- If @context@ is 'Nothing', we get a single hunk with the whole diff.
98107
getContextDiffNumbered ::
99108
Eq a
100-
=> Maybe Int -- ^ Number of context elements, Nothing means infinite
109+
=> Maybe Int -- ^ Number of context elements, 'Nothing' means returning a whole-diff 'Hunk'.
101110
-> [Numbered a]
102111
-> [Numbered a]
103112
-> ContextDiff (Numbered a)
104113
getContextDiffNumbered context a0 b0 =
114+
-- The 'Diff' list is grouped into 'Hunks' that begin and end
115+
-- with matching ('Both') text, having non-matching ('First' and 'Second')
116+
-- text in the middle. Note that a non-trivial partition can only happen after
117+
-- the matching text has been reduced to become consecutive 'Both' values
118+
-- corresponding to a hunk's suffix and the following hunk prefix.
105119
groupBy' (\a b -> not (isBoth a && isBoth b)) $ doPrefix $ getGroupedDiff a0 b0
106120
where
107121
isBoth (Both _ _) = True
108122
isBoth _ = False
109-
-- Handle the common text leading up to a diff.
123+
-- | Handle the common text leading up to a diff.
124+
doPrefix :: Hunk a -> Hunk a
110125
doPrefix [] = []
126+
-- Trailing common elements are no prefix.
111127
doPrefix [Both _ _] = []
128+
-- Do the prefix proper.
112129
doPrefix (Both xs ys : more) =
113130
Both (maybe xs (\n -> drop (max 0 (length xs - n)) xs) context)
114131
(maybe ys (\n -> drop (max 0 (length ys - n)) ys) context) : doSuffix more
115-
-- Prefix finished, do the diff then the following suffix
132+
-- Prefix finished, do the diff then the following suffix.
116133
doPrefix (d : ds) = doSuffix (d : ds)
117-
-- Handle the common text following a diff.
134+
-- | Handle the common text following a diff.
135+
doSuffix :: Hunk a -> Hunk a
118136
doSuffix [] = []
137+
-- A trailing suffix.
119138
doSuffix [Both xs ys] = [Both (maybe xs (\n -> take n xs) context) (maybe ys (\n -> take n ys) context)]
139+
-- Infinite context or common text too short to split.
120140
doSuffix (Both xs ys : more)
121141
| maybe True (\n -> length xs <= n * 2) context =
122142
Both xs ys : doPrefix more
143+
-- If the common text long enough, split it into a suffix and prefix
144+
-- (resulting in some elements excluded from the diff in the middle).
123145
doSuffix (Both xs ys : more) =
124146
Both (maybe xs (\n -> take n xs) context) (maybe ys (\n -> take n ys) context)
147+
-- NOTE: both 'mempty's here are unreachable in practice because:
148+
-- 1. The guard above ensures that @context@ is not 'Nothing'
149+
-- 2. Both lists have the same length.
125150
: doPrefix (Both (maybe mempty (\n -> drop n xs) context) (maybe mempty (\n -> drop n ys) context) : more)
151+
-- Diff elements are preserved.
126152
doSuffix (d : ds) = d : doSuffix ds
127153

128154
-- | Pretty print a ContextDiff in the manner of diff -u.

src/Data/Algorithm/DiffOutput.hs

Lines changed: 52 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,42 +8,53 @@
88
-- Portability : portable
99
-- Author : Stephan Wehr (wehr@factisresearch.com) and JP Moresmau (jp@moresmau.fr)
1010
--
11-
-- Generates a string output that is similar to diff normal mode
11+
-- Generates a string output that is similar to diff normal mode.
1212
-----------------------------------------------------------------------------
1313
module Data.Algorithm.DiffOutput where
1414
import Data.Algorithm.Diff
1515
import Text.PrettyPrint hiding ((<>))
1616
import Data.Char
1717
import Data.List
1818

19-
-- | Converts Diffs to DiffOperations
19+
-- | Converts 'Diff's to 'DiffOperation's.
2020
diffToLineRanges :: [Diff [String]] -> [DiffOperation LineRange]
2121
diffToLineRanges = toLineRange 1 1
2222
where
2323
toLineRange :: Int -> Int -> [Diff [String]] -> [DiffOperation LineRange]
2424
toLineRange _ _ []=[]
25+
-- If the lines are the same, we just move forward.
2526
toLineRange leftLine rightLine (Both ls _:rs)=
2627
let lins=length ls
2728
in toLineRange (leftLine+lins) (rightLine+lins) rs
29+
-- A 'Change' is introduced when an addition is followed by a deletion, or vice versa.
2830
toLineRange leftLine rightLine (Second lsS:First lsF:rs)=
2931
toChange leftLine rightLine lsF lsS rs
3032
toLineRange leftLine rightLine (First lsF:Second lsS:rs)=
3133
toChange leftLine rightLine lsF lsS rs
34+
-- Introduce 'Addition's.
3235
toLineRange leftLine rightLine (Second lsS:rs)=
3336
let linesS=length lsS
3437
diff=Addition (LineRange (rightLine,rightLine+linesS-1) lsS) (leftLine-1)
3538
in diff : toLineRange leftLine (rightLine+linesS) rs
39+
-- Introduce 'Deletion's.
3640
toLineRange leftLine rightLine (First lsF:rs)=
3741
let linesF=length lsF
3842
diff=Deletion (LineRange (leftLine,leftLine+linesF-1) lsF) (rightLine-1)
3943
in diff: toLineRange(leftLine+linesF) rightLine rs
44+
-- | Build 'Change's from adjacent additions and deletions.
45+
toChange :: Int -- ^ Current left line number.
46+
-> Int -- ^ Current right line number.
47+
-> [String] -- ^ Lines from the 'First' list (corresponding to deletions).
48+
-> [String] -- ^ Lines from the 'Second' list (corresponding to additions).
49+
-> [Diff [String]] -- ^ Remaining 'Diff's.
50+
-> [DiffOperation LineRange]
4051
toChange leftLine rightLine lsF lsS rs=
4152
let linesS=length lsS
4253
linesF=length lsF
4354
in Change (LineRange (leftLine,leftLine+linesF-1) lsF) (LineRange (rightLine,rightLine+linesS-1) lsS)
4455
: toLineRange (leftLine+linesF) (rightLine+linesS) rs
4556

46-
-- | pretty print the differences. The output is similar to the output of the diff utility
57+
-- | Pretty print the differences. The output is similar to the output of the @diff@ utility.
4758
--
4859
-- > > putStr (ppDiff (getGroupedDiff ["a","b","c","d","e"] ["a","c","d","f"]))
4960
-- > 2d1
@@ -59,7 +70,7 @@ ppDiff gdiff =
5970
render (prettyDiffs diffLineRanges) ++ "\n"
6071

6172

62-
-- | pretty print of diff operations
73+
-- | Pretty print of diff operations.
6374
prettyDiffs :: [DiffOperation LineRange] -> Doc
6475
prettyDiffs [] = empty
6576
prettyDiffs (d : rest) = prettyDiff d $$ prettyDiffs rest
@@ -80,58 +91,86 @@ prettyDiffs (d : rest) = prettyDiff d $$ prettyDiffs rest
8091
prettyLines start lins =
8192
vcat (map (\l -> char start <+> text l) lins)
8293

83-
-- | Parse pretty printed Diffs as DiffOperations
94+
-- | Parse pretty printed 'Diff's as 'DiffOperation's.
8495
parsePrettyDiffs :: String -> [DiffOperation LineRange]
8596
parsePrettyDiffs = reverse . doParse [] . lines
8697
where
87-
doParse diffs [] = diffs
88-
doParse diffs s =
98+
-- | Parsing entry point that iteratively accumulates 'DiffOperation's
99+
-- until the input is exhausted.
100+
doParse :: [DiffOperation LineRange] -> [String] -> [DiffOperation LineRange]
101+
-- NOTE: Incorrectly formatted lines are ignored.
102+
doParse acc [] = acc
103+
doParse acc s =
89104
let (mnd,r) = parseDiff s
90105
in case mnd of
91-
Just nd -> doParse (nd:diffs) r
92-
_ -> doParse diffs r
106+
Just nd -> doParse (nd:acc) r
107+
_ -> doParse acc r
108+
109+
parseDiff :: [String] -> (Maybe (DiffOperation LineRange), [String])
93110
parseDiff [] = (Nothing,[])
94111
parseDiff (h:rs) = let
95112
(r1,hrs1) = parseRange h
96113
in case hrs1 of
114+
-- In each case, we pass the left line range,
115+
-- the remaining string after the type character,
116+
-- which must contain the right line range,
117+
-- and the remaining lines to parse.
97118
('d':hrs2) -> parseDel r1 hrs2 rs
98119
('a':hrs2) -> parseAdd r1 hrs2 rs
99120
('c':hrs2) -> parseChange r1 hrs2 rs
100121
_ -> (Nothing,rs)
122+
123+
parseDel :: (LineNo, LineNo) -> String -> [String] -> (Maybe (DiffOperation LineRange), [String])
101124
parseDel r1 hrs2 rs = let
125+
-- NOTE: the wildcard should correspond to the end of line,
126+
-- but is ignored for simplicity.
102127
(r2,_) = parseRange hrs2
103128
(ls,rs2) = span (isPrefixOf "<") rs
104129
in (Just $ Deletion (LineRange r1 (map (drop 2) ls)) (fst r2), rs2)
130+
131+
parseAdd :: (LineNo, LineNo) -> String -> [String] -> (Maybe (DiffOperation LineRange), [String])
105132
parseAdd r1 hrs2 rs = let
133+
-- NOTE: the wildcard should correspond to the end of line,
134+
-- but is ignored for simplicity.
106135
(r2,_) = parseRange hrs2
107136
(ls,rs2) = span (isPrefixOf ">") rs
108137
in (Just $ Addition (LineRange r2 (map (drop 2) ls)) (fst r1), rs2)
138+
139+
parseChange :: (LineNo, LineNo) -> String -> [String] -> (Maybe (DiffOperation LineRange), [String])
109140
parseChange r1 hrs2 rs = let
141+
-- NOTE: the wildcard should correspond to the end of line,
142+
-- but is ignored for simplicity.
110143
(r2,_) = parseRange hrs2
111144
(ls1,rs2) = span (isPrefixOf "<") rs
112145
in case rs2 of
146+
-- The left and right diff of a 'Change' are separated by a "---" line.
113147
("---":rs3) -> let
114148
(ls2,rs4) = span (isPrefixOf ">") rs3
115149
in (Just $ Change (LineRange r1 (map (drop 2) ls1)) (LineRange r2 (map (drop 2) ls2)), rs4)
116150
_ -> (Nothing,rs2)
151+
117152
parseRange :: String -> ((LineNo, LineNo),String)
118153
parseRange l = let
119154
(fstLine,rs) = span isDigit l
120155
(sndLine,rs3) = case rs of
156+
-- The comma is used to separate
157+
-- the start and end line numbers in a range,
158+
-- but is omitted if they are the same.
159+
-- i.e. the range is a single line.
121160
(',':rs2) -> span isDigit rs2
122161
_ -> (fstLine,rs)
123162
in ((read fstLine,read sndLine),rs3)
124163

125-
-- | Line number alias
164+
-- | Line number alias.
126165
type LineNo = Int
127166

128-
-- | Line Range: start, end and contents
167+
-- | Line Range: start, end and contents.
129168
data LineRange = LineRange { lrNumbers :: (LineNo, LineNo)
130169
, lrContents :: [String]
131170
}
132-
deriving (Show,Read,Eq,Ord)
171+
deriving (Show, Read, Eq, Ord)
133172

134-
-- | Diff Operation representing changes to apply
173+
-- | Diff operation representing changes to apply.
135174
data DiffOperation a = Deletion a LineNo
136175
| Addition a LineNo
137176
| Change a a

0 commit comments

Comments
 (0)