Skip to content

Commit 6830219

Browse files
committed
Starting out language annotation support implementation
Initial support for language annotations like `/* lua */` that should remain as block comments when directly preceding string literals, while other block comments get converted to line comments. - Detect language annotations: single-line, non-doc comments with valid language identifiers - Preserve as `/* lang */` block comment syntax instead of converting to `# lang` line comments - Works with both regular strings `"..."` and indented strings `''...''`
1 parent 149c060 commit 6830219

File tree

3 files changed

+82
-6
lines changed

3 files changed

+82
-6
lines changed

src/Nixfmt/Lexer.hs

Lines changed: 77 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,13 @@
66
module Nixfmt.Lexer (lexeme, pushTrivia, takeTrivia, whole) where
77

88
import Control.Monad.State.Strict (MonadState, evalStateT, get, modify, put)
9-
import Data.Char (isSpace)
9+
import Data.Char (isAlphaNum, isSpace)
1010
import Data.List (dropWhileEnd)
1111
import Data.Maybe (fromMaybe)
1212
import Data.Text as Text (
1313
Text,
14+
all,
15+
any,
1416
isPrefixOf,
1517
length,
1618
lines,
@@ -29,6 +31,7 @@ import Data.Void (Void)
2931
import Nixfmt.Types (
3032
Ann (..),
3133
Parser,
34+
Token (TDoubleQuote, TDoubleSingleQuote),
3235
TrailingComment (..),
3336
Trivia,
3437
Trivium (..),
@@ -43,9 +46,11 @@ import Text.Megaparsec (
4346
chunk,
4447
getSourcePos,
4548
hidden,
49+
lookAhead,
4650
many,
4751
manyTill,
4852
notFollowedBy,
53+
optional,
4954
some,
5055
try,
5156
unPos,
@@ -59,6 +64,8 @@ data ParseTrivium
5964
PTLineComment Text Pos
6065
| -- Track whether it is a doc comment
6166
PTBlockComment Bool [Text]
67+
| -- | Language annotation like /* lua */ (single line, non-doc)
68+
PTLanguageAnnotation Text
6269
deriving (Show)
6370

6471
preLexeme :: Parser a -> Parser a
@@ -133,6 +140,7 @@ convertTrailing = toMaybe . join . map toText
133140
where
134141
toText (PTLineComment c _) = strip c
135142
toText (PTBlockComment False [c]) = strip c
143+
toText (PTLanguageAnnotation _) = "" -- Language annotations don't become trailing comments
136144
toText _ = ""
137145
join = Text.unwords . filter (/= "")
138146
toMaybe "" = Nothing
@@ -148,6 +156,7 @@ convertLeading =
148156
PTBlockComment _ [] -> []
149157
PTBlockComment False [c] -> [LineComment $ " " <> strip c]
150158
PTBlockComment isDoc cs -> [BlockComment isDoc cs]
159+
PTLanguageAnnotation c -> [LanguageAnnotation c]
151160
)
152161

153162
isTrailing :: ParseTrivium -> Bool
@@ -156,17 +165,75 @@ isTrailing (PTBlockComment False []) = True
156165
isTrailing (PTBlockComment False [_]) = True
157166
isTrailing _ = False
158167

159-
convertTrivia :: [ParseTrivium] -> Pos -> (Maybe TrailingComment, Trivia)
160-
convertTrivia pts nextCol =
168+
-- Check if a text is a valid language identifier for language annotations
169+
isLanguageIdentifier :: Text -> Bool
170+
isLanguageIdentifier content =
171+
let stripped = strip content
172+
in not (Text.null stripped)
173+
&& Text.length stripped <= 30 -- TODO: make configurable or remove limit
174+
&& Text.all (\c -> isAlphaNum c || c `elem` ['-', '+', '.', '_']) stripped
175+
&& not (Text.any (`elem` ['\n', '\r']) content)
176+
177+
-- Check if next token is a string literal
178+
isStringToken :: Maybe Token -> Bool
179+
isStringToken (Just TDoubleQuote) = True
180+
isStringToken (Just TDoubleSingleQuote) = True
181+
isStringToken _ = False
182+
183+
-- Convert a single block comment to language annotation if it matches criteria
184+
toLangAnnotation :: Text -> Maybe Token -> Maybe ParseTrivium
185+
toLangAnnotation content nextToken
186+
| isStringToken nextToken && isLanguageIdentifier content =
187+
Just (PTLanguageAnnotation (strip content))
188+
| otherwise = Nothing
189+
190+
convertTrivia :: [ParseTrivium] -> Pos -> Maybe Token -> (Maybe TrailingComment, Trivia)
191+
convertTrivia pts nextCol nextToken =
161192
let (trailing, leading) = span isTrailing pts
162-
in case (trailing, leading) of
193+
-- Check if we should convert trailing block comment to language annotation
194+
-- If so, move it to leading position instead of keeping it as trailing
195+
(trailing', leading') = case (trailing, nextToken) of
196+
-- If next token is a string and trailing trivia is a potential language annotation
197+
([PTBlockComment False [content]], _)
198+
| Just langAnnotation <- toLangAnnotation content nextToken ->
199+
([], langAnnotation : leading)
200+
_ ->
201+
-- Check if we should convert leading block comment to language annotation
202+
case (leading, nextToken) of
203+
-- If next token is a string and last trivia is a potential language annotation
204+
(PTBlockComment False [content] : rest, _)
205+
| Just langAnnotation <- toLangAnnotation content nextToken ->
206+
(trailing, langAnnotation : rest)
207+
-- Handle case where block comment is not first in leading trivia
208+
_ ->
209+
let findAndReplace [] = Nothing
210+
findAndReplace (PTBlockComment False [content] : rest)
211+
| Just langAnnotation <- toLangAnnotation content nextToken =
212+
Just (langAnnotation : rest)
213+
findAndReplace (x : xs) = (x :) <$> findAndReplace xs
214+
in case findAndReplace leading of
215+
Just newLeading -> (trailing, newLeading)
216+
Nothing -> (trailing, leading)
217+
in case (trailing', leading') of
163218
-- Special case: if the trailing comment visually forms a block with the start of the following line,
164219
-- then treat it like part of those comments instead of a distinct trailing comment.
165220
-- This happens especially often after `{` or `[` tokens, where the comment of the first item
166221
-- starts on the same line ase the opening token.
167222
([PTLineComment _ pos], (PTNewlines 1) : (PTLineComment _ pos') : _) | pos == pos' -> (Nothing, convertLeading pts)
168223
([PTLineComment _ pos], [PTNewlines 1]) | pos == nextCol -> (Nothing, convertLeading pts)
169-
_ -> (convertTrailing trailing, convertLeading leading)
224+
_ -> (convertTrailing trailing', convertLeading leading')
225+
226+
-- Parser to peek at the next token type without consuming input
227+
parseNextTokenType :: Parser Token
228+
parseNextTokenType = do
229+
-- Skip any trivia that might appear before the next token
230+
_ <- many (hidden $ lineComment <|> blockComment <|> newlines)
231+
-- Skip any remaining whitespace
232+
_ <- manyP (\x -> isSpace x && x /= '\n' && x /= '\r')
233+
TDoubleQuote
234+
<$ chunk "\""
235+
<|> TDoubleSingleQuote
236+
<$ chunk "''"
170237

171238
trivia :: Parser [ParseTrivium]
172239
trivia = many $ hidden $ lineComment <|> blockComment <|> newlines
@@ -188,7 +255,11 @@ lexeme p = do
188255
parsedTrivia <- trivia
189256
-- This is the position of the next lexeme after the currently parsed one
190257
SourcePos{sourceColumn = col} <- getSourcePos
191-
let (trailing, nextLeading) = convertTrivia parsedTrivia col
258+
259+
-- Add lookahead for next token
260+
nextToken <- optional (try $ lookAhead $ preLexeme parseNextTokenType)
261+
262+
let (trailing, nextLeading) = convertTrivia parsedTrivia col nextToken
192263
pushTrivia nextLeading
193264
return $
194265
Ann

src/Nixfmt/Pretty.hs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ instance Pretty TrailingComment where
8686
instance Pretty Trivium where
8787
pretty EmptyLine = emptyline
8888
pretty (LineComment c) = comment ("#" <> c) <> hardline
89+
pretty (LanguageAnnotation lang) = comment ("/* " <> lang <> " */") <> hardspace
8990
pretty (BlockComment isDoc c) =
9091
comment (if isDoc then "/**" else "/*")
9192
<> hardline
@@ -109,6 +110,8 @@ prettyItems (Items items) = sepBy hardline items
109110

110111
instance Pretty [Trivium] where
111112
pretty [] = mempty
113+
-- Special case: if trivia consists only of a single language annotation, render it inline without a preceding hardline
114+
pretty [langAnnotation@(LanguageAnnotation _)] = pretty langAnnotation
112115
pretty trivia = hardline <> hcat trivia
113116

114117
instance (Pretty a) => Pretty (Ann a) where

src/Nixfmt/Types.hs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,8 @@ data Trivium
7272
| -- Multi-line comments with /* or /**. Multiple # comments are treated as a list of `LineComment`.
7373
-- The bool indicates a doc comment (/**)
7474
BlockComment Bool [Text]
75+
| -- | Language annotation comments like /* lua */ that should remain as block comments before strings
76+
LanguageAnnotation Text
7577
deriving (Eq, Show)
7678

7779
type Trivia = [Trivium]

0 commit comments

Comments
 (0)