66module Nixfmt.Lexer (lexeme , pushTrivia , takeTrivia , whole ) where
77
88import Control.Monad.State.Strict (MonadState , evalStateT , get , modify , put )
9- import Data.Char (isSpace )
9+ import Data.Char (isAlphaNum , isSpace )
1010import Data.List (dropWhileEnd )
1111import Data.Maybe (fromMaybe )
1212import Data.Text as Text (
1313 Text ,
14+ all ,
15+ any ,
1416 isPrefixOf ,
1517 length ,
1618 lines ,
@@ -29,6 +31,7 @@ import Data.Void (Void)
2931import Nixfmt.Types (
3032 Ann (.. ),
3133 Parser ,
34+ Token (TDoubleQuote , TDoubleSingleQuote ),
3235 TrailingComment (.. ),
3336 Trivia ,
3437 Trivium (.. ),
@@ -43,9 +46,11 @@ import Text.Megaparsec (
4346 chunk ,
4447 getSourcePos ,
4548 hidden ,
49+ lookAhead ,
4650 many ,
4751 manyTill ,
4852 notFollowedBy ,
53+ optional ,
4954 some ,
5055 try ,
5156 unPos ,
@@ -59,6 +64,8 @@ data ParseTrivium
5964 PTLineComment Text Pos
6065 | -- Track whether it is a doc comment
6166 PTBlockComment Bool [Text ]
67+ | -- | Language annotation like /* lua */ (single line, non-doc)
68+ PTLanguageAnnotation Text
6269 deriving (Show )
6370
6471preLexeme :: Parser a -> Parser a
@@ -133,6 +140,7 @@ convertTrailing = toMaybe . join . map toText
133140 where
134141 toText (PTLineComment c _) = strip c
135142 toText (PTBlockComment False [c]) = strip c
143+ toText (PTLanguageAnnotation _) = " " -- Language annotations don't become trailing comments
136144 toText _ = " "
137145 join = Text. unwords . filter (/= " " )
138146 toMaybe " " = Nothing
@@ -148,6 +156,7 @@ convertLeading =
148156 PTBlockComment _ [] -> []
149157 PTBlockComment False [c] -> [LineComment $ " " <> strip c]
150158 PTBlockComment isDoc cs -> [BlockComment isDoc cs]
159+ PTLanguageAnnotation c -> [LanguageAnnotation c]
151160 )
152161
153162isTrailing :: ParseTrivium -> Bool
@@ -156,17 +165,75 @@ isTrailing (PTBlockComment False []) = True
156165isTrailing (PTBlockComment False [_]) = True
157166isTrailing _ = False
158167
159- convertTrivia :: [ParseTrivium ] -> Pos -> (Maybe TrailingComment , Trivia )
160- convertTrivia pts nextCol =
168+ -- Check if a text is a valid language identifier for language annotations
169+ isLanguageIdentifier :: Text -> Bool
170+ isLanguageIdentifier content =
171+ let stripped = strip content
172+ in not (Text. null stripped)
173+ && Text. length stripped <= 30 -- TODO: make configurable or remove limit
174+ && Text. all (\ c -> isAlphaNum c || c `elem` [' -' , ' +' , ' .' , ' _' ]) stripped
175+ && not (Text. any (`elem` [' \n ' , ' \r ' ]) content)
176+
177+ -- Check if next token is a string literal
178+ isStringToken :: Maybe Token -> Bool
179+ isStringToken (Just TDoubleQuote ) = True
180+ isStringToken (Just TDoubleSingleQuote ) = True
181+ isStringToken _ = False
182+
183+ -- Convert a single block comment to language annotation if it matches criteria
184+ toLangAnnotation :: Text -> Maybe Token -> Maybe ParseTrivium
185+ toLangAnnotation content nextToken
186+ | isStringToken nextToken && isLanguageIdentifier content =
187+ Just (PTLanguageAnnotation (strip content))
188+ | otherwise = Nothing
189+
190+ convertTrivia :: [ParseTrivium ] -> Pos -> Maybe Token -> (Maybe TrailingComment , Trivia )
191+ convertTrivia pts nextCol nextToken =
161192 let (trailing, leading) = span isTrailing pts
162- in case (trailing, leading) of
193+ -- Check if we should convert trailing block comment to language annotation
194+ -- If so, move it to leading position instead of keeping it as trailing
195+ (trailing', leading') = case (trailing, nextToken) of
196+ -- If next token is a string and trailing trivia is a potential language annotation
197+ ([PTBlockComment False [content]], _)
198+ | Just langAnnotation <- toLangAnnotation content nextToken ->
199+ ([] , langAnnotation : leading)
200+ _ ->
201+ -- Check if we should convert leading block comment to language annotation
202+ case (leading, nextToken) of
203+ -- If next token is a string and last trivia is a potential language annotation
204+ (PTBlockComment False [content] : rest, _)
205+ | Just langAnnotation <- toLangAnnotation content nextToken ->
206+ (trailing, langAnnotation : rest)
207+ -- Handle case where block comment is not first in leading trivia
208+ _ ->
209+ let findAndReplace [] = Nothing
210+ findAndReplace (PTBlockComment False [content] : rest)
211+ | Just langAnnotation <- toLangAnnotation content nextToken =
212+ Just (langAnnotation : rest)
213+ findAndReplace (x : xs) = (x : ) <$> findAndReplace xs
214+ in case findAndReplace leading of
215+ Just newLeading -> (trailing, newLeading)
216+ Nothing -> (trailing, leading)
217+ in case (trailing', leading') of
163218 -- Special case: if the trailing comment visually forms a block with the start of the following line,
164219 -- then treat it like part of those comments instead of a distinct trailing comment.
165220 -- This happens especially often after `{` or `[` tokens, where the comment of the first item
166221 -- starts on the same line ase the opening token.
167222 ([PTLineComment _ pos], (PTNewlines 1 ) : (PTLineComment _ pos') : _) | pos == pos' -> (Nothing , convertLeading pts)
168223 ([PTLineComment _ pos], [PTNewlines 1 ]) | pos == nextCol -> (Nothing , convertLeading pts)
169- _ -> (convertTrailing trailing, convertLeading leading)
224+ _ -> (convertTrailing trailing', convertLeading leading')
225+
226+ -- Parser to peek at the next token type without consuming input
227+ parseNextTokenType :: Parser Token
228+ parseNextTokenType = do
229+ -- Skip any trivia that might appear before the next token
230+ _ <- many (hidden $ lineComment <|> blockComment <|> newlines)
231+ -- Skip any remaining whitespace
232+ _ <- manyP (\ x -> isSpace x && x /= ' \n ' && x /= ' \r ' )
233+ TDoubleQuote
234+ <$ chunk " \" "
235+ <|> TDoubleSingleQuote
236+ <$ chunk " ''"
170237
171238trivia :: Parser [ParseTrivium ]
172239trivia = many $ hidden $ lineComment <|> blockComment <|> newlines
@@ -188,7 +255,11 @@ lexeme p = do
188255 parsedTrivia <- trivia
189256 -- This is the position of the next lexeme after the currently parsed one
190257 SourcePos {sourceColumn = col} <- getSourcePos
191- let (trailing, nextLeading) = convertTrivia parsedTrivia col
258+
259+ -- Add lookahead for next token
260+ nextToken <- optional (try $ lookAhead $ preLexeme parseNextTokenType)
261+
262+ let (trailing, nextLeading) = convertTrivia parsedTrivia col nextToken
192263 pushTrivia nextLeading
193264 return $
194265 Ann
0 commit comments