Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 32 additions & 2 deletions src/Nixfmt/Lexer.hs
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@
module Nixfmt.Lexer (lexeme, pushTrivia, takeTrivia, whole) where

import Control.Monad.State.Strict (MonadState, evalStateT, get, modify, put)
import Data.Char (isSpace)
import Data.Char (isAlphaNum, isSpace)
import Data.Functor (($>))
import Data.List (dropWhileEnd)
import Data.Maybe (fromMaybe)
import Data.Text as Text (
Text,
all,
isPrefixOf,
length,
lines,
Expand Down Expand Up @@ -43,6 +45,7 @@ import Text.Megaparsec (
chunk,
getSourcePos,
hidden,
lookAhead,
many,
manyTill,
notFollowedBy,
Expand All @@ -59,6 +62,8 @@ data ParseTrivium
PTLineComment Text Pos
| -- Track whether it is a doc comment
PTBlockComment Bool [Text]
| -- | Language annotation like /* lua */ (single line, non-doc)
PTLanguageAnnotation Text
deriving (Show)

preLexeme :: Parser a -> Parser a
Expand Down Expand Up @@ -127,6 +132,30 @@ blockComment = try $ preLexeme $ do
-- | Smallest number of leading space characters found on any of the given
-- lines, never exceeding the supplied starting value. For an empty list the
-- starting value itself is returned. Only ' ' counts as indentation here.
commonIndentationLength :: Int -> [Text] -> Int
commonIndentationLength limit textLines = foldr narrow limit textLines
  where
    -- Number of ' ' characters at the start of a line.
    leadingSpaces = Text.length . Text.takeWhile (== ' ')
    -- Keep the smaller of this line's indentation and the best seen so far.
    narrow line best = min (leadingSpaces line) best

-- | Parse a language annotation such as @/* lua */@.
--
-- Succeeds only for a non-doc block comment with exactly one line of content
-- whose stripped text looks like a language name and which is followed
-- (after optional whitespace) by a string opener, @"@ or @''@. The string
-- opener itself is only peeked at, never consumed. In every other case the
-- parser fails and, thanks to 'try', consumes no input.
languageAnnotation :: Parser ParseTrivium
languageAnnotation = try $ do
  -- Any other comment shape fails this pattern match, which fails the parser.
  PTBlockComment False [body] <- blockComment
  let name = strip body
  stringFollows <- lookAhead startsString

  if stringFollows && isLanguageName name
    then return (PTLanguageAnnotation name)
    else fail "Not a language annotation"
  where
    -- A language name is non-empty, at most 30 characters long, and made up
    -- solely of permitted characters.
    isLanguageName name =
      not (Text.null name)
        && Text.length name <= 30
        && Text.all isLanguageChar name

    -- Characters permitted inside a language name.
    isLanguageChar c = isAlphaNum c || c `elem` ['-', '+', '.', '_']

    -- Skip any remaining whitespace, then report whether a string delimiter
    -- (" or '') comes next.
    startsString =
      manyP isSpace
        *> ( (chunk "\"" $> True)
              <|> (chunk "''" $> True)
              <|> pure False
           )

-- This should be called with zero or one elements, as per `span isTrailing`
convertTrailing :: [ParseTrivium] -> Maybe TrailingComment
convertTrailing = toMaybe . join . map toText
Expand All @@ -148,6 +177,7 @@ convertLeading =
PTBlockComment _ [] -> []
PTBlockComment False [c] -> [LineComment $ " " <> strip c]
PTBlockComment isDoc cs -> [BlockComment isDoc cs]
PTLanguageAnnotation c -> [LanguageAnnotation c]
)

isTrailing :: ParseTrivium -> Bool
Expand All @@ -169,7 +199,7 @@ convertTrivia pts nextCol =
_ -> (convertTrailing trailing, convertLeading leading)

trivia :: Parser [ParseTrivium]
trivia = many $ hidden $ lineComment <|> blockComment <|> newlines
-- Collect any run of comments and newlines. `languageAnnotation` has to be
-- tried before `blockComment`: it starts by running `blockComment` itself, so
-- with the alternatives the other way round the plain block-comment parser
-- would always win and no annotation would ever be recognised.
trivia = many $ hidden $ languageAnnotation <|> lineComment <|> blockComment <|> newlines

-- The following primitives to interact with the state monad that stores trivia
-- are designed to prevent trivia from being dropped or duplicated by accident.
Expand Down
16 changes: 15 additions & 1 deletion src/Nixfmt/Pretty.hs
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ instance Pretty TrailingComment where
instance Pretty Trivium where
pretty EmptyLine = emptyline
pretty (LineComment c) = comment ("#" <> c) <> hardline
pretty (LanguageAnnotation lang) = comment ("/* " <> lang <> " */") <> hardspace
pretty (BlockComment isDoc c) =
comment (if isDoc then "/**" else "/*")
<> hardline
Expand All @@ -105,10 +106,23 @@ instance (Pretty a) => Pretty (Item a) where

-- For lists, attribute sets and let bindings
prettyItems :: (Pretty a) => Items a -> Doc
prettyItems (Items items) = sepBy hardline items
-- Lay the items out one per line, with no trailing line break after the last.
-- A comment group holding exactly one language annotation is kept on the same
-- line as the item that follows it instead of being put on its own line.
prettyItems (Items items) = layout items
  where
    -- What comes after an entry: nothing at the end of the list, otherwise a
    -- line break followed by the remaining entries.
    continue [] = mempty
    continue remaining = hardline <> layout remaining

    layout [] = mempty
    -- Special case: language annotation comment followed by string item
    layout (Comments [LanguageAnnotation lang] : Item annotated : remaining) =
      pretty (LanguageAnnotation lang)
        <> hardspace
        <> group annotated
        <> continue remaining
    layout (entry : remaining) = pretty entry <> continue remaining

-- Rendering of a whole trivia list: empty trivia produce nothing, a lone
-- language annotation is rendered inline without a preceding hardline, and
-- anything else starts on a fresh line followed by all trivia concatenated.
instance Pretty [Trivium] where
  pretty trivia = case trivia of
    [] -> mempty
    [annotation@(LanguageAnnotation _)] -> pretty annotation
    _ -> hardline <> hcat trivia

instance (Pretty a) => Pretty (Ann a) where
Expand Down
2 changes: 2 additions & 0 deletions src/Nixfmt/Types.hs
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ data Trivium
| -- Multi-line comments with /* or /**. Multiple # comments are treated as a list of `LineComment`.
-- The bool indicates a doc comment (/**)
BlockComment Bool [Text]
| -- | Language annotation comments like /* lua */ that should remain as block comments before strings
LanguageAnnotation Text
deriving (Eq, Show)

type Trivia = [Trivium]
Expand Down
27 changes: 26 additions & 1 deletion standard.md
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,11 @@ throw ''Some very long error messages containing ${
- Specifically, the expression that the comment is attached to must be maintained by the formatter, as well as the resulting doc string.
- Empty comments may be deleted.
- Often their only purpose is to vertically align lines, which is not allowed.
- Single-line `/*` comments must be converted to `#` comments.
- Single-line `/*` comments must be converted to `#` comments, except for language annotations.
- Language annotation comments that directly precede string literals must be preserved as block comments.
- A language annotation is a single-line block comment containing only a valid language identifier (alphanumeric characters, plus `-`, `+`, `.`, `_`).
- Language annotations must be immediately followed by a string literal (`"..."` or `''...''`) to be preserved as block comments.
- Language annotations not followed by strings are converted to line comments like other single-line block comments.
- Single-line comments may be moved up or down a line to improve the layout.
- Anything after the first `#` of single-line comments must be preserved.
- This allows the common pattern of prefixing many lines with `#` to comment them out, without the formatter trying to change anything.
Expand All @@ -421,6 +425,27 @@ Note that these examples show *allowed* transformations, which may or may not be
# bar
/* Language annotations are preserved when followed by strings */
/* bash */ ''
echo "Hello, world!"
''
/* bash */ ''
echo "Hello, world!"
''
/* python */ "print('Hello')"
/* python */ "print('Hello')"
/* Language annotations not followed by strings are converted */
/* bash */ { key = "value"; }
# bash
{
key = "value";
}
function call ( # trailing comment
body
)
Expand Down
136 changes: 136 additions & 0 deletions test/diff/language-annotation/in.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
{
# Basic language annotation cases
luaScript = /* lua */ ''
print("Hello, world!")
local x = 42
'';

# With extra whitespace
jsCode = /* javascript */ ''
console.log("Hello from JS");
const x = 42;
'';

# missing whitespace after comment
noSpace = /*python*/''
print("No space after comment")
'';

# Language annotation with indented multiline string
indentedCode = {
script = /* python */ ''
import os
def main():
print("Indented Python")
'';
};

# Language annotation followed by regular string
regularString = /* json */ "{ \"key\": \"value\" }";
multilineRegularString = /* js */ "
console.log('Hello, world!');
";

# Multiple block comments in sequence
sequentialComments = /* first */ /* second */ ''
some content
'';

# Block comment with line breaks
multilineBlockComment = /* this is a
multiline comment */ ''
content
'';

# Mixed comment styles
mixedComments = /* inline */ # line comment
''
content
'';

# Language annotation in function arguments
processCode = builtins.readFile (/* lua */ ''
return "Hello"
'');
Comment on lines +51 to +54
Copy link
Contributor Author

@dyegoaurelio dyegoaurelio Sep 22, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This input is not idempotent in master

Checking test/diff/language-annotation/in.nix …
<stdin>: Nixfmt is not idempotent. This is a bug in nixfmt. Please report it at https://github.com/NixOS/nixfmt

After one formatting:
builtins.readFile (
  # lua
  ''
    return "Hello"
  ''
)


After two:
builtins.readFile (
  # lua
  ''
    return "Hello"
  '') 

so we're also fixing this bug here

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, but also I can imagine inputs not covered by this PR which still exhibit this bug, so it might be worth tracking separately

# without parentheses
processCode2 = builtins.readFile /* lua */ ''
return "Hello"
'';

# Language annotation in list
scripts = [
/* bash */ ''
echo "Script 1"
''
/* python */ ''
print("Script 2")
''
/* ruby */ "puts 'Script 3'"
/* js */
"console.log('Script 4');"
];

aboveString =
/* bash */
"echo 'Above string'";

# Language annotation in attribute set
languages = {
lua = /* lua */ ''
print("Lua")
'';
python = /* python */ ''
print("Python")
'';
};

# Edge case: empty language annotation
emptyAnnotation = /**/ ''
content without annotation
'';

# Edge case: language annotation with special characters
specialChars = /* c++ */ ''
#include <iostream>
int main() { return 0; }
'';
withDot = /* .ts */ ''
let x: number = 42;
'';

# Edge case: very long language annotation
longAnnotation = /* this-is-a-very-long-language-annotation-that-might-affect-line-length */ ''
content
'';

# Language annotation not followed by string
object = /* json */ { key = "value"; };
fn = /* foo */ x: x + 1;
fnCall = /* foo */ fnName "bar";


# Language annotation with interpolated expressions
interpolatedExpr = /* bash */ ''
${/* inline-comment */ "echo hello"}
'';

# Language annotation in let expression
letExpr = let
code = /* python */ ''
print("In let")
'';
in code;

# Language annotation in function definition
mkScript = lang: content: /* lang */ ''
${content}
'';

# on in block
expr =
let
in
/* bash */ ''
echo "Hello"
'';
}
Loading