NixOS · jfly · Oct 6, 2025 · Sep 30, 2025 · Sep 30, 2025 · Sep 30, 2025
diff --git a/src/Nixfmt/Lexer.hs b/src/Nixfmt/Lexer.hs
@@ -6,11 +6,13 @@
 module Nixfmt.Lexer (lexeme, pushTrivia, takeTrivia, whole) where
 
 import Control.Monad.State.Strict (MonadState, evalStateT, get, modify, put)
-import Data.Char (isSpace)
+import Data.Char (isAlphaNum, isSpace)
+import Data.Functor (($>))
 import Data.List (dropWhileEnd)
 import Data.Maybe (fromMaybe)
 import Data.Text as Text (
   Text,
+  all,
   isPrefixOf,
   length,
   lines,
@@ -43,6 +45,7 @@ import Text.Megaparsec (
   chunk,
   getSourcePos,
   hidden,
+  lookAhead,
   many,
   manyTill,
   notFollowedBy,
@@ -59,6 +62,8 @@ data ParseTrivium
     PTLineComment Text Pos
   | -- Track whether it is a doc comment
     PTBlockComment Bool [Text]
+  | -- | Language annotation like /* lua */ (single line, non-doc)
+    PTLanguageAnnotation Text
   deriving (Show)
 
 preLexeme :: Parser a -> Parser a
@@ -127,6 +132,30 @@ blockComment = try $ preLexeme $ do
     commonIndentationLength :: Int -> [Text] -> Int
     commonIndentationLength = foldr (min . Text.length . Text.takeWhile (== ' '))
 
+-- | Parse a language annotation such as @/* lua */@: a single-line, non-doc
+-- block comment whose stripped text is a language name and which is followed
+-- by a string delimiter. On any mismatch the parser fails and 'try' rewinds
+-- the input, leaving the comment to be parsed by another parser.
+languageAnnotation :: Parser ParseTrivium
+languageAnnotation = try $ do
+  PTBlockComment False [commentText] <- blockComment
+  stringFollows <- lookAhead startsString
+  if stringFollows && isLanguageName (strip commentText)
+    then pure (PTLanguageAnnotation (strip commentText))
+    else fail "Not a language annotation"
+  where
+    -- Non-empty, at most 30 characters, each alphanumeric or one of - + . _
+    isLanguageName ident =
+      not (Text.null ident)
+        && Text.length ident <= 30
+        && Text.all isLanguageChar ident
+    isLanguageChar c = isAlphaNum c || c `elem` ['-', '+', '.', '_']
+
+    -- Skip any remaining whitespace, then report whether " or '' comes next
+    startsString =
+      manyP isSpace
+        *> (((chunk "\"" <|> chunk "''") $> True) <|> pure False)
+
 -- This should be called with zero or one elements, as per `span isTrailing`
 convertTrailing :: [ParseTrivium] -> Maybe TrailingComment
 convertTrailing = toMaybe . join . map toText
@@ -148,6 +177,7 @@ convertLeading =
         PTBlockComment _ [] -> []
         PTBlockComment False [c] -> [LineComment $ " " <> strip c]
         PTBlockComment isDoc cs -> [BlockComment isDoc cs]
+        PTLanguageAnnotation c -> [LanguageAnnotation c]
     )
 
 isTrailing :: ParseTrivium -> Bool
@@ -169,7 +199,7 @@ convertTrivia pts nextCol =
       _ -> (convertTrailing trailing, convertLeading leading)
 
 trivia :: Parser [ParseTrivium]
-trivia = many $ hidden $ lineComment <|> blockComment <|> newlines
+trivia = many $ hidden $ languageAnnotation <|> lineComment <|> blockComment <|> newlines
 
 -- The following primitives to interact with the state monad that stores trivia
 -- are designed to prevent trivia from being dropped or duplicated by accident.

diff --git a/src/Nixfmt/Pretty.hs b/src/Nixfmt/Pretty.hs
@@ -86,6 +86,7 @@ instance Pretty TrailingComment where
 instance Pretty Trivium where
   pretty EmptyLine = emptyline
   pretty (LineComment c) = comment ("#" <> c) <> hardline
+  pretty (LanguageAnnotation lang) = comment ("/* " <> lang <> " */") <> hardspace
   pretty (BlockComment isDoc c) =
     comment (if isDoc then "/**" else "/*")
       <> hardline
@@ -105,10 +106,23 @@ instance (Pretty a) => Pretty (Item a) where
 
 -- For lists, attribute sets and let bindings
 prettyItems :: (Pretty a) => Items a -> Doc
-prettyItems (Items items) = sepBy hardline items
+prettyItems (Items items) = render items
+  where
+    -- A lone language annotation shares a line with the item that follows it.
+    -- NOTE(review): `pretty ann` already ends in a hardspace; confirm that the
+    -- explicit one here is intended.
+    render (Comments [ann@(LanguageAnnotation _)] : Item str : more) =
+      pretty ann <> hardspace <> group str <> continue more
+    render (x : more) = pretty x <> continue more
+    render [] = mempty
+    -- Each further item starts on a new line; nothing follows the last one.
+    continue [] = mempty
+    continue more = hardline <> render more
 
 instance Pretty [Trivium] where
   pretty [] = mempty
+  -- A lone language annotation is rendered inline, with no hardline before it
+  pretty [annotation@(LanguageAnnotation _)] = pretty annotation
   pretty trivia = hardline <> hcat trivia
 
 instance (Pretty a) => Pretty (Ann a) where

diff --git a/src/Nixfmt/Types.hs b/src/Nixfmt/Types.hs
@@ -72,6 +72,8 @@ data Trivium
   | -- Multi-line comments with /* or /**. Multiple # comments are treated as a list of `LineComment`.
     -- The bool indicates a doc comment (/**)
     BlockComment Bool [Text]
+  | -- | Language annotation comments like /* lua */ that should remain as block comments before strings
+    LanguageAnnotation Text
   deriving (Eq, Show)
 
 type Trivia = [Trivium]

diff --git a/standard.md b/standard.md
@@ -395,7 +395,11 @@ throw ''Some very long error messages containing ${
   - Specifically, the expression that the comment is attached to must be maintained by the formatter, as well as the resulting doc string.
 - Empty comments may be deleted.
   - Often their only purpose is to vertically align lines, which is not allowed.
-- Single-line `/*` comments must be converted to `#` comments.
+- Single-line `/*` comments must be converted to `#` comments, except for language annotations.
+- Language annotation comments that directly precede string literals must be preserved as block comments.
+  - A language annotation is a single-line, non-doc block comment containing only a valid language identifier: 1 to 30 characters, each alphanumeric or one of `-`, `+`, `.`, `_`. Whitespace around the identifier inside the comment is ignored.
+  - Language annotations must be followed by a string literal (`"..."` or `''...''`), with nothing but whitespace (including line breaks) in between, to be preserved as block comments.
+  - Language annotations not followed by strings are converted to line comments like other single-line block comments.
 - Single-line comments may be moved up or down a line to improve the layout.
 - Anything after the first `#` of single-line comments must be preserved.
   - This allows the common pattern of prefixing many lines with `#` to comment them out, without the formatter trying to change anything.
@@ -421,6 +425,27 @@ Note that these examples show *allowed* transformations, which may or may not be
 ↓
 # bar
 
+/* Language annotations are preserved when followed by strings */
+/* bash */ ''
+  echo "Hello, world!"
+''
+↓
+/* bash */ ''
+  echo "Hello, world!"
+''
+
+/* python */ "print('Hello')"
+↓
+/* python */ "print('Hello')"
+
+/* Language annotations not followed by strings are converted */
+/* bash */ { key = "value"; }
+↓
+# bash
+{
+  key = "value";
+}
+
 function call ( # trailing comment
   body
 )

diff --git a/test/diff/language-annotation/in.nix b/test/diff/language-annotation/in.nix
@@ -0,0 +1,136 @@
+{
+  # Basic language annotation cases
+  luaScript = /* lua */ ''
+    print("Hello, world!")
+    local x = 42
+  '';
+
+  # With extra whitespace
+  jsCode = /*   javascript   */ ''
+    console.log("Hello from JS");
+    const x = 42;
+  '';
+
+  # missing whitespace after comment
+  noSpace = /*python*/''
+    print("No space after comment")
+  '';
+
+  # Language annotation with indented multiline string
+  indentedCode = {
+    script = /* python */ ''
+      import os
+      def main():
+          print("Indented Python")
+    '';
+  };
+
+  # Language annotation followed by regular string
+  regularString = /* json */ "{ \"key\": \"value\" }";
+  multilineRegularString = /* js */ "
+    console.log('Hello, world!');
+  ";
+
+  # Multiple block comments in sequence
+  sequentialComments = /* first */ /* second */ ''
+    some content
+  '';
+
+  # Block comment with line breaks
+  multilineBlockComment = /* this is a
+                             multiline comment */ ''
+    content
+  '';
+
+  # Mixed comment styles
+  mixedComments = /* inline */ # line comment
+    ''
+      content
+    '';
+
+  # Language annotation in function arguments
+  processCode = builtins.readFile (/* lua */ ''
+    return "Hello"
+  '');
+  # without parentheses
+  processCode2 = builtins.readFile /* lua */ ''
+    return "Hello"
+  '';
+
+  # Language annotation in list
+  scripts = [
+    /* bash */ ''
+      echo "Script 1"
+    ''
+    /* python */ ''
+      print("Script 2")
+    ''
+    /* ruby */ "puts 'Script 3'"
+    /* js */
+    "console.log('Script 4');"
+  ];
+
+  aboveString = 
+    /* bash */
+    "echo 'Above string'";
+
+  # Language annotation in attribute set
+  languages = {
+    lua = /* lua */ ''
+      print("Lua")
+    '';
+    python = /* python */ ''
+      print("Python")
+    '';
+  };
+
+  # Edge case: empty language annotation
+  emptyAnnotation = /**/ ''
+    content without annotation
+  '';
+
+  # Edge case: language annotation with special characters
+  specialChars = /* c++ */ ''
+    #include <iostream>
+    int main() { return 0; }
+  '';
+  withDot = /* .ts */ ''
+    let x: number = 42;
+  '';
+
+  # Edge case: very long language annotation
+  longAnnotation = /* this-is-a-very-long-language-annotation-that-might-affect-line-length */ ''
+    content
+  '';
+
+  # Language annotation not followed by string
+  object = /* json */ { key = "value"; };
+  fn = /* foo */ x: x + 1;
+  fnCall =  /* foo */ fnName "bar";
+
+
+  # Language annotation with interpolated expressions
+  interpolatedExpr = /* bash */ ''
+    ${/* inline-comment */ "echo hello"}
+  '';
+
+  # Language annotation in let expression
+  letExpr = let
+    code = /* python */ ''
+      print("In let")
+    '';
+  in code;
+
+  # Language annotation in function definition
+  mkScript = lang: content: /* lang */ ''
+    ${content}
+  '';
+
+  # on in block
+  expr =
+    let
+    in
+    /* bash */ ''
+      echo "Hello"
+    '';
+}