diff --git a/src/Nixfmt/Lexer.hs b/src/Nixfmt/Lexer.hs index b34edecf..aed9f892 100644 --- a/src/Nixfmt/Lexer.hs +++ b/src/Nixfmt/Lexer.hs @@ -6,11 +6,13 @@ module Nixfmt.Lexer (lexeme, pushTrivia, takeTrivia, whole) where import Control.Monad.State.Strict (MonadState, evalStateT, get, modify, put) -import Data.Char (isSpace) +import Data.Char (isAlphaNum, isSpace) +import Data.Functor (($>)) import Data.List (dropWhileEnd) import Data.Maybe (fromMaybe) import Data.Text as Text ( Text, + all, isPrefixOf, length, lines, @@ -43,6 +45,7 @@ import Text.Megaparsec ( chunk, getSourcePos, hidden, + lookAhead, many, manyTill, notFollowedBy, @@ -59,6 +62,8 @@ data ParseTrivium PTLineComment Text Pos | -- Track whether it is a doc comment PTBlockComment Bool [Text] + | -- | Language annotation like /* lua */ (single line, non-doc) + PTLanguageAnnotation Text deriving (Show) preLexeme :: Parser a -> Parser a @@ -127,6 +132,30 @@ blockComment = try $ preLexeme $ do commonIndentationLength :: Int -> [Text] -> Int commonIndentationLength = foldr (min . Text.length . 
Text.takeWhile (== ' ')) +languageAnnotation :: Parser ParseTrivium +languageAnnotation = try $ do + -- Accept only a non-doc, single-line block comment; any other shape fails this pattern match, so the parser fails and the enclosing `try` backtracks + PTBlockComment False [content] <- blockComment + isStringDelimiterNext <- lookAhead isNextStringDelimiter + + if isStringDelimiterNext && isValidLanguageIdentifier content + then return (PTLanguageAnnotation (strip content)) + else fail "Not a language annotation" + where + -- A valid language identifier is, after stripping surrounding whitespace, non-empty, at most 30 characters long, and made up only of alphanumerics and '-', '+', '.', '_' + isValidLanguageIdentifier txt = + let stripped = strip txt + in not (Text.null stripped) + && Text.length stripped <= 30 + && Text.all (\c -> isAlphaNum c || c `elem` ['-', '+', '.', '_']) stripped + + -- Skips whitespace, then reports whether a string delimiter (" or '') comes next, falling back to False when neither matches; the caller wraps it in lookAhead so it only peeks + isNextStringDelimiter = do + _ <- manyP isSpace -- Skip any remaining whitespace + (chunk "\"" $> True) + <|> (chunk "''" $> True) + <|> pure False + -- This should be called with zero or one elements, as per `span isTrailing` convertTrailing :: [ParseTrivium] -> Maybe TrailingComment convertTrailing = toMaybe . join . map toText @@ -148,6 +177,7 @@ convertLeading = PTBlockComment _ [] -> [] PTBlockComment False [c] -> [LineComment $ " " <> strip c] PTBlockComment isDoc cs -> [BlockComment isDoc cs] + PTLanguageAnnotation c -> [LanguageAnnotation c] ) isTrailing :: ParseTrivium -> Bool @@ -169,7 +199,7 @@ convertTrivia pts nextCol = _ -> (convertTrailing trailing, convertLeading leading) trivia :: Parser [ParseTrivium] -trivia = many $ hidden $ lineComment <|> blockComment <|> newlines +trivia = many $ hidden $ languageAnnotation <|> lineComment <|> blockComment <|> newlines -- The following primitives to interact with the state monad that stores trivia -- are designed to prevent trivia from being dropped or duplicated by accident. 
diff --git a/src/Nixfmt/Pretty.hs b/src/Nixfmt/Pretty.hs index 8b09a99d..96fd3c7e 100644 --- a/src/Nixfmt/Pretty.hs +++ b/src/Nixfmt/Pretty.hs @@ -86,6 +86,7 @@ instance Pretty TrailingComment where instance Pretty Trivium where pretty EmptyLine = emptyline pretty (LineComment c) = comment ("#" <> c) <> hardline + pretty (LanguageAnnotation lang) = comment ("/* " <> lang <> " */") <> hardspace pretty (BlockComment isDoc c) = comment (if isDoc then "/**" else "/*") <> hardline @@ -105,10 +106,22 @@ instance (Pretty a) => Pretty (Item a) where -- For lists, attribute sets and let bindings prettyItems :: (Pretty a) => Items a -> Doc -prettyItems (Items items) = sepBy hardline items +prettyItems (Items items) = go items + where + go [] = mempty + go [item] = pretty item + -- Special case: a lone language annotation comment followed by a string item stays on the same line; the annotation's own rendering already ends in a hardspace, so no separator is added here + go (Comments [LanguageAnnotation lang] : Item stringItem : rest) = + pretty (LanguageAnnotation lang) + <> group stringItem + <> if null rest then mempty else hardline <> go rest + go (item : rest) = + pretty item <> if null rest then mempty else hardline <> go rest instance Pretty [Trivium] where pretty [] = mempty + -- Special case: if trivia consists only of a single language annotation, render it inline without a preceding hardline + pretty [langAnnotation@(LanguageAnnotation _)] = pretty langAnnotation pretty trivia = hardline <> hcat trivia instance (Pretty a) => Pretty (Ann a) where diff --git a/src/Nixfmt/Types.hs b/src/Nixfmt/Types.hs index 9f7d8d1d..daec75b1 100644 --- a/src/Nixfmt/Types.hs +++ b/src/Nixfmt/Types.hs @@ -72,6 +72,8 @@ data Trivium | -- Multi-line comments with /* or /**. Multiple # comments are treated as a list of `LineComment`. 
-- The bool indicates a doc comment (/**) BlockComment Bool [Text] + | -- | Language annotation comments like /* lua */ that should remain as block comments before strings + LanguageAnnotation Text deriving (Eq, Show) type Trivia = [Trivium] diff --git a/standard.md b/standard.md index b503b95d..f866431d 100644 --- a/standard.md +++ b/standard.md @@ -395,7 +395,11 @@ throw ''Some very long error messages containing ${ - Specifically, the expression that the comment is attached to must be maintained by the formatter, as well as the resulting doc string. - Empty comments may be deleted. - Often their only purpose is to vertically align lines, which is not allowed. -- Single-line `/*` comments must be converted to `#` comments. +- Single-line `/*` comments must be converted to `#` comments, except for language annotations. +- Language annotation comments that directly precede string literals must be preserved as block comments. + - A language annotation is a single-line block comment containing only a valid language identifier: between 1 and 30 characters, each alphanumeric or one of `-`, `+`, `.`, `_`. Whitespace around the identifier is ignored. + - Language annotations must be followed by a string literal (`"..."` or `''...''`), separated from it by nothing but whitespace (line breaks included), to be preserved as block comments. + - Language annotations not followed by strings are converted to line comments like other single-line block comments. - Single-line comments may be moved up or down a line to improve the layout. - Anything after the first `#` of single-line comments must be preserved. - This allows the common pattern of prefixing many lines with `#` to comment them out, without the formatter trying to change anything. @@ -421,6 +425,27 @@ Note that these examples show *allowed* transformations, which may or may not be ↓ # bar +/* Language annotations are preserved when followed by strings */ +/* bash */ '' + echo "Hello, world!" +'' +↓ +/* bash */ '' + echo "Hello, world!" 
+'' + +/* python */ "print('Hello')" +↓ +/* python */ "print('Hello')" + +/* Language annotations not followed by strings are converted */ +/* bash */ { key = "value"; } +↓ +# bash +{ + key = "value"; +} + function call ( # trailing comment body ) diff --git a/test/diff/language-annotation/in.nix b/test/diff/language-annotation/in.nix new file mode 100644 index 00000000..ddd6b691 --- /dev/null +++ b/test/diff/language-annotation/in.nix @@ -0,0 +1,136 @@ +{ + # Basic language annotation cases + luaScript = /* lua */ '' + print("Hello, world!") + local x = 42 + ''; + + # With extra whitespace + jsCode = /* javascript */ '' + console.log("Hello from JS"); + const x = 42; + ''; + + # missing whitespace after comment + noSpace = /*python*/'' + print("No space after comment") + ''; + + # Language annotation with indented multiline string + indentedCode = { + script = /* python */ '' + import os + def main(): + print("Indented Python") + ''; + }; + + # Language annotation followed by regular string + regularString = /* json */ "{ \"key\": \"value\" }"; + multilineRegularString = /* js */ " + console.log('Hello, world!'); + "; + + # Multiple block comments in sequence + sequentialComments = /* first */ /* second */ '' + some content + ''; + + # Block comment with line breaks + multilineBlockComment = /* this is a + multiline comment */ '' + content + ''; + + # Mixed comment styles + mixedComments = /* inline */ # line comment + '' + content + ''; + + # Language annotation in function arguments + processCode = builtins.readFile (/* lua */ '' + return "Hello" + ''); + # without parentheses + processCode2 = builtins.readFile /* lua */ '' + return "Hello" + ''; + + # Language annotation in list + scripts = [ + /* bash */ '' + echo "Script 1" + '' + /* python */ '' + print("Script 2") + '' + /* ruby */ "puts 'Script 3'" + /* js */ + "console.log('Script 4');" + ]; + + aboveString = + /* bash */ + "echo 'Above string'"; + + # Language annotation in attribute set + languages = { 
+ lua = /* lua */ '' + print("Lua") + ''; + python = /* python */ '' + print("Python") + ''; + }; + + # Edge case: empty language annotation + emptyAnnotation = /**/ '' + content without annotation + ''; + + # Edge case: language annotation with special characters + specialChars = /* c++ */ '' + #include + int main() { return 0; } + ''; + withDot = /* .ts */ '' + let x: number = 42; + ''; + + # Edge case: very long language annotation + longAnnotation = /* this-is-a-very-long-language-annotation-that-might-affect-line-length */ '' + content + ''; + + # Language annotation not followed by string + object = /* json */ { key = "value"; }; + fn = /* foo */ x: x + 1; + fnCall = /* foo */ fnName "bar"; + + + # Language annotation with interpolated expressions + interpolatedExpr = /* bash */ '' + ${/* inline-comment */ "echo hello"} + ''; + + # Language annotation in let expression + letExpr = let + code = /* python */ '' + print("In let") + ''; + in code; + + # Language annotation in function definition + mkScript = lang: content: /* lang */ '' + ${content} + ''; + + # on in block + expr = + let + in + /* bash */ '' + echo "Hello" + ''; +} diff --git a/test/diff/language-annotation/out-pure.nix b/test/diff/language-annotation/out-pure.nix new file mode 100644 index 00000000..7ee5549d --- /dev/null +++ b/test/diff/language-annotation/out-pure.nix @@ -0,0 +1,145 @@ +{ + # Basic language annotation cases + luaScript = /* lua */ '' + print("Hello, world!") + local x = 42 + ''; + + # With extra whitespace + jsCode = /* javascript */ '' + console.log("Hello from JS"); + const x = 42; + ''; + + # missing whitespace after comment + noSpace = /* python */ '' + print("No space after comment") + ''; + + # Language annotation with indented multiline string + indentedCode = { + script = /* python */ '' + import os + def main(): + print("Indented Python") + ''; + }; + + # Language annotation followed by regular string + regularString = /* json */ "{ \"key\": \"value\" }"; + 
multilineRegularString = /* js */ " + console.log('Hello, world!'); + "; + + # Multiple block comments in sequence + sequentialComments = # first + /* second */ '' + some content + ''; + + # Block comment with line breaks + multilineBlockComment = + /* + this is a + multiline comment + */ + '' + content + ''; + + # Mixed comment styles + mixedComments = # inline line comment + '' + content + ''; + + # Language annotation in function arguments + processCode = builtins.readFile (/* lua */ '' + return "Hello" + ''); + # without parentheses + processCode2 = builtins.readFile /* lua */ '' + return "Hello" + ''; + + # Language annotation in list + scripts = [ + /* bash */ '' + echo "Script 1" + '' + /* python */ '' + print("Script 2") + '' + /* ruby */ "puts 'Script 3'" + /* js */ "console.log('Script 4');" + ]; + + aboveString = /* bash */ "echo 'Above string'"; + + # Language annotation in attribute set + languages = { + lua = /* lua */ '' + print("Lua") + ''; + python = /* python */ '' + print("Python") + ''; + }; + + # Edge case: empty language annotation + emptyAnnotation = '' + content without annotation + ''; + + # Edge case: language annotation with special characters + specialChars = /* c++ */ '' + #include + int main() { return 0; } + ''; + withDot = /* .ts */ '' + let x: number = 42; + ''; + + # Edge case: very long language annotation + longAnnotation = # this-is-a-very-long-language-annotation-that-might-affect-line-length + '' + content + ''; + + # Language annotation not followed by string + object = # json + { + key = "value"; + }; + fn = # foo + x: x + 1; + fnCall = # foo + fnName "bar"; + + # Language annotation with interpolated expressions + interpolatedExpr = /* bash */ '' + ${/* inline-comment */ "echo hello"} + ''; + + # Language annotation in let expression + letExpr = + let + code = /* python */ '' + print("In let") + ''; + in + code; + + # Language annotation in function definition + mkScript = lang: content: /* lang */ '' + ${content} + ''; + + 
# on in block + expr = + let + in + /* bash */ '' + echo "Hello" + ''; +} diff --git a/test/diff/language-annotation/out.nix b/test/diff/language-annotation/out.nix new file mode 100644 index 00000000..7ee5549d --- /dev/null +++ b/test/diff/language-annotation/out.nix @@ -0,0 +1,145 @@ +{ + # Basic language annotation cases + luaScript = /* lua */ '' + print("Hello, world!") + local x = 42 + ''; + + # With extra whitespace + jsCode = /* javascript */ '' + console.log("Hello from JS"); + const x = 42; + ''; + + # missing whitespace after comment + noSpace = /* python */ '' + print("No space after comment") + ''; + + # Language annotation with indented multiline string + indentedCode = { + script = /* python */ '' + import os + def main(): + print("Indented Python") + ''; + }; + + # Language annotation followed by regular string + regularString = /* json */ "{ \"key\": \"value\" }"; + multilineRegularString = /* js */ " + console.log('Hello, world!'); + "; + + # Multiple block comments in sequence + sequentialComments = # first + /* second */ '' + some content + ''; + + # Block comment with line breaks + multilineBlockComment = + /* + this is a + multiline comment + */ + '' + content + ''; + + # Mixed comment styles + mixedComments = # inline line comment + '' + content + ''; + + # Language annotation in function arguments + processCode = builtins.readFile (/* lua */ '' + return "Hello" + ''); + # without parentheses + processCode2 = builtins.readFile /* lua */ '' + return "Hello" + ''; + + # Language annotation in list + scripts = [ + /* bash */ '' + echo "Script 1" + '' + /* python */ '' + print("Script 2") + '' + /* ruby */ "puts 'Script 3'" + /* js */ "console.log('Script 4');" + ]; + + aboveString = /* bash */ "echo 'Above string'"; + + # Language annotation in attribute set + languages = { + lua = /* lua */ '' + print("Lua") + ''; + python = /* python */ '' + print("Python") + ''; + }; + + # Edge case: empty language annotation + emptyAnnotation = '' + content 
without annotation + ''; + + # Edge case: language annotation with special characters + specialChars = /* c++ */ '' + #include + int main() { return 0; } + ''; + withDot = /* .ts */ '' + let x: number = 42; + ''; + + # Edge case: very long language annotation + longAnnotation = # this-is-a-very-long-language-annotation-that-might-affect-line-length + '' + content + ''; + + # Language annotation not followed by string + object = # json + { + key = "value"; + }; + fn = # foo + x: x + 1; + fnCall = # foo + fnName "bar"; + + # Language annotation with interpolated expressions + interpolatedExpr = /* bash */ '' + ${/* inline-comment */ "echo hello"} + ''; + + # Language annotation in let expression + letExpr = + let + code = /* python */ '' + print("In let") + ''; + in + code; + + # Language annotation in function definition + mkScript = lang: content: /* lang */ '' + ${content} + ''; + + # on in block + expr = + let + in + /* bash */ '' + echo "Hello" + ''; +} diff --git a/test/diff/monsters_4/out-pure.nix b/test/diff/monsters_4/out-pure.nix index ac311ac3..ab027715 100644 --- a/test/diff/monsters_4/out-pure.nix +++ b/test/diff/monsters_4/out-pure.nix @@ -37,34 +37,27 @@ stdenv.mkDerivation # Foo { # Foo pname # Foo - = # Foo - "contrast"; # Foo + = /* Foo */ "contrast"; # Foo version # Foo - = # Foo - "0.0.5"; # Foo + = /* Foo */ "0.0.5"; # Foo src # Foo = # Foo # Foo fetchFromGitLab { # Foo domain # Foo - = # Foo - "gitlab.gnome.org"; # Foo + = /* Foo */ "gitlab.gnome.org"; # Foo group # Foo - = # Foo - "World"; # Foo + = /* Foo */ "World"; # Foo owner # Foo - = # Foo - "design"; # Foo + = /* Foo */ "design"; # Foo repo # Foo - = # Foo - "contrast"; # Foo + = /* Foo */ "contrast"; # Foo rev # Foo = # Foo version; # Foo sha256 # Foo - = # Foo - "cypSbqLwSmauOoWOuppWpF3hvrxiqmkLspxAWzvlUC0="; # Foo + = /* Foo */ "cypSbqLwSmauOoWOuppWpF3hvrxiqmkLspxAWzvlUC0="; # Foo }; # Foo cargoDeps # Foo = # Foo @@ -75,11 +68,9 @@ stdenv.mkDerivation # Foo src ; # Foo name # Foo - = 
# Foo - "${pname}-${version}"; # Foo + = /* Foo */ "${pname}-${version}"; # Foo hash # Foo - = # Foo - "sha256-W4FyqwJpimf0isQRCq9TegpTQPQfsumx40AFQCFG5VQ="; # Foo + = /* Foo */ "sha256-W4FyqwJpimf0isQRCq9TegpTQPQfsumx40AFQCFG5VQ="; # Foo }; # Foo nativeBuildInputs # Foo = # Foo @@ -109,13 +100,12 @@ stdenv.mkDerivation # Foo pango # Foo ]; # Foo postPatch # Foo - = # Foo - '' - patchShebangs build-aux/meson_post_install.py - # https://gitlab.gnome.org/World/design/contrast/-/merge_requests/23 - substituteInPlace build-aux/meson_post_install.py \ - --replace "gtk-update-icon-cache" "gtk4-update-icon-cache" - ''; # Foo + = /* Foo */ '' + patchShebangs build-aux/meson_post_install.py + # https://gitlab.gnome.org/World/design/contrast/-/merge_requests/23 + substituteInPlace build-aux/meson_post_install.py \ + --replace "gtk-update-icon-cache" "gtk4-update-icon-cache" + ''; # Foo meta # Foo = # Foo with # Foo @@ -123,11 +113,9 @@ stdenv.mkDerivation # Foo { # Foo description # Foo - = # Foo - "Checks whether the contrast between two colors meet the WCAG requirements"; # Foo + = /* Foo */ "Checks whether the contrast between two colors meet the WCAG requirements"; # Foo homepage # Foo - = # Foo - "https://gitlab.gnome.org/World/design/contrast"; # Foo + = /* Foo */ "https://gitlab.gnome.org/World/design/contrast"; # Foo license # Foo = # Foo licenses.gpl3Plus; # Foo diff --git a/test/diff/monsters_4/out.nix b/test/diff/monsters_4/out.nix index ac311ac3..ab027715 100644 --- a/test/diff/monsters_4/out.nix +++ b/test/diff/monsters_4/out.nix @@ -37,34 +37,27 @@ stdenv.mkDerivation # Foo { # Foo pname # Foo - = # Foo - "contrast"; # Foo + = /* Foo */ "contrast"; # Foo version # Foo - = # Foo - "0.0.5"; # Foo + = /* Foo */ "0.0.5"; # Foo src # Foo = # Foo # Foo fetchFromGitLab { # Foo domain # Foo - = # Foo - "gitlab.gnome.org"; # Foo + = /* Foo */ "gitlab.gnome.org"; # Foo group # Foo - = # Foo - "World"; # Foo + = /* Foo */ "World"; # Foo owner # Foo - = # Foo - "design"; # 
Foo + = /* Foo */ "design"; # Foo repo # Foo - = # Foo - "contrast"; # Foo + = /* Foo */ "contrast"; # Foo rev # Foo = # Foo version; # Foo sha256 # Foo - = # Foo - "cypSbqLwSmauOoWOuppWpF3hvrxiqmkLspxAWzvlUC0="; # Foo + = /* Foo */ "cypSbqLwSmauOoWOuppWpF3hvrxiqmkLspxAWzvlUC0="; # Foo }; # Foo cargoDeps # Foo = # Foo @@ -75,11 +68,9 @@ stdenv.mkDerivation # Foo src ; # Foo name # Foo - = # Foo - "${pname}-${version}"; # Foo + = /* Foo */ "${pname}-${version}"; # Foo hash # Foo - = # Foo - "sha256-W4FyqwJpimf0isQRCq9TegpTQPQfsumx40AFQCFG5VQ="; # Foo + = /* Foo */ "sha256-W4FyqwJpimf0isQRCq9TegpTQPQfsumx40AFQCFG5VQ="; # Foo }; # Foo nativeBuildInputs # Foo = # Foo @@ -109,13 +100,12 @@ stdenv.mkDerivation # Foo pango # Foo ]; # Foo postPatch # Foo - = # Foo - '' - patchShebangs build-aux/meson_post_install.py - # https://gitlab.gnome.org/World/design/contrast/-/merge_requests/23 - substituteInPlace build-aux/meson_post_install.py \ - --replace "gtk-update-icon-cache" "gtk4-update-icon-cache" - ''; # Foo + = /* Foo */ '' + patchShebangs build-aux/meson_post_install.py + # https://gitlab.gnome.org/World/design/contrast/-/merge_requests/23 + substituteInPlace build-aux/meson_post_install.py \ + --replace "gtk-update-icon-cache" "gtk4-update-icon-cache" + ''; # Foo meta # Foo = # Foo with # Foo @@ -123,11 +113,9 @@ stdenv.mkDerivation # Foo { # Foo description # Foo - = # Foo - "Checks whether the contrast between two colors meet the WCAG requirements"; # Foo + = /* Foo */ "Checks whether the contrast between two colors meet the WCAG requirements"; # Foo homepage # Foo - = # Foo - "https://gitlab.gnome.org/World/design/contrast"; # Foo + = /* Foo */ "https://gitlab.gnome.org/World/design/contrast"; # Foo license # Foo = # Foo licenses.gpl3Plus; # Foo diff --git a/test/diff/string_interpol/out-pure.nix b/test/diff/string_interpol/out-pure.nix index f195f6ef..e92a237d 100644 --- a/test/diff/string_interpol/out-pure.nix +++ b/test/diff/string_interpol/out-pure.nix @@ -1,17 
+1,9 @@ [ "${ - # a - "${ - # b - "${c}" - }" # d + /* a */ "${/* b */ "${c}"}" # d }" ''${ - # a - ''${ - # b - ''${c}'' - }'' # d + /* a */ ''${/* b */ ''${c}''}'' # d }'' { ExecStart = "${pkgs.openarena}/bin/oa_ded +set fs_basepath ${pkgs.openarena}/openarena-0.8.8 +set fs_homepath /var/lib/openarena ${ diff --git a/test/diff/string_interpol/out.nix b/test/diff/string_interpol/out.nix index f195f6ef..e92a237d 100644 --- a/test/diff/string_interpol/out.nix +++ b/test/diff/string_interpol/out.nix @@ -1,17 +1,9 @@ [ "${ - # a - "${ - # b - "${c}" - }" # d + /* a */ "${/* b */ "${c}"}" # d }" ''${ - # a - ''${ - # b - ''${c}'' - }'' # d + /* a */ ''${/* b */ ''${c}''}'' # d }'' { ExecStart = "${pkgs.openarena}/bin/oa_ded +set fs_basepath ${pkgs.openarena}/openarena-0.8.8 +set fs_homepath /var/lib/openarena ${