From 0a9f4c5df790e4dd436bd1f956dd7831faae6b36 Mon Sep 17 00:00:00 2001 From: Taimoor Zaeem Date: Thu, 9 Oct 2025 10:48:01 +0500 Subject: [PATCH] fix: loading utf-8 file when locale encoding is set to ascii Configurator-pg fails to load UTF-8 encoded files when the locale encoding is set to ASCII. This commit fixes this by reading files as raw bytes and decoding them explicitly as UTF-8, which is faster and no longer depends on the locale encoding. See https://github.com/PostgREST/postgrest/issues/4386#issuecomment-3377862644 for more information. Signed-off-by: Taimoor Zaeem --- CHANGELOG.md | 4 ++++ configurator-pg.cabal | 1 + src/Data/Configurator/Load.hs | 4 +++- tests/Test.hs | 15 +++++++++++++++ tests/resources/err-import.cfg.err | 2 +- tests/resources/err-import.cfg.err.ghc8 | 1 + tests/resources/utf-8.cfg | 2 ++ 7 files changed, 27 insertions(+), 2 deletions(-) create mode 100644 tests/resources/err-import.cfg.err.ghc8 create mode 100644 tests/resources/utf-8.cfg diff --git a/CHANGELOG.md b/CHANGELOG.md index d282ddb..d1ff44e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Revision history for configurator-pg +## Unreleased + +* Fix loading `utf-8` file when locale encoding is set to `ASCII` + ## 0.2.10 -- 2024-03-06 * Allow megaparsec-9.6 diff --git a/configurator-pg.cabal b/configurator-pg.cabal index 17e6af2..4f8dc2d 100644 --- a/configurator-pg.cabal +++ b/configurator-pg.cabal @@ -34,6 +34,7 @@ library Data.Configurator.Syntax Data.Configurator.Types build-depends: base >= 4.9 && < 4.22 + , bytestring >= 0.10.8 && < 0.13 , megaparsec >= 7.0.0 && < 9.8 , containers >= 0.5.6.2 && < 0.8 , protolude >= 0.1.10 && < 0.4 diff --git a/src/Data/Configurator/Load.hs b/src/Data/Configurator/Load.hs index 11d0923..01b7e33 100644 --- a/src/Data/Configurator/Load.hs +++ b/src/Data/Configurator/Load.hs @@ -6,10 +6,12 @@ import Protolude import Control.Exception (throw) import Text.Megaparsec (parse, errorBundlePretty) +import qualified Data.ByteString as BS import qualified Data.Map.Strict as M import 
Data.Scientific (toBoundedInteger, toRealFloat) import qualified Data.Text as T +import qualified Data.Text.Encoding as T import qualified Data.Text.Lazy as TL import Data.Text.Lazy.Builder (fromString, fromText, @@ -31,7 +33,7 @@ load path = applyDirective "" "" M.empty (Import $ T.pack path) loadOne :: Path -> IO [Directive] loadOne path = do - s <- readFile (T.unpack path) + s <- T.decodeUtf8 <$> BS.readFile (T.unpack path) case parse topLevel (T.unpack path) s of Left err -> throw $ ParseError $ T.pack $ errorBundlePretty err Right directives -> return directives diff --git a/tests/Test.hs b/tests/Test.hs index 3a41ea3..45cc1b4 100644 --- a/tests/Test.hs +++ b/tests/Test.hs @@ -1,5 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE CPP #-} module Main where @@ -7,6 +8,8 @@ import Protolude hiding (bool, list, optional) import Data.Configurator import qualified Data.Text as T +import qualified GHC.IO.Encoding as E +import qualified GHC.IO.Encoding.Latin1 as E import System.Environment import System.FilePath import Test.Framework @@ -20,6 +23,7 @@ tests :: [Test] tests = [ testCase "read-simple" $ readTest "simple.cfg" , testCase "read-pathological" $ readTest "pathological.cfg" + , testCase "read-utf-8-with-ascii-locale" $ readTestWithLocale "utf-8.cfg" E.ascii , testCase "load" loadTest , testCase "load" loadTest , testCase "types" typesTest @@ -51,6 +55,9 @@ testFile name = "tests" </> "resources" </> name errorFile :: FilePath -> FilePath errorFile name = testFile name <> ".err" +errorFileGHC8 :: FilePath -> FilePath +errorFileGHC8 name = testFile name <> ".err.ghc8" + parse :: Config -> Parser Value a -> Key -> Either Text a parse cfg p key = runParser (required key p) cfg @@ -63,6 +70,10 @@ parseSub cfg p prefix = runParser (subassocs prefix p) cfg readTest :: FilePath -> Assertion readTest file = load (testFile file) >> return () +readTestWithLocale :: FilePath -> E.TextEncoding -> Assertion +readTestWithLocale file 
locale = + E.setLocaleEncoding locale >> load (testFile file) >> return () + loadTest :: Assertion loadTest = withLoad "pathological.cfg" $ \cfg -> do @@ -233,7 +244,11 @@ parseErrorTest file = do ioErrorTest :: FilePath -> Assertion ioErrorTest file = do +#if __GLASGOW_HASKELL__ >= 900 err <- readFile $ errorFile file +#else + err <- readFile $ errorFileGHC8 file +#endif (load (testFile file) >> assertFailure "expected an IO error") `catch` \ (ex :: IOException) -> do assertEqual "" err (show ex) diff --git a/tests/resources/err-import.cfg.err b/tests/resources/err-import.cfg.err index 1cd0b38..393118c 100644 --- a/tests/resources/err-import.cfg.err +++ b/tests/resources/err-import.cfg.err @@ -1 +1 @@ -tests/resources/not-exist.cfg: openFile: does not exist (No such file or directory) \ No newline at end of file +tests/resources/not-exist.cfg: withBinaryFile: does not exist (No such file or directory) \ No newline at end of file diff --git a/tests/resources/err-import.cfg.err.ghc8 b/tests/resources/err-import.cfg.err.ghc8 new file mode 100644 index 0000000..4efd75e --- /dev/null +++ b/tests/resources/err-import.cfg.err.ghc8 @@ -0,0 +1 @@ +tests/resources/not-exist.cfg: openBinaryFile: does not exist (No such file or directory) \ No newline at end of file diff --git a/tests/resources/utf-8.cfg b/tests/resources/utf-8.cfg new file mode 100644 index 0000000..d6a51e0 --- /dev/null +++ b/tests/resources/utf-8.cfg @@ -0,0 +1,2 @@ +# Commènt utf-8 chàrs +utf-kèy = "utf-8-vàlue"