PostgREST · laurenceisla · Jan 17, 2025 · laurenceisla · Jan 17, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -14,6 +14,7 @@ This project adheres to [Semantic Versioning](http://semver.org/).
  - #3727, Log maximum pool size - @steve-chavez
  - #1536, Add string comparison feature for jwt-role-claim-key - @taimoorzaeem
  - #3747, Allow `not_null` value for the `is` operator - @taimoorzaeem
+ - #2255, Apply `to_tsvector()` explicitly to the full-text search filtered column (excluding `tsvector` types) - @laurenceisla
 
 ### Fixed
 

diff --git a/docs/references/api/tables_views.rst b/docs/references/api/tables_views.rst
@@ -193,7 +193,7 @@ The :code:`fts` filter mentioned above has a number of options to support flexib
 
   curl "http://localhost:3000/tsearch?my_tsv=not.wfts(french).amusant"
 
-Using `websearch_to_tsquery` requires PostgreSQL of version at least 11.0 and will raise an error in earlier versions of the database.
+This also works with columns of text and JSON types, converting them to ``tsvector`` accordingly.
 
 .. _v_filter:
 

diff --git a/src/PostgREST/Plan.hs b/src/PostgREST/Plan.hs
@@ -265,27 +265,29 @@ data ResolverContext = ResolverContext
   , outputType      :: Text                 -- ^ The output type for the response payload; e.g. "csv", "json", "binary".
   }
 
-resolveColumnField :: Column -> CoercibleField
-resolveColumnField col = CoercibleField (colName col) mempty False (colNominalType col) Nothing (colDefault col) False
+resolveColumnField :: Column -> ToTsVector -> CoercibleField
+resolveColumnField col toTsV = CoercibleField (colName col) mempty False toTsV (colNominalType col) Nothing (colDefault col) False
 
-resolveTableFieldName :: Table -> FieldName -> CoercibleField
-resolveTableFieldName table fieldName =
+resolveTableFieldName :: Table -> FieldName -> ToTsVector -> CoercibleField
+resolveTableFieldName table fieldName toTsV=
   fromMaybe (unknownField fieldName []) $ HMI.lookup fieldName (tableColumns table) >>=
-    Just . resolveColumnField
+    Just . flip resolveColumnField toTsV
 
 -- | Resolve a type within the context based on the given field name and JSON path. Although there are situations where failure to resolve a field is considered an error (see `resolveOrError`), there are also situations where we allow it (RPC calls). If it should be an error and `resolveOrError` doesn't fit, ensure to check the `cfIRType` isn't empty.
-resolveTypeOrUnknown :: ResolverContext -> Field -> CoercibleField
-resolveTypeOrUnknown ResolverContext{..} (fn, jp) =
+resolveTypeOrUnknown :: ResolverContext -> Field -> ToTsVector -> CoercibleField
+resolveTypeOrUnknown ResolverContext{..} (fn, jp) toTsV =
   case res of
     -- types that are already json/jsonb don't need to be converted with `to_jsonb` for using arrow operators `data->attr`
     -- this prevents indexes not applying https://github.com/PostgREST/postgrest/issues/2594
-    cf@CoercibleField{cfIRType="json"}  -> cf{cfJsonPath=jp, cfToJson=False}
-    cf@CoercibleField{cfIRType="jsonb"} -> cf{cfJsonPath=jp, cfToJson=False}
+    cf@CoercibleField{cfIRType="json"}     -> cf{cfJsonPath=jp, cfToJson=False}
+    cf@CoercibleField{cfIRType="jsonb"}    -> cf{cfJsonPath=jp, cfToJson=False}
+    -- Do not apply to_tsvector to tsvector types
+    cf@CoercibleField{cfIRType="tsvector"} -> cf{cfJsonPath=jp, cfToJson=True, cfToTsVector=Nothing}
     -- other types will get converted `to_jsonb(col)->attr`, even unknown types
-    cf                                  -> cf{cfJsonPath=jp, cfToJson=True}
+    cf                                     -> cf{cfJsonPath=jp, cfToJson=True}
   where
     res = fromMaybe (unknownField fn jp) $ HM.lookup qi tables >>=
-          Just . flip resolveTableFieldName fn
+          Just . (\t -> resolveTableFieldName t fn toTsV)
 
 -- | Install any pre-defined data representation from source to target to coerce this reference.
 --
@@ -311,11 +313,15 @@ withJsonParse ctx field@CoercibleField{cfIRType} = withTransformer ctx "json" cf
 
 -- | Map the intermediate representation type to the output type defined by the resolver context (normally json), if available.
 resolveOutputField :: ResolverContext -> Field -> CoercibleField
-resolveOutputField ctx field = withOutputFormat ctx $ resolveTypeOrUnknown ctx field
+resolveOutputField ctx field = withOutputFormat ctx $ resolveTypeOrUnknown ctx field Nothing
 
 -- | Map the query string format of a value (text) into the intermediate representation type, if available.
-resolveQueryInputField :: ResolverContext -> Field -> CoercibleField
-resolveQueryInputField ctx field = withTextParse ctx $ resolveTypeOrUnknown ctx field
+resolveQueryInputField :: ResolverContext -> Field -> OpExpr -> CoercibleField
+resolveQueryInputField ctx field opExpr = withTextParse ctx $ resolveTypeOrUnknown ctx field toTsVector
+  where
+    toTsVector = case opExpr of
+      OpExpr _ (Fts _ lang _) -> Just lang
+      _                       -> Nothing
 
 -- | Builds the ReadPlan tree on a number of stages.
 -- | Adds filters, order, limits on its respective nodes.
@@ -452,7 +458,7 @@ expandStarsForTable ctx@ResolverContext{representations, outputType} hasAgg rp@R
 
     expandStarSelectField :: Bool -> [Column] -> CoercibleSelectField -> [CoercibleSelectField]
     expandStarSelectField _ columns sel@CoercibleSelectField{csField=CoercibleField{cfName="*", cfJsonPath=[]}, csAggFunction=Nothing} =
-      map (\col -> sel { csField = withOutputFormat ctx $ resolveColumnField col }) columns
+      map (\col -> sel { csField = withOutputFormat ctx $ resolveColumnField col Nothing }) columns
     expandStarSelectField True _ sel@CoercibleSelectField{csField=fld@CoercibleField{cfName="*", cfJsonPath=[]}, csAggFunction=Just Count} =
       [sel { csField = fld { cfFullRow = True } }]
     expandStarSelectField _ _ selectField = [selectField]
@@ -766,7 +772,7 @@ addOrders ctx ApiRequest{..} rReq =
 
 resolveOrder :: ResolverContext -> OrderTerm -> CoercibleOrderTerm
 resolveOrder _ (OrderRelationTerm a b c d) = CoercibleOrderRelationTerm a b c d
-resolveOrder ctx (OrderTerm fld dir nulls) = CoercibleOrderTerm (resolveTypeOrUnknown ctx fld) dir nulls
+resolveOrder ctx (OrderTerm fld dir nulls) = CoercibleOrderTerm (resolveTypeOrUnknown ctx fld Nothing) dir nulls
 
 -- Validates that the related resource on the order is an embedded resource,
 -- e.g. if `clients` is inside the `select` in /projects?order=clients(id)&select=*,clients(*),
@@ -831,7 +837,7 @@ addRelatedOrders (Node rp@ReadPlan{order,from} forest) = do
 --       where_ = [
 --         CoercibleStmnt (
 --           CoercibleFilter {
---            field = CoercibleField {cfName = "projects", cfJsonPath = [], cfToJson=False, cfIRType = "", cfTransform = Nothing, cfDefault = Nothing, cfFullRow = False},
+--            field = CoercibleField {cfName = "projects", cfJsonPath = [], cfToJson=False, cfToTsVector = Nothing, cfIRType = "", cfTransform = Nothing, cfDefault = Nothing, cfFullRow = False},
 --            opExpr = op
 --           }
 --         )
@@ -847,7 +853,7 @@ addRelatedOrders (Node rp@ReadPlan{order,from} forest) = do
 -- Don't do anything to the filter if there's no embedding (a subtree) on projects. Assume it's a normal filter.
 --
 -- >>> ReadPlan.where_ . rootLabel <$> addNullEmbedFilters (readPlanTree nullOp [])
--- Right [CoercibleStmnt (CoercibleFilter {field = CoercibleField {cfName = "projects", cfJsonPath = [], cfToJson = False, cfIRType = "", cfTransform = Nothing, cfDefault = Nothing, cfFullRow = False}, opExpr = OpExpr True (Is IsNull)})]
+-- Right [CoercibleStmnt (CoercibleFilter {field = CoercibleField {cfName = "projects", cfJsonPath = [], cfToJson = False, cfToTsVector = Nothing, cfIRType = "", cfTransform = Nothing, cfDefault = Nothing, cfFullRow = False}, opExpr = OpExpr True (Is IsNull)})]
 --
 -- If there's an embedding on projects, then change the filter to use the internal aggregate name (`clients_projects_1`) so the filter can succeed later.
 --
@@ -866,7 +872,7 @@ addNullEmbedFilters (Node rp@ReadPlan{where_=curLogic} forest) = do
     newNullFilters rPlans = \case
       (CoercibleExpr b lOp trees) ->
         CoercibleExpr b lOp <$> (newNullFilters rPlans `traverse` trees)
-      flt@(CoercibleStmnt (CoercibleFilter (CoercibleField fld [] _ _ _ _ _) opExpr)) ->
+      flt@(CoercibleStmnt (CoercibleFilter (CoercibleField fld [] _ _ _ _ _ _) opExpr)) ->
         let foundRP = find (\ReadPlan{relName, relAlias} -> fld == fromMaybe relName relAlias) rPlans in
         case (foundRP, opExpr) of
           (Just ReadPlan{relAggAlias}, OpExpr b (Is IsNull)) -> Right $ CoercibleStmnt $ CoercibleFilterNullEmbed b relAggAlias
@@ -900,7 +906,7 @@ resolveLogicTree ctx (Stmnt flt) = CoercibleStmnt $ resolveFilter ctx flt
 resolveLogicTree ctx (Expr b op lts) = CoercibleExpr b op (map (resolveLogicTree ctx) lts)
 
 resolveFilter :: ResolverContext -> Filter -> CoercibleFilter
-resolveFilter ctx (Filter fld opExpr) = CoercibleFilter{field=resolveQueryInputField ctx fld, opExpr=opExpr}
+resolveFilter ctx (Filter fld opExpr) = CoercibleFilter{field=resolveQueryInputField ctx fld opExpr, opExpr=opExpr}
 
 -- Validates that spread embeds are only done on to-one relationships
 validateSpreadEmbeds :: ReadPlanTree -> Either ApiRequestError ReadPlanTree
@@ -963,7 +969,7 @@ mutatePlan mutation qi ApiRequest{iPreferences=Preferences{..}, ..} SchemaCache{
 resolveOrError :: ResolverContext -> Maybe Table -> FieldName -> Either ApiRequestError CoercibleField
 resolveOrError _ Nothing _ = Left NotFound
 resolveOrError ctx (Just table) field =
-  case resolveTableFieldName table field of
+  case resolveTableFieldName table field Nothing of
     CoercibleField{cfIRType=""} -> Left $ ColumnNotFound (tableName table) field
     cf                          -> Right $ withJsonParse ctx cf
 

diff --git a/src/PostgREST/Plan/Types.hs b/src/PostgREST/Plan/Types.hs
@@ -5,6 +5,7 @@
   , CoercibleLogicTree(..)
   , CoercibleFilter(..)
   , TransformerProc
+  , ToTsVector
   , CoercibleOrderTerm(..)
   , RelSelectField(..)
   , RelJsonEmbedMode(..)
@@ -20,6 +21,7 @@
 import Protolude
 
 type TransformerProc = Text
+type ToTsVector = Maybe (Maybe Text)
 
 -- | A CoercibleField pairs the name of a query element with any type coercion information we need for some specific use case.
 -- |
@@ -33,17 +35,18 @@
 -- |
 -- | The type value is allowed to be the empty string. The analog here is soft type checking in programming languages: sometimes we don't need a variable to have a specified type and things will work anyhow. So the empty type variant is valid when we don't know and *don't need to know* about the specific type in some context. Note that this variation should not be used if it guarantees failure: in that case you should instead raise an error at the planning stage and bail out. For example, we can't parse JSON with `json_to_recordset` without knowing the types of each recipient field, and so error out. Using the empty string for the type would be incorrect and futile. On the other hand we use the empty type for RPC calls since type resolution isn't implemented for RPC, but it's fine because the query still works with Postgres' implicit coercion. In the future, hopefully we will support data representations across the board and then the empty type may be permanently retired.
 data CoercibleField = CoercibleField
-  { cfName      :: FieldName
-  , cfJsonPath  :: JsonPath
-  , cfToJson    :: Bool
-  , cfIRType    :: Text                  -- ^ The native Postgres type of the field, the intermediate (IR) type before mapping.
-  , cfTransform :: Maybe TransformerProc -- ^ The optional mapping from irType -> targetType.
-  , cfDefault   :: Maybe Text
-  , cfFullRow   :: Bool                  -- ^ True if the field represents the whole selected row. Used in spread rels: instead of COUNT(*), it does a COUNT(<row>) in order to not mix with other spreaded resources.
+  { cfName       :: FieldName
+  , cfJsonPath   :: JsonPath
+  , cfToJson     :: Bool
+  , cfToTsVector:: ToTsVector            -- ^ If the field should be converted using to_tsvector(<language>, <field>)
+  , cfIRType     :: Text                  -- ^ The native Postgres type of the field, the intermediate (IR) type before mapping.
+  , cfTransform  :: Maybe TransformerProc -- ^ The optional mapping from irType -> targetType.
+  , cfDefault    :: Maybe Text
+  , cfFullRow    :: Bool                  -- ^ True if the field represents the whole selected row. Used in spread rels: instead of COUNT(*), it does a COUNT(<row>) in order to not mix with other spreaded resources.
   } deriving (Eq, Show)
 
 unknownField :: FieldName -> JsonPath -> CoercibleField
-unknownField name path = CoercibleField name path False "" Nothing Nothing False
+unknownField name path = CoercibleField name path False Nothing "" Nothing Nothing False
 
 -- | Like an API request LogicTree, but with coercible field information.
 data CoercibleLogicTree

diff --git a/src/PostgREST/Query/QueryBuilder.hs b/src/PostgREST/Query/QueryBuilder.hs
@@ -206,7 +206,7 @@ callPlanToQuery (FunctionCall qi params arguments returnsScalar returnsSetOfScal
       KeyParams []    -> "FROM " <> callIt mempty
       KeyParams prms  -> case arguments of
         DirectArgs args -> "FROM " <> callIt (fmtArgs prms args)
-        JsonArgs json   -> fromJsonBodyF json ((\p -> CoercibleField (ppName p) mempty False (ppTypeMaxLength p) Nothing Nothing False) <$> prms) False True False <> ", " <>
+        JsonArgs json   -> fromJsonBodyF json ((\p -> CoercibleField (ppName p) mempty False Nothing (ppTypeMaxLength p) Nothing Nothing False) <$> prms) False True False <> ", " <>
                          "LATERAL " <> callIt (fmtParams prms)
 
     callIt :: SQL.Snippet -> SQL.Snippet

diff --git a/src/PostgREST/Query/SqlFragment.hs b/src/PostgREST/Query/SqlFragment.hs
@@ -252,10 +252,15 @@ pgFmtCallUnary :: Text -> SQL.Snippet -> SQL.Snippet
 pgFmtCallUnary f x = SQL.sql (encodeUtf8 f) <> "(" <> x <> ")"
 
 pgFmtField :: QualifiedIdentifier -> CoercibleField -> SQL.Snippet
-pgFmtField table CoercibleField{cfFullRow=True}                                          = fromQi table
-pgFmtField table CoercibleField{cfName=fn, cfJsonPath=[]}                                = pgFmtColumn table fn
-pgFmtField table CoercibleField{cfName=fn, cfToJson=doToJson, cfJsonPath=jp} | doToJson  = "to_jsonb(" <> pgFmtColumn table fn <> ")" <> pgFmtJsonPath jp
-                                                                             | otherwise = pgFmtColumn table fn <> pgFmtJsonPath jp
+pgFmtField table cf = case cfToTsVector cf of
+  Just lang -> "to_tsvector(" <> pgFmtFtsLang lang <> fmtFld <> ")"
+  _         -> fmtFld
+  where
+    fmtFld = case cf of
+      CoercibleField{cfFullRow=True}                                          -> fromQi table
+      CoercibleField{cfName=fn, cfJsonPath=[]}                                -> pgFmtColumn table fn
+      CoercibleField{cfName=fn, cfToJson=doToJson, cfJsonPath=jp} | doToJson  -> "to_jsonb(" <> pgFmtColumn table fn <> ")" <> pgFmtJsonPath jp
+                                                                  | otherwise -> pgFmtColumn table fn <> pgFmtJsonPath jp
 
 -- Select the value of a named element from a table, applying its optional coercion mapping if any.
 pgFmtTableCoerce :: QualifiedIdentifier -> CoercibleField -> SQL.Snippet
@@ -399,16 +404,18 @@ pgFmtFilter table (CoercibleFilter fld (OpExpr hasNot oper)) = notOp <> " " <> p
       [""] -> "= ANY('{}') "
       _    -> "= ANY (" <> pgFmtArrayLiteralForField vals fld <> ") "
 
-   Fts op lang val -> " " <> ftsOperator op <> "(" <> ftsLang lang <> unknownLiteral val <> ") "
+   Fts op lang val -> " " <> ftsOperator op <> "(" <> pgFmtFtsLang lang <> unknownLiteral val <> ") "
  where
-   ftsLang = maybe mempty (\l -> unknownLiteral l <> ", ")
    notOp = if hasNot then "NOT" else mempty
    star c = if c == '*' then '%' else c
    fmtQuant q val = case q of
     Just QuantAny -> "ANY(" <> val <> ")"
     Just QuantAll -> "ALL(" <> val <> ")"
     Nothing       -> val
 
+pgFmtFtsLang :: Maybe Text -> SQL.Snippet
+pgFmtFtsLang = maybe mempty (\l -> unknownLiteral l <> ", ")
+
 pgFmtJoinCondition :: JoinCondition -> SQL.Snippet
 pgFmtJoinCondition (JoinCondition (qi1, col1) (qi2, col2)) =
   pgFmtColumn qi1 col1 <> " = " <> pgFmtColumn qi2 col2

diff --git a/test/spec/Feature/Query/AndOrParamsSpec.hs b/test/spec/Feature/Query/AndOrParamsSpec.hs
@@ -81,7 +81,7 @@ spec =
         it "can handle is" $
           get "/entities?and=(name.is.null,arr.is.null)&select=id" `shouldRespondWith`
             [json|[{ "id": 4 }]|] { matchHeaders = [matchContentTypeJson] }
-        it "can handle fts" $ do
+        it "can handle fts on tsvector columns" $ do
           get "/entities?or=(text_search_vector.fts.bar,text_search_vector.fts.baz)&select=id" `shouldRespondWith`
             [json|[{ "id": 1 }, { "id": 2 }]|] { matchHeaders = [matchContentTypeJson] }
           get "/tsearch?or=(text_search_vector.plfts(german).Art%20Spass, text_search_vector.plfts(french).amusant%20impossible, text_search_vector.fts(english).impossible)" `shouldRespondWith`
@@ -90,6 +90,17 @@ spec =
               {"text_search_vector": "'amus':5 'fair':7 'impossibl':9 'peu':4" },
               {"text_search_vector": "'art':4 'spass':5 'unmog':7"}
             ]|] { matchHeaders = [matchContentTypeJson] }
+        it "can handle fts on text and json columns" $ do
+          get "/grandchild_entities?or=(jsonb_col.fts.bar,jsonb_col.fts.foo)&select=jsonb_col" `shouldRespondWith`
+            [json|[
+              { "jsonb_col": {"a": {"b":"foo"}} },
+              { "jsonb_col": {"b":"bar"} }]
+            |] { matchHeaders = [matchContentTypeJson] }
+          get "/tsearch_to_tsvector?and=(text_search.not.plfts(german).Art%20Spass, text_search.not.plfts(french).amusant%20impossible, text_search.not.fts(english).impossible)&select=text_search" `shouldRespondWith`
+            [json|[
+              { "text_search": "But also fun to do what is possible" },
+              { "text_search": "Fat cats ate rats" }]
+            |] { matchHeaders = [matchContentTypeJson] }
         it "can handle isdistinct" $
           get "/entities?and=(id.gte.2,arr.isdistinct.{1,2})&select=id" `shouldRespondWith`
             [json|[{ "id": 3 }, { "id": 4 }]|] { matchHeaders = [matchContentTypeJson] }