Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: apply to_tsvector to the filtered columns when a fts operator is used #3845

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ This project adheres to [Semantic Versioning](http://semver.org/).
- #3727, Log maximum pool size - @steve-chavez
- #1536, Add string comparison feature for jwt-role-claim-key - @taimoorzaeem
- #3747, Allow `not_null` value for the `is` operator - @taimoorzaeem
- #2255, Apply `to_tsvector()` explicitly to the full-text search filtered column (excluding `tsvector` types) - @laurenceisla

### Fixed

Expand Down
2 changes: 1 addition & 1 deletion docs/references/api/tables_views.rst
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ The :code:`fts` filter mentioned above has a number of options to support flexib

curl "http://localhost:3000/tsearch?my_tsv=not.wfts(french).amusant"

Using `websearch_to_tsquery` requires PostgreSQL of version at least 11.0 and will raise an error in earlier versions of the database.
This also works with columns of text and JSON types, converting them to ``tsvector`` accordingly.

.. _v_filter:

Expand Down
48 changes: 27 additions & 21 deletions src/PostgREST/Plan.hs
Original file line number Diff line number Diff line change
Expand Up @@ -265,27 +265,29 @@ data ResolverContext = ResolverContext
, outputType :: Text -- ^ The output type for the response payload; e.g. "csv", "json", "binary".
}

resolveColumnField :: Column -> CoercibleField
resolveColumnField col = CoercibleField (colName col) mempty False (colNominalType col) Nothing (colDefault col) False
resolveColumnField :: Column -> ToTsVector -> CoercibleField
resolveColumnField col toTsV = CoercibleField (colName col) mempty False toTsV (colNominalType col) Nothing (colDefault col) False

resolveTableFieldName :: Table -> FieldName -> CoercibleField
resolveTableFieldName table fieldName =
resolveTableFieldName :: Table -> FieldName -> ToTsVector -> CoercibleField
resolveTableFieldName table fieldName toTsV=
fromMaybe (unknownField fieldName []) $ HMI.lookup fieldName (tableColumns table) >>=
Just . resolveColumnField
Just . flip resolveColumnField toTsV

-- | Resolve a type within the context based on the given field name and JSON path. Although there are situations where failure to resolve a field is considered an error (see `resolveOrError`), there are also situations where we allow it (RPC calls). If it should be an error and `resolveOrError` doesn't fit, ensure to check the `cfIRType` isn't empty.
resolveTypeOrUnknown :: ResolverContext -> Field -> CoercibleField
resolveTypeOrUnknown ResolverContext{..} (fn, jp) =
resolveTypeOrUnknown :: ResolverContext -> Field -> ToTsVector -> CoercibleField
resolveTypeOrUnknown ResolverContext{..} (fn, jp) toTsV =
case res of
-- types that are already json/jsonb don't need to be converted with `to_jsonb` for using arrow operators `data->attr`
-- this prevents indexes not applying https://github.com/PostgREST/postgrest/issues/2594
cf@CoercibleField{cfIRType="json"} -> cf{cfJsonPath=jp, cfToJson=False}
cf@CoercibleField{cfIRType="jsonb"} -> cf{cfJsonPath=jp, cfToJson=False}
cf@CoercibleField{cfIRType="json"} -> cf{cfJsonPath=jp, cfToJson=False}
cf@CoercibleField{cfIRType="jsonb"} -> cf{cfJsonPath=jp, cfToJson=False}
-- Do not apply to_tsvector to tsvector types
cf@CoercibleField{cfIRType="tsvector"} -> cf{cfJsonPath=jp, cfToJson=True, cfToTsVector=Nothing}
-- other types will get converted `to_jsonb(col)->attr`, even unknown types
cf -> cf{cfJsonPath=jp, cfToJson=True}
cf -> cf{cfJsonPath=jp, cfToJson=True}
where
res = fromMaybe (unknownField fn jp) $ HM.lookup qi tables >>=
Just . flip resolveTableFieldName fn
Just . (\t -> resolveTableFieldName t fn toTsV)
Comment on lines 289 to +290
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There was a performance concern here: #2255 (comment)

I don't think there will be a difference in performance since it's using this lookup that is already applied for all filters before this PR.


-- | Install any pre-defined data representation from source to target to coerce this reference.
--
Expand All @@ -311,11 +313,15 @@ withJsonParse ctx field@CoercibleField{cfIRType} = withTransformer ctx "json" cf

-- | Map the intermediate representation type to the output type defined by the resolver context (normally json), if available.
resolveOutputField :: ResolverContext -> Field -> CoercibleField
resolveOutputField ctx field = withOutputFormat ctx $ resolveTypeOrUnknown ctx field
resolveOutputField ctx field = withOutputFormat ctx $ resolveTypeOrUnknown ctx field Nothing

-- | Map the query string format of a value (text) into the intermediate representation type, if available.
resolveQueryInputField :: ResolverContext -> Field -> CoercibleField
resolveQueryInputField ctx field = withTextParse ctx $ resolveTypeOrUnknown ctx field
resolveQueryInputField :: ResolverContext -> Field -> OpExpr -> CoercibleField
resolveQueryInputField ctx field opExpr = withTextParse ctx $ resolveTypeOrUnknown ctx field toTsVector
where
toTsVector = case opExpr of
OpExpr _ (Fts _ lang _) -> Just lang
_ -> Nothing

-- | Builds the ReadPlan tree on a number of stages.
-- | Adds filters, order, limits on its respective nodes.
Expand Down Expand Up @@ -452,7 +458,7 @@ expandStarsForTable ctx@ResolverContext{representations, outputType} hasAgg rp@R

expandStarSelectField :: Bool -> [Column] -> CoercibleSelectField -> [CoercibleSelectField]
expandStarSelectField _ columns sel@CoercibleSelectField{csField=CoercibleField{cfName="*", cfJsonPath=[]}, csAggFunction=Nothing} =
map (\col -> sel { csField = withOutputFormat ctx $ resolveColumnField col }) columns
map (\col -> sel { csField = withOutputFormat ctx $ resolveColumnField col Nothing }) columns
expandStarSelectField True _ sel@CoercibleSelectField{csField=fld@CoercibleField{cfName="*", cfJsonPath=[]}, csAggFunction=Just Count} =
[sel { csField = fld { cfFullRow = True } }]
expandStarSelectField _ _ selectField = [selectField]
Expand Down Expand Up @@ -766,7 +772,7 @@ addOrders ctx ApiRequest{..} rReq =

resolveOrder :: ResolverContext -> OrderTerm -> CoercibleOrderTerm
resolveOrder _ (OrderRelationTerm a b c d) = CoercibleOrderRelationTerm a b c d
resolveOrder ctx (OrderTerm fld dir nulls) = CoercibleOrderTerm (resolveTypeOrUnknown ctx fld) dir nulls
resolveOrder ctx (OrderTerm fld dir nulls) = CoercibleOrderTerm (resolveTypeOrUnknown ctx fld Nothing) dir nulls

-- Validates that the related resource on the order is an embedded resource,
-- e.g. if `clients` is inside the `select` in /projects?order=clients(id)&select=*,clients(*),
Expand Down Expand Up @@ -831,7 +837,7 @@ addRelatedOrders (Node rp@ReadPlan{order,from} forest) = do
-- where_ = [
-- CoercibleStmnt (
-- CoercibleFilter {
-- field = CoercibleField {cfName = "projects", cfJsonPath = [], cfToJson=False, cfIRType = "", cfTransform = Nothing, cfDefault = Nothing, cfFullRow = False},
-- field = CoercibleField {cfName = "projects", cfJsonPath = [], cfToJson=False, cfToTsVector = Nothing, cfIRType = "", cfTransform = Nothing, cfDefault = Nothing, cfFullRow = False},
-- opExpr = op
-- }
-- )
Expand All @@ -847,7 +853,7 @@ addRelatedOrders (Node rp@ReadPlan{order,from} forest) = do
-- Don't do anything to the filter if there's no embedding (a subtree) on projects. Assume it's a normal filter.
--
-- >>> ReadPlan.where_ . rootLabel <$> addNullEmbedFilters (readPlanTree nullOp [])
-- Right [CoercibleStmnt (CoercibleFilter {field = CoercibleField {cfName = "projects", cfJsonPath = [], cfToJson = False, cfIRType = "", cfTransform = Nothing, cfDefault = Nothing, cfFullRow = False}, opExpr = OpExpr True (Is IsNull)})]
-- Right [CoercibleStmnt (CoercibleFilter {field = CoercibleField {cfName = "projects", cfJsonPath = [], cfToJson = False, cfToTsVector = Nothing, cfIRType = "", cfTransform = Nothing, cfDefault = Nothing, cfFullRow = False}, opExpr = OpExpr True (Is IsNull)})]
--
-- If there's an embedding on projects, then change the filter to use the internal aggregate name (`clients_projects_1`) so the filter can succeed later.
--
Expand All @@ -866,7 +872,7 @@ addNullEmbedFilters (Node rp@ReadPlan{where_=curLogic} forest) = do
newNullFilters rPlans = \case
(CoercibleExpr b lOp trees) ->
CoercibleExpr b lOp <$> (newNullFilters rPlans `traverse` trees)
flt@(CoercibleStmnt (CoercibleFilter (CoercibleField fld [] _ _ _ _ _) opExpr)) ->
flt@(CoercibleStmnt (CoercibleFilter (CoercibleField fld [] _ _ _ _ _ _) opExpr)) ->
let foundRP = find (\ReadPlan{relName, relAlias} -> fld == fromMaybe relName relAlias) rPlans in
case (foundRP, opExpr) of
(Just ReadPlan{relAggAlias}, OpExpr b (Is IsNull)) -> Right $ CoercibleStmnt $ CoercibleFilterNullEmbed b relAggAlias
Expand Down Expand Up @@ -900,7 +906,7 @@ resolveLogicTree ctx (Stmnt flt) = CoercibleStmnt $ resolveFilter ctx flt
resolveLogicTree ctx (Expr b op lts) = CoercibleExpr b op (map (resolveLogicTree ctx) lts)

resolveFilter :: ResolverContext -> Filter -> CoercibleFilter
resolveFilter ctx (Filter fld opExpr) = CoercibleFilter{field=resolveQueryInputField ctx fld, opExpr=opExpr}
resolveFilter ctx (Filter fld opExpr) = CoercibleFilter{field=resolveQueryInputField ctx fld opExpr, opExpr=opExpr}

-- Validates that spread embeds are only done on to-one relationships
validateSpreadEmbeds :: ReadPlanTree -> Either ApiRequestError ReadPlanTree
Expand Down Expand Up @@ -963,7 +969,7 @@ mutatePlan mutation qi ApiRequest{iPreferences=Preferences{..}, ..} SchemaCache{
resolveOrError :: ResolverContext -> Maybe Table -> FieldName -> Either ApiRequestError CoercibleField
resolveOrError _ Nothing _ = Left NotFound
resolveOrError ctx (Just table) field =
case resolveTableFieldName table field of
case resolveTableFieldName table field Nothing of
CoercibleField{cfIRType=""} -> Left $ ColumnNotFound (tableName table) field
cf -> Right $ withJsonParse ctx cf

Expand Down
19 changes: 11 additions & 8 deletions src/PostgREST/Plan/Types.hs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
, CoercibleLogicTree(..)
, CoercibleFilter(..)
, TransformerProc
, ToTsVector
, CoercibleOrderTerm(..)
, RelSelectField(..)
, RelJsonEmbedMode(..)
Expand All @@ -20,6 +21,7 @@
import Protolude

type TransformerProc = Text
type ToTsVector = Maybe (Maybe Text)

-- | A CoercibleField pairs the name of a query element with any type coercion information we need for some specific use case.
-- |
Expand All @@ -33,17 +35,18 @@
-- |
-- | The type value is allowed to be the empty string. The analog here is soft type checking in programming languages: sometimes we don't need a variable to have a specified type and things will work anyhow. So the empty type variant is valid when we don't know and *don't need to know* about the specific type in some context. Note that this variation should not be used if it guarantees failure: in that case you should instead raise an error at the planning stage and bail out. For example, we can't parse JSON with `json_to_recordset` without knowing the types of each recipient field, and so error out. Using the empty string for the type would be incorrect and futile. On the other hand we use the empty type for RPC calls since type resolution isn't implemented for RPC, but it's fine because the query still works with Postgres' implicit coercion. In the future, hopefully we will support data representations across the board and then the empty type may be permanently retired.
data CoercibleField = CoercibleField
{ cfName :: FieldName
, cfJsonPath :: JsonPath
, cfToJson :: Bool
, cfIRType :: Text -- ^ The native Postgres type of the field, the intermediate (IR) type before mapping.
, cfTransform :: Maybe TransformerProc -- ^ The optional mapping from irType -> targetType.
, cfDefault :: Maybe Text
, cfFullRow :: Bool -- ^ True if the field represents the whole selected row. Used in spread rels: instead of COUNT(*), it does a COUNT(<row>) in order to not mix with other spreaded resources.
{ cfName :: FieldName
, cfJsonPath :: JsonPath
, cfToJson :: Bool

Check warning on line 40 in src/PostgREST/Plan/Types.hs

View check run for this annotation

Codecov / codecov/patch

src/PostgREST/Plan/Types.hs#L39-L40

Added lines #L39 - L40 were not covered by tests
, cfToTsVector:: ToTsVector -- ^ If the field should be converted using to_tsvector(<language>, <field>)
, cfIRType :: Text -- ^ The native Postgres type of the field, the intermediate (IR) type before mapping.
, cfTransform :: Maybe TransformerProc -- ^ The optional mapping from irType -> targetType.
, cfDefault :: Maybe Text

Check warning on line 44 in src/PostgREST/Plan/Types.hs

View check run for this annotation

Codecov / codecov/patch

src/PostgREST/Plan/Types.hs#L43-L44

Added lines #L43 - L44 were not covered by tests
, cfFullRow :: Bool -- ^ True if the field represents the whole selected row. Used in spread rels: instead of COUNT(*), it does a COUNT(<row>) in order to not mix with other spreaded resources.
} deriving (Eq, Show)

unknownField :: FieldName -> JsonPath -> CoercibleField
unknownField name path = CoercibleField name path False "" Nothing Nothing False
unknownField name path = CoercibleField name path False Nothing "" Nothing Nothing False

-- | Like an API request LogicTree, but with coercible field information.
data CoercibleLogicTree
Expand Down
2 changes: 1 addition & 1 deletion src/PostgREST/Query/QueryBuilder.hs
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ callPlanToQuery (FunctionCall qi params arguments returnsScalar returnsSetOfScal
KeyParams [] -> "FROM " <> callIt mempty
KeyParams prms -> case arguments of
DirectArgs args -> "FROM " <> callIt (fmtArgs prms args)
JsonArgs json -> fromJsonBodyF json ((\p -> CoercibleField (ppName p) mempty False (ppTypeMaxLength p) Nothing Nothing False) <$> prms) False True False <> ", " <>
JsonArgs json -> fromJsonBodyF json ((\p -> CoercibleField (ppName p) mempty False Nothing (ppTypeMaxLength p) Nothing Nothing False) <$> prms) False True False <> ", " <>
"LATERAL " <> callIt (fmtParams prms)

callIt :: SQL.Snippet -> SQL.Snippet
Expand Down
19 changes: 13 additions & 6 deletions src/PostgREST/Query/SqlFragment.hs
Original file line number Diff line number Diff line change
Expand Up @@ -252,10 +252,15 @@ pgFmtCallUnary :: Text -> SQL.Snippet -> SQL.Snippet
pgFmtCallUnary f x = SQL.sql (encodeUtf8 f) <> "(" <> x <> ")"

pgFmtField :: QualifiedIdentifier -> CoercibleField -> SQL.Snippet
pgFmtField table CoercibleField{cfFullRow=True} = fromQi table
pgFmtField table CoercibleField{cfName=fn, cfJsonPath=[]} = pgFmtColumn table fn
pgFmtField table CoercibleField{cfName=fn, cfToJson=doToJson, cfJsonPath=jp} | doToJson = "to_jsonb(" <> pgFmtColumn table fn <> ")" <> pgFmtJsonPath jp
| otherwise = pgFmtColumn table fn <> pgFmtJsonPath jp
pgFmtField table cf = case cfToTsVector cf of
Just lang -> "to_tsvector(" <> pgFmtFtsLang lang <> fmtFld <> ")"
_ -> fmtFld
where
fmtFld = case cf of
CoercibleField{cfFullRow=True} -> fromQi table
CoercibleField{cfName=fn, cfJsonPath=[]} -> pgFmtColumn table fn
CoercibleField{cfName=fn, cfToJson=doToJson, cfJsonPath=jp} | doToJson -> "to_jsonb(" <> pgFmtColumn table fn <> ")" <> pgFmtJsonPath jp
| otherwise -> pgFmtColumn table fn <> pgFmtJsonPath jp

-- Select the value of a named element from a table, applying its optional coercion mapping if any.
pgFmtTableCoerce :: QualifiedIdentifier -> CoercibleField -> SQL.Snippet
Expand Down Expand Up @@ -399,16 +404,18 @@ pgFmtFilter table (CoercibleFilter fld (OpExpr hasNot oper)) = notOp <> " " <> p
[""] -> "= ANY('{}') "
_ -> "= ANY (" <> pgFmtArrayLiteralForField vals fld <> ") "

Fts op lang val -> " " <> ftsOperator op <> "(" <> ftsLang lang <> unknownLiteral val <> ") "
Fts op lang val -> " " <> ftsOperator op <> "(" <> pgFmtFtsLang lang <> unknownLiteral val <> ") "
where
ftsLang = maybe mempty (\l -> unknownLiteral l <> ", ")
notOp = if hasNot then "NOT" else mempty
star c = if c == '*' then '%' else c
fmtQuant q val = case q of
Just QuantAny -> "ANY(" <> val <> ")"
Just QuantAll -> "ALL(" <> val <> ")"
Nothing -> val

pgFmtFtsLang :: Maybe Text -> SQL.Snippet
pgFmtFtsLang = maybe mempty (\l -> unknownLiteral l <> ", ")

pgFmtJoinCondition :: JoinCondition -> SQL.Snippet
pgFmtJoinCondition (JoinCondition (qi1, col1) (qi2, col2)) =
pgFmtColumn qi1 col1 <> " = " <> pgFmtColumn qi2 col2
Expand Down
13 changes: 12 additions & 1 deletion test/spec/Feature/Query/AndOrParamsSpec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ spec =
it "can handle is" $
get "/entities?and=(name.is.null,arr.is.null)&select=id" `shouldRespondWith`
[json|[{ "id": 4 }]|] { matchHeaders = [matchContentTypeJson] }
it "can handle fts" $ do
it "can handle fts on tsvector columns" $ do
get "/entities?or=(text_search_vector.fts.bar,text_search_vector.fts.baz)&select=id" `shouldRespondWith`
[json|[{ "id": 1 }, { "id": 2 }]|] { matchHeaders = [matchContentTypeJson] }
get "/tsearch?or=(text_search_vector.plfts(german).Art%20Spass, text_search_vector.plfts(french).amusant%20impossible, text_search_vector.fts(english).impossible)" `shouldRespondWith`
Expand All @@ -90,6 +90,17 @@ spec =
{"text_search_vector": "'amus':5 'fair':7 'impossibl':9 'peu':4" },
{"text_search_vector": "'art':4 'spass':5 'unmog':7"}
]|] { matchHeaders = [matchContentTypeJson] }
it "can handle fts on text and json columns" $ do
get "/grandchild_entities?or=(jsonb_col.fts.bar,jsonb_col.fts.foo)&select=jsonb_col" `shouldRespondWith`
[json|[
{ "jsonb_col": {"a": {"b":"foo"}} },
{ "jsonb_col": {"b":"bar"} }]
|] { matchHeaders = [matchContentTypeJson] }
get "/tsearch_to_tsvector?and=(text_search.not.plfts(german).Art%20Spass, text_search.not.plfts(french).amusant%20impossible, text_search.not.fts(english).impossible)&select=text_search" `shouldRespondWith`
[json|[
{ "text_search": "But also fun to do what is possible" },
{ "text_search": "Fat cats ate rats" }]
|] { matchHeaders = [matchContentTypeJson] }
it "can handle isdistinct" $
get "/entities?and=(id.gte.2,arr.isdistinct.{1,2})&select=id" `shouldRespondWith`
[json|[{ "id": 3 }, { "id": 4 }]|] { matchHeaders = [matchContentTypeJson] }
Expand Down
Loading