Skip to content

Commit b762a52

Browse files
Update vendored DuckDB sources to 5511fb39ac
1 parent 9989217 commit b762a52

File tree

22 files changed

+665
-216
lines changed

22 files changed

+665
-216
lines changed

src/duckdb/extension/core_functions/function_list.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ static const StaticFunctionDefinition core_functions[] = {
158158
DUCKDB_SCALAR_FUNCTION_SET(DayOfWeekFun),
159159
DUCKDB_SCALAR_FUNCTION_SET(DayOfYearFun),
160160
DUCKDB_SCALAR_FUNCTION_SET(DecadeFun),
161-
DUCKDB_SCALAR_FUNCTION(DecodeFun),
161+
DUCKDB_SCALAR_FUNCTION_SET(DecodeFun),
162162
DUCKDB_SCALAR_FUNCTION(DegreesFun),
163163
DUCKDB_SCALAR_FUNCTION_ALIAS(Editdist3Fun),
164164
DUCKDB_SCALAR_FUNCTION_ALIAS(ElementAtFun),

src/duckdb/extension/core_functions/include/core_functions/scalar/blob_functions.hpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,12 @@ namespace duckdb {
1717

1818
struct DecodeFun {
1919
static constexpr const char *Name = "decode";
20-
static constexpr const char *Parameters = "blob";
21-
static constexpr const char *Description = "Converts `blob` to `VARCHAR`. Fails if `blob` is not valid UTF-8.";
22-
static constexpr const char *Example = "decode('\\xC3\\xBC'::BLOB)";
20+
static constexpr const char *Parameters = "blob,varchar";
21+
static constexpr const char *Description = "Converts `blob` to `VARCHAR`. Invalid UTF-8 is handled based on the error behavior argument. Can be 'strict' (default, fail), 'replace' to replace invalid characters with '?', or 'ignore' to skip invalid characters.";
22+
static constexpr const char *Example = "decode('\\xC3\\xBC'::BLOB)\002decode('\\xA0'::BLOB, 'replace')\002decode('\\xA0'::BLOB, 'ignore')";
2323
static constexpr const char *Categories = "blob";
2424

25-
static ScalarFunction GetFunction();
25+
static ScalarFunctionSet GetFunctions();
2626
};
2727

2828
struct EncodeFun {
Lines changed: 84 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include "core_functions/scalar/blob_functions.hpp"
22
#include "utf8proc_wrapper.hpp"
3+
#include "duckdb/common/string_util.hpp"
34
#include "duckdb/common/exception/conversion_exception.hpp"
45

56
namespace duckdb {
@@ -12,22 +13,87 @@ void EncodeFunction(DataChunk &args, ExpressionState &state, Vector &result) {
1213
result.Reinterpret(args.data[0]);
1314
}
1415

15-
struct BlobDecodeOperator {
16+
enum class DecodeErrorBehavior : uint8_t {
17+
STRICT = 1, // raise an error
18+
REPLACE = 2, // replace invalid characters with '?'
19+
IGNORE = 3 // ignore invalid characters (remove from string)
20+
};
21+
22+
DecodeErrorBehavior GetDecodeErrorBehavior(const string_t &specifier_p) {
23+
auto size = specifier_p.GetSize();
24+
auto data = specifier_p.GetData();
25+
if (StringUtil::CIEquals(data, size, "strict", 6)) {
26+
return DecodeErrorBehavior::STRICT;
27+
} else if (StringUtil::CIEquals(data, size, "replace", 7)) {
28+
return DecodeErrorBehavior::REPLACE;
29+
} else if (StringUtil::CIEquals(data, size, "ignore", 6)) {
30+
return DecodeErrorBehavior::IGNORE;
31+
} else {
32+
throw ConversionException("decode error behavior specifier \"%s\" not recognized", specifier_p.GetString());
33+
}
34+
}
35+
36+
struct UnaryBlobDecodeOperator {
1637
template <class INPUT_TYPE, class RESULT_TYPE>
1738
static RESULT_TYPE Operation(INPUT_TYPE input) {
1839
auto input_data = input.GetData();
1940
auto input_length = input.GetSize();
2041
if (Utf8Proc::Analyze(input_data, input_length) == UnicodeType::INVALID) {
2142
throw ConversionException(
22-
"Failure in decode: could not convert blob to UTF8 string, the blob contained invalid UTF8 characters");
43+
"Failure in decode: could not convert blob to UTF8 string, the blob "
44+
"contained invalid UTF8 characters. \n"
45+
"Use try(decode(BLOB)) to return NULL and continue instead of returning an error. "
46+
"Specify decode(BLOB, 'replace') to replace invalid characters with '?'. "
47+
"Specify decode(BLOB, 'ignore') to remove invalid characters when encountered.");
2348
}
2449
return input;
2550
}
2651
};
2752

28-
void DecodeFunction(DataChunk &args, ExpressionState &state, Vector &result) {
53+
void UnaryDecodeFunction(DataChunk &args, ExpressionState &state, Vector &result) {
54+
// decode is also a nop cast, but requires verification if the provided string is actually
55+
UnaryExecutor::Execute<string_t, string_t, UnaryBlobDecodeOperator>(args.data[0], result, args.size());
56+
StringVector::AddHeapReference(result, args.data[0]);
57+
}
58+
59+
void BinaryDecodeFunction(DataChunk &args, ExpressionState &state, Vector &result) {
2960
// decode is also a nop cast, but requires verification if the provided string is actually
30-
UnaryExecutor::Execute<string_t, string_t, BlobDecodeOperator>(args.data[0], result, args.size());
61+
BinaryExecutor::Execute<string_t, string_t, string_t>(
62+
args.data[0], args.data[1], result, args.size(), [&](string_t input, string_t error_option) {
63+
auto input_data = input.GetDataWriteable();
64+
auto input_length = input.GetSize();
65+
66+
if (Utf8Proc::Analyze(input_data, input_length) != UnicodeType::INVALID) {
67+
return input;
68+
}
69+
auto const error_behavior = GetDecodeErrorBehavior(error_option);
70+
71+
switch (error_behavior) {
72+
case DecodeErrorBehavior::REPLACE:
73+
Utf8Proc::MakeValid(input_data, input_length);
74+
return input;
75+
76+
case DecodeErrorBehavior::IGNORE: {
77+
auto new_str = Utf8Proc::RemoveInvalid(input_data, input_length);
78+
auto target = StringVector::EmptyString(result, new_str.size());
79+
auto output = target.GetDataWriteable();
80+
memcpy(output, new_str.data(), new_str.size());
81+
target.Finalize();
82+
return target;
83+
}
84+
85+
case DecodeErrorBehavior::STRICT:
86+
throw ConversionException(
87+
"Failure in decode: could not convert blob to UTF8 string, the blob "
88+
"contained invalid UTF8 characters. \n"
89+
"Use try(decode(BLOB)) to return NULL and continue instead of returning an error. "
90+
"Specify decode(BLOB, 'replace') to replace invalid characters with '?'. "
91+
"Specify decode(BLOB, 'ignore') to remove invalid characters when encountered.");
92+
93+
default:
94+
throw InternalException("Unimplemented decode error behavior");
95+
}
96+
});
3197
StringVector::AddHeapReference(result, args.data[0]);
3298
}
3399

@@ -37,10 +103,20 @@ ScalarFunction EncodeFun::GetFunction() {
37103
return ScalarFunction({LogicalType::VARCHAR}, LogicalType::BLOB, EncodeFunction);
38104
}
39105

40-
ScalarFunction DecodeFun::GetFunction() {
41-
ScalarFunction function({LogicalType::BLOB}, LogicalType::VARCHAR, DecodeFunction);
42-
function.SetFallible();
43-
return function;
106+
ScalarFunctionSet DecodeFun::GetFunctions() {
107+
ScalarFunctionSet decode("decode");
108+
109+
ScalarFunction unary_function({LogicalType::BLOB}, LogicalType::VARCHAR, UnaryDecodeFunction);
110+
ScalarFunction binary_function({LogicalType::BLOB, LogicalType::VARCHAR}, LogicalType::VARCHAR,
111+
BinaryDecodeFunction);
112+
113+
unary_function.SetFallible();
114+
binary_function.SetFallible();
115+
116+
decode.AddFunction(unary_function);
117+
decode.AddFunction(binary_function);
118+
119+
return decode;
44120
}
45121

46122
} // namespace duckdb

src/duckdb/src/catalog/default/default_functions.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -115,10 +115,6 @@ static const DefaultMacro internal_macros[] = {
115115
{DEFAULT_SCHEMA, "list_reverse", {"l", nullptr}, {{nullptr, nullptr}}, "l[:-:-1]"},
116116
{DEFAULT_SCHEMA, "array_reverse", {"l", nullptr}, {{nullptr, nullptr}}, "list_reverse(l)"},
117117

118-
// FIXME implement as actual function if we encounter a lot of performance issues. Complexity now: n * m, with hashing possibly n + m
119-
{DEFAULT_SCHEMA, "list_intersect", {"l1", "l2", nullptr}, {{nullptr, nullptr}}, "list_filter(list_distinct(l1), lambda variable_intersect: list_contains(l2, variable_intersect))"},
120-
{DEFAULT_SCHEMA, "array_intersect", {"l1", "l2", nullptr}, {{nullptr, nullptr}}, "list_intersect(l1, l2)"},
121-
122118
// algebraic list aggregates
123119
{DEFAULT_SCHEMA, "list_avg", {"l", nullptr}, {{nullptr, nullptr}}, "list_aggr(l, 'avg')"},
124120
{DEFAULT_SCHEMA, "list_var_samp", {"l", nullptr}, {{nullptr, nullptr}}, "list_aggr(l, 'var_samp')"},

src/duckdb/src/common/enum_util.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3812,19 +3812,20 @@ const StringUtil::EnumStringLiteral *GetRelationTypeValues() {
38123812
{ static_cast<uint32_t>(RelationType::VIEW_RELATION), "VIEW_RELATION" },
38133813
{ static_cast<uint32_t>(RelationType::QUERY_RELATION), "QUERY_RELATION" },
38143814
{ static_cast<uint32_t>(RelationType::DELIM_JOIN_RELATION), "DELIM_JOIN_RELATION" },
3815-
{ static_cast<uint32_t>(RelationType::DELIM_GET_RELATION), "DELIM_GET_RELATION" }
3815+
{ static_cast<uint32_t>(RelationType::DELIM_GET_RELATION), "DELIM_GET_RELATION" },
3816+
{ static_cast<uint32_t>(RelationType::EXTENSION_RELATION), "EXTENSION_RELATION" }
38163817
};
38173818
return values;
38183819
}
38193820

38203821
template<>
38213822
const char* EnumUtil::ToChars<RelationType>(RelationType value) {
3822-
return StringUtil::EnumToString(GetRelationTypeValues(), 28, "RelationType", static_cast<uint32_t>(value));
3823+
return StringUtil::EnumToString(GetRelationTypeValues(), 29, "RelationType", static_cast<uint32_t>(value));
38233824
}
38243825

38253826
template<>
38263827
RelationType EnumUtil::FromString<RelationType>(const char *value) {
3827-
return static_cast<RelationType>(StringUtil::StringToEnum(GetRelationTypeValues(), 28, "RelationType", value));
3828+
return static_cast<RelationType>(StringUtil::StringToEnum(GetRelationTypeValues(), 29, "RelationType", value));
38283829
}
38293830

38303831
const StringUtil::EnumStringLiteral *GetRenderModeValues() {

src/duckdb/src/common/enums/relation_type.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ string RelationTypeToString(RelationType type) {
6161
return "VIEW_RELATION";
6262
case RelationType::QUERY_RELATION:
6363
return "QUERY_RELATION";
64+
case RelationType::EXTENSION_RELATION:
65+
return "EXTENSION_RELATION";
6466
case RelationType::INVALID_RELATION:
6567
break;
6668
}

src/duckdb/src/common/sort/hashed_sort.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -759,6 +759,16 @@ static SourceResultType MaterializeHashGroupData(ExecutionContext &context, idx_
759759
return hash_group.get_columns++ ? SourceResultType::HAVE_MORE_OUTPUT : SourceResultType::FINISHED;
760760
}
761761

762+
// OVER(PARTITION BY...)
763+
if (gsink.grouping_data) {
764+
lock_guard<mutex> reset_guard(hash_group.scan_lock);
765+
auto &partitions = gsink.grouping_data->GetPartitions();
766+
if (hash_bin < partitions.size()) {
767+
// Release the memory now that we have finished scanning it.
768+
partitions[hash_bin].reset();
769+
}
770+
}
771+
762772
auto &sort = *hash_group.sort;
763773
auto &sort_global = *hash_group.sort_source;
764774
auto sort_local = sort.GetLocalSourceState(context, sort_global);
@@ -792,6 +802,7 @@ HashedSort::HashGroupPtr HashedSort::GetColumnData(idx_t hash_bin, OperatorSourc
792802

793803
OperatorSourceInput input {sort_global, source.local_state, source.interrupt_state};
794804
auto result = sort.GetColumnData(input);
805+
hash_group.sort_source.reset();
795806

796807
// Just because MaterializeColumnData returned FINISHED doesn't mean that the same thread will
797808
// get the result...
@@ -824,6 +835,8 @@ HashedSort::SortedRunPtr HashedSort::GetSortedRun(ClientContext &client, idx_t h
824835
result = make_uniq<SortedRun>(client, sort, false);
825836
}
826837

838+
hash_group.sort_source.reset();
839+
827840
return result;
828841
}
829842

src/duckdb/src/execution/operator/join/physical_asof_join.cpp

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1303,10 +1303,6 @@ class AsOfLocalSourceState : public LocalSourceState {
13031303
case AsOfJoinSourceStage::DONE:
13041304
throw InternalException("Invalid state for AsOf Task");
13051305
}
1306-
1307-
if (TaskFinished()) {
1308-
++gsource.finished;
1309-
}
13101306
}
13111307

13121308
void BeginRightScan();
@@ -1392,6 +1388,8 @@ void AsOfGlobalSourceState::FinishTask(TaskPtr task) {
13921388
return;
13931389
}
13941390

1391+
++finished;
1392+
13951393
const auto group_idx = task->group_idx;
13961394
auto &finished_hash_group = asof_groups[group_idx];
13971395
D_ASSERT(finished_hash_group);
@@ -1587,7 +1585,12 @@ SourceResultType PhysicalAsOfJoin::GetData(ExecutionContext &context, DataChunk
15871585
// Therefore, we loop until we've produced tuples, or until the operator is actually done
15881586
while (gsource.HasUnfinishedTasks() && chunk.size() == 0) {
15891587
if (!lsource.TaskFinished() || lsource.TryAssignTask()) {
1590-
lsource.ExecuteTask(context, chunk, input);
1588+
try {
1589+
lsource.ExecuteTask(context, chunk, input);
1590+
} catch (...) {
1591+
gsource.stopped = true;
1592+
throw;
1593+
}
15911594
} else {
15921595
auto guard = gsource.Lock();
15931596
if (!gsource.HasMoreTasks()) {

src/duckdb/src/function/function_list.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ static const StaticFunctionDefinition function[] = {
8181
DUCKDB_SCALAR_FUNCTION_SET(ArrayExtractFun),
8282
DUCKDB_SCALAR_FUNCTION_ALIAS(ArrayHasFun),
8383
DUCKDB_SCALAR_FUNCTION_ALIAS(ArrayIndexofFun),
84+
DUCKDB_SCALAR_FUNCTION_ALIAS(ArrayIntersectFun),
8485
DUCKDB_SCALAR_FUNCTION_SET(ArrayLengthFun),
8586
DUCKDB_SCALAR_FUNCTION_ALIAS(ArrayPositionFun),
8687
DUCKDB_SCALAR_FUNCTION_SET_ALIAS(ArrayResizeFun),
@@ -122,6 +123,7 @@ static const StaticFunctionDefinition function[] = {
122123
DUCKDB_SCALAR_FUNCTION_SET(ListExtractFun),
123124
DUCKDB_SCALAR_FUNCTION_ALIAS(ListHasFun),
124125
DUCKDB_SCALAR_FUNCTION_ALIAS(ListIndexofFun),
126+
DUCKDB_SCALAR_FUNCTION(ListIntersectFun),
125127
DUCKDB_SCALAR_FUNCTION(ListPositionFun),
126128
DUCKDB_SCALAR_FUNCTION_SET(ListResizeFun),
127129
DUCKDB_SCALAR_FUNCTION(ListSelectFun),

0 commit comments

Comments
 (0)