diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
new file mode 100644
index 00000000000..c6f1bef64aa
--- /dev/null
+++ b/.github/workflows/build.yml
@@ -0,0 +1,31 @@
+name: build
+
+on:
+ push:
+ pull_request:
+
+jobs:
+ test:
+ runs-on:
+ - ubuntu-20.04
+ strategy:
+ fail-fast: false
+ matrix:
+ compiler: [clang, gcc]
+ check_type: [normal, debug]
+ env:
+ LLVM_VER: 10
+ COMPILER: ${{ matrix.compiler }}
+ CHECK_TYPE: ${{ matrix.check_type }}
+ steps:
+ - name: Checkout code into workspace directory
+ uses: actions/checkout@v2
+ - name: Setup prerequisites
+ run: bash ./ci/prerequisites.sh
+ - name: Build
+ run: bash ./ci/build.sh
+ - name: Check
+ run: bash ./ci/check.sh
+ - name: Check output
+ run: bash ./ci/check_output.sh
+ if: ${{ success() || failure() }}
diff --git a/ci/build.sh b/ci/build.sh
new file mode 100644
index 00000000000..f541929e69c
--- /dev/null
+++ b/ci/build.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+set -eu
+
+if [ "$COMPILER" = "clang" ]; then
+ export CC=clang-$LLVM_VER
+else
+ export CC=gcc
+fi
+
+# configure & build
+if [ "$CHECK_TYPE" = "debug" ]; then
+ CFLAGS="-O0" ./configure --enable-debug --enable-cassert --enable-tap-tests --with-icu
+else
+ ./configure --disable-debug --disable-cassert --enable-tap-tests --with-icu
+fi
+
+make -sj4
+cd contrib
+make -sj4
+cd ..
diff --git a/ci/check.sh b/ci/check.sh
new file mode 100644
index 00000000000..faa8c25e84a
--- /dev/null
+++ b/ci/check.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+set -eu
+
+# remove the soft limit on core dump size
+ulimit -S -c unlimited
+# set a per-run core dump file pattern
+mkdir -p "/tmp/cores-$GITHUB_SHA-$TIMESTAMP"
+sudo sh -c "echo \"/tmp/cores-$GITHUB_SHA-$TIMESTAMP/%t_%p_%s.core\" > /proc/sys/kernel/core_pattern"
+
+make check-world -j4
diff --git a/ci/check_output.sh b/ci/check_output.sh
new file mode 100644
index 00000000000..ae26cf63d68
--- /dev/null
+++ b/ci/check_output.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+set -eu
+
+status=0
+
+# show regression diffs, if any exist
+for f in $(find . -name regression.diffs); do
+ echo "========= Contents of $f"
+	cat "$f"
+ status=1
+done
+
+# examine core dumps, if any; cores from SIGQUIT (signal 3) are skipped below
+cores=$(find "/tmp/cores-$GITHUB_SHA-$TIMESTAMP/" -name '*.core' 2>/dev/null)
+
+if [ -n "$cores" ]; then
+ for corefile in $cores ; do
+ if [[ $corefile != *_3.core ]]; then
+			binary=$(gdb -quiet -core "$corefile" -batch -ex 'info auxv' | grep AT_EXECFN | perl -pe "s/^.*\"(.*)\"\$/\$1/g")
+			echo "dumping $corefile for $binary"
+			gdb --batch --quiet -ex "thread apply all bt full" -ex "quit" "$binary" "$corefile"
+ status=1
+ fi
+ done
+fi
+
+rm -rf "/tmp/cores-$GITHUB_SHA-$TIMESTAMP"
+
+exit $status
diff --git a/ci/prerequisites.sh b/ci/prerequisites.sh
new file mode 100644
index 00000000000..b26251b711c
--- /dev/null
+++ b/ci/prerequisites.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+set -eu
+
+# print the hostname so the runner can be identified from the logs
+echo "HOSTNAME=$(hostname)"
+TIMESTAMP=$(date +%s)
+echo "TIMESTAMP=$TIMESTAMP" >> $GITHUB_ENV
+echo "TIMESTAMP=$TIMESTAMP"
+
+sudo apt-get -y install -qq wget ca-certificates
+
+sudo apt-get update -qq
+
+apt_packages="build-essential flex bison pkg-config libreadline-dev make gdb libipc-run-perl libicu-dev python3 python3-dev python3-pip python3-setuptools python3-testresources"
+
+if [ "$COMPILER" = "clang" ]; then
+ apt_packages="$apt_packages llvm-$LLVM_VER clang-$LLVM_VER clang-tools-$LLVM_VER"
+fi
+
+# install required packages
+sudo apt-get -o Dpkg::Options::="--force-confdef" -o Dpkg::Options::="--force-confold" -y install -qq $apt_packages
diff --git a/configure b/configure
index 71155f46e0d..f66a81e682d 100755
--- a/configure
+++ b/configure
@@ -628,6 +628,7 @@ ac_includes_default="\
ac_subst_vars='LTLIBOBJS
vpath_build
PG_SYSROOT
+ORIOLEDB_PATCHSET_VERSION
PG_VERSION_NUM
LDFLAGS_EX_BE
PROVE
@@ -6657,6 +6658,99 @@ fi
if test -n "$NOT_THE_CFLAGS"; then
CFLAGS="$CFLAGS -Wno-cast-function-type-strict"
fi
+ if test x"$host_cpu" == x"aarch64"; then
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${CC} supports -moutline-atomics, for CFLAGS" >&5
+$as_echo_n "checking whether ${CC} supports -moutline-atomics, for CFLAGS... " >&6; }
+if ${pgac_cv_prog_CC_cflags__moutline_atomics+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ pgac_save_CFLAGS=$CFLAGS
+pgac_save_CC=$CC
+CC=${CC}
+CFLAGS="${CFLAGS} -moutline-atomics"
+ac_save_c_werror_flag=$ac_c_werror_flag
+ac_c_werror_flag=yes
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ pgac_cv_prog_CC_cflags__moutline_atomics=yes
+else
+ pgac_cv_prog_CC_cflags__moutline_atomics=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ac_c_werror_flag=$ac_save_c_werror_flag
+CFLAGS="$pgac_save_CFLAGS"
+CC="$pgac_save_CC"
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_prog_CC_cflags__moutline_atomics" >&5
+$as_echo "$pgac_cv_prog_CC_cflags__moutline_atomics" >&6; }
+if test x"$pgac_cv_prog_CC_cflags__moutline_atomics" = x"yes"; then
+ CFLAGS="${CFLAGS} -moutline-atomics"
+fi
+
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${CXX} supports -moutline-atomics, for CXXFLAGS" >&5
+$as_echo_n "checking whether ${CXX} supports -moutline-atomics, for CXXFLAGS... " >&6; }
+if ${pgac_cv_prog_CXX_cxxflags__moutline_atomics+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ pgac_save_CXXFLAGS=$CXXFLAGS
+pgac_save_CXX=$CXX
+CXX=${CXX}
+CXXFLAGS="${CXXFLAGS} -moutline-atomics"
+ac_save_cxx_werror_flag=$ac_cxx_werror_flag
+ac_cxx_werror_flag=yes
+ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :
+ pgac_cv_prog_CXX_cxxflags__moutline_atomics=yes
+else
+ pgac_cv_prog_CXX_cxxflags__moutline_atomics=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+ac_cxx_werror_flag=$ac_save_cxx_werror_flag
+CXXFLAGS="$pgac_save_CXXFLAGS"
+CXX="$pgac_save_CXX"
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_prog_CXX_cxxflags__moutline_atomics" >&5
+$as_echo "$pgac_cv_prog_CXX_cxxflags__moutline_atomics" >&6; }
+if test x"$pgac_cv_prog_CXX_cxxflags__moutline_atomics" = x"yes"; then
+ CXXFLAGS="${CXXFLAGS} -moutline-atomics"
+fi
+
+
+ fi
elif test "$ICC" = yes; then
# Intel's compiler has a bug/misoptimization in checking for
# division by NAN (NaN == 0), -mp1 fixes it, so add it to the CFLAGS.
@@ -15706,7 +15800,7 @@ fi
LIBS_including_readline="$LIBS"
LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
-for ac_func in backtrace_symbols copyfile getifaddrs getpeerucred inet_pton kqueue mbstowcs_l memset_s posix_fallocate ppoll pthread_is_threaded_np setproctitle setproctitle_fast strchrnul strsignal syncfs sync_file_range uselocale wcstombs_l
+for ac_func in backtrace_symbols copyfile getifaddrs getpeerucred inet_pton kqueue mbstowcs_l memset_s posix_fallocate ppoll setproctitle setproctitle_fast strchrnul strsignal syncfs sync_file_range uselocale wcstombs_l
do :
as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"
@@ -19386,6 +19480,10 @@ _ACEOF
+# Needed to check the PostgreSQL patchset git tag during the OrioleDB extension build
+ORIOLEDB_PATCHSET_VERSION=`git describe --tags | cut -d'_' -f2`
+
+
# If we are inserting PG_SYSROOT into CPPFLAGS, do so symbolically not
# literally, so that it's possible to override it at build time using
# a command like "make ... PG_SYSROOT=path". This has to be done after
diff --git a/configure.ac b/configure.ac
index bf0cfdf2ba9..e4ebb267341 100644
--- a/configure.ac
+++ b/configure.ac
@@ -579,6 +579,10 @@ if test "$GCC" = yes -a "$ICC" = no; then
if test -n "$NOT_THE_CFLAGS"; then
CFLAGS="$CFLAGS -Wno-cast-function-type-strict"
fi
+ if test x"$host_cpu" == x"aarch64"; then
+ PGAC_PROG_CC_CFLAGS_OPT([-moutline-atomics])
+ PGAC_PROG_CXX_CFLAGS_OPT([-moutline-atomics])
+ fi
elif test "$ICC" = yes; then
# Intel's compiler has a bug/misoptimization in checking for
# division by NAN (NaN == 0), -mp1 fixes it, so add it to the CFLAGS.
@@ -1804,7 +1808,6 @@ AC_CHECK_FUNCS(m4_normalize([
memset_s
posix_fallocate
ppoll
- pthread_is_threaded_np
setproctitle
setproctitle_fast
strchrnul
@@ -2414,6 +2417,10 @@ $AWK '{printf "%d%04d", $1, $2}'`"]
AC_DEFINE_UNQUOTED(PG_VERSION_NUM, $PG_VERSION_NUM, [PostgreSQL version as a number])
AC_SUBST(PG_VERSION_NUM)
+# Needed to check the PostgreSQL patchset git tag during the OrioleDB extension build
+[ORIOLEDB_PATCHSET_VERSION=`git describe --tags | cut -d'_' -f2`]
+AC_SUBST(ORIOLEDB_PATCHSET_VERSION)
+
# If we are inserting PG_SYSROOT into CPPFLAGS, do so symbolically not
# literally, so that it's possible to override it at build time using
# a command like "make ... PG_SYSROOT=path". This has to be done after
diff --git a/contrib/bloom/blinsert.c b/contrib/bloom/blinsert.c
index b90145148d4..99aed8f9948 100644
--- a/contrib/bloom/blinsert.c
+++ b/contrib/bloom/blinsert.c
@@ -172,7 +172,7 @@ blbuildempty(Relation index)
*/
bool
blinsert(Relation index, Datum *values, bool *isnull,
- ItemPointer ht_ctid, Relation heapRel,
+ Datum tupleid, Relation heapRel,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
IndexInfo *indexInfo)
@@ -189,6 +189,7 @@ blinsert(Relation index, Datum *values, bool *isnull,
BlockNumber blkno = InvalidBlockNumber;
OffsetNumber nStart;
GenericXLogState *state;
+ ItemPointer ht_ctid = DatumGetItemPointer(tupleid);
insertCtx = AllocSetContextCreate(CurrentMemoryContext,
"Bloom insert temporary context",
diff --git a/contrib/bloom/bloom.h b/contrib/bloom/bloom.h
index 330811ec608..15ef1b9aee2 100644
--- a/contrib/bloom/bloom.h
+++ b/contrib/bloom/bloom.h
@@ -189,7 +189,7 @@ extern bool blvalidate(Oid opclassoid);
/* index access method interface functions */
extern bool blinsert(Relation index, Datum *values, bool *isnull,
- ItemPointer ht_ctid, Relation heapRel,
+ Datum tupleid, Relation heapRel,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
struct IndexInfo *indexInfo);
diff --git a/contrib/bloom/blutils.c b/contrib/bloom/blutils.c
index f23fbb1d9e0..d92858a3433 100644
--- a/contrib/bloom/blutils.c
+++ b/contrib/bloom/blutils.c
@@ -130,7 +130,8 @@ blhandler(PG_FUNCTION_ARGS)
amroutine->ambuild = blbuild;
amroutine->ambuildempty = blbuildempty;
- amroutine->aminsert = blinsert;
+ amroutine->aminsert = NULL;
+ amroutine->aminsertextended = blinsert;
amroutine->ambulkdelete = blbulkdelete;
amroutine->amvacuumcleanup = blvacuumcleanup;
amroutine->amcanreturn = NULL;
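
The same mechanical change repeats for every in-core index AM touched below: aminsert is set to NULL and the insert callback moves to the new aminsertextended slot, which takes a generic Datum row identifier instead of a raw ItemPointer. A minimal sketch of how an out-of-tree index AM would adapt; the names myinsert and myhandler are hypothetical:

    /* Hypothetical out-of-tree AM adopting the extended insert API. */
    bool
    myinsert(Relation index, Datum *values, bool *isnull,
             Datum tupleid, Relation heapRel,
             IndexUniqueCheck checkUnique, bool indexUnchanged,
             IndexInfo *indexInfo)
    {
        /* a TID-based AM recovers the heap TID from the generic Datum */
        ItemPointer ht_ctid = DatumGetItemPointer(tupleid);

        /* ... build and insert the index tuple pointing at ht_ctid ... */
        return false;
    }

    Datum
    myhandler(PG_FUNCTION_ARGS)
    {
        IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);

        amroutine->aminsert = NULL;     /* legacy TID-only entry point unused */
        amroutine->aminsertextended = myinsert;
        /* ... fill in the remaining callbacks as before ... */
        PG_RETURN_POINTER(amroutine);
    }
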
diff --git a/contrib/pageinspect/heapfuncs.c b/contrib/pageinspect/heapfuncs.c
index 0f0252558c5..d1ac2fd85ee 100644
--- a/contrib/pageinspect/heapfuncs.c
+++ b/contrib/pageinspect/heapfuncs.c
@@ -364,6 +364,7 @@ tuple_data_split_internal(Oid relid, char *tupdata,
*/
if (VARATT_IS_EXTERNAL(tupdata + off) &&
!VARATT_IS_EXTERNAL_ONDISK(tupdata + off) &&
+ !VARATT_IS_EXTERNAL_ORIOLEDB(tupdata + off) &&
!VARATT_IS_EXTERNAL_INDIRECT(tupdata + off))
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
diff --git a/contrib/test_decoding/test_decoding.c b/contrib/test_decoding/test_decoding.c
index 12d1d0505d7..dedc4be074f 100644
--- a/contrib/test_decoding/test_decoding.c
+++ b/contrib/test_decoding/test_decoding.c
@@ -578,7 +578,7 @@ tuple_to_stringinfo(StringInfo s, TupleDesc tupdesc, HeapTuple tuple, bool skip_
/* print data */
if (isnull)
appendStringInfoString(s, "null");
- else if (typisvarlena && VARATT_IS_EXTERNAL_ONDISK(origval))
+ else if (typisvarlena && (VARATT_IS_EXTERNAL_ONDISK(origval) || VARATT_IS_EXTERNAL_ORIOLEDB(origval)))
appendStringInfoString(s, "unchanged-toast-datum");
else if (!typisvarlena)
print_literal(s, typid,
diff --git a/doc/src/sgml/indexam.sgml b/doc/src/sgml/indexam.sgml
index 30eda37afa8..cee79776683 100644
--- a/doc/src/sgml/indexam.sgml
+++ b/doc/src/sgml/indexam.sgml
@@ -139,6 +139,7 @@ typedef struct IndexAmRoutine
ambuild_function ambuild;
ambuildempty_function ambuildempty;
aminsert_function aminsert;
+ aminsert_extended_function aminsertextended;
ambulkdelete_function ambulkdelete;
amvacuumcleanup_function amvacuumcleanup;
amcanreturn_function amcanreturn; /* can be NULL */
diff --git a/doc/src/sgml/ref/pg_rewind.sgml b/doc/src/sgml/ref/pg_rewind.sgml
index 2de747ec37f..01d20462e33 100644
--- a/doc/src/sgml/ref/pg_rewind.sgml
+++ b/doc/src/sgml/ref/pg_rewind.sgml
@@ -284,6 +284,16 @@ PostgreSQL documentation
+
+
+
+
+
+      Load a shared library that performs a custom rewind for a PostgreSQL extension. The path may be absolute or relative to PKGLIBDIR. The file extension is optional. Multiple libraries can be selected by giving the switch multiple times.
+
+
+
+
diff --git a/meson.build b/meson.build
index 56454cc3395..4158f96ad41 100644
--- a/meson.build
+++ b/meson.build
@@ -153,6 +153,8 @@ cdata.set('PG_VERSION_NUM', pg_version_num)
# PG_VERSION_STR is built later, it depends on compiler test results
cdata.set_quoted('CONFIGURE_ARGS', '')
+git_describe_tags = run_command('git', 'describe', '--tags', check: true)
+orioledb_patchset_version = git_describe_tags.stdout().strip().split('_')[1]
###############################################################
@@ -2539,7 +2541,6 @@ func_checks = [
['posix_fallocate'],
['ppoll'],
['pthread_barrier_wait', {'dependencies': [thread_dep]}],
- ['pthread_is_threaded_np', {'dependencies': [thread_dep]}],
['sem_init', {'dependencies': [rt_dep, thread_dep], 'skip': sema_kind != 'unnamed_posix', 'define': false}],
['setproctitle', {'dependencies': [util_dep]}],
['setproctitle_fast'],
diff --git a/src/Makefile.global.in b/src/Makefile.global.in
index cc4dc6de91e..ccae8c39d87 100644
--- a/src/Makefile.global.in
+++ b/src/Makefile.global.in
@@ -42,6 +42,9 @@ VERSION_NUM = @PG_VERSION_NUM@
PACKAGE_URL = @PACKAGE_URL@
+# OrioleDB patchset git tag number
+ORIOLEDB_PATCHSET_VERSION = @ORIOLEDB_PATCHSET_VERSION@
+
# Set top_srcdir, srcdir, and VPATH.
ifdef PGXS
top_srcdir = $(top_builddir)
diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c
index a257903991d..38469a5a554 100644
--- a/src/backend/access/brin/brin.c
+++ b/src/backend/access/brin/brin.c
@@ -116,7 +116,8 @@ brinhandler(PG_FUNCTION_ARGS)
amroutine->ambuild = brinbuild;
amroutine->ambuildempty = brinbuildempty;
- amroutine->aminsert = brininsert;
+ amroutine->aminsert = NULL;
+ amroutine->aminsertextended = brininsert;
amroutine->ambulkdelete = brinbulkdelete;
amroutine->amvacuumcleanup = brinvacuumcleanup;
amroutine->amcanreturn = NULL;
@@ -154,7 +155,7 @@ brinhandler(PG_FUNCTION_ARGS)
*/
bool
brininsert(Relation idxRel, Datum *values, bool *nulls,
- ItemPointer heaptid, Relation heapRel,
+ Datum tupleid, Relation heapRel,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
IndexInfo *indexInfo)
@@ -168,6 +169,7 @@ brininsert(Relation idxRel, Datum *values, bool *nulls,
MemoryContext tupcxt = NULL;
MemoryContext oldcxt = CurrentMemoryContext;
bool autosummarize = BrinGetAutoSummarize(idxRel);
+ ItemPointer heaptid = DatumGetItemPointer(tupleid);
revmap = brinRevmapInitialize(idxRel, &pagesPerRange, NULL);
diff --git a/src/backend/access/common/detoast.c b/src/backend/access/common/detoast.c
index 108e0126a14..f54dcb03517 100644
--- a/src/backend/access/common/detoast.c
+++ b/src/backend/access/common/detoast.c
@@ -26,9 +26,10 @@ static struct varlena *toast_fetch_datum(struct varlena *attr);
static struct varlena *toast_fetch_datum_slice(struct varlena *attr,
int32 sliceoffset,
int32 slicelength);
-static struct varlena *toast_decompress_datum(struct varlena *attr);
static struct varlena *toast_decompress_datum_slice(struct varlena *attr, int32 slicelength);
+static ToastFunc o_detoast_func = NULL;
+
/* ----------
* detoast_external_attr -
*
@@ -46,7 +47,7 @@ detoast_external_attr(struct varlena *attr)
{
struct varlena *result;
- if (VARATT_IS_EXTERNAL_ONDISK(attr))
+ if (VARATT_IS_EXTERNAL_ONDISK(attr) || VARATT_IS_EXTERNAL_ORIOLEDB(attr))
{
/*
* This is an external stored plain value
@@ -115,7 +116,7 @@ detoast_external_attr(struct varlena *attr)
struct varlena *
detoast_attr(struct varlena *attr)
{
- if (VARATT_IS_EXTERNAL_ONDISK(attr))
+ if (VARATT_IS_EXTERNAL_ONDISK(attr) || VARATT_IS_EXTERNAL_ORIOLEDB(attr))
{
/*
* This is an externally stored datum --- fetch it back from there
@@ -223,7 +224,14 @@ detoast_attr_slice(struct varlena *attr,
else if (pg_add_s32_overflow(sliceoffset, slicelength, &slicelimit))
slicelength = slicelimit = -1;
- if (VARATT_IS_EXTERNAL_ONDISK(attr))
+ if (VARATT_IS_EXTERNAL_ORIOLEDB(attr))
+ {
+ Assert(o_detoast_func != NULL);
+ preslice = o_detoast_func(attr);
+ if (preslice == NULL)
+ elog(ERROR, "unexpected NULL detoast result");
+ }
+ else if (VARATT_IS_EXTERNAL_ONDISK(attr))
{
struct varatt_external toast_pointer;
@@ -332,6 +340,18 @@ detoast_attr_slice(struct varlena *attr,
return result;
}
+void
+register_o_detoast_func(ToastFunc func)
+{
+ o_detoast_func = func;
+}
+
+void
+deregister_o_detoast_func()
+{
+ o_detoast_func = NULL;
+}
+
/* ----------
* toast_fetch_datum -
*
@@ -347,6 +367,17 @@ toast_fetch_datum(struct varlena *attr)
struct varatt_external toast_pointer;
int32 attrsize;
+ if (VARATT_IS_EXTERNAL_ORIOLEDB(attr))
+ {
+ if (o_detoast_func != NULL)
+ {
+ result = o_detoast_func(attr);
+ if (result == NULL)
+ elog(ERROR, "unexpected NULL detoast result");
+ return result;
+ }
+ }
+
if (!VARATT_IS_EXTERNAL_ONDISK(attr))
elog(ERROR, "toast_fetch_datum shouldn't be called for non-ondisk datums");
@@ -467,7 +498,7 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset,
*
* Decompress a compressed version of a varlena datum
*/
-static struct varlena *
+struct varlena *
toast_decompress_datum(struct varlena *attr)
{
ToastCompressionId cmid;
@@ -547,11 +578,17 @@ toast_raw_datum_size(Datum value)
struct varlena *attr = (struct varlena *) DatumGetPointer(value);
Size result;
- if (VARATT_IS_EXTERNAL_ONDISK(attr))
+ if (VARATT_IS_EXTERNAL_ORIOLEDB(attr))
+ {
+		OToastExternal *toasted = (OToastExternal *) VARDATA_EXTERNAL(attr);
+ result = toasted->raw_size + VARHDRSZ;
+ }
+ else if (VARATT_IS_EXTERNAL_ONDISK(attr))
{
- /* va_rawsize is the size of the original datum -- including header */
struct varatt_external toast_pointer;
+ /* va_rawsize is the size of the original datum -- including header */
+
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
result = toast_pointer.va_rawsize;
}
@@ -603,7 +640,12 @@ toast_datum_size(Datum value)
struct varlena *attr = (struct varlena *) DatumGetPointer(value);
Size result;
- if (VARATT_IS_EXTERNAL_ONDISK(attr))
+ if (VARATT_IS_EXTERNAL_ORIOLEDB(attr))
+ {
+		OToastExternal *toasted = (OToastExternal *) VARDATA_EXTERNAL(attr);
+ result = toasted->toasted_size - VARHDRSZ;
+ }
+ else if (VARATT_IS_EXTERNAL_ONDISK(attr))
{
/*
* Attribute is stored externally - return the extsize whether
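
register_o_detoast_func() and deregister_o_detoast_func() above are the integration point for a table AM that stores its own TOAST pointers. A minimal sketch of the intended use from an extension's load hook, assuming ToastFunc is declared in detoast.h with the signature the call sites imply (a struct varlena * function taking struct varlena *); my_detoast is a hypothetical callback:

    #include "postgres.h"
    #include "access/detoast.h"

    /* Hypothetical detoaster: resolves an AM-specific external TOAST pointer. */
    static struct varlena *
    my_detoast(struct varlena *attr)
    {
        /* ... fetch and decompress the datum from the AM's own storage ... */
        return attr;    /* placeholder; the hook must never return NULL */
    }

    void
    _PG_init(void)
    {
        register_o_detoast_func(my_detoast);
    }
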
diff --git a/src/backend/access/common/heaptuple.c b/src/backend/access/common/heaptuple.c
index 6bedbdf07ff..75d9c272177 100644
--- a/src/backend/access/common/heaptuple.c
+++ b/src/backend/access/common/heaptuple.c
@@ -756,6 +756,10 @@ heap_getsysattr(HeapTuple tup, int attnum, TupleDesc tupleDesc, bool *isnull)
case TableOidAttributeNumber:
result = ObjectIdGetDatum(tup->t_tableOid);
break;
+ case RowIdAttributeNumber:
+ *isnull = true;
+ result = 0;
+ break;
default:
elog(ERROR, "invalid attnum: %d", attnum);
result = 0; /* keep compiler quiet */
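
Since heap identifies rows by TID alone, the new RowIdAttributeNumber system attribute reports NULL here; a table AM with non-TID row identifiers is expected to return a real value. A hedged sketch of the AM-agnostic reader side (the slot and surrounding code are assumed context, not part of this patch):

    /* Sketch: read the row-identifier system attribute from a slot. */
    bool        isnull;
    Datum       rowid = slot_getsysattr(slot, RowIdAttributeNumber, &isnull);

    if (!isnull)
    {
        /* the table AM supplied a row identifier for this tuple */
    }
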
diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c
index 469de9bb49f..a19499af976 100644
--- a/src/backend/access/common/reloptions.c
+++ b/src/backend/access/common/reloptions.c
@@ -24,6 +24,7 @@
#include "access/nbtree.h"
#include "access/reloptions.h"
#include "access/spgist_private.h"
+#include "access/tableam.h"
#include "catalog/pg_type.h"
#include "commands/defrem.h"
#include "commands/tablespace.h"
@@ -1379,7 +1380,7 @@ untransformRelOptions(Datum options)
*/
bytea *
extractRelOptions(HeapTuple tuple, TupleDesc tupdesc,
- amoptions_function amoptions)
+ const TableAmRoutine *tableam, amoptions_function amoptions)
{
bytea *options;
bool isnull;
@@ -1401,7 +1402,8 @@ extractRelOptions(HeapTuple tuple, TupleDesc tupdesc,
case RELKIND_RELATION:
case RELKIND_TOASTVALUE:
case RELKIND_MATVIEW:
- options = heap_reloptions(classForm->relkind, datum, false);
+ options = tableam_reloptions(tableam, classForm->relkind,
+ datum, false);
break;
case RELKIND_PARTITIONED_TABLE:
options = partitioned_table_reloptions(datum, false);
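
extractRelOptions() now routes table, TOAST, and matview reloptions through the owning table AM instead of calling heap_reloptions() directly. The tableam_reloptions() wrapper itself is not part of this excerpt; a plausible shape, consistent with the heapam_reloptions() callback added to heapam_handler.c further down, would be:

    /* Hypothetical dispatch helper; the real definition lives in the tableam
     * headers, which this excerpt does not include. */
    static inline bytea *
    tableam_reloptions(const TableAmRoutine *routine, char relkind,
                       Datum reloptions, bool validate)
    {
        return routine->reloptions(relkind, reloptions, validate);
    }
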
diff --git a/src/backend/access/common/toast_compression.c b/src/backend/access/common/toast_compression.c
index 4cf956a759c..4b281ed438d 100644
--- a/src/backend/access/common/toast_compression.c
+++ b/src/backend/access/common/toast_compression.c
@@ -262,7 +262,12 @@ toast_get_compression_id(struct varlena *attr)
* the external toast pointer. If compressed inline, fetch it from the
* toast compression header.
*/
- if (VARATT_IS_EXTERNAL_ONDISK(attr))
+ if (VARATT_IS_EXTERNAL_ORIOLEDB(attr))
+ {
+		OToastExternal *toasted = (OToastExternal *) VARDATA_EXTERNAL(attr);
+ cmid = toasted->formatFlags >> ORIOLEDB_EXT_FORMAT_FLAGS_BITS;
+ }
+ else if (VARATT_IS_EXTERNAL_ONDISK(attr))
{
struct varatt_external toast_pointer;
diff --git a/src/backend/access/common/toast_internals.c b/src/backend/access/common/toast_internals.c
index 588825ed85d..9b6a5d9091c 100644
--- a/src/backend/access/common/toast_internals.c
+++ b/src/backend/access/common/toast_internals.c
@@ -240,7 +240,7 @@ toast_save_datum(Relation rel, Datum value,
{
struct varatt_external old_toast_pointer;
- Assert(VARATT_IS_EXTERNAL_ONDISK(oldexternal));
+ Assert(VARATT_IS_EXTERNAL_ONDISK(oldexternal) || VARATT_IS_EXTERNAL_ORIOLEDB(oldexternal));
/* Must copy to access aligned fields */
VARATT_EXTERNAL_GET_POINTER(old_toast_pointer, oldexternal);
if (old_toast_pointer.va_toastrelid == rel->rd_toastoid)
@@ -396,7 +396,7 @@ toast_delete_datum(Relation rel, Datum value, bool is_speculative)
int validIndex;
SnapshotData SnapshotToast;
- if (!VARATT_IS_EXTERNAL_ONDISK(attr))
+ if (!VARATT_IS_EXTERNAL_ONDISK(attr) && !VARATT_IS_EXTERNAL_ORIOLEDB(attr))
return;
/* Must copy to access aligned fields */
diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c
index 56968b95acf..36815547151 100644
--- a/src/backend/access/gin/gininsert.c
+++ b/src/backend/access/gin/gininsert.c
@@ -484,7 +484,7 @@ ginHeapTupleInsert(GinState *ginstate, OffsetNumber attnum,
bool
gininsert(Relation index, Datum *values, bool *isnull,
- ItemPointer ht_ctid, Relation heapRel,
+ Datum tupleid, Relation heapRel,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
IndexInfo *indexInfo)
@@ -493,6 +493,7 @@ gininsert(Relation index, Datum *values, bool *isnull,
MemoryContext oldCtx;
MemoryContext insertCtx;
int i;
+ ItemPointer ht_ctid = DatumGetItemPointer(tupleid);
/* Initialize GinState cache if first call in this statement */
if (ginstate == NULL)
diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c
index 7a4cd93f301..52d9a725fc4 100644
--- a/src/backend/access/gin/ginutil.c
+++ b/src/backend/access/gin/ginutil.c
@@ -63,7 +63,8 @@ ginhandler(PG_FUNCTION_ARGS)
amroutine->ambuild = ginbuild;
amroutine->ambuildempty = ginbuildempty;
- amroutine->aminsert = gininsert;
+ amroutine->aminsert = NULL;
+ amroutine->aminsertextended = gininsert;
amroutine->ambulkdelete = ginbulkdelete;
amroutine->amvacuumcleanup = ginvacuumcleanup;
amroutine->amcanreturn = NULL;
diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c
index 8ef5fa03290..73193f0970d 100644
--- a/src/backend/access/gist/gist.c
+++ b/src/backend/access/gist/gist.c
@@ -85,7 +85,8 @@ gisthandler(PG_FUNCTION_ARGS)
amroutine->ambuild = gistbuild;
amroutine->ambuildempty = gistbuildempty;
- amroutine->aminsert = gistinsert;
+ amroutine->aminsert = NULL;
+ amroutine->aminsertextended = gistinsert;
amroutine->ambulkdelete = gistbulkdelete;
amroutine->amvacuumcleanup = gistvacuumcleanup;
amroutine->amcanreturn = gistcanreturn;
@@ -156,7 +157,7 @@ gistbuildempty(Relation index)
*/
bool
gistinsert(Relation r, Datum *values, bool *isnull,
- ItemPointer ht_ctid, Relation heapRel,
+ Datum tupleid, Relation heapRel,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
IndexInfo *indexInfo)
@@ -164,6 +165,7 @@ gistinsert(Relation r, Datum *values, bool *isnull,
GISTSTATE *giststate = (GISTSTATE *) indexInfo->ii_AmCache;
IndexTuple itup;
MemoryContext oldCxt;
+ ItemPointer ht_ctid = DatumGetItemPointer(tupleid);
/* Initialize GISTSTATE cache if first call in this statement */
if (giststate == NULL)
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index fc5d97f606e..ffddf7b900c 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -82,7 +82,8 @@ hashhandler(PG_FUNCTION_ARGS)
amroutine->ambuild = hashbuild;
amroutine->ambuildempty = hashbuildempty;
- amroutine->aminsert = hashinsert;
+ amroutine->aminsert = NULL;
+ amroutine->aminsertextended = hashinsert;
amroutine->ambulkdelete = hashbulkdelete;
amroutine->amvacuumcleanup = hashvacuumcleanup;
amroutine->amcanreturn = NULL;
@@ -247,7 +248,7 @@ hashbuildCallback(Relation index,
*/
bool
hashinsert(Relation rel, Datum *values, bool *isnull,
- ItemPointer ht_ctid, Relation heapRel,
+ Datum tupleid, Relation heapRel,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
IndexInfo *indexInfo)
@@ -255,6 +256,7 @@ hashinsert(Relation rel, Datum *values, bool *isnull,
Datum index_values[1];
bool index_isnull[1];
IndexTuple itup;
+ ItemPointer ht_ctid = DatumGetItemPointer(tupleid);
/* convert data to a hash key; on failure, do not insert anything */
if (!_hash_convert_tuple(rel,
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 64f84a2e4bd..43d2bbcf84b 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -2512,10 +2512,11 @@ xmax_infomask_changed(uint16 new_infomask, uint16 old_infomask)
}
/*
- * heap_delete - delete a tuple
+ * heap_delete - delete a tuple, optionally fetching it into a slot
*
* See table_tuple_delete() for an explanation of the parameters, except that
- * this routine directly takes a tuple rather than a slot.
+ * this routine directly takes a tuple rather than a slot. Also, we don't
+ * place a lock on the tuple in this function; we just fetch the existing version.
*
* In the failure cases, the routine fills *tmfd with the tuple's t_ctid,
* t_xmax (resolving a possible MultiXact, if necessary), and t_cmax (the last
@@ -2524,8 +2525,9 @@ xmax_infomask_changed(uint16 new_infomask, uint16 old_infomask)
*/
TM_Result
heap_delete(Relation relation, ItemPointer tid,
- CommandId cid, Snapshot crosscheck, bool wait,
- TM_FailureData *tmfd, bool changingPart)
+ CommandId cid, Snapshot crosscheck, int options,
+ TM_FailureData *tmfd, bool changingPart,
+ TupleTableSlot *oldSlot)
{
TM_Result result;
TransactionId xid = GetCurrentTransactionId();
@@ -2603,7 +2605,7 @@ heap_delete(Relation relation, ItemPointer tid,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("attempted to delete invisible tuple")));
}
- else if (result == TM_BeingModified && wait)
+ else if (result == TM_BeingModified && (options & TABLE_MODIFY_WAIT))
{
TransactionId xwait;
uint16 infomask;
@@ -2744,7 +2746,30 @@ heap_delete(Relation relation, ItemPointer tid,
tmfd->cmax = HeapTupleHeaderGetCmax(tp.t_data);
else
tmfd->cmax = InvalidCommandId;
- UnlockReleaseBuffer(buffer);
+
+ /*
+ * If we're asked to lock the updated tuple, we just fetch the
+	 * existing tuple. That lets the caller save some resources when
+ * placing the lock.
+ */
+ if (result == TM_Updated &&
+ (options & TABLE_MODIFY_LOCK_UPDATED))
+ {
+ BufferHeapTupleTableSlot *bslot;
+
+ Assert(TTS_IS_BUFFERTUPLE(oldSlot));
+ bslot = (BufferHeapTupleTableSlot *) oldSlot;
+
+ LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+ bslot->base.tupdata = tp;
+ ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata,
+ oldSlot,
+ buffer);
+ }
+ else
+ {
+ UnlockReleaseBuffer(buffer);
+ }
if (have_tuple_lock)
UnlockTupleTuplock(relation, &(tp.t_self), LockTupleExclusive);
if (vmbuffer != InvalidBuffer)
@@ -2918,8 +2943,24 @@ heap_delete(Relation relation, ItemPointer tid,
*/
CacheInvalidateHeapTuple(relation, &tp, NULL);
- /* Now we can release the buffer */
- ReleaseBuffer(buffer);
+ /* Fetch the old tuple version if we're asked for that. */
+ if (options & TABLE_MODIFY_FETCH_OLD_TUPLE)
+ {
+ BufferHeapTupleTableSlot *bslot;
+
+ Assert(TTS_IS_BUFFERTUPLE(oldSlot));
+ bslot = (BufferHeapTupleTableSlot *) oldSlot;
+
+ bslot->base.tupdata = tp;
+ ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata,
+ oldSlot,
+ buffer);
+ }
+ else
+ {
+ /* Now we can release the buffer */
+ ReleaseBuffer(buffer);
+ }
/*
* Release the lmgr tuple lock, if we had it.
@@ -2951,8 +2992,8 @@ simple_heap_delete(Relation relation, ItemPointer tid)
result = heap_delete(relation, tid,
GetCurrentCommandId(true), InvalidSnapshot,
- true /* wait for commit */ ,
- &tmfd, false /* changingPart */ );
+ TABLE_MODIFY_WAIT /* wait for commit */ ,
+ &tmfd, false /* changingPart */ , NULL);
switch (result)
{
case TM_SelfModified:
@@ -2979,10 +3020,11 @@ simple_heap_delete(Relation relation, ItemPointer tid)
}
/*
- * heap_update - replace a tuple
+ * heap_update - replace a tuple, optionally fetching it into a slot
*
* See table_tuple_update() for an explanation of the parameters, except that
- * this routine directly takes a tuple rather than a slot.
+ * this routine directly takes a tuple rather than a slot. Also, we don't
+ * place a lock on the tuple in this function; we just fetch the existing version.
*
* In the failure cases, the routine fills *tmfd with the tuple's t_ctid,
* t_xmax (resolving a possible MultiXact, if necessary), and t_cmax (the last
@@ -2991,9 +3033,9 @@ simple_heap_delete(Relation relation, ItemPointer tid)
*/
TM_Result
heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
- CommandId cid, Snapshot crosscheck, bool wait,
+ CommandId cid, Snapshot crosscheck, int options,
TM_FailureData *tmfd, LockTupleMode *lockmode,
- TU_UpdateIndexes *update_indexes)
+ TU_UpdateIndexes *update_indexes, TupleTableSlot *oldSlot)
{
TM_Result result;
TransactionId xid = GetCurrentTransactionId();
@@ -3170,7 +3212,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
result = HeapTupleSatisfiesUpdate(&oldtup, cid, buffer);
/* see below about the "no wait" case */
- Assert(result != TM_BeingModified || wait);
+ Assert(result != TM_BeingModified || (options & TABLE_MODIFY_WAIT));
if (result == TM_Invisible)
{
@@ -3179,7 +3221,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("attempted to update invisible tuple")));
}
- else if (result == TM_BeingModified && wait)
+ else if (result == TM_BeingModified && (options & TABLE_MODIFY_WAIT))
{
TransactionId xwait;
uint16 infomask;
@@ -3383,7 +3425,30 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
tmfd->cmax = HeapTupleHeaderGetCmax(oldtup.t_data);
else
tmfd->cmax = InvalidCommandId;
- UnlockReleaseBuffer(buffer);
+
+ /*
+ * If we're asked to lock the updated tuple, we just fetch the
+	 * existing tuple. That lets the caller save some resources when
+ * placing the lock.
+ */
+ if (result == TM_Updated &&
+ (options & TABLE_MODIFY_LOCK_UPDATED))
+ {
+ BufferHeapTupleTableSlot *bslot;
+
+ Assert(TTS_IS_BUFFERTUPLE(oldSlot));
+ bslot = (BufferHeapTupleTableSlot *) oldSlot;
+
+ LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+ bslot->base.tupdata = oldtup;
+ ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata,
+ oldSlot,
+ buffer);
+ }
+ else
+ {
+ UnlockReleaseBuffer(buffer);
+ }
if (have_tuple_lock)
UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode);
if (vmbuffer != InvalidBuffer)
@@ -3862,7 +3927,26 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
/* Now we can release the buffer(s) */
if (newbuf != buffer)
ReleaseBuffer(newbuf);
- ReleaseBuffer(buffer);
+
+ /* Fetch the old tuple version if we're asked for that. */
+ if (options & TABLE_MODIFY_FETCH_OLD_TUPLE)
+ {
+ BufferHeapTupleTableSlot *bslot;
+
+ Assert(TTS_IS_BUFFERTUPLE(oldSlot));
+ bslot = (BufferHeapTupleTableSlot *) oldSlot;
+
+ bslot->base.tupdata = oldtup;
+ ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata,
+ oldSlot,
+ buffer);
+ }
+ else
+ {
+ /* Now we can release the buffer */
+ ReleaseBuffer(buffer);
+ }
+
if (BufferIsValid(vmbuffer_new))
ReleaseBuffer(vmbuffer_new);
if (BufferIsValid(vmbuffer))
@@ -4070,8 +4154,8 @@ simple_heap_update(Relation relation, ItemPointer otid, HeapTuple tup,
result = heap_update(relation, otid, tup,
GetCurrentCommandId(true), InvalidSnapshot,
- true /* wait for commit */ ,
- &tmfd, &lockmode, update_indexes);
+ TABLE_MODIFY_WAIT /* wait for commit */ ,
+ &tmfd, &lockmode, update_indexes, NULL);
switch (result)
{
case TM_SelfModified:
@@ -4134,12 +4218,14 @@ get_mxact_status_for_lock(LockTupleMode mode, bool is_update)
* tuples.
*
* Output parameters:
- * *tuple: all fields filled in
- * *buffer: set to buffer holding tuple (pinned but not locked at exit)
+ * *slot: BufferHeapTupleTableSlot filled with tuple
* *tmfd: filled in failure cases (see below)
*
* Function results are the same as the ones for table_tuple_lock().
*
+ * If *slot already contains the target tuple, it takes advantage of that by
+ * skipping the ReadBuffer() call.
+ *
* In the failure cases other than TM_Invisible, the routine fills
* *tmfd with the tuple's t_ctid, t_xmax (resolving a possible MultiXact,
* if necessary), and t_cmax (the last only for TM_SelfModified,
@@ -4150,15 +4236,14 @@ get_mxact_status_for_lock(LockTupleMode mode, bool is_update)
* See README.tuplock for a thorough explanation of this mechanism.
*/
TM_Result
-heap_lock_tuple(Relation relation, HeapTuple tuple,
+heap_lock_tuple(Relation relation, ItemPointer tid, TupleTableSlot *slot,
CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy,
- bool follow_updates,
- Buffer *buffer, TM_FailureData *tmfd)
+ bool follow_updates, TM_FailureData *tmfd)
{
TM_Result result;
- ItemPointer tid = &(tuple->t_self);
ItemId lp;
Page page;
+ Buffer buffer;
Buffer vmbuffer = InvalidBuffer;
BlockNumber block;
TransactionId xid,
@@ -4170,8 +4255,24 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
bool skip_tuple_lock = false;
bool have_tuple_lock = false;
bool cleared_all_frozen = false;
+ BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
+ HeapTuple tuple = &bslot->base.tupdata;
+
+ Assert(TTS_IS_BUFFERTUPLE(slot));
- *buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
+	/* Take advantage of the case where the slot already holds the relevant tuple */
+ if (!TTS_EMPTY(slot) &&
+ slot->tts_tableOid == relation->rd_id &&
+ ItemPointerCompare(&slot->tts_tid, tid) == 0 &&
+ BufferIsValid(bslot->buffer))
+ {
+ buffer = bslot->buffer;
+ IncrBufferRefCount(buffer);
+ }
+ else
+ {
+ buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
+ }
block = ItemPointerGetBlockNumber(tid);
/*
@@ -4180,21 +4281,22 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
* in the middle of changing this, so we'll need to recheck after we have
* the lock.
*/
- if (PageIsAllVisible(BufferGetPage(*buffer)))
+ if (PageIsAllVisible(BufferGetPage(buffer)))
visibilitymap_pin(relation, block, &vmbuffer);
- LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
- page = BufferGetPage(*buffer);
+ page = BufferGetPage(buffer);
lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid));
Assert(ItemIdIsNormal(lp));
+ tuple->t_self = *tid;
tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
tuple->t_len = ItemIdGetLength(lp);
tuple->t_tableOid = RelationGetRelid(relation);
l3:
- result = HeapTupleSatisfiesUpdate(tuple, cid, *buffer);
+ result = HeapTupleSatisfiesUpdate(tuple, cid, buffer);
if (result == TM_Invisible)
{
@@ -4223,7 +4325,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
infomask2 = tuple->t_data->t_infomask2;
ItemPointerCopy(&tuple->t_data->t_ctid, &t_ctid);
- LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
+ LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
/*
* If any subtransaction of the current top transaction already holds
@@ -4375,12 +4477,12 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
{
result = res;
/* recovery code expects to have buffer lock held */
- LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
goto failed;
}
}
- LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
/*
* Make sure it's still an appropriate lock, else start over.
@@ -4415,7 +4517,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
if (HEAP_XMAX_IS_LOCKED_ONLY(infomask) &&
!HEAP_XMAX_IS_EXCL_LOCKED(infomask))
{
- LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
/*
* Make sure it's still an appropriate lock, else start over.
@@ -4443,7 +4545,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
* No conflict, but if the xmax changed under us in the
* meantime, start over.
*/
- LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
!TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
xwait))
@@ -4455,7 +4557,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
}
else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask))
{
- LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
/* if the xmax changed in the meantime, start over */
if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
@@ -4483,7 +4585,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
TransactionIdIsCurrentTransactionId(xwait))
{
/* ... but if the xmax changed in the meantime, start over */
- LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
!TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
xwait))
@@ -4505,7 +4607,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
*/
if (require_sleep && (result == TM_Updated || result == TM_Deleted))
{
- LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
goto failed;
}
else if (require_sleep)
@@ -4530,7 +4632,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
*/
result = TM_WouldBlock;
/* recovery code expects to have buffer lock held */
- LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
goto failed;
}
@@ -4556,7 +4658,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
{
result = TM_WouldBlock;
/* recovery code expects to have buffer lock held */
- LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
goto failed;
}
break;
@@ -4596,7 +4698,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
{
result = TM_WouldBlock;
/* recovery code expects to have buffer lock held */
- LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
goto failed;
}
break;
@@ -4622,12 +4724,12 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
{
result = res;
/* recovery code expects to have buffer lock held */
- LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
goto failed;
}
}
- LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
/*
* xwait is done, but if xwait had just locked the tuple then some
@@ -4649,7 +4751,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
* don't check for this in the multixact case, because some
* locker transactions might still be running.
*/
- UpdateXmaxHintBits(tuple->t_data, *buffer, xwait);
+ UpdateXmaxHintBits(tuple->t_data, buffer, xwait);
}
}
@@ -4708,9 +4810,9 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
*/
if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
{
- LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
+ LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
visibilitymap_pin(relation, block, &vmbuffer);
- LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
goto l3;
}
@@ -4773,7 +4875,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
cleared_all_frozen = true;
- MarkBufferDirty(*buffer);
+ MarkBufferDirty(buffer);
/*
* XLOG stuff. You might think that we don't need an XLOG record because
@@ -4793,7 +4895,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
XLogRecPtr recptr;
XLogBeginInsert();
- XLogRegisterBuffer(0, *buffer, REGBUF_STANDARD);
+ XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
xlrec.xmax = xid;
@@ -4814,7 +4916,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
result = TM_Ok;
out_locked:
- LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
+ LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
out_unlocked:
if (BufferIsValid(vmbuffer))
@@ -4832,6 +4934,9 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
if (have_tuple_lock)
UnlockTupleTuplock(relation, tid, mode);
+	/* Put the target tuple into the slot */
+ ExecStorePinnedBufferHeapTuple(tuple, slot, buffer);
+
return result;
}
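
Taken together, heap_delete() and heap_update() replace the old boolean wait parameter with an options bitmask: TABLE_MODIFY_WAIT reproduces the previous behavior, while TABLE_MODIFY_FETCH_OLD_TUPLE and TABLE_MODIFY_LOCK_UPDATED additionally return the prior row version in oldSlot. A hedged caller-side sketch of the fetch path (the executor context is assumed, not shown in this patch):

    /* Sketch: delete a tuple and fetch its old version in the same call. */
    TupleTableSlot *oldSlot = table_slot_create(rel, NULL);    /* buffer slot */
    TM_FailureData tmfd;
    TM_Result   res;

    res = heap_delete(rel, tid, GetCurrentCommandId(true), InvalidSnapshot,
                      TABLE_MODIFY_WAIT | TABLE_MODIFY_FETCH_OLD_TUPLE,
                      &tmfd, false /* changingPart */ , oldSlot);
    if (res == TM_Ok)
    {
        /* oldSlot now holds the deleted version, pinned in its buffer */
    }
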
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index 5a17112c91e..a32fc3b69fb 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -23,6 +23,7 @@
#include "access/heapam.h"
#include "access/heaptoast.h"
#include "access/multixact.h"
+#include "access/reloptions.h"
#include "access/rewriteheap.h"
#include "access/syncscan.h"
#include "access/tableam.h"
@@ -45,6 +46,12 @@
#include "utils/builtins.h"
#include "utils/rel.h"
+static TM_Result heapam_tuple_lock(Relation relation, Datum tid,
+ Snapshot snapshot, TupleTableSlot *slot,
+ CommandId cid, LockTupleMode mode,
+ LockWaitPolicy wait_policy, uint8 flags,
+ TM_FailureData *tmfd);
+
static void reform_and_rewrite_tuple(HeapTuple tuple,
Relation OldHeap, Relation NewHeap,
Datum *values, bool *isnull, RewriteState rwstate);
@@ -69,6 +76,20 @@ heapam_slot_callbacks(Relation relation)
return &TTSOpsBufferHeapTuple;
}
+static RowRefType
+heapam_get_row_ref_type(Relation rel)
+{
+ return ROW_REF_TID;
+}
+
+static void
+heapam_free_rd_amcache(Relation rel)
+{
+ if (rel->rd_amcache)
+ pfree(rel->rd_amcache);
+ rel->rd_amcache = NULL;
+}
+
/* ------------------------------------------------------------------------
* Index Scan Callbacks for heap AM
@@ -110,7 +131,7 @@ heapam_index_fetch_end(IndexFetchTableData *scan)
static bool
heapam_index_fetch_tuple(struct IndexFetchTableData *scan,
- ItemPointer tid,
+ Datum tupleid,
Snapshot snapshot,
TupleTableSlot *slot,
bool *call_again, bool *all_dead)
@@ -118,6 +139,7 @@ heapam_index_fetch_tuple(struct IndexFetchTableData *scan,
IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
bool got_heap_tuple;
+ ItemPointer tid = DatumGetItemPointer(tupleid);
Assert(TTS_IS_BUFFERTUPLE(slot));
@@ -178,7 +200,7 @@ heapam_index_fetch_tuple(struct IndexFetchTableData *scan,
static bool
heapam_fetch_row_version(Relation relation,
- ItemPointer tid,
+ Datum tupleid,
Snapshot snapshot,
TupleTableSlot *slot)
{
@@ -187,7 +209,7 @@ heapam_fetch_row_version(Relation relation,
Assert(TTS_IS_BUFFERTUPLE(slot));
- bslot->base.tupdata.t_self = *tid;
+ bslot->base.tupdata.t_self = *DatumGetItemPointer(tupleid);
if (heap_fetch(relation, snapshot, &bslot->base.tupdata, &buffer, false))
{
/* store in slot, transferring existing pin */
@@ -237,7 +259,7 @@ heapam_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot,
* ----------------------------------------------------------------------------
*/
-static void
+static TupleTableSlot *
heapam_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid,
int options, BulkInsertState bistate)
{
@@ -254,6 +276,8 @@ heapam_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid,
if (shouldFree)
pfree(tuple);
+
+ return slot;
}
static void
@@ -296,36 +320,341 @@ heapam_tuple_complete_speculative(Relation relation, TupleTableSlot *slot,
pfree(tuple);
}
+/*
+ * ExecCheckTupleVisible -- verify tuple is visible
+ *
+ * It would not be consistent with guarantees of the higher isolation levels to
+ * proceed with avoiding insertion (taking speculative insertion's alternative
+ * path) on the basis of another tuple that is not visible to MVCC snapshot.
+ * Check for the need to raise a serialization failure, and do so as necessary.
+ */
+static void
+ExecCheckTupleVisible(EState *estate,
+ Relation rel,
+ TupleTableSlot *slot)
+{
+ if (!IsolationUsesXactSnapshot())
+ return;
+
+ if (!table_tuple_satisfies_snapshot(rel, slot, estate->es_snapshot))
+ {
+ Datum xminDatum;
+ TransactionId xmin;
+ bool isnull;
+
+ xminDatum = slot_getsysattr(slot, MinTransactionIdAttributeNumber, &isnull);
+ Assert(!isnull);
+ xmin = DatumGetTransactionId(xminDatum);
+
+ /*
+ * We should not raise a serialization failure if the conflict is
+ * against a tuple inserted by our own transaction, even if it's not
+ * visible to our snapshot. (This would happen, for example, if
+ * conflicting keys are proposed for insertion in a single command.)
+ */
+ if (!TransactionIdIsCurrentTransactionId(xmin))
+ ereport(ERROR,
+ (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+ errmsg("could not serialize access due to concurrent update")));
+ }
+}
+
+/*
+ * ExecCheckTIDVisible -- convenience variant of ExecCheckTupleVisible()
+ */
+static void
+ExecCheckTIDVisible(EState *estate,
+ Relation rel,
+ ItemPointer tid,
+ TupleTableSlot *tempSlot)
+{
+ /* Redundantly check isolation level */
+ if (!IsolationUsesXactSnapshot())
+ return;
+
+ if (!table_tuple_fetch_row_version(rel, PointerGetDatum(tid),
+ SnapshotAny, tempSlot))
+ elog(ERROR, "failed to fetch conflicting tuple for ON CONFLICT");
+ ExecCheckTupleVisible(estate, rel, tempSlot);
+ ExecClearTuple(tempSlot);
+}
+
+static inline TupleTableSlot *
+heapam_tuple_insert_with_arbiter(ResultRelInfo *resultRelInfo,
+ TupleTableSlot *slot,
+ CommandId cid, int options,
+ struct BulkInsertStateData *bistate,
+ List *arbiterIndexes,
+ EState *estate,
+ LockTupleMode lockmode,
+ TupleTableSlot *lockedSlot,
+ TupleTableSlot *tempSlot)
+{
+ Relation rel = resultRelInfo->ri_RelationDesc;
+ uint32 specToken;
+ ItemPointerData conflictTid;
+ bool specConflict;
+ List *recheckIndexes = NIL;
+
+ while (true)
+ {
+ specConflict = false;
+ if (!ExecCheckIndexConstraints(resultRelInfo, slot, estate, &conflictTid,
+ arbiterIndexes))
+ {
+ if (lockedSlot)
+ {
+ TM_Result test;
+ TM_FailureData tmfd;
+ Datum xminDatum;
+ TransactionId xmin;
+ bool isnull;
+
+ /* Determine lock mode to use */
+ lockmode = ExecUpdateLockMode(estate, resultRelInfo);
+
+ /*
+ * Lock tuple for update. Don't follow updates when tuple cannot be
+ * locked without doing so. A row locking conflict here means our
+ * previous conclusion that the tuple is conclusively committed is not
+ * true anymore.
+ */
+ test = table_tuple_lock(rel, PointerGetDatum(&conflictTid),
+ estate->es_snapshot,
+ lockedSlot, estate->es_output_cid,
+ lockmode, LockWaitBlock, 0,
+ &tmfd);
+ switch (test)
+ {
+ case TM_Ok:
+ /* success! */
+ break;
+
+ case TM_Invisible:
+
+ /*
+ * This can occur when a just inserted tuple is updated again in
+ * the same command. E.g. because multiple rows with the same
+ * conflicting key values are inserted.
+ *
+ * This is somewhat similar to the ExecUpdate() TM_SelfModified
+ * case. We do not want to proceed because it would lead to the
+ * same row being updated a second time in some unspecified order,
+ * and in contrast to plain UPDATEs there's no historical behavior
+ * to break.
+ *
+ * It is the user's responsibility to prevent this situation from
+ * occurring. These problems are why the SQL standard similarly
+ * specifies that for SQL MERGE, an exception must be raised in
+ * the event of an attempt to update the same row twice.
+ */
+ xminDatum = slot_getsysattr(lockedSlot,
+ MinTransactionIdAttributeNumber,
+ &isnull);
+ Assert(!isnull);
+ xmin = DatumGetTransactionId(xminDatum);
+
+ if (TransactionIdIsCurrentTransactionId(xmin))
+ ereport(ERROR,
+ (errcode(ERRCODE_CARDINALITY_VIOLATION),
+ /* translator: %s is a SQL command name */
+ errmsg("%s command cannot affect row a second time",
+ "ON CONFLICT DO UPDATE"),
+ errhint("Ensure that no rows proposed for insertion within the same command have duplicate constrained values.")));
+
+ /* This shouldn't happen */
+ elog(ERROR, "attempted to lock invisible tuple");
+ break;
+
+ case TM_SelfModified:
+
+ /*
+ * This state should never be reached. As a dirty snapshot is used
+ * to find conflicting tuples, speculative insertion wouldn't have
+ * seen this row to conflict with.
+ */
+ elog(ERROR, "unexpected self-updated tuple");
+ break;
+
+ case TM_Updated:
+ if (IsolationUsesXactSnapshot())
+ ereport(ERROR,
+ (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+ errmsg("could not serialize access due to concurrent update")));
+
+ /*
+ * As long as we don't support an UPDATE of INSERT ON CONFLICT for
+					 * a partitioned table, we shouldn't reach a case where the tuple to
+					 * be locked is moved to another partition due to a concurrent update
+ * of the partition key.
+ */
+ Assert(!ItemPointerIndicatesMovedPartitions(&tmfd.ctid));
+
+ /*
+ * Tell caller to try again from the very start.
+ *
+ * It does not make sense to use the usual EvalPlanQual() style
+ * loop here, as the new version of the row might not conflict
+ * anymore, or the conflicting tuple has actually been deleted.
+ */
+ ExecClearTuple(lockedSlot);
+					return NULL;
+
+ case TM_Deleted:
+ if (IsolationUsesXactSnapshot())
+ ereport(ERROR,
+ (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+ errmsg("could not serialize access due to concurrent delete")));
+
+ /* see TM_Updated case */
+ Assert(!ItemPointerIndicatesMovedPartitions(&tmfd.ctid));
+ ExecClearTuple(lockedSlot);
+					return NULL;
+
+ default:
+ elog(ERROR, "unrecognized table_tuple_lock status: %u", test);
+ }
+
+ /* Success, the tuple is locked. */
+
+ /*
+ * Verify that the tuple is visible to our MVCC snapshot if the current
+ * isolation level mandates that.
+ *
+ * It's not sufficient to rely on the check within ExecUpdate() as e.g.
+ * CONFLICT ... WHERE clause may prevent us from reaching that.
+ *
+ * This means we only ever continue when a new command in the current
+ * transaction could see the row, even though in READ COMMITTED mode the
+ * tuple will not be visible according to the current statement's
+ * snapshot. This is in line with the way UPDATE deals with newer tuple
+ * versions.
+ */
+ ExecCheckTupleVisible(estate, rel, lockedSlot);
+ return NULL;
+ }
+ else
+ {
+ ExecCheckTIDVisible(estate, rel, &conflictTid, tempSlot);
+ return NULL;
+ }
+ }
+
+ /*
+ * Before we start insertion proper, acquire our "speculative
+ * insertion lock". Others can use that to wait for us to decide
+ * if we're going to go ahead with the insertion, instead of
+ * waiting for the whole transaction to complete.
+ */
+ specToken = SpeculativeInsertionLockAcquire(GetCurrentTransactionId());
+
+ /* insert the tuple, with the speculative token */
+ heapam_tuple_insert_speculative(rel, slot,
+ estate->es_output_cid,
+ 0,
+ NULL,
+ specToken);
+
+ /* insert index entries for tuple */
+ recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
+ slot, estate, false, true,
+ &specConflict,
+ arbiterIndexes,
+ false);
+
+ /* adjust the tuple's state accordingly */
+ heapam_tuple_complete_speculative(rel, slot,
+ specToken, !specConflict);
+
+ /*
+ * Wake up anyone waiting for our decision. They will re-check
+ * the tuple, see that it's no longer speculative, and wait on our
+ * XID as if this was a regularly inserted tuple all along. Or if
+ * we killed the tuple, they will see it's dead, and proceed as if
+ * the tuple never existed.
+ */
+ SpeculativeInsertionLockRelease(GetCurrentTransactionId());
+
+ /*
+ * If there was a conflict, start from the beginning. We'll do
+ * the pre-check again, which will now find the conflicting tuple
+ * (unless it aborts before we get there).
+ */
+ if (specConflict)
+ {
+ list_free(recheckIndexes);
+ CHECK_FOR_INTERRUPTS();
+ continue;
+ }
+
+ return slot;
+ }
+}
+
static TM_Result
-heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid,
- Snapshot snapshot, Snapshot crosscheck, bool wait,
- TM_FailureData *tmfd, bool changingPart)
+heapam_tuple_delete(Relation relation, Datum tupleid, CommandId cid,
+ Snapshot snapshot, Snapshot crosscheck, int options,
+ TM_FailureData *tmfd, bool changingPart,
+ TupleTableSlot *oldSlot)
{
+ TM_Result result;
+ ItemPointer tid = DatumGetItemPointer(tupleid);
+
/*
* Currently Deleting of index tuples are handled at vacuum, in case if
* the storage itself is cleaning the dead tuples by itself, it is the
* time to call the index tuple deletion also.
*/
- return heap_delete(relation, tid, cid, crosscheck, wait, tmfd, changingPart);
+ result = heap_delete(relation, tid, cid, crosscheck, options,
+ tmfd, changingPart, oldSlot);
+
+ /*
+ * If the tuple has been concurrently updated, then get the lock on it.
+ * (Do only if caller asked for this by setting the
+ * TABLE_MODIFY_LOCK_UPDATED option) With the lock held retry of the
+ * delete should succeed even if there are more concurrent update
+ * attempts.
+ */
+ if (result == TM_Updated && (options & TABLE_MODIFY_LOCK_UPDATED))
+ {
+ /*
+		 * heapam_tuple_lock() will take advantage of the tuple loaded into
+ * oldSlot by heap_delete().
+ */
+ result = heapam_tuple_lock(relation, tupleid, snapshot,
+ oldSlot, cid, LockTupleExclusive,
+ (options & TABLE_MODIFY_WAIT) ?
+ LockWaitBlock :
+ LockWaitSkip,
+ TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
+ tmfd);
+
+ if (result == TM_Ok)
+ return TM_Updated;
+ }
+
+ return result;
}
static TM_Result
-heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
+heapam_tuple_update(Relation relation, Datum tupleid, TupleTableSlot *slot,
CommandId cid, Snapshot snapshot, Snapshot crosscheck,
- bool wait, TM_FailureData *tmfd,
- LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
+ int options, TM_FailureData *tmfd,
+ LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes,
+ TupleTableSlot *oldSlot)
{
bool shouldFree = true;
HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
TM_Result result;
+ ItemPointer otid = DatumGetItemPointer(tupleid);
/* Update the tuple with table oid */
slot->tts_tableOid = RelationGetRelid(relation);
tuple->t_tableOid = slot->tts_tableOid;
- result = heap_update(relation, otid, tuple, cid, crosscheck, wait,
- tmfd, lockmode, update_indexes);
+ result = heap_update(relation, otid, tuple, cid, crosscheck, options,
+ tmfd, lockmode, update_indexes, oldSlot);
ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
/*
@@ -352,19 +681,44 @@ heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
if (shouldFree)
pfree(tuple);
+ /*
+ * If the tuple has been concurrently updated, then get the lock on it.
+	 * (Do this only if the caller asked for it by setting the
+	 * TABLE_MODIFY_LOCK_UPDATED option.) With the lock held, a retry of
+	 * the update should succeed even if there are more concurrent update
+ * attempts.
+ */
+ if (result == TM_Updated && (options & TABLE_MODIFY_LOCK_UPDATED))
+ {
+ /*
+		 * heapam_tuple_lock() will take advantage of the tuple loaded into
+ * oldSlot by heap_update().
+ */
+ result = heapam_tuple_lock(relation, tupleid, snapshot,
+ oldSlot, cid, *lockmode,
+ (options & TABLE_MODIFY_WAIT) ?
+ LockWaitBlock :
+ LockWaitSkip,
+ TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
+ tmfd);
+
+ if (result == TM_Ok)
+ return TM_Updated;
+ }
+
return result;
}
static TM_Result
-heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
+heapam_tuple_lock(Relation relation, Datum tupleid, Snapshot snapshot,
TupleTableSlot *slot, CommandId cid, LockTupleMode mode,
LockWaitPolicy wait_policy, uint8 flags,
TM_FailureData *tmfd)
{
BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
TM_Result result;
- Buffer buffer;
HeapTuple tuple = &bslot->base.tupdata;
+ ItemPointer tid = DatumGetItemPointer(tupleid);
bool follow_updates;
follow_updates = (flags & TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS) != 0;
@@ -373,9 +727,8 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
Assert(TTS_IS_BUFFERTUPLE(slot));
tuple_lock_retry:
- tuple->t_self = *tid;
- result = heap_lock_tuple(relation, tuple, cid, mode, wait_policy,
- follow_updates, &buffer, tmfd);
+ result = heap_lock_tuple(relation, tid, slot, cid, mode, wait_policy,
+ follow_updates, tmfd);
if (result == TM_Updated &&
(flags & TUPLE_LOCK_FLAG_FIND_LAST_VERSION))
@@ -383,8 +736,6 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
/* Should not encounter speculative tuple on recheck */
Assert(!HeapTupleHeaderIsSpeculative(tuple->t_data));
- ReleaseBuffer(buffer);
-
if (!ItemPointerEquals(&tmfd->ctid, &tuple->t_self))
{
SnapshotData SnapshotDirty;
@@ -406,6 +757,8 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
InitDirtySnapshot(SnapshotDirty);
for (;;)
{
+ Buffer buffer = InvalidBuffer;
+
if (ItemPointerIndicatesMovedPartitions(tid))
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
@@ -500,7 +853,7 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
/*
* This is a live tuple, so try to lock it again.
*/
- ReleaseBuffer(buffer);
+ ExecStorePinnedBufferHeapTuple(tuple, slot, buffer);
goto tuple_lock_retry;
}
@@ -511,7 +864,7 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
*/
if (tuple->t_data == NULL)
{
- Assert(!BufferIsValid(buffer));
+ ReleaseBuffer(buffer);
return TM_Deleted;
}
@@ -564,9 +917,6 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
slot->tts_tableOid = RelationGetRelid(relation);
tuple->t_tableOid = slot->tts_tableOid;
- /* store in slot, transferring existing pin */
- ExecStorePinnedBufferHeapTuple(tuple, slot, buffer);
-
return result;
}
@@ -2536,6 +2886,29 @@ SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
}
}
+static bool
+heapam_tuple_is_current(Relation rel, TupleTableSlot *slot)
+{
+ Datum xminDatum;
+ TransactionId xmin;
+ bool isnull;
+
+ xminDatum = slot_getsysattr(slot, MinTransactionIdAttributeNumber, &isnull);
+ Assert(!isnull);
+ xmin = DatumGetTransactionId(xminDatum);
+ return TransactionIdIsCurrentTransactionId(xmin);
+}
+
+static bytea *
+heapam_reloptions(char relkind, Datum reloptions, bool validate)
+{
+ if (relkind == RELKIND_RELATION ||
+ relkind == RELKIND_TOASTVALUE ||
+ relkind == RELKIND_MATVIEW)
+ return heap_reloptions(relkind, reloptions, validate);
+
+ return NULL;
+}
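
The reloptions callback moves storage-option validation into the table AM itself. A minimal sketch of how another AM might implement the same callback; my_build_reloptions is a hypothetical helper that would wrap build_reloptions() with the AM's own parse table:

```c
/* A sketch only; a real AM would supply its own relopt_parse_elt table. */
static bytea *
my_reloptions(char relkind, Datum reloptions, bool validate)
{
	/* this hypothetical AM only stores options for plain tables */
	if (relkind != RELKIND_RELATION)
		return NULL;

	return my_build_reloptions(reloptions, validate);
}
```
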
/* ------------------------------------------------------------------------
* Definition of the heap table access method.
@@ -2546,6 +2919,8 @@ static const TableAmRoutine heapam_methods = {
.type = T_TableAmRoutine,
.slot_callbacks = heapam_slot_callbacks,
+ .get_row_ref_type = heapam_get_row_ref_type,
+ .free_rd_amcache = heapam_free_rd_amcache,
.scan_begin = heap_beginscan,
.scan_end = heap_endscan,
@@ -2565,8 +2940,7 @@ static const TableAmRoutine heapam_methods = {
.index_fetch_tuple = heapam_index_fetch_tuple,
.tuple_insert = heapam_tuple_insert,
- .tuple_insert_speculative = heapam_tuple_insert_speculative,
- .tuple_complete_speculative = heapam_tuple_complete_speculative,
+ .tuple_insert_with_arbiter = heapam_tuple_insert_with_arbiter,
.multi_insert = heap_multi_insert,
.tuple_delete = heapam_tuple_delete,
.tuple_update = heapam_tuple_update,
@@ -2598,7 +2972,11 @@ static const TableAmRoutine heapam_methods = {
.scan_bitmap_next_block = heapam_scan_bitmap_next_block,
.scan_bitmap_next_tuple = heapam_scan_bitmap_next_tuple,
.scan_sample_next_block = heapam_scan_sample_next_block,
- .scan_sample_next_tuple = heapam_scan_sample_next_tuple
+ .scan_sample_next_tuple = heapam_scan_sample_next_tuple,
+
+ .tuple_is_current = heapam_tuple_is_current,
+
+ .reloptions = heapam_reloptions
};
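
Taken together, an out-of-tree table AM tracking this patch would swap the speculative-insert pair for tuple_insert_with_arbiter and supply the newly asserted callbacks. A hedged sketch; all my_* names are hypothetical:

```c
/* Sketch of an extension AM's routine under the revised TableAmRoutine. */
static const TableAmRoutine my_am_methods = {
	.type = T_TableAmRoutine,
	/* ... scan, fetch, insert, delete, update callbacks as before ... */
	.tuple_insert_with_arbiter = my_tuple_insert_with_arbiter,	/* replaces the
																 * speculative pair */
	.tuple_is_current = my_tuple_is_current,	/* asserted non-NULL now */
	.reloptions = my_reloptions,				/* per-AM option validation */
};

PG_FUNCTION_INFO_V1(my_am_handler);

Datum
my_am_handler(PG_FUNCTION_ARGS)
{
	PG_RETURN_POINTER(&my_am_methods);
}
```
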
diff --git a/src/backend/access/index/amapi.c b/src/backend/access/index/amapi.c
index 8b02cdbe825..ed2b9fc9e68 100644
--- a/src/backend/access/index/amapi.c
+++ b/src/backend/access/index/amapi.c
@@ -16,25 +16,27 @@
#include "access/amapi.h"
#include "access/htup_details.h"
#include "catalog/pg_am.h"
+#include "catalog/pg_class.h"
+#include "catalog/pg_index.h"
#include "catalog/pg_opclass.h"
#include "utils/builtins.h"
#include "utils/syscache.h"
+IndexAMRoutineHookType IndexAMRoutineHook = NULL;
-/*
- * GetIndexAmRoutine - call the specified access method handler routine to get
- * its IndexAmRoutine struct, which will be palloc'd in the caller's context.
- *
- * Note that if the amhandler function is built-in, this will not involve
- * any catalog access. It's therefore safe to use this while bootstrapping
- * indexes for the system catalogs. relcache.c relies on that.
- */
IndexAmRoutine *
-GetIndexAmRoutine(Oid amhandler)
+GetIndexAmRoutineWithTableAM(Oid tamoid, Oid amhandler)
{
Datum datum;
IndexAmRoutine *routine;
+ if (IndexAMRoutineHook != NULL)
+ {
+ routine = IndexAMRoutineHook(tamoid, amhandler);
+ if (routine)
+ return routine;
+ }
+
datum = OidFunctionCall0(amhandler);
routine = (IndexAmRoutine *) DatumGetPointer(datum);
@@ -45,6 +47,52 @@ GetIndexAmRoutine(Oid amhandler)
return routine;
}
+/*
+ * GetIndexAmRoutine - call the specified access method handler routine to get
+ * its IndexAmRoutine struct, which will be palloc'd in the caller's context.
+ *
+ * Note that if the amhandler function is built-in, this will not involve
+ * any catalog access. It's therefore safe to use this while bootstrapping
+ * indexes for the system catalogs. relcache.c relies on that.
+ */
+IndexAmRoutine *
+GetIndexAmRoutine(Oid amhandler)
+{
+ return GetIndexAmRoutineExtended(InvalidOid, amhandler);
+}
+
+IndexAmRoutine *
+GetIndexAmRoutineExtended(Oid indoid, Oid amhandler)
+{
+ HeapTuple ht_idx;
+ HeapTuple ht_tblrel;
+ Form_pg_index idxrec;
+ Form_pg_class tblrelrec;
+ Oid indrelid;
+ Oid tamoid;
+
+ if (!OidIsValid((indoid)) || indoid < FirstNormalObjectId)
+ return GetIndexAmRoutineWithTableAM(HEAP_TABLE_AM_OID, amhandler);
+
+ ht_idx = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indoid));
+ if (!HeapTupleIsValid(ht_idx))
+ elog(ERROR, "cache lookup failed for index %u", indoid);
+ idxrec = (Form_pg_index) GETSTRUCT(ht_idx);
+ Assert(indoid == idxrec->indexrelid);
+ indrelid = idxrec->indrelid;
+
+ ht_tblrel = SearchSysCache1(RELOID, ObjectIdGetDatum(indrelid));
+ if (!HeapTupleIsValid(ht_tblrel))
+ elog(ERROR, "cache lookup failed for relation %u", indrelid);
+ tblrelrec = (Form_pg_class) GETSTRUCT(ht_tblrel);
+ tamoid = tblrelrec->relam;
+
+ ReleaseSysCache(ht_tblrel);
+ ReleaseSysCache(ht_idx);
+
+ return GetIndexAmRoutineWithTableAM(tamoid, amhandler);
+}
+
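
IndexAMRoutineHook gives a table AM the first chance to supply index AM routines for its own indexes; returning NULL falls through to the regular amhandler call. A sketch of how an extension might install it (MY_TABLE_AM_OID and my_make_index_am_routine are hypothetical):

```c
/* Sketch: substitute index AM routines for indexes on one table AM. */
static IndexAmRoutine *
my_index_routine_hook(Oid tamoid, Oid amhandler)
{
	if (tamoid != MY_TABLE_AM_OID)
		return NULL;			/* not ours: fall through to amhandler */

	/* return a palloc'd IndexAmRoutine with our callbacks filled in */
	return my_make_index_am_routine(amhandler);
}

void
_PG_init(void)
{
	IndexAMRoutineHook = my_index_routine_hook;
}
```
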
/*
* GetIndexAmRoutineByAmId - look up the handler of the index access method
* with the given OID, and get its IndexAmRoutine struct.
@@ -53,7 +101,7 @@ GetIndexAmRoutine(Oid amhandler)
* noerror is true, else throws error.
*/
IndexAmRoutine *
-GetIndexAmRoutineByAmId(Oid amoid, bool noerror)
+GetIndexAmRoutineByAmId(Oid indoid, Oid amoid, bool noerror)
{
HeapTuple tuple;
Form_pg_am amform;
@@ -103,7 +151,7 @@ GetIndexAmRoutineByAmId(Oid amoid, bool noerror)
ReleaseSysCache(tuple);
/* And finally, call the handler function to get the API struct. */
- return GetIndexAmRoutine(amhandler);
+ return GetIndexAmRoutineExtended(indoid, amhandler);
}
@@ -129,7 +177,7 @@ amvalidate(PG_FUNCTION_ARGS)
ReleaseSysCache(classtup);
- amroutine = GetIndexAmRoutineByAmId(amoid, false);
+ amroutine = GetIndexAmRoutineByAmId(InvalidOid, amoid, false);
if (amroutine->amvalidate == NULL)
elog(ERROR, "function amvalidate is not defined for index access method %u",
diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c
index 709b2641021..e0535503145 100644
--- a/src/backend/access/index/genam.c
+++ b/src/backend/access/index/genam.c
@@ -104,6 +104,7 @@ RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
scan->orderByData = NULL;
scan->xs_want_itup = false; /* may be set later */
+ scan->xs_want_rowid = false; /* may be set later */
/*
* During recovery we ignore killed tuples and don't bother to kill them
@@ -125,6 +126,7 @@ RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
scan->xs_itupdesc = NULL;
scan->xs_hitup = NULL;
scan->xs_hitupdesc = NULL;
+ scan->xs_rowid.isnull = true;
return scan;
}
diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c
index 715e91e25f0..94bdec63666 100644
--- a/src/backend/access/index/indexam.c
+++ b/src/backend/access/index/indexam.c
@@ -218,24 +218,99 @@ bool
index_insert(Relation indexRelation,
Datum *values,
bool *isnull,
- ItemPointer heap_t_ctid,
+ ItemPointer tupleid,
Relation heapRelation,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
IndexInfo *indexInfo)
{
RELATION_CHECKS;
- CHECK_REL_PROCEDURE(aminsert);
+
+	if (indexRelation->rd_indam->aminsertextended == NULL && indexRelation->rd_indam->aminsert == NULL)
+		elog(ERROR, "at least one of aminsert or aminsertextended must be defined for index \"%s\"",
+			 RelationGetRelationName(indexRelation));
if (!(indexRelation->rd_indam->ampredlocks))
CheckForSerializableConflictIn(indexRelation,
(ItemPointer) NULL,
InvalidBlockNumber);
- return indexRelation->rd_indam->aminsert(indexRelation, values, isnull,
- heap_t_ctid, heapRelation,
+ if (indexRelation->rd_indam->aminsert)
+ {
+		/* compatibility path for extension AMs not aware of aminsertextended */
+ return indexRelation->rd_indam->aminsert(indexRelation, values, isnull,
+ tupleid, heapRelation,
+ checkUnique, indexUnchanged,
+ indexInfo);
+ }
+ else
+ {
+		/* insert path for in-core AMs and table AMs (e.g. OrioleDB) aware of aminsertextended */
+ return indexRelation->rd_indam->aminsertextended(indexRelation, values, isnull,
+ ItemPointerGetDatum(tupleid), heapRelation,
checkUnique, indexUnchanged,
indexInfo);
+ }
+}
+
+/* ----------------
+ * index_update - update an index tuple in a relation
+ * ----------------
+ */
+bool
+index_update(Relation indexRelation,
+ bool new_valid,
+ bool old_valid,
+ Datum *values,
+ bool *isnull,
+ Datum tupleid,
+ Datum *valuesOld,
+ bool *isnullOld,
+ Datum oldTupleid,
+ Relation heapRelation,
+ IndexUniqueCheck checkUnique,
+ IndexInfo *indexInfo)
+{
+ RELATION_CHECKS;
+ CHECK_REL_PROCEDURE(amupdate);
+
+ if (!(indexRelation->rd_indam->ampredlocks))
+ CheckForSerializableConflictIn(indexRelation,
+ (ItemPointer) NULL,
+ InvalidBlockNumber);
+
+ return indexRelation->rd_indam->amupdate(indexRelation,
+ new_valid, old_valid,
+ values, isnull, tupleid,
+ valuesOld, isnullOld, oldTupleid,
+ heapRelation,
+ checkUnique,
+ indexInfo);
+}
+
+
+/* ----------------
+ * index_delete - delete an index tuple from a relation
+ * ----------------
+ */
+bool
+index_delete(Relation indexRelation,
+ Datum *values, bool *isnull, Datum tupleid,
+ Relation heapRelation,
+ IndexInfo *indexInfo)
+{
+ RELATION_CHECKS;
+ CHECK_REL_PROCEDURE(amdelete);
+
+ if (!(indexRelation->rd_indam->ampredlocks))
+ CheckForSerializableConflictIn(indexRelation,
+ (ItemPointer) NULL,
+ InvalidBlockNumber);
+
+ return indexRelation->rd_indam->amdelete(indexRelation,
+ values, isnull, tupleid,
+ heapRelation,
+ indexInfo);
}
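
For an index AM to be reachable through index_update() and index_delete(), it must provide amupdate and amdelete callbacks whose shapes match the calls above. A skeletal, hypothetical sketch:

```c
/* Skeletons only; bodies depend entirely on the AM's on-disk format. */
static bool
my_amdelete(Relation index, Datum *values, bool *isnull, Datum tupleid,
			Relation heapRel, IndexInfo *indexInfo)
{
	/* find the entry keyed by (values, tupleid) and remove it */
	return true;				/* report whether an entry was deleted */
}

static bool
my_amupdate(Relation index, bool new_valid, bool old_valid,
			Datum *values, bool *isnull, Datum tupleid,
			Datum *valuesOld, bool *isnullOld, Datum oldTupleid,
			Relation heapRel, IndexUniqueCheck checkUnique,
			IndexInfo *indexInfo)
{
	/* remove the old entry (if old_valid), insert the new (if new_valid) */
	return true;
}
```
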
/*
@@ -603,6 +678,55 @@ index_getnext_tid(IndexScanDesc scan, ScanDirection direction)
return &scan->xs_heaptid;
}
+/* ----------------
+ * index_getnext_rowid - get the next ROWID from a scan
+ *
+ * The result is the next ROWID satisfying the scan keys,
+ * or a null datum if no more matching tuples exist.
+ * ----------------
+ */
+NullableDatum
+index_getnext_rowid(IndexScanDesc scan, ScanDirection direction)
+{
+ NullableDatum result;
+ bool found;
+
+ SCAN_CHECKS;
+ CHECK_SCAN_PROCEDURE(amgettuple);
+
+ /* XXX: we should assert that a snapshot is pushed or registered */
+ Assert(TransactionIdIsValid(RecentXmin));
+
+ /*
+	 * The AM's amgettuple proc finds the next index entry matching the scan
+	 * keys, and puts the ROWID into scan->xs_rowid.  It should also set
+ * scan->xs_recheck and possibly scan->xs_itup/scan->xs_hitup, though we
+ * pay no attention to those fields here.
+ */
+ found = scan->indexRelation->rd_indam->amgettuple(scan, direction);
+
+ /* Reset kill flag immediately for safety */
+ scan->kill_prior_tuple = false;
+ scan->xs_heap_continue = false;
+
+ /* If we're out of index entries, we're done */
+ if (!found)
+ {
+ /* release resources (like buffer pins) from table accesses */
+ if (scan->xs_heapfetch)
+ table_index_fetch_reset(scan->xs_heapfetch);
+
+ result.isnull = true;
+ return result;
+ }
+ /* Assert(RowidIsValid(&scan->xs_rowid)); */
+
+ pgstat_count_index_tuples(scan->indexRelation, 1);
+
+ /* Return the ROWID of the tuple we found. */
+ return scan->xs_rowid;
+}
+
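
A hedged sketch of a consumer loop, mirroring the xs_want_rowid branch that index_getnext_slot() grows later in this patch; scan setup is assumed:

```c
/* Sketch: draining ROWIDs from an index scan. */
scan->xs_want_rowid = true;

for (;;)
{
	NullableDatum rowid = index_getnext_rowid(scan, ForwardScanDirection);

	if (rowid.isnull)
		break;					/* out of matching index entries */

	/*
	 * rowid.value is an AM-specific row identifier (a boxed ItemPointer
	 * for heap); hand it to table_index_fetch_tuple() to get the row.
	 */
}
```
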
/* ----------------
* index_fetch_heap - get the scan's next heap tuple
*
@@ -626,8 +750,17 @@ index_fetch_heap(IndexScanDesc scan, TupleTableSlot *slot)
{
bool all_dead = false;
bool found;
+ Datum tupleid;
+
+ if (scan->xs_want_rowid)
+ {
+ Assert(!scan->xs_rowid.isnull);
+ tupleid = scan->xs_rowid.value;
+ }
+ else
+ tupleid = PointerGetDatum(&scan->xs_heaptid);
- found = table_index_fetch_tuple(scan->xs_heapfetch, &scan->xs_heaptid,
+ found = table_index_fetch_tuple(scan->xs_heapfetch, tupleid,
scan->xs_snapshot, slot,
&scan->xs_heap_continue, &all_dead);
@@ -669,16 +802,30 @@ index_getnext_slot(IndexScanDesc scan, ScanDirection direction, TupleTableSlot *
{
if (!scan->xs_heap_continue)
{
- ItemPointer tid;
+ if (scan->xs_want_rowid)
+ {
+ NullableDatum rowid;
+				/* Time to fetch the next ROWID from the index */
+ rowid = index_getnext_rowid(scan, direction);
- /* Time to fetch the next TID from the index */
- tid = index_getnext_tid(scan, direction);
+ /* If we're out of index entries, we're done */
+ if (rowid.isnull)
+ break;
- /* If we're out of index entries, we're done */
- if (tid == NULL)
- break;
+ /* Assert(RowidEquals(rowid, &scan->xs_rowid)); */
+ }
+ else
+ {
+ ItemPointer tid;
+ /* Time to fetch the next TID from the index */
+ tid = index_getnext_tid(scan, direction);
- Assert(ItemPointerEquals(tid, &scan->xs_heaptid));
+ /* If we're out of index entries, we're done */
+ if (tid == NULL)
+ break;
+
+ Assert(ItemPointerEquals(tid, &scan->xs_heaptid));
+ }
}
/*
@@ -686,7 +833,8 @@ index_getnext_slot(IndexScanDesc scan, ScanDirection direction, TupleTableSlot *
* If we don't find anything, loop around and grab the next TID from
* the index.
*/
- Assert(ItemPointerIsValid(&scan->xs_heaptid));
+ if (!scan->xs_want_rowid)
+ Assert(ItemPointerIsValid(&scan->xs_heaptid));
if (index_fetch_heap(scan, slot))
return true;
}
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 6c5b5c69ce5..44daed95baf 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -121,7 +121,8 @@ bthandler(PG_FUNCTION_ARGS)
amroutine->ambuild = btbuild;
amroutine->ambuildempty = btbuildempty;
- amroutine->aminsert = btinsert;
+ amroutine->aminsert = NULL;
+ amroutine->aminsertextended = btinsert;
amroutine->ambulkdelete = btbulkdelete;
amroutine->amvacuumcleanup = btvacuumcleanup;
amroutine->amcanreturn = btcanreturn;
@@ -188,13 +189,14 @@ btbuildempty(Relation index)
*/
bool
btinsert(Relation rel, Datum *values, bool *isnull,
- ItemPointer ht_ctid, Relation heapRel,
+ Datum tupleid, Relation heapRel,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
IndexInfo *indexInfo)
{
bool result;
IndexTuple itup;
+ ItemPointer ht_ctid = DatumGetItemPointer(tupleid);
/* generate an index tuple */
itup = index_form_tuple(RelationGetDescr(rel), values, isnull);
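
Every converted in-core aminsert follows the same pattern as btinsert here and spginsert below: the row identifier arrives as an opaque Datum and is decoded back to a TID on entry. Schematically, with xxxinsert standing in for any heap-backed AM's insert function:

```c
/* The conversion pattern shared by the in-core AMs (schematic). */
bool
xxxinsert(Relation rel, Datum *values, bool *isnull,
		  Datum tupleid, Relation heapRel,
		  IndexUniqueCheck checkUnique, bool indexUnchanged,
		  IndexInfo *indexInfo)
{
	/* heap-backed indexes still get an ItemPointer, boxed as a Datum */
	ItemPointer ht_ctid = DatumGetItemPointer(tupleid);

	/* ... the pre-existing insert logic keeps using ht_ctid ... */
	return true;				/* uniqueness-check result, as before */
}
```
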
diff --git a/src/backend/access/spgist/spginsert.c b/src/backend/access/spgist/spginsert.c
index 4443f1918df..1f5c9a930d2 100644
--- a/src/backend/access/spgist/spginsert.c
+++ b/src/backend/access/spgist/spginsert.c
@@ -198,7 +198,7 @@ spgbuildempty(Relation index)
*/
bool
spginsert(Relation index, Datum *values, bool *isnull,
- ItemPointer ht_ctid, Relation heapRel,
+ Datum tupleid, Relation heapRel,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
IndexInfo *indexInfo)
@@ -206,6 +206,7 @@ spginsert(Relation index, Datum *values, bool *isnull,
SpGistState spgstate;
MemoryContext oldCtx;
MemoryContext insertCtx;
+ ItemPointer ht_ctid = DatumGetItemPointer(tupleid);
insertCtx = AllocSetContextCreate(CurrentMemoryContext,
"SP-GiST insert temporary context",
diff --git a/src/backend/access/spgist/spgutils.c b/src/backend/access/spgist/spgutils.c
index 5fa9e230c08..127ff3922d1 100644
--- a/src/backend/access/spgist/spgutils.c
+++ b/src/backend/access/spgist/spgutils.c
@@ -69,7 +69,8 @@ spghandler(PG_FUNCTION_ARGS)
amroutine->ambuild = spgbuild;
amroutine->ambuildempty = spgbuildempty;
- amroutine->aminsert = spginsert;
+ amroutine->aminsert = NULL;
+ amroutine->aminsertextended = spginsert;
amroutine->ambulkdelete = spgbulkdelete;
amroutine->amvacuumcleanup = spgvacuumcleanup;
amroutine->amcanreturn = spgcanreturn;
diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c
index 771438c8cec..3f64d70666e 100644
--- a/src/backend/access/table/tableam.c
+++ b/src/backend/access/table/tableam.c
@@ -227,7 +227,7 @@ table_index_fetch_tuple_check(Relation rel,
slot = table_slot_create(rel, NULL);
scan = table_index_fetch_begin(rel);
- found = table_index_fetch_tuple(scan, tid, snapshot, slot, &call_again,
+ found = table_index_fetch_tuple(scan, PointerGetDatum(tid), snapshot, slot, &call_again,
all_dead);
table_index_fetch_end(scan);
ExecDropSingleTupleTableSlot(slot);
@@ -297,16 +297,23 @@ simple_table_tuple_insert(Relation rel, TupleTableSlot *slot)
* via ereport().
*/
void
-simple_table_tuple_delete(Relation rel, ItemPointer tid, Snapshot snapshot)
+simple_table_tuple_delete(Relation rel, Datum tupleid, Snapshot snapshot,
+ TupleTableSlot *oldSlot)
{
TM_Result result;
TM_FailureData tmfd;
+ int options = TABLE_MODIFY_WAIT; /* wait for commit */
- result = table_tuple_delete(rel, tid,
+	/* Fetch the old tuple if the relevant slot is provided */
+ if (oldSlot)
+ options |= TABLE_MODIFY_FETCH_OLD_TUPLE;
+
+ result = table_tuple_delete(rel, tupleid,
GetCurrentCommandId(true),
snapshot, InvalidSnapshot,
- true /* wait for commit */ ,
- &tmfd, false /* changingPart */ );
+ options,
+ &tmfd, false /* changingPart */ ,
+ oldSlot);
switch (result)
{
@@ -342,20 +349,27 @@ simple_table_tuple_delete(Relation rel, ItemPointer tid, Snapshot snapshot)
* via ereport().
*/
void
-simple_table_tuple_update(Relation rel, ItemPointer otid,
+simple_table_tuple_update(Relation rel, Datum tupleid,
TupleTableSlot *slot,
Snapshot snapshot,
- TU_UpdateIndexes *update_indexes)
+ TU_UpdateIndexes *update_indexes,
+ TupleTableSlot *oldSlot)
{
TM_Result result;
TM_FailureData tmfd;
LockTupleMode lockmode;
+ int options = TABLE_MODIFY_WAIT; /* wait for commit */
+
+	/* Fetch the old tuple if the relevant slot is provided */
+ if (oldSlot)
+ options |= TABLE_MODIFY_FETCH_OLD_TUPLE;
- result = table_tuple_update(rel, otid, slot,
+ result = table_tuple_update(rel, tupleid, slot,
GetCurrentCommandId(true),
snapshot, InvalidSnapshot,
- true /* wait for commit */ ,
- &tmfd, &lockmode, update_indexes);
+ options,
+ &tmfd, &lockmode, update_indexes,
+ oldSlot);
switch (result)
{
diff --git a/src/backend/access/table/tableamapi.c b/src/backend/access/table/tableamapi.c
index d7798b6afb6..26aca18dc50 100644
--- a/src/backend/access/table/tableamapi.c
+++ b/src/backend/access/table/tableamapi.c
@@ -75,8 +75,7 @@ GetTableAmRoutine(Oid amhandler)
* Could be made optional, but would require throwing error during
* parse-analysis.
*/
- Assert(routine->tuple_insert_speculative != NULL);
- Assert(routine->tuple_complete_speculative != NULL);
+ Assert(routine->tuple_insert_with_arbiter != NULL);
Assert(routine->multi_insert != NULL);
Assert(routine->tuple_delete != NULL);
@@ -104,9 +103,29 @@ GetTableAmRoutine(Oid amhandler)
Assert(routine->scan_sample_next_block != NULL);
Assert(routine->scan_sample_next_tuple != NULL);
+ Assert(routine->tuple_is_current != NULL);
+
return routine;
}
+const TableAmRoutine *
+GetTableAmRoutineByAmOid(Oid amoid)
+{
+ HeapTuple ht_am;
+ Form_pg_am amrec;
+ const TableAmRoutine *tableam = NULL;
+
+ ht_am = SearchSysCache1(AMOID, ObjectIdGetDatum(amoid));
+ if (!HeapTupleIsValid(ht_am))
+ elog(ERROR, "cache lookup failed for access method %u",
+ amoid);
+ amrec = (Form_pg_am)GETSTRUCT(ht_am);
+
+ tableam = GetTableAmRoutine(amrec->amhandler);
+ ReleaseSysCache(ht_am);
+ return tableam;
+}
+
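
This mirrors the new call site in DefineRelation() later in the patch: code that knows only the pg_am OID can fetch the routine and validate options directly. A sketch, assuming a valid accessMethodId:

```c
/* Sketch: validate reloptions for an AM identified only by pg_am OID. */
const TableAmRoutine *tableam = GetTableAmRoutineByAmOid(accessMethodId);

(void) tableam_reloptions(tableam, relkind, reloptions, true);
```
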
/* check_hook: validate new default_table_access_method */
bool
check_default_table_access_method(char **newval, void **extra, GucSource source)
diff --git a/src/backend/access/table/toast_helper.c b/src/backend/access/table/toast_helper.c
index b5cfeb21aab..2afcd4830d5 100644
--- a/src/backend/access/table/toast_helper.c
+++ b/src/backend/access/table/toast_helper.c
@@ -72,10 +72,10 @@ toast_tuple_init(ToastTupleContext *ttc)
* we have to delete it later.
*/
if (att->attlen == -1 && !ttc->ttc_oldisnull[i] &&
- VARATT_IS_EXTERNAL_ONDISK(old_value))
+ (VARATT_IS_EXTERNAL_ONDISK(old_value) || VARATT_IS_EXTERNAL_ORIOLEDB(old_value)))
{
if (ttc->ttc_isnull[i] ||
- !VARATT_IS_EXTERNAL_ONDISK(new_value) ||
+ !(VARATT_IS_EXTERNAL_ONDISK(new_value) || VARATT_IS_EXTERNAL_ORIOLEDB(new_value)) ||
memcmp((char *) old_value, (char *) new_value,
VARSIZE_EXTERNAL(old_value)) != 0)
{
@@ -331,7 +331,7 @@ toast_delete_external(Relation rel, Datum *values, bool *isnull,
if (isnull[i])
continue;
- else if (VARATT_IS_EXTERNAL_ONDISK(value))
+ else if (VARATT_IS_EXTERNAL_ONDISK(value) || VARATT_IS_EXTERNAL_ORIOLEDB(value))
toast_delete_datum(rel, value, is_speculative);
}
}
diff --git a/src/backend/access/transam/transam.c b/src/backend/access/transam/transam.c
index 7629904bbf7..d118c5fd61a 100644
--- a/src/backend/access/transam/transam.c
+++ b/src/backend/access/transam/transam.c
@@ -22,6 +22,7 @@
#include "access/clog.h"
#include "access/subtrans.h"
#include "access/transam.h"
+#include "storage/proc.h"
#include "utils/snapmgr.h"
/*
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index 4a2ea4adbaf..dab73df4b2c 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -209,6 +209,7 @@ typedef struct TransactionStateData
int parallelModeLevel; /* Enter/ExitParallelMode counter */
bool chain; /* start a new block after this one */
bool topXidLogged; /* for a subxact: is top-level XID logged? */
+ CommitSeqNo csn;
struct TransactionStateData *parent; /* back link to parent */
} TransactionStateData;
@@ -242,6 +243,7 @@ static TransactionStateData TopTransactionStateData = {
.state = TRANS_DEFAULT,
.blockState = TBLOCK_DEFAULT,
.topXidLogged = false,
+ .csn = COMMITSEQNO_INPROGRESS
};
/*
@@ -320,6 +322,7 @@ typedef struct SubXactCallbackItem
static SubXactCallbackItem *SubXact_callbacks = NULL;
+xact_redo_hook_type xact_redo_hook = NULL;
/* local function prototypes */
static void AssignTransactionId(TransactionState s);
@@ -2014,6 +2017,7 @@ StartTransaction(void)
*/
s->state = TRANS_START;
s->fullTransactionId = InvalidFullTransactionId; /* until assigned */
+ s->csn = COMMITSEQNO_INPROGRESS;
/* Determine if statements are logged in this transaction */
xact_is_sampled = log_xact_sample_rate != 0 &&
@@ -2288,7 +2292,9 @@ CommitTransaction(void)
* must be done _before_ releasing locks we hold and _after_
* RecordTransactionCommit.
*/
+ MyProc->lastCommittedCSN = s->csn;
ProcArrayEndTransaction(MyProc, latestXid);
+ s->csn = MyProc->lastCommittedCSN;
/*
* This is all post-commit cleanup. Note that if an error is raised here,
@@ -2714,6 +2720,7 @@ AbortTransaction(void)
* while cleaning up!
*/
LWLockReleaseAll();
+ CustomErrorCleanup();
/* Clear wait information and command progress indicator */
pgstat_report_wait_end();
@@ -5076,6 +5083,7 @@ AbortSubTransaction(void)
* Buffer locks, for example? I don't think so but I'm not sure.
*/
LWLockReleaseAll();
+ CustomErrorCleanup();
pgstat_report_wait_end();
pgstat_progress_end_command();
@@ -5958,6 +5966,9 @@ xact_redo_commit(xl_xact_parsed_commit *parsed,
TransactionId max_xid;
TimestampTz commit_time;
+ if (xact_redo_hook)
+ xact_redo_hook(xid, lsn);
+
Assert(TransactionIdIsValid(xid));
max_xid = TransactionIdLatest(xid, parsed->nsubxacts, parsed->subxacts);
@@ -6267,3 +6278,9 @@ xact_redo(XLogReaderState *record)
else
elog(PANIC, "xact_redo: unknown op code %u", info);
}
+
+CommitSeqNo
+GetCurrentCSN(void)
+{
+ return TopTransactionStateData.csn;
+}
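
GetCurrentCSN() exposes the top-level transaction's commit sequence number to extensions; a hedged reader sketch, assuming CommitSeqNo is a 64-bit value (per the pg_atomic_write_u64 calls below):

```c
/* Sketch: observing the commit sequence number from an extension. */
CommitSeqNo csn = GetCurrentCSN();

if (csn == COMMITSEQNO_INPROGRESS)
	elog(DEBUG1, "transaction has not committed yet");
else
	elog(DEBUG1, "transaction committed with CSN " UINT64_FORMAT, csn);
```
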
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index a19ba7167fd..ee0794465b1 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -138,6 +138,7 @@ int wal_retrieve_retry_interval = 5000;
int max_slot_wal_keep_size_mb = -1;
int wal_decode_buffer_size = 512 * 1024;
bool track_wal_io_timing = false;
+CommitSeqNo startupCommitSeqNo = COMMITSEQNO_FIRST_NORMAL + 1;
#ifdef WAL_DEBUG
bool XLOG_DEBUG = false;
@@ -145,6 +146,11 @@ bool XLOG_DEBUG = false;
int wal_segment_size = DEFAULT_XLOG_SEG_SIZE;
+/* Hook for plugins to get control in CheckPointGuts() */
+CheckPoint_hook_type CheckPoint_hook = NULL;
+double CheckPointProgress;
+after_checkpoint_cleanup_hook_type after_checkpoint_cleanup_hook = NULL;
+
/*
* Number of WAL insertion locks to use. A higher value allows more insertions
* to happen concurrently, but adds some CPU overhead to flushing the WAL,
@@ -4710,6 +4716,7 @@ BootStrapXLOG(void)
ShmemVariableCache->nextXid = checkPoint.nextXid;
ShmemVariableCache->nextOid = checkPoint.nextOid;
ShmemVariableCache->oidCount = 0;
+ pg_atomic_write_u64(&ShmemVariableCache->nextCommitSeqNo, COMMITSEQNO_FIRST_NORMAL + 1);
MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
AdvanceOldestClogXid(checkPoint.oldestXid);
SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
@@ -5049,6 +5056,7 @@ StartupXLOG(void)
XLogRecPtr missingContrecPtr;
TransactionId oldestActiveXID;
bool promoted = false;
+ bool wasInRecovery;
/*
* We should have an aux process resource owner to use, and we should not
@@ -5176,6 +5184,7 @@ StartupXLOG(void)
ShmemVariableCache->nextXid = checkPoint.nextXid;
ShmemVariableCache->nextOid = checkPoint.nextOid;
ShmemVariableCache->oidCount = 0;
+ pg_atomic_write_u64(&ShmemVariableCache->nextCommitSeqNo, startupCommitSeqNo);
MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
AdvanceOldestClogXid(checkPoint.oldestXid);
SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
@@ -5664,6 +5673,8 @@ StartupXLOG(void)
*/
PreallocXlogFiles(EndOfLog, newTLI);
+ wasInRecovery = InRecovery;
+
/*
* Okay, we're officially UP.
*/
@@ -5742,6 +5753,9 @@ StartupXLOG(void)
*/
CompleteCommitTsInitialization();
+ if (wasInRecovery && after_checkpoint_cleanup_hook)
+ after_checkpoint_cleanup_hook(EndOfLog, 0);
+
/*
* All done with end-of-recovery actions.
*
@@ -6866,6 +6880,9 @@ CreateCheckPoint(int flags)
if (!RecoveryInProgress())
TruncateSUBTRANS(GetOldestTransactionIdConsideredRunning());
+ if (after_checkpoint_cleanup_hook)
+ after_checkpoint_cleanup_hook(ProcLastRecPtr, flags);
+
/* Real work is done; log and update stats. */
LogCheckpointEnd(false);
@@ -7040,6 +7057,9 @@ CheckPointGuts(XLogRecPtr checkPointRedo, int flags)
CheckPointPredicate();
CheckPointBuffers(flags);
+ if (CheckPoint_hook)
+ CheckPoint_hook(checkPointRedo, flags);
+
/* Perform all queued up fsyncs */
TRACE_POSTGRESQL_BUFFER_CHECKPOINT_SYNC_START();
CheckpointStats.ckpt_sync_t = GetCurrentTimestamp();
@@ -8579,6 +8599,19 @@ get_backup_status(void)
return sessionBackupState;
}
+/*
+ * Check if there is a backup in progress.
+ *
+ * We do this check without a lock, assuming 32-bit reads are atomic.  A
+ * false result merely means that there was at least one moment when no
+ * backup was in progress.
+ */
+bool
+have_backup_in_progress(void)
+{
+ return (XLogCtl->Insert.runningBackups > 0);
+}
+
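
Because the unlocked read is only meaningfully false, the check suits opportunistic optimizations that must be suppressed while any backup might be copying files. A short, hypothetical caller sketch:

```c
/* Sketch: stay on the conservative path while a backup may be running. */
if (have_backup_in_progress())
	return;
```
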
/*
* do_pg_backup_stop
*
@@ -8988,3 +9021,5 @@ SetWalWriterSleeping(bool sleeping)
XLogCtl->WalWriterSleeping = sleeping;
SpinLockRelease(&XLogCtl->info_lck);
}
+
+void (*RedoShutdownHook) (void) = NULL;
diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c
index 3c7fb913e7e..8de18a3a6ee 100644
--- a/src/backend/access/transam/xlogrecovery.c
+++ b/src/backend/access/transam/xlogrecovery.c
@@ -1806,6 +1806,8 @@ PerformWalRecovery(void)
* exit with special return code to request shutdown of
* postmaster. Log messages issued from postmaster.
*/
+ if (RedoShutdownHook != NULL)
+ RedoShutdownHook();
proc_exit(3);
case RECOVERY_TARGET_ACTION_PAUSE:
diff --git a/src/backend/catalog/aclchk.c b/src/backend/catalog/aclchk.c
index a2aad09e6a0..ac39a2c4c0a 100644
--- a/src/backend/catalog/aclchk.c
+++ b/src/backend/catalog/aclchk.c
@@ -1617,7 +1617,7 @@ expand_all_col_privileges(Oid table_oid, Form_pg_class classForm,
AttrNumber curr_att;
Assert(classForm->relnatts - FirstLowInvalidHeapAttributeNumber < num_col_privileges);
- for (curr_att = FirstLowInvalidHeapAttributeNumber + 1;
+ for (curr_att = FirstLowInvalidHeapAttributeNumber + 2;
curr_att <= classForm->relnatts;
curr_att++)
{
diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c
index 494738824cb..8627810dc23 100644
--- a/src/backend/catalog/dependency.c
+++ b/src/backend/catalog/dependency.c
@@ -242,6 +242,7 @@ deleteObjectsInList(ObjectAddresses *targetObjects, Relation *depRel,
int flags)
{
int i;
+ bool *depends_on_relation;
/*
* Keep track of objects for event triggers, if necessary.
@@ -269,6 +270,33 @@ deleteObjectsInList(ObjectAddresses *targetObjects, Relation *depRel,
}
}
+ depends_on_relation = palloc0(sizeof(bool) * targetObjects->numrefs);
+
+ for (i = targetObjects->numrefs - 1; i >= 0; i--)
+ {
+ ObjectAddressExtra *thisextra = targetObjects->extras + i;
+ int j;
+
+ if (thisextra->dependee.classId == RelationRelationId &&
+ thisextra->dependee.objectSubId == 0)
+ {
+ depends_on_relation[i] = true;
+ continue;
+ }
+
+ for (j = i + 1; j < targetObjects->numrefs; j++)
+ {
+ ObjectAddress *depobj = targetObjects->refs + j;
+ if (depobj->classId == thisextra->dependee.classId &&
+ depobj->objectId == thisextra->dependee.objectId &&
+ depobj->objectSubId == thisextra->dependee.objectSubId)
+ {
+ depends_on_relation[i] = depends_on_relation[j];
+ break;
+ }
+ }
+ }
+
/*
* Delete all the objects in the proper order, except that if told to, we
* should skip the original object(s).
@@ -277,13 +305,19 @@ deleteObjectsInList(ObjectAddresses *targetObjects, Relation *depRel,
{
ObjectAddress *thisobj = targetObjects->refs + i;
ObjectAddressExtra *thisextra = targetObjects->extras + i;
+ int temp_flags = flags;
if ((flags & PERFORM_DELETION_SKIP_ORIGINAL) &&
(thisextra->flags & DEPFLAG_ORIGINAL))
continue;
- deleteOneObject(thisobj, depRel, flags);
+ if (depends_on_relation[i])
+ temp_flags |= PERFORM_DELETION_OF_RELATION;
+
+ deleteOneObject(thisobj, depRel, temp_flags);
}
+
+ pfree(depends_on_relation);
}
/*
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 6f1910a6e0f..69c6689245e 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -125,9 +125,6 @@ static void UpdateIndexRelation(Oid indexoid, Oid heapoid,
bool immediate,
bool isvalid,
bool isready);
-static void index_update_stats(Relation rel,
- bool hasindex,
- double reltuples);
static void IndexCheckExclusion(Relation heapRelation,
Relation indexRelation,
IndexInfo *indexInfo);
@@ -301,7 +298,7 @@ ConstructTupleDescriptor(Relation heapRelation,
int i;
/* We need access to the index AM's API struct */
- amroutine = GetIndexAmRoutineByAmId(accessMethodObjectId, false);
+ amroutine = GetIndexAmRoutineByAmId(InvalidOid, accessMethodObjectId, false);
/* ... and to the table's tuple descriptor */
heapTupDesc = RelationGetDescr(heapRelation);
@@ -2681,9 +2678,6 @@ BuildSpeculativeIndexInfo(Relation index, IndexInfo *ii)
*/
Assert(ii->ii_Unique);
- if (index->rd_rel->relam != BTREE_AM_OID)
- elog(ERROR, "unexpected non-btree speculative unique index");
-
ii->ii_UniqueOps = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
ii->ii_UniqueProcs = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
ii->ii_UniqueStrats = (uint16 *) palloc(sizeof(uint16) * indnkeyatts);
@@ -2807,7 +2801,7 @@ FormIndexDatum(IndexInfo *indexInfo,
* index. When updating an index, it's important because some index AMs
* expect a relcache flush to occur after REINDEX.
*/
-static void
+void
index_update_stats(Relation rel,
bool hasindex,
double reltuples)
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index bda364552ca..cd4a16a5572 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -97,9 +97,6 @@ static void compute_index_stats(Relation onerel, double totalrows,
MemoryContext col_context);
static VacAttrStats *examine_attribute(Relation onerel, int attnum,
Node *index_expr);
-static int acquire_sample_rows(Relation onerel, int elevel,
- HeapTuple *rows, int targrows,
- double *totalrows, double *totaldeadrows);
static int compare_rows(const void *a, const void *b, void *arg);
static int acquire_inherited_sample_rows(Relation onerel, int elevel,
HeapTuple *rows, int targrows,
@@ -201,10 +198,7 @@ analyze_rel(Oid relid, RangeVar *relation,
if (onerel->rd_rel->relkind == RELKIND_RELATION ||
onerel->rd_rel->relkind == RELKIND_MATVIEW)
{
- /* Regular table, so we'll use the regular row acquisition function */
- acquirefunc = acquire_sample_rows;
- /* Also get regular table's size */
- relpages = RelationGetNumberOfBlocks(onerel);
+ table_analyze(onerel, &acquirefunc, &relpages);
}
else if (onerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
{
@@ -1133,7 +1127,7 @@ examine_attribute(Relation onerel, int attnum, Node *index_expr)
* block. The previous sampling method put too much credence in the row
* density near the start of the table.
*/
-static int
+int
acquire_sample_rows(Relation onerel, int elevel,
HeapTuple *rows, int targrows,
double *totalrows, double *totaldeadrows)
@@ -1460,9 +1454,7 @@ acquire_inherited_sample_rows(Relation onerel, int elevel,
if (childrel->rd_rel->relkind == RELKIND_RELATION ||
childrel->rd_rel->relkind == RELKIND_MATVIEW)
{
- /* Regular table, so use the regular row acquisition function */
- acquirefunc = acquire_sample_rows;
- relpages = RelationGetNumberOfBlocks(childrel);
+ table_analyze(childrel, &acquirefunc, &relpages);
}
else if (childrel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
{
diff --git a/src/backend/commands/constraint.c b/src/backend/commands/constraint.c
index 35c4451fc06..982bae9ed42 100644
--- a/src/backend/commands/constraint.c
+++ b/src/backend/commands/constraint.c
@@ -111,7 +111,7 @@ unique_key_recheck(PG_FUNCTION_ARGS)
IndexFetchTableData *scan = table_index_fetch_begin(trigdata->tg_relation);
bool call_again = false;
- if (!table_index_fetch_tuple(scan, &tmptid, SnapshotSelf, slot,
+ if (!table_index_fetch_tuple(scan, PointerGetDatum(&tmptid), SnapshotSelf, slot,
&call_again, NULL))
{
/*
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 6c2e5c8a4f9..b3421e6e5a8 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -64,9 +64,6 @@ static void report_triggers(ResultRelInfo *rInfo, bool show_relname,
ExplainState *es);
static double elapsed_time(instr_time *starttime);
static bool ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used);
-static void ExplainNode(PlanState *planstate, List *ancestors,
- const char *relationship, const char *plan_name,
- ExplainState *es);
static void show_plan_tlist(PlanState *planstate, List *ancestors,
ExplainState *es);
static void show_expression(Node *node, const char *qlabel,
@@ -75,9 +72,6 @@ static void show_expression(Node *node, const char *qlabel,
static void show_qual(List *qual, const char *qlabel,
PlanState *planstate, List *ancestors,
bool useprefix, ExplainState *es);
-static void show_scan_qual(List *qual, const char *qlabel,
- PlanState *planstate, List *ancestors,
- ExplainState *es);
static void show_upper_qual(List *qual, const char *qlabel,
PlanState *planstate, List *ancestors,
ExplainState *es);
@@ -114,8 +108,6 @@ static void show_memoize_info(MemoizeState *mstate, List *ancestors,
static void show_hashagg_info(AggState *aggstate, ExplainState *es);
static void show_tidbitmap_info(BitmapHeapScanState *planstate,
ExplainState *es);
-static void show_instrumentation_count(const char *qlabel, int which,
- PlanState *planstate, ExplainState *es);
static void show_foreignscan_info(ForeignScanState *fsstate, ExplainState *es);
static void show_eval_params(Bitmapset *bms_params, ExplainState *es);
static const char *explain_get_index_name(Oid indexId);
@@ -1174,7 +1166,7 @@ ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used)
* to the nesting depth of logical output groups, and therefore is controlled
* by ExplainOpenGroup/ExplainCloseGroup.
*/
-static void
+void
ExplainNode(PlanState *planstate, List *ancestors,
const char *relationship, const char *plan_name,
ExplainState *es)
@@ -2346,7 +2338,7 @@ show_qual(List *qual, const char *qlabel,
/*
* Show a qualifier expression for a scan plan node
*/
-static void
+void
show_scan_qual(List *qual, const char *qlabel,
PlanState *planstate, List *ancestors,
ExplainState *es)
@@ -3437,7 +3429,7 @@ show_tidbitmap_info(BitmapHeapScanState *planstate, ExplainState *es)
*
* "which" identifies which instrumentation counter to print
*/
-static void
+void
show_instrumentation_count(const char *qlabel, int which,
PlanState *planstate, ExplainState *es)
{
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 21ed483b7fa..df4fffc4e37 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -69,6 +69,7 @@
#include "utils/snapmgr.h"
#include "utils/syscache.h"
+GetDefaultOpClass_hook_type GetDefaultOpClass_hook = NULL;
/* non-export function prototypes */
static bool CompareOpclassOptions(Datum *opts1, Datum *opts2, int natts);
@@ -87,11 +88,7 @@ static void ComputeIndexAttrs(IndexInfo *indexInfo,
Oid ddl_userid,
int ddl_sec_context,
int *ddl_save_nestlevel);
-static char *ChooseIndexName(const char *tabname, Oid namespaceId,
- List *colnames, List *exclusionOpNames,
- bool primary, bool isconstraint);
static char *ChooseIndexNameAddition(List *colnames);
-static List *ChooseIndexColumnNames(List *indexElems);
static void ReindexIndex(RangeVar *indexRelation, ReindexParams *params,
bool isTopLevel);
static void RangeVarCallbackForReindexIndex(const RangeVar *relation,
@@ -217,7 +214,7 @@ CheckIndexCompatible(Oid oldId,
accessMethodName)));
accessMethodForm = (Form_pg_am) GETSTRUCT(tuple);
accessMethodId = accessMethodForm->oid;
- amRoutine = GetIndexAmRoutine(accessMethodForm->amhandler);
+ amRoutine = GetIndexAmRoutineExtended(oldId, accessMethodForm->amhandler);
ReleaseSysCache(tuple);
amcanorder = amRoutine->amcanorder;
@@ -841,7 +838,7 @@ DefineIndex(Oid relationId,
}
accessMethodForm = (Form_pg_am) GETSTRUCT(tuple);
accessMethodId = accessMethodForm->oid;
- amRoutine = GetIndexAmRoutine(accessMethodForm->amhandler);
+ amRoutine = GetIndexAmRoutineWithTableAM(rel->rd_rel->relam, accessMethodForm->amhandler);
pgstat_progress_update_param(PROGRESS_CREATEIDX_ACCESS_METHOD_OID,
accessMethodId);
@@ -2284,6 +2281,9 @@ GetDefaultOpClass(Oid type_id, Oid am_id)
/* If it's a domain, look at the base type instead */
type_id = getBaseType(type_id);
+ if (GetDefaultOpClass_hook)
+ return GetDefaultOpClass_hook(type_id, am_id);
+
tcategory = TypeCategory(type_id);
/*
@@ -2499,7 +2499,7 @@ ChooseRelationName(const char *name1, const char *name2,
*
* The argument list is pretty ad-hoc :-(
*/
-static char *
+char *
ChooseIndexName(const char *tabname, Oid namespaceId,
List *colnames, List *exclusionOpNames,
bool primary, bool isconstraint)
@@ -2588,7 +2588,7 @@ ChooseIndexNameAddition(List *colnames)
*
* Returns a List of plain strings (char *, not String nodes).
*/
-static List *
+List *
ChooseIndexColumnNames(List *indexElems)
{
List *result = NIL;
diff --git a/src/backend/commands/opclasscmds.c b/src/backend/commands/opclasscmds.c
index 5f7ee238863..76722d506b5 100644
--- a/src/backend/commands/opclasscmds.c
+++ b/src/backend/commands/opclasscmds.c
@@ -43,6 +43,7 @@
#include "parser/parse_func.h"
#include "parser/parse_oper.h"
#include "parser/parse_type.h"
+#include "postgres_ext.h"
#include "utils/builtins.h"
#include "utils/fmgroids.h"
#include "utils/lsyscache.h"
@@ -377,7 +378,7 @@ DefineOpClass(CreateOpClassStmt *stmt)
amform = (Form_pg_am) GETSTRUCT(tup);
amoid = amform->oid;
- amroutine = GetIndexAmRoutineByAmId(amoid, false);
+ amroutine = GetIndexAmRoutineByAmId(InvalidOid, amoid, false);
ReleaseSysCache(tup);
maxOpNumber = amroutine->amstrategies;
@@ -835,7 +836,7 @@ AlterOpFamily(AlterOpFamilyStmt *stmt)
amform = (Form_pg_am) GETSTRUCT(tup);
amoid = amform->oid;
- amroutine = GetIndexAmRoutineByAmId(amoid, false);
+ amroutine = GetIndexAmRoutineByAmId(InvalidOid, amoid, false);
ReleaseSysCache(tup);
maxOpNumber = amroutine->amstrategies;
@@ -882,7 +883,7 @@ AlterOpFamilyAdd(AlterOpFamilyStmt *stmt, Oid amoid, Oid opfamilyoid,
int maxOpNumber, int maxProcNumber, int optsProcNumber,
List *items)
{
- IndexAmRoutine *amroutine = GetIndexAmRoutineByAmId(amoid, false);
+ IndexAmRoutine *amroutine = GetIndexAmRoutineByAmId(InvalidOid, amoid, false);
List *operators; /* OpFamilyMember list for operators */
List *procedures; /* OpFamilyMember list for support procs */
ListCell *l;
@@ -1165,7 +1166,7 @@ assignOperTypes(OpFamilyMember *member, Oid amoid, Oid typeoid)
* the family has been created but not yet populated with the required
* operators.)
*/
- IndexAmRoutine *amroutine = GetIndexAmRoutineByAmId(amoid, false);
+ IndexAmRoutine *amroutine = GetIndexAmRoutineByAmId(InvalidOid, amoid, false);
if (!amroutine->amcanorderbyop)
ereport(ERROR,
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index b4eeb2523a2..49f71638bbe 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -684,6 +684,7 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
LOCKMODE parentLockmode;
const char *accessMethod = NULL;
Oid accessMethodId = InvalidOid;
+ const TableAmRoutine *tableam = NULL;
/*
* Truncate relname to appropriate length (probably a waste of time, as
@@ -819,6 +820,26 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
if (!OidIsValid(ownerId))
ownerId = GetUserId();
+ /*
+ * If the statement hasn't specified an access method, but we're defining
+ * a type of relation that needs one, use the default.
+ */
+ if (stmt->accessMethod != NULL)
+ {
+ accessMethod = stmt->accessMethod;
+
+ if (partitioned)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("specifying a table access method is not supported on a partitioned table")));
+ }
+ else if (RELKIND_HAS_TABLE_AM(relkind))
+ accessMethod = default_table_access_method;
+
+ /* look up the access method, verify it is for a table */
+ if (accessMethod != NULL)
+ accessMethodId = get_table_am_oid(accessMethod, false);
+
/*
* Parse and validate reloptions, if any.
*/
@@ -827,6 +848,12 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
switch (relkind)
{
+ case RELKIND_RELATION:
+ case RELKIND_TOASTVALUE:
+ case RELKIND_MATVIEW:
+ tableam = GetTableAmRoutineByAmOid(accessMethodId);
+ (void) tableam_reloptions(tableam, relkind, reloptions, true);
+ break;
case RELKIND_VIEW:
(void) view_reloptions(reloptions, true);
break;
@@ -835,6 +862,7 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
break;
default:
(void) heap_reloptions(relkind, reloptions, true);
+ break;
}
if (stmt->ofTypename)
@@ -938,26 +966,6 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
attr->attstorage = GetAttributeStorage(attr->atttypid, colDef->storage_name);
}
- /*
- * If the statement hasn't specified an access method, but we're defining
- * a type of relation that needs one, use the default.
- */
- if (stmt->accessMethod != NULL)
- {
- accessMethod = stmt->accessMethod;
-
- if (partitioned)
- ereport(ERROR,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("specifying a table access method is not supported on a partitioned table")));
- }
- else if (RELKIND_HAS_TABLE_AM(relkind))
- accessMethod = default_table_access_method;
-
- /* look up the access method, verify it is for a table */
- if (accessMethod != NULL)
- accessMethodId = get_table_am_oid(accessMethod, false);
-
/*
* Create the relation. Inherited defaults and constraints are passed in
* for immediate handling --- since they don't need parsing, they can be
@@ -6136,8 +6144,10 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
/* Write the tuple out to the new relation */
if (newrel)
+ {
table_tuple_insert(newrel, insertslot, mycid,
ti_options, bistate);
+ }
ResetExprContext(econtext);
@@ -14435,7 +14445,8 @@ ATExecSetRelOptions(Relation rel, List *defList, AlterTableType operation,
case RELKIND_RELATION:
case RELKIND_TOASTVALUE:
case RELKIND_MATVIEW:
- (void) heap_reloptions(rel->rd_rel->relkind, newOptions, true);
+ (void) table_reloptions(rel, rel->rd_rel->relkind,
+ newOptions, true);
break;
case RELKIND_PARTITIONED_TABLE:
(void) partitioned_table_reloptions(newOptions, true);
@@ -18124,6 +18135,7 @@ static void
AttachPartitionEnsureIndexes(Relation rel, Relation attachrel)
{
List *idxes;
+ List *buildIdxes = NIL;
List *attachRelIdxs;
Relation *attachrelIdxRels;
IndexInfo **attachInfos;
@@ -18131,6 +18143,7 @@ AttachPartitionEnsureIndexes(Relation rel, Relation attachrel)
ListCell *cell;
MemoryContext cxt;
MemoryContext oldcxt;
+ AttrMap *attmap;
cxt = AllocSetContextCreate(CurrentMemoryContext,
"AttachPartitionEnsureIndexes",
@@ -18181,6 +18194,10 @@ AttachPartitionEnsureIndexes(Relation rel, Relation attachrel)
goto out;
}
+ attmap = build_attrmap_by_name(RelationGetDescr(attachrel),
+ RelationGetDescr(rel),
+ false);
+
/*
* For each index on the partitioned table, find a matching one in the
* partition-to-be; if one is not found, create one.
@@ -18190,7 +18207,6 @@ AttachPartitionEnsureIndexes(Relation rel, Relation attachrel)
Oid idx = lfirst_oid(cell);
Relation idxRel = index_open(idx, AccessShareLock);
IndexInfo *info;
- AttrMap *attmap;
bool found = false;
Oid constraintOid;
@@ -18206,9 +18222,6 @@ AttachPartitionEnsureIndexes(Relation rel, Relation attachrel)
/* construct an indexinfo to compare existing indexes against */
info = BuildIndexInfo(idxRel);
- attmap = build_attrmap_by_name(RelationGetDescr(attachrel),
- RelationGetDescr(rel),
- false);
constraintOid = get_relation_idx_constraint_oid(RelationGetRelid(rel), idx);
/*
@@ -18269,19 +18282,7 @@ AttachPartitionEnsureIndexes(Relation rel, Relation attachrel)
* now.
*/
if (!found)
- {
- IndexStmt *stmt;
- Oid conOid;
-
- stmt = generateClonedIndexStmt(NULL,
- idxRel, attmap,
- &conOid);
- DefineIndex(RelationGetRelid(attachrel), stmt, InvalidOid,
- RelationGetRelid(idxRel),
- conOid,
- -1,
- true, false, false, false, false);
- }
+ buildIdxes = lappend_oid(buildIdxes, RelationGetRelid(idxRel));
index_close(idxRel, AccessShareLock);
}
@@ -18290,6 +18291,25 @@ AttachPartitionEnsureIndexes(Relation rel, Relation attachrel)
/* Clean up. */
for (i = 0; i < list_length(attachRelIdxs); i++)
index_close(attachrelIdxRels[i], AccessShareLock);
+
+ foreach(cell, buildIdxes)
+ {
+ Oid idx = lfirst_oid(cell);
+ Relation idxRel = index_open(idx, AccessShareLock);
+ IndexStmt *stmt;
+ Oid conOid;
+
+ stmt = generateClonedIndexStmt(NULL,
+ idxRel, attmap,
+ &conOid);
+ DefineIndex(RelationGetRelid(attachrel), stmt, InvalidOid,
+ RelationGetRelid(idxRel),
+ conOid,
+ -1,
+ true, false, false, false, false);
+ index_close(idxRel, AccessShareLock);
+ }
+
MemoryContextSwitchTo(oldcxt);
MemoryContextDelete(cxt);
}
diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c
index 8b1d3b99fe9..b6f6ebaa624 100644
--- a/src/backend/commands/trigger.c
+++ b/src/backend/commands/trigger.c
@@ -83,7 +83,7 @@ static void SetTriggerFlags(TriggerDesc *trigdesc, Trigger *trigger);
static bool GetTupleForTrigger(EState *estate,
EPQState *epqstate,
ResultRelInfo *relinfo,
- ItemPointer tid,
+ Datum tupleid,
LockTupleMode lockmode,
TupleTableSlot *oldslot,
TupleTableSlot **epqslot,
@@ -2688,7 +2688,7 @@ ExecASDeleteTriggers(EState *estate, ResultRelInfo *relinfo,
bool
ExecBRDeleteTriggers(EState *estate, EPQState *epqstate,
ResultRelInfo *relinfo,
- ItemPointer tupleid,
+ Datum tupleid,
HeapTuple fdw_trigtuple,
TupleTableSlot **epqslot,
TM_Result *tmresult,
@@ -2702,7 +2702,7 @@ ExecBRDeleteTriggers(EState *estate, EPQState *epqstate,
bool should_free = false;
int i;
- Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid));
+ Assert(HeapTupleIsValid(fdw_trigtuple) ^ (DatumGetPointer(tupleid) != NULL));
if (fdw_trigtuple == NULL)
{
TupleTableSlot *epqslot_candidate = NULL;
@@ -2779,8 +2779,8 @@ ExecBRDeleteTriggers(EState *estate, EPQState *epqstate,
void
ExecARDeleteTriggers(EState *estate,
ResultRelInfo *relinfo,
- ItemPointer tupleid,
HeapTuple fdw_trigtuple,
+ TupleTableSlot *slot,
TransitionCaptureState *transition_capture,
bool is_crosspart_update)
{
@@ -2789,20 +2789,11 @@ ExecARDeleteTriggers(EState *estate,
if ((trigdesc && trigdesc->trig_delete_after_row) ||
(transition_capture && transition_capture->tcs_delete_old_table))
{
- TupleTableSlot *slot = ExecGetTriggerOldSlot(estate, relinfo);
-
- Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid));
- if (fdw_trigtuple == NULL)
- GetTupleForTrigger(estate,
- NULL,
- relinfo,
- tupleid,
- LockTupleExclusive,
- slot,
- NULL,
- NULL,
- NULL);
- else
+ /*
+		 * Store the FDW old tuple into the slot.  Otherwise, the caller is
+		 * expected to have already fetched the old tuple into the slot.
+ */
+ if (fdw_trigtuple != NULL)
ExecForceStoreHeapTuple(fdw_trigtuple, slot, false);
AfterTriggerSaveEvent(estate, relinfo, NULL, NULL,
@@ -2939,7 +2930,7 @@ ExecASUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
bool
ExecBRUpdateTriggers(EState *estate, EPQState *epqstate,
ResultRelInfo *relinfo,
- ItemPointer tupleid,
+ Datum tupleid,
HeapTuple fdw_trigtuple,
TupleTableSlot *newslot,
TM_Result *tmresult,
@@ -2959,7 +2950,7 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate,
/* Determine lock mode to use */
lockmode = ExecUpdateLockMode(estate, relinfo);
- Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid));
+ Assert(HeapTupleIsValid(fdw_trigtuple) ^ (DatumGetPointer(tupleid) != NULL));
if (fdw_trigtuple == NULL)
{
TupleTableSlot *epqslot_candidate = NULL;
@@ -3093,18 +3084,17 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate,
* Note: 'src_partinfo' and 'dst_partinfo', when non-NULL, refer to the source
* and destination partitions, respectively, of a cross-partition update of
* the root partitioned table mentioned in the query, given by 'relinfo'.
- * 'tupleid' in that case refers to the ctid of the "old" tuple in the source
- * partition, and 'newslot' contains the "new" tuple in the destination
- * partition. This interface allows to support the requirements of
- * ExecCrossPartitionUpdateForeignKey(); is_crosspart_update must be true in
- * that case.
+ * 'oldslot' contains the "old" tuple in the source partition, and 'newslot'
+ * contains the "new" tuple in the destination partition.  This interface
+ * makes it possible to support the requirements of ExecCrossPartitionUpdateForeignKey();
+ * is_crosspart_update must be true in that case.
*/
void
ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
ResultRelInfo *src_partinfo,
ResultRelInfo *dst_partinfo,
- ItemPointer tupleid,
HeapTuple fdw_trigtuple,
+ TupleTableSlot *oldslot,
TupleTableSlot *newslot,
List *recheckIndexes,
TransitionCaptureState *transition_capture,
@@ -3123,29 +3113,14 @@ ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
* separately for DELETE and INSERT to capture transition table rows.
* In such case, either old tuple or new tuple can be NULL.
*/
- TupleTableSlot *oldslot;
- ResultRelInfo *tupsrc;
-
Assert((src_partinfo != NULL && dst_partinfo != NULL) ||
!is_crosspart_update);
- tupsrc = src_partinfo ? src_partinfo : relinfo;
- oldslot = ExecGetTriggerOldSlot(estate, tupsrc);
-
- if (fdw_trigtuple == NULL && ItemPointerIsValid(tupleid))
- GetTupleForTrigger(estate,
- NULL,
- tupsrc,
- tupleid,
- LockTupleExclusive,
- oldslot,
- NULL,
- NULL,
- NULL);
- else if (fdw_trigtuple != NULL)
+ if (fdw_trigtuple != NULL)
+ {
+ Assert(oldslot);
ExecForceStoreHeapTuple(fdw_trigtuple, oldslot, false);
- else
- ExecClearTuple(oldslot);
+ }
AfterTriggerSaveEvent(estate, relinfo,
src_partinfo, dst_partinfo,
@@ -3292,7 +3267,7 @@ static bool
GetTupleForTrigger(EState *estate,
EPQState *epqstate,
ResultRelInfo *relinfo,
- ItemPointer tid,
+ Datum tupleid,
LockTupleMode lockmode,
TupleTableSlot *oldslot,
TupleTableSlot **epqslot,
@@ -3317,7 +3292,9 @@ GetTupleForTrigger(EState *estate,
*/
if (!IsolationUsesXactSnapshot())
lockflags |= TUPLE_LOCK_FLAG_FIND_LAST_VERSION;
- test = table_tuple_lock(relation, tid, estate->es_snapshot, oldslot,
+
+ test = table_tuple_lock(relation, tupleid,
+ estate->es_snapshot, oldslot,
estate->es_output_cid,
lockmode, LockWaitBlock,
lockflags,
@@ -3413,8 +3390,8 @@ GetTupleForTrigger(EState *estate,
* We expect the tuple to be present, thus very simple error handling
* suffices.
*/
- if (!table_tuple_fetch_row_version(relation, tid, SnapshotAny,
- oldslot))
+ if (!table_tuple_fetch_row_version(relation, tupleid,
+ SnapshotAny, oldslot))
elog(ERROR, "failed to fetch tuple for trigger");
}
@@ -3620,18 +3597,22 @@ typedef SetConstraintStateData *SetConstraintState;
* cycles. So we need only ensure that ats_firing_id is zero when attaching
* a new event to an existing AfterTriggerSharedData record.
*/
-typedef uint32 TriggerFlags;
+typedef uint64 TriggerFlags;
-#define AFTER_TRIGGER_OFFSET 0x07FFFFFF /* must be low-order bits */
-#define AFTER_TRIGGER_DONE 0x80000000
-#define AFTER_TRIGGER_IN_PROGRESS 0x40000000
+#define AFTER_TRIGGER_SIZE				UINT64CONST(0xFFFF000000000)	/* event size, bits 36..51 */
+#define AFTER_TRIGGER_SIZE_SHIFT (36)
+#define AFTER_TRIGGER_OFFSET UINT64CONST(0x000000FFFFFFF) /* must be low-order bits */
+#define AFTER_TRIGGER_DONE UINT64CONST(0x0000800000000)
+#define AFTER_TRIGGER_IN_PROGRESS UINT64CONST(0x0000400000000)
/* bits describing the size and tuple sources of this event */
-#define AFTER_TRIGGER_FDW_REUSE 0x00000000
-#define AFTER_TRIGGER_FDW_FETCH 0x20000000
-#define AFTER_TRIGGER_1CTID 0x10000000
-#define AFTER_TRIGGER_2CTID 0x30000000
-#define AFTER_TRIGGER_CP_UPDATE 0x08000000
-#define AFTER_TRIGGER_TUP_BITS 0x38000000
+#define AFTER_TRIGGER_FDW_REUSE UINT64CONST(0x0000000000000)
+#define AFTER_TRIGGER_FDW_FETCH UINT64CONST(0x0000200000000)
+#define AFTER_TRIGGER_1CTID UINT64CONST(0x0000100000000)
+#define AFTER_TRIGGER_ROWID1 UINT64CONST(0x0000010000000)
+#define AFTER_TRIGGER_2CTID UINT64CONST(0x0000300000000)
+#define AFTER_TRIGGER_ROWID2 UINT64CONST(0x0000020000000)
+#define AFTER_TRIGGER_CP_UPDATE UINT64CONST(0x0000080000000)
+#define AFTER_TRIGGER_TUP_BITS UINT64CONST(0x0000380000000)
typedef struct AfterTriggerSharedData *AfterTriggerShared;
typedef struct AfterTriggerSharedData
@@ -3683,6 +3664,9 @@ typedef struct AfterTriggerEventDataZeroCtids
} AfterTriggerEventDataZeroCtids;
#define SizeofTriggerEvent(evt) \
+ (((evt)->ate_flags & AFTER_TRIGGER_SIZE) >> AFTER_TRIGGER_SIZE_SHIFT)
+
+#define BasicSizeofTriggerEvent(evt) \
(((evt)->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_CP_UPDATE ? \
sizeof(AfterTriggerEventData) : \
(((evt)->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_2CTID ? \
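
The widened flags pack the total event size into bits 36..51 while keeping the shared-record offset in the low 28 bits; a worked sketch of the packing arithmetic used by afterTriggerAddEvent() below, with illustrative values:

```c
/* Illustrative packing under the new 64-bit TriggerFlags layout. */
TriggerFlags flags = AFTER_TRIGGER_1CTID | AFTER_TRIGGER_ROWID2;
Size		eventsize = 128;	/* MAXALIGN'd total, incl. variable rowids */

flags |= (TriggerFlags) eventsize << AFTER_TRIGGER_SIZE_SHIFT;

/* SizeofTriggerEvent() recovers the packed size */
Assert(((flags & AFTER_TRIGGER_SIZE) >> AFTER_TRIGGER_SIZE_SHIFT) == eventsize);
```
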
@@ -4035,14 +4019,34 @@ afterTriggerCopyBitmap(Bitmapset *src)
*/
static void
afterTriggerAddEvent(AfterTriggerEventList *events,
- AfterTriggerEvent event, AfterTriggerShared evtshared)
+ AfterTriggerEvent event, AfterTriggerShared evtshared,
+ bytea *rowid1, bytea *rowid2)
{
- Size eventsize = SizeofTriggerEvent(event);
- Size needed = eventsize + sizeof(AfterTriggerSharedData);
+ Size basiceventsize = MAXALIGN(BasicSizeofTriggerEvent(event));
+ Size eventsize;
+ Size needed;
AfterTriggerEventChunk *chunk;
AfterTriggerShared newshared;
AfterTriggerEvent newevent;
+ if (SizeofTriggerEvent(event) == 0)
+ {
+ eventsize = basiceventsize;
+ if (event->ate_flags & AFTER_TRIGGER_ROWID1)
+ eventsize += MAXALIGN(VARSIZE(rowid1));
+
+ if (event->ate_flags & AFTER_TRIGGER_ROWID2)
+ eventsize += MAXALIGN(VARSIZE(rowid2));
+
+ event->ate_flags |= eventsize << AFTER_TRIGGER_SIZE_SHIFT;
+ }
+ else
+ {
+ eventsize = SizeofTriggerEvent(event);
+ }
+
+ needed = eventsize + sizeof(AfterTriggerSharedData);
+
/*
* If empty list or not enough room in the tail chunk, make a new chunk.
* We assume here that a new shared record will always be needed.
@@ -4075,7 +4079,7 @@ afterTriggerAddEvent(AfterTriggerEventList *events,
* sizes used should be MAXALIGN multiples, to ensure that the shared
* records will be aligned safely.
*/
-#define MIN_CHUNK_SIZE 1024
+#define MIN_CHUNK_SIZE (1024*4)
#define MAX_CHUNK_SIZE (1024*1024)
#if MAX_CHUNK_SIZE > (AFTER_TRIGGER_OFFSET+1)
@@ -4094,6 +4098,7 @@ afterTriggerAddEvent(AfterTriggerEventList *events,
chunksize *= 2; /* okay, double it */
else
chunksize /= 2; /* too many shared records */
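+ /* never let it shrink below the (raised) minimum chunk size */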
+ chunksize = Max(chunksize, MIN_CHUNK_SIZE);
chunksize = Min(chunksize, MAX_CHUNK_SIZE);
}
chunk = MemoryContextAlloc(afterTriggers.event_cxt, chunksize);
@@ -4134,7 +4139,26 @@ afterTriggerAddEvent(AfterTriggerEventList *events,
/* Insert the data */
newevent = (AfterTriggerEvent) chunk->freeptr;
- memcpy(newevent, event, eventsize);
+ if (!rowid1 && !rowid2)
+ {
+ memcpy(newevent, event, eventsize);
+ }
+ else
+ {
+ Pointer ptr = chunk->freeptr;
+
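+ /* Copy the fixed-size part of the event, then append rowid payload(s). */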
+ memcpy(newevent, event, basiceventsize);
+ ptr += basiceventsize;
+
+ if (event->ate_flags & AFTER_TRIGGER_ROWID1)
+ {
+ memcpy(ptr, rowid1, MAXALIGN(VARSIZE(rowid1)));
+ ptr += MAXALIGN(VARSIZE(rowid1));
+ }
+
+ if (event->ate_flags & AFTER_TRIGGER_ROWID2)
+ memcpy(ptr, rowid2, MAXALIGN(VARSIZE(rowid2)));
+ }
/* ... and link the new event to its shared record */
newevent->ate_flags &= ~AFTER_TRIGGER_OFFSET;
newevent->ate_flags |= (char *) newshared - (char *) newevent;
@@ -4294,6 +4318,7 @@ AfterTriggerExecute(EState *estate,
int tgindx;
bool should_free_trig = false;
bool should_free_new = false;
+ Pointer ptr;
/*
* Locate trigger in trigdesc. It might not be present, and in fact the
@@ -4329,15 +4354,17 @@ AfterTriggerExecute(EState *estate,
{
Tuplestorestate *fdw_tuplestore = GetCurrentFDWTuplestore();
- if (!tuplestore_gettupleslot(fdw_tuplestore, true, false,
- trig_tuple_slot1))
+ if (!tuplestore_force_gettupleslot(fdw_tuplestore, true, false,
+ trig_tuple_slot1))
elog(ERROR, "failed to fetch tuple1 for AFTER trigger");
if ((evtshared->ats_event & TRIGGER_EVENT_OPMASK) ==
TRIGGER_EVENT_UPDATE &&
- !tuplestore_gettupleslot(fdw_tuplestore, true, false,
- trig_tuple_slot2))
+ !tuplestore_force_gettupleslot(fdw_tuplestore, true, false,
+ trig_tuple_slot2))
elog(ERROR, "failed to fetch tuple2 for AFTER trigger");
+ trig_tuple_slot1->tts_tid = event->ate_ctid1;
+ trig_tuple_slot2->tts_tid = event->ate_ctid2;
}
/* fall through */
case AFTER_TRIGGER_FDW_REUSE:
@@ -4369,13 +4396,26 @@ AfterTriggerExecute(EState *estate,
break;
default:
- if (ItemPointerIsValid(&(event->ate_ctid1)))
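+ /* Rowid payloads, if present, follow the fixed-size event data. */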
+ ptr = (Pointer) event + MAXALIGN(BasicSizeofTriggerEvent(event));
+ if (ItemPointerIsValid(&(event->ate_ctid1)) ||
+ (event->ate_flags & AFTER_TRIGGER_ROWID1))
{
+ Datum tupleid;
+
TupleTableSlot *src_slot = ExecGetTriggerOldSlot(estate,
src_relInfo);
- if (!table_tuple_fetch_row_version(src_rel,
- &(event->ate_ctid1),
+ if (event->ate_flags & AFTER_TRIGGER_ROWID1)
+ {
+ tupleid = PointerGetDatum(ptr);
+ ptr += MAXALIGN(VARSIZE(ptr));
+ }
+ else
+ {
+ tupleid = PointerGetDatum(&(event->ate_ctid1));
+ }
+
+ if (!table_tuple_fetch_row_version(src_rel, tupleid,
SnapshotAny,
src_slot))
elog(ERROR, "failed to fetch tuple1 for AFTER trigger");
@@ -4411,13 +4451,23 @@ AfterTriggerExecute(EState *estate,
/* don't touch ctid2 if not there */
if (((event->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_2CTID ||
(event->ate_flags & AFTER_TRIGGER_CP_UPDATE)) &&
- ItemPointerIsValid(&(event->ate_ctid2)))
+ (ItemPointerIsValid(&(event->ate_ctid2)) ||
+ (event->ate_flags & AFTER_TRIGGER_ROWID2)))
{
+ Datum tupleid;
+
TupleTableSlot *dst_slot = ExecGetTriggerNewSlot(estate,
dst_relInfo);
- if (!table_tuple_fetch_row_version(dst_rel,
- &(event->ate_ctid2),
+ if (event->ate_flags & AFTER_TRIGGER_ROWID2)
+ {
+ tupleid = PointerGetDatum(ptr);
+ }
+ else
+ {
+ tupleid = PointerGetDatum(&(event->ate_ctid2));
+ }
+ if (!table_tuple_fetch_row_version(dst_rel, tupleid,
SnapshotAny,
dst_slot))
elog(ERROR, "failed to fetch tuple2 for AFTER trigger");
@@ -4591,7 +4641,7 @@ afterTriggerMarkEvents(AfterTriggerEventList *events,
{
deferred_found = true;
/* add it to move_list */
- afterTriggerAddEvent(move_list, event, evtshared);
+ afterTriggerAddEvent(move_list, event, evtshared, NULL, NULL);
/* mark original copy "done" so we don't do it again */
event->ate_flags |= AFTER_TRIGGER_DONE;
}
@@ -4695,6 +4745,7 @@ afterTriggerInvokeEvents(AfterTriggerEventList *events,
/* caution: trigdesc could be NULL here */
finfo = rInfo->ri_TrigFunctions;
instr = rInfo->ri_TrigInstrument;
+
if (slot1 != NULL)
{
ExecDropSingleTupleTableSlot(slot1);
@@ -6084,6 +6135,8 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
int tgtype_level;
int i;
Tuplestorestate *fdw_tuplestore = NULL;
+ bytea *rowId1 = NULL;
+ bytea *rowId2 = NULL;
/*
* Check state. We use a normal test not Assert because it is possible to
@@ -6177,6 +6230,21 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
* if so. This preserves the behavior that statement-level triggers fire
* just once per statement and fire after row-level triggers.
*/
+
+ /* Determine flags */
+ if (!(relkind == RELKIND_FOREIGN_TABLE && row_trigger))
+ {
+ if (row_trigger && event == TRIGGER_EVENT_UPDATE)
+ {
+ if (relkind == RELKIND_PARTITIONED_TABLE)
+ new_event.ate_flags = AFTER_TRIGGER_CP_UPDATE;
+ else
+ new_event.ate_flags = AFTER_TRIGGER_2CTID;
+ }
+ else
+ new_event.ate_flags = AFTER_TRIGGER_1CTID;
+ }
+
switch (event)
{
case TRIGGER_EVENT_INSERT:
@@ -6187,6 +6255,13 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
Assert(newslot != NULL);
ItemPointerCopy(&(newslot->tts_tid), &(new_event.ate_ctid1));
ItemPointerSetInvalid(&(new_event.ate_ctid2));
+ if (table_get_row_ref_type(rel) == ROW_REF_ROWID)
+ {
+ Datum val;
+ bool isnull;
+
+ val = slot_getsysattr(newslot, RowIdAttributeNumber, &isnull);
+ Assert(!isnull);
+ rowId1 = DatumGetByteaP(val);
+ new_event.ate_flags |= AFTER_TRIGGER_ROWID1;
+ }
}
else
{
@@ -6206,6 +6281,13 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
Assert(newslot == NULL);
ItemPointerCopy(&(oldslot->tts_tid), &(new_event.ate_ctid1));
ItemPointerSetInvalid(&(new_event.ate_ctid2));
+ if (table_get_row_ref_type(rel) == ROW_REF_ROWID)
+ {
+ Datum val;
+ bool isnull;
+
+ val = slot_getsysattr(oldslot, RowIdAttributeNumber, &isnull);
+ Assert(!isnull);
+ rowId1 = DatumGetByteaP(val);
+ new_event.ate_flags |= AFTER_TRIGGER_ROWID1;
+ }
}
else
{
@@ -6221,10 +6303,54 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
tgtype_event = TRIGGER_TYPE_UPDATE;
if (row_trigger)
{
+ bool src_rowid = false,
+ dst_rowid = false;
Assert(oldslot != NULL);
Assert(newslot != NULL);
ItemPointerCopy(&(oldslot->tts_tid), &(new_event.ate_ctid1));
ItemPointerCopy(&(newslot->tts_tid), &(new_event.ate_ctid2));
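+
+ /*
+ * Determine whether the old and new rows are identified by rowid;
+ * for a cross-partition update the source and destination
+ * partitions may answer differently.
+ */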
+ if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ {
+ Relation src_rel = src_partinfo->ri_RelationDesc;
+ Relation dst_rel = dst_partinfo->ri_RelationDesc;
+
+ src_rowid = table_get_row_ref_type(src_rel) ==
+ ROW_REF_ROWID;
+ dst_rowid = table_get_row_ref_type(dst_rel) ==
+ ROW_REF_ROWID;
+ }
+ else
+ {
+ if (table_get_row_ref_type(rel) == ROW_REF_ROWID)
+ {
+ src_rowid = true;
+ dst_rowid = true;
+ }
+ }
+
+ if (src_rowid)
+ {
+ Datum val;
+ bool isnull;
+ val = slot_getsysattr(oldslot,
+ RowIdAttributeNumber,
+ &isnull);
+ rowId1 = DatumGetByteaP(val);
+ Assert(!isnull);
+ new_event.ate_flags |= AFTER_TRIGGER_ROWID1;
+ }
+
+ if (dst_rowid)
+ {
+ Datum val;
+ bool isnull;
+ val = slot_getsysattr(newslot,
+ RowIdAttributeNumber,
+ &isnull);
+ rowId2 = DatumGetByteaP(val);
+ Assert(!isnull);
+ new_event.ate_flags |= AFTER_TRIGGER_ROWID2;
+ }
/*
* Also remember the OIDs of partitions to fetch these tuples
@@ -6262,20 +6388,6 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
break;
}
- /* Determine flags */
- if (!(relkind == RELKIND_FOREIGN_TABLE && row_trigger))
- {
- if (row_trigger && event == TRIGGER_EVENT_UPDATE)
- {
- if (relkind == RELKIND_PARTITIONED_TABLE)
- new_event.ate_flags = AFTER_TRIGGER_CP_UPDATE;
- else
- new_event.ate_flags = AFTER_TRIGGER_2CTID;
- }
- else
- new_event.ate_flags = AFTER_TRIGGER_1CTID;
- }
-
/* else, we'll initialize ate_flags for each trigger */
tgtype_level = (row_trigger ? TRIGGER_TYPE_ROW : TRIGGER_TYPE_STATEMENT);
@@ -6441,7 +6553,7 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
new_shared.ats_modifiedcols = afterTriggerCopyBitmap(modifiedCols);
afterTriggerAddEvent(&afterTriggers.query_stack[afterTriggers.query_depth].events,
- &new_event, &new_shared);
+ &new_event, &new_shared, rowId1, rowId2);
}
/*
diff --git a/src/backend/executor/execAmi.c b/src/backend/executor/execAmi.c
index 9d18ce8c6b2..286a0f8f222 100644
--- a/src/backend/executor/execAmi.c
+++ b/src/backend/executor/execAmi.c
@@ -615,7 +615,7 @@ IndexSupportsBackwardScan(Oid indexid)
idxrelrec = (Form_pg_class) GETSTRUCT(ht_idxrel);
/* Fetch the index AM's API struct */
- amroutine = GetIndexAmRoutineByAmId(idxrelrec->relam, false);
+ amroutine = GetIndexAmRoutineByAmId(indexid, idxrelrec->relam, false);
result = amroutine->amcanbackward;
diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c
index bf3a08c5f08..928566b3e40 100644
--- a/src/backend/executor/execExpr.c
+++ b/src/backend/executor/execExpr.c
@@ -48,6 +48,9 @@
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/datum.h"
+#include "utils/json.h"
+#include "utils/jsonb.h"
+#include "utils/jsonpath.h"
#include "utils/lsyscache.h"
#include "utils/typcache.h"
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index 6b7997465d0..5d1a31566e7 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -4388,7 +4388,9 @@ ExecEvalSysVar(ExprState *state, ExprEvalStep *op, ExprContext *econtext,
op->resnull);
*op->resvalue = d;
/* this ought to be unreachable, but it's cheap enough to check */
- if (unlikely(*op->resnull))
+ if (op->d.var.attnum != RowIdAttributeNumber &&
+ op->d.var.attnum != SelfItemPointerAttributeNumber &&
+ unlikely(*op->resnull))
elog(ERROR, "failed to fetch attribute from slot");
}
diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c
index 1d82b64b897..a40aebb1ef1 100644
--- a/src/backend/executor/execIndexing.c
+++ b/src/backend/executor/execIndexing.c
@@ -299,7 +299,6 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
List *arbiterIndexes,
bool onlySummarizing)
{
- ItemPointer tupleid = &slot->tts_tid;
List *result = NIL;
int i;
int numIndices;
@@ -309,8 +308,20 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
ExprContext *econtext;
Datum values[INDEX_MAX_KEYS];
bool isnull[INDEX_MAX_KEYS];
+ ItemPointer tupleid;
- Assert(ItemPointerIsValid(tupleid));
+
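+ /*
+ * Tables with ROWID row references supply the row identifier as a
+ * system attribute; otherwise use the slot's TID as before.
+ */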
+ if (table_get_row_ref_type(resultRelInfo->ri_RelationDesc) == ROW_REF_ROWID)
+ {
+ bool isnull;
+ tupleid = DatumGetItemPointer(slot_getsysattr(slot, RowIdAttributeNumber, &isnull));
+ Assert(!isnull);
+ }
+ else
+ {
+ Assert(ItemPointerIsValid(&slot->tts_tid));
+ tupleid = &slot->tts_tid;
+ }
/*
* Get information from the result relation info structure.
@@ -501,6 +512,406 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
return result;
}
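+
+/* ----------------------------------------------------------------
+ *		ExecUpdateIndexTuples
+ *
+ *		Update index entries for an updated tuple. MVCC-aware index
+ *		AMs receive an index_update() call carrying both the old and
+ *		new index values and row identifiers; other AMs get a plain
+ *		index_insert() of the new values, as before.
+ *
+ *		Like ExecInsertIndexTuples, returns a list of index OIDs for
+ *		any deferred-uniqueness or exclusion-constraint rechecks.
+ * ----------------------------------------------------------------
+ */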
+List *
+ExecUpdateIndexTuples(ResultRelInfo *resultRelInfo,
+ TupleTableSlot *slot,
+ TupleTableSlot *oldSlot,
+ EState *estate,
+ bool noDupErr,
+ bool *specConflict,
+ List *arbiterIndexes,
+ bool onlySummarizing)
+{
+ List *result = NIL;
+ int i;
+ int numIndices;
+ RelationPtr relationDescs;
+ Relation heapRelation;
+ IndexInfo **indexInfoArray;
+ ExprContext *econtext;
+ Datum values[INDEX_MAX_KEYS];
+ bool isnull[INDEX_MAX_KEYS];
+ ItemPointer tupleid;
+
+ if (table_get_row_ref_type(resultRelInfo->ri_RelationDesc) == ROW_REF_ROWID)
+ {
+ bool isnull;
+ tupleid = DatumGetItemPointer(slot_getsysattr(slot, RowIdAttributeNumber, &isnull));
+ Assert(!isnull);
+ }
+ else
+ {
+ Assert(ItemPointerIsValid(&slot->tts_tid));
+ tupleid = &slot->tts_tid;
+ }
+
+ /*
+ * Get information from the result relation info structure.
+ */
+ numIndices = resultRelInfo->ri_NumIndices;
+ relationDescs = resultRelInfo->ri_IndexRelationDescs;
+ indexInfoArray = resultRelInfo->ri_IndexRelationInfo;
+ heapRelation = resultRelInfo->ri_RelationDesc;
+
+ /* Sanity check: slot must belong to the same rel as the resultRelInfo. */
+ Assert(slot->tts_tableOid == RelationGetRelid(heapRelation));
+
+ /*
+ * for each index, form and insert the index tuple
+ */
+ for (i = 0; i < numIndices; i++)
+ {
+ Relation indexRelation = relationDescs[i];
+ IndexInfo *indexInfo;
+ bool applyNoDupErr;
+ IndexUniqueCheck checkUnique;
+ bool satisfiesConstraint;
+ bool new_valid = true;
+
+ if (indexRelation == NULL)
+ continue;
+
+ indexInfo = indexInfoArray[i];
+
+ /* If the index is marked as read-only, ignore it */
+ if (!indexInfo->ii_ReadyForInserts)
+ continue;
+
+ /*
+ * Skip processing of non-summarizing indexes if we only update
+ * summarizing indexes
+ */
+ if (onlySummarizing && !indexInfo->ii_Summarizing)
+ continue;
+
+ /*
+ * We will use the EState's per-tuple context for evaluating predicates
+ * and index expressions (creating it if it's not already there).
+ */
+ econtext = GetPerTupleExprContext(estate);
+
+ /* Arrange for econtext's scan tuple to be the tuple under test */
+ econtext->ecxt_scantuple = slot;
+
+ /* Check for partial index */
+ if (indexInfo->ii_Predicate != NIL)
+ {
+ ExprState *predicate;
+
+ /*
+ * If predicate state not set up yet, create it (in the estate's
+ * per-query context)
+ */
+ predicate = indexInfo->ii_PredicateState;
+ if (predicate == NULL)
+ {
+ predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
+ indexInfo->ii_PredicateState = predicate;
+ }
+
+ /* Skip this index-update if the predicate isn't satisfied */
+ if (!ExecQual(predicate, econtext))
+ {
+ if (!indexRelation->rd_indam->ammvccaware)
+ continue;
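+
+ /*
+ * An MVCC-aware AM must still be called so that it can
+ * retire the old index entry; record that the new tuple
+ * does not satisfy the predicate instead of skipping it.
+ */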
+ new_valid = false;
+ }
+ }
+
+ /*
+ * FormIndexDatum fills in its values and isnull parameters with the
+ * appropriate values for the column(s) of the index.
+ */
+ FormIndexDatum(indexInfo,
+ slot,
+ estate,
+ values,
+ isnull);
+
+ /* Check whether to apply noDupErr to this index */
+ applyNoDupErr = noDupErr &&
+ (arbiterIndexes == NIL ||
+ list_member_oid(arbiterIndexes,
+ indexRelation->rd_index->indexrelid));
+
+ /*
+ * The index AM does the actual insertion, plus uniqueness checking.
+ *
+ * For an immediate-mode unique index, we just tell the index AM to
+ * throw error if not unique.
+ *
+ * For a deferrable unique index, we tell the index AM to just detect
+ * possible non-uniqueness, and we add the index OID to the result
+ * list if further checking is needed.
+ *
+ * For a speculative insertion (used by INSERT ... ON CONFLICT), do
+ * the same as for a deferrable unique index.
+ */
+ if (!indexRelation->rd_index->indisunique)
+ checkUnique = UNIQUE_CHECK_NO;
+ else if (applyNoDupErr)
+ checkUnique = UNIQUE_CHECK_PARTIAL;
+ else if (indexRelation->rd_index->indimmediate)
+ checkUnique = UNIQUE_CHECK_YES;
+ else
+ checkUnique = UNIQUE_CHECK_PARTIAL;
+
+ if (indexRelation->rd_indam->ammvccaware)
+ {
+ Datum valuesOld[INDEX_MAX_KEYS];
+ bool isnullOld[INDEX_MAX_KEYS];
+ Datum oldTupleid;
+ bool old_valid = true;
+
+ if (table_get_row_ref_type(resultRelInfo->ri_RelationDesc) == ROW_REF_ROWID)
+ {
+ bool isnull;
+ oldTupleid = slot_getsysattr(oldSlot, RowIdAttributeNumber, &isnull);
+ Assert(!isnull);
+ }
+ else
+ {
+ Assert(ItemPointerIsValid(&oldSlot->tts_tid));
+ oldTupleid = PointerGetDatum(&oldSlot->tts_tid);
+ }
+
+ econtext = GetPerTupleExprContext(estate);
+ econtext->ecxt_scantuple = oldSlot;
+
+ /* Check for partial index */
+ if (indexInfo->ii_Predicate != NIL)
+ {
+ ExprState *predicate;
+
+ /*
+ * If predicate state not set up yet, create it (in the estate's
+ * per-query context)
+ */
+ predicate = indexInfo->ii_PredicateState;
+ if (predicate == NULL)
+ {
+ predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
+ indexInfo->ii_PredicateState = predicate;
+ }
+
+ /* Skip this index-update if the predicate isn't satisfied */
+ if (!ExecQual(predicate, econtext))
+ old_valid = false;
+ }
+
+ FormIndexDatum(indexInfo,
+ oldSlot,
+ estate,
+ valuesOld,
+ isnullOld);
+
+ satisfiesConstraint =
+ index_update(indexRelation, /* index relation */
+ new_valid,
+ old_valid,
+ values, /* array of index Datums */
+ isnull, /* null flags */
+ ItemPointerGetDatum(tupleid), /* row identifier of new tuple */
+ valuesOld, /* old index Datums */
+ isnullOld, /* old null flags */
+ oldTupleid, /* row identifier of old tuple */
+ heapRelation, /* heap relation */
+ checkUnique, /* type of uniqueness check to do */
+ indexInfo); /* index AM may need this */
+
+ }
+ else
+ {
+ bool indexUnchanged;
+ /*
+ * There's definitely going to be an index_insert() call for this
+ * index. If we're being called as part of an UPDATE statement,
+ * consider if the 'indexUnchanged' = true hint should be passed.
+ */
+ indexUnchanged = index_unchanged_by_update(resultRelInfo,
+ estate,
+ indexInfo,
+ indexRelation);
+
+ satisfiesConstraint =
+ index_insert(indexRelation, /* index relation */
+ values, /* array of index Datums */
+ isnull, /* null flags */
+ tupleid, /* tid of heap tuple */
+ heapRelation, /* heap relation */
+ checkUnique, /* type of uniqueness check to do */
+ indexUnchanged, /* UPDATE without logical change? */
+ indexInfo); /* index AM may need this */
+ }
+
+ /*
+ * If the index has an associated exclusion constraint, check that.
+ * This is simpler than the process for uniqueness checks since we
+ * always insert first and then check. If the constraint is deferred,
+ * we check now anyway, but don't throw error on violation or wait for
+ * a conclusive outcome from a concurrent insertion; instead we'll
+ * queue a recheck event. Similarly, noDupErr callers (speculative
+ * inserters) will recheck later, and wait for a conclusive outcome
+ * then.
+ *
+ * An index for an exclusion constraint can't also be UNIQUE (not an
+ * essential property, we just don't allow it in the grammar), so no
+ * need to preserve the prior state of satisfiesConstraint.
+ */
+ if (indexInfo->ii_ExclusionOps != NULL)
+ {
+ bool violationOK;
+ CEOUC_WAIT_MODE waitMode;
+
+ if (applyNoDupErr)
+ {
+ violationOK = true;
+ waitMode = CEOUC_LIVELOCK_PREVENTING_WAIT;
+ }
+ else if (!indexRelation->rd_index->indimmediate)
+ {
+ violationOK = true;
+ waitMode = CEOUC_NOWAIT;
+ }
+ else
+ {
+ violationOK = false;
+ waitMode = CEOUC_WAIT;
+ }
+
+ satisfiesConstraint =
+ check_exclusion_or_unique_constraint(heapRelation,
+ indexRelation, indexInfo,
+ tupleid, values, isnull,
+ estate, false,
+ waitMode, violationOK, NULL);
+ }
+
+ if ((checkUnique == UNIQUE_CHECK_PARTIAL ||
+ indexInfo->ii_ExclusionOps != NULL) &&
+ !satisfiesConstraint)
+ {
+ /*
+ * The tuple potentially violates the uniqueness or exclusion
+ * constraint, so make a note of the index so that we can re-check
+ * it later. Speculative inserters are told if there was a
+ * speculative conflict, since that always requires a restart.
+ */
+ result = lappend_oid(result, RelationGetRelid(indexRelation));
+ if (indexRelation->rd_index->indimmediate && specConflict)
+ *specConflict = true;
+ }
+ }
+
+ return result;
+}
+
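+/* ----------------------------------------------------------------
+ *		ExecDeleteIndexTuples
+ *
+ *		Let MVCC-aware index AMs delete the entries referencing the
+ *		deleted tuple; non-MVCC-aware AMs are skipped, since their
+ *		dead entries are reclaimed by vacuum as before.
+ * ----------------------------------------------------------------
+ */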
+void
+ExecDeleteIndexTuples(ResultRelInfo *resultRelInfo, TupleTableSlot *slot,
+ EState *estate)
+{
+ int i;
+ int numIndices;
+ RelationPtr relationDescs;
+ Relation heapRelation;
+ IndexInfo **indexInfoArray;
+ ExprContext *econtext;
+ Datum values[INDEX_MAX_KEYS];
+ bool isnull[INDEX_MAX_KEYS];
+ Datum tupleid;
+
+ if (table_get_row_ref_type(resultRelInfo->ri_RelationDesc) == ROW_REF_ROWID)
+ {
+ bool isnull;
+ tupleid = slot_getsysattr(slot, RowIdAttributeNumber, &isnull);
+ Assert(!isnull);
+ }
+ else
+ {
+ Assert(ItemPointerIsValid(&slot->tts_tid));
+ tupleid = PointerGetDatum(&slot->tts_tid);
+ }
+
+ /*
+ * Get information from the result relation info structure.
+ */
+ numIndices = resultRelInfo->ri_NumIndices;
+ relationDescs = resultRelInfo->ri_IndexRelationDescs;
+ indexInfoArray = resultRelInfo->ri_IndexRelationInfo;
+ heapRelation = resultRelInfo->ri_RelationDesc;
+
+ /* Sanity check: slot must belong to the same rel as the resultRelInfo. */
+ Assert(slot->tts_tableOid == RelationGetRelid(heapRelation));
+
+ /*
+ * for each index, form and delete the index tuple
+ */
+ for (i = 0; i < numIndices; i++)
+ {
+ Relation indexRelation = relationDescs[i];
+ IndexInfo *indexInfo;
+
+ if (indexRelation == NULL)
+ continue;
+
+ indexInfo = indexInfoArray[i];
+
+ /* If the index is marked as read-only, ignore it */
+ if (!indexInfo->ii_ReadyForInserts)
+ continue;
+
+ if (!indexRelation->rd_indam->ammvccaware)
+ continue;
+
+ /*
+ * We will use the EState's per-tuple context for evaluating predicates
+ * and index expressions (creating it if it's not already there).
+ */
+ econtext = GetPerTupleExprContext(estate);
+
+ /* Arrange for econtext's scan tuple to be the tuple under test */
+ econtext->ecxt_scantuple = slot;
+
+ /* Check for partial index */
+ if (indexInfo->ii_Predicate != NIL)
+ {
+ ExprState *predicate;
+
+ /*
+ * If predicate state not set up yet, create it (in the estate's
+ * per-query context)
+ */
+ predicate = indexInfo->ii_PredicateState;
+ if (predicate == NULL)
+ {
+ predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
+ indexInfo->ii_PredicateState = predicate;
+ }
+
+ /* Skip this index-update if the predicate isn't satisfied */
+ if (!ExecQual(predicate, econtext))
+ continue;
+ }
+
+ /*
+ * FormIndexDatum fills in its values and isnull parameters with the
+ * appropriate values for the column(s) of the index.
+ */
+ FormIndexDatum(indexInfo,
+ slot,
+ estate,
+ values,
+ isnull);
+
+ index_delete(indexRelation, /* index relation */
+ values, /* array of index Datums */
+ isnull, /* null flags */
+ tupleid, /* row identifier of heap tuple */
+ heapRelation, /* heap relation */
+ indexInfo); /* index AM may need this */
+ }
+}
+
/* ----------------------------------------------------------------
* ExecCheckIndexConstraints
*
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index 4c5a7bbf620..334458574ca 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -869,13 +869,15 @@ InitPlan(QueryDesc *queryDesc, int eflags)
Oid relid;
Relation relation;
ExecRowMark *erm;
+ RangeTblEntry *rangeEntry;
/* ignore "parent" rowmarks; they are irrelevant at runtime */
if (rc->isParent)
continue;
/* get relation's OID (will produce InvalidOid if subquery) */
- relid = exec_rt_fetch(rc->rti, estate)->relid;
+ rangeEntry = exec_rt_fetch(rc->rti, estate);
+ relid = rangeEntry->relid;
/* open relation, if we need to access it for this mark type */
switch (rc->markType)
@@ -908,6 +910,10 @@ InitPlan(QueryDesc *queryDesc, int eflags)
erm->prti = rc->prti;
erm->rowmarkId = rc->rowmarkId;
erm->markType = rc->markType;
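+ /*
+ * Remember how rows of this relation are referenced: whole-row
+ * copies for ROW_MARK_COPY, otherwise the reference type (TID or
+ * ROWID) recorded in the range table entry.
+ */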
+ if (erm->markType == ROW_MARK_COPY)
+ erm->refType = ROW_REF_COPY;
+ else
+ erm->refType = rangeEntry->reftype;
erm->strength = rc->strength;
erm->waitPolicy = rc->waitPolicy;
erm->ermActive = false;
@@ -1295,6 +1301,8 @@ InitResultRelInfo(ResultRelInfo *resultRelInfo,
resultRelInfo->ri_ChildToRootMap = NULL;
resultRelInfo->ri_ChildToRootMapValid = false;
resultRelInfo->ri_CopyMultiInsertBuffer = NULL;
+
+ resultRelInfo->ri_RowRefType = table_get_row_ref_type(resultRelationDesc);
}
/*
@@ -2429,17 +2437,28 @@ ExecBuildAuxRowMark(ExecRowMark *erm, List *targetlist)
aerm->rowmark = erm;
/* Look up the resjunk columns associated with this rowmark */
- if (erm->markType != ROW_MARK_COPY)
+ if (erm->refType == ROW_REF_TID)
{
+ Assert(erm->markType != ROW_MARK_COPY);
/* need ctid for all methods other than COPY */
snprintf(resname, sizeof(resname), "ctid%u", erm->rowmarkId);
aerm->ctidAttNo = ExecFindJunkAttributeInTlist(targetlist,
resname);
if (!AttributeNumberIsValid(aerm->ctidAttNo))
elog(ERROR, "could not find junk %s column", resname);
+ }
+ else if (erm->refType == ROW_REF_ROWID)
+ {
+ Assert(erm->markType != ROW_MARK_COPY);
+ /* need rowid junk column for ROWID row references */
+ snprintf(resname, sizeof(resname), "rowid%u", erm->rowmarkId);
+ aerm->ctidAttNo = ExecFindJunkAttributeInTlist(targetlist,
+ resname);
+ if (!AttributeNumberIsValid(aerm->ctidAttNo))
+ elog(ERROR, "could not find junk %s column", resname);
}
else
{
+ Assert(erm->markType == ROW_MARK_COPY);
/* need wholerow if COPY */
snprintf(resname, sizeof(resname), "wholerow%u", erm->rowmarkId);
aerm->wholeAttNo = ExecFindJunkAttributeInTlist(targetlist,
@@ -2727,8 +2746,9 @@ EvalPlanQualFetchRowMark(EPQState *epqstate, Index rti, TupleTableSlot *slot)
{
/* ordinary table, fetch the tuple */
if (!table_tuple_fetch_row_version(erm->relation,
- (ItemPointer) DatumGetPointer(datum),
- SnapshotAny, slot))
+ datum,
+ SnapshotAny,
+ slot))
elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
return true;
}
diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c
index 25d2868744e..136e761fa2f 100644
--- a/src/backend/executor/execReplication.c
+++ b/src/backend/executor/execReplication.c
@@ -125,6 +125,25 @@ build_replindex_scan_key(ScanKey skey, Relation rel, Relation idxrel,
return skey_attoff;
}
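+
+/*
+ * Return the row identifier of the tuple stored in 'slot': the rowid
+ * system attribute for ROWID tables, or a pointer to the slot's TID
+ * otherwise.
+ */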
+static Datum
+slot_get_tupleid(Relation rel, TupleTableSlot *slot)
+{
+ Datum tupleid;
+
+ if (table_get_row_ref_type(rel) == ROW_REF_ROWID)
+ {
+ bool isnull;
+ tupleid = slot_getsysattr(slot, RowIdAttributeNumber, &isnull);
+ Assert(!isnull);
+ }
+ else
+ {
+ tupleid = PointerGetDatum(&slot->tts_tid);
+ }
+
+ return tupleid;
+}
+
/*
* Search the relation 'rel' for tuple using the index.
*
@@ -209,7 +228,8 @@ RelationFindReplTupleByIndex(Relation rel, Oid idxoid,
PushActiveSnapshot(GetLatestSnapshot());
- res = table_tuple_lock(rel, &(outslot->tts_tid), GetLatestSnapshot(),
+ res = table_tuple_lock(rel, slot_get_tupleid(rel, outslot),
+ GetLatestSnapshot(),
outslot,
GetCurrentCommandId(false),
lockmode,
@@ -393,7 +413,8 @@ RelationFindReplTupleSeq(Relation rel, LockTupleMode lockmode,
PushActiveSnapshot(GetLatestSnapshot());
- res = table_tuple_lock(rel, &(outslot->tts_tid), GetLatestSnapshot(),
+ res = table_tuple_lock(rel, slot_get_tupleid(rel, outslot),
+ GetLatestSnapshot(),
outslot,
GetCurrentCommandId(false),
lockmode,
@@ -516,7 +537,7 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo,
{
bool skip_tuple = false;
Relation rel = resultRelInfo->ri_RelationDesc;
- ItemPointer tid = &(searchslot->tts_tid);
+ Datum tupleid = slot_get_tupleid(rel, searchslot);
/* For now we support only tables. */
Assert(rel->rd_rel->relkind == RELKIND_RELATION);
@@ -528,7 +549,7 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo,
resultRelInfo->ri_TrigDesc->trig_update_before_row)
{
if (!ExecBRUpdateTriggers(estate, epqstate, resultRelInfo,
- tid, NULL, slot, NULL, NULL))
+ tupleid, NULL, slot, NULL, NULL))
skip_tuple = true; /* "do nothing" */
}
@@ -536,6 +557,7 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo,
{
List *recheckIndexes = NIL;
TU_UpdateIndexes update_indexes;
+ TupleTableSlot *oldSlot = NULL;
/* Compute stored generated columns */
if (rel->rd_att->constr &&
@@ -549,19 +571,24 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo,
if (rel->rd_rel->relispartition)
ExecPartitionCheck(resultRelInfo, slot, estate, true);
- simple_table_tuple_update(rel, tid, slot, estate->es_snapshot,
- &update_indexes);
+ oldSlot = ExecGetTriggerOldSlot(estate, resultRelInfo);
+
+ simple_table_tuple_update(rel, tupleid, slot, estate->es_snapshot,
+ &update_indexes, oldSlot);
if (resultRelInfo->ri_NumIndices > 0 && (update_indexes != TU_None))
- recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
- slot, estate, true, false,
+ recheckIndexes = ExecUpdateIndexTuples(resultRelInfo,
+ slot,
+ oldSlot,
+ estate,
+ false,
NULL, NIL,
(update_indexes == TU_Summarizing));
/* AFTER ROW UPDATE Triggers */
ExecARUpdateTriggers(estate, resultRelInfo,
NULL, NULL,
- tid, NULL, slot,
+ NULL, oldSlot, slot,
recheckIndexes, NULL, false);
list_free(recheckIndexes);
@@ -581,7 +608,7 @@ ExecSimpleRelationDelete(ResultRelInfo *resultRelInfo,
{
bool skip_tuple = false;
Relation rel = resultRelInfo->ri_RelationDesc;
- ItemPointer tid = &searchslot->tts_tid;
+ Datum tupleid = slot_get_tupleid(rel, searchslot);
CheckCmdReplicaIdentity(rel, CMD_DELETE);
@@ -590,17 +617,25 @@ ExecSimpleRelationDelete(ResultRelInfo *resultRelInfo,
resultRelInfo->ri_TrigDesc->trig_delete_before_row)
{
skip_tuple = !ExecBRDeleteTriggers(estate, epqstate, resultRelInfo,
- tid, NULL, NULL, NULL, NULL);
+ tupleid, NULL, NULL, NULL, NULL);
}
if (!skip_tuple)
{
+ TupleTableSlot *oldSlot = NULL;
+
+ oldSlot = ExecGetTriggerOldSlot(estate, resultRelInfo);
+
/* OK, delete the tuple */
- simple_table_tuple_delete(rel, tid, estate->es_snapshot);
+ simple_table_tuple_delete(rel, tupleid, estate->es_snapshot, oldSlot);
+
+ /* delete index entries if necessary */
+ if (resultRelInfo->ri_NumIndices > 0)
+ ExecDeleteIndexTuples(resultRelInfo, oldSlot, estate);
/* AFTER ROW DELETE Triggers */
ExecARDeleteTriggers(estate, resultRelInfo,
- tid, NULL, NULL, false);
+ NULL, oldSlot, NULL, false);
}
}
diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c
index c06b2288583..bb65bd078cb 100644
--- a/src/backend/executor/execUtils.c
+++ b/src/backend/executor/execUtils.c
@@ -1242,9 +1242,19 @@ ExecGetChildToRootMap(ResultRelInfo *resultRelInfo)
ResultRelInfo *rootRelInfo = resultRelInfo->ri_RootResultRelInfo;
if (rootRelInfo)
- resultRelInfo->ri_ChildToRootMap =
- convert_tuples_by_name(RelationGetDescr(resultRelInfo->ri_RelationDesc),
- RelationGetDescr(rootRelInfo->ri_RelationDesc));
+ {
+ TupleDesc indesc = RelationGetDescr(resultRelInfo->ri_RelationDesc);
+ TupleDesc outdesc = RelationGetDescr(rootRelInfo->ri_RelationDesc);
+ AttrMap *attrMap;
+
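+ /*
+ * For ROWID tables, build the conversion map even when the
+ * descriptors happen to match, so child-to-root conversion always
+ * goes through a map.
+ */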
+ if (table_get_row_ref_type(resultRelInfo->ri_RelationDesc) != ROW_REF_ROWID)
+ attrMap = build_attrmap_by_name_if_req(indesc, outdesc, false);
+ else
+ attrMap = build_attrmap_by_name(indesc, outdesc, false);
+ if (attrMap)
+ resultRelInfo->ri_ChildToRootMap =
+ convert_tuples_by_name_attrmap(indesc, outdesc, attrMap);
+ }
else /* this isn't a child result rel */
resultRelInfo->ri_ChildToRootMap = NULL;
@@ -1281,8 +1291,10 @@ ExecGetRootToChildMap(ResultRelInfo *resultRelInfo, EState *estate)
* to ignore by passing true for missing_ok.
*/
oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
- attrMap = build_attrmap_by_name_if_req(indesc, outdesc,
- !childrel->rd_rel->relispartition);
+ if (table_get_row_ref_type(resultRelInfo->ri_RelationDesc) != ROW_REF_ROWID)
+ attrMap = build_attrmap_by_name_if_req(indesc, outdesc,
+ !childrel->rd_rel->relispartition);
+ else
+ attrMap = build_attrmap_by_name(indesc, outdesc,
+ !childrel->rd_rel->relispartition);
if (attrMap)
resultRelInfo->ri_RootToChildMap =
convert_tuples_by_name_attrmap(indesc, outdesc, attrMap);
diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c
index 45d1a67a713..6ebddd36c95 100644
--- a/src/backend/executor/nodeIndexonlyscan.c
+++ b/src/backend/executor/nodeIndexonlyscan.c
@@ -66,7 +66,7 @@ IndexOnlyNext(IndexOnlyScanState *node)
ScanDirection direction;
IndexScanDesc scandesc;
TupleTableSlot *slot;
- ItemPointer tid;
+ ItemPointer tid = NULL;
/*
* extract necessary information from index scan node
@@ -118,12 +118,36 @@ IndexOnlyNext(IndexOnlyScanState *node)
/*
* OK, now that we have what we need, fetch the next tuple.
*/
- while ((tid = index_getnext_tid(scandesc, direction)) != NULL)
+ while (true)
{
bool tuple_from_heap = false;
CHECK_FOR_INTERRUPTS();
+ if (scandesc->xs_want_rowid)
+ {
+ NullableDatum rowid;
+ /* Time to fetch the next TID from the index */
+ rowid = index_getnext_rowid(scandesc, direction);
+
+ /* If we're out of index entries, we're done */
+ if (rowid.isnull)
+ break;
+
+ /* Assert(RowidEquals(rowid, &scandesc->xs_rowid)); */
+ }
+ else
+ {
+ /* Time to fetch the next TID from the index */
+ tid = index_getnext_tid(scandesc, direction);
+
+ /* If we're out of index entries, we're done */
+ if (tid == NULL)
+ break;
+
+ Assert(ItemPointerEquals(tid, &scandesc->xs_heaptid));
+ }
+
/*
* We can skip the heap fetch if the TID references a heap page on
* which all tuples are known visible to everybody. In any case,
@@ -158,7 +182,8 @@ IndexOnlyNext(IndexOnlyScanState *node)
* It's worth going through this complexity to avoid needing to lock
* the VM buffer, which could cause significant contention.
*/
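+ /*
+ * Rowid-based scans carry no heap TID, so the visibility-map
+ * shortcut below does not apply to them.
+ */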
- if (!VM_ALL_VISIBLE(scandesc->heapRelation,
+ if (!scandesc->xs_want_rowid &&
+ !VM_ALL_VISIBLE(scandesc->heapRelation,
ItemPointerGetBlockNumber(tid),
&node->ioss_VMBuffer))
{
@@ -243,7 +268,7 @@ IndexOnlyNext(IndexOnlyScanState *node)
* If we didn't access the heap, then we'll need to take a predicate
* lock explicitly, as if we had. For now we do that at page level.
*/
- if (!tuple_from_heap)
+ if (!tuple_from_heap && !scandesc->xs_want_rowid)
PredicateLockPage(scandesc->heapRelation,
ItemPointerGetBlockNumber(tid),
estate->es_snapshot);
diff --git a/src/backend/executor/nodeLockRows.c b/src/backend/executor/nodeLockRows.c
index e459971d32e..049c9841309 100644
--- a/src/backend/executor/nodeLockRows.c
+++ b/src/backend/executor/nodeLockRows.c
@@ -27,6 +27,7 @@
#include "executor/nodeLockRows.h"
#include "foreign/fdwapi.h"
#include "miscadmin.h"
+#include "utils/datum.h"
#include "utils/rel.h"
@@ -157,7 +158,16 @@ ExecLockRows(PlanState *pstate)
}
/* okay, try to lock (and fetch) the tuple */
- tid = *((ItemPointer) DatumGetPointer(datum));
+ if (erm->refType == ROW_REF_TID)
+ {
+ tid = *((ItemPointer) DatumGetPointer(datum));
+ datum = PointerGetDatum(&tid);
+ }
+ else
+ {
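+ /*
+ * Copy the rowid datum out of the subplan tuple so that it
+ * remains valid across table_tuple_lock(); it is freed below.
+ */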
+ Assert(erm->refType == ROW_REF_ROWID);
+ datum = datumCopy(datum, false, -1);
+ }
switch (erm->markType)
{
case ROW_MARK_EXCLUSIVE:
@@ -182,12 +192,15 @@ ExecLockRows(PlanState *pstate)
if (!IsolationUsesXactSnapshot())
lockflags |= TUPLE_LOCK_FLAG_FIND_LAST_VERSION;
- test = table_tuple_lock(erm->relation, &tid, estate->es_snapshot,
+ test = table_tuple_lock(erm->relation, datum, estate->es_snapshot,
markSlot, estate->es_output_cid,
lockmode, erm->waitPolicy,
lockflags,
&tmfd);
+ if (erm->refType == ROW_REF_ROWID)
+ pfree(DatumGetPointer(datum));
+
switch (test)
{
case TM_WouldBlock:
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index 27b55334ed4..c10311cddb4 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -140,12 +140,11 @@ static void ExecPendingInserts(EState *estate);
static void ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context,
ResultRelInfo *sourcePartInfo,
ResultRelInfo *destPartInfo,
- ItemPointer tupleid,
- TupleTableSlot *oldslot,
+ Datum tupleid,
+ TupleTableSlot *oldSlot,
TupleTableSlot *newslot);
static bool ExecOnConflictUpdate(ModifyTableContext *context,
ResultRelInfo *resultRelInfo,
- ItemPointer conflictTid,
TupleTableSlot *excludedSlot,
bool canSetTag,
TupleTableSlot **returning);
@@ -158,12 +157,12 @@ static TupleTableSlot *ExecPrepareTupleRouting(ModifyTableState *mtstate,
static TupleTableSlot *ExecMerge(ModifyTableContext *context,
ResultRelInfo *resultRelInfo,
- ItemPointer tupleid,
+ Datum tupleid,
bool canSetTag);
static void ExecInitMerge(ModifyTableState *mtstate, EState *estate);
static bool ExecMergeMatched(ModifyTableContext *context,
ResultRelInfo *resultRelInfo,
- ItemPointer tupleid,
+ Datum tupleid,
bool canSetTag);
static void ExecMergeNotMatched(ModifyTableContext *context,
ResultRelInfo *resultRelInfo,
@@ -278,66 +277,6 @@ ExecProcessReturning(ResultRelInfo *resultRelInfo,
return ExecProject(projectReturning);
}
-/*
- * ExecCheckTupleVisible -- verify tuple is visible
- *
- * It would not be consistent with guarantees of the higher isolation levels to
- * proceed with avoiding insertion (taking speculative insertion's alternative
- * path) on the basis of another tuple that is not visible to MVCC snapshot.
- * Check for the need to raise a serialization failure, and do so as necessary.
- */
-static void
-ExecCheckTupleVisible(EState *estate,
- Relation rel,
- TupleTableSlot *slot)
-{
- if (!IsolationUsesXactSnapshot())
- return;
-
- if (!table_tuple_satisfies_snapshot(rel, slot, estate->es_snapshot))
- {
- Datum xminDatum;
- TransactionId xmin;
- bool isnull;
-
- xminDatum = slot_getsysattr(slot, MinTransactionIdAttributeNumber, &isnull);
- Assert(!isnull);
- xmin = DatumGetTransactionId(xminDatum);
-
- /*
- * We should not raise a serialization failure if the conflict is
- * against a tuple inserted by our own transaction, even if it's not
- * visible to our snapshot. (This would happen, for example, if
- * conflicting keys are proposed for insertion in a single command.)
- */
- if (!TransactionIdIsCurrentTransactionId(xmin))
- ereport(ERROR,
- (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
- errmsg("could not serialize access due to concurrent update")));
- }
-}
-
-/*
- * ExecCheckTIDVisible -- convenience variant of ExecCheckTupleVisible()
- */
-static void
-ExecCheckTIDVisible(EState *estate,
- ResultRelInfo *relinfo,
- ItemPointer tid,
- TupleTableSlot *tempSlot)
-{
- Relation rel = relinfo->ri_RelationDesc;
-
- /* Redundantly check isolation level */
- if (!IsolationUsesXactSnapshot())
- return;
-
- if (!table_tuple_fetch_row_version(rel, tid, SnapshotAny, tempSlot))
- elog(ERROR, "failed to fetch conflicting tuple for ON CONFLICT");
- ExecCheckTupleVisible(estate, rel, tempSlot);
- ExecClearTuple(tempSlot);
-}
-
/*
* Initialize to compute stored generated columns for a tuple
*
@@ -578,6 +517,10 @@ ExecInitInsertProjection(ModifyTableState *mtstate,
resultRelInfo->ri_newTupleSlot =
table_slot_create(resultRelInfo->ri_RelationDesc,
&estate->es_tupleTable);
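+ /* ON CONFLICT DO UPDATE also needs a slot to hold the old tuple. */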
+ if (node->onConflictAction == ONCONFLICT_UPDATE)
+ resultRelInfo->ri_oldTupleSlot =
+ table_slot_create(resultRelInfo->ri_RelationDesc,
+ &estate->es_tupleTable);
/* Build ProjectionInfo if needed (it probably isn't). */
if (need_projection)
@@ -1019,12 +962,19 @@ ExecInsert(ModifyTableContext *context,
if (onconflict != ONCONFLICT_NONE && resultRelInfo->ri_NumIndices > 0)
{
/* Perform a speculative insertion. */
- uint32 specToken;
- ItemPointerData conflictTid;
- bool specConflict;
List *arbiterIndexes;
+ TupleTableSlot *existing = NULL,
+ *returningSlot,
+ *inserted;
+ LockTupleMode lockmode = LockTupleExclusive;
arbiterIndexes = resultRelInfo->ri_onConflictArbiterIndexes;
+ returningSlot = ExecGetReturningSlot(estate, resultRelInfo);
+ if (onconflict == ONCONFLICT_UPDATE)
+ {
+ lockmode = ExecUpdateLockMode(estate, resultRelInfo);
+ existing = resultRelInfo->ri_onConflict->oc_Existing;
+ }
/*
* Do a non-conclusive check for conflicts first.
@@ -1041,23 +991,29 @@ ExecInsert(ModifyTableContext *context,
*/
vlock:
CHECK_FOR_INTERRUPTS();
- specConflict = false;
- if (!ExecCheckIndexConstraints(resultRelInfo, slot, estate,
- &conflictTid, arbiterIndexes))
+
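+ /*
+ * Ask the table AM to insert the tuple while checking the arbiter
+ * indexes itself. On a conflict it returns NULL and, for DO
+ * UPDATE, delivers the conflicting row in 'existing'.
+ */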
+ inserted = table_tuple_insert_with_arbiter(resultRelInfo,
+ slot, estate->es_output_cid,
+ 0, NULL, arbiterIndexes, estate,
+ lockmode, existing, returningSlot);
+ if (!inserted)
{
- /* committed conflict tuple found */
if (onconflict == ONCONFLICT_UPDATE)
{
+ TupleTableSlot *returning = NULL;
+
+ if (TTS_EMPTY(existing))
+ goto vlock;
+
/*
* In case of ON CONFLICT DO UPDATE, execute the UPDATE
* part. Be prepared to retry if the UPDATE fails because
* of another concurrent UPDATE/DELETE to the conflict
* tuple.
*/
- TupleTableSlot *returning = NULL;
if (ExecOnConflictUpdate(context, resultRelInfo,
- &conflictTid, slot, canSetTag,
+ slot, canSetTag,
&returning))
{
InstrCountTuples2(&mtstate->ps, 1);
@@ -1080,57 +1036,13 @@ ExecInsert(ModifyTableContext *context,
* ExecGetReturningSlot() in the DO NOTHING case...
*/
Assert(onconflict == ONCONFLICT_NOTHING);
- ExecCheckTIDVisible(estate, resultRelInfo, &conflictTid,
- ExecGetReturningSlot(estate, resultRelInfo));
InstrCountTuples2(&mtstate->ps, 1);
return NULL;
}
}
-
- /*
- * Before we start insertion proper, acquire our "speculative
- * insertion lock". Others can use that to wait for us to decide
- * if we're going to go ahead with the insertion, instead of
- * waiting for the whole transaction to complete.
- */
- specToken = SpeculativeInsertionLockAcquire(GetCurrentTransactionId());
-
- /* insert the tuple, with the speculative token */
- table_tuple_insert_speculative(resultRelationDesc, slot,
- estate->es_output_cid,
- 0,
- NULL,
- specToken);
-
- /* insert index entries for tuple */
- recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
- slot, estate, false, true,
- &specConflict,
- arbiterIndexes,
- false);
-
- /* adjust the tuple's state accordingly */
- table_tuple_complete_speculative(resultRelationDesc, slot,
- specToken, !specConflict);
-
- /*
- * Wake up anyone waiting for our decision. They will re-check
- * the tuple, see that it's no longer speculative, and wait on our
- * XID as if this was a regularly inserted tuple all along. Or if
- * we killed the tuple, they will see it's dead, and proceed as if
- * the tuple never existed.
- */
- SpeculativeInsertionLockRelease(GetCurrentTransactionId());
-
- /*
- * If there was a conflict, start from the beginning. We'll do
- * the pre-check again, which will now find the conflicting tuple
- * (unless it aborts before we get there).
- */
- if (specConflict)
+ else
{
- list_free(recheckIndexes);
- goto vlock;
+ slot = inserted;
}
/* Since there was no insertion conflict, we're done */
@@ -1138,9 +1050,9 @@ ExecInsert(ModifyTableContext *context,
else
{
/* insert the tuple normally */
- table_tuple_insert(resultRelationDesc, slot,
- estate->es_output_cid,
- 0, NULL);
+ slot = table_tuple_insert(resultRelationDesc, slot,
+ estate->es_output_cid,
+ 0, NULL);
/* insert index entries for tuple */
if (resultRelInfo->ri_NumIndices > 0)
@@ -1167,7 +1079,7 @@ ExecInsert(ModifyTableContext *context,
ExecARUpdateTriggers(estate, resultRelInfo,
NULL, NULL,
NULL,
- NULL,
+ resultRelInfo->ri_oldTupleSlot,
slot,
NULL,
mtstate->mt_transition_capture,
@@ -1316,12 +1228,20 @@ ExecPendingInserts(EState *estate)
*/
static bool
ExecDeletePrologue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
- ItemPointer tupleid, HeapTuple oldtuple,
+ Datum tupleid, HeapTuple oldtuple,
TupleTableSlot **epqreturnslot, TM_Result *result)
{
if (result)
*result = TM_Ok;
+ /*
+ * Open the table's indexes, if we have not done so already, so that we
+ * can delete index entries.
+ */
+ if (resultRelInfo->ri_RelationDesc->rd_rel->relhasindex &&
+ resultRelInfo->ri_IndexRelationDescs == NULL)
+ ExecOpenIndices(resultRelInfo, false);
+
/* BEFORE ROW DELETE triggers */
if (resultRelInfo->ri_TrigDesc &&
resultRelInfo->ri_TrigDesc->trig_delete_before_row)
@@ -1347,7 +1267,8 @@ ExecDeletePrologue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
*/
static TM_Result
ExecDeleteAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
- ItemPointer tupleid, bool changingPart)
+ Datum tupleid, bool changingPart, int options,
+ TupleTableSlot *oldSlot)
{
EState *estate = context->estate;
@@ -1355,9 +1276,10 @@ ExecDeleteAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
estate->es_output_cid,
estate->es_snapshot,
estate->es_crosscheck_snapshot,
- true /* wait for commit */ ,
+ options,
&context->tmfd,
- changingPart);
+ changingPart,
+ oldSlot);
}
/*
@@ -1369,12 +1291,17 @@ ExecDeleteAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
*/
static void
ExecDeleteEpilogue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
- ItemPointer tupleid, HeapTuple oldtuple, bool changingPart)
+ HeapTuple oldtuple,
+ TupleTableSlot *slot, bool changingPart)
{
ModifyTableState *mtstate = context->mtstate;
EState *estate = context->estate;
TransitionCaptureState *ar_delete_trig_tcs;
+ /* delete index entries if necessary */
+ if (resultRelInfo->ri_NumIndices > 0)
+ ExecDeleteIndexTuples(resultRelInfo, slot, context->estate);
+
/*
* If this delete is the result of a partition key update that moved the
* tuple to a new partition, put this row into the transition OLD TABLE,
@@ -1387,8 +1314,8 @@ ExecDeleteEpilogue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
{
ExecARUpdateTriggers(estate, resultRelInfo,
NULL, NULL,
- tupleid, oldtuple,
- NULL, NULL, mtstate->mt_transition_capture,
+ oldtuple,
+ slot, NULL, NULL, mtstate->mt_transition_capture,
false);
/*
@@ -1399,10 +1326,30 @@ ExecDeleteEpilogue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
}
/* AFTER ROW DELETE Triggers */
- ExecARDeleteTriggers(estate, resultRelInfo, tupleid, oldtuple,
+ ExecARDeleteTriggers(estate, resultRelInfo, oldtuple, slot,
ar_delete_trig_tcs, changingPart);
}
+/*
+ * Initializes the tuple slot in a ResultRelInfo for DELETE action.
+ *
+ * We mark 'projectNewInfoValid' even though the projections themselves
+ * are not initialized here.
+ */
+static void
+ExecInitDeleteTupleSlot(ModifyTableState *mtstate,
+ ResultRelInfo *resultRelInfo)
+{
+ EState *estate = mtstate->ps.state;
+
+ Assert(!resultRelInfo->ri_projectNewInfoValid);
+
+ resultRelInfo->ri_oldTupleSlot =
+ table_slot_create(resultRelInfo->ri_RelationDesc,
+ &estate->es_tupleTable);
+ resultRelInfo->ri_projectNewInfoValid = true;
+}
+
/* ----------------------------------------------------------------
* ExecDelete
*
@@ -1428,8 +1375,9 @@ ExecDeleteEpilogue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
static TupleTableSlot *
ExecDelete(ModifyTableContext *context,
ResultRelInfo *resultRelInfo,
- ItemPointer tupleid,
+ Datum tupleid,
HeapTuple oldtuple,
+ TupleTableSlot *oldSlot,
bool processReturning,
bool changingPart,
bool canSetTag,
@@ -1493,6 +1441,11 @@ ExecDelete(ModifyTableContext *context,
}
else
{
+ int options = TABLE_MODIFY_WAIT | TABLE_MODIFY_FETCH_OLD_TUPLE;
+
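+ /*
+ * Have the AM return the deleted row in oldSlot; outside
+ * snapshot-isolation mode, also have it find and lock the latest
+ * row version, so the EPQ recheck below can use it directly.
+ */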
+ if (!IsolationUsesXactSnapshot())
+ options |= TABLE_MODIFY_LOCK_UPDATED;
+
/*
* delete the tuple
*
@@ -1503,7 +1456,8 @@ ExecDelete(ModifyTableContext *context,
* transaction-snapshot mode transactions.
*/
ldelete:
- result = ExecDeleteAct(context, resultRelInfo, tupleid, changingPart);
+ result = ExecDeleteAct(context, resultRelInfo, tupleid, changingPart,
+ options, oldSlot);
if (tmresult)
*tmresult = result;
@@ -1550,7 +1504,6 @@ ExecDelete(ModifyTableContext *context,
case TM_Updated:
{
- TupleTableSlot *inputslot;
TupleTableSlot *epqslot;
if (IsolationUsesXactSnapshot())
@@ -1559,87 +1512,29 @@ ExecDelete(ModifyTableContext *context,
errmsg("could not serialize access due to concurrent update")));
/*
- * Already know that we're going to need to do EPQ, so
- * fetch tuple directly into the right slot.
+ * We need to do EPQ. The latest tuple is already found
+ * and locked as a result of TABLE_MODIFY_LOCK_UPDATED.
*/
- EvalPlanQualBegin(context->epqstate);
- inputslot = EvalPlanQualSlot(context->epqstate, resultRelationDesc,
- resultRelInfo->ri_RangeTableIndex);
-
- result = table_tuple_lock(resultRelationDesc, tupleid,
- estate->es_snapshot,
- inputslot, estate->es_output_cid,
- LockTupleExclusive, LockWaitBlock,
- TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
- &context->tmfd);
+ Assert(context->tmfd.traversed);
+ epqslot = EvalPlanQual(context->epqstate,
+ resultRelationDesc,
+ resultRelInfo->ri_RangeTableIndex,
+ oldSlot);
+ if (TupIsNull(epqslot))
+ /* Tuple not passing quals anymore, exiting... */
+ return NULL;
- switch (result)
+ /*
+ * If requested, skip delete and pass back the updated
+ * row.
+ */
+ if (epqreturnslot)
{
- case TM_Ok:
- Assert(context->tmfd.traversed);
- epqslot = EvalPlanQual(context->epqstate,
- resultRelationDesc,
- resultRelInfo->ri_RangeTableIndex,
- inputslot);
- if (TupIsNull(epqslot))
- /* Tuple not passing quals anymore, exiting... */
- return NULL;
-
- /*
- * If requested, skip delete and pass back the
- * updated row.
- */
- if (epqreturnslot)
- {
- *epqreturnslot = epqslot;
- return NULL;
- }
- else
- goto ldelete;
-
- case TM_SelfModified:
-
- /*
- * This can be reached when following an update
- * chain from a tuple updated by another session,
- * reaching a tuple that was already updated in
- * this transaction. If previously updated by this
- * command, ignore the delete, otherwise error
- * out.
- *
- * See also TM_SelfModified response to
- * table_tuple_delete() above.
- */
- if (context->tmfd.cmax != estate->es_output_cid)
- ereport(ERROR,
- (errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
- errmsg("tuple to be deleted was already modified by an operation triggered by the current command"),
- errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
- return NULL;
-
- case TM_Deleted:
- /* tuple already deleted; nothing to do */
- return NULL;
-
- default:
-
- /*
- * TM_Invisible should be impossible because we're
- * waiting for updated row versions, and would
- * already have errored out if the first version
- * is invisible.
- *
- * TM_Updated should be impossible, because we're
- * locking the latest version via
- * TUPLE_LOCK_FLAG_FIND_LAST_VERSION.
- */
- elog(ERROR, "unexpected table_tuple_lock status: %u",
- result);
- return NULL;
+ *epqreturnslot = epqslot;
+ return NULL;
}
-
- Assert(false);
- break;
+ else
+ goto ldelete;
}
case TM_Deleted:
@@ -1673,7 +1568,8 @@ ExecDelete(ModifyTableContext *context,
if (tupleDeleted)
*tupleDeleted = true;
- ExecDeleteEpilogue(context, resultRelInfo, tupleid, oldtuple, changingPart);
+ ExecDeleteEpilogue(context, resultRelInfo, oldtuple,
+ oldSlot, changingPart);
/* Process RETURNING if present and if requested */
if (processReturning && resultRelInfo->ri_projectReturning)
@@ -1689,19 +1585,15 @@ ExecDelete(ModifyTableContext *context,
/* FDW must have provided a slot containing the deleted row */
Assert(!TupIsNull(slot));
}
- else
+ else if (!slot || TupIsNull(slot))
{
+ /* Copy old tuple to the returning slot */
slot = ExecGetReturningSlot(estate, resultRelInfo);
if (oldtuple != NULL)
- {
ExecForceStoreHeapTuple(oldtuple, slot, false);
- }
else
- {
- if (!table_tuple_fetch_row_version(resultRelationDesc, tupleid,
- SnapshotAny, slot))
- elog(ERROR, "failed to fetch deleted tuple for DELETE RETURNING");
- }
+ ExecCopySlot(slot, oldSlot);
+ Assert(!TupIsNull(slot));
}
rslot = ExecProcessReturning(resultRelInfo, slot, context->planSlot);
@@ -1742,7 +1634,7 @@ ExecDelete(ModifyTableContext *context,
static bool
ExecCrossPartitionUpdate(ModifyTableContext *context,
ResultRelInfo *resultRelInfo,
- ItemPointer tupleid, HeapTuple oldtuple,
+ Datum tupleid, HeapTuple oldtuple,
TupleTableSlot *slot,
bool canSetTag,
UpdateContext *updateCxt,
@@ -1801,12 +1693,16 @@ ExecCrossPartitionUpdate(ModifyTableContext *context,
MemoryContextSwitchTo(oldcxt);
}
+ /* Make sure ri_oldTupleSlot is initialized. */
+ if (unlikely(!resultRelInfo->ri_projectNewInfoValid))
+ ExecInitUpdateProjection(mtstate, resultRelInfo);
+
/*
* Row movement, part 1. Delete the tuple, but skip RETURNING processing.
* We want to return rows from INSERT.
*/
ExecDelete(context, resultRelInfo,
- tupleid, oldtuple,
+ tupleid, oldtuple, resultRelInfo->ri_oldTupleSlot,
false, /* processReturning */
true, /* changingPart */
false, /* canSetTag */
@@ -1847,21 +1743,13 @@ ExecCrossPartitionUpdate(ModifyTableContext *context,
return true;
else
{
- /* Fetch the most recent version of old tuple. */
- TupleTableSlot *oldSlot;
-
- /* ... but first, make sure ri_oldTupleSlot is initialized. */
- if (unlikely(!resultRelInfo->ri_projectNewInfoValid))
- ExecInitUpdateProjection(mtstate, resultRelInfo);
- oldSlot = resultRelInfo->ri_oldTupleSlot;
- if (!table_tuple_fetch_row_version(resultRelInfo->ri_RelationDesc,
- tupleid,
- SnapshotAny,
- oldSlot))
- elog(ERROR, "failed to fetch tuple being updated");
- /* and project the new tuple to retry the UPDATE with */
+ /*
+ * ExecDelete already fetched the most recent version of the old
+ * tuple into resultRelInfo->ri_oldTupleSlot. So, just project the
+ * new tuple to retry the UPDATE with.
+ */
*retry_slot = ExecGetUpdateNewTuple(resultRelInfo, epqslot,
- oldSlot);
+ resultRelInfo->ri_oldTupleSlot);
return false;
}
}
@@ -1879,8 +1767,8 @@ ExecCrossPartitionUpdate(ModifyTableContext *context,
/* Tuple routing starts from the root table. */
context->cpUpdateReturningSlot =
- ExecInsert(context, mtstate->rootResultRelInfo, slot, canSetTag,
- inserted_tuple, insert_destrel);
+ ExecInsert(context, mtstate->rootResultRelInfo,
+ slot, canSetTag, inserted_tuple, insert_destrel);
/*
* Reset the transition state that may possibly have been written by
@@ -1902,7 +1790,7 @@ ExecCrossPartitionUpdate(ModifyTableContext *context,
*/
static bool
ExecUpdatePrologue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
- ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot,
+ Datum tupleid, HeapTuple oldtuple, TupleTableSlot *slot,
TM_Result *result)
{
Relation resultRelationDesc = resultRelInfo->ri_RelationDesc;
@@ -1979,8 +1867,9 @@ ExecUpdatePrepareSlot(ResultRelInfo *resultRelInfo,
*/
static TM_Result
ExecUpdateAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
- ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot,
- bool canSetTag, UpdateContext *updateCxt)
+ Datum tupleid, HeapTuple oldtuple, TupleTableSlot *slot,
+ bool canSetTag, int options, TupleTableSlot *oldSlot,
+ UpdateContext *updateCxt)
{
EState *estate = context->estate;
Relation resultRelationDesc = resultRelInfo->ri_RelationDesc;
@@ -2073,7 +1962,8 @@ ExecUpdateAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
ExecCrossPartitionUpdateForeignKey(context,
resultRelInfo,
insert_destrel,
- tupleid, slot,
+ tupleid,
+ resultRelInfo->ri_oldTupleSlot,
inserted_tuple);
return TM_Ok;
@@ -2116,9 +2006,10 @@ ExecUpdateAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
estate->es_output_cid,
estate->es_snapshot,
estate->es_crosscheck_snapshot,
- true /* wait for commit */ ,
+ options,
&context->tmfd, &updateCxt->lockmode,
- &updateCxt->updateIndexes);
+ &updateCxt->updateIndexes,
+ oldSlot);
if (result == TM_Ok)
updateCxt->updated = true;
@@ -2133,24 +2024,29 @@ ExecUpdateAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
*/
static void
ExecUpdateEpilogue(ModifyTableContext *context, UpdateContext *updateCxt,
- ResultRelInfo *resultRelInfo, ItemPointer tupleid,
- HeapTuple oldtuple, TupleTableSlot *slot)
+ ResultRelInfo *resultRelInfo,
+ HeapTuple oldtuple, TupleTableSlot *slot,
+ TupleTableSlot *oldSlot)
{
ModifyTableState *mtstate = context->mtstate;
List *recheckIndexes = NIL;
/* insert index entries for tuple if necessary */
if (resultRelInfo->ri_NumIndices > 0 && (updateCxt->updateIndexes != TU_None))
- recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
- slot, context->estate,
- true, false,
+ {
+ recheckIndexes = ExecUpdateIndexTuples(resultRelInfo,
+ slot,
+ oldSlot,
+ context->estate,
+ false,
NULL, NIL,
(updateCxt->updateIndexes == TU_Summarizing));
+ }
/* AFTER ROW UPDATE Triggers */
ExecARUpdateTriggers(context->estate, resultRelInfo,
NULL, NULL,
- tupleid, oldtuple, slot,
+ oldtuple, oldSlot, slot,
recheckIndexes,
mtstate->operation == CMD_INSERT ?
mtstate->mt_oc_transition_capture :
@@ -2182,7 +2078,7 @@ static void
ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context,
ResultRelInfo *sourcePartInfo,
ResultRelInfo *destPartInfo,
- ItemPointer tupleid,
+ Datum tupleid,
TupleTableSlot *oldslot,
TupleTableSlot *newslot)
{
@@ -2239,7 +2135,7 @@ ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context,
/* Perform the root table's triggers. */
ExecARUpdateTriggers(context->estate,
rootRelInfo, sourcePartInfo, destPartInfo,
- tupleid, NULL, newslot, NIL, NULL, true);
+ NULL, oldslot, newslot, NIL, NULL, true);
}
/* ----------------------------------------------------------------
@@ -2261,6 +2157,7 @@ ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context,
* NULL when the foreign table has no relevant triggers.
*
* slot contains the new tuple value to be stored.
+ * oldSlot is the slot to store the old tuple.
* planSlot is the output of the ModifyTable's subplan; we use it
* to access values from other input tables (for RETURNING),
* row-ID junk columns, etc.
@@ -2272,8 +2169,8 @@ ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context,
*/
static TupleTableSlot *
ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
- ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot,
- bool canSetTag)
+ Datum tupleid, HeapTuple oldtuple, TupleTableSlot *slot,
+ TupleTableSlot *oldSlot, bool canSetTag, bool locked)
{
EState *estate = context->estate;
Relation resultRelationDesc = resultRelInfo->ri_RelationDesc;
@@ -2326,6 +2223,15 @@ ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
}
else
{
+ int options = TABLE_MODIFY_WAIT;
+
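+ /*
+ * Unless the tuple is already locked by the caller, have the AM
+ * return the old row in oldSlot and, outside snapshot-isolation
+ * mode, find and lock the latest row version for the EPQ recheck.
+ */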
+ if (!locked)
+ {
+ options |= TABLE_MODIFY_FETCH_OLD_TUPLE;
+ if (!IsolationUsesXactSnapshot())
+ options |= TABLE_MODIFY_LOCK_UPDATED;
+ }
+
/*
* If we generate a new candidate tuple after EvalPlanQual testing, we
* must loop back here to try again. (We don't need to redo triggers,
@@ -2335,7 +2241,7 @@ ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
*/
redo_act:
result = ExecUpdateAct(context, resultRelInfo, tupleid, oldtuple, slot,
- canSetTag, &updateCxt);
+ canSetTag, options, oldSlot, &updateCxt);
/*
* If ExecUpdateAct reports that a cross-partition update was done,
@@ -2386,88 +2292,30 @@ ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
case TM_Updated:
{
- TupleTableSlot *inputslot;
TupleTableSlot *epqslot;
- TupleTableSlot *oldSlot;
if (IsolationUsesXactSnapshot())
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize access due to concurrent update")));
+ Assert(!locked);
/*
- * Already know that we're going to need to do EPQ, so
- * fetch tuple directly into the right slot.
+ * We need to do EPQ. The latest tuple is already found
+ * and locked as a result of TABLE_MODIFY_LOCK_UPDATED.
*/
- inputslot = EvalPlanQualSlot(context->epqstate, resultRelationDesc,
- resultRelInfo->ri_RangeTableIndex);
-
- result = table_tuple_lock(resultRelationDesc, tupleid,
- estate->es_snapshot,
- inputslot, estate->es_output_cid,
- updateCxt.lockmode, LockWaitBlock,
- TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
- &context->tmfd);
-
- switch (result)
- {
- case TM_Ok:
- Assert(context->tmfd.traversed);
-
- epqslot = EvalPlanQual(context->epqstate,
- resultRelationDesc,
- resultRelInfo->ri_RangeTableIndex,
- inputslot);
- if (TupIsNull(epqslot))
- /* Tuple not passing quals anymore, exiting... */
- return NULL;
-
- /* Make sure ri_oldTupleSlot is initialized. */
- if (unlikely(!resultRelInfo->ri_projectNewInfoValid))
- ExecInitUpdateProjection(context->mtstate,
- resultRelInfo);
-
- /* Fetch the most recent version of old tuple. */
- oldSlot = resultRelInfo->ri_oldTupleSlot;
- if (!table_tuple_fetch_row_version(resultRelationDesc,
- tupleid,
- SnapshotAny,
- oldSlot))
- elog(ERROR, "failed to fetch tuple being updated");
- slot = ExecGetUpdateNewTuple(resultRelInfo,
- epqslot, oldSlot);
- goto redo_act;
-
- case TM_Deleted:
- /* tuple already deleted; nothing to do */
- return NULL;
-
- case TM_SelfModified:
-
- /*
- * This can be reached when following an update
- * chain from a tuple updated by another session,
- * reaching a tuple that was already updated in
- * this transaction. If previously modified by
- * this command, ignore the redundant update,
- * otherwise error out.
- *
- * See also TM_SelfModified response to
- * table_tuple_update() above.
- */
- if (context->tmfd.cmax != estate->es_output_cid)
- ereport(ERROR,
- (errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
- errmsg("tuple to be updated was already modified by an operation triggered by the current command"),
- errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
- return NULL;
-
- default:
- /* see table_tuple_lock call in ExecDelete() */
- elog(ERROR, "unexpected table_tuple_lock status: %u",
- result);
- return NULL;
- }
+ Assert(context->tmfd.traversed);
+ epqslot = EvalPlanQual(context->epqstate,
+ resultRelationDesc,
+ resultRelInfo->ri_RangeTableIndex,
+ oldSlot);
+ if (TupIsNull(epqslot))
+ /* Tuple not passing quals anymore, exiting... */
+ return NULL;
+ slot = ExecGetUpdateNewTuple(resultRelInfo,
+ epqslot,
+ oldSlot);
+ goto redo_act;
}
break;
@@ -2490,8 +2338,8 @@ ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
if (canSetTag)
(estate->es_processed)++;
- ExecUpdateEpilogue(context, &updateCxt, resultRelInfo, tupleid, oldtuple,
- slot);
+ ExecUpdateEpilogue(context, &updateCxt, resultRelInfo, oldtuple,
+ slot, oldSlot);
/* Process RETURNING if present */
if (resultRelInfo->ri_projectReturning)
@@ -2514,144 +2362,26 @@ ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
static bool
ExecOnConflictUpdate(ModifyTableContext *context,
ResultRelInfo *resultRelInfo,
- ItemPointer conflictTid,
TupleTableSlot *excludedSlot,
bool canSetTag,
TupleTableSlot **returning)
{
ModifyTableState *mtstate = context->mtstate;
ExprContext *econtext = mtstate->ps.ps_ExprContext;
- Relation relation = resultRelInfo->ri_RelationDesc;
ExprState *onConflictSetWhere = resultRelInfo->ri_onConflict->oc_WhereClause;
TupleTableSlot *existing = resultRelInfo->ri_onConflict->oc_Existing;
- TM_FailureData tmfd;
- LockTupleMode lockmode;
- TM_Result test;
- Datum xminDatum;
- TransactionId xmin;
- bool isnull;
-
- /* Determine lock mode to use */
- lockmode = ExecUpdateLockMode(context->estate, resultRelInfo);
+ Datum tupleid;
- /*
- * Lock tuple for update. Don't follow updates when tuple cannot be
- * locked without doing so. A row locking conflict here means our
- * previous conclusion that the tuple is conclusively committed is not
- * true anymore.
- */
- test = table_tuple_lock(relation, conflictTid,
- context->estate->es_snapshot,
- existing, context->estate->es_output_cid,
- lockmode, LockWaitBlock, 0,
- &tmfd);
- switch (test)
+ if (table_get_row_ref_type(resultRelInfo->ri_RelationDesc) == ROW_REF_ROWID)
{
- case TM_Ok:
- /* success! */
- break;
-
- case TM_Invisible:
-
- /*
- * This can occur when a just inserted tuple is updated again in
- * the same command. E.g. because multiple rows with the same
- * conflicting key values are inserted.
- *
- * This is somewhat similar to the ExecUpdate() TM_SelfModified
- * case. We do not want to proceed because it would lead to the
- * same row being updated a second time in some unspecified order,
- * and in contrast to plain UPDATEs there's no historical behavior
- * to break.
- *
- * It is the user's responsibility to prevent this situation from
- * occurring. These problems are why the SQL standard similarly
- * specifies that for SQL MERGE, an exception must be raised in
- * the event of an attempt to update the same row twice.
- */
- xminDatum = slot_getsysattr(existing,
- MinTransactionIdAttributeNumber,
- &isnull);
- Assert(!isnull);
- xmin = DatumGetTransactionId(xminDatum);
-
- if (TransactionIdIsCurrentTransactionId(xmin))
- ereport(ERROR,
- (errcode(ERRCODE_CARDINALITY_VIOLATION),
- /* translator: %s is a SQL command name */
- errmsg("%s command cannot affect row a second time",
- "ON CONFLICT DO UPDATE"),
- errhint("Ensure that no rows proposed for insertion within the same command have duplicate constrained values.")));
-
- /* This shouldn't happen */
- elog(ERROR, "attempted to lock invisible tuple");
- break;
-
- case TM_SelfModified:
-
- /*
- * This state should never be reached. As a dirty snapshot is used
- * to find conflicting tuples, speculative insertion wouldn't have
- * seen this row to conflict with.
- */
- elog(ERROR, "unexpected self-updated tuple");
- break;
-
- case TM_Updated:
- if (IsolationUsesXactSnapshot())
- ereport(ERROR,
- (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
- errmsg("could not serialize access due to concurrent update")));
-
- /*
- * As long as we don't support an UPDATE of INSERT ON CONFLICT for
- * a partitioned table we shouldn't reach to a case where tuple to
- * be lock is moved to another partition due to concurrent update
- * of the partition key.
- */
- Assert(!ItemPointerIndicatesMovedPartitions(&tmfd.ctid));
-
- /*
- * Tell caller to try again from the very start.
- *
- * It does not make sense to use the usual EvalPlanQual() style
- * loop here, as the new version of the row might not conflict
- * anymore, or the conflicting tuple has actually been deleted.
- */
- ExecClearTuple(existing);
- return false;
-
- case TM_Deleted:
- if (IsolationUsesXactSnapshot())
- ereport(ERROR,
- (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
- errmsg("could not serialize access due to concurrent delete")));
-
- /* see TM_Updated case */
- Assert(!ItemPointerIndicatesMovedPartitions(&tmfd.ctid));
- ExecClearTuple(existing);
- return false;
-
- default:
- elog(ERROR, "unrecognized table_tuple_lock status: %u", test);
+ bool isnull;
+ tupleid = slot_getsysattr(existing, RowIdAttributeNumber, &isnull);
+ Assert(!isnull);
+ }
+ else
+ {
+ tupleid = PointerGetDatum(&existing->tts_tid);
}
-
- /* Success, the tuple is locked. */
-
- /*
- * Verify that the tuple is visible to our MVCC snapshot if the current
- * isolation level mandates that.
- *
- * It's not sufficient to rely on the check within ExecUpdate() as e.g.
- * CONFLICT ... WHERE clause may prevent us from reaching that.
- *
- * This means we only ever continue when a new command in the current
- * transaction could see the row, even though in READ COMMITTED mode the
- * tuple will not be visible according to the current statement's
- * snapshot. This is in line with the way UPDATE deals with newer tuple
- * versions.
- */
- ExecCheckTupleVisible(context->estate, relation, existing);
/*
* Make tuple and any needed join variables available to ExecQual and
@@ -2707,9 +2437,10 @@ ExecOnConflictUpdate(ModifyTableContext *context,
/* Execute UPDATE with projection */
*returning = ExecUpdate(context, resultRelInfo,
- conflictTid, NULL,
+ tupleid, NULL,
resultRelInfo->ri_onConflict->oc_ProjSlot,
- canSetTag);
+ existing,
+ canSetTag, true);
/*
* Clear out existing tuple, as there might not be another conflict among
@@ -2725,7 +2456,7 @@ ExecOnConflictUpdate(ModifyTableContext *context,
*/
static TupleTableSlot *
ExecMerge(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
- ItemPointer tupleid, bool canSetTag)
+ Datum tupleid, bool canSetTag)
{
bool matched;
@@ -2772,7 +2503,7 @@ ExecMerge(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
* from ExecMergeNotMatched to ExecMergeMatched, there is no risk of a
* livelock.
*/
- matched = tupleid != NULL;
+ matched = DatumGetPointer(tupleid) != NULL;
if (matched)
matched = ExecMergeMatched(context, resultRelInfo, tupleid, canSetTag);
@@ -2811,7 +2542,7 @@ ExecMerge(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
*/
static bool
ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
- ItemPointer tupleid, bool canSetTag)
+ Datum tupleid, bool canSetTag)
{
ModifyTableState *mtstate = context->mtstate;
TupleTableSlot *newslot;
@@ -2913,7 +2644,8 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
break; /* concurrent update/delete */
}
result = ExecUpdateAct(context, resultRelInfo, tupleid, NULL,
- newslot, canSetTag, &updateCxt);
+ newslot, canSetTag, TABLE_MODIFY_WAIT, NULL,
+ &updateCxt);
/*
* As in ExecUpdate(), if ExecUpdateAct() reports that a
@@ -2931,7 +2663,8 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
if (result == TM_Ok && updateCxt.updated)
{
ExecUpdateEpilogue(context, &updateCxt, resultRelInfo,
- tupleid, NULL, newslot);
+ NULL, newslot,
+ resultRelInfo->ri_oldTupleSlot);
mtstate->mt_merge_updated += 1;
}
break;
@@ -2945,11 +2678,12 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
return true; /* "do nothing" */
break; /* concurrent update/delete */
}
- result = ExecDeleteAct(context, resultRelInfo, tupleid, false);
+ result = ExecDeleteAct(context, resultRelInfo, tupleid, false,
+ TABLE_MODIFY_WAIT, NULL);
if (result == TM_Ok)
{
- ExecDeleteEpilogue(context, resultRelInfo, tupleid, NULL,
- false);
+ ExecDeleteEpilogue(context, resultRelInfo, NULL,
+ resultRelInfo->ri_oldTupleSlot, false);
mtstate->mt_merge_deleted += 1;
}
break;
@@ -3068,7 +2802,11 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
if (TupIsNull(epqslot))
return false;
- (void) ExecGetJunkAttribute(epqslot,
+ /*
+ * Update tupleid to that of the new tuple, for
+ * the refetch we do at the top.
+ */
+ tupleid = ExecGetJunkAttribute(epqslot,
resultRelInfo->ri_RowIdAttNo,
&isNull);
if (isNull)
@@ -3095,10 +2833,7 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
* that the first qualifying WHEN MATCHED action
* is executed.
*
- * Update tupleid to that of the new tuple, for
- * the refetch we do at the top.
*/
- ItemPointerCopy(&context->tmfd.ctid, tupleid);
goto lmerge_matched;
case TM_Deleted:
@@ -3605,10 +3340,10 @@ ExecModifyTable(PlanState *pstate)
PlanState *subplanstate;
TupleTableSlot *slot;
TupleTableSlot *oldSlot;
+ Datum tupleid;
ItemPointerData tuple_ctid;
HeapTupleData oldtupdata;
HeapTuple oldtuple;
- ItemPointer tupleid;
CHECK_FOR_INTERRUPTS();
@@ -3657,6 +3392,8 @@ ExecModifyTable(PlanState *pstate)
*/
for (;;)
{
+ RowRefType refType;
+
/*
* Reset the per-output-tuple exprcontext. This is needed because
* triggers expect to use that context as workspace. It's a bit ugly
@@ -3706,7 +3443,7 @@ ExecModifyTable(PlanState *pstate)
{
EvalPlanQualSetSlot(&node->mt_epqstate, context.planSlot);
- ExecMerge(&context, node->resultRelInfo, NULL, node->canSetTag);
+ ExecMerge(&context, node->resultRelInfo, PointerGetDatum(NULL), node->canSetTag);
continue; /* no RETURNING support yet */
}
@@ -3742,7 +3479,8 @@ ExecModifyTable(PlanState *pstate)
EvalPlanQualSetSlot(&node->mt_epqstate, context.planSlot);
slot = context.planSlot;
- tupleid = NULL;
+ refType = resultRelInfo->ri_RowRefType;
+ tupleid = PointerGetDatum(NULL);
oldtuple = NULL;
/*
@@ -3784,16 +3522,32 @@ ExecModifyTable(PlanState *pstate)
{
EvalPlanQualSetSlot(&node->mt_epqstate, context.planSlot);
- ExecMerge(&context, node->resultRelInfo, NULL, node->canSetTag);
+ ExecMerge(&context, node->resultRelInfo,
+ PointerGetDatum(NULL), node->canSetTag);
continue; /* no RETURNING support yet */
}
elog(ERROR, "ctid is NULL");
}
- tupleid = (ItemPointer) DatumGetPointer(datum);
- tuple_ctid = *tupleid; /* be sure we don't free ctid!! */
- tupleid = &tuple_ctid;
+ if (refType == ROW_REF_TID)
+ {
+ /* shouldn't ever get a null result... */
+ if (isNull)
+ elog(ERROR, "ctid is NULL");
+
+ tuple_ctid = *((ItemPointer) DatumGetPointer(datum)); /* be sure we don't free ctid!! */
+ tupleid = PointerGetDatum(&tuple_ctid);
+ }
+ else
+ {
+ Assert(refType == ROW_REF_ROWID);
+ /* shouldn't ever get a null result... */
+ if (isNull)
+ elog(ERROR, "rowid is NULL");
+
+ tupleid = datumCopy(datum, false, -1);
+ }
}
/*
@@ -3870,6 +3624,7 @@ ExecModifyTable(PlanState *pstate)
/* Fetch the most recent version of old tuple. */
Relation relation = resultRelInfo->ri_RelationDesc;
+ Assert(DatumGetPointer(tupleid) != NULL);
if (!table_tuple_fetch_row_version(relation, tupleid,
SnapshotAny,
oldSlot))
@@ -3881,12 +3636,18 @@ ExecModifyTable(PlanState *pstate)
/* Now apply the update. */
slot = ExecUpdate(&context, resultRelInfo, tupleid, oldtuple,
- slot, node->canSetTag);
+ slot, resultRelInfo->ri_oldTupleSlot,
+ node->canSetTag, false);
break;
case CMD_DELETE:
+ /* Initialize slot for DELETE to fetch the old tuple */
+ if (unlikely(!resultRelInfo->ri_projectNewInfoValid))
+ ExecInitDeleteTupleSlot(node, resultRelInfo);
+
slot = ExecDelete(&context, resultRelInfo, tupleid, oldtuple,
- true, false, node->canSetTag, NULL, NULL, NULL);
+ resultRelInfo->ri_oldTupleSlot, true, false,
+ node->canSetTag, NULL, NULL, NULL);
break;
case CMD_MERGE:
@@ -3898,6 +3659,9 @@ ExecModifyTable(PlanState *pstate)
break;
}
+ if (refType == ROW_REF_ROWID && DatumGetPointer(tupleid) != NULL)
+ pfree(DatumGetPointer(tupleid));
+
/*
* If we got a RETURNING result, return it to caller. We'll continue
* the work on next call.
@@ -4137,10 +3901,20 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
relkind == RELKIND_MATVIEW ||
relkind == RELKIND_PARTITIONED_TABLE)
{
- resultRelInfo->ri_RowIdAttNo =
- ExecFindJunkAttributeInTlist(subplan->targetlist, "ctid");
- if (!AttributeNumberIsValid(resultRelInfo->ri_RowIdAttNo))
- elog(ERROR, "could not find junk ctid column");
+ if (resultRelInfo->ri_RowRefType == ROW_REF_TID)
+ {
+ resultRelInfo->ri_RowIdAttNo =
+ ExecFindJunkAttributeInTlist(subplan->targetlist, "ctid");
+ if (!AttributeNumberIsValid(resultRelInfo->ri_RowIdAttNo))
+ elog(ERROR, "could not find junk ctid column");
+ }
+ else
+ {
+ resultRelInfo->ri_RowIdAttNo =
+ ExecFindJunkAttributeInTlist(subplan->targetlist, "rowid");
+ if (!AttributeNumberIsValid(resultRelInfo->ri_RowIdAttNo))
+ elog(ERROR, "could not find junk rowid column");
+ }
}
else if (relkind == RELKIND_FOREIGN_TABLE)
{
@@ -4452,6 +4226,8 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
estate->es_auxmodifytables = lcons(mtstate,
estate->es_auxmodifytables);
return mtstate;
}
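Across the executor changes above, a row reference now travels as a Datum holding either a heap TID (ROW_REF_TID) or a table-AM rowid (ROW_REF_ROWID). A minimal sketch of producing such a Datum from a slot, mirroring the ExecModifyTable logic; this is an illustration only, and row_ref_datum_sketch is a hypothetical helper:

	static Datum
	row_ref_datum_sketch(Relation rel, TupleTableSlot *slot,
						 ItemPointerData *ctid_storage)
	{
		if (table_get_row_ref_type(rel) == ROW_REF_TID)
		{
			/* Copy the ctid into caller-provided storage so the Datum
			 * stays valid after the slot is cleared. */
			*ctid_storage = slot->tts_tid;
			return PointerGetDatum(ctid_storage);
		}
		else
		{
			bool	isnull;
			Datum	d;

			/* ROW_REF_ROWID: the rowid is a bytea system attribute. */
			d = slot_getsysattr(slot, RowIdAttributeNumber, &isnull);
			Assert(!isnull);
			return datumCopy(d, false, -1); /* copy the varlena */
		}
	}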
diff --git a/src/backend/executor/nodeTidscan.c b/src/backend/executor/nodeTidscan.c
index 862bd0330bc..8180a2991c3 100644
--- a/src/backend/executor/nodeTidscan.c
+++ b/src/backend/executor/nodeTidscan.c
@@ -378,7 +378,7 @@ TidNext(TidScanState *node)
if (node->tss_isCurrentOf)
table_tuple_get_latest_tid(scan, &tid);
- if (table_tuple_fetch_row_version(heapRelation, &tid, snapshot, slot))
+ if (table_tuple_fetch_row_version(heapRelation, PointerGetDatum(&tid), snapshot, slot))
return slot;
/* Bad TID or failed snapshot qual; try next */
diff --git a/src/backend/nodes/read.c b/src/backend/nodes/read.c
index 5d76f56e4e8..07df92d813c 100644
--- a/src/backend/nodes/read.c
+++ b/src/backend/nodes/read.c
@@ -205,6 +205,17 @@ pg_strtok(int *length)
return ret_str;
}
+/*
+ * pg_str_hasfield
+ *	  Peek at what pg_strtok() would read next and report whether it is a
+ *	  field marker (a token starting with ':'), without consuming input.
+ */
+bool
+pg_str_hasfield(void)
+{
+ const char *local_str = pg_strtok_ptr;
+
+ while (*local_str == ' ' || *local_str == '\n' || *local_str == '\t')
+ local_str++;
+
+ return (*local_str == ':');
+}
+
/*
* debackslash -
* create a palloc'd string holding the given token.
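Illustrative usage of the new probe (the optfield name and node type are hypothetical): a readfuncs.c-style reader can detect an optional serialized field before consuming any tokens, since pg_str_hasfield() only peeks at the input:

	/* Read an optional ":optfield N" pair only when it is present. */
	if (pg_str_hasfield())
	{
		token = pg_strtok(&length);		/* skip :optfield */
		token = pg_strtok(&length);		/* get the value */
		local_node->optfield = atoi(token);
	}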
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index f75e0f99cb9..c62a407f4ca 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -86,6 +86,7 @@ int min_parallel_index_scan_size;
/* Hook for plugins to get control in set_rel_pathlist() */
set_rel_pathlist_hook_type set_rel_pathlist_hook = NULL;
+set_plain_rel_pathlist_hook_type set_plain_rel_pathlist_hook = NULL;
/* Hook for plugins to replace standard_join_search() */
join_search_hook_type join_search_hook = NULL;
@@ -775,8 +776,10 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
*/
required_outer = rel->lateral_relids;
- /* Consider sequential scan */
- add_path(rel, create_seqscan_path(root, rel, required_outer, 0));
+ if (!set_plain_rel_pathlist_hook ||
+ set_plain_rel_pathlist_hook(root, rel, rte))
+ /* Consider sequential scan */
+ add_path(rel, create_seqscan_path(root, rel, required_outer, 0));
/* If appropriate, consider parallel sequential scan */
if (rel->consider_parallel && required_outer == NULL)
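A sketch of how an extension might use the new hook to replace the default sequential-scan path for relations it manages; the my_* names are hypothetical, and the hook contract (return false to suppress the core seqscan path) follows the call site above:

	static bool
	my_set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
							  RangeTblEntry *rte)
	{
		if (my_rel_uses_custom_am(rte))
		{
			add_path(rel, my_create_custom_scan_path(root, rel));
			return false;	/* suppress the built-in seqscan path */
		}
		return true;		/* keep the core behavior */
	}

	/* in _PG_init() */
	set_plain_rel_pathlist_hook = my_set_plain_rel_pathlist;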
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index 0065c8992bd..bf4968e348b 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -48,14 +48,6 @@ typedef enum
ST_ANYSCAN /* either is okay */
} ScanTypeControl;
-/* Data structure for collecting qual clauses that match an index */
-typedef struct
-{
- bool nonempty; /* True if lists are not all empty */
- /* Lists of IndexClause nodes, one list per index column */
- List *indexclauses[INDEX_MAX_KEYS];
-} IndexClauseSet;
-
/* Per-path data used within choose_bitmap_and() */
typedef struct
{
@@ -130,9 +122,6 @@ static double adjust_rowcount_for_semijoins(PlannerInfo *root,
Index outer_relid,
double rowcount);
static double approximate_joinrel_size(PlannerInfo *root, Relids relids);
-static void match_restriction_clauses_to_index(PlannerInfo *root,
- IndexOptInfo *index,
- IndexClauseSet *clauseset);
static void match_join_clauses_to_index(PlannerInfo *root,
RelOptInfo *rel, IndexOptInfo *index,
IndexClauseSet *clauseset,
@@ -2012,7 +2001,7 @@ approximate_joinrel_size(PlannerInfo *root, Relids relids)
* Identify restriction clauses for the rel that match the index.
* Matching clauses are added to *clauseset.
*/
-static void
+void
match_restriction_clauses_to_index(PlannerInfo *root,
IndexOptInfo *index,
IndexClauseSet *clauseset)
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 974c50b29f9..48f251738e2 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -164,16 +164,12 @@ static MergeJoin *create_mergejoin_plan(PlannerInfo *root, MergePath *best_path)
static HashJoin *create_hashjoin_plan(PlannerInfo *root, HashPath *best_path);
static Node *replace_nestloop_params(PlannerInfo *root, Node *expr);
static Node *replace_nestloop_params_mutator(Node *node, PlannerInfo *root);
-static void fix_indexqual_references(PlannerInfo *root, IndexPath *index_path,
- List **stripped_indexquals_p,
- List **fixed_indexquals_p);
static List *fix_indexorderby_references(PlannerInfo *root, IndexPath *index_path);
static Node *fix_indexqual_clause(PlannerInfo *root,
IndexOptInfo *index, int indexcol,
Node *clause, List *indexcolnos);
static Node *fix_indexqual_operand(Node *node, IndexOptInfo *index, int indexcol);
static List *get_switched_clauses(List *clauses, Relids outerrelids);
-static List *order_qual_clauses(PlannerInfo *root, List *clauses);
static void copy_generic_path_info(Plan *dest, Path *src);
static void copy_plan_costsize(Plan *dest, Plan *src);
static void label_sort_with_costsize(PlannerInfo *root, Sort *plan,
@@ -4897,6 +4893,14 @@ replace_nestloop_params(PlannerInfo *root, Node *expr)
return replace_nestloop_params_mutator(expr, root);
}
+/*
+ * replace_nestloop_params_compat
+ *	  Non-static wrapper around replace_nestloop_params(), callable from
+ *	  outside this file.
+ */
+Node *
+replace_nestloop_params_compat(PlannerInfo *root, Node *expr)
+{
+ /* No setup needed for tree walk, so away we go */
+ return replace_nestloop_params_mutator(expr, root);
+}
+
static Node *
replace_nestloop_params_mutator(Node *node, PlannerInfo *root)
{
@@ -4977,7 +4981,7 @@ replace_nestloop_params_mutator(Node *node, PlannerInfo *root)
* are subplans in it (we need two separate copies of the subplan tree, or
* things will go awry).
*/
-static void
+void
fix_indexqual_references(PlannerInfo *root, IndexPath *index_path,
List **stripped_indexquals_p, List **fixed_indexquals_p)
{
@@ -5270,7 +5274,7 @@ get_switched_clauses(List *clauses, Relids outerrelids)
* instead of bare clauses. This is another reason why trying to consider
* selectivity in the ordering would likely do the wrong thing.
*/
-static List *
+List *
order_qual_clauses(PlannerInfo *root, List *clauses)
{
typedef struct
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 2ffef1bad78..7198fd4777c 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -2263,6 +2263,7 @@ preprocess_rowmarks(PlannerInfo *root)
RowMarkClause *rc = lfirst_node(RowMarkClause, l);
RangeTblEntry *rte = rt_fetch(rc->rti, parse->rtable);
PlanRowMark *newrc;
+ RowRefType refType;
/*
* Currently, it is syntactically impossible to have FOR UPDATE et al
@@ -2285,8 +2286,8 @@ preprocess_rowmarks(PlannerInfo *root)
newrc = makeNode(PlanRowMark);
newrc->rti = newrc->prti = rc->rti;
newrc->rowmarkId = ++(root->glob->lastRowMarkId);
- newrc->markType = select_rowmark_type(rte, rc->strength);
- newrc->allMarkTypes = (1 << newrc->markType);
+ newrc->markType = select_rowmark_type(rte, rc->strength, &refType);
+ newrc->allRefTypes = (1 << refType);
newrc->strength = rc->strength;
newrc->waitPolicy = rc->waitPolicy;
newrc->isParent = false;
@@ -2302,6 +2303,7 @@ preprocess_rowmarks(PlannerInfo *root)
{
RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
PlanRowMark *newrc;
+ RowRefType refType = ROW_REF_TID;
i++;
if (!bms_is_member(i, rels))
@@ -2310,8 +2312,8 @@ preprocess_rowmarks(PlannerInfo *root)
newrc = makeNode(PlanRowMark);
newrc->rti = newrc->prti = i;
newrc->rowmarkId = ++(root->glob->lastRowMarkId);
- newrc->markType = select_rowmark_type(rte, LCS_NONE);
- newrc->allMarkTypes = (1 << newrc->markType);
+ newrc->markType = select_rowmark_type(rte, LCS_NONE, &refType);
+ newrc->allRefTypes = (1 << refType);
newrc->strength = LCS_NONE;
newrc->waitPolicy = LockWaitBlock; /* doesn't matter */
newrc->isParent = false;
@@ -2326,11 +2328,13 @@ preprocess_rowmarks(PlannerInfo *root)
* Select RowMarkType to use for a given table
*/
RowMarkType
-select_rowmark_type(RangeTblEntry *rte, LockClauseStrength strength)
+select_rowmark_type(RangeTblEntry *rte, LockClauseStrength strength,
+ RowRefType *refType)
{
if (rte->rtekind != RTE_RELATION)
{
/* If it's not a table at all, use ROW_MARK_COPY */
+ *refType = ROW_REF_COPY;
return ROW_MARK_COPY;
}
else if (rte->relkind == RELKIND_FOREIGN_TABLE)
@@ -2341,10 +2345,12 @@ select_rowmark_type(RangeTblEntry *rte, LockClauseStrength strength)
if (fdwroutine->GetForeignRowMarkType != NULL)
return fdwroutine->GetForeignRowMarkType(rte, strength);
/* Otherwise, use ROW_MARK_COPY by default */
+ *refType = ROW_REF_COPY;
return ROW_MARK_COPY;
}
else
{
+ *refType = rte->reftype;
/* Regular table, apply the appropriate lock type */
switch (strength)
{
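allRefTypes is a bitmask over RowRefType (rather than over RowMarkType, as allMarkTypes was), accumulated across all children of a parent rowmark. A sketch of the producer/consumer pattern, mirroring preprocess_rowmarks above and preprocess_targetlist below:

	RowRefType	refType;
	PlanRowMark *rc = makeNode(PlanRowMark);

	rc->markType = select_rowmark_type(rte, LCS_NONE, &refType);
	rc->allRefTypes = (1 << refType);

	if (rc->allRefTypes & (1 << ROW_REF_TID))
		;	/* emit a "ctid" junk column */
	if (rc->allRefTypes & (1 << ROW_REF_ROWID))
		;	/* emit a bytea "rowid" junk column */
	if (rc->allRefTypes & (1 << ROW_REF_COPY))
		;	/* emit a whole-row junk column */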
diff --git a/src/backend/optimizer/prep/preptlist.c b/src/backend/optimizer/prep/preptlist.c
index 9d46488ef7c..0d849332904 100644
--- a/src/backend/optimizer/prep/preptlist.c
+++ b/src/backend/optimizer/prep/preptlist.c
@@ -210,7 +210,7 @@ preprocess_targetlist(PlannerInfo *root)
if (rc->rti != rc->prti)
continue;
- if (rc->allMarkTypes & ~(1 << ROW_MARK_COPY))
+ if (rc->allRefTypes & (1 << ROW_REF_TID))
{
/* Need to fetch TID */
var = makeVar(rc->rti,
@@ -226,7 +226,23 @@ preprocess_targetlist(PlannerInfo *root)
true);
tlist = lappend(tlist, tle);
}
- if (rc->allMarkTypes & (1 << ROW_MARK_COPY))
+ if (rc->allRefTypes & (1 << ROW_REF_ROWID))
+ {
+ /* Need to fetch rowid */
+ var = makeVar(rc->rti,
+ RowIdAttributeNumber,
+ BYTEAOID,
+ -1,
+ InvalidOid,
+ 0);
+ snprintf(resname, sizeof(resname), "rowid%u", rc->rowmarkId);
+ tle = makeTargetEntry((Expr *) var,
+ list_length(tlist) + 1,
+ pstrdup(resname),
+ true);
+ tlist = lappend(tlist, tle);
+ }
+ if (rc->allRefTypes & (1 << ROW_REF_COPY))
{
/* Need the whole row as a junk var */
var = makeWholeRowVar(rt_fetch(rc->rti, range_table),
diff --git a/src/backend/optimizer/util/appendinfo.c b/src/backend/optimizer/util/appendinfo.c
index f456b3b0a44..43af763f1fe 100644
--- a/src/backend/optimizer/util/appendinfo.c
+++ b/src/backend/optimizer/util/appendinfo.c
@@ -896,17 +896,35 @@ add_row_identity_columns(PlannerInfo *root, Index rtindex,
relkind == RELKIND_MATVIEW ||
relkind == RELKIND_PARTITIONED_TABLE)
{
+ RowRefType refType = table_get_row_ref_type(target_relation);
+
+
/*
* Emit CTID so that executor can find the row to merge, update or
* delete.
*/
- var = makeVar(rtindex,
- SelfItemPointerAttributeNumber,
- TIDOID,
- -1,
- InvalidOid,
- 0);
- add_row_identity_var(root, var, rtindex, "ctid");
+ if (refType == ROW_REF_TID)
+ {
+ var = makeVar(rtindex,
+ SelfItemPointerAttributeNumber,
+ TIDOID,
+ -1,
+ InvalidOid,
+ 0);
+ add_row_identity_var(root, var, rtindex, "ctid");
+ }
+ else
+ {
+ Assert(refType == ROW_REF_ROWID);
+ var = makeVar(rtindex,
+ RowIdAttributeNumber,
+ BYTEAOID,
+ -1,
+ InvalidOid,
+ 0);
+ add_row_identity_var(root, var, rtindex, "rowid");
+ }
}
else if (relkind == RELKIND_FOREIGN_TABLE)
{
diff --git a/src/backend/optimizer/util/inherit.c b/src/backend/optimizer/util/inherit.c
index f9d3ff1e7ac..e16e855cf64 100644
--- a/src/backend/optimizer/util/inherit.c
+++ b/src/backend/optimizer/util/inherit.c
@@ -16,6 +16,7 @@
#include "access/sysattr.h"
#include "access/table.h"
+#include "access/tableam.h"
#include "catalog/partition.h"
#include "catalog/pg_inherits.h"
#include "catalog/pg_type.h"
@@ -91,7 +92,7 @@ expand_inherited_rtentry(PlannerInfo *root, RelOptInfo *rel,
LOCKMODE lockmode;
PlanRowMark *oldrc;
bool old_isParent = false;
- int old_allMarkTypes = 0;
+ int old_allRefTypes = 0;
Assert(rte->inh); /* else caller error */
@@ -131,8 +132,8 @@ expand_inherited_rtentry(PlannerInfo *root, RelOptInfo *rel,
{
old_isParent = oldrc->isParent;
oldrc->isParent = true;
- /* Save initial value of allMarkTypes before children add to it */
- old_allMarkTypes = oldrc->allMarkTypes;
+ /* Save initial value of allRefTypes before children add to it */
+ old_allRefTypes = oldrc->allRefTypes;
}
/* Scan the inheritance set and expand it */
@@ -239,15 +240,15 @@ expand_inherited_rtentry(PlannerInfo *root, RelOptInfo *rel,
*/
if (oldrc)
{
- int new_allMarkTypes = oldrc->allMarkTypes;
+ int new_allRefTypes = oldrc->allRefTypes;
Var *var;
TargetEntry *tle;
char resname[32];
List *newvars = NIL;
/* Add TID junk Var if needed, unless we had it already */
- if (new_allMarkTypes & ~(1 << ROW_MARK_COPY) &&
- !(old_allMarkTypes & ~(1 << ROW_MARK_COPY)))
+ if (new_allRefTypes & (1 << ROW_REF_TID) &&
+ !(old_allRefTypes & (1 << ROW_REF_TID)))
{
/* Need to fetch TID */
var = makeVar(oldrc->rti,
@@ -266,8 +267,8 @@ expand_inherited_rtentry(PlannerInfo *root, RelOptInfo *rel,
}
/* Add whole-row junk Var if needed, unless we had it already */
- if ((new_allMarkTypes & (1 << ROW_MARK_COPY)) &&
- !(old_allMarkTypes & (1 << ROW_MARK_COPY)))
+ if ((new_allRefTypes & (1 << ROW_REF_COPY)) &&
+ !(old_allRefTypes & (1 << ROW_REF_COPY)))
{
var = makeWholeRowVar(planner_rt_fetch(oldrc->rti, root),
oldrc->rti,
@@ -282,6 +283,24 @@ expand_inherited_rtentry(PlannerInfo *root, RelOptInfo *rel,
newvars = lappend(newvars, var);
}
+ if ((new_allRefTypes & (1 << ROW_REF_ROWID)) &&
+ !(old_allRefTypes & (1 << ROW_REF_ROWID)))
+ {
+ var = makeVar(oldrc->rti,
+ RowIdAttributeNumber,
+ BYTEAOID,
+ -1,
+ InvalidOid,
+ 0);
+ snprintf(resname, sizeof(resname), "rowid%u", oldrc->rowmarkId);
+ tle = makeTargetEntry((Expr *) var,
+ list_length(root->processed_tlist) + 1,
+ pstrdup(resname),
+ true);
+ root->processed_tlist = lappend(root->processed_tlist, tle);
+ newvars = lappend(newvars, var);
+ }
+
/* Add tableoid junk Var, unless we had it already */
if (!old_isParent)
{
@@ -441,7 +460,7 @@ expand_partitioned_rtentry(PlannerInfo *root, RelOptInfo *relinfo,
* where the hierarchy is flattened during RTE expansion.)
*
* PlanRowMarks still carry the top-parent's RTI, and the top-parent's
- * allMarkTypes field still accumulates values from all descendents.
+ * allRefTypes field still accumulates values from all descendents.
*
* "parentrte" and "parentRTindex" are immediate parent's RTE and
* RTI. "top_parentrc" is top parent's PlanRowMark.
@@ -485,6 +504,7 @@ expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte,
Assert(parentrte->rtekind == RTE_RELATION); /* else this is dubious */
childrte->relid = childOID;
childrte->relkind = childrel->rd_rel->relkind;
+ childrte->reftype = table_get_row_ref_type(childrel);
/* A partitioned child will need to be expanded further. */
if (childrte->relkind == RELKIND_PARTITIONED_TABLE)
{
@@ -574,14 +594,16 @@ expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte,
if (top_parentrc)
{
PlanRowMark *childrc = makeNode(PlanRowMark);
+ RowRefType refType;
childrc->rti = childRTindex;
childrc->prti = top_parentrc->rti;
childrc->rowmarkId = top_parentrc->rowmarkId;
/* Reselect rowmark type, because relkind might not match parent */
childrc->markType = select_rowmark_type(childrte,
- top_parentrc->strength);
- childrc->allMarkTypes = (1 << childrc->markType);
+ top_parentrc->strength,
+ &refType);
+ childrc->allRefTypes = (1 << refType);
childrc->strength = top_parentrc->strength;
childrc->waitPolicy = top_parentrc->waitPolicy;
@@ -592,8 +614,8 @@ expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte,
*/
childrc->isParent = (childrte->relkind == RELKIND_PARTITIONED_TABLE);
- /* Include child's rowmark type in top parent's allMarkTypes */
- top_parentrc->allMarkTypes |= childrc->allMarkTypes;
+ /* Include child's rowmark type in top parent's allRefTypes */
+ top_parentrc->allRefTypes |= childrc->allRefTypes;
root->rowMarks = lappend(root->rowMarks, childrc);
}
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 1a3045479ff..f4a0fc2b6c6 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -60,6 +60,7 @@ int constraint_exclusion = CONSTRAINT_EXCLUSION_PARTITION;
/* Hook for plugins to get control in get_relation_info() */
get_relation_info_hook_type get_relation_info_hook = NULL;
+skip_tree_height_hook_type skip_tree_height_hook = NULL;
static void get_relation_foreign_keys(PlannerInfo *root, RelOptInfo *rel,
@@ -457,7 +458,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
info->tuples = rel->tuples;
}
- if (info->relam == BTREE_AM_OID)
+ if (info->relam == BTREE_AM_OID && (!skip_tree_height_hook || !skip_tree_height_hook(indexRelation)))
{
/*
* For btrees, get tree height while we have the index
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index c31b3733587..c39c6f21939 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -371,6 +371,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
%type <list> OptSchemaEltList parameter_name_list
%type <ival> am_type
+%type <str> opt_for_tableam
%type <boolean> TriggerForSpec TriggerForType
%type <ival> TriggerActionTime
@@ -5746,17 +5747,21 @@ row_security_cmd:
/*****************************************************************************
*
* QUERY:
- * CREATE ACCESS METHOD name HANDLER handler_name
+ * CREATE ACCESS METHOD name TYPE am_type
+ * [FOR tableam_name]
+ * HANDLER handler_name
*
*****************************************************************************/
-CreateAmStmt: CREATE ACCESS METHOD name TYPE_P am_type HANDLER handler_name
+CreateAmStmt: CREATE ACCESS METHOD name TYPE_P am_type
+ opt_for_tableam HANDLER handler_name
{
CreateAmStmt *n = makeNode(CreateAmStmt);
n->amname = $4;
- n->handler_name = $8;
n->amtype = $6;
+ n->tableam_name = $7;
+ n->handler_name = $9;
$$ = (Node *) n;
}
;
@@ -5766,6 +5771,11 @@ am_type:
| TABLE { $$ = AMTYPE_TABLE; }
;
+opt_for_tableam:
+ FOR name { $$ = $2; }
+ | /*EMPTY*/ { $$ = NULL; }
+ ;
+
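With the optional FOR clause, an access method can be declared specific to one table AM, e.g. (hypothetical names) CREATE ACCESS METHOD my_index TYPE INDEX FOR my_tableam HANDLER my_index_handler. When the clause is omitted, tableam_name is NULL and the statement behaves as before.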
/*****************************************************************************
*
* QUERIES :
diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c
index 58bc222a8b9..23ef258340a 100644
--- a/src/backend/parser/parse_relation.c
+++ b/src/backend/parser/parse_relation.c
@@ -20,6 +20,7 @@
#include "access/relation.h"
#include "access/sysattr.h"
#include "access/table.h"
+#include "access/tableam.h"
#include "catalog/heap.h"
#include "catalog/namespace.h"
#include "catalog/pg_type.h"
@@ -1502,6 +1503,7 @@ addRangeTableEntry(ParseState *pstate,
rte->relid = RelationGetRelid(rel);
rte->relkind = rel->rd_rel->relkind;
rte->rellockmode = lockmode;
+ rte->reftype = table_get_row_ref_type(rel);
/*
* Build the list of effective column names using user-supplied aliases
@@ -1587,6 +1589,7 @@ addRangeTableEntryForRelation(ParseState *pstate,
rte->relid = RelationGetRelid(rel);
rte->relkind = rel->rd_rel->relkind;
rte->rellockmode = lockmode;
+ rte->reftype = table_get_row_ref_type(rel);
/*
* Build the list of effective column names using user-supplied aliases
@@ -1656,6 +1659,7 @@ addRangeTableEntryForSubquery(ParseState *pstate,
rte->rtekind = RTE_SUBQUERY;
rte->subquery = subquery;
rte->alias = alias;
+ rte->reftype = ROW_REF_COPY;
eref = alias ? copyObject(alias) : makeAlias("unnamed_subquery", NIL);
numaliases = list_length(eref->colnames);
@@ -1764,6 +1768,7 @@ addRangeTableEntryForFunction(ParseState *pstate,
rte->functions = NIL; /* we'll fill this list below */
rte->funcordinality = rangefunc->ordinality;
rte->alias = alias;
+ rte->reftype = ROW_REF_COPY;
/*
* Choose the RTE alias name. We default to using the first function's
@@ -2083,6 +2088,7 @@ addRangeTableEntryForTableFunc(ParseState *pstate,
rte->coltypmods = tf->coltypmods;
rte->colcollations = tf->colcollations;
rte->alias = alias;
+ rte->reftype = ROW_REF_COPY;
eref = alias ? copyObject(alias) : makeAlias(refname, NIL);
numaliases = list_length(eref->colnames);
@@ -2159,6 +2165,7 @@ addRangeTableEntryForValues(ParseState *pstate,
rte->coltypmods = coltypmods;
rte->colcollations = colcollations;
rte->alias = alias;
+ rte->reftype = ROW_REF_COPY;
eref = alias ? copyObject(alias) : makeAlias(refname, NIL);
@@ -2256,6 +2263,7 @@ addRangeTableEntryForJoin(ParseState *pstate,
rte->joinrightcols = rightcols;
rte->join_using_alias = join_using_alias;
rte->alias = alias;
+ rte->reftype = ROW_REF_COPY;
eref = alias ? copyObject(alias) : makeAlias("unnamed_join", NIL);
numaliases = list_length(eref->colnames);
@@ -2337,6 +2345,7 @@ addRangeTableEntryForCTE(ParseState *pstate,
rte->rtekind = RTE_CTE;
rte->ctename = cte->ctename;
rte->ctelevelsup = levelsup;
+ rte->reftype = ROW_REF_COPY;
/* Self-reference if and only if CTE's parse analysis isn't completed */
rte->self_reference = !IsA(cte->ctequery, Query);
@@ -2499,6 +2508,7 @@ addRangeTableEntryForENR(ParseState *pstate,
* if they access transition tables linked to a table that is altered.
*/
rte->relid = enrmd->reliddesc;
+ rte->reftype = ROW_REF_COPY;
/*
* Build the list of effective column names using user-supplied aliases
@@ -3268,6 +3278,9 @@ get_rte_attribute_name(RangeTblEntry *rte, AttrNumber attnum)
attnum > 0 && attnum <= list_length(rte->alias->colnames))
return strVal(list_nth(rte->alias->colnames, attnum - 1));
+ if (attnum == RowIdAttributeNumber)
+ return "rowid";
+
/*
* If the RTE is a relation, go to the system catalogs not the
* eref->colnames list. This is a little slower but it will give the
diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c
index 441f599d1a0..87b962f05de 100644
--- a/src/backend/parser/parse_utilcmd.c
+++ b/src/backend/parser/parse_utilcmd.c
@@ -2320,19 +2320,6 @@ transformIndexConstraint(Constraint *constraint, CreateStmtContext *cxt)
errdetail("Cannot create a non-deferrable constraint using a deferrable index."),
parser_errposition(cxt->pstate, constraint->location)));
- /*
- * Insist on it being a btree. That's the only kind that supports
- * uniqueness at the moment anyway; but we must have an index that
- * exactly matches what you'd get from plain ADD CONSTRAINT syntax,
- * else dump and reload will produce a different index (breaking
- * pg_upgrade in particular).
- */
- if (index_rel->rd_rel->relam != get_index_am_oid(DEFAULT_INDEX_TYPE, false))
- ereport(ERROR,
- (errcode(ERRCODE_WRONG_OBJECT_TYPE),
- errmsg("index \"%s\" is not a btree", index_name),
- parser_errposition(cxt->pstate, constraint->location)));
-
/* Must get indclass the hard way */
indclassDatum = SysCacheGetAttrNotNull(INDEXRELID,
index_rel->rd_indextuple,
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index 7dd9345c617..693db1b3c9f 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -538,6 +538,7 @@ AutoVacLauncherMain(int argc, char *argv[])
* transaction.
*/
LWLockReleaseAll();
+ CustomErrorCleanup();
pgstat_report_wait_end();
UnlockBuffers();
/* this is probably dead code, but let's be safe: */
@@ -2834,7 +2835,9 @@ extract_autovac_opts(HeapTuple tup, TupleDesc pg_class_desc)
((Form_pg_class) GETSTRUCT(tup))->relkind == RELKIND_MATVIEW ||
((Form_pg_class) GETSTRUCT(tup))->relkind == RELKIND_TOASTVALUE);
- relopts = extractRelOptions(tup, pg_class_desc, NULL);
+ relopts = extractRelOptions(tup, pg_class_desc,
+ GetTableAmRoutineByAmOid(((Form_pg_class) GETSTRUCT(tup))->relam),
+ NULL);
if (relopts == NULL)
return NULL;
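The extra argument lets extractRelOptions() hand AM-specific reloptions to the owning table AM. A slightly tidier spelling of the same call, naming the GETSTRUCT result once (sketch only):

	Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tup);

	relopts = extractRelOptions(tup, pg_class_desc,
								GetTableAmRoutineByAmOid(classForm->relam),
								NULL);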
diff --git a/src/backend/postmaster/auxprocess.c b/src/backend/postmaster/auxprocess.c
index cae6feb3562..bc4c3d11359 100644
--- a/src/backend/postmaster/auxprocess.c
+++ b/src/backend/postmaster/auxprocess.c
@@ -178,6 +178,7 @@ static void
ShutdownAuxiliaryProcess(int code, Datum arg)
{
LWLockReleaseAll();
+ CustomErrorCleanup();
ConditionVariableCancelSleep();
pgstat_report_wait_end();
}
diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c
index f2e4f23d9fc..7963fcd2a38 100644
--- a/src/backend/postmaster/bgwriter.c
+++ b/src/backend/postmaster/bgwriter.c
@@ -166,6 +166,7 @@ BackgroundWriterMain(void)
* about in bgwriter, but we do have LWLocks, buffers, and temp files.
*/
LWLockReleaseAll();
+ CustomErrorCleanup();
ConditionVariableCancelSleep();
UnlockBuffers();
ReleaseAuxProcessResources(false);
diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c
index f482f6423d1..7cd4552c526 100644
--- a/src/backend/postmaster/checkpointer.c
+++ b/src/backend/postmaster/checkpointer.c
@@ -53,11 +53,20 @@
#include "storage/proc.h"
#include "storage/procsignal.h"
#include "storage/shmem.h"
+#include "storage/sinvaladt.h"
#include "storage/smgr.h"
#include "storage/spin.h"
#include "utils/guc.h"
#include "utils/memutils.h"
#include "utils/resowner.h"
+#include "utils/syscache.h"
+
+/*
+ * Included for the InitializeTimeouts and RegisterTimeout functions, which
+ * are needed for the OrioleDB checkpoint to work correctly.
+ * See comment for InitializeTimeouts call in CheckpointerMain for details.
+ */
+#include "utils/timeout.h"
/*----------
@@ -207,6 +216,21 @@ CheckpointerMain(void)
*/
pqsignal(SIGCHLD, SIG_DFL);
+ /*
+ * For the OrioleDB checkpoint to work, we must initialize the data used
+ * by the primary lock mechanism (lock.h): locks of this type are needed
+ * by the OrioleDB module for debug events and relation locks, but they
+ * are not used by the postgres checkpointer and so are not initialized
+ * for it by default.
+ */
+ InitializeTimeouts(); /* establishes SIGALRM handler */
+ InitDeadLockChecking();
+ RegisterTimeout(DEADLOCK_TIMEOUT, CheckDeadLockAlert);
+ RelationCacheInitialize();
+ InitCatalogCache();
+ SharedInvalBackendInit(false);
+
/*
* Initialize so that first time-driven event happens at the correct time.
*/
@@ -269,6 +293,7 @@ CheckpointerMain(void)
* files.
*/
LWLockReleaseAll();
+ CustomErrorCleanup();
ConditionVariableCancelSleep();
pgstat_report_wait_end();
UnlockBuffers();
diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c
index 46af3495644..93ce77683a4 100644
--- a/src/backend/postmaster/pgarch.c
+++ b/src/backend/postmaster/pgarch.c
@@ -670,6 +670,22 @@ pgarch_readyXlog(char *xlog)
for (int i = 0; i < arch_files->arch_files_size; i++)
arch_files->arch_files[i] = DatumGetCString(binaryheap_remove_first(arch_files->arch_heap));
+ /*
+ * Preload the WAL files if the relevant callback is provided.
+ */
+ if (ArchiveCallbacks->archive_preload_file_cb)
+ {
+ for (int i = 0; i < arch_files->arch_files_size; i++)
+ {
+ char *xlog1 = arch_files->arch_files[i];
+ char pathname[MAXPGPATH];
+
+ snprintf(pathname, MAXPGPATH, XLOGDIR "/%s", xlog1);
+ ArchiveCallbacks->archive_preload_file_cb(archive_module_state,
+ xlog1, pathname);
+ }
+ }
+
/* Return the highest priority file. */
arch_files->arch_files_size--;
strcpy(xlog, arch_files->arch_files[arch_files->arch_files_size]);
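A sketch of an archive module supplying the new callback; the my_* names are hypothetical, and the other callback fields are assumed to be defined elsewhere in the module. The signature matches the call above: (state, file name, path relative to the data directory).

	/* requires <fcntl.h> and <unistd.h> */
	static void
	my_archive_preload_file(ArchiveModuleState *state,
							const char *file, const char *path)
	{
		int			fd = open(path, O_RDONLY);

		if (fd >= 0)
		{
			/* hint the kernel to read the segment ahead of archiving */
			(void) posix_fadvise(fd, 0, 0, POSIX_FADV_WILLNEED);
			close(fd);
		}
	}

	static const ArchiveModuleCallbacks my_archive_callbacks = {
		.archive_file_cb = my_archive_file,
		.archive_preload_file_cb = my_archive_preload_file,
	};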
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index b42aae41fce..7a9c875ee7e 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -85,10 +85,6 @@
#include <systemd/sd-daemon.h>
#endif
-#ifdef HAVE_PTHREAD_IS_THREADED_NP
-#include <pthread.h>
-#endif
-
#include "access/transam.h"
#include "access/xlog.h"
#include "access/xlogrecovery.h"
@@ -145,7 +141,8 @@
#define BACKEND_TYPE_AUTOVAC 0x0002 /* autovacuum worker process */
#define BACKEND_TYPE_WALSND 0x0004 /* walsender process */
#define BACKEND_TYPE_BGWORKER 0x0008 /* bgworker process */
-#define BACKEND_TYPE_ALL 0x000F /* OR of all the above */
+#define BACKEND_TYPE_SYSTEM_BGWORKER 0x0010 /* system bgworker process */
+#define BACKEND_TYPE_ALL 0x001F /* OR of all the above */
/*
* List of active backends (or child processes anyway; we don't actually
@@ -451,7 +448,7 @@ static void InitPostmasterDeathWatchHandle(void);
* even during recovery.
*/
#define PgArchStartupAllowed() \
- (((XLogArchivingActive() && pmState == PM_RUN) || \
+ (((XLogArchivingActive() && (pmState == PM_RUN || pmState == PM_SHUTDOWN)) || \
(XLogArchivingAlways() && \
(pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY))) && \
PgArchCanRestart())
@@ -579,6 +576,12 @@ int postmaster_alive_fds[2] = {-1, -1};
HANDLE PostmasterHandle;
#endif
+/*
+ * Report whether the postmaster has observed a fatal child crash and is
+ * in the FatalError state.
+ */
+bool
+IsFatalError(void)
+{
+ return FatalError;
+}
+
/*
* Postmaster main entry point
*/
@@ -1417,24 +1420,6 @@ PostmasterMain(int argc, char *argv[])
*/
}
-#ifdef HAVE_PTHREAD_IS_THREADED_NP
-
- /*
- * On macOS, libintl replaces setlocale() with a version that calls
- * CFLocaleCopyCurrent() when its second argument is "" and every relevant
- * environment variable is unset or empty. CFLocaleCopyCurrent() makes
- * the process multithreaded. The postmaster calls sigprocmask() and
- * calls fork() without an immediate exec(), both of which have undefined
- * behavior in a multithreaded program. A multithreaded postmaster is the
- * normal case on Windows, which offers neither fork() nor sigprocmask().
- */
- if (pthread_is_threaded_np() != 0)
- ereport(FATAL,
- (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
- errmsg("postmaster became multithreaded during startup"),
- errhint("Set the LC_ALL environment variable to a valid locale.")));
-#endif
-
/*
* Remember postmaster startup time
*/
@@ -1852,15 +1837,6 @@ ServerLoop(void)
if (StartWorkerNeeded || HaveCrashedWorker)
maybe_start_bgworkers();
-#ifdef HAVE_PTHREAD_IS_THREADED_NP
-
- /*
- * With assertions enabled, check regularly for appearance of
- * additional threads. All builds check at start and exit.
- */
- Assert(pthread_is_threaded_np() == 0);
-#endif
-
/*
* Lastly, check to see if it's time to do some things that we don't
* want to do every single time through the loop, because they're a
@@ -2466,8 +2442,9 @@ processCancelRequest(Port *port, void *pkt)
/*
* canAcceptConnections --- check to see if database state allows connections
* of the specified type. backend_type can be BACKEND_TYPE_NORMAL,
- * BACKEND_TYPE_AUTOVAC, or BACKEND_TYPE_BGWORKER. (Note that we don't yet
- * know whether a NORMAL connection might turn into a walsender.)
+ * BACKEND_TYPE_AUTOVAC, BACKEND_TYPE_BGWORKER, or BACKEND_TYPE_SYSTEM_BGWORKER.
+ * (Note that we don't yet know whether a NORMAL connection might turn into
+ * a walsender.)
*/
static CAC_state
canAcceptConnections(int backend_type)
@@ -2481,7 +2458,8 @@ canAcceptConnections(int backend_type)
* bgworker_should_start_now() decided whether the DB state allows them.
*/
if (pmState != PM_RUN && pmState != PM_HOT_STANDBY &&
- backend_type != BACKEND_TYPE_BGWORKER)
+ backend_type != BACKEND_TYPE_BGWORKER &&
+ backend_type != BACKEND_TYPE_SYSTEM_BGWORKER)
{
if (Shutdown > NoShutdown)
return CAC_SHUTDOWN; /* shutdown is pending */
@@ -3160,6 +3138,13 @@ process_pm_child_exit(void)
if (PgArchPID != 0)
signal_child(PgArchPID, SIGUSR2);
+ /*
+ * Terminate system background workers since the checkpoint is
+ * complete.
+ */
+ SignalSomeChildren(SIGTERM,
+ BACKEND_TYPE_SYSTEM_BGWORKER);
+
/*
* Waken walsenders for the last time. No regular backends
* should be around anymore.
@@ -3561,7 +3546,8 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
* Background workers were already processed above; ignore them
* here.
*/
- if (bp->bkend_type == BACKEND_TYPE_BGWORKER)
+ if (bp->bkend_type == BACKEND_TYPE_BGWORKER ||
+ bp->bkend_type == BACKEND_TYPE_SYSTEM_BGWORKER)
continue;
if (take_action)
@@ -3740,7 +3726,7 @@ PostmasterStateMachine(void)
/* Signal all backend children except walsenders */
SignalSomeChildren(SIGTERM,
- BACKEND_TYPE_ALL - BACKEND_TYPE_WALSND);
+ BACKEND_TYPE_ALL - BACKEND_TYPE_WALSND - BACKEND_TYPE_SYSTEM_BGWORKER);
/* and the autovac launcher too */
if (AutoVacPID != 0)
signal_child(AutoVacPID, SIGTERM);
@@ -3778,7 +3764,7 @@ PostmasterStateMachine(void)
* and archiver are also disregarded, they will be terminated later
* after writing the checkpoint record.
*/
- if (CountChildren(BACKEND_TYPE_ALL - BACKEND_TYPE_WALSND) == 0 &&
+ if (CountChildren(BACKEND_TYPE_ALL - BACKEND_TYPE_WALSND - BACKEND_TYPE_SYSTEM_BGWORKER) == 0 &&
StartupPID == 0 &&
WalReceiverPID == 0 &&
BgWriterPID == 0 &&
@@ -5045,21 +5031,6 @@ SubPostmasterMain(int argc, char *argv[])
static void
ExitPostmaster(int status)
{
-#ifdef HAVE_PTHREAD_IS_THREADED_NP
-
- /*
- * There is no known cause for a postmaster to become multithreaded after
- * startup. Recheck to account for the possibility of unknown causes.
- * This message uses LOG level, because an unclean shutdown at this point
- * would usually not look much different from a clean shutdown.
- */
- if (pthread_is_threaded_np() != 0)
- ereport(LOG,
- (errcode(ERRCODE_INTERNAL_ERROR),
- errmsg_internal("postmaster became multithreaded"),
- errdetail("Please report this to <%s>.", PACKAGE_BUGREPORT)));
-#endif
-
/* should cleanup shared memory and kill all backends */
/*
@@ -5788,16 +5759,20 @@ do_start_bgworker(RegisteredBgWorker *rw)
* specified start_time?
*/
static bool
-bgworker_should_start_now(BgWorkerStartTime start_time)
+bgworker_should_start_now(BgWorkerStartTime start_time, int flags)
{
switch (pmState)
{
case PM_NO_CHILDREN:
case PM_WAIT_DEAD_END:
case PM_SHUTDOWN_2:
+ break;
+
case PM_SHUTDOWN:
case PM_WAIT_BACKENDS:
case PM_STOP_BACKENDS:
+ if (flags & BGWORKER_CLASS_SYSTEM)
+ return true;
break;
case PM_RUN:
@@ -5872,7 +5847,10 @@ assign_backendlist_entry(RegisteredBgWorker *rw)
bn->cancel_key = MyCancelKey;
bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot();
- bn->bkend_type = BACKEND_TYPE_BGWORKER;
+ if (rw->rw_worker.bgw_flags & BGWORKER_CLASS_SYSTEM)
+ bn->bkend_type = BACKEND_TYPE_SYSTEM_BGWORKER;
+ else
+ bn->bkend_type = BACKEND_TYPE_BGWORKER;
bn->dead_end = false;
bn->bgworker_notify = false;
@@ -5970,7 +5948,8 @@ maybe_start_bgworkers(void)
}
}
- if (bgworker_should_start_now(rw->rw_worker.bgw_start_time))
+ if (bgworker_should_start_now(rw->rw_worker.bgw_start_time,
+ rw->rw_worker.bgw_flags))
{
/* reset crash time before trying to start worker */
rw->rw_crashed_at = 0;
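A worker registered with the new BGWORKER_CLASS_SYSTEM flag is allowed to start (and keeps running) during the shutdown states, so it can assist the final checkpoint. A hypothetical registration sketch:

	BackgroundWorker worker;

	memset(&worker, 0, sizeof(worker));
	worker.bgw_flags = BGWORKER_SHMEM_ACCESS | BGWORKER_CLASS_SYSTEM;
	worker.bgw_start_time = BgWorkerStart_PostmasterStart;
	worker.bgw_restart_time = BGW_NEVER_RESTART;
	strlcpy(worker.bgw_name, "my system worker", BGW_MAXLEN);
	strlcpy(worker.bgw_library_name, "my_extension",
			sizeof(worker.bgw_library_name));
	strlcpy(worker.bgw_function_name, "my_worker_main", BGW_MAXLEN);
	RegisterBackgroundWorker(&worker);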
diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c
index 0e7de26bc28..ce79e4f8f43 100644
--- a/src/backend/postmaster/startup.c
+++ b/src/backend/postmaster/startup.c
@@ -79,6 +79,8 @@ static volatile sig_atomic_t startup_progress_timer_expired = false;
*/
int log_startup_progress_interval = 10000; /* 10 sec */
+HandleStartupProcInterrupts_hook_type HandleStartupProcInterrupts_hook = NULL;
+
/* Signal handlers */
static void StartupProcTriggerHandler(SIGNAL_ARGS);
static void StartupProcSigHupHandler(SIGNAL_ARGS);
@@ -186,6 +188,9 @@ HandleStartupProcInterrupts(void)
static uint32 postmaster_poll_count = 0;
#endif
+ if (HandleStartupProcInterrupts_hook)
+ HandleStartupProcInterrupts_hook();
+
/*
* Process any requests or signals received recently.
*/
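A sketch of an extension attaching to the new startup-process hook (names hypothetical; registration is assumed to happen in _PG_init() of a shared_preload_libraries module):

	static void
	my_startup_interrupts(void)
	{
		/* absorb extension-specific requests on each interrupt check */
	}

	/* in _PG_init() */
	HandleStartupProcInterrupts_hook = my_startup_interrupts;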
diff --git a/src/backend/postmaster/walwriter.c b/src/backend/postmaster/walwriter.c
index 266fbc23399..4e8a9573006 100644
--- a/src/backend/postmaster/walwriter.c
+++ b/src/backend/postmaster/walwriter.c
@@ -161,6 +161,7 @@ WalWriterMain(void)
* about in walwriter, but we do have LWLocks, and perhaps buffers?
*/
LWLockReleaseAll();
+ CustomErrorCleanup();
ConditionVariableCancelSleep();
pgstat_report_wait_end();
UnlockBuffers();
diff --git a/src/backend/replication/logical/proto.c b/src/backend/replication/logical/proto.c
index 504f94d4a77..03f9a54f587 100644
--- a/src/backend/replication/logical/proto.c
+++ b/src/backend/replication/logical/proto.c
@@ -814,7 +814,7 @@ logicalrep_write_tuple(StringInfo out, Relation rel, TupleTableSlot *slot,
continue;
}
- if (att->attlen == -1 && VARATT_IS_EXTERNAL_ONDISK(values[i]))
+ if (att->attlen == -1 && (VARATT_IS_EXTERNAL_ONDISK(values[i]) || VARATT_IS_EXTERNAL_ORIOLEDB(values[i])))
{
/*
* Unchanged toasted datum. (Note that we don't promise to detect
diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c
index 3ed2f79dd06..3cc86087fd1 100644
--- a/src/backend/replication/logical/snapbuild.c
+++ b/src/backend/replication/logical/snapbuild.c
@@ -207,6 +207,8 @@ struct SnapBuild
*/
TransactionId next_phase_at;
+ CSNSnapshotData csnSnapshotData;
+
/*
* Array of transactions which could have catalog changes that committed
* between xmin and xmax.
@@ -404,6 +406,17 @@ SnapBuildCurrentState(SnapBuild *builder)
return builder->state;
}
+/*
+ * At which transaction id will the next phase of initial snapshot
+ * building happen?
+ */
+TransactionId
+SnapBuildNextPhaseAt(SnapBuild *builder)
+{
+ return builder->next_phase_at;
+}
+
/*
* Return the LSN at which the two-phase decoding was first enabled.
*/
@@ -551,6 +564,8 @@ SnapBuildBuildSnapshot(SnapBuild *builder)
snapshot->regd_count = 0;
snapshot->snapXactCompletionCount = 0;
+ snapshot->csnSnapshotData = builder->csnSnapshotData;
+
return snapshot;
}
@@ -648,6 +663,7 @@ SnapBuildInitialSnapshot(SnapBuild *builder)
snap->snapshot_type = SNAPSHOT_MVCC;
snap->xcnt = newxcnt;
snap->xip = newxip;
+ snap->csnSnapshotData = builder->csnSnapshotData;
return snap;
}
@@ -1028,6 +1044,8 @@ SnapBuildCommitTxn(SnapBuild *builder, XLogRecPtr lsn, TransactionId xid,
TransactionId xmax = xid;
+ builder->csnSnapshotData.xlogptr = lsn;
+
/*
* Transactions preceding BUILDING_SNAPSHOT will neither be decoded, nor
* will they be part of a snapshot. So we don't need to record anything.
@@ -1215,6 +1233,10 @@ SnapBuildProcessRunningXacts(SnapBuild *builder, XLogRecPtr lsn, xl_running_xact
ReorderBufferTXN *txn;
TransactionId xmin;
+ builder->csnSnapshotData.snapshotcsn = running->csn;
+ builder->csnSnapshotData.xmin = 0;
+ builder->csnSnapshotData.xlogptr = lsn;
+
/*
* If we're not consistent yet, inspect the record to see whether it
* allows to get closer to being consistent. If we are consistent, dump
@@ -2139,3 +2161,10 @@ CheckPointSnapBuild(void)
}
FreeDir(snap_dir);
}
+
+/*
+ * Update the CSN snapshot data that the builder attaches to the snapshots
+ * it constructs.
+ */
+void
+SnapBuildUpdateCSNSnaphot(SnapBuild *builder,
+ CSNSnapshotData *csnSnapshotData)
+{
+ builder->csnSnapshotData = *csnSnapshotData;
+}
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index 832b1cf7642..dfd72bf8cca 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -2430,9 +2430,8 @@ apply_handle_insert(StringInfo s)
/* Initialize the executor state. */
edata = create_edata_for_relation(rel);
estate = edata->estate;
- remoteslot = ExecInitExtraTupleSlot(estate,
- RelationGetDescr(rel->localrel),
- &TTSOpsVirtual);
+ remoteslot = table_slot_create(rel->localrel,
+ &estate->es_tupleTable);
/* Process and store remote tuple in the slot */
oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
@@ -2586,9 +2585,8 @@ apply_handle_update(StringInfo s)
/* Initialize the executor state. */
edata = create_edata_for_relation(rel);
estate = edata->estate;
- remoteslot = ExecInitExtraTupleSlot(estate,
- RelationGetDescr(rel->localrel),
- &TTSOpsVirtual);
+ remoteslot = table_slot_create(rel->localrel,
+ &estate->es_tupleTable);
/*
* Populate updatedCols so that per-column triggers can fire, and so
@@ -2766,9 +2764,8 @@ apply_handle_delete(StringInfo s)
/* Initialize the executor state. */
edata = create_edata_for_relation(rel);
estate = edata->estate;
- remoteslot = ExecInitExtraTupleSlot(estate,
- RelationGetDescr(rel->localrel),
- &TTSOpsVirtual);
+ remoteslot = table_slot_create(rel->localrel,
+ &estate->es_tupleTable);
/* Build the search tuple. */
oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
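Switching from a virtual slot to table_slot_create() makes the remote slot's TupleTableSlotOps match the target table's access method, so later table_tuple_* calls need no slot conversion. For illustration, a roughly equivalent explicit spelling:

	const TupleTableSlotOps *ops = table_slot_callbacks(rel->localrel);

	remoteslot = ExecAllocTableSlot(&estate->es_tupleTable,
									RelationGetDescr(rel->localrel),
									ops);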
diff --git a/src/backend/replication/pgoutput/pgoutput.c b/src/backend/replication/pgoutput/pgoutput.c
index c57c5ed8de9..18f8824d5a3 100644
--- a/src/backend/replication/pgoutput/pgoutput.c
+++ b/src/backend/replication/pgoutput/pgoutput.c
@@ -1315,8 +1315,8 @@ pgoutput_row_filter(Relation relation, TupleTableSlot *old_slot,
* VARTAG_INDIRECT. See ReorderBufferToastReplace.
*/
if (att->attlen == -1 &&
- VARATT_IS_EXTERNAL_ONDISK(new_slot->tts_values[i]) &&
- !VARATT_IS_EXTERNAL_ONDISK(old_slot->tts_values[i]))
+ (VARATT_IS_EXTERNAL_ONDISK(new_slot->tts_values[i]) ||
+ VARATT_IS_EXTERNAL_ORIOLEDB(new_slot->tts_values[i])) &&
+ !(VARATT_IS_EXTERNAL_ONDISK(old_slot->tts_values[i]) ||
+ VARATT_IS_EXTERNAL_ORIOLEDB(old_slot->tts_values[i])))
{
if (!tmp_new_slot)
{
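Both this call site and the one in proto.c repeat the same pair of checks; a hypothetical convenience macro (not in the patch) expresses the intent:

	/* true for any externally-stored datum, heap TOAST or OrioleDB TOAST */
	#define VARATT_IS_EXTERNAL_ANY(d) \
		(VARATT_IS_EXTERNAL_ONDISK(d) || VARATT_IS_EXTERNAL_ORIOLEDB(d))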
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index 4c53de08b9b..ce4e40bf137 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -315,6 +315,7 @@ void
WalSndErrorCleanup(void)
{
LWLockReleaseAll();
+ CustomErrorCleanup();
ConditionVariableCancelSleep();
pgstat_report_wait_end();
diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c
index 9cd96fd17ef..f2307c43612 100644
--- a/src/backend/rewrite/rewriteHandler.c
+++ b/src/backend/rewrite/rewriteHandler.c
@@ -23,6 +23,7 @@
#include "access/relation.h"
#include "access/sysattr.h"
#include "access/table.h"
+#include "access/tableam.h"
#include "catalog/dependency.h"
#include "catalog/pg_type.h"
#include "commands/trigger.h"
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index e066a3f888f..aa82637b1d1 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -2667,6 +2667,7 @@ BufferSync(int flags)
BufferDesc *bufHdr = NULL;
CkptTsStatus *ts_stat = (CkptTsStatus *)
DatumGetPointer(binaryheap_first(ts_heap));
+ double progress;
buf_id = CkptBufferIds[ts_stat->index].buf_id;
Assert(buf_id != -1);
@@ -2721,7 +2722,10 @@ BufferSync(int flags)
*
* (This will check for barrier events even if it doesn't sleep.)
*/
- CheckpointWriteDelay(flags, (double) num_processed / num_to_scan);
+ progress = (double) num_processed / num_to_scan;
+ progress = CheckPointProgress + progress * (1 - CheckPointProgress);
+
+ CheckpointWriteDelay(flags, progress);
}
/*
diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index 316b4fa7197..a5ada9beb54 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -309,6 +309,8 @@ static GlobalVisState GlobalVisTempRels;
*/
static TransactionId ComputeXidHorizonsResultLastXmin;
+snapshot_hook_type snapshot_hook = NULL;
+
#ifdef XIDCACHE_DEBUG
/* counters for XidCache measurement */
@@ -752,6 +754,7 @@ ProcArrayEndTransactionInternal(PGPROC *proc, TransactionId latestXid)
proc->delayChkptFlags = 0;
proc->recoveryConflictPending = false;
+ proc->lastCommittedCSN = pg_atomic_fetch_add_u64(&ShmemVariableCache->nextCommitSeqNo, 1);
/* must be cleared with xid/xmin: */
/* avoid unnecessarily dirtying shared cachelines */
@@ -2258,6 +2261,8 @@ GetSnapshotData(Snapshot snapshot)
if (GetSnapshotDataReuse(snapshot))
{
+ if (snapshot_hook)
+ snapshot_hook(snapshot);
LWLockRelease(ProcArrayLock);
return snapshot;
}
@@ -2439,6 +2444,9 @@ GetSnapshotData(Snapshot snapshot)
if (!TransactionIdIsValid(MyProc->xmin))
MyProc->xmin = TransactionXmin = xmin;
+ if (snapshot_hook)
+ snapshot_hook(snapshot);
+
LWLockRelease(ProcArrayLock);
/* maintain state for GlobalVis* */
@@ -2858,6 +2866,7 @@ GetRunningTransactionData(void)
CurrentRunningXacts->nextXid = XidFromFullTransactionId(ShmemVariableCache->nextXid);
CurrentRunningXacts->oldestRunningXid = oldestRunningXid;
CurrentRunningXacts->latestCompletedXid = latestCompletedXid;
+ CurrentRunningXacts->csn = pg_atomic_read_u64(&ShmemVariableCache->nextCommitSeqNo);
Assert(TransactionIdIsValid(CurrentRunningXacts->nextXid));
Assert(TransactionIdIsValid(CurrentRunningXacts->oldestRunningXid));
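A note on the new snapshot_hook: it is invoked with ProcArrayLock still held, in both the snapshot-reuse path and the full GetSnapshotData() path, so an implementation must be cheap and must not take heavyweight locks. A minimal sketch of how an extension might chain into it (illustrative only; it assumes the CSN fields and the nextCommitSeqNo counter introduced elsewhere in this patch, and omits module boilerplate such as PG_MODULE_MAGIC):

    /* Illustrative extension code, not part of the patch. */
    #include "postgres.h"
    #include "access/transam.h"
    #include "storage/procarray.h"

    static snapshot_hook_type prev_snapshot_hook = NULL;

    static void
    my_snapshot_hook(Snapshot snapshot)
    {
        if (prev_snapshot_hook)
            prev_snapshot_hook(snapshot);

        /* ProcArrayLock is held: read the shared CSN counter atomically. */
        snapshot->csnSnapshotData.snapshotcsn =
            pg_atomic_read_u64(&ShmemVariableCache->nextCommitSeqNo);
    }

    void
    _PG_init(void)
    {
        prev_snapshot_hook = snapshot_hook;
        snapshot_hook = my_snapshot_hook;
    }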
diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c
index 3bdc5f7fb6c..1e6760a7c49 100644
--- a/src/backend/storage/ipc/standby.c
+++ b/src/backend/storage/ipc/standby.c
@@ -1355,6 +1355,7 @@ LogCurrentRunningXacts(RunningTransactions CurrRunningXacts)
xlrec.nextXid = CurrRunningXacts->nextXid;
xlrec.oldestRunningXid = CurrRunningXacts->oldestRunningXid;
xlrec.latestCompletedXid = CurrRunningXacts->latestCompletedXid;
+ xlrec.csn = CurrRunningXacts->csn;
/* Header */
XLogBeginInsert();
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index ba66e820d06..245e15f0cc5 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -648,6 +648,27 @@ GetLockMethodLocalHash(void)
}
#endif
+/*
+ * Returns true if a lock with the given locktag exists in LockMethodLocalHash, at any lock mode.
+ */
+bool
+DoLocalLockExist(const LOCKTAG *locktag)
+{
+ HASH_SEQ_STATUS scan_status;
+ LOCALLOCK* locallock;
+
+ hash_seq_init(&scan_status, LockMethodLocalHash);
+ while ((locallock = (LOCALLOCK *) hash_seq_search(&scan_status)) != NULL)
+ {
+ if (memcmp(&locallock->tag.lock, locktag, sizeof(LOCKTAG)) == 0)
+ {
+ hash_seq_term(&scan_status);
+ return true;
+ }
+ }
+ return false;
+}
+
/*
* LockHasWaiters -- look up 'locktag' and check if releasing this
* lock would wake up other processes waiting for it.
@@ -797,7 +818,7 @@ LockAcquireExtended(const LOCKTAG *locktag,
bool reportMemoryError,
LOCALLOCK **locallockp)
{
- LOCKMETHODID lockmethodid = locktag->locktag_lockmethodid;
+ LOCKMETHODID lockmethodid;
LockMethod lockMethodTable;
LOCALLOCKTAG localtag;
LOCALLOCK *locallock;
@@ -809,6 +830,15 @@ LockAcquireExtended(const LOCKTAG *locktag,
LWLock *partitionLock;
bool found_conflict;
bool log_lock = false;
+ bool no_log_lock = false;
+
+ if (locktag->locktag_lockmethodid == NO_LOG_LOCKMETHOD)
+ {
+ ((LOCKTAG *)locktag)->locktag_lockmethodid = DEFAULT_LOCKMETHOD;
+ no_log_lock = true;
+ }
+
+ lockmethodid = locktag->locktag_lockmethodid;
if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods))
elog(ERROR, "unrecognized lock method: %d", lockmethodid);
@@ -923,7 +953,8 @@ LockAcquireExtended(const LOCKTAG *locktag,
if (lockmode >= AccessExclusiveLock &&
locktag->locktag_type == LOCKTAG_RELATION &&
!RecoveryInProgress() &&
- XLogStandbyInfoActive())
+ XLogStandbyInfoActive() &&
+ !no_log_lock)
{
LogAccessExclusiveLockPrepare();
log_lock = true;
@@ -1134,12 +1165,35 @@ LockAcquireExtended(const LOCKTAG *locktag,
*/
if (!(proclock->holdMask & LOCKBIT_ON(lockmode)))
{
+ int i;
+
AbortStrongLockAcquire();
PROCLOCK_PRINT("LockAcquire: INCONSISTENT", proclock);
LOCK_PRINT("LockAcquire: INCONSISTENT", lock, lockmode);
/* Should we retry ? */
LWLockRelease(partitionLock);
- elog(ERROR, "LockAcquire failed");
+ /*
+ * We've been removed from the queue without obtaining a lock.
+ * That's OK, we're going to return LOCKACQUIRE_NOT_AVAIL, but we
+ * need to release the local lock first.
+ */
+ locallock->nLocks--;
+ for (i = 0; i < locallock->numLockOwners; i++)
+ {
+ if (locallock->lockOwners[i].owner == owner)
+ {
+ locallock->lockOwners[i].nLocks--;
+ if (locallock->lockOwners[i].nLocks == 0)
+ {
+ ResourceOwnerForgetLock(owner, locallock);
+ locallock->lockOwners[i] = locallock->lockOwners[--locallock->numLockOwners];
+ }
+ break;
+ }
+ }
+
+ return LOCKACQUIRE_NOT_AVAIL;
+
}
PROCLOCK_PRINT("LockAcquire: granted", proclock);
LOCK_PRINT("LockAcquire: granted", lock, lockmode);
@@ -4641,8 +4695,8 @@ VirtualXactLock(VirtualTransactionId vxid, bool wait)
LWLockRelease(&proc->fpInfoLock);
/* Time to wait. */
- (void) LockAcquire(&tag, ShareLock, false, false);
-
+ if (LockAcquire(&tag, ShareLock, false, false) == LOCKACQUIRE_NOT_AVAIL)
+ return false;
LockRelease(&tag, ShareLock, false);
return XactLockForVirtualXact(vxid, xid, wait);
}
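Two usage notes on the lock.c changes above. First, LockAcquire() can now return LOCKACQUIRE_NOT_AVAIL even when called with dontWait = false, namely when the backend is removed from the wait queue during recovery; VirtualXactLock() is the first caller adjusted for that, and other callers of no-log locks must be prepared for it too. Second, a sketch of requesting a relation lock that is not WAL-logged for standbys (relid is a placeholder; NO_LOG_LOCKMETHOD is assumed to be defined in lock.h by another hunk of this patch):

    LOCKTAG     tag;

    SET_LOCKTAG_RELATION(tag, MyDatabaseId, relid);
    /* Tag the request so no standby AccessExclusiveLock WAL record is emitted. */
    tag.locktag_lockmethodid = NO_LOG_LOCKMETHOD;

    if (LockAcquire(&tag, AccessExclusiveLock, false, false) ==
        LOCKACQUIRE_NOT_AVAIL)
        elog(LOG, "lock on relation %u was not acquired", relid);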
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index e9e445bb216..905fccd673d 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -829,6 +829,7 @@ ProcKill(int code, Datum arg)
* facility by releasing our PGPROC ...
*/
LWLockReleaseAll();
+ CustomErrorCleanup();
/* Cancel any pending condition variable sleep, too */
ConditionVariableCancelSleep();
@@ -940,6 +941,7 @@ AuxiliaryProcKill(int code, Datum arg)
/* Release any LW locks I am holding (see notes above) */
LWLockReleaseAll();
+ CustomErrorCleanup();
/* Cancel any pending condition variable sleep, too */
ConditionVariableCancelSleep();
@@ -1190,7 +1192,7 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
* If InHotStandby we set lock waits slightly later for clarity with other
* code.
*/
- if (!InHotStandby)
+ if (!InHotStandby && !InRecovery)
{
if (LockTimeout > 0)
{
@@ -1550,7 +1552,7 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
* already caused QueryCancelPending to become set, we want the cancel to
* be reported as a lock timeout, not a user cancel.
*/
- if (!InHotStandby)
+ if (!InHotStandby && !InRecovery)
{
if (LockTimeout > 0)
{
diff --git a/src/backend/utils/adt/amutils.c b/src/backend/utils/adt/amutils.c
index 48852bf79e2..265fcfc86c4 100644
--- a/src/backend/utils/adt/amutils.c
+++ b/src/backend/utils/adt/amutils.c
@@ -195,7 +195,7 @@ indexam_property(FunctionCallInfo fcinfo,
/*
* Get AM information. If we don't have a valid AM OID, return NULL.
*/
- routine = GetIndexAmRoutineByAmId(amoid, true);
+ routine = GetIndexAmRoutineByAmId(index_oid, amoid, true);
if (routine == NULL)
PG_RETURN_NULL();
@@ -455,7 +455,7 @@ pg_indexam_progress_phasename(PG_FUNCTION_ARGS)
IndexAmRoutine *routine;
char *name;
- routine = GetIndexAmRoutineByAmId(amoid, true);
+ routine = GetIndexAmRoutineByAmId(InvalidOid, amoid, true);
if (routine == NULL || !routine->ambuildphasename)
PG_RETURN_NULL();
diff --git a/src/backend/utils/adt/lockfuncs.c b/src/backend/utils/adt/lockfuncs.c
index f9b9590997b..6f7bcc4394c 100644
--- a/src/backend/utils/adt/lockfuncs.c
+++ b/src/backend/utils/adt/lockfuncs.c
@@ -18,8 +18,11 @@
#include "funcapi.h"
#include "miscadmin.h"
#include "storage/predicate_internals.h"
+#include "storage/proc.h"
+#include "storage/procarray.h"
#include "utils/array.h"
#include "utils/builtins.h"
+#include "utils/wait_event.h"
/*
@@ -614,6 +617,7 @@ pg_safe_snapshot_blocking_pids(PG_FUNCTION_ARGS)
Datum
pg_isolation_test_session_is_blocked(PG_FUNCTION_ARGS)
{
+ PGPROC *blocked_proc;
int blocked_pid = PG_GETARG_INT32(0);
ArrayType *interesting_pids_a = PG_GETARG_ARRAYTYPE_P(1);
ArrayType *blocking_pids_a;
@@ -674,6 +678,10 @@ pg_isolation_test_session_is_blocked(PG_FUNCTION_ARGS)
if (GetSafeSnapshotBlockingPids(blocked_pid, &dummy, 1) > 0)
PG_RETURN_BOOL(true);
+ blocked_proc = BackendPidGetProc(blocked_pid);
+ if (blocked_proc != NULL && (blocked_proc->wait_event_info & 0xFF000000) == PG_WAIT_EXTENSION)
+ PG_RETURN_BOOL(true);
+
PG_RETURN_BOOL(false);
}
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 99a21f20b9f..d903a746b36 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -134,6 +134,7 @@ typedef struct
static HTAB *collation_cache = NULL;
+pg_newlocale_from_collation_hook_type pg_newlocale_from_collation_hook = NULL;
#if defined(WIN32) && defined(LC_MESSAGES)
static char *IsoLocaleName(const char *);
@@ -1600,6 +1601,7 @@ pg_newlocale_from_collation(Oid collid)
{
char *actual_versionstr;
char *collversionstr;
+ int level = WARNING;
collversionstr = TextDatumGetCString(datum);
@@ -1619,8 +1621,11 @@ pg_newlocale_from_collation(Oid collid)
NameStr(collform->collname))));
}
+ if (pg_newlocale_from_collation_hook && pg_newlocale_from_collation_hook())
+ level = ERROR;
+
if (strcmp(actual_versionstr, collversionstr) != 0)
- ereport(WARNING,
+ ereport(level,
(errmsg("collation \"%s\" has version mismatch",
NameStr(collform->collname)),
errdetail("The collation in the database was created using version %s, "
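The call site above treats a true result from the hook as a request to escalate the collation version mismatch report from WARNING to ERROR, so the hook type is presumably a no-argument function returning bool. A sketch of an extension opting into the strict behavior (illustrative; module boilerplate omitted):

    #include "postgres.h"
    #include "utils/pg_locale.h"

    static bool
    my_collation_version_strict(void)
    {
        /* true => pg_newlocale_from_collation() errors out on mismatch */
        return true;
    }

    void
    _PG_init(void)
    {
        pg_newlocale_from_collation_hook = my_collation_version_strict;
    }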
diff --git a/src/backend/utils/adt/ri_triggers.c b/src/backend/utils/adt/ri_triggers.c
index 6945d99b3d5..9abe334b563 100644
--- a/src/backend/utils/adt/ri_triggers.c
+++ b/src/backend/utils/adt/ri_triggers.c
@@ -247,6 +247,7 @@ RI_FKey_check(TriggerData *trigdata)
TupleTableSlot *newslot;
RI_QueryKey qkey;
SPIPlanPtr qplan;
+ Relation rel = trigdata->tg_relation;
riinfo = ri_FetchConstraintInfo(trigdata->tg_trigger,
trigdata->tg_relation, false);
@@ -264,7 +265,7 @@ RI_FKey_check(TriggerData *trigdata)
* and lock on the buffer to call HeapTupleSatisfiesVisibility. Caller
* should be holding pin, but not lock.
*/
- if (!table_tuple_satisfies_snapshot(trigdata->tg_relation, newslot, SnapshotSelf))
+ if (!table_tuple_satisfies_snapshot(rel, newslot, SnapshotSelf))
return PointerGetDatum(NULL);
/*
@@ -1263,9 +1264,6 @@ RI_FKey_fk_upd_check_required(Trigger *trigger, Relation fk_rel,
{
const RI_ConstraintInfo *riinfo;
int ri_nullcheck;
- Datum xminDatum;
- TransactionId xmin;
- bool isnull;
/*
* AfterTriggerSaveEvent() handles things such that this function is never
@@ -1333,10 +1331,7 @@ RI_FKey_fk_upd_check_required(Trigger *trigger, Relation fk_rel,
* this if we knew the INSERT trigger already fired, but there is no easy
* way to know that.)
*/
- xminDatum = slot_getsysattr(oldslot, MinTransactionIdAttributeNumber, &isnull);
- Assert(!isnull);
- xmin = DatumGetTransactionId(xminDatum);
- if (TransactionIdIsCurrentTransactionId(xmin))
+ if (table_tuple_is_current(fk_rel, oldslot))
return true;
/* If all old and new key values are equal, no check is needed */
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index f01cc2521c8..ecae9d86420 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -1313,7 +1313,7 @@ pg_get_indexdef_worker(Oid indexrelid, int colno,
amrec = (Form_pg_am) GETSTRUCT(ht_am);
/* Fetch the index AM's API struct */
- amroutine = GetIndexAmRoutine(amrec->amhandler);
+ amroutine = GetIndexAmRoutineExtended(indexrelid, amrec->amhandler);
/*
* Get the index expressions, if any. (NOTE: we do not use the relcache
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index c4fcd0076ea..675c743bcc5 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -6223,12 +6223,32 @@ get_actual_variable_endpoint(Relation heapRel,
index_scan->xs_want_itup = true;
index_rescan(index_scan, scankeys, 1, NULL, 0);
- /* Fetch first/next tuple in specified direction */
- while ((tid = index_getnext_tid(index_scan, indexscandir)) != NULL)
+ while (true)
{
- BlockNumber block = ItemPointerGetBlockNumber(tid);
+ BlockNumber block = InvalidBlockNumber;
- if (!VM_ALL_VISIBLE(heapRel,
+ /* Fetch first/next tuple in specified direction */
+ if (index_scan->xs_want_rowid)
+ {
+ NullableDatum rowid;
+ rowid = index_getnext_rowid(index_scan, indexscandir);
+
+ if (rowid.isnull)
+ break;
+ }
+ else
+ {
+ tid = index_getnext_tid(index_scan, indexscandir);
+
+ if (tid == NULL)
+ break;
+
+ Assert(ItemPointerEquals(tid, &index_scan->xs_heaptid));
+ block = ItemPointerGetBlockNumber(tid);
+ }
+
+ if (!index_scan->xs_want_rowid &&
+ !VM_ALL_VISIBLE(heapRel,
block,
&vmbuffer))
{
diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c
index 000e81a2d96..91136f5cfbd 100644
--- a/src/backend/utils/cache/catcache.c
+++ b/src/backend/utils/cache/catcache.c
@@ -65,6 +65,10 @@
/* Cache management header --- pointer is NULL until created */
static CatCacheHeader *CacheHdr = NULL;
+SearchCatCacheInternal_hook_type SearchCatCacheInternal_hook = NULL;
+SearchCatCacheList_hook_type SearchCatCacheList_hook = NULL;
+GetCatCacheHashValue_hook_type GetCatCacheHashValue_hook = NULL;
+
static inline HeapTuple SearchCatCacheInternal(CatCache *cache,
int nkeys,
Datum v1, Datum v2,
@@ -1270,6 +1274,14 @@ SearchCatCacheInternal(CatCache *cache,
dlist_head *bucket;
CatCTup *ct;
+ if (SearchCatCacheInternal_hook)
+ {
+ ct = SearchCatCacheInternal_hook(cache, nkeys, v1, v2, v3, v4);
+
+ if (ct)
+ return &ct->tuple;
+ }
+
/* Make sure we're in an xact, even if this ends up being a cache hit */
Assert(IsTransactionState());
@@ -1555,6 +1567,11 @@ GetCatCacheHashValue(CatCache *cache,
Datum v3,
Datum v4)
{
+ if (GetCatCacheHashValue_hook)
+ {
+ return GetCatCacheHashValue_hook(cache, cache->cc_nkeys,
+ v1, v2, v3, v4);
+ }
/*
* one-time startup overhead for each cache
*/
@@ -1605,6 +1622,14 @@ SearchCatCacheList(CatCache *cache,
MemoryContext oldcxt;
int i;
+ if (SearchCatCacheList_hook)
+ {
+ cl = SearchCatCacheList_hook(cache, nkeys, v1, v2, v3);
+
+ if (cl)
+ return cl;
+ }
+
/*
* one-time startup overhead for each cache
*/
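All three catcache hooks short-circuit the regular lookup when they return a non-NULL result, so an implementation must hand back entries in exactly the representation catcache expects, including reference counting. A registration sketch for the tuple-lookup hook (illustrative; returning NULL falls through to stock behavior):

    #include "postgres.h"
    #include "utils/catcache.h"

    static CatCTup *
    my_catcache_search(CatCache *cache, int nkeys,
                       Datum v1, Datum v2, Datum v3, Datum v4)
    {
        /* Return a privately cached entry, or NULL to fall through. */
        return NULL;
    }

    void
    _PG_init(void)
    {
        SearchCatCacheInternal_hook = my_catcache_search;
    }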
diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c
index 0008826f67c..b7a5f0c48a0 100644
--- a/src/backend/utils/cache/inval.c
+++ b/src/backend/utils/cache/inval.c
@@ -252,6 +252,7 @@ int debug_discard_caches = 0;
#define MAX_SYSCACHE_CALLBACKS 64
#define MAX_RELCACHE_CALLBACKS 10
+#define MAX_USERCACHE_CALLBACKS 10
static struct SYSCACHECALLBACK
{
@@ -273,6 +274,14 @@ static struct RELCACHECALLBACK
static int relcache_callback_count = 0;
+static struct USERCACHECALLBACK
+{
+ UsercacheCallbackFunction function;
+ Datum arg;
+} usercache_callback_list[MAX_USERCACHE_CALLBACKS];
+
+static int usercache_callback_count = 0;
+
/* ----------------------------------------------------------------
* Invalidation subgroup support functions
* ----------------------------------------------------------------
@@ -683,6 +692,19 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg)
else if (msg->sn.dbId == MyDatabaseId)
InvalidateCatalogSnapshot();
}
+ else if (msg->id == SHAREDINVALUSERCACHE_ID)
+ {
+ int i;
+ for (i = 0; i < usercache_callback_count; i++)
+ {
+ struct USERCACHECALLBACK *ccitem = usercache_callback_list + i;
+
+ ccitem->function(ccitem->arg,
+ msg->usr.arg1,
+ msg->usr.arg2,
+ msg->usr.arg3);
+ }
+ }
else
elog(FATAL, "unrecognized SI message ID: %d", msg->id);
}
@@ -726,6 +748,17 @@ InvalidateSystemCachesExtended(bool debug_discard)
ccitem->function(ccitem->arg, InvalidOid);
}
+
+ for (i = 0; i < usercache_callback_count; i++)
+ {
+ struct USERCACHECALLBACK *ccitem = usercache_callback_list + i;
+
+ ccitem->function(ccitem->arg,
+ InvalidOid,
+ InvalidOid,
+ InvalidOid);
+ }
+
}
@@ -1432,6 +1465,25 @@ CacheInvalidateRelcacheByRelid(Oid relid)
ReleaseSysCache(tup);
}
+/*
+ * CacheInvalidateRelcacheByDbidRelid
+ */
+void
+CacheInvalidateRelcacheByDbidRelid(Oid dbid, Oid relid)
+{
+ SharedInvalidationMessage msg;
+
+ PrepareInvalidationState();
+
+ msg.rc.id = SHAREDINVALRELCACHE_ID;
+ msg.rc.dbId = dbid;
+ msg.rc.relId = relid;
+ /* check AddCatcacheInvalidationMessage() for an explanation */
+ VALGRIND_MAKE_MEM_DEFINED(&msg, sizeof(msg));
+
+ SendSharedInvalidMessages(&msg, 1);
+}
+
/*
* CacheInvalidateSmgr
@@ -1570,6 +1622,22 @@ CacheRegisterRelcacheCallback(RelcacheCallbackFunction func,
++relcache_callback_count;
}
+/*
+ * CacheRegisterUsercacheCallback
+ */
+void
+CacheRegisterUsercacheCallback(UsercacheCallbackFunction func,
+ Datum arg)
+{
+ if (usercache_callback_count >= MAX_USERCACHE_CALLBACKS)
+ elog(FATAL, "out of usercache_callback_list slots");
+
+ usercache_callback_list[usercache_callback_count].function = func;
+ usercache_callback_list[usercache_callback_count].arg = arg;
+
+ ++usercache_callback_count;
+}
+
/*
* CallSyscacheCallbacks
*
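Usercache callbacks are driven both by SHAREDINVALUSERCACHE_ID messages (with the sender's three payload arguments) and by full cache resets (with InvalidOid arguments), so a callback has to treat InvalidOid as "discard everything". A registration sketch, assuming the payload arguments are Oids as the reset path suggests:

    #include "postgres.h"
    #include "utils/inval.h"

    static void
    my_usercache_callback(Datum arg, Oid arg1, Oid arg2, Oid arg3)
    {
        if (!OidIsValid(arg1))
        {
            /* full reset: drop the whole private cache */
            return;
        }
        /* otherwise invalidate the entry identified by (arg1, arg2, arg3) */
    }

    void
    _PG_init(void)
    {
        CacheRegisterUsercacheCallback(my_usercache_callback, (Datum) 0);
    }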
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 8e08ca1c680..18b2ebdd59f 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -34,6 +34,7 @@
#include "access/multixact.h"
#include "access/nbtree.h"
#include "access/parallel.h"
+#include "access/relation.h"
#include "access/reloptions.h"
#include "access/sysattr.h"
#include "access/table.h"
@@ -317,6 +318,7 @@ static OpClassCacheEnt *LookupOpclassInfo(Oid operatorClassOid,
StrategyNumber numSupport);
static void RelationCacheInitFileRemoveInDir(const char *tblspcpath);
static void unlink_initfile(const char *initfilename, int elevel);
+static void release_rd_amcache(Relation rel);
/*
@@ -461,8 +463,9 @@ AllocateRelationDesc(Form_pg_class relp)
static void
RelationParseRelOptions(Relation relation, HeapTuple tuple)
{
- bytea *options;
- amoptions_function amoptsfn;
+ bytea *options;
+ amoptions_function amoptsfn;
+ const TableAmRoutine *tableam = NULL;
relation->rd_options = NULL;
@@ -474,9 +477,10 @@ RelationParseRelOptions(Relation relation, HeapTuple tuple)
{
case RELKIND_RELATION:
case RELKIND_TOASTVALUE:
- case RELKIND_VIEW:
case RELKIND_MATVIEW:
+ case RELKIND_VIEW:
case RELKIND_PARTITIONED_TABLE:
+ tableam = relation->rd_tableam;
amoptsfn = NULL;
break;
case RELKIND_INDEX:
@@ -488,11 +492,12 @@ RelationParseRelOptions(Relation relation, HeapTuple tuple)
}
/*
- * Fetch reloptions from tuple; have to use a hardwired descriptor because
- * we might not have any other for pg_class yet (consider executing this
- * code for pg_class itself)
- */
- options = extractRelOptions(tuple, GetPgClassDescriptor(), amoptsfn);
+ * Fetch reloptions from tuple; have to use a hardwired descriptor because
+ * we might not have any other for pg_class yet (consider executing this
+ * code for pg_class itself)
+ */
+ options = extractRelOptions(tuple, GetPgClassDescriptor(),
+ tableam, amoptsfn);
/*
* Copy parsed data into CacheMemoryContext. To guard against the
@@ -1399,7 +1404,7 @@ InitIndexAmRoutine(Relation relation)
* Call the amhandler in current, short-lived memory context, just in case
* it leaks anything (it probably won't, but let's be paranoid).
*/
- tmp = GetIndexAmRoutine(relation->rd_amhandler);
+ tmp = GetIndexAmRoutineExtended(relation->rd_id, relation->rd_amhandler);
/* OK, now transfer the data into relation's rd_indexcxt. */
cached = (IndexAmRoutine *) MemoryContextAlloc(relation->rd_indexcxt,
@@ -2230,9 +2235,7 @@ RelationReloadIndexInfo(Relation relation)
RelationCloseSmgr(relation);
/* Must free any AM cached data upon relcache flush */
- if (relation->rd_amcache)
- pfree(relation->rd_amcache);
- relation->rd_amcache = NULL;
+ release_rd_amcache(relation);
/*
* If it's a shared index, we might be called before backend startup has
@@ -2452,8 +2455,7 @@ RelationDestroyRelation(Relation relation, bool remember_tupdesc)
pfree(relation->rd_options);
if (relation->rd_indextuple)
pfree(relation->rd_indextuple);
- if (relation->rd_amcache)
- pfree(relation->rd_amcache);
+ release_rd_amcache(relation);
if (relation->rd_fdwroutine)
pfree(relation->rd_fdwroutine);
if (relation->rd_indexcxt)
@@ -2515,9 +2517,7 @@ RelationClearRelation(Relation relation, bool rebuild)
RelationCloseSmgr(relation);
/* Free AM cached data, if any */
- if (relation->rd_amcache)
- pfree(relation->rd_amcache);
- relation->rd_amcache = NULL;
+ release_rd_amcache(relation);
/*
* Treat nailed-in system relations separately, they always need to be
@@ -6820,3 +6820,9 @@ unlink_initfile(const char *initfilename, int elevel)
initfilename)));
}
}
+
+static void
+release_rd_amcache(Relation rel)
+{
+ table_free_rd_amcache(rel);
+}
diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c
index 4e4a34bde80..a5b3e437f7c 100644
--- a/src/backend/utils/cache/syscache.c
+++ b/src/backend/utils/cache/syscache.c
@@ -696,6 +696,7 @@ static int SysCacheSupportingRelOidSize;
static int oid_compare(const void *a, const void *b);
+SysCacheGetAttr_hook_type SysCacheGetAttr_hook = NULL;
/*
* InitCatalogCache - initialize the caches
@@ -1080,6 +1081,7 @@ SysCacheGetAttr(int cacheId, HeapTuple tup,
AttrNumber attributeNumber,
bool *isNull)
{
+ TupleDesc cc_tupdesc;
/*
* We just need to get the TupleDesc out of the cache entry, and then we
* can apply heap_getattr(). Normally the cache control data is already
@@ -1089,14 +1091,19 @@
if (cacheId < 0 || cacheId >= SysCacheSize ||
!PointerIsValid(SysCache[cacheId]))
elog(ERROR, "invalid cache ID: %d", cacheId);
- if (!PointerIsValid(SysCache[cacheId]->cc_tupdesc))
+
+ cc_tupdesc = SysCache[cacheId]->cc_tupdesc;
+ if (!PointerIsValid(cc_tupdesc) && SysCacheGetAttr_hook)
+ cc_tupdesc = SysCacheGetAttr_hook(SysCache[cacheId]);
+ if (!PointerIsValid(cc_tupdesc))
{
InitCatCachePhase2(SysCache[cacheId], false);
Assert(PointerIsValid(SysCache[cacheId]->cc_tupdesc));
+ cc_tupdesc = SysCache[cacheId]->cc_tupdesc;
}
return heap_getattr(tup, attributeNumber,
- SysCache[cacheId]->cc_tupdesc,
+ cc_tupdesc,
isNull);
}
diff --git a/src/backend/utils/cache/typcache.c b/src/backend/utils/cache/typcache.c
index 608cd5e8e43..71619cf04d0 100644
--- a/src/backend/utils/cache/typcache.c
+++ b/src/backend/utils/cache/typcache.c
@@ -290,6 +290,8 @@ static int32 NextRecordTypmod = 0; /* number of entries used */
* as identifiers, so we start the counter at INVALID_TUPLEDESC_IDENTIFIER.
*/
static uint64 tupledesc_id_counter = INVALID_TUPLEDESC_IDENTIFIER;
+load_typcache_tupdesc_hook_type load_typcache_tupdesc_hook = NULL;
+load_enum_cache_data_hook_type load_enum_cache_data_hook = NULL;
static void load_typcache_tupdesc(TypeCacheEntry *typentry);
static void load_rangetype_info(TypeCacheEntry *typentry);
@@ -879,6 +881,12 @@ load_typcache_tupdesc(TypeCacheEntry *typentry)
{
Relation rel;
+ if (load_typcache_tupdesc_hook)
+ {
+ load_typcache_tupdesc_hook(typentry);
+ return;
+ }
+
if (!OidIsValid(typentry->typrelid)) /* should not happen */
elog(ERROR, "invalid typrelid for composite type %u",
typentry->type_id);
@@ -2560,6 +2568,12 @@ load_enum_cache_data(TypeCacheEntry *tcache)
int bm_size,
start_pos;
+ if (load_enum_cache_data_hook)
+ {
+ load_enum_cache_data_hook(tcache);
+ return;
+ }
+
/* Check that this is actually an enum */
if (tcache->typtype != TYPTYPE_ENUM)
ereport(ERROR,
diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c
index 7112fb00069..5badf5eaedc 100644
--- a/src/backend/utils/error/elog.c
+++ b/src/backend/utils/error/elog.c
@@ -3749,7 +3749,6 @@ write_stderr(const char *fmt,...)
va_end(ap);
}
-
/*
* Write a message to STDERR using only async-signal-safe functions. This can
* be used to safely emit a message from a signal handler.
@@ -3802,3 +3801,12 @@ trace_recovery(int trace_level)
return trace_level;
}
+
+CustomErrorCleanupHookType CustomErrorCleanupHook = NULL;
+
+void
+CustomErrorCleanup(void)
+{
+ if (CustomErrorCleanupHook)
+ CustomErrorCleanupHook();
+}
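CustomErrorCleanup() is called from the same error-recovery spots as LWLockReleaseAll() (WalSndErrorCleanup(), ProcKill() and AuxiliaryProcKill() above), so the hook must be safe to run in an already-failed transaction and must be idempotent, since cleanup can run more than once. Installing it follows the usual hook pattern (illustrative):

    #include "postgres.h"

    static void
    my_error_cleanup(void)
    {
        /*
         * Release extension-private resources that the core LWLock
         * machinery does not know about. Must be idempotent.
         */
    }

    void
    _PG_init(void)
    {
        CustomErrorCleanupHook = my_error_cleanup;
    }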
diff --git a/src/backend/utils/fmgr/fmgr.c b/src/backend/utils/fmgr/fmgr.c
index 9208c31fe06..85811af84ff 100644
--- a/src/backend/utils/fmgr/fmgr.c
+++ b/src/backend/utils/fmgr/fmgr.c
@@ -72,7 +72,7 @@ extern Datum fmgr_security_definer(PG_FUNCTION_ARGS);
* or name, but search by Oid is much faster.
*/
-static const FmgrBuiltin *
+const FmgrBuiltin *
fmgr_isbuiltin(Oid id)
{
uint16 index;
@@ -97,7 +97,7 @@ fmgr_isbuiltin(Oid id)
* the array with the same name, but they should all point to the same
* routine.
*/
-static const FmgrBuiltin *
+const FmgrBuiltin *
fmgr_lookupByName(const char *name)
{
int i;
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index df4d15a50fb..7b89c11feb6 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -81,7 +81,7 @@ static void ClientCheckTimeoutHandler(void);
static bool ThereIsAtLeastOneRole(void);
static void process_startup_options(Port *port, bool am_superuser);
static void process_settings(Oid databaseid, Oid roleid);
-
+base_init_startup_hook_type base_init_startup_hook = NULL;
/*** InitPostgres support ***/
@@ -641,6 +641,9 @@ BaseInit(void)
*/
InitFileAccess();
+ if (base_init_startup_hook)
+ base_init_startup_hook();
+
/*
* Initialize statistics reporting. This needs to happen early to ensure
* that pgstat's shutdown callback runs after the shutdown callbacks of
diff --git a/src/backend/utils/sort/tuplestore.c b/src/backend/utils/sort/tuplestore.c
index f60633df241..120db339150 100644
--- a/src/backend/utils/sort/tuplestore.c
+++ b/src/backend/utils/sort/tuplestore.c
@@ -1100,6 +1100,36 @@ tuplestore_gettupleslot(Tuplestorestate *state, bool forward,
}
}
+/*
+ * Same as tuplestore_gettupleslot(), but forces the tuple into the given
+ * slot. Thus, it can work with slot types other than minimal tuple.
+ */
+bool
+tuplestore_force_gettupleslot(Tuplestorestate *state, bool forward,
+ bool copy, TupleTableSlot *slot)
+{
+ MinimalTuple tuple;
+ bool should_free;
+
+ tuple = (MinimalTuple) tuplestore_gettuple(state, forward, &should_free);
+
+ if (tuple)
+ {
+ if (copy && !should_free)
+ {
+ tuple = heap_copy_minimal_tuple(tuple);
+ should_free = true;
+ }
+ ExecForceStoreMinimalTuple(tuple, slot, should_free);
+ return true;
+ }
+ else
+ {
+ ExecClearTuple(slot);
+ return false;
+ }
+}
+
/*
* tuplestore_advance - exported function to adjust position without fetching
*
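Because the new function stores via ExecForceStoreMinimalTuple(), it accepts any slot type, whereas tuplestore_gettupleslot() can only store into a minimal-tuple slot. A usage sketch (rel and tstore are placeholders):

    TupleTableSlot *slot = table_slot_create(rel, NULL);

    /* Drain the tuplestore into a table-AM-specific slot. */
    while (tuplestore_force_gettupleslot(tstore, true /* forward */,
                                         false /* no copy */, slot))
    {
        /* ... consume the slot ... */
    }
    ExecDropSingleTupleTableSlot(slot);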
diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c
index 3a419e348fa..283255cdaad 100644
--- a/src/backend/utils/time/snapmgr.c
+++ b/src/backend/utils/time/snapmgr.c
@@ -116,6 +116,10 @@ TransactionId RecentXmin = FirstNormalTransactionId;
/* (table, ctid) => (cmin, cmax) mapping during timetravel */
static HTAB *tuplecid_data = NULL;
+snapshot_hook_type snapshot_register_hook = NULL;
+snapshot_hook_type snapshot_deregister_hook = NULL;
+reset_xmin_hook_type reset_xmin_hook = NULL;
+
/*
* Elements of the active snapshot stack.
*
@@ -192,6 +196,11 @@ typedef struct SerializedSnapshotData
CommandId curcid;
TimestampTz whenTaken;
XLogRecPtr lsn;
+ CSNSnapshotData csnSnapshotData;
+ uint64 undoRegularLocation;
+ uint64 undoRegularXmin;
+ uint64 undoSystemLocation;
+ uint64 undoSystemXmin;
} SerializedSnapshotData;
Size
@@ -298,6 +307,8 @@ GetTransactionSnapshot(void)
/* Mark it as "registered" in FirstXactSnapshot */
FirstXactSnapshot->regd_count++;
pairingheap_add(&RegisteredSnapshots, &FirstXactSnapshot->ph_node);
+ if (snapshot_register_hook)
+ snapshot_register_hook(FirstXactSnapshot);
}
else
CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
@@ -438,6 +449,8 @@ GetNonHistoricCatalogSnapshot(Oid relid)
* CatalogSnapshot pointer is already valid.
*/
pairingheap_add(&RegisteredSnapshots, &CatalogSnapshot->ph_node);
+ if (snapshot_register_hook)
+ snapshot_register_hook(CatalogSnapshot);
}
return CatalogSnapshot;
@@ -459,6 +472,8 @@ InvalidateCatalogSnapshot(void)
if (CatalogSnapshot)
{
pairingheap_remove(&RegisteredSnapshots, &CatalogSnapshot->ph_node);
+ if (snapshot_deregister_hook)
+ snapshot_deregister_hook(CatalogSnapshot);
CatalogSnapshot = NULL;
SnapshotResetXmin();
}
@@ -536,6 +551,7 @@ SetTransactionSnapshot(Snapshot sourcesnap, VirtualTransactionId *sourcevxid,
CurrentSnapshot->xmin = sourcesnap->xmin;
CurrentSnapshot->xmax = sourcesnap->xmax;
CurrentSnapshot->xcnt = sourcesnap->xcnt;
+ CurrentSnapshot->csnSnapshotData = sourcesnap->csnSnapshotData;
Assert(sourcesnap->xcnt <= GetMaxSnapshotXidCount());
if (sourcesnap->xcnt > 0)
memcpy(CurrentSnapshot->xip, sourcesnap->xip,
@@ -593,6 +609,8 @@ SetTransactionSnapshot(Snapshot sourcesnap, VirtualTransactionId *sourcevxid,
/* Mark it as "registered" in FirstXactSnapshot */
FirstXactSnapshot->regd_count++;
pairingheap_add(&RegisteredSnapshots, &FirstXactSnapshot->ph_node);
+ if (snapshot_register_hook)
+ snapshot_register_hook(FirstXactSnapshot);
}
FirstSnapshotSet = true;
@@ -855,7 +873,11 @@ RegisterSnapshotOnOwner(Snapshot snapshot, ResourceOwner owner)
ResourceOwnerRememberSnapshot(owner, snap);
if (snap->regd_count == 1)
+ {
pairingheap_add(&RegisteredSnapshots, &snap->ph_node);
+ if (snapshot_register_hook)
+ snapshot_register_hook(snap);
+ }
return snap;
}
@@ -893,7 +915,11 @@ UnregisterSnapshotFromOwner(Snapshot snapshot, ResourceOwner owner)
snapshot->regd_count--;
if (snapshot->regd_count == 0)
+ {
pairingheap_remove(&RegisteredSnapshots, &snapshot->ph_node);
+ if (snapshot_deregister_hook)
+ snapshot_deregister_hook(snapshot);
+ }
if (snapshot->regd_count == 0 && snapshot->active_count == 0)
{
@@ -945,6 +971,9 @@ SnapshotResetXmin(void)
{
Snapshot minSnapshot;
+ if (reset_xmin_hook)
+ reset_xmin_hook();
+
if (ActiveSnapshot != NULL)
return;
@@ -1038,6 +1067,8 @@ AtEOXact_Snapshot(bool isCommit, bool resetXmin)
Assert(FirstXactSnapshot->regd_count > 0);
Assert(!pairingheap_is_empty(&RegisteredSnapshots));
pairingheap_remove(&RegisteredSnapshots, &FirstXactSnapshot->ph_node);
+ if (snapshot_deregister_hook)
+ snapshot_deregister_hook(FirstXactSnapshot);
}
FirstXactSnapshot = NULL;
@@ -1069,6 +1100,8 @@ AtEOXact_Snapshot(bool isCommit, bool resetXmin)
pairingheap_remove(&RegisteredSnapshots,
&esnap->snapshot->ph_node);
+ if (snapshot_deregister_hook)
+ snapshot_deregister_hook(esnap->snapshot);
}
exportedSnapshots = NIL;
@@ -1196,6 +1229,8 @@ ExportSnapshot(Snapshot snapshot)
snapshot->regd_count++;
pairingheap_add(&RegisteredSnapshots, &snapshot->ph_node);
+ if (snapshot_register_hook)
+ snapshot_register_hook(snapshot);
/*
* Fill buf with a text serialization of the snapshot, plus identification
@@ -2160,6 +2195,13 @@ SerializeSnapshot(Snapshot snapshot, char *start_address)
serialized_snapshot.curcid = snapshot->curcid;
serialized_snapshot.whenTaken = snapshot->whenTaken;
serialized_snapshot.lsn = snapshot->lsn;
+ serialized_snapshot.csnSnapshotData.xmin = snapshot->csnSnapshotData.xmin;
+ serialized_snapshot.csnSnapshotData.snapshotcsn = snapshot->csnSnapshotData.snapshotcsn;
+ serialized_snapshot.csnSnapshotData.xlogptr = snapshot->csnSnapshotData.xlogptr;
+ serialized_snapshot.undoRegularXmin = snapshot->undoRegularLocationPhNode.xmin;
+ serialized_snapshot.undoRegularLocation = snapshot->undoRegularLocationPhNode.undoLocation;
+ serialized_snapshot.undoSystemXmin = snapshot->undoSystemLocationPhNode.xmin;
+ serialized_snapshot.undoSystemLocation = snapshot->undoSystemLocationPhNode.undoLocation;
/*
* Ignore the SubXID array if it has overflowed, unless the snapshot was
@@ -2235,6 +2277,13 @@ RestoreSnapshot(char *start_address)
snapshot->whenTaken = serialized_snapshot.whenTaken;
snapshot->lsn = serialized_snapshot.lsn;
snapshot->snapXactCompletionCount = 0;
+ snapshot->csnSnapshotData.xmin = serialized_snapshot.csnSnapshotData.xmin;
+ snapshot->csnSnapshotData.snapshotcsn = serialized_snapshot.csnSnapshotData.snapshotcsn;
+ snapshot->csnSnapshotData.xlogptr = serialized_snapshot.csnSnapshotData.xlogptr;
+ snapshot->undoRegularLocationPhNode.xmin = serialized_snapshot.undoRegularXmin;
+ snapshot->undoRegularLocationPhNode.undoLocation = serialized_snapshot.undoRegularLocation;
+ snapshot->undoSystemLocationPhNode.xmin = serialized_snapshot.undoSystemXmin;
+ snapshot->undoSystemLocationPhNode.undoLocation = serialized_snapshot.undoSystemLocation;
/* Copy XIDs, if present. */
if (serialized_snapshot.xcnt > 0)
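The register/deregister pair fires exactly when a snapshot enters or leaves the RegisteredSnapshots pairing heap, and reset_xmin_hook fires when SnapshotResetXmin() recomputes the backend's xmin; together they give a table AM the points it needs to track the oldest undo it must retain. A sketch of hooking both sides (illustrative; the undo*PhNode fields serialized above are assumed to be pairing-heap nodes, as the name suggests):

    #include "postgres.h"
    #include "utils/snapmgr.h"

    static void
    my_snapshot_register(Snapshot snapshot)
    {
        /* e.g. add snapshot->undoRegularLocationPhNode to a private heap */
    }

    static void
    my_snapshot_deregister(Snapshot snapshot)
    {
        /* remove the same node again */
    }

    void
    _PG_init(void)
    {
        snapshot_register_hook = my_snapshot_register;
        snapshot_deregister_hook = my_snapshot_deregister;
    }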
diff --git a/src/bin/pg_rewind/Makefile b/src/bin/pg_rewind/Makefile
index bed05f1609c..5ff8163b841 100644
--- a/src/bin/pg_rewind/Makefile
+++ b/src/bin/pg_rewind/Makefile
@@ -21,6 +21,7 @@ LDFLAGS_INTERNAL += -L$(top_builddir)/src/fe_utils -lpgfeutils $(libpq_pgport)
OBJS = \
$(WIN32RES) \
datapagemap.o \
+ extension.o \
file_ops.o \
filemap.o \
libpq_source.o \
@@ -35,19 +36,21 @@ EXTRA_CLEAN = xlogreader.c
all: pg_rewind
pg_rewind: $(OBJS) | submake-libpq submake-libpgport
- $(CC) $(CFLAGS) $^ $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X)
+ $(CC) $(CFLAGS) $^ $(LDFLAGS) $(LDFLAGS_EX) $(LDFLAGS_EX_BE) $(LIBS) -o $@$(X)
xlogreader.c: % : $(top_srcdir)/src/backend/access/transam/%
rm -f $@ && $(LN_S) $< .
install: all installdirs
$(INSTALL_PROGRAM) pg_rewind$(X) '$(DESTDIR)$(bindir)/pg_rewind$(X)'
+ $(INSTALL_DATA) $(srcdir)/pg_rewind_ext.h '$(DESTDIR)$(includedir)'
installdirs:
- $(MKDIR_P) '$(DESTDIR)$(bindir)'
+ $(MKDIR_P) '$(DESTDIR)$(bindir)' '$(DESTDIR)$(includedir)'
uninstall:
rm -f '$(DESTDIR)$(bindir)/pg_rewind$(X)'
+ rm -f '$(DESTDIR)$(includedir)/pg_rewind_ext.h'
clean distclean maintainer-clean:
rm -f pg_rewind$(X) $(OBJS) xlogreader.c
diff --git a/src/bin/pg_rewind/extension.c b/src/bin/pg_rewind/extension.c
new file mode 100644
index 00000000000..29ec4b5a6f6
--- /dev/null
+++ b/src/bin/pg_rewind/extension.c
@@ -0,0 +1,132 @@
+/*-------------------------------------------------------------------------
+ *
+ * extension.c
+ * Functions for processing shared libraries loaded by pg_rewind.
+ *
+ * Copyright (c) 2013-2023, PostgreSQL Global Development Group
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres_fe.h"
+
+#ifndef WIN32
+#include <dlfcn.h>
+
+/*
+ * On macOS, <dlfcn.h> insists on including <stdbool.h>. If we're not
+ * using stdbool, undef bool to undo the damage.
+ */
+#ifndef PG_USE_STDBOOL
+#ifdef bool
+#undef bool
+#endif
+#endif
+#endif /* !WIN32 */
+
+#include <sys/stat.h>
+
+#include "access/xlog_internal.h"
+#include "pg_rewind.h"
+
+/* signature for pg_rewind extension library rewind function */
+typedef void (*PG_rewind_t) (const char *datadir_target, char *datadir_source,
+ char *connstr_source, XLogRecPtr startpoint,
+ int tliIndex, XLogRecPtr endpoint,
+ const char *restoreCommand, const char *argv0,
+ bool debug);
+
+static bool
+file_exists(const char *argv0, const char *name)
+{
+ struct stat st;
+
+ Assert(name != NULL);
+
+ if (stat(name, &st) == 0)
+ return !S_ISDIR(st.st_mode);
+ else if (!(errno == ENOENT || errno == ENOTDIR || errno == EACCES))
+ {
+ const char *progname;
+
+ progname = get_progname(argv0);
+ pg_log_error("could not access file \"%s\": %m", name);
+ pg_log_error_hint("Try \"%s --help\" for more information.", progname);
+ exit(1);
+ }
+
+ return false;
+}
+
+static char *
+expand_dynamic_library_name(const char *argv0, const char *name)
+{
+ char *full;
+ char my_exec_path[MAXPGPATH];
+ char pkglib_path[MAXPGPATH];
+
+ Assert(name);
+
+ if (find_my_exec(argv0, my_exec_path) < 0)
+ pg_fatal("%s: could not locate my own executable path", argv0);
+ get_pkglib_path(my_exec_path, pkglib_path);
+ full = palloc(strlen(pkglib_path) + 1 + strlen(name) + 1);
+ sprintf(full, "%s/%s", pkglib_path, name);
+ if (file_exists(argv0, full))
+ return full;
+ pfree(full);
+
+ full = palloc(strlen(pkglib_path) + 1 + strlen(name) + 1 +
+ strlen(DLSUFFIX) + 1);
+ sprintf(full, "%s/%s%s", pkglib_path, name, DLSUFFIX);
+ if (file_exists(argv0, full))
+ return full;
+ pfree(full);
+
+ return pstrdup(name);
+}
+
+void
+process_extensions(SimpleStringList *extensions, const char *datadir_target,
+ char *datadir_source, char *connstr_source,
+ XLogRecPtr startpoint, int tliIndex, XLogRecPtr endpoint,
+ const char *restoreCommand, const char *argv0,
+ bool debug)
+{
+ SimpleStringListCell *cell;
+
+ if (extensions->head == NULL)
+ return; /* nothing to do */
+
+ for (cell = extensions->head; cell; cell = cell->next)
+ {
+ char *filename = cell->val;
+ char *fullname;
+ void *lib_handle;
+ PG_rewind_t PG_rewind;
+ char *load_error;
+
+ fullname = expand_dynamic_library_name(argv0, filename);
+
+ lib_handle = dlopen(fullname, RTLD_NOW | RTLD_GLOBAL);
+ if (lib_handle == NULL)
+ {
+ load_error = dlerror();
+ pg_fatal("could not load library \"%s\": %s", fullname, load_error);
+ }
+
+ PG_rewind = dlsym(lib_handle, "_PG_rewind");
+
+ if (PG_rewind == NULL)
+ pg_fatal("could not find function \"_PG_rewind\" in \"%s\"",
+ fullname);
+ pfree(fullname);
+
+ if (showprogress)
+ pg_log_info("performing rewind for extension \"%s\"", filename);
+ PG_rewind(datadir_target, datadir_source, connstr_source, startpoint,
+ tliIndex, endpoint, restoreCommand, argv0, debug);
+
+ pg_log_debug("loaded library \"%s\"", filename);
+ }
+}
diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c
index 435742d20d1..a936c3d3586 100644
--- a/src/bin/pg_rewind/filemap.c
+++ b/src/bin/pg_rewind/filemap.c
@@ -54,6 +54,7 @@ static uint32 hash_string_pointer(const char *s);
#define FILEHASH_INITIAL_SIZE 1000
static filehash_hash *filehash;
+static SimpleStringList extensions_exclude = {NULL, NULL};
static bool isRelDataFile(const char *path);
static char *datasegpath(RelFileLocator rlocator, ForkNumber forknum,
@@ -261,6 +262,8 @@ process_target_file(const char *path, file_type_t type, size_t size,
* from the target data folder all paths which have been filtered out from
* the source data folder when processing the source files.
*/
+ if (check_file_excluded(path, false))
+ return;
/*
* Like in process_source_file, pretend that pg_wal is always a directory.
@@ -405,6 +408,31 @@ check_file_excluded(const char *path, bool is_source)
}
}
+ /*
+ * Exclude directories registered by extension rewind libraries
+ */
+ if (extensions_exclude.head != NULL)
+ {
+ SimpleStringListCell *cell;
+
+ for (cell = extensions_exclude.head; cell; cell = cell->next)
+ {
+ char *exclude_dir = cell->val;
+
+ snprintf(localpath, sizeof(localpath), "%s/", exclude_dir);
+ if (strstr(path, localpath) == path)
+ {
+ if (is_source)
+ pg_log_debug("entry \"%s\" excluded from source file list",
+ path);
+ else
+ pg_log_debug("entry \"%s\" excluded from target file list",
+ path);
+ return true;
+ }
+ }
+ }
+
return false;
}
@@ -822,7 +850,6 @@ decide_file_actions(void)
return filemap;
}
-
/*
* Helper function for filemap hash table.
*/
@@ -833,3 +860,15 @@ hash_string_pointer(const char *s)
return hash_bytes(ss, strlen(s));
}
+
+void
+extensions_exclude_add(char **exclude_dirs)
+{
+ int i;
+
+ for (i = 0; exclude_dirs[i] != NULL; i++)
+ {
+ simple_string_list_append(&extensions_exclude,
+ pstrdup(exclude_dirs[i]));
+ }
+}
diff --git a/src/bin/pg_rewind/meson.build b/src/bin/pg_rewind/meson.build
index fd22818be4d..36e9a4766f3 100644
--- a/src/bin/pg_rewind/meson.build
+++ b/src/bin/pg_rewind/meson.build
@@ -2,6 +2,7 @@
pg_rewind_sources = files(
'datapagemap.c',
+ 'extension.c',
'file_ops.c',
'filemap.c',
'libpq_source.c',
@@ -23,6 +24,7 @@ pg_rewind = executable('pg_rewind',
pg_rewind_sources,
dependencies: [frontend_code, libpq, lz4, zstd],
c_args: ['-DFRONTEND'], # needed for xlogreader et al
+ export_dynamic: true,
kwargs: default_bin_args,
)
bin_targets += pg_rewind
@@ -48,3 +50,7 @@ tests += {
}
subdir('po', if_found: libintl)
+
+install_headers(
+ 'pg_rewind_ext.h'
+)
\ No newline at end of file
diff --git a/src/bin/pg_rewind/parsexlog.c b/src/bin/pg_rewind/parsexlog.c
index 27782237d05..f8202d298e4 100644
--- a/src/bin/pg_rewind/parsexlog.c
+++ b/src/bin/pg_rewind/parsexlog.c
@@ -38,7 +38,7 @@ static const char *RmgrNames[RM_MAX_ID + 1] = {
#define RmgrName(rmid) (((rmid) <= RM_MAX_BUILTIN_ID) ? \
RmgrNames[rmid] : "custom")
-static void extractPageInfo(XLogReaderState *record);
+static void extractPageInfo(XLogReaderState *record, void *arg);
static int xlogreadfd = -1;
static XLogSegNo xlogreadsegno = 0;
@@ -54,17 +54,11 @@ static int SimpleXLogPageRead(XLogReaderState *xlogreader,
XLogRecPtr targetPagePtr,
int reqLen, XLogRecPtr targetRecPtr, char *readBuf);
-/*
- * Read WAL from the datadir/pg_wal, starting from 'startpoint' on timeline
- * index 'tliIndex' in target timeline history, until 'endpoint'. Make note of
- * the data blocks touched by the WAL records, and return them in a page map.
- *
- * 'endpoint' is the end of the last record to read. The record starting at
- * 'endpoint' is the first one that is not read.
- */
void
-extractPageMap(const char *datadir, XLogRecPtr startpoint, int tliIndex,
- XLogRecPtr endpoint, const char *restoreCommand)
+SimpleXLogRead(const char *datadir, XLogRecPtr startpoint, int tliIndex,
+ XLogRecPtr endpoint, const char *restoreCommand,
+ void (*page_callback) (XLogReaderState *, void *arg),
+ void *arg)
{
XLogRecord *record;
XLogReaderState *xlogreader;
@@ -97,7 +91,7 @@ extractPageMap(const char *datadir, XLogRecPtr startpoint, int tliIndex,
LSN_FORMAT_ARGS(errptr));
}
- extractPageInfo(xlogreader);
+ page_callback(xlogreader, arg);
} while (xlogreader->EndRecPtr < endpoint);
/*
@@ -116,6 +110,22 @@ extractPageMap(const char *datadir, XLogRecPtr startpoint, int tliIndex,
}
}
+/*
+ * Read WAL from the datadir/pg_wal, starting from 'startpoint' on timeline
+ * index 'tliIndex' in target timeline history, until 'endpoint'. Make note of
+ * the data blocks touched by the WAL records, and return them in a page map.
+ *
+ * 'endpoint' is the end of the last record to read. The record starting at
+ * 'endpoint' is the first one that is not read.
+ */
+void
+extractPageMap(const char *datadir, XLogRecPtr startpoint, int tliIndex,
+ XLogRecPtr endpoint, const char *restoreCommand)
+{
+ SimpleXLogRead(datadir, startpoint, tliIndex, endpoint, restoreCommand,
+ extractPageInfo, NULL);
+}
+
/*
* Reads one WAL record. Returns the end position of the record, without
* doing anything with the record itself.
@@ -365,7 +375,7 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
* Extract information on which blocks the current record modifies.
*/
static void
-extractPageInfo(XLogReaderState *record)
+extractPageInfo(XLogReaderState *record, void *arg)
{
int block_id;
RmgrId rmid = XLogRecGetRmid(record);
diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c
index f7f3b8227fd..d08d421bbdd 100644
--- a/src/bin/pg_rewind/pg_rewind.c
+++ b/src/bin/pg_rewind/pg_rewind.c
@@ -75,6 +75,8 @@ bool dry_run = false;
bool do_sync = true;
bool restore_wal = false;
+static SimpleStringList extensions = {NULL, NULL};
+
/* Target history */
TimeLineHistoryEntry *targetHistory;
int targetNentries;
@@ -107,6 +109,7 @@ usage(const char *progname)
" file when running target cluster\n"));
printf(_(" --debug write a lot of debug messages\n"));
printf(_(" --no-ensure-shutdown do not automatically fix unclean shutdown\n"));
+ printf(_(" -e, --extension=PATH path to a library that performs rewind for an extension\n"));
printf(_(" -V, --version output version information, then exit\n"));
printf(_(" -?, --help show this help, then exit\n"));
printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
@@ -131,6 +134,7 @@ main(int argc, char **argv)
{"no-sync", no_argument, NULL, 'N'},
{"progress", no_argument, NULL, 'P'},
{"debug", no_argument, NULL, 3},
+ {"extension", required_argument, NULL, 'e'},
{NULL, 0, NULL, 0}
};
int option_index;
@@ -169,7 +173,7 @@ main(int argc, char **argv)
}
}
- while ((c = getopt_long(argc, argv, "cD:nNPR", long_options, &option_index)) != -1)
+ while ((c = getopt_long(argc, argv, "cD:nNPRe:", long_options, &option_index)) != -1)
{
switch (c)
{
@@ -218,6 +222,10 @@ main(int argc, char **argv)
config_file = pg_strdup(optarg);
break;
+ case 'e': /* -e or --extension */
+ simple_string_list_append(&extensions, optarg);
+ break;
+
default:
/* getopt_long already emitted a complaint */
pg_log_error_hint("Try \"%s --help\" for more information.", progname);
@@ -454,6 +462,12 @@ main(int argc, char **argv)
/* Initialize the hash table to track the status of each file */
filehash_init();
+ if (extensions.head != NULL)
+ process_extensions(&extensions, datadir_target, datadir_source,
+ connstr_source, chkptrec, lastcommontliIndex,
+ target_wal_endrec, restore_command, argv[0],
+ debug);
+
/*
* Collect information about all files in the both data directories.
*/
diff --git a/src/bin/pg_rewind/pg_rewind.h b/src/bin/pg_rewind/pg_rewind.h
index ef8bdc1fbb8..1d42a921246 100644
--- a/src/bin/pg_rewind/pg_rewind.h
+++ b/src/bin/pg_rewind/pg_rewind.h
@@ -14,7 +14,9 @@
#include "access/timeline.h"
#include "common/logging.h"
#include "datapagemap.h"
+#include "fe_utils/simple_list.h"
#include "libpq-fe.h"
+#include "pg_rewind_ext.h"
#include "storage/block.h"
#include "storage/relfilelocator.h"
@@ -53,4 +55,12 @@ extern TimeLineHistoryEntry *rewind_parseTimeLineHistory(char *buffer,
TimeLineID targetTLI,
int *nentries);
+/* in extension.c */
+extern void process_extensions(SimpleStringList *extensions,
+ const char *datadir_target, char *datadir_source,
+ char *connstr_source, XLogRecPtr startpoint,
+ int tliIndex, XLogRecPtr endpoint,
+ const char *restoreCommand, const char *argv0,
+ bool debug);
+
#endif /* PG_REWIND_H */
diff --git a/src/bin/pg_rewind/pg_rewind_ext.h b/src/bin/pg_rewind/pg_rewind_ext.h
new file mode 100644
index 00000000000..3616d94f588
--- /dev/null
+++ b/src/bin/pg_rewind/pg_rewind_ext.h
@@ -0,0 +1,44 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_rewind_ext.h
+ *
+ *
+ * Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_REWIND_EXT_H
+#define PG_REWIND_EXT_H
+
+#include "access/xlogreader.h"
+
+/* in parsexlog.c */
+/*
+ * Read WAL from the datadir/pg_wal, starting from 'startpoint' on timeline
+ * index 'tliIndex' in target timeline history, until 'endpoint'.
+ * Pass all WAL records to 'page_callback'.
+ *
+ * 'endpoint' is the end of the last record to read. The record starting at
+ * 'endpoint' is the first one that is not read.
+ */
+extern void SimpleXLogRead(const char *datadir, XLogRecPtr startpoint,
+ int tliIndex, XLogRecPtr endpoint,
+ const char *restoreCommand,
+ void (*page_callback) (XLogReaderState *,
+ void *arg),
+ void *arg);
+
+
+/* in filemap.c */
+/* Add NULL-terminated list of dirs that pg_rewind can skip copying */
+extern void extensions_exclude_add(char **exclude_dirs);
+
+/* signature for pg_rewind extension library rewind function */
+extern PGDLLEXPORT void _PG_rewind(const char *datadir_target,
+ char *datadir_source, char *connstr_source,
+ XLogRecPtr startpoint, int tliIndex,
+ XLogRecPtr endpoint,
+ const char *restoreCommand,
+ const char *argv0, bool debug);
+
+#endif /* PG_REWIND_EXT_H */
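Putting the pieces together: a rewind library exports a single _PG_rewind() symbol, and pg_rewind dlopen()s it before building its file map, so the library can both rewind its own data and tell pg_rewind which directories to leave alone. A hedged skeleton (the directory name and the callback body are illustrative):

    #include "postgres_fe.h"
    #include "pg_rewind_ext.h"

    static void
    my_page_callback(XLogReaderState *record, void *arg)
    {
        /* inspect the record; note blocks/files the library must copy */
    }

    void
    _PG_rewind(const char *datadir_target, char *datadir_source,
               char *connstr_source, XLogRecPtr startpoint, int tliIndex,
               XLogRecPtr endpoint, const char *restoreCommand,
               const char *argv0, bool debug)
    {
        static char *skip_dirs[] = {"my_extension_data", NULL};

        /* These directories are rewound here, so pg_rewind must skip them. */
        extensions_exclude_add(skip_dirs);

        /* Scan the target's WAL for the records this library cares about. */
        SimpleXLogRead(datadir_target, startpoint, tliIndex, endpoint,
                       restoreCommand, my_page_callback, NULL);
    }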
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index c1134eae5b5..70db51c6bec 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -220,6 +220,11 @@ double throttle_delay = 0;
*/
int64 latency_limit = 0;
+/*
+ * tableam selection
+ */
+char *tableam = NULL;
+
/*
* tablespace selection
*/
@@ -890,6 +895,7 @@ usage(void)
" --partition-method=(range|hash)\n"
" partition pgbench_accounts with this method (default: range)\n"
" --partitions=NUM partition pgbench_accounts into NUM parts (default: 0)\n"
+ " --tableam=TABLEAM create tables using the specified tableam\n"
" --tablespace=TABLESPACE create tables in the specified tablespace\n"
" --unlogged-tables create tables as unlogged tables\n"
"\nOptions to select what to run:\n"
@@ -4749,14 +4755,34 @@ createPartitions(PGconn *con)
appendPQExpBufferStr(&query, "maxvalue");
appendPQExpBufferChar(&query, ')');
+
+ if (tableam != NULL)
+ {
+ char *escape_tableam;
+
+ escape_tableam = PQescapeIdentifier(con, tableam, strlen(tableam));
+ appendPQExpBuffer(&query, " using %s", escape_tableam);
+ PQfreemem(escape_tableam);
+ }
}
else if (partition_method == PART_HASH)
+ {
printfPQExpBuffer(&query,
"create%s table pgbench_accounts_%d\n"
" partition of pgbench_accounts\n"
" for values with (modulus %d, remainder %d)",
unlogged_tables ? " unlogged" : "", p,
partitions, p - 1);
+
+ if (tableam != NULL)
+ {
+ char *escape_tableam;
+
+ escape_tableam = PQescapeIdentifier(con, tableam, strlen(tableam));
+ appendPQExpBuffer(&query, " using %s", escape_tableam);
+ PQfreemem(escape_tableam);
+ }
+ }
else /* cannot get there */
Assert(0);
@@ -4843,10 +4869,20 @@ initCreateTables(PGconn *con)
if (partition_method != PART_NONE && strcmp(ddl->table, "pgbench_accounts") == 0)
appendPQExpBuffer(&query,
" partition by %s (aid)", PARTITION_METHOD[partition_method]);
- else if (ddl->declare_fillfactor)
+ else
{
+ if (tableam != NULL)
+ {
+ char *escape_tableam;
+
+ escape_tableam = PQescapeIdentifier(con, tableam, strlen(tableam));
+ appendPQExpBuffer(&query, " using %s", escape_tableam);
+ PQfreemem(escape_tableam);
+ }
+
/* fillfactor is only expected on actual tables */
- appendPQExpBuffer(&query, " with (fillfactor=%d)", fillfactor);
+ if (ddl->declare_fillfactor)
+ appendPQExpBuffer(&query, " with (fillfactor=%d)", fillfactor);
}
if (tablespace != NULL)
@@ -6602,6 +6638,7 @@ main(int argc, char **argv)
{"failures-detailed", no_argument, NULL, 13},
{"max-tries", required_argument, NULL, 14},
{"verbose-errors", no_argument, NULL, 15},
+ {"tableam", required_argument, NULL, 16},
{NULL, 0, NULL, 0}
};
@@ -6939,6 +6976,10 @@ main(int argc, char **argv)
benchmarking_option_set = true;
verbose_errors = true;
break;
+ case 16: /* tableam */
+ initialization_option_set = true;
+ tableam = pg_strdup(optarg);
+ break;
default:
/* getopt_long already emitted a complaint */
pg_log_error_hint("Try \"%s --help\" for more information.", progname);
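With the option wired up, initializing against a non-default table access method is a one-liner, for example "pgbench -i --tableam=heap"; any installed AM name is accepted, and it is escaped with PQescapeIdentifier() before being appended as a USING clause, ahead of any WITH (fillfactor) and TABLESPACE clauses.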
diff --git a/src/include/access/amapi.h b/src/include/access/amapi.h
index 4476ff7fba1..73320f93be7 100644
--- a/src/include/access/amapi.h
+++ b/src/include/access/amapi.h
@@ -107,12 +107,42 @@ typedef void (*ambuildempty_function) (Relation indexRelation);
typedef bool (*aminsert_function) (Relation indexRelation,
Datum *values,
bool *isnull,
- ItemPointer heap_tid,
+ ItemPointer tupleid,
Relation heapRelation,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
struct IndexInfo *indexInfo);
+/* extended version of aminsert taking Datum tupleid */
+typedef bool (*aminsert_extended_function) (Relation indexRelation,
+ Datum *values,
+ bool *isnull,
+ Datum tupleid,
+ Relation heapRelation,
+ IndexUniqueCheck checkUnique,
+ bool indexUnchanged,
+ struct IndexInfo *indexInfo);
+
+/* update this tuple */
+typedef bool (*amupdate_function) (Relation indexRelation,
+ bool new_valid,
+ bool old_valid,
+ Datum *values,
+ bool *isnull,
+ Datum tupleid,
+ Datum *valuesOld,
+ bool *isnullOld,
+ Datum oldTupleid,
+ Relation heapRelation,
+ IndexUniqueCheck checkUnique,
+ struct IndexInfo *indexInfo);
+/* delete this tuple */
+typedef bool (*amdelete_function) (Relation indexRelation,
+ Datum *values, bool *isnull,
+ Datum tupleid,
+ Relation heapRelation,
+ struct IndexInfo *indexInfo);
+
/* bulk delete */
typedef IndexBulkDeleteResult *(*ambulkdelete_function) (IndexVacuumInfo *info,
IndexBulkDeleteResult *stats,
@@ -246,6 +276,8 @@ typedef struct IndexAmRoutine
bool amusemaintenanceworkmem;
/* does AM store tuple information only at block granularity? */
bool amsummarizing;
+ /* can the AM itself provide MVCC semantics? */
+ bool ammvccaware;
/* OR of parallel vacuum flags. See vacuum.h for flags. */
uint8 amparallelvacuumoptions;
/* type of data stored in index, or InvalidOid if variable */
@@ -261,6 +293,9 @@ typedef struct IndexAmRoutine
ambuild_function ambuild;
ambuildempty_function ambuildempty;
aminsert_function aminsert;
+ aminsert_extended_function aminsertextended;
+ amupdate_function amupdate;
+ amdelete_function amdelete;
ambulkdelete_function ambulkdelete;
amvacuumcleanup_function amvacuumcleanup;
amcanreturn_function amcanreturn; /* can be NULL */
@@ -286,7 +321,13 @@ typedef struct IndexAmRoutine
/* Functions in access/index/amapi.c */
+extern IndexAmRoutine *GetIndexAmRoutineWithTableAM(Oid tamoid, Oid amhandler);
extern IndexAmRoutine *GetIndexAmRoutine(Oid amhandler);
-extern IndexAmRoutine *GetIndexAmRoutineByAmId(Oid amoid, bool noerror);
+extern IndexAmRoutine *GetIndexAmRoutineExtended(Oid indoid, Oid amhandler);
+extern IndexAmRoutine *GetIndexAmRoutineByAmId(Oid indoid, Oid amoid, bool noerror);
+
+typedef IndexAmRoutine *(*IndexAMRoutineHookType) (Oid tamoid, Oid amhandler);
+
+extern IndexAMRoutineHookType IndexAMRoutineHook;
#endif /* AMAPI_H */
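A sketch of how an MVCC-aware AM's handler might fill in the new fields (the myam_* callbacks are assumed to be defined elsewhere; illustrative only):

    #include "postgres.h"
    #include "access/amapi.h"
    #include "fmgr.h"

    Datum
    myam_handler(PG_FUNCTION_ARGS)
    {
        IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);

        /* This AM tracks tuple visibility itself. */
        amroutine->ammvccaware = true;

        /* Row-id based entry points take a Datum tuple identifier. */
        amroutine->aminsertextended = myam_insertextended;
        amroutine->amupdate = myam_update;
        amroutine->amdelete = myam_delete;

        /* ... remaining fields filled in as for any index AM ... */

        PG_RETURN_POINTER(amroutine);
    }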
diff --git a/src/include/access/brin_internal.h b/src/include/access/brin_internal.h
index 97ddc925b27..418b32d5515 100644
--- a/src/include/access/brin_internal.h
+++ b/src/include/access/brin_internal.h
@@ -92,7 +92,7 @@ extern IndexBuildResult *brinbuild(Relation heap, Relation index,
struct IndexInfo *indexInfo);
extern void brinbuildempty(Relation index);
extern bool brininsert(Relation idxRel, Datum *values, bool *nulls,
- ItemPointer heaptid, Relation heapRel,
+ Datum tupleid, Relation heapRel,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
struct IndexInfo *indexInfo);
diff --git a/src/include/access/detoast.h b/src/include/access/detoast.h
index 908e1fc6919..26ef91e23df 100644
--- a/src/include/access/detoast.h
+++ b/src/include/access/detoast.h
@@ -63,6 +63,13 @@ extern struct varlena *detoast_attr_slice(struct varlena *attr,
int32 sliceoffset,
int32 slicelength);
+/* ----------
+ * toast_decompress_datum -
+ *
+ * Decompress a compressed version of a varlena datum
+ */
+extern struct varlena *toast_decompress_datum(struct varlena *attr);
+
/* ----------
* toast_raw_datum_size -
*
@@ -79,4 +86,11 @@ extern Size toast_raw_datum_size(Datum value);
*/
extern Size toast_datum_size(Datum value);
+/*
+ * for in_memory module
+ */
+typedef struct varlena* (*ToastFunc) (struct varlena *attr);
+extern void register_o_detoast_func(ToastFunc func);
+extern void deregister_o_detoast_func(void);
+
#endif /* DETOAST_H */
diff --git a/src/include/access/genam.h b/src/include/access/genam.h
index b071cedd44b..0de79f782a5 100644
--- a/src/include/access/genam.h
+++ b/src/include/access/genam.h
@@ -144,11 +144,26 @@ extern void index_close(Relation relation, LOCKMODE lockmode);
extern bool index_insert(Relation indexRelation,
Datum *values, bool *isnull,
- ItemPointer heap_t_ctid,
+ ItemPointer tupleid,
Relation heapRelation,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
struct IndexInfo *indexInfo);
+extern bool index_update(Relation indexRelation,
+ bool new_valid,
+ bool old_valid,
+ Datum *values,
+ bool *isnull,
+ Datum tupleid,
+ Datum *valuesOld,
+ bool *isnullOld,
+ Datum oldTupleid,
+ Relation heapRelation,
+ IndexUniqueCheck checkUnique,
+ struct IndexInfo *indexInfo);
+extern bool index_delete(Relation indexRelation, Datum *values, bool *isnull,
+ Datum tupleid, Relation heapRelation,
+ struct IndexInfo *indexInfo);
extern IndexScanDesc index_beginscan(Relation heapRelation,
Relation indexRelation,
@@ -173,6 +188,9 @@ extern IndexScanDesc index_beginscan_parallel(Relation heaprel,
ParallelIndexScanDesc pscan);
extern ItemPointer index_getnext_tid(IndexScanDesc scan,
ScanDirection direction);
+extern NullableDatum index_getnext_rowid(IndexScanDesc scan,
+ ScanDirection direction);
+extern Datum index_getnext_tupleid(IndexScanDesc scan, ScanDirection direction);
struct TupleTableSlot;
extern bool index_fetch_heap(IndexScanDesc scan, struct TupleTableSlot *slot);
extern bool index_getnext_slot(IndexScanDesc scan, ScanDirection direction,
diff --git a/src/include/access/gin_private.h b/src/include/access/gin_private.h
index 6da64928b66..7ba1d4bc999 100644
--- a/src/include/access/gin_private.h
+++ b/src/include/access/gin_private.h
@@ -114,7 +114,7 @@ extern IndexBuildResult *ginbuild(Relation heap, Relation index,
struct IndexInfo *indexInfo);
extern void ginbuildempty(Relation index);
extern bool gininsert(Relation index, Datum *values, bool *isnull,
- ItemPointer ht_ctid, Relation heapRel,
+ Datum tupleid, Relation heapRel,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
struct IndexInfo *indexInfo);
diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h
index 3edc740a3f3..0cd19757208 100644
--- a/src/include/access/gist_private.h
+++ b/src/include/access/gist_private.h
@@ -401,7 +401,7 @@ typedef struct GiSTOptions
/* gist.c */
extern void gistbuildempty(Relation index);
extern bool gistinsert(Relation r, Datum *values, bool *isnull,
- ItemPointer ht_ctid, Relation heapRel,
+ Datum tupleid, Relation heapRel,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
struct IndexInfo *indexInfo);
diff --git a/src/include/access/hash.h b/src/include/access/hash.h
index 9e035270a16..14fb8e4ce1e 100644
--- a/src/include/access/hash.h
+++ b/src/include/access/hash.h
@@ -364,7 +364,7 @@ extern IndexBuildResult *hashbuild(Relation heap, Relation index,
struct IndexInfo *indexInfo);
extern void hashbuildempty(Relation index);
extern bool hashinsert(Relation rel, Datum *values, bool *isnull,
- ItemPointer ht_ctid, Relation heapRel,
+ Datum tupleid, Relation heapRel,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
struct IndexInfo *indexInfo);
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index c7278219b24..72b4f8e7634 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -242,19 +242,22 @@ extern void heap_multi_insert(Relation relation, struct TupleTableSlot **slots,
int ntuples, CommandId cid, int options,
BulkInsertState bistate);
extern TM_Result heap_delete(Relation relation, ItemPointer tid,
- CommandId cid, Snapshot crosscheck, bool wait,
- struct TM_FailureData *tmfd, bool changingPart);
+ CommandId cid, Snapshot crosscheck, int options,
+ struct TM_FailureData *tmfd, bool changingPart,
+ TupleTableSlot *oldSlot);
extern void heap_finish_speculative(Relation relation, ItemPointer tid);
extern void heap_abort_speculative(Relation relation, ItemPointer tid);
extern TM_Result heap_update(Relation relation, ItemPointer otid,
HeapTuple newtup,
- CommandId cid, Snapshot crosscheck, bool wait,
+ CommandId cid, Snapshot crosscheck, int options,
struct TM_FailureData *tmfd, LockTupleMode *lockmode,
- TU_UpdateIndexes *update_indexes);
-extern TM_Result heap_lock_tuple(Relation relation, HeapTuple tuple,
- CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy,
- bool follow_updates,
- Buffer *buffer, struct TM_FailureData *tmfd);
+ TU_UpdateIndexes *update_indexes,
+ TupleTableSlot *oldSlot);
+extern TM_Result heap_lock_tuple(Relation relation, ItemPointer tid,
+ TupleTableSlot *slot,
+ CommandId cid, LockTupleMode mode,
+ LockWaitPolicy wait_policy, bool follow_updates,
+ struct TM_FailureData *tmfd);
extern void heap_inplace_update(Relation relation, HeapTuple tuple);
extern bool heap_prepare_freeze_tuple(HeapTupleHeader tuple,
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index 9020abebc92..3f36ea455aa 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -1128,7 +1128,7 @@ typedef struct BTOptions
*/
extern void btbuildempty(Relation index);
extern bool btinsert(Relation rel, Datum *values, bool *isnull,
- ItemPointer ht_ctid, Relation heapRel,
+ Datum tupleid, Relation heapRel,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
struct IndexInfo *indexInfo);
diff --git a/src/include/access/reloptions.h b/src/include/access/reloptions.h
index 1d5bfa62ffc..4812bc4481d 100644
--- a/src/include/access/reloptions.h
+++ b/src/include/access/reloptions.h
@@ -21,6 +21,7 @@
#include "access/amapi.h"
#include "access/htup.h"
+#include "access/tableam.h"
#include "access/tupdesc.h"
#include "nodes/pg_list.h"
#include "storage/lock.h"
@@ -224,6 +225,7 @@ extern Datum transformRelOptions(Datum oldOptions, List *defList,
bool acceptOidsOff, bool isReset);
extern List *untransformRelOptions(Datum options);
extern bytea *extractRelOptions(HeapTuple tuple, TupleDesc tupdesc,
+ const TableAmRoutine *tableam,
amoptions_function amoptions);
extern void *build_reloptions(Datum reloptions, bool validate,
relopt_kind kind,
diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h
index d03360eac04..ea0913ce6f2 100644
--- a/src/include/access/relscan.h
+++ b/src/include/access/relscan.h
@@ -122,6 +122,7 @@ typedef struct IndexScanDescData
struct ScanKeyData *keyData; /* array of index qualifier descriptors */
struct ScanKeyData *orderByData; /* array of ordering op descriptors */
bool xs_want_itup; /* caller requests index tuples */
+ bool xs_want_rowid; /* caller requests rowid */
bool xs_temp_snap; /* unregister snapshot at scan end? */
/* signaling to index AM about killing index tuples */
@@ -145,6 +146,7 @@ typedef struct IndexScanDescData
struct TupleDescData *xs_hitupdesc; /* rowtype descriptor of xs_hitup */
ItemPointerData xs_heaptid; /* result */
+ NullableDatum xs_rowid; /* result if xs_want_rowid */
bool xs_heap_continue; /* T if must keep walking, potential
* further results */
IndexFetchTableData *xs_heapfetch;
diff --git a/src/include/access/spgist.h b/src/include/access/spgist.h
index fe31d32dbe9..e44d3561abf 100644
--- a/src/include/access/spgist.h
+++ b/src/include/access/spgist.h
@@ -197,7 +197,7 @@ extern IndexBuildResult *spgbuild(Relation heap, Relation index,
struct IndexInfo *indexInfo);
extern void spgbuildempty(Relation index);
extern bool spginsert(Relation index, Datum *values, bool *isnull,
- ItemPointer ht_ctid, Relation heapRel,
+ Datum tupleid, Relation heapRel,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
struct IndexInfo *indexInfo);
diff --git a/src/include/access/sysattr.h b/src/include/access/sysattr.h
index 8f08682750b..d717a7cafec 100644
--- a/src/include/access/sysattr.h
+++ b/src/include/access/sysattr.h
@@ -24,6 +24,7 @@
#define MaxTransactionIdAttributeNumber (-4)
#define MaxCommandIdAttributeNumber (-5)
#define TableOidAttributeNumber (-6)
-#define FirstLowInvalidHeapAttributeNumber (-7)
+#define RowIdAttributeNumber (-7)
+#define FirstLowInvalidHeapAttributeNumber (-8)
#endif /* SYSATTR_H */
diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h
index 5e195fd292f..62ffc14e8f8 100644
--- a/src/include/access/tableam.h
+++ b/src/include/access/tableam.h
@@ -17,10 +17,14 @@
#ifndef TABLEAM_H
#define TABLEAM_H
+#include "access/amapi.h"
#include "access/relscan.h"
#include "access/sdir.h"
#include "access/xact.h"
#include "executor/tuptable.h"
+#include "nodes/execnodes.h"
+#include "storage/bufmgr.h"
+#include "utils/guc.h"
#include "utils/rel.h"
#include "utils/snapshot.h"
@@ -39,6 +43,16 @@ struct TBMIterateResult;
struct VacuumParams;
struct ValidateIndexState;
+typedef int (*AcquireSampleRowsFunc) (Relation relation, int elevel,
+ HeapTuple *rows, int targrows,
+ double *totalrows,
+ double *totaldeadrows);
+
+/* in commands/analyze.c */
+extern int acquire_sample_rows(Relation onerel, int elevel,
+ HeapTuple *rows, int targrows,
+ double *totalrows, double *totaldeadrows);
+
/*
* Bitmask values for the flags argument to the scan_begin callback.
*/
@@ -259,6 +273,11 @@ typedef struct TM_IndexDeleteOp
/* Follow update chain and lock latest version of tuple */
#define TUPLE_LOCK_FLAG_FIND_LAST_VERSION (1 << 1)
+/* "options" flag bits for table_tuple_update and table_tuple_delete */
+#define TABLE_MODIFY_WAIT 0x0001
+#define TABLE_MODIFY_FETCH_OLD_TUPLE 0x0002
+#define TABLE_MODIFY_LOCK_UPDATED 0x0004
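+
+/*
+ * Example (illustrative sketch): delete a tuple, waiting for concurrent
+ * updaters and fetching the deleted row version into oldSlot:
+ *
+ *	table_tuple_delete(rel, tupleid, cid, snapshot, InvalidSnapshot,
+ *			   TABLE_MODIFY_WAIT | TABLE_MODIFY_FETCH_OLD_TUPLE,
+ *			   &tmfd, false, oldSlot);
+ */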
+
/* Typedef for callback function for table_index_build_scan */
typedef void (*IndexBuildCallback) (Relation index,
@@ -295,6 +314,9 @@ typedef struct TableAmRoutine
*/
const TupleTableSlotOps *(*slot_callbacks) (Relation rel);
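+ /* Report which RowRefType this AM uses to identify rows */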
+ RowRefType (*get_row_ref_type) (Relation rel);
+
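+ /* Free the AM-private cache hung off the relation's rd_amcache */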
+ void (*free_rd_amcache) (Relation rel);
/* ------------------------------------------------------------------------
* Table scan callbacks.
@@ -447,7 +469,7 @@ typedef struct TableAmRoutine
* future searches.
*/
bool (*index_fetch_tuple) (struct IndexFetchTableData *scan,
- ItemPointer tid,
+ Datum tupleid,
Snapshot snapshot,
TupleTableSlot *slot,
bool *call_again, bool *all_dead);
@@ -464,7 +486,7 @@ typedef struct TableAmRoutine
* test, returns true, false otherwise.
*/
bool (*tuple_fetch_row_version) (Relation rel,
- ItemPointer tid,
+ Datum tupleid,
Snapshot snapshot,
TupleTableSlot *slot);
@@ -500,23 +522,19 @@ typedef struct TableAmRoutine
*/
/* see table_tuple_insert() for reference about parameters */
- void (*tuple_insert) (Relation rel, TupleTableSlot *slot,
+ TupleTableSlot *(*tuple_insert) (Relation rel, TupleTableSlot *slot,
CommandId cid, int options,
struct BulkInsertStateData *bistate);
- /* see table_tuple_insert_speculative() for reference about parameters */
- void (*tuple_insert_speculative) (Relation rel,
- TupleTableSlot *slot,
- CommandId cid,
- int options,
- struct BulkInsertStateData *bistate,
- uint32 specToken);
-
- /* see table_tuple_complete_speculative() for reference about parameters */
- void (*tuple_complete_speculative) (Relation rel,
- TupleTableSlot *slot,
- uint32 specToken,
- bool succeeded);
+ TupleTableSlot *(*tuple_insert_with_arbiter) (ResultRelInfo *resultRelInfo,
+ TupleTableSlot *slot,
+ CommandId cid, int options,
+ struct BulkInsertStateData *bistate,
+ List *arbiterIndexes,
+ EState *estate,
+ LockTupleMode lockmode,
+ TupleTableSlot *lockedSlot,
+ TupleTableSlot *tempSlot);
/* see table_multi_insert() for reference about parameters */
void (*multi_insert) (Relation rel, TupleTableSlot **slots, int nslots,
@@ -524,29 +542,31 @@ typedef struct TableAmRoutine
/* see table_tuple_delete() for reference about parameters */
TM_Result (*tuple_delete) (Relation rel,
- ItemPointer tid,
+ Datum tupleid,
CommandId cid,
Snapshot snapshot,
Snapshot crosscheck,
- bool wait,
+ int options,
TM_FailureData *tmfd,
- bool changingPart);
+ bool changingPart,
+ TupleTableSlot *oldSlot);
/* see table_tuple_update() for reference about parameters */
TM_Result (*tuple_update) (Relation rel,
- ItemPointer otid,
+ Datum tupleid,
TupleTableSlot *slot,
CommandId cid,
Snapshot snapshot,
Snapshot crosscheck,
- bool wait,
+ int options,
TM_FailureData *tmfd,
LockTupleMode *lockmode,
- TU_UpdateIndexes *update_indexes);
+ TU_UpdateIndexes *update_indexes,
+ TupleTableSlot *oldSlot);
/* see table_tuple_lock() for reference about parameters */
TM_Result (*tuple_lock) (Relation rel,
- ItemPointer tid,
+ Datum tupleid,
Snapshot snapshot,
TupleTableSlot *slot,
CommandId cid,
@@ -866,6 +886,14 @@ typedef struct TableAmRoutine
struct SampleScanState *scanstate,
TupleTableSlot *slot);
+ /* Check if tuple in the slot belongs to the current transaction */
+ bool (*tuple_is_current) (Relation rel, TupleTableSlot *slot);
+
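+ /* Set up ANALYZE sampling; may be NULL to fall back to block-based sampling */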
+ void (*analyze_table) (Relation relation,
+ AcquireSampleRowsFunc *func,
+ BlockNumber *totalpages);
+
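+ /* Parse and validate reloptions for a relation of the given relkind */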
+ bytea *(*reloptions) (char relkind, Datum reloptions, bool validate);
} TableAmRoutine;
@@ -1234,7 +1262,7 @@ table_index_fetch_end(struct IndexFetchTableData *scan)
*/
static inline bool
table_index_fetch_tuple(struct IndexFetchTableData *scan,
- ItemPointer tid,
+ Datum tupleid,
Snapshot snapshot,
TupleTableSlot *slot,
bool *call_again, bool *all_dead)
@@ -1247,7 +1275,7 @@ table_index_fetch_tuple(struct IndexFetchTableData *scan,
if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
elog(ERROR, "unexpected table_index_fetch_tuple call during logical decoding");
- return scan->rel->rd_tableam->index_fetch_tuple(scan, tid, snapshot,
+ return scan->rel->rd_tableam->index_fetch_tuple(scan, tupleid, snapshot,
slot, call_again,
all_dead);
}
@@ -1281,7 +1309,7 @@ extern bool table_index_fetch_tuple_check(Relation rel,
*/
static inline bool
table_tuple_fetch_row_version(Relation rel,
- ItemPointer tid,
+ Datum tupleid,
Snapshot snapshot,
TupleTableSlot *slot)
{
@@ -1293,7 +1321,7 @@ table_tuple_fetch_row_version(Relation rel,
if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
elog(ERROR, "unexpected table_tuple_fetch_row_version call during logical decoding");
- return rel->rd_tableam->tuple_fetch_row_version(rel, tid, snapshot, slot);
+ return rel->rd_tableam->tuple_fetch_row_version(rel, tupleid, snapshot, slot);
}
/*
@@ -1393,45 +1421,32 @@ table_index_delete_tuples(Relation rel, TM_IndexDeleteOp *delstate)
* insertion. But note that any toasting of fields within the slot is NOT
* reflected in the slots contents.
*/
-static inline void
+static inline TupleTableSlot *
table_tuple_insert(Relation rel, TupleTableSlot *slot, CommandId cid,
int options, struct BulkInsertStateData *bistate)
{
- rel->rd_tableam->tuple_insert(rel, slot, cid, options,
- bistate);
+ return rel->rd_tableam->tuple_insert(rel, slot, cid, options, bistate);
}
-/*
- * Perform a "speculative insertion". These can be backed out afterwards
- * without aborting the whole transaction. Other sessions can wait for the
- * speculative insertion to be confirmed, turning it into a regular tuple, or
- * aborted, as if it never existed. Speculatively inserted tuples behave as
- * "value locks" of short duration, used to implement INSERT .. ON CONFLICT.
- *
- * A transaction having performed a speculative insertion has to either abort,
- * or finish the speculative insertion with
- * table_tuple_complete_speculative(succeeded = ...).
- */
-static inline void
-table_tuple_insert_speculative(Relation rel, TupleTableSlot *slot,
- CommandId cid, int options,
- struct BulkInsertStateData *bistate,
- uint32 specToken)
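+/*
+ * Insert a tuple, resolving ON CONFLICT arbitration against arbiterIndexes
+ * inside the table AM. See the tuple_insert_with_arbiter callback above
+ * for the parameters.
+ */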
+static inline TupleTableSlot *
+table_tuple_insert_with_arbiter(ResultRelInfo *resultRelInfo,
+ TupleTableSlot *slot,
+ CommandId cid, int options,
+ struct BulkInsertStateData *bistate,
+ List *arbiterIndexes,
+ EState *estate,
+ LockTupleMode lockmode,
+ TupleTableSlot *lockedSlot,
+ TupleTableSlot *tempSlot)
{
- rel->rd_tableam->tuple_insert_speculative(rel, slot, cid, options,
- bistate, specToken);
-}
-
-/*
- * Complete "speculative insertion" started in the same transaction. If
- * succeeded is true, the tuple is fully inserted, if false, it's removed.
- */
-static inline void
-table_tuple_complete_speculative(Relation rel, TupleTableSlot *slot,
- uint32 specToken, bool succeeded)
-{
- rel->rd_tableam->tuple_complete_speculative(rel, slot, specToken,
- succeeded);
+ Relation rel = resultRelInfo->ri_RelationDesc;
+
+ return rel->rd_tableam->tuple_insert_with_arbiter(resultRelInfo,
+ slot, cid, options,
+ bistate, arbiterIndexes,
+ estate,
+ lockmode, lockedSlot,
+ tempSlot);
}
/*
@@ -1457,7 +1472,7 @@ table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots,
}
/*
- * Delete a tuple.
+ * Delete a tuple (and optionally lock the last tuple version).
*
* NB: do not call this directly unless prepared to deal with
* concurrent-update conditions. Use simple_table_tuple_delete instead.
@@ -1468,11 +1483,21 @@ table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots,
* cid - delete command ID (used for visibility test, and stored into
* cmax if successful)
* crosscheck - if not InvalidSnapshot, also check tuple against this
- * wait - true if should wait for any conflicting update to commit/abort
+ * options:
+ * If TABLE_MODIFY_WAIT, wait for any conflicting update to commit/abort.
+ * If TABLE_MODIFY_FETCH_OLD_TUPLE option is given, the existing tuple is
+ * fetched into oldSlot when the update is successful.
+ * If TABLE_MODIFY_LOCK_UPDATED option is given and the tuple is
+ * concurrently updated, then the last tuple version is locked and fetched
+ * into oldSlot.
+ *
* Output parameters:
* tmfd - filled in failure cases (see below)
* changingPart - true iff the tuple is being moved to another partition
* table due to an update of the partition key. Otherwise, false.
+ * oldSlot - slot to save the deleted or locked tuple. Can be NULL if none of
+ * TABLE_MODIFY_FETCH_OLD_TUPLE or TABLE_MODIFY_LOCK_UPDATED options
+ * is specified.
*
* Normal, successful return value is TM_Ok, which means we did actually
* delete it. Failure return codes are TM_SelfModified, TM_Updated, and
@@ -1483,17 +1508,19 @@ table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots,
* TM_FailureData for additional info.
*/
static inline TM_Result
-table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid,
- Snapshot snapshot, Snapshot crosscheck, bool wait,
- TM_FailureData *tmfd, bool changingPart)
+table_tuple_delete(Relation rel, Datum tupleid, CommandId cid,
+ Snapshot snapshot, Snapshot crosscheck, int options,
+ TM_FailureData *tmfd, bool changingPart,
+ TupleTableSlot *oldSlot)
{
- return rel->rd_tableam->tuple_delete(rel, tid, cid,
+ return rel->rd_tableam->tuple_delete(rel, tupleid, cid,
snapshot, crosscheck,
- wait, tmfd, changingPart);
+ options, tmfd, changingPart,
+ oldSlot);
}
/*
- * Update a tuple.
+ * Update a tuple (and optionally lock the last tuple version).
*
* NB: do not call this directly unless you are prepared to deal with
* concurrent-update conditions. Use simple_table_tuple_update instead.
@@ -1505,13 +1532,23 @@ table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid,
* cid - update command ID (used for visibility test, and stored into
* cmax/cmin if successful)
* crosscheck - if not InvalidSnapshot, also check old tuple against this
- * wait - true if should wait for any conflicting update to commit/abort
+ * options:
+ * If TABLE_MODIFY_WAIT, wait for any conflicting update to commit/abort.
+ * If TABLE_MODIFY_FETCH_OLD_TUPLE option is given, the existing tuple is
+ * fetched into oldSlot when the update is successful.
+ * If TABLE_MODIFY_LOCK_UPDATED option is given and the tuple is
+ * concurrently updated, then the last tuple version is locked and fetched
+ * into oldSlot.
+ *
* Output parameters:
* tmfd - filled in failure cases (see below)
* lockmode - filled with lock mode acquired on tuple
* update_indexes - in success cases this is set to true if new index entries
* are required for this tuple
- *
+ * oldSlot - slot to save the deleted or locked tuple. Can be NULL if none of
+ * TABLE_MODIFY_FETCH_OLD_TUPLE or TABLE_MODIFY_LOCK_UPDATED options
+ * is specified.
+ *
* Normal, successful return value is TM_Ok, which means we did actually
* update it. Failure return codes are TM_SelfModified, TM_Updated, and
* TM_BeingModified (the last only possible if wait == false).
@@ -1527,15 +1564,17 @@ table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid,
* for additional info.
*/
static inline TM_Result
-table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot,
+table_tuple_update(Relation rel, Datum tupleid, TupleTableSlot *slot,
CommandId cid, Snapshot snapshot, Snapshot crosscheck,
- bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode,
- TU_UpdateIndexes *update_indexes)
+ int options, TM_FailureData *tmfd, LockTupleMode *lockmode,
+ TU_UpdateIndexes *update_indexes,
+ TupleTableSlot *oldSlot)
{
- return rel->rd_tableam->tuple_update(rel, otid, slot,
+ return rel->rd_tableam->tuple_update(rel, tupleid, slot,
cid, snapshot, crosscheck,
- wait, tmfd,
- lockmode, update_indexes);
+ options, tmfd,
+ lockmode, update_indexes,
+ oldSlot);
}
/*
@@ -1572,12 +1611,12 @@ table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot,
* comments for struct TM_FailureData for additional info.
*/
static inline TM_Result
-table_tuple_lock(Relation rel, ItemPointer tid, Snapshot snapshot,
+table_tuple_lock(Relation rel, Datum tupleid, Snapshot snapshot,
TupleTableSlot *slot, CommandId cid, LockTupleMode mode,
LockWaitPolicy wait_policy, uint8 flags,
TM_FailureData *tmfd)
{
- return rel->rd_tableam->tuple_lock(rel, tid, snapshot, slot,
+ return rel->rd_tableam->tuple_lock(rel, tupleid, snapshot, slot,
cid, mode, wait_policy,
flags, tmfd);
}
@@ -2043,6 +2082,11 @@ table_scan_sample_next_tuple(TableScanDesc scan,
slot);
}
+static inline bool
+table_tuple_is_current(Relation rel, TupleTableSlot *slot)
+{
+ return rel->rd_tableam->tuple_is_current(rel, slot);
+}
/* ----------------------------------------------------------------------------
* Functions to make modifications a bit simpler.
@@ -2050,11 +2094,13 @@ table_scan_sample_next_tuple(TableScanDesc scan,
*/
extern void simple_table_tuple_insert(Relation rel, TupleTableSlot *slot);
-extern void simple_table_tuple_delete(Relation rel, ItemPointer tid,
- Snapshot snapshot);
-extern void simple_table_tuple_update(Relation rel, ItemPointer otid,
+extern void simple_table_tuple_delete(Relation rel, Datum tupleid,
+ Snapshot snapshot,
+ TupleTableSlot *oldSlot);
+extern void simple_table_tuple_update(Relation rel, Datum tupleid,
TupleTableSlot *slot, Snapshot snapshot,
- TU_UpdateIndexes *update_indexes);
+ TU_UpdateIndexes *update_indexes,
+ TupleTableSlot *oldSlot);
/* ----------------------------------------------------------------------------
@@ -2095,6 +2141,60 @@ extern void table_block_relation_estimate_size(Relation rel,
*/
extern const TableAmRoutine *GetTableAmRoutine(Oid amhandler);
+extern const TableAmRoutine *GetTableAmRoutineByAmOid(Oid amoid);
extern const TableAmRoutine *GetHeapamTableAmRoutine(void);
+static inline RowRefType
+table_get_row_ref_type(Relation rel)
+{
+ if (rel->rd_tableam)
+ return rel->rd_tableam->get_row_ref_type(rel);
+ else
+ return ROW_REF_TID;
+}
+
+static inline void
+table_free_rd_amcache(Relation rel)
+{
+ if (rel->rd_tableam)
+ {
+ rel->rd_tableam->free_rd_amcache(rel);
+ }
+ else
+ {
+ if (rel->rd_amcache)
+ pfree(rel->rd_amcache);
+ rel->rd_amcache = NULL;
+ }
+}
+
+static inline void
+table_analyze(Relation relation, AcquireSampleRowsFunc *func,
+ BlockNumber *totalpages)
+{
+ if (relation->rd_tableam->analyze_table)
+ {
+ relation->rd_tableam->analyze_table(relation, func, totalpages);
+ }
+ else
+ {
+ *func = acquire_sample_rows;
+ *totalpages = RelationGetNumberOfBlocks(relation);
+ }
+}
+
+static inline bytea *
+table_reloptions(Relation rel, char relkind,
+ Datum reloptions, bool validate)
+{
+ return rel->rd_tableam->reloptions(relkind, reloptions, validate);
+}
+
+static inline bytea *
+tableam_reloptions(const TableAmRoutine *tableam, char relkind,
+ Datum reloptions, bool validate)
+{
+ return tableam->reloptions(relkind, reloptions, validate);
+}
+
#endif /* TABLEAM_H */
diff --git a/src/include/access/transam.h b/src/include/access/transam.h
index f5af6d30556..ed931c770ec 100644
--- a/src/include/access/transam.h
+++ b/src/include/access/transam.h
@@ -15,7 +15,9 @@
#define TRANSAM_H
#include "access/xlogdefs.h"
-
+#ifndef FRONTEND
+#include "port/atomics.h"
+#endif
/* ----------------
* Special transaction ID values
@@ -196,6 +198,22 @@ FullTransactionIdAdvance(FullTransactionId *dest)
#define FirstUnpinnedObjectId 12000
#define FirstNormalObjectId 16384
+#define COMMITSEQNO_INPROGRESS UINT64CONST(0x0)
+#define COMMITSEQNO_NON_DELETED UINT64CONST(0x1)
+#define COMMITSEQNO_ABORTED UINT64CONST(0x2)
+#define COMMITSEQNO_FROZEN UINT64CONST(0x3)
+#define COMMITSEQNO_COMMITTING UINT64CONST(0x4)
+#define COMMITSEQNO_FIRST_NORMAL UINT64CONST(0x5)
+#define COMMITSEQNO_MAX_NORMAL UINT64CONST(0x7FFFFFFFFFFFFFFF)
+
+#define COMMITSEQNO_IS_INPROGRESS(csn) ((csn) == COMMITSEQNO_INPROGRESS || (csn) == COMMITSEQNO_NON_DELETED)
+#define COMMITSEQNO_IS_NON_DELETED(csn) ((csn) == COMMITSEQNO_NON_DELETED)
+#define COMMITSEQNO_IS_ABORTED(csn) ((csn) == COMMITSEQNO_ABORTED)
+#define COMMITSEQNO_IS_FROZEN(csn) ((csn) == COMMITSEQNO_FROZEN)
+#define COMMITSEQNO_IS_NORMAL(csn) ((csn) >= COMMITSEQNO_FIRST_NORMAL)
+#define COMMITSEQNO_IS_COMMITTING(csn) ((csn) == COMMITSEQNO_COMMITTING)
+#define COMMITSEQNO_IS_COMMITTED(csn) ((csn) >= COMMITSEQNO_FROZEN)
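+
+/*
+ * Note: COMMITSEQNO_IS_COMMITTED() matches FROZEN, COMMITTING and every
+ * normal CSN, since all of them are >= COMMITSEQNO_FROZEN.
+ */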
+
/*
* VariableCache is a data structure in shared memory that is used to track
* OID and XID assignment state. For largely historical reasons, there is
@@ -252,6 +270,11 @@ typedef struct VariableCacheData
*/
TransactionId oldestClogXid; /* oldest it's safe to look up in clog */
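+ /* next CSN to assign; atomic because backends advance it concurrently */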
+#ifndef FRONTEND
+ pg_atomic_uint64 nextCommitSeqNo;
+#else
+ CommitSeqNo nextCommitSeqNo;
+#endif
} VariableCacheData;
typedef VariableCacheData *VariableCache;
@@ -294,6 +317,7 @@ extern void AdvanceOldestClogXid(TransactionId oldest_datfrozenxid);
extern bool ForceTransactionIdLimitUpdate(void);
extern Oid GetNewObjectId(void);
extern void StopGeneratingPinnedObjectIds(void);
+extern CommitSeqNo GetCurrentCSN(void);
#ifdef USE_ASSERT_CHECKING
extern void AssertTransactionIdInAllowableRange(TransactionId xid);
diff --git a/src/include/access/xact.h b/src/include/access/xact.h
index 7d3b9446e62..e8200d55720 100644
--- a/src/include/access/xact.h
+++ b/src/include/access/xact.h
@@ -527,4 +527,7 @@ extern void EnterParallelMode(void);
extern void ExitParallelMode(void);
extern bool IsInParallelMode(void);
+typedef void (*xact_redo_hook_type) (TransactionId xid, XLogRecPtr lsn);
+extern xact_redo_hook_type xact_redo_hook;
+
#endif /* XACT_H */
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 48ca8523810..b3b2191e733 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -53,6 +53,7 @@ extern PGDLLIMPORT bool track_wal_io_timing;
extern PGDLLIMPORT int wal_decode_buffer_size;
extern PGDLLIMPORT int CheckPointSegments;
+extern PGDLLIMPORT CommitSeqNo startupCommitSeqNo;
/* Archive modes */
typedef enum ArchiveMode
@@ -284,6 +285,7 @@ extern void do_pg_backup_start(const char *backupidstr, bool fast,
StringInfo tblspcmapfile);
extern void do_pg_backup_stop(BackupState *state, bool waitforarchive);
extern void do_pg_abort_backup(int code, Datum arg);
+extern bool have_backup_in_progress(void);
extern void register_persistent_abort_backup_handler(void);
extern SessionBackupState get_backup_status(void);
@@ -299,4 +301,14 @@ extern SessionBackupState get_backup_status(void);
/* files to signal promotion to primary */
#define PROMOTE_SIGNAL_FILE "promote"
+typedef void (*CheckPoint_hook_type) (XLogRecPtr checkPointRedo, int flags);
+extern PGDLLIMPORT CheckPoint_hook_type CheckPoint_hook;
+extern double CheckPointProgress;
+typedef void (*after_checkpoint_cleanup_hook_type)(XLogRecPtr checkPointRedo,
+ int flags);
+extern PGDLLIMPORT after_checkpoint_cleanup_hook_type
+ after_checkpoint_cleanup_hook;
+
+extern void (*RedoShutdownHook) (void);
+
#endif /* XLOG_H */
diff --git a/src/include/archive/archive_module.h b/src/include/archive/archive_module.h
index 679ce5a6dbd..2921c0a05f8 100644
--- a/src/include/archive/archive_module.h
+++ b/src/include/archive/archive_module.h
@@ -37,13 +37,17 @@ typedef struct ArchiveModuleState
*/
typedef void (*ArchiveStartupCB) (ArchiveModuleState *state);
typedef bool (*ArchiveCheckConfiguredCB) (ArchiveModuleState *state);
-typedef bool (*ArchiveFileCB) (ArchiveModuleState *state, const char *file, const char *path);
+typedef void (*ArchivePreloadFileCB) (ArchiveModuleState *state,
+ const char *file, const char *path);
+typedef bool (*ArchiveFileCB) (ArchiveModuleState *state,
+ const char *file, const char *path);
typedef void (*ArchiveShutdownCB) (ArchiveModuleState *state);
typedef struct ArchiveModuleCallbacks
{
ArchiveStartupCB startup_cb;
ArchiveCheckConfiguredCB check_configured_cb;
+ ArchivePreloadFileCB archive_preload_file_cb;
ArchiveFileCB archive_file_cb;
ArchiveShutdownCB shutdown_cb;
} ArchiveModuleCallbacks;
diff --git a/src/include/c.h b/src/include/c.h
index f69d739be57..024d376e9fa 100644
--- a/src/include/c.h
+++ b/src/include/c.h
@@ -621,7 +621,7 @@ typedef double float8;
/*
* Oid, RegProcedure, TransactionId, SubTransactionId, MultiXactId,
- * CommandId
+ * CommandId, CommitSeqNo
*/
/* typedef Oid is in postgres_ext.h */
@@ -652,6 +652,8 @@ typedef uint32 CommandId;
#define FirstCommandId ((CommandId) 0)
#define InvalidCommandId (~(CommandId)0)
+typedef uint64 CommitSeqNo;
+
/* ----------------
* Variable-length datatypes all share the 'struct varlena' header.
diff --git a/src/include/catalog/dependency.h b/src/include/catalog/dependency.h
index ffd5e9dc82d..4cf8df01077 100644
--- a/src/include/catalog/dependency.h
+++ b/src/include/catalog/dependency.h
@@ -140,6 +140,8 @@ typedef enum ObjectClass
#define PERFORM_DELETION_SKIP_EXTENSIONS 0x0010 /* keep extensions */
#define PERFORM_DELETION_CONCURRENT_LOCK 0x0020 /* normal drop with
* concurrent lock mode */
+#define PERFORM_DELETION_OF_RELATION 0x0040 /* used for orioledb
+ * extension */
/* in dependency.c */
diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h
index c8532fb97c8..3fa15391d83 100644
--- a/src/include/catalog/index.h
+++ b/src/include/catalog/index.h
@@ -211,4 +211,6 @@ itemptr_decode(ItemPointer itemptr, int64 encoded)
ItemPointerSet(itemptr, block, offset);
}
+extern void index_update_stats(Relation rel, bool hasindex, double reltuples);
+
#endif /* INDEX_H */
diff --git a/src/include/commands/defrem.h b/src/include/commands/defrem.h
index 478203ed4c4..b8b4dbfc2a3 100644
--- a/src/include/commands/defrem.h
+++ b/src/include/commands/defrem.h
@@ -41,6 +41,10 @@ extern char *makeObjectName(const char *name1, const char *name2,
extern char *ChooseRelationName(const char *name1, const char *name2,
const char *label, Oid namespaceid,
bool isconstraint);
+extern List *ChooseIndexColumnNames(List *indexElems);
+extern char *ChooseIndexName(const char *tabname, Oid namespaceId,
+ List *colnames, List *exclusionOpNames,
+ bool primary, bool isconstraint);
extern bool CheckIndexCompatible(Oid oldId,
const char *accessMethodName,
List *attributeList,
@@ -158,4 +162,7 @@ extern int defGetTypeLength(DefElem *def);
extern List *defGetStringList(DefElem *def);
extern void errorConflictingDefElem(DefElem *defel, ParseState *pstate) pg_attribute_noreturn();
+typedef Oid (*GetDefaultOpClass_hook_type)(Oid type_id, Oid am_id);
+extern PGDLLIMPORT GetDefaultOpClass_hook_type GetDefaultOpClass_hook;
+
#endif /* DEFREM_H */
diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h
index 3d3e632a0cc..ae8b2b63de9 100644
--- a/src/include/commands/explain.h
+++ b/src/include/commands/explain.h
@@ -93,6 +93,14 @@ extern void ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into,
ParamListInfo params, QueryEnvironment *queryEnv,
const instr_time *planduration,
const BufferUsage *bufusage);
+extern void ExplainNode(PlanState *planstate, List *ancestors,
+ const char *relationship, const char *plan_name,
+ ExplainState *es);
+extern void show_scan_qual(List *qual, const char *qlabel,
+ PlanState *planstate, List *ancestors,
+ ExplainState *es);
+extern void show_instrumentation_count(const char *qlabel, int which,
+ PlanState *planstate, ExplainState *es);
extern void ExplainPrintPlan(ExplainState *es, QueryDesc *queryDesc);
extern void ExplainPrintTriggers(ExplainState *es, QueryDesc *queryDesc);
diff --git a/src/include/commands/trigger.h b/src/include/commands/trigger.h
index 430e3ca7ddf..15e1fbe7700 100644
--- a/src/include/commands/trigger.h
+++ b/src/include/commands/trigger.h
@@ -209,15 +209,15 @@ extern void ExecASDeleteTriggers(EState *estate,
extern bool ExecBRDeleteTriggers(EState *estate,
EPQState *epqstate,
ResultRelInfo *relinfo,
- ItemPointer tupleid,
+ Datum tupleid,
HeapTuple fdw_trigtuple,
TupleTableSlot **epqslot,
TM_Result *tmresult,
TM_FailureData *tmfd);
extern void ExecARDeleteTriggers(EState *estate,
ResultRelInfo *relinfo,
- ItemPointer tupleid,
HeapTuple fdw_trigtuple,
+ TupleTableSlot *slot,
TransitionCaptureState *transition_capture,
bool is_crosspart_update);
extern bool ExecIRDeleteTriggers(EState *estate,
@@ -231,7 +231,7 @@ extern void ExecASUpdateTriggers(EState *estate,
extern bool ExecBRUpdateTriggers(EState *estate,
EPQState *epqstate,
ResultRelInfo *relinfo,
- ItemPointer tupleid,
+ Datum tupleid,
HeapTuple fdw_trigtuple,
TupleTableSlot *newslot,
TM_Result *tmresult,
@@ -240,8 +240,8 @@ extern void ExecARUpdateTriggers(EState *estate,
ResultRelInfo *relinfo,
ResultRelInfo *src_partinfo,
ResultRelInfo *dst_partinfo,
- ItemPointer tupleid,
HeapTuple fdw_trigtuple,
+ TupleTableSlot *oldslot,
TupleTableSlot *newslot,
List *recheckIndexes,
TransitionCaptureState *transition_capture,
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index 39fbd5f10a5..3a8ee4fbf05 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -376,6 +376,9 @@ extern void parallel_vacuum_main(dsm_segment *seg, shm_toc *toc);
extern void analyze_rel(Oid relid, RangeVar *relation,
VacuumParams *params, List *va_cols, bool in_outer_xact,
BufferAccessStrategy bstrategy);
+extern int acquire_sample_rows(Relation onerel, int elevel,
+ HeapTuple *rows, int targrows,
+ double *totalrows, double *totaldeadrows);
extern bool std_typanalyze(VacAttrStats *stats);
/* in utils/misc/sampling.c --- duplicate of declarations in utils/sampling.h */
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index ac02247947e..2cc92d66f93 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -633,6 +633,16 @@ extern List *ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
bool noDupErr,
bool *specConflict, List *arbiterIndexes,
bool onlySummarizing);
+extern List *ExecUpdateIndexTuples(ResultRelInfo *resultRelInfo,
+ TupleTableSlot *slot,
+ TupleTableSlot *oldSlot,
+ EState *estate,
+ bool noDupErr,
+ bool *specConflict, List *arbiterIndexes,
+ bool onlySummarizing);
+extern void ExecDeleteIndexTuples(ResultRelInfo *resultRelInfo,
+ TupleTableSlot *slot,
+ EState *estate);
extern bool ExecCheckIndexConstraints(ResultRelInfo *resultRelInfo,
TupleTableSlot *slot,
EState *estate, ItemPointer conflictTid,
diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h
index 996c62e3055..50a2494c019 100644
--- a/src/include/foreign/fdwapi.h
+++ b/src/include/foreign/fdwapi.h
@@ -13,6 +13,7 @@
#define FDWAPI_H
#include "access/parallel.h"
+#include "access/tableam.h"
#include "nodes/execnodes.h"
#include "nodes/pathnodes.h"
@@ -148,11 +149,6 @@ typedef void (*ExplainForeignModify_function) (ModifyTableState *mtstate,
typedef void (*ExplainDirectModify_function) (ForeignScanState *node,
struct ExplainState *es);
-typedef int (*AcquireSampleRowsFunc) (Relation relation, int elevel,
- HeapTuple *rows, int targrows,
- double *totalrows,
- double *totaldeadrows);
-
typedef bool (*AnalyzeForeignTable_function) (Relation relation,
AcquireSampleRowsFunc *func,
BlockNumber *totalpages);
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 49419f14f0d..037ab7dd3da 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -449,6 +449,8 @@ typedef struct ResultRelInfo
/* relation descriptor for result relation */
Relation ri_RelationDesc;
+ RowRefType ri_RowRefType;
+
/* # of indices existing on result relation */
int ri_NumIndices;
@@ -744,6 +746,7 @@ typedef struct ExecRowMark
Index prti; /* parent range table index, if child */
Index rowmarkId; /* unique identifier for resjunk columns */
RowMarkType markType; /* see enum in nodes/plannodes.h */
+ RowRefType refType;
LockClauseStrength strength; /* LockingClause's strength, or LCS_NONE */
LockWaitPolicy waitPolicy; /* NOWAIT and SKIP LOCKED */
bool ermActive; /* is this mark relevant for current tuple? */
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 9dca3b65287..f51ec29fc92 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -1073,6 +1073,7 @@ typedef struct RangeTblEntry
int rellockmode; /* lock level that query requires on the rel */
struct TableSampleClause *tablesample; /* sampling info, or NULL */
Index perminfoindex;
+ RowRefType reftype;
/*
* Fields valid for a subquery RTE (else NULL):
@@ -2823,6 +2824,7 @@ typedef struct CreateAmStmt
char *amname; /* access method name */
List *handler_name; /* handler function name */
char amtype; /* type of access method */
+ char *tableam_name; /* table AM name */
} CreateAmStmt;
/* ----------------------
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index d64fe6a328b..77130245e8f 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -1352,7 +1352,7 @@ typedef enum RowMarkType
* child relations will also have entries with isParent = true. The child
* entries have rti == child rel's RT index and prti == top parent's RT index,
* and can therefore be recognized as children by the fact that prti != rti.
- * The parent's allMarkTypes field gets the OR of (1<<markType) across all children.
diff --git a/src/include/storage/standby.h b/src/include/storage/standby.h
--- a/src/include/storage/standby.h
+++ b/src/include/storage/standby.h
@@ typedef struct RunningTransactionsData
TransactionId nextXid; /* xid from ShmemVariableCache->nextXid */
TransactionId oldestRunningXid; /* *not* oldestXmin */
TransactionId latestCompletedXid; /* so we can set xmax */
+ CommitSeqNo csn; /* current csn */
TransactionId *xids; /* array of (sub)xids still running */
} RunningTransactionsData;
diff --git a/src/include/storage/standbydefs.h b/src/include/storage/standbydefs.h
index 188e348618a..23dddce8d84 100644
--- a/src/include/storage/standbydefs.h
+++ b/src/include/storage/standbydefs.h
@@ -52,6 +52,7 @@ typedef struct xl_running_xacts
TransactionId nextXid; /* xid from ShmemVariableCache->nextXid */
TransactionId oldestRunningXid; /* *not* oldestXmin */
TransactionId latestCompletedXid; /* so we can set xmax */
+ CommitSeqNo csn; /* current csn */
TransactionId xids[FLEXIBLE_ARRAY_MEMBER];
} xl_running_xacts;
diff --git a/src/include/utils/catcache.h b/src/include/utils/catcache.h
index a32d7222a99..91880e498f7 100644
--- a/src/include/utils/catcache.h
+++ b/src/include/utils/catcache.h
@@ -232,5 +232,28 @@ extern void PrepareToInvalidateCacheTuple(Relation relation,
extern void PrintCatCacheLeakWarning(HeapTuple tuple);
extern void PrintCatCacheListLeakWarning(CatCList *list);
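+/* Hooks that let an extension (e.g. orioledb) intercept catcache lookups */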
+typedef CatCTup *(*SearchCatCacheInternal_hook_type)(CatCache *cache,
+ int nkeys,
+ Datum v1, Datum v2,
+ Datum v3, Datum v4);
+extern SearchCatCacheInternal_hook_type SearchCatCacheInternal_hook;
+
+typedef CatCList *(*SearchCatCacheList_hook_type)(CatCache *cache,
+ int nkeys,
+ Datum v1,
+ Datum v2,
+ Datum v3);
+extern SearchCatCacheList_hook_type SearchCatCacheList_hook;
+
+typedef TupleDesc (*SysCacheGetAttr_hook_type)(CatCache *SysCache);
+extern SysCacheGetAttr_hook_type SysCacheGetAttr_hook;
+
+typedef uint32 (*GetCatCacheHashValue_hook_type)(CatCache *cache,
+ int nkeys,
+ Datum v1,
+ Datum v2,
+ Datum v3,
+ Datum v4);
+extern GetCatCacheHashValue_hook_type GetCatCacheHashValue_hook;
#endif /* CATCACHE_H */
diff --git a/src/include/utils/elog.h b/src/include/utils/elog.h
index 0292e88b4f2..5b7deaa286c 100644
--- a/src/include/utils/elog.h
+++ b/src/include/utils/elog.h
@@ -542,4 +542,10 @@ extern void write_stderr(const char *fmt,...) pg_attribute_printf(1, 2);
*/
extern void write_stderr_signal_safe(const char *fmt);
+typedef void (*CustomErrorCleanupHookType) (void);
+
+extern CustomErrorCleanupHookType CustomErrorCleanupHook;
+
+extern void CustomErrorCleanup(void);
+
#endif /* ELOG_H */
diff --git a/src/include/utils/fmgrtab.h b/src/include/utils/fmgrtab.h
index 838ffe3bc1c..f7e416653a6 100644
--- a/src/include/utils/fmgrtab.h
+++ b/src/include/utils/fmgrtab.h
@@ -46,4 +46,7 @@ extern PGDLLIMPORT const Oid fmgr_last_builtin_oid; /* highest function OID in
#define InvalidOidBuiltinMapping PG_UINT16_MAX
extern PGDLLIMPORT const uint16 fmgr_builtin_oid_index[];
+extern const FmgrBuiltin *fmgr_isbuiltin(Oid id);
+extern const FmgrBuiltin *fmgr_lookupByName(const char *name);
+
#endif /* FMGRTAB_H */
diff --git a/src/include/utils/inval.h b/src/include/utils/inval.h
index 14b4eac0630..1461271bbe6 100644
--- a/src/include/utils/inval.h
+++ b/src/include/utils/inval.h
@@ -22,6 +22,7 @@ extern PGDLLIMPORT int debug_discard_caches;
typedef void (*SyscacheCallbackFunction) (Datum arg, int cacheid, uint32 hashvalue);
typedef void (*RelcacheCallbackFunction) (Datum arg, Oid relid);
+typedef void (*UsercacheCallbackFunction) (Datum arg, Oid arg1, Oid arg2, Oid arg3);
extern void AcceptInvalidationMessages(void);
@@ -48,6 +49,8 @@ extern void CacheInvalidateRelcacheByTuple(HeapTuple classTuple);
extern void CacheInvalidateRelcacheByRelid(Oid relid);
+extern void CacheInvalidateRelcacheByDbidRelid(Oid dbid, Oid relid);
+
extern void CacheInvalidateSmgr(RelFileLocatorBackend rlocator);
extern void CacheInvalidateRelmap(Oid databaseId);
@@ -59,6 +62,9 @@ extern void CacheRegisterSyscacheCallback(int cacheid,
extern void CacheRegisterRelcacheCallback(RelcacheCallbackFunction func,
Datum arg);
+extern void CacheRegisterUsercacheCallback(UsercacheCallbackFunction func,
+ Datum arg);
+
extern void CallSyscacheCallbacks(int cacheid, uint32 hashvalue);
extern void InvalidateSystemCaches(void);
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index e2a72435427..2fd19a95cbb 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -102,6 +102,8 @@ extern void make_icu_collator(const char *iculocstr,
extern bool pg_locale_deterministic(pg_locale_t locale);
extern pg_locale_t pg_newlocale_from_collation(Oid collid);
+typedef bool (*pg_newlocale_from_collation_hook_type)();
+extern pg_newlocale_from_collation_hook_type pg_newlocale_from_collation_hook;
extern char *get_collation_actual_version(char collprovider, const char *collcollate);
extern int pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale);
diff --git a/src/include/utils/snapmgr.h b/src/include/utils/snapmgr.h
index 980d37a1947..d05de790428 100644
--- a/src/include/utils/snapmgr.h
+++ b/src/include/utils/snapmgr.h
@@ -18,6 +18,9 @@
#include "utils/resowner.h"
#include "utils/snapshot.h"
+#ifndef SNAPSHOT_H
+typedef void (*snapshot_hook_type) (Snapshot snapshot);
+#endif
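+/* (snapshot.h declares the same typedef; the guard avoids a duplicate) */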
/*
* The structure used to map times to TransactionId values for the "snapshot
@@ -120,7 +123,7 @@ extern void PushActiveSnapshotWithLevel(Snapshot snapshot, int snap_level);
extern void PushCopiedSnapshot(Snapshot snapshot);
extern void UpdateActiveSnapshotCommandId(void);
extern void PopActiveSnapshot(void);
-extern Snapshot GetActiveSnapshot(void);
+extern PGDLLIMPORT Snapshot GetActiveSnapshot(void);
extern bool ActiveSnapshotSet(void);
extern Snapshot RegisterSnapshot(Snapshot snapshot);
@@ -178,4 +181,10 @@ extern void SerializeSnapshot(Snapshot snapshot, char *start_address);
extern Snapshot RestoreSnapshot(char *start_address);
extern void RestoreTransactionSnapshot(Snapshot snapshot, void *source_pgproc);
+typedef void (*reset_xmin_hook_type) (void);
+
+extern snapshot_hook_type snapshot_register_hook;
+extern snapshot_hook_type snapshot_deregister_hook;
+extern reset_xmin_hook_type reset_xmin_hook;
+
#endif /* SNAPMGR_H */
diff --git a/src/include/utils/snapshot.h b/src/include/utils/snapshot.h
index 583a667a40a..01093a33315 100644
--- a/src/include/utils/snapshot.h
+++ b/src/include/utils/snapshot.h
@@ -122,6 +122,20 @@ typedef struct SnapshotData *Snapshot;
#define InvalidSnapshot ((Snapshot) NULL)
+typedef struct
+{
+ uint64 undoLocation; /* undo log location retained by this snapshot */
+ uint64 xmin;
+ pairingheap_node ph_node;
+} RetainUndoLocationPHNode;
+
+typedef struct CSNSnapshotData
+{
+ uint64 xmin;
+ CommitSeqNo snapshotcsn;
+ XLogRecPtr xlogptr;
+} CSNSnapshotData;
+
/*
* Struct representing all kind of possible snapshots.
*
@@ -214,6 +228,12 @@ typedef struct SnapshotData
* transactions completed since the last GetSnapshotData().
*/
uint64 snapXactCompletionCount;
+
+ RetainUndoLocationPHNode undoRegularLocationPhNode;
+ RetainUndoLocationPHNode undoSystemLocationPhNode;
+ CSNSnapshotData csnSnapshotData;
} SnapshotData;
+typedef void (*snapshot_hook_type) (Snapshot snapshot);
+
#endif /* SNAPSHOT_H */
diff --git a/src/include/utils/tuplestore.h b/src/include/utils/tuplestore.h
index 36424b80b1b..e26d9a65308 100644
--- a/src/include/utils/tuplestore.h
+++ b/src/include/utils/tuplestore.h
@@ -73,6 +73,9 @@ extern bool tuplestore_in_memory(Tuplestorestate *state);
extern bool tuplestore_gettupleslot(Tuplestorestate *state, bool forward,
bool copy, TupleTableSlot *slot);
+extern bool tuplestore_force_gettupleslot(Tuplestorestate *state, bool forward,
+ bool copy, TupleTableSlot *slot);
+
extern bool tuplestore_advance(Tuplestorestate *state, bool forward);
extern bool tuplestore_skiptuples(Tuplestorestate *state,
diff --git a/src/include/utils/typcache.h b/src/include/utils/typcache.h
index 95f3a9ee308..77d57927de0 100644
--- a/src/include/utils/typcache.h
+++ b/src/include/utils/typcache.h
@@ -206,4 +206,9 @@ extern void SharedRecordTypmodRegistryInit(SharedRecordTypmodRegistry *,
extern void SharedRecordTypmodRegistryAttach(SharedRecordTypmodRegistry *);
+typedef void (*load_typcache_tupdesc_hook_type)(TypeCacheEntry *typentry);
+extern PGDLLIMPORT load_typcache_tupdesc_hook_type load_typcache_tupdesc_hook;
+typedef void (*load_enum_cache_data_hook_type)(TypeCacheEntry *tcache);
+extern PGDLLIMPORT load_enum_cache_data_hook_type load_enum_cache_data_hook;
+
#endif /* TYPCACHE_H */
diff --git a/src/include/varatt.h b/src/include/varatt.h
index e34870526ba..bc2b39e89f8 100644
--- a/src/include/varatt.h
+++ b/src/include/varatt.h
@@ -38,6 +38,25 @@ typedef struct varatt_external
Oid va_toastrelid; /* RelID of TOAST table containing it */
} varatt_external;
+typedef struct OToastExternal
+{
+ uint16 data_size; /* length of OToastExternal data */
+ int16 attnum;
+ int32 raw_size; /* original data size */
+ int32 toasted_size; /* compressed original data size */
+ /* for fetching data from TOAST tree */
+ CommitSeqNo csn;
+ /* for finding TOAST tree */
+ Oid datoid;
+ Oid relid;
+ Oid relnode;
+ /* for storing primary index tuple */
+ uint8 formatFlags; /* primary index tuple flags */
+ char data[FLEXIBLE_ARRAY_MEMBER]; /* data (primary index tuple) */
+} OToastExternal;
+
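+/*
+ * OToastExternal is variable-sized: data_size counts the trailing data[]
+ * bytes, which VARSIZE_EXTERNAL() adds on top of O_TOAST_EXTERNAL_SZ for
+ * VARTAG_ORIOLEDB datums (see varatt.h below).
+ */
+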
+#define ORIOLEDB_EXT_FORMAT_FLAGS_BITS 6
+
/*
* These macros define the "saved size" portion of va_extinfo. Its remaining
* two high-order bits identify the compression method.
@@ -86,17 +105,21 @@ typedef enum vartag_external
VARTAG_INDIRECT = 1,
VARTAG_EXPANDED_RO = 2,
VARTAG_EXPANDED_RW = 3,
- VARTAG_ONDISK = 18
+ VARTAG_ONDISK = 18,
+ VARTAG_ORIOLEDB = 34
} vartag_external;
/* this test relies on the specific tag values above */
#define VARTAG_IS_EXPANDED(tag) \
(((tag) & ~1) == VARTAG_EXPANDED_RO)
+#define O_TOAST_EXTERNAL_SZ offsetof(OToastExternal, data)
+
#define VARTAG_SIZE(tag) \
((tag) == VARTAG_INDIRECT ? sizeof(varatt_indirect) : \
VARTAG_IS_EXPANDED(tag) ? sizeof(varatt_expanded) : \
(tag) == VARTAG_ONDISK ? sizeof(varatt_external) : \
+ (tag) == VARTAG_ORIOLEDB ? O_TOAST_EXTERNAL_SZ : \
(AssertMacro(false), 0))
/*
@@ -282,11 +305,16 @@ typedef struct
#define VARDATA_SHORT(PTR) VARDATA_1B(PTR)
#define VARTAG_EXTERNAL(PTR) VARTAG_1B_E(PTR)
-#define VARSIZE_EXTERNAL(PTR) (VARHDRSZ_EXTERNAL + VARTAG_SIZE(VARTAG_EXTERNAL(PTR)))
+#define VARSIZE_EXTERNAL(PTR) (VARHDRSZ_EXTERNAL + VARTAG_SIZE(VARTAG_EXTERNAL(PTR)) \
+ + (VARATT_IS_EXTERNAL_ORIOLEDB(PTR) ? \
+ *((uint16 *) VARDATA_1B_E(PTR)) \
+ : 0))
+
#define VARDATA_EXTERNAL(PTR) VARDATA_1B_E(PTR)
#define VARATT_IS_COMPRESSED(PTR) VARATT_IS_4B_C(PTR)
#define VARATT_IS_EXTERNAL(PTR) VARATT_IS_1B_E(PTR)
+
#define VARATT_IS_EXTERNAL_ONDISK(PTR) \
(VARATT_IS_EXTERNAL(PTR) && VARTAG_EXTERNAL(PTR) == VARTAG_ONDISK)
#define VARATT_IS_EXTERNAL_INDIRECT(PTR) \
@@ -299,6 +327,9 @@ typedef struct
(VARATT_IS_EXTERNAL(PTR) && VARTAG_IS_EXPANDED(VARTAG_EXTERNAL(PTR)))
#define VARATT_IS_EXTERNAL_NON_EXPANDED(PTR) \
(VARATT_IS_EXTERNAL(PTR) && !VARTAG_IS_EXPANDED(VARTAG_EXTERNAL(PTR)))
+#define VARATT_IS_EXTERNAL_ORIOLEDB(PTR) \
+ (VARATT_IS_EXTERNAL(PTR) && VARTAG_EXTERNAL(PTR) == VARTAG_ORIOLEDB)
+
#define VARATT_IS_SHORT(PTR) VARATT_IS_1B(PTR)
#define VARATT_IS_EXTENDED(PTR) (!VARATT_IS_4B_U(PTR))
diff --git a/src/makefiles/meson.build b/src/makefiles/meson.build
index 13045cbd6e4..16ce1650e2e 100644
--- a/src/makefiles/meson.build
+++ b/src/makefiles/meson.build
@@ -37,6 +37,7 @@ pgxs_kv = {
'PACKAGE_VERSION': pg_version,
'PG_MAJORVERSION': pg_version_major,
'PG_VERSION_NUM': pg_version_num,
+ 'ORIOLEDB_PATCHSET_VERSION': orioledb_patchset_version,
'configure_input': 'meson',
'vpath_build': 'yes',
diff --git a/src/test/isolation/expected/eval-plan-qual-2.out b/src/test/isolation/expected/eval-plan-qual-2.out
new file mode 100644
index 00000000000..117a3d3be8d
--- /dev/null
+++ b/src/test/isolation/expected/eval-plan-qual-2.out
@@ -0,0 +1,37 @@
+Parsed test spec with 3 sessions
+
+starting permutation: read_u wx2 wb1 c2 c1 read_u read
+step read_u: SELECT * FROM accounts;
+accountid|balance|balance2
+---------+-------+--------
+checking | 600| 1200
+savings | 600| 1200
+(2 rows)
+
+step wx2: UPDATE accounts SET balance = balance + 450 WHERE accountid = 'checking' RETURNING balance;
+balance
+-------
+ 1050
+(1 row)
+
+step wb1: DELETE FROM accounts WHERE balance = 600 RETURNING *;
+step c2: COMMIT;
+step wb1: <... completed>
+accountid|balance|balance2
+---------+-------+--------
+savings | 600| 1200
+(1 row)
+
+step c1: COMMIT;
+step read_u: SELECT * FROM accounts;
+accountid|balance|balance2
+---------+-------+--------
+checking | 1050| 2100
+(1 row)
+
+step read: SELECT * FROM accounts ORDER BY accountid;
+accountid|balance|balance2
+---------+-------+--------
+checking | 1050| 2100
+(1 row)
+
diff --git a/src/test/isolation/isolation_schedule b/src/test/isolation/isolation_schedule
index 9b0bb8a29b3..124c170746c 100644
--- a/src/test/isolation/isolation_schedule
+++ b/src/test/isolation/isolation_schedule
@@ -36,6 +36,7 @@ test: fk-partitioned-2
test: fk-snapshot
test: subxid-overflow
test: eval-plan-qual
+test: eval-plan-qual-2
test: eval-plan-qual-trigger
test: inplace-inval
test: intra-grant-inplace
diff --git a/src/test/isolation/specs/eval-plan-qual-2.spec b/src/test/isolation/specs/eval-plan-qual-2.spec
new file mode 100644
index 00000000000..30447bef24a
--- /dev/null
+++ b/src/test/isolation/specs/eval-plan-qual-2.spec
@@ -0,0 +1,30 @@
+setup
+{
+ CREATE TABLE accounts (accountid text PRIMARY KEY, balance numeric not null,
+ balance2 numeric GENERATED ALWAYS AS (balance * 2) STORED);
+ INSERT INTO accounts VALUES ('checking', 600), ('savings', 600);
+}
+
+teardown
+{
+ DROP TABLE accounts;
+}
+
+session s1
+setup { BEGIN ISOLATION LEVEL READ COMMITTED; }
+step wb1 { DELETE FROM accounts WHERE balance = 600 RETURNING *; }
+step c1 { COMMIT; }
+
+session s2
+setup { BEGIN ISOLATION LEVEL READ COMMITTED; }
+step wx2 { UPDATE accounts SET balance = balance + 450 WHERE accountid = 'checking' RETURNING balance; }
+step c2 { COMMIT; }
+
+session s3
+setup { BEGIN ISOLATION LEVEL READ COMMITTED; }
+step read { SELECT * FROM accounts ORDER BY accountid; }
+step read_u { SELECT * FROM accounts; }
+
+teardown { COMMIT; }
+
+permutation read_u wx2 wb1 c2 c1 read_u read
diff --git a/src/test/modules/dummy_index_am/dummy_index_am.c b/src/test/modules/dummy_index_am/dummy_index_am.c
index c14e0abe0c6..09c5d20479d 100644
--- a/src/test/modules/dummy_index_am/dummy_index_am.c
+++ b/src/test/modules/dummy_index_am/dummy_index_am.c
@@ -164,7 +164,7 @@ dibuildempty(Relation index)
*/
static bool
diinsert(Relation index, Datum *values, bool *isnull,
- ItemPointer ht_ctid, Relation heapRel,
+ Datum tupleid, Relation heapRel,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
IndexInfo *indexInfo)
@@ -302,7 +302,8 @@ dihandler(PG_FUNCTION_ARGS)
amroutine->ambuild = dibuild;
amroutine->ambuildempty = dibuildempty;
- amroutine->aminsert = diinsert;
+ amroutine->aminsert = NULL;
+ amroutine->aminsertextended = diinsert;
amroutine->ambulkdelete = dibulkdelete;
amroutine->amvacuumcleanup = divacuumcleanup;
amroutine->amcanreturn = NULL;
diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c
index bcbc6d910f1..fb75cf0905f 100644
--- a/src/test/regress/regress.c
+++ b/src/test/regress/regress.c
@@ -606,7 +606,7 @@ make_tuple_indirect(PG_FUNCTION_ARGS)
continue;
/* copy datum, so it still lives later */
- if (VARATT_IS_EXTERNAL_ONDISK(attr))
+ if (VARATT_IS_EXTERNAL_ONDISK(attr) || VARATT_IS_EXTERNAL_ORIOLEDB(attr))
attr = detoast_external_attr(attr);
else
{
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 4791528e140..264bdbdee0f 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -3146,6 +3146,7 @@ amgetbitmap_function
amgettuple_function
aminitparallelscan_function
aminsert_function
+aminsert_extended_function
ammarkpos_function
amoptions_function
amparallelrescan_function