Skip to content

[PGPRO-12159] Added functions for exploring the pages of the rum index. #150

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ OBJS = src/rumsort.o src/rum_ts_utils.o src/rumtsquery.o \
src/rumbtree.o src/rumbulk.o src/rumdatapage.o \
src/rumentrypage.o src/rumget.o src/ruminsert.o \
src/rumscan.o src/rumutil.o src/rumvacuum.o src/rumvalidate.o \
src/btree_rum.o src/rum_arr_utils.o $(WIN32RES)
src/btree_rum.o src/rum_arr_utils.o src/rum_debug_funcs.o $(WIN32RES)

DATA_updates = rum--1.0--1.1.sql rum--1.1--1.2.sql \
rum--1.2--1.3.sql
Expand All @@ -26,7 +26,7 @@ REGRESS = security rum rum_validate rum_hash ruminv timestamp \
int2 int4 int8 float4 float8 money oid \
time timetz date interval \
macaddr inet cidr text varchar char bytea bit varbit \
numeric rum_weight expr array
numeric rum_weight expr array rum_debug_funcs

TAP_TESTS = 1

Expand All @@ -45,6 +45,11 @@ include $(top_builddir)/src/Makefile.global
include $(top_srcdir)/contrib/contrib-global.mk
endif

# Run the rum_debug_funcs regression test only when PGPRO_EDITION is
# "enterprise"; for any other value (including unset) it is removed
# from the REGRESS list.
ifneq ($(PGPRO_EDITION), enterprise)
REGRESS := $(filter-out rum_debug_funcs, $(REGRESS))
endif

# Generate the versioned extension script from rum_init.sql by copying its
# contents into the target file.
$(EXTENSION)--$(EXTVERSION).sql: rum_init.sql
cat $^ > $@

Expand Down
12,011 changes: 12,011 additions & 0 deletions expected/rum_debug_funcs.out

Large diffs are not rendered by default.

11,720 changes: 11,720 additions & 0 deletions expected/rum_debug_funcs_1.out

Large diffs are not rendered by default.

128 changes: 127 additions & 1 deletion rum_init.sql
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ LANGUAGE C;
/*
* RUM access method
*/

CREATE ACCESS METHOD rum TYPE INDEX HANDLER rumhandler;

/*
Expand Down Expand Up @@ -1724,3 +1723,130 @@ RETURNS float4
AS 'MODULE_PATHNAME', 'rum_ts_score_td'
LANGUAGE C IMMUTABLE STRICT;

/*--------------------RUM debug functions-----------------------*/

/*
 * rum_metapage_info(rel_name, blk_num)
 *
 * Returns one row describing a RUM index metapage: the pending-list
 * head/tail and sizes, the page and entry counters, and the version.
 *
 *   rel_name  - index name, passed as text
 *   blk_num   - block number to read (the regression test passes 0)
 *
 * Implemented by the C symbol rum_metapage_info in the extension library.
 */
CREATE FUNCTION rum_metapage_info(
    rel_name text,
    blk_num integer,
    OUT pending_head bigint,
    OUT pending_tail bigint,
    OUT tail_free_size integer,
    OUT n_pending_pages bigint,
    OUT n_pending_tuples bigint,
    OUT n_total_pages bigint,
    OUT n_entry_pages bigint,
    OUT n_data_pages bigint,
    OUT n_entries bigint,
    OUT version character varying
)
LANGUAGE C
STRICT
PARALLEL SAFE
AS 'MODULE_PATHNAME', 'rum_metapage_info';

/*
 * rum_page_opaque_info(rel_name, blk_num)
 *
 * Returns one row with the per-page "opaque" information of a RUM index
 * page: left and right sibling links, maxoff, free space and the page
 * flags as a text array.
 *
 *   rel_name  - index name, passed as text
 *   blk_num   - block number of the page to inspect
 *
 * Implemented by the C symbol rum_page_opaque_info in the extension library.
 */
CREATE FUNCTION rum_page_opaque_info(
    rel_name text,
    blk_num integer,
    OUT leftlink bigint,
    OUT rightlink bigint,
    OUT maxoff integer,
    OUT freespace integer,
    OUT flags text[]
)
LANGUAGE C
STRICT
PARALLEL SAFE
AS 'MODULE_PATHNAME', 'rum_page_opaque_info';

/*
 * rum_page_items_info(rel_name, blk_num, page_type)
 *
 * Low-level reader for the items stored on one RUM index page. It returns
 * SETOF record, so every caller must supply a column definition list that
 * matches the requested page_type. The typed wrappers defined in this file
 * do that and pass:
 *
 *   0 - rum_leaf_data_page_items
 *   1 - rum_internal_data_page_items
 *   2 - rum_leaf_entry_page_items
 *   3 - rum_internal_entry_page_items
 *
 * Created with plain CREATE FUNCTION (not CREATE OR REPLACE), like every
 * other function in this section: an extension install script should fail
 * rather than silently take over an object that already exists.
 *
 * NOTE(review): unlike rum_metapage_info and rum_page_opaque_info, this
 * function is not marked PARALLEL SAFE - confirm whether that is intended.
 */
CREATE FUNCTION rum_page_items_info(
    rel_name text,
    blk_num int4,
    page_type int4
)
RETURNS SETOF record
AS 'MODULE_PATHNAME', 'rum_page_items_info'
LANGUAGE C STRICT;

/*
 * rum_leaf_data_page_items(rel_name, blk_num)
 *
 * Typed wrapper around rum_page_items_info() for page_type 0. It supplies
 * the column definition list for that page type so callers get named,
 * typed columns.
 */
CREATE FUNCTION rum_leaf_data_page_items(
    rel_name text,
    blk_num int4
)
RETURNS TABLE(
    is_high_key bool,
    tuple_id tid,
    add_info_is_null bool,
    add_info varchar
)
LANGUAGE sql
AS $$
    SELECT
        items.is_high_key,
        items.tuple_id,
        items.add_info_is_null,
        items.add_info
    FROM rum_page_items_info(rel_name, blk_num, 0)
        AS items(
            is_high_key bool,
            tuple_id tid,
            add_info_is_null bool,
            add_info varchar
        );
$$;

/*
 * rum_internal_data_page_items(rel_name, blk_num)
 *
 * Typed wrapper around rum_page_items_info() for page_type 1. Compared
 * with the leaf data page variant, each row additionally carries a
 * block_number column.
 */
CREATE FUNCTION rum_internal_data_page_items(
    rel_name text,
    blk_num int4
)
RETURNS TABLE(
    is_high_key bool,
    block_number int4,
    tuple_id tid,
    add_info_is_null bool,
    add_info varchar
)
LANGUAGE sql
AS $$
    SELECT
        items.is_high_key,
        items.block_number,
        items.tuple_id,
        items.add_info_is_null,
        items.add_info
    FROM rum_page_items_info(rel_name, blk_num, 1)
        AS items(
            is_high_key bool,
            block_number int4,
            tuple_id tid,
            add_info_is_null bool,
            add_info varchar
        );
$$;

/*
 * rum_leaf_entry_page_items(rel_name, blk_num)
 *
 * Typed wrapper around rum_page_items_info() for page_type 2. Each row
 * carries the key, its attribute number and category, a tuple id with its
 * additional information, and two posting-tree columns.
 *
 * NOTE(review): "is_postring_tree" / "postring_tree_root" look like a
 * misspelling of "posting". They are kept as-is because they are the
 * function's visible column names; renaming them would also require
 * regenerating the expected regression output.
 */
CREATE FUNCTION rum_leaf_entry_page_items(
    rel_name text,
    blk_num int4
)
RETURNS TABLE(
    key varchar,
    attrnum int4,
    category varchar,
    tuple_id tid,
    add_info_is_null bool,
    add_info varchar,
    is_postring_tree bool,
    postring_tree_root int4
)
LANGUAGE sql
AS $$
    SELECT
        items.key,
        items.attrnum,
        items.category,
        items.tuple_id,
        items.add_info_is_null,
        items.add_info,
        items.is_postring_tree,
        items.postring_tree_root
    FROM rum_page_items_info(rel_name, blk_num, 2)
        AS items(
            key varchar,
            attrnum int4,
            category varchar,
            tuple_id tid,
            add_info_is_null bool,
            add_info varchar,
            is_postring_tree bool,
            postring_tree_root int4
        );
$$;

/*
 * rum_internal_entry_page_items(rel_name, blk_num)
 *
 * Typed wrapper around rum_page_items_info() for page_type 3. Each row
 * carries the key, its attribute number and category, and a down_link
 * column.
 */
CREATE FUNCTION rum_internal_entry_page_items(
    rel_name text,
    blk_num int4
)
RETURNS TABLE(
    key varchar,
    attrnum int4,
    category varchar,
    down_link int4
)
LANGUAGE sql
AS $$
    SELECT
        items.key,
        items.attrnum,
        items.category,
        items.down_link
    FROM rum_page_items_info(rel_name, blk_num, 3)
        AS items(
            key varchar,
            attrnum int4,
            category varchar,
            down_link int4
        );
$$;
148 changes: 148 additions & 0 deletions sql/rum_debug_funcs.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
-- Base table for the page-inspection tests: an id, a tsvector and a timestamp.
CREATE TABLE test_debug_table (id int, t tsvector, d timestamp);

-- Load the same data file five times.
\copy test_debug_table from 'data/tsts.data'
\copy test_debug_table from 'data/tsts.data'
\copy test_debug_table from 'data/tsts.data'
\copy test_debug_table from 'data/tsts.data'
\copy test_debug_table from 'data/tsts.data'

-- Add 5000 more rows that all carry the single lexeme 'b9'.
-- This is needed so that every type of page in the posting tree is created.
DO $$
BEGIN
FOR i IN 1..5000 LOOP
INSERT INTO test_debug_table(id, t, d)
VALUES(i, 'b9', '2016-05-02 00:21:22.326724');
END LOOP;
END;
$$;

-- Index without additional information.
-- The same block numbers are inspected on both 32-bit and 64-bit machines.
CREATE INDEX test_without_addinfo_idx ON test_debug_table
USING rum (t, d);

-- Metapage (block 0), shown in expanded output.
SELECT * FROM rum_metapage_info('test_without_addinfo_idx', 0) \gx

-- Block 1, read as an internal entry page.
SELECT * FROM rum_page_opaque_info('test_without_addinfo_idx', 1);
SELECT * FROM rum_internal_entry_page_items('test_without_addinfo_idx', 1);

-- Block 10, read as a leaf entry page.
SELECT * FROM rum_page_opaque_info('test_without_addinfo_idx', 10);
SELECT * FROM rum_leaf_entry_page_items('test_without_addinfo_idx', 10);

-- Block 7, read as an internal data page.
SELECT * FROM rum_page_opaque_info('test_without_addinfo_idx', 7);
SELECT * FROM rum_internal_data_page_items('test_without_addinfo_idx', 7);

-- Block 9, read as a leaf data page.
SELECT * FROM rum_page_opaque_info('test_without_addinfo_idx', 9);
SELECT * FROM rum_leaf_data_page_items('test_without_addinfo_idx', 9);

-- Index with additional information attached (order_by_attach = false):
-- column d is attached to column t.
CREATE INDEX test_with_addinfo_idx_false ON test_debug_table
USING rum (t rum_tsvector_addon_ops, d)
WITH (attach = 'd', to = 't', order_by_attach='f');

-- Metapage (block 0), shown in expanded output.
SELECT * FROM rum_metapage_info('test_with_addinfo_idx_false', 0) \gx

-- Block 1, read as an internal entry page.
SELECT * FROM rum_page_opaque_info('test_with_addinfo_idx_false', 1);
SELECT * FROM rum_internal_entry_page_items('test_with_addinfo_idx_false', 1);

-- Block numbers chosen for a 64-bit machine:
-- leaf entry page (28), internal data page (19), leaf data page (20).
SELECT * FROM rum_page_opaque_info('test_with_addinfo_idx_false', 28);
SELECT * FROM rum_leaf_entry_page_items('test_with_addinfo_idx_false', 28);

SELECT * FROM rum_page_opaque_info('test_with_addinfo_idx_false', 19);
SELECT * FROM rum_internal_data_page_items('test_with_addinfo_idx_false', 19);

SELECT * FROM rum_page_opaque_info('test_with_addinfo_idx_false', 20);
SELECT * FROM rum_leaf_data_page_items('test_with_addinfo_idx_false', 20);

-- Block numbers chosen for a 32-bit machine:
-- leaf entry page (32), internal data page (22), leaf data page (27).
SELECT * FROM rum_page_opaque_info('test_with_addinfo_idx_false', 32);
SELECT * FROM rum_leaf_entry_page_items('test_with_addinfo_idx_false', 32);

SELECT * FROM rum_page_opaque_info('test_with_addinfo_idx_false', 22);
SELECT * FROM rum_internal_data_page_items('test_with_addinfo_idx_false', 22);

SELECT * FROM rum_page_opaque_info('test_with_addinfo_idx_false', 27);
SELECT * FROM rum_leaf_data_page_items('test_with_addinfo_idx_false', 27);

-- Index with additional information attached (order_by_attach = true):
-- column id is attached to column t.
-- The same block numbers are inspected on both 32-bit and 64-bit machines.
CREATE INDEX test_with_addinfo_idx_true ON test_debug_table
USING rum (t rum_tsvector_addon_ops, id)
WITH (attach = 'id', to = 't', order_by_attach='t');

-- Metapage (block 0), shown in expanded output.
SELECT * FROM rum_metapage_info('test_with_addinfo_idx_true', 0) \gx

-- Block 1, read as an internal entry page.
SELECT * FROM rum_page_opaque_info('test_with_addinfo_idx_true', 1);
SELECT * FROM rum_internal_entry_page_items('test_with_addinfo_idx_true', 1);

-- Block 27, read as a leaf entry page.
SELECT * FROM rum_page_opaque_info('test_with_addinfo_idx_true', 27);
SELECT * FROM rum_leaf_entry_page_items('test_with_addinfo_idx_true', 27);

-- Block 19, read as an internal data page.
SELECT * FROM rum_page_opaque_info('test_with_addinfo_idx_true', 19);
SELECT * FROM rum_internal_data_page_items('test_with_addinfo_idx_true', 19);

-- Block 22, read as a leaf data page.
SELECT * FROM rum_page_opaque_info('test_with_addinfo_idx_true', 22);
SELECT * FROM rum_leaf_data_page_items('test_with_addinfo_idx_true', 22);

-- Dropping the table also removes the three indexes built on it.
DROP TABLE test_debug_table;

-- Second scenario: tsvectors that carry lexeme positions and weights.
CREATE TABLE test_debug_table_with_weight(t text, a tsvector, r text);

-- Trigger function that fills column a from the two text columns:
-- lexemes taken from r get weight 'A', lexemes taken from t get weight 'D'.
-- NULL inputs are treated as empty strings.
CREATE FUNCTION fill_test_debug_weight_trigger() RETURNS trigger AS $$
begin
new.a :=
setweight(to_tsvector('pg_catalog.english', coalesce(new.r,'')), 'A') ||
setweight(to_tsvector('pg_catalog.english', coalesce(new.t,'')), 'D');
return new;
end
$$ LANGUAGE plpgsql;

-- Recompute column a on every inserted or updated row.
CREATE TRIGGER test_debug_weight_trigger
BEFORE INSERT OR UPDATE ON test_debug_table_with_weight
FOR EACH ROW EXECUTE PROCEDURE fill_test_debug_weight_trigger();

-- Load the same '|'-delimited data file five times into columns t and r.
\copy test_debug_table_with_weight(t,r) FROM 'data/rum_weight.data' DELIMITER '|';
\copy test_debug_table_with_weight(t,r) FROM 'data/rum_weight.data' DELIMITER '|';
\copy test_debug_table_with_weight(t,r) FROM 'data/rum_weight.data' DELIMITER '|';
\copy test_debug_table_with_weight(t,r) FROM 'data/rum_weight.data' DELIMITER '|';
\copy test_debug_table_with_weight(t,r) FROM 'data/rum_weight.data' DELIMITER '|';

-- Add 5000 identical rows, so the same lexemes occur in many tuples.
DO $$
BEGIN
FOR i IN 1..5000 LOOP
INSERT INTO test_debug_table_with_weight(t,r)
VALUES('As a reward for your reformation I write to you on this precious sheet.', 'write');
END LOOP;
END;
$$;

-- Index over the weighted tsvector column.
CREATE INDEX test_with_weight_idx ON test_debug_table_with_weight
USING rum (a rum_tsvector_ops);

-- Metapage (block 0), shown in expanded output.
SELECT * FROM rum_metapage_info('test_with_weight_idx', 0) \gx

-- Block 1, read as an internal entry page.
SELECT * FROM rum_page_opaque_info('test_with_weight_idx', 1);
SELECT * FROM rum_internal_entry_page_items('test_with_weight_idx', 1);

-- Block numbers chosen for a 64-bit machine:
-- leaf entry page (20), internal data page (21), leaf data page (22).
SELECT * FROM rum_page_opaque_info('test_with_weight_idx', 20);
SELECT * FROM rum_leaf_entry_page_items('test_with_weight_idx', 20);

SELECT * FROM rum_page_opaque_info('test_with_weight_idx', 21);
SELECT * FROM rum_internal_data_page_items('test_with_weight_idx', 21);

SELECT * FROM rum_page_opaque_info('test_with_weight_idx', 22);
SELECT * FROM rum_leaf_data_page_items('test_with_weight_idx', 22);

-- Block numbers chosen for a 32-bit machine:
-- leaf entry page (10), internal data page (12), leaf data page (14).
SELECT * FROM rum_page_opaque_info('test_with_weight_idx', 10);
SELECT * FROM rum_leaf_entry_page_items('test_with_weight_idx', 10);

SELECT * FROM rum_page_opaque_info('test_with_weight_idx', 12);
SELECT * FROM rum_internal_data_page_items('test_with_weight_idx', 12);

SELECT * FROM rum_page_opaque_info('test_with_weight_idx', 14);
SELECT * FROM rum_leaf_data_page_items('test_with_weight_idx', 14);

-- Clean up: the table (with its index and trigger) and the trigger function.
DROP TABLE test_debug_table_with_weight;
DROP FUNCTION fill_test_debug_weight_trigger;
6 changes: 6 additions & 0 deletions src/rum.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "storage/bufmgr.h"
#include "utils/datum.h"
#include "utils/memutils.h"
#include "tsearch/ts_type.h"

#include "rumsort.h"

Expand Down Expand Up @@ -836,6 +837,8 @@ extern RumItem *rumGetBAEntry(BuildAccumulator *accum,
#define RUM_ADDINFO_JOIN 10
#define RUMNProcs 10

/*
 * Bit mask selecting the five least-significant bits (0x1F == binary 11111).
 * NOTE(review): presumably shared with the position (de)compression helpers
 * declared below in this header - confirm against its users.
 */
#define LOWERMASK 0x1F

extern PGDLLEXPORT Datum rum_extract_tsvector(PG_FUNCTION_ARGS);
extern PGDLLEXPORT Datum rum_extract_tsquery(PG_FUNCTION_ARGS);
extern PGDLLEXPORT Datum rum_tsvector_config(PG_FUNCTION_ARGS);
Expand All @@ -847,6 +850,9 @@ extern PGDLLEXPORT Datum rum_ts_distance_td(PG_FUNCTION_ARGS);

extern PGDLLEXPORT Datum tsquery_to_distance_query(PG_FUNCTION_ARGS);

/*
 * Helpers declared here so that they can be called from other source files
 * of the extension.
 * NOTE(review): judging by the names and signatures, decompress_pos() reads
 * one WordEntryPos from the buffer at ptr into *pos and returns a pointer
 * into that buffer, and count_pos() counts the positions stored in a buffer
 * of len bytes - confirm against the definitions.
 */
extern char* decompress_pos(char *ptr, WordEntryPos *pos);
extern unsigned int count_pos(char *ptr, int len);

/* rum_arr_utils.c */
typedef enum SimilarityType
{
Expand Down
Loading