Skip to content

Commit 2c9cedf

Browse files
author
Rishitha Kalicheti
committed
{181463388} phys rep alternate metadb system table and message traps
Signed-off-by: Rishitha Kalicheti <[email protected]>
1 parent 1785421 commit 2c9cedf

File tree

13 files changed

+300
-149
lines changed

13 files changed

+300
-149
lines changed

db/phys_rep.c

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -84,15 +84,6 @@ char *gbl_physrep_source_host;
8484
char *gbl_physrep_metadb_name;
8585
char *gbl_physrep_metadb_host;
8686

87-
struct metadb {
88-
char *dbname;
89-
char *host;
90-
char **hosts;
91-
pthread_mutex_t lk;
92-
int host_count;
93-
};
94-
95-
#define MAX_ALTERNATE_METADBS 10
9687
struct metadb gbl_altmetadb[MAX_ALTERNATE_METADBS] = {0};
9788
__thread int altmetadb_index[MAX_ALTERNATE_METADBS] = {0};
9889
int gbl_altmetadb_count = 0;
@@ -189,6 +180,16 @@ void physrep_fanout_dump(void)
189180
Pthread_mutex_unlock(&fanout_lk);
190181
}
191182

183+
void physrep_alt_metadb_print(void)
184+
{
185+
physrep_logmsg(LOGMSG_USER, "Alternate metadb count: %d\n", gbl_altmetadb_count);
186+
for (int i = 0; i < gbl_altmetadb_count; ++i) {
187+
physrep_logmsg(LOGMSG_USER, " metadb %d: dbname %s host %s\n", i,
188+
gbl_altmetadb[i].dbname ? gbl_altmetadb[i].dbname : "NULL",
189+
gbl_altmetadb[i].host ? gbl_altmetadb[i].host : "NULL");
190+
}
191+
}
192+
192193
void cleanup_hosts()
193194
{
194195
DB_Connection *cnct;

db/phys_rep.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,16 @@
2020
#include <stdlib.h>
2121
#include <cdb2api.h>
2222

23+
/* Description of one physical-replication metadb; gbl_altmetadb[] holds up to MAX_ALTERNATE_METADBS of these. */
struct metadb {
24+
char *dbname; /* metadb database name; may be NULL (the print routine guards for it) */
25+
char *host; /* host string associated with the metadb; may be NULL */
26+
char **hosts; /* NOTE(review): presumably the expanded host list, host_count entries long - confirm */
27+
pthread_mutex_t lk; /* NOTE(review): presumably protects hosts/host_count - confirm against phys_rep.c */
28+
int host_count; /* exposed as the host_count column of comdb2_physrep_altmetadb */
29+
};
30+
31+
#define MAX_ALTERNATE_METADBS 10
32+
2333
extern char *gbl_physrep_source_dbname;
2434
extern char *gbl_physrep_source_host;
2535
extern char *gbl_physrep_metadb_name;
@@ -31,6 +41,8 @@ extern int gbl_deferred_phys_flag;
3141

3242
extern unsigned int gbl_deferred_phys_update;
3343

44+
extern struct metadb gbl_altmetadb[MAX_ALTERNATE_METADBS];
45+
3446
int start_physrep_threads();
3547
int stop_physrep_threads();
3648
int physrep_exited();
@@ -41,5 +53,6 @@ void physrep_fanout_override(const char *dbname, int fanout);
4153
int physrep_fanout_get(const char *dbname);
4254
void physrep_fanout_dump(void);
4355
int physrep_add_alternate_metadb(char *dbname, char *host);
56+
void physrep_alt_metadb_print(void);
4457

4558
#endif /* PHYS_REP_H */

db/process_message.c

Lines changed: 133 additions & 133 deletions
Large diffs are not rendered by default.

docs/pages/operating/physical_replication.md

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ pull the physical logs from the source machine and apply it locally.
1313
It is important to note that physreps are applying the logs out-of-band and, thus,
1414
should not be considered part of the source cluster.
1515

16-
In order to enable replication, `replicate_from` must be added to the copycomdb2-d
16+
In order to enable replication, `replicate_from` must be added to the `copycomdb2 -d`
1717
physrep's lrl file. This line can either take a valid Comdb2 cluster tier, a
1818
hostname or a comma-separated list of hostnames (without any space).
1919

@@ -30,8 +30,9 @@ physrep added to the system would add to the overall cost incurred by the source
3030
host/cluster.
3131

3232
In order to avoid having the source support all physreps directly, one could set up tiered
33-
replication in which some physical replicants could become the source for the other
34-
replicants, thus keeping some load off of the top-level source host/cluster.
33+
replication (not to be confused with machine classes), in which some physical replicants
34+
could become the source for the other replicants, thus keeping some load off of
35+
the top-level source host/cluster.
3536

3637
```
3738
@@ -49,20 +50,22 @@ Note: The source cluster nodes are always considered at tier 0.
4950
Setting up tiered replication topology requires 2 base tables to maintain the
5051
current state of replication as well as the replication topology.
5152

52-
* comdb2_physreps
53-
* comdb2_physrep_connection
53+
* [comdb2_physreps](#comdb2_physreps)
54+
* [comdb2_physrep_connections](#comdb2_physrep_connections)
5455

5556
These tables automatically get updated to reflect the changes as replicants
5657
join or leave the system and thus are not designed to be manually modified
5758
under normal circumstances. In order to keep the load evenly spread, these tables
5859
are consulted to ensure a certain fanout `physrep_fanout` is maintained across all
59-
the nodes. The LSN information in `comdb2_physreps` table is used by all the
60+
the nodes. The LSN (file:offset) information in `comdb2_physreps` table is used by all the
6061
nodes to pause log-deletion.
6162

6263
## Algorithm
6364

6465
On start, a physical replicant executes `sys.physrep.register_replicant()` against
65-
the `physrep_metadb`, which in turn, responds with a list of potential nodes that
66+
the `physrep_metadb`, which in turn, responds with a list of potential nodes
67+
(by doing a graph traversal on nodes (`comdb2_physreps`) and edges (`comdb2_physrep_connections`)
68+
starting at the source as the root node (tier 0); see `lua/lib/physrep_register_replicant.lua`) that
6669
can be used as the source of physical logs. The replicant then picks up a node from
6770
the list and tries to connect to it. On successful connection, the replicant executes
6871
`sys.physrep.update_registry()` against the `physrep_metadb`, confirming that the
@@ -112,6 +115,21 @@ the hosts listed in that cluster, as represented by the bbcpu.lst.
112115
NOTE: In cross-tier replication, the replication metadata tables must be hosted by a
113116
separate database running in a lower (development) tier.
114117

118+
### Alternate Metadbs
119+
120+
Physrep setup supports configuring multiple alternate metadbs in addition to the primary
121+
`physrep_metadb`. The idea is to set up an alternate metadb in a separate tier/class (say beta) so that the
122+
production tier/class doesn't have to directly interact with lower level tiers (this is an update to the
123+
cross-tier replication model discussed above).
124+
125+
Key gotchas:
126+
* A physical replicant registers (`register_replicant`) only against the primary metadb (never an alternate).
127+
* Metadb does not provide transaction logs, but returns candidate source nodes to replicate from (based on fanout and tree traversal, refer to [algorithm](#algorithm)).
128+
* Alternate metadbs are primarily used by the source (physrep-parent) side to try and establish a reverse connection based on the `comdb2_physrep_sources` table.
129+
* The source cluster writes replication metadata (entries in `comdb2_physreps` and `comdb2_physrep_connections`) to the primary `physrep_metadb`.
130+
* If a source is itself a physrep (tiered chain), it still uses only its primary metadb for its own registration, while reverse connecting outwards based on configured alternate metadbs.
131+
132+
115133
## Physical replication metadata tables
116134

117135
### comdb2_physreps
@@ -125,6 +143,7 @@ CREATE TABLE comdb2_physreps(dbname CSTRING(60),
125143
state CSTRING(60),
126144
UNIQUE (dbname, host))
127145
```
146+
This table can be thought of as the registry of all the nodes taking part in physical replication (both sources and replicants).
128147
* Physical replicant states:
129148
* `Pending` : The node has requested to become a physical replicant (registration in-progress)
130149
* `Active` : The node is a physical replicant
@@ -181,6 +200,7 @@ CREATE TABLE comdb2_physrep_sources(dbname CSTRING(60),
181200
* replicate_from dbname @host/tier: This line sets the source host/cluster. It is required for all physical replicants.
182201
* replicate_wait <sec>: Tells the physical replicant to wait for this many seconds before applying the log records.
183202
* physrep_metadb: If set, all the nodes will connect to this database (as opposed to the source host/cluster specified via `replicate_from`) for the replication metadata tables
203+
* alternate_metadb <dbname> <host>: If set, the parent node will try to establish a reverse connection based on the `comdb2_physrep_sources` table.
184204
* physrep_fanout_override <dbname> <fanout>: This is set on the metadb, and allows per-database overrides of the 'physrep_fanout' tunable. The 'physrep_fanout_override' message-trap allows this to be set dynamically. The 'physrep_fanout_dump' message-trap prints the current overrides.
185205
* physrep_ignore <tables>: All the log records that belong to any of these tables are ignored by physical replicants
186206
* nonames: This configuration forces system database file names to not carry the database name. This setting is required for physical-log based replication to work properly.

sqlite/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ add_library(sqlite
3737
ext/comdb2/opcode_handlers.c
3838
ext/comdb2/partial_datacopies.c
3939
ext/comdb2/permissions.c
40+
ext/comdb2/phys_rep_alt_metadb.c
4041
ext/comdb2/plugins.c
4142
ext/comdb2/procedures.c
4243
ext/comdb2/query_plans.c

sqlite/ext/comdb2/comdb2systblInt.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ int systblTableMetricsInit(sqlite3 *db);
107107
int systblApiHistoryInit(sqlite3 *db);
108108
int systblDbInfoInit(sqlite3 *db);
109109
int systblUnusedFilesInit(sqlite3 *db);
110+
int systblPhysrepAltmetadbInit(sqlite3 *db);
110111

111112
/* Simple yes/no answer for booleans */
112113
#define YESNO(x) ((x) ? "Y" : "N")
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
/*
2+
Copyright 2020 Bloomberg Finance L.P.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
#include <stdlib.h>
18+
#include <string.h>
19+
#include <stddef.h>
20+
21+
#include "comdb2.h"
22+
#include "comdb2systblInt.h"
23+
#include "sql.h"
24+
#include "ezsystables.h"
25+
#include "phys_rep.h"
26+
27+
/* One row of the comdb2_physrep_altmetadb system table (field offsets are registered in systblPhysrepAltmetadbInit). */
typedef struct systable_physrep_altmetadb_t {
28+
char *dbname; /* heap copy made by the collect callback; released by free_physrep_altmetadb */
29+
char *host; /* heap copy made by the collect callback; released by free_physrep_altmetadb */
30+
int host_count; /* copied from the matching gbl_altmetadb[] entry */
31+
} systable_physrep_altmetadb_t;
32+
33+
static int collect_physrep_alt_metadbs(void **pdata, int *pn) {
34+
systable_physrep_altmetadb_t *rows;
35+
int nrows = 0;
36+
37+
extern int gbl_altmetadb_count;
38+
extern struct metadb gbl_altmetadb[MAX_ALTERNATE_METADBS];
39+
40+
if (gbl_altmetadb_count == 0) {
41+
*pdata = NULL;
42+
*pn = 0;
43+
return 0;
44+
}
45+
46+
rows = calloc(gbl_altmetadb_count, sizeof(systable_physrep_altmetadb_t));
47+
if (!rows) return -1;
48+
49+
// Populate rows from gbl_altmetadb array
50+
for (int i = 0; i < gbl_altmetadb_count; i++) {
51+
rows[i].dbname = strdup(gbl_altmetadb[i].dbname);
52+
rows[i].host = strdup(gbl_altmetadb[i].host);
53+
rows[i].host_count = gbl_altmetadb[i].host_count;
54+
nrows++;
55+
}
56+
*pdata = rows;
57+
*pn = nrows;
58+
return 0;
59+
}
60+
61+
62+
static void free_physrep_altmetadb(void *data, int nrows) {
63+
systable_physrep_altmetadb_t *rows = (systable_physrep_altmetadb_t *)data;
64+
65+
for (int i = 0; i < nrows; i++) {
66+
free(rows[i].dbname);
67+
free(rows[i].host);
68+
}
69+
free(rows);
70+
}
71+
72+
/*
 * Module descriptor handed to create_system_table() for the
 * comdb2_physrep_altmetadb table. Only access_flag is set here.
 * NOTE(review): CDB2_ALLOW_ALL presumably makes the table readable by every
 * user - confirm against the other system tables.
 */
sqlite3_module systblPhysrepAltmetadbModule = {
73+
.access_flag = CDB2_ALLOW_ALL,
74+
};
75+
76+
/*
 * Register the comdb2_physrep_altmetadb system table on the given sqlite
 * handle. The table has three columns - dbname, host and host_count - backed
 * by the fields of systable_physrep_altmetadb_t.
 *
 * Returns whatever create_system_table() returns.
 */
int systblPhysrepAltmetadbInit(sqlite3 *db) {
    const int rc = create_system_table(
        db, "comdb2_physrep_altmetadb", &systblPhysrepAltmetadbModule,
        collect_physrep_alt_metadbs, free_physrep_altmetadb,
        sizeof(systable_physrep_altmetadb_t),
        CDB2_CSTRING, "dbname", -1,
        offsetof(systable_physrep_altmetadb_t, dbname),
        CDB2_CSTRING, "host", -1,
        offsetof(systable_physrep_altmetadb_t, host),
        CDB2_INTEGER, "host_count", -1,
        offsetof(systable_physrep_altmetadb_t, host_count),
        SYSTABLE_END_OF_FIELDS);

    return rc;
}

sqlite/ext/comdb2/tables.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,8 @@ int comdb2SystblInit(
250250
rc = systblDbInfoInit(db);
251251
if (rc == SQLITE_OK)
252252
rc = systblUnusedFilesInit(db);
253+
if (rc == SQLITE_OK)
254+
rc = systblPhysrepAltmetadbInit(db);
253255
#endif
254256
return rc;
255257
}

tests/auth.test/t09.expected

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,7 @@
257257
(candidate='comdb2_net_userfuncs')
258258
(candidate='comdb2_opcode_handlers')
259259
(candidate='comdb2_partial_datacopies')
260+
(candidate='comdb2_physrep_altmetadb')
260261
(candidate='comdb2_plugins')
261262
(candidate='comdb2_prepared')
262263
(candidate='comdb2_procedures')

tests/cdb2sql.test/t00.expected

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ unknown @ls sub-command foo
4848
(name='comdb2_net_userfuncs')
4949
(name='comdb2_opcode_handlers')
5050
(name='comdb2_partial_datacopies')
51+
(name='comdb2_physrep_altmetadb')
5152
(name='comdb2_plugins')
5253
(name='comdb2_prepared')
5354
(name='comdb2_procedures')

0 commit comments

Comments
 (0)