Skip to content

Commit c725c35

Browse files
committed
database/delete-project: refactor/fixes
1 parent 0857338 commit c725c35

File tree

5 files changed

+102
-74
lines changed

5 files changed

+102
-74
lines changed

src/packages/backend/metrics.ts

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import { Counter, Gauge, Histogram } from "prom-client";
22

3-
type Aspect = "db" | "server" | "llm";
3+
type Aspect = "db" | "database" | "server" | "llm" | "database";
44

55
function withPrefix(aspect: Aspect, name: string): string {
66
return `cocalc_${aspect}_${name}`;
@@ -13,7 +13,7 @@ export function newCounter(
1313
name: string,
1414
help: string,
1515
labelNames: string[] = [],
16-
) {
16+
): Counter<string> {
1717
name = withPrefix(aspect, name);
1818
const key = `counter-${name}`;
1919
if (cache[key] != null) {

src/packages/database/postgres/bulk-delete.test.ts

+2-2
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
import getPool, { initEphemeralDatabase } from "@cocalc/database/pool";
77
import { uuid } from "@cocalc/util/misc";
8-
import { bulk_delete } from "./bulk-delete";
8+
import { bulkDelete } from "./bulk-delete";
99

1010
beforeAll(async () => {
1111
await initEphemeralDatabase({});
@@ -41,7 +41,7 @@ describe("bulk delete", () => {
4141
);
4242
expect(num1.rows[0].num).toEqual(N);
4343

44-
const res = await bulk_delete({
44+
const res = await bulkDelete({
4545
table: "project_log",
4646
field: "project_id",
4747
value: project_id,

src/packages/database/postgres/bulk-delete.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ WHERE ${ID} IN (
3737
)`;
3838
}
3939

40-
export async function bulk_delete(opts: Opts): Ret {
40+
export async function bulkDelete(opts: Opts): Ret {
4141
const { table, field, value, id = "id", maxUtilPct = 10 } = opts;
4242
let { limit = 1024 } = opts;
4343
// assert table name is a key in SCHEMA

src/packages/database/postgres/delete-projects.ts

+89-67
Original file line numberDiff line numberDiff line change
@@ -8,23 +8,33 @@ Code related to permanently deleting projects.
88
*/
99

1010
import getLogger from "@cocalc/backend/logger";
11+
import { newCounter } from "@cocalc/backend/metrics";
1112
import getPool from "@cocalc/database/pool";
1213
import { getServerSettings } from "@cocalc/database/settings";
1314
import { callback2 } from "@cocalc/util/async-utils";
1415
import { KUCALC_ON_PREMISES } from "@cocalc/util/db-schema/site-defaults";
1516
import { minutes_ago } from "@cocalc/util/misc";
16-
import { bulk_delete } from "./bulk-delete";
17+
import { bulkDelete } from "./bulk-delete";
1718
import { PostgreSQL } from "./types";
1819

1920
const log = getLogger("db:delete-projects");
2021

22+
const delete_projects_prom = newCounter(
23+
"database",
24+
"delete_projects_total",
25+
"Deleting projects and associated data operations counter.",
26+
["op"],
27+
);
28+
2129
/*
2230
Permanently delete from the database all project records, where the
2331
project is explicitly deleted already (so the deleted field is true).
2432
Call this function to setup projects for permanent deletion. This blanks
2533
the user field so the user no longer can access the project, and we don't
2634
know that the user had anything to do with the project. A separate phase
2735
later then purges these projects from disk as well as the database.
36+
37+
TODO: it's referenced from postgres-server-queries.coffee, but is it actually used anywhere?
2838
*/
2939
export async function permanently_unlink_all_deleted_projects_of_user(
3040
db: PostgreSQL,
@@ -85,7 +95,7 @@ FROM projects as p
8595
INNER JOIN syncstrings as s
8696
ON p.project_id = s.project_id
8797
WHERE p.deleted = true
88-
AND users IS NULL
98+
AND p.users IS NULL
8999
AND p.state ->> 'state' != 'deleted'
90100
ORDER BY
91101
p.project_id, s.string_id
@@ -117,6 +127,7 @@ export async function cleanup_old_projects_data(
117127
const { rows } = await pool.query(Q_CLEANUP_SYNCSTRINGS);
118128

119129
let num = 0;
130+
let num2 = 0;
120131
let pid = "";
121132

122133
for (const row of rows) {
@@ -129,84 +140,28 @@ export async function cleanup_old_projects_data(
129140
L(`deleting syncstring ${project_id}/${string_id}`);
130141
num += 1;
131142
await callback2(db.delete_syncstring, { string_id });
143+
delete_projects_prom.labels("syncstring").inc();
132144

133145
// wait a bit after deleting syncstrings, e.g. to let the standby db catch up
134-
await new Promise((done) => setTimeout(done, 100));
146+
await new Promise((done) => setTimeout(done, 10));
135147

136148
// Q_CLEANUP_SYNCSTRINGS orders by project_id, hence we trigger project specific actions when the id changes
137149
if (pid != project_id) {
138150
pid = project_id;
139151
const L2 = L0.extend(project_id).debug;
152+
delete_projects_prom.labels("project").inc();
153+
num2 += 1;
154+
let delRows = 0;
140155

141156
if (on_prem) {
142-
L2(`cleanup_old_projects_data for project_id=${project_id}`);
157+
L2(`delete all project files`);
143158
// TODO: this only works on-prem, and requires the project files to be mounted
144159

145-
L2(`deleting all shared files in project ${project_id}`);
160+
L2(`deleting all shared files`);
146161
// TODO: do it directly like above, and also get rid of all those shares in the database
147162

148-
const delPublicPaths = await bulk_delete({
149-
table: "public_paths",
150-
field: "project_id",
151-
value: project_id,
152-
});
153-
L2(`deleted public_paths ${delPublicPaths.rowsDeleted} entries`);
154-
155-
const delProjectLog = await bulk_delete({
156-
table: "project_log",
157-
field: "project_id",
158-
value: project_id,
159-
});
160-
L2(`deleted project_log ${delProjectLog.rowsDeleted} entries`);
161-
162-
const delFileUse = await bulk_delete({
163-
table: "file_use",
164-
field: "project_id",
165-
value: project_id,
166-
});
167-
L2(`deleted file_use ${delFileUse.rowsDeleted} entries`);
168-
169-
const delAccessLog = await bulk_delete({
170-
table: "file_access_log",
171-
field: "project_id",
172-
value: project_id,
173-
});
174-
L2(`deleted file_access_log ${delAccessLog.rowsDeleted} entries`);
175-
176-
const delJupyterApiLog = await bulk_delete({
177-
table: "jupyter_api_log",
178-
field: "project_id",
179-
value: project_id,
180-
});
181-
L2(`deleted jupyter_api_log ${delJupyterApiLog.rowsDeleted} entries`);
182-
183-
for (const field of [
184-
"target_project_id",
185-
"source_project_id",
186-
] as const) {
187-
const delCopyPaths = await bulk_delete({
188-
table: "copy_paths",
189-
field,
190-
value: project_id,
191-
});
192-
L2(`deleted copy_paths/${field} ${delCopyPaths.rowsDeleted} entries`);
193-
}
194-
195-
const delListings = await bulk_delete({
196-
table: "listings",
197-
field: "project_id",
198-
id: "project_id", // TODO listings has a more complex ID, is this a problem?
199-
value: project_id,
200-
});
201-
L2(`deleted ${delListings.rowsDeleted} listings`);
202-
203-
const delInviteTokens = await bulk_delete({
204-
table: "project_invite_tokens",
205-
field: "project_id",
206-
value: project_id,
207-
id: "token",
208-
});
209-
L2(`deleted ${delInviteTokens.rowsDeleted} entries`);
163+
// for now, on-prem only as well. This gets rid of all sorts of data in tables specific to the given project.
164+
delRows += await delete_associated_project_data(L2, project_id);
210165
}
211166

212167
// now, that we're done with that project, mark it as state.state ->> 'deleted'
@@ -215,6 +170,73 @@ export async function cleanup_old_projects_data(
215170
project_id,
216171
state: "deleted",
217172
});
173+
L2(
174+
`finished deleting project data | deleted ${delRows} entries | setting state.state="deleted"`,
175+
);
218176
}
219177
}
178+
L(`finished deleting ${num} syncstrings and data of ${num2} projects`);
179+
}
180+
181+
async function delete_associated_project_data(
182+
L2,
183+
project_id: string,
184+
): Promise<number> {
185+
let total = 0;
186+
// collecting tables, where the primary key is the default (i.e. "id") and
187+
// the field to check is always called "project_id"
188+
const tables = [
189+
"public_paths",
190+
"project_log",
191+
"file_use",
192+
"file_access_log",
193+
"jupyter_api_log",
194+
"openai_chatgpt_log",
195+
] as const;
196+
197+
for (const table of tables) {
198+
const { rowsDeleted } = await bulkDelete({
199+
table,
200+
field: "project_id",
201+
value: project_id,
202+
});
203+
total += rowsDeleted;
204+
L2(`deleted ${table} ${rowsDeleted} entries`);
205+
}
206+
207+
// these tables are different, i.e. another id, or the field to check the project_id value against is called differently
208+
209+
for (const field of ["target_project_id", "source_project_id"] as const) {
210+
const { rowsDeleted } = await bulkDelete({
211+
table: "copy_paths",
212+
field,
213+
value: project_id,
214+
});
215+
total += rowsDeleted;
216+
L2(`deleted copy_paths/${field} ${rowsDeleted} entries`);
217+
}
218+
219+
{
220+
const { rowsDeleted } = await bulkDelete({
221+
table: "listings",
222+
field: "project_id",
223+
id: "project_id", // TODO listings has a more complex ID, is this a problem?
224+
value: project_id,
225+
});
226+
total += rowsDeleted;
227+
L2(`deleted ${rowsDeleted} listings`);
228+
}
229+
230+
{
231+
const { rowsDeleted } = await bulkDelete({
232+
table: "project_invite_tokens",
233+
field: "project_id",
234+
value: project_id,
235+
id: "token",
236+
});
237+
total += rowsDeleted;
238+
L2(`deleted ${rowsDeleted} entries`);
239+
}
240+
241+
return total;
220242
}

src/packages/hub/run/delete-projects.js

+8-2
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,14 @@
11
#!/usr/bin/env node
2+
23
/*
34
Periodically delete projects.
45
5-
TODO: For now, this just calls the unlink function. Later on it
6-
should do more (actually delete data, etc.).
6+
STATUS:
7+
For now, this just calls the unlink function and deletes all associated syncstrings and data.
8+
In "onprem" mode, this also deletes entries in various tables, which contain data specific to the deleted projects.
9+
10+
TESTING: to run this in development and see logging, call it like this:
11+
./src/packages/hub$ env DEBUG_CONSOLE=yes DEBUG=cocalc:debug:db:* pnpm cocalc-hub-delete-projects
712
*/
813

914
const postgres = require("@cocalc/database");
@@ -16,6 +21,7 @@ async function update() {
1621
console.log("unlinking old deleted projects...");
1722
try {
1823
await db.unlink_old_deleted_projects();
24+
// limit the max runtime to half the interval time
1925
const max_run_m = (INTERVAL_MS / 2) / (1000 * 60)
2026
await db.cleanup_old_projects_data(max_run_m);
2127
} catch (err) {

0 commit comments

Comments
 (0)