@@ -8,23 +8,33 @@ Code related to permanently deleting projects.
 */

 import getLogger from "@cocalc/backend/logger";
+import { newCounter } from "@cocalc/backend/metrics";
 import getPool from "@cocalc/database/pool";
 import { getServerSettings } from "@cocalc/database/settings";
 import { callback2 } from "@cocalc/util/async-utils";
 import { KUCALC_ON_PREMISES } from "@cocalc/util/db-schema/site-defaults";
 import { minutes_ago } from "@cocalc/util/misc";
-import { bulk_delete } from "./bulk-delete";
+import { bulkDelete } from "./bulk-delete";
 import { PostgreSQL } from "./types";

 const log = getLogger("db:delete-projects");

+const delete_projects_prom = newCounter(
+  "database",
+  "delete_projects_total",
+  "Deleting projects and associated data operations counter.",
+  ["op"],
+);
+
 /*
 Permanently delete from the database all project records, where the
 project is explicitly deleted already (so the deleted field is true).
 Call this function to setup projects for permanent deletion. This blanks
 the user field so the user no longer can access the project, and we don't
 know that the user had anything to do with the project. A separate phase
 later then purges these projects from disk as well as the database.
+
+TODO: it's referenced from postgres-server-queries.coffee, but is it actually used anywhere?
 */
 export async function permanently_unlink_all_deleted_projects_of_user(
   db: PostgreSQL,
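For reference, the new delete_projects_prom counter follows the usual Prometheus client pattern: one counter with an "op" label, incremented once per deleted object. Below is a minimal sketch of an equivalent setup, assuming newCounter wraps the standard prom-client Counter (the metric-name prefixing and registry wiring of CoCalc's actual helper are assumptions, not shown by this diff):

import { Counter } from "prom-client";

// Hypothetical stand-in for newCounter("database", "delete_projects_total", ..., ["op"]):
// a counter with a single "op" label, so each increment records what kind of
// object was just deleted.
const deleteProjectsTotal = new Counter({
  name: "database_delete_projects_total", // assumed: the helper prefixes the "database" namespace
  help: "Deleting projects and associated data operations counter.",
  labelNames: ["op"] as const,
});

// Usage mirrors the calls added later in this diff:
deleteProjectsTotal.labels("syncstring").inc(); // one syncstring deleted
deleteProjectsTotal.labels("project").inc(); // one project's data cleaned up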
@@ -85,7 +95,7 @@ FROM projects as p
   INNER JOIN syncstrings as s
     ON p.project_id = s.project_id
 WHERE p.deleted = true
-  AND users IS NULL
+  AND p.users IS NULL
   AND p.state ->> 'state' != 'deleted'
 ORDER BY
   p.project_id, s.string_id
@@ -117,6 +127,7 @@ export async function cleanup_old_projects_data(
   const { rows } = await pool.query(Q_CLEANUP_SYNCSTRINGS);

   let num = 0;
+  let num2 = 0;
   let pid = "";

   for (const row of rows) {
@@ -129,84 +140,28 @@ export async function cleanup_old_projects_data(
     L(`deleting syncstring ${project_id}/${string_id}`);
     num += 1;
     await callback2(db.delete_syncstring, { string_id });
+    delete_projects_prom.labels("syncstring").inc();

     // wait a bit after deleting syncstrings, e.g. to let the standby db catch up
-    await new Promise((done) => setTimeout(done, 100));
+    await new Promise((done) => setTimeout(done, 10));

     // Q_CLEANUP_SYNCSTRINGS orders by project_id, hence we trigger project specific actions when the id changes
     if (pid != project_id) {
       pid = project_id;
       const L2 = L0.extend(project_id).debug;
+      delete_projects_prom.labels("project").inc();
+      num2 += 1;
+      let delRows = 0;

       if (on_prem) {
-        L2(`cleanup_old_projects_data for project_id=${project_id}`);
+        L2(`delete all project files`);
         // TODO: this only works on-prem, and requires the project files to be mounted

-        L2(`deleting all shared files in project ${project_id}`);
+        L2(`deleting all shared files`);
         // TODO: do it directly like above, and also get rid of all those shares in the database

-        const delPublicPaths = await bulk_delete({
-          table: "public_paths",
-          field: "project_id",
-          value: project_id,
-        });
-        L2(`deleted public_paths ${delPublicPaths.rowsDeleted} entries`);
-
-        const delProjectLog = await bulk_delete({
-          table: "project_log",
-          field: "project_id",
-          value: project_id,
-        });
-        L2(`deleted project_log ${delProjectLog.rowsDeleted} entries`);
-
-        const delFileUse = await bulk_delete({
-          table: "file_use",
-          field: "project_id",
-          value: project_id,
-        });
-        L2(`deleted file_use ${delFileUse.rowsDeleted} entries`);
-
-        const delAccessLog = await bulk_delete({
-          table: "file_access_log",
-          field: "project_id",
-          value: project_id,
-        });
-        L2(`deleted file_access_log ${delAccessLog.rowsDeleted} entries`);
-
-        const delJupyterApiLog = await bulk_delete({
-          table: "jupyter_api_log",
-          field: "project_id",
-          value: project_id,
-        });
-        L2(`deleted jupyter_api_log ${delJupyterApiLog.rowsDeleted} entries`);
-
-        for (const field of [
-          "target_project_id",
-          "source_project_id",
-        ] as const) {
-          const delCopyPaths = await bulk_delete({
-            table: "copy_paths",
-            field,
-            value: project_id,
-          });
-          L2(`deleted copy_paths/${field} ${delCopyPaths.rowsDeleted} entries`);
-        }
-
-        const delListings = await bulk_delete({
-          table: "listings",
-          field: "project_id",
-          id: "project_id", // TODO listings has a more complex ID, is this a problem?
-          value: project_id,
-        });
-        L2(`deleted ${delListings.rowsDeleted} listings`);
-
-        const delInviteTokens = await bulk_delete({
-          table: "project_invite_tokens",
-          field: "project_id",
-          value: project_id,
-          id: "token",
-        });
-        L2(`deleted ${delInviteTokens.rowsDeleted} entries`);
+        // for now, on-prem only as well. This gets rid of all sorts of data in tables specific to the given project.
+        delRows += await delete_associated_project_data(L2, project_id);
       }

       // now, that we're done with that project, mark it as state.state ->> 'deleted'
@@ -215,6 +170,73 @@ export async function cleanup_old_projects_data(
         project_id,
         state: "deleted",
       });
+      L2(
+        `finished deleting project data | deleted ${delRows} entries | setting state.state="deleted"`,
+      );
     }
   }
+  L(`finished deleting ${num} syncstrings and data of ${num2} projects`);
+}
+
+async function delete_associated_project_data(
+  L2,
+  project_id: string,
+): Promise<number> {
+  let total = 0;
+  // collecting tables, where the primary key is the default (i.e. "id") and
+  // the field to check is always called "project_id"
+  const tables = [
+    "public_paths",
+    "project_log",
+    "file_use",
+    "file_access_log",
+    "jupyter_api_log",
+    "openai_chatgpt_log",
+  ] as const;
+
+  for (const table of tables) {
+    const { rowsDeleted } = await bulkDelete({
+      table,
+      field: "project_id",
+      value: project_id,
+    });
+    total += rowsDeleted;
+    L2(`deleted ${table} ${rowsDeleted} entries`);
+  }
+
+  // these tables are different, i.e. another id, or the field to check the project_id value against is called differently
+
+  for (const field of ["target_project_id", "source_project_id"] as const) {
+    const { rowsDeleted } = await bulkDelete({
+      table: "copy_paths",
+      field,
+      value: project_id,
+    });
+    total += rowsDeleted;
+    L2(`deleted copy_paths/${field} ${rowsDeleted} entries`);
+  }
+
+  {
+    const { rowsDeleted } = await bulkDelete({
+      table: "listings",
+      field: "project_id",
+      id: "project_id", // TODO listings has a more complex ID, is this a problem?
+      value: project_id,
+    });
+    total += rowsDeleted;
+    L2(`deleted ${rowsDeleted} listings`);
+  }
+
+  {
+    const { rowsDeleted } = await bulkDelete({
+      table: "project_invite_tokens",
+      field: "project_id",
+      value: project_id,
+      id: "token",
+    });
+    total += rowsDeleted;
+    L2(`deleted ${rowsDeleted} entries`);
+  }
+
+  return total;
 }
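All of the per-table cleanup now funnels through bulkDelete, whose contract can be read off the call sites above: it takes the table, the column to match the project_id against, optionally the name of the primary-key column, and reports how many rows it removed. A minimal sketch of such a helper, assuming batched deletes keyed on the primary key (the real implementation in ./bulk-delete may differ in batching, typing, and pacing):

import getPool from "@cocalc/database/pool";

// Assumed shape, derived from the call sites in delete_associated_project_data.
interface BulkDeleteOpts {
  table: string; // table to delete from
  field: string; // column holding the project_id (or target/source_project_id)
  value: string; // the project_id value to match
  id?: string; // primary-key column, defaults to "id"
  limit?: number; // rows per batch
}

async function bulkDeleteSketch({
  table,
  field,
  value,
  id = "id",
  limit = 1000,
}: BulkDeleteOpts): Promise<{ rowsDeleted: number }> {
  const pool = getPool();
  let rowsDeleted = 0;
  while (true) {
    // delete in small batches to keep transactions short and limit replication lag
    const { rowCount } = await pool.query(
      `DELETE FROM ${table} WHERE ${id} IN (
         SELECT ${id} FROM ${table} WHERE ${field} = $1 LIMIT ${limit})`,
      [value],
    );
    rowsDeleted += rowCount ?? 0;
    if ((rowCount ?? 0) < limit) break;
  }
  return { rowsDeleted };
}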