Skip to content

Commit

Permalink
delete records before force load dataset & add opt param to prevent t…
Browse files Browse the repository at this point in the history
…his action

Signed-off-by: cbh778899 <[email protected]>
  • Loading branch information
cbh778899 committed Aug 16, 2024
1 parent 5b2abbd commit 2405bcb
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 12 deletions.
4 changes: 2 additions & 2 deletions actions/embedding.js
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ export async function uploadDataset(req, res) {
return;
}

const { name, json, url, force } = req.body;
const { name, json, url, force, keep_records } = req.body;
if(!name || (!json && !url)) {
res.status(422).send("Please specify dataset name and one choice of json / url.");
return;
Expand All @@ -85,7 +85,7 @@ export async function uploadDataset(req, res) {
await getDatasetFromURL(url) :
await parseDatasetWithoutVector(json);

await loader(dataset);
await loader(dataset, force && keep_records);
} catch(error) {
res.status(500).send(error.message)
}
Expand Down
23 changes: 14 additions & 9 deletions database/rag-inference.js
Original file line number Diff line number Diff line change
Expand Up @@ -72,13 +72,14 @@ export async function parseDatasetWithoutVector(dataset) {

/**
* Load a given dataset into database.
* If `force` is specified, it will load the dataset without checking whether it is already in the system.
* If `force` is specified, it will delete the previously loaded dataset (if applicable) and then load the dataset.
* @param {String} dataset_name The dataset name to load
* @param {Boolean} force Specify whether to force load the dataset, default `false`.
* @returns {Promise<Promise|null>}
* If the dataset is already loaded and `force` is not specified, this will return null.\
* Otherwise returns a function that takes a dataset array, which should be in the format of\
* `[{identifier:"",context:"",vector:[...]}]`
* Otherwise returns a function that takes these parameters:
* * `dataset` - The dataset to be loaded, in the format of `[{identifier:"",context:"",vector:[...]}]`
* * `keep_records` - Optional, default `false`; set to `true` to keep the existing data in the DB when `force` is `true`
*
* @example
* const loader = await loadDataset("<your-dataset-name>");
Expand All @@ -96,12 +97,16 @@ export async function loadDataset(dataset_name, force = false) {

if(dataset_loaded && !force) return null;

return async function(dataset) {
const adding_dataset =
dataset.map(({identifier, context, vector})=>{
return { identifier, context, vector, dataset_name }
})
await dataset_table.add(adding_dataset)
return async function(dataset, keep_records = false) {
if(dataset_loaded && force && !keep_records) {
await dataset_table.delete(`dataset_name="${dataset_name}"`);
}

await dataset_table.add(
dataset.map(({identifier, context, vector})=>{
return { identifier, context, vector, dataset_name }
}
))

if(!dataset_loaded) {
await system_table.add([{title: "loaded_dataset_name", value: dataset_name}])
Expand Down
9 changes: 8 additions & 1 deletion swagger.json
Original file line number Diff line number Diff line change
Expand Up @@ -544,7 +544,14 @@
},
"force": {
"type": "boolean",
"description": "If specified, force load dataset into database.",
"description": "If specified, delete previous records of current dataset and load dataset into database.",
"examples": [
false
]
},
"keep_records": {
"type": "boolean",
"description": "If `force` is specified and `keep_records` is `true`, keeps the existing records and loads the data into the database. This can cause duplicate data in the DB.",
"examples": [
false
]
Expand Down

0 comments on commit 2405bcb

Please sign in to comment.