diff --git a/config.js b/config.js index 057864f62c..eb4440f971 100644 --- a/config.js +++ b/config.js @@ -936,6 +936,9 @@ config.NSFS_GLACIER_DMAPI_PMIG_DAYS = config.S3_RESTORE_REQUEST_MAX_DAYS; // accidental blocking reads from happening. config.NSFS_GLACIER_DMAPI_FINALIZE_RESTORE_ENABLE = false; +config.NSFS_GLACIER_DMAPI_ENABLE_TAPE_RECLAIM = false; +config.NSFS_GLACIER_RECLAIM_INTERVAL = 15 * 60 * 1000; + config.NSFS_STATFS_CACHE_SIZE = 10000; config.NSFS_STATFS_CACHE_EXPIRY_MS = 1 * 1000; @@ -979,7 +982,7 @@ config.NSFS_GLACIER_MIGRATE_LOG_THRESHOLD = 50 * 1024; config.NSFS_GLACIER_METRICS_STATFS_PATHS = []; config.NSFS_GLACIER_METRICS_STATFS_INTERVAL = 60 * 1000; // Refresh statfs value every minute -/** +/** * NSFS_GLACIER_RESERVED_BUCKET_TAGS defines an object of bucket tags which will be reserved * by the system and PUT operations for them via S3 API would be limited - as in they would be * mutable only if specified and only under certain conditions. @@ -990,7 +993,7 @@ config.NSFS_GLACIER_METRICS_STATFS_INTERVAL = 60 * 1000; // Refresh statfs value * default: any, * event: boolean * }>} - * + * * @example * { 'deep-archive-copies': { diff --git a/src/cmd/manage_nsfs.js b/src/cmd/manage_nsfs.js index 388fd0d645..f8ad07c3a2 100644 --- a/src/cmd/manage_nsfs.js +++ b/src/cmd/manage_nsfs.js @@ -879,6 +879,9 @@ async function manage_glacier_operations(action, argv) { case GLACIER_ACTIONS.EXPIRY: await manage_nsfs_glacier.process_expiry(); break; + case GLACIER_ACTIONS.RECLAIM: + await manage_nsfs_glacier.process_reclaim(); + break; default: throw_cli_error(ManageCLIError.InvalidGlacierOperation); } diff --git a/src/manage_nsfs/manage_nsfs_constants.js b/src/manage_nsfs/manage_nsfs_constants.js index 3a3d9561ae..1b88a0bf65 100644 --- a/src/manage_nsfs/manage_nsfs_constants.js +++ b/src/manage_nsfs/manage_nsfs_constants.js @@ -26,6 +26,7 @@ const GLACIER_ACTIONS = Object.freeze({ MIGRATE: 'migrate', RESTORE: 'restore', EXPIRY: 'expiry', + RECLAIM: 'reclaim', 
}); const DIAGNOSE_ACTIONS = Object.freeze({ @@ -72,6 +73,7 @@ const VALID_OPTIONS_GLACIER = { 'migrate': new Set([ CONFIG_ROOT_FLAG]), 'restore': new Set([ CONFIG_ROOT_FLAG]), 'expiry': new Set([ CONFIG_ROOT_FLAG]), + 'reclaim': new Set([ CONFIG_ROOT_FLAG]), }; const VALID_OPTIONS_DIAGNOSE = { diff --git a/src/manage_nsfs/manage_nsfs_glacier.js b/src/manage_nsfs/manage_nsfs_glacier.js index 7c7f3a5104..10c3452161 100644 --- a/src/manage_nsfs/manage_nsfs_glacier.js +++ b/src/manage_nsfs/manage_nsfs_glacier.js @@ -58,6 +58,19 @@ async function process_expiry() { } } +async function process_reclaim() { + const fs_context = native_fs_utils.get_process_fs_context(); + const backend = Glacier.getBackend(); + + if ( + await backend.low_free_space() || + !(await time_exceeded(fs_context, config.NSFS_GLACIER_RECLAIM_INTERVAL, Glacier.RECLAIM_TIMESTAMP_FILE)) + ) return; + + await backend.perform(prepare_galcier_fs_context(fs_context), "RECLAIM"); + const timestamp_file_path = path.join(config.NSFS_GLACIER_LOGS_DIR, Glacier.RECLAIM_TIMESTAMP_FILE); + await record_current_time(fs_context, timestamp_file_path); +} /** * time_exceeded returns true if the time between last run recorded in the given @@ -129,3 +142,4 @@ function prepare_galcier_fs_context(fs_context) { exports.process_migrations = process_migrations; exports.process_restores = process_restores; exports.process_expiry = process_expiry; +exports.process_reclaim = process_reclaim; diff --git a/src/native/fs/fs_napi.cpp b/src/native/fs/fs_napi.cpp index 4fd4f895c8..dbdb6207fc 100644 --- a/src/native/fs/fs_napi.cpp +++ b/src/native/fs/fs_napi.cpp @@ -50,9 +50,11 @@ #define GPFS_DMAPI_DOT_IBMOBJ_EA "IBMObj" #define GPFS_DMAPI_DOT_IBMPMIG_EA "IBMPMig" #define GPFS_DMAPI_DOT_IBMTPS_EA "IBMTPS" +#define GPFS_DMAPI_DOT_IBMUID_EA "IBMUID" #define GPFS_DMAPI_XATTR_TAPE_INDICATOR GPFS_DMAPI_XATTR_PREFIX "." GPFS_DMAPI_DOT_IBMOBJ_EA #define GPFS_DMAPI_XATTR_TAPE_PREMIG GPFS_DMAPI_XATTR_PREFIX "." 
GPFS_DMAPI_DOT_IBMPMIG_EA #define GPFS_DMAPI_XATTR_TAPE_TPS GPFS_DMAPI_XATTR_PREFIX "." GPFS_DMAPI_DOT_IBMTPS_EA +#define GPFS_DMAPI_XATTR_TAPE_UID GPFS_DMAPI_XATTR_PREFIX "." GPFS_DMAPI_DOT_IBMUID_EA // This macro should be used after openning a file // it will autoclose the file using AutoCloser and will throw an error in case of failures @@ -255,6 +257,7 @@ const static std::vector<std::string> GPFS_DMAPI_XATTRS{ GPFS_DMAPI_XATTR_TAPE_INDICATOR, GPFS_DMAPI_XATTR_TAPE_PREMIG, GPFS_DMAPI_XATTR_TAPE_TPS, + GPFS_DMAPI_XATTR_TAPE_UID, }; const static std::vector<std::string> USER_XATTRS{ "user.content_type", diff --git a/src/sdk/glacier.js b/src/sdk/glacier.js index d44647d00c..59672ee597 100644 --- a/src/sdk/glacier.js +++ b/src/sdk/glacier.js @@ -21,6 +21,7 @@ class Glacier { static MIGRATE_TIMESTAMP_FILE = 'migrate.timestamp'; static RESTORE_TIMESTAMP_FILE = 'restore.timestamp'; static EXPIRY_TIMESTAMP_FILE = 'expiry.timestamp'; + static RECLAIM_TIMESTAMP_FILE = 'reclaim.timestamp'; /** * XATTR_RESTORE_REQUEST is set to a NUMBER (expiry days) by `restore_object` when @@ -71,10 +72,21 @@ class Glacier { */ static GPFS_DMAPI_XATTR_TAPE_TPS = 'dmapi.IBMTPS'; + /** + * GPFS_DMAPI_XATTR_TAPE_UID xattr contains the DMAPI UID, a unique identifier of the file + * + * Example: `1284427297506873931-5499940123615166566-1799306066-279655-0` (here 279655 is + * the inode number) + * + * NOTE: If IBMUID EA exists, that means the file is either migrated or premigrated.
+ */ + static GPFS_DMAPI_XATTR_TAPE_UID = 'dmapi.IBMUID'; + static MIGRATE_WAL_NAME = 'migrate'; static MIGRATE_STAGE_WAL_NAME = 'stage.migrate'; static RESTORE_WAL_NAME = 'restore'; static RESTORE_STAGE_WAL_NAME = 'stage.restore'; + static RECLAIM_WAL_NAME = 'reclaim'; /** @type {nb.RestoreState} */ static RESTORE_STATUS_CAN_RESTORE = 'CAN_RESTORE'; @@ -86,6 +98,7 @@ class Glacier { static GLACIER_CLUSTER_LOCK = 'glacier.cluster.lock'; static GLACIER_MIGRATE_CLUSTER_LOCK = 'glacier.cluster.migrate.lock'; static GLACIER_RESTORE_CLUSTER_LOCK = 'glacier.cluster.restore.lock'; + static GLACIER_RECLAIM_CLUSTER_LOCK = 'glacier.cluster.reclaim.lock'; static GLACIER_SCAN_LOCK = 'glacier.scan.lock'; /** @@ -181,6 +194,20 @@ class Glacier { throw new Error('Unimplementented'); } + /** + * reclaim cleans up unindexed items in the underlying + * glacier storage + * + * NOTE: This needs to be implemented by each backend. + * @param {nb.NativeFSContext} fs_context + * @param {LogFile} log_file log filename + * @param {(entry: string) => Promise<void>} failure_recorder + * @returns {Promise<void>} + */ + async reclaim(fs_context, log_file, failure_recorder) { + throw new Error('Unimplementented'); + } + /** * low_free_space must return true if the backend has * low free space.
@@ -199,7 +226,7 @@ class Glacier { /** * @param {nb.NativeFSContext} fs_context - * @param {"MIGRATION" | "RESTORE" | "EXPIRY"} type + * @param {"MIGRATION" | "RESTORE" | "EXPIRY" | "RECLAIM"} type */ async perform(fs_context, type) { const lock_path = lock_file => path.join(config.NSFS_GLACIER_LOGS_DIR, lock_file); @@ -217,8 +244,8 @@ class Glacier { * ) => Promise} log_cb */ /** - * @param {string} namespace - * @param {log_cb} cb + * @param {string} namespace + * @param {log_cb} cb */ const process_glacier_logs = async (namespace, cb) => { const logs = new PersistentLogger( @@ -266,6 +293,10 @@ class Glacier { this.restore.bind(this), Glacier.GLACIER_RESTORE_CLUSTER_LOCK, ); + } else if (type === 'RECLAIM') { + await native_fs_utils.lock_and_run(fs_context, lock_path(Glacier.GLACIER_RECLAIM_CLUSTER_LOCK), async () => { + await process_glacier_logs(Glacier.RECLAIM_WAL_NAME, this.reclaim.bind(this)); + }); } } diff --git a/src/sdk/glacier_tapecloud.js b/src/sdk/glacier_tapecloud.js index e6bfc5fdd3..67d594f4ef 100644 --- a/src/sdk/glacier_tapecloud.js +++ b/src/sdk/glacier_tapecloud.js @@ -24,6 +24,7 @@ function get_bin_path(bin_name) { class TapeCloudUtils { static MIGRATE_SCRIPT = 'migrate'; static RECALL_SCRIPT = 'recall'; + static RECLAIM_SCRIPT = 'reclaim'; static TASK_SHOW_SCRIPT = 'task_show'; static PROCESS_EXPIRED_SCRIPT = 'process_expired'; static LOW_FREE_SPACE_SCRIPT = 'low_free_space'; @@ -182,6 +183,29 @@ class TapeCloudUtils { } } + /** + * reclaim takes name of a file which contains the list + * of the files to be reclaimed. + * + * reclaim doesn't perform any failure handling and expects the + * underlying scripts to take care of retries. 
+ * + * @param {string} file filename + * @returns {Promise<boolean>} Indicates success if true + */ + static async reclaim(file) { + try { + dbg.log1("Starting reclaim for file", file); + const out = await exec(`${get_bin_path(TapeCloudUtils.RECLAIM_SCRIPT)} ${file}`, { return_stdout: true }); + dbg.log4("reclaim finished with:", out); + dbg.log1("Finished reclaim for file", file); + } catch (error) { + dbg.error("Failed to run TapeCloudUtils.reclaim for file:", file, "due to error:", error); + } + + return true; + } + static async process_expired() { dbg.log1("Starting process_expired"); const out = await exec(`${get_bin_path(TapeCloudUtils.PROCESS_EXPIRED_SCRIPT)}`, { return_stdout: true }); @@ -444,6 +468,21 @@ class TapeCloudGlacier extends Glacier { } } + /** + * + * @param {nb.NativeFSContext} fs_context + * @param {LogFile} log_file log filename + * @param {(entry: string) => Promise} failure_recorder + * @returns {Promise} + */ + async reclaim(fs_context, log_file, failure_recorder) { + try { + return await this._reclaim(log_file.log_path); + } catch (error) { + dbg.error('unexpected error occurred while running tapecloud.reclaim:', error); + } + } + async low_free_space() { const result = await exec(get_bin_path(TapeCloudUtils.LOW_FREE_SPACE_SCRIPT), { return_stdout: true }); return result.toLowerCase().trim() === 'true'; @@ -511,6 +550,17 @@ class TapeCloudGlacier extends Glacier { return TapeCloudUtils.process_expired(); } + /** + * _reclaim should perform object reclaim from tape + * + * NOTE: Must be overwritten for tests + * @param {string} file + * @returns {Promise<boolean>} + */ + async _reclaim(file) { + return TapeCloudUtils.reclaim(file); + } + /** * finalizes the restore by setting the required EAs * diff --git a/src/sdk/namespace_fs.js b/src/sdk/namespace_fs.js index a63bad90b3..bffa69d24d 100644 --- a/src/sdk/namespace_fs.js +++ b/src/sdk/namespace_fs.js @@ -1407,6 +1407,8 @@ class NamespaceFS { const is_disabled_dir_content = this._is_directory_content(file_path,
params.key) && this._is_versioning_disabled(); const stat = await target_file.stat(fs_context); + const file_path_stat = config.NSFS_GLACIER_DMAPI_ENABLE_TAPE_RECLAIM && + await nb_native().fs.stat(fs_context, file_path).catch(_.noop); this._verify_encryption(params.encryption, this._get_encryption_info(stat)); const copy_xattr = params.copy_source && params.xattr_copy; @@ -1455,6 +1457,10 @@ class NamespaceFS { dbg.log1('NamespaceFS._finish_upload:', open_mode, file_path, upload_path, fs_xattr); if (!same_inode && !part_upload) { + if (file_path_stat) { + await this.append_to_reclaim_wal(fs_context, file_path, file_path_stat); + } + await this._move_to_dest(fs_context, upload_path, file_path, target_file, open_mode, params.key); } @@ -2126,7 +2132,16 @@ class NamespaceFS { if (files) await this._close_files(fs_context, files.delete_version, undefined, true); } } else { - await native_fs_utils.unlink_ignore_enoent(fs_context, file_path); + try { + const stat = config.NSFS_GLACIER_DMAPI_ENABLE_TAPE_RECLAIM && + await nb_native().fs.stat(fs_context, file_path).catch(dbg.warn.bind(this)); + await nb_native().fs.unlink(fs_context, file_path); + if (stat) { + await this.append_to_reclaim_wal(fs_context, file_path, stat); + } + } catch (err) { + if (err.code !== 'ENOENT' && err.code !== 'EISDIR') throw err; + } } await this._delete_path_dirs(file_path, fs_context); @@ -3715,6 +3730,28 @@ class NamespaceFS { await NamespaceFS.restore_wal.append(Glacier.getBackend().encode_log(entry)); } + /** + * + * @param {nb.NativeFSContext} fs_context + * @param {string} file_path + * @param {nb.NativeFSStats} [stat] + * @returns + */ + async append_to_reclaim_wal(fs_context, file_path, stat) { + if (!config.NSFS_GLACIER_LOGS_ENABLED || !config.NSFS_GLACIER_DMAPI_ENABLE_TAPE_RECLAIM) return; + + if (!stat) { + stat = await nb_native().fs.stat(fs_context, file_path); + } + + const data = JSON.stringify({ + full_path: file_path, + logical_size: stat.size, + ea: stat.xattr, + }); + await 
NamespaceFS.reclaim_wal.append(data); + } + static get migrate_wal() { if (!NamespaceFS._migrate_wal) { NamespaceFS._migrate_wal = new PersistentLogger(config.NSFS_GLACIER_LOGS_DIR, Glacier.MIGRATE_WAL_NAME, { @@ -3737,6 +3774,17 @@ class NamespaceFS { return NamespaceFS._restore_wal; } + static get reclaim_wal() { + if (!NamespaceFS._reclaim_wal) { + NamespaceFS._reclaim_wal = new PersistentLogger(config.NSFS_GLACIER_LOGS_DIR, Glacier.RECLAIM_WAL_NAME, { + poll_interval: config.NSFS_GLACIER_LOGS_POLL_INTERVAL, + locking: 'SHARED', + }); + } + + return NamespaceFS._reclaim_wal; + } + //////////////////////////// // LIFECYLE HELPERS // //////////////////////////// @@ -3763,6 +3811,9 @@ class NamespaceFS { this._check_lifecycle_filter_before_deletion(params, stat); const bucket_tmp_dir_path = this.get_bucket_tmpdir_full_path(); await native_fs_utils.safe_unlink(fs_context, file_path, stat, { dir_file, src_file }, bucket_tmp_dir_path); + if (!is_dir_content) { + await this.append_to_reclaim_wal(fs_context, file_path, src_stat).catch(dbg.warn.bind(this)); + } } catch (err) { dbg.log0('_verify_lifecycle_filter_and_unlink err', err.code, err, file_path); if (err.code !== 'ENOENT' && err.code !== 'EISDIR') throw err; @@ -3809,7 +3860,8 @@ NamespaceFS._migrate_wal = null; /** @type {PersistentLogger} */ NamespaceFS._restore_wal = null; +/** @type {PersistentLogger} */ +NamespaceFS._reclaim_wal = null; + module.exports = NamespaceFS; module.exports.multi_buffer_pool = multi_buffer_pool; - - diff --git a/src/test/unit_tests/nsfs/test_nsfs_glacier_backend.js b/src/test/unit_tests/nsfs/test_nsfs_glacier_backend.js index 7be970aeda..86c76f859a 100644 --- a/src/test/unit_tests/nsfs/test_nsfs_glacier_backend.js +++ b/src/test/unit_tests/nsfs/test_nsfs_glacier_backend.js @@ -7,7 +7,9 @@ const path = require('path'); const mocha = require('mocha'); const crypto = require('crypto'); const assert = require('assert'); +const Stream = require('stream'); const os = require('os'); + 
const config = require('../../../../config'); const NamespaceFS = require('../../../sdk/namespace_fs'); const s3_utils = require('../../../endpoint/s3/s3_utils'); @@ -19,7 +21,7 @@ const { Glacier } = require('../../../sdk/glacier'); const { Semaphore } = require('../../../util/semaphore'); const nb_native = require('../../../util/nb_native'); const { handler: s3_get_bucket } = require('../../../endpoint/s3/ops/s3_get_bucket'); -const Stream = require('stream'); +const lifecycle_utils = require('../../../util/lifecycle_utils'); const inspect = (x, max_arr = 5) => util.inspect(x, { colors: true, depth: null, maxArrayLength: max_arr }); @@ -93,6 +95,7 @@ function get_patched_backend() { backend._migrate = async () => true; backend._recall = async () => true; backend._process_expired = async () => null; + backend._reclaim = async () => true; return backend; } @@ -143,6 +146,7 @@ mocha.describe('nsfs_glacier', function() { config.NSFS_GLACIER_LOGS_ENABLED = true; config.NSFS_GLACIER_LOGS_DIR = await fs.mkdtemp(path.join(os.tmpdir(), 'nsfs-wal-')); + config.NSFS_GLACIER_DMAPI_ENABLE_TAPE_RECLAIM = true; // Replace the logger by custom one @@ -163,6 +167,15 @@ mocha.describe('nsfs_glacier', function() { ); if (restore_wal) await restore_wal.close(); + + const reclaim_wal = NamespaceFS._reclaim_wal; + NamespaceFS._reclaim_wal = new PersistentLogger( + config.NSFS_GLACIER_LOGS_DIR, + Glacier.RECLAIM_WAL_NAME, + { locking: 'EXCLUSIVE', poll_interval: 10 } + ); + + if (reclaim_wal) await reclaim_wal.close(); }); mocha.describe('nsfs_glacier_tapecloud', async function() { @@ -523,6 +536,153 @@ mocha.describe('nsfs_glacier', function() { assert(status.expiry_time.getDate() === expected_expiry.getDate()); }); }); + + mocha.it('object deletion should mark object for tape reclaim', async function() { + const params = { + bucket: upload_bkt, + storage_class: s3_utils.STORAGE_CLASS_GLACIER, + xattr, + }; + const key = i => `${restore_key}_delete_${i}`; + + for (let i = 0; i < 3; 
i++) { + await glacier_ns.upload_object( + { + ...params, + key: key(i), + source_stream: buffer_utils.buffer_to_read_stream(crypto.randomBytes(100)) + }, + dummy_object_sdk, + ); + } + + // Delete a single object + await glacier_ns.delete_object({...params, key: key(0)}, dummy_object_sdk); + + let found = 0; + await NamespaceFS.reclaim_wal._process(async file => { + await file.collect_and_process(async entry => { + const parsed_entry = JSON.parse(entry); + if (parsed_entry.full_path.endsWith(key(0))) { + found += 1; + } + }); + + return false; + }); + + assert(found === 1); + + // Delete multiple objects + await glacier_ns.delete_multiple_objects({ + objects: [{ key: key(1) }, { key: key(2) }] + }, dummy_object_sdk); + + found = 0; + await NamespaceFS.reclaim_wal._process(async file => { + await file.collect_and_process(async entry => { + const parsed_entry = JSON.parse(entry); + for (let i = 0; i < 3; i++) { + if (parsed_entry.full_path.endsWith(key(i))) { + found += 1; + } + } + }); + + return false; + }); + + assert(found === 3); + }); + + mocha.it('object overwrite should mark object for tape reclaim', async function() { + const data = crypto.randomBytes(100); + const params = { + bucket: upload_bkt, + storage_class: s3_utils.STORAGE_CLASS_GLACIER, + xattr, + days: 1, + source_stream: buffer_utils.buffer_to_read_stream(data) + }; + + const overwritekey = restore_key + "_reclaim_overwrite"; + await glacier_ns.upload_object( + { + ...params, + key: overwritekey, + source_stream: buffer_utils.buffer_to_read_stream(data) + }, + dummy_object_sdk + ); + + let found = 0; + await NamespaceFS.reclaim_wal._process(async file => { + await file.collect_and_process(async entry => { + const parsed_entry = JSON.parse(entry); + if (parsed_entry.full_path.endsWith(overwritekey)) { + found += 1; + } + }); + + return false; + }); + + // Should not be in the log + assert(found === 0); + + // Overwrite the object + await glacier_ns.upload_object( + { + ...params, + key: 
overwritekey, + source_stream: buffer_utils.buffer_to_read_stream(crypto.randomBytes(200)) + }, + dummy_object_sdk + ); + + found = 0; + await NamespaceFS.reclaim_wal._process(async file => { + await file.collect_and_process(async entry => { + const parsed_entry = JSON.parse(entry); + if (parsed_entry.full_path.endsWith(overwritekey)) { + found += 1; + } + }); + + return false; + }); + + // The overwritten log file must be present + assert(found === 1); + }); + + mocha.it('lifecycle lead deletion should mark object for tape reclaim', async function() { + const data = crypto.randomBytes(100); + const data_buffer = buffer_utils.buffer_to_read_stream(data); + const key = `${restore_key}_lifecycle`; + + await glacier_ns.upload_object({ + bucket: upload_bkt, key: key, source_stream: data_buffer + }, dummy_object_sdk); + const filter_func = lifecycle_utils.build_lifecycle_filter({ filter: { prefix: '' }, expiration: 0 }); + const delete_res = await glacier_ns.delete_multiple_objects({ objects: [{ key }], filter_func }, dummy_object_sdk); + assert(delete_res[0].key === key); + + let found = 0; + await NamespaceFS.reclaim_wal._process(async file => { + await file.collect_and_process(async entry => { + const parsed_entry = JSON.parse(entry); + if (parsed_entry.full_path.endsWith(key)) { + found += 1; + } + }); + + return false; + }); + + // Should be in the log + assert(found === 1); + }); }); mocha.describe('nsfs_glacier_s3_flow', async function() {