Skip to content
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
3235803
feat: added initial tdigest.cdf logic
SharonIV0x86 Sep 6, 2025
f3bdd77
Merge branch 'apache:unstable' into feat/tdigest.cdf-command
SharonIV0x86 Sep 6, 2025
9e26d86
fix: fixed typo in inputs and registered tdigest.cdf in MakeCmdAttr
SharonIV0x86 Sep 6, 2025
7366edb
Merge branch 'apache:unstable' into feat/tdigest.cdf-command
SharonIV0x86 Sep 9, 2025
d3a44cf
Merge branch 'apache:unstable' into feat/tdigest.cdf-command
SharonIV0x86 Sep 11, 2025
262574f
fix: fixed MakeCmdAttr to read-only
SharonIV0x86 Sep 11, 2025
b64389d
fix: used util::float2String instead of to_string.
SharonIV0x86 Sep 12, 2025
4731484
Merge branch 'unstable' into feat/tdigest.cdf-command
SharonIV0x86 Sep 14, 2025
f361fc4
Added gocase unit test case.
SharonIV0x86 Sep 14, 2025
6753870
Merge branch 'unstable' into feat/tdigest.cdf-command
SharonIV0x86 Sep 22, 2025
cb86cb3
progress: added basic c++ unit test case for CDF
SharonIV0x86 Sep 22, 2025
fcd7d5a
Merge branch 'unstable' into feat/tdigest.cdf-command
SharonIV0x86 Sep 26, 2025
e0451ba
progress: added more c++ test cases for cdf
SharonIV0x86 Sep 26, 2025
1d425a0
fix: fixed go test file formatting, distorted from the prev commit e0…
SharonIV0x86 Sep 26, 2025
31423cd
Merge branch 'unstable' into feat/tdigest.cdf-command
SharonIV0x86 Sep 27, 2025
67923f4
fix clang-tidy error
LindaSummer Sep 28, 2025
4d1988b
Merge branch 'unstable' into feat/tdigest.cdf-command
LindaSummer Sep 28, 2025
ec52575
Merge branch 'unstable' into feat/tdigest.cdf-command
SharonIV0x86 Oct 4, 2025
d23ad76
fix: go formatting
SharonIV0x86 Oct 5, 2025
cbff56e
Merge branch 'unstable' into feat/tdigest.cdf-command
SharonIV0x86 Oct 11, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 46 additions & 1 deletion src/commands/cmd_tdigest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,51 @@ class CommandTDigestMerge : public Commander {
std::vector<std::string> source_keys_;
TDigestMergeOptions options_;
};
class CommandTDigestCDF : public Commander {
Status Parse(const std::vector<std::string> &args) override {
key_name_ = args[1];
if (args.size() == 2) return {Status::RedisParseErr, errWrongNumOfArguments};
Copy link

Copilot AI Sep 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The command registration specifies minimum 4 arguments (-4), but this validation only checks for exactly 2 arguments. It should validate that there are at least 3 arguments (command + key + at least one value).

Suggested change
if (args.size() == 2) return {Status::RedisParseErr, errWrongNumOfArguments};
if (args.size() < 3) return {Status::RedisParseErr, errWrongNumOfArguments};

Copilot uses AI. Check for mistakes.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we could check the vector size at beginning.

values_.reserve(args.size() - 2);
for (size_t i = 2; i < args.size(); i++) {
auto value = ParseFloat(args[i]);
if (!value) {
return {Status::RedisParseErr, errValueIsNotFloat};
}
values_.push_back(*value);
}
return Status::OK();
}
Status Execute(engine::Context &ctx, Server *srv, Connection *conn, std::string *output) override {
TDigest tdigest(srv->storage, conn->GetNamespace());
std::vector<std::string> cdf_result;
TDigestCDFResult result;
TDigestMetadata metadata;
auto meta_status = tdigest.GetMetaData(ctx, key_name_, &metadata);
if (!meta_status.ok()) {
if (meta_status.IsNotFound()) {
return {Status::RedisExecErr, errKeyNotFound};
}
return {Status::RedisExecErr, meta_status.ToString()};
}
if (metadata.total_observations == 0) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have tested with Redis Docker, it should be the ["nan"] vector with the same size as the input.

*output = redis::MultiBulkString(RESP::v2, cdf_result);
return Status::OK();
}
auto s = tdigest.CDF(ctx, key_name_, values_, &result);
if (!s.ok()) {
return {Status::RedisExecErr, s.ToString()};
}
for (const auto &val : result.cdf_values) {
cdf_result.push_back(std::to_string(val));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could use util::Float2String as the quantile command.

}
*output = redis::MultiBulkString(RESP::v2, cdf_result);
return Status::OK();
}

private:
std::string key_name_;
std::vector<double> values_;
};
std::vector<CommandKeyRange> GetMergeKeyRange(const std::vector<std::string> &args) {
auto numkeys = ParseInt<int>(args[2], 10).ValueOr(0);
return {{1, 1, 1}, {3, 2 + numkeys, 1}};
Expand All @@ -371,5 +415,6 @@ REDIS_REGISTER_COMMANDS(TDigest, MakeCmdAttr<CommandTDigestCreate>("tdigest.crea
MakeCmdAttr<CommandTDigestMin>("tdigest.min", 2, "read-only", 1, 1, 1),
MakeCmdAttr<CommandTDigestQuantile>("tdigest.quantile", -3, "read-only", 1, 1, 1),
MakeCmdAttr<CommandTDigestReset>("tdigest.reset", 2, "write", 1, 1, 1),
MakeCmdAttr<CommandTDigestMerge>("tdigest.merge", -4, "write", GetMergeKeyRange));
MakeCmdAttr<CommandTDigestMerge>("tdigest.merge", -4, "write", GetMergeKeyRange),
MakeCmdAttr<CommandTDigestCDF>("tdigest.cdf", -4, "write", 1, 1, 1));
Copy link

Copilot AI Sep 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The TDIGEST.CDF command should be marked as 'read-only' instead of 'write' since it only reads data and doesn't modify the T-Digest structure.

Suggested change
MakeCmdAttr<CommandTDigestCDF>("tdigest.cdf", -4, "write", 1, 1, 1));
MakeCmdAttr<CommandTDigestCDF>("tdigest.cdf", -4, "read-only", 1, 1, 1));

Copilot uses AI. Check for mistakes.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is a read-only command, but with a lock guard in merge action.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, i will work on it, i was unsure about how the command registration works and what exactly those parameters do in the MakeCmdAttr so it will be helpful if you could explain it in a little detail.
Thanks.

Copy link
Member

@LindaSummer LindaSummer Sep 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can refer to PR #2620 for further understanding of the MakeCmdAttr flags.

SetLastCmd(cmd_name);
{
std::optional<MultiLockGuard> guard;
if (cmd_flags & kCmdWrite) {
std::vector<std::string> lock_keys;
attributes->ForEachKeyRange(
[&lock_keys, this](const std::vector<std::string> &args, const CommandKeyRange &key_range) {
key_range.ForEachKey(
[&, this](const std::string &key) {
auto ns_key = ComposeNamespaceKey(ns_, key, srv_->storage->IsSlotIdEncoded());
lock_keys.emplace_back(std::move(ns_key));
},
args);
},
cmd_tokens);
guard.emplace(srv_->storage->GetLockManager(), lock_keys);
}
engine::Context ctx(srv_->storage);

Copy link
Contributor Author

@SharonIV0x86 SharonIV0x86 Sep 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have made this change.

} // namespace redis
60 changes: 60 additions & 0 deletions src/types/redis_tdigest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -414,6 +414,66 @@ rocksdb::Status TDigest::Merge(engine::Context& ctx, const Slice& dest_digest,

return storage_->Write(ctx, storage_->DefaultWriteOptions(), batch->GetWriteBatch());
}
rocksdb::Status TDigest::CDF(engine::Context& ctx, const Slice& digest_name, const std::vector<double>& inputs,
TDigestCDFResult* result) {
auto ns_key = AppendNamespacePrefix(digest_name);
TDigestMetadata metadata;
{
LockGuard guard(storage_->GetLockManager(), ns_key);

if (auto status = getMetaDataByNsKey(ctx, ns_key, &metadata); !status.ok()) {
return status;
}

if (metadata.unmerged_nodes > 0) {
auto batch = storage_->GetWriteBatchBase();
WriteBatchLogData log_data(kRedisTDigest);
if (auto status = batch->PutLogData(log_data.Encode()); !status.ok()) {
return status;
}

if (auto status = mergeCurrentBuffer(ctx, ns_key, batch, &metadata); !status.ok()) {
return status;
}

std::string metadata_bytes;
metadata.Encode(&metadata_bytes);
if (auto status = batch->Put(metadata_cf_handle_, ns_key, metadata_bytes); !status.ok()) {
return status;
}

if (auto status = storage_->Write(ctx, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); !status.ok()) {
return status;
}
ctx.RefreshLatestSnapshot();
}
}
std::vector<Centroid> centroids;
if (auto status = dumpCentroids(ctx, ns_key, metadata, &centroids); !status.ok()) {
return status;
}
auto dump_centroids = DummyCentroids(metadata, centroids);
double total_weight = dump_centroids.TotalWeight();
std::vector<double> results;
for (double val : inputs) {
auto iter_begin = dump_centroids.Begin();
auto iter_end = dump_centroids.End();
double eq_count = 0;
double smaller_count = 0;
for (; iter_begin->Valid(); iter_begin->Next()) {
auto current_centroid = iter_begin->GetCentroid();
if (val > current_centroid->mean) {
smaller_count++;
} else if (val == current_centroid->mean) {
eq_count++;
}
}
double cdf_val = (smaller_count / total_weight) + ((eq_count / 2) / total_weight);
Comment on lines +464 to +474
Copy link

Copilot AI Sep 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The CDF calculation is incorrect. It's counting the number of centroids rather than their weights. It should accumulate current_centroid->weight instead of incrementing by 1 for proper cumulative distribution calculation.

Suggested change
double eq_count = 0;
double smaller_count = 0;
for (; iter_begin->Valid(); iter_begin->Next()) {
auto current_centroid = iter_begin->GetCentroid();
if (val > current_centroid->mean) {
smaller_count++;
} else if (val == current_centroid->mean) {
eq_count++;
}
}
double cdf_val = (smaller_count / total_weight) + ((eq_count / 2) / total_weight);
double eq_weight = 0;
double smaller_weight = 0;
for (; iter_begin->Valid(); iter_begin->Next()) {
auto current_centroid = iter_begin->GetCentroid();
if (val > current_centroid->mean) {
smaller_weight += current_centroid->weight;
} else if (val == current_centroid->mean) {
eq_weight += current_centroid->weight;
}
}
double cdf_val = (smaller_weight / total_weight) + ((eq_weight / 2) / total_weight);

Copilot uses AI. Check for mistakes.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @SharonIV0x86 ,

It seems that we mistake the count with weight here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, I will take a look into it.

results.push_back(cdf_val);
}
result->cdf_values = results;
return rocksdb::Status::OK();
}

rocksdb::Status TDigest::GetMetaData(engine::Context& context, const Slice& digest_name, TDigestMetadata* metadata) {
auto ns_key = AppendNamespacePrefix(digest_name);
Expand Down
6 changes: 5 additions & 1 deletion src/types/redis_tdigest.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,9 @@ struct TDigestMergeOptions {
uint32_t compression = 0;
bool override_flag = false;
};

struct TDigestCDFResult {
std::vector<double> cdf_values;
};
struct TDigestQuantitleResult {
std::optional<std::vector<double>> quantiles;
};
Expand Down Expand Up @@ -79,6 +81,8 @@ class TDigest : public SubKeyScanner {
const TDigestMergeOptions& options);

rocksdb::Status GetMetaData(engine::Context& context, const Slice& digest_name, TDigestMetadata* metadata);
rocksdb::Status CDF(engine::Context& ctx, const Slice& digest_name, const std::vector<double>& inputs,
TDigestCDFResult* result);

private:
enum class SegmentType : uint8_t { kBuffer = 0, kCentroids = 1, kGuardFlag = 0xFF };
Expand Down
Loading