diff --git a/libdd-profiling-ffi/src/lib.rs b/libdd-profiling-ffi/src/lib.rs index 264423ae3c..067de9dbae 100644 --- a/libdd-profiling-ffi/src/lib.rs +++ b/libdd-profiling-ffi/src/lib.rs @@ -7,13 +7,13 @@ #![cfg_attr(not(test), deny(clippy::todo))] #![cfg_attr(not(test), deny(clippy::unimplemented))] -#[cfg(all(feature = "symbolizer", not(target_os = "windows")))] -pub use symbolizer_ffi::*; - mod exporter; mod profiles; mod string_storage; +#[cfg(all(feature = "symbolizer", not(target_os = "windows")))] +pub use symbolizer_ffi::*; + // re-export crashtracker ffi #[cfg(feature = "crashtracker-ffi")] pub use libdd_crashtracker_ffi::*; diff --git a/libdd-profiling-ffi/src/profiles/datatypes.rs b/libdd-profiling-ffi/src/profiles/datatypes.rs index 97f40c3476..51f4142e5a 100644 --- a/libdd-profiling-ffi/src/profiles/datatypes.rs +++ b/libdd-profiling-ffi/src/profiles/datatypes.rs @@ -6,8 +6,7 @@ use anyhow::Context; use function_name::named; use libdd_common_ffi::slice::{AsBytes, ByteSlice, CharSlice, Slice}; use libdd_common_ffi::{wrap_with_ffi_result, Error, Handle, Timespec, ToInner}; -use libdd_profiling::api; -use libdd_profiling::api::ManagedStringId; +use libdd_profiling::api::{self, ManagedStringId}; use libdd_profiling::internal; use std::num::NonZeroI64; use std::str::Utf8Error; diff --git a/libdd-profiling/benches/add_samples.rs b/libdd-profiling/benches/add_samples.rs new file mode 100644 index 0000000000..ca392216df --- /dev/null +++ b/libdd-profiling/benches/add_samples.rs @@ -0,0 +1,161 @@ +// Copyright 2025-Present Datadog, Inc. +// SPDX-License-Identifier: Apache-2.0 + +use criterion::*; +use libdd_profiling::api2::Location2; +use libdd_profiling::profiles::datatypes::{Function, FunctionId2, MappingId2}; +use libdd_profiling::{self as profiling, api, api2}; + +fn make_sample_types() -> Vec> { + vec![api::ValueType::new("samples", "count")] +} + +fn make_stack_api(frames: &[Frame]) -> (Vec>, Vec) { + // No mappings in Ruby, but the v1 API requires it. + let mapping = api::Mapping::default(); + let mut locations = Vec::with_capacity(frames.len()); + for frame in frames { + locations.push(api::Location { + mapping, + function: api::Function { + name: frame.function_name, + filename: frame.file_name, + ..Default::default() + }, + line: frame.line_number as i64, + ..Default::default() + }); + } + let values = vec![1i64]; + (locations, values) +} + +fn make_stack_api2(frames: &[Frame2]) -> (Vec, Vec) { + let mut locations = Vec::with_capacity(frames.len()); + + for frame in frames { + locations.push(Location2 { + mapping: MappingId2::default(), + function: frame.function, + address: 0, + line: frame.line_number as i64, + }); + } + + let values = vec![1i64]; + (locations, values) +} + +#[derive(Clone, Copy)] +struct Frame { + file_name: &'static str, + line_number: u32, + function_name: &'static str, +} + +impl Frame { + pub const fn new( + file_name: &'static str, + line_number: u32, + function_name: &'static str, + ) -> Self { + Self { + file_name, + line_number, + function_name, + } + } +} + +#[derive(Clone, Copy)] +struct Frame2 { + function: FunctionId2, + line_number: u32, +} + +pub fn bench_add_sample_vs_add2(c: &mut Criterion) { + let sample_types = make_sample_types(); + let dict = profiling::profiles::datatypes::ProfilesDictionary::try_new().unwrap(); + + // This is root-to-leaf, instead of leaf-to-root. We'll reverse it below. + // Taken from a Ruby app, everything here is source-available. + let mut frames = [ + Frame::new("/usr/local/bundle/gems/logging-2.4.0/lib/logging/diagnostic_context.rb", 474, "create_with_logging_context"), + Frame::new("/usr/local/bundle/gems/puma-6.4.3/lib/puma/thread_pool.rb", 155, "spawn_thread"), + Frame::new("/usr/local/bundle/gems/puma-6.4.3/lib/puma/server.rb", 245, "run"), + Frame::new("/usr/local/bundle/gems/puma-6.4.3/lib/puma/server.rb", 464, "process_client"), + Frame::new("/usr/local/bundle/gems/puma-6.4.3/lib/puma/request.rb", 99, "handle_request"), + Frame::new("/usr/local/bundle/gems/puma-6.4.3/lib/puma/thread_pool.rb", 378, "with_force_shutdown"), + Frame::new("/usr/local/bundle/gems/puma-6.4.3/lib/puma/request.rb", 100, "handle_request"), + Frame::new("/usr/local/bundle/gems/puma-6.4.3/lib/puma/configuration.rb", 272, "call"), + Frame::new("/usr/local/bundle/gems/railties-7.0.8.7/lib/rails/engine.rb", 530, "call"), + Frame::new("/usr/local/bundle/gems/datadog-2.18.0/lib/datadog/tracing/contrib/rack/middlewares.rb", 474, "call"), + Frame::new("/usr/local/bundle/gems/datadog-2.18.0/lib/datadog/tracing/contrib/rack/trace_proxy_middleware.rb", 17, "call"), + Frame::new("/usr/local/bundle/gems/datadog-2.18.0/lib/datadog/tracing/contrib/rack/middlewares.rb", 70, "call"), + Frame::new("/usr/local/bundle/gems/datadog-2.18.0/lib/datadog/appsec/contrib/rack/request_middleware.rb", 82, "call"), + Frame::new("/usr/local/lib/libruby.so.3.3", 0, "catch"), + Frame::new("/usr/local/bundle/gems/datadog-2.18.0/lib/datadog/appsec/contrib/rack/request_middleware.rb", 85, "catch"), + Frame::new("/usr/local/bundle/gems/datadog-2.18.0/lib/datadog/appsec/instrumentation/gateway.rb", 41, "push"), + Frame::new("/usr/local/bundle/gems/datadog-2.18.0/lib/datadog/appsec/instrumentation/gateway.rb", 37, "push"), + Frame::new("/usr/local/bundle/gems/datadog-2.18.0/lib/datadog/appsec/instrumentation/gateway/middleware.rb", 18, "call"), + ]; + frames.reverse(); + + let strings = dict.strings(); + let functions = dict.functions(); + + let frames2 = frames.map(|f| { + let set_id = functions + .try_insert(Function { + name: strings.try_insert(f.file_name).unwrap(), + system_name: Default::default(), + file_name: strings.try_insert(f.file_name).unwrap(), + }) + .unwrap(); + Frame2 { + function: FunctionId2::from(set_id), + line_number: f.line_number, + } + }); + let dict = profiling::profiles::collections::Arc::try_new(dict).unwrap(); + + c.bench_function("profile_add_sample_frames_x1000", |b| { + b.iter(|| { + let mut profile = profiling::internal::Profile::try_new(&sample_types, None).unwrap(); + let (locations, values) = make_stack_api(frames.as_slice()); + for _ in 0..1000 { + let sample = api::Sample { + locations: locations.clone(), + values: &values, + labels: vec![], + }; + black_box(profile.try_add_sample(sample, None)).unwrap(); + } + black_box(profile.only_for_testing_num_aggregated_samples()) + }) + }); + + c.bench_function("profile_add_sample2_frames_x1000", |b| { + b.iter(|| { + let mut profile = profiling::internal::Profile::try_new_with_dictionary( + &sample_types, + None, + dict.try_clone().unwrap(), + ) + .unwrap(); + let (locations, values) = make_stack_api2(frames2.as_slice()); + for _ in 0..1000 { + // Provide an empty iterator for labels conversion path + let labels_iter = std::iter::empty::>(); + // SAFETY: all ids come from the profile's dictionary. + black_box(unsafe { + profile.try_add_sample2(&locations, &values, labels_iter, None) + }) + .unwrap(); + } + black_box(profile.only_for_testing_num_aggregated_samples()) + }) + }); +} + +criterion_group!(benches, bench_add_sample_vs_add2); diff --git a/libdd-profiling/benches/main.rs b/libdd-profiling/benches/main.rs index e2d220f767..3c29e7fef6 100644 --- a/libdd-profiling/benches/main.rs +++ b/libdd-profiling/benches/main.rs @@ -3,6 +3,7 @@ use criterion::criterion_main; +mod add_samples; mod interning_strings; -criterion_main!(interning_strings::benches); +criterion_main!(interning_strings::benches, add_samples::benches); diff --git a/libdd-profiling/src/api2.rs b/libdd-profiling/src/api2.rs new file mode 100644 index 0000000000..3dd35e90c8 --- /dev/null +++ b/libdd-profiling/src/api2.rs @@ -0,0 +1,57 @@ +// Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use crate::profiles::datatypes::{FunctionId2, MappingId2, StringId2}; + +#[derive(Copy, Clone, Debug, Default)] +#[repr(C)] +pub struct Location2 { + pub mapping: MappingId2, + pub function: FunctionId2, + + /// The instruction address for this location, if available. It + /// should be within [Mapping.memory_start...Mapping.memory_limit] + /// for the corresponding mapping. A non-leaf address may be in the + /// middle of a call instruction. It is up to display tools to find + /// the beginning of the instruction if necessary. + pub address: u64, + pub line: i64, +} + +#[derive(Copy, Clone, Debug, Default)] +pub struct Label<'a> { + pub key: StringId2, + + /// At most one of `.str` and `.num` should not be empty. + pub str: &'a str, + pub num: i64, + + /// Should only be present when num is present. + /// Specifies the units of num. + /// Use arbitrary string (for example, "requests") as a custom count unit. + /// If no unit is specified, consumer may apply heuristic to deduce the unit. + /// Consumers may also interpret units like "bytes" and "kilobytes" as memory + /// units and units like "seconds" and "nanoseconds" as time units, + /// and apply appropriate unit conversions to these. + pub num_unit: &'a str, +} + +impl<'a> Label<'a> { + pub const fn str(key: StringId2, str: &'a str) -> Label<'a> { + Label { + key, + str, + num: 0, + num_unit: "", + } + } + + pub const fn num(key: StringId2, num: i64, num_unit: &'a str) -> Label<'a> { + Label { + key, + str: "", + num, + num_unit, + } + } +} diff --git a/libdd-profiling/src/internal/function.rs b/libdd-profiling/src/internal/function.rs index 3be7292005..1291cf70a8 100644 --- a/libdd-profiling/src/internal/function.rs +++ b/libdd-profiling/src/internal/function.rs @@ -6,7 +6,7 @@ use super::*; /// Represents a [pprof::Function] with some space-saving changes: /// - The id is not stored on the struct. It's stored in the container that holds the struct. /// - ids for linked objects use 32-bit numbers instead of 64 bit ones. -#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, PartialOrd, Ord)] +#[derive(Copy, Clone, Debug, Default, Eq, PartialEq, Hash, PartialOrd, Ord)] pub struct Function { pub name: StringId, pub system_name: StringId, diff --git a/libdd-profiling/src/internal/owned_types/mod.rs b/libdd-profiling/src/internal/owned_types/mod.rs index a828f0639c..d4955292f1 100644 --- a/libdd-profiling/src/internal/owned_types/mod.rs +++ b/libdd-profiling/src/internal/owned_types/mod.rs @@ -33,6 +33,13 @@ pub struct Period { pub value: i64, } +impl<'a> From> for Period { + #[inline] + fn from(period: api::Period<'a>) -> Self { + Period::from(&period) + } +} + impl<'a> From<&'a api::Period<'a>> for Period { #[inline] fn from(period: &'a api::Period<'a>) -> Self { diff --git a/libdd-profiling/src/internal/profile/mod.rs b/libdd-profiling/src/internal/profile/mod.rs index fa687f43b1..4a4e1f9d28 100644 --- a/libdd-profiling/src/internal/profile/mod.rs +++ b/libdd-profiling/src/internal/profile/mod.rs @@ -5,27 +5,37 @@ mod fuzz_tests; pub mod interning_api; +mod profiles_dictionary_translator; + +pub use profiles_dictionary_translator::*; use self::api::UpscalingInfo; use super::*; -use crate::api; use crate::api::ManagedStringId; use crate::collections::identifiable::*; use crate::collections::string_storage::{CachedProfileId, ManagedStringStorage}; use crate::collections::string_table::{self, StringTable}; +use crate::internal::owned_types; use crate::iter::{IntoLendingIterator, LendingIterator}; +use crate::profiles::collections::ArcOverflow; +use crate::profiles::datatypes::ProfilesDictionary; use crate::profiles::{Compressor, DefaultProfileCodec}; +use crate::{api, api2}; use anyhow::Context; use interning_api::Generation; use libdd_profiling_protobuf::{self as protobuf, Record, Value, NO_OPT_ZERO, OPT_ZERO}; use std::borrow::Cow; use std::collections::HashMap; use std::io; +use std::ops::Deref; use std::sync::atomic::AtomicU64; use std::sync::{Arc, Mutex}; use std::time::{Duration, SystemTime}; pub struct Profile { + /// Translates from the new Id2 APIs to the older internal APIs. Long-term, + /// this should probably use the dictionary directly. + profiles_dictionary_translator: Option, /// When profiles are reset, the sample-types need to be preserved. This /// maintains them in a way that does not depend on the string table. The /// Option part is this is taken from the old profile and moved to the new @@ -133,6 +143,8 @@ impl Profile { let labels = { let mut lbls = Vec::new(); + // Using try_reserve_exact because it will be converted to Box<[]>, + // so excess capacity would make that conversion more expensive. lbls.try_reserve_exact(sample.labels.len())?; for label in &sample.labels { let key = self.try_intern(label.key)?; @@ -162,6 +174,98 @@ impl Profile { self.try_add_sample_internal(sample.values, labels, locations, timestamp) } + /// Tries to add a sample using `api2` structures. + /// + /// # Safety + /// + /// All MappingId2, FunctionId2, and StringId2 values should be coming + /// from the same profiles dictionary used by this profile internally. + pub unsafe fn try_add_sample2< + 'a, + L: ExactSizeIterator>>, + >( + &mut self, + locations: &[api2::Location2], + values: &[i64], + labels: L, + timestamp: Option, + ) -> anyhow::Result<()> { + let Some(translator) = &mut self.profiles_dictionary_translator else { + anyhow::bail!("profiles dictionary not set"); + }; + + // In debug builds, we iterate over the labels twice. That's not + // something the trait bounds support, so we collect into a vector. + // Since this is debug-only, the performance is fine. + #[cfg(debug_assertions)] + let labels = labels.collect::>(); + #[cfg(debug_assertions)] + { + Self::validate_sample_labels2(labels.as_slice())?; + } + + let string_table = &mut self.strings; + let functions = &mut self.functions; + let mappings = &mut self.mappings; + let locations_set = &mut self.locations; + let labels_set = &mut self.labels; + + let labels = { + let mut lbls = Vec::new(); + // Using try_reserve_exact because it will be converted to Box<[]>, + // so excess capacity would make that conversion more expensive. + lbls.try_reserve_exact(labels.len())?; + for label in labels { + let label = label.context("profile label failed to convert")?; + let key = translator.translate_string(string_table, label.key.into())?; + let internal_label = if !label.str.is_empty() { + let str = string_table.try_intern(label.str)?; + Label::str(key, str) + } else { + let num = label.num; + let num_unit = string_table.try_intern(label.num_unit)?; + Label::num(key, num, num_unit) + }; + + let id = labels_set.try_dedup(internal_label)?; + lbls.push(id); + } + lbls.into_boxed_slice() + }; + + let mut internal_locations = Vec::new(); + internal_locations + .try_reserve_exact(locations.len()) + .context("failed to reserve memory for sample locations")?; + for location in locations { + let l = Location { + mapping_id: translator.translate_mapping( + mappings, + string_table, + location.mapping, + )?, + function_id: translator.translate_function( + functions, + string_table, + location.function, + )?, + address: location.address, + line: location.line, + }; + let location_id = locations_set.checked_dedup(l)?; + internal_locations.push(location_id); + } + + self.try_add_sample_internal(values, labels, internal_locations, timestamp) + } + + /// Gets the profiles dictionary, needed for `api2` operations. + pub fn get_profiles_dictionary(&self) -> Option<&ProfilesDictionary> { + self.profiles_dictionary_translator + .as_ref() + .map(|p| p.profiles_dictionary.deref()) + } + pub fn add_string_id_sample( &mut self, sample: api::StringIdSample, @@ -283,12 +387,15 @@ impl Profile { } /// Tries to create a profile with the given `period`. - /// Initializes the string table to hold: + /// Initializes the string table to hold common strings such as: /// - "" (the empty string) /// - "local root span id" /// - "trace endpoint" + /// - "end_timestamp_ns" /// /// All other fields are default. + /// + /// It's recommended to use [`Profile::try_new_with_dictionary`] instead. pub fn try_new( sample_types: &[api::ValueType], period: Option, @@ -297,6 +404,28 @@ impl Profile { Self::backup_period(period), Self::backup_sample_types(sample_types), None, + None, + ) + } + + /// Tries to create a profile with the given period and sample types. + #[inline(never)] + #[cold] + pub fn try_new_with_dictionary( + sample_types: &[api::ValueType], + period: Option, + profiles_dictionary: crate::profiles::collections::Arc, + ) -> io::Result { + let mut owned_sample_types = Vec::new(); + // Using try_reserve_exact because it will be converted to Box<[]>, + // so excess capacity would make that conversion more expensive. + owned_sample_types.try_reserve_exact(sample_types.len())?; + owned_sample_types.extend(sample_types.iter().map(owned_types::ValueType::from)); + Self::try_new_internal( + period.map(owned_types::Period::from), + Some(owned_sample_types.into_boxed_slice()), + None, + Some(ProfilesDictionaryTranslator::new(profiles_dictionary)), ) } @@ -310,6 +439,7 @@ impl Profile { Self::backup_period(period), Self::backup_sample_types(sample_types), Some(string_storage), + None, ) } @@ -323,10 +453,20 @@ impl Profile { "Can't rotate the profile, there are still active samples. Drain them and try again." ); + let profiles_dictionary_translator = self + .profiles_dictionary_translator + .as_ref() + .map(|t| -> Result<_, ArcOverflow> { + let dict = t.profiles_dictionary.try_clone()?; + Ok(ProfilesDictionaryTranslator::new(dict)) + }) + .transpose()?; + let mut profile = Profile::try_new_internal( self.owned_period.take(), self.owned_sample_types.take(), self.string_storage.clone(), + profiles_dictionary_translator, ) .context("failed to initialize new profile")?; @@ -785,9 +925,11 @@ impl Profile { owned_period: Option, owned_sample_types: Option>, string_storage: Option>>, + profiles_dictionary_translator: Option, ) -> io::Result { let start_time = SystemTime::now(); let mut profile = Self { + profiles_dictionary_translator, owned_period, owned_sample_types, active_samples: Default::default(), @@ -905,6 +1047,35 @@ impl Profile { } Ok(()) } + + #[cfg(debug_assertions)] + fn validate_sample_labels2(labels: &[anyhow::Result]) -> anyhow::Result<()> { + use crate::profiles::collections::StringRef; + let mut seen: HashMap = HashMap::with_capacity(labels.len()); + + for label in labels.iter() { + let Ok(label) = label.as_ref() else { + anyhow::bail!("profiling FFI label string failed to convert") + }; + let key = StringRef::from(label.key); + if let Some(duplicate) = seen.insert(key, label) { + anyhow::bail!("Duplicate label on sample: {duplicate:?} {label:?}"); + } + + if key == StringRef::LOCAL_ROOT_SPAN_ID { + anyhow::ensure!( + label.str.is_empty() && label.num != 0, + "Invalid \"local root span id\" label: {label:?}" + ); + } + + anyhow::ensure!( + key != StringRef::END_TIMESTAMP_NS, + "Timestamp should not be passed as a label {label:?}" + ); + } + Ok(()) + } } /// For testing and debugging purposes @@ -931,6 +1102,7 @@ mod api_tests { use super::*; use crate::pprof::test_utils::{roundtrip_to_pprof, sorted_samples, string_table_fetch}; use libdd_profiling_protobuf::prost_impls; + use std::collections::HashSet; #[test] fn interning() { @@ -2543,6 +2715,207 @@ mod api_tests { Ok(()) } + #[test] + #[cfg_attr(miri, ignore)] + fn test_try_new_with_dictionary_and_try_add_sample2() { + struct Frame { + file_name: &'static str, + line_number: u32, + function_name: &'static str, + } + + // Create a ProfilesDictionary with realistic data from Ruby app + let dict = crate::profiles::datatypes::ProfilesDictionary::try_new().unwrap(); + let sample_types = vec![api::ValueType::new("samples", "count")]; + + // Ruby stack trace (leaf-to-root order) + // Taken from a Ruby app, everything here is source-available + let frames = [ + Frame { file_name: "/usr/local/bundle/gems/datadog-2.18.0/lib/datadog/appsec/instrumentation/gateway/middleware.rb", line_number: 18, function_name: "call" }, + Frame { file_name: "/usr/local/bundle/gems/datadog-2.18.0/lib/datadog/appsec/instrumentation/gateway.rb", line_number: 37, function_name: "push" }, + Frame { file_name: "/usr/local/bundle/gems/datadog-2.18.0/lib/datadog/appsec/instrumentation/gateway.rb", line_number: 41, function_name: "push" }, + Frame { file_name: "/usr/local/bundle/gems/datadog-2.18.0/lib/datadog/appsec/contrib/rack/request_middleware.rb", line_number: 85, function_name: "catch" }, + Frame { file_name: "/usr/local/lib/libruby.so.3.3", line_number: 0, function_name: "catch" }, + Frame { file_name: "/usr/local/bundle/gems/datadog-2.18.0/lib/datadog/appsec/contrib/rack/request_middleware.rb", line_number: 82, function_name: "call" }, + Frame { file_name: "/usr/local/bundle/gems/datadog-2.18.0/lib/datadog/tracing/contrib/rack/middlewares.rb", line_number: 70, function_name: "call" }, + Frame { file_name: "/usr/local/bundle/gems/datadog-2.18.0/lib/datadog/tracing/contrib/rack/trace_proxy_middleware.rb", line_number: 17, function_name: "call" }, + Frame { file_name: "/usr/local/bundle/gems/datadog-2.18.0/lib/datadog/tracing/contrib/rack/middlewares.rb", line_number: 474, function_name: "call" }, + Frame { file_name: "/usr/local/bundle/gems/railties-7.0.8.7/lib/rails/engine.rb", line_number: 530, function_name: "call" }, + Frame { file_name: "/usr/local/bundle/gems/puma-6.4.3/lib/puma/configuration.rb", line_number: 272, function_name: "call" }, + Frame { file_name: "/usr/local/bundle/gems/puma-6.4.3/lib/puma/request.rb", line_number: 100, function_name: "handle_request" }, + Frame { file_name: "/usr/local/bundle/gems/puma-6.4.3/lib/puma/thread_pool.rb", line_number: 378, function_name: "with_force_shutdown" }, + Frame { file_name: "/usr/local/bundle/gems/puma-6.4.3/lib/puma/request.rb", line_number: 99, function_name: "handle_request" }, + Frame { file_name: "/usr/local/bundle/gems/puma-6.4.3/lib/puma/server.rb", line_number: 464, function_name: "process_client" }, + Frame { file_name: "/usr/local/bundle/gems/puma-6.4.3/lib/puma/server.rb", line_number: 245, function_name: "run" }, + Frame { file_name: "/usr/local/bundle/gems/puma-6.4.3/lib/puma/thread_pool.rb", line_number: 155, function_name: "spawn_thread" }, + Frame { file_name: "/usr/local/bundle/gems/logging-2.4.0/lib/logging/diagnostic_context.rb", line_number: 474, function_name: "create_with_logging_context" }, + ]; + + // Create a fake mapping to exercise the code path (Ruby doesn't currently use mappings) + let fake_mapping_filename = dict.try_insert_str2("/usr/lib/ruby.so").unwrap(); + let fake_mapping = crate::profiles::datatypes::Mapping2 { + memory_start: 0x7f0000000000, + memory_limit: 0x7f0000100000, + file_offset: 0, + filename: fake_mapping_filename, + build_id: crate::profiles::datatypes::StringId2::default(), + }; + let mapping_id = dict.try_insert_mapping2(fake_mapping).unwrap(); + + // Create locations from frames + let mut locations = Vec::new(); + for frame in &frames { + let function_name_id = dict.try_insert_str2(frame.function_name).unwrap(); + let filename_id = dict.try_insert_str2(frame.file_name).unwrap(); + let function = crate::profiles::datatypes::Function2 { + name: function_name_id, + system_name: crate::profiles::datatypes::StringId2::default(), + file_name: filename_id, + }; + let function_id = dict.try_insert_function2(function).unwrap(); + + locations.push(api2::Location2 { + mapping: mapping_id, + function: function_id, + address: 0, + line: frame.line_number as i64, + }); + } + + // Wrap in Arc + let dict = crate::profiles::collections::Arc::try_new(dict).unwrap(); + + // Create profile with dictionary + let mut profile = + Profile::try_new_with_dictionary(&sample_types, None, dict.try_clone().unwrap()) + .unwrap(); + + assert_eq!(profile.only_for_testing_num_aggregated_samples(), 0); + + let values = vec![1i64]; + + // Add sample without labels + let labels_iter = std::iter::empty::>(); + // SAFETY: adding ids from the correct ProfilesDictionary. + unsafe { + profile + .try_add_sample2(&locations, &values, labels_iter, None) + .expect("add to succeed"); + } + + assert_eq!(profile.only_for_testing_num_aggregated_samples(), 1); + + // Add same sample again - should aggregate + let labels_iter = std::iter::empty::>(); + // SAFETY: adding ids from the correct ProfilesDictionary. + unsafe { + profile + .try_add_sample2(&locations, &values, labels_iter, None) + .expect("add to succeed"); + } + + // Still 1 sample because it aggregated + assert_eq!(profile.only_for_testing_num_aggregated_samples(), 1); + + // Test with labels + let label_key = dict.try_insert_str2("thread_id").unwrap(); + let label_value = "worker-1"; + + let labels_iter = std::iter::once(Ok(api2::Label::str(label_key, label_value))); + // SAFETY: adding ids from the correct ProfilesDictionary. + unsafe { + profile + .try_add_sample2(&locations, &values, labels_iter, None) + .expect("add with label to succeed"); + } + + // Should be 2 samples now (different label set) + assert_eq!(profile.only_for_testing_num_aggregated_samples(), 2); + + // Test with numeric label + let thread_id_key = dict.try_insert_str2("thread_id_num").unwrap(); + let labels_iter = std::iter::once(Ok(api2::Label::num(thread_id_key, 42, ""))); + // SAFETY: adding ids from the correct ProfilesDictionary. + unsafe { + profile + .try_add_sample2(&locations, &values, labels_iter, None) + .expect("add with numeric label to succeed"); + } + + // Should be 3 samples now + assert_eq!(profile.only_for_testing_num_aggregated_samples(), 3); + + // Verify the profile roundtrips correctly through pprof serialization + let pprof = roundtrip_to_pprof(profile).unwrap(); + assert_eq!(pprof.samples.len(), 3); + + // Verify we have the expected sample type + assert_eq!(pprof.sample_types.len(), 1); + let sample_type = &pprof.sample_types[0]; + let type_str = &pprof.string_table[sample_type.r#type as usize]; + let unit_str = &pprof.string_table[sample_type.unit as usize]; + assert_eq!(type_str, "samples"); + assert_eq!(unit_str, "count"); + + // Verify the mapping is present and has the correct filename + assert_eq!(pprof.mappings.len(), 1); + let mapping = &pprof.mappings[0]; + let mapping_filename = &pprof.string_table[mapping.filename as usize]; + assert_eq!(mapping_filename, "/usr/lib/ruby.so"); + + // Verify all 18 locations are present in each sample (same stack) + assert_eq!(pprof.samples[0].location_ids.len(), 18); + assert_eq!(pprof.samples[1].location_ids.len(), 18); + assert_eq!(pprof.samples[2].location_ids.len(), 18); + + // Verify all filenames and function names from our frames are present + let mut expected_files = std::collections::HashSet::new(); + let mut expected_functions = std::collections::HashSet::new(); + for frame in &frames { + expected_files.insert(frame.file_name); + expected_functions.insert(frame.function_name); + } + + let string_table_set: std::collections::HashSet<&str> = + pprof.string_table.iter().map(|s| s.as_str()).collect(); + + assert!( + expected_files.is_subset(&string_table_set), + "Missing files from string table: {:?}", + expected_files + .difference(&string_table_set) + .collect::>() + ); + + assert!( + expected_functions.is_subset(&string_table_set), + "Missing functions from string table: {:?}", + expected_functions + .difference(&string_table_set) + .collect::>() + ); + + // Verify the label keys and values we added are present in string table + let expected_label_strings = ["thread_id", "thread_id_num", "worker-1"] + .into_iter() + .collect::>(); + let diff = expected_label_strings + .difference(&string_table_set) + .collect::>(); + assert!( + diff.is_empty(), + "Missing label strings from string table: {:?}", + diff + ); + + // Verify sample values + // We have 3 samples: one with value 2 (aggregated), two with value 1 + // Samples may be reordered, so collect and sort the values + let mut values: Vec = pprof.samples.iter().map(|s| s.values[0]).collect(); + values.sort_unstable(); + assert_eq!(values, vec![1, 1, 2]); + } + #[test] fn test_regression_managed_string_table_correctly_maps_ids() { let storage = Arc::new(Mutex::new(ManagedStringStorage::new())); diff --git a/libdd-profiling/src/internal/profile/profiles_dictionary_translator.rs b/libdd-profiling/src/internal/profile/profiles_dictionary_translator.rs new file mode 100644 index 0000000000..794a7a56bc --- /dev/null +++ b/libdd-profiling/src/internal/profile/profiles_dictionary_translator.rs @@ -0,0 +1,173 @@ +// Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use crate::collections::identifiable::{Dedup, FxIndexMap, StringId}; +use crate::collections::string_table::StringTable; +use crate::internal::{Function, FunctionId, Mapping, MappingId}; +use crate::profiles::collections::{SetId, StringRef}; +use crate::profiles::datatypes::{self as dt, FunctionId2, MappingId2, ProfilesDictionary}; +use anyhow::Context; +use indexmap::map::Entry; +use std::ptr::NonNull; + +/// Translates IDs from a [`ProfilesDictionary`] into the IDs used by the +/// current Profile's internal collections. +/// +/// # Safety +/// +/// All IDs passed to the translate methods (translate_function, +/// translate_mapping, translate_string) MUST have been created by the same +/// ProfilesDictionary that this translator wraps. +pub struct ProfilesDictionaryTranslator { + pub profiles_dictionary: crate::profiles::collections::Arc, + pub mappings: FxIndexMap, MappingId>, + pub functions: FxIndexMap>, FunctionId>, + pub strings: FxIndexMap, +} + +// SAFETY: ProfilesDictionaryTranslator is Send because: +// 1. The profiles_dictionary Arc ensures the underlying storage remains alive and valid for the +// lifetime of this translator, and Arc is Send when T is Send + Sync. ProfilesDictionary is +// Send + Sync. +// 2. SetId and StringRef are non-owning handles (thin pointers) to immutable data in the +// ProfilesDictionary's concurrent collections, which use arena allocation with stable addresses. +// The Arc protects this data, making the pointers safe to send across threads. +// 3. FxIndexMap is Send when K and V are Send. The keys (SetId, StringRef) and values +// (MappingId, FunctionId, StringId) are all Copy types that are Send. +unsafe impl Send for ProfilesDictionaryTranslator {} + +impl ProfilesDictionaryTranslator { + pub fn new( + profiles_dictionary: crate::profiles::collections::Arc, + ) -> ProfilesDictionaryTranslator { + ProfilesDictionaryTranslator { + profiles_dictionary, + mappings: Default::default(), + functions: Default::default(), + strings: Default::default(), + } + } + + /// Translates a FunctionId2 from the ProfilesDictionary into a FunctionId + /// for this profile's StringTable. + /// + /// # Safety + /// + /// The `id2` must have been created by `self.profiles_dictionary`, and + /// the strings must also live in the same dictionary. + pub unsafe fn translate_function( + &mut self, + functions: &mut impl Dedup, + string_table: &mut StringTable, + id2: FunctionId2, + ) -> anyhow::Result { + let (set_id, function) = match NonNull::new(id2.0) { + // Since the internal model treats functions as required, we + // translate the null FunctionId2 to the default function. + None => (None, Function::default()), + Some(nn) => { + let set_id = SetId(nn.cast::()); + if let Some(internal) = self.functions.get(&Some(set_id)) { + return Ok(*internal); + } + + // SAFETY: This is safe if `id2` (the FunctionId2) was created by + // `self.profiles_dictionary`, which is a precondition of calling + // this method. + let function = unsafe { *self.profiles_dictionary.functions().get(set_id) }; + // SAFETY: safe if the strings were made by + // `self.profiles_dictionary`, which is a precondition of + // calling this method. + let function = unsafe { + Function { + name: self.translate_string(string_table, function.name)?, + system_name: self.translate_string(string_table, function.system_name)?, + filename: self.translate_string(string_table, function.file_name)?, + } + }; + (Some(set_id), function) + } + }; + + let internal_id = functions + .try_dedup(function) + .context("failed to deduplicate function in ProfilesDictionaryTranslator")?; + self.functions.try_reserve(1).context( + "failed to reserve memory for a new function in ProfilesDictionaryTranslator", + )?; + self.functions.insert(set_id, internal_id); + Ok(internal_id) + } + + /// Translates a MappingId2 from the ProfilesDictionary into a MappingId + /// for this profile's internal collections. + /// + /// # Safety + /// + /// The `id2` must have been created by `self.profiles_dictionary`, and + /// the strings must also live in the same dictionary. + pub unsafe fn translate_mapping( + &mut self, + mappings: &mut impl Dedup, + string_table: &mut StringTable, + id2: MappingId2, + ) -> anyhow::Result> { + // Translate null MappingId2 to Ok(None). This is different from + // functions because the internal module uses Option, + // whereas it assumes functions are required. + let Some(nn) = NonNull::new(id2.0) else { + return Ok(None); + }; + let set_id = SetId(nn.cast::()); + if let Some(internal) = self.mappings.get(&set_id) { + return Ok(Some(*internal)); + } + + // SAFETY: This is safe if `id2` (the MappingId2) was created by + // `self.profiles_dictionary`, which is a precondition of calling + // this method. + let mapping = unsafe { *self.profiles_dictionary.mappings().get(set_id) }; + let internal = Mapping { + memory_start: mapping.memory_start, + memory_limit: mapping.memory_limit, + file_offset: mapping.file_offset, + filename: self.translate_string(string_table, mapping.filename)?, + build_id: self.translate_string(string_table, mapping.build_id)?, + }; + let internal_id = mappings + .try_dedup(internal) + .context("failed to deduplicate mapping in ProfilesDictionaryTranslator")?; + self.mappings.try_reserve(1).context( + "failed to reserve memory for a new mapping in ProfilesDictionaryTranslator", + )?; + self.mappings.insert(set_id, internal_id); + Ok(Some(internal_id)) + } + + /// Translates a StringRef from the ProfilesDictionary into a StringId + /// for this profile's internal string table. + /// + /// # Safety + /// + /// The `str_ref` must have been created by `self.profiles_dictionary`. + /// Violating this precondition results in undefined behavior. + pub unsafe fn translate_string( + &mut self, + string_table: &mut StringTable, + str_ref: StringRef, + ) -> anyhow::Result { + self.strings.try_reserve(1)?; + match self.strings.entry(str_ref) { + Entry::Occupied(o) => Ok(*o.get()), + Entry::Vacant(v) => { + // SAFETY: This is safe if `str_ref` was created by + // `self.profiles_dictionary`, which is a precondition of calling + // this method. + let str = unsafe { self.profiles_dictionary.strings().get(str_ref) }; + let internal_id = string_table.try_intern(str)?; + v.insert(internal_id); + Ok(internal_id) + } + } + } +} diff --git a/libdd-profiling/src/lib.rs b/libdd-profiling/src/lib.rs index 486104e792..01a99d6c44 100644 --- a/libdd-profiling/src/lib.rs +++ b/libdd-profiling/src/lib.rs @@ -7,6 +7,7 @@ #![cfg_attr(not(test), deny(clippy::unimplemented))] pub mod api; +pub mod api2; pub mod collections; pub mod exporter; pub mod internal; diff --git a/libdd-profiling/src/profiles/datatypes/function.rs b/libdd-profiling/src/profiles/datatypes/function.rs index aa98b33a1f..9245ed4320 100644 --- a/libdd-profiling/src/profiles/datatypes/function.rs +++ b/libdd-profiling/src/profiles/datatypes/function.rs @@ -9,7 +9,10 @@ use crate::profiles::datatypes::StringId2; /// doesn't use this in any way. /// /// This representation is used internally by the `ProfilesDictionary`, and -/// utilizes the fact that `StringRef`s don't have null values. +/// utilizes the fact that `StringRef`s don't have null values. It is also +/// repr(C) to be layout-compatible with [`Function2`]. Every pointer to a +/// Function is a valid Function2 (but the reverse is not true for the null +/// case of null StringId2). #[derive(Copy, Clone, Debug, Default, Eq, PartialEq, Hash)] #[repr(C)] pub struct Function { @@ -18,7 +21,8 @@ pub struct Function { pub file_name: StringRef, } -/// An FFI-safe version of the Function which allows null. +/// An FFI-safe version of the Function which allows null. Be sure to maintain +/// layout-compatibility with it, except that StringId2 may be null. #[repr(C)] #[derive(Copy, Clone, Debug, Default)] pub struct Function2 { diff --git a/libdd-profiling/src/profiles/datatypes/mapping.rs b/libdd-profiling/src/profiles/datatypes/mapping.rs index 25aa37fb40..66f7c4e1b7 100644 --- a/libdd-profiling/src/profiles/datatypes/mapping.rs +++ b/libdd-profiling/src/profiles/datatypes/mapping.rs @@ -9,7 +9,10 @@ use crate::profiles::datatypes::StringId2; /// in any way. /// /// This representation is used internally by the `ProfilesDictionary`, and -/// utilizes the fact that `StringRef`s don't have null values. +/// utilizes the fact that `StringRef`s don't have null values. It is also +/// repr(C) to be layout-compatible with [`Mapping2`]. Every pointer to a +/// Mapping is a valid Mapping2 (but the reverse is not true for the null case +/// of null StringId2). #[repr(C)] #[derive(Copy, Clone, Debug, Default, Eq, PartialEq, Hash)] pub struct Mapping { @@ -20,7 +23,8 @@ pub struct Mapping { pub build_id: StringRef, // missing in Otel, is it made into an attribute? } -/// An FFI-safe version of the Mapping which allows null. +/// An FFI-safe version of the Mapping which allows null. Be sure to maintain +/// layout-compatibility with it, except that StringId2 may be null. #[repr(C)] #[derive(Copy, Clone, Debug, Default)] pub struct Mapping2 { @@ -107,6 +111,7 @@ impl From> for MappingId2 { mod tests { use super::*; use std::mem::offset_of; + #[test] fn v1_and_v2_have_compatible_representations() { // Begin with basic size and alignment. diff --git a/libdd-profiling/src/profiles/datatypes/mod.rs b/libdd-profiling/src/profiles/datatypes/mod.rs index 9706609582..45892fc97a 100644 --- a/libdd-profiling/src/profiles/datatypes/mod.rs +++ b/libdd-profiling/src/profiles/datatypes/mod.rs @@ -5,8 +5,10 @@ mod function; mod mapping; mod profiles_dictionary; mod string; +mod value_type; pub use function::*; pub use mapping::*; pub use profiles_dictionary::*; pub use string::*; +pub use value_type::*; diff --git a/libdd-profiling/src/profiles/datatypes/profiles_dictionary.rs b/libdd-profiling/src/profiles/datatypes/profiles_dictionary.rs index 7259903b22..f47a5e0601 100644 --- a/libdd-profiling/src/profiles/datatypes/profiles_dictionary.rs +++ b/libdd-profiling/src/profiles/datatypes/profiles_dictionary.rs @@ -167,7 +167,7 @@ mod tests { build_id: build_id_id, }; - // Test insert and read back (exercises unsafe transmute) + // Test insert and read back. let id1 = dict.try_insert_mapping2(mapping2).unwrap(); prop_assert!(!id1.is_empty()); diff --git a/libdd-profiling/src/profiles/datatypes/value_type.rs b/libdd-profiling/src/profiles/datatypes/value_type.rs new file mode 100644 index 0000000000..7eb6d0aece --- /dev/null +++ b/libdd-profiling/src/profiles/datatypes/value_type.rs @@ -0,0 +1,17 @@ +// Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use crate::profiles::collections::StringRef; + +#[repr(C)] +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub struct ValueType { + pub type_id: StringRef, + pub unit_id: StringRef, +} + +impl ValueType { + pub fn new(type_id: StringRef, unit_id: StringRef) -> ValueType { + ValueType { type_id, unit_id } + } +}