From 7d910766cc11edd7335b337d4f0829c1b2b53cf9 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Mon, 5 Jan 2026 15:34:48 -0800 Subject: [PATCH 01/92] save --- rust/otap-dataflow/Cargo.toml | 1 + .../otap-dataflow/crates/telemetry/Cargo.toml | 5 +- .../otap-dataflow/crates/telemetry/src/lib.rs | 1 + .../src/tracing_integration/log_record.rs | 501 ++++++++++++++++++ .../telemetry/src/tracing_integration/mod.rs | 14 + .../tracing_integration/otlp_bytes_channel.rs | 157 ++++++ .../otlp_bytes_formatter.rs | 470 ++++++++++++++++ .../src/tracing_integration/subscriber.rs | 327 ++++++++++++ 8 files changed, 1475 insertions(+), 1 deletion(-) create mode 100644 rust/otap-dataflow/crates/telemetry/src/tracing_integration/log_record.rs create mode 100644 rust/otap-dataflow/crates/telemetry/src/tracing_integration/mod.rs create mode 100644 rust/otap-dataflow/crates/telemetry/src/tracing_integration/otlp_bytes_channel.rs create mode 100644 rust/otap-dataflow/crates/telemetry/src/tracing_integration/otlp_bytes_formatter.rs create mode 100644 rust/otap-dataflow/crates/telemetry/src/tracing_integration/subscriber.rs diff --git a/rust/otap-dataflow/Cargo.toml b/rust/otap-dataflow/Cargo.toml index 0bcf4e5aa4..eaeb006d30 100644 --- a/rust/otap-dataflow/Cargo.toml +++ b/rust/otap-dataflow/Cargo.toml @@ -50,6 +50,7 @@ otap-df-pdata-otlp-model = { path = "./crates/pdata/src/otlp/model"} otap-df-config = { path = "crates/config" } otap-df-controller = { path = "crates/controller" } otap-df-otap = { path = "crates/otap" } +otap-df-pdata = { path = "crates/pdata" } quiver = { package = "otap-df-quiver", path = "crates/quiver" } data_engine_expressions = { path = "../experimental/query_engine/expressions" } data_engine_kql_parser = { path = "../experimental/query_engine/kql-parser" } diff --git a/rust/otap-dataflow/crates/telemetry/Cargo.toml b/rust/otap-dataflow/crates/telemetry/Cargo.toml index dce25e697a..50cf039dbc 100644 --- a/rust/otap-dataflow/crates/telemetry/Cargo.toml +++ b/rust/otap-dataflow/crates/telemetry/Cargo.toml @@ -19,8 +19,11 @@ unchecked-index = [] unchecked-arithmetic = [] [dependencies] -axum = { workspace = true } +otap-df-pdata = { workspace = true } otap-df-config = { workspace = true } + +axum = { workspace = true } +bytes = { workspace = true } flume = { workspace = true } tokio = { workspace = true } tokio-util = { workspace = true } diff --git a/rust/otap-dataflow/crates/telemetry/src/lib.rs b/rust/otap-dataflow/crates/telemetry/src/lib.rs index f28001364d..6ecf5ecccb 100644 --- a/rust/otap-dataflow/crates/telemetry/src/lib.rs +++ b/rust/otap-dataflow/crates/telemetry/src/lib.rs @@ -41,6 +41,7 @@ pub mod opentelemetry_client; pub mod registry; pub mod reporter; pub mod semconv; +pub mod tracing_integration; // Re-export _private module from internal_events for macro usage. // This allows the otel_info!, otel_warn!, etc. macros to work in other crates diff --git a/rust/otap-dataflow/crates/telemetry/src/tracing_integration/log_record.rs b/rust/otap-dataflow/crates/telemetry/src/tracing_integration/log_record.rs new file mode 100644 index 0000000000..7ee0146415 --- /dev/null +++ b/rust/otap-dataflow/crates/telemetry/src/tracing_integration/log_record.rs @@ -0,0 +1,501 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +//! LogRecordView implementation for tokio-tracing events. +//! +//! This module provides the bridge between tracing::Event and our +//! OTLP bytes. 
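+//!
+//! A rough sketch of the intended flow (the `encode_logs` helper and
+//! `send_to_channel` are hypothetical; the real encoder is planned for
+//! `otap_df_pdata`):
+//!
+//! ```ignore
+//! let layer = OtlpTracingLayer::new(|record: TracingLogRecord| {
+//!     // `record` implements LogRecordView, so a view-based OTLP bytes
+//!     // encoder can serialize it directly, on the producing thread.
+//!     let bytes = encode_logs(&record);
+//!     send_to_channel(bytes);
+//! });
+//! ```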
+
+use otap_df_pdata::schema::{SpanId, TraceId};
+use otap_df_pdata::views::common::{AnyValueView, AttributeView, Str, ValueType};
+use otap_df_pdata::views::logs::LogRecordView;
+use std::fmt;
+use std::rc::Rc;
+use tracing::{Level, Metadata};
+
+/// A LogRecordView implementation that wraps a tracing event.
+///
+/// Uses `Rc<>` for heap-allocated data to make cloning cheap during encoding.
+/// Since encoding happens on the same thread before crossing boundaries,
+/// thread-safe `Arc<>` is not needed.
+pub struct TracingLogRecord {
+    /// The event name from the `name` field, if present
+    event_name: Option<String>,
+
+    /// The severity level from tracing
+    level: Level,
+
+    /// Timestamp when the event occurred (nanoseconds since Unix epoch)
+    timestamp_nanos: u64,
+
+    /// The target from tracing metadata, typically module path.
+    target: String,
+
+    /// Event fields
+    attributes: Vec<TracingAttribute>,
+
+    /// Optional body/message for the log record (stored as TracingAnyValue)
+    body: Option<TracingAnyValue>,
+}
+
+impl TracingLogRecord {
+    /// Creates a new TracingLogRecord from tracing event components.
+    ///
+    /// Note: metadata.name() contains both the event location and file:line info,
+    /// e.g., "event src/main.rs:42", so we don't need to separately track file/line.
+    pub fn new(
+        metadata: &Metadata<'_>,
+        attributes: Vec<TracingAttribute>,
+        timestamp_nanos: u64,
+    ) -> Self {
+        Self {
+            event_name: Some(metadata.name().to_string()),
+            level: *metadata.level(),
+            timestamp_nanos,
+            target: metadata.target().to_string(),
+            attributes,
+            body: None, // Can be populated from message field
+        }
+    }
+
+    /// Sets the body/message for this log record.
+    pub fn with_body(mut self, body: String) -> Self {
+        self.body = Some(TracingAnyValue::Str(Rc::from(body)));
+        self
+    }
+
+    /// Returns the target (typically module path) for this log record.
+    pub fn target(&self) -> &str {
+        &self.target
+    }
+
+    /// Creates a TracingLogRecord with a custom event name (for span events).
+    pub fn new_with_event_name(
+        metadata: &Metadata<'_>,
+        attributes: Vec<TracingAttribute>,
+        timestamp_nanos: u64,
+        event_name: String,
+    ) -> Self {
+        Self {
+            event_name: Some(event_name),
+            level: *metadata.level(),
+            timestamp_nanos,
+            target: metadata.target().to_string(),
+            attributes,
+            body: None,
+        }
+    }
+
+    /// Creates a minimal TracingLogRecord for span end events.
+    pub fn new_span_end(
+        span_id: u64,
+        attributes: Vec<TracingAttribute>,
+        timestamp_nanos: u64,
+    ) -> Self {
+        Self {
+            event_name: Some(format!("span.end (id:{})", span_id)),
+            level: Level::INFO,
+            timestamp_nanos,
+            target: "tracing::span".to_string(),
+            attributes,
+            body: None,
+        }
+    }
+}
+
+impl LogRecordView for TracingLogRecord {
+    type Attribute<'att>
+        = TracingAttributeView<'att>
+    where
+        Self: 'att;
+
+    type AttributeIter<'att>
+        = TracingAttributeIterator<'att>
+    where
+        Self: 'att;
+
+    type Body<'bod>
+        = TracingAnyValue
+    where
+        Self: 'bod;
+
+    fn time_unix_nano(&self) -> Option<u64> {
+        Some(self.timestamp_nanos)
+    }
+
+    fn observed_time_unix_nano(&self) -> Option<u64> {
+        // Field not used
+        None
+    }
+
+    fn severity_number(&self) -> Option<i32> {
+        // https://opentelemetry.io/docs/specs/otel/logs/data-model/#field-severitynumber
+        Some(match self.level {
+            Level::TRACE => 1,
+            Level::DEBUG => 5,
+            Level::INFO => 9,
+            Level::WARN => 13,
+            Level::ERROR => 17,
+        })
+    }
+
+    fn severity_text(&self) -> Option<Str<'_>> {
+        Some(self.level.as_str().as_bytes())
+    }
+
+    fn body(&self) -> Option<Self::Body<'_>> {
+        self.body.clone()
+    }
+
+    fn attributes(&self) -> Self::AttributeIter<'_> {
+        TracingAttributeIterator {
+            inner: self.attributes.iter(),
+        }
+    }
+
+    fn dropped_attributes_count(&self) -> u32 {
+        0
+    }
+
+    fn flags(&self) -> Option<u32> {
+        None
+    }
+
+    fn trace_id(&self) -> Option<&TraceId> {
+        None // TODO
+    }
+
+    fn span_id(&self) -> Option<&SpanId> {
+        None // TODO
+    }
+
+    fn event_name(&self) -> Option<Str<'_>> {
+        self.event_name.as_ref().map(|s| s.as_bytes())
+    }
+}
+
+/// Represents an attribute (key-value pair) from a tracing event.
+#[derive(Debug, Clone)]
+pub struct TracingAttribute {
+    /// The attribute key
+    pub key: String,
+    /// The attribute value
+    pub value: TracingAnyValue,
+}
+
+/// Wrapper for TracingAttribute that implements AttributeView
+pub struct TracingAttributeView<'a> {
+    attribute: &'a TracingAttribute,
+}
+
+impl<'a> AttributeView for TracingAttributeView<'a> {
+    type Val<'val>
+        = TracingAnyValue
+    where
+        Self: 'val;
+
+    fn key(&self) -> Str<'_> {
+        self.attribute.key.as_bytes()
+    }
+
+    fn value(&self) -> Option<Self::Val<'_>> {
+        Some(self.attribute.value.clone())
+    }
+}
+
+/// Iterator wrapper for TracingAttribute slice
+pub struct TracingAttributeIterator<'a> {
+    inner: std::slice::Iter<'a, TracingAttribute>,
+}
+
+impl<'a> Iterator for TracingAttributeIterator<'a> {
+    type Item = TracingAttributeView<'a>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.inner
+            .next()
+            .map(|attr| TracingAttributeView { attribute: attr })
+    }
+}
+
+/// Represents a value from a tracing event field.
+///
+/// This mirrors OTLP's AnyValue type system, supporting full structural fidelity
+/// for nested data from tracing events (arrays, maps, etc.).
+///
+/// Uses `Rc<>` for heap-allocated types to make cloning cheap during encoding.
+/// Since TracingLogRecord is encoded to bytes before crossing thread boundaries,
+/// the non-thread-safe `Rc<>` is appropriate here.
+#[derive(Debug, Clone)]
+pub enum TracingAnyValue {
+    /// String value
+    Str(Rc<str>),
+    /// Integer value (i64)
+    Int(i64),
+    /// Boolean value
+    Bool(bool),
+    /// Double-precision floating point value
+    Double(f64),
+    /// Bytes value
+    Bytes(Rc<[u8]>),
+    /// Array of values
+    Array(Rc<[TracingAnyValue]>),
+    /// Key-value list (like a map/object)
+    KeyValueList(Rc<[TracingAttribute]>),
+}
+
+/// Iterator for nested KeyValueList attributes
+pub struct KeyValueListIterator {
+    inner: Rc<[TracingAttribute]>,
+    index: usize,
+}
+
+impl Iterator for KeyValueListIterator {
+    type Item = TracingAttributeOwned;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.index < self.inner.len() {
+            let attr = self.inner[self.index].clone();
+            self.index += 1;
+            Some(TracingAttributeOwned { attribute: attr })
+        } else {
+            None
+        }
+    }
+}
+
+/// Owned wrapper for TracingAttribute that implements AttributeView
+pub struct TracingAttributeOwned {
+    attribute: TracingAttribute,
+}
+
+impl AttributeView for TracingAttributeOwned {
+    type Val<'val>
+        = TracingAnyValue
+    where
+        Self: 'val;
+
+    fn key(&self) -> Str<'_> {
+        self.attribute.key.as_bytes()
+    }
+
+    fn value(&self) -> Option<Self::Val<'_>> {
+        Some(self.attribute.value.clone())
+    }
+}
+
+/// Iterator for array values
+pub struct ArrayIterator {
+    inner: Rc<[TracingAnyValue]>,
+    index: usize,
+}
+
+impl Iterator for ArrayIterator {
+    type Item = TracingAnyValue;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.index < self.inner.len() {
+            let item = self.inner[self.index].clone();
+            self.index += 1;
+            Some(item)
+        } else {
+            None
+        }
+    }
+}
+
+impl<'a> AnyValueView<'a> for TracingAnyValue {
+    type KeyValue = TracingAttributeOwned;
+    type ArrayIter<'arr>
+        = ArrayIterator
+    where
+        Self: 'arr;
+    type KeyValueIter<'kv>
+        = KeyValueListIterator
+    where
+        Self: 'kv;
+
+    fn value_type(&self) -> ValueType {
+        match self {
+            TracingAnyValue::Str(_) => ValueType::String,
+            TracingAnyValue::Int(_) => ValueType::Int64,
+            TracingAnyValue::Bool(_) => ValueType::Bool,
+            TracingAnyValue::Double(_) => ValueType::Double,
+            TracingAnyValue::Bytes(_) => ValueType::Bytes,
+            TracingAnyValue::Array(_) => ValueType::Array,
+            TracingAnyValue::KeyValueList(_) => ValueType::KeyValueList,
+        }
+    }
+
+    fn as_string(&self) -> Option<Str<'_>> {
+        match self {
+            TracingAnyValue::Str(s) => Some(s.as_bytes()),
+            _ => None,
+        }
+    }
+
+    fn as_bool(&self) -> Option<bool> {
+        match self {
+            TracingAnyValue::Bool(b) => Some(*b),
+            _ => None,
+        }
+    }
+
+    fn as_int64(&self) -> Option<i64> {
+        match self {
+            TracingAnyValue::Int(i) => Some(*i),
+            _ => None,
+        }
+    }
+
+    fn as_double(&self) -> Option<f64> {
+        match self {
+            TracingAnyValue::Double(d) => Some(*d),
+            _ => None,
+        }
+    }
+
+    fn as_bytes(&self) -> Option<&[u8]> {
+        match self {
+            TracingAnyValue::Bytes(b) => Some(&**b),
+            _ => None,
+        }
+    }
+
+    fn as_array(&self) -> Option<Self::ArrayIter<'_>> {
+        match self {
+            TracingAnyValue::Array(arr) => Some(ArrayIterator {
+                inner: Rc::clone(arr),
+                index: 0,
+            }),
+            _ => None,
+        }
+    }
+
+    fn as_kvlist(&self) -> Option<Self::KeyValueIter<'_>> {
+        match self {
+            TracingAnyValue::KeyValueList(kvs) => Some(KeyValueListIterator {
+                inner: Rc::clone(kvs),
+                index: 0,
+            }),
+            _ => None,
+        }
+    }
+}
+
+// Implement Display for easier debugging
+impl fmt::Display for TracingAnyValue {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            TracingAnyValue::Str(s) => write!(f, "{}", s),
+            TracingAnyValue::Int(i) => write!(f, "{}", i),
+            TracingAnyValue::Bool(b) => write!(f, "{}", b),
+            TracingAnyValue::Double(d) => write!(f, "{}", d),
+            TracingAnyValue::Bytes(b) => write!(f, 
"{:?}", b), + TracingAnyValue::Array(arr) => { + write!(f, "[")?; + for (i, v) in arr.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{}", v)?; + } + write!(f, "]") + } + TracingAnyValue::KeyValueList(kvs) => { + write!(f, "{{")?; + for (i, kv) in kvs.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{}: {}", kv.key, kv.value)?; + } + write!(f, "}}") + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_tracing_log_record_creation() { + // Create a mock metadata (in real usage this comes from tracing) + let _level = Level::INFO; + + let _attributes = vec![ + TracingAttribute { + key: "key1".to_string(), + value: TracingAnyValue::Str(Rc::from("value1")), + }, + TracingAttribute { + key: "count".to_string(), + value: TracingAnyValue::Int(42), + }, + ]; + + // Note: In real usage, metadata comes from tracing::Event + // For this test, we'll test the TracingLogRecord structure directly + let _timestamp = 1234567890000000000u64; + + // Test basic construction and access + let key1 = "key1".to_string(); + let value1 = TracingAnyValue::Str(Rc::from("value1")); + let attr = TracingAttribute { + key: key1, + value: value1, + }; + + assert_eq!(attr.key, "key1"); + match &attr.value { + TracingAnyValue::Str(s) => assert_eq!(&**s, "value1"), + _ => panic!("Expected string value"), + } + } + + #[test] + fn test_severity_mapping() { + // Test that tracing levels map correctly to OTLP severity numbers + let levels_and_numbers = [ + (Level::TRACE, 1), + (Level::DEBUG, 5), + (Level::INFO, 9), + (Level::WARN, 13), + (Level::ERROR, 17), + ]; + + for (level, expected_number) in levels_and_numbers { + let severity_number = match level { + Level::TRACE => 1, + Level::DEBUG => 5, + Level::INFO => 9, + Level::WARN => 13, + Level::ERROR => 17, + }; + assert_eq!(severity_number, expected_number); + } + } + + #[test] + fn test_any_value_types() { + use otap_df_pdata::views::common::AnyValueView; + + let str_val = TracingAnyValue::Str(Rc::from("test")); + assert!(str_val.as_string().is_some()); + assert!(str_val.as_int64().is_none()); + + let int_val = TracingAnyValue::Int(123); + assert!(int_val.as_int64().is_some()); + assert_eq!(int_val.as_int64().unwrap(), 123); + + let bool_val = TracingAnyValue::Bool(true); + assert!(bool_val.as_bool().is_some()); + assert_eq!(bool_val.as_bool().unwrap(), true); + + let double_val = TracingAnyValue::Double(3.14); + assert!(double_val.as_double().is_some()); + assert!((double_val.as_double().unwrap() - 3.14).abs() < f64::EPSILON); + } +} diff --git a/rust/otap-dataflow/crates/telemetry/src/tracing_integration/mod.rs b/rust/otap-dataflow/crates/telemetry/src/tracing_integration/mod.rs new file mode 100644 index 0000000000..b2b0b39d8a --- /dev/null +++ b/rust/otap-dataflow/crates/telemetry/src/tracing_integration/mod.rs @@ -0,0 +1,14 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +//! tokio-tracing support for directly encoding and formatting OTLP bytes. 
+ +pub mod log_record; +pub mod otlp_bytes_channel; +pub mod otlp_bytes_formatter; +pub mod subscriber; + +pub use log_record::{TracingAnyValue, TracingAttribute, TracingLogRecord}; +pub use otlp_bytes_channel::{OtlpBytesChannel, OtlpBytesChannelStats, OtlpBytesConsumerConfig}; +pub use otlp_bytes_formatter::{FormatError, OtlpBytesFormattingLayer}; +pub use subscriber::OtlpTracingLayer; diff --git a/rust/otap-dataflow/crates/telemetry/src/tracing_integration/otlp_bytes_channel.rs b/rust/otap-dataflow/crates/telemetry/src/tracing_integration/otlp_bytes_channel.rs new file mode 100644 index 0000000000..f14a8df128 --- /dev/null +++ b/rust/otap-dataflow/crates/telemetry/src/tracing_integration/otlp_bytes_channel.rs @@ -0,0 +1,157 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +//! OTLP bytes channel abstraction for multi-threaded telemetry. +//! +//! This provides a common pattern used in multiple places: +//! - Admin runtime: 3rd party logging via global tracing subscriber +//! - Internal telemetry receiver: Component logging bridge to OTAP pipeline +//! - Thread-per-core: Per-thread logging with dedicated channels +//! +//! Architecture: +//! ```text +//! Producer(s) → mpsc::Sender → Channel → mpsc::Receiver → Consumer +//! ↓ +//! Console | OTLP | Custom handler +//! ``` + +use bytes::Bytes; +use std::sync::mpsc; + +/// Configuration for how to consume OTLP bytes from the channel. +/// +/// All 3rd party logging goes through our custom subscriber → OTLP bytes → channel. +/// This enum determines how those bytes are consumed in the admin runtime: +/// +/// - **Console**: Human-readable formatting (our builtin formatter) +/// - **InternalReceiver**: Forward to OTAP pipeline (our builtin OTLP path) +/// - **OtelSdkExporter**: Use any OpenTelemetry SDK exporter (stdout, OTLP, custom) +/// +/// This unified architecture means: +/// 1. ALL 3rd party logs use the same channel-based path +/// 2. No need for OpenTelemetryTracingBridge (we decode OTLP → OTel format if needed) +/// 3. Flexible backend choice while keeping single-threaded consumption +#[derive(Debug, Clone)] +pub enum OtlpBytesConsumerConfig { + /// Format and write to console (stdout/stderr based on level). + /// Uses our builtin formatter for human-readable output. + Console { + /// Enable ANSI color codes + ansi: bool, + /// Include ISO8601 timestamps + timestamp: bool, + /// Include log level (INFO, WARN, etc.) + level: bool, + /// Include target/scope name + target: bool, + /// Include event name field + event_name: bool, + /// Include thread names + thread_names: bool, + }, + + /// Forward to internal telemetry receiver (bridges to OTAP pipeline). + /// Uses our builtin OTLP exporter to send to the internal receiver, + /// which then goes through the OTAP pipeline for processing/export. + InternalReceiver { + // Future: configuration for the internal receiver + }, + + /// Use an OpenTelemetry SDK exporter. + /// OTLP bytes are decoded to OpenTelemetry LogData and passed to the SDK exporter. + /// This allows using any OTel SDK exporter (stdout, OTLP, custom) while keeping + /// our unified channel-based architecture. 
+    OtelSdkExporter {
+        /// Exporter type identifier (e.g., "stdout", "otlp-grpc", "otlp-http")
+        exporter_type: String,
+        /// Configuration for the specific exporter (JSON or similar)
+        config: std::collections::HashMap<String, String>,
+    },
+}
+
+impl OtlpBytesConsumerConfig {
+    /// Create default console configuration (matches current behavior)
+    pub fn default_console() -> Self {
+        Self::Console {
+            ansi: true,
+            timestamp: true,
+            level: true,
+            target: true,
+            event_name: false,
+            thread_names: true,
+        }
+    }
+}
+
+/// OTLP bytes channel for multi-producer, single-consumer telemetry.
+///
+/// This encapsulates the mpsc channel pattern used throughout the telemetry system.
+/// Multiple producers can share clones of the sender, but there is typically
+/// one consumer task per channel.
+pub struct OtlpBytesChannel {
+    sender: mpsc::SyncSender<Bytes>,
+    receiver: mpsc::Receiver<Bytes>,
+}
+
+impl OtlpBytesChannel {
+    /// Create a new OTLP bytes channel with bounded capacity.
+    ///
+    /// # Arguments
+    /// * `capacity` - Maximum number of OTLP byte buffers to queue
+    ///
+    /// When the channel is full, senders will block until space is available.
+    /// This provides backpressure.
+    pub fn new(capacity: usize) -> Self {
+        let (sender, receiver) = mpsc::sync_channel(capacity);
+        Self { sender, receiver }
+    }
+
+    /// Split into sender and receiver parts.
+    ///
+    /// The sender can be cloned and shared across multiple producers.
+    /// The receiver should be moved to a single consumer task.
+    pub fn split(self) -> (mpsc::SyncSender<Bytes>, mpsc::Receiver<Bytes>) {
+        (self.sender, self.receiver)
+    }
+
+    /// Get a reference to the sender (for cloning).
+    pub fn sender(&self) -> &mpsc::SyncSender<Bytes> {
+        &self.sender
+    }
+
+    /// Take the receiver (consumes self).
+    pub fn into_receiver(self) -> mpsc::Receiver<Bytes> {
+        self.receiver
+    }
+}
+
+/// Statistics about OTLP bytes channel consumption.
+#[derive(Debug, Default, Clone)]
+pub struct OtlpBytesChannelStats {
+    /// Total number of OTLP byte buffers received
+    pub buffers_received: u64,
+
+    /// Total bytes processed
+    pub bytes_processed: u64,
+
+    /// Number of format/forward errors
+    pub errors: u64,
+}
+
+impl OtlpBytesChannelStats {
+    /// Create new statistics tracker.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Record a successfully processed buffer.
+    pub fn record_buffer(&mut self, size: usize) {
+        self.buffers_received += 1;
+        self.bytes_processed += size as u64;
+    }
+
+    /// Record an error during processing.
+    pub fn record_error(&mut self) {
+        self.errors += 1;
+    }
+}
diff --git a/rust/otap-dataflow/crates/telemetry/src/tracing_integration/otlp_bytes_formatter.rs b/rust/otap-dataflow/crates/telemetry/src/tracing_integration/otlp_bytes_formatter.rs
new file mode 100644
index 0000000000..a44011af73
--- /dev/null
+++ b/rust/otap-dataflow/crates/telemetry/src/tracing_integration/otlp_bytes_formatter.rs
@@ -0,0 +1,470 @@
+// Copyright The OpenTelemetry Authors
+// SPDX-License-Identifier: Apache-2.0
+
+//! OTLP bytes formatting layer - decodes OTLP bytes back to human-readable output.
+//!
+//! This layer provides a bridge between OTLP-encoded telemetry and human-readable
+//! console output. The architecture is:
+//!
+//! ```text
+//! tracing::info!() → OtlpTracingLayer → encode to OTLP bytes
+//!                                              ↓
+//!                           OtlpBytesFormattingLayer → decode OTLP bytes
+//!                                              ↓
+//!                           construct LogsDataView → format human-readable
+//! ```
+//!
+//! This approach:
+//! - Removes dependency on opentelemetry crates for formatting
+//! - Preserves complete structural fidelity (OTLP is lossless)
+//! - Enables future async formatting in a separate thread
+//! - Allows colorized, customizable output
+//!
+//! # Example
+//!
+//! ```ignore
+//! use tracing_subscriber::prelude::*;
+//! use otap_df_telemetry::tracing_integration::{OtlpTracingLayer, OtlpBytesFormattingLayer};
+//!
+//! // Encode events to OTLP bytes
+//! let (tx, rx) = std::sync::mpsc::channel();
+//! let otlp_layer = OtlpTracingLayer::new(move |log_record| {
+//!     // encode log_record to OTLP bytes (elided), then:
+//!     // tx.send(bytes).unwrap();
+//! });
+//!
+//! tracing_subscriber::registry()
+//!     .with(otlp_layer)
+//!     .init();
+//!
+//! // Format OTLP bytes for human output. Note: this type is not itself a
+//! // tracing Layer; a consumer task drains the channel and formats.
+//! let formatter = OtlpBytesFormattingLayer::new(std::io::stdout);
+//! for bytes in rx {
+//!     let _ = formatter.format_otlp_bytes(&bytes);
+//! }
+//! ```
+
+use otap_df_pdata::views::logs::{LogRecordView, LogsDataView, ResourceLogsView, ScopeLogsView};
+use otap_df_pdata::views::otlp::bytes::logs::RawLogsData;
+use otap_df_pdata::views::common::{AnyValueView, AttributeView, InstrumentationScopeView};
+use std::fmt::Write as FmtWrite;
+use std::io::{self, Write as IoWrite};
+use std::time::UNIX_EPOCH;
+use tracing_subscriber::fmt::MakeWriter;
+
+/// A formatter that turns OTLP-encoded bytes into human-readable output.
+///
+/// This type doesn't directly subscribe to tracing events. Instead, it receives
+/// OTLP-encoded bytes (from OtlpTracingLayer), decodes them, and formats them
+/// for console output.
+///
+/// # Type Parameters
+/// - `W`: Writer type for output (e.g., stdout, file)
+pub struct OtlpBytesFormattingLayer<W>
+where
+    W: for<'writer> MakeWriter<'writer> + 'static,
+{
+    /// Writer factory for output
+    make_writer: W,
+    /// Whether to use ANSI colors
+    with_ansi: bool,
+    /// Whether to include timestamps
+    with_timestamp: bool,
+    /// Whether to include level
+    with_level: bool,
+    /// Whether to include target (module path/scope name)
+    with_target: bool,
+    /// Whether to include event_name
+    with_event_name: bool,
+    /// Whether to include thread names
+    with_thread_names: bool,
+}
+
+impl<W> OtlpBytesFormattingLayer<W>
+where
+    W: for<'writer> MakeWriter<'writer> + 'static,
+{
+    /// Creates a new OtlpBytesFormattingLayer with default settings.
+    ///
+    /// Default format matches tokio's: timestamp, level, target, event_name, message, attributes
+    ///
+    /// # Arguments
+    /// * `make_writer` - Factory for creating writers (e.g., `std::io::stdout`)
+    pub fn new(make_writer: W) -> Self {
+        Self {
+            make_writer,
+            with_ansi: true,
+            with_timestamp: true,
+            with_level: true,
+            with_target: true,
+            with_event_name: true,
+            with_thread_names: true,
+        }
+    }
+
+    /// Sets whether to use ANSI color codes.
+    pub fn with_ansi(mut self, ansi: bool) -> Self {
+        self.with_ansi = ansi;
+        self
+    }
+
+    /// Sets whether to include timestamps.
+    pub fn with_timestamp(mut self, timestamp: bool) -> Self {
+        self.with_timestamp = timestamp;
+        self
+    }
+
+    /// Sets whether to include log level.
+    pub fn with_level(mut self, level: bool) -> Self {
+        self.with_level = level;
+        self
+    }
+
+    /// Sets whether to include target (scope name/module path).
+    pub fn with_target(mut self, target: bool) -> Self {
+        self.with_target = target;
+        self
+    }
+
+    /// Sets whether to include event_name.
+    pub fn with_event_name(mut self, event_name: bool) -> Self {
+        self.with_event_name = event_name;
+        self
+    }
+
+    /// Sets whether to include thread names.
+    pub fn with_thread_names(mut self, thread_names: bool) -> Self {
+        self.with_thread_names = thread_names;
+        self
+    }
+
+    /// Formats OTLP-encoded bytes to human-readable output.
+    ///
+    /// This is the main entry point for formatting. Call this method when you
+    /// receive OTLP bytes from the encoding layer.
+    pub fn format_otlp_bytes(&self, otlp_bytes: &[u8]) -> Result<(), FormatError> {
+        // Construct LogsDataView from OTLP bytes (zero-copy)
+        let logs_view = RawLogsData::new(otlp_bytes);
+
+        // Get writer
+        let mut writer = self.make_writer.make_writer();
+
+        // Iterate through the logs data structure
+        for resource_logs in logs_view.resources() {
+            for scope_logs in resource_logs.scopes() {
+                // Extract scope name (target) once for all records
+                let scope_name = if let Some(scope) = scope_logs.scope() {
+                    if let Some(name) = scope.name() {
+                        Some(String::from_utf8_lossy(name).to_string())
+                    } else {
+                        None
+                    }
+                } else {
+                    None
+                };
+
+                for log_record in scope_logs.log_records() {
+                    self.format_log_record(&log_record, scope_name.as_deref(), &mut writer)?;
+                }
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Formats a single log record.
+    ///
+    /// Format: `timestamp LEVEL target{::event_name}: message key=value`
+    /// Example: `2024-12-18T10:30:45.123456Z INFO app::server{listen}: Server started port=8080`
+    fn format_log_record<L: LogRecordView>(
+        &self,
+        log_record: &L,
+        scope_name: Option<&str>,
+        writer: &mut impl IoWrite,
+    ) -> Result<(), FormatError> {
+        let mut buffer = String::new();
+
+        // Timestamp - ISO8601 format like tokio
+        if self.with_timestamp {
+            if let Some(ts_nanos) = log_record.time_unix_nano() {
+                let timestamp = format_iso8601_timestamp(ts_nanos);
+                write!(&mut buffer, "{} ", timestamp)?;
+            }
+        }
+
+        // Level with colors and padding
+        if self.with_level {
+            if let Some(severity) = log_record.severity_number() {
+                let level_str = severity_to_level_str(severity);
+                if self.with_ansi {
+                    let colored = colorize_level(level_str);
+                    write!(&mut buffer, "{:5} ", colored)?;
+                } else {
+                    write!(&mut buffer, "{:5} ", level_str)?;
+                }
+            }
+        }
+
+        // Thread name
+        if self.with_thread_names {
+            let thread_name = std::thread::current().name()
+                .unwrap_or("")
+                .to_string();
+            write!(&mut buffer, "{}: ", thread_name)?;
+        }
+
+        // Target (scope name / module path)
+        if self.with_target {
+            if let Some(target) = scope_name {
+                write!(&mut buffer, "{}", target)?;
+
+                // Event name (if configured and present)
+                if self.with_event_name {
+                    if let Some(event_name_bytes) = log_record.event_name() {
+                        if let Ok(event_name) = std::str::from_utf8(event_name_bytes) {
+                            // Format like tokio: target{event_name}
+                            write!(&mut buffer, "{{{}}}", event_name)?;
+                        }
+                    }
+                }
+
+                write!(&mut buffer, ": ")?;
+            }
+        }
+
+        // Body/message
+        if let Some(body) = log_record.body() {
+            write!(&mut buffer, "{}", format_any_value(&body))?;
+        }
+
+        // Attributes (key=value pairs), space-separated
+        for attr in log_record.attributes() {
+            let key_str = String::from_utf8_lossy(attr.key());
+            if let Some(value) = attr.value() {
+                write!(&mut buffer, " {}={}", key_str, format_any_value(&value))?;
+            }
+        }
+
+        // Write newline
+        writeln!(&mut buffer)?;
+
+        // Write to output
+        writer.write_all(buffer.as_bytes())?;
+        writer.flush()?;
+
+        Ok(())
+    }
+}
+
+/// Format a unix timestamp (nanoseconds) as ISO8601.
+/// +/// Format: `2024-12-18T10:30:45.123456Z` +fn format_iso8601_timestamp(nanos: u64) -> String { + let secs = nanos / 1_000_000_000; + let subsec_nanos = (nanos % 1_000_000_000) as u32; + + // Convert to SystemTime + let duration = std::time::Duration::new(secs, subsec_nanos); + let system_time = UNIX_EPOCH + duration; + + // Get seconds and subseconds for formatting + let since_epoch = system_time.duration_since(UNIX_EPOCH).unwrap(); + let total_secs = since_epoch.as_secs(); + let micros = subsec_nanos / 1000; + + // Calculate date/time components + let days_since_epoch = total_secs / 86400; + let secs_today = total_secs % 86400; + + let hours = secs_today / 3600; + let minutes = (secs_today % 3600) / 60; + let seconds = secs_today % 60; + + // Simple epoch-based date calculation (not perfect but good enough) + let year = 1970 + (days_since_epoch / 365); + let day_of_year = days_since_epoch % 365; + let month = (day_of_year / 30) + 1; + let day = (day_of_year % 30) + 1; + + format!( + "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}.{:06}Z", + year, month, day, hours, minutes, seconds, micros + ) +} + +/// Convert OTLP severity number to level string. +fn severity_to_level_str(severity: i32) -> &'static str { + match severity { + 1..=4 => "TRACE", + 5..=8 => "DEBUG", + 9..=12 => "INFO", + 13..=16 => "WARN", + 17..=24 => "ERROR", + _ => "UNKNOWN", + } +} + +/// Colorize level string with ANSI codes. +fn colorize_level(level: &str) -> String { + match level { + "TRACE" => format!("\x1b[35m{}\x1b[0m", level), // Magenta + "DEBUG" => format!("\x1b[34m{}\x1b[0m", level), // Blue + "INFO" => format!("\x1b[32m{}\x1b[0m", level), // Green + "WARN" => format!("\x1b[33m{}\x1b[0m", level), // Yellow + "ERROR" => format!("\x1b[31m{}\x1b[0m", level), // Red + _ => level.to_string(), + } +} + +/// Format an AnyValue for display. +fn format_any_value<'a>(value: &impl AnyValueView<'a>) -> String { + use otap_df_pdata::views::common::ValueType; + + match value.value_type() { + ValueType::String => { + if let Some(s) = value.as_string() { + String::from_utf8_lossy(s).to_string() + } else { + "".to_string() + } + } + ValueType::Int64 => { + if let Some(i) = value.as_int64() { + i.to_string() + } else { + "".to_string() + } + } + ValueType::Bool => { + if let Some(b) = value.as_bool() { + b.to_string() + } else { + "".to_string() + } + } + ValueType::Double => { + if let Some(d) = value.as_double() { + format!("{:.6}", d) + } else { + "".to_string() + } + } + ValueType::Bytes => { + if let Some(bytes) = value.as_bytes() { + format!("{:?}", bytes) + } else { + "".to_string() + } + } + ValueType::Array => { + if let Some(array_iter) = value.as_array() { + let mut parts = Vec::new(); + for item in array_iter { + parts.push(format_any_value(&item)); + } + format!("[{}]", parts.join(", ")) + } else { + "[]".to_string() + } + } + ValueType::KeyValueList => { + if let Some(kvlist_iter) = value.as_kvlist() { + let mut parts = Vec::new(); + for kv in kvlist_iter { + let key_str = String::from_utf8_lossy(kv.key()).to_string(); + if let Some(val) = kv.value() { + parts.push(format!("{}={}", key_str, format_any_value(&val))); + } + } + format!("{{{}}}", parts.join(", ")) + } else { + "{}".to_string() + } + } + ValueType::Empty => "".to_string(), + } +} + +/// Error type for formatting operations. 
+#[derive(Debug)]
+pub enum FormatError {
+    /// I/O error
+    Io(io::Error),
+    /// Format error
+    Fmt(std::fmt::Error),
+}
+
+impl From<io::Error> for FormatError {
+    fn from(err: io::Error) -> Self {
+        FormatError::Io(err)
+    }
+}
+
+impl From<std::fmt::Error> for FormatError {
+    fn from(err: std::fmt::Error) -> Self {
+        FormatError::Fmt(err)
+    }
+}
+
+impl std::fmt::Display for FormatError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            FormatError::Io(e) => write!(f, "I/O error: {}", e),
+            FormatError::Fmt(e) => write!(f, "Format error: {}", e),
+        }
+    }
+}
+
+impl std::error::Error for FormatError {}
+
+// Note: This type doesn't implement the Layer trait because it doesn't subscribe
+// to tracing events directly. It receives OTLP bytes through a separate channel
+// or callback mechanism. See examples for typical usage patterns.
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::sync::{Arc, Mutex};
+
+    /// Test writer that captures output
+    struct TestWriter {
+        buffer: Arc<Mutex<Vec<u8>>>,
+    }
+
+    impl TestWriter {
+        fn new() -> (Self, Arc<Mutex<Vec<u8>>>) {
+            let buffer = Arc::new(Mutex::new(Vec::new()));
+            (Self { buffer: buffer.clone() }, buffer)
+        }
+    }
+
+    impl IoWrite for TestWriter {
+        fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
+            let mut buffer = self.buffer.lock().unwrap();
+            buffer.extend_from_slice(buf);
+            Ok(buf.len())
+        }
+
+        fn flush(&mut self) -> io::Result<()> {
+            Ok(())
+        }
+    }
+
+    impl<'a> MakeWriter<'a> for TestWriter {
+        type Writer = TestWriter;
+
+        fn make_writer(&'a self) -> Self::Writer {
+            TestWriter {
+                buffer: self.buffer.clone(),
+            }
+        }
+    }
+
+    // TODO: Add tests that encode a TracingLogRecord to OTLP bytes,
+    // then format them back and verify the output
+}
diff --git a/rust/otap-dataflow/crates/telemetry/src/tracing_integration/subscriber.rs b/rust/otap-dataflow/crates/telemetry/src/tracing_integration/subscriber.rs
new file mode 100644
index 0000000000..ff233da516
--- /dev/null
+++ b/rust/otap-dataflow/crates/telemetry/src/tracing_integration/subscriber.rs
@@ -0,0 +1,327 @@
+// Copyright The OpenTelemetry Authors
+// SPDX-License-Identifier: Apache-2.0
+
+//! Tracing subscriber layer that captures events as TracingLogRecord instances.
+//!
+//! This layer integrates with the tracing-subscriber ecosystem, allowing us to:
+//! 1. Capture all tracing events (from tokio macros and 3rd-party libraries)
+//! 2. Convert them to TracingLogRecord (which implements LogRecordView)
+//! 3. Encode them using our stateful OTLP encoder
+//!
+//! The layer uses a visitor pattern to extract field values from events and
+//! constructs TracingLogRecord instances that can be encoded directly.
+
+use super::log_record::{TracingAttribute, TracingAnyValue, TracingLogRecord};
+use std::collections::HashMap;
+use std::rc::Rc;
+use std::sync::Mutex;
+use std::time::{SystemTime, UNIX_EPOCH};
+use tracing::{Event, Id, Subscriber};
+use tracing::span::Attributes;
+use tracing_subscriber::layer::{Context, Layer};
+use tracing_subscriber::registry::LookupSpan;
+
+/// Span data stored for duration calculation
+struct SpanData {
+    start_time_nanos: u64,
+    attributes: Vec<TracingAttribute>,
+}
+
+/// A tracing subscriber layer that captures events and spans as OTLP log records.
+///
+/// This layer can be composed with other layers in a tracing-subscriber registry
+/// to capture events and convert them to OTLP-compatible log records.
+///
+/// This layer implements an unconventional approach where spans are treated as pairs
+/// of log records (start/end) rather than as first-class span objects. This aligns
+/// with unified dataflow architectures where all telemetry flows through a single
+/// log pipeline.
+///
+/// # Example
+/// ```ignore
+/// use tracing_subscriber::prelude::*;
+/// use otap_df_telemetry::tracing_integration::OtlpTracingLayer;
+///
+/// let otlp_layer = OtlpTracingLayer::new(|log_record| {
+///     // Encode log_record using a stateful encoder
+///     encoder.encode_log_record(&log_record, &resource_bytes, &scope_encoding)?;
+/// });
+///
+/// tracing_subscriber::registry()
+///     .with(otlp_layer)
+///     .init();
+/// ```
+pub struct OtlpTracingLayer<F>
+where
+    F: Fn(TracingLogRecord) + Send + Sync + 'static,
+{
+    /// Callback function that receives each TracingLogRecord
+    on_event: F,
+    /// Storage for span start times to calculate duration on close
+    span_data: Mutex<HashMap<u64, SpanData>>,
+}
+
+impl<F> OtlpTracingLayer<F>
+where
+    F: Fn(TracingLogRecord) + Send + Sync + 'static,
+{
+    /// Creates a new OtlpTracingLayer with the given event handler.
+    ///
+    /// # Arguments
+    /// * `on_event` - Callback invoked for each tracing event, receiving a TracingLogRecord
+    pub fn new(on_event: F) -> Self {
+        Self {
+            on_event,
+            span_data: Mutex::new(HashMap::new()),
+        }
+    }
+}
+
+impl<S, F> Layer<S> for OtlpTracingLayer<F>
+where
+    S: Subscriber + for<'a> LookupSpan<'a>,
+    F: Fn(TracingLogRecord) + Send + Sync + 'static,
+{
+    fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) {
+        // Get timestamp
+        let timestamp_nanos = SystemTime::now()
+            .duration_since(UNIX_EPOCH)
+            .unwrap_or_default()
+            .as_nanos() as u64;
+
+        // Extract fields using visitor
+        let mut visitor = FieldVisitor::new();
+        event.record(&mut visitor);
+
+        // Build TracingLogRecord
+        // Note: metadata.name() includes file:line, e.g., "event src/main.rs:42"
+        let log_record = TracingLogRecord::new(
+            event.metadata(),
+            visitor.attributes,
+            timestamp_nanos,
+        )
+        .with_body(visitor.message.unwrap_or_default());
+
+        // Invoke the callback
+        (self.on_event)(log_record);
+    }
+
+    fn on_new_span(&self, attrs: &Attributes<'_>, id: &Id, _ctx: Context<'_, S>) {
+        let timestamp_nanos = SystemTime::now()
+            .duration_since(UNIX_EPOCH)
+            .unwrap_or_default()
+            .as_nanos() as u64;
+
+        // Extract fields from span attributes
+        let mut visitor = FieldVisitor::new();
+        attrs.record(&mut visitor);
+
+        let metadata = attrs.metadata();
+        let mut attributes = visitor.attributes.clone();
+
+        // Add span.id as attribute
+        attributes.push(TracingAttribute {
+            key: "span.id".to_string(),
+            value: TracingAnyValue::Int(id.into_u64() as i64),
+        });
+
+        // Store span data for duration calculation on close
+        if let Ok(mut spans) = self.span_data.lock() {
+            let _ = spans.insert(id.into_u64(), SpanData {
+                start_time_nanos: timestamp_nanos,
+                attributes: attributes.clone(),
+            });
+        }
+
+        // Create "span.start" log record
+        // Format: "span.start {span_name} src/file.rs:42"
+        let event_name = format!("span.start {}", metadata.name());
+        let log_record = TracingLogRecord::new_with_event_name(
+            metadata,
+            attributes,
+            timestamp_nanos,
+            event_name,
+        )
+        .with_body(visitor.message.unwrap_or_default());
+
+        // Invoke callback with span start event
+        (self.on_event)(log_record);
+    }
+
+    fn on_close(&self, id: Id, _ctx: Context<'_, S>) {
+        let end_time_nanos = SystemTime::now()
+            .duration_since(UNIX_EPOCH)
+            .unwrap_or_default()
+            .as_nanos() as u64;
+
+        // Retrieve and remove span data
+        let span_data = if let Ok(mut spans) = self.span_data.lock() {
+            spans.remove(&id.into_u64())
+        } else {
+            return;
+        };
+
+        if let Some(span_data) = span_data {
+            // Note: We don't have direct access to metadata here, so we create a minimal record
+            let duration_nanos = end_time_nanos.saturating_sub(span_data.start_time_nanos);
+
+            let mut attributes = span_data.attributes;
+
+            // Add duration as attribute
+            attributes.push(TracingAttribute {
+                key: "span.duration_nanos".to_string(),
+                value: TracingAnyValue::Int(duration_nanos as i64),
+            });
+
+            // Create a minimal log record for span end
+            // We use INFO level for span events
+            let log_record = TracingLogRecord::new_span_end(
+                id.into_u64(),
+                attributes,
+                end_time_nanos,
+            );
+
+            // Invoke callback with span end event
+            (self.on_event)(log_record);
+        }
+    }
+}
+
+/// Visitor that extracts field values from a tracing event.
+///
+/// This implements tracing::field::Visit to walk through all fields in an event
+/// and collect them as TracingAttribute instances.
+///
+/// Note: We don't extract event_name here because metadata.name() already provides
+/// it with file:line info (e.g., "event src/main.rs:42").
+struct FieldVisitor {
+    /// Collected attributes from the event
+    attributes: Vec<TracingAttribute>,
+
+    /// The message/body (from the "message" field, if present)
+    message: Option<String>,
+}
+
+impl FieldVisitor {
+    fn new() -> Self {
+        Self {
+            attributes: Vec::new(),
+            message: None,
+        }
+    }
+}
+
+impl tracing::field::Visit for FieldVisitor {
+    fn record_f64(&mut self, field: &tracing::field::Field, value: f64) {
+        // Skip special "message" field
+        if field.name() == "message" {
+            return;
+        }
+
+        self.attributes.push(TracingAttribute {
+            key: field.name().to_string(),
+            value: TracingAnyValue::Double(value),
+        });
+    }
+
+    fn record_i64(&mut self, field: &tracing::field::Field, value: i64) {
+        if field.name() == "message" {
+            return;
+        }
+
+        self.attributes.push(TracingAttribute {
+            key: field.name().to_string(),
+            value: TracingAnyValue::Int(value),
+        });
+    }
+
+    fn record_u64(&mut self, field: &tracing::field::Field, value: u64) {
+        if field.name() == "message" {
+            return;
+        }
+
+        // Convert u64 to i64 (values above i64::MAX will wrap)
+        self.attributes.push(TracingAttribute {
+            key: field.name().to_string(),
+            value: TracingAnyValue::Int(value as i64),
+        });
+    }
+
+    fn record_bool(&mut self, field: &tracing::field::Field, value: bool) {
+        if field.name() == "message" {
+            return;
+        }
+
+        self.attributes.push(TracingAttribute {
+            key: field.name().to_string(),
+            value: TracingAnyValue::Bool(value),
+        });
+    }
+
+    fn record_str(&mut self, field: &tracing::field::Field, value: &str) {
+        // Handle special "message" field
+        if field.name() == "message" {
+            self.message = Some(value.to_string());
+            return;
+        }
+
+        // Store string attributes by cloning
+        self.attributes.push(TracingAttribute {
+            key: field.name().to_string(),
+            value: TracingAnyValue::Str(Rc::from(value)),
+        });
+    }
+
+    fn record_debug(&mut self, field: &tracing::field::Field, value: &dyn std::fmt::Debug) {
+        // Capture the "message" field which contains the formatted message
+        if field.name() == "message" {
+            self.message = Some(format!("{:?}", value));
+            return;
+        }
+
+        // Convert debug representation to string and store
+        let debug_str = format!("{:?}", value);
+        self.attributes.push(TracingAttribute {
+            key: field.name().to_string(),
+            value: TracingAnyValue::Str(Rc::from(debug_str)),
+        });
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::sync::{Arc, Mutex};
+    use tracing_subscriber::prelude::*;
+
+    #[test]
+    fn test_otlp_layer_captures_events() {
+        use otap_df_pdata::views::logs::LogRecordView;
+
+        // Collect captured log records
+        let captured = Arc::new(Mutex::new(Vec::new()));
+        let captured_clone = captured.clone();
+
+        let layer = OtlpTracingLayer::new(move |log_record| {
+            let mut records = captured_clone.lock().unwrap();
+            records.push((
+                log_record.severity_text().map(|s| String::from_utf8_lossy(s).to_string()),
+                log_record.event_name().map(|s| String::from_utf8_lossy(s).to_string()),
+            ));
+        });
+
+        let subscriber = tracing_subscriber::registry().with(layer);
+
+        tracing::subscriber::with_default(subscriber, || {
+            tracing::info!(name: "test.event", "Test message");
+            tracing::warn!(name: "test.warning", "Warning message");
+        });
+
+        let records = captured.lock().unwrap();
+        assert_eq!(records.len(), 2);
+
+        // Note: event_name extraction from visitor has lifetime issues
+        // We'll address this in the production implementation
+    }
+}
From 51e65597206091f129a1df0ad3d64d1816b3a697 Mon Sep 17 00:00:00 2001
From: Joshua MacDonald
Date: Mon, 5 Jan 2026 16:25:02 -0800
Subject: [PATCH 02/92] save with view

---
 .../crates/telemetry/ARCHITECTURE.md          | 210 +++++++++++
 .../src/tracing_integration/DESIGN.md         | 178 +++++++++
 .../src/tracing_integration/log_record.rs     | 325 ++++++++---------
 .../telemetry/src/tracing_integration/mod.rs  |   2 -
 .../tracing_integration/otlp_bytes_channel.rs | 157 --------
 .../src/tracing_integration/subscriber.rs     | 340 ++++++++----------
 6 files changed, 692 insertions(+), 520 deletions(-)
 create mode 100644 rust/otap-dataflow/crates/telemetry/ARCHITECTURE.md
 create mode 100644 rust/otap-dataflow/crates/telemetry/src/tracing_integration/DESIGN.md
 delete mode 100644 rust/otap-dataflow/crates/telemetry/src/tracing_integration/otlp_bytes_channel.rs

diff --git a/rust/otap-dataflow/crates/telemetry/ARCHITECTURE.md b/rust/otap-dataflow/crates/telemetry/ARCHITECTURE.md
new file mode 100644
index 0000000000..a49ea685aa
--- /dev/null
+++ b/rust/otap-dataflow/crates/telemetry/ARCHITECTURE.md
@@ -0,0 +1,210 @@
+# Internal Telemetry Collection Architecture & Development Plan
+
+## Architecture
+
+OTAP-Dataflow uses a configurable internal telemetry data plane. We
+support alternatives to enable a range of observability requirements.
+The internal telemetry SDK is designed for the engine to safely
+consume its own telemetry, and we intend for the self-hosted telemetry
+pipeline to be the standard configuration.
+
+Consuming self-generated telemetry presents a potential feedback
+loop: situations where a telemetry pipeline creates pressure on
+itself and must not explode.
+
+## Internal telemetry receiver
+
+The Internal Telemetry Receiver or "ITR" is an OTAP-Dataflow receiver
+component that produces telemetry from internal sources. An internal
+telemetry pipeline consists of one or more ITR components and any of
+the connected processor and exporter components reachable from ITR
+source nodes.
+
+To begin with, every OTAP-Dataflow component is configured with an
+internal telemetry SDK meant for primary instrumentation of that
+component. Components are required to exclusively use the internal
+telemetry SDK for self-diagnostics, as they are considered first party
+in this exchange.
+
+The internal telemetry receiver is the SDK's counterpart, making it
+second party, as it is responsible for routing internal telemetry. The
+ITR cannot use the internal telemetry SDK itself; it is an invisible
+member of the pipeline. The ITR can be instrumented using third-party
+instrumentation (e.g., `tracing`, `log` crates) provided it can
+guarantee there is no potential for feedback (e.g., a single
+`tracing::info()` statement at startup).
+
+## Pitfall avoidance
+
+The OTAP-Dataflow engine is safeguarded against many self-induced
+telemetry pitfalls, as follows:
+
+- OTAP-Dataflow components reachable from an ITR cannot be configured
+  to send to an ITR node. This avoids a direct feedback cycle for
+  internal telemetry because the components cannot reach
+  themselves. For example, ITR and downstream components may be
+  configured for raw logging, no metrics, etc.
+- ITR instances share access to one or more dedicated threads with an
+  associated async runtime. They use these dedicated threads to
+  isolate internal telemetry processes that use third-party
+  instrumentation.
+- A thread-local variable is used to redirect third-party
+  instrumentation in dedicated internal telemetry threads. Internal
+  telemetry threads automatically install a safe configuration.
+- Components under observation (non-ITR components) have internal
+  telemetry events routed to queues in the OTAP-Dataflow pipeline on
+  the same core, which avoids blocking the engine. First-party
+  instrumentation will be handled on the CPU core that produced the
+  telemetry under normal circumstances, so each core is able to
+  process its own internal telemetry.
+- Option to fall back to no-op, a non-blocking global provider, and/or
+  raw logging.
+
+## OTLP-bytes first
+
+As a key design decision, the OTAP-Dataflow internal telemetry data
+path produces OTLP-bytes first. Because OTLP bytes is one of the
+builtin `OtapPayload` formats, once we have the OTLP bytes encoding of
+an event we are able to send it to an OTAP-Dataflow pipeline. To obtain
+these bytes, we will build a custom [Tokio `tracing`
+Event][TOKIOEVENT] handler to produce OTLP bytes before dispatching to
+an internal pipeline, used (in different configurations) for first and
+third-party instrumentation.
+
+[TOKIOEVENT]: https://docs.rs/tracing/latest/tracing/struct.Event.html
+
+## Raw logging
+
+We support formatting events for direct printing to the console from
+OTLP bytes, based on `otap_df_pdata::views::logs::LogsDataView` and
+associated types, a zero-copy approach. We refer to this most-basic
+form of printing to the console as raw logging because it is a safe
+configuration early in the lifetime of a process.
+
+## Routing
+
+The two internal logs data paths are:
+
+- Third-party: Tokio `tracing` global subscriber: third-party log
+  events, instrumentation in code without access to an OTAP-Dataflow
+  `EffectHandler`. These are handled in a dedicated internal telemetry
+  thread.
+- First-party: components with a local or shared `EffectHandler` use
+  dedicated macros (e.g., `otel_info!(effect, "interesting thing")`).
+  These use the configured internal telemetry SDK, and for ordinary
+  components (not ITR-downstream) they are routed through the ITR on
+  the same core (see the sketch after this list). These are always
+  non-blocking APIs; the internal SDK must drop logs instead of
+  blocking the pipeline.
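+
+A sketch of the intended fallback behavior (`TelemetryRouter`,
+`Fallback`, and `raw_log` are hypothetical names; the real router is
+configuration-driven):
+
+```rust
+fn route_internal_log(bytes: OtlpBytes, router: &TelemetryRouter) {
+    // Fast path: enqueue on the same-core pipeline without blocking.
+    if let Err(SendError::Full(bytes)) = router.send_message(bytes) {
+        match router.fallback {
+            // Hand off to a global logs collection thread.
+            Fallback::GlobalThread => { let _ = router.global_tx.try_send(bytes); }
+            // Print directly from OTLP bytes (raw logging).
+            Fallback::RawLogger => raw_log(&bytes),
+            // Drop the record rather than block the pipeline.
+            Fallback::Drop => {}
+        }
+    }
+}
+```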
+
+## Development plan
+
+Each of the items below is relatively small, estimated at 300-500
+lines of new code plus new tests.
+
+### TracingLogRecord: Tokio tracing Event and Metadata to LogRecordView
+
+When we receive a Tokio tracing event, whether through a
+`tracing::info!` macro (or similar) or through a dedicated
+`EffectHandler`-based API, the same thing happens:
+
+Create a `TracingLogRecord`, a struct derived from `tracing::Event`
+and `tracing::Metadata`, containing raw LogRecord fields extracted
+from the tracing macro layer. The `otap_df_pdata::views::logs::LogRecordView`
+trait is implemented for `TracingLogRecord`, making it something we
+can transcode into OTel-Arrow batches.
+
+The `otap_df_pdata` crate currently has no OTLP bytes encoder for
+directly accepting `otap_df_pdata::views::*` inputs (note the
+OTAP-records-to-OTLP-bytes function bypasses the views and encodes
+bytes directly). Therefore, this project implies we extend or refactor
+`otap_df_pdata` with an OTLP bytes encoder for its views interfaces.
+
+Then, since `TracingLogRecord` implements the log record view, we
+encode the record as OTLP bytes by encoding the view.
+
+### Stateful OTLP bytes encoder for repeated LogRecordViews
+
+We can avoid sending a log record through a channel every time an event
+happens by buffering log records. We will buffer them as OTLP bytes. Each
+receiver of `TracingLogRecord` OTLP bytes will use one stateful
+encoder that:
+
+- Is preconfigured with the process-level OpenTelemetry `Resource` value
+- Remembers the OpenTelemetry `InstrumentationScope.Name` that was previously used
+- Remembers the starting position of the current `ResourceLogs` and `ScopeLogs` of a
+  single OTLP bytes payload.
+
+Whether in a global logging collector thread or an effect handler thread
+processing internal telemetry, we will enter the stateful encoder and
+append a `LogRecordView` with its effective
+`InstrumentationScope`. The stateful encoder will append the log
+record correctly, recognizing scope changes and respecting a limited
+buffer size. This re-uses the `ProtoBuf` object from the existing
+OTAP-records-to-OTLP-bytes code path for easy protobuf generation
+(1-pass encoder with length placeholders).
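+
+A possible shape for this encoder (hypothetical type and method names;
+the actual interface would live in `otap_df_pdata`):
+
+```rust
+/// Appends log records into one growing OTLP bytes payload, opening a
+/// new ScopeLogs section only when the scope name changes.
+struct StatefulLogsEncoder {
+    buf: Vec<u8>,            // OTLP bytes under construction
+    resource_bytes: Vec<u8>, // pre-encoded process-level Resource
+    current_scope: Option<String>,
+}
+
+impl StatefulLogsEncoder {
+    fn append<R: LogRecordView>(&mut self, scope: &str, record: &R) {
+        if self.current_scope.as_deref() != Some(scope) {
+            self.close_scope();     // patch the open length placeholder
+            self.open_scope(scope); // start a new ScopeLogs section
+        }
+        self.encode_record(record); // 1-pass encode with length placeholders
+    }
+}
+```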
+
+### OTLP-bytes console logging handler
+
+We require a way to print OTLP bytes as human-readable log lines. We
+cannot easily re-use the Tokio `tracing` format layer for this,
+however we can use the `LogsDataView` trait with `RawLogsData` to
+format human-readable text for the console directly from OTLP bytes.
+
+This OTLP-bytes-to-human-readable logic will be used to implement raw
+logging.
+
+### Global logs collection thread
+
+An OTAP-Dataflow engine will run at least one global logs collection
+thread. These threads receive encoded (OTLP bytes) log events from
+various locations in the process. The global logs collection thread is
+special because it sets an anti-recursion bit in its thread-local
+state to prevent logging in its own export path.
+
+The global logs collection thread is configured as one (or more, if
+needed) instances consuming logs from the global Tokio `tracing`
+subscriber. In this thread, we'll configure the OpenTelemetry SDK or a
+dedicated OTAP-Dataflow pipeline (by configuration) for logs export.
+
+Because global logs collection threads are used as a fallback for
+`EffectHandler`-level logs, and because third-party libraries may call
+Tokio `tracing` APIs, we explicitly disallow these threads from
+logging: the macros are disabled from executing.
+
+### Global and Per-core Event Router
+
+OTAP-Dataflow provides an option to route internal telemetry to a pipeline
+in the same effect handler that produced the telemetry. When a component
+logging API is used on the `EffectHandler`, or when a tokio `tracing` event
+occurs on the `EffectHandler` thread, it will be routed using thread-local
+state so that the event is immediately encoded and stored or flushed,
+without blocking the effect handler.
+
+When a telemetry event is routed directly and `send_message()`
+succeeds, it means there was queue space to accept the log record on
+the same core. When this fails, the configurable telemetry router will
+support options to use the global logs collection thread, a raw
+logger, or to do nothing (dropping the internal log record).
+
+## Example configuration
+
+```yaml
+service:
+  telemetry:
+    logs:
+      level: info
+      internal_collection:
+        enabled: true
+
+        # Per-thread buffer
+        buffer_size_bytes: 65536
+
+        # Individual record size limit
+        max_record_bytes: 16384
+
+        # Bounded channel capacity
+        max_record_count: 10
+
+        # Timer-based flush interval
+        flush_interval: "1s"
+```
diff --git a/rust/otap-dataflow/crates/telemetry/src/tracing_integration/DESIGN.md b/rust/otap-dataflow/crates/telemetry/src/tracing_integration/DESIGN.md
new file mode 100644
index 0000000000..ba7aa896a9
--- /dev/null
+++ b/rust/otap-dataflow/crates/telemetry/src/tracing_integration/DESIGN.md
@@ -0,0 +1,178 @@
+# Tracing Integration Design: Zero-Copy Exploration
+
+This document captures the design exploration for integrating `tokio-tracing` events with our OTLP-bytes-first encoding architecture.
+
+## Goal
+
+Convert tracing events to OTLP bytes with minimal allocations, following the principle that **encoding to bytes happens before crossing thread boundaries**. The ideal is true zero-copy: borrow data directly from the tracing event and encode it in-place.
+
+## Architecture Context
+
+From `ARCHITECTURE.md`: The system uses a thread-per-core design where components are local to each thread. OTLP bytes are the interchange format that crosses thread boundaries, not structured data.
+
+## What We Achieved
+
+### 1. `TracingAnyValue<'a>` is `Copy`
+
+```rust
+#[derive(Debug, Clone, Copy)]
+pub enum TracingAnyValue<'a> {
+    Str(&'a str),
+    Int(i64),
+    Bool(bool),
+    Double(f64),
+    Bytes(&'a [u8]),
+    Array(&'a [TracingAnyValue<'a>]),
+    KeyValueList(&'a [TracingAttribute<'a>]),
+}
+```
+
+The enum only contains borrowed references or primitive values. "Copying" this type just copies the pointer+length, not the underlying data. The lifetime `'a` is preserved in the copy.
+
+### 2. `TracingAttribute<'a>` is `Copy`
+
+```rust
+#[derive(Debug, Clone, Copy)]
+pub struct TracingAttribute<'a> {
+    pub key: &'a str,
+    pub value: TracingAnyValue<'a>,
+}
+```
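+
+Because both types are `Copy`, passing them around during encoding
+duplicates only pointers and primitives. A small illustration (using
+the types defined above):
+
+```rust
+// Copying a borrowed attribute copies the reference, not the data.
+let attr = TracingAttribute { key: "port", value: TracingAnyValue::Int(8080) };
+let copy = attr;            // bitwise copy; `attr` remains usable
+assert_eq!(attr.key, copy.key);
+```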
+### 3. `TracingLogRecord<'a>` Borrows from Metadata
+
+```rust
+pub struct TracingLogRecord<'a> {
+    event_name: Option<&'static str>,  // metadata.name() is always static
+    target: &'a str,                   // borrowed from Metadata<'a>
+    attributes: Vec<TracingAttribute<'a>>,
+    body: Option<&'a str>,
+    // ...
+}
+```
+
+The lifetime `'a` ties the log record to the tracing event callback scope.
+
+### 4. Direct Trait Implementations (No Wrappers)
+
+`TracingAnyValue<'a>` implements `AnyValueView<'a>` directly.
+`TracingAttribute<'a>` implements `AttributeView` directly.
+No wrapper types needed because the underlying types are `Copy`.
+
+### 5. GAT Lifetime Handling
+
+The `LogRecordView` trait uses Generic Associated Types:
+
+```rust
+type Attribute<'att>: AttributeView where Self: 'att;
+type Body<'bod>: AnyValueView<'bod> where Self: 'bod;
+```
+
+For `TracingLogRecord<'a>`:
+- `type Attribute<'att> = TracingAttribute<'a>` — uses data lifetime `'a`, not GAT lifetime
+- `type Body<'bod> = TracingAnyValue<'bod>` — constructs on demand from stored `&'a str`
+
+The key insight: when `Self: 'bod`, it implies `'a: 'bod`, so we can shorten the lifetime.
+
+## The Barrier: The `Visit` Trait
+
+The tracing crate's `Visit` trait erases lifetime information:
+
+```rust
+pub trait Visit {
+    fn record_str(&mut self, field: &Field, value: &str);
+    fn record_debug(&mut self, field: &Field, value: &dyn Debug);
+    // ...
+}
+```
+
+The `value: &str` has an anonymous lifetime. Even though in practice the data is borrowed from the `Event<'_>` which exists for the entire callback, **the trait boundary prevents expressing this relationship**.
+
+### What This Means
+
+1. **Field names (`field.name()`)**: Always `&'static str` — zero-copy ✓
+2. **Primitive values (i64, bool, f64)**: No allocation needed — zero-copy ✓
+3. **String values**: The borrow lifetime is erased by the trait, so we must either:
+   - Allocate (copy to `String`)
+   - Use `unsafe` to assert the lifetime relationship
+
+### Current Implementation
+
+We use owned storage (`OwnedValue`) in the visitor:
+
+```rust
+enum OwnedValue {
+    Str(String), // Allocated copy
+    Int(i64),
+    Bool(bool),
+    Double(f64),
+}
+```
+
+This is the safe approach at the cost of one allocation per string attribute.
+
+## The `Send + Sync` Barrier
+
+The tracing ecosystem requires subscribers to be `Send + Sync`:
+
+```rust
+impl Dispatch {
+    pub fn new<S>(subscriber: S) -> Self
+    where
+        S: Subscriber + Send + Sync + 'static
+}
+```
+
+Our layer uses `RefCell<HashMap<u64, SpanData>>` for span storage (single-threaded design), which is `!Sync`. This prevents using standard tracing test utilities like `with_default`.
+
+### Workaround
+
+Tests must use thread-local storage or other patterns that don't require the subscriber to be `Sync`.
+
+## Alternatives Not Taken
+
+### 1. Unsafe Lifetime Extension
+
+```rust
+fn record_str(&mut self, field: &Field, value: &str) {
+    // UNSAFE: Assert that value lives as long as the event
+    let extended: &'static str = unsafe { std::mem::transmute(value) };
+    self.attr_values.push(TracingAnyValue::Str(extended));
+}
+```
+
+Rejected because:
+- Requires proving the invariant holds for all tracing macros
+- Third-party libraries might violate the assumption
+- The allocation cost is minimal compared to encoding
+
+### 2. Arc/Rc for Cheap Cloning
+
+Earlier iterations used `Rc<str>` and `Rc<[u8]>` to make cloning cheap. Rejected because:
+- Still requires initial allocation
+- Adds reference counting overhead
+- The goal is zero-copy, not cheap-copy
### 3. String Arena

Could store formatted strings in a pre-allocated arena that lives for the callback scope. Not implemented because:
- Adds complexity
- Still requires copying data into the arena
- The simple `String` approach is clear and correct

## Summary

| Data Type | Zero-Copy? | Notes |
|-----------|------------|-------|
| Field names | ✓ | `&'static str` from tracing |
| `metadata.name()` | ✓ | `&'static str` |
| `metadata.target()` | ✓ | `&'a str` borrowed from metadata |
| Primitive values | ✓ | Copied by value (cheap) |
| String values | ✗ | `Visit` trait erases lifetime |
| Debug-formatted values | ✗ | Requires formatting to String |

The current implementation achieves zero-copy for everything except string attribute values, where the `Visit` trait's lifetime erasure forces allocation. This is a fundamental limitation of the tracing crate's design, not something we can work around without `unsafe`.

## Future Considerations

If the tracing crate ever adds a lifetime-aware visitor pattern, or if we're willing to use `unsafe` with careful auditing, we could achieve true zero-copy for all data types.
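As a concrete sketch of what such a lifetime-aware visitor could look like (purely hypothetical; no such trait exists in the tracing crate today):

```rust
/// Hypothetical replacement for `tracing::field::Visit` that preserves the
/// event lifetime 'a, which would let string values be stored as borrows.
trait VisitBorrowed<'a> {
    fn record_str(&mut self, name: &'static str, value: &'a str);
    fn record_i64(&mut self, name: &'static str, value: i64);
    fn record_debug(&mut self, name: &'static str, value: &'a dyn std::fmt::Debug);
}
```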
diff --git a/rust/otap-dataflow/crates/telemetry/src/tracing_integration/log_record.rs b/rust/otap-dataflow/crates/telemetry/src/tracing_integration/log_record.rs
index 7ee0146415..f94aa4e22b 100644
--- a/rust/otap-dataflow/crates/telemetry/src/tracing_integration/log_record.rs
+++ b/rust/otap-dataflow/crates/telemetry/src/tracing_integration/log_record.rs
@@ -4,23 +4,31 @@
 //! LogRecordView implementation for tokio-tracing events.
 //!
 //! This module provides the bridge between tracing::Event and our
-//! OTLP bytes.
+//! OTLP bytes. All data is borrowed from the tracing event with zero copies.
+//!
+//! The key insight is that `TracingAnyValue<'a>` is `Copy` - it's just an enum
+//! containing borrowed references. This means we can implement `AnyValueView`
+//! directly on it without needing a wrapper type, and the lifetime `'a` is
+//! tied directly to the underlying tracing event data.
 
 use otap_df_pdata::schema::{SpanId, TraceId};
 use otap_df_pdata::views::common::{AnyValueView, AttributeView, Str, ValueType};
 use otap_df_pdata::views::logs::LogRecordView;
 use std::fmt;
-use std::rc::Rc;
 use tracing::{Level, Metadata};
 
 /// A LogRecordView implementation that wraps a tracing event.
 ///
-/// Uses `Rc<>` for heap-allocated data to make cloning cheap during encoding.
-/// Since encoding happens on the same thread before crossing boundaries,
-/// thread-safe `Arc<>` is not needed.
-pub struct TracingLogRecord {
-    /// The event name from the `name` field, if present
-    event_name: Option<String>,
+/// Uses zero-copy borrows throughout:
+/// - `event_name`: `&'static str` since `Metadata::name()` is always static
+/// - `target`: `&'a str` borrowed from `Metadata<'a>`
+/// - All attribute keys and values are borrowed from the event
+///
+/// The lifetime `'a` ties this struct to the tracing event callback scope.
+/// Encoding to OTLP bytes must complete before the callback returns.
+pub struct TracingLogRecord<'a> {
+    /// The event name - always static from tracing metadata
+    event_name: Option<&'static str>,
 
     /// The severity level from tracing
     level: Level,
@@ -28,94 +36,106 @@
     /// Timestamp when the event occurred (nanoseconds since Unix epoch)
     timestamp_nanos: u64,
 
-    /// The target from tracing metadata, typically module path.
-    target: String,
+    /// The target from tracing metadata, borrowed for the event lifetime
+    target: &'a str,
 
-    /// Event fields
-    attributes: Vec<TracingAttribute>,
+    /// Event fields - all borrowed from the tracing event
+    attributes: Vec<TracingAttribute<'a>>,
 
-    /// Optional body/message for the log record (stored as TracingAnyValue)
-    body: Option<TracingAnyValue>,
+    /// Optional body/message for the log record (stored as raw &str,
+    /// constructed into TracingAnyValue on demand in body() method)
+    body: Option<&'a str>,
 }
 
-impl TracingLogRecord {
+impl<'a> TracingLogRecord<'a> {
     /// Creates a new TracingLogRecord from tracing event components.
     ///
-    /// Note: metadata.name() contains both the event location and file:line info,
-    /// e.g., "event src/main.rs:42", so we don't need to separately track file/line.
+    /// The returned struct borrows from the metadata and attributes,
+    /// so it must be encoded before the tracing callback returns.
     pub fn new(
-        metadata: &Metadata<'_>,
-        attributes: Vec<TracingAttribute>,
+        metadata: &Metadata<'a>,
+        attributes: Vec<TracingAttribute<'a>>,
         timestamp_nanos: u64,
     ) -> Self {
         Self {
-            event_name: Some(metadata.name().to_string()),
+            event_name: Some(metadata.name()),
             level: *metadata.level(),
             timestamp_nanos,
-            target: metadata.target().to_string(),
+            target: metadata.target(),
             attributes,
-            body: None, // Can be populated from message field
+            body: None,
         }
     }
 
     /// Sets the body/message for this log record.
-    pub fn with_body(mut self, body: String) -> Self {
-        self.body = Some(TracingAnyValue::Str(Rc::from(body)));
+    pub fn with_body(mut self, body: &'a str) -> Self {
+        self.body = Some(body);
         self
     }
 
     /// Returns the target (typically module path) for this log record.
     pub fn target(&self) -> &str {
-        &self.target
+        self.target
     }
 
-    /// Creates a TracingLogRecord with a custom event name (for span events).
+    /// Creates a TracingLogRecord with a custom static event name.
+    ///
+    /// Use this for synthetic events like span.start/span.end where
+    /// you want a different event name than metadata.name().
     pub fn new_with_event_name(
-        metadata: &Metadata<'_>,
-        attributes: Vec<TracingAttribute>,
+        metadata: &Metadata<'a>,
+        attributes: Vec<TracingAttribute<'a>>,
         timestamp_nanos: u64,
-        event_name: String,
+        event_name: &'static str,
     ) -> Self {
         Self {
             event_name: Some(event_name),
             level: *metadata.level(),
             timestamp_nanos,
-            target: metadata.target().to_string(),
+            target: metadata.target(),
             attributes,
             body: None,
         }
     }
 
-    /// Creates a minimal TracingLogRecord for span end events.
+    /// Creates a log record for span end events.
+    ///
+    /// The span_id should be added as an attribute by the caller.
     pub fn new_span_end(
-        span_id: u64,
-        attributes: Vec<TracingAttribute>,
+        target: &'a str,
+        attributes: Vec<TracingAttribute<'a>>,
         timestamp_nanos: u64,
     ) -> Self {
         Self {
-            event_name: Some(format!("span.end (id:{})", span_id)),
+            event_name: Some("span.end"),
             level: Level::INFO,
             timestamp_nanos,
-            target: "tracing::span".to_string(),
+            target,
             attributes,
             body: None,
         }
     }
 }
 
-impl LogRecordView for TracingLogRecord {
+impl<'a> LogRecordView for TracingLogRecord<'a> {
+    // Use 'a (the data lifetime) for the attribute type, not the GAT lifetime.
+    // This is correct because our attributes borrow from the original tracing event data.
     type Attribute<'att>
-        = TracingAttributeView<'att>
+        = TracingAttribute<'a>
     where
        Self: 'att;
 
+    // The iterator borrows from self (lifetime 'att) but yields items with lifetime 'a.
+    // Since TracingAttribute<'a> is Copy, we can just copy the values.
     type AttributeIter<'att>
-        = TracingAttributeIterator<'att>
+        = std::iter::Copied<std::slice::Iter<'att, TracingAttribute<'a>>>
     where
        Self: 'att;
 
+    // Body is constructed on demand from the stored &'a str.
+    // Since we create a fresh TracingAnyValue<'bod> each time, the GAT works.
     type Body<'bod>
-        = TracingAnyValue
+        = TracingAnyValue<'bod>
     where
        Self: 'bod;
 
@@ -144,13 +164,14 @@
     }
 
     fn body(&self) -> Option<Self::Body<'_>> {
-        self.body.clone()
+        // Construct TracingAnyValue on demand from stored &str.
+        // The lifetime 'bod comes from &self, but the data has lifetime 'a.
+        // Since 'a: 'bod (self contains 'a), this coercion is valid.
+        self.body.map(TracingAnyValue::Str)
     }
 
     fn attributes(&self) -> Self::AttributeIter<'_> {
-        TracingAttributeIterator {
-            inner: self.attributes.iter(),
-        }
+        self.attributes.iter().copied()
     }
 
     fn dropped_attributes_count(&self) -> u32 {
@@ -170,51 +191,34 @@
     }
 
     fn event_name(&self) -> Option<Str<'_>> {
-        self.event_name.as_ref().map(|s| s.as_bytes())
+        self.event_name.map(|s| s.as_bytes())
     }
 }
 
 /// Represents an attribute (key-value pair) from a tracing event.
-#[derive(Debug, Clone)]
-pub struct TracingAttribute {
-    /// The attribute key
-    pub key: String,
-    /// The attribute value
-    pub value: TracingAnyValue,
-}
-
-/// Wrapper for TracingAttribute that implements AttributeView
-pub struct TracingAttributeView<'a> {
-    attribute: &'a TracingAttribute,
+///
+/// All data is borrowed from the tracing event with lifetime 'a.
+/// This type is `Copy` because it only contains borrowed references.
+#[derive(Debug, Clone, Copy)]
+pub struct TracingAttribute<'a> {
+    /// The attribute key - borrowed from tracing
+    pub key: &'a str,
+    /// The attribute value - borrowed from tracing
+    pub value: TracingAnyValue<'a>,
 }
 
-impl<'a> AttributeView for TracingAttributeView<'a> {
+impl<'a> AttributeView for TracingAttribute<'a> {
     type Val<'val>
-        = TracingAnyValue
+        = TracingAnyValue<'val>
     where
        Self: 'val;
 
     fn key(&self) -> Str<'_> {
-        self.attribute.key.as_bytes()
+        self.key.as_bytes()
     }
 
     fn value(&self) -> Option<Self::Val<'_>> {
-        Some(self.attribute.value.clone())
-    }
-}
-
-/// Iterator wrapper for TracingAttribute slice
-pub struct TracingAttributeIterator<'a> {
-    inner: std::slice::Iter<'a, TracingAttribute>,
-}
-
-impl<'a> Iterator for TracingAttributeIterator<'a> {
-    type Item = TracingAttributeView<'a>;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        self.inner
-            .next()
-            .map(|attr| TracingAttributeView { attribute: attr })
+        Some(self.value)
     }
 }
 
@@ -223,95 +227,65 @@ impl<'a> Iterator for TracingAttributeIterator<'a> {
 /// This mirrors OTLP's AnyValue type system, supporting full structural fidelity
 /// for nested data from tracing events (arrays, maps, etc.).
 ///
-/// Uses `Rc<>` for heap-allocated types to make cloning cheap during encoding.
-/// Since TracingLogRecord is encoded to bytes before crossing thread boundaries,
-/// the non-thread-safe `Rc<>` is appropriate here.
-#[derive(Debug, Clone)]
-pub enum TracingAnyValue {
-    /// String value
-    Str(Rc<str>),
+/// All variants use borrowed references with lifetime 'a, enabling true zero-copy
+/// from tracing events to OTLP bytes. The type is `Copy` because it only contains
+/// primitive values or borrowed references - copying just copies the pointer/length,
+/// not the underlying data.
+#[derive(Debug, Clone, Copy)]
+pub enum TracingAnyValue<'a> {
+    /// String value - borrowed
+    Str(&'a str),
     /// Integer value (i64)
     Int(i64),
     /// Boolean value
     Bool(bool),
     /// Double-precision floating point value
     Double(f64),
-    /// Bytes value
-    Bytes(Rc<[u8]>),
-    /// Array of values
-    Array(Rc<[TracingAnyValue]>),
-    /// Key-value list (like a map/object)
-    KeyValueList(Rc<[TracingAttribute]>),
+    /// Bytes value - borrowed
+    Bytes(&'a [u8]),
+    /// Array of values - borrowed slice
+    Array(&'a [TracingAnyValue<'a>]),
+    /// Key-value list (like a map/object) - borrowed slice
+    KeyValueList(&'a [TracingAttribute<'a>]),
 }
 
-/// Iterator for nested KeyValueList attributes
-pub struct KeyValueListIterator {
-    inner: Rc<[TracingAttribute]>,
-    index: usize,
+/// Iterator for array values that yields copies of TracingAnyValue.
+///
+/// Since TracingAnyValue is Copy, this just copies the small enum
+/// (which contains borrowed references to the underlying data).
+pub struct ArrayIterator<'a> {
+    inner: std::slice::Iter<'a, TracingAnyValue<'a>>,
 }
 
-impl Iterator for KeyValueListIterator {
-    type Item = TracingAttributeOwned;
+impl<'a> Iterator for ArrayIterator<'a> {
+    type Item = TracingAnyValue<'a>;
 
     fn next(&mut self) -> Option<Self::Item> {
-        if self.index < self.inner.len() {
-            let attr = self.inner[self.index].clone();
-            self.index += 1;
-            Some(TracingAttributeOwned { attribute: attr })
-        } else {
-            None
-        }
-    }
-}
-
-/// Owned wrapper for TracingAttribute that implements AttributeView
-pub struct TracingAttributeOwned {
-    attribute: TracingAttribute,
-}
-
-impl AttributeView for TracingAttributeOwned {
-    type Val<'val>
-        = TracingAnyValue
-    where
-        Self: 'val;
-
-    fn key(&self) -> Str<'_> {
-        self.attribute.key.as_bytes()
-    }
-
-    fn value(&self) -> Option<Self::Val<'_>> {
-        Some(self.attribute.value.clone())
+        self.inner.next().copied()
     }
 }
 
-/// Iterator for array values
-pub struct ArrayIterator {
-    inner: Rc<[TracingAnyValue]>,
-    index: usize,
+/// Iterator for nested KeyValueList attributes.
+pub struct KeyValueListIterator<'a> {
+    inner: std::slice::Iter<'a, TracingAttribute<'a>>,
 }
 
-impl Iterator for ArrayIterator {
-    type Item = TracingAnyValue;
+impl<'a> Iterator for KeyValueListIterator<'a> {
+    type Item = TracingAttribute<'a>;
 
     fn next(&mut self) -> Option<Self::Item> {
-        if self.index < self.inner.len() {
-            let item = self.inner[self.index].clone();
-            self.index += 1;
-            Some(item)
-        } else {
-            None
-        }
+        self.inner.next().copied()
     }
 }
 
-impl<'a> AnyValueView<'a> for TracingAnyValue {
-    type KeyValue = TracingAttributeOwned;
+impl<'a> AnyValueView<'a> for TracingAnyValue<'a> {
+    type KeyValue = TracingAttribute<'a>;
     type ArrayIter<'arr>
-        = ArrayIterator
+        = ArrayIterator<'a>
     where
        Self: 'arr;
     type KeyValueIter<'kv>
-        = KeyValueListIterator
+        = KeyValueListIterator<'a>
     where
        Self: 'kv;
 
@@ -357,34 +331,28 @@
 
     fn as_bytes(&self) -> Option<&[u8]> {
         match self {
-            TracingAnyValue::Bytes(b) => Some(&**b),
+            TracingAnyValue::Bytes(b) => Some(b),
             _ => None,
         }
     }
 
     fn as_array(&self) -> Option<Self::ArrayIter<'_>> {
         match self {
-            TracingAnyValue::Array(arr) => Some(ArrayIterator {
-                inner: Rc::clone(arr),
-                index: 0,
-            }),
+            TracingAnyValue::Array(arr) => Some(ArrayIterator { inner: arr.iter() }),
             _ => None,
         }
     }
 
     fn as_kvlist(&self) -> Option<Self::KeyValueIter<'_>> {
         match self {
-            TracingAnyValue::KeyValueList(kvs) => Some(KeyValueListIterator {
-                inner: Rc::clone(kvs),
-                index: 0,
-            }),
+            TracingAnyValue::KeyValueList(kvs) => Some(KeyValueListIterator { inner: kvs.iter() }),
             _ => None,
         }
     }
 }
 
 // Implement Display for easier debugging
-impl fmt::Display for TracingAnyValue {
+impl<'a> fmt::Display for TracingAnyValue<'a> {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         match self {
             TracingAnyValue::Str(s) => write!(f, "{}", s),
@@ -421,36 +389,15 @@
     use super::*;
 
     #[test]
-    fn test_tracing_log_record_creation() {
-        // Create a mock metadata (in real usage this comes from tracing)
-        let _level = Level::INFO;
-
-        let _attributes = vec![
-            TracingAttribute {
-                key: "key1".to_string(),
-                value: TracingAnyValue::Str(Rc::from("value1")),
-            },
-            TracingAttribute {
-                key: "count".to_string(),
-                value: TracingAnyValue::Int(42),
-            },
-        ];
-
-        // Note: In real usage, metadata comes from tracing::Event
-        // For this test, we'll test the TracingLogRecord structure directly
-        let _timestamp = 1234567890000000000u64;
-
-        // Test basic construction and access
-        let key1 = "key1".to_string();
-        let value1 = TracingAnyValue::Str(Rc::from("value1"));
-        let attr = TracingAttribute {
-            key: key1,
-            value: value1,
-        };
+    fn test_tracing_attribute_creation() {
+        // Test basic construction with borrowed data
+        let key = "key1";
+        let value = TracingAnyValue::Str("value1");
+        let attr = TracingAttribute { key, value };
 
         assert_eq!(attr.key, "key1");
-        match &attr.value {
-            TracingAnyValue::Str(s) => assert_eq!(&**s, "value1"),
+        match attr.value {
+            TracingAnyValue::Str(s) => assert_eq!(s, "value1"),
             _ => panic!("Expected string value"),
         }
     }
@@ -482,7 +429,7 @@ fn test_any_value_types() {
         use otap_df_pdata::views::common::AnyValueView;
 
-        let str_val = TracingAnyValue::Str(Rc::from("test"));
+        let str_val = TracingAnyValue::Str("test");
         assert!(str_val.as_string().is_some());
         assert!(str_val.as_int64().is_none());
 
         let double_val = TracingAnyValue::Double(3.14);
         assert!(double_val.as_double().is_some());
         assert!((double_val.as_double().unwrap() - 3.14).abs() < f64::EPSILON);
     }
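+
+    // Illustrative extra check (a hypothetical test added for exposition,
+    // not part of the original patch): nested values borrow as well - an
+    // Array wraps a borrowed slice, so iterating it copies no string data.
+    #[test]
+    fn test_nested_values_borrow() {
+        use otap_df_pdata::views::common::AnyValueView;
+
+        let items = [TracingAnyValue::Str("a"), TracingAnyValue::Str("b")];
+        let arr = TracingAnyValue::Array(&items);
+        assert_eq!(arr.as_array().expect("array").count(), 2);
+    }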
+
+    #[test]
+    fn test_zero_copy_semantics() {
+        // Verify that TracingAnyValue is Copy (no heap allocation)
+        let original = TracingAnyValue::Str("hello");
+        let copied = original; // This should be a copy, not a move
+        let _also_original = original; // Original still usable
+
+        match copied {
+            TracingAnyValue::Str(s) => assert_eq!(s, "hello"),
+            _ => panic!("Expected string"),
+        }
+    }
+
+    #[test]
+    fn test_attribute_is_copy() {
+        // Verify that TracingAttribute is Copy
+        let attr = TracingAttribute {
+            key: "test_key",
+            value: TracingAnyValue::Int(42),
+        };
+        let copied = attr;
+        let _also_original = attr; // Original still usable
+
+        assert_eq!(copied.key, "test_key");
+    }
 }
diff --git a/rust/otap-dataflow/crates/telemetry/src/tracing_integration/mod.rs b/rust/otap-dataflow/crates/telemetry/src/tracing_integration/mod.rs
index b2b0b39d8a..ddbc6ddb0b 100644
--- a/rust/otap-dataflow/crates/telemetry/src/tracing_integration/mod.rs
+++ b/rust/otap-dataflow/crates/telemetry/src/tracing_integration/mod.rs
@@ -4,11 +4,9 @@
 //! tokio-tracing support for directly encoding and formatting OTLP bytes.
 
 pub mod log_record;
-pub mod otlp_bytes_channel;
 pub mod otlp_bytes_formatter;
 pub mod subscriber;
 
 pub use log_record::{TracingAnyValue, TracingAttribute, TracingLogRecord};
-pub use otlp_bytes_channel::{OtlpBytesChannel, OtlpBytesChannelStats, OtlpBytesConsumerConfig};
 pub use otlp_bytes_formatter::{FormatError, OtlpBytesFormattingLayer};
 pub use subscriber::OtlpTracingLayer;
diff --git a/rust/otap-dataflow/crates/telemetry/src/tracing_integration/otlp_bytes_channel.rs b/rust/otap-dataflow/crates/telemetry/src/tracing_integration/otlp_bytes_channel.rs
deleted file mode 100644
index f14a8df128..0000000000
--- a/rust/otap-dataflow/crates/telemetry/src/tracing_integration/otlp_bytes_channel.rs
+++ /dev/null
@@ -1,157 +0,0 @@
-// Copyright The OpenTelemetry Authors
-// SPDX-License-Identifier: Apache-2.0
-
-//! OTLP bytes channel abstraction for multi-threaded telemetry.
-//!
-//! This provides a common pattern used in multiple places:
-//! - Admin runtime: 3rd party logging via global tracing subscriber
-//! - Internal telemetry receiver: Component logging bridge to OTAP pipeline
-//! - Thread-per-core: Per-thread logging with dedicated channels
-//!
-//! Architecture:
-//! ```text
-//! Producer(s) → mpsc::Sender → Channel → mpsc::Receiver → Consumer
-//!                                             ↓
-//!                              Console | OTLP | Custom handler
-//! ```
-
-use bytes::Bytes;
-use std::sync::mpsc;
-
-/// Configuration for how to consume OTLP bytes from the channel.
-///
-/// All 3rd party logging goes through our custom subscriber → OTLP bytes → channel.
-/// This enum determines how those bytes are consumed in the admin runtime:
-///
-/// - **Console**: Human-readable formatting (our builtin formatter)
-/// - **InternalReceiver**: Forward to OTAP pipeline (our builtin OTLP path)
-/// - **OtelSdkExporter**: Use any OpenTelemetry SDK exporter (stdout, OTLP, custom)
-///
-/// This unified architecture means:
-/// 1. ALL 3rd party logs use the same channel-based path
-/// 2. No need for OpenTelemetryTracingBridge (we decode OTLP → OTel format if needed)
-/// 3. Flexible backend choice while keeping single-threaded consumption
-#[derive(Debug, Clone)]
-pub enum OtlpBytesConsumerConfig {
-    /// Format and write to console (stdout/stderr based on level).
-    /// Uses our builtin formatter for human-readable output.
-    Console {
-        /// Enable ANSI color codes
-        ansi: bool,
-        /// Include ISO8601 timestamps
-        timestamp: bool,
-        /// Include log level (INFO, WARN, etc.)
-        level: bool,
-        /// Include target/scope name
-        target: bool,
-        /// Include event name field
-        event_name: bool,
-        /// Include thread names
-        thread_names: bool,
-    },
-
-    /// Forward to internal telemetry receiver (bridges to OTAP pipeline).
-    /// Uses our builtin OTLP exporter to send to the internal receiver,
-    /// which then goes through the OTAP pipeline for processing/export.
-    InternalReceiver {
-        // Future: configuration for the internal receiver
-    },
-
-    /// Use an OpenTelemetry SDK exporter.
-    /// OTLP bytes are decoded to OpenTelemetry LogData and passed to the SDK exporter.
-    /// This allows using any OTel SDK exporter (stdout, OTLP, custom) while keeping
-    /// our unified channel-based architecture.
-    OtelSdkExporter {
-        /// Exporter type identifier (e.g., "stdout", "otlp-grpc", "otlp-http")
-        exporter_type: String,
-        /// Configuration for the specific exporter (JSON or similar)
-        config: std::collections::HashMap<String, String>,
-    },
-}
-
-impl OtlpBytesConsumerConfig {
-    /// Create default console configuration (matches current behavior)
-    pub fn default_console() -> Self {
-        Self::Console {
-            ansi: true,
-            timestamp: true,
-            level: true,
-            target: true,
-            event_name: false,
-            thread_names: true,
-        }
-    }
-}
-
-/// OTLP bytes channel for single-producer, single-consumer telemetry.
-///
-/// This encapsulates the mpsc channel pattern used throughout the telemetry system.
-/// Multiple producers can share the sender (wrapped in Arc), but there's typically
-/// one consumer task per channel.
-pub struct OtlpBytesChannel {
-    sender: mpsc::SyncSender<Bytes>,
-    receiver: mpsc::Receiver<Bytes>,
-}
-
-impl OtlpBytesChannel {
-    /// Create a new OTLP bytes channel with bounded capacity.
-    ///
-    /// # Arguments
-    /// * `capacity` - Maximum number of OTLP byte buffers to queue
-    ///
-    /// When the channel is full, senders will block until space is available.
-    /// This provides backpressure.
-    pub fn new(capacity: usize) -> Self {
-        let (sender, receiver) = mpsc::sync_channel(capacity);
-        Self { sender, receiver }
-    }
-
-    /// Split into sender and receiver parts.
-    ///
-    /// The sender can be cloned and shared across multiple producers.
-    /// The receiver should be moved to a single consumer task.
-    pub fn split(self) -> (mpsc::SyncSender<Bytes>, mpsc::Receiver<Bytes>) {
-        (self.sender, self.receiver)
-    }
-
-    /// Get a reference to the sender (for cloning).
-    pub fn sender(&self) -> &mpsc::SyncSender<Bytes> {
-        &self.sender
-    }
-
-    /// Take the receiver (consumes self).
-    pub fn into_receiver(self) -> mpsc::Receiver<Bytes> {
-        self.receiver
-    }
-}
-
-/// Statistics about OTLP bytes channel consumption.
-#[derive(Debug, Default, Clone)]
-pub struct OtlpBytesChannelStats {
-    /// Total number of OTLP byte buffers received
-    pub buffers_received: u64,
-
-    /// Total bytes processed
-    pub bytes_processed: u64,
-
-    /// Number of format/forward errors
-    pub errors: u64,
-}
-
-impl OtlpBytesChannelStats {
-    /// Create new statistics tracker.
-    pub fn new() -> Self {
-        Self::default()
-    }
-
-    /// Record a successfully processed buffer.
-    pub fn record_buffer(&mut self, size: usize) {
-        self.buffers_received += 1;
-        self.bytes_processed += size as u64;
-    }
-
-    /// Record an error during processing.
-    pub fn record_error(&mut self) {
-        self.errors += 1;
-    }
-}
diff --git a/rust/otap-dataflow/crates/telemetry/src/tracing_integration/subscriber.rs b/rust/otap-dataflow/crates/telemetry/src/tracing_integration/subscriber.rs
index ff233da516..c2837e125b 100644
--- a/rust/otap-dataflow/crates/telemetry/src/tracing_integration/subscriber.rs
+++ b/rust/otap-dataflow/crates/telemetry/src/tracing_integration/subscriber.rs
@@ -10,39 +10,41 @@
 //!
 //! The layer uses a visitor pattern to extract field values from events and
 //! constructs TracingLogRecord instances that can be encoded directly.
+//!
+//! **Important**: This layer is designed for single-threaded use. The callback
+//! should encode the log record to OTLP bytes immediately - only bytes should
+//! cross thread boundaries.
+//!
+//! **Zero-copy design**: All attribute keys and values are borrowed with lifetimes
+//! tied to the tracing event callback. The `FieldVisitor` uses a string arena to
+//! hold any formatted strings that need allocation.
 
-use super::log_record::{TracingAttribute, TracingAnyValue, TracingLogRecord};
-use std::collections::HashMap;
-use std::sync::Mutex;
+use super::log_record::{TracingAnyValue, TracingAttribute, TracingLogRecord};
 use std::time::{SystemTime, UNIX_EPOCH};
-use tracing::{Event, Id, Subscriber};
 use tracing::span::Attributes;
+use tracing::{Event, Id, Subscriber};
 use tracing_subscriber::layer::{Context, Layer};
 use tracing_subscriber::registry::LookupSpan;
 
-/// A tracing subscriber layer that converts events to TracingLogRecord.
-///
-/// This layer can be composed with other layers in a tracing-subscriber registry
-/// to capture events and convert them to OTLP-compatible log records.
-///
-/// # Example
-/// ```ignore
-/// use tracing_subscriber::prelude::*;
-/// use otap_df_telemetry::tracing_integration::OtlpTracingLayer;
-///
-/// let otlp_layer = OtlpTracingLayer::new(|log_record| {
-///     // Encode log_record using stateful encoder
-///     encoder.encode_log_record(&log_record, &resource_bytes, &scope_encoding)?;
-/// });
-///
-/// tracing_subscriber::registry()
-///     .with(otlp_layer)
-///     .init();
-/// ```
-/// Span data stored for duration calculation
-struct SpanData {
-    start_time_nanos: u64,
-    attributes: Vec<TracingAttribute>,
+/// Owned value type for span storage (spans outlive individual events).
+#[derive(Clone)]
+enum OwnedValue {
+    Str(String),
+    Int(i64),
+    Bool(bool),
+    Double(f64),
+}
+
+impl OwnedValue {
+    /// Convert to a borrowed TracingAnyValue given a lifetime.
+    fn as_borrowed(&self) -> TracingAnyValue<'_> {
+        match self {
+            OwnedValue::Str(s) => TracingAnyValue::Str(s.as_str()),
+            OwnedValue::Int(i) => TracingAnyValue::Int(*i),
+            OwnedValue::Bool(b) => TracingAnyValue::Bool(*b),
+            OwnedValue::Double(d) => TracingAnyValue::Double(*d),
+        }
+    }
 }
 
 /// Tracing subscriber layer that captures events and spans as OTLP log records.
@@ -51,36 +53,35 @@ struct SpanData {
 /// of log records (start/end) rather than as first-class span objects. This aligns
 /// with unified dataflow architectures where all telemetry flows through a single
 /// log pipeline.
+///
+/// **Note**: This layer is `!Send` because it uses `RefCell` internally. The callback
+/// should encode log records to OTLP bytes immediately - only bytes cross thread
+/// boundaries.
 pub struct OtlpTracingLayer<F>
 where
-    F: Fn(TracingLogRecord) + Send + Sync + 'static,
+    F: for<'a> Fn(TracingLogRecord<'a>) + 'static,
 {
     /// Callback function that receives each TracingLogRecord
     on_event: F,
-    /// Storage for span start times to calculate duration on close
-    span_data: Mutex<HashMap<u64, SpanData>>,
 }
 
 impl<F> OtlpTracingLayer<F>
 where
-    F: Fn(TracingLogRecord) + Send + Sync + 'static,
+    F: for<'a> Fn(TracingLogRecord<'a>) + 'static,
 {
     /// Creates a new OtlpTracingLayer with the given event handler.
     ///
     /// # Arguments
     /// * `on_event` - Callback invoked for each tracing event, receiving a TracingLogRecord
     pub fn new(on_event: F) -> Self {
-        Self {
-            on_event,
-            span_data: Mutex::new(HashMap::new()),
-        }
+        Self { on_event }
     }
 }
 
 impl<S, F> Layer<S> for OtlpTracingLayer<F>
 where
     S: Subscriber + for<'a> LookupSpan<'a>,
-    F: Fn(TracingLogRecord) + Send + Sync + 'static,
+    F: for<'a> Fn(TracingLogRecord<'a>) + 'static,
 {
     fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) {
         // Get timestamp
@@ -89,116 +90,100 @@ where
             .unwrap_or_default()
             .as_nanos() as u64;
 
-        // Extract fields using visitor
+        // Extract fields using visitor with string arena
         let mut visitor = FieldVisitor::new();
         event.record(&mut visitor);
 
-        // Build TracingLogRecord
-        // Note: metadata.name() includes file:line, e.g., "event src/main.rs:42"
-        let log_record = TracingLogRecord::new(
-            event.metadata(),
-            visitor.attributes,
-            timestamp_nanos,
-        )
-        .with_body(visitor.message.unwrap_or_default());
+        // Build attributes from collected data
+        // The visitor's arena holds any allocated strings
+        let attributes: Vec<TracingAttribute<'_>> = visitor
+            .attr_keys
+            .iter()
+            .zip(visitor.attr_values.iter())
+            .map(|(key, value)| TracingAttribute {
+                key,
+                value: value.as_borrowed(),
+            })
+            .collect();
+
+        // Build TracingLogRecord with borrowed message
+        let message_ref = visitor.message.as_deref().unwrap_or("");
+        let log_record = TracingLogRecord::new(event.metadata(), attributes, timestamp_nanos)
+            .with_body(message_ref);
 
         // Invoke the callback
         (self.on_event)(log_record);
     }
 
     fn on_new_span(&self, attrs: &Attributes<'_>, id: &Id, _ctx: Context<'_, S>) {
-        let timestamp_nanos = SystemTime::now()
-            .duration_since(UNIX_EPOCH)
-            .unwrap_or_default()
-            .as_nanos() as u64;
-
-        // Extract fields from span attributes
-        let mut visitor = FieldVisitor::new();
-        attrs.record(&mut visitor);
-
-        let metadata = attrs.metadata();
-        let mut attributes = visitor.attributes.clone();
-
-        // Add span.id as attribute
-        attributes.push(TracingAttribute {
-            key: "span.id".to_string(),
-            value: TracingAnyValue::Int(id.into_u64() as i64),
-        });
-
-        // Store span data for duration calculation on close
-        if let Ok(mut spans) = self.span_data.lock() {
-            let _ = spans.insert(id.into_u64(), SpanData {
-                start_time_nanos: timestamp_nanos,
-                attributes: attributes.clone(),
-            });
-        }
-
-        // Create "span.start" log record
-        // Format: "span.start {span_name} src/file.rs:42"
-        let event_name = format!("span.start {}", metadata.name());
-        let log_record = TracingLogRecord::new_with_event_name(
-            metadata,
-            attributes,
-            timestamp_nanos,
-            event_name,
-        )
-        .with_body(visitor.message.unwrap_or_default());
-
-        // Invoke callback with span start event
-        (self.on_event)(log_record);
+        // TODO
+
+        // let timestamp_nanos = SystemTime::now()
+        //     .duration_since(UNIX_EPOCH)
+        //     .unwrap_or_default()
+        //     .as_nanos() as u64;
+
+        // // Extract fields from span attributes
+        // let mut visitor = FieldVisitor::new();
+        // attrs.record(&mut visitor);
+
+        // let metadata = attrs.metadata();
+        // let span_id = id.into_u64();
+
+        // let mut attributes: Vec<TracingAttribute<'_>> = data
+        //     .attr_keys
+        //     .iter()
+        //     .zip(data.attr_values.iter())
+        //     .map(|(key, value)| TracingAttribute {
+        //         key,
+        //         value: value.as_borrowed(),
+        //     })
+        //     .collect();
+
+        // // Add span.id and span.name as attributes
+        // let span_id_value = TracingAnyValue::Int(span_id as i64);
+        // let span_name_value = TracingAnyValue::Str(data.name);
+
+        // attributes.push(TracingAttribute {
+        //     key: "span.id",
+        //     value: span_id_value,
+        // });
+        // attributes.push(TracingAttribute {
+        //     key: "span.name",
+        //     value: span_name_value,
+        // });
+
+        // // Create "span.start" log record
+        // let message_ref = visitor.message.as_deref().unwrap_or("");
+        // let log_record = TracingLogRecord::new_with_event_name(
+        //     metadata,
+        //     attributes,
+        //     timestamp_nanos,
+        //     "span.start",
+        // )
+        // .with_body(message_ref);
+
+        // // Invoke callback with span start event
+        // (self.on_event)(log_record);
     }
 
     fn on_close(&self, id: Id, _ctx: Context<'_, S>) {
-        let end_time_nanos = SystemTime::now()
-            .duration_since(UNIX_EPOCH)
-            .unwrap_or_default()
-            .as_nanos() as u64;
-
-        // Retrieve and remove span data
-        let span_data = if let Ok(mut spans) = self.span_data.lock() {
-            spans.remove(&id.into_u64())
-        } else {
-            return;
-        };
-
-        if let Some(span_data) = span_data {
-            // Get span metadata from context
-            // Note: We don't have direct access to metadata here, so we'll create a minimal record
-            let duration_nanos = end_time_nanos.saturating_sub(span_data.start_time_nanos);
-
-            let mut attributes = span_data.attributes;
-
-            // Add duration as attribute
-            attributes.push(TracingAttribute {
-                key: "span.duration_nanos".to_string(),
-                value: TracingAnyValue::Int(duration_nanos as i64),
-            });
-
-            // Create a minimal log record for span end
-            // We use INFO level for span events
-            let log_record = TracingLogRecord::new_span_end(
-                id.into_u64(),
-                attributes,
-                end_time_nanos,
-            );
-
-            // Invoke callback with span end event
-            (self.on_event)(log_record);
-        }
+        // TODO
     }
 }
 
 /// Visitor that extracts field values from a tracing event.
 ///
 /// This implements tracing::field::Visit to walk through all fields in an event
-/// and collect them as TracingAttribute instances.
+/// and collect them as attribute key-value pairs.
 ///
-/// Note: We don't extract event_name here because metadata.name() already provides
-/// it with file:line info (e.g., "event src/main.rs:42").
+/// **Zero-copy design**: Field names are `&'static str` from tracing.
+/// String values that need allocation (debug formatting) are stored in owned form.
 struct FieldVisitor {
-    /// Collected attributes from the event
-    attributes: Vec<TracingAttribute>,
-
+    /// Attribute keys (all &'static str from field.name())
+    attr_keys: Vec<&'static str>,
+    /// Attribute values (owned to support debug formatting)
+    attr_values: Vec<OwnedValue>,
     /// The message/body (from the "message" field, if present)
     message: Option<String>,
 }
 
@@ -206,7 +191,8 @@ struct FieldVisitor {
 impl FieldVisitor {
     fn new() -> Self {
         Self {
-            attributes: Vec::new(),
+            attr_keys: Vec::new(),
+            attr_values: Vec::new(),
             message: None,
         }
     }
 }
 
 impl tracing::field::Visit for FieldVisitor {
     fn record_f64(&mut self, field: &tracing::field::Field, value: f64) {
-        // Skip special "message" field
         if field.name() == "message" {
             return;
         }
-
-        self.attributes.push(TracingAttribute {
-            key: field.name().to_string(),
-            value: TracingAnyValue::Double(value),
-        });
+        self.attr_keys.push(field.name());
+        self.attr_values.push(OwnedValue::Double(value));
     }
 
     fn record_i64(&mut self, field: &tracing::field::Field, value: i64) {
         if field.name() == "message" {
             return;
         }
-
-        self.attributes.push(TracingAttribute {
-            key: field.name().to_string(),
-            value: TracingAnyValue::Int(value),
-        });
+        self.attr_keys.push(field.name());
+        self.attr_values.push(OwnedValue::Int(value));
     }
 
     fn record_u64(&mut self, field: &tracing::field::Field, value: u64) {
         if field.name() == "message" {
             return;
         }
-
-        // Convert u64 to i64 (may lose precision for very large values)
-        self.attributes.push(TracingAttribute {
-            key: field.name().to_string(),
-            value: TracingAnyValue::Int(value as i64),
-        });
+        self.attr_keys.push(field.name());
+        self.attr_values.push(OwnedValue::Int(value as i64));
     }
 
     fn record_bool(&mut self, field: &tracing::field::Field, value: bool) {
         if field.name() == "message" {
             return;
         }
-
-        self.attributes.push(TracingAttribute {
-            key: field.name().to_string(),
-            value: TracingAnyValue::Bool(value),
-        });
+        self.attr_keys.push(field.name());
+        self.attr_values.push(OwnedValue::Bool(value));
     }
 
     fn record_str(&mut self, field: &tracing::field::Field, value: &str) {
-        // Handle special "message" field
         if field.name() == "message" {
             self.message = Some(value.to_string());
             return;
         }
-
-        // Store string attributes by cloning
-        self.attributes.push(TracingAttribute {
-            key: field.name().to_string(),
-            value: TracingAnyValue::Str(value.to_string()),
-        });
+        self.attr_keys.push(field.name());
+        self.attr_values.push(OwnedValue::Str(value.to_string()));
     }
 
     fn record_debug(&mut self, field: &tracing::field::Field, value: &dyn std::fmt::Debug) {
-        // Capture the "message" field which contains the formatted message
         if field.name() == "message" {
             self.message = Some(format!("{:?}", value));
             return;
         }
-
-        // Convert debug representation to string and store
-        let debug_str = format!("{:?}", value);
-        self.attributes.push(TracingAttribute {
-            key: field.name().to_string(),
-            value: TracingAnyValue::Str(debug_str),
-        });
+        self.attr_keys.push(field.name());
+        self.attr_values
+            .push(OwnedValue::Str(format!("{:?}", value)));
     }
 }
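+
+// Informal note (illustrative, not from the patch): for a call like
+// `tracing::info!(user = "alice", count = 7, "hi")`, tracing invokes
+// record_str for "user", record_i64 for "count" (integer values widen
+// to i64), and record_debug for the "message" field, whose formatted
+// text becomes the log body above.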
 
 #[cfg(test)]
 mod tests {
     use super::*;
-    use std::sync::{Arc, Mutex};
+    use std::cell::RefCell;
     use tracing_subscriber::prelude::*;
 
     #[test]
     fn test_otlp_layer_captures_events() {
         use otap_df_pdata::views::logs::LogRecordView;
-
-        // Collect captured log records
-        let captured = Arc::new(Mutex::new(Vec::new()));
-        let captured_clone = captured.clone();
-
-        let layer = OtlpTracingLayer::new(move |log_record| {
-            let mut records = captured_clone.lock().unwrap();
-            records.push((
-                log_record.severity_text().map(|s| String::from_utf8_lossy(s).to_string()),
-                log_record.event_name().map(|s| String::from_utf8_lossy(s).to_string()),
-            ));
+
+        // Thread-local storage for captured log records (no Send needed)
+        thread_local! {
+            static CAPTURED: RefCell<Vec<(Option<String>, Option<String>)>> = const { RefCell::new(Vec::new()) };
+        }
+
+        let layer = OtlpTracingLayer::new(|log_record| {
+            CAPTURED.with(|captured| {
+                captured.borrow_mut().push((
+                    log_record
+                        .severity_text()
+                        .map(|s| String::from_utf8_lossy(s).to_string()),
+                    log_record
+                        .event_name()
+                        .map(|s| String::from_utf8_lossy(s).to_string()),
+                ));
+            });
         });
 
         let subscriber = tracing_subscriber::registry().with(layer);
-        tracing::subscriber::with_default(subscriber, || {
-            tracing::info!(name: "test.event", "Test message");
-            tracing::warn!(name: "test.warning", "Warning message");
-        });
+        // Use Dispatch and set_default for thread-local subscriber (no Send+Sync required)
+        let dispatch = tracing::Dispatch::new(subscriber);
+        let _guard = tracing::dispatcher::set_default(&dispatch);
+
+        tracing::info!(name: "test.event", "Test message");
+        tracing::warn!(name: "test.warning", "Warning message");
 
-        let records = captured.lock().unwrap();
-        assert_eq!(records.len(), 2);
-
-        // Note: event_name extraction from visitor has lifetime issues
-        // We'll address this in the production implementation
+        // Verify captured records
+        CAPTURED.with(|captured| {
+            let records = captured.borrow();
+            assert_eq!(records.len(), 2);
+        });
     }
 }

From 00cdb256eed068e249b20fc9b4db39819554b903 Mon Sep 17 00:00:00 2001
From: Joshua MacDonald
Date: Mon, 5 Jan 2026 16:56:57 -0800
Subject: [PATCH 03/92] emulate stateful encoder

---
 .../crates/pdata/src/otlp/common.rs           |  16 +-
 .../crates/pdata/src/otlp/mod.rs              |   6 +-
 .../crates/pdata/src/otlp/stateful_encoder.rs | 814 ++++++++++++++++++
 .../otap-dataflow/crates/telemetry/src/lib.rs |   2 +-
 .../DESIGN.md                                 |   0
 .../src/self_tracing/direct_encoder.rs        | 728 ++++++++++++++++
 .../log_record.rs                             |   0
 .../mod.rs                                    |   8 +
 .../otlp_bytes_formatter.rs                   | 196 ++++-
 .../subscriber.rs                             |   0
 10 files changed, 1760 insertions(+), 10 deletions(-)
 create mode 100644 rust/otap-dataflow/crates/pdata/src/otlp/stateful_encoder.rs
 rename rust/otap-dataflow/crates/telemetry/src/{tracing_integration => self_tracing}/DESIGN.md (100%)
 create mode 100644 rust/otap-dataflow/crates/telemetry/src/self_tracing/direct_encoder.rs
 rename rust/otap-dataflow/crates/telemetry/src/{tracing_integration => self_tracing}/log_record.rs (100%)
 rename rust/otap-dataflow/crates/telemetry/src/{tracing_integration => self_tracing}/mod.rs (58%)
 rename rust/otap-dataflow/crates/telemetry/src/{tracing_integration => self_tracing}/otlp_bytes_formatter.rs (63%)
 rename rust/otap-dataflow/crates/telemetry/src/{tracing_integration => self_tracing}/subscriber.rs (100%)

diff --git a/rust/otap-dataflow/crates/pdata/src/otlp/common.rs b/rust/otap-dataflow/crates/pdata/src/otlp/common.rs
index e3a0859d12..0ebfc790fb 100644
--- a/rust/otap-dataflow/crates/pdata/src/otlp/common.rs
+++ b/rust/otap-dataflow/crates/pdata/src/otlp/common.rs
@@ -130,6 +130,7 @@ pub(in crate::otlp) struct ScopeArrays<'a> {
     pub id: Option<&'a UInt16Array>,
 }
 
+/// Arrow DataType for the Scope struct array.
 pub static SCOPE_ARRAY_DATA_TYPE: LazyLock<DataType> = LazyLock::new(|| {
     DataType::Struct(Fields::from(vec![
         Field::new(
@@ -491,11 +492,22 @@ macro_rules! proto_encode_len_delimited_unknown_size {
     }};
 }
 
-pub(crate) fn encode_len_placeholder(buf: &mut ProtoBuffer) {
+/// Write a 4-byte length placeholder for later patching.
+///
+/// This writes bytes that represent a zero-padded varint, which can be
+/// patched later with the actual length once content is written.
+pub fn encode_len_placeholder(buf: &mut ProtoBuffer) {
     buf.buffer.extend_from_slice(&[0x80, 0x80, 0x80, 0x00]);
 }
 
-pub(crate) fn patch_len_placeholder(
+/// Patch a previously written length placeholder with the actual length.
+///
+/// # Arguments
+/// * `buf` - The buffer containing the placeholder
+/// * `num_bytes` - Number of bytes in the placeholder (typically 4)
+/// * `len` - The actual content length to encode
+/// * `len_start_pos` - Position where the placeholder starts
+pub fn patch_len_placeholder(
     buf: &mut ProtoBuffer,
     num_bytes: usize,
     len: usize,
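The padded-placeholder trick above is easiest to see in isolation. The following standalone sketch (illustrative only, not the crate's actual helpers) relies on the fact that protobuf decoders accept non-minimal varints: a length is reserved as a 4-byte varint `0` with continuation bits set, then patched in place once the content size is known.

```rust
// Reserve a 4-byte padded varint (value 0) and return its position.
fn write_placeholder(buf: &mut Vec<u8>) -> usize {
    let pos = buf.len();
    buf.extend_from_slice(&[0x80, 0x80, 0x80, 0x00]); // varint 0, padded to 4 bytes
    pos
}

// Rewrite the placeholder with `len`: 7 payload bits per byte, keeping the
// continuation bit set on all but the last byte.
fn patch(buf: &mut [u8], pos: usize, mut len: usize) {
    for i in 0..4 {
        buf[pos + i] = (len & 0x7f) as u8 | if i < 3 { 0x80 } else { 0x00 };
        len >>= 7;
    }
    assert_eq!(len, 0, "length must fit in 28 bits");
}

fn main() {
    let mut buf = Vec::new();
    buf.push(0x0a); // a length-delimited field tag (field 1, wire type LEN)
    let pos = write_placeholder(&mut buf);
    buf.extend_from_slice(b"hello"); // message content
    let len = buf.len() - pos - 4;
    patch(&mut buf, pos, len);
    assert_eq!(&buf[pos..pos + 4], &[0x85, 0x80, 0x80, 0x00]); // padded varint 5
}
```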
diff --git a/rust/otap-dataflow/crates/pdata/src/otlp/mod.rs b/rust/otap-dataflow/crates/pdata/src/otlp/mod.rs
index 051110ac5d..389bf9ba2e 100644
--- a/rust/otap-dataflow/crates/pdata/src/otlp/mod.rs
+++ b/rust/otap-dataflow/crates/pdata/src/otlp/mod.rs
@@ -9,20 +9,20 @@
 use crate::{error::Result, otap::OtapArrowRecords};
 use bytes::Bytes;
 use otap_df_config::SignalType;
 
-pub use common::ProtoBuffer;
+pub use common::{ProtoBuffer, encode_len_placeholder, patch_len_placeholder};
 pub use otap_df_pdata_otlp_macros::Message; // Required for derived code
 pub use otap_df_pdata_otlp_macros::qualified; // Required for derived code
 
 /// Common methods for OTLP/OTAP attributes.
 pub mod attributes;
+/// Common utilities for protobuf encoding.
+pub mod common;
 /// Common methods for OTLP/OTAP logs.
 pub mod logs;
 /// Common methods for OTLP/OTAP metrics.
 pub mod metrics;
 /// Common methods for OTLP/OTAP traces.
 pub mod traces;
-
-mod common;
 
 #[cfg(test)]
 mod tests;
diff --git a/rust/otap-dataflow/crates/pdata/src/otlp/stateful_encoder.rs b/rust/otap-dataflow/crates/pdata/src/otlp/stateful_encoder.rs
new file mode 100644
index 0000000000..81c3b4a97c
--- /dev/null
+++ b/rust/otap-dataflow/crates/pdata/src/otlp/stateful_encoder.rs
@@ -0,0 +1,814 @@
+// Copyright The OpenTelemetry Authors
+// SPDX-License-Identifier: Apache-2.0
+
+//! Stateful OTLP encoder for streaming single log records with automatic batching.
+//!
+//! This encoder maintains open `ResourceLogs` and `ScopeLogs` messages, appending individual
+//! `LogRecord`s as they arrive. When the InstrumentationScope changes (via scope name), it
+//! automatically closes the previous scope and starts a new one. The Resource is pre-encoded
+//! and copied once.
+//!
+//! # Design
+//! - **Resource**: Pre-encoded as `OtlpBytes` (includes protobuf field tag + length + content)
+//! - **Scope**: Encoded on-the-fly using scope name (InstrumentationScope.name only)
+//! - **LogRecord**: Accepted as `LogRecordView` trait, encoded on-the-fly
+
+use crate::error::Result;
+use crate::otlp::common::{ProtoBuffer, encode_len_placeholder, patch_len_placeholder};
+use crate::proto::consts::{field_num::logs::*, wire_types};
+use crate::views::logs::LogRecordView;
+use bytes::Bytes;
+
+/// Pre-encoded OTLP bytes (includes protobuf field tag + length + message content)
+///
+/// These bytes are ready to be copied directly into the output buffer without further processing.
+pub type OtlpBytes = Vec<u8>;
+/// @@@ Remove me, use super::OtlpProtoBytes
+
+/// Position marker for a length-delimited field that needs patching
+///
+/// @@@ Make this variable width. We want 2-byte padding for records
+/// and 4-byte padding for the container messages ResourceLogs,
+/// ScopeLogs, etc., because it is reasonable to insist on 16 KiB log
+/// messages for a self-diagnostic library and we are able to drop
+/// attributes to achieve this (OTLP has a dedicated field for this).
+/// Possibly make the placeholder generic over the primitive width
+/// (u16, u32).
+#[derive(Debug, Clone, Copy)]
+struct LengthPlaceholder {
+    /// Position in buffer where the 4-byte length placeholder starts
+    position: usize,
+}
+
+impl LengthPlaceholder {
+    fn new(position: usize) -> Self {
+        Self { position }
+    }
+
+    fn patch(self, buf: &mut ProtoBuffer) {
+        let content_len = buf.len() - self.position - 4;
+        patch_len_placeholder(buf, 4, content_len, self.position);
+    }
+}
+
+/// Current state of the stateful encoder
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum EncoderState {
+    /// No messages open, ready to start new ResourceLogs
+    Idle,
+    /// ResourceLogs is open, ready to add ScopeLogs
+    ResourceOpen,
+    /// ResourceLogs and ScopeLogs are both open, ready to append LogRecords
+    ScopeOpen,
+}
+
+/// Stateful OTLP encoder that maintains open ResourceLogs and ScopeLogs messages.
+///
+/// # Example
+/// ```ignore
+/// let mut encoder = StatefulOtlpEncoder::new(64 * 1024);
+///
+/// // Pre-encode resource once
+/// let resource_bytes = encode_resource_to_otlp_bytes(&resource);
+///
+/// // Scope name is typically the module path or instrumentation library name
+/// let scope_name = "my_module::component";
+///
+/// // Encode multiple log records - automatically batched if scope name matches
+/// encoder.encode_log_record(&log_record_view, &resource_bytes, scope_name)?;
+/// encoder.encode_log_record(&log_record_view2, &resource_bytes, scope_name)?; // Batched
+///
+/// // Flush to get OTLP bytes
+/// let otlp_bytes = encoder.flush();
+/// ```
+pub struct StatefulOtlpEncoder {
+    /// Output buffer (reuses ProtoBuffer infrastructure)
+    buf: ProtoBuffer,
+
+    /// Current encoder state
+    state: EncoderState,
+
+    /// Length placeholder for the current ResourceLogs message
+    resource_logs_placeholder: Option<LengthPlaceholder>,
+
+    /// Length placeholder for the current ScopeLogs message
+    scope_logs_placeholder: Option<LengthPlaceholder>,
+
+    /// Name of the current scope for comparison
+    current_scope_name: Option<String>,
+}
+
+impl StatefulOtlpEncoder {
+    /// Create a new encoder with pre-allocated buffer capacity
+    pub fn new(capacity_bytes: usize) -> Self {
+        Self {
+            buf: ProtoBuffer::with_capacity(capacity_bytes),
+            state: EncoderState::Idle,
+            resource_logs_placeholder: None,
+            scope_logs_placeholder: None,
+            current_scope_name: None,
+        }
+    }
+
+    /// Get the current buffer size in bytes
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.buf.len()
+    }
+
+    /// Check if the buffer is empty
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.buf.is_empty()
+    }
+
+    /// Encode a single log record with its Resource and Scope context.
+    ///
+    /// This method automatically handles batching:
+    /// - If scope name matches the current batch, the LogRecord is appended
+    /// - If scope name differs, the current ScopeLogs is closed and a new one started
+    ///
+    /// # Parameters
+    /// - `log_record`: View of the log record to encode
+    /// - `resource_bytes`: Pre-encoded Resource (includes protobuf field tag + length + content)
+    /// - `scope_name`: InstrumentationScope name (typically tracing target/module path)
+    pub fn encode_log_record(
+        &mut self,
+        log_record: &impl LogRecordView,
+        resource_bytes: &[u8], // @@@ Make super::OtlpProtoBytes, expecting ::ExportLogsRequest
+        scope_name: &str,
+    ) -> Result<()> {
+        match self.state {
+            EncoderState::Idle => {
+                // Start new batch with Resource and Scope
+                self.start_resource_logs(resource_bytes)?;
+                self.start_scope_logs(scope_name)?;
+                self.append_log_record(log_record)?;
+            }
+
+            EncoderState::ResourceOpen => {
+                // Resource already open, start scope
+                self.start_scope_logs(scope_name)?;
+                self.append_log_record(log_record)?;
+            }
+
+            EncoderState::ScopeOpen => {
+                if self.current_scope_name.as_deref() == Some(scope_name) {
+                    // Same scope - just append LogRecord
+                    self.append_log_record(log_record)?;
+                } else {
+                    // Different scope - close current and start new
+                    self.close_scope_logs()?;
+                    self.start_scope_logs(scope_name)?;
+                    self.append_log_record(log_record)?;
+                }
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Flush the encoder, closing all open messages and returning the accumulated OTLP bytes.
+    ///
+    /// After flushing, the encoder is reset and ready for new messages.
+    pub fn flush(&mut self) -> Bytes {
+        // Close any open messages
+        if self.state == EncoderState::ScopeOpen {
+            let _ = self.close_scope_logs();
+        }
+        if self.state == EncoderState::ResourceOpen || self.state == EncoderState::ScopeOpen {
+            let _ = self.close_resource_logs();
+        }
+
+        // Take the bytes and reset the encoder
+        let (bytes, capacity) = self.buf.take_into_bytes();
+
+        // Reset state
+        self.state = EncoderState::Idle;
+        self.resource_logs_placeholder = None;
+        self.scope_logs_placeholder = None;
+        self.current_scope_name = None;
+
+        // Ensure capacity is preserved for next use
+        self.buf.ensure_capacity(capacity);
+
+        bytes
+    }
+
+    // === Private state management methods ===
+
+    fn start_resource_logs(&mut self, resource_bytes: &[u8]) -> Result<()> {
+        // Encode LogsData.resource_logs field (tag 1, length-delimited)
+        self.buf
+            .encode_field_tag(LOGS_DATA_RESOURCE, wire_types::LEN);
+
+        // Write 4-byte length placeholder
+        let placeholder = LengthPlaceholder::new(self.buf.len());
+        encode_len_placeholder(&mut self.buf);
+
+        // Copy pre-encoded Resource bytes (includes ResourceLogs.resource field)
+        self.buf.extend_from_slice(resource_bytes);
+
+        // Update state
+        self.resource_logs_placeholder = Some(placeholder);
+        self.state = EncoderState::ResourceOpen;
+
+        Ok(())
+    }
+
+    fn start_scope_logs(&mut self, scope_name: &str) -> Result<()> {
+        // Encode ResourceLogs.scope_logs field (tag 2, length-delimited)
+        self.buf
+            .encode_field_tag(RESOURCE_LOGS_SCOPE_LOGS, wire_types::LEN);
+
+        // Write 4-byte length placeholder
+        let placeholder = LengthPlaceholder::new(self.buf.len());
+        encode_len_placeholder(&mut self.buf);
+
+        // Encode ScopeLogs.scope field (tag 1, InstrumentationScope message)
+        self.encode_instrumentation_scope(scope_name)?;
+
+        // Update state
+        self.scope_logs_placeholder = Some(placeholder);
+        self.current_scope_name = Some(scope_name.to_string());
+        self.state = EncoderState::ScopeOpen;
+
+        Ok(())
+    }
+
+    fn append_log_record(&mut self, log_record: &impl LogRecordView) -> Result<()> {
+        // Encode ScopeLogs.log_records field (tag 2, length-delimited)
+        self.buf
+            .encode_field_tag(SCOPE_LOGS_LOG_RECORDS, wire_types::LEN);
+
+        // Use 4-byte padding for LogRecord
+        let placeholder = LengthPlaceholder::new(self.buf.len());
+        encode_len_placeholder(&mut self.buf);
+
+        // Encode LogRecordView fields
+        encode_log_record_view(log_record, &mut self.buf)?;
+
+        // Patch the length
+        placeholder.patch(&mut self.buf);
+
+        Ok(())
+    }
+
+    fn close_scope_logs(&mut self) -> Result<()> {
+        if let Some(placeholder) = self.scope_logs_placeholder.take() {
+            placeholder.patch(&mut self.buf);
+            self.state = EncoderState::ResourceOpen;
+            self.current_scope_name = None;
+        }
+        Ok(())
+    }
+
+    fn close_resource_logs(&mut self) -> Result<()> {
+        if let Some(placeholder) = self.resource_logs_placeholder.take() {
+            placeholder.patch(&mut self.buf);
+            self.state = EncoderState::Idle;
+        }
+        Ok(())
+    }
+
+    /// Encode an InstrumentationScope with just the name field
+    fn encode_instrumentation_scope(&mut self, scope_name: &str) -> Result<()> {
+        use crate::proto::consts::field_num::common::INSTRUMENTATION_SCOPE_NAME;
+
+        // Encode ScopeLogs.scope field (tag 1, length-delimited)
+        self.buf.encode_field_tag(SCOPE_LOG_SCOPE, wire_types::LEN);
+        let scope_placeholder = LengthPlaceholder::new(self.buf.len());
+        encode_len_placeholder(&mut self.buf);
+
+        // Encode InstrumentationScope.name field (tag 1, string)
+        self.buf.encode_string(INSTRUMENTATION_SCOPE_NAME, scope_name);
+
+        // Patch InstrumentationScope length
+        scope_placeholder.patch(&mut self.buf);
+
+        Ok(())
+    }
+}
+
+// === Helper functions for encoding LogRecordView ===
+
+// TODO(consolidation): The OTAP batch encoder in `logs.rs` (~110 lines in encode_log_record())
+// duplicates the field encoding logic below. Since OTAP implements LogRecordView (via
+// OtapLogRecordView in views/otap/logs.rs), we could refactor logs.rs to:
+// 1. Keep its batching/sorting/cursor logic (OTAP-specific)
+// 2. Delegate LogRecord field encoding to this function via the view trait
+// This would eliminate ~150 lines of duplicated code across encode_log_record, encode_any_value,
+// and encode_key_value, making the view-based encoder the canonical implementation for all
+// LogRecord encoding. The view trait methods are #[inline] so there's no performance impact.
+// Same opportunity exists for traces.rs and metrics.rs encoders.
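+
+// Worked example (illustrative, not from the patch): a log body "hi" is
+// written as LogRecord field 5 (LEN) - tag byte 0x2a ((5 << 3) | 2) -
+// followed by the AnyValue message `0a 02 68 69` (string_value "hi").
+// With the 4-byte padded length used by these helpers, the field reads:
+//   2a 84 80 80 00 0a 02 68 69
+// where 84 80 80 00 is the padded varint for length 4.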
+
+/// Encode all fields of a LogRecordView
+fn encode_log_record_view(log_record: &impl LogRecordView, buf: &mut ProtoBuffer) -> Result<()> {
+    // time_unix_nano (field 1, fixed64)
+    if let Some(time) = log_record.time_unix_nano() {
+        buf.encode_field_tag(LOG_RECORD_TIME_UNIX_NANO, wire_types::FIXED64);
+        buf.extend_from_slice(&time.to_le_bytes());
+    }
+
+    // severity_number (field 2, varint)
+    if let Some(severity) = log_record.severity_number() {
+        buf.encode_field_tag(LOG_RECORD_SEVERITY_NUMBER, wire_types::VARINT);
+        buf.encode_varint(severity as u64);
+    }
+
+    // severity_text (field 3, string)
+    if let Some(text) = log_record.severity_text() {
+        if !text.is_empty() {
+            // Convert &[u8] to &str for encode_string
+            if let Ok(text_str) = std::str::from_utf8(text) {
+                buf.encode_string(LOG_RECORD_SEVERITY_TEXT, text_str);
+            }
+        }
+    }
+
+    // body (field 5, AnyValue) - encode from AnyValueView
+    if let Some(body) = log_record.body() {
+        encode_any_value_view_field(LOG_RECORD_BODY, &body, buf)?;
+    }
+
+    // attributes (field 6, repeated KeyValue) - encode from AttributeView iterator
+    for attr in log_record.attributes() {
+        encode_attribute_view(LOG_RECORD_ATTRIBUTES, &attr, buf)?;
+    }
+
+    // dropped_attributes_count (field 7, uint32)
+    let dropped = log_record.dropped_attributes_count();
+    if dropped > 0 {
+        buf.encode_field_tag(LOG_RECORD_DROPPED_ATTRIBUTES_COUNT, wire_types::VARINT);
+        buf.encode_varint(dropped as u64);
+    }
+
+    // flags (field 8, fixed32)
+    if let Some(flags) = log_record.flags() {
+        buf.encode_field_tag(LOG_RECORD_FLAGS, wire_types::FIXED32);
+        buf.extend_from_slice(&flags.to_le_bytes());
+    }
+
+    // trace_id (field 9, bytes)
+    if let Some(trace_id) = log_record.trace_id() {
+        buf.encode_bytes(LOG_RECORD_TRACE_ID, trace_id);
+    }
+
+    // span_id (field 10, bytes)
+    if let Some(span_id) = log_record.span_id() {
+        buf.encode_bytes(LOG_RECORD_SPAN_ID, span_id);
+    }
+
+    // observed_time_unix_nano (field 11, fixed64)
+    if let Some(observed_time) = log_record.observed_time_unix_nano() {
+        buf.encode_field_tag(LOG_RECORD_OBSERVED_TIME_UNIX_NANO, wire_types::FIXED64);
+        buf.extend_from_slice(&observed_time.to_le_bytes());
+    }
+
+    Ok(())
+}
+
+/// Encode an AttributeView as a length-delimited field
+fn encode_attribute_view(
+    field_tag: u64,
+    attr: &impl crate::views::common::AttributeView,
+    buf: &mut ProtoBuffer,
+) -> Result<()> {
+    use crate::proto::consts::field_num::common::*;
+
+    // Start KeyValue message
+    buf.encode_field_tag(field_tag, wire_types::LEN);
+    let placeholder = LengthPlaceholder::new(buf.len());
+    encode_len_placeholder(buf);
+
+    // Encode key
+    let key = attr.key();
+    if !key.is_empty() {
+        // Convert &[u8] to &str for encode_string
+        if let Ok(key_str) = std::str::from_utf8(key) {
+            buf.encode_string(KEY_VALUE_KEY, key_str);
+        }
+    }
+
+    // Encode value (if present)
+    if let Some(value) = attr.value() {
+        encode_any_value_view_field(KEY_VALUE_VALUE, &value, buf)?;
+    }
+
+    // Patch length
+    placeholder.patch(buf);
+
+    Ok(())
+}
+
+/// Encode an AnyValueView as a length-delimited field
+fn encode_any_value_view_field<'a>(
+    field_tag: u64,
+    value: &impl crate::views::common::AnyValueView<'a>,
+    buf: &mut ProtoBuffer,
+) -> Result<()> {
+    buf.encode_field_tag(field_tag, wire_types::LEN);
+    let placeholder = LengthPlaceholder::new(buf.len());
+    encode_len_placeholder(buf);
+
+    encode_any_value_view_content(value, buf)?;
+
+    placeholder.patch(buf);
+    Ok(())
+}
+
+/// Encode the content of an AnyValueView (without the outer field tag)
+fn encode_any_value_view_content<'a>(
encode_any_value_view_content<'a>( + value: &impl crate::views::common::AnyValueView<'a>, + buf: &mut ProtoBuffer, +) -> Result<()> { + use crate::proto::consts::field_num::common::*; + use crate::views::common::ValueType; + + match value.value_type() { + ValueType::String => { + if let Some(s) = value.as_string() { + // Convert &[u8] to &str for encode_string + if let Ok(s_str) = std::str::from_utf8(s) { + buf.encode_string(ANY_VALUE_STRING_VALUE, s_str); + } + } + } + ValueType::Bool => { + if let Some(b) = value.as_bool() { + buf.encode_field_tag(ANY_VALUE_BOOL_VALUE, wire_types::VARINT); + buf.encode_varint(if b { 1 } else { 0 }); + } + } + ValueType::Int64 => { + if let Some(i) = value.as_int64() { + buf.encode_field_tag(ANY_VALUE_INT_VALUE, wire_types::VARINT); + buf.encode_varint(i as u64); + } + } + ValueType::Double => { + if let Some(d) = value.as_double() { + buf.encode_field_tag(ANY_VALUE_DOUBLE_VALUE, wire_types::FIXED64); + buf.extend_from_slice(&d.to_le_bytes()); + } + } + ValueType::Bytes => { + if let Some(bytes) = value.as_bytes() { + buf.encode_bytes(ANY_VALUE_BYTES_VALUE, bytes); + } + } + ValueType::Array => { + if let Some(mut arr_iter) = value.as_array() { + // Encode ArrayValue + buf.encode_field_tag(ANY_VALUE_ARRAY_VALUE, wire_types::LEN); + let placeholder = LengthPlaceholder::new(buf.len()); + encode_len_placeholder(buf); + + while let Some(val) = arr_iter.next() { + encode_any_value_view_field(ARRAY_VALUE_VALUES, &val, buf)?; + } + + placeholder.patch(buf); + } + } + ValueType::KeyValueList => { + if let Some(mut kvlist_iter) = value.as_kvlist() { + // Encode KeyValueList + buf.encode_field_tag(ANY_VALUE_KVLIST_VALUE, wire_types::LEN); + let placeholder = LengthPlaceholder::new(buf.len()); + encode_len_placeholder(buf); + + while let Some(kv) = kvlist_iter.next() { + encode_attribute_view(KEY_VALUE_LIST_VALUES, &kv, buf)?; + } + + placeholder.patch(buf); + } + } + ValueType::Empty => { + // Empty AnyValue - valid according to spec + } + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::proto::opentelemetry::common::v1::{ + AnyValue, KeyValue, any_value, + }; + use crate::proto::opentelemetry::resource::v1::Resource; + use crate::schema::{SpanId, TraceId}; + use crate::views::common::{AnyValueView, AttributeView, Str, ValueType}; + use crate::views::logs::LogRecordView; + + // Test helper: Simple LogRecordView implementation + struct SimpleLogRecord { + time_unix_nano: Option, + severity_number: Option, + severity_text: Option<&'static str>, + body: Option<&'static str>, + trace_id: Option, + span_id: Option, + } + + impl LogRecordView for SimpleLogRecord { + type Attribute<'a> + = SimpleAttribute + where + Self: 'a; + type AttributeIter<'a> + = std::iter::Empty> + where + Self: 'a; + type Body<'a> + = SimpleAnyValue + where + Self: 'a; + + fn time_unix_nano(&self) -> Option { + self.time_unix_nano + } + + fn observed_time_unix_nano(&self) -> Option { + self.time_unix_nano // same for tests + } + + fn severity_number(&self) -> Option { + self.severity_number + } + + fn severity_text(&self) -> Option> { + self.severity_text.map(|s| s.as_bytes()) + } + + fn body(&self) -> Option> { + self.body.map(|s| SimpleAnyValue::String(s)) + } + + fn attributes(&self) -> Self::AttributeIter<'_> { + std::iter::empty() + } + + fn dropped_attributes_count(&self) -> u32 { + 0 + } + + fn flags(&self) -> Option { + Some(0) + } + + fn trace_id(&self) -> Option<&TraceId> { + self.trace_id.as_ref() + } + + fn span_id(&self) -> Option<&SpanId> { + 
            self.span_id.as_ref()
+        }
+
+        fn event_name(&self) -> Option<Str<'_>> {
+            None
+        }
+    }
+
+    #[derive(Clone)]
+    enum SimpleAnyValue {
+        String(&'static str),
+    }
+
+    impl<'a> AnyValueView<'a> for SimpleAnyValue {
+        type KeyValue = SimpleAttribute;
+        type ArrayIter<'arr>
+            = std::iter::Empty<Self>
+        where
+            Self: 'arr;
+        type KeyValueIter<'kv>
+            = SimpleAttribute
+        where
+            Self: 'kv;
+
+        fn value_type(&self) -> ValueType {
+            match self {
+                SimpleAnyValue::String(_) => ValueType::String,
+            }
+        }
+
+        fn as_string(&self) -> Option<Str<'a>> {
+            match self {
+                SimpleAnyValue::String(s) => Some(s.as_bytes()),
+            }
+        }
+
+        fn as_bool(&self) -> Option<bool> {
+            None
+        }
+
+        fn as_int64(&self) -> Option<i64> {
+            None
+        }
+
+        fn as_double(&self) -> Option<f64> {
+            None
+        }
+
+        fn as_bytes(&self) -> Option<&[u8]> {
+            None
+        }
+
+        fn as_array(&self) -> Option<Self::ArrayIter<'_>> {
+            None
+        }
+
+        fn as_kvlist(&self) -> Option<Self::KeyValueIter<'_>> {
+            None
+        }
+    }
+
+    #[derive(Clone)]
+    struct SimpleAttribute;
+
+    impl AttributeView for SimpleAttribute {
+        type Val<'val>
+            = SimpleAnyValue
+        where
+            Self: 'val;
+
+        fn key(&self) -> Str<'_> {
+            "key".as_bytes()
+        }
+
+        fn value(&self) -> Option<Self::Val<'_>> {
+            Some(SimpleAnyValue::String("value"))
+        }
+    }
+
+    impl Iterator for SimpleAttribute {
+        type Item = Self;
+
+        fn next(&mut self) -> Option<Self::Item> {
+            None
+        }
+    }
+
+    // Helper: Pre-encode a Resource as OtlpBytes
+    fn encode_resource_bytes(resource: &Resource) -> OtlpBytes {
+        use crate::proto::consts::field_num::resource::*;
+        let mut buf = ProtoBuffer::with_capacity(256);
+
+        // Encode ResourceLogs.resource field (tag 1)
+        buf.encode_field_tag(1, wire_types::LEN);
+        let start = buf.len();
+        encode_len_placeholder(&mut buf);
+
+        // Encode attributes
+        for attr in &resource.attributes {
+            encode_attribute_proto(RESOURCE_ATTRIBUTES, attr, &mut buf);
+        }
+
+        // Patch length
+        let content_len = buf.len() - start - 4;
+        patch_len_placeholder(&mut buf, 4, content_len, start);
+
+        buf.into_bytes().to_vec()
+    }
+
+    // Helper to encode protobuf KeyValue (for test helpers)
+    fn encode_attribute_proto(field_tag: u64, attr: &KeyValue, buf: &mut ProtoBuffer) {
+        use crate::proto::consts::field_num::common::*;
+        buf.encode_field_tag(field_tag, wire_types::LEN);
+        let start = buf.len();
+        encode_len_placeholder(buf);
+
+        if !attr.key.is_empty() {
+            buf.encode_string(KEY_VALUE_KEY, &attr.key);
+        }
+
+        if let Some(ref value) = attr.value {
+            encode_any_value_proto(KEY_VALUE_VALUE, value, buf);
+        }
+
+        let content_len = buf.len() - start - 4;
+        patch_len_placeholder(buf, 4, content_len, start);
+    }
+
+    fn encode_any_value_proto(field_tag: u64, value: &AnyValue, buf: &mut ProtoBuffer) {
+        use crate::proto::consts::field_num::common::*;
+        buf.encode_field_tag(field_tag, wire_types::LEN);
+        let start = buf.len();
+        encode_len_placeholder(buf);
+
+        match &value.value {
+            Some(any_value::Value::StringValue(s)) => {
+                buf.encode_string(ANY_VALUE_STRING_VALUE, s);
+            }
+            _ => {}
+        }
+
+        let content_len = buf.len() - start - 4;
+        patch_len_placeholder(buf, 4, content_len, start);
+    }
+
+    #[test]
+    fn test_encoder_state_machine() {
+        let mut encoder = StatefulOtlpEncoder::new(1024);
+
+        // Initial state
+        assert_eq!(encoder.state, EncoderState::Idle);
+        assert!(encoder.is_empty());
+
+        // Pre-encode resource
+        let resource = Resource::default();
+        let resource_bytes = encode_resource_bytes(&resource);
+        let scope_name = "test_module";
+
+        // Simple log record
+        let log_record = SimpleLogRecord {
+            time_unix_nano: Some(1000),
+            severity_number: Some(9),
+            severity_text: Some("INFO"),
+            body: Some("test message"),
trace_id: None, + span_id: None, + }; + + encoder + .encode_log_record(&log_record, &resource_bytes, scope_name) + .unwrap(); + + // Should have data now + assert!(!encoder.is_empty()); + assert_eq!(encoder.state, EncoderState::ScopeOpen); + + // Flush should reset + let bytes = encoder.flush(); + assert!(!bytes.is_empty()); + assert_eq!(encoder.state, EncoderState::Idle); + } + + #[test] + fn test_batching_same_scope() { + let mut encoder = StatefulOtlpEncoder::new(1024); + + let resource = Resource::default(); + let resource_bytes = encode_resource_bytes(&resource); + let scope_name = "test_module"; + + // Encode three records with same scope + for i in 0..3 { + let log_record = SimpleLogRecord { + time_unix_nano: Some(i as u64), + severity_number: Some(9), + severity_text: Some("INFO"), + body: Some("test"), + trace_id: None, + span_id: None, + }; + encoder + .encode_log_record(&log_record, &resource_bytes, scope_name) + .unwrap(); + } + + // Should be in ScopeOpen state (not closed between records) + assert_eq!(encoder.state, EncoderState::ScopeOpen); + + let bytes = encoder.flush(); + assert!(!bytes.is_empty()); + } + + #[test] + fn test_different_scopes_close_and_reopen() { + let mut encoder = StatefulOtlpEncoder::new(4096); + + let resource = Resource::default(); + let resource_bytes = encode_resource_bytes(&resource); + + let scope1_name = "scope1"; + let scope2_name = "scope2"; + + let log_record = SimpleLogRecord { + time_unix_nano: Some(1000), + severity_number: Some(9), + severity_text: Some("INFO"), + body: Some("test"), + trace_id: None, + span_id: None, + }; + + // Encode with scope1 + encoder + .encode_log_record(&log_record, &resource_bytes, scope1_name) + .unwrap(); + assert_eq!(encoder.state, EncoderState::ScopeOpen); + + // Encode with scope2 - should close scope1 and start scope2 + encoder + .encode_log_record(&log_record, &resource_bytes, scope2_name) + .unwrap(); + assert_eq!(encoder.state, EncoderState::ScopeOpen); + + let bytes = encoder.flush(); + assert!(!bytes.is_empty()); + } +} diff --git a/rust/otap-dataflow/crates/telemetry/src/lib.rs b/rust/otap-dataflow/crates/telemetry/src/lib.rs index 6ecf5ecccb..58f7a75da5 100644 --- a/rust/otap-dataflow/crates/telemetry/src/lib.rs +++ b/rust/otap-dataflow/crates/telemetry/src/lib.rs @@ -40,8 +40,8 @@ pub mod metrics; pub mod opentelemetry_client; pub mod registry; pub mod reporter; +pub mod self_tracing; pub mod semconv; -pub mod tracing_integration; // Re-export _private module from internal_events for macro usage. // This allows the otel_info!, otel_warn!, etc. macros to work in other crates diff --git a/rust/otap-dataflow/crates/telemetry/src/tracing_integration/DESIGN.md b/rust/otap-dataflow/crates/telemetry/src/self_tracing/DESIGN.md similarity index 100% rename from rust/otap-dataflow/crates/telemetry/src/tracing_integration/DESIGN.md rename to rust/otap-dataflow/crates/telemetry/src/self_tracing/DESIGN.md diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/direct_encoder.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/direct_encoder.rs new file mode 100644 index 0000000000..62b3de59f1 --- /dev/null +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/direct_encoder.rs @@ -0,0 +1,728 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +//! Direct OTLP bytes encoder for tokio-tracing events. +//! +//! This module provides zero-allocation encoding from `tracing::Event` directly to +//! OTLP protobuf bytes, bypassing the View abstraction entirely. 
The key insight is +//! that the tracing `Visit` trait gives us access to field data during a callback, +//! and we can encode directly to a protobuf buffer during that callback. +//! +//! # Design +//! +//! Instead of: +//! 1. Visit event fields → allocate intermediate struct → encode via View trait +//! +//! We do: +//! 1. Visit event fields → encode directly to protobuf buffer +//! +//! This eliminates all intermediate allocations and lifetime complexities. +//! +//! # Protocol Buffer Encoding +//! +//! The encoder produces bytes in the OTLP LogRecord protobuf format. For single-record +//! use cases, it encodes just the LogRecord message. For batched use cases, see +//! `StatefulDirectEncoder` which maintains open ResourceLogs/ScopeLogs containers. + +use bytes::Bytes; +use std::time::{SystemTime, UNIX_EPOCH}; +use tracing::{Event, Level}; + +// Re-export ProtoBuffer and helpers from pdata for direct use +pub use otap_df_pdata::otlp::{ProtoBuffer, encode_len_placeholder, patch_len_placeholder}; +use otap_df_pdata::proto::consts::{field_num::common::*, field_num::logs::*, wire_types}; + +/// Position marker for a length-delimited field that needs patching. +/// +/// When encoding protobuf, we don't know the length of nested messages until we've +/// written all their content. We reserve 4 bytes for the length, write the content, +/// then patch the length back. +#[derive(Debug, Clone, Copy)] +pub struct LengthPlaceholder { + /// Position in buffer where the 4-byte length placeholder starts + position: usize, +} + +impl LengthPlaceholder { + /// Create a new placeholder at the current buffer position. + #[inline] + pub fn new(position: usize) -> Self { + Self { position } + } + + /// Patch the placeholder with the actual content length. + #[inline] + pub fn patch(self, buf: &mut ProtoBuffer) { + let content_len = buf.len() - self.position - 4; + patch_len_placeholder(buf, 4, content_len, self.position); + } +} + +/// Direct encoder that writes a single LogRecord from a tracing Event. +/// +/// This encoder writes directly to a provided `ProtoBuffer`, producing the +/// protobuf encoding of a LogRecord message without any intermediate structs. +/// +/// # Example +/// +/// ```ignore +/// use tracing_subscriber::layer::Layer; +/// +/// // In a Layer::on_event callback: +/// fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) { +/// let mut encoder = DirectLogRecordEncoder::new(&mut self.buffer); +/// encoder.encode_event(event); +/// } +/// ``` +pub struct DirectLogRecordEncoder<'buf> { + buf: &'buf mut ProtoBuffer, +} + +impl<'buf> DirectLogRecordEncoder<'buf> { + /// Create a new encoder that writes to the provided buffer. + #[inline] + pub fn new(buf: &'buf mut ProtoBuffer) -> Self { + Self { buf } + } + + /// Encode a tracing Event as a complete LogRecord message. + /// + /// This writes all LogRecord fields directly to the buffer: + /// - time_unix_nano (field 1) + /// - severity_number (field 2) + /// - severity_text (field 3) + /// - body (field 5) - from the "message" field + /// - attributes (field 6) - from all other fields + /// + /// Returns the number of bytes written. 
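+    ///
+    /// The timestamp is taken from `SystemTime::now()` at encode time; when the
+    /// caller has already captured one, `encode_event_with_timestamp` below
+    /// avoids the extra clock read.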
+    pub fn encode_event(&mut self, event: &Event<'_>) -> usize {
+        let start_len = self.buf.len();
+
+        // Get timestamp
+        let timestamp_nanos = SystemTime::now()
+            .duration_since(UNIX_EPOCH)
+            .unwrap_or_default()
+            .as_nanos() as u64;
+
+        let metadata = event.metadata();
+
+        // Encode time_unix_nano (field 1, fixed64)
+        self.buf.encode_field_tag(LOG_RECORD_TIME_UNIX_NANO, wire_types::FIXED64);
+        self.buf.extend_from_slice(&timestamp_nanos.to_le_bytes());
+
+        // Encode severity_number (field 2, varint)
+        let severity = level_to_severity_number(metadata.level());
+        self.buf.encode_field_tag(LOG_RECORD_SEVERITY_NUMBER, wire_types::VARINT);
+        self.buf.encode_varint(severity as u64);
+
+        // Encode severity_text (field 3, string)
+        self.buf.encode_string(LOG_RECORD_SEVERITY_TEXT, metadata.level().as_str());
+
+        // Now visit fields to encode body and attributes
+        let mut visitor = DirectFieldVisitor::new(self.buf);
+        event.record(&mut visitor);
+
+        self.buf.len() - start_len
+    }
+
+    /// Encode a tracing Event with a custom timestamp.
+    pub fn encode_event_with_timestamp(&mut self, event: &Event<'_>, timestamp_nanos: u64) -> usize {
+        let start_len = self.buf.len();
+        let metadata = event.metadata();
+
+        // Encode time_unix_nano (field 1, fixed64)
+        self.buf.encode_field_tag(LOG_RECORD_TIME_UNIX_NANO, wire_types::FIXED64);
+        self.buf.extend_from_slice(&timestamp_nanos.to_le_bytes());
+
+        // Encode severity_number (field 2, varint)
+        let severity = level_to_severity_number(metadata.level());
+        self.buf.encode_field_tag(LOG_RECORD_SEVERITY_NUMBER, wire_types::VARINT);
+        self.buf.encode_varint(severity as u64);
+
+        // Encode severity_text (field 3, string)
+        self.buf.encode_string(LOG_RECORD_SEVERITY_TEXT, metadata.level().as_str());
+
+        // Now visit fields to encode body and attributes
+        let mut visitor = DirectFieldVisitor::new(self.buf);
+        event.record(&mut visitor);
+
+        self.buf.len() - start_len
+    }
+}
+
+/// Visitor that directly encodes tracing fields to protobuf.
+///
+/// This is the core of the zero-allocation design: instead of collecting
+/// field values into an intermediate data structure, we encode them directly
+/// to the protobuf buffer as we visit them.
+struct DirectFieldVisitor<'buf> {
+    buf: &'buf mut ProtoBuffer,
+}
+
+impl<'buf> DirectFieldVisitor<'buf> {
+    fn new(buf: &'buf mut ProtoBuffer) -> Self {
+        Self { buf }
+    }
+
+    /// Encode an attribute (KeyValue message) with a string value.
+    #[inline]
+    fn encode_string_attribute(&mut self, key: &str, value: &str) {
+        // KeyValue message as LOG_RECORD_ATTRIBUTES field (tag 6)
+        self.buf.encode_field_tag(LOG_RECORD_ATTRIBUTES, wire_types::LEN);
+        let kv_placeholder = LengthPlaceholder::new(self.buf.len());
+        encode_len_placeholder(self.buf);
+
+        // KeyValue.key (field 1, string)
+        self.buf.encode_string(KEY_VALUE_KEY, key);
+
+        // KeyValue.value (field 2, AnyValue message)
+        self.buf.encode_field_tag(KEY_VALUE_VALUE, wire_types::LEN);
+        let av_placeholder = LengthPlaceholder::new(self.buf.len());
+        encode_len_placeholder(self.buf);
+
+        // AnyValue.string_value (field 1, string)
+        self.buf.encode_string(ANY_VALUE_STRING_VALUE, value);
+
+        av_placeholder.patch(self.buf);
+        kv_placeholder.patch(self.buf);
+    }
+
+    /// Encode an attribute with an i64 value.
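+    ///
+    /// In OTLP terms this emits `attributes { key, value { int_value } }`.
+    /// Note that `record_u64` also funnels unsigned values through here via
+    /// `as i64`, so values above `i64::MAX` would wrap to negative.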
+ #[inline] + fn encode_int_attribute(&mut self, key: &str, value: i64) { + self.buf.encode_field_tag(LOG_RECORD_ATTRIBUTES, wire_types::LEN); + let kv_placeholder = LengthPlaceholder::new(self.buf.len()); + encode_len_placeholder(self.buf); + + self.buf.encode_string(KEY_VALUE_KEY, key); + + self.buf.encode_field_tag(KEY_VALUE_VALUE, wire_types::LEN); + let av_placeholder = LengthPlaceholder::new(self.buf.len()); + encode_len_placeholder(self.buf); + + // AnyValue.int_value (field 3, varint) + self.buf.encode_field_tag(ANY_VALUE_INT_VALUE, wire_types::VARINT); + self.buf.encode_varint(value as u64); + + av_placeholder.patch(self.buf); + kv_placeholder.patch(self.buf); + } + + /// Encode an attribute with a bool value. + #[inline] + fn encode_bool_attribute(&mut self, key: &str, value: bool) { + self.buf.encode_field_tag(LOG_RECORD_ATTRIBUTES, wire_types::LEN); + let kv_placeholder = LengthPlaceholder::new(self.buf.len()); + encode_len_placeholder(self.buf); + + self.buf.encode_string(KEY_VALUE_KEY, key); + + self.buf.encode_field_tag(KEY_VALUE_VALUE, wire_types::LEN); + let av_placeholder = LengthPlaceholder::new(self.buf.len()); + encode_len_placeholder(self.buf); + + // AnyValue.bool_value (field 2, varint) + self.buf.encode_field_tag(ANY_VALUE_BOOL_VALUE, wire_types::VARINT); + self.buf.encode_varint(if value { 1 } else { 0 }); + + av_placeholder.patch(self.buf); + kv_placeholder.patch(self.buf); + } + + /// Encode an attribute with a double value. + #[inline] + fn encode_double_attribute(&mut self, key: &str, value: f64) { + self.buf.encode_field_tag(LOG_RECORD_ATTRIBUTES, wire_types::LEN); + let kv_placeholder = LengthPlaceholder::new(self.buf.len()); + encode_len_placeholder(self.buf); + + self.buf.encode_string(KEY_VALUE_KEY, key); + + self.buf.encode_field_tag(KEY_VALUE_VALUE, wire_types::LEN); + let av_placeholder = LengthPlaceholder::new(self.buf.len()); + encode_len_placeholder(self.buf); + + // AnyValue.double_value (field 4, fixed64) + self.buf.encode_field_tag(ANY_VALUE_DOUBLE_VALUE, wire_types::FIXED64); + self.buf.extend_from_slice(&value.to_le_bytes()); + + av_placeholder.patch(self.buf); + kv_placeholder.patch(self.buf); + } + + /// Encode the body (AnyValue message) as a string. 
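+    ///
+    /// The body corresponds to the conventional tracing `message` field: the
+    /// `Visit` implementation below routes `record_str`/`record_debug` calls
+    /// whose field name is "message" here instead of into `attributes`.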
+    #[inline]
+    fn encode_body_string(&mut self, value: &str) {
+        // LogRecord.body (field 5, AnyValue message)
+        self.buf.encode_field_tag(LOG_RECORD_BODY, wire_types::LEN);
+        let placeholder = LengthPlaceholder::new(self.buf.len());
+        encode_len_placeholder(self.buf);
+
+        // AnyValue.string_value (field 1, string)
+        self.buf.encode_string(ANY_VALUE_STRING_VALUE, value);
+
+        placeholder.patch(self.buf);
+    }
+}
+
+impl tracing::field::Visit for DirectFieldVisitor<'_> {
+    fn record_f64(&mut self, field: &tracing::field::Field, value: f64) {
+        if field.name() == "message" {
+            // A non-string "message" is skipped here; in practice the body
+            // arrives via record_str/record_debug
+            return;
+        }
+        self.encode_double_attribute(field.name(), value);
+    }
+
+    fn record_i64(&mut self, field: &tracing::field::Field, value: i64) {
+        if field.name() == "message" {
+            return;
+        }
+        self.encode_int_attribute(field.name(), value);
+    }
+
+    fn record_u64(&mut self, field: &tracing::field::Field, value: u64) {
+        if field.name() == "message" {
+            return;
+        }
+        self.encode_int_attribute(field.name(), value as i64);
+    }
+
+    fn record_bool(&mut self, field: &tracing::field::Field, value: bool) {
+        if field.name() == "message" {
+            return;
+        }
+        self.encode_bool_attribute(field.name(), value);
+    }
+
+    fn record_str(&mut self, field: &tracing::field::Field, value: &str) {
+        if field.name() == "message" {
+            self.encode_body_string(value);
+            return;
+        }
+        self.encode_string_attribute(field.name(), value);
+    }
+
+    fn record_debug(&mut self, field: &tracing::field::Field, value: &dyn std::fmt::Debug) {
+        // For Debug fields, we must allocate a String to format
+        // This is unavoidable without a custom fmt::Write implementation for ProtoBuffer
+        let formatted = format!("{:?}", value);
+        if field.name() == "message" {
+            self.encode_body_string(&formatted);
+            return;
+        }
+        self.encode_string_attribute(field.name(), &formatted);
+    }
+}
+
+/// Convert tracing Level to OTLP severity number.
+///
+/// See: https://opentelemetry.io/docs/specs/otel/logs/data-model/#field-severitynumber
+#[inline]
+fn level_to_severity_number(level: &Level) -> i32 {
+    match *level {
+        Level::TRACE => 1,
+        Level::DEBUG => 5,
+        Level::INFO => 9,
+        Level::WARN => 13,
+        Level::ERROR => 17,
+    }
+}
+
+/// Stateful encoder for batching multiple LogRecords with shared Resource/Scope.
+///
+/// This encoder maintains open `ResourceLogs` and `ScopeLogs` messages, allowing
+/// multiple LogRecords to be appended efficiently. When the scope changes, it
+/// automatically closes the current scope and starts a new one.
+///
+/// # Thread-Local Usage
+///
+/// This encoder is designed for thread-local use. Each thread should have its own
+/// encoder instance to avoid synchronization overhead. The encoder accumulates
+/// records until explicitly flushed.
+///
+/// # Example
+///
+/// ```ignore
+/// thread_local!
{
+///     static ENCODER: RefCell<StatefulDirectEncoder> = RefCell::new(
+///         StatefulDirectEncoder::new(64 * 1024, resource_bytes)
+///     );
+/// }
+///
+/// // In event handler:
+/// ENCODER.with(|encoder| {
+///     let mut encoder = encoder.borrow_mut();
+///     encoder.encode_event(event);
+///
+///     if encoder.len() > FLUSH_THRESHOLD {
+///         let bytes = encoder.flush();
+///         // Send bytes to pipeline
+///     }
+/// });
+/// ```
+pub struct StatefulDirectEncoder {
+    /// Output buffer
+    buf: ProtoBuffer,
+
+    /// Pre-encoded Resource bytes (includes ResourceLogs.resource field)
+    resource_bytes: Vec<u8>,
+
+    /// Current encoder state
+    state: EncoderState,
+
+    /// Length placeholder for current ResourceLogs
+    resource_logs_placeholder: Option<LengthPlaceholder>,
+
+    /// Length placeholder for current ScopeLogs
+    scope_logs_placeholder: Option<LengthPlaceholder>,
+
+    /// Current scope name for batching comparison
+    current_scope_name: Option<String>,
+}
+
+/// Current state of the stateful encoder.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum EncoderState {
+    /// No messages open, ready to start new ResourceLogs
+    Idle,
+    /// ResourceLogs is open, ready to add ScopeLogs
+    ResourceOpen,
+    /// ResourceLogs and ScopeLogs are both open, ready to append LogRecords
+    ScopeOpen,
+}
+
+impl StatefulDirectEncoder {
+    /// Create a new stateful encoder with pre-allocated buffer capacity.
+    ///
+    /// # Arguments
+    /// * `capacity_bytes` - Initial buffer capacity in bytes
+    /// * `resource_bytes` - Pre-encoded Resource (use the `encode_resource_bytes_from_attrs` helper)
+    pub fn new(capacity_bytes: usize, resource_bytes: Vec<u8>) -> Self {
+        Self {
+            buf: ProtoBuffer::with_capacity(capacity_bytes),
+            resource_bytes,
+            state: EncoderState::Idle,
+            resource_logs_placeholder: None,
+            scope_logs_placeholder: None,
+            current_scope_name: None,
+        }
+    }
+
+    /// Get the current buffer size in bytes.
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.buf.len()
+    }
+
+    /// Check if the buffer is empty.
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.buf.is_empty()
+    }
+
+    /// Encode a tracing Event, using its metadata target as the scope name.
+    ///
+    /// This method automatically handles batching:
+    /// - If scope (target) matches the current batch, the LogRecord is appended
+    /// - If scope differs, the current ScopeLogs is closed and a new one started
+    pub fn encode_event(&mut self, event: &Event<'_>) {
+        let scope_name = event.metadata().target();
+        self.encode_event_with_scope(event, scope_name);
+    }
+
+    /// Encode a tracing Event with an explicit scope name.
+    pub fn encode_event_with_scope(&mut self, event: &Event<'_>, scope_name: &str) {
+        match self.state {
+            EncoderState::Idle => {
+                self.start_resource_logs();
+                self.start_scope_logs(scope_name);
+                self.append_log_record(event);
+            }
+            EncoderState::ResourceOpen => {
+                self.start_scope_logs(scope_name);
+                self.append_log_record(event);
+            }
+            EncoderState::ScopeOpen => {
+                if self.current_scope_name.as_deref() == Some(scope_name) {
+                    // Same scope - just append
+                    self.append_log_record(event);
+                } else {
+                    // Different scope - close current and start new
+                    self.close_scope_logs();
+                    self.start_scope_logs(scope_name);
+                    self.append_log_record(event);
+                }
+            }
+        }
+    }
+
+    /// Flush the encoder, closing all open messages and returning the accumulated bytes.
+    ///
+    /// After flushing, the encoder is reset and ready for new messages.
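+    ///
+    /// Flushing while `Idle` (nothing encoded since the last flush) returns
+    /// empty bytes, as exercised by `test_stateful_encoder_batching` below, and
+    /// the internal buffer capacity is preserved across flushes, so a long-lived
+    /// encoder settles into a steady allocation.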
+    pub fn flush(&mut self) -> Bytes {
+        // Close any open messages
+        if self.state == EncoderState::ScopeOpen {
+            self.close_scope_logs();
+        }
+        if self.state == EncoderState::ResourceOpen {
+            self.close_resource_logs();
+        }
+
+        // Take the bytes
+        let (bytes, capacity) = self.buf.take_into_bytes();
+
+        // Reset state
+        self.state = EncoderState::Idle;
+        self.resource_logs_placeholder = None;
+        self.scope_logs_placeholder = None;
+        self.current_scope_name = None;
+
+        // Preserve capacity for next use
+        self.buf.ensure_capacity(capacity);
+
+        bytes
+    }
+
+    // === Private methods ===
+
+    fn start_resource_logs(&mut self) {
+        // LogsData.resource_logs field (tag 1, length-delimited)
+        self.buf.encode_field_tag(LOGS_DATA_RESOURCE, wire_types::LEN);
+
+        let placeholder = LengthPlaceholder::new(self.buf.len());
+        encode_len_placeholder(&mut self.buf);
+
+        // Copy pre-encoded Resource bytes
+        self.buf.extend_from_slice(&self.resource_bytes);
+
+        self.resource_logs_placeholder = Some(placeholder);
+        self.state = EncoderState::ResourceOpen;
+    }
+
+    fn start_scope_logs(&mut self, scope_name: &str) {
+        // ResourceLogs.scope_logs field (tag 2, length-delimited)
+        self.buf.encode_field_tag(RESOURCE_LOGS_SCOPE_LOGS, wire_types::LEN);
+
+        let placeholder = LengthPlaceholder::new(self.buf.len());
+        encode_len_placeholder(&mut self.buf);
+
+        // Encode ScopeLogs.scope field (InstrumentationScope message)
+        self.encode_instrumentation_scope(scope_name);
+
+        self.scope_logs_placeholder = Some(placeholder);
+        self.current_scope_name = Some(scope_name.to_string());
+        self.state = EncoderState::ScopeOpen;
+    }
+
+    fn encode_instrumentation_scope(&mut self, scope_name: &str) {
+        // ScopeLogs.scope field (tag 1, length-delimited)
+        self.buf.encode_field_tag(SCOPE_LOG_SCOPE, wire_types::LEN);
+        let placeholder = LengthPlaceholder::new(self.buf.len());
+        encode_len_placeholder(&mut self.buf);
+
+        // InstrumentationScope.name field (tag 1, string)
+        self.buf.encode_string(INSTRUMENTATION_SCOPE_NAME, scope_name);
+
+        placeholder.patch(&mut self.buf);
+    }
+
+    fn append_log_record(&mut self, event: &Event<'_>) {
+        // ScopeLogs.log_records field (tag 2, length-delimited)
+        self.buf.encode_field_tag(SCOPE_LOGS_LOG_RECORDS, wire_types::LEN);
+
+        let placeholder = LengthPlaceholder::new(self.buf.len());
+        encode_len_placeholder(&mut self.buf);
+
+        // Encode the LogRecord content directly
+        let mut encoder = DirectLogRecordEncoder::new(&mut self.buf);
+        let _ = encoder.encode_event(event);
+
+        placeholder.patch(&mut self.buf);
+    }
+
+    fn close_scope_logs(&mut self) {
+        if let Some(placeholder) = self.scope_logs_placeholder.take() {
+            placeholder.patch(&mut self.buf);
+            self.state = EncoderState::ResourceOpen;
+            self.current_scope_name = None;
+        }
+    }
+
+    fn close_resource_logs(&mut self) {
+        if let Some(placeholder) = self.resource_logs_placeholder.take() {
+            placeholder.patch(&mut self.buf);
+            self.state = EncoderState::Idle;
+        }
+    }
+}
+
+/// Helper to pre-encode Resource attributes as OTLP bytes.
+///
+/// The returned bytes include the ResourceLogs.resource field tag and length,
+/// ready to be copied directly into the encoder buffer.
+///
+/// # Example
+///
+/// ```ignore
+/// let bytes = encode_resource_bytes_from_attrs(&[
+///     ("service.name", "my-service"),
+/// ]);
+/// let encoder = StatefulDirectEncoder::new(64 * 1024, bytes);
+/// ```
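+///
+/// The first byte of the result is always `0x0a` (field 1, wire type LEN),
+/// which `test_encode_resource_bytes` in this file also asserts.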
+pub fn encode_resource_bytes_from_attrs(attributes: &[(&str, &str)]) -> Vec<u8> {
+    use otap_df_pdata::proto::consts::field_num::resource::RESOURCE_ATTRIBUTES;
+
+    let mut buf = ProtoBuffer::with_capacity(256);
+
+    // ResourceLogs.resource field (tag 1, length-delimited)
+    buf.encode_field_tag(1, wire_types::LEN);
+    let resource_placeholder = LengthPlaceholder::new(buf.len());
+    encode_len_placeholder(&mut buf);
+
+    // Encode each attribute as Resource.attributes (tag 1, KeyValue)
+    for (key, value) in attributes {
+        buf.encode_field_tag(RESOURCE_ATTRIBUTES, wire_types::LEN);
+        let kv_placeholder = LengthPlaceholder::new(buf.len());
+        encode_len_placeholder(&mut buf);
+
+        buf.encode_string(KEY_VALUE_KEY, key);
+
+        buf.encode_field_tag(KEY_VALUE_VALUE, wire_types::LEN);
+        let av_placeholder = LengthPlaceholder::new(buf.len());
+        encode_len_placeholder(&mut buf);
+
+        buf.encode_string(ANY_VALUE_STRING_VALUE, value);
+
+        av_placeholder.patch(&mut buf);
+        kv_placeholder.patch(&mut buf);
+    }
+
+    resource_placeholder.patch(&mut buf);
+
+    buf.into_bytes().to_vec()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tracing_subscriber::prelude::*;
+    use tracing_subscriber::layer::Layer;
+    use tracing_subscriber::registry::LookupSpan;
+    use tracing::Subscriber;
+    use std::sync::Mutex;
+
+    /// Simple layer that uses DirectLogRecordEncoder (thread-safe for tests)
+    struct DirectEncoderLayer {
+        // Shared buffer - each event encodes to this
+        buffer: Mutex<ProtoBuffer>,
+        // Collected encoded bytes
+        encoded: Mutex<Vec<Vec<u8>>>,
+    }
+
+    impl DirectEncoderLayer {
+        fn new() -> Self {
+            Self {
+                buffer: Mutex::new(ProtoBuffer::with_capacity(4096)),
+                encoded: Mutex::new(Vec::new()),
+            }
+        }
+    }
+
+    impl<S> Layer<S> for DirectEncoderLayer
+    where
+        S: Subscriber + for<'a> LookupSpan<'a>,
+    {
+        fn on_event(&self, event: &Event<'_>, _ctx: tracing_subscriber::layer::Context<'_, S>) {
+            let mut buffer = self.buffer.lock().unwrap();
+            buffer.clear();
+
+            let mut encoder = DirectLogRecordEncoder::new(&mut buffer);
+            let _ = encoder.encode_event(event);
+
+            // Save a copy of the encoded bytes
+            self.encoded.lock().unwrap().push(buffer.as_ref().to_vec());
+        }
+    }
+
+    #[test]
+    fn test_direct_encoder_captures_events() {
+        let layer = DirectEncoderLayer::new();
+
+        let subscriber = tracing_subscriber::registry().with(layer);
+        let dispatch = tracing::Dispatch::new(subscriber);
+        let _guard = tracing::dispatcher::set_default(&dispatch);
+
+        tracing::info!("Test message");
+        tracing::warn!(count = 42, "Warning with attribute");
+
+        // Drop the guard to stop capturing
+        drop(_guard);
+
+        // Note: We can't easily get the layer back from dispatch to verify results.
+        // The test verifies that the encoding path doesn't panic.
+    }
+
+    #[test]
+    fn test_direct_encoder_encodes_attributes() {
+        let mut buffer = ProtoBuffer::with_capacity(1024);
+
+        // We can't easily create a tracing::Event in tests, so we'll just verify
+        // the attribute encoding helpers work correctly
+        let mut visitor = DirectFieldVisitor::new(&mut buffer);
+        visitor.encode_string_attribute("test_key", "test_value");
+        visitor.encode_int_attribute("count", 42);
+        visitor.encode_bool_attribute("enabled", true);
+        visitor.encode_double_attribute("ratio", 3.14);
+
+        // Buffer should have content
+        assert!(!buffer.is_empty());
+    }
+
+    #[test]
+    fn test_stateful_encoder_batching() {
+        let resource_bytes =
encode_resource_bytes_from_attrs(&[ + ("service.name", "test-service"), + ]); + + let mut encoder = StatefulDirectEncoder::new(4096, resource_bytes); + + assert!(encoder.is_empty()); + assert_eq!(encoder.state, EncoderState::Idle); + + // We can't easily test with real tracing events, but we can verify the structure + // For now, just test flush on empty encoder + let bytes = encoder.flush(); + assert!(bytes.is_empty()); + } + + #[test] + fn test_encode_resource_bytes() { + let bytes = encode_resource_bytes_from_attrs(&[ + ("service.name", "my-service"), + ("service.version", "1.0.0"), + ]); + + // Should produce non-empty bytes + assert!(!bytes.is_empty()); + + // Bytes should start with field tag for ResourceLogs.resource + // Field 1, wire type 2 (LEN) = (1 << 3) | 2 = 0x0a + assert_eq!(bytes[0], 0x0a); + } + + #[test] + fn test_level_to_severity() { + assert_eq!(level_to_severity_number(&Level::TRACE), 1); + assert_eq!(level_to_severity_number(&Level::DEBUG), 5); + assert_eq!(level_to_severity_number(&Level::INFO), 9); + assert_eq!(level_to_severity_number(&Level::WARN), 13); + assert_eq!(level_to_severity_number(&Level::ERROR), 17); + } +} diff --git a/rust/otap-dataflow/crates/telemetry/src/tracing_integration/log_record.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/log_record.rs similarity index 100% rename from rust/otap-dataflow/crates/telemetry/src/tracing_integration/log_record.rs rename to rust/otap-dataflow/crates/telemetry/src/self_tracing/log_record.rs diff --git a/rust/otap-dataflow/crates/telemetry/src/tracing_integration/mod.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs similarity index 58% rename from rust/otap-dataflow/crates/telemetry/src/tracing_integration/mod.rs rename to rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs index ddbc6ddb0b..460e2b0d1a 100644 --- a/rust/otap-dataflow/crates/telemetry/src/tracing_integration/mod.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs @@ -3,10 +3,18 @@ //! tokio-tracing support for directly encoding and formatting OTLP bytes. 
+pub mod direct_encoder;
 pub mod log_record;
 pub mod otlp_bytes_formatter;
 pub mod subscriber;
 
+// New direct encoder exports (preferred for zero-allocation encoding)
+pub use direct_encoder::{
+    DirectLogRecordEncoder, LengthPlaceholder, ProtoBuffer, StatefulDirectEncoder,
+    encode_resource_bytes_from_attrs,
+};
+
+// Legacy View-based exports (for compatibility)
 pub use log_record::{TracingAnyValue, TracingAttribute, TracingLogRecord};
 pub use otlp_bytes_formatter::{FormatError, OtlpBytesFormattingLayer};
 pub use subscriber::OtlpTracingLayer;
diff --git a/rust/otap-dataflow/crates/telemetry/src/tracing_integration/otlp_bytes_formatter.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/otlp_bytes_formatter.rs
similarity index 63%
rename from rust/otap-dataflow/crates/telemetry/src/tracing_integration/otlp_bytes_formatter.rs
rename to rust/otap-dataflow/crates/telemetry/src/self_tracing/otlp_bytes_formatter.rs
index a44011af73..1f6c688042 100644
--- a/rust/otap-dataflow/crates/telemetry/src/tracing_integration/otlp_bytes_formatter.rs
+++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/otlp_bytes_formatter.rs
@@ -429,15 +429,21 @@ impl std::error::Error for FormatError {}
 #[cfg(test)]
 mod tests {
     use super::*;
+    use crate::self_tracing::direct_encoder::{
+        StatefulDirectEncoder, encode_resource_bytes_from_attrs,
+    };
     use std::sync::{Arc, Mutex};
+    use tracing_subscriber::prelude::*;
+    use tracing_subscriber::layer::Layer;
+    use tracing_subscriber::registry::LookupSpan;
 
-    /// Test writer that captures output
+    /// Test writer that captures output to a shared buffer
     struct TestWriter {
         buffer: Arc<Mutex<Vec<u8>>>,
     }
 
     impl TestWriter {
-        fn new() -> (Self, Arc<Mutex<Vec<u8>>>) {
+        fn new_shared() -> (Self, Arc<Mutex<Vec<u8>>>) {
             let buffer = Arc::new(Mutex::new(Vec::new()));
             (Self { buffer: buffer.clone() }, buffer)
         }
@@ -465,6 +471,188 @@ mod tests {
     }
 
-    // TODO: Add tests that encode a TracingLogRecord to OTLP bytes,
-    // then format them back and verify the output
+    /// Helper layer for tests that captures events using StatefulDirectEncoder
+    struct TestEncoderLayer {
+        encoder: Arc<Mutex<StatefulDirectEncoder>>,
+    }
+
+    impl<S> Layer<S> for TestEncoderLayer
+    where
+        S: tracing::Subscriber + for<'a> LookupSpan<'a>,
+    {
+        fn on_event(&self, event: &tracing::Event<'_>, _ctx: tracing_subscriber::layer::Context<'_, S>) {
+            let mut encoder = self.encoder.lock().unwrap();
+            encoder.encode_event(event);
+        }
+    }
+
+    /// Helper struct for end-to-end tests
+    struct TestHarness {
+        encoder: Arc<Mutex<StatefulDirectEncoder>>,
+        dispatch: tracing::Dispatch,
+    }
+
+    impl TestHarness {
+        /// Create a new test harness with the given resource attributes
+        fn new(resource_attrs: &[(&str, &str)]) -> Self {
+            let resource_bytes = encode_resource_bytes_from_attrs(resource_attrs);
+            let encoder = Arc::new(Mutex::new(StatefulDirectEncoder::new(4096, resource_bytes)));
+            let layer = TestEncoderLayer { encoder: encoder.clone() };
+            let subscriber = tracing_subscriber::registry().with(layer);
+            let dispatch = tracing::Dispatch::new(subscriber);
+            Self { encoder, dispatch }
+        }
+
+        /// Run a closure that emits tracing events, then return formatted output
+        fn capture_and_format<F>(&self, emit_events: F) -> String
+        where
+            F: FnOnce(),
+        {
+            // Emit events with our subscriber
+            {
+                let _guard = tracing::dispatcher::set_default(&self.dispatch);
+                emit_events();
+            }
+
+            // Flush and get bytes
+            let otlp_bytes = self.encoder.lock().unwrap().flush();
+
+            // Format the bytes
+            let (writer, output_buffer) = TestWriter::new_shared();
+            let formatter = OtlpBytesFormattingLayer::new(writer)
+                .with_ansi(false)
.with_timestamp(false) + .with_thread_names(false); + + let _ = formatter.format_otlp_bytes(&otlp_bytes); + + let output = output_buffer.lock().unwrap(); + String::from_utf8_lossy(&output).to_string() + } + } + + /// Test formatting a simple INFO message + #[test] + fn test_format_simple_info_message() { + let harness = TestHarness::new(&[("service.name", "my-service")]); + + let output = harness.capture_and_format(|| { + tracing::info!(target: "my_module", "Test message"); + }); + + assert!(output.contains("INFO"), "Should contain INFO level: {}", output); + assert!(output.contains("my_module"), "Should contain target: {}", output); + assert!(output.contains("Test message"), "Should contain message: {}", output); + } + + /// Test formatting an event with attributes + #[test] + fn test_format_event_with_attributes() { + let harness = TestHarness::new(&[("service.name", "attr-test")]); + + let output = harness.capture_and_format(|| { + tracing::warn!(target: "server", port = 8080, host = "localhost", "Server starting"); + }); + + assert!(output.contains("WARN"), "Should contain WARN level: {}", output); + assert!(output.contains("server"), "Should contain target: {}", output); + assert!(output.contains("Server starting"), "Should contain message: {}", output); + assert!(output.contains("port=8080"), "Should contain port attribute: {}", output); + assert!(output.contains("host=localhost"), "Should contain host attribute: {}", output); + } + + /// Test formatting multiple events with different levels + #[test] + fn test_format_multiple_levels() { + let harness = TestHarness::new(&[]); + + let output = harness.capture_and_format(|| { + tracing::trace!(target: "app", "Trace message"); + tracing::debug!(target: "app", "Debug message"); + tracing::info!(target: "app", "Info message"); + tracing::warn!(target: "app", "Warn message"); + tracing::error!(target: "app", "Error message"); + }); + + // Check all levels are present + assert!(output.contains("TRACE"), "Should contain TRACE: {}", output); + assert!(output.contains("DEBUG"), "Should contain DEBUG: {}", output); + assert!(output.contains("INFO"), "Should contain INFO: {}", output); + assert!(output.contains("WARN"), "Should contain WARN: {}", output); + assert!(output.contains("ERROR"), "Should contain ERROR: {}", output); + + // Check all messages are present + assert!(output.contains("Trace message"), "Should contain trace message: {}", output); + assert!(output.contains("Debug message"), "Should contain debug message: {}", output); + assert!(output.contains("Info message"), "Should contain info message: {}", output); + assert!(output.contains("Warn message"), "Should contain warn message: {}", output); + assert!(output.contains("Error message"), "Should contain error message: {}", output); + } + + /// Test that different targets create separate scope batches + #[test] + fn test_different_targets_different_scopes() { + let harness = TestHarness::new(&[("service.name", "multi-scope-test")]); + + let output = harness.capture_and_format(|| { + tracing::info!(target: "module_a", "From module A"); + tracing::info!(target: "module_b", "From module B"); + tracing::info!(target: "module_a", "Another from A"); + }); + + // Check both modules appear + assert!(output.contains("module_a"), "Should contain module_a: {}", output); + assert!(output.contains("module_b"), "Should contain module_b: {}", output); + assert!(output.contains("From module A"), "Should contain message A: {}", output); + assert!(output.contains("From module B"), "Should contain 
message B: {}", output);
+        assert!(output.contains("Another from A"), "Should contain second A message: {}", output);
+    }
+
+    /// Test formatting with various attribute types
+    #[test]
+    fn test_format_various_attribute_types() {
+        let harness = TestHarness::new(&[]);
+
+        let output = harness.capture_and_format(|| {
+            tracing::info!(
+                target: "types",
+                string_val = "hello",
+                int_val = 42i64,
+                bool_val = true,
+                float_val = 3.14f64,
+                "Testing attribute types"
+            );
+        });
+
+        assert!(output.contains("string_val=hello"), "Should contain string attr: {}", output);
+        assert!(output.contains("int_val=42"), "Should contain int attr: {}", output);
+        assert!(output.contains("bool_val=true"), "Should contain bool attr: {}", output);
+        // Float might be formatted differently, just check it's there
+        assert!(output.contains("float_val="), "Should contain float attr: {}", output);
+        assert!(output.contains("Testing attribute types"), "Should contain message: {}", output);
+    }
+
+    /// Test the timestamp formatter
+    #[test]
+    fn test_format_iso8601_timestamp() {
+        // Test a known timestamp: 2024-01-01T00:00:00.000000Z
+        // Unix epoch for 2024-01-01T00:00:00Z is 1704067200 seconds
+        let nanos = 1704067200_000_000_000u64;
+        let formatted = format_iso8601_timestamp(nanos);
+
+        // The timestamp should be roughly correct (our simple algorithm isn't perfect)
+        assert!(formatted.starts_with("20"), "Should start with century: {}", formatted);
+        assert!(formatted.ends_with("Z"), "Should end with Z: {}", formatted);
+        assert!(formatted.contains("T"), "Should have T separator: {}", formatted);
+    }
+
+    /// Test severity to level string conversion
+    #[test]
+    fn test_severity_to_level_str() {
+        assert_eq!(severity_to_level_str(1), "TRACE");
+        assert_eq!(severity_to_level_str(5), "DEBUG");
+        assert_eq!(severity_to_level_str(9), "INFO");
+        assert_eq!(severity_to_level_str(13), "WARN");
+        assert_eq!(severity_to_level_str(17), "ERROR");
+    }
 }
diff --git a/rust/otap-dataflow/crates/telemetry/src/tracing_integration/subscriber.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/subscriber.rs
similarity index 100%
rename from rust/otap-dataflow/crates/telemetry/src/tracing_integration/subscriber.rs
rename to rust/otap-dataflow/crates/telemetry/src/self_tracing/subscriber.rs

From 0aaad0b62c8f018ed192c4e426a9aa46e76733c3 Mon Sep 17 00:00:00 2001
From: Joshua MacDonald
Date: Tue, 6 Jan 2026 08:33:00 -0800
Subject: [PATCH 04/92] typo

---
 rust/otap-dataflow/crates/query-engine/src/pipeline/planner.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rust/otap-dataflow/crates/query-engine/src/pipeline/planner.rs b/rust/otap-dataflow/crates/query-engine/src/pipeline/planner.rs
index 16e2618b6b..75a7413af0 100644
--- a/rust/otap-dataflow/crates/query-engine/src/pipeline/planner.rs
+++ b/rust/otap-dataflow/crates/query-engine/src/pipeline/planner.rs
@@ -111,7 +111,7 @@ impl PipelinePlanner {
                 }),
             },
-o            DataExpression::Conditional(conditional_expr) => {
+            DataExpression::Conditional(conditional_expr) => {
                 let mut pipeline_branches = vec![];
                 for branch in conditional_expr.get_branches() {
                     let predicate =

From d8c7ac2a9448209266640736117ce1265fb610c1 Mon Sep 17 00:00:00 2001
From: Joshua MacDonald
Date: Tue, 6 Jan 2026 12:00:12 -0800
Subject: [PATCH 05/92] benchmarked

---
 rust/otap-dataflow/benchmarks/Cargo.toml      |   7 +
 .../benchmarks/benches/self_tracing/main.rs   | 367 ++++++++++++++++++
 .../crates/pdata/src/otlp/common.rs           |   4 +-
 .../src/self_tracing/direct_encoder.rs        | 106 ++++-
.../crates/telemetry/src/self_tracing/mod.rs | 5 +- .../telemetry/src/self_tracing/subscriber.rs | 56 +-- 6 files changed, 476 insertions(+), 69 deletions(-) create mode 100644 rust/otap-dataflow/benchmarks/benches/self_tracing/main.rs diff --git a/rust/otap-dataflow/benchmarks/Cargo.toml b/rust/otap-dataflow/benchmarks/Cargo.toml index c88c6c1e1d..30e9ce6c33 100644 --- a/rust/otap-dataflow/benchmarks/Cargo.toml +++ b/rust/otap-dataflow/benchmarks/Cargo.toml @@ -26,6 +26,9 @@ otap-df-engine = { path = "../crates/engine"} otap-df-telemetry = { path="../crates/telemetry"} otap-df-pdata = { path="../crates/pdata", features = ["bench"]} +tracing.workspace = true +tracing-subscriber = { workspace = true, features = ["registry"] } + fluke-hpack.workspace = true futures-channel.workspace = true futures.workspace = true @@ -87,3 +90,7 @@ harness = false [[bench]] name = "otap_logs_view" harness = false + +[[bench]] +name = "self_tracing" +harness = false diff --git a/rust/otap-dataflow/benchmarks/benches/self_tracing/main.rs b/rust/otap-dataflow/benchmarks/benches/self_tracing/main.rs new file mode 100644 index 0000000000..1a19d42e55 --- /dev/null +++ b/rust/otap-dataflow/benchmarks/benches/self_tracing/main.rs @@ -0,0 +1,367 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +#![allow(missing_docs)] + +//! Benchmarks for self-tracing OTLP bytes encoding and formatting. +//! +//! # Benchmark Design +//! +//! These benchmarks emit a single tracing event but perform N encoding/formatting +//! operations inside the callback. This amortizes tracing dispatch overhead to noise, +//! allowing us to measure the true cost of encoding. +//! +//! # Interpreting Results +//! +//! Benchmark names follow the pattern: `group/description/N_encodings` +//! +//! To get per-event cost: `measured_time / N` +//! +//! Example: `encode_otlp/3_attrs/1000_events` = 265 µs → 265 ns per event + +use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main}; +use tracing::{Event, Subscriber}; +use tracing_subscriber::layer::Layer; +use tracing_subscriber::prelude::*; +use tracing_subscriber::registry::LookupSpan; + +use otap_df_telemetry::self_tracing::{ + OtlpBytesFormattingLayer, StatefulDirectEncoder, encode_resource_bytes_from_attrs, +}; + +#[cfg(not(windows))] +use tikv_jemallocator::Jemalloc; + +#[cfg(not(windows))] +#[global_allocator] +static GLOBAL: Jemalloc = Jemalloc; + +// ============================================================================= +// ISOLATED ENCODING BENCHMARK +// Emit 1 event, encode it N times inside the callback +// ============================================================================= + +/// Layer that encodes the same event N times to measure pure encoding cost. 
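+///
+/// Note: each iteration constructs a fresh `StatefulDirectEncoder` and clones
+/// the pre-encoded resource bytes, so the derived per-event figure includes
+/// that setup cost and is an upper bound on steady-state encoding.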
+struct IsolatedEncoderLayer {
+    /// Number of times to encode each event
+    iterations: usize,
+    /// Pre-encoded resource bytes
+    resource_bytes: Vec<u8>,
+}
+
+impl IsolatedEncoderLayer {
+    fn new(iterations: usize) -> Self {
+        Self {
+            iterations,
+            resource_bytes: encode_resource_bytes_from_attrs(&[
+                ("service.name", "benchmark"),
+            ]),
+        }
+    }
+}
+
+impl<S> Layer<S> for IsolatedEncoderLayer
+where
+    S: Subscriber + for<'a> LookupSpan<'a>,
+{
+    fn on_event(&self, event: &Event<'_>, _ctx: tracing_subscriber::layer::Context<'_, S>) {
+        // Encode the same event N times using StatefulDirectEncoder
+        for _ in 0..self.iterations {
+            let mut encoder = StatefulDirectEncoder::new(4096, self.resource_bytes.clone());
+            encoder.encode_event(event);
+            let _ = encoder.flush();
+        }
+    }
+}
+
+/// Benchmark: Pure encoding cost (N encodings per single event dispatch)
+fn bench_isolated_encode(c: &mut Criterion) {
+    let mut group = c.benchmark_group("encode_otlp");
+
+    for iterations in [100, 1000].iter() {
+        let _ = group.bench_with_input(
+            BenchmarkId::new("3_attrs", format!("{}_events", iterations)),
+            iterations,
+            |b, &iters| {
+                b.iter(|| {
+                    let layer = IsolatedEncoderLayer::new(iters);
+                    let subscriber = tracing_subscriber::registry().with(layer);
+                    let dispatch = tracing::Dispatch::new(subscriber);
+
+                    tracing::dispatcher::with_default(&dispatch, || {
+                        // Single event, encoded `iters` times inside the callback
+                        tracing::info!(
+                            key1 = "value1",
+                            key2 = 42,
+                            key3 = true,
+                            "Benchmark message"
+                        );
+                    });
+
+                    std::hint::black_box(())
+                })
+            },
+        );
+    }
+
+    group.finish();
+}
+
+// =============================================================================
+// ISOLATED ENCODE + FORMAT BENCHMARK
+// Emit 1 event, encode and format it N times
+// =============================================================================
+
+/// Layer that encodes and formats the same event N times.
+struct IsolatedEncodeFormatLayer {
+    iterations: usize,
+    resource_bytes: Vec<u8>,
+}
+
+impl IsolatedEncodeFormatLayer {
+    fn new(iterations: usize) -> Self {
+        Self {
+            iterations,
+            resource_bytes: encode_resource_bytes_from_attrs(&[
+                ("service.name", "benchmark"),
+            ]),
+        }
+    }
+}
+
+impl<S> Layer<S> for IsolatedEncodeFormatLayer
+where
+    S: Subscriber + for<'a> LookupSpan<'a>,
+{
+    fn on_event(&self, event: &Event<'_>, _ctx: tracing_subscriber::layer::Context<'_, S>) {
+        let formatter = OtlpBytesFormattingLayer::new(std::io::sink);
+
+        // Encode and format N times
+        for _ in 0..self.iterations {
+            // Use StatefulDirectEncoder to produce full OTLP envelope
+            let mut encoder = StatefulDirectEncoder::new(4096, self.resource_bytes.clone());
+            encoder.encode_event(event);
+            let bytes = encoder.flush();
+
+            // Format the complete OTLP bytes
+            let _ = formatter.format_otlp_bytes(&bytes);
+        }
+    }
+}
+
+/// Benchmark: Encoding + formatting cost
+fn bench_isolated_encode_format(c: &mut Criterion) {
+    let mut group = c.benchmark_group("encode_and_format_otlp");
+
+    for iterations in [100, 1000].iter() {
+        let _ = group.bench_with_input(
+            BenchmarkId::new("3_attrs", format!("{}_events", iterations)),
+            iterations,
+            |b, &iters| {
+                b.iter(|| {
+                    let layer = IsolatedEncodeFormatLayer::new(iters);
+                    let subscriber = tracing_subscriber::registry().with(layer);
+                    let dispatch = tracing::Dispatch::new(subscriber);
+
+                    tracing::dispatcher::with_default(&dispatch, || {
+                        tracing::info!(
+                            key1 = "value1",
+                            key2 = 42,
+                            key3 = true,
+                            "Benchmark message"
+                        );
+                    });
+
+                    std::hint::black_box(())
+                })
+            },
+        );
+    }
+
+    group.finish();
+}
+
+// =============================================================================
+// ISOLATED FORMAT-ONLY BENCHMARK
+// Pre-encode bytes, format them N times
+// =============================================================================
+
+/// Layer that encodes once, then formats N times.
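+///
+/// The formatter writes to `std::io::sink`, so the measured cost covers
+/// decoding the OTLP bytes and rendering the text, but no real I/O.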
+struct IsolatedFormatLayer {
+    format_iterations: usize,
+    resource_bytes: Vec<u8>,
+}
+
+impl IsolatedFormatLayer {
+    fn new(format_iterations: usize) -> Self {
+        Self {
+            format_iterations,
+            resource_bytes: encode_resource_bytes_from_attrs(&[
+                ("service.name", "benchmark"),
+            ]),
+        }
+    }
+}
+
+impl<S> Layer<S> for IsolatedFormatLayer
+where
+    S: Subscriber + for<'a> LookupSpan<'a>,
+{
+    fn on_event(&self, event: &Event<'_>, _ctx: tracing_subscriber::layer::Context<'_, S>) {
+        // Encode once using StatefulDirectEncoder to get full OTLP envelope
+        let mut encoder = StatefulDirectEncoder::new(4096, self.resource_bytes.clone());
+        encoder.encode_event(event);
+        let bytes = encoder.flush();
+
+        // Format N times
+        let formatter = OtlpBytesFormattingLayer::new(std::io::sink);
+        for _ in 0..self.format_iterations {
+            let _ = formatter.format_otlp_bytes(&bytes);
+        }
+    }
+}
+
+/// Benchmark: Pure formatting cost (encode once, format N times)
+fn bench_isolated_format(c: &mut Criterion) {
+    let mut group = c.benchmark_group("format_otlp_only");
+
+    for iterations in [100, 1000].iter() {
+        let _ = group.bench_with_input(
+            BenchmarkId::new("3_attrs", format!("{}_formats", iterations)),
+            iterations,
+            |b, &iters| {
+                b.iter(|| {
+                    let layer = IsolatedFormatLayer::new(iters);
+                    let subscriber = tracing_subscriber::registry().with(layer);
+                    let dispatch = tracing::Dispatch::new(subscriber);
+
+                    tracing::dispatcher::with_default(&dispatch, || {
+                        tracing::info!(
+                            key1 = "value1",
+                            key2 = 42,
+                            key3 = true,
+                            "Benchmark message"
+                        );
+                    });
+
+                    std::hint::black_box(())
+                })
+            },
+        );
+    }
+
+    group.finish();
+}
+
+// =============================================================================
+// ATTRIBUTE COMPLEXITY BENCHMARK
+// =============================================================================
+
+/// Layer that encodes events with varying attribute counts.
+struct AttributeComplexityLayer {
+    iterations: usize,
+    resource_bytes: Vec<u8>,
+}
+
+impl AttributeComplexityLayer {
+    fn new(iterations: usize) -> Self {
+        Self {
+            iterations,
+            resource_bytes: encode_resource_bytes_from_attrs(&[
+                ("service.name", "benchmark"),
+            ]),
+        }
+    }
+}
+
+impl<S> Layer<S> for AttributeComplexityLayer
+where
+    S: Subscriber + for<'a> LookupSpan<'a>,
+{
+    fn on_event(&self, event: &Event<'_>, _ctx: tracing_subscriber::layer::Context<'_, S>) {
+        for _ in 0..self.iterations {
+            let mut encoder = StatefulDirectEncoder::new(4096, self.resource_bytes.clone());
+            encoder.encode_event(event);
+            let _ = encoder.flush();
+        }
+    }
+}
+
+/// Benchmark: Encoding cost with different attribute counts
+fn bench_attribute_complexity(c: &mut Criterion) {
+    let mut group = c.benchmark_group("encode_otlp_by_attrs");
+    let iterations = 1000;
+
+    // No attributes
+    let _ = group.bench_function("0_attrs/1000_events", |b| {
+        b.iter(|| {
+            let layer = AttributeComplexityLayer::new(iterations);
+            let subscriber = tracing_subscriber::registry().with(layer);
+            let dispatch = tracing::Dispatch::new(subscriber);
+
+            tracing::dispatcher::with_default(&dispatch, || {
+                tracing::info!("message only");
+            });
+
+            std::hint::black_box(())
+        })
+    });
+
+    // 3 attributes
+    let _ = group.bench_function("3_attrs/1000_events", |b| {
+        b.iter(|| {
+            let layer = AttributeComplexityLayer::new(iterations);
+            let subscriber = tracing_subscriber::registry().with(layer);
+            let dispatch = tracing::Dispatch::new(subscriber);
+
+            tracing::dispatcher::with_default(&dispatch, || {
+                tracing::info!(a1 = "value", a2 = 42, a3 = true, "with 3 attributes");
+            });
+
+            std::hint::black_box(())
+        })
+    });
+
+    // 10 attributes
+    let _ = group.bench_function("10_attrs/1000_events", |b| {
+        b.iter(|| {
+            let layer = AttributeComplexityLayer::new(iterations);
+            let subscriber = tracing_subscriber::registry().with(layer);
+            let dispatch = tracing::Dispatch::new(subscriber);
+
+            tracing::dispatcher::with_default(&dispatch, || {
+                tracing::info!(
+                    a1 = "string1",
+                    a2 = true,
+                    a3 = "string2",
+                    a4 = 3.14,
+                    a5 = 42i64,
+                    a6 = "string3",
+                    a7 = false,
+                    a8 = 2.718,
+                    a9 = 100u64,
+                    a10 = "string4",
+                    "with 10 attributes"
+                );
+            });
+
+            std::hint::black_box(())
+        })
+    });
+
+    group.finish();
+}
+
+#[allow(missing_docs)]
+mod bench_entry {
+    use super::*;
+
+    criterion_group!(
+        name = benches;
+        config = Criterion::default();
+        targets = bench_isolated_encode, bench_isolated_encode_format,
+            bench_isolated_format, bench_attribute_complexity
+    );
+}
+
+criterion_main!(bench_entry::benches);
diff --git a/rust/otap-dataflow/crates/pdata/src/otlp/common.rs b/rust/otap-dataflow/crates/pdata/src/otlp/common.rs
index 0ebfc790fb..9ec1adc96e 100644
--- a/rust/otap-dataflow/crates/pdata/src/otlp/common.rs
+++ b/rust/otap-dataflow/crates/pdata/src/otlp/common.rs
@@ -30,7 +30,7 @@ use std::fmt;
 use std::fmt::Write;
 use std::sync::LazyLock;
 
-pub(in crate::otlp) struct ResourceArrays<'a> {
+pub(crate) struct ResourceArrays<'a> {
     pub id: Option<&'a UInt16Array>,
     pub dropped_attributes_count: Option<&'a UInt32Array>,
     pub schema_url: Option>,
@@ -123,7 +123,7 @@ impl<'a> TryFrom<&'a RecordBatch> for ResourceArrays<'a> {
     }
 }
 
-pub(in crate::otlp) struct ScopeArrays<'a> {
+pub(crate) struct ScopeArrays<'a> {
     pub name: Option>,
     pub version: Option>,
     pub dropped_attributes_count: Option<&'a UInt32Array>,
diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/direct_encoder.rs
b/rust/otap-dataflow/crates/telemetry/src/self_tracing/direct_encoder.rs index 62b3de59f1..361fd9bc1e 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/direct_encoder.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/direct_encoder.rs @@ -25,6 +25,7 @@ //! `StatefulDirectEncoder` which maintains open ResourceLogs/ScopeLogs containers. use bytes::Bytes; +use std::fmt::Write as FmtWrite; use std::time::{SystemTime, UNIX_EPOCH}; use tracing::{Event, Level}; @@ -58,6 +59,22 @@ impl LengthPlaceholder { } } +/// Wrapper for ProtoBuffer that implements `std::fmt::Write`. +/// +/// This allows direct formatting of `Debug` values into the protobuf buffer +/// without allocating an intermediate `String`. +struct ProtoBufferWriter<'a> { + buf: &'a mut ProtoBuffer, +} + +impl FmtWrite for ProtoBufferWriter<'_> { + #[inline] + fn write_str(&mut self, s: &str) -> std::fmt::Result { + self.buf.extend_from_slice(s.as_bytes()); + Ok(()) + } +} + /// Direct encoder that writes a single LogRecord from a tracing Event. /// /// This encoder writes directly to a provided `ProtoBuffer`, producing the @@ -155,18 +172,19 @@ impl<'buf> DirectLogRecordEncoder<'buf> { /// This is the core of the zero-allocation design: instead of collecting /// field values into an intermediate data structure, we encode them directly /// to the protobuf buffer as we visit them. -struct DirectFieldVisitor<'buf> { +pub struct DirectFieldVisitor<'buf> { buf: &'buf mut ProtoBuffer, } impl<'buf> DirectFieldVisitor<'buf> { - fn new(buf: &'buf mut ProtoBuffer) -> Self { + /// Create a new DirectFieldVisitor that writes to the provided buffer. + pub fn new(buf: &'buf mut ProtoBuffer) -> Self { Self { buf } } /// Encode an attribute (KeyValue message) with a string value. #[inline] - fn encode_string_attribute(&mut self, key: &str, value: &str) { + pub fn encode_string_attribute(&mut self, key: &str, value: &str) { // KeyValue message as LOG_RECORD_ATTRIBUTES field (tag 6) self.buf.encode_field_tag(LOG_RECORD_ATTRIBUTES, wire_types::LEN); let kv_placeholder = LengthPlaceholder::new(self.buf.len()); @@ -189,7 +207,7 @@ impl<'buf> DirectFieldVisitor<'buf> { /// Encode an attribute with an i64 value. #[inline] - fn encode_int_attribute(&mut self, key: &str, value: i64) { + pub fn encode_int_attribute(&mut self, key: &str, value: i64) { self.buf.encode_field_tag(LOG_RECORD_ATTRIBUTES, wire_types::LEN); let kv_placeholder = LengthPlaceholder::new(self.buf.len()); encode_len_placeholder(self.buf); @@ -210,7 +228,7 @@ impl<'buf> DirectFieldVisitor<'buf> { /// Encode an attribute with a bool value. #[inline] - fn encode_bool_attribute(&mut self, key: &str, value: bool) { + pub fn encode_bool_attribute(&mut self, key: &str, value: bool) { self.buf.encode_field_tag(LOG_RECORD_ATTRIBUTES, wire_types::LEN); let kv_placeholder = LengthPlaceholder::new(self.buf.len()); encode_len_placeholder(self.buf); @@ -231,7 +249,7 @@ impl<'buf> DirectFieldVisitor<'buf> { /// Encode an attribute with a double value. #[inline] - fn encode_double_attribute(&mut self, key: &str, value: f64) { + pub fn encode_double_attribute(&mut self, key: &str, value: f64) { self.buf.encode_field_tag(LOG_RECORD_ATTRIBUTES, wire_types::LEN); let kv_placeholder = LengthPlaceholder::new(self.buf.len()); encode_len_placeholder(self.buf); @@ -252,7 +270,7 @@ impl<'buf> DirectFieldVisitor<'buf> { /// Encode the body (AnyValue message) as a string. 
#[inline] - fn encode_body_string(&mut self, value: &str) { + pub fn encode_body_string(&mut self, value: &str) { // LogRecord.body (field 5, AnyValue message) self.buf.encode_field_tag(LOG_RECORD_BODY, wire_types::LEN); let placeholder = LengthPlaceholder::new(self.buf.len()); @@ -263,6 +281,57 @@ impl<'buf> DirectFieldVisitor<'buf> { placeholder.patch(self.buf); } + + /// Encode the body (AnyValue message) from a Debug value without allocation. + #[inline] + pub fn encode_body_debug(&mut self, value: &dyn std::fmt::Debug) { + // LogRecord.body (field 5, AnyValue message) + self.buf.encode_field_tag(LOG_RECORD_BODY, wire_types::LEN); + let body_placeholder = LengthPlaceholder::new(self.buf.len()); + encode_len_placeholder(self.buf); + + // AnyValue.string_value (field 1, string) + self.buf.encode_field_tag(ANY_VALUE_STRING_VALUE, wire_types::LEN); + let string_placeholder = LengthPlaceholder::new(self.buf.len()); + encode_len_placeholder(self.buf); + + // Write Debug output directly to buffer + let mut writer = ProtoBufferWriter { buf: self.buf }; + let _ = write!(writer, "{:?}", value); + + string_placeholder.patch(self.buf); + body_placeholder.patch(self.buf); + } + + /// Encode an attribute with a Debug value without allocation. + #[inline] + pub fn encode_debug_attribute(&mut self, key: &str, value: &dyn std::fmt::Debug) { + // KeyValue message as LOG_RECORD_ATTRIBUTES field (tag 6) + self.buf.encode_field_tag(LOG_RECORD_ATTRIBUTES, wire_types::LEN); + let kv_placeholder = LengthPlaceholder::new(self.buf.len()); + encode_len_placeholder(self.buf); + + // KeyValue.key (field 1, string) + self.buf.encode_string(KEY_VALUE_KEY, key); + + // KeyValue.value (field 2, AnyValue message) + self.buf.encode_field_tag(KEY_VALUE_VALUE, wire_types::LEN); + let av_placeholder = LengthPlaceholder::new(self.buf.len()); + encode_len_placeholder(self.buf); + + // AnyValue.string_value (field 1, string) + self.buf.encode_field_tag(ANY_VALUE_STRING_VALUE, wire_types::LEN); + let string_placeholder = LengthPlaceholder::new(self.buf.len()); + encode_len_placeholder(self.buf); + + // Write Debug output directly to buffer + let mut writer = ProtoBufferWriter { buf: self.buf }; + let _ = write!(writer, "{:?}", value); + + string_placeholder.patch(self.buf); + av_placeholder.patch(self.buf); + kv_placeholder.patch(self.buf); + } } impl tracing::field::Visit for DirectFieldVisitor<'_> { @@ -304,14 +373,25 @@ impl tracing::field::Visit for DirectFieldVisitor<'_> { } fn record_debug(&mut self, field: &tracing::field::Field, value: &dyn std::fmt::Debug) { - // For Debug fields, we must allocate a String to format - // This is unavoidable without a custom fmt::Write implementation for ProtoBuffer - let formatted = format!("{:?}", value); + // Zero-allocation path: write Debug output directly to protobuf buffer. + // + // Note: This method is only called for types that don't implement the specific + // Visit methods (record_i64, record_f64, record_bool, record_str). Primitives + // are encoded as native OTLP AnyValue types (int_value, double_value, etc.), + // preserving type fidelity. Only complex types fall through to this Debug path. + // + // TODO: The Debug trait only provides string formatting, not structural access. + // std::fmt::Formatter is opaque with no public constructor, so we cannot intercept + // the debug_struct/debug_list/field calls to encode as nested OTLP AnyValue messages. 
+ // To support structured encoding, types would need to implement an alternative trait: + // - `serde::Serialize` → encode to AnyValue::kvlist_value / array_value + // - `valuable::Valuable` → designed for structured inspection (limited adoption) + // - `tracing::Value` → unstable, may provide this in the future if field.name() == "message" { - self.encode_body_string(&formatted); - return; + self.encode_body_debug(value); + } else { + self.encode_debug_attribute(field.name(), value); } - self.encode_string_attribute(field.name(), &formatted); } } diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs index 460e2b0d1a..215386ffdb 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs @@ -10,8 +10,9 @@ pub mod subscriber; // New direct encoder exports (preferred for zero-allocation encoding) pub use direct_encoder::{ - DirectLogRecordEncoder, LengthPlaceholder, ProtoBuffer, StatefulDirectEncoder, - encode_resource_bytes_from_attrs, + DirectFieldVisitor, DirectLogRecordEncoder, LengthPlaceholder, ProtoBuffer, + StatefulDirectEncoder, encode_len_placeholder, encode_resource_bytes_from_attrs, + patch_len_placeholder, }; // Legacy View-based exports (for compatibility) diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/subscriber.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/subscriber.rs index c2837e125b..c7ecacbdaf 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/subscriber.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/subscriber.rs @@ -115,60 +115,12 @@ where (self.on_event)(log_record); } - fn on_new_span(&self, attrs: &Attributes<'_>, id: &Id, _ctx: Context<'_, S>) { - // TODO - - // let timestamp_nanos = SystemTime::now() - // .duration_since(UNIX_EPOCH) - // .unwrap_or_default() - // .as_nanos() as u64; - - // // Extract fields from span attributes - // let mut visitor = FieldVisitor::new(); - // attrs.record(&mut visitor); - - // let metadata = attrs.metadata(); - // let span_id = id.into_u64(); - - // let mut attributes: Vec> = data - // .attr_keys - // .iter() - // .zip(data.attr_values.iter()) - // .map(|(key, value)| TracingAttribute { - // key, - // value: value.as_borrowed(), - // }) - // .collect(); - - // // Add span.id and span.name as attributes - // let span_id_value = TracingAnyValue::Int(span_id as i64); - // let span_name_value = TracingAnyValue::Str(data.name); - - // attributes.push(TracingAttribute { - // key: "span.id", - // value: span_id_value, - // }); - // attributes.push(TracingAttribute { - // key: "span.name", - // value: span_name_value, - // }); - - // // Create "span.start" log record - // let message_ref = visitor.message.as_deref().unwrap_or(""); - // let log_record = TracingLogRecord::new_with_event_name( - // metadata, - // attributes, - // timestamp_nanos, - // "span.start", - // ) - // .with_body(message_ref); - - // // Invoke callback with span start event - // (self.on_event)(log_record); + fn on_new_span(&self, _attrs: &Attributes<'_>, _id: &Id, _ctx: Context<'_, S>) { + // TODO: emit an event } - fn on_close(&self, id: Id, _ctx: Context<'_, S>) { - // TODO + fn on_close(&self, _id: Id, _ctx: Context<'_, S>) { + // TODO: emit an event } } From 0c4447001e5e4bd8d44d43eab086ee70f01b787e Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Tue, 6 Jan 2026 13:45:38 -0800 Subject: [PATCH 06/92] compact --- 
.../pdata/src/views/otlp/bytes/common.rs | 4 +- .../src/self_tracing/HYBRID_ENCODER_PLAN.md | 468 ++++++++++++++ .../telemetry/src/self_tracing/REPORT.md | 208 +++++++ .../src/self_tracing/compact_formatter.rs | 582 ++++++++++++++++++ .../crates/telemetry/src/self_tracing/mod.rs | 11 +- 5 files changed, 1269 insertions(+), 4 deletions(-) create mode 100644 rust/otap-dataflow/crates/telemetry/src/self_tracing/HYBRID_ENCODER_PLAN.md create mode 100644 rust/otap-dataflow/crates/telemetry/src/self_tracing/REPORT.md create mode 100644 rust/otap-dataflow/crates/telemetry/src/self_tracing/compact_formatter.rs diff --git a/rust/otap-dataflow/crates/pdata/src/views/otlp/bytes/common.rs b/rust/otap-dataflow/crates/pdata/src/views/otlp/bytes/common.rs index 65038444c2..c352cd0ef6 100644 --- a/rust/otap-dataflow/crates/pdata/src/views/otlp/bytes/common.rs +++ b/rust/otap-dataflow/crates/pdata/src/views/otlp/bytes/common.rs @@ -37,8 +37,10 @@ pub struct RawKeyValue<'a> { } impl<'a> RawKeyValue<'a> { + /// Create a new RawKeyValue parser from a byte slice containing a KeyValue message. #[inline] - fn new(buf: &'a [u8]) -> Self { + #[must_use] + pub fn new(buf: &'a [u8]) -> Self { Self { buf, pos: Cell::new(0), diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/HYBRID_ENCODER_PLAN.md b/rust/otap-dataflow/crates/telemetry/src/self_tracing/HYBRID_ENCODER_PLAN.md new file mode 100644 index 0000000000..a95d1b2722 --- /dev/null +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/HYBRID_ENCODER_PLAN.md @@ -0,0 +1,468 @@ +# Hybrid Log Record Encoder: Planning Document + +## Overview + +This document plans a hybrid approach for encoding tracing events that: + +1. **Keeps cheap, useful data in structural form** (for sorting, filtering, indexing) +2. **Encodes borrowed/expensive data to OTLP bytes** (body, attributes) +3. **Caches static callsite details** as pre-encoded LogRecord.event_name bytes + +## Current vs. Proposed Architecture + +### Current: Full OTLP Encoding + +``` +Event → StatefulDirectEncoder → Complete OTLP bytes + ├── ResourceLogs envelope + ├── ScopeLogs envelope (scope name string) + └── LogRecord (all fields as protobuf) +``` + +**Issues:** +- All fields encoded immediately at event time +- Callsite info (target/name/file/line) re-encoded for every event +- Can't sort/filter without decoding + +### Proposed: Hybrid Structural + Partial OTLP + +``` +Event → CompactLogRecord → Accumulate → Batch encode + ├── callsite_id: Identifier (for cached event_name lookup) + ├── timestamp_ns: u64 (structural, cheap copy) + ├── severity: u8 (structural, cheap copy) + └── body_attrs_bytes: Bytes (OTLP body+attributes only) +``` + +**Benefits:** +- Callsite details (target/name/file/line) encoded once per unique log statement +- Cached event_name bytes appended to each LogRecord at flush time +- Structural fields available for filtering/indexing +- Body+attributes already OTLP-encoded (common output path) + +## Tokio Tracing Event Anatomy + +```rust +// From tracing crate +pub struct Event<'a> { + fields: ValueSet<'a>, // Borrowed from callsite + formatted values + metadata: &'static Metadata<'static>, // Static callsite metadata +} + +pub struct Metadata<'static> { + name: &'static str, // Static + target: &'static str, // Static (module path) + level: Level, // Static + file: Option<&'static str>, + line: Option, + callsite: Identifier, // &'static dyn Callsite + // ... +} +``` + +**Key insight:** `Metadata` is `'static` (owned by callsite). 
Only the formatted field *values* are borrowed from the event. + +## What to Keep Structural vs. Encode as OTLP + +| Field | Lifetime | Keep Structural? | Rationale | +|-------|----------|------------------|-----------| +| `callsite.Identifier` | `'static` | ✓ | Key for cached event_name lookup | +| `metadata.level` | `'static` | ✓ | Cheap u8, useful for filtering | +| `timestamp` | Generated | ✓ | Cheap u64, useful for sorting | +| `metadata.target` | `'static` | Cache → event_name | Static, encode once per callsite | +| `metadata.name` | `'static` | Cache → event_name | Static, encode once per callsite | +| `metadata.file/line` | `'static` | Cache → event_name | Static, encode once per callsite | +| `body` (message) | `'a` | **Encode** | Borrowed, must capture | +| `attributes` | `'a` | **Encode** | Borrowed values, must capture | + +## Proposed Data Structures + +### Core Insight + +Since tracing provides lazy callsite registration via `register_callsite`, we can: + +1. **Cache encoded event_name bytes** per callsite at the subscriber level +2. **Store minimal event structs** with just `Identifier` + structural fields + pre-encoded body/attrs bytes +3. **Append event_name on flush** - look up cached bytes from Identifier when encoding each LogRecord + +### `CompactLogRecord` + +```rust +/// A compact log record with structural metadata and pre-encoded body/attributes. +/// +/// Cheap-to-copy fields are kept in structural form for sorting/filtering. +/// Only borrowed data (body, attributes) is encoded to OTLP bytes. +/// Callsite details (target/name/file/line) are cached and appended at flush time. +pub struct CompactLogRecord { + /// Callsite identifier - used to look up cached event_name encoding + pub callsite_id: Identifier, + + /// Timestamp in nanoseconds since Unix epoch (cheap u64 copy) + pub timestamp_ns: u64, + + /// Severity number: 1=TRACE, 5=DEBUG, 9=INFO, 13=WARN, 17=ERROR (cheap u8 copy) + pub severity_number: u8, + + /// Severity text - &'static str from Level::as_str() (no allocation) + pub severity_text: &'static str, + + /// Pre-encoded OTLP bytes for body (field 5) and attributes (field 6) only + /// These are the only fields with borrowed lifetimes that must be captured + pub body_attrs_bytes: Bytes, +} +``` + +**Why this split?** + +| Field | Size | Keep Structural | Rationale | +|-------|------|-----------------|-----------| +| `callsite_id` | 8 bytes | ✓ | Pointer to static callsite, for event_name lookup | +| `timestamp_ns` | 8 bytes | ✓ | Useful for time-based sorting/filtering | +| `severity_number` | 1 byte | ✓ | Useful for level filtering | +| `severity_text` | 16 bytes | ✓ | `&'static str`, just a pointer+len | +| `body` | variable | **Encode** | Borrowed `&str` or formatted, lifetime ends | +| `attributes` | variable | **Encode** | Borrowed values, lifetime ends | +| `event_name` | variable | **Cache** | Static callsite info, encode once per callsite | + +Total structural overhead per event: ~33 bytes + `Bytes` (Arc pointer) + +### Subscriber-Level Callsite Cache + +The key insight: callsite metadata (target, module, file, line) are **static properties of the log statement**, not the scope. We encode them once per callsite and include them in each LogRecord's `event_name` field. + +```rust +/// Cache of pre-encoded callsite details, keyed by callsite Identifier. +/// +/// Populated lazily via `register_callsite` hook. 
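+///
+/// Intended flow (names are illustrative at this planning stage):
+///   1. `register(metadata)` runs once per callsite, from the
+///      `register_callsite` hook.
+///   2. `get_event_name_bytes(&id)` runs per LogRecord at flush time,
+///      lazily encoding the event_name bytes and reusing them afterwards.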
+pub struct CallsiteCache {
+    /// Map from Identifier to pre-encoded callsite details
+    callsites: HashMap<Identifier, CachedCallsite>,
+}
+
+pub struct CachedCallsite {
+    /// Target module path - &'static from Metadata
+    pub target: &'static str,
+
+    /// Event name - &'static from Metadata
+    pub name: &'static str,
+
+    /// Source file - &'static from Metadata
+    pub file: Option<&'static str>,
+
+    /// Source line
+    pub line: Option<u32>,
+
+    /// Pre-encoded LogRecord.event_name OTLP bytes (lazily computed on first flush)
+    /// Format: "target::name" or "target::name (file:line)"
+    pub event_name_bytes: OnceCell<Bytes>,
+}
+
+impl CallsiteCache {
+    /// Called from register_callsite hook
+    pub fn register(&mut self, metadata: &'static Metadata<'static>) {
+        let id = metadata.callsite();
+        self.callsites.entry(id).or_insert_with(|| CachedCallsite {
+            target: metadata.target(),
+            name: metadata.name(),
+            file: metadata.file(),
+            line: metadata.line(),
+            event_name_bytes: OnceCell::new(),
+        });
+    }
+
+    /// Get or lazily encode event_name bytes for an Identifier
+    pub fn get_event_name_bytes(&self, id: &Identifier) -> &Bytes {
+        let cached = self.callsites.get(id).expect("callsite not registered");
+        cached.event_name_bytes.get_or_init(|| {
+            encode_event_name(cached.target, cached.name, cached.file, cached.line)
+        })
+    }
+}
+
+/// Encode callsite details as LogRecord.event_name field bytes.
+///
+/// Format options:
+/// - "module::path::event_name"
+/// - "module::path::event_name (file.rs:42)"
+fn encode_event_name(
+    target: &str,
+    name: &str,
+    file: Option<&str>,
+    line: Option<u32>
+) -> Bytes {
+    let mut buf = ProtoBuffer::with_capacity(128);
+
+    // LogRecord.event_name (field 12, string)
+    // Build the string: "target::name" or "target::name (file:line)"
+    if let (Some(file), Some(line)) = (file, line) {
+        let event_name = format!("{}::{} ({}:{})", target, name, file, line);
+        buf.encode_string(LOG_RECORD_EVENT_NAME, &event_name);
+    } else {
+        let event_name = format!("{}::{}", target, name);
+        buf.encode_string(LOG_RECORD_EVENT_NAME, &event_name);
+    }
+
+    buf.into_bytes()
+}
+```
+
+### Design Evolution
+
+```rust
+// Original full-OTLP design:
+pub struct StatefulDirectEncoder {
+    // Encodes complete LogsData with ResourceLogs/ScopeLogs/LogRecord
+    // All fields encoded immediately, scope batching only for consecutive
+}
+
+// New compact design:
+pub struct CompactLogRecord {
+    pub callsite_id: Identifier,     // For cached event_name lookup
+    pub timestamp_ns: u64,           // Structural: for sorting/filtering
+    pub severity_number: u8,         // Structural: for level filtering
+    pub severity_text: &'static str, // Structural: static, no alloc
+    pub body_attrs_bytes: Bytes,     // Encoded: borrowed data captured
+}
+```
+
+**Encoding strategy:**
+- Structural fields encoded to OTLP at flush time (trivial: 9 + 2 + ~6 bytes)
+- Body/attrs bytes appended directly (already OTLP encoded)
+- event_name looked up from callsite cache, appended to each LogRecord
+
+### `CallsiteRegistry`
+
+The tracing crate maintains a global callsite registry internally, but it is **not exposed** for enumeration. The `Callsites::for_each` method is private.
+
+However, we can build our own registry lazily via the `Subscriber::register_callsite` hook, which is called **once per callsite** before any events from that callsite are emitted:
+
+```rust
+impl<S: Subscriber> Layer<S> for HybridEncoderLayer {
+    fn register_callsite(&self, metadata: &'static Metadata<'static>) -> Interest {
+        // Called once per callsite, with static metadata we can store
+        self.callsite_cache.register(metadata);
+        Interest::always()
+    }
+}
+```
+
+**Key insight**: `Metadata<'static>` gives us `&'static str` references that we can store without lifetime issues. No allocation needed for callsite names.
+
+Note: the `CallsiteRegistry` sketched above is essentially the same as `CallsiteCache`, just with a different focus; the two should be consolidated into a single `CallsiteCache` struct.
+
+### `CompactLogRecord` Formatter
+
+Instead of accumulating records, we format and write immediately. This is a minimal `fmt::layer()` alternative:
+
+```rust
+/// Formats a CompactLogRecord as a human-readable string.
+///
+/// This is our minimal fmt::layer() replacement.
+pub fn format_log_record(record: &CompactLogRecord, callsite_cache: &CallsiteCache) -> String {
+    let cached = callsite_cache.get(record.callsite_id);
+
+    // Format: "2026-01-06T10:30:45.123Z INFO target::name: body [attr1=val1, attr2=val2]"
+    format!(
+        "{} {:5} {}::{}: {}",
+        format_timestamp(record.timestamp_ns),
+        record.severity_text,
+        cached.target,
+        cached.name,
+        format_body_attrs(&record.body_attrs_bytes),
+    )
+}
+
+/// Format nanosecond timestamp as ISO 8601
+fn format_timestamp(nanos: u64) -> String {
+    // TODO: Use a more efficient formatter
+    let secs = nanos / 1_000_000_000;
+    let subsec_nanos = (nanos % 1_000_000_000) as u32;
+    // ... format as "2026-01-06T10:30:45.123Z"
+}
+
+/// Decode and format body+attrs bytes as readable string
+fn format_body_attrs(bytes: &Bytes) -> String {
+    // Decode the pre-encoded OTLP bytes back to readable form
+    // Body becomes the main message, attrs become "[key=value, ...]"
+}
+```
+
+### Simple Writer
+
+```rust
+use std::io::{self, Write};
+
+pub enum OutputTarget {
+    Stdout,
+    Stderr,
+}
+
+pub struct SimpleWriter {
+    target: OutputTarget,
+}
+
+impl SimpleWriter {
+    pub fn stdout() -> Self {
+        Self { target: OutputTarget::Stdout }
+    }
+
+    pub fn stderr() -> Self {
+        Self { target: OutputTarget::Stderr }
+    }
+
+    pub fn write_line(&self, line: &str) {
+        match self.target {
+            OutputTarget::Stdout => {
+                let _ = writeln!(io::stdout(), "{}", line);
+            }
+            OutputTarget::Stderr => {
+                let _ = writeln!(io::stderr(), "{}", line);
+            }
+        }
+    }
+}
+```
+
+### Minimal Layer Implementation
+
+```rust
+pub struct CompactFormatterLayer {
+    callsite_cache: RwLock<CallsiteCache>,
+    writer: SimpleWriter,
+}
+
+impl<S: Subscriber> Layer<S> for CompactFormatterLayer {
+    fn register_callsite(&self, metadata: &'static Metadata<'static>) -> Interest {
+        self.callsite_cache.write().unwrap().register(metadata);
+        Interest::always()
+    }
+
+    fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) {
+        let metadata = event.metadata();
+
+        // Encode body+attrs (borrowed data)
+        let body_attrs_bytes = encode_body_and_attrs(event);
+
+        // Build compact record
+        let record = CompactLogRecord {
+            callsite_id: metadata.callsite(),
+            timestamp_ns: current_time_nanos(),
+            severity_number: level_to_severity(metadata.level()),
+            severity_text: metadata.level().as_str(),
+            body_attrs_bytes,
+        };
+
+        // Format and write immediately
+        let line = format_log_record(&record, &self.callsite_cache.read().unwrap());
+        self.writer.write_line(&line);
+    }
+} +``` + +## Encoding Flow (Simplified) + +### 1. Callsite Registration (once per callsite, lazy) + +```rust +fn register_callsite(&self, metadata: &'static Metadata<'static>) -> Interest { + self.callsite_cache.write().unwrap().register(metadata); + Interest::always() +} +``` + +### 2. Event Capture → Format → Write (per event) + +```rust +fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) { + let metadata = event.metadata(); + + // Encode body+attrs (borrowed data that won't survive) + let body_attrs_bytes = encode_body_and_attrs(event); + + // Build compact record with structural fields + let record = CompactLogRecord { + callsite_id: metadata.callsite(), + timestamp_ns: current_time_nanos(), + severity_number: level_to_severity(metadata.level()), + severity_text: metadata.level().as_str(), + body_attrs_bytes, + }; + + // Format and write immediately (no accumulation) + let line = format_log_record(&record, &self.callsite_cache.read().unwrap()); + self.writer.write_line(&line); +} +``` + +### Key Benefits (MVP) + +1. **Simple**: No accumulator, no batching, no deferred encoding +2. **Immediate output**: Events written as they occur +3. **Composable**: Accumulator/batching can be layered on later +4. **Testable**: `format_log_record()` returns String, easy to test +5. **Familiar**: Similar mental model to `fmt::layer()` + +## Implementation Plan + +### Phase 1: Core Data Structures ✅ COMPLETE +- [x] 1.1 Create `CompactLogRecord` struct (callsite_id + structural fields + body_attrs_bytes) +- [x] 1.2 Create `CallsiteCache` with `register()` and `get()` +- [x] 1.3 Create `CachedCallsite` struct storing static metadata refs + +### Phase 2: Formatting ✅ COMPLETE +- [x] 2.1 Implement `format_log_record()` → String +- [x] 2.2 Implement `format_timestamp()` for ISO 8601 output +- [x] 2.3 Implement `format_body_attrs()` using pdata View types (`RawAnyValue`, `RawKeyValue`) +- [x] 2.4 Create `SimpleWriter` for stdout/stderr output +- [x] 2.5 Implement `format_any_value()` consistent with `otlp_bytes_formatter.rs` + +### Phase 3: Body+Attrs Encoder ✅ COMPLETE +- [x] 3.1 Reuse `DirectFieldVisitor` for body+attrs encoding +- [x] 3.2 Create `encode_body_and_attrs(event) -> Bytes` function + +### Phase 4: Layer Integration ✅ COMPLETE +- [x] 4.1 Create `CompactFormatterLayer` implementing tracing Layer +- [x] 4.2 Implement `register_callsite()` to populate CallsiteCache +- [x] 4.3 Implement `on_event()` to encode, format, and write immediately +- [x] 4.4 Add basic tests with mock subscriber + +### Phase 5: Future Extensions (deferred) +- [ ] 5.1 Add `LogAccumulator` for batching +- [ ] 5.2 Add OTLP encoding path (flush to bytes) +- [ ] 5.3 Add configurable output formats (JSON, compact, etc.) + +## Open Questions (Resolved) + +1. **Body+attrs encoding**: ✅ We encode to OTLP bytes using `DirectFieldVisitor`, then decode for formatting using pdata View types (`RawAnyValue`, `RawKeyValue`). This keeps the data path consistent with future OTLP batching. + +2. **Timestamp format**: ✅ ISO 8601 with milliseconds: `2026-01-06T10:30:45.123Z` + +3. **Output format**: ✅ Single compact format for MVP: `timestamp LEVEL target::name: body [attr=value, ...]` + +4. **Thread safety**: ✅ `RwLock` - readers don't block each other, writes are rare (only during callsite registration) + +5. **Color support**: Deferred to future work (can be added to `SimpleWriter`) + +## Resolved Design Decisions + +1. 
**pdata View integration**: Instead of writing custom OTLP decoders, we reuse the existing `RawAnyValue`, `RawKeyValue` types from `otap_df_pdata::views::otlp::bytes::common`. Made `RawKeyValue::new()` public to enable this. + +2. **format_any_value consistency**: The `format_any_value()` function in `compact_formatter.rs` matches the implementation in `otlp_bytes_formatter.rs`, ensuring consistent formatting across the crate. + +## Success Metrics + +1. **Simplicity**: MVP should be <300 lines of code +2. **Correctness**: Output matches expected format for all log levels +3. **Performance**: Comparable to `fmt::layer()` for immediate writes +4. **Extensibility**: Easy to add accumulator/batching layer later + +--- + +## Next Steps + +Please review this plan and let me know: + +1. Do the proposed data structures align with your vision? +2. Any changes to what should be structural vs. encoded? +3. Which phase should we start with? +4. Answers to any of the open questions? diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/REPORT.md b/rust/otap-dataflow/crates/telemetry/src/self_tracing/REPORT.md new file mode 100644 index 0000000000..15b38fa73f --- /dev/null +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/REPORT.md @@ -0,0 +1,208 @@ +# Self-Tracing Direct Encoder: Performance Report + +This report documents the design, implementation, and benchmark results for the direct OTLP encoder used for self-diagnostics in otap-dataflow. + +## Executive Summary + +We implemented a **zero-allocation** path for encoding `tokio-tracing` events directly to OTLP protobuf bytes. The key findings: + +| Operation | Per-event cost | +|-----------|----------------| +| **Encode to OTLP bytes** (3 attrs) | ~200 ns | +| **Format OTLP for console** | ~1.0 µs | +| **Full encode + format** | ~1.35 µs | + +Memory allocations were reduced from multiple per-event to **zero for primitive types** and **zero heap allocations** for Debug types (via `fmt::Write` directly to buffer). + +--- + +## Problem Statement + +The otap-dataflow system uses a thread-per-core architecture where **OTLP bytes are the interchange format** that crosses thread boundaries. For self-diagnostics (internal logging), we needed to convert `tokio-tracing` events to OTLP with minimal overhead. + +The naive approach: +1. Visit event fields → allocate intermediate struct +2. Encode struct via View trait → OTLP bytes + +Our approach: +1. Visit event fields → encode directly to protobuf buffer + +--- + +## Implementation: `DirectLogRecordEncoder` + +### Architecture + +``` +tracing::info!(count = 42, "message") + │ + ▼ +┌─────────────────────────────────────────────┐ +│ Layer::on_event(event) │ +│ └── StatefulDirectEncoder │ +│ ├── Pre-encoded Resource bytes │ +│ ├── Open ResourceLogs/ScopeLogs │ +│ └── DirectLogRecordEncoder │ +│ └── DirectFieldVisitor │ +│ └── ProtoBuffer │ +└─────────────────────────────────────────────┘ + │ + ▼ + OTLP bytes (protobuf) +``` + +### Key Components + +1. **`StatefulDirectEncoder`**: Maintains open `ResourceLogs` and `ScopeLogs` containers, batching consecutive events with the same instrumentation scope. + +2. **`DirectLogRecordEncoder`**: Encodes a single LogRecord directly to a `ProtoBuffer`. + +3. **`DirectFieldVisitor`**: Implements `tracing::field::Visit` to encode each field directly as OTLP attributes without intermediate allocation. + +4. **`LengthPlaceholder`**: Reserves 4 bytes for protobuf length fields, patches after content is written. + +5. 
**`ProtoBufferWriter`**: Implements `std::fmt::Write` to allow `Debug` formatting directly into the protobuf buffer. + +--- + +## Type Fidelity + +The encoder preserves native OTLP types for primitives: + +| Tracing Type | OTLP AnyValue | Encoding | +|--------------|---------------|----------| +| `i64`, `u64` | `int_value` | varint | +| `f64` | `double_value` | fixed64 | +| `bool` | `bool_value` | varint | +| `&str` | `string_value` | length-prefixed bytes | +| `&dyn Debug` | `string_value` | formatted via `fmt::Write` | + +This means `tracing::info!(count = 42)` produces an OTLP attribute with `int_value: 42`, not `string_value: "42"`. + +--- + +## Memory Allocation Analysis + +### Per-Event Allocations + +| Location | Allocation | Avoidable? | +|----------|------------|------------| +| `StatefulDirectEncoder::start_scope_logs` | `scope_name.to_string()` | Yes, with scope interning | + +### Zero-Allocation Paths ✓ + +- All primitive type visitors: `record_i64`, `record_f64`, `record_bool` +- String visitor: `record_str` — encodes borrowed `&str` directly +- Debug visitor: `record_debug` — uses `fmt::Write` to buffer (no intermediate `String`) +- Buffer writes: use pre-allocated capacity + +### The Debug Trait Limitation + +The `std::fmt::Debug` trait only provides string formatting, not structural access: + +```rust +pub trait Debug { + fn fmt(&self, f: &mut Formatter<'_>) -> Result; +} +``` + +`Formatter` has no public constructor, so we cannot intercept `debug_struct`/`debug_list` calls to encode as nested OTLP structures. Complex types must be formatted as strings. + +**Future options:** +- `serde::Serialize` → encode to `AnyValue::kvlist_value` +- `valuable::Valuable` → designed for structured inspection +- `tracing::Value` → unstable, may provide this + +--- + +## Benchmark Results + +### Methodology + +Benchmarks use Criterion with jemalloc. To isolate encoding cost from tracing dispatch overhead, each benchmark: + +1. Emits 1 tracing event +2. Inside the callback, encodes it N times (100 or 1000) +3. Measures total time, then computes per-event cost + +### Encoding Cost by Attribute Count + +| Attributes | Total (1000 events) | **Per event** | +|------------|---------------------|---------------| +| 0 | 136.6 µs | **137 ns** | +| 3 | 265.6 µs | **266 ns** | +| 10 | 489.7 µs | **490 ns** | + +Cost scales roughly linearly with attribute count (~35 ns per additional attribute). + +### Full Pipeline Costs + +| Operation | Per event | +|-----------|-----------| +| Encode only | ~200 ns | +| Format only | ~1.0 µs | +| Encode + Format | ~1.35 µs | + +Formatting dominates the cost due to text generation (timestamps, attribute formatting, ANSI colors). + +### Comparison to Baseline + +For context, a single `HashMap::insert` is ~20-50 ns. Our encoding of a 3-attribute event at ~266 ns is roughly 5-10 hash operations worth of overhead. + +--- + +## Scope Batching + +The `StatefulDirectEncoder` batches consecutive events with the same instrumentation scope: + +``` +Event 1: target="module_a" ─┐ +Event 2: target="module_a" ─┼── ScopeLogs { scope: "module_a", log_records: [1, 2, 3] } +Event 3: target="module_a" ─┘ +Event 4: target="module_b" ─── ScopeLogs { scope: "module_b", log_records: [4] } +``` + +This reduces OTLP envelope overhead when events from the same module are logged consecutively. + +--- + +## Design Decisions + +### 1. Direct Encoding vs. View Trait + +We bypass the `LogRecordView` / `AttributeView` abstraction for self-tracing. 
The View traits require `GAT` lifetime handling and don't eliminate the fundamental issue: the tracing `Visit` trait erases lifetimes. + +**Trade-off**: Some code duplication vs. complexity of making View traits work with borrowed tracing data. + +### 2. Pre-encoded Resource Bytes + +Resource attributes (e.g., `service.name`) are encoded once at startup and copied into each batch. This avoids re-encoding the same data repeatedly. + +### 3. 4-Byte Length Placeholders + +Protobuf uses varint lengths, but we can't know the length until content is written. We reserve 4 bytes (max 2^28 content size) and patch afterward. This allows single-pass encoding. + +--- + +## Future Work + +1. **Scope name interning**: Avoid `to_string()` on scope change by using static strings or an intern pool. + +2. **Structured encoding for Serialize types**: Add optional serde support to encode complex types as nested OTLP structures instead of strings. + +3. **Span integration**: Currently only events are encoded. Could extend to encode spans as OTLP spans. + +4. **Batching heuristics**: Currently flushes on demand. Could add time-based or size-based automatic flushing. + +--- + +## Conclusion + +The direct encoder achieves near-optimal performance for converting tracing events to OTLP bytes: + +- **~200-500 ns per event** depending on attribute count +- **Zero heap allocations** for typical events (primitives + strings) +- **Preserves type fidelity** (numbers stay numbers, bools stay bools) +- **Single-pass encoding** with placeholder patching + +The main limitation is the `Debug` trait's lack of structural inspection, which forces complex types to be formatted as strings. This is a Rust language limitation, not an implementation issue. diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/compact_formatter.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/compact_formatter.rs new file mode 100644 index 0000000000..e2ebb1ec13 --- /dev/null +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/compact_formatter.rs @@ -0,0 +1,582 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +//! Compact log formatter - a minimal `fmt::layer()` alternative. +//! +//! This module provides a lightweight formatting layer for tokio-tracing events +//! that outputs human-readable log lines to stdout/stderr. It uses the same +//! `CompactLogRecord` structure that can later be extended for OTLP encoding. +//! +//! # Design +//! +//! The key insight is to separate: +//! - **Structural fields** (timestamp, severity, callsite_id) - kept as cheap values +//! - **Borrowed data** (body, attributes) - encoded to bytes during event capture +//! - **Static callsite info** (target, name, file, line) - cached at registration time +//! +//! This design allows for immediate formatting and output while preserving the +//! option to accumulate and batch-encode to OTLP later. +//! +//! # OTLP View Integration +//! +//! For decoding the pre-encoded body and attributes bytes, we reuse the pdata +//! View types (`RawAnyValue`, `RawKeyValue`) which provide zero-copy parsing +//! of OTLP protobuf bytes. 
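+//!
+//! # Example
+//!
+//! A minimal sketch of installing the layer (assumes `tracing-subscriber`'s
+//! registry; the exact output line depends on the callsite metadata):
+//!
+//! ```rust,ignore
+//! use tracing_subscriber::prelude::*;
+//!
+//! let layer = CompactFormatterLayer::stderr();
+//! tracing_subscriber::registry().with(layer).init();
+//!
+//! tracing::info!(count = 42, "started");
+//! // prints roughly: 2026-01-06T10:30:45.123Z INFO  <target>::<event name>: started [count=42]
+//! ```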
+
+use bytes::Bytes;
+use std::collections::HashMap;
+use std::io::{self, Write};
+use std::sync::RwLock;
+use std::time::{SystemTime, UNIX_EPOCH};
+
+use otap_df_pdata::proto::consts::field_num::logs::{LOG_RECORD_ATTRIBUTES, LOG_RECORD_BODY};
+use otap_df_pdata::proto::consts::wire_types;
+use otap_df_pdata::views::common::{AnyValueView, AttributeView, ValueType};
+use otap_df_pdata::views::otlp::bytes::common::{RawAnyValue, RawKeyValue};
+use otap_df_pdata::views::otlp::bytes::decode::read_varint;
+
+use tracing::{Event, Level, Subscriber};
+use tracing::callsite::Identifier;
+use tracing::span::{Attributes, Record};
+use tracing_subscriber::layer::{Context, Layer};
+use tracing_subscriber::registry::LookupSpan;
+
+use super::direct_encoder::{DirectFieldVisitor, ProtoBuffer};
+
+// ============================================================================
+// Core Data Structures
+// ============================================================================
+
+/// A compact log record with structural metadata and pre-encoded body/attributes.
+///
+/// Cheap-to-copy fields are kept in structural form for sorting/filtering.
+/// Only borrowed data (body, attributes) is encoded to bytes.
+/// Callsite details (target/name/file/line) are cached separately.
+#[derive(Debug, Clone)]
+pub struct CompactLogRecord {
+    /// Callsite identifier - used to look up cached callsite info
+    pub callsite_id: Identifier,
+
+    /// Timestamp in nanoseconds since Unix epoch
+    pub timestamp_ns: u64,
+
+    /// Severity number: 1=TRACE, 5=DEBUG, 9=INFO, 13=WARN, 17=ERROR
+    pub severity_number: u8,
+
+    /// Severity text - &'static str from Level::as_str()
+    pub severity_text: &'static str,
+
+    /// Pre-encoded body and attributes (OTLP format for body field 5 + attrs field 6)
+    pub body_attrs_bytes: Bytes,
+}
+
+/// Cached callsite information, populated via `register_callsite` hook.
+#[derive(Debug, Clone)]
+pub struct CachedCallsite {
+    /// Target module path - &'static from Metadata
+    pub target: &'static str,
+
+    /// Event name - &'static from Metadata
+    pub name: &'static str,
+
+    /// Source file - &'static from Metadata
+    pub file: Option<&'static str>,
+
+    /// Source line
+    pub line: Option<u32>,
+}
+
+/// Cache of callsite information, keyed by `Identifier`.
+#[derive(Debug, Default)]
+pub struct CallsiteCache {
+    callsites: HashMap<Identifier, CachedCallsite>,
+}
+
+impl CallsiteCache {
+    /// Create a new empty cache.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Register a callsite from its metadata.
+    pub fn register(&mut self, metadata: &'static tracing::Metadata<'static>) {
+        let id = metadata.callsite();
+        let _ = self.callsites.entry(id).or_insert_with(|| CachedCallsite {
+            target: metadata.target(),
+            name: metadata.name(),
+            file: metadata.file(),
+            line: metadata.line(),
+        });
+    }
+
+    /// Get cached callsite info by identifier.
+    pub fn get(&self, id: &Identifier) -> Option<&CachedCallsite> {
+        self.callsites.get(id)
+    }
+}
+
+// ============================================================================
+// Formatting
+// ============================================================================
+
+/// Format a CompactLogRecord as a human-readable string.
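+///
+/// Callsites that were never registered fall back to empty target/name
+/// rather than panicking.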
+/// +/// Output format: `2026-01-06T10:30:45.123Z INFO target::name: body [attr=value, ...]` +pub fn format_log_record(record: &CompactLogRecord, cache: &CallsiteCache) -> String { + let callsite = cache.get(&record.callsite_id); + + let (target, name) = match callsite { + Some(cs) => (cs.target, cs.name), + None => ("", ""), + }; + + let body_attrs = format_body_attrs(&record.body_attrs_bytes); + + format!( + "{} {:5} {}::{}: {}", + format_timestamp(record.timestamp_ns), + record.severity_text, + target, + name, + body_attrs, + ) +} + +/// Format nanosecond timestamp as ISO 8601 (UTC). +fn format_timestamp(nanos: u64) -> String { + let secs = nanos / 1_000_000_000; + let subsec_millis = (nanos % 1_000_000_000) / 1_000_000; + + // Convert to datetime components + // Days since Unix epoch + let days = secs / 86400; + let time_of_day = secs % 86400; + + let hours = time_of_day / 3600; + let minutes = (time_of_day % 3600) / 60; + let seconds = time_of_day % 60; + + // Calculate year/month/day from days since epoch (1970-01-01) + let (year, month, day) = days_to_ymd(days as i64); + + format!( + "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}.{:03}Z", + year, month, day, hours, minutes, seconds, subsec_millis + ) +} + +/// Convert days since Unix epoch to (year, month, day). +fn days_to_ymd(days: i64) -> (i32, u32, u32) { + // Algorithm from Howard Hinnant's date library + let z = days + 719468; + let era = if z >= 0 { z } else { z - 146096 } / 146097; + let doe = (z - era * 146097) as u32; + let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146096) / 365; + let y = yoe as i64 + era * 400; + let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); + let mp = (5 * doy + 2) / 153; + let d = doy - (153 * mp + 2) / 5 + 1; + let m = if mp < 10 { mp + 3 } else { mp - 9 }; + let y = if m <= 2 { y + 1 } else { y }; + + (y as i32, m, d) +} + +/// Format body+attrs bytes as readable string. +/// +/// Uses the pdata View types (`RawAnyValue`, `RawKeyValue`) for zero-copy +/// parsing of the OTLP protobuf bytes. This is consistent with the decoding +/// approach used in `otlp_bytes_formatter.rs`. 
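+///
+/// Expected input layout (length-delimited LogRecord fields only):
+///
+///   [tag=5|LEN][len][AnyValue bytes]      body, at most one
+///   [tag=6|LEN][len][KeyValue bytes] ...  attributes, repeated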
+fn format_body_attrs(bytes: &Bytes) -> String { + if bytes.is_empty() { + return String::new(); + } + + // The bytes contain LogRecord fields: + // - field 5 (LOG_RECORD_BODY): AnyValue message + // - field 6 (LOG_RECORD_ATTRIBUTES): repeated KeyValue messages + + let mut body_str = String::new(); + let mut attrs = Vec::new(); + let data = bytes.as_ref(); + let mut pos = 0; + + while pos < data.len() { + // Read field tag + let (tag, next_pos) = match read_varint(data, pos) { + Some(v) => v, + None => break, + }; + pos = next_pos; + + let field_num = tag >> 3; + let wire_type = tag & 0x7; + + if wire_type != wire_types::LEN { + // Skip non-length-delimited fields (shouldn't happen for body/attrs) + break; + } + + // Read length-delimited content + let (len, next_pos) = match read_varint(data, pos) { + Some(v) => v, + None => break, + }; + pos = next_pos; + let end = pos + len as usize; + + if end > data.len() { + break; + } + + let field_bytes = &data[pos..end]; + + if field_num == LOG_RECORD_BODY { + // Body: parse as AnyValue using pdata View + let any_value = RawAnyValue::new(field_bytes); + body_str = format_any_value(&any_value); + } else if field_num == LOG_RECORD_ATTRIBUTES { + // Attribute: parse as KeyValue using pdata View + let kv = RawKeyValue::new(field_bytes); + let key = String::from_utf8_lossy(kv.key()).to_string(); + let value = match kv.value() { + Some(v) => format_any_value(&v), + None => "".to_string(), + }; + attrs.push(format!("{}={}", key, value)); + } + + pos = end; + } + + if !attrs.is_empty() { + body_str.push_str(" ["); + body_str.push_str(&attrs.join(", ")); + body_str.push(']'); + } + + body_str +} + +/// Format an AnyValue for display. +/// +/// This is based on the same logic used in `otlp_bytes_formatter.rs`, providing +/// consistent formatting across the crate. +fn format_any_value<'a>(value: &impl AnyValueView<'a>) -> String { + match value.value_type() { + ValueType::String => { + if let Some(s) = value.as_string() { + String::from_utf8_lossy(s).to_string() + } else { + String::new() + } + } + ValueType::Int64 => { + if let Some(i) = value.as_int64() { + i.to_string() + } else { + String::new() + } + } + ValueType::Bool => { + if let Some(b) = value.as_bool() { + b.to_string() + } else { + String::new() + } + } + ValueType::Double => { + if let Some(d) = value.as_double() { + format!("{:.6}", d) + } else { + String::new() + } + } + ValueType::Bytes => { + if let Some(bytes) = value.as_bytes() { + format!("{:?}", bytes) + } else { + String::new() + } + } + ValueType::Array => { + if let Some(array_iter) = value.as_array() { + let parts: Vec<_> = array_iter.map(|item| format_any_value(&item)).collect(); + format!("[{}]", parts.join(", ")) + } else { + "[]".to_string() + } + } + ValueType::KeyValueList => { + if let Some(kvlist_iter) = value.as_kvlist() { + let parts: Vec<_> = kvlist_iter + .map(|kv| { + let key_str = String::from_utf8_lossy(kv.key()).to_string(); + match kv.value() { + Some(val) => format!("{}={}", key_str, format_any_value(&val)), + None => key_str, + } + }) + .collect(); + format!("{{{}}}", parts.join(", ")) + } else { + "{}".to_string() + } + } + ValueType::Empty => String::new(), + } +} + +// ============================================================================ +// Writer +// ============================================================================ + +/// Output target for log lines. +#[derive(Debug, Clone, Copy, Default)] +pub enum OutputTarget { + /// Write to standard error. 
+    #[default]
+    Stderr,
+    /// Write to standard output.
+    Stdout,
+}
+
+/// Simple writer that outputs log lines to stdout or stderr.
+#[derive(Debug)]
+pub struct SimpleWriter {
+    target: OutputTarget,
+}
+
+impl Default for SimpleWriter {
+    fn default() -> Self {
+        Self::stderr()
+    }
+}
+
+impl SimpleWriter {
+    /// Create a writer that outputs to stdout.
+    pub fn stdout() -> Self {
+        Self { target: OutputTarget::Stdout }
+    }
+
+    /// Create a writer that outputs to stderr.
+    pub fn stderr() -> Self {
+        Self { target: OutputTarget::Stderr }
+    }
+
+    /// Write a log line (with newline).
+    pub fn write_line(&self, line: &str) {
+        match self.target {
+            OutputTarget::Stdout => {
+                let _ = writeln!(io::stdout(), "{}", line);
+            }
+            OutputTarget::Stderr => {
+                let _ = writeln!(io::stderr(), "{}", line);
+            }
+        }
+    }
+}
+
+// ============================================================================
+// Layer Implementation
+// ============================================================================
+
+/// A minimal formatting layer that outputs log records to stdout/stderr.
+///
+/// This is a lightweight alternative to `tracing_subscriber::fmt::layer()`.
+pub struct CompactFormatterLayer {
+    callsite_cache: RwLock<CallsiteCache>,
+    writer: SimpleWriter,
+}
+
+impl CompactFormatterLayer {
+    /// Create a new layer that writes to stderr.
+    pub fn new() -> Self {
+        Self {
+            callsite_cache: RwLock::new(CallsiteCache::new()),
+            writer: SimpleWriter::stderr(),
+        }
+    }
+
+    /// Create a new layer that writes to stdout.
+    pub fn stdout() -> Self {
+        Self {
+            callsite_cache: RwLock::new(CallsiteCache::new()),
+            writer: SimpleWriter::stdout(),
+        }
+    }
+
+    /// Create a new layer that writes to stderr.
+    pub fn stderr() -> Self {
+        Self::new()
+    }
+}
+
+impl Default for CompactFormatterLayer {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl<S> Layer<S> for CompactFormatterLayer
+where
+    S: Subscriber + for<'a> LookupSpan<'a>,
+{
+    fn register_callsite(
+        &self,
+        metadata: &'static tracing::Metadata<'static>,
+    ) -> tracing::subscriber::Interest {
+        self.callsite_cache.write().unwrap().register(metadata);
+        tracing::subscriber::Interest::always()
+    }
+
+    fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) {
+        let metadata = event.metadata();
+
+        // Encode body and attributes to bytes
+        let body_attrs_bytes = encode_body_and_attrs(event);
+
+        // Get current timestamp
+        let timestamp_ns = SystemTime::now()
+            .duration_since(UNIX_EPOCH)
+            .unwrap_or_default()
+            .as_nanos() as u64;
+
+        // Build compact record
+        let record = CompactLogRecord {
+            callsite_id: metadata.callsite(),
+            timestamp_ns,
+            severity_number: level_to_severity(metadata.level()),
+            severity_text: metadata.level().as_str(),
+            body_attrs_bytes,
+        };
+
+        // Format and write immediately
+        let cache = self.callsite_cache.read().unwrap();
+        let line = format_log_record(&record, &cache);
+        self.writer.write_line(&line);
+    }
+
+    fn on_new_span(&self, _attrs: &Attributes<'_>, _id: &tracing::span::Id, _ctx: Context<'_, S>) {
+        // Not handling spans in MVP
+    }
+
+    fn on_record(&self, _span: &tracing::span::Id, _values: &Record<'_>, _ctx: Context<'_, S>) {
+        // Not handling spans in MVP
+    }
+
+    fn on_enter(&self, _id: &tracing::span::Id, _ctx: Context<'_, S>) {
+        // Not handling spans in MVP
+    }
+
+    fn on_exit(&self, _id: &tracing::span::Id, _ctx: Context<'_, S>) {
+        // Not handling spans in MVP
+    }
+
+    fn on_close(&self, _id: tracing::span::Id, _ctx: Context<'_, S>) {
+        // Not handling spans in MVP
+    }
+}
+
+/// Encode only body and attributes from an event
to OTLP bytes. +fn encode_body_and_attrs(event: &Event<'_>) -> Bytes { + let mut buf = ProtoBuffer::with_capacity(256); + + // Visit fields to encode body (field 5) and attributes (field 6) + let mut visitor = DirectFieldVisitor::new(&mut buf); + event.record(&mut visitor); + + buf.into_bytes() +} + +/// Convert tracing Level to OTLP severity number. +fn level_to_severity(level: &Level) -> u8 { + match *level { + Level::TRACE => 1, + Level::DEBUG => 5, + Level::INFO => 9, + Level::WARN => 13, + Level::ERROR => 17, + } +} + +// ============================================================================ +// Tests +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + use tracing_subscriber::prelude::*; + + #[test] + fn test_format_timestamp() { + // 2026-01-06T10:30:45.123Z in nanoseconds + // Let's use a known timestamp: 2024-01-01T00:00:00.000Z + let nanos: u64 = 1704067200 * 1_000_000_000; // 2024-01-01 00:00:00 UTC + let formatted = format_timestamp(nanos); + assert_eq!(formatted, "2024-01-01T00:00:00.000Z"); + + // Test with milliseconds + let nanos_with_ms: u64 = 1704067200 * 1_000_000_000 + 123_000_000; + let formatted = format_timestamp(nanos_with_ms); + assert_eq!(formatted, "2024-01-01T00:00:00.123Z"); + } + + #[test] + fn test_days_to_ymd() { + // 1970-01-01 is day 0 + assert_eq!(days_to_ymd(0), (1970, 1, 1)); + + // 2024-01-01 is 19723 days after 1970-01-01 + assert_eq!(days_to_ymd(19723), (2024, 1, 1)); + } + + #[test] + fn test_level_to_severity() { + assert_eq!(level_to_severity(&Level::TRACE), 1); + assert_eq!(level_to_severity(&Level::DEBUG), 5); + assert_eq!(level_to_severity(&Level::INFO), 9); + assert_eq!(level_to_severity(&Level::WARN), 13); + assert_eq!(level_to_severity(&Level::ERROR), 17); + } + + #[test] + fn test_callsite_cache() { + let cache = CallsiteCache::new(); + assert!(cache.callsites.is_empty()); + } + + #[test] + fn test_simple_writer_creation() { + let _stdout = SimpleWriter::stdout(); + let _stderr = SimpleWriter::stderr(); + let _default = SimpleWriter::default(); + } + + #[test] + fn test_compact_formatter_layer_creation() { + let _layer = CompactFormatterLayer::new(); + let _stdout = CompactFormatterLayer::stdout(); + let _stderr = CompactFormatterLayer::stderr(); + let _default = CompactFormatterLayer::default(); + } + + #[test] + fn test_layer_integration() { + // Create the layer and subscriber + let layer = CompactFormatterLayer::stderr(); + let subscriber = tracing_subscriber::registry().with(layer); + + // Set as default for this thread temporarily + let dispatch = tracing::Dispatch::new(subscriber); + let _guard = tracing::dispatcher::set_default(&dispatch); + + // Emit some events - these should be formatted and written to stderr + tracing::info!("Test info message"); + tracing::warn!(count = 42, "Warning with attribute"); + tracing::error!(error = "something failed", "Error occurred"); + + // The test verifies no panics occur; actual output goes to stderr + } +} diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs index 215386ffdb..83cecf0850 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs @@ -3,12 +3,18 @@ //! tokio-tracing support for directly encoding and formatting OTLP bytes. 
+pub mod compact_formatter; pub mod direct_encoder; pub mod log_record; -pub mod otlp_bytes_formatter; pub mod subscriber; -// New direct encoder exports (preferred for zero-allocation encoding) +// Compact formatter exports (recommended for minimal fmt::layer() alternative) +pub use compact_formatter::{ + CachedCallsite, CallsiteCache, CompactFormatterLayer, CompactLogRecord, OutputTarget, + SimpleWriter, format_log_record, +}; + +// Direct encoder exports (for zero-allocation OTLP encoding) pub use direct_encoder::{ DirectFieldVisitor, DirectLogRecordEncoder, LengthPlaceholder, ProtoBuffer, StatefulDirectEncoder, encode_len_placeholder, encode_resource_bytes_from_attrs, @@ -17,5 +23,4 @@ pub use direct_encoder::{ // Legacy View-based exports (for compatibility) pub use log_record::{TracingAnyValue, TracingAttribute, TracingLogRecord}; -pub use otlp_bytes_formatter::{FormatError, OtlpBytesFormattingLayer}; pub use subscriber::OtlpTracingLayer; From 58e988bd614633ea34a4e390da7fd8be6c9e9f2f Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Tue, 6 Jan 2026 14:38:14 -0800 Subject: [PATCH 07/92] compact bench --- rust/otap-dataflow/Cargo.toml | 2 + .../benchmarks/benches/self_tracing/main.rs | 244 +++---- .../crates/controller/src/lib.rs | 6 +- .../crates/telemetry/src/error.rs | 4 + .../otap-dataflow/crates/telemetry/src/lib.rs | 41 ++ .../src/self_tracing/compact_formatter.rs | 238 ++++--- .../telemetry/src/self_tracing/log_record.rs | 474 ------------- .../crates/telemetry/src/self_tracing/mod.rs | 24 +- .../src/self_tracing/otlp_bytes_formatter.rs | 658 ------------------ .../telemetry/src/self_tracing/subscriber.rs | 249 ------- rust/otap-dataflow/src/main.rs | 4 + 11 files changed, 339 insertions(+), 1605 deletions(-) delete mode 100644 rust/otap-dataflow/crates/telemetry/src/self_tracing/log_record.rs delete mode 100644 rust/otap-dataflow/crates/telemetry/src/self_tracing/otlp_bytes_formatter.rs delete mode 100644 rust/otap-dataflow/crates/telemetry/src/self_tracing/subscriber.rs diff --git a/rust/otap-dataflow/Cargo.toml b/rust/otap-dataflow/Cargo.toml index eaeb006d30..1661f0f4e0 100644 --- a/rust/otap-dataflow/Cargo.toml +++ b/rust/otap-dataflow/Cargo.toml @@ -33,6 +33,7 @@ path = "src/main.rs" otap-df-config.workspace = true otap-df-controller.workspace = true otap-df-otap.workspace = true +otap-df-telemetry.workspace = true thiserror.workspace = true quiver = { workspace = true, optional = true } serde_json.workspace = true @@ -51,6 +52,7 @@ otap-df-config = { path = "crates/config" } otap-df-controller = { path = "crates/controller" } otap-df-otap = { path = "crates/otap" } otap-df-pdata = { path = "crates/pdata" } +otap-df-telemetry = { path = "crates/telemetry" } quiver = { package = "otap-df-quiver", path = "crates/quiver" } data_engine_expressions = { path = "../experimental/query_engine/expressions" } data_engine_kql_parser = { path = "../experimental/query_engine/kql-parser" } diff --git a/rust/otap-dataflow/benchmarks/benches/self_tracing/main.rs b/rust/otap-dataflow/benchmarks/benches/self_tracing/main.rs index 1a19d42e55..9bd2484362 100644 --- a/rust/otap-dataflow/benchmarks/benches/self_tracing/main.rs +++ b/rust/otap-dataflow/benchmarks/benches/self_tracing/main.rs @@ -3,32 +3,34 @@ #![allow(missing_docs)] -//! Benchmarks for self-tracing OTLP bytes encoding and formatting. +//! Benchmarks for the compact log formatter. //! //! # Benchmark Design //! //! These benchmarks emit a single tracing event but perform N encoding/formatting //! 
operations inside the callback. This amortizes tracing dispatch overhead to noise, -//! allowing us to measure the true cost of encoding. +//! allowing us to measure the true cost of encoding and formatting. //! //! # Interpreting Results //! -//! Benchmark names follow the pattern: `group/description/N_encodings` +//! Benchmark names follow the pattern: `group/description/N_events` //! //! To get per-event cost: `measured_time / N` //! -//! Example: `encode_otlp/3_attrs/1000_events` = 265 µs → 265 ns per event +//! Example: `compact_encode/3_attrs/1000_events` = 300 µs → 300 ns per event use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main}; -use tracing::{Event, Subscriber}; +use tracing::{Event, Level, Subscriber}; use tracing_subscriber::layer::Layer; use tracing_subscriber::prelude::*; use tracing_subscriber::registry::LookupSpan; use otap_df_telemetry::self_tracing::{ - OtlpBytesFormattingLayer, StatefulDirectEncoder, encode_resource_bytes_from_attrs, + CallsiteCache, CompactLogRecord, encode_body_and_attrs, format_log_record, }; +use std::time::{SystemTime, UNIX_EPOCH}; + #[cfg(not(windows))] use tikv_jemallocator::Jemalloc; @@ -37,46 +39,36 @@ use tikv_jemallocator::Jemalloc; static GLOBAL: Jemalloc = Jemalloc; // ============================================================================= -// ISOLATED ENCODING BENCHMARK -// Emit 1 event, encode it N times inside the callback +// ENCODE ONLY BENCHMARK +// Emit 1 event, encode body+attrs N times (partial OTLP) // ============================================================================= -/// Layer that encodes the same event N times to measure pure encoding cost. -struct IsolatedEncoderLayer { - /// Number of times to encode each event +/// Layer that encodes body+attrs N times to measure pure encoding cost. 
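+///
+/// Per-event cost is `measured_time / iterations`: dispatch overhead is paid
+/// once per emitted event, while the encode loop dominates the measurement.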
+struct EncodeOnlyLayer { iterations: usize, - /// Pre-encoded resource bytes - resource_bytes: Vec, } -impl IsolatedEncoderLayer { +impl EncodeOnlyLayer { fn new(iterations: usize) -> Self { - Self { - iterations, - resource_bytes: encode_resource_bytes_from_attrs(&[ - ("service.name", "benchmark"), - ]), - } + Self { iterations } } } -impl Layer for IsolatedEncoderLayer +impl Layer for EncodeOnlyLayer where S: Subscriber + for<'a> LookupSpan<'a>, { fn on_event(&self, event: &Event<'_>, _ctx: tracing_subscriber::layer::Context<'_, S>) { - // Encode the same event N times using StatefulDirectEncoder for _ in 0..self.iterations { - let mut encoder = StatefulDirectEncoder::new(4096, self.resource_bytes.clone()); - encoder.encode_event(event); - let _ = encoder.flush(); + let bytes = encode_body_and_attrs(event); + let _ = std::hint::black_box(bytes); } } } -/// Benchmark: Pure encoding cost (N encodings per single event dispatch) -fn bench_isolated_encode(c: &mut Criterion) { - let mut group = c.benchmark_group("encode_otlp"); +/// Benchmark: Pure encoding cost (body+attrs to partial OTLP bytes) +fn bench_encode_only(c: &mut Criterion) { + let mut group = c.benchmark_group("compact_encode"); for iterations in [100, 1000].iter() { let _ = group.bench_with_input( @@ -84,12 +76,11 @@ fn bench_isolated_encode(c: &mut Criterion) { iterations, |b, &iters| { b.iter(|| { - let layer = IsolatedEncoderLayer::new(iters); + let layer = EncodeOnlyLayer::new(iters); let subscriber = tracing_subscriber::registry().with(layer); let dispatch = tracing::Dispatch::new(subscriber); tracing::dispatcher::with_default(&dispatch, || { - // Single event, encoded `iters` times inside the callback tracing::info!( key1 = "value1", key2 = 42, @@ -98,7 +89,7 @@ fn bench_isolated_encode(c: &mut Criterion) { ); }); - std::hint::black_box(()) + let _ = std::hint::black_box(()); }) }, ); @@ -108,50 +99,64 @@ fn bench_isolated_encode(c: &mut Criterion) { } // ============================================================================= -// ISOLATED ENCODE + FORMAT BENCHMARK -// Emit 1 event, encode and format it N times +// FORMAT ONLY BENCHMARK +// Encode once, format N times // ============================================================================= -/// Layer that encodes and formats the same event N times. -struct IsolatedEncodeFormatLayer { +/// Layer that encodes once then formats N times. 
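+///
+/// Isolates formatting cost: the `CompactLogRecord` is built once per
+/// dispatched event, then `format_log_record` runs `iterations` times on it.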
+struct FormatOnlyLayer { iterations: usize, - resource_bytes: Vec, } -impl IsolatedEncodeFormatLayer { +impl FormatOnlyLayer { fn new(iterations: usize) -> Self { - Self { - iterations, - resource_bytes: encode_resource_bytes_from_attrs(&[ - ("service.name", "benchmark"), - ]), - } + Self { iterations } } } -impl Layer for IsolatedEncodeFormatLayer +impl Layer for FormatOnlyLayer where S: Subscriber + for<'a> LookupSpan<'a>, { fn on_event(&self, event: &Event<'_>, _ctx: tracing_subscriber::layer::Context<'_, S>) { - let formatter = OtlpBytesFormattingLayer::new(std::io::sink); + let metadata = event.metadata(); + + // Build cache with this callsite + let mut cache = CallsiteCache::new(); + cache.register(metadata); + + // Encode once + let body_attrs_bytes = encode_body_and_attrs(event); + let timestamp_ns = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_nanos() as u64; + + let record = CompactLogRecord { + callsite_id: metadata.callsite(), + timestamp_ns, + severity_number: match *metadata.level() { + Level::TRACE => 1, + Level::DEBUG => 5, + Level::INFO => 9, + Level::WARN => 13, + Level::ERROR => 17, + }, + severity_text: metadata.level().as_str(), + body_attrs_bytes, + }; - // Encode and format N times + // Format N times (without ANSI colors for consistent measurement) for _ in 0..self.iterations { - // Use StatefulDirectEncoder to produce full OTLP envelope - let mut encoder = StatefulDirectEncoder::new(4096, self.resource_bytes.clone()); - encoder.encode_event(event); - let bytes = encoder.flush(); - - // Format the complete OTLP bytes - let _ = formatter.format_otlp_bytes(&bytes); + let line = format_log_record(&record, &cache, false); + let _ = std::hint::black_box(line); } } } -/// Benchmark: Encoding + formatting cost -fn bench_isolated_encode_format(c: &mut Criterion) { - let mut group = c.benchmark_group("encode_and_format_otlp"); +/// Benchmark: Pure formatting cost (format already-encoded record to string) +fn bench_format_only(c: &mut Criterion) { + let mut group = c.benchmark_group("compact_format"); for iterations in [100, 1000].iter() { let _ = group.bench_with_input( @@ -159,7 +164,7 @@ fn bench_isolated_encode_format(c: &mut Criterion) { iterations, |b, &iters| { b.iter(|| { - let layer = IsolatedEncodeFormatLayer::new(iters); + let layer = FormatOnlyLayer::new(iters); let subscriber = tracing_subscriber::registry().with(layer); let dispatch = tracing::Dispatch::new(subscriber); @@ -172,7 +177,7 @@ fn bench_isolated_encode_format(c: &mut Criterion) { ); }); - std::hint::black_box(()) + let _ = std::hint::black_box(()); }) }, ); @@ -182,56 +187,70 @@ fn bench_isolated_encode_format(c: &mut Criterion) { } // ============================================================================= -// ISOLATED FORMAT-ONLY BENCHMARK -// Pre-encode bytes, format them N times +// ENCODE + FORMAT BENCHMARK (full pipeline) // ============================================================================= -/// Layer that encodes once, then formats N times. -struct IsolatedFormatLayer { - format_iterations: usize, - resource_bytes: Vec, +/// Layer that encodes and formats N times (the full pipeline). 
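+///
+/// Each iteration repeats the same steps `CompactFormatterLayer::on_event`
+/// performs per event (sketched below, with the timestamp step elided):
+///
+/// ```ignore
+/// let body_attrs_bytes = encode_body_and_attrs(event);   // event -> partial OTLP bytes
+/// let record = CompactLogRecord { /* callsite, level, bytes */ };
+/// let line = format_log_record(&record, &cache, false);  // bytes -> console line
+/// ```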
+struct EncodeFormatLayer { + iterations: usize, } -impl IsolatedFormatLayer { - fn new(format_iterations: usize) -> Self { - Self { - format_iterations, - resource_bytes: encode_resource_bytes_from_attrs(&[ - ("service.name", "benchmark"), - ]), - } +impl EncodeFormatLayer { + fn new(iterations: usize) -> Self { + Self { iterations } } } -impl Layer for IsolatedFormatLayer +impl Layer for EncodeFormatLayer where S: Subscriber + for<'a> LookupSpan<'a>, { fn on_event(&self, event: &Event<'_>, _ctx: tracing_subscriber::layer::Context<'_, S>) { - // Encode once using StatefulDirectEncoder to get full OTLP envelope - let mut encoder = StatefulDirectEncoder::new(4096, self.resource_bytes.clone()); - encoder.encode_event(event); - let bytes = encoder.flush(); - - // Format N times - let formatter = OtlpBytesFormattingLayer::new(std::io::sink); - for _ in 0..self.format_iterations { - let _ = formatter.format_otlp_bytes(&bytes); + let metadata = event.metadata(); + + // Build cache with this callsite + let mut cache = CallsiteCache::new(); + cache.register(metadata); + + // Encode + format N times + for _ in 0..self.iterations { + let body_attrs_bytes = encode_body_and_attrs(event); + let timestamp_ns = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_nanos() as u64; + + let record = CompactLogRecord { + callsite_id: metadata.callsite(), + timestamp_ns, + severity_number: match *metadata.level() { + Level::TRACE => 1, + Level::DEBUG => 5, + Level::INFO => 9, + Level::WARN => 13, + Level::ERROR => 17, + }, + severity_text: metadata.level().as_str(), + body_attrs_bytes, + }; + + let line = format_log_record(&record, &cache, false); + let _ = std::hint::black_box(line); } } } -/// Benchmark: Pure formatting cost (encode once, format N times) -fn bench_isolated_format(c: &mut Criterion) { - let mut group = c.benchmark_group("format_otlp_only"); +/// Benchmark: Full encode + format pipeline +fn bench_encode_format(c: &mut Criterion) { + let mut group = c.benchmark_group("compact_encode_format"); for iterations in [100, 1000].iter() { let _ = group.bench_with_input( - BenchmarkId::new("3_attrs", format!("{}_formats", iterations)), + BenchmarkId::new("3_attrs", format!("{}_events", iterations)), iterations, |b, &iters| { b.iter(|| { - let layer = IsolatedFormatLayer::new(iters); + let layer = EncodeFormatLayer::new(iters); let subscriber = tracing_subscriber::registry().with(layer); let dispatch = tracing::Dispatch::new(subscriber); @@ -244,7 +263,7 @@ fn bench_isolated_format(c: &mut Criterion) { ); }); - std::hint::black_box(()) + let _ = std::hint::black_box(()); }) }, ); @@ -257,45 +276,15 @@ fn bench_isolated_format(c: &mut Criterion) { // ATTRIBUTE COMPLEXITY BENCHMARK // ============================================================================= -/// Layer that encodes events with varying attribute counts. 
-struct AttributeComplexityLayer { - iterations: usize, - resource_bytes: Vec, -} - -impl AttributeComplexityLayer { - fn new(iterations: usize) -> Self { - Self { - iterations, - resource_bytes: encode_resource_bytes_from_attrs(&[ - ("service.name", "benchmark"), - ]), - } - } -} - -impl Layer for AttributeComplexityLayer -where - S: Subscriber + for<'a> LookupSpan<'a>, -{ - fn on_event(&self, event: &Event<'_>, _ctx: tracing_subscriber::layer::Context<'_, S>) { - for _ in 0..self.iterations { - let mut encoder = StatefulDirectEncoder::new(4096, self.resource_bytes.clone()); - encoder.encode_event(event); - let _ = encoder.flush(); - } - } -} - /// Benchmark: Encoding cost with different attribute counts -fn bench_attribute_complexity(c: &mut Criterion) { - let mut group = c.benchmark_group("encode_otlp_by_attrs"); +fn bench_by_attr_count(c: &mut Criterion) { + let mut group = c.benchmark_group("compact_format_by_attrs"); let iterations = 1000; // No attributes let _ = group.bench_function("0_attrs/1000_events", |b| { b.iter(|| { - let layer = AttributeComplexityLayer::new(iterations); + let layer = EncodeFormatLayer::new(iterations); let subscriber = tracing_subscriber::registry().with(layer); let dispatch = tracing::Dispatch::new(subscriber); @@ -303,14 +292,14 @@ fn bench_attribute_complexity(c: &mut Criterion) { tracing::info!("message only"); }); - std::hint::black_box(()) + let _ = std::hint::black_box(()); }) }); // 3 attributes let _ = group.bench_function("3_attrs/1000_events", |b| { b.iter(|| { - let layer = AttributeComplexityLayer::new(iterations); + let layer = EncodeFormatLayer::new(iterations); let subscriber = tracing_subscriber::registry().with(layer); let dispatch = tracing::Dispatch::new(subscriber); @@ -318,14 +307,14 @@ fn bench_attribute_complexity(c: &mut Criterion) { tracing::info!(a1 = "value", a2 = 42, a3 = true, "with 3 attributes"); }); - std::hint::black_box(()) + let _ = std::hint::black_box(()); }) }); // 10 attributes let _ = group.bench_function("10_attrs/1000_events", |b| { b.iter(|| { - let layer = AttributeComplexityLayer::new(iterations); + let layer = EncodeFormatLayer::new(iterations); let subscriber = tracing_subscriber::registry().with(layer); let dispatch = tracing::Dispatch::new(subscriber); @@ -345,7 +334,7 @@ fn bench_attribute_complexity(c: &mut Criterion) { ); }); - std::hint::black_box(()) + let _ = std::hint::black_box(()); }) }); @@ -359,8 +348,7 @@ mod bench_entry { criterion_group!( name = benches; config = Criterion::default(); - targets = bench_isolated_encode, bench_isolated_encode_format, - bench_isolated_format, bench_attribute_complexity + targets = bench_encode_only, bench_format_only, bench_encode_format, bench_by_attr_count ); } diff --git a/rust/otap-dataflow/crates/controller/src/lib.rs b/rust/otap-dataflow/crates/controller/src/lib.rs index 6c1e39f7cb..eb8cae70fc 100644 --- a/rust/otap-dataflow/crates/controller/src/lib.rs +++ b/rust/otap-dataflow/crates/controller/src/lib.rs @@ -36,7 +36,6 @@ use otap_df_state::DeployedPipelineKey; use otap_df_state::event::{ErrorSummary, ObservedEvent}; use otap_df_state::reporter::ObservedEventReporter; use otap_df_state::store::ObservedStateStore; -use otap_df_telemetry::opentelemetry_client::OpentelemetryClient; use otap_df_telemetry::reporter::MetricsReporter; use otap_df_telemetry::{MetricsSystem, otel_info, otel_info_span, otel_warn}; use std::thread; @@ -83,7 +82,8 @@ impl Controller { node_ctrl_msg_channel_size = settings.default_node_ctrl_msg_channel_size, 
pipeline_ctrl_msg_channel_size = settings.default_pipeline_ctrl_msg_channel_size ); - let opentelemetry_client = OpentelemetryClient::new(telemetry_config)?; + // Note: Raw logging is initialized early in main.rs via init_raw_logging(). + // OpenTelemetry client integration will be added in a future phase. let metrics_system = MetricsSystem::new(telemetry_config); let metrics_dispatcher = metrics_system.dispatcher(); let metrics_reporter = metrics_system.reporter(); @@ -257,7 +257,7 @@ impl Controller { handle.shutdown_and_join()?; } obs_state_join_handle.shutdown_and_join()?; - opentelemetry_client.shutdown()?; + // Note: OpenTelemetry client shutdown will be added when the client is re-enabled. Ok(()) } diff --git a/rust/otap-dataflow/crates/telemetry/src/error.rs b/rust/otap-dataflow/crates/telemetry/src/error.rs index 77bef53512..79cdcba188 100644 --- a/rust/otap-dataflow/crates/telemetry/src/error.rs +++ b/rust/otap-dataflow/crates/telemetry/src/error.rs @@ -27,4 +27,8 @@ pub enum Error { /// Error during configuration of a component. #[error("Configuration error: {0}")] ConfigurationError(String), + + /// Error during tracing subscriber initialization. + #[error("Tracing initialization error: {0}")] + TracingInitError(String), } diff --git a/rust/otap-dataflow/crates/telemetry/src/lib.rs b/rust/otap-dataflow/crates/telemetry/src/lib.rs index 58f7a75da5..cb47a7939a 100644 --- a/rust/otap-dataflow/crates/telemetry/src/lib.rs +++ b/rust/otap-dataflow/crates/telemetry/src/lib.rs @@ -28,6 +28,8 @@ use crate::error::Error; use crate::registry::MetricsRegistryHandle; use otap_df_config::pipeline::service::telemetry::TelemetryConfig; use tokio_util::sync::CancellationToken; +use tracing_subscriber::layer::SubscriberExt; +use tracing_subscriber::util::SubscriberInitExt; pub mod attributes; pub mod collector; @@ -59,11 +61,50 @@ pub use tracing::info_span as otel_info_span; pub use tracing::trace_span as otel_trace_span; pub use tracing::warn_span as otel_warn_span; +// Re-export raw logging initialization for early setup +pub use self_tracing::compact_formatter::CompactFormatterLayer; + // TODO This should be #[cfg(test)], but something is preventing it from working. // The #[cfg(test)]-labeled otap_batch_processor::test_helpers::from_config // can't load this module unless I remove #[cfg(test)]! See #1304. pub mod testing; +/// Initialize raw logging as early as possible. +/// +/// This installs a minimal tracing subscriber with our `CompactFormatterLayer` that +/// formats log events to stderr. This should be called at the very start of `main()` +/// before any configuration parsing or other initialization that might emit logs. +/// +/// Raw logging is the most basic form of console output, used as a safe configuration +/// early in the lifetime of a process. See `ARCHITECTURE.md` for details. +/// +/// # Panics +/// +/// This function will panic if a global subscriber has already been set. +/// Use `try_init_raw_logging()` if you need to handle this case gracefully. +pub fn init_raw_logging() { + try_init_raw_logging().expect("Failed to initialize raw logging subscriber"); +} + +/// Try to initialize raw logging, returning an error if a subscriber is already set. +/// +/// This is useful in tests or other contexts where a subscriber may already exist. +pub fn try_init_raw_logging() -> Result<(), Error> { + // If RUST_LOG is set, use it for fine-grained control. + // Otherwise, default to INFO level with some noisy dependencies silenced. 
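+    // For example, `RUST_LOG=debug,h2=off,hyper=off` raises the default level
+    // to DEBUG while keeping the HTTP stack crates silent.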
+    let filter = tracing_subscriber::EnvFilter::try_from_default_env().unwrap_or_else(|_| {
+        tracing_subscriber::EnvFilter::new("info,h2=off,hyper=off")
+    });
+
+    let layer = CompactFormatterLayer::stderr();
+
+    tracing_subscriber::registry()
+        .with(filter)
+        .with(layer)
+        .try_init()
+        .map_err(|e| Error::TracingInitError(e.to_string()))
+}
+
 /// The main telemetry system that aggregates and reports metrics.
 pub struct MetricsSystem {
     /// The metrics registry that holds all registered metrics (data + metadata).
diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/compact_formatter.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/compact_formatter.rs
index e2ebb1ec13..9d41bc37ed 100644
--- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/compact_formatter.rs
+++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/compact_formatter.rs
@@ -1,7 +1,7 @@
 // Copyright The OpenTelemetry Authors
 // SPDX-License-Identifier: Apache-2.0
 
-//! Compact log formatter - a minimal `fmt::layer()` alternative.
+//! Compact log formatter - a `fmt::layer()` alternative using `CompactLogRecord`.
 //!
 //! This module provides a lightweight formatting layer for tokio-tracing events
 //! that outputs human-readable log lines to stdout/stderr. It uses the same
@@ -35,9 +35,9 @@
 use otap_df_pdata::views::common::{AnyValueView, AttributeView, ValueType};
 use otap_df_pdata::views::otlp::bytes::common::{RawAnyValue, RawKeyValue};
 use otap_df_pdata::views::otlp::bytes::decode::read_varint;
-use tracing::{Event, Level, Subscriber};
 use tracing::callsite::Identifier;
 use tracing::span::{Attributes, Record};
+use tracing::{Event, Level, Subscriber};
 use tracing_subscriber::layer::{Context, Layer};
 use tracing_subscriber::registry::LookupSpan;
 
@@ -56,16 +56,16 @@ use super::direct_encoder::{DirectFieldVisitor, ProtoBuffer};
 pub struct CompactLogRecord {
     /// Callsite identifier - used to look up cached callsite info
     pub callsite_id: Identifier,
-    
+
     /// Timestamp in nanoseconds since Unix epoch
     pub timestamp_ns: u64,
-    
+
     /// Severity number: 1=TRACE, 5=DEBUG, 9=INFO, 13=WARN, 17=ERROR
     pub severity_number: u8,
-    
+
     /// Severity text - &'static str from Level::as_str()
     pub severity_text: &'static str,
-    
+
     /// Pre-encoded body and attributes (OTLP format for body field 5 + attrs field 6)
     pub body_attrs_bytes: Bytes,
 }
@@ -75,13 +75,13 @@
 pub struct CachedCallsite {
     /// Target module path - &'static from Metadata
     pub target: &'static str,
-    
+
     /// Event name - &'static from Metadata
     pub name: &'static str,
-    
+
     /// Source file - &'static from Metadata
     pub file: Option<&'static str>,
-    
+
     /// Source line
     pub line: Option<u32>,
 }
@@ -97,7 +97,7 @@
     pub fn new() -> Self {
         Self::default()
     }
-    
+
     /// Register a callsite from its metadata.
     pub fn register(&mut self, metadata: &'static tracing::Metadata<'static>) {
         let id = metadata.callsite();
@@ -108,7 +108,7 @@
             line: metadata.line(),
         });
     }
-    
+
     /// Get cached callsite info by identifier.
     pub fn get(&self, id: &Identifier) -> Option<&CachedCallsite> {
         self.callsites.get(id)
@@ -121,44 +121,75 @@
 /// Format a CompactLogRecord as a human-readable string.
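+///
+/// A minimal call sketch (assumes the record's callsite was registered in
+/// `cache`, as `CompactFormatterLayer` does in `register_callsite`):
+///
+/// ```ignore
+/// let line = format_log_record(&record, &cache, /* use_ansi */ false);
+/// writer.write_line(&line);
+/// ```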
/// -/// Output format: `2026-01-06T10:30:45.123Z INFO target::name: body [attr=value, ...]` -pub fn format_log_record(record: &CompactLogRecord, cache: &CallsiteCache) -> String { +/// Output format: `2026-01-06T10:30:45.123Z INFO target::name (file.rs:42): body [attr=value, ...]` +pub fn format_log_record( + record: &CompactLogRecord, + cache: &CallsiteCache, + use_ansi: bool, +) -> String { let callsite = cache.get(&record.callsite_id); - - let (target, name) = match callsite { - Some(cs) => (cs.target, cs.name), - None => ("", ""), + + let event_name = match callsite { + Some(cs) => format_event_name(cs.target, cs.name, cs.file, cs.line), + None => "".to_string(), }; - + let body_attrs = format_body_attrs(&record.body_attrs_bytes); - - format!( - "{} {:5} {}::{}: {}", - format_timestamp(record.timestamp_ns), - record.severity_text, - target, - name, - body_attrs, - ) + + if use_ansi { + let level_color = level_color(record.severity_text); + format!( + "{}{}{} {}{:5}{} {}{}{}: {}", + ANSI_DIM, + format_timestamp(record.timestamp_ns), + ANSI_RESET, + level_color, + record.severity_text, + ANSI_RESET, + ANSI_BOLD, + event_name, + ANSI_RESET, + body_attrs, + ) + } else { + format!( + "{} {:5} {}: {}", + format_timestamp(record.timestamp_ns), + record.severity_text, + event_name, + body_attrs, + ) + } +} + +/// Format callsite details as event_name string. +/// +/// Format: "target::name (file.rs:42)" or "target::name" if file/line unavailable. +#[inline] +fn format_event_name(target: &str, name: &str, file: Option<&str>, line: Option) -> String { + match (file, line) { + (Some(file), Some(line)) => format!("{}::{} ({}:{})", target, name, file, line), + _ => format!("{}::{}", target, name), + } } /// Format nanosecond timestamp as ISO 8601 (UTC). fn format_timestamp(nanos: u64) -> String { let secs = nanos / 1_000_000_000; let subsec_millis = (nanos % 1_000_000_000) / 1_000_000; - + // Convert to datetime components // Days since Unix epoch let days = secs / 86400; let time_of_day = secs % 86400; - + let hours = time_of_day / 3600; let minutes = (time_of_day % 3600) / 60; let seconds = time_of_day % 60; - + // Calculate year/month/day from days since epoch (1970-01-01) let (year, month, day) = days_to_ymd(days as i64); - + format!( "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}.{:03}Z", year, month, day, hours, minutes, seconds, subsec_millis @@ -178,7 +209,7 @@ fn days_to_ymd(days: i64) -> (i32, u32, u32) { let d = doy - (153 * mp + 2) / 5 + 1; let m = if mp < 10 { mp + 3 } else { mp - 9 }; let y = if m <= 2 { y + 1 } else { y }; - + (y as i32, m, d) } @@ -191,16 +222,16 @@ fn format_body_attrs(bytes: &Bytes) -> String { if bytes.is_empty() { return String::new(); } - + // The bytes contain LogRecord fields: // - field 5 (LOG_RECORD_BODY): AnyValue message // - field 6 (LOG_RECORD_ATTRIBUTES): repeated KeyValue messages - + let mut body_str = String::new(); let mut attrs = Vec::new(); let data = bytes.as_ref(); let mut pos = 0; - + while pos < data.len() { // Read field tag let (tag, next_pos) = match read_varint(data, pos) { @@ -208,15 +239,15 @@ fn format_body_attrs(bytes: &Bytes) -> String { None => break, }; pos = next_pos; - + let field_num = tag >> 3; let wire_type = tag & 0x7; - + if wire_type != wire_types::LEN { // Skip non-length-delimited fields (shouldn't happen for body/attrs) break; } - + // Read length-delimited content let (len, next_pos) = match read_varint(data, pos) { Some(v) => v, @@ -224,13 +255,13 @@ fn format_body_attrs(bytes: &Bytes) -> String { }; pos = next_pos; let end = 
pos + len as usize; - + if end > data.len() { break; } - + let field_bytes = &data[pos..end]; - + if field_num == LOG_RECORD_BODY { // Body: parse as AnyValue using pdata View let any_value = RawAnyValue::new(field_bytes); @@ -245,16 +276,16 @@ fn format_body_attrs(bytes: &Bytes) -> String { }; attrs.push(format!("{}={}", key, value)); } - + pos = end; } - + if !attrs.is_empty() { body_str.push_str(" ["); body_str.push_str(&attrs.join(", ")); body_str.push(']'); } - + body_str } @@ -345,6 +376,7 @@ pub enum OutputTarget { #[derive(Debug)] pub struct SimpleWriter { target: OutputTarget, + use_ansi: bool, } impl Default for SimpleWriter { @@ -353,17 +385,54 @@ impl Default for SimpleWriter { } } +// ANSI color codes +const ANSI_RESET: &str = "\x1b[0m"; +const ANSI_RED: &str = "\x1b[31m"; +const ANSI_YELLOW: &str = "\x1b[33m"; +const ANSI_GREEN: &str = "\x1b[32m"; +const ANSI_BLUE: &str = "\x1b[34m"; +const ANSI_MAGENTA: &str = "\x1b[35m"; +const ANSI_DIM: &str = "\x1b[2m"; +const ANSI_BOLD: &str = "\x1b[1m"; + impl SimpleWriter { - /// Create a writer that outputs to stdout. + /// Create a writer that outputs to stdout with ANSI colors enabled. pub fn stdout() -> Self { - Self { target: OutputTarget::Stdout } + Self { + target: OutputTarget::Stdout, + use_ansi: true, + } } - - /// Create a writer that outputs to stderr. + + /// Create a writer that outputs to stderr with ANSI colors enabled. pub fn stderr() -> Self { - Self { target: OutputTarget::Stderr } + Self { + target: OutputTarget::Stderr, + use_ansi: true, + } + } + + /// Create a writer that outputs to stdout without ANSI colors. + pub fn stdout_no_color() -> Self { + Self { + target: OutputTarget::Stdout, + use_ansi: false, + } } - + + /// Create a writer that outputs to stderr without ANSI colors. + pub fn stderr_no_color() -> Self { + Self { + target: OutputTarget::Stderr, + use_ansi: false, + } + } + + /// Returns whether ANSI colors are enabled. + pub fn use_ansi(&self) -> bool { + self.use_ansi + } + /// Write a log line (with newline). pub fn write_line(&self, line: &str) { match self.target { @@ -377,6 +446,19 @@ impl SimpleWriter { } } +/// Get ANSI color code for a severity level. +#[inline] +fn level_color(level: &str) -> &'static str { + match level { + "ERROR" => ANSI_RED, + "WARN" => ANSI_YELLOW, + "INFO" => ANSI_GREEN, + "DEBUG" => ANSI_BLUE, + "TRACE" => ANSI_MAGENTA, + _ => "", + } +} + // ============================================================================ // Layer Implementation // ============================================================================ @@ -397,7 +479,7 @@ impl CompactFormatterLayer { writer: SimpleWriter::stderr(), } } - + /// Create a new layer that writes to stdout. pub fn stdout() -> Self { Self { @@ -405,7 +487,7 @@ impl CompactFormatterLayer { writer: SimpleWriter::stdout(), } } - + /// Create a new layer that writes to stderr. 
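+    ///
+    /// A minimal setup sketch (mirrors `test_layer_integration` below):
+    ///
+    /// ```ignore
+    /// use tracing_subscriber::prelude::*;
+    /// tracing_subscriber::registry()
+    ///     .with(CompactFormatterLayer::stderr())
+    ///     .init();
+    /// ```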
pub fn stderr() -> Self { Self::new() @@ -429,19 +511,19 @@ where self.callsite_cache.write().unwrap().register(metadata); tracing::subscriber::Interest::always() } - + fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) { let metadata = event.metadata(); - + // Encode body and attributes to bytes let body_attrs_bytes = encode_body_and_attrs(event); - + // Get current timestamp let timestamp_ns = SystemTime::now() .duration_since(UNIX_EPOCH) .unwrap_or_default() .as_nanos() as u64; - + // Build compact record let record = CompactLogRecord { callsite_id: metadata.callsite(), @@ -450,42 +532,42 @@ where severity_text: metadata.level().as_str(), body_attrs_bytes, }; - + // Format and write immediately let cache = self.callsite_cache.read().unwrap(); - let line = format_log_record(&record, &cache); + let line = format_log_record(&record, &cache, self.writer.use_ansi()); self.writer.write_line(&line); } - + fn on_new_span(&self, _attrs: &Attributes<'_>, _id: &tracing::span::Id, _ctx: Context<'_, S>) { // Not handling spans in MVP } - + fn on_record(&self, _span: &tracing::span::Id, _values: &Record<'_>, _ctx: Context<'_, S>) { // Not handling spans in MVP } - + fn on_enter(&self, _id: &tracing::span::Id, _ctx: Context<'_, S>) { // Not handling spans in MVP } - + fn on_exit(&self, _id: &tracing::span::Id, _ctx: Context<'_, S>) { // Not handling spans in MVP } - + fn on_close(&self, _id: tracing::span::Id, _ctx: Context<'_, S>) { // Not handling spans in MVP } } /// Encode only body and attributes from an event to OTLP bytes. -fn encode_body_and_attrs(event: &Event<'_>) -> Bytes { +pub fn encode_body_and_attrs(event: &Event<'_>) -> Bytes { let mut buf = ProtoBuffer::with_capacity(256); - + // Visit fields to encode body (field 5) and attributes (field 6) let mut visitor = DirectFieldVisitor::new(&mut buf); event.record(&mut visitor); - + buf.into_bytes() } @@ -508,7 +590,7 @@ fn level_to_severity(level: &Level) -> u8 { mod tests { use super::*; use tracing_subscriber::prelude::*; - + #[test] fn test_format_timestamp() { // 2026-01-06T10:30:45.123Z in nanoseconds @@ -516,22 +598,22 @@ mod tests { let nanos: u64 = 1704067200 * 1_000_000_000; // 2024-01-01 00:00:00 UTC let formatted = format_timestamp(nanos); assert_eq!(formatted, "2024-01-01T00:00:00.000Z"); - + // Test with milliseconds let nanos_with_ms: u64 = 1704067200 * 1_000_000_000 + 123_000_000; let formatted = format_timestamp(nanos_with_ms); assert_eq!(formatted, "2024-01-01T00:00:00.123Z"); } - + #[test] fn test_days_to_ymd() { // 1970-01-01 is day 0 assert_eq!(days_to_ymd(0), (1970, 1, 1)); - + // 2024-01-01 is 19723 days after 1970-01-01 assert_eq!(days_to_ymd(19723), (2024, 1, 1)); } - + #[test] fn test_level_to_severity() { assert_eq!(level_to_severity(&Level::TRACE), 1); @@ -540,20 +622,20 @@ mod tests { assert_eq!(level_to_severity(&Level::WARN), 13); assert_eq!(level_to_severity(&Level::ERROR), 17); } - + #[test] fn test_callsite_cache() { let cache = CallsiteCache::new(); assert!(cache.callsites.is_empty()); } - + #[test] fn test_simple_writer_creation() { let _stdout = SimpleWriter::stdout(); let _stderr = SimpleWriter::stderr(); let _default = SimpleWriter::default(); } - + #[test] fn test_compact_formatter_layer_creation() { let _layer = CompactFormatterLayer::new(); @@ -561,22 +643,22 @@ mod tests { let _stderr = CompactFormatterLayer::stderr(); let _default = CompactFormatterLayer::default(); } - + #[test] fn test_layer_integration() { // Create the layer and subscriber let layer = CompactFormatterLayer::stderr(); 
let subscriber = tracing_subscriber::registry().with(layer); - + // Set as default for this thread temporarily let dispatch = tracing::Dispatch::new(subscriber); let _guard = tracing::dispatcher::set_default(&dispatch); - + // Emit some events - these should be formatted and written to stderr tracing::info!("Test info message"); tracing::warn!(count = 42, "Warning with attribute"); tracing::error!(error = "something failed", "Error occurred"); - + // The test verifies no panics occur; actual output goes to stderr } } diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/log_record.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/log_record.rs deleted file mode 100644 index f94aa4e22b..0000000000 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/log_record.rs +++ /dev/null @@ -1,474 +0,0 @@ -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -//! LogRecordView implementation for tokio-tracing events. -//! -//! This module provides the bridge between tracing::Event and our -//! OTLP bytes. All data is borrowed from the tracing event with zero copies. -//! -//! The key insight is that `TracingAnyValue<'a>` is `Copy` - it's just an enum -//! containing borrowed references. This means we can implement `AnyValueView` -//! directly on it without needing a wrapper type, and the lifetime `'a` is -//! tied directly to the underlying tracing event data. - -use otap_df_pdata::schema::{SpanId, TraceId}; -use otap_df_pdata::views::common::{AnyValueView, AttributeView, Str, ValueType}; -use otap_df_pdata::views::logs::LogRecordView; -use std::fmt; -use tracing::{Level, Metadata}; - -/// A LogRecordView implementation that wraps a tracing event. -/// -/// Uses zero-copy borrows throughout: -/// - `event_name`: `&'static str` since `Metadata::name()` is always static -/// - `target`: `&'a str` borrowed from `Metadata<'a>` -/// - All attribute keys and values are borrowed from the event -/// -/// The lifetime `'a` ties this struct to the tracing event callback scope. -/// Encoding to OTLP bytes must complete before the callback returns. -pub struct TracingLogRecord<'a> { - /// The event name - always static from tracing metadata - event_name: Option<&'static str>, - - /// The severity level from tracing - level: Level, - - /// Timestamp when the event occurred (nanoseconds since Unix epoch) - timestamp_nanos: u64, - - /// The target from tracing metadata, borrowed for the event lifetime - target: &'a str, - - /// Event fields - all borrowed from the tracing event - attributes: Vec>, - - /// Optional body/message for the log record (stored as raw &str, - /// constructed into TracingAnyValue on demand in body() method) - body: Option<&'a str>, -} - -impl<'a> TracingLogRecord<'a> { - /// Creates a new TracingLogRecord from tracing event components. - /// - /// The returned struct borrows from the metadata and attributes, - /// so it must be encoded before the tracing callback returns. - pub fn new( - metadata: &Metadata<'a>, - attributes: Vec>, - timestamp_nanos: u64, - ) -> Self { - Self { - event_name: Some(metadata.name()), - level: *metadata.level(), - timestamp_nanos, - target: metadata.target(), - attributes, - body: None, - } - } - - /// Sets the body/message for this log record. - pub fn with_body(mut self, body: &'a str) -> Self { - self.body = Some(body); - self - } - - /// Returns the target (typically module path) for this log record. 
- pub fn target(&self) -> &str { - self.target - } - - /// Creates a TracingLogRecord with a custom static event name. - /// - /// Use this for synthetic events like span.start/span.end where - /// you want a different event name than metadata.name(). - pub fn new_with_event_name( - metadata: &Metadata<'a>, - attributes: Vec>, - timestamp_nanos: u64, - event_name: &'static str, - ) -> Self { - Self { - event_name: Some(event_name), - level: *metadata.level(), - timestamp_nanos, - target: metadata.target(), - attributes, - body: None, - } - } - - /// Creates a log record for span end events. - /// - /// The span_id should be added as an attribute by the caller. - pub fn new_span_end( - target: &'a str, - attributes: Vec>, - timestamp_nanos: u64, - ) -> Self { - Self { - event_name: Some("span.end"), - level: Level::INFO, - timestamp_nanos, - target, - attributes, - body: None, - } - } -} - -impl<'a> LogRecordView for TracingLogRecord<'a> { - // Use 'a (the data lifetime) for the attribute type, not the GAT lifetime. - // This is correct because our attributes borrow from the original tracing event data. - type Attribute<'att> - = TracingAttribute<'a> - where - Self: 'att; - - // The iterator borrows from self (lifetime 'att) but yields items with lifetime 'a. - // Since TracingAttribute<'a> is Copy, we can just copy the values. - type AttributeIter<'att> - = std::iter::Copied>> - where - Self: 'att; - - // Body is constructed on demand from the stored &'a str. - // Since we create a fresh TracingAnyValue<'bod> each time, the GAT works. - type Body<'bod> - = TracingAnyValue<'bod> - where - Self: 'bod; - - fn time_unix_nano(&self) -> Option { - Some(self.timestamp_nanos) - } - - fn observed_time_unix_nano(&self) -> Option { - // Field not used - None - } - - fn severity_number(&self) -> Option { - // https://opentelemetry.io/docs/specs/otel/logs/data-model/#field-severitynumber - Some(match self.level { - Level::TRACE => 1, - Level::DEBUG => 5, - Level::INFO => 9, - Level::WARN => 13, - Level::ERROR => 17, - }) - } - - fn severity_text(&self) -> Option> { - Some(self.level.as_str().as_bytes()) - } - - fn body(&self) -> Option> { - // Construct TracingAnyValue on demand from stored &str. - // The lifetime 'bod comes from &self, but the data has lifetime 'a. - // Since 'a: 'bod (self contains 'a), this coercion is valid. - self.body.map(TracingAnyValue::Str) - } - - fn attributes(&self) -> Self::AttributeIter<'_> { - self.attributes.iter().copied() - } - - fn dropped_attributes_count(&self) -> u32 { - 0 - } - - fn flags(&self) -> Option { - None - } - - fn trace_id(&self) -> Option<&TraceId> { - None // TODO - } - - fn span_id(&self) -> Option<&SpanId> { - None // TODO - } - - fn event_name(&self) -> Option> { - self.event_name.map(|s| s.as_bytes()) - } -} - -/// Represents an attribute (key-value pair) from a tracing event. -/// -/// All data is borrowed from the tracing event with lifetime 'a. -/// This type is `Copy` because it only contains borrowed references. -#[derive(Debug, Clone, Copy)] -pub struct TracingAttribute<'a> { - /// The attribute key - borrowed from tracing - pub key: &'a str, - /// The attribute value - borrowed from tracing - pub value: TracingAnyValue<'a>, -} - -impl<'a> AttributeView for TracingAttribute<'a> { - type Val<'val> - = TracingAnyValue<'val> - where - Self: 'val; - - fn key(&self) -> Str<'_> { - self.key.as_bytes() - } - - fn value(&self) -> Option> { - Some(self.value) - } -} - -/// Represents a value from a tracing event field. 
-/// -/// This mirrors OTLP's AnyValue type system, supporting full structural fidelity -/// for nested data from tracing events (arrays, maps, etc.). -/// -/// All variants use borrowed references with lifetime 'a, enabling true zero-copy -/// from tracing events to OTLP bytes. The type is `Copy` because it only contains -/// primitive values or borrowed references - copying just copies the pointer/length, -/// not the underlying data. -#[derive(Debug, Clone, Copy)] -pub enum TracingAnyValue<'a> { - /// String value - borrowed - Str(&'a str), - /// Integer value (i64) - Int(i64), - /// Boolean value - Bool(bool), - /// Double-precision floating point value - Double(f64), - /// Bytes value - borrowed - Bytes(&'a [u8]), - /// Array of values - borrowed slice - Array(&'a [TracingAnyValue<'a>]), - /// Key-value list (like a map/object) - borrowed slice - KeyValueList(&'a [TracingAttribute<'a>]), -} - -/// Iterator for array values that yields copies of TracingAnyValue. -/// -/// Since TracingAnyValue is Copy, this just copies the small enum -/// (which contains borrowed references to the underlying data). -pub struct ArrayIterator<'a> { - inner: std::slice::Iter<'a, TracingAnyValue<'a>>, -} - -impl<'a> Iterator for ArrayIterator<'a> { - type Item = TracingAnyValue<'a>; - - fn next(&mut self) -> Option { - self.inner.next().copied() - } -} - -/// Iterator for nested KeyValueList attributes. -pub struct KeyValueListIterator<'a> { - inner: std::slice::Iter<'a, TracingAttribute<'a>>, -} - -impl<'a> Iterator for KeyValueListIterator<'a> { - type Item = TracingAttribute<'a>; - - fn next(&mut self) -> Option { - self.inner.next().copied() - } -} - -impl<'a> AnyValueView<'a> for TracingAnyValue<'a> { - type KeyValue = TracingAttribute<'a>; - type ArrayIter<'arr> - = ArrayIterator<'a> - where - Self: 'arr; - type KeyValueIter<'kv> - = KeyValueListIterator<'a> - where - Self: 'kv; - - fn value_type(&self) -> ValueType { - match self { - TracingAnyValue::Str(_) => ValueType::String, - TracingAnyValue::Int(_) => ValueType::Int64, - TracingAnyValue::Bool(_) => ValueType::Bool, - TracingAnyValue::Double(_) => ValueType::Double, - TracingAnyValue::Bytes(_) => ValueType::Bytes, - TracingAnyValue::Array(_) => ValueType::Array, - TracingAnyValue::KeyValueList(_) => ValueType::KeyValueList, - } - } - - fn as_string(&self) -> Option> { - match self { - TracingAnyValue::Str(s) => Some(s.as_bytes()), - _ => None, - } - } - - fn as_bool(&self) -> Option { - match self { - TracingAnyValue::Bool(b) => Some(*b), - _ => None, - } - } - - fn as_int64(&self) -> Option { - match self { - TracingAnyValue::Int(i) => Some(*i), - _ => None, - } - } - - fn as_double(&self) -> Option { - match self { - TracingAnyValue::Double(d) => Some(*d), - _ => None, - } - } - - fn as_bytes(&self) -> Option<&[u8]> { - match self { - TracingAnyValue::Bytes(b) => Some(b), - _ => None, - } - } - - fn as_array(&self) -> Option> { - match self { - TracingAnyValue::Array(arr) => Some(ArrayIterator { inner: arr.iter() }), - _ => None, - } - } - - fn as_kvlist(&self) -> Option> { - match self { - TracingAnyValue::KeyValueList(kvs) => Some(KeyValueListIterator { inner: kvs.iter() }), - _ => None, - } - } -} - -// Implement Display for easier debugging -impl<'a> fmt::Display for TracingAnyValue<'a> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - TracingAnyValue::Str(s) => write!(f, "{}", s), - TracingAnyValue::Int(i) => write!(f, "{}", i), - TracingAnyValue::Bool(b) => write!(f, "{}", b), - TracingAnyValue::Double(d) 
=> write!(f, "{}", d), - TracingAnyValue::Bytes(b) => write!(f, "{:?}", b), - TracingAnyValue::Array(arr) => { - write!(f, "[")?; - for (i, v) in arr.iter().enumerate() { - if i > 0 { - write!(f, ", ")?; - } - write!(f, "{}", v)?; - } - write!(f, "]") - } - TracingAnyValue::KeyValueList(kvs) => { - write!(f, "{{")?; - for (i, kv) in kvs.iter().enumerate() { - if i > 0 { - write!(f, ", ")?; - } - write!(f, "{}: {}", kv.key, kv.value)?; - } - write!(f, "}}") - } - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_tracing_attribute_creation() { - // Test basic construction with borrowed data - let key = "key1"; - let value = TracingAnyValue::Str("value1"); - let attr = TracingAttribute { key, value }; - - assert_eq!(attr.key, "key1"); - match attr.value { - TracingAnyValue::Str(s) => assert_eq!(s, "value1"), - _ => panic!("Expected string value"), - } - } - - #[test] - fn test_severity_mapping() { - // Test that tracing levels map correctly to OTLP severity numbers - let levels_and_numbers = [ - (Level::TRACE, 1), - (Level::DEBUG, 5), - (Level::INFO, 9), - (Level::WARN, 13), - (Level::ERROR, 17), - ]; - - for (level, expected_number) in levels_and_numbers { - let severity_number = match level { - Level::TRACE => 1, - Level::DEBUG => 5, - Level::INFO => 9, - Level::WARN => 13, - Level::ERROR => 17, - }; - assert_eq!(severity_number, expected_number); - } - } - - #[test] - fn test_any_value_types() { - use otap_df_pdata::views::common::AnyValueView; - - let str_val = TracingAnyValue::Str("test"); - assert!(str_val.as_string().is_some()); - assert!(str_val.as_int64().is_none()); - - let int_val = TracingAnyValue::Int(123); - assert!(int_val.as_int64().is_some()); - assert_eq!(int_val.as_int64().unwrap(), 123); - - let bool_val = TracingAnyValue::Bool(true); - assert!(bool_val.as_bool().is_some()); - assert_eq!(bool_val.as_bool().unwrap(), true); - - let double_val = TracingAnyValue::Double(3.14); - assert!(double_val.as_double().is_some()); - assert!((double_val.as_double().unwrap() - 3.14).abs() < f64::EPSILON); - } - - #[test] - fn test_zero_copy_semantics() { - // Verify that TracingAnyValue is Copy (no heap allocation) - let original = TracingAnyValue::Str("hello"); - let copied = original; // This should be a copy, not a move - let _also_original = original; // Original still usable - - match copied { - TracingAnyValue::Str(s) => assert_eq!(s, "hello"), - _ => panic!("Expected string"), - } - } - - #[test] - fn test_attribute_is_copy() { - // Verify that TracingAttribute is Copy - let attr = TracingAttribute { - key: "test_key", - value: TracingAnyValue::Int(42), - }; - let copied = attr; - let _also_original = attr; // Original still usable - - assert_eq!(copied.key, "test_key"); - } -} diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs index 83cecf0850..e69d395c32 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs @@ -1,26 +1,20 @@ // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 -//! tokio-tracing support for directly encoding and formatting OTLP bytes. +//! Compact log formatting for tokio-tracing events. +//! +//! This module provides a lightweight formatting layer for tokio-tracing events +//! that encodes body+attributes to partial OTLP bytes, then formats them for +//! console output. 
pub mod compact_formatter; pub mod direct_encoder; -pub mod log_record; -pub mod subscriber; -// Compact formatter exports (recommended for minimal fmt::layer() alternative) +// Compact formatter exports (the primary API) pub use compact_formatter::{ CachedCallsite, CallsiteCache, CompactFormatterLayer, CompactLogRecord, OutputTarget, - SimpleWriter, format_log_record, + SimpleWriter, encode_body_and_attrs, format_log_record, }; -// Direct encoder exports (for zero-allocation OTLP encoding) -pub use direct_encoder::{ - DirectFieldVisitor, DirectLogRecordEncoder, LengthPlaceholder, ProtoBuffer, - StatefulDirectEncoder, encode_len_placeholder, encode_resource_bytes_from_attrs, - patch_len_placeholder, -}; - -// Legacy View-based exports (for compatibility) -pub use log_record::{TracingAnyValue, TracingAttribute, TracingLogRecord}; -pub use subscriber::OtlpTracingLayer; +// Direct encoder exports (used internally, exposed for benchmarking) +pub use direct_encoder::{DirectFieldVisitor, ProtoBuffer}; diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/otlp_bytes_formatter.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/otlp_bytes_formatter.rs deleted file mode 100644 index 1f6c688042..0000000000 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/otlp_bytes_formatter.rs +++ /dev/null @@ -1,658 +0,0 @@ -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -//! OTLP bytes formatting layer - decodes OTLP bytes back to human-readable output. -//! -//! This layer provides a bridge between OTLP-encoded telemetry and human-readable -//! console output. The architecture is: -//! -//! ```text -//! tracing::info!() → OtlpTracingLayer → encode to OTLP bytes -//! ↓ -//! OtlpBytesFormattingLayer → decode OTLP bytes -//! ↓ ↓ -//! construct LogsDataView → format human-readable -//! ``` -//! -//! This approach: -//! - Removes dependency on opentelemetry crates for formatting -//! - Preserves complete structural fidelity (OTLP is lossless) -//! - Enables future async formatting in separate thread -//! - Allows colorized, customizable output -//! -//! # Example -//! -//! ```ignore -//! use tracing_subscriber::prelude::*; -//! use otap_df_telemetry::tracing_integration::{OtlpTracingLayer, OtlpBytesFormattingLayer}; -//! -//! // Encode events to OTLP bytes -//! let (tx, rx) = std::sync::mpsc::channel(); -//! let otlp_layer = OtlpTracingLayer::new(move |log_record| { -//! // encode to OTLP bytes and send via channel -//! tx.send(bytes).unwrap(); -//! }); -//! -//! // Format OTLP bytes for human output -//! let fmt_layer = OtlpBytesFormattingLayer::new(rx); -//! -//! tracing_subscriber::registry() -//! .with(otlp_layer) -//! .with(fmt_layer) -//! .init(); -//! ``` - -use otap_df_pdata::views::logs::{LogRecordView, LogsDataView, ResourceLogsView, ScopeLogsView}; -use otap_df_pdata::views::otlp::bytes::logs::RawLogsData; -use otap_df_pdata::views::common::{AnyValueView, AttributeView, InstrumentationScopeView}; -use std::fmt::Write as FmtWrite; -use std::io::{self, Write as IoWrite}; -use std::time::UNIX_EPOCH; -use tracing_subscriber::fmt::MakeWriter; - -/// A tracing-subscriber layer that formats OTLP-encoded bytes for human-readable output. -/// -/// This layer doesn't directly subscribe to tracing events. Instead, it receives -/// OTLP-encoded bytes (from OtlpTracingLayer), decodes them, and formats them -/// for console output. 
-/// -/// # Type Parameters -/// - `W`: Writer type for output (e.g., stdout, file) -pub struct OtlpBytesFormattingLayer -where - W: for<'writer> MakeWriter<'writer> + 'static, -{ - /// Writer factory for output - make_writer: W, - /// Whether to use ANSI colors - with_ansi: bool, - /// Whether to include timestamps - with_timestamp: bool, - /// Whether to include level - with_level: bool, - /// Whether to include target (module path/scope name) - with_target: bool, - /// Whether to include event_name - with_event_name: bool, - /// Whether to include thread names - with_thread_names: bool, -} - -impl OtlpBytesFormattingLayer -where - W: for<'writer> MakeWriter<'writer> + 'static, -{ - /// Creates a new OtlpBytesFormattingLayer with default settings. - /// - /// Default format matches tokio's: timestamp, level, target, event_name, message, attributes - /// - /// # Arguments - /// * `make_writer` - Factory for creating writers (e.g., `std::io::stdout`) - pub fn new(make_writer: W) -> Self { - Self { - make_writer, - with_ansi: true, - with_timestamp: true, - with_level: true, - with_target: true, - with_event_name: true, - with_thread_names: true, - } - } - - /// Sets whether to use ANSI color codes. - pub fn with_ansi(mut self, ansi: bool) -> Self { - self.with_ansi = ansi; - self - } - - /// Sets whether to include timestamps. - pub fn with_timestamp(mut self, timestamp: bool) -> Self { - self.with_timestamp = timestamp; - self - } - - /// Sets whether to include log level. - pub fn with_level(mut self, level: bool) -> Self { - self.with_level = level; - self - } - - /// Sets whether to include target (scope name/module path). - pub fn with_target(mut self, target: bool) -> Self { - self.with_target = target; - self - } - - /// Sets whether to include event_name. - pub fn with_event_name(mut self, event_name: bool) -> Self { - self.with_event_name = event_name; - self - } - - /// Sets whether to include thread names. - pub fn with_thread_names(mut self, thread_names: bool) -> Self { - self.with_thread_names = thread_names; - self - } - - /// Formats OTLP-encoded bytes to human-readable output. - /// - /// This is the main entry point for formatting. Call this method when you - /// receive OTLP bytes from the encoding layer. - pub fn format_otlp_bytes(&self, otlp_bytes: &[u8]) -> Result<(), FormatError> { - // Construct LogsDataView from OTLP bytes (zero-copy) - let logs_view = RawLogsData::new(otlp_bytes); - - // Get writer - let mut writer = self.make_writer.make_writer(); - - // Iterate through the logs data structure - for resource_logs in logs_view.resources() { - for scope_logs in resource_logs.scopes() { - // Extract scope name (target) once for all records - let scope_name = if let Some(scope) = scope_logs.scope() { - if let Some(name) = scope.name() { - Some(String::from_utf8_lossy(name).to_string()) - } else { - None - } - } else { - None - }; - - for log_record in scope_logs.log_records() { - self.format_log_record(&log_record, scope_name.as_deref(), &mut writer)?; - } - } - } - - Ok(()) - } - - /// Formats a single log record. 
- /// - /// Format: `timestamp LEVEL target{::event_name}: message key=value` - /// Example: `2024-12-18T10:30:45.123456Z INFO app::server{listen}: Server started port=8080` - fn format_log_record( - &self, - log_record: &L, - scope_name: Option<&str>, - writer: &mut impl IoWrite, - ) -> Result<(), FormatError> { - let mut buffer = String::new(); - - // Timestamp - ISO8601 format like tokio - if self.with_timestamp { - if let Some(ts_nanos) = log_record.time_unix_nano() { - let timestamp = format_iso8601_timestamp(ts_nanos); - write!(&mut buffer, "{} ", timestamp)?; - } - } - - // Level with colors and padding - if self.with_level { - if let Some(severity) = log_record.severity_number() { - let level_str = severity_to_level_str(severity); - if self.with_ansi { - let colored = colorize_level(level_str); - write!(&mut buffer, "{:5} ", colored)?; - } else { - write!(&mut buffer, "{:5} ", level_str)?; - } - } - } - - // Thread name - if self.with_thread_names { - let thread_name = std::thread::current().name() - .unwrap_or("") - .to_string(); - write!(&mut buffer, "{}: ", thread_name)?; - } - - // Target (scope name / module path) - if self.with_target { - if let Some(target) = scope_name { - write!(&mut buffer, "{}", target)?; - - // Event name (if configured and present) - if self.with_event_name { - if let Some(event_name_bytes) = log_record.event_name() { - if let Ok(event_name) = std::str::from_utf8(event_name_bytes) { - // Format like tokio: target{event_name} - write!(&mut buffer, "{{{}}}", event_name)?; - } - } - } - - write!(&mut buffer, ": ")?; - } - } - - // Body/message - if let Some(body) = log_record.body() { - write!(&mut buffer, "{}", format_any_value(&body))?; - } - - // Attributes (key=value pairs) - let mut first_attr = true; - for attr in log_record.attributes() { - let key_str = String::from_utf8_lossy(attr.key()); - if let Some(value) = attr.value() { - if first_attr { - write!(&mut buffer, " ")?; - first_attr = false; - } else { - write!(&mut buffer, " ")?; - } - write!(&mut buffer, "{}={}", key_str, format_any_value(&value))?; - } - } - - // Write newline - writeln!(&mut buffer)?; - - // Write to output - writer.write_all(buffer.as_bytes())?; - writer.flush()?; - - Ok(()) - } -} - -/// Format a unix timestamp (nanoseconds) as ISO8601. -/// -/// Format: `2024-12-18T10:30:45.123456Z` -fn format_iso8601_timestamp(nanos: u64) -> String { - let secs = nanos / 1_000_000_000; - let subsec_nanos = (nanos % 1_000_000_000) as u32; - - // Convert to SystemTime - let duration = std::time::Duration::new(secs, subsec_nanos); - let system_time = UNIX_EPOCH + duration; - - // Get seconds and subseconds for formatting - let since_epoch = system_time.duration_since(UNIX_EPOCH).unwrap(); - let total_secs = since_epoch.as_secs(); - let micros = subsec_nanos / 1000; - - // Calculate date/time components - let days_since_epoch = total_secs / 86400; - let secs_today = total_secs % 86400; - - let hours = secs_today / 3600; - let minutes = (secs_today % 3600) / 60; - let seconds = secs_today % 60; - - // Simple epoch-based date calculation (not perfect but good enough) - let year = 1970 + (days_since_epoch / 365); - let day_of_year = days_since_epoch % 365; - let month = (day_of_year / 30) + 1; - let day = (day_of_year % 30) + 1; - - format!( - "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}.{:06}Z", - year, month, day, hours, minutes, seconds, micros - ) -} - -/// Convert OTLP severity number to level string. 
-fn severity_to_level_str(severity: i32) -> &'static str { - match severity { - 1..=4 => "TRACE", - 5..=8 => "DEBUG", - 9..=12 => "INFO", - 13..=16 => "WARN", - 17..=24 => "ERROR", - _ => "UNKNOWN", - } -} - -/// Colorize level string with ANSI codes. -fn colorize_level(level: &str) -> String { - match level { - "TRACE" => format!("\x1b[35m{}\x1b[0m", level), // Magenta - "DEBUG" => format!("\x1b[34m{}\x1b[0m", level), // Blue - "INFO" => format!("\x1b[32m{}\x1b[0m", level), // Green - "WARN" => format!("\x1b[33m{}\x1b[0m", level), // Yellow - "ERROR" => format!("\x1b[31m{}\x1b[0m", level), // Red - _ => level.to_string(), - } -} - -/// Format an AnyValue for display. -fn format_any_value<'a>(value: &impl AnyValueView<'a>) -> String { - use otap_df_pdata::views::common::ValueType; - - match value.value_type() { - ValueType::String => { - if let Some(s) = value.as_string() { - String::from_utf8_lossy(s).to_string() - } else { - "".to_string() - } - } - ValueType::Int64 => { - if let Some(i) = value.as_int64() { - i.to_string() - } else { - "".to_string() - } - } - ValueType::Bool => { - if let Some(b) = value.as_bool() { - b.to_string() - } else { - "".to_string() - } - } - ValueType::Double => { - if let Some(d) = value.as_double() { - format!("{:.6}", d) - } else { - "".to_string() - } - } - ValueType::Bytes => { - if let Some(bytes) = value.as_bytes() { - format!("{:?}", bytes) - } else { - "".to_string() - } - } - ValueType::Array => { - if let Some(array_iter) = value.as_array() { - let mut parts = Vec::new(); - for item in array_iter { - parts.push(format_any_value(&item)); - } - format!("[{}]", parts.join(", ")) - } else { - "[]".to_string() - } - } - ValueType::KeyValueList => { - if let Some(kvlist_iter) = value.as_kvlist() { - let mut parts = Vec::new(); - for kv in kvlist_iter { - let key_str = String::from_utf8_lossy(kv.key()).to_string(); - if let Some(val) = kv.value() { - parts.push(format!("{}={}", key_str, format_any_value(&val))); - } - } - format!("{{{}}}", parts.join(", ")) - } else { - "{}".to_string() - } - } - ValueType::Empty => "".to_string(), - } -} - -/// Error type for formatting operations. -#[derive(Debug)] -pub enum FormatError { - /// I/O error - Io(io::Error), - /// Format error - Fmt(std::fmt::Error), -} - -impl From for FormatError { - fn from(err: io::Error) -> Self { - FormatError::Io(err) - } -} - -impl From for FormatError { - fn from(err: std::fmt::Error) -> Self { - FormatError::Fmt(err) - } -} - -impl std::fmt::Display for FormatError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - FormatError::Io(e) => write!(f, "I/O error: {}", e), - FormatError::Fmt(e) => write!(f, "Format error: {}", e), - } - } -} - -impl std::error::Error for FormatError {} - -// Note: This layer doesn't implement Layer trait because it doesn't subscribe -// to tracing events directly. It receives OTLP bytes through a separate channel -// or callback mechanism. See examples for typical usage patterns. 
- -#[cfg(test)] -mod tests { - use super::*; - use crate::self_tracing::direct_encoder::{ - StatefulDirectEncoder, encode_resource_bytes_from_attrs, - }; - use std::sync::{Arc, Mutex}; - use tracing_subscriber::prelude::*; - use tracing_subscriber::layer::Layer; - use tracing_subscriber::registry::LookupSpan; - - /// Test writer that captures output to a shared buffer - struct TestWriter { - buffer: Arc>>, - } - - impl TestWriter { - fn new_shared() -> (Self, Arc>>) { - let buffer = Arc::new(Mutex::new(Vec::new())); - (Self { buffer: buffer.clone() }, buffer) - } - } - - impl IoWrite for TestWriter { - fn write(&mut self, buf: &[u8]) -> io::Result { - let mut buffer = self.buffer.lock().unwrap(); - buffer.extend_from_slice(buf); - Ok(buf.len()) - } - - fn flush(&mut self) -> io::Result<()> { - Ok(()) - } - } - - impl<'a> MakeWriter<'a> for TestWriter { - type Writer = TestWriter; - - fn make_writer(&'a self) -> Self::Writer { - TestWriter { - buffer: self.buffer.clone(), - } - } - } - - /// Helper layer for tests that captures events using StatefulDirectEncoder - struct TestEncoderLayer { - encoder: Arc>, - } - - impl Layer for TestEncoderLayer - where - S: tracing::Subscriber + for<'a> LookupSpan<'a>, - { - fn on_event(&self, event: &tracing::Event<'_>, _ctx: tracing_subscriber::layer::Context<'_, S>) { - let mut encoder = self.encoder.lock().unwrap(); - encoder.encode_event(event); - } - } - - /// Helper struct for end-to-end tests - struct TestHarness { - encoder: Arc>, - dispatch: tracing::Dispatch, - } - - impl TestHarness { - /// Create a new test harness with the given resource attributes - fn new(resource_attrs: &[(&str, &str)]) -> Self { - let resource_bytes = encode_resource_bytes_from_attrs(resource_attrs); - let encoder = Arc::new(Mutex::new(StatefulDirectEncoder::new(4096, resource_bytes))); - let layer = TestEncoderLayer { encoder: encoder.clone() }; - let subscriber = tracing_subscriber::registry().with(layer); - let dispatch = tracing::Dispatch::new(subscriber); - Self { encoder, dispatch } - } - - /// Run a closure that emits tracing events, then return formatted output - fn capture_and_format(&self, emit_events: F) -> String - where - F: FnOnce(), - { - // Emit events with our subscriber - { - let _guard = tracing::dispatcher::set_default(&self.dispatch); - emit_events(); - } - - // Flush and get bytes - let otlp_bytes = self.encoder.lock().unwrap().flush(); - - // Format the bytes - let (writer, output_buffer) = TestWriter::new_shared(); - let formatter = OtlpBytesFormattingLayer::new(writer) - .with_ansi(false) - .with_timestamp(false) - .with_thread_names(false); - - let _ = formatter.format_otlp_bytes(&otlp_bytes); - - let output = output_buffer.lock().unwrap(); - String::from_utf8_lossy(&output).to_string() - } - } - - /// Test formatting a simple INFO message - #[test] - fn test_format_simple_info_message() { - let harness = TestHarness::new(&[("service.name", "my-service")]); - - let output = harness.capture_and_format(|| { - tracing::info!(target: "my_module", "Test message"); - }); - - assert!(output.contains("INFO"), "Should contain INFO level: {}", output); - assert!(output.contains("my_module"), "Should contain target: {}", output); - assert!(output.contains("Test message"), "Should contain message: {}", output); - } - - /// Test formatting an event with attributes - #[test] - fn test_format_event_with_attributes() { - let harness = TestHarness::new(&[("service.name", "attr-test")]); - - let output = harness.capture_and_format(|| { - tracing::warn!(target: 
"server", port = 8080, host = "localhost", "Server starting"); - }); - - assert!(output.contains("WARN"), "Should contain WARN level: {}", output); - assert!(output.contains("server"), "Should contain target: {}", output); - assert!(output.contains("Server starting"), "Should contain message: {}", output); - assert!(output.contains("port=8080"), "Should contain port attribute: {}", output); - assert!(output.contains("host=localhost"), "Should contain host attribute: {}", output); - } - - /// Test formatting multiple events with different levels - #[test] - fn test_format_multiple_levels() { - let harness = TestHarness::new(&[]); - - let output = harness.capture_and_format(|| { - tracing::trace!(target: "app", "Trace message"); - tracing::debug!(target: "app", "Debug message"); - tracing::info!(target: "app", "Info message"); - tracing::warn!(target: "app", "Warn message"); - tracing::error!(target: "app", "Error message"); - }); - - // Check all levels are present - assert!(output.contains("TRACE"), "Should contain TRACE: {}", output); - assert!(output.contains("DEBUG"), "Should contain DEBUG: {}", output); - assert!(output.contains("INFO"), "Should contain INFO: {}", output); - assert!(output.contains("WARN"), "Should contain WARN: {}", output); - assert!(output.contains("ERROR"), "Should contain ERROR: {}", output); - - // Check all messages are present - assert!(output.contains("Trace message"), "Should contain trace message: {}", output); - assert!(output.contains("Debug message"), "Should contain debug message: {}", output); - assert!(output.contains("Info message"), "Should contain info message: {}", output); - assert!(output.contains("Warn message"), "Should contain warn message: {}", output); - assert!(output.contains("Error message"), "Should contain error message: {}", output); - } - - /// Test that different targets create separate scope batches - #[test] - fn test_different_targets_different_scopes() { - let harness = TestHarness::new(&[("service.name", "multi-scope-test")]); - - let output = harness.capture_and_format(|| { - tracing::info!(target: "module_a", "From module A"); - tracing::info!(target: "module_b", "From module B"); - tracing::info!(target: "module_a", "Another from A"); - }); - - // Check both modules appear - assert!(output.contains("module_a"), "Should contain module_a: {}", output); - assert!(output.contains("module_b"), "Should contain module_b: {}", output); - assert!(output.contains("From module A"), "Should contain message A: {}", output); - assert!(output.contains("From module B"), "Should contain message B: {}", output); - assert!(output.contains("Another from A"), "Should contain second A message: {}", output); - } - - /// Test formatting with various attribute types - #[test] - fn test_format_various_attribute_types() { - let harness = TestHarness::new(&[]); - - let output = harness.capture_and_format(|| { - tracing::info!( - target: "types", - string_val = "hello", - int_val = 42i64, - bool_val = true, - float_val = 3.14f64, - "Testing attribute types" - ); - }); - - assert!(output.contains("string_val=hello"), "Should contain string attr: {}", output); - assert!(output.contains("int_val=42"), "Should contain int attr: {}", output); - assert!(output.contains("bool_val=true"), "Should contain bool attr: {}", output); - // Float might be formatted differently, just check it's there - assert!(output.contains("float_val="), "Should contain float attr: {}", output); - assert!(output.contains("Testing attribute types"), "Should contain message: {}", 
output); - } - - /// Test the timestamp formatter - #[test] - fn test_format_iso8601_timestamp() { - // Test a known timestamp: 2024-01-01T00:00:00.000000Z - // Unix epoch for 2024-01-01T00:00:00Z is 1704067200 seconds - let nanos = 1704067200_000_000_000u64; - let formatted = format_iso8601_timestamp(nanos); - - // The timestamp should be roughly correct (our simple algorithm isn't perfect) - assert!(formatted.starts_with("20"), "Should start with century: {}", formatted); - assert!(formatted.ends_with("Z"), "Should end with Z: {}", formatted); - assert!(formatted.contains("T"), "Should have T separator: {}", formatted); - } - - /// Test severity to level string conversion - #[test] - fn test_severity_to_level_str() { - assert_eq!(severity_to_level_str(1), "TRACE"); - assert_eq!(severity_to_level_str(5), "DEBUG"); - assert_eq!(severity_to_level_str(9), "INFO"); - assert_eq!(severity_to_level_str(13), "WARN"); - assert_eq!(severity_to_level_str(17), "ERROR"); - } -} diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/subscriber.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/subscriber.rs deleted file mode 100644 index c7ecacbdaf..0000000000 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/subscriber.rs +++ /dev/null @@ -1,249 +0,0 @@ -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -//! Tracing subscriber layer that captures events as TracingLogRecord instances. -//! -//! This layer integrates with the tracing-subscriber ecosystem, allowing us to: -//! 1. Capture all tracing events (from tokio macros and 3rd-party libraries) -//! 2. Convert them to TracingLogRecord (which implements LogRecordView) -//! 3. Encode them using our stateful OTLP encoder -//! -//! The layer uses a visitor pattern to extract field values from events and -//! constructs TracingLogRecord instances that can be encoded directly. -//! -//! **Important**: This layer is designed for single-threaded use. The callback -//! should encode the log record to OTLP bytes immediately - only bytes should -//! cross thread boundaries. -//! -//! **Zero-copy design**: All attribute keys and values are borrowed with lifetimes -//! tied to the tracing event callback. The `FieldVisitor` uses a string arena to -//! hold any formatted strings that need allocation. - -use super::log_record::{TracingAnyValue, TracingAttribute, TracingLogRecord}; -use std::time::{SystemTime, UNIX_EPOCH}; -use tracing::span::Attributes; -use tracing::{Event, Id, Subscriber}; -use tracing_subscriber::layer::{Context, Layer}; -use tracing_subscriber::registry::LookupSpan; - -/// Owned value type for span storage (spans outlive individual events). -#[derive(Clone)] -enum OwnedValue { - Str(String), - Int(i64), - Bool(bool), - Double(f64), -} - -impl OwnedValue { - /// Convert to a borrowed TracingAnyValue given a lifetime. - fn as_borrowed(&self) -> TracingAnyValue<'_> { - match self { - OwnedValue::Str(s) => TracingAnyValue::Str(s.as_str()), - OwnedValue::Int(i) => TracingAnyValue::Int(*i), - OwnedValue::Bool(b) => TracingAnyValue::Bool(*b), - OwnedValue::Double(d) => TracingAnyValue::Double(*d), - } - } -} - -/// Tracing subscriber layer that captures events and spans as OTLP log records. -/// -/// This layer implements an unconventional approach where spans are treated as pairs -/// of log records (start/end) rather than as first-class span objects. This aligns -/// with unified dataflow architectures where all telemetry flows through a single -/// log pipeline. 
-/// -/// **Note**: This layer is `!Send` because it uses `RefCell` internally. The callback -/// should encode log records to OTLP bytes immediately - only bytes cross thread -/// boundaries. -pub struct OtlpTracingLayer -where - F: for<'a> Fn(TracingLogRecord<'a>) + 'static, -{ - /// Callback function that receives each TracingLogRecord - on_event: F, -} - -impl OtlpTracingLayer -where - F: for<'a> Fn(TracingLogRecord<'a>) + 'static, -{ - /// Creates a new OtlpTracingLayer with the given event handler. - /// - /// # Arguments - /// * `on_event` - Callback invoked for each tracing event, receiving a TracingLogRecord - pub fn new(on_event: F) -> Self { - Self { on_event } - } -} - -impl Layer for OtlpTracingLayer -where - S: Subscriber + for<'a> LookupSpan<'a>, - F: for<'a> Fn(TracingLogRecord<'a>) + 'static, -{ - fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) { - // Get timestamp - let timestamp_nanos = SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap_or_default() - .as_nanos() as u64; - - // Extract fields using visitor with string arena - let mut visitor = FieldVisitor::new(); - event.record(&mut visitor); - - // Build attributes from collected data - // The visitor's arena holds any allocated strings - let attributes: Vec> = visitor - .attr_keys - .iter() - .zip(visitor.attr_values.iter()) - .map(|(key, value)| TracingAttribute { - key, - value: value.as_borrowed(), - }) - .collect(); - - // Build TracingLogRecord with borrowed message - let message_ref = visitor.message.as_deref().unwrap_or(""); - let log_record = TracingLogRecord::new(event.metadata(), attributes, timestamp_nanos) - .with_body(message_ref); - - // Invoke the callback - (self.on_event)(log_record); - } - - fn on_new_span(&self, _attrs: &Attributes<'_>, _id: &Id, _ctx: Context<'_, S>) { - // TODO: emit an event - } - - fn on_close(&self, _id: Id, _ctx: Context<'_, S>) { - // TODO: emit an event - } -} - -/// Visitor that extracts field values from a tracing event. -/// -/// This implements tracing::field::Visit to walk through all fields in an event -/// and collect them as attribute key-value pairs. -/// -/// **Zero-copy design**: Field names are `&'static str` from tracing. -/// String values that need allocation (debug formatting) are stored in owned form. 
-struct FieldVisitor { - /// Attribute keys (all &'static str from field.name()) - attr_keys: Vec<&'static str>, - /// Attribute values (owned to support debug formatting) - attr_values: Vec, - /// The message/body (from the "message" field, if present) - message: Option, -} - -impl FieldVisitor { - fn new() -> Self { - Self { - attr_keys: Vec::new(), - attr_values: Vec::new(), - message: None, - } - } -} - -impl tracing::field::Visit for FieldVisitor { - fn record_f64(&mut self, field: &tracing::field::Field, value: f64) { - if field.name() == "message" { - return; - } - self.attr_keys.push(field.name()); - self.attr_values.push(OwnedValue::Double(value)); - } - - fn record_i64(&mut self, field: &tracing::field::Field, value: i64) { - if field.name() == "message" { - return; - } - self.attr_keys.push(field.name()); - self.attr_values.push(OwnedValue::Int(value)); - } - - fn record_u64(&mut self, field: &tracing::field::Field, value: u64) { - if field.name() == "message" { - return; - } - self.attr_keys.push(field.name()); - self.attr_values.push(OwnedValue::Int(value as i64)); - } - - fn record_bool(&mut self, field: &tracing::field::Field, value: bool) { - if field.name() == "message" { - return; - } - self.attr_keys.push(field.name()); - self.attr_values.push(OwnedValue::Bool(value)); - } - - fn record_str(&mut self, field: &tracing::field::Field, value: &str) { - if field.name() == "message" { - self.message = Some(value.to_string()); - return; - } - self.attr_keys.push(field.name()); - self.attr_values.push(OwnedValue::Str(value.to_string())); - } - - fn record_debug(&mut self, field: &tracing::field::Field, value: &dyn std::fmt::Debug) { - if field.name() == "message" { - self.message = Some(format!("{:?}", value)); - return; - } - self.attr_keys.push(field.name()); - self.attr_values - .push(OwnedValue::Str(format!("{:?}", value))); - } -} - -#[cfg(test)] -mod tests { - use super::*; - use std::cell::RefCell; - use tracing_subscriber::prelude::*; - - #[test] - fn test_otlp_layer_captures_events() { - use otap_df_pdata::views::logs::LogRecordView; - - // Thread-local storage for captured log records (no Send needed) - thread_local! { - static CAPTURED: RefCell, Option)>> = const { RefCell::new(Vec::new()) }; - } - - let layer = OtlpTracingLayer::new(|log_record| { - CAPTURED.with(|captured| { - captured.borrow_mut().push(( - log_record - .severity_text() - .map(|s| String::from_utf8_lossy(s).to_string()), - log_record - .event_name() - .map(|s| String::from_utf8_lossy(s).to_string()), - )); - }); - }); - - let subscriber = tracing_subscriber::registry().with(layer); - - // Use Dispatch and set_default for thread-local subscriber (no Send+Sync required) - let dispatch = tracing::Dispatch::new(subscriber); - let _guard = tracing::dispatcher::set_default(&dispatch); - - tracing::info!(name: "test.event", "Test message"); - tracing::warn!(name: "test.warning", "Warning message"); - - // Verify captured records - CAPTURED.with(|captured| { - let records = captured.borrow(); - assert_eq!(records.len(), 2); - }); - } -} diff --git a/rust/otap-dataflow/src/main.rs b/rust/otap-dataflow/src/main.rs index e242b472cf..9d753e2dab 100644 --- a/rust/otap-dataflow/src/main.rs +++ b/rust/otap-dataflow/src/main.rs @@ -107,6 +107,10 @@ fn parse_core_id_range(s: &str) -> Result { } fn main() -> Result<(), Box> { + // Initialize raw logging as early as possible so that log messages during + // config parsing and validation appear well formatted. 
+ otap_df_telemetry::init_raw_logging(); + // Initialize rustls crypto provider (required for rustls 0.23+) // We use ring as the default provider #[cfg(feature = "experimental-tls")] From 52a96105607547dceb7e980739934a420c61bb39 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Tue, 6 Jan 2026 18:08:28 -0800 Subject: [PATCH 08/92] workr out --- .../benchmarks/benches/self_tracing/main.rs | 65 +- .../otap-dataflow/crates/telemetry/src/lib.rs | 51 -- .../src/self_tracing/compact_formatter.rs | 664 ------------------ .../telemetry/src/self_tracing/formatter.rs | 503 +++++++++++++ .../crates/telemetry/src/self_tracing/mod.rs | 117 ++- 5 files changed, 625 insertions(+), 775 deletions(-) delete mode 100644 rust/otap-dataflow/crates/telemetry/src/self_tracing/compact_formatter.rs create mode 100644 rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs diff --git a/rust/otap-dataflow/benchmarks/benches/self_tracing/main.rs b/rust/otap-dataflow/benchmarks/benches/self_tracing/main.rs index 9bd2484362..b380100ff6 100644 --- a/rust/otap-dataflow/benchmarks/benches/self_tracing/main.rs +++ b/rust/otap-dataflow/benchmarks/benches/self_tracing/main.rs @@ -5,18 +5,11 @@ //! Benchmarks for the compact log formatter. //! -//! # Benchmark Design -//! -//! These benchmarks emit a single tracing event but perform N encoding/formatting -//! operations inside the callback. This amortizes tracing dispatch overhead to noise, -//! allowing us to measure the true cost of encoding and formatting. -//! -//! # Interpreting Results +//! These benchmarks emit a single tracing event but perform N +//! encoding or encoding-and-formatting operations inside the callback //! //! Benchmark names follow the pattern: `group/description/N_events` //! -//! To get per-event cost: `measured_time / N` -//! //! Example: `compact_encode/3_attrs/1000_events` = 300 µs → 300 ns per event use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main}; @@ -38,12 +31,6 @@ use tikv_jemallocator::Jemalloc; #[global_allocator] static GLOBAL: Jemalloc = Jemalloc; -// ============================================================================= -// ENCODE ONLY BENCHMARK -// Emit 1 event, encode body+attrs N times (partial OTLP) -// ============================================================================= - -/// Layer that encodes body+attrs N times to measure pure encoding cost. struct EncodeOnlyLayer { iterations: usize, } @@ -66,9 +53,8 @@ where } } -/// Benchmark: Pure encoding cost (body+attrs to partial OTLP bytes) -fn bench_encode_only(c: &mut Criterion) { - let mut group = c.benchmark_group("compact_encode"); +fn bench_encode(c: &mut Criterion) { + let mut group = c.benchmark_group("encode"); for iterations in [100, 1000].iter() { let _ = group.bench_with_input( @@ -98,12 +84,6 @@ fn bench_encode_only(c: &mut Criterion) { group.finish(); } -// ============================================================================= -// FORMAT ONLY BENCHMARK -// Encode once, format N times -// ============================================================================= - -/// Layer that encodes once then formats N times. 
struct FormatOnlyLayer { iterations: usize, } @@ -146,17 +126,15 @@ where body_attrs_bytes, }; - // Format N times (without ANSI colors for consistent measurement) for _ in 0..self.iterations { - let line = format_log_record(&record, &cache, false); + let line = format_log_record(&record, &cache, true); let _ = std::hint::black_box(line); } } } -/// Benchmark: Pure formatting cost (format already-encoded record to string) -fn bench_format_only(c: &mut Criterion) { - let mut group = c.benchmark_group("compact_format"); +fn bench_format(c: &mut Criterion) { + let mut group = c.benchmark_group("format"); for iterations in [100, 1000].iter() { let _ = group.bench_with_input( @@ -186,11 +164,6 @@ fn bench_format_only(c: &mut Criterion) { group.finish(); } -// ============================================================================= -// ENCODE + FORMAT BENCHMARK (full pipeline) -// ============================================================================= - -/// Layer that encodes and formats N times (the full pipeline). struct EncodeFormatLayer { iterations: usize, } @@ -234,15 +207,14 @@ where body_attrs_bytes, }; - let line = format_log_record(&record, &cache, false); + let line = format_log_record(&record, &cache, true); let _ = std::hint::black_box(line); } } } -/// Benchmark: Full encode + format pipeline -fn bench_encode_format(c: &mut Criterion) { - let mut group = c.benchmark_group("compact_encode_format"); +fn bench_encode_and_format(c: &mut Criterion) { + let mut group = c.benchmark_group("encode_and_format"); for iterations in [100, 1000].iter() { let _ = group.bench_with_input( @@ -272,19 +244,14 @@ fn bench_encode_format(c: &mut Criterion) { group.finish(); } -// ============================================================================= -// ATTRIBUTE COMPLEXITY BENCHMARK -// ============================================================================= - -/// Benchmark: Encoding cost with different attribute counts -fn bench_by_attr_count(c: &mut Criterion) { - let mut group = c.benchmark_group("compact_format_by_attrs"); +fn bench_encode_attrs(c: &mut Criterion) { + let mut group = c.benchmark_group("encode_attrs"); let iterations = 1000; // No attributes let _ = group.bench_function("0_attrs/1000_events", |b| { b.iter(|| { - let layer = EncodeFormatLayer::new(iterations); + let layer = EncodeOnlyLayer::new(iterations); let subscriber = tracing_subscriber::registry().with(layer); let dispatch = tracing::Dispatch::new(subscriber); @@ -299,7 +266,7 @@ fn bench_by_attr_count(c: &mut Criterion) { // 3 attributes let _ = group.bench_function("3_attrs/1000_events", |b| { b.iter(|| { - let layer = EncodeFormatLayer::new(iterations); + let layer = EncodeOnlyLayer::new(iterations); let subscriber = tracing_subscriber::registry().with(layer); let dispatch = tracing::Dispatch::new(subscriber); @@ -314,7 +281,7 @@ fn bench_by_attr_count(c: &mut Criterion) { // 10 attributes let _ = group.bench_function("10_attrs/1000_events", |b| { b.iter(|| { - let layer = EncodeFormatLayer::new(iterations); + let layer = EncodeOnlyLayer::new(iterations); let subscriber = tracing_subscriber::registry().with(layer); let dispatch = tracing::Dispatch::new(subscriber); @@ -348,7 +315,7 @@ mod bench_entry { criterion_group!( name = benches; config = Criterion::default(); - targets = bench_encode_only, bench_format_only, bench_encode_format, bench_by_attr_count + targets = bench_encode, bench_format, bench_encode_and_format, bench_encode_attrs ); } diff --git 
a/rust/otap-dataflow/crates/telemetry/src/lib.rs b/rust/otap-dataflow/crates/telemetry/src/lib.rs index cb47a7939a..acb0f51a50 100644 --- a/rust/otap-dataflow/crates/telemetry/src/lib.rs +++ b/rust/otap-dataflow/crates/telemetry/src/lib.rs @@ -28,8 +28,6 @@ use crate::error::Error; use crate::registry::MetricsRegistryHandle; use otap_df_config::pipeline::service::telemetry::TelemetryConfig; use tokio_util::sync::CancellationToken; -use tracing_subscriber::layer::SubscriberExt; -use tracing_subscriber::util::SubscriberInitExt; pub mod attributes; pub mod collector; @@ -51,60 +49,11 @@ pub mod semconv; #[doc(hidden)] pub use internal_events::_private; -// Re-export tracing span macros and types for crates that need span instrumentation. -// This allows dependent crates to use spans without adding tracing as a direct dependency. -// Re-exported with otel_ prefix for naming consistency with otel_info!, otel_warn!, etc. -pub use tracing::Span as OtelSpan; -pub use tracing::debug_span as otel_debug_span; -pub use tracing::error_span as otel_error_span; -pub use tracing::info_span as otel_info_span; -pub use tracing::trace_span as otel_trace_span; -pub use tracing::warn_span as otel_warn_span; - -// Re-export raw logging initialization for early setup -pub use self_tracing::compact_formatter::CompactFormatterLayer; - // TODO This should be #[cfg(test)], but something is preventing it from working. // The #[cfg(test)]-labeled otap_batch_processor::test_helpers::from_config // can't load this module unless I remove #[cfg(test)]! See #1304. pub mod testing; -/// Initialize raw logging as early as possible. -/// -/// This installs a minimal tracing subscriber with our `CompactFormatterLayer` that -/// formats log events to stderr. This should be called at the very start of `main()` -/// before any configuration parsing or other initialization that might emit logs. -/// -/// Raw logging is the most basic form of console output, used as a safe configuration -/// early in the lifetime of a process. See `ARCHITECTURE.md` for details. -/// -/// # Panics -/// -/// This function will panic if a global subscriber has already been set. -/// Use `try_init_raw_logging()` if you need to handle this case gracefully. -pub fn init_raw_logging() { - try_init_raw_logging().expect("Failed to initialize raw logging subscriber"); -} - -/// Try to initialize raw logging, returning an error if a subscriber is already set. -/// -/// This is useful in tests or other contexts where a subscriber may already exist. -pub fn try_init_raw_logging() -> Result<(), Error> { - // If RUST_LOG is set, use it for fine-grained control. - // Otherwise, default to INFO level with some noisy dependencies silenced. - let filter = tracing_subscriber::EnvFilter::try_from_default_env().unwrap_or_else(|_| { - tracing_subscriber::EnvFilter::new("info,h2=off,hyper=off") - }); - - let layer = CompactFormatterLayer::stderr(); - - tracing_subscriber::registry() - .with(filter) - .with(layer) - .try_init() - .map_err(|e| Error::TracingInitError(e.to_string())) -} - /// The main telemetry system that aggregates and reports metrics. pub struct MetricsSystem { /// The metrics registry that holds all registered metrics (data + metadata). 
diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/compact_formatter.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/compact_formatter.rs deleted file mode 100644 index 9d41bc37ed..0000000000 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/compact_formatter.rs +++ /dev/null @@ -1,664 +0,0 @@ -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -//! Compact log formatter - a `fmt::layer()` alternative using CompactLogRRecord -//! -//! This module provides a lightweight formatting layer for tokio-tracing events -//! that outputs human-readable log lines to stdout/stderr. It uses the same -//! `CompactLogRecord` structure that can later be extended for OTLP encoding. -//! -//! # Design -//! -//! The key insight is to separate: -//! - **Structural fields** (timestamp, severity, callsite_id) - kept as cheap values -//! - **Borrowed data** (body, attributes) - encoded to bytes during event capture -//! - **Static callsite info** (target, name, file, line) - cached at registration time -//! -//! This design allows for immediate formatting and output while preserving the -//! option to accumulate and batch-encode to OTLP later. -//! -//! # OTLP View Integration -//! -//! For decoding the pre-encoded body and attributes bytes, we reuse the pdata -//! View types (`RawAnyValue`, `RawKeyValue`) which provide zero-copy parsing -//! of OTLP protobuf bytes. - -use bytes::Bytes; -use std::collections::HashMap; -use std::io::{self, Write}; -use std::sync::RwLock; -use std::time::{SystemTime, UNIX_EPOCH}; - -use otap_df_pdata::proto::consts::field_num::logs::{LOG_RECORD_ATTRIBUTES, LOG_RECORD_BODY}; -use otap_df_pdata::proto::consts::wire_types; -use otap_df_pdata::views::common::{AnyValueView, AttributeView, ValueType}; -use otap_df_pdata::views::otlp::bytes::common::{RawAnyValue, RawKeyValue}; -use otap_df_pdata::views::otlp::bytes::decode::read_varint; - -use tracing::callsite::Identifier; -use tracing::span::{Attributes, Record}; -use tracing::{Event, Level, Subscriber}; -use tracing_subscriber::layer::{Context, Layer}; -use tracing_subscriber::registry::LookupSpan; - -use super::direct_encoder::{DirectFieldVisitor, ProtoBuffer}; - -// ============================================================================ -// Core Data Structures -// ============================================================================ - -/// A compact log record with structural metadata and pre-encoded body/attributes. -/// -/// Cheap-to-copy fields are kept in structural form for sorting/filtering. -/// Only borrowed data (body, attributes) is encoded to bytes. -/// Callsite details (target/name/file/line) are cached separately. -#[derive(Debug, Clone)] -pub struct CompactLogRecord { - /// Callsite identifier - used to look up cached callsite info - pub callsite_id: Identifier, - - /// Timestamp in nanoseconds since Unix epoch - pub timestamp_ns: u64, - - /// Severity number: 1=TRACE, 5=DEBUG, 9=INFO, 13=WARN, 17=ERROR - pub severity_number: u8, - - /// Severity text - &'static str from Level::as_str() - pub severity_text: &'static str, - - /// Pre-encoded body and attributes (OTLP format for body field 5 + attrs field 6) - pub body_attrs_bytes: Bytes, -} - -/// Cached callsite information, populated via `register_callsite` hook. 
-#[derive(Debug, Clone)] -pub struct CachedCallsite { - /// Target module path - &'static from Metadata - pub target: &'static str, - - /// Event name - &'static from Metadata - pub name: &'static str, - - /// Source file - &'static from Metadata - pub file: Option<&'static str>, - - /// Source line - pub line: Option, -} - -/// Cache of callsite information, keyed by `Identifier`. -#[derive(Debug, Default)] -pub struct CallsiteCache { - callsites: HashMap, -} - -impl CallsiteCache { - /// Create a new empty cache. - pub fn new() -> Self { - Self::default() - } - - /// Register a callsite from its metadata. - pub fn register(&mut self, metadata: &'static tracing::Metadata<'static>) { - let id = metadata.callsite(); - let _ = self.callsites.entry(id).or_insert_with(|| CachedCallsite { - target: metadata.target(), - name: metadata.name(), - file: metadata.file(), - line: metadata.line(), - }); - } - - /// Get cached callsite info by identifier. - pub fn get(&self, id: &Identifier) -> Option<&CachedCallsite> { - self.callsites.get(id) - } -} - -// ============================================================================ -// Formatting -// ============================================================================ - -/// Format a CompactLogRecord as a human-readable string. -/// -/// Output format: `2026-01-06T10:30:45.123Z INFO target::name (file.rs:42): body [attr=value, ...]` -pub fn format_log_record( - record: &CompactLogRecord, - cache: &CallsiteCache, - use_ansi: bool, -) -> String { - let callsite = cache.get(&record.callsite_id); - - let event_name = match callsite { - Some(cs) => format_event_name(cs.target, cs.name, cs.file, cs.line), - None => "".to_string(), - }; - - let body_attrs = format_body_attrs(&record.body_attrs_bytes); - - if use_ansi { - let level_color = level_color(record.severity_text); - format!( - "{}{}{} {}{:5}{} {}{}{}: {}", - ANSI_DIM, - format_timestamp(record.timestamp_ns), - ANSI_RESET, - level_color, - record.severity_text, - ANSI_RESET, - ANSI_BOLD, - event_name, - ANSI_RESET, - body_attrs, - ) - } else { - format!( - "{} {:5} {}: {}", - format_timestamp(record.timestamp_ns), - record.severity_text, - event_name, - body_attrs, - ) - } -} - -/// Format callsite details as event_name string. -/// -/// Format: "target::name (file.rs:42)" or "target::name" if file/line unavailable. -#[inline] -fn format_event_name(target: &str, name: &str, file: Option<&str>, line: Option) -> String { - match (file, line) { - (Some(file), Some(line)) => format!("{}::{} ({}:{})", target, name, file, line), - _ => format!("{}::{}", target, name), - } -} - -/// Format nanosecond timestamp as ISO 8601 (UTC). -fn format_timestamp(nanos: u64) -> String { - let secs = nanos / 1_000_000_000; - let subsec_millis = (nanos % 1_000_000_000) / 1_000_000; - - // Convert to datetime components - // Days since Unix epoch - let days = secs / 86400; - let time_of_day = secs % 86400; - - let hours = time_of_day / 3600; - let minutes = (time_of_day % 3600) / 60; - let seconds = time_of_day % 60; - - // Calculate year/month/day from days since epoch (1970-01-01) - let (year, month, day) = days_to_ymd(days as i64); - - format!( - "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}.{:03}Z", - year, month, day, hours, minutes, seconds, subsec_millis - ) -} - -/// Convert days since Unix epoch to (year, month, day). 
-fn days_to_ymd(days: i64) -> (i32, u32, u32) { - // Algorithm from Howard Hinnant's date library - let z = days + 719468; - let era = if z >= 0 { z } else { z - 146096 } / 146097; - let doe = (z - era * 146097) as u32; - let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146096) / 365; - let y = yoe as i64 + era * 400; - let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); - let mp = (5 * doy + 2) / 153; - let d = doy - (153 * mp + 2) / 5 + 1; - let m = if mp < 10 { mp + 3 } else { mp - 9 }; - let y = if m <= 2 { y + 1 } else { y }; - - (y as i32, m, d) -} - -/// Format body+attrs bytes as readable string. -/// -/// Uses the pdata View types (`RawAnyValue`, `RawKeyValue`) for zero-copy -/// parsing of the OTLP protobuf bytes. This is consistent with the decoding -/// approach used in `otlp_bytes_formatter.rs`. -fn format_body_attrs(bytes: &Bytes) -> String { - if bytes.is_empty() { - return String::new(); - } - - // The bytes contain LogRecord fields: - // - field 5 (LOG_RECORD_BODY): AnyValue message - // - field 6 (LOG_RECORD_ATTRIBUTES): repeated KeyValue messages - - let mut body_str = String::new(); - let mut attrs = Vec::new(); - let data = bytes.as_ref(); - let mut pos = 0; - - while pos < data.len() { - // Read field tag - let (tag, next_pos) = match read_varint(data, pos) { - Some(v) => v, - None => break, - }; - pos = next_pos; - - let field_num = tag >> 3; - let wire_type = tag & 0x7; - - if wire_type != wire_types::LEN { - // Skip non-length-delimited fields (shouldn't happen for body/attrs) - break; - } - - // Read length-delimited content - let (len, next_pos) = match read_varint(data, pos) { - Some(v) => v, - None => break, - }; - pos = next_pos; - let end = pos + len as usize; - - if end > data.len() { - break; - } - - let field_bytes = &data[pos..end]; - - if field_num == LOG_RECORD_BODY { - // Body: parse as AnyValue using pdata View - let any_value = RawAnyValue::new(field_bytes); - body_str = format_any_value(&any_value); - } else if field_num == LOG_RECORD_ATTRIBUTES { - // Attribute: parse as KeyValue using pdata View - let kv = RawKeyValue::new(field_bytes); - let key = String::from_utf8_lossy(kv.key()).to_string(); - let value = match kv.value() { - Some(v) => format_any_value(&v), - None => "".to_string(), - }; - attrs.push(format!("{}={}", key, value)); - } - - pos = end; - } - - if !attrs.is_empty() { - body_str.push_str(" ["); - body_str.push_str(&attrs.join(", ")); - body_str.push(']'); - } - - body_str -} - -/// Format an AnyValue for display. -/// -/// This is based on the same logic used in `otlp_bytes_formatter.rs`, providing -/// consistent formatting across the crate. 
-fn format_any_value<'a>(value: &impl AnyValueView<'a>) -> String { - match value.value_type() { - ValueType::String => { - if let Some(s) = value.as_string() { - String::from_utf8_lossy(s).to_string() - } else { - String::new() - } - } - ValueType::Int64 => { - if let Some(i) = value.as_int64() { - i.to_string() - } else { - String::new() - } - } - ValueType::Bool => { - if let Some(b) = value.as_bool() { - b.to_string() - } else { - String::new() - } - } - ValueType::Double => { - if let Some(d) = value.as_double() { - format!("{:.6}", d) - } else { - String::new() - } - } - ValueType::Bytes => { - if let Some(bytes) = value.as_bytes() { - format!("{:?}", bytes) - } else { - String::new() - } - } - ValueType::Array => { - if let Some(array_iter) = value.as_array() { - let parts: Vec<_> = array_iter.map(|item| format_any_value(&item)).collect(); - format!("[{}]", parts.join(", ")) - } else { - "[]".to_string() - } - } - ValueType::KeyValueList => { - if let Some(kvlist_iter) = value.as_kvlist() { - let parts: Vec<_> = kvlist_iter - .map(|kv| { - let key_str = String::from_utf8_lossy(kv.key()).to_string(); - match kv.value() { - Some(val) => format!("{}={}", key_str, format_any_value(&val)), - None => key_str, - } - }) - .collect(); - format!("{{{}}}", parts.join(", ")) - } else { - "{}".to_string() - } - } - ValueType::Empty => String::new(), - } -} - -// ============================================================================ -// Writer -// ============================================================================ - -/// Output target for log lines. -#[derive(Debug, Clone, Copy, Default)] -pub enum OutputTarget { - /// Write to standard error. - #[default] - Stderr, - /// Write to standard output. - Stdout, -} - -/// Simple writer that outputs log lines to stdout or stderr. -#[derive(Debug)] -pub struct SimpleWriter { - target: OutputTarget, - use_ansi: bool, -} - -impl Default for SimpleWriter { - fn default() -> Self { - Self::stderr() - } -} - -// ANSI color codes -const ANSI_RESET: &str = "\x1b[0m"; -const ANSI_RED: &str = "\x1b[31m"; -const ANSI_YELLOW: &str = "\x1b[33m"; -const ANSI_GREEN: &str = "\x1b[32m"; -const ANSI_BLUE: &str = "\x1b[34m"; -const ANSI_MAGENTA: &str = "\x1b[35m"; -const ANSI_DIM: &str = "\x1b[2m"; -const ANSI_BOLD: &str = "\x1b[1m"; - -impl SimpleWriter { - /// Create a writer that outputs to stdout with ANSI colors enabled. - pub fn stdout() -> Self { - Self { - target: OutputTarget::Stdout, - use_ansi: true, - } - } - - /// Create a writer that outputs to stderr with ANSI colors enabled. - pub fn stderr() -> Self { - Self { - target: OutputTarget::Stderr, - use_ansi: true, - } - } - - /// Create a writer that outputs to stdout without ANSI colors. - pub fn stdout_no_color() -> Self { - Self { - target: OutputTarget::Stdout, - use_ansi: false, - } - } - - /// Create a writer that outputs to stderr without ANSI colors. - pub fn stderr_no_color() -> Self { - Self { - target: OutputTarget::Stderr, - use_ansi: false, - } - } - - /// Returns whether ANSI colors are enabled. - pub fn use_ansi(&self) -> bool { - self.use_ansi - } - - /// Write a log line (with newline). - pub fn write_line(&self, line: &str) { - match self.target { - OutputTarget::Stdout => { - let _ = writeln!(io::stdout(), "{}", line); - } - OutputTarget::Stderr => { - let _ = writeln!(io::stderr(), "{}", line); - } - } - } -} - -/// Get ANSI color code for a severity level. 
-#[inline] -fn level_color(level: &str) -> &'static str { - match level { - "ERROR" => ANSI_RED, - "WARN" => ANSI_YELLOW, - "INFO" => ANSI_GREEN, - "DEBUG" => ANSI_BLUE, - "TRACE" => ANSI_MAGENTA, - _ => "", - } -} - -// ============================================================================ -// Layer Implementation -// ============================================================================ - -/// A minimal formatting layer that outputs log records to stdout/stderr. -/// -/// This is a lightweight alternative to `tracing_subscriber::fmt::layer()`. -pub struct CompactFormatterLayer { - callsite_cache: RwLock, - writer: SimpleWriter, -} - -impl CompactFormatterLayer { - /// Create a new layer that writes to stderr. - pub fn new() -> Self { - Self { - callsite_cache: RwLock::new(CallsiteCache::new()), - writer: SimpleWriter::stderr(), - } - } - - /// Create a new layer that writes to stdout. - pub fn stdout() -> Self { - Self { - callsite_cache: RwLock::new(CallsiteCache::new()), - writer: SimpleWriter::stdout(), - } - } - - /// Create a new layer that writes to stderr. - pub fn stderr() -> Self { - Self::new() - } -} - -impl Default for CompactFormatterLayer { - fn default() -> Self { - Self::new() - } -} - -impl Layer for CompactFormatterLayer -where - S: Subscriber + for<'a> LookupSpan<'a>, -{ - fn register_callsite( - &self, - metadata: &'static tracing::Metadata<'static>, - ) -> tracing::subscriber::Interest { - self.callsite_cache.write().unwrap().register(metadata); - tracing::subscriber::Interest::always() - } - - fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) { - let metadata = event.metadata(); - - // Encode body and attributes to bytes - let body_attrs_bytes = encode_body_and_attrs(event); - - // Get current timestamp - let timestamp_ns = SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap_or_default() - .as_nanos() as u64; - - // Build compact record - let record = CompactLogRecord { - callsite_id: metadata.callsite(), - timestamp_ns, - severity_number: level_to_severity(metadata.level()), - severity_text: metadata.level().as_str(), - body_attrs_bytes, - }; - - // Format and write immediately - let cache = self.callsite_cache.read().unwrap(); - let line = format_log_record(&record, &cache, self.writer.use_ansi()); - self.writer.write_line(&line); - } - - fn on_new_span(&self, _attrs: &Attributes<'_>, _id: &tracing::span::Id, _ctx: Context<'_, S>) { - // Not handling spans in MVP - } - - fn on_record(&self, _span: &tracing::span::Id, _values: &Record<'_>, _ctx: Context<'_, S>) { - // Not handling spans in MVP - } - - fn on_enter(&self, _id: &tracing::span::Id, _ctx: Context<'_, S>) { - // Not handling spans in MVP - } - - fn on_exit(&self, _id: &tracing::span::Id, _ctx: Context<'_, S>) { - // Not handling spans in MVP - } - - fn on_close(&self, _id: tracing::span::Id, _ctx: Context<'_, S>) { - // Not handling spans in MVP - } -} - -/// Encode only body and attributes from an event to OTLP bytes. -pub fn encode_body_and_attrs(event: &Event<'_>) -> Bytes { - let mut buf = ProtoBuffer::with_capacity(256); - - // Visit fields to encode body (field 5) and attributes (field 6) - let mut visitor = DirectFieldVisitor::new(&mut buf); - event.record(&mut visitor); - - buf.into_bytes() -} - -/// Convert tracing Level to OTLP severity number. 
-fn level_to_severity(level: &Level) -> u8 { - match *level { - Level::TRACE => 1, - Level::DEBUG => 5, - Level::INFO => 9, - Level::WARN => 13, - Level::ERROR => 17, - } -} - -// ============================================================================ -// Tests -// ============================================================================ - -#[cfg(test)] -mod tests { - use super::*; - use tracing_subscriber::prelude::*; - - #[test] - fn test_format_timestamp() { - // 2026-01-06T10:30:45.123Z in nanoseconds - // Let's use a known timestamp: 2024-01-01T00:00:00.000Z - let nanos: u64 = 1704067200 * 1_000_000_000; // 2024-01-01 00:00:00 UTC - let formatted = format_timestamp(nanos); - assert_eq!(formatted, "2024-01-01T00:00:00.000Z"); - - // Test with milliseconds - let nanos_with_ms: u64 = 1704067200 * 1_000_000_000 + 123_000_000; - let formatted = format_timestamp(nanos_with_ms); - assert_eq!(formatted, "2024-01-01T00:00:00.123Z"); - } - - #[test] - fn test_days_to_ymd() { - // 1970-01-01 is day 0 - assert_eq!(days_to_ymd(0), (1970, 1, 1)); - - // 2024-01-01 is 19723 days after 1970-01-01 - assert_eq!(days_to_ymd(19723), (2024, 1, 1)); - } - - #[test] - fn test_level_to_severity() { - assert_eq!(level_to_severity(&Level::TRACE), 1); - assert_eq!(level_to_severity(&Level::DEBUG), 5); - assert_eq!(level_to_severity(&Level::INFO), 9); - assert_eq!(level_to_severity(&Level::WARN), 13); - assert_eq!(level_to_severity(&Level::ERROR), 17); - } - - #[test] - fn test_callsite_cache() { - let cache = CallsiteCache::new(); - assert!(cache.callsites.is_empty()); - } - - #[test] - fn test_simple_writer_creation() { - let _stdout = SimpleWriter::stdout(); - let _stderr = SimpleWriter::stderr(); - let _default = SimpleWriter::default(); - } - - #[test] - fn test_compact_formatter_layer_creation() { - let _layer = CompactFormatterLayer::new(); - let _stdout = CompactFormatterLayer::stdout(); - let _stderr = CompactFormatterLayer::stderr(); - let _default = CompactFormatterLayer::default(); - } - - #[test] - fn test_layer_integration() { - // Create the layer and subscriber - let layer = CompactFormatterLayer::stderr(); - let subscriber = tracing_subscriber::registry().with(layer); - - // Set as default for this thread temporarily - let dispatch = tracing::Dispatch::new(subscriber); - let _guard = tracing::dispatcher::set_default(&dispatch); - - // Emit some events - these should be formatted and written to stderr - tracing::info!("Test info message"); - tracing::warn!(count = 42, "Warning with attribute"); - tracing::error!(error = "something failed", "Error occurred"); - - // The test verifies no panics occur; actual output goes to stderr - } -} diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs new file mode 100644 index 0000000000..1aa282839b --- /dev/null +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs @@ -0,0 +1,503 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +//! A `fmt::layer()` alternative using self_tracing::LogRecord. 
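+//!
+//! A minimal usage sketch (it mirrors this module's integration test;
+//! `set_default` installs the subscriber for the current thread only):
+//!
+//! ```ignore
+//! use tracing_subscriber::prelude::*;
+//!
+//! let layer = Layer::new(ConsoleWriter::color());
+//! let subscriber = tracing_subscriber::registry().with(layer);
+//! let dispatch = tracing::Dispatch::new(subscriber);
+//! let _guard = tracing::dispatcher::set_default(&dispatch);
+//!
+//! tracing::info!(port = 8080, "listening");
+//! ```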
+
+use bytes::Bytes;
+use std::io::Write;
+use std::sync::RwLock;
+use std::time::{SystemTime, UNIX_EPOCH};
+
+use otap_df_pdata::proto::consts::field_num::logs::{LOG_RECORD_ATTRIBUTES, LOG_RECORD_BODY};
+use otap_df_pdata::proto::consts::wire_types;
+use otap_df_pdata::views::common::{AnyValueView, AttributeView, ValueType};
+use otap_df_pdata::views::otlp::bytes::common::{RawAnyValue, RawKeyValue};
+use otap_df_pdata::views::otlp::bytes::decode::read_varint;
+
+use tracing::span::{Attributes, Record};
+use tracing::{Event, Level, Subscriber};
+use tracing_subscriber::layer::{Context, Layer as TracingLayer};
+use tracing_subscriber::registry::LookupSpan;
+
+use super::direct_encoder::{DirectFieldVisitor, ProtoBuffer};
+use super::{CallsiteMap, LogRecord};
+
+/// A console formatter that writes to stdout or stderr, selected per record severity.
+#[derive(Debug)]
+pub struct ConsoleWriter {
+    use_ansi: bool,
+}
+
+/// A minimal formatting layer that outputs log records to stdout/stderr.
+///
+/// This is a lightweight alternative to `tracing_subscriber::fmt::layer()`.
+pub struct Layer {
+    callsites: RwLock<CallsiteMap>,
+    writer: ConsoleWriter,
+}
+
+// ANSI color codes
+const ANSI_RESET: &str = "\x1b[0m";
+const ANSI_RED: &str = "\x1b[31m";
+const ANSI_YELLOW: &str = "\x1b[33m";
+const ANSI_GREEN: &str = "\x1b[32m";
+const ANSI_BLUE: &str = "\x1b[34m";
+const ANSI_MAGENTA: &str = "\x1b[35m";
+const ANSI_DIM: &str = "\x1b[2m";
+const ANSI_BOLD: &str = "\x1b[1m";
+
+impl Layer {
+    /// Return a new formatting layer with the associated writer.
+    pub fn new(writer: ConsoleWriter) -> Self {
+        Self {
+            callsites: RwLock::new(CallsiteMap::new()),
+            writer,
+        }
+    }
+
+    /// Convert tracing Level to OTLP severity number.
+    fn level_to_severity(level: &Level) -> u8 {
+        match *level {
+            Level::TRACE => 1,
+            Level::DEBUG => 5,
+            Level::INFO => 9,
+            Level::WARN => 13,
+            Level::ERROR => 17,
+        }
+    }
+}
+
+impl ConsoleWriter {
+    /// Create a writer without ANSI colors.
+    pub fn no_color() -> Self {
+        Self { use_ansi: false }
+    }
+
+    /// Create a writer with ANSI colors enabled.
+    pub fn color() -> Self {
+        Self { use_ansi: true }
+    }
+
+    /// Format a LogRecord as a human-readable string.
+    ///
+    /// Output format: `2026-01-06T10:30:45.123Z INFO target::name (file.rs:42): body [attr=value, ...]`
+    pub fn format_log_record(&self, record: &LogRecord, map: &CallsiteMap) -> String {
+        let callsite = map.get(&record.callsite_id);
+
+        let event_name = match callsite {
+            Some(cs) => Self::format_event_name(cs.target, cs.name, cs.file, cs.line),
+            None => "".to_string(),
+        };
+
+        let body_attrs = Self::format_body_attrs(&record.body_attrs_bytes);
+
+        if self.use_ansi {
+            let level_color = Self::level_color(record.severity_level);
+            format!(
+                "{}{}{} {}{:5}{} {}{}{}: {}",
+                ANSI_DIM,
+                Self::format_timestamp(record.timestamp_ns),
+                ANSI_RESET,
+                level_color,
+                record.severity_text,
+                ANSI_RESET,
+                ANSI_BOLD,
+                event_name,
+                ANSI_RESET,
+                body_attrs,
+            )
+        } else {
+            format!(
+                "{} {:5} {}: {}",
+                Self::format_timestamp(record.timestamp_ns),
+                record.severity_text,
+                event_name,
+                body_attrs,
+            )
+        }
+    }
+
+    /// Format callsite details as event_name string.
+    ///
+    /// Format: "target::name (file.rs:42)" or "target::name" if file/line unavailable.
+    #[inline]
+    fn format_event_name(
+        target: &str,
+        name: &str,
+        file: Option<&str>,
+        line: Option<u32>,
+    ) -> String {
+        match (file, line) {
+            (Some(file), Some(line)) => format!("{}::{} ({}:{})", target, name, file, line),
+            _ => format!("{}::{}", target, name),
+        }
+    }
+
+    /// Format nanosecond timestamp as ISO 8601 (UTC).
+    fn format_timestamp(nanos: u64) -> String {
+        let secs = nanos / 1_000_000_000;
+        let subsec_millis = (nanos % 1_000_000_000) / 1_000_000;
+
+        // Convert to datetime components
+        // Days since Unix epoch
+        let days = secs / 86400;
+        let time_of_day = secs % 86400;
+
+        let hours = time_of_day / 3600;
+        let minutes = (time_of_day % 3600) / 60;
+        let seconds = time_of_day % 60;
+
+        // Calculate year/month/day from days since epoch (1970-01-01)
+        let (year, month, day) = Self::days_to_ymd(days as i64);
+
+        format!(
+            "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}.{:03}Z",
+            year, month, day, hours, minutes, seconds, subsec_millis
+        )
+    }
+
+    /// Convert days since Unix epoch to (year, month, day).
+    fn days_to_ymd(days: i64) -> (i32, u32, u32) {
+        // Algorithm from Howard Hinnant's date library
+        let z = days + 719468;
+        let era = if z >= 0 { z } else { z - 146096 } / 146097;
+        let doe = (z - era * 146097) as u32;
+        let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146096) / 365;
+        let y = yoe as i64 + era * 400;
+        let doy = doe - (365 * yoe + yoe / 4 - yoe / 100);
+        let mp = (5 * doy + 2) / 153;
+        let d = doy - (153 * mp + 2) / 5 + 1;
+        let m = if mp < 10 { mp + 3 } else { mp - 9 };
+        let y = if m <= 2 { y + 1 } else { y };
+
+        (y as i32, m, d)
+    }
+
+    /// Format body+attrs bytes as readable string.
+    ///
+    /// Uses the pdata View types (`RawAnyValue`, `RawKeyValue`) for zero-copy
+    /// parsing of the OTLP protobuf bytes. This is consistent with the decoding
+    /// approach used in `otlp_bytes_formatter.rs`.
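+    ///
+    /// As a worked example of the wire format parsed here: the body field
+    /// arrives as tag byte `(5 << 3) | 2 = 0x2A` (field 5, wire type LEN),
+    /// followed by a varint length and that many payload bytes; each
+    /// attribute repeats the same shape under field 6 (tag byte `0x32`).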
+    fn format_body_attrs(bytes: &Bytes) -> String {
+        if bytes.is_empty() {
+            return String::new();
+        }
+
+        // The bytes contain LogRecord fields:
+        // - field 5 (LOG_RECORD_BODY): AnyValue message
+        // - field 6 (LOG_RECORD_ATTRIBUTES): repeated KeyValue messages
+
+        let mut body_str = String::new();
+        let mut attrs = Vec::new();
+        let data = bytes.as_ref();
+        let mut pos = 0;
+
+        while pos < data.len() {
+            // Read field tag
+            let (tag, next_pos) = match read_varint(data, pos) {
+                Some(v) => v,
+                None => break,
+            };
+            pos = next_pos;
+
+            let field_num = tag >> 3;
+            let wire_type = tag & 0x7;
+
+            if wire_type != wire_types::LEN {
+                // Skip non-length-delimited fields (shouldn't happen for body/attrs)
+                break;
+            }
+
+            // Read length-delimited content
+            let (len, next_pos) = match read_varint(data, pos) {
+                Some(v) => v,
+                None => break,
+            };
+            pos = next_pos;
+            let end = pos + len as usize;
+
+            if end > data.len() {
+                break;
+            }
+
+            let field_bytes = &data[pos..end];
+
+            if field_num == LOG_RECORD_BODY {
+                // Body: parse as AnyValue using pdata View
+                let any_value = RawAnyValue::new(field_bytes);
+                body_str = Self::format_any_value(&any_value);
+            } else if field_num == LOG_RECORD_ATTRIBUTES {
+                // Attribute: parse as KeyValue using pdata View
+                let kv = RawKeyValue::new(field_bytes);
+                let key = String::from_utf8_lossy(kv.key()).to_string();
+                let value = match kv.value() {
+                    Some(v) => Self::format_any_value(&v),
+                    None => "".to_string(),
+                };
+                attrs.push(format!("{}={}", key, value));
+            }
+
+            pos = end;
+        }
+
+        if !attrs.is_empty() {
+            body_str.push_str(" [");
+            body_str.push_str(&attrs.join(", "));
+            body_str.push(']');
+        }
+
+        body_str
+    }
+
+    /// Format an AnyValue for display.
+    ///
+    /// This is based on the same logic used in `otlp_bytes_formatter.rs`, providing
+    /// consistent formatting across the crate.
+    fn format_any_value<'a>(value: &impl AnyValueView<'a>) -> String {
+        match value.value_type() {
+            ValueType::String => {
+                if let Some(s) = value.as_string() {
+                    String::from_utf8_lossy(s).to_string()
+                } else {
+                    String::new()
+                }
+            }
+            ValueType::Int64 => {
+                if let Some(i) = value.as_int64() {
+                    i.to_string()
+                } else {
+                    String::new()
+                }
+            }
+            ValueType::Bool => {
+                if let Some(b) = value.as_bool() {
+                    b.to_string()
+                } else {
+                    String::new()
+                }
+            }
+            ValueType::Double => {
+                if let Some(d) = value.as_double() {
+                    format!("{:.6}", d)
+                } else {
+                    String::new()
+                }
+            }
+            ValueType::Bytes => {
+                if let Some(bytes) = value.as_bytes() {
+                    format!("{:?}", bytes)
+                } else {
+                    String::new()
+                }
+            }
+            ValueType::Array => {
+                if let Some(array_iter) = value.as_array() {
+                    let parts: Vec<_> = array_iter
+                        .map(|item| Self::format_any_value(&item))
+                        .collect();
+                    format!("[{}]", parts.join(", "))
+                } else {
+                    "[]".to_string()
+                }
+            }
+            ValueType::KeyValueList => {
+                if let Some(kvlist_iter) = value.as_kvlist() {
+                    let parts: Vec<_> = kvlist_iter
+                        .map(|kv| {
+                            let key_str = String::from_utf8_lossy(kv.key()).to_string();
+                            match kv.value() {
+                                Some(val) => {
+                                    format!("{}={}", key_str, Self::format_any_value(&val))
+                                }
+                                None => key_str,
+                            }
+                        })
+                        .collect();
+                    format!("{{{}}}", parts.join(", "))
+                } else {
+                    "{}".to_string()
+                }
+            }
+            ValueType::Empty => String::new(),
+        }
+    }
+
+    /// Write a log line, routing WARN and above (severity >= 13) to stderr
+    /// and everything else to stdout.
+    fn write_line(&self, level: u8, line: &str) {
+        // Best-effort output: use write_all so a partial write cannot truncate
+        // the line; any remaining I/O error is deliberately ignored.
+        let _ = if level >= 13 {
+            std::io::stderr().write_all(line.as_bytes())
+        } else {
+            std::io::stdout().write_all(line.as_bytes())
+        };
+    }
+
+    /// Get ANSI color code for a severity level.
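+    ///
+    /// OpenTelemetry severity numbers come in bands of four (1-4 TRACE,
+    /// 5-8 DEBUG, 9-12 INFO, 13-16 WARN, 17-24 ERROR/FATAL), so comparing
+    /// against each band's floor colors the whole band consistently.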
+    #[inline]
+    fn level_color(level: u8) -> &'static str {
+        if level >= 17 {
+            ANSI_RED
+        } else if level >= 13 {
+            ANSI_YELLOW
+        } else if level >= 9 {
+            ANSI_GREEN
+        } else if level >= 5 {
+            ANSI_BLUE
+        } else {
+            ANSI_MAGENTA
+        }
+    }
+}
+
+// ============================================================================
+// Layer Implementation
+// ============================================================================
+
+impl<S> TracingLayer<S> for Layer
+where
+    S: Subscriber + for<'a> LookupSpan<'a>,
+{
+    fn register_callsite(
+        &self,
+        metadata: &'static tracing::Metadata<'static>,
+    ) -> tracing::subscriber::Interest {
+        self.callsites.write().unwrap().register(metadata);
+        tracing::subscriber::Interest::always()
+    }
+
+    fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) {
+        let metadata = event.metadata();
+
+        // Encode body and attributes to bytes
+        let body_attrs_bytes = encode_body_and_attrs(event);
+
+        // Get current timestamp
+        let timestamp_ns = SystemTime::now()
+            .duration_since(UNIX_EPOCH)
+            .unwrap_or_default()
+            .as_nanos() as u64;
+
+        // Build compact record
+        let record = LogRecord {
+            callsite_id: metadata.callsite(),
+            timestamp_ns,
+            severity_level: Self::level_to_severity(metadata.level()),
+            severity_text: metadata.level().as_str(),
+            body_attrs_bytes,
+        };
+
+        // Format and write immediately
+        let map = self.callsites.read().unwrap();
+        let line = self.writer.format_log_record(&record, &map);
+        self.writer.write_line(record.severity_level, &line);
+    }
+
+    fn on_new_span(&self, _attrs: &Attributes<'_>, _id: &tracing::span::Id, _ctx: Context<'_, S>) {
+        // Not handling spans
+    }
+
+    fn on_record(&self, _span: &tracing::span::Id, _values: &Record<'_>, _ctx: Context<'_, S>) {
+        // Not handling spans
+    }
+
+    fn on_enter(&self, _id: &tracing::span::Id, _ctx: Context<'_, S>) {
+        // Not handling spans
+    }
+
+    fn on_exit(&self, _id: &tracing::span::Id, _ctx: Context<'_, S>) {
+        // Not handling spans
+    }
+
+    fn on_close(&self, _id: tracing::span::Id, _ctx: Context<'_, S>) {
+        // Not handling spans
+    }
+}
+
+/// Encode only body and attributes from an event to OTLP bytes.
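+///
+/// The result is not a complete OTLP `LogRecord` message: it holds only the
+/// `body` (field 5) and `attributes` (field 6) fields, so it can be decoded
+/// by `format_body_attrs` or spliced into a full record later.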
+pub fn encode_body_and_attrs(event: &Event<'_>) -> Bytes { + let mut buf = ProtoBuffer::with_capacity(256); + + // Visit fields to encode body (field 5) and attributes (field 6) + let mut visitor = DirectFieldVisitor::new(&mut buf); + event.record(&mut visitor); + + buf.into_bytes() +} + +// ============================================================================ +// Tests +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + use tracing_subscriber::prelude::*; + + #[test] + fn test_format_timestamp() { + // 2026-01-06T10:30:45.123Z in nanoseconds + // Let's use a known timestamp: 2024-01-01T00:00:00.000Z + let nanos: u64 = 1704067200 * 1_000_000_000; // 2024-01-01 00:00:00 UTC + let formatted = ConsoleWriter::format_timestamp(nanos); + assert_eq!(formatted, "2024-01-01T00:00:00.000Z"); + + // Test with milliseconds + let nanos_with_ms: u64 = 1704067200 * 1_000_000_000 + 123_000_000; + let formatted = ConsoleWriter::format_timestamp(nanos_with_ms); + assert_eq!(formatted, "2024-01-01T00:00:00.123Z"); + } + + #[test] + fn test_days_to_ymd() { + // 1970-01-01 is day 0 + assert_eq!(ConsoleWriter::days_to_ymd(0), (1970, 1, 1)); + + // 2024-01-01 is 19723 days after 1970-01-01 + assert_eq!(ConsoleWriter::days_to_ymd(19723), (2024, 1, 1)); + } + + #[test] + fn test_level_to_severity() { + assert_eq!(ConsoleWriter::level_to_severity(&Level::TRACE), 1); + assert_eq!(ConsoleWriter::level_to_severity(&Level::DEBUG), 5); + assert_eq!(ConsoleWriter::level_to_severity(&Level::INFO), 9); + assert_eq!(ConsoleWriter::level_to_severity(&Level::WARN), 13); + assert_eq!(ConsoleWriter::level_to_severity(&Level::ERROR), 17); + } + + #[test] + fn test_callsites() { + let map = CallsiteMap::new(); + assert!(map.callsites.is_empty()); + } + + #[test] + fn test_simple_writer_creation() { + let _stdout = ConsoleWriter::color(); + let _stderr = ConsoleWriter::no_color(); + } + + #[test] + fn test_formatter_layer_creation() { + let _color = Layer::new(ConsoleWriter::color()); + let _nocolor = Layer::new(ConsoleWriter::no_color()); + } + + #[test] + fn test_layer_integration() { + // Create the layer and subscriber + let layer = Layer::stderr(); + let subscriber = tracing_subscriber::registry().with(layer); + + // Set as default for this thread temporarily + let dispatch = tracing::Dispatch::new(subscriber); + let _guard = tracing::dispatcher::set_default(&dispatch); + + // Emit some events - these should be formatted and written to stderr + tracing::info!("Test info message"); + tracing::warn!(count = 42, "Warning with attribute"); + tracing::error!(error = "something failed", "Error occurred"); + + // The test verifies no panics occur; actual output goes to stderr + } +} diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs index e69d395c32..61ebec324b 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs @@ -1,20 +1,115 @@ // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 -//! Compact log formatting for tokio-tracing events. +//! Log encoding and formatting for Tokio tracing events. This module +//! stores pre-calculated encodings for the LogRecord event_name and +//! avoids unnecessary encoding work for primitive fields (e.g., timestamp). //! -//! This module provides a lightweight formatting layer for tokio-tracing events -//! 
that encodes body+attributes to partial OTLP bytes, then formats them for
-//! console output.
+//! The intermediate representation, `LogRecord`, includes the
+//! primitive fields and static references; the remaining data is
+//! placed in a partial OTLP encoding.
 
-pub mod compact_formatter;
 pub mod direct_encoder;
+pub mod formatter;
 
-// Compact formatter exports (the primary API)
-pub use compact_formatter::{
-    CachedCallsite, CallsiteCache, CompactFormatterLayer, CompactLogRecord, OutputTarget,
-    SimpleWriter, encode_body_and_attrs, format_log_record,
-};
+use super::Error;
+use bytes::Bytes;
+use std::collections::HashMap;
+use tracing::callsite::Identifier;
+use tracing_subscriber::layer::SubscriberExt;
+use tracing_subscriber::util::SubscriberInitExt;
+
+pub use formatter::{ConsoleWriter, Layer as RawLoggingLayer};
 
-// Direct encoder exports (used internally, exposed for benchmarking)
 pub use direct_encoder::{DirectFieldVisitor, ProtoBuffer};
+
+/// A log record with structural metadata and pre-encoded body/attributes.
+#[derive(Debug, Clone)]
+pub struct LogRecord {
+    /// Callsite identifier used to look up cached callsite info
+    pub callsite_id: Identifier,
+
+    /// Timestamp in UNIX epoch nanoseconds
+    pub timestamp_ns: u64,
+
+    /// Severity level, as defined by OpenTelemetry
+    pub severity_level: u8,
+
+    /// Severity text
+    pub severity_text: &'static str,
+
+    /// Pre-encoded body and attributes
+    pub body_attrs_bytes: Bytes,
+}
+
+/// Saved callsite information, populated via `register_callsite` hook.
+#[derive(Debug, Clone)]
+pub struct SavedCallsite {
+    /// Target (e.g., module path)
+    pub target: &'static str,
+
+    /// Event name
+    pub name: &'static str,
+
+    /// Source file
+    pub file: Option<&'static str>,
+
+    /// Source line
+    pub line: Option<u32>,
+}
+
+/// A map of callsite information keyed by `Identifier`.
+#[derive(Debug, Default)]
+pub struct CallsiteMap {
+    callsites: HashMap<Identifier, SavedCallsite>,
+}
+
+impl CallsiteMap {
+    /// Create a new empty map.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Register a callsite from its metadata.
+    pub fn register(&mut self, metadata: &'static tracing::Metadata<'static>) {
+        let id = metadata.callsite();
+        let _ = self.callsites.entry(id).or_insert_with(|| SavedCallsite {
+            target: metadata.target(),
+            name: metadata.name(),
+            file: metadata.file(),
+            line: metadata.line(),
+        });
+    }
+
+    /// Get cached callsite info by identifier.
+    pub fn get(&self, id: &Identifier) -> Option<&SavedCallsite> {
+        self.callsites.get(id)
+    }
+}
+
+/// Initialize raw logging as early as possible.
+///
+/// A failure here (typically because a global subscriber is already set)
+/// is reported through the existing subscriber rather than panicking.
+pub fn init_raw_logging() {
+    if let Err(err) = try_init_raw_logging() {
+        tracing::error!(
+            name: "init_failed",
+            "Failed to initialize raw logging subscriber: {}",
+            err.to_string(),
+        );
+    }
+}
+
+/// Try to initialize raw logging, returning an error if a subscriber is already set.
+pub fn try_init_raw_logging() -> Result<(), Error> {
+    // If RUST_LOG is set, use it for fine-grained control.
+    // Otherwise, default to INFO level with some noisy dependencies silenced.
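+    // For example (illustrative, not a default): `RUST_LOG=debug,h2=off`
+    // raises the level to DEBUG while keeping the h2 crate silent.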
+ let filter = tracing_subscriber::EnvFilter::try_from_default_env() + .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info,h2=off,hyper=off")); + + let layer = RawLoggingLayer::new(ConsoleWriter::color()); + + tracing_subscriber::registry() + .with(filter) + .with(layer) + .try_init() + .map_err(|e| Error::TracingInitError(e.to_string())) +} From 7cb9db8422495663295284e00cae0fa18f0bbaab Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Tue, 6 Jan 2026 19:25:56 -0800 Subject: [PATCH 09/92] builds --- rust/otap-dataflow/crates/telemetry/src/lib.rs | 10 ++++++++++ .../telemetry/src/self_tracing/formatter.rs | 16 ++++++++-------- rust/otap-dataflow/src/main.rs | 2 +- 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/rust/otap-dataflow/crates/telemetry/src/lib.rs b/rust/otap-dataflow/crates/telemetry/src/lib.rs index acb0f51a50..58f7a75da5 100644 --- a/rust/otap-dataflow/crates/telemetry/src/lib.rs +++ b/rust/otap-dataflow/crates/telemetry/src/lib.rs @@ -49,6 +49,16 @@ pub mod semconv; #[doc(hidden)] pub use internal_events::_private; +// Re-export tracing span macros and types for crates that need span instrumentation. +// This allows dependent crates to use spans without adding tracing as a direct dependency. +// Re-exported with otel_ prefix for naming consistency with otel_info!, otel_warn!, etc. +pub use tracing::Span as OtelSpan; +pub use tracing::debug_span as otel_debug_span; +pub use tracing::error_span as otel_error_span; +pub use tracing::info_span as otel_info_span; +pub use tracing::trace_span as otel_trace_span; +pub use tracing::warn_span as otel_warn_span; + // TODO This should be #[cfg(test)], but something is preventing it from working. // The #[cfg(test)]-labeled otap_batch_processor::test_helpers::from_config // can't load this module unless I remove #[cfg(test)]! See #1304. 
diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs index 1aa282839b..d85f844681 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs @@ -94,7 +94,7 @@ impl ConsoleWriter { if self.use_ansi { let level_color = Self::level_color(record.severity_level); format!( - "{}{}{} {}{:5}{} {}{}{}: {}", + "{}{}{} {}{:5}{} {}{}{}: {}\n", ANSI_DIM, Self::format_timestamp(record.timestamp_ns), ANSI_RESET, @@ -108,7 +108,7 @@ impl ConsoleWriter { ) } else { format!( - "{} {:5} {}: {}", + "{} {:5} {}: {}\n", Self::format_timestamp(record.timestamp_ns), record.severity_text, event_name, @@ -458,11 +458,11 @@ mod tests { #[test] fn test_level_to_severity() { - assert_eq!(ConsoleWriter::level_to_severity(&Level::TRACE), 1); - assert_eq!(ConsoleWriter::level_to_severity(&Level::DEBUG), 5); - assert_eq!(ConsoleWriter::level_to_severity(&Level::INFO), 9); - assert_eq!(ConsoleWriter::level_to_severity(&Level::WARN), 13); - assert_eq!(ConsoleWriter::level_to_severity(&Level::ERROR), 17); + assert_eq!(Layer::level_to_severity(&Level::TRACE), 1); + assert_eq!(Layer::level_to_severity(&Level::DEBUG), 5); + assert_eq!(Layer::level_to_severity(&Level::INFO), 9); + assert_eq!(Layer::level_to_severity(&Level::WARN), 13); + assert_eq!(Layer::level_to_severity(&Level::ERROR), 17); } #[test] @@ -486,7 +486,7 @@ mod tests { #[test] fn test_layer_integration() { // Create the layer and subscriber - let layer = Layer::stderr(); + let layer = Layer::new(ConsoleWriter::no_color()); let subscriber = tracing_subscriber::registry().with(layer); // Set as default for this thread temporarily diff --git a/rust/otap-dataflow/src/main.rs b/rust/otap-dataflow/src/main.rs index 9d753e2dab..721c3baac4 100644 --- a/rust/otap-dataflow/src/main.rs +++ b/rust/otap-dataflow/src/main.rs @@ -109,7 +109,7 @@ fn parse_core_id_range(s: &str) -> Result { fn main() -> Result<(), Box> { // Initialize raw logging as early as possible so that log messages during // config parsing and validation appear well formatted. - otap_df_telemetry::init_raw_logging(); + otap_df_telemetry::self_tracing::init_raw_logging(); // Initialize rustls crypto provider (required for rustls 0.23+) // We use ring as the default provider From b374a47e81d3750f59ffa0fabd466afb2926ee76 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Tue, 6 Jan 2026 19:49:31 -0800 Subject: [PATCH 10/92] crates/engine/src/pipeline_metrics.rs --- rust/otap-dataflow/Cargo.toml | 2 +- .../crates/engine/src/pipeline_metrics.rs | 4 ++ .../otap-dataflow/crates/telemetry/Cargo.toml | 2 +- .../telemetry/src/opentelemetry_client.rs | 11 ++---- .../opentelemetry_client/logger_provider.rs | 38 +------------------ 5 files changed, 10 insertions(+), 47 deletions(-) diff --git a/rust/otap-dataflow/Cargo.toml b/rust/otap-dataflow/Cargo.toml index 1661f0f4e0..96b2137517 100644 --- a/rust/otap-dataflow/Cargo.toml +++ b/rust/otap-dataflow/Cargo.toml @@ -106,7 +106,7 @@ num_enum = "0.7" object_store = {version = "0.12.3", default-features = false} once_cell = "1.20.2" opentelemetry = "0.31.0" -opentelemetry-appender-tracing = "0.31.1" +#opentelemetry-appender-tracing = "0.31.1" opentelemetry-proto = { version = "0.31", default-features = false, features = ["gen-tonic-messages", "logs"]} #TODO - use it from submodule instead of crate(?) 
opentelemetry_sdk = "0.31.0" opentelemetry-stdout = "0.31.0" diff --git a/rust/otap-dataflow/crates/engine/src/pipeline_metrics.rs b/rust/otap-dataflow/crates/engine/src/pipeline_metrics.rs index aeddb39d72..28e3dd6833 100644 --- a/rust/otap-dataflow/crates/engine/src/pipeline_metrics.rs +++ b/rust/otap-dataflow/crates/engine/src/pipeline_metrics.rs @@ -402,6 +402,7 @@ pub(crate) struct PipelineMetricsMonitor { last_allocated: u64, last_deallocated: u64, + #[cfg(any(target_os = "linux", target_os = "freebsd", target_os = "openbsd"))] rusage_thread_supported: bool, // These timestamps mark the beginning of the current measurement interval @@ -451,6 +452,7 @@ impl PipelineMetricsMonitor { #[cfg(windows)] let (jemalloc_supported, last_allocated, last_deallocated) = (false, 0, 0); + #[cfg(any(target_os = "linux", target_os = "freebsd", target_os = "openbsd"))] let rusage_thread_supported = Self::init_rusage_baseline(); let tokio_rt = tokio::runtime::Handle::try_current() .ok() @@ -465,6 +467,7 @@ impl PipelineMetricsMonitor { deallocated, last_allocated, last_deallocated, + #[cfg(any(target_os = "linux", target_os = "freebsd", target_os = "openbsd"))] rusage_thread_supported, wall_start: now, cpu_start: ThreadTime::now(), @@ -704,6 +707,7 @@ impl PipelineMetricsMonitor { } } + #[cfg(any(target_os = "linux", target_os = "freebsd", target_os = "openbsd"))] fn init_rusage_baseline() -> bool { #[cfg(any(target_os = "linux", target_os = "freebsd", target_os = "openbsd"))] { diff --git a/rust/otap-dataflow/crates/telemetry/Cargo.toml b/rust/otap-dataflow/crates/telemetry/Cargo.toml index 50cf039dbc..f7422e63ad 100644 --- a/rust/otap-dataflow/crates/telemetry/Cargo.toml +++ b/rust/otap-dataflow/crates/telemetry/Cargo.toml @@ -39,7 +39,7 @@ opentelemetry_sdk = { workspace = true } opentelemetry-stdout = { workspace = true } opentelemetry-otlp = { workspace = true, features = ["grpc-tonic", "metrics", "logs"] } opentelemetry-prometheus = { workspace = true } -opentelemetry-appender-tracing = { workspace = true } +#opentelemetry-appender-tracing = { workspace = true } tracing = { workspace = true } tracing-subscriber = { workspace = true, features = ["env-filter","registry", "std", "fmt"] } diff --git a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs b/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs index 272d65cbe9..6194c6e7dd 100644 --- a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs +++ b/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs @@ -34,15 +34,10 @@ impl OpentelemetryClient { let runtime = None; - let meter_provider = - MeterProvider::configure(sdk_resource.clone(), &config.metrics, runtime)?; + let (meter_provider, runtime) = + MeterProvider::configure(sdk_resource.clone(), &config.metrics, runtime)?.into_parts(); - // Extract the meter provider and runtime by consuming the MeterProvider - let (meter_provider, runtime) = meter_provider.into_parts(); - - let logger_provider = LoggerProvider::configure(sdk_resource, &config.logs, runtime)?; - - let (logger_provider, runtime) = logger_provider.into_parts(); + let (logger_provider, runtime) = LoggerProvider::configure(sdk_resource, &config.logs, runtime)?.into_parts(); //TODO: Configure traces provider. 
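The `into_parts()` chaining above threads a single optional runtime through
successive provider configurations: each `configure` call may create the runtime
on demand and hands it back for the next call to reuse. A minimal sketch of that
shape, with stand-in types (the real code uses the OTel SDK providers and a
Tokio runtime; every name below is an assumption):

```rust
// Stand-in types; illustrative only.
struct Provider;
struct Runtime;

struct Configured {
    provider: Provider,
    runtime: Option<Runtime>,
}

impl Configured {
    /// Consume the wrapper, yielding the provider and the runtime
    /// (created on demand, or passed through unchanged).
    fn into_parts(self) -> (Provider, Option<Runtime>) {
        (self.provider, self.runtime)
    }
}

fn configure(runtime: Option<Runtime>) -> Configured {
    // Reuse the given runtime or create one lazily (elided here).
    Configured { provider: Provider, runtime }
}

fn chain() -> (Provider, Provider, Option<Runtime>) {
    let (meter, runtime) = configure(None).into_parts();
    let (logger, runtime) = configure(runtime).into_parts();
    (meter, logger, runtime)
}
```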
diff --git a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client/logger_provider.rs b/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client/logger_provider.rs index d14dc84a39..8b40365ccf 100644 --- a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client/logger_provider.rs +++ b/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client/logger_provider.rs @@ -3,12 +3,11 @@ //! Configures the OpenTelemetry logger provider based on the provided configuration. -use opentelemetry_appender_tracing::layer; use opentelemetry_otlp::{Protocol, WithExportConfig}; use opentelemetry_sdk::{Resource, logs::SdkLoggerProvider}; use otap_df_config::pipeline::service::telemetry::{ logs::{ - LogLevel, LogsConfig, + LogsConfig, processors::{ BatchLogProcessorConfig, batch::{LogBatchProcessorExporterConfig, otlp::OtlpExporterConfig}, @@ -16,9 +15,6 @@ use otap_df_config::pipeline::service::telemetry::{ }, metrics::readers::periodic::otlp::OtlpProtocol, }; -use tracing::level_filters::LevelFilter; -use tracing_subscriber::util::SubscriberInitExt; -use tracing_subscriber::{EnvFilter, layer::SubscriberExt}; use crate::error::Error; @@ -59,9 +55,6 @@ impl LoggerProvider { /// /// The TODO here is to evaluate these options and implement one of them. /// As of now, this causes contention, and we just need to accept temporarily. - /// - /// TODO: Evaluate also alternatives for the contention caused by the global - /// OpenTelemetry logger provider added as layer. pub fn configure( sdk_resource: Resource, logger_config: &LogsConfig, @@ -80,35 +73,6 @@ impl LoggerProvider { let sdk_logger_provider = sdk_logger_builder.build(); - let level = match logger_config.level { - LogLevel::Off => LevelFilter::OFF, - LogLevel::Debug => LevelFilter::DEBUG, - LogLevel::Info => LevelFilter::INFO, - LogLevel::Warn => LevelFilter::WARN, - LogLevel::Error => LevelFilter::ERROR, - }; - - // If RUST_LOG is set, use it for fine-grained control. - // Otherwise, fall back to the config level with some noisy dependencies silenced. - // Users can override by setting RUST_LOG explicitly. - let filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| { - // Default filter: use config level, but silence known noisy HTTP dependencies - EnvFilter::new(format!("{level},h2=off,hyper=off")) - }); - - // Formatting layer - let fmt_layer = tracing_subscriber::fmt::layer().with_thread_names(true); - - let sdk_layer = layer::OpenTelemetryTracingBridge::new(&sdk_logger_provider); - - // Try to initialize the global subscriber. In tests, this may fail if already set, - // which is acceptable as we're only validating the configuration works. 
- let _ = tracing_subscriber::registry() - .with(filter) - .with(fmt_layer) - .with(sdk_layer) - .try_init(); - Ok(LoggerProvider { sdk_logger_provider, runtime, From cd80b0e35b62f2c205e9bcb0734d815c4098c199 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Tue, 6 Jan 2026 21:39:02 -0800 Subject: [PATCH 11/92] option --- rust/otap-dataflow/Cargo.toml | 2 +- .../src/pipeline/service/telemetry/logs.rs | 27 ++++++-- .../crates/engine/src/pipeline_metrics.rs | 4 -- .../otap-dataflow/crates/telemetry/Cargo.toml | 2 +- .../telemetry/src/opentelemetry_client.rs | 66 +++++++++++++++++-- .../opentelemetry_client/logger_provider.rs | 46 +++++-------- .../crates/telemetry/src/self_tracing/mod.rs | 30 --------- rust/otap-dataflow/src/main.rs | 4 -- 8 files changed, 100 insertions(+), 81 deletions(-) diff --git a/rust/otap-dataflow/Cargo.toml b/rust/otap-dataflow/Cargo.toml index 96b2137517..1661f0f4e0 100644 --- a/rust/otap-dataflow/Cargo.toml +++ b/rust/otap-dataflow/Cargo.toml @@ -106,7 +106,7 @@ num_enum = "0.7" object_store = {version = "0.12.3", default-features = false} once_cell = "1.20.2" opentelemetry = "0.31.0" -#opentelemetry-appender-tracing = "0.31.1" +opentelemetry-appender-tracing = "0.31.1" opentelemetry-proto = { version = "0.31", default-features = false, features = ["gen-tonic-messages", "logs"]} #TODO - use it from submodule instead of crate(?) opentelemetry_sdk = "0.31.0" opentelemetry-stdout = "0.31.0" diff --git a/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs b/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs index 544e160119..f6492fda73 100644 --- a/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs +++ b/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs @@ -15,24 +15,31 @@ pub struct LogsConfig { #[serde(default)] pub level: LogLevel, + /// Internal log configuration options + #[serde(default = "default_internal")] + pub internal: LogsInternalConfig, + /// The list of log processors to configure. #[serde(default)] pub processors: Vec, } +fn default_internal() -> LogsInternalConfig { + LogsInternalConfig { + enabled: true, + } +} + /// Log level for internal engine logs. -/// -/// TODO: Change default to `Info` once per-thread subscriber is implemented -/// to avoid contention from the global tracing subscriber. -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default, PartialEq)] +#[derive(Debug, Clone, Copy, Serialize, Deserialize, JsonSchema, Default, PartialEq)] #[serde(rename_all = "lowercase")] pub enum LogLevel { /// Logging is completely disabled. - #[default] Off, /// Debug level logging. Debug, /// Info level logging. + #[default] Info, /// Warn level logging. Warn, @@ -40,6 +47,14 @@ pub enum LogLevel { Error, } +/// Log internal configuration +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default, PartialEq)] +#[serde(rename_all = "lowercase")] +pub struct LogsInternalConfig { + /// Is internal logging in use? 
+ pub enabled: bool, +} + #[cfg(test)] mod tests { use super::*; @@ -71,7 +86,7 @@ mod tests { fn test_logs_config_default_deserialize() -> Result<(), serde_yaml::Error> { let yaml_str = r#""#; let config: LogsConfig = serde_yaml::from_str(yaml_str)?; - assert_eq!(config.level, LogLevel::Off); + assert_eq!(config.level, LogLevel::Info); assert!(config.processors.is_empty()); Ok(()) } diff --git a/rust/otap-dataflow/crates/engine/src/pipeline_metrics.rs b/rust/otap-dataflow/crates/engine/src/pipeline_metrics.rs index 28e3dd6833..aeddb39d72 100644 --- a/rust/otap-dataflow/crates/engine/src/pipeline_metrics.rs +++ b/rust/otap-dataflow/crates/engine/src/pipeline_metrics.rs @@ -402,7 +402,6 @@ pub(crate) struct PipelineMetricsMonitor { last_allocated: u64, last_deallocated: u64, - #[cfg(any(target_os = "linux", target_os = "freebsd", target_os = "openbsd"))] rusage_thread_supported: bool, // These timestamps mark the beginning of the current measurement interval @@ -452,7 +451,6 @@ impl PipelineMetricsMonitor { #[cfg(windows)] let (jemalloc_supported, last_allocated, last_deallocated) = (false, 0, 0); - #[cfg(any(target_os = "linux", target_os = "freebsd", target_os = "openbsd"))] let rusage_thread_supported = Self::init_rusage_baseline(); let tokio_rt = tokio::runtime::Handle::try_current() .ok() @@ -467,7 +465,6 @@ impl PipelineMetricsMonitor { deallocated, last_allocated, last_deallocated, - #[cfg(any(target_os = "linux", target_os = "freebsd", target_os = "openbsd"))] rusage_thread_supported, wall_start: now, cpu_start: ThreadTime::now(), @@ -707,7 +704,6 @@ impl PipelineMetricsMonitor { } } - #[cfg(any(target_os = "linux", target_os = "freebsd", target_os = "openbsd"))] fn init_rusage_baseline() -> bool { #[cfg(any(target_os = "linux", target_os = "freebsd", target_os = "openbsd"))] { diff --git a/rust/otap-dataflow/crates/telemetry/Cargo.toml b/rust/otap-dataflow/crates/telemetry/Cargo.toml index f7422e63ad..50cf039dbc 100644 --- a/rust/otap-dataflow/crates/telemetry/Cargo.toml +++ b/rust/otap-dataflow/crates/telemetry/Cargo.toml @@ -39,7 +39,7 @@ opentelemetry_sdk = { workspace = true } opentelemetry-stdout = { workspace = true } opentelemetry-otlp = { workspace = true, features = ["grpc-tonic", "metrics", "logs"] } opentelemetry-prometheus = { workspace = true } -#opentelemetry-appender-tracing = { workspace = true } +opentelemetry-appender-tracing = { workspace = true } tracing = { workspace = true } tracing-subscriber = { workspace = true, features = ["env-filter","registry", "std", "fmt"] } diff --git a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs b/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs index 6194c6e7dd..522ad919f4 100644 --- a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs +++ b/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs @@ -9,12 +9,16 @@ pub mod meter_provider; use opentelemetry::KeyValue; use opentelemetry_sdk::{Resource, logs::SdkLoggerProvider, metrics::SdkMeterProvider}; use otap_df_config::pipeline::service::telemetry::{ - AttributeValue, AttributeValueArray, TelemetryConfig, + AttributeValue, AttributeValueArray, TelemetryConfig, logs::LogLevel, }; +use tracing::level_filters::LevelFilter; +use tracing_subscriber::EnvFilter; +use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt}; use crate::{ error::Error, opentelemetry_client::{logger_provider::LoggerProvider, meter_provider::MeterProvider}, + self_tracing::{ConsoleWriter, RawLoggingLayer}, }; /// Client for the OpenTelemetry 
SDK.
@@ -23,10 +27,28 @@ pub struct OpentelemetryClient {
     /// The reference is kept to ensure the runtime lives as long as the client.
     _runtime: Option,
     meter_provider: SdkMeterProvider,
-    logger_provider: SdkLoggerProvider,
+    logger_provider: Option<SdkLoggerProvider>,
     // TODO: Add traces providers.
 }
 
+// If RUST_LOG is set, use it for fine-grained control.
+// Otherwise, fall back to the config level with some noisy dependencies silenced.
+// Users can override by setting RUST_LOG explicitly.
+fn get_env_filter(level: LogLevel) -> EnvFilter {
+    let level = match level {
+        LogLevel::Off => LevelFilter::OFF,
+        LogLevel::Debug => LevelFilter::DEBUG,
+        LogLevel::Info => LevelFilter::INFO,
+        LogLevel::Warn => LevelFilter::WARN,
+        LogLevel::Error => LevelFilter::ERROR,
+    };
+
+    EnvFilter::try_from_default_env().unwrap_or_else(|_| {
+        // Default filter: use config level, but silence known noisy HTTP dependencies
+        EnvFilter::new(format!("{level},h2=off,hyper=off"))
+    })
+}
+
 impl OpentelemetryClient {
     /// Create a new OpenTelemetry client from the given configuration.
     pub fn new(config: &TelemetryConfig) -> Result {
@@ -37,7 +59,37 @@
         let (meter_provider, runtime) =
             MeterProvider::configure(sdk_resource.clone(), &config.metrics, runtime)?.into_parts();
 
-        let (logger_provider, runtime) = LoggerProvider::configure(sdk_resource, &config.logs, runtime)?.into_parts();
+        let tracing_setup = tracing_subscriber::registry().with(get_env_filter(config.logs.level));
+
+        let (logger_provider, runtime) = if !config.logs.internal.enabled {
+            let (logger_provider, runtime) =
+                LoggerProvider::configure(sdk_resource, &config.logs, runtime)?.into_parts();
+
+            // Tokio provides a console formatting layer, OTel
+            // provides other behaviors.
+            let fmt_layer = tracing_subscriber::fmt::layer().with_thread_names(true);
+            let sdk_layer = opentelemetry_appender_tracing::layer::OpenTelemetryTracingBridge::new(
+                &logger_provider,
+            );
+
+            // Try to initialize the global subscriber. In tests, this may fail if already set,
+            // which is acceptable as we're only validating the configuration works.
+            let _ = tracing_setup.with(fmt_layer).with(sdk_layer).try_init();
+            (
+                Some(logger_provider),
+                runtime,
+            )
+        } else {
+            let writer = if std::env::var("NO_COLOR").is_ok() {
+                ConsoleWriter::no_color()
+            } else {
+                ConsoleWriter::color()
+            };
+            // See comment above.
+            let _ = tracing_setup.with(RawLoggingLayer::new(writer)).try_init();
+
+            (None, runtime)
+        };
 
         //TODO: Configure traces provider.
@@ -94,14 +146,18 @@
     /// Get a reference to the logger provider.
     #[must_use]
-    pub fn logger_provider(&self) -> &SdkLoggerProvider {
+    pub fn logger_provider(&self) -> &Option<SdkLoggerProvider> {
         &self.logger_provider
     }
 
     /// Shutdown the OpenTelemetry SDK.
pub fn shutdown(&self) -> Result<(), Error> { let meter_shutdown_result = self.meter_provider().shutdown(); - let logger_provider_shutdown_result = self.logger_provider().shutdown(); + let logger_provider_shutdown_result = self + .logger_provider() + .as_ref() + .map(|x| x.shutdown()) + .transpose(); if let Err(e) = meter_shutdown_result { return Err(Error::ShutdownError(e.to_string())); diff --git a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client/logger_provider.rs b/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client/logger_provider.rs index 8b40365ccf..c440cea82d 100644 --- a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client/logger_provider.rs +++ b/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client/logger_provider.rs @@ -25,36 +25,7 @@ pub struct LoggerProvider { } impl LoggerProvider { - /// Initializes internal logging for the OTAP engine. - /// - /// The log level can be controlled via: - /// 1. The `logs.level` config setting (off, debug, info, warn, error) - /// 2. The `RUST_LOG` environment variable for fine-grained control - /// - /// When `RUST_LOG` is set, it takes precedence and allows filtering by target. - /// Example: `RUST_LOG=info,h2=warn,hyper=warn` enables info level but silences - /// noisy HTTP/2 and hyper logs. - /// - /// TODO: The engine uses a thread-per-core model - /// and is NUMA aware. - /// The fmt::init() here is truly global, and hence - /// this will be a source of contention. - /// We need to evaluate alternatives: - /// - /// 1. Set up per thread subscriber. - /// ```ignore - /// // start of thread - /// let _guard = tracing::subscriber::set_default(subscriber); - /// // now, with this thread, all tracing calls will go to this subscriber - /// // eliminating contention. - /// // end of thread - /// ``` - /// - /// 2. Use custom subscriber that batches logs in thread-local buffer, and - /// flushes them periodically. - /// - /// The TODO here is to evaluate these options and implement one of them. - /// As of now, this causes contention, and we just need to accept temporarily. + /// Initializes OpenTelemetry logging for the OTAP engine. 
pub fn configure( sdk_resource: Resource, logger_config: &LogsConfig, @@ -214,6 +185,18 @@ impl LoggerProvider { #[cfg(test)] mod tests { use super::*; + use opentelemetry_otlp::Protocol; + use opentelemetry_sdk::Resource; + use otap_df_config::pipeline::service::telemetry::{ + logs::{ + LogLevel, LogsInternalConfig, LogsConfig, + processors::{ + BatchLogProcessorConfig, + batch::{LogBatchProcessorExporterConfig, otlp::OtlpExporterConfig}, + }, + }, + metrics::readers::periodic::otlp::OtlpProtocol, + }; use tracing::error; #[test] @@ -221,6 +204,7 @@ mod tests { let resource = Resource::builder().build(); let logger_config = LogsConfig { level: LogLevel::Info, + internal: LogsInternalConfig::default(), processors: vec![ otap_df_config::pipeline::service::telemetry::logs::processors::LogProcessorConfig::Batch( BatchLogProcessorConfig { @@ -244,6 +228,7 @@ mod tests { let resource = Resource::builder().build(); let logger_config = LogsConfig { level: LogLevel::Info, + internal: LogsInternalConfig::default(), processors: vec![ otap_df_config::pipeline::service::telemetry::logs::processors::LogProcessorConfig::Batch( BatchLogProcessorConfig { @@ -274,6 +259,7 @@ mod tests { let resource = Resource::builder().build(); let logger_config = LogsConfig { level: LogLevel::default(), + internal: LogsInternalConfig::default(), processors: vec![], }; let logger_provider = LoggerProvider::configure(resource, &logger_config, None)?; diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs index 61ebec324b..b3e6282ad4 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs @@ -12,12 +12,9 @@ pub mod direct_encoder; pub mod formatter; -use super::Error; use bytes::Bytes; use std::collections::HashMap; use tracing::callsite::Identifier; -use tracing_subscriber::layer::SubscriberExt; -use tracing_subscriber::util::SubscriberInitExt; pub use formatter::{ConsoleWriter, Layer as RawLoggingLayer}; @@ -86,30 +83,3 @@ impl CallsiteMap { self.callsites.get(id) } } - -/// Initialize raw logging as early as possible. -pub fn init_raw_logging() { - if let Err(err) = try_init_raw_logging() { - tracing::error!( - name: "init_failed", - "Failed to initialize raw logging subscriber: {}", - err.to_string(), - ); - } -} - -/// Try to initialize raw logging, returning an error if a subscriber is already set. -pub fn try_init_raw_logging() -> Result<(), Error> { - // If RUST_LOG is set, use it for fine-grained control. - // Otherwise, default to INFO level with some noisy dependencies silenced. - let filter = tracing_subscriber::EnvFilter::try_from_default_env() - .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info,h2=off,hyper=off")); - - let layer = RawLoggingLayer::new(ConsoleWriter::color()); - - tracing_subscriber::registry() - .with(filter) - .with(layer) - .try_init() - .map_err(|e| Error::TracingInitError(e.to_string())) -} diff --git a/rust/otap-dataflow/src/main.rs b/rust/otap-dataflow/src/main.rs index 721c3baac4..e242b472cf 100644 --- a/rust/otap-dataflow/src/main.rs +++ b/rust/otap-dataflow/src/main.rs @@ -107,10 +107,6 @@ fn parse_core_id_range(s: &str) -> Result { } fn main() -> Result<(), Box> { - // Initialize raw logging as early as possible so that log messages during - // config parsing and validation appear well formatted. 
- otap_df_telemetry::self_tracing::init_raw_logging(); - // Initialize rustls crypto provider (required for rustls 0.23+) // We use ring as the default provider #[cfg(feature = "experimental-tls")] From ce6a95b077a3ab8c48330269f0159d3d0c06ee8a Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Tue, 6 Jan 2026 21:46:03 -0800 Subject: [PATCH 12/92] remove dead code --- rust/otap-dataflow/Cargo.toml | 1 - .../crates/controller/src/lib.rs | 6 +- .../crates/pdata/src/otlp/stateful_encoder.rs | 814 ------------------ 3 files changed, 3 insertions(+), 818 deletions(-) delete mode 100644 rust/otap-dataflow/crates/pdata/src/otlp/stateful_encoder.rs diff --git a/rust/otap-dataflow/Cargo.toml b/rust/otap-dataflow/Cargo.toml index 1661f0f4e0..283f71f76f 100644 --- a/rust/otap-dataflow/Cargo.toml +++ b/rust/otap-dataflow/Cargo.toml @@ -33,7 +33,6 @@ path = "src/main.rs" otap-df-config.workspace = true otap-df-controller.workspace = true otap-df-otap.workspace = true -otap-df-telemetry.workspace = true thiserror.workspace = true quiver = { workspace = true, optional = true } serde_json.workspace = true diff --git a/rust/otap-dataflow/crates/controller/src/lib.rs b/rust/otap-dataflow/crates/controller/src/lib.rs index eb8cae70fc..6c1e39f7cb 100644 --- a/rust/otap-dataflow/crates/controller/src/lib.rs +++ b/rust/otap-dataflow/crates/controller/src/lib.rs @@ -36,6 +36,7 @@ use otap_df_state::DeployedPipelineKey; use otap_df_state::event::{ErrorSummary, ObservedEvent}; use otap_df_state::reporter::ObservedEventReporter; use otap_df_state::store::ObservedStateStore; +use otap_df_telemetry::opentelemetry_client::OpentelemetryClient; use otap_df_telemetry::reporter::MetricsReporter; use otap_df_telemetry::{MetricsSystem, otel_info, otel_info_span, otel_warn}; use std::thread; @@ -82,8 +83,7 @@ impl Controller { node_ctrl_msg_channel_size = settings.default_node_ctrl_msg_channel_size, pipeline_ctrl_msg_channel_size = settings.default_pipeline_ctrl_msg_channel_size ); - // Note: Raw logging is initialized early in main.rs via init_raw_logging(). - // OpenTelemetry client integration will be added in a future phase. + let opentelemetry_client = OpentelemetryClient::new(telemetry_config)?; let metrics_system = MetricsSystem::new(telemetry_config); let metrics_dispatcher = metrics_system.dispatcher(); let metrics_reporter = metrics_system.reporter(); @@ -257,7 +257,7 @@ impl Controller { handle.shutdown_and_join()?; } obs_state_join_handle.shutdown_and_join()?; - // Note: OpenTelemetry client shutdown will be added when the client is re-enabled. + opentelemetry_client.shutdown()?; Ok(()) } diff --git a/rust/otap-dataflow/crates/pdata/src/otlp/stateful_encoder.rs b/rust/otap-dataflow/crates/pdata/src/otlp/stateful_encoder.rs deleted file mode 100644 index 81c3b4a97c..0000000000 --- a/rust/otap-dataflow/crates/pdata/src/otlp/stateful_encoder.rs +++ /dev/null @@ -1,814 +0,0 @@ -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -//! Stateful OTLP encoder for streaming single log records with automatic batching. -//! -//! This encoder maintains open `ResourceLogs` and `ScopeLogs` messages, appending individual -//! `LogRecord`s as they arrive. When the InstrumentationScope changes (via scope name), it automatically -//! closes the previous scope and starts a new one. The Resource is pre-encoded and copied once. -//! -//! # Design -//! - **Resource**: Pre-encoded as `OtlpBytes` (includes protobuf field tag + length + content) -//! 
- **Scope**: Encoded on-the-fly using scope name (InstrumentationScope.name only) -//! - **LogRecord**: Accepted as `LogRecordView` trait, encoded on-the-fly - -use crate::error::Result; -use crate::otlp::common::{ProtoBuffer, encode_len_placeholder, patch_len_placeholder}; -use crate::proto::consts::{field_num::logs::*, wire_types}; -use crate::views::logs::LogRecordView; -use bytes::Bytes; - -/// Pre-encoded OTLP bytes (includes protobuf field tag + length + message content) -/// -/// These bytes are ready to be copied directly into the output buffer without further processing. -pub type OtlpBytes = Vec; -/// @@@ Remove me, use super::OtlpProtoBytes - -/// Position marker for a length-delimited field that needs patching -/// -/// @@@ Make this variable width. We want 2-byte padding for records -/// and 4-byte padding for the container messages ResourceLogs, -/// ScopeLogs, etc, because it is reasonable to insist on 16 KiB log -/// messages for a self-diagnostic library and we are able to drop -/// attributes to achieve this (OTLP has a dedicated field for this). -/// Using a maybe, or a for the primitive u16, u32. -#[derive(Debug, Clone, Copy)] -struct LengthPlaceholder { - /// Position in buffer where the 4-byte length placeholder starts - position: usize, -} - -impl LengthPlaceholder { - fn new(position: usize) -> Self { - Self { position } - } - - fn patch(self, buf: &mut ProtoBuffer) { - let content_len = buf.len() - self.position - 4; - patch_len_placeholder(buf, 4, content_len, self.position); - } -} - -/// Current state of the stateful encoder -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum EncoderState { - /// No messages open, ready to start new ResourceLogs - Idle, - /// ResourceLogs is open, ready to add ScopeLogs - ResourceOpen, - /// ResourceLogs and ScopeLogs are both open, ready to append LogRecords - ScopeOpen, -} - -/// Stateful OTLP encoder that maintains open ResourceLogs and ScopeLogs messages. 
-/// -/// # Example -/// ```ignore -/// let mut encoder = StatefulOtlpEncoder::new(64 * 1024); -/// -/// // Pre-encode resource once -/// let resource_bytes = encode_resource_to_otlp_bytes(&resource); -/// -/// // Scope name is typically the module path or instrumentation library name -/// let scope_name = "my_module::component"; -/// -/// // Encode multiple log records - automatically batched if scope name matches -/// encoder.encode_log_record(&log_record_view, &resource_bytes, scope_name)?; -/// encoder.encode_log_record(&log_record_view2, &resource_bytes, scope_name)?; // Batched -/// -/// // Flush to get OTLP bytes -/// let otlp_bytes = encoder.flush(); -/// ``` -pub struct StatefulOtlpEncoder { - /// Output buffer (reuses ProtoBuffer infrastructure) - buf: ProtoBuffer, - - /// Current encoder state - state: EncoderState, - - /// Length placeholder for the current ResourceLogs message - resource_logs_placeholder: Option, - - /// Length placeholder for the current ScopeLogs message - scope_logs_placeholder: Option, - - /// Name of the current scope for comparison - current_scope_name: Option, -} - -impl StatefulOtlpEncoder { - /// Create a new encoder with pre-allocated buffer capacity - pub fn new(capacity_bytes: usize) -> Self { - Self { - buf: ProtoBuffer::with_capacity(capacity_bytes), - state: EncoderState::Idle, - resource_logs_placeholder: None, - scope_logs_placeholder: None, - current_scope_name: None, - } - } - - /// Get the current buffer size in bytes - #[inline] - pub fn len(&self) -> usize { - self.buf.len() - } - - /// Check if the buffer is empty - #[inline] - pub fn is_empty(&self) -> bool { - self.buf.is_empty() - } - - /// Encode a single log record with its Resource and Scope context. - /// - /// This method automatically handles batching: - /// - If scope name matches the current batch, the LogRecord is appended - /// - If scope name differs, the current ScopeLogs is closed and a new one started - /// - /// # Parameters - /// - `log_record`: View of the log record to encode - /// - `resource_bytes`: Pre-encoded Resource (includes protobuf field tag + length + content) - /// - `scope_name`: InstrumentationScope name (typically tracing target/module path) - pub fn encode_log_record( - &mut self, - log_record: &impl LogRecordView, - resource_bytes: &[u8], // @@@ Make super::OtlpProtoBytes, expecting ::ExportLogsRequest - scope_name: &str, - ) -> Result<()> { - match self.state { - EncoderState::Idle => { - // Start new batch with Resource and Scope - self.start_resource_logs(resource_bytes)?; - self.start_scope_logs(scope_name)?; - self.append_log_record(log_record)?; - } - - EncoderState::ResourceOpen => { - // Resource already open, start scope - self.start_scope_logs(scope_name)?; - self.append_log_record(log_record)?; - } - - EncoderState::ScopeOpen => { - if self.current_scope_name.as_deref() == Some(scope_name) { - // Same scope - just append LogRecord - self.append_log_record(log_record)?; - } else { - // Different scope - close current and start new - self.close_scope_logs()?; - self.start_scope_logs(scope_name)?; - self.append_log_record(log_record)?; - } - } - } - - Ok(()) - } - - /// Flush the encoder, closing all open messages and returning the accumulated OTLP bytes. - /// - /// After flushing, the encoder is reset and ready for new messages. 
- pub fn flush(&mut self) -> Bytes { - // Close any open messages - if self.state == EncoderState::ScopeOpen { - let _ = self.close_scope_logs(); - } - if self.state == EncoderState::ResourceOpen || self.state == EncoderState::ScopeOpen { - let _ = self.close_resource_logs(); - } - - // Take the bytes and reset the encoder - let (bytes, capacity) = self.buf.take_into_bytes(); - - // Reset state - self.state = EncoderState::Idle; - self.resource_logs_placeholder = None; - self.scope_logs_placeholder = None; - self.current_scope_name = None; - - // Ensure capacity is preserved for next use - self.buf.ensure_capacity(capacity); - - bytes - } - - // === Private state management methods === - - fn start_resource_logs(&mut self, resource_bytes: &[u8]) -> Result<()> { - // Encode LogsData.resource_logs field (tag 1, length-delimited) - self.buf - .encode_field_tag(LOGS_DATA_RESOURCE, wire_types::LEN); - - // Write 4-byte length placeholder - let placeholder = LengthPlaceholder::new(self.buf.len()); - encode_len_placeholder(&mut self.buf); - - // Copy pre-encoded Resource bytes (includes ResourceLogs.resource field) - self.buf.extend_from_slice(resource_bytes); - - // Update state - self.resource_logs_placeholder = Some(placeholder); - self.state = EncoderState::ResourceOpen; - - Ok(()) - } - - fn start_scope_logs(&mut self, scope_name: &str) -> Result<()> { - // Encode ResourceLogs.scope_logs field (tag 2, length-delimited) - self.buf - .encode_field_tag(RESOURCE_LOGS_SCOPE_LOGS, wire_types::LEN); - - // Write 4-byte length placeholder - let placeholder = LengthPlaceholder::new(self.buf.len()); - encode_len_placeholder(&mut self.buf); - - // Encode ScopeLogs.scope field (tag 1, InstrumentationScope message) - self.encode_instrumentation_scope(scope_name)?; - - // Update state - self.scope_logs_placeholder = Some(placeholder); - self.current_scope_name = Some(scope_name.to_string()); - self.state = EncoderState::ScopeOpen; - - Ok(()) - } - - fn append_log_record(&mut self, log_record: &impl LogRecordView) -> Result<()> { - // Encode ScopeLogs.log_records field (tag 2, length-delimited) - self.buf - .encode_field_tag(SCOPE_LOGS_LOG_RECORDS, wire_types::LEN); - - // Use 4-byte padding for LogRecord - let placeholder = LengthPlaceholder::new(self.buf.len()); - encode_len_placeholder(&mut self.buf); - - // Encode LogRecordView fields - encode_log_record_view(log_record, &mut self.buf)?; - - // Patch the length - placeholder.patch(&mut self.buf); - - Ok(()) - } - - fn close_scope_logs(&mut self) -> Result<()> { - if let Some(placeholder) = self.scope_logs_placeholder.take() { - placeholder.patch(&mut self.buf); - self.state = EncoderState::ResourceOpen; - self.current_scope_name = None; - } - Ok(()) - } - - fn close_resource_logs(&mut self) -> Result<()> { - if let Some(placeholder) = self.resource_logs_placeholder.take() { - placeholder.patch(&mut self.buf); - self.state = EncoderState::Idle; - } - Ok(()) - } - - /// Encode an InstrumentationScope with just the name field - fn encode_instrumentation_scope(&mut self, scope_name: &str) -> Result<()> { - use crate::proto::consts::field_num::common::INSTRUMENTATION_SCOPE_NAME; - - // Encode ScopeLogs.scope field (tag 1, length-delimited) - self.buf.encode_field_tag(SCOPE_LOG_SCOPE, wire_types::LEN); - let scope_placeholder = LengthPlaceholder::new(self.buf.len()); - encode_len_placeholder(&mut self.buf); - - // Encode InstrumentationScope.name field (tag 1, string) - self.buf.encode_string(INSTRUMENTATION_SCOPE_NAME, scope_name); - - // Patch 
InstrumentationScope length - scope_placeholder.patch(&mut self.buf); - - Ok(()) - } -} - -// === Helper functions for encoding LogRecordView === - -// TODO(consolidation): The OTAP batch encoder in `logs.rs` (~110 lines in encode_log_record()) -// duplicates the field encoding logic below. Since OTAP implements LogRecordView (via -// OtapLogRecordView in views/otap/logs.rs), we could refactor logs.rs to: -// 1. Keep its batching/sorting/cursor logic (OTAP-specific) -// 2. Delegate LogRecord field encoding to this function via the view trait -// This would eliminate ~150 lines of duplicated code across encode_log_record, encode_any_value, -// and encode_key_value, making the view-based encoder the canonical implementation for all -// LogRecord encoding. The view trait methods are #[inline] so there's no performance impact. -// Same opportunity exists for traces.rs and metrics.rs encoders. - -/// Encode all fields of a LogRecordView -fn encode_log_record_view(log_record: &impl LogRecordView, buf: &mut ProtoBuffer) -> Result<()> { - // time_unix_nano (field 1, fixed64) - if let Some(time) = log_record.time_unix_nano() { - buf.encode_field_tag(LOG_RECORD_TIME_UNIX_NANO, wire_types::FIXED64); - buf.extend_from_slice(&time.to_le_bytes()); - } - - // severity_number (field 2, varint) - if let Some(severity) = log_record.severity_number() { - buf.encode_field_tag(LOG_RECORD_SEVERITY_NUMBER, wire_types::VARINT); - buf.encode_varint(severity as u64); - } - - // severity_text (field 3, string) - if let Some(text) = log_record.severity_text() { - if !text.is_empty() { - // Convert &[u8] to &str for encode_string - if let Ok(text_str) = std::str::from_utf8(text) { - buf.encode_string(LOG_RECORD_SEVERITY_TEXT, text_str); - } - } - } - - // body (field 5, AnyValue) - encode from AnyValueView - if let Some(body) = log_record.body() { - encode_any_value_view_field(LOG_RECORD_BODY, &body, buf)?; - } - - // attributes (field 6, repeated KeyValue) - encode from AttributeView iterator - for attr in log_record.attributes() { - encode_attribute_view(LOG_RECORD_ATTRIBUTES, &attr, buf)?; - } - - // dropped_attributes_count (field 7, uint32) - let dropped = log_record.dropped_attributes_count(); - if dropped > 0 { - buf.encode_field_tag(LOG_RECORD_DROPPED_ATTRIBUTES_COUNT, wire_types::VARINT); - buf.encode_varint(dropped as u64); - } - - // flags (field 8, fixed32) - if let Some(flags) = log_record.flags() { - buf.encode_field_tag(LOG_RECORD_FLAGS, wire_types::FIXED32); - buf.extend_from_slice(&flags.to_le_bytes()); - } - - // trace_id (field 9, bytes) - if let Some(trace_id) = log_record.trace_id() { - buf.encode_bytes(LOG_RECORD_TRACE_ID, trace_id); - } - - // span_id (field 10, bytes) - if let Some(span_id) = log_record.span_id() { - buf.encode_bytes(LOG_RECORD_SPAN_ID, span_id); - } - - // observed_time_unix_nano (field 11, fixed64) - if let Some(observed_time) = log_record.observed_time_unix_nano() { - buf.encode_field_tag(LOG_RECORD_OBSERVED_TIME_UNIX_NANO, wire_types::FIXED64); - buf.extend_from_slice(&observed_time.to_le_bytes()); - } - - Ok(()) -} - -/// Encode an AttributeView as a length-delimited field -fn encode_attribute_view( - field_tag: u64, - attr: &impl crate::views::common::AttributeView, - buf: &mut ProtoBuffer, -) -> Result<()> { - use crate::proto::consts::field_num::common::*; - - // Start KeyValue message - buf.encode_field_tag(field_tag, wire_types::LEN); - let placeholder = LengthPlaceholder::new(buf.len()); - encode_len_placeholder(buf); - - // Encode key - let key = attr.key(); - if 
!key.is_empty() { - // Convert &[u8] to &str for encode_string - if let Ok(key_str) = std::str::from_utf8(key) { - buf.encode_string(KEY_VALUE_KEY, key_str); - } - } - - // Encode value (if present) - if let Some(value) = attr.value() { - encode_any_value_view_field(KEY_VALUE_VALUE, &value, buf)?; - } - - // Patch length - placeholder.patch(buf); - - Ok(()) -} - -/// Encode an AnyValueView as a length-delimited field -fn encode_any_value_view_field<'a>( - field_tag: u64, - value: &impl crate::views::common::AnyValueView<'a>, - buf: &mut ProtoBuffer, -) -> Result<()> { - buf.encode_field_tag(field_tag, wire_types::LEN); - let placeholder = LengthPlaceholder::new(buf.len()); - encode_len_placeholder(buf); - - encode_any_value_view_content(value, buf)?; - - placeholder.patch(buf); - Ok(()) -} - -/// Encode the content of an AnyValueView (without the outer field tag) -fn encode_any_value_view_content<'a>( - value: &impl crate::views::common::AnyValueView<'a>, - buf: &mut ProtoBuffer, -) -> Result<()> { - use crate::proto::consts::field_num::common::*; - use crate::views::common::ValueType; - - match value.value_type() { - ValueType::String => { - if let Some(s) = value.as_string() { - // Convert &[u8] to &str for encode_string - if let Ok(s_str) = std::str::from_utf8(s) { - buf.encode_string(ANY_VALUE_STRING_VALUE, s_str); - } - } - } - ValueType::Bool => { - if let Some(b) = value.as_bool() { - buf.encode_field_tag(ANY_VALUE_BOOL_VALUE, wire_types::VARINT); - buf.encode_varint(if b { 1 } else { 0 }); - } - } - ValueType::Int64 => { - if let Some(i) = value.as_int64() { - buf.encode_field_tag(ANY_VALUE_INT_VALUE, wire_types::VARINT); - buf.encode_varint(i as u64); - } - } - ValueType::Double => { - if let Some(d) = value.as_double() { - buf.encode_field_tag(ANY_VALUE_DOUBLE_VALUE, wire_types::FIXED64); - buf.extend_from_slice(&d.to_le_bytes()); - } - } - ValueType::Bytes => { - if let Some(bytes) = value.as_bytes() { - buf.encode_bytes(ANY_VALUE_BYTES_VALUE, bytes); - } - } - ValueType::Array => { - if let Some(mut arr_iter) = value.as_array() { - // Encode ArrayValue - buf.encode_field_tag(ANY_VALUE_ARRAY_VALUE, wire_types::LEN); - let placeholder = LengthPlaceholder::new(buf.len()); - encode_len_placeholder(buf); - - while let Some(val) = arr_iter.next() { - encode_any_value_view_field(ARRAY_VALUE_VALUES, &val, buf)?; - } - - placeholder.patch(buf); - } - } - ValueType::KeyValueList => { - if let Some(mut kvlist_iter) = value.as_kvlist() { - // Encode KeyValueList - buf.encode_field_tag(ANY_VALUE_KVLIST_VALUE, wire_types::LEN); - let placeholder = LengthPlaceholder::new(buf.len()); - encode_len_placeholder(buf); - - while let Some(kv) = kvlist_iter.next() { - encode_attribute_view(KEY_VALUE_LIST_VALUES, &kv, buf)?; - } - - placeholder.patch(buf); - } - } - ValueType::Empty => { - // Empty AnyValue - valid according to spec - } - } - - Ok(()) -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::proto::opentelemetry::common::v1::{ - AnyValue, KeyValue, any_value, - }; - use crate::proto::opentelemetry::resource::v1::Resource; - use crate::schema::{SpanId, TraceId}; - use crate::views::common::{AnyValueView, AttributeView, Str, ValueType}; - use crate::views::logs::LogRecordView; - - // Test helper: Simple LogRecordView implementation - struct SimpleLogRecord { - time_unix_nano: Option, - severity_number: Option, - severity_text: Option<&'static str>, - body: Option<&'static str>, - trace_id: Option, - span_id: Option, - } - - impl LogRecordView for SimpleLogRecord { - type 
Attribute<'a> - = SimpleAttribute - where - Self: 'a; - type AttributeIter<'a> - = std::iter::Empty> - where - Self: 'a; - type Body<'a> - = SimpleAnyValue - where - Self: 'a; - - fn time_unix_nano(&self) -> Option { - self.time_unix_nano - } - - fn observed_time_unix_nano(&self) -> Option { - self.time_unix_nano // same for tests - } - - fn severity_number(&self) -> Option { - self.severity_number - } - - fn severity_text(&self) -> Option> { - self.severity_text.map(|s| s.as_bytes()) - } - - fn body(&self) -> Option> { - self.body.map(|s| SimpleAnyValue::String(s)) - } - - fn attributes(&self) -> Self::AttributeIter<'_> { - std::iter::empty() - } - - fn dropped_attributes_count(&self) -> u32 { - 0 - } - - fn flags(&self) -> Option { - Some(0) - } - - fn trace_id(&self) -> Option<&TraceId> { - self.trace_id.as_ref() - } - - fn span_id(&self) -> Option<&SpanId> { - self.span_id.as_ref() - } - - fn event_name(&self) -> Option> { - None - } - } - - #[derive(Clone)] - enum SimpleAnyValue { - String(&'static str), - } - - impl<'a> AnyValueView<'a> for SimpleAnyValue { - type KeyValue = SimpleAttribute; - type ArrayIter<'arr> - = std::iter::Empty - where - Self: 'arr; - type KeyValueIter<'kv> - = SimpleAttribute - where - Self: 'kv; - - fn value_type(&self) -> ValueType { - match self { - SimpleAnyValue::String(_) => ValueType::String, - } - } - - fn as_string(&self) -> Option> { - match self { - SimpleAnyValue::String(s) => Some(s.as_bytes()), - } - } - - fn as_bool(&self) -> Option { - None - } - - fn as_int64(&self) -> Option { - None - } - - fn as_double(&self) -> Option { - None - } - - fn as_bytes(&self) -> Option<&[u8]> { - None - } - - fn as_array(&self) -> Option> { - None - } - - fn as_kvlist(&self) -> Option> { - None - } - } - - #[derive(Clone)] - struct SimpleAttribute; - - impl AttributeView for SimpleAttribute { - type Val<'val> - = SimpleAnyValue - where - Self: 'val; - - fn key(&self) -> Str<'_> { - "key".as_bytes() - } - - fn value(&self) -> Option> { - Some(SimpleAnyValue::String("value")) - } - } - - impl Iterator for SimpleAttribute { - type Item = Self; - - fn next(&mut self) -> Option { - None - } - } - - // Helper: Pre-encode a Resource as OtlpBytes - fn encode_resource_bytes(resource: &Resource) -> OtlpBytes { - use crate::proto::consts::field_num::resource::*; - let mut buf = ProtoBuffer::with_capacity(256); - - // Encode ResourceLogs.resource field (tag 1) - buf.encode_field_tag(1, wire_types::LEN); - let start = buf.len(); - encode_len_placeholder(&mut buf); - - // Encode attributes - for attr in &resource.attributes { - encode_attribute_proto(RESOURCE_ATTRIBUTES, attr, &mut buf); - } - - // Patch length - let content_len = buf.len() - start - 4; - patch_len_placeholder(&mut buf, 4, content_len, start); - - buf.into_bytes().to_vec() - } - - - - // Helper to encode protobuf KeyValue (for test helpers) - fn encode_attribute_proto(field_tag: u64, attr: &KeyValue, buf: &mut ProtoBuffer) { - use crate::proto::consts::field_num::common::*; - buf.encode_field_tag(field_tag, wire_types::LEN); - let start = buf.len(); - encode_len_placeholder(buf); - - if !attr.key.is_empty() { - buf.encode_string(KEY_VALUE_KEY, &attr.key); - } - - if let Some(ref value) = attr.value { - encode_any_value_proto(KEY_VALUE_VALUE, value, buf); - } - - let content_len = buf.len() - start - 4; - patch_len_placeholder(buf, 4, content_len, start); - } - - fn encode_any_value_proto(field_tag: u64, value: &AnyValue, buf: &mut ProtoBuffer) { - use crate::proto::consts::field_num::common::*; - 
buf.encode_field_tag(field_tag, wire_types::LEN); - let start = buf.len(); - encode_len_placeholder(buf); - - match &value.value { - Some(any_value::Value::StringValue(s)) => { - buf.encode_string(ANY_VALUE_STRING_VALUE, s); - } - _ => {} - } - - let content_len = buf.len() - start - 4; - patch_len_placeholder(buf, 4, content_len, start); - } - - #[test] - fn test_encoder_state_machine() { - let mut encoder = StatefulOtlpEncoder::new(1024); - - // Initial state - assert_eq!(encoder.state, EncoderState::Idle); - assert!(encoder.is_empty()); - - // Pre-encode resource - let resource = Resource::default(); - let resource_bytes = encode_resource_bytes(&resource); - let scope_name = "test_module"; - - // Simple log record - let log_record = SimpleLogRecord { - time_unix_nano: Some(1000), - severity_number: Some(9), - severity_text: Some("INFO"), - body: Some("test message"), - trace_id: None, - span_id: None, - }; - - encoder - .encode_log_record(&log_record, &resource_bytes, scope_name) - .unwrap(); - - // Should have data now - assert!(!encoder.is_empty()); - assert_eq!(encoder.state, EncoderState::ScopeOpen); - - // Flush should reset - let bytes = encoder.flush(); - assert!(!bytes.is_empty()); - assert_eq!(encoder.state, EncoderState::Idle); - } - - #[test] - fn test_batching_same_scope() { - let mut encoder = StatefulOtlpEncoder::new(1024); - - let resource = Resource::default(); - let resource_bytes = encode_resource_bytes(&resource); - let scope_name = "test_module"; - - // Encode three records with same scope - for i in 0..3 { - let log_record = SimpleLogRecord { - time_unix_nano: Some(i as u64), - severity_number: Some(9), - severity_text: Some("INFO"), - body: Some("test"), - trace_id: None, - span_id: None, - }; - encoder - .encode_log_record(&log_record, &resource_bytes, scope_name) - .unwrap(); - } - - // Should be in ScopeOpen state (not closed between records) - assert_eq!(encoder.state, EncoderState::ScopeOpen); - - let bytes = encoder.flush(); - assert!(!bytes.is_empty()); - } - - #[test] - fn test_different_scopes_close_and_reopen() { - let mut encoder = StatefulOtlpEncoder::new(4096); - - let resource = Resource::default(); - let resource_bytes = encode_resource_bytes(&resource); - - let scope1_name = "scope1"; - let scope2_name = "scope2"; - - let log_record = SimpleLogRecord { - time_unix_nano: Some(1000), - severity_number: Some(9), - severity_text: Some("INFO"), - body: Some("test"), - trace_id: None, - span_id: None, - }; - - // Encode with scope1 - encoder - .encode_log_record(&log_record, &resource_bytes, scope1_name) - .unwrap(); - assert_eq!(encoder.state, EncoderState::ScopeOpen); - - // Encode with scope2 - should close scope1 and start scope2 - encoder - .encode_log_record(&log_record, &resource_bytes, scope2_name) - .unwrap(); - assert_eq!(encoder.state, EncoderState::ScopeOpen); - - let bytes = encoder.flush(); - assert!(!bytes.is_empty()); - } -} From 165c940d17ee3faddf003fdec5d98437c5d7b9f3 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Tue, 6 Jan 2026 22:03:21 -0800 Subject: [PATCH 13/92] rename --- .../{direct_encoder.rs => encoder.rs} | 68 +++---------------- .../telemetry/src/self_tracing/formatter.rs | 8 +-- .../crates/telemetry/src/self_tracing/mod.rs | 4 +- 3 files changed, 12 insertions(+), 68 deletions(-) rename rust/otap-dataflow/crates/telemetry/src/self_tracing/{direct_encoder.rs => encoder.rs} (89%) diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/direct_encoder.rs 
b/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs similarity index 89% rename from rust/otap-dataflow/crates/telemetry/src/self_tracing/direct_encoder.rs rename to rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs index 361fd9bc1e..5ec59ce4c0 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/direct_encoder.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs @@ -2,42 +2,19 @@ // SPDX-License-Identifier: Apache-2.0 //! Direct OTLP bytes encoder for tokio-tracing events. -//! -//! This module provides zero-allocation encoding from `tracing::Event` directly to -//! OTLP protobuf bytes, bypassing the View abstraction entirely. The key insight is -//! that the tracing `Visit` trait gives us access to field data during a callback, -//! and we can encode directly to a protobuf buffer during that callback. -//! -//! # Design -//! -//! Instead of: -//! 1. Visit event fields → allocate intermediate struct → encode via View trait -//! -//! We do: -//! 1. Visit event fields → encode directly to protobuf buffer -//! -//! This eliminates all intermediate allocations and lifetime complexities. -//! -//! # Protocol Buffer Encoding -//! -//! The encoder produces bytes in the OTLP LogRecord protobuf format. For single-record -//! use cases, it encodes just the LogRecord message. For batched use cases, see -//! `StatefulDirectEncoder` which maintains open ResourceLogs/ScopeLogs containers. use bytes::Bytes; use std::fmt::Write as FmtWrite; use std::time::{SystemTime, UNIX_EPOCH}; use tracing::{Event, Level}; - -// Re-export ProtoBuffer and helpers from pdata for direct use -pub use otap_df_pdata::otlp::{ProtoBuffer, encode_len_placeholder, patch_len_placeholder}; +use otap_df_pdata::otlp::{ProtoBuffer, encode_len_placeholder, patch_len_placeholder}; use otap_df_pdata::proto::consts::{field_num::common::*, field_num::logs::*, wire_types}; /// Position marker for a length-delimited field that needs patching. /// -/// When encoding protobuf, we don't know the length of nested messages until we've -/// written all their content. We reserve 4 bytes for the length, write the content, -/// then patch the length back. +/// TODO: This would belong in otap_df_pdata::otlp, for use in place +/// of directly calling encode_len_placeholder, patch_len_placeholder, +/// except we should use the macros defined there instead. Remove. #[derive(Debug, Clone, Copy)] pub struct LengthPlaceholder { /// Position in buffer where the 4-byte length placeholder starts @@ -59,10 +36,8 @@ impl LengthPlaceholder { } } -/// Wrapper for ProtoBuffer that implements `std::fmt::Write`. -/// -/// This allows direct formatting of `Debug` values into the protobuf buffer -/// without allocating an intermediate `String`. +/// Wrapper for ProtoBuffer for formatting of Debug values without +/// allocating an intermediate String. struct ProtoBufferWriter<'a> { buf: &'a mut ProtoBuffer, } @@ -76,21 +51,6 @@ impl FmtWrite for ProtoBufferWriter<'_> { } /// Direct encoder that writes a single LogRecord from a tracing Event. -/// -/// This encoder writes directly to a provided `ProtoBuffer`, producing the -/// protobuf encoding of a LogRecord message without any intermediate structs. 
-/// -/// # Example -/// -/// ```ignore -/// use tracing_subscriber::layer::Layer; -/// -/// // In a Layer::on_event callback: -/// fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) { -/// let mut encoder = DirectLogRecordEncoder::new(&mut self.buffer); -/// encoder.encode_event(event); -/// } -/// ``` pub struct DirectLogRecordEncoder<'buf> { buf: &'buf mut ProtoBuffer, } @@ -185,6 +145,7 @@ impl<'buf> DirectFieldVisitor<'buf> { /// Encode an attribute (KeyValue message) with a string value. #[inline] pub fn encode_string_attribute(&mut self, key: &str, value: &str) { + // KeyValue message as LOG_RECORD_ATTRIBUTES field (tag 6) self.buf.encode_field_tag(LOG_RECORD_ATTRIBUTES, wire_types::LEN); let kv_placeholder = LengthPlaceholder::new(self.buf.len()); @@ -373,20 +334,7 @@ impl tracing::field::Visit for DirectFieldVisitor<'_> { } fn record_debug(&mut self, field: &tracing::field::Field, value: &dyn std::fmt::Debug) { - // Zero-allocation path: write Debug output directly to protobuf buffer. - // - // Note: This method is only called for types that don't implement the specific - // Visit methods (record_i64, record_f64, record_bool, record_str). Primitives - // are encoded as native OTLP AnyValue types (int_value, double_value, etc.), - // preserving type fidelity. Only complex types fall through to this Debug path. - // - // TODO: The Debug trait only provides string formatting, not structural access. - // std::fmt::Formatter is opaque with no public constructor, so we cannot intercept - // the debug_struct/debug_list/field calls to encode as nested OTLP AnyValue messages. - // To support structured encoding, types would need to implement an alternative trait: - // - `serde::Serialize` → encode to AnyValue::kvlist_value / array_value - // - `valuable::Valuable` → designed for structured inspection (limited adoption) - // - `tracing::Value` → unstable, may provide this in the future + // The Rust Debug type cannot be destructured, only formatted. if field.name() == "message" { self.encode_body_debug(value); } else { diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs index d85f844681..e73dacefa7 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs @@ -1,25 +1,23 @@ // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 -//! A `fmt::layer()` alternative using self_tracing::LogRecord. +//! An alternative to Tokio fmt::layer(). use bytes::Bytes; use std::io::Write; use std::sync::RwLock; use std::time::{SystemTime, UNIX_EPOCH}; - use otap_df_pdata::proto::consts::field_num::logs::{LOG_RECORD_ATTRIBUTES, LOG_RECORD_BODY}; use otap_df_pdata::proto::consts::wire_types; use otap_df_pdata::views::common::{AnyValueView, AttributeView, ValueType}; use otap_df_pdata::views::otlp::bytes::common::{RawAnyValue, RawKeyValue}; use otap_df_pdata::views::otlp::bytes::decode::read_varint; - use tracing::span::{Attributes, Record}; use tracing::{Event, Level, Subscriber}; use tracing_subscriber::layer::{Context, Layer as TracingLayer}; use tracing_subscriber::registry::LookupSpan; - -use super::direct_encoder::{DirectFieldVisitor, ProtoBuffer}; +use otap_df_pdata::otlp::ProtoBuffer; +use super::encoder::DirectFieldVisitor; use super::{CallsiteMap, LogRecord}; /// Console formatter writes to stdout or stderr. 
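For context on the visitor flow that `DirectFieldVisitor` builds on: tracing
dispatches each event field to a typed `Visit` callback, with `record_debug` as
the required fallback for values that only implement `Debug` (the limitation
noted in the encoder comments above). A simplified stand-in that collects text
instead of encoding OTLP bytes:

```rust
use tracing::field::{Field, Visit};

// Simplified stand-in for DirectFieldVisitor: formats fields as text
// rather than encoding them as OTLP AnyValue messages.
struct TextVisitor {
    out: String,
}

impl Visit for TextVisitor {
    fn record_i64(&mut self, field: &Field, value: i64) {
        self.out.push_str(&format!("{}={} ", field.name(), value));
    }

    fn record_str(&mut self, field: &Field, value: &str) {
        self.out.push_str(&format!("{}={} ", field.name(), value));
    }

    // Required fallback: values that only expose Debug formatting land here.
    fn record_debug(&mut self, field: &Field, value: &dyn std::fmt::Debug) {
        self.out.push_str(&format!("{}={:?} ", field.name(), value));
    }
}
```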
diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs index b3e6282ad4..d69e5cad1d 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs @@ -9,7 +9,7 @@ //! primitive fields and static references. The remaining data are //! placed in a partial OTLP encoding. -pub mod direct_encoder; +pub mod encoder; pub mod formatter; use bytes::Bytes; @@ -18,8 +18,6 @@ use tracing::callsite::Identifier; pub use formatter::{ConsoleWriter, Layer as RawLoggingLayer}; -pub use direct_encoder::{DirectFieldVisitor, ProtoBuffer}; - /// A log record with structural metadata and pre-encoded body/attributes. #[derive(Debug, Clone)] pub struct LogRecord { From 686cd9e3af358b657efbd8f4538b72a6f9df0743 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Tue, 6 Jan 2026 22:06:21 -0800 Subject: [PATCH 14/92] remove dead --- .../telemetry/src/self_tracing/encoder.rs | 313 ------------------ 1 file changed, 313 deletions(-) diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs index 5ec59ce4c0..b5727c95f6 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs @@ -3,7 +3,6 @@ //! Direct OTLP bytes encoder for tokio-tracing events. -use bytes::Bytes; use std::fmt::Write as FmtWrite; use std::time::{SystemTime, UNIX_EPOCH}; use tracing::{Event, Level}; @@ -357,286 +356,6 @@ fn level_to_severity_number(level: &Level) -> i32 { } } -/// Stateful encoder for batching multiple LogRecords with shared Resource/Scope. -/// -/// This encoder maintains open `ResourceLogs` and `ScopeLogs` messages, allowing -/// multiple LogRecords to be appended efficiently. When the scope changes, it -/// automatically closes the current scope and starts a new one. -/// -/// # Thread-Local Usage -/// -/// This encoder is designed for thread-local use. Each thread should have its own -/// encoder instance to avoid synchronization overhead. The encoder accumulates -/// records until explicitly flushed. -/// -/// # Example -/// -/// ```ignore -/// thread_local! { -/// static ENCODER: RefCell = RefCell::new( -/// StatefulDirectEncoder::new(64 * 1024, resource_bytes) -/// ); -/// } -/// -/// // In event handler: -/// ENCODER.with(|encoder| { -/// let mut encoder = encoder.borrow_mut(); -/// encoder.encode_event(event); -/// -/// if encoder.len() > FLUSH_THRESHOLD { -/// let bytes = encoder.flush(); -/// // Send bytes to pipeline -/// } -/// }); -/// ``` -pub struct StatefulDirectEncoder { - /// Output buffer - buf: ProtoBuffer, - - /// Pre-encoded Resource bytes (includes ResourceLogs.resource field) - resource_bytes: Vec, - - /// Current encoder state - state: EncoderState, - - /// Length placeholder for current ResourceLogs - resource_logs_placeholder: Option, - - /// Length placeholder for current ScopeLogs - scope_logs_placeholder: Option, - - /// Current scope name for batching comparison - current_scope_name: Option, -} - -/// Current state of the stateful encoder. 
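
[Editor's note] For orientation while reading the state machine below: the nesting of OTLP logs containers that the (now-removed) stateful encoder held open, with field numbers as used in this code. The enum that follows tracks which of these containers is currently open.

```text
LogsData
└─ resource_logs = 1 (ResourceLogs)
   ├─ resource = 1 (pre-encoded Resource bytes)
   └─ scope_logs = 2 (ScopeLogs)
      ├─ scope = 1 (InstrumentationScope; name = 1)
      └─ log_records = 2 (repeated LogRecord)
```
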
-#[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum EncoderState { - /// No messages open, ready to start new ResourceLogs - Idle, - /// ResourceLogs is open, ready to add ScopeLogs - ResourceOpen, - /// ResourceLogs and ScopeLogs are both open, ready to append LogRecords - ScopeOpen, -} - -impl StatefulDirectEncoder { - /// Create a new stateful encoder with pre-allocated buffer capacity. - /// - /// # Arguments - /// * `capacity_bytes` - Initial buffer capacity in bytes - /// * `resource_bytes` - Pre-encoded Resource (use `encode_resource_bytes` helper) - pub fn new(capacity_bytes: usize, resource_bytes: Vec) -> Self { - Self { - buf: ProtoBuffer::with_capacity(capacity_bytes), - resource_bytes, - state: EncoderState::Idle, - resource_logs_placeholder: None, - scope_logs_placeholder: None, - current_scope_name: None, - } - } - - /// Get the current buffer size in bytes. - #[inline] - pub fn len(&self) -> usize { - self.buf.len() - } - - /// Check if the buffer is empty. - #[inline] - pub fn is_empty(&self) -> bool { - self.buf.is_empty() - } - - /// Encode a tracing Event, using its metadata target as the scope name. - /// - /// This method automatically handles batching: - /// - If scope (target) matches the current batch, the LogRecord is appended - /// - If scope differs, the current ScopeLogs is closed and a new one started - pub fn encode_event(&mut self, event: &Event<'_>) { - let scope_name = event.metadata().target(); - self.encode_event_with_scope(event, scope_name); - } - - /// Encode a tracing Event with an explicit scope name. - pub fn encode_event_with_scope(&mut self, event: &Event<'_>, scope_name: &str) { - match self.state { - EncoderState::Idle => { - self.start_resource_logs(); - self.start_scope_logs(scope_name); - self.append_log_record(event); - } - EncoderState::ResourceOpen => { - self.start_scope_logs(scope_name); - self.append_log_record(event); - } - EncoderState::ScopeOpen => { - if self.current_scope_name.as_deref() == Some(scope_name) { - // Same scope - just append - self.append_log_record(event); - } else { - // Different scope - close current and start new - self.close_scope_logs(); - self.start_scope_logs(scope_name); - self.append_log_record(event); - } - } - } - } - - /// Flush the encoder, closing all open messages and returning the accumulated bytes. - /// - /// After flushing, the encoder is reset and ready for new messages. 
- pub fn flush(&mut self) -> Bytes { - // Close any open messages - if self.state == EncoderState::ScopeOpen { - self.close_scope_logs(); - } - if self.state == EncoderState::ResourceOpen { - self.close_resource_logs(); - } - - // Take the bytes - let (bytes, capacity) = self.buf.take_into_bytes(); - - // Reset state - self.state = EncoderState::Idle; - self.resource_logs_placeholder = None; - self.scope_logs_placeholder = None; - self.current_scope_name = None; - - // Preserve capacity for next use - self.buf.ensure_capacity(capacity); - - bytes - } - - // === Private methods === - - fn start_resource_logs(&mut self) { - // LogsData.resource_logs field (tag 1, length-delimited) - self.buf.encode_field_tag(LOGS_DATA_RESOURCE, wire_types::LEN); - - let placeholder = LengthPlaceholder::new(self.buf.len()); - encode_len_placeholder(&mut self.buf); - - // Copy pre-encoded Resource bytes - self.buf.extend_from_slice(&self.resource_bytes); - - self.resource_logs_placeholder = Some(placeholder); - self.state = EncoderState::ResourceOpen; - } - - fn start_scope_logs(&mut self, scope_name: &str) { - // ResourceLogs.scope_logs field (tag 2, length-delimited) - self.buf.encode_field_tag(RESOURCE_LOGS_SCOPE_LOGS, wire_types::LEN); - - let placeholder = LengthPlaceholder::new(self.buf.len()); - encode_len_placeholder(&mut self.buf); - - // Encode ScopeLogs.scope field (InstrumentationScope message) - self.encode_instrumentation_scope(scope_name); - - self.scope_logs_placeholder = Some(placeholder); - self.current_scope_name = Some(scope_name.to_string()); - self.state = EncoderState::ScopeOpen; - } - - fn encode_instrumentation_scope(&mut self, scope_name: &str) { - // ScopeLogs.scope field (tag 1, length-delimited) - self.buf.encode_field_tag(SCOPE_LOG_SCOPE, wire_types::LEN); - let placeholder = LengthPlaceholder::new(self.buf.len()); - encode_len_placeholder(&mut self.buf); - - // InstrumentationScope.name field (tag 1, string) - self.buf.encode_string(INSTRUMENTATION_SCOPE_NAME, scope_name); - - placeholder.patch(&mut self.buf); - } - - fn append_log_record(&mut self, event: &Event<'_>) { - // ScopeLogs.log_records field (tag 2, length-delimited) - self.buf.encode_field_tag(SCOPE_LOGS_LOG_RECORDS, wire_types::LEN); - - let placeholder = LengthPlaceholder::new(self.buf.len()); - encode_len_placeholder(&mut self.buf); - - // Encode the LogRecord content directly - let mut encoder = DirectLogRecordEncoder::new(&mut self.buf); - let _ = encoder.encode_event(event); - - placeholder.patch(&mut self.buf); - } - - fn close_scope_logs(&mut self) { - if let Some(placeholder) = self.scope_logs_placeholder.take() { - placeholder.patch(&mut self.buf); - self.state = EncoderState::ResourceOpen; - self.current_scope_name = None; - } - } - - fn close_resource_logs(&mut self) { - if let Some(placeholder) = self.resource_logs_placeholder.take() { - placeholder.patch(&mut self.buf); - self.state = EncoderState::Idle; - } - } -} - -/// Helper to pre-encode a Resource as OTLP bytes. -/// -/// The returned bytes include the ResourceLogs.resource field tag and length, -/// ready to be copied directly into the encoder buffer. -/// -/// # Example -/// -/// ```ignore -/// use otap_df_pdata::proto::opentelemetry::resource::v1::Resource; -/// -/// let resource = Resource { -/// attributes: vec![ -/// KeyValue { key: "service.name".into(), value: Some(AnyValue { ... 
}) }, -/// ], -/// dropped_attributes_count: 0, -/// }; -/// let bytes = encode_resource_bytes(&resource); -/// let encoder = StatefulDirectEncoder::new(64 * 1024, bytes); -/// ``` -pub fn encode_resource_bytes_from_attrs(attributes: &[(&str, &str)]) -> Vec { - use otap_df_pdata::proto::consts::field_num::resource::RESOURCE_ATTRIBUTES; - - let mut buf = ProtoBuffer::with_capacity(256); - - // ResourceLogs.resource field (tag 1, length-delimited) - buf.encode_field_tag(1, wire_types::LEN); - let resource_placeholder = LengthPlaceholder::new(buf.len()); - encode_len_placeholder(&mut buf); - - // Encode each attribute as Resource.attributes (tag 1, KeyValue) - for (key, value) in attributes { - buf.encode_field_tag(RESOURCE_ATTRIBUTES, wire_types::LEN); - let kv_placeholder = LengthPlaceholder::new(buf.len()); - encode_len_placeholder(&mut buf); - - buf.encode_string(KEY_VALUE_KEY, key); - - buf.encode_field_tag(KEY_VALUE_VALUE, wire_types::LEN); - let av_placeholder = LengthPlaceholder::new(buf.len()); - encode_len_placeholder(&mut buf); - - buf.encode_string(ANY_VALUE_STRING_VALUE, value); - - av_placeholder.patch(&mut buf); - kv_placeholder.patch(&mut buf); - } - - resource_placeholder.patch(&mut buf); - - buf.into_bytes().to_vec() -} - #[cfg(test)] mod tests { use super::*; @@ -713,38 +432,6 @@ mod tests { assert!(!buffer.is_empty()); } - #[test] - fn test_stateful_encoder_batching() { - let resource_bytes = encode_resource_bytes_from_attrs(&[ - ("service.name", "test-service"), - ]); - - let mut encoder = StatefulDirectEncoder::new(4096, resource_bytes); - - assert!(encoder.is_empty()); - assert_eq!(encoder.state, EncoderState::Idle); - - // We can't easily test with real tracing events, but we can verify the structure - // For now, just test flush on empty encoder - let bytes = encoder.flush(); - assert!(bytes.is_empty()); - } - - #[test] - fn test_encode_resource_bytes() { - let bytes = encode_resource_bytes_from_attrs(&[ - ("service.name", "my-service"), - ("service.version", "1.0.0"), - ]); - - // Should produce non-empty bytes - assert!(!bytes.is_empty()); - - // Bytes should start with field tag for ResourceLogs.resource - // Field 1, wire type 2 (LEN) = (1 << 3) | 2 = 0x0a - assert_eq!(bytes[0], 0x0a); - } - #[test] fn test_level_to_severity() { assert_eq!(level_to_severity_number(&Level::TRACE), 1); From 21961df7e47aaa30de682063b1187bc0aa291806 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Wed, 7 Jan 2026 07:42:36 -0800 Subject: [PATCH 15/92] cleanup --- .../benchmarks/benches/self_tracing/main.rs | 146 +++++++------- .../telemetry/src/self_tracing/encoder.rs | 182 +++--------------- .../telemetry/src/self_tracing/formatter.rs | 160 ++++----------- .../crates/telemetry/src/self_tracing/mod.rs | 73 ++++--- 4 files changed, 184 insertions(+), 377 deletions(-) diff --git a/rust/otap-dataflow/benchmarks/benches/self_tracing/main.rs b/rust/otap-dataflow/benchmarks/benches/self_tracing/main.rs index b380100ff6..cf7f2c9edb 100644 --- a/rust/otap-dataflow/benchmarks/benches/self_tracing/main.rs +++ b/rust/otap-dataflow/benchmarks/benches/self_tracing/main.rs @@ -1,8 +1,6 @@ // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 -#![allow(missing_docs)] - //! Benchmarks for the compact log formatter. //! //! These benchmarks emit a single tracing event but perform N @@ -13,16 +11,13 @@ //! 
Example: `compact_encode/3_attrs/1000_events` = 300 µs → 300 ns per event use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main}; -use tracing::{Event, Level, Subscriber}; +use tracing::{Event, Subscriber}; use tracing_subscriber::layer::Layer; use tracing_subscriber::prelude::*; use tracing_subscriber::registry::LookupSpan; -use otap_df_telemetry::self_tracing::{ - CallsiteCache, CompactLogRecord, encode_body_and_attrs, format_log_record, -}; - -use std::time::{SystemTime, UNIX_EPOCH}; +use otap_df_pdata::otlp::ProtoBuffer; +use otap_df_telemetry::self_tracing::{DirectLogRecordEncoder, ConsoleWriter, LogRecord, SavedCallsite}; #[cfg(not(windows))] use tikv_jemallocator::Jemalloc; @@ -47,8 +42,8 @@ where { fn on_event(&self, event: &Event<'_>, _ctx: tracing_subscriber::layer::Context<'_, S>) { for _ in 0..self.iterations { - let bytes = encode_body_and_attrs(event); - let _ = std::hint::black_box(bytes); + let record = LogRecord::new(event); + let _ = std::hint::black_box(record); } } } @@ -61,11 +56,11 @@ fn bench_encode(c: &mut Criterion) { BenchmarkId::new("3_attrs", format!("{}_events", iterations)), iterations, |b, &iters| { - b.iter(|| { - let layer = EncodeOnlyLayer::new(iters); - let subscriber = tracing_subscriber::registry().with(layer); - let dispatch = tracing::Dispatch::new(subscriber); + let layer = EncodeOnlyLayer::new(iters); + let subscriber = tracing_subscriber::registry().with(layer); + let dispatch = tracing::Dispatch::new(subscriber); + b.iter(|| { tracing::dispatcher::with_default(&dispatch, || { tracing::info!( key1 = "value1", @@ -99,35 +94,13 @@ where S: Subscriber + for<'a> LookupSpan<'a>, { fn on_event(&self, event: &Event<'_>, _ctx: tracing_subscriber::layer::Context<'_, S>) { - let metadata = event.metadata(); - - // Build cache with this callsite - let mut cache = CallsiteCache::new(); - cache.register(metadata); - // Encode once - let body_attrs_bytes = encode_body_and_attrs(event); - let timestamp_ns = SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap_or_default() - .as_nanos() as u64; - - let record = CompactLogRecord { - callsite_id: metadata.callsite(), - timestamp_ns, - severity_number: match *metadata.level() { - Level::TRACE => 1, - Level::DEBUG => 5, - Level::INFO => 9, - Level::WARN => 13, - Level::ERROR => 17, - }, - severity_text: metadata.level().as_str(), - body_attrs_bytes, - }; + let record = LogRecord::new(event); + let writer = ConsoleWriter::no_color(); + let callsite = SavedCallsite::new(event.metadata()); for _ in 0..self.iterations { - let line = format_log_record(&record, &cache, true); + let line = writer.format_log_record(&record, &callsite); let _ = std::hint::black_box(line); } } @@ -179,35 +152,13 @@ where S: Subscriber + for<'a> LookupSpan<'a>, { fn on_event(&self, event: &Event<'_>, _ctx: tracing_subscriber::layer::Context<'_, S>) { - let metadata = event.metadata(); - - // Build cache with this callsite - let mut cache = CallsiteCache::new(); - cache.register(metadata); - // Encode + format N times for _ in 0..self.iterations { - let body_attrs_bytes = encode_body_and_attrs(event); - let timestamp_ns = SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap_or_default() - .as_nanos() as u64; - - let record = CompactLogRecord { - callsite_id: metadata.callsite(), - timestamp_ns, - severity_number: match *metadata.level() { - Level::TRACE => 1, - Level::DEBUG => 5, - Level::INFO => 9, - Level::WARN => 13, - Level::ERROR => 17, - }, - severity_text: metadata.level().as_str(), - body_attrs_bytes, - }; - - 
let line = format_log_record(&record, &cache, true); + let record = LogRecord::new(event); + let writer = ConsoleWriter::no_color(); + let callsite = SavedCallsite::new(event.metadata()); + + let line = writer.format_log_record(&record, &callsite); let _ = std::hint::black_box(line); } } @@ -244,6 +195,65 @@ fn bench_encode_and_format(c: &mut Criterion) { group.finish(); } +struct EncodeFullLayer { + iterations: usize, +} + +impl EncodeFullLayer { + fn new(iterations: usize) -> Self { + Self { iterations } + } +} + +impl Layer for EncodeFullLayer +where + S: Subscriber + for<'a> LookupSpan<'a>, +{ + fn on_event(&self, event: &Event<'_>, _ctx: tracing_subscriber::layer::Context<'_, S>) { + // Encode + format N times + let mut buf = ProtoBuffer::new(); + let mut encoder = DirectLogRecordEncoder::new(&mut buf); + let callsite = SavedCallsite::new(event.metadata()); + + for _ in 0..self.iterations { + encoder.clear(); + let size = encoder.encode_log_record(LogRecord::new(event), &callsite); + let _ = std::hint::black_box(size); + } + } +} + +fn bench_encode_full(c: &mut Criterion) { + let mut group = c.benchmark_group("encode_full"); + + for iterations in [100, 1000].iter() { + let _ = group.bench_with_input( + BenchmarkId::new("3_attrs", format!("{}_events", iterations)), + iterations, + |b, &iters| { + b.iter(|| { + let layer = EncodeFullLayer::new(iters); + let subscriber = tracing_subscriber::registry().with(layer); + let dispatch = tracing::Dispatch::new(subscriber); + + tracing::dispatcher::with_default(&dispatch, || { + tracing::info!( + key1 = "value1", + key2 = 42, + key3 = true, + "Benchmark message" + ); + }); + + let _ = std::hint::black_box(()); + }) + }, + ); + } + + group.finish(); +} + fn bench_encode_attrs(c: &mut Criterion) { let mut group = c.benchmark_group("encode_attrs"); let iterations = 1000; @@ -315,7 +325,7 @@ mod bench_entry { criterion_group!( name = benches; config = Criterion::default(); - targets = bench_encode, bench_format, bench_encode_and_format, bench_encode_attrs + targets = bench_encode, bench_format, bench_encode_and_format, bench_encode_full, bench_encode_attrs ); } diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs index b5727c95f6..bb374e1b58 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs @@ -4,8 +4,8 @@ //! Direct OTLP bytes encoder for tokio-tracing events. use std::fmt::Write as FmtWrite; -use std::time::{SystemTime, UNIX_EPOCH}; -use tracing::{Event, Level}; +use tracing::Level; +use super::{LogRecord, SavedCallsite}; use otap_df_pdata::otlp::{ProtoBuffer, encode_len_placeholder, patch_len_placeholder}; use otap_df_pdata::proto::consts::{field_num::common::*, field_num::logs::*, wire_types}; @@ -35,20 +35,6 @@ impl LengthPlaceholder { } } -/// Wrapper for ProtoBuffer for formatting of Debug values without -/// allocating an intermediate String. -struct ProtoBufferWriter<'a> { - buf: &'a mut ProtoBuffer, -} - -impl FmtWrite for ProtoBufferWriter<'_> { - #[inline] - fn write_str(&mut self, s: &str) -> std::fmt::Result { - self.buf.extend_from_slice(s.as_bytes()); - Ok(()) - } -} - /// Direct encoder that writes a single LogRecord from a tracing Event. pub struct DirectLogRecordEncoder<'buf> { buf: &'buf mut ProtoBuffer, @@ -61,80 +47,50 @@ impl<'buf> DirectLogRecordEncoder<'buf> { Self { buf } } + /// Reset the underlying buffer. 
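
[Editor's note] The `clear()` method added just below enables buffer reuse across events. A sketch of the intended loop, mirroring the benchmarks in this series; `event` is assumed to be the `&tracing::Event<'_>` handed to a `Layer` callback:

```rust
use otap_df_pdata::otlp::ProtoBuffer;
use otap_df_telemetry::self_tracing::{DirectLogRecordEncoder, LogRecord, SavedCallsite};

fn encode_once(event: &tracing::Event<'_>) -> usize {
    let mut buf = ProtoBuffer::new();
    let mut encoder = DirectLogRecordEncoder::new(&mut buf);
    let callsite = SavedCallsite::new(event.metadata());

    // Reuse one buffer across events: clear, then re-encode.
    encoder.clear();
    encoder.encode_log_record(LogRecord::new(event), &callsite)
}
```
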
+ pub fn clear(&mut self) { + self.buf.clear(); + } + /// Encode a tracing Event as a complete LogRecord message. /// - /// This writes all LogRecord fields directly to the buffer: - /// - time_unix_nano (field 1) - /// - severity_number (field 2) - /// - severity_text (field 3) - /// - body (field 5) - from the "message" field - /// - attributes (field 6) - from all other fields - /// /// Returns the number of bytes written. - pub fn encode_event(&mut self, event: &Event<'_>) -> usize { + pub fn encode_log_record(&mut self, record: LogRecord, callsite: &SavedCallsite) -> usize { let start_len = self.buf.len(); - // Get timestamp - let timestamp_nanos = SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap_or_default() - .as_nanos() as u64; - - let metadata = event.metadata(); - // Encode time_unix_nano (field 1, fixed64) self.buf.encode_field_tag(LOG_RECORD_TIME_UNIX_NANO, wire_types::FIXED64); - self.buf.extend_from_slice(×tamp_nanos.to_le_bytes()); - - // Encode severity_number (field 2, varint) - let severity = level_to_severity_number(metadata.level()); - self.buf.encode_field_tag(LOG_RECORD_SEVERITY_NUMBER, wire_types::VARINT); - self.buf.encode_varint(severity as u64); - - // Encode severity_text (field 3, string) - self.buf.encode_string(LOG_RECORD_SEVERITY_TEXT, metadata.level().as_str()); - - // Now visit fields to encode body and attributes - let mut visitor = DirectFieldVisitor::new(self.buf); - event.record(&mut visitor); - - self.buf.len() - start_len - } + self.buf.extend_from_slice(&record.timestamp_ns.to_le_bytes()); - /// Encode a tracing Event with a custom timestamp. - pub fn encode_event_with_timestamp(&mut self, event: &Event<'_>, timestamp_nanos: u64) -> usize { - let start_len = self.buf.len(); - let metadata = event.metadata(); - - // Encode time_unix_nano (field 1, fixed64) - self.buf.encode_field_tag(LOG_RECORD_TIME_UNIX_NANO, wire_types::FIXED64); - self.buf.extend_from_slice(×tamp_nanos.to_le_bytes()); + // Note: the next two fields could be pre-encoded by Level // Encode severity_number (field 2, varint) - let severity = level_to_severity_number(metadata.level()); + let severity = level_to_severity_number(&callsite.level); self.buf.encode_field_tag(LOG_RECORD_SEVERITY_NUMBER, wire_types::VARINT); self.buf.encode_varint(severity as u64); // Encode severity_text (field 3, string) - self.buf.encode_string(LOG_RECORD_SEVERITY_TEXT, metadata.level().as_str()); - - // Now visit fields to encode body and attributes - let mut visitor = DirectFieldVisitor::new(self.buf); - event.record(&mut visitor); + self.buf.encode_string(LOG_RECORD_SEVERITY_TEXT, callsite.level.as_str()); + + self.buf.extend_from_slice(&record.body_attrs_bytes); self.buf.len() - start_len } } /// Visitor that directly encodes tracing fields to protobuf. -/// -/// This is the core of the zero-allocation design: instead of collecting -/// field values into an intermediate data structure, we encode them directly -/// to the protobuf buffer as we visit them. pub struct DirectFieldVisitor<'buf> { buf: &'buf mut ProtoBuffer, } +impl<'buf> FmtWrite for DirectFieldVisitor<'buf> { + #[inline] + fn write_str(&mut self, s: &str) -> std::fmt::Result { + self.buf.extend_from_slice(s.as_bytes()); + Ok(()) + } +} + impl<'buf> DirectFieldVisitor<'buf> { /// Create a new DirectFieldVisitor that writes to the provided buffer. 
pub fn new(buf: &'buf mut ProtoBuffer) -> Self { @@ -256,8 +212,7 @@ impl<'buf> DirectFieldVisitor<'buf> { encode_len_placeholder(self.buf); // Write Debug output directly to buffer - let mut writer = ProtoBufferWriter { buf: self.buf }; - let _ = write!(writer, "{:?}", value); + let _ = write!(self, "{:?}", value); string_placeholder.patch(self.buf); body_placeholder.patch(self.buf); @@ -285,8 +240,7 @@ impl<'buf> DirectFieldVisitor<'buf> { encode_len_placeholder(self.buf); // Write Debug output directly to buffer - let mut writer = ProtoBufferWriter { buf: self.buf }; - let _ = write!(writer, "{:?}", value); + let _ = write!(self, "{:?}", value); string_placeholder.patch(self.buf); av_placeholder.patch(self.buf); @@ -346,7 +300,7 @@ impl tracing::field::Visit for DirectFieldVisitor<'_> { /// /// See: https://opentelemetry.io/docs/specs/otel/logs/data-model/#field-severitynumber #[inline] -fn level_to_severity_number(level: &Level) -> i32 { +pub fn level_to_severity_number(level: &Level) -> u8 { match *level { Level::TRACE => 1, Level::DEBUG => 5, @@ -355,89 +309,3 @@ fn level_to_severity_number(level: &Level) -> i32 { Level::ERROR => 17, } } - -#[cfg(test)] -mod tests { - use super::*; - use tracing_subscriber::prelude::*; - use tracing_subscriber::layer::Layer; - use tracing_subscriber::registry::LookupSpan; - use tracing::Subscriber; - use std::sync::Mutex; - - /// Simple layer that uses DirectLogRecordEncoder (thread-safe for tests) - struct DirectEncoderLayer { - // Thread-local buffer - each event encodes to this - buffer: Mutex, - // Collected encoded bytes - encoded: Mutex>>, - } - - impl DirectEncoderLayer { - fn new() -> Self { - Self { - buffer: Mutex::new(ProtoBuffer::with_capacity(4096)), - encoded: Mutex::new(Vec::new()), - } - } - } - - impl Layer for DirectEncoderLayer - where - S: Subscriber + for<'a> LookupSpan<'a>, - { - fn on_event(&self, event: &Event<'_>, _ctx: tracing_subscriber::layer::Context<'_, S>) { - let mut buffer = self.buffer.lock().unwrap(); - buffer.clear(); - - let mut encoder = DirectLogRecordEncoder::new(&mut buffer); - let _ = encoder.encode_event(event); - - // Save a copy of the encoded bytes - self.encoded.lock().unwrap().push(buffer.as_ref().to_vec()); - } - } - - #[test] - fn test_direct_encoder_captures_events() { - let layer = DirectEncoderLayer::new(); - - let subscriber = tracing_subscriber::registry().with(layer); - let dispatch = tracing::Dispatch::new(subscriber); - let _guard = tracing::dispatcher::set_default(&dispatch); - - tracing::info!("Test message"); - tracing::warn!(count = 42, "Warning with attribute"); - - // Drop the guard to stop capturing - drop(_guard); - - // Note: We can't easily get the layer back from dispatch to verify results - // The test verifies that the encoding path doesn't panic - } - - #[test] - fn test_direct_encoder_encodes_attributes() { - let mut buffer = ProtoBuffer::with_capacity(1024); - - // We can't easily create a tracing::Event in tests, so we'll just verify - // the attribute encoding helpers work correctly - let mut visitor = DirectFieldVisitor::new(&mut buffer); - visitor.encode_string_attribute("test_key", "test_value"); - visitor.encode_int_attribute("count", 42); - visitor.encode_bool_attribute("enabled", true); - visitor.encode_double_attribute("ratio", 3.14); - - // Buffer should have content - assert!(!buffer.is_empty()); - } - - #[test] - fn test_level_to_severity() { - assert_eq!(level_to_severity_number(&Level::TRACE), 1); - assert_eq!(level_to_severity_number(&Level::DEBUG), 5); - 
assert_eq!(level_to_severity_number(&Level::INFO), 9);
-        assert_eq!(level_to_severity_number(&Level::WARN), 13);
-        assert_eq!(level_to_severity_number(&Level::ERROR), 17);
-    }
-}
diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs
index e73dacefa7..d089f7052a 100644
--- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs
+++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs
@@ -3,22 +3,18 @@
 
 //! An alternative to Tokio fmt::layer().
 
+use super::{LogRecord, SavedCallsite};
 use bytes::Bytes;
-use std::io::Write;
-use std::sync::RwLock;
-use std::time::{SystemTime, UNIX_EPOCH};
 use otap_df_pdata::proto::consts::field_num::logs::{LOG_RECORD_ATTRIBUTES, LOG_RECORD_BODY};
 use otap_df_pdata::proto::consts::wire_types;
 use otap_df_pdata::views::common::{AnyValueView, AttributeView, ValueType};
 use otap_df_pdata::views::otlp::bytes::common::{RawAnyValue, RawKeyValue};
 use otap_df_pdata::views::otlp::bytes::decode::read_varint;
+use std::io::Write;
 use tracing::span::{Attributes, Record};
 use tracing::{Event, Level, Subscriber};
 use tracing_subscriber::layer::{Context, Layer as TracingLayer};
 use tracing_subscriber::registry::LookupSpan;
-use otap_df_pdata::otlp::ProtoBuffer;
-use super::encoder::DirectFieldVisitor;
-use super::{CallsiteMap, LogRecord};
 
 /// Console formatter writes to stdout or stderr.
 #[derive(Debug)]
@@ -29,8 +25,7 @@ pub struct ConsoleWriter {
 /// A minimal formatting layer that outputs log records to stdout/stderr.
 ///
 /// This is a lightweight alternative to `tracing_subscriber::fmt::layer()`.
-pub struct Layer {
-    callsites: RwLock<CallsiteMap>,
+pub struct RawLayer {
     writer: ConsoleWriter,
 }
 
@@ -44,24 +39,10 @@ const ANSI_MAGENTA: &str = "\x1b[35m";
 const ANSI_DIM: &str = "\x1b[2m";
 const ANSI_BOLD: &str = "\x1b[1m";
 
-impl Layer {
+impl RawLayer {
     /// Return a new formatting layer with associated writer.
     pub fn new(writer: ConsoleWriter) -> Self {
-        Self {
-            callsites: RwLock::new(CallsiteMap::new()),
-            writer,
-        }
-    }
-
-    /// Convert tracing Level to OTLP severity number.
-    fn level_to_severity(level: &Level) -> u8 {
-        match *level {
-            Level::TRACE => 1,
-            Level::DEBUG => 5,
-            Level::INFO => 9,
-            Level::WARN => 13,
-            Level::ERROR => 17,
-        }
+        Self { writer }
     }
 }
 
@@ -79,25 +60,20 @@ impl ConsoleWriter {
     /// Format an InternalLogRecord as a human-readable string.
/// /// Output format: `2026-01-06T10:30:45.123Z INFO target::name (file.rs:42): body [attr=value, ...]` - pub fn format_log_record(&self, record: &LogRecord, map: &CallsiteMap) -> String { - let callsite = map.get(&record.callsite_id); - - let event_name = match callsite { - Some(cs) => Self::format_event_name(cs.target, cs.name, cs.file, cs.line), - None => "".to_string(), - }; + pub fn format_log_record(&self, record: &LogRecord, callsite: &SavedCallsite) -> String { + let event_name = Self::format_event_name(callsite); let body_attrs = Self::format_body_attrs(&record.body_attrs_bytes); if self.use_ansi { - let level_color = Self::level_color(record.severity_level); + let level_color = Self::level_color(callsite.level); format!( "{}{}{} {}{:5}{} {}{}{}: {}\n", ANSI_DIM, Self::format_timestamp(record.timestamp_ns), ANSI_RESET, level_color, - record.severity_text, + callsite.level.as_str(), ANSI_RESET, ANSI_BOLD, event_name, @@ -108,7 +84,7 @@ impl ConsoleWriter { format!( "{} {:5} {}: {}\n", Self::format_timestamp(record.timestamp_ns), - record.severity_text, + callsite.level.as_str(), event_name, body_attrs, ) @@ -119,15 +95,12 @@ impl ConsoleWriter { /// /// Format: "target::name (file.rs:42)" or "target::name" if file/line unavailable. #[inline] - fn format_event_name( - target: &str, - name: &str, - file: Option<&str>, - line: Option, - ) -> String { - match (file, line) { - (Some(file), Some(line)) => format!("{}::{} ({}:{})", target, name, file, line), - _ => format!("{}::{}", target, name), + fn format_event_name(callsite: &SavedCallsite) -> String { + match (callsite.file, callsite.line) { + (Some(file), Some(line)) => { + format!("{}::{} ({}:{})", callsite.target, callsite.name, file, line) + } + _ => format!("{}::{}", callsite.target, callsite.name), } } @@ -248,9 +221,6 @@ impl ConsoleWriter { } /// Format an AnyValue for display. - /// - /// This is based on the same logic used in `otlp_bytes_formatter.rs`, providing - /// consistent formatting across the crate. fn format_any_value<'a>(value: &impl AnyValueView<'a>) -> String { match value.value_type() { ValueType::String => { @@ -321,9 +291,14 @@ impl ConsoleWriter { } /// Write a log line - fn write_line(&self, level: u8, line: &str) { - // Ignore erorr - let _error = if level >= 13 { + fn write_line(&self, level: &Level, line: &str) { + let use_stderr = match *level { + Level::ERROR => true, + Level::WARN => true, + _ => false, + }; + // Ignore error from write() + let _ = if use_stderr { std::io::stderr().write(line.as_bytes()) } else { std::io::stdout().write(line.as_bytes()) @@ -332,62 +307,31 @@ impl ConsoleWriter { /// Get ANSI color code for a severity level. 
#[inline] - fn level_color(level: u8) -> &'static str { - if level >= 17 { - ANSI_RED - } else if level >= 13 { - ANSI_YELLOW - } else if level >= 9 { - ANSI_GREEN - } else if level >= 5 { - ANSI_BLUE - } else { - ANSI_MAGENTA + fn level_color(level: &Level) -> &'static str { + match *level { + Level::ERROR => ANSI_RED, + Level::WARN => ANSI_YELLOW, + Level::INFO => ANSI_GREEN, + Level::DEBUG => ANSI_BLUE, + Level::TRACE => ANSI_MAGENTA, } } } -// ============================================================================ -// Layer Implementation -// ============================================================================ - -impl TracingLayer for Layer +impl TracingLayer for RawLayer where S: Subscriber + for<'a> LookupSpan<'a>, { - fn register_callsite( - &self, - metadata: &'static tracing::Metadata<'static>, - ) -> tracing::subscriber::Interest { - self.callsites.write().unwrap().register(metadata); - tracing::subscriber::Interest::always() - } - fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) { let metadata = event.metadata(); - // Encode body and attributes to bytes - let body_attrs_bytes = encode_body_and_attrs(event); - - // Get current timestamp - let timestamp_ns = SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap_or_default() - .as_nanos() as u64; - // Build compact record - let record = LogRecord { - callsite_id: metadata.callsite(), - timestamp_ns, - severity_level: Self::level_to_severity(metadata.level()), - severity_text: metadata.level().as_str(), - body_attrs_bytes, - }; + let record = LogRecord::new(event); // Format and write immediately - let map = self.callsites.read().unwrap(); - let line = self.writer.format_log_record(&record, &map); - self.writer.write_line(record.severity_level, &line); + let callsite = SavedCallsite::new(metadata); + let line = self.writer.format_log_record(&record, &callsite); + self.writer.write_line(callsite.level, &line); } fn on_new_span(&self, _attrs: &Attributes<'_>, _id: &tracing::span::Id, _ctx: Context<'_, S>) { @@ -411,17 +355,6 @@ where } } -/// Encode only body and attributes from an event to OTLP bytes. 
-pub fn encode_body_and_attrs(event: &Event<'_>) -> Bytes {
-    let mut buf = ProtoBuffer::with_capacity(256);
-
-    // Visit fields to encode body (field 5) and attributes (field 6)
-    let mut visitor = DirectFieldVisitor::new(&mut buf);
-    event.record(&mut visitor);
-
-    buf.into_bytes()
-}
-
 // ============================================================================
 // Tests
 // ============================================================================
@@ -454,21 +387,6 @@ mod tests {
         assert_eq!(ConsoleWriter::days_to_ymd(19723), (2024, 1, 1));
     }
 
-    #[test]
-    fn test_level_to_severity() {
-        assert_eq!(Layer::level_to_severity(&Level::TRACE), 1);
-        assert_eq!(Layer::level_to_severity(&Level::DEBUG), 5);
-        assert_eq!(Layer::level_to_severity(&Level::INFO), 9);
-        assert_eq!(Layer::level_to_severity(&Level::WARN), 13);
-        assert_eq!(Layer::level_to_severity(&Level::ERROR), 17);
-    }
-
-    #[test]
-    fn test_callsites() {
-        let map = CallsiteMap::new();
-        assert!(map.callsites.is_empty());
-    }
-
     #[test]
     fn test_simple_writer_creation() {
         let _stdout = ConsoleWriter::color();
@@ -477,14 +395,14 @@
 
     #[test]
     fn test_formatter_layer_creation() {
-        let _color = Layer::new(ConsoleWriter::color());
-        let _nocolor = Layer::new(ConsoleWriter::no_color());
+        let _color = RawLayer::new(ConsoleWriter::color());
+        let _nocolor = RawLayer::new(ConsoleWriter::no_color());
     }
 
     #[test]
     fn test_layer_integration() {
         // Create the layer and subscriber
-        let layer = Layer::new(ConsoleWriter::no_color());
+        let layer = RawLayer::new(ConsoleWriter::no_color());
         let subscriber = tracing_subscriber::registry().with(layer);
 
         // Set as default for this thread temporarily
diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs
index d69e5cad1d..2b855c1d1d 100644
--- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs
+++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs
@@ -1,11 +1,9 @@
 // Copyright The OpenTelemetry Authors
 // SPDX-License-Identifier: Apache-2.0
 
-//! Log encoding and formatting for Tokio tracing events. This module
-//! stores pre-calculated encodings for the LogRecord event_name and
-//! avoids unnecessary encoding work for primitive fields (e.g., timestamp).
+//! Log encoding and formatting for Tokio tracing events.
 //!
-//! The intermediate representation is InternalLogRecord, includes the
+//! The intermediate representation is LogRecord, which includes the
 //! primitive fields and static references. The remaining data are
 //! placed in a partial OTLP encoding.
 
@@ -13,10 +11,12 @@ pub mod encoder;
 pub mod formatter;
 
 use bytes::Bytes;
-use std::collections::HashMap;
+use std::time::{SystemTime, UNIX_EPOCH};
 use tracing::callsite::Identifier;
+use tracing::{Level, Metadata};
 
-pub use formatter::{ConsoleWriter, Layer as RawLoggingLayer};
+pub use formatter::{ConsoleWriter, RawLayer as RawLoggingLayer};
+pub use encoder::DirectLogRecordEncoder;
 
 /// A log record with structural metadata and pre-encoded body/attributes.
 #[derive(Debug, Clone)]
@@ -27,12 +27,6 @@ pub struct LogRecord {
 
     /// Timestamp in UNIX epoch nanoseconds
     pub timestamp_ns: u64,
 
-    /// Severity level, OpenTelemetry defined
-    pub severity_level: u8,
-
-    /// Severity text
-    pub severity_text: &'static str,
-
     /// Pre-encoded body and attributes
     pub body_attrs_bytes: Bytes,
 }
@@ -51,33 +45,50 @@ pub struct SavedCallsite {
     /// Source line
     pub line: Option<u32>,
-}
 
-/// Map callsite information by `Identifier`.
-#[derive(Debug, Default)] -pub struct CallsiteMap { - callsites: HashMap, + /// Severity level + pub level: &'static Level, } -impl CallsiteMap { - /// Create a new empty cache. - pub fn new() -> Self { - Self::default() - } - - /// Register a callsite from its metadata. - pub fn register(&mut self, metadata: &'static tracing::Metadata<'static>) { - let id = metadata.callsite(); - let _ = self.callsites.entry(id).or_insert_with(|| SavedCallsite { +impl SavedCallsite { + /// Construct saved callsite information from tracing Metadata. + pub fn new(metadata: &'static Metadata<'static>) -> Self { + Self { + level: metadata.level(), target: metadata.target(), name: metadata.name(), file: metadata.file(), line: metadata.line(), - }); + } + } +} + +use encoder::DirectFieldVisitor; +use otap_df_pdata::otlp::ProtoBuffer; +use tracing::Event; + +impl LogRecord { + /// Construct a log record, partially encoding its dynamic content. + pub fn new(event: &Event<'_>) -> Self { + let metadata = event.metadata(); + + // Encode body and attributes to bytes + let mut buf = ProtoBuffer::with_capacity(256); + let mut visitor = DirectFieldVisitor::new(&mut buf); + event.record(&mut visitor); + + Self { + callsite_id: metadata.callsite(), + timestamp_ns: Self::get_timestamp_nanos(), + body_attrs_bytes: buf.into_bytes(), + } } - /// Get cached callsite info by identifier. - pub fn get(&self, id: &Identifier) -> Option<&SavedCallsite> { - self.callsites.get(id) + /// Get current timestamp in UNIX epoch nanoseconds. + fn get_timestamp_nanos() -> u64 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_nanos() as u64 } } From 8f8595c06848bb07e82305d588f4e9d38a12475a Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Wed, 7 Jan 2026 08:54:41 -0800 Subject: [PATCH 16/92] refactor bench --- .../benchmarks/benches/self_tracing/main.rs | 387 ++++++------------ 1 file changed, 127 insertions(+), 260 deletions(-) diff --git a/rust/otap-dataflow/benchmarks/benches/self_tracing/main.rs b/rust/otap-dataflow/benchmarks/benches/self_tracing/main.rs index cf7f2c9edb..7c82dfe933 100644 --- a/rust/otap-dataflow/benchmarks/benches/self_tracing/main.rs +++ b/rust/otap-dataflow/benchmarks/benches/self_tracing/main.rs @@ -8,7 +8,7 @@ //! //! Benchmark names follow the pattern: `group/description/N_events` //! -//! Example: `compact_encode/3_attrs/1000_events` = 300 µs → 300 ns per event +//! Example: `encode/3_attrs/1000_events` = 300 µs → 300 ns per event use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main}; use tracing::{Event, Subscriber}; @@ -17,7 +17,9 @@ use tracing_subscriber::prelude::*; use tracing_subscriber::registry::LookupSpan; use otap_df_pdata::otlp::ProtoBuffer; -use otap_df_telemetry::self_tracing::{DirectLogRecordEncoder, ConsoleWriter, LogRecord, SavedCallsite}; +use otap_df_telemetry::self_tracing::{ + ConsoleWriter, DirectLogRecordEncoder, LogRecord, SavedCallsite, +}; #[cfg(not(windows))] use tikv_jemallocator::Jemalloc; @@ -26,296 +28,161 @@ use tikv_jemallocator::Jemalloc; #[global_allocator] static GLOBAL: Jemalloc = Jemalloc; -struct EncodeOnlyLayer { +/// The operation to perform on each event within the layer. +#[derive(Clone, Copy)] +enum BenchOp { + /// Encode the event into a LogRecord only. + Encode, + /// Encode once, then format N times. + Format, + /// Encode and format together N times. + EncodeAndFormat, + /// Encode to protobuf N times. + EncodeProto, +} + +/// A layer that performs a configurable operation N times per event. 
+struct BenchLayer { iterations: usize, + op: BenchOp, } -impl EncodeOnlyLayer { - fn new(iterations: usize) -> Self { - Self { iterations } +impl BenchLayer { + fn new(iterations: usize, op: BenchOp) -> Self { + Self { iterations, op } } } -impl Layer for EncodeOnlyLayer +impl Layer for BenchLayer where S: Subscriber + for<'a> LookupSpan<'a>, { fn on_event(&self, event: &Event<'_>, _ctx: tracing_subscriber::layer::Context<'_, S>) { - for _ in 0..self.iterations { - let record = LogRecord::new(event); - let _ = std::hint::black_box(record); + match self.op { + BenchOp::Encode => { + for _ in 0..self.iterations { + let record = LogRecord::new(event); + let _ = std::hint::black_box(record); + } + } + BenchOp::Format => { + // Encode once, format N times + let record = LogRecord::new(event); + let writer = ConsoleWriter::no_color(); + let callsite = SavedCallsite::new(event.metadata()); + + for _ in 0..self.iterations { + let line = writer.format_log_record(&record, &callsite); + let _ = std::hint::black_box(line); + } + } + BenchOp::EncodeAndFormat => { + let writer = ConsoleWriter::no_color(); + + for _ in 0..self.iterations { + let record = LogRecord::new(event); + let callsite = SavedCallsite::new(event.metadata()); + let line = writer.format_log_record(&record, &callsite); + let _ = std::hint::black_box(line); + } + } + BenchOp::EncodeProto => { + let mut buf = ProtoBuffer::new(); + let mut encoder = DirectLogRecordEncoder::new(&mut buf); + let callsite = SavedCallsite::new(event.metadata()); + + for _ in 0..self.iterations { + encoder.clear(); + let size = encoder.encode_log_record(LogRecord::new(event), &callsite); + let _ = std::hint::black_box(size); + } + } } } } -fn bench_encode(c: &mut Criterion) { - let mut group = c.benchmark_group("encode"); - - for iterations in [100, 1000].iter() { - let _ = group.bench_with_input( - BenchmarkId::new("3_attrs", format!("{}_events", iterations)), - iterations, - |b, &iters| { - let layer = EncodeOnlyLayer::new(iters); - let subscriber = tracing_subscriber::registry().with(layer); - let dispatch = tracing::Dispatch::new(subscriber); - - b.iter(|| { - tracing::dispatcher::with_default(&dispatch, || { - tracing::info!( - key1 = "value1", - key2 = 42, - key3 = true, - "Benchmark message" - ); - }); - - let _ = std::hint::black_box(()); - }) - }, - ); - } - - group.finish(); -} - -struct FormatOnlyLayer { - iterations: usize, -} - -impl FormatOnlyLayer { - fn new(iterations: usize) -> Self { - Self { iterations } - } -} - -impl Layer for FormatOnlyLayer +/// Macro to generate benchmark functions for different attribute counts. +/// Each variant emits a consistent log statement for fair comparison. +macro_rules! emit_log { + (0) => { + tracing::info!("benchmark message") + }; + (3) => { + tracing::info!( + attr_str = "value", + attr_int = 42, + attr_bool = true, + "benchmark message" + ) + }; + (10) => { + tracing::info!( + attr_str1 = "string1", + attr_bool1 = true, + attr_str2 = "string2", + attr_float1 = 3.14, + attr_int1 = 42i64, + attr_str3 = "string3", + attr_bool2 = false, + attr_float2 = 2.718, + attr_int2 = 100u64, + attr_str4 = "string4", + "benchmark message" + ) + }; +} + +/// Run a benchmark with the given layer, invoking the log emitter. 
+fn run_bench(b: &mut criterion::Bencher, layer: L, emit: F) where - S: Subscriber + for<'a> LookupSpan<'a>, + L: Layer + 'static, + F: Fn(), { - fn on_event(&self, event: &Event<'_>, _ctx: tracing_subscriber::layer::Context<'_, S>) { - // Encode once - let record = LogRecord::new(event); - let writer = ConsoleWriter::no_color(); - let callsite = SavedCallsite::new(event.metadata()); - - for _ in 0..self.iterations { - let line = writer.format_log_record(&record, &callsite); - let _ = std::hint::black_box(line); - } - } -} + let subscriber = tracing_subscriber::registry().with(layer); + let dispatch = tracing::Dispatch::new(subscriber); -fn bench_format(c: &mut Criterion) { - let mut group = c.benchmark_group("format"); - - for iterations in [100, 1000].iter() { - let _ = group.bench_with_input( - BenchmarkId::new("3_attrs", format!("{}_events", iterations)), - iterations, - |b, &iters| { - b.iter(|| { - let layer = FormatOnlyLayer::new(iters); - let subscriber = tracing_subscriber::registry().with(layer); - let dispatch = tracing::Dispatch::new(subscriber); - - tracing::dispatcher::with_default(&dispatch, || { - tracing::info!( - key1 = "value1", - key2 = 42, - key3 = true, - "Benchmark message" - ); - }); - - let _ = std::hint::black_box(()); - }) - }, - ); - } - - group.finish(); + b.iter(|| { + tracing::dispatcher::with_default(&dispatch, &emit); + std::hint::black_box(()); + }); } -struct EncodeFormatLayer { - iterations: usize, -} +/// Benchmark a specific operation across different iteration counts. +fn bench_op(c: &mut Criterion, group_name: &str, op: BenchOp) { + let mut group = c.benchmark_group(group_name); -impl EncodeFormatLayer { - fn new(iterations: usize) -> Self { - Self { iterations } - } -} - -impl Layer for EncodeFormatLayer -where - S: Subscriber + for<'a> LookupSpan<'a>, -{ - fn on_event(&self, event: &Event<'_>, _ctx: tracing_subscriber::layer::Context<'_, S>) { - // Encode + format N times - for _ in 0..self.iterations { - let record = LogRecord::new(event); - let writer = ConsoleWriter::no_color(); - let callsite = SavedCallsite::new(event.metadata()); + for &iterations in &[100, 1000] { + for &(attr_count, attr_label) in &[(0, "0_attrs"), (3, "3_attrs"), (10, "10_attrs")] { + let id = BenchmarkId::new(attr_label, format!("{}_events", iterations)); - let line = writer.format_log_record(&record, &callsite); - let _ = std::hint::black_box(line); + group.bench_with_input(id, &iterations, |b, &iters| { + let layer = BenchLayer::new(iters, op); + match attr_count { + 0 => run_bench(b, layer, || emit_log!(0)), + 3 => run_bench(b, layer, || emit_log!(3)), + _ => run_bench(b, layer, || emit_log!(10)), + } + }); } } -} - -fn bench_encode_and_format(c: &mut Criterion) { - let mut group = c.benchmark_group("encode_and_format"); - - for iterations in [100, 1000].iter() { - let _ = group.bench_with_input( - BenchmarkId::new("3_attrs", format!("{}_events", iterations)), - iterations, - |b, &iters| { - b.iter(|| { - let layer = EncodeFormatLayer::new(iters); - let subscriber = tracing_subscriber::registry().with(layer); - let dispatch = tracing::Dispatch::new(subscriber); - - tracing::dispatcher::with_default(&dispatch, || { - tracing::info!( - key1 = "value1", - key2 = 42, - key3 = true, - "Benchmark message" - ); - }); - - let _ = std::hint::black_box(()); - }) - }, - ); - } group.finish(); } -struct EncodeFullLayer { - iterations: usize, -} - -impl EncodeFullLayer { - fn new(iterations: usize) -> Self { - Self { iterations } - } +fn bench_encode(c: &mut Criterion) { + 
bench_op(c, "encode", BenchOp::Encode); } -impl Layer for EncodeFullLayer -where - S: Subscriber + for<'a> LookupSpan<'a>, -{ - fn on_event(&self, event: &Event<'_>, _ctx: tracing_subscriber::layer::Context<'_, S>) { - // Encode + format N times - let mut buf = ProtoBuffer::new(); - let mut encoder = DirectLogRecordEncoder::new(&mut buf); - let callsite = SavedCallsite::new(event.metadata()); - - for _ in 0..self.iterations { - encoder.clear(); - let size = encoder.encode_log_record(LogRecord::new(event), &callsite); - let _ = std::hint::black_box(size); - } - } +fn bench_format(c: &mut Criterion) { + bench_op(c, "format", BenchOp::Format); } -fn bench_encode_full(c: &mut Criterion) { - let mut group = c.benchmark_group("encode_full"); - - for iterations in [100, 1000].iter() { - let _ = group.bench_with_input( - BenchmarkId::new("3_attrs", format!("{}_events", iterations)), - iterations, - |b, &iters| { - b.iter(|| { - let layer = EncodeFullLayer::new(iters); - let subscriber = tracing_subscriber::registry().with(layer); - let dispatch = tracing::Dispatch::new(subscriber); - - tracing::dispatcher::with_default(&dispatch, || { - tracing::info!( - key1 = "value1", - key2 = 42, - key3 = true, - "Benchmark message" - ); - }); - - let _ = std::hint::black_box(()); - }) - }, - ); - } - - group.finish(); +fn bench_encode_and_format(c: &mut Criterion) { + bench_op(c, "encode_and_format", BenchOp::EncodeAndFormat); } -fn bench_encode_attrs(c: &mut Criterion) { - let mut group = c.benchmark_group("encode_attrs"); - let iterations = 1000; - - // No attributes - let _ = group.bench_function("0_attrs/1000_events", |b| { - b.iter(|| { - let layer = EncodeOnlyLayer::new(iterations); - let subscriber = tracing_subscriber::registry().with(layer); - let dispatch = tracing::Dispatch::new(subscriber); - - tracing::dispatcher::with_default(&dispatch, || { - tracing::info!("message only"); - }); - - let _ = std::hint::black_box(()); - }) - }); - - // 3 attributes - let _ = group.bench_function("3_attrs/1000_events", |b| { - b.iter(|| { - let layer = EncodeOnlyLayer::new(iterations); - let subscriber = tracing_subscriber::registry().with(layer); - let dispatch = tracing::Dispatch::new(subscriber); - - tracing::dispatcher::with_default(&dispatch, || { - tracing::info!(a1 = "value", a2 = 42, a3 = true, "with 3 attributes"); - }); - - let _ = std::hint::black_box(()); - }) - }); - - // 10 attributes - let _ = group.bench_function("10_attrs/1000_events", |b| { - b.iter(|| { - let layer = EncodeOnlyLayer::new(iterations); - let subscriber = tracing_subscriber::registry().with(layer); - let dispatch = tracing::Dispatch::new(subscriber); - - tracing::dispatcher::with_default(&dispatch, || { - tracing::info!( - a1 = "string1", - a2 = true, - a3 = "string2", - a4 = 3.14, - a5 = 42i64, - a6 = "string3", - a7 = false, - a8 = 2.718, - a9 = 100u64, - a10 = "string4", - "with 10 attributes" - ); - }); - - let _ = std::hint::black_box(()); - }) - }); - - group.finish(); +fn bench_encode_proto(c: &mut Criterion) { + bench_op(c, "encode_proto", BenchOp::EncodeProto); } #[allow(missing_docs)] @@ -325,7 +192,7 @@ mod bench_entry { criterion_group!( name = benches; config = Criterion::default(); - targets = bench_encode, bench_format, bench_encode_and_format, bench_encode_full, bench_encode_attrs + targets = bench_encode, bench_format, bench_encode_and_format, bench_encode_proto ); } From f9a65a5b677942de96768e2f55e4424684a607f6 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Wed, 7 Jan 2026 08:54:53 -0800 Subject: [PATCH 
17/92] use proto encoder macro --- .../telemetry/src/self_tracing/encoder.rs | 290 ++++++++---------- 1 file changed, 131 insertions(+), 159 deletions(-) diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs index bb374e1b58..dbc24e67c3 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs @@ -4,36 +4,13 @@ //! Direct OTLP bytes encoder for tokio-tracing events. use std::fmt::Write as FmtWrite; -use tracing::Level; -use super::{LogRecord, SavedCallsite}; -use otap_df_pdata::otlp::{ProtoBuffer, encode_len_placeholder, patch_len_placeholder}; -use otap_df_pdata::proto::consts::{field_num::common::*, field_num::logs::*, wire_types}; -/// Position marker for a length-delimited field that needs patching. -/// -/// TODO: This would belong in otap_df_pdata::otlp, for use in place -/// of directly calling encode_len_placeholder, patch_len_placeholder, -/// except we should use the macros defined there instead. Remove. -#[derive(Debug, Clone, Copy)] -pub struct LengthPlaceholder { - /// Position in buffer where the 4-byte length placeholder starts - position: usize, -} - -impl LengthPlaceholder { - /// Create a new placeholder at the current buffer position. - #[inline] - pub fn new(position: usize) -> Self { - Self { position } - } +use otap_df_pdata::otlp::ProtoBuffer; +use otap_df_pdata::proto::consts::{field_num::common::*, field_num::logs::*, wire_types}; +use otap_df_pdata::proto_encode_len_delimited_unknown_size; +use tracing::Level; - /// Patch the placeholder with the actual content length. - #[inline] - pub fn patch(self, buf: &mut ProtoBuffer) { - let content_len = buf.len() - self.position - 4; - patch_len_placeholder(buf, 4, content_len, self.position); - } -} +use super::{LogRecord, SavedCallsite}; /// Direct encoder that writes a single LogRecord from a tracing Event. pub struct DirectLogRecordEncoder<'buf> { @@ -57,23 +34,27 @@ impl<'buf> DirectLogRecordEncoder<'buf> { /// Returns the number of bytes written. 
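
[Editor's note] As a reading aid for the function below, the LogRecord fields it emits and where each comes from, per the OTLP logs proto constants used in this file:

```text
LogRecord fields written by encode_log_record:
  1  time_unix_nano   fixed64  <- record.timestamp_ns
  2  severity_number  varint   <- callsite.level (OTLP severity mapping)
  3  severity_text    string   <- callsite.level.as_str()
  5  body / 6  attributes      <- record.body_attrs_bytes (pre-encoded)
```
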
pub fn encode_log_record(&mut self, record: LogRecord, callsite: &SavedCallsite) -> usize { let start_len = self.buf.len(); - + // Encode time_unix_nano (field 1, fixed64) - self.buf.encode_field_tag(LOG_RECORD_TIME_UNIX_NANO, wire_types::FIXED64); - self.buf.extend_from_slice(&record.timestamp_ns.to_le_bytes()); + self.buf + .encode_field_tag(LOG_RECORD_TIME_UNIX_NANO, wire_types::FIXED64); + self.buf + .extend_from_slice(&record.timestamp_ns.to_le_bytes()); // Note: the next two fields could be pre-encoded by Level - + // Encode severity_number (field 2, varint) let severity = level_to_severity_number(&callsite.level); - self.buf.encode_field_tag(LOG_RECORD_SEVERITY_NUMBER, wire_types::VARINT); + self.buf + .encode_field_tag(LOG_RECORD_SEVERITY_NUMBER, wire_types::VARINT); self.buf.encode_varint(severity as u64); - + // Encode severity_text (field 3, string) - self.buf.encode_string(LOG_RECORD_SEVERITY_TEXT, callsite.level.as_str()); + self.buf + .encode_string(LOG_RECORD_SEVERITY_TEXT, callsite.level.as_str()); self.buf.extend_from_slice(&record.body_attrs_bytes); - + self.buf.len() - start_len } } @@ -83,14 +64,6 @@ pub struct DirectFieldVisitor<'buf> { buf: &'buf mut ProtoBuffer, } -impl<'buf> FmtWrite for DirectFieldVisitor<'buf> { - #[inline] - fn write_str(&mut self, s: &str) -> std::fmt::Result { - self.buf.extend_from_slice(s.as_bytes()); - Ok(()) - } -} - impl<'buf> DirectFieldVisitor<'buf> { /// Create a new DirectFieldVisitor that writes to the provided buffer. pub fn new(buf: &'buf mut ProtoBuffer) -> Self { @@ -100,151 +73,150 @@ impl<'buf> DirectFieldVisitor<'buf> { /// Encode an attribute (KeyValue message) with a string value. #[inline] pub fn encode_string_attribute(&mut self, key: &str, value: &str) { - - // KeyValue message as LOG_RECORD_ATTRIBUTES field (tag 6) - self.buf.encode_field_tag(LOG_RECORD_ATTRIBUTES, wire_types::LEN); - let kv_placeholder = LengthPlaceholder::new(self.buf.len()); - encode_len_placeholder(self.buf); - - // KeyValue.key (field 1, string) - self.buf.encode_string(KEY_VALUE_KEY, key); - - // KeyValue.value (field 2, AnyValue message) - self.buf.encode_field_tag(KEY_VALUE_VALUE, wire_types::LEN); - let av_placeholder = LengthPlaceholder::new(self.buf.len()); - encode_len_placeholder(self.buf); - - // AnyValue.string_value (field 1, string) - self.buf.encode_string(ANY_VALUE_STRING_VALUE, value); - - av_placeholder.patch(self.buf); - kv_placeholder.patch(self.buf); + proto_encode_len_delimited_unknown_size!( + LOG_RECORD_ATTRIBUTES, + { + self.buf.encode_string(KEY_VALUE_KEY, key); + proto_encode_len_delimited_unknown_size!( + KEY_VALUE_VALUE, + { + self.buf.encode_string(ANY_VALUE_STRING_VALUE, value); + }, + self.buf + ); + }, + self.buf + ); } /// Encode an attribute with an i64 value. 
#[inline] pub fn encode_int_attribute(&mut self, key: &str, value: i64) { - self.buf.encode_field_tag(LOG_RECORD_ATTRIBUTES, wire_types::LEN); - let kv_placeholder = LengthPlaceholder::new(self.buf.len()); - encode_len_placeholder(self.buf); - - self.buf.encode_string(KEY_VALUE_KEY, key); - - self.buf.encode_field_tag(KEY_VALUE_VALUE, wire_types::LEN); - let av_placeholder = LengthPlaceholder::new(self.buf.len()); - encode_len_placeholder(self.buf); - - // AnyValue.int_value (field 3, varint) - self.buf.encode_field_tag(ANY_VALUE_INT_VALUE, wire_types::VARINT); - self.buf.encode_varint(value as u64); - - av_placeholder.patch(self.buf); - kv_placeholder.patch(self.buf); + proto_encode_len_delimited_unknown_size!( + LOG_RECORD_ATTRIBUTES, + { + self.buf.encode_string(KEY_VALUE_KEY, key); + proto_encode_len_delimited_unknown_size!( + KEY_VALUE_VALUE, + { + self.buf + .encode_field_tag(ANY_VALUE_INT_VALUE, wire_types::VARINT); + self.buf.encode_varint(value as u64); + }, + self.buf + ); + }, + self.buf + ); } /// Encode an attribute with a bool value. #[inline] pub fn encode_bool_attribute(&mut self, key: &str, value: bool) { - self.buf.encode_field_tag(LOG_RECORD_ATTRIBUTES, wire_types::LEN); - let kv_placeholder = LengthPlaceholder::new(self.buf.len()); - encode_len_placeholder(self.buf); - - self.buf.encode_string(KEY_VALUE_KEY, key); - - self.buf.encode_field_tag(KEY_VALUE_VALUE, wire_types::LEN); - let av_placeholder = LengthPlaceholder::new(self.buf.len()); - encode_len_placeholder(self.buf); - - // AnyValue.bool_value (field 2, varint) - self.buf.encode_field_tag(ANY_VALUE_BOOL_VALUE, wire_types::VARINT); - self.buf.encode_varint(if value { 1 } else { 0 }); - - av_placeholder.patch(self.buf); - kv_placeholder.patch(self.buf); + proto_encode_len_delimited_unknown_size!( + LOG_RECORD_ATTRIBUTES, + { + self.buf.encode_string(KEY_VALUE_KEY, key); + proto_encode_len_delimited_unknown_size!( + KEY_VALUE_VALUE, + { + self.buf + .encode_field_tag(ANY_VALUE_BOOL_VALUE, wire_types::VARINT); + self.buf.encode_varint(u64::from(value)); + }, + self.buf + ); + }, + self.buf + ); } /// Encode an attribute with a double value. #[inline] pub fn encode_double_attribute(&mut self, key: &str, value: f64) { - self.buf.encode_field_tag(LOG_RECORD_ATTRIBUTES, wire_types::LEN); - let kv_placeholder = LengthPlaceholder::new(self.buf.len()); - encode_len_placeholder(self.buf); - - self.buf.encode_string(KEY_VALUE_KEY, key); - - self.buf.encode_field_tag(KEY_VALUE_VALUE, wire_types::LEN); - let av_placeholder = LengthPlaceholder::new(self.buf.len()); - encode_len_placeholder(self.buf); - - // AnyValue.double_value (field 4, fixed64) - self.buf.encode_field_tag(ANY_VALUE_DOUBLE_VALUE, wire_types::FIXED64); - self.buf.extend_from_slice(&value.to_le_bytes()); - - av_placeholder.patch(self.buf); - kv_placeholder.patch(self.buf); + proto_encode_len_delimited_unknown_size!( + LOG_RECORD_ATTRIBUTES, + { + self.buf.encode_string(KEY_VALUE_KEY, key); + proto_encode_len_delimited_unknown_size!( + KEY_VALUE_VALUE, + { + self.buf + .encode_field_tag(ANY_VALUE_DOUBLE_VALUE, wire_types::FIXED64); + self.buf.extend_from_slice(&value.to_le_bytes()); + }, + self.buf + ); + }, + self.buf + ); } /// Encode the body (AnyValue message) as a string. 
#[inline] pub fn encode_body_string(&mut self, value: &str) { - // LogRecord.body (field 5, AnyValue message) - self.buf.encode_field_tag(LOG_RECORD_BODY, wire_types::LEN); - let placeholder = LengthPlaceholder::new(self.buf.len()); - encode_len_placeholder(self.buf); - - // AnyValue.string_value (field 1, string) - self.buf.encode_string(ANY_VALUE_STRING_VALUE, value); - - placeholder.patch(self.buf); + proto_encode_len_delimited_unknown_size!( + LOG_RECORD_BODY, + { + self.buf.encode_string(ANY_VALUE_STRING_VALUE, value); + }, + self.buf + ); } /// Encode the body (AnyValue message) from a Debug value without allocation. #[inline] pub fn encode_body_debug(&mut self, value: &dyn std::fmt::Debug) { - // LogRecord.body (field 5, AnyValue message) - self.buf.encode_field_tag(LOG_RECORD_BODY, wire_types::LEN); - let body_placeholder = LengthPlaceholder::new(self.buf.len()); - encode_len_placeholder(self.buf); - - // AnyValue.string_value (field 1, string) - self.buf.encode_field_tag(ANY_VALUE_STRING_VALUE, wire_types::LEN); - let string_placeholder = LengthPlaceholder::new(self.buf.len()); - encode_len_placeholder(self.buf); - - // Write Debug output directly to buffer - let _ = write!(self, "{:?}", value); - - string_placeholder.patch(self.buf); - body_placeholder.patch(self.buf); + proto_encode_len_delimited_unknown_size!( + LOG_RECORD_BODY, + { + encode_debug_string(self.buf, value); + }, + self.buf + ); } /// Encode an attribute with a Debug value without allocation. #[inline] pub fn encode_debug_attribute(&mut self, key: &str, value: &dyn std::fmt::Debug) { - // KeyValue message as LOG_RECORD_ATTRIBUTES field (tag 6) - self.buf.encode_field_tag(LOG_RECORD_ATTRIBUTES, wire_types::LEN); - let kv_placeholder = LengthPlaceholder::new(self.buf.len()); - encode_len_placeholder(self.buf); - - // KeyValue.key (field 1, string) - self.buf.encode_string(KEY_VALUE_KEY, key); - - // KeyValue.value (field 2, AnyValue message) - self.buf.encode_field_tag(KEY_VALUE_VALUE, wire_types::LEN); - let av_placeholder = LengthPlaceholder::new(self.buf.len()); - encode_len_placeholder(self.buf); - - // AnyValue.string_value (field 1, string) - self.buf.encode_field_tag(ANY_VALUE_STRING_VALUE, wire_types::LEN); - let string_placeholder = LengthPlaceholder::new(self.buf.len()); - encode_len_placeholder(self.buf); - - // Write Debug output directly to buffer - let _ = write!(self, "{:?}", value); - - string_placeholder.patch(self.buf); - av_placeholder.patch(self.buf); - kv_placeholder.patch(self.buf); + proto_encode_len_delimited_unknown_size!( + LOG_RECORD_ATTRIBUTES, + { + self.buf.encode_string(KEY_VALUE_KEY, key); + proto_encode_len_delimited_unknown_size!( + KEY_VALUE_VALUE, + { + encode_debug_string(self.buf, value); + }, + self.buf + ); + }, + self.buf + ); + } +} + +/// Helper to encode a Debug value as a protobuf string field. +/// This is separate from DirectFieldVisitor to avoid borrow conflicts with the macro. +#[inline] +fn encode_debug_string(buf: &mut ProtoBuffer, value: &dyn std::fmt::Debug) { + proto_encode_len_delimited_unknown_size!( + ANY_VALUE_STRING_VALUE, + { + let _ = write!(DebugWriter(buf), "{:?}", value); + }, + buf + ); +} + +/// Wrapper that implements fmt::Write for a ProtoBuffer. 
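The DebugWriter introduced just below exists so that write!(.., "{:?}", value) can stream Debug output straight into the protobuf buffer. The same trick with only std types, showing there is no intermediate String allocation involved:

use std::fmt::Write as FmtWrite;

// An fmt::Write adapter over a byte buffer: write! drives write_str, which
// appends the UTF-8 bytes directly.
struct VecWriter<'a>(&'a mut Vec<u8>);

impl FmtWrite for VecWriter<'_> {
    fn write_str(&mut self, s: &str) -> std::fmt::Result {
        self.0.extend_from_slice(s.as_bytes());
        Ok(())
    }
}

fn main() {
    let mut buf = Vec::new();
    let _ = write!(VecWriter(&mut buf), "{:?}", (1, "two"));
    assert_eq!(buf, br#"(1, "two")"#);
}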
+struct DebugWriter<'a>(&'a mut ProtoBuffer);
+
+impl FmtWrite for DebugWriter<'_> {
+    #[inline]
+    fn write_str(&mut self, s: &str) -> std::fmt::Result {
+        self.0.extend_from_slice(s.as_bytes());
+        Ok(())
+    }
+}

From b95aea2f39f054efdfd31565a913b1e401beae61 Mon Sep 17 00:00:00 2001
From: Joshua MacDonald
Date: Wed, 7 Jan 2026 09:03:34 -0800
Subject: [PATCH 18/92] ftb

---
 rust/otap-dataflow/benchmarks/benches/self_tracing/main.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/rust/otap-dataflow/benchmarks/benches/self_tracing/main.rs b/rust/otap-dataflow/benchmarks/benches/self_tracing/main.rs
index 7c82dfe933..1db275dca8 100644
--- a/rust/otap-dataflow/benchmarks/benches/self_tracing/main.rs
+++ b/rust/otap-dataflow/benchmarks/benches/self_tracing/main.rs
@@ -133,9 +133,9 @@ macro_rules! emit_log {
 }
 
 /// Run a benchmark with the given layer, invoking the log emitter.
-fn run_bench<L, F>(b: &mut criterion::Bencher, layer: L, emit: F)
+fn run_bench<L, F>(b: &mut criterion::Bencher<'_>, layer: L, emit: F)
 where
-    L: Layer<Registry> + 'static,
+    L: Layer<Registry> + Send + Sync + 'static,
     F: Fn(),
 {
     let subscriber = tracing_subscriber::registry().with(layer);
@@ -155,7 +155,7 @@ fn bench_op(c: &mut Criterion, group_name: &str, op: BenchOp) {
 
     for &(attr_count, attr_label) in &[(0, "0_attrs"), (3, "3_attrs"), (10, "10_attrs")] {
         let id = BenchmarkId::new(attr_label, format!("{}_events", iterations));
-        group.bench_with_input(id, &iterations, |b, &iters| {
+        let _ = group.bench_with_input(id, &iterations, |b, &iters| {
             let layer = BenchLayer::new(iters, op);
             match attr_count {
                 0 => run_bench(b, layer, || emit_log!(0)),

From 82f5cbee57e46181943afad7c5b332dc2ac70cd7 Mon Sep 17 00:00:00 2001
From: Joshua MacDonald
Date: Wed, 7 Jan 2026 09:34:26 -0800
Subject: [PATCH 19/92] nicer

---
 .../otap-dataflow/crates/telemetry/Cargo.toml |   1 +
 .../telemetry/src/self_tracing/formatter.rs   | 365 ++++++++++--------
 2 files changed, 196 insertions(+), 170 deletions(-)

diff --git a/rust/otap-dataflow/crates/telemetry/Cargo.toml b/rust/otap-dataflow/crates/telemetry/Cargo.toml
index 85ea4f1e31..06633d338e 100644
--- a/rust/otap-dataflow/crates/telemetry/Cargo.toml
+++ b/rust/otap-dataflow/crates/telemetry/Cargo.toml
@@ -24,6 +24,7 @@ otap-df-config = { workspace = true }
 
 axum = { workspace = true }
 bytes = { workspace = true }
+chrono = { workspace = true }
 flume = { workspace = true }
 tokio = { workspace = true }
 tokio-util = { workspace = true }
diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs
index d089f7052a..608e4ad557 100644
--- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs
+++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs
@@ -5,19 +5,23 @@
 
 use super::{LogRecord, SavedCallsite};
 use bytes::Bytes;
+use chrono::{DateTime, Datelike, Timelike, Utc};
 use otap_df_pdata::proto::consts::field_num::logs::{LOG_RECORD_ATTRIBUTES, LOG_RECORD_BODY};
 use otap_df_pdata::proto::consts::wire_types;
 use otap_df_pdata::views::common::{AnyValueView, AttributeView, ValueType};
 use otap_df_pdata::views::otlp::bytes::common::{RawAnyValue, RawKeyValue};
 use otap_df_pdata::views::otlp::bytes::decode::read_varint;
-use std::io::Write;
+use std::io::{Cursor, Write};
 use tracing::span::{Attributes, Record};
 use tracing::{Event, Level, Subscriber};
 use tracing_subscriber::layer::{Context, Layer as TracingLayer};
 use tracing_subscriber::registry::LookupSpan;
 
+/// Default buffer size for log formatting.
+pub const LOG_BUFFER_SIZE: usize = 4096; + /// Console formatter writes to stdout or stderr. -#[derive(Debug)] +#[derive(Debug, Clone, Copy)] pub struct ConsoleWriter { use_ansi: bool, } @@ -30,141 +34,157 @@ pub struct RawLayer { } // ANSI color codes -const ANSI_RESET: &str = "\x1b[0m"; -const ANSI_RED: &str = "\x1b[31m"; -const ANSI_YELLOW: &str = "\x1b[33m"; -const ANSI_GREEN: &str = "\x1b[32m"; -const ANSI_BLUE: &str = "\x1b[34m"; -const ANSI_MAGENTA: &str = "\x1b[35m"; -const ANSI_DIM: &str = "\x1b[2m"; -const ANSI_BOLD: &str = "\x1b[1m"; +const ANSI_RESET: &[u8] = b"\x1b[0m"; +const ANSI_RED: &[u8] = b"\x1b[31m"; +const ANSI_YELLOW: &[u8] = b"\x1b[33m"; +const ANSI_GREEN: &[u8] = b"\x1b[32m"; +const ANSI_BLUE: &[u8] = b"\x1b[34m"; +const ANSI_MAGENTA: &[u8] = b"\x1b[35m"; +const ANSI_DIM: &[u8] = b"\x1b[2m"; +const ANSI_BOLD: &[u8] = b"\x1b[1m"; impl RawLayer { - /// Return a new fomatting layer with associated writer. + /// Return a new formatting layer with associated writer. pub fn new(writer: ConsoleWriter) -> Self { Self { writer } } } +/// Type alias for a cursor over a byte buffer. +/// Uses `std::io::Cursor` for position tracking with `std::io::Write`. +pub type BufWriter<'a> = Cursor<&'a mut [u8]>; + impl ConsoleWriter { /// Create a writer that outputs to stdout without ANSI colors. pub fn no_color() -> Self { Self { use_ansi: false } } - /// Create a writer that outputs to stderr without ANSI colors. + /// Create a writer that outputs to stderr with ANSI colors. pub fn color() -> Self { Self { use_ansi: true } } - /// Format a InternalLogRecord as a human-readable string. + /// Format a LogRecord as a human-readable string (for testing/compatibility). /// /// Output format: `2026-01-06T10:30:45.123Z INFO target::name (file.rs:42): body [attr=value, ...]` pub fn format_log_record(&self, record: &LogRecord, callsite: &SavedCallsite) -> String { - let event_name = Self::format_event_name(callsite); + let mut buf = [0u8; LOG_BUFFER_SIZE]; + let len = self.write_log_record(&mut buf, record, callsite); + // The buffer contains valid UTF-8 since we only write ASCII and valid UTF-8 strings + String::from_utf8_lossy(&buf[..len]).into_owned() + } - let body_attrs = Self::format_body_attrs(&record.body_attrs_bytes); + /// Write a LogRecord to a byte buffer. Returns the number of bytes written. + /// + /// This is the efficient path - no heap allocation, writes directly to the buffer. 
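A note on the std::io::Cursor-over-stack-buffer pattern adopted here, including how it behaves at capacity. A small self-contained check (std only):

use std::io::{Cursor, Write};

fn main() {
    let mut buf = [0u8; 8];
    let mut w = Cursor::new(&mut buf[..]);
    let _ = w.write_all(b"INFO ");
    // Only three more bytes fit; Write for Cursor<&mut [u8]> stores what it
    // can and reports an error, which the formatter deliberately ignores, so
    // output is truncated rather than panicking or reallocating.
    let _ = w.write_all(b"hello world");
    let len = w.position() as usize;
    assert_eq!(&buf[..len], b"INFO hel");
}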
+ pub fn write_log_record( + &self, + buf: &mut [u8], + record: &LogRecord, + callsite: &SavedCallsite, + ) -> usize { + let mut w = Cursor::new(buf); if self.use_ansi { - let level_color = Self::level_color(callsite.level); - format!( - "{}{}{} {}{:5}{} {}{}{}: {}\n", - ANSI_DIM, - Self::format_timestamp(record.timestamp_ns), - ANSI_RESET, - level_color, - callsite.level.as_str(), - ANSI_RESET, - ANSI_BOLD, - event_name, - ANSI_RESET, - body_attrs, - ) + let _ = w.write_all(ANSI_DIM); + Self::write_timestamp(&mut w, record.timestamp_ns); + let _ = w.write_all(ANSI_RESET); + let _ = w.write_all(b" "); + let _ = w.write_all(Self::level_color(callsite.level)); + Self::write_level(&mut w, callsite.level); + let _ = w.write_all(ANSI_RESET); + let _ = w.write_all(b" "); + let _ = w.write_all(ANSI_BOLD); + Self::write_event_name(&mut w, callsite); + let _ = w.write_all(ANSI_RESET); + let _ = w.write_all(b": "); } else { - format!( - "{} {:5} {}: {}\n", - Self::format_timestamp(record.timestamp_ns), - callsite.level.as_str(), - event_name, - body_attrs, - ) + Self::write_timestamp(&mut w, record.timestamp_ns); + let _ = w.write_all(b" "); + Self::write_level(&mut w, callsite.level); + let _ = w.write_all(b" "); + Self::write_event_name(&mut w, callsite); + let _ = w.write_all(b": "); } + + Self::write_body_attrs(&mut w, &record.body_attrs_bytes); + let _ = w.write_all(b"\n"); + + w.position() as usize } - /// Format callsite details as event_name string. - /// - /// Format: "target::name (file.rs:42)" or "target::name" if file/line unavailable. + /// Write level with padding. #[inline] - fn format_event_name(callsite: &SavedCallsite) -> String { - match (callsite.file, callsite.line) { - (Some(file), Some(line)) => { - format!("{}::{} ({}:{})", callsite.target, callsite.name, file, line) - } - _ => format!("{}::{}", callsite.target, callsite.name), - } + fn write_level(w: &mut BufWriter<'_>, level: &Level) { + let _ = match *level { + Level::TRACE => w.write_all(b"TRACE"), + Level::DEBUG => w.write_all(b"DEBUG"), + Level::INFO => w.write_all(b"INFO "), + Level::WARN => w.write_all(b"WARN "), + Level::ERROR => w.write_all(b"ERROR"), + }; } - /// Format nanosecond timestamp as ISO 8601 (UTC). - fn format_timestamp(nanos: u64) -> String { - let secs = nanos / 1_000_000_000; - let subsec_millis = (nanos % 1_000_000_000) / 1_000_000; - - // Convert to datetime components - // Days since Unix epoch - let days = secs / 86400; - let time_of_day = secs % 86400; - - let hours = time_of_day / 3600; - let minutes = (time_of_day % 3600) / 60; - let seconds = time_of_day % 60; - - // Calculate year/month/day from days since epoch (1970-01-01) - let (year, month, day) = Self::days_to_ymd(days as i64); + /// Write callsite details as event_name to buffer. + #[inline] + fn write_event_name(w: &mut BufWriter<'_>, callsite: &SavedCallsite) { + let _ = w.write_all(callsite.target.as_bytes()); + let _ = w.write_all(b"::"); + let _ = w.write_all(callsite.name.as_bytes()); + if let (Some(file), Some(line)) = (callsite.file, callsite.line) { + let _ = write!(w, " ({}:{})", file, line); + } + } - format!( - "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}.{:03}Z", - year, month, day, hours, minutes, seconds, subsec_millis - ) + /// Write nanosecond timestamp as ISO 8601 (UTC) to buffer. 
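For reference, write_event_name above renders "target::name" plus an optional "(file:line)" suffix. A string-building sketch of the same format (the real code writes bytes through the cursor instead):

use std::fmt::Write;

fn event_name(target: &str, name: &str, loc: Option<(&str, u32)>) -> String {
    let mut s = format!("{}::{}", target, name);
    if let Some((file, line)) = loc {
        let _ = write!(s, " ({}:{})", file, line);
    }
    s
}

fn main() {
    assert_eq!(
        event_name("my_crate::module", "event", Some(("src/lib.rs", 42))),
        "my_crate::module::event (src/lib.rs:42)"
    );
}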
+    #[inline]
+    fn write_timestamp(w: &mut BufWriter<'_>, nanos: u64) {
+        let secs = (nanos / 1_000_000_000) as i64;
+        let subsec_nanos = (nanos % 1_000_000_000) as u32;
+
+        if let Some(dt) = DateTime::<Utc>::from_timestamp(secs, subsec_nanos) {
+            let date = dt.date_naive();
+            let time = dt.time();
+            let millis = subsec_nanos / 1_000_000;
+
+            let _ = write!(
+                w,
+                "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}.{:03}Z",
+                date.year(),
+                date.month(),
+                date.day(),
+                time.hour(),
+                time.minute(),
+                time.second(),
+                millis
+            );
+        } else {
+            let _ = w.write_all(b"");
+        }
     }
 
-    /// Convert days since Unix epoch to (year, month, day).
-    fn days_to_ymd(days: i64) -> (i32, u32, u32) {
-        // Algorithm from Howard Hinnant's date library
-        let z = days + 719468;
-        let era = if z >= 0 { z } else { z - 146096 } / 146097;
-        let doe = (z - era * 146097) as u32;
-        let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146096) / 365;
-        let y = yoe as i64 + era * 400;
-        let doy = doe - (365 * yoe + yoe / 4 - yoe / 100);
-        let mp = (5 * doy + 2) / 153;
-        let d = doy - (153 * mp + 2) / 5 + 1;
-        let m = if mp < 10 { mp + 3 } else { mp - 9 };
-        let y = if m <= 2 { y + 1 } else { y };
-
-        (y as i32, m, d)
+    /// Format timestamp as String (for testing).
+    fn format_timestamp(nanos: u64) -> String {
+        let mut buf = [0u8; 32];
+        let mut w = Cursor::new(&mut buf[..]);
+        Self::write_timestamp(&mut w, nanos);
+        let pos = w.position() as usize;
+        String::from_utf8_lossy(&buf[..pos]).into_owned()
     }
 
-    /// Format body+attrs bytes as readable string.
-    ///
-    /// Uses the pdata View types (`RawAnyValue`, `RawKeyValue`) for zero-copy
-    /// parsing of the OTLP protobuf bytes. This is consistent with the decoding
-    /// approach used in `otlp_bytes_formatter.rs`.
-    fn format_body_attrs(bytes: &Bytes) -> String {
+    /// Write body+attrs bytes to buffer.
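A quick spot-check of the chrono conversion write_timestamp now relies on (DateTime::from_timestamp is the chrono 0.4 constructor used in the hunk above; it returns None for out-of-range seconds, which the else branch guards against):

use chrono::{DateTime, Datelike, Timelike, Utc};

fn main() {
    // 2024-01-01T00:00:00.123Z expressed as Unix nanoseconds.
    let nanos: u64 = 1_704_067_200 * 1_000_000_000 + 123_000_000;
    let secs = (nanos / 1_000_000_000) as i64;
    let subsec = (nanos % 1_000_000_000) as u32;
    let dt = DateTime::<Utc>::from_timestamp(secs, subsec).unwrap();
    assert_eq!((dt.year(), dt.month(), dt.day()), (2024, 1, 1));
    assert_eq!((dt.hour(), dt.minute(), dt.second()), (0, 0, 0));
    assert_eq!(subsec / 1_000_000, 123); // the millis the formatter prints
}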
+ fn write_body_attrs(w: &mut BufWriter<'_>, bytes: &Bytes) { if bytes.is_empty() { - return String::new(); + return; } - // The bytes contain LogRecord fields: - // - field 5 (LOG_RECORD_BODY): AnyValue message - // - field 6 (LOG_RECORD_ATTRIBUTES): repeated KeyValue messages - - let mut body_str = String::new(); - let mut attrs = Vec::new(); let data = bytes.as_ref(); let mut pos = 0; + let mut first_attr = true; + let mut has_attrs = false; while pos < data.len() { - // Read field tag let (tag, next_pos) = match read_varint(data, pos) { Some(v) => v, None => break, @@ -175,11 +195,9 @@ impl ConsoleWriter { let wire_type = tag & 0x7; if wire_type != wire_types::LEN { - // Skip non-length-delimited fields (shouldn't happen for body/attrs) break; } - // Read length-delimited content let (len, next_pos) = match read_varint(data, pos) { Some(v) => v, None => break, @@ -194,120 +212,123 @@ impl ConsoleWriter { let field_bytes = &data[pos..end]; if field_num == LOG_RECORD_BODY { - // Body: parse as AnyValue using pdata View let any_value = RawAnyValue::new(field_bytes); - body_str = Self::format_any_value(&any_value); + Self::write_any_value(w, &any_value); } else if field_num == LOG_RECORD_ATTRIBUTES { - // Attribute: parse as KeyValue using pdata View + if !has_attrs { + let _ = w.write_all(b" ["); + has_attrs = true; + } + + if !first_attr { + let _ = w.write_all(b", "); + } + first_attr = false; + let kv = RawKeyValue::new(field_bytes); - let key = String::from_utf8_lossy(kv.key()).to_string(); - let value = match kv.value() { - Some(v) => Self::format_any_value(&v), - None => "".to_string(), - }; - attrs.push(format!("{}={}", key, value)); + let _ = w.write_all(kv.key()); + let _ = w.write_all(b"="); + + match kv.value() { + Some(v) => Self::write_any_value(w, &v), + None => { + let _ = w.write_all(b""); + } + } } pos = end; } - if !attrs.is_empty() { - body_str.push_str(" ["); - body_str.push_str(&attrs.join(", ")); - body_str.push(']'); + if has_attrs { + let _ = w.write_all(b"]"); } - - body_str } - /// Format an AnyValue for display. - fn format_any_value<'a>(value: &impl AnyValueView<'a>) -> String { + /// Write an AnyValue to buffer. 
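The manual field walk above (replaced by the RawLogRecord view in a later commit of this series) depends only on read_varint returning the decoded value plus the offset just past it. A sketch of that contract, standard LEB128, matching how the call sites use the pdata helper (the helper's exact error behavior is an assumption):

fn read_varint(data: &[u8], mut pos: usize) -> Option<(u64, usize)> {
    let mut value = 0u64;
    let mut shift = 0u32;
    while pos < data.len() && shift < 64 {
        let byte = data[pos];
        pos += 1;
        value |= u64::from(byte & 0x7F) << shift;
        if byte & 0x80 == 0 {
            return Some((value, pos)); // a clear continuation bit ends the varint
        }
        shift += 7;
    }
    None // truncated or overlong input (assumed behavior)
}

fn main() {
    assert_eq!(read_varint(&[0x2A], 0), Some((42, 1)));
    assert_eq!(read_varint(&[0xAC, 0x02], 0), Some((300, 2)));
}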
+ fn write_any_value<'a>(w: &mut BufWriter<'_>, value: &impl AnyValueView<'a>) { match value.value_type() { ValueType::String => { if let Some(s) = value.as_string() { - String::from_utf8_lossy(s).to_string() - } else { - String::new() + let _ = w.write_all(s); } } ValueType::Int64 => { if let Some(i) = value.as_int64() { - i.to_string() - } else { - String::new() + let _ = write!(w, "{}", i); } } ValueType::Bool => { if let Some(b) = value.as_bool() { - b.to_string() - } else { - String::new() + let _ = w.write_all(if b { b"true" } else { b"false" }); } } ValueType::Double => { if let Some(d) = value.as_double() { - format!("{:.6}", d) - } else { - String::new() + let _ = write!(w, "{:.6}", d); } } ValueType::Bytes => { if let Some(bytes) = value.as_bytes() { - format!("{:?}", bytes) - } else { - String::new() + let _ = w.write_all(b"["); + for (i, b) in bytes.iter().enumerate() { + if i > 0 { + let _ = w.write_all(b", "); + } + let _ = write!(w, "{}", b); + } + let _ = w.write_all(b"]"); } } ValueType::Array => { + let _ = w.write_all(b"["); if let Some(array_iter) = value.as_array() { - let parts: Vec<_> = array_iter - .map(|item| Self::format_any_value(&item)) - .collect(); - format!("[{}]", parts.join(", ")) - } else { - "[]".to_string() + let mut first = true; + for item in array_iter { + if !first { + let _ = w.write_all(b", "); + } + first = false; + Self::write_any_value(w, &item); + } } + let _ = w.write_all(b"]"); } ValueType::KeyValueList => { + let _ = w.write_all(b"{"); if let Some(kvlist_iter) = value.as_kvlist() { - let parts: Vec<_> = kvlist_iter - .map(|kv| { - let key_str = String::from_utf8_lossy(kv.key()).to_string(); - match kv.value() { - Some(val) => { - format!("{}={}", key_str, Self::format_any_value(&val)) - } - None => key_str, - } - }) - .collect(); - format!("{{{}}}", parts.join(", ")) - } else { - "{}".to_string() + let mut first = true; + for kv in kvlist_iter { + if !first { + let _ = w.write_all(b", "); + } + first = false; + let _ = w.write_all(kv.key()); + if let Some(val) = kv.value() { + let _ = w.write_all(b"="); + Self::write_any_value(w, &val); + } + } } + let _ = w.write_all(b"}"); } - ValueType::Empty => String::new(), + ValueType::Empty => {} } } - /// Write a log line - fn write_line(&self, level: &Level, line: &str) { - let use_stderr = match *level { - Level::ERROR => true, - Level::WARN => true, - _ => false, - }; - // Ignore error from write() + /// Write a log line to stdout or stderr. + fn write_line(&self, level: &Level, data: &[u8]) { + let use_stderr = matches!(*level, Level::ERROR | Level::WARN); let _ = if use_stderr { - std::io::stderr().write(line.as_bytes()) + std::io::stderr().write_all(data) } else { - std::io::stdout().write(line.as_bytes()) + std::io::stdout().write_all(data) }; } /// Get ANSI color code for a severity level. 
#[inline] - fn level_color(level: &Level) -> &'static str { + fn level_color(level: &Level) -> &'static [u8] { match *level { Level::ERROR => ANSI_RED, Level::WARN => ANSI_YELLOW, @@ -328,10 +349,11 @@ where // Build compact record let record = LogRecord::new(event); - // Format and write immediately + // Allocate buffer on stack and format directly let callsite = SavedCallsite::new(metadata); - let line = self.writer.format_log_record(&record, &callsite); - self.writer.write_line(callsite.level, &line); + let mut buf = [0u8; LOG_BUFFER_SIZE]; + let len = self.writer.write_log_record(&mut buf, &record, &callsite); + self.writer.write_line(callsite.level, &buf[..len]); } fn on_new_span(&self, _attrs: &Attributes<'_>, _id: &tracing::span::Id, _ctx: Context<'_, S>) { @@ -379,12 +401,15 @@ mod tests { } #[test] - fn test_days_to_ymd() { - // 1970-01-01 is day 0 - assert_eq!(ConsoleWriter::days_to_ymd(0), (1970, 1, 1)); - - // 2024-01-01 is 19723 days after 1970-01-01 - assert_eq!(ConsoleWriter::days_to_ymd(19723), (2024, 1, 1)); + fn test_format_timestamp_edge_cases() { + // Unix epoch + let epoch = ConsoleWriter::format_timestamp(0); + assert_eq!(epoch, "1970-01-01T00:00:00.000Z"); + + // End of day with max milliseconds + let end_of_day: u64 = 86399 * 1_000_000_000 + 999_000_000; + let formatted = ConsoleWriter::format_timestamp(end_of_day); + assert_eq!(formatted, "1970-01-01T23:59:59.999Z"); } #[test] From cd4baee93a3d7116245f845509cc8436a0b2422b Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Wed, 7 Jan 2026 09:55:52 -0800 Subject: [PATCH 20/92] use view --- .../crates/pdata/src/views/otlp/bytes/logs.rs | 11 +++ .../telemetry/src/self_tracing/formatter.rs | 85 +++++-------------- 2 files changed, 31 insertions(+), 65 deletions(-) diff --git a/rust/otap-dataflow/crates/pdata/src/views/otlp/bytes/logs.rs b/rust/otap-dataflow/crates/pdata/src/views/otlp/bytes/logs.rs index 8795e14658..90f7ff18fa 100644 --- a/rust/otap-dataflow/crates/pdata/src/views/otlp/bytes/logs.rs +++ b/rust/otap-dataflow/crates/pdata/src/views/otlp/bytes/logs.rs @@ -153,6 +153,17 @@ pub struct RawLogRecord<'a> { bytes_parser: ProtoBytesParser<'a, LogFieldOffsets>, } +impl<'a> RawLogRecord<'a> { + /// Create a new instance of `RawLogRecord` from a byte slice containing + /// a serialized LogRecord message (or partial message with just body/attributes). 
+ #[must_use] + pub fn new(buf: &'a [u8]) -> Self { + Self { + bytes_parser: ProtoBytesParser::new(buf), + } + } +} + /// Known field offsets within byte buffer for fields in ResourceLogs message pub struct LogFieldOffsets { scalar_fields: [Cell>; 13], diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs index 608e4ad557..0cd403f3d1 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs @@ -6,11 +6,9 @@ use super::{LogRecord, SavedCallsite}; use bytes::Bytes; use chrono::{DateTime, Datelike, Timelike, Utc}; -use otap_df_pdata::proto::consts::field_num::logs::{LOG_RECORD_ATTRIBUTES, LOG_RECORD_BODY}; -use otap_df_pdata::proto::consts::wire_types; use otap_df_pdata::views::common::{AnyValueView, AttributeView, ValueType}; -use otap_df_pdata::views::otlp::bytes::common::{RawAnyValue, RawKeyValue}; -use otap_df_pdata::views::otlp::bytes::decode::read_varint; +use otap_df_pdata::views::logs::LogRecordView; +use otap_df_pdata::views::otlp::bytes::logs::RawLogRecord; use std::io::{Cursor, Write}; use tracing::span::{Attributes, Record}; use tracing::{Event, Level, Subscriber}; @@ -164,83 +162,40 @@ impl ConsoleWriter { } } - /// Format timestamp as String (for testing). - fn format_timestamp(nanos: u64) -> String { - let mut buf = [0u8; 32]; - let mut w = Cursor::new(&mut buf[..]); - Self::write_timestamp(&mut w, nanos); - let pos = w.position() as usize; - String::from_utf8_lossy(&buf[..pos]).into_owned() - } - - /// Write body+attrs bytes to buffer. + /// Write body+attrs bytes to buffer using LogRecordView. fn write_body_attrs(w: &mut BufWriter<'_>, bytes: &Bytes) { if bytes.is_empty() { return; } - let data = bytes.as_ref(); - let mut pos = 0; - let mut first_attr = true; - let mut has_attrs = false; - - while pos < data.len() { - let (tag, next_pos) = match read_varint(data, pos) { - Some(v) => v, - None => break, - }; - pos = next_pos; - - let field_num = tag >> 3; - let wire_type = tag & 0x7; - - if wire_type != wire_types::LEN { - break; - } - - let (len, next_pos) = match read_varint(data, pos) { - Some(v) => v, - None => break, - }; - pos = next_pos; - let end = pos + len as usize; - - if end > data.len() { - break; - } + // A partial protobuf message (just body + attributes) is still a valid message. + // We can use the RawLogRecord view to access just the fields we encoded. 
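The comment above is the key protobuf fact: a message is just a concatenation of tagged fields, so a buffer holding only body (field 5) and attributes (field 6) is itself a decodable LogRecord with defaults everywhere else. A sketch using the prost-generated types that the tests later in this series import (the otap_df_pdata::prost re-export only lands in a later commit, so this is illustrative):

use otap_df_pdata::prost::Message;
use otap_df_pdata::proto::opentelemetry::common::v1::AnyValue;
use otap_df_pdata::proto::opentelemetry::logs::v1::LogRecord as ProtoLogRecord;

fn main() {
    let partial = ProtoLogRecord {
        body: Some(AnyValue::new_string("hi")),
        ..Default::default()
    };
    let mut bytes = Vec::new();
    partial.encode(&mut bytes).unwrap();

    // Decoding the body-and-attributes-only bytes yields a LogRecord whose
    // unset fields come back as protobuf defaults.
    let decoded = ProtoLogRecord::decode(&bytes[..]).unwrap();
    assert_eq!(decoded.time_unix_nano, 0);
}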
+ let record = RawLogRecord::new(bytes.as_ref()); - let field_bytes = &data[pos..end]; - - if field_num == LOG_RECORD_BODY { - let any_value = RawAnyValue::new(field_bytes); - Self::write_any_value(w, &any_value); - } else if field_num == LOG_RECORD_ATTRIBUTES { - if !has_attrs { - let _ = w.write_all(b" ["); - has_attrs = true; - } + // Write body if present + if let Some(body) = record.body() { + Self::write_any_value(w, &body); + } - if !first_attr { + // Write attributes if present + let mut attrs = record.attributes().peekable(); + if attrs.peek().is_some() { + let _ = w.write_all(b" ["); + let mut first = true; + for attr in attrs { + if !first { let _ = w.write_all(b", "); } - first_attr = false; - - let kv = RawKeyValue::new(field_bytes); - let _ = w.write_all(kv.key()); + first = false; + let _ = w.write_all(attr.key()); let _ = w.write_all(b"="); - - match kv.value() { + match attr.value() { Some(v) => Self::write_any_value(w, &v), None => { let _ = w.write_all(b""); } } } - - pos = end; - } - - if has_attrs { let _ = w.write_all(b"]"); } } From f8f0eb75a6d3ce50b7c338c1501fd698d2af8115 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Wed, 7 Jan 2026 10:10:27 -0800 Subject: [PATCH 21/92] ansi cleanup --- .../telemetry/src/self_tracing/formatter.rs | 86 +++++-------------- .../crates/telemetry/src/self_tracing/mod.rs | 19 ++-- 2 files changed, 33 insertions(+), 72 deletions(-) diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs index 0cd403f3d1..7b742b8465 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs @@ -10,7 +10,7 @@ use otap_df_pdata::views::common::{AnyValueView, AttributeView, ValueType}; use otap_df_pdata::views::logs::LogRecordView; use otap_df_pdata::views::otlp::bytes::logs::RawLogRecord; use std::io::{Cursor, Write}; -use tracing::span::{Attributes, Record}; +use tracing::span::Record; use tracing::{Event, Level, Subscriber}; use tracing_subscriber::layer::{Context, Layer as TracingLayer}; use tracing_subscriber::registry::LookupSpan; @@ -31,15 +31,15 @@ pub struct RawLayer { writer: ConsoleWriter, } -// ANSI color codes -const ANSI_RESET: &[u8] = b"\x1b[0m"; -const ANSI_RED: &[u8] = b"\x1b[31m"; -const ANSI_YELLOW: &[u8] = b"\x1b[33m"; -const ANSI_GREEN: &[u8] = b"\x1b[32m"; -const ANSI_BLUE: &[u8] = b"\x1b[34m"; -const ANSI_MAGENTA: &[u8] = b"\x1b[35m"; -const ANSI_DIM: &[u8] = b"\x1b[2m"; -const ANSI_BOLD: &[u8] = b"\x1b[1m"; +// ANSI SGR (Select Graphic Rendition) codes +const ANSI_RESET: u8 = 0; +const ANSI_BOLD: u8 = 1; +const ANSI_DIM: u8 = 2; +const ANSI_RED: u8 = 31; +const ANSI_GREEN: u8 = 32; +const ANSI_YELLOW: u8 = 33; +const ANSI_BLUE: u8 = 34; +const ANSI_MAGENTA: u8 = 35; impl RawLayer { /// Return a new formatting layer with associated writer. 
@@ -85,17 +85,17 @@ impl ConsoleWriter { let mut w = Cursor::new(buf); if self.use_ansi { - let _ = w.write_all(ANSI_DIM); + Self::write_ansi(&mut w, ANSI_DIM); Self::write_timestamp(&mut w, record.timestamp_ns); - let _ = w.write_all(ANSI_RESET); + Self::write_ansi(&mut w, ANSI_RESET); let _ = w.write_all(b" "); - let _ = w.write_all(Self::level_color(callsite.level)); + Self::write_ansi(&mut w, Self::level_color(callsite.level)); Self::write_level(&mut w, callsite.level); - let _ = w.write_all(ANSI_RESET); + Self::write_ansi(&mut w, ANSI_RESET); let _ = w.write_all(b" "); - let _ = w.write_all(ANSI_BOLD); + Self::write_ansi(&mut w, ANSI_BOLD); Self::write_event_name(&mut w, callsite); - let _ = w.write_all(ANSI_RESET); + Self::write_ansi(&mut w, ANSI_RESET); let _ = w.write_all(b": "); } else { Self::write_timestamp(&mut w, record.timestamp_ns); @@ -281,9 +281,15 @@ impl ConsoleWriter { }; } + /// Write an ANSI SGR escape sequence. + #[inline] + fn write_ansi(w: &mut BufWriter<'_>, code: u8) { + let _ = write!(w, "\x1b[{}m", code); + } + /// Get ANSI color code for a severity level. #[inline] - fn level_color(level: &Level) -> &'static [u8] { + fn level_color(level: &Level) -> u8 { match *level { Level::ERROR => ANSI_RED, Level::WARN => ANSI_YELLOW, @@ -310,26 +316,6 @@ where let len = self.writer.write_log_record(&mut buf, &record, &callsite); self.writer.write_line(callsite.level, &buf[..len]); } - - fn on_new_span(&self, _attrs: &Attributes<'_>, _id: &tracing::span::Id, _ctx: Context<'_, S>) { - // Not handling spans - } - - fn on_record(&self, _span: &tracing::span::Id, _values: &Record<'_>, _ctx: Context<'_, S>) { - // Not handling spans - } - - fn on_enter(&self, _id: &tracing::span::Id, _ctx: Context<'_, S>) { - // Not handling spans - } - - fn on_exit(&self, _id: &tracing::span::Id, _ctx: Context<'_, S>) { - // Not handling spans - } - - fn on_close(&self, _id: tracing::span::Id, _ctx: Context<'_, S>) { - // Not handling spans - } } // ============================================================================ @@ -341,32 +327,6 @@ mod tests { use super::*; use tracing_subscriber::prelude::*; - #[test] - fn test_format_timestamp() { - // 2026-01-06T10:30:45.123Z in nanoseconds - // Let's use a known timestamp: 2024-01-01T00:00:00.000Z - let nanos: u64 = 1704067200 * 1_000_000_000; // 2024-01-01 00:00:00 UTC - let formatted = ConsoleWriter::format_timestamp(nanos); - assert_eq!(formatted, "2024-01-01T00:00:00.000Z"); - - // Test with milliseconds - let nanos_with_ms: u64 = 1704067200 * 1_000_000_000 + 123_000_000; - let formatted = ConsoleWriter::format_timestamp(nanos_with_ms); - assert_eq!(formatted, "2024-01-01T00:00:00.123Z"); - } - - #[test] - fn test_format_timestamp_edge_cases() { - // Unix epoch - let epoch = ConsoleWriter::format_timestamp(0); - assert_eq!(epoch, "1970-01-01T00:00:00.000Z"); - - // End of day with max milliseconds - let end_of_day: u64 = 86399 * 1_000_000_000 + 999_000_000; - let formatted = ConsoleWriter::format_timestamp(end_of_day); - assert_eq!(formatted, "1970-01-01T23:59:59.999Z"); - } - #[test] fn test_simple_writer_creation() { let _stdout = ConsoleWriter::color(); diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs index 2b855c1d1d..22881eb7f5 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs @@ -11,27 +11,32 @@ pub mod encoder; pub mod formatter; use bytes::Bytes; +use 
encoder::DirectFieldVisitor; +use otap_df_pdata::otlp::ProtoBuffer; use std::time::{SystemTime, UNIX_EPOCH}; +use tracing::Event; use tracing::callsite::Identifier; use tracing::{Level, Metadata}; -pub use formatter::{ConsoleWriter, RawLayer as RawLoggingLayer}; pub use encoder::DirectLogRecordEncoder; +pub use formatter::{ConsoleWriter, RawLayer as RawLoggingLayer}; /// A log record with structural metadata and pre-encoded body/attributes. #[derive(Debug, Clone)] pub struct LogRecord { - /// Callsite identifier used to look up cached callsite info + /// Callsite identifier used to look up cached callsite info. pub callsite_id: Identifier, - /// Timestamp in UNIX epoch nanoseconds + /// Timestamp in UNIX epoch nanoseconds. pub timestamp_ns: u64, - /// Pre-encoded body and attributes + /// Pre-encoded body and attributes in OTLP bytes. pub body_attrs_bytes: Bytes, } -/// Saved callsite information, populated via `register_callsite` hook. +/// Saved callsite information. This is information that can easily be +/// populated from Metadata, for example in a `register_callsite` hook +/// for building a map by Identifier. #[derive(Debug, Clone)] pub struct SavedCallsite { /// Target (e.g., module path) @@ -63,10 +68,6 @@ impl SavedCallsite { } } -use encoder::DirectFieldVisitor; -use otap_df_pdata::otlp::ProtoBuffer; -use tracing::Event; - impl LogRecord { /// Construct a log record, partially encoding its dynamic content. pub fn new(event: &Event<'_>) -> Self { From 92a9337ae4c39259d4e6817c1af6842e75d77440 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Wed, 7 Jan 2026 10:19:30 -0800 Subject: [PATCH 22/92] handy --- .../telemetry/src/self_tracing/formatter.rs | 101 ++++++++++++++++-- 1 file changed, 95 insertions(+), 6 deletions(-) diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs index 7b742b8465..20e5e5bccb 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs @@ -10,7 +10,6 @@ use otap_df_pdata::views::common::{AnyValueView, AttributeView, ValueType}; use otap_df_pdata::views::logs::LogRecordView; use otap_df_pdata::views::otlp::bytes::logs::RawLogRecord; use std::io::{Cursor, Write}; -use tracing::span::Record; use tracing::{Event, Level, Subscriber}; use tracing_subscriber::layer::{Context, Layer as TracingLayer}; use tracing_subscriber::registry::LookupSpan; @@ -281,7 +280,7 @@ impl ConsoleWriter { }; } - /// Write an ANSI SGR escape sequence. + /// Write an ANSI escape sequence. 
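For context on the escape sequences involved: an SGR sequence is ESC '[' code 'm', so write_ansi with code 31 emits "\x1b[31m" (red foreground) and code 0 resets. A std-only check:

fn main() {
    let red = format!("\x1b[{}m", 31);
    assert_eq!(red.as_bytes(), b"\x1b[31m");
    // On an ANSI-capable terminal this prints "error" in red, then resets.
    println!("{}error\x1b[0m", red);
}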
#[inline] fn write_ansi(w: &mut BufWriter<'_>, code: u8) { let _ = write!(w, "\x1b[{}m", code); @@ -318,10 +317,6 @@ where } } -// ============================================================================ -// Tests -// ============================================================================ - #[cfg(test)] mod tests { use super::*; @@ -356,4 +351,98 @@ mod tests { // The test verifies no panics occur; actual output goes to stderr } + + #[test] + fn test_log_format() { + // Test the formatter by capturing output through our layer + use std::sync::{Arc, Mutex}; + + let output: Arc> = Arc::new(Mutex::new(String::new())); + let output_clone = output.clone(); + + struct CaptureLayer { + output: Arc>, + } + impl TracingLayer for CaptureLayer + where + S: Subscriber + for<'a> LookupSpan<'a>, + { + fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) { + let record = LogRecord::new(event); + let callsite = SavedCallsite::new(event.metadata()); + let writer = ConsoleWriter::no_color(); + *self.output.lock().unwrap() = writer.format_log_record(&record, &callsite); + } + } + + // Helper to strip timestamp (24 char timestamp + 2 spaces) + fn strip_ts(s: &str) -> &str { + s[26..].trim_end() + } + + let layer = CaptureLayer { output: output_clone }; + let subscriber = tracing_subscriber::registry().with(layer); + let dispatch = tracing::Dispatch::new(subscriber); + let _guard = tracing::dispatcher::set_default(&dispatch); + + // Test info + tracing::info!("hello world"); + let binding = output.lock().unwrap(); + let result = strip_ts(&binding); + assert!(result.starts_with("INFO "), "got: {}", result); + assert!(result.ends_with(": hello world"), "got: {}", result); + drop(binding); + + // Test warn with attribute + tracing::warn!(count = 42, "warning"); + let binding = output.lock().unwrap(); + let result = strip_ts(&binding); + assert!(result.starts_with("WARN "), "got: {}", result); + assert!(result.ends_with(": warning [count=42]"), "got: {}", result); + drop(binding); + + // Test error with string attribute + tracing::error!(msg = "oops", "failed"); + let binding = output.lock().unwrap(); + let result = strip_ts(&binding); + assert!(result.starts_with("ERROR"), "got: {}", result); + assert!(result.ends_with(": failed [msg=oops]"), "got: {}", result); + } + + #[test] + fn test_full_format_with_known_timestamp() { + use bytes::Bytes; + + let record = LogRecord { + callsite_id: tracing::callsite::Identifier(&TEST_CALLSITE), + // 2024-01-15T12:30:45.678Z + timestamp_ns: 1705321845_678_000_000, + body_attrs_bytes: Bytes::new(), + }; + + let callsite = SavedCallsite { + target: "my_crate::module", + name: "event", + file: Some("src/lib.rs"), + line: Some(42), + level: &Level::INFO, + }; + + let writer = ConsoleWriter::no_color(); + let output = writer.format_log_record(&record, &callsite); + + assert_eq!( + output, + "2024-01-15T12:30:45.678Z INFO my_crate::module::event (src/lib.rs:42): \n" + ); + } + + static TEST_CALLSITE: TestCallsite = TestCallsite; + struct TestCallsite; + impl tracing::Callsite for TestCallsite { + fn set_interest(&self, _: tracing::subscriber::Interest) {} + fn metadata(&self) -> &tracing::Metadata<'_> { + unimplemented!() + } + } } From d80edc66270f08343261fe3877d6c582122a394b Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Wed, 7 Jan 2026 10:49:59 -0800 Subject: [PATCH 23/92] nice --- rust/otap-dataflow/crates/pdata/src/lib.rs | 3 + .../telemetry/src/self_tracing/formatter.rs | 185 +++++++++++------- 2 files changed, 115 insertions(+), 73 deletions(-) diff 
--git a/rust/otap-dataflow/crates/pdata/src/lib.rs b/rust/otap-dataflow/crates/pdata/src/lib.rs
index b023deb05b..30046807c9 100644
--- a/rust/otap-dataflow/crates/pdata/src/lib.rs
+++ b/rust/otap-dataflow/crates/pdata/src/lib.rs
@@ -38,6 +38,9 @@ mod validation;
 pub use decode::decoder::Consumer;
 pub use encode::producer::Producer;
 
+/// Re-export prost for proto message encoding/decoding.
+pub use prost;
+
 /// TraceID identifier of a Trace
 #[derive(Eq, PartialEq, Clone, Copy, Debug, Default)]
 pub struct TraceID([u8; 16]);
diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs
index 20e5e5bccb..014704066d 100644
--- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs
+++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs
@@ -17,20 +17,18 @@ use tracing_subscriber::registry::LookupSpan;
 /// Default buffer size for log formatting.
 pub const LOG_BUFFER_SIZE: usize = 4096;
 
-/// Console formatter writes to stdout or stderr.
+/// Console writes formatted text to stdout or stderr.
 #[derive(Debug, Clone, Copy)]
 pub struct ConsoleWriter {
     use_ansi: bool,
 }
 
-/// A minimal formatting layer that outputs log records to stdout/stderr.
-///
-/// This is a lightweight alternative to `tracing_subscriber::fmt::layer()`.
+/// A minimal alternative to `tracing_subscriber::fmt::layer()`.
 pub struct RawLayer {
     writer: ConsoleWriter,
 }
@@ -182,6 +180,9 @@ impl ConsoleWriter {
             let _ = w.write_all(b" [");
             let mut first = true;
             for attr in attrs {
+                if Self::is_full(w) {
+                    break;
+                }
                 if !first {
                     let _ = w.write_all(b", ");
                 }
@@ -199,6 +200,12 @@ impl ConsoleWriter {
         }
     }
 
+    /// Check if the buffer is full (position >= capacity).
+    #[inline]
+    fn is_full(w: &BufWriter<'_>) -> bool {
+        w.position() as usize >= w.get_ref().len()
+    }
+
     /// Write an AnyValue to buffer.
     fn write_any_value<'a>(w: &mut BufWriter<'_>, value: &impl AnyValueView<'a>) {
         match value.value_type() {
@@ -303,14 +310,12 @@ impl<S> TracingLayer<S> for RawLayer
 where
     S: Subscriber + for<'a> LookupSpan<'a>,
 {
+    // Allocates a buffer on the stack, formats the event to a LogRecord
+    // with partial OTLP bytes.
fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) { - let metadata = event.metadata(); - - // Build compact record let record = LogRecord::new(event); + let callsite = SavedCallsite::new(event.metadata()); - // Allocate buffer on stack and format directly - let callsite = SavedCallsite::new(metadata); let mut buf = [0u8; LOG_BUFFER_SIZE]; let len = self.writer.write_log_record(&mut buf, &record, &callsite); self.writer.write_line(callsite.level, &buf[..len]); @@ -320,72 +325,46 @@ where #[cfg(test)] mod tests { use super::*; + use bytes::Bytes; + use otap_df_pdata::prost::Message; + use otap_df_pdata::proto::opentelemetry::common::v1::{AnyValue, KeyValue}; + use otap_df_pdata::proto::opentelemetry::logs::v1::LogRecord as ProtoLogRecord; + use std::sync::{Arc, Mutex}; use tracing_subscriber::prelude::*; - #[test] - fn test_simple_writer_creation() { - let _stdout = ConsoleWriter::color(); - let _stderr = ConsoleWriter::no_color(); - } - - #[test] - fn test_formatter_layer_creation() { - let _color = RawLayer::new(ConsoleWriter::color()); - let _nocolor = RawLayer::new(ConsoleWriter::no_color()); + struct CaptureLayer { + output: Arc>, } - #[test] - fn test_layer_integration() { - // Create the layer and subscriber - let layer = RawLayer::new(ConsoleWriter::no_color()); - let subscriber = tracing_subscriber::registry().with(layer); - - // Set as default for this thread temporarily - let dispatch = tracing::Dispatch::new(subscriber); - let _guard = tracing::dispatcher::set_default(&dispatch); - - // Emit some events - these should be formatted and written to stderr - tracing::info!("Test info message"); - tracing::warn!(count = 42, "Warning with attribute"); - tracing::error!(error = "something failed", "Error occurred"); - - // The test verifies no panics occur; actual output goes to stderr + impl TracingLayer for CaptureLayer + where + S: Subscriber + for<'a> LookupSpan<'a>, + { + fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) { + let record = LogRecord::new(event); + let callsite = SavedCallsite::new(event.metadata()); + let writer = ConsoleWriter::no_color(); + *self.output.lock().unwrap() = writer.format_log_record(&record, &callsite); + } } #[test] fn test_log_format() { - // Test the formatter by capturing output through our layer - use std::sync::{Arc, Mutex}; - let output: Arc> = Arc::new(Mutex::new(String::new())); let output_clone = output.clone(); - struct CaptureLayer { - output: Arc>, - } - impl TracingLayer for CaptureLayer - where - S: Subscriber + for<'a> LookupSpan<'a>, - { - fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) { - let record = LogRecord::new(event); - let callsite = SavedCallsite::new(event.metadata()); - let writer = ConsoleWriter::no_color(); - *self.output.lock().unwrap() = writer.format_log_record(&record, &callsite); - } - } - - // Helper to strip timestamp (24 char timestamp + 2 spaces) + // strip timestamp and newline fn strip_ts(s: &str) -> &str { s[26..].trim_end() } - let layer = CaptureLayer { output: output_clone }; + let layer = CaptureLayer { + output: output_clone, + }; let subscriber = tracing_subscriber::registry().with(layer); let dispatch = tracing::Dispatch::new(subscriber); let _guard = tracing::dispatcher::set_default(&dispatch); - // Test info tracing::info!("hello world"); let binding = output.lock().unwrap(); let result = strip_ts(&binding); @@ -393,7 +372,6 @@ mod tests { assert!(result.ends_with(": hello world"), "got: {}", result); drop(binding); - // Test warn with attribute tracing::warn!(count = 
42, "warning"); let binding = output.lock().unwrap(); let result = strip_ts(&binding); @@ -401,7 +379,6 @@ mod tests { assert!(result.ends_with(": warning [count=42]"), "got: {}", result); drop(binding); - // Test error with string attribute tracing::error!(msg = "oops", "failed"); let binding = output.lock().unwrap(); let result = strip_ts(&binding); @@ -410,9 +387,7 @@ mod tests { } #[test] - fn test_full_format_with_known_timestamp() { - use bytes::Bytes; - + fn test_timestamp_format() { let record = LogRecord { callsite_id: tracing::callsite::Identifier(&TEST_CALLSITE), // 2024-01-15T12:30:45.678Z @@ -420,20 +395,69 @@ mod tests { body_attrs_bytes: Bytes::new(), }; - let callsite = SavedCallsite { - target: "my_crate::module", - name: "event", - file: Some("src/lib.rs"), - line: Some(42), - level: &Level::INFO, - }; - let writer = ConsoleWriter::no_color(); - let output = writer.format_log_record(&record, &callsite); + let output = writer.format_log_record(&record, &test_callsite()); assert_eq!( output, - "2024-01-15T12:30:45.678Z INFO my_crate::module::event (src/lib.rs:42): \n" + "2024-01-15T12:30:45.678Z INFO test_module::submodule::test_event (src/test.rs:123): \n" + ); + } + + #[test] + fn test_buffer_overflow() { + let mut attrs = Vec::new(); + for i in 0..200 { + attrs.push(KeyValue::new( + format!("attribute_key_{:03}", i), + AnyValue::new_string(format!("value_{:03}", i)), + )); + } + + let proto_record = ProtoLogRecord { + body: Some(AnyValue::new_string("This is the log message body")), + attributes: attrs, + ..Default::default() + }; + + let mut encoded = Vec::new(); + proto_record.encode(&mut encoded).unwrap(); + + let record = LogRecord { + callsite_id: tracing::callsite::Identifier(&TEST_CALLSITE), + timestamp_ns: 1705321845_678_000_000, + body_attrs_bytes: Bytes::from(encoded), + }; + + let mut buf = [0u8; LOG_BUFFER_SIZE]; + let writer = ConsoleWriter::no_color(); + let len = writer.write_log_record(&mut buf, &record, &test_callsite()); + + // Fills exactly to capacity due to overflow. + // Note! we could append a ... or some other indicator. 
+ assert_eq!(len, LOG_BUFFER_SIZE); + + // Verify the output starts correctly with timestamp and body + let output = std::str::from_utf8(&buf[..len]).unwrap(); + assert!( + output.starts_with("2024-01-15T12:30:45.678Z"), + "got: {}", + output + ); + assert!( + output.contains("This is the log message body"), + "got: {}", + output + ); + assert!( + output.contains("attribute_key_000=value_000"), + "got: {}", + output + ); + assert!( + output.contains("attribute_key_010=value_010"), + "got: {}", + output ); } @@ -442,7 +466,22 @@ mod tests { impl tracing::Callsite for TestCallsite { fn set_interest(&self, _: tracing::subscriber::Interest) {} fn metadata(&self) -> &tracing::Metadata<'_> { - unimplemented!() + &TEST_METADATA } } + + static TEST_METADATA: tracing::Metadata<'static> = tracing::Metadata::new( + "test_event", + "test_module::submodule", + Level::INFO, + Some("src/test.rs"), + Some(123), + Some("test_module::submodule"), + tracing::field::FieldSet::new(&[], tracing::callsite::Identifier(&TEST_CALLSITE)), + tracing::metadata::Kind::EVENT, + ); + + fn test_callsite() -> SavedCallsite { + SavedCallsite::new(&TEST_METADATA) + } } From 4e4b37f0b1dd71321ffbca97fe843306cab15cda Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Wed, 7 Jan 2026 11:05:45 -0800 Subject: [PATCH 24/92] refactor a test --- .../telemetry/src/self_tracing/formatter.rs | 50 +++++++++++-------- 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs index 014704066d..0943d2fd2e 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs @@ -313,6 +313,9 @@ where // Allocates a buffer on the stack, formats the event to a LogRecord // with partial OTLP bytes. fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) { + // TODO: there are allocations implied here that we would prefer + // to avoid, it will be an extensive change in the ProtoBuffer to + // stack-allocate this temporary. 
let record = LogRecord::new(event); let callsite = SavedCallsite::new(event.metadata()); @@ -348,42 +351,47 @@ mod tests { } } + // strip timestamp and newline + fn strip_ts(s: &str) -> &str { + s[26..].trim_end() + } + + fn assert_log_format(output: &Arc>, expected_level: &str, expected_suffix: &str) { + let binding = output.lock().unwrap(); + let result = strip_ts(&binding); + assert!( + result.starts_with(expected_level), + "expected level '{}', got: {}", + expected_level, + result + ); + assert!( + result.ends_with(expected_suffix), + "expected suffix '{}', got: {}", + expected_suffix, + result + ); + } + #[test] fn test_log_format() { let output: Arc> = Arc::new(Mutex::new(String::new())); - let output_clone = output.clone(); - - // strip timestamp and newline - fn strip_ts(s: &str) -> &str { - s[26..].trim_end() - } let layer = CaptureLayer { - output: output_clone, + output: output.clone(), }; let subscriber = tracing_subscriber::registry().with(layer); let dispatch = tracing::Dispatch::new(subscriber); let _guard = tracing::dispatcher::set_default(&dispatch); tracing::info!("hello world"); - let binding = output.lock().unwrap(); - let result = strip_ts(&binding); - assert!(result.starts_with("INFO "), "got: {}", result); - assert!(result.ends_with(": hello world"), "got: {}", result); - drop(binding); + assert_log_format(&output, "INFO ", ": hello world"); tracing::warn!(count = 42, "warning"); - let binding = output.lock().unwrap(); - let result = strip_ts(&binding); - assert!(result.starts_with("WARN "), "got: {}", result); - assert!(result.ends_with(": warning [count=42]"), "got: {}", result); - drop(binding); + assert_log_format(&output, "WARN ", ": warning [count=42]"); tracing::error!(msg = "oops", "failed"); - let binding = output.lock().unwrap(); - let result = strip_ts(&binding); - assert!(result.starts_with("ERROR"), "got: {}", result); - assert!(result.ends_with(": failed [msg=oops]"), "got: {}", result); + assert_log_format(&output, "ERROR", ": failed [msg=oops]"); } #[test] From 657524db238cc33882c591d4d08bd2fe14959512 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Wed, 7 Jan 2026 11:12:56 -0800 Subject: [PATCH 25/92] ansi cleanup --- .../telemetry/src/self_tracing/formatter.rs | 144 ++++++++++-------- 1 file changed, 78 insertions(+), 66 deletions(-) diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs index 0943d2fd2e..dda4d7d2f3 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs @@ -17,10 +17,69 @@ use tracing_subscriber::registry::LookupSpan; /// Default buffer size for log formatting. pub const LOG_BUFFER_SIZE: usize = 4096; +/// ANSI codes a.k.a. "Select Graphic Rendition" codes. +#[derive(Clone, Copy)] +#[repr(u8)] +enum AnsiCode { + Reset = 0, + Bold = 1, + Dim = 2, + Red = 31, + Green = 32, + Yellow = 33, + Blue = 34, + Magenta = 35, +} + +/// Color mode for console output. +#[derive(Debug, Clone, Copy)] +pub enum ColorMode { + /// Enable ANSI color codes. + Color, + /// Disable ANSI color codes. + NoColor, +} + +impl ColorMode { + /// Write an ANSI escape sequence (no-op for NoColor). + #[inline] + fn write_ansi(self, w: &mut BufWriter<'_>, code: AnsiCode) { + if let ColorMode::Color = self { + let _ = write!(w, "\x1b[{}m", code as u8); + } + } + + /// Write level with color and padding. 
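The AnsiCode pattern above in miniature: a #[repr(u8)] enum names the SGR codes while casting straight to the integer the escape sequence needs, so no string constants are kept around:

#[derive(Clone, Copy)]
#[repr(u8)]
enum Sgr {
    Reset = 0,
    Red = 31,
}

fn main() {
    assert_eq!(format!("\x1b[{}m", Sgr::Red as u8), "\x1b[31m");
    assert_eq!(Sgr::Reset as u8, 0);
}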
+ #[inline] + fn write_level(self, w: &mut BufWriter<'_>, level: &Level) { + self.write_ansi(w, Self::level_color(level)); + let _ = match *level { + Level::TRACE => w.write_all(b"TRACE"), + Level::DEBUG => w.write_all(b"DEBUG"), + Level::INFO => w.write_all(b"INFO "), + Level::WARN => w.write_all(b"WARN "), + Level::ERROR => w.write_all(b"ERROR"), + }; + self.write_ansi(w, AnsiCode::Reset); + } + + /// Get ANSI color code for a severity level. + #[inline] + fn level_color(level: &Level) -> AnsiCode { + match *level { + Level::ERROR => AnsiCode::Red, + Level::WARN => AnsiCode::Yellow, + Level::INFO => AnsiCode::Green, + Level::DEBUG => AnsiCode::Blue, + Level::TRACE => AnsiCode::Magenta, + } + } +} + /// Console writes formatted text to stdout or stderr. #[derive(Debug, Clone, Copy)] pub struct ConsoleWriter { - use_ansi: bool, + color_mode: ColorMode, } /// A minimal alternative to `tracing_subscriber::fmt::layer()`. @@ -28,16 +87,6 @@ pub struct RawLayer { writer: ConsoleWriter, } -// ANSI "Select Graphic Rendition" codes -const ANSI_RESET: u8 = 0; -const ANSI_BOLD: u8 = 1; -const ANSI_DIM: u8 = 2; -const ANSI_RED: u8 = 31; -const ANSI_GREEN: u8 = 32; -const ANSI_YELLOW: u8 = 33; -const ANSI_BLUE: u8 = 34; -const ANSI_MAGENTA: u8 = 35; - impl RawLayer { /// Return a new formatting layer with associated writer. pub fn new(writer: ConsoleWriter) -> Self { @@ -52,12 +101,16 @@ pub type BufWriter<'a> = Cursor<&'a mut [u8]>; impl ConsoleWriter { /// Create a writer that outputs to stdout without ANSI colors. pub fn no_color() -> Self { - Self { use_ansi: false } + Self { + color_mode: ColorMode::NoColor, + } } /// Create a writer that outputs to stderr with ANSI colors. pub fn color() -> Self { - Self { use_ansi: true } + Self { + color_mode: ColorMode::Color, + } } /// Format a LogRecord as a human-readable string (for testing/compatibility). @@ -80,47 +133,24 @@ impl ConsoleWriter { callsite: &SavedCallsite, ) -> usize { let mut w = Cursor::new(buf); - - if self.use_ansi { - Self::write_ansi(&mut w, ANSI_DIM); - Self::write_timestamp(&mut w, record.timestamp_ns); - Self::write_ansi(&mut w, ANSI_RESET); - let _ = w.write_all(b" "); - Self::write_ansi(&mut w, Self::level_color(callsite.level)); - Self::write_level(&mut w, callsite.level); - Self::write_ansi(&mut w, ANSI_RESET); - let _ = w.write_all(b" "); - Self::write_ansi(&mut w, ANSI_BOLD); - Self::write_event_name(&mut w, callsite); - Self::write_ansi(&mut w, ANSI_RESET); - let _ = w.write_all(b": "); - } else { - Self::write_timestamp(&mut w, record.timestamp_ns); - let _ = w.write_all(b" "); - Self::write_level(&mut w, callsite.level); - let _ = w.write_all(b" "); - Self::write_event_name(&mut w, callsite); - let _ = w.write_all(b": "); - } - + let cm = self.color_mode; + + cm.write_ansi(&mut w, AnsiCode::Dim); + Self::write_timestamp(&mut w, record.timestamp_ns); + cm.write_ansi(&mut w, AnsiCode::Reset); + let _ = w.write_all(b" "); + cm.write_level(&mut w, callsite.level); + let _ = w.write_all(b" "); + cm.write_ansi(&mut w, AnsiCode::Bold); + Self::write_event_name(&mut w, callsite); + cm.write_ansi(&mut w, AnsiCode::Reset); + let _ = w.write_all(b": "); Self::write_body_attrs(&mut w, &record.body_attrs_bytes); let _ = w.write_all(b"\n"); w.position() as usize } - /// Write level with padding. 
-    #[inline]
-    fn write_level(w: &mut BufWriter<'_>, level: &Level) {
-        let _ = match *level {
-            Level::TRACE => w.write_all(b"TRACE"),
-            Level::DEBUG => w.write_all(b"DEBUG"),
-            Level::INFO => w.write_all(b"INFO "),
-            Level::WARN => w.write_all(b"WARN "),
-            Level::ERROR => w.write_all(b"ERROR"),
-        };
-    }
-
     /// Write callsite details as event_name to buffer.
     #[inline]
     fn write_event_name(w: &mut BufWriter<'_>, callsite: &SavedCallsite) {
@@ -286,24 +316,6 @@ impl ConsoleWriter {
             std::io::stdout().write_all(data)
         };
     }
-
-    /// Write an ANSI escape sequence.
-    #[inline]
-    fn write_ansi(w: &mut BufWriter<'_>, code: u8) {
-        let _ = write!(w, "\x1b[{}m", code);
-    }
-
-    /// Get ANSI color code for a severity level.
-    #[inline]
-    fn level_color(level: &Level) -> u8 {
-        match *level {
-            Level::ERROR => ANSI_RED,
-            Level::WARN => ANSI_YELLOW,
-            Level::INFO => ANSI_GREEN,
-            Level::DEBUG => ANSI_BLUE,
-            Level::TRACE => ANSI_MAGENTA,
-        }
-    }
 }
 
 impl<S> TracingLayer<S> for RawLayer

From cbb10000506a5776a066ffa2f8f9813e21a9fdc2 Mon Sep 17 00:00:00 2001
From: Joshua MacDonald
Date: Wed, 7 Jan 2026 11:56:38 -0800
Subject: [PATCH 26/92] fmt

---
 .../configs/fake-debug-noop-telemetry.yaml    |   4 +-
 .../src/pipeline/service/telemetry/logs.rs    |   4 +-
 .../crates/pdata/src/otlp/mod.rs              |   4 +-
 .../telemetry/src/opentelemetry_client.rs     |   7 +-
 .../opentelemetry_client/logger_provider.rs   |   2 +-
 .../telemetry/src/self_tracing/encoder.rs     |   7 +-
 .../telemetry/src/self_tracing/formatter.rs   | 174 ++++++++++++++----
 .../crates/telemetry/src/self_tracing/mod.rs  |   5 +-
 8 files changed, 157 insertions(+), 50 deletions(-)

diff --git a/rust/otap-dataflow/configs/fake-debug-noop-telemetry.yaml b/rust/otap-dataflow/configs/fake-debug-noop-telemetry.yaml
index 2c589005d1..b3bbb49e9e 100644
--- a/rust/otap-dataflow/configs/fake-debug-noop-telemetry.yaml
+++ b/rust/otap-dataflow/configs/fake-debug-noop-telemetry.yaml
@@ -37,7 +37,9 @@ nodes:
 service:
   telemetry:
     logs:
-      level: "info"
+      level: "debug"
+      internal:
+        enabled: false
     processors:
       - batch:
           exporter:
diff --git a/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs b/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs
index f6492fda73..a246adf208 100644
--- a/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs
+++ b/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs
@@ -25,9 +25,7 @@ pub struct LogsConfig {
 }
 
 fn default_internal() -> LogsInternalConfig {
-    LogsInternalConfig {
-        enabled: true,
-    }
+    LogsInternalConfig { enabled: true }
 }
 
 /// Log level for internal engine logs.
diff --git a/rust/otap-dataflow/crates/pdata/src/otlp/mod.rs b/rust/otap-dataflow/crates/pdata/src/otlp/mod.rs
index 8ca317459d..4e5e611410 100644
--- a/rust/otap-dataflow/crates/pdata/src/otlp/mod.rs
+++ b/rust/otap-dataflow/crates/pdata/src/otlp/mod.rs
@@ -15,10 +15,10 @@ pub use otap_df_pdata_otlp_macros::qualified; // Required for derived code
 
 /// Common methods for OTLP/OTAP attributes.
 pub mod attributes;
-/// Common utilities for protobuf encoding.
-pub mod common;
 /// Common methods for batching.
 pub mod batching;
+/// Common utilities for protobuf encoding.
+pub mod common;
 /// Common methods for OTLP/OTAP logs.
 pub mod logs;
 /// Common methods for OTLP/OTAP metrics.
diff --git a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs b/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs index 522ad919f4..a14b94efa1 100644 --- a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs +++ b/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs @@ -75,10 +75,7 @@ impl OpentelemetryClient { // Try to initialize the global subscriber. In tests, this may fail if already set, // which is acceptable as we're only validating the configuration works. let _ = tracing_setup.with(fmt_layer).with(sdk_layer).try_init(); - ( - Some(logger_provider), - runtime, - ) + (Some(logger_provider), runtime) } else { let writer = if std::env::var("NO_COLOR").is_ok() { ConsoleWriter::no_color() @@ -87,7 +84,7 @@ impl OpentelemetryClient { }; // See comment above. let _ = tracing_setup.with(RawLoggingLayer::new(writer)).try_init(); - + (None, runtime) }; diff --git a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client/logger_provider.rs b/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client/logger_provider.rs index c440cea82d..82ef030659 100644 --- a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client/logger_provider.rs +++ b/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client/logger_provider.rs @@ -189,7 +189,7 @@ mod tests { use opentelemetry_sdk::Resource; use otap_df_config::pipeline::service::telemetry::{ logs::{ - LogLevel, LogsInternalConfig, LogsConfig, + LogLevel, LogsConfig, LogsInternalConfig, processors::{ BatchLogProcessorConfig, batch::{LogBatchProcessorExporterConfig, otlp::OtlpExporterConfig}, diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs index dbc24e67c3..f21fd08663 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs @@ -41,8 +41,6 @@ impl<'buf> DirectLogRecordEncoder<'buf> { self.buf .extend_from_slice(&record.timestamp_ns.to_le_bytes()); - // Note: the next two fields could be pre-encoded by Level - // Encode severity_number (field 2, varint) let severity = level_to_severity_number(&callsite.level); self.buf @@ -223,7 +221,7 @@ impl FmtWrite for DebugWriter<'_> { impl tracing::field::Visit for DirectFieldVisitor<'_> { fn record_f64(&mut self, field: &tracing::field::Field, value: f64) { if field.name() == "message" { - // Body will be formatted later if needed + // TODO: encode f64 body return; } self.encode_double_attribute(field.name(), value); @@ -231,6 +229,7 @@ impl tracing::field::Visit for DirectFieldVisitor<'_> { fn record_i64(&mut self, field: &tracing::field::Field, value: i64) { if field.name() == "message" { + // TODO: encode i64 body return; } self.encode_int_attribute(field.name(), value); @@ -238,6 +237,7 @@ impl tracing::field::Visit for DirectFieldVisitor<'_> { fn record_u64(&mut self, field: &tracing::field::Field, value: u64) { if field.name() == "message" { + // TODO: encode u64 body return; } self.encode_int_attribute(field.name(), value as i64); @@ -245,6 +245,7 @@ impl tracing::field::Visit for DirectFieldVisitor<'_> { fn record_bool(&mut self, field: &tracing::field::Field, value: bool) { if field.name() == "message" { + // TODO: encode bool body return; } self.encode_bool_attribute(field.name(), value); diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs index dda4d7d2f3..72427061d4 
100644
--- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs
+++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs
@@ -52,20 +52,15 @@ impl ColorMode {
     /// Write level with color and padding.
     #[inline]
     fn write_level(self, w: &mut BufWriter<'_>, level: &Level) {
-        self.write_ansi(w, Self::level_color(level));
-        let _ = match *level {
-            Level::TRACE => w.write_all(b"TRACE"),
-            Level::DEBUG => w.write_all(b"DEBUG"),
-            Level::INFO => w.write_all(b"INFO "),
-            Level::WARN => w.write_all(b"WARN "),
-            Level::ERROR => w.write_all(b"ERROR"),
-        };
+        self.write_ansi(w, Self::color(level));
+        let _ = w.write_all(level.as_str().as_bytes());
         self.write_ansi(w, AnsiCode::Reset);
+        let _ = w.write_all(b" ");
     }
 
     /// Get ANSI color code for a severity level.
     #[inline]
-    fn level_color(level: &Level) -> AnsiCode {
+    fn color(level: &Level) -> AnsiCode {
         match *level {
             Level::ERROR => AnsiCode::Red,
             Level::WARN => AnsiCode::Yellow,
@@ -140,7 +135,6 @@ impl ConsoleWriter {
         cm.write_ansi(&mut w, AnsiCode::Reset);
         let _ = w.write_all(b" ");
         cm.write_level(&mut w, callsite.level);
-        let _ = w.write_all(b" ");
         cm.write_ansi(&mut w, AnsiCode::Bold);
         Self::write_event_name(&mut w, callsite);
         cm.write_ansi(&mut w, AnsiCode::Reset);
@@ -340,15 +334,20 @@
 where
 #[cfg(test)]
 mod tests {
     use super::*;
+    use crate::self_tracing::DirectLogRecordEncoder;
+    use crate::self_tracing::encoder::level_to_severity_number;
     use bytes::Bytes;
+    use otap_df_pdata::otlp::ProtoBuffer;
     use otap_df_pdata::prost::Message;
+    use otap_df_pdata::proto::opentelemetry::common::v1::any_value::Value;
     use otap_df_pdata::proto::opentelemetry::common::v1::{AnyValue, KeyValue};
     use otap_df_pdata::proto::opentelemetry::logs::v1::LogRecord as ProtoLogRecord;
     use std::sync::{Arc, Mutex};
     use tracing_subscriber::prelude::*;
 
     struct CaptureLayer {
-        output: Arc<Mutex<String>>,
+        formatted: Arc<Mutex<String>>,
+        encoded: Arc<Mutex<Bytes>>,
     }
 
     impl<S> TracingLayer<S> for CaptureLayer
     where
         S: Subscriber,
     {
         fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) {
             let record = LogRecord::new(event);
             let callsite = SavedCallsite::new(event.metadata());
+
+            // Capture formatted output
             let writer = ConsoleWriter::no_color();
-            *self.output.lock().unwrap() = writer.format_log_record(&record, &callsite);
+            *self.formatted.lock().unwrap() = writer.format_log_record(&record, &callsite);
+
+            // Capture full OTLP encoding
+            let mut buf = ProtoBuffer::with_capacity(512);
+            let mut encoder = DirectLogRecordEncoder::new(&mut buf);
+            let _ = encoder.encode_log_record(record, &callsite);
+            *self.encoded.lock().unwrap() = buf.into_bytes();
         }
     }
 
+    fn new_capture_layer() -> (CaptureLayer, Arc<Mutex<String>>, Arc<Mutex<Bytes>>) {
+        let formatted = Arc::new(Mutex::new(String::new()));
+        let encoded = Arc::new(Mutex::new(Bytes::new()));
+        let layer = CaptureLayer {
+            formatted: formatted.clone(),
+            encoded: encoded.clone(),
+        };
+        (layer, formatted, encoded)
+    }
+
     // strip timestamp and newline
-    fn strip_ts(s: &str) -> &str {
-        s[26..].trim_end()
+    fn strip_ts(s: &str) -> (&str, &str) {
+        // timestamp is 24 bytes, see assertion below.
+        (&s[..24], s[26..].trim_end())
+    }
+
+    fn format_timestamp(nanos: u64) -> String {
+        let mut buf = [0u8; 32];
+        let mut w = Cursor::new(buf.as_mut_slice());
+        ConsoleWriter::write_timestamp(&mut w, nanos);
+        let len = w.position() as usize;
+        assert_eq!(len, 24);
+        String::from_utf8_lossy(&buf[..len]).into_owned()
+    }
+
+    fn format_attrs(attrs: &[KeyValue]) -> String {
+        if attrs.is_empty() {
+            return String::new();
+        }
+        let mut result = String::from(" [");
+        for (i, kv) in attrs.iter().enumerate() {
+            if i > 0 {
+                result.push_str(", ");
+            }
+            result.push_str(&kv.key);
+            result.push('=');
+            if let Some(ref v) = kv.value {
+                if let Some(ref val) = v.value {
+                    match val {
+                        Value::StringValue(s) => result.push_str(s),
+                        Value::IntValue(i) => result.push_str(&i.to_string()),
+                        Value::BoolValue(b) => result.push_str(if *b { "true" } else { "false" }),
+                        Value::DoubleValue(d) => result.push_str(&format!("{:.6}", d)),
+                        _ => unreachable!(),
+                    }
+                }
+            }
+        }
+        result.push(']');
+        result
     }
 
-    fn assert_log_format(output: &Arc<Mutex<String>>, expected_level: &str, expected_suffix: &str) {
-        let binding = output.lock().unwrap();
-        let result = strip_ts(&binding);
+    fn assert_log_record(
+        formatted: &Arc<Mutex<String>>,
+        encoded: &Arc<Mutex<Bytes>>,
+        expected_level: Level,
+        expected_body: &str,
+        expected_attrs: Vec<KeyValue>,
+    ) {
+        // Decode the OTLP bytes
+        let bytes = encoded.lock().unwrap();
+        let decoded = ProtoLogRecord::decode(bytes.as_ref()).expect("decode failed");
+
+        // Verify OTLP encoding
+        let sev_text = expected_level.as_str();
+        assert_eq!(
+            decoded.severity_number,
+            level_to_severity_number(&expected_level) as i32,
+            "severity_number mismatch"
+        );
+        assert_eq!(decoded.severity_text, sev_text, "severity_text mismatch");
+        assert_eq!(
+            decoded.body,
+            Some(AnyValue::new_string(expected_body)),
+            "body mismatch"
+        );
+        assert_eq!(decoded.attributes, expected_attrs, "attributes mismatch");
+
+        // Build expected text suffix
+        let attrs_text = format_attrs(&expected_attrs);
+        let expected_suffix = format!(": {}{}", expected_body, attrs_text);
+
+        // Verify text formatting
+        let binding = formatted.lock().unwrap();
+        let (ts_str, rest) = strip_ts(&binding);
+
+        // Verify timestamp matches OTLP value
+        let expected_ts = format_timestamp(decoded.time_unix_nano);
+        assert_eq!(ts_str, expected_ts, "timestamp mismatch");
+
         assert!(
-            result.starts_with(expected_level),
+            rest.starts_with(sev_text),
             "expected level '{}', got: {}",
-            expected_level,
-            result
+            sev_text,
+            rest
         );
         assert!(
-            result.ends_with(expected_suffix),
+            rest.ends_with(&expected_suffix),
             "expected suffix '{}', got: {}",
             expected_suffix,
-            result
+            rest
         );
     }
 
     #[test]
     fn test_log_format() {
-        let output: Arc<Mutex<String>> = Arc::new(Mutex::new(String::new()));
-
-        let layer = CaptureLayer {
-            output: output.clone(),
-        };
+        let (layer, formatted, encoded) = new_capture_layer();
 
         let subscriber = tracing_subscriber::registry().with(layer);
         let dispatch = tracing::Dispatch::new(subscriber);
         let _guard = tracing::dispatcher::set_default(&dispatch);
 
         tracing::info!("hello world");
-        assert_log_format(&output, "INFO ", ": hello world");
+        assert_log_record(&formatted, &encoded, Level::INFO, "hello world", vec![]);
+
-        tracing::warn!(count = 42, "warning");
-        assert_log_format(&output, "WARN ", ": warning [count=42]");
+        tracing::warn!(count = 42i64, "something odd");
+        assert_log_record(
+            &formatted,
+            &encoded,
+            Level::WARN,
+            "something odd",
+            vec![KeyValue::new("count", AnyValue::new_int(42))],
+        );
 
-        tracing::error!(msg = "oops", "failed");
-        assert_log_format(&output, "ERROR",
": failed [msg=oops]"); + tracing::error!(msg = "oops", "we failed"); + assert_log_record( + &formatted, + &encoded, + Level::ERROR, + "we failed", + vec![KeyValue::new("msg", AnyValue::new_string("oops"))], + ); } #[test] @@ -420,7 +517,16 @@ mod tests { assert_eq!( output, - "2024-01-15T12:30:45.678Z INFO test_module::submodule::test_event (src/test.rs:123): \n" + "2024-01-15T12:30:45.678Z INFO test_module::submodule::test_event (src/test.rs:123): \n" + ); + + let writer = ConsoleWriter::color(); + let output = writer.format_log_record(&record, &test_callsite()); + + // With ANSI codes: dim timestamp, green INFO, bold event name + assert_eq!( + output, + "\x1b[2m2024-01-15T12:30:45.678Z\x1b[0m \x1b[32mINFO\x1b[0m \x1b[1mtest_module::submodule::test_event (src/test.rs:123)\x1b[0m: \n" ); } diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs index 22881eb7f5..1eb2346744 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs @@ -73,7 +73,10 @@ impl LogRecord { pub fn new(event: &Event<'_>) -> Self { let metadata = event.metadata(); - // Encode body and attributes to bytes + // Encode body and attributes to bytes. + // Note! TODO: we could potentially avoid allocating for the intermediate + // protobuf slice with work to support a fixed-size buffer and cursor + // instead of a Vec. let mut buf = ProtoBuffer::with_capacity(256); let mut visitor = DirectFieldVisitor::new(&mut buf); event.record(&mut visitor); From 2f02db61775d5fcac6472471f167122995ecbc4e Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Wed, 7 Jan 2026 11:58:41 -0800 Subject: [PATCH 27/92] new config --- .../configs/fake-debug-noop-telemetry.yaml | 3 +- .../configs/internal-telemetry.yaml | 42 +++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 rust/otap-dataflow/configs/internal-telemetry.yaml diff --git a/rust/otap-dataflow/configs/fake-debug-noop-telemetry.yaml b/rust/otap-dataflow/configs/fake-debug-noop-telemetry.yaml index b3bbb49e9e..1eb65e8263 100644 --- a/rust/otap-dataflow/configs/fake-debug-noop-telemetry.yaml +++ b/rust/otap-dataflow/configs/fake-debug-noop-telemetry.yaml @@ -37,8 +37,9 @@ nodes: service: telemetry: logs: - level: "debug" + level: "info" internal: + # To use the OTel SDK, disable the internal logger. 
enabled: false
 processors:
   - batch:
       exporter:
diff --git a/rust/otap-dataflow/configs/internal-telemetry.yaml b/rust/otap-dataflow/configs/internal-telemetry.yaml
new file mode 100644
index 0000000000..8babcd2d1a
--- /dev/null
+++ b/rust/otap-dataflow/configs/internal-telemetry.yaml
@@ -0,0 +1,42 @@
+settings:
+  default_pipeline_ctrl_msg_channel_size: 100
+  default_node_ctrl_msg_channel_size: 100
+  default_pdata_channel_size: 100
+
+nodes:
+  receiver:
+    kind: receiver
+    plugin_urn: "urn:otel:otap:fake_data_generator:receiver"
+    out_ports:
+      out_port:
+        destinations:
+          - debug
+        dispatch_strategy: round_robin
+    config:
+      traffic_config:
+        max_signal_count: 1000
+        max_batch_size: 1000
+        signals_per_second: 1000
+        log_weight: 100
+      registry_path: https://github.com/open-telemetry/semantic-conventions.git[model]
+  debug:
+    kind: processor
+    plugin_urn: "urn:otel:debug:processor"
+    out_ports:
+      out_port:
+        destinations:
+          - noop
+        dispatch_strategy: round_robin
+    config:
+      verbosity: basic
+  noop:
+    kind: exporter
+    plugin_urn: "urn:otel:noop:exporter"
+    config:
+
+service:
+  telemetry:
+    logs:
+      level: "debug"
+      internal:
+        enabled: true

From ba09b5627f85e62bcef1df7b5582855cc796d991 Mon Sep 17 00:00:00 2001
From: Joshua MacDonald
Date: Wed, 7 Jan 2026 12:08:10 -0800
Subject: [PATCH 28/92] clippy

---
 .../otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs | 3 ++-
 .../crates/telemetry/src/self_tracing/formatter.rs             | 3 +++
 rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs    | 2 ++
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs
index f21fd08663..c89cc8bcf6 100644
--- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs
+++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs
@@ -42,7 +42,7 @@ impl<'buf> DirectLogRecordEncoder<'buf> {
             .extend_from_slice(&record.timestamp_ns.to_le_bytes());
 
         // Encode severity_number (field 2, varint)
-        let severity = level_to_severity_number(&callsite.level);
+        let severity = level_to_severity_number(callsite.level);
         self.buf
             .encode_field_tag(LOG_RECORD_SEVERITY_NUMBER, wire_types::VARINT);
         self.buf.encode_varint(severity as u64);
@@ -273,6 +273,7 @@ impl tracing::field::Visit for DirectFieldVisitor<'_> {
 ///
 /// See: https://opentelemetry.io/docs/specs/otel/logs/data-model/#field-severitynumber
 #[inline]
+#[must_use]
 pub fn level_to_severity_number(level: &Level) -> u8 {
     match *level {
         Level::TRACE => 1,
diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs
index 72427061d4..96c928c82d 100644
--- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs
+++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs
@@ -84,6 +84,7 @@ pub struct RawLayer {
 
 impl RawLayer {
     /// Return a new formatting layer with associated writer.
+    #[must_use]
     pub fn new(writer: ConsoleWriter) -> Self {
         Self { writer }
     }
@@ -95,6 +96,7 @@ pub type BufWriter<'a> = Cursor<&'a mut [u8]>;
 
 impl ConsoleWriter {
     /// Create a writer that outputs to stdout without ANSI colors.
+    #[must_use]
     pub fn no_color() -> Self {
         Self {
             color_mode: ColorMode::NoColor,
         }
    }
 
    /// Create a writer that outputs to stderr with ANSI colors.
+ #[must_use] pub fn color() -> Self { Self { color_mode: ColorMode::Color, diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs index 1eb2346744..d6c45f0c7a 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs @@ -57,6 +57,7 @@ pub struct SavedCallsite { impl SavedCallsite { /// Construct saved callsite information from tracing Metadata. + #[must_use] pub fn new(metadata: &'static Metadata<'static>) -> Self { Self { level: metadata.level(), @@ -70,6 +71,7 @@ impl SavedCallsite { impl LogRecord { /// Construct a log record, partially encoding its dynamic content. + #[must_use] pub fn new(event: &Event<'_>) -> Self { let metadata = event.metadata(); From c172e5ddd4ebdc13113f9b591d80387ad9245e8b Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Wed, 7 Jan 2026 12:16:43 -0800 Subject: [PATCH 29/92] comment --- rust/otap-dataflow/configs/internal-telemetry.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rust/otap-dataflow/configs/internal-telemetry.yaml b/rust/otap-dataflow/configs/internal-telemetry.yaml index 8babcd2d1a..3977862f2a 100644 --- a/rust/otap-dataflow/configs/internal-telemetry.yaml +++ b/rust/otap-dataflow/configs/internal-telemetry.yaml @@ -37,6 +37,8 @@ nodes: service: telemetry: logs: + # The default level is "info". level: "debug" internal: + # Internal logging is enabled by default. enabled: true From 60fea89a5d49801eafef74ced8136da54efd2a2d Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Wed, 7 Jan 2026 12:19:23 -0800 Subject: [PATCH 30/92] comments --- rust/otap-dataflow/crates/pdata/src/otlp/common.rs | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/rust/otap-dataflow/crates/pdata/src/otlp/common.rs b/rust/otap-dataflow/crates/pdata/src/otlp/common.rs index 9ec1adc96e..f94ebf8955 100644 --- a/rust/otap-dataflow/crates/pdata/src/otlp/common.rs +++ b/rust/otap-dataflow/crates/pdata/src/otlp/common.rs @@ -131,7 +131,7 @@ pub(crate) struct ScopeArrays<'a> { } /// Arrow DataType for the Scope struct array. -pub static SCOPE_ARRAY_DATA_TYPE: LazyLock = LazyLock::new(|| { +static SCOPE_ARRAY_DATA_TYPE: LazyLock = LazyLock::new(|| { DataType::Struct(Fields::from(vec![ Field::new( consts::NAME, @@ -493,20 +493,13 @@ macro_rules! proto_encode_len_delimited_unknown_size { } /// Write a 4-byte length placeholder for later patching. -/// -/// This writes bytes that represent a zero-padded varint, which can be -/// patched later with the actual length once content is written. +/// Do not use directly, use proto_encode_len_delimited_unknown_size. pub fn encode_len_placeholder(buf: &mut ProtoBuffer) { buf.buffer.extend_from_slice(&[0x80, 0x80, 0x80, 0x00]); } /// Patch a previously written length placeholder with the actual length. -/// -/// # Arguments -/// * `buf` - The buffer containing the placeholder -/// * `num_bytes` - Number of bytes in the placeholder (typically 4) -/// * `len` - The actual content length to encode -/// * `len_start_pos` - Position where the placeholder starts +/// Do not use directly, use proto_encode_len_delimited_unknown_size. 
pub fn patch_len_placeholder(
     buf: &mut ProtoBuffer,
     num_bytes: usize,

From b9e8958f6f61810df05f9405ce014e5bd7d99895 Mon Sep 17 00:00:00 2001
From: Joshua MacDonald
Date: Wed, 7 Jan 2026 12:26:33 -0800
Subject: [PATCH 31/92] cleanup docs

---
 .../crates/pdata/src/otlp/common.rs           |   1 -
 .../crates/telemetry/ARCHITECTURE.md          | 210 --------
 .../telemetry/src/opentelemetry_client.rs     |  34 ++
 .../opentelemetry_client/logger_provider.rs   |   2 +-
 .../telemetry/src/self_tracing/DESIGN.md      | 178 -------
 .../src/self_tracing/HYBRID_ENCODER_PLAN.md   | 468 ------------------
 .../telemetry/src/self_tracing/REPORT.md      | 208 --------
 7 files changed, 35 insertions(+), 1066 deletions(-)
 delete mode 100644 rust/otap-dataflow/crates/telemetry/ARCHITECTURE.md
 delete mode 100644 rust/otap-dataflow/crates/telemetry/src/self_tracing/DESIGN.md
 delete mode 100644 rust/otap-dataflow/crates/telemetry/src/self_tracing/HYBRID_ENCODER_PLAN.md
 delete mode 100644 rust/otap-dataflow/crates/telemetry/src/self_tracing/REPORT.md

diff --git a/rust/otap-dataflow/crates/pdata/src/otlp/common.rs b/rust/otap-dataflow/crates/pdata/src/otlp/common.rs
index f94ebf8955..dce34e9d59 100644
--- a/rust/otap-dataflow/crates/pdata/src/otlp/common.rs
+++ b/rust/otap-dataflow/crates/pdata/src/otlp/common.rs
@@ -130,7 +130,6 @@ pub(crate) struct ScopeArrays<'a> {
     pub id: Option<&'a UInt16Array>,
 }
 
-/// Arrow DataType for the Scope struct array.
 static SCOPE_ARRAY_DATA_TYPE: LazyLock<DataType> = LazyLock::new(|| {
     DataType::Struct(Fields::from(vec![
         Field::new(
diff --git a/rust/otap-dataflow/crates/telemetry/ARCHITECTURE.md b/rust/otap-dataflow/crates/telemetry/ARCHITECTURE.md
deleted file mode 100644
index a49ea685aa..0000000000
--- a/rust/otap-dataflow/crates/telemetry/ARCHITECTURE.md
+++ /dev/null
@@ -1,210 +0,0 @@
-# Internal Telemetry Collection Architecture & Development Plan
-
-## Architecture
-
-OTAP-Dataflow uses a configurable internal telemetry data plane. We
-support alternatives to enable a range of observability requirements.
-The internal telemetry SDK is designed for the engine to safely
-consume its own telemetry, and we intend for the self-hosted telemetry
-pipeline to be the standard configuration.
-
-Consuming self-generated telemetry presents a kind of feedback
-loop: situations where a telemetry pipeline creates pressure
-on itself and must not explode.
-
-## Internal telemetry receiver
-
-The Internal Telemetry Receiver or "ITR" is an OTAP-Dataflow receiver
-component that produces telemetry from internal sources. An internal
-telemetry pipeline consists of one or more ITR components and any of
-the connected processor and exporter components reachable from ITR
-source nodes.
-
-To begin with, every OTAP-Dataflow component is configured with an
-internal telemetry SDK meant for primary instrumentation of that
-component. Components are required to exclusively use the internal
-telemetry SDK for self-diagnostics, as they are considered first party
-in this exchange.
-
-The internal telemetry receiver is the SDK's counterpart, making it
-second party as it is responsible for routing internal telemetry. The
-ITR cannot use the internal telemetry SDK itself; it is an invisible
-member of the pipeline. The ITR can be instrumented using third-party
-instrumentation (e.g., `tracing`, `log` crates) provided it can
-guarantee there is no potential for feedback (e.g., a single
-`tracing::info()` statement at startup).
-
-## Pitfall avoidance
-
-The OTAP-Dataflow engine is safeguarded against many self-induced
-telemetry pitfalls, as follows:
-
-- OTAP-Dataflow components reachable from an ITR cannot be configured
-  to send to an ITR node. This avoids a direct feedback cycle for
-  internal telemetry because the components cannot reach
-  themselves. For example, ITR and downstream components may be
-  configured for raw logging, no metrics, etc.
-- ITR instances share access to one or more threads with associated
-  async runtime. They use these dedicated threads to isolate internal
-  telemetry processes that use third-party instrumentation.
-- A thread-local variable is used to redirect third-party
-  instrumentation in dedicated internal telemetry threads. Internal
-  telemetry threads automatically configure a safe configuration.
-- Components under observation (non-ITR components) have internal
-  telemetry events routed to queues in the OTAP-Dataflow pipeline on
-  the same core; this avoids blocking the engine. First-party
-  instrumentation will be handled on the CPU core that produced the
-  telemetry under normal circumstances. This isolates cores that are
-  able to process their own internal telemetry.
-- Option to fall back to no-op, a non-blocking global provider, and/or
-  raw logging.
-
-## OTLP-bytes first
-
-As a key design decision, the OTAP-Dataflow internal telemetry data
-path produces OTLP-bytes first. Because OTLP bytes is one of the
-builtin `OtapPayload` formats, once we have the OTLP bytes encoding of
-an event we are able to send it to an OTAP-Dataflow pipeline. To obtain
-these bytes, we will build a custom [Tokio `tracing`
-Event][TOKIOEVENT] handler to produce OTLP bytes before dispatching to
-an internal pipeline, used (in different configurations) for first and
-third-party instrumentation.
-
-[TOKIOEVENT]: https://docs.rs/tracing/latest/tracing/struct.Event.html
-
-## Raw logging
-
-We support formatting events for direct printing to the console from
-OTLP bytes, based on `otap_df_pdata::views::logs::LogsDataView` and
-associated types, a zero-copy approach. We refer to this most-basic
-form of printing to the console as raw logging because it is a safe
-configuration early in the lifetime of a process.
-
-## Routing
-
-The two internal logs data paths are:
-
-- Third-party: Tokio `tracing` global subscriber: third-party log
-  events, instrumentation in code without access to an OTAP-Dataflow
-  `EffectHandler`. These are handled in a dedicated internal telemetry
-  thread.
-- First-party: components with a local or shared `EffectHandler` use
-  dedicated macros (e.g., `otel_info!(effect, "interesting thing")`);
-  these use the configured internal telemetry SDK, and for ordinary
-  components (not ITR-downstream) these are routed through the ITR on
-  the same core. These are always non-blocking APIs; the internal SDK
-  must drop logs instead of blocking the pipeline.
-
-## Development plan
-
-Each of the items below is relatively small, estimated at 300-500
-lines of new code plus new tests.
-
-### TracingLogRecord: Tokio tracing Event and Metadata to LogRecordView
-
-When we receive a Tokio tracing event, whether through a
-`tracing::info!` macro (or similar) or through a dedicated
-`EffectHandler`-based API, the same thing happens:
-
-Create a `TracingLogRecord`, a struct derived from `tracing::Event`
-and `tracing::Metadata`, containing raw LogRecord fields extracted
-from the tracing macro layer. 
The `otap_df_pdata::views::logs::LogRecordView` is
-implemented for `TracingLogRecord`, making the `TracingLogRecord` something
-we can transcode into OTel-Arrow batches.
-
-The `otap_df_pdata` crate currently has no OTLP bytes encoder for
-directly accepting `otap_df_pdata::views::*` inputs (note the
-OTAP-records-to-OTLP-bytes function bypasses the views and encodes
-bytes directly). Therefore, this project implies we extend or refactor
-`otap_df_pdata` with an OTLP bytes encoder for its views interfaces.
-
-Then, since `TracingLogRecord` implements the log record view, we will
-encode the record as OTLP bytes by encoding the view.
-
-### Stateful OTLP bytes encoder for repeated LogRecordViews
-
-We can avoid sending a log record through a channel every time an event
-happens by buffering log records. We will buffer them as OTLP bytes. Each
-receiver of events from `TracingLogRecord` OTLP bytes will use one stateful
-encoder that is:
-
-- Preconfigured with the process-level OpenTelemetry `Resource` value
-- Remembers the OpenTelemetry `InstrumentationScope.Name` that was previously used
-- Remembers the starting position of the current `ResourceLogs` and `ScopeLogs` of a
-  single OTLP bytes payload.
-
-Whether in a global logging collector thread or an effect handler thread
-processing internal telemetry, we will enter the stateful encoder and
-append a `LogRecordView` with its effective
-`InstrumentationScope`. The stateful encoder will append the log
-record correctly, recognizing change of scope and a limited buffer
-size. This re-uses the `ProtoBuffer` object from the existing
-OTAP-records-to-OTLP-bytes code path for easy protobuf generation
-(1-pass encoder with length placeholders).
-
-### OTLP-bytes console logging handler
-
-We require a way to print OTLP bytes as human-readable log lines. We
-cannot easily re-use the Tokio `tracing` format layer for this;
-however, we can use the `LogsDataView` trait with `RawLogsData` to
-format human-readable text for the console directly from OTLP bytes.
-
-This OTLP-bytes-to-human-readable logic will be used to implement raw
-logging.
-
-### Global logs collection thread
-
-An OTAP-Dataflow engine will run at least one global logs collection
-thread. These threads receive encoded (OTLP bytes) log events from
-various locations in the process. The global logs collection thread is
-special because it sets an anti-recursion bit in the
-thread-local state to prevent logging in its own export path.
-
-The global logs collection thread is configured as one (or more, if
-needed) instances consuming logs from the global Tokio `tracing`
-subscriber. In this thread, we'll configure the OpenTelemetry SDK or a
-dedicated OTAP-Dataflow pipeline (by configuration) for logs export.
-
-Because global logs collection threads are used as a fallback for
-`EffectHandler`-level logs and because third-party libraries generally
-could call Tokio `tracing` APIs, we arrange to explicitly disallow
-these threads from logging. The macros are disabled from executing.
-
-### Global and Per-core Event Router
-
-OTAP-Dataflow provides an option to route internal telemetry to a pipeline
-in the same effect handler that produced the telemetry. When a component
-logging API is used on the `EffectHandler` or when a tokio `tracing` event
-occurs on the `EffectHandler` thread, it will be routed using thread-local
-state so that the event is immediately encoded and stored or flushed, without
-blocking the effect handler. 
-
-When a telemetry event is routed directly, as in this case, and
-`send_message()` succeeds, it means there was queue space to accept
-the log record on the same core. When this fails, the configurable
-telemetry router will support options to use the global logs collection
-thread, to use a raw logger, or to do nothing (dropping the internal
-log record).
-
-## Example configuration
-
-```yaml
-service:
-  telemetry:
-    logs:
-      level: info
-      internal_collection:
-        enabled: true
-
-        # Per-thread buffer
-        buffer_size_bytes: 65536
-
-        # Individual record size limit
-        max_record_bytes: 16384
-
-        # Bounded channel capacity
-        max_record_count: 10
-
-        # Timer-based flush interval
-        flush_interval: "1s"
-```
diff --git a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs b/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs
index a14b94efa1..8eaf8ecce2 100644
--- a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs
+++ b/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs
@@ -51,6 +51,40 @@ fn get_env_filter(level: LogLevel) -> EnvFilter {
 
 impl OpentelemetryClient {
     /// Create a new OpenTelemetry client from the given configuration.
+    ///
+    /// Logging-specific notes:
+    ///
+    /// The log level can be controlled via:
+    /// 1. The `logs.level` config setting (off, debug, info, warn, error)
+    /// 2. The `RUST_LOG` environment variable for fine-grained control
+    ///
+    /// When `RUST_LOG` is set, it takes precedence and allows filtering by target.
+    /// Example: `RUST_LOG=info,h2=warn,hyper=warn` enables info level but silences
+    /// noisy HTTP/2 and hyper logs.
+    ///
+    /// TODO: The engine uses a thread-per-core model
+    /// and is NUMA aware.
+    /// The fmt::init() here is truly global, and hence
+    /// this will be a source of contention.
+    /// We need to evaluate alternatives:
+    ///
+    /// 1. Set up a per-thread subscriber.
+    /// ```ignore
+    /// // start of thread
+    /// let _guard = tracing::subscriber::set_default(subscriber);
+    /// // now, within this thread, all tracing calls will go to this subscriber,
+    /// // eliminating contention.
+    /// // end of thread
+    /// ```
+    ///
+    /// 2. Use a custom subscriber that batches logs in a thread-local buffer, and
+    ///    flushes them periodically.
+    ///
+    /// The TODO here is to evaluate these options and implement one of them.
+    /// As of now, this causes contention, and we just need to accept that temporarily.
+    ///
+    /// TODO: Also evaluate alternatives for the contention caused by the global
+    /// OpenTelemetry logger provider added as a layer.
     pub fn new(config: &TelemetryConfig) -> Result<Self, Error> {
        let sdk_resource = Self::configure_resource(&config.resource);
 
diff --git a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client/logger_provider.rs b/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client/logger_provider.rs
index 82ef030659..443584e908 100644
--- a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client/logger_provider.rs
+++ b/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client/logger_provider.rs
@@ -25,7 +25,7 @@ pub struct LoggerProvider {
 }
 
 impl LoggerProvider {
-    /// Initializes OpenTelemetry logging for the OTAP engine.
+    /// Initializes OpenTelemetry logger provider for the OTAP engine.
pub fn configure(
         sdk_resource: Resource,
         logger_config: &LogsConfig,
diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/DESIGN.md b/rust/otap-dataflow/crates/telemetry/src/self_tracing/DESIGN.md
deleted file mode 100644
index ba7aa896a9..0000000000
--- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/DESIGN.md
+++ /dev/null
@@ -1,178 +0,0 @@
-# Tracing Integration Design: Zero-Copy Exploration
-
-This document captures the design exploration for integrating `tokio-tracing` events with our OTLP-bytes-first encoding architecture.
-
-## Goal
-
-Convert tracing events to OTLP bytes with minimal allocations, following the principle that **encoding to bytes happens before crossing thread boundaries**. The ideal is true zero-copy: borrow data directly from the tracing event and encode it in-place.
-
-## Architecture Context
-
-From `ARCHITECTURE.md`: The system uses a thread-per-core design where components are local to each thread. OTLP bytes are the interchange format that crosses thread boundaries, not structured data.
-
-## What We Achieved
-
-### 1. `TracingAnyValue<'a>` is `Copy`
-
-```rust
-#[derive(Debug, Clone, Copy)]
-pub enum TracingAnyValue<'a> {
-    Str(&'a str),
-    Int(i64),
-    Bool(bool),
-    Double(f64),
-    Bytes(&'a [u8]),
-    Array(&'a [TracingAnyValue<'a>]),
-    KeyValueList(&'a [TracingAttribute<'a>]),
-}
-```
-
-The enum only contains borrowed references or primitive values. "Copying" this type just copies the pointer+length, not the underlying data. The lifetime `'a` is preserved in the copy.
-
-### 2. `TracingAttribute<'a>` is `Copy`
-
-```rust
-#[derive(Debug, Clone, Copy)]
-pub struct TracingAttribute<'a> {
-    pub key: &'a str,
-    pub value: TracingAnyValue<'a>,
-}
-```
-
-### 3. `TracingLogRecord<'a>` Borrows from Metadata
-
-```rust
-pub struct TracingLogRecord<'a> {
-    event_name: Option<&'static str>,  // metadata.name() is always static
-    target: &'a str,                   // borrowed from Metadata<'a>
-    attributes: Vec<TracingAttribute<'a>>,
-    body: Option<&'a str>,
-    // ...
-}
-```
-
-The lifetime `'a` ties the log record to the tracing event callback scope.
-
-### 4. Direct Trait Implementations (No Wrappers)
-
-`TracingAnyValue<'a>` implements `AnyValueView<'a>` directly.
-`TracingAttribute<'a>` implements `AttributeView` directly.
-No wrapper types needed because the underlying types are `Copy`.
-
-### 5. GAT Lifetime Handling
-
-The `LogRecordView` trait uses Generic Associated Types:
-
-```rust
-type Attribute<'att>: AttributeView where Self: 'att;
-type Body<'bod>: AnyValueView<'bod> where Self: 'bod;
-```
-
-For `TracingLogRecord<'a>`:
-- `type Attribute<'att> = TracingAttribute<'a>` — uses data lifetime `'a`, not GAT lifetime
-- `type Body<'bod> = TracingAnyValue<'bod>` — constructs on demand from stored `&'a str`
-
-The key insight: when `Self: 'bod`, it implies `'a: 'bod`, so we can shorten the lifetime.
-
-## The Barrier: The `Visit` Trait
-
-The tracing crate's `Visit` trait erases lifetime information:
-
-```rust
-pub trait Visit {
-    fn record_str(&mut self, field: &Field, value: &str);
-    fn record_debug(&mut self, field: &Field, value: &dyn Debug);
-    // ...
-}
-```
-
-The `value: &str` has an anonymous lifetime. Even though in practice the data is borrowed from the `Event<'_>` which exists for the entire callback, **the trait boundary prevents expressing this relationship**.
-
-### What This Means
-
-1. **Field names (`field.name()`)**: Always `&'static str` — zero-copy ✓
-2. **Primitive values (i64, bool, f64)**: No allocation needed — zero-copy ✓
-3. 
**String values**: The borrow lifetime is erased by the trait, so we must either:
-   - Allocate (copy to `String`)
-   - Use `unsafe` to assert the lifetime relationship
-
-### Current Implementation
-
-We use owned storage (`OwnedValue`) in the visitor:
-
-```rust
-enum OwnedValue {
-    Str(String),   // Allocated copy
-    Int(i64),
-    Bool(bool),
-    Double(f64),
-}
-```
-
-This is the safe approach at the cost of one allocation per string attribute.
-
-## The `Send + Sync` Barrier
-
-The tracing ecosystem requires subscribers to be `Send + Sync`:
-
-```rust
-impl Dispatch {
-    pub fn new<S>(subscriber: S) -> Self
-    where
-        S: Subscriber + Send + Sync + 'static
-}
-```
-
-Our layer uses a `RefCell` for span storage (single-threaded design), which is `!Sync`. This prevents using standard tracing test utilities like `with_default`.
-
-### Workaround
-
-Tests must use thread-local storage or other patterns that don't require the subscriber to be `Sync`.
-
-## Alternatives Not Taken
-
-### 1. Unsafe Lifetime Extension
-
-```rust
-fn record_str(&mut self, field: &Field, value: &str) {
-    // UNSAFE: Assert that value lives as long as the event
-    let extended: &'static str = unsafe { std::mem::transmute(value) };
-    self.attr_values.push(TracingAnyValue::Str(extended));
-}
-```
-
-Rejected because:
-- Requires proving the invariant holds for all tracing macros
-- Third-party libraries might violate the assumption
-- The allocation cost is minimal compared to encoding
-
-### 2. Arc/Rc for Cheap Cloning
-
-Earlier iterations used `Rc<str>` and `Rc<[u8]>` to make cloning cheap. Rejected because:
-- Still requires initial allocation
-- Adds reference counting overhead
-- The goal is zero-copy, not cheap-copy
-
-### 3. String Arena
-
-Could store formatted strings in a pre-allocated arena that lives for the callback scope. Not implemented because:
-- Adds complexity
-- Still requires copying data into the arena
-- The simple `String` approach is clear and correct
-
-## Summary
-
-| Data Type | Zero-Copy? | Notes |
-|-----------|------------|-------|
-| Field names | ✓ | `&'static str` from tracing |
-| `metadata.name()` | ✓ | `&'static str` |
-| `metadata.target()` | ✓ | `&'a str` borrowed from metadata |
-| Primitive values | ✓ | Copied by value (cheap) |
-| String values | ✗ | `Visit` trait erases lifetime |
-| Debug-formatted values | ✗ | Requires formatting to String |
-
-The current implementation achieves zero-copy for everything except string attribute values, where the `Visit` trait's lifetime erasure forces allocation. This is a fundamental limitation of the tracing crate's design, not something we can work around without `unsafe`.
-
-## Future Considerations
-
-If the tracing crate ever adds a lifetime-aware visitor pattern, or if we're willing to use `unsafe` with careful auditing, we could achieve true zero-copy for all data types.
diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/HYBRID_ENCODER_PLAN.md b/rust/otap-dataflow/crates/telemetry/src/self_tracing/HYBRID_ENCODER_PLAN.md
deleted file mode 100644
index a95d1b2722..0000000000
--- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/HYBRID_ENCODER_PLAN.md
+++ /dev/null
@@ -1,468 +0,0 @@
-# Hybrid Log Record Encoder: Planning Document
-
-## Overview
-
-This document plans a hybrid approach for encoding tracing events that:
-
-1. **Keeps cheap, useful data in structural form** (for sorting, filtering, indexing)
-2. **Encodes borrowed/expensive data to OTLP bytes** (body, attributes)
-3. 
**Caches static callsite details** as pre-encoded LogRecord.event_name bytes
-
-## Current vs. Proposed Architecture
-
-### Current: Full OTLP Encoding
-
-```
-Event → StatefulDirectEncoder → Complete OTLP bytes
-    ├── ResourceLogs envelope
-    ├── ScopeLogs envelope (scope name string)
-    └── LogRecord (all fields as protobuf)
-```
-
-**Issues:**
-- All fields encoded immediately at event time
-- Callsite info (target/name/file/line) re-encoded for every event
-- Can't sort/filter without decoding
-
-### Proposed: Hybrid Structural + Partial OTLP
-
-```
-Event → CompactLogRecord → Accumulate → Batch encode
-    ├── callsite_id: Identifier (for cached event_name lookup)
-    ├── timestamp_ns: u64 (structural, cheap copy)
-    ├── severity: u8 (structural, cheap copy)
-    └── body_attrs_bytes: Bytes (OTLP body+attributes only)
-```
-
-**Benefits:**
-- Callsite details (target/name/file/line) encoded once per unique log statement
-- Cached event_name bytes appended to each LogRecord at flush time
-- Structural fields available for filtering/indexing
-- Body+attributes already OTLP-encoded (common output path)
-
-## Tokio Tracing Event Anatomy
-
-```rust
-// From tracing crate
-pub struct Event<'a> {
-    fields: ValueSet<'a>,  // Borrowed from callsite + formatted values
-    metadata: &'static Metadata<'static>, // Static callsite metadata
-}
-
-pub struct Metadata<'static> {
-    name: &'static str,       // Static
-    target: &'static str,     // Static (module path)
-    level: Level,             // Static
-    file: Option<&'static str>,
-    line: Option<u32>,
-    callsite: Identifier,     // &'static dyn Callsite
-    // ...
-}
-```
-
-**Key insight:** `Metadata` is `'static` (owned by callsite). Only the formatted field *values* are borrowed from the event.
-
-## What to Keep Structural vs. Encode as OTLP
-
-| Field | Lifetime | Keep Structural? | Rationale |
-|-------|----------|------------------|-----------|
-| `callsite.Identifier` | `'static` | ✓ | Key for cached event_name lookup |
-| `metadata.level` | `'static` | ✓ | Cheap u8, useful for filtering |
-| `timestamp` | Generated | ✓ | Cheap u64, useful for sorting |
-| `metadata.target` | `'static` | Cache → event_name | Static, encode once per callsite |
-| `metadata.name` | `'static` | Cache → event_name | Static, encode once per callsite |
-| `metadata.file/line` | `'static` | Cache → event_name | Static, encode once per callsite |
-| `body` (message) | `'a` | **Encode** | Borrowed, must capture |
-| `attributes` | `'a` | **Encode** | Borrowed values, must capture |
-
-## Proposed Data Structures
-
-### Core Insight
-
-Since tracing provides lazy callsite registration via `register_callsite`, we can:
-
-1. **Cache encoded event_name bytes** per callsite at the subscriber level
-2. **Store minimal event structs** with just `Identifier` + structural fields + pre-encoded body/attrs bytes
-3. **Append event_name on flush** - look up cached bytes from Identifier when encoding each LogRecord
-
-### `CompactLogRecord`
-
-```rust
-/// A compact log record with structural metadata and pre-encoded body/attributes.
-///
-/// Cheap-to-copy fields are kept in structural form for sorting/filtering.
-/// Only borrowed data (body, attributes) is encoded to OTLP bytes.
-/// Callsite details (target/name/file/line) are cached and appended at flush time.
-pub struct CompactLogRecord {
-    /// Callsite identifier - used to look up cached event_name encoding
-    pub callsite_id: Identifier,
-
-    /// Timestamp in nanoseconds since Unix epoch (cheap u64 copy)
-    pub timestamp_ns: u64,
-
-    /// Severity number: 1=TRACE, 5=DEBUG, 9=INFO, 13=WARN, 17=ERROR (cheap u8 copy)
-    pub severity_number: u8,
-
-    /// Severity text - &'static str from Level::as_str() (no allocation)
-    pub severity_text: &'static str,
-
-    /// Pre-encoded OTLP bytes for body (field 5) and attributes (field 6) only
-    /// These are the only fields with borrowed lifetimes that must be captured
-    pub body_attrs_bytes: Bytes,
-}
-```
-
-**Why this split?**
-
-| Field | Size | Keep Structural | Rationale |
-|-------|------|-----------------|-----------|
-| `callsite_id` | 8 bytes | ✓ | Pointer to static callsite, for event_name lookup |
-| `timestamp_ns` | 8 bytes | ✓ | Useful for time-based sorting/filtering |
-| `severity_number` | 1 byte | ✓ | Useful for level filtering |
-| `severity_text` | 16 bytes | ✓ | `&'static str`, just a pointer+len |
-| `body` | variable | **Encode** | Borrowed `&str` or formatted, lifetime ends |
-| `attributes` | variable | **Encode** | Borrowed values, lifetime ends |
-| `event_name` | variable | **Cache** | Static callsite info, encode once per callsite |
-
-Total structural overhead per event: ~33 bytes + `Bytes` (Arc pointer)
-
-### Subscriber-Level Callsite Cache
-
-The key insight: callsite metadata (target, module, file, line) are **static properties of the log statement**, not the scope. We encode them once per callsite and include them in each LogRecord's `event_name` field.
-
-```rust
-/// Cache of pre-encoded callsite details, keyed by callsite Identifier.
-///
-/// Populated lazily via `register_callsite` hook.
-pub struct CallsiteCache {
-    /// Map from Identifier to pre-encoded callsite details
-    callsites: HashMap<Identifier, CachedCallsite>,
-}
-
-pub struct CachedCallsite {
-    /// Target module path - &'static from Metadata
-    pub target: &'static str,
-
-    /// Event name - &'static from Metadata
-    pub name: &'static str,
-
-    /// Source file - &'static from Metadata
-    pub file: Option<&'static str>,
-
-    /// Source line
-    pub line: Option<u32>,
-
-    /// Pre-encoded LogRecord.event_name OTLP bytes (lazily computed on first flush)
-    /// Format: "target::name" or "target::name (file:line)"
-    pub event_name_bytes: OnceCell<Bytes>,
-}
-
-impl CallsiteCache {
-    /// Called from register_callsite hook
-    pub fn register(&mut self, metadata: &'static Metadata<'static>) {
-        let id = metadata.callsite();
-        self.callsites.entry(id).or_insert_with(|| CachedCallsite {
-            target: metadata.target(),
-            name: metadata.name(),
-            file: metadata.file(),
-            line: metadata.line(),
-            event_name_bytes: OnceCell::new(),
-        });
-    }
-
-    /// Get or lazily encode event_name bytes for an Identifier
-    pub fn get_event_name_bytes(&self, id: &Identifier) -> &Bytes {
-        let cached = self.callsites.get(id).expect("callsite not registered");
-        cached.event_name_bytes.get_or_init(|| {
-            encode_event_name(cached.target, cached.name, cached.file, cached.line)
-        })
-    }
-}
-
-/// Encode callsite details as LogRecord.event_name field bytes.
-///
-/// Format options:
-/// - "module::path::event_name"
-/// - "module::path::event_name (file.rs:42)"
-fn encode_event_name(
-    target: &str,
-    name: &str,
-    file: Option<&str>,
-    line: Option<u32>
-) -> Bytes {
-    let mut buf = ProtoBuffer::with_capacity(128);
-
-    // LogRecord.event_name (field 12, string)
-    // Build the string: "target::name" or "target::name (file:line)"
-    if let (Some(file), Some(line)) = (file, line) {
-        let event_name = format!("{}::{} ({}:{})", target, name, file, line);
-        buf.encode_string(LOG_RECORD_EVENT_NAME, &event_name);
-    } else {
-        let event_name = format!("{}::{}", target, name);
-        buf.encode_string(LOG_RECORD_EVENT_NAME, &event_name);
-    }
-
-    buf.into_bytes()
-}
-```
-
-### Design Evolution
-
-```rust
-// Original full-OTLP design:
-pub struct StatefulDirectEncoder {
-    // Encodes complete LogsData with ResourceLogs/ScopeLogs/LogRecord
-    // All fields encoded immediately, scope batching only for consecutive
-}
-
-// New compact design:
-pub struct CompactLogRecord {
-    pub callsite_id: Identifier,      // For cached event_name lookup
-    pub timestamp_ns: u64,            // Structural: for sorting/filtering
-    pub severity_number: u8,          // Structural: for level filtering
-    pub severity_text: &'static str,  // Structural: static, no alloc
-    pub body_attrs_bytes: Bytes,      // Encoded: borrowed data captured
-}
-```
-
-**Encoding strategy:**
-- Structural fields encoded to OTLP at flush time (trivial: 9 + 2 + ~6 bytes)
-- Body/attrs bytes appended directly (already OTLP encoded)
-- event_name looked up from callsite cache, appended to each LogRecord
-
-### `CallsiteRegistry`
-
-The tracing crate maintains a global callsite registry internally, but it is **not exposed** for enumeration. The `Callsites::for_each` method is private.
-
-However, we can build our own registry lazily via the `Subscriber::register_callsite` hook, which is called **once per callsite** before any events from that callsite are emitted:
-
-```rust
-impl<S: Subscriber> Layer<S> for HybridEncoderLayer {
-    fn register_callsite(&self, metadata: &'static Metadata<'static>) -> Interest {
-        // Called once per callsite, with static metadata we can store
-        self.callsite_cache.register(metadata);
-        Interest::always()
-    }
-}
-```
-
-**Key insight**: `Metadata<'static>` gives us `&'static str` references that we can store without lifetime issues. No allocation needed for callsite names.
-
-Note: The `CallsiteRegistry` struct defined above is essentially the same as `CallsiteCache`, just with a different focus. We can consolidate these into a single `CallsiteCache` struct.
-
-### `CompactLogRecord` Formatter
-
-Instead of accumulating records, we format and write immediately. This is a minimal `fmt::layer()` alternative:
-
-```rust
-/// Formats a CompactLogRecord as a human-readable string.
-///
-/// This is our minimal fmt::layer() replacement.
-pub fn format_log_record(record: &CompactLogRecord, callsite_cache: &CallsiteCache) -> String {
-    let cached = callsite_cache.get(record.callsite_id);
-
-    // Format: "2026-01-06T10:30:45.123Z INFO target::name: body [attr1=val1, attr2=val2]"
-    format!(
-        "{} {:5} {}::{}: {}",
-        format_timestamp(record.timestamp_ns),
-        record.severity_text,
-        cached.target,
-        cached.name,
-        format_body_attrs(&record.body_attrs_bytes),
-    )
-}
-
-/// Format nanosecond timestamp as ISO 8601
-fn format_timestamp(nanos: u64) -> String {
-    // TODO: Use a more efficient formatter
-    let secs = nanos / 1_000_000_000;
-    let subsec_nanos = (nanos % 1_000_000_000) as u32;
-    // ... 
format as "2026-01-06T10:30:45.123Z"
-}
-
-/// Decode and format body+attrs bytes as readable string
-fn format_body_attrs(bytes: &Bytes) -> String {
-    // Decode the pre-encoded OTLP bytes back to readable form
-    // Body becomes the main message, attrs become "[key=value, ...]"
-}
-
-### Simple Writer
-
-```rust
-use std::io::{self, Write};
-
-pub enum OutputTarget {
-    Stdout,
-    Stderr,
-}
-
-pub struct SimpleWriter {
-    target: OutputTarget,
-}
-
-impl SimpleWriter {
-    pub fn stdout() -> Self {
-        Self { target: OutputTarget::Stdout }
-    }
-
-    pub fn stderr() -> Self {
-        Self { target: OutputTarget::Stderr }
-    }
-
-    pub fn write_line(&self, line: &str) {
-        match self.target {
-            OutputTarget::Stdout => {
-                let _ = writeln!(io::stdout(), "{}", line);
-            }
-            OutputTarget::Stderr => {
-                let _ = writeln!(io::stderr(), "{}", line);
-            }
-        }
-    }
-}
-```
-
-### Minimal Layer Implementation
-
-```rust
-pub struct CompactFormatterLayer {
-    callsite_cache: RwLock<CallsiteCache>,
-    writer: SimpleWriter,
-}
-
-impl<S: Subscriber> Layer<S> for CompactFormatterLayer {
-    fn register_callsite(&self, metadata: &'static Metadata<'static>) -> Interest {
-        self.callsite_cache.write().unwrap().register(metadata);
-        Interest::always()
-    }
-
-    fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) {
-        let metadata = event.metadata();
-
-        // Encode body+attrs (borrowed data)
-        let body_attrs_bytes = encode_body_and_attrs(event);
-
-        // Build compact record
-        let record = CompactLogRecord {
-            callsite_id: metadata.callsite(),
-            timestamp_ns: current_time_nanos(),
-            severity_number: level_to_severity(metadata.level()),
-            severity_text: metadata.level().as_str(),
-            body_attrs_bytes,
-        };
-
-        // Format and write immediately
-        let line = format_log_record(&record, &self.callsite_cache.read().unwrap());
-        self.writer.write_line(&line);
-    }
-}
-```
-
-## Encoding Flow (Simplified)
-
-### 1. Callsite Registration (once per callsite, lazy)
-
-```rust
-fn register_callsite(&self, metadata: &'static Metadata<'static>) -> Interest {
-    self.callsite_cache.write().unwrap().register(metadata);
-    Interest::always()
-}
-```
-
-### 2. Event Capture → Format → Write (per event)
-
-```rust
-fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) {
-    let metadata = event.metadata();
-
-    // Encode body+attrs (borrowed data that won't survive)
-    let body_attrs_bytes = encode_body_and_attrs(event);
-
-    // Build compact record with structural fields
-    let record = CompactLogRecord {
-        callsite_id: metadata.callsite(),
-        timestamp_ns: current_time_nanos(),
-        severity_number: level_to_severity(metadata.level()),
-        severity_text: metadata.level().as_str(),
-        body_attrs_bytes,
-    };
-
-    // Format and write immediately (no accumulation)
-    let line = format_log_record(&record, &self.callsite_cache.read().unwrap());
-    self.writer.write_line(&line);
-}
-```
-
-### Key Benefits (MVP)
-
-1. **Simple**: No accumulator, no batching, no deferred encoding
-2. **Immediate output**: Events written as they occur
-3. **Composable**: Accumulator/batching can be layered on later
-4. **Testable**: `format_log_record()` returns String, easy to test
-5. 
**Familiar**: Similar mental model to `fmt::layer()` - -## Implementation Plan - -### Phase 1: Core Data Structures ✅ COMPLETE -- [x] 1.1 Create `CompactLogRecord` struct (callsite_id + structural fields + body_attrs_bytes) -- [x] 1.2 Create `CallsiteCache` with `register()` and `get()` -- [x] 1.3 Create `CachedCallsite` struct storing static metadata refs - -### Phase 2: Formatting ✅ COMPLETE -- [x] 2.1 Implement `format_log_record()` → String -- [x] 2.2 Implement `format_timestamp()` for ISO 8601 output -- [x] 2.3 Implement `format_body_attrs()` using pdata View types (`RawAnyValue`, `RawKeyValue`) -- [x] 2.4 Create `SimpleWriter` for stdout/stderr output -- [x] 2.5 Implement `format_any_value()` consistent with `otlp_bytes_formatter.rs` - -### Phase 3: Body+Attrs Encoder ✅ COMPLETE -- [x] 3.1 Reuse `DirectFieldVisitor` for body+attrs encoding -- [x] 3.2 Create `encode_body_and_attrs(event) -> Bytes` function - -### Phase 4: Layer Integration ✅ COMPLETE -- [x] 4.1 Create `CompactFormatterLayer` implementing tracing Layer -- [x] 4.2 Implement `register_callsite()` to populate CallsiteCache -- [x] 4.3 Implement `on_event()` to encode, format, and write immediately -- [x] 4.4 Add basic tests with mock subscriber - -### Phase 5: Future Extensions (deferred) -- [ ] 5.1 Add `LogAccumulator` for batching -- [ ] 5.2 Add OTLP encoding path (flush to bytes) -- [ ] 5.3 Add configurable output formats (JSON, compact, etc.) - -## Open Questions (Resolved) - -1. **Body+attrs encoding**: ✅ We encode to OTLP bytes using `DirectFieldVisitor`, then decode for formatting using pdata View types (`RawAnyValue`, `RawKeyValue`). This keeps the data path consistent with future OTLP batching. - -2. **Timestamp format**: ✅ ISO 8601 with milliseconds: `2026-01-06T10:30:45.123Z` - -3. **Output format**: ✅ Single compact format for MVP: `timestamp LEVEL target::name: body [attr=value, ...]` - -4. **Thread safety**: ✅ `RwLock` - readers don't block each other, writes are rare (only during callsite registration) - -5. **Color support**: Deferred to future work (can be added to `SimpleWriter`) - -## Resolved Design Decisions - -1. **pdata View integration**: Instead of writing custom OTLP decoders, we reuse the existing `RawAnyValue`, `RawKeyValue` types from `otap_df_pdata::views::otlp::bytes::common`. Made `RawKeyValue::new()` public to enable this. - -2. **format_any_value consistency**: The `format_any_value()` function in `compact_formatter.rs` matches the implementation in `otlp_bytes_formatter.rs`, ensuring consistent formatting across the crate. - -## Success Metrics - -1. **Simplicity**: MVP should be <300 lines of code -2. **Correctness**: Output matches expected format for all log levels -3. **Performance**: Comparable to `fmt::layer()` for immediate writes -4. **Extensibility**: Easy to add accumulator/batching layer later - ---- - -## Next Steps - -Please review this plan and let me know: - -1. Do the proposed data structures align with your vision? -2. Any changes to what should be structural vs. encoded? -3. Which phase should we start with? -4. Answers to any of the open questions? 
diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/REPORT.md b/rust/otap-dataflow/crates/telemetry/src/self_tracing/REPORT.md deleted file mode 100644 index 15b38fa73f..0000000000 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/REPORT.md +++ /dev/null @@ -1,208 +0,0 @@ -# Self-Tracing Direct Encoder: Performance Report - -This report documents the design, implementation, and benchmark results for the direct OTLP encoder used for self-diagnostics in otap-dataflow. - -## Executive Summary - -We implemented a **zero-allocation** path for encoding `tokio-tracing` events directly to OTLP protobuf bytes. The key findings: - -| Operation | Per-event cost | -|-----------|----------------| -| **Encode to OTLP bytes** (3 attrs) | ~200 ns | -| **Format OTLP for console** | ~1.0 µs | -| **Full encode + format** | ~1.35 µs | - -Memory allocations were reduced from multiple per-event to **zero for primitive types** and **zero heap allocations** for Debug types (via `fmt::Write` directly to buffer). - ---- - -## Problem Statement - -The otap-dataflow system uses a thread-per-core architecture where **OTLP bytes are the interchange format** that crosses thread boundaries. For self-diagnostics (internal logging), we needed to convert `tokio-tracing` events to OTLP with minimal overhead. - -The naive approach: -1. Visit event fields → allocate intermediate struct -2. Encode struct via View trait → OTLP bytes - -Our approach: -1. Visit event fields → encode directly to protobuf buffer - ---- - -## Implementation: `DirectLogRecordEncoder` - -### Architecture - -``` -tracing::info!(count = 42, "message") - │ - ▼ -┌─────────────────────────────────────────────┐ -│ Layer::on_event(event) │ -│ └── StatefulDirectEncoder │ -│ ├── Pre-encoded Resource bytes │ -│ ├── Open ResourceLogs/ScopeLogs │ -│ └── DirectLogRecordEncoder │ -│ └── DirectFieldVisitor │ -│ └── ProtoBuffer │ -└─────────────────────────────────────────────┘ - │ - ▼ - OTLP bytes (protobuf) -``` - -### Key Components - -1. **`StatefulDirectEncoder`**: Maintains open `ResourceLogs` and `ScopeLogs` containers, batching consecutive events with the same instrumentation scope. - -2. **`DirectLogRecordEncoder`**: Encodes a single LogRecord directly to a `ProtoBuffer`. - -3. **`DirectFieldVisitor`**: Implements `tracing::field::Visit` to encode each field directly as OTLP attributes without intermediate allocation. - -4. **`LengthPlaceholder`**: Reserves 4 bytes for protobuf length fields, patches after content is written. - -5. **`ProtoBufferWriter`**: Implements `std::fmt::Write` to allow `Debug` formatting directly into the protobuf buffer. - ---- - -## Type Fidelity - -The encoder preserves native OTLP types for primitives: - -| Tracing Type | OTLP AnyValue | Encoding | -|--------------|---------------|----------| -| `i64`, `u64` | `int_value` | varint | -| `f64` | `double_value` | fixed64 | -| `bool` | `bool_value` | varint | -| `&str` | `string_value` | length-prefixed bytes | -| `&dyn Debug` | `string_value` | formatted via `fmt::Write` | - -This means `tracing::info!(count = 42)` produces an OTLP attribute with `int_value: 42`, not `string_value: "42"`. - ---- - -## Memory Allocation Analysis - -### Per-Event Allocations - -| Location | Allocation | Avoidable? 
| -|----------|------------|------------| -| `StatefulDirectEncoder::start_scope_logs` | `scope_name.to_string()` | Yes, with scope interning | - -### Zero-Allocation Paths ✓ - -- All primitive type visitors: `record_i64`, `record_f64`, `record_bool` -- String visitor: `record_str` — encodes borrowed `&str` directly -- Debug visitor: `record_debug` — uses `fmt::Write` to buffer (no intermediate `String`) -- Buffer writes: use pre-allocated capacity - -### The Debug Trait Limitation - -The `std::fmt::Debug` trait only provides string formatting, not structural access: - -```rust -pub trait Debug { - fn fmt(&self, f: &mut Formatter<'_>) -> Result; -} -``` - -`Formatter` has no public constructor, so we cannot intercept `debug_struct`/`debug_list` calls to encode as nested OTLP structures. Complex types must be formatted as strings. - -**Future options:** -- `serde::Serialize` → encode to `AnyValue::kvlist_value` -- `valuable::Valuable` → designed for structured inspection -- `tracing::Value` → unstable, may provide this - ---- - -## Benchmark Results - -### Methodology - -Benchmarks use Criterion with jemalloc. To isolate encoding cost from tracing dispatch overhead, each benchmark: - -1. Emits 1 tracing event -2. Inside the callback, encodes it N times (100 or 1000) -3. Measures total time, then computes per-event cost - -### Encoding Cost by Attribute Count - -| Attributes | Total (1000 events) | **Per event** | -|------------|---------------------|---------------| -| 0 | 136.6 µs | **137 ns** | -| 3 | 265.6 µs | **266 ns** | -| 10 | 489.7 µs | **490 ns** | - -Cost scales roughly linearly with attribute count (~35 ns per additional attribute). - -### Full Pipeline Costs - -| Operation | Per event | -|-----------|-----------| -| Encode only | ~200 ns | -| Format only | ~1.0 µs | -| Encode + Format | ~1.35 µs | - -Formatting dominates the cost due to text generation (timestamps, attribute formatting, ANSI colors). - -### Comparison to Baseline - -For context, a single `HashMap::insert` is ~20-50 ns. Our encoding of a 3-attribute event at ~266 ns is roughly 5-10 hash operations worth of overhead. - ---- - -## Scope Batching - -The `StatefulDirectEncoder` batches consecutive events with the same instrumentation scope: - -``` -Event 1: target="module_a" ─┐ -Event 2: target="module_a" ─┼── ScopeLogs { scope: "module_a", log_records: [1, 2, 3] } -Event 3: target="module_a" ─┘ -Event 4: target="module_b" ─── ScopeLogs { scope: "module_b", log_records: [4] } -``` - -This reduces OTLP envelope overhead when events from the same module are logged consecutively. - ---- - -## Design Decisions - -### 1. Direct Encoding vs. View Trait - -We bypass the `LogRecordView` / `AttributeView` abstraction for self-tracing. The View traits require `GAT` lifetime handling and don't eliminate the fundamental issue: the tracing `Visit` trait erases lifetimes. - -**Trade-off**: Some code duplication vs. complexity of making View traits work with borrowed tracing data. - -### 2. Pre-encoded Resource Bytes - -Resource attributes (e.g., `service.name`) are encoded once at startup and copied into each batch. This avoids re-encoding the same data repeatedly. - -### 3. 4-Byte Length Placeholders - -Protobuf uses varint lengths, but we can't know the length until content is written. We reserve 4 bytes (max 2^28 content size) and patch afterward. This allows single-pass encoding. - ---- - -## Future Work - -1. **Scope name interning**: Avoid `to_string()` on scope change by using static strings or an intern pool. - -2. 
**Structured encoding for Serialize types**: Add optional serde support to encode complex types as nested OTLP structures instead of strings.
-
-3. **Span integration**: Currently only events are encoded. Could extend to encode spans as OTLP spans.
-
-4. **Batching heuristics**: Currently flushes on demand. Could add time-based or size-based automatic flushing.
-
----
-
-## Conclusion
-
-The direct encoder achieves near-optimal performance for converting tracing events to OTLP bytes:
-
-- **~200-500 ns per event** depending on attribute count
-- **Zero heap allocations** for typical events (primitives + strings)
-- **Preserves type fidelity** (numbers stay numbers, bools stay bools)
-- **Single-pass encoding** with placeholder patching
-
-The main limitation is the `Debug` trait's lack of structural inspection, which forces complex types to be formatted as strings. This is a Rust language limitation, not an implementation issue.

From a6b6f46a74312d896f4107c305a4164776efa538 Mon Sep 17 00:00:00 2001
From: Joshua MacDonald
Date: Wed, 7 Jan 2026 13:00:04 -0800
Subject: [PATCH 32/92] lint

---
 .../crates/telemetry/src/error.rs | 4 --
 .../telemetry/src/self_tracing/README.md | 37 ++++++++++++++++
 .../telemetry/src/self_tracing/encoder.rs | 44 ++++++++++++++-----
 .../telemetry/src/self_tracing/formatter.rs | 27 +++++++++++-
 4 files changed, 94 insertions(+), 18 deletions(-)
 create mode 100644 rust/otap-dataflow/crates/telemetry/src/self_tracing/README.md

diff --git a/rust/otap-dataflow/crates/telemetry/src/error.rs b/rust/otap-dataflow/crates/telemetry/src/error.rs
index 79cdcba188..77bef53512 100644
--- a/rust/otap-dataflow/crates/telemetry/src/error.rs
+++ b/rust/otap-dataflow/crates/telemetry/src/error.rs
@@ -27,8 +27,4 @@ pub enum Error {
     /// Error during configuration of a component.
     #[error("Configuration error: {0}")]
     ConfigurationError(String),
-
-    /// Error during tracing subscriber initialization.
-    #[error("Tracing initialization error: {0}")]
-    TracingInitError(String),
 }
diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/README.md b/rust/otap-dataflow/crates/telemetry/src/self_tracing/README.md
new file mode 100644
index 0000000000..a09e3bc847
--- /dev/null
+++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/README.md
@@ -0,0 +1,37 @@
+# Internal logging handlers
+
+This module contains a simple encoder and formatter for use with Tokio
+tracing subscribers to enable a lightweight bridge into the
+OTAP-Dataflow engine.
+
+## OTLP bytes first
+
+This module currently implements encoding support for OTLP bytes, in
+two forms:
+
+- Partial: The `LogRecord` type encodes the dynamic arguments from the event
+  along with a timestamp, yielding a representation that can be passed into
+  an internal pipeline because it is already encoded as bytes. This representation
+  allows sorting and filtering records before encoding full OTLP messages.
+- Full: The `DirectLogRecordEncoder` type supports appending the OTLP bytes
+  representation for the complete LogRecord (without Scope and Resource wrappers).
+
+## Internal logging configuration
+
+Internal logging is the default configuration. In this configuration,
+messages are written to the console. Note that this can impact performance
+due to contention over the console itself; however, this configuration
+does not use any `Sync + Send` synchronization.
+
+```yaml
+service:
+  telemetry:
+    logs:
+      level: "debug"
+      internal:
+        enabled: true
+```
+
+The default configuration is subject to change. 
In the future, the `internal` +configuration block will be extended to route internal logs through dedicated +internal OTAP dataflow pipelines. diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs index c89cc8bcf6..401bacb4ba 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs @@ -51,12 +51,43 @@ impl<'buf> DirectLogRecordEncoder<'buf> { self.buf .encode_string(LOG_RECORD_SEVERITY_TEXT, callsite.level.as_str()); + // Encode event_name (field 12, string) - format: "target::name (file:line)" + encode_event_name(self.buf, callsite); + self.buf.extend_from_slice(&record.body_attrs_bytes); self.buf.len() - start_len } } +/// Encode the event name from callsite metadata. +/// Format: "target::name (file:line)" or "target::name" if no file/line. +fn encode_event_name(buf: &mut ProtoBuffer, callsite: &SavedCallsite) { + proto_encode_len_delimited_unknown_size!( + LOG_RECORD_EVENT_NAME, + { + buf.extend_from_slice(callsite.target.as_bytes()); + buf.extend_from_slice(b"::"); + buf.extend_from_slice(callsite.name.as_bytes()); + if let (Some(file), Some(line)) = (callsite.file, callsite.line) { + let _ = write!(ProtoWriter(buf), " ({}:{})", file, line); + } + }, + buf + ); +} + +/// Wrapper that implements fmt::Write for a ProtoBuffer. +struct ProtoWriter<'a>(&'a mut ProtoBuffer); + +impl FmtWrite for ProtoWriter<'_> { + #[inline] + fn write_str(&mut self, s: &str) -> std::fmt::Result { + self.0.extend_from_slice(s.as_bytes()); + Ok(()) + } +} + /// Visitor that directly encodes tracing fields to protobuf. pub struct DirectFieldVisitor<'buf> { buf: &'buf mut ProtoBuffer, @@ -201,23 +232,12 @@ fn encode_debug_string(buf: &mut ProtoBuffer, value: &dyn std::fmt::Debug) { proto_encode_len_delimited_unknown_size!( ANY_VALUE_STRING_VALUE, { - let _ = write!(DebugWriter(buf), "{:?}", value); + let _ = write!(ProtoWriter(buf), "{:?}", value); }, buf ); } -/// Wrapper that implements fmt::Write for a ProtoBuffer. 
-struct DebugWriter<'a>(&'a mut ProtoBuffer); - -impl FmtWrite for DebugWriter<'_> { - #[inline] - fn write_str(&mut self, s: &str) -> std::fmt::Result { - self.0.extend_from_slice(s.as_bytes()); - Ok(()) - } -} - impl tracing::field::Visit for DirectFieldVisitor<'_> { fn record_f64(&mut self, field: &tracing::field::Field, value: f64) { if field.name() == "message" { diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs index 96c928c82d..f1289e2f59 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs @@ -455,6 +455,15 @@ mod tests { let attrs_text = format_attrs(&expected_attrs); let expected_suffix = format!(": {}{}", expected_body, attrs_text); + // Verify event_name has correct prefix (target::name) + let expected_prefix = "otap_df_telemetry::self_tracing::formatter::tests::event crates/telemetry/src/self_tracing/formatter.rs:"; + assert!( + decoded.event_name.starts_with(expected_prefix), + "event_name should start with '{}', got: {}", + expected_prefix, + decoded.event_name + ); + // Verify text formatting let binding = formatted.lock().unwrap(); let (ts_str, rest) = strip_ts(&binding); @@ -511,7 +520,7 @@ mod tests { let record = LogRecord { callsite_id: tracing::callsite::Identifier(&TEST_CALLSITE), // 2024-01-15T12:30:45.678Z - timestamp_ns: 1705321845_678_000_000, + timestamp_ns: 1_705_321_845_678_000_000, body_attrs_bytes: Bytes::new(), }; @@ -531,6 +540,20 @@ mod tests { output, "\x1b[2m2024-01-15T12:30:45.678Z\x1b[0m \x1b[32mINFO\x1b[0m \x1b[1mtest_module::submodule::test_event (src/test.rs:123)\x1b[0m: \n" ); + + // Verify full OTLP encoding with known callsite + let mut buf = ProtoBuffer::with_capacity(256); + let mut encoder = DirectLogRecordEncoder::new(&mut buf); + let _ = encoder.encode_log_record(record, &test_callsite()); + let decoded = ProtoLogRecord::decode(buf.into_bytes().as_ref()).expect("decode failed"); + + assert_eq!(decoded.time_unix_nano, 1_705_321_845_678_000_000); + assert_eq!(decoded.severity_number, 9); // INFO + assert_eq!(decoded.severity_text, "INFO"); + assert_eq!( + decoded.event_name, + "test_module::submodule::test_event (src/test.rs:123)" + ); } #[test] @@ -554,7 +577,7 @@ mod tests { let record = LogRecord { callsite_id: tracing::callsite::Identifier(&TEST_CALLSITE), - timestamp_ns: 1705321845_678_000_000, + timestamp_ns: 1_705_321_845_678_000_000, body_attrs_bytes: Bytes::from(encoded), }; From 241158e74e9680e95a7b26ef3efdd16be6d20c65 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Wed, 7 Jan 2026 13:55:16 -0800 Subject: [PATCH 33/92] fix win test --- .../crates/telemetry/src/self_tracing/formatter.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs index f1289e2f59..a98e66b97d 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs @@ -455,8 +455,8 @@ mod tests { let attrs_text = format_attrs(&expected_attrs); let expected_suffix = format!(": {}{}", expected_body, attrs_text); - // Verify event_name has correct prefix (target::name) - let expected_prefix = "otap_df_telemetry::self_tracing::formatter::tests::event crates/telemetry/src/self_tracing/formatter.rs:"; + // Verify event_name has correct prefix. 
Note: file:line are not always available, not tested. + let expected_prefix = "otap_df_telemetry::self_tracing::formatter::tests::event"; assert!( decoded.event_name.starts_with(expected_prefix), "event_name should start with '{}', got: {}", From 93c118ee05befaf3cd987ed903844f081829c1fd Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Wed, 7 Jan 2026 14:07:29 -0800 Subject: [PATCH 34/92] RawLoggingLayer --- .../crates/telemetry/src/self_tracing/formatter.rs | 6 +++--- rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs index a98e66b97d..ac1a6647b8 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs @@ -78,11 +78,11 @@ pub struct ConsoleWriter { } /// A minimal alternative to `tracing_subscriber::fmt::layer()`. -pub struct RawLayer { +pub struct RawLoggingLayer { writer: ConsoleWriter, } -impl RawLayer { +impl RawLoggingLayer { /// Return a new formatting layer with associated writer. #[must_use] pub fn new(writer: ConsoleWriter) -> Self { @@ -315,7 +315,7 @@ impl ConsoleWriter { } } -impl TracingLayer for RawLayer +impl TracingLayer for RawLoggingLayer where S: Subscriber + for<'a> LookupSpan<'a>, { diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs index d6c45f0c7a..3507ac027a 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs @@ -19,7 +19,7 @@ use tracing::callsite::Identifier; use tracing::{Level, Metadata}; pub use encoder::DirectLogRecordEncoder; -pub use formatter::{ConsoleWriter, RawLayer as RawLoggingLayer}; +pub use formatter::{ConsoleWriter, RawLoggingLayer}; /// A log record with structural metadata and pre-encoded body/attributes. #[derive(Debug, Clone)] From e235e8581c1ea27926aac4dab8be469dfcf0c285 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Wed, 7 Jan 2026 14:12:11 -0800 Subject: [PATCH 35/92] use module basename --- .../crates/telemetry/src/{self_tracing/mod.rs => self_tracing.rs} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename rust/otap-dataflow/crates/telemetry/src/{self_tracing/mod.rs => self_tracing.rs} (100%) diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing.rs similarity index 100% rename from rust/otap-dataflow/crates/telemetry/src/self_tracing/mod.rs rename to rust/otap-dataflow/crates/telemetry/src/self_tracing.rs From 54d6bf4c51c84cc6b913a69a25f5a2332e5afbe8 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Wed, 7 Jan 2026 14:17:08 -0800 Subject: [PATCH 36/92] ws --- rust/otap-dataflow/crates/telemetry/src/self_tracing/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/README.md b/rust/otap-dataflow/crates/telemetry/src/self_tracing/README.md index a09e3bc847..256fcf6ca3 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/README.md +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/README.md @@ -32,6 +32,6 @@ service: enabled: true ``` -The default configuration is subject to change. In the future, the `internal` +The default configuration is subject to change. 
In the future, the `internal` configuration block will be extended to route internal logs through dedicated internal OTAP dataflow pipelines. From e1e52c85fa719660e6d9a6bae13789b44f657a14 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Wed, 7 Jan 2026 14:37:53 -0800 Subject: [PATCH 37/92] write to console on global subscriber error --- .../crates/telemetry/src/opentelemetry_client.rs | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs b/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs index 8eaf8ecce2..59849bd678 100644 --- a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs +++ b/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs @@ -95,6 +95,13 @@ impl OpentelemetryClient { let tracing_setup = tracing_subscriber::registry().with(get_env_filter(config.logs.level)); + let logerr = |err| { + use std::io::Write; + let _ = std::io::stderr().write_fmt(format_args!( + "could not install global tracing/logging subscriber: {err}" + )); + }; + let (logger_provider, runtime) = if !config.logs.internal.enabled { let (logger_provider, runtime) = LoggerProvider::configure(sdk_resource, &config.logs, runtime)?.into_parts(); @@ -108,7 +115,9 @@ impl OpentelemetryClient { // Try to initialize the global subscriber. In tests, this may fail if already set, // which is acceptable as we're only validating the configuration works. - let _ = tracing_setup.with(fmt_layer).with(sdk_layer).try_init(); + if let Err(err) = tracing_setup.with(fmt_layer).with(sdk_layer).try_init() { + logerr(err); + } (Some(logger_provider), runtime) } else { let writer = if std::env::var("NO_COLOR").is_ok() { @@ -117,7 +126,9 @@ impl OpentelemetryClient { ConsoleWriter::color() }; // See comment above. - let _ = tracing_setup.with(RawLoggingLayer::new(writer)).try_init(); + if let Err(err) = tracing_setup.with(RawLoggingLayer::new(writer)).try_init() { + logerr(err); + } (None, runtime) }; From 7a1baedc0a8bd0671fd53f6db4b8c22d4fe238d0 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Wed, 7 Jan 2026 16:04:57 -0800 Subject: [PATCH 38/92] store &Metadata, make accessors --- .../crates/telemetry/src/self_tracing.rs | 57 +++++++++++-------- .../telemetry/src/self_tracing/encoder.rs | 12 ++-- .../telemetry/src/self_tracing/formatter.rs | 18 ++++-- 3 files changed, 53 insertions(+), 34 deletions(-) diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing.rs index 3507ac027a..75d08165f8 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing.rs @@ -14,9 +14,8 @@ use bytes::Bytes; use encoder::DirectFieldVisitor; use otap_df_pdata::otlp::ProtoBuffer; use std::time::{SystemTime, UNIX_EPOCH}; -use tracing::Event; use tracing::callsite::Identifier; -use tracing::{Level, Metadata}; +use tracing::{Event, Level, Metadata}; pub use encoder::DirectLogRecordEncoder; pub use formatter::{ConsoleWriter, RawLoggingLayer}; @@ -39,33 +38,45 @@ pub struct LogRecord { /// for building a map by Identifier. #[derive(Debug, Clone)] pub struct SavedCallsite { - /// Target (e.g., module path) - pub target: &'static str, - - /// Event name - pub name: &'static str, - - /// Source file - pub file: Option<&'static str>, - - /// Source line - pub line: Option, - - /// Severity level - pub level: &'static Level, + /// Tracing metadata. 
+ metadata: &'static Metadata<'static>, } impl SavedCallsite { /// Construct saved callsite information from tracing Metadata. #[must_use] pub fn new(metadata: &'static Metadata<'static>) -> Self { - Self { - level: metadata.level(), - target: metadata.target(), - name: metadata.name(), - file: metadata.file(), - line: metadata.line(), - } + Self { metadata } + } + + /// The level. + #[must_use] + pub fn level(&self) -> &Level { + self.metadata.level() + } + + /// The filename. + #[must_use] + pub fn file(&self) -> Option<&'static str> { + self.metadata.file() + } + + /// The line number. + #[must_use] + pub fn line(&self) -> Option { + self.metadata.line() + } + + /// The target (e.g., module). + #[must_use] + pub fn target(&self) -> &'static str { + self.metadata.target() + } + + /// The event name. + #[must_use] + pub fn name(&self) -> &'static str { + self.metadata.name() } } diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs index 401bacb4ba..633067100f 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs @@ -1,4 +1,4 @@ -// Copyright The OpenTelemetry Authors +// Copyright OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 //! Direct OTLP bytes encoder for tokio-tracing events. @@ -42,14 +42,14 @@ impl<'buf> DirectLogRecordEncoder<'buf> { .extend_from_slice(&record.timestamp_ns.to_le_bytes()); // Encode severity_number (field 2, varint) - let severity = level_to_severity_number(callsite.level); + let severity = level_to_severity_number(callsite.level()); self.buf .encode_field_tag(LOG_RECORD_SEVERITY_NUMBER, wire_types::VARINT); self.buf.encode_varint(severity as u64); // Encode severity_text (field 3, string) self.buf - .encode_string(LOG_RECORD_SEVERITY_TEXT, callsite.level.as_str()); + .encode_string(LOG_RECORD_SEVERITY_TEXT, callsite.level().as_str()); // Encode event_name (field 12, string) - format: "target::name (file:line)" encode_event_name(self.buf, callsite); @@ -66,10 +66,10 @@ fn encode_event_name(buf: &mut ProtoBuffer, callsite: &SavedCallsite) { proto_encode_len_delimited_unknown_size!( LOG_RECORD_EVENT_NAME, { - buf.extend_from_slice(callsite.target.as_bytes()); + buf.extend_from_slice(callsite.target().as_bytes()); buf.extend_from_slice(b"::"); - buf.extend_from_slice(callsite.name.as_bytes()); - if let (Some(file), Some(line)) = (callsite.file, callsite.line) { + buf.extend_from_slice(callsite.name().as_bytes()); + if let (Some(file), Some(line)) = (callsite.file(), callsite.line()) { let _ = write!(ProtoWriter(buf), " ({}:{})", file, line); } }, diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs index ac1a6647b8..cfa3a74c3d 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs @@ -137,7 +137,7 @@ impl ConsoleWriter { Self::write_timestamp(&mut w, record.timestamp_ns); cm.write_ansi(&mut w, AnsiCode::Reset); let _ = w.write_all(b" "); - cm.write_level(&mut w, callsite.level); + cm.write_level(&mut w, callsite.level()); cm.write_ansi(&mut w, AnsiCode::Bold); Self::write_event_name(&mut w, callsite); cm.write_ansi(&mut w, AnsiCode::Reset); @@ -151,10 +151,10 @@ impl ConsoleWriter { /// Write callsite details as event_name to buffer. 
#[inline]
 fn write_event_name(w: &mut BufWriter<'_>, callsite: &SavedCallsite) {
-        let _ = w.write_all(callsite.target.as_bytes());
+        let _ = w.write_all(callsite.target().as_bytes());
         let _ = w.write_all(b"::");
-        let _ = w.write_all(callsite.name.as_bytes());
-        if let (Some(file), Some(line)) = (callsite.file, callsite.line) {
+        let _ = w.write_all(callsite.name().as_bytes());
+        if let (Some(file), Some(line)) = (callsite.file(), callsite.line()) {
             let _ = write!(w, " ({}:{})", file, line);
         }
     }
@@ -330,8 +330,16 @@ where
         let mut buf = [0u8; LOG_BUFFER_SIZE];
         let len = self.writer.write_log_record(&mut buf, &record, &callsite);
-        self.writer.write_line(callsite.level, &buf[..len]);
+        self.writer.write_line(callsite.level(), &buf[..len]);
     }
+
+    // Note: this tracing layer does not implement Span-related features
+    // available through LookupSpan. This is important future work and will
+    // require introducing a notion of context. Presently, the Tokio tracing
+    // Context does not pass through the OTAP dataflow engine.
+    //
+    // We are likely to issue span events as events, rather than building
+    // Span objects at runtime.
 }

 #[cfg(test)]

From 9e86542cf1062bb28dc66e62234b702da6331308 Mon Sep 17 00:00:00 2001
From: Joshua MacDonald
Date: Wed, 7 Jan 2026 16:13:38 -0800
Subject: [PATCH 39/92] TODO about truncation

---
 .../crates/telemetry/src/self_tracing/formatter.rs | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs
index cfa3a74c3d..414529100f 100644
--- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs
+++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs
@@ -15,6 +15,9 @@ use tracing_subscriber::layer::{Context, Layer as TracingLayer};
 use tracing_subscriber::registry::LookupSpan;

 /// Default buffer size for log formatting.
+///
+/// TODO: Append a note to the log message when truncation occurs; today
+/// the log record is silently truncated.
 pub const LOG_BUFFER_SIZE: usize = 4096;

 /// ANSI codes a.k.a. "Select Graphic Rendition" codes.
@@ -122,8 +125,6 @@ impl ConsoleWriter {
     }

     /// Write a LogRecord to a byte buffer. Returns the number of bytes written.
-    ///
-    /// This is the efficient path - no heap allocation, writes directly to the buffer.
     pub fn write_log_record(
         &self,
         buf: &mut [u8],

From f277f0978768e9c71bcfb872bc74134248006bc1 Mon Sep 17 00:00:00 2001
From: Joshua MacDonald
Date: Wed, 7 Jan 2026 16:40:34 -0800
Subject: [PATCH 40/92] copyright

---
 rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs
index 633067100f..6d2d0459f9 100644
--- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs
+++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs
@@ -1,4 +1,4 @@
-// Copyright OpenTelemetry Authors
+// Copyright The OpenTelemetry Authors
 // SPDX-License-Identifier: Apache-2.0

 //! Direct OTLP bytes encoder for tokio-tracing events. 
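Before moving on: the formatter tests above pin down a single point of the severity mapping (`severity_number == 9` for INFO). As a reference, here is a sketch of what `level_to_severity_number` presumably computes, assuming the standard OTLP severity-number table for the remaining levels:

```rust
use tracing::Level;

/// Sketch of the tracing Level -> OTLP SeverityNumber mapping. INFO = 9 is
/// confirmed by the test above; TRACE/DEBUG/WARN/ERROR assume the standard
/// OTLP table (1, 5, 13, 17).
fn level_to_severity_number(level: &Level) -> i32 {
    match *level {
        Level::TRACE => 1,
        Level::DEBUG => 5,
        Level::INFO => 9,
        Level::WARN => 13,
        _ => 17, // ERROR
    }
}
```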
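Similarly, the `ProtoWriter` adapter touched by the patch above is one instance of a generally useful pattern: implement `fmt::Write` over a byte buffer so that `Debug`/`Display` output streams directly into it, with no intermediate `String` allocation. A self-contained version over `Vec<u8>` (standing in for the crate's `ProtoBuffer`, which is not shown here):

```rust
use std::fmt::Write as FmtWrite;

/// Streams formatted output into a byte vector; `Vec<u8>` stands in for
/// the crate's `ProtoBuffer`.
struct VecWriter<'a>(&'a mut Vec<u8>);

impl FmtWrite for VecWriter<'_> {
    #[inline]
    fn write_str(&mut self, s: &str) -> std::fmt::Result {
        self.0.extend_from_slice(s.as_bytes());
        Ok(())
    }
}

fn main() {
    let mut buf = Vec::new();
    let value = vec![1, 2, 3]; // any Debug value works here
    let _ = write!(VecWriter(&mut buf), "{:?}", value);
    assert_eq!(buf, b"[1, 2, 3]");
}
```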
From 1eba8f2da541c8287b5b5d6c59b6e207acf7b166 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Thu, 8 Jan 2026 14:09:31 -0800 Subject: [PATCH 41/92] v1 is a thread local layer --- .../crates/controller/src/lib.rs | 28 +- .../crates/engine/src/pipeline_ctrl.rs | 12 + .../crates/engine/src/runtime_pipeline.rs | 3 + .../crates/telemetry/src/error.rs | 4 + .../otap-dataflow/crates/telemetry/src/lib.rs | 8 + .../crates/telemetry/src/logs.rs | 371 ++++++++++++++++++ 6 files changed, 424 insertions(+), 2 deletions(-) create mode 100644 rust/otap-dataflow/crates/telemetry/src/logs.rs diff --git a/rust/otap-dataflow/crates/controller/src/lib.rs b/rust/otap-dataflow/crates/controller/src/lib.rs index 6c1e39f7cb..0f49c97d22 100644 --- a/rust/otap-dataflow/crates/controller/src/lib.rs +++ b/rust/otap-dataflow/crates/controller/src/lib.rs @@ -36,6 +36,7 @@ use otap_df_state::DeployedPipelineKey; use otap_df_state::event::{ErrorSummary, ObservedEvent}; use otap_df_state::reporter::ObservedEventReporter; use otap_df_state::store::ObservedStateStore; +use otap_df_telemetry::logs::{LogsCollector, LogsReporter, install_thread_log_buffer, uninstall_thread_log_buffer}; use otap_df_telemetry::opentelemetry_client::OpentelemetryClient; use otap_df_telemetry::reporter::MetricsReporter; use otap_df_telemetry::{MetricsSystem, otel_info, otel_info_span, otel_warn}; @@ -115,6 +116,16 @@ impl Controller { obs_state_store.run(cancellation_token) })?; + // Create the logs collection channel and start the collector thread + let (logs_collector, logs_reporter) = LogsCollector::new( + telemetry_config.reporting_channel_size, + ); + // TODO: Store handle for graceful shutdown + let _logs_collector_handle = + spawn_thread_local_task("logs-collector", move |_cancellation_token| { + logs_collector.run() + })?; + // Start one thread per requested core // Get available CPU cores for pinning let requested_cores = Self::select_cores_for_quota( @@ -148,6 +159,7 @@ impl Controller { thread_id, ); let metrics_reporter = metrics_reporter.clone(); + let logs_reporter = logs_reporter.clone(); let thread_name = format!("pipeline-core-{}", core_id.id); let obs_evt_reporter = obs_evt_reporter.clone(); @@ -162,6 +174,7 @@ impl Controller { pipeline_handle, obs_evt_reporter, metrics_reporter, + logs_reporter, pipeline_ctrl_msg_tx, pipeline_ctrl_msg_rx, ) @@ -379,9 +392,14 @@ impl Controller { pipeline_context: PipelineContext, obs_evt_reporter: ObservedEventReporter, metrics_reporter: MetricsReporter, + logs_reporter: LogsReporter, pipeline_ctrl_msg_tx: PipelineCtrlMsgSender, pipeline_ctrl_msg_rx: PipelineCtrlMsgReceiver, ) -> Result, Error> { + // Install thread-local log buffer for this pipeline thread + // Buffer capacity: 1024 entries (TODO: make configurable) + install_thread_log_buffer(1024); + // Create a tracing span for this pipeline thread // so that all logs within this scope include pipeline context. 
let span = otel_info_span!("pipeline_thread", core.id = core_id.id); @@ -415,18 +433,24 @@ impl Controller { )); // Start the pipeline (this will use the current thread's Tokio runtime) - runtime_pipeline + let result = runtime_pipeline .run_forever( pipeline_key, pipeline_context, obs_evt_reporter, metrics_reporter, + logs_reporter, pipeline_ctrl_msg_tx, pipeline_ctrl_msg_rx, ) .map_err(|e| Error::PipelineRuntimeError { source: Box::new(e), - }) + }); + + // Cleanup: uninstall thread-local log buffer + uninstall_thread_log_buffer(); + + result } } diff --git a/rust/otap-dataflow/crates/engine/src/pipeline_ctrl.rs b/rust/otap-dataflow/crates/engine/src/pipeline_ctrl.rs index 601f697081..8bf44dd617 100644 --- a/rust/otap-dataflow/crates/engine/src/pipeline_ctrl.rs +++ b/rust/otap-dataflow/crates/engine/src/pipeline_ctrl.rs @@ -21,6 +21,7 @@ use otap_df_state::event::{ErrorSummary, ObservedEvent}; use otap_df_state::reporter::ObservedEventReporter; use otap_df_telemetry::otel_warn; use otap_df_telemetry::reporter::MetricsReporter; +use otap_df_telemetry::logs::{LogsReporter, flush_thread_log_buffer}; use std::cmp::Reverse; use std::collections::{BinaryHeap, HashMap}; use std::time::{Duration, Instant}; @@ -182,6 +183,8 @@ pub struct PipelineCtrlMsgManager { event_reporter: ObservedEventReporter, /// Global metrics reporter. metrics_reporter: MetricsReporter, + /// Global logs reporter for internal log collection. + logs_reporter: LogsReporter, /// Channel metrics handles for periodic reporting. channel_metrics: Vec, @@ -199,6 +202,7 @@ impl PipelineCtrlMsgManager { control_senders: ControlSenders, event_reporter: ObservedEventReporter, metrics_reporter: MetricsReporter, + logs_reporter: LogsReporter, internal_telemetry: TelemetrySettings, channel_metrics: Vec, ) -> Self { @@ -212,6 +216,7 @@ impl PipelineCtrlMsgManager { delayed_data: BinaryHeap::new(), event_reporter, metrics_reporter, + logs_reporter, channel_metrics, telemetry: internal_telemetry, } @@ -349,6 +354,13 @@ impl PipelineCtrlMsgManager { } } + // Flush internal logs from the thread-local buffer + if let Some(batch) = flush_thread_log_buffer() { + if let Err(err) = self.logs_reporter.try_report(batch) { + otel_warn!("logs.reporting.fail", error = err.to_string()); + } + } + // Deliver all accumulated control messages (best-effort) for (node_id, msg) in to_send { self.send(node_id, msg).await; diff --git a/rust/otap-dataflow/crates/engine/src/runtime_pipeline.rs b/rust/otap-dataflow/crates/engine/src/runtime_pipeline.rs index 691f213c67..69c0e6203c 100644 --- a/rust/otap-dataflow/crates/engine/src/runtime_pipeline.rs +++ b/rust/otap-dataflow/crates/engine/src/runtime_pipeline.rs @@ -14,6 +14,7 @@ use crate::terminal_state::TerminalState; use crate::{exporter::ExporterWrapper, processor::ProcessorWrapper, receiver::ReceiverWrapper}; use otap_df_config::pipeline::PipelineConfig; use otap_df_telemetry::reporter::MetricsReporter; +use otap_df_telemetry::logs::LogsReporter; use crate::context::PipelineContext; use otap_df_state::DeployedPipelineKey; @@ -106,6 +107,7 @@ impl RuntimePipeline { pipeline_context: PipelineContext, event_reporter: ObservedEventReporter, metrics_reporter: MetricsReporter, + logs_reporter: LogsReporter, pipeline_ctrl_msg_tx: PipelineCtrlMsgSender, pipeline_ctrl_msg_rx: PipelineCtrlMsgReceiver, ) -> Result, Error> { @@ -193,6 +195,7 @@ impl RuntimePipeline { control_senders, event_reporter, metrics_reporter, + logs_reporter, internal_telemetry, channel_metrics, ); diff --git 
a/rust/otap-dataflow/crates/telemetry/src/error.rs b/rust/otap-dataflow/crates/telemetry/src/error.rs index 77bef53512..ecab5ce5dd 100644 --- a/rust/otap-dataflow/crates/telemetry/src/error.rs +++ b/rust/otap-dataflow/crates/telemetry/src/error.rs @@ -20,6 +20,10 @@ pub enum Error { #[error("Metrics channel was closed")] MetricsChannelClosed, + /// The logs channel was closed unexpectedly. + #[error("Logs channel was closed")] + LogsChannelClosed, + /// Error during shutdown of a component. #[error("Shutdown error: {0}")] ShutdownError(String), diff --git a/rust/otap-dataflow/crates/telemetry/src/lib.rs b/rust/otap-dataflow/crates/telemetry/src/lib.rs index 58f7a75da5..c484501cab 100644 --- a/rust/otap-dataflow/crates/telemetry/src/lib.rs +++ b/rust/otap-dataflow/crates/telemetry/src/lib.rs @@ -36,6 +36,8 @@ pub mod error; pub mod instrument; /// Internal logs/events module for engine. pub mod internal_events; +/// Internal logs collection and transport. +pub mod logs; pub mod metrics; pub mod opentelemetry_client; pub mod registry; @@ -59,6 +61,12 @@ pub use tracing::info_span as otel_info_span; pub use tracing::trace_span as otel_trace_span; pub use tracing::warn_span as otel_warn_span; +// Re-export commonly used logs types for convenience. +pub use logs::{ + BufferWriterLayer, LogsCollector, LogsReporter, ProducerKeyGuard, current_producer_key, + flush_thread_log_buffer, install_thread_log_buffer, uninstall_thread_log_buffer, +}; + // TODO This should be #[cfg(test)], but something is preventing it from working. // The #[cfg(test)]-labeled otap_batch_processor::test_helpers::from_config // can't load this module unless I remove #[cfg(test)]! See #1304. diff --git a/rust/otap-dataflow/crates/telemetry/src/logs.rs b/rust/otap-dataflow/crates/telemetry/src/logs.rs new file mode 100644 index 0000000000..7ce8dd9fd2 --- /dev/null +++ b/rust/otap-dataflow/crates/telemetry/src/logs.rs @@ -0,0 +1,371 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +//! Internal logs collection for OTAP-Dataflow. +//! +//! Each pipeline thread has a single LogBuffer (via thread-local) that accumulates +//! log records. The pipeline runtime periodically flushes this buffer to the admin +//! via a channel. Components don't need to do anything special for logging. + +use crate::error::Error; +use crate::registry::MetricsKey; +use crate::self_tracing::{ConsoleWriter, LogRecord, SavedCallsite}; +use std::cell::RefCell; +use std::sync::atomic::{AtomicU64, Ordering}; +use tracing::{Event, Subscriber}; +use tracing_subscriber::layer::{Context, Layer as TracingLayer}; +use tracing_subscriber::registry::LookupSpan; + +/// A log entry with optional producer identification. +pub struct LogEntry { + /// The log record (callsite, timestamp, encoded body/attrs). + pub record: LogRecord, + /// Optional key identifying the producing component (for first-party logs). + /// None for third-party logs from libraries. + pub producer_key: Option, +} + +/// A batch of log entries from a pipeline thread. +pub struct LogBatch { + /// The log entries in this batch. + pub entries: Vec, +} + +/// Thread-local log buffer for a pipeline thread. +/// +/// All components on this thread share the same buffer. +/// The pipeline runtime flushes it periodically on a timer. +/// If the buffer fills before flush, new events are dropped. +pub struct LogBuffer { + entries: Vec, + capacity: usize, + dropped_count: u64, +} + +impl LogBuffer { + /// Create a new log buffer with the given capacity. 
+ #[must_use] + pub fn new(capacity: usize) -> Self { + Self { + entries: Vec::with_capacity(capacity.min(256)), + capacity, + dropped_count: 0, + } + } + + /// Push a log entry. If at capacity, the new entry is dropped. + /// + /// Returns true if the entry was added, false if dropped. + pub fn push(&mut self, entry: LogEntry) -> bool { + if self.entries.len() >= self.capacity { + self.dropped_count += 1; + false + } else { + self.entries.push(entry); + true + } + } + + /// Push just a LogRecord with no producer key (for third-party events). + /// + /// Returns true if the entry was added, false if dropped. + pub fn push_record(&mut self, record: LogRecord) -> bool { + self.push(LogEntry { + record, + producer_key: None, + }) + } + + /// Check if the buffer has entries to flush. + #[must_use] + pub fn needs_flush(&self) -> bool { + !self.entries.is_empty() + } + + /// Drain all entries from the buffer, returning them as a batch. + pub fn drain(&mut self) -> LogBatch { + LogBatch { + entries: std::mem::take(&mut self.entries), + } + } + + /// Returns the number of dropped entries since creation. + #[must_use] + pub fn dropped_count(&self) -> u64 { + self.dropped_count + } +} + +// Thread-local log buffer for the current pipeline thread. +thread_local! { + static CURRENT_LOG_BUFFER: RefCell> = const { RefCell::new(None) }; +} + +// Thread-local current MetricsKey for third-party instrumentation. +// When a component is executing, this is set to that component's key so that +// any tracing::info!() calls from libraries can be attributed to the component. +thread_local! { + static CURRENT_PRODUCER_KEY: RefCell> = const { RefCell::new(None) }; +} + +/// Guard that sets the current producer key for the duration of a scope. +/// +/// When dropped, restores the previous key (or None). +/// This allows nested scoping if needed. +pub struct ProducerKeyGuard { + previous: Option, +} + +impl ProducerKeyGuard { + /// Enter a scope with the given producer key. + /// + /// Third-party log events will be attributed to this key until + /// the guard is dropped. + #[must_use] + pub fn enter(key: MetricsKey) -> Self { + let previous = CURRENT_PRODUCER_KEY.with(|cell| cell.borrow_mut().replace(key)); + Self { previous } + } +} + +impl Drop for ProducerKeyGuard { + fn drop(&mut self) { + CURRENT_PRODUCER_KEY.with(|cell| { + *cell.borrow_mut() = self.previous; + }); + } +} + +/// Get the current producer key (if any component scope is active). +#[must_use] +pub fn current_producer_key() -> Option { + CURRENT_PRODUCER_KEY.with(|cell| *cell.borrow()) +} + +/// Install a log buffer for the current thread. +/// +/// Called by the pipeline runtime when the thread starts. +pub fn install_thread_log_buffer(capacity: usize) { + CURRENT_LOG_BUFFER.with(|cell| { + *cell.borrow_mut() = Some(LogBuffer::new(capacity)); + }); +} + +/// Uninstall the log buffer for the current thread. +/// +/// Called by the pipeline runtime when the thread shuts down. +pub fn uninstall_thread_log_buffer() { + CURRENT_LOG_BUFFER.with(|cell| { + *cell.borrow_mut() = None; + }); +} + +/// Push a log record to the current thread's buffer (if installed). +/// +/// If `producer_key` is None, uses the current thread-local producer key +/// (set via `ProducerKeyGuard::enter()`). This allows third-party instrumentation +/// to be attributed to the currently-executing component. +/// +/// Returns false if no buffer is installed or buffer is full (event dropped). 
+pub fn push_to_thread_buffer(record: LogRecord, producer_key: Option) -> bool { + CURRENT_LOG_BUFFER.with(|cell| { + if let Some(ref mut buffer) = *cell.borrow_mut() { + // Use explicit key if provided, otherwise use thread-current key + let key = producer_key.or_else(current_producer_key); + buffer.push(LogEntry { record, producer_key: key }) + } else { + false + } + }) +} + +/// Flush the current thread's log buffer, returning the batch. +/// +/// Called by the pipeline runtime on a timer. +pub fn flush_thread_log_buffer() -> Option { + CURRENT_LOG_BUFFER.with(|cell| { + cell.borrow_mut().as_mut().and_then(|buffer| { + if buffer.needs_flush() { + Some(buffer.drain()) + } else { + None + } + }) + }) +} + +/// Reporter for sending log batches through a channel. +#[derive(Clone)] +pub struct LogsReporter { + sender: flume::Sender, +} + +impl LogsReporter { + /// Create a new LogsReporter with the given sender. + #[must_use] + pub fn new(sender: flume::Sender) -> Self { + Self { sender } + } + + /// Try to send a batch, non-blocking. + /// + /// If the channel is full, the batch is dropped (returns Ok). + /// Only returns Err if the channel is disconnected. + pub fn try_report(&self, batch: LogBatch) -> Result<(), Error> { + match self.sender.try_send(batch) { + Ok(()) => Ok(()), + Err(flume::TrySendError::Full(_)) => Ok(()), + Err(flume::TrySendError::Disconnected(_)) => Err(Error::LogsChannelClosed), + } + } +} + +/// Collector that receives log batches and writes them to console. +pub struct LogsCollector { + receiver: flume::Receiver, + writer: ConsoleWriter, +} + +impl LogsCollector { + /// Create a new collector and reporter pair. + #[must_use] + pub fn new(channel_size: usize) -> (Self, LogsReporter) { + let (sender, receiver) = flume::bounded(channel_size); + let collector = Self { + receiver, + writer: ConsoleWriter::color(), + }; + let reporter = LogsReporter::new(sender); + (collector, reporter) + } + + /// Run the collection loop until the channel is closed. + pub async fn run(self) -> Result<(), Error> { + loop { + match self.receiver.recv_async().await { + Ok(batch) => { + self.write_batch(batch); + } + Err(_) => { + return Ok(()); + } + } + } + } + + /// Write a batch of log entries to console. + fn write_batch(&self, batch: LogBatch) { + for entry in batch.entries { + // Identifier.0 is the &'static dyn Callsite + let metadata = entry.record.callsite_id.0.metadata(); + let saved = SavedCallsite::new(metadata); + let output = self.writer.format_log_record(&entry.record, &saved); + // TODO: include producer_key in output when present + eprint!("{}", output); + } + } +} + +// ============================================================================ +// BufferWriterLayer - Tracing Layer that captures events to thread-local buffer +// ============================================================================ + +/// A tracing Layer that writes events to the thread-local LogBuffer. +/// +/// This layer should be installed in the global subscriber. It only captures +/// events on threads that have a LogBuffer installed (via `install_thread_log_buffer`). +/// On other threads (e.g., the admin thread), events are silently ignored by this +/// layer (but may be handled by other layers in the subscriber stack). 
+/// +/// # Drop Statistics +/// +/// The layer tracks global drop statistics: +/// - `events_captured`: Total events successfully pushed to buffers +/// - `events_dropped_no_buffer`: Events on threads without a buffer installed +/// - `events_dropped_buffer_full`: Events dropped because buffer was at capacity +pub struct BufferWriterLayer { + /// Count of events successfully captured to a buffer. + events_captured: AtomicU64, + /// Count of events dropped because no buffer was installed on the thread. + events_dropped_no_buffer: AtomicU64, + /// Count of events dropped because the buffer was full. + events_dropped_buffer_full: AtomicU64, +} + +impl BufferWriterLayer { + /// Create a new BufferWriterLayer. + #[must_use] + pub fn new() -> Self { + Self { + events_captured: AtomicU64::new(0), + events_dropped_no_buffer: AtomicU64::new(0), + events_dropped_buffer_full: AtomicU64::new(0), + } + } + + /// Get the number of events successfully captured. + #[must_use] + pub fn events_captured(&self) -> u64 { + self.events_captured.load(Ordering::Relaxed) + } + + /// Get the number of events dropped because no buffer was installed. + #[must_use] + pub fn events_dropped_no_buffer(&self) -> u64 { + self.events_dropped_no_buffer.load(Ordering::Relaxed) + } + + /// Get the number of events dropped because buffers were full. + #[must_use] + pub fn events_dropped_buffer_full(&self) -> u64 { + self.events_dropped_buffer_full.load(Ordering::Relaxed) + } + + /// Get total events dropped (no buffer + buffer full). + #[must_use] + pub fn events_dropped_total(&self) -> u64 { + self.events_dropped_no_buffer() + self.events_dropped_buffer_full() + } +} + +impl Default for BufferWriterLayer { + fn default() -> Self { + Self::new() + } +} + +impl TracingLayer for BufferWriterLayer +where + S: Subscriber + for<'a> LookupSpan<'a>, +{ + fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) { + // Create the LogRecord from the event + let record = LogRecord::new(event); + + // Try to push to the thread-local buffer + // producer_key=None means use current_producer_key() from thread-local + CURRENT_LOG_BUFFER.with(|cell| { + if let Some(ref mut buffer) = *cell.borrow_mut() { + let key = current_producer_key(); + if buffer.push(LogEntry { + record, + producer_key: key, + }) { + let _ = self.events_captured.fetch_add(1, Ordering::Relaxed); + } else { + let _ = self.events_dropped_buffer_full.fetch_add(1, Ordering::Relaxed); + } + } else { + // No buffer installed on this thread - drop the event + let _ = self.events_dropped_no_buffer.fetch_add(1, Ordering::Relaxed); + } + }); + } + + fn event_enabled(&self, _event: &Event<'_>, _ctx: Context<'_, S>) -> bool { + // Only process events if a buffer is installed on this thread. + // This allows other layers (like RawLoggingLayer) to handle events + // on threads without buffers (e.g., admin thread). 
+ CURRENT_LOG_BUFFER.with(|cell| cell.borrow().is_some()) + } +} From af37863fa2b1da6871c1e5a38718dd7cf71f903d Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Thu, 8 Jan 2026 16:50:06 -0800 Subject: [PATCH 42/92] v2 adds config --- .../src/pipeline/service/telemetry/logs.rs | 188 ++++++++++++++++-- .../crates/controller/src/lib.rs | 135 +++++++------ .../otap-dataflow/crates/telemetry/src/lib.rs | 5 +- .../crates/telemetry/src/logs.rs | 171 +++++++++++----- .../telemetry/src/opentelemetry_client.rs | 72 +++---- .../opentelemetry_client/logger_provider.rs | 8 +- .../telemetry/src/self_tracing/formatter.rs | 17 +- 7 files changed, 417 insertions(+), 179 deletions(-) diff --git a/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs b/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs index a246adf208..5b4b7ce7cd 100644 --- a/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs +++ b/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs @@ -15,17 +15,122 @@ pub struct LogsConfig { #[serde(default)] pub level: LogLevel, - /// Internal log configuration options - #[serde(default = "default_internal")] - pub internal: LogsInternalConfig, + /// Logging strategy configuration for different thread contexts. + #[serde(default)] + pub strategies: LoggingStrategies, + + /// How the admin thread handles received log events. + #[serde(default)] + pub output: LogOutputConfig, - /// The list of log processors to configure. + /// The list of log processors to configure (for OpenTelemetry SDK output mode). + /// Only used when `output.mode` is set to `opentelemetry`. #[serde(default)] pub processors: Vec, } -fn default_internal() -> LogsInternalConfig { - LogsInternalConfig { enabled: true } +/// Logging strategies for different execution contexts. +/// +/// Controls how log events are captured and routed to the admin thread. +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +pub struct LoggingStrategies { + /// Strategy for non-engine threads (admin, metrics aggregator, etc.). + /// These threads don't have an EffectHandler and use the global tracing subscriber. + /// Default: `global` (send to admin channel). + #[serde(default = "default_global_strategy")] + pub global: ProducerStrategy, + + /// Strategy for engine/pipeline threads. + /// These threads have an EffectHandler and use buffered logging. + /// Default: `buffered` (thread-local buffer, batch flush on timer). + #[serde(default = "default_engine_strategy")] + pub engine: ProducerStrategy, +} + +impl Default for LoggingStrategies { + fn default() -> Self { + Self { + global: default_global_strategy(), + engine: default_engine_strategy(), + } + } +} + +fn default_global_strategy() -> ProducerStrategy { + ProducerStrategy::Global +} + +fn default_engine_strategy() -> ProducerStrategy { + ProducerStrategy::Buffered +} + +/// Producer strategy: how log events are captured and routed to the admin thread. +/// +/// Used to configure logging behavior for different thread types: +/// - Global subscriber for non-engine threads +/// - Engine threads with EffectHandler +/// - Per-component (future: for ITR downstream to prevent feedback) +#[derive(Debug, Clone, Copy, Serialize, Deserialize, JsonSchema, PartialEq, Eq)] +#[serde(rename_all = "lowercase")] +pub enum ProducerStrategy { + /// No-op: log events are silently dropped. + /// Use for ITR-downstream components to prevent feedback loops. 
+ Noop, + + /// Global channel: send individual events to the admin collector thread. + /// Non-blocking (drops if channel full). Default for non-engine threads. + Global, + + /// Buffered: accumulate events in thread-local buffer, flush on timer. + /// Default for engine threads. Events are batched before sending to admin. + Buffered, +} + +/// Configuration for how the admin thread outputs received log events. +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +pub struct LogOutputConfig { + /// The output mode for log events received by the admin thread. + #[serde(default = "default_output_mode")] + pub mode: OutputMode, + + /// Ring buffer capacity for `memory` mode (number of log entries). + /// Also used for the `/logs` HTTP endpoint regardless of mode. + #[serde(default = "default_ring_buffer_capacity")] + pub ring_buffer_capacity: usize, +} + +impl Default for LogOutputConfig { + fn default() -> Self { + Self { + mode: default_output_mode(), + ring_buffer_capacity: default_ring_buffer_capacity(), + } + } +} + +fn default_output_mode() -> OutputMode { + OutputMode::Raw +} + +fn default_ring_buffer_capacity() -> usize { + 1000 +} + +/// Output mode: what the admin thread does with received log events. +#[derive(Debug, Clone, Copy, Serialize, Deserialize, JsonSchema, PartialEq, Eq)] +#[serde(rename_all = "lowercase")] +pub enum OutputMode { + /// Raw logging: format and print directly to console (stdout/stderr). + /// ERROR/WARN go to stderr, others to stdout. + Raw, + + /// Memory only: store in ring buffer for `/logs` HTTP endpoint. + /// No console output. Useful for headless/production deployments. + Memory, + + /// OpenTelemetry SDK: forward to OTel logging SDK with configured processors. + /// Events are sent through the OTel appender bridge for OTLP export. + Opentelemetry, } /// Log level for internal engine logs. @@ -45,14 +150,6 @@ pub enum LogLevel { Error, } -/// Log internal configuration -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default, PartialEq)] -#[serde(rename_all = "lowercase")] -pub struct LogsInternalConfig { - /// Is internal logging in use? 
- pub enabled: bool, -} - #[cfg(test)] mod tests { use super::*; @@ -88,4 +185,67 @@ mod tests { assert!(config.processors.is_empty()); Ok(()) } + + #[test] + fn test_logging_strategies_deserialize() { + let yaml_str = r#" + level: "info" + strategies: + global: global + engine: buffered + "#; + let config: LogsConfig = serde_yaml::from_str(yaml_str).unwrap(); + assert_eq!(config.strategies.global, ProducerStrategy::Global); + assert_eq!(config.strategies.engine, ProducerStrategy::Buffered); + } + + #[test] + fn test_logging_strategies_default() { + let config = LogsConfig::default(); + assert_eq!(config.strategies.global, ProducerStrategy::Global); + assert_eq!(config.strategies.engine, ProducerStrategy::Buffered); + assert_eq!(config.output.mode, OutputMode::Raw); + } + + #[test] + fn test_output_modes() { + let yaml_str = r#" + level: "info" + output: + mode: memory + ring_buffer_capacity: 5000 + "#; + let config: LogsConfig = serde_yaml::from_str(yaml_str).unwrap(); + assert_eq!(config.output.mode, OutputMode::Memory); + assert_eq!(config.output.ring_buffer_capacity, 5000); + } + + #[test] + fn test_opentelemetry_output() { + let yaml_str = r#" + level: "info" + output: + mode: opentelemetry + processors: + - batch: + exporter: + console: + "#; + let config: LogsConfig = serde_yaml::from_str(yaml_str).unwrap(); + assert_eq!(config.output.mode, OutputMode::Opentelemetry); + assert_eq!(config.processors.len(), 1); + } + + #[test] + fn test_noop_strategy_for_itr() { + let yaml_str = r#" + level: "info" + strategies: + global: noop + engine: noop + "#; + let config: LogsConfig = serde_yaml::from_str(yaml_str).unwrap(); + assert_eq!(config.strategies.global, ProducerStrategy::Noop); + assert_eq!(config.strategies.engine, ProducerStrategy::Noop); + } } diff --git a/rust/otap-dataflow/crates/controller/src/lib.rs b/rust/otap-dataflow/crates/controller/src/lib.rs index 0f49c97d22..cd175f4a3e 100644 --- a/rust/otap-dataflow/crates/controller/src/lib.rs +++ b/rust/otap-dataflow/crates/controller/src/lib.rs @@ -36,7 +36,7 @@ use otap_df_state::DeployedPipelineKey; use otap_df_state::event::{ErrorSummary, ObservedEvent}; use otap_df_state::reporter::ObservedEventReporter; use otap_df_state::store::ObservedStateStore; -use otap_df_telemetry::logs::{LogsCollector, LogsReporter, install_thread_log_buffer, uninstall_thread_log_buffer}; +use otap_df_telemetry::logs::{LogsCollector, LogsReporter, install_thread_log_buffer, uninstall_thread_log_buffer, with_engine_thread_subscriber}; use otap_df_telemetry::opentelemetry_client::OpentelemetryClient; use otap_df_telemetry::reporter::MetricsReporter; use otap_df_telemetry::{MetricsSystem, otel_info, otel_info_span, otel_warn}; @@ -84,7 +84,20 @@ impl Controller { node_ctrl_msg_channel_size = settings.default_node_ctrl_msg_channel_size, pipeline_ctrl_msg_channel_size = settings.default_pipeline_ctrl_msg_channel_size ); - let opentelemetry_client = OpentelemetryClient::new(telemetry_config)?; + + // Create the logs collection channel before OpentelemetryClient so it can + // install the DirectChannelLayer for global subscriber. 
+ let (logs_collector, logs_reporter) = LogsCollector::new( + telemetry_config.reporting_channel_size, + ); + // Start the logs collector thread + // TODO: Store handle for graceful shutdown + let _logs_collector_handle = + spawn_thread_local_task("logs-collector", move |_cancellation_token| { + logs_collector.run() + })?; + + let opentelemetry_client = OpentelemetryClient::new(telemetry_config, logs_reporter.clone())?; let metrics_system = MetricsSystem::new(telemetry_config); let metrics_dispatcher = metrics_system.dispatcher(); let metrics_reporter = metrics_system.reporter(); @@ -116,16 +129,6 @@ impl Controller { obs_state_store.run(cancellation_token) })?; - // Create the logs collection channel and start the collector thread - let (logs_collector, logs_reporter) = LogsCollector::new( - telemetry_config.reporting_channel_size, - ); - // TODO: Store handle for graceful shutdown - let _logs_collector_handle = - spawn_thread_local_task("logs-collector", move |_cancellation_token| { - logs_collector.run() - })?; - // Start one thread per requested core // Get available CPU cores for pinning let requested_cores = Self::select_cores_for_quota( @@ -396,61 +399,65 @@ impl Controller { pipeline_ctrl_msg_tx: PipelineCtrlMsgSender, pipeline_ctrl_msg_rx: PipelineCtrlMsgReceiver, ) -> Result, Error> { - // Install thread-local log buffer for this pipeline thread - // Buffer capacity: 1024 entries (TODO: make configurable) - install_thread_log_buffer(1024); - - // Create a tracing span for this pipeline thread - // so that all logs within this scope include pipeline context. - let span = otel_info_span!("pipeline_thread", core.id = core_id.id); - let _guard = span.enter(); - - // Pin thread to specific core - if !core_affinity::set_for_current(core_id) { - // Continue execution even if pinning fails. - // This is acceptable because the OS will still schedule the thread, but performance may be less predictable. - otel_warn!( - "CoreAffinity.SetFailed", - message = "Failed to set core affinity for pipeline thread. Performance may be less predictable." - ); - } + // Run the entire pipeline thread with the engine-specific tracing subscriber. + // This ensures all logs go to the thread-local buffer instead of the global channel. + with_engine_thread_subscriber(|| { + // Install thread-local log buffer for this pipeline thread + // Buffer capacity: 1024 entries (TODO: make configurable) + install_thread_log_buffer(1024); + + // Create a tracing span for this pipeline thread + // so that all logs within this scope include pipeline context. + let span = otel_info_span!("pipeline_thread", core.id = core_id.id); + let _guard = span.enter(); + + // Pin thread to specific core + if !core_affinity::set_for_current(core_id) { + // Continue execution even if pinning fails. + // This is acceptable because the OS will still schedule the thread, but performance may be less predictable. + otel_warn!( + "CoreAffinity.SetFailed", + message = "Failed to set core affinity for pipeline thread. Performance may be less predictable." 
+ ); + } - obs_evt_reporter.report(ObservedEvent::admitted( - pipeline_key.clone(), - Some("Pipeline admission successful.".to_owned()), - )); + obs_evt_reporter.report(ObservedEvent::admitted( + pipeline_key.clone(), + Some("Pipeline admission successful.".to_owned()), + )); - // Build the runtime pipeline from the configuration - let runtime_pipeline = pipeline_factory - .build(pipeline_context.clone(), pipeline_config.clone()) - .map_err(|e| Error::PipelineRuntimeError { - source: Box::new(e), - })?; + // Build the runtime pipeline from the configuration + let runtime_pipeline = pipeline_factory + .build(pipeline_context.clone(), pipeline_config.clone()) + .map_err(|e| Error::PipelineRuntimeError { + source: Box::new(e), + })?; + + obs_evt_reporter.report(ObservedEvent::ready( + pipeline_key.clone(), + Some("Pipeline initialization successful.".to_owned()), + )); + + // Start the pipeline (this will use the current thread's Tokio runtime) + let result = runtime_pipeline + .run_forever( + pipeline_key, + pipeline_context, + obs_evt_reporter, + metrics_reporter, + logs_reporter, + pipeline_ctrl_msg_tx, + pipeline_ctrl_msg_rx, + ) + .map_err(|e| Error::PipelineRuntimeError { + source: Box::new(e), + }); + + // Cleanup: uninstall thread-local log buffer + uninstall_thread_log_buffer(); - obs_evt_reporter.report(ObservedEvent::ready( - pipeline_key.clone(), - Some("Pipeline initialization successful.".to_owned()), - )); - - // Start the pipeline (this will use the current thread's Tokio runtime) - let result = runtime_pipeline - .run_forever( - pipeline_key, - pipeline_context, - obs_evt_reporter, - metrics_reporter, - logs_reporter, - pipeline_ctrl_msg_tx, - pipeline_ctrl_msg_rx, - ) - .map_err(|e| Error::PipelineRuntimeError { - source: Box::new(e), - }); - - // Cleanup: uninstall thread-local log buffer - uninstall_thread_log_buffer(); - - result + result + }) } } diff --git a/rust/otap-dataflow/crates/telemetry/src/lib.rs b/rust/otap-dataflow/crates/telemetry/src/lib.rs index c484501cab..05712d2a25 100644 --- a/rust/otap-dataflow/crates/telemetry/src/lib.rs +++ b/rust/otap-dataflow/crates/telemetry/src/lib.rs @@ -63,8 +63,9 @@ pub use tracing::warn_span as otel_warn_span; // Re-export commonly used logs types for convenience. pub use logs::{ - BufferWriterLayer, LogsCollector, LogsReporter, ProducerKeyGuard, current_producer_key, - flush_thread_log_buffer, install_thread_log_buffer, uninstall_thread_log_buffer, + BufferWriterLayer, DirectChannelLayer, LogsCollector, LogsReporter, ProducerKeyGuard, + current_producer_key, flush_thread_log_buffer, install_thread_log_buffer, + uninstall_thread_log_buffer, with_engine_thread_subscriber, }; // TODO This should be #[cfg(test)], but something is preventing it from working. 
diff --git a/rust/otap-dataflow/crates/telemetry/src/logs.rs b/rust/otap-dataflow/crates/telemetry/src/logs.rs index 7ce8dd9fd2..dea72860f2 100644 --- a/rust/otap-dataflow/crates/telemetry/src/logs.rs +++ b/rust/otap-dataflow/crates/telemetry/src/logs.rs @@ -13,8 +13,10 @@ use crate::self_tracing::{ConsoleWriter, LogRecord, SavedCallsite}; use std::cell::RefCell; use std::sync::atomic::{AtomicU64, Ordering}; use tracing::{Event, Subscriber}; -use tracing_subscriber::layer::{Context, Layer as TracingLayer}; +use tracing_subscriber::filter::LevelFilter; +use tracing_subscriber::layer::{Context, Layer as TracingLayer, SubscriberExt}; use tracing_subscriber::registry::LookupSpan; +use tracing_subscriber::{EnvFilter, Registry}; /// A log entry with optional producer identification. pub struct LogEntry { @@ -259,37 +261,26 @@ impl LogsCollector { // Identifier.0 is the &'static dyn Callsite let metadata = entry.record.callsite_id.0.metadata(); let saved = SavedCallsite::new(metadata); - let output = self.writer.format_log_record(&entry.record, &saved); + // Use ConsoleWriter's routing: ERROR/WARN to stderr, others to stdout + self.writer.print_log_record(&entry.record, &saved); // TODO: include producer_key in output when present - eprint!("{}", output); } } } // ============================================================================ -// BufferWriterLayer - Tracing Layer that captures events to thread-local buffer +// BufferWriterLayer - For engine threads with thread-local buffer // ============================================================================ -/// A tracing Layer that writes events to the thread-local LogBuffer. +/// A tracing Layer for engine threads that writes to thread-local LogBuffer. /// -/// This layer should be installed in the global subscriber. It only captures -/// events on threads that have a LogBuffer installed (via `install_thread_log_buffer`). -/// On other threads (e.g., the admin thread), events are silently ignored by this -/// layer (but may be handled by other layers in the subscriber stack). -/// -/// # Drop Statistics -/// -/// The layer tracks global drop statistics: -/// - `events_captured`: Total events successfully pushed to buffers -/// - `events_dropped_no_buffer`: Events on threads without a buffer installed -/// - `events_dropped_buffer_full`: Events dropped because buffer was at capacity +/// This layer is installed via `with_default()` on each engine thread. +/// Events are accumulated in the thread-local buffer and flushed on a timer. pub struct BufferWriterLayer { - /// Count of events successfully captured to a buffer. + /// Count of events successfully captured to the buffer. events_captured: AtomicU64, - /// Count of events dropped because no buffer was installed on the thread. - events_dropped_no_buffer: AtomicU64, /// Count of events dropped because the buffer was full. - events_dropped_buffer_full: AtomicU64, + events_dropped: AtomicU64, } impl BufferWriterLayer { @@ -298,8 +289,7 @@ impl BufferWriterLayer { pub fn new() -> Self { Self { events_captured: AtomicU64::new(0), - events_dropped_no_buffer: AtomicU64::new(0), - events_dropped_buffer_full: AtomicU64::new(0), + events_dropped: AtomicU64::new(0), } } @@ -309,22 +299,10 @@ impl BufferWriterLayer { self.events_captured.load(Ordering::Relaxed) } - /// Get the number of events dropped because no buffer was installed. 
- #[must_use] - pub fn events_dropped_no_buffer(&self) -> u64 { - self.events_dropped_no_buffer.load(Ordering::Relaxed) - } - - /// Get the number of events dropped because buffers were full. - #[must_use] - pub fn events_dropped_buffer_full(&self) -> u64 { - self.events_dropped_buffer_full.load(Ordering::Relaxed) - } - - /// Get total events dropped (no buffer + buffer full). + /// Get the number of events dropped because buffer was full. #[must_use] - pub fn events_dropped_total(&self) -> u64 { - self.events_dropped_no_buffer() + self.events_dropped_buffer_full() + pub fn events_dropped(&self) -> u64 { + self.events_dropped.load(Ordering::Relaxed) } } @@ -339,33 +317,118 @@ where S: Subscriber + for<'a> LookupSpan<'a>, { fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) { - // Create the LogRecord from the event let record = LogRecord::new(event); + let producer_key = current_producer_key(); - // Try to push to the thread-local buffer - // producer_key=None means use current_producer_key() from thread-local CURRENT_LOG_BUFFER.with(|cell| { if let Some(ref mut buffer) = *cell.borrow_mut() { - let key = current_producer_key(); - if buffer.push(LogEntry { - record, - producer_key: key, - }) { + if buffer.push(LogEntry { record, producer_key }) { let _ = self.events_captured.fetch_add(1, Ordering::Relaxed); } else { - let _ = self.events_dropped_buffer_full.fetch_add(1, Ordering::Relaxed); + let _ = self.events_dropped.fetch_add(1, Ordering::Relaxed); } - } else { - // No buffer installed on this thread - drop the event - let _ = self.events_dropped_no_buffer.fetch_add(1, Ordering::Relaxed); } + // No buffer = programming error on engine thread, silently drop }); } +} + +// ============================================================================ +// DirectChannelLayer - Global fallback for non-engine threads +// ============================================================================ + +/// A tracing Layer for non-engine threads that sends directly to channel. +/// +/// This is installed as the global subscriber. Events are sent immediately +/// to the LogsCollector (non-blocking, dropped if channel is full). +pub struct DirectChannelLayer { + /// Reporter for sending to the channel. + reporter: LogsReporter, + /// Count of events successfully sent. + events_captured: AtomicU64, + /// Count of events dropped because channel was full. + events_dropped: AtomicU64, +} + +impl DirectChannelLayer { + /// Create a new DirectChannelLayer with the given reporter. + #[must_use] + pub fn new(reporter: LogsReporter) -> Self { + Self { + reporter, + events_captured: AtomicU64::new(0), + events_dropped: AtomicU64::new(0), + } + } + + /// Get the number of events successfully sent. + #[must_use] + pub fn events_captured(&self) -> u64 { + self.events_captured.load(Ordering::Relaxed) + } - fn event_enabled(&self, _event: &Event<'_>, _ctx: Context<'_, S>) -> bool { - // Only process events if a buffer is installed on this thread. - // This allows other layers (like RawLoggingLayer) to handle events - // on threads without buffers (e.g., admin thread). - CURRENT_LOG_BUFFER.with(|cell| cell.borrow().is_some()) + /// Get the number of events dropped because channel was full. 
+    #[must_use]
+    pub fn events_dropped(&self) -> u64 {
+        self.events_dropped.load(Ordering::Relaxed)
     }
 }
+
+impl<S> TracingLayer<S> for DirectChannelLayer
+where
+    S: Subscriber + for<'a> LookupSpan<'a>,
+{
+    fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) {
+        let record = LogRecord::new(event);
+        // Non-engine threads don't have producer_key context
+        let batch = LogBatch {
+            entries: vec![LogEntry {
+                record,
+                producer_key: None,
+            }],
+        };
+
+        match self.reporter.sender.try_send(batch) {
+            Ok(()) => {
+                let _ = self.events_captured.fetch_add(1, Ordering::Relaxed);
+            }
+            Err(flume::TrySendError::Full(_)) => {
+                let _ = self.events_dropped.fetch_add(1, Ordering::Relaxed);
+            }
+            Err(flume::TrySendError::Disconnected(_)) => {
+                // Channel closed, nothing we can do
+            }
+        }
+    }
+}
+
+// ============================================================================
+// Engine Thread Subscriber Setup
+// ============================================================================
+
+/// Create a subscriber for engine threads that uses BufferWriterLayer.
+///
+/// This subscriber captures events to the thread-local buffer instead of
+/// sending them to the channel directly.
+fn create_engine_thread_subscriber() -> impl Subscriber {
+    // Use the same filter as the global subscriber (INFO by default, RUST_LOG override)
+    let filter = EnvFilter::builder()
+        .with_default_directive(LevelFilter::INFO.into())
+        .from_env_lossy();
+
+    Registry::default()
+        .with(filter)
+        .with(BufferWriterLayer::new())
+}
+
+/// Run a closure with the engine thread subscriber as the default.
+///
+/// This should be called at the top of each engine thread to ensure all
+/// logging on that thread goes to the thread-local buffer.
+pub fn with_engine_thread_subscriber<F, R>(f: F) -> R
+where
+    F: FnOnce() -> R,
+{
+    let subscriber = create_engine_thread_subscriber();
+    tracing::subscriber::with_default(subscriber, f)
+}
diff --git a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs b/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs
index 59849bd678..6c206dc25b 100644
--- a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs
+++ b/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs
@@ -9,7 +9,8 @@ pub mod meter_provider;
 use opentelemetry::KeyValue;
 use opentelemetry_sdk::{Resource, logs::SdkLoggerProvider, metrics::SdkMeterProvider};
 use otap_df_config::pipeline::service::telemetry::{
-    AttributeValue, AttributeValueArray, TelemetryConfig, logs::LogLevel,
+    AttributeValue, AttributeValueArray, TelemetryConfig,
+    logs::{LogLevel, ProducerStrategy},
 };
 use tracing::level_filters::LevelFilter;
 use tracing_subscriber::EnvFilter;
@@ -17,8 +18,8 @@ use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
 
 use crate::{
     error::Error,
-    opentelemetry_client::{logger_provider::LoggerProvider, meter_provider::MeterProvider},
-    self_tracing::{ConsoleWriter, RawLoggingLayer},
+    logs::{DirectChannelLayer, LogsReporter},
+    opentelemetry_client::meter_provider::MeterProvider,
 };
 
 /// Client for the OpenTelemetry SDK.
@@ -85,13 +86,13 @@ impl OpentelemetryClient {
     ///
     /// TODO: Evaluate also alternatives for the contention caused by the global
     /// OpenTelemetry logger provider added as layer.
-    pub fn new(config: &TelemetryConfig) -> Result<Self, Error> {
+    pub fn new(config: &TelemetryConfig, logs_reporter: LogsReporter) -> Result<Self, Error> {
         let sdk_resource = Self::configure_resource(&config.resource);
 
         let runtime = None;
 
         let (meter_provider, runtime) =
-            MeterProvider::configure(sdk_resource.clone(), &config.metrics, runtime)?.into_parts();
+            MeterProvider::configure(sdk_resource, &config.metrics, runtime)?.into_parts();
 
         let tracing_setup = tracing_subscriber::registry().with(get_env_filter(config.logs.level));
 
@@ -102,43 +103,41 @@
             ));
         };
 
-        let (logger_provider, runtime) = if !config.logs.internal.enabled {
-            let (logger_provider, runtime) =
-                LoggerProvider::configure(sdk_resource, &config.logs, runtime)?.into_parts();
-
-            // Tokio provides a console formatting layer, OTel
-            // provides other behaviors.
-            let fmt_layer = tracing_subscriber::fmt::layer().with_thread_names(true);
-            let sdk_layer = opentelemetry_appender_tracing::layer::OpenTelemetryTracingBridge::new(
-                &logger_provider,
-            );
-
-            // Try to initialize the global subscriber. In tests, this may fail if already set,
-            // which is acceptable as we're only validating the configuration works.
-            if let Err(err) = tracing_setup.with(fmt_layer).with(sdk_layer).try_init() {
-                logerr(err);
+        // Configure the global subscriber based on strategies.global.
+        // Engine threads override this with BufferWriterLayer via with_default().
+        match config.logs.strategies.global {
+            ProducerStrategy::Noop => {
+                // No-op: just install the filter, events are dropped
+                if let Err(err) = tracing_setup.try_init() {
+                    logerr(err);
+                }
+            }
+            ProducerStrategy::Global => {
+                // Global channel: send events to admin collector thread
+                let channel_layer = DirectChannelLayer::new(logs_reporter);
+                if let Err(err) = tracing_setup.with(channel_layer).try_init() {
+                    logerr(err);
+                }
             }
-            (Some(logger_provider), runtime)
-        } else {
-            let writer = if std::env::var("NO_COLOR").is_ok() {
-                ConsoleWriter::no_color()
-            } else {
-                ConsoleWriter::color()
-            };
-            // See comment above.
-            if let Err(err) = tracing_setup.with(RawLoggingLayer::new(writer)).try_init() {
-                logerr(err);
+            ProducerStrategy::Buffered => {
+                // Buffered is only valid for engine threads; treat it as global for the
+                // global subscriber. This is a misconfiguration, but we handle it gracefully.
+                let channel_layer = DirectChannelLayer::new(logs_reporter);
+                if let Err(err) = tracing_setup.with(channel_layer).try_init() {
+                    logerr(err);
+                }
             }
+        }
 
-            (None, runtime)
-        };
+        // Note: OpenTelemetry SDK forwarding is handled by the LogsCollector on the admin thread,
+        // not at the global subscriber level. The output.mode config controls that behavior.
 
         //TODO: Configure traces provider.
Ok(Self { _runtime: runtime, meter_provider, - logger_provider, + logger_provider: None, }) } @@ -228,12 +227,14 @@ mod tests { }; use super::*; + use crate::logs::LogsCollector; use std::{f64::consts::PI, time::Duration}; #[test] fn test_configure_minimal_opentelemetry_client() -> Result<(), Error> { let config = TelemetryConfig::default(); - let client = OpentelemetryClient::new(&config)?; + let (_collector, reporter) = LogsCollector::new(10); + let client = OpentelemetryClient::new(&config, reporter)?; let meter = global::meter("test-meter"); let counter = meter.u64_counter("test-counter").build(); @@ -267,7 +268,8 @@ mod tests { logs: LogsConfig::default(), resource, }; - let client = OpentelemetryClient::new(&config)?; + let (_collector, reporter) = LogsCollector::new(10); + let client = OpentelemetryClient::new(&config, reporter)?; let meter = global::meter("test-meter"); let counter = meter.u64_counter("test-counter").build(); diff --git a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client/logger_provider.rs b/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client/logger_provider.rs index 443584e908..f5f2660b16 100644 --- a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client/logger_provider.rs +++ b/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client/logger_provider.rs @@ -189,7 +189,7 @@ mod tests { use opentelemetry_sdk::Resource; use otap_df_config::pipeline::service::telemetry::{ logs::{ - LogLevel, LogsConfig, LogsInternalConfig, + LogLevel, LogsConfig, processors::{ BatchLogProcessorConfig, batch::{LogBatchProcessorExporterConfig, otlp::OtlpExporterConfig}, @@ -204,7 +204,6 @@ mod tests { let resource = Resource::builder().build(); let logger_config = LogsConfig { level: LogLevel::Info, - internal: LogsInternalConfig::default(), processors: vec![ otap_df_config::pipeline::service::telemetry::logs::processors::LogProcessorConfig::Batch( BatchLogProcessorConfig { @@ -212,6 +211,7 @@ mod tests { }, ), ], + ..Default::default() }; let logger_provider = LoggerProvider::configure(resource, &logger_config, None)?; let (sdk_logger_provider, _) = logger_provider.into_parts(); @@ -228,7 +228,6 @@ mod tests { let resource = Resource::builder().build(); let logger_config = LogsConfig { level: LogLevel::Info, - internal: LogsInternalConfig::default(), processors: vec![ otap_df_config::pipeline::service::telemetry::logs::processors::LogProcessorConfig::Batch( BatchLogProcessorConfig { @@ -241,6 +240,7 @@ mod tests { }, ), ], + ..Default::default() }; let logger_provider = LoggerProvider::configure(resource, &logger_config, None)?; let (sdk_logger_provider, runtime_option) = logger_provider.into_parts(); @@ -259,8 +259,8 @@ mod tests { let resource = Resource::builder().build(); let logger_config = LogsConfig { level: LogLevel::default(), - internal: LogsInternalConfig::default(), processors: vec![], + ..Default::default() }; let logger_provider = LoggerProvider::configure(resource, &logger_config, None)?; let (sdk_logger_provider, _) = logger_provider.into_parts(); diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs index 414529100f..adb51f795e 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs @@ -124,6 +124,16 @@ impl ConsoleWriter { String::from_utf8_lossy(&buf[..len]).into_owned() } + /// Write a LogRecord to stdout or stderr (based on level). 
+ /// + /// ERROR and WARN go to stderr, others go to stdout. + /// This is the same routing logic used by RawLoggingLayer. + pub fn print_log_record(&self, record: &LogRecord, callsite: &SavedCallsite) { + let mut buf = [0u8; LOG_BUFFER_SIZE]; + let len = self.write_log_record(&mut buf, record, callsite); + self.write_line(callsite.level(), &buf[..len]); + } + /// Write a LogRecord to a byte buffer. Returns the number of bytes written. pub fn write_log_record( &self, @@ -320,18 +330,13 @@ impl TracingLayer for RawLoggingLayer where S: Subscriber + for<'a> LookupSpan<'a>, { - // Allocates a buffer on the stack, formats the event to a LogRecord - // with partial OTLP bytes. fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) { // TODO: there are allocations implied here that we would prefer // to avoid, it will be an extensive change in the ProtoBuffer to // stack-allocate this temporary. let record = LogRecord::new(event); let callsite = SavedCallsite::new(event.metadata()); - - let mut buf = [0u8; LOG_BUFFER_SIZE]; - let len = self.writer.write_log_record(&mut buf, &record, &callsite); - self.writer.write_line(callsite.level(), &buf[..len]); + self.writer.print_log_record(&record, &callsite); } // Note! This tracing layer does not implement Span-related features From 16fe056202713d5799004df9b13e74152e3b88a5 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Thu, 8 Jan 2026 17:52:00 -0800 Subject: [PATCH 43/92] v3 is mvp --- .../configs/internal-telemetry.yaml | 8 +- .../crates/engine/src/pipeline_ctrl.rs | 9 + .../crates/telemetry/ARCHITECTURE.md | 198 +++++ .../crates/telemetry/src/logs.rs | 132 ++- .../crates/telemetry/src/registry.rs | 1 + .../crates/telemetry/src/self_tracing.rs | 11 +- .../telemetry/src/self_tracing/formatter.rs | 38 +- .../docs/fixed-proto-buffer-design.md | 754 ++++++++++++++++ .../docs/internal-logs-design.md | 812 ++++++++++++++++++ .../docs/thread-local-telemetry-research.md | 713 +++++++++++++++ 10 files changed, 2586 insertions(+), 90 deletions(-) create mode 100644 rust/otap-dataflow/crates/telemetry/ARCHITECTURE.md create mode 100644 rust/otap-dataflow/docs/fixed-proto-buffer-design.md create mode 100644 rust/otap-dataflow/docs/internal-logs-design.md create mode 100644 rust/otap-dataflow/docs/thread-local-telemetry-research.md diff --git a/rust/otap-dataflow/configs/internal-telemetry.yaml b/rust/otap-dataflow/configs/internal-telemetry.yaml index 3977862f2a..9df80b380e 100644 --- a/rust/otap-dataflow/configs/internal-telemetry.yaml +++ b/rust/otap-dataflow/configs/internal-telemetry.yaml @@ -39,6 +39,8 @@ service: logs: # The default level is "info". level: "debug" - internal: - # Internal logging is enabled by default. 
-        enabled: true
+      strategies:
+        global: global
+        engine: buffered
+      output:
+        mode: raw
diff --git a/rust/otap-dataflow/crates/engine/src/pipeline_ctrl.rs b/rust/otap-dataflow/crates/engine/src/pipeline_ctrl.rs
index 8bf44dd617..6f2128b71e 100644
--- a/rust/otap-dataflow/crates/engine/src/pipeline_ctrl.rs
+++ b/rust/otap-dataflow/crates/engine/src/pipeline_ctrl.rs
@@ -435,6 +435,7 @@ mod tests {
     use otap_df_config::pipeline::PipelineSettings;
     use otap_df_config::{PipelineGroupId, PipelineId};
     use otap_df_state::store::ObservedStateStore;
+    use otap_df_telemetry::logs::LogsCollector;
     use std::collections::HashMap;
     use std::time::{Duration, Instant};
     use tokio::task::LocalSet;
@@ -487,6 +488,9 @@ mod tests {
             thread_id,
         );
 
+        // Create a LogsReporter for testing (collector is dropped, that's ok for tests)
+        let (_collector, logs_reporter) = LogsCollector::new(10);
+
         let manager = PipelineCtrlMsgManager::new(
             DeployedPipelineKey {
                 pipeline_group_id,
@@ -498,6 +502,7 @@ mod tests {
             control_senders,
             observed_state_store.reporter(),
             metrics_reporter,
+            logs_reporter,
             pipeline_settings.telemetry.clone(),
             Vec::new(),
         );
@@ -904,6 +909,9 @@ mod tests {
             thread_id,
         );
 
+        // Create a LogsReporter for testing (collector is dropped, that's ok for tests)
+        let (_collector, logs_reporter) = LogsCollector::new(10);
+
         // Create manager with empty control_senders map (no registered nodes)
         let manager = PipelineCtrlMsgManager::<()>::new(
             pipeline_key,
@@ -912,6 +920,7 @@
             ControlSenders::new(),
             observed_state_store.reporter(),
             metrics_reporter,
+            logs_reporter,
             TelemetrySettings::default(),
             Vec::new(),
         );
diff --git a/rust/otap-dataflow/crates/telemetry/ARCHITECTURE.md b/rust/otap-dataflow/crates/telemetry/ARCHITECTURE.md
new file mode 100644
index 0000000000..2540b6093e
--- /dev/null
+++ b/rust/otap-dataflow/crates/telemetry/ARCHITECTURE.md
@@ -0,0 +1,198 @@
+# Internal Telemetry Collection Architecture & Development Plan
+
+## Architecture
+
+The internal telemetry SDK is designed for the engine to safely
+consume its own telemetry, and we intend for the self-hosted telemetry
+pipeline to be the standard configuration for all OpenTelemetry
+signals.
+
+Consuming self-generated telemetry presents the potential for a kind
+of feedback loop: situations where a telemetry pipeline creates
+pressure on itself. We have designed for the OTAP dataflow engine to
+remain reliable even with this kind of dependency on itself.
+
+## Internal telemetry receiver
+
+The Internal Telemetry Receiver or "ITR" is an OTAP-Dataflow receiver
+component that produces telemetry from internal sources. An internal
+telemetry pipeline consists of one or more ITR components and any of
+the connected processor and exporter components reachable from ITR
+source nodes.
+
+To begin with, every OTAP-Dataflow component is configured with an
+internal telemetry SDK meant for primary instrumentation of that
+component. Components are required to exclusively use the internal
+telemetry SDK for self-diagnostics, as they are considered first party
+in this exchange.
+
+The internal telemetry receiver is the SDK's counterpart, making it
+second party as it is responsible for routing internal telemetry. The
+ITR cannot use the internal telemetry SDK itself, making it an
+invisible member of the pipeline. The ITR can be instrumented using
+third-party instrumentation (e.g., `tracing`, `log` crates) provided
+it can guarantee there is no potential for feedback (e.g., a single
+`tracing::info()` statement at startup).
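+
+As a small, self-contained illustration of this division of roles, the
+sketch below contrasts a first-party component with the ITR. The
+`EffectHandler` stand-in and the stubbed `otel_info!` macro are
+hypothetical; only the `otel_info!(effect, ...)` call shape (used later
+in this document) and the single startup `tracing::info!` statement are
+taken from this design.
+
+```rust
+/// Hypothetical stand-in for the engine's effect handler type.
+pub struct EffectHandler;
+
+/// Stubbed here so the sketch compiles; the real macro routes the
+/// event through the configured internal telemetry SDK.
+macro_rules! otel_info {
+    ($effect:expr, $msg:expr) => {{
+        let _ = ($effect, $msg);
+    }};
+}
+
+/// A regular component: first party, internal telemetry SDK only.
+fn component_work(effect: &EffectHandler) {
+    otel_info!(effect, "interesting thing");
+}
+
+/// The ITR: no internal SDK; at most a bounded, feedback-free use of
+/// third-party instrumentation, e.g. a single statement at startup.
+fn itr_startup() {
+    tracing::info!("internal telemetry receiver started");
+}
+```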
+
+## Pitfall avoidance
+
+The OTAP-Dataflow engine is safeguarded against many self-induced
+telemetry pitfalls, as follows:
+
+- OTAP-Dataflow components reachable from an ITR cannot be configured
+  to send to an ITR node. This avoids a direct feedback cycle for
+  internal telemetry because the components cannot reach
+  themselves. For example, ITR and downstream components may be
+  configured for raw logging, no metrics, etc.
+- ITR instances share access to one or more threads with an associated
+  async runtime. They use these dedicated threads to isolate internal
+  telemetry processes that use third-party instrumentation.
+- A thread-local variable is used to redirect third-party
+  instrumentation in dedicated internal telemetry threads. Internal
+  telemetry threads automatically install a safe configuration
+  that drops third-party instrumentation instead of creating feedback.
+- Components under observation (non-ITR components) have internal
+  telemetry events routed to queues in the OTAP-Dataflow pipeline on
+  the same core, which avoids blocking the engine. Under normal
+  circumstances, first-party instrumentation is handled on the CPU
+  core that produced the telemetry, so each core processes its own
+  internal telemetry in isolation.
+- An option to configure internal telemetry in multiple ways,
+  including the no-op implementation, multi-threaded subscriber,
+  routing to the same-core ITR, and/or raw logging.
+
+## OTLP-bytes first
+
+As a key design decision, the OTAP-Dataflow internal telemetry data
+path produces OTLP-bytes first. Because OTLP bytes is one of the
+built-in `OtapPayload` formats, once we have the OTLP bytes encoding
+of an event we are able to send it to an OTAP-Dataflow pipeline. To
+obtain these bytes, we will build a custom [Tokio `tracing`
+Event][TOKIOEVENT] handler to produce OTLP bytes before dispatching to
+an internal pipeline, used (in different configurations) for first-
+and third-party instrumentation.
+
+We use an intermediate representation in which the dynamic elements of
+the `tracing` event are encoded while primitive fields and metadata
+remain in structured form. These are encoded using the OTLP
+`opentelemetry.proto.logs.v1.LogRecord` protocol.
+
+[TOKIOEVENT]: https://docs.rs/tracing/latest/tracing/struct.Event.html
+
+## Raw logging
+
+We support formatting events for direct printing to the console from
+OTLP bytes. For the dynamic encoding, these are consumed using
+`otap_df_pdata::views::logs::LogsDataView`, our zero-copy accessor. We
+refer to this most-basic form of printing to the console as raw
+logging because it is a safe configuration early in the lifetime of a
+process. Note that the views implementation reads the encoded bytes in
+place, so no intermediate decoding step is required.
+
+This configuration is meant for development purposes; it is likely to
+introduce contention over the console.
+
+## Routing
+
+The two internal logs data paths are:
+
+- Third-party: Tokio `tracing` global subscriber: third-party log
+  events, instrumentation in code without access to an OTAP-Dataflow
+  `EffectHandler`. These are handled in a dedicated internal telemetry
+  thread.
+- First-party: components with a local or shared `EffectHandler` use
+  dedicated macros (e.g., `otel_info!(effect, "interesting thing")`).
+  These use the configured internal telemetry SDK, and for ordinary
+  components (not ITR-downstream) they are routed through the ITR on
+  the same core. These are always non-blocking APIs; the internal SDK
+  must drop logs instead of blocking the pipeline. A sketch of this
+  non-blocking path follows this list.
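+
+To make the non-blocking contract above concrete, the following sketch
+shows the shape of the first-party emit path: the event is encoded to
+OTLP bytes immediately, then handed to the same-core queue without
+waiting, counting a drop when the queue is full. The names
+(`EncodedLogRecord`, `SameCoreQueue`, `emit_log`) are hypothetical and
+not part of the crate's API; only the drop-instead-of-block behavior
+is taken from this design.
+
+```rust
+use std::sync::atomic::{AtomicU64, Ordering};
+
+/// A log record already encoded as OTLP bytes (hypothetical).
+pub struct EncodedLogRecord {
+    pub otlp_bytes: Vec<u8>,
+}
+
+/// A bounded same-core queue plus a drop counter (hypothetical).
+pub struct SameCoreQueue {
+    sender: flume::Sender<EncodedLogRecord>,
+    dropped: AtomicU64,
+}
+
+impl SameCoreQueue {
+    /// Non-blocking emit: never waits, drops (and counts) on overflow.
+    pub fn emit_log(&self, record: EncodedLogRecord) {
+        match self.sender.try_send(record) {
+            Ok(()) => {}
+            // Queue full: the SDK must drop rather than block the pipeline.
+            Err(flume::TrySendError::Full(_)) => {
+                let _ = self.dropped.fetch_add(1, Ordering::Relaxed);
+            }
+            // Receiver gone (e.g., during shutdown): nothing useful to do.
+            Err(flume::TrySendError::Disconnected(_)) => {}
+        }
+    }
+}
+```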
+
+## Development plan
+
+Each of the items below is relatively small, estimated at 300-500
+lines of new code plus new tests.
+
+### LogRecord: Tokio tracing Event and Metadata to LogRecordView
+
+When we receive a Tokio tracing event, whether through a
+`tracing::info!` macro (or similar) or through a dedicated
+`EffectHandler`-based API, the same thing happens:
+
+Create a `LogRecord`, a struct derived from `tracing::Event` and
+`tracing::Metadata`, containing raw LogRecord fields extracted from
+the tracing macro layer plus a fresh timestamp. Log record attributes
+and the log event body are encoded as the "attributes and body bytes"
+field of `LogRecord`; the other fields are copied.
+
+With this record, we can defer formatting or encoding the entire
+record until later. We can:
+
+- For raw logging, format directly for the console
+- Finish the full OTLP bytes encoding for the `LogRecord`
+- Sort and filter before combining into a `LogsData`.
+
+### OTLP-bytes console logging handler
+
+We require a way to print OTLP bytes as human-readable log lines. We
+cannot easily re-use the Tokio `tracing` format layer for this;
+however, we can use the `LogsDataView` trait with `RawLogsData` to
+format human-readable text for the console directly from OTLP bytes.
+
+This OTLP-bytes-to-human-readable logic will be used to implement raw
+logging.
+
+### Global logs collection thread
+
+An OTAP-Dataflow engine will run at least one global logs collection
+thread. These threads receive encoded (OTLP bytes) log events from
+various locations in the process. The global logs collection thread is
+special because it sets an anti-recursion bit in the thread-local
+state to prevent logging in its own export path.
+
+The global logs collection thread is configured as one instance (or
+more, if needed) consuming logs from the global Tokio `tracing`
+subscriber. In this thread, we'll configure the OpenTelemetry SDK or a
+dedicated OTAP-Dataflow pipeline (by configuration) for logs export.
+
+Because global logs collection threads are used as a fallback for
+`EffectHandler`-level logs and because third-party libraries may call
+Tokio `tracing` APIs, we arrange to explicitly disallow these threads
+from logging. The macros are disabled from executing.
+
+### Global and Per-core Event Router
+
+OTAP-Dataflow provides an option to route internal telemetry to a
+pipeline on the same core as the effect handler that produced the
+telemetry. When a component logging API is used on the `EffectHandler`
+or when a tokio `tracing` event occurs on the `EffectHandler` thread,
+it will be routed using thread-local state so that the event is
+immediately encoded and stored or flushed, without blocking the effect
+handler.
+
+When a telemetry event is routed directly, as in this case, and
+`send_message()` succeeds, it means there was queue space to accept
+the log record on the same core. When this fails, the configurable
+telemetry router will support options to use the global logs
+collection thread, a raw logger, or do nothing (dropping the internal
+log record), as sketched below.
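+
+A minimal sketch of that fallback logic follows. The `FallbackStrategy`
+enum and `route_event` function are hypothetical names for
+illustration; only the behavior (same-core first, then the configured
+fallback, never blocking) is taken from this design.
+
+```rust
+/// What to do when the same-core queue rejects a record (hypothetical).
+pub enum FallbackStrategy {
+    /// Forward to the global logs collection thread.
+    GlobalThread,
+    /// Print directly to the console.
+    RawLogger,
+    /// Drop the record silently.
+    Noop,
+}
+
+/// Route an OTLP-encoded record: same-core first, then fall back.
+pub fn route_event(
+    same_core: &flume::Sender<Vec<u8>>,
+    global: &flume::Sender<Vec<u8>>,
+    strategy: &FallbackStrategy,
+    otlp_bytes: Vec<u8>,
+) {
+    match same_core.try_send(otlp_bytes) {
+        // Fast path: queue space was available on the same core.
+        Ok(()) => {}
+        Err(flume::TrySendError::Full(bytes))
+        | Err(flume::TrySendError::Disconnected(bytes)) => match strategy {
+            // Best effort; the global channel may itself be full.
+            FallbackStrategy::GlobalThread => {
+                let _ = global.try_send(bytes);
+            }
+            FallbackStrategy::RawLogger => {
+                // Placeholder: a real implementation would format the
+                // OTLP bytes via the views API before printing.
+                eprintln!("raw log: {} OTLP bytes", bytes.len());
+            }
+            FallbackStrategy::Noop => {}
+        },
+    }
+}
+```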
+
+## Example configuration
+
+```yaml
+service:
+  telemetry:
+    logs:
+      level: info
+      internal_collection:
+        enabled: true
+
+        # Per-thread buffer
+        buffer_size_bytes: 65536
+
+        # Individual record size limit
+        max_record_bytes: 16384
+
+        # Bounded channel capacity
+        max_record_count: 10
+
+        # Timer-based flush interval
+        flush_interval: "1s"
+```
diff --git a/rust/otap-dataflow/crates/telemetry/src/logs.rs b/rust/otap-dataflow/crates/telemetry/src/logs.rs
index dea72860f2..9cc8e8f35b 100644
--- a/rust/otap-dataflow/crates/telemetry/src/logs.rs
+++ b/rust/otap-dataflow/crates/telemetry/src/logs.rs
@@ -8,8 +8,7 @@
 //! via a channel. Components don't need to do anything special for logging.
 
 use crate::error::Error;
-use crate::registry::MetricsKey;
-use crate::self_tracing::{ConsoleWriter, LogRecord, SavedCallsite};
+use crate::self_tracing::{ConsoleWriter, LogRecord, ProducerKey, SavedCallsite};
 use std::cell::RefCell;
 use std::sync::atomic::{AtomicU64, Ordering};
 use tracing::{Event, Subscriber};
@@ -18,30 +17,21 @@ use tracing_subscriber::layer::{Context, Layer as TracingLayer, SubscriberExt};
 use tracing_subscriber::registry::LookupSpan;
 use tracing_subscriber::{EnvFilter, Registry};
 
-/// A log entry with optional producer identification.
-pub struct LogEntry {
-    /// The log record (callsite, timestamp, encoded body/attrs).
-    pub record: LogRecord,
-    /// Optional key identifying the producing component (for first-party logs).
-    /// None for third-party logs from libraries.
-    pub producer_key: Option<MetricsKey>,
-}
-
-/// A batch of log entries from a pipeline thread.
+/// A batch of log records from a pipeline thread.
 pub struct LogBatch {
-    /// The log entries in this batch.
-    pub entries: Vec<LogEntry>,
+    /// The log records in this batch.
+    pub records: Vec<LogRecord>,
+    /// Number of records dropped since the last batch (buffer was full).
+    pub dropped_count: u64,
 }
 
 /// Thread-local log buffer for a pipeline thread.
 ///
 /// All components on this thread share the same buffer.
 /// The pipeline runtime flushes it periodically on a timer.
-/// If the buffer fills before flush, new events are dropped.
+/// If the buffer fills before flush, new events are dropped and counted.
 pub struct LogBuffer {
-    entries: Vec<LogEntry>,
-    capacity: usize,
-    dropped_count: u64,
+    batch: LogBatch,
 }
 
 impl LogBuffer {
@@ -49,52 +39,43 @@
     #[must_use]
     pub fn new(capacity: usize) -> Self {
         Self {
-            entries: Vec::with_capacity(capacity.min(256)),
-            capacity,
-            dropped_count: 0,
+            batch: LogBatch {
+                records: Vec::with_capacity(capacity),
+                dropped_count: 0,
+            },
         }
     }
 
-    /// Push a log entry. If at capacity, the new entry is dropped.
+    /// Push a log record. If at capacity, the record is dropped and counted.
     ///
-    /// Returns true if the entry was added, false if dropped.
-    pub fn push(&mut self, entry: LogEntry) -> bool {
-        if self.entries.len() >= self.capacity {
-            self.dropped_count += 1;
+    /// Returns true if the record was added, false if dropped.
+    pub fn push(&mut self, record: LogRecord) -> bool {
+        if self.batch.records.len() >= self.batch.records.capacity() {
+            self.batch.dropped_count += 1;
             false
         } else {
-            self.entries.push(entry);
+            self.batch.records.push(record);
             true
         }
    }
 
-    /// Push just a LogRecord with no producer key (for third-party events).
-    ///
-    /// Returns true if the entry was added, false if dropped.
-    pub fn push_record(&mut self, record: LogRecord) -> bool {
-        self.push(LogEntry {
-            record,
-            producer_key: None,
-        })
-    }
-
-    /// Check if the buffer has entries to flush.
+    /// Check if the buffer has records to flush.
     #[must_use]
     pub fn needs_flush(&self) -> bool {
-        !self.entries.is_empty()
+        !self.batch.records.is_empty() || self.batch.dropped_count > 0
     }
 
-    /// Drain all entries from the buffer, returning them as a batch.
+    /// Drain all records from the buffer, returning them as a batch.
+    /// Resets the dropped count for the next batch.
     pub fn drain(&mut self) -> LogBatch {
-        LogBatch {
-            entries: std::mem::take(&mut self.entries),
-        }
-    }
-
-    /// Returns the number of dropped entries since creation.
-    #[must_use]
-    pub fn dropped_count(&self) -> u64 {
-        self.dropped_count
+        let capacity = self.batch.records.capacity();
+        std::mem::replace(
+            &mut self.batch,
+            LogBatch {
+                records: Vec::with_capacity(capacity),
+                dropped_count: 0,
+            },
+        )
     }
 }
 
@@ -103,11 +84,11 @@ thread_local! {
     static CURRENT_LOG_BUFFER: RefCell<Option<LogBuffer>> = const { RefCell::new(None) };
 }
 
-// Thread-local current MetricsKey for third-party instrumentation.
+// Thread-local current ProducerKey for third-party instrumentation.
 // When a component is executing, this is set to that component's key so that
 // any tracing::info!() calls from libraries can be attributed to the component.
 thread_local! {
-    static CURRENT_PRODUCER_KEY: RefCell<Option<MetricsKey>> = const { RefCell::new(None) };
+    static CURRENT_PRODUCER_KEY: RefCell<Option<ProducerKey>> = const { RefCell::new(None) };
 }
 
 /// Guard that sets the current producer key for the duration of a scope.
@@ -115,7 +96,7 @@ thread_local! {
 /// When dropped, restores the previous key (or None).
 /// This allows nested scoping if needed.
 pub struct ProducerKeyGuard {
-    previous: Option<MetricsKey>,
+    previous: Option<ProducerKey>,
 }
 
 impl ProducerKeyGuard {
@@ -124,7 +105,7 @@
     /// Third-party log events will be attributed to this key until
     /// the guard is dropped.
     #[must_use]
-    pub fn enter(key: MetricsKey) -> Self {
+    pub fn enter(key: ProducerKey) -> Self {
         let previous = CURRENT_PRODUCER_KEY.with(|cell| cell.borrow_mut().replace(key));
         Self { previous }
     }
@@ -140,7 +121,7 @@ impl Drop for ProducerKeyGuard {
 
 /// Get the current producer key (if any component scope is active).
 #[must_use]
-pub fn current_producer_key() -> Option<MetricsKey> {
+pub fn current_producer_key() -> Option<ProducerKey> {
     CURRENT_PRODUCER_KEY.with(|cell| *cell.borrow())
 }
 
@@ -162,25 +143,6 @@ pub fn uninstall_thread_log_buffer() {
     });
 }
 
-/// Push a log record to the current thread's buffer (if installed).
-///
-/// If `producer_key` is None, uses the current thread-local producer key
-/// (set via `ProducerKeyGuard::enter()`). This allows third-party instrumentation
-/// to be attributed to the currently-executing component.
-///
-/// Returns false if no buffer is installed or buffer is full (event dropped).
-pub fn push_to_thread_buffer(record: LogRecord, producer_key: Option<MetricsKey>) -> bool {
-    CURRENT_LOG_BUFFER.with(|cell| {
-        if let Some(ref mut buffer) = *cell.borrow_mut() {
-            // Use explicit key if provided, otherwise use thread-current key
-            let key = producer_key.or_else(current_producer_key);
-            buffer.push(LogEntry { record, producer_key: key })
-        } else {
-            false
-        }
-    })
-}
-
 /// Flush the current thread's log buffer, returning the batch.
 ///
 /// Called by the pipeline runtime on a timer.
@@ -255,14 +217,18 @@ impl LogsCollector {
         }
     }
 
-    /// Write a batch of log entries to console.
+    /// Write a batch of log records to console.
     fn write_batch(&self, batch: LogBatch) {
-        for entry in batch.entries {
+        // Print dropped count as a formatted warning before the batch
+        if batch.dropped_count > 0 {
+            self.writer.print_dropped_warning(batch.dropped_count);
+        }
+        for record in batch.records {
             // Identifier.0 is the &'static dyn Callsite
-            let metadata = entry.record.callsite_id.0.metadata();
+            let metadata = record.callsite_id.0.metadata();
             let saved = SavedCallsite::new(metadata);
             // Use ConsoleWriter's routing: ERROR/WARN to stderr, others to stdout
-            self.writer.print_log_record(&entry.record, &saved);
+            self.writer.print_log_record(&record, &saved);
             // TODO: include producer_key in output when present
         }
     }
@@ -317,12 +283,12 @@
     S: Subscriber + for<'a> LookupSpan<'a>,
 {
     fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) {
-        let record = LogRecord::new(event);
         let producer_key = current_producer_key();
+        let record = LogRecord::new(event, producer_key);
 
         CURRENT_LOG_BUFFER.with(|cell| {
             if let Some(ref mut buffer) = *cell.borrow_mut() {
-                if buffer.push(LogEntry { record, producer_key }) {
+                if buffer.push(record) {
                     let _ = self.events_captured.fetch_add(1, Ordering::Relaxed);
                 } else {
                     let _ = self.events_dropped.fetch_add(1, Ordering::Relaxed);
@@ -379,13 +345,11 @@
     S: Subscriber + for<'a> LookupSpan<'a>,
 {
     fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) {
-        let record = LogRecord::new(event);
         // Non-engine threads don't have producer_key context
+        let record = LogRecord::new(event, None);
         let batch = LogBatch {
-            entries: vec![LogEntry {
-                record,
-                producer_key: None,
-            }],
+            records: vec![record],
+            dropped_count: 0,
         };
 
         match self.reporter.sender.try_send(batch) {
diff --git a/rust/otap-dataflow/crates/telemetry/src/registry.rs b/rust/otap-dataflow/crates/telemetry/src/registry.rs
index d1cc354ba3..6727d5dea5 100644
--- a/rust/otap-dataflow/crates/telemetry/src/registry.rs
+++ b/rust/otap-dataflow/crates/telemetry/src/registry.rs
@@ -19,6 +19,7 @@ use std::sync::Arc;
 
 new_key_type! {
     /// This key is used to identify a specific metrics entry in the registry (slotmap index).
+    /// TODO: Can we rename this ScopeKey? Is this the set of instrument scope variables?
     pub struct MetricsKey;
 }
 
diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing.rs
index 75d08165f8..29354d4202 100644
--- a/rust/otap-dataflow/crates/telemetry/src/self_tracing.rs
+++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing.rs
@@ -20,6 +20,10 @@ use tracing::{Event, Level, Metadata};
 pub use encoder::DirectLogRecordEncoder;
 pub use formatter::{ConsoleWriter, RawLoggingLayer};
 
+/// Optional key identifying the producing component.
+/// Re-exported from the registry module for use by downstream code.
+pub type ProducerKey = crate::registry::MetricsKey;
+
 /// A log record with structural metadata and pre-encoded body/attributes.
 #[derive(Debug, Clone)]
 pub struct LogRecord {
@@ -31,6 +35,10 @@ pub struct LogRecord {
 
     /// Pre-encoded body and attributes in OTLP bytes.
     pub body_attrs_bytes: Bytes,
+
+    /// Optional key identifying the producing component (for first-party logs).
+    /// None for third-party logs from libraries.
+    pub producer_key: Option<ProducerKey>,
 }
 
 /// Saved callsite information. This is information that can easily be
@@ -83,7 +91,7 @@ impl SavedCallsite {
 
 impl LogRecord {
     /// Construct a log record, partially encoding its dynamic content.
#[must_use] - pub fn new(event: &Event<'_>) -> Self { + pub fn new(event: &Event<'_>, producer_key: Option) -> Self { let metadata = event.metadata(); // Encode body and attributes to bytes. @@ -98,6 +106,7 @@ impl LogRecord { callsite_id: metadata.callsite(), timestamp_ns: Self::get_timestamp_nanos(), body_attrs_bytes: buf.into_bytes(), + producer_key, } } diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs index adb51f795e..0da31c2db2 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs @@ -134,6 +134,36 @@ impl ConsoleWriter { self.write_line(callsite.level(), &buf[..len]); } + /// Print a warning about dropped log records. + /// + /// Formatted to look like a regular log record at WARN level. + pub fn print_dropped_warning(&self, dropped_count: u64) { + use std::time::{SystemTime, UNIX_EPOCH}; + + let timestamp_ns = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_nanos() as u64; + + let mut buf = [0u8; LOG_BUFFER_SIZE]; + let mut w = Cursor::new(&mut buf[..]); + let cm = self.color_mode; + + cm.write_ansi(&mut w, AnsiCode::Dim); + Self::write_timestamp(&mut w, timestamp_ns); + cm.write_ansi(&mut w, AnsiCode::Reset); + let _ = w.write_all(b" "); + cm.write_level(&mut w, &Level::WARN); + cm.write_ansi(&mut w, AnsiCode::Bold); + let _ = w.write_all(b"otap_df_telemetry::logs::buffer_overflow"); + cm.write_ansi(&mut w, AnsiCode::Reset); + let _ = write!(w, ": dropped {} log records (buffer full)\n", dropped_count); + + let len = w.position() as usize; + // WARN goes to stderr + let _ = std::io::stderr().write_all(&buf[..len]); + } + /// Write a LogRecord to a byte buffer. Returns the number of bytes written. pub fn write_log_record( &self, @@ -334,7 +364,9 @@ where // TODO: there are allocations implied here that we would prefer // to avoid, it will be an extensive change in the ProtoBuffer to // stack-allocate this temporary. - let record = LogRecord::new(event); + // RawLoggingLayer is used before the logs infrastructure is set up, + // so no producer_key context is available. 
+ let record = LogRecord::new(event, None); let callsite = SavedCallsite::new(event.metadata()); self.writer.print_log_record(&record, &callsite); } @@ -372,7 +404,7 @@ mod tests { S: Subscriber + for<'a> LookupSpan<'a>, { fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) { - let record = LogRecord::new(event); + let record = LogRecord::new(event, None); let callsite = SavedCallsite::new(event.metadata()); // Capture formatted output @@ -536,6 +568,7 @@ mod tests { // 2024-01-15T12:30:45.678Z timestamp_ns: 1_705_321_845_678_000_000, body_attrs_bytes: Bytes::new(), + producer_key: None, }; let writer = ConsoleWriter::no_color(); @@ -593,6 +626,7 @@ mod tests { callsite_id: tracing::callsite::Identifier(&TEST_CALLSITE), timestamp_ns: 1_705_321_845_678_000_000, body_attrs_bytes: Bytes::from(encoded), + producer_key: None, }; let mut buf = [0u8; LOG_BUFFER_SIZE]; diff --git a/rust/otap-dataflow/docs/fixed-proto-buffer-design.md b/rust/otap-dataflow/docs/fixed-proto-buffer-design.md new file mode 100644 index 0000000000..a86d0eec8e --- /dev/null +++ b/rust/otap-dataflow/docs/fixed-proto-buffer-design.md @@ -0,0 +1,754 @@ +# Fixed-Size Proto Buffer Design + +## Problem Statement + +The OTAP dataflow pipeline requires an internal logging path for self-diagnostics that feeds OTLP protocol bytes directly into the pipeline. This internal instrumentation has specific constraints: + +1. **Safety**: Internal logging must not cause heap allocations that could interfere with the main data path or cause memory pressure during high-load scenarios. + +2. **Low Impact**: The encoding path must be lightweight and predictable, suitable for use in hot paths like `tracing::info!` statements. + +3. **Fixed-Size Buffers**: For stack-allocated buffers with a predetermined capacity, the encoder must handle out-of-space conditions gracefully rather than panicking or reallocating. + +4. **Truncation Support**: When encoding attributes into a fixed buffer, if space runs out mid-encoding (e.g., while looping through event variables), the encoder should: + - Return a "truncated" error result + - Allow callers to use the partially-encoded contents + - Enable tracking of dropped attributes + +5. **Code Reuse**: We explicitly want to avoid maintaining two separate encoder implementations—one for growable buffers (normal telemetry path) and one for fixed-size buffers (internal instrumentation). + +6. **Graceful Degradation**: Even for growable buffers, we want configurable limits to prevent unbounded growth from malformed or malicious data. Large attribute values and log bodies should be truncated gracefully with informative markers. + +### OTLP Protocol Support + +The OpenTelemetry LogRecord proto already provides mechanisms for handling truncation: + +```protobuf +message LogRecord { + // ... other fields ... + uint32 dropped_attributes_count = 7; // Track dropped attributes + fixed32 flags = 8; // 5 bytes total (tag + fixed32) +} +``` + +This means we can: +- Reserve 5 bytes at the end of our encoding buffer for `dropped_attributes_count` +- Encode as many attributes as fit +- On truncation, count remaining attributes and encode the count in the reserved space + +### Example Use Case + +```rust +// During a tracing::info! 
statement, encode log attributes into a fixed buffer +let mut buf = FixedProtoBuffer::<1024>::new(); + +// Reserve space for dropped_attributes_count (tag=7 varint + uint32 varint = ~5 bytes) +buf.reserve_tail(5); + +let mut encoded_count = 0; +for attr in event_attributes { + if encode_key_value(&mut buf, attr).is_err() { + // Truncation occurred - use partial contents + break; + } + encoded_count += 1; +} + +// Release reserved space and encode dropped count +let dropped_count = event_attributes.len() - encoded_count; +buf.release_tail(5); +if dropped_count > 0 { + buf.encode_field_tag(7, WIRE_TYPE_VARINT); + buf.encode_varint(dropped_count as u64); +} +``` + +## Solution + +### Design Approach + +The solution introduces a `ProtoWrite` trait that abstracts over buffer implementations, allowing encoding logic to work with both growable (`ProtoBuffer`) and fixed-size (`FixedProtoBuffer`) buffers through the same code path. + +### Core Concepts + +#### Buffer Space Model + +``` +|-------- written --------|----- remaining -----|---- reserved ----| + ^ ^ + len limit - reserved_tail + +effective_remaining = limit - len - reserved_tail +``` + +- **limit**: Maximum bytes that can be written (may be less than capacity) +- **reserved_tail**: Bytes reserved at the end for fields like `dropped_attributes_count` +- **effective_remaining**: Actual bytes available for the next write operation + +#### Length Placeholder Optimization + +When encoding nested messages, we don't know the size upfront, so we reserve placeholder bytes for the length varint and patch them afterward. The number of bytes needed depends on the maximum possible message size: + +| Buffer Limit | Max Length | Varint Bytes | Savings vs 4-byte | +|-------------|------------|--------------|-------------------| +| ≤ 127 B | 127 | 1 byte | 75% | +| ≤ 16 KiB | 16383 | 2 bytes | 50% | +| ≤ 2 MiB | 2097151 | 3 bytes | 25% | +| > 2 MiB | 2^28-1 | 4 bytes | 0% | + +For internal instrumentation with small fixed buffers (e.g., 1-4 KiB), using 2-byte placeholders instead of 4-byte saves significant space, especially in deeply nested structures like attributes within log records within scope logs within resource logs. + +**Example savings for a LogRecord with 10 nested messages:** +- 4-byte placeholders: 40 bytes overhead +- 2-byte placeholders: 20 bytes overhead +- Savings: 20 bytes (could fit another small attribute!) + +#### `LengthPlaceholderSize` Enum + +```rust +/// Determines how many bytes to reserve for length placeholders in nested messages. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub enum LengthPlaceholderSize { + /// 1 byte - for buffers ≤ 127 bytes (max length = 127) + OneByte, + /// 2 bytes - for buffers ≤ 16 KiB (max length = 16383) + TwoBytes, + /// 3 bytes - for buffers ≤ 2 MiB (max length = 2097151) + ThreeBytes, + /// 4 bytes - for larger buffers (max length = 268435455) + #[default] + FourBytes, +} + +impl LengthPlaceholderSize { + /// Choose the optimal placeholder size for a given buffer limit. + pub const fn for_limit(limit: usize) -> Self { + if limit <= 127 { + Self::OneByte + } else if limit <= 16383 { + Self::TwoBytes + } else if limit <= 2097151 { + Self::ThreeBytes + } else { + Self::FourBytes + } + } + + /// Number of bytes this placeholder uses. + pub const fn num_bytes(self) -> usize { + match self { + Self::OneByte => 1, + Self::TwoBytes => 2, + Self::ThreeBytes => 3, + Self::FourBytes => 4, + } + } + + /// Maximum length that can be encoded with this placeholder size. 
+    pub const fn max_length(self) -> usize {
+        match self {
+            Self::OneByte => 127,
+            Self::TwoBytes => 16383,
+            Self::ThreeBytes => 2097151,
+            Self::FourBytes => 268435455,
+        }
+    }
+
+    /// Encode a zero-padded length placeholder.
+    /// Returns the bytes to write.
+    pub const fn placeholder_bytes(self) -> &'static [u8] {
+        match self {
+            Self::OneByte => &[0x00],
+            Self::TwoBytes => &[0x80, 0x00],
+            Self::ThreeBytes => &[0x80, 0x80, 0x00],
+            Self::FourBytes => &[0x80, 0x80, 0x80, 0x00],
+        }
+    }
+}
+```
+
+#### Dispatch Mechanism
+
+**Dynamic sizing based on remaining capacity:**
+
+The placeholder size only needs to accommodate the *remaining buffer space*. When writing a length placeholder, we check how much space is left and choose the smallest sufficient placeholder:
+
+```rust
+/// Returned from write_length_placeholder, used to patch the length later.
+#[derive(Clone, Copy)]
+pub struct LengthPlaceholder {
+    pub offset: usize,
+    pub size: LengthPlaceholderSize,
+}
+
+fn write_length_placeholder(&mut self) -> Result<LengthPlaceholder, Truncated> {
+    let offset = self.len();
+    let remaining = self.capacity() - offset - self.reserved_tail;
+    let size = LengthPlaceholderSize::for_limit(remaining);
+    self.write_bytes(size.placeholder_bytes())?;
+    Ok(LengthPlaceholder { offset, size })
+}
+
+fn patch_length_placeholder(&mut self, placeholder: LengthPlaceholder, length: usize) {
+    let slice = self.as_mut_slice();
+    for i in 0..placeholder.size.num_bytes() {
+        slice[placeholder.offset + i] += ((length >> (i * 7)) & 0x7f) as u8;
+    }
+}
+```
+
+**Usage in macro:**
+
+```rust
+macro_rules! proto_encode_len_delimited_try {
+    ($buf:expr, $tag:expr, $encode_fn:expr) => {{
+        proto_encode_varint($buf, $tag);
+        let placeholder = $buf.write_length_placeholder()?; // returns LengthPlaceholder
+        let start = $buf.len();
+        $encode_fn;
+        let length = $buf.len() - start;
+        $buf.patch_length_placeholder(placeholder, length); // uses stored offset + size
+    }};
+}
+```
+
+**Benefits:**
+
+- **No configuration needed**: The encoder automatically chooses optimal sizes
+- **Simple**: The placeholder struct is just two small fields on the stack
+- **Optimal**: Uses smallest sufficient placeholder for remaining space
+
+**Example progression in a 4 KiB buffer:**
+
+| Write # | Position | Remaining | Placeholder Size | Overhead |
+|---------|----------|-----------|------------------|----------|
+| 1       | 0        | 4096      | 2 bytes          | 2        |
+| 2       | 100      | 3996      | 2 bytes          | 2        |
+| 3       | 3900     | 196       | 2 bytes          | 2        |
+| 4       | 4000     | 96        | 1 byte           | 1        |
+
+### New Types
+
+#### `Truncated` Error
+
+A simple, lightweight error type indicating a fixed-size buffer ran out of space:
+
+```rust
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct Truncated;
+```
+
+This error is:
+- Zero-sized (no runtime overhead)
+- Copyable (can be returned by value)
+- Convertible to the main `Error` type via `From`
+
+#### `StringTruncation` Result
+
+Information about how a string was truncated:
+
+```rust
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct StringTruncation {
+    /// Number of bytes actually written (including any marker)
+    pub bytes_written: usize,
+    /// Number of bytes from the original string that were truncated
+    pub bytes_truncated: usize,
+}
+
+impl StringTruncation {
+    pub fn none() -> Self {
+        Self { bytes_written: 0, bytes_truncated: 0 }
+    }
+
+    pub fn was_truncated(&self) -> bool {
+        self.bytes_truncated > 0
+    }
+}
+```
+
+#### `ProtoWrite` Trait
+
+The trait defines the core buffer operations with fallible semantics:
+
+```rust
+pub trait ProtoWrite {
+    // === Core required methods ===
+
+    /// Append bytes to the buffer. Returns Err(Truncated) if insufficient capacity.
+    fn write_bytes(&mut self, bytes: &[u8]) -> Result<(), Truncated>;
+
+    /// Current length of encoded data.
+    fn len(&self) -> usize;
+
+    /// Get a reference to the encoded bytes.
+    fn as_slice(&self) -> &[u8];
+
+    /// Get a mutable reference for patching length placeholders.
+    fn as_mut_slice(&mut self) -> &mut [u8];
+
+    /// Clear the buffer contents (does not clear reserved_tail or limit).
+    fn clear(&mut self);
+
+    /// Physical capacity of the buffer.
+    fn capacity(&self) -> usize;
+
+    // === Limit and reservation management ===
+
+    /// Set a soft limit on buffer size. For fixed buffers, clamped to capacity.
+    /// For growable buffers, prevents growth beyond this point.
+    /// Also updates the length placeholder size to match the new limit.
+    fn set_limit(&mut self, limit: usize);
+
+    /// Get current limit (defaults to capacity for fixed, usize::MAX for growable).
+    fn limit(&self) -> usize;
+
+    /// Reserve bytes at the end of the buffer for later use.
+    /// Returns the new effective remaining space.
+    /// This space is protected from writes until released.
+    fn reserve_tail(&mut self, bytes: usize) -> usize;
+
+    /// Release previously reserved tail bytes, making them available for writing.
+    fn release_tail(&mut self, bytes: usize);
+
+    /// Get current tail reservation.
+    fn reserved_tail(&self) -> usize;
+
+    /// Bytes available for writing: limit - len - reserved_tail
+    fn remaining(&self) -> usize {
+        self.limit()
+            .saturating_sub(self.len())
+            .saturating_sub(self.reserved_tail())
+    }
+
+    // === Length placeholder configuration ===
+
+    /// Get the length placeholder size for this buffer.
+    /// Determined by the buffer's limit.
+    fn length_placeholder_size(&self) -> LengthPlaceholderSize {
+        LengthPlaceholderSize::for_limit(self.limit())
+    }
+
+    /// Override the length placeholder size.
+    /// Useful when you know nested messages will be small even in a large buffer.
+    fn set_length_placeholder_size(&mut self, size: LengthPlaceholderSize);
+
+    /// Write the length placeholder bytes and return the position where length starts.
+    fn write_length_placeholder(&mut self) -> Result<usize, Truncated> {
+        let pos = self.len();
+        let placeholder = self.length_placeholder_size().placeholder_bytes();
+        self.write_bytes(placeholder)?;
+        Ok(pos)
+    }
+
+    /// Patch a previously written length placeholder with the actual length.
+    fn patch_length_placeholder(&mut self, len_start_pos: usize, length: usize) {
+        let num_bytes = self.length_placeholder_size().num_bytes();
+        let slice = self.as_mut_slice();
+        for i in 0..num_bytes {
+            slice[len_start_pos + i] += ((length >> (i * 7)) & 0x7f) as u8;
+        }
+    }
+
+    // === Encoding methods with default implementations ===
+
+    fn encode_varint(&mut self, value: u64) -> Result<(), Truncated>;
+    fn encode_field_tag(&mut self, field_number: u64, wire_type: u64) -> Result<(), Truncated>;
+    fn encode_sint32(&mut self, value: i32) -> Result<(), Truncated>;
+    fn encode_string(&mut self, field_tag: u64, val: &str) -> Result<(), Truncated>;
+    fn encode_bytes_field(&mut self, field_tag: u64, val: &[u8]) -> Result<(), Truncated>;
+
+    // === Truncating string encoder ===
+
+    /// Encode a string field, truncating if necessary to fit in available space.
+    ///
+    /// If the full string doesn't fit, truncates and appends the marker.
+    /// The marker should be a short fixed string like "..." or "[TRUNCATED]".
+    ///
+    /// Returns information about what was written and truncated.
+ /// Returns Err(Truncated) only if even the field tag + minimal content won't fit. + fn encode_string_truncated( + &mut self, + field_tag: u64, + val: &str, + marker: &str, + ) -> Result; +} +``` + +#### `FixedProtoBuffer` + +A stack-allocatable, fixed-size buffer: + +```rust +pub struct FixedProtoBuffer { + buffer: [u8; N], + len: usize, + reserved_tail: usize, + placeholder_size: LengthPlaceholderSize, +} + +impl FixedProtoBuffer { + pub const fn new() -> Self { + Self { + buffer: [0u8; N], + len: 0, + reserved_tail: 0, + // Computed at compile time based on N + placeholder_size: LengthPlaceholderSize::for_limit(N), + } + } +} +``` + +Key properties: +- **No heap allocation**: The buffer is a fixed-size array +- **Automatic placeholder sizing**: `LengthPlaceholderSize` is determined from `N` at compile time +- **Atomic writes where possible**: `write_bytes` checks capacity before writing +- **Truncation-safe**: Returns `Err(Truncated)` instead of panicking +- **Limit equals capacity**: `set_limit` is a no-op (or clamps to capacity) + +#### Updated `ProtoBuffer` + +The growable buffer gains limit, reservation, and placeholder size support: + +```rust +pub struct ProtoBuffer { + buffer: Vec, + limit: usize, // Default: usize::MAX (unlimited) + reserved_tail: usize, // Default: 0 + placeholder_size: LengthPlaceholderSize, // Default: FourBytes +} +``` + +Key properties: +- **Configurable limit**: Prevents unbounded growth +- **Configurable placeholder size**: Can use 2-byte placeholders when limit is set appropriately +- **Truncation on limit**: Returns `Err(Truncated)` when limit reached (no realloc) +- **Backward compatible**: Default limit is unlimited, default placeholder is 4 bytes + +### String Truncation Behavior + +The `encode_string_truncated` method implements graceful truncation: + +```rust +fn encode_string_truncated( + &mut self, + field_tag: u64, + val: &str, + marker: &str, // e.g., "..." 
or "[TRUNCATED]" +) -> Result { + let tag_len = varint_len((field_tag << 3) | WIRE_TYPE_LEN); + let full_len = tag_len + varint_len(val.len()) + val.len(); + + // Check if full string fits + if full_len <= self.remaining() { + self.encode_string(field_tag, val)?; + return Ok(StringTruncation::none()); + } + + // Calculate how much of the string we can fit with marker + let marker_bytes = marker.as_bytes(); + let available = self.remaining(); + + // Need at least: tag + length(1 byte min) + marker + let min_needed = tag_len + 1 + marker_bytes.len(); + if available < min_needed { + return Err(Truncated); + } + + // Calculate truncated string length + let max_content = available - tag_len - 1; // Assuming 1-byte length varint + let truncated_str_len = max_content.saturating_sub(marker_bytes.len()); + + // Find UTF-8 safe truncation point + let truncated_str = truncate_utf8_safe(val, truncated_str_len); + let bytes_truncated = val.len() - truncated_str.len(); + + // Build the truncated content: truncated_str + marker + let total_content_len = truncated_str.len() + marker_bytes.len(); + + self.encode_field_tag(field_tag, WIRE_TYPE_LEN)?; + self.encode_varint(total_content_len as u64)?; + self.write_bytes(truncated_str.as_bytes())?; + self.write_bytes(marker_bytes)?; + + Ok(StringTruncation { + bytes_written: tag_len + varint_len(total_content_len) + total_content_len, + bytes_truncated, + }) +} + +/// Truncate a string at a UTF-8 safe boundary +fn truncate_utf8_safe(s: &str, max_bytes: usize) -> &str { + if max_bytes >= s.len() { + return s; + } + // Find the last valid UTF-8 char boundary at or before max_bytes + let mut end = max_bytes; + while end > 0 && !s.is_char_boundary(end) { + end -= 1; + } + &s[..end] +} +``` + +### Backward Compatibility + +The existing `ProtoBuffer` type retains its infallible inherent methods: + +```rust +impl ProtoBuffer { + // These remain infallible (no Result return type) when limit is unlimited + pub fn encode_varint(&mut self, value: u64) { ... } + pub fn encode_string(&mut self, field_tag: u64, val: &str) { ... } + // ... etc +} + +// Additionally implements ProtoWrite (may return Err if limit set) +impl ProtoWrite for ProtoBuffer { ... } +``` + +This means: +- All existing code using `ProtoBuffer` continues to work unchanged +- New generic code can use `impl ProtoWrite` to work with either buffer type +- Setting a limit on `ProtoBuffer` enables truncation behavior + +### Macro Support + +The macros now use the buffer's configured `LengthPlaceholderSize`: + +1. **`proto_encode_len_delimited_unknown_size!`** (existing, updated) + - Uses infallible helpers for `ProtoBuffer` + - Uses the buffer's `length_placeholder_size()` instead of hardcoded 4 bytes + - No error propagation needed + +2. **`proto_encode_len_delimited_try!`** (new) + - For use with generic `ProtoWrite` code + - Uses the buffer's `length_placeholder_size()` + - Propagates `Truncated` errors via `?` + - Returns `Result<(), Truncated>` + +```rust +/// Updated macro using configurable placeholder size +#[macro_export] +macro_rules! 
proto_encode_len_delimited_try {
+    ($field_tag: expr, $encode_fn:expr, $buf:expr) => {{
+        use $crate::otlp::ProtoWrite;
+        $buf.encode_field_tag($field_tag, $crate::proto::consts::wire_types::LEN)?;
+        let len_start_pos = $buf.write_length_placeholder()?;
+        $encode_fn;
+        let num_bytes = $buf.length_placeholder_size().num_bytes();
+        let len = $buf.len() - len_start_pos - num_bytes;
+        $buf.patch_length_placeholder(len_start_pos, len);
+        Ok::<(), $crate::error::Truncated>(())
+    }};
+}
+```
+
+## Usage Examples
+
+### Generic Encoding Function
+
+```rust
+use otap_df_pdata::otlp::{ProtoWrite, Truncated};
+
+fn encode_attributes<W: ProtoWrite>(
+    buf: &mut W,
+    attrs: &[KeyValue],
+) -> Result<usize, Truncated> {
+    let mut count = 0;
+    for attr in attrs {
+        buf.encode_string(KEY_TAG, &attr.key)?;
+        buf.encode_string(VALUE_TAG, &attr.value)?;
+        count += 1;
+    }
+    Ok(count)
+}
+```
+
+### Fixed Buffer with Reserved Space for Dropped Count
+
+```rust
+use otap_df_pdata::otlp::{FixedProtoBuffer, ProtoWrite, Truncated};
+
+fn encode_log_record(attrs: &[KeyValue], body: &str) -> Vec<u8> {
+    let mut buf = FixedProtoBuffer::<2048>::new();
+
+    // Reserve 5 bytes for dropped_attributes_count (tag + varint)
+    buf.reserve_tail(5);
+
+    // Encode body with truncation marker
+    let body_result = buf.encode_string_truncated(
+        LOG_RECORD_BODY,
+        body,
+        "...[truncated]"
+    );
+
+    // Encode attributes until we run out of space
+    let mut encoded = 0;
+    for attr in attrs {
+        match encode_key_value(&mut buf, attr) {
+            Ok(()) => encoded += 1,
+            Err(Truncated) => break,
+        }
+    }
+
+    // Release reserved space and encode dropped count
+    let dropped = attrs.len() - encoded;
+    buf.release_tail(5);
+
+    if dropped > 0 {
+        let _ = buf.encode_field_tag(DROPPED_ATTRIBUTES_COUNT, WIRE_TYPE_VARINT);
+        let _ = buf.encode_varint(dropped as u64);
+    }
+
+    buf.as_slice().to_vec()
+}
+```
+
+### Limiting Growable Buffer
+
+```rust
+use otap_df_pdata::otlp::{ProtoBuffer, ProtoWrite, LengthPlaceholderSize, Truncated};
+
+fn encode_with_limit(data: &LargeData) -> Result<Vec<u8>, Truncated> {
+    let mut buf = ProtoBuffer::new();
+
+    // Prevent unbounded growth - limit to 16KB
+    // This automatically sets placeholder size to TwoBytes
+    buf.set_limit(16 * 1024);
+
+    // Or explicitly use small placeholders for even smaller limits
+    // buf.set_limit(4 * 1024);
+    // buf.set_length_placeholder_size(LengthPlaceholderSize::TwoBytes);
+
+    // Reserve space for metadata at the end
+    buf.reserve_tail(64);
+
+    // Encode potentially large content with truncation
+    for item in &data.items {
+        buf.encode_string_truncated(ITEM_TAG, &item.value, "...")?;
+    }
+
+    // Add metadata in reserved space
+    buf.release_tail(64);
+    encode_metadata(&mut buf, data)?;
+
+    Ok(buf.into_bytes())
+}
+```
+
+### Space-Efficient Small Buffer
+
+```rust
+use otap_df_pdata::otlp::{FixedProtoBuffer, ProtoWrite};
+
+fn encode_compact_log() {
+    // 4KB buffer automatically uses 2-byte length placeholders
+    let mut buf = FixedProtoBuffer::<4096>::new();
+
+    assert_eq!(buf.length_placeholder_size().num_bytes(), 2);
+
+    // Each nested message saves 2 bytes compared to 4-byte placeholders!
+    // In a LogRecord with 10 nested structures, that's 20 bytes saved.
+}
+```
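+
+The savings above rely on the zero-padded placeholder patch arithmetic
+described earlier. As a standalone illustration of that arithmetic (plain
+functions with hypothetical names, not the crate's API):
+
+```rust
+/// Patch a 2-byte zero-padded placeholder in place.
+/// The placeholder bytes are [0x80, 0x00]; adding each 7-bit group of the
+/// length preserves the continuation bit already present in byte 0.
+fn patch_two_byte(buf: &mut [u8], offset: usize, length: usize) {
+    assert!(length <= 16383, "length must fit in a 2-byte varint");
+    buf[offset] += (length & 0x7f) as u8;             // low 7 bits
+    buf[offset + 1] += ((length >> 7) & 0x7f) as u8;  // high 7 bits
+}
+
+fn main() {
+    let mut buf = vec![0x80, 0x00]; // 2-byte placeholder, currently length 0
+    patch_two_byte(&mut buf, 0, 300);
+    // 300 = 0b10_0101100: low 7 bits = 0x2C, high bits = 0x02, so the
+    // patched bytes are exactly the standard varint encoding of 300.
+    assert_eq!(buf, [0xAC, 0x02]);
+}
+```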
+
+### Body Truncation with Byte Count
+
+For cases where you want to report how many bytes were truncated (for
+example, to include a byte count in the marker):
+
+```rust
+fn encode_body_with_count<W: ProtoWrite>(buf: &mut W, body: &str) -> StringTruncation {
+    // First attempt with simple marker
+    match buf.encode_string_truncated(LOG_RECORD_BODY, body, "...") {
+        Ok(info) => {
+            if info.was_truncated() {
+                // Log the truncation details for observability
+                // The bytes_truncated field tells us exactly how much was lost
+                tracing::debug!(
+                    truncated_bytes = info.bytes_truncated,
+                    "Log body truncated"
+                );
+            }
+            info
+        }
+        Err(Truncated) => {
+            // Couldn't fit even minimal content
+            StringTruncation { bytes_written: 0, bytes_truncated: body.len() }
+        }
+    }
+}
+```
+
+## Design Rationale
+
+### Why Configurable Length Placeholder Size?
+
+The protobuf wire format uses varints for length-delimited field lengths.
+Since we encode nested messages without knowing their size upfront, we
+reserve placeholder bytes and patch them later.
+
+The problem: varints are variable-length! A length of 127 needs 1 byte, but
+128 needs 2 bytes. Our solution uses zero-padded varints where each byte has
+its continuation bit set until the final byte.
+
+For a 4 KiB buffer, no nested message can exceed 4096 bytes, which fits in a
+2-byte varint. Using 4-byte placeholders wastes 2 bytes per nested message.
+In a typical LogRecord with its nested structure:
+
+```
+ResourceLogs                     [4 bytes wasted]
+  └─ ScopeLogs                   [4 bytes wasted if 4-byte, 2 bytes if 2-byte]
+       └─ LogRecord              [...]
+            ├─ Body (AnyValue)
+            └─ Attributes (repeated KeyValue)
+                 └─ Value (AnyValue)
+```
+
+With 10 attributes, that's potentially 20+ extra bytes wasted—space that
+could hold another attribute!
+
+### Why Reserve Tail Space?
+
+The `reserve_tail` mechanism ensures that critical fields like
+`dropped_attributes_count` can always be encoded, even when the buffer is
+nearly full. Without this:
+
+1. We might fill the buffer completely with attributes
+2. Then have no room to record that we dropped some
+3. The receiver would have no indication of data loss
+
+### Why Truncate Strings vs. Drop Entirely?
+
+Truncated data with a marker is often more useful than no data:
+
+- A truncated log message still conveys intent
+- A truncated attribute value may still be useful for filtering/grouping
+- The marker makes it clear that truncation occurred
+
+### Why UTF-8 Safe Truncation?
+
+Truncating in the middle of a multi-byte UTF-8 character would produce
+invalid UTF-8, which could cause issues downstream. The `truncate_utf8_safe`
+function ensures we always produce valid UTF-8.
+
+### Why Configurable Limits for Growable Buffers?
+ +Even in the "normal" path, we want protection against: +- Malformed data causing unbounded memory growth +- DoS attacks via large payloads +- Accidental memory exhaustion from unexpectedly large telemetry + +## File Changes + +| File | Changes | +|------|---------| +| `crates/pdata/src/error.rs` | Added `Truncated` error type with `Display` and `Error` impls | +| `crates/pdata/src/otlp/common.rs` | Added `ProtoWrite` trait, `FixedProtoBuffer`, `StringTruncation`, `LengthPlaceholderSize`, updated `ProtoBuffer` with limit/reservation/placeholder fields, helper functions, updated macros | +| `crates/pdata/src/otlp/mod.rs` | Export `ProtoWrite`, `FixedProtoBuffer`, `StringTruncation`, `LengthPlaceholderSize`, `Truncated` | + +## Testing + +The implementation includes comprehensive tests covering: + +- Basic `FixedProtoBuffer` operations +- Truncation behavior for various encoding operations +- Varint encoding with partial writes +- Generic function usage with both buffer types +- Backward compatibility of `ProtoBuffer` inherent methods +- Partial content availability after truncation +- String truncation with UTF-8 safety +- Reserved tail space behavior +- Limit enforcement for growable buffers + +All existing tests continue to pass, plus new tests for the added functionality. diff --git a/rust/otap-dataflow/docs/internal-logs-design.md b/rust/otap-dataflow/docs/internal-logs-design.md new file mode 100644 index 0000000000..1431b32717 --- /dev/null +++ b/rust/otap-dataflow/docs/internal-logs-design.md @@ -0,0 +1,812 @@ +# Internal Logs Collection Design + +This document describes the internal logging architecture for OTAP-Dataflow, +enabling first-party and third-party log events to be captured, buffered, +and routed without creating feedback loops. + +## Goals + +1. **Unified capture**: Both first-party (`otel_info!`) and third-party + (`tracing::info!`) log events are captured in the same buffer +2. **Per-core buffering**: Each EffectHandler thread accumulates logs in + its own heap-allocated buffer, avoiding cross-thread contention +3. **No feedback loops**: The global telemetry collection thread cannot + create log events that cycle back through the system +4. **Non-blocking**: Log emission never blocks the EffectHandler thread +5. **Configurable routing**: Buffered logs can be sent to the global + collector, routed through an Internal Telemetry Receiver (ITR) + pipeline, or both + +## Architecture Overview + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ EffectHandler Thread (one per core/pipeline node) │ +│ │ +│ ┌────────────────────────┐ ┌──────────────────────────────────┐ │ +│ │ EffectHandlerCore │ │ Thread-Local State │ │ +│ │ │ │ │ │ +│ │ log_buffer: LogBuffer ├───────►│ CURRENT_BUFFER: *mut LogBuffer │ │ +│ │ (heap: 128KB-1MB) │ │ │ │ +│ └────────────────────────┘ └──────────────┬───────────────────┘ │ +│ │ │ │ +│ │ │ │ +│ ┌───────┴───────┐ │ │ +│ │ │ │ │ +│ ▼ ▼ ▼ │ +│ otel_info! tracing::info! 
BufferWriterLayer │
+│  (first-party)      (third-party)      (global Subscriber)                  │
+│       │                   │                      │                          │
+│       │                   └──────────────────────┘                          │
+│       │                              │                                      │
+│       │                              ▼                                      │
+│       │                   ┌──────────────────────┐                          │
+│       └──────────────────►│ log_buffer.push()    │                          │
+│                           └──────────────────────┘                          │
+│                                                                             │
+│ On timer tick: flush buffer ────────────────────────────────────────────────┼──┐
+└─────────────────────────────────────────────────────────────────────────────┘  │
+                                                                                  │
+              ┌───────────────────────────────────────────────────────────────────┘
+              │
+              ▼
+┌─────────────────────────────────────────────────────────────────────────────┐
+│ Global Telemetry Thread                                                     │
+│                                                                             │
+│ Subscriber: stderr-only or NoSubscriber (NO BufferWriterLayer)              │
+│                                                                             │
+│ ┌─────────────────────┐   ┌─────────────────────┐                           │
+│ │ LogsRegistry        │   │ ITR Pipeline        │                           │
+│ │ (ring buffer for    │   │ (OTLP export,       │                           │
+│ │  /logs endpoint)    │   │  processing, etc.)  │                           │
+│ └─────────────────────┘   └─────────────────────┘                           │
+│                                                                             │
+│ tracing::info!("...") → stderr (safe, no feedback)                          │
+└─────────────────────────────────────────────────────────────────────────────┘
+```
+
+## Key Components
+
+### 1. LogBuffer
+
+A heap-allocated ring buffer owned by each EffectHandler. Log records are
+encoded to OTLP bytes before storage.
+
+```rust
+pub struct LogBuffer {
+    /// Heap-allocated storage (e.g., 128KB to 1MB)
+    data: Box<[u8]>,
+
+    /// Ring buffer state
+    write_pos: usize,
+    read_pos: usize,
+
+    /// Statistics
+    record_count: usize,
+    dropped_count: usize,
+}
+```
+
+**Behavior:**
+- Fixed capacity, configured at startup
+- When full, oldest records are evicted (ring buffer semantics)
+- Tracks dropped record count for observability
+- Non-blocking push operation
+
+### 2. Thread-Local Buffer Pointer
+
+A thread-local variable provides the bridge between the tracing subscriber
+and the EffectHandler's buffer.
+
+```rust
+thread_local! {
+    static CURRENT_BUFFER: Cell<Option<NonNull<LogBuffer>>> = const { Cell::new(None) };
+}
+```
+
+**Lifecycle:**
+1. EffectHandler calls `install_buffer()` when its thread starts
+2. Thread-local points to the handler's `log_buffer`
+3. EffectHandler calls (or guard drops) `uninstall_buffer()` on shutdown
+4. Thread-local is cleared, subsequent events are dropped
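+
+The install/uninstall pair is a natural fit for an RAII guard. A minimal
+sketch, assuming the `CURRENT_BUFFER` thread-local above (`BufferGuard` is a
+hypothetical name, not an existing type):
+
+```rust
+use std::ptr::NonNull;
+
+/// Installs a buffer for the current thread; uninstalls on drop.
+/// The EffectHandler must keep the buffer alive, and on this thread,
+/// for as long as the guard exists (mirroring the unsafe contract of
+/// the raw pointer design above).
+pub struct BufferGuard;
+
+impl BufferGuard {
+    pub fn install(buffer: &mut LogBuffer) -> Self {
+        CURRENT_BUFFER.with(|c| c.set(Some(NonNull::from(buffer))));
+        BufferGuard
+    }
+}
+
+impl Drop for BufferGuard {
+    fn drop(&mut self) {
+        // Subsequent events on this thread are dropped (step 4 above).
+        CURRENT_BUFFER.with(|c| c.set(None));
+    }
+}
+```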
+
+### 3. BufferWriterLayer
+
+A `tracing_subscriber::Layer` installed as part of the global subscriber.
+It writes events to whichever buffer is installed in the current thread.
+
+```rust
+impl<S> Layer<S> for BufferWriterLayer
+where
+    S: Subscriber + for<'a> LookupSpan<'a>,
+{
+    fn on_event(&self, event: &Event<'_>, ctx: Context<'_, S>) {
+        CURRENT_BUFFER.with(|c| {
+            if let Some(mut ptr) = c.get() {
+                let buffer = unsafe { ptr.as_mut() };
+                let encoded = encode_event_to_otlp(event, &ctx);
+                buffer.push(&encoded);
+            }
+            // No buffer installed: event is dropped
+        });
+    }
+
+    fn enabled(&self, _metadata: &Metadata<'_>, _ctx: Context<'_, S>) -> bool {
+        // Only process events if a buffer is installed
+        CURRENT_BUFFER.with(|c| c.get().is_some())
+    }
+}
+```
+
+### 4. Global Telemetry Thread
+
+A dedicated thread for collecting logs from all EffectHandler threads and
+routing them to their destinations. This thread uses a **different**
+subscriber that does not include `BufferWriterLayer`.
+
+```rust
+pub fn spawn_global_telemetry_thread() -> JoinHandle<()> {
+    std::thread::spawn(|| {
+        // Safe subscriber: stderr only, or completely silent
+        let safe_subscriber = tracing_subscriber::fmt()
+            .with_writer(std::io::stderr)
+            .with_max_level(tracing::Level::WARN)
+            .finish();
+
+        // Override the default subscriber for this thread only
+        tracing::subscriber::with_default(safe_subscriber, || {
+            // Any tracing::info! in here goes to stderr
+            // NOT back through BufferWriterLayer
+            run_collection_loop();
+        });
+    })
+}
+```
+
+## Event Flow
+
+### First-Party Events (otel_info!, etc.)
+
+Code with access to the EffectHandler can log directly:
+
+```rust
+impl EffectHandlerCore {
+    pub fn log_info(&mut self, name: &str, attrs: &[(&str, &dyn Debug)]) {
+        let encoded = encode_log_record(Level::INFO, name, attrs);
+        self.log_buffer.push(&encoded);
+    }
+}
+
+// Usage in a receiver/processor/exporter:
+effect_handler.log_info("batch.processed", &[
+    ("count", &batch.len()),
+    ("duration_ms", &elapsed.as_millis()),
+]);
+```
+
+### Third-Party Events (tracing::info!, etc.)
+
+Library code or deeply nested code without EffectHandler access:
+
+```rust
+// Somewhere in a library
+tracing::info!(records = count, "Parsed input");
+
+// Flow:
+// 1. tracing::info! → global subscriber → BufferWriterLayer::on_event()
+// 2. BufferWriterLayer reads CURRENT_BUFFER thread-local
+// 3. If set, encodes event and pushes to that buffer
+// 4. If not set (wrong thread), event is dropped
+```
+
+### Buffer Flush
+
+EffectHandlers periodically flush their buffers:
+
+```rust
+impl EffectHandlerCore {
+    pub async fn flush_logs(&mut self) -> Result<(), Error> {
+        let logs = self.log_buffer.drain();
+        if logs.is_empty() {
+            return Ok(());
+        }
+
+        // Send to global collector via channel
+        self.log_sender.send(logs).await?;
+
+        Ok(())
+    }
+}
+```
+
+The flush can be triggered by:
+- Timer tick (e.g., every 1 second)
+- Buffer reaching high-water mark
+- Explicit flush request from pipeline
+
+## Feedback Loop Prevention
+
+The architecture prevents feedback loops through subscriber isolation:
+
+| Thread Type | Subscriber | BufferWriterLayer? | Effect of `tracing::info!` |
+|-------------|------------|-------------------|---------------------------|
+| EffectHandler | Global (with BufferWriterLayer) | Yes, buffer installed | Written to handler's buffer |
+| Global Telemetry | Thread-local override (stderr/noop) | No | Stderr or dropped |
+| Other | Global (with BufferWriterLayer) | No buffer installed | Dropped |
+
+**Why this prevents cycles:**
+
+1. EffectHandler thread emits `otel_info!("something")`
+2. Event is buffered locally (no channel send yet)
+3. On timer, buffer is flushed to global telemetry thread via channel
+4. Global thread receives the event
+5. If global thread calls `tracing::info!()` while processing:
+   - Its subscriber is the stderr/noop override
+   - BufferWriterLayer is NOT in its subscriber stack
+   - Event goes to stderr (or nowhere), NOT back to a buffer
+   - No channel send, no cycle
+
+## Encoding Format
+
+Log records are encoded to OTLP bytes (`opentelemetry.proto.logs.v1.LogRecord`)
+before storage in the buffer. This enables:
+
+- Zero-copy access via `LogsDataView` for formatting
+- Direct forwarding to OTLP exporters
+- Consistent format for `/logs` HTTP endpoint
+- Efficient storage (no per-field overhead)
+
+## Flush Strategy: Timer-Based with Drop on Full
+
+Unlike metrics (which are pre-aggregated), individual log events can be
+lost if the buffer fills.
The current approach is simple: + +- **Timer-based flush**: The pipeline runtime flushes on its telemetry timer +- **Drop new events when full**: If buffer fills before flush, new events are dropped +- **Track dropped count**: `LogBuffer::dropped_count()` for observability + +This keeps the implementation simple. Future enhancements could include: +- Sampling at high volume +- Priority levels (always keep ERROR events) +- Dynamic buffer sizing + +## Configuration + +*(To be defined)* + +```yaml +service: + telemetry: + logs: + level: info + internal_collection: + enabled: true + buffer_size_bytes: 131072 # 128KB per handler + flush_interval: "1s" + # Routing options: + # - global_collector: send to global thread + # - local_pipeline: route through ITR on same core + # - both: send to both destinations + routing: global_collector +``` + +## Integration with Existing Metrics System + +This design parallels the existing metrics infrastructure. Understanding +the metrics flow is essential for implementing consistent logging. + +### Metrics System Architecture + +The metrics system follows a clear data flow pattern: + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ Controller (lib.rs) │ +│ │ +│ MetricsSystem::new(config) │ +│ ├── MetricsRegistryHandle::new() ← Shared registry for aggregation │ +│ ├── MetricsCollector::new() ← Runs on metrics-aggregator thread│ +│ └── MetricsReporter::new(sender) ← Cloned to each pipeline thread │ +│ │ +└─────────────────────────────────────────────────────────────────────────────┘ + │ + │ metrics_reporter.clone() + ▼ +┌─────────────────────────────────────────────────────────────────────────────┐ +│ Pipeline Thread (one per core) │ +│ │ +│ PipelineContext::new(controller_context, pipeline_id, core_id, thread_id) │ +│ └── with_node_context(node_id, node_urn, node_kind) │ +│ └── register_metrics() │ +│ └── registry.register::(self.node_attribute_set()) │ +│ │ +│ Each component (receiver/processor/exporter): │ +│ 1. Receives PipelineContext via build() method │ +│ 2. Calls pipeline_ctx.register_metrics::() │ +│ 3. Gets MetricSet with pre-registered attributes │ +│ 4. On timer tick: metrics_reporter.report(&mut metric_set) │ +│ │ +└─────────────────────────────────────────────────────────────────────────────┘ + │ + │ flume channel (MetricSetSnapshot) + ▼ +┌─────────────────────────────────────────────────────────────────────────────┐ +│ Metrics Aggregator Thread ("metrics-aggregator") │ +│ │ +│ MetricsCollector::run_collection_loop() │ +│ loop { │ +│ snapshot = receiver.recv_async().await │ +│ registry.accumulate_snapshot(snapshot.key, &snapshot.metrics) │ +│ } │ +│ │ +└─────────────────────────────────────────────────────────────────────────────┘ + │ + │ MetricsRegistryHandle (Arc>) + ▼ +┌─────────────────────────────────────────────────────────────────────────────┐ +│ Admin HTTP Server ("http-admin" thread) │ +│ │ +│ GET /metrics or /telemetry/metrics │ +│ registry.visit_metrics_and_reset(|desc, attrs, iter| { │ +│ // Format as JSON, Prometheus, Line Protocol, etc. │ +│ // desc: MetricsDescriptor (name, field definitions) │ +│ // attrs: NodeAttributeSet (resource + node attributes) │ +│ // iter: MetricsIterator (field, value) pairs │ +│ }) │ +│ │ +└─────────────────────────────────────────────────────────────────────────────┘ +``` + +### Key Components + +1. **MetricsRegistryHandle**: Thread-safe handle wrapping `Arc>`. + Passed to admin for HTTP endpoints, passed to controller for aggregation. + +2. 
**MetricsReporter**: Cloneable sender side of a flume channel. Each pipeline
+   thread gets a clone to send `MetricSetSnapshot` messages.
+
+3. **MetricsCollector**: Runs on a dedicated thread, receives snapshots via
+   channel, and calls `registry.accumulate_snapshot()` to merge them.
+
+4. **NodeAttributeSet**: Consistent attributes attached to every metric set
+   registered by a component. Includes:
+   - Resource: `process_instance_id`, `host_id`, `container_id`
+   - Engine: `core_id`, `numa_node_id`
+   - Pipeline: `pipeline_id`
+   - Node: `node_id`, `node_urn`, `node_type`
+
+### Unified Registration: Shared MetricsKey for Logs and Metrics
+
+The key insight is that `MetricsKey` already identifies a component's
+`NodeAttributeSet` in the registry. Logs should reuse this same key
+rather than duplicating attribute storage.
+
+**Existing MetricsEntry (in registry.rs):**
+
+```rust
+pub struct MetricsEntry {
+    pub metrics_descriptor: &'static MetricsDescriptor,
+    pub attributes_descriptor: &'static AttributesDescriptor,
+    pub metric_values: Vec,
+    pub attribute_values: Box, // ← NodeAttributeSet
+}
+```
+
+When `pipeline_ctx.register_metrics::<M>()` is called:
+1. Returns `MetricSet<M>` containing a `MetricsKey` (slotmap index)
+2. The `NodeAttributeSet` is stored in the registry under that key
+3. **Both metrics and logs use the same `MetricsKey`**
+
+### Parallel Logs Architecture
+
+| Metrics | Logs |
+|---------|------|
+| `MetricSet` | `LogBuffer` |
+| `MetricsReporter` (channel sender) | `LogsReporter` (channel sender) |
+| `MetricsRegistry` (aggregates metrics) | `LogsRing` (ring buffer for recent logs) |
+| `MetricsCollector` (receives snapshots) | `LogsCollector` (receives batches) |
+| `MetricSetSnapshot { key, metrics }` | `LogBatch { producer_key, records }` |
+| `/metrics` endpoint | `/logs` endpoint |
+
+**Shared:**
+- `MetricsKey` identifies the producer (same key for metrics and logs)
+- `NodeAttributeSet` stored once in `MetricsRegistry`, looked up by key
+
+### Channel Data Types
+
+```rust
+/// A batch of logs from one producer - compact, just carries the key
+pub struct LogBatch {
+    /// Same key returned from register_metrics() - identifies NodeAttributeSet
+    pub producer_key: MetricsKey,
+
+    /// The log records
+    pub records: Vec<LogRecord>,
+}
+
+/// A single log record
+pub struct LogRecord {
+    pub callsite_id: Identifier, // Pointer to static Metadata
+    pub timestamp_ns: u64,
+    pub body_attrs_bytes: Bytes, // Pre-encoded body + event attributes
+}
+
+/// Reporter for sending log batches (parallel to MetricsReporter)
+#[derive(Clone)]
+pub struct LogsReporter {
+    sender: flume::Sender<LogBatch>,
+}
+
+impl LogsReporter {
+    pub fn try_report(&self, batch: LogBatch) -> Result<(), Error> {
+        match self.sender.try_send(batch) {
+            Ok(_) => Ok(()),
+            Err(flume::TrySendError::Full(_)) => Ok(()), // Drop if full
+            Err(flume::TrySendError::Disconnected(_)) => Err(Error::LogsChannelClosed),
+        }
+    }
+}
+```
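+
+The controller would create the channel pair once and hand reporter clones
+to each pipeline thread, mirroring the metrics setup. A sketch (the
+capacity and the function name are assumptions):
+
+```rust
+fn create_logs_channel() -> (LogsReporter, flume::Receiver<LogBatch>) {
+    // Bounded: when the collector falls behind, try_send fails and the
+    // batch is dropped, rather than blocking a pipeline thread.
+    let (sender, receiver) = flume::bounded(1024);
+    (LogsReporter { sender }, receiver)
+}
+```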
+
+### EffectHandler with Shared Key
+
+```rust
+pub struct EffectHandlerCore {
+    pub node_id: NodeId,
+    pub producer_key: MetricsKey,     // Shared identifier for metrics & logs
+    pub metrics_reporter: MetricsReporter,
+    pub logs_reporter: LogsReporter,  // NEW
+    pub log_buffer: LogBuffer,        // NEW
+    // ...
+}
+
+impl EffectHandlerCore {
+    pub async fn flush_logs(&mut self) -> Result<(), Error> {
+        if self.log_buffer.is_empty() {
+            return Ok(());
+        }
+
+        let batch = LogBatch {
+            producer_key: self.producer_key, // Just the 8-byte key
+            records: self.log_buffer.drain(),
+        };
+        self.logs_reporter.try_report(batch)
+    }
+}
+```
+
+### Consumer Side: LogsRing with Key Lookup
+
+```rust
+/// Ring buffer storing recent logs for /logs endpoint
+pub struct LogsRing {
+    entries: VecDeque<StoredLogEntry>,
+    capacity: usize,
+    total_received: u64,
+    total_dropped: u64,
+}
+
+/// Stored entry - just the key, not the full attributes
+pub struct StoredLogEntry {
+    pub producer_key: MetricsKey, // Lookup attrs from MetricsRegistry
+    pub callsite_id: Identifier,
+    pub timestamp_ns: u64,
+    pub body_attrs_bytes: Bytes,
+}
+
+impl LogsRing {
+    pub fn append(&mut self, batch: LogBatch) {
+        for record in batch.records {
+            if self.entries.len() >= self.capacity {
+                self.entries.pop_front();
+                self.total_dropped += 1;
+            }
+            self.entries.push_back(StoredLogEntry {
+                producer_key: batch.producer_key,
+                callsite_id: record.callsite_id,
+                timestamp_ns: record.timestamp_ns,
+                body_attrs_bytes: record.body_attrs_bytes,
+            });
+            self.total_received += 1;
+        }
+    }
+}
+```
+
+### Admin /logs Endpoint
+
+```rust
+pub async fn get_logs(State(state): State<AppState>) -> impl IntoResponse {
+    let logs_ring = state.logs_ring.lock();
+    let registry = state.metrics_registry.lock();
+
+    let writer = ConsoleWriter::no_color();
+    let mut output = String::new();
+
+    for entry in logs_ring.recent(100) {
+        // Dereference Identifier to get static Metadata
+        let metadata = entry.callsite_id.callsite().metadata();
+        let saved = SavedCallsite::new(metadata);
+
+        let record = LogRecord {
+            callsite_id: entry.callsite_id,
+            timestamp_ns: entry.timestamp_ns,
+            body_attrs_bytes: entry.body_attrs_bytes.clone(),
+        };
+
+        // Format the log record
+        output.push_str(&writer.format_log_record(&record, &saved));
+
+        // Look up NodeAttributeSet using the shared key
+        if let Some(metrics_entry) = registry.metrics.get(entry.producer_key) {
+            let attrs = metrics_entry.attribute_values.as_ref();
+            output.push_str(&format_node_attrs(attrs));
+        }
+        output.push('\n');
+    }
+
+    (StatusCode::OK, output)
+}
+```
+
+### Benefits of Shared Key
+
+| Aspect | Sending attrs per batch | Shared MetricsKey |
+|--------|------------------------|-------------------|
+| Registration | Separate for metrics/logs | Single registration |
+| Per-batch overhead | Full NodeAttributeSet clone | 8-byte key |
+| Attribute storage | Duplicated per batch | Single source of truth |
+| Consistency | Could diverge | Guaranteed identical |
+| Admin lookup | Already has attrs | Lookup from registry |
+
+### Identifier → Metadata: Direct Field Access
+
+The `Identifier` type wraps a pointer to static memory:
+
+```rust
+pub struct Identifier(
+    #[doc(hidden)]
+    pub &'static dyn Callsite,
+);
+```
+
+The inner field is `pub` (for macro construction purposes), so any thread
+can access it directly to get `Metadata`:
+
+```rust
+// Identifier.0 is &'static dyn Callsite
+let metadata: &'static Metadata<'static> = identifier.0.metadata();
+```
+
+No need to forward `(Identifier, Metadata)` pairs between threads.
+The admin thread can directly access `Identifier.0.metadata()` on any
+`Identifier` received in a `LogBatch` to get the full static metadata
+(level, target, file, line, name, etc.).
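+
+As a minimal sketch of what this enables (assuming the vendored
+`tracing-core` types; `describe` is a hypothetical helper, not existing code):
+
+```rust
+use tracing::{Level, Metadata};
+
+/// Recover display fields for a stored log entry straight from its
+/// callsite Identifier; no (Identifier, Metadata) side table needed.
+fn describe(callsite_id: &Identifier) -> (Level, &'static str) {
+    let metadata: &'static Metadata<'static> = callsite_id.0.metadata();
+    (*metadata.level(), metadata.target())
+}
+```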
+
+### Thread-Local Producer Key for Third-Party Instrumentation
+
+Third-party libraries often use `tracing::info!()` without access to any
+EffectHandler or `MetricsKey`. To attribute these logs to the correct
+component, we use a thread-local "current producer key" that is set
+when entering a component's execution scope.
+
+```rust
+// Thread-local current MetricsKey for third-party instrumentation.
+thread_local! {
+    static CURRENT_PRODUCER_KEY: RefCell<Option<MetricsKey>> = const { RefCell::new(None) };
+}
+
+/// Guard that sets the current producer key for the duration of a scope.
+/// When dropped, restores the previous key (supports nesting).
+pub struct ProducerKeyGuard {
+    previous: Option<MetricsKey>,
+}
+
+impl ProducerKeyGuard {
+    /// Enter a scope with the given producer key.
+    pub fn enter(key: MetricsKey) -> Self {
+        let previous = CURRENT_PRODUCER_KEY.with(|cell| cell.borrow_mut().replace(key));
+        Self { previous }
+    }
+}
+
+impl Drop for ProducerKeyGuard {
+    fn drop(&mut self) {
+        CURRENT_PRODUCER_KEY.with(|cell| {
+            *cell.borrow_mut() = self.previous;
+        });
+    }
+}
+
+/// Get the current producer key (if any component scope is active).
+pub fn current_producer_key() -> Option<MetricsKey> {
+    CURRENT_PRODUCER_KEY.with(|cell| *cell.borrow())
+}
+```
+
+**Usage in the engine (when calling component methods):**
+
+```rust
+impl EffectHandlerCore {
+    /// Enter a scope where third-party logs are attributed to this component.
+    pub fn enter_producer_scope(&self) -> ProducerKeyGuard {
+        ProducerKeyGuard::enter(self.producer_key)
+    }
+}
+
+// In the pipeline runtime, when calling a processor:
+let _guard = effect_handler.enter_producer_scope();
+processor.process(batch, effect_handler).await?;
+// Guard drops here, restoring previous key
+```
+
+**How it works with the BufferWriterLayer:**
+
+```rust
+impl<S: Subscriber> Layer<S> for BufferWriterLayer {
+    fn on_event(&self, event: &Event<'_>, ctx: Context<'_, S>) {
+        let record = encode_event(event, &ctx);
+        // Pass None - push_to_thread_buffer will use current_producer_key()
+        push_to_thread_buffer(record, None);
+    }
+}
+
+// In push_to_thread_buffer:
+pub fn push_to_thread_buffer(record: LogRecord, producer_key: Option<MetricsKey>) -> bool {
+    CURRENT_LOG_BUFFER.with(|cell| {
+        if let Some(ref mut buffer) = *cell.borrow_mut() {
+            // Use explicit key if provided, otherwise use thread-current key
+            let key = producer_key.or_else(current_producer_key);
+            buffer.push(LogEntry { record, producer_key: key });
+            true
+        } else {
+            false
+        }
+    })
+}
+```
+
+**Benefits:**
+
+| Aspect | Without ProducerKeyGuard | With ProducerKeyGuard |
+|--------|-------------------------|----------------------|
+| First-party logs | Attributed correctly | Attributed correctly |
+| Third-party libs | `producer_key: None` | Attributed to current component |
+| No EffectHandler access | Lost attribution | Correct attribution |
+| Nesting support | N/A | Previous key restored on drop |
+
+**Example flow:**
+
+```
+┌──────────────────────────────────────────────────────────────────┐
+│ Pipeline Thread                                                  │
+│                                                                  │
+│ 1. Enter processor scope: ProducerKeyGuard::enter(processor_key) │
+│    CURRENT_PRODUCER_KEY = Some(processor_key)                    │
+│                                                                  │
+│ 2. Processor calls library code                                  │
+│    └── Library calls tracing::info!("parsing data")              │
+│        └── BufferWriterLayer::on_event()                         │
+│            └── push_to_thread_buffer(record, None)               │
+│                └── key = current_producer_key() = processor_key  │
+│                └── buffer.push(LogEntry { key: processor_key })  │
+│                                                                  │
+│ 3. Guard drops: CURRENT_PRODUCER_KEY = None                      │
+│                                                                  │
+│ 4.
On flush: LogBatch includes entry with producer_key set │ +│ │ +│ 5. Admin can look up NodeAttributeSet for processor_key │ +│ → Log shows: node_id=processor, node_urn=arrow/processor │ +└─────────────────────────────────────────────────────────────────┘ +``` + +## Channel Architecture + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ PRODUCER SIDE │ +│ │ +│ Pipeline Thread 0 Pipeline Thread 1 Pipeline Thread N │ +│ ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ │ +│ │ EffectHandler │ │ EffectHandler │ │ EffectHandler │ │ +│ │ producer_key │ │ producer_key │ │ producer_key │ │ +│ │ log_buffer │ │ log_buffer │ │ log_buffer │ │ +│ │ logs_reporter │ │ logs_reporter │ │ logs_reporter │ │ +│ └────────┬────────┘ └────────┬────────┘ └────────┬────────┘ │ +│ │ │ │ │ +│ │ on timer: flush │ │ │ +│ ▼ ▼ ▼ │ +│ ┌────────────────────────────────────────────────────────────────────────┐│ +│ │ Metrics Channel (existing) ││ +│ │ flume::Sender ││ +│ └────────────────────────────────────────────────────────────────────────┘│ +│ ┌────────────────────────────────────────────────────────────────────────┐│ +│ │ Logs Channel (NEW, parallel) ││ +│ │ flume::Sender ││ +│ └────────────────────────────────────────────────────────────────────────┘│ +│ │ +└─────────────────────────────────────────────────────────────────────────────┘ + │ + │ Two separate channels + ▼ +┌─────────────────────────────────────────────────────────────────────────────┐ +│ CONSUMER SIDE │ +│ │ +│ ┌─────────────────────────────────┐ ┌─────────────────────────────────┐ │ +│ │ MetricsCollector │ │ LogsCollector (NEW) │ │ +│ │ (metrics-aggregator thread) │ │ (logs-collector thread OR │ │ +│ │ │ │ same thread as admin) │ │ +│ │ loop { │ │ │ │ +│ │ snapshot = rx.recv() │ │ loop { │ │ +│ │ registry.accumulate(...) │ │ batch = rx.recv() │ │ +│ │ } │ │ logs_ring.append(batch) │ │ +│ └─────────────────────────────────┘ │ } │ │ +│ └─────────────────────────────────┘ │ +│ │ +│ ┌─────────────────────────────────────────────────────────────────────────┐│ +│ │ Admin HTTP Server ││ +│ │ ││ +│ │ GET /metrics → registry.visit_metrics_and_reset(...) ││ +│ │ ││ +│ │ GET /logs → logs_ring.recent(limit) + registry.get(key).attrs ││ +│ │ ││ +│ └─────────────────────────────────────────────────────────────────────────┘│ +│ │ +└─────────────────────────────────────────────────────────────────────────────┘ +``` + +### Why Separate Channels? + +| Aspect | Metrics Channel | Logs Channel | +|--------|-----------------|--------------| +| Volume | Lower (aggregated values) | Higher (individual events) | +| Semantics | Merge into registry | Append to ring buffer | +| Backpressure | Accumulate locally | Drop oldest | +| Consumer | Aggregate by key | Keep recent N entries | + +## Future Considerations + +1. **Span context propagation**: Include trace/span IDs in buffered logs + when a span is active on the thread + +2. **Sampling**: Support head-based sampling to reduce volume + +3. **Priority levels**: Allow high-severity logs to bypass buffer limits + +4. **Direct ITR routing**: Option to route logs directly to a same-core + ITR pipeline without going through the global thread + +5. 
**Backpressure signaling**: Mechanism for global collector to signal
+   EffectHandlers when it's overloaded
+
+## Code References
+
+### Metrics System (for reference implementation)
+
+| File | Purpose |
+|------|---------|
+| `crates/controller/src/lib.rs` | Creates `MetricsSystem`, spawns threads, passes `MetricsReporter` to pipeline threads |
+| `crates/telemetry/src/lib.rs` | `MetricsSystem` struct holding registry, collector, reporter, dispatcher |
+| `crates/telemetry/src/registry.rs` | `MetricsRegistry` and `MetricsRegistryHandle` for aggregation |
+| `crates/telemetry/src/reporter.rs` | `MetricsReporter` for sending snapshots through flume channel |
+| `crates/telemetry/src/collector.rs` | `MetricsCollector::run_collection_loop()` receives and aggregates snapshots |
+| `crates/engine/src/context.rs` | `PipelineContext` and `NodeAttributeSet` for consistent attributes |
+| `crates/engine/src/effect_handler.rs` | `EffectHandlerCore` with `report_metrics()` method |
+| `crates/admin/src/telemetry.rs` | `/metrics` endpoint using `registry.visit_metrics_and_reset()` |
+
+### Existing Self-Tracing Primitives
+
+| File | Purpose |
+|------|---------|
+| `crates/telemetry/src/self_tracing.rs` | `LogRecord` and `SavedCallsite` types |
+| `crates/telemetry/src/self_tracing/encoder.rs` | `DirectLogRecordEncoder`, `DirectFieldVisitor` for OTLP encoding |
+| `crates/telemetry/src/self_tracing/formatter.rs` | `RawLoggingLayer`, `ConsoleWriter` for console output |
+| `crates/telemetry/src/internal_events.rs` | `otel_info!`, `otel_warn!`, etc. macros wrapping tracing |
+
+### Tokio Tracing (vendored)
+
+| File | Purpose |
+|------|---------|
+| `tokio-tracing-rs/tracing-core/src/dispatcher.rs` | Thread-local `CURRENT_STATE`, `with_default()` for subscriber scoping |
+| `tokio-tracing-rs/tracing-subscriber/src/registry/sharded.rs` | Example of `ThreadLocal<RefCell<SpanStack>>` for per-thread span stacks |
+
diff --git a/rust/otap-dataflow/docs/thread-local-telemetry-research.md b/rust/otap-dataflow/docs/thread-local-telemetry-research.md
new file mode 100644
index 0000000000..c95f35a76c
--- /dev/null
+++ b/rust/otap-dataflow/docs/thread-local-telemetry-research.md
@@ -0,0 +1,713 @@
+# Thread-Local Variables and Tokio Tracing: Research for Internal Telemetry
+
+This document explains how thread-local variables work in Rust, how Tokio's
+`tracing` library uses them for scoping, and how these mechanisms can be
+applied to the OTAP-Dataflow internal telemetry architecture.
+
+## Table of Contents
+
+1. [Rust Thread-Local Variables Fundamentals](#rust-thread-local-variables-fundamentals)
+2. [How Tracing Uses Thread-Locals](#how-tracing-uses-thread-locals)
+3. [Reentrancy Protection in Tracing](#reentrancy-protection-in-tracing)
+4. [Application to OTAP-Dataflow](#application-to-otap-dataflow)
+5. [Design Patterns for EffectHandler Buffer](#design-patterns-for-effecthandler-buffer)
+6. [Parallel with Existing Metrics Infrastructure](#parallel-with-existing-metrics-infrastructure)
+
+---
+
+## Rust Thread-Local Variables Fundamentals
+
+### Basic Thread-Local Storage
+
+Rust's `std::thread_local!` macro creates thread-local storage:
+
+```rust
+use std::cell::{Cell, RefCell};
+
+thread_local! {
+    // Simple value types use Cell
+    static COUNTER: Cell<u32> = const { Cell::new(0) };
+
+    // Complex types use RefCell for interior mutability
+    static BUFFER: RefCell<Vec<LogRecord>> = RefCell::new(Vec::new());
+}
+```
+
+**Key characteristics:**
+
+1. **Initialization**: Thread-locals are lazily initialized per-thread on first access
+2. **Lifetime**: Data lives as long as the thread (destroyed when thread exits)
+3. **Access Pattern**: Must be accessed via closure using `.with(|value| ...)`
+4. **Interior Mutability**: Use `Cell` for `Copy` types, `RefCell` for others
+5. **No Cross-Thread Access**: By design, other threads cannot see this data
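+
+A small demonstration of the per-thread behavior (each spawned thread sees
+a fresh, independently initialized value of `COUNTER` from above):
+
+```rust
+fn main() {
+    COUNTER.with(|c| c.set(41));
+
+    std::thread::spawn(|| {
+        // This thread's COUNTER was lazily initialized to 0, not 41.
+        COUNTER.with(|c| assert_eq!(c.get(), 0));
+    })
+    .join()
+    .unwrap();
+
+    // The spawning thread's value is untouched.
+    COUNTER.with(|c| assert_eq!(c.get(), 41));
+}
+```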
+
+### Access Patterns
+
+```rust
+// Reading
+COUNTER.with(|c| {
+    let value = c.get();
+    println!("Counter: {}", value);
+});
+
+// Writing
+COUNTER.with(|c| {
+    c.set(c.get() + 1);
+});
+
+// Mutable access to complex types
+BUFFER.with(|b| {
+    b.borrow_mut().push(record);
+});
+```
+
+### The `thread_local` Crate (Used by tracing-subscriber)
+
+The `thread_local` crate provides `ThreadLocal<T>`, which is different from
+`std::thread_local!`:
+
+```rust
+use thread_local::ThreadLocal;
+use std::cell::RefCell;
+
+struct Registry {
+    // Each thread gets its own RefCell<SpanStack>
+    current_spans: ThreadLocal<RefCell<SpanStack>>,
+}
+
+impl Registry {
+    fn enter(&self, id: &span::Id) {
+        // get_or_default() returns a reference to this thread's value
+        self.current_spans
+            .get_or_default() // Returns &RefCell<SpanStack>
+            .borrow_mut()
+            .push(id.clone());
+    }
+}
+```
+
+**Key difference**: `ThreadLocal<T>` is a struct field that can be shared
+across threads (via `Arc` or references), but each thread accessing it
+sees its own independent value.
+
+---
+
+## How Tracing Uses Thread-Locals
+
+### Dispatcher Thread-Local State
+
+The `tracing-core` dispatcher uses thread-local storage for two critical purposes:
+
+```rust
+// From tracing-core/src/dispatcher.rs
+struct State {
+    default: RefCell<Option<Dispatch>>,
+    can_enter: Cell<bool>,
+}
+
+#[cfg(feature = "std")]
+std::thread_local! {
+    static CURRENT_STATE: State = const {
+        State {
+            default: RefCell::new(None),
+            can_enter: Cell::new(true),
+        }
+    };
+}
+```
+
+#### 1. Per-Thread Default Subscriber (`default`)
+
+Each thread can have its own "scoped" subscriber that overrides the global default:
+
+```rust
+// The dispatcher lookup chain:
+pub fn get_default<T, F>(mut f: F) -> T
+where
+    F: FnMut(&Dispatch) -> T,
+{
+    // Fast path: if no scoped dispatchers exist, use global
+    if SCOPED_COUNT.load(Ordering::Acquire) == 0 {
+        return f(get_global());
+    }
+
+    // Slow path: check thread-local state
+    CURRENT_STATE
+        .try_with(|state| {
+            if let Some(entered) = state.enter() {
+                return f(&entered.current());
+            }
+            f(&NONE)
+        })
+        .unwrap_or_else(|_| f(&NONE))
+}
+```
+
+The scoping mechanism:
+
+```rust
+pub fn with_default<T>(dispatcher: &Dispatch, f: impl FnOnce() -> T) -> T {
+    // set_default stores the previous dispatcher and sets the new one
+    let _guard = set_default(dispatcher);
+    f()
+    // When guard drops, previous dispatcher is restored
+}
+```
+
+**How it works:**
+- `set_default()` stores the current dispatcher in the thread-local and
+  replaces it with the new one
+- Returns a `DefaultGuard` that, when dropped, restores the previous dispatcher
+- This creates a stack of dispatchers per thread
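+
+A short example of the scoping behavior using the public `tracing` API
+(`NoSubscriber` is tracing's built-in no-op subscriber):
+
+```rust
+use tracing::subscriber::{self, NoSubscriber};
+
+fn main() {
+    tracing::info!("handled by the current default subscriber");
+
+    // Only events inside the closure see the scoped subscriber.
+    subscriber::with_default(NoSubscriber::default(), || {
+        tracing::info!("silently dropped on this thread");
+    });
+
+    tracing::info!("default subscriber restored");
+}
+```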
+
+#### 2. Reentrancy Protection (`can_enter`)
+
+Prevents infinite recursion when a subscriber's callback triggers more tracing:
+
+```rust
+struct State {
+    default: RefCell<Option<Dispatch>>,
+    can_enter: Cell<bool>, // ← Reentrancy guard
+}
+
+impl State {
+    fn enter(&self) -> Option<Entered<'_>> {
+        // Atomically check and set to false
+        if self.can_enter.replace(false) {
+            Some(Entered(self))
+        } else {
+            None // Already in a dispatch, prevent recursion
+        }
+    }
+}
+
+impl Drop for Entered<'_> {
+    fn drop(&mut self) {
+        self.0.can_enter.set(true); // Re-enable on exit
+    }
+}
+```
+
+**Usage pattern:**
+- Before dispatching an event, `state.enter()` is called
+- If we're already dispatching (nested call), `enter()` returns `None`
+- The caller then uses `Dispatch::none()` instead, preventing recursion
+- When the dispatch completes, the guard's `Drop` re-enables entry
+
+### Registry Per-Thread Span Stack
+
+The `tracing-subscriber` Registry tracks which spans are "entered" on each thread:
+
+```rust
+// From tracing-subscriber/src/registry/sharded.rs
+pub struct Registry {
+    spans: Pool<DataInner>,
+    // Each thread has its own stack of currently-entered spans
+    current_spans: ThreadLocal<RefCell<SpanStack>>,
+    next_filter_id: u8,
+}
+
+impl Subscriber for Registry {
+    fn enter(&self, id: &span::Id) {
+        // Push to THIS thread's span stack
+        self.current_spans
+            .get_or_default()
+            .borrow_mut()
+            .push(id.clone());
+    }
+
+    fn exit(&self, id: &span::Id) {
+        // Pop from THIS thread's span stack
+        if let Some(spans) = self.current_spans.get() {
+            spans.borrow_mut().pop(id);
+        }
+    }
+
+    fn current_span(&self) -> Current {
+        // Return the top of THIS thread's span stack
+        self.current_spans
+            .get()
+            .and_then(|spans| {
+                let spans = spans.borrow();
+                let id = spans.current()?;
+                let span = self.get(id)?;
+                Some(Current::new(id.clone(), span.metadata))
+            })
+            .unwrap_or_else(Current::none)
+    }
+}
+```
+
+---
+
+## Reentrancy Protection in Tracing
+
+### The Problem
+
+When a subscriber processes an event, it might trigger more events:
+
+```rust
+impl Subscriber for MySubscriber {
+    fn event(&self, event: &Event<'_>) {
+        // This would cause infinite recursion!
+        tracing::info!("Received event: {:?}", event);
+    }
+}
+```
+
+### The Solution
+
+Tracing uses the `can_enter` flag as a guard:
+
+```rust
+// Simplified from dispatcher.rs
+pub fn get_default<T, F>(mut f: F) -> T
+where
+    F: FnMut(&Dispatch) -> T,
+{
+    CURRENT_STATE
+        .try_with(|state| {
+            // Try to enter dispatch mode
+            if let Some(entered) = state.enter() {
+                // Success: use the real dispatcher
+                return f(&entered.current());
+            }
+            // Already dispatching: use no-op dispatcher
+            f(&NONE)
+        })
+        .unwrap_or_else(|_| f(&NONE))
+}
+```
+
+The test in `dispatcher.rs` demonstrates this:
+
+```rust
+#[test]
+fn events_dont_infinite_loop() {
+    struct TestSubscriber;
+    impl Subscriber for TestSubscriber {
+        fn event(&self, _: &Event<'_>) {
+            static EVENTS: AtomicUsize = AtomicUsize::new(0);
+            assert_eq!(
+                EVENTS.fetch_add(1, Ordering::Relaxed),
+                0,
+                "event method called twice!"
+            );
+            // This nested event dispatch is blocked by can_enter
+            Event::dispatch(&TEST_META, &TEST_META.fields().value_set(&[]));
+        }
+    }
+    // ... test passes because the nested dispatch sees Dispatch::none()
+}
+```
+
+---
+
+## Application to OTAP-Dataflow
+
+### Internal Telemetry Feedback Prevention
+
+Your architecture document describes preventing feedback loops in internal
+telemetry. Here's how to implement this using thread-local state:
+
+```rust
+use std::cell::Cell;
+
+thread_local! {
+    /// Thread-local flag indicating this thread is an internal telemetry thread.
+    /// When true, all otel_* macros become no-ops to prevent feedback.
+    static INTERNAL_TELEMETRY_THREAD: Cell<bool> = const { Cell::new(false) };
+
+    /// Reentrancy guard for telemetry processing
+    static IN_TELEMETRY_DISPATCH: Cell<bool> = const { Cell::new(false) };
+}
+
+/// Mark the current thread as an internal telemetry thread.
+/// All otel_info!, otel_warn!, etc. macros will be disabled on this thread.
+pub fn mark_as_internal_telemetry_thread() {
+    INTERNAL_TELEMETRY_THREAD.with(|flag| flag.set(true));
+}
+
+/// Check if telemetry is enabled on this thread
+pub fn is_telemetry_enabled() -> bool {
+    INTERNAL_TELEMETRY_THREAD.with(|flag| !flag.get())
+}
+
+/// Guard for telemetry dispatch that prevents reentrancy
+pub struct TelemetryDispatchGuard;
+
+impl TelemetryDispatchGuard {
+    pub fn try_enter() -> Option<TelemetryDispatchGuard> {
+        IN_TELEMETRY_DISPATCH.with(|flag| {
+            if flag.replace(true) {
+                None // Already dispatching
+            } else {
+                Some(TelemetryDispatchGuard)
+            }
+        })
+    }
+}
+
+impl Drop for TelemetryDispatchGuard {
+    fn drop(&mut self) {
+        IN_TELEMETRY_DISPATCH.with(|flag| flag.set(false));
+    }
+}
+```
+
+### Updated Macros with Feedback Protection
+
+```rust
+/// Macro for logging informational messages with feedback protection.
+#[macro_export]
+macro_rules! otel_info {
+    ($name:expr $(,)?) => {
+        if $crate::is_telemetry_enabled() {
+            $crate::_private::info!(
+                name: $name,
+                target: env!("CARGO_PKG_NAME"),
+                name = $name,
+                ""
+            );
+        }
+    };
+    // ... other variants
+}
+```
+
+### Global Internal Telemetry Thread
+
+For your global logs collection thread:
+
+```rust
+pub fn spawn_internal_telemetry_thread<F>(
+    name: &str,
+    task: F,
+) -> std::thread::JoinHandle<()>
+where
+    F: FnOnce() + Send + 'static,
+{
+    std::thread::Builder::new()
+        .name(name.into())
+        .spawn(move || {
+            // Mark this thread as internal telemetry
+            mark_as_internal_telemetry_thread();
+
+            // Configure a safe subscriber for this thread only
+            let safe_subscriber = create_raw_logging_subscriber();
+            tracing::subscriber::with_default(safe_subscriber, task);
+        })
+        .expect("Failed to spawn internal telemetry thread")
+}
+```
+
+---
+
+## Design Patterns for EffectHandler Buffer
+
+### Option 1: Thread-Local Buffer with EffectHandler Coordination
+
+Since your `EffectHandler` owns its thread, you can use thread-local storage:
+
+```rust
+use std::cell::RefCell;
+use std::collections::VecDeque;
+
+/// Maximum bytes to buffer per thread
+const MAX_BUFFER_BYTES: usize = 65536;
+
+/// Individual log record (pre-encoded or structured)
+pub struct LogRecord {
+    pub timestamp: std::time::Instant,
+    pub level: tracing::Level,
+    pub name: &'static str,
+    pub target: &'static str,
+    // Pre-encoded OTLP bytes for attributes + body
+    pub encoded_data: Vec<u8>,
+}
+
+thread_local! {
+    /// Per-thread log buffer for first-party telemetry
+    static LOG_BUFFER: RefCell<LogBuffer> = RefCell::new(LogBuffer::new());
+}
+
+pub struct LogBuffer {
+    records: VecDeque<LogRecord>,
+    total_bytes: usize,
+}
+
+impl LogBuffer {
+    pub fn new() -> Self {
+        Self {
+            records: VecDeque::new(),
+            total_bytes: 0,
+        }
+    }
+
+    /// Add a record, potentially dropping oldest if over capacity
+    pub fn push(&mut self, record: LogRecord) {
+        let record_size = record.encoded_data.len();
+
+        // Evict old records if needed
+        while self.total_bytes + record_size > MAX_BUFFER_BYTES
+            && !self.records.is_empty()
+        {
+            if let Some(old) = self.records.pop_front() {
+                self.total_bytes -= old.encoded_data.len();
+            }
+        }
+
+        self.total_bytes += record_size;
+        self.records.push_back(record);
+    }
+
+    /// Drain all records for sending
+    pub fn drain(&mut self) -> Vec<LogRecord> {
+        self.total_bytes = 0;
+        self.records.drain(..).collect()
+    }
+
+    /// Check if buffer has data
+    pub fn is_empty(&self) -> bool {
+        self.records.is_empty()
+    }
+}
+
+/// Called by otel_* macros to buffer a log record
+pub fn buffer_log_record(record: LogRecord) {
+    LOG_BUFFER.with(|buf| {
+        buf.borrow_mut().push(record);
+    });
+}
+
+/// Called by EffectHandler on timer tick to flush logs
+pub fn flush_log_buffer() -> Vec<LogRecord> {
+    LOG_BUFFER.with(|buf| {
+        buf.borrow_mut().drain()
+    })
+}
+```
+
+### Option 2: EffectHandler-Owned Buffer (Explicit State)
+
+Alternatively, store the buffer directly in the `EffectHandler`:
+
+```rust
+pub struct EffectHandlerCore {
+    pub(crate) node_id: NodeId,
+    pub(crate) pipeline_ctrl_msg_sender: Option<PipelineCtrlMsgSender>,
+    pub(crate) metrics_reporter: MetricsReporter,
+
+    // NEW: Per-handler log buffer
+    pub(crate) log_buffer: LogBuffer,
+}
+
+impl EffectHandlerCore {
+    /// Log an info message, buffering it for later flush
+    pub fn log_info(&mut self, name: &'static str, attributes: &[(&str, AttributeValue)]) {
+        let record = LogRecord {
+            timestamp: std::time::Instant::now(),
+            level: tracing::Level::INFO,
+            name,
+            target: self.node_id.name.as_str(),
+            encoded_data: encode_attributes_to_otlp(attributes),
+        };
+        self.log_buffer.push(record);
+    }
+
+    /// Flush buffered logs - can be called on timer or when sending to pipeline
+    pub async fn flush_logs(&mut self) -> Result<(), Error> {
+        let records = self.log_buffer.drain();
+        if records.is_empty() {
+            return Ok(());
+        }
+
+        // Option A: Send to global collection thread
+        self.send_to_global_collector(records).await?;
+
+        // Option B: Route to local ITR pipeline
+        // self.route_to_local_pipeline(records).await?;
+
+        Ok(())
+    }
+}
+```
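+
+A usage sketch for Option 2 from inside a component. Both the
+`AttributeValue::from(usize)` conversion and the surrounding function are
+assumptions for illustration, not part of the API above:
+
+```rust
+async fn process_batch(
+    handler: &mut EffectHandlerCore,
+    batch: &[u8],
+) -> Result<(), Error> {
+    // Buffered locally; never blocks the pipeline thread.
+    handler.log_info(
+        "batch.received",
+        &[("bytes", AttributeValue::from(batch.len()))],
+    );
+
+    // ... process the batch ...
+
+    // No explicit flush here: the runtime drains the buffer on its
+    // telemetry timer via handler.flush_logs().
+    Ok(())
+}
+```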
+
+### Option 3: Hybrid Approach with Thread-Local + Handler Reference
+
+This pattern allows macros to work anywhere while the EffectHandler controls flushing:
+
+```rust
+use std::cell::RefCell;
+use std::sync::Arc;
+
+/// Weak reference to the EffectHandler's log sink
+pub struct LogSink {
+    sender: flume::Sender<LogRecord>,
+}
+
+thread_local! {
+    /// Thread-local pointer to this thread's log sink
+    static CURRENT_LOG_SINK: RefCell<Option<Arc<LogSink>>> = RefCell::new(None);
+}
+
+impl EffectHandlerCore {
+    /// Install this handler's log sink as the thread-local default
+    pub fn install_log_sink(&self) {
+        let sink = Arc::new(LogSink {
+            sender: self.log_channel.clone(),
+        });
+        CURRENT_LOG_SINK.with(|s| {
+            *s.borrow_mut() = Some(sink);
+        });
+    }
+
+    /// Remove the thread-local sink (e.g., during shutdown)
+    pub fn uninstall_log_sink(&self) {
+        CURRENT_LOG_SINK.with(|s| {
+            *s.borrow_mut() = None;
+        });
+    }
+}
+
+/// Called by otel_* macros
+pub fn emit_log(record: LogRecord) {
+    CURRENT_LOG_SINK.with(|sink| {
+        if let Some(sink) = &*sink.borrow() {
+            // Non-blocking send, drop if full
+            let _ = sink.sender.try_send(record);
+        }
+        // If no sink installed, log is dropped (or use fallback)
+    });
+}
+```
+
+---
+
+## Parallel with Existing Metrics Infrastructure
+
+Your existing metrics system follows a pattern that can be mirrored for logs:
+
+### Current Metrics Flow
+
+```
+┌──────────────────┐  report()   ┌──────────────────┐  aggregate  ┌─────────────────┐
+│ MetricSet        │ ──────────► │ MetricsReporter  │ ──────────► │ MetricsRegistry │
+│ (per-component)  │  (channel)  │ (per-handler)    │  (channel)  │ (global)        │
+└──────────────────┘             └──────────────────┘             └────────┬────────┘
+                                                                           │
+                                                                  dispatch_metrics()
+                                                                           ▼
+                                                              ┌─────────────────────┐
+                                                              │ MetricsDispatcher   │
+                                                              │ → OpenTelemetry SDK │
+                                                              │ → /metrics endpoint │
+                                                              └─────────────────────┘
+```
+
+### Proposed Parallel Logs Flow
+
+```
+┌──────────────────┐  buffer()       ┌──────────────────┐  flush     ┌─────────────────┐
+│ LogRecord        │ ──────────────► │ LogBuffer        │ ─────────► │ LogsRegistry    │
+│ (per-event)      │ (thread-local)  │ (per-handler)    │ (channel)  │ (global)        │
+└──────────────────┘                 └──────────────────┘            └────────┬────────┘
+                                                                              │
+                                                                       dispatch_logs()
+                                                                              ▼
+                                                                 ┌─────────────────────┐
+                                                                 │ LogsDispatcher      │
+                                                                 │ → ITR Pipeline      │
+                                                                 │ → /logs endpoint    │
+                                                                 │ → Raw console       │
+                                                                 └─────────────────────┘
+```
+
+### Implementation Sketch for LogsRegistry
+
+```rust
+use std::sync::{Arc, RwLock};
+use std::collections::VecDeque;
+
+/// Ring buffer of recent log records for the /logs endpoint
+pub struct LogsRegistry {
+    /// Configurable max records to keep
+    max_records: usize,
+    /// Ring buffer of recent logs (OTLP-encoded bytes)
+    recent_logs: RwLock<VecDeque<Vec<u8>>>,
+    /// Channel to receive logs from all handlers
+    receiver: flume::Receiver<Vec<u8>>,
+}
+
+impl LogsRegistry {
+    /// Get recent logs for HTTP endpoint (analogous to /metrics)
+    pub fn get_recent_logs(&self) -> Vec<Vec<u8>> {
+        self.recent_logs.read().unwrap().iter().cloned().collect()
+    }
+
+    /// Collection loop (parallel to MetricsCollector::run_collection_loop)
+    pub async fn run_collection_loop(&self) -> Result<(), Error> {
+        while let Ok(log_bytes) = self.receiver.recv_async().await {
+            let mut buffer = self.recent_logs.write().unwrap();
+
+            // Ring buffer eviction
+            if buffer.len() >= self.max_records {
+                buffer.pop_front();
+            }
+            buffer.push_back(log_bytes);
+
+            // Also forward to ITR pipeline if configured
+            // self.forward_to_itr(&log_bytes).await?;
+        }
+        Ok(())
+    }
+}
+```
+
+### HTTP Endpoint for Logs
+
+Similar to `/metrics`, provide a `/logs` endpoint:
+
+```rust
+/// Handler for GET /logs - returns recent internal logs
+pub async fn get_internal_logs(
+    registry: Arc<LogsRegistry>,
+) -> impl IntoResponse {
+    let logs = registry.get_recent_logs();
+
+    // Could format as:
+    // - JSON array of log lines
+    // - OTLP LogsData protobuf
+    // - Human-readable text
+
+    let formatted = format_logs_as_text(&logs);
+    (StatusCode::OK, formatted)
+}
+```
+
+---
+
+## Summary
+
+### Key Thread-Local Patterns for Your Use Case
+
+1. **Feedback Prevention Flag**: `INTERNAL_TELEMETRY_THREAD: Cell<bool>`
+   - Set `true` on dedicated internal telemetry threads
+   - Macros check this before emitting events
+
+2. **Reentrancy Guard**: `IN_TELEMETRY_DISPATCH: Cell<bool>`
+   - Prevents recursive telemetry events
+   - Similar to tracing's `can_enter` mechanism
+
+3. **Per-Thread Buffer**: `LOG_BUFFER: RefCell<LogBuffer>`
+   - Accumulate logs without blocking
+   - EffectHandler flushes on timer
+
+4. **Thread-Local Sink Reference**: `CURRENT_LOG_SINK: RefCell<Option<Arc<LogSink>>>`
+   - Allows macros to find the right destination
+   - EffectHandler installs/uninstalls on thread lifecycle
+
+### Tracing Mechanisms You Can Leverage
+
+1. **`with_default()`**: Set thread-specific subscriber for internal threads
+2. **`Dispatch::none()`**: No-op subscriber when reentrancy detected
+3. **`ThreadLocal<RefCell<T>>`**: Per-thread state in shared structures
+4. **Guard-based RAII**: Automatic cleanup on scope exit
+
+### Next Steps
+
+1. Implement the feedback prevention thread-local flag
+2. Update `otel_*` macros to check the flag
+3. Create `LogBuffer` structure parallel to `MetricSet`
+4. Add `LogsReporter` parallel to `MetricsReporter`
+5. Implement `LogsRegistry` with `/logs` endpoint
+6. Wire up EffectHandler timer-based flush

From f25ed91128396fb04523e571fda67a3afcff9791 Mon Sep 17 00:00:00 2001
From: Joshua MacDonald
Date: Thu, 8 Jan 2026 21:15:46 -0800
Subject: [PATCH 44/92] move docs

---
 rust/otap-dataflow/crates/telemetry/ARCHITECTURE.md        | 198 -----
 rust/otap-dataflow/docs/fixed-proto-buffer-design.md       | 754 ----------------
 rust/otap-dataflow/docs/internal-logs-design.md            | 812 ------------------
 rust/otap-dataflow/docs/thread-local-telemetry-research.md | 713 ---------------
 4 files changed, 2477 deletions(-)
 delete mode 100644 rust/otap-dataflow/crates/telemetry/ARCHITECTURE.md
 delete mode 100644 rust/otap-dataflow/docs/fixed-proto-buffer-design.md
 delete mode 100644 rust/otap-dataflow/docs/internal-logs-design.md
 delete mode 100644 rust/otap-dataflow/docs/thread-local-telemetry-research.md

diff --git a/rust/otap-dataflow/crates/telemetry/ARCHITECTURE.md b/rust/otap-dataflow/crates/telemetry/ARCHITECTURE.md
deleted file mode 100644
index 2540b6093e..0000000000
--- a/rust/otap-dataflow/crates/telemetry/ARCHITECTURE.md
+++ /dev/null
@@ -1,198 +0,0 @@
-# Internal Telemetry Collection Architecture & Development Plan
-
-## Architecture
-
-The internal telemetry SDK is designed for the engine to safely
-consume its own telemetry, and we intend for the self-hosted telemetry
-pipeline to be the standard configuration for all OpenTelemetry
-signals.
-
-Consuming self-generated telemetry presents a potential feedback
-loop: situations where a telemetry pipeline creates pressure on
-itself. We have designed the OTAP dataflow engine to remain reliable
-even with this kind of dependency on itself.
-
-## Internal telemetry receiver
-
-The Internal Telemetry Receiver or "ITR" is an OTAP-Dataflow receiver
-component that produces telemetry from internal sources. An internal
-telemetry pipeline consists of one or more ITR components and any of
-the connected processor and exporter components reachable from ITR
-source nodes.
-
-To begin with, every OTAP-Dataflow component is configured with an
-internal telemetry SDK meant for primary instrumentation of that
-component. Components are required to exclusively use the internal
-component. Components are required to exclusively use the internal
-telemetry SDK for self-diagnostics, as they are considered first party
-in this exchange.
-
-The internal telemetry receiver is the SDK's counterpart, making it
-second party as it is responsible for routing internal telemetry. The
-ITR cannot use the internal telemetry SDK itself, making it an
-invisible member of the pipeline. The ITR can be instrumented using
-third-party instrumentation (e.g., `tracing`, `log` crates) provided
-it can guarantee there is no potential for feedback (e.g., a single
-`tracing::info()` statement at startup).
-
-## Pitfall avoidance
-
-The OTAP-Dataflow engine is safeguarded against many self-induced
-telemetry pitfalls, as follows:
-
-- OTAP-Dataflow components reachable from an ITR cannot be configured
-  to send to an ITR node. This avoids a direct feedback cycle for
-  internal telemetry because the components cannot reach
-  themselves. For example, ITR and downstream components may be
-  configured for raw logging, no metrics, etc.
-- ITR instances share access to one or more threads with an associated
-  async runtime. They use these dedicated threads to isolate internal
-  telemetry processes that use third-party instrumentation.
-- A thread-local variable is used to redirect third-party
-  instrumentation in dedicated internal telemetry threads. Internal
-  telemetry threads automatically apply a safe configuration that
-  drops third-party instrumentation instead of creating feedback.
-- Components under observation (non-ITR components) have internal
-  telemetry events routed to queues in the OTAP-Dataflow pipeline on
-  the same core, which avoids blocking the engine. First-party
-  instrumentation will be handled on the CPU core that produced the
-  telemetry under normal circumstances. This isolates cores so that
-  each is able to process its own internal telemetry.
-- Option to configure internal telemetry multiple ways, including the
-  no-op implementation, multi-threaded subscriber, routing to the
-  same-core ITR, and/or raw logging.
-
-## OTLP-bytes first
-
-As a key design decision, the OTAP-Dataflow internal telemetry data
-path produces OTLP-bytes first. Because OTLP bytes is one of the
-built-in `OtapPayload` formats, once we have the OTLP bytes encoding of
-an event we are able to send to an OTAP-Dataflow pipeline. To obtain
-these bytes, we will build a custom [Tokio `tracing`
-Event][TOKIOEVENT] handler to produce OTLP bytes before dispatching to
-an internal pipeline, used (in different configurations) for first and
-third-party instrumentation.
-
-We use an intermediate representation in which the dynamic elements of
-the `tracing` event are encoded while primitive fields and metadata
-remain in structured form. These are encoded using the OTLP
-`opentelemetry.proto.logs.v1.LogRecord` protocol.
-
-[TOKIOEVENT]: https://docs.rs/tracing/latest/tracing/struct.Event.html
-
-## Raw logging
-
-We support formatting events for direct printing to the console from
-OTLP bytes. For the dynamic encoding, these are consumed using
-`otap_df_pdata::views::logs::LogsDataView`, our zero-copy accessor. We
-refer to this most-basic form of printing to the console as raw
-logging because it is a safe configuration early in the lifetime of a
-process.
-
-This configuration is meant for development purposes; it is likely to
-introduce contention over the console.
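-
-As a sketch of the idea (the `RawLogsData` constructor and the
-accessor names on the view traits below are assumptions, not the
-final API):
-
-```rust
-use otap_df_pdata::views::logs::LogsDataView;
-
-/// Print one OTLP-encoded LogsData payload as plain text lines.
-/// Hypothetical: `RawLogsData::new` wraps the encoded bytes, and the
-/// `resources()` / `scopes()` / `log_records()` / `body()` accessors
-/// stand in for whatever the real zero-copy view API exposes.
-fn raw_log(otlp_bytes: &[u8]) {
-    let data = RawLogsData::new(otlp_bytes);
-    for resource in data.resources() {
-        for scope in resource.scopes() {
-            for record in scope.log_records() {
-                // stderr, to stay out of the way of pipeline stdout
-                eprintln!("{:?} {:?}", record.severity_text(), record.body());
-            }
-        }
-    }
-}
-```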
-
-## Routing
-
-The two internal logs data paths are:
-
-- Third-party: Tokio `tracing` global subscriber: third-party log
-  events, instrumentation in code without access to an OTAP-Dataflow
-  `EffectHandler`. These are handled in a dedicated internal telemetry
-  thread.
-- First-party: components with a local or shared `EffectHandler` use
-  dedicated macros (e.g., `otel_info!(effect, "interesting thing")`);
-  these use the configured internal telemetry SDK, and for ordinary
-  components (not ITR-downstream) they are routed through the ITR on
-  the same core. These are always non-blocking APIs; the internal SDK
-  must drop logs instead of blocking the pipeline.
-
-## Development plan
-
-Each of the items below is relatively small, estimated at 300-500
-lines of new code plus new tests.
-
-### LogRecord: Tokio tracing Event and Metadata to LogRecordView
-
-When we receive a Tokio tracing event, whether through a
-`tracing::info!` macro (or similar) or through a dedicated
-`EffectHandler`-based API, the same thing happens:
-
-Create a `LogRecord`, a struct derived from `tracing::Event` and
-`tracing::Metadata`, containing raw LogRecord fields extracted from
-the tracing macro layer plus a fresh timestamp. Log record attributes
-and the log event body are encoded as the "attributes and body bytes"
-field of `LogRecord`; the other fields are copied.
-
-With this record, we can defer formatting or encoding the entire
-record until later. We can:
-
-- For raw logging, format directly for the console
-- Finish the full OTLP bytes encoding for the `LogRecord`
-- Sort and filter before combining into a `LogsData`.
-
-### OTLP-bytes console logging handler
-
-We require a way to print OTLP bytes as human-readable log lines. We
-cannot easily re-use the Tokio `tracing` format layer for this;
-however, we can use the `LogsDataView` trait with `RawLogsData` to
-format human-readable text for the console directly from OTLP bytes.
-
-This OTLP-bytes-to-human-readable logic will be used to implement raw
-logging.
-
-### Global logs collection thread
-
-An OTAP-Dataflow engine will run at least one global logs collection
-thread. These threads receive encoded (OTLP bytes) log events from
-various locations in the process. The global logs collection thread is
-special because it sets an anti-recursion bit in the thread-local
-state to prevent logging in its own export path.
-
-The global logs collection thread is configured as one (or more, if
-needed) instances consuming logs from the global Tokio `tracing`
-subscriber. In this thread, we'll configure the OpenTelemetry SDK or a
-dedicated OTAP-Dataflow pipeline (by configuration) for logs export.
-
-Because global logs collection threads are used as a fallback for
-`EffectHandler`-level logs, and because third-party libraries generally
-could call Tokio `tracing` APIs, we arrange to explicitly disallow
-these threads from logging. The macros are disabled from executing.
-
-### Global and Per-core Event Router
-
-OTAP-Dataflow provides an option to route internal telemetry to a pipeline
-in the same effect handler that produced the telemetry. When a component
-logging API is used on the `EffectHandler` or when a tokio `tracing` event
-occurs on the `EffectHandler` thread, it will be routed using thread-local
-state so that the event is immediately encoded and stored or flushed, without
-blocking the effect handler, as in the sketch below.
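-
-A minimal sketch of the same-core routing path (the `EventRouter`
-type and the `send_message()` signature here are illustrative
-assumptions, not the final API):
-
-```rust
-use std::cell::RefCell;
-
-thread_local! {
-    /// Router installed by the effect handler for this core, if any.
-    static EVENT_ROUTER: RefCell<Option<EventRouter>> = const { RefCell::new(None) };
-}
-
-/// Hypothetical router holding a bounded, non-blocking queue to the
-/// same-core ITR pipeline.
-struct EventRouter {
-    queue: flume::Sender<Vec<u8>>, // OTLP bytes
-}
-
-impl EventRouter {
-    /// Try to hand the encoded event to the same-core pipeline.
-    /// Returns false when the queue is full; the caller then applies
-    /// the configured fallback (global collector, raw logger, or drop).
-    fn send_message(&self, otlp_bytes: Vec<u8>) -> bool {
-        self.queue.try_send(otlp_bytes).is_ok()
-    }
-}
-```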
-
-When a telemetry event is routed directly, as in this case, and
-`send_message()` succeeds, it means there was queue space to accept
-the log record on the same core. When this fails, the configurable
-telemetry router will support options to use the global logs
-collection thread, use a raw logger, or do nothing (dropping the
-internal log record).
-
-## Example configuration
-
-```yaml
-service:
-  telemetry:
-    logs:
-      level: info
-      internal_collection:
-        enabled: true
-
-        # Per-thread buffer
-        buffer_size_bytes: 65536
-
-        # Individual record size limit
-        max_record_bytes: 16384
-
-        # Bounded channel capacity
-        max_record_count: 10
-
-        # Timer-based flush interval
-        flush_interval: "1s"
-```
diff --git a/rust/otap-dataflow/docs/fixed-proto-buffer-design.md b/rust/otap-dataflow/docs/fixed-proto-buffer-design.md
deleted file mode 100644
index a86d0eec8e..0000000000
--- a/rust/otap-dataflow/docs/fixed-proto-buffer-design.md
+++ /dev/null
@@ -1,754 +0,0 @@
-# Fixed-Size Proto Buffer Design
-
-## Problem Statement
-
-The OTAP dataflow pipeline requires an internal logging path for self-diagnostics that feeds OTLP protocol bytes directly into the pipeline. This internal instrumentation has specific constraints:
-
-1. **Safety**: Internal logging must not cause heap allocations that could interfere with the main data path or cause memory pressure during high-load scenarios.
-
-2. **Low Impact**: The encoding path must be lightweight and predictable, suitable for use in hot paths like `tracing::info!` statements.
-
-3. **Fixed-Size Buffers**: For stack-allocated buffers with a predetermined capacity, the encoder must handle out-of-space conditions gracefully rather than panicking or reallocating.
-
-4. **Truncation Support**: When encoding attributes into a fixed buffer, if space runs out mid-encoding (e.g., while looping through event variables), the encoder should:
-   - Return a "truncated" error result
-   - Allow callers to use the partially-encoded contents
-   - Enable tracking of dropped attributes
-
-5. **Code Reuse**: We explicitly want to avoid maintaining two separate encoder implementations—one for growable buffers (normal telemetry path) and one for fixed-size buffers (internal instrumentation).
-
-6. **Graceful Degradation**: Even for growable buffers, we want configurable limits to prevent unbounded growth from malformed or malicious data. Large attribute values and log bodies should be truncated gracefully with informative markers.
-
-### OTLP Protocol Support
-
-The OpenTelemetry LogRecord proto already provides mechanisms for handling truncation:
-
-```protobuf
-message LogRecord {
-    // ... other fields ...
-    uint32 dropped_attributes_count = 7;  // Track dropped attributes
-    fixed32 flags = 8;                    // 5 bytes total (tag + fixed32)
-}
-```
-
-This means we can:
-- Reserve 5 bytes at the end of our encoding buffer for `dropped_attributes_count`
-- Encode as many attributes as fit
-- On truncation, count remaining attributes and encode the count in the reserved space
-
-### Example Use Case
-
-```rust
-// During a tracing::info! statement, encode log attributes into a fixed buffer
-let mut buf = FixedProtoBuffer::<1024>::new();
-
-// Reserve space for dropped_attributes_count (tag=7 varint + uint32 varint = ~5 bytes)
-buf.reserve_tail(5);
-
-let mut encoded_count = 0;
-for attr in event_attributes {
-    if encode_key_value(&mut buf, attr).is_err() {
-        // Truncation occurred - use partial contents
-        break;
-    }
-    encoded_count += 1;
-}
-
-// Release reserved space and encode dropped count
-let dropped_count = event_attributes.len() - encoded_count;
-buf.release_tail(5);
-if dropped_count > 0 {
-    buf.encode_field_tag(7, WIRE_TYPE_VARINT);
-    buf.encode_varint(dropped_count as u64);
-}
-```
-
-## Solution
-
-### Design Approach
-
-The solution introduces a `ProtoWrite` trait that abstracts over buffer implementations, allowing encoding logic to work with both growable (`ProtoBuffer`) and fixed-size (`FixedProtoBuffer`) buffers through the same code path.
-
-### Core Concepts
-
-#### Buffer Space Model
-
-```
-|-------- written --------|----- remaining -----|---- reserved ----|
-                          ^                     ^
-                         len           limit - reserved_tail
-
-effective_remaining = limit - len - reserved_tail
-```
-
-- **limit**: Maximum bytes that can be written (may be less than capacity)
-- **reserved_tail**: Bytes reserved at the end for fields like `dropped_attributes_count`
-- **effective_remaining**: Actual bytes available for the next write operation
-
-#### Length Placeholder Optimization
-
-When encoding nested messages, we don't know the size upfront, so we reserve placeholder bytes for the length varint and patch them afterward. The number of bytes needed depends on the maximum possible message size:
-
-| Buffer Limit | Max Length | Varint Bytes | Savings vs 4-byte |
-|--------------|------------|--------------|-------------------|
-| ≤ 127 B      | 127        | 1 byte       | 75%               |
-| ≤ 16 KiB     | 16383      | 2 bytes      | 50%               |
-| ≤ 2 MiB      | 2097151    | 3 bytes      | 25%               |
-| > 2 MiB      | 2^28-1     | 4 bytes      | 0%                |
-
-For internal instrumentation with small fixed buffers (e.g., 1-4 KiB), using 2-byte placeholders instead of 4-byte saves significant space, especially in deeply nested structures like attributes within log records within scope logs within resource logs.
-
-**Example savings for a LogRecord with 10 nested messages:**
-- 4-byte placeholders: 40 bytes overhead
-- 2-byte placeholders: 20 bytes overhead
-- Savings: 20 bytes (could fit another small attribute!)
-
-#### `LengthPlaceholderSize` Enum
-
-```rust
-/// Determines how many bytes to reserve for length placeholders in nested messages.
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
-pub enum LengthPlaceholderSize {
-    /// 1 byte - for buffers ≤ 127 bytes (max length = 127)
-    OneByte,
-    /// 2 bytes - for buffers ≤ 16 KiB (max length = 16383)
-    TwoBytes,
-    /// 3 bytes - for buffers ≤ 2 MiB (max length = 2097151)
-    ThreeBytes,
-    /// 4 bytes - for larger buffers (max length = 268435455)
-    #[default]
-    FourBytes,
-}
-
-impl LengthPlaceholderSize {
-    /// Choose the optimal placeholder size for a given buffer limit.
-    pub const fn for_limit(limit: usize) -> Self {
-        if limit <= 127 {
-            Self::OneByte
-        } else if limit <= 16383 {
-            Self::TwoBytes
-        } else if limit <= 2097151 {
-            Self::ThreeBytes
-        } else {
-            Self::FourBytes
-        }
-    }
-
-    /// Number of bytes this placeholder uses.
-    pub const fn num_bytes(self) -> usize {
-        match self {
-            Self::OneByte => 1,
-            Self::TwoBytes => 2,
-            Self::ThreeBytes => 3,
-            Self::FourBytes => 4,
-        }
-    }
-
-    /// Maximum length that can be encoded with this placeholder size.
-    pub const fn max_length(self) -> usize {
-        match self {
-            Self::OneByte => 127,
-            Self::TwoBytes => 16383,
-            Self::ThreeBytes => 2097151,
-            Self::FourBytes => 268435455,
-        }
-    }
-
-    /// Encode a zero-padded length placeholder.
-    /// Returns the bytes to write.
-    pub const fn placeholder_bytes(self) -> &'static [u8] {
-        match self {
-            Self::OneByte => &[0x00],
-            Self::TwoBytes => &[0x80, 0x00],
-            Self::ThreeBytes => &[0x80, 0x80, 0x00],
-            Self::FourBytes => &[0x80, 0x80, 0x80, 0x00],
-        }
-    }
-}
-```
-
-#### Dispatch Mechanism
-
-**Dynamic sizing based on remaining capacity:**
-
-The placeholder size only needs to accommodate the *remaining buffer space*. When writing a length placeholder, we check how much space is left and choose the smallest sufficient placeholder:
-
-```rust
-/// Returned from write_length_placeholder, used to patch the length later.
-#[derive(Clone, Copy)]
-pub struct LengthPlaceholder {
-    pub offset: usize,
-    pub size: LengthPlaceholderSize,
-}
-
-fn write_length_placeholder(&mut self) -> Result<LengthPlaceholder, Truncated> {
-    let offset = self.len();
-    let remaining = self.capacity() - offset - self.reserved_tail;
-    let size = LengthPlaceholderSize::for_limit(remaining);
-    self.write_bytes(size.placeholder_bytes())?;
-    Ok(LengthPlaceholder { offset, size })
-}
-
-fn patch_length_placeholder(&mut self, placeholder: LengthPlaceholder, length: usize) {
-    let slice = self.as_mut_slice();
-    for i in 0..placeholder.size.num_bytes() {
-        slice[placeholder.offset + i] += ((length >> (i * 7)) & 0x7f) as u8;
-    }
-}
-```
-
-**Usage in macro:**
-
-```rust
-macro_rules! proto_encode_len_delimited_try {
-    ($buf:expr, $tag:expr, $encode_fn:expr) => {{
-        proto_encode_varint($buf, $tag);
-        let placeholder = $buf.write_length_placeholder()?;  // returns LengthPlaceholder
-        let start = $buf.len();
-        $encode_fn;
-        let length = $buf.len() - start;
-        $buf.patch_length_placeholder(placeholder, length);  // uses stored offset + size
-    }};
-}
-```
-
-**Benefits:**
-
-- **No configuration needed**: The encoder automatically chooses optimal sizes
-- **Simple**: The placeholder struct is just 2 usizes on the stack
-- **Optimal**: Uses smallest sufficient placeholder for remaining space
-
-**Example progression in a 4 KiB buffer:**
-
-| Write # | Position | Remaining | Placeholder Size | Overhead |
-|---------|----------|-----------|------------------|----------|
-| 1       | 0        | 4096      | 2 bytes          | 2        |
-| 2       | 100      | 3996      | 2 bytes          | 2        |
-| 3       | 3900     | 196       | 2 bytes          | 2        |
-| 4       | 4000     | 96        | 1 byte           | 1        |
-
-### New Types
-
-#### `Truncated` Error
-
-A simple, lightweight error type indicating a fixed-size buffer ran out of space:
-
-```rust
-#[derive(Debug, Clone, Copy, PartialEq, Eq)]
-pub struct Truncated;
-```
-
-This error is:
-- Zero-sized (no runtime overhead)
-- Copyable (can be returned by value)
-- Convertible to the main `Error` type via `From`
-
-#### `StringTruncation` Result
-
-Information about how a string was truncated:
-
-```rust
-#[derive(Debug, Clone, Copy, PartialEq, Eq)]
-pub struct StringTruncation {
-    /// Number of bytes actually written (including any marker)
-    pub bytes_written: usize,
-    /// Number of bytes from the original string that were truncated
-    pub bytes_truncated: usize,
-}
-
-impl StringTruncation {
-    pub fn none() -> Self {
-        Self { bytes_written: 0, bytes_truncated: 0 }
-    }
-
-    pub fn was_truncated(&self) -> bool {
-        self.bytes_truncated > 0
-    }
-}
-```
-
-#### `ProtoWrite` Trait
-
-The trait defines the core buffer operations with fallible semantics:
-
-```rust
-pub trait ProtoWrite {
-    // === Core required methods ===
-
-    /// Append bytes to the buffer. Returns Err(Truncated) if insufficient capacity.
-    fn write_bytes(&mut self, bytes: &[u8]) -> Result<(), Truncated>;
-
-    /// Current length of encoded data.
-    fn len(&self) -> usize;
-
-    /// Get a reference to the encoded bytes.
-    fn as_slice(&self) -> &[u8];
-
-    /// Get a mutable reference for patching length placeholders.
-    fn as_mut_slice(&mut self) -> &mut [u8];
-
-    /// Clear the buffer contents (does not clear reserved_tail or limit).
-    fn clear(&mut self);
-
-    /// Physical capacity of the buffer.
-    fn capacity(&self) -> usize;
-
-    // === Limit and reservation management ===
-
-    /// Set a soft limit on buffer size. For fixed buffers, clamped to capacity.
-    /// For growable buffers, prevents growth beyond this point.
-    /// Also updates the length placeholder size to match the new limit.
-    fn set_limit(&mut self, limit: usize);
-
-    /// Get current limit (defaults to capacity for fixed, usize::MAX for growable).
-    fn limit(&self) -> usize;
-
-    /// Reserve bytes at the end of the buffer for later use.
-    /// Returns the new effective remaining space.
-    /// This space is protected from writes until released.
-    fn reserve_tail(&mut self, bytes: usize) -> usize;
-
-    /// Release previously reserved tail bytes, making them available for writing.
-    fn release_tail(&mut self, bytes: usize);
-
-    /// Get current tail reservation.
-    fn reserved_tail(&self) -> usize;
-
-    /// Bytes available for writing: limit - len - reserved_tail
-    fn remaining(&self) -> usize {
-        self.limit()
-            .saturating_sub(self.len())
-            .saturating_sub(self.reserved_tail())
-    }
-
-    // === Length placeholder configuration ===
-
-    /// Get the length placeholder size for this buffer.
-    /// Determined by the buffer's limit.
-    fn length_placeholder_size(&self) -> LengthPlaceholderSize {
-        LengthPlaceholderSize::for_limit(self.limit())
-    }
-
-    /// Override the length placeholder size.
-    /// Useful when you know nested messages will be small even in a large buffer.
-    fn set_length_placeholder_size(&mut self, size: LengthPlaceholderSize);
-
-    /// Write the length placeholder bytes and return the position where length starts.
-    fn write_length_placeholder(&mut self) -> Result<usize, Truncated> {
-        let pos = self.len();
-        let placeholder = self.length_placeholder_size().placeholder_bytes();
-        self.write_bytes(placeholder)?;
-        Ok(pos)
-    }
-
-    /// Patch a previously written length placeholder with the actual length.
-    fn patch_length_placeholder(&mut self, len_start_pos: usize, length: usize) {
-        let num_bytes = self.length_placeholder_size().num_bytes();
-        let slice = self.as_mut_slice();
-        for i in 0..num_bytes {
-            slice[len_start_pos + i] += ((length >> (i * 7)) & 0x7f) as u8;
-        }
-    }
-
-    // === Encoding methods with default implementations ===
-
-    fn encode_varint(&mut self, value: u64) -> Result<(), Truncated>;
-    fn encode_field_tag(&mut self, field_number: u64, wire_type: u64) -> Result<(), Truncated>;
-    fn encode_sint32(&mut self, value: i32) -> Result<(), Truncated>;
-    fn encode_string(&mut self, field_tag: u64, val: &str) -> Result<(), Truncated>;
-    fn encode_bytes_field(&mut self, field_tag: u64, val: &[u8]) -> Result<(), Truncated>;
-
-    // === Truncating string encoder ===
-
-    /// Encode a string field, truncating if necessary to fit in available space.
-    ///
-    /// If the full string doesn't fit, truncates and appends the marker.
-    /// The marker should be a short fixed string like "..." or "[TRUNCATED]".
-    ///
-    /// Returns information about what was written and truncated.
-    /// Returns Err(Truncated) only if even the field tag + minimal content won't fit.
-    fn encode_string_truncated(
-        &mut self,
-        field_tag: u64,
-        val: &str,
-        marker: &str,
-    ) -> Result<StringTruncation, Truncated>;
-}
-```
-
-#### `FixedProtoBuffer`
-
-A stack-allocatable, fixed-size buffer:
-
-```rust
-pub struct FixedProtoBuffer<const N: usize> {
-    buffer: [u8; N],
-    len: usize,
-    reserved_tail: usize,
-    placeholder_size: LengthPlaceholderSize,
-}
-
-impl<const N: usize> FixedProtoBuffer<N> {
-    pub const fn new() -> Self {
-        Self {
-            buffer: [0u8; N],
-            len: 0,
-            reserved_tail: 0,
-            // Computed at compile time based on N
-            placeholder_size: LengthPlaceholderSize::for_limit(N),
-        }
-    }
-}
-```
-
-Key properties:
-- **No heap allocation**: The buffer is a fixed-size array
-- **Automatic placeholder sizing**: `LengthPlaceholderSize` is determined from `N` at compile time
-- **Atomic writes where possible**: `write_bytes` checks capacity before writing
-- **Truncation-safe**: Returns `Err(Truncated)` instead of panicking
-- **Limit equals capacity**: `set_limit` is a no-op (or clamps to capacity)
-
-#### Updated `ProtoBuffer`
-
-The growable buffer gains limit, reservation, and placeholder size support:
-
-```rust
-pub struct ProtoBuffer {
-    buffer: Vec<u8>,
-    limit: usize,                             // Default: usize::MAX (unlimited)
-    reserved_tail: usize,                     // Default: 0
-    placeholder_size: LengthPlaceholderSize,  // Default: FourBytes
-}
-```
-
-Key properties:
-- **Configurable limit**: Prevents unbounded growth
-- **Configurable placeholder size**: Can use 2-byte placeholders when limit is set appropriately
-- **Truncation on limit**: Returns `Err(Truncated)` when limit reached (no realloc)
-- **Backward compatible**: Default limit is unlimited, default placeholder is 4 bytes
-
-### String Truncation Behavior
-
-The `encode_string_truncated` method implements graceful truncation:
-
-```rust
-fn encode_string_truncated(
-    &mut self,
-    field_tag: u64,
-    val: &str,
-    marker: &str,  // e.g., "..." or "[TRUNCATED]"
-) -> Result<StringTruncation, Truncated> {
-    let tag_len = varint_len((field_tag << 3) | WIRE_TYPE_LEN);
-    let full_len = tag_len + varint_len(val.len()) + val.len();
-
-    // Check if full string fits
-    if full_len <= self.remaining() {
-        self.encode_string(field_tag, val)?;
-        return Ok(StringTruncation::none());
-    }
-
-    // Calculate how much of the string we can fit with marker
-    let marker_bytes = marker.as_bytes();
-    let available = self.remaining();
-
-    // Need at least: tag + length(1 byte min) + marker
-    let min_needed = tag_len + 1 + marker_bytes.len();
-    if available < min_needed {
-        return Err(Truncated);
-    }
-
-    // Calculate truncated string length
-    let max_content = available - tag_len - 1;  // Assuming 1-byte length varint
-    let truncated_str_len = max_content.saturating_sub(marker_bytes.len());
-
-    // Find UTF-8 safe truncation point
-    let truncated_str = truncate_utf8_safe(val, truncated_str_len);
-    let bytes_truncated = val.len() - truncated_str.len();
-
-    // Build the truncated content: truncated_str + marker
-    let total_content_len = truncated_str.len() + marker_bytes.len();
-
-    self.encode_field_tag(field_tag, WIRE_TYPE_LEN)?;
-    self.encode_varint(total_content_len as u64)?;
-    self.write_bytes(truncated_str.as_bytes())?;
-    self.write_bytes(marker_bytes)?;
-
-    Ok(StringTruncation {
-        bytes_written: tag_len + varint_len(total_content_len) + total_content_len,
-        bytes_truncated,
-    })
-}
-
-/// Truncate a string at a UTF-8 safe boundary
-fn truncate_utf8_safe(s: &str, max_bytes: usize) -> &str {
-    if max_bytes >= s.len() {
-        return s;
-    }
-    // Find the last valid UTF-8 char boundary at or before max_bytes
-    let mut end = max_bytes;
-    while end > 0 && !s.is_char_boundary(end) {
-        end -= 1;
-    }
-    &s[..end]
-}
-```
-
-### Backward Compatibility
-
-The existing `ProtoBuffer` type retains its infallible inherent methods:
-
-```rust
-impl ProtoBuffer {
-    // These remain infallible (no Result return type) when limit is unlimited
-    pub fn encode_varint(&mut self, value: u64) { ... }
-    pub fn encode_string(&mut self, field_tag: u64, val: &str) { ... }
-    // ... etc
-}
-
-// Additionally implements ProtoWrite (may return Err if limit set)
-impl ProtoWrite for ProtoBuffer { ... }
-```
-
-This means:
-- All existing code using `ProtoBuffer` continues to work unchanged
-- New generic code can use `impl ProtoWrite` to work with either buffer type
-- Setting a limit on `ProtoBuffer` enables truncation behavior
-
-### Macro Support
-
-The macros now use the buffer's configured `LengthPlaceholderSize`:
-
-1. **`proto_encode_len_delimited_unknown_size!`** (existing, updated)
-   - Uses infallible helpers for `ProtoBuffer`
-   - Uses the buffer's `length_placeholder_size()` instead of hardcoded 4 bytes
-   - No error propagation needed
-
-2. **`proto_encode_len_delimited_try!`** (new)
-   - For use with generic `ProtoWrite` code
-   - Uses the buffer's `length_placeholder_size()`
-   - Propagates `Truncated` errors via `?`
-   - Returns `Result<(), Truncated>`
-
-```rust
-/// Updated macro using configurable placeholder size
-#[macro_export]
-macro_rules! proto_encode_len_delimited_try {
-    ($field_tag: expr, $encode_fn:expr, $buf:expr) => {{
-        use $crate::otlp::ProtoWrite;
-        $buf.encode_field_tag($field_tag, $crate::proto::consts::wire_types::LEN)?;
-        let len_start_pos = $buf.write_length_placeholder()?;
-        $encode_fn;
-        let num_bytes = $buf.length_placeholder_size().num_bytes();
-        let len = $buf.len() - len_start_pos - num_bytes;
-        $buf.patch_length_placeholder(len_start_pos, len);
-        Ok::<(), $crate::error::Truncated>(())
-    }};
-}
-```
-
-## Usage Examples
-
-### Generic Encoding Function
-
-```rust
-use otap_df_pdata::otlp::{ProtoWrite, Truncated};
-
-fn encode_attributes<W: ProtoWrite>(
-    buf: &mut W,
-    attrs: &[KeyValue],
-) -> Result<usize, Truncated> {
-    let mut count = 0;
-    for attr in attrs {
-        buf.encode_string(KEY_TAG, &attr.key)?;
-        buf.encode_string(VALUE_TAG, &attr.value)?;
-        count += 1;
-    }
-    Ok(count)
-}
-```
-
-### Fixed Buffer with Reserved Space for Dropped Count
-
-```rust
-use otap_df_pdata::otlp::{FixedProtoBuffer, ProtoWrite, Truncated};
-
-fn encode_log_record(attrs: &[KeyValue], body: &str) -> Vec<u8> {
-    let mut buf = FixedProtoBuffer::<2048>::new();
-
-    // Reserve 5 bytes for dropped_attributes_count (tag + varint)
-    buf.reserve_tail(5);
-
-    // Encode body with truncation marker
-    let body_result = buf.encode_string_truncated(
-        LOG_RECORD_BODY,
-        body,
-        "...[truncated]"
-    );
-
-    // Encode attributes until we run out of space
-    let mut encoded = 0;
-    for attr in attrs {
-        match encode_key_value(&mut buf, attr) {
-            Ok(()) => encoded += 1,
-            Err(Truncated) => break,
-        }
-    }
-
-    // Release reserved space and encode dropped count
-    let dropped = attrs.len() - encoded;
-    buf.release_tail(5);
-
-    if dropped > 0 {
-        let _ = buf.encode_field_tag(DROPPED_ATTRIBUTES_COUNT, WIRE_TYPE_VARINT);
-        let _ = buf.encode_varint(dropped as u64);
-    }
-
-    buf.as_slice().to_vec()
-}
-```
-
-### Limiting Growable Buffer
-
-```rust
-use otap_df_pdata::otlp::{ProtoBuffer, ProtoWrite, LengthPlaceholderSize};
-
-fn encode_with_limit(data: &LargeData) -> Result<Vec<u8>, Truncated> {
-    let mut buf = ProtoBuffer::new();
-
-    // Prevent unbounded growth - limit to 16KB
-    // This automatically sets placeholder size to TwoBytes
-    buf.set_limit(16 * 1024);
-
-    // Or explicitly use small placeholders for even smaller limits
-    // buf.set_limit(4 * 1024);
-    // buf.set_length_placeholder_size(LengthPlaceholderSize::TwoBytes);
-
-    // Reserve space for metadata at the end
-    buf.reserve_tail(64);
-
-    // Encode potentially large content with truncation
-    for item in &data.items {
-        buf.encode_string_truncated(ITEM_TAG, &item.value, "...")?;
-    }
-
-    // Add metadata in reserved space
-    buf.release_tail(64);
-    encode_metadata(&mut buf, data)?;
-
-    Ok(buf.into_bytes())
-}
-```
-
-### Space-Efficient Small Buffer
-
-```rust
-use otap_df_pdata::otlp::FixedProtoBuffer;
-
-fn encode_compact_log() {
-    // 4KB buffer automatically uses 2-byte length placeholders
-    let mut buf = FixedProtoBuffer::<4096>::new();
-
-    assert_eq!(buf.length_placeholder_size().num_bytes(), 2);
-
-    // Each nested message saves 2 bytes compared to 4-byte placeholders!
-    // In a LogRecord with 10 nested structures, that's 20 bytes saved.
-}
-```
-
-### Body Truncation with Byte Count
-
-For cases where you want to include the byte count in the truncation marker:
-
-```rust
-fn encode_body_with_count<W: ProtoWrite>(buf: &mut W, body: &str) -> StringTruncation {
-    // First attempt with simple marker
-    match buf.encode_string_truncated(LOG_RECORD_BODY, body, "...") {
-        Ok(info) => {
-            if info.was_truncated() {
-                // Log the truncation details for observability
-                // The bytes_truncated field tells us exactly how much was lost
-                tracing::debug!(
-                    truncated_bytes = info.bytes_truncated,
-                    "Log body truncated"
-                );
-            }
-            info
-        }
-        Err(Truncated) => {
-            // Couldn't fit even minimal content
-            StringTruncation { bytes_written: 0, bytes_truncated: body.len() }
-        }
-    }
-}
-```
-
-## Design Rationale
-
-### Why Configurable Length Placeholder Size?
-
-The protobuf wire format uses varints for length-delimited field lengths. Since we encode nested messages without knowing their size upfront, we reserve placeholder bytes and patch them later.
-
-The problem: varints are variable-length! A length of 127 needs 1 byte, but 128 needs 2 bytes. Our solution uses zero-padded varints where each byte has its continuation bit set until the final byte.
-
-For a 4 KiB buffer, no nested message can exceed 4096 bytes, which fits in a 2-byte varint. Using 4-byte placeholders wastes 2 bytes per nested message. In a typical LogRecord with its nested structure:
-
-```
-ResourceLogs                  [4 bytes wasted]
-  └─ ScopeLogs                [4 bytes wasted if 4-byte, 2 bytes if 2-byte]
-      └─ LogRecord            [...]
-          ├─ Body (AnyValue)
-          └─ Attributes (repeated KeyValue)
-              └─ Value (AnyValue)
-```
-
-With 10 attributes, that's potentially 20+ extra bytes wasted—space that could hold another attribute!
-
-### Why Reserve Tail Space?
-
-The `reserve_tail` mechanism ensures that critical fields like `dropped_attributes_count` can always be encoded, even when the buffer is nearly full. Without this:
-
-1. We might fill the buffer completely with attributes
-2. Then have no room to record that we dropped some
-3. The receiver would have no indication of data loss
-
-### Why Truncate Strings vs. Drop Entirely?
-
-Truncated data with a marker is often more useful than no data:
-- A truncated log message still conveys intent
-- A truncated attribute value may still be useful for filtering/grouping
-- The marker makes it clear that truncation occurred
-
-### Why UTF-8 Safe Truncation?
-
-Truncating in the middle of a multi-byte UTF-8 character would produce invalid UTF-8, which could cause issues downstream. The `truncate_utf8_safe` function ensures we always produce valid UTF-8.
-
-### Why Configurable Limits for Growable Buffers?
-
-Even in the "normal" path, we want protection against:
-- Malformed data causing unbounded memory growth
-- DoS attacks via large payloads
-- Accidental memory exhaustion from unexpectedly large telemetry
-
-## File Changes
-
-| File | Changes |
-|------|---------|
-| `crates/pdata/src/error.rs` | Added `Truncated` error type with `Display` and `Error` impls |
-| `crates/pdata/src/otlp/common.rs` | Added `ProtoWrite` trait, `FixedProtoBuffer`, `StringTruncation`, `LengthPlaceholderSize`, updated `ProtoBuffer` with limit/reservation/placeholder fields, helper functions, updated macros |
-| `crates/pdata/src/otlp/mod.rs` | Export `ProtoWrite`, `FixedProtoBuffer`, `StringTruncation`, `LengthPlaceholderSize`, `Truncated` |
-
-## Testing
-
-The implementation includes comprehensive tests covering:
-
-- Basic `FixedProtoBuffer` operations
-- Truncation behavior for various encoding operations
-- Varint encoding with partial writes
-- Generic function usage with both buffer types
-- Backward compatibility of `ProtoBuffer` inherent methods
-- Partial content availability after truncation
-- String truncation with UTF-8 safety
-- Reserved tail space behavior
-- Limit enforcement for growable buffers
-
-All existing tests continue to pass, plus new tests for the added functionality.
diff --git a/rust/otap-dataflow/docs/internal-logs-design.md b/rust/otap-dataflow/docs/internal-logs-design.md
deleted file mode 100644
index 1431b32717..0000000000
--- a/rust/otap-dataflow/docs/internal-logs-design.md
+++ /dev/null
@@ -1,812 +0,0 @@
-# Internal Logs Collection Design
-
-This document describes the internal logging architecture for OTAP-Dataflow,
-enabling first-party and third-party log events to be captured, buffered,
-and routed without creating feedback loops.
-
-## Goals
-
-1. **Unified capture**: Both first-party (`otel_info!`) and third-party
-   (`tracing::info!`) log events are captured in the same buffer
-2. **Per-core buffering**: Each EffectHandler thread accumulates logs in
-   its own heap-allocated buffer, avoiding cross-thread contention
-3. **No feedback loops**: The global telemetry collection thread cannot
-   create log events that cycle back through the system
-4. **Non-blocking**: Log emission never blocks the EffectHandler thread
-5. **Configurable routing**: Buffered logs can be sent to the global
-   collector, routed through an Internal Telemetry Receiver (ITR)
-   pipeline, or both
-
-## Architecture Overview
-
-```
-┌─────────────────────────────────────────────────────────────────────────────┐
-│                EffectHandler Thread (one per core/pipeline node)            │
-│                                                                             │
-│  ┌────────────────────────┐        ┌──────────────────────────────────┐     │
-│  │  EffectHandlerCore     │        │  Thread-Local State              │     │
-│  │                        │        │                                  │     │
-│  │  log_buffer: LogBuffer ├───────►│  CURRENT_BUFFER: *mut LogBuffer  │     │
-│  │  (heap: 128KB-1MB)     │        │                                  │     │
-│  └────────────────────────┘        └──────────────┬───────────────────┘     │
-│        │                                          │                         │
-│        │                                          │                         │
-│  ┌─────┴─────────┐                                │                         │
-│  │               │                                │                         │
-│  ▼               ▼                                ▼                         │
-│  otel_info!   tracing::info!              BufferWriterLayer                 │
-│  (first-party) (third-party)              (global Subscriber)               │
-│        │               │                          │                         │
-│        │               └──────────────────────────┘                         │
-│        │                          │                                         │
-│        │                          ▼                                         │
-│        │              ┌──────────────────────┐                              │
-│        └─────────────►│  log_buffer.push()   │                              │
-│                       └──────────────────────┘                              │
-│                                                                             │
-│  On timer tick: flush buffer ───────────────────────────────────────────────┼──┐
-└─────────────────────────────────────────────────────────────────────────────┘  │
-                                                                                 │
-                                       ┌─────────────────────────────────────────┘
-                                       │
-                                       ▼
-┌─────────────────────────────────────────────────────────────────────────────┐
-│                        Global Telemetry Thread                              │
-│                                                                             │
-│  Subscriber: stderr-only or NoSubscriber (NO BufferWriterLayer)             │
-│                                                                             │
-│  ┌─────────────────────┐    ┌─────────────────────┐                         │
-│  │  LogsRegistry       │    │  ITR Pipeline       │                         │
-│  │  (ring buffer for   │    │  (OTLP export,      │                         │
-│  │   /logs endpoint)   │    │   processing, etc.) │                         │
-│  └─────────────────────┘    └─────────────────────┘                         │
-│                                                                             │
-│  tracing::info!("...") → stderr (safe, no feedback)                         │
-└─────────────────────────────────────────────────────────────────────────────┘
-```
-
-## Key Components
-
-### 1. LogBuffer
-
-A heap-allocated ring buffer owned by each EffectHandler. Log records are
-encoded to OTLP bytes before storage.
-
-```rust
-pub struct LogBuffer {
-    /// Heap-allocated storage (e.g., 128KB to 1MB)
-    data: Box<[u8]>,
-
-    /// Ring buffer state
-    write_pos: usize,
-    read_pos: usize,
-
-    /// Statistics
-    record_count: usize,
-    dropped_count: usize,
-}
-```
-
-**Behavior:**
-- Fixed capacity, configured at startup
-- When full, oldest records are evicted (ring buffer semantics)
-- Tracks dropped record count for observability
-- Non-blocking push operation
-
-### 2. Thread-Local Buffer Pointer
-
-A thread-local variable provides the bridge between the tracing subscriber
-and the EffectHandler's buffer.
-
-```rust
-thread_local! {
-    static CURRENT_BUFFER: Cell<Option<NonNull<LogBuffer>>> = const { Cell::new(None) };
-}
-```
-
-**Lifecycle:**
-1. EffectHandler calls `install_buffer()` when its thread starts
-2. Thread-local points to the handler's `log_buffer`
-3. EffectHandler calls (or guard drops) `uninstall_buffer()` on shutdown
-4. Thread-local is cleared, subsequent events are dropped
-
-### 3. BufferWriterLayer
-
-A `tracing_subscriber::Layer` installed as part of the global subscriber.
-It writes events to whichever buffer is installed in the current thread.
-
-```rust
-impl<S> Layer<S> for BufferWriterLayer
-where
-    S: Subscriber + for<'a> LookupSpan<'a>,
-{
-    fn on_event(&self, event: &Event<'_>, ctx: Context<'_, S>) {
-        CURRENT_BUFFER.with(|c| {
-            if let Some(mut ptr) = c.get() {
-                let buffer = unsafe { ptr.as_mut() };
-                let encoded = encode_event_to_otlp(event, &ctx);
-                buffer.push(&encoded);
-            }
-            // No buffer installed: event is dropped
-        });
-    }
-
-    fn enabled(&self, _metadata: &Metadata<'_>, _ctx: Context<'_, S>) -> bool {
-        // Only process events if a buffer is installed
-        CURRENT_BUFFER.with(|c| c.get().is_some())
-    }
-}
-```
-
-### 4. Global Telemetry Thread
-
-A dedicated thread for collecting logs from all EffectHandler threads and
-routing them to their destinations. This thread uses a **different**
-subscriber that does not include `BufferWriterLayer`.
-
-```rust
-pub fn spawn_global_telemetry_thread() -> JoinHandle<()> {
-    std::thread::spawn(|| {
-        // Safe subscriber: stderr only, or completely silent
-        let safe_subscriber = tracing_subscriber::fmt()
-            .with_writer(std::io::stderr)
-            .with_max_level(tracing::Level::WARN)
-            .finish();
-
-        // Override the default subscriber for this thread only
-        tracing::subscriber::with_default(safe_subscriber, || {
-            // Any tracing::info! in here goes to stderr
-            // NOT back through BufferWriterLayer
-            run_collection_loop();
-        });
-    })
-}
-```
-
-## Event Flow
-
-### First-Party Events (otel_info!, etc.)
-
-Code with access to the EffectHandler can log directly:
-
-```rust
-impl EffectHandlerCore {
-    pub fn log_info(&mut self, name: &str, attrs: &[(&str, &dyn Debug)]) {
-        let encoded = encode_log_record(Level::INFO, name, attrs);
-        self.log_buffer.push(&encoded);
-    }
-}
-
-// Usage in a receiver/processor/exporter:
-effect_handler.log_info("batch.processed", &[
-    ("count", &batch.len()),
-    ("duration_ms", &elapsed.as_millis()),
-]);
-```
-
-### Third-Party Events (tracing::info!, etc.)
-
-Library code or deeply nested code without EffectHandler access:
-
-```rust
-// Somewhere in a library
-tracing::info!(records = count, "Parsed input");
-
-// Flow:
-// 1. tracing::info! → global subscriber → BufferWriterLayer::on_event()
-// 2. BufferWriterLayer reads CURRENT_BUFFER thread-local
-// 3. If set, encodes event and pushes to that buffer
-// 4. If not set (wrong thread), event is dropped
-```
-
-### Buffer Flush
-
-EffectHandlers periodically flush their buffers:
-
-```rust
-impl EffectHandlerCore {
-    pub async fn flush_logs(&mut self) -> Result<(), Error> {
-        let logs = self.log_buffer.drain();
-        if logs.is_empty() {
-            return Ok(());
-        }
-
-        // Send to global collector via channel
-        self.log_sender.send(logs).await?;
-
-        Ok(())
-    }
-}
-```
-
-The flush can be triggered by:
-- Timer tick (e.g., every 1 second)
-- Buffer reaching high-water mark
-- Explicit flush request from pipeline
-
-## Feedback Loop Prevention
-
-The architecture prevents feedback loops through subscriber isolation:
-
-| Thread Type | Subscriber | BufferWriterLayer? | Effect of `tracing::info!` |
-|-------------|------------|-------------------|---------------------------|
-| EffectHandler | Global (with BufferWriterLayer) | Yes, buffer installed | Written to handler's buffer |
-| Global Telemetry | Thread-local override (stderr/noop) | No | Stderr or dropped |
-| Other | Global (with BufferWriterLayer) | No buffer installed | Dropped |
-
-**Why this prevents cycles:**
-
-1. EffectHandler thread emits `otel_info!("something")`
-2. Event is buffered locally (no channel send yet)
-3. On timer, buffer is flushed to global telemetry thread via channel
-4. Global thread receives the event
-5. If global thread calls `tracing::info!()` while processing:
-   - Its subscriber is the stderr/noop override
-   - BufferWriterLayer is NOT in its subscriber stack
-   - Event goes to stderr (or nowhere), NOT back to a buffer
-   - No channel send, no cycle
-
-## Encoding Format
-
-Log records are encoded to OTLP bytes (`opentelemetry.proto.logs.v1.LogRecord`)
-before storage in the buffer. This enables:
-
-- Zero-copy access via `LogsDataView` for formatting
-- Direct forwarding to OTLP exporters
-- Consistent format for `/logs` HTTP endpoint
-- Efficient storage (no per-field overhead)
-
-## Flush Strategy: Timer-Based with Drop on Full
-
-Unlike metrics (which are pre-aggregated), individual log events can be
-lost if the buffer fills. The current approach is simple:
-
-- **Timer-based flush**: The pipeline runtime flushes on its telemetry timer
-- **Drop new events when full**: If buffer fills before flush, new events are dropped
-- **Track dropped count**: `LogBuffer::dropped_count()` for observability
-
-This keeps the implementation simple. Future enhancements could include:
-- Sampling at high volume
-- Priority levels (always keep ERROR events)
-- Dynamic buffer sizing
-
-## Configuration
-
-*(To be defined)*
-
-```yaml
-service:
-  telemetry:
-    logs:
-      level: info
-      internal_collection:
-        enabled: true
-        buffer_size_bytes: 131072   # 128KB per handler
-        flush_interval: "1s"
-        # Routing options:
-        #   - global_collector: send to global thread
-        #   - local_pipeline: route through ITR on same core
-        #   - both: send to both destinations
-        routing: global_collector
-```
-
-## Integration with Existing Metrics System
-
-This design parallels the existing metrics infrastructure. Understanding
-the metrics flow is essential for implementing consistent logging.
-
-### Metrics System Architecture
-
-The metrics system follows a clear data flow pattern:
-
-```
-┌─────────────────────────────────────────────────────────────────────────────┐
-│ Controller (lib.rs)                                                         │
-│                                                                             │
-│  MetricsSystem::new(config)                                                 │
-│    ├── MetricsRegistryHandle::new()   ← Shared registry for aggregation     │
-│    ├── MetricsCollector::new()        ← Runs on metrics-aggregator thread   │
-│    └── MetricsReporter::new(sender)   ← Cloned to each pipeline thread      │
-│                                                                             │
-└─────────────────────────────────────────────────────────────────────────────┘
-                    │
-                    │ metrics_reporter.clone()
-                    ▼
-┌─────────────────────────────────────────────────────────────────────────────┐
-│ Pipeline Thread (one per core)                                              │
-│                                                                             │
-│  PipelineContext::new(controller_context, pipeline_id, core_id, thread_id)  │
-│    └── with_node_context(node_id, node_urn, node_kind)                      │
-│          └── register_metrics()                                             │
-│                └── registry.register::<M>(self.node_attribute_set())        │
-│                                                                             │
-│  Each component (receiver/processor/exporter):                              │
-│    1. Receives PipelineContext via build() method                           │
-│    2. Calls pipeline_ctx.register_metrics::<M>()                            │
-│    3. Gets MetricSet with pre-registered attributes                         │
-│    4. On timer tick: metrics_reporter.report(&mut metric_set)               │
-│                                                                             │
-└─────────────────────────────────────────────────────────────────────────────┘
-                    │
-                    │ flume channel (MetricSetSnapshot)
-                    ▼
-┌─────────────────────────────────────────────────────────────────────────────┐
-│ Metrics Aggregator Thread ("metrics-aggregator")                            │
-│                                                                             │
-│  MetricsCollector::run_collection_loop()                                    │
-│    loop {                                                                   │
-│      snapshot = receiver.recv_async().await                                 │
-│      registry.accumulate_snapshot(snapshot.key, &snapshot.metrics)          │
-│    }                                                                        │
-│                                                                             │
-└─────────────────────────────────────────────────────────────────────────────┘
-                    │
-                    │ MetricsRegistryHandle (Arc<Mutex<...>>)
-                    ▼
-┌─────────────────────────────────────────────────────────────────────────────┐
-│ Admin HTTP Server ("http-admin" thread)                                     │
-│                                                                             │
-│  GET /metrics or /telemetry/metrics                                         │
-│    registry.visit_metrics_and_reset(|desc, attrs, iter| {                   │
-│      // Format as JSON, Prometheus, Line Protocol, etc.                     │
-│      // desc: MetricsDescriptor (name, field definitions)                   │
-│      // attrs: NodeAttributeSet (resource + node attributes)                │
-│      // iter: MetricsIterator (field, value) pairs                          │
-│    })                                                                       │
-│                                                                             │
-└─────────────────────────────────────────────────────────────────────────────┘
-```
-
-### Key Components
-
-1. **MetricsRegistryHandle**: Thread-safe handle wrapping `Arc<Mutex<MetricsRegistry>>`.
-   Passed to admin for HTTP endpoints, passed to controller for aggregation.
-
-2. **MetricsReporter**: Cloneable sender side of a flume channel. Each pipeline
-   thread gets a clone to send `MetricSetSnapshot` messages.
-
-3. **MetricsCollector**: Runs on a dedicated thread, receives snapshots via
-   channel, and calls `registry.accumulate_snapshot()` to merge them.
-
-4. **NodeAttributeSet**: Consistent attributes attached to every metric set
-   registered by a component. Includes:
-   - Resource: `process_instance_id`, `host_id`, `container_id`
-   - Engine: `core_id`, `numa_node_id`
-   - Pipeline: `pipeline_id`
-   - Node: `node_id`, `node_urn`, `node_type`
-
-### Unified Registration: Shared MetricsKey for Logs and Metrics
-
-The key insight is that `MetricsKey` already identifies a component's
-`NodeAttributeSet` in the registry. Logs should reuse this same key
-rather than duplicating attribute storage.
-
-**Existing MetricsEntry (in registry.rs):**
-
-```rust
-pub struct MetricsEntry {
-    pub metrics_descriptor: &'static MetricsDescriptor,
-    pub attributes_descriptor: &'static AttributesDescriptor,
-    pub metric_values: Vec<MetricValue>,
-    pub attribute_values: Box<dyn AttributeSetHandler>,  // ← NodeAttributeSet
-}
-```
-
-When `pipeline_ctx.register_metrics::<M>()` is called:
-1. Returns `MetricSet` containing a `MetricsKey` (slotmap index)
-2. The `NodeAttributeSet` is stored in the registry under that key
-3. **Both metrics and logs use the same `MetricsKey`**
-
-### Parallel Logs Architecture
-
-| Metrics | Logs |
-|---------|------|
-| `MetricSet` | `LogBuffer` |
-| `MetricsReporter` (channel sender) | `LogsReporter` (channel sender) |
-| `MetricsRegistry` (aggregates metrics) | `LogsRing` (ring buffer for recent logs) |
-| `MetricsCollector` (receives snapshots) | `LogsCollector` (receives batches) |
-| `MetricSetSnapshot { key, metrics }` | `LogBatch { producer_key, records }` |
-| `/metrics` endpoint | `/logs` endpoint |
-
-**Shared:**
-- `MetricsKey` identifies the producer (same key for metrics and logs)
-- `NodeAttributeSet` stored once in `MetricsRegistry`, looked up by key
-
-### Channel Data Types
-
-```rust
-/// A batch of logs from one producer - compact, just carries the key
-pub struct LogBatch {
-    /// Same key returned from register_metrics() - identifies NodeAttributeSet
-    pub producer_key: MetricsKey,
-
-    /// The log records
-    pub records: Vec<LogRecord>,
-}
-
-/// A single log record
-pub struct LogRecord {
-    pub callsite_id: Identifier,   // Pointer to static Metadata
-    pub timestamp_ns: u64,
-    pub body_attrs_bytes: Bytes,   // Pre-encoded body + event attributes
-}
-
-/// Reporter for sending log batches (parallel to MetricsReporter)
-#[derive(Clone)]
-pub struct LogsReporter {
-    sender: flume::Sender<LogBatch>,
-}
-
-impl LogsReporter {
-    pub fn try_report(&self, batch: LogBatch) -> Result<(), Error> {
-        match self.sender.try_send(batch) {
-            Ok(_) => Ok(()),
-            Err(flume::TrySendError::Full(_)) => Ok(()),  // Drop if full
-            Err(flume::TrySendError::Disconnected(_)) => Err(Error::LogsChannelClosed),
-        }
-    }
-}
-```
-
-### EffectHandler with Shared Key
-
-```rust
-pub struct EffectHandlerCore {
-    pub node_id: NodeId,
-    pub producer_key: MetricsKey,        // Shared identifier for metrics & logs
-    pub metrics_reporter: MetricsReporter,
-    pub logs_reporter: LogsReporter,     // NEW
-    pub log_buffer: LogBuffer,           // NEW
-    // ...
-}
-
-impl EffectHandlerCore {
-    pub async fn flush_logs(&mut self) -> Result<(), Error> {
-        if self.log_buffer.is_empty() {
-            return Ok(());
-        }
-
-        let batch = LogBatch {
-            producer_key: self.producer_key,  // Just the 8-byte key
-            records: self.log_buffer.drain(),
-        };
-        self.logs_reporter.try_report(batch)
-    }
-}
-```
-
-### Consumer Side: LogsRing with Key Lookup
-
-```rust
-/// Ring buffer storing recent logs for /logs endpoint
-pub struct LogsRing {
-    entries: VecDeque<StoredLogEntry>,
-    capacity: usize,
-    total_received: u64,
-    total_dropped: u64,
-}
-
-/// Stored entry - just the key, not the full attributes
-pub struct StoredLogEntry {
-    pub producer_key: MetricsKey,   // Lookup attrs from MetricsRegistry
-    pub callsite_id: Identifier,
-    pub timestamp_ns: u64,
-    pub body_attrs_bytes: Bytes,
-}
-
-impl LogsRing {
-    pub fn append(&mut self, batch: LogBatch) {
-        for record in batch.records {
-            if self.entries.len() >= self.capacity {
-                self.entries.pop_front();
-                self.total_dropped += 1;
-            }
-            self.entries.push_back(StoredLogEntry {
-                producer_key: batch.producer_key,
-                callsite_id: record.callsite_id,
-                timestamp_ns: record.timestamp_ns,
-                body_attrs_bytes: record.body_attrs_bytes,
-            });
-            self.total_received += 1;
-        }
-    }
-}
-```
-
-### Admin /logs Endpoint
-
-```rust
-pub async fn get_logs(State(state): State<AdminState>) -> impl IntoResponse {
-    let logs_ring = state.logs_ring.lock();
-    let registry = state.metrics_registry.lock();
-
-    let writer = ConsoleWriter::no_color();
-    let mut output = String::new();
-
-    for entry in logs_ring.recent(100) {
-        // Dereference Identifier to get static Metadata
-        let metadata = entry.callsite_id.0.metadata();
-        let saved = SavedCallsite::new(metadata);
-
-        let record = LogRecord {
-            callsite_id: entry.callsite_id,
-            timestamp_ns: entry.timestamp_ns,
-            body_attrs_bytes: entry.body_attrs_bytes.clone(),
-        };
-
-        // Format the log record
-        output.push_str(&writer.format_log_record(&record, &saved));
-
-        // Look up NodeAttributeSet using the shared key
-        if let Some(metrics_entry) = registry.metrics.get(entry.producer_key) {
-            let attrs = metrics_entry.attribute_values.as_ref();
-            output.push_str(&format_node_attrs(attrs));
-        }
-        output.push('\n');
-    }
-
-    (StatusCode::OK, output)
-}
-```
-
-### Benefits of Shared Key
-
-| Aspect | Sending attrs per batch | Shared MetricsKey |
-|--------|------------------------|-------------------|
-| Registration | Separate for metrics/logs | Single registration |
-| Per-batch overhead | Full NodeAttributeSet clone | 8-byte key |
-| Attribute storage | Duplicated per batch | Single source of truth |
-| Consistency | Could diverge | Guaranteed identical |
-| Admin lookup | Already has attrs | Lookup from registry |
-
-### Identifier → Metadata: Direct Field Access
-
-The `Identifier` type wraps a pointer to static memory:
-
-```rust
-pub struct Identifier(
-    #[doc(hidden)]
-    pub &'static dyn Callsite,
-);
-```
-
-The inner field is `pub` (for macro construction purposes), so any thread
-can access it directly to get `Metadata`:
-
-```rust
-// Identifier.0 is &'static dyn Callsite
-let metadata: &'static Metadata<'static> = identifier.0.metadata();
-```
-
-No need to forward `(Identifier, Metadata)` pairs between threads.
-The admin thread can directly access `Identifier.0.metadata()` on any
-`Identifier` received in a `LogBatch` to get the full static metadata
-(level, target, file, line, name, etc.).
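-
-For example, a formatter on the admin thread can recover the static
-callsite fields directly (a sketch; only the standard `tracing`
-metadata accessors are assumed):
-
-```rust
-use tracing_core::callsite::Identifier;
-
-/// Render the static portion of a callsite for display.
-fn callsite_prefix(id: &Identifier) -> String {
-    // Identifier.0 is `&'static dyn Callsite`; metadata() is static data.
-    let meta = id.0.metadata();
-    format!(
-        "{} {}:{} {}",
-        meta.level(),
-        meta.file().unwrap_or("<unknown>"),
-        meta.line().unwrap_or(0),
-        meta.target(),
-    )
-}
-```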
-
-### Thread-Local Producer Key for Third-Party Instrumentation
-
-Third-party libraries often use `tracing::info!()` without access to any
-EffectHandler or `MetricsKey`. To attribute these logs to the correct
-component, we use a thread-local "current producer key" that is set
-when entering a component's execution scope.
-
-```rust
-// Thread-local current MetricsKey for third-party instrumentation.
-thread_local! {
-    static CURRENT_PRODUCER_KEY: RefCell<Option<MetricsKey>> = const { RefCell::new(None) };
-}
-
-/// Guard that sets the current producer key for the duration of a scope.
-/// When dropped, restores the previous key (supports nesting).
-pub struct ProducerKeyGuard {
-    previous: Option<MetricsKey>,
-}
-
-impl ProducerKeyGuard {
-    /// Enter a scope with the given producer key.
-    pub fn enter(key: MetricsKey) -> Self {
-        let previous = CURRENT_PRODUCER_KEY.with(|cell| cell.borrow_mut().replace(key));
-        Self { previous }
-    }
-}
-
-impl Drop for ProducerKeyGuard {
-    fn drop(&mut self) {
-        CURRENT_PRODUCER_KEY.with(|cell| {
-            *cell.borrow_mut() = self.previous;
-        });
-    }
-}
-
-/// Get the current producer key (if any component scope is active).
-pub fn current_producer_key() -> Option<MetricsKey> {
-    CURRENT_PRODUCER_KEY.with(|cell| *cell.borrow())
-}
-```
-
-**Usage in the engine (when calling component methods):**
-
-```rust
-impl EffectHandlerCore {
-    /// Enter a scope where third-party logs are attributed to this component.
-    pub fn enter_producer_scope(&self) -> ProducerKeyGuard {
-        ProducerKeyGuard::enter(self.producer_key)
-    }
-}
-
-// In the pipeline runtime, when calling a processor:
-let _guard = effect_handler.enter_producer_scope();
-processor.process(batch, effect_handler).await?;
-// Guard drops here, restoring previous key
-```
-
-**How it works with the BufferWriterLayer:**
-
-```rust
-impl<S: Subscriber> Layer<S> for BufferWriterLayer {
-    fn on_event(&self, event: &Event<'_>, ctx: Context<'_, S>) {
-        let record = encode_event(event, &ctx);
-        // Pass None - push_to_thread_buffer will use current_producer_key()
-        push_to_thread_buffer(record, None);
-    }
-}
-
-// In push_to_thread_buffer:
-pub fn push_to_thread_buffer(record: LogRecord, producer_key: Option<MetricsKey>) -> bool {
-    CURRENT_LOG_BUFFER.with(|cell| {
-        if let Some(ref mut buffer) = *cell.borrow_mut() {
-            // Use explicit key if provided, otherwise use thread-current key
-            let key = producer_key.or_else(current_producer_key);
-            buffer.push(LogEntry { record, producer_key: key });
-            true
-        } else {
-            false
-        }
-    })
-}
-```
-
-**Benefits:**
-
-| Aspect | Without ProducerKeyGuard | With ProducerKeyGuard |
-|--------|-------------------------|----------------------|
-| First-party logs | Attributed correctly | Attributed correctly |
-| Third-party libs | `producer_key: None` | Attributed to current component |
-| No EffectHandler access | Lost attribution | Correct attribution |
-| Nesting support | N/A | Previous key restored on drop |
-
-**Example flow:**
-
-```
-┌─────────────────────────────────────────────────────────────────┐
-│ Pipeline Thread                                                 │
-│                                                                 │
-│ 1. Enter processor scope: ProducerKeyGuard::enter(processor_key)│
-│    CURRENT_PRODUCER_KEY = Some(processor_key)                   │
-│                                                                 │
-│ 2. Processor calls library code                                 │
-│    └── Library calls tracing::info!("parsing data")             │
-│        └── BufferWriterLayer::on_event()                        │
-│            └── push_to_thread_buffer(record, None)              │
-│                └── key = current_producer_key() = processor_key │
-│                └── buffer.push(LogEntry { key: processor_key }) │
-│                                                                 │
-│ 3. Guard drops: CURRENT_PRODUCER_KEY = None                     │
-│                                                                 │
-│ 4. On flush: LogBatch includes entry with producer_key set      │
-│                                                                 │
-│ 5. Admin can look up NodeAttributeSet for processor_key         │
-│    → Log shows: node_id=processor, node_urn=arrow/processor     │
-└─────────────────────────────────────────────────────────────────┘
-```
-
-## Channel Architecture
-
-```
-┌─────────────────────────────────────────────────────────────────────────────┐
-│                              PRODUCER SIDE                                  │
-│                                                                             │
-│  Pipeline Thread 0      Pipeline Thread 1       Pipeline Thread N           │
-│  ┌─────────────────┐    ┌─────────────────┐     ┌─────────────────┐         │
-│  │ EffectHandler   │    │ EffectHandler   │     │ EffectHandler   │         │
-│  │  producer_key   │    │  producer_key   │     │  producer_key   │         │
-│  │  log_buffer     │    │  log_buffer     │     │  log_buffer     │         │
-│  │  logs_reporter  │    │  logs_reporter  │     │  logs_reporter  │         │
-│  └────────┬────────┘    └────────┬────────┘     └────────┬────────┘         │
-│           │                      │                       │                  │
-│           │  on timer: flush     │                       │                  │
-│           ▼                      ▼                       ▼                  │
-│  ┌────────────────────────────────────────────────────────────────────────┐│
-│  │                     Metrics Channel (existing)                         ││
-│  │                  flume::Sender<MetricSetSnapshot>                      ││
-│  └────────────────────────────────────────────────────────────────────────┘│
-│  ┌────────────────────────────────────────────────────────────────────────┐│
-│  │                      Logs Channel (NEW, parallel)                      ││
-│  │                       flume::Sender<LogBatch>                          ││
-│  └────────────────────────────────────────────────────────────────────────┘│
-│                                                                             │
-└─────────────────────────────────────────────────────────────────────────────┘
-                                    │
-                                    │  Two separate channels
-                                    ▼
-┌─────────────────────────────────────────────────────────────────────────────┐
-│                              CONSUMER SIDE                                  │
-│                                                                             │
-│  ┌─────────────────────────────────┐  ┌─────────────────────────────────┐   │
-│  │ MetricsCollector                │  │ LogsCollector (NEW)             │   │
-│  │ (metrics-aggregator thread)     │  │ (logs-collector thread OR       │   │
-│  │                                 │  │  same thread as admin)          │   │
-│  │ loop {                          │  │                                 │   │
-│  │   snapshot = rx.recv()          │  │ loop {                          │   │
-│  │   registry.accumulate(...)      │  │   batch = rx.recv()             │   │
-│  │ }                               │  │   logs_ring.append(batch)       │   │
-│  └─────────────────────────────────┘  │ }                               │   │
-│                                       └─────────────────────────────────┘   │
-│                                                                             │
-│  ┌─────────────────────────────────────────────────────────────────────────┐│
-│  │ Admin HTTP Server                                                       ││
-│  │                                                                         ││
-│  │  GET /metrics  →  registry.visit_metrics_and_reset(...)                 ││
-│  │                                                                         ││
-│  │  GET /logs     →  logs_ring.recent(limit) + registry.get(key).attrs     ││
-│  │                                                                         ││
-│  └─────────────────────────────────────────────────────────────────────────┘│
-│                                                                             │
-└─────────────────────────────────────────────────────────────────────────────┘
-```
-
-### Why Separate Channels?
-
-| Aspect | Metrics Channel | Logs Channel |
-|--------|-----------------|--------------|
-| Volume | Lower (aggregated values) | Higher (individual events) |
-| Semantics | Merge into registry | Append to ring buffer |
-| Backpressure | Accumulate locally | Drop oldest |
-| Consumer | Aggregate by key | Keep recent N entries |
-
-## Future Considerations
-
-1. **Span context propagation**: Include trace/span IDs in buffered logs
-   when a span is active on the thread
-
-2. **Sampling**: Support head-based sampling to reduce volume
-
-3. **Priority levels**: Allow high-severity logs to bypass buffer limits
-
-4. **Direct ITR routing**: Option to route logs directly to a same-core
-   ITR pipeline without going through the global thread
-
-5. **Backpressure signaling**: Mechanism for the global collector to signal
-   EffectHandlers when it's overloaded
-
-## Code References
-
-### Metrics System (for reference implementation)
-
-| File | Purpose |
-|------|---------|
-| `crates/controller/src/lib.rs` | Creates `MetricsSystem`, spawns threads, passes `MetricsReporter` to pipeline threads |
-| `crates/telemetry/src/lib.rs` | `MetricsSystem` struct holding registry, collector, reporter, dispatcher |
-| `crates/telemetry/src/registry.rs` | `MetricsRegistry` and `MetricsRegistryHandle` for aggregation |
-| `crates/telemetry/src/reporter.rs` | `MetricsReporter` for sending snapshots through flume channel |
-| `crates/telemetry/src/collector.rs` | `MetricsCollector::run_collection_loop()` receives and aggregates snapshots |
-| `crates/engine/src/context.rs` | `PipelineContext` and `NodeAttributeSet` for consistent attributes |
-| `crates/engine/src/effect_handler.rs` | `EffectHandlerCore` with `report_metrics()` method |
-| `crates/admin/src/telemetry.rs` | `/metrics` endpoint using `registry.visit_metrics_and_reset()` |
-
-### Existing Self-Tracing Primitives
-
-| File | Purpose |
-|------|---------|
-| `crates/telemetry/src/self_tracing.rs` | `LogRecord` and `SavedCallsite` types |
-| `crates/telemetry/src/self_tracing/encoder.rs` | `DirectLogRecordEncoder`, `DirectFieldVisitor` for OTLP encoding |
-| `crates/telemetry/src/self_tracing/formatter.rs` | `RawLoggingLayer`, `ConsoleWriter` for console output |
-| `crates/telemetry/src/internal_events.rs` | `otel_info!`, `otel_warn!`, etc. macros wrapping tracing |
-
-### Tokio Tracing (vendored)
-
-| File | Purpose |
-|------|---------|
-| `tokio-tracing-rs/tracing-core/src/dispatcher.rs` | Thread-local `CURRENT_STATE`, `with_default()` for subscriber scoping |
-| `tokio-tracing-rs/tracing-subscriber/src/registry/sharded.rs` | Example of `ThreadLocal<RefCell<Vec<span::Id>>>` for per-thread span stacks |
-
diff --git a/rust/otap-dataflow/docs/thread-local-telemetry-research.md b/rust/otap-dataflow/docs/thread-local-telemetry-research.md
deleted file mode 100644
index c95f35a76c..0000000000
--- a/rust/otap-dataflow/docs/thread-local-telemetry-research.md
+++ /dev/null
@@ -1,713 +0,0 @@
-# Thread-Local Variables and Tokio Tracing: Research for Internal Telemetry
-
-This document explains how thread-local variables work in Rust, how Tokio's
-`tracing` library uses them for scoping, and how these mechanisms can be
-applied to the OTAP-Dataflow internal telemetry architecture.
-
-## Table of Contents
-
-1. [Rust Thread-Local Variables Fundamentals](#rust-thread-local-variables-fundamentals)
-2. [How Tracing Uses Thread-Locals](#how-tracing-uses-thread-locals)
-3. [Reentrancy Protection in Tracing](#reentrancy-protection-in-tracing)
-4. [Application to OTAP-Dataflow](#application-to-otap-dataflow)
-5. [Design Patterns for EffectHandler Buffer](#design-patterns-for-effecthandler-buffer)
-6. [Parallel with Existing Metrics Infrastructure](#parallel-with-existing-metrics-infrastructure)
-
----
-
-## Rust Thread-Local Variables Fundamentals
-
-### Basic Thread-Local Storage
-
-Rust's `std::thread_local!` macro creates thread-local storage:
-
-```rust
-use std::cell::{Cell, RefCell};
-
-thread_local! {
-    // Simple value types use Cell
-    static COUNTER: Cell<u64> = const { Cell::new(0) };
-
-    // Complex types use RefCell for interior mutability
-    static BUFFER: RefCell<Vec<LogRecord>> = RefCell::new(Vec::new());
-}
-```
-
-**Key characteristics:**
-
-1. **Initialization**: Thread-locals are lazily initialized per-thread on first access
-2. **Lifetime**: Data lives as long as the thread (destroyed when thread exits)
-3. **Access Pattern**: Must be accessed via closure using `.with(|value| ...)`
-4. **Interior Mutability**: Use `Cell` for `Copy` types, `RefCell` for others
-5. **No Cross-Thread Access**: By design, other threads cannot see this data
-
-### Access Patterns
-
-```rust
-// Reading
-COUNTER.with(|c| {
-    let value = c.get();
-    println!("Counter: {}", value);
-});
-
-// Writing
-COUNTER.with(|c| {
-    c.set(c.get() + 1);
-});
-
-// Mutable access to complex types
-BUFFER.with(|b| {
-    b.borrow_mut().push(record);
-});
-```
-
-### The `thread_local` Crate (Used by tracing-subscriber)
-
-The `thread_local` crate provides `ThreadLocal<T>`, which is different from
-`std::thread_local!`:
-
-```rust
-use thread_local::ThreadLocal;
-use std::cell::RefCell;
-
-struct Registry {
-    // Each thread gets its own RefCell
-    current_spans: ThreadLocal<RefCell<Vec<span::Id>>>,
-}
-
-impl Registry {
-    fn enter(&self, id: &span::Id) {
-        // get_or_default() returns a reference to this thread's value
-        self.current_spans
-            .get_or_default()      // Returns &RefCell<Vec<span::Id>>
-            .borrow_mut()
-            .push(id.clone());
-    }
-}
-```
-
-**Key difference**: `ThreadLocal<T>` is a struct field that can be shared
-across threads (via `Arc` or references), but each thread accessing it
-sees its own independent value.
-
----
-
-## How Tracing Uses Thread-Locals
-
-### Dispatcher Thread-Local State
-
-The `tracing-core` dispatcher uses thread-local storage for two critical purposes:
-
-```rust
-// From tracing-core/src/dispatcher.rs
-#[cfg(feature = "std")]
-std::thread_local! {
-    static CURRENT_STATE: State = const {
-        State {
-            default: RefCell<Option<Dispatch>>,
-            can_enter: Cell<bool>,
-        }
-    };
-}
-```
-
-#### 1. Per-Thread Default Subscriber (`default`)
-
-Each thread can have its own "scoped" subscriber that overrides the global default:
-
-```rust
-// The dispatcher lookup chain:
-pub fn get_default<T, F>(mut f: F) -> T
-where
-    F: FnMut(&Dispatch) -> T,
-{
-    // Fast path: if no scoped dispatchers exist, use global
-    if SCOPED_COUNT.load(Ordering::Acquire) == 0 {
-        return f(get_global());
-    }
-
-    // Slow path: check thread-local state
-    CURRENT_STATE.try_with(|state| {
-        if let Some(entered) = state.enter() {
-            return f(&entered.current());
-        }
-        f(&NONE)
-    })
-    .unwrap_or_else(|_| f(&NONE))
-}
-```
-
-The scoping mechanism:
-
-```rust
-pub fn with_default<T>(dispatcher: &Dispatch, f: impl FnOnce() -> T) -> T {
-    // set_default stores the previous dispatcher and sets the new one
-    let _guard = set_default(dispatcher);
-    f()
-    // When guard drops, previous dispatcher is restored
-}
-```
-
-**How it works:**
-- `set_default()` stores the current dispatcher in the thread-local and
-  replaces it with the new one
-- Returns a `DefaultGuard` that, when dropped, restores the previous dispatcher
-- This creates a stack of dispatchers per thread
-
-#### 2.
Reentrancy Protection (`can_enter`) - -Prevents infinite recursion when a subscriber's callback triggers more tracing: - -```rust -struct State { - default: RefCell>, - can_enter: Cell, // ← Reentrancy guard -} - -impl State { - fn enter(&self) -> Option> { - // Atomically check and set to false - if self.can_enter.replace(false) { - Some(Entered(self)) - } else { - None // Already in a dispatch, prevent recursion - } - } -} - -impl Drop for Entered<'_> { - fn drop(&mut self) { - self.0.can_enter.set(true); // Re-enable on exit - } -} -``` - -**Usage pattern:** -- Before dispatching an event, `state.enter()` is called -- If we're already dispatching (nested call), `enter()` returns `None` -- The caller then uses `Dispatch::none()` instead, preventing recursion -- When the dispatch completes, the guard's `Drop` re-enables entry - -### Registry Per-Thread Span Stack - -The `tracing-subscriber` Registry tracks which spans are "entered" on each thread: - -```rust -// From tracing-subscriber/src/registry/sharded.rs -pub struct Registry { - spans: Pool, - // Each thread has its own stack of currently-entered spans - current_spans: ThreadLocal>, - next_filter_id: u8, -} - -impl Subscriber for Registry { - fn enter(&self, id: &span::Id) { - // Push to THIS thread's span stack - self.current_spans - .get_or_default() - .borrow_mut() - .push(id.clone()); - } - - fn exit(&self, id: &span::Id) { - // Pop from THIS thread's span stack - if let Some(spans) = self.current_spans.get() { - spans.borrow_mut().pop(id); - } - } - - fn current_span(&self) -> Current { - // Return the top of THIS thread's span stack - self.current_spans - .get() - .and_then(|spans| { - let spans = spans.borrow(); - let id = spans.current()?; - let span = self.get(id)?; - Some(Current::new(id.clone(), span.metadata)) - }) - .unwrap_or_else(Current::none) - } -} -``` - ---- - -## Reentrancy Protection in Tracing - -### The Problem - -When a subscriber processes an event, it might trigger more events: - -```rust -impl Subscriber for MySubscriber { - fn event(&self, event: &Event<'_>) { - // This would cause infinite recursion! - tracing::info!("Received event: {:?}", event); - } -} -``` - -### The Solution - -Tracing uses the `can_enter` flag as a guard: - -```rust -// Simplified from dispatcher.rs -pub fn get_default(f: F) -> T { - CURRENT_STATE.try_with(|state| { - // Try to enter dispatch mode - if let Some(entered) = state.enter() { - // Success: use the real dispatcher - return f(&entered.current()); - } - // Already dispatching: use no-op dispatcher - f(&NONE) - }) -} -``` - -The test in `dispatcher.rs` demonstrates this: - -```rust -#[test] -fn events_dont_infinite_loop() { - struct TestSubscriber; - impl Subscriber for TestSubscriber { - fn event(&self, _: &Event<'_>) { - static EVENTS: AtomicUsize = AtomicUsize::new(0); - assert_eq!( - EVENTS.fetch_add(1, Ordering::Relaxed), - 0, - "event method called twice!" - ); - // This nested event dispatch is blocked by can_enter - Event::dispatch(&TEST_META, &TEST_META.fields().value_set(&[])); - } - } - // ... test passes because the nested dispatch sees Dispatch::none() -} -``` - ---- - -## Application to OTAP-Dataflow - -### Internal Telemetry Feedback Prevention - -Your architecture document describes preventing feedback loops in internal -telemetry. Here's how to implement this using thread-local state: - -```rust -use std::cell::Cell; - -thread_local! { - /// Thread-local flag indicating this thread is an internal telemetry thread. 
- /// When true, all otel_* macros become no-ops to prevent feedback. - static INTERNAL_TELEMETRY_THREAD: Cell = const { Cell::new(false) }; - - /// Reentrancy guard for telemetry processing - static IN_TELEMETRY_DISPATCH: Cell = const { Cell::new(false) }; -} - -/// Mark the current thread as an internal telemetry thread. -/// All otel_info!, otel_warn!, etc. macros will be disabled on this thread. -pub fn mark_as_internal_telemetry_thread() { - INTERNAL_TELEMETRY_THREAD.with(|flag| flag.set(true)); -} - -/// Check if telemetry is enabled on this thread -pub fn is_telemetry_enabled() -> bool { - INTERNAL_TELEMETRY_THREAD.with(|flag| !flag.get()) -} - -/// Guard for telemetry dispatch that prevents reentrancy -pub struct TelemetryDispatchGuard; - -impl TelemetryDispatchGuard { - pub fn try_enter() -> Option { - IN_TELEMETRY_DISPATCH.with(|flag| { - if flag.replace(true) { - None // Already dispatching - } else { - Some(TelemetryDispatchGuard) - } - }) - } -} - -impl Drop for TelemetryDispatchGuard { - fn drop(&mut self) { - IN_TELEMETRY_DISPATCH.with(|flag| flag.set(false)); - } -} -``` - -### Updated Macros with Feedback Protection - -```rust -/// Macro for logging informational messages with feedback protection. -#[macro_export] -macro_rules! otel_info { - ($name:expr $(,)?) => { - if $crate::is_telemetry_enabled() { - $crate::_private::info!( - name: $name, - target: env!("CARGO_PKG_NAME"), - name = $name, - "" - ); - } - }; - // ... other variants -} -``` - -### Global Internal Telemetry Thread - -For your global logs collection thread: - -```rust -pub fn spawn_internal_telemetry_thread( - name: &str, - task: F, -) -> std::thread::JoinHandle<()> -where - F: FnOnce() + Send + 'static, -{ - std::thread::Builder::new() - .name(name.into()) - .spawn(move || { - // Mark this thread as internal telemetry - mark_as_internal_telemetry_thread(); - - // Configure a safe subscriber for this thread only - let safe_subscriber = create_raw_logging_subscriber(); - tracing::subscriber::with_default(safe_subscriber, task); - }) - .expect("Failed to spawn internal telemetry thread") -} -``` - ---- - -## Design Patterns for EffectHandler Buffer - -### Option 1: Thread-Local Buffer with EffectHandler Coordination - -Since your `EffectHandler` owns its thread, you can use thread-local storage: - -```rust -use std::cell::RefCell; -use std::collections::VecDeque; - -/// Maximum bytes to buffer per thread -const MAX_BUFFER_BYTES: usize = 65536; - -/// Individual log record (pre-encoded or structured) -pub struct LogRecord { - pub timestamp: std::time::Instant, - pub level: tracing::Level, - pub name: &'static str, - pub target: &'static str, - // Pre-encoded OTLP bytes for attributes + body - pub encoded_data: Vec, -} - -thread_local! 
{ - /// Per-thread log buffer for first-party telemetry - static LOG_BUFFER: RefCell = RefCell::new(LogBuffer::new()); -} - -pub struct LogBuffer { - records: VecDeque, - total_bytes: usize, -} - -impl LogBuffer { - pub fn new() -> Self { - Self { - records: VecDeque::new(), - total_bytes: 0, - } - } - - /// Add a record, potentially dropping oldest if over capacity - pub fn push(&mut self, record: LogRecord) { - let record_size = record.encoded_data.len(); - - // Evict old records if needed - while self.total_bytes + record_size > MAX_BUFFER_BYTES - && !self.records.is_empty() - { - if let Some(old) = self.records.pop_front() { - self.total_bytes -= old.encoded_data.len(); - } - } - - self.total_bytes += record_size; - self.records.push_back(record); - } - - /// Drain all records for sending - pub fn drain(&mut self) -> Vec { - self.total_bytes = 0; - self.records.drain(..).collect() - } - - /// Check if buffer has data - pub fn is_empty(&self) -> bool { - self.records.is_empty() - } -} - -/// Called by otel_* macros to buffer a log record -pub fn buffer_log_record(record: LogRecord) { - LOG_BUFFER.with(|buf| { - buf.borrow_mut().push(record); - }); -} - -/// Called by EffectHandler on timer tick to flush logs -pub fn flush_log_buffer() -> Vec { - LOG_BUFFER.with(|buf| { - buf.borrow_mut().drain() - }) -} -``` - -### Option 2: EffectHandler-Owned Buffer (Explicit State) - -Alternatively, store the buffer directly in the `EffectHandler`: - -```rust -pub struct EffectHandlerCore { - pub(crate) node_id: NodeId, - pub(crate) pipeline_ctrl_msg_sender: Option>, - pub(crate) metrics_reporter: MetricsReporter, - - // NEW: Per-handler log buffer - pub(crate) log_buffer: LogBuffer, -} - -impl EffectHandlerCore { - /// Log an info message, buffering it for later flush - pub fn log_info(&mut self, name: &'static str, attributes: &[(&str, AttributeValue)]) { - let record = LogRecord { - timestamp: std::time::Instant::now(), - level: tracing::Level::INFO, - name, - target: self.node_id.name.as_str(), - encoded_data: encode_attributes_to_otlp(attributes), - }; - self.log_buffer.push(record); - } - - /// Flush buffered logs - can be called on timer or when sending to pipeline - pub async fn flush_logs(&mut self) -> Result<(), Error> { - let records = self.log_buffer.drain(); - if records.is_empty() { - return Ok(()); - } - - // Option A: Send to global collection thread - self.send_to_global_collector(records).await?; - - // Option B: Route to local ITR pipeline - // self.route_to_local_pipeline(records).await?; - - Ok(()) - } -} -``` - -### Option 3: Hybrid Approach with Thread-Local + Handler Reference - -This pattern allows macros to work anywhere while the EffectHandler controls flushing: - -```rust -use std::cell::RefCell; -use std::sync::Arc; - -/// Weak reference to the EffectHandler's log sink -pub struct LogSink { - sender: flume::Sender, -} - -thread_local! 
{ - /// Thread-local pointer to this thread's log sink - static CURRENT_LOG_SINK: RefCell>> = RefCell::new(None); -} - -impl EffectHandlerCore { - /// Install this handler's log sink as the thread-local default - pub fn install_log_sink(&self) { - let sink = Arc::new(LogSink { - sender: self.log_channel.clone(), - }); - CURRENT_LOG_SINK.with(|s| { - *s.borrow_mut() = Some(sink); - }); - } - - /// Remove the thread-local sink (e.g., during shutdown) - pub fn uninstall_log_sink(&self) { - CURRENT_LOG_SINK.with(|s| { - *s.borrow_mut() = None; - }); - } -} - -/// Called by otel_* macros -pub fn emit_log(record: LogRecord) { - CURRENT_LOG_SINK.with(|sink| { - if let Some(sink) = &*sink.borrow() { - // Non-blocking send, drop if full - let _ = sink.sender.try_send(record); - } - // If no sink installed, log is dropped (or use fallback) - }); -} -``` - ---- - -## Parallel with Existing Metrics Infrastructure - -Your existing metrics system follows a pattern that can be mirrored for logs: - -### Current Metrics Flow - -``` -┌──────────────────┐ report() ┌──────────────────┐ aggregate ┌─────────────────┐ -│ MetricSet │ ──────────────► │ MetricsReporter │ ─────────────► │ MetricsRegistry │ -│ (per-component) │ (channel) │ (per-handler) │ (channel) │ (global) │ -└──────────────────┘ └──────────────────┘ └─────────────────┘ - │ - dispatch_metrics() - ▼ - ┌─────────────────────┐ - │ MetricsDispatcher │ - │ → OpenTelemetry SDK │ - │ → /metrics endpoint │ - └─────────────────────┘ -``` - -### Proposed Parallel Logs Flow - -``` -┌──────────────────┐ buffer() ┌──────────────────┐ flush ┌─────────────────┐ -│ LogRecord │ ──────────────► │ LogBuffer │ ─────────────► │ LogsRegistry │ -│ (per-event) │ (thread-local) │ (per-handler) │ (channel) │ (global) │ -└──────────────────┘ └──────────────────┘ └─────────────────┘ - │ - dispatch_logs() - ▼ - ┌─────────────────────┐ - │ LogsDispatcher │ - │ → ITR Pipeline │ - │ → /logs endpoint │ - │ → Raw console │ - └─────────────────────┘ -``` - -### Implementation Sketch for LogsRegistry - -```rust -use std::sync::{Arc, RwLock}; -use std::collections::VecDeque; - -/// Ring buffer of recent log records for the /logs endpoint -pub struct LogsRegistry { - /// Configurable max records to keep - max_records: usize, - /// Ring buffer of recent logs (OTLP-encoded bytes) - recent_logs: RwLock>>, - /// Channel to receive logs from all handlers - receiver: flume::Receiver>, -} - -impl LogsRegistry { - /// Get recent logs for HTTP endpoint (analogous to /metrics) - pub fn get_recent_logs(&self) -> Vec> { - self.recent_logs.read().unwrap().iter().cloned().collect() - } - - /// Collection loop (parallel to MetricsCollector::run_collection_loop) - pub async fn run_collection_loop(&self) -> Result<(), Error> { - while let Ok(log_bytes) = self.receiver.recv_async().await { - let mut buffer = self.recent_logs.write().unwrap(); - - // Ring buffer eviction - if buffer.len() >= self.max_records { - buffer.pop_front(); - } - buffer.push_back(log_bytes); - - // Also forward to ITR pipeline if configured - // self.forward_to_itr(&log_bytes).await?; - } - Ok(()) - } -} -``` - -### HTTP Endpoint for Logs - -Similar to `/metrics`, provide a `/logs` endpoint: - -```rust -/// Handler for GET /logs - returns recent internal logs -pub async fn get_internal_logs( - registry: Arc, -) -> impl IntoResponse { - let logs = registry.get_recent_logs(); - - // Could format as: - // - JSON array of log lines - // - OTLP LogsData protobuf - // - Human-readable text - - let formatted = 
format_logs_as_text(&logs); - (StatusCode::OK, formatted) -} -``` - ---- - -## Summary - -### Key Thread-Local Patterns for Your Use Case - -1. **Feedback Prevention Flag**: `INTERNAL_TELEMETRY_THREAD: Cell` - - Set `true` on dedicated internal telemetry threads - - Macros check this before emitting events - -2. **Reentrancy Guard**: `IN_TELEMETRY_DISPATCH: Cell` - - Prevents recursive telemetry events - - Similar to tracing's `can_enter` mechanism - -3. **Per-Thread Buffer**: `LOG_BUFFER: RefCell` - - Accumulate logs without blocking - - EffectHandler flushes on timer - -4. **Thread-Local Sink Reference**: `CURRENT_LOG_SINK: RefCell>>` - - Allows macros to find the right destination - - EffectHandler installs/uninstalls on thread lifecycle - -### Tracing Mechanisms You Can Leverage - -1. **`with_default()`**: Set thread-specific subscriber for internal threads -2. **`Dispatch::none()`**: No-op subscriber when reentrancy detected -3. **`ThreadLocal>`**: Per-thread state in shared structures -4. **Guard-based RAII**: Automatic cleanup on scope exit - -### Next Steps - -1. Implement the feedback prevention thread-local flag -2. Update `otel_*` macros to check the flag -3. Create `LogBuffer` structure parallel to `MetricSet` -4. Add `LogsReporter` parallel to `MetricsReporter` -5. Implement `LogsRegistry` with `/logs` endpoint -6. Wire up EffectHandler timer-based flush From 58af27d7ba18f7239dc73d76fa9d42671482a35f Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Fri, 9 Jan 2026 10:41:17 -0800 Subject: [PATCH 45/92] hand --- .../crates/telemetry/src/error.rs | 4 + .../otap-dataflow/crates/telemetry/src/lib.rs | 2 +- .../crates/telemetry/src/logs.rs | 212 ++++-------------- .../crates/telemetry/src/self_tracing.rs | 10 +- .../telemetry/src/self_tracing/encoder.rs | 2 +- .../telemetry/src/self_tracing/formatter.rs | 38 +--- 6 files changed, 59 insertions(+), 209 deletions(-) diff --git a/rust/otap-dataflow/crates/telemetry/src/error.rs b/rust/otap-dataflow/crates/telemetry/src/error.rs index ecab5ce5dd..8d37550881 100644 --- a/rust/otap-dataflow/crates/telemetry/src/error.rs +++ b/rust/otap-dataflow/crates/telemetry/src/error.rs @@ -31,4 +31,8 @@ pub enum Error { /// Error during configuration of a component. #[error("Configuration error: {0}")] ConfigurationError(String), + + /// Error during logs send. + #[error("Log send error: {0}")] + LogSendError(String), } diff --git a/rust/otap-dataflow/crates/telemetry/src/lib.rs b/rust/otap-dataflow/crates/telemetry/src/lib.rs index 05712d2a25..3638ad15c0 100644 --- a/rust/otap-dataflow/crates/telemetry/src/lib.rs +++ b/rust/otap-dataflow/crates/telemetry/src/lib.rs @@ -64,7 +64,7 @@ pub use tracing::warn_span as otel_warn_span; // Re-export commonly used logs types for convenience. pub use logs::{ BufferWriterLayer, DirectChannelLayer, LogsCollector, LogsReporter, ProducerKeyGuard, - current_producer_key, flush_thread_log_buffer, install_thread_log_buffer, + current_producer_key, drain_thread_log_buffer, install_thread_log_buffer, uninstall_thread_log_buffer, with_engine_thread_subscriber, }; diff --git a/rust/otap-dataflow/crates/telemetry/src/logs.rs b/rust/otap-dataflow/crates/telemetry/src/logs.rs index 9cc8e8f35b..7c80a3c32f 100644 --- a/rust/otap-dataflow/crates/telemetry/src/logs.rs +++ b/rust/otap-dataflow/crates/telemetry/src/logs.rs @@ -2,15 +2,10 @@ // SPDX-License-Identifier: Apache-2.0 //! Internal logs collection for OTAP-Dataflow. -//! -//! Each pipeline thread has a single LogBuffer (via thread-local) that accumulates -//! 
log records. The pipeline runtime periodically flushes this buffer to the admin -//! via a channel. Components don't need to do anything special for logging. use crate::error::Error; use crate::self_tracing::{ConsoleWriter, LogRecord, ProducerKey, SavedCallsite}; use std::cell::RefCell; -use std::sync::atomic::{AtomicU64, Ordering}; use tracing::{Event, Subscriber}; use tracing_subscriber::filter::LevelFilter; use tracing_subscriber::layer::{Context, Layer as TracingLayer, SubscriberExt}; @@ -21,17 +16,14 @@ use tracing_subscriber::{EnvFilter, Registry}; pub struct LogBatch { /// The log records in this batch. pub records: Vec, - /// Number of records dropped since the last batch (buffer was full). + /// Number of records dropped in the same period. pub dropped_count: u64, } /// Thread-local log buffer for a pipeline thread. -/// -/// All components on this thread share the same buffer. -/// The pipeline runtime flushes it periodically on a timer. -/// If the buffer fills before flush, new events are dropped and counted. pub struct LogBuffer { batch: LogBatch, + active: Option, } impl LogBuffer { @@ -43,39 +35,25 @@ impl LogBuffer { records: Vec::with_capacity(capacity), dropped_count: 0, }, + active: None, } } /// Push a log record. If at capacity, the record is dropped and counted. - /// - /// Returns true if the record was added, false if dropped. - pub fn push(&mut self, record: LogRecord) -> bool { + pub fn push(&mut self, record: LogRecord) { if self.batch.records.len() >= self.batch.records.capacity() { self.batch.dropped_count += 1; - false } else { self.batch.records.push(record); - true } } - /// Check if the buffer has records to flush. - #[must_use] - pub fn needs_flush(&self) -> bool { - !self.batch.records.is_empty() || self.batch.dropped_count > 0 - } - /// Drain all records from the buffer, returning them as a batch. - /// Resets the dropped count for the next batch. pub fn drain(&mut self) -> LogBatch { - let capacity = self.batch.records.capacity(); - std::mem::replace( - &mut self.batch, - LogBatch { - records: Vec::with_capacity(capacity), - dropped_count: 0, - }, - ) + LogBatch { + records: self.batch.records.drain(..).collect(), + dropped_count: std::mem::take(&mut self.batch.dropped_count), + } } } @@ -84,50 +62,41 @@ thread_local! { static CURRENT_LOG_BUFFER: RefCell> = const { RefCell::new(None) }; } -// Thread-local current ProducerKey for third-party instrumentation. -// When a component is executing, this is set to that component's key so that -// any tracing::info!() calls from libraries can be attributed to the component. -thread_local! { - static CURRENT_PRODUCER_KEY: RefCell> = const { RefCell::new(None) }; -} - /// Guard that sets the current producer key for the duration of a scope. -/// -/// When dropped, restores the previous key (or None). -/// This allows nested scoping if needed. pub struct ProducerKeyGuard { previous: Option, } impl ProducerKeyGuard { /// Enter a scope with the given producer key. - /// - /// Third-party log events will be attributed to this key until - /// the guard is dropped. 
#[must_use] pub fn enter(key: ProducerKey) -> Self { - let previous = CURRENT_PRODUCER_KEY.with(|cell| cell.borrow_mut().replace(key)); + let previous = CURRENT_LOG_BUFFER + .with(|cell| cell.borrow_mut().as_mut().map(|b| b.active.replace(key))) + .flatten(); Self { previous } } } impl Drop for ProducerKeyGuard { fn drop(&mut self) { - CURRENT_PRODUCER_KEY.with(|cell| { - *cell.borrow_mut() = self.previous; + let _ = CURRENT_LOG_BUFFER.with(|cell| { + cell.borrow_mut().as_mut().map(|b| { + b.active = self.previous; + }) }); } } -/// Get the current producer key (if any component scope is active). +/// Get the current producer key #[must_use] pub fn current_producer_key() -> Option { - CURRENT_PRODUCER_KEY.with(|cell| *cell.borrow()) + CURRENT_LOG_BUFFER + .with(|cell| cell.borrow().as_ref().map(|b| b.active)) + .flatten() } /// Install a log buffer for the current thread. -/// -/// Called by the pipeline runtime when the thread starts. pub fn install_thread_log_buffer(capacity: usize) { CURRENT_LOG_BUFFER.with(|cell| { *cell.borrow_mut() = Some(LogBuffer::new(capacity)); @@ -135,26 +104,18 @@ pub fn install_thread_log_buffer(capacity: usize) { } /// Uninstall the log buffer for the current thread. -/// -/// Called by the pipeline runtime when the thread shuts down. pub fn uninstall_thread_log_buffer() { CURRENT_LOG_BUFFER.with(|cell| { *cell.borrow_mut() = None; }); } -/// Flush the current thread's log buffer, returning the batch. -/// -/// Called by the pipeline runtime on a timer. -pub fn flush_thread_log_buffer() -> Option { +/// Drain the current thread's log buffer, returning the batch. +pub fn drain_thread_log_buffer() -> Option { CURRENT_LOG_BUFFER.with(|cell| { - cell.borrow_mut().as_mut().and_then(|buffer| { - if buffer.needs_flush() { - Some(buffer.drain()) - } else { - None - } - }) + cell.borrow_mut() + .as_mut() + .and_then(|buffer| Some(buffer.drain())) }) } @@ -172,15 +133,10 @@ impl LogsReporter { } /// Try to send a batch, non-blocking. - /// - /// If the channel is full, the batch is dropped (returns Ok). - /// Only returns Err if the channel is disconnected. pub fn try_report(&self, batch: LogBatch) -> Result<(), Error> { - match self.sender.try_send(batch) { - Ok(()) => Ok(()), - Err(flume::TrySendError::Full(_)) => Ok(()), - Err(flume::TrySendError::Disconnected(_)) => Err(Error::LogsChannelClosed), - } + self.sender + .try_send(batch) + .map_err(|e| Error::LogSendError(e.to_string())) } } @@ -211,6 +167,7 @@ impl LogsCollector { self.write_batch(batch); } Err(_) => { + // TODO: raw log. return Ok(()); } } @@ -219,64 +176,21 @@ impl LogsCollector { /// Write a batch of log records to console. 
fn write_batch(&self, batch: LogBatch) { - // Print dropped count as a formatted warning before the batch - if batch.dropped_count > 0 { - self.writer.print_dropped_warning(batch.dropped_count); - } + // TODO: Print dropped count as a formatted warning before the batch for record in batch.records { // Identifier.0 is the &'static dyn Callsite let metadata = record.callsite_id.0.metadata(); let saved = SavedCallsite::new(metadata); // Use ConsoleWriter's routing: ERROR/WARN to stderr, others to stdout - self.writer.print_log_record(&record, &saved); + self.writer.raw_print(&record, &saved); // TODO: include producer_key in output when present } } } -// ============================================================================ -// BufferWriterLayer - For engine threads with thread-local buffer -// ============================================================================ - /// A tracing Layer for engine threads that writes to thread-local LogBuffer. -/// -/// This layer is installed via `with_default()` on each engine thread. -/// Events are accumulated in the thread-local buffer and flushed on a timer. -pub struct BufferWriterLayer { - /// Count of events successfully captured to the buffer. - events_captured: AtomicU64, - /// Count of events dropped because the buffer was full. - events_dropped: AtomicU64, -} - -impl BufferWriterLayer { - /// Create a new BufferWriterLayer. - #[must_use] - pub fn new() -> Self { - Self { - events_captured: AtomicU64::new(0), - events_dropped: AtomicU64::new(0), - } - } - - /// Get the number of events successfully captured. - #[must_use] - pub fn events_captured(&self) -> u64 { - self.events_captured.load(Ordering::Relaxed) - } - - /// Get the number of events dropped because buffer was full. - #[must_use] - pub fn events_dropped(&self) -> u64 { - self.events_dropped.load(Ordering::Relaxed) - } -} - -impl Default for BufferWriterLayer { - fn default() -> Self { - Self::new() - } -} +#[derive(Default)] +pub struct BufferWriterLayer {} impl TracingLayer for BufferWriterLayer where @@ -288,55 +202,24 @@ where CURRENT_LOG_BUFFER.with(|cell| { if let Some(ref mut buffer) = *cell.borrow_mut() { - if buffer.push(record) { - let _ = self.events_captured.fetch_add(1, Ordering::Relaxed); - } else { - let _ = self.events_dropped.fetch_add(1, Ordering::Relaxed); - } + buffer.push(record); } // No buffer = programming error on engine thread, silently drop }); } } -// ============================================================================ -// DirectChannelLayer - Global fallback for non-engine threads -// ============================================================================ - /// A tracing Layer for non-engine threads that sends directly to channel. -/// -/// This is installed as the global subscriber. Events are sent immediately -/// to the LogsCollector (non-blocking, dropped if channel is full). pub struct DirectChannelLayer { /// Reporter for sending to the channel. reporter: LogsReporter, - /// Count of events successfully sent. - events_captured: AtomicU64, - /// Count of events dropped because channel was full. - events_dropped: AtomicU64, } impl DirectChannelLayer { /// Create a new DirectChannelLayer with the given reporter. #[must_use] pub fn new(reporter: LogsReporter) -> Self { - Self { - reporter, - events_captured: AtomicU64::new(0), - events_dropped: AtomicU64::new(0), - } - } - - /// Get the number of events successfully sent. 
- #[must_use] - pub fn events_captured(&self) -> u64 { - self.events_captured.load(Ordering::Relaxed) - } - - /// Get the number of events dropped because channel was full. - #[must_use] - pub fn events_dropped(&self) -> u64 { - self.events_dropped.load(Ordering::Relaxed) + Self { reporter } } } @@ -352,43 +235,28 @@ where dropped_count: 0, }; - match self.reporter.sender.try_send(batch) { - Ok(()) => { - let _ = self.events_captured.fetch_add(1, Ordering::Relaxed); - } - Err(flume::TrySendError::Full(_)) => { - let _ = self.events_dropped.fetch_add(1, Ordering::Relaxed); - } - Err(flume::TrySendError::Disconnected(_)) => { - // Channel closed, nothing we can do + match self.reporter.try_report(batch) { + Ok(()) => {} + Err(_err) => { + // TODO: raw log } } } } -// ============================================================================ -// Engine Thread Subscriber Setup -// ============================================================================ - /// Create a subscriber for engine threads that uses BufferWriterLayer. -/// -/// This subscriber captures events to the thread-local buffer instead of -/// sending them to the channel directly. fn create_engine_thread_subscriber() -> impl Subscriber { // Use the same filter as the global subscriber (INFO by default, RUST_LOG override) let filter = EnvFilter::builder() .with_default_directive(LevelFilter::INFO.into()) .from_env_lossy(); - + Registry::default() .with(filter) - .with(BufferWriterLayer::new()) + .with(BufferWriterLayer::default()) } /// Run a closure with the engine thread subscriber as the default. -/// -/// This should be called at the top of each engine thread to ensure all -/// logging on that thread goes to the thread-local buffer. pub fn with_engine_thread_subscriber(f: F) -> R where F: FnOnce() -> R, diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing.rs index 29354d4202..23a205d5d4 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing.rs @@ -21,7 +21,7 @@ pub use encoder::DirectLogRecordEncoder; pub use formatter::{ConsoleWriter, RawLoggingLayer}; /// Optional key identifying the producing component. -/// Re-exported from the registry module for use by downstream code. +/// TODO: This is re-exported, instead rename the underlying type. pub type ProducerKey = crate::registry::MetricsKey; /// A log record with structural metadata and pre-encoded body/attributes. @@ -118,3 +118,11 @@ impl LogRecord { .as_nanos() as u64 } } + +/// Write a LogRecord to stdout or stderr (based on level). +/// +/// ERROR and WARN go to stderr, others go to stdout. +/// This is the same routing logic used by RawLoggingLayer. 
+pub fn raw_print(record: &LogRecord, callsite: &SavedCallsite) { + ConsoleWriter::no_color().raw_print(record, callsite) +} diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs index 6d2d0459f9..ae0d7db4b5 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs @@ -47,7 +47,7 @@ impl<'buf> DirectLogRecordEncoder<'buf> { .encode_field_tag(LOG_RECORD_SEVERITY_NUMBER, wire_types::VARINT); self.buf.encode_varint(severity as u64); - // Encode severity_text (field 3, string) + // Encode severity_text (field 3, string) UPDATE ME self.buf .encode_string(LOG_RECORD_SEVERITY_TEXT, callsite.level().as_str()); diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs index 0da31c2db2..9ad75de366 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs @@ -128,44 +128,14 @@ impl ConsoleWriter { /// /// ERROR and WARN go to stderr, others go to stdout. /// This is the same routing logic used by RawLoggingLayer. - pub fn print_log_record(&self, record: &LogRecord, callsite: &SavedCallsite) { + pub fn raw_print(&self, record: &LogRecord, callsite: &SavedCallsite) { let mut buf = [0u8; LOG_BUFFER_SIZE]; let len = self.write_log_record(&mut buf, record, callsite); self.write_line(callsite.level(), &buf[..len]); } - /// Print a warning about dropped log records. - /// - /// Formatted to look like a regular log record at WARN level. - pub fn print_dropped_warning(&self, dropped_count: u64) { - use std::time::{SystemTime, UNIX_EPOCH}; - - let timestamp_ns = SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap_or_default() - .as_nanos() as u64; - - let mut buf = [0u8; LOG_BUFFER_SIZE]; - let mut w = Cursor::new(&mut buf[..]); - let cm = self.color_mode; - - cm.write_ansi(&mut w, AnsiCode::Dim); - Self::write_timestamp(&mut w, timestamp_ns); - cm.write_ansi(&mut w, AnsiCode::Reset); - let _ = w.write_all(b" "); - cm.write_level(&mut w, &Level::WARN); - cm.write_ansi(&mut w, AnsiCode::Bold); - let _ = w.write_all(b"otap_df_telemetry::logs::buffer_overflow"); - cm.write_ansi(&mut w, AnsiCode::Reset); - let _ = write!(w, ": dropped {} log records (buffer full)\n", dropped_count); - - let len = w.position() as usize; - // WARN goes to stderr - let _ = std::io::stderr().write_all(&buf[..len]); - } - /// Write a LogRecord to a byte buffer. Returns the number of bytes written. - pub fn write_log_record( + pub(crate) fn write_log_record( &self, buf: &mut [u8], record: &LogRecord, @@ -346,7 +316,7 @@ impl ConsoleWriter { } /// Write a log line to stdout or stderr. - fn write_line(&self, level: &Level, data: &[u8]) { + pub(crate) fn write_line(&self, level: &Level, data: &[u8]) { let use_stderr = matches!(*level, Level::ERROR | Level::WARN); let _ = if use_stderr { std::io::stderr().write_all(data) @@ -368,7 +338,7 @@ where // so no producer_key context is available. let record = LogRecord::new(event, None); let callsite = SavedCallsite::new(event.metadata()); - self.writer.print_log_record(&record, &callsite); + self.writer.raw_print(&record, &callsite); } // Note! 
This tracing layer does not implement Span-related features From cbf9f2684a0052acfb7b199ac3d4754b1cb1b6d7 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Fri, 9 Jan 2026 13:33:31 -0800 Subject: [PATCH 46/92] wip --- .../crates/telemetry/src/logs.rs | 8 +-- .../crates/telemetry/src/self_tracing.rs | 1 + .../telemetry/src/self_tracing/raw_log.rs | 57 +++++++++++++++++++ 3 files changed, 62 insertions(+), 4 deletions(-) create mode 100644 rust/otap-dataflow/crates/telemetry/src/self_tracing/raw_log.rs diff --git a/rust/otap-dataflow/crates/telemetry/src/logs.rs b/rust/otap-dataflow/crates/telemetry/src/logs.rs index 7c80a3c32f..c6e13153b9 100644 --- a/rust/otap-dataflow/crates/telemetry/src/logs.rs +++ b/rust/otap-dataflow/crates/telemetry/src/logs.rs @@ -166,8 +166,8 @@ impl LogsCollector { Ok(batch) => { self.write_batch(batch); } - Err(_) => { - // TODO: raw log. + Err(err) => { + crate::raw_error!("log collector error: {err}"); return Ok(()); } } @@ -237,8 +237,8 @@ where match self.reporter.try_report(batch) { Ok(()) => {} - Err(_err) => { - // TODO: raw log + Err(err) => { + crate::raw_error!("failed to send log batch: {}", err); } } } diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing.rs index 23a205d5d4..ae05bf3f84 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing.rs @@ -9,6 +9,7 @@ pub mod encoder; pub mod formatter; +pub mod raw_log; use bytes::Bytes; use encoder::DirectFieldVisitor; diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/raw_log.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/raw_log.rs new file mode 100644 index 0000000000..c103c329c3 --- /dev/null +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/raw_log.rs @@ -0,0 +1,57 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +//! Raw logging macros that bypass the tracing subscriber. +//! +//! These macros are used for logging when the subscriber infrastructure is not +//! available or may be in an inconsistent state (e.g., during shutdown, when +//! the channel is closed, or during early initialization). +//! +//! The macros temporarily install a dedicated subscriber that writes directly +//! to the console, then use standard tracing macros. This reuses the normal +//! tracing event creation and encoding path. + +use super::formatter::RawLoggingLayer; +use tracing_subscriber::prelude::*; + +/// Create a subscriber that writes directly to console (bypassing channels). +fn raw_logging_subscriber() -> impl tracing::Subscriber { + tracing_subscriber::registry().with(RawLoggingLayer::new(super::ConsoleWriter::no_color())) +} + +/// Execute a closure with a raw logging subscriber that writes directly to console. +#[inline] +pub fn with_raw_logging(f: F) -> R +where + F: FnOnce() -> R, +{ + tracing::subscriber::with_default(raw_logging_subscriber(), f) +} + +/// Log an error message directly to stderr, bypassing the tracing subscriber. +/// +/// For reporting errors in the rrr rsr rs rsr rsr rs sr sr rs rs rs sr rs rs +/// +/// # Example +/// +/// ```ignore +/// use otap_df_telemetry::raw_error; +/// raw_error!("Connection failed: {}", error); +/// ``` +#[macro_export] +macro_rules! 
raw_error {
+    ($($arg:tt)+) => {
+        $crate::self_tracing::raw_log::with_raw_logging(|| {
+            ::tracing::error!($($arg)+)
+        })
+    };
+}
+
+#[cfg(test)]
+mod tests {
+    #[test]
+    fn test_raw_error() {
+        raw_error!("test error message");
+        raw_error!("test error with arg: {}", 42);
+    }
+}

From d031e422521c9ddb3a4563a69ca851174358c6f8 Mon Sep 17 00:00:00 2001
From: Joshua MacDonald
Date: Fri, 9 Jan 2026 15:02:57 -0800
Subject: [PATCH 47/92] undoc

---
 .../crates/telemetry/ARCHITECTURE.md          | 198 -----
 .../docs/fixed-proto-buffer-design.md         | 754 ----------------
 .../docs/internal-logs-design.md              | 812 ------------------
 .../docs/thread-local-telemetry-research.md   | 713 ---------------
 4 files changed, 2477 deletions(-)
 delete mode 100644 rust/otap-dataflow/crates/telemetry/ARCHITECTURE.md
 delete mode 100644 rust/otap-dataflow/docs/fixed-proto-buffer-design.md
 delete mode 100644 rust/otap-dataflow/docs/internal-logs-design.md
 delete mode 100644 rust/otap-dataflow/docs/thread-local-telemetry-research.md

diff --git a/rust/otap-dataflow/crates/telemetry/ARCHITECTURE.md b/rust/otap-dataflow/crates/telemetry/ARCHITECTURE.md
deleted file mode 100644
index 2540b6093e..0000000000
--- a/rust/otap-dataflow/crates/telemetry/ARCHITECTURE.md
+++ /dev/null
@@ -1,198 +0,0 @@
-# Internal Telemetry Collection Architecture & Development Plan
-
-## Architecture
-
-The internal telemetry SDK is designed for the engine to safely
-consume its own telemetry, and we intend for the self-hosted telemetry
-pipeline to be the standard configuration for all OpenTelemetry
-signals.
-
-Consuming self-generated telemetry presents a potential feedback
-loop: situations where a telemetry pipeline creates pressure on
-itself. We have designed for the OTAP dataflow engine to remain
-reliable even with this kind of dependency on itself.
-
-## Internal telemetry receiver
-
-The Internal Telemetry Receiver or "ITR" is an OTAP-Dataflow receiver
-component that produces telemetry from internal sources. An internal
-telemetry pipeline consists of one or more ITR components and any of
-the connected processor and exporter components reachable from ITR
-source nodes.
-
-To begin with, every OTAP-Dataflow component is configured with an
-internal telemetry SDK meant for primary instrumentation of that
-component. Components are required to exclusively use the internal
-telemetry SDK for self-diagnostics, as they are considered first party
-in this exchange.
-
-The internal telemetry receiver is the SDK's counterpart, making it
-second party as it is responsible for routing internal telemetry. The
-ITR cannot use the internal telemetry SDK itself, making it an
-invisible member of the pipeline. The ITR can be instrumented using
-third-party instrumentation (e.g., `tracing`, `log` crates) provided
-it can guarantee there is no potential for feedback (e.g., a single
-`tracing::info()` statement at startup).
-
-## Pitfall avoidance
-
-The OTAP-Dataflow engine is safeguarded against many self-induced
-telemetry pitfalls, as follows:
-
-- OTAP-Dataflow components reachable from an ITR cannot be configured
-  to send to an ITR node. This avoids a direct feedback cycle for
-  internal telemetry because the components cannot reach
-  themselves. For example, ITR and downstream components may be
-  configured for raw logging, no metrics, etc. (A sketch of this
-  validation appears after this list.)
-- ITR instances share access to one or more threads with associated
-  async runtime. They use these dedicated threads to isolate internal
-  telemetry processes that use third-party instrumentation.
-- A thread-local variable is used to redirect third-party
-  instrumentation in dedicated internal telemetry threads. Internal
-  telemetry threads automatically install a safe configuration that
-  drops third-party instrumentation instead of creating feedback.
-- Components under observation (non-ITR components) have internal
-  telemetry events routed to queues in the OTAP-Dataflow pipeline on
-  the same core, which avoids blocking the engine. First-party
-  instrumentation will be handled on the CPU core that produced the
-  telemetry under normal circumstances. This isolates cores that are
-  able to process their own internal telemetry.
-- Internal telemetry can be configured multiple ways, including the
-  no-op implementation, a multi-threaded subscriber, routing to the
-  same-core ITR, and/or raw logging.
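-
-The first rule above (no route back into an ITR node) can be checked
-at configuration time with an ordinary graph search. The sketch below
-is illustrative only: `NodeId`, the edge map, and
-`itr_feedback_possible` are hypothetical names, not the actual
-config-crate API.
-
-```rust
-use std::collections::{HashMap, HashSet, VecDeque};
-
-/// Hypothetical node identifier; the real config uses richer IDs.
-type NodeId = String;
-
-/// Returns true if any node reachable from an ITR source can send
-/// back into an ITR node, i.e. a feedback cycle is possible.
-fn itr_feedback_possible(
-    edges: &HashMap<NodeId, Vec<NodeId>>,
-    itr_nodes: &HashSet<NodeId>,
-) -> bool {
-    let mut seen: HashSet<&NodeId> = HashSet::new();
-    let mut queue: VecDeque<&NodeId> = itr_nodes.iter().collect();
-    while let Some(node) = queue.pop_front() {
-        for next in edges.get(node).into_iter().flatten() {
-            if itr_nodes.contains(next) {
-                return true; // a downstream component reaches an ITR
-            }
-            if seen.insert(next) {
-                queue.push_back(next);
-            }
-        }
-    }
-    false
-}
-```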
-
-## OTLP-bytes first
-
-As a key design decision, the OTAP-Dataflow internal telemetry data
-path produces OTLP bytes first. Because OTLP bytes is one of the
-built-in `OtapPayload` formats, once we have the OTLP bytes encoding
-of an event we are able to send it to an OTAP-Dataflow pipeline. To
-obtain these bytes, we will build a custom [Tokio `tracing`
-Event][TOKIOEVENT] handler to produce OTLP bytes before dispatching to
-an internal pipeline, used (in different configurations) for first-
-and third-party instrumentation.
-
-We use an intermediate representation in which the dynamic elements of
-the `tracing` event are encoded while primitive fields and metadata
-remain in structured form. These are encoded using the OTLP
-`opentelemetry.proto.logs.v1.LogRecord` protocol.
-
-[TOKIOEVENT]: https://docs.rs/tracing/latest/tracing/struct.Event.html
-
-## Raw logging
-
-We support formatting events for direct printing to the console from
-OTLP bytes. For the dynamic encoding, these are consumed using
-`otap_df_pdata::views::logs::LogsDataView`, our zero-copy accessor. We
-refer to this most basic form of printing to the console as raw
-logging because it is a safe configuration early in the lifetime of a
-process.
-
-This configuration is meant for development purposes; it is likely to
-introduce contention over the console.
-
-## Routing
-
-The two internal logs data paths are:
-
-- Third-party: the Tokio `tracing` global subscriber handles
-  third-party log events, i.e., instrumentation in code without
-  access to an OTAP-Dataflow `EffectHandler`. These are handled in a
-  dedicated internal telemetry thread.
-- First-party: components with a local or shared `EffectHandler` use
-  dedicated macros (e.g., `otel_info!(effect, "interesting thing")`).
-  These use the configured internal telemetry SDK, and for ordinary
-  components (not ITR-downstream) they are routed through the ITR on
-  the same core. These are always non-blocking APIs; the internal SDK
-  must drop logs instead of blocking the pipeline (see the sketch
-  after this list).
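-
-A minimal sketch of the non-blocking contract for the first-party
-path. `ComponentLogSink` and `EncodedLogRecord` are stand-in names
-(the real handler API differs); the point is the `try_send` shape: on
-a full queue the record is dropped and counted, never awaited.
-
-```rust
-use flume::{Sender, TrySendError};
-
-/// Stand-in for a pre-encoded OTLP log record.
-pub struct EncodedLogRecord(pub Vec<u8>);
-
-/// Hypothetical per-component sink held by an effect handler.
-pub struct ComponentLogSink {
-    sender: Sender<EncodedLogRecord>,
-    dropped: u64,
-}
-
-impl ComponentLogSink {
-    /// Never blocks the pipeline: a full or closed queue drops the
-    /// record and increments a counter instead of waiting.
-    pub fn report(&mut self, record: EncodedLogRecord) {
-        match self.sender.try_send(record) {
-            Ok(()) => {}
-            Err(TrySendError::Full(_) | TrySendError::Disconnected(_)) => {
-                self.dropped += 1;
-            }
-        }
-    }
-}
-```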
-
-## Development plan
-
-Each of the items below is relatively small, estimated at 300-500
-lines of new code plus new tests.
-
-### LogRecord: Tokio tracing Event and Metadata to LogRecordView
-
-When we receive a Tokio tracing event, whether through a
-`tracing::info!` macro (or similar) or through a dedicated
-`EffectHandler`-based API, the same thing happens:
-
-Create a `LogRecord`, a struct derived from `tracing::Event` and
-`tracing::Metadata`, containing raw LogRecord fields extracted from
-the tracing macro layer plus a fresh timestamp. Log record attributes
-and the log event body are encoded as the "attributes and body bytes"
-field of `LogRecord`; the other fields are copied.
-
-With this record, we can defer formatting or encoding the entire
-record until later. We can:
-
-- For raw logging, format directly for the console
-- Finish the full OTLP bytes encoding for the `LogRecord`
-- Sort and filter before combining into a `LogsData`.
-
-### OTLP-bytes console logging handler
-
-We require a way to print OTLP bytes as human-readable log lines. We
-cannot easily re-use the Tokio `tracing` format layer for this;
-however, we can use the `LogsDataView` trait with `RawLogsData` to
-format human-readable text for the console directly from OTLP bytes.
-
-This OTLP-bytes-to-human-readable logic will be used to implement raw
-logging.
-
-### Global logs collection thread
-
-An OTAP-Dataflow engine will run at least one global logs collection
-thread. These threads receive encoded (OTLP bytes) log events from
-various locations in the process. The global logs collection thread
-is special: it sets an anti-recursion bit in the thread-local state
-to prevent logging in its own export path.
-
-The global logs collection thread is configured as one or more
-instances (as needed) consuming logs from the global Tokio `tracing`
-subscriber. In this thread, we'll configure the OpenTelemetry SDK or a
-dedicated OTAP-Dataflow pipeline (by configuration) for logs export.
-
-Because global logs collection threads are used as a fallback for
-`EffectHandler`-level logs and because third-party libraries may call
-Tokio `tracing` APIs, we arrange to explicitly disallow these threads
-from logging. The macros are disabled from executing.
-
-### Global and Per-core Event Router
-
-OTAP-Dataflow provides an option to route internal telemetry to a
-pipeline on the same core as the effect handler that produced the
-telemetry. When a component logging API is used on the `EffectHandler`
-or when a tokio `tracing` event occurs on the `EffectHandler` thread,
-it will be routed using thread-local state so that the event is
-immediately encoded and stored or flushed, without blocking the effect
-handler.
-
-When a telemetry event is routed directly, as in this case, and
-`send_message()` succeeds, it means there was queue space to accept
-the log record on the same core. When this fails, the configurable
-telemetry router will support options to use the global logs
-collection thread, a raw logger, or do nothing (dropping the internal
-log record).
-
-## Example configuration
-
-```yaml
-service:
-  telemetry:
-    logs:
-      level: info
-      internal_collection:
-        enabled: true
-
-        # Per-thread buffer
-        buffer_size_bytes: 65536
-
-        # Individual record size limit
-        max_record_bytes: 16384
-
-        # Bounded channel capacity
-        max_record_count: 10
-
-        # Timer-based flush interval
-        flush_interval: "1s"
-```
diff --git a/rust/otap-dataflow/docs/fixed-proto-buffer-design.md b/rust/otap-dataflow/docs/fixed-proto-buffer-design.md
deleted file mode 100644
index a86d0eec8e..0000000000
--- a/rust/otap-dataflow/docs/fixed-proto-buffer-design.md
+++ /dev/null
@@ -1,754 +0,0 @@
-# Fixed-Size Proto Buffer Design
-
-## Problem Statement
-
-The OTAP dataflow pipeline requires an internal logging path for self-diagnostics that feeds OTLP protocol bytes directly into the pipeline. This internal instrumentation has specific constraints:
-
-1. 
**Safety**: Internal logging must not cause heap allocations that could interfere with the main data path or cause memory pressure during high-load scenarios. - -2. **Low Impact**: The encoding path must be lightweight and predictable, suitable for use in hot paths like `tracing::info!` statements. - -3. **Fixed-Size Buffers**: For stack-allocated buffers with a predetermined capacity, the encoder must handle out-of-space conditions gracefully rather than panicking or reallocating. - -4. **Truncation Support**: When encoding attributes into a fixed buffer, if space runs out mid-encoding (e.g., while looping through event variables), the encoder should: - - Return a "truncated" error result - - Allow callers to use the partially-encoded contents - - Enable tracking of dropped attributes - -5. **Code Reuse**: We explicitly want to avoid maintaining two separate encoder implementations—one for growable buffers (normal telemetry path) and one for fixed-size buffers (internal instrumentation). - -6. **Graceful Degradation**: Even for growable buffers, we want configurable limits to prevent unbounded growth from malformed or malicious data. Large attribute values and log bodies should be truncated gracefully with informative markers. - -### OTLP Protocol Support - -The OpenTelemetry LogRecord proto already provides mechanisms for handling truncation: - -```protobuf -message LogRecord { - // ... other fields ... - uint32 dropped_attributes_count = 7; // Track dropped attributes - fixed32 flags = 8; // 5 bytes total (tag + fixed32) -} -``` - -This means we can: -- Reserve 5 bytes at the end of our encoding buffer for `dropped_attributes_count` -- Encode as many attributes as fit -- On truncation, count remaining attributes and encode the count in the reserved space - -### Example Use Case - -```rust -// During a tracing::info! statement, encode log attributes into a fixed buffer -let mut buf = FixedProtoBuffer::<1024>::new(); - -// Reserve space for dropped_attributes_count (tag=7 varint + uint32 varint = ~5 bytes) -buf.reserve_tail(5); - -let mut encoded_count = 0; -for attr in event_attributes { - if encode_key_value(&mut buf, attr).is_err() { - // Truncation occurred - use partial contents - break; - } - encoded_count += 1; -} - -// Release reserved space and encode dropped count -let dropped_count = event_attributes.len() - encoded_count; -buf.release_tail(5); -if dropped_count > 0 { - buf.encode_field_tag(7, WIRE_TYPE_VARINT); - buf.encode_varint(dropped_count as u64); -} -``` - -## Solution - -### Design Approach - -The solution introduces a `ProtoWrite` trait that abstracts over buffer implementations, allowing encoding logic to work with both growable (`ProtoBuffer`) and fixed-size (`FixedProtoBuffer`) buffers through the same code path. - -### Core Concepts - -#### Buffer Space Model - -``` -|-------- written --------|----- remaining -----|---- reserved ----| - ^ ^ - len limit - reserved_tail - -effective_remaining = limit - len - reserved_tail -``` - -- **limit**: Maximum bytes that can be written (may be less than capacity) -- **reserved_tail**: Bytes reserved at the end for fields like `dropped_attributes_count` -- **effective_remaining**: Actual bytes available for the next write operation - -#### Length Placeholder Optimization - -When encoding nested messages, we don't know the size upfront, so we reserve placeholder bytes for the length varint and patch them afterward. 
The number of bytes needed depends on the maximum possible message size: - -| Buffer Limit | Max Length | Varint Bytes | Savings vs 4-byte | -|-------------|------------|--------------|-------------------| -| ≤ 127 B | 127 | 1 byte | 75% | -| ≤ 16 KiB | 16383 | 2 bytes | 50% | -| ≤ 2 MiB | 2097151 | 3 bytes | 25% | -| > 2 MiB | 2^28-1 | 4 bytes | 0% | - -For internal instrumentation with small fixed buffers (e.g., 1-4 KiB), using 2-byte placeholders instead of 4-byte saves significant space, especially in deeply nested structures like attributes within log records within scope logs within resource logs. - -**Example savings for a LogRecord with 10 nested messages:** -- 4-byte placeholders: 40 bytes overhead -- 2-byte placeholders: 20 bytes overhead -- Savings: 20 bytes (could fit another small attribute!) - -#### `LengthPlaceholderSize` Enum - -```rust -/// Determines how many bytes to reserve for length placeholders in nested messages. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] -pub enum LengthPlaceholderSize { - /// 1 byte - for buffers ≤ 127 bytes (max length = 127) - OneByte, - /// 2 bytes - for buffers ≤ 16 KiB (max length = 16383) - TwoBytes, - /// 3 bytes - for buffers ≤ 2 MiB (max length = 2097151) - ThreeBytes, - /// 4 bytes - for larger buffers (max length = 268435455) - #[default] - FourBytes, -} - -impl LengthPlaceholderSize { - /// Choose the optimal placeholder size for a given buffer limit. - pub const fn for_limit(limit: usize) -> Self { - if limit <= 127 { - Self::OneByte - } else if limit <= 16383 { - Self::TwoBytes - } else if limit <= 2097151 { - Self::ThreeBytes - } else { - Self::FourBytes - } - } - - /// Number of bytes this placeholder uses. - pub const fn num_bytes(self) -> usize { - match self { - Self::OneByte => 1, - Self::TwoBytes => 2, - Self::ThreeBytes => 3, - Self::FourBytes => 4, - } - } - - /// Maximum length that can be encoded with this placeholder size. - pub const fn max_length(self) -> usize { - match self { - Self::OneByte => 127, - Self::TwoBytes => 16383, - Self::ThreeBytes => 2097151, - Self::FourBytes => 268435455, - } - } - - /// Encode a zero-padded length placeholder. - /// Returns the bytes to write. - pub const fn placeholder_bytes(self) -> &'static [u8] { - match self { - Self::OneByte => &[0x00], - Self::TwoBytes => &[0x80, 0x00], - Self::ThreeBytes => &[0x80, 0x80, 0x00], - Self::FourBytes => &[0x80, 0x80, 0x80, 0x00], - } - } -} -``` - -#### Dispatch Mechanism - -**Dynamic sizing based on remaining capacity:** - -The placeholder size only needs to accommodate the *remaining buffer space*. When writing a length placeholder, we check how much space is left and choose the smallest sufficient placeholder: - -```rust -/// Returned from write_length_placeholder, used to patch the length later. -#[derive(Clone, Copy)] -pub struct LengthPlaceholder { - pub offset: usize, - pub size: LengthPlaceholderSize, -} - -fn write_length_placeholder(&mut self) -> Result { - let offset = self.len(); - let remaining = self.capacity() - offset - self.reserved_tail; - let size = LengthPlaceholderSize::for_limit(remaining); - self.write_bytes(size.placeholder_bytes())?; - Ok(LengthPlaceholder { offset, size }) -} - -fn patch_length_placeholder(&mut self, placeholder: LengthPlaceholder, length: usize) { - let slice = self.as_mut_slice(); - for i in 0..placeholder.size.num_bytes() { - slice[placeholder.offset + i] += ((length >> (i * 7)) & 0x7f) as u8; - } -} -``` - -**Usage in macro:** - -```rust -macro_rules! 
proto_encode_len_delimited_try { - ($buf:expr, $tag:expr, $encode_fn:expr) => {{ - proto_encode_varint($buf, $tag); - let placeholder = $buf.write_length_placeholder()?; // returns LengthPlaceholder - let start = $buf.len(); - $encode_fn; - let length = $buf.len() - start; - $buf.patch_length_placeholder(placeholder, length); // uses stored offset + size - }}; -} -``` - -**Benefits:** - -- **No configuration needed**: The encoder automatically chooses optimal sizes -- **Simple**: The placeholder struct is just 2 usizes on the stack -- **Optimal**: Uses smallest sufficient placeholder for remaining space - -**Example progression in a 4 KiB buffer:** - -| Write # | Position | Remaining | Placeholder Size | Overhead | -|---------|----------|-----------|------------------|----------| -| 1 | 0 | 4096 | 2 bytes | 2 | -| 2 | 100 | 3996 | 2 bytes | 2 | -| 3 | 3900 | 196 | 2 bytes | 2 | -| 4 | 4000 | 96 | 1 byte | 1 | - -### New Types - -#### `Truncated` Error - -A simple, lightweight error type indicating a fixed-size buffer ran out of space: - -```rust -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub struct Truncated; -``` - -This error is: -- Zero-sized (no runtime overhead) -- Copyable (can be returned by value) -- Convertible to the main `Error` type via `From` - -#### `StringTruncation` Result - -Information about how a string was truncated: - -```rust -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub struct StringTruncation { - /// Number of bytes actually written (including any marker) - pub bytes_written: usize, - /// Number of bytes from the original string that were truncated - pub bytes_truncated: usize, -} - -impl StringTruncation { - pub fn none() -> Self { - Self { bytes_written: 0, bytes_truncated: 0 } - } - - pub fn was_truncated(&self) -> bool { - self.bytes_truncated > 0 - } -} -``` - -#### `ProtoWrite` Trait - -The trait defines the core buffer operations with fallible semantics: - -```rust -pub trait ProtoWrite { - // === Core required methods === - - /// Append bytes to the buffer. Returns Err(Truncated) if insufficient capacity. - fn write_bytes(&mut self, bytes: &[u8]) -> Result<(), Truncated>; - - /// Current length of encoded data. - fn len(&self) -> usize; - - /// Get a reference to the encoded bytes. - fn as_slice(&self) -> &[u8]; - - /// Get a mutable reference for patching length placeholders. - fn as_mut_slice(&mut self) -> &mut [u8]; - - /// Clear the buffer contents (does not clear reserved_tail or limit). - fn clear(&mut self); - - /// Physical capacity of the buffer. - fn capacity(&self) -> usize; - - // === Limit and reservation management === - - /// Set a soft limit on buffer size. For fixed buffers, clamped to capacity. - /// For growable buffers, prevents growth beyond this point. - /// Also updates the length placeholder size to match the new limit. - fn set_limit(&mut self, limit: usize); - - /// Get current limit (defaults to capacity for fixed, usize::MAX for growable). - fn limit(&self) -> usize; - - /// Reserve bytes at the end of the buffer for later use. - /// Returns the new effective remaining space. - /// This space is protected from writes until released. - fn reserve_tail(&mut self, bytes: usize) -> usize; - - /// Release previously reserved tail bytes, making them available for writing. - fn release_tail(&mut self, bytes: usize); - - /// Get current tail reservation. 
- fn reserved_tail(&self) -> usize; - - /// Bytes available for writing: limit - len - reserved_tail - fn remaining(&self) -> usize { - self.limit() - .saturating_sub(self.len()) - .saturating_sub(self.reserved_tail()) - } - - // === Length placeholder configuration === - - /// Get the length placeholder size for this buffer. - /// Determined by the buffer's limit. - fn length_placeholder_size(&self) -> LengthPlaceholderSize { - LengthPlaceholderSize::for_limit(self.limit()) - } - - /// Override the length placeholder size. - /// Useful when you know nested messages will be small even in a large buffer. - fn set_length_placeholder_size(&mut self, size: LengthPlaceholderSize); - - /// Write the length placeholder bytes and return the position where length starts. - fn write_length_placeholder(&mut self) -> Result { - let pos = self.len(); - let placeholder = self.length_placeholder_size().placeholder_bytes(); - self.write_bytes(placeholder)?; - Ok(pos) - } - - /// Patch a previously written length placeholder with the actual length. - fn patch_length_placeholder(&mut self, len_start_pos: usize, length: usize) { - let num_bytes = self.length_placeholder_size().num_bytes(); - let slice = self.as_mut_slice(); - for i in 0..num_bytes { - slice[len_start_pos + i] += ((length >> (i * 7)) & 0x7f) as u8; - } - } - - // === Encoding methods with default implementations === - - fn encode_varint(&mut self, value: u64) -> Result<(), Truncated>; - fn encode_field_tag(&mut self, field_number: u64, wire_type: u64) -> Result<(), Truncated>; - fn encode_sint32(&mut self, value: i32) -> Result<(), Truncated>; - fn encode_string(&mut self, field_tag: u64, val: &str) -> Result<(), Truncated>; - fn encode_bytes_field(&mut self, field_tag: u64, val: &[u8]) -> Result<(), Truncated>; - - // === Truncating string encoder === - - /// Encode a string field, truncating if necessary to fit in available space. - /// - /// If the full string doesn't fit, truncates and appends the marker. - /// The marker should be a short fixed string like "..." or "[TRUNCATED]". - /// - /// Returns information about what was written and truncated. - /// Returns Err(Truncated) only if even the field tag + minimal content won't fit. 
- fn encode_string_truncated( - &mut self, - field_tag: u64, - val: &str, - marker: &str, - ) -> Result; -} -``` - -#### `FixedProtoBuffer` - -A stack-allocatable, fixed-size buffer: - -```rust -pub struct FixedProtoBuffer { - buffer: [u8; N], - len: usize, - reserved_tail: usize, - placeholder_size: LengthPlaceholderSize, -} - -impl FixedProtoBuffer { - pub const fn new() -> Self { - Self { - buffer: [0u8; N], - len: 0, - reserved_tail: 0, - // Computed at compile time based on N - placeholder_size: LengthPlaceholderSize::for_limit(N), - } - } -} -``` - -Key properties: -- **No heap allocation**: The buffer is a fixed-size array -- **Automatic placeholder sizing**: `LengthPlaceholderSize` is determined from `N` at compile time -- **Atomic writes where possible**: `write_bytes` checks capacity before writing -- **Truncation-safe**: Returns `Err(Truncated)` instead of panicking -- **Limit equals capacity**: `set_limit` is a no-op (or clamps to capacity) - -#### Updated `ProtoBuffer` - -The growable buffer gains limit, reservation, and placeholder size support: - -```rust -pub struct ProtoBuffer { - buffer: Vec, - limit: usize, // Default: usize::MAX (unlimited) - reserved_tail: usize, // Default: 0 - placeholder_size: LengthPlaceholderSize, // Default: FourBytes -} -``` - -Key properties: -- **Configurable limit**: Prevents unbounded growth -- **Configurable placeholder size**: Can use 2-byte placeholders when limit is set appropriately -- **Truncation on limit**: Returns `Err(Truncated)` when limit reached (no realloc) -- **Backward compatible**: Default limit is unlimited, default placeholder is 4 bytes - -### String Truncation Behavior - -The `encode_string_truncated` method implements graceful truncation: - -```rust -fn encode_string_truncated( - &mut self, - field_tag: u64, - val: &str, - marker: &str, // e.g., "..." 
or "[TRUNCATED]" -) -> Result { - let tag_len = varint_len((field_tag << 3) | WIRE_TYPE_LEN); - let full_len = tag_len + varint_len(val.len()) + val.len(); - - // Check if full string fits - if full_len <= self.remaining() { - self.encode_string(field_tag, val)?; - return Ok(StringTruncation::none()); - } - - // Calculate how much of the string we can fit with marker - let marker_bytes = marker.as_bytes(); - let available = self.remaining(); - - // Need at least: tag + length(1 byte min) + marker - let min_needed = tag_len + 1 + marker_bytes.len(); - if available < min_needed { - return Err(Truncated); - } - - // Calculate truncated string length - let max_content = available - tag_len - 1; // Assuming 1-byte length varint - let truncated_str_len = max_content.saturating_sub(marker_bytes.len()); - - // Find UTF-8 safe truncation point - let truncated_str = truncate_utf8_safe(val, truncated_str_len); - let bytes_truncated = val.len() - truncated_str.len(); - - // Build the truncated content: truncated_str + marker - let total_content_len = truncated_str.len() + marker_bytes.len(); - - self.encode_field_tag(field_tag, WIRE_TYPE_LEN)?; - self.encode_varint(total_content_len as u64)?; - self.write_bytes(truncated_str.as_bytes())?; - self.write_bytes(marker_bytes)?; - - Ok(StringTruncation { - bytes_written: tag_len + varint_len(total_content_len) + total_content_len, - bytes_truncated, - }) -} - -/// Truncate a string at a UTF-8 safe boundary -fn truncate_utf8_safe(s: &str, max_bytes: usize) -> &str { - if max_bytes >= s.len() { - return s; - } - // Find the last valid UTF-8 char boundary at or before max_bytes - let mut end = max_bytes; - while end > 0 && !s.is_char_boundary(end) { - end -= 1; - } - &s[..end] -} -``` - -### Backward Compatibility - -The existing `ProtoBuffer` type retains its infallible inherent methods: - -```rust -impl ProtoBuffer { - // These remain infallible (no Result return type) when limit is unlimited - pub fn encode_varint(&mut self, value: u64) { ... } - pub fn encode_string(&mut self, field_tag: u64, val: &str) { ... } - // ... etc -} - -// Additionally implements ProtoWrite (may return Err if limit set) -impl ProtoWrite for ProtoBuffer { ... } -``` - -This means: -- All existing code using `ProtoBuffer` continues to work unchanged -- New generic code can use `impl ProtoWrite` to work with either buffer type -- Setting a limit on `ProtoBuffer` enables truncation behavior - -### Macro Support - -The macros now use the buffer's configured `LengthPlaceholderSize`: - -1. **`proto_encode_len_delimited_unknown_size!`** (existing, updated) - - Uses infallible helpers for `ProtoBuffer` - - Uses the buffer's `length_placeholder_size()` instead of hardcoded 4 bytes - - No error propagation needed - -2. **`proto_encode_len_delimited_try!`** (new) - - For use with generic `ProtoWrite` code - - Uses the buffer's `length_placeholder_size()` - - Propagates `Truncated` errors via `?` - - Returns `Result<(), Truncated>` - -```rust -/// Updated macro using configurable placeholder size -#[macro_export] -macro_rules! 
proto_encode_len_delimited_try { - ($field_tag: expr, $encode_fn:expr, $buf:expr) => {{ - use $crate::otlp::ProtoWrite; - $buf.encode_field_tag($field_tag, $crate::proto::consts::wire_types::LEN)?; - let len_start_pos = $buf.write_length_placeholder()?; - $encode_fn; - let num_bytes = $buf.length_placeholder_size().num_bytes(); - let len = $buf.len() - len_start_pos - num_bytes; - $buf.patch_length_placeholder(len_start_pos, len); - Ok::<(), $crate::error::Truncated>(()) - }}; -} -``` - -## Usage Examples - -### Generic Encoding Function - -```rust -use otap_df_pdata::otlp::{ProtoWrite, Truncated}; - -fn encode_attributes<W: ProtoWrite>( - buf: &mut W, - attrs: &[KeyValue], -) -> Result<usize, Truncated> { - let mut count = 0; - for attr in attrs { - buf.encode_string(KEY_TAG, &attr.key)?; - buf.encode_string(VALUE_TAG, &attr.value)?; - count += 1; - } - Ok(count) -} -``` - -### Fixed Buffer with Reserved Space for Dropped Count - -```rust -use otap_df_pdata::otlp::{FixedProtoBuffer, ProtoWrite, Truncated}; - -fn encode_log_record(attrs: &[KeyValue], body: &str) -> Vec<u8> { - let mut buf = FixedProtoBuffer::<2048>::new(); - - // Reserve 5 bytes for dropped_attributes_count (tag + varint) - buf.reserve_tail(5); - - // Encode body with truncation marker; the result records any truncation - let _body_result = buf.encode_string_truncated( - LOG_RECORD_BODY, - body, - "...[truncated]" - ); - - // Encode attributes until we run out of space - let mut encoded = 0; - for attr in attrs { - match encode_key_value(&mut buf, attr) { - Ok(()) => encoded += 1, - Err(Truncated) => break, - } - } - - // Release reserved space and encode dropped count - let dropped = attrs.len() - encoded; - buf.release_tail(5); - - if dropped > 0 { - let _ = buf.encode_field_tag(DROPPED_ATTRIBUTES_COUNT, WIRE_TYPE_VARINT); - let _ = buf.encode_varint(dropped as u64); - } - - buf.as_slice().to_vec() -} -``` - -### Limiting Growable Buffer - -```rust -use otap_df_pdata::otlp::{ProtoBuffer, ProtoWrite, LengthPlaceholderSize}; - -fn encode_with_limit(data: &LargeData) -> Result<Bytes, Truncated> { - let mut buf = ProtoBuffer::new(); - - // Prevent unbounded growth - cap at 16383 bytes, just under 16 KiB. - // A limit of 16383 or less automatically selects 2-byte placeholders; - // a full 16 * 1024 (16384) would step up to 3-byte placeholders. - buf.set_limit(16383); - - // Or explicitly use small placeholders for even smaller limits - // buf.set_limit(4 * 1024); - // buf.set_length_placeholder_size(LengthPlaceholderSize::TwoBytes); - - // Reserve space for metadata at the end - buf.reserve_tail(64); - - // Encode potentially large content with truncation - for item in &data.items { - buf.encode_string_truncated(ITEM_TAG, &item.value, "...")?; - } - - // Add metadata in reserved space - buf.release_tail(64); - encode_metadata(&mut buf, data)?; - - Ok(buf.into_bytes()) -} -``` - -### Space-Efficient Small Buffer - -```rust -use otap_df_pdata::otlp::{FixedProtoBuffer, LengthPlaceholderSize}; - -fn encode_compact_log() { - // 4 KiB buffer automatically uses 2-byte length placeholders - let mut buf = FixedProtoBuffer::<4096>::new(); - - assert_eq!(buf.length_placeholder_size().num_bytes(), 2); - - // Each nested message saves 2 bytes compared to 4-byte placeholders! - // In a LogRecord with 10 nested structures, that's 20 bytes saved.
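- - // Illustrative boundary check (asserts added here for exposition, using - // the for_limit API defined above): for_limit is inclusive at 16383, the - // largest value a 2-byte varint can hold, so a full 16 KiB (16384) limit - // steps up to 3-byte placeholders. - assert_eq!(LengthPlaceholderSize::for_limit(16383).num_bytes(), 2); - assert_eq!(LengthPlaceholderSize::for_limit(16384).num_bytes(), 3);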
-} -``` - -### Body Truncation with Byte Count - -For cases where you want to record exactly how many bytes were lost to truncation: - -```rust -fn encode_body_with_count<W: ProtoWrite>(buf: &mut W, body: &str) -> StringTruncation { - // First attempt with simple marker - match buf.encode_string_truncated(LOG_RECORD_BODY, body, "...") { - Ok(info) => { - if info.was_truncated() { - // Log the truncation details for observability - // The bytes_truncated field tells us exactly how much was lost - tracing::debug!( - truncated_bytes = info.bytes_truncated, - "Log body truncated" - ); - } - info - } - Err(Truncated) => { - // Couldn't fit even minimal content - StringTruncation { bytes_written: 0, bytes_truncated: body.len() } - } - } -} -``` - -## Design Rationale - -### Why Configurable Length Placeholder Size? - -The protobuf wire format uses varints for length-delimited field lengths. Since we encode nested messages without knowing their size upfront, we reserve placeholder bytes and patch them later. - -The problem: varints are variable-length! A length of 127 needs 1 byte, but 128 needs 2 bytes. Our solution uses zero-padded varints where each byte has its continuation bit set until the final byte. - -For a 4 KiB buffer, no nested message can exceed 4096 bytes, which fits in a 2-byte varint. Using 4-byte placeholders therefore wastes 2 bytes per nested message. In a typical LogRecord with its nested structure: - -``` -ResourceLogs [4-byte placeholder: 2 bytes wasted] - └─ ScopeLogs [4-byte placeholder: 2 bytes wasted] - └─ LogRecord [... 2 more bytes wasted per nested message] - ├─ Body (AnyValue) - └─ Attributes (repeated KeyValue) - └─ Value (AnyValue) -``` - -With 10 attributes, that's potentially 20+ extra bytes wasted: space that could hold another attribute! - -### Why Reserve Tail Space? - -The `reserve_tail` mechanism ensures that critical fields like `dropped_attributes_count` can always be encoded, even when the buffer is nearly full. Without this: - -1. We might fill the buffer completely with attributes -2. Then have no room to record that we dropped some -3. The receiver would have no indication of data loss - -### Why Truncate Strings vs. Drop Entirely? - -Truncated data with a marker is often more useful than no data: -- A truncated log message still conveys intent -- A truncated attribute value may still be useful for filtering/grouping -- The marker makes it clear that truncation occurred - -### Why UTF-8 Safe Truncation? - -Truncating in the middle of a multi-byte UTF-8 character would produce invalid UTF-8, which could cause issues downstream. The `truncate_utf8_safe` function ensures we always produce valid UTF-8. - -### Why Configurable Limits for Growable Buffers? 
- -Even in the "normal" path, we want protection against: -- Malformed data causing unbounded memory growth -- DoS attacks via large payloads -- Accidental memory exhaustion from unexpectedly large telemetry - -## File Changes - -| File | Changes | -|------|---------| -| `crates/pdata/src/error.rs` | Added `Truncated` error type with `Display` and `Error` impls | -| `crates/pdata/src/otlp/common.rs` | Added `ProtoWrite` trait, `FixedProtoBuffer`, `StringTruncation`, `LengthPlaceholderSize`, updated `ProtoBuffer` with limit/reservation/placeholder fields, helper functions, updated macros | -| `crates/pdata/src/otlp/mod.rs` | Export `ProtoWrite`, `FixedProtoBuffer`, `StringTruncation`, `LengthPlaceholderSize`, `Truncated` | - -## Testing - -The implementation includes comprehensive tests covering: - -- Basic `FixedProtoBuffer` operations -- Truncation behavior for various encoding operations -- Varint encoding with partial writes -- Generic function usage with both buffer types -- Backward compatibility of `ProtoBuffer` inherent methods -- Partial content availability after truncation -- String truncation with UTF-8 safety -- Reserved tail space behavior -- Limit enforcement for growable buffers - -All existing tests continue to pass, plus new tests for the added functionality. diff --git a/rust/otap-dataflow/docs/internal-logs-design.md b/rust/otap-dataflow/docs/internal-logs-design.md deleted file mode 100644 index 1431b32717..0000000000 --- a/rust/otap-dataflow/docs/internal-logs-design.md +++ /dev/null @@ -1,812 +0,0 @@ -# Internal Logs Collection Design - -This document describes the internal logging architecture for OTAP-Dataflow, -enabling first-party and third-party log events to be captured, buffered, -and routed without creating feedback loops. - -## Goals - -1. **Unified capture**: Both first-party (`otel_info!`) and third-party - (`tracing::info!`) log events are captured in the same buffer -2. **Per-core buffering**: Each EffectHandler thread accumulates logs in - its own heap-allocated buffer, avoiding cross-thread contention -3. **No feedback loops**: The global telemetry collection thread cannot - create log events that cycle back through the system -4. **Non-blocking**: Log emission never blocks the EffectHandler thread -5. **Configurable routing**: Buffered logs can be sent to the global - collector, routed through an Internal Telemetry Receiver (ITR) - pipeline, or both - -## Architecture Overview - -``` -┌─────────────────────────────────────────────────────────────────────────────┐ -│ EffectHandler Thread (one per core/pipeline node) │ -│ │ -│ ┌────────────────────────┐ ┌──────────────────────────────────┐ │ -│ │ EffectHandlerCore │ │ Thread-Local State │ │ -│ │ │ │ │ │ -│ │ log_buffer: LogBuffer ├───────►│ CURRENT_BUFFER: *mut LogBuffer │ │ -│ │ (heap: 128KB-1MB) │ │ │ │ -│ └────────────────────────┘ └──────────────┬───────────────────┘ │ -│ │ │ │ -│ │ │ │ -│ ┌───────┴───────┐ │ │ -│ │ │ │ │ -│ ▼ ▼ ▼ │ -│ otel_info! tracing::info! 
BufferWriterLayer │ -│ (first-party) (third-party) (global Subscriber) │ -│ │ │ │ │ -│ │ └───────────────────────────────┘ │ -│ │ │ │ -│ │ ▼ │ -│ │ ┌──────────────────────┐ │ -│ └─────────────►│ log_buffer.push() │ │ -│ └──────────────────────┘ │ -│ │ -│ On timer tick: flush buffer ──────────────────────────────────────────────┼──┐ -└─────────────────────────────────────────────────────────────────────────────┘ │ - │ - ┌───────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────────────────────┐ -│ Global Telemetry Thread │ -│ │ -│ Subscriber: stderr-only or NoSubscriber (NO BufferWriterLayer) │ -│ │ -│ ┌─────────────────────┐ ┌─────────────────────┐ │ -│ │ LogsRegistry │ │ ITR Pipeline │ │ -│ │ (ring buffer for │ │ (OTLP export, │ │ -│ │ /logs endpoint) │ │ processing, etc.) │ │ -│ └─────────────────────┘ └─────────────────────┘ │ -│ │ -│ tracing::info!("...") → stderr (safe, no feedback) │ -└─────────────────────────────────────────────────────────────────────────────┘ -``` - -## Key Components - -### 1. LogBuffer - -A heap-allocated ring buffer owned by each EffectHandler. Log records are -encoded to OTLP bytes before storage. - -```rust -pub struct LogBuffer { - /// Heap-allocated storage (e.g., 128KB to 1MB) - data: Box<[u8]>, - - /// Ring buffer state - write_pos: usize, - read_pos: usize, - - /// Statistics - record_count: usize, - dropped_count: usize, -} -``` - -**Behavior:** -- Fixed capacity, configured at startup -- When full, oldest records are evicted (ring buffer semantics) -- Tracks dropped record count for observability -- Non-blocking push operation - -### 2. Thread-Local Buffer Pointer - -A thread-local variable provides the bridge between the tracing subscriber -and the EffectHandler's buffer. - -```rust -thread_local! { - static CURRENT_BUFFER: Cell>> = const { Cell::new(None) }; -} -``` - -**Lifecycle:** -1. EffectHandler calls `install_buffer()` when its thread starts -2. Thread-local points to the handler's `log_buffer` -3. EffectHandler calls (or guard drops) `uninstall_buffer()` on shutdown -4. Thread-local is cleared, subsequent events are dropped - -### 3. BufferWriterLayer - -A `tracing_subscriber::Layer` installed as part of the global subscriber. -It writes events to whichever buffer is installed in the current thread. - -```rust -impl Layer for BufferWriterLayer -where - S: Subscriber + for<'a> LookupSpan<'a>, -{ - fn on_event(&self, event: &Event<'_>, ctx: Context<'_, S>) { - CURRENT_BUFFER.with(|c| { - if let Some(mut ptr) = c.get() { - let buffer = unsafe { ptr.as_mut() }; - let encoded = encode_event_to_otlp(event, &ctx); - buffer.push(&encoded); - } - // No buffer installed: event is dropped - }); - } - - fn enabled(&self, _metadata: &Metadata<'_>, _ctx: Context<'_, S>) -> bool { - // Only process events if a buffer is installed - CURRENT_BUFFER.with(|c| c.get().is_some()) - } -} -``` - -### 4. Global Telemetry Thread - -A dedicated thread for collecting logs from all EffectHandler threads and -routing them to their destinations. This thread uses a **different** -subscriber that does not include `BufferWriterLayer`. 
- -```rust -pub fn spawn_global_telemetry_thread() -> JoinHandle<()> { - std::thread::spawn(|| { - // Safe subscriber: stderr only, or completely silent - let safe_subscriber = tracing_subscriber::fmt() - .with_writer(std::io::stderr) - .with_max_level(tracing::Level::WARN) - .finish(); - - // Override the default subscriber for this thread only - tracing::subscriber::with_default(safe_subscriber, || { - // Any tracing::info! in here goes to stderr - // NOT back through BufferWriterLayer - run_collection_loop(); - }); - }) -} -``` - -## Event Flow - -### First-Party Events (otel_info!, etc.) - -Code with access to the EffectHandler can log directly: - -```rust -impl EffectHandlerCore { - pub fn log_info(&mut self, name: &str, attrs: &[(&str, &dyn Debug)]) { - let encoded = encode_log_record(Level::INFO, name, attrs); - self.log_buffer.push(&encoded); - } -} - -// Usage in a receiver/processor/exporter: -effect_handler.log_info("batch.processed", &[ - ("count", &batch.len()), - ("duration_ms", &elapsed.as_millis()), -]); -``` - -### Third-Party Events (tracing::info!, etc.) - -Library code or deeply nested code without EffectHandler access: - -```rust -// Somewhere in a library -tracing::info!(records = count, "Parsed input"); - -// Flow: -// 1. tracing::info! → global subscriber → BufferWriterLayer::on_event() -// 2. BufferWriterLayer reads CURRENT_BUFFER thread-local -// 3. If set, encodes event and pushes to that buffer -// 4. If not set (wrong thread), event is dropped -``` - -### Buffer Flush - -EffectHandlers periodically flush their buffers: - -```rust -impl EffectHandlerCore { - pub async fn flush_logs(&mut self) -> Result<(), Error> { - let logs = self.log_buffer.drain(); - if logs.is_empty() { - return Ok(()); - } - - // Send to global collector via channel - self.log_sender.send(logs).await?; - - Ok(()) - } -} -``` - -The flush can be triggered by: -- Timer tick (e.g., every 1 second) -- Buffer reaching high-water mark -- Explicit flush request from pipeline - -## Feedback Loop Prevention - -The architecture prevents feedback loops through subscriber isolation: - -| Thread Type | Subscriber | BufferWriterLayer? | Effect of `tracing::info!` | -|-------------|------------|-------------------|---------------------------| -| EffectHandler | Global (with BufferWriterLayer) | Yes, buffer installed | Written to handler's buffer | -| Global Telemetry | Thread-local override (stderr/noop) | No | Stderr or dropped | -| Other | Global (with BufferWriterLayer) | No buffer installed | Dropped | - -**Why this prevents cycles:** - -1. EffectHandler thread emits `otel_info!("something")` -2. Event is buffered locally (no channel send yet) -3. On timer, buffer is flushed to global telemetry thread via channel -4. Global thread receives the event -5. If global thread calls `tracing::info!()` while processing: - - Its subscriber is the stderr/noop override - - BufferWriterLayer is NOT in its subscriber stack - - Event goes to stderr (or nowhere), NOT back to a buffer - - No channel send, no cycle - -## Encoding Format - -Log records are encoded to OTLP bytes (`opentelemetry.proto.logs.v1.LogRecord`) -before storage in the buffer. This enables: - -- Zero-copy access via `LogsDataView` for formatting -- Direct forwarding to OTLP exporters -- Consistent format for `/logs` HTTP endpoint -- Efficient storage (no per-field overhead) - -## Flush Strategy: Timer-Based with Drop on Full - -Unlike metrics (which are pre-aggregated), individual log events can be -lost if the buffer fills. 
The current approach is simple: - -- **Timer-based flush**: The pipeline runtime flushes on its telemetry timer -- **Drop new events when full**: If buffer fills before flush, new events are dropped -- **Track dropped count**: `LogBuffer::dropped_count()` for observability - -This keeps the implementation simple. Future enhancements could include: -- Sampling at high volume -- Priority levels (always keep ERROR events) -- Dynamic buffer sizing - -## Configuration - -*(To be defined)* - -```yaml -service: - telemetry: - logs: - level: info - internal_collection: - enabled: true - buffer_size_bytes: 131072 # 128KB per handler - flush_interval: "1s" - # Routing options: - # - global_collector: send to global thread - # - local_pipeline: route through ITR on same core - # - both: send to both destinations - routing: global_collector -``` - -## Integration with Existing Metrics System - -This design parallels the existing metrics infrastructure. Understanding -the metrics flow is essential for implementing consistent logging. - -### Metrics System Architecture - -The metrics system follows a clear data flow pattern: - -``` -┌─────────────────────────────────────────────────────────────────────────────┐ -│ Controller (lib.rs) │ -│ │ -│ MetricsSystem::new(config) │ -│ ├── MetricsRegistryHandle::new() ← Shared registry for aggregation │ -│ ├── MetricsCollector::new() ← Runs on metrics-aggregator thread│ -│ └── MetricsReporter::new(sender) ← Cloned to each pipeline thread │ -│ │ -└─────────────────────────────────────────────────────────────────────────────┘ - │ - │ metrics_reporter.clone() - ▼ -┌─────────────────────────────────────────────────────────────────────────────┐ -│ Pipeline Thread (one per core) │ -│ │ -│ PipelineContext::new(controller_context, pipeline_id, core_id, thread_id) │ -│ └── with_node_context(node_id, node_urn, node_kind) │ -│ └── register_metrics() │ -│ └── registry.register::(self.node_attribute_set()) │ -│ │ -│ Each component (receiver/processor/exporter): │ -│ 1. Receives PipelineContext via build() method │ -│ 2. Calls pipeline_ctx.register_metrics::() │ -│ 3. Gets MetricSet with pre-registered attributes │ -│ 4. On timer tick: metrics_reporter.report(&mut metric_set) │ -│ │ -└─────────────────────────────────────────────────────────────────────────────┘ - │ - │ flume channel (MetricSetSnapshot) - ▼ -┌─────────────────────────────────────────────────────────────────────────────┐ -│ Metrics Aggregator Thread ("metrics-aggregator") │ -│ │ -│ MetricsCollector::run_collection_loop() │ -│ loop { │ -│ snapshot = receiver.recv_async().await │ -│ registry.accumulate_snapshot(snapshot.key, &snapshot.metrics) │ -│ } │ -│ │ -└─────────────────────────────────────────────────────────────────────────────┘ - │ - │ MetricsRegistryHandle (Arc>) - ▼ -┌─────────────────────────────────────────────────────────────────────────────┐ -│ Admin HTTP Server ("http-admin" thread) │ -│ │ -│ GET /metrics or /telemetry/metrics │ -│ registry.visit_metrics_and_reset(|desc, attrs, iter| { │ -│ // Format as JSON, Prometheus, Line Protocol, etc. │ -│ // desc: MetricsDescriptor (name, field definitions) │ -│ // attrs: NodeAttributeSet (resource + node attributes) │ -│ // iter: MetricsIterator (field, value) pairs │ -│ }) │ -│ │ -└─────────────────────────────────────────────────────────────────────────────┘ -``` - -### Key Components - -1. **MetricsRegistryHandle**: Thread-safe handle wrapping `Arc>`. - Passed to admin for HTTP endpoints, passed to controller for aggregation. - -2. 
**MetricsReporter**: Cloneable sender side of a flume channel. Each pipeline - thread gets a clone to send `MetricSetSnapshot` messages. - -3. **MetricsCollector**: Runs on a dedicated thread, receives snapshots via - channel, and calls `registry.accumulate_snapshot()` to merge them. - -4. **NodeAttributeSet**: Consistent attributes attached to every metric set - registered by a component. Includes: - - Resource: `process_instance_id`, `host_id`, `container_id` - - Engine: `core_id`, `numa_node_id` - - Pipeline: `pipeline_id` - - Node: `node_id`, `node_urn`, `node_type` - -### Unified Registration: Shared MetricsKey for Logs and Metrics - -The key insight is that `MetricsKey` already identifies a component's -`NodeAttributeSet` in the registry. Logs should reuse this same key -rather than duplicating attribute storage. - -**Existing MetricsEntry (in registry.rs):** - -```rust -pub struct MetricsEntry { - pub metrics_descriptor: &'static MetricsDescriptor, - pub attributes_descriptor: &'static AttributesDescriptor, - pub metric_values: Vec, - pub attribute_values: Box, // ← NodeAttributeSet -} -``` - -When `pipeline_ctx.register_metrics::()` is called: -1. Returns `MetricSet` containing a `MetricsKey` (slotmap index) -2. The `NodeAttributeSet` is stored in the registry under that key -3. **Both metrics and logs use the same `MetricsKey`** - -### Parallel Logs Architecture - -| Metrics | Logs | -|---------|------| -| `MetricSet` | `LogBuffer` | -| `MetricsReporter` (channel sender) | `LogsReporter` (channel sender) | -| `MetricsRegistry` (aggregates metrics) | `LogsRing` (ring buffer for recent logs) | -| `MetricsCollector` (receives snapshots) | `LogsCollector` (receives batches) | -| `MetricSetSnapshot { key, metrics }` | `LogBatch { producer_key, records }` | -| `/metrics` endpoint | `/logs` endpoint | - -**Shared:** -- `MetricsKey` identifies the producer (same key for metrics and logs) -- `NodeAttributeSet` stored once in `MetricsRegistry`, looked up by key - -### Channel Data Types - -```rust -/// A batch of logs from one producer - compact, just carries the key -pub struct LogBatch { - /// Same key returned from register_metrics() - identifies NodeAttributeSet - pub producer_key: MetricsKey, - - /// The log records - pub records: Vec, -} - -/// A single log record -pub struct LogRecord { - pub callsite_id: Identifier, // Pointer to static Metadata - pub timestamp_ns: u64, - pub body_attrs_bytes: Bytes, // Pre-encoded body + event attributes -} - -/// Reporter for sending log batches (parallel to MetricsReporter) -#[derive(Clone)] -pub struct LogsReporter { - sender: flume::Sender, -} - -impl LogsReporter { - pub fn try_report(&self, batch: LogBatch) -> Result<(), Error> { - match self.sender.try_send(batch) { - Ok(_) => Ok(()), - Err(flume::TrySendError::Full(_)) => Ok(()), // Drop if full - Err(flume::TrySendError::Disconnected(_)) => Err(Error::LogsChannelClosed), - } - } -} -``` - -### EffectHandler with Shared Key - -```rust -pub struct EffectHandlerCore { - pub node_id: NodeId, - pub producer_key: MetricsKey, // Shared identifier for metrics & logs - pub metrics_reporter: MetricsReporter, - pub logs_reporter: LogsReporter, // NEW - pub log_buffer: LogBuffer, // NEW - // ... 
-} - -impl EffectHandlerCore { - pub async fn flush_logs(&mut self) -> Result<(), Error> { - if self.log_buffer.is_empty() { - return Ok(()); - } - - let batch = LogBatch { - producer_key: self.producer_key, // Just the 8-byte key - records: self.log_buffer.drain(), - }; - self.logs_reporter.try_report(batch) - } -} -``` - -### Consumer Side: LogsRing with Key Lookup - -```rust -/// Ring buffer storing recent logs for /logs endpoint -pub struct LogsRing { - entries: VecDeque<StoredLogEntry>, - capacity: usize, - total_received: u64, - total_dropped: u64, -} - -/// Stored entry - just the key, not the full attributes -pub struct StoredLogEntry { - pub producer_key: MetricsKey, // Lookup attrs from MetricsRegistry - pub callsite_id: Identifier, - pub timestamp_ns: u64, - pub body_attrs_bytes: Bytes, -} - -impl LogsRing { - pub fn append(&mut self, batch: LogBatch) { - for record in batch.records { - if self.entries.len() >= self.capacity { - self.entries.pop_front(); - self.total_dropped += 1; - } - self.entries.push_back(StoredLogEntry { - producer_key: batch.producer_key, - callsite_id: record.callsite_id, - timestamp_ns: record.timestamp_ns, - body_attrs_bytes: record.body_attrs_bytes, - }); - self.total_received += 1; - } - } -} -``` - -### Admin /logs Endpoint - -```rust -pub async fn get_logs(State(state): State<AdminState>) -> impl IntoResponse { - let logs_ring = state.logs_ring.lock(); - let registry = state.metrics_registry.lock(); - - let writer = ConsoleWriter::no_color(); - let mut output = String::new(); - - for entry in logs_ring.recent(100) { - // Reach the static Metadata through the Identifier's public field - let metadata = entry.callsite_id.0.metadata(); - let saved = SavedCallsite::new(metadata); - - let record = LogRecord { - callsite_id: entry.callsite_id.clone(), - timestamp_ns: entry.timestamp_ns, - body_attrs_bytes: entry.body_attrs_bytes.clone(), - }; - - // Format the log record - output.push_str(&writer.format_log_record(&record, &saved)); - - // Look up NodeAttributeSet using the shared key - if let Some(metrics_entry) = registry.metrics.get(entry.producer_key) { - let attrs = metrics_entry.attribute_values.as_ref(); - output.push_str(&format_node_attrs(attrs)); - } - output.push('\n'); - } - - (StatusCode::OK, output) -} -``` - -### Benefits of Shared Key - -| Aspect | Sending attrs per batch | Shared MetricsKey | -|--------|------------------------|-------------------| -| Registration | Separate for metrics/logs | Single registration | -| Per-batch overhead | Full NodeAttributeSet clone | 8-byte key | -| Attribute storage | Duplicated per batch | Single source of truth | -| Consistency | Could diverge | Guaranteed identical | -| Admin lookup | Already has attrs | Lookup from registry | - -### Identifier → Metadata: Direct Field Access - -The `Identifier` type wraps a pointer to static memory: - -```rust -pub struct Identifier( - #[doc(hidden)] - pub &'static dyn Callsite, -); -``` - -The inner field is `pub` (for macro construction purposes), so any thread -can access it directly to get `Metadata`: - -```rust -// Identifier.0 is &'static dyn Callsite -let metadata: &'static Metadata<'static> = identifier.0.metadata(); -``` - -No need to forward `(Identifier, Metadata)` pairs between threads. -The admin thread can directly access `Identifier.0.metadata()` on any -`Identifier` received in a `LogBatch` to get the full static metadata -(level, target, file, line, name, etc.). 
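- -A minimal sketch of this direct access, using the vendored tracing-core API described above (`callsite_summary` is a hypothetical helper added for illustration, not existing code): - -```rust -use tracing::callsite::Identifier; - -/// Render the static callsite metadata behind an Identifier, e.g. for the -/// /logs endpoint. No (Identifier, Metadata) forwarding is required. -fn callsite_summary(id: &Identifier) -> String { - let meta = id.0.metadata(); - format!( - "{} {} ({}:{})", - meta.level(), - meta.target(), - meta.file().unwrap_or("<unknown>"), - meta.line().unwrap_or(0) - ) -} -```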
- -### Thread-Local Producer Key for Third-Party Instrumentation - -Third-party libraries often use `tracing::info!()` without access to any -EffectHandler or `MetricsKey`. To attribute these logs to the correct -component, we use a thread-local "current producer key" that is set -when entering a component's execution scope. - -```rust -// Thread-local current MetricsKey for third-party instrumentation. -thread_local! { - static CURRENT_PRODUCER_KEY: RefCell> = const { RefCell::new(None) }; -} - -/// Guard that sets the current producer key for the duration of a scope. -/// When dropped, restores the previous key (supports nesting). -pub struct ProducerKeyGuard { - previous: Option, -} - -impl ProducerKeyGuard { - /// Enter a scope with the given producer key. - pub fn enter(key: MetricsKey) -> Self { - let previous = CURRENT_PRODUCER_KEY.with(|cell| cell.borrow_mut().replace(key)); - Self { previous } - } -} - -impl Drop for ProducerKeyGuard { - fn drop(&mut self) { - CURRENT_PRODUCER_KEY.with(|cell| { - *cell.borrow_mut() = self.previous; - }); - } -} - -/// Get the current producer key (if any component scope is active). -pub fn current_producer_key() -> Option { - CURRENT_PRODUCER_KEY.with(|cell| *cell.borrow()) -} -``` - -**Usage in the engine (when calling component methods):** - -```rust -impl EffectHandlerCore { - /// Enter a scope where third-party logs are attributed to this component. - pub fn enter_producer_scope(&self) -> ProducerKeyGuard { - ProducerKeyGuard::enter(self.producer_key) - } -} - -// In the pipeline runtime, when calling a processor: -let _guard = effect_handler.enter_producer_scope(); -processor.process(batch, effect_handler).await?; -// Guard drops here, restoring previous key -``` - -**How it works with the BufferWriterLayer:** - -```rust -impl Layer for BufferWriterLayer { - fn on_event(&self, event: &Event<'_>, ctx: Context<'_, S>) { - let record = encode_event(event, &ctx); - // Pass None - push_to_thread_buffer will use current_producer_key() - push_to_thread_buffer(record, None); - } -} - -// In push_to_thread_buffer: -pub fn push_to_thread_buffer(record: LogRecord, producer_key: Option) -> bool { - CURRENT_LOG_BUFFER.with(|cell| { - if let Some(ref mut buffer) = *cell.borrow_mut() { - // Use explicit key if provided, otherwise use thread-current key - let key = producer_key.or_else(current_producer_key); - buffer.push(LogEntry { record, producer_key: key }); - true - } else { - false - } - }) -} -``` - -**Benefits:** - -| Aspect | Without ProducerKeyGuard | With ProducerKeyGuard | -|--------|-------------------------|----------------------| -| First-party logs | Attributed correctly | Attributed correctly | -| Third-party libs | `producer_key: None` | Attributed to current component | -| No EffectHandler access | Lost attribution | Correct attribution | -| Nesting support | N/A | Previous key restored on drop | - -**Example flow:** - -``` -┌─────────────────────────────────────────────────────────────────┐ -│ Pipeline Thread │ -│ │ -│ 1. Enter processor scope: ProducerKeyGuard::enter(processor_key) -│ CURRENT_PRODUCER_KEY = Some(processor_key) │ -│ │ -│ 2. Processor calls library code │ -│ └── Library calls tracing::info!("parsing data") │ -│ └── BufferWriterLayer::on_event() │ -│ └── push_to_thread_buffer(record, None) │ -│ └── key = current_producer_key() = processor_key│ -│ └── buffer.push(LogEntry { key: processor_key })│ -│ │ -│ 3. Guard drops: CURRENT_PRODUCER_KEY = None │ -│ │ -│ 4. 
On flush: LogBatch includes entry with producer_key set │ -│ │ -│ 5. Admin can look up NodeAttributeSet for processor_key │ -│ → Log shows: node_id=processor, node_urn=arrow/processor │ -└─────────────────────────────────────────────────────────────────┘ -``` - -## Channel Architecture - -``` -┌─────────────────────────────────────────────────────────────────────────────┐ -│ PRODUCER SIDE │ -│ │ -│ Pipeline Thread 0 Pipeline Thread 1 Pipeline Thread N │ -│ ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ │ -│ │ EffectHandler │ │ EffectHandler │ │ EffectHandler │ │ -│ │ producer_key │ │ producer_key │ │ producer_key │ │ -│ │ log_buffer │ │ log_buffer │ │ log_buffer │ │ -│ │ logs_reporter │ │ logs_reporter │ │ logs_reporter │ │ -│ └────────┬────────┘ └────────┬────────┘ └────────┬────────┘ │ -│ │ │ │ │ -│ │ on timer: flush │ │ │ -│ ▼ ▼ ▼ │ -│ ┌────────────────────────────────────────────────────────────────────────┐│ -│ │ Metrics Channel (existing) ││ -│ │ flume::Sender ││ -│ └────────────────────────────────────────────────────────────────────────┘│ -│ ┌────────────────────────────────────────────────────────────────────────┐│ -│ │ Logs Channel (NEW, parallel) ││ -│ │ flume::Sender ││ -│ └────────────────────────────────────────────────────────────────────────┘│ -│ │ -└─────────────────────────────────────────────────────────────────────────────┘ - │ - │ Two separate channels - ▼ -┌─────────────────────────────────────────────────────────────────────────────┐ -│ CONSUMER SIDE │ -│ │ -│ ┌─────────────────────────────────┐ ┌─────────────────────────────────┐ │ -│ │ MetricsCollector │ │ LogsCollector (NEW) │ │ -│ │ (metrics-aggregator thread) │ │ (logs-collector thread OR │ │ -│ │ │ │ same thread as admin) │ │ -│ │ loop { │ │ │ │ -│ │ snapshot = rx.recv() │ │ loop { │ │ -│ │ registry.accumulate(...) │ │ batch = rx.recv() │ │ -│ │ } │ │ logs_ring.append(batch) │ │ -│ └─────────────────────────────────┘ │ } │ │ -│ └─────────────────────────────────┘ │ -│ │ -│ ┌─────────────────────────────────────────────────────────────────────────┐│ -│ │ Admin HTTP Server ││ -│ │ ││ -│ │ GET /metrics → registry.visit_metrics_and_reset(...) ││ -│ │ ││ -│ │ GET /logs → logs_ring.recent(limit) + registry.get(key).attrs ││ -│ │ ││ -│ └─────────────────────────────────────────────────────────────────────────┘│ -│ │ -└─────────────────────────────────────────────────────────────────────────────┘ -``` - -### Why Separate Channels? - -| Aspect | Metrics Channel | Logs Channel | -|--------|-----------------|--------------| -| Volume | Lower (aggregated values) | Higher (individual events) | -| Semantics | Merge into registry | Append to ring buffer | -| Backpressure | Accumulate locally | Drop oldest | -| Consumer | Aggregate by key | Keep recent N entries | - -## Future Considerations - -1. **Span context propagation**: Include trace/span IDs in buffered logs - when a span is active on the thread - -2. **Sampling**: Support head-based sampling to reduce volume - -3. **Priority levels**: Allow high-severity logs to bypass buffer limits - -4. **Direct ITR routing**: Option to route logs directly to a same-core - ITR pipeline without going through the global thread - -5. 
**Backpressure signaling**: Mechanism for global collector to signal - EffectHandlers when it's overloaded - -## Code References - -### Metrics System (for reference implementation) - -| File | Purpose | -|------|---------| -| `crates/controller/src/lib.rs` | Creates `MetricsSystem`, spawns threads, passes `MetricsReporter` to pipeline threads | -| `crates/telemetry/src/lib.rs` | `MetricsSystem` struct holding registry, collector, reporter, dispatcher | -| `crates/telemetry/src/registry.rs` | `MetricsRegistry` and `MetricsRegistryHandle` for aggregation | -| `crates/telemetry/src/reporter.rs` | `MetricsReporter` for sending snapshots through flume channel | -| `crates/telemetry/src/collector.rs` | `MetricsCollector::run_collection_loop()` receives and aggregates snapshots | -| `crates/engine/src/context.rs` | `PipelineContext` and `NodeAttributeSet` for consistent attributes | -| `crates/engine/src/effect_handler.rs` | `EffectHandlerCore` with `report_metrics()` method | -| `crates/admin/src/telemetry.rs` | `/metrics` endpoint using `registry.visit_metrics_and_reset()` | - -### Existing Self-Tracing Primitives - -| File | Purpose | -|------|---------| -| `crates/telemetry/src/self_tracing.rs` | `LogRecord` and `SavedCallsite` types | -| `crates/telemetry/src/self_tracing/encoder.rs` | `DirectLogRecordEncoder`, `DirectFieldVisitor` for OTLP encoding | -| `crates/telemetry/src/self_tracing/formatter.rs` | `RawLoggingLayer`, `ConsoleWriter` for console output | -| `crates/telemetry/src/internal_events.rs` | `otel_info!`, `otel_warn!`, etc. macros wrapping tracing | - -### Tokio Tracing (vendored) - -| File | Purpose | -|------|---------| -| `tokio-tracing-rs/tracing-core/src/dispatcher.rs` | Thread-local `CURRENT_STATE`, `with_default()` for subscriber scoping | -| `tokio-tracing-rs/tracing-subscriber/src/registry/sharded.rs` | Example of `ThreadLocal>` for per-thread span stacks | - diff --git a/rust/otap-dataflow/docs/thread-local-telemetry-research.md b/rust/otap-dataflow/docs/thread-local-telemetry-research.md deleted file mode 100644 index c95f35a76c..0000000000 --- a/rust/otap-dataflow/docs/thread-local-telemetry-research.md +++ /dev/null @@ -1,713 +0,0 @@ -# Thread-Local Variables and Tokio Tracing: Research for Internal Telemetry - -This document explains how thread-local variables work in Rust, how Tokio's -`tracing` library uses them for scoping, and how these mechanisms can be -applied to the OTAP-Dataflow internal telemetry architecture. - -## Table of Contents - -1. [Rust Thread-Local Variables Fundamentals](#rust-thread-local-variables-fundamentals) -2. [How Tracing Uses Thread-Locals](#how-tracing-uses-thread-locals) -3. [Reentrancy Protection in Tracing](#reentrancy-protection-in-tracing) -4. [Application to OTAP-Dataflow](#application-to-otap-dataflow) -5. [Design Patterns for EffectHandler Buffer](#design-patterns-for-effecthandler-buffer) -6. [Parallel with Existing Metrics Infrastructure](#parallel-with-existing-metrics-infrastructure) - ---- - -## Rust Thread-Local Variables Fundamentals - -### Basic Thread-Local Storage - -Rust's `std::thread_local!` macro creates thread-local storage: - -```rust -use std::cell::{Cell, RefCell}; - -thread_local! { - // Simple value types use Cell - static COUNTER: Cell = const { Cell::new(0) }; - - // Complex types use RefCell for interior mutability - static BUFFER: RefCell> = RefCell::new(Vec::new()); -} -``` - -**Key characteristics:** - -1. **Initialization**: Thread-locals are lazily initialized per-thread on first access -2. 
**Lifetime**: Data lives as long as the thread (destroyed when thread exits) -3. **Access Pattern**: Must be accessed via closure using `.with(|value| ...)` -4. **Interior Mutability**: Use `Cell` for `Copy` types, `RefCell` for others -5. **No Cross-Thread Access**: By design, other threads cannot see this data - -### Access Patterns - -```rust -// Reading -COUNTER.with(|c| { - let value = c.get(); - println!("Counter: {}", value); -}); - -// Writing -COUNTER.with(|c| { - c.set(c.get() + 1); -}); - -// Mutable access to complex types -BUFFER.with(|b| { - b.borrow_mut().push(record); -}); -``` - -### The `thread_local` Crate (Used by tracing-subscriber) - -The `thread_local` crate provides `ThreadLocal`, which is different from -`std::thread_local!`: - -```rust -use thread_local::ThreadLocal; -use std::cell::RefCell; - -struct Registry { - // Each thread gets its own RefCell - current_spans: ThreadLocal>, -} - -impl Registry { - fn enter(&self, id: &span::Id) { - // get_or_default() returns a reference to this thread's value - self.current_spans - .get_or_default() // Returns &RefCell - .borrow_mut() - .push(id.clone()); - } -} -``` - -**Key difference**: `ThreadLocal` is a struct field that can be shared -across threads (via `Arc` or references), but each thread accessing it -sees its own independent value. - ---- - -## How Tracing Uses Thread-Locals - -### Dispatcher Thread-Local State - -The `tracing-core` dispatcher uses thread-local storage for two critical purposes: - -```rust -// From tracing-core/src/dispatcher.rs -#[cfg(feature = "std")] -std::thread_local! { - static CURRENT_STATE: State = const { - State { - default: RefCell>, - can_enter: Cell, - } - }; -} -``` - -#### 1. Per-Thread Default Subscriber (`default`) - -Each thread can have its own "scoped" subscriber that overrides the global default: - -```rust -// The dispatcher lookup chain: -pub fn get_default(mut f: F) -> T -where - F: FnMut(&Dispatch) -> T, -{ - // Fast path: if no scoped dispatchers exist, use global - if SCOPED_COUNT.load(Ordering::Acquire) == 0 { - return f(get_global()); - } - - // Slow path: check thread-local state - CURRENT_STATE.try_with(|state| { - if let Some(entered) = state.enter() { - return f(&entered.current()); - } - f(&NONE) - }) - .unwrap_or_else(|_| f(&NONE)) -} -``` - -The scoping mechanism: - -```rust -pub fn with_default(dispatcher: &Dispatch, f: impl FnOnce() -> T) -> T { - // set_default stores the previous dispatcher and sets the new one - let _guard = set_default(dispatcher); - f() - // When guard drops, previous dispatcher is restored -} -``` - -**How it works:** -- `set_default()` stores the current dispatcher in the thread-local and - replaces it with the new one -- Returns a `DefaultGuard` that, when dropped, restores the previous dispatcher -- This creates a stack of dispatchers per thread - -#### 2. 
Reentrancy Protection (`can_enter`) - -Prevents infinite recursion when a subscriber's callback triggers more tracing: - -```rust -struct State { - default: RefCell>, - can_enter: Cell, // ← Reentrancy guard -} - -impl State { - fn enter(&self) -> Option> { - // Atomically check and set to false - if self.can_enter.replace(false) { - Some(Entered(self)) - } else { - None // Already in a dispatch, prevent recursion - } - } -} - -impl Drop for Entered<'_> { - fn drop(&mut self) { - self.0.can_enter.set(true); // Re-enable on exit - } -} -``` - -**Usage pattern:** -- Before dispatching an event, `state.enter()` is called -- If we're already dispatching (nested call), `enter()` returns `None` -- The caller then uses `Dispatch::none()` instead, preventing recursion -- When the dispatch completes, the guard's `Drop` re-enables entry - -### Registry Per-Thread Span Stack - -The `tracing-subscriber` Registry tracks which spans are "entered" on each thread: - -```rust -// From tracing-subscriber/src/registry/sharded.rs -pub struct Registry { - spans: Pool, - // Each thread has its own stack of currently-entered spans - current_spans: ThreadLocal>, - next_filter_id: u8, -} - -impl Subscriber for Registry { - fn enter(&self, id: &span::Id) { - // Push to THIS thread's span stack - self.current_spans - .get_or_default() - .borrow_mut() - .push(id.clone()); - } - - fn exit(&self, id: &span::Id) { - // Pop from THIS thread's span stack - if let Some(spans) = self.current_spans.get() { - spans.borrow_mut().pop(id); - } - } - - fn current_span(&self) -> Current { - // Return the top of THIS thread's span stack - self.current_spans - .get() - .and_then(|spans| { - let spans = spans.borrow(); - let id = spans.current()?; - let span = self.get(id)?; - Some(Current::new(id.clone(), span.metadata)) - }) - .unwrap_or_else(Current::none) - } -} -``` - ---- - -## Reentrancy Protection in Tracing - -### The Problem - -When a subscriber processes an event, it might trigger more events: - -```rust -impl Subscriber for MySubscriber { - fn event(&self, event: &Event<'_>) { - // This would cause infinite recursion! - tracing::info!("Received event: {:?}", event); - } -} -``` - -### The Solution - -Tracing uses the `can_enter` flag as a guard: - -```rust -// Simplified from dispatcher.rs -pub fn get_default(f: F) -> T { - CURRENT_STATE.try_with(|state| { - // Try to enter dispatch mode - if let Some(entered) = state.enter() { - // Success: use the real dispatcher - return f(&entered.current()); - } - // Already dispatching: use no-op dispatcher - f(&NONE) - }) -} -``` - -The test in `dispatcher.rs` demonstrates this: - -```rust -#[test] -fn events_dont_infinite_loop() { - struct TestSubscriber; - impl Subscriber for TestSubscriber { - fn event(&self, _: &Event<'_>) { - static EVENTS: AtomicUsize = AtomicUsize::new(0); - assert_eq!( - EVENTS.fetch_add(1, Ordering::Relaxed), - 0, - "event method called twice!" - ); - // This nested event dispatch is blocked by can_enter - Event::dispatch(&TEST_META, &TEST_META.fields().value_set(&[])); - } - } - // ... test passes because the nested dispatch sees Dispatch::none() -} -``` - ---- - -## Application to OTAP-Dataflow - -### Internal Telemetry Feedback Prevention - -Your architecture document describes preventing feedback loops in internal -telemetry. Here's how to implement this using thread-local state: - -```rust -use std::cell::Cell; - -thread_local! { - /// Thread-local flag indicating this thread is an internal telemetry thread. 
- /// When true, all otel_* macros become no-ops to prevent feedback. - static INTERNAL_TELEMETRY_THREAD: Cell = const { Cell::new(false) }; - - /// Reentrancy guard for telemetry processing - static IN_TELEMETRY_DISPATCH: Cell = const { Cell::new(false) }; -} - -/// Mark the current thread as an internal telemetry thread. -/// All otel_info!, otel_warn!, etc. macros will be disabled on this thread. -pub fn mark_as_internal_telemetry_thread() { - INTERNAL_TELEMETRY_THREAD.with(|flag| flag.set(true)); -} - -/// Check if telemetry is enabled on this thread -pub fn is_telemetry_enabled() -> bool { - INTERNAL_TELEMETRY_THREAD.with(|flag| !flag.get()) -} - -/// Guard for telemetry dispatch that prevents reentrancy -pub struct TelemetryDispatchGuard; - -impl TelemetryDispatchGuard { - pub fn try_enter() -> Option { - IN_TELEMETRY_DISPATCH.with(|flag| { - if flag.replace(true) { - None // Already dispatching - } else { - Some(TelemetryDispatchGuard) - } - }) - } -} - -impl Drop for TelemetryDispatchGuard { - fn drop(&mut self) { - IN_TELEMETRY_DISPATCH.with(|flag| flag.set(false)); - } -} -``` - -### Updated Macros with Feedback Protection - -```rust -/// Macro for logging informational messages with feedback protection. -#[macro_export] -macro_rules! otel_info { - ($name:expr $(,)?) => { - if $crate::is_telemetry_enabled() { - $crate::_private::info!( - name: $name, - target: env!("CARGO_PKG_NAME"), - name = $name, - "" - ); - } - }; - // ... other variants -} -``` - -### Global Internal Telemetry Thread - -For your global logs collection thread: - -```rust -pub fn spawn_internal_telemetry_thread( - name: &str, - task: F, -) -> std::thread::JoinHandle<()> -where - F: FnOnce() + Send + 'static, -{ - std::thread::Builder::new() - .name(name.into()) - .spawn(move || { - // Mark this thread as internal telemetry - mark_as_internal_telemetry_thread(); - - // Configure a safe subscriber for this thread only - let safe_subscriber = create_raw_logging_subscriber(); - tracing::subscriber::with_default(safe_subscriber, task); - }) - .expect("Failed to spawn internal telemetry thread") -} -``` - ---- - -## Design Patterns for EffectHandler Buffer - -### Option 1: Thread-Local Buffer with EffectHandler Coordination - -Since your `EffectHandler` owns its thread, you can use thread-local storage: - -```rust -use std::cell::RefCell; -use std::collections::VecDeque; - -/// Maximum bytes to buffer per thread -const MAX_BUFFER_BYTES: usize = 65536; - -/// Individual log record (pre-encoded or structured) -pub struct LogRecord { - pub timestamp: std::time::Instant, - pub level: tracing::Level, - pub name: &'static str, - pub target: &'static str, - // Pre-encoded OTLP bytes for attributes + body - pub encoded_data: Vec, -} - -thread_local! 
{ - /// Per-thread log buffer for first-party telemetry - static LOG_BUFFER: RefCell = RefCell::new(LogBuffer::new()); -} - -pub struct LogBuffer { - records: VecDeque, - total_bytes: usize, -} - -impl LogBuffer { - pub fn new() -> Self { - Self { - records: VecDeque::new(), - total_bytes: 0, - } - } - - /// Add a record, potentially dropping oldest if over capacity - pub fn push(&mut self, record: LogRecord) { - let record_size = record.encoded_data.len(); - - // Evict old records if needed - while self.total_bytes + record_size > MAX_BUFFER_BYTES - && !self.records.is_empty() - { - if let Some(old) = self.records.pop_front() { - self.total_bytes -= old.encoded_data.len(); - } - } - - self.total_bytes += record_size; - self.records.push_back(record); - } - - /// Drain all records for sending - pub fn drain(&mut self) -> Vec { - self.total_bytes = 0; - self.records.drain(..).collect() - } - - /// Check if buffer has data - pub fn is_empty(&self) -> bool { - self.records.is_empty() - } -} - -/// Called by otel_* macros to buffer a log record -pub fn buffer_log_record(record: LogRecord) { - LOG_BUFFER.with(|buf| { - buf.borrow_mut().push(record); - }); -} - -/// Called by EffectHandler on timer tick to flush logs -pub fn flush_log_buffer() -> Vec { - LOG_BUFFER.with(|buf| { - buf.borrow_mut().drain() - }) -} -``` - -### Option 2: EffectHandler-Owned Buffer (Explicit State) - -Alternatively, store the buffer directly in the `EffectHandler`: - -```rust -pub struct EffectHandlerCore { - pub(crate) node_id: NodeId, - pub(crate) pipeline_ctrl_msg_sender: Option>, - pub(crate) metrics_reporter: MetricsReporter, - - // NEW: Per-handler log buffer - pub(crate) log_buffer: LogBuffer, -} - -impl EffectHandlerCore { - /// Log an info message, buffering it for later flush - pub fn log_info(&mut self, name: &'static str, attributes: &[(&str, AttributeValue)]) { - let record = LogRecord { - timestamp: std::time::Instant::now(), - level: tracing::Level::INFO, - name, - target: self.node_id.name.as_str(), - encoded_data: encode_attributes_to_otlp(attributes), - }; - self.log_buffer.push(record); - } - - /// Flush buffered logs - can be called on timer or when sending to pipeline - pub async fn flush_logs(&mut self) -> Result<(), Error> { - let records = self.log_buffer.drain(); - if records.is_empty() { - return Ok(()); - } - - // Option A: Send to global collection thread - self.send_to_global_collector(records).await?; - - // Option B: Route to local ITR pipeline - // self.route_to_local_pipeline(records).await?; - - Ok(()) - } -} -``` - -### Option 3: Hybrid Approach with Thread-Local + Handler Reference - -This pattern allows macros to work anywhere while the EffectHandler controls flushing: - -```rust -use std::cell::RefCell; -use std::sync::Arc; - -/// Weak reference to the EffectHandler's log sink -pub struct LogSink { - sender: flume::Sender, -} - -thread_local! 
### Option 3: Hybrid Approach with Thread-Local + Handler Reference

This pattern allows macros to work anywhere while the EffectHandler controls flushing:

```rust
use std::cell::RefCell;
use std::sync::Arc;

/// Shared handle to the EffectHandler's log sink
pub struct LogSink {
    sender: flume::Sender<LogRecord>,
}

thread_local! {
    /// Thread-local pointer to this thread's log sink
    static CURRENT_LOG_SINK: RefCell<Option<Arc<LogSink>>> = RefCell::new(None);
}

impl EffectHandlerCore {
    /// Install this handler's log sink as the thread-local default
    pub fn install_log_sink(&self) {
        let sink = Arc::new(LogSink {
            sender: self.log_channel.clone(),
        });
        CURRENT_LOG_SINK.with(|s| {
            *s.borrow_mut() = Some(sink);
        });
    }

    /// Remove the thread-local sink (e.g., during shutdown)
    pub fn uninstall_log_sink(&self) {
        CURRENT_LOG_SINK.with(|s| {
            *s.borrow_mut() = None;
        });
    }
}

/// Called by otel_* macros
pub fn emit_log(record: LogRecord) {
    CURRENT_LOG_SINK.with(|sink| {
        if let Some(sink) = &*sink.borrow() {
            // Non-blocking send, drop if full
            let _ = sink.sender.try_send(record);
        }
        // If no sink installed, log is dropped (or use fallback)
    });
}
```

---

## Parallel with Existing Metrics Infrastructure

Your existing metrics system follows a pattern that can be mirrored for logs:

### Current Metrics Flow

```
┌──────────────────┐    report()     ┌──────────────────┐    aggregate    ┌─────────────────┐
│ MetricSet        │ ──────────────► │ MetricsReporter  │ ──────────────► │ MetricsRegistry │
│ (per-component)  │    (channel)    │ (per-handler)    │    (channel)    │ (global)        │
└──────────────────┘                 └──────────────────┘                 └─────────────────┘
                                                                                   │
                                                                         dispatch_metrics()
                                                                                   ▼
                                                                       ┌─────────────────────┐
                                                                       │ MetricsDispatcher   │
                                                                       │ → OpenTelemetry SDK │
                                                                       │ → /metrics endpoint │
                                                                       └─────────────────────┘
```

### Proposed Parallel Logs Flow

```
┌──────────────────┐    buffer()     ┌──────────────────┐      flush      ┌─────────────────┐
│ LogRecord        │ ──────────────► │ LogBuffer        │ ──────────────► │ LogsRegistry    │
│ (per-event)      │ (thread-local)  │ (per-handler)    │    (channel)    │ (global)        │
└──────────────────┘                 └──────────────────┘                 └─────────────────┘
                                                                                   │
                                                                           dispatch_logs()
                                                                                   ▼
                                                                       ┌─────────────────────┐
                                                                       │ LogsDispatcher      │
                                                                       │ → ITR Pipeline      │
                                                                       │ → /logs endpoint    │
                                                                       │ → Raw console       │
                                                                       └─────────────────────┘
```

### Implementation Sketch for LogsRegistry

```rust
use std::sync::{Arc, RwLock};
use std::collections::VecDeque;

/// Ring buffer of recent log records for the /logs endpoint
pub struct LogsRegistry {
    /// Configurable max records to keep
    max_records: usize,
    /// Ring buffer of recent logs (OTLP-encoded bytes)
    recent_logs: RwLock<VecDeque<Vec<u8>>>,
    /// Channel to receive logs from all handlers
    receiver: flume::Receiver<Vec<u8>>,
}

impl LogsRegistry {
    /// Get recent logs for HTTP endpoint (analogous to /metrics)
    pub fn get_recent_logs(&self) -> Vec<Vec<u8>> {
        self.recent_logs.read().unwrap().iter().cloned().collect()
    }

    /// Collection loop (parallel to MetricsCollector::run_collection_loop)
    pub async fn run_collection_loop(&self) -> Result<(), Error> {
        while let Ok(log_bytes) = self.receiver.recv_async().await {
            let mut buffer = self.recent_logs.write().unwrap();

            // Ring buffer eviction
            if buffer.len() >= self.max_records {
                buffer.pop_front();
            }
            buffer.push_back(log_bytes);

            // Also forward to ITR pipeline if configured
            // self.forward_to_itr(&log_bytes).await?;
        }
        Ok(())
    }
}
```
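A wiring sketch for the registry — the `LogsRegistry::new` constructor and the channel capacity are assumptions for illustration; `flume::bounded` and `tokio::spawn` are the real APIs:

```rust
use std::sync::Arc;

// Bounded channel shared by all handlers; capacity chosen arbitrarily here.
let (log_tx, log_rx) = flume::bounded::<Vec<u8>>(4096);

// Hypothetical constructor taking the ring-buffer capacity and receiver.
let registry = Arc::new(LogsRegistry::new(1000, log_rx));

// Each EffectHandler keeps a clone of `log_tx` for non-blocking sends;
// the admin runtime drives the collection loop.
let registry_task = Arc::clone(&registry);
tokio::spawn(async move {
    let _ = registry_task.run_collection_loop().await;
});
```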
### HTTP Endpoint for Logs

Similar to `/metrics`, provide a `/logs` endpoint:

```rust
/// Handler for GET /logs - returns recent internal logs
pub async fn get_internal_logs(
    registry: Arc<LogsRegistry>,
) -> impl IntoResponse {
    let logs = registry.get_recent_logs();

    // Could format as:
    // - JSON array of log lines
    // - OTLP LogsData protobuf
    // - Human-readable text

    let formatted = format_logs_as_text(&logs);
    (StatusCode::OK, formatted)
}
```

---

## Summary

### Key Thread-Local Patterns for Your Use Case

1. **Feedback Prevention Flag**: `INTERNAL_TELEMETRY_THREAD: Cell<bool>`
   - Set `true` on dedicated internal telemetry threads
   - Macros check this before emitting events

2. **Reentrancy Guard**: `IN_TELEMETRY_DISPATCH: Cell<bool>`
   - Prevents recursive telemetry events
   - Similar to tracing's `can_enter` mechanism

3. **Per-Thread Buffer**: `LOG_BUFFER: RefCell<LogBuffer>`
   - Accumulate logs without blocking
   - EffectHandler flushes on timer

4. **Thread-Local Sink Reference**: `CURRENT_LOG_SINK: RefCell<Option<Arc<LogSink>>>`
   - Allows macros to find the right destination
   - EffectHandler installs/uninstalls on thread lifecycle

### Tracing Mechanisms You Can Leverage

1. **`with_default()`**: Set a thread-specific subscriber for internal threads
2. **`Dispatch::none()`**: No-op subscriber when reentrancy is detected (see the sketch after the next-steps list)
3. **`ThreadLocal<T>`**: Per-thread state in shared structures
4. **Guard-based RAII**: Automatic cleanup on scope exit

### Next Steps

1. Implement the feedback prevention thread-local flag
2. Update `otel_*` macros to check the flag
3. Create `LogBuffer` structure parallel to `MetricSet`
4. Add `LogsReporter` parallel to `MetricsReporter`
5. Implement `LogsRegistry` with `/logs` endpoint
6. Wire up EffectHandler timer-based flush
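To make mechanisms 1 and 2 concrete, a minimal sketch using the public `tracing` dispatcher API (`dispatcher::with_default` and `Dispatch::none()` are real; the wrapper name is illustrative):

```rust
use tracing::dispatcher::{self, Dispatch};

/// Run `f` with all tracing dispatch suppressed, e.g. while the collector
/// itself is processing a batch and must not emit new events.
fn without_telemetry<R>(f: impl FnOnce() -> R) -> R {
    // `Dispatch::none()` is a no-op subscriber: any event emitted inside
    // `f` is discarded instead of re-entering the collector.
    dispatcher::with_default(&Dispatch::none(), f)
}
```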
From d6b6610467e1809c75393278c2d0fc73e15032b9 Mon Sep 17 00:00:00 2001
From: Joshua MacDonald
Date: Fri, 9 Jan 2026 15:18:29 -0800
Subject: [PATCH 48/92] comment

---
 .../crates/telemetry/src/self_tracing/raw_log.rs | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/raw_log.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/raw_log.rs
index c103c329c3..141c653ad7 100644
--- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/raw_log.rs
+++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/raw_log.rs
@@ -1,15 +1,8 @@
 // Copyright The OpenTelemetry Authors
 // SPDX-License-Identifier: Apache-2.0
 
-//! Raw logging macros that bypass the tracing subscriber.
-//!
-//! These macros are used for logging when the subscriber infrastructure is not
-//! available or may be in an inconsistent state (e.g., during shutdown, when
-//! the channel is closed, or during early initialization).
-//!
-//! The macros temporarily install a dedicated subscriber that writes directly
-//! to the console, then use standard tracing macros. This reuses the normal
-//! tracing event creation and encoding path.
+//! Raw logging macros that bypass the tracing subscriber and write to
+//! the console. A single `raw_error!(...)` API is provided.
 
 use super::formatter::RawLoggingLayer;
 use tracing_subscriber::prelude::*;
@@ -30,7 +23,10 @@ where
 
 /// Log an error message directly to stderr, bypassing the tracing subscriber.
 ///
-/// For reporting errors in the rrr rsr rs rsr rsr rs sr sr rs rs rs sr rs rs
+/// This should be used sparingly, only in emergencies! This is a good
+/// configuration for diagnosing the other internal logging facilities,
+/// because it is unbuffered and uses a dedicated subscriber.
+///
 ///
 /// # Example
 ///

From 33ad2dc7b574953d153a5fd24d11f5d8bfef6fb7 Mon Sep 17 00:00:00 2001
From: Joshua MacDonald
Date: Fri, 9 Jan 2026 15:53:27 -0800
Subject: [PATCH 49/92] typo

---
 rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs
index 688f7eb19f..1d88be5b60 100644
--- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs
+++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs
@@ -47,7 +47,7 @@ impl<'buf> DirectLogRecordEncoder<'buf> {
             .encode_field_tag(LOG_RECORD_SEVERITY_NUMBER, wire_types::VARINT);
         self.buf.encode_varint(severity as u64);
 
-        // Node we skip encoding severity_text (field 3, string)
+        // Note we skip encoding severity_text (field 3, string)
 
         // Encode event_name (field 12, string) - format: "target::name (file:line)"
         encode_event_name(self.buf, callsite);

From faa45be3b6e4d16080cc19864004499b10d39fc6 Mon Sep 17 00:00:00 2001
From: Joshua MacDonald
Date: Fri, 9 Jan 2026 16:03:54 -0800
Subject: [PATCH 50/92] strip

---
 .../crates/engine/src/pipeline_ctrl.rs        |   9 +-
 .../telemetry/fixed-proto-design-v1.diff      | 624 ++++++++++++++++++
 .../crates/telemetry/src/logs.rs              |  13 +-
 .../crates/telemetry/src/self_tracing.rs      |  12 +-
 .../telemetry/src/self_tracing/formatter.rs   |  31 +-
 .../telemetry/src/self_tracing/raw_log.rs     |  27 +-
 6 files changed, 693 insertions(+), 23 deletions(-)
 create mode 100644 rust/otap-dataflow/crates/telemetry/fixed-proto-design-v1.diff

diff --git a/rust/otap-dataflow/crates/engine/src/pipeline_ctrl.rs b/rust/otap-dataflow/crates/engine/src/pipeline_ctrl.rs
index 6f2128b71e..2aecdc5112 100644
--- a/rust/otap-dataflow/crates/engine/src/pipeline_ctrl.rs
+++ b/rust/otap-dataflow/crates/engine/src/pipeline_ctrl.rs
@@ -19,9 +19,9 @@ use otap_df_config::pipeline::TelemetrySettings;
 use otap_df_state::DeployedPipelineKey;
 use otap_df_state::event::{ErrorSummary, ObservedEvent};
 use otap_df_state::reporter::ObservedEventReporter;
-use otap_df_telemetry::otel_warn;
+use otap_df_telemetry::logs::{LogsReporter, drain_thread_log_buffer};
 use otap_df_telemetry::reporter::MetricsReporter;
-use otap_df_telemetry::logs::{LogsReporter, flush_thread_log_buffer};
+use otap_df_telemetry::{otel_error, otel_warn};
 use std::cmp::Reverse;
 use std::collections::{BinaryHeap, HashMap};
 use std::time::{Duration, Instant};
@@ -355,9 +355,10 @@ impl PipelineCtrlMsgManager {
         }
 
         // Flush internal logs from the thread-local buffer
-        if let Some(batch) = flush_thread_log_buffer() {
+        if let Some(batch) = drain_thread_log_buffer() {
+            let count = batch.size_with_dropped();
             if let Err(err) = self.logs_reporter.try_report(batch) {
-                otel_warn!("logs.reporting.fail", error = err.to_string());
+                otel_error!("logs.reporting.fail", error = err.to_string(), dropped = count);
             }
         }

diff --git a/rust/otap-dataflow/crates/telemetry/fixed-proto-design-v1.diff b/rust/otap-dataflow/crates/telemetry/fixed-proto-design-v1.diff
new file mode 100644
index 0000000000..1a7c188e2e
--- /dev/null
+++ b/rust/otap-dataflow/crates/telemetry/fixed-proto-design-v1.diff
@@ -0,0 +1,624 @@
+diff --git a/rust/otap-dataflow/crates/pdata/src/error.rs b/rust/otap-dataflow/crates/pdata/src/error.rs
+index 88540081..c30b804a 100644
+---
a/rust/otap-dataflow/crates/pdata/src/error.rs ++++ b/rust/otap-dataflow/crates/pdata/src/error.rs +@@ -15,6 +15,24 @@ use std::num::TryFromIntError; + /// Result type + pub type Result = std::result::Result; + ++/// Error indicating that a fixed-size buffer ran out of space during encoding. ++/// ++/// This error is returned by [`ProtoWrite`] implementations when there is ++/// insufficient capacity to complete an encoding operation. For fixed-size ++/// buffers used in internal instrumentation, callers can catch this error ++/// and use the partially-encoded contents, incrementing a dropped-attributes ++/// counter instead. ++#[derive(Debug, Clone, Copy, PartialEq, Eq)] ++pub struct Truncated; ++ ++impl std::fmt::Display for Truncated { ++ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { ++ write!(f, "buffer truncated: insufficient capacity for encoding") ++ } ++} ++ ++impl std::error::Error for Truncated {} ++ + /// Errors related to OTAP or OTLP pipeline data + #[derive(thiserror::Error, Debug)] + #[allow(missing_docs)] +@@ -179,4 +197,7 @@ pub enum Error { + + #[error("Format error: {}", error)] + Format { error: String }, ++ ++ #[error("Buffer truncated: insufficient capacity for encoding")] ++ Truncated(#[from] Truncated), + } +diff --git a/rust/otap-dataflow/crates/pdata/src/otlp/common.rs b/rust/otap-dataflow/crates/pdata/src/otlp/common.rs +index e3a0859d..f1536aef 100644 +--- a/rust/otap-dataflow/crates/pdata/src/otlp/common.rs ++++ b/rust/otap-dataflow/crates/pdata/src/otlp/common.rs +@@ -5,7 +5,7 @@ use crate::arrays::{ + ByteArrayAccessor, Int64ArrayAccessor, MaybeDictArrayAccessor, NullableArrayAccessor, + StringArrayAccessor, StructColumnAccessor, get_bool_array_opt, get_f64_array_opt, get_u8_array, + }; +-use crate::error::{Error, Result}; ++use crate::error::{Error, Result, Truncated}; + use crate::otlp::attributes::{Attribute16Arrays, encode_key_value}; + use crate::proto::consts::field_num::common::{ + INSTRUMENTATION_DROPPED_ATTRIBUTES_COUNT, INSTRUMENTATION_SCOPE_ATTRIBUTES, +@@ -30,6 +30,216 @@ use std::fmt; + use std::fmt::Write; + use std::sync::LazyLock; + ++/// Trait for types that can be used as protobuf encoding buffers. ++/// ++/// This trait abstracts over growable buffers (like [`ProtoBuffer`]) and fixed-size ++/// buffers (like [`FixedProtoBuffer`]), allowing encoding logic to work with both. ++/// ++/// For fixed-size buffers, operations return `Err(Truncated)` when there is ++/// insufficient capacity. Callers can catch this error and use the partially-encoded ++/// contents, for example by incrementing a dropped-attributes counter. ++/// ++/// # Example ++/// ++/// ```ignore ++/// fn encode_attributes(buf: &mut W, attrs: &[KeyValue]) -> Result<(), Truncated> { ++/// for attr in attrs { ++/// buf.encode_string(ATTR_KEY_TAG, &attr.key)?; ++/// buf.encode_string(ATTR_VALUE_TAG, &attr.value)?; ++/// } ++/// Ok(()) ++/// } ++/// ``` ++pub trait ProtoWrite { ++ /// Append bytes to the buffer. ++ /// ++ /// Returns `Err(Truncated)` if there is insufficient capacity (for fixed-size buffers). ++ fn write_bytes(&mut self, bytes: &[u8]) -> std::result::Result<(), Truncated>; ++ ++ /// Current length of encoded data. ++ fn len(&self) -> usize; ++ ++ /// Returns true if the buffer is empty. ++ fn is_empty(&self) -> bool { ++ self.len() == 0 ++ } ++ ++ /// Get a reference to the encoded bytes. ++ fn as_slice(&self) -> &[u8]; ++ ++ /// Get a mutable reference to the buffer (for patching length placeholders). 
++ fn as_mut_slice(&mut self) -> &mut [u8]; ++ ++ /// Clear the buffer contents. ++ fn clear(&mut self); ++ ++ /// Current capacity of the buffer. ++ fn capacity(&self) -> usize; ++ ++ /// Remaining capacity available for writing. ++ fn remaining(&self) -> usize { ++ self.capacity().saturating_sub(self.len()) ++ } ++ ++ /// Push a single byte. ++ #[inline] ++ fn write_byte(&mut self, byte: u8) -> std::result::Result<(), Truncated> { ++ self.write_bytes(&[byte]) ++ } ++ ++ /// Encode a varint (variable-length integer). ++ #[inline] ++ fn encode_varint(&mut self, value: u64) -> std::result::Result<(), Truncated> { ++ // Fast path for single byte (very common) ++ if value < 0x80 { ++ return self.write_byte(value as u8); ++ } ++ ++ // Fast path for two bytes (common) ++ if value < 0x4000 { ++ return self.write_bytes(&[((value & 0x7F) | 0x80) as u8, (value >> 7) as u8]); ++ } ++ ++ // General case ++ let mut v = value; ++ while v >= 0x80 { ++ self.write_byte(((v & 0x7F) | 0x80) as u8)?; ++ v >>= 7; ++ } ++ self.write_byte(v as u8) ++ } ++ ++ /// Encode a protobuf field tag (field number + wire type). ++ #[inline] ++ fn encode_field_tag( ++ &mut self, ++ field_number: u64, ++ wire_type: u64, ++ ) -> std::result::Result<(), Truncated> { ++ let key = (field_number << 3) | wire_type; ++ self.encode_varint(key) ++ } ++ ++ /// Encode a signed varint using zig-zag encoding (sint32/sint64). ++ #[inline] ++ fn encode_sint32(&mut self, value: i32) -> std::result::Result<(), Truncated> { ++ self.encode_varint(((value << 1) ^ (value >> 31)) as u64) ++ } ++ ++ /// Encode a length-delimited string field. ++ fn encode_string(&mut self, field_tag: u64, val: &str) -> std::result::Result<(), Truncated> { ++ self.encode_field_tag(field_tag, wire_types::LEN)?; ++ self.encode_varint(val.len() as u64)?; ++ self.write_bytes(val.as_bytes()) ++ } ++ ++ /// Encode a length-delimited bytes field. ++ fn encode_bytes_field( ++ &mut self, ++ field_tag: u64, ++ val: &[u8], ++ ) -> std::result::Result<(), Truncated> { ++ self.encode_field_tag(field_tag, wire_types::LEN)?; ++ self.encode_varint(val.len() as u64)?; ++ self.write_bytes(val) ++ } ++} ++ ++/// A fixed-size, stack-allocatable buffer for protobuf encoding. ++/// ++/// This buffer is designed for internal instrumentation where heap allocation ++/// should be avoided. When the buffer runs out of space, encoding operations ++/// return `Err(Truncated)`, allowing callers to use the partial contents and ++/// track dropped attributes. ++/// ++/// # Example ++/// ++/// ```ignore ++/// let mut buf = FixedProtoBuffer::<1024>::new(); ++/// match encode_log_record(&mut buf, &record) { ++/// Ok(()) => send_proto_bytes(buf.as_slice()), ++/// Err(Truncated) => { ++/// // Use partial contents, increment dropped counter ++/// dropped_count += 1; ++/// send_proto_bytes(buf.as_slice()); ++/// } ++/// } ++/// ``` ++#[derive(Debug)] ++pub struct FixedProtoBuffer { ++ buffer: [u8; N], ++ len: usize, ++} ++ ++impl Default for FixedProtoBuffer { ++ fn default() -> Self { ++ Self::new() ++ } ++} ++ ++impl FixedProtoBuffer { ++ /// Create a new empty fixed-size buffer. ++ #[must_use] ++ pub const fn new() -> Self { ++ Self { ++ buffer: [0u8; N], ++ len: 0, ++ } ++ } ++ ++ /// Returns the maximum capacity of this buffer. 
++ #[must_use] ++ pub const fn max_capacity() -> usize { ++ N ++ } ++} ++ ++impl ProtoWrite for FixedProtoBuffer { ++ #[inline] ++ fn write_bytes(&mut self, bytes: &[u8]) -> std::result::Result<(), Truncated> { ++ let new_len = self.len.checked_add(bytes.len()).ok_or(Truncated)?; ++ if new_len > N { ++ return Err(Truncated); ++ } ++ self.buffer[self.len..new_len].copy_from_slice(bytes); ++ self.len = new_len; ++ Ok(()) ++ } ++ ++ #[inline] ++ fn len(&self) -> usize { ++ self.len ++ } ++ ++ fn as_slice(&self) -> &[u8] { ++ &self.buffer[..self.len] ++ } ++ ++ fn as_mut_slice(&mut self) -> &mut [u8] { ++ &mut self.buffer[..self.len] ++ } ++ ++ fn clear(&mut self) { ++ self.len = 0; ++ } ++ ++ fn capacity(&self) -> usize { ++ N ++ } ++} ++ ++impl AsRef<[u8]> for FixedProtoBuffer { ++ fn as_ref(&self) -> &[u8] { ++ self.as_slice() ++ } ++} ++ ++impl AsMut<[u8]> for FixedProtoBuffer { ++ fn as_mut(&mut self) -> &mut [u8] { ++ self.as_mut_slice() ++ } ++} ++ + pub(in crate::otlp) struct ResourceArrays<'a> { + pub id: Option<&'a UInt16Array>, + pub dropped_attributes_count: Option<&'a UInt32Array>, +@@ -451,6 +661,35 @@ impl AsMut<[u8]> for ProtoBuffer { + } + } + ++impl ProtoWrite for ProtoBuffer { ++ #[inline] ++ fn write_bytes(&mut self, bytes: &[u8]) -> std::result::Result<(), Truncated> { ++ self.buffer.extend_from_slice(bytes); ++ Ok(()) ++ } ++ ++ #[inline] ++ fn len(&self) -> usize { ++ self.buffer.len() ++ } ++ ++ fn as_slice(&self) -> &[u8] { ++ &self.buffer ++ } ++ ++ fn as_mut_slice(&mut self) -> &mut [u8] { ++ &mut self.buffer ++ } ++ ++ fn clear(&mut self) { ++ self.buffer.clear(); ++ } ++ ++ fn capacity(&self) -> usize { ++ self.buffer.capacity() ++ } ++} ++ + /// Helper for encoding with unknown length. Usage: + /// ```ignore + /// proto_encode_len_delimited_unknown_size!( +@@ -478,24 +717,52 @@ impl AsMut<[u8]> for ProtoBuffer { + /// TODO: currently we're always allocating 4 byte. This may often be too much but we over-allocate + /// to be safe. Eventually we should maybe allow a size hint here and allocate fewer bytes. + /// ++/// # Note on buffer types ++/// ++/// This macro works with [`ProtoBuffer`] (the growable Vec-based buffer) and uses infallible ++/// operations. For use with [`FixedProtoBuffer`] or other [`ProtoWrite`] implementations ++/// that may return [`Truncated`] errors, use [`proto_encode_len_delimited_try!`] instead. + #[macro_export] + macro_rules! 
proto_encode_len_delimited_unknown_size { + ($field_tag: expr, $encode_fn:expr, $buf:expr) => {{ + let num_bytes = 4; // placeholder length + $buf.encode_field_tag($field_tag, $crate::proto::consts::wire_types::LEN); + let len_start_pos = $buf.len(); +- $crate::otlp::common::encode_len_placeholder($buf); ++ $crate::otlp::common::encode_len_placeholder_infallible($buf); + $encode_fn; + let len = $buf.len() - len_start_pos - num_bytes; +- $crate::otlp::common::patch_len_placeholder($buf, num_bytes, len, len_start_pos); ++ $crate::otlp::common::patch_len_placeholder_infallible($buf, num_bytes, len, len_start_pos); + }}; + } + +-pub(crate) fn encode_len_placeholder(buf: &mut ProtoBuffer) { ++pub(crate) fn encode_len_placeholder( ++ buf: &mut W, ++) -> std::result::Result<(), Truncated> { ++ buf.write_bytes(&[0x80, 0x80, 0x80, 0x00]) ++} ++ ++pub(crate) fn patch_len_placeholder( ++ buf: &mut W, ++ num_bytes: usize, ++ len: usize, ++ len_start_pos: usize, ++) { ++ let slice = buf.as_mut_slice(); ++ for i in 0..num_bytes { ++ slice[len_start_pos + i] += ((len >> (i * 7)) & 0x7f) as u8; ++ } ++} ++ ++/// Infallible version of [`encode_len_placeholder`] for use with [`ProtoBuffer`]. ++/// ++/// This function directly extends the buffer without returning a Result, ++/// for use in the infallible encoding path with growable buffers. ++pub(crate) fn encode_len_placeholder_infallible(buf: &mut ProtoBuffer) { + buf.buffer.extend_from_slice(&[0x80, 0x80, 0x80, 0x00]); + } + +-pub(crate) fn patch_len_placeholder( ++/// Infallible version of [`patch_len_placeholder`] for use with [`ProtoBuffer`]. ++pub(crate) fn patch_len_placeholder_infallible( + buf: &mut ProtoBuffer, + num_bytes: usize, + len: usize, +@@ -506,6 +773,41 @@ pub(crate) fn patch_len_placeholder( + } + } + ++/// Fallible helper macro for encoding with unknown length, for use with any [`ProtoWrite`] impl. ++/// ++/// Unlike [`proto_encode_len_delimited_unknown_size!`], this macro propagates truncation errors ++/// and is intended for use with fixed-size buffers or generic code that works with any ++/// [`ProtoWrite`] implementation. ++/// ++/// The enclosing function must return a `Result` type that can convert from [`Truncated`]. ++/// ++/// # Example ++/// ++/// ```ignore ++/// fn encode_message(buf: &mut W) -> Result<(), Truncated> { ++/// proto_encode_len_delimited_try!( ++/// FIELD_TAG, ++/// encode_nested_content(buf)?, ++/// buf ++/// )?; ++/// Ok(()) ++/// } ++/// ``` ++#[macro_export] ++macro_rules! proto_encode_len_delimited_try { ++ ($field_tag: expr, $encode_fn:expr, $buf:expr) => {{ ++ use $crate::otlp::ProtoWrite; ++ let num_bytes = 4; // placeholder length ++ $buf.encode_field_tag($field_tag, $crate::proto::consts::wire_types::LEN)?; ++ let len_start_pos = $buf.len(); ++ $crate::otlp::common::encode_len_placeholder($buf)?; ++ $encode_fn; ++ let len = $buf.len() - len_start_pos - num_bytes; ++ $crate::otlp::common::patch_len_placeholder($buf, num_bytes, len, len_start_pos); ++ Ok::<(), $crate::error::Truncated>(()) ++ }}; ++} ++ + /// Used to iterate over OTAP [`RecordBatch`] in a particular order. 
+ /// + /// There are certain use cases where we want to visit all the rows in some record batch that are +@@ -1132,4 +1434,201 @@ mod test { + fn test_metrics_with_no_metrics() { + assert_empty_batch(metrics_with_no_metrics().into()); + } ++ ++ // ++ // ProtoWrite and FixedProtoBuffer tests ++ // ++ ++ use super::{FixedProtoBuffer, ProtoBuffer, ProtoWrite}; ++ use crate::error::Truncated; ++ ++ #[test] ++ fn test_proto_buffer_implements_proto_write() { ++ let mut buf = ProtoBuffer::new(); ++ ++ // Test basic write operations via the trait (using fully qualified syntax) ++ ProtoWrite::write_bytes(&mut buf, b"hello").unwrap(); ++ assert_eq!(ProtoWrite::len(&buf), 5); ++ assert_eq!(ProtoWrite::as_slice(&buf), b"hello"); ++ ++ // Test varint encoding via trait ++ ProtoWrite::clear(&mut buf); ++ ProtoWrite::encode_varint(&mut buf, 127).unwrap(); // single byte ++ assert_eq!(buf.len(), 1); ++ assert_eq!(ProtoWrite::as_slice(&buf), &[127]); ++ ++ ProtoWrite::clear(&mut buf); ++ ProtoWrite::encode_varint(&mut buf, 128).unwrap(); // two bytes ++ assert_eq!(buf.len(), 2); ++ ++ ProtoWrite::clear(&mut buf); ++ ProtoWrite::encode_varint(&mut buf, 16384).unwrap(); // three bytes ++ assert_eq!(buf.len(), 3); ++ ++ // Test string encoding via trait ++ ProtoWrite::clear(&mut buf); ++ ProtoWrite::encode_string(&mut buf, 1, "test").unwrap(); ++ assert!(buf.len() > 4); // tag + length + "test" ++ } ++ ++ #[test] ++ fn test_proto_buffer_inherent_methods_unchanged() { ++ // Verify that the original infallible API still works ++ let mut buf = ProtoBuffer::new(); ++ ++ // These are the inherent methods (infallible, no Result) ++ buf.encode_varint(127); ++ assert_eq!(buf.len(), 1); ++ ++ buf.clear(); ++ buf.encode_string(1, "test"); ++ assert!(buf.len() > 4); ++ ++ buf.clear(); ++ buf.extend_from_slice(b"direct"); ++ assert_eq!(buf.as_ref(), b"direct"); ++ } ++ ++ #[test] ++ fn test_fixed_proto_buffer_basic() { ++ let mut buf = FixedProtoBuffer::<64>::new(); ++ ++ assert_eq!(buf.len(), 0); ++ assert_eq!(buf.capacity(), 64); ++ assert_eq!(buf.remaining(), 64); ++ ++ // Write some bytes ++ buf.write_bytes(b"hello").unwrap(); ++ assert_eq!(buf.len(), 5); ++ assert_eq!(buf.remaining(), 59); ++ assert_eq!(buf.as_slice(), b"hello"); ++ ++ // Clear and verify ++ buf.clear(); ++ assert_eq!(buf.len(), 0); ++ assert_eq!(buf.remaining(), 64); ++ } ++ ++ #[test] ++ fn test_fixed_proto_buffer_truncation() { ++ let mut buf = FixedProtoBuffer::<10>::new(); ++ ++ // Write should succeed when under capacity ++ assert!(buf.write_bytes(b"12345").is_ok()); ++ assert_eq!(buf.len(), 5); ++ ++ // Write should succeed when exactly at capacity ++ assert!(buf.write_bytes(b"67890").is_ok()); ++ assert_eq!(buf.len(), 10); ++ ++ // Write should fail when over capacity ++ let result = buf.write_bytes(b"x"); ++ assert_eq!(result, Err(Truncated)); ++ ++ // Buffer contents should be unchanged after failed write ++ assert_eq!(buf.len(), 10); ++ assert_eq!(buf.as_slice(), b"1234567890"); ++ } ++ ++ #[test] ++ fn test_fixed_proto_buffer_varint_truncation() { ++ // Create a buffer that can only hold 1 byte ++ let mut buf = FixedProtoBuffer::<1>::new(); ++ ++ // Small varint (1 byte) should succeed ++ assert!(buf.encode_varint(127).is_ok()); ++ assert_eq!(buf.len(), 1); ++ ++ // Clear and try a larger varint that needs 2 bytes ++ buf.clear(); ++ let result = buf.encode_varint(128); ++ assert_eq!(result, Err(Truncated)); ++ ++ // For the 2-byte fast path, write_bytes checks capacity atomically, ++ // so no partial write occurs (the buffer remains 
empty) ++ assert_eq!(buf.len(), 0); ++ ++ // Test partial write in the general case (3+ byte varints) ++ let mut buf3 = FixedProtoBuffer::<2>::new(); ++ // Value 16384 needs 3 bytes in varint encoding ++ let result = buf3.encode_varint(16384); ++ assert_eq!(result, Err(Truncated)); ++ // The general case writes byte-by-byte, so partial writes can occur ++ assert_eq!(buf3.len(), 2); // two bytes written before failure ++ } ++ ++ #[test] ++ fn test_fixed_proto_buffer_encode_string_truncation() { ++ // Create a small buffer ++ let mut buf = FixedProtoBuffer::<8>::new(); ++ ++ // This should fail because the string + tag + length is > 8 bytes ++ let result = buf.encode_string(1, "hello world"); ++ assert_eq!(result, Err(Truncated)); ++ ++ // Partial contents are in the buffer (useful for truncation recovery) ++ assert!(buf.len() > 0); ++ } ++ ++ #[test] ++ fn test_proto_write_trait_generic_function() { ++ // Demonstrate using a generic function over ProtoWrite ++ fn encode_test_message( ++ buf: &mut W, ++ value: u64, ++ ) -> Result<(), Truncated> { ++ buf.encode_field_tag(1, 0)?; // varint wire type ++ buf.encode_varint(value)?; ++ Ok(()) ++ } ++ ++ // Works with ProtoBuffer (growable) ++ let mut growing_buf = ProtoBuffer::new(); ++ encode_test_message(&mut growing_buf, 12345).unwrap(); ++ assert!(growing_buf.len() > 0); ++ ++ // Works with FixedProtoBuffer ++ let mut fixed_buf = FixedProtoBuffer::<32>::new(); ++ encode_test_message(&mut fixed_buf, 12345).unwrap(); ++ assert_eq!(fixed_buf.as_slice(), growing_buf.as_slice()); ++ ++ // Fixed buffer can fail ++ let mut tiny_buf = FixedProtoBuffer::<2>::new(); ++ let result = encode_test_message(&mut tiny_buf, 12345); ++ assert_eq!(result, Err(Truncated)); ++ } ++ ++ #[test] ++ fn test_fixed_proto_buffer_partial_content_on_truncation() { ++ // This test demonstrates the truncation recovery pattern: ++ // encode as much as possible, then on truncation, use partial content ++ ++ let mut buf = FixedProtoBuffer::<20>::new(); ++ ++ // Simulate encoding multiple attributes ++ let attrs = [("a", "1"), ("b", "2"), ("c", "very long value that won't fit")]; ++ let mut encoded_count = 0; ++ ++ for (key, val) in &attrs { ++ // Try to encode the key-value pair ++ let start_len = buf.len(); ++ if buf.encode_string(1, key).is_err() { ++ break; ++ } ++ if buf.encode_string(2, val).is_err() { ++ // Failed to encode value, could truncate here ++ // For this test, we just break ++ break; ++ } ++ encoded_count += 1; ++ let _ = start_len; // silence unused warning ++ } ++ ++ // We should have encoded at least some attributes ++ assert!(encoded_count >= 1); ++ assert!(encoded_count < attrs.len()); // but not all due to truncation ++ ++ // The buffer contains the partial encoding ++ assert!(buf.len() > 0); ++ } + } +diff --git a/rust/otap-dataflow/crates/pdata/src/otlp/mod.rs b/rust/otap-dataflow/crates/pdata/src/otlp/mod.rs +index f4f4056c..d731e401 100644 +--- a/rust/otap-dataflow/crates/pdata/src/otlp/mod.rs ++++ b/rust/otap-dataflow/crates/pdata/src/otlp/mod.rs +@@ -9,10 +9,15 @@ use crate::{error::Result, otap::OtapArrowRecords}; + use bytes::Bytes; + use otap_df_config::SignalType; + ++pub use common::FixedProtoBuffer; + pub use common::ProtoBuffer; ++pub use common::ProtoWrite; + pub use otap_df_pdata_otlp_macros::Message; // Required for derived code + pub use otap_df_pdata_otlp_macros::qualified; // Required for derived code + ++// Re-export Truncated from error module for convenience ++pub use crate::error::Truncated; ++ + /// Common methods for OTLP/OTAP 
attributes. + pub mod attributes; + /// Common methods for batching. diff --git a/rust/otap-dataflow/crates/telemetry/src/logs.rs b/rust/otap-dataflow/crates/telemetry/src/logs.rs index c6e13153b9..b8d5f6d31a 100644 --- a/rust/otap-dataflow/crates/telemetry/src/logs.rs +++ b/rust/otap-dataflow/crates/telemetry/src/logs.rs @@ -17,7 +17,14 @@ pub struct LogBatch { /// The log records in this batch. pub records: Vec, /// Number of records dropped in the same period. - pub dropped_count: u64, + pub dropped_count: usize, +} + +impl LogBatch { + /// The total number of dropped if you drop this batch. + pub fn size_with_dropped(&self) -> usize { + self.records.len() + self.dropped_count + } } /// Thread-local log buffer for a pipeline thread. @@ -167,7 +174,7 @@ impl LogsCollector { self.write_batch(batch); } Err(err) => { - crate::raw_error!("log collector error: {err}"); + crate::raw_error!("log collector error:", err = err.to_string()); return Ok(()); } } @@ -238,7 +245,7 @@ where match self.reporter.try_report(batch) { Ok(()) => {} Err(err) => { - crate::raw_error!("failed to send log batch: {}", err); + crate::raw_error!("failed to send log batch", err = err.to_string()); } } } diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing.rs index 9d03fd56c7..378eacb89e 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing.rs @@ -9,6 +9,7 @@ pub mod encoder; pub mod formatter; +pub mod raw_log; use bytes::Bytes; use encoder::DirectFieldVisitor; @@ -20,6 +21,10 @@ use tracing::{Event, Level, Metadata}; pub use encoder::DirectLogRecordEncoder; pub use formatter::{ConsoleWriter, RawLoggingLayer}; +/// Optional key identifying the producing component. +/// TODO: This is re-exported, instead rename the underlying type. +pub type ProducerKey = crate::registry::MetricsKey; + /// A log record with structural metadata and pre-encoded body/attributes. #[derive(Debug, Clone)] pub struct LogRecord { @@ -34,6 +39,10 @@ pub struct LogRecord { /// in practice and/or parsed by a crate::proto::opentelemetry::logs::v1::LogRecord /// message object for testing. pub body_attrs_bytes: Bytes, + + /// Optional key identifying the producing component (for first-party logs). + /// None for third-party logs from libraries. + pub producer_key: Option, } /// Saved callsite information. This is information that can easily be @@ -86,7 +95,7 @@ impl SavedCallsite { impl LogRecord { /// Construct a log record, partially encoding its dynamic content. #[must_use] - pub fn new(event: &Event<'_>) -> Self { + pub fn new(event: &Event<'_>, producer_key: Option) -> Self { let metadata = event.metadata(); // Encode body and attributes to bytes. @@ -101,6 +110,7 @@ impl LogRecord { callsite_id: metadata.callsite(), timestamp_ns: Self::get_timestamp_nanos(), body_attrs_bytes: buf.into_bytes(), + producer_key, } } diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs index 2d6b4c2b79..3a48aba9b3 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs @@ -124,8 +124,18 @@ impl ConsoleWriter { String::from_utf8_lossy(&buf[..len]).into_owned() } + /// Write a LogRecord to stdout or stderr (based on level). + /// + /// ERROR and WARN go to stderr, others go to stdout. 
diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs
index 2d6b4c2b79..3a48aba9b3 100644
--- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs
+++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs
@@ -124,8 +124,18 @@ impl ConsoleWriter {
         String::from_utf8_lossy(&buf[..len]).into_owned()
     }
 
+    /// Write a LogRecord to stdout or stderr (based on level).
+    ///
+    /// ERROR and WARN go to stderr, others go to stdout.
+    /// This is the same routing logic used by RawLoggingLayer.
+    pub fn raw_print(&self, record: &LogRecord, callsite: &SavedCallsite) {
+        let mut buf = [0u8; LOG_BUFFER_SIZE];
+        let len = self.write_log_record(&mut buf, record, callsite);
+        self.write_line(callsite.level(), &buf[..len]);
+    }
+
     /// Write a LogRecord to a byte buffer. Returns the number of bytes written.
-    pub fn write_log_record(
+    pub(crate) fn write_log_record(
         &self,
         buf: &mut [u8],
         record: &LogRecord,
@@ -306,7 +316,7 @@ impl ConsoleWriter {
     }
 
     /// Write a log line to stdout or stderr.
-    fn write_line(&self, level: &Level, data: &[u8]) {
+    pub(crate) fn write_line(&self, level: &Level, data: &[u8]) {
         let use_stderr = matches!(*level, Level::ERROR | Level::WARN);
         let _ = if use_stderr {
             std::io::stderr().write_all(data)
@@ -326,12 +336,11 @@ where
         // TODO: there are allocations implied here that we would prefer
         // to avoid, it will be an extensive change in the ProtoBuffer to
         // stack-allocate this temporary.
-        let record = LogRecord::new(event);
+        // RawLoggingLayer is used before the logs infrastructure is set up,
+        // so no producer_key context is available.
+        let record = LogRecord::new(event, None);
         let callsite = SavedCallsite::new(event.metadata());
-
-        let mut buf = [0u8; LOG_BUFFER_SIZE];
-        let len = self.writer.write_log_record(&mut buf, &record, &callsite);
-        self.writer.write_line(callsite.level(), &buf[..len]);
+        self.writer.raw_print(&record, &callsite);
     }
 
     // Note! This tracing layer does not implement Span-related features
@@ -350,10 +359,10 @@ mod tests {
     use crate::self_tracing::encoder::level_to_severity_number;
     use bytes::Bytes;
     use otap_df_pdata::otlp::ProtoBuffer;
+    use otap_df_pdata::prost::Message;
     use otap_df_pdata::proto::opentelemetry::common::v1::any_value::Value;
     use otap_df_pdata::proto::opentelemetry::common::v1::{AnyValue, KeyValue};
     use otap_df_pdata::proto::opentelemetry::logs::v1::LogRecord as ProtoLogRecord;
-    use prost::Message;
     use std::sync::{Arc, Mutex};
     use tracing_subscriber::prelude::*;
 
@@ -367,7 +376,7 @@ mod tests {
         S: Subscriber + for<'a> LookupSpan<'a>,
     {
         fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) {
-            let record = LogRecord::new(event);
+            let record = LogRecord::new(event, None);
             let callsite = SavedCallsite::new(event.metadata());
 
             // Capture formatted output
@@ -532,13 +541,12 @@ mod tests {
             // 2024-01-15T12:30:45.678Z
             timestamp_ns: 1_705_321_845_678_000_000,
             body_attrs_bytes: Bytes::new(),
+            producer_key: None,
         };
 
         let writer = ConsoleWriter::no_color();
         let output = writer.format_log_record(&record, &test_callsite());
 
-        // Note that the severity text is formatted using the Metadata::Level
-        // so the text appears, unlike the protobuf case.
         assert_eq!(
             output,
             "2024-01-15T12:30:45.678Z INFO test_module::submodule::test_event (src/test.rs:123): \n"
         );
@@ -591,6 +599,7 @@ mod tests {
             callsite_id: tracing::callsite::Identifier(&TEST_CALLSITE),
             timestamp_ns: 1_705_321_845_678_000_000,
             body_attrs_bytes: Bytes::from(encoded),
+            producer_key: None,
         };
 
         let mut buf = [0u8; LOG_BUFFER_SIZE];

diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/raw_log.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/raw_log.rs
index 141c653ad7..bc105a43e1 100644
--- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/raw_log.rs
+++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/raw_log.rs
@@ -4,9 +4,16 @@
 //! Raw logging macros that bypass the tracing subscriber and write to
 //! the console. A single `raw_error!(...)` API is provided.
 
+#![allow(unused_macros)]
+
 use super::formatter::RawLoggingLayer;
 use tracing_subscriber::prelude::*;
 
+#[doc(hidden)]
+pub mod _private {
+    pub use tracing::error;
+}
+
 /// Create a subscriber that writes directly to console (bypassing channels).
 fn raw_logging_subscriber() -> impl tracing::Subscriber {
     tracing_subscriber::registry().with(RawLoggingLayer::new(super::ConsoleWriter::no_color()))
@@ -36,9 +43,21 @@ where
 /// ```
 #[macro_export]
 macro_rules! raw_error {
-    ($($arg:tt)+) => {
+    ($name:expr $(,)?) => {
+        $crate::self_tracing::raw_log::with_raw_logging(|| {
+            $crate::_private::error!(name: $name, target: env!("CARGO_PKG_NAME"), name = $name, "");
+        })
+    };
+    ($name:expr, $($key:ident = $value:expr),+ $(,)?) => {
         $crate::self_tracing::raw_log::with_raw_logging(|| {
-            ::tracing::error!($($arg)+)
+            $crate::_private::error!(name: $name,
+                target: env!("CARGO_PKG_NAME"),
+                name = $name,
+                $($key = {
+                    $value
+                }),+,
+                ""
+            )
         })
     };
 }
@@ -47,7 +66,7 @@ macro_rules! raw_error {
 mod tests {
     #[test]
     fn test_raw_error() {
-        raw_error!("test error message");
-        raw_error!("test error with arg: {}", 42);
+        raw_error!("panic.late", msg = "test error message");
+        raw_error!("panic.early", msg = "test error with arg", arg = 42);
     }
 }

From d1b26ad19769ac47f1778a42a1d1629d23dad762 Mon Sep 17 00:00:00 2001
From: Joshua MacDonald
Date: Fri, 9 Jan 2026 16:06:26 -0800
Subject: [PATCH 51/92] nope

---
 .../telemetry/fixed-proto-design-v1.diff      | 624 ------------------
 .../telemetry/src/self_tracing/raw_log.rs     |  10 +-
 2 files changed, 1 insertion(+), 633 deletions(-)
 delete mode 100644 rust/otap-dataflow/crates/telemetry/fixed-proto-design-v1.diff

diff --git a/rust/otap-dataflow/crates/telemetry/fixed-proto-design-v1.diff b/rust/otap-dataflow/crates/telemetry/fixed-proto-design-v1.diff
deleted file mode 100644
index 1a7c188e2e..0000000000
--- a/rust/otap-dataflow/crates/telemetry/fixed-proto-design-v1.diff
+++ /dev/null
@@ -1,624 +0,0 @@
-diff --git a/rust/otap-dataflow/crates/pdata/src/error.rs b/rust/otap-dataflow/crates/pdata/src/error.rs
-index 88540081..c30b804a 100644
---- a/rust/otap-dataflow/crates/pdata/src/error.rs
-+++ b/rust/otap-dataflow/crates/pdata/src/error.rs
-@@ -15,6 +15,24 @@ use std::num::TryFromIntError;
- /// Result type
- pub type Result<T> = std::result::Result<T, Error>;
-
-+/// Error indicating that a fixed-size buffer ran out of space during encoding.
-+///
-+/// This error is returned by [`ProtoWrite`] implementations when there is
-+/// insufficient capacity to complete an encoding operation. For fixed-size
-+/// buffers used in internal instrumentation, callers can catch this error
-+/// and use the partially-encoded contents, incrementing a dropped-attributes
-+/// counter instead.
-+#[derive(Debug, Clone, Copy, PartialEq, Eq)] -+pub struct Truncated; -+ -+impl std::fmt::Display for Truncated { -+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { -+ write!(f, "buffer truncated: insufficient capacity for encoding") -+ } -+} -+ -+impl std::error::Error for Truncated {} -+ - /// Errors related to OTAP or OTLP pipeline data - #[derive(thiserror::Error, Debug)] - #[allow(missing_docs)] -@@ -179,4 +197,7 @@ pub enum Error { - - #[error("Format error: {}", error)] - Format { error: String }, -+ -+ #[error("Buffer truncated: insufficient capacity for encoding")] -+ Truncated(#[from] Truncated), - } -diff --git a/rust/otap-dataflow/crates/pdata/src/otlp/common.rs b/rust/otap-dataflow/crates/pdata/src/otlp/common.rs -index e3a0859d..f1536aef 100644 ---- a/rust/otap-dataflow/crates/pdata/src/otlp/common.rs -+++ b/rust/otap-dataflow/crates/pdata/src/otlp/common.rs -@@ -5,7 +5,7 @@ use crate::arrays::{ - ByteArrayAccessor, Int64ArrayAccessor, MaybeDictArrayAccessor, NullableArrayAccessor, - StringArrayAccessor, StructColumnAccessor, get_bool_array_opt, get_f64_array_opt, get_u8_array, - }; --use crate::error::{Error, Result}; -+use crate::error::{Error, Result, Truncated}; - use crate::otlp::attributes::{Attribute16Arrays, encode_key_value}; - use crate::proto::consts::field_num::common::{ - INSTRUMENTATION_DROPPED_ATTRIBUTES_COUNT, INSTRUMENTATION_SCOPE_ATTRIBUTES, -@@ -30,6 +30,216 @@ use std::fmt; - use std::fmt::Write; - use std::sync::LazyLock; - -+/// Trait for types that can be used as protobuf encoding buffers. -+/// -+/// This trait abstracts over growable buffers (like [`ProtoBuffer`]) and fixed-size -+/// buffers (like [`FixedProtoBuffer`]), allowing encoding logic to work with both. -+/// -+/// For fixed-size buffers, operations return `Err(Truncated)` when there is -+/// insufficient capacity. Callers can catch this error and use the partially-encoded -+/// contents, for example by incrementing a dropped-attributes counter. -+/// -+/// # Example -+/// -+/// ```ignore -+/// fn encode_attributes(buf: &mut W, attrs: &[KeyValue]) -> Result<(), Truncated> { -+/// for attr in attrs { -+/// buf.encode_string(ATTR_KEY_TAG, &attr.key)?; -+/// buf.encode_string(ATTR_VALUE_TAG, &attr.value)?; -+/// } -+/// Ok(()) -+/// } -+/// ``` -+pub trait ProtoWrite { -+ /// Append bytes to the buffer. -+ /// -+ /// Returns `Err(Truncated)` if there is insufficient capacity (for fixed-size buffers). -+ fn write_bytes(&mut self, bytes: &[u8]) -> std::result::Result<(), Truncated>; -+ -+ /// Current length of encoded data. -+ fn len(&self) -> usize; -+ -+ /// Returns true if the buffer is empty. -+ fn is_empty(&self) -> bool { -+ self.len() == 0 -+ } -+ -+ /// Get a reference to the encoded bytes. -+ fn as_slice(&self) -> &[u8]; -+ -+ /// Get a mutable reference to the buffer (for patching length placeholders). -+ fn as_mut_slice(&mut self) -> &mut [u8]; -+ -+ /// Clear the buffer contents. -+ fn clear(&mut self); -+ -+ /// Current capacity of the buffer. -+ fn capacity(&self) -> usize; -+ -+ /// Remaining capacity available for writing. -+ fn remaining(&self) -> usize { -+ self.capacity().saturating_sub(self.len()) -+ } -+ -+ /// Push a single byte. -+ #[inline] -+ fn write_byte(&mut self, byte: u8) -> std::result::Result<(), Truncated> { -+ self.write_bytes(&[byte]) -+ } -+ -+ /// Encode a varint (variable-length integer). 
-+ #[inline] -+ fn encode_varint(&mut self, value: u64) -> std::result::Result<(), Truncated> { -+ // Fast path for single byte (very common) -+ if value < 0x80 { -+ return self.write_byte(value as u8); -+ } -+ -+ // Fast path for two bytes (common) -+ if value < 0x4000 { -+ return self.write_bytes(&[((value & 0x7F) | 0x80) as u8, (value >> 7) as u8]); -+ } -+ -+ // General case -+ let mut v = value; -+ while v >= 0x80 { -+ self.write_byte(((v & 0x7F) | 0x80) as u8)?; -+ v >>= 7; -+ } -+ self.write_byte(v as u8) -+ } -+ -+ /// Encode a protobuf field tag (field number + wire type). -+ #[inline] -+ fn encode_field_tag( -+ &mut self, -+ field_number: u64, -+ wire_type: u64, -+ ) -> std::result::Result<(), Truncated> { -+ let key = (field_number << 3) | wire_type; -+ self.encode_varint(key) -+ } -+ -+ /// Encode a signed varint using zig-zag encoding (sint32/sint64). -+ #[inline] -+ fn encode_sint32(&mut self, value: i32) -> std::result::Result<(), Truncated> { -+ self.encode_varint(((value << 1) ^ (value >> 31)) as u64) -+ } -+ -+ /// Encode a length-delimited string field. -+ fn encode_string(&mut self, field_tag: u64, val: &str) -> std::result::Result<(), Truncated> { -+ self.encode_field_tag(field_tag, wire_types::LEN)?; -+ self.encode_varint(val.len() as u64)?; -+ self.write_bytes(val.as_bytes()) -+ } -+ -+ /// Encode a length-delimited bytes field. -+ fn encode_bytes_field( -+ &mut self, -+ field_tag: u64, -+ val: &[u8], -+ ) -> std::result::Result<(), Truncated> { -+ self.encode_field_tag(field_tag, wire_types::LEN)?; -+ self.encode_varint(val.len() as u64)?; -+ self.write_bytes(val) -+ } -+} -+ -+/// A fixed-size, stack-allocatable buffer for protobuf encoding. -+/// -+/// This buffer is designed for internal instrumentation where heap allocation -+/// should be avoided. When the buffer runs out of space, encoding operations -+/// return `Err(Truncated)`, allowing callers to use the partial contents and -+/// track dropped attributes. -+/// -+/// # Example -+/// -+/// ```ignore -+/// let mut buf = FixedProtoBuffer::<1024>::new(); -+/// match encode_log_record(&mut buf, &record) { -+/// Ok(()) => send_proto_bytes(buf.as_slice()), -+/// Err(Truncated) => { -+/// // Use partial contents, increment dropped counter -+/// dropped_count += 1; -+/// send_proto_bytes(buf.as_slice()); -+/// } -+/// } -+/// ``` -+#[derive(Debug)] -+pub struct FixedProtoBuffer { -+ buffer: [u8; N], -+ len: usize, -+} -+ -+impl Default for FixedProtoBuffer { -+ fn default() -> Self { -+ Self::new() -+ } -+} -+ -+impl FixedProtoBuffer { -+ /// Create a new empty fixed-size buffer. -+ #[must_use] -+ pub const fn new() -> Self { -+ Self { -+ buffer: [0u8; N], -+ len: 0, -+ } -+ } -+ -+ /// Returns the maximum capacity of this buffer. 
-+ #[must_use] -+ pub const fn max_capacity() -> usize { -+ N -+ } -+} -+ -+impl ProtoWrite for FixedProtoBuffer { -+ #[inline] -+ fn write_bytes(&mut self, bytes: &[u8]) -> std::result::Result<(), Truncated> { -+ let new_len = self.len.checked_add(bytes.len()).ok_or(Truncated)?; -+ if new_len > N { -+ return Err(Truncated); -+ } -+ self.buffer[self.len..new_len].copy_from_slice(bytes); -+ self.len = new_len; -+ Ok(()) -+ } -+ -+ #[inline] -+ fn len(&self) -> usize { -+ self.len -+ } -+ -+ fn as_slice(&self) -> &[u8] { -+ &self.buffer[..self.len] -+ } -+ -+ fn as_mut_slice(&mut self) -> &mut [u8] { -+ &mut self.buffer[..self.len] -+ } -+ -+ fn clear(&mut self) { -+ self.len = 0; -+ } -+ -+ fn capacity(&self) -> usize { -+ N -+ } -+} -+ -+impl AsRef<[u8]> for FixedProtoBuffer { -+ fn as_ref(&self) -> &[u8] { -+ self.as_slice() -+ } -+} -+ -+impl AsMut<[u8]> for FixedProtoBuffer { -+ fn as_mut(&mut self) -> &mut [u8] { -+ self.as_mut_slice() -+ } -+} -+ - pub(in crate::otlp) struct ResourceArrays<'a> { - pub id: Option<&'a UInt16Array>, - pub dropped_attributes_count: Option<&'a UInt32Array>, -@@ -451,6 +661,35 @@ impl AsMut<[u8]> for ProtoBuffer { - } - } - -+impl ProtoWrite for ProtoBuffer { -+ #[inline] -+ fn write_bytes(&mut self, bytes: &[u8]) -> std::result::Result<(), Truncated> { -+ self.buffer.extend_from_slice(bytes); -+ Ok(()) -+ } -+ -+ #[inline] -+ fn len(&self) -> usize { -+ self.buffer.len() -+ } -+ -+ fn as_slice(&self) -> &[u8] { -+ &self.buffer -+ } -+ -+ fn as_mut_slice(&mut self) -> &mut [u8] { -+ &mut self.buffer -+ } -+ -+ fn clear(&mut self) { -+ self.buffer.clear(); -+ } -+ -+ fn capacity(&self) -> usize { -+ self.buffer.capacity() -+ } -+} -+ - /// Helper for encoding with unknown length. Usage: - /// ```ignore - /// proto_encode_len_delimited_unknown_size!( -@@ -478,24 +717,52 @@ impl AsMut<[u8]> for ProtoBuffer { - /// TODO: currently we're always allocating 4 byte. This may often be too much but we over-allocate - /// to be safe. Eventually we should maybe allow a size hint here and allocate fewer bytes. - /// -+/// # Note on buffer types -+/// -+/// This macro works with [`ProtoBuffer`] (the growable Vec-based buffer) and uses infallible -+/// operations. For use with [`FixedProtoBuffer`] or other [`ProtoWrite`] implementations -+/// that may return [`Truncated`] errors, use [`proto_encode_len_delimited_try!`] instead. - #[macro_export] - macro_rules! 
proto_encode_len_delimited_unknown_size { - ($field_tag: expr, $encode_fn:expr, $buf:expr) => {{ - let num_bytes = 4; // placeholder length - $buf.encode_field_tag($field_tag, $crate::proto::consts::wire_types::LEN); - let len_start_pos = $buf.len(); -- $crate::otlp::common::encode_len_placeholder($buf); -+ $crate::otlp::common::encode_len_placeholder_infallible($buf); - $encode_fn; - let len = $buf.len() - len_start_pos - num_bytes; -- $crate::otlp::common::patch_len_placeholder($buf, num_bytes, len, len_start_pos); -+ $crate::otlp::common::patch_len_placeholder_infallible($buf, num_bytes, len, len_start_pos); - }}; - } - --pub(crate) fn encode_len_placeholder(buf: &mut ProtoBuffer) { -+pub(crate) fn encode_len_placeholder( -+ buf: &mut W, -+) -> std::result::Result<(), Truncated> { -+ buf.write_bytes(&[0x80, 0x80, 0x80, 0x00]) -+} -+ -+pub(crate) fn patch_len_placeholder( -+ buf: &mut W, -+ num_bytes: usize, -+ len: usize, -+ len_start_pos: usize, -+) { -+ let slice = buf.as_mut_slice(); -+ for i in 0..num_bytes { -+ slice[len_start_pos + i] += ((len >> (i * 7)) & 0x7f) as u8; -+ } -+} -+ -+/// Infallible version of [`encode_len_placeholder`] for use with [`ProtoBuffer`]. -+/// -+/// This function directly extends the buffer without returning a Result, -+/// for use in the infallible encoding path with growable buffers. -+pub(crate) fn encode_len_placeholder_infallible(buf: &mut ProtoBuffer) { - buf.buffer.extend_from_slice(&[0x80, 0x80, 0x80, 0x00]); - } - --pub(crate) fn patch_len_placeholder( -+/// Infallible version of [`patch_len_placeholder`] for use with [`ProtoBuffer`]. -+pub(crate) fn patch_len_placeholder_infallible( - buf: &mut ProtoBuffer, - num_bytes: usize, - len: usize, -@@ -506,6 +773,41 @@ pub(crate) fn patch_len_placeholder( - } - } - -+/// Fallible helper macro for encoding with unknown length, for use with any [`ProtoWrite`] impl. -+/// -+/// Unlike [`proto_encode_len_delimited_unknown_size!`], this macro propagates truncation errors -+/// and is intended for use with fixed-size buffers or generic code that works with any -+/// [`ProtoWrite`] implementation. -+/// -+/// The enclosing function must return a `Result` type that can convert from [`Truncated`]. -+/// -+/// # Example -+/// -+/// ```ignore -+/// fn encode_message(buf: &mut W) -> Result<(), Truncated> { -+/// proto_encode_len_delimited_try!( -+/// FIELD_TAG, -+/// encode_nested_content(buf)?, -+/// buf -+/// )?; -+/// Ok(()) -+/// } -+/// ``` -+#[macro_export] -+macro_rules! proto_encode_len_delimited_try { -+ ($field_tag: expr, $encode_fn:expr, $buf:expr) => {{ -+ use $crate::otlp::ProtoWrite; -+ let num_bytes = 4; // placeholder length -+ $buf.encode_field_tag($field_tag, $crate::proto::consts::wire_types::LEN)?; -+ let len_start_pos = $buf.len(); -+ $crate::otlp::common::encode_len_placeholder($buf)?; -+ $encode_fn; -+ let len = $buf.len() - len_start_pos - num_bytes; -+ $crate::otlp::common::patch_len_placeholder($buf, num_bytes, len, len_start_pos); -+ Ok::<(), $crate::error::Truncated>(()) -+ }}; -+} -+ - /// Used to iterate over OTAP [`RecordBatch`] in a particular order. 
- /// - /// There are certain use cases where we want to visit all the rows in some record batch that are -@@ -1132,4 +1434,201 @@ mod test { - fn test_metrics_with_no_metrics() { - assert_empty_batch(metrics_with_no_metrics().into()); - } -+ -+ // -+ // ProtoWrite and FixedProtoBuffer tests -+ // -+ -+ use super::{FixedProtoBuffer, ProtoBuffer, ProtoWrite}; -+ use crate::error::Truncated; -+ -+ #[test] -+ fn test_proto_buffer_implements_proto_write() { -+ let mut buf = ProtoBuffer::new(); -+ -+ // Test basic write operations via the trait (using fully qualified syntax) -+ ProtoWrite::write_bytes(&mut buf, b"hello").unwrap(); -+ assert_eq!(ProtoWrite::len(&buf), 5); -+ assert_eq!(ProtoWrite::as_slice(&buf), b"hello"); -+ -+ // Test varint encoding via trait -+ ProtoWrite::clear(&mut buf); -+ ProtoWrite::encode_varint(&mut buf, 127).unwrap(); // single byte -+ assert_eq!(buf.len(), 1); -+ assert_eq!(ProtoWrite::as_slice(&buf), &[127]); -+ -+ ProtoWrite::clear(&mut buf); -+ ProtoWrite::encode_varint(&mut buf, 128).unwrap(); // two bytes -+ assert_eq!(buf.len(), 2); -+ -+ ProtoWrite::clear(&mut buf); -+ ProtoWrite::encode_varint(&mut buf, 16384).unwrap(); // three bytes -+ assert_eq!(buf.len(), 3); -+ -+ // Test string encoding via trait -+ ProtoWrite::clear(&mut buf); -+ ProtoWrite::encode_string(&mut buf, 1, "test").unwrap(); -+ assert!(buf.len() > 4); // tag + length + "test" -+ } -+ -+ #[test] -+ fn test_proto_buffer_inherent_methods_unchanged() { -+ // Verify that the original infallible API still works -+ let mut buf = ProtoBuffer::new(); -+ -+ // These are the inherent methods (infallible, no Result) -+ buf.encode_varint(127); -+ assert_eq!(buf.len(), 1); -+ -+ buf.clear(); -+ buf.encode_string(1, "test"); -+ assert!(buf.len() > 4); -+ -+ buf.clear(); -+ buf.extend_from_slice(b"direct"); -+ assert_eq!(buf.as_ref(), b"direct"); -+ } -+ -+ #[test] -+ fn test_fixed_proto_buffer_basic() { -+ let mut buf = FixedProtoBuffer::<64>::new(); -+ -+ assert_eq!(buf.len(), 0); -+ assert_eq!(buf.capacity(), 64); -+ assert_eq!(buf.remaining(), 64); -+ -+ // Write some bytes -+ buf.write_bytes(b"hello").unwrap(); -+ assert_eq!(buf.len(), 5); -+ assert_eq!(buf.remaining(), 59); -+ assert_eq!(buf.as_slice(), b"hello"); -+ -+ // Clear and verify -+ buf.clear(); -+ assert_eq!(buf.len(), 0); -+ assert_eq!(buf.remaining(), 64); -+ } -+ -+ #[test] -+ fn test_fixed_proto_buffer_truncation() { -+ let mut buf = FixedProtoBuffer::<10>::new(); -+ -+ // Write should succeed when under capacity -+ assert!(buf.write_bytes(b"12345").is_ok()); -+ assert_eq!(buf.len(), 5); -+ -+ // Write should succeed when exactly at capacity -+ assert!(buf.write_bytes(b"67890").is_ok()); -+ assert_eq!(buf.len(), 10); -+ -+ // Write should fail when over capacity -+ let result = buf.write_bytes(b"x"); -+ assert_eq!(result, Err(Truncated)); -+ -+ // Buffer contents should be unchanged after failed write -+ assert_eq!(buf.len(), 10); -+ assert_eq!(buf.as_slice(), b"1234567890"); -+ } -+ -+ #[test] -+ fn test_fixed_proto_buffer_varint_truncation() { -+ // Create a buffer that can only hold 1 byte -+ let mut buf = FixedProtoBuffer::<1>::new(); -+ -+ // Small varint (1 byte) should succeed -+ assert!(buf.encode_varint(127).is_ok()); -+ assert_eq!(buf.len(), 1); -+ -+ // Clear and try a larger varint that needs 2 bytes -+ buf.clear(); -+ let result = buf.encode_varint(128); -+ assert_eq!(result, Err(Truncated)); -+ -+ // For the 2-byte fast path, write_bytes checks capacity atomically, -+ // so no partial write occurs (the buffer remains 
empty) -+ assert_eq!(buf.len(), 0); -+ -+ // Test partial write in the general case (3+ byte varints) -+ let mut buf3 = FixedProtoBuffer::<2>::new(); -+ // Value 16384 needs 3 bytes in varint encoding -+ let result = buf3.encode_varint(16384); -+ assert_eq!(result, Err(Truncated)); -+ // The general case writes byte-by-byte, so partial writes can occur -+ assert_eq!(buf3.len(), 2); // two bytes written before failure -+ } -+ -+ #[test] -+ fn test_fixed_proto_buffer_encode_string_truncation() { -+ // Create a small buffer -+ let mut buf = FixedProtoBuffer::<8>::new(); -+ -+ // This should fail because the string + tag + length is > 8 bytes -+ let result = buf.encode_string(1, "hello world"); -+ assert_eq!(result, Err(Truncated)); -+ -+ // Partial contents are in the buffer (useful for truncation recovery) -+ assert!(buf.len() > 0); -+ } -+ -+ #[test] -+ fn test_proto_write_trait_generic_function() { -+ // Demonstrate using a generic function over ProtoWrite -+ fn encode_test_message( -+ buf: &mut W, -+ value: u64, -+ ) -> Result<(), Truncated> { -+ buf.encode_field_tag(1, 0)?; // varint wire type -+ buf.encode_varint(value)?; -+ Ok(()) -+ } -+ -+ // Works with ProtoBuffer (growable) -+ let mut growing_buf = ProtoBuffer::new(); -+ encode_test_message(&mut growing_buf, 12345).unwrap(); -+ assert!(growing_buf.len() > 0); -+ -+ // Works with FixedProtoBuffer -+ let mut fixed_buf = FixedProtoBuffer::<32>::new(); -+ encode_test_message(&mut fixed_buf, 12345).unwrap(); -+ assert_eq!(fixed_buf.as_slice(), growing_buf.as_slice()); -+ -+ // Fixed buffer can fail -+ let mut tiny_buf = FixedProtoBuffer::<2>::new(); -+ let result = encode_test_message(&mut tiny_buf, 12345); -+ assert_eq!(result, Err(Truncated)); -+ } -+ -+ #[test] -+ fn test_fixed_proto_buffer_partial_content_on_truncation() { -+ // This test demonstrates the truncation recovery pattern: -+ // encode as much as possible, then on truncation, use partial content -+ -+ let mut buf = FixedProtoBuffer::<20>::new(); -+ -+ // Simulate encoding multiple attributes -+ let attrs = [("a", "1"), ("b", "2"), ("c", "very long value that won't fit")]; -+ let mut encoded_count = 0; -+ -+ for (key, val) in &attrs { -+ // Try to encode the key-value pair -+ let start_len = buf.len(); -+ if buf.encode_string(1, key).is_err() { -+ break; -+ } -+ if buf.encode_string(2, val).is_err() { -+ // Failed to encode value, could truncate here -+ // For this test, we just break -+ break; -+ } -+ encoded_count += 1; -+ let _ = start_len; // silence unused warning -+ } -+ -+ // We should have encoded at least some attributes -+ assert!(encoded_count >= 1); -+ assert!(encoded_count < attrs.len()); // but not all due to truncation -+ -+ // The buffer contains the partial encoding -+ assert!(buf.len() > 0); -+ } - } -diff --git a/rust/otap-dataflow/crates/pdata/src/otlp/mod.rs b/rust/otap-dataflow/crates/pdata/src/otlp/mod.rs -index f4f4056c..d731e401 100644 ---- a/rust/otap-dataflow/crates/pdata/src/otlp/mod.rs -+++ b/rust/otap-dataflow/crates/pdata/src/otlp/mod.rs -@@ -9,10 +9,15 @@ use crate::{error::Result, otap::OtapArrowRecords}; - use bytes::Bytes; - use otap_df_config::SignalType; - -+pub use common::FixedProtoBuffer; - pub use common::ProtoBuffer; -+pub use common::ProtoWrite; - pub use otap_df_pdata_otlp_macros::Message; // Required for derived code - pub use otap_df_pdata_otlp_macros::qualified; // Required for derived code - -+// Re-export Truncated from error module for convenience -+pub use crate::error::Truncated; -+ - /// Common methods for OTLP/OTAP 
attributes.
-pub mod attributes;
-/// Common methods for batching.
diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/raw_log.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/raw_log.rs
index bc105a43e1..8a449b00a5 100644
--- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/raw_log.rs
+++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/raw_log.rs
@@ -32,15 +32,7 @@ where
 ///
 /// This should be used sparingly, and only in emergencies! This is a good
 /// configuration for diagnosing other internal logging facilities,
-/// because it is unbuffered and uses a dedicated
-///
-///
-/// # Example
-///
-/// ```ignore
-/// use otap_df_telemetry::raw_error;
-/// raw_error!("Connection failed: {}", error);
-/// ```
+/// because it is unbuffered and overrides the tracing subscriber.
 #[macro_export]
 macro_rules! raw_error {
     ($name:expr, $(,)?) => {

From ecaf6230b688df8f897639252af570900f710b7f Mon Sep 17 00:00:00 2001
From: Joshua MacDonald
Date: Fri, 9 Jan 2026 16:35:38 -0800
Subject: [PATCH 52/92] bare

---
 .../src/pipeline/service/telemetry/logs.rs    | 244 ++++--------------
 .../telemetry/src/opentelemetry_client.rs     |  37 +--
 2 files changed, 71 insertions(+), 210 deletions(-)

diff --git a/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs b/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs
index 5b4b7ce7cd..06d49884a7 100644
--- a/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs
+++ b/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs
@@ -19,233 +19,103 @@ pub struct LogsConfig {
     #[serde(default)]
     pub strategies: LoggingStrategies,
 
-    /// How the admin thread handles received log events.
-    #[serde(default)]
-    pub output: LogOutputConfig,
-
     /// The list of log processors to configure (for OpenTelemetry SDK output mode).
     /// Only used when `output.mode` is set to `opentelemetry`.
     #[serde(default)]
     pub processors: Vec,
 }
 
+/// Log level for internal engine logs.
+#[derive(Debug, Clone, Copy, Serialize, Deserialize, JsonSchema, Default, PartialEq)]
+#[serde(rename_all = "lowercase")]
+pub enum LogLevel {
+    /// Logging is completely disabled.
+    Off,
+    /// Debug level logging.
+    Debug,
+    /// Info level logging.
+    #[default]
+    Info,
+    /// Warn level logging.
+    Warn,
+    /// Error level logging.
+    Error,
+}
+
 /// Logging strategies for different execution contexts.
 ///
 /// Controls how log events are captured and routed to the admin thread.
 #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
 pub struct LoggingStrategies {
-    /// Strategy for non-engine threads (admin, metrics aggregator, etc.).
-    /// These threads don't have an EffectHandler and use the global tracing subscriber.
-    /// Default: `global` (send to admin channel).
+    /// Strategy for non-engine threads.
     #[serde(default = "default_global_strategy")]
-    pub global: ProducerStrategy,
+    pub global: ProviderMode,
 
     /// Strategy for engine/pipeline threads.
-    /// These threads have an EffectHandler and use buffered logging.
-    /// Default: `buffered` (thread-local buffer, batch flush on timer).
     #[serde(default = "default_engine_strategy")]
-    pub engine: ProducerStrategy,
-}
+    pub engine: ProviderMode,
 
-impl Default for LoggingStrategies {
-    fn default() -> Self {
-        Self {
-            global: default_global_strategy(),
-            engine: default_engine_strategy(),
-        }
-    }
-}
+    /// Default for internal telemetry-reporting components.
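+    ///
+    /// A hypothetical YAML sketch (not part of this patch; values follow the
+    /// lowercase serde rename on `ProviderMode`, as in the removed tests below):
+    ///
+    /// ```ignore
+    /// let yaml = r#"
+    /// level: "info"
+    /// strategies:
+    ///   global: regional
+    ///   engine: regional
+    ///   internal: noop
+    /// "#;
+    /// let config: LogsConfig = serde_yaml::from_str(yaml).unwrap();
+    /// assert_eq!(config.strategies.internal, ProviderMode::Noop);
+    /// ```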
+ #[serde(default = "default_internal_strategy")] + pub internal: ProviderMode, } -fn default_global_strategy() -> ProducerStrategy { - ProducerStrategy::Global -} - -fn default_engine_strategy() -> ProducerStrategy { - ProducerStrategy::Buffered -} - -/// Producer strategy: how log events are captured and routed to the admin thread. -/// -/// Used to configure logging behavior for different thread types: -/// - Global subscriber for non-engine threads -/// - Engine threads with EffectHandler -/// - Per-component (future: for ITR downstream to prevent feedback) +/// Logs producer: how log events are captured and routed. #[derive(Debug, Clone, Copy, Serialize, Deserialize, JsonSchema, PartialEq, Eq)] #[serde(rename_all = "lowercase")] -pub enum ProducerStrategy { +pub enum ProviderMode { /// No-op: log events are silently dropped. /// Use for ITR-downstream components to prevent feedback loops. Noop, - /// Global channel: send individual events to the admin collector thread. - /// Non-blocking (drops if channel full). Default for non-engine threads. - Global, - - /// Buffered: accumulate events in thread-local buffer, flush on timer. - /// Default for engine threads. Events are batched before sending to admin. - Buffered, -} - -/// Configuration for how the admin thread outputs received log events. -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] -pub struct LogOutputConfig { - /// The output mode for log events received by the admin thread. - #[serde(default = "default_output_mode")] - pub mode: OutputMode, - - /// Ring buffer capacity for `memory` mode (number of log entries). - /// Also used for the `/logs` HTTP endpoint regardless of mode. - #[serde(default = "default_ring_buffer_capacity")] - pub ring_buffer_capacity: usize, -} - -impl Default for LogOutputConfig { - fn default() -> Self { - Self { - mode: default_output_mode(), - ring_buffer_capacity: default_ring_buffer_capacity(), - } - } -} + /// Regional channel: send individual events to a regional thread. + /// Drop events when full. + Regional, -fn default_output_mode() -> OutputMode { - OutputMode::Raw + /// Use OTel as the first class provider. + OpenTelemetry, } -fn default_ring_buffer_capacity() -> usize { - 1000 -} - -/// Output mode: what the admin thread does with received log events. +/// Output mode: what the recipient does with received log events. #[derive(Debug, Clone, Copy, Serialize, Deserialize, JsonSchema, PartialEq, Eq)] #[serde(rename_all = "lowercase")] pub enum OutputMode { + /// Disable output. + Noop, + /// Raw logging: format and print directly to console (stdout/stderr). /// ERROR/WARN go to stderr, others to stdout. Raw, - /// Memory only: store in ring buffer for `/logs` HTTP endpoint. - /// No console output. Useful for headless/production deployments. - Memory, + /// [Demonstrated]: Deliver to a dedicated telemetry pipeline. + Pipeline, - /// OpenTelemetry SDK: forward to OTel logging SDK with configured processors. - /// Events are sent through the OTel appender bridge for OTLP export. - Opentelemetry, -} + /// [Hypothetical]: Store in a memory ring buffer for `/logs` HTTP endpoint. + Memory, -/// Log level for internal engine logs. -#[derive(Debug, Clone, Copy, Serialize, Deserialize, JsonSchema, Default, PartialEq)] -#[serde(rename_all = "lowercase")] -pub enum LogLevel { - /// Logging is completely disabled. - Off, - /// Debug level logging. - Debug, - /// Info level logging. - #[default] - Info, - /// Warn level logging. - Warn, - /// Error level logging. 
- Error, + /// [Hypothetical]: Forward OTLP bytes into the OTel SDK pipeline (requires + /// OTLP-bytes-to-SDK-event). + OpenTelemetry, } -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_logs_config_deserialize() { - let yaml_str = r#" - level: "info" - processors: - - batch: - exporter: - console: - "#; - let config: LogsConfig = serde_yaml::from_str(yaml_str).unwrap(); - assert_eq!(config.level, LogLevel::Info); - assert_eq!(config.processors.len(), 1); - } - - #[test] - fn test_log_level_deserialize() { - let yaml_str = r#" - level: "info" - "#; - let config: LogsConfig = serde_yaml::from_str(yaml_str).unwrap(); - assert_eq!(config.level, LogLevel::Info); - } - - #[test] - fn test_logs_config_default_deserialize() -> Result<(), serde_yaml::Error> { - let yaml_str = r#""#; - let config: LogsConfig = serde_yaml::from_str(yaml_str)?; - assert_eq!(config.level, LogLevel::Info); - assert!(config.processors.is_empty()); - Ok(()) - } - - #[test] - fn test_logging_strategies_deserialize() { - let yaml_str = r#" - level: "info" - strategies: - global: global - engine: buffered - "#; - let config: LogsConfig = serde_yaml::from_str(yaml_str).unwrap(); - assert_eq!(config.strategies.global, ProducerStrategy::Global); - assert_eq!(config.strategies.engine, ProducerStrategy::Buffered); - } - - #[test] - fn test_logging_strategies_default() { - let config = LogsConfig::default(); - assert_eq!(config.strategies.global, ProducerStrategy::Global); - assert_eq!(config.strategies.engine, ProducerStrategy::Buffered); - assert_eq!(config.output.mode, OutputMode::Raw); +impl Default for LoggingStrategies { + fn default() -> Self { + Self { + global: default_global_strategy(), + engine: default_engine_strategy(), + internal: default_internal_strategy(), + } } +} - #[test] - fn test_output_modes() { - let yaml_str = r#" - level: "info" - output: - mode: memory - ring_buffer_capacity: 5000 - "#; - let config: LogsConfig = serde_yaml::from_str(yaml_str).unwrap(); - assert_eq!(config.output.mode, OutputMode::Memory); - assert_eq!(config.output.ring_buffer_capacity, 5000); - } +fn default_global_strategy() -> ProviderMode { + ProviderMode::Regional +} - #[test] - fn test_opentelemetry_output() { - let yaml_str = r#" - level: "info" - output: - mode: opentelemetry - processors: - - batch: - exporter: - console: - "#; - let config: LogsConfig = serde_yaml::from_str(yaml_str).unwrap(); - assert_eq!(config.output.mode, OutputMode::Opentelemetry); - assert_eq!(config.processors.len(), 1); - } +fn default_engine_strategy() -> ProviderMode { + ProviderMode::Regional +} - #[test] - fn test_noop_strategy_for_itr() { - let yaml_str = r#" - level: "info" - strategies: - global: noop - engine: noop - "#; - let config: LogsConfig = serde_yaml::from_str(yaml_str).unwrap(); - assert_eq!(config.strategies.global, ProducerStrategy::Noop); - assert_eq!(config.strategies.engine, ProducerStrategy::Noop); - } +fn default_internal_strategy() -> ProviderMode { + ProviderMode::Noop } diff --git a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs b/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs index 6c206dc25b..b4d94e46f6 100644 --- a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs +++ b/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs @@ -10,11 +10,12 @@ use opentelemetry::KeyValue; use opentelemetry_sdk::{Resource, logs::SdkLoggerProvider, metrics::SdkMeterProvider}; use otap_df_config::pipeline::service::telemetry::{ AttributeValue, AttributeValueArray, 
TelemetryConfig, - logs::{LogLevel, ProducerStrategy}, + logs::{LogLevel, ProviderMode}, }; use tracing::level_filters::LevelFilter; -use tracing_subscriber::EnvFilter; -use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt}; +use tracing_subscriber::{ + EnvFilter, layer::SubscriberExt, util::SubscriberInitExt, util::TryInitError, +}; use crate::{ error::Error, @@ -96,43 +97,33 @@ impl OpentelemetryClient { let tracing_setup = tracing_subscriber::registry().with(get_env_filter(config.logs.level)); - let logerr = |err| { - use std::io::Write; - let _ = std::io::stderr().write_fmt(format_args!( - "could not install global tracing/logging subscriber: {err}" - )); + let logerr = |err: TryInitError| { + crate::raw_error!("tracing.subscriber.init", error = err.to_string()); }; // Configure the global subscriber based on strategies.global. // Engine threads override this with BufferWriterLayer via with_default(). match config.logs.strategies.global { - ProducerStrategy::Noop => { + ProviderMode::Noop => { // No-op: just install the filter, events are dropped - if let Err(err) = tracing_setup.try_init() { + if let Err(err) = tracing::subscriber::NoSubscriber::new().try_init() { logerr(err); } } - ProducerStrategy::Global => { - // Global channel: send events to admin collector thread + ProviderMode::Regional => { + // Regional channel: send events to the appropriate logs collector thread let channel_layer = DirectChannelLayer::new(logs_reporter); if let Err(err) = tracing_setup.with(channel_layer).try_init() { logerr(err); } } - ProducerStrategy::Buffered => { - // Buffered is only valid for engine threads, treat as global for global subscriber - // This is a misconfiguration, but we handle it gracefully - let channel_layer = DirectChannelLayer::new(logs_reporter); - if let Err(err) = tracing_setup.with(channel_layer).try_init() { - logerr(err); - } + ProviderMode::OpenTelemetry => { + // @@@ TODO!!! } } - // Note: OpenTelemetry SDK forwarding is handled by the LogsCollector on the admin thread, - // not at the global subscriber level. The output.mode config controls that behavior. - - //TODO: Configure traces provider. + // Note: Any span-level detail, typically through a traces provider, has + // to be configured via the try_init() cases above. Ok(Self { _runtime: runtime, From 20a783164e282c5bcecd8c045fa4610c60772f3a Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Fri, 9 Jan 2026 17:15:44 -0800 Subject: [PATCH 53/92] wip --- .../src/pipeline/service/telemetry/logs.rs | 73 ++++++++++++------- .../otap-dataflow/crates/telemetry/src/lib.rs | 27 ++++++- .../crates/telemetry/src/logs.rs | 72 ++++++------------ .../telemetry/src/opentelemetry_client.rs | 63 ++-------------- .../crates/telemetry/src/self_tracing.rs | 11 +-- 5 files changed, 100 insertions(+), 146 deletions(-) diff --git a/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs b/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs index 06d49884a7..12b6897bfd 100644 --- a/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs +++ b/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs @@ -9,19 +9,22 @@ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; /// Internal logs configuration. -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)] +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] pub struct LogsConfig { /// The log level for internal engine logs. 
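+    ///
+    /// Note: without per-field `#[serde(default)]`, these fields are no longer
+    /// defaulted individually during deserialization; defaults come only from
+    /// the explicit `impl Default for LogsConfig`.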
-    #[serde(default)]
     pub level: LogLevel,
 
     /// Logging strategy configuration for different thread contexts.
-    #[serde(default)]
     pub strategies: LoggingStrategies,
 
+    /// The level at which to consider a second fallback strategy.
+    pub fallback_level: LogLevel,
+
+    /// Logging strategy configuration for different thread contexts.
+    pub fallbacks: LoggingStrategies,
+
     /// The list of log processors to configure (for OpenTelemetry SDK output mode).
     /// Only used when `output.mode` is set to `opentelemetry`.
-    #[serde(default)]
     pub processors: Vec,
 }
@@ -48,15 +51,12 @@ pub struct LoggingStrategies {
     /// Strategy for non-engine threads.
-    #[serde(default = "default_global_strategy")]
     pub global: ProviderMode,
 
     /// Strategy for engine/pipeline threads.
-    #[serde(default = "default_engine_strategy")]
     pub engine: ProviderMode,
 
     /// Default for internal telemetry-reporting components.
-    #[serde(default = "default_internal_strategy")]
     pub internal: ProviderMode,
 }
@@ -64,16 +64,21 @@ pub struct LoggingStrategies {
 #[derive(Debug, Clone, Copy, Serialize, Deserialize, JsonSchema, PartialEq, Eq)]
 #[serde(rename_all = "lowercase")]
 pub enum ProviderMode {
-    /// No-op: log events are silently dropped.
-    /// Use for ITR-downstream components to prevent feedback loops.
+    /// Log events are silently ignored.
     Noop,
 
-    /// Regional channel: send individual events to a regional thread.
-    /// Drop events when full.
-    Regional,
+    /// Regional delivery: send to a buffered channel.
+    Buffered,
+
+    /// Regional delivery: send to an unbuffered channel.
+    Unbuffered,
 
-    /// Use OTel as the first class provider.
+    /// Use OTel-Rust as the provider.
     OpenTelemetry,
+
+    /// Use synchronous logging. This is harmful for performance but
+    /// can be used for development or as a fallback configuration.
+    Raw,
 }
 
 /// Output mode: what the recipient does with received log events.
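As a quick sanity check on the lowercase rename, a hypothetical test (not part of this patch; `serde_yaml` is already used by the removed tests in this file) would map strings onto the revised `ProviderMode` like so:

```rust
use otap_df_config::pipeline::service::telemetry::logs::ProviderMode;

#[test]
fn provider_mode_lowercase_names() {
    // rename_all = "lowercase" maps each unit variant to its lowercase name.
    let m: ProviderMode = serde_yaml::from_str("buffered").unwrap();
    assert_eq!(m, ProviderMode::Buffered);
    let m: ProviderMode = serde_yaml::from_str("opentelemetry").unwrap();
    assert_eq!(m, ProviderMode::OpenTelemetry);
    // Serialization round-trips through the same lowercase form.
    assert_eq!(serde_yaml::to_string(&ProviderMode::Raw).unwrap().trim(), "raw");
}
```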
@@ -98,24 +103,38 @@ pub enum OutputMode { OpenTelemetry, } -impl Default for LoggingStrategies { - fn default() -> Self { - Self { - global: default_global_strategy(), - engine: default_engine_strategy(), - internal: default_internal_strategy(), - } - } +fn default_level() -> LogLevel { + LogLevel::Off } -fn default_global_strategy() -> ProviderMode { - ProviderMode::Regional +fn default_fallback_level() -> LogLevel { + LogLevel::Error } -fn default_engine_strategy() -> ProviderMode { - ProviderMode::Regional +fn default_strategies() -> LoggingStrategies { + LoggingStrategies { + global: ProviderMode::Buffered, + engine: ProviderMode::Buffered, + internal: ProviderMode::Noop, + } +} + +fn default_fallback_strategies() -> LoggingStrategies { + LoggingStrategies { + global: ProviderMode::Raw, + engine: ProviderMode::Raw, + internal: ProviderMode::Noop, + } } -fn default_internal_strategy() -> ProviderMode { - ProviderMode::Noop +impl Default for LogsConfig { + fn default() -> Self { + Self { + level: default_level(), + strategies: default_strategies(), + fallback_level: default_fallback_level(), + fallbacks: default_fallback_strategies(), + processors: Vec::new(), + } + } } diff --git a/rust/otap-dataflow/crates/telemetry/src/lib.rs b/rust/otap-dataflow/crates/telemetry/src/lib.rs index 3638ad15c0..bad3c00238 100644 --- a/rust/otap-dataflow/crates/telemetry/src/lib.rs +++ b/rust/otap-dataflow/crates/telemetry/src/lib.rs @@ -27,7 +27,10 @@ use std::sync::Arc; use crate::error::Error; use crate::registry::MetricsRegistryHandle; use otap_df_config::pipeline::service::telemetry::TelemetryConfig; +use otap_df_config::pipeline::service::telemetry::logs::LogLevel; use tokio_util::sync::CancellationToken; +use tracing::level_filters::LevelFilter; +use tracing_subscriber::EnvFilter; pub mod attributes; pub mod collector; @@ -63,9 +66,9 @@ pub use tracing::warn_span as otel_warn_span; // Re-export commonly used logs types for convenience. pub use logs::{ - BufferWriterLayer, DirectChannelLayer, LogsCollector, LogsReporter, ProducerKeyGuard, - current_producer_key, drain_thread_log_buffer, install_thread_log_buffer, - uninstall_thread_log_buffer, with_engine_thread_subscriber, + BufferWriterLayer, LogsCollector, LogsReporter, UnbufferedChannelLayer, + drain_thread_log_buffer, install_thread_log_buffer, uninstall_thread_log_buffer, + with_engine_thread_subscriber, }; // TODO This should be #[cfg(test)], but something is preventing it from working. @@ -157,3 +160,21 @@ impl Default for MetricsSystem { Self::new(&TelemetryConfig::default()) } } + +// If RUST_LOG is set, use it for fine-grained control. +// Otherwise, fall back to the config level with some noisy dependencies silenced. +// Users can override by setting RUST_LOG explicitly. +pub(crate) fn get_env_filter(level: LogLevel) -> EnvFilter { + let level = match level { + LogLevel::Off => LevelFilter::OFF, + LogLevel::Debug => LevelFilter::DEBUG, + LogLevel::Info => LevelFilter::INFO, + LogLevel::Warn => LevelFilter::WARN, + LogLevel::Error => LevelFilter::ERROR, + }; + + EnvFilter::try_from_default_env().unwrap_or_else(|_| { + // Default filter: use config level, but silence known noisy HTTP dependencies + EnvFilter::new(format!("{level},h2=off,hyper=off")) + }) +} diff --git a/rust/otap-dataflow/crates/telemetry/src/logs.rs b/rust/otap-dataflow/crates/telemetry/src/logs.rs index b8d5f6d31a..7e2a3ce702 100644 --- a/rust/otap-dataflow/crates/telemetry/src/logs.rs +++ b/rust/otap-dataflow/crates/telemetry/src/logs.rs @@ -4,7 +4,7 @@ //! 
Internal logs collection for OTAP-Dataflow.
 
 use crate::error::Error;
-use crate::self_tracing::{ConsoleWriter, LogRecord, ProducerKey, SavedCallsite};
+use crate::self_tracing::{ConsoleWriter, LogRecord, SavedCallsite};
 use std::cell::RefCell;
 use tracing::{Event, Subscriber};
 use tracing_subscriber::filter::LevelFilter;
@@ -20,6 +20,14 @@ pub struct LogBatch {
     pub dropped_count: usize,
 }
 
+/// A log payload of one of two kinds: a single record or a batch.
+pub enum LogPayload {
+    /// A single record.
+    Singleton(LogRecord),
+    /// A batch.
+    Batch(LogBatch),
+}
+
 impl LogBatch {
     /// The total number of records counted as dropped if this batch is dropped.
     pub fn size_with_dropped(&self) -> usize {
@@ -30,7 +38,6 @@ impl LogBatch {
 /// Thread-local log buffer for a pipeline thread.
 pub struct LogBuffer {
     batch: LogBatch,
-    active: Option<ProducerKey>,
 }
 
 impl LogBuffer {
@@ -42,7 +49,6 @@ impl LogBuffer {
                 records: Vec::with_capacity(capacity),
                 dropped_count: 0,
             },
-            active: None,
         }
     }
 
@@ -69,40 +75,6 @@ thread_local! {
     static CURRENT_LOG_BUFFER: RefCell<Option<LogBuffer>> = const { RefCell::new(None) };
 }
 
-/// Guard that sets the current producer key for the duration of a scope.
-pub struct ProducerKeyGuard {
-    previous: Option<ProducerKey>,
-}
-
-impl ProducerKeyGuard {
-    /// Enter a scope with the given producer key.
-    #[must_use]
-    pub fn enter(key: ProducerKey) -> Self {
-        let previous = CURRENT_LOG_BUFFER
-            .with(|cell| cell.borrow_mut().as_mut().map(|b| b.active.replace(key)))
-            .flatten();
-        Self { previous }
-    }
-}
-
-impl Drop for ProducerKeyGuard {
-    fn drop(&mut self) {
-        let _ = CURRENT_LOG_BUFFER.with(|cell| {
-            cell.borrow_mut().as_mut().map(|b| {
-                b.active = self.previous;
-            })
-        });
-    }
-}
-
-/// Get the current producer key
-#[must_use]
-pub fn current_producer_key() -> Option<ProducerKey> {
-    CURRENT_LOG_BUFFER
-        .with(|cell| cell.borrow().as_ref().map(|b| b.active))
-        .flatten()
-}
-
 /// Install a log buffer for the current thread.
 pub fn install_thread_log_buffer(capacity: usize) {
     CURRENT_LOG_BUFFER.with(|cell| {
@@ -129,27 +101,27 @@
 /// Reporter for sending log batches through a channel.
 #[derive(Clone)]
 pub struct LogsReporter {
-    sender: flume::Sender<LogBatch>,
+    sender: flume::Sender<LogPayload>,
 }
 
 impl LogsReporter {
     /// Create a new LogsReporter with the given sender.
     #[must_use]
-    pub fn new(sender: flume::Sender<LogBatch>) -> Self {
+    pub fn new(sender: flume::Sender<LogPayload>) -> Self {
         Self { sender }
     }
 
     /// Try to send a batch, non-blocking.
-    pub fn try_report(&self, batch: LogBatch) -> Result<(), Error> {
+    pub fn try_report(&self, payload: LogPayload) -> Result<(), Error> {
         self.sender
-            .try_send(batch)
+            .try_send(payload)
             .map_err(|e| Error::LogSendError(e.to_string()))
     }
 }
 
 /// Collector that receives log batches and writes them to console.
 pub struct LogsCollector {
-    receiver: flume::Receiver<LogBatch>,
+    receiver: flume::Receiver<LogPayload>,
     writer: ConsoleWriter,
 }
 
@@ -204,39 +176,37 @@ where
     S: Subscriber + for<'a> LookupSpan<'a>,
 {
     fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) {
-        let producer_key = current_producer_key();
-        let record = LogRecord::new(event, producer_key);
+        let record = LogRecord::new(event);
 
         CURRENT_LOG_BUFFER.with(|cell| {
             if let Some(ref mut buffer) = *cell.borrow_mut() {
                 buffer.push(record);
             }
-            // No buffer = programming error on engine thread, silently drop
+            // TODO: Fallback consideration.
        });
     }
 }
 
 /// A tracing Layer for non-engine threads that sends directly to channel.
-pub struct DirectChannelLayer {
+pub struct UnbufferedChannelLayer {
     /// Reporter for sending to the channel.
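+    /// (A cheap clone of the underlying `flume` sender; `try_report` uses
+    /// `try_send`, so a full channel drops the event rather than blocking
+    /// the producing thread.)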
reporter: LogsReporter, } -impl DirectChannelLayer { - /// Create a new DirectChannelLayer with the given reporter. +impl UnbufferedChannelLayer { + /// Create a new unbuffered channel. #[must_use] pub fn new(reporter: LogsReporter) -> Self { Self { reporter } } } -impl TracingLayer for DirectChannelLayer +impl TracingLayer for UnbufferedChannelLayer where S: Subscriber + for<'a> LookupSpan<'a>, { fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) { - // Non-engine threads don't have producer_key context - let record = LogRecord::new(event, None); + let record = LogRecord::new(event); let batch = LogBatch { records: vec![record], dropped_count: 0, diff --git a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs b/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs index b4d94e46f6..2b92eff0a9 100644 --- a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs +++ b/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs @@ -9,18 +9,12 @@ pub mod meter_provider; use opentelemetry::KeyValue; use opentelemetry_sdk::{Resource, logs::SdkLoggerProvider, metrics::SdkMeterProvider}; use otap_df_config::pipeline::service::telemetry::{ - AttributeValue, AttributeValueArray, TelemetryConfig, - logs::{LogLevel, ProviderMode}, -}; -use tracing::level_filters::LevelFilter; -use tracing_subscriber::{ - EnvFilter, layer::SubscriberExt, util::SubscriberInitExt, util::TryInitError, + AttributeValue, AttributeValueArray, TelemetryConfig, logs::ProviderMode, }; +use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, util::TryInitError}; use crate::{ - error::Error, - logs::{DirectChannelLayer, LogsReporter}, - opentelemetry_client::meter_provider::MeterProvider, + error::Error, logs::LogsReporter, opentelemetry_client::meter_provider::MeterProvider, }; /// Client for the OpenTelemetry SDK. @@ -33,24 +27,6 @@ pub struct OpentelemetryClient { // TODO: Add traces providers. } -// If RUST_LOG is set, use it for fine-grained control. -// Otherwise, fall back to the config level with some noisy dependencies silenced. -// Users can override by setting RUST_LOG explicitly. -fn get_env_filter(level: LogLevel) -> EnvFilter { - let level = match level { - LogLevel::Off => LevelFilter::OFF, - LogLevel::Debug => LevelFilter::DEBUG, - LogLevel::Info => LevelFilter::INFO, - LogLevel::Warn => LevelFilter::WARN, - LogLevel::Error => LevelFilter::ERROR, - }; - - EnvFilter::try_from_default_env().unwrap_or_else(|_| { - // Default filter: use config level, but silence known noisy HTTP dependencies - EnvFilter::new(format!("{level},h2=off,hyper=off")) - }) -} - impl OpentelemetryClient { /// Create a new OpenTelemetry client from the given configuration. /// @@ -63,30 +39,6 @@ impl OpentelemetryClient { /// When `RUST_LOG` is set, it takes precedence and allows filtering by target. /// Example: `RUST_LOG=info,h2=warn,hyper=warn` enables info level but silences /// noisy HTTP/2 and hyper logs. - /// - /// TODO: The engine uses a thread-per-core model - /// and is NUMA aware. - /// The fmt::init() here is truly global, and hence - /// this will be a source of contention. - /// We need to evaluate alternatives: - /// - /// 1. Set up per thread subscriber. - /// ```ignore - /// // start of thread - /// let _guard = tracing::subscriber::set_default(subscriber); - /// // now, with this thread, all tracing calls will go to this subscriber - /// // eliminating contention. - /// // end of thread - /// ``` - /// - /// 2. 
Use custom subscriber that batches logs in thread-local buffer, and - /// flushes them periodically. - /// - /// The TODO here is to evaluate these options and implement one of them. - /// As of now, this causes contention, and we just need to accept temporarily. - /// - /// TODO: Evaluate also alternatives for the contention caused by the global - /// OpenTelemetry logger provider added as layer. pub fn new(config: &TelemetryConfig, logs_reporter: LogsReporter) -> Result { let sdk_resource = Self::configure_resource(&config.resource); @@ -95,7 +47,8 @@ impl OpentelemetryClient { let (meter_provider, runtime) = MeterProvider::configure(sdk_resource, &config.metrics, runtime)?.into_parts(); - let tracing_setup = tracing_subscriber::registry().with(get_env_filter(config.logs.level)); + let tracing_setup = + tracing_subscriber::registry().with(crate::get_env_filter(config.logs.level)); let logerr = |err: TryInitError| { crate::raw_error!("tracing.subscriber.init", error = err.to_string()); @@ -110,15 +63,15 @@ impl OpentelemetryClient { logerr(err); } } - ProviderMode::Regional => { + ProviderMode::Buffered => { // Regional channel: send events to the appropriate logs collector thread - let channel_layer = DirectChannelLayer::new(logs_reporter); + let channel_layer = BufferedChannelLayer::new(logs_reporter); if let Err(err) = tracing_setup.with(channel_layer).try_init() { logerr(err); } } ProviderMode::OpenTelemetry => { - // @@@ TODO!!! + // @@@ TODO!!! bring this back } } diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing.rs index 378eacb89e..db4ed4f998 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing.rs @@ -21,10 +21,6 @@ use tracing::{Event, Level, Metadata}; pub use encoder::DirectLogRecordEncoder; pub use formatter::{ConsoleWriter, RawLoggingLayer}; -/// Optional key identifying the producing component. -/// TODO: This is re-exported, instead rename the underlying type. -pub type ProducerKey = crate::registry::MetricsKey; - /// A log record with structural metadata and pre-encoded body/attributes. #[derive(Debug, Clone)] pub struct LogRecord { @@ -39,10 +35,6 @@ pub struct LogRecord { /// in practice and/or parsed by a crate::proto::opentelemetry::logs::v1::LogRecord /// message object for testing. pub body_attrs_bytes: Bytes, - - /// Optional key identifying the producing component (for first-party logs). - /// None for third-party logs from libraries. - pub producer_key: Option, } /// Saved callsite information. This is information that can easily be @@ -95,7 +87,7 @@ impl SavedCallsite { impl LogRecord { /// Construct a log record, partially encoding its dynamic content. #[must_use] - pub fn new(event: &Event<'_>, producer_key: Option) -> Self { + pub fn new(event: &Event<'_>) -> Self { let metadata = event.metadata(); // Encode body and attributes to bytes. 
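For orientation, a minimal sketch of the one-argument constructor in use (a hypothetical layer, not in this patch; it assumes the `self_tracing` module is publicly reachable):

```rust
use otap_df_telemetry::self_tracing::LogRecord;
use tracing::{Event, Subscriber};
use tracing_subscriber::{layer::Context, registry::LookupSpan, Layer};

/// Hypothetical pass-through layer: encode on the hot path, format later.
struct SketchLayer;

impl<S> Layer<S> for SketchLayer
where
    S: Subscriber + for<'a> LookupSpan<'a>,
{
    fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) {
        // The producer-key argument is gone; everything the formatter needs
        // travels in the callsite metadata plus the pre-encoded bytes.
        let _record = LogRecord::new(event);
    }
}
```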
@@ -110,7 +102,6 @@ impl LogRecord { callsite_id: metadata.callsite(), timestamp_ns: Self::get_timestamp_nanos(), body_attrs_bytes: buf.into_bytes(), - producer_key, } } From ec57fb0c50ce9278561a902dcb9a483f3fcb3a6b Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Fri, 9 Jan 2026 23:10:06 -0800 Subject: [PATCH 54/92] save --- .../crates/engine/src/pipeline_ctrl.rs | 4 +-- .../crates/telemetry/src/logs.rs | 36 +++++++++++-------- .../telemetry/src/opentelemetry_client.rs | 9 +++-- .../telemetry/src/self_tracing/formatter.rs | 6 ++-- 4 files changed, 32 insertions(+), 23 deletions(-) diff --git a/rust/otap-dataflow/crates/engine/src/pipeline_ctrl.rs b/rust/otap-dataflow/crates/engine/src/pipeline_ctrl.rs index 2aecdc5112..f861b0a477 100644 --- a/rust/otap-dataflow/crates/engine/src/pipeline_ctrl.rs +++ b/rust/otap-dataflow/crates/engine/src/pipeline_ctrl.rs @@ -19,7 +19,7 @@ use otap_df_config::pipeline::TelemetrySettings; use otap_df_state::DeployedPipelineKey; use otap_df_state::event::{ErrorSummary, ObservedEvent}; use otap_df_state::reporter::ObservedEventReporter; -use otap_df_telemetry::logs::{LogsReporter, drain_thread_log_buffer}; +use otap_df_telemetry::logs::{LogsReporter, LogPayload, drain_thread_log_buffer}; use otap_df_telemetry::reporter::MetricsReporter; use otap_df_telemetry::{otel_error, otel_warn}; use std::cmp::Reverse; @@ -357,7 +357,7 @@ impl PipelineCtrlMsgManager { // Flush internal logs from the thread-local buffer if let Some(batch) = drain_thread_log_buffer() { let count = batch.size_with_dropped(); - if let Err(err) = self.logs_reporter.try_report(batch) { + if let Err(err) = self.logs_reporter.try_report(LogPayload::Batch(batch)) { otel_error!("logs.reporting.fail", error = err.to_string(), dropped = count); } } diff --git a/rust/otap-dataflow/crates/telemetry/src/logs.rs b/rust/otap-dataflow/crates/telemetry/src/logs.rs index 7e2a3ce702..30e19b4874 100644 --- a/rust/otap-dataflow/crates/telemetry/src/logs.rs +++ b/rust/otap-dataflow/crates/telemetry/src/logs.rs @@ -142,8 +142,8 @@ impl LogsCollector { pub async fn run(self) -> Result<(), Error> { loop { match self.receiver.recv_async().await { - Ok(batch) => { - self.write_batch(batch); + Ok(payload) => { + self.write_batch(payload); } Err(err) => { crate::raw_error!("log collector error:", err = err.to_string()); @@ -154,17 +154,26 @@ impl LogsCollector { } /// Write a batch of log records to console. - fn write_batch(&self, batch: LogBatch) { + fn write_batch(&self, payload: LogPayload) { // TODO: Print dropped count as a formatted warning before the batch - for record in batch.records { - // Identifier.0 is the &'static dyn Callsite - let metadata = record.callsite_id.0.metadata(); - let saved = SavedCallsite::new(metadata); - // Use ConsoleWriter's routing: ERROR/WARN to stderr, others to stdout - self.writer.raw_print(&record, &saved); - // TODO: include producer_key in output when present + match payload { + LogPayload::Singleton(record) => self.write_record(record), + LogPayload::Batch(batch) => { + for record in batch.records { + self.write_record(record); + } + } } } + + /// Write one record. + fn write_record(&self, record: LogRecord) { + // Identifier.0 is the &'static dyn Callsite + let metadata = record.callsite_id.0.metadata(); + let saved = SavedCallsite::new(metadata); + // Use ConsoleWriter's routing: ERROR/WARN to stderr, others to stdout + self.writer.raw_print(&record, &saved); + } } /// A tracing Layer for engine threads that writes to thread-local LogBuffer. 
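Condensed, the collector's dispatch above funnels both payload shapes through one per-record path. A sketch (hypothetical channel wiring; the `write_record` closure stands in for the console path):

```rust
use otap_df_telemetry::{logs::LogPayload, self_tracing::LogRecord};

// Hypothetical consumer: one channel, two payload shapes, one record path.
fn drain(rx: &flume::Receiver<LogPayload>, mut write_record: impl FnMut(LogRecord)) {
    while let Ok(payload) = rx.try_recv() {
        match payload {
            LogPayload::Singleton(record) => write_record(record),
            LogPayload::Batch(batch) => {
                // TODO in the real collector: surface batch.dropped_count.
                for record in batch.records {
                    write_record(record);
                }
            }
        }
    }
}
```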
@@ -207,12 +216,9 @@ where { fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) { let record = LogRecord::new(event); - let batch = LogBatch { - records: vec![record], - dropped_count: 0, - }; + let payload = LogPayload::Singleton(record); - match self.reporter.try_report(batch) { + match self.reporter.try_report(payload) { Ok(()) => {} Err(err) => { crate::raw_error!("failed to send log batch", err = err.to_string()); diff --git a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs b/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs index 2b92eff0a9..d2838bb09b 100644 --- a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs +++ b/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs @@ -14,7 +14,7 @@ use otap_df_config::pipeline::service::telemetry::{ use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, util::TryInitError}; use crate::{ - error::Error, logs::LogsReporter, opentelemetry_client::meter_provider::MeterProvider, + error::Error, logs::{LogsReporter, UnbufferedChannelLayer}, opentelemetry_client::meter_provider::MeterProvider, }; /// Client for the OpenTelemetry SDK. @@ -63,9 +63,14 @@ impl OpentelemetryClient { logerr(err); } } + ProviderMode::Unbuffered => { + } + ProviderMode::Raw => { + } ProviderMode::Buffered => { // Regional channel: send events to the appropriate logs collector thread - let channel_layer = BufferedChannelLayer::new(logs_reporter); + // @@@ + let channel_layer = UnbufferedChannelLayer::new(logs_reporter); if let Err(err) = tracing_setup.with(channel_layer).try_init() { logerr(err); } diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs index 3a48aba9b3..f656033c08 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs @@ -338,7 +338,7 @@ where // stack-allocate this temporary. // RawLoggingLayer is used before the logs infrastructure is set up, // so no producer_key context is available. 
- let record = LogRecord::new(event, None); + let record = LogRecord::new(event); let callsite = SavedCallsite::new(event.metadata()); self.writer.raw_print(&record, &callsite); } @@ -376,7 +376,7 @@ mod tests { S: Subscriber + for<'a> LookupSpan<'a>, { fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) { - let record = LogRecord::new(event, None); + let record = LogRecord::new(event); let callsite = SavedCallsite::new(event.metadata()); // Capture formatted output @@ -541,7 +541,6 @@ mod tests { // 2024-01-15T12:30:45.678Z timestamp_ns: 1_705_321_845_678_000_000, body_attrs_bytes: Bytes::new(), - producer_key: None, }; let writer = ConsoleWriter::no_color(); @@ -599,7 +598,6 @@ mod tests { callsite_id: tracing::callsite::Identifier(&TEST_CALLSITE), timestamp_ns: 1_705_321_845_678_000_000, body_attrs_bytes: Bytes::from(encoded), - producer_key: None, }; let mut buf = [0u8; LOG_BUFFER_SIZE]; From 33a699abdfce8e340fd9acbe36d96caf420df51d Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Sun, 11 Jan 2026 14:52:28 -0800 Subject: [PATCH 55/92] handwork --- .../otap-dataflow/crates/telemetry/src/lib.rs | 5 +- .../crates/telemetry/src/logs.rs | 102 ++++++++++-------- .../telemetry/src/opentelemetry_client.rs | 67 +++++++++--- .../telemetry/src/self_tracing/formatter.rs | 7 ++ 4 files changed, 118 insertions(+), 63 deletions(-) diff --git a/rust/otap-dataflow/crates/telemetry/src/lib.rs b/rust/otap-dataflow/crates/telemetry/src/lib.rs index bad3c00238..1046226b0c 100644 --- a/rust/otap-dataflow/crates/telemetry/src/lib.rs +++ b/rust/otap-dataflow/crates/telemetry/src/lib.rs @@ -66,9 +66,8 @@ pub use tracing::warn_span as otel_warn_span; // Re-export commonly used logs types for convenience. pub use logs::{ - BufferWriterLayer, LogsCollector, LogsReporter, UnbufferedChannelLayer, - drain_thread_log_buffer, install_thread_log_buffer, uninstall_thread_log_buffer, - with_engine_thread_subscriber, + BufferedLayer, LogsCollector, LogsReporter, UnbufferedLayer, install_thread_log_buffer, + uninstall_thread_log_buffer, }; // TODO This should be #[cfg(test)], but something is preventing it from working. diff --git a/rust/otap-dataflow/crates/telemetry/src/logs.rs b/rust/otap-dataflow/crates/telemetry/src/logs.rs index 30e19b4874..d7946679ed 100644 --- a/rust/otap-dataflow/crates/telemetry/src/logs.rs +++ b/rust/otap-dataflow/crates/telemetry/src/logs.rs @@ -7,10 +7,13 @@ use crate::error::Error; use crate::self_tracing::{ConsoleWriter, LogRecord, SavedCallsite}; use std::cell::RefCell; use tracing::{Event, Subscriber}; -use tracing_subscriber::filter::LevelFilter; -use tracing_subscriber::layer::{Context, Layer as TracingLayer, SubscriberExt}; +//use tracing_subscriber::filter::LevelFilter; +use tracing_subscriber::layer::{ + Context, + Layer as TracingLayer, //, SubscriberExt +}; use tracing_subscriber::registry::LookupSpan; -use tracing_subscriber::{EnvFilter, Registry}; +//use tracing_subscriber::{EnvFilter, Registry}; /// A batch of log records from a pipeline thread. pub struct LogBatch { @@ -89,15 +92,6 @@ pub fn uninstall_thread_log_buffer() { }); } -/// Drain the current thread's log buffer, returning the batch. -pub fn drain_thread_log_buffer() -> Option { - CURRENT_LOG_BUFFER.with(|cell| { - cell.borrow_mut() - .as_mut() - .and_then(|buffer| Some(buffer.drain())) - }) -} - /// Reporter for sending log batches through a channel. #[derive(Clone)] pub struct LogsReporter { @@ -111,7 +105,7 @@ impl LogsReporter { Self { sender } } - /// Try to send a batch, non-blocking. 
+    /// Try to send a payload, non-blocking.
     pub fn try_report(&self, payload: LogPayload) -> Result<(), Error> {
         self.sender
             .try_send(payload)
@@ -167,7 +161,7 @@ impl LogsCollector {
     }
 
     /// Write one record.
-    fn write_record(&self, record: LogRecord) {
+    fn write_record(&self, record: LogRecord) {
         // Identifier.0 is the &'static dyn Callsite
         let metadata = record.callsite_id.0.metadata();
         let saved = SavedCallsite::new(metadata);
@@ -176,11 +170,31 @@ impl LogsCollector {
     }
 }
 
-/// A tracing Layer for engine threads that writes to thread-local LogBuffer.
-#[derive(Default)]
-pub struct BufferWriterLayer {}
+/// A tracing Layer that buffers records in thread-local storage.
+pub struct BufferedLayer {
+    /// Reporter for flushing batches.
+    reporter: LogsReporter,
+}
+
+impl BufferedLayer {
+    /// Create a new buffered layer.
+    #[must_use]
+    pub fn new(reporter: LogsReporter) -> Self {
+        Self { reporter }
+    }
+
+    /// Flush the current thread's log buffer and send via the channel.
+    pub fn flush(&self) -> Result<(), Error> {
+        if let Some(batch) =
+            CURRENT_LOG_BUFFER.with(|cell| cell.borrow_mut().as_mut().map(|buffer| buffer.drain()))
+        {
+            self.reporter.try_report(LogPayload::Batch(batch))?;
+        }
+        Ok(())
+    }
+}
 
-impl<S> TracingLayer<S> for BufferWriterLayer
+impl<S> TracingLayer<S> for BufferedLayer
 where
     S: Subscriber + for<'a> LookupSpan<'a>,
 {
@@ -196,54 +210,54 @@ where
     }
 }
 
-/// A tracing Layer for non-engine threads that sends directly to channel.
-pub struct UnbufferedChannelLayer {
+/// A tracing Layer that sends each record immediately.
+pub struct UnbufferedLayer {
     /// Reporter for sending to the channel.
     reporter: LogsReporter,
 }
 
-impl UnbufferedChannelLayer {
-    /// Create a new unbuffered channel.
+impl UnbufferedLayer {
+    /// Create a new unbuffered layer.
     #[must_use]
     pub fn new(reporter: LogsReporter) -> Self {
         Self { reporter }
     }
 }
 
-impl<S> TracingLayer<S> for UnbufferedChannelLayer
+impl<S> TracingLayer<S> for UnbufferedLayer
 where
     S: Subscriber + for<'a> LookupSpan<'a>,
 {
     fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) {
         let record = LogRecord::new(event);
 
         match self.reporter.try_report(LogPayload::Singleton(record)) {
             Ok(()) => {}
             Err(err) => {
-                crate::raw_error!("failed to send log batch", err = err.to_string());
+                crate::raw_error!("failed to send log", err = err.to_string());
             }
         }
     }
 }
 
+// Note: Commented out below because it is unused, not ready, and slightly incorrect.
+
-/// Create a subscriber for engine threads that uses BufferWriterLayer.
-fn create_engine_thread_subscriber() -> impl Subscriber {
-    // Use the same filter as the global subscriber (INFO by default, RUST_LOG override)
-    let filter = EnvFilter::builder()
-        .with_default_directive(LevelFilter::INFO.into())
-        .from_env_lossy();
-
-    Registry::default()
-        .with(filter)
-        .with(BufferWriterLayer::default())
-}
+// /// Create a subscriber for engine threads that uses BufferedLayer.
+// fn create_engine_thread_subscriber() -> impl Subscriber {
+//     // Use the same filter as the global subscriber (INFO by default, RUST_LOG override)
+//     let filter = EnvFilter::builder()
+//         .with_default_directive(LevelFilter::INFO.into())
+//         .from_env_lossy();
+//     Registry::default()
+//         .with(filter)
+//         .with(BufferedLayer::default())
+// }
 
-/// Run a closure with the engine thread subscriber as the default.
-pub fn with_engine_thread_subscriber(f: F) -> R -where - F: FnOnce() -> R, -{ - let subscriber = create_engine_thread_subscriber(); - tracing::subscriber::with_default(subscriber, f) -} +// /// Run a closure with the engine thread subscriber as the default. +// pub fn with_engine_thread_subscriber(f: F) -> R +// where +// F: FnOnce() -> R, +// { +// let subscriber = create_engine_thread_subscriber(); +// tracing::subscriber::with_default(subscriber, f) +// } diff --git a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs b/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs index d2838bb09b..de4658ec49 100644 --- a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs +++ b/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs @@ -7,6 +7,7 @@ pub mod logger_provider; pub mod meter_provider; use opentelemetry::KeyValue; +use opentelemetry_appender_tracing::layer::OpenTelemetryTracingBridge; use opentelemetry_sdk::{Resource, logs::SdkLoggerProvider, metrics::SdkMeterProvider}; use otap_df_config::pipeline::service::telemetry::{ AttributeValue, AttributeValueArray, TelemetryConfig, logs::ProviderMode, @@ -14,7 +15,15 @@ use otap_df_config::pipeline::service::telemetry::{ use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, util::TryInitError}; use crate::{ - error::Error, logs::{LogsReporter, UnbufferedChannelLayer}, opentelemetry_client::meter_provider::MeterProvider, + error::Error, + logs::{ + BufferedLayer, + //LogsReporter, + UnbufferedLayer, + }, + opentelemetry_client::logger_provider::LoggerProvider, + opentelemetry_client::meter_provider::MeterProvider, + self_tracing::{ConsoleWriter, RawLoggingLayer}, }; /// Client for the OpenTelemetry SDK. @@ -39,7 +48,7 @@ impl OpentelemetryClient { /// When `RUST_LOG` is set, it takes precedence and allows filtering by target. /// Example: `RUST_LOG=info,h2=warn,hyper=warn` enables info level but silences /// noisy HTTP/2 and hyper logs. - pub fn new(config: &TelemetryConfig, logs_reporter: LogsReporter) -> Result { + pub fn new(config: &TelemetryConfig) -> Result { let sdk_resource = Self::configure_resource(&config.resource); let runtime = None; @@ -54,31 +63,55 @@ impl OpentelemetryClient { crate::raw_error!("tracing.subscriber.init", error = err.to_string()); }; + // The OpenTelemetry logging provider. + let mut logger_provider: Option<_> = None; + // Configure the global subscriber based on strategies.global. // Engine threads override this with BufferWriterLayer via with_default(). 
- match config.logs.strategies.global { + let (logger_provider, runtime) = match config.logs.strategies.global { ProviderMode::Noop => { // No-op: just install the filter, events are dropped if let Err(err) = tracing::subscriber::NoSubscriber::new().try_init() { logerr(err); } - } - ProviderMode::Unbuffered => { + (None, runtime) } ProviderMode::Raw => { + if let Err(err) = tracing_setup + .with(RawLoggingLayer::new(ConsoleWriter::default())) + .try_init() + { + logerr(err); + } + (None, runtime) } ProviderMode::Buffered => { - // Regional channel: send events to the appropriate logs collector thread - // @@@ - let channel_layer = UnbufferedChannelLayer::new(logs_reporter); + let channel_layer = BufferedLayer::new(logs_reporter); + if let Err(err) = tracing_setup.with(channel_layer).try_init() { + logerr(err); + } + (None, runtime) + } + ProviderMode::Unbuffered => { + let channel_layer = UnbufferedLayer::new(logs_reporter); if let Err(err) = tracing_setup.with(channel_layer).try_init() { logerr(err); } + (None, runtime) } ProviderMode::OpenTelemetry => { - // @@@ TODO!!! bring this back + let (logger_provider, runtime) = + LoggerProvider::configure(sdk_resource, &config.logs, runtime)?.into_parts(); + + let sdk_layer = OpenTelemetryTracingBridge::new(&logger_provider); + + if let Err(err) = tracing_setup.with(sdk_layer).try_init() { + logerr(err) + } + + (Some(logger_provider), runtime) } - } + }; // Note: Any span-level detail, typically through a traces provider, has // to be configured via the try_init() cases above. @@ -86,7 +119,7 @@ impl OpentelemetryClient { Ok(Self { _runtime: runtime, meter_provider, - logger_provider: None, + logger_provider, }) } @@ -176,14 +209,15 @@ mod tests { }; use super::*; - use crate::logs::LogsCollector; + //use crate::logs::LogsCollector; use std::{f64::consts::PI, time::Duration}; #[test] fn test_configure_minimal_opentelemetry_client() -> Result<(), Error> { let config = TelemetryConfig::default(); - let (_collector, reporter) = LogsCollector::new(10); - let client = OpentelemetryClient::new(&config, reporter)?; + //let (_collector, reporter) = LogsCollector::new(10); + // , reporter + let client = OpentelemetryClient::new(&config)?; let meter = global::meter("test-meter"); let counter = meter.u64_counter("test-counter").build(); @@ -217,8 +251,9 @@ mod tests { logs: LogsConfig::default(), resource, }; - let (_collector, reporter) = LogsCollector::new(10); - let client = OpentelemetryClient::new(&config, reporter)?; + //, reporter + //let (_collector, reporter) = LogsCollector::new(10); + let client = OpentelemetryClient::new(&config)?; let meter = global::meter("test-meter"); let counter = meter.u64_counter("test-counter").build(); diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs index f656033c08..6528b089f0 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs @@ -97,6 +97,13 @@ impl RawLoggingLayer { /// Uses `std::io::Cursor` for position tracking with `std::io::Write`. pub type BufWriter<'a> = Cursor<&'a mut [u8]>; +impl Default for ConsoleWriter { + /// Uses the standard NO_COLOR environment variable to disable color. + fn default() -> Self { + Self::no_color() + } +} + impl ConsoleWriter { /// Create a writer that outputs to stdout without ANSI colors. 
#[must_use] From 7de5313a1b23d955c2b916115f22f3c900d455e9 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Sun, 11 Jan 2026 14:58:21 -0800 Subject: [PATCH 56/92] scoped thread buffer --- .../otap-dataflow/crates/telemetry/src/lib.rs | 5 +--- .../crates/telemetry/src/logs.rs | 26 +++++++++++++------ 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/rust/otap-dataflow/crates/telemetry/src/lib.rs b/rust/otap-dataflow/crates/telemetry/src/lib.rs index 1046226b0c..65577e06db 100644 --- a/rust/otap-dataflow/crates/telemetry/src/lib.rs +++ b/rust/otap-dataflow/crates/telemetry/src/lib.rs @@ -65,10 +65,7 @@ pub use tracing::trace_span as otel_trace_span; pub use tracing::warn_span as otel_warn_span; // Re-export commonly used logs types for convenience. -pub use logs::{ - BufferedLayer, LogsCollector, LogsReporter, UnbufferedLayer, install_thread_log_buffer, - uninstall_thread_log_buffer, -}; +pub use logs::{BufferedLayer, LogsCollector, LogsReporter, UnbufferedLayer}; // TODO This should be #[cfg(test)], but something is preventing it from working. // The #[cfg(test)]-labeled otap_batch_processor::test_helpers::from_config diff --git a/rust/otap-dataflow/crates/telemetry/src/logs.rs b/rust/otap-dataflow/crates/telemetry/src/logs.rs index d7946679ed..356f2c8c00 100644 --- a/rust/otap-dataflow/crates/telemetry/src/logs.rs +++ b/rust/otap-dataflow/crates/telemetry/src/logs.rs @@ -78,18 +78,28 @@ thread_local! { static CURRENT_LOG_BUFFER: RefCell> = const { RefCell::new(None) }; } -/// Install a log buffer for the current thread. -pub fn install_thread_log_buffer(capacity: usize) { +/// Run a closure with a thread-local log buffer installed. +/// +/// The buffer is automatically uninstalled when the closure returns (or panics). +pub fn with_thread_log_buffer(capacity: usize, f: F) -> R +where + F: FnOnce() -> R, +{ CURRENT_LOG_BUFFER.with(|cell| { *cell.borrow_mut() = Some(LogBuffer::new(capacity)); }); -} -/// Uninstall the log buffer for the current thread. -pub fn uninstall_thread_log_buffer() { - CURRENT_LOG_BUFFER.with(|cell| { - *cell.borrow_mut() = None; - }); + struct Guard; + impl Drop for Guard { + fn drop(&mut self) { + CURRENT_LOG_BUFFER.with(|cell| { + *cell.borrow_mut() = None; + }); + } + } + let _guard = Guard; + + f() } /// Reporter for sending log batches through a channel. From cca289b0018153ce88941f3546da2d57ef58754d Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Sun, 11 Jan 2026 15:04:17 -0800 Subject: [PATCH 57/92] add details --- rust/otap-dataflow/crates/telemetry/src/error.rs | 9 +++++++-- rust/otap-dataflow/crates/telemetry/src/logs.rs | 12 +++++++++--- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/rust/otap-dataflow/crates/telemetry/src/error.rs b/rust/otap-dataflow/crates/telemetry/src/error.rs index 8d37550881..949aeeba47 100644 --- a/rust/otap-dataflow/crates/telemetry/src/error.rs +++ b/rust/otap-dataflow/crates/telemetry/src/error.rs @@ -33,6 +33,11 @@ pub enum Error { ConfigurationError(String), /// Error during logs send. - #[error("Log send error: {0}")] - LogSendError(String), + #[error("Log send error, dropped: {dropped}: {message}")] + LogSendError { + /// Number dropped. + dropped: usize, + /// Reason. 
+        message: String,
+    },
 }
diff --git a/rust/otap-dataflow/crates/telemetry/src/logs.rs b/rust/otap-dataflow/crates/telemetry/src/logs.rs
index 356f2c8c00..d31d8a8a7a 100644
--- a/rust/otap-dataflow/crates/telemetry/src/logs.rs
+++ b/rust/otap-dataflow/crates/telemetry/src/logs.rs
@@ -31,10 +31,13 @@ pub enum LogPayload {
     Batch(LogBatch),
 }
 
-impl LogBatch {
+impl LogPayload {
     /// The total number of records counted as dropped if this batch is dropped.
     pub fn size_with_dropped(&self) -> usize {
-        self.records.len() + self.dropped_count
+        match self {
+            Self::Singleton(_) => 1,
+            Self::Batch(batch) => batch.records.len() + batch.dropped_count,
+        }
     }
 }
 
@@ -119,7 +122,10 @@
     pub fn try_report(&self, payload: LogPayload) -> Result<(), Error> {
         self.sender
             .try_send(payload)
-            .map_err(|e| Error::LogSendError(e.to_string()))
+            .map_err(|e| Error::LogSendError {
+                message: e.to_string(),
+                dropped: e.into_inner().size_with_dropped(),
+            })
     }
 }

From 9a760ebbaccf4461e1cb117b96a86305409663c9 Mon Sep 17 00:00:00 2001
From: Joshua MacDonald
Date: Sun, 11 Jan 2026 16:04:41 -0800
Subject: [PATCH 58/92] engine config

---
 .../src/pipeline/service/telemetry/logs.rs    |  33 +---
 .../crates/controller/src/lib.rs              |  64 +++++---
 .../crates/engine/src/pipeline_ctrl.rs        |  31 ++--
 .../crates/engine/src/runtime_pipeline.rs     |   6 +-
 .../otap-dataflow/crates/telemetry/src/lib.rs |   5 +-
 .../crates/telemetry/src/logs.rs              | 153 ++++++++++++++----
 .../telemetry/src/opentelemetry_client.rs     |  45 +++---
 7 files changed, 205 insertions(+), 132 deletions(-)

diff --git a/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs b/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs
index 12b6897bfd..9b05578cd9 100644
--- a/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs
+++ b/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs
@@ -17,12 +17,6 @@ pub struct LogsConfig {
     /// Logging strategy configuration for different thread contexts.
     pub strategies: LoggingStrategies,
 
-    /// The level at which to consider a second fallback strategy.
-    pub fallback_level: LogLevel,
-
-    /// Logging strategy configuration for different thread contexts.
-    pub fallbacks: LoggingStrategies,
-
     /// The list of log processors to configure (for OpenTelemetry SDK output mode).
     /// Only used when `output.mode` is set to `opentelemetry`.
     pub processors: Vec,
@@ -55,9 +49,6 @@ pub struct LoggingStrategies {
 
     /// Strategy for engine/pipeline threads.
     pub engine: ProviderMode,
-
-    /// Default for internal telemetry-reporting components.
-    pub internal: ProviderMode,
 }
 
 /// Logs producer: how log events are captured and routed.
@@ -67,17 +58,16 @@ pub enum ProviderMode {
     /// Log events are silently ignored.
     Noop,
 
-    /// Regional delivery: send to a buffered channel.
+    /// Place into a thread-local buffer.
     Buffered,
 
-    /// Regional delivery: send to an unbuffered channel.
+    /// Non-blocking, immediate delivery.
     Unbuffered,
 
     /// Use OTel-Rust as the provider.
     OpenTelemetry,
 
-    /// Use synchronous logging. This is harmful for performance but
-    /// can be used for development or as a fallback configuration.
+    /// Use synchronous logging.
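+    /// (Blocking, direct writes: acceptable in development or as a fallback,
+    /// but harmful to hot-path throughput.)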
Raw, } @@ -107,23 +97,10 @@ fn default_level() -> LogLevel { LogLevel::Off } -fn default_fallback_level() -> LogLevel { - LogLevel::Error -} - fn default_strategies() -> LoggingStrategies { LoggingStrategies { - global: ProviderMode::Buffered, + global: ProviderMode::Unbuffered, engine: ProviderMode::Buffered, - internal: ProviderMode::Noop, - } -} - -fn default_fallback_strategies() -> LoggingStrategies { - LoggingStrategies { - global: ProviderMode::Raw, - engine: ProviderMode::Raw, - internal: ProviderMode::Noop, } } @@ -132,8 +109,6 @@ impl Default for LogsConfig { Self { level: default_level(), strategies: default_strategies(), - fallback_level: default_fallback_level(), - fallbacks: default_fallback_strategies(), processors: Vec::new(), } } diff --git a/rust/otap-dataflow/crates/controller/src/lib.rs b/rust/otap-dataflow/crates/controller/src/lib.rs index cd175f4a3e..d7b0bdf74f 100644 --- a/rust/otap-dataflow/crates/controller/src/lib.rs +++ b/rust/otap-dataflow/crates/controller/src/lib.rs @@ -21,6 +21,7 @@ use crate::error::Error; use crate::thread_task::spawn_thread_local_task; use core_affinity::CoreId; use otap_df_config::engine::HttpAdminSettings; +use otap_df_config::pipeline::service::telemetry::logs::ProviderMode; use otap_df_config::{ PipelineGroupId, PipelineId, pipeline::PipelineConfig, @@ -36,7 +37,7 @@ use otap_df_state::DeployedPipelineKey; use otap_df_state::event::{ErrorSummary, ObservedEvent}; use otap_df_state::reporter::ObservedEventReporter; use otap_df_state::store::ObservedStateStore; -use otap_df_telemetry::logs::{LogsCollector, LogsReporter, install_thread_log_buffer, uninstall_thread_log_buffer, with_engine_thread_subscriber}; +use otap_df_telemetry::logs::{EngineLogsSetup, LogsCollector}; use otap_df_telemetry::opentelemetry_client::OpentelemetryClient; use otap_df_telemetry::reporter::MetricsReporter; use otap_df_telemetry::{MetricsSystem, otel_info, otel_info_span, otel_warn}; @@ -85,11 +86,8 @@ impl Controller { pipeline_ctrl_msg_channel_size = settings.default_pipeline_ctrl_msg_channel_size ); - // Create the logs collection channel before OpentelemetryClient so it can - // install the DirectChannelLayer for global subscriber. 
- let (logs_collector, logs_reporter) = LogsCollector::new( - telemetry_config.reporting_channel_size, - ); + let (logs_collector, logs_reporter) = + LogsCollector::new(telemetry_config.reporting_channel_size); // Start the logs collector thread // TODO: Store handle for graceful shutdown let _logs_collector_handle = @@ -97,7 +95,8 @@ impl Controller { logs_collector.run() })?; - let opentelemetry_client = OpentelemetryClient::new(telemetry_config, logs_reporter.clone())?; + let opentelemetry_client = + OpentelemetryClient::new(telemetry_config, Some(logs_reporter.clone()))?; let metrics_system = MetricsSystem::new(telemetry_config); let metrics_dispatcher = metrics_system.dispatcher(); let metrics_reporter = metrics_system.reporter(); @@ -129,6 +128,28 @@ impl Controller { obs_state_store.run(cancellation_token) })?; + // Create engine logs setup based on strategy configuration + let engine_logs_setup = match telemetry_config.logs.strategies.engine { + ProviderMode::Noop => EngineLogsSetup::Noop, + ProviderMode::Raw => EngineLogsSetup::Raw, + ProviderMode::Buffered => EngineLogsSetup::Buffered { + reporter: logs_reporter.clone(), + capacity: 1024, // TODO: make configurable + }, + ProviderMode::Unbuffered => EngineLogsSetup::Unbuffered { + reporter: logs_reporter.clone(), + }, + ProviderMode::OpenTelemetry => { + // OpenTelemetry mode for engine is not yet supported + // Fall back to buffered for now + EngineLogsSetup::Buffered { + reporter: logs_reporter.clone(), + capacity: 1024, + } + } + }; + let log_level = telemetry_config.logs.level; + // Start one thread per requested core // Get available CPU cores for pinning let requested_cores = Self::select_cores_for_quota( @@ -162,7 +183,7 @@ impl Controller { thread_id, ); let metrics_reporter = metrics_reporter.clone(); - let logs_reporter = logs_reporter.clone(); + let engine_logs_setup = engine_logs_setup.clone(); let thread_name = format!("pipeline-core-{}", core_id.id); let obs_evt_reporter = obs_evt_reporter.clone(); @@ -177,7 +198,8 @@ impl Controller { pipeline_handle, obs_evt_reporter, metrics_reporter, - logs_reporter, + engine_logs_setup, + log_level, pipeline_ctrl_msg_tx, pipeline_ctrl_msg_rx, ) @@ -395,17 +417,14 @@ impl Controller { pipeline_context: PipelineContext, obs_evt_reporter: ObservedEventReporter, metrics_reporter: MetricsReporter, - logs_reporter: LogsReporter, + engine_logs_setup: EngineLogsSetup, + log_level: otap_df_config::pipeline::service::telemetry::logs::LogLevel, pipeline_ctrl_msg_tx: PipelineCtrlMsgSender, pipeline_ctrl_msg_rx: PipelineCtrlMsgReceiver, ) -> Result, Error> { - // Run the entire pipeline thread with the engine-specific tracing subscriber. - // This ensures all logs go to the thread-local buffer instead of the global channel. - with_engine_thread_subscriber(|| { - // Install thread-local log buffer for this pipeline thread - // Buffer capacity: 1024 entries (TODO: make configurable) - install_thread_log_buffer(1024); - + // Run with the engine-appropriate tracing subscriber. + // The closure receives a LogsFlusher for buffered mode. + engine_logs_setup.with_engine_subscriber(log_level, |logs_flusher| { // Create a tracing span for this pipeline thread // so that all logs within this scope include pipeline context. 
let span = otel_info_span!("pipeline_thread", core.id = core_id.id); @@ -439,24 +458,19 @@ impl Controller { )); // Start the pipeline (this will use the current thread's Tokio runtime) - let result = runtime_pipeline + runtime_pipeline .run_forever( pipeline_key, pipeline_context, obs_evt_reporter, metrics_reporter, - logs_reporter, + logs_flusher, pipeline_ctrl_msg_tx, pipeline_ctrl_msg_rx, ) .map_err(|e| Error::PipelineRuntimeError { source: Box::new(e), - }); - - // Cleanup: uninstall thread-local log buffer - uninstall_thread_log_buffer(); - - result + }) }) } } diff --git a/rust/otap-dataflow/crates/engine/src/pipeline_ctrl.rs b/rust/otap-dataflow/crates/engine/src/pipeline_ctrl.rs index f861b0a477..e5b4e0e8f6 100644 --- a/rust/otap-dataflow/crates/engine/src/pipeline_ctrl.rs +++ b/rust/otap-dataflow/crates/engine/src/pipeline_ctrl.rs @@ -19,7 +19,7 @@ use otap_df_config::pipeline::TelemetrySettings; use otap_df_state::DeployedPipelineKey; use otap_df_state::event::{ErrorSummary, ObservedEvent}; use otap_df_state::reporter::ObservedEventReporter; -use otap_df_telemetry::logs::{LogsReporter, LogPayload, drain_thread_log_buffer}; +use otap_df_telemetry::logs::LogsFlusher; use otap_df_telemetry::reporter::MetricsReporter; use otap_df_telemetry::{otel_error, otel_warn}; use std::cmp::Reverse; @@ -183,8 +183,8 @@ pub struct PipelineCtrlMsgManager { event_reporter: ObservedEventReporter, /// Global metrics reporter. metrics_reporter: MetricsReporter, - /// Global logs reporter for internal log collection. - logs_reporter: LogsReporter, + /// Logs flusher for periodic flush of internal log buffers. + logs_flusher: LogsFlusher, /// Channel metrics handles for periodic reporting. channel_metrics: Vec, @@ -202,7 +202,7 @@ impl PipelineCtrlMsgManager { control_senders: ControlSenders, event_reporter: ObservedEventReporter, metrics_reporter: MetricsReporter, - logs_reporter: LogsReporter, + logs_flusher: LogsFlusher, internal_telemetry: TelemetrySettings, channel_metrics: Vec, ) -> Self { @@ -216,7 +216,7 @@ impl PipelineCtrlMsgManager { delayed_data: BinaryHeap::new(), event_reporter, metrics_reporter, - logs_reporter, + logs_flusher, channel_metrics, telemetry: internal_telemetry, } @@ -355,11 +355,8 @@ impl PipelineCtrlMsgManager { } // Flush internal logs from the thread-local buffer - if let Some(batch) = drain_thread_log_buffer() { - let count = batch.size_with_dropped(); - if let Err(err) = self.logs_reporter.try_report(LogPayload::Batch(batch)) { - otel_error!("logs.reporting.fail", error = err.to_string(), dropped = count); - } + if let Err(err) = self.logs_flusher.flush() { + otel_error!("logs.flush.fail", error = err.to_string()); } // Deliver all accumulated control messages (best-effort) @@ -436,7 +433,7 @@ mod tests { use otap_df_config::pipeline::PipelineSettings; use otap_df_config::{PipelineGroupId, PipelineId}; use otap_df_state::store::ObservedStateStore; - use otap_df_telemetry::logs::LogsCollector; + use otap_df_telemetry::logs::LogsFlusher; use std::collections::HashMap; use std::time::{Duration, Instant}; use tokio::task::LocalSet; @@ -489,8 +486,8 @@ mod tests { thread_id, ); - // Create a LogsReporter for testing (collector is dropped, that's ok for tests) - let (_collector, logs_reporter) = LogsCollector::new(10); + // Create a no-op LogsFlusher for testing + let logs_flusher = LogsFlusher::Noop; let manager = PipelineCtrlMsgManager::new( DeployedPipelineKey { @@ -503,7 +500,7 @@ mod tests { control_senders, observed_state_store.reporter(), metrics_reporter, - 
logs_reporter, + logs_flusher, pipeline_settings.telemetry.clone(), Vec::new(), ); @@ -910,8 +907,8 @@ mod tests { thread_id, ); - // Create a LogsReporter for testing (collector is dropped, that's ok for tests) - let (_collector, logs_reporter) = LogsCollector::new(10); + // Create a no-op LogsFlusher for testing + let logs_flusher = LogsFlusher::Noop; // Create manager with empty control_senders map (no registered nodes) let manager = PipelineCtrlMsgManager::<()>::new( @@ -921,7 +918,7 @@ mod tests { ControlSenders::new(), observed_state_store.reporter(), metrics_reporter, - logs_reporter, + logs_flusher, TelemetrySettings::default(), Vec::new(), ); diff --git a/rust/otap-dataflow/crates/engine/src/runtime_pipeline.rs b/rust/otap-dataflow/crates/engine/src/runtime_pipeline.rs index 69c0e6203c..9f572150c3 100644 --- a/rust/otap-dataflow/crates/engine/src/runtime_pipeline.rs +++ b/rust/otap-dataflow/crates/engine/src/runtime_pipeline.rs @@ -14,7 +14,7 @@ use crate::terminal_state::TerminalState; use crate::{exporter::ExporterWrapper, processor::ProcessorWrapper, receiver::ReceiverWrapper}; use otap_df_config::pipeline::PipelineConfig; use otap_df_telemetry::reporter::MetricsReporter; -use otap_df_telemetry::logs::LogsReporter; +use otap_df_telemetry::logs::LogsFlusher; use crate::context::PipelineContext; use otap_df_state::DeployedPipelineKey; @@ -107,7 +107,7 @@ impl RuntimePipeline { pipeline_context: PipelineContext, event_reporter: ObservedEventReporter, metrics_reporter: MetricsReporter, - logs_reporter: LogsReporter, + logs_flusher: LogsFlusher, pipeline_ctrl_msg_tx: PipelineCtrlMsgSender, pipeline_ctrl_msg_rx: PipelineCtrlMsgReceiver, ) -> Result, Error> { @@ -195,7 +195,7 @@ impl RuntimePipeline { control_senders, event_reporter, metrics_reporter, - logs_reporter, + logs_flusher, internal_telemetry, channel_metrics, ); diff --git a/rust/otap-dataflow/crates/telemetry/src/lib.rs b/rust/otap-dataflow/crates/telemetry/src/lib.rs index 65577e06db..faa9fbde1e 100644 --- a/rust/otap-dataflow/crates/telemetry/src/lib.rs +++ b/rust/otap-dataflow/crates/telemetry/src/lib.rs @@ -65,7 +65,10 @@ pub use tracing::trace_span as otel_trace_span; pub use tracing::warn_span as otel_warn_span; // Re-export commonly used logs types for convenience. -pub use logs::{BufferedLayer, LogsCollector, LogsReporter, UnbufferedLayer}; +pub use logs::{ + EngineLogsSetup, LogsCollector, LogsFlusher, LogsReporter, ThreadBufferedLayer, + UnbufferedLayer, +}; // TODO This should be #[cfg(test)], but something is preventing it from working. // The #[cfg(test)]-labeled otap_batch_processor::test_helpers::from_config diff --git a/rust/otap-dataflow/crates/telemetry/src/logs.rs b/rust/otap-dataflow/crates/telemetry/src/logs.rs index d31d8a8a7a..b7636e3ab5 100644 --- a/rust/otap-dataflow/crates/telemetry/src/logs.rs +++ b/rust/otap-dataflow/crates/telemetry/src/logs.rs @@ -4,16 +4,12 @@ //! Internal logs collection for OTAP-Dataflow. 
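One behavioral detail preserved through the renames below: dropped records stay accounted for. A batch's size_with_dropped() is records.len() + dropped_count, and a singleton payload always counts as 1. A small sketch under those definitions:

    let batch = LogBatch { records: vec![], dropped_count: 3 };
    assert_eq!(batch.size_with_dropped(), 3);
    assert_eq!(LogPayload::Batch(batch).size_with_dropped(), 3);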
use crate::error::Error; -use crate::self_tracing::{ConsoleWriter, LogRecord, SavedCallsite}; +use crate::self_tracing::{ConsoleWriter, LogRecord, RawLoggingLayer, SavedCallsite}; use std::cell::RefCell; use tracing::{Event, Subscriber}; -//use tracing_subscriber::filter::LevelFilter; -use tracing_subscriber::layer::{ - Context, - Layer as TracingLayer, //, SubscriberExt -}; +use tracing_subscriber::layer::{Context, Layer as TracingLayer, SubscriberExt}; use tracing_subscriber::registry::LookupSpan; -//use tracing_subscriber::{EnvFilter, Registry}; +use tracing_subscriber::Registry; /// A batch of log records from a pipeline thread. pub struct LogBatch { @@ -23,6 +19,13 @@ pub struct LogBatch { pub dropped_count: usize, } +impl LogBatch { + /// The total size including dropped records. + pub fn size_with_dropped(&self) -> usize { + self.records.len() + self.dropped_count + } +} + /// A payload of two kinds pub enum LogPayload { /// A single record. @@ -32,11 +35,11 @@ pub enum LogPayload { } impl LogPayload { - /// The total number of dropped if you drop this batch. + /// The total number of records (including dropped) in this payload. pub fn size_with_dropped(&self) -> usize { match self { Self::Singleton(_) => 1, - Self::Batch(batch) => batch.records.len() + batch.dropped_count, + Self::Batch(batch) => batch.size_with_dropped(), } } } @@ -187,13 +190,15 @@ impl LogsCollector { } /// A tracing Layer that buffers records in thread-local storage. -pub struct BufferedLayer { +/// +/// For engine threads that control their own flush timing. +pub struct ThreadBufferedLayer { /// Reporter for flushing batches. reporter: LogsReporter, } -impl BufferedLayer { - /// Create a new buffered layer. +impl ThreadBufferedLayer { + /// Create a new thread-buffered layer. #[must_use] pub fn new(reporter: LogsReporter) -> Self { Self { reporter } @@ -210,7 +215,7 @@ impl BufferedLayer { } } -impl TracingLayer for BufferedLayer +impl TracingLayer for ThreadBufferedLayer where S: Subscriber + for<'a> LookupSpan<'a>, { @@ -256,24 +261,104 @@ where } } -// Note: Commented below because not use, not ready, slightly incorrect. - -// /// Create a subscriber for engine threads that uses BufferedLayer. -// fn create_engine_thread_subscriber() -> impl Subscriber { -// // Use the same filter as the global subscriber (INFO by default, RUST_LOG override) -// let filter = EnvFilter::builder() -// .with_default_directive(LevelFilter::INFO.into()) -// .from_env_lossy(); -// Registry::default() -// .with(filter) -// .with(BufferedLayer::default()) -// } - -// /// Run a closure with the engine thread subscriber as the default. -// pub fn with_engine_thread_subscriber(f: F) -> R -// where -// F: FnOnce() -> R, -// { -// let subscriber = create_engine_thread_subscriber(); -// tracing::subscriber::with_default(subscriber, f) -// } +/// Engine logging configuration, carrying the data needed for each mode. +/// +/// This enum is constructed based on `config.logs.strategies.engine` and passed +/// to each engine thread. The engine thread uses `with_engine_subscriber()` to +/// run its work with the appropriate logging layer. +#[derive(Clone)] +pub enum EngineLogsSetup { + /// Logs are silently dropped. + Noop, + /// Synchronous raw logging to console. + Raw, + /// Buffered: accumulates in thread-local buffer, flushed periodically. + Buffered { + /// Reporter to send batches through. + reporter: LogsReporter, + /// Buffer capacity per thread. + capacity: usize, + }, + /// Unbuffered: each log is sent immediately. 
+ Unbuffered { + /// Reporter to send singletons through. + reporter: LogsReporter, + }, +} + +/// Handle for flushing buffered logs from the engine thread. +/// +/// For non-buffered modes, flush is a no-op. +#[derive(Clone)] +pub enum LogsFlusher { + /// No-op flusher for modes that don't buffer. + Noop, + /// Flusher that drains the thread-local buffer and sends via the reporter. + Buffered(LogsReporter), +} + +impl LogsFlusher { + /// Flush any buffered logs. + /// + /// For `Noop`, this does nothing. + /// For `Buffered`, this drains the thread-local buffer and sends as a batch. + pub fn flush(&self) -> Result<(), Error> { + match self { + LogsFlusher::Noop => Ok(()), + LogsFlusher::Buffered(reporter) => { + if let Some(batch) = CURRENT_LOG_BUFFER + .with(|cell| cell.borrow_mut().as_mut().map(|buffer| buffer.drain())) + { + reporter.try_report(LogPayload::Batch(batch))?; + } + Ok(()) + } + } + } +} + +impl EngineLogsSetup { + /// Run a closure with the engine-appropriate tracing subscriber. + /// + /// Returns a `LogsFlusher` that can be used to periodically flush buffered logs. + /// For non-buffered modes, the flusher is a no-op. + pub fn with_engine_subscriber( + &self, + log_level: otap_df_config::pipeline::service::telemetry::logs::LogLevel, + f: F, + ) -> R + where + F: FnOnce(LogsFlusher) -> R, + { + let filter = crate::get_env_filter(log_level); + + match self { + EngineLogsSetup::Noop => { + // Use NoSubscriber - events are dropped + let subscriber = tracing::subscriber::NoSubscriber::new(); + tracing::subscriber::with_default(subscriber, || f(LogsFlusher::Noop)) + } + EngineLogsSetup::Raw => { + let subscriber = Registry::default() + .with(filter) + .with(RawLoggingLayer::new(ConsoleWriter::default())); + tracing::subscriber::with_default(subscriber, || f(LogsFlusher::Noop)) + } + EngineLogsSetup::Buffered { reporter, capacity } => { + let layer = ThreadBufferedLayer::new(reporter.clone()); + let subscriber = Registry::default().with(filter).with(layer); + let flusher = LogsFlusher::Buffered(reporter.clone()); + + // Install the thread-local buffer + with_thread_log_buffer(*capacity, || { + tracing::subscriber::with_default(subscriber, || f(flusher)) + }) + } + EngineLogsSetup::Unbuffered { reporter } => { + let layer = UnbufferedLayer::new(reporter.clone()); + let subscriber = Registry::default().with(filter).with(layer); + tracing::subscriber::with_default(subscriber, || f(LogsFlusher::Noop)) + } + } + } +} diff --git a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs b/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs index de4658ec49..403ce1d097 100644 --- a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs +++ b/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs @@ -16,11 +16,7 @@ use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, util::Tr use crate::{ error::Error, - logs::{ - BufferedLayer, - //LogsReporter, - UnbufferedLayer, - }, + logs::{LogsReporter, UnbufferedLayer}, opentelemetry_client::logger_provider::LoggerProvider, opentelemetry_client::meter_provider::MeterProvider, self_tracing::{ConsoleWriter, RawLoggingLayer}, @@ -48,13 +44,20 @@ impl OpentelemetryClient { /// When `RUST_LOG` is set, it takes precedence and allows filtering by target. /// Example: `RUST_LOG=info,h2=warn,hyper=warn` enables info level but silences /// noisy HTTP/2 and hyper logs. 
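The constructor change below is the visible API shift: callers now hand in an optional reporter, and a global Buffered strategy becomes a configuration error instead of being silently wired. The call shape, taken from the updated tests further down in this hunk:

    let (_collector, reporter) = LogsCollector::new(10);
    let client = OpentelemetryClient::new(&config, Some(reporter))?;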
- pub fn new(config: &TelemetryConfig) -> Result { + /// + /// The `logs_reporter` parameter is required when `strategies.global` is set to + /// `Unbuffered`. It should be created via `LogsCollector::new()` and the collector + /// should be run on a dedicated thread. + pub fn new( + config: &TelemetryConfig, + logs_reporter: Option, + ) -> Result { let sdk_resource = Self::configure_resource(&config.resource); let runtime = None; let (meter_provider, runtime) = - MeterProvider::configure(sdk_resource, &config.metrics, runtime)?.into_parts(); + MeterProvider::configure(sdk_resource.clone(), &config.metrics, runtime)?.into_parts(); let tracing_setup = tracing_subscriber::registry().with(crate::get_env_filter(config.logs.level)); @@ -63,9 +66,6 @@ impl OpentelemetryClient { crate::raw_error!("tracing.subscriber.init", error = err.to_string()); }; - // The OpenTelemetry logging provider. - let mut logger_provider: Option<_> = None; - // Configure the global subscriber based on strategies.global. // Engine threads override this with BufferWriterLayer via with_default(). let (logger_provider, runtime) = match config.logs.strategies.global { @@ -86,14 +86,15 @@ impl OpentelemetryClient { (None, runtime) } ProviderMode::Buffered => { - let channel_layer = BufferedLayer::new(logs_reporter); - if let Err(err) = tracing_setup.with(channel_layer).try_init() { - logerr(err); - } - (None, runtime) + return Err(Error::ConfigurationError( + "global buffered logging not supported".into(), + )); } ProviderMode::Unbuffered => { - let channel_layer = UnbufferedLayer::new(logs_reporter); + let reporter = logs_reporter.ok_or_else(|| { + Error::ConfigurationError("Unbuffered logging requires a LogsReporter".into()) + })?; + let channel_layer = UnbufferedLayer::new(reporter); if let Err(err) = tracing_setup.with(channel_layer).try_init() { logerr(err); } @@ -209,15 +210,14 @@ mod tests { }; use super::*; - //use crate::logs::LogsCollector; + use crate::logs::LogsCollector; use std::{f64::consts::PI, time::Duration}; #[test] fn test_configure_minimal_opentelemetry_client() -> Result<(), Error> { let config = TelemetryConfig::default(); - //let (_collector, reporter) = LogsCollector::new(10); - // , reporter - let client = OpentelemetryClient::new(&config)?; + let (_collector, reporter) = LogsCollector::new(10); + let client = OpentelemetryClient::new(&config, Some(reporter))?; let meter = global::meter("test-meter"); let counter = meter.u64_counter("test-counter").build(); @@ -251,9 +251,8 @@ mod tests { logs: LogsConfig::default(), resource, }; - //, reporter - //let (_collector, reporter) = LogsCollector::new(10); - let client = OpentelemetryClient::new(&config)?; + let (_collector, reporter) = LogsCollector::new(10); + let client = OpentelemetryClient::new(&config, Some(reporter))?; let meter = global::meter("test-meter"); let counter = meter.u64_counter("test-counter").build(); From bcfe275dca75abc4380933da99bfbc0cbd6ee385 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Sun, 11 Jan 2026 16:13:42 -0800 Subject: [PATCH 59/92] output mode --- .../src/pipeline/service/telemetry/logs.rs | 50 +++++++++++++++---- .../crates/controller/src/error.rs | 7 +++ .../crates/controller/src/lib.rs | 49 ++++++++++++------ 3 files changed, 82 insertions(+), 24 deletions(-) diff --git a/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs b/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs index 9b05578cd9..1c9ff96af7 100644 --- 
a/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs +++ b/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs @@ -17,6 +17,10 @@ pub struct LogsConfig { /// Logging strategy configuration for different thread contexts. pub strategies: LoggingStrategies, + /// What to do with collected log events. + #[serde(default = "default_output")] + pub output: OutputMode, + /// The list of log processors to configure (for OpenTelemetry SDK output mode). /// Only used when `output.mode` is set to `opentelemetry`. pub processors: Vec, @@ -72,7 +76,7 @@ pub enum ProviderMode { } /// Output mode: what the recipient does with received log events. -#[derive(Debug, Clone, Copy, Serialize, Deserialize, JsonSchema, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, Serialize, Deserialize, JsonSchema, PartialEq, Eq, Default)] #[serde(rename_all = "lowercase")] pub enum OutputMode { /// Disable output. @@ -80,17 +84,12 @@ pub enum OutputMode { /// Raw logging: format and print directly to console (stdout/stderr). /// ERROR/WARN go to stderr, others to stdout. + #[default] Raw, +} - /// [Demonstrated]: Deliver to a dedicated telemetry pipeline. - Pipeline, - - /// [Hypothetical]: Store in a memory ring buffer for `/logs` HTTP endpoint. - Memory, - - /// [Hypothetical]: Forward OTLP bytes into the OTel SDK pipeline (requires - /// OTLP-bytes-to-SDK-event). - OpenTelemetry, +fn default_output() -> OutputMode { + OutputMode::Raw } fn default_level() -> LogLevel { @@ -109,7 +108,38 @@ impl Default for LogsConfig { Self { level: default_level(), strategies: default_strategies(), + output: default_output(), processors: Vec::new(), } } } + +impl LogsConfig { + /// Validate the logs configuration. + /// + /// Returns an error if: + /// - `output` is `Noop` but a provider strategy uses `Buffered` or `Unbuffered` + /// (logs would be sent but discarded) + pub fn validate(&self) -> Result<(), String> { + if self.output == OutputMode::Noop { + let global_sends = matches!( + self.strategies.global, + ProviderMode::Buffered | ProviderMode::Unbuffered + ); + let engine_sends = matches!( + self.strategies.engine, + ProviderMode::Buffered | ProviderMode::Unbuffered + ); + + if global_sends || engine_sends { + return Err(format!( + "output mode is 'noop' but provider strategies would send logs: \ + global={:?}, engine={:?}. Set strategies to 'noop', 'raw', or 'opentelemetry', \ + or change output to 'raw'.", + self.strategies.global, self.strategies.engine + )); + } + } + Ok(()) + } +} diff --git a/rust/otap-dataflow/crates/controller/src/error.rs b/rust/otap-dataflow/crates/controller/src/error.rs index 53ca9aa299..683fb5e59f 100644 --- a/rust/otap-dataflow/crates/controller/src/error.rs +++ b/rust/otap-dataflow/crates/controller/src/error.rs @@ -96,4 +96,11 @@ pub enum Error { /// Panic message. panic_message: String, }, + + /// Configuration validation error. + #[error("Configuration error: {message}")] + ConfigurationError { + /// Error message describing the configuration problem. 
+ message: String, + }, } diff --git a/rust/otap-dataflow/crates/controller/src/lib.rs b/rust/otap-dataflow/crates/controller/src/lib.rs index d7b0bdf74f..5c67d970ec 100644 --- a/rust/otap-dataflow/crates/controller/src/lib.rs +++ b/rust/otap-dataflow/crates/controller/src/lib.rs @@ -21,7 +21,7 @@ use crate::error::Error; use crate::thread_task::spawn_thread_local_task; use core_affinity::CoreId; use otap_df_config::engine::HttpAdminSettings; -use otap_df_config::pipeline::service::telemetry::logs::ProviderMode; +use otap_df_config::pipeline::service::telemetry::logs::{OutputMode, ProviderMode}; use otap_df_config::{ PipelineGroupId, PipelineId, pipeline::PipelineConfig, @@ -86,17 +86,31 @@ impl Controller { pipeline_ctrl_msg_channel_size = settings.default_pipeline_ctrl_msg_channel_size ); - let (logs_collector, logs_reporter) = - LogsCollector::new(telemetry_config.reporting_channel_size); - // Start the logs collector thread - // TODO: Store handle for graceful shutdown - let _logs_collector_handle = - spawn_thread_local_task("logs-collector", move |_cancellation_token| { - logs_collector.run() - })?; + // Validate logs configuration + telemetry_config + .logs + .validate() + .map_err(|msg| Error::ConfigurationError { message: msg })?; + + // Create logs collector and reporter based on output mode. + // Only start the collector thread if output is Raw. + let logs_reporter = match telemetry_config.logs.output { + OutputMode::Raw => { + let (logs_collector, reporter) = + LogsCollector::new(telemetry_config.reporting_channel_size); + // Start the logs collector thread + // TODO: Store handle for graceful shutdown + let _logs_collector_handle = + spawn_thread_local_task("logs-collector", move |_cancellation_token| { + logs_collector.run() + })?; + Some(reporter) + } + OutputMode::Noop => None, + }; let opentelemetry_client = - OpentelemetryClient::new(telemetry_config, Some(logs_reporter.clone()))?; + OpentelemetryClient::new(telemetry_config, logs_reporter.clone())?; let metrics_system = MetricsSystem::new(telemetry_config); let metrics_dispatcher = metrics_system.dispatcher(); let metrics_reporter = metrics_system.reporter(); @@ -128,22 +142,29 @@ impl Controller { obs_state_store.run(cancellation_token) })?; - // Create engine logs setup based on strategy configuration + // Create engine logs setup based on strategy configuration. + // Note: validation ensures that if Buffered/Unbuffered is used, logs_reporter is Some. 
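The validate() method introduced above turns the noop-output/sending-provider mismatch into a startup error rather than a silent log sink. A sketch of the failure mode (driver code is hypothetical; the behavior follows validate() in this patch):

    let mut cfg = LogsConfig::default();
    cfg.output = OutputMode::Noop; // discard everything...
    // ...while the default engine strategy (Buffered) would still send logs:
    assert!(cfg.validate().is_err());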
let engine_logs_setup = match telemetry_config.logs.strategies.engine { ProviderMode::Noop => EngineLogsSetup::Noop, ProviderMode::Raw => EngineLogsSetup::Raw, ProviderMode::Buffered => EngineLogsSetup::Buffered { - reporter: logs_reporter.clone(), + reporter: logs_reporter + .clone() + .expect("validated: buffered requires reporter"), capacity: 1024, // TODO: make configurable }, ProviderMode::Unbuffered => EngineLogsSetup::Unbuffered { - reporter: logs_reporter.clone(), + reporter: logs_reporter + .clone() + .expect("validated: unbuffered requires reporter"), }, ProviderMode::OpenTelemetry => { // OpenTelemetry mode for engine is not yet supported // Fall back to buffered for now EngineLogsSetup::Buffered { - reporter: logs_reporter.clone(), + reporter: logs_reporter + .clone() + .expect("validated: opentelemetry requires reporter"), capacity: 1024, } } From 0e15ab8daa835014bb0323ed5bd3cba0614045bb Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Sun, 11 Jan 2026 17:13:39 -0800 Subject: [PATCH 60/92] ITR --- .../src/pipeline/service/telemetry/logs.rs | 28 +++- .../crates/controller/src/lib.rs | 51 +++--- .../otap/src/internal_telemetry_receiver.rs | 158 ++++++++++++++++++ rust/otap-dataflow/crates/otap/src/lib.rs | 3 + .../otap-dataflow/crates/telemetry/src/lib.rs | 4 +- .../crates/telemetry/src/logs.rs | 120 ++++++++++++- 6 files changed, 335 insertions(+), 29 deletions(-) create mode 100644 rust/otap-dataflow/crates/otap/src/internal_telemetry_receiver.rs diff --git a/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs b/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs index 1c9ff96af7..d86b027cef 100644 --- a/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs +++ b/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs @@ -53,6 +53,11 @@ pub struct LoggingStrategies { /// Strategy for engine/pipeline threads. pub engine: ProviderMode, + + /// Strategy for nodes handling internal telemetry (downstream of internal receiver). + /// Defaults to Noop to prevent log recursion. + #[serde(default = "default_internal_provider")] + pub internal: ProviderMode, } /// Logs producer: how log events are captured and routed. @@ -79,13 +84,17 @@ pub enum ProviderMode { #[derive(Debug, Clone, Copy, Serialize, Deserialize, JsonSchema, PartialEq, Eq, Default)] #[serde(rename_all = "lowercase")] pub enum OutputMode { - /// Disable output. + /// No-output is incompatible with Buffered and Unbuffered provider modes. Noop, /// Raw logging: format and print directly to console (stdout/stderr). /// ERROR/WARN go to stderr, others to stdout. #[default] Raw, + + /// Route to internal telemetry receiver node. + /// Requires engine provider to be Buffered. 
+ Internal, } fn default_output() -> OutputMode { @@ -96,10 +105,15 @@ fn default_level() -> LogLevel { LogLevel::Off } +fn default_internal_provider() -> ProviderMode { + ProviderMode::Noop +} + fn default_strategies() -> LoggingStrategies { LoggingStrategies { global: ProviderMode::Unbuffered, engine: ProviderMode::Buffered, + internal: default_internal_provider(), } } @@ -120,6 +134,7 @@ impl LogsConfig { /// Returns an error if: /// - `output` is `Noop` but a provider strategy uses `Buffered` or `Unbuffered` /// (logs would be sent but discarded) + /// - `output` is `Internal` but engine provider is not `Buffered` pub fn validate(&self) -> Result<(), String> { if self.output == OutputMode::Noop { let global_sends = matches!( @@ -140,6 +155,17 @@ impl LogsConfig { )); } } + + if self.output == OutputMode::Internal { + if self.strategies.engine != ProviderMode::Buffered { + return Err(format!( + "output mode is 'internal' but engine provider is {:?}. \ + Internal output requires engine provider to be 'buffered'.", + self.strategies.engine + )); + } + } + Ok(()) } } diff --git a/rust/otap-dataflow/crates/controller/src/lib.rs b/rust/otap-dataflow/crates/controller/src/lib.rs index 5c67d970ec..f0255220e8 100644 --- a/rust/otap-dataflow/crates/controller/src/lib.rs +++ b/rust/otap-dataflow/crates/controller/src/lib.rs @@ -106,7 +106,7 @@ impl Controller { })?; Some(reporter) } - OutputMode::Noop => None, + OutputMode::Noop | OutputMode::Internal => None, }; let opentelemetry_client = @@ -142,30 +142,37 @@ impl Controller { obs_state_store.run(cancellation_token) })?; - // Create engine logs setup based on strategy configuration. - // Note: validation ensures that if Buffered/Unbuffered is used, logs_reporter is Some. - let engine_logs_setup = match telemetry_config.logs.strategies.engine { - ProviderMode::Noop => EngineLogsSetup::Noop, - ProviderMode::Raw => EngineLogsSetup::Raw, - ProviderMode::Buffered => EngineLogsSetup::Buffered { - reporter: logs_reporter - .clone() - .expect("validated: buffered requires reporter"), + // Create engine logs setup based on output mode and strategy configuration. + // When output is Internal, use Internal setup (validation ensures engine is Buffered). + // Otherwise, use the strategy configuration. 
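Pulling the patch-60 surface together, a hedged YAML sketch of an internal-telemetry configuration (key names follow the serde lowercase renames above; validate() requires engine: buffered whenever output: internal):

    service:
      telemetry:
        logs:
          level: info
          output: internal        # route engine logs to the internal telemetry receiver
          strategies:
            global: raw           # non-engine threads print synchronously
            engine: buffered      # required by validate() when output is 'internal'
            internal: noop        # default; prevents log recursion downstream of the ITR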
+ let engine_logs_setup = if telemetry_config.logs.output == OutputMode::Internal { + EngineLogsSetup::Internal { capacity: 1024, // TODO: make configurable - }, - ProviderMode::Unbuffered => EngineLogsSetup::Unbuffered { - reporter: logs_reporter - .clone() - .expect("validated: unbuffered requires reporter"), - }, - ProviderMode::OpenTelemetry => { - // OpenTelemetry mode for engine is not yet supported - // Fall back to buffered for now - EngineLogsSetup::Buffered { + } + } else { + match telemetry_config.logs.strategies.engine { + ProviderMode::Noop => EngineLogsSetup::Noop, + ProviderMode::Raw => EngineLogsSetup::Raw, + ProviderMode::Buffered => EngineLogsSetup::Buffered { reporter: logs_reporter .clone() - .expect("validated: opentelemetry requires reporter"), - capacity: 1024, + .expect("validated: buffered requires reporter"), + capacity: 1024, // TODO: make configurable + }, + ProviderMode::Unbuffered => EngineLogsSetup::Unbuffered { + reporter: logs_reporter + .clone() + .expect("validated: unbuffered requires reporter"), + }, + ProviderMode::OpenTelemetry => { + // OpenTelemetry mode for engine is not yet supported + // Fall back to buffered for now + EngineLogsSetup::Buffered { + reporter: logs_reporter + .clone() + .expect("validated: opentelemetry requires reporter"), + capacity: 1024, + } } } }; diff --git a/rust/otap-dataflow/crates/otap/src/internal_telemetry_receiver.rs b/rust/otap-dataflow/crates/otap/src/internal_telemetry_receiver.rs new file mode 100644 index 0000000000..7eeb239400 --- /dev/null +++ b/rust/otap-dataflow/crates/otap/src/internal_telemetry_receiver.rs @@ -0,0 +1,158 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +//! Internal telemetry receiver. +//! +//! This receiver drains the engine thread's internal log buffer and emits +//! the logs as OTLP ExportLogsRequest messages into the pipeline. + +use crate::OTAP_RECEIVER_FACTORIES; +use crate::pdata::OtapPdata; +use async_trait::async_trait; +use linkme::distributed_slice; +use otap_df_config::node::NodeUserConfig; +use otap_df_engine::config::ReceiverConfig; +use otap_df_engine::context::PipelineContext; +use otap_df_engine::control::NodeControlMsg; +use otap_df_engine::error::Error; +use otap_df_engine::local::receiver as local; +use otap_df_engine::node::NodeId; +use otap_df_engine::receiver::ReceiverWrapper; +use otap_df_engine::terminal_state::TerminalState; +use otap_df_engine::ReceiverFactory; +use otap_df_pdata::OtlpProtoBytes; +use otap_df_telemetry::drain_thread_log_buffer; +use otap_df_telemetry::metrics::MetricSetSnapshot; +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use std::sync::Arc; +use tokio::time::Duration; + +/// The URN for the internal telemetry receiver. +pub const INTERNAL_TELEMETRY_RECEIVER_URN: &str = "urn:otel:otap:internal_telemetry:receiver"; + +/// Configuration for the internal telemetry receiver. +#[derive(Clone, Deserialize, Serialize)] +#[serde(deny_unknown_fields)] +pub struct Config { + /// Interval in milliseconds between buffer drains. + #[serde(default = "default_drain_interval_ms")] + pub drain_interval_ms: u64, +} + +fn default_drain_interval_ms() -> u64 { + 1000 +} + +impl Default for Config { + fn default() -> Self { + Self { + drain_interval_ms: default_drain_interval_ms(), + } + } +} + +/// A receiver that drains the engine's internal log buffer and emits OTLP logs. +pub struct InternalTelemetryReceiver { + config: Config, +} + +/// Declares the internal telemetry receiver as a local receiver factory. 
+#[allow(unsafe_code)] +#[distributed_slice(OTAP_RECEIVER_FACTORIES)] +pub static INTERNAL_TELEMETRY_RECEIVER: ReceiverFactory = ReceiverFactory { + name: INTERNAL_TELEMETRY_RECEIVER_URN, + create: |_pipeline: PipelineContext, + node: NodeId, + node_config: Arc, + receiver_config: &ReceiverConfig| { + Ok(ReceiverWrapper::local( + InternalTelemetryReceiver::from_config(&node_config.config)?, + node, + node_config, + receiver_config, + )) + }, +}; + +impl InternalTelemetryReceiver { + /// Create a new receiver with the given configuration. + #[must_use] + pub fn new(config: Config) -> Self { + Self { config } + } + + /// Create a receiver from a JSON configuration. + pub fn from_config(config: &Value) -> Result { + let config: Config = + serde_json::from_value(config.clone()).map_err(|e| { + otap_df_config::error::Error::InvalidUserConfig { + error: e.to_string(), + } + })?; + Ok(Self::new(config)) + } +} + +#[async_trait(?Send)] +impl local::Receiver for InternalTelemetryReceiver { + async fn start( + self: Box, + mut ctrl_msg_recv: local::ControlChannel, + effect_handler: local::EffectHandler, + ) -> Result { + let drain_interval = Duration::from_millis(self.config.drain_interval_ms); + + // Start periodic telemetry collection + let _ = effect_handler + .start_periodic_telemetry(Duration::from_secs(1)) + .await?; + + loop { + tokio::select! { + biased; + + // Handle control messages with priority + ctrl_msg = ctrl_msg_recv.recv() => { + match ctrl_msg { + Ok(NodeControlMsg::Shutdown { deadline, .. }) => { + // Drain any remaining logs before shutdown + self.drain_and_send(&effect_handler).await?; + return Ok(TerminalState::new::<[MetricSetSnapshot; 0]>(deadline, [])); + } + Ok(NodeControlMsg::CollectTelemetry { .. }) => { + // No metrics to report for now + } + Err(e) => { + return Err(Error::ChannelRecvError(e)); + } + _ => { + // Ignore other control messages + } + } + } + + // Periodic drain + _ = tokio::time::sleep(drain_interval) => { + self.drain_and_send(&effect_handler).await?; + } + } + } + } +} + +impl InternalTelemetryReceiver { + /// Drain the thread-local log buffer and send as OTLP logs. + async fn drain_and_send(&self, effect_handler: &local::EffectHandler) -> Result<(), Error> { + if let Some(batch) = drain_thread_log_buffer() { + if !batch.records.is_empty() { + let bytes = batch.encode_export_logs_request(); + let pdata = OtapPdata::new_todo_context( + OtlpProtoBytes::ExportLogsRequest(bytes).into(), + ); + effect_handler.send_message(pdata).await?; + } + } + Ok(()) + } +} diff --git a/rust/otap-dataflow/crates/otap/src/lib.rs b/rust/otap-dataflow/crates/otap/src/lib.rs index 63ffe331b1..73e3cb2d1d 100644 --- a/rust/otap-dataflow/crates/otap/src/lib.rs +++ b/rust/otap-dataflow/crates/otap/src/lib.rs @@ -52,6 +52,9 @@ pub mod noop_exporter; /// An error-exporter returns a static error. pub mod error_exporter; +/// Internal telemetry receiver that drains engine logs into the pipeline. +pub mod internal_telemetry_receiver; + /// Experimental exporters and processors #[cfg(any( feature = "experimental-exporters", diff --git a/rust/otap-dataflow/crates/telemetry/src/lib.rs b/rust/otap-dataflow/crates/telemetry/src/lib.rs index faa9fbde1e..07c4cb075d 100644 --- a/rust/otap-dataflow/crates/telemetry/src/lib.rs +++ b/rust/otap-dataflow/crates/telemetry/src/lib.rs @@ -66,8 +66,8 @@ pub use tracing::warn_span as otel_warn_span; // Re-export commonly used logs types for convenience. 
pub use logs::{ - EngineLogsSetup, LogsCollector, LogsFlusher, LogsReporter, ThreadBufferedLayer, - UnbufferedLayer, + drain_thread_log_buffer, EngineLogsSetup, LogBatch, LogsCollector, LogsFlusher, LogsReporter, + ThreadBufferedLayer, UnbufferedLayer, }; // TODO This should be #[cfg(test)], but something is preventing it from working. diff --git a/rust/otap-dataflow/crates/telemetry/src/logs.rs b/rust/otap-dataflow/crates/telemetry/src/logs.rs index b7636e3ab5..4de7395fff 100644 --- a/rust/otap-dataflow/crates/telemetry/src/logs.rs +++ b/rust/otap-dataflow/crates/telemetry/src/logs.rs @@ -3,8 +3,14 @@ //! Internal logs collection for OTAP-Dataflow. +use bytes::Bytes; use crate::error::Error; -use crate::self_tracing::{ConsoleWriter, LogRecord, RawLoggingLayer, SavedCallsite}; +use crate::self_tracing::{ConsoleWriter, DirectLogRecordEncoder, LogRecord, RawLoggingLayer, SavedCallsite}; +use otap_df_pdata::otlp::ProtoBuffer; +use otap_df_pdata::proto::consts::field_num::logs::{ + LOGS_DATA_RESOURCE, RESOURCE_LOGS_SCOPE_LOGS, SCOPE_LOGS_LOG_RECORDS, +}; +use otap_df_pdata::proto_encode_len_delimited_unknown_size; use std::cell::RefCell; use tracing::{Event, Subscriber}; use tracing_subscriber::layer::{Context, Layer as TracingLayer, SubscriberExt}; @@ -24,6 +30,57 @@ impl LogBatch { pub fn size_with_dropped(&self) -> usize { self.records.len() + self.dropped_count } + + /// Encode this batch as an OTLP ExportLogsServiceRequest. + /// + /// The batch is wrapped in a minimal structure: + /// - One ResourceLogs with no resource attributes + /// - One ScopeLogs with no scope + /// - All log records from the batch + #[must_use] + pub fn encode_export_logs_request(&self) -> Bytes { + let mut buf = ProtoBuffer::with_capacity(self.records.len() * 256); + + // ExportLogsServiceRequest { resource_logs: [ ResourceLogs { ... } ] } + proto_encode_len_delimited_unknown_size!( + LOGS_DATA_RESOURCE, // field 1: resource_logs (same field number) + { + // ResourceLogs { scope_logs: [ ScopeLogs { ... } ] } + // Note: we skip resource (field 1) to use empty/default resource + proto_encode_len_delimited_unknown_size!( + RESOURCE_LOGS_SCOPE_LOGS, // field 2: scope_logs + { + // ScopeLogs { log_records: [ ... ] } + // Note: we skip scope (field 1) to use empty/default scope + for record in &self.records { + self.encode_log_record(record, &mut buf); + } + }, + &mut buf + ); + }, + &mut buf + ); + + buf.into_bytes() + } + + /// Encode a single log record into the buffer. + fn encode_log_record(&self, record: &LogRecord, buf: &mut ProtoBuffer) { + // Get the callsite metadata for encoding + let metadata = record.callsite_id.0.metadata(); + let callsite = SavedCallsite::new(metadata); + + proto_encode_len_delimited_unknown_size!( + SCOPE_LOGS_LOG_RECORDS, // field 2: log_records + { + let mut encoder = DirectLogRecordEncoder::new(buf); + // Clone record since encode_log_record takes ownership + let _ = encoder.encode_log_record(record.clone(), &callsite); + }, + buf + ); + } } /// A payload of two kinds @@ -108,6 +165,15 @@ where f() } +/// Drain the thread-local log buffer and return the batch. +/// +/// Returns `None` if no buffer is installed (e.g., not in an engine thread). +/// This is for use by the internal telemetry receiver node. +#[must_use] +pub fn drain_thread_log_buffer() -> Option { + CURRENT_LOG_BUFFER.with(|cell| cell.borrow_mut().as_mut().map(|buffer| buffer.drain())) +} + /// Reporter for sending log batches through a channel. 
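The encoder above relies on a fixed, minimal nesting: one ResourceLogs holding one ScopeLogs holding every buffered record, with resource and scope left empty. In comment form, the wire shape is:

    // ExportLogsServiceRequest
    //   resource_logs[0]          <- field 1, empty Resource
    //     scope_logs[0]           <- field 2, empty InstrumentationScope
    //       log_records[0..N]     <- field 2, one per buffered LogRecord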
#[derive(Clone)] pub struct LogsReporter { @@ -121,6 +187,18 @@ impl LogsReporter { Self { sender } } + /// Create a null reporter that discards all payloads. + /// + /// Used for internal telemetry mode where the buffer is drained directly + /// rather than sent through a channel. + #[must_use] + pub fn null() -> Self { + // Create a bounded channel of size 0 - sends will always fail + // but we never actually call try_report on a null reporter + let (sender, _receiver) = flume::bounded(0); + Self { sender } + } + /// Try to send a payload, non-blocking. pub fn try_report(&self, payload: LogPayload) -> Result<(), Error> { self.sender @@ -284,6 +362,11 @@ pub enum EngineLogsSetup { /// Reporter to send singletons through. reporter: LogsReporter, }, + /// Internal: accumulates in thread-local buffer, drained by internal telemetry receiver. + Internal { + /// Buffer capacity per thread. + capacity: usize, + }, } /// Handle for flushing buffered logs from the engine thread. @@ -295,16 +378,19 @@ pub enum LogsFlusher { Noop, /// Flusher that drains the thread-local buffer and sends via the reporter. Buffered(LogsReporter), + /// Flusher for internal telemetry mode - drain returns batch directly. + /// Used by internal telemetry receiver node. + InternalDrain, } impl LogsFlusher { - /// Flush any buffered logs. + /// Flush any buffered logs by sending to the reporter. /// - /// For `Noop`, this does nothing. + /// For `Noop` and `InternalDrain`, this does nothing. /// For `Buffered`, this drains the thread-local buffer and sends as a batch. pub fn flush(&self) -> Result<(), Error> { match self { - LogsFlusher::Noop => Ok(()), + LogsFlusher::Noop | LogsFlusher::InternalDrain => Ok(()), LogsFlusher::Buffered(reporter) => { if let Some(batch) = CURRENT_LOG_BUFFER .with(|cell| cell.borrow_mut().as_mut().map(|buffer| buffer.drain())) @@ -315,6 +401,19 @@ impl LogsFlusher { } } } + + /// Drain the thread-local buffer and return the batch directly. + /// + /// For use by internal telemetry receiver only. + /// Returns `None` if no buffer is installed or if this is not `InternalDrain` mode. + pub fn drain(&self) -> Option { + match self { + LogsFlusher::InternalDrain => { + CURRENT_LOG_BUFFER.with(|cell| cell.borrow_mut().as_mut().map(|buffer| buffer.drain())) + } + _ => None, + } + } } impl EngineLogsSetup { @@ -359,6 +458,19 @@ impl EngineLogsSetup { let subscriber = Registry::default().with(filter).with(layer); tracing::subscriber::with_default(subscriber, || f(LogsFlusher::Noop)) } + EngineLogsSetup::Internal { capacity } => { + // For internal mode, we use a "null" reporter that doesn't send anywhere. + // The internal telemetry receiver will drain the buffer directly. 
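It is worth pinning down the patch-60 split between the two LogsFlusher exits: flush() pushes a drained batch through a reporter, while drain() hands the batch back to the caller and is only meaningful for InternalDrain. A sketch of the intended call sites (this shape is superseded by the channel-based design in patch 61 below):

    match &flusher {
        LogsFlusher::InternalDrain => {
            if let Some(batch) = flusher.drain() {
                // the receiver node encodes `batch` as OTLP and emits it
            }
        }
        other => other.flush()?, // Buffered sends via the reporter; Noop is a no-op
    }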
+ let null_reporter = LogsReporter::null(); + let layer = ThreadBufferedLayer::new(null_reporter); + let subscriber = Registry::default().with(filter).with(layer); + let flusher = LogsFlusher::InternalDrain; + + // Install the thread-local buffer + with_thread_log_buffer(*capacity, || { + tracing::subscriber::with_default(subscriber, || f(flusher)) + }) + } } } } From a3cf629f37f803c87f11744b6d05b0d6a92f3ce2 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Sun, 11 Jan 2026 22:46:34 -0800 Subject: [PATCH 61/92] closer --- .../configs/fake-debug-noop-telemetry.yaml | 3 - .../src/pipeline/service/telemetry/logs.rs | 4 + .../crates/controller/src/lib.rs | 115 +++++++++++------- rust/otap-dataflow/crates/engine/src/lib.rs | 18 ++- .../crates/engine/src/local/receiver.rs | 25 +++- .../crates/engine/src/receiver.rs | 42 ++++++- .../otap/src/internal_telemetry_receiver.rs | 79 ++++++------ .../crates/otap/src/syslog_cef_receiver.rs | 2 + .../otap-dataflow/crates/telemetry/src/lib.rs | 2 +- .../crates/telemetry/src/logs.rs | 52 +++----- 10 files changed, 217 insertions(+), 125 deletions(-) diff --git a/rust/otap-dataflow/configs/fake-debug-noop-telemetry.yaml b/rust/otap-dataflow/configs/fake-debug-noop-telemetry.yaml index 1eb65e8263..2c589005d1 100644 --- a/rust/otap-dataflow/configs/fake-debug-noop-telemetry.yaml +++ b/rust/otap-dataflow/configs/fake-debug-noop-telemetry.yaml @@ -38,9 +38,6 @@ service: telemetry: logs: level: "info" - internal: - # To use the OTel SDK, disable the internal logger. - enabled: false processors: - batch: exporter: diff --git a/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs b/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs index d86b027cef..fcc7da00e8 100644 --- a/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs +++ b/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs @@ -8,6 +8,10 @@ pub mod processors; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +/// The well-known URN for the Internal Telemetry Receiver node. +/// This receiver collects internal logs from all threads and emits them as OTLP. +pub const INTERNAL_TELEMETRY_RECEIVER_URN: &str = "urn:otel:otap:internal_telemetry:receiver"; + /// Internal logs configuration. #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] pub struct LogsConfig { diff --git a/rust/otap-dataflow/crates/controller/src/lib.rs b/rust/otap-dataflow/crates/controller/src/lib.rs index f0255220e8..a52b01a5c4 100644 --- a/rust/otap-dataflow/crates/controller/src/lib.rs +++ b/rust/otap-dataflow/crates/controller/src/lib.rs @@ -21,7 +21,9 @@ use crate::error::Error; use crate::thread_task::spawn_thread_local_task; use core_affinity::CoreId; use otap_df_config::engine::HttpAdminSettings; -use otap_df_config::pipeline::service::telemetry::logs::{OutputMode, ProviderMode}; +use otap_df_config::pipeline::service::telemetry::logs::{ + OutputMode, ProviderMode, INTERNAL_TELEMETRY_RECEIVER_URN, +}; use otap_df_config::{ PipelineGroupId, PipelineId, pipeline::PipelineConfig, @@ -92,21 +94,46 @@ impl Controller { .validate() .map_err(|msg| Error::ConfigurationError { message: msg })?; - // Create logs collector and reporter based on output mode. - // Only start the collector thread if output is Raw. 
- let logs_reporter = match telemetry_config.logs.output { - OutputMode::Raw => { - let (logs_collector, reporter) = - LogsCollector::new(telemetry_config.reporting_channel_size); - // Start the logs collector thread - // TODO: Store handle for graceful shutdown - let _logs_collector_handle = - spawn_thread_local_task("logs-collector", move |_cancellation_token| { - logs_collector.run() - })?; - Some(reporter) + // Create logs reporter based on provider strategies. + // LogsReporter is needed when: + // - global == Unbuffered (global threads send directly to channel) + // - engine == Buffered or Unbuffered (engine threads send to channel) + // Raw provider mode = synchronous console output, no reporter needed. + let strategies_need_reporter = + telemetry_config.logs.strategies.global == ProviderMode::Unbuffered + || matches!( + telemetry_config.logs.strategies.engine, + ProviderMode::Buffered | ProviderMode::Unbuffered + ); + + // Create the reporter if strategies need it. + // The receiver end goes to either: + // - LogsCollector thread (output == Raw): prints to console + // - Internal Telemetry Receiver node (output == Internal): emits as OTLP + let (logs_reporter, logs_receiver) = if strategies_need_reporter { + match telemetry_config.logs.output { + OutputMode::Raw => { + // Start collector thread for Raw output mode + let (logs_collector, reporter) = + LogsCollector::new(telemetry_config.reporting_channel_size); + // TODO: Store handle for graceful shutdown + let _logs_collector_handle = + spawn_thread_local_task("logs-collector", move |_cancellation_token| { + logs_collector.run() + })?; + (Some(reporter), None) + } + OutputMode::Internal => { + // For Internal output, create just the channel. + // The ITR node will receive from it during pipeline build. + let (logs_receiver, reporter) = + LogsCollector::channel(telemetry_config.reporting_channel_size); + (Some(reporter), Some(logs_receiver)) + } + OutputMode::Noop => (None, None), } - OutputMode::Noop | OutputMode::Internal => None, + } else { + (None, None) }; let opentelemetry_client = @@ -142,37 +169,31 @@ impl Controller { obs_state_store.run(cancellation_token) })?; - // Create engine logs setup based on output mode and strategy configuration. - // When output is Internal, use Internal setup (validation ensures engine is Buffered). - // Otherwise, use the strategy configuration. - let engine_logs_setup = if telemetry_config.logs.output == OutputMode::Internal { - EngineLogsSetup::Internal { + // Create engine logs setup based on strategy configuration. + // When output is Internal, the logs go through the channel to ITR. + // The validation layer ensures that when output=Internal, engine strategy is Buffered. 
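The wiring above reduces to a small decision table, worth stating once (this mirrors the strategies_need_reporter expression and the output match in this hunk):

    // reporter needed  <=>  global == Unbuffered || engine in { Buffered, Unbuffered }
    // output == raw       -> LogsCollector thread consumes the channel and prints
    // output == internal  -> the ITR node consumes the channel (logs_receiver is Some)
    // output == noop      -> no channel is created; validate() already rejected senders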
+ let engine_logs_setup = match telemetry_config.logs.strategies.engine { + ProviderMode::Noop => EngineLogsSetup::Noop, + ProviderMode::Raw => EngineLogsSetup::Raw, + ProviderMode::Buffered => EngineLogsSetup::Buffered { + reporter: logs_reporter + .clone() + .expect("validated: buffered requires reporter"), capacity: 1024, // TODO: make configurable - } - } else { - match telemetry_config.logs.strategies.engine { - ProviderMode::Noop => EngineLogsSetup::Noop, - ProviderMode::Raw => EngineLogsSetup::Raw, - ProviderMode::Buffered => EngineLogsSetup::Buffered { - reporter: logs_reporter - .clone() - .expect("validated: buffered requires reporter"), - capacity: 1024, // TODO: make configurable - }, - ProviderMode::Unbuffered => EngineLogsSetup::Unbuffered { + }, + ProviderMode::Unbuffered => EngineLogsSetup::Unbuffered { + reporter: logs_reporter + .clone() + .expect("validated: unbuffered requires reporter"), + }, + ProviderMode::OpenTelemetry => { + // OpenTelemetry mode for engine is not yet supported + // Fall back to buffered for now + EngineLogsSetup::Buffered { reporter: logs_reporter .clone() - .expect("validated: unbuffered requires reporter"), - }, - ProviderMode::OpenTelemetry => { - // OpenTelemetry mode for engine is not yet supported - // Fall back to buffered for now - EngineLogsSetup::Buffered { - reporter: logs_reporter - .clone() - .expect("validated: opentelemetry requires reporter"), - capacity: 1024, - } + .expect("validated: opentelemetry requires reporter"), + capacity: 1024, } } }; @@ -212,6 +233,7 @@ impl Controller { ); let metrics_reporter = metrics_reporter.clone(); let engine_logs_setup = engine_logs_setup.clone(); + let logs_receiver = logs_receiver.clone(); let thread_name = format!("pipeline-core-{}", core_id.id); let obs_evt_reporter = obs_evt_reporter.clone(); @@ -228,6 +250,7 @@ impl Controller { metrics_reporter, engine_logs_setup, log_level, + logs_receiver, pipeline_ctrl_msg_tx, pipeline_ctrl_msg_rx, ) @@ -447,6 +470,7 @@ impl Controller { metrics_reporter: MetricsReporter, engine_logs_setup: EngineLogsSetup, log_level: otap_df_config::pipeline::service::telemetry::logs::LogLevel, + logs_receiver: Option, pipeline_ctrl_msg_tx: PipelineCtrlMsgSender, pipeline_ctrl_msg_rx: PipelineCtrlMsgReceiver, ) -> Result, Error> { @@ -474,8 +498,11 @@ impl Controller { )); // Build the runtime pipeline from the configuration + // Pass logs_receiver for injection into ITR node (if present) + let logs_receiver_param = logs_receiver + .map(|rx| (INTERNAL_TELEMETRY_RECEIVER_URN, rx)); let runtime_pipeline = pipeline_factory - .build(pipeline_context.clone(), pipeline_config.clone()) + .build(pipeline_context.clone(), pipeline_config.clone(), logs_receiver_param) .map_err(|e| Error::PipelineRuntimeError { source: Box::new(e), })?; diff --git a/rust/otap-dataflow/crates/engine/src/lib.rs b/rust/otap-dataflow/crates/engine/src/lib.rs index 4519004855..c2a635278b 100644 --- a/rust/otap-dataflow/crates/engine/src/lib.rs +++ b/rust/otap-dataflow/crates/engine/src/lib.rs @@ -292,10 +292,18 @@ impl PipelineFactory { /// the hyper-edges between them to determine the best channel type. /// - Assign channels to the source nodes and their destination nodes based on the previous /// analysis. + /// + /// # Parameters + /// - `pipeline_ctx`: The pipeline context for this build. + /// - `config`: The pipeline configuration. + /// - `logs_receiver`: Optional tuple of (URN, receiver) for internal logs channel. 
+ /// When provided, the receiver is injected into any receiver node matching the URN, + /// enabling collection of logs from all threads via the channel. pub fn build( self: &PipelineFactory, pipeline_ctx: PipelineContext, config: PipelineConfig, + logs_receiver: Option<(&str, receiver::LogsReceiver)>, ) -> Result, Error> { let mut receivers = Vec::new(); let mut processors = Vec::new(); @@ -332,7 +340,7 @@ impl PipelineFactory { match node_config.kind { otap_df_config::node::NodeKind::Receiver => { - let wrapper = self.create_receiver( + let mut wrapper = self.create_receiver( &pipeline_ctx, &mut receiver_names, &mut nodes, @@ -340,6 +348,14 @@ impl PipelineFactory { name.clone(), node_config.clone(), )?; + + // Inject logs receiver if this is the target node + if let Some((target_urn, ref logs_rx)) = logs_receiver { + if node_config.plugin_urn.as_ref() == target_urn { + wrapper.set_logs_receiver(logs_rx.clone()); + } + } + receivers.push(wrapper.with_control_channel_metrics( &pipeline_ctx, &mut channel_metrics, diff --git a/rust/otap-dataflow/crates/engine/src/local/receiver.rs b/rust/otap-dataflow/crates/engine/src/local/receiver.rs index e3b5caf8fb..00ceb82bc1 100644 --- a/rust/otap-dataflow/crates/engine/src/local/receiver.rs +++ b/rust/otap-dataflow/crates/engine/src/local/receiver.rs @@ -120,6 +120,9 @@ impl ControlChannel { } } +/// Type alias for the internal logs receiver channel. +pub type LogsReceiver = otap_df_telemetry::LogsReceiver; + /// A `!Send` implementation of the EffectHandler. #[derive(Clone)] pub struct EffectHandler { @@ -130,6 +133,8 @@ pub struct EffectHandler { msg_senders: HashMap>, /// Cached default sender for fast access in the hot path default_sender: Option>, + /// Receiver for internal logs (for internal telemetry receiver). + logs_receiver: Option, } /// Implementation for the `!Send` effect handler. @@ -142,6 +147,7 @@ impl EffectHandler { default_port: Option, node_request_sender: PipelineCtrlMsgSender, metrics_reporter: MetricsReporter, + logs_receiver: Option, ) -> Self { let mut core = EffectHandlerCore::new(node_id, metrics_reporter); core.set_pipeline_ctrl_msg_sender(node_request_sender); @@ -159,9 +165,19 @@ impl EffectHandler { core, msg_senders, default_sender, + logs_receiver, } } + /// Returns the logs receiver, if configured. + /// + /// This is used by the Internal Telemetry Receiver to consume logs + /// from all threads via the logs channel. + #[must_use] + pub fn logs_receiver(&self) -> Option<&LogsReceiver> { + self.logs_receiver.as_ref() + } + /// Returns the id of the receiver associated with this handler. 
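End to end, the injection path added in this patch is: the controller hands (URN, receiver) to build(), build() matches each receiver node's plugin_urn and calls set_logs_receiver() on the wrapper, and the wrapper threads the channel into the node's EffectHandler. The consuming end, mirroring the ITR further down in this series:

    let logs_receiver = effect_handler
        .logs_receiver()
        .expect("InternalTelemetryReceiver requires a logs_receiver to be configured");
    while let Ok(payload) = logs_receiver.try_recv() {
        // re-encode each LogPayload as OTLP and send it into the pipeline
    }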
#[must_use] pub fn receiver_id(&self) -> NodeId { @@ -318,7 +334,7 @@ mod tests { let (ctrl_tx, _ctrl_rx) = pipeline_ctrl_msg_channel(4); let (_metrics_rx, metrics_reporter) = MetricsReporter::create_new_and_receiver(1); - let eh = EffectHandler::new(test_node("recv"), senders, None, ctrl_tx, metrics_reporter); + let eh = EffectHandler::new(test_node("recv"), senders, None, ctrl_tx, metrics_reporter, None); eh.send_message_to("b", 42).await.unwrap(); @@ -339,7 +355,7 @@ mod tests { let (ctrl_tx, _ctrl_rx) = pipeline_ctrl_msg_channel(4); let (_metrics_rx, metrics_reporter) = MetricsReporter::create_new_and_receiver(1); - let eh = EffectHandler::new(test_node("recv"), senders, None, ctrl_tx, metrics_reporter); + let eh = EffectHandler::new(test_node("recv"), senders, None, ctrl_tx, metrics_reporter, None); eh.send_message(7).await.unwrap(); assert_eq!(rx.recv().await.unwrap(), 7); @@ -362,6 +378,7 @@ mod tests { Some("a".into()), ctrl_tx, metrics_reporter, + None, ); eh.send_message(11).await.unwrap(); @@ -385,7 +402,7 @@ mod tests { let (ctrl_tx, _ctrl_rx) = pipeline_ctrl_msg_channel(4); let (_metrics_rx, metrics_reporter) = MetricsReporter::create_new_and_receiver(1); - let eh = EffectHandler::new(test_node("recv"), senders, None, ctrl_tx, metrics_reporter); + let eh = EffectHandler::new(test_node("recv"), senders, None, ctrl_tx, metrics_reporter, None); let res = eh.send_message(5).await; assert!(res.is_err()); @@ -414,7 +431,7 @@ mod tests { let (ctrl_tx, _ctrl_rx) = pipeline_ctrl_msg_channel(4); let (_metrics_rx, metrics_reporter) = MetricsReporter::create_new_and_receiver(1); - let eh = EffectHandler::new(test_node("recv"), senders, None, ctrl_tx, metrics_reporter); + let eh = EffectHandler::new(test_node("recv"), senders, None, ctrl_tx, metrics_reporter, None); let ports: HashSet<_> = eh.connected_ports().into_iter().collect(); let expected: HashSet<_> = [Cow::from("a"), Cow::from("b")].into_iter().collect(); diff --git a/rust/otap-dataflow/crates/engine/src/receiver.rs b/rust/otap-dataflow/crates/engine/src/receiver.rs index b1ea36d534..a6969e345c 100644 --- a/rust/otap-dataflow/crates/engine/src/receiver.rs +++ b/rust/otap-dataflow/crates/engine/src/receiver.rs @@ -27,6 +27,9 @@ use otap_df_telemetry::reporter::MetricsReporter; use std::collections::HashMap; use std::sync::Arc; +/// Type alias for the internal logs receiver channel. +pub type LogsReceiver = otap_df_telemetry::LogsReceiver; + /// A wrapper for the receiver that allows for both `Send` and `!Send` receivers. /// /// Note: This is useful for creating a single interface for the receiver regardless of their @@ -50,6 +53,8 @@ pub enum ReceiverWrapper { pdata_senders: HashMap>, /// A receiver for pdata messages. pdata_receiver: Option>, + /// Receiver for internal logs (for internal telemetry receiver). + logs_receiver: Option, }, /// A receiver with a `Send` implementation. Shared { @@ -69,6 +74,8 @@ pub enum ReceiverWrapper { pdata_senders: HashMap>, /// A receiver for pdata messages. pdata_receiver: Option>, + /// Receiver for internal logs (for internal telemetry receiver). 
+ logs_receiver: Option, }, } @@ -108,6 +115,7 @@ impl ReceiverWrapper { control_receiver: LocalReceiver::mpsc(control_receiver), pdata_senders: HashMap::new(), pdata_receiver: None, + logs_receiver: None, } } @@ -133,6 +141,7 @@ impl ReceiverWrapper { control_receiver: SharedReceiver::mpsc(control_receiver), pdata_senders: HashMap::new(), pdata_receiver: None, + logs_receiver: None, } } @@ -155,7 +164,7 @@ impl ReceiverWrapper { receiver, pdata_senders, pdata_receiver, - .. + logs_receiver, } => { let channel_id = control_channel_id(&node_id); let control_sender = match control_sender.into_mpsc() { @@ -189,6 +198,7 @@ impl ReceiverWrapper { control_receiver, pdata_senders, pdata_receiver, + logs_receiver, } } ReceiverWrapper::Shared { @@ -200,7 +210,7 @@ impl ReceiverWrapper { receiver, pdata_senders, pdata_receiver, - .. + logs_receiver, } => { let channel_id = control_channel_id(&node_id); let control_sender = match control_sender.into_mpsc() { @@ -234,6 +244,7 @@ impl ReceiverWrapper { control_receiver, pdata_senders, pdata_receiver, + logs_receiver, } } } @@ -253,6 +264,7 @@ impl ReceiverWrapper { control_receiver, pdata_senders, user_config, + logs_receiver, .. }, metrics_reporter, @@ -275,6 +287,7 @@ impl ReceiverWrapper { default_port, pipeline_ctrl_msg_tx, metrics_reporter, + logs_receiver, ); receiver.start(ctrl_msg_chan, effect_handler).await } @@ -367,6 +380,31 @@ impl Node for ReceiverWrapper { } } +impl ReceiverWrapper { + /// Set the logs receiver for internal telemetry. + /// + /// This is used by the Internal Telemetry Receiver to receive logs + /// from all threads via the logs channel. + pub fn set_logs_receiver(&mut self, receiver: LogsReceiver) { + match self { + ReceiverWrapper::Local { logs_receiver, .. } => { + *logs_receiver = Some(receiver); + } + ReceiverWrapper::Shared { logs_receiver, .. } => { + *logs_receiver = Some(receiver); + } + } + } + + /// Take the logs receiver, if set. + pub fn take_logs_receiver(&mut self) -> Option { + match self { + ReceiverWrapper::Local { logs_receiver, .. } => logs_receiver.take(), + ReceiverWrapper::Shared { logs_receiver, .. } => logs_receiver.take(), + } + } +} + impl NodeWithPDataSender for ReceiverWrapper { fn set_pdata_sender( &mut self, diff --git a/rust/otap-dataflow/crates/otap/src/internal_telemetry_receiver.rs b/rust/otap-dataflow/crates/otap/src/internal_telemetry_receiver.rs index 7eeb239400..b827f38b9c 100644 --- a/rust/otap-dataflow/crates/otap/src/internal_telemetry_receiver.rs +++ b/rust/otap-dataflow/crates/otap/src/internal_telemetry_receiver.rs @@ -3,7 +3,7 @@ //! Internal telemetry receiver. //! -//! This receiver drains the engine thread's internal log buffer and emits +//! This receiver consumes internal logs from the logging channel and emits //! the logs as OTLP ExportLogsRequest messages into the pipeline. use crate::OTAP_RECEIVER_FACTORIES; @@ -21,39 +21,29 @@ use otap_df_engine::receiver::ReceiverWrapper; use otap_df_engine::terminal_state::TerminalState; use otap_df_engine::ReceiverFactory; use otap_df_pdata::OtlpProtoBytes; -use otap_df_telemetry::drain_thread_log_buffer; +use otap_df_telemetry::logs::{LogBatch, LogPayload}; use otap_df_telemetry::metrics::MetricSetSnapshot; use serde::{Deserialize, Serialize}; use serde_json::Value; use std::sync::Arc; -use tokio::time::Duration; /// The URN for the internal telemetry receiver. 
-pub const INTERNAL_TELEMETRY_RECEIVER_URN: &str = "urn:otel:otap:internal_telemetry:receiver"; +pub use otap_df_config::pipeline::service::telemetry::logs::INTERNAL_TELEMETRY_RECEIVER_URN; /// Configuration for the internal telemetry receiver. #[derive(Clone, Deserialize, Serialize)] #[serde(deny_unknown_fields)] -pub struct Config { - /// Interval in milliseconds between buffer drains. - #[serde(default = "default_drain_interval_ms")] - pub drain_interval_ms: u64, -} - -fn default_drain_interval_ms() -> u64 { - 1000 -} +pub struct Config {} impl Default for Config { fn default() -> Self { - Self { - drain_interval_ms: default_drain_interval_ms(), - } + Self {} } } -/// A receiver that drains the engine's internal log buffer and emits OTLP logs. +/// A receiver that consumes internal logs from the logging channel and emits OTLP logs. pub struct InternalTelemetryReceiver { + #[allow(dead_code)] config: Config, } @@ -101,11 +91,14 @@ impl local::Receiver for InternalTelemetryReceiver { mut ctrl_msg_recv: local::ControlChannel, effect_handler: local::EffectHandler, ) -> Result { - let drain_interval = Duration::from_millis(self.config.drain_interval_ms); + // Get the logs receiver channel from the effect handler + let logs_receiver = effect_handler + .logs_receiver() + .expect("InternalTelemetryReceiver requires a logs_receiver to be configured"); // Start periodic telemetry collection let _ = effect_handler - .start_periodic_telemetry(Duration::from_secs(1)) + .start_periodic_telemetry(std::time::Duration::from_secs(1)) .await?; loop { @@ -116,8 +109,10 @@ impl local::Receiver for InternalTelemetryReceiver { ctrl_msg = ctrl_msg_recv.recv() => { match ctrl_msg { Ok(NodeControlMsg::Shutdown { deadline, .. }) => { - // Drain any remaining logs before shutdown - self.drain_and_send(&effect_handler).await?; + // Drain any remaining logs from channel before shutdown + while let Ok(payload) = logs_receiver.try_recv() { + self.send_payload(&effect_handler, payload).await?; + } return Ok(TerminalState::new::<[MetricSetSnapshot; 0]>(deadline, [])); } Ok(NodeControlMsg::CollectTelemetry { .. }) => { @@ -132,9 +127,17 @@ impl local::Receiver for InternalTelemetryReceiver { } } - // Periodic drain - _ = tokio::time::sleep(drain_interval) => { - self.drain_and_send(&effect_handler).await?; + // Receive logs from the channel + result = logs_receiver.recv_async() => { + match result { + Ok(payload) => { + self.send_payload(&effect_handler, payload).await?; + } + Err(_) => { + // Channel closed, exit gracefully + return Ok(TerminalState::default()); + } + } } } } @@ -142,16 +145,24 @@ impl local::Receiver for InternalTelemetryReceiver { } impl InternalTelemetryReceiver { - /// Drain the thread-local log buffer and send as OTLP logs. - async fn drain_and_send(&self, effect_handler: &local::EffectHandler) -> Result<(), Error> { - if let Some(batch) = drain_thread_log_buffer() { - if !batch.records.is_empty() { - let bytes = batch.encode_export_logs_request(); - let pdata = OtapPdata::new_todo_context( - OtlpProtoBytes::ExportLogsRequest(bytes).into(), - ); - effect_handler.send_message(pdata).await?; - } + /// Send a log payload as OTLP logs. 
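+    ///
+    /// Illustrative call shape (a sketch, not part of this change; assumes
+    /// a `LogPayload` and the node's effect handler are in scope):
+    ///
+    /// ```ignore
+    /// // Singleton payloads are wrapped into a one-record batch first.
+    /// self.send_payload(&effect_handler, LogPayload::Singleton(record)).await?;
+    /// ```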
+ async fn send_payload( + &self, + effect_handler: &local::EffectHandler, + payload: LogPayload, + ) -> Result<(), Error> { + let batch = match payload { + LogPayload::Singleton(record) => LogBatch { + records: vec![record], + dropped_count: 0, + }, + LogPayload::Batch(batch) => batch, + }; + + if !batch.records.is_empty() { + let bytes = batch.encode_export_logs_request(); + let pdata = OtapPdata::new_todo_context(OtlpProtoBytes::ExportLogsRequest(bytes).into()); + effect_handler.send_message(pdata).await?; } Ok(()) } diff --git a/rust/otap-dataflow/crates/otap/src/syslog_cef_receiver.rs b/rust/otap-dataflow/crates/otap/src/syslog_cef_receiver.rs index 378c96769e..62b7773816 100644 --- a/rust/otap-dataflow/crates/otap/src/syslog_cef_receiver.rs +++ b/rust/otap-dataflow/crates/otap/src/syslog_cef_receiver.rs @@ -1185,6 +1185,7 @@ mod telemetry_tests { None, pipe_tx, reporter.clone(), + None, ); let (ctrl_tx, ctrl_rx) = otap_df_channel::mpsc::Channel::new(16); @@ -1277,6 +1278,7 @@ mod telemetry_tests { None, pipe_tx, reporter.clone(), + None, ); let (ctrl_tx, ctrl_rx) = otap_df_channel::mpsc::Channel::new(8); diff --git a/rust/otap-dataflow/crates/telemetry/src/lib.rs b/rust/otap-dataflow/crates/telemetry/src/lib.rs index 07c4cb075d..5823b47b76 100644 --- a/rust/otap-dataflow/crates/telemetry/src/lib.rs +++ b/rust/otap-dataflow/crates/telemetry/src/lib.rs @@ -66,7 +66,7 @@ pub use tracing::warn_span as otel_warn_span; // Re-export commonly used logs types for convenience. pub use logs::{ - drain_thread_log_buffer, EngineLogsSetup, LogBatch, LogsCollector, LogsFlusher, LogsReporter, + EngineLogsSetup, LogBatch, LogPayload, LogsCollector, LogsFlusher, LogsReceiver, LogsReporter, ThreadBufferedLayer, UnbufferedLayer, }; diff --git a/rust/otap-dataflow/crates/telemetry/src/logs.rs b/rust/otap-dataflow/crates/telemetry/src/logs.rs index 4de7395fff..14ac3b4b82 100644 --- a/rust/otap-dataflow/crates/telemetry/src/logs.rs +++ b/rust/otap-dataflow/crates/telemetry/src/logs.rs @@ -216,6 +216,9 @@ pub struct LogsCollector { writer: ConsoleWriter, } +/// Type alias for the log payload receiver channel. +pub type LogsReceiver = flume::Receiver; + impl LogsCollector { /// Create a new collector and reporter pair. #[must_use] @@ -229,6 +232,17 @@ impl LogsCollector { (collector, reporter) } + /// Create a reporter and receiver pair without the collector. + /// + /// Use this when the receiver will be consumed elsewhere (e.g., by the + /// Internal Telemetry Receiver node). + #[must_use] + pub fn channel(channel_size: usize) -> (LogsReceiver, LogsReporter) { + let (sender, receiver) = flume::bounded(channel_size); + let reporter = LogsReporter::new(sender); + (receiver, reporter) + } + /// Run the collection loop until the channel is closed. pub async fn run(self) -> Result<(), Error> { loop { @@ -362,11 +376,6 @@ pub enum EngineLogsSetup { /// Reporter to send singletons through. reporter: LogsReporter, }, - /// Internal: accumulates in thread-local buffer, drained by internal telemetry receiver. - Internal { - /// Buffer capacity per thread. - capacity: usize, - }, } /// Handle for flushing buffered logs from the engine thread. @@ -378,19 +387,16 @@ pub enum LogsFlusher { Noop, /// Flusher that drains the thread-local buffer and sends via the reporter. Buffered(LogsReporter), - /// Flusher for internal telemetry mode - drain returns batch directly. - /// Used by internal telemetry receiver node. - InternalDrain, } impl LogsFlusher { /// Flush any buffered logs by sending to the reporter. 
/// - /// For `Noop` and `InternalDrain`, this does nothing. + /// For `Noop`, this does nothing. /// For `Buffered`, this drains the thread-local buffer and sends as a batch. pub fn flush(&self) -> Result<(), Error> { match self { - LogsFlusher::Noop | LogsFlusher::InternalDrain => Ok(()), + LogsFlusher::Noop => Ok(()), LogsFlusher::Buffered(reporter) => { if let Some(batch) = CURRENT_LOG_BUFFER .with(|cell| cell.borrow_mut().as_mut().map(|buffer| buffer.drain())) @@ -401,19 +407,6 @@ impl LogsFlusher { } } } - - /// Drain the thread-local buffer and return the batch directly. - /// - /// For use by internal telemetry receiver only. - /// Returns `None` if no buffer is installed or if this is not `InternalDrain` mode. - pub fn drain(&self) -> Option { - match self { - LogsFlusher::InternalDrain => { - CURRENT_LOG_BUFFER.with(|cell| cell.borrow_mut().as_mut().map(|buffer| buffer.drain())) - } - _ => None, - } - } } impl EngineLogsSetup { @@ -458,19 +451,6 @@ impl EngineLogsSetup { let subscriber = Registry::default().with(filter).with(layer); tracing::subscriber::with_default(subscriber, || f(LogsFlusher::Noop)) } - EngineLogsSetup::Internal { capacity } => { - // For internal mode, we use a "null" reporter that doesn't send anywhere. - // The internal telemetry receiver will drain the buffer directly. - let null_reporter = LogsReporter::null(); - let layer = ThreadBufferedLayer::new(null_reporter); - let subscriber = Registry::default().with(filter).with(layer); - let flusher = LogsFlusher::InternalDrain; - - // Install the thread-local buffer - with_thread_log_buffer(*capacity, || { - tracing::subscriber::with_default(subscriber, || f(flusher)) - }) - } } } } From 70c42dc0b653cc871a65a668475a7e5da6545cee Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Mon, 12 Jan 2026 09:11:26 -0800 Subject: [PATCH 62/92] serde --- rust/otap-dataflow/Cargo.toml | 3 +++ .../configs/internal-telemetry.yaml | 5 ++--- .../src/pipeline/service/telemetry/logs.rs | 3 +++ rust/otap-dataflow/src/main.rs | 20 +++++++++++++------ 4 files changed, 22 insertions(+), 9 deletions(-) diff --git a/rust/otap-dataflow/Cargo.toml b/rust/otap-dataflow/Cargo.toml index a254ef58db..0ec64f2394 100644 --- a/rust/otap-dataflow/Cargo.toml +++ b/rust/otap-dataflow/Cargo.toml @@ -33,7 +33,10 @@ path = "src/main.rs" otap-df-config.workspace = true otap-df-controller.workspace = true otap-df-otap.workspace = true +otap-df-telemetry.workspace = true thiserror.workspace = true +tracing.workspace = true +tracing-subscriber.workspace = true quiver = { workspace = true, optional = true } serde_json.workspace = true clap.workspace = true diff --git a/rust/otap-dataflow/configs/internal-telemetry.yaml b/rust/otap-dataflow/configs/internal-telemetry.yaml index 9df80b380e..c859d424f4 100644 --- a/rust/otap-dataflow/configs/internal-telemetry.yaml +++ b/rust/otap-dataflow/configs/internal-telemetry.yaml @@ -40,7 +40,6 @@ service: # The default level is "info". 
level: "debug" strategies: - global: global + global: unbuffered engine: buffered - output: - mode: raw + output: raw diff --git a/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs b/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs index fcc7da00e8..2c6839afcd 100644 --- a/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs +++ b/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs @@ -16,9 +16,11 @@ pub const INTERNAL_TELEMETRY_RECEIVER_URN: &str = "urn:otel:otap:internal_teleme #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] pub struct LogsConfig { /// The log level for internal engine logs. + #[serde(default = "default_level")] pub level: LogLevel, /// Logging strategy configuration for different thread contexts. + #[serde(default = "default_strategies")] pub strategies: LoggingStrategies, /// What to do with collected log events. @@ -27,6 +29,7 @@ pub struct LogsConfig { /// The list of log processors to configure (for OpenTelemetry SDK output mode). /// Only used when `output.mode` is set to `opentelemetry`. + #[serde(default)] pub processors: Vec, } diff --git a/rust/otap-dataflow/src/main.rs b/rust/otap-dataflow/src/main.rs index e242b472cf..9de41c9b05 100644 --- a/rust/otap-dataflow/src/main.rs +++ b/rust/otap-dataflow/src/main.rs @@ -9,8 +9,11 @@ use otap_df_config::pipeline_group::{CoreAllocation, CoreRange, Quota}; use otap_df_config::{PipelineGroupId, PipelineId}; use otap_df_controller::Controller; use otap_df_otap::OTAP_PIPELINE_FACTORY; +use otap_df_telemetry::self_tracing::{ConsoleWriter, RawLoggingLayer}; use std::path::PathBuf; use sysinfo::System; +use tracing_subscriber::layer::SubscriberExt; +use tracing_subscriber::Registry; #[cfg(all( not(windows), @@ -123,12 +126,17 @@ fn main() -> Result<(), Box> { println!("{}", system_info()); - // Load pipeline configuration from file - let pipeline_cfg = PipelineConfig::from_file( - pipeline_group_id.clone(), - pipeline_id.clone(), - &args.pipeline, - )?; + // Load pipeline configuration with early logging so parse errors are readable. + // Use with_default for a thread-local subscriber during config loading only. 
+ let early_subscriber = Registry::default() + .with(RawLoggingLayer::new(ConsoleWriter::color())); + let pipeline_cfg = tracing::subscriber::with_default(early_subscriber, || { + PipelineConfig::from_file( + pipeline_group_id.clone(), + pipeline_id.clone(), + &args.pipeline, + ) + })?; // Create controller and start pipeline with multi-core support let controller = Controller::new(&OTAP_PIPELINE_FACTORY); From 4f8851a991721a26fb03a9132080340cad37a96d Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Mon, 12 Jan 2026 13:05:06 -0800 Subject: [PATCH 63/92] wip --- .../configs/fake-batch-debug-noop.yaml | 12 ++- .../src/pipeline/service/telemetry/logs.rs | 102 +++++++++--------- .../crates/controller/src/lib.rs | 15 +-- .../crates/telemetry/src/logs.rs | 12 ++- .../telemetry/src/opentelemetry_client.rs | 2 +- 5 files changed, 80 insertions(+), 63 deletions(-) diff --git a/rust/otap-dataflow/configs/fake-batch-debug-noop.yaml b/rust/otap-dataflow/configs/fake-batch-debug-noop.yaml index 194334b4eb..0ea62b6abf 100644 --- a/rust/otap-dataflow/configs/fake-batch-debug-noop.yaml +++ b/rust/otap-dataflow/configs/fake-batch-debug-noop.yaml @@ -49,5 +49,15 @@ nodes: mode: signal noop: kind: exporter - plugin_urn: "urn:otel:noop:exporter" + plugin_urn: "urn:otel:noop:exporterX" config: + +service: + telemetry: + logs: + # The default level is "info". + level: "debug" + strategies: + global: raw + engine: raw + output: noop diff --git a/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs b/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs index 2c6839afcd..649b675c7e 100644 --- a/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs +++ b/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs @@ -5,35 +5,35 @@ pub mod processors; +use crate::error::Error; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; -/// The well-known URN for the Internal Telemetry Receiver node. -/// This receiver collects internal logs from all threads and emits them as OTLP. -pub const INTERNAL_TELEMETRY_RECEIVER_URN: &str = "urn:otel:otap:internal_telemetry:receiver"; +/// Internal Telemetry Receiver node URN for internal logging using OTLP bytes. +pub const INTERNAL_TELEMETRY_RECEIVER_URN: &str = "urn:otel:otlp:telemetry:receiver"; /// Internal logs configuration. #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] pub struct LogsConfig { - /// The log level for internal engine logs. + /// The log level for internal logs. #[serde(default = "default_level")] pub level: LogLevel, - /// Logging strategy configuration for different thread contexts. - #[serde(default = "default_strategies")] - pub strategies: LoggingStrategies, + /// Logging provider configuration. + #[serde(default = "default_providers")] + pub providers: LoggingProviders, - /// What to do with collected log events. + /// What to do with collected log events. This applies when any ProviderMode + /// in providers indicates Buffered or Unbuffered. Does not apply if all + /// providers are in [Noop, Raw, OpenTelemetry]. #[serde(default = "default_output")] pub output: OutputMode, - /// The list of log processors to configure (for OpenTelemetry SDK output mode). - /// Only used when `output.mode` is set to `opentelemetry`. - #[serde(default)] + /// OpenTelemetry SDK is configured via processors. pub processors: Vec, } -/// Log level for internal engine logs. +/// Log level for dataflow engine logs. 
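+///
+/// With `#[serde(rename_all = "lowercase")]` below, levels are written in
+/// lowercase in YAML (sketch; the variant set is assumed from its use
+/// elsewhere in this series):
+///
+/// ```yaml
+/// service:
+///   telemetry:
+///     logs:
+///       level: "debug"   # off | debug | info | warn | error
+/// ```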
#[derive(Debug, Clone, Copy, Serialize, Deserialize, JsonSchema, Default, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum LogLevel {
@@ -50,19 +50,23 @@ pub enum LogLevel {
     Error,
 }

-/// Logging strategies for different execution contexts.
-///
-/// Controls how log events are captured and routed to the admin thread.
+/// Logging providers for different execution contexts.
 #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
-pub struct LoggingStrategies {
-    /// Strategy for non-engine threads.
+pub struct LoggingProviders {
+    /// Provider mode for non-engine threads. This defines the global Tokio
+    /// `tracing` subscriber. Default is Unbuffered. Note that Buffered
+    /// requires opt-in thread-local setup.
     pub global: ProviderMode,

-    /// Strategy for engine/pipeline threads.
+    /// Provider mode for engine/pipeline threads. This defines how the
+    /// engine thread/core sets the Tokio `tracing`
+    /// subscriber. Default is Buffered. Internal logs will be flushed
+    /// by either the Internal Telemetry Receiver or the main pipeline
+    /// controller.
     pub engine: ProviderMode,

-    /// Strategy for nodes handling internal telemetry (downstream of internal receiver).
-    /// Defaults to Noop to prevent log recursion.
+    /// Provider mode for nodes downstream of the Internal Telemetry receiver.
+    /// This defaults to Noop to avoid internal feedback.
    #[serde(default = "default_internal_provider")]
    pub internal: ProviderMode,
}
@@ -83,24 +87,30 @@ pub enum ProviderMode {
     /// Use OTel-Rust as the provider.
     OpenTelemetry,

-    /// Use synchronous logging.
+    /// Use synchronous logging. Note: this can block the producing thread.
     Raw,
 }

-/// Output mode: what the recipient does with received log events.
+/// Output mode: what the recipient does with events received from the
+/// Buffered and Unbuffered provider modes.
 #[derive(Debug, Clone, Copy, Serialize, Deserialize, JsonSchema, PartialEq, Eq, Default)]
 #[serde(rename_all = "lowercase")]
 pub enum OutputMode {
-    /// No-output is incompatible with Buffered and Unbuffered provider modes.
+    /// Noop disallows the Buffered and Unbuffered provider modes. It is
+    /// only valid when every provider already bypasses the internal
+    /// output path through its own Noop, Raw, or
+    /// OpenTelemetry settings.
     Noop,

-    /// Raw logging: format and print directly to console (stdout/stderr).
-    /// ERROR/WARN go to stderr, others to stdout.
+    /// Raw logging: format and print directly to console
+    /// (stdout/stderr) from the logs collector thread. ERROR and
+    /// WARN go to stderr, others to stdout.
     #[default]
     Raw,

-    /// Route to internal telemetry receiver node.
-    /// Requires engine provider to be Buffered.
+    /// Route to Internal Telemetry Receiver node. The pipeline must
+    /// include a node with INTERNAL_TELEMETRY_RECEIVER_URN. The
+    /// engine provider mode must be Buffered for internal output.
Internal, } @@ -109,15 +119,15 @@ fn default_output() -> OutputMode { } fn default_level() -> LogLevel { - LogLevel::Off + LogLevel::Info } fn default_internal_provider() -> ProviderMode { ProviderMode::Noop } -fn default_strategies() -> LoggingStrategies { - LoggingStrategies { +fn default_providers() -> LoggingProviders { + LoggingProviders { global: ProviderMode::Unbuffered, engine: ProviderMode::Buffered, internal: default_internal_provider(), @@ -128,7 +138,7 @@ impl Default for LogsConfig { fn default() -> Self { Self { level: default_level(), - strategies: default_strategies(), + providers: default_providers(), output: default_output(), processors: Vec::new(), } @@ -139,38 +149,32 @@ impl LogsConfig { /// Validate the logs configuration. /// /// Returns an error if: - /// - `output` is `Noop` but a provider strategy uses `Buffered` or `Unbuffered` + /// - `output` is `Noop` but a provider uses `Buffered` or `Unbuffered` /// (logs would be sent but discarded) /// - `output` is `Internal` but engine provider is not `Buffered` - pub fn validate(&self) -> Result<(), String> { + pub fn validate(&self) -> Result<(), Error> { if self.output == OutputMode::Noop { let global_sends = matches!( - self.strategies.global, + self.providers.global, ProviderMode::Buffered | ProviderMode::Unbuffered ); let engine_sends = matches!( - self.strategies.engine, + self.providers.engine, ProviderMode::Buffered | ProviderMode::Unbuffered ); if global_sends || engine_sends { - return Err(format!( - "output mode is 'noop' but provider strategies would send logs: \ - global={:?}, engine={:?}. Set strategies to 'noop', 'raw', or 'opentelemetry', \ - or change output to 'raw'.", - self.strategies.global, self.strategies.engine - )); + return Err(Error::InvalidUserConfig { + error: "output mode is 'noop' but a provider uses buffered or unbuffered" + .into(), + }); } } - if self.output == OutputMode::Internal { - if self.strategies.engine != ProviderMode::Buffered { - return Err(format!( - "output mode is 'internal' but engine provider is {:?}. 
\ - Internal output requires engine provider to be 'buffered'.", - self.strategies.engine - )); - } + if self.output == OutputMode::Internal && self.providers.engine != ProviderMode::Buffered { + return Err(Error::InvalidUserConfig { + error: "output mode is 'internal', engine must use buffered provider".into(), + }); } Ok(()) diff --git a/rust/otap-dataflow/crates/controller/src/lib.rs b/rust/otap-dataflow/crates/controller/src/lib.rs index a52b01a5c4..5f831c26f0 100644 --- a/rust/otap-dataflow/crates/controller/src/lib.rs +++ b/rust/otap-dataflow/crates/controller/src/lib.rs @@ -110,31 +110,32 @@ impl Controller { // The receiver end goes to either: // - LogsCollector thread (output == Raw): prints to console // - Internal Telemetry Receiver node (output == Internal): emits as OTLP - let (logs_reporter, logs_receiver) = if strategies_need_reporter { + let (logs_reporter, logs_receiver, logs_collector_handle) = if strategies_need_reporter { match telemetry_config.logs.output { OutputMode::Raw => { // Start collector thread for Raw output mode let (logs_collector, reporter) = LogsCollector::new(telemetry_config.reporting_channel_size); - // TODO: Store handle for graceful shutdown - let _logs_collector_handle = + let logs_collector_handle = spawn_thread_local_task("logs-collector", move |_cancellation_token| { logs_collector.run() })?; - (Some(reporter), None) + (Some(reporter), None, Some(logs_collector_handle)) } OutputMode::Internal => { // For Internal output, create just the channel. // The ITR node will receive from it during pipeline build. let (logs_receiver, reporter) = LogsCollector::channel(telemetry_config.reporting_channel_size); - (Some(reporter), Some(logs_receiver)) + (Some(reporter), Some(logs_receiver), None) } - OutputMode::Noop => (None, None), + OutputMode::Noop => (None, None, None), } } else { - (None, None) + (None, None, None) }; + // Keep the handle alive - dropping it would join the thread and block forever + let _logs_collector_handle = logs_collector_handle; let opentelemetry_client = OpentelemetryClient::new(telemetry_config, logs_reporter.clone())?; diff --git a/rust/otap-dataflow/crates/telemetry/src/logs.rs b/rust/otap-dataflow/crates/telemetry/src/logs.rs index 14ac3b4b82..5275637ffa 100644 --- a/rust/otap-dataflow/crates/telemetry/src/logs.rs +++ b/rust/otap-dataflow/crates/telemetry/src/logs.rs @@ -3,9 +3,11 @@ //! Internal logs collection for OTAP-Dataflow. -use bytes::Bytes; use crate::error::Error; -use crate::self_tracing::{ConsoleWriter, DirectLogRecordEncoder, LogRecord, RawLoggingLayer, SavedCallsite}; +use crate::self_tracing::{ + ConsoleWriter, DirectLogRecordEncoder, LogRecord, RawLoggingLayer, SavedCallsite, +}; +use bytes::Bytes; use otap_df_pdata::otlp::ProtoBuffer; use otap_df_pdata::proto::consts::field_num::logs::{ LOGS_DATA_RESOURCE, RESOURCE_LOGS_SCOPE_LOGS, SCOPE_LOGS_LOG_RECORDS, @@ -13,9 +15,9 @@ use otap_df_pdata::proto::consts::field_num::logs::{ use otap_df_pdata::proto_encode_len_delimited_unknown_size; use std::cell::RefCell; use tracing::{Event, Subscriber}; +use tracing_subscriber::Registry; use tracing_subscriber::layer::{Context, Layer as TracingLayer, SubscriberExt}; use tracing_subscriber::registry::LookupSpan; -use tracing_subscriber::Registry; /// A batch of log records from a pipeline thread. pub struct LogBatch { @@ -27,6 +29,7 @@ pub struct LogBatch { impl LogBatch { /// The total size including dropped records. 
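+    ///
+    /// E.g., 3 buffered records plus 2 dropped records yields 5
+    /// (illustrative only).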
+ #[must_use] pub fn size_with_dropped(&self) -> usize { self.records.len() + self.dropped_count } @@ -301,7 +304,7 @@ impl ThreadBufferedLayer { if let Some(batch) = CURRENT_LOG_BUFFER.with(|cell| cell.borrow_mut().as_mut().map(|buffer| buffer.drain())) { - let _ = self.reporter.try_report(LogPayload::Batch(batch))?; + self.reporter.try_report(LogPayload::Batch(batch))?; } Ok(()) } @@ -318,7 +321,6 @@ where if let Some(ref mut buffer) = *cell.borrow_mut() { buffer.push(record); } - // TODO: Fallback consideration. }); } } diff --git a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs b/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs index 403ce1d097..2d03c57fa3 100644 --- a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs +++ b/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs @@ -68,7 +68,7 @@ impl OpentelemetryClient { // Configure the global subscriber based on strategies.global. // Engine threads override this with BufferWriterLayer via with_default(). - let (logger_provider, runtime) = match config.logs.strategies.global { + let (logger_provider, runtime) = match config.logs.providers.global { ProviderMode::Noop => { // No-op: just install the filter, events are dropped if let Err(err) = tracing::subscriber::NoSubscriber::new().try_init() { From c0023dc669aead7be529df69ae4b693a9151f4ec Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Mon, 12 Jan 2026 13:16:46 -0800 Subject: [PATCH 64/92] providers --- .../configs/fake-batch-debug-noop.yaml | 11 ++++---- .../src/pipeline/service/telemetry/logs.rs | 1 + .../crates/controller/src/lib.rs | 26 ++++++++++--------- 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/rust/otap-dataflow/configs/fake-batch-debug-noop.yaml b/rust/otap-dataflow/configs/fake-batch-debug-noop.yaml index 0ea62b6abf..9e486e870a 100644 --- a/rust/otap-dataflow/configs/fake-batch-debug-noop.yaml +++ b/rust/otap-dataflow/configs/fake-batch-debug-noop.yaml @@ -49,15 +49,14 @@ nodes: mode: signal noop: kind: exporter - plugin_urn: "urn:otel:noop:exporterX" + plugin_urn: "urn:otel:noop:exporter" config: service: telemetry: logs: - # The default level is "info". level: "debug" - strategies: - global: raw - engine: raw - output: noop + providers: + global: unbuffered + engine: buffered + output: raw diff --git a/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs b/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs index 649b675c7e..caedf05993 100644 --- a/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs +++ b/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs @@ -30,6 +30,7 @@ pub struct LogsConfig { pub output: OutputMode, /// OpenTelemetry SDK is configured via processors. 
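+    /// (Written as a `processors:` list in YAML; defaults to an empty list
+    /// when omitted, per the serde attribute below.)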
+    #[serde(default)]
     pub processors: Vec,
 }

diff --git a/rust/otap-dataflow/crates/controller/src/lib.rs b/rust/otap-dataflow/crates/controller/src/lib.rs
index 5f831c26f0..98d9d4bf06 100644
--- a/rust/otap-dataflow/crates/controller/src/lib.rs
+++ b/rust/otap-dataflow/crates/controller/src/lib.rs
@@ -22,7 +22,7 @@ use crate::thread_task::spawn_thread_local_task;
 use core_affinity::CoreId;
 use otap_df_config::engine::HttpAdminSettings;
 use otap_df_config::pipeline::service::telemetry::logs::{
-    OutputMode, ProviderMode, INTERNAL_TELEMETRY_RECEIVER_URN,
+    INTERNAL_TELEMETRY_RECEIVER_URN, OutputMode, ProviderMode,
 };
 use otap_df_config::{
     PipelineGroupId, PipelineId,
@@ -92,25 +92,27 @@ impl Controller {
         telemetry_config
             .logs
             .validate()
-            .map_err(|msg| Error::ConfigurationError { message: msg })?;
+            .map_err(|msg| Error::ConfigurationError {
+                message: msg.to_string(),
+            })?;

-        // Create logs reporter based on provider strategies.
+        // Create logs reporter based on the configured provider modes.
         // LogsReporter is needed when:
         // - global == Unbuffered (global threads send directly to channel)
         // - engine == Buffered or Unbuffered (engine threads send to channel)
         // Raw provider mode = synchronous console output, no reporter needed.
-        let strategies_need_reporter =
-            telemetry_config.logs.strategies.global == ProviderMode::Unbuffered
-                || matches!(
-                    telemetry_config.logs.strategies.engine,
-                    ProviderMode::Buffered | ProviderMode::Unbuffered
-                );
+        let providers_need_reporter = telemetry_config.logs.providers.global
+            == ProviderMode::Unbuffered
+            || matches!(
+                telemetry_config.logs.providers.engine,
+                ProviderMode::Buffered | ProviderMode::Unbuffered
+            );

-        // Create the reporter if strategies need it.
+        // Create the reporter if providers need it.
         // The receiver end goes to either:
         // - LogsCollector thread (output == Raw): prints to console
         // - Internal Telemetry Receiver node (output == Internal): emits as OTLP
-        let (logs_reporter, logs_receiver, logs_collector_handle) = if strategies_need_reporter {
+        let (logs_reporter, logs_receiver, logs_collector_handle) = if providers_need_reporter {
@@ -173,7 +175,7 @@
         // Create engine logs setup based on the provider configuration.
         // When output is Internal, the logs go through the channel to ITR.
         // The validation layer ensures that when output=Internal, engine strategy is Buffered.
- let engine_logs_setup = match telemetry_config.logs.strategies.engine { + let engine_logs_setup = match telemetry_config.logs.providers.engine { ProviderMode::Noop => EngineLogsSetup::Noop, ProviderMode::Raw => EngineLogsSetup::Raw, ProviderMode::Buffered => EngineLogsSetup::Buffered { From f24f3e96d94520793abe409b16326553272e628e Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Mon, 12 Jan 2026 15:10:08 -0800 Subject: [PATCH 65/92] remove clippy::print_stderr exceptions --- .../crates/controller/Cargo.toml | 1 + .../crates/controller/src/thread_task.rs | 15 ++++++----- .../crates/pdata/src/validation/mod.rs | 1 - rust/otap-dataflow/crates/state/Cargo.toml | 8 ++++-- .../crates/state/src/reporter.rs | 15 ++++++----- rust/otap-dataflow/crates/state/src/store.rs | 27 ++++++++++++------- rust/otap-dataflow/src/main.rs | 15 +++++------ 7 files changed, 49 insertions(+), 33 deletions(-) diff --git a/rust/otap-dataflow/crates/controller/Cargo.toml b/rust/otap-dataflow/crates/controller/Cargo.toml index ad214acc03..fc379825bd 100644 --- a/rust/otap-dataflow/crates/controller/Cargo.toml +++ b/rust/otap-dataflow/crates/controller/Cargo.toml @@ -27,3 +27,4 @@ miette = { workspace = true } core_affinity = { workspace = true } tokio = { workspace = true } tokio-util = { workspace = true } +tracing = { workspace = true } diff --git a/rust/otap-dataflow/crates/controller/src/thread_task.rs b/rust/otap-dataflow/crates/controller/src/thread_task.rs index c3788ec130..f9cea278d7 100644 --- a/rust/otap-dataflow/crates/controller/src/thread_task.rs +++ b/rust/otap-dataflow/crates/controller/src/thread_task.rs @@ -4,6 +4,7 @@ //! Utilities to run a non-Send async task on a dedicated OS thread with a //! single-threaded Tokio runtime and LocalSet, plus a shutdown signal. +use otap_df_telemetry::raw_error; use std::future::Future; use std::thread; use tokio::{runtime::Builder as RtBuilder, task::LocalSet}; @@ -50,7 +51,6 @@ impl ThreadLocalTaskHandle { } impl Drop for ThreadLocalTaskHandle { - #[allow(clippy::print_stderr)] fn drop(&mut self) { // Best-effort, idempotent shutdown on drop. self.cancel_token.cancel(); @@ -64,17 +64,18 @@ impl Drop for ThreadLocalTaskHandle { Ok(Err(_)) => { // Task returned an error; can't propagate it from Drop, so just log. // ToDo Replace this eprintln once we have selected a logging solution - eprintln!( - "Thread '{}' finished with an error during drop; error suppressed", - self.name + raw_error!( + "Thread finished with an error during drop; error suppressed", + thread_name = &self.name, ); } Err(panic) => { // Don't panic in Drop; report and suppress. 
// ToDo Replace this eprintln once we have selected a logging solution - eprintln!( - "Thread '{}' panicked during drop: {panic:?}; panic suppressed", - self.name + raw_error!( + "Thread panicked during drop; panic suppressed", + thread_name = &self.name, + panicked = tracing::field::debug(panic), ); } } diff --git a/rust/otap-dataflow/crates/pdata/src/validation/mod.rs b/rust/otap-dataflow/crates/pdata/src/validation/mod.rs index 8b54626a61..0e7433b3ae 100644 --- a/rust/otap-dataflow/crates/pdata/src/validation/mod.rs +++ b/rust/otap-dataflow/crates/pdata/src/validation/mod.rs @@ -6,7 +6,6 @@ // Allow test-friendly patterns in this test-only module #![allow(clippy::unwrap_used)] -#![allow(clippy::print_stderr)] mod collector; mod error; diff --git a/rust/otap-dataflow/crates/state/Cargo.toml b/rust/otap-dataflow/crates/state/Cargo.toml index 61e0af312c..fb9e035460 100644 --- a/rust/otap-dataflow/crates/state/Cargo.toml +++ b/rust/otap-dataflow/crates/state/Cargo.toml @@ -13,7 +13,8 @@ rust-version.workspace = true workspace = true [dependencies] -otap-df-config = { path = "../config" } +otap-df-config = { workspace = true } +otap-df-telemetry = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } @@ -22,4 +23,7 @@ flume = { workspace = true } thiserror = { workspace = true } tokio-util = { workspace = true } tokio = { workspace = true } -log = { workspace = true } + +# Note: use for tracing::field::debug() annotations, b/c otel_xxx! macros +# do not support directly. Consider whether to add or consolidate. +tracing = { workspace = true } diff --git a/rust/otap-dataflow/crates/state/src/reporter.rs b/rust/otap-dataflow/crates/state/src/reporter.rs index c030335ceb..74367b74a5 100644 --- a/rust/otap-dataflow/crates/state/src/reporter.rs +++ b/rust/otap-dataflow/crates/state/src/reporter.rs @@ -4,6 +4,7 @@ //! A reporter of observed events. use crate::event::ObservedEvent; +use otap_df_telemetry::raw_error; use std::time::Duration; /// A sharable/clonable observed event reporter sending events to an `ObservedStore`. @@ -25,17 +26,19 @@ impl ObservedEventReporter { /// Note: This method does not return an error if sending the event to the reporting channel /// fails, as this is not sufficient reason to interrupt the normal flow of the system under /// observation. However, an error message is logged to the standard error output. - #[allow( - clippy::print_stderr, - reason = "Use `eprintln!` while waiting for a decision on a framework for debugging/tracing." 
- )] pub fn report(&self, event: ObservedEvent) { match self.sender.send_timeout(event, self.timeout) { Err(flume::SendTimeoutError::Timeout(event)) => { - eprintln!("Timeout sending observed event: {event:?}") + raw_error!( + "Timeout sending observed event", + event = tracing::field::debug(event) + ); } Err(flume::SendTimeoutError::Disconnected(event)) => { - eprintln!("Disconnected event: {event:?}") + raw_error!( + "Disconnected event observer", + event = tracing::field::debug(event) + ); } Ok(_) => {} } diff --git a/rust/otap-dataflow/crates/state/src/store.rs b/rust/otap-dataflow/crates/state/src/store.rs index c7ad071f8e..32fe24ce09 100644 --- a/rust/otap-dataflow/crates/state/src/store.rs +++ b/rust/otap-dataflow/crates/state/src/store.rs @@ -11,6 +11,7 @@ use crate::pipeline_rt_status::{ApplyOutcome, PipelineRuntimeStatus}; use crate::pipeline_status::PipelineStatus; use crate::reporter::ObservedEventReporter; use otap_df_config::pipeline::PipelineSettings; +use otap_df_telemetry::{otel_error, otel_warn, raw_error}; use serde::{Serialize, Serializer}; use std::collections::HashMap; use std::sync::{Arc, Mutex}; @@ -51,7 +52,7 @@ impl ObservedStateHandle { match self.pipelines.lock() { Ok(guard) => guard.clone(), Err(poisoned) => { - log::warn!( + otel_warn!( "ObservedStateHandle mutex was poisoned; returning possibly stale snapshot" ); poisoned.into_inner().clone() @@ -101,23 +102,28 @@ impl ObservedStateStore { } /// Reports a new observed event in the store. - #[allow( - clippy::print_stderr, - reason = "Use `eprintln!` while waiting for https://github.com/open-telemetry/otel-arrow/issues/1237." - )] fn report(&self, observed_event: ObservedEvent) -> Result { // ToDo Event reporting see: https://github.com/open-telemetry/otel-arrow/issues/1237 // The code below is temporary and should be replaced with a proper event reporting // mechanism (see previous todo). 
match &observed_event.r#type { - EventType::Request(_) | EventType::Error(_) => { - eprintln!("Observed event: {observed_event:?}") + EventType::Request(_) => { + otel_error!( + "request.event", + observed_event = tracing::field::debug(&observed_event) + ); + } + EventType::Error(_) => { + otel_error!( + "error.event", + observed_event = tracing::field::debug(&observed_event) + ); } EventType::Success(_) => { /* no console output for success events */ } } let mut pipelines = self.pipelines.lock().unwrap_or_else(|poisoned| { - log::warn!( + otel_warn!( "ObservedStateStore mutex was poisoned; continuing with possibly inconsistent state" ); poisoned.into_inner() @@ -154,7 +160,10 @@ impl ObservedStateStore { // Exit the loop if the channel is closed while let Ok(event) = self.receiver.recv_async().await { if let Err(e) = self.report(event) { - log::error!("Error reporting observed event: {e}"); + raw_error!( + "Error reporting observed event", + error = e.to_string(), + ); } } } => { /* Channel closed, exit gracefully */ } diff --git a/rust/otap-dataflow/src/main.rs b/rust/otap-dataflow/src/main.rs index 9de41c9b05..1ca48b86f9 100644 --- a/rust/otap-dataflow/src/main.rs +++ b/rust/otap-dataflow/src/main.rs @@ -12,8 +12,8 @@ use otap_df_otap::OTAP_PIPELINE_FACTORY; use otap_df_telemetry::self_tracing::{ConsoleWriter, RawLoggingLayer}; use std::path::PathBuf; use sysinfo::System; -use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::Registry; +use tracing_subscriber::layer::SubscriberExt; #[cfg(all( not(windows), @@ -124,12 +124,9 @@ fn main() -> Result<(), Box> { let pipeline_group_id: PipelineGroupId = "default_pipeline_group".into(); let pipeline_id: PipelineId = "default_pipeline".into(); - println!("{}", system_info()); - // Load pipeline configuration with early logging so parse errors are readable. // Use with_default for a thread-local subscriber during config loading only. - let early_subscriber = Registry::default() - .with(RawLoggingLayer::new(ConsoleWriter::color())); + let early_subscriber = Registry::default().with(RawLoggingLayer::new(ConsoleWriter::color())); let pipeline_cfg = tracing::subscriber::with_default(early_subscriber, || { PipelineConfig::from_file( pipeline_group_id.clone(), @@ -138,6 +135,8 @@ fn main() -> Result<(), Box> { ) })?; + tracing::info!("{}", system_info()); + // Create controller and start pipeline with multi-core support let controller = Controller::new(&OTAP_PIPELINE_FACTORY); @@ -159,7 +158,7 @@ fn main() -> Result<(), Box> { CoreAllocation::AllCores => println!("Requested core allocation: all available cores"), CoreAllocation::CoreCount { count } => println!("Requested core allocation: {count} cores"), CoreAllocation::CoreSet { .. 
} => { - println!("Requested core allocation: {}", quota.core_allocation); + tracing::info!("Requested core allocation: {}", quota.core_allocation); } } @@ -175,11 +174,11 @@ fn main() -> Result<(), Box> { ); match result { Ok(_) => { - println!("Pipeline run successfully"); + tracing::info!("Pipeline run successfully"); std::process::exit(0); } Err(e) => { - eprintln!("Pipeline failed to run: {e}"); + tracing::error!("Pipeline failed to run: {e}"); std::process::exit(1); } } From 7bc59323dc3fbcf5dc141f875a28be55fc8e9591 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Mon, 12 Jan 2026 16:25:22 -0800 Subject: [PATCH 66/92] post-crash --- rust/otap-dataflow/configs/internal-telemetry.yaml | 14 +++++++++++--- .../config/src/pipeline/service/telemetry/logs.rs | 2 +- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/rust/otap-dataflow/configs/internal-telemetry.yaml b/rust/otap-dataflow/configs/internal-telemetry.yaml index c859d424f4..8da280426c 100644 --- a/rust/otap-dataflow/configs/internal-telemetry.yaml +++ b/rust/otap-dataflow/configs/internal-telemetry.yaml @@ -19,6 +19,15 @@ nodes: signals_per_second: 1000 log_weight: 100 registry_path: https://github.com/open-telemetry/semantic-conventions.git[model] + internal_telemetry: + kind: receiver + plugin_urn: "urn:otel:internal:otlp:receiver" + out_ports: + out_port: + destinations: + - debug + dispatch_strategy: round_robin + config: {} debug: kind: processor plugin_urn: "urn:otel:debug:processor" @@ -32,14 +41,13 @@ nodes: noop: kind: exporter plugin_urn: "urn:otel:noop:exporter" - config: + config: {} service: telemetry: logs: - # The default level is "info". level: "debug" strategies: global: unbuffered engine: buffered - output: raw + output: internal diff --git a/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs b/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs index caedf05993..88a2073681 100644 --- a/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs +++ b/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs @@ -10,7 +10,7 @@ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; /// Internal Telemetry Receiver node URN for internal logging using OTLP bytes. -pub const INTERNAL_TELEMETRY_RECEIVER_URN: &str = "urn:otel:otlp:telemetry:receiver"; +pub const INTERNAL_TELEMETRY_RECEIVER_URN: &str = "urn:otel:internal:otlp:receiver"; /// Internal logs configuration. 
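+///
+/// (Receiver nodes that consume internal logs must declare
+/// `plugin_urn: "urn:otel:internal:otlp:receiver"`, matching the constant
+/// above; see the example config earlier in this patch.)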
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] From 93ef68084559e10e06d1c29eff69c48c5566fd4e Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Mon, 12 Jan 2026 16:59:56 -0800 Subject: [PATCH 67/92] wip --- .../configs/internal-telemetry.yaml | 4 +-- .../otap-dataflow/crates/telemetry/src/lib.rs | 9 ++--- rust/otap-dataflow/src/main.rs | 34 ++++++++++++------- 3 files changed, 28 insertions(+), 19 deletions(-) diff --git a/rust/otap-dataflow/configs/internal-telemetry.yaml b/rust/otap-dataflow/configs/internal-telemetry.yaml index 8da280426c..54de509c66 100644 --- a/rust/otap-dataflow/configs/internal-telemetry.yaml +++ b/rust/otap-dataflow/configs/internal-telemetry.yaml @@ -37,7 +37,7 @@ nodes: - noop dispatch_strategy: round_robin config: - verbosity: basic + verbosity: detailed noop: kind: exporter plugin_urn: "urn:otel:noop:exporter" @@ -47,7 +47,7 @@ service: telemetry: logs: level: "debug" - strategies: + providers: global: unbuffered engine: buffered output: internal diff --git a/rust/otap-dataflow/crates/telemetry/src/lib.rs b/rust/otap-dataflow/crates/telemetry/src/lib.rs index 5823b47b76..00019286be 100644 --- a/rust/otap-dataflow/crates/telemetry/src/lib.rs +++ b/rust/otap-dataflow/crates/telemetry/src/lib.rs @@ -160,10 +160,11 @@ impl Default for MetricsSystem { } } -// If RUST_LOG is set, use it for fine-grained control. -// Otherwise, fall back to the config level with some noisy dependencies silenced. -// Users can override by setting RUST_LOG explicitly. -pub(crate) fn get_env_filter(level: LogLevel) -> EnvFilter { +/// Creates an `EnvFilter` for the given log level. +/// +/// If `RUST_LOG` is set in the environment, it takes precedence for fine-grained control. +/// Otherwise, falls back to the config level with known noisy dependencies (h2, hyper) silenced. +pub fn get_env_filter(level: LogLevel) -> EnvFilter { let level = match level { LogLevel::Off => LevelFilter::OFF, LogLevel::Debug => LevelFilter::DEBUG, diff --git a/rust/otap-dataflow/src/main.rs b/rust/otap-dataflow/src/main.rs index 1ca48b86f9..6715e53323 100644 --- a/rust/otap-dataflow/src/main.rs +++ b/rust/otap-dataflow/src/main.rs @@ -5,10 +5,12 @@ use clap::Parser; use otap_df_config::pipeline::PipelineConfig; +use otap_df_config::pipeline::service::telemetry::logs::LogLevel; use otap_df_config::pipeline_group::{CoreAllocation, CoreRange, Quota}; use otap_df_config::{PipelineGroupId, PipelineId}; use otap_df_controller::Controller; use otap_df_otap::OTAP_PIPELINE_FACTORY; +use otap_df_telemetry::{get_env_filter, raw_error}; use otap_df_telemetry::self_tracing::{ConsoleWriter, RawLoggingLayer}; use std::path::PathBuf; use sysinfo::System; @@ -117,6 +119,14 @@ fn main() -> Result<(), Box> { .install_default() .map_err(|e| format!("Failed to install rustls crypto provider: {e:?}"))?; + // Set up raw logging as the global default subscriber for the main thread. + // Engine threads will set their own thread-local subscribers based on config. + let raw_subscriber = Registry::default() + .with(get_env_filter(LogLevel::Debug)) + .with(RawLoggingLayer::new(ConsoleWriter::color())); + tracing::subscriber::set_global_default(raw_subscriber) + .expect("Failed to set global default subscriber"); + let args = Args::parse(); // For now, we predefine pipeline group and pipeline IDs. 
@@ -124,16 +134,12 @@ fn main() -> Result<(), Box> { let pipeline_group_id: PipelineGroupId = "default_pipeline_group".into(); let pipeline_id: PipelineId = "default_pipeline".into(); - // Load pipeline configuration with early logging so parse errors are readable. - // Use with_default for a thread-local subscriber during config loading only. - let early_subscriber = Registry::default().with(RawLoggingLayer::new(ConsoleWriter::color())); - let pipeline_cfg = tracing::subscriber::with_default(early_subscriber, || { - PipelineConfig::from_file( - pipeline_group_id.clone(), - pipeline_id.clone(), - &args.pipeline, - ) - })?; + // Load pipeline configuration + let pipeline_cfg = PipelineConfig::from_file( + pipeline_group_id.clone(), + pipeline_id.clone(), + &args.pipeline, + )?; tracing::info!("{}", system_info()); @@ -155,8 +161,10 @@ fn main() -> Result<(), Box> { // Print the requested core configuration match "a.core_allocation { - CoreAllocation::AllCores => println!("Requested core allocation: all available cores"), - CoreAllocation::CoreCount { count } => println!("Requested core allocation: {count} cores"), + CoreAllocation::AllCores => tracing::info!("Requested core allocation: all available cores"), + CoreAllocation::CoreCount { count } => { + tracing::info!("Requested core allocation: {count} cores") + } CoreAllocation::CoreSet { .. } => { tracing::info!("Requested core allocation: {}", quota.core_allocation); } @@ -178,7 +186,7 @@ fn main() -> Result<(), Box> { std::process::exit(0); } Err(e) => { - tracing::error!("Pipeline failed to run: {e}"); + raw_error!("Pipeline failed to run", error = format!("{e}")); std::process::exit(1); } } From 4521ae79c891f5311b47895388d52d7bfc0266e9 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Tue, 13 Jan 2026 10:37:24 -0800 Subject: [PATCH 68/92] real otel --- .../crates/controller/src/lib.rs | 16 ++++----- .../crates/otap/src/debug_processor.rs | 29 ++++++++------- .../crates/telemetry/src/logs.rs | 12 +++++++ .../telemetry/src/opentelemetry_client.rs | 35 +++++++++++++------ 4 files changed, 60 insertions(+), 32 deletions(-) diff --git a/rust/otap-dataflow/crates/controller/src/lib.rs b/rust/otap-dataflow/crates/controller/src/lib.rs index 98d9d4bf06..0af8230b30 100644 --- a/rust/otap-dataflow/crates/controller/src/lib.rs +++ b/rust/otap-dataflow/crates/controller/src/lib.rs @@ -189,16 +189,12 @@ impl Controller { .clone() .expect("validated: unbuffered requires reporter"), }, - ProviderMode::OpenTelemetry => { - // OpenTelemetry mode for engine is not yet supported - // Fall back to buffered for now - EngineLogsSetup::Buffered { - reporter: logs_reporter - .clone() - .expect("validated: opentelemetry requires reporter"), - capacity: 1024, - } - } + ProviderMode::OpenTelemetry => EngineLogsSetup::OpenTelemetry { + logger_provider: opentelemetry_client + .logger_provider() + .clone() + .expect("validated: opentelemetry engine requires logger_provider from global"), + }, }; let log_level = telemetry_config.logs.level; diff --git a/rust/otap-dataflow/crates/otap/src/debug_processor.rs b/rust/otap-dataflow/crates/otap/src/debug_processor.rs index d05c440bdc..6494cfaf5a 100644 --- a/rust/otap-dataflow/crates/otap/src/debug_processor.rs +++ b/rust/otap-dataflow/crates/otap/src/debug_processor.rs @@ -34,6 +34,7 @@ use otap_df_pdata::proto::opentelemetry::{ trace::v1::TracesData, }; use otap_df_telemetry::metrics::MetricSet; +use otap_df_telemetry::otel_info; use prost::Message as _; use serde_json::Value; use std::sync::Arc; @@ -373,12 +374,13 
@@ impl DebugProcessor { .metric_datapoints_consumed .add(data_points as u64); - let report_basic = format!( - "Received {resource_metrics} resource metrics\nReceived {metrics} metrics\nReceived {data_points} data points\n" + otel_info!( + name: "debug.received.metrics", + resource_metrics = resource_metrics, + metrics = metrics, + data_points = data_points ); - debug_output.output_message(report_basic.as_str()).await?; - // return early if don't need to output anymore information if debug_output.is_basic() { return Ok(()); @@ -423,11 +425,13 @@ impl DebugProcessor { self.metrics.span_events_consumed.add(events as u64); self.metrics.span_links_consumed.add(links as u64); - let report_basic = format!( - "Received {resource_spans} resource spans\nReceived {spans} spans\nReceived {events} events\nReceived {links} links\n" + otel_info!( + name: "debug.received.traces", + resource_spans = resource_spans, + spans = spans, + events = events, + links = links ); - - debug_output.output_message(report_basic.as_str()).await?; // return early if don't need to output anymore information if debug_output.is_basic() { return Ok(()); @@ -468,12 +472,13 @@ impl DebugProcessor { self.metrics.log_signals_consumed.add(log_records as u64); self.metrics.events_consumed.add(events); - let report_basic = format!( - "Received {resource_logs} resource logs\nReceived {log_records} log records\nReceived {events} events\n" + otel_info!( + name: "debug.received.logs", + resource_logs = resource_logs, + log_records = log_records, + events = events ); - debug_output.output_message(report_basic.as_str()).await?; - // return early if don't need to output anymore information if debug_output.is_basic() { return Ok(()); diff --git a/rust/otap-dataflow/crates/telemetry/src/logs.rs b/rust/otap-dataflow/crates/telemetry/src/logs.rs index 5275637ffa..c1f03e1c13 100644 --- a/rust/otap-dataflow/crates/telemetry/src/logs.rs +++ b/rust/otap-dataflow/crates/telemetry/src/logs.rs @@ -8,6 +8,8 @@ use crate::self_tracing::{ ConsoleWriter, DirectLogRecordEncoder, LogRecord, RawLoggingLayer, SavedCallsite, }; use bytes::Bytes; +use opentelemetry_appender_tracing::layer::OpenTelemetryTracingBridge; +use opentelemetry_sdk::logs::SdkLoggerProvider; use otap_df_pdata::otlp::ProtoBuffer; use otap_df_pdata::proto::consts::field_num::logs::{ LOGS_DATA_RESOURCE, RESOURCE_LOGS_SCOPE_LOGS, SCOPE_LOGS_LOG_RECORDS, @@ -378,6 +380,11 @@ pub enum EngineLogsSetup { /// Reporter to send singletons through. reporter: LogsReporter, }, + /// OpenTelemetry SDK: logs go through the OpenTelemetry logging pipeline. + OpenTelemetry { + /// The OpenTelemetry SDK logger provider. + logger_provider: SdkLoggerProvider, + }, } /// Handle for flushing buffered logs from the engine thread. 
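+///
+/// Hypothetical call shape (a sketch; the setup API in this file passes a
+/// `LogsFlusher` into the engine's run closure):
+///
+/// ```ignore
+/// // At a batch boundary on the engine thread:
+/// let _ = flusher.flush(); // drains the thread-local buffer, if any
+/// ```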
@@ -453,6 +460,11 @@ impl EngineLogsSetup { let subscriber = Registry::default().with(filter).with(layer); tracing::subscriber::with_default(subscriber, || f(LogsFlusher::Noop)) } + EngineLogsSetup::OpenTelemetry { logger_provider } => { + let sdk_layer = OpenTelemetryTracingBridge::new(logger_provider); + let subscriber = Registry::default().with(filter).with(sdk_layer); + tracing::subscriber::with_default(subscriber, || f(LogsFlusher::Noop)) + } } } } diff --git a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs b/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs index 2d03c57fa3..90b37c18ab 100644 --- a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs +++ b/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs @@ -48,6 +48,10 @@ impl OpentelemetryClient { /// The `logs_reporter` parameter is required when `strategies.global` is set to /// `Unbuffered`. It should be created via `LogsCollector::new()` and the collector /// should be run on a dedicated thread. + /// + /// The logger provider is configured when either global or engine providers + /// are set to `OpenTelemetry`. This allows the engine to use the same SDK + /// pipeline even when global uses a different logging strategy. pub fn new( config: &TelemetryConfig, logs_reporter: Option, @@ -66,15 +70,28 @@ impl OpentelemetryClient { crate::raw_error!("tracing.subscriber.init", error = err.to_string()); }; + // Check if either global or engine needs the OpenTelemetry logger provider + let global_needs_otel = config.logs.providers.global == ProviderMode::OpenTelemetry; + let engine_needs_otel = config.logs.providers.engine == ProviderMode::OpenTelemetry; + + // Configure the logger provider if either global or engine needs it + let (logger_provider, runtime) = if global_needs_otel || engine_needs_otel { + let (provider, rt) = + LoggerProvider::configure(sdk_resource.clone(), &config.logs, runtime)? + .into_parts(); + (Some(provider), rt) + } else { + (None, runtime) + }; + // Configure the global subscriber based on strategies.global. // Engine threads override this with BufferWriterLayer via with_default(). 
-        let (logger_provider, runtime) = match config.logs.providers.global {
+        match config.logs.providers.global {
             ProviderMode::Noop => {
                 // No-op: just install the filter, events are dropped
                 if let Err(err) = tracing::subscriber::NoSubscriber::new().try_init() {
                     logerr(err);
                 }
-                (None, runtime)
             }
             ProviderMode::Raw => {
                 if let Err(err) = tracing_setup
@@ -83,7 +100,6 @@
                 {
                     logerr(err);
                 }
-                (None, runtime)
             }
             ProviderMode::Buffered => {
                 return Err(Error::ConfigurationError(
@@ -98,19 +114,18 @@
                 if let Err(err) = tracing_setup.with(channel_layer).try_init() {
                     logerr(err);
                 }
-                (None, runtime)
             }
             ProviderMode::OpenTelemetry => {
-                let (logger_provider, runtime) =
-                    LoggerProvider::configure(sdk_resource, &config.logs, runtime)?.into_parts();
-
-                let sdk_layer = OpenTelemetryTracingBridge::new(&logger_provider);
+                // logger_provider is guaranteed to be Some here since global_needs_otel is true
+                let sdk_layer = OpenTelemetryTracingBridge::new(
+                    logger_provider
+                        .as_ref()
+                        .expect("logger_provider configured when global is OpenTelemetry"),
+                );
                 if let Err(err) = tracing_setup.with(sdk_layer).try_init() {
                     logerr(err)
                 }
-
-                (Some(logger_provider), runtime)
             }
         };

From 31dbd95ac9a8b721782de3f6c767660ccede21db Mon Sep 17 00:00:00 2001
From: Joshua MacDonald
Date: Tue, 13 Jan 2026 10:45:36 -0800
Subject: [PATCH 69/92] revert debug_proc b/c tests

---
 .../src/pipeline/service/telemetry/logs.rs    | 12 ++++----
 .../crates/controller/src/lib.rs              |  2 +-
 .../crates/otap/src/debug_processor.rs        | 29 ++++++++-----------
 3 files changed, 20 insertions(+), 23 deletions(-)

diff --git a/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs b/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs
index 88a2073681..3756c7407b 100644
--- a/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs
+++ b/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs
@@ -103,20 +103,22 @@ pub enum OutputMode {
     /// OpenTelemetry settings.
     Noop,

-    /// Raw logging: format and print directly to console
-    /// (stdout/stderr) from the logs collector thread. ERROR and
-    /// WARN go to stderr, others to stdout.
+    /// Direct console logging: format and print directly to console
+    /// (stdout/stderr) from the logs collector thread, bypassing any
+    /// internal use of the dataflow engine. ERROR and WARN go to
+    /// stderr, others to stdout.
     #[default]
-    Raw,
+    Direct,

     /// Route to Internal Telemetry Receiver node. The pipeline must
     /// include a node with INTERNAL_TELEMETRY_RECEIVER_URN. The
     /// engine provider mode must be Buffered for internal output.
+    /// This will become the default.
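+    ///
+    /// Configuration sketch (mirroring `configs/internal-telemetry.yaml`
+    /// from this series):
+    ///
+    /// ```yaml
+    /// service:
+    ///   telemetry:
+    ///     logs:
+    ///       providers:
+    ///         global: unbuffered
+    ///         engine: buffered
+    ///       output: internal
+    /// ```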
Internal, } fn default_output() -> OutputMode { - OutputMode::Raw + OutputMode::Direct } fn default_level() -> LogLevel { diff --git a/rust/otap-dataflow/crates/controller/src/lib.rs b/rust/otap-dataflow/crates/controller/src/lib.rs index 0af8230b30..02e9052045 100644 --- a/rust/otap-dataflow/crates/controller/src/lib.rs +++ b/rust/otap-dataflow/crates/controller/src/lib.rs @@ -114,7 +114,7 @@ impl Controller { // - Internal Telemetry Receiver node (output == Internal): emits as OTLP let (logs_reporter, logs_receiver, logs_collector_handle) = if providers_need_reporter { match telemetry_config.logs.output { - OutputMode::Raw => { + OutputMode::Direct => { // Start collector thread for Raw output mode let (logs_collector, reporter) = LogsCollector::new(telemetry_config.reporting_channel_size); diff --git a/rust/otap-dataflow/crates/otap/src/debug_processor.rs b/rust/otap-dataflow/crates/otap/src/debug_processor.rs index 6494cfaf5a..d05c440bdc 100644 --- a/rust/otap-dataflow/crates/otap/src/debug_processor.rs +++ b/rust/otap-dataflow/crates/otap/src/debug_processor.rs @@ -34,7 +34,6 @@ use otap_df_pdata::proto::opentelemetry::{ trace::v1::TracesData, }; use otap_df_telemetry::metrics::MetricSet; -use otap_df_telemetry::otel_info; use prost::Message as _; use serde_json::Value; use std::sync::Arc; @@ -374,13 +373,12 @@ impl DebugProcessor { .metric_datapoints_consumed .add(data_points as u64); - otel_info!( - name: "debug.received.metrics", - resource_metrics = resource_metrics, - metrics = metrics, - data_points = data_points + let report_basic = format!( + "Received {resource_metrics} resource metrics\nReceived {metrics} metrics\nReceived {data_points} data points\n" ); + debug_output.output_message(report_basic.as_str()).await?; + // return early if don't need to output anymore information if debug_output.is_basic() { return Ok(()); @@ -425,13 +423,11 @@ impl DebugProcessor { self.metrics.span_events_consumed.add(events as u64); self.metrics.span_links_consumed.add(links as u64); - otel_info!( - name: "debug.received.traces", - resource_spans = resource_spans, - spans = spans, - events = events, - links = links + let report_basic = format!( + "Received {resource_spans} resource spans\nReceived {spans} spans\nReceived {events} events\nReceived {links} links\n" ); + + debug_output.output_message(report_basic.as_str()).await?; // return early if don't need to output anymore information if debug_output.is_basic() { return Ok(()); @@ -472,13 +468,12 @@ impl DebugProcessor { self.metrics.log_signals_consumed.add(log_records as u64); self.metrics.events_consumed.add(events); - otel_info!( - name: "debug.received.logs", - resource_logs = resource_logs, - log_records = log_records, - events = events + let report_basic = format!( + "Received {resource_logs} resource logs\nReceived {log_records} log records\nReceived {events} events\n" ); + debug_output.output_message(report_basic.as_str()).await?; + // return early if don't need to output anymore information if debug_output.is_basic() { return Ok(()); From 97c7ae0df6fa1ddd18be843822df69c0d6058aec Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Tue, 13 Jan 2026 12:05:14 -0800 Subject: [PATCH 70/92] move raw_error --- .../configs/internal-telemetry.yaml | 27 +- rust/otap-dataflow/crates/config/src/error.rs | 28 ++ .../crates/config/src/pipeline.rs | 412 +++++++++++++++++- .../crates/telemetry/src/internal_events.rs | 39 ++ .../crates/telemetry/src/self_tracing.rs | 1 - .../telemetry/src/self_tracing/raw_log.rs | 64 --- 6 files changed, 486 
insertions(+), 85 deletions(-) delete mode 100644 rust/otap-dataflow/crates/telemetry/src/self_tracing/raw_log.rs diff --git a/rust/otap-dataflow/configs/internal-telemetry.yaml b/rust/otap-dataflow/configs/internal-telemetry.yaml index 54de509c66..f6e03a2984 100644 --- a/rust/otap-dataflow/configs/internal-telemetry.yaml +++ b/rust/otap-dataflow/configs/internal-telemetry.yaml @@ -19,26 +19,44 @@ nodes: signals_per_second: 1000 log_weight: 100 registry_path: https://github.com/open-telemetry/semantic-conventions.git[model] + debug: + kind: processor + plugin_urn: "urn:otel:debug:processor" + out_ports: + out_port: + destinations: + - noop + dispatch_strategy: round_robin + config: + verbosity: detailed + noop: + kind: exporter + plugin_urn: "urn:otel:noop:exporter" + config: {} + +# Internal telemetry pipeline - separate from main pipeline +# Uses hardcoded settings: single thread, no admin server +internal: internal_telemetry: kind: receiver plugin_urn: "urn:otel:internal:otlp:receiver" out_ports: out_port: destinations: - - debug + - internal_debug dispatch_strategy: round_robin config: {} - debug: + internal_debug: kind: processor plugin_urn: "urn:otel:debug:processor" out_ports: out_port: destinations: - - noop + - internal_noop dispatch_strategy: round_robin config: verbosity: detailed - noop: + internal_noop: kind: exporter plugin_urn: "urn:otel:noop:exporter" config: {} @@ -50,4 +68,5 @@ service: providers: global: unbuffered engine: buffered + internal: noop # Avoid feedback in internal pipeline output: internal diff --git a/rust/otap-dataflow/crates/config/src/error.rs b/rust/otap-dataflow/crates/config/src/error.rs index 4b0f378f2b..a8f1436870 100644 --- a/rust/otap-dataflow/crates/config/src/error.rs +++ b/rust/otap-dataflow/crates/config/src/error.rs @@ -118,6 +118,34 @@ pub enum Error { /// The id of the pipeline that was duplicated. pipeline_id: PipelineId, }, + + /// A receiver in the internal telemetry pipeline has an invalid plugin URN. + /// Only Internal Telemetry Receivers (ITR) are allowed in the internal pipeline. + #[error( + "Invalid receiver in internal pipeline: node `{node_id}` has plugin_urn `{plugin_urn}`, \ + but only Internal Telemetry Receivers are allowed\nContext: {context}" + )] + #[diagnostic(code(data_plane::invalid_internal_receiver), url(docsrs))] + InvalidInternalReceiver { + /// The context in which the error occurred. + context: Context, + /// The node id of the invalid receiver. + node_id: NodeId, + /// The plugin URN of the invalid receiver. + plugin_urn: String, + }, + + /// The internal telemetry pipeline is required but not configured. + #[error( + "Internal telemetry pipeline required but not configured. \ + When output mode is 'internal', the `internal` section must be present \ + with at least one Internal Telemetry Receiver.\nContext: {context}" + )] + #[diagnostic(code(data_plane::missing_internal_pipeline), url(docsrs))] + MissingInternalPipeline { + /// The context in which the error occurred. 
+ context: Context, + }, } /// Information that all errors provide to help identify diff --git a/rust/otap-dataflow/crates/config/src/pipeline.rs b/rust/otap-dataflow/crates/config/src/pipeline.rs index fcc49e482e..0d807e439f 100644 --- a/rust/otap-dataflow/crates/config/src/pipeline.rs +++ b/rust/otap-dataflow/crates/config/src/pipeline.rs @@ -9,6 +9,7 @@ use crate::error::{Context, Error, HyperEdgeSpecDetails}; use crate::health::HealthPolicy; use crate::node::{DispatchStrategy, HyperEdgeConfig, NodeKind, NodeUserConfig}; use crate::observed_state::ObservedStateSettings; +use crate::pipeline::service::telemetry::logs::INTERNAL_TELEMETRY_RECEIVER_URN; use crate::pipeline::service::ServiceConfig; use crate::{Description, NodeId, NodeUrn, PipelineGroupId, PipelineId, PortName}; use schemars::JsonSchema; @@ -48,6 +49,21 @@ pub struct PipelineConfig { /// across multiple cores/threads without cloning the entire configuration. nodes: HashMap<NodeId, Arc<NodeUserConfig>>, + /// Internal telemetry pipeline nodes. + /// + /// This optional section defines nodes for processing internal telemetry + /// (logs, metrics, traces generated by the engine itself). + /// + /// The internal pipeline runs on a dedicated thread with hardcoded settings + /// (single thread, no admin server), separate from the main pipeline. + /// + /// Constraints: + /// - Receivers must be Internal Telemetry Receivers (ITR) + /// with plugin_urn matching `INTERNAL_TELEMETRY_RECEIVER_URN` + /// - Processors and exporters can be any valid plugin + #[serde(default, skip_serializing_if = "HashMap::is_empty")] + internal: HashMap<NodeId, Arc<NodeUserConfig>>, + /// Service-level telemetry configuration. #[serde(default)] service: ServiceConfig, @@ -68,6 +84,7 @@ pub enum PipelineType { /// OpenTelemetry with Apache Arrow Protocol (OTAP) pipeline. Otap, } + /// A configuration for a pipeline. #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] pub struct PipelineSettings { @@ -278,6 +295,23 @@ impl PipelineConfig { &self.service } + /// Returns true if the internal telemetry pipeline is configured. + #[must_use] + pub fn has_internal_pipeline(&self) -> bool { + !self.internal.is_empty() + } + + /// Returns a reference to the internal pipeline nodes. + #[must_use] + pub fn internal_nodes(&self) -> &HashMap<NodeId, Arc<NodeUserConfig>> { + &self.internal + } + + /// Returns an iterator visiting all nodes in the internal telemetry pipeline. + pub fn internal_node_iter(&self) -> impl Iterator<Item = (&NodeId, &Arc<NodeUserConfig>)> { + self.internal.iter() + } + /// Validate the pipeline specification. 
/// /// This method checks for: @@ -285,6 +319,7 @@ impl PipelineConfig { /// - Duplicate out-ports (same source node + port name) /// - Invalid hyper-edges (missing source or target nodes) /// - Cycles in the DAG + /// - Internal pipeline receivers are only ITR nodes pub fn validate( &self, pipeline_group_id: &PipelineGroupId, pipeline_id: &PipelineId, ) -> Result<(), Error> { let mut errors = Vec::new(); - // Check for invalid hyper-edges (references to non-existent nodes) - for (node_id, node) in self.nodes.iter() { + // Validate main pipeline hyper-edges + Self::validate_hyper_edges( + &self.nodes, + pipeline_group_id, + pipeline_id, + &mut errors, + ); + + // Validate internal pipeline if present + if !self.internal.is_empty() { + // Check that receivers in internal pipeline are only ITR nodes + for (node_id, node) in self.internal.iter() { + if node.kind == NodeKind::Receiver + && node.plugin_urn.as_ref() != INTERNAL_TELEMETRY_RECEIVER_URN + { + errors.push(Error::InvalidInternalReceiver { + context: Context::new(pipeline_group_id.clone(), pipeline_id.clone()), + node_id: node_id.clone(), + plugin_urn: node.plugin_urn.to_string(), + }); + } + } + + // Validate internal pipeline hyper-edges + Self::validate_hyper_edges( + &self.internal, + pipeline_group_id, + pipeline_id, + &mut errors, + ); + + // Check for cycles in internal pipeline + if errors.is_empty() { + let cycles = Self::detect_cycles_in(&self.internal); + for cycle in cycles { + errors.push(Error::CycleDetected { + context: Context::new(pipeline_group_id.clone(), pipeline_id.clone()), + nodes: cycle, + }); + } + } + } + + // Check for cycles in main pipeline if no errors so far + if errors.is_empty() { + let cycles = self.detect_cycles(); + for cycle in cycles { + errors.push(Error::CycleDetected { + context: Context::new(pipeline_group_id.clone(), pipeline_id.clone()), + nodes: cycle, + }); + } + } + + if !errors.is_empty() { + Err(Error::InvalidConfiguration { errors }) + } else { + Ok(()) + } + } + + /// Validate hyper-edges for a set of nodes. + fn validate_hyper_edges( + nodes: &HashMap<NodeId, Arc<NodeUserConfig>>, + pipeline_group_id: &PipelineGroupId, + pipeline_id: &PipelineId, + errors: &mut Vec<Error>, + ) { + for (node_id, node) in nodes.iter() { for edge in node.out_ports.values() { let mut missing_targets = Vec::new(); for target in &edge.destinations { - if !self.nodes.contains_key(target) { + if !nodes.contains_key(target) { missing_targets.push(target.clone()); } } @@ -307,7 +409,7 @@ errors.push(Error::InvalidHyperEdgeSpec { context: Context::new(pipeline_group_id.clone(), pipeline_id.clone()), source_node: node_id.clone(), - missing_source: false, // source exists since we're iterating over nodes + missing_source: false, details: Box::new(HyperEdgeSpecDetails { target_nodes: edge.destinations.iter().cloned().collect(), dispatch_strategy: edge.dispatch_strategy.clone(), @@ -317,23 +419,62 @@ } } } + }
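
As an aside on the new accessors, here is a hedged sketch of scanning the internal section for ITR nodes, mirroring the ITR receiver check in validate() above; count_itrs is a hypothetical helper, not part of this patch:

fn count_itrs(config: &PipelineConfig) -> usize {
    // Count internal nodes whose plugin URN marks them as an
    // Internal Telemetry Receiver (ITR).
    config
        .internal_node_iter()
        .filter(|(_, node)| {
            node.kind == NodeKind::Receiver
                && node.plugin_urn.as_ref() == INTERNAL_TELEMETRY_RECEIVER_URN
        })
        .count()
}
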
+ fn detect_cycles_in(nodes: &HashMap>) -> Vec> { + fn visit( + node: &NodeId, + nodes: &HashMap>, + visiting: &mut HashSet, + visited: &mut HashSet, + current_path: &mut Vec, + cycles: &mut Vec>, + ) { + if visited.contains(node) { + return; + } + if visiting.contains(node) { + if let Some(pos) = current_path.iter().position(|n| n == node) { + cycles.push(current_path[pos..].to_vec()); + } + return; + } + _ = visiting.insert(node.clone()); + current_path.push(node.clone()); + + if let Some(n) = nodes.get(node) { + for edge in n.out_ports.values() { + for tgt in &edge.destinations { + visit(tgt, nodes, visiting, visited, current_path, cycles); + } + } } + + _ = visiting.remove(node); + _ = visited.insert(node.clone()); + _ = current_path.pop(); } - if !errors.is_empty() { - Err(Error::InvalidConfiguration { errors }) - } else { - Ok(()) + let mut visiting = HashSet::new(); + let mut current_path = Vec::new(); + let mut visited = HashSet::new(); + let mut cycles = Vec::new(); + + for node in nodes.keys() { + if !visited.contains(node) { + visit( + node, + nodes, + &mut visiting, + &mut visited, + &mut current_path, + &mut cycles, + ); + } } + + cycles } fn detect_cycles(&self) -> Vec> { @@ -656,6 +797,7 @@ impl PipelineConfigBuilder { .into_iter() .map(|(id, node)| (id, Arc::new(node))) .collect(), + internal: HashMap::new(), settings: PipelineSettings::default(), r#type: pipeline_type, service: ServiceConfig::default(), @@ -1321,4 +1463,242 @@ mod tests { panic!("Expected deserialization to fail due to unknown exporter"); } } + + #[test] + fn test_internal_pipeline_with_valid_itr() { + // Valid internal pipeline with ITR receiver + let yaml_str = r#" +nodes: + receiver: + kind: receiver + plugin_urn: "urn:test:receiver" + out_ports: + out: + destinations: + - exporter + dispatch_strategy: round_robin + exporter: + kind: exporter + plugin_urn: "urn:test:exporter" + config: {} + +internal: + itr: + kind: receiver + plugin_urn: "urn:otel:internal:otlp:receiver" + out_ports: + out: + destinations: + - internal_exporter + dispatch_strategy: round_robin + config: {} + internal_exporter: + kind: exporter + plugin_urn: "urn:test:exporter" + config: {} +"#; + let result = super::PipelineConfig::from_yaml( + "test_group".into(), + "test_pipeline".into(), + yaml_str, + ); + assert!(result.is_ok()); + let config = result.unwrap(); + assert!(config.has_internal_pipeline()); + assert_eq!(config.internal_node_iter().count(), 2); + } + + #[test] + fn test_internal_pipeline_rejects_non_itr_receiver() { + // Invalid: internal pipeline has a non-ITR receiver + let yaml_str = r#" +nodes: + receiver: + kind: receiver + plugin_urn: "urn:test:receiver" + out_ports: + out: + destinations: + - exporter + dispatch_strategy: round_robin + exporter: + kind: exporter + plugin_urn: "urn:test:exporter" + config: {} + +internal: + bad_receiver: + kind: receiver + plugin_urn: "urn:test:some_other_receiver" + out_ports: + out: + destinations: + - internal_exporter + dispatch_strategy: round_robin + config: {} + internal_exporter: + kind: exporter + plugin_urn: "urn:test:exporter" + config: {} +"#; + let result = super::PipelineConfig::from_yaml( + "test_group".into(), + "test_pipeline".into(), + yaml_str, + ); + assert!(result.is_err()); + match result { + Err(Error::InvalidConfiguration { errors }) => { + assert_eq!(errors.len(), 1); + match &errors[0] { + Error::InvalidInternalReceiver { + node_id, + plugin_urn, + .. 
+ } => { + assert_eq!(node_id.as_ref(), "bad_receiver"); + assert_eq!(plugin_urn, "urn:test:some_other_receiver"); + } + other => panic!("Expected InvalidInternalReceiver, got {other:?}"), + } + } + other => panic!("Expected InvalidConfiguration error, got {other:?}"), + } + } + + #[test] + fn test_internal_pipeline_allows_processors_and_exporters() { + // Valid: internal pipeline can have processors and exporters + let yaml_str = r#" +nodes: + receiver: + kind: receiver + plugin_urn: "urn:test:receiver" + out_ports: + out: + destinations: + - exporter + dispatch_strategy: round_robin + exporter: + kind: exporter + plugin_urn: "urn:test:exporter" + config: {} + +internal: + itr: + kind: receiver + plugin_urn: "urn:otel:internal:otlp:receiver" + out_ports: + out: + destinations: + - processor + dispatch_strategy: round_robin + config: {} + processor: + kind: processor + plugin_urn: "urn:test:processor" + out_ports: + out: + destinations: + - internal_exporter + dispatch_strategy: round_robin + config: {} + internal_exporter: + kind: exporter + plugin_urn: "urn:test:exporter" + config: {} +"#; + let result = super::PipelineConfig::from_yaml( + "test_group".into(), + "test_pipeline".into(), + yaml_str, + ); + assert!(result.is_ok()); + let config = result.unwrap(); + assert!(config.has_internal_pipeline()); + assert_eq!(config.internal_node_iter().count(), 3); + } + + #[test] + fn test_internal_pipeline_validates_hyper_edges() { + // Invalid: internal pipeline has missing target node + let yaml_str = r#" +nodes: + receiver: + kind: receiver + plugin_urn: "urn:test:receiver" + out_ports: + out: + destinations: + - exporter + dispatch_strategy: round_robin + exporter: + kind: exporter + plugin_urn: "urn:test:exporter" + config: {} + +internal: + itr: + kind: receiver + plugin_urn: "urn:otel:internal:otlp:receiver" + out_ports: + out: + destinations: + - missing_node + dispatch_strategy: round_robin + config: {} +"#; + let result = super::PipelineConfig::from_yaml( + "test_group".into(), + "test_pipeline".into(), + yaml_str, + ); + assert!(result.is_err()); + match result { + Err(Error::InvalidConfiguration { errors }) => { + assert_eq!(errors.len(), 1); + match &errors[0] { + Error::InvalidHyperEdgeSpec { + source_node, + details, + .. + } => { + assert_eq!(source_node.as_ref(), "itr"); + assert!(details.missing_targets.contains(&"missing_node".into())); + } + other => panic!("Expected InvalidHyperEdgeSpec, got {other:?}"), + } + } + other => panic!("Expected InvalidConfiguration error, got {other:?}"), + } + } + + #[test] + fn test_empty_internal_pipeline() { + // Valid: no internal section means no internal pipeline + let yaml_str = r#" +nodes: + receiver: + kind: receiver + plugin_urn: "urn:test:receiver" + out_ports: + out: + destinations: + - exporter + dispatch_strategy: round_robin + exporter: + kind: exporter + plugin_urn: "urn:test:exporter" + config: {} +"#; + let result = super::PipelineConfig::from_yaml( + "test_group".into(), + "test_pipeline".into(), + yaml_str, + ); + assert!(result.is_ok()); + let config = result.unwrap(); + assert!(!config.has_internal_pipeline()); + assert_eq!(config.internal_node_iter().count(), 0); + } } diff --git a/rust/otap-dataflow/crates/telemetry/src/internal_events.rs b/rust/otap-dataflow/crates/telemetry/src/internal_events.rs index 07a68bd98f..b5cb7afd3a 100644 --- a/rust/otap-dataflow/crates/telemetry/src/internal_events.rs +++ b/rust/otap-dataflow/crates/telemetry/src/internal_events.rs @@ -118,3 +118,42 @@ macro_rules! 
otel_error { ) }; } + +/// Create a subscriber that writes directly to console (bypassing channels). +fn raw_logging_subscriber() -> impl tracing::Subscriber { + use crate::self_tracing::{ConsoleWriter, RawLoggingLayer}; + use tracing_subscriber::layer::SubscriberExt; + + tracing_subscriber::registry().with(RawLoggingLayer::new(ConsoleWriter::no_color())) +} + +/// Execute a closure with a raw logging subscriber that writes directly to console. +#[inline] +pub fn with_raw_logging(f: F) -> R +where + F: FnOnce() -> R, +{ + tracing::subscriber::with_default(raw_logging_subscriber(), f) +} + +/// Log an error message directly to stderr, bypassing the tracing subscriber. +#[macro_export] +macro_rules! raw_error { + ($name:expr $(,)?) => { + $crate::internal_events::with_raw_logging(|| { + $crate::_private::error!(name: $name, target: env!("CARGO_PKG_NAME"), name = $name, ""); + }) + }; + ($name:expr, $($key:ident = $value:expr),+ $(,)?) => { + $crate::internal_events::with_raw_logging(|| { + $crate::_private::error!(name: $name, + target: env!("CARGO_PKG_NAME"), + name = $name, + $($key = { + $value + }),+, + "" + ); + }) + }; +} diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing.rs index db4ed4f998..9d03fd56c7 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing.rs @@ -9,7 +9,6 @@ pub mod encoder; pub mod formatter; -pub mod raw_log; use bytes::Bytes; use encoder::DirectFieldVisitor; diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/raw_log.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/raw_log.rs deleted file mode 100644 index 8a449b00a5..0000000000 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/raw_log.rs +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -//! Raw logging macros that bypass the tracing subscriber and write to -//! the console. A single `raw_error!(...)` API is provided. - -#![allow(unused_macros)] - -use super::formatter::RawLoggingLayer; -use tracing_subscriber::prelude::*; - -#[doc(hidden)] -pub mod _private { - pub use tracing::error; -} - -/// Create a subscriber that writes directly to console (bypassing channels). -fn raw_logging_subscriber() -> impl tracing::Subscriber { - tracing_subscriber::registry().with(RawLoggingLayer::new(super::ConsoleWriter::no_color())) -} - -/// Execute a closure with a raw logging subscriber that writes directly to console. -#[inline] -pub fn with_raw_logging(f: F) -> R -where - F: FnOnce() -> R, -{ - tracing::subscriber::with_default(raw_logging_subscriber(), f) -} - -/// Log an error message directly to stderr, bypassing the tracing subscriber. -/// -/// This should be used sparingly, only emergencies! This is a good -/// configuration for diagnosing internal other logging facilities, -/// because it is unbuffered and overrides the tracing subscriber. -#[macro_export] -macro_rules! raw_error { - ($name:expr, $(,)?) => { - $crate::self_tracing::raw_log::with_raw_logging(|| { - $crate::_private::error!(name: $name, target: env!("CARGO_PKG_NAME"), name = $name, ""); - }) - }; - ($name:expr, $($key:ident = $value:expr),+ $(,)?) 
=> { - $crate::self_tracing::raw_log::with_raw_logging(|| { - $crate::_private::error!(name: $name, - target: env!("CARGO_PKG_NAME"), - name = $name, - $($key = { - $value - }),+, - "" - ) - }) - }; -} - -#[cfg(test)] -mod tests { - #[test] - fn test_raw_error() { - raw_error!("panic.late", msg = "test error message"); - raw_error!("panic.early", msg = "test error with arg", arg = 42); - } -} From 7d4672b05c2f0b77a38e6c6a023f0ae266bf1c62 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Tue, 13 Jan 2026 12:44:56 -0800 Subject: [PATCH 71/92] works --- .../configs/internal-telemetry.yaml | 12 +- .../crates/config/src/pipeline.rs | 475 ++++++++++-------- .../src/pipeline/service/telemetry/logs.rs | 2 +- .../crates/controller/src/lib.rs | 105 +++- 4 files changed, 370 insertions(+), 224 deletions(-) diff --git a/rust/otap-dataflow/configs/internal-telemetry.yaml b/rust/otap-dataflow/configs/internal-telemetry.yaml index f6e03a2984..a6b9fd18ee 100644 --- a/rust/otap-dataflow/configs/internal-telemetry.yaml +++ b/rust/otap-dataflow/configs/internal-telemetry.yaml @@ -28,7 +28,7 @@ nodes: - noop dispatch_strategy: round_robin config: - verbosity: detailed + verbosity: basic noop: kind: exporter plugin_urn: "urn:otel:noop:exporter" @@ -37,26 +37,26 @@ nodes: # Internal telemetry pipeline - separate from main pipeline # Uses hardcoded settings: single thread, no admin server internal: - internal_telemetry: + telemetry: kind: receiver plugin_urn: "urn:otel:internal:otlp:receiver" out_ports: out_port: destinations: - - internal_debug + - debug dispatch_strategy: round_robin config: {} - internal_debug: + debug: kind: processor plugin_urn: "urn:otel:debug:processor" out_ports: out_port: destinations: - - internal_noop + - noop dispatch_strategy: round_robin config: verbosity: detailed - internal_noop: + noop: kind: exporter plugin_urn: "urn:otel:noop:exporter" config: {} diff --git a/rust/otap-dataflow/crates/config/src/pipeline.rs b/rust/otap-dataflow/crates/config/src/pipeline.rs index 0d807e439f..4cdf0d14f4 100644 --- a/rust/otap-dataflow/crates/config/src/pipeline.rs +++ b/rust/otap-dataflow/crates/config/src/pipeline.rs @@ -9,8 +9,8 @@ use crate::error::{Context, Error, HyperEdgeSpecDetails}; use crate::health::HealthPolicy; use crate::node::{DispatchStrategy, HyperEdgeConfig, NodeKind, NodeUserConfig}; use crate::observed_state::ObservedStateSettings; -use crate::pipeline::service::telemetry::logs::INTERNAL_TELEMETRY_RECEIVER_URN; use crate::pipeline::service::ServiceConfig; +use crate::pipeline::service::telemetry::logs::INTERNAL_TELEMETRY_RECEIVER_URN; use crate::{Description, NodeId, NodeUrn, PipelineGroupId, PipelineId, PortName}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -43,11 +43,12 @@ pub struct PipelineConfig { #[serde(default)] settings: PipelineSettings, - /// All nodes in this pipeline, keyed by node ID. + /// All nodes in this pipeline. /// /// Note: We use `Arc` to allow sharing the same pipeline configuration /// across multiple cores/threads without cloning the entire configuration. - nodes: HashMap>, + #[serde(default)] + nodes: PipelineNodes, /// Internal telemetry pipeline nodes. 
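
A quick aside before the wrapper lands below: a minimal, self-contained model of the #[serde(transparent)] pattern that `PipelineNodes` relies on. The toy `Nodes` type and the serde_json round-trip are illustrative assumptions only; the real type wraps the node map and is exercised through YAML.

use serde::{Deserialize, Serialize};
use std::collections::HashMap;

// A transparent newtype (de)serializes exactly like its inner map, so the
// on-disk shape of the `nodes:` and `internal:` sections does not change.
#[derive(Debug, Serialize, Deserialize)]
#[serde(transparent)]
struct Nodes(HashMap<String, String>);

fn main() {
    let n: Nodes = serde_json::from_str(r#"{"receiver":"urn:test:receiver"}"#).unwrap();
    assert!(n.0.contains_key("receiver"));
    assert_eq!(
        serde_json::to_string(&n).unwrap(),
        r#"{"receiver":"urn:test:receiver"}"#
    );
}
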
/// @@ -61,8 +62,8 @@ pub struct PipelineConfig { /// - Receivers must be Internal Telemetry Receivers (ITR) /// with plugin_urn matching `INTERNAL_TELEMETRY_RECEIVER_URN` /// - Processors and exporters can be any valid plugin - #[serde(default, skip_serializing_if = "HashMap::is_empty")] - internal: HashMap<NodeId, Arc<NodeUserConfig>>, + #[serde(default, skip_serializing_if = "PipelineNodes::is_empty")] + internal: PipelineNodes, /// Service-level telemetry configuration. #[serde(default)] @@ -85,6 +86,185 @@ pub enum PipelineType { Otap, } +/// A collection of nodes forming a pipeline graph (hyper-DAG). +/// +/// This wrapper provides validation methods for the node graph structure, +/// including hyper-edge validation and cycle detection. +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)] +#[serde(transparent)] +pub struct PipelineNodes(HashMap<NodeId, Arc<NodeUserConfig>>); + +impl PipelineNodes { + /// Returns true if the node collection is empty. + #[must_use] + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + /// Returns the number of nodes. + #[must_use] + pub fn len(&self) -> usize { + self.0.len() + } + + /// Returns a reference to the node with the given ID, if it exists. + #[must_use] + pub fn get(&self, id: &str) -> Option<&Arc<NodeUserConfig>> { + self.0.get(id) + } + + /// Returns true if a node with the given ID exists. + #[must_use] + pub fn contains_key(&self, id: &str) -> bool { + self.0.contains_key(id) + } + + /// Returns an iterator visiting all nodes. + pub fn iter(&self) -> impl Iterator<Item = (&NodeId, &Arc<NodeUserConfig>)> { + self.0.iter() + } + + /// Returns an iterator over node IDs. + pub fn keys(&self) -> impl Iterator<Item = &NodeId> { + self.0.keys() + } + + /// Validate the node graph structure. + /// + /// Checks for: + /// - Invalid hyper-edges (missing target nodes) + /// - Cycles in the DAG + pub fn validate( + &self, + pipeline_group_id: &PipelineGroupId, + pipeline_id: &PipelineId, + errors: &mut Vec<Error>, + ) { + self.validate_hyper_edges(pipeline_group_id, pipeline_id, errors); + + // Only check for cycles if no hyper-edge errors + if errors.is_empty() { + for cycle in self.detect_cycles() { + errors.push(Error::CycleDetected { + context: Context::new(pipeline_group_id.clone(), pipeline_id.clone()), + nodes: cycle, + }); + } + } + } + + /// Validate hyper-edges (check that all destination nodes exist). + fn validate_hyper_edges( + &self, + pipeline_group_id: &PipelineGroupId, + pipeline_id: &PipelineId, + errors: &mut Vec<Error>, + ) { + for (node_id, node) in self.0.iter() { + for edge in node.out_ports.values() { + let missing_targets: Vec<_> = edge + .destinations + .iter() + .filter(|target| !self.0.contains_key(*target)) + .cloned() + .collect(); + + if !missing_targets.is_empty() { + errors.push(Error::InvalidHyperEdgeSpec { + context: Context::new(pipeline_group_id.clone(), pipeline_id.clone()), + source_node: node_id.clone(), + missing_source: false, + details: Box::new(HyperEdgeSpecDetails { + target_nodes: edge.destinations.iter().cloned().collect(), + dispatch_strategy: edge.dispatch_strategy.clone(), + missing_targets, + }), + }); + } + } + } + } + + /// Detect cycles in the node graph. 
+ fn detect_cycles(&self) -> Vec> { + fn visit( + node: &NodeId, + nodes: &HashMap>, + visiting: &mut HashSet, + visited: &mut HashSet, + current_path: &mut Vec, + cycles: &mut Vec>, + ) { + if visited.contains(node) { + return; + } + if visiting.contains(node) { + if let Some(pos) = current_path.iter().position(|n| n == node) { + cycles.push(current_path[pos..].to_vec()); + } + return; + } + _ = visiting.insert(node.clone()); + current_path.push(node.clone()); + + if let Some(n) = nodes.get(node) { + for edge in n.out_ports.values() { + for tgt in &edge.destinations { + visit(tgt, nodes, visiting, visited, current_path, cycles); + } + } + } + + _ = visiting.remove(node); + _ = visited.insert(node.clone()); + _ = current_path.pop(); + } + + let mut visiting = HashSet::new(); + let mut current_path = Vec::new(); + let mut visited = HashSet::new(); + let mut cycles = Vec::new(); + + for node in self.0.keys() { + if !visited.contains(node) { + visit( + node, + &self.0, + &mut visiting, + &mut visited, + &mut current_path, + &mut cycles, + ); + } + } + + cycles + } +} + +impl std::ops::Index<&str> for PipelineNodes { + type Output = Arc; + + fn index(&self, id: &str) -> &Self::Output { + &self.0[id] + } +} + +impl IntoIterator for PipelineNodes { + type Item = (NodeId, Arc); + type IntoIter = std::collections::hash_map::IntoIter>; + + fn into_iter(self) -> Self::IntoIter { + self.0.into_iter() + } +} + +impl FromIterator<(NodeId, Arc)> for PipelineNodes { + fn from_iter)>>(iter: T) -> Self { + Self(iter.into_iter().collect()) + } +} + /// A configuration for a pipeline. #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] pub struct PipelineSettings { @@ -279,6 +459,12 @@ impl PipelineConfig { &self.settings } + /// Returns a reference to the main pipeline nodes. + #[must_use] + pub fn nodes(&self) -> &PipelineNodes { + &self.nodes + } + /// Returns an iterator visiting all nodes in the pipeline. pub fn node_iter(&self) -> impl Iterator)> { self.nodes.iter() @@ -303,7 +489,7 @@ impl PipelineConfig { /// Returns a reference to the internal pipeline nodes. #[must_use] - pub fn internal_nodes(&self) -> &HashMap> { + pub fn internal_nodes(&self) -> &PipelineNodes { &self.internal } @@ -312,12 +498,59 @@ impl PipelineConfig { self.internal.iter() } + /// Extracts the internal pipeline as a separate, independent `PipelineConfig`. + /// + /// This creates a complete pipeline configuration from the internal nodes, + /// with hardcoded settings appropriate for internal telemetry processing: + /// - Smaller channel sizes (suitable for single-threaded operation) + /// - Minimal telemetry overhead (no internal metrics to avoid feedback loops) + /// + /// Returns `None` if no internal pipeline is configured. + /// + /// The returned config has its own NodeId namespace - completely separate + /// from the main pipeline. The only connection between main and internal + /// pipelines is through the LogsReporter/LogsReceiver channel. + #[must_use] + pub fn extract_internal_config(&self) -> Option { + if self.internal.is_empty() { + return None; + } + + Some(PipelineConfig { + r#type: self.r#type.clone(), + settings: Self::internal_pipeline_settings(), + nodes: self.internal.clone(), + internal: PipelineNodes::default(), // Internal pipeline has no nested internal + service: ServiceConfig::default(), // Minimal service config + }) + } + + /// Returns hardcoded settings for the internal telemetry pipeline. 
+ /// + /// These settings are optimized for single-threaded internal telemetry: + /// - Smaller channel sizes (50 instead of 100) + /// - Telemetry capture disabled to avoid feedback loops + #[must_use] + pub fn internal_pipeline_settings() -> PipelineSettings { + PipelineSettings { + default_node_ctrl_msg_channel_size: 50, + default_pipeline_ctrl_msg_channel_size: 50, + default_pdata_channel_size: 50, + observed_state: ObservedStateSettings::default(), + health_policy: HealthPolicy::default(), + telemetry: TelemetrySettings { + // Disable internal metrics for the internal pipeline to avoid feedback + pipeline_metrics: false, + tokio_metrics: false, + channel_metrics: false, + }, + } + } + /// Validate the pipeline specification. /// /// This method checks for: - /// - Duplicate node IDs - /// - Duplicate out-ports (same source node + port name) - /// - Invalid hyper-edges (missing source or target nodes) + /// - Invalid hyper-edges (missing target nodes) /// - Cycles in the DAG /// - Internal pipeline receivers are only ITR nodes pub fn validate( @@ -327,13 +560,9 @@ impl PipelineConfig { ) -> Result<(), Error> { let mut errors = Vec::new(); - // Validate main pipeline hyper-edges - Self::validate_hyper_edges( - &self.nodes, - pipeline_group_id, - pipeline_id, - &mut errors, - ); + // Validate main pipeline + self.nodes + .validate(pipeline_group_id, pipeline_id, &mut errors); // Validate internal pipeline if present if !self.internal.is_empty() { @@ -350,35 +579,9 @@ impl PipelineConfig { } } - // Validate internal pipeline hyper-edges - Self::validate_hyper_edges( - &self.internal, - pipeline_group_id, - pipeline_id, - &mut errors, - ); - - // Check for cycles in internal pipeline - if errors.is_empty() { - let cycles = Self::detect_cycles_in(&self.internal); - for cycle in cycles { - errors.push(Error::CycleDetected { - context: Context::new(pipeline_group_id.clone(), pipeline_id.clone()), - nodes: cycle, - }); - } - } - } - - // Check for cycles in main pipeline if no errors so far - if errors.is_empty() { - let cycles = self.detect_cycles(); - for cycle in cycles { - errors.push(Error::CycleDetected { - context: Context::new(pipeline_group_id.clone(), pipeline_id.clone()), - nodes: cycle, - }); - } + // Validate internal pipeline graph structure + self.internal + .validate(pipeline_group_id, pipeline_id, &mut errors); } if !errors.is_empty() { @@ -387,151 +590,6 @@ impl PipelineConfig { Ok(()) } } - - /// Validate hyper-edges for a set of nodes. - fn validate_hyper_edges( - nodes: &HashMap>, - pipeline_group_id: &PipelineGroupId, - pipeline_id: &PipelineId, - errors: &mut Vec, - ) { - for (node_id, node) in nodes.iter() { - for edge in node.out_ports.values() { - let mut missing_targets = Vec::new(); - - for target in &edge.destinations { - if !nodes.contains_key(target) { - missing_targets.push(target.clone()); - } - } - - if !missing_targets.is_empty() { - errors.push(Error::InvalidHyperEdgeSpec { - context: Context::new(pipeline_group_id.clone(), pipeline_id.clone()), - source_node: node_id.clone(), - missing_source: false, - details: Box::new(HyperEdgeSpecDetails { - target_nodes: edge.destinations.iter().cloned().collect(), - dispatch_strategy: edge.dispatch_strategy.clone(), - missing_targets, - }), - }); - } - } - } - } - - /// Detect cycles in a set of nodes. 
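
A usage sketch for extract_internal_config, assuming the accessors above and the YAML shape from the tests earlier in this series; split_out_internal is a hypothetical caller, and the assertion values mirror the hardcoded internal settings:

fn split_out_internal(yaml_str: &str) {
    let config = PipelineConfig::from_yaml("test_group".into(), "test_pipeline".into(), yaml_str)
        .expect("valid config");
    if let Some(internal) = config.extract_internal_config() {
        // The extracted pipeline carries the hardcoded internal settings
        // and never nests another internal section of its own.
        assert_eq!(
            internal
                .pipeline_settings()
                .default_pipeline_ctrl_msg_channel_size,
            50
        );
        assert!(!internal.has_internal_pipeline());
    }
}
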
- fn detect_cycles_in(nodes: &HashMap>) -> Vec> { - fn visit( - node: &NodeId, - nodes: &HashMap>, - visiting: &mut HashSet, - visited: &mut HashSet, - current_path: &mut Vec, - cycles: &mut Vec>, - ) { - if visited.contains(node) { - return; - } - if visiting.contains(node) { - if let Some(pos) = current_path.iter().position(|n| n == node) { - cycles.push(current_path[pos..].to_vec()); - } - return; - } - _ = visiting.insert(node.clone()); - current_path.push(node.clone()); - - if let Some(n) = nodes.get(node) { - for edge in n.out_ports.values() { - for tgt in &edge.destinations { - visit(tgt, nodes, visiting, visited, current_path, cycles); - } - } - } - - _ = visiting.remove(node); - _ = visited.insert(node.clone()); - _ = current_path.pop(); - } - - let mut visiting = HashSet::new(); - let mut current_path = Vec::new(); - let mut visited = HashSet::new(); - let mut cycles = Vec::new(); - - for node in nodes.keys() { - if !visited.contains(node) { - visit( - node, - nodes, - &mut visiting, - &mut visited, - &mut current_path, - &mut cycles, - ); - } - } - - cycles - } - - fn detect_cycles(&self) -> Vec> { - fn visit( - node: &NodeId, - nodes: &HashMap>, - visiting: &mut HashSet, - visited: &mut HashSet, - current_path: &mut Vec, - cycles: &mut Vec>, - ) { - if visited.contains(node) { - return; - } - if visiting.contains(node) { - // Cycle found - if let Some(pos) = current_path.iter().position(|n| n == node) { - cycles.push(current_path[pos..].to_vec()); - } - return; - } - _ = visiting.insert(node.clone()); - current_path.push(node.clone()); - - if let Some(n) = nodes.get(node) { - for edge in n.out_ports.values() { - for tgt in &edge.destinations { - visit(tgt, nodes, visiting, visited, current_path, cycles); - } - } - } - - _ = visiting.remove(node); - _ = visited.insert(node.clone()); - _ = current_path.pop(); - } - - let mut visiting = HashSet::new(); - let mut current_path = Vec::new(); - let mut visited = HashSet::new(); - let mut cycles = Vec::new(); - - for node in self.nodes.keys() { - if !visited.contains(node) { - visit( - node, - &self.nodes, - &mut visiting, - &mut visited, - &mut current_path, - &mut cycles, - ); - } - } - - cycles - } } /// A builder for constructing a [`PipelineConfig`]. 
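
The cycle check that was just consolidated is a standard three-state ("white/gray/black") depth-first search. A self-contained sketch of the same idea over integer node ids, reduced to a boolean answer instead of collecting the offending path:

fn has_cycle(adj: &[Vec<usize>]) -> bool {
    // 0 = unvisited, 1 = on the current DFS path (gray), 2 = fully explored (black)
    fn visit(n: usize, adj: &[Vec<usize>], state: &mut [u8]) -> bool {
        match state[n] {
            1 => return true,  // back-edge into the current path: a cycle
            2 => return false, // already explored with no cycle through here
            _ => {}
        }
        state[n] = 1;
        let found = adj[n].iter().any(|&t| visit(t, adj, state));
        state[n] = 2;
        found
    }
    let mut state = vec![0u8; adj.len()];
    (0..adj.len()).any(|n| visit(n, adj, &mut state))
}

fn main() {
    let dag = vec![vec![1], vec![2], vec![]]; // 0 -> 1 -> 2
    let cyc = vec![vec![1], vec![0]]; // 0 -> 1 -> 0
    assert!(!has_cycle(&dag));
    assert!(has_cycle(&cyc));
}
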
@@ -797,7 +855,7 @@ impl PipelineConfigBuilder { .into_iter() .map(|(id, node)| (id, Arc::new(node))) .collect(), - internal: HashMap::new(), + internal: PipelineNodes::default(), settings: PipelineSettings::default(), r#type: pipeline_type, service: ServiceConfig::default(), @@ -1497,11 +1555,8 @@ internal: plugin_urn: "urn:test:exporter" config: {} "#; - let result = super::PipelineConfig::from_yaml( - "test_group".into(), - "test_pipeline".into(), - yaml_str, - ); + let result = + super::PipelineConfig::from_yaml("test_group".into(), "test_pipeline".into(), yaml_str); assert!(result.is_ok()); let config = result.unwrap(); assert!(config.has_internal_pipeline()); @@ -1541,11 +1596,8 @@ internal: plugin_urn: "urn:test:exporter" config: {} "#; - let result = super::PipelineConfig::from_yaml( - "test_group".into(), - "test_pipeline".into(), - yaml_str, - ); + let result = + super::PipelineConfig::from_yaml("test_group".into(), "test_pipeline".into(), yaml_str); assert!(result.is_err()); match result { Err(Error::InvalidConfiguration { errors }) => { @@ -1608,11 +1660,8 @@ internal: plugin_urn: "urn:test:exporter" config: {} "#; - let result = super::PipelineConfig::from_yaml( - "test_group".into(), - "test_pipeline".into(), - yaml_str, - ); + let result = + super::PipelineConfig::from_yaml("test_group".into(), "test_pipeline".into(), yaml_str); assert!(result.is_ok()); let config = result.unwrap(); assert!(config.has_internal_pipeline()); @@ -1648,11 +1697,8 @@ internal: dispatch_strategy: round_robin config: {} "#; - let result = super::PipelineConfig::from_yaml( - "test_group".into(), - "test_pipeline".into(), - yaml_str, - ); + let result = + super::PipelineConfig::from_yaml("test_group".into(), "test_pipeline".into(), yaml_str); assert!(result.is_err()); match result { Err(Error::InvalidConfiguration { errors }) => { @@ -1691,11 +1737,8 @@ nodes: plugin_urn: "urn:test:exporter" config: {} "#; - let result = super::PipelineConfig::from_yaml( - "test_group".into(), - "test_pipeline".into(), - yaml_str, - ); + let result = + super::PipelineConfig::from_yaml("test_group".into(), "test_pipeline".into(), yaml_str); assert!(result.is_ok()); let config = result.unwrap(); assert!(!config.has_internal_pipeline()); diff --git a/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs b/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs index 3756c7407b..98e0169f8a 100644 --- a/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs +++ b/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs @@ -111,7 +111,7 @@ pub enum OutputMode { Direct, /// Route to Internal Telemetry Receiver node. The pipeline must - /// include a nod with INTERNAL_TELEMETRY_RECEIVER_URN. The + /// include a node with INTERNAL_TELEMETRY_RECEIVER_URN. The /// engine provider mode must be Buffered for internal output. /// This will become default. 
Internal, diff --git a/rust/otap-dataflow/crates/controller/src/lib.rs b/rust/otap-dataflow/crates/controller/src/lib.rs index 02e9052045..785e00217e 100644 --- a/rust/otap-dataflow/crates/controller/src/lib.rs +++ b/rust/otap-dataflow/crates/controller/src/lib.rs @@ -112,7 +112,7 @@ impl Controller { // The receiver end goes to either: // - LogsCollector thread (output == Raw): prints to console // - Internal Telemetry Receiver node (output == Internal): emits as OTLP - let (logs_reporter, logs_receiver, logs_collector_handle) = if providers_need_reporter { + let (logs_reporter, mut logs_receiver, logs_collector_handle) = if providers_need_reporter { match telemetry_config.logs.output { OutputMode::Direct => { // Start collector thread for Raw output mode @@ -198,6 +198,73 @@ impl Controller { }; let log_level = telemetry_config.logs.level; + // Spawn internal pipeline thread if configured + // The internal pipeline runs on a single unpinned thread and processes + // internal telemetry (logs from LogsReporter) through its own node graph. + let internal_pipeline_thread = if let Some(internal_config) = pipeline.extract_internal_config() { + // Internal pipeline only exists when output mode is Internal + // The logs_receiver goes to the internal pipeline's ITR node + let internal_logs_receiver = logs_receiver.take(); + let internal_factory = self.pipeline_factory; + let internal_pipeline_id: PipelineId = "internal".into(); + let internal_pipeline_key = DeployedPipelineKey { + pipeline_group_id: pipeline_group_id.clone(), + pipeline_id: internal_pipeline_id.clone(), + core_id: 0, // Virtual core ID for internal pipeline + }; + let internal_pipeline_ctx = controller_ctx.pipeline_context_with( + pipeline_group_id.clone(), + internal_pipeline_id.clone(), + 0, // Virtual core ID + 0, // Virtual thread ID + ); + let internal_obs_evt_reporter = obs_evt_reporter.clone(); + let internal_metrics_reporter = metrics_reporter.clone(); + + // Internal pipeline uses Raw logging (direct console output) + // to avoid feedback loops - it can't log through itself + let internal_engine_logs_setup = EngineLogsSetup::Raw; + let internal_log_level = log_level; + + // Create control message channel for internal pipeline + let (internal_ctrl_tx, internal_ctrl_rx) = pipeline_ctrl_msg_channel( + internal_config.pipeline_settings().default_pipeline_ctrl_msg_channel_size, + ); + + let thread_name = "internal-pipeline".to_string(); + let handle = thread::Builder::new() + .name(thread_name.clone()) + .spawn(move || { + Self::run_pipeline_thread( + internal_pipeline_key, + CoreId { id: 0 }, // No pinning for internal pipeline + internal_config, + internal_factory, + internal_pipeline_ctx, + internal_obs_evt_reporter, + internal_metrics_reporter, + internal_engine_logs_setup, + internal_log_level, + internal_logs_receiver, + internal_ctrl_tx, + internal_ctrl_rx, + ) + }) + .map_err(|e| Error::ThreadSpawnError { + thread_name: thread_name.clone(), + source: e, + })?; + + otel_info!( + "InternalPipeline.Started", + num_nodes = pipeline.internal_nodes().len() + ); + + Some((thread_name, handle)) + } else { + None + }; + // Start one thread per requested core // Get available CPU cores for pinning let requested_cores = Self::select_cores_for_quota( @@ -331,6 +398,42 @@ impl Controller { } } + // Wait for internal pipeline thread if it was spawned + if let Some((_thread_name, handle)) = internal_pipeline_thread { + let internal_pipeline_id: PipelineId = "internal".into(); + let pipeline_key = DeployedPipelineKey { + 
pipeline_group_id: pipeline_group_id.clone(), + pipeline_id: internal_pipeline_id, + core_id: 0, // Virtual core ID for internal pipeline + }; + match handle.join() { + Ok(Ok(_)) => { + obs_evt_reporter.report(ObservedEvent::drained(pipeline_key, None)); + } + Ok(Err(e)) => { + let err_summary: ErrorSummary = error_summary_from_gen(&e); + obs_evt_reporter.report(ObservedEvent::pipeline_runtime_error( + pipeline_key.clone(), + "Internal pipeline encountered a runtime error.", + err_summary, + )); + // Log but don't fail - internal pipeline errors shouldn't bring down main + otel_warn!( + "InternalPipeline.Error", + message = "Internal telemetry pipeline failed", + error = format!("{e:?}") + ); + } + Err(e) => { + otel_warn!( + "InternalPipeline.Panic", + message = "Internal telemetry pipeline panicked", + panic_message = format!("{e:?}") + ); + } + } + } + // ToDo Add CTRL-C handler to initiate graceful shutdown of pipelines and admin server. // In this project phase (alpha), we park the main thread indefinitely. This is useful for From e3d99fe4c54371a94e1ddcf3b436af0b82768409 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Tue, 13 Jan 2026 14:36:29 -0800 Subject: [PATCH 72/92] remove thread-local --- .../src/pipeline/service/telemetry/logs.rs | 69 ++++---- .../crates/controller/src/lib.rs | 144 ++++++++-------- .../crates/engine/src/pipeline_ctrl.rs | 21 +-- .../crates/engine/src/runtime_pipeline.rs | 3 - .../otap-dataflow/crates/telemetry/src/lib.rs | 4 +- .../crates/telemetry/src/logs.rs | 160 ++---------------- .../telemetry/src/opentelemetry_client.rs | 15 +- 7 files changed, 123 insertions(+), 293 deletions(-) diff --git a/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs b/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs index 98e0169f8a..7a2f9c9a40 100644 --- a/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs +++ b/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs @@ -79,27 +79,33 @@ pub enum ProviderMode { /// Log events are silently ignored. Noop, - /// Place into a thread-local buffer. - Buffered, - - /// Non-blocking, immediate delivery. - Unbuffered, + /// Immediate delivery to the internal telemetry pipeline. + Immediate, /// Use OTel-Rust as the provider. OpenTelemetry, - /// Use synchronous logging. Note! This can block the producing thread. + /// Synchronous console logging. Note! This can block the producing thread. Raw, } +impl ProviderMode { + /// Returns true if this requires a LogsReporter channel for + /// asynchronous logging. + #[must_use] + pub fn needs_reporter(&self) -> bool { + matches!(self, Self::Immediate) + } +} + /// Output mode: what the recipient does with received events for -/// Buffered and Unbuffered provider logging modes. +/// provider logging modes. #[derive(Debug, Clone, Copy, Serialize, Deserialize, JsonSchema, PartialEq, Eq, Default)] #[serde(rename_all = "lowercase")] pub enum OutputMode { - /// Noop prevents the use of Buffered and Unbuffered modes. This - /// output mode can be set when all providers are configured to - /// avoid the internal output configuration through Noop, Raw, or + /// Noop prevents the use of the Immediate mode. This output mode + /// can be set when all providers are configured to avoid the + /// internal output configuration through Noop, Raw, or /// OpenTelemetry settings. Noop, /// Direct console logging: format and print directly to console @@ -110,10 +116,7 @@ pub enum OutputMode { #[default] Direct, - /// Route to Internal Telemetry Receiver node. 
The pipeline must - /// include a node with INTERNAL_TELEMETRY_RECEIVER_URN. The - /// engine provider mode must be Buffered for internal output. - /// This will become default. + /// Route to the internal telemetry pipeline. Internal, } @@ -131,8 +134,8 @@ fn default_internal_provider() -> ProviderMode { fn default_providers() -> LoggingProviders { LoggingProviders { - global: ProviderMode::Unbuffered, - engine: ProviderMode::Buffered, + global: ProviderMode::Immediate, + engine: ProviderMode::Immediate, internal: default_internal_provider(), } } @@ -152,34 +155,28 @@ impl LogsConfig { /// Validate the logs configuration. /// /// Returns an error if: - /// - `output` is `Noop` but a provider uses `Buffered` or `Unbuffered` + /// - `output` is `Noop` but a provider uses `Immediate` /// (logs would be sent but discarded) + /// - the `internal` provider requires a reporter (the internal pipeline cannot consume its own output) - /// - `output` is `Internal` but engine provider is not `Buffered` pub fn validate(&self) -> Result<(), Error> { + if self.providers.internal.needs_reporter() { + return Err(Error::InvalidUserConfig { + error: format!( + "internal provider is invalid: {:?}", + self.providers.internal + ), + }); + } if self.output == OutputMode::Noop { - let global_sends = matches!( - self.providers.global, - ProviderMode::Buffered | ProviderMode::Unbuffered - ); - let engine_sends = matches!( - self.providers.engine, - ProviderMode::Buffered | ProviderMode::Unbuffered - ); - - if global_sends || engine_sends { + let global_reports = self.providers.global.needs_reporter(); + let engine_reports = self.providers.engine.needs_reporter(); + + if global_reports || engine_reports { return Err(Error::InvalidUserConfig { - error: "output mode is 'noop' but a provider uses buffered or unbuffered" - .into(), + error: "output mode is 'noop' but a provider uses an internal reporter".into(), }); } } - if self.output == OutputMode::Internal && self.providers.engine != ProviderMode::Buffered { - return Err(Error::InvalidUserConfig { - error: "output mode is 'internal', engine must use buffered provider".into(), - }); - } - Ok(()) } } diff --git a/rust/otap-dataflow/crates/controller/src/lib.rs b/rust/otap-dataflow/crates/controller/src/lib.rs index 785e00217e..40c2317bdd 100644 --- a/rust/otap-dataflow/crates/controller/src/lib.rs +++ b/rust/otap-dataflow/crates/controller/src/lib.rs @@ -101,12 +101,8 @@ impl Controller { // - global == Unbuffered (global threads send directly to channel) // - engine == Buffered or Unbuffered (engine threads send to channel) // Raw provider mode = synchronous console output, no reporter needed. - let providers_need_reporter = telemetry_config.logs.providers.global - == ProviderMode::Unbuffered - || matches!( - telemetry_config.logs.providers.engine, - ProviderMode::Buffered | ProviderMode::Unbuffered - ); + let providers_need_reporter = telemetry_config.logs.providers.global.needs_reporter() + || telemetry_config.logs.providers.engine.needs_reporter(); // Create the reporter if providers need it. 
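
For intuition about the reporter wiring decided above, a minimal stand-in using a bounded flume channel and a named consumer thread; the String payloads and channel size are placeholders for the real LogPayload type and the reporting_channel_size setting.

use std::thread;

fn main() {
    let (reporter, receiver) = flume::bounded::<String>(1024);

    // Exactly one consumer drains the channel: the console collector thread
    // in Direct mode, or the internal pipeline's ITR node in Internal mode.
    let consumer = thread::Builder::new()
        .name("logs-collector".to_string())
        .spawn(move || {
            for record in receiver.iter() {
                eprintln!("{record}"); // stand-in for formatting / OTLP encoding
            }
        })
        .expect("spawn logs collector");

    // Producers never block: try_send fails fast when the channel is full.
    reporter.try_send("log record".to_string()).ok();

    drop(reporter); // closing all senders ends receiver.iter()
    consumer.join().unwrap();
}
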
// The receiver end goes to either: @@ -178,13 +174,7 @@ impl Controller { let engine_logs_setup = match telemetry_config.logs.providers.engine { ProviderMode::Noop => EngineLogsSetup::Noop, ProviderMode::Raw => EngineLogsSetup::Raw, - ProviderMode::Buffered => EngineLogsSetup::Buffered { - reporter: logs_reporter - .clone() - .expect("validated: buffered requires reporter"), - capacity: 1024, // TODO: make configurable - }, - ProviderMode::Unbuffered => EngineLogsSetup::Unbuffered { + ProviderMode::Immediate => EngineLogsSetup::Immediate { reporter: logs_reporter .clone() .expect("validated: unbuffered requires reporter"), @@ -201,69 +191,72 @@ impl Controller { // Spawn internal pipeline thread if configured // The internal pipeline runs on a single unpinned thread and processes // internal telemetry (logs from LogsReporter) through its own node graph. - let internal_pipeline_thread = if let Some(internal_config) = pipeline.extract_internal_config() { - // Internal pipeline only exists when output mode is Internal - // The logs_receiver goes to the internal pipeline's ITR node - let internal_logs_receiver = logs_receiver.take(); - let internal_factory = self.pipeline_factory; - let internal_pipeline_id: PipelineId = "internal".into(); - let internal_pipeline_key = DeployedPipelineKey { - pipeline_group_id: pipeline_group_id.clone(), - pipeline_id: internal_pipeline_id.clone(), - core_id: 0, // Virtual core ID for internal pipeline - }; - let internal_pipeline_ctx = controller_ctx.pipeline_context_with( - pipeline_group_id.clone(), - internal_pipeline_id.clone(), - 0, // Virtual core ID - 0, // Virtual thread ID - ); - let internal_obs_evt_reporter = obs_evt_reporter.clone(); - let internal_metrics_reporter = metrics_reporter.clone(); - - // Internal pipeline uses Raw logging (direct console output) - // to avoid feedback loops - it can't log through itself - let internal_engine_logs_setup = EngineLogsSetup::Raw; - let internal_log_level = log_level; - - // Create control message channel for internal pipeline - let (internal_ctrl_tx, internal_ctrl_rx) = pipeline_ctrl_msg_channel( - internal_config.pipeline_settings().default_pipeline_ctrl_msg_channel_size, - ); - - let thread_name = "internal-pipeline".to_string(); - let handle = thread::Builder::new() - .name(thread_name.clone()) - .spawn(move || { - Self::run_pipeline_thread( - internal_pipeline_key, - CoreId { id: 0 }, // No pinning for internal pipeline - internal_config, - internal_factory, - internal_pipeline_ctx, - internal_obs_evt_reporter, - internal_metrics_reporter, - internal_engine_logs_setup, - internal_log_level, - internal_logs_receiver, - internal_ctrl_tx, - internal_ctrl_rx, - ) - }) - .map_err(|e| Error::ThreadSpawnError { - thread_name: thread_name.clone(), - source: e, - })?; + let internal_pipeline_thread = + if let Some(internal_config) = pipeline.extract_internal_config() { + // Internal pipeline only exists when output mode is Internal + // The logs_receiver goes to the internal pipeline's ITR node + let internal_logs_receiver = logs_receiver.take(); + let internal_factory = self.pipeline_factory; + let internal_pipeline_id: PipelineId = "internal".into(); + let internal_pipeline_key = DeployedPipelineKey { + pipeline_group_id: pipeline_group_id.clone(), + pipeline_id: internal_pipeline_id.clone(), + core_id: 0, // Virtual core ID for internal pipeline + }; + let internal_pipeline_ctx = controller_ctx.pipeline_context_with( + pipeline_group_id.clone(), + internal_pipeline_id.clone(), + 0, // Virtual core ID + 0, 
// Virtual thread ID + ); + let internal_obs_evt_reporter = obs_evt_reporter.clone(); + let internal_metrics_reporter = metrics_reporter.clone(); + + // Internal pipeline uses Raw logging (direct console output) + // to avoid feedback loops - it can't log through itself + let internal_engine_logs_setup = EngineLogsSetup::Raw; + let internal_log_level = log_level; + + // Create control message channel for internal pipeline + let (internal_ctrl_tx, internal_ctrl_rx) = pipeline_ctrl_msg_channel( + internal_config + .pipeline_settings() + .default_pipeline_ctrl_msg_channel_size, + ); - otel_info!( - "InternalPipeline.Started", - num_nodes = pipeline.internal_nodes().len() - ); + let thread_name = "internal-pipeline".to_string(); + let handle = thread::Builder::new() + .name(thread_name.clone()) + .spawn(move || { + Self::run_pipeline_thread( + internal_pipeline_key, + CoreId { id: 0 }, // No pinning for internal pipeline + internal_config, + internal_factory, + internal_pipeline_ctx, + internal_obs_evt_reporter, + internal_metrics_reporter, + internal_engine_logs_setup, + internal_log_level, + internal_logs_receiver, + internal_ctrl_tx, + internal_ctrl_rx, + ) + }) + .map_err(|e| Error::ThreadSpawnError { + thread_name: thread_name.clone(), + source: e, + })?; + + otel_info!( + "InternalPipeline.Started", + num_nodes = pipeline.internal_nodes().len() + ); - Some((thread_name, handle)) - } else { - None - }; + Some((thread_name, handle)) + } else { + None + }; // Start one thread per requested core // Get available CPU cores for pinning @@ -578,7 +571,7 @@ impl Controller { ) -> Result, Error> { // Run with the engine-appropriate tracing subscriber. // The closure receives a LogsFlusher for buffered mode. - engine_logs_setup.with_engine_subscriber(log_level, |logs_flusher| { + engine_logs_setup.with_engine_subscriber(log_level, || { // Create a tracing span for this pipeline thread // so that all logs within this scope include pipeline context. let span = otel_info_span!("pipeline_thread", core.id = core_id.id); @@ -621,7 +614,6 @@ impl Controller { pipeline_context, obs_evt_reporter, metrics_reporter, - logs_flusher, pipeline_ctrl_msg_tx, pipeline_ctrl_msg_rx, ) diff --git a/rust/otap-dataflow/crates/engine/src/pipeline_ctrl.rs b/rust/otap-dataflow/crates/engine/src/pipeline_ctrl.rs index e5b4e0e8f6..601f697081 100644 --- a/rust/otap-dataflow/crates/engine/src/pipeline_ctrl.rs +++ b/rust/otap-dataflow/crates/engine/src/pipeline_ctrl.rs @@ -19,9 +19,8 @@ use otap_df_config::pipeline::TelemetrySettings; use otap_df_state::DeployedPipelineKey; use otap_df_state::event::{ErrorSummary, ObservedEvent}; use otap_df_state::reporter::ObservedEventReporter; -use otap_df_telemetry::logs::LogsFlusher; +use otap_df_telemetry::otel_warn; use otap_df_telemetry::reporter::MetricsReporter; -use otap_df_telemetry::{otel_error, otel_warn}; use std::cmp::Reverse; use std::collections::{BinaryHeap, HashMap}; use std::time::{Duration, Instant}; @@ -183,8 +182,6 @@ pub struct PipelineCtrlMsgManager { event_reporter: ObservedEventReporter, /// Global metrics reporter. metrics_reporter: MetricsReporter, - /// Logs flusher for periodic flush of internal log buffers. - logs_flusher: LogsFlusher, /// Channel metrics handles for periodic reporting. 
channel_metrics: Vec, @@ -202,7 +199,6 @@ impl PipelineCtrlMsgManager { control_senders: ControlSenders, event_reporter: ObservedEventReporter, metrics_reporter: MetricsReporter, - logs_flusher: LogsFlusher, internal_telemetry: TelemetrySettings, channel_metrics: Vec, ) -> Self { @@ -216,7 +212,6 @@ impl PipelineCtrlMsgManager { delayed_data: BinaryHeap::new(), event_reporter, metrics_reporter, - logs_flusher, channel_metrics, telemetry: internal_telemetry, } @@ -354,11 +349,6 @@ impl PipelineCtrlMsgManager { } } - // Flush internal logs from the thread-local buffer - if let Err(err) = self.logs_flusher.flush() { - otel_error!("logs.flush.fail", error = err.to_string()); - } - // Deliver all accumulated control messages (best-effort) for (node_id, msg) in to_send { self.send(node_id, msg).await; @@ -433,7 +423,6 @@ mod tests { use otap_df_config::pipeline::PipelineSettings; use otap_df_config::{PipelineGroupId, PipelineId}; use otap_df_state::store::ObservedStateStore; - use otap_df_telemetry::logs::LogsFlusher; use std::collections::HashMap; use std::time::{Duration, Instant}; use tokio::task::LocalSet; @@ -486,9 +475,6 @@ mod tests { thread_id, ); - // Create a no-op LogsFlusher for testing - let logs_flusher = LogsFlusher::Noop; - let manager = PipelineCtrlMsgManager::new( DeployedPipelineKey { pipeline_group_id, @@ -500,7 +486,6 @@ mod tests { control_senders, observed_state_store.reporter(), metrics_reporter, - logs_flusher, pipeline_settings.telemetry.clone(), Vec::new(), ); @@ -907,9 +892,6 @@ mod tests { thread_id, ); - // Create a no-op LogsFlusher for testing - let logs_flusher = LogsFlusher::Noop; - // Create manager with empty control_senders map (no registered nodes) let manager = PipelineCtrlMsgManager::<()>::new( pipeline_key, @@ -918,7 +900,6 @@ mod tests { ControlSenders::new(), observed_state_store.reporter(), metrics_reporter, - logs_flusher, TelemetrySettings::default(), Vec::new(), ); diff --git a/rust/otap-dataflow/crates/engine/src/runtime_pipeline.rs b/rust/otap-dataflow/crates/engine/src/runtime_pipeline.rs index 9f572150c3..691f213c67 100644 --- a/rust/otap-dataflow/crates/engine/src/runtime_pipeline.rs +++ b/rust/otap-dataflow/crates/engine/src/runtime_pipeline.rs @@ -14,7 +14,6 @@ use crate::terminal_state::TerminalState; use crate::{exporter::ExporterWrapper, processor::ProcessorWrapper, receiver::ReceiverWrapper}; use otap_df_config::pipeline::PipelineConfig; use otap_df_telemetry::reporter::MetricsReporter; -use otap_df_telemetry::logs::LogsFlusher; use crate::context::PipelineContext; use otap_df_state::DeployedPipelineKey; @@ -107,7 +106,6 @@ impl RuntimePipeline { pipeline_context: PipelineContext, event_reporter: ObservedEventReporter, metrics_reporter: MetricsReporter, - logs_flusher: LogsFlusher, pipeline_ctrl_msg_tx: PipelineCtrlMsgSender, pipeline_ctrl_msg_rx: PipelineCtrlMsgReceiver, ) -> Result, Error> { @@ -195,7 +193,6 @@ impl RuntimePipeline { control_senders, event_reporter, metrics_reporter, - logs_flusher, internal_telemetry, channel_metrics, ); diff --git a/rust/otap-dataflow/crates/telemetry/src/lib.rs b/rust/otap-dataflow/crates/telemetry/src/lib.rs index 00019286be..5d068a7e22 100644 --- a/rust/otap-dataflow/crates/telemetry/src/lib.rs +++ b/rust/otap-dataflow/crates/telemetry/src/lib.rs @@ -66,8 +66,8 @@ pub use tracing::warn_span as otel_warn_span; // Re-export commonly used logs types for convenience. 
pub use logs::{ - EngineLogsSetup, LogBatch, LogPayload, LogsCollector, LogsFlusher, LogsReceiver, LogsReporter, - ThreadBufferedLayer, UnbufferedLayer, + EngineLogsSetup, ImmediateLayer, LogBatch, LogPayload, LogsCollector, LogsReceiver, + LogsReporter, }; // TODO This should be #[cfg(test)], but something is preventing it from working. diff --git a/rust/otap-dataflow/crates/telemetry/src/logs.rs b/rust/otap-dataflow/crates/telemetry/src/logs.rs index c1f03e1c13..6afbe35d79 100644 --- a/rust/otap-dataflow/crates/telemetry/src/logs.rs +++ b/rust/otap-dataflow/crates/telemetry/src/logs.rs @@ -10,12 +10,12 @@ use crate::self_tracing::{ use bytes::Bytes; use opentelemetry_appender_tracing::layer::OpenTelemetryTracingBridge; use opentelemetry_sdk::logs::SdkLoggerProvider; +use otap_df_config::pipeline::service::telemetry::logs::LogLevel; use otap_df_pdata::otlp::ProtoBuffer; use otap_df_pdata::proto::consts::field_num::logs::{ LOGS_DATA_RESOURCE, RESOURCE_LOGS_SCOPE_LOGS, SCOPE_LOGS_LOG_RECORDS, }; use otap_df_pdata::proto_encode_len_delimited_unknown_size; -use std::cell::RefCell; use tracing::{Event, Subscriber}; use tracing_subscriber::Registry; use tracing_subscriber::layer::{Context, Layer as TracingLayer, SubscriberExt}; @@ -141,44 +141,6 @@ impl LogBuffer { } } -// Thread-local log buffer for the current pipeline thread. -thread_local! { - static CURRENT_LOG_BUFFER: RefCell> = const { RefCell::new(None) }; -} - -/// Run a closure with a thread-local log buffer installed. -/// -/// The buffer is automatically uninstalled when the closure returns (or panics). -pub fn with_thread_log_buffer(capacity: usize, f: F) -> R -where - F: FnOnce() -> R, -{ - CURRENT_LOG_BUFFER.with(|cell| { - *cell.borrow_mut() = Some(LogBuffer::new(capacity)); - }); - - struct Guard; - impl Drop for Guard { - fn drop(&mut self) { - CURRENT_LOG_BUFFER.with(|cell| { - *cell.borrow_mut() = None; - }); - } - } - let _guard = Guard; - - f() -} - -/// Drain the thread-local log buffer and return the batch. -/// -/// Returns `None` if no buffer is installed (e.g., not in an engine thread). -/// This is for use by the internal telemetry receiver node. -#[must_use] -pub fn drain_thread_log_buffer() -> Option { - CURRENT_LOG_BUFFER.with(|cell| cell.borrow_mut().as_mut().map(|buffer| buffer.drain())) -} - /// Reporter for sending log batches through a channel. #[derive(Clone)] pub struct LogsReporter { @@ -286,54 +248,13 @@ impl LogsCollector { } } -/// A tracing Layer that buffers records in thread-local storage. -/// -/// For engine threads that control their own flush timing. -pub struct ThreadBufferedLayer { - /// Reporter for flushing batches. - reporter: LogsReporter, -} - -impl ThreadBufferedLayer { - /// Create a new thread-buffered layer. - #[must_use] - pub fn new(reporter: LogsReporter) -> Self { - Self { reporter } - } - - /// Flush the current thread's log buffer and send via the channel. - pub fn flush(&self) -> Result<(), Error> { - if let Some(batch) = - CURRENT_LOG_BUFFER.with(|cell| cell.borrow_mut().as_mut().map(|buffer| buffer.drain())) - { - self.reporter.try_report(LogPayload::Batch(batch))?; - } - Ok(()) - } -} - -impl TracingLayer for ThreadBufferedLayer -where - S: Subscriber + for<'a> LookupSpan<'a>, -{ - fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) { - let record = LogRecord::new(event); - - CURRENT_LOG_BUFFER.with(|cell| { - if let Some(ref mut buffer) = *cell.borrow_mut() { - buffer.push(record); - } - }); - } -} - /// A tracing Layer that sends each record immediately. 
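// --- Editor's note: a minimal, self-contained sketch of the "immediate"
// pattern this layer implements (hypothetical names; the real ImmediateLayer
// below forwards LogRecords through a LogsReporter rather than a raw channel).
use std::fmt::Write as _;
use std::sync::mpsc::Sender;
use tracing::field::{Field, Visit};
use tracing::{Event, Subscriber};
use tracing_subscriber::layer::{Context as LayerContext, Layer};

/// Sends each event the moment it is observed: no buffer, nothing to flush.
struct SendEachEvent {
    tx: Sender<String>,
}

/// Renders every field of an event into one line.
struct Render(String);

impl Visit for Render {
    fn record_debug(&mut self, field: &Field, value: &dyn std::fmt::Debug) {
        let _ = write!(self.0, "{}={:?} ", field.name(), value);
    }
}

impl<S: Subscriber> Layer<S> for SendEachEvent {
    fn on_event(&self, event: &Event<'_>, _ctx: LayerContext<'_, S>) {
        let mut line = Render(String::new());
        event.record(&mut line);
        // Best-effort send: a closed receiver must not take down the thread.
        let _ = self.tx.send(line.0);
    }
}
// --- End editor's note. ---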
-pub struct UnbufferedLayer { +pub struct ImmediateLayer { /// Reporter for sending to the channel. reporter: LogsReporter, } -impl UnbufferedLayer { +impl ImmediateLayer { /// Create a new unbuffered layer. #[must_use] pub fn new(reporter: LogsReporter) -> Self { @@ -341,7 +262,7 @@ impl UnbufferedLayer { } } -impl TracingLayer for UnbufferedLayer +impl TracingLayer for ImmediateLayer where S: Subscriber + for<'a> LookupSpan<'a>, { @@ -368,15 +289,8 @@ pub enum EngineLogsSetup { Noop, /// Synchronous raw logging to console. Raw, - /// Buffered: accumulates in thread-local buffer, flushed periodically. - Buffered { - /// Reporter to send batches through. - reporter: LogsReporter, - /// Buffer capacity per thread. - capacity: usize, - }, - /// Unbuffered: each log is sent immediately. - Unbuffered { + /// Immediate: each log is sent immediately. + Immediate { /// Reporter to send singletons through. reporter: LogsReporter, }, @@ -387,83 +301,37 @@ pub enum EngineLogsSetup { }, } -/// Handle for flushing buffered logs from the engine thread. -/// -/// For non-buffered modes, flush is a no-op. -#[derive(Clone)] -pub enum LogsFlusher { - /// No-op flusher for modes that don't buffer. - Noop, - /// Flusher that drains the thread-local buffer and sends via the reporter. - Buffered(LogsReporter), -} - -impl LogsFlusher { - /// Flush any buffered logs by sending to the reporter. - /// - /// For `Noop`, this does nothing. - /// For `Buffered`, this drains the thread-local buffer and sends as a batch. - pub fn flush(&self) -> Result<(), Error> { - match self { - LogsFlusher::Noop => Ok(()), - LogsFlusher::Buffered(reporter) => { - if let Some(batch) = CURRENT_LOG_BUFFER - .with(|cell| cell.borrow_mut().as_mut().map(|buffer| buffer.drain())) - { - reporter.try_report(LogPayload::Batch(batch))?; - } - Ok(()) - } - } - } -} - impl EngineLogsSetup { /// Run a closure with the engine-appropriate tracing subscriber. /// /// Returns a `LogsFlusher` that can be used to periodically flush buffered logs. /// For non-buffered modes, the flusher is a no-op. 
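// --- Editor's note: the signature change below drops the LogsFlusher argument;
// with immediate forwarding there is nothing left to flush. The thread-scoped
// mechanism itself, reduced to its core (stand-in bare Registry here; the real
// code stacks a filter and a mode-specific layer on top):
fn with_thread_subscriber<R>(f: impl FnOnce() -> R) -> R {
    // The subscriber is active only while `f` runs, so each pipeline thread
    // can install its own logging mode without touching the process-global
    // default subscriber.
    let subscriber = tracing_subscriber::registry();
    tracing::subscriber::with_default(subscriber, f)
}
// --- End editor's note. ---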
- pub fn with_engine_subscriber( - &self, - log_level: otap_df_config::pipeline::service::telemetry::logs::LogLevel, - f: F, - ) -> R + pub fn with_engine_subscriber(&self, log_level: LogLevel, f: F) -> R where - F: FnOnce(LogsFlusher) -> R, + F: FnOnce() -> R, { let filter = crate::get_env_filter(log_level); match self { EngineLogsSetup::Noop => { - // Use NoSubscriber - events are dropped let subscriber = tracing::subscriber::NoSubscriber::new(); - tracing::subscriber::with_default(subscriber, || f(LogsFlusher::Noop)) + tracing::subscriber::with_default(subscriber, || f()) } EngineLogsSetup::Raw => { let subscriber = Registry::default() .with(filter) .with(RawLoggingLayer::new(ConsoleWriter::default())); - tracing::subscriber::with_default(subscriber, || f(LogsFlusher::Noop)) - } - EngineLogsSetup::Buffered { reporter, capacity } => { - let layer = ThreadBufferedLayer::new(reporter.clone()); - let subscriber = Registry::default().with(filter).with(layer); - let flusher = LogsFlusher::Buffered(reporter.clone()); - - // Install the thread-local buffer - with_thread_log_buffer(*capacity, || { - tracing::subscriber::with_default(subscriber, || f(flusher)) - }) + tracing::subscriber::with_default(subscriber, || f()) } - EngineLogsSetup::Unbuffered { reporter } => { - let layer = UnbufferedLayer::new(reporter.clone()); + EngineLogsSetup::Immediate { reporter } => { + let layer = ImmediateLayer::new(reporter.clone()); let subscriber = Registry::default().with(filter).with(layer); - tracing::subscriber::with_default(subscriber, || f(LogsFlusher::Noop)) + tracing::subscriber::with_default(subscriber, || f()) } EngineLogsSetup::OpenTelemetry { logger_provider } => { let sdk_layer = OpenTelemetryTracingBridge::new(logger_provider); let subscriber = Registry::default().with(filter).with(sdk_layer); - tracing::subscriber::with_default(subscriber, || f(LogsFlusher::Noop)) + tracing::subscriber::with_default(subscriber, || f()) } } } diff --git a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs b/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs index 90b37c18ab..58719676a0 100644 --- a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs +++ b/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs @@ -16,7 +16,7 @@ use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, util::Tr use crate::{ error::Error, - logs::{LogsReporter, UnbufferedLayer}, + logs::{ImmediateLayer, LogsReporter}, opentelemetry_client::logger_provider::LoggerProvider, opentelemetry_client::meter_provider::MeterProvider, self_tracing::{ConsoleWriter, RawLoggingLayer}, @@ -46,7 +46,7 @@ impl OpentelemetryClient { /// noisy HTTP/2 and hyper logs. /// /// The `logs_reporter` parameter is required when `strategies.global` is set to - /// `Unbuffered`. It should be created via `LogsCollector::new()` and the collector + /// `Immediate`. It should be created via `LogsCollector::new()` and the collector /// should be run on a dedicated thread. 
/// /// The logger provider is configured when either global or engine providers @@ -101,16 +101,11 @@ impl OpentelemetryClient { logerr(err); } } - ProviderMode::Buffered => { - return Err(Error::ConfigurationError( - "global buffered logging not supported".into(), - )); - } - ProviderMode::Unbuffered => { + ProviderMode::Immediate => { let reporter = logs_reporter.ok_or_else(|| { - Error::ConfigurationError("Unbuffered logging requires a LogsReporter".into()) + Error::ConfigurationError("Immediate logging requires a LogsReporter".into()) })?; - let channel_layer = UnbufferedLayer::new(reporter); + let channel_layer = ImmediateLayer::new(reporter); if let Err(err) = tracing_setup.with(channel_layer).try_init() { logerr(err); } From 766aa9f5fd3827c1e44b494a9150cfbdd271ac39 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Tue, 13 Jan 2026 14:42:11 -0800 Subject: [PATCH 73/92] rename opentelemetry_client -> telemetry_settings --- rust/otap-dataflow/crates/controller/src/lib.rs | 9 ++++----- rust/otap-dataflow/crates/telemetry/src/lib.rs | 2 +- .../{opentelemetry_client.rs => telemetry_settings.rs} | 8 ++++---- .../logger_provider.rs | 0 .../meter_provider.rs | 2 +- .../meter_provider/otlp_exporter_provider.rs | 0 .../meter_provider/prometheus_exporter_provider.rs | 0 .../meter_provider/views_provider.rs | 0 8 files changed, 10 insertions(+), 11 deletions(-) rename rust/otap-dataflow/crates/telemetry/src/{opentelemetry_client.rs => telemetry_settings.rs} (97%) rename rust/otap-dataflow/crates/telemetry/src/{opentelemetry_client => telemetry_settings}/logger_provider.rs (100%) rename rust/otap-dataflow/crates/telemetry/src/{opentelemetry_client => telemetry_settings}/meter_provider.rs (99%) rename rust/otap-dataflow/crates/telemetry/src/{opentelemetry_client => telemetry_settings}/meter_provider/otlp_exporter_provider.rs (100%) rename rust/otap-dataflow/crates/telemetry/src/{opentelemetry_client => telemetry_settings}/meter_provider/prometheus_exporter_provider.rs (100%) rename rust/otap-dataflow/crates/telemetry/src/{opentelemetry_client => telemetry_settings}/meter_provider/views_provider.rs (100%) diff --git a/rust/otap-dataflow/crates/controller/src/lib.rs b/rust/otap-dataflow/crates/controller/src/lib.rs index 40c2317bdd..6ddf59a50e 100644 --- a/rust/otap-dataflow/crates/controller/src/lib.rs +++ b/rust/otap-dataflow/crates/controller/src/lib.rs @@ -40,8 +40,8 @@ use otap_df_state::event::{ErrorSummary, ObservedEvent}; use otap_df_state::reporter::ObservedEventReporter; use otap_df_state::store::ObservedStateStore; use otap_df_telemetry::logs::{EngineLogsSetup, LogsCollector}; -use otap_df_telemetry::opentelemetry_client::OpentelemetryClient; use otap_df_telemetry::reporter::MetricsReporter; +use otap_df_telemetry::telemetry_settings::OpentelemetryClient; use otap_df_telemetry::{MetricsSystem, otel_info, otel_info_span, otel_warn}; use std::thread; @@ -135,8 +135,7 @@ impl Controller { // Keep the handle alive - dropping it would join the thread and block forever let _logs_collector_handle = logs_collector_handle; - let opentelemetry_client = - OpentelemetryClient::new(telemetry_config, logs_reporter.clone())?; + let telemetry_settings = OpentelemetryClient::new(telemetry_config, logs_reporter.clone())?; let metrics_system = MetricsSystem::new(telemetry_config); let metrics_dispatcher = metrics_system.dispatcher(); let metrics_reporter = metrics_system.reporter(); @@ -180,7 +179,7 @@ impl Controller { .expect("validated: unbuffered requires reporter"), }, ProviderMode::OpenTelemetry 
=> EngineLogsSetup::OpenTelemetry { - logger_provider: opentelemetry_client + logger_provider: telemetry_settings .logger_provider() .clone() .expect("validated: opentelemetry engine requires logger_provider from global"), @@ -441,7 +440,7 @@ impl Controller { handle.shutdown_and_join()?; } obs_state_join_handle.shutdown_and_join()?; - opentelemetry_client.shutdown()?; + telemetry_settings.shutdown()?; Ok(()) } diff --git a/rust/otap-dataflow/crates/telemetry/src/lib.rs b/rust/otap-dataflow/crates/telemetry/src/lib.rs index 5d068a7e22..dee27affc6 100644 --- a/rust/otap-dataflow/crates/telemetry/src/lib.rs +++ b/rust/otap-dataflow/crates/telemetry/src/lib.rs @@ -42,11 +42,11 @@ pub mod internal_events; /// Internal logs collection and transport. pub mod logs; pub mod metrics; -pub mod opentelemetry_client; pub mod registry; pub mod reporter; pub mod self_tracing; pub mod semconv; +pub mod telemetry_settings; // Re-export _private module from internal_events for macro usage. // This allows the otel_info!, otel_warn!, etc. macros to work in other crates diff --git a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs b/rust/otap-dataflow/crates/telemetry/src/telemetry_settings.rs similarity index 97% rename from rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs rename to rust/otap-dataflow/crates/telemetry/src/telemetry_settings.rs index 58719676a0..46d1223731 100644 --- a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client.rs +++ b/rust/otap-dataflow/crates/telemetry/src/telemetry_settings.rs @@ -17,9 +17,9 @@ use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, util::Tr use crate::{ error::Error, logs::{ImmediateLayer, LogsReporter}, - opentelemetry_client::logger_provider::LoggerProvider, - opentelemetry_client::meter_provider::MeterProvider, self_tracing::{ConsoleWriter, RawLoggingLayer}, + telemetry_settings::logger_provider::LoggerProvider, + telemetry_settings::meter_provider::MeterProvider, }; /// Client for the OpenTelemetry SDK. 
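// --- Editor's note: usage shape of the collector/reporter pair exercised by
// the tests in the next hunk (snippet, assuming the otap_df_telemetry::logs
// paths from this patch series):
fn wire_direct_logging() {
    use otap_df_telemetry::logs::{ImmediateLayer, LogsCollector};
    // The pair is created together; the collector drains on its own thread.
    let (collector, reporter) = LogsCollector::new(10);
    let _drain = std::thread::spawn(move || collector.run());
    // Layers clone the reporter and send each record through it.
    let _layer = ImmediateLayer::new(reporter);
}
// --- End editor's note. ---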
@@ -224,7 +224,7 @@ mod tests { use std::{f64::consts::PI, time::Duration}; #[test] - fn test_configure_minimal_opentelemetry_client() -> Result<(), Error> { + fn test_configure_minimal_telemetry_settings() -> Result<(), Error> { let config = TelemetryConfig::default(); let (_collector, reporter) = LogsCollector::new(10); let client = OpentelemetryClient::new(&config, Some(reporter))?; @@ -239,7 +239,7 @@ mod tests { } #[test] - fn test_configure_opentelemetry_client() -> Result<(), Error> { + fn test_configure_telemetry_settings() -> Result<(), Error> { let mut resource = std::collections::HashMap::new(); _ = resource.insert( "service.name".to_string(), diff --git a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client/logger_provider.rs b/rust/otap-dataflow/crates/telemetry/src/telemetry_settings/logger_provider.rs similarity index 100% rename from rust/otap-dataflow/crates/telemetry/src/opentelemetry_client/logger_provider.rs rename to rust/otap-dataflow/crates/telemetry/src/telemetry_settings/logger_provider.rs diff --git a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client/meter_provider.rs b/rust/otap-dataflow/crates/telemetry/src/telemetry_settings/meter_provider.rs similarity index 99% rename from rust/otap-dataflow/crates/telemetry/src/opentelemetry_client/meter_provider.rs rename to rust/otap-dataflow/crates/telemetry/src/telemetry_settings/meter_provider.rs index 358d8dad76..ef99a35be0 100644 --- a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client/meter_provider.rs +++ b/rust/otap-dataflow/crates/telemetry/src/telemetry_settings/meter_provider.rs @@ -22,7 +22,7 @@ use otap_df_config::pipeline::service::telemetry::metrics::{ use crate::{ error::Error, - opentelemetry_client::meter_provider::{ + telemetry_settings::meter_provider::{ otlp_exporter_provider::OtlpExporterProvider, prometheus_exporter_provider::PrometheusExporterProvider, }, diff --git a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client/meter_provider/otlp_exporter_provider.rs b/rust/otap-dataflow/crates/telemetry/src/telemetry_settings/meter_provider/otlp_exporter_provider.rs similarity index 100% rename from rust/otap-dataflow/crates/telemetry/src/opentelemetry_client/meter_provider/otlp_exporter_provider.rs rename to rust/otap-dataflow/crates/telemetry/src/telemetry_settings/meter_provider/otlp_exporter_provider.rs diff --git a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client/meter_provider/prometheus_exporter_provider.rs b/rust/otap-dataflow/crates/telemetry/src/telemetry_settings/meter_provider/prometheus_exporter_provider.rs similarity index 100% rename from rust/otap-dataflow/crates/telemetry/src/opentelemetry_client/meter_provider/prometheus_exporter_provider.rs rename to rust/otap-dataflow/crates/telemetry/src/telemetry_settings/meter_provider/prometheus_exporter_provider.rs diff --git a/rust/otap-dataflow/crates/telemetry/src/opentelemetry_client/meter_provider/views_provider.rs b/rust/otap-dataflow/crates/telemetry/src/telemetry_settings/meter_provider/views_provider.rs similarity index 100% rename from rust/otap-dataflow/crates/telemetry/src/opentelemetry_client/meter_provider/views_provider.rs rename to rust/otap-dataflow/crates/telemetry/src/telemetry_settings/meter_provider/views_provider.rs From a76027909022aace9b67e544c0deb024c8136832 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Tue, 13 Jan 2026 14:59:17 -0800 Subject: [PATCH 74/92] rename TelemetrySettings --- .../telemetry/src/telemetry_settings.rs | 116 ++++++++++++++---- 1 file changed, 
91 insertions(+), 25 deletions(-) diff --git a/rust/otap-dataflow/crates/telemetry/src/telemetry_settings.rs b/rust/otap-dataflow/crates/telemetry/src/telemetry_settings.rs index 46d1223731..6f30c41770 100644 --- a/rust/otap-dataflow/crates/telemetry/src/telemetry_settings.rs +++ b/rust/otap-dataflow/crates/telemetry/src/telemetry_settings.rs @@ -10,29 +10,44 @@ use opentelemetry::KeyValue; use opentelemetry_appender_tracing::layer::OpenTelemetryTracingBridge; use opentelemetry_sdk::{Resource, logs::SdkLoggerProvider, metrics::SdkMeterProvider}; use otap_df_config::pipeline::service::telemetry::{ - AttributeValue, AttributeValueArray, TelemetryConfig, logs::ProviderMode, + AttributeValue, AttributeValueArray, TelemetryConfig, + logs::{OutputMode, ProviderMode}, }; use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, util::TryInitError}; use crate::{ + LogsReceiver, error::Error, - logs::{ImmediateLayer, LogsReporter}, + logs::{ImmediateLayer, LogsCollector, LogsReporter}, self_tracing::{ConsoleWriter, RawLoggingLayer}, telemetry_settings::logger_provider::LoggerProvider, telemetry_settings::meter_provider::MeterProvider, }; -/// Client for the OpenTelemetry SDK. -pub struct OpentelemetryClient { +/// Client for the OpenTelemetry SDK and internal telemetry settings. +/// +/// This struct owns all telemetry infrastructure including: +/// - OpenTelemetry SDK meter and logger providers +/// - Internal logs reporter and receiver channels +/// - Optional logs collector for Direct output mode +pub struct TelemetrySettings { /// The tokio runtime used to run the OpenTelemetry SDK OTLP exporter. /// The reference is kept to ensure the runtime lives as long as the client. _runtime: Option, meter_provider: SdkMeterProvider, logger_provider: Option, + /// Reporter for sending logs through the internal channel. + /// Present when global or engine provider mode needs a channel. + logs_reporter: Option, + /// Receiver for the internal logs channel (Internal output mode only). + /// The ITR node consumes this to process internal telemetry. + logs_receiver: Option, + /// Collector for Direct output mode. Must be spawned by the controller. + logs_collector: Option, // TODO: Add traces providers. } -impl OpentelemetryClient { +impl TelemetrySettings { /// Create a new OpenTelemetry client from the given configuration. /// /// Logging-specific notes: @@ -45,17 +60,15 @@ impl OpentelemetryClient { /// Example: `RUST_LOG=info,h2=warn,hyper=warn` enables info level but silences /// noisy HTTP/2 and hyper logs. /// - /// The `logs_reporter` parameter is required when `strategies.global` is set to - /// `Immediate`. It should be created via `LogsCollector::new()` and the collector - /// should be run on a dedicated thread. + /// The logs reporter is created internally based on the configuration: + /// - For `Direct` output: creates reporter + collector (collector must be spawned) + /// - For `Internal` output: creates reporter + receiver (receiver goes to ITR node) + /// - For `Noop` output: no reporter is created /// /// The logger provider is configured when either global or engine providers /// are set to `OpenTelemetry`. This allows the engine to use the same SDK /// pipeline even when global uses a different logging strategy. 
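// --- Editor's note: the selection the constructor below performs, condensed
// with local stand-in types (the real code produces Option<LogsReporter>,
// Option<LogsReceiver>, and Option<LogsCollector>):
enum Output { Direct, Internal, Noop }

/// Returns (reporter?, receiver?, collector?) for an output mode, given
/// whether any provider needs a channel at all.
fn wiring(out: Output, providers_need_reporter: bool) -> (bool, bool, bool) {
    if !providers_need_reporter {
        return (false, false, false);
    }
    match out {
        Output::Direct => (true, false, true),   // collector prints to console
        Output::Internal => (true, true, false), // receiver goes to the ITR node
        Output::Noop => (false, false, false),
    }
}
// --- End editor's note. ---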
- pub fn new( - config: &TelemetryConfig, - logs_reporter: Option, - ) -> Result { + pub fn new(config: &TelemetryConfig) -> Result { let sdk_resource = Self::configure_resource(&config.resource); let runtime = None; @@ -63,6 +76,30 @@ impl OpentelemetryClient { let (meter_provider, runtime) = MeterProvider::configure(sdk_resource.clone(), &config.metrics, runtime)?.into_parts(); + // Determine if we need a logs reporter based on provider modes + let providers_need_reporter = config.logs.providers.global.needs_reporter() + || config.logs.providers.engine.needs_reporter(); + + // Create the logs reporter, receiver, and collector based on output mode + let (logs_reporter, logs_receiver, logs_collector) = if providers_need_reporter { + match config.logs.output { + OutputMode::Direct => { + // Direct mode: logs go to a collector that prints to console + let (collector, reporter) = LogsCollector::new(config.reporting_channel_size); + (Some(reporter), None, Some(collector)) + } + OutputMode::Internal => { + // Internal mode: logs go through channel to ITR node + let (receiver, reporter) = + LogsCollector::channel(config.reporting_channel_size); + (Some(reporter), Some(receiver), None) + } + OutputMode::Noop => (None, None, None), + } + } else { + (None, None, None) + }; + let tracing_setup = tracing_subscriber::registry().with(crate::get_env_filter(config.logs.level)); @@ -84,8 +121,8 @@ impl OpentelemetryClient { (None, runtime) }; - // Configure the global subscriber based on strategies.global. - // Engine threads override this with BufferWriterLayer via with_default(). + // Configure the global subscriber based on providers.global. + // Engine threads override this with their own subscriber via with_default(). match config.logs.providers.global { ProviderMode::Noop => { // No-op: just install the filter, events are dropped @@ -102,7 +139,7 @@ impl OpentelemetryClient { } } ProviderMode::Immediate => { - let reporter = logs_reporter.ok_or_else(|| { + let reporter = logs_reporter.clone().ok_or_else(|| { Error::ConfigurationError("Immediate logging requires a LogsReporter".into()) })?; let channel_layer = ImmediateLayer::new(reporter); @@ -131,6 +168,9 @@ impl OpentelemetryClient { _runtime: runtime, meter_provider, logger_provider, + logs_reporter, + logs_receiver, + logs_collector, }) } @@ -184,6 +224,35 @@ impl OpentelemetryClient { &self.logger_provider } + /// Get a reference to the logs reporter. + /// + /// Returns `Some` when the configuration requires a channel-based reporter + /// (global or engine provider is `Immediate`). + #[must_use] + pub fn logs_reporter(&self) -> Option<&LogsReporter> { + self.logs_reporter.as_ref() + } + + /// Take the logs receiver for the internal telemetry pipeline. + /// + /// Returns `Some` only when output mode is `Internal`. The receiver should + /// be passed to the Internal Telemetry Receiver (ITR) node. + /// + /// This method takes ownership of the receiver (can only be called once). + pub fn take_logs_receiver(&mut self) -> Option { + self.logs_receiver.take() + } + + /// Take the logs collector for Direct output mode. + /// + /// Returns `Some` only when output mode is `Direct`. The collector should + /// be spawned on a dedicated thread to process log records. + /// + /// This method takes ownership of the collector (can only be called once). + pub fn take_logs_collector(&mut self) -> Option { + self.logs_collector.take() + } + /// Shutdown the OpenTelemetry SDK. 
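// --- Editor's note: both take_* accessors above follow the take-once
// pattern; the first call moves the value out and later calls return None.
// Minimal illustration with a stand-in type:
struct TakeOnce<T>(Option<T>);

impl<T> TakeOnce<T> {
    fn take(&mut self) -> Option<T> {
        self.0.take()
    }
}
// E.g. for TakeOnce(Some(1)): the first take() yields Some(1), the second None.
// --- End editor's note. ---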
pub fn shutdown(&self) -> Result<(), Error> { let meter_shutdown_result = self.meter_provider().shutdown(); @@ -220,14 +289,12 @@ mod tests { }; use super::*; - use crate::logs::LogsCollector; use std::{f64::consts::PI, time::Duration}; #[test] fn test_configure_minimal_telemetry_settings() -> Result<(), Error> { let config = TelemetryConfig::default(); - let (_collector, reporter) = LogsCollector::new(10); - let client = OpentelemetryClient::new(&config, Some(reporter))?; + let client = TelemetrySettings::new(&config)?; let meter = global::meter("test-meter"); let counter = meter.u64_counter("test-counter").build(); @@ -261,8 +328,7 @@ mod tests { logs: LogsConfig::default(), resource, }; - let (_collector, reporter) = LogsCollector::new(10); - let client = OpentelemetryClient::new(&config, Some(reporter))?; + let client = TelemetrySettings::new(&config)?; let meter = global::meter("test-meter"); let counter = meter.u64_counter("test-counter").build(); @@ -277,31 +343,31 @@ mod tests { fn test_to_sdk_value() { let string_attr = AttributeValue::String("example".to_string()); assert_eq!( - OpentelemetryClient::to_sdk_value(&string_attr), + TelemetrySettings::to_sdk_value(&string_attr), opentelemetry::Value::String("example".into()) ); let bool_attr = AttributeValue::Bool(true); assert_eq!( - OpentelemetryClient::to_sdk_value(&bool_attr), + TelemetrySettings::to_sdk_value(&bool_attr), opentelemetry::Value::Bool(true) ); let i64_attr = AttributeValue::I64(42); assert_eq!( - OpentelemetryClient::to_sdk_value(&i64_attr), + TelemetrySettings::to_sdk_value(&i64_attr), opentelemetry::Value::I64(42) ); let f64_attr = AttributeValue::F64(PI); assert_eq!( - OpentelemetryClient::to_sdk_value(&f64_attr), + TelemetrySettings::to_sdk_value(&f64_attr), opentelemetry::Value::F64(PI) ); let array_attr = AttributeValue::Array(AttributeValueArray::I64(vec![1, 2, 3])); assert_eq!( - OpentelemetryClient::to_sdk_value(&array_attr), + TelemetrySettings::to_sdk_value(&array_attr), opentelemetry::Value::Array(opentelemetry::Array::I64(vec![1, 2, 3])) ); } From a6e680de738f132fc80f7a681fcc748bddf4a8e2 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Tue, 13 Jan 2026 15:07:11 -0800 Subject: [PATCH 75/92] wip --- .../src/pipeline/service/telemetry/logs.rs | 18 ++++- .../crates/controller/src/lib.rs | 74 +++++++------------ 2 files changed, 42 insertions(+), 50 deletions(-) diff --git a/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs b/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs index 7a2f9c9a40..d435f36020 100644 --- a/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs +++ b/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry/logs.rs @@ -155,8 +155,11 @@ impl LogsConfig { /// Validate the logs configuration. /// /// Returns an error if: - /// - `output` is `Noop` but a provider uses `Unbuffered` + /// - `output` is `Noop` but a provider uses `Immediate` /// (logs would be sent but discarded) + /// - `engine` is `OpenTelemetry` but `global` is not + /// (current implementation restriction: the SDK logger provider is only + /// configured when global uses OpenTelemetry) pub fn validate(&self) -> Result<(), Error> { if self.providers.internal.needs_reporter() { return Err(Error::InvalidUserConfig { @@ -177,6 +180,19 @@ impl LogsConfig { } } + // Current implementation restriction: engine OpenTelemetry requires global OpenTelemetry. + // The SDK logger provider is only created when the global provider is OpenTelemetry. 
+ // This could be lifted in the future by creating the logger provider independently. + if self.providers.engine == ProviderMode::OpenTelemetry + && self.providers.global != ProviderMode::OpenTelemetry + { + return Err(Error::InvalidUserConfig { + error: "engine provider 'opentelemetry' requires global provider to also be \ + 'opentelemetry' (current implementation restriction)" + .into(), + }); + } + Ok(()) } } diff --git a/rust/otap-dataflow/crates/controller/src/lib.rs b/rust/otap-dataflow/crates/controller/src/lib.rs index 6ddf59a50e..b429c0c8cf 100644 --- a/rust/otap-dataflow/crates/controller/src/lib.rs +++ b/rust/otap-dataflow/crates/controller/src/lib.rs @@ -22,7 +22,7 @@ use crate::thread_task::spawn_thread_local_task; use core_affinity::CoreId; use otap_df_config::engine::HttpAdminSettings; use otap_df_config::pipeline::service::telemetry::logs::{ - INTERNAL_TELEMETRY_RECEIVER_URN, OutputMode, ProviderMode, + INTERNAL_TELEMETRY_RECEIVER_URN, ProviderMode, }; use otap_df_config::{ PipelineGroupId, PipelineId, @@ -39,9 +39,9 @@ use otap_df_state::DeployedPipelineKey; use otap_df_state::event::{ErrorSummary, ObservedEvent}; use otap_df_state::reporter::ObservedEventReporter; use otap_df_state::store::ObservedStateStore; -use otap_df_telemetry::logs::{EngineLogsSetup, LogsCollector}; +use otap_df_telemetry::logs::EngineLogsSetup; use otap_df_telemetry::reporter::MetricsReporter; -use otap_df_telemetry::telemetry_settings::OpentelemetryClient; +use otap_df_telemetry::telemetry_settings::TelemetrySettings; use otap_df_telemetry::{MetricsSystem, otel_info, otel_info_span, otel_warn}; use std::thread; @@ -88,7 +88,7 @@ impl Controller { pipeline_ctrl_msg_channel_size = settings.default_pipeline_ctrl_msg_channel_size ); - // Validate logs configuration + // Validate logs configuration. telemetry_config .logs .validate() @@ -96,46 +96,23 @@ impl Controller { message: msg.to_string(), })?; - // Create logs reporter based on provider providers. - // LogsReporter is needed when: - // - global == Unbuffered (global threads send directly to channel) - // - engine == Buffered or Unbuffered (engine threads send to channel) - // Raw provider mode = synchronous console output, no reporter needed. - let providers_need_reporter = telemetry_config.logs.providers.global.needs_reporter() - || telemetry_config.logs.providers.engine.needs_reporter(); - - // Create the reporter if providers need it. - // The receiver end goes to either: - // - LogsCollector thread (output == Raw): prints to console - // - Internal Telemetry Receiver node (output == Internal): emits as OTLP - let (logs_reporter, mut logs_receiver, logs_collector_handle) = if providers_need_reporter { - match telemetry_config.logs.output { - OutputMode::Direct => { - // Start collector thread for Raw output mode - let (logs_collector, reporter) = - LogsCollector::new(telemetry_config.reporting_channel_size); - let logs_collector_handle = - spawn_thread_local_task("logs-collector", move |_cancellation_token| { - logs_collector.run() - })?; - (Some(reporter), None, Some(logs_collector_handle)) - } - OutputMode::Internal => { - // For Internal output, create just the channel. - // The ITR node will receive from it during pipeline build. 
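// --- Editor's note: the two validate() rules introduced by this patch,
// condensed with local stand-in enums (the real checks live on LogsConfig
// and report Error::InvalidUserConfig):
#[derive(PartialEq)]
enum Mode { Noop, Raw, Immediate, OpenTelemetry }
#[derive(PartialEq)]
enum Out { Direct, Internal, Noop }

fn validate(global: Mode, engine: Mode, out: Out) -> Result<(), String> {
    let needs_reporter = |m: &Mode| *m == Mode::Immediate;
    if (needs_reporter(&global) || needs_reporter(&engine)) && out == Out::Noop {
        return Err("logs would be sent but discarded".into());
    }
    if engine == Mode::OpenTelemetry && global != Mode::OpenTelemetry {
        return Err("engine 'opentelemetry' requires global 'opentelemetry'".into());
    }
    Ok(())
}
// --- End editor's note. ---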
- let (logs_receiver, reporter) = - LogsCollector::channel(telemetry_config.reporting_channel_size); - (Some(reporter), Some(logs_receiver), None) - } - OutputMode::Noop => (None, None, None), - } - } else { - (None, None, None) - }; - // Keep the handle alive - dropping it would join the thread and block forever - let _logs_collector_handle = logs_collector_handle; + // Create telemetry settings - this creates logs reporter/receiver internally + let mut telemetry_settings = TelemetrySettings::new(telemetry_config)?; + + // Start the logs collector thread if needed (Direct output mode) + let _logs_collector_handle = + if let Some(logs_collector) = telemetry_settings.take_logs_collector() { + Some(spawn_thread_local_task( + "logs-collector", + move |_cancellation_token| logs_collector.run(), + )?) + } else { + None + }; + + // Get logs receiver for Internal output mode (passed to internal pipeline) + let mut logs_receiver = telemetry_settings.take_logs_receiver(); - let telemetry_settings = OpentelemetryClient::new(telemetry_config, logs_reporter.clone())?; let metrics_system = MetricsSystem::new(telemetry_config); let metrics_dispatcher = metrics_system.dispatcher(); let metrics_reporter = metrics_system.reporter(); @@ -167,16 +144,15 @@ impl Controller { obs_state_store.run(cancellation_token) })?; - // Create engine logs setup based on strategy configuration. - // When output is Internal, the logs go through the channel to ITR. - // The validation layer ensures that when output=Internal, engine strategy is Buffered. + // Create engine logs setup based on provider configuration. let engine_logs_setup = match telemetry_config.logs.providers.engine { ProviderMode::Noop => EngineLogsSetup::Noop, ProviderMode::Raw => EngineLogsSetup::Raw, ProviderMode::Immediate => EngineLogsSetup::Immediate { - reporter: logs_reporter - .clone() - .expect("validated: unbuffered requires reporter"), + reporter: telemetry_settings + .logs_reporter() + .cloned() + .expect("validated: immediate requires reporter"), }, ProviderMode::OpenTelemetry => EngineLogsSetup::OpenTelemetry { logger_provider: telemetry_settings From c1d751cea5038d881456b38553a73e19d438214b Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Tue, 13 Jan 2026 15:25:06 -0800 Subject: [PATCH 76/92] ex --- .../configs/fake-batch-debug-noop.yaml | 6 +++--- .../configs/internal-telemetry.yaml | 4 ++-- .../crates/config/src/pipeline.rs | 21 +++++-------------- 3 files changed, 10 insertions(+), 21 deletions(-) diff --git a/rust/otap-dataflow/configs/fake-batch-debug-noop.yaml b/rust/otap-dataflow/configs/fake-batch-debug-noop.yaml index 9e486e870a..b162bcef5c 100644 --- a/rust/otap-dataflow/configs/fake-batch-debug-noop.yaml +++ b/rust/otap-dataflow/configs/fake-batch-debug-noop.yaml @@ -57,6 +57,6 @@ service: logs: level: "debug" providers: - global: unbuffered - engine: buffered - output: raw + global: immediate + engine: immediate + output: direct diff --git a/rust/otap-dataflow/configs/internal-telemetry.yaml b/rust/otap-dataflow/configs/internal-telemetry.yaml index a6b9fd18ee..e0d7fefc15 100644 --- a/rust/otap-dataflow/configs/internal-telemetry.yaml +++ b/rust/otap-dataflow/configs/internal-telemetry.yaml @@ -66,7 +66,7 @@ service: logs: level: "debug" providers: - global: unbuffered - engine: buffered + global: immediate + engine: immediate internal: noop # Avoid feedback in internal pipeline output: internal diff --git a/rust/otap-dataflow/crates/config/src/pipeline.rs b/rust/otap-dataflow/crates/config/src/pipeline.rs index 
dcb7233976..40b8b9f802 100644 --- a/rust/otap-dataflow/crates/config/src/pipeline.rs +++ b/rust/otap-dataflow/crates/config/src/pipeline.rs @@ -50,18 +50,9 @@ pub struct PipelineConfig { #[serde(default)] nodes: PipelineNodes, - /// Internal telemetry pipeline nodes. - /// - /// This optional section defines nodes for processing internal telemetry - /// (logs, metrics, traces generated by the engine itself). - /// - /// The internal pipeline runs on a dedicated thread with hardcoded settings - /// (single thread, no admin server), separate from the main pipeline. - /// - /// Constraints: - /// - Receivers must be Internal Telemetry Receivers (ITR) - /// with plugin_urn matching `INTERNAL_TELEMETRY_RECEIVER_URN` - /// - Processors and exporters can be any valid plugin + /// Internal telemetry pipeline nodes. These have the same structure + /// as `nodes` but are independent and isolated to a separate internal + /// telemetry runtime. #[serde(default, skip_serializing_if = "PipelineNodes::is_empty")] internal: PipelineNodes, @@ -86,10 +77,8 @@ pub enum PipelineType { Otap, } -/// A collection of nodes forming a pipeline graph (hyper-DAG). -/// -/// This wrapper provides validation methods for the node graph structure, -/// including hyper-edge validation and cycle detection. +/// A collection of nodes forming a pipeline graph (hyper-DAG). One of +/// these is the main pipeline, and one is the internal telemetry pipeline. #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)] #[serde(transparent)] pub struct PipelineNodes(HashMap>); From c14ddfeb3bdc98319475e48365eba122c4e1d3e3 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Tue, 13 Jan 2026 15:31:00 -0800 Subject: [PATCH 77/92] ->telemetry runtime --- .../crates/controller/src/lib.rs | 18 ++++++------- .../otap-dataflow/crates/telemetry/src/lib.rs | 2 +- ...metry_settings.rs => telemetry_runtime.rs} | 26 +++++++++---------- .../logger_provider.rs | 0 .../meter_provider.rs | 2 +- .../meter_provider/otlp_exporter_provider.rs | 0 .../prometheus_exporter_provider.rs | 0 .../meter_provider/views_provider.rs | 0 8 files changed, 24 insertions(+), 24 deletions(-) rename rust/otap-dataflow/crates/telemetry/src/{telemetry_settings.rs => telemetry_runtime.rs} (95%) rename rust/otap-dataflow/crates/telemetry/src/{telemetry_settings => telemetry_runtime}/logger_provider.rs (100%) rename rust/otap-dataflow/crates/telemetry/src/{telemetry_settings => telemetry_runtime}/meter_provider.rs (99%) rename rust/otap-dataflow/crates/telemetry/src/{telemetry_settings => telemetry_runtime}/meter_provider/otlp_exporter_provider.rs (100%) rename rust/otap-dataflow/crates/telemetry/src/{telemetry_settings => telemetry_runtime}/meter_provider/prometheus_exporter_provider.rs (100%) rename rust/otap-dataflow/crates/telemetry/src/{telemetry_settings => telemetry_runtime}/meter_provider/views_provider.rs (100%) diff --git a/rust/otap-dataflow/crates/controller/src/lib.rs b/rust/otap-dataflow/crates/controller/src/lib.rs index 6ba8c15f0c..e9d2a6c12d 100644 --- a/rust/otap-dataflow/crates/controller/src/lib.rs +++ b/rust/otap-dataflow/crates/controller/src/lib.rs @@ -41,7 +41,7 @@ use otap_df_state::reporter::ObservedEventReporter; use otap_df_state::store::ObservedStateStore; use otap_df_telemetry::logs::EngineLogsSetup; use otap_df_telemetry::reporter::MetricsReporter; -use otap_df_telemetry::telemetry_settings::TelemetrySettings; +use otap_df_telemetry::telemetry_runtime::TelemetryRuntime; use otap_df_telemetry::{InternalTelemetrySystem, otel_info, 
otel_info_span, otel_warn}; use std::thread; @@ -97,12 +97,12 @@ impl Controller { message: msg.to_string(), })?; - // Create telemetry settings. This creates logs reporter/receiver internally - let mut telemetry_settings = TelemetrySettings::new(telemetry_config)?; + // Create telemetry runtime according to the various options. + let mut telemetry_runtime = TelemetryRuntime::new(telemetry_config)?; - // Start the logs collector thread if needed for Direct output mode. + // Start the logs collector thread if needed for direct output. let _logs_collector_handle = - if let Some(logs_collector) = telemetry_settings.take_logs_collector() { + if let Some(logs_collector) = telemetry_runtime.take_logs_collector() { Some(spawn_thread_local_task( "logs-collector", move |_cancellation_token| logs_collector.run(), @@ -112,7 +112,7 @@ impl Controller { }; // Get logs receiver for Internal output mode (passed to internal pipeline) - let mut logs_receiver = telemetry_settings.take_logs_receiver(); + let mut logs_receiver = telemetry_runtime.take_logs_receiver(); let metrics_system = InternalTelemetrySystem::new(telemetry_config); let metrics_dispatcher = metrics_system.dispatcher(); @@ -150,13 +150,13 @@ impl Controller { ProviderMode::Noop => EngineLogsSetup::Noop, ProviderMode::Raw => EngineLogsSetup::Raw, ProviderMode::Immediate => EngineLogsSetup::Immediate { - reporter: telemetry_settings + reporter: telemetry_runtime .logs_reporter() .cloned() .expect("validated: immediate requires reporter"), }, ProviderMode::OpenTelemetry => EngineLogsSetup::OpenTelemetry { - logger_provider: telemetry_settings + logger_provider: telemetry_runtime .logger_provider() .clone() .expect("validated: opentelemetry engine requires logger_provider from global"), @@ -415,7 +415,7 @@ impl Controller { handle.shutdown_and_join()?; } obs_state_join_handle.shutdown_and_join()?; - telemetry_settings.shutdown()?; + telemetry_runtime.shutdown()?; Ok(()) } diff --git a/rust/otap-dataflow/crates/telemetry/src/lib.rs b/rust/otap-dataflow/crates/telemetry/src/lib.rs index bc41b6a52e..406ed4b66a 100644 --- a/rust/otap-dataflow/crates/telemetry/src/lib.rs +++ b/rust/otap-dataflow/crates/telemetry/src/lib.rs @@ -49,7 +49,7 @@ pub mod registry; pub mod reporter; pub mod self_tracing; pub mod semconv; -pub mod telemetry_settings; +pub mod telemetry_runtime; // Re-export _private module from internal_events for macro usage. // This allows the otel_info!, otel_warn!, etc. macros to work in other crates diff --git a/rust/otap-dataflow/crates/telemetry/src/telemetry_settings.rs b/rust/otap-dataflow/crates/telemetry/src/telemetry_runtime.rs similarity index 95% rename from rust/otap-dataflow/crates/telemetry/src/telemetry_settings.rs rename to rust/otap-dataflow/crates/telemetry/src/telemetry_runtime.rs index 6f30c41770..56637ba322 100644 --- a/rust/otap-dataflow/crates/telemetry/src/telemetry_settings.rs +++ b/rust/otap-dataflow/crates/telemetry/src/telemetry_runtime.rs @@ -20,8 +20,8 @@ use crate::{ error::Error, logs::{ImmediateLayer, LogsCollector, LogsReporter}, self_tracing::{ConsoleWriter, RawLoggingLayer}, - telemetry_settings::logger_provider::LoggerProvider, - telemetry_settings::meter_provider::MeterProvider, + telemetry_runtime::logger_provider::LoggerProvider, + telemetry_runtime::meter_provider::MeterProvider, }; /// Client for the OpenTelemetry SDK and internal telemetry settings. 
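// --- Editor's note: the controller-side lifecycle of TelemetryRuntime as
// wired in the diff above (module and error paths are assumptions based on
// this patch series):
fn telemetry_lifecycle(
    config: &otap_df_config::pipeline::service::telemetry::TelemetryConfig,
) -> Result<(), otap_df_telemetry::error::Error> {
    use otap_df_telemetry::telemetry_runtime::TelemetryRuntime;
    let mut rt = TelemetryRuntime::new(config)?;
    if let Some(collector) = rt.take_logs_collector() {
        // Direct output: drain logs on a dedicated thread.
        let _ = std::thread::spawn(move || collector.run());
    }
    // Some(..) only for Internal output; handed to the ITR node.
    let _logs_receiver = rt.take_logs_receiver();
    // ... run pipelines ...
    rt.shutdown()
}
// --- End editor's note. ---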
@@ -30,7 +30,7 @@ use crate::{ /// - OpenTelemetry SDK meter and logger providers /// - Internal logs reporter and receiver channels /// - Optional logs collector for Direct output mode -pub struct TelemetrySettings { +pub struct TelemetryRuntime { /// The tokio runtime used to run the OpenTelemetry SDK OTLP exporter. /// The reference is kept to ensure the runtime lives as long as the client. _runtime: Option, @@ -47,7 +47,7 @@ pub struct TelemetrySettings { // TODO: Add traces providers. } -impl TelemetrySettings { +impl TelemetryRuntime { /// Create a new OpenTelemetry client from the given configuration. /// /// Logging-specific notes: @@ -292,9 +292,9 @@ mod tests { use std::{f64::consts::PI, time::Duration}; #[test] - fn test_configure_minimal_telemetry_settings() -> Result<(), Error> { + fn test_configure_minimal_telemetry_runtime() -> Result<(), Error> { let config = TelemetryConfig::default(); - let client = TelemetrySettings::new(&config)?; + let client = TelemetryRuntime::new(&config)?; let meter = global::meter("test-meter"); let counter = meter.u64_counter("test-counter").build(); @@ -306,7 +306,7 @@ mod tests { } #[test] - fn test_configure_telemetry_settings() -> Result<(), Error> { + fn test_configure_telemetry_runtime() -> Result<(), Error> { let mut resource = std::collections::HashMap::new(); _ = resource.insert( "service.name".to_string(), @@ -328,7 +328,7 @@ mod tests { logs: LogsConfig::default(), resource, }; - let client = TelemetrySettings::new(&config)?; + let client = TelemetryRuntime::new(&config)?; let meter = global::meter("test-meter"); let counter = meter.u64_counter("test-counter").build(); @@ -343,31 +343,31 @@ mod tests { fn test_to_sdk_value() { let string_attr = AttributeValue::String("example".to_string()); assert_eq!( - TelemetrySettings::to_sdk_value(&string_attr), + TelemetryRuntime::to_sdk_value(&string_attr), opentelemetry::Value::String("example".into()) ); let bool_attr = AttributeValue::Bool(true); assert_eq!( - TelemetrySettings::to_sdk_value(&bool_attr), + TelemetryRuntime::to_sdk_value(&bool_attr), opentelemetry::Value::Bool(true) ); let i64_attr = AttributeValue::I64(42); assert_eq!( - TelemetrySettings::to_sdk_value(&i64_attr), + TelemetryRuntime::to_sdk_value(&i64_attr), opentelemetry::Value::I64(42) ); let f64_attr = AttributeValue::F64(PI); assert_eq!( - TelemetrySettings::to_sdk_value(&f64_attr), + TelemetryRuntime::to_sdk_value(&f64_attr), opentelemetry::Value::F64(PI) ); let array_attr = AttributeValue::Array(AttributeValueArray::I64(vec![1, 2, 3])); assert_eq!( - TelemetrySettings::to_sdk_value(&array_attr), + TelemetryRuntime::to_sdk_value(&array_attr), opentelemetry::Value::Array(opentelemetry::Array::I64(vec![1, 2, 3])) ); } diff --git a/rust/otap-dataflow/crates/telemetry/src/telemetry_settings/logger_provider.rs b/rust/otap-dataflow/crates/telemetry/src/telemetry_runtime/logger_provider.rs similarity index 100% rename from rust/otap-dataflow/crates/telemetry/src/telemetry_settings/logger_provider.rs rename to rust/otap-dataflow/crates/telemetry/src/telemetry_runtime/logger_provider.rs diff --git a/rust/otap-dataflow/crates/telemetry/src/telemetry_settings/meter_provider.rs b/rust/otap-dataflow/crates/telemetry/src/telemetry_runtime/meter_provider.rs similarity index 99% rename from rust/otap-dataflow/crates/telemetry/src/telemetry_settings/meter_provider.rs rename to rust/otap-dataflow/crates/telemetry/src/telemetry_runtime/meter_provider.rs index ef99a35be0..e22f163d9b 100644 --- 
a/rust/otap-dataflow/crates/telemetry/src/telemetry_settings/meter_provider.rs +++ b/rust/otap-dataflow/crates/telemetry/src/telemetry_runtime/meter_provider.rs @@ -22,7 +22,7 @@ use otap_df_config::pipeline::service::telemetry::metrics::{ use crate::{ error::Error, - telemetry_settings::meter_provider::{ + telemetry_runtime::meter_provider::{ otlp_exporter_provider::OtlpExporterProvider, prometheus_exporter_provider::PrometheusExporterProvider, }, diff --git a/rust/otap-dataflow/crates/telemetry/src/telemetry_settings/meter_provider/otlp_exporter_provider.rs b/rust/otap-dataflow/crates/telemetry/src/telemetry_runtime/meter_provider/otlp_exporter_provider.rs similarity index 100% rename from rust/otap-dataflow/crates/telemetry/src/telemetry_settings/meter_provider/otlp_exporter_provider.rs rename to rust/otap-dataflow/crates/telemetry/src/telemetry_runtime/meter_provider/otlp_exporter_provider.rs diff --git a/rust/otap-dataflow/crates/telemetry/src/telemetry_settings/meter_provider/prometheus_exporter_provider.rs b/rust/otap-dataflow/crates/telemetry/src/telemetry_runtime/meter_provider/prometheus_exporter_provider.rs similarity index 100% rename from rust/otap-dataflow/crates/telemetry/src/telemetry_settings/meter_provider/prometheus_exporter_provider.rs rename to rust/otap-dataflow/crates/telemetry/src/telemetry_runtime/meter_provider/prometheus_exporter_provider.rs diff --git a/rust/otap-dataflow/crates/telemetry/src/telemetry_settings/meter_provider/views_provider.rs b/rust/otap-dataflow/crates/telemetry/src/telemetry_runtime/meter_provider/views_provider.rs similarity index 100% rename from rust/otap-dataflow/crates/telemetry/src/telemetry_settings/meter_provider/views_provider.rs rename to rust/otap-dataflow/crates/telemetry/src/telemetry_runtime/meter_provider/views_provider.rs From f87d005e92d433cc0bc64b86e2ec6f2b77df0848 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Tue, 13 Jan 2026 17:12:50 -0800 Subject: [PATCH 78/92] dry provider setup --- .../crates/controller/src/lib.rs | 60 ++++------ .../otap-dataflow/crates/telemetry/src/lib.rs | 4 +- .../crates/telemetry/src/logs.rs | 56 ++++++--- .../crates/telemetry/src/telemetry_runtime.rs | 112 ++++++++++-------- 4 files changed, 128 insertions(+), 104 deletions(-) diff --git a/rust/otap-dataflow/crates/controller/src/lib.rs b/rust/otap-dataflow/crates/controller/src/lib.rs index e9d2a6c12d..b0f63487ce 100644 --- a/rust/otap-dataflow/crates/controller/src/lib.rs +++ b/rust/otap-dataflow/crates/controller/src/lib.rs @@ -21,9 +21,7 @@ use crate::error::Error; use crate::thread_task::spawn_thread_local_task; use core_affinity::CoreId; use otap_df_config::engine::HttpAdminSettings; -use otap_df_config::pipeline::service::telemetry::logs::{ - INTERNAL_TELEMETRY_RECEIVER_URN, ProviderMode, -}; +use otap_df_config::pipeline::service::telemetry::logs::INTERNAL_TELEMETRY_RECEIVER_URN; use otap_df_config::{ PipelineGroupId, PipelineId, pipeline::PipelineConfig, @@ -39,7 +37,7 @@ use otap_df_state::DeployedPipelineKey; use otap_df_state::event::{ErrorSummary, ObservedEvent}; use otap_df_state::reporter::ObservedEventReporter; use otap_df_state::store::ObservedStateStore; -use otap_df_telemetry::logs::EngineLogsSetup; +use otap_df_telemetry::logs::TelemetrySetup; use otap_df_telemetry::reporter::MetricsReporter; use otap_df_telemetry::telemetry_runtime::TelemetryRuntime; use otap_df_telemetry::{InternalTelemetrySystem, otel_info, otel_info_span, otel_warn}; @@ -145,52 +143,34 @@ impl Controller { obs_state_store.run(cancellation_token) 
})?; - // Create engine logs setup based on provider configuration. - let engine_logs_setup = match telemetry_config.logs.providers.engine { - ProviderMode::Noop => EngineLogsSetup::Noop, - ProviderMode::Raw => EngineLogsSetup::Raw, - ProviderMode::Immediate => EngineLogsSetup::Immediate { - reporter: telemetry_runtime - .logs_reporter() - .cloned() - .expect("validated: immediate requires reporter"), - }, - ProviderMode::OpenTelemetry => EngineLogsSetup::OpenTelemetry { - logger_provider: telemetry_runtime - .logger_provider() - .clone() - .expect("validated: opentelemetry engine requires logger_provider from global"), - }, - }; + // Create telemetry setup for engine and internal pipelines from provider configuration. + let engine_telemetry_setup = + telemetry_runtime.telemetry_setup_for(telemetry_config.logs.providers.engine); + let internal_telemetry_setup = + telemetry_runtime.telemetry_setup_for(telemetry_config.logs.providers.internal); let log_level = telemetry_config.logs.level; - // Spawn internal pipeline thread if configured. + // Spawn internal telemetry pipeline thread, if configured. let internal_pipeline_thread = if let Some(internal_config) = pipeline.extract_internal_config() { - // Internal pipeline only exists when output mode is Internal - // The logs_receiver goes to the internal pipeline's ITR node + // TODO: this is a bunch of placeholder values! let internal_logs_receiver = logs_receiver.take(); let internal_factory = self.pipeline_factory; let internal_pipeline_id: PipelineId = "internal".into(); let internal_pipeline_key = DeployedPipelineKey { pipeline_group_id: pipeline_group_id.clone(), pipeline_id: internal_pipeline_id.clone(), - core_id: 0, // Virtual core ID for internal pipeline + core_id: 0, }; let internal_pipeline_ctx = controller_ctx.pipeline_context_with( pipeline_group_id.clone(), internal_pipeline_id.clone(), - 0, // Virtual core ID - 0, // Virtual thread ID + 0, + 0, ); let internal_obs_evt_reporter = obs_evt_reporter.clone(); let internal_metrics_reporter = metrics_reporter.clone(); - // Internal pipeline uses Raw logging (direct console output) - // to avoid feedback loops - it can't log through itself - let internal_engine_logs_setup = EngineLogsSetup::Raw; - let internal_log_level = log_level; - // Create control message channel for internal pipeline let (internal_ctrl_tx, internal_ctrl_rx) = pipeline_ctrl_msg_channel( internal_config @@ -199,6 +179,7 @@ impl Controller { ); let thread_name = "internal-pipeline".to_string(); + let internal_telemetry_setup = internal_telemetry_setup.clone(); let handle = thread::Builder::new() .name(thread_name.clone()) .spawn(move || { @@ -210,8 +191,8 @@ impl Controller { internal_pipeline_ctx, internal_obs_evt_reporter, internal_metrics_reporter, - internal_engine_logs_setup, - internal_log_level, + internal_telemetry_setup, + log_level, // TODO: separate log level for internal pipeline. 
internal_logs_receiver, internal_ctrl_tx, internal_ctrl_rx, @@ -265,7 +246,7 @@ impl Controller { thread_id, ); let metrics_reporter = metrics_reporter.clone(); - let engine_logs_setup = engine_logs_setup.clone(); + let telemetry_setup = engine_telemetry_setup.clone(); let logs_receiver = logs_receiver.clone(); let thread_name = format!("pipeline-core-{}", core_id.id); @@ -281,7 +262,7 @@ impl Controller { pipeline_handle, obs_evt_reporter, metrics_reporter, - engine_logs_setup, + telemetry_setup, log_level, logs_receiver, pipeline_ctrl_msg_tx, @@ -537,15 +518,14 @@ impl Controller { pipeline_context: PipelineContext, obs_evt_reporter: ObservedEventReporter, metrics_reporter: MetricsReporter, - engine_logs_setup: EngineLogsSetup, + telemetry_setup: TelemetrySetup, log_level: otap_df_config::pipeline::service::telemetry::logs::LogLevel, logs_receiver: Option, pipeline_ctrl_msg_tx: PipelineCtrlMsgSender, pipeline_ctrl_msg_rx: PipelineCtrlMsgReceiver, ) -> Result, Error> { - // Run with the engine-appropriate tracing subscriber. - // The closure receives a LogsFlusher for buffered mode. - engine_logs_setup.with_engine_subscriber(log_level, || { + // Run with the appropriate tracing subscriber for this pipeline. + telemetry_setup.with_subscriber(log_level, || { // Create a tracing span for this pipeline thread // so that all logs within this scope include pipeline context. let span = otel_info_span!("pipeline_thread", core.id = core_id.id); diff --git a/rust/otap-dataflow/crates/telemetry/src/lib.rs b/rust/otap-dataflow/crates/telemetry/src/lib.rs index 406ed4b66a..67bb567943 100644 --- a/rust/otap-dataflow/crates/telemetry/src/lib.rs +++ b/rust/otap-dataflow/crates/telemetry/src/lib.rs @@ -69,8 +69,8 @@ pub use tracing::warn_span as otel_warn_span; // Re-export commonly used logs types for convenience. pub use logs::{ - EngineLogsSetup, ImmediateLayer, LogBatch, LogPayload, LogsCollector, LogsReceiver, - LogsReporter, + ImmediateLayer, LogBatch, LogPayload, LogsCollector, LogsReceiver, LogsReporter, + TelemetrySetup, }; // TODO This should be #[cfg(test)], but something is preventing it from working. diff --git a/rust/otap-dataflow/crates/telemetry/src/logs.rs b/rust/otap-dataflow/crates/telemetry/src/logs.rs index 6afbe35d79..3beec2d1bb 100644 --- a/rust/otap-dataflow/crates/telemetry/src/logs.rs +++ b/rust/otap-dataflow/crates/telemetry/src/logs.rs @@ -278,13 +278,13 @@ where } } -/// Engine logging configuration, carrying the data needed for each mode. +/// Telemetry setup for pipeline threads, carrying the data needed for each mode. /// -/// This enum is constructed based on `config.logs.strategies.engine` and passed -/// to each engine thread. The engine thread uses `with_engine_subscriber()` to -/// run its work with the appropriate logging layer. +/// This enum is constructed based on `config.logs.providers.engine` (for main pipelines) +/// or `config.logs.providers.internal` (for the internal telemetry pipeline). +/// Pipeline threads use `with_subscriber()` to run with the appropriate logging layer. #[derive(Clone)] -pub enum EngineLogsSetup { +pub enum TelemetrySetup { /// Logs are silently dropped. Noop, /// Synchronous raw logging to console. @@ -301,34 +301,62 @@ pub enum EngineLogsSetup { }, } -impl EngineLogsSetup { - /// Run a closure with the engine-appropriate tracing subscriber. +impl TelemetrySetup { + /// Initialize this setup as the global tracing subscriber. /// - /// Returns a `LogsFlusher` that can be used to periodically flush buffered logs. 
- /// For non-buffered modes, the flusher is a no-op. - pub fn with_engine_subscriber(&self, log_level: LogLevel, f: F) -> R + /// This is used during startup to set the global subscriber. Returns an error + /// if a global subscriber has already been set. + pub fn try_init_global( + &self, + log_level: LogLevel, + ) -> Result<(), tracing_subscriber::util::TryInitError> { + use tracing_subscriber::util::SubscriberInitExt; + + let filter = crate::get_env_filter(log_level); + + match self { + TelemetrySetup::Noop => tracing::subscriber::NoSubscriber::new().try_init(), + TelemetrySetup::Raw => Registry::default() + .with(filter) + .with(RawLoggingLayer::new(ConsoleWriter::default())) + .try_init(), + TelemetrySetup::Immediate { reporter } => { + let layer = ImmediateLayer::new(reporter.clone()); + Registry::default().with(filter).with(layer).try_init() + } + TelemetrySetup::OpenTelemetry { logger_provider } => { + let sdk_layer = OpenTelemetryTracingBridge::new(logger_provider); + Registry::default().with(filter).with(sdk_layer).try_init() + } + } + } + + /// Run a closure with the appropriate tracing subscriber for this setup. + /// + /// The closure runs with the configured logging layer active. + pub fn with_subscriber(&self, log_level: LogLevel, f: F) -> R where F: FnOnce() -> R, { let filter = crate::get_env_filter(log_level); match self { - EngineLogsSetup::Noop => { + TelemetrySetup::Noop => { let subscriber = tracing::subscriber::NoSubscriber::new(); tracing::subscriber::with_default(subscriber, || f()) } - EngineLogsSetup::Raw => { + TelemetrySetup::Raw => { let subscriber = Registry::default() .with(filter) .with(RawLoggingLayer::new(ConsoleWriter::default())); tracing::subscriber::with_default(subscriber, || f()) } - EngineLogsSetup::Immediate { reporter } => { + TelemetrySetup::Immediate { reporter } => { let layer = ImmediateLayer::new(reporter.clone()); let subscriber = Registry::default().with(filter).with(layer); tracing::subscriber::with_default(subscriber, || f()) } - EngineLogsSetup::OpenTelemetry { logger_provider } => { + TelemetrySetup::OpenTelemetry { logger_provider } => { let sdk_layer = OpenTelemetryTracingBridge::new(logger_provider); let subscriber = Registry::default().with(filter).with(sdk_layer); tracing::subscriber::with_default(subscriber, || f()) diff --git a/rust/otap-dataflow/crates/telemetry/src/telemetry_runtime.rs b/rust/otap-dataflow/crates/telemetry/src/telemetry_runtime.rs index 56637ba322..a646b45590 100644 --- a/rust/otap-dataflow/crates/telemetry/src/telemetry_runtime.rs +++ b/rust/otap-dataflow/crates/telemetry/src/telemetry_runtime.rs @@ -7,19 +7,16 @@ pub mod logger_provider; pub mod meter_provider; use opentelemetry::KeyValue; -use opentelemetry_appender_tracing::layer::OpenTelemetryTracingBridge; use opentelemetry_sdk::{Resource, logs::SdkLoggerProvider, metrics::SdkMeterProvider}; use otap_df_config::pipeline::service::telemetry::{ AttributeValue, AttributeValueArray, TelemetryConfig, logs::{OutputMode, ProviderMode}, }; -use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, util::TryInitError}; use crate::{ LogsReceiver, error::Error, - logs::{ImmediateLayer, LogsCollector, LogsReporter}, - self_tracing::{ConsoleWriter, RawLoggingLayer}, + logs::{LogsCollector, LogsReporter}, telemetry_runtime::logger_provider::LoggerProvider, telemetry_runtime::meter_provider::MeterProvider, }; @@ -100,13 +97,6 @@ impl TelemetryRuntime { (None, None, None) }; - let tracing_setup = - 
tracing_subscriber::registry().with(crate::get_env_filter(config.logs.level)); - - let logerr = |err: TryInitError| { - crate::raw_error!("tracing.subscriber.init", error = err.to_string()); - }; - // Check if either global or engine needs the OpenTelemetry logger provider let global_needs_otel = config.logs.providers.global == ProviderMode::OpenTelemetry; let engine_needs_otel = config.logs.providers.engine == ProviderMode::OpenTelemetry; @@ -123,43 +113,14 @@ impl TelemetryRuntime { // Configure the global subscriber based on providers.global. // Engine threads override this with their own subscriber via with_default(). - match config.logs.providers.global { - ProviderMode::Noop => { - // No-op: just install the filter, events are dropped - if let Err(err) = tracing::subscriber::NoSubscriber::new().try_init() { - logerr(err); - } - } - ProviderMode::Raw => { - if let Err(err) = tracing_setup - .with(RawLoggingLayer::new(ConsoleWriter::default())) - .try_init() - { - logerr(err); - } - } - ProviderMode::Immediate => { - let reporter = logs_reporter.clone().ok_or_else(|| { - Error::ConfigurationError("Immediate logging requires a LogsReporter".into()) - })?; - let channel_layer = ImmediateLayer::new(reporter); - if let Err(err) = tracing_setup.with(channel_layer).try_init() { - logerr(err); - } - } - ProviderMode::OpenTelemetry => { - // logger_provider is guaranteed to be Some here since global_needs_otel is true - let sdk_layer = OpenTelemetryTracingBridge::new( - logger_provider - .as_ref() - .expect("logger_provider configured when global is OpenTelemetry"), - ); - - if let Err(err) = tracing_setup.with(sdk_layer).try_init() { - logerr(err) - } - } - }; + let global_setup = Self::make_telemetry_setup( + config.logs.providers.global, + logs_reporter.as_ref(), + logger_provider.as_ref(), + )?; + if let Err(err) = global_setup.try_init_global(config.logs.level) { + crate::raw_error!("tracing.subscriber.init", error = err.to_string()); + } // Note: Any span-level detail, typically through a traces provider, has // to be configured via the try_init() cases above. @@ -253,6 +214,61 @@ impl TelemetryRuntime { self.logs_collector.take() } + /// Create a `TelemetrySetup` for the given provider mode. + /// + /// This uses the runtime's shared `logs_reporter` and `logger_provider` to configure + /// the setup for the given provider mode. + /// + /// # Panics + /// Panics if the provider mode requires a resource that wasn't configured: + /// - `Immediate` requires `logs_reporter` to be present + /// - `OpenTelemetry` requires `logger_provider` to be present + #[must_use] + pub fn telemetry_setup_for(&self, provider_mode: ProviderMode) -> crate::logs::TelemetrySetup { + Self::make_telemetry_setup( + provider_mode, + self.logs_reporter.as_ref(), + self.logger_provider.as_ref(), + ) + .expect("validated: provider mode resources should be configured") + } + + /// Helper to create a TelemetrySetup from a ProviderMode and optional resources. + /// + /// Returns an error if the mode requires a resource that isn't provided. 
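The split between `try_init_global` and `with_subscriber` above mirrors the two installation modes `tracing` itself offers, and the helper below builds the setup those methods run with. A minimal standalone sketch of the distinction, using only stock `tracing`/`tracing-subscriber` APIs rather than this patch's types:

```rust
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, Registry};

fn main() {
    // Thread-scoped: only events emitted inside the closure, on this
    // thread, reach this subscriber (the mechanism `with_subscriber` wraps).
    let local = Registry::default().with(tracing_subscriber::fmt::layer());
    tracing::subscriber::with_default(local, || {
        tracing::info!("seen by the thread-local subscriber");
    });

    // Process-wide: succeeds at most once (the mechanism `try_init_global` wraps).
    if Registry::default()
        .with(tracing_subscriber::fmt::layer())
        .try_init()
        .is_err()
    {
        eprintln!("a global subscriber was already installed");
    }
}
```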
+ fn make_telemetry_setup( + provider_mode: ProviderMode, + logs_reporter: Option<&LogsReporter>, + logger_provider: Option<&SdkLoggerProvider>, + ) -> Result { + use crate::logs::TelemetrySetup; + + match provider_mode { + ProviderMode::Noop => Ok(TelemetrySetup::Noop), + ProviderMode::Raw => Ok(TelemetrySetup::Raw), + ProviderMode::Immediate => { + let reporter = logs_reporter.ok_or_else(|| { + Error::ConfigurationError( + "Immediate provider mode requires logs_reporter".into(), + ) + })?; + Ok(TelemetrySetup::Immediate { + reporter: reporter.clone(), + }) + } + ProviderMode::OpenTelemetry => { + let provider = logger_provider.ok_or_else(|| { + Error::ConfigurationError( + "OpenTelemetry provider mode requires logger_provider".into(), + ) + })?; + Ok(TelemetrySetup::OpenTelemetry { + logger_provider: provider.clone(), + }) + } + } + } + /// Shutdown the OpenTelemetry SDK. pub fn shutdown(&self) -> Result<(), Error> { let meter_shutdown_result = self.meter_provider().shutdown(); From 946f55aea895fda616fa376778840c739416a305 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Tue, 13 Jan 2026 17:59:10 -0800 Subject: [PATCH 79/92] wip --- .../otap/src/console_exporter/formatter.rs | 539 ++++++++++++++++++ .../crates/otap/src/console_exporter/mod.rs | 194 +++++++ rust/otap-dataflow/crates/otap/src/lib.rs | 3 + 3 files changed, 736 insertions(+) create mode 100644 rust/otap-dataflow/crates/otap/src/console_exporter/formatter.rs create mode 100644 rust/otap-dataflow/crates/otap/src/console_exporter/mod.rs diff --git a/rust/otap-dataflow/crates/otap/src/console_exporter/formatter.rs b/rust/otap-dataflow/crates/otap/src/console_exporter/formatter.rs new file mode 100644 index 0000000000..eb0b40fdea --- /dev/null +++ b/rust/otap-dataflow/crates/otap/src/console_exporter/formatter.rs @@ -0,0 +1,539 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +//! Hierarchical formatter for OTLP data with tree-style output. +//! +//! Output format: +//! ```text +//! RESOURCE {service.name=my-service, host.name=localhost} +//! │ SCOPE {name=my-library, version=1.0.0} +//! │ ├─ INFO event_name: message body [attr=value, ...] +//! │ ├─ WARN event_name: warning message +//! │ └─ ERROR event_name: error message [code=500] +//! ``` + +use chrono::{DateTime, Datelike, Timelike, Utc}; +use otap_df_pdata::views::common::{AnyValueView, AttributeView, InstrumentationScopeView}; +use otap_df_pdata::views::logs::{LogRecordView, LogsDataView, ResourceLogsView, ScopeLogsView}; +use otap_df_pdata::views::otlp::bytes::logs::RawLogsData; +use otap_df_pdata::views::resource::ResourceView; +use otap_df_pdata::OtlpProtoBytes; +use std::io::{Cursor, Write}; + +/// Buffer size for formatting output. +const OUTPUT_BUFFER_SIZE: usize = 8192; + +/// Log level derived from OTLP severity. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum Level { + Trace, + Debug, + Info, + Warn, + Error, +} + +impl Level { + fn as_str(self) -> &'static str { + match self { + Level::Trace => "TRACE", + Level::Debug => "DEBUG", + Level::Info => "INFO", + Level::Warn => "WARN", + Level::Error => "ERROR", + } + } +} + +/// Tree drawing characters for Unicode mode. +mod unicode_tree { + pub const VERTICAL: &str = "│"; + pub const TEE: &str = "├─"; + pub const CORNER: &str = "└─"; + pub const SPACE: &str = " "; +} + +/// Tree drawing characters for ASCII mode. 
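For comparison with the Unicode sample in the formatter's module docs, the ASCII character set defined next renders the same hierarchy roughly like this (illustrative, not captured output):

```text
RESOURCE {service.name=my-service, host.name=localhost}
| SCOPE {name=my-library, version=1.0.0}
| +- INFO event_name: message body [attr=value, ...]
| +- WARN event_name: warning message
| \- ERROR event_name: error message [code=500]
```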
+mod ascii_tree { + pub const VERTICAL: &str = "|"; + pub const TEE: &str = "+-"; + pub const CORNER: &str = "\\-"; + pub const SPACE: &str = " "; +} + +/// ANSI codes for colored output. +#[derive(Clone, Copy)] +#[repr(u8)] +enum AnsiCode { + Reset = 0, + Bold = 1, + Dim = 2, + Red = 31, + Green = 32, + Yellow = 33, + Blue = 34, + Magenta = 35, + Cyan = 36, +} + +/// Hierarchical formatter for OTLP data. +pub struct HierarchicalFormatter { + use_color: bool, + use_unicode: bool, +} + +impl HierarchicalFormatter { + /// Create a new hierarchical formatter. + #[must_use] + pub fn new(use_color: bool, use_unicode: bool) -> Self { + Self { + use_color, + use_unicode, + } + } + + /// Format logs from OTLP bytes. + pub fn format_logs_bytes(&self, bytes: &OtlpProtoBytes) { + if let OtlpProtoBytes::ExportLogsRequest(data) = bytes { + let logs_data = RawLogsData::new(data.as_ref()); + self.format_logs_data(&logs_data); + } + } + + /// Format logs from a LogsDataView. + fn format_logs_data<'a, L: LogsDataView>(&self, logs_data: &'a L) { + for resource_logs in logs_data.resources() { + self.format_resource_logs(&resource_logs); + } + } + + /// Format a ResourceLogs with its nested scopes. + fn format_resource_logs<'a, R: ResourceLogsView>(&self, resource_logs: &'a R) { + let mut buf = [0u8; OUTPUT_BUFFER_SIZE]; + let mut w = Cursor::new(buf.as_mut_slice()); + + // Get first timestamp from nested log records for the resource line + let first_ts = self.get_first_log_timestamp(resource_logs); + + // Write resource header + self.write_ansi(&mut w, AnsiCode::Dim); + Self::write_timestamp(&mut w, first_ts); + self.write_ansi(&mut w, AnsiCode::Reset); + let _ = w.write_all(b" "); + self.write_ansi(&mut w, AnsiCode::Cyan); + self.write_ansi(&mut w, AnsiCode::Bold); + let _ = w.write_all(b"RESOURCE"); + self.write_ansi(&mut w, AnsiCode::Reset); + let _ = w.write_all(b" "); + + // Write resource attributes + if let Some(resource) = resource_logs.resource() { + self.write_resource_attrs(&mut w, &resource); + } else { + let _ = w.write_all(b"{}"); + } + let _ = w.write_all(b"\n"); + + // Print resource line + let len = w.position() as usize; + let _ = std::io::stdout().write_all(&buf[..len]); + + // Format each scope + let scopes: Vec<_> = resource_logs.scopes().collect(); + let scope_count = scopes.len(); + for (i, scope_logs) in scopes.into_iter().enumerate() { + let is_last_scope = i == scope_count - 1; + self.format_scope_logs(&scope_logs, is_last_scope); + } + } + + /// Get the first timestamp from log records in a ResourceLogs. + fn get_first_log_timestamp<'a, R: ResourceLogsView>(&self, resource_logs: &'a R) -> u64 { + for scope_logs in resource_logs.scopes() { + for log_record in scope_logs.log_records() { + if let Some(ts) = log_record.time_unix_nano() { + return ts; + } + if let Some(ts) = log_record.observed_time_unix_nano() { + return ts; + } + } + } + 0 + } + + /// Format a ScopeLogs with its nested log records. 
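Note the rule `get_first_log_timestamp` above encodes, which `format_scope_logs` below reuses per scope: prefer `time_unix_nano`, fall back to `observed_time_unix_nano`, and default to 0 (rendered as the epoch). Reduced to hypothetical plain tuples instead of the view types, the logic is:

```rust
// Each stand-in record carries (time_unix_nano, observed_time_unix_nano).
fn first_timestamp(records: &[(Option<u64>, Option<u64>)]) -> u64 {
    records
        .iter()
        .find_map(|&(time, observed)| time.or(observed))
        .unwrap_or(0)
}

fn main() {
    // The first record with either field wins; within a record,
    // the actual event time beats the observed time.
    assert_eq!(first_timestamp(&[(None, Some(7)), (Some(3), None)]), 7);
    assert_eq!(first_timestamp(&[]), 0);
}
```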
+ fn format_scope_logs<'a, S: ScopeLogsView>(&self, scope_logs: &'a S, is_last_scope: bool) { + let mut buf = [0u8; OUTPUT_BUFFER_SIZE]; + let mut w = Cursor::new(buf.as_mut_slice()); + + let tree = self.tree_chars(); + + // Get first timestamp from log records for the scope line + let first_ts = scope_logs + .log_records() + .find_map(|lr| lr.time_unix_nano().or_else(|| lr.observed_time_unix_nano())) + .unwrap_or(0); + + // Write scope header with tree prefix + let _ = w.write_all(tree.vertical.as_bytes()); + let _ = w.write_all(b" "); + self.write_ansi(&mut w, AnsiCode::Dim); + Self::write_timestamp(&mut w, first_ts); + self.write_ansi(&mut w, AnsiCode::Reset); + let _ = w.write_all(b" "); + self.write_ansi(&mut w, AnsiCode::Magenta); + self.write_ansi(&mut w, AnsiCode::Bold); + let _ = w.write_all(b"SCOPE"); + self.write_ansi(&mut w, AnsiCode::Reset); + let _ = w.write_all(b" "); + + // Write scope info + if let Some(scope) = scope_logs.scope() { + self.write_scope_info(&mut w, &scope); + } else { + let _ = w.write_all(b"{}"); + } + let _ = w.write_all(b"\n"); + + // Print scope line + let len = w.position() as usize; + let _ = std::io::stdout().write_all(&buf[..len]); + + // Format each log record + let records: Vec<_> = scope_logs.log_records().collect(); + let record_count = records.len(); + for (i, log_record) in records.into_iter().enumerate() { + let is_last_record = i == record_count - 1; + self.format_log_record(&log_record, is_last_scope, is_last_record); + } + } + + /// Format a single log record. + fn format_log_record<'a, L: LogRecordView>( + &self, + log_record: &'a L, + is_last_scope: bool, + is_last_record: bool, + ) { + let mut buf = [0u8; OUTPUT_BUFFER_SIZE]; + let mut w = Cursor::new(buf.as_mut_slice()); + + let tree = self.tree_chars(); + + // Tree prefix: vertical line for scope continuation, then branch for record + let _ = w.write_all(tree.vertical.as_bytes()); + let _ = w.write_all(b" "); + if is_last_record && is_last_scope { + let _ = w.write_all(tree.corner.as_bytes()); + } else { + let _ = w.write_all(tree.tee.as_bytes()); + } + let _ = w.write_all(b" "); + + // Timestamp + let ts = log_record + .time_unix_nano() + .or_else(|| log_record.observed_time_unix_nano()) + .unwrap_or(0); + self.write_ansi(&mut w, AnsiCode::Dim); + Self::write_timestamp(&mut w, ts); + self.write_ansi(&mut w, AnsiCode::Reset); + let _ = w.write_all(b" "); + + // Level + let level = self.severity_to_level(log_record.severity_number()); + self.write_level(&mut w, level); + let _ = w.write_all(b" "); + + // Event name + self.write_ansi(&mut w, AnsiCode::Bold); + if let Some(name) = log_record.event_name() { + let _ = w.write_all(name.as_ref()); + } else { + let _ = w.write_all(b"event"); + } + self.write_ansi(&mut w, AnsiCode::Reset); + let _ = w.write_all(b": "); + + // Body + if let Some(body) = log_record.body() { + self.write_any_value(&mut w, &body); + } + + // Attributes + let mut attrs = log_record.attributes().peekable(); + if attrs.peek().is_some() { + let _ = w.write_all(b" ["); + let mut first = true; + for attr in attrs { + if !first { + let _ = w.write_all(b", "); + } + first = false; + let _ = w.write_all(attr.key()); + let _ = w.write_all(b"="); + if let Some(v) = attr.value() { + self.write_any_value(&mut w, &v); + } + } + let _ = w.write_all(b"]"); + } + + let _ = w.write_all(b"\n"); + + // Print to stdout or stderr based on level + let len = w.position() as usize; + if matches!(level, Level::Error | Level::Warn) { + let _ = std::io::stderr().write_all(&buf[..len]); + } 
else {
+            let _ = std::io::stdout().write_all(&buf[..len]);
+        }
+    }
+
+    /// Get tree drawing characters based on mode.
+    fn tree_chars(&self) -> TreeChars {
+        if self.use_unicode {
+            TreeChars {
+                vertical: unicode_tree::VERTICAL,
+                tee: unicode_tree::TEE,
+                corner: unicode_tree::CORNER,
+                _space: unicode_tree::SPACE,
+            }
+        } else {
+            TreeChars {
+                vertical: ascii_tree::VERTICAL,
+                tee: ascii_tree::TEE,
+                corner: ascii_tree::CORNER,
+                _space: ascii_tree::SPACE,
+            }
+        }
+    }
+
+    /// Write an ANSI escape code.
+    #[inline]
+    fn write_ansi(&self, w: &mut Cursor<&mut [u8]>, code: AnsiCode) {
+        if self.use_color {
+            let _ = write!(w, "\x1b[{}m", code as u8);
+        }
+    }
+
+    /// Write a colored level indicator.
+    fn write_level(&self, w: &mut Cursor<&mut [u8]>, level: Level) {
+        let color = match level {
+            Level::Error => AnsiCode::Red,
+            Level::Warn => AnsiCode::Yellow,
+            Level::Info => AnsiCode::Green,
+            Level::Debug => AnsiCode::Blue,
+            Level::Trace => AnsiCode::Magenta,
+        };
+        self.write_ansi(w, color);
+        let _ = w.write_all(level.as_str().as_bytes());
+        self.write_ansi(w, AnsiCode::Reset);
+        // Pad to 5 chars
+        let padding = 5 - level.as_str().len();
+        for _ in 0..padding {
+            let _ = w.write_all(b" ");
+        }
+    }
+
+    /// Convert OTLP severity number to Level.
+    fn severity_to_level(&self, severity: Option<i32>) -> Level {
+        match severity {
+            Some(n) if n >= 17 => Level::Error, // FATAL, ERROR
+            Some(n) if n >= 13 => Level::Warn, // WARN
+            Some(n) if n >= 9 => Level::Info, // INFO
+            Some(n) if n >= 5 => Level::Debug, // DEBUG
+            Some(_) => Level::Trace, // TRACE
+            None => Level::Info, // Default to INFO
+        }
+    }
+
+    /// Write timestamp in ISO 8601 format.
+    fn write_timestamp(w: &mut Cursor<&mut [u8]>, nanos: u64) {
+        let secs = (nanos / 1_000_000_000) as i64;
+        let subsec_nanos = (nanos % 1_000_000_000) as u32;
+
+        if let Some(dt) = DateTime::<Utc>::from_timestamp(secs, subsec_nanos) {
+            let date = dt.date_naive();
+            let time = dt.time();
+            let millis = subsec_nanos / 1_000_000;
+
+            let _ = write!(
+                w,
+                "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}.{:03}Z",
+                date.year(),
+                date.month(),
+                date.day(),
+                time.hour(),
+                time.minute(),
+                time.second(),
+                millis
+            );
+        } else {
+            let _ = w.write_all(b"");
+        }
+    }
+
+    /// Write resource attributes.
+    fn write_resource_attrs<R: ResourceView>(&self, w: &mut Cursor<&mut [u8]>, resource: &R) {
+        let _ = w.write_all(b"{");
+        let mut first = true;
+        for attr in resource.attributes() {
+            if !first {
+                let _ = w.write_all(b", ");
+            }
+            first = false;
+            let _ = w.write_all(attr.key());
+            let _ = w.write_all(b"=");
+            if let Some(v) = attr.value() {
+                self.write_any_value(w, &v);
+            }
+        }
+        let _ = w.write_all(b"}");
+    }
+
+    /// Write scope information.
+    fn write_scope_info<S: InstrumentationScopeView>(
+        &self,
+        w: &mut Cursor<&mut [u8]>,
+        scope: &S,
+    ) {
+        let _ = w.write_all(b"{");
+        let mut has_content = false;
+
+        if let Some(name) = scope.name() {
+            let _ = w.write_all(b"name=");
+            let _ = w.write_all(name.as_ref());
+            has_content = true;
+        }
+
+        if let Some(version) = scope.version() {
+            if has_content {
+                let _ = w.write_all(b", ");
+            }
+            let _ = w.write_all(b"version=");
+            let _ = w.write_all(version.as_ref());
+            has_content = true;
+        }
+
+        // Include scope attributes
+        for attr in scope.attributes() {
+            if has_content {
+                let _ = w.write_all(b", ");
+            }
+            let _ = w.write_all(attr.key());
+            let _ = w.write_all(b"=");
+            if let Some(v) = attr.value() {
+                self.write_any_value(w, &v);
+            }
+            has_content = true;
+        }
+
+        let _ = w.write_all(b"}");
+    }
+
+    /// Write an AnyValue.
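Before `write_any_value` continues below, it is worth isolating the I/O pattern every method here shares: format into a fixed stack buffer through a `Cursor`, then emit the filled prefix with one `write_all`, so each log line costs a single write and no heap allocation. A standalone sketch of the pattern (not patch code):

```rust
use std::io::{Cursor, Write};

fn print_line(level: &str, msg: &str) {
    // Small fixed buffer for the sketch; the formatter uses OUTPUT_BUFFER_SIZE.
    let mut buf = [0u8; 256];
    let mut w = Cursor::new(buf.as_mut_slice());
    // Writes that would overflow the slice fail; `let _ =` drops the error,
    // truncating the line instead of panicking, as the formatter does.
    let _ = write!(w, "{level} {msg}");
    let _ = w.write_all(b"\n");
    let len = w.position() as usize;
    let _ = std::io::stdout().write_all(&buf[..len]);
}

fn main() {
    print_line("INFO", "pipeline started");
}
```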
+ fn write_any_value<'a>(&self, w: &mut Cursor<&mut [u8]>, value: &impl AnyValueView<'a>) { + use otap_df_pdata::views::common::ValueType; + + match value.value_type() { + ValueType::String => { + if let Some(s) = value.as_string() { + let _ = w.write_all(s); + } + } + ValueType::Int64 => { + if let Some(i) = value.as_int64() { + let _ = write!(w, "{}", i); + } + } + ValueType::Bool => { + if let Some(b) = value.as_bool() { + let _ = w.write_all(if b { b"true" } else { b"false" }); + } + } + ValueType::Double => { + if let Some(d) = value.as_double() { + let _ = write!(w, "{:.6}", d); + } + } + ValueType::Bytes => { + if let Some(bytes) = value.as_bytes() { + let _ = write!(w, "<{} bytes>", bytes.len()); + } + } + ValueType::Array => { + let _ = w.write_all(b"["); + if let Some(array_iter) = value.as_array() { + let mut first = true; + for item in array_iter { + if !first { + let _ = w.write_all(b", "); + } + first = false; + self.write_any_value(w, &item); + } + } + let _ = w.write_all(b"]"); + } + ValueType::KeyValueList => { + let _ = w.write_all(b"{"); + if let Some(kvlist_iter) = value.as_kvlist() { + let mut first = true; + for kv in kvlist_iter { + if !first { + let _ = w.write_all(b", "); + } + first = false; + let _ = w.write_all(kv.key()); + if let Some(val) = kv.value() { + let _ = w.write_all(b"="); + self.write_any_value(w, &val); + } + } + } + let _ = w.write_all(b"}"); + } + ValueType::Empty => {} + } + } +} + +/// Tree drawing characters. +struct TreeChars { + vertical: &'static str, + tee: &'static str, + corner: &'static str, + _space: &'static str, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_severity_to_level() { + let formatter = HierarchicalFormatter::new(false, true); + + assert_eq!(formatter.severity_to_level(Some(21)), Level::Error); // FATAL + assert_eq!(formatter.severity_to_level(Some(17)), Level::Error); // ERROR + assert_eq!(formatter.severity_to_level(Some(13)), Level::Warn); // WARN + assert_eq!(formatter.severity_to_level(Some(9)), Level::Info); // INFO + assert_eq!(formatter.severity_to_level(Some(5)), Level::Debug); // DEBUG + assert_eq!(formatter.severity_to_level(Some(1)), Level::Trace); // TRACE + assert_eq!(formatter.severity_to_level(None), Level::Info); // Default + } + + #[test] + fn test_tree_chars() { + let unicode = HierarchicalFormatter::new(false, true); + let ascii = HierarchicalFormatter::new(false, false); + + assert_eq!(unicode.tree_chars().vertical, "│"); + assert_eq!(unicode.tree_chars().tee, "├─"); + + assert_eq!(ascii.tree_chars().vertical, "|"); + assert_eq!(ascii.tree_chars().tee, "+-"); + } +} diff --git a/rust/otap-dataflow/crates/otap/src/console_exporter/mod.rs b/rust/otap-dataflow/crates/otap/src/console_exporter/mod.rs new file mode 100644 index 0000000000..22a15f0208 --- /dev/null +++ b/rust/otap-dataflow/crates/otap/src/console_exporter/mod.rs @@ -0,0 +1,194 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +//! Console exporter that prints OTLP data with hierarchical formatting. +//! +//! This exporter displays logs (and future support for traces/metrics) with +//! resource and scope grouping using tree-style output: +//! +//! ```text +//! RESOURCE {service.name=my-service, ...} +//! │ SCOPE {name=my-scope, version=1.0} +//! │ ├─ INFO event_name: message [attr=value] +//! │ ├─ WARN event_name: warning message +//! │ └─ ERROR event_name: error message [code=500] +//! 
``` + +use crate::OTAP_EXPORTER_FACTORIES; +use crate::pdata::OtapPdata; +use async_trait::async_trait; +use linkme::distributed_slice; +use otap_df_config::node::NodeUserConfig; +use otap_df_config::SignalType; +use otap_df_engine::config::ExporterConfig; +use otap_df_engine::context::PipelineContext; +use otap_df_engine::control::{AckMsg, NodeControlMsg}; +use otap_df_engine::error::Error; +use otap_df_engine::exporter::ExporterWrapper; +use otap_df_engine::local::exporter::{EffectHandler, Exporter}; +use otap_df_engine::message::{Message, MessageChannel}; +use otap_df_engine::node::NodeId; +use otap_df_engine::terminal_state::TerminalState; +use otap_df_engine::{ConsumerEffectHandlerExtension, ExporterFactory}; +use otap_df_pdata::OtapPayload; +use std::sync::Arc; + +mod formatter; + +use formatter::HierarchicalFormatter; + +/// The URN for the console exporter +pub const CONSOLE_EXPORTER_URN: &str = "urn:otap:console:exporter"; + +/// Configuration for the console exporter +#[derive(Debug, Clone, Default, serde::Deserialize)] +pub struct ConsoleExporterConfig { + /// Whether to use ANSI colors in output (default: true) + #[serde(default = "default_color")] + pub color: bool, + /// Whether to use Unicode box-drawing characters (default: true) + #[serde(default = "default_unicode")] + pub unicode: bool, +} + +fn default_color() -> bool { + true +} + +fn default_unicode() -> bool { + true +} + +/// Console exporter that prints OTLP data with hierarchical formatting +pub struct ConsoleExporter { + formatter: HierarchicalFormatter, +} + +impl ConsoleExporter { + /// Create a new console exporter with the given configuration. + #[must_use] + pub fn new(config: ConsoleExporterConfig) -> Self { + Self { + formatter: HierarchicalFormatter::new(config.color, config.unicode), + } + } +} + +/// Declare the Console Exporter as a local exporter factory +#[allow(unsafe_code)] +#[distributed_slice(OTAP_EXPORTER_FACTORIES)] +pub static CONSOLE_EXPORTER: ExporterFactory = ExporterFactory { + name: CONSOLE_EXPORTER_URN, + create: |_pipeline: PipelineContext, + node: NodeId, + node_config: Arc, + exporter_config: &ExporterConfig| { + let config: ConsoleExporterConfig = + serde_json::from_value(node_config.config.clone()).map_err(|e| { + otap_df_config::error::Error::InvalidUserConfig { + error: format!("Failed to parse console exporter config: {}", e), + } + })?; + Ok(ExporterWrapper::local( + ConsoleExporter::new(config), + node, + node_config, + exporter_config, + )) + }, +}; + +#[async_trait(?Send)] +impl Exporter for ConsoleExporter { + async fn start( + self: Box, + mut msg_chan: MessageChannel, + effect_handler: EffectHandler, + ) -> Result { + loop { + match msg_chan.recv().await? { + Message::Control(NodeControlMsg::Shutdown { .. 
}) => break, + Message::PData(data) => { + self.export(&data); + effect_handler.notify_ack(AckMsg::new(data)).await?; + } + _ => { + // do nothing + } + } + } + + Ok(TerminalState::default()) + } +} + +impl ConsoleExporter { + fn export(&self, data: &OtapPdata) { + let (_, payload) = data.clone().into_parts(); + match payload.signal_type() { + SignalType::Logs => self.export_logs(&payload), + SignalType::Traces => self.export_traces(&payload), + SignalType::Metrics => self.export_metrics(&payload), + } + } + + fn export_logs(&self, payload: &OtapPayload) { + match payload { + OtapPayload::OtlpBytes(bytes) => { + self.formatter.format_logs_bytes(bytes); + } + OtapPayload::OtapArrowRecords(_records) => { + // TODO: Support Arrow format + eprintln!("Console exporter: Arrow format not yet supported for logs"); + } + } + } + + fn export_traces(&self, _payload: &OtapPayload) { + // TODO: Implement traces formatting + eprintln!("Console exporter: Traces formatting not yet implemented"); + } + + fn export_metrics(&self, _payload: &OtapPayload) { + // TODO: Implement metrics formatting + eprintln!("Console exporter: Metrics formatting not yet implemented"); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::testing::*; + use otap_df_engine::Interests; + use serde_json::json; + + #[test] + fn test_console_exporter_no_subscription() { + test_exporter_no_subscription(&CONSOLE_EXPORTER, json!({})); + } + + #[test] + fn test_console_exporter_with_subscription() { + test_exporter_with_subscription( + &CONSOLE_EXPORTER, + json!({}), + Interests::ACKS, + Interests::ACKS, + ); + } + + #[test] + fn test_console_exporter_config_defaults() { + let config: ConsoleExporterConfig = serde_json::from_value(json!({})).unwrap(); + assert!(config.color); + assert!(config.unicode); + } + + #[test] + fn test_console_exporter_config_custom() { + let config: ConsoleExporterConfig = + serde_json::from_value(json!({"color": false, "unicode": false})).unwrap(); + assert!(!config.color); + assert!(!config.unicode); + } +} diff --git a/rust/otap-dataflow/crates/otap/src/lib.rs b/rust/otap-dataflow/crates/otap/src/lib.rs index 73e3cb2d1d..ff2315f323 100644 --- a/rust/otap-dataflow/crates/otap/src/lib.rs +++ b/rust/otap-dataflow/crates/otap/src/lib.rs @@ -49,6 +49,9 @@ pub mod filter_processor; /// Implementation of a noop exporter that acts as a exporter placeholder pub mod noop_exporter; +/// Console exporter that prints OTLP data with hierarchical formatting +pub mod console_exporter; + /// An error-exporter returns a static error. 
pub mod error_exporter; From 32a62c46dc894e39fb036b0e8f8685aabd8e2b36 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Tue, 13 Jan 2026 23:06:19 -0800 Subject: [PATCH 80/92] huge --- rust/otap-dataflow/Cargo.toml | 1 + .../configs/internal-telemetry.yaml | 16 +-- .../crates/controller/src/error.rs | 7 + .../crates/controller/src/lib.rs | 134 ++++++++++++++++-- rust/otap-dataflow/crates/engine/src/lib.rs | 2 + .../crates/engine/src/receiver.rs | 4 + .../crates/engine/src/runtime_pipeline.rs | 2 + .../crates/otap/src/console_exporter/mod.rs | 2 +- .../crates/state/src/reporter.rs | 4 +- .../otap-dataflow/crates/telemetry/Cargo.toml | 1 + .../crates/telemetry/src/internal_events.rs | 87 +++++++++--- .../crates/telemetry/src/logs.rs | 4 + .../telemetry/src/self_tracing/formatter.rs | 77 ++++++++++ .../crates/telemetry/src/telemetry_runtime.rs | 43 ++++-- rust/otap-dataflow/src/main.rs | 93 ++++++------ 15 files changed, 378 insertions(+), 99 deletions(-) diff --git a/rust/otap-dataflow/Cargo.toml b/rust/otap-dataflow/Cargo.toml index 4fb98aa794..76332f970c 100644 --- a/rust/otap-dataflow/Cargo.toml +++ b/rust/otap-dataflow/Cargo.toml @@ -150,6 +150,7 @@ sysinfo = "0.37" tempfile = "3" thiserror = "2.0.17" tracing = { version = ">=0.1.40", default-features = false } +tracing-core = { version = ">=0.1.32", default-features = false } tracing-subscriber = { version = "0.3", default-features = false } tokio = { version = "1.48.0", features = ["rt", "time", "net", "io-util", "sync", "macros", "rt-multi-thread", "fs", "io-std", "process"] } tokio-stream = "0.1.17" diff --git a/rust/otap-dataflow/configs/internal-telemetry.yaml b/rust/otap-dataflow/configs/internal-telemetry.yaml index e0d7fefc15..5bf47fc0af 100644 --- a/rust/otap-dataflow/configs/internal-telemetry.yaml +++ b/rust/otap-dataflow/configs/internal-telemetry.yaml @@ -43,22 +43,12 @@ internal: out_ports: out_port: destinations: - - debug + - console dispatch_strategy: round_robin config: {} - debug: - kind: processor - plugin_urn: "urn:otel:debug:processor" - out_ports: - out_port: - destinations: - - noop - dispatch_strategy: round_robin - config: - verbosity: detailed - noop: + console: kind: exporter - plugin_urn: "urn:otel:noop:exporter" + plugin_urn: "urn:otel:console:exporter" config: {} service: diff --git a/rust/otap-dataflow/crates/controller/src/error.rs b/rust/otap-dataflow/crates/controller/src/error.rs index 683fb5e59f..17e5ea9031 100644 --- a/rust/otap-dataflow/crates/controller/src/error.rs +++ b/rust/otap-dataflow/crates/controller/src/error.rs @@ -103,4 +103,11 @@ pub enum Error { /// Error message describing the configuration problem. message: String, }, + + /// Internal telemetry pipeline failed to start. + #[error("Internal telemetry pipeline failed to start: {message}")] + InternalPipelineStartupFailed { + /// Error message describing why startup failed. + message: String, + }, } diff --git a/rust/otap-dataflow/crates/controller/src/lib.rs b/rust/otap-dataflow/crates/controller/src/lib.rs index b0f63487ce..77a00661d6 100644 --- a/rust/otap-dataflow/crates/controller/src/lib.rs +++ b/rust/otap-dataflow/crates/controller/src/lib.rs @@ -41,6 +41,7 @@ use otap_df_telemetry::logs::TelemetrySetup; use otap_df_telemetry::reporter::MetricsReporter; use otap_df_telemetry::telemetry_runtime::TelemetryRuntime; use otap_df_telemetry::{InternalTelemetrySystem, otel_info, otel_info_span, otel_warn}; +use std::sync::mpsc as std_mpsc; use std::thread; /// Error types and helpers for the controller module. 
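The controller hunks that follow wire up a fail-fast startup handshake for the internal pipeline thread, built on the `std::sync::mpsc` import added above. Stripped of pipeline specifics, the shape is roughly this (standalone sketch with assumed names):

```rust
use std::sync::mpsc;
use std::thread;

fn spawn_with_handshake() -> Result<thread::JoinHandle<()>, String> {
    let (startup_tx, startup_rx) = mpsc::channel::<Result<(), String>>();

    let handle = thread::Builder::new()
        .name("internal-pipeline".to_string())
        .spawn(move || {
            // ... build the pipeline, then report the outcome ...
            let _ = startup_tx.send(Ok(()));
            // ... and only then run it to completion ...
        })
        .map_err(|e| e.to_string())?;

    // Block until the child reports. A closed channel means the child
    // exited (or panicked) before it could send anything.
    match startup_rx.recv() {
        Ok(Ok(())) => Ok(handle),
        Ok(Err(e)) => Err(e),
        Err(_) => Err("thread terminated unexpectedly during startup".into()),
    }
}
```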
@@ -102,7 +103,7 @@ impl Controller { let _logs_collector_handle = if let Some(logs_collector) = telemetry_runtime.take_logs_collector() { Some(spawn_thread_local_task( - "logs-collector", + "logs-collector", move |_cancellation_token| logs_collector.run(), )?) } else { @@ -111,6 +112,7 @@ impl Controller { // Get logs receiver for Internal output mode (passed to internal pipeline) let mut logs_receiver = telemetry_runtime.take_logs_receiver(); + eprintln!("DEBUG: logs_receiver after take: {:?}", logs_receiver.is_some()); let metrics_system = InternalTelemetrySystem::new(telemetry_config); let metrics_dispatcher = metrics_system.dispatcher(); @@ -155,6 +157,7 @@ impl Controller { if let Some(internal_config) = pipeline.extract_internal_config() { // TODO: this is a bunch of placeholder values! let internal_logs_receiver = logs_receiver.take(); + eprintln!("DEBUG: internal_logs_receiver after take: {:?}", internal_logs_receiver.is_some()); let internal_factory = self.pipeline_factory; let internal_pipeline_id: PipelineId = "internal".into(); let internal_pipeline_key = DeployedPipelineKey { @@ -178,12 +181,16 @@ impl Controller { .default_pipeline_ctrl_msg_channel_size, ); + // Create a channel to signal startup success/failure + // This allows us to fail fast if the internal pipeline can't build + let (startup_tx, startup_rx) = std_mpsc::channel::>(); + let thread_name = "internal-pipeline".to_string(); let internal_telemetry_setup = internal_telemetry_setup.clone(); let handle = thread::Builder::new() .name(thread_name.clone()) .spawn(move || { - Self::run_pipeline_thread( + Self::run_internal_pipeline_thread( internal_pipeline_key, CoreId { id: 0 }, // No pinning for internal pipeline internal_config, @@ -196,6 +203,7 @@ impl Controller { internal_logs_receiver, internal_ctrl_tx, internal_ctrl_rx, + startup_tx, ) }) .map_err(|e| Error::ThreadSpawnError { @@ -203,16 +211,38 @@ impl Controller { source: e, })?; - otel_info!( - "InternalPipeline.Started", - num_nodes = pipeline.internal_nodes().len() - ); + // Wait for the internal pipeline to signal successful startup + // This ensures we fail fast with a clear error if the internal pipeline can't build + match startup_rx.recv() { + Ok(Ok(())) => { + // Internal pipeline built successfully and is running + } + Ok(Err(e)) => { + // Internal pipeline failed to build - propagate the error + return Err(e); + } + Err(_) => { + // Channel closed unexpectedly - thread may have panicked + return Err(Error::InternalPipelineStartupFailed { + message: "Internal pipeline thread terminated unexpectedly during startup".to_string(), + }); + } + } Some((thread_name, handle)) } else { None }; + // Initialize the global subscriber AFTER the internal pipeline has signaled successful startup. + // This ensures the channel receiver is being consumed before we start sending logs. 
+ telemetry_runtime.init_global_subscriber(); + + otel_info!( + "InternalPipeline.Started", + num_nodes = pipeline.internal_nodes().len() + ); + // Start one thread per requested core // Get available CPU cores for pinning let requested_cores = Self::select_cores_for_quota( @@ -548,19 +578,107 @@ impl Controller { // Build the runtime pipeline from the configuration // Pass logs_receiver for injection into ITR node (if present) + eprintln!("DEBUG: run_pipeline_thread - logs_receiver is_some: {:?}", logs_receiver.is_some()); let logs_receiver_param = logs_receiver .map(|rx| (INTERNAL_TELEMETRY_RECEIVER_URN, rx)); + eprintln!("DEBUG: run_pipeline_thread - logs_receiver_param is_some: {:?}", logs_receiver_param.is_some()); let runtime_pipeline = pipeline_factory .build(pipeline_context.clone(), pipeline_config.clone(), logs_receiver_param) - .map_err(|e| Error::PipelineRuntimeError { - source: Box::new(e), + .map_err(|e| { + eprintln!("DEBUG: run_pipeline_thread - build FAILED: {:?}", e); + Error::PipelineRuntimeError { + source: Box::new(e), + } })?; + eprintln!("DEBUG: run_pipeline_thread - pipeline built for {:?}", pipeline_key); obs_evt_reporter.report(ObservedEvent::ready( pipeline_key.clone(), Some("Pipeline initialization successful.".to_owned()), )); + eprintln!("DEBUG: run_pipeline_thread - about to call run_forever for pipeline_key: {:?}", pipeline_key); + + // Start the pipeline (this will use the current thread's Tokio runtime) + runtime_pipeline + .run_forever( + pipeline_key, + pipeline_context, + obs_evt_reporter, + metrics_reporter, + pipeline_ctrl_msg_tx, + pipeline_ctrl_msg_rx, + ) + .map_err(|e| Error::PipelineRuntimeError { + source: Box::new(e), + }) + }) + } + + /// Runs the internal telemetry pipeline in the current thread. + /// + /// This is similar to `run_pipeline_thread` but includes a startup signal channel + /// to notify the parent thread when the pipeline has successfully built and is ready + /// to receive logs. This allows the controller to fail fast with a clear error message + /// if the internal pipeline configuration is invalid. + #[allow(clippy::too_many_arguments)] + fn run_internal_pipeline_thread( + pipeline_key: DeployedPipelineKey, + core_id: CoreId, + pipeline_config: PipelineConfig, + pipeline_factory: &'static PipelineFactory, + pipeline_context: PipelineContext, + obs_evt_reporter: ObservedEventReporter, + metrics_reporter: MetricsReporter, + telemetry_setup: TelemetrySetup, + log_level: otap_df_config::pipeline::service::telemetry::logs::LogLevel, + logs_receiver: Option, + pipeline_ctrl_msg_tx: PipelineCtrlMsgSender, + pipeline_ctrl_msg_rx: PipelineCtrlMsgReceiver, + startup_tx: std_mpsc::Sender>, + ) -> Result, Error> { + // Run with the appropriate tracing subscriber for this pipeline. 
+ telemetry_setup.with_subscriber(log_level, || { + // Create a tracing span for this pipeline thread + let span = otel_info_span!("internal_pipeline_thread", core.id = core_id.id); + let _guard = span.enter(); + + // No core pinning for internal pipeline - it's lightweight + + obs_evt_reporter.report(ObservedEvent::admitted( + pipeline_key.clone(), + Some("Internal pipeline admission successful.".to_owned()), + )); + + // Build the runtime pipeline from the configuration + // Pass logs_receiver for injection into ITR node + let logs_receiver_param = logs_receiver + .map(|rx| (INTERNAL_TELEMETRY_RECEIVER_URN, rx)); + + let runtime_pipeline = match pipeline_factory + .build(pipeline_context.clone(), pipeline_config.clone(), logs_receiver_param) + { + Ok(pipeline) => pipeline, + Err(e) => { + // Signal failure to parent thread with the actual error + let error = Error::PipelineRuntimeError { + source: Box::new(e), + }; + let _ = startup_tx.send(Err(Error::InternalPipelineStartupFailed { + message: format!("{}", error), + })); + return Err(error); + } + }; + + obs_evt_reporter.report(ObservedEvent::ready( + pipeline_key.clone(), + Some("Internal pipeline initialization successful.".to_owned()), + )); + + // Signal successful startup - the pipeline is built and ready to run + let _ = startup_tx.send(Ok(())); + // Start the pipeline (this will use the current thread's Tokio runtime) runtime_pipeline .run_forever( diff --git a/rust/otap-dataflow/crates/engine/src/lib.rs b/rust/otap-dataflow/crates/engine/src/lib.rs index c2a635278b..3415b5dcd6 100644 --- a/rust/otap-dataflow/crates/engine/src/lib.rs +++ b/rust/otap-dataflow/crates/engine/src/lib.rs @@ -351,7 +351,9 @@ impl PipelineFactory { // Inject logs receiver if this is the target node if let Some((target_urn, ref logs_rx)) = logs_receiver { + eprintln!("DEBUG: build - pipeline: {:?}, checking receiver URN: {} vs target: {}", pipeline_id, node_config.plugin_urn.as_ref(), target_urn); if node_config.plugin_urn.as_ref() == target_urn { + eprintln!("DEBUG: build - pipeline: {:?}, injecting logs_receiver into node!", pipeline_id); wrapper.set_logs_receiver(logs_rx.clone()); } } diff --git a/rust/otap-dataflow/crates/engine/src/receiver.rs b/rust/otap-dataflow/crates/engine/src/receiver.rs index a6969e345c..7f6db4db3d 100644 --- a/rust/otap-dataflow/crates/engine/src/receiver.rs +++ b/rust/otap-dataflow/crates/engine/src/receiver.rs @@ -269,6 +269,7 @@ impl ReceiverWrapper { }, metrics_reporter, ) => { + eprintln!("DEBUG: ReceiverWrapper::start - Local, node_id: {:?}, logs_receiver.is_some(): {:?}", node_id, logs_receiver.is_some()); let msg_senders = if pdata_senders.is_empty() { return Err(Error::ReceiverError { receiver: node_id.clone(), @@ -386,11 +387,14 @@ impl ReceiverWrapper { /// This is used by the Internal Telemetry Receiver to receive logs /// from all threads via the logs channel. pub fn set_logs_receiver(&mut self, receiver: LogsReceiver) { + eprintln!("DEBUG: ReceiverWrapper::set_logs_receiver called"); match self { ReceiverWrapper::Local { logs_receiver, .. } => { + eprintln!("DEBUG: set_logs_receiver - Local variant"); *logs_receiver = Some(receiver); } ReceiverWrapper::Shared { logs_receiver, .. 
} => { + eprintln!("DEBUG: set_logs_receiver - Shared variant"); *logs_receiver = Some(receiver); } } diff --git a/rust/otap-dataflow/crates/engine/src/runtime_pipeline.rs b/rust/otap-dataflow/crates/engine/src/runtime_pipeline.rs index 691f213c67..84797fa56f 100644 --- a/rust/otap-dataflow/crates/engine/src/runtime_pipeline.rs +++ b/rust/otap-dataflow/crates/engine/src/runtime_pipeline.rs @@ -111,6 +111,8 @@ impl RuntimePipeline { ) -> Result, Error> { use futures::stream::{FuturesUnordered, StreamExt}; + eprintln!("DEBUG: run_forever - pipeline_key: {:?}, num_receivers: {}", pipeline_key, self.receivers.len()); + let RuntimePipeline { config, receivers, diff --git a/rust/otap-dataflow/crates/otap/src/console_exporter/mod.rs b/rust/otap-dataflow/crates/otap/src/console_exporter/mod.rs index 22a15f0208..0414062ff4 100644 --- a/rust/otap-dataflow/crates/otap/src/console_exporter/mod.rs +++ b/rust/otap-dataflow/crates/otap/src/console_exporter/mod.rs @@ -38,7 +38,7 @@ mod formatter; use formatter::HierarchicalFormatter; /// The URN for the console exporter -pub const CONSOLE_EXPORTER_URN: &str = "urn:otap:console:exporter"; +pub const CONSOLE_EXPORTER_URN: &str = "urn:otel:console:exporter"; /// Configuration for the console exporter #[derive(Debug, Clone, Default, serde::Deserialize)] diff --git a/rust/otap-dataflow/crates/state/src/reporter.rs b/rust/otap-dataflow/crates/state/src/reporter.rs index 74367b74a5..af0f552ecb 100644 --- a/rust/otap-dataflow/crates/state/src/reporter.rs +++ b/rust/otap-dataflow/crates/state/src/reporter.rs @@ -31,13 +31,13 @@ impl ObservedEventReporter { Err(flume::SendTimeoutError::Timeout(event)) => { raw_error!( "Timeout sending observed event", - event = tracing::field::debug(event) + event = event ); } Err(flume::SendTimeoutError::Disconnected(event)) => { raw_error!( "Disconnected event observer", - event = tracing::field::debug(event) + event = event ); } Ok(_) => {} diff --git a/rust/otap-dataflow/crates/telemetry/Cargo.toml b/rust/otap-dataflow/crates/telemetry/Cargo.toml index 8be2d5a138..841846e0db 100644 --- a/rust/otap-dataflow/crates/telemetry/Cargo.toml +++ b/rust/otap-dataflow/crates/telemetry/Cargo.toml @@ -43,6 +43,7 @@ opentelemetry-otlp = { workspace = true, features = ["grpc-tonic", "tls", "metri opentelemetry-prometheus = { workspace = true } opentelemetry-appender-tracing = { workspace = true } tracing = { workspace = true } +tracing-core = { workspace = true } tracing-subscriber = { workspace = true, features = ["env-filter","registry", "std", "fmt"] } [dev-dependencies] diff --git a/rust/otap-dataflow/crates/telemetry/src/internal_events.rs b/rust/otap-dataflow/crates/telemetry/src/internal_events.rs index b5cb7afd3a..31ef90fd24 100644 --- a/rust/otap-dataflow/crates/telemetry/src/internal_events.rs +++ b/rust/otap-dataflow/crates/telemetry/src/internal_events.rs @@ -13,6 +13,7 @@ #[doc(hidden)] pub mod _private { pub use tracing::{debug, error, info, warn}; + pub use tracing_core; } /// Macro for logging informational messages. @@ -136,24 +137,74 @@ where tracing::subscriber::with_default(raw_logging_subscriber(), f) } -/// Log an error message directly to stderr, bypassing the tracing subscriber. +/// Log an error message directly to stderr, bypassing the tracing dispatcher. +/// +/// This macro creates a real tracing Event with proper Metadata, then dispatches +/// it directly to `RawLoggingLayer::dispatch_event`, bypassing the global +/// dispatcher. 
This is safe to call from within tracing subscriber callbacks +/// (e.g., `on_event`) where using `tracing::subscriber::with_default` would +/// cause a RefCell panic. +/// +/// Output format matches the standard log format: +/// `2026-01-06T10:30:45.123Z ERROR target::name: message [key=value, ...]` #[macro_export] macro_rules! raw_error { - ($name:expr $(,)?) => { - $crate::internal_events::with_raw_logging(|| { - $crate::_private::error!(name: $name, target: env!("CARGO_PKG_NAME"), name = $name, ""); - }) - }; - ($name:expr, $($key:ident = $value:expr),+ $(,)?) => { - $crate::internal_events::with_raw_logging(|| { - $crate::_private::error!(name: $name, - target: env!("CARGO_PKG_NAME"), - name = $name, - $($key = { - $value - }),+, - "" - ); - }) - }; + ($name:expr $(,)?) => {{ + use $crate::self_tracing::{ConsoleWriter, RawLoggingLayer}; + use $crate::_private::tracing_core::{Event, Metadata, Level, field::FieldSet, callsite::DefaultCallsite}; + + static CALLSITE: DefaultCallsite = DefaultCallsite::new(&META); + static META: Metadata<'static> = Metadata::new( + $name, + env!("CARGO_PKG_NAME"), + Level::ERROR, + Some(file!()), + Some(line!()), + Some(env!("CARGO_PKG_NAME")), + FieldSet::new(&[], $crate::_private::tracing_core::callsite::Identifier(&CALLSITE)), + $crate::_private::tracing_core::metadata::Kind::EVENT, + ); + + let layer = RawLoggingLayer::new(ConsoleWriter::no_color()); + let valueset = META.fields().value_set(&[]); + let event = Event::new(&META, &valueset); + layer.dispatch_event(&event); + }}; + ($name:expr, $($key:ident = $value:expr),+ $(,)?) => {{ + use $crate::self_tracing::{ConsoleWriter, RawLoggingLayer}; + use $crate::_private::tracing_core::{Event, Metadata, Level, field::FieldSet, callsite::DefaultCallsite}; + + // Define field names as static strings + static FIELD_NAMES: &[&str] = &[$(stringify!($key)),+]; + + static CALLSITE: DefaultCallsite = DefaultCallsite::new(&META); + static META: Metadata<'static> = Metadata::new( + $name, + env!("CARGO_PKG_NAME"), + Level::ERROR, + Some(file!()), + Some(line!()), + Some(env!("CARGO_PKG_NAME")), + FieldSet::new(FIELD_NAMES, $crate::_private::tracing_core::callsite::Identifier(&CALLSITE)), + $crate::_private::tracing_core::metadata::Kind::EVENT, + ); + + let layer = RawLoggingLayer::new(ConsoleWriter::no_color()); + + // Bind values to extend their lifetimes - use Debug formatting + $( + let $key = format!("{:?}", $value); + )+ + + // Create fixed-size array of field-value pairs (the repetition creates N elements) + let field_values = &[ + $(( + &META.fields().field(stringify!($key)).expect("field exists"), + Some(&$key as &dyn $crate::_private::tracing_core::field::Value) + )),+ + ]; + let valueset = META.fields().value_set(field_values); + let event = Event::new(&META, &valueset); + layer.dispatch_event(&event); + }}; } diff --git a/rust/otap-dataflow/crates/telemetry/src/logs.rs b/rust/otap-dataflow/crates/telemetry/src/logs.rs index 3beec2d1bb..7f0d077db0 100644 --- a/rust/otap-dataflow/crates/telemetry/src/logs.rs +++ b/rust/otap-dataflow/crates/telemetry/src/logs.rs @@ -167,6 +167,10 @@ impl LogsReporter { } /// Try to send a payload, non-blocking. 
+ /// + /// Returns: + /// - `Ok(())` if the payload was sent + /// - `Err` if the channel is full or disconnected pub fn try_report(&self, payload: LogPayload) -> Result<(), Error> { self.sender .try_send(payload) diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs index 6528b089f0..ca818e38b1 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs @@ -331,6 +331,83 @@ impl ConsoleWriter { std::io::stdout().write_all(data) }; } + + /// Write a raw error message directly to stderr, bypassing tracing entirely. + /// + /// This method is safe to call from within tracing subscriber callbacks + /// (e.g., `on_event`) where calling `tracing::subscriber::with_default` + /// would cause a "RefCell already borrowed" panic. + /// + /// Output format matches the standard log format: + /// `2026-01-06T10:30:45.123Z ERROR target::name: message [key=value, ...]` + pub fn raw_write_error(&self, target: &str, name: &str, message: &str, attrs: &[(&str, &str)]) { + use std::time::{SystemTime, UNIX_EPOCH}; + + let mut buf = [0u8; LOG_BUFFER_SIZE]; + let mut w = Cursor::new(buf.as_mut_slice()); + let cm = self.color_mode; + + // Timestamp + let nanos = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_nanos() as u64; + + cm.write_ansi(&mut w, AnsiCode::Dim); + Self::write_timestamp(&mut w, nanos); + cm.write_ansi(&mut w, AnsiCode::Reset); + let _ = w.write_all(b" "); + + // Level (always ERROR for raw_write_error) + cm.write_level(&mut w, &Level::ERROR); + + // Event name (target::name) + cm.write_ansi(&mut w, AnsiCode::Bold); + let _ = w.write_all(target.as_bytes()); + let _ = w.write_all(b"::"); + let _ = w.write_all(name.as_bytes()); + cm.write_ansi(&mut w, AnsiCode::Reset); + let _ = w.write_all(b": "); + + // Message body + let _ = w.write_all(message.as_bytes()); + + // Attributes + if !attrs.is_empty() { + let _ = w.write_all(b" ["); + for (i, (key, value)) in attrs.iter().enumerate() { + if i > 0 { + let _ = w.write_all(b", "); + } + let _ = w.write_all(key.as_bytes()); + let _ = w.write_all(b"="); + let _ = w.write_all(value.as_bytes()); + } + let _ = w.write_all(b"]"); + } + + let _ = w.write_all(b"\n"); + + // Always write to stderr for errors + let len = w.position() as usize; + let _ = std::io::stderr().write_all(&buf[..len]); + } +} + +impl RawLoggingLayer { + /// Process a tracing Event directly, bypassing the dispatcher. + /// + /// This method is safe to call from within tracing subscriber callbacks + /// (e.g., `on_event`) where calling `tracing::subscriber::with_default` + /// would cause a "RefCell already borrowed" panic. + /// + /// It performs the same formatting as the Layer's on_event, writing + /// directly to stdout/stderr based on the event's level. 
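To make the hazard concrete before `dispatch_event` itself: a layer's `on_event` already executes inside the active dispatcher, so the safe and unsafe moves differ as in this standalone sketch (illustrative layer, not this crate's `RawLoggingLayer`):

```rust
use tracing::Event;
use tracing_subscriber::layer::{Context, Layer};
use tracing_subscriber::Registry;

struct SafeLayer;

impl Layer<Registry> for SafeLayer {
    fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, Registry>) {
        // Fine: a plain method call that never touches the dispatcher.
        format_directly(event);

        // Not fine here: tracing::subscriber::with_default(...) would
        // re-enter the thread-local dispatch state that is already
        // borrowed while this callback runs.
    }
}

fn format_directly(event: &Event<'_>) {
    eprintln!("{}", event.metadata().name());
}
```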
+ pub fn dispatch_event(&self, event: &Event<'_>) { + let record = LogRecord::new(event); + let callsite = SavedCallsite::new(event.metadata()); + self.writer.raw_print(&record, &callsite); + } } impl TracingLayer for RawLoggingLayer diff --git a/rust/otap-dataflow/crates/telemetry/src/telemetry_runtime.rs b/rust/otap-dataflow/crates/telemetry/src/telemetry_runtime.rs index a646b45590..3d1ae7272a 100644 --- a/rust/otap-dataflow/crates/telemetry/src/telemetry_runtime.rs +++ b/rust/otap-dataflow/crates/telemetry/src/telemetry_runtime.rs @@ -16,10 +16,11 @@ use otap_df_config::pipeline::service::telemetry::{ use crate::{ LogsReceiver, error::Error, - logs::{LogsCollector, LogsReporter}, + logs::{LogsCollector, LogsReporter, TelemetrySetup}, telemetry_runtime::logger_provider::LoggerProvider, telemetry_runtime::meter_provider::MeterProvider, }; +use otap_df_config::pipeline::service::telemetry::logs::LogLevel; /// Client for the OpenTelemetry SDK and internal telemetry settings. /// @@ -41,6 +42,11 @@ pub struct TelemetryRuntime { logs_receiver: Option, /// Collector for Direct output mode. Must be spawned by the controller. logs_collector: Option, + /// Deferred global subscriber setup. Must be initialized by controller + /// AFTER the internal pipeline is started (so the channel is being consumed). + global_setup: Option, + /// Log level for the global subscriber. + global_log_level: LogLevel, // TODO: Add traces providers. } @@ -83,12 +89,14 @@ impl TelemetryRuntime { OutputMode::Direct => { // Direct mode: logs go to a collector that prints to console let (collector, reporter) = LogsCollector::new(config.reporting_channel_size); + eprintln!("DEBUG: TelemetryRuntime::new - Direct mode, no receiver"); (Some(reporter), None, Some(collector)) } OutputMode::Internal => { // Internal mode: logs go through channel to ITR node let (receiver, reporter) = LogsCollector::channel(config.reporting_channel_size); + eprintln!("DEBUG: TelemetryRuntime::new - Internal mode, receiver created"); (Some(reporter), Some(receiver), None) } OutputMode::Noop => (None, None, None), @@ -111,19 +119,14 @@ impl TelemetryRuntime { (None, runtime) }; - // Configure the global subscriber based on providers.global. - // Engine threads override this with their own subscriber via with_default(). + // Build the global setup but DO NOT initialize it yet. + // The controller must call init_global_subscriber() after the internal + // pipeline is started, so the channel receiver is being consumed. let global_setup = Self::make_telemetry_setup( config.logs.providers.global, logs_reporter.as_ref(), logger_provider.as_ref(), )?; - if let Err(err) = global_setup.try_init_global(config.logs.level) { - crate::raw_error!("tracing.subscriber.init", error = err.to_string()); - } - - // Note: Any span-level detail, typically through a traces provider, has - // to be configured via the try_init() cases above. Ok(Self { _runtime: runtime, @@ -132,6 +135,8 @@ impl TelemetryRuntime { logs_reporter, logs_receiver, logs_collector, + global_setup: Some(global_setup), + global_log_level: config.logs.level, }) } @@ -214,6 +219,21 @@ impl TelemetryRuntime { self.logs_collector.take() } + /// Initialize the global tracing subscriber. + /// + /// This MUST be called AFTER the internal pipeline is started (when using + /// Internal output mode), so the channel receiver is being actively consumed. + /// Otherwise, logs sent before the receiver starts will fill the channel buffer. 
+ /// + /// For other output modes (Direct, Noop), this can be called at any time. + pub fn init_global_subscriber(&mut self) { + if let Some(setup) = self.global_setup.take() { + if let Err(err) = setup.try_init_global(self.global_log_level) { + crate::raw_error!("tracing.subscriber.init", error = err.to_string()); + } + } + } + /// Create a `TelemetrySetup` for the given provider mode. /// /// This uses the runtime's shared `logs_reporter` and `logger_provider` to configure @@ -224,7 +244,7 @@ impl TelemetryRuntime { /// - `Immediate` requires `logs_reporter` to be present /// - `OpenTelemetry` requires `logger_provider` to be present #[must_use] - pub fn telemetry_setup_for(&self, provider_mode: ProviderMode) -> crate::logs::TelemetrySetup { + pub fn telemetry_setup_for(&self, provider_mode: ProviderMode) -> TelemetrySetup { Self::make_telemetry_setup( provider_mode, self.logs_reporter.as_ref(), @@ -240,8 +260,7 @@ impl TelemetryRuntime { provider_mode: ProviderMode, logs_reporter: Option<&LogsReporter>, logger_provider: Option<&SdkLoggerProvider>, - ) -> Result { - use crate::logs::TelemetrySetup; + ) -> Result { match provider_mode { ProviderMode::Noop => Ok(TelemetrySetup::Noop), diff --git a/rust/otap-dataflow/src/main.rs b/rust/otap-dataflow/src/main.rs index 6715e53323..b33b38e31d 100644 --- a/rust/otap-dataflow/src/main.rs +++ b/rust/otap-dataflow/src/main.rs @@ -5,12 +5,11 @@ use clap::Parser; use otap_df_config::pipeline::PipelineConfig; -use otap_df_config::pipeline::service::telemetry::logs::LogLevel; use otap_df_config::pipeline_group::{CoreAllocation, CoreRange, Quota}; use otap_df_config::{PipelineGroupId, PipelineId}; use otap_df_controller::Controller; use otap_df_otap::OTAP_PIPELINE_FACTORY; -use otap_df_telemetry::{get_env_filter, raw_error}; +use otap_df_telemetry::raw_error; use otap_df_telemetry::self_tracing::{ConsoleWriter, RawLoggingLayer}; use std::path::PathBuf; use sysinfo::System; @@ -119,14 +118,6 @@ fn main() -> Result<(), Box> { .install_default() .map_err(|e| format!("Failed to install rustls crypto provider: {e:?}"))?; - // Set up raw logging as the global default subscriber for the main thread. - // Engine threads will set their own thread-local subscribers based on config. - let raw_subscriber = Registry::default() - .with(get_env_filter(LogLevel::Debug)) - .with(RawLoggingLayer::new(ConsoleWriter::color())); - tracing::subscriber::set_global_default(raw_subscriber) - .expect("Failed to set global default subscriber"); - let args = Args::parse(); // For now, we predefine pipeline group and pipeline IDs. @@ -134,45 +125,57 @@ fn main() -> Result<(), Box> { let pipeline_group_id: PipelineGroupId = "default_pipeline_group".into(); let pipeline_id: PipelineId = "default_pipeline".into(); - // Load pipeline configuration - let pipeline_cfg = PipelineConfig::from_file( - pipeline_group_id.clone(), - pipeline_id.clone(), - &args.pipeline, - )?; - - tracing::info!("{}", system_info()); + // Use with_default for a thread-local subscriber during startup. + // This covers config loading and early info logging. + // TelemetryRuntime::new() (called inside run_forever) will set the actual global subscriber. 
+    let early_subscriber = Registry::default().with(RawLoggingLayer::new(ConsoleWriter::color()));
+    let (pipeline_cfg, quota, admin_settings) =
+        tracing::subscriber::with_default(early_subscriber, || {
+            // Load pipeline configuration
+            let pipeline_cfg = PipelineConfig::from_file(
+                pipeline_group_id.clone(),
+                pipeline_id.clone(),
+                &args.pipeline,
+            )?;
+
+            tracing::info!("{}", system_info());
+
+            // Map CLI arguments to the core allocation enum
+            let core_allocation = if let Some(range) = args.core_id_range.clone() {
+                range
+            } else if args.num_cores == 0 {
+                CoreAllocation::AllCores
+            } else {
+                CoreAllocation::CoreCount {
+                    count: args.num_cores,
+                }
+            };
+
+            let quota = Quota { core_allocation };
+
+            // Print the requested core configuration
+            match &quota.core_allocation {
+                CoreAllocation::AllCores => {
+                    tracing::info!("Requested core allocation: all available cores")
+                }
+                CoreAllocation::CoreCount { count } => {
+                    tracing::info!("Requested core allocation: {count} cores")
+                }
+                CoreAllocation::CoreSet { .. } => {
+                    tracing::info!("Requested core allocation: {}", quota.core_allocation);
+                }
+            }
+
+            let admin_settings = otap_df_config::engine::HttpAdminSettings {
+                bind_address: args.http_admin_bind.clone(),
+            };
+
+            Ok::<_, Box<dyn std::error::Error>>((pipeline_cfg, quota, admin_settings))
+        })?;
 
     // Create controller and start pipeline with multi-core support
     let controller = Controller::new(&OTAP_PIPELINE_FACTORY);
 
-    // Map CLI arguments to the core allocation enum
-    let core_allocation = if let Some(range) = args.core_id_range {
-        range
-    } else if args.num_cores == 0 {
-        CoreAllocation::AllCores
-    } else {
-        CoreAllocation::CoreCount {
-            count: args.num_cores,
-        }
-    };
-
-    let quota = Quota { core_allocation };
-
-    // Print the requested core configuration
-    match &quota.core_allocation {
-        CoreAllocation::AllCores => tracing::info!("Requested core allocation: all available cores"),
-        CoreAllocation::CoreCount { count } => {
-            tracing::info!("Requested core allocation: {count} cores")
-        }
-        CoreAllocation::CoreSet { ..
} => { - tracing::info!("Requested core allocation: {}", quota.core_allocation); - } - } - - let admin_settings = otap_df_config::engine::HttpAdminSettings { - bind_address: args.http_admin_bind, - }; let result = controller.run_forever( pipeline_group_id, pipeline_id, From 7748e72487488d5c860e8a117028af1ef2cde5d6 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Wed, 14 Jan 2026 09:40:47 -0800 Subject: [PATCH 81/92] raw log safety --- .../crates/controller/src/lib.rs | 175 +++++++++--------- .../crates/controller/src/thread_task.rs | 4 +- rust/otap-dataflow/crates/engine/src/lib.rs | 2 - .../crates/engine/src/local/receiver.rs | 36 +++- .../crates/engine/src/receiver.rs | 4 - .../crates/engine/src/runtime_pipeline.rs | 2 - .../otap/src/console_exporter/formatter.rs | 22 +-- .../crates/otap/src/console_exporter/mod.rs | 15 +- .../otap/src/internal_telemetry_receiver.rs | 16 +- .../crates/state/src/reporter.rs | 4 +- rust/otap-dataflow/crates/state/src/store.rs | 12 +- .../otap-dataflow/crates/telemetry/Cargo.toml | 1 - .../crates/telemetry/src/internal_events.rs | 86 +++------ .../otap-dataflow/crates/telemetry/src/lib.rs | 4 +- .../crates/telemetry/src/logs.rs | 10 +- .../crates/telemetry/src/telemetry_runtime.rs | 3 - 16 files changed, 181 insertions(+), 215 deletions(-) diff --git a/rust/otap-dataflow/crates/controller/src/lib.rs b/rust/otap-dataflow/crates/controller/src/lib.rs index 77a00661d6..65742000c1 100644 --- a/rust/otap-dataflow/crates/controller/src/lib.rs +++ b/rust/otap-dataflow/crates/controller/src/lib.rs @@ -103,7 +103,7 @@ impl Controller { let _logs_collector_handle = if let Some(logs_collector) = telemetry_runtime.take_logs_collector() { Some(spawn_thread_local_task( - "logs-collector", + "logs-collector", move |_cancellation_token| logs_collector.run(), )?) } else { @@ -112,7 +112,6 @@ impl Controller { // Get logs receiver for Internal output mode (passed to internal pipeline) let mut logs_receiver = telemetry_runtime.take_logs_receiver(); - eprintln!("DEBUG: logs_receiver after take: {:?}", logs_receiver.is_some()); let metrics_system = InternalTelemetrySystem::new(telemetry_config); let metrics_dispatcher = metrics_system.dispatcher(); @@ -153,86 +152,87 @@ impl Controller { let log_level = telemetry_config.logs.level; // Spawn internal telemetry pipeline thread, if configured. - let internal_pipeline_thread = - if let Some(internal_config) = pipeline.extract_internal_config() { - // TODO: this is a bunch of placeholder values! 
- let internal_logs_receiver = logs_receiver.take(); - eprintln!("DEBUG: internal_logs_receiver after take: {:?}", internal_logs_receiver.is_some()); - let internal_factory = self.pipeline_factory; - let internal_pipeline_id: PipelineId = "internal".into(); - let internal_pipeline_key = DeployedPipelineKey { - pipeline_group_id: pipeline_group_id.clone(), - pipeline_id: internal_pipeline_id.clone(), - core_id: 0, - }; - let internal_pipeline_ctx = controller_ctx.pipeline_context_with( - pipeline_group_id.clone(), - internal_pipeline_id.clone(), - 0, - 0, - ); - let internal_obs_evt_reporter = obs_evt_reporter.clone(); - let internal_metrics_reporter = metrics_reporter.clone(); - - // Create control message channel for internal pipeline - let (internal_ctrl_tx, internal_ctrl_rx) = pipeline_ctrl_msg_channel( - internal_config - .pipeline_settings() - .default_pipeline_ctrl_msg_channel_size, - ); + let internal_pipeline_thread = if let Some(internal_config) = + pipeline.extract_internal_config() + { + // TODO: this is a bunch of placeholder values! + let internal_logs_receiver = logs_receiver.take(); + let internal_factory = self.pipeline_factory; + let internal_pipeline_id: PipelineId = "internal".into(); + let internal_pipeline_key = DeployedPipelineKey { + pipeline_group_id: pipeline_group_id.clone(), + pipeline_id: internal_pipeline_id.clone(), + core_id: 0, + }; + let internal_pipeline_ctx = controller_ctx.pipeline_context_with( + pipeline_group_id.clone(), + internal_pipeline_id.clone(), + 0, + 0, + ); + let internal_obs_evt_reporter = obs_evt_reporter.clone(); + let internal_metrics_reporter = metrics_reporter.clone(); - // Create a channel to signal startup success/failure - // This allows us to fail fast if the internal pipeline can't build - let (startup_tx, startup_rx) = std_mpsc::channel::<Result<(), Error>>(); - - let thread_name = "internal-pipeline".to_string(); - let internal_telemetry_setup = internal_telemetry_setup.clone(); - let handle = thread::Builder::new() - .name(thread_name.clone()) - .spawn(move || { - Self::run_internal_pipeline_thread( - internal_pipeline_key, - CoreId { id: 0 }, // No pinning for internal pipeline - internal_config, - internal_factory, - internal_pipeline_ctx, - internal_obs_evt_reporter, - internal_metrics_reporter, - internal_telemetry_setup, - log_level, // TODO: separate log level for internal pipeline.
- internal_logs_receiver, - internal_ctrl_tx, - internal_ctrl_rx, - startup_tx, - ) - }) - .map_err(|e| Error::ThreadSpawnError { - thread_name: thread_name.clone(), - source: e, - })?; - - // Wait for the internal pipeline to signal successful startup - // This ensures we fail fast with a clear error if the internal pipeline can't build - match startup_rx.recv() { - Ok(Ok(())) => { - // Internal pipeline built successfully and is running - } - Ok(Err(e)) => { - // Internal pipeline failed to build - propagate the error - return Err(e); - } - Err(_) => { - // Channel closed unexpectedly - thread may have panicked - return Err(Error::InternalPipelineStartupFailed { - message: "Internal pipeline thread terminated unexpectedly during startup".to_string(), - }); - } + // Create control message channel for internal pipeline + let (internal_ctrl_tx, internal_ctrl_rx) = pipeline_ctrl_msg_channel( + internal_config + .pipeline_settings() + .default_pipeline_ctrl_msg_channel_size, + ); + + // Create a channel to signal startup success/failure + // This allows us to fail fast if the internal pipeline can't build + let (startup_tx, startup_rx) = std_mpsc::channel::<Result<(), Error>>(); + + let thread_name = "internal-pipeline".to_string(); + let internal_telemetry_setup = internal_telemetry_setup.clone(); + let handle = thread::Builder::new() + .name(thread_name.clone()) + .spawn(move || { + Self::run_internal_pipeline_thread( + internal_pipeline_key, + CoreId { id: 0 }, // No pinning for internal pipeline + internal_config, + internal_factory, + internal_pipeline_ctx, + internal_obs_evt_reporter, + internal_metrics_reporter, + internal_telemetry_setup, + log_level, // TODO: separate log level for internal pipeline. + internal_logs_receiver, + internal_ctrl_tx, + internal_ctrl_rx, + startup_tx, + ) + }) + .map_err(|e| Error::ThreadSpawnError { + thread_name: thread_name.clone(), + source: e, + })?; + + // Wait for the internal pipeline to signal successful startup + // This ensures we fail fast with a clear error if the internal pipeline can't build + match startup_rx.recv() { + Ok(Ok(())) => { + // Internal pipeline built successfully and is running } + Ok(Err(e)) => { + // Internal pipeline failed to build - propagate the error + return Err(e); + } + Err(_) => { + // Channel closed unexpectedly - thread may have panicked + return Err(Error::InternalPipelineStartupFailed { + message: "Internal pipeline thread terminated unexpectedly during startup" + .to_string(), + }); + } + } - Some((thread_name, handle)) - } else { - None - }; + Some((thread_name, handle)) + } else { + None + }; // Initialize the global subscriber AFTER the internal pipeline has signaled successful startup. // This ensures the channel receiver is being consumed before we start sending logs.
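The startup handshake above is the load-bearing part of this patch: the parent thread blocks on a one-shot std mpsc channel until the spawned thread reports whether its build step succeeded, so a broken internal pipeline fails startup immediately instead of dying silently in the background. A minimal, self-contained sketch of the pattern (the function name and the String error type are placeholders, not the controller's actual API):

    use std::sync::mpsc;
    use std::thread;

    fn spawn_with_startup_check() -> Result<thread::JoinHandle<()>, String> {
        // The worker sends exactly one message: Ok(()) once built, or the build error.
        let (startup_tx, startup_rx) = mpsc::channel::<Result<(), String>>();

        let handle = thread::Builder::new()
            .name("internal-pipeline".to_string())
            .spawn(move || {
                let built: Result<(), String> = Ok(()); // placeholder for the build step
                let ok = built.is_ok();
                let _ = startup_tx.send(built); // report before entering the run loop
                if ok {
                    // run loop would go here
                }
            })
            .map_err(|e| e.to_string())?;

        match startup_rx.recv() {
            Ok(Ok(())) => Ok(handle), // built successfully and is running
            Ok(Err(e)) => Err(e),     // build failed; propagate the error
            // Sender dropped without reporting: the thread panicked during startup.
            Err(_) => Err("worker thread terminated unexpectedly during startup".to_string()),
        }
    }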
@@ -578,27 +578,21 @@ impl Controller { // Build the runtime pipeline from the configuration // Pass logs_receiver for injection into ITR node (if present) - eprintln!("DEBUG: run_pipeline_thread - logs_receiver is_some: {:?}", logs_receiver.is_some()); let logs_receiver_param = logs_receiver .map(|rx| (INTERNAL_TELEMETRY_RECEIVER_URN, rx)); - eprintln!("DEBUG: run_pipeline_thread - logs_receiver_param is_some: {:?}", logs_receiver_param.is_some()); let runtime_pipeline = pipeline_factory .build(pipeline_context.clone(), pipeline_config.clone(), logs_receiver_param) .map_err(|e| { - eprintln!("DEBUG: run_pipeline_thread - build FAILED: {:?}", e); Error::PipelineRuntimeError { source: Box::new(e), } })?; - eprintln!("DEBUG: run_pipeline_thread - pipeline built for {:?}", pipeline_key); obs_evt_reporter.report(ObservedEvent::ready( pipeline_key.clone(), Some("Pipeline initialization successful.".to_owned()), )); - eprintln!("DEBUG: run_pipeline_thread - about to call run_forever for pipeline_key: {:?}", pipeline_key); - // Start the pipeline (this will use the current thread's Tokio runtime) runtime_pipeline .run_forever( @@ -652,12 +646,13 @@ impl Controller { // Build the runtime pipeline from the configuration // Pass logs_receiver for injection into ITR node - let logs_receiver_param = logs_receiver - .map(|rx| (INTERNAL_TELEMETRY_RECEIVER_URN, rx)); + let logs_receiver_param = logs_receiver.map(|rx| (INTERNAL_TELEMETRY_RECEIVER_URN, rx)); - let runtime_pipeline = match pipeline_factory - .build(pipeline_context.clone(), pipeline_config.clone(), logs_receiver_param) - { + let runtime_pipeline = match pipeline_factory.build( + pipeline_context.clone(), + pipeline_config.clone(), + logs_receiver_param, + ) { Ok(pipeline) => pipeline, Err(e) => { // Signal failure to parent thread with the actual error diff --git a/rust/otap-dataflow/crates/controller/src/thread_task.rs b/rust/otap-dataflow/crates/controller/src/thread_task.rs index f9cea278d7..e5a98a0257 100644 --- a/rust/otap-dataflow/crates/controller/src/thread_task.rs +++ b/rust/otap-dataflow/crates/controller/src/thread_task.rs @@ -66,7 +66,7 @@ impl Drop for ThreadLocalTaskHandle { // ToDo Replace this eprintln once we have selected a logging solution raw_error!( "Thread finished with an error during drop; error suppressed", - thread_name = &self.name, + thread_name = &self.name ); } Err(panic) => { @@ -75,7 +75,7 @@ impl Drop for ThreadLocalTaskHandle { raw_error!( "Thread panicked during drop; panic suppressed", thread_name = &self.name, - panicked = tracing::field::debug(panic), + panicked = ?panic ); } } diff --git a/rust/otap-dataflow/crates/engine/src/lib.rs b/rust/otap-dataflow/crates/engine/src/lib.rs index 3415b5dcd6..c2a635278b 100644 --- a/rust/otap-dataflow/crates/engine/src/lib.rs +++ b/rust/otap-dataflow/crates/engine/src/lib.rs @@ -351,9 +351,7 @@ impl PipelineFactory { // Inject logs receiver if this is the target node if let Some((target_urn, ref logs_rx)) = logs_receiver { - eprintln!("DEBUG: build - pipeline: {:?}, checking receiver URN: {} vs target: {}", pipeline_id, node_config.plugin_urn.as_ref(), target_urn); if node_config.plugin_urn.as_ref() == target_urn { - eprintln!("DEBUG: build - pipeline: {:?}, injecting logs_receiver into node!", pipeline_id); wrapper.set_logs_receiver(logs_rx.clone()); } } diff --git a/rust/otap-dataflow/crates/engine/src/local/receiver.rs b/rust/otap-dataflow/crates/engine/src/local/receiver.rs index 00ceb82bc1..71ff5e5c7a 100644 --- 
a/rust/otap-dataflow/crates/engine/src/local/receiver.rs +++ b/rust/otap-dataflow/crates/engine/src/local/receiver.rs @@ -334,7 +334,14 @@ mod tests { let (ctrl_tx, _ctrl_rx) = pipeline_ctrl_msg_channel(4); let (_metrics_rx, metrics_reporter) = MetricsReporter::create_new_and_receiver(1); - let eh = EffectHandler::new(test_node("recv"), senders, None, ctrl_tx, metrics_reporter, None); + let eh = EffectHandler::new( + test_node("recv"), + senders, + None, + ctrl_tx, + metrics_reporter, + None, + ); eh.send_message_to("b", 42).await.unwrap(); @@ -355,7 +362,14 @@ mod tests { let (ctrl_tx, _ctrl_rx) = pipeline_ctrl_msg_channel(4); let (_metrics_rx, metrics_reporter) = MetricsReporter::create_new_and_receiver(1); - let eh = EffectHandler::new(test_node("recv"), senders, None, ctrl_tx, metrics_reporter, None); + let eh = EffectHandler::new( + test_node("recv"), + senders, + None, + ctrl_tx, + metrics_reporter, + None, + ); eh.send_message(7).await.unwrap(); assert_eq!(rx.recv().await.unwrap(), 7); @@ -402,7 +416,14 @@ mod tests { let (ctrl_tx, _ctrl_rx) = pipeline_ctrl_msg_channel(4); let (_metrics_rx, metrics_reporter) = MetricsReporter::create_new_and_receiver(1); - let eh = EffectHandler::new(test_node("recv"), senders, None, ctrl_tx, metrics_reporter, None); + let eh = EffectHandler::new( + test_node("recv"), + senders, + None, + ctrl_tx, + metrics_reporter, + None, + ); let res = eh.send_message(5).await; assert!(res.is_err()); @@ -431,7 +452,14 @@ mod tests { let (ctrl_tx, _ctrl_rx) = pipeline_ctrl_msg_channel(4); let (_metrics_rx, metrics_reporter) = MetricsReporter::create_new_and_receiver(1); - let eh = EffectHandler::new(test_node("recv"), senders, None, ctrl_tx, metrics_reporter, None); + let eh = EffectHandler::new( + test_node("recv"), + senders, + None, + ctrl_tx, + metrics_reporter, + None, + ); let ports: HashSet<_> = eh.connected_ports().into_iter().collect(); let expected: HashSet<_> = [Cow::from("a"), Cow::from("b")].into_iter().collect(); diff --git a/rust/otap-dataflow/crates/engine/src/receiver.rs b/rust/otap-dataflow/crates/engine/src/receiver.rs index 7f6db4db3d..a6969e345c 100644 --- a/rust/otap-dataflow/crates/engine/src/receiver.rs +++ b/rust/otap-dataflow/crates/engine/src/receiver.rs @@ -269,7 +269,6 @@ impl ReceiverWrapper { }, metrics_reporter, ) => { - eprintln!("DEBUG: ReceiverWrapper::start - Local, node_id: {:?}, logs_receiver.is_some(): {:?}", node_id, logs_receiver.is_some()); let msg_senders = if pdata_senders.is_empty() { return Err(Error::ReceiverError { receiver: node_id.clone(), @@ -387,14 +386,11 @@ impl ReceiverWrapper { /// This is used by the Internal Telemetry Receiver to receive logs /// from all threads via the logs channel. pub fn set_logs_receiver(&mut self, receiver: LogsReceiver) { - eprintln!("DEBUG: ReceiverWrapper::set_logs_receiver called"); match self { ReceiverWrapper::Local { logs_receiver, .. } => { - eprintln!("DEBUG: set_logs_receiver - Local variant"); *logs_receiver = Some(receiver); } ReceiverWrapper::Shared { logs_receiver, .. 
} => { - eprintln!("DEBUG: set_logs_receiver - Shared variant"); *logs_receiver = Some(receiver); } } diff --git a/rust/otap-dataflow/crates/engine/src/runtime_pipeline.rs b/rust/otap-dataflow/crates/engine/src/runtime_pipeline.rs index 84797fa56f..691f213c67 100644 --- a/rust/otap-dataflow/crates/engine/src/runtime_pipeline.rs +++ b/rust/otap-dataflow/crates/engine/src/runtime_pipeline.rs @@ -111,8 +111,6 @@ impl RuntimePipeline { ) -> Result, Error> { use futures::stream::{FuturesUnordered, StreamExt}; - eprintln!("DEBUG: run_forever - pipeline_key: {:?}, num_receivers: {}", pipeline_key, self.receivers.len()); - let RuntimePipeline { config, receivers, diff --git a/rust/otap-dataflow/crates/otap/src/console_exporter/formatter.rs b/rust/otap-dataflow/crates/otap/src/console_exporter/formatter.rs index eb0b40fdea..a4d4a2c087 100644 --- a/rust/otap-dataflow/crates/otap/src/console_exporter/formatter.rs +++ b/rust/otap-dataflow/crates/otap/src/console_exporter/formatter.rs @@ -13,11 +13,11 @@ //! ``` use chrono::{DateTime, Datelike, Timelike, Utc}; +use otap_df_pdata::OtlpProtoBytes; use otap_df_pdata::views::common::{AnyValueView, AttributeView, InstrumentationScopeView}; use otap_df_pdata::views::logs::{LogRecordView, LogsDataView, ResourceLogsView, ScopeLogsView}; use otap_df_pdata::views::otlp::bytes::logs::RawLogsData; use otap_df_pdata::views::resource::ResourceView; -use otap_df_pdata::OtlpProtoBytes; use std::io::{Cursor, Write}; /// Buffer size for formatting output. @@ -101,14 +101,14 @@ impl HierarchicalFormatter { } /// Format logs from a LogsDataView. - fn format_logs_data<'a, L: LogsDataView>(&self, logs_data: &'a L) { + fn format_logs_data(&self, logs_data: &'_ L) { for resource_logs in logs_data.resources() { self.format_resource_logs(&resource_logs); } } /// Format a ResourceLogs with its nested scopes. - fn format_resource_logs<'a, R: ResourceLogsView>(&self, resource_logs: &'a R) { + fn format_resource_logs(&self, resource_logs: &'_ R) { let mut buf = [0u8; OUTPUT_BUFFER_SIZE]; let mut w = Cursor::new(buf.as_mut_slice()); @@ -148,7 +148,7 @@ impl HierarchicalFormatter { } /// Get the first timestamp from log records in a ResourceLogs. - fn get_first_log_timestamp<'a, R: ResourceLogsView>(&self, resource_logs: &'a R) -> u64 { + fn get_first_log_timestamp(&self, resource_logs: &'_ R) -> u64 { for scope_logs in resource_logs.scopes() { for log_record in scope_logs.log_records() { if let Some(ts) = log_record.time_unix_nano() { @@ -163,7 +163,7 @@ impl HierarchicalFormatter { } /// Format a ScopeLogs with its nested log records. - fn format_scope_logs<'a, S: ScopeLogsView>(&self, scope_logs: &'a S, is_last_scope: bool) { + fn format_scope_logs(&self, scope_logs: &'_ S, is_last_scope: bool) { let mut buf = [0u8; OUTPUT_BUFFER_SIZE]; let mut w = Cursor::new(buf.as_mut_slice()); @@ -210,9 +210,9 @@ impl HierarchicalFormatter { } /// Format a single log record. - fn format_log_record<'a, L: LogRecordView>( + fn format_log_record( &self, - log_record: &'a L, + log_record: &'_ L, is_last_scope: bool, is_last_record: bool, ) { @@ -277,7 +277,7 @@ impl HierarchicalFormatter { self.write_any_value(&mut w, &v); } } - let _ = w.write_all(b"]"); + let _ = w.write_all(b"]"); } let _ = w.write_all(b"\n"); @@ -394,11 +394,7 @@ impl HierarchicalFormatter { } /// Write scope information. 
- fn write_scope_info<S: InstrumentationScopeView>( - &self, - w: &mut Cursor<&mut [u8]>, - scope: &S, - ) { + fn write_scope_info<S: InstrumentationScopeView>(&self, w: &mut Cursor<&mut [u8]>, scope: &S) { let _ = w.write_all(b"{"); let mut has_content = false; diff --git a/rust/otap-dataflow/crates/otap/src/console_exporter/mod.rs b/rust/otap-dataflow/crates/otap/src/console_exporter/mod.rs index 0414062ff4..cd4820a422 100644 --- a/rust/otap-dataflow/crates/otap/src/console_exporter/mod.rs +++ b/rust/otap-dataflow/crates/otap/src/console_exporter/mod.rs @@ -18,8 +18,8 @@ use crate::OTAP_EXPORTER_FACTORIES; use crate::pdata::OtapPdata; use async_trait::async_trait; use linkme::distributed_slice; -use otap_df_config::node::NodeUserConfig; use otap_df_config::SignalType; +use otap_df_config::node::NodeUserConfig; use otap_df_engine::config::ExporterConfig; use otap_df_engine::context::PipelineContext; use otap_df_engine::control::{AckMsg, NodeControlMsg}; @@ -31,6 +31,7 @@ use otap_df_engine::node::NodeId; use otap_df_engine::terminal_state::TerminalState; use otap_df_engine::{ConsumerEffectHandlerExtension, ExporterFactory}; use otap_df_pdata::OtapPayload; +use otap_df_telemetry::raw_error; use std::sync::Arc; mod formatter; @@ -83,11 +84,9 @@ pub static CONSOLE_EXPORTER: ExporterFactory = ExporterFactory { node: NodeId, node_config: Arc<NodeUserConfig>, exporter_config: &ExporterConfig| { - let config: ConsoleExporterConfig = - serde_json::from_value(node_config.config.clone()).map_err(|e| { - otap_df_config::error::Error::InvalidUserConfig { - error: format!("Failed to parse console exporter config: {}", e), - } + let config: ConsoleExporterConfig = serde_json::from_value(node_config.config.clone()) + .map_err(|e| otap_df_config::error::Error::InvalidUserConfig { + error: format!("Failed to parse console exporter config: {}", e), })?; Ok(ExporterWrapper::local( ConsoleExporter::new(config), @@ -146,12 +145,12 @@ impl ConsoleExporter { fn export_traces(&self, _payload: &OtapPayload) { // TODO: Implement traces formatting - eprintln!("Console exporter: Traces formatting not yet implemented"); + raw_error!("Console exporter: Traces formatting not yet implemented"); } fn export_metrics(&self, _payload: &OtapPayload) { // TODO: Implement metrics formatting - eprintln!("Console exporter: Metrics formatting not yet implemented"); + raw_error!("Console exporter: Metrics formatting not yet implemented"); } } diff --git a/rust/otap-dataflow/crates/otap/src/internal_telemetry_receiver.rs b/rust/otap-dataflow/crates/otap/src/internal_telemetry_receiver.rs index b827f38b9c..12ef848971 100644 --- a/rust/otap-dataflow/crates/otap/src/internal_telemetry_receiver.rs +++ b/rust/otap-dataflow/crates/otap/src/internal_telemetry_receiver.rs @@ -11,6 +11,7 @@ use crate::pdata::OtapPdata; use async_trait::async_trait; use linkme::distributed_slice; use otap_df_config::node::NodeUserConfig; +use otap_df_engine::ReceiverFactory; use otap_df_engine::config::ReceiverConfig; use otap_df_engine::context::PipelineContext; use otap_df_engine::control::NodeControlMsg; @@ -19,7 +20,6 @@ use otap_df_engine::local::receiver as local; use otap_df_engine::node::NodeId; use otap_df_engine::receiver::ReceiverWrapper; use otap_df_engine::terminal_state::TerminalState; -use otap_df_engine::ReceiverFactory; use otap_df_pdata::OtlpProtoBytes; use otap_df_telemetry::logs::{LogBatch, LogPayload}; use otap_df_telemetry::metrics::MetricSetSnapshot; @@ -74,12 +74,11 @@ impl InternalTelemetryReceiver { /// Create a receiver from a JSON configuration.
pub fn from_config(config: &Value) -> Result<Self, otap_df_config::error::Error> { - let config: Config = - serde_json::from_value(config.clone()).map_err(|e| { - otap_df_config::error::Error::InvalidUserConfig { - error: e.to_string(), - } - })?; + let config: Config = serde_json::from_value(config.clone()).map_err(|e| { + otap_df_config::error::Error::InvalidUserConfig { + error: e.to_string(), + } + })?; Ok(Self::new(config)) } } @@ -161,7 +160,8 @@ impl InternalTelemetryReceiver { if !batch.records.is_empty() { let bytes = batch.encode_export_logs_request(); - let pdata = OtapPdata::new_todo_context(OtlpProtoBytes::ExportLogsRequest(bytes).into()); + let pdata = + OtapPdata::new_todo_context(OtlpProtoBytes::ExportLogsRequest(bytes).into()); effect_handler.send_message(pdata).await?; } Ok(()) diff --git a/rust/otap-dataflow/crates/state/src/reporter.rs b/rust/otap-dataflow/crates/state/src/reporter.rs index af0f552ecb..785b3bbe0b 100644 --- a/rust/otap-dataflow/crates/state/src/reporter.rs +++ b/rust/otap-dataflow/crates/state/src/reporter.rs @@ -31,13 +31,13 @@ impl ObservedEventReporter { Err(flume::SendTimeoutError::Timeout(event)) => { raw_error!( "Timeout sending observed event", - event = event + event = ?event ); } Err(flume::SendTimeoutError::Disconnected(event)) => { raw_error!( "Disconnected event observer", - event = event + event = ?event ); } Ok(_) => {} diff --git a/rust/otap-dataflow/crates/state/src/store.rs b/rust/otap-dataflow/crates/state/src/store.rs index 32fe24ce09..f82b189524 100644 --- a/rust/otap-dataflow/crates/state/src/store.rs +++ b/rust/otap-dataflow/crates/state/src/store.rs @@ -11,7 +11,7 @@ use crate::pipeline_rt_status::{ApplyOutcome, PipelineRuntimeStatus}; use crate::pipeline_status::PipelineStatus; use crate::reporter::ObservedEventReporter; use otap_df_config::pipeline::PipelineSettings; -use otap_df_telemetry::{otel_error, otel_warn, raw_error}; +use otap_df_telemetry::{otel_warn, raw_error}; use serde::{Serialize, Serializer}; use std::collections::HashMap; use std::sync::{Arc, Mutex}; @@ -108,15 +108,15 @@ impl ObservedStateStore { // mechanism (see previous todo).
match &observed_event.r#type { EventType::Request(_) => { - otel_error!( + raw_error!( "request.event", - observed_event = tracing::field::debug(&observed_event) + observed_event = ?observed_event, ); } EventType::Error(_) => { - otel_error!( + raw_error!( "error.event", - observed_event = tracing::field::debug(&observed_event) + observed_event = ?observed_event, ); } EventType::Success(_) => { /* no console output for success events */ } @@ -162,7 +162,7 @@ impl ObservedStateStore { if let Err(e) = self.report(event) { raw_error!( "Error reporting observed event", - error = e.to_string(), + error = ?e, ); } } diff --git a/rust/otap-dataflow/crates/telemetry/Cargo.toml b/rust/otap-dataflow/crates/telemetry/Cargo.toml index 841846e0db..8be2d5a138 100644 --- a/rust/otap-dataflow/crates/telemetry/Cargo.toml +++ b/rust/otap-dataflow/crates/telemetry/Cargo.toml @@ -43,7 +43,6 @@ opentelemetry-otlp = { workspace = true, features = ["grpc-tonic", "tls", "metri opentelemetry-prometheus = { workspace = true } opentelemetry-appender-tracing = { workspace = true } tracing = { workspace = true } -tracing-core = { workspace = true } tracing-subscriber = { workspace = true, features = ["env-filter","registry", "std", "fmt"] } [dev-dependencies] diff --git a/rust/otap-dataflow/crates/telemetry/src/internal_events.rs b/rust/otap-dataflow/crates/telemetry/src/internal_events.rs index 31ef90fd24..9225754974 100644 --- a/rust/otap-dataflow/crates/telemetry/src/internal_events.rs +++ b/rust/otap-dataflow/crates/telemetry/src/internal_events.rs @@ -12,8 +12,11 @@ #[doc(hidden)] pub mod _private { - pub use tracing::{debug, error, info, warn}; - pub use tracing_core; + pub use tracing::callsite::{Callsite, DefaultCallsite}; + pub use tracing::field::ValueSet; + pub use tracing::metadata::Kind; + pub use tracing::{Event, Level}; + pub use tracing::{callsite2, debug, error, info, valueset, warn}; } /// Macro for logging informational messages. @@ -138,73 +141,30 @@ where } /// Log an error message directly to stderr, bypassing the tracing dispatcher. -/// -/// This macro creates a real tracing Event with proper Metadata, then dispatches -/// it directly to `RawLoggingLayer::dispatch_event`, bypassing the global -/// dispatcher. This is safe to call from within tracing subscriber callbacks -/// (e.g., `on_event`) where using `tracing::subscriber::with_default` would -/// cause a RefCell panic. -/// -/// Output format matches the standard log format: -/// `2026-01-06T10:30:45.123Z ERROR target::name: message [key=value, ...]` +/// TODO: the way this is written it supports the full tracing syntax for +/// debug and display formatting of field values. The macros above should +/// be extended similarly. #[macro_export] macro_rules! raw_error { - ($name:expr $(,)?) => {{ + ($name:expr $(, $($fields:tt)*)?) => {{ use $crate::self_tracing::{ConsoleWriter, RawLoggingLayer}; - use $crate::_private::tracing_core::{Event, Metadata, Level, field::FieldSet, callsite::DefaultCallsite}; + use $crate::_private::Callsite; - static CALLSITE: DefaultCallsite = DefaultCallsite::new(&META); - static META: Metadata<'static> = Metadata::new( - $name, - env!("CARGO_PKG_NAME"), - Level::ERROR, - Some(file!()), - Some(line!()), - Some(env!("CARGO_PKG_NAME")), - FieldSet::new(&[], $crate::_private::tracing_core::callsite::Identifier(&CALLSITE)), - $crate::_private::tracing_core::metadata::Kind::EVENT, - ); + static __CALLSITE: $crate::_private::DefaultCallsite = $crate::_private::callsite2! 
{ + name: $name, + kind: $crate::_private::Kind::EVENT, + target: module_path!(), + level: $crate::_private::Level::ERROR, + fields: $($($fields)*)? + }; + let meta = __CALLSITE.metadata(); let layer = RawLoggingLayer::new(ConsoleWriter::no_color()); - let valueset = META.fields().value_set(&[]); - let event = Event::new(&META, &valueset); - layer.dispatch_event(&event); - }}; - ($name:expr, $($key:ident = $value:expr),+ $(,)?) => {{ - use $crate::self_tracing::{ConsoleWriter, RawLoggingLayer}; - use $crate::_private::tracing_core::{Event, Metadata, Level, field::FieldSet, callsite::DefaultCallsite}; - - // Define field names as static strings - static FIELD_NAMES: &[&str] = &[$(stringify!($key)),+]; - - static CALLSITE: DefaultCallsite = DefaultCallsite::new(&META); - static META: Metadata<'static> = Metadata::new( - $name, - env!("CARGO_PKG_NAME"), - Level::ERROR, - Some(file!()), - Some(line!()), - Some(env!("CARGO_PKG_NAME")), - FieldSet::new(FIELD_NAMES, $crate::_private::tracing_core::callsite::Identifier(&CALLSITE)), - $crate::_private::tracing_core::metadata::Kind::EVENT, - ); - - let layer = RawLoggingLayer::new(ConsoleWriter::no_color()); - - // Bind values to extend their lifetimes - use Debug formatting - $( - let $key = format!("{:?}", $value); - )+ - // Create fixed-size array of field-value pairs (the repetition creates N elements) - let field_values = &[ - $(( - &META.fields().field(stringify!($key)).expect("field exists"), - Some(&$key as &dyn $crate::_private::tracing_core::field::Value) - )),+ - ]; - let valueset = META.fields().value_set(field_values); - let event = Event::new(&META, &valueset); - layer.dispatch_event(&event); + // Use closure to extend valueset lifetime (same pattern as tracing::event!) + (|valueset: $crate::_private::ValueSet<'_>| { + let event = $crate::_private::Event::new(meta, &valueset); + layer.dispatch_event(&event); + })($crate::_private::valueset!(meta.fields(), $($($fields)*)?)); }}; } diff --git a/rust/otap-dataflow/crates/telemetry/src/lib.rs b/rust/otap-dataflow/crates/telemetry/src/lib.rs index 67bb567943..64579f1a23 100644 --- a/rust/otap-dataflow/crates/telemetry/src/lib.rs +++ b/rust/otap-dataflow/crates/telemetry/src/lib.rs @@ -69,8 +69,7 @@ pub use tracing::warn_span as otel_warn_span; // Re-export commonly used logs types for convenience. pub use logs::{ - ImmediateLayer, LogBatch, LogPayload, LogsCollector, LogsReceiver, LogsReporter, - TelemetrySetup, + ImmediateLayer, LogBatch, LogPayload, LogsCollector, LogsReceiver, LogsReporter, TelemetrySetup, }; // TODO This should be #[cfg(test)], but something is preventing it from working. @@ -169,6 +168,7 @@ impl Default for InternalTelemetrySystem { /// /// If `RUST_LOG` is set in the environment, it takes precedence for fine-grained control. /// Otherwise, falls back to the config level with known noisy dependencies (h2, hyper) silenced. 
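The precedence rule documented here (RUST_LOG wins when present, otherwise the configured level with h2/hyper silenced) follows tracing-subscriber's usual EnvFilter construction. A sketch of that fallback logic, assuming the tracing-subscriber 0.3 EnvFilter API; the exact directive strings are illustrative rather than copied from this crate:

    use tracing_subscriber::filter::{EnvFilter, LevelFilter};

    fn env_filter_or(fallback: LevelFilter) -> EnvFilter {
        if std::env::var("RUST_LOG").is_ok() {
            // Fine-grained control from the environment takes precedence.
            EnvFilter::from_default_env()
        } else {
            // Fall back to the configured level, silencing known noisy dependencies.
            EnvFilter::new(format!("{fallback},h2=off,hyper=off"))
        }
    }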
+#[must_use] pub fn get_env_filter(level: LogLevel) -> EnvFilter { let level = match level { LogLevel::Off => LevelFilter::OFF, diff --git a/rust/otap-dataflow/crates/telemetry/src/logs.rs b/rust/otap-dataflow/crates/telemetry/src/logs.rs index 7f0d077db0..ac027b77ce 100644 --- a/rust/otap-dataflow/crates/telemetry/src/logs.rs +++ b/rust/otap-dataflow/crates/telemetry/src/logs.rs @@ -276,7 +276,7 @@ where match self.reporter.try_report(LogPayload::Singleton(record)) { Ok(()) => {} Err(err) => { - crate::raw_error!("failed to send log", err = err.to_string()); + crate::raw_error!("failed to send log", err = %err); } } } @@ -347,23 +347,23 @@ impl TelemetrySetup { match self { TelemetrySetup::Noop => { let subscriber = tracing::subscriber::NoSubscriber::new(); - tracing::subscriber::with_default(subscriber, || f()) + tracing::subscriber::with_default(subscriber, f) } TelemetrySetup::Raw => { let subscriber = Registry::default() .with(filter) .with(RawLoggingLayer::new(ConsoleWriter::default())); - tracing::subscriber::with_default(subscriber, || f()) + tracing::subscriber::with_default(subscriber, f) } TelemetrySetup::Immediate { reporter } => { let layer = ImmediateLayer::new(reporter.clone()); let subscriber = Registry::default().with(filter).with(layer); - tracing::subscriber::with_default(subscriber, || f()) + tracing::subscriber::with_default(subscriber, f) } TelemetrySetup::OpenTelemetry { logger_provider } => { let sdk_layer = OpenTelemetryTracingBridge::new(logger_provider); let subscriber = Registry::default().with(filter).with(sdk_layer); - tracing::subscriber::with_default(subscriber, || f()) + tracing::subscriber::with_default(subscriber, f) } } } diff --git a/rust/otap-dataflow/crates/telemetry/src/telemetry_runtime.rs b/rust/otap-dataflow/crates/telemetry/src/telemetry_runtime.rs index 3d1ae7272a..f0e54b0239 100644 --- a/rust/otap-dataflow/crates/telemetry/src/telemetry_runtime.rs +++ b/rust/otap-dataflow/crates/telemetry/src/telemetry_runtime.rs @@ -89,14 +89,12 @@ impl TelemetryRuntime { OutputMode::Direct => { // Direct mode: logs go to a collector that prints to console let (collector, reporter) = LogsCollector::new(config.reporting_channel_size); - eprintln!("DEBUG: TelemetryRuntime::new - Direct mode, no receiver"); (Some(reporter), None, Some(collector)) } OutputMode::Internal => { // Internal mode: logs go through channel to ITR node let (receiver, reporter) = LogsCollector::channel(config.reporting_channel_size); - eprintln!("DEBUG: TelemetryRuntime::new - Internal mode, receiver created"); (Some(reporter), Some(receiver), None) } OutputMode::Noop => (None, None, None), @@ -261,7 +259,6 @@ impl TelemetryRuntime { logs_reporter: Option<&LogsReporter>, logger_provider: Option<&SdkLoggerProvider>, ) -> Result { - match provider_mode { ProviderMode::Noop => Ok(TelemetrySetup::Noop), ProviderMode::Raw => Ok(TelemetrySetup::Raw), From 4c4b4bf228cf16ea700b2f8fdf4d42093748d20d Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Wed, 14 Jan 2026 10:32:49 -0800 Subject: [PATCH 82/92] ok save --- .../otap/src/console_exporter/formatter.rs | 301 +++---------- .../otap/src/internal_telemetry_receiver.rs | 8 +- .../crates/telemetry/src/self_tracing.rs | 2 +- .../telemetry/src/self_tracing/formatter.rs | 412 ++++++++++++------ 4 files changed, 329 insertions(+), 394 deletions(-) diff --git a/rust/otap-dataflow/crates/otap/src/console_exporter/formatter.rs b/rust/otap-dataflow/crates/otap/src/console_exporter/formatter.rs index a4d4a2c087..bf6dea8b7e 100644 --- 
a/rust/otap-dataflow/crates/otap/src/console_exporter/formatter.rs +++ b/rust/otap-dataflow/crates/otap/src/console_exporter/formatter.rs @@ -3,7 +3,9 @@ //! Hierarchical formatter for OTLP data with tree-style output. //! -//! Output format: +//! This module uses the shared formatting primitives from `otap_df_telemetry::self_tracing` +//! to render OTLP log data in a hierarchical tree format: +//! //! ```text //! RESOURCE {service.name=my-service, host.name=localhost} //! │ SCOPE {name=my-library, version=1.0.0} @@ -12,38 +14,13 @@ //! │ └─ ERROR event_name: error message [code=500] //! ``` -use chrono::{DateTime, Datelike, Timelike, Utc}; -use otap_df_pdata::OtlpProtoBytes; -use otap_df_pdata::views::common::{AnyValueView, AttributeView, InstrumentationScopeView}; +use otap_df_pdata::views::common::{AttributeView, InstrumentationScopeView}; use otap_df_pdata::views::logs::{LogRecordView, LogsDataView, ResourceLogsView, ScopeLogsView}; use otap_df_pdata::views::otlp::bytes::logs::RawLogsData; use otap_df_pdata::views::resource::ResourceView; -use std::io::{Cursor, Write}; - -/// Buffer size for formatting output. -const OUTPUT_BUFFER_SIZE: usize = 8192; - -/// Log level derived from OTLP severity. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum Level { - Trace, - Debug, - Info, - Warn, - Error, -} - -impl Level { - fn as_str(self) -> &'static str { - match self { - Level::Trace => "TRACE", - Level::Debug => "DEBUG", - Level::Info => "INFO", - Level::Warn => "WARN", - Level::Error => "ERROR", - } - } -} +use otap_df_pdata::OtlpProtoBytes; +use otap_df_telemetry::self_tracing::{AnsiCode, BufWriter, ConsoleWriter, LOG_BUFFER_SIZE}; +use std::io::Write; /// Tree drawing characters for Unicode mode. mod unicode_tree { @@ -61,24 +38,9 @@ mod ascii_tree { pub const SPACE: &str = " "; } -/// ANSI codes for colored output. -#[derive(Clone, Copy)] -#[repr(u8)] -enum AnsiCode { - Reset = 0, - Bold = 1, - Dim = 2, - Red = 31, - Green = 32, - Yellow = 33, - Blue = 34, - Magenta = 35, - Cyan = 36, -} - /// Hierarchical formatter for OTLP data. pub struct HierarchicalFormatter { - use_color: bool, + writer: ConsoleWriter, use_unicode: bool, } @@ -86,10 +48,12 @@ impl HierarchicalFormatter { /// Create a new hierarchical formatter. #[must_use] pub fn new(use_color: bool, use_unicode: bool) -> Self { - Self { - use_color, - use_unicode, - } + let writer = if use_color { + ConsoleWriter::color() + } else { + ConsoleWriter::no_color() + }; + Self { writer, use_unicode } } /// Format logs from OTLP bytes. @@ -109,21 +73,21 @@ impl HierarchicalFormatter { /// Format a ResourceLogs with its nested scopes. 
fn format_resource_logs<R: ResourceLogsView>(&self, resource_logs: &'_ R) { - let mut buf = [0u8; OUTPUT_BUFFER_SIZE]; - let mut w = Cursor::new(buf.as_mut_slice()); + let mut buf = [0u8; LOG_BUFFER_SIZE]; + let mut w = std::io::Cursor::new(buf.as_mut_slice()); // Get first timestamp from nested log records for the resource line let first_ts = self.get_first_log_timestamp(resource_logs); // Write resource header - self.write_ansi(&mut w, AnsiCode::Dim); - Self::write_timestamp(&mut w, first_ts); - self.write_ansi(&mut w, AnsiCode::Reset); + self.writer.write_ansi(&mut w, AnsiCode::Dim); + ConsoleWriter::write_timestamp(&mut w, first_ts); + self.writer.write_ansi(&mut w, AnsiCode::Reset); let _ = w.write_all(b" "); - self.write_ansi(&mut w, AnsiCode::Cyan); - self.write_ansi(&mut w, AnsiCode::Bold); + self.writer.write_ansi(&mut w, AnsiCode::Cyan); + self.writer.write_ansi(&mut w, AnsiCode::Bold); let _ = w.write_all(b"RESOURCE"); - self.write_ansi(&mut w, AnsiCode::Reset); + self.writer.write_ansi(&mut w, AnsiCode::Reset); let _ = w.write_all(b" "); // Write resource attributes @@ -164,8 +128,8 @@ impl HierarchicalFormatter { /// Format a ScopeLogs with its nested log records. fn format_scope_logs<S: ScopeLogsView>(&self, scope_logs: &'_ S, is_last_scope: bool) { - let mut buf = [0u8; OUTPUT_BUFFER_SIZE]; - let mut w = Cursor::new(buf.as_mut_slice()); + let mut buf = [0u8; LOG_BUFFER_SIZE]; + let mut w = std::io::Cursor::new(buf.as_mut_slice()); let tree = self.tree_chars(); @@ -178,14 +142,14 @@ impl HierarchicalFormatter { // Write scope header with tree prefix let _ = w.write_all(tree.vertical.as_bytes()); let _ = w.write_all(b" "); - self.write_ansi(&mut w, AnsiCode::Dim); - Self::write_timestamp(&mut w, first_ts); - self.write_ansi(&mut w, AnsiCode::Reset); + self.writer.write_ansi(&mut w, AnsiCode::Dim); + ConsoleWriter::write_timestamp(&mut w, first_ts); + self.writer.write_ansi(&mut w, AnsiCode::Reset); let _ = w.write_all(b" "); - self.write_ansi(&mut w, AnsiCode::Magenta); - self.write_ansi(&mut w, AnsiCode::Bold); + self.writer.write_ansi(&mut w, AnsiCode::Magenta); + self.writer.write_ansi(&mut w, AnsiCode::Bold); let _ = w.write_all(b"SCOPE"); - self.write_ansi(&mut w, AnsiCode::Reset); + self.writer.write_ansi(&mut w, AnsiCode::Reset); let _ = w.write_all(b" "); // Write scope info @@ -216,10 +180,11 @@ impl HierarchicalFormatter { is_last_scope: bool, is_last_record: bool, ) { - let mut buf = [0u8; OUTPUT_BUFFER_SIZE]; - let mut w = Cursor::new(buf.as_mut_slice()); + let mut buf = [0u8; LOG_BUFFER_SIZE]; + let mut w = std::io::Cursor::new(buf.as_mut_slice()); let tree = self.tree_chars(); + let severity = log_record.severity_number(); // Tree prefix: vertical line for scope continuation, then branch for record let _ = w.write_all(tree.vertical.as_bytes()); @@ -236,55 +201,37 @@ impl HierarchicalFormatter { .time_unix_nano() .or_else(|| log_record.observed_time_unix_nano()) .unwrap_or(0); - self.write_ansi(&mut w, AnsiCode::Dim); - Self::write_timestamp(&mut w, ts); - self.write_ansi(&mut w, AnsiCode::Reset); + self.writer.write_ansi(&mut w, AnsiCode::Dim); + ConsoleWriter::write_timestamp(&mut w, ts); + self.writer.write_ansi(&mut w, AnsiCode::Reset); let _ = w.write_all(b" "); - // Level - let level = self.severity_to_level(log_record.severity_number()); - self.write_level(&mut w, level); - let _ = w.write_all(b" "); + // Level (using shared severity formatting) + self.writer.write_severity(&mut w, severity); // Event name - self.write_ansi(&mut w, AnsiCode::Bold); + self.writer.write_ansi(&mut w,
AnsiCode::Bold); if let Some(name) = log_record.event_name() { let _ = w.write_all(name.as_ref()); } else { let _ = w.write_all(b"event"); } - self.write_ansi(&mut w, AnsiCode::Reset); + self.writer.write_ansi(&mut w, AnsiCode::Reset); let _ = w.write_all(b": "); - // Body + // Body (using shared AnyValue formatting) if let Some(body) = log_record.body() { - self.write_any_value(&mut w, &body); + ConsoleWriter::write_any_value(&mut w, &body); } - // Attributes - let mut attrs = log_record.attributes().peekable(); - if attrs.peek().is_some() { - let _ = w.write_all(b" ["); - let mut first = true; - for attr in attrs { - if !first { - let _ = w.write_all(b", "); - } - first = false; - let _ = w.write_all(attr.key()); - let _ = w.write_all(b"="); - if let Some(v) = attr.value() { - self.write_any_value(&mut w, &v); - } - } - let _ = w.write_all(b"]"); - } + // Attributes (using shared attribute formatting) + ConsoleWriter::write_attrs(&mut w, log_record.attributes()); let _ = w.write_all(b"\n"); - // Print to stdout or stderr based on level + // Print to stdout or stderr based on severity let len = w.position() as usize; - if matches!(level, Level::Error | Level::Warn) { + if ConsoleWriter::severity_is_error_or_warn(severity) { let _ = std::io::stderr().write_all(&buf[..len]); } else { let _ = std::io::stdout().write_all(&buf[..len]); @@ -310,73 +257,8 @@ impl HierarchicalFormatter { } } - /// Write an ANSI escape code. - #[inline] - fn write_ansi(&self, w: &mut Cursor<&mut [u8]>, code: AnsiCode) { - if self.use_color { - let _ = write!(w, "\x1b[{}m", code as u8); - } - } - - /// Write a colored level indicator. - fn write_level(&self, w: &mut Cursor<&mut [u8]>, level: Level) { - let color = match level { - Level::Error => AnsiCode::Red, - Level::Warn => AnsiCode::Yellow, - Level::Info => AnsiCode::Green, - Level::Debug => AnsiCode::Blue, - Level::Trace => AnsiCode::Magenta, - }; - self.write_ansi(w, color); - let _ = w.write_all(level.as_str().as_bytes()); - self.write_ansi(w, AnsiCode::Reset); - // Pad to 5 chars - let padding = 5 - level.as_str().len(); - for _ in 0..padding { - let _ = w.write_all(b" "); - } - } - - /// Convert OTLP severity number to Level. - fn severity_to_level(&self, severity: Option<i32>) -> Level { - match severity { - Some(n) if n >= 17 => Level::Error, // FATAL, ERROR - Some(n) if n >= 13 => Level::Warn, // WARN - Some(n) if n >= 9 => Level::Info, // INFO - Some(n) if n >= 5 => Level::Debug, // DEBUG - Some(_) => Level::Trace, // TRACE - None => Level::Info, // Default to INFO - } - } - - /// Write timestamp in ISO 8601 format. - fn write_timestamp(w: &mut Cursor<&mut [u8]>, nanos: u64) { - let secs = (nanos / 1_000_000_000) as i64; - let subsec_nanos = (nanos % 1_000_000_000) as u32; - - if let Some(dt) = DateTime::<Utc>::from_timestamp(secs, subsec_nanos) { - let date = dt.date_naive(); - let time = dt.time(); - let millis = subsec_nanos / 1_000_000; - - let _ = write!( - w, - "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}.{:03}Z", - date.year(), - date.month(), - date.day(), - time.hour(), - time.minute(), - time.second(), - millis - ); - } else { - let _ = w.write_all(b""); - } - } - - /// Write resource attributes. - fn write_resource_attrs<R: ResourceView>(&self, w: &mut Cursor<&mut [u8]>, resource: &R) { + /// Write resource attributes in `{key=value, ...}` format.
+ fn write_resource_attrs<R: ResourceView>(&self, w: &mut BufWriter<'_>, resource: &R) { let _ = w.write_all(b"{"); let mut first = true; for attr in resource.attributes() { @@ -387,14 +269,14 @@ impl HierarchicalFormatter { let _ = w.write_all(attr.key()); let _ = w.write_all(b"="); if let Some(v) = attr.value() { - self.write_any_value(w, &v); + ConsoleWriter::write_any_value(w, &v); } } let _ = w.write_all(b"}"); } - /// Write scope information. - fn write_scope_info<S: InstrumentationScopeView>(&self, w: &mut Cursor<&mut [u8]>, scope: &S) { + /// Write scope information in `{name=..., version=..., ...}` format. + fn write_scope_info<S: InstrumentationScopeView>(&self, w: &mut BufWriter<'_>, scope: &S) { let _ = w.write_all(b"{"); let mut has_content = false; @@ -421,79 +303,13 @@ impl HierarchicalFormatter { let _ = w.write_all(attr.key()); let _ = w.write_all(b"="); if let Some(v) = attr.value() { - self.write_any_value(w, &v); + ConsoleWriter::write_any_value(w, &v); } has_content = true; } let _ = w.write_all(b"}"); } - - /// Write an AnyValue. - fn write_any_value<'a>(&self, w: &mut Cursor<&mut [u8]>, value: &impl AnyValueView<'a>) { - use otap_df_pdata::views::common::ValueType; - - match value.value_type() { - ValueType::String => { - if let Some(s) = value.as_string() { - let _ = w.write_all(s); - } - } - ValueType::Int64 => { - if let Some(i) = value.as_int64() { - let _ = write!(w, "{}", i); - } - } - ValueType::Bool => { - if let Some(b) = value.as_bool() { - let _ = w.write_all(if b { b"true" } else { b"false" }); - } - } - ValueType::Double => { - if let Some(d) = value.as_double() { - let _ = write!(w, "{:.6}", d); - } - } - ValueType::Bytes => { - if let Some(bytes) = value.as_bytes() { - let _ = write!(w, "<{} bytes>", bytes.len()); - } - } - ValueType::Array => { - let _ = w.write_all(b"["); - if let Some(array_iter) = value.as_array() { - let mut first = true; - for item in array_iter { - if !first { - let _ = w.write_all(b", "); - } - first = false; - self.write_any_value(w, &item); - } - } - let _ = w.write_all(b"]"); - } - ValueType::KeyValueList => { - let _ = w.write_all(b"{"); - if let Some(kvlist_iter) = value.as_kvlist() { - let mut first = true; - for kv in kvlist_iter { - if !first { - let _ = w.write_all(b", "); - } - first = false; - let _ = w.write_all(kv.key()); - if let Some(val) = kv.value() { - let _ = w.write_all(b"="); - self.write_any_value(w, &val); - } - } - } - let _ = w.write_all(b"}"); - } - ValueType::Empty => {} - } - } } /// Tree drawing characters.
@@ -508,19 +324,6 @@ struct TreeChars { mod tests { use super::*; - #[test] - fn test_severity_to_level() { - let formatter = HierarchicalFormatter::new(false, true); - - assert_eq!(formatter.severity_to_level(Some(21)), Level::Error); // FATAL - assert_eq!(formatter.severity_to_level(Some(17)), Level::Error); // ERROR - assert_eq!(formatter.severity_to_level(Some(13)), Level::Warn); // WARN - assert_eq!(formatter.severity_to_level(Some(9)), Level::Info); // INFO - assert_eq!(formatter.severity_to_level(Some(5)), Level::Debug); // DEBUG - assert_eq!(formatter.severity_to_level(Some(1)), Level::Trace); // TRACE - assert_eq!(formatter.severity_to_level(None), Level::Info); // Default - } - #[test] fn test_tree_chars() { let unicode = HierarchicalFormatter::new(false, true); diff --git a/rust/otap-dataflow/crates/otap/src/internal_telemetry_receiver.rs b/rust/otap-dataflow/crates/otap/src/internal_telemetry_receiver.rs index 12ef848971..be11e20b16 100644 --- a/rust/otap-dataflow/crates/otap/src/internal_telemetry_receiver.rs +++ b/rust/otap-dataflow/crates/otap/src/internal_telemetry_receiver.rs @@ -31,16 +31,10 @@ use std::sync::Arc; pub use otap_df_config::pipeline::service::telemetry::logs::INTERNAL_TELEMETRY_RECEIVER_URN; /// Configuration for the internal telemetry receiver. -#[derive(Clone, Deserialize, Serialize)] +#[derive(Clone, Deserialize, Serialize, Default)] #[serde(deny_unknown_fields)] pub struct Config {} -impl Default for Config { - fn default() -> Self { - Self {} - } -} - /// A receiver that consumes internal logs from the logging channel and emits OTLP logs. pub struct InternalTelemetryReceiver { #[allow(dead_code)] diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing.rs index 9d03fd56c7..43d14001b3 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing.rs @@ -18,7 +18,7 @@ use tracing::callsite::Identifier; use tracing::{Event, Level, Metadata}; pub use encoder::DirectLogRecordEncoder; -pub use formatter::{ConsoleWriter, RawLoggingLayer}; +pub use formatter::{AnsiCode, BufWriter, ColorMode, ConsoleWriter, RawLoggingLayer, LOG_BUFFER_SIZE}; /// A log record with structural metadata and pre-encoded body/attributes. #[derive(Debug, Clone)] diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs index ca818e38b1..d436bd0557 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/formatter.rs @@ -1,7 +1,16 @@ // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 -//! An alternative to Tokio fmt::layer(). +//! Log formatting primitives for console output. +//! +//! This module provides shared formatting infrastructure used by: +//! - `RawLoggingLayer`: Flat format for tracing events before OTLP pipeline is ready +//! - Console exporter: Hierarchical format for OTLP log data with tree structure +//! +//! The core abstraction is [`ConsoleWriter`] which provides methods for formatting +//! timestamps, levels, bodies, and attributes. The [`ConsoleWriter::format_log_line`] +//! method accepts a callback for customizing the "level" section, enabling different +//! output formats (flat vs hierarchical) while sharing all other formatting logic. 
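The callback seam this module doc describes can be exercised directly. A sketch under two assumptions: that BufWriter<'a> is the Cursor<&'a mut [u8]> alias re-exported above, and that a record implementing LogRecordView is already in hand. The flat layer's closure writes only a colored level; a hierarchical caller would write tree characters first.

    use otap_df_pdata::views::logs::LogRecordView;
    use otap_df_telemetry::self_tracing::ConsoleWriter;

    // Sketch only: renders one flat-format line into `buf`, returning its length.
    fn render_flat(cw: &ConsoleWriter, record: &impl LogRecordView, buf: &mut [u8]) -> usize {
        let mut w = std::io::Cursor::new(buf);
        cw.format_log_line(&mut w, 0, "my_target::my_event", record, |w, cw| {
            cw.write_level(w, &tracing::Level::INFO); // flat format: level section only
        });
        w.position() as usize
    }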
use super::{LogRecord, SavedCallsite}; use bytes::Bytes; @@ -23,19 +32,29 @@ pub const LOG_BUFFER_SIZE: usize = 4096; /// ANSI codes a.k.a. "Select Graphic Rendition" codes. #[derive(Clone, Copy)] #[repr(u8)] -enum AnsiCode { +pub enum AnsiCode { + /// Reset all attributes. Reset = 0, + /// Bold text. Bold = 1, + /// Dim/faint text. Dim = 2, + /// Red foreground. Red = 31, + /// Green foreground. Green = 32, + /// Yellow foreground. Yellow = 33, + /// Blue foreground. Blue = 34, + /// Magenta foreground. Magenta = 35, + /// Cyan foreground. + Cyan = 36, } /// Color mode for console output. -#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum ColorMode { /// Enable ANSI color codes. Color, @@ -43,37 +62,6 @@ pub enum ColorMode { NoColor, } -impl ColorMode { - /// Write an ANSI escape sequence (no-op for NoColor). - #[inline] - fn write_ansi(self, w: &mut BufWriter<'_>, code: AnsiCode) { - if let ColorMode::Color = self { - let _ = write!(w, "\x1b[{}m", code as u8); - } - } - - /// Write level with color and padding. - #[inline] - fn write_level(self, w: &mut BufWriter<'_>, level: &Level) { - self.write_ansi(w, Self::color(level)); - let _ = w.write_all(level.as_str().as_bytes()); - self.write_ansi(w, AnsiCode::Reset); - let _ = w.write_all(b" "); - } - - /// Get ANSI color code for a severity level. - #[inline] - fn color(level: &Level) -> AnsiCode { - match *level { - Level::ERROR => AnsiCode::Red, - Level::WARN => AnsiCode::Yellow, - Level::INFO => AnsiCode::Green, - Level::DEBUG => AnsiCode::Blue, - Level::TRACE => AnsiCode::Magenta, - } - } -} - /// Console writes formatted text to stdout or stderr. #[derive(Debug, Clone, Copy)] pub struct ConsoleWriter { @@ -121,65 +109,27 @@ impl ConsoleWriter { } } - /// Format a LogRecord as a human-readable string (for testing/compatibility). - /// - /// Output format: `2026-01-06T10:30:45.123Z INFO target::name (file.rs:42): body [attr=value, ...]` - pub fn format_log_record(&self, record: &LogRecord, callsite: &SavedCallsite) -> String { - let mut buf = [0u8; LOG_BUFFER_SIZE]; - let len = self.write_log_record(&mut buf, record, callsite); - // The buffer contains valid UTF-8 since we only write ASCII and valid UTF-8 strings - String::from_utf8_lossy(&buf[..len]).into_owned() - } - - /// Write a LogRecord to stdout or stderr (based on level). - /// - /// ERROR and WARN go to stderr, others go to stdout. - /// This is the same routing logic used by RawLoggingLayer. - pub fn raw_print(&self, record: &LogRecord, callsite: &SavedCallsite) { - let mut buf = [0u8; LOG_BUFFER_SIZE]; - let len = self.write_log_record(&mut buf, record, callsite); - self.write_line(callsite.level(), &buf[..len]); + /// Returns the color mode. + #[must_use] + pub fn color_mode(&self) -> ColorMode { + self.color_mode } - /// Write a LogRecord to a byte buffer. Returns the number of bytes written. 
- pub(crate) fn write_log_record( - &self, - buf: &mut [u8], - record: &LogRecord, - callsite: &SavedCallsite, - ) -> usize { - let mut w = Cursor::new(buf); - let cm = self.color_mode; - - cm.write_ansi(&mut w, AnsiCode::Dim); - Self::write_timestamp(&mut w, record.timestamp_ns); - cm.write_ansi(&mut w, AnsiCode::Reset); - let _ = w.write_all(b" "); - cm.write_level(&mut w, callsite.level()); - cm.write_ansi(&mut w, AnsiCode::Bold); - Self::write_event_name(&mut w, callsite); - cm.write_ansi(&mut w, AnsiCode::Reset); - let _ = w.write_all(b": "); - Self::write_body_attrs(&mut w, &record.body_attrs_bytes); - let _ = w.write_all(b"\n"); + // ======================================================================== + // Core formatting primitives - used by both flat and hierarchical formats + // ======================================================================== - w.position() as usize - } - - /// Write callsite details as event_name to buffer. + /// Write an ANSI escape sequence (no-op for NoColor mode). #[inline] - fn write_event_name(w: &mut BufWriter<'_>, callsite: &SavedCallsite) { - let _ = w.write_all(callsite.target().as_bytes()); - let _ = w.write_all(b"::"); - let _ = w.write_all(callsite.name().as_bytes()); - if let (Some(file), Some(line)) = (callsite.file(), callsite.line()) { - let _ = write!(w, " ({}:{})", file, line); + pub fn write_ansi(&self, w: &mut BufWriter<'_>, code: AnsiCode) { + if self.color_mode == ColorMode::Color { + let _ = write!(w, "\x1b[{}m", code as u8); } } /// Write nanosecond timestamp as ISO 8601 (UTC) to buffer. #[inline] - fn write_timestamp(w: &mut BufWriter<'_>, nanos: u64) { + pub fn write_timestamp(w: &mut BufWriter<'_>, nanos: u64) { let secs = (nanos / 1_000_000_000) as i64; let subsec_nanos = (nanos % 1_000_000_000) as u32; @@ -204,55 +154,76 @@ impl ConsoleWriter { } } - /// Write body+attrs bytes to buffer using LogRecordView. - fn write_body_attrs(w: &mut BufWriter<'_>, bytes: &Bytes) { - if bytes.is_empty() { - return; + /// Write a tracing level with color and padding. + /// + /// Format: `INFO ` (level string + padding to 6 chars total) + #[inline] + pub fn write_level(&self, w: &mut BufWriter<'_>, level: &Level) { + self.write_ansi(w, Self::level_color(level)); + let _ = w.write_all(level.as_str().as_bytes()); + self.write_ansi(w, AnsiCode::Reset); + // Pad to 6 chars total (longest is "ERROR" = 5, plus 1 space minimum) + let padding = 6 - level.as_str().len(); + for _ in 0..padding { + let _ = w.write_all(b" "); } + } - // A partial protobuf message (just body + attributes) is still a valid message. - // We can use the RawLogRecord view to access just the fields we encoded. - let record = RawLogRecord::new(bytes.as_ref()); + /// Get ANSI color code for a tracing level. + #[inline] + #[must_use] + pub fn level_color(level: &Level) -> AnsiCode { + match *level { + Level::ERROR => AnsiCode::Red, + Level::WARN => AnsiCode::Yellow, + Level::INFO => AnsiCode::Green, + Level::DEBUG => AnsiCode::Blue, + Level::TRACE => AnsiCode::Magenta, + } + } - // Write body if present - if let Some(body) = record.body() { - Self::write_any_value(w, &body); + /// Write OTLP severity with color and padding. + /// + /// Converts OTLP severity number to level string and writes with appropriate color. 
+ /// Format: `INFO ` (level string + padding to 6 chars total) + #[inline] + pub fn write_severity(&self, w: &mut BufWriter<'_>, severity: Option<i32>) { + let (text, color) = Self::severity_to_text_and_color(severity); + self.write_ansi(w, color); + let _ = w.write_all(text.as_bytes()); + self.write_ansi(w, AnsiCode::Reset); + // Pad to 6 chars total (longest is "ERROR" = 5, plus 1 space minimum) + let padding = 6 - text.len(); + for _ in 0..padding { + let _ = w.write_all(b" "); + } + } - /// Write body+attrs bytes to buffer using LogRecordView. - fn write_body_attrs(w: &mut BufWriter<'_>, bytes: &Bytes) { - if bytes.is_empty() { - return; + /// Convert OTLP severity number to display text and ANSI color. + /// + /// See: <https://opentelemetry.io/docs/specs/otel/logs/data-model/#field-severitynumber> + #[inline] + #[must_use] + pub fn severity_to_text_and_color(severity: Option<i32>) -> (&'static str, AnsiCode) { + match severity { + Some(n) if n >= 17 => ("ERROR", AnsiCode::Red), // FATAL, ERROR + Some(n) if n >= 13 => ("WARN", AnsiCode::Yellow), // WARN + Some(n) if n >= 9 => ("INFO", AnsiCode::Green), // INFO + Some(n) if n >= 5 => ("DEBUG", AnsiCode::Blue), // DEBUG + Some(_) => ("TRACE", AnsiCode::Magenta), // TRACE + None => ("INFO", AnsiCode::Green), // Default to INFO } } - /// Check if the buffer is full (position >= capacity). + /// Check if OTLP severity indicates error or warning (for stderr routing). #[inline] - fn is_full(w: &BufWriter<'_>) -> bool { - w.position() as usize >= w.get_ref().len() + #[must_use] + pub fn severity_is_error_or_warn(severity: Option<i32>) -> bool { + matches!(severity, Some(n) if n >= 13) } /// Write an AnyValue to buffer. - fn write_any_value<'a>(w: &mut BufWriter<'_>, value: &impl AnyValueView<'a>) { + pub fn write_any_value<'a>(w: &mut BufWriter<'_>, value: &impl AnyValueView<'a>) { match value.value_type() { ValueType::String => { if let Some(s) = value.as_string() { @@ -276,14 +247,7 @@ impl ConsoleWriter { } ValueType::Bytes => { if let Some(bytes) = value.as_bytes() { - let _ = w.write_all(b"["); - for (i, b) in bytes.iter().enumerate() { - if i > 0 { - let _ = w.write_all(b", "); - } - let _ = write!(w, "{}", b); - } - let _ = w.write_all(b"]"); + let _ = write!(w, "<{} bytes>", bytes.len()); } } ValueType::Array => { @@ -322,8 +286,50 @@ impl ConsoleWriter { } } - /// Write a log line to stdout or stderr. - pub(crate) fn write_line(&self, level: &Level, data: &[u8]) { + /// Write attributes in `[key=value, ...]` format. + /// + /// Writes nothing if the iterator is empty. + pub fn write_attrs<A, I>(w: &mut BufWriter<'_>, attrs: I) + where + A: AttributeView, + I: Iterator<Item = A>, + { + let mut attrs = attrs.peekable(); + if attrs.peek().is_some() { + let _ = w.write_all(b" ["); + let mut first = true; + for attr in attrs { + if Self::is_full(w) { + break; + } + if !first { + let _ = w.write_all(b", "); + } + first = false; + let _ = w.write_all(attr.key()); + let _ = w.write_all(b"="); + match attr.value() { + Some(v) => Self::write_any_value(w, &v), + None => { + let _ = w.write_all(b""); + } + } + } + let _ = w.write_all(b"]"); + } + } + + /// Check if the buffer is full (position >= capacity).
+ #[inline] + #[must_use] + pub fn is_full(w: &BufWriter<'_>) -> bool { + w.position() as usize >= w.get_ref().len() + } + + /// Write a log line to stdout or stderr based on level. + /// + /// ERROR and WARN go to stderr, others go to stdout. + pub fn write_output(&self, level: &Level, data: &[u8]) { let use_stderr = matches!(*level, Level::ERROR | Level::WARN); let _ = if use_stderr { std::io::stderr().write_all(data) @@ -332,6 +338,139 @@ impl ConsoleWriter { }; } + // ======================================================================== + // Generic log line formatting with customizable level section + // ======================================================================== + + /// Format a log line from a `LogRecordView` with customizable level formatting. + /// + /// This is the core formatting method used by both: + /// - Flat format (tracing events): callback writes `INFO ` with color + /// - Hierarchical format (OTLP): callback writes tree chars + `RESOURCE`/`SCOPE`/level + /// + /// Output format: `<timestamp> <level section> <event_name>: <body> [attrs]` + /// + /// # Arguments + /// * `w` - Buffer to write to + /// * `timestamp_ns` - Timestamp in nanoseconds since UNIX epoch + /// * `event_name` - Event name to display (e.g., "target::name" or "v1.Resource") + /// * `record` - LogRecordView providing body() and attributes() + /// * `format_level` - Callback to format the level section; receives (writer, console_writer) + pub fn format_log_line<V, F>( + &self, + w: &mut BufWriter<'_>, + timestamp_ns: u64, + event_name: &str, + record: &V, + format_level: F, + ) where + V: LogRecordView, + F: FnOnce(&mut BufWriter<'_>, &Self), + { + // Dim timestamp + self.write_ansi(w, AnsiCode::Dim); + Self::write_timestamp(w, timestamp_ns); + self.write_ansi(w, AnsiCode::Reset); + let _ = w.write_all(b" "); + + // Level section (delegated to callback) + format_level(w, self); + + // Bold event name + self.write_ansi(w, AnsiCode::Bold); + let _ = w.write_all(event_name.as_bytes()); + self.write_ansi(w, AnsiCode::Reset); + let _ = w.write_all(b": "); + + // Body + if let Some(body) = record.body() { + Self::write_any_value(w, &body); + } + + // Attributes + Self::write_attrs(w, record.attributes()); + + let _ = w.write_all(b"\n"); + } + + // ======================================================================== + // Tracing-specific methods (for RawLoggingLayer compatibility) + // ======================================================================== + + /// Format a LogRecord as a human-readable string (for testing/compatibility). + /// + /// Output format: `2026-01-06T10:30:45.123Z INFO target::name (file.rs:42): body [attr=value, ...]` + pub fn format_log_record(&self, record: &LogRecord, callsite: &SavedCallsite) -> String { + let mut buf = [0u8; LOG_BUFFER_SIZE]; + let len = self.write_log_record(&mut buf, record, callsite); + // The buffer contains valid UTF-8 since we only write ASCII and valid UTF-8 strings + String::from_utf8_lossy(&buf[..len]).into_owned() + } + + /// Write a LogRecord to stdout or stderr (based on level). + /// + /// ERROR and WARN go to stderr, others go to stdout. + /// This is the same routing logic used by RawLoggingLayer. + pub fn raw_print(&self, record: &LogRecord, callsite: &SavedCallsite) { + let mut buf = [0u8; LOG_BUFFER_SIZE]; + let len = self.write_log_record(&mut buf, record, callsite); + self.write_output(callsite.level(), &buf[..len]); + } + + /// Write a LogRecord to a byte buffer. Returns the number of bytes written.
+ pub(crate) fn write_log_record( + &self, + buf: &mut [u8], + record: &LogRecord, + callsite: &SavedCallsite, + ) -> usize { + let mut w = Cursor::new(buf); + + self.write_ansi(&mut w, AnsiCode::Dim); + Self::write_timestamp(&mut w, record.timestamp_ns); + self.write_ansi(&mut w, AnsiCode::Reset); + let _ = w.write_all(b" "); + self.write_level(&mut w, callsite.level()); + self.write_ansi(&mut w, AnsiCode::Bold); + Self::write_event_name(&mut w, callsite); + self.write_ansi(&mut w, AnsiCode::Reset); + let _ = w.write_all(b": "); + Self::write_body_attrs(&mut w, &record.body_attrs_bytes); + let _ = w.write_all(b"\n"); + + w.position() as usize + } + + /// Write callsite details as event_name to buffer. + #[inline] + fn write_event_name(w: &mut BufWriter<'_>, callsite: &SavedCallsite) { + let _ = w.write_all(callsite.target().as_bytes()); + let _ = w.write_all(b"::"); + let _ = w.write_all(callsite.name().as_bytes()); + if let (Some(file), Some(line)) = (callsite.file(), callsite.line()) { + let _ = write!(w, " ({}:{})", file, line); + } + } + + /// Write body+attrs bytes to buffer using LogRecordView. + fn write_body_attrs(w: &mut BufWriter<'_>, bytes: &Bytes) { + if bytes.is_empty() { + return; + } + + // A partial protobuf message (just body + attributes) is still a valid message. + // We can use the RawLogRecord view to access just the fields we encoded. + let record = RawLogRecord::new(bytes.as_ref()); + + // Write body if present + if let Some(body) = record.body() { + Self::write_any_value(w, &body); + } + + // Write attributes if present + Self::write_attrs(w, record.attributes()); + } + /// Write a raw error message directly to stderr, bypassing tracing entirely. /// /// This method is safe to call from within tracing subscriber callbacks @@ -345,7 +484,6 @@ impl ConsoleWriter { let mut buf = [0u8; LOG_BUFFER_SIZE]; let mut w = Cursor::new(buf.as_mut_slice()); - let cm = self.color_mode; // Timestamp let nanos = SystemTime::now() @@ -353,20 +491,20 @@ impl ConsoleWriter { .unwrap_or_default() .as_nanos() as u64; - cm.write_ansi(&mut w, AnsiCode::Dim); + self.write_ansi(&mut w, AnsiCode::Dim); Self::write_timestamp(&mut w, nanos); - cm.write_ansi(&mut w, AnsiCode::Reset); + self.write_ansi(&mut w, AnsiCode::Reset); let _ = w.write_all(b" "); // Level (always ERROR for raw_write_error) - cm.write_level(&mut w, &Level::ERROR); + self.write_level(&mut w, &Level::ERROR); // Event name (target::name) - cm.write_ansi(&mut w, AnsiCode::Bold); + self.write_ansi(&mut w, AnsiCode::Bold); let _ = w.write_all(target.as_bytes()); let _ = w.write_all(b"::"); let _ = w.write_all(name.as_bytes()); - cm.write_ansi(&mut w, AnsiCode::Reset); + self.write_ansi(&mut w, AnsiCode::Reset); let _ = w.write_all(b": "); // Message body From c4a7e8a2ffe9f63bd743718d47519fb49130783a Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Wed, 14 Jan 2026 12:04:18 -0800 Subject: [PATCH 83/92] space --- .../configs/internal-telemetry.yaml | 16 +- .../otap/src/console_exporter/formatter.rs | 672 +++++++++++++----- 2 files changed, 504 insertions(+), 184 deletions(-) diff --git a/rust/otap-dataflow/configs/internal-telemetry.yaml b/rust/otap-dataflow/configs/internal-telemetry.yaml index 5bf47fc0af..066809ed17 100644 --- a/rust/otap-dataflow/configs/internal-telemetry.yaml +++ b/rust/otap-dataflow/configs/internal-telemetry.yaml @@ -43,9 +43,23 @@ internal: out_ports: out_port: destinations: - - console + - batch dispatch_strategy: round_robin config: {} + batch: + kind: processor + plugin_urn: 
"urn:otel:batch:processor" + out_ports: + out_port: + destinations: + - console + dispatch_strategy: round_robin + config: + otap: + min_size: 1000 + sizer: items + flush_timeout: 3s + format: otlp console: kind: exporter plugin_urn: "urn:otel:console:exporter" diff --git a/rust/otap-dataflow/crates/otap/src/console_exporter/formatter.rs b/rust/otap-dataflow/crates/otap/src/console_exporter/formatter.rs index bf6dea8b7e..87c02bc35c 100644 --- a/rust/otap-dataflow/crates/otap/src/console_exporter/formatter.rs +++ b/rust/otap-dataflow/crates/otap/src/console_exporter/formatter.rs @@ -3,22 +3,24 @@ //! Hierarchical formatter for OTLP data with tree-style output. //! -//! This module uses the shared formatting primitives from `otap_df_telemetry::self_tracing` -//! to render OTLP log data in a hierarchical tree format: +//! This module uses the shared `format_log_line` from `otap_df_telemetry::self_tracing` +//! to render OTLP log data in a hierarchical tree format. The tree structure is +//! inserted via the level-formatting callback, keeping timestamp left-aligned: //! //! ```text -//! RESOURCE {service.name=my-service, host.name=localhost} -//! │ SCOPE {name=my-library, version=1.0.0} -//! │ ├─ INFO event_name: message body [attr=value, ...] -//! │ ├─ WARN event_name: warning message -//! │ └─ ERROR event_name: error message [code=500] +//! 2026-01-14T18:29:09.645Z RESOURCE v1.Resource: [service.name=my-service] +//! 2026-01-14T18:29:09.645Z │ SCOPE v1.InstrumentationScope: [name=my-lib] +//! 2026-01-14T18:29:09.645Z │ └─ DEBUG event_name: body [attr=value] //! ``` -use otap_df_pdata::views::common::{AttributeView, InstrumentationScopeView}; +use otap_df_pdata::OtlpProtoBytes; +use otap_df_pdata::schema::{SpanId, TraceId}; +use otap_df_pdata::views::common::{ + AnyValueView, AttributeView, InstrumentationScopeView, Str, ValueType, +}; use otap_df_pdata::views::logs::{LogRecordView, LogsDataView, ResourceLogsView, ScopeLogsView}; use otap_df_pdata::views::otlp::bytes::logs::RawLogsData; use otap_df_pdata::views::resource::ResourceView; -use otap_df_pdata::OtlpProtoBytes; use otap_df_telemetry::self_tracing::{AnsiCode, BufWriter, ConsoleWriter, LOG_BUFFER_SIZE}; use std::io::Write; @@ -27,7 +29,6 @@ mod unicode_tree { pub const VERTICAL: &str = "│"; pub const TEE: &str = "├─"; pub const CORNER: &str = "└─"; - pub const SPACE: &str = " "; } /// Tree drawing characters for ASCII mode. @@ -35,13 +36,38 @@ mod ascii_tree { pub const VERTICAL: &str = "|"; pub const TEE: &str = "+-"; pub const CORNER: &str = "\\-"; - pub const SPACE: &str = " "; +} + +/// Tree drawing characters. +#[derive(Clone, Copy)] +struct TreeChars { + vertical: &'static str, + tee: &'static str, + corner: &'static str, +} + +impl TreeChars { + fn unicode() -> Self { + Self { + vertical: unicode_tree::VERTICAL, + tee: unicode_tree::TEE, + corner: unicode_tree::CORNER, + } + } + + fn ascii() -> Self { + Self { + vertical: ascii_tree::VERTICAL, + tee: ascii_tree::TEE, + corner: ascii_tree::CORNER, + } + } } /// Hierarchical formatter for OTLP data. pub struct HierarchicalFormatter { writer: ConsoleWriter, - use_unicode: bool, + tree: TreeChars, } impl HierarchicalFormatter { @@ -53,7 +79,12 @@ impl HierarchicalFormatter { } else { ConsoleWriter::no_color() }; - Self { writer, use_unicode } + let tree = if use_unicode { + TreeChars::unicode() + } else { + TreeChars::ascii() + }; + Self { writer, tree } } /// Format logs from OTLP bytes. @@ -65,54 +96,64 @@ impl HierarchicalFormatter { } /// Format logs from a LogsDataView. 
-    fn format_logs_data<L: LogsDataView>(&self, logs_data: &'_ L) {
+    fn format_logs_data<L: LogsDataView>(&self, logs_data: &L) {
         for resource_logs in logs_data.resources() {
             self.format_resource_logs(&resource_logs);
         }
     }
 
     /// Format a ResourceLogs with its nested scopes.
-    fn format_resource_logs<R: ResourceLogsView>(&self, resource_logs: &'_ R) {
+    fn format_resource_logs<R: ResourceLogsView>(&self, resource_logs: &R) {
+        // Get first timestamp from nested log records
+        let first_ts = self.get_first_log_timestamp(resource_logs);
+
+        // Always format resource line (even if empty) for consistent tree structure
+        match resource_logs.resource() {
+            Some(resource) => {
+                let view = ResourceLogView::new(&resource);
+                self.print_line(first_ts, "v1.Resource", &view, |w, cw| {
+                    cw.write_ansi(w, AnsiCode::Cyan);
+                    cw.write_ansi(w, AnsiCode::Bold);
+                    let _ = w.write_all(b"RESOURCE");
+                    cw.write_ansi(w, AnsiCode::Reset);
+                    let _ = w.write_all(b" ");
+                });
+            }
+            None => {
+                self.print_resource_line(first_ts);
+            }
+        }
+
+        // Format each scope
+        let scopes: Vec<_> = resource_logs.scopes().collect();
+        let scope_count = scopes.len();
+        for (i, scope_logs) in scopes.into_iter().enumerate() {
+            let is_last_scope = i == scope_count - 1;
+            self.format_scope_logs(&scope_logs, is_last_scope);
+        }
+    }
+
+    /// Print a resource line with no attributes (used when resource is None).
+    fn print_resource_line(&self, timestamp_ns: u64) {
         let mut buf = [0u8; LOG_BUFFER_SIZE];
         let mut w = std::io::Cursor::new(buf.as_mut_slice());
 
-        // Get first timestamp from nested log records for the resource line
-        let first_ts = self.get_first_log_timestamp(resource_logs);
-
-        // Write resource header
         self.writer.write_ansi(&mut w, AnsiCode::Dim);
-        ConsoleWriter::write_timestamp(&mut w, first_ts);
+        ConsoleWriter::write_timestamp(&mut w, timestamp_ns);
         self.writer.write_ansi(&mut w, AnsiCode::Reset);
-        let _ = w.write_all(b" ");
+        let _ = w.write_all(b"  ");
         self.writer.write_ansi(&mut w, AnsiCode::Cyan);
         self.writer.write_ansi(&mut w, AnsiCode::Bold);
         let _ = w.write_all(b"RESOURCE");
         self.writer.write_ansi(&mut w, AnsiCode::Reset);
-        let _ = w.write_all(b" ");
+        let _ = w.write_all(b" v1.Resource:\n");
 
-        // Write resource attributes
-        if let Some(resource) = resource_logs.resource() {
-            self.write_resource_attrs(&mut w, &resource);
-        } else {
-            let _ = w.write_all(b"{}");
-        }
-        let _ = w.write_all(b"\n");
-
-        // Print resource line
         let len = w.position() as usize;
         let _ = std::io::stdout().write_all(&buf[..len]);
-
-        // Format each scope
-        let scopes: Vec<_> = resource_logs.scopes().collect();
-        let scope_count = scopes.len();
-        for (i, scope_logs) in scopes.into_iter().enumerate() {
-            let is_last_scope = i == scope_count - 1;
-            self.format_scope_logs(&scope_logs, is_last_scope);
-        }
     }
 
     /// Get the first timestamp from log records in a ResourceLogs.
-    fn get_first_log_timestamp<R: ResourceLogsView>(&self, resource_logs: &'_ R) -> u64 {
+    fn get_first_log_timestamp<R: ResourceLogsView>(&self, resource_logs: &R) -> u64 {
         for scope_logs in resource_logs.scopes() {
             for log_record in scope_logs.log_records() {
                 if let Some(ts) = log_record.time_unix_nano() {
@@ -127,197 +168,462 @@ impl HierarchicalFormatter {
     }
 
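The first-timestamp scan above reduces to a `find_map` over nested records; a self-contained sketch with simplified types:

```rust
// Take the first record timestamp, preferring time over observed time,
// and fall back to 0 when nothing is set (same policy as the formatter).
struct Record {
    time_unix_nano: Option<u64>,
    observed_time_unix_nano: Option<u64>,
}

fn first_timestamp(records: &[Record]) -> u64 {
    records
        .iter()
        .find_map(|r| r.time_unix_nano.or(r.observed_time_unix_nano))
        .unwrap_or(0)
}

fn main() {
    let recs = [
        Record { time_unix_nano: None, observed_time_unix_nano: Some(42) },
        Record { time_unix_nano: Some(7), observed_time_unix_nano: None },
    ];
    assert_eq!(first_timestamp(&recs), 42);
}
```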
     /// Format a ScopeLogs with its nested log records.
-    fn format_scope_logs<S: ScopeLogsView>(&self, scope_logs: &'_ S, is_last_scope: bool) {
-        let mut buf = [0u8; LOG_BUFFER_SIZE];
-        let mut w = std::io::Cursor::new(buf.as_mut_slice());
-
-        let tree = self.tree_chars();
-
-        // Get first timestamp from log records for the scope line
+    fn format_scope_logs<S: ScopeLogsView>(&self, scope_logs: &S, is_last_scope: bool) {
+        // Get first timestamp from log records
         let first_ts = scope_logs
             .log_records()
             .find_map(|lr| lr.time_unix_nano().or_else(|| lr.observed_time_unix_nano()))
             .unwrap_or(0);
 
-        // Write scope header with tree prefix
-        let _ = w.write_all(tree.vertical.as_bytes());
-        let _ = w.write_all(b" ");
+        // Always format scope line (even if empty) for consistent tree structure
+        match scope_logs.scope() {
+            Some(scope) => {
+                let view = ScopeLogView::new(&scope);
+                let tree = self.tree;
+                self.print_line(first_ts, "v1.InstrumentationScope", &view, |w, cw| {
+                    let _ = w.write_all(tree.vertical.as_bytes());
+                    let _ = w.write_all(b" ");
+                    cw.write_ansi(w, AnsiCode::Magenta);
+                    cw.write_ansi(w, AnsiCode::Bold);
+                    let _ = w.write_all(b"SCOPE");
+                    cw.write_ansi(w, AnsiCode::Reset);
+                    let _ = w.write_all(b" ");
+                });
+            }
+            None => {
+                self.print_scope_line(first_ts);
+            }
+        }
+
+        // Format each log record
+        let records: Vec<_> = scope_logs.log_records().collect();
+        let record_count = records.len();
+        for (i, log_record) in records.into_iter().enumerate() {
+            let is_last_record = i == record_count - 1;
+            self.format_log_record(&log_record, is_last_scope, is_last_record);
+        }
+    }
+
+    /// Print a scope line with no attributes (used when scope is None).
+    fn print_scope_line(&self, timestamp_ns: u64) {
+        let mut buf = [0u8; LOG_BUFFER_SIZE];
+        let mut w = std::io::Cursor::new(buf.as_mut_slice());
+
         self.writer.write_ansi(&mut w, AnsiCode::Dim);
-        ConsoleWriter::write_timestamp(&mut w, first_ts);
+        ConsoleWriter::write_timestamp(&mut w, timestamp_ns);
         self.writer.write_ansi(&mut w, AnsiCode::Reset);
+        let _ = w.write_all(b"  ");
+        let _ = w.write_all(self.tree.vertical.as_bytes());
         let _ = w.write_all(b" ");
         self.writer.write_ansi(&mut w, AnsiCode::Magenta);
         self.writer.write_ansi(&mut w, AnsiCode::Bold);
         let _ = w.write_all(b"SCOPE");
         self.writer.write_ansi(&mut w, AnsiCode::Reset);
-        let _ = w.write_all(b" ");
+        let _ = w.write_all(b" v1.InstrumentationScope:\n");
 
-        // Write scope info
-        if let Some(scope) = scope_logs.scope() {
-            self.write_scope_info(&mut w, &scope);
-        } else {
-            let _ = w.write_all(b"{}");
-        }
-        let _ = w.write_all(b"\n");
-
-        // Print scope line
         let len = w.position() as usize;
         let _ = std::io::stdout().write_all(&buf[..len]);
-
-        // Format each log record
-        let records: Vec<_> = scope_logs.log_records().collect();
-        let record_count = records.len();
-        for (i, log_record) in records.into_iter().enumerate() {
-            let is_last_record = i == record_count - 1;
-            self.format_log_record(&log_record, is_last_scope, is_last_record);
-        }
     }
 
-    /// Format a single log record.
+    /// Format a single log record using format_log_line.
     fn format_log_record<L: LogRecordView>(
         &self,
-        log_record: &'_ L,
+        log_record: &L,
         is_last_scope: bool,
         is_last_record: bool,
     ) {
+        let ts = log_record
+            .time_unix_nano()
+            .or_else(|| log_record.observed_time_unix_nano())
+            .unwrap_or(0);
+
+        let event_name = log_record
+            .event_name()
+            .map(|s| String::from_utf8_lossy(s).into_owned())
+            .unwrap_or_else(|| "event".to_string());
+
+        let severity = log_record.severity_number();
+        let tree = self.tree;
+
+        self.print_line(ts, &event_name, log_record, |w, cw| {
+            // Tree prefix
+            let _ = w.write_all(tree.vertical.as_bytes());
+            let _ = w.write_all(b" ");
+            if is_last_record && is_last_scope {
+                let _ = w.write_all(tree.corner.as_bytes());
+            } else {
+                let _ = w.write_all(tree.tee.as_bytes());
+            }
+            let _ = w.write_all(b" ");
+            // Severity with color
+            cw.write_severity(w, severity);
+        });
+    }
+
+    /// Print a line using the shared format_log_line.
+    fn print_line<V, F>(&self, timestamp_ns: u64, event_name: &str, record: &V, format_level: F)
+    where
+        V: LogRecordView,
+        F: FnOnce(&mut BufWriter<'_>, &ConsoleWriter),
+    {
         let mut buf = [0u8; LOG_BUFFER_SIZE];
         let mut w = std::io::Cursor::new(buf.as_mut_slice());
 
-        let tree = self.tree_chars();
-        let severity = log_record.severity_number();
+        self.writer
+            .format_log_line(&mut w, timestamp_ns, event_name, record, format_level);
 
-        // Tree prefix: vertical line for scope continuation, then branch for record
-        let _ = w.write_all(tree.vertical.as_bytes());
-        let _ = w.write_all(b" ");
-        if is_last_record && is_last_scope {
-            let _ = w.write_all(tree.corner.as_bytes());
-        } else {
-            let _ = w.write_all(tree.tee.as_bytes());
-        }
-        let _ = w.write_all(b" ");
+        let len = w.position() as usize;
+        let _ = std::io::stdout().write_all(&buf[..len]);
+    }
+}
 
-        // Timestamp
-        let ts = log_record
-            .time_unix_nano()
-            .or_else(|| log_record.observed_time_unix_nano())
-            .unwrap_or(0);
-        self.writer.write_ansi(&mut w, AnsiCode::Dim);
-        ConsoleWriter::write_timestamp(&mut w, ts);
-        self.writer.write_ansi(&mut w, AnsiCode::Reset);
-        let _ = w.write_all(b" ");
+// ============================================================================
+// View adapters that present Resource and Scope as LogRecordView
+// ============================================================================
 
-        // Level (using shared severity formatting)
-        self.writer.write_severity(&mut w, severity);
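The adapter idea in miniature — a toy trait standing in for the pdata views, showing how a Resource can be presented through a record-shaped interface so a single formatter renders both:

```rust
// Simplified stand-ins, not the crate's traits: the wrapper satisfies the
// record-shaped interface by exposing the resource's attributes and no body.
trait RecordLike {
    fn body(&self) -> Option<&str>;
    fn attributes(&self) -> Vec<(&str, &str)>;
}

struct Resource {
    attrs: Vec<(String, String)>,
}

struct ResourceAsRecord<'a>(&'a Resource);

impl RecordLike for ResourceAsRecord<'_> {
    fn body(&self) -> Option<&str> {
        None // a resource has no log body
    }
    fn attributes(&self) -> Vec<(&str, &str)> {
        self.0.attrs.iter().map(|(k, v)| (k.as_str(), v.as_str())).collect()
    }
}

fn main() {
    let res = Resource { attrs: vec![("service.name".into(), "my-service".into())] };
    let view = ResourceAsRecord(&res);
    assert!(view.body().is_none());
    assert_eq!(view.attributes(), vec![("service.name", "my-service")]);
}
```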
+/// A view adapter that presents a Resource as a LogRecordView.
+///
+/// The resource attributes become the log record's attributes.
+/// Body is empty, severity is ignored.
+struct ResourceLogView<'a, R: ResourceView> {
+    resource: &'a R,
+}
 
-        // Event name
-        self.writer.write_ansi(&mut w, AnsiCode::Bold);
-        if let Some(name) = log_record.event_name() {
-            let _ = w.write_all(name.as_ref());
-        } else {
-            let _ = w.write_all(b"event");
+impl<'a, R: ResourceView> ResourceLogView<'a, R> {
+    fn new(resource: &'a R) -> Self {
+        Self { resource }
+    }
+}
+
+impl<R: ResourceView> LogRecordView for ResourceLogView<'_, R> {
+    type Attribute<'att>
+        = R::Attribute<'att>
+    where
+        Self: 'att;
+    type AttributeIter<'att>
+        = R::AttributesIter<'att>
+    where
+        Self: 'att;
+    type Body<'bod>
+        = EmptyAnyValue
+    where
+        Self: 'bod;
+
+    fn time_unix_nano(&self) -> Option<u64> {
+        None
+    }
+    fn observed_time_unix_nano(&self) -> Option<u64> {
+        None
+    }
+    fn severity_number(&self) -> Option<i32> {
+        None
+    }
+    fn severity_text(&self) -> Option<Str<'_>> {
+        None
+    }
+    fn body(&self) -> Option<Self::Body<'_>> {
+        None
+    }
+    fn attributes(&self) -> Self::AttributeIter<'_> {
+        self.resource.attributes()
+    }
+    fn dropped_attributes_count(&self) -> u32 {
+        0
+    }
+    fn flags(&self) -> Option<u32> {
+        None
+    }
+    fn trace_id(&self) -> Option<&TraceId> {
+        None
+    }
+    fn span_id(&self) -> Option<&SpanId> {
+        None
+    }
+    fn event_name(&self) -> Option<Str<'_>> {
+        None
+    }
+}
+
+/// A view adapter that presents an InstrumentationScope as a LogRecordView.
+///
+/// The scope's name, version, and attributes are merged into the attributes iterator.
+struct ScopeLogView<'a, S: InstrumentationScopeView> {
+    scope: &'a S,
+}
+
+impl<'a, S: InstrumentationScopeView> ScopeLogView<'a, S> {
+    fn new(scope: &'a S) -> Self {
+        Self { scope }
+    }
+}
+
+impl<S: InstrumentationScopeView> LogRecordView for ScopeLogView<'_, S> {
+    type Attribute<'att>
+        = ScopeAttribute<'att, S>
+    where
+        Self: 'att;
+    type AttributeIter<'att>
+        = ScopeAttributeIter<'att, S>
+    where
+        Self: 'att;
+    type Body<'bod>
+        = EmptyAnyValue
+    where
+        Self: 'bod;
+
+    fn time_unix_nano(&self) -> Option<u64> {
+        None
+    }
+    fn observed_time_unix_nano(&self) -> Option<u64> {
+        None
+    }
+    fn severity_number(&self) -> Option<i32> {
+        None
+    }
+    fn severity_text(&self) -> Option<Str<'_>> {
+        None
+    }
+    fn body(&self) -> Option<Self::Body<'_>> {
+        None
+    }
+    fn attributes(&self) -> Self::AttributeIter<'_> {
+        ScopeAttributeIter::new(self.scope)
+    }
+    fn dropped_attributes_count(&self) -> u32 {
+        0
+    }
+    fn flags(&self) -> Option<u32> {
+        None
+    }
+    fn trace_id(&self) -> Option<&TraceId> {
+        None
+    }
+    fn span_id(&self) -> Option<&SpanId> {
+        None
+    }
+    fn event_name(&self) -> Option<Str<'_>> {
+        None
+    }
+}
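The phase-based iterator below hand-rolls what iterator adapters express directly — presumably so the concrete type stays nameable for the GAT associated types. A sketch of the equivalent with `chain`:

```rust
// Yield synthetic ("name", ...) and ("version", ...) pairs ahead of the
// real attributes, as the unified scope-attribute stream does.
fn merged<'a>(
    name: Option<&'a str>,
    version: Option<&'a str>,
    attrs: impl Iterator<Item = (&'a str, &'a str)>,
) -> impl Iterator<Item = (&'a str, &'a str)> {
    let name_attr: Option<(&'a str, &'a str)> = name.map(|n| ("name", n));
    let version_attr: Option<(&'a str, &'a str)> = version.map(|v| ("version", v));
    name_attr.into_iter().chain(version_attr).chain(attrs)
}

fn main() {
    let attrs = [("library", "tracing")];
    let all: Vec<_> = merged(Some("my-lib"), Some("1.0.0"), attrs.iter().copied()).collect();
    assert_eq!(all, [("name", "my-lib"), ("version", "1.0.0"), ("library", "tracing")]);
}
```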
+/// Iterator that yields scope name, version, and attributes as a unified attribute stream.
+struct ScopeAttributeIter<'a, S: InstrumentationScopeView> {
+    scope: &'a S,
+    phase: ScopeIterPhase<'a, S>,
+}
+
+enum ScopeIterPhase<'a, S: InstrumentationScopeView + 'a> {
+    Name,
+    Version,
+    Attributes(S::AttributeIter<'a>),
+    Done,
+}
+
+impl<'a, S: InstrumentationScopeView + 'a> ScopeAttributeIter<'a, S> {
+    fn new(scope: &'a S) -> Self {
+        Self {
+            scope,
+            phase: ScopeIterPhase::Name,
         }
-    }
+    }
+}
 
-        // Body (using shared AnyValue formatting)
-        if let Some(body) = log_record.body() {
-            ConsoleWriter::write_any_value(&mut w, &body);
+impl<'a, S: InstrumentationScopeView + 'a> Iterator for ScopeAttributeIter<'a, S> {
+    type Item = ScopeAttribute<'a, S>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        loop {
+            match &mut self.phase {
+                ScopeIterPhase::Name => {
+                    if let Some(name) = self.scope.name() {
+                        self.phase = ScopeIterPhase::Version;
+                        return Some(ScopeAttribute::Name(name));
+                    }
+                    self.phase = ScopeIterPhase::Version;
+                }
+                ScopeIterPhase::Version => {
+                    if let Some(version) = self.scope.version() {
+                        self.phase = ScopeIterPhase::Attributes(self.scope.attributes());
+                        return Some(ScopeAttribute::Version(version));
+                    }
+                    self.phase = ScopeIterPhase::Attributes(self.scope.attributes());
+                }
+                ScopeIterPhase::Attributes(iter) => {
+                    if let Some(attr) = iter.next() {
+                        return Some(ScopeAttribute::Attr(attr));
+                    }
+                    self.phase = ScopeIterPhase::Done;
+                    return None;
+                }
+                ScopeIterPhase::Done => return None,
+            }
         }
+    }
+}
 
-        // Attributes (using shared attribute formatting)
-        ConsoleWriter::write_attrs(&mut w, log_record.attributes());
+/// A synthetic attribute for scope name/version or a real scope attribute.
+enum ScopeAttribute<'a, S: InstrumentationScopeView + 'a> {
+    Name(Str<'a>),
+    Version(Str<'a>),
+    Attr(S::Attribute<'a>),
+}
 
-        let _ = w.write_all(b"\n");
+impl<'a, S: InstrumentationScopeView + 'a> AttributeView for ScopeAttribute<'a, S> {
+    type Val<'val>
+        = ScopeAttributeValue<'a, 'val, S>
+    where
+        Self: 'val;
+
+    fn key(&self) -> Str<'_> {
+        match self {
+            ScopeAttribute::Name(_) => b"name",
+            ScopeAttribute::Version(_) => b"version",
+            ScopeAttribute::Attr(a) => a.key(),
+        }
+    }
 
-        // Print to stdout or stderr based on severity
-        let len = w.position() as usize;
-        if ConsoleWriter::severity_is_error_or_warn(severity) {
-            let _ = std::io::stderr().write_all(&buf[..len]);
-        } else {
-            let _ = std::io::stdout().write_all(&buf[..len]);
+    fn value(&self) -> Option<Self::Val<'_>> {
+        match self {
+            ScopeAttribute::Name(s) => Some(ScopeAttributeValue::String(s)),
+            ScopeAttribute::Version(s) => Some(ScopeAttributeValue::String(s)),
+            ScopeAttribute::Attr(a) => a.value().map(ScopeAttributeValue::Delegated),
         }
     }
+}
 
-    /// Get tree drawing characters based on mode.
-    fn tree_chars(&self) -> TreeChars {
-        if self.use_unicode {
-            TreeChars {
-                vertical: unicode_tree::VERTICAL,
-                tee: unicode_tree::TEE,
-                corner: unicode_tree::CORNER,
-                _space: unicode_tree::SPACE,
-            }
-        } else {
-            TreeChars {
-                vertical: ascii_tree::VERTICAL,
-                tee: ascii_tree::TEE,
-                corner: ascii_tree::CORNER,
-                _space: ascii_tree::SPACE,
-            }
+/// Value type for scope attributes - either a string (name/version) or delegated.
+/// 'a is the lifetime of the underlying data
+/// 'val is the borrow lifetime for the value call
+enum ScopeAttributeValue<'a, 'val, S: InstrumentationScopeView + 'a>
+where
+    S::Attribute<'a>: 'val,
+{
+    String(Str<'a>),
+    Delegated(<S::Attribute<'a> as AttributeView>::Val<'val>),
+}
+
+impl<'a, 'val, S: InstrumentationScopeView + 'a> AnyValueView<'val>
+    for ScopeAttributeValue<'a, 'val, S>
+where
+    'a: 'val,
+{
+    type KeyValue = EmptyAttribute;
+    type ArrayIter<'arr>
+        = std::iter::Empty<EmptyAnyValue>
+    where
+        Self: 'arr;
+    type KeyValueIter<'kv>
+        = std::iter::Empty<EmptyAttribute>
+    where
+        Self: 'kv;
+
+    fn value_type(&self) -> ValueType {
+        match self {
+            ScopeAttributeValue::String(_) => ValueType::String,
+            ScopeAttributeValue::Delegated(v) => v.value_type(),
         }
     }
 
-    /// Write resource attributes in `{key=value, ...}` format.
-    fn write_resource_attrs<R: ResourceView>(&self, w: &mut BufWriter<'_>, resource: &R) {
-        let _ = w.write_all(b"{");
-        let mut first = true;
-        for attr in resource.attributes() {
-            if !first {
-                let _ = w.write_all(b", ");
-            }
-            first = false;
-            let _ = w.write_all(attr.key());
-            let _ = w.write_all(b"=");
-            if let Some(v) = attr.value() {
-                ConsoleWriter::write_any_value(w, &v);
-            }
+    fn as_string(&self) -> Option<Str<'val>> {
+        match self {
+            ScopeAttributeValue::String(s) => Some(s),
+            ScopeAttributeValue::Delegated(v) => v.as_string(),
         }
-        let _ = w.write_all(b"}");
     }
 
-    /// Write scope information in `{name=..., version=..., ...}` format.
-    fn write_scope_info<S: InstrumentationScopeView>(&self, w: &mut BufWriter<'_>, scope: &S) {
-        let _ = w.write_all(b"{");
-        let mut has_content = false;
+    fn as_int64(&self) -> Option<i64> {
+        match self {
+            ScopeAttributeValue::String(_) => None,
+            ScopeAttributeValue::Delegated(v) => v.as_int64(),
+        }
+    }
 
-        if let Some(name) = scope.name() {
-            let _ = w.write_all(b"name=");
-            let _ = w.write_all(name.as_ref());
-            has_content = true;
+    fn as_bool(&self) -> Option<bool> {
+        match self {
+            ScopeAttributeValue::String(_) => None,
+            ScopeAttributeValue::Delegated(v) => v.as_bool(),
         }
+    }
 
-        if let Some(version) = scope.version() {
-            if has_content {
-                let _ = w.write_all(b", ");
-            }
-            let _ = w.write_all(b"version=");
-            let _ = w.write_all(version.as_ref());
-            has_content = true;
+    fn as_double(&self) -> Option<f64> {
+        match self {
+            ScopeAttributeValue::String(_) => None,
+            ScopeAttributeValue::Delegated(v) => v.as_double(),
         }
+    }
 
-        // Include scope attributes
-        for attr in scope.attributes() {
-            if has_content {
-                let _ = w.write_all(b", ");
-            }
-            let _ = w.write_all(attr.key());
-            let _ = w.write_all(b"=");
-            if let Some(v) = attr.value() {
-                ConsoleWriter::write_any_value(w, &v);
-            }
-            has_content = true;
+    fn as_bytes(&self) -> Option<&[u8]> {
+        match self {
+            ScopeAttributeValue::String(_) => None,
+            ScopeAttributeValue::Delegated(v) => v.as_bytes(),
         }
+    }
 
-        let _ = w.write_all(b"}");
+    fn as_array(&self) -> Option<Self::ArrayIter<'_>> {
+        None
+    }
+
+    fn as_kvlist(&self) -> Option<Self::KeyValueIter<'_>> {
+        None
     }
 }
 
-/// Tree drawing characters.
-struct TreeChars {
-    vertical: &'static str,
-    tee: &'static str,
-    corner: &'static str,
-    _space: &'static str,
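These placeholder types bottom out in `std::iter::Empty`, a zero-sized iterator that always yields `None`; a tiny sketch of the null-object shape (illustrative, not the crate's trait):

```rust
// The type parameter exists only to satisfy a trait signature;
// no item of that type is ever produced.
struct EmptyValue;

impl EmptyValue {
    fn as_array(&self) -> Option<std::iter::Empty<EmptyValue>> {
        None
    }
}

fn main() {
    assert!(std::iter::empty::<u8>().next().is_none());
    assert!(EmptyValue.as_array().is_none());
}
```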
+/// An empty AnyValue (used as placeholder for body).
+struct EmptyAnyValue;
+
+impl<'a> AnyValueView<'a> for EmptyAnyValue {
+    type KeyValue = EmptyAttribute;
+    type ArrayIter<'arr>
+        = std::iter::Empty<EmptyAnyValue>
+    where
+        Self: 'arr;
+    type KeyValueIter<'kv>
+        = std::iter::Empty<EmptyAttribute>
+    where
+        Self: 'kv;
+
+    fn value_type(&self) -> ValueType {
+        ValueType::Empty
+    }
+    fn as_string(&self) -> Option<Str<'a>> {
+        None
+    }
+    fn as_int64(&self) -> Option<i64> {
+        None
+    }
+    fn as_bool(&self) -> Option<bool> {
+        None
+    }
+    fn as_double(&self) -> Option<f64> {
+        None
+    }
+    fn as_bytes(&self) -> Option<&[u8]> {
+        None
+    }
+    fn as_array(&self) -> Option<Self::ArrayIter<'_>> {
+        None
+    }
+    fn as_kvlist(&self) -> Option<Self::KeyValueIter<'_>> {
+        None
+    }
+}
+
+/// An empty attribute (used as placeholder).
+struct EmptyAttribute;
+
+impl AttributeView for EmptyAttribute {
+    type Val<'val>
+        = EmptyAnyValue
+    where
+        Self: 'val;
+
+    fn key(&self) -> Str<'_> {
+        b""
+    }
+
+    fn value(&self) -> Option<Self::Val<'_>> {
+        None
+    }
+}
 
 #[cfg(test)]
@@ -326,13 +632,13 @@ mod tests {
 
     #[test]
     fn test_tree_chars() {
-        let unicode = HierarchicalFormatter::new(false, true);
-        let ascii = HierarchicalFormatter::new(false, false);
+        let unicode = TreeChars::unicode();
+        let ascii = TreeChars::ascii();
 
-        assert_eq!(unicode.tree_chars().vertical, "│");
-        assert_eq!(unicode.tree_chars().tee, "├─");
+        assert_eq!(unicode.vertical, "│");
+        assert_eq!(unicode.tee, "├─");
 
-        assert_eq!(ascii.tree_chars().vertical, "|");
-        assert_eq!(ascii.tree_chars().tee, "+-");
+        assert_eq!(ascii.vertical, "|");
+        assert_eq!(ascii.tee, "+-");
     }
 }

From 1feacae2ee4603525539c8deacda19f56aa4fa64 Mon Sep 17 00:00:00 2001
From: Joshua MacDonald
Date: Wed, 14 Jan 2026 12:28:29 -0800
Subject: [PATCH 84/92] resource

---
 .../configs/internal-telemetry.yaml           |  3 +
 .../crates/controller/src/lib.rs              | 34 +++----
 rust/otap-dataflow/crates/engine/Cargo.toml   |  1 +
 rust/otap-dataflow/crates/engine/src/lib.rs   | 15 +--
 .../crates/engine/src/local/receiver.rs       | 18 ++++
 .../crates/engine/src/receiver.rs             | 49 ++++++++++
 .../otap/src/internal_telemetry_receiver.rs   |  7 +-
 .../otap-dataflow/crates/telemetry/src/lib.rs |  1 +
 .../crates/telemetry/src/logs.rs              | 93 ++++++++++++++++-
 9 files changed, 194 insertions(+), 27 deletions(-)

diff --git a/rust/otap-dataflow/configs/internal-telemetry.yaml b/rust/otap-dataflow/configs/internal-telemetry.yaml
index 066809ed17..bac6df3ab4 100644
--- a/rust/otap-dataflow/configs/internal-telemetry.yaml
+++ b/rust/otap-dataflow/configs/internal-telemetry.yaml
@@ -74,3 +74,6 @@ service:
     engine: immediate
     internal: noop # Avoid feedback in internal pipeline
     output: internal
+  resource:
+    service.id: 1234
+    service.name: test
diff --git a/rust/otap-dataflow/crates/controller/src/lib.rs b/rust/otap-dataflow/crates/controller/src/lib.rs
index 65742000c1..413e616f84 100644
--- a/rust/otap-dataflow/crates/controller/src/lib.rs
+++ b/rust/otap-dataflow/crates/controller/src/lib.rs
@@ -33,11 +33,12 @@ use otap_df_engine::control::{
     PipelineCtrlMsgReceiver, PipelineCtrlMsgSender, pipeline_ctrl_msg_channel,
 };
 use otap_df_engine::error::{Error as EngineError, error_summary_from};
+use otap_df_engine::receiver::InternalTelemetrySettings;
 use otap_df_state::DeployedPipelineKey;
 use otap_df_state::event::{ErrorSummary, ObservedEvent};
 use otap_df_state::reporter::ObservedEventReporter;
 use otap_df_state::store::ObservedStateStore;
-use otap_df_telemetry::logs::TelemetrySetup;
+use otap_df_telemetry::logs::{TelemetrySetup, encode_resource_bytes};
 use otap_df_telemetry::reporter::MetricsReporter;
 use otap_df_telemetry::telemetry_runtime::TelemetryRuntime;
 use 
otap_df_telemetry::{InternalTelemetrySystem, otel_info, otel_info_span, otel_warn}; @@ -113,6 +114,9 @@ impl Controller { // Get logs receiver for Internal output mode (passed to internal pipeline) let mut logs_receiver = telemetry_runtime.take_logs_receiver(); + // Pre-encode resource bytes once for all log batches + let resource_bytes = encode_resource_bytes(&telemetry_config.resource); + let metrics_system = InternalTelemetrySystem::new(telemetry_config); let metrics_dispatcher = metrics_system.dispatcher(); let metrics_reporter = metrics_system.reporter(); @@ -155,8 +159,14 @@ impl Controller { let internal_pipeline_thread = if let Some(internal_config) = pipeline.extract_internal_config() { - // TODO: this is a bunch of placeholder values! - let internal_logs_receiver = logs_receiver.take(); + // Create internal telemetry settings if we have a logs receiver + let internal_telemetry_settings = logs_receiver.take().map(|rx| { + InternalTelemetrySettings { + target_urn: INTERNAL_TELEMETRY_RECEIVER_URN, + logs_receiver: rx, + resource_bytes: resource_bytes.clone(), + } + }); let internal_factory = self.pipeline_factory; let internal_pipeline_id: PipelineId = "internal".into(); let internal_pipeline_key = DeployedPipelineKey { @@ -199,7 +209,7 @@ impl Controller { internal_metrics_reporter, internal_telemetry_setup, log_level, // TODO: separate log level for internal pipeline. - internal_logs_receiver, + internal_telemetry_settings, internal_ctrl_tx, internal_ctrl_rx, startup_tx, @@ -277,7 +287,6 @@ impl Controller { ); let metrics_reporter = metrics_reporter.clone(); let telemetry_setup = engine_telemetry_setup.clone(); - let logs_receiver = logs_receiver.clone(); let thread_name = format!("pipeline-core-{}", core_id.id); let obs_evt_reporter = obs_evt_reporter.clone(); @@ -294,7 +303,6 @@ impl Controller { metrics_reporter, telemetry_setup, log_level, - logs_receiver, pipeline_ctrl_msg_tx, pipeline_ctrl_msg_rx, ) @@ -550,7 +558,6 @@ impl Controller { metrics_reporter: MetricsReporter, telemetry_setup: TelemetrySetup, log_level: otap_df_config::pipeline::service::telemetry::logs::LogLevel, - logs_receiver: Option, pipeline_ctrl_msg_tx: PipelineCtrlMsgSender, pipeline_ctrl_msg_rx: PipelineCtrlMsgReceiver, ) -> Result, Error> { @@ -577,11 +584,9 @@ impl Controller { )); // Build the runtime pipeline from the configuration - // Pass logs_receiver for injection into ITR node (if present) - let logs_receiver_param = logs_receiver - .map(|rx| (INTERNAL_TELEMETRY_RECEIVER_URN, rx)); + // Regular pipelines don't need ITR injection - that's only for the internal pipeline let runtime_pipeline = pipeline_factory - .build(pipeline_context.clone(), pipeline_config.clone(), logs_receiver_param) + .build(pipeline_context.clone(), pipeline_config.clone(), None) .map_err(|e| { Error::PipelineRuntimeError { source: Box::new(e), @@ -626,7 +631,7 @@ impl Controller { metrics_reporter: MetricsReporter, telemetry_setup: TelemetrySetup, log_level: otap_df_config::pipeline::service::telemetry::logs::LogLevel, - logs_receiver: Option, + internal_telemetry: Option, pipeline_ctrl_msg_tx: PipelineCtrlMsgSender, pipeline_ctrl_msg_rx: PipelineCtrlMsgReceiver, startup_tx: std_mpsc::Sender>, @@ -645,13 +650,10 @@ impl Controller { )); // Build the runtime pipeline from the configuration - // Pass logs_receiver for injection into ITR node - let logs_receiver_param = logs_receiver.map(|rx| (INTERNAL_TELEMETRY_RECEIVER_URN, rx)); - let runtime_pipeline = match pipeline_factory.build( pipeline_context.clone(), 
pipeline_config.clone(), - logs_receiver_param, + internal_telemetry, ) { Ok(pipeline) => pipeline, Err(e) => { diff --git a/rust/otap-dataflow/crates/engine/Cargo.toml b/rust/otap-dataflow/crates/engine/Cargo.toml index 8d46abfb00..2eafccf98a 100644 --- a/rust/otap-dataflow/crates/engine/Cargo.toml +++ b/rust/otap-dataflow/crates/engine/Cargo.toml @@ -41,6 +41,7 @@ uuid = { workspace = true } once_cell = { workspace = true } data-encoding = { workspace = true } prost = { workspace = true } +bytes = { workspace = true } libmimalloc-sys = { workspace = true } byte-unit = { workspace = true } cpu-time = { workspace = true } diff --git a/rust/otap-dataflow/crates/engine/src/lib.rs b/rust/otap-dataflow/crates/engine/src/lib.rs index c2a635278b..8c5624bbca 100644 --- a/rust/otap-dataflow/crates/engine/src/lib.rs +++ b/rust/otap-dataflow/crates/engine/src/lib.rs @@ -297,13 +297,13 @@ impl PipelineFactory { /// - `pipeline_ctx`: The pipeline context for this build. /// - `config`: The pipeline configuration. /// - `logs_receiver`: Optional tuple of (URN, receiver) for internal logs channel. - /// When provided, the receiver is injected into any receiver node matching the URN, - /// enabling collection of logs from all threads via the channel. + /// When provided, the receiver and resource bytes are injected into any receiver + /// node matching the URN, enabling collection of logs from all threads via the channel. pub fn build( self: &PipelineFactory, pipeline_ctx: PipelineContext, config: PipelineConfig, - logs_receiver: Option<(&str, receiver::LogsReceiver)>, + internal_telemetry: Option, ) -> Result, Error> { let mut receivers = Vec::new(); let mut processors = Vec::new(); @@ -349,10 +349,11 @@ impl PipelineFactory { node_config.clone(), )?; - // Inject logs receiver if this is the target node - if let Some((target_urn, ref logs_rx)) = logs_receiver { - if node_config.plugin_urn.as_ref() == target_urn { - wrapper.set_logs_receiver(logs_rx.clone()); + // Inject internal telemetry settings if this is the target node + if let Some(ref settings) = internal_telemetry { + if node_config.plugin_urn.as_ref() == settings.target_urn { + wrapper.set_logs_receiver(settings.logs_receiver.clone()); + wrapper.set_resource_bytes(settings.resource_bytes.clone()); } } diff --git a/rust/otap-dataflow/crates/engine/src/local/receiver.rs b/rust/otap-dataflow/crates/engine/src/local/receiver.rs index 71ff5e5c7a..6a4fe6e3ec 100644 --- a/rust/otap-dataflow/crates/engine/src/local/receiver.rs +++ b/rust/otap-dataflow/crates/engine/src/local/receiver.rs @@ -135,6 +135,8 @@ pub struct EffectHandler { default_sender: Option>, /// Receiver for internal logs (for internal telemetry receiver). logs_receiver: Option, + /// Pre-encoded resource bytes for OTLP log encoding (for internal telemetry receiver). + resource_bytes: Option, } /// Implementation for the `!Send` effect handler. @@ -148,6 +150,7 @@ impl EffectHandler { node_request_sender: PipelineCtrlMsgSender, metrics_reporter: MetricsReporter, logs_receiver: Option, + resource_bytes: Option, ) -> Self { let mut core = EffectHandlerCore::new(node_id, metrics_reporter); core.set_pipeline_ctrl_msg_sender(node_request_sender); @@ -166,6 +169,7 @@ impl EffectHandler { msg_senders, default_sender, logs_receiver, + resource_bytes, } } @@ -178,6 +182,15 @@ impl EffectHandler { self.logs_receiver.as_ref() } + /// Returns the pre-encoded resource bytes, if configured. 
+ /// + /// This is used by the Internal Telemetry Receiver to include resource + /// attributes in the OTLP log encoding. + #[must_use] + pub fn resource_bytes(&self) -> Option<&bytes::Bytes> { + self.resource_bytes.as_ref() + } + /// Returns the id of the receiver associated with this handler. #[must_use] pub fn receiver_id(&self) -> NodeId { @@ -341,6 +354,7 @@ mod tests { ctrl_tx, metrics_reporter, None, + None, ); eh.send_message_to("b", 42).await.unwrap(); @@ -369,6 +383,7 @@ mod tests { ctrl_tx, metrics_reporter, None, + None, ); eh.send_message(7).await.unwrap(); @@ -393,6 +408,7 @@ mod tests { ctrl_tx, metrics_reporter, None, + None, ); eh.send_message(11).await.unwrap(); @@ -423,6 +439,7 @@ mod tests { ctrl_tx, metrics_reporter, None, + None, ); let res = eh.send_message(5).await; @@ -459,6 +476,7 @@ mod tests { ctrl_tx, metrics_reporter, None, + None, ); let ports: HashSet<_> = eh.connected_ports().into_iter().collect(); diff --git a/rust/otap-dataflow/crates/engine/src/receiver.rs b/rust/otap-dataflow/crates/engine/src/receiver.rs index a6969e345c..bdc1f7e928 100644 --- a/rust/otap-dataflow/crates/engine/src/receiver.rs +++ b/rust/otap-dataflow/crates/engine/src/receiver.rs @@ -19,6 +19,7 @@ use crate::node::{Node, NodeId, NodeWithPDataSender}; use crate::shared::message::{SharedReceiver, SharedSender}; use crate::shared::receiver as shared; use crate::terminal_state::TerminalState; +use bytes::Bytes; use otap_df_channel::error::SendError; use otap_df_channel::mpsc; use otap_df_config::PortName; @@ -30,6 +31,19 @@ use std::sync::Arc; /// Type alias for the internal logs receiver channel. pub type LogsReceiver = otap_df_telemetry::LogsReceiver; +/// Runtime settings for internal telemetry injection into a receiver. +/// +/// This struct bundles the logs receiver channel and pre-encoded resource bytes +/// that should be injected into the Internal Telemetry Receiver node. +pub struct InternalTelemetrySettings { + /// The URN of the receiver to inject into. + pub target_urn: &'static str, + /// The logs receiver channel. + pub logs_receiver: LogsReceiver, + /// Pre-encoded resource bytes for OTLP log encoding. + pub resource_bytes: Bytes, +} + /// A wrapper for the receiver that allows for both `Send` and `!Send` receivers. /// /// Note: This is useful for creating a single interface for the receiver regardless of their @@ -55,6 +69,8 @@ pub enum ReceiverWrapper { pdata_receiver: Option>, /// Receiver for internal logs (for internal telemetry receiver). logs_receiver: Option, + /// Pre-encoded resource bytes for internal telemetry (for internal telemetry receiver). + resource_bytes: Option, }, /// A receiver with a `Send` implementation. Shared { @@ -76,6 +92,8 @@ pub enum ReceiverWrapper { pdata_receiver: Option>, /// Receiver for internal logs (for internal telemetry receiver). logs_receiver: Option, + /// Pre-encoded resource bytes for internal telemetry (for internal telemetry receiver). 
+ resource_bytes: Option, }, } @@ -116,6 +134,7 @@ impl ReceiverWrapper { pdata_senders: HashMap::new(), pdata_receiver: None, logs_receiver: None, + resource_bytes: None, } } @@ -142,6 +161,7 @@ impl ReceiverWrapper { pdata_senders: HashMap::new(), pdata_receiver: None, logs_receiver: None, + resource_bytes: None, } } @@ -165,6 +185,7 @@ impl ReceiverWrapper { pdata_senders, pdata_receiver, logs_receiver, + resource_bytes, } => { let channel_id = control_channel_id(&node_id); let control_sender = match control_sender.into_mpsc() { @@ -199,6 +220,7 @@ impl ReceiverWrapper { pdata_senders, pdata_receiver, logs_receiver, + resource_bytes, } } ReceiverWrapper::Shared { @@ -211,6 +233,7 @@ impl ReceiverWrapper { pdata_senders, pdata_receiver, logs_receiver, + resource_bytes, } => { let channel_id = control_channel_id(&node_id); let control_sender = match control_sender.into_mpsc() { @@ -245,6 +268,7 @@ impl ReceiverWrapper { pdata_senders, pdata_receiver, logs_receiver, + resource_bytes, } } } @@ -265,6 +289,7 @@ impl ReceiverWrapper { pdata_senders, user_config, logs_receiver, + resource_bytes, .. }, metrics_reporter, @@ -288,6 +313,7 @@ impl ReceiverWrapper { pipeline_ctrl_msg_tx, metrics_reporter, logs_receiver, + resource_bytes, ); receiver.start(ctrl_msg_chan, effect_handler).await } @@ -403,6 +429,29 @@ impl ReceiverWrapper { ReceiverWrapper::Shared { logs_receiver, .. } => logs_receiver.take(), } } + + /// Set the pre-encoded resource bytes for internal telemetry. + /// + /// This is used by the Internal Telemetry Receiver to include resource + /// attributes in the encoded OTLP log messages. + pub fn set_resource_bytes(&mut self, bytes: Bytes) { + match self { + ReceiverWrapper::Local { resource_bytes, .. } => { + *resource_bytes = Some(bytes); + } + ReceiverWrapper::Shared { resource_bytes, .. } => { + *resource_bytes = Some(bytes); + } + } + } + + /// Take the pre-encoded resource bytes, if set. + pub fn take_resource_bytes(&mut self) -> Option { + match self { + ReceiverWrapper::Local { resource_bytes, .. } => resource_bytes.take(), + ReceiverWrapper::Shared { resource_bytes, .. } => resource_bytes.take(), + } + } } impl NodeWithPDataSender for ReceiverWrapper { diff --git a/rust/otap-dataflow/crates/otap/src/internal_telemetry_receiver.rs b/rust/otap-dataflow/crates/otap/src/internal_telemetry_receiver.rs index be11e20b16..f63030fc97 100644 --- a/rust/otap-dataflow/crates/otap/src/internal_telemetry_receiver.rs +++ b/rust/otap-dataflow/crates/otap/src/internal_telemetry_receiver.rs @@ -153,7 +153,12 @@ impl InternalTelemetryReceiver { }; if !batch.records.is_empty() { - let bytes = batch.encode_export_logs_request(); + // Use resource bytes if available, otherwise encode without resource + let bytes = if let Some(resource_bytes) = effect_handler.resource_bytes() { + batch.encode_export_logs_request_with_resource(resource_bytes) + } else { + batch.encode_export_logs_request() + }; let pdata = OtapPdata::new_todo_context(OtlpProtoBytes::ExportLogsRequest(bytes).into()); effect_handler.send_message(pdata).await?; diff --git a/rust/otap-dataflow/crates/telemetry/src/lib.rs b/rust/otap-dataflow/crates/telemetry/src/lib.rs index 64579f1a23..d9a0aab3e3 100644 --- a/rust/otap-dataflow/crates/telemetry/src/lib.rs +++ b/rust/otap-dataflow/crates/telemetry/src/lib.rs @@ -70,6 +70,7 @@ pub use tracing::warn_span as otel_warn_span; // Re-export commonly used logs types for convenience. 
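A simplified sketch of the URN-matched injection the factory performs above (toy types; the real code targets `ReceiverWrapper`): the settings bundle is handed only to the node whose plugin URN equals the target.

```rust
// Walk the built nodes and inject the pre-encoded resource fragment
// into the one matching the Internal Telemetry Receiver's URN.
struct Settings {
    target_urn: &'static str,
    resource_bytes: Vec<u8>,
}

struct Node {
    plugin_urn: String,
    resource_bytes: Option<Vec<u8>>,
}

fn inject(nodes: &mut [Node], settings: &Settings) {
    for node in nodes.iter_mut() {
        if node.plugin_urn == settings.target_urn {
            node.resource_bytes = Some(settings.resource_bytes.clone());
        }
    }
}

fn main() {
    let mut nodes = vec![
        Node { plugin_urn: "urn:otel:otlp:receiver".into(), resource_bytes: None },
        Node { plugin_urn: "urn:otel:internal:otlp:receiver".into(), resource_bytes: None },
    ];
    let settings = Settings {
        target_urn: "urn:otel:internal:otlp:receiver",
        resource_bytes: vec![0x0a, 0x02],
    };
    inject(&mut nodes, &settings);
    assert!(nodes[0].resource_bytes.is_none());
    assert!(nodes[1].resource_bytes.is_some());
}
```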
pub use logs::{ ImmediateLayer, LogBatch, LogPayload, LogsCollector, LogsReceiver, LogsReporter, TelemetrySetup, + encode_resource_bytes, }; // TODO This should be #[cfg(test)], but something is preventing it from working. diff --git a/rust/otap-dataflow/crates/telemetry/src/logs.rs b/rust/otap-dataflow/crates/telemetry/src/logs.rs index ac027b77ce..58b4fa7024 100644 --- a/rust/otap-dataflow/crates/telemetry/src/logs.rs +++ b/rust/otap-dataflow/crates/telemetry/src/logs.rs @@ -11,11 +11,19 @@ use bytes::Bytes; use opentelemetry_appender_tracing::layer::OpenTelemetryTracingBridge; use opentelemetry_sdk::logs::SdkLoggerProvider; use otap_df_config::pipeline::service::telemetry::logs::LogLevel; +use otap_df_config::pipeline::service::telemetry::AttributeValue; use otap_df_pdata::otlp::ProtoBuffer; +use otap_df_pdata::proto::consts::field_num::common::{ + ANY_VALUE_BOOL_VALUE, ANY_VALUE_DOUBLE_VALUE, ANY_VALUE_INT_VALUE, ANY_VALUE_STRING_VALUE, + KEY_VALUE_KEY, KEY_VALUE_VALUE, +}; use otap_df_pdata::proto::consts::field_num::logs::{ - LOGS_DATA_RESOURCE, RESOURCE_LOGS_SCOPE_LOGS, SCOPE_LOGS_LOG_RECORDS, + LOGS_DATA_RESOURCE, RESOURCE_LOGS_RESOURCE, RESOURCE_LOGS_SCOPE_LOGS, SCOPE_LOGS_LOG_RECORDS, }; +use otap_df_pdata::proto::consts::field_num::resource::RESOURCE_ATTRIBUTES; +use otap_df_pdata::proto::consts::wire_types; use otap_df_pdata::proto_encode_len_delimited_unknown_size; +use std::collections::HashMap; use tracing::{Event, Subscriber}; use tracing_subscriber::Registry; use tracing_subscriber::layer::{Context, Layer as TracingLayer, SubscriberExt}; @@ -29,6 +37,74 @@ pub struct LogBatch { pub dropped_count: usize, } +/// Pre-encode resource bytes for use in OTLP log messages. +/// +/// This encodes the resource attributes into a protobuf fragment that can be +/// inserted directly into ResourceLogs messages. The returned bytes include +/// the `RESOURCE_LOGS_RESOURCE` field tag and length-delimited Resource message. +/// +/// This is a one-time operation at startup; the resulting bytes can be reused +/// for all subsequent log batches. +#[must_use] +pub fn encode_resource_bytes(resource_attributes: &HashMap) -> Bytes { + if resource_attributes.is_empty() { + return Bytes::new(); + } + + let mut buf = ProtoBuffer::with_capacity(resource_attributes.len() * 64); + + // Encode: field 1 (RESOURCE_LOGS_RESOURCE) -> Resource message + proto_encode_len_delimited_unknown_size!( + RESOURCE_LOGS_RESOURCE, + { + // Resource { attributes: [ KeyValue, ... ] } + for (key, value) in resource_attributes { + encode_resource_attribute(&mut buf, key, value); + } + }, + &mut buf + ); + + buf.into_bytes() +} + +/// Encode a single resource attribute as a KeyValue message. 
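A hand-rolled sketch of the wire format these helpers rely on: a protobuf tag is `(field_number << 3) | wire_type`, a submessage is tag + varint length + payload, and sibling fields simply concatenate — which is why the resource fragment can be encoded once (and cheaply cloned as a refcounted `Bytes` handle) and spliced into every batch. Field numbers below follow OTLP's `common.proto`/`resource.proto` (`KeyValue.key` = 1, `Resource.attributes` = 1).

```rust
// Minimal varint + length-delimited encoding, for illustration only.
fn put_varint(buf: &mut Vec<u8>, mut v: u64) {
    while v >= 0x80 {
        buf.push((v as u8 & 0x7f) | 0x80);
        v >>= 7;
    }
    buf.push(v as u8);
}

fn put_len_delimited(buf: &mut Vec<u8>, field: u32, payload: &[u8]) {
    put_varint(buf, u64::from((field << 3) | 2)); // wire type 2 = LEN
    put_varint(buf, payload.len() as u64);
    buf.extend_from_slice(payload);
}

fn main() {
    // KeyValue { key = "service.name" }
    let mut key_value = Vec::new();
    put_len_delimited(&mut key_value, 1, b"service.name");

    // Resource { attributes = [key_value] } — pre-encoded bytes splice in as-is.
    let mut resource = Vec::new();
    put_len_delimited(&mut resource, 1, &key_value);

    assert_eq!(resource[0], 0x0a); // tag byte: field 1, wire type LEN
}
```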
+fn encode_resource_attribute(buf: &mut ProtoBuffer, key: &str, value: &AttributeValue) { + proto_encode_len_delimited_unknown_size!( + RESOURCE_ATTRIBUTES, + { + buf.encode_string(KEY_VALUE_KEY, key); + proto_encode_len_delimited_unknown_size!( + KEY_VALUE_VALUE, + { + match value { + AttributeValue::String(s) => { + buf.encode_string(ANY_VALUE_STRING_VALUE, s); + } + AttributeValue::Bool(b) => { + buf.encode_field_tag(ANY_VALUE_BOOL_VALUE, wire_types::VARINT); + buf.encode_varint(u64::from(*b)); + } + AttributeValue::I64(i) => { + buf.encode_field_tag(ANY_VALUE_INT_VALUE, wire_types::VARINT); + buf.encode_varint(*i as u64); + } + AttributeValue::F64(f) => { + buf.encode_field_tag(ANY_VALUE_DOUBLE_VALUE, wire_types::FIXED64); + buf.extend_from_slice(&f.to_le_bytes()); + } + AttributeValue::Array(_) => { + // Arrays not supported for resource attributes + } + } + }, + buf + ); + }, + buf + ); +} + impl LogBatch { /// The total size including dropped records. #[must_use] @@ -44,14 +120,25 @@ impl LogBatch { /// - All log records from the batch #[must_use] pub fn encode_export_logs_request(&self) -> Bytes { - let mut buf = ProtoBuffer::with_capacity(self.records.len() * 256); + self.encode_export_logs_request_with_resource(&Bytes::new()) + } + + /// Encode this batch as an OTLP ExportLogsServiceRequest with pre-encoded resource. + /// + /// The `resource_bytes` should be pre-encoded using [`encode_resource_bytes`]. + /// This allows efficient reuse of the same resource for all log batches. + #[must_use] + pub fn encode_export_logs_request_with_resource(&self, resource_bytes: &Bytes) -> Bytes { + let mut buf = ProtoBuffer::with_capacity(self.records.len() * 256 + resource_bytes.len()); // ExportLogsServiceRequest { resource_logs: [ ResourceLogs { ... } ] } proto_encode_len_delimited_unknown_size!( LOGS_DATA_RESOURCE, // field 1: resource_logs (same field number) { + // Insert pre-encoded resource (field 1: resource) + buf.extend_from_slice(resource_bytes); + // ResourceLogs { scope_logs: [ ScopeLogs { ... 
} ] } - // Note: we skip resource (field 1) to use empty/default resource proto_encode_len_delimited_unknown_size!( RESOURCE_LOGS_SCOPE_LOGS, // field 2: scope_logs { From 63b9edc99dc925f5748353afa8cdf856100618fc Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Wed, 14 Jan 2026 12:38:56 -0800 Subject: [PATCH 85/92] simpler --- .../configs/internal-telemetry.yaml | 2 +- .../crates/engine/src/local/receiver.rs | 32 +++++++++++-------- .../crates/engine/src/receiver.rs | 11 +++++-- .../otap/src/console_exporter/formatter.rs | 2 +- .../crates/otap/src/syslog_cef_receiver.rs | 2 -- 5 files changed, 28 insertions(+), 21 deletions(-) diff --git a/rust/otap-dataflow/configs/internal-telemetry.yaml b/rust/otap-dataflow/configs/internal-telemetry.yaml index bac6df3ab4..5617e7165f 100644 --- a/rust/otap-dataflow/configs/internal-telemetry.yaml +++ b/rust/otap-dataflow/configs/internal-telemetry.yaml @@ -59,7 +59,7 @@ internal: min_size: 1000 sizer: items flush_timeout: 3s - format: otlp + format: otap console: kind: exporter plugin_urn: "urn:otel:console:exporter" diff --git a/rust/otap-dataflow/crates/engine/src/local/receiver.rs b/rust/otap-dataflow/crates/engine/src/local/receiver.rs index 6a4fe6e3ec..db61393c2a 100644 --- a/rust/otap-dataflow/crates/engine/src/local/receiver.rs +++ b/rust/otap-dataflow/crates/engine/src/local/receiver.rs @@ -149,8 +149,6 @@ impl EffectHandler { default_port: Option, node_request_sender: PipelineCtrlMsgSender, metrics_reporter: MetricsReporter, - logs_receiver: Option, - resource_bytes: Option, ) -> Self { let mut core = EffectHandlerCore::new(node_id, metrics_reporter); core.set_pipeline_ctrl_msg_sender(node_request_sender); @@ -168,11 +166,27 @@ impl EffectHandler { core, msg_senders, default_sender, - logs_receiver, - resource_bytes, + logs_receiver: None, + resource_bytes: None, } } + /// Sets the logs receiver for internal telemetry. + /// + /// This is called by the engine when starting an Internal Telemetry Receiver + /// that has been configured with a logs channel. + pub fn set_logs_receiver(&mut self, logs_receiver: LogsReceiver) { + self.logs_receiver = Some(logs_receiver); + } + + /// Sets the pre-encoded resource bytes for internal telemetry. + /// + /// This is called by the engine when starting an Internal Telemetry Receiver + /// that has been configured with resource attributes. + pub fn set_resource_bytes(&mut self, resource_bytes: bytes::Bytes) { + self.resource_bytes = Some(resource_bytes); + } + /// Returns the logs receiver, if configured. 
/// /// This is used by the Internal Telemetry Receiver to consume logs @@ -353,8 +367,6 @@ mod tests { None, ctrl_tx, metrics_reporter, - None, - None, ); eh.send_message_to("b", 42).await.unwrap(); @@ -382,8 +394,6 @@ mod tests { None, ctrl_tx, metrics_reporter, - None, - None, ); eh.send_message(7).await.unwrap(); @@ -407,8 +417,6 @@ mod tests { Some("a".into()), ctrl_tx, metrics_reporter, - None, - None, ); eh.send_message(11).await.unwrap(); @@ -438,8 +446,6 @@ mod tests { None, ctrl_tx, metrics_reporter, - None, - None, ); let res = eh.send_message(5).await; @@ -475,8 +481,6 @@ mod tests { None, ctrl_tx, metrics_reporter, - None, - None, ); let ports: HashSet<_> = eh.connected_ports().into_iter().collect(); diff --git a/rust/otap-dataflow/crates/engine/src/receiver.rs b/rust/otap-dataflow/crates/engine/src/receiver.rs index bdc1f7e928..692322450e 100644 --- a/rust/otap-dataflow/crates/engine/src/receiver.rs +++ b/rust/otap-dataflow/crates/engine/src/receiver.rs @@ -306,15 +306,20 @@ impl ReceiverWrapper { }; let default_port = user_config.default_out_port.clone(); let ctrl_msg_chan = local::ControlChannel::new(Receiver::Local(control_receiver)); - let effect_handler = local::EffectHandler::new( + let mut effect_handler = local::EffectHandler::new( node_id, msg_senders, default_port, pipeline_ctrl_msg_tx, metrics_reporter, - logs_receiver, - resource_bytes, ); + // Inject internal telemetry settings if configured + if let Some(logs_rx) = logs_receiver { + effect_handler.set_logs_receiver(logs_rx); + } + if let Some(res_bytes) = resource_bytes { + effect_handler.set_resource_bytes(res_bytes); + } receiver.start(ctrl_msg_chan, effect_handler).await } ( diff --git a/rust/otap-dataflow/crates/otap/src/console_exporter/formatter.rs b/rust/otap-dataflow/crates/otap/src/console_exporter/formatter.rs index 87c02bc35c..3917bbb901 100644 --- a/rust/otap-dataflow/crates/otap/src/console_exporter/formatter.rs +++ b/rust/otap-dataflow/crates/otap/src/console_exporter/formatter.rs @@ -146,7 +146,7 @@ impl HierarchicalFormatter { self.writer.write_ansi(&mut w, AnsiCode::Bold); let _ = w.write_all(b"RESOURCE"); self.writer.write_ansi(&mut w, AnsiCode::Reset); - let _ = w.write_all(b" v1.Resource:\n"); + let _ = w.write_all(b" v1.Resource:\n"); let len = w.position() as usize; let _ = std::io::stdout().write_all(&buf[..len]); diff --git a/rust/otap-dataflow/crates/otap/src/syslog_cef_receiver.rs b/rust/otap-dataflow/crates/otap/src/syslog_cef_receiver.rs index bfb57ad7b7..cce2501b26 100644 --- a/rust/otap-dataflow/crates/otap/src/syslog_cef_receiver.rs +++ b/rust/otap-dataflow/crates/otap/src/syslog_cef_receiver.rs @@ -1184,7 +1184,6 @@ mod telemetry_tests { None, pipe_tx, reporter.clone(), - None, ); let (ctrl_tx, ctrl_rx) = otap_df_channel::mpsc::Channel::new(16); @@ -1277,7 +1276,6 @@ mod telemetry_tests { None, pipe_tx, reporter.clone(), - None, ); let (ctrl_tx, ctrl_rx) = otap_df_channel::mpsc::Channel::new(8); From e794c794a72a285bb4d9c79cbcda0e5123375149 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Wed, 14 Jan 2026 16:44:09 -0800 Subject: [PATCH 86/92] remove lots --- .../crates/config/src/pipeline.rs | 223 ------------- .../config/src/pipeline/service/telemetry.rs | 1 + .../crates/controller/src/lib.rs | 53 +-- .../crates/engine/src/local/receiver.rs | 58 +--- .../crates/engine/src/receiver.rs | 5 +- .../otap/src/internal_telemetry_receiver.rs | 84 +++-- .../crates/telemetry/src/attributes.rs | 3 + .../otap-dataflow/crates/telemetry/src/lib.rs | 6 +- .../crates/telemetry/src/logs.rs | 
307 ++---------------- .../crates/telemetry/src/telemetry_runtime.rs | 46 +-- 10 files changed, 145 insertions(+), 641 deletions(-) diff --git a/rust/otap-dataflow/crates/config/src/pipeline.rs b/rust/otap-dataflow/crates/config/src/pipeline.rs index 40b8b9f802..7f96b031f5 100644 --- a/rust/otap-dataflow/crates/config/src/pipeline.rs +++ b/rust/otap-dataflow/crates/config/src/pipeline.rs @@ -1506,227 +1506,4 @@ mod tests { panic!("Expected deserialization to fail due to unknown exporter"); } } - - #[test] - fn test_internal_pipeline_with_valid_itr() { - // Valid internal pipeline with ITR receiver - let yaml_str = r#" -nodes: - receiver: - kind: receiver - plugin_urn: "urn:test:receiver" - out_ports: - out: - destinations: - - exporter - dispatch_strategy: round_robin - exporter: - kind: exporter - plugin_urn: "urn:test:exporter" - config: {} - -internal: - itr: - kind: receiver - plugin_urn: "urn:otel:internal:otlp:receiver" - out_ports: - out: - destinations: - - internal_exporter - dispatch_strategy: round_robin - config: {} - internal_exporter: - kind: exporter - plugin_urn: "urn:test:exporter" - config: {} -"#; - let result = - super::PipelineConfig::from_yaml("test_group".into(), "test_pipeline".into(), yaml_str); - assert!(result.is_ok()); - let config = result.unwrap(); - assert!(config.has_internal_pipeline()); - assert_eq!(config.internal_node_iter().count(), 2); - } - - #[test] - fn test_internal_pipeline_rejects_non_itr_receiver() { - // Invalid: internal pipeline has a non-ITR receiver - let yaml_str = r#" -nodes: - receiver: - kind: receiver - plugin_urn: "urn:test:receiver" - out_ports: - out: - destinations: - - exporter - dispatch_strategy: round_robin - exporter: - kind: exporter - plugin_urn: "urn:test:exporter" - config: {} - -internal: - bad_receiver: - kind: receiver - plugin_urn: "urn:test:some_other_receiver" - out_ports: - out: - destinations: - - internal_exporter - dispatch_strategy: round_robin - config: {} - internal_exporter: - kind: exporter - plugin_urn: "urn:test:exporter" - config: {} -"#; - let result = - super::PipelineConfig::from_yaml("test_group".into(), "test_pipeline".into(), yaml_str); - assert!(result.is_err()); - match result { - Err(Error::InvalidConfiguration { errors }) => { - assert_eq!(errors.len(), 1); - match &errors[0] { - Error::InvalidInternalReceiver { - node_id, - plugin_urn, - .. 
- } => { - assert_eq!(node_id.as_ref(), "bad_receiver"); - assert_eq!(plugin_urn, "urn:test:some_other_receiver"); - } - other => panic!("Expected InvalidInternalReceiver, got {other:?}"), - } - } - other => panic!("Expected InvalidConfiguration error, got {other:?}"), - } - } - - #[test] - fn test_internal_pipeline_allows_processors_and_exporters() { - // Valid: internal pipeline can have processors and exporters - let yaml_str = r#" -nodes: - receiver: - kind: receiver - plugin_urn: "urn:test:receiver" - out_ports: - out: - destinations: - - exporter - dispatch_strategy: round_robin - exporter: - kind: exporter - plugin_urn: "urn:test:exporter" - config: {} - -internal: - itr: - kind: receiver - plugin_urn: "urn:otel:internal:otlp:receiver" - out_ports: - out: - destinations: - - processor - dispatch_strategy: round_robin - config: {} - processor: - kind: processor - plugin_urn: "urn:test:processor" - out_ports: - out: - destinations: - - internal_exporter - dispatch_strategy: round_robin - config: {} - internal_exporter: - kind: exporter - plugin_urn: "urn:test:exporter" - config: {} -"#; - let result = - super::PipelineConfig::from_yaml("test_group".into(), "test_pipeline".into(), yaml_str); - assert!(result.is_ok()); - let config = result.unwrap(); - assert!(config.has_internal_pipeline()); - assert_eq!(config.internal_node_iter().count(), 3); - } - - #[test] - fn test_internal_pipeline_validates_hyper_edges() { - // Invalid: internal pipeline has missing target node - let yaml_str = r#" -nodes: - receiver: - kind: receiver - plugin_urn: "urn:test:receiver" - out_ports: - out: - destinations: - - exporter - dispatch_strategy: round_robin - exporter: - kind: exporter - plugin_urn: "urn:test:exporter" - config: {} - -internal: - itr: - kind: receiver - plugin_urn: "urn:otel:internal:otlp:receiver" - out_ports: - out: - destinations: - - missing_node - dispatch_strategy: round_robin - config: {} -"#; - let result = - super::PipelineConfig::from_yaml("test_group".into(), "test_pipeline".into(), yaml_str); - assert!(result.is_err()); - match result { - Err(Error::InvalidConfiguration { errors }) => { - assert_eq!(errors.len(), 1); - match &errors[0] { - Error::InvalidHyperEdgeSpec { - source_node, - details, - .. 
- } => { - assert_eq!(source_node.as_ref(), "itr"); - assert!(details.missing_targets.contains(&"missing_node".into())); - } - other => panic!("Expected InvalidHyperEdgeSpec, got {other:?}"), - } - } - other => panic!("Expected InvalidConfiguration error, got {other:?}"), - } - } - - #[test] - fn test_empty_internal_pipeline() { - // Valid: no internal section means no internal pipeline - let yaml_str = r#" -nodes: - receiver: - kind: receiver - plugin_urn: "urn:test:receiver" - out_ports: - out: - destinations: - - exporter - dispatch_strategy: round_robin - exporter: - kind: exporter - plugin_urn: "urn:test:exporter" - config: {} -"#; - let result = - super::PipelineConfig::from_yaml("test_group".into(), "test_pipeline".into(), yaml_str); - assert!(result.is_ok()); - let config = result.unwrap(); - assert!(!config.has_internal_pipeline()); - assert_eq!(config.internal_node_iter().count(), 0); - } } diff --git a/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry.rs b/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry.rs index 07edce8d85..7323f57554 100644 --- a/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry.rs +++ b/rust/otap-dataflow/crates/config/src/pipeline/service/telemetry.rs @@ -55,6 +55,7 @@ fn default_reporting_interval() -> Duration { } /// Attribute value types for telemetry resource attributes. +/// TODO: Replace with OTLP AnyValue? #[derive(Debug, Clone, PartialEq, Serialize, JsonSchema)] pub enum AttributeValue { /// String type attribute value. diff --git a/rust/otap-dataflow/crates/controller/src/lib.rs b/rust/otap-dataflow/crates/controller/src/lib.rs index 413e616f84..a941fa36c2 100644 --- a/rust/otap-dataflow/crates/controller/src/lib.rs +++ b/rust/otap-dataflow/crates/controller/src/lib.rs @@ -21,7 +21,9 @@ use crate::error::Error; use crate::thread_task::spawn_thread_local_task; use core_affinity::CoreId; use otap_df_config::engine::HttpAdminSettings; -use otap_df_config::pipeline::service::telemetry::logs::INTERNAL_TELEMETRY_RECEIVER_URN; +use otap_df_config::pipeline::service::telemetry::logs::{ + INTERNAL_TELEMETRY_RECEIVER_URN, OutputMode, +}; use otap_df_config::{ PipelineGroupId, PipelineId, pipeline::PipelineConfig, @@ -38,10 +40,13 @@ use otap_df_state::DeployedPipelineKey; use otap_df_state::event::{ErrorSummary, ObservedEvent}; use otap_df_state::reporter::ObservedEventReporter; use otap_df_state::store::ObservedStateStore; -use otap_df_telemetry::logs::{TelemetrySetup, encode_resource_bytes}; +use otap_df_telemetry::logs::{DirectCollector, TelemetrySetup}; use otap_df_telemetry::reporter::MetricsReporter; +use otap_df_telemetry::self_tracing::ConsoleWriter; use otap_df_telemetry::telemetry_runtime::TelemetryRuntime; -use otap_df_telemetry::{InternalTelemetrySystem, otel_info, otel_info_span, otel_warn}; +use otap_df_telemetry::{ + InternalTelemetrySystem, otel_info, otel_info_span, otel_warn, resource::encode_resource_bytes, +}; use std::sync::mpsc as std_mpsc; use std::thread; @@ -100,19 +105,22 @@ impl Controller { // Create telemetry runtime according to the various options. let mut telemetry_runtime = TelemetryRuntime::new(telemetry_config)?; - // Start the logs collector thread if needed for direct output. - let _logs_collector_handle = - if let Some(logs_collector) = telemetry_runtime.take_logs_collector() { - Some(spawn_thread_local_task( - "logs-collector", - move |_cancellation_token| logs_collector.run(), - )?) 
- } else { - None - }; + let direct_collector = (telemetry_config.logs.output == OutputMode::Direct).then(|| { + DirectCollector::new( + ConsoleWriter::color(), + telemetry_runtime.take_logs_receiver().expect("valid"), + ) + }); - // Get logs receiver for Internal output mode (passed to internal pipeline) - let mut logs_receiver = telemetry_runtime.take_logs_receiver(); + // Start the logs collector thread if needed for direct output. + let _logs_collector_handle = if telemetry_config.logs.output == OutputMode::Direct { + Some(spawn_thread_local_task( + "logs-collector", + move |_cancellation_token| direct_collector.expect("ok").run(), + )?) + } else { + None + }; // Pre-encode resource bytes once for all log batches let resource_bytes = encode_resource_bytes(&telemetry_config.resource); @@ -160,13 +168,14 @@ impl Controller { pipeline.extract_internal_config() { // Create internal telemetry settings if we have a logs receiver - let internal_telemetry_settings = logs_receiver.take().map(|rx| { - InternalTelemetrySettings { - target_urn: INTERNAL_TELEMETRY_RECEIVER_URN, - logs_receiver: rx, - resource_bytes: resource_bytes.clone(), - } - }); + let internal_telemetry_settings = + telemetry_runtime + .take_logs_receiver() + .map(|rx| InternalTelemetrySettings { + target_urn: INTERNAL_TELEMETRY_RECEIVER_URN, + logs_receiver: rx, + resource_bytes: resource_bytes.clone(), + }); let internal_factory = self.pipeline_factory; let internal_pipeline_id: PipelineId = "internal".into(); let internal_pipeline_key = DeployedPipelineKey { diff --git a/rust/otap-dataflow/crates/engine/src/local/receiver.rs b/rust/otap-dataflow/crates/engine/src/local/receiver.rs index db61393c2a..6520761f94 100644 --- a/rust/otap-dataflow/crates/engine/src/local/receiver.rs +++ b/rust/otap-dataflow/crates/engine/src/local/receiver.rs @@ -172,34 +172,22 @@ impl EffectHandler { } /// Sets the logs receiver for internal telemetry. - /// - /// This is called by the engine when starting an Internal Telemetry Receiver - /// that has been configured with a logs channel. - pub fn set_logs_receiver(&mut self, logs_receiver: LogsReceiver) { + pub fn set_logs_receiver( + &mut self, + logs_receiver: LogsReceiver, + resource_bytes: Option, + ) { self.logs_receiver = Some(logs_receiver); + self.resource_bytes = resource_bytes; } - /// Sets the pre-encoded resource bytes for internal telemetry. - /// - /// This is called by the engine when starting an Internal Telemetry Receiver - /// that has been configured with resource attributes. - pub fn set_resource_bytes(&mut self, resource_bytes: bytes::Bytes) { - self.resource_bytes = Some(resource_bytes); - } - - /// Returns the logs receiver, if configured. - /// - /// This is used by the Internal Telemetry Receiver to consume logs - /// from all threads via the logs channel. + /// Returns the logs receiver, if configured.. #[must_use] pub fn logs_receiver(&self) -> Option<&LogsReceiver> { self.logs_receiver.as_ref() } /// Returns the pre-encoded resource bytes, if configured. - /// - /// This is used by the Internal Telemetry Receiver to include resource - /// attributes in the OTLP log encoding. 
#[must_use] pub fn resource_bytes(&self) -> Option<&bytes::Bytes> { self.resource_bytes.as_ref() @@ -361,13 +349,7 @@ mod tests { let (ctrl_tx, _ctrl_rx) = pipeline_ctrl_msg_channel(4); let (_metrics_rx, metrics_reporter) = MetricsReporter::create_new_and_receiver(1); - let eh = EffectHandler::new( - test_node("recv"), - senders, - None, - ctrl_tx, - metrics_reporter, - ); + let eh = EffectHandler::new(test_node("recv"), senders, None, ctrl_tx, metrics_reporter); eh.send_message_to("b", 42).await.unwrap(); @@ -388,13 +370,7 @@ mod tests { let (ctrl_tx, _ctrl_rx) = pipeline_ctrl_msg_channel(4); let (_metrics_rx, metrics_reporter) = MetricsReporter::create_new_and_receiver(1); - let eh = EffectHandler::new( - test_node("recv"), - senders, - None, - ctrl_tx, - metrics_reporter, - ); + let eh = EffectHandler::new(test_node("recv"), senders, None, ctrl_tx, metrics_reporter); eh.send_message(7).await.unwrap(); assert_eq!(rx.recv().await.unwrap(), 7); @@ -440,13 +416,7 @@ mod tests { let (ctrl_tx, _ctrl_rx) = pipeline_ctrl_msg_channel(4); let (_metrics_rx, metrics_reporter) = MetricsReporter::create_new_and_receiver(1); - let eh = EffectHandler::new( - test_node("recv"), - senders, - None, - ctrl_tx, - metrics_reporter, - ); + let eh = EffectHandler::new(test_node("recv"), senders, None, ctrl_tx, metrics_reporter); let res = eh.send_message(5).await; assert!(res.is_err()); @@ -475,13 +445,7 @@ mod tests { let (ctrl_tx, _ctrl_rx) = pipeline_ctrl_msg_channel(4); let (_metrics_rx, metrics_reporter) = MetricsReporter::create_new_and_receiver(1); - let eh = EffectHandler::new( - test_node("recv"), - senders, - None, - ctrl_tx, - metrics_reporter, - ); + let eh = EffectHandler::new(test_node("recv"), senders, None, ctrl_tx, metrics_reporter); let ports: HashSet<_> = eh.connected_ports().into_iter().collect(); let expected: HashSet<_> = [Cow::from("a"), Cow::from("b")].into_iter().collect(); diff --git a/rust/otap-dataflow/crates/engine/src/receiver.rs b/rust/otap-dataflow/crates/engine/src/receiver.rs index 692322450e..293c2324df 100644 --- a/rust/otap-dataflow/crates/engine/src/receiver.rs +++ b/rust/otap-dataflow/crates/engine/src/receiver.rs @@ -315,10 +315,7 @@ impl ReceiverWrapper { ); // Inject internal telemetry settings if configured if let Some(logs_rx) = logs_receiver { - effect_handler.set_logs_receiver(logs_rx); - } - if let Some(res_bytes) = resource_bytes { - effect_handler.set_resource_bytes(res_bytes); + effect_handler.set_logs_receiver(logs_rx, resource_bytes) } receiver.start(ctrl_msg_chan, effect_handler).await } diff --git a/rust/otap-dataflow/crates/otap/src/internal_telemetry_receiver.rs b/rust/otap-dataflow/crates/otap/src/internal_telemetry_receiver.rs index f63030fc97..f7d33d90a9 100644 --- a/rust/otap-dataflow/crates/otap/src/internal_telemetry_receiver.rs +++ b/rust/otap-dataflow/crates/otap/src/internal_telemetry_receiver.rs @@ -20,8 +20,8 @@ use otap_df_engine::local::receiver as local; use otap_df_engine::node::NodeId; use otap_df_engine::receiver::ReceiverWrapper; use otap_df_engine::terminal_state::TerminalState; -use otap_df_pdata::OtlpProtoBytes; -use otap_df_telemetry::logs::{LogBatch, LogPayload}; +//use otap_df_pdata::OtlpProtoBytes; +use otap_df_telemetry::logs::LogPayload; use otap_df_telemetry::metrics::MetricSetSnapshot; use serde::{Deserialize, Serialize}; use serde_json::Value; @@ -141,28 +141,70 @@ impl InternalTelemetryReceiver { /// Send a log payload as OTLP logs. 
async fn send_payload( &self, - effect_handler: &local::EffectHandler, + _effect_handler: &local::EffectHandler, payload: LogPayload, ) -> Result<(), Error> { - let batch = match payload { - LogPayload::Singleton(record) => LogBatch { - records: vec![record], - dropped_count: 0, - }, - LogPayload::Batch(batch) => batch, - }; - - if !batch.records.is_empty() { - // Use resource bytes if available, otherwise encode without resource - let bytes = if let Some(resource_bytes) = effect_handler.resource_bytes() { - batch.encode_export_logs_request_with_resource(resource_bytes) - } else { - batch.encode_export_logs_request() - }; - let pdata = - OtapPdata::new_todo_context(OtlpProtoBytes::ExportLogsRequest(bytes).into()); - effect_handler.send_message(pdata).await?; + match payload { + LogPayload::Singleton(_record) => { + // // Use resource bytes if available, otherwise encode without resource + // let bytes = if let Some(resource_bytes) = effect_handler.resource_bytes() { + // //batch.encode_export_logs_request_with_resource(resource_bytes) + // } else { + // //batch.encode_export_logs_request() + // }; + // let pdata = + // OtapPdata::new_todo_context(OtlpProtoBytes::ExportLogsRequest(bytes).into()); + // effect_handler.send_message(pdata).await?; + } } Ok(()) } } + +// /// Encode singleton as an OTLP ExportLogsServiceRequest. +// #[must_use] +// pub fn encode_singleton_request(single: LogRecord, resource_bytes: &Option) -> Bytes { +// let mut buf = ProtoBuffer::with_capacity(256 + resource_bytes.len()); + +// // ExportLogsServiceRequest { resource_logs: [ ResourceLogs { ... } ] } +// proto_encode_len_delimited_unknown_size!( +// LOGS_DATA_RESOURCE, // field 1: resource_logs (same field number) +// { +// // Insert pre-encoded resource (field 1: resource) +// buf.extend_from_slice(resource_bytes); + +// // ResourceLogs { scope_logs: [ ScopeLogs { ... } ] } +// proto_encode_len_delimited_unknown_size!( +// RESOURCE_LOGS_SCOPE_LOGS, // field 2: scope_logs +// { +// // ScopeLogs { log_records: [ ... ] } +// // Note: we skip scope (field 1) to use empty/default scope +// for record in &self.records { +// self.encode_log_record(record, &mut buf); +// } +// }, +// &mut buf +// ); +// }, +// &mut buf +// ); + +// buf.into_bytes() +// } + +// /// Encode a single log record into the buffer. +// fn encode_log_record(&self, record: &LogRecord, buf: &mut ProtoBuffer) { +// // Get the callsite metadata for encoding +// let metadata = record.callsite_id.0.metadata(); +// let callsite = SavedCallsite::new(metadata); + +// proto_encode_len_delimited_unknown_size!( +// SCOPE_LOGS_LOG_RECORDS, // field 2: log_records +// { +// let mut encoder = DirectLogRecordEncoder::new(buf); +// // Clone record since encode_log_record takes ownership +// let _ = encoder.encode_log_record(record.clone(), &callsite); +// }, +// buf +// ); +// } diff --git a/rust/otap-dataflow/crates/telemetry/src/attributes.rs b/rust/otap-dataflow/crates/telemetry/src/attributes.rs index 5b70d0b970..3de94a238c 100644 --- a/rust/otap-dataflow/crates/telemetry/src/attributes.rs +++ b/rust/otap-dataflow/crates/telemetry/src/attributes.rs @@ -122,6 +122,9 @@ pub trait AttributeSetHandler { } /// Represents a single attribute value that can be of different types. +/// +/// TODO: Duplicate of crates/config/src/pipeline/service/telemetry.rs +/// AttributeValue or OTLP AnyValue? 
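// A hedged sketch of the merge direction flagged in the TODO above. Both enums
// exist today; this From impl does not, and is shown only as one possible shape:
//
// impl From<otap_df_config::pipeline::service::telemetry::AttributeValue> for AttributeValue {
//     fn from(v: otap_df_config::pipeline::service::telemetry::AttributeValue) -> Self {
//         match v { /* map String/Bool/I64/F64/Array variant by variant */ }
//     }
// }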
#[derive(Debug, Clone, PartialEq, Serialize)] pub enum AttributeValue { /// String attribute value diff --git a/rust/otap-dataflow/crates/telemetry/src/lib.rs b/rust/otap-dataflow/crates/telemetry/src/lib.rs index d9a0aab3e3..8334a83dd9 100644 --- a/rust/otap-dataflow/crates/telemetry/src/lib.rs +++ b/rust/otap-dataflow/crates/telemetry/src/lib.rs @@ -40,13 +40,14 @@ pub mod collector; pub mod descriptor; pub mod error; pub mod instrument; -/// Internal logs/events module for engine. +/// Internal logging macros. pub mod internal_events; /// Internal logs collection and transport. pub mod logs; pub mod metrics; pub mod registry; pub mod reporter; +pub mod resource; pub mod self_tracing; pub mod semconv; pub mod telemetry_runtime; @@ -69,8 +70,7 @@ pub use tracing::warn_span as otel_warn_span; // Re-export commonly used logs types for convenience. pub use logs::{ - ImmediateLayer, LogBatch, LogPayload, LogsCollector, LogsReceiver, LogsReporter, TelemetrySetup, - encode_resource_bytes, + DirectCollector, ImmediateLayer, LogPayload, LogsReceiver, LogsReporter, TelemetrySetup, }; // TODO This should be #[cfg(test)], but something is preventing it from working. diff --git a/rust/otap-dataflow/crates/telemetry/src/logs.rs b/rust/otap-dataflow/crates/telemetry/src/logs.rs index 58b4fa7024..cecf9ae08d 100644 --- a/rust/otap-dataflow/crates/telemetry/src/logs.rs +++ b/rust/otap-dataflow/crates/telemetry/src/logs.rs @@ -3,306 +3,51 @@ //! Internal logs collection for OTAP-Dataflow. -use crate::error::Error; -use crate::self_tracing::{ - ConsoleWriter, DirectLogRecordEncoder, LogRecord, RawLoggingLayer, SavedCallsite, -}; -use bytes::Bytes; +use crate::self_tracing::{ConsoleWriter, LogRecord, RawLoggingLayer, SavedCallsite}; use opentelemetry_appender_tracing::layer::OpenTelemetryTracingBridge; use opentelemetry_sdk::logs::SdkLoggerProvider; use otap_df_config::pipeline::service::telemetry::logs::LogLevel; -use otap_df_config::pipeline::service::telemetry::AttributeValue; -use otap_df_pdata::otlp::ProtoBuffer; -use otap_df_pdata::proto::consts::field_num::common::{ - ANY_VALUE_BOOL_VALUE, ANY_VALUE_DOUBLE_VALUE, ANY_VALUE_INT_VALUE, ANY_VALUE_STRING_VALUE, - KEY_VALUE_KEY, KEY_VALUE_VALUE, -}; -use otap_df_pdata::proto::consts::field_num::logs::{ - LOGS_DATA_RESOURCE, RESOURCE_LOGS_RESOURCE, RESOURCE_LOGS_SCOPE_LOGS, SCOPE_LOGS_LOG_RECORDS, -}; -use otap_df_pdata::proto::consts::field_num::resource::RESOURCE_ATTRIBUTES; -use otap_df_pdata::proto::consts::wire_types; -use otap_df_pdata::proto_encode_len_delimited_unknown_size; -use std::collections::HashMap; use tracing::{Event, Subscriber}; use tracing_subscriber::Registry; use tracing_subscriber::layer::{Context, Layer as TracingLayer, SubscriberExt}; use tracing_subscriber::registry::LookupSpan; -/// A batch of log records from a pipeline thread. -pub struct LogBatch { - /// The log records in this batch. - pub records: Vec, - /// Number of records dropped in the same period. - pub dropped_count: usize, -} - -/// Pre-encode resource bytes for use in OTLP log messages. -/// -/// This encodes the resource attributes into a protobuf fragment that can be -/// inserted directly into ResourceLogs messages. The returned bytes include -/// the `RESOURCE_LOGS_RESOURCE` field tag and length-delimited Resource message. -/// -/// This is a one-time operation at startup; the resulting bytes can be reused -/// for all subsequent log batches. 
-#[must_use] -pub fn encode_resource_bytes(resource_attributes: &HashMap) -> Bytes { - if resource_attributes.is_empty() { - return Bytes::new(); - } - - let mut buf = ProtoBuffer::with_capacity(resource_attributes.len() * 64); - - // Encode: field 1 (RESOURCE_LOGS_RESOURCE) -> Resource message - proto_encode_len_delimited_unknown_size!( - RESOURCE_LOGS_RESOURCE, - { - // Resource { attributes: [ KeyValue, ... ] } - for (key, value) in resource_attributes { - encode_resource_attribute(&mut buf, key, value); - } - }, - &mut buf - ); - - buf.into_bytes() -} - -/// Encode a single resource attribute as a KeyValue message. -fn encode_resource_attribute(buf: &mut ProtoBuffer, key: &str, value: &AttributeValue) { - proto_encode_len_delimited_unknown_size!( - RESOURCE_ATTRIBUTES, - { - buf.encode_string(KEY_VALUE_KEY, key); - proto_encode_len_delimited_unknown_size!( - KEY_VALUE_VALUE, - { - match value { - AttributeValue::String(s) => { - buf.encode_string(ANY_VALUE_STRING_VALUE, s); - } - AttributeValue::Bool(b) => { - buf.encode_field_tag(ANY_VALUE_BOOL_VALUE, wire_types::VARINT); - buf.encode_varint(u64::from(*b)); - } - AttributeValue::I64(i) => { - buf.encode_field_tag(ANY_VALUE_INT_VALUE, wire_types::VARINT); - buf.encode_varint(*i as u64); - } - AttributeValue::F64(f) => { - buf.encode_field_tag(ANY_VALUE_DOUBLE_VALUE, wire_types::FIXED64); - buf.extend_from_slice(&f.to_le_bytes()); - } - AttributeValue::Array(_) => { - // Arrays not supported for resource attributes - } - } - }, - buf - ); - }, - buf - ); -} - -impl LogBatch { - /// The total size including dropped records. - #[must_use] - pub fn size_with_dropped(&self) -> usize { - self.records.len() + self.dropped_count - } - - /// Encode this batch as an OTLP ExportLogsServiceRequest. - /// - /// The batch is wrapped in a minimal structure: - /// - One ResourceLogs with no resource attributes - /// - One ScopeLogs with no scope - /// - All log records from the batch - #[must_use] - pub fn encode_export_logs_request(&self) -> Bytes { - self.encode_export_logs_request_with_resource(&Bytes::new()) - } - - /// Encode this batch as an OTLP ExportLogsServiceRequest with pre-encoded resource. - /// - /// The `resource_bytes` should be pre-encoded using [`encode_resource_bytes`]. - /// This allows efficient reuse of the same resource for all log batches. - #[must_use] - pub fn encode_export_logs_request_with_resource(&self, resource_bytes: &Bytes) -> Bytes { - let mut buf = ProtoBuffer::with_capacity(self.records.len() * 256 + resource_bytes.len()); - - // ExportLogsServiceRequest { resource_logs: [ ResourceLogs { ... } ] } - proto_encode_len_delimited_unknown_size!( - LOGS_DATA_RESOURCE, // field 1: resource_logs (same field number) - { - // Insert pre-encoded resource (field 1: resource) - buf.extend_from_slice(resource_bytes); - - // ResourceLogs { scope_logs: [ ScopeLogs { ... } ] } - proto_encode_len_delimited_unknown_size!( - RESOURCE_LOGS_SCOPE_LOGS, // field 2: scope_logs - { - // ScopeLogs { log_records: [ ... ] } - // Note: we skip scope (field 1) to use empty/default scope - for record in &self.records { - self.encode_log_record(record, &mut buf); - } - }, - &mut buf - ); - }, - &mut buf - ); - - buf.into_bytes() - } - - /// Encode a single log record into the buffer. 
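// The encoders above rely on `proto_encode_len_delimited_unknown_size!` to emit
// protobuf fields whose length is only known after the body has been written.
// Below is a minimal, self-contained sketch of that reserve-then-backpatch
// technique over a plain Vec<u8>; the helper name, the Vec-based buffer, and the
// single-byte tag/length limits are illustrative assumptions, not the crate's
// actual macro:
fn encode_len_delimited_sketch(buf: &mut Vec<u8>, field_num: u8, body: impl FnOnce(&mut Vec<u8>)) {
    debug_assert!(field_num < 16, "sketch assumes a single-byte tag");
    buf.push((field_num << 3) | 2); // wire type 2 = LEN (length-delimited)
    let len_pos = buf.len();
    buf.push(0); // length placeholder, patched once the body size is known
    body(buf);
    let len = buf.len() - len_pos - 1;
    debug_assert!(len < 128, "sketch assumes a single-byte varint length");
    buf[len_pos] = len as u8;
}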
- fn encode_log_record(&self, record: &LogRecord, buf: &mut ProtoBuffer) { - // Get the callsite metadata for encoding - let metadata = record.callsite_id.0.metadata(); - let callsite = SavedCallsite::new(metadata); - - proto_encode_len_delimited_unknown_size!( - SCOPE_LOGS_LOG_RECORDS, // field 2: log_records - { - let mut encoder = DirectLogRecordEncoder::new(buf); - // Clone record since encode_log_record takes ownership - let _ = encoder.encode_log_record(record.clone(), &callsite); - }, - buf - ); - } -} - -/// A payload of two kinds +/// A payload of log data +/// TODO: merge with Event in crates/state pub enum LogPayload { /// A single record. Singleton(LogRecord), - /// A batch. - Batch(LogBatch), -} - -impl LogPayload { - /// The total number of records (including dropped) in this payload. - pub fn size_with_dropped(&self) -> usize { - match self { - Self::Singleton(_) => 1, - Self::Batch(batch) => batch.size_with_dropped(), - } - } -} - -/// Thread-local log buffer for a pipeline thread. -pub struct LogBuffer { - batch: LogBatch, -} - -impl LogBuffer { - /// Create a new log buffer with the given capacity. - #[must_use] - pub fn new(capacity: usize) -> Self { - Self { - batch: LogBatch { - records: Vec::with_capacity(capacity), - dropped_count: 0, - }, - } - } - - /// Push a log record. If at capacity, the record is dropped and counted. - pub fn push(&mut self, record: LogRecord) { - if self.batch.records.len() >= self.batch.records.capacity() { - self.batch.dropped_count += 1; - } else { - self.batch.records.push(record); - } - } - - /// Drain all records from the buffer, returning them as a batch. - pub fn drain(&mut self) -> LogBatch { - LogBatch { - records: self.batch.records.drain(..).collect(), - dropped_count: std::mem::take(&mut self.batch.dropped_count), - } - } } /// Reporter for sending log batches through a channel. -#[derive(Clone)] -pub struct LogsReporter { - sender: flume::Sender, -} - -impl LogsReporter { - /// Create a new LogsReporter with the given sender. - #[must_use] - pub fn new(sender: flume::Sender) -> Self { - Self { sender } - } +pub type LogsReporter = flume::Sender; - /// Create a null reporter that discards all payloads. - /// - /// Used for internal telemetry mode where the buffer is drained directly - /// rather than sent through a channel. - #[must_use] - pub fn null() -> Self { - // Create a bounded channel of size 0 - sends will always fail - // but we never actually call try_report on a null reporter - let (sender, _receiver) = flume::bounded(0); - Self { sender } - } +/// Type alias for the log payload receiver channel. +pub type LogsReceiver = flume::Receiver; - /// Try to send a payload, non-blocking. - /// - /// Returns: - /// - `Ok(())` if the payload was sent - /// - `Err` if the channel is full or disconnected - pub fn try_report(&self, payload: LogPayload) -> Result<(), Error> { - self.sender - .try_send(payload) - .map_err(|e| Error::LogSendError { - message: e.to_string(), - dropped: e.into_inner().size_with_dropped(), - }) - } +/// Create a reporter and receiver pair without the collector. +/// +/// Use this when the receiver will be consumed elsewhere (e.g., by the +/// Internal Telemetry Receiver node). +#[must_use] +pub fn channel(channel_size: usize) -> (LogsReporter, LogsReceiver) { + flume::bounded(channel_size) } -/// Collector that receives log batches and writes them to console. 
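// Usage sketch for the aliases above (flume's bounded channel and try_send are
// real flume APIs; the capacity and the `record` variable are illustrative):
//
// let (reporter, receiver): (LogsReporter, LogsReceiver) = channel(1024);
// reporter.try_send(LogPayload::Singleton(record)).ok(); // non-blocking on the hot path
// // `receiver` is later handed either to a DirectCollector or to the ITR node.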
-pub struct LogsCollector { - receiver: flume::Receiver, +/// Direct logs collector +pub struct DirectCollector { writer: ConsoleWriter, + receiver: LogsReceiver, } -/// Type alias for the log payload receiver channel. -pub type LogsReceiver = flume::Receiver; - -impl LogsCollector { - /// Create a new collector and reporter pair. - #[must_use] - pub fn new(channel_size: usize) -> (Self, LogsReporter) { - let (sender, receiver) = flume::bounded(channel_size); - let collector = Self { - receiver, - writer: ConsoleWriter::color(), - }; - let reporter = LogsReporter::new(sender); - (collector, reporter) - } - - /// Create a reporter and receiver pair without the collector. - /// - /// Use this when the receiver will be consumed elsewhere (e.g., by the - /// Internal Telemetry Receiver node). - #[must_use] - pub fn channel(channel_size: usize) -> (LogsReceiver, LogsReporter) { - let (sender, receiver) = flume::bounded(channel_size); - let reporter = LogsReporter::new(sender); - (receiver, reporter) +impl DirectCollector { + /// New collector with writer. + pub fn new(writer: ConsoleWriter, receiver: LogsReceiver) -> Self { + Self { writer, receiver } } /// Run the collection loop until the channel is closed. - pub async fn run(self) -> Result<(), Error> { + pub async fn run(self) -> Result<(), crate::Error> { loop { match self.receiver.recv_async().await { Ok(payload) => { @@ -318,14 +63,8 @@ impl LogsCollector { /// Write a batch of log records to console. fn write_batch(&self, payload: LogPayload) { - // TODO: Print dropped count as a formatted warning before the batch match payload { LogPayload::Singleton(record) => self.write_record(record), - LogPayload::Batch(batch) => { - for record in batch.records { - self.write_record(record); - } - } } } @@ -360,12 +99,12 @@ where fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) { let record = LogRecord::new(event); - match self.reporter.try_report(LogPayload::Singleton(record)) { + match self.reporter.try_send(LogPayload::Singleton(record)) { Ok(()) => {} Err(err) => { crate::raw_error!("failed to send log", err = %err); } - } + }; } } diff --git a/rust/otap-dataflow/crates/telemetry/src/telemetry_runtime.rs b/rust/otap-dataflow/crates/telemetry/src/telemetry_runtime.rs index f0e54b0239..ae1dccfafd 100644 --- a/rust/otap-dataflow/crates/telemetry/src/telemetry_runtime.rs +++ b/rust/otap-dataflow/crates/telemetry/src/telemetry_runtime.rs @@ -16,7 +16,8 @@ use otap_df_config::pipeline::service::telemetry::{ use crate::{ LogsReceiver, error::Error, - logs::{LogsCollector, LogsReporter, TelemetrySetup}, + logs::channel as logs_channel, + logs::{LogsReporter, TelemetrySetup}, telemetry_runtime::logger_provider::LoggerProvider, telemetry_runtime::meter_provider::MeterProvider, }; @@ -27,7 +28,6 @@ use otap_df_config::pipeline::service::telemetry::logs::LogLevel; /// This struct owns all telemetry infrastructure including: /// - OpenTelemetry SDK meter and logger providers /// - Internal logs reporter and receiver channels -/// - Optional logs collector for Direct output mode pub struct TelemetryRuntime { /// The tokio runtime used to run the OpenTelemetry SDK OTLP exporter. /// The reference is kept to ensure the runtime lives as long as the client. @@ -40,8 +40,6 @@ pub struct TelemetryRuntime { /// Receiver for the internal logs channel (Internal output mode only). /// The ITR node consumes this to process internal telemetry. logs_receiver: Option, - /// Collector for Direct output mode. Must be spawned by the controller. 
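// Wiring sketch for Direct output mode (DirectCollector::new and
// ConsoleWriter::color are from this patch; the surrounding setup is
// illustrative and stands in for the controller's spawn_thread_local_task):
//
// let (reporter, receiver) = channel(1024);
// let collector = DirectCollector::new(ConsoleWriter::color(), receiver);
// // collector.run() is async and loops until the reporter side is dropped.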
- logs_collector: Option, /// Deferred global subscriber setup. Must be initialized by controller /// AFTER the internal pipeline is started (so the channel is being consumed). global_setup: Option, @@ -64,7 +62,7 @@ impl TelemetryRuntime { /// noisy HTTP/2 and hyper logs. /// /// The logs reporter is created internally based on the configuration: - /// - For `Direct` output: creates reporter + collector (collector must be spawned) + /// - For `Direct` output: creates reporter + receiver (collector must be spawned) /// - For `Internal` output: creates reporter + receiver (receiver goes to ITR node) /// - For `Noop` output: no reporter is created /// @@ -79,28 +77,13 @@ impl TelemetryRuntime { let (meter_provider, runtime) = MeterProvider::configure(sdk_resource.clone(), &config.metrics, runtime)?.into_parts(); - // Determine if we need a logs reporter based on provider modes - let providers_need_reporter = config.logs.providers.global.needs_reporter() - || config.logs.providers.engine.needs_reporter(); - - // Create the logs reporter, receiver, and collector based on output mode - let (logs_reporter, logs_receiver, logs_collector) = if providers_need_reporter { - match config.logs.output { - OutputMode::Direct => { - // Direct mode: logs go to a collector that prints to console - let (collector, reporter) = LogsCollector::new(config.reporting_channel_size); - (Some(reporter), None, Some(collector)) - } - OutputMode::Internal => { - // Internal mode: logs go through channel to ITR node - let (receiver, reporter) = - LogsCollector::channel(config.reporting_channel_size); - (Some(reporter), Some(receiver), None) - } - OutputMode::Noop => (None, None, None), + // Create the logs reporter, receiver + let (logs_reporter, logs_receiver) = match config.logs.output { + OutputMode::Direct | OutputMode::Internal => { + let (x, y) = logs_channel(config.reporting_channel_size); + (Some(x), Some(y)) } - } else { - (None, None, None) + _ => (None, None), }; // Check if either global or engine needs the OpenTelemetry logger provider @@ -132,7 +115,6 @@ impl TelemetryRuntime { logger_provider, logs_reporter, logs_receiver, - logs_collector, global_setup: Some(global_setup), global_log_level: config.logs.level, }) @@ -207,16 +189,6 @@ impl TelemetryRuntime { self.logs_receiver.take() } - /// Take the logs collector for Direct output mode. - /// - /// Returns `Some` only when output mode is `Direct`. The collector should - /// be spawned on a dedicated thread to process log records. - /// - /// This method takes ownership of the collector (can only be called once). - pub fn take_logs_collector(&mut self) -> Option { - self.logs_collector.take() - } - /// Initialize the global tracing subscriber. 
/// /// This MUST be called AFTER the internal pipeline is started (when using From fac94609a6cfb1af24af675c5e6564a7a8349eb2 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Wed, 14 Jan 2026 16:54:53 -0800 Subject: [PATCH 87/92] hand --- .../otap/src/internal_telemetry_receiver.rs | 75 ++++-------------- .../crates/telemetry/src/resource.rs | 79 +++++++++++++++++++ .../crates/telemetry/src/self_tracing.rs | 1 + .../telemetry/src/self_tracing/encoder.rs | 54 +++++++++++++ 4 files changed, 148 insertions(+), 61 deletions(-) create mode 100644 rust/otap-dataflow/crates/telemetry/src/resource.rs diff --git a/rust/otap-dataflow/crates/otap/src/internal_telemetry_receiver.rs b/rust/otap-dataflow/crates/otap/src/internal_telemetry_receiver.rs index f7d33d90a9..e5bc7c1234 100644 --- a/rust/otap-dataflow/crates/otap/src/internal_telemetry_receiver.rs +++ b/rust/otap-dataflow/crates/otap/src/internal_telemetry_receiver.rs @@ -7,7 +7,7 @@ //! the logs as OTLP ExportLogsRequest messages into the pipeline. use crate::OTAP_RECEIVER_FACTORIES; -use crate::pdata::OtapPdata; +use crate::pdata::{Context, OtapPdata}; use async_trait::async_trait; use linkme::distributed_slice; use otap_df_config::node::NodeUserConfig; @@ -20,9 +20,10 @@ use otap_df_engine::local::receiver as local; use otap_df_engine::node::NodeId; use otap_df_engine::receiver::ReceiverWrapper; use otap_df_engine::terminal_state::TerminalState; -//use otap_df_pdata::OtlpProtoBytes; +use otap_df_pdata::OtlpProtoBytes; use otap_df_telemetry::logs::LogPayload; use otap_df_telemetry::metrics::MetricSetSnapshot; +use otap_df_telemetry::self_tracing::{SavedCallsite, encode_export_logs_request}; use serde::{Deserialize, Serialize}; use serde_json::Value; use std::sync::Arc; @@ -141,70 +142,22 @@ impl InternalTelemetryReceiver { /// Send a log payload as OTLP logs. async fn send_payload( &self, - _effect_handler: &local::EffectHandler, + effect_handler: &local::EffectHandler, payload: LogPayload, ) -> Result<(), Error> { match payload { - LogPayload::Singleton(_record) => { - // // Use resource bytes if available, otherwise encode without resource - // let bytes = if let Some(resource_bytes) = effect_handler.resource_bytes() { - // //batch.encode_export_logs_request_with_resource(resource_bytes) - // } else { - // //batch.encode_export_logs_request() - // }; - // let pdata = - // OtapPdata::new_todo_context(OtlpProtoBytes::ExportLogsRequest(bytes).into()); - // effect_handler.send_message(pdata).await?; + LogPayload::Singleton(record) => { + let callsite = SavedCallsite::new(record.callsite_id.0.metadata()); + let bytes = + encode_export_logs_request(record, &callsite, effect_handler.resource_bytes()); + + let pdata = OtapPdata::new( + Context::default(), + OtlpProtoBytes::ExportLogsRequest(bytes).into(), + ); + effect_handler.send_message(pdata).await?; } } Ok(()) } } - -// /// Encode singleton as an OTLP ExportLogsServiceRequest. -// #[must_use] -// pub fn encode_singleton_request(single: LogRecord, resource_bytes: &Option) -> Bytes { -// let mut buf = ProtoBuffer::with_capacity(256 + resource_bytes.len()); - -// // ExportLogsServiceRequest { resource_logs: [ ResourceLogs { ... } ] } -// proto_encode_len_delimited_unknown_size!( -// LOGS_DATA_RESOURCE, // field 1: resource_logs (same field number) -// { -// // Insert pre-encoded resource (field 1: resource) -// buf.extend_from_slice(resource_bytes); - -// // ResourceLogs { scope_logs: [ ScopeLogs { ... 
} ] } -// proto_encode_len_delimited_unknown_size!( -// RESOURCE_LOGS_SCOPE_LOGS, // field 2: scope_logs -// { -// // ScopeLogs { log_records: [ ... ] } -// // Note: we skip scope (field 1) to use empty/default scope -// for record in &self.records { -// self.encode_log_record(record, &mut buf); -// } -// }, -// &mut buf -// ); -// }, -// &mut buf -// ); - -// buf.into_bytes() -// } - -// /// Encode a single log record into the buffer. -// fn encode_log_record(&self, record: &LogRecord, buf: &mut ProtoBuffer) { -// // Get the callsite metadata for encoding -// let metadata = record.callsite_id.0.metadata(); -// let callsite = SavedCallsite::new(metadata); - -// proto_encode_len_delimited_unknown_size!( -// SCOPE_LOGS_LOG_RECORDS, // field 2: log_records -// { -// let mut encoder = DirectLogRecordEncoder::new(buf); -// // Clone record since encode_log_record takes ownership -// let _ = encoder.encode_log_record(record.clone(), &callsite); -// }, -// buf -// ); -// } diff --git a/rust/otap-dataflow/crates/telemetry/src/resource.rs b/rust/otap-dataflow/crates/telemetry/src/resource.rs new file mode 100644 index 0000000000..5f59464c46 --- /dev/null +++ b/rust/otap-dataflow/crates/telemetry/src/resource.rs @@ -0,0 +1,79 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +//! Form OTLP Resource encodings from the configuration struct + +use bytes::Bytes; +use otap_df_config::pipeline::service::telemetry::AttributeValue; +use otap_df_pdata::otlp::ProtoBuffer; +use otap_df_pdata::proto::consts::field_num::common::{ + ANY_VALUE_BOOL_VALUE, ANY_VALUE_DOUBLE_VALUE, ANY_VALUE_INT_VALUE, ANY_VALUE_STRING_VALUE, + KEY_VALUE_KEY, KEY_VALUE_VALUE, +}; +use otap_df_pdata::proto::consts::field_num::logs::RESOURCE_LOGS_RESOURCE; +use otap_df_pdata::proto::consts::field_num::resource::RESOURCE_ATTRIBUTES; +use otap_df_pdata::proto::consts::wire_types; +use otap_df_pdata::proto_encode_len_delimited_unknown_size; +use std::collections::HashMap; + +/// Encode OTLP bytes of the ResourceLogs.resource field, the whole +/// tag-and-bytes representation for a single copy. +#[must_use] +pub fn encode_resource_bytes(attrs: &HashMap) -> Bytes { + if attrs.is_empty() { + return Bytes::new(); + } + + let mut buf = ProtoBuffer::with_capacity(attrs.len() * 64); + + // Encode: field 1 (RESOURCE_LOGS_RESOURCE) -> Resource message + proto_encode_len_delimited_unknown_size!( + RESOURCE_LOGS_RESOURCE, + { + // Resource { attributes: [ KeyValue, ... ] } + for (key, value) in attrs { + encode_resource_attribute(&mut buf, key, value); + } + }, + &mut buf + ); + + buf.into_bytes() +} + +/// Encode a single resource attribute as a KeyValue message. 
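///
/// Usage sketch for the parent `encode_resource_bytes` (the attribute map and
/// its key/value types are assumptions here; `AttributeValue` is the config
/// enum imported above):
///
/// ```ignore
/// let mut attrs = std::collections::HashMap::new();
/// let _ = attrs.insert("service.name".to_owned(), AttributeValue::String(String::from("otap")));
/// let resource_bytes = encode_resource_bytes(&attrs); // encode once at startup, reuse per request
/// ```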
+fn encode_resource_attribute(buf: &mut ProtoBuffer, key: &str, value: &AttributeValue) { + proto_encode_len_delimited_unknown_size!( + RESOURCE_ATTRIBUTES, + { + buf.encode_string(KEY_VALUE_KEY, key); + proto_encode_len_delimited_unknown_size!( + KEY_VALUE_VALUE, + { + match value { + AttributeValue::String(s) => { + buf.encode_string(ANY_VALUE_STRING_VALUE, s); + } + AttributeValue::Bool(b) => { + buf.encode_field_tag(ANY_VALUE_BOOL_VALUE, wire_types::VARINT); + buf.encode_varint(u64::from(*b)); + } + AttributeValue::I64(i) => { + buf.encode_field_tag(ANY_VALUE_INT_VALUE, wire_types::VARINT); + buf.encode_varint(*i as u64); + } + AttributeValue::F64(f) => { + buf.encode_field_tag(ANY_VALUE_DOUBLE_VALUE, wire_types::FIXED64); + buf.extend_from_slice(&f.to_le_bytes()); + } + AttributeValue::Array(_) => { + crate::raw_error!("Arrays are not supported in resource attributes"); + } + } + }, + buf + ); + }, + buf + ); +} diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing.rs index 43d14001b3..86fb85a541 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing.rs @@ -18,6 +18,7 @@ use tracing::callsite::Identifier; use tracing::{Event, Level, Metadata}; pub use encoder::DirectLogRecordEncoder; +pub use encoder::encode_export_logs_request; pub use formatter::{AnsiCode, BufWriter, ColorMode, ConsoleWriter, RawLoggingLayer, LOG_BUFFER_SIZE}; /// A log record with structural metadata and pre-encoded body/attributes. diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs index 1d88be5b60..d576aaf4de 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs @@ -301,3 +301,57 @@ pub fn level_to_severity_number(level: &Level) -> u8 { Level::ERROR => 17, } } + +/// Encode a single LogRecord as an OTLP ExportLogsServiceRequest. +/// +/// This creates a complete OTLP request containing a single log record wrapped +/// in the appropriate resource/scope hierarchy. +/// +/// # Arguments +/// * `record` - The log record to encode +/// * `callsite` - The callsite metadata for the log record +/// * `resource_bytes` - Optional pre-encoded resource bytes (from `encode_resource_bytes`) +/// +/// # Returns +/// The protobuf-encoded ExportLogsServiceRequest as bytes. +#[must_use] +pub fn encode_export_logs_request( + record: LogRecord, + callsite: &SavedCallsite, + resource_bytes: Option<&bytes::Bytes>, +) -> bytes::Bytes { + let capacity = 256 + resource_bytes.map_or(0, |b| b.len()); + let mut buf = ProtoBuffer::with_capacity(capacity); + + // ExportLogsServiceRequest { resource_logs: [ ResourceLogs { ... } ] } + proto_encode_len_delimited_unknown_size!( + LOGS_DATA_RESOURCE, // field 1: resource_logs + { + // Insert pre-encoded resource (field 1: resource) if available + if let Some(res_bytes) = resource_bytes { + buf.extend_from_slice(res_bytes); + } + + // ResourceLogs { scope_logs: [ ScopeLogs { ... } ] } + proto_encode_len_delimited_unknown_size!( + RESOURCE_LOGS_SCOPE_LOGS, // field 2: scope_logs + { + // ScopeLogs { log_records: [ LogRecord { ... 
} ] } + // Note: we skip scope (field 1) to use empty/default scope + proto_encode_len_delimited_unknown_size!( + SCOPE_LOGS_LOG_RECORDS, // field 2: log_records + { + let mut encoder = DirectLogRecordEncoder::new(&mut buf); + let _ = encoder.encode_log_record(record, callsite); + }, + &mut buf + ); + }, + &mut buf + ); + }, + &mut buf + ); + + buf.into_bytes() +} From 6515c93cc147d00eb521643a607eaa854faf9c28 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Wed, 14 Jan 2026 17:00:08 -0800 Subject: [PATCH 88/92] encode logsdata --- .../otap/src/internal_telemetry_receiver.rs | 20 ++++++++--- .../telemetry/src/self_tracing/encoder.rs | 34 +++++++------------ 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/rust/otap-dataflow/crates/otap/src/internal_telemetry_receiver.rs b/rust/otap-dataflow/crates/otap/src/internal_telemetry_receiver.rs index e5bc7c1234..f3538f1ca5 100644 --- a/rust/otap-dataflow/crates/otap/src/internal_telemetry_receiver.rs +++ b/rust/otap-dataflow/crates/otap/src/internal_telemetry_receiver.rs @@ -20,6 +20,8 @@ use otap_df_engine::local::receiver as local; use otap_df_engine::node::NodeId; use otap_df_engine::receiver::ReceiverWrapper; use otap_df_engine::terminal_state::TerminalState; +use bytes::Bytes; +use otap_df_pdata::otlp::ProtoBuffer; use otap_df_pdata::OtlpProtoBytes; use otap_df_telemetry::logs::LogPayload; use otap_df_telemetry::metrics::MetricSetSnapshot; @@ -95,6 +97,9 @@ impl local::Receiver for InternalTelemetryReceiver { .start_periodic_telemetry(std::time::Duration::from_secs(1)) .await?; + // Reusable buffer for encoding log records + let mut buf = ProtoBuffer::with_capacity(512); + loop { tokio::select! { biased; @@ -105,7 +110,7 @@ impl local::Receiver for InternalTelemetryReceiver { Ok(NodeControlMsg::Shutdown { deadline, .. 
}) => { // Drain any remaining logs from channel before shutdown while let Ok(payload) = logs_receiver.try_recv() { - self.send_payload(&effect_handler, payload).await?; + self.send_payload(&effect_handler, payload, &mut buf).await?; } return Ok(TerminalState::new::<[MetricSetSnapshot; 0]>(deadline, [])); } @@ -125,7 +130,7 @@ impl local::Receiver for InternalTelemetryReceiver { result = logs_receiver.recv_async() => { match result { Ok(payload) => { - self.send_payload(&effect_handler, payload).await?; + self.send_payload(&effect_handler, payload, &mut buf).await?; } Err(_) => { // Channel closed, exit gracefully @@ -144,16 +149,21 @@ impl InternalTelemetryReceiver { &self, effect_handler: &local::EffectHandler, payload: LogPayload, + buf: &mut ProtoBuffer, ) -> Result<(), Error> { match payload { LogPayload::Singleton(record) => { let callsite = SavedCallsite::new(record.callsite_id.0.metadata()); - let bytes = - encode_export_logs_request(record, &callsite, effect_handler.resource_bytes()); + encode_export_logs_request( + buf, + record, + &callsite, + effect_handler.resource_bytes(), + ); let pdata = OtapPdata::new( Context::default(), - OtlpProtoBytes::ExportLogsRequest(bytes).into(), + OtlpProtoBytes::ExportLogsRequest(Bytes::copy_from_slice(buf.as_ref())).into(), ); effect_handler.send_message(pdata).await?; } diff --git a/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs b/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs index d576aaf4de..1a8034dfcb 100644 --- a/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs +++ b/rust/otap-dataflow/crates/telemetry/src/self_tracing/encoder.rs @@ -302,26 +302,18 @@ pub fn level_to_severity_number(level: &Level) -> u8 { } } -/// Encode a single LogRecord as an OTLP ExportLogsServiceRequest. +/// Encode a single LogRecord as an OTLP ExportLogsServiceRequest also +/// known as LogsData. /// -/// This creates a complete OTLP request containing a single log record wrapped -/// in the appropriate resource/scope hierarchy. -/// -/// # Arguments -/// * `record` - The log record to encode -/// * `callsite` - The callsite metadata for the log record -/// * `resource_bytes` - Optional pre-encoded resource bytes (from `encode_resource_bytes`) -/// -/// # Returns -/// The protobuf-encoded ExportLogsServiceRequest as bytes. -#[must_use] +/// The buffer is cleared before encoding. After this call, the buffer +/// contains the complete encoded request. pub fn encode_export_logs_request( + buf: &mut ProtoBuffer, record: LogRecord, callsite: &SavedCallsite, resource_bytes: Option<&bytes::Bytes>, -) -> bytes::Bytes { - let capacity = 256 + resource_bytes.map_or(0, |b| b.len()); - let mut buf = ProtoBuffer::with_capacity(capacity); +) { + buf.clear(); // ExportLogsServiceRequest { resource_logs: [ ResourceLogs { ... } ] } proto_encode_len_delimited_unknown_size!( @@ -337,21 +329,19 @@ pub fn encode_export_logs_request( RESOURCE_LOGS_SCOPE_LOGS, // field 2: scope_logs { // ScopeLogs { log_records: [ LogRecord { ... 
} ] } - // Note: we skip scope (field 1) to use empty/default scope + // TODO: add scope (field 1) proto_encode_len_delimited_unknown_size!( SCOPE_LOGS_LOG_RECORDS, // field 2: log_records { - let mut encoder = DirectLogRecordEncoder::new(&mut buf); + let mut encoder = DirectLogRecordEncoder::new(buf); let _ = encoder.encode_log_record(record, callsite); }, - &mut buf + buf ); }, - &mut buf + buf ); }, - &mut buf + buf ); - - buf.into_bytes() } From cbda0ef3b36076e5157db055035d8ca1f3636ea2 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Wed, 14 Jan 2026 23:06:00 -0800 Subject: [PATCH 89/92] minor --- .../crates/controller/src/error.rs | 14 --- .../crates/controller/src/lib.rs | 80 +++------------- rust/otap-dataflow/crates/engine/src/lib.rs | 13 +-- .../crates/engine/src/local/receiver.rs | 27 ++---- .../crates/engine/src/receiver.rs | 95 +++++-------------- .../otap-dataflow/crates/telemetry/src/lib.rs | 12 +++ .../crates/telemetry/src/telemetry_runtime.rs | 19 ++++ 7 files changed, 81 insertions(+), 179 deletions(-) diff --git a/rust/otap-dataflow/crates/controller/src/error.rs b/rust/otap-dataflow/crates/controller/src/error.rs index 17e5ea9031..53ca9aa299 100644 --- a/rust/otap-dataflow/crates/controller/src/error.rs +++ b/rust/otap-dataflow/crates/controller/src/error.rs @@ -96,18 +96,4 @@ pub enum Error { /// Panic message. panic_message: String, }, - - /// Configuration validation error. - #[error("Configuration error: {message}")] - ConfigurationError { - /// Error message describing the configuration problem. - message: String, - }, - - /// Internal telemetry pipeline failed to start. - #[error("Internal telemetry pipeline failed to start: {message}")] - InternalPipelineStartupFailed { - /// Error message describing why startup failed. - message: String, - }, } diff --git a/rust/otap-dataflow/crates/controller/src/lib.rs b/rust/otap-dataflow/crates/controller/src/lib.rs index a941fa36c2..13bae0f3e8 100644 --- a/rust/otap-dataflow/crates/controller/src/lib.rs +++ b/rust/otap-dataflow/crates/controller/src/lib.rs @@ -21,9 +21,7 @@ use crate::error::Error; use crate::thread_task::spawn_thread_local_task; use core_affinity::CoreId; use otap_df_config::engine::HttpAdminSettings; -use otap_df_config::pipeline::service::telemetry::logs::{ - INTERNAL_TELEMETRY_RECEIVER_URN, OutputMode, -}; +use otap_df_config::pipeline::service::telemetry::logs::OutputMode; use otap_df_config::{ PipelineGroupId, PipelineId, pipeline::PipelineConfig, @@ -44,9 +42,7 @@ use otap_df_telemetry::logs::{DirectCollector, TelemetrySetup}; use otap_df_telemetry::reporter::MetricsReporter; use otap_df_telemetry::self_tracing::ConsoleWriter; use otap_df_telemetry::telemetry_runtime::TelemetryRuntime; -use otap_df_telemetry::{ - InternalTelemetrySystem, otel_info, otel_info_span, otel_warn, resource::encode_resource_bytes, -}; +use otap_df_telemetry::{InternalTelemetrySystem, otel_info, otel_info_span, otel_warn}; use std::sync::mpsc as std_mpsc; use std::thread; @@ -98,8 +94,8 @@ impl Controller { telemetry_config .logs .validate() - .map_err(|msg| Error::ConfigurationError { - message: msg.to_string(), + .map_err(|err| Error::InvalidConfiguration { + errors: [err].into(), })?; // Create telemetry runtime according to the various options. 
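The `[err].into()` in the hunk above relies on the standard library's
`impl From<[T; N]> for Vec<T>`, so a single validation error converts directly
into the `errors` list. A standalone illustration (the String error type is
illustrative):

    let err = "invalid logs output mode".to_string();
    let errors: Vec<String> = [err].into(); // one-element array -> Vec
    assert_eq!(errors.len(), 1);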
@@ -122,9 +118,6 @@ impl Controller { None }; - // Pre-encode resource bytes once for all log batches - let resource_bytes = encode_resource_bytes(&telemetry_config.resource); - let metrics_system = InternalTelemetrySystem::new(telemetry_config); let metrics_dispatcher = metrics_system.dispatcher(); let metrics_reporter = metrics_system.reporter(); @@ -164,18 +157,11 @@ impl Controller { let log_level = telemetry_config.logs.level; // Spawn internal telemetry pipeline thread, if configured. - let internal_pipeline_thread = if let Some(internal_config) = + let _internal_pipeline_thread = if let Some(internal_config) = pipeline.extract_internal_config() { - // Create internal telemetry settings if we have a logs receiver - let internal_telemetry_settings = - telemetry_runtime - .take_logs_receiver() - .map(|rx| InternalTelemetrySettings { - target_urn: INTERNAL_TELEMETRY_RECEIVER_URN, - logs_receiver: rx, - resource_bytes: resource_bytes.clone(), - }); + // Take internal telemetry settings (logs receiver + resource bytes) if available + let internal_telemetry_settings = telemetry_runtime.take_internal_telemetry_settings(); let internal_factory = self.pipeline_factory; let internal_pipeline_id: PipelineId = "internal".into(); let internal_pipeline_key = DeployedPipelineKey { @@ -239,12 +225,9 @@ impl Controller { // Internal pipeline failed to build - propagate the error return Err(e); } - Err(_) => { + Err(err) => { // Channel closed unexpectedly - thread may have panicked - return Err(Error::InternalPipelineStartupFailed { - message: "Internal pipeline thread terminated unexpectedly during startup" - .to_string(), - }); + return Err(Error::PipelineRuntimeError{source: Box::new(err)}) } } @@ -393,42 +376,6 @@ impl Controller { } } - // Wait for internal pipeline thread if it was spawned - if let Some((_thread_name, handle)) = internal_pipeline_thread { - let internal_pipeline_id: PipelineId = "internal".into(); - let pipeline_key = DeployedPipelineKey { - pipeline_group_id: pipeline_group_id.clone(), - pipeline_id: internal_pipeline_id, - core_id: 0, // Virtual core ID for internal pipeline - }; - match handle.join() { - Ok(Ok(_)) => { - obs_evt_reporter.report(ObservedEvent::drained(pipeline_key, None)); - } - Ok(Err(e)) => { - let err_summary: ErrorSummary = error_summary_from_gen(&e); - obs_evt_reporter.report(ObservedEvent::pipeline_runtime_error( - pipeline_key.clone(), - "Internal pipeline encountered a runtime error.", - err_summary, - )); - // Log but don't fail - internal pipeline errors shouldn't bring down main - otel_warn!( - "InternalPipeline.Error", - message = "Internal telemetry pipeline failed", - error = format!("{e:?}") - ); - } - Err(e) => { - otel_warn!( - "InternalPipeline.Panic", - message = "Internal telemetry pipeline panicked", - panic_message = format!("{e:?}") - ); - } - } - } - // ToDo Add CTRL-C handler to initiate graceful shutdown of pipelines and admin server. // In this project phase (alpha), we park the main thread indefinitely. 
This is useful for @@ -666,14 +613,11 @@ impl Controller { ) { Ok(pipeline) => pipeline, Err(e) => { - // Signal failure to parent thread with the actual error - let error = Error::PipelineRuntimeError { + // Send error to main thread and exit; main thread will propagate it + let _ = startup_tx.send(Err(Error::PipelineRuntimeError { source: Box::new(e), - }; - let _ = startup_tx.send(Err(Error::InternalPipelineStartupFailed { - message: format!("{}", error), })); - return Err(error); + return Ok(vec![]); } }; diff --git a/rust/otap-dataflow/crates/engine/src/lib.rs b/rust/otap-dataflow/crates/engine/src/lib.rs index 8c5624bbca..13d3b5b197 100644 --- a/rust/otap-dataflow/crates/engine/src/lib.rs +++ b/rust/otap-dataflow/crates/engine/src/lib.rs @@ -25,6 +25,7 @@ use otap_df_config::{ PortName, node::{DispatchStrategy, NodeUserConfig}, pipeline::PipelineConfig, + pipeline::service::telemetry::logs::INTERNAL_TELEMETRY_RECEIVER_URN, }; use otap_df_telemetry::otel_debug; use std::borrow::Cow; @@ -292,13 +293,6 @@ impl PipelineFactory { /// the hyper-edges between them to determine the best channel type. /// - Assign channels to the source nodes and their destination nodes based on the previous /// analysis. - /// - /// # Parameters - /// - `pipeline_ctx`: The pipeline context for this build. - /// - `config`: The pipeline configuration. - /// - `logs_receiver`: Optional tuple of (URN, receiver) for internal logs channel. - /// When provided, the receiver and resource bytes are injected into any receiver - /// node matching the URN, enabling collection of logs from all threads via the channel. pub fn build( self: &PipelineFactory, pipeline_ctx: PipelineContext, @@ -351,9 +345,8 @@ impl PipelineFactory { // Inject internal telemetry settings if this is the target node if let Some(ref settings) = internal_telemetry { - if node_config.plugin_urn.as_ref() == settings.target_urn { - wrapper.set_logs_receiver(settings.logs_receiver.clone()); - wrapper.set_resource_bytes(settings.resource_bytes.clone()); + if node_config.plugin_urn.as_ref() == INTERNAL_TELEMETRY_RECEIVER_URN { + wrapper.set_internal_telemetry(settings.clone()); } } diff --git a/rust/otap-dataflow/crates/engine/src/local/receiver.rs b/rust/otap-dataflow/crates/engine/src/local/receiver.rs index 6520761f94..9da335143c 100644 --- a/rust/otap-dataflow/crates/engine/src/local/receiver.rs +++ b/rust/otap-dataflow/crates/engine/src/local/receiver.rs @@ -37,6 +37,7 @@ use crate::effect_handler::{EffectHandlerCore, TelemetryTimerCancelHandle, Timer use crate::error::{Error, ReceiverErrorKind, TypedError}; use crate::local::message::LocalSender; use crate::node::NodeId; +use crate::receiver::InternalTelemetrySettings; use crate::terminal_state::TerminalState; use async_trait::async_trait; use otap_df_channel::error::RecvError; @@ -133,10 +134,8 @@ pub struct EffectHandler { msg_senders: HashMap>, /// Cached default sender for fast access in the hot path default_sender: Option>, - /// Receiver for internal logs (for internal telemetry receiver). - logs_receiver: Option, - /// Pre-encoded resource bytes for OTLP log encoding (for internal telemetry receiver). - resource_bytes: Option, + /// Internal telemetry settings (for internal telemetry receiver). + internal_telemetry: Option, } /// Implementation for the `!Send` effect handler. @@ -166,31 +165,25 @@ impl EffectHandler { core, msg_senders, default_sender, - logs_receiver: None, - resource_bytes: None, + internal_telemetry: None, } } - /// Sets the logs receiver for internal telemetry. 
- pub fn set_logs_receiver( - &mut self, - logs_receiver: LogsReceiver, - resource_bytes: Option, - ) { - self.logs_receiver = Some(logs_receiver); - self.resource_bytes = resource_bytes; + /// Sets the internal telemetry settings. + pub fn set_internal_telemetry(&mut self, settings: InternalTelemetrySettings) { + self.internal_telemetry = Some(settings); } - /// Returns the logs receiver, if configured.. + /// Returns the logs receiver, if configured. #[must_use] pub fn logs_receiver(&self) -> Option<&LogsReceiver> { - self.logs_receiver.as_ref() + self.internal_telemetry.as_ref().map(|s| &s.logs_receiver) } /// Returns the pre-encoded resource bytes, if configured. #[must_use] pub fn resource_bytes(&self) -> Option<&bytes::Bytes> { - self.resource_bytes.as_ref() + self.internal_telemetry.as_ref().map(|s| &s.resource_bytes) } /// Returns the id of the receiver associated with this handler. diff --git a/rust/otap-dataflow/crates/engine/src/receiver.rs b/rust/otap-dataflow/crates/engine/src/receiver.rs index 293c2324df..5c223b7819 100644 --- a/rust/otap-dataflow/crates/engine/src/receiver.rs +++ b/rust/otap-dataflow/crates/engine/src/receiver.rs @@ -19,7 +19,6 @@ use crate::node::{Node, NodeId, NodeWithPDataSender}; use crate::shared::message::{SharedReceiver, SharedSender}; use crate::shared::receiver as shared; use crate::terminal_state::TerminalState; -use bytes::Bytes; use otap_df_channel::error::SendError; use otap_df_channel::mpsc; use otap_df_config::PortName; @@ -31,18 +30,8 @@ use std::sync::Arc; /// Type alias for the internal logs receiver channel. pub type LogsReceiver = otap_df_telemetry::LogsReceiver; -/// Runtime settings for internal telemetry injection into a receiver. -/// -/// This struct bundles the logs receiver channel and pre-encoded resource bytes -/// that should be injected into the Internal Telemetry Receiver node. -pub struct InternalTelemetrySettings { - /// The URN of the receiver to inject into. - pub target_urn: &'static str, - /// The logs receiver channel. - pub logs_receiver: LogsReceiver, - /// Pre-encoded resource bytes for OTLP log encoding. - pub resource_bytes: Bytes, -} +/// Re-export from telemetry crate for convenience. +pub use otap_df_telemetry::InternalTelemetrySettings; /// A wrapper for the receiver that allows for both `Send` and `!Send` receivers. /// @@ -67,10 +56,8 @@ pub enum ReceiverWrapper { pdata_senders: HashMap>, /// A receiver for pdata messages. pdata_receiver: Option>, - /// Receiver for internal logs (for internal telemetry receiver). - logs_receiver: Option, - /// Pre-encoded resource bytes for internal telemetry (for internal telemetry receiver). - resource_bytes: Option, + /// Internal telemetry settings (for internal telemetry receiver). + internal_telemetry: Option, }, /// A receiver with a `Send` implementation. Shared { @@ -90,10 +77,8 @@ pub enum ReceiverWrapper { pdata_senders: HashMap>, /// A receiver for pdata messages. pdata_receiver: Option>, - /// Receiver for internal logs (for internal telemetry receiver). - logs_receiver: Option, - /// Pre-encoded resource bytes for internal telemetry (for internal telemetry receiver). - resource_bytes: Option, + /// Internal telemetry settings (for internal telemetry receiver). 
+ internal_telemetry: Option, }, } @@ -133,8 +118,7 @@ impl ReceiverWrapper { control_receiver: LocalReceiver::mpsc(control_receiver), pdata_senders: HashMap::new(), pdata_receiver: None, - logs_receiver: None, - resource_bytes: None, + internal_telemetry: None, } } @@ -160,8 +144,7 @@ impl ReceiverWrapper { control_receiver: SharedReceiver::mpsc(control_receiver), pdata_senders: HashMap::new(), pdata_receiver: None, - logs_receiver: None, - resource_bytes: None, + internal_telemetry: None, } } @@ -184,8 +167,7 @@ impl ReceiverWrapper { receiver, pdata_senders, pdata_receiver, - logs_receiver, - resource_bytes, + internal_telemetry, } => { let channel_id = control_channel_id(&node_id); let control_sender = match control_sender.into_mpsc() { @@ -219,8 +201,7 @@ impl ReceiverWrapper { control_receiver, pdata_senders, pdata_receiver, - logs_receiver, - resource_bytes, + internal_telemetry, } } ReceiverWrapper::Shared { @@ -232,8 +213,7 @@ impl ReceiverWrapper { receiver, pdata_senders, pdata_receiver, - logs_receiver, - resource_bytes, + internal_telemetry, } => { let channel_id = control_channel_id(&node_id); let control_sender = match control_sender.into_mpsc() { @@ -267,8 +247,7 @@ impl ReceiverWrapper { control_receiver, pdata_senders, pdata_receiver, - logs_receiver, - resource_bytes, + internal_telemetry, } } } @@ -288,8 +267,7 @@ impl ReceiverWrapper { control_receiver, pdata_senders, user_config, - logs_receiver, - resource_bytes, + internal_telemetry, .. }, metrics_reporter, @@ -314,8 +292,8 @@ impl ReceiverWrapper { metrics_reporter, ); // Inject internal telemetry settings if configured - if let Some(logs_rx) = logs_receiver { - effect_handler.set_logs_receiver(logs_rx, resource_bytes) + if let Some(settings) = internal_telemetry { + effect_handler.set_internal_telemetry(settings); } receiver.start(ctrl_msg_chan, effect_handler).await } @@ -409,49 +387,26 @@ impl Node for ReceiverWrapper { } impl ReceiverWrapper { - /// Set the logs receiver for internal telemetry. + /// Set the internal telemetry settings for this receiver. /// /// This is used by the Internal Telemetry Receiver to receive logs /// from all threads via the logs channel. - pub fn set_logs_receiver(&mut self, receiver: LogsReceiver) { - match self { - ReceiverWrapper::Local { logs_receiver, .. } => { - *logs_receiver = Some(receiver); - } - ReceiverWrapper::Shared { logs_receiver, .. } => { - *logs_receiver = Some(receiver); - } - } - } - - /// Take the logs receiver, if set. - pub fn take_logs_receiver(&mut self) -> Option { - match self { - ReceiverWrapper::Local { logs_receiver, .. } => logs_receiver.take(), - ReceiverWrapper::Shared { logs_receiver, .. } => logs_receiver.take(), - } - } - - /// Set the pre-encoded resource bytes for internal telemetry. - /// - /// This is used by the Internal Telemetry Receiver to include resource - /// attributes in the encoded OTLP log messages. - pub fn set_resource_bytes(&mut self, bytes: Bytes) { + pub fn set_internal_telemetry(&mut self, settings: InternalTelemetrySettings) { match self { - ReceiverWrapper::Local { resource_bytes, .. } => { - *resource_bytes = Some(bytes); + ReceiverWrapper::Local { internal_telemetry, .. } => { + *internal_telemetry = Some(settings); } - ReceiverWrapper::Shared { resource_bytes, .. } => { - *resource_bytes = Some(bytes); + ReceiverWrapper::Shared { internal_telemetry, .. } => { + *internal_telemetry = Some(settings); } } } - /// Take the pre-encoded resource bytes, if set. 
- pub fn take_resource_bytes(&mut self) -> Option { + /// Take the internal telemetry settings, if set. + pub fn take_internal_telemetry(&mut self) -> Option { match self { - ReceiverWrapper::Local { resource_bytes, .. } => resource_bytes.take(), - ReceiverWrapper::Shared { resource_bytes, .. } => resource_bytes.take(), + ReceiverWrapper::Local { internal_telemetry, .. } => internal_telemetry.take(), + ReceiverWrapper::Shared { internal_telemetry, .. } => internal_telemetry.take(), } } } diff --git a/rust/otap-dataflow/crates/telemetry/src/lib.rs b/rust/otap-dataflow/crates/telemetry/src/lib.rs index 8334a83dd9..487f6a64ef 100644 --- a/rust/otap-dataflow/crates/telemetry/src/lib.rs +++ b/rust/otap-dataflow/crates/telemetry/src/lib.rs @@ -73,6 +73,18 @@ pub use logs::{ DirectCollector, ImmediateLayer, LogPayload, LogsReceiver, LogsReporter, TelemetrySetup, }; +/// Runtime settings for internal telemetry injection into a receiver. +/// +/// This struct bundles the logs receiver channel and pre-encoded resource bytes +/// that should be injected into the Internal Telemetry Receiver node. +#[derive(Clone)] +pub struct InternalTelemetrySettings { + /// The logs receiver channel. + pub logs_receiver: LogsReceiver, + /// Pre-encoded resource bytes for OTLP log encoding. + pub resource_bytes: bytes::Bytes, +} + // TODO This should be #[cfg(test)], but something is preventing it from working. // The #[cfg(test)]-labeled otap_batch_processor::test_helpers::from_config // can't load this module unless I remove #[cfg(test)]! See #1304. diff --git a/rust/otap-dataflow/crates/telemetry/src/telemetry_runtime.rs b/rust/otap-dataflow/crates/telemetry/src/telemetry_runtime.rs index ae1dccfafd..4f5a77162e 100644 --- a/rust/otap-dataflow/crates/telemetry/src/telemetry_runtime.rs +++ b/rust/otap-dataflow/crates/telemetry/src/telemetry_runtime.rs @@ -40,6 +40,8 @@ pub struct TelemetryRuntime { /// Receiver for the internal logs channel (Internal output mode only). /// The ITR node consumes this to process internal telemetry. logs_receiver: Option, + /// Pre-encoded resource bytes for OTLP log encoding. + resource_bytes: bytes::Bytes, /// Deferred global subscriber setup. Must be initialized by controller /// AFTER the internal pipeline is started (so the channel is being consumed). global_setup: Option, @@ -72,6 +74,9 @@ impl TelemetryRuntime { pub fn new(config: &TelemetryConfig) -> Result { let sdk_resource = Self::configure_resource(&config.resource); + // Pre-encode resource bytes once for internal telemetry + let resource_bytes = crate::resource::encode_resource_bytes(&config.resource); + let runtime = None; let (meter_provider, runtime) = @@ -115,6 +120,7 @@ impl TelemetryRuntime { logger_provider, logs_reporter, logs_receiver, + resource_bytes, global_setup: Some(global_setup), global_log_level: config.logs.level, }) @@ -189,6 +195,19 @@ impl TelemetryRuntime { self.logs_receiver.take() } + /// Take the internal telemetry settings for injection into the ITR node. + /// + /// Returns `Some` only when output mode is `Internal`. This bundles the + /// logs receiver channel and pre-encoded resource bytes together. + /// + /// This method takes ownership of the receiver (can only be called once). + pub fn take_internal_telemetry_settings(&mut self) -> Option { + self.logs_receiver.take().map(|rx| crate::InternalTelemetrySettings { + logs_receiver: rx, + resource_bytes: self.resource_bytes.clone(), + }) + } + /// Initialize the global tracing subscriber. 
/// /// This MUST be called AFTER the internal pipeline is started (when using From daa127c0644bbfcb9b6eb2b63cca8ff7a92a4f52 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Wed, 14 Jan 2026 23:42:30 -0800 Subject: [PATCH 90/92] placehold; otap bug logs with raw --- .../configs/internal-telemetry.yaml | 2 +- .../crates/otap/src/batch_processor.rs | 29 +++----- .../crates/pdata/src/otap/groups.rs | 68 ++++++++++--------- 3 files changed, 48 insertions(+), 51 deletions(-) diff --git a/rust/otap-dataflow/configs/internal-telemetry.yaml b/rust/otap-dataflow/configs/internal-telemetry.yaml index 5617e7165f..93dbea2c54 100644 --- a/rust/otap-dataflow/configs/internal-telemetry.yaml +++ b/rust/otap-dataflow/configs/internal-telemetry.yaml @@ -72,7 +72,7 @@ service: providers: global: immediate engine: immediate - internal: noop # Avoid feedback in internal pipeline + internal: raw # Avoid feedback in internal pipeline output: internal resource: service.id: 1234 diff --git a/rust/otap-dataflow/crates/otap/src/batch_processor.rs b/rust/otap-dataflow/crates/otap/src/batch_processor.rs index 8109201463..e3ba7125cb 100644 --- a/rust/otap-dataflow/crates/otap/src/batch_processor.rs +++ b/rust/otap-dataflow/crates/otap/src/batch_processor.rs @@ -73,10 +73,6 @@ pub const DEFAULT_OTLP_MIN_SIZE_BYTES: usize = 262144; /// Timeout in milliseconds for periodic flush pub const DEFAULT_TIMEOUT_MS: u64 = 200; -/// Log messages -const LOG_MSG_BATCHING_FAILED_PREFIX: &str = "OTAP batch processor: low-level batching failed for"; -const LOG_MSG_BATCHING_FAILED_SUFFIX: &str = "; dropping"; - /// How to size a batch. /// /// Note: these are not always supported. In the present code, the only @@ -514,18 +510,6 @@ fn nzu_to_nz64(nz: Option) -> Option { nz.map(|nz| NonZeroU64::new(nz.get() as u64).expect("nonzero")) } -async fn log_batching_failed( - effect: &mut local::EffectHandler, - signal: SignalType, - err: &impl std::fmt::Display, -) { - effect - .info(&format!( - "{LOG_MSG_BATCHING_FAILED_PREFIX} {signal:?}: {err}{LOG_MSG_BATCHING_FAILED_SUFFIX}" - )) - .await; -} - impl BatchProcessor { /// Parse JSON config and build the processor instance with the provided metrics set. /// This function does not wrap the processor into a ProcessorWrapper so callers can @@ -857,14 +841,21 @@ where Ok(v) => v, Err(e) => { self.metrics.batching_errors.add(count as u64); - log_batching_failed(effect, self.signal, &e).await; + otap_df_telemetry::otel_error!( + "Processor.BatchingError", + signal = format!("{:?}", self.signal), + error = e.to_string(), + ); + let str = e.to_string(); - let res = Err(str.clone()); + let res = Err(str); // In this case, we are sending failure to all the pending inputs. self.buffer .handle_partial_responses(self.signal, effect, &res, inputs.context) .await?; - return Err(EngineError::InternalError { message: str }); + // Log and drop instead of returning an error to avoid crashing the pipeline. + // The error has been logged and subscribers notified; continue processing. 
+                    return Ok(());
                 }
             };
diff --git a/rust/otap-dataflow/crates/pdata/src/otap/groups.rs b/rust/otap-dataflow/crates/pdata/src/otap/groups.rs
index a985136e96..bb8f392df3 100644
--- a/rust/otap-dataflow/crates/pdata/src/otap/groups.rs
+++ b/rust/otap-dataflow/crates/pdata/src/otap/groups.rs
@@ -1513,7 +1513,7 @@ impl UnifiedDictionaryTypeSelector {
         Self {
             total_batch_size: 0,
             values_arrays: Vec::new(),
-            smallest_key_type: DataType::UInt16,
+            smallest_key_type: DataType::UInt8, // Start with smallest, upgrade as needed
             selected_type: None,
         }
     }
@@ -1529,7 +1529,7 @@ impl UnifiedDictionaryTypeSelector {
                     .as_any()
                     .downcast_ref::<DictionaryArray<UInt8Type>>()
                     .expect("can cast array to data type");
-                self.smallest_key_type = DataType::UInt8;
+                // Keep smallest_key_type as-is (UInt8 doesn't require upgrade)
                 dict_col.values()
             }
             DataType::UInt16 => {
@@ -1537,6 +1537,8 @@ impl UnifiedDictionaryTypeSelector {
                     .as_any()
                     .downcast_ref::<DictionaryArray<UInt16Type>>()
                     .expect("can cast array to data type");
+                // Upgrade to UInt16 if we see a UInt16 dictionary
+                self.smallest_key_type = DataType::UInt16;
                 dict_col.values()
             }
             key_type => {
@@ -1562,18 +1564,28 @@ impl UnifiedDictionaryTypeSelector {
             return Ok(selected_type.clone());
         }
 
-        // check early termination conditions
-        if self.total_batch_size <= u8::MAX as usize {
-            let selected_type = UnifiedDictionaryType::Dictionary(DataType::UInt8);
-            self.selected_type = Some(selected_type.clone());
-            return Ok(selected_type);
-        }
+        // If we've seen a UInt16 dictionary, we must use at least UInt16 (can't downcast)
+        if self.smallest_key_type == DataType::UInt16 {
+            if self.total_batch_size <= u16::MAX as usize {
+                let selected_type = UnifiedDictionaryType::Dictionary(DataType::UInt16);
+                self.selected_type = Some(selected_type.clone());
+                return Ok(selected_type);
+            }
+            // Fall through to cardinality estimation for larger sizes
+        } else {
+            // smallest_key_type is UInt8, check if we can use UInt8
+            if self.total_batch_size <= u8::MAX as usize {
+                let selected_type = UnifiedDictionaryType::Dictionary(DataType::UInt8);
+                self.selected_type = Some(selected_type.clone());
+                return Ok(selected_type);
+            }
 
-        if self.smallest_key_type == DataType::UInt16 && self.total_batch_size <= u16::MAX as usize
-        {
-            let selected_type = UnifiedDictionaryType::Dictionary(DataType::UInt16);
-            self.selected_type = Some(selected_type.clone());
-            return Ok(selected_type);
+            // Check if UInt16 is sufficient
+            if self.total_batch_size <= u16::MAX as usize {
+                let selected_type = UnifiedDictionaryType::Dictionary(DataType::UInt16);
+                self.selected_type = Some(selected_type.clone());
+                return Ok(selected_type);
+            }
         }
 
         // None of the easy cases applied so we have to iterate through some values to estimate
@@ -2015,7 +2027,7 @@ fn try_unify_dictionaries(
                 ),
                 UnifiedDictionaryType::Native => cast(column, &values_type),
             }
-            .expect("can cast dictionary column");
+            .map_err(|e| Error::Batching { source: e })?;
 
             let new_field = fields[field_index]
                 .as_ref()
@@ -2026,10 +2038,8 @@ fn try_unify_dictionaries(
         }
     }
 
-    // safety: should be safe to expect that building the record batch won't fail here. The schema
-    // should match the columns and the columns should all have the correct length
-    Ok(RecordBatch::try_new(Arc::new(Schema::new(fields)), columns)
-        .expect("can unify dict columns"))
+    RecordBatch::try_new(Arc::new(Schema::new(fields)), columns)
+        .map_err(|e| Error::Batching { source: e })
 }
 
 fn try_discover_structs(
@@ -2155,7 +2165,8 @@ fn try_unify_structs(
             let struct_nulls = rb_field
                 .is_nullable()
                 .then(|| NullBuffer::from_iter(repeat_n(false, len)));
-            let new_rb_column = StructArray::new(struct_fields, struct_columns, struct_nulls);
+            let new_rb_column = StructArray::try_new(struct_fields, struct_columns, struct_nulls)
+                .map_err(|e| Error::Batching { source: e })?;
             let new_rb_field = Field::new(
                 rb_field.name(),
                 new_rb_column.data_type().clone(),
@@ -2167,12 +2178,8 @@ fn try_unify_structs(
         }
     }
 
-    Ok(
-        // Safety: here we should have an array of fields that match the types in the columns
-        // and all the columns are same length, so it's safe to expect here
-        RecordBatch::try_new(Arc::new(Schema::new(rb_fields)), rb_columns)
-            .expect("could not new record batch with unified struct columns"),
-    )
+    RecordBatch::try_new(Arc::new(Schema::new(rb_fields)), rb_columns)
+        .map_err(|e| Error::Batching { source: e })
 }
 
 fn try_unify_struct_fields(
@@ -2218,10 +2225,8 @@ fn try_unify_struct_fields(
             let current_column = current_array.column(field_index).clone();
             let new_column = match current_field.data_type() {
                 DataType::Dictionary(_, _) => {
-                    // safety: casting the dictionary keys should be infallible here as we're
-                    // either casting to a native dict (which should be infallible), or we're
-                    // casting the keys to a size we've calculated will fit
-                    cast(&current_column, &data_type).expect("can cast dictionary column")
+                    cast(&current_column, &data_type)
+                        .map_err(|e| Error::Batching { source: e })?
                 }
                 _ => current_column,
             };
@@ -2238,11 +2243,12 @@ fn try_unify_struct_fields(
         }
     }
 
-    Ok(StructArray::new(
+    StructArray::try_new(
         Fields::from(new_fields),
         new_columns,
         current_array.nulls().cloned(),
-    ))
+    )
+    .map_err(|e| Error::Batching { source: e })
 }
 
 /// Note! the tests below validate internal details of the logic above.
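The selector change in this patch reduces to a small decision rule: UInt8 keys are safe only when no input dictionary already used UInt16 keys and the combined batch fits in a u8; otherwise UInt16 applies when the batch fits in a u16, and anything larger falls through to cardinality estimation. A minimal standalone sketch of that rule (illustrative names, not the crate's API; `seen_uint16` stands in for `smallest_key_type == DataType::UInt16`):

    // Returns the chosen dictionary key width in bits, or None to fall
    // through to cardinality estimation for oversized batches.
    fn select_key_width(seen_uint16: bool, total_batch_size: usize) -> Option<u32> {
        if !seen_uint16 && total_batch_size <= u8::MAX as usize {
            Some(8) // UInt8 keys: no wider input dictionary and the size fits
        } else if total_batch_size <= u16::MAX as usize {
            Some(16) // UInt16 keys: a UInt16 input was seen, or u8 overflowed
        } else {
            None // none of the easy cases applied
        }
    }

Keys can be widened but never narrowed, which is why a single UInt16 input forces the whole unified batch to UInt16 even when the size would fit in a u8.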
From 86dfc317a6b98c3ce39b402a9713800660808707 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Thu, 15 Jan 2026 10:12:38 -0800 Subject: [PATCH 91/92] test config --- rust/otap-dataflow/configs/internal-telemetry.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/otap-dataflow/configs/internal-telemetry.yaml b/rust/otap-dataflow/configs/internal-telemetry.yaml index 93dbea2c54..776d685a88 100644 --- a/rust/otap-dataflow/configs/internal-telemetry.yaml +++ b/rust/otap-dataflow/configs/internal-telemetry.yaml @@ -59,7 +59,7 @@ internal: min_size: 1000 sizer: items flush_timeout: 3s - format: otap + format: otlp console: kind: exporter plugin_urn: "urn:otel:console:exporter" From db8de3ef66896e3b328db8c84387f505f05e1784 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Thu, 15 Jan 2026 10:27:04 -0800 Subject: [PATCH 92/92] simplify --- .../configs/internal-telemetry.yaml | 4 +- .../crates/otap/src/batch_processor.rs | 29 +++++--- rust/otap-dataflow/crates/pdata/src/lib.rs | 3 - .../crates/pdata/src/otap/groups.rs | 68 +++++++++---------- .../crates/pdata/src/validation/mod.rs | 1 + 5 files changed, 53 insertions(+), 52 deletions(-) diff --git a/rust/otap-dataflow/configs/internal-telemetry.yaml b/rust/otap-dataflow/configs/internal-telemetry.yaml index 776d685a88..55d63863ce 100644 --- a/rust/otap-dataflow/configs/internal-telemetry.yaml +++ b/rust/otap-dataflow/configs/internal-telemetry.yaml @@ -14,7 +14,7 @@ nodes: dispatch_strategy: round_robin config: traffic_config: - max_signal_count: 1000 + max_signal_count: 100000 max_batch_size: 1000 signals_per_second: 1000 log_weight: 100 @@ -59,7 +59,7 @@ internal: min_size: 1000 sizer: items flush_timeout: 3s - format: otlp + format: preserve console: kind: exporter plugin_urn: "urn:otel:console:exporter" diff --git a/rust/otap-dataflow/crates/otap/src/batch_processor.rs b/rust/otap-dataflow/crates/otap/src/batch_processor.rs index e3ba7125cb..8109201463 100644 --- a/rust/otap-dataflow/crates/otap/src/batch_processor.rs +++ b/rust/otap-dataflow/crates/otap/src/batch_processor.rs @@ -73,6 +73,10 @@ pub const DEFAULT_OTLP_MIN_SIZE_BYTES: usize = 262144; /// Timeout in milliseconds for periodic flush pub const DEFAULT_TIMEOUT_MS: u64 = 200; +/// Log messages +const LOG_MSG_BATCHING_FAILED_PREFIX: &str = "OTAP batch processor: low-level batching failed for"; +const LOG_MSG_BATCHING_FAILED_SUFFIX: &str = "; dropping"; + /// How to size a batch. /// /// Note: these are not always supported. In the present code, the only @@ -510,6 +514,18 @@ fn nzu_to_nz64(nz: Option) -> Option { nz.map(|nz| NonZeroU64::new(nz.get() as u64).expect("nonzero")) } +async fn log_batching_failed( + effect: &mut local::EffectHandler, + signal: SignalType, + err: &impl std::fmt::Display, +) { + effect + .info(&format!( + "{LOG_MSG_BATCHING_FAILED_PREFIX} {signal:?}: {err}{LOG_MSG_BATCHING_FAILED_SUFFIX}" + )) + .await; +} + impl BatchProcessor { /// Parse JSON config and build the processor instance with the provided metrics set. /// This function does not wrap the processor into a ProcessorWrapper so callers can @@ -841,21 +857,14 @@ where Ok(v) => v, Err(e) => { self.metrics.batching_errors.add(count as u64); - otap_df_telemetry::otel_error!( - "Processor.BatchingError", - signal = format!("{:?}", self.signal), - error = e.to_string(), - ); - + log_batching_failed(effect, self.signal, &e).await; let str = e.to_string(); - let res = Err(str); + let res = Err(str.clone()); // In this case, we are sending failure to all the pending inputs. 
                     self.buffer
                         .handle_partial_responses(self.signal, effect, &res, inputs.context)
                         .await?;
-                    // Log and drop instead of returning an error to avoid crashing the pipeline.
-                    // The error has been logged and subscribers notified; continue processing.
-                    return Ok(());
+                    return Err(EngineError::InternalError { message: str });
                 }
             };
diff --git a/rust/otap-dataflow/crates/pdata/src/lib.rs b/rust/otap-dataflow/crates/pdata/src/lib.rs
index 30046807c9..b023deb05b 100644
--- a/rust/otap-dataflow/crates/pdata/src/lib.rs
+++ b/rust/otap-dataflow/crates/pdata/src/lib.rs
@@ -38,9 +38,6 @@ mod validation;
 pub use decode::decoder::Consumer;
 pub use encode::producer::Producer;
 
-/// Re-export prost for proto message encoding/decoding.
-pub use prost;
-
 /// TraceID identifier of a Trace
 #[derive(Eq, PartialEq, Clone, Copy, Debug, Default)]
 pub struct TraceID([u8; 16]);
diff --git a/rust/otap-dataflow/crates/pdata/src/otap/groups.rs b/rust/otap-dataflow/crates/pdata/src/otap/groups.rs
index bb8f392df3..a985136e96 100644
--- a/rust/otap-dataflow/crates/pdata/src/otap/groups.rs
+++ b/rust/otap-dataflow/crates/pdata/src/otap/groups.rs
@@ -1513,7 +1513,7 @@ impl UnifiedDictionaryTypeSelector {
         Self {
             total_batch_size: 0,
             values_arrays: Vec::new(),
-            smallest_key_type: DataType::UInt8, // Start with smallest, upgrade as needed
+            smallest_key_type: DataType::UInt16,
             selected_type: None,
         }
     }
@@ -1529,7 +1529,7 @@ impl UnifiedDictionaryTypeSelector {
                     .as_any()
                     .downcast_ref::<DictionaryArray<UInt8Type>>()
                     .expect("can cast array to data type");
-                // Keep smallest_key_type as-is (UInt8 doesn't require upgrade)
+                self.smallest_key_type = DataType::UInt8;
                 dict_col.values()
             }
             DataType::UInt16 => {
@@ -1537,8 +1537,6 @@ impl UnifiedDictionaryTypeSelector {
                     .as_any()
                     .downcast_ref::<DictionaryArray<UInt16Type>>()
                     .expect("can cast array to data type");
-                // Upgrade to UInt16 if we see a UInt16 dictionary
-                self.smallest_key_type = DataType::UInt16;
                 dict_col.values()
             }
             key_type => {
@@ -1564,28 +1562,18 @@ impl UnifiedDictionaryTypeSelector {
             return Ok(selected_type.clone());
         }
 
-        // If we've seen a UInt16 dictionary, we must use at least UInt16 (can't downcast)
-        if self.smallest_key_type == DataType::UInt16 {
-            if self.total_batch_size <= u16::MAX as usize {
-                let selected_type = UnifiedDictionaryType::Dictionary(DataType::UInt16);
-                self.selected_type = Some(selected_type.clone());
-                return Ok(selected_type);
-            }
-            // Fall through to cardinality estimation for larger sizes
-        } else {
-            // smallest_key_type is UInt8, check if we can use UInt8
-            if self.total_batch_size <= u8::MAX as usize {
-                let selected_type = UnifiedDictionaryType::Dictionary(DataType::UInt8);
-                self.selected_type = Some(selected_type.clone());
-                return Ok(selected_type);
-            }
+        // check early termination conditions
+        if self.total_batch_size <= u8::MAX as usize {
+            let selected_type = UnifiedDictionaryType::Dictionary(DataType::UInt8);
+            self.selected_type = Some(selected_type.clone());
+            return Ok(selected_type);
+        }
 
-            // Check if UInt16 is sufficient
-            if self.total_batch_size <= u16::MAX as usize {
-                let selected_type = UnifiedDictionaryType::Dictionary(DataType::UInt16);
-                self.selected_type = Some(selected_type.clone());
-                return Ok(selected_type);
-            }
+        if self.smallest_key_type == DataType::UInt16 && self.total_batch_size <= u16::MAX as usize
+        {
+            let selected_type = UnifiedDictionaryType::Dictionary(DataType::UInt16);
+            self.selected_type = Some(selected_type.clone());
+            return Ok(selected_type);
         }
 
         // None of the easy cases applied so we have to iterate through some values to estimate
@@ -2027,7 +2015,7 @@ fn try_unify_dictionaries(
                 ),
                 UnifiedDictionaryType::Native => cast(column, &values_type),
             }
-            .map_err(|e| Error::Batching { source: e })?;
+            .expect("can cast dictionary column");
 
             let new_field = fields[field_index]
                 .as_ref()
@@ -2038,8 +2026,10 @@ fn try_unify_dictionaries(
         }
     }
 
-    RecordBatch::try_new(Arc::new(Schema::new(fields)), columns)
-        .map_err(|e| Error::Batching { source: e })
+    // safety: should be safe to expect that building the record batch won't fail here. The schema
+    // should match the columns and the columns should all have the correct length
+    Ok(RecordBatch::try_new(Arc::new(Schema::new(fields)), columns)
+        .expect("can unify dict columns"))
 }
 
 fn try_discover_structs(
@@ -2165,8 +2155,7 @@ fn try_unify_structs(
             let struct_nulls = rb_field
                 .is_nullable()
                 .then(|| NullBuffer::from_iter(repeat_n(false, len)));
-            let new_rb_column = StructArray::try_new(struct_fields, struct_columns, struct_nulls)
-                .map_err(|e| Error::Batching { source: e })?;
+            let new_rb_column = StructArray::new(struct_fields, struct_columns, struct_nulls);
             let new_rb_field = Field::new(
                 rb_field.name(),
                 new_rb_column.data_type().clone(),
@@ -2178,8 +2167,12 @@ fn try_unify_structs(
         }
     }
 
-    RecordBatch::try_new(Arc::new(Schema::new(rb_fields)), rb_columns)
-        .map_err(|e| Error::Batching { source: e })
+    Ok(
+        // Safety: here we should have an array of fields that match the types in the columns
+        // and all the columns are same length, so it's safe to expect here
+        RecordBatch::try_new(Arc::new(Schema::new(rb_fields)), rb_columns)
+            .expect("could not new record batch with unified struct columns"),
+    )
 }
 
 fn try_unify_struct_fields(
@@ -2225,8 +2218,10 @@ fn try_unify_struct_fields(
             let current_column = current_array.column(field_index).clone();
             let new_column = match current_field.data_type() {
                 DataType::Dictionary(_, _) => {
-                    cast(&current_column, &data_type)
-                        .map_err(|e| Error::Batching { source: e })?
+                    // safety: casting the dictionary keys should be infallible here as we're
+                    // either casting to a native dict (which should be infallible), or we're
+                    // casting the keys to a size we've calculated will fit
+                    cast(&current_column, &data_type).expect("can cast dictionary column")
                 }
                 _ => current_column,
             };
@@ -2243,12 +2238,11 @@ fn try_unify_struct_fields(
         }
     }
 
-    StructArray::try_new(
+    Ok(StructArray::new(
         Fields::from(new_fields),
         new_columns,
         current_array.nulls().cloned(),
-    )
-    .map_err(|e| Error::Batching { source: e })
+    ))
 }
 
 /// Note! the tests below validate internal details of the logic above.
diff --git a/rust/otap-dataflow/crates/pdata/src/validation/mod.rs b/rust/otap-dataflow/crates/pdata/src/validation/mod.rs
index 0e7433b3ae..8b54626a61 100644
--- a/rust/otap-dataflow/crates/pdata/src/validation/mod.rs
+++ b/rust/otap-dataflow/crates/pdata/src/validation/mod.rs
@@ -6,6 +6,7 @@
 
 // Allow test-friendly patterns in this test-only module
 #![allow(clippy::unwrap_used)]
+#![allow(clippy::print_stderr)]
 
 mod collector;
 mod error;
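Taken together with the InternalTelemetrySettings consolidation earlier in the series, receiver wiring now reduces to a single take/inject handoff. A rough sketch of that flow, under stated assumptions (the free function and surrounding controller state are illustrative, and generic parameters are elided as in the diffs above):

    // Drain the bundled settings from the telemetry runtime (Some only in
    // Internal output mode, and only once) and inject them into the
    // Internal Telemetry Receiver node's wrapper.
    fn inject_internal_telemetry(
        runtime: &mut TelemetryRuntime,
        receiver: &mut ReceiverWrapper,
    ) {
        if let Some(settings) = runtime.take_internal_telemetry_settings() {
            // `settings` carries the logs receiver channel plus the
            // pre-encoded resource bytes used for OTLP log encoding.
            receiver.set_internal_telemetry(settings);
        }
    }

Bundling the channel and resource bytes into one struct means the engine no longer needs the paired set_logs_receiver/set_resource_bytes calls, so the two values can never be injected inconsistently.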