diff --git a/.github/workflows/hedwig.yml b/.github/workflows/hedwig.yml index 7134baf..8090144 100644 --- a/.github/workflows/hedwig.yml +++ b/.github/workflows/hedwig.yml @@ -12,11 +12,9 @@ on: jobs: lint: - runs-on: ${{ matrix.os }} + runs-on: ubuntu-latest strategy: fail-fast: false - matrix: - os: [ubuntu-latest, windows-latest, macOS-latest] timeout-minutes: 10 steps: - uses: actions/checkout@v2 @@ -31,7 +29,7 @@ jobs: uses: actions-rs/cargo@v1 with: command: clippy - args: -- -Dclippy::correctness -Dclippy::complexity -Dclippy::perf -Dunsafe_code -Dunreachable_pub -Dunused + args: --all-features -- -Dclippy::correctness -Dclippy::complexity -Dclippy::perf -Dunsafe_code -Dunreachable_pub -Dunused doc: runs-on: ubuntu-latest @@ -50,14 +48,14 @@ jobs: command: doc args: --all-features --manifest-path=Cargo.toml env: - RUSTDOCFLAGS: --cfg docsrs -Dmissing_docs -Dbroken_intra_doc_links + RUSTDOCFLAGS: --cfg docsrs -Dmissing_docs -Drustdoc::broken_intra_doc_links test: runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: - rust_toolchain: [nightly, stable, 1.49.0] + rust_toolchain: [nightly, stable, 1.53.0] os: [ubuntu-latest, windows-latest, macOS-latest] timeout-minutes: 20 steps: diff --git a/Cargo.toml b/Cargo.toml index 377fb1a..eb6b879 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,10 +1,11 @@ [package] name = "hedwig" # TODO: When bumping to next major version, make sure to clean up the MRV and lints we allow in CI. -version = "4.1.0" +version = "5.0.0" authors = [ "Aniruddha Maru ", - "Simonas Kazlauskas " + "Simonas Kazlauskas ", + "Renar Narubin ", ] edition = "2018" repository = "https://github.com/standard-ai/hedwig-rust.git" @@ -19,50 +20,49 @@ categories = ["asynchronous", "web-programming"] maintenance = { status = "actively-developed" } [features] -default = ["consume", "sink"] +default = [] -# Whether publishing/consuming is enabled -publish = [] -consume = ["async-trait", "either"] - -# Publishers -google = ["base64", "yup-oauth2", "hyper", "http", "serde_json", "serde", "serde/derive", "uuid/serde"] +# Backends +google = ["ya-gcp", "tracing", "parking_lot"] +mock = ["async-channel", "parking_lot"] # Validators json-schema = ["valico", "serde_json", "serde"] protobuf = ["prost"] -# Convenience API -sink = ["futures-util/sink", "either", "publish"] - [[example]] -name = "publish" -required-features = ["google", "json-schema"] +name = "googlepubsub" +required-features = ["google", "protobuf"] [dependencies] +async-trait = { version = "0.1" } bytes = "1" -futures-util = { version = "0.3", features = ["std"], default-features = false } +either = { version = "1", features = ["use_std"], default-features = false } +futures-util = { version = "0.3.17", features = ["std", "sink"], default-features = false } pin-project = "1" thiserror = { version = "1", default-features = false } url = { version = "2", default-features = false } uuid = { version = "^0.8", features = ["v4"], default-features = false } -async-trait = { version = "0.1", optional = true } -either = { version = "1", optional = true, features = ["use_std"], default-features = false } +async-channel = { version = "1.6", optional = true } serde = { version = "^1.0", optional = true, default-features = false } serde_json = { version = "^1", features = ["std"], optional = true, default-features = false } +parking_lot = { version = "0.11", optional = true } +prost = { version = "0.8", optional = true, features = ["std"], default-features = false } +tracing = { version = "0.1.26", optional = true } valico = { 
version = "^3.2", optional = true, default-features = false } -base64 = { version = "^0.13", optional = true, default-features = false } -http = { version = "^0.2", optional = true, default-features = false } -hyper = { version = "^0.14.4", optional = true, features = ["client", "stream"], default-features = false } -yup-oauth2 = { version = "5.1", optional = true, features = ["hyper-rustls"], default-features = false } -prost = { version = "0.7", optional = true, features = ["std"], default-features = false } +ya-gcp = { version = "0.6.3", features = ["pubsub"], optional = true } [dev-dependencies] -hyper-tls = "0.5.0" -prost = { version = "0.7", features = ["std", "prost-derive"] } +async-channel = { version = "1.6" } +futures-channel = "0.3.17" +parking_lot = { version = "0.11" } +prost = { version = "0.8", features = ["std", "prost-derive"] } tokio = { version = "1", features = ["macros", "rt"] } +tonic = "0.5" serde = { version = "1", features = ["derive"] } +ya-gcp = { version = "0.6.3", features = ["pubsub", "emulators"] } +structopt = "0.3" [package.metadata.docs.rs] all-features = true diff --git a/examples/googlepubsub.rs b/examples/googlepubsub.rs new file mode 100644 index 0000000..055541f --- /dev/null +++ b/examples/googlepubsub.rs @@ -0,0 +1,238 @@ +//! An example of ingesting messages from a PubSub subscription, applying a +//! transformation, then submitting those transformations to another PubSub topic. + +use futures_util::{SinkExt, StreamExt, TryFutureExt}; +use hedwig::{ + googlepubsub::{ + AuthFlow, ClientBuilder, ClientBuilderConfig, PubSubConfig, PubSubMessage, PublishError, + ServiceAccountAuth, StreamSubscriptionConfig, SubscriptionConfig, SubscriptionName, + TopicConfig, TopicName, + }, + validators, Consumer, DecodableMessage, EncodableMessage, Headers, Publisher, +}; +use std::{error::Error as StdError, time::SystemTime}; +use structopt::StructOpt; + +const USER_CREATED_TOPIC: &str = "user.created"; +const USER_UPDATED_TOPIC: &str = "user.updated"; + +/// The input data, representing some user being created with the given name +#[derive(PartialEq, Eq, prost::Message)] +struct UserCreatedMessage { + #[prost(string, tag = "1")] + name: String, +} + +impl EncodableMessage for UserCreatedMessage { + type Error = validators::ProstValidatorError; + type Validator = validators::ProstValidator; + fn topic(&self) -> hedwig::Topic { + USER_CREATED_TOPIC.into() + } + fn encode(&self, validator: &Self::Validator) -> Result { + Ok(validator.validate( + uuid::Uuid::new_v4(), + SystemTime::now(), + "user.created/1.0", + Headers::new(), + self, + )?) 
+ } +} + +impl DecodableMessage for UserCreatedMessage { + type Error = validators::ProstDecodeError; + type Decoder = + validators::ProstDecoder<validators::prost::ExactSchemaMatcher<UserCreatedMessage>>; + + fn decode(msg: hedwig::ValidatedMessage, decoder: &Self::Decoder) -> Result<Self, Self::Error> { + decoder.decode(msg) + } +} + +/// The output data, where the given user has now been assigned an ID and some metadata +#[derive(PartialEq, Eq, prost::Message)] +struct UserUpdatedMessage { + #[prost(string, tag = "1")] + name: String, + + #[prost(int64, tag = "2")] + id: i64, + + #[prost(string, tag = "3")] + metadata: String, +} + +/// The output message will carry an ack token from the input message, to ack when the output is +/// successfully published, or nack on failure +#[derive(Debug)] +struct TransformedMessage(PubSubMessage<UserUpdatedMessage>); + +impl EncodableMessage for TransformedMessage { + type Error = validators::ProstValidatorError; + type Validator = validators::ProstValidator; + + fn topic(&self) -> hedwig::Topic { + USER_UPDATED_TOPIC.into() + } + + fn encode(&self, validator: &Self::Validator) -> Result<hedwig::ValidatedMessage, Self::Error> { + Ok(validator.validate( + uuid::Uuid::new_v4(), + SystemTime::now(), + "user.updated/1.0", + Headers::new(), + &self.0.message, + )?) + } +} + +#[derive(Debug, StructOpt)] +struct Args { + /// The name of the pubsub project + #[structopt(long)] + project_name: String, +} + +#[tokio::main(flavor = "current_thread")] +async fn main() -> Result<(), Box<dyn StdError>> { + let args = Args::from_args(); + + println!("Building PubSub clients"); + + let builder = ClientBuilder::new( + ClientBuilderConfig::new().auth_flow(AuthFlow::ServiceAccount(ServiceAccountAuth::EnvVar)), + PubSubConfig::default(), + ) + .await?; + + let input_topic_name = TopicName::new(USER_CREATED_TOPIC); + let subscription_name = SubscriptionName::new("user-metadata-updaters"); + + let output_topic_name = TopicName::new(USER_UPDATED_TOPIC); + const APP_NAME: &str = "user-metadata-updater"; + + let mut publisher_client = builder + .build_publisher(&args.project_name, APP_NAME) + .await?; + let mut consumer_client = builder.build_consumer(&args.project_name, APP_NAME).await?; + + for topic_name in [&input_topic_name, &output_topic_name] { + println!("Creating topic {:?}", topic_name); + + publisher_client + .create_topic(TopicConfig { + name: topic_name.clone(), + ..TopicConfig::default() + }) + .await?; + } + + println!("Creating subscription {:?}", &subscription_name); + + consumer_client + .create_subscription(SubscriptionConfig { + topic: input_topic_name.clone(), + name: subscription_name.clone(), + ..SubscriptionConfig::default() + }) + .await?; + + println!( + "Synthesizing input messages for topic {:?}", + &input_topic_name + ); + + { + let validator = validators::ProstValidator::new(); + let mut input_sink = + Publisher::<UserCreatedMessage>::publish_sink(publisher_client.publisher(), validator); + + for i in 1..=10 { + let message = UserCreatedMessage { + name: format!("Example Name #{}", i), + }; + + input_sink.feed(message).await?; + } + input_sink.flush().await?; + } + + println!("Ingesting input messages, applying transformations, and publishing to destination"); + + let mut read_stream = consumer_client + .stream_subscription( + subscription_name.clone(), + StreamSubscriptionConfig::default(), + ) + .consume::<UserCreatedMessage>(hedwig::validators::ProstDecoder::new( + hedwig::validators::prost::ExactSchemaMatcher::new("user.created/1.0"), + )); + + let mut output_sink = Publisher::<TransformedMessage, _>::publish_sink_with_responses( + publisher_client.publisher(), + validators::ProstValidator::new(), + futures_util::sink::unfold((), |_, message:
TransformedMessage| async move { + // if the output is successfully sent, ack the input to mark it as processed + message.0.ack().await.map(|_success| ()) + }), + ); + + for i in 1..=10 { + let PubSubMessage { ack_token, message } = read_stream + .next() + .await + .expect("stream should have 10 elements")?; + + assert_eq!(&message.name, &format!("Example Name #{}", i)); + + let transformed = TransformedMessage(PubSubMessage { + ack_token, + message: UserUpdatedMessage { + name: message.name, + id: random_id(), + metadata: "some metadata".into(), + }, + }); + + output_sink + .feed(transformed) + .or_else(|publish_error| async move { + // if publishing fails, nack the failed messages to allow later retries + Err(match publish_error { + PublishError::Publish { cause, messages } => { + for failed_transform in messages { + failed_transform.0.nack().await?; + } + Box::<dyn StdError>::from(cause) + } + err => Box::<dyn StdError>::from(err), + }) + }) + .await? + } + output_sink.flush().await?; + + println!("All messages matched and published successfully!"); + + println!("Deleting subscription {:?}", &subscription_name); + + consumer_client + .delete_subscription(subscription_name) + .await?; + + for topic_name in [input_topic_name, output_topic_name] { + println!("Deleting topic {:?}", &topic_name); + + publisher_client.delete_topic(topic_name).await?; + } + + println!("Done"); + + Ok(()) +} + +fn random_id() -> i64 { + 4 // chosen by fair dice roll. + // guaranteed to be random. +} diff --git a/examples/publish.rs b/examples/publish.rs deleted file mode 100644 index e1f1b87..0000000 --- a/examples/publish.rs +++ /dev/null @@ -1,122 +0,0 @@ -use futures_util::stream::StreamExt; -use hedwig::{ - publish::{EncodableMessage, GooglePubSubPublisher, Publisher}, - Headers, -}; -use std::{env, time::SystemTime}; - -#[derive(serde::Serialize)] -struct UserCreatedMessage { - #[serde(skip)] - uuid: uuid::Uuid, - user_id: String, -} - -impl<'a> EncodableMessage for &'a UserCreatedMessage { - type Error = hedwig::validators::JsonSchemaValidatorError; - type Validator = hedwig::validators::JsonSchemaValidator; - fn topic(&self) -> hedwig::Topic { - "user.created".into() - } - fn encode(self, validator: &Self::Validator) -> Result<hedwig::ValidatedMessage, Self::Error> { - Ok(validator - .validate( - self.uuid, - SystemTime::now(), - "https://hedwig.corp/schema#/schemas/user.created/1.0", - Headers::new(), - self, - ) - .unwrap()) - } -} - -const PUBLISHER: &str = "myapp"; - -const SCHEMA: &str = r#"{ - "$id": "https://hedwig.corp/schema", - "$schema": "https://json-schema.org/draft-04/schema#", - "description": "Example Schema", - "schemas": { - "user.created": { - "1.*": { - "description": "A new user was created", - "type": "object", - "x-versions": [ - "1.0" - ], - "required": [ - "user_id" - ], - "properties": { - "user_id": { - "$ref": "https://hedwig.corp/schema#/definitions/UserId/1.0" - } - } - } - } - }, - "definitions": { - "UserId": { - "1.0": { - "type": "string" - } - } - } -}"#; - -async fn run() -> Result<(), Box<dyn std::error::Error>> { - let google_project = - env::var("GOOGLE_CLOUD_PROJECT").expect("env var GOOGLE_CLOUD_PROJECT is required"); - let google_credentials = env::var("GOOGLE_APPLICATION_CREDENTIALS") - .expect("env var GOOGLE_APPLICATION_CREDENTIALS is required"); - let secret = yup_oauth2::read_service_account_key(google_credentials) - .await - .expect("$GOOGLE_APPLICATION_CREDENTIALS is not a valid service account key"); - - let client = hyper::Client::builder().build(hyper_tls::HttpsConnector::new()); - let authenticator =
yup_oauth2::ServiceAccountAuthenticator::builder(secret) - .hyper_client(client.clone()) - .build() - .await - .expect("could not create an authenticator"); - - let publisher = GooglePubSubPublisher::new( - PUBLISHER.into(), - google_project.into(), - client, - authenticator, - ); - let validator = hedwig::validators::JsonSchemaValidator::new(SCHEMA).unwrap(); - let message = UserCreatedMessage { - uuid: uuid::Uuid::new_v4(), - user_id: "U_123".into(), - }; - let topic = EncodableMessage::topic(&&message); - let validated = message.encode(&validator).unwrap(); - let mut publish = publisher.publish(topic, [validated].iter()); - while let Some(r) = publish.next().await { - println!("publish result: {:?}", r?); - } - - Ok(()) -} - -fn main() { - let rt = tokio::runtime::Builder::new_current_thread() - .enable_all() - .build() - .expect("runtime builds"); - match rt.block_on(run()) { - Ok(_) => std::process::exit(0), - Err(e) => { - eprintln!("error: {}", e); - let mut source = e.source(); - while let Some(src) = source { - eprintln!(" caused by: {}", src); - source = src.source(); - } - std::process::exit(1); - } - } -} diff --git a/src/backends/googlepubsub/consumer.rs b/src/backends/googlepubsub/consumer.rs new file mode 100644 index 0000000..98bf9ac --- /dev/null +++ b/src/backends/googlepubsub/consumer.rs @@ -0,0 +1,597 @@ +//! A [`Consumer`](crate::Consumer) implementation for Google's [PubSub][0] service +//! +//! [0]: https://cloud.google.com/pubsub/ + +use crate::{Headers, ValidatedMessage}; +use async_trait::async_trait; +use futures_util::stream; +use pin_project::pin_project; +use std::{ + borrow::Cow, + fmt::Display, + ops::Bound, + pin::Pin, + str::FromStr, + task::{Context, Poll}, + time::{Duration, SystemTime}, +}; +use tracing::debug; +use uuid::Uuid; +use ya_gcp::pubsub; + +use super::{ + retry_policy, AcknowledgeError, BoxError, Connect, DefaultConnector, MakeConnection, + ModifyAcknowledgeError, PubSubError, StatusCodeSet, StreamSubscriptionConfig, TopicName, Uri, +}; + +/// A PubSub subscription name. +/// +/// This will be used to internally construct the expected +/// `projects/{project}/subscriptions/hedwig-{queue}-{subscription_name}` format for API calls +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct SubscriptionName<'s>(Cow<'s, str>); + +impl<'s> SubscriptionName<'s> { + /// Create a new `SubscriptionName` + pub fn new(name: impl Into>) -> Self { + Self(name.into()) + } + + /// Construct a full project and subscription name with this name + fn into_project_subscription_name( + self, + project_name: impl Display, + queue_name: impl Display, + ) -> pubsub::ProjectSubscriptionName { + pubsub::ProjectSubscriptionName::new( + project_name, + std::format_args!( + "hedwig-{queue}-{subscription}", + queue = queue_name, + subscription = self.0 + ), + ) + } +} + +/// A client through which PubSub consuming operations can be performed. +/// +/// This includes managing subscriptions and reading data from subscriptions. 
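The naming scheme documented on `SubscriptionName` above is easy to pin down concretely. A std-only sketch of the path construction (the helper is illustrative; the crate itself goes through `pubsub::ProjectSubscriptionName`):

```rust
/// Illustrative only: mirrors the `hedwig-{queue}-{subscription}` format
/// documented on `SubscriptionName`.
fn project_subscription_path(project: &str, queue: &str, subscription: &str) -> String {
    format!(
        "projects/{}/subscriptions/hedwig-{}-{}",
        project, queue, subscription
    )
}

#[test]
fn formats_like_the_docs_say() {
    assert_eq!(
        project_subscription_path("my-project", "worker", "user-metadata-updaters"),
        "projects/my-project/subscriptions/hedwig-worker-user-metadata-updaters"
    );
}
```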
Created using +/// [`build_consumer`](super::ClientBuilder::build_consumer) +#[derive(Debug, Clone)] +pub struct ConsumerClient<C = DefaultConnector> { + client: pubsub::SubscriberClient<C>, + project: String, + queue: String, +} + +impl<C> ConsumerClient<C> { + pub(super) fn new(client: pubsub::SubscriberClient<C>, project: String, queue: String) -> Self { + ConsumerClient { + client, + project, + queue, + } + } + + fn project(&self) -> &str { + &self.project + } + + fn queue(&self) -> &str { + &self.queue + } +} + +impl<C> ConsumerClient<C> +where + C: MakeConnection<Uri> + Connect + Clone + Send + Sync + 'static, + C::Connection: Unpin + Send + 'static, + C::Future: Send + 'static, + BoxError: From<C::Error>, +{ + /// Create a new PubSub subscription + /// + /// See the GCP documentation on subscriptions [here](https://cloud.google.com/pubsub/docs/subscriber) + pub async fn create_subscription( + &mut self, + config: SubscriptionConfig<'_>, + ) -> Result<(), PubSubError> { + let subscription = SubscriptionConfig::into_subscription(config, &*self); + + self.client.create_subscription(subscription).await?; + + Ok(()) + } + + /// Delete an existing PubSub subscription. + /// + /// See the GCP documentation on subscriptions [here](https://cloud.google.com/pubsub/docs/subscriber) + pub async fn delete_subscription( + &mut self, + subscription: SubscriptionName<'_>, + ) -> Result<(), PubSubError> { + let subscription = subscription + .into_project_subscription_name(self.project(), self.queue()) + .into(); + + self.client + .delete_subscription(pubsub::api::DeleteSubscriptionRequest { subscription }) + .await?; + + Ok(()) + } + + /// Connect to PubSub and start streaming messages from the given subscription + pub fn stream_subscription( + &mut self, + subscription: SubscriptionName<'_>, + stream_config: StreamSubscriptionConfig, + ) -> PubSubStream<C> { + let subscription = + subscription.into_project_subscription_name(self.project(), self.queue()); + + PubSubStream(self.client.stream_subscription(subscription, stream_config)) + } + + /// Seek to the given timestamp. Messages received prior to the timestamp are marked as + /// acknowledged, and messages received after it are marked as not acknowledged. + pub async fn seek( + &mut self, + subscription: SubscriptionName<'_>, + timestamp: pubsub::api::Timestamp, + ) -> Result<(), PubSubError> { + let request = pubsub::api::SeekRequest { + subscription: subscription + .into_project_subscription_name(self.project(), self.queue()) + .into(), + target: Some(pubsub::api::seek_request::Target::Time(timestamp)), + }; + self.client.seek(request).await?; + Ok(()) + } + + // TODO list_subscriptions (paginated, nontrivial) + // TODO update_subscriptions (field mask necessary?) + // TODO get_subscription (impl From<pubsub::api::Subscription> for SubscriptionConfig) + // TODO snapshots? +} + +match_fields! { + pubsub::api::Subscription => + + /// Configuration describing a PubSub subscription.
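Taken together, the methods above suggest a subscription lifecycle like the sketch below. It is written against the signatures shown in this diff; the connector choice, names, and error plumbing are illustrative assumptions:

```rust
use hedwig::googlepubsub::{
    ConsumerClient, DefaultConnector, PubSubError, StreamSubscriptionConfig,
    SubscriptionConfig, SubscriptionName, TopicName,
};

/// Sketch: create a subscription, stream from it, then tear it down.
async fn subscription_lifecycle(
    mut consumer: ConsumerClient<DefaultConnector>,
) -> Result<(), PubSubError> {
    let name = SubscriptionName::new("example-subscription");

    consumer
        .create_subscription(SubscriptionConfig {
            topic: TopicName::new("user.created"),
            name: name.clone(),
            ..SubscriptionConfig::default()
        })
        .await?;

    // The stream can then be consumed (and decoded) as in examples/googlepubsub.rs.
    let _stream =
        consumer.stream_subscription(name.clone(), StreamSubscriptionConfig::default());

    consumer.delete_subscription(name).await?;
    Ok(())
}
```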
+ // TODO incorporate standard_config + #[derive(Debug, Clone)] + pub struct SubscriptionConfig<'s> { + pub name: SubscriptionName<'s>, + pub topic: TopicName<'s>, + pub ack_deadline_seconds: u16, + pub retain_acked_messages: bool, + pub message_retention_duration: Option, + pub labels: std::collections::HashMap, + pub enable_message_ordering: bool, + pub expiration_policy: Option, + pub filter: String, + pub dead_letter_policy: Option, + pub retry_policy: Option, + + @except: + push_config, + detached, + topic_message_retention_duration, + } +} + +impl<'s> SubscriptionConfig<'s> { + fn into_subscription(self, client: &ConsumerClient) -> pubsub::api::Subscription { + pubsub::api::Subscription { + name: self + .name + .into_project_subscription_name(client.project(), client.queue()) + .into(), + topic: self.topic.into_project_topic_name(client.project()).into(), + ack_deadline_seconds: self.ack_deadline_seconds.into(), + retain_acked_messages: self.retain_acked_messages, + message_retention_duration: self.message_retention_duration, + labels: self.labels, + enable_message_ordering: self.enable_message_ordering, + expiration_policy: self.expiration_policy, + filter: self.filter, + dead_letter_policy: self.dead_letter_policy, + retry_policy: self.retry_policy, + push_config: None, // push delivery isn't used, it's streaming pull + detached: false, // set by the server on gets/listing + topic_message_retention_duration: None, // Output only, set by the server + } + } +} + +// TODO replace with a builder? +impl<'s> Default for SubscriptionConfig<'s> { + fn default() -> Self { + Self { + name: SubscriptionName::new(String::new()), + topic: TopicName::new(String::new()), + ack_deadline_seconds: 0, + retain_acked_messages: false, + message_retention_duration: None, + labels: std::collections::HashMap::default(), + enable_message_ordering: false, + expiration_policy: None, + filter: "".into(), + dead_letter_policy: None, + retry_policy: None, + } + } +} + +// TODO match_fields! on ExpirationPolicy, DeadLetterPolicy, RetryPolicy + +/// A message received from PubSub. +/// +/// This includes the message itself, and an [`AcknowledgeToken`](crate::AcknowledgeToken) used to +/// inform the message service when this message has been processed. 
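In practice a consumer destructures this pairing and settles the token once processing finishes. A sketch against the types in this diff (the `handled_ok` flag stands in for real processing logic; `ack`/`nack` are used the same way in examples/googlepubsub.rs):

```rust
use hedwig::googlepubsub::{AcknowledgeError, PubSubMessage};

/// Sketch: ack to mark the message processed, nack to request redelivery.
async fn settle<T>(msg: PubSubMessage<T>, handled_ok: bool) -> Result<(), AcknowledgeError> {
    if handled_ok {
        msg.ack().await.map(|_| ())
    } else {
        msg.nack().await.map(|_| ())
    }
}
```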
+#[cfg_attr(docsrs, doc(cfg(feature = "google")))] +pub type PubSubMessage = crate::consumer::AcknowledgeableMessage; + +/// Errors encountered while streaming messages from PubSub +#[derive(Debug, thiserror::Error)] +#[cfg_attr(docsrs, doc(cfg(feature = "google")))] +pub enum PubSubStreamError { + /// An error from the underlying stream + #[error(transparent)] + Stream(#[from] PubSubError), + + /// An error from a missing hedwig attribute + #[error("missing expected attribute: {key}")] + MissingAttribute { + /// the missing attribute + key: &'static str, + }, + + /// An error from a hedwig attribute with an invalid value + #[error("invalid attribute value for {key}: {invalid_value}")] + InvalidAttribute { + /// the invalid attribute + key: &'static str, + /// the invalid value + invalid_value: String, + /// the error describing the invalidity + #[source] + source: BoxError, + }, +} + +#[async_trait] +impl crate::consumer::AcknowledgeToken for pubsub::AcknowledgeToken { + type AckError = AcknowledgeError; + type ModifyError = ModifyAcknowledgeError; + type NackError = AcknowledgeError; + + async fn ack(self) -> Result<(), Self::AckError> { + self.ack().await + } + + async fn nack(self) -> Result<(), Self::NackError> { + self.nack().await + } + + async fn modify_deadline(&mut self, seconds: u32) -> Result<(), Self::ModifyError> { + self.modify_deadline(seconds).await + } +} + +/// A stream of messages from a subscription in PubSub. +/// +/// Created by [`ConsumerClient::stream_subscription`] +#[pin_project] +#[cfg_attr(docsrs, doc(cfg(feature = "google")))] +pub struct PubSubStream>( + #[pin] pubsub::StreamSubscription, +); + +impl PubSubStream { + /// Set the [`RetryPolicy`](retry_policy::RetryPolicy) to use for this streaming subscription. + /// + /// The stream will be reconnected if the policy indicates that an encountered error should be + /// retried + // Because `poll_next` requires `Pin<&mut Self>`, this function cannot be called after the + // stream has started because it moves `self`. That means that the retry policy can only be + // changed before the polling starts, and is fixed from that point on + pub fn with_retry_policy(self, retry_policy: R) -> PubSubStream + where + R: retry_policy::RetryPolicy<(), PubSubError>, + { + PubSubStream(self.0.with_retry_policy(retry_policy)) + } +} + +impl stream::Stream for PubSubStream +where + C: MakeConnection + Connect + Clone + Send + Sync + 'static, + C::Connection: Unpin + Send + 'static, + C::Future: Send + 'static, + BoxError: From, +{ + type Item = Result, PubSubStreamError>; + + fn poll_next(self: Pin<&mut Self>, cx: &mut Context) -> Poll> { + self.project().0.poll_next(cx).map(|opt| { + opt.map(|res| { + let (ack_token, message) = res?; + Ok(PubSubMessage { + ack_token, + message: pubsub_to_hedwig(message)?, + }) + }) + }) + } +} + +impl crate::consumer::Consumer for PubSubStream +where + C: MakeConnection + Connect + Clone + Send + Sync + 'static, + C::Connection: Unpin + Send + 'static, + C::Future: Send + 'static, + BoxError: From, +{ + type AckToken = pubsub::AcknowledgeToken; + type Error = PubSubStreamError; + type Stream = PubSubStream; + + fn stream(self) -> Self::Stream { + self + } +} + +/// The namespace of all the hedwig-internal attributes applied to messages +// the backtick '`' is one greater than the underscore '_' in ascii, which makes it the next +// greatest for Ord. Having this as the excluded upper bound makes the range contain every +// string prefixed by "hedwig_". 
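That claim is easy to sanity-check with a plain `BTreeMap`: only keys carrying the `hedwig_` prefix fall inside the half-open range.

```rust
use std::collections::BTreeMap;
use std::ops::Bound;

#[test]
fn hedwig_prefix_range_matches_only_prefixed_keys() {
    let mut headers = BTreeMap::new();
    for key in ["aaa", "hedwigCamel", "hedwig_id", "hedwig_schema", "zzz"] {
        headers.insert(key.to_string(), String::new());
    }

    // 'C' sorts before '_', so "hedwigCamel" is below the included bound,
    // while "hedwig`" is just past every "hedwig_*" key.
    let range: (Bound<&str>, Bound<&str>) =
        (Bound::Included("hedwig_"), Bound::Excluded("hedwig`"));
    let keys: Vec<&str> = headers
        .range::<str, _>(range)
        .map(|(k, _)| k.as_str())
        .collect();

    assert_eq!(keys, ["hedwig_id", "hedwig_schema"]);
}
```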
+// +// This uses explicit Bounds instead of the Range syntax because impl RangeBounds for Range<&T> +// requires T: Sized for some reason +const HEDWIG_NAME_RANGE: (Bound<&str>, Bound<&str>) = + (Bound::Included("hedwig_"), Bound::Excluded("hedwig`")); + +/// convert a pubsub message into a hedwig message +fn pubsub_to_hedwig( + msg: pubsub::api::PubsubMessage, +) -> Result { + let mut headers = msg.attributes; + + // extract the hedwig attributes from the attribute map. + // any remaining attributes were ones inserted by the user + fn take_attr( + map: &mut Headers, + key: &'static str, + parse: F, + ) -> Result + where + F: FnOnce(String) -> Result, + { + let value = map + .remove(key) + .ok_or(PubSubStreamError::MissingAttribute { key })?; + + parse(value).map_err( + |(invalid_value, source)| PubSubStreamError::InvalidAttribute { + key, + invalid_value, + source, + }, + ) + } + + let id = take_attr(&mut headers, crate::HEDWIG_ID, |string| { + Uuid::from_str(&string).map_err(|e| (string, BoxError::from(e))) + })?; + + let timestamp = take_attr(&mut headers, crate::HEDWIG_MESSAGE_TIMESTAMP, |string| { + // match instead of map_err to keep ownership of string + let millis_since_epoch = match u64::from_str(&string) { + Err(err) => return Err((string, BoxError::from(err))), + Ok(t) => t, + }; + SystemTime::UNIX_EPOCH + .checked_add(Duration::from_millis(millis_since_epoch)) + .ok_or_else(|| { + ( + string, + BoxError::from(format!( + "time stamp {} is too large for SystemTime", + millis_since_epoch + )), + ) + }) + })?; + let schema = take_attr(&mut headers, crate::HEDWIG_SCHEMA, Ok::)?; + + // these attributes we don't actually use, but we check for their existence as defensive + // validation, and remove them so that the user doesn't see them among the headers + take_attr(&mut headers, crate::HEDWIG_PUBLISHER, |_| Ok(()))?; + take_attr(&mut headers, crate::HEDWIG_FORMAT_VERSION, |_| Ok(()))?; + + // for forwards compatibility with future hedwig formats, remove any other "hedwig_*" + // attributes that might exist, so that the user doesn't witness them. + headers + .range::(HEDWIG_NAME_RANGE) + .map(|(k, _v)| k.clone()) // clone b/c there isn't a remove_range, and we can't borrow + remove + .collect::>() + .into_iter() + .for_each(|k| { + debug!(message = "removing unknown hedwig attribute", key = &k[..]); + headers.remove(&k); + }); + + Ok(ValidatedMessage::new( + id, timestamp, schema, headers, msg.data, + )) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::{ + HEDWIG_FORMAT_VERSION, HEDWIG_ID, HEDWIG_MESSAGE_TIMESTAMP, HEDWIG_PUBLISHER, HEDWIG_SCHEMA, + }; + use pubsub::api::PubsubMessage; + use std::collections::BTreeMap; + + #[derive(Debug, Clone)] + struct EqValidatedMessage(ValidatedMessage); + + impl std::ops::Deref for EqValidatedMessage { + type Target = ValidatedMessage; + + fn deref(&self) -> &Self::Target { + &self.0 + } + } + + impl PartialEq for EqValidatedMessage { + fn eq(&self, other: &ValidatedMessage) -> bool { + self.uuid() == other.uuid() + && self.timestamp() == other.timestamp() + && self.schema() == other.schema() + && self.headers() == other.headers() + && self.data() == other.data() + } + } + + macro_rules! string_btree { + ($($key:expr => $val:expr),* $(,)?) => { + { + #[allow(unused_mut)] + let mut map = BTreeMap::new(); + $( + map.insert(($key).to_string(), ($val).to_string()); + )* + map + } + } + } + + /// Check that the data in headers is deserialized appropriately + #[test] + fn headers_parsed() { + let user_attrs = string_btree! 
{ + "aaa" => "aaa_value", + "zzz" => "zzz_value", + "some_longer_string" => "the value for the longer string", + }; + + let hedwig_attrs = string_btree! { + HEDWIG_ID => Uuid::nil(), + HEDWIG_MESSAGE_TIMESTAMP => 1000, + HEDWIG_SCHEMA => "my-test-schema", + HEDWIG_PUBLISHER => "my-test-publisher", + HEDWIG_FORMAT_VERSION => "1", + }; + + let data = "foobar"; + + let mut attributes = user_attrs.clone(); + attributes.extend(hedwig_attrs); + + let message = PubsubMessage { + data: data.into(), + attributes, + message_id: String::from("some_unique_id"), + publish_time: Some(pubsub::api::Timestamp { + seconds: 15, + nanos: 42, + }), + ordering_key: String::new(), + }; + + let validated_message = pubsub_to_hedwig(message).unwrap(); + + assert_eq!( + EqValidatedMessage(ValidatedMessage::new( + Uuid::nil(), + SystemTime::UNIX_EPOCH + Duration::from_millis(1000), + "my-test-schema", + user_attrs, + data + )), + validated_message + ); + } + + /// Check that parsing headers fails if a hedwig attribute is missing + #[test] + fn headers_error_on_missing() { + let full_hedwig_attrs = string_btree! { + HEDWIG_ID => Uuid::nil(), + HEDWIG_MESSAGE_TIMESTAMP => 1000, + HEDWIG_SCHEMA => "my-test-schema", + HEDWIG_PUBLISHER => "my-test-publisher", + HEDWIG_FORMAT_VERSION => "1", + }; + + for &missing_header in [ + HEDWIG_ID, + HEDWIG_MESSAGE_TIMESTAMP, + HEDWIG_SCHEMA, + HEDWIG_PUBLISHER, + HEDWIG_FORMAT_VERSION, + ] + .iter() + { + let mut attributes = full_hedwig_attrs.clone(); + attributes.remove(missing_header); + + let res = pubsub_to_hedwig(PubsubMessage { + attributes, + ..PubsubMessage::default() + }); + + match res { + Err(PubSubStreamError::MissingAttribute { key }) => assert_eq!(key, missing_header), + _ => panic!( + "result did not fail on missing attribute {}: {:?}", + missing_header, res + ), + } + } + } + + /// Check that unknown hedwig headers are removed from the user-visible message, under the + /// assumption that they are from some hedwig format change + #[test] + fn forward_compat_headers_removed() { + let hedwig_attrs = string_btree! { + HEDWIG_ID => Uuid::nil(), + HEDWIG_MESSAGE_TIMESTAMP => 1000, + HEDWIG_SCHEMA => "my-test-schema", + HEDWIG_PUBLISHER => "my-test-publisher", + HEDWIG_FORMAT_VERSION => "1", + "hedwig_some_new_flag" => "boom!", + "hedwig_another_change_from_the_future" => "kablam!", + }; + + let user_attrs = string_btree! { + "abc" => "123", + "foo" => "bar", + "aaaaaaaaaaaaaaaaaaaaaaaaa" => "bbbbbbbbbbbbbbbbbbbb", + // hedwig attributes are restricted to the "hedwig_" prefix by producers. It should + // then be valid for a user to have the word "hedwig" prefixed for their own keys + "hedwig-key-but-with-hyphens" => "assumes the restricted format always uses underscores", + "hedwigAsAPrefixToSomeString" => "camelCase", + }; + + let mut attributes = user_attrs.clone(); + attributes.extend(hedwig_attrs); + + let validated_message = pubsub_to_hedwig(PubsubMessage { + attributes, + ..PubsubMessage::default() + }) + .unwrap(); + + assert_eq!(&user_attrs, validated_message.headers()); + } +} diff --git a/src/backends/googlepubsub/mod.rs b/src/backends/googlepubsub/mod.rs new file mode 100644 index 0000000..a59f5be --- /dev/null +++ b/src/backends/googlepubsub/mod.rs @@ -0,0 +1,191 @@ +//! 
Adapters for using GCP's PubSub as a message service for hedwig + +#![macro_use] + +use std::{borrow::Cow, fmt::Display}; + +pub use ya_gcp::{ + grpc::StatusCodeSet, + pubsub::{ + AcknowledgeError, AcknowledgeToken, BuildError, Error as PubSubError, MakeConnection, + ModifyAcknowledgeError, PubSubConfig, SinkError, StreamSubscriptionConfig, Uri, + DEFAULT_RETRY_CODES, + }, + retry_policy, AuthFlow, ClientBuilderConfig, Connect, CreateBuilderError, DefaultConnector, + ServiceAccountAuth, +}; + +type BoxError = Box; + +/// Create a new struct with the same fields as another struct, with the annotated exceptions +/// +/// This is used to create a narrowed-down API type, with irrelevant fields removed and other fields +/// replaced with richer types. +macro_rules! match_fields { + ( + $target:path => + + $(#[$struct_attr:meta])* + pub struct $struct_name:ident $(<$struct_generics:tt>)? { + $( + $(#[$field_attr:meta])* + pub $field_name:ident : $field_type:ty, + )*$(,)? + + // fields which exist in the target but not in the struct. + // used to ensure names are listed exhaustively + @except: + $( + $target_except_field:ident, + )*$(,)? + } + ) => { + $(#[$struct_attr])* + // nested cfg_attr prevents older compilers from parsing the new doc = EXPR syntax + #[cfg_attr(docsrs, cfg_attr(docsrs, + doc = "", // newline + doc = concat!("This is a more ergonomic wrapper over [`", stringify!($target), "`]") + ))] + #[cfg_attr(not(docsrs), allow(missing_docs))] + pub struct $struct_name $(<$struct_generics>)? { + $( + #[cfg_attr(docsrs, cfg_attr(docsrs, doc = concat!( + "See [`", stringify!($field_name), "`]", + "(", stringify!($target), "::", stringify!($field_name), ")" + )))] + $(#[$field_attr])* + pub $field_name : $field_type, + )* + } + + impl$(<$struct_generics>)? $struct_name $(<$struct_generics>)? { + const _MATCH_CHECK: () = { + match None { + Some($target { + $( + $field_name: _, + )* + $( + $target_except_field: _, + )* + }) => {}, + None => {} + }; + }; + } + }; +} + +mod consumer; +mod publisher; + +pub use consumer::*; +pub use publisher::*; + +/// A PubSub topic name. +/// +/// This will be used to internally construct the expected +/// `projects/{project}/topics/hedwig-{topic}` format for API calls +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct TopicName<'s>(Cow<'s, str>); + +impl<'s> TopicName<'s> { + /// Create a new `TopicName` + pub fn new(name: impl Into>) -> Self { + Self(name.into()) + } + + /// Construct a full project and topic name with this name + fn into_project_topic_name( + self, + project_name: impl Display, + ) -> ya_gcp::pubsub::ProjectTopicName { + ya_gcp::pubsub::ProjectTopicName::new( + project_name, + std::format_args!("hedwig-{topic}", topic = self.0), + ) + } +} + +/// A builder used to create [`ConsumerClient`] and [`PublisherClient`] instances +/// +/// Note that the builder is not consumed when creating clients, and many clients can be built +/// using the same builder. 
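An aside on the `match_fields!` macro defined above: its `@except` bookkeeping works because matching on every field of the target struct is a compile-time exhaustiveness proof; adding a field to the target breaks the `match` until the invocation names it. Stripped of the macro, the trick reduces to this self-contained sketch (`Target` and `Mirror` are illustrative names):

```rust
#[allow(dead_code)]
struct Target {
    kept: u32,
    skipped: bool,
}

#[allow(dead_code)]
struct Mirror {
    kept: u32,
}

impl Mirror {
    // Compilation fails if `Target` gains a field that is neither mirrored nor
    // listed, which is exactly the guarantee `match_fields!` builds its
    // `_MATCH_CHECK` constant on.
    const _MATCH_CHECK: () = {
        match None {
            Some(Target { kept: _, skipped: _ }) => {}
            None => {}
        }
    };
}
```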
This may allow some resource re-use across the clients +pub struct ClientBuilder<C = DefaultConnector> { + inner: ya_gcp::ClientBuilder<C>, + pubsub_config: PubSubConfig, +} + +impl ClientBuilder { + /// Create a new client builder using the default HTTPS connector based on the crate's + /// enabled features + pub async fn new( + config: ClientBuilderConfig, + pubsub_config: PubSubConfig, + ) -> Result<Self, CreateBuilderError> { + Ok(ClientBuilder { + inner: ya_gcp::ClientBuilder::new(config).await?, + pubsub_config, + }) + } +} + +impl<C> ClientBuilder<C> +where + C: Connect + Clone + Send + Sync + 'static, +{ + /// Create a new client builder with the given connector. + pub async fn with_connector( + config: ClientBuilderConfig, + pubsub_config: PubSubConfig, + connector: C, + ) -> Result<Self, CreateBuilderError> { + Ok(ClientBuilder { + inner: ya_gcp::ClientBuilder::with_connector(config, connector).await?, + pubsub_config, + }) + } +} + +impl<C> ClientBuilder<C> +where + C: MakeConnection<Uri> + Connect + Clone + Send + Sync + 'static, + C::Connection: Unpin + Send + 'static, + C::Future: Send + 'static, + BoxError: From<C::Error>, +{ + /// Create a new [`ConsumerClient`] for consuming messages from PubSub subscriptions within the + /// given project, identified by the given queue name. + pub async fn build_consumer( + &self, + project: impl Into<String>, + queue: impl Into<String>, + ) -> Result<ConsumerClient<C>, BuildError> { + Ok(ConsumerClient::new( + self.inner + .build_pubsub_subscriber(self.pubsub_config.clone()) + .await?, + project.into(), + queue.into(), + )) + } + + /// Create a new [`PublisherClient`] for publishing messages to PubSub topics within the given + /// project. + /// + /// Each published message will have an attribute labelling the publisher with the given + /// identifier. + pub async fn build_publisher( + &self, + project: impl Into<String>, + publisher_id: impl Into<String>, + ) -> Result<PublisherClient<C>, BuildError> { + Ok(PublisherClient::new( + self.inner + .build_pubsub_publisher(self.pubsub_config.clone()) + .await?, + project.into(), + publisher_id.into(), + )) + } +} diff --git a/src/backends/googlepubsub/publisher.rs b/src/backends/googlepubsub/publisher.rs new file mode 100644 index 0000000..633b49a --- /dev/null +++ b/src/backends/googlepubsub/publisher.rs @@ -0,0 +1,795 @@ +use crate::{EncodableMessage, Topic, ValidatedMessage}; +use futures_util::{ + ready, + sink::{Sink, SinkExt}, +}; +use pin_project::pin_project; +use std::{ + collections::{BTreeMap, VecDeque}, + error::Error as StdError, + fmt, + pin::Pin, + task::{Context, Poll}, + time::SystemTime, +}; +use ya_gcp::pubsub; + +use super::{ + retry_policy::{ + exponential_backoff::Config as ExponentialBackoffConfig, ExponentialBackoff, + RetryOperation, RetryPolicy, + }, + BoxError, Connect, DefaultConnector, MakeConnection, PubSubError, StatusCodeSet, TopicName, + Uri, +}; + +use message_translate::{TopicSink, TopicSinkError}; + +/// A thread-safe analog to Rc<RefCell<T>> +/// +/// There are a few components in the publishing sink which are shared between layers and +/// exclusively borrowed, but not in a way the compiler can recognize. These can't use references +/// because the layers need ownership (some are passed into other libs, like the gcp client). In principle +/// they could use raw pointers, aided by Pin preventing moves; but the unsafety is unnerving, so +/// checked sharing is used instead. +/// +/// Note the element is never actually borrowed across threads, or even across `await` points; all +/// calls happen in a single call stack of `poll_*` functions.
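Returning to the `ClientBuilder` above for a moment: since `build_consumer` and `build_publisher` take `&self`, one builder can mint both halves of a pipeline. A sketch, assuming the env-var service-account flow used by the example in this diff (project and identifier strings are placeholders):

```rust
use hedwig::googlepubsub::{
    AuthFlow, ClientBuilder, ClientBuilderConfig, PubSubConfig, ServiceAccountAuth,
};

/// Sketch: one builder, several clients.
async fn build_both() -> Result<(), Box<dyn std::error::Error>> {
    let builder = ClientBuilder::new(
        ClientBuilderConfig::new().auth_flow(AuthFlow::ServiceAccount(ServiceAccountAuth::EnvVar)),
        PubSubConfig::default(),
    )
    .await?;

    // The builder is borrowed, not consumed, so it survives both calls.
    let _consumer = builder.build_consumer("my-project", "my-queue").await?;
    let _publisher = builder.build_publisher("my-project", "my-app").await?;
    Ok(())
}
```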
Send + Sync are required to ensure +/// the containing top-level sink can be held across awaits (or actually sent) without an unsafe +/// Send+Sync declaration +#[derive(Debug)] +struct Shared<T>(std::sync::Arc<parking_lot::Mutex<T>>); + +impl<T> Shared<T> { + fn new(t: T) -> Self { + Self(std::sync::Arc::new(parking_lot::Mutex::new(t))) + } + + fn borrow_mut(&self) -> impl std::ops::DerefMut<Target = T> + '_ { + self.0 + .try_lock() + .unwrap_or_else(|| panic!("unexpected overlapping borrow of shared state")) + } +} + +impl<T> Clone for Shared<T> { + fn clone(&self) -> Self { + Self(std::sync::Arc::clone(&self.0)) + } +} + +/// A client through which PubSub publishing operations can be performed. +/// +/// This includes managing topics and writing data to topics. Created using +/// [`build_publisher`](super::ClientBuilder::build_publisher) +#[derive(Debug, Clone)] +pub struct PublisherClient<C = DefaultConnector> { + client: pubsub::PublisherClient<C>, + project: String, + identifier: String, +} + +impl<C> PublisherClient<C> { + pub(super) fn new( + client: pubsub::PublisherClient<C>, + project: String, + identifier: String, + ) -> Self { + PublisherClient { + client, + project, + identifier, + } + } + + fn project(&self) -> &str { + &self.project + } + + fn identifier(&self) -> &str { + &self.identifier + } +} + +/// Errors which can occur while publishing a message +#[derive(Debug)] +pub enum PublishError<M: EncodableMessage, E> { + /// An error from publishing + Publish { + /// The cause of the error + cause: PubSubError, + + /// The batch of messages which failed to be published + messages: Vec<M>, + }, + + /// An error from submitting a successfully published message to the user-provided response + /// sink + Response(E), + + /// An error from validating the given message + InvalidMessage { + /// The cause of the error + cause: M::Error, + + /// The message which failed to be validated + message: M, + }, +} + +impl<M: EncodableMessage, E> fmt::Display for PublishError<M, E> +where + M::Error: fmt::Display, + E: fmt::Display, +{ + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + PublishError::Publish { messages, .. } => f.write_fmt(format_args!( + "could not publish {} messages", + messages.len() + )), + PublishError::Response(..) => f.write_str( + "could not forward response for a successfully published message to the sink", + ), + PublishError::InvalidMessage { .. } => f.write_str("could not validate message"), + } + } +} + +impl<M: EncodableMessage, E> StdError for PublishError<M, E> +where + M: fmt::Debug, + M::Error: StdError + 'static, + E: StdError + 'static, +{ + fn source(&self) -> Option<&(dyn StdError + 'static)> { + match self { + PublishError::Publish { cause, .. } => Some(cause as &_), + PublishError::Response(cause) => Some(cause as &_), + PublishError::InvalidMessage { cause, .. } => Some(cause as &_), + } + } +} + +impl<M: EncodableMessage, E> From<TopicSinkError<M, E>> for PublishError<M, E> { + fn from(from: TopicSinkError<M, E>) -> Self { + match from { + TopicSinkError::Publish(cause, messages) => PublishError::Publish { cause, messages }, + TopicSinkError::Response(err) => PublishError::Response(err), + } + } +} + +impl<C> PublisherClient<C> +where + C: MakeConnection<Uri> + ya_gcp::Connect + Clone + Send + Sync + 'static, + C::Connection: Unpin + Send + 'static, + C::Future: Send + 'static, + BoxError: From<C::Error>, +{ + /// Create a new PubSub topic. + /// + /// See the GCP documentation on topics [here](https://cloud.google.com/pubsub/docs/admin) + pub async fn create_topic(&mut self, topic: TopicConfig<'_>) -> Result<(), PubSubError> { + let topic = topic.into_topic(self); + self.client.create_topic(topic).await?; + + Ok(()) + } + + /// Delete an existing PubSub topic.
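Stepping back to `PublishError` above: because the `Publish` variant carries the failed batch, callers can salvage those messages for a retry of their own. A sketch (the local queue is illustrative):

```rust
use hedwig::{googlepubsub::PublishError, EncodableMessage};

/// Sketch: push messages from a failed publish onto a local retry queue;
/// validation and response-sink errors pass through unchanged.
fn requeue_failed<M: EncodableMessage, E>(
    err: PublishError<M, E>,
    retry_queue: &mut Vec<M>,
) -> Result<(), PublishError<M, E>> {
    match err {
        PublishError::Publish { cause: _, messages } => {
            retry_queue.extend(messages);
            Ok(())
        }
        other => Err(other),
    }
}
```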
+ /// + /// See the GCP documentation on topics [here](https://cloud.google.com/pubsub/docs/admin) + pub async fn delete_topic(&mut self, topic: TopicName<'_>) -> Result<(), PubSubError> { + let topic = topic.into_project_topic_name(self.project()).into(); + + self.client + .delete_topic(pubsub::api::DeleteTopicRequest { topic }) + .await?; + + Ok(()) + } + + /// Create a new [`Publisher`] instance for publishing messages. + /// + /// Multiple publishers can be created using the same client, for example to use different + /// validators. They may share some underlying resources for greater efficiency than creating + /// multiple clients. + pub fn publisher(&self) -> Publisher<C> { + Publisher { + client: self.clone(), + retry_policy: ExponentialBackoff::new( + pubsub::DEFAULT_RETRY_CODES, + ExponentialBackoffConfig::default(), + ), + } + } + + // TODO list_topics (paginated, nontrivial) + // TODO list_topic_subscriptions (same) + // TODO list_topic_snapshots (same) + // TODO update_topic + // TODO get_topic + // TODO detach_subscription +} + +/// A publisher for sending messages to PubSub topics +pub struct Publisher<C, R = ExponentialBackoff<StatusCodeSet>> { + client: PublisherClient<C>, + retry_policy: R, +} + +impl<C, O> Publisher<C, O> { + /// Set the retry policy for this `Publisher`. + /// + /// If a publishing operation encounters an error, the given retry policy will be consulted to + /// possibly retry the operation, or otherwise propagate the error to the caller. + pub fn with_retry_policy<R, M>(self, retry_policy: R) -> Publisher<C, R> + where + R: RetryPolicy<[M], PubSubError> + Clone, + M: EncodableMessage, + { + Publisher { + retry_policy, + client: self.client, + } + } +} + +impl<C, M, S, R> crate::publisher::Publisher<M, S> for Publisher<C, R> +where + C: Connect + Clone + Send + Sync + 'static, + M: EncodableMessage + Send + 'static, + S: Sink<M> + Send + 'static, + R: RetryPolicy<[M], PubSubError> + Clone + 'static, + R::RetryOp: Send + 'static, + <R::RetryOp as RetryOperation<[M], PubSubError>>::Sleep: Send + 'static, +{ + type PublishError = PublishError<M, S::Error>; + type PublishSink = PublishSink<C, M, S, R>; + + fn publish_sink_with_responses( + self, + validator: M::Validator, + response_sink: S, + ) -> Self::PublishSink { + PublishSink { + topic_sinks: BTreeMap::new(), + validator, + buffer: None, + client: self.client, + retry_policy: self.retry_policy, + response_sink: Shared::new(Box::pin(response_sink)), + _p: std::marker::PhantomPinned, + } + } +} + +match_fields! { + pubsub::api::Topic => + + /// Configuration describing a PubSub topic.
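Before the topic configuration below, an aside on the `Publisher` just defined: installing a custom backoff looks roughly like this sketch. It assumes the default connector, some application message type `M`, and the `with_retry_policy<R, M>` signature as reconstructed above:

```rust
use hedwig::{
    googlepubsub::{
        retry_policy::{exponential_backoff::Config as BackoffConfig, ExponentialBackoff},
        DefaultConnector, PublisherClient, DEFAULT_RETRY_CODES,
    },
    EncodableMessage,
};

/// Sketch: replace the default backoff installed by `publisher()` before
/// building the publish sink.
fn customized_publisher<M: EncodableMessage>(client: &PublisherClient<DefaultConnector>) {
    let backoff = ExponentialBackoff::new(DEFAULT_RETRY_CODES, BackoffConfig::default());
    let _publisher = client.publisher().with_retry_policy::<_, M>(backoff);
}
```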
+ #[derive(Debug, Clone)] + pub struct TopicConfig<'s> { + pub name: TopicName<'s>, + pub labels: std::collections::HashMap, + pub message_storage_policy: Option, + pub kms_key_name: String, + pub message_retention_duration: Option, + + @except: + schema_settings, + satisfies_pzs, + } +} + +impl<'s> TopicConfig<'s> { + fn into_topic(self, client: &PublisherClient) -> pubsub::api::Topic { + pubsub::api::Topic { + name: self.name.into_project_topic_name(client.project()).into(), + labels: self.labels, + message_storage_policy: self.message_storage_policy, + kms_key_name: self.kms_key_name, + message_retention_duration: self.message_retention_duration, + + schema_settings: None, // documented as experimental, and hedwig enforces schemas anyway + satisfies_pzs: false, // documented as reserved (currently unused) + } + } +} + +impl<'s> Default for TopicConfig<'s> { + fn default() -> Self { + Self { + name: TopicName::new(String::new()), + labels: std::collections::HashMap::new(), + message_storage_policy: None, + kms_key_name: String::new(), + message_retention_duration: None, + } + } +} + +/// A sink for publishing messages to pubsub topics. +/// +/// Created by [`Publisher::publish_sink`](crate::Publisher::publish_sink) +#[pin_project] +pub struct PublishSink, R> { + // The underlying sinks operate on a single topic. The incoming messages could have varying + // topics, so this map holds a lazily initialized set of underlying sinks + #[allow(clippy::type_complexity)] // mostly from Pin+Box + topic_sinks: BTreeMap>>>, + + // The validator for the messages + validator: M::Validator, + + // In order to know which sink to check in `poll_ready`, we need a message's topic; but we + // won't know the topic until looking at the element in `start_send`, which contractually must + // always be preceded by a `poll_ready`. + // + // Work around this chicken-egg problem by deferring readiness checking by 1 message. + // The first `poll_ready` will always be Ready, and the first value will be seeded in this + // buffer. Subsequent `poll_ready`s will check the *previous* message in the buffer, and try to + // send it to its corresponding underlying sink + buffer: Option, + + // Because the sinks will be generated lazily, we need a client, retry policy, and + // destination sink to create new per-topic sinks + client: PublisherClient, + retry_policy: R, + + // The sink where user messages are sent once published, to inform the user that the message + // was successfully sent. + // + // Boxing this sink isn't strictly necessary because it's already in an Arc which does half the + // job of preventing moves by putting it on the heap; unfortunately there's no pin projection + // through mutexes, so we can't mark it pinned without some unsafe shenanigans. 
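The deferred-readiness trick in the `buffer` field above is a reusable `Sink` pattern in its own right. A self-contained skeleton, not the crate's actual sink (which additionally routes each item to a lazily created per-topic sink):

```rust
use futures_util::sink::Sink;
use std::{
    pin::Pin,
    task::{Context, Poll},
};

/// Skeleton of the "defer readiness by one item" trick: `poll_ready` drains
/// the previously buffered item, and `start_send` merely stashes the new one.
struct DeferredSink<T> {
    buffer: Option<T>,
}

impl<T: Unpin> Sink<T> for DeferredSink<T> {
    type Error = std::convert::Infallible;

    fn poll_ready(mut self: Pin<&mut Self>, _cx: &mut Context) -> Poll<Result<(), Self::Error>> {
        // A real implementation routes `item` to its destination here, and is
        // the only method allowed to return Pending on that destination's behalf.
        if let Some(_item) = self.buffer.take() { /* dispatch previous item */ }
        Poll::Ready(Ok(()))
    }

    fn start_send(mut self: Pin<&mut Self>, item: T) -> Result<(), Self::Error> {
        if self.buffer.replace(item).is_some() {
            panic!("`poll_ready` must succeed before each `start_send`");
        }
        Ok(())
    }

    fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Result<(), Self::Error>> {
        // flushing first drains the buffered element
        self.as_mut().poll_ready(cx)
    }

    fn poll_close(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Result<(), Self::Error>> {
        self.poll_flush(cx)
    }
}
```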
If we go + // unsafe, we should ditch the Arc sharing altogether and pass pointers, which should be mostly + // fine due to the outer pinning + response_sink: Shared>>, + + // enable future !Unpin without breaking changes + _p: std::marker::PhantomPinned, +} + +impl Sink for PublishSink +where + C: Connect + Clone + Send + Sync + 'static, + M: EncodableMessage + Send + 'static, + S: Sink + Send + 'static, + R: RetryPolicy<[M], PubSubError> + Clone + 'static, + R::RetryOp: Send + 'static, + >::Sleep: Send + 'static, +{ + type Error = PublishError; + + fn poll_ready(self: Pin<&mut Self>, cx: &mut Context) -> Poll> { + let this = self.project(); + let client = this.client; + + // Given the buffered (topic, message), find the sink corresponding to the topic + match this.buffer.as_ref() { + Some(msg) => { + let topic = msg.topic(); + // look up the sink by topic. If a sink doesn't exist, initialize one + let sink = { + let retry_policy = this.retry_policy; + let response_sink = this.response_sink; + this.topic_sinks.entry(topic.clone()).or_insert_with(|| { + Box::pin(TopicSink::new( + client.client.publish_topic_sink( + TopicName::new(topic.as_ref()) + .into_project_topic_name(client.project()), + ), + retry_policy.clone(), + Shared::clone(response_sink), + )) + }) + }; + + // poll the sink to see if it's ready + ready!(sink.poll_ready_unpin(cx))?; + + // only take out of the buffer when we know the sink is ready + let message = this.buffer.take().expect("already check Some"); + + // validate the message with the validator + let validated = match message.encode(this.validator) { + Ok(validated_msg) => validated_msg, + Err(err) => { + return Poll::Ready(Err(PublishError::InvalidMessage { + cause: err, + message, + })) + } + }; + + // convert the validated message to pubsub's message type + let api_message = match hedwig_to_pubsub(validated, client.identifier()) { + Ok(api_message) => api_message, + Err(err) => { + return Poll::Ready(Err(PublishError::Publish { + cause: err, + messages: vec![message], + })) + } + }; + + // now send the message to the sink + sink.start_send_unpin((message, api_message))?; + Poll::Ready(Ok(())) + } + + // The buffer could be empty on the first ever poll_ready or after explicit flushes. + // In that case the sink is immediately ready for an element + None => Poll::Ready(Ok(())), + } + } + + fn start_send(self: Pin<&mut Self>, item: M) -> Result<(), Self::Error> { + // try to put the item into the buffer. 
+ // If an item is already in the buffer, the user must not have called `poll_ready` + if self.project().buffer.replace(item).is_some() { + panic!("each `start_send` must be preceded by a successful call to `poll_ready`") + } + + Ok(()) + } + + fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { + // first send any element in the buffer by checking readiness + ready!(self.as_mut().poll_ready(cx))?; + + // then flush all of the underlying sinks + let mut all_ready = true; + for sink in self.topic_sinks.values_mut() { + all_ready &= sink.poll_flush_unpin(cx)?.is_ready(); + } + + if all_ready { + Poll::Ready(Ok(())) + } else { + Poll::Pending + } + } + + fn poll_close(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { + // first initiate a flush as required by the Sink contract + ready!(self.as_mut().poll_flush(cx))?; + + // then close all of the underlying sinks + let mut all_ready = true; + for sink in self.topic_sinks.values_mut() { + all_ready &= sink.poll_close_unpin(cx)?.is_ready(); + } + + if all_ready { + Poll::Ready(Ok(())) + } else { + Poll::Pending + } + } +} + +/// convert a hedwig message into a pubsub message +fn hedwig_to_pubsub( + mut msg: ValidatedMessage, + publisher_id: &str, +) -> Result { + let mut attributes = std::mem::take(msg.headers_mut()); + + if let Some(invalid_key) = attributes.keys().find(|key| key.starts_with("hedwig_")) { + return Err(PubSubError::invalid_argument(format!( + "keys starting with \"hedwig_\" are reserved: {}", + invalid_key + ))); + } + + attributes.insert(crate::HEDWIG_ID.into(), msg.uuid().to_string()); + attributes.insert( + crate::HEDWIG_MESSAGE_TIMESTAMP.into(), + msg.timestamp() + .duration_since(SystemTime::UNIX_EPOCH) + .map_err(|_| { + PubSubError::invalid_argument(format!( + "timestamp should be after UNIX epoch: {:?}", + msg.timestamp() + )) + })? + .as_millis() + .to_string(), + ); + attributes.insert(crate::HEDWIG_SCHEMA.into(), msg.schema().into()); + attributes.insert(crate::HEDWIG_PUBLISHER.into(), publisher_id.into()); + attributes.insert(crate::HEDWIG_FORMAT_VERSION.into(), "1.0".into()); + + Ok(pubsub::api::PubsubMessage { + data: msg.into_data(), + attributes, + ..pubsub::api::PubsubMessage::default() + }) +} + +/// Translation mechanisms for converting between user messages and api messages. +/// +/// While the user submits messages of arbitrary type `M` to the publisher, that information is +/// transformed (first by the generic validator, then a pubsub-specific conversion) into a concrete +/// type (`pubsub::api::PubsubMessage`) to actually communicate with the remote service. Some +/// operations then require user input based on messages in the api type (for example, checking +/// whether a retry is necessary) but the api type is meaningless to the user, they only understand +/// `M`. +/// +/// This module provides several means of translating from the api type back into the type `M` +/// (without explicit de-transformation). +mod message_translate { + use super::*; + + /// A buffer which will hold un-encoded user messages while the encoded version of the message is + /// published. After publishing (or on encountering an error) the encoded version is mapped back to + /// this user message so that success (or errors) can be reported in terms of the user's familiar + /// type, rather than an opaque encoded/serialized version. + /// + /// The actual mapping mechanism is ordering-based synchronization. 
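In miniature, that synchronization works like the following std-only toy; the invariants it relies on are spelled out just below. The front of the user-side queue always corresponds to the next service response:

```rust
use std::collections::VecDeque;

#[test]
fn fifo_mirror_stays_in_sync() {
    let mut user_side: VecDeque<&str> = VecDeque::new();
    let mut api_side: VecDeque<u32> = VecDeque::new(); // stand-in for encoded messages

    // one push on each queue per submitted message
    for (user, api) in [("first", 1), ("second", 2), ("third", 3)] {
        user_side.push_back(user);
        api_side.push_back(api);
    }

    // responses arrive in FIFO order, so popping both fronts re-associates
    // each encoded message with the user's original
    let mut delivered = Vec::new();
    while let Some(api_msg) = api_side.pop_front() {
        let user_msg = user_side.pop_front().expect("queues stay in sync");
        delivered.push((user_msg, api_msg));
    }

    assert_eq!(delivered, [("first", 1), ("second", 2), ("third", 3)]);
}
```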
This buffer will + /// maintain a queue of `M` which is implicitly in the same order as the pubsub library's + /// internal buffer; one `M` will be pushed here for every corresponding api message pushed to + /// the lib's buffer, and conversely popped when the corresponding api messages are published + /// in order. This relies on the pubsub lib's documented preservation of FIFO order. + /// + /// This ordering is also preserved after errors. The pubsub sink will report errors along with + /// the affected messages; this buffer will remove user messages for each error-carried message + /// to relay back to the user. + struct TranslateBuffer { + buf: VecDeque, + } + + impl TranslateBuffer { + /// The maximum number of messages that could be inserted before a publisher flushes. + /// + /// This is defined by the pubsub service + const PUBLISH_BUFFER_SIZE: usize = 1000; + + fn new() -> Self { + Self { + buf: VecDeque::with_capacity(Self::PUBLISH_BUFFER_SIZE), + } + } + + fn add_message(&mut self, user_message: M) { + self.buf.push_back(user_message) + } + + fn remove_success(&mut self, _api_message: pubsub::api::PubsubMessage) -> M { + self.buf + .pop_front() + .expect("translate buffer should be in sync with publish buffer") + } + + fn remove_errors( + &mut self, + error: pubsub::PublishError, + ) -> (PubSubError, impl Iterator + '_) { + (error.source, self.buf.drain(0..error.messages.len())) + } + + fn view_messages(&mut self, api_messages: &[pubsub::api::PubsubMessage]) -> &[M] { + // When a publishing request fails, a retry may be attempted; that retry policy will + // check on the request payload and the user may choose to retry or not. That payload + // needs to be translated back into user messages for retry assessment. + // + // Ideally we could return a subrange of the vecdeque, but the retry policy API + // provides the user with `&T` of the failed request, so we can only return a reference + // and not an instantiated struct. We _can_ get slices of the underlying queue, + // but a vecdeque might be split into two segments so it wouldn't be a single reference. + // + // This call moves elements within the queue such that it all exists in a contiguous + // segment (while preserving order); then we can return just a single slice. This only + // happens on publishing errors, so all the moves aren't in the common path and + // probably won't be a big problem in practice. + // + // There is a crate https://crates.io/crates/slice-deque that can create a slice + // without this data movement (by using clever virtual memory tricks). 
That's an ideal + // candidate for this use case (long-lived buffer, ideally contiguous) but its + // (un)safety makes me nervous, whereas std's vecdeque has more eyes on it + &self.buf.make_contiguous()[0..api_messages.len()] + } + } + + /// A wrapper over the pubsub sink which holds the user message buffer and provides message + /// translation for the response sink and retry policy + #[pin_project] + pub(super) struct TopicSink, R> { + user_messages: Shared>, + #[pin] + pubsub_sink: pubsub::PublishTopicSink, TranslateSink>, + } + + pub(super) enum TopicSinkError { + Publish(PubSubError, Vec), + Response(E), + } + + impl, R> TopicSink + where + S: Sink, + R: RetryPolicy<[M], PubSubError>, + { + pub(super) fn new( + pubsub_sink: pubsub::PublishTopicSink, + retry_policy: R, + response_sink: Shared>>, + ) -> Self { + let user_messages = Shared::new(TranslateBuffer::new()); + Self { + user_messages: Shared::clone(&user_messages), + pubsub_sink: pubsub_sink + .with_retry_policy(TranslateRetryPolicy { + user_messages: Shared::clone(&user_messages), + user_retry: retry_policy, + }) + .with_response_sink(TranslateSink { + user_messages, + user_sink: response_sink, + }), + } + } + + /// Translate the error type of a poll_x function into one holding user messages instead of + /// api messages + fn translate_poll_fn( + self: Pin<&mut Self>, + poll_fn: F, + cx: &mut Context, + ) -> Poll>> + where + F: FnOnce( + Pin< + &mut pubsub::PublishTopicSink< + C, + TranslateRetryPolicy, + TranslateSink, + >, + >, + &mut Context, + ) -> Poll>>, + { + let this = self.project(); + let user_messages = this.user_messages; + + poll_fn(this.pubsub_sink, cx).map_err(|err| match err { + pubsub::SinkError::Publish(publish_error) => { + let mut user_messages = user_messages.borrow_mut(); + let (source, messages) = user_messages.remove_errors(publish_error); + TopicSinkError::Publish(source, messages.collect()) + } + pubsub::SinkError::Response(response_error) => { + TopicSinkError::Response(response_error) + } + }) + } + } + + impl, R> Sink<(M, pubsub::api::PubsubMessage)> for TopicSink + where + C: Connect + Clone + Send + Sync + 'static, + R: RetryPolicy<[M], PubSubError> + 'static, + R::RetryOp: Send + 'static, + >::Sleep: Send + 'static, + S: Sink + Send + 'static, + M: EncodableMessage + Send + 'static, + { + type Error = TopicSinkError; + + fn poll_ready(self: Pin<&mut Self>, cx: &mut Context) -> Poll> { + self.translate_poll_fn(pubsub::PublishTopicSink::poll_ready, cx) + } + + fn start_send( + self: Pin<&mut Self>, + (user_message, api_message): (M, pubsub::api::PubsubMessage), + ) -> Result<(), Self::Error> { + let this = self.project(); + + // try to send the api message to the sink. 
+
+    impl<C, M, S, R> Sink<(M, pubsub::api::PubsubMessage)> for TopicSink<C, M, S, R>
+    where
+        C: Connect + Clone + Send + Sync + 'static,
+        R: RetryPolicy<[M], PubSubError> + 'static,
+        R::RetryOp: Send + 'static,
+        <R::RetryOp as RetryOperation<[M], PubSubError>>::Sleep: Send + 'static,
+        S: Sink<M> + Send + 'static,
+        M: EncodableMessage + Send + 'static,
+    {
+        type Error = TopicSinkError<M, S::Error>;
+
+        fn poll_ready(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Result<(), Self::Error>> {
+            self.translate_poll_fn(pubsub::PublishTopicSink::poll_ready, cx)
+        }
+
+        fn start_send(
+            self: Pin<&mut Self>,
+            (user_message, api_message): (M, pubsub::api::PubsubMessage),
+        ) -> Result<(), Self::Error> {
+            let this = self.project();
+
+            // try to send the api message to the sink. Only if successful will it be added to the
+            // buffer; if it fails some argument check, the buffer does not need to be popped for
+            // translation
+            match this.pubsub_sink.start_send(api_message) {
+                Ok(()) => {
+                    this.user_messages.borrow_mut().add_message(user_message);
+                    Ok(())
+                }
+                Err(err) => Err(match err {
+                    pubsub::SinkError::Publish(publish_error) => {
+                        assert_eq!(publish_error.messages.len(), 1);
+                        TopicSinkError::Publish(publish_error.source, vec![user_message])
+                    }
+                    pubsub::SinkError::Response(_) => {
+                        unreachable!("response sink should not be used in start_send")
+                    }
+                }),
+            }
+        }
+
+        fn poll_flush(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Result<(), Self::Error>> {
+            self.translate_poll_fn(pubsub::PublishTopicSink::poll_flush, cx)
+        }
+
+        fn poll_close(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Result<(), Self::Error>> {
+            self.translate_poll_fn(pubsub::PublishTopicSink::poll_close, cx)
+        }
+    }
+
+    /// A retry policy which can be used by pubsub to retry api messages, but will provide the user
+    /// with user messages to assess retry-worthiness
+    struct TranslateRetryPolicy<M, R> {
+        user_messages: Shared<TranslateBuffer<M>>,
+        user_retry: R,
+    }
+
+    impl<M, R> RetryPolicy<pubsub::api::PublishRequest, PubSubError> for TranslateRetryPolicy<M, R>
+    where
+        R: RetryPolicy<[M], PubSubError>,
+    {
+        type RetryOp = TranslateRetryOp<M, R::RetryOp>;
+
+        fn new_operation(&mut self) -> Self::RetryOp {
+            TranslateRetryOp {
+                user_messages: Shared::clone(&self.user_messages),
+                user_retry_op: self.user_retry.new_operation(),
+            }
+        }
+    }
+
+    struct TranslateRetryOp<M, O> {
+        user_messages: Shared<TranslateBuffer<M>>,
+        user_retry_op: O,
+    }
+
+    impl<M, O> RetryOperation<pubsub::api::PublishRequest, PubSubError> for TranslateRetryOp<M, O>
+    where
+        O: RetryOperation<[M], PubSubError>,
+    {
+        type Sleep = O::Sleep;
+
+        fn check_retry(
+            &mut self,
+            failed_value: &pubsub::api::PublishRequest,
+            error: &PubSubError,
+        ) -> Option<Self::Sleep> {
+            // Given a failed request with api messages, translate it into user messages
+            let mut user_messages = self.user_messages.borrow_mut();
+            let failed_messages = user_messages.view_messages(&failed_value.messages);
+
+            self.user_retry_op.check_retry(failed_messages, error)
+        }
+    }
+
+    /// A sink used to translate successful publishing responses from api messages back to user
+    /// messages for consumption by the user's response sink
+    struct TranslateSink<M: EncodableMessage, S: Sink<M>> {
+        user_messages: Shared<TranslateBuffer<M>>,
+        user_sink: Shared<Pin<Box<S>>>,
+    }
+
+    impl<M: EncodableMessage, S> Sink<pubsub::api::PubsubMessage> for TranslateSink<M, S>
+    where
+        S: Sink<M>,
+    {
+        type Error = S::Error;
+
+        fn poll_ready(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Result<(), Self::Error>> {
+            self.user_sink.borrow_mut().poll_ready_unpin(cx)
+        }
+        fn start_send(
+            self: Pin<&mut Self>,
+            api_message: pubsub::api::PubsubMessage,
+        ) -> Result<(), Self::Error> {
+            let user_message = self.user_messages.borrow_mut().remove_success(api_message);
+            self.user_sink.borrow_mut().start_send_unpin(user_message)
+        }
+        fn poll_flush(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Result<(), Self::Error>> {
+            self.user_sink.borrow_mut().poll_flush_unpin(cx)
+        }
+        fn poll_close(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Result<(), Self::Error>> {
+            self.user_sink.borrow_mut().poll_close_unpin(cx)
+        }
+    }
+}
diff --git a/src/backends/mock.rs b/src/backends/mock.rs
new file mode 100644
index 0000000..06c2d2d
--- /dev/null
+++ b/src/backends/mock.rs
@@ -0,0 +1,295 @@
+//! In-memory messaging implementations, meant to imitate distributed messaging services for test
+//! purposes.
+//!
+//! See [`MockPublisher`] for an entry point to the mock system.
+
+use crate::{consumer::AcknowledgeableMessage, EncodableMessage, Topic, ValidatedMessage};
+use async_channel as mpmc;
+use futures_util::{
+    sink,
+    stream::{self, StreamExt},
+};
+use parking_lot::Mutex;
+use pin_project::pin_project;
+use std::{
+    collections::BTreeMap,
+    error::Error as StdError,
+    pin::Pin,
+    sync::Arc,
+    task::{Context, Poll},
+};
+
+/// Errors originating from mock publisher and consumer operations
+#[derive(Debug, thiserror::Error)]
+#[error(transparent)]
+pub struct Error {
+    /// The underlying source of the error
+    pub cause: Box<dyn StdError + Send + Sync>,
+}
+
+impl Error {
+    fn from<E>(from: E) -> Self
+    where
+        Box<dyn StdError + Send + Sync>: From<E>,
+    {
+        Self { cause: from.into() }
+    }
+}
+
+type Topics = BTreeMap<Topic, Subscriptions>;
+type Subscriptions = BTreeMap<MockSubscription, Channel<ValidatedMessage>>;
+
+/// An in-memory publisher.
+///
+/// Consumers for the published data can be created using the `new_consumer` method.
+///
+/// Messages are published to particular [`Topics`](crate::Topic). Each topic may have multiple
+/// `Subscriptions`, and every message for a topic will be sent to each of its subscriptions. A
+/// subscription, in turn, may have multiple consumers; consumers will take messages from the
+/// subscription on a first-polled-first-served basis.
+///
+/// This publisher can be cloned, allowing multiple publishers to send messages to the same set of
+/// topics and subscriptions. Any consumer created with `new_consumer` will receive all on-topic
+/// and on-subscription messages from all the associated publishers, regardless of whether the
+/// consumer was created from a cloned instance.
+#[derive(Debug, Clone)]
+pub struct MockPublisher {
+    topics: Arc<Mutex<Topics>>,
+}
+
+impl MockPublisher {
+    /// Create a new `MockPublisher`
+    pub fn new() -> Self {
+        MockPublisher {
+            topics: Arc::new(Mutex::new(BTreeMap::new())),
+        }
+    }
+
+    /// Create a new consumer which will listen for messages published to the given topic and
+    /// subscription by this publisher (or any of its clones)
+    pub fn new_consumer(
+        &self,
+        topic: impl Into<Topic>,
+        subscription: impl Into<MockSubscription>,
+    ) -> MockConsumer {
+        let mut topics = self.topics.lock();
+        let subscriptions = topics.entry(topic.into()).or_insert_with(BTreeMap::new);
+
+        let channel = subscriptions
+            .entry(subscription.into())
+            .or_insert_with(|| {
+                let (sender, receiver) = mpmc::unbounded();
+                Channel { sender, receiver }
+            })
+            .clone();
+
+        MockConsumer {
+            subscription_messages: channel.receiver,
+            subscription_resend: channel.sender,
+        }
+    }
+}
+
+impl Default for MockPublisher {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl<M, S> crate::Publisher<M, S> for MockPublisher
+where
+    M: crate::EncodableMessage,
+    M::Error: StdError + 'static,
+    S: sink::Sink<M>,
+    S::Error: StdError + 'static,
+{
+    type PublishError = Error;
+    type PublishSink = MockSink<M, S>;
+
+    fn publish_sink_with_responses(
+        self,
+        validator: M::Validator,
+        response_sink: S,
+    ) -> Self::PublishSink {
+        MockSink {
+            topics: self.topics,
+            validator,
+            response_sink,
+        }
+    }
+}
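The delivery semantics described above (fan-out across subscriptions, competition within one) can be sketched in isolation with async-channel, the same primitive the mock uses internally; this is a self-contained illustration, not part of the change:

    use async_channel as mpmc;

    #[tokio::main(flavor = "current_thread")]
    async fn main() {
        // One channel per subscription: every subscription sees every message.
        let (sub_a_tx, sub_a_rx) = mpmc::unbounded::<&str>();
        let (sub_b_tx, sub_b_rx) = mpmc::unbounded::<&str>();

        // "Publishing" fans the message out to each subscription's channel.
        for subscription in [&sub_a_tx, &sub_b_tx] {
            subscription.try_send("user.created").unwrap();
        }

        // Consumers cloned from one receiver compete: exactly one of them
        // receives each message on that subscription.
        let consumer_1 = sub_a_rx.clone();
        let consumer_2 = sub_a_rx;
        let got = tokio::select! {
            m = consumer_1.recv() => m.unwrap(),
            m = consumer_2.recv() => m.unwrap(),
        };
        assert_eq!(got, "user.created");

        // The other subscription still holds its own copy of the message.
        assert_eq!(sub_b_rx.recv().await.unwrap(), "user.created");
    }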
+
+/// The sink used by the `MockPublisher`
+#[pin_project]
+#[derive(Debug)]
+pub struct MockSink<M: EncodableMessage, S> {
+    topics: Arc<Mutex<Topics>>,
+    validator: M::Validator,
+    #[pin]
+    response_sink: S,
+}
+
+#[derive(Debug, Clone)]
+struct Channel<T> {
+    sender: mpmc::Sender<T>,
+    receiver: mpmc::Receiver<T>,
+}
+
+impl<M, S> sink::Sink<M> for MockSink<M, S>
+where
+    M: EncodableMessage,
+    M::Error: StdError + 'static,
+    S: sink::Sink<M>,
+    S::Error: StdError + 'static,
+{
+    type Error = Error;
+
+    fn poll_ready(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Result<(), Self::Error>> {
+        self.project()
+            .response_sink
+            .poll_ready(cx)
+            .map_err(Error::from)
+    }
+
+    fn start_send(self: Pin<&mut Self>, message: M) -> Result<(), Self::Error> {
+        let this = self.project();
+
+        let topic = message.topic();
+        let validated_message = message.encode(this.validator).map_err(Error::from)?;
+
+        // lock critical section
+        {
+            let mut topics = this.topics.lock();
+
+            // send the message to every subscription listening on the given topic
+
+            // find the subscriptions for this topic
+            let subscriptions = topics.entry(topic).or_insert_with(Subscriptions::new);
+
+            // Send to every subscription that still has consumers. If a subscription's consumers
+            // are all dropped, the channel will have been closed and should be removed from the
+            // list
+            subscriptions.retain(|_subscription_name, channel| {
+                match channel.sender.try_send(validated_message.clone()) {
+                    // if successfully sent, retain the channel
+                    Ok(()) => true,
+                    // if the channel has disconnected due to drops, remove it from the list
+                    Err(mpmc::TrySendError::Closed(_)) => false,
+                    Err(mpmc::TrySendError::Full(_)) => {
+                        unreachable!("unbounded channel should never be full")
+                    }
+                }
+            });
+        }
+
+        // notify the caller that the message has been sent successfully
+        this.response_sink
+            .start_send(message)
+            .map_err(Error::from)?;
+
+        Ok(())
+    }
+
+    fn poll_flush(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Result<(), Self::Error>> {
+        self.project()
+            .response_sink
+            .poll_flush(cx)
+            .map_err(Error::from)
+    }
+
+    fn poll_close(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Result<(), Self::Error>> {
+        self.project()
+            .response_sink
+            .poll_close(cx)
+            .map_err(Error::from)
+    }
+}
+
+/// An opaque identifier for individual subscriptions to a [`MockPublisher`]
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct MockSubscription(String);
+
+impl<S> From<S> for MockSubscription
+where
+    S: Into<String>,
+{
+    fn from(string: S) -> Self {
+        MockSubscription(string.into())
+    }
+}
+
+/// A consumer for messages from a particular subscription to a [`MockPublisher`]
+#[derive(Debug, Clone)]
+pub struct MockConsumer {
+    // channel receiver to get messages from the subscription
+    subscription_messages: mpmc::Receiver<ValidatedMessage>,
+
+    // channel sender to resend messages to the subscription on nack
+    subscription_resend: mpmc::Sender<ValidatedMessage>,
+}
+
+impl crate::Consumer for MockConsumer {
+    type AckToken = MockAckToken;
+    type Error = Error;
+    type Stream = Self;
+
+    fn stream(self) -> Self::Stream {
+        self
+    }
+}
+
+impl stream::Stream for MockConsumer {
+    type Item = Result<AcknowledgeableMessage<MockAckToken, ValidatedMessage>, Error>;
+
+    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
+        self.subscription_messages
+            .poll_next_unpin(cx)
+            .map(|opt_message| {
+                opt_message.map(|message| {
+                    Ok(AcknowledgeableMessage {
+                        ack_token: MockAckToken {
+                            message: message.clone(),
+                            subscription_resend: self.subscription_resend.clone(),
+                        },
+                        message,
+                    })
+                })
+            })
+    }
+}
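The ack token defined next hinges on one idea: a nack simply re-sends the message into the subscription channel, so a later poll sees it again. A stripped-down, self-contained sketch of that mechanism (plain async-channel, outside hedwig's types):

    use async_channel as mpmc;

    #[tokio::main(flavor = "current_thread")]
    async fn main() {
        let (sender, receiver) = mpmc::unbounded::<&str>();
        sender.send("event").await.unwrap();

        // First delivery; suppose the consumer cannot process it yet...
        let message = receiver.recv().await.unwrap();
        // ...so "nack" it, which is implemented as re-sending into the same
        // subscription channel.
        sender.send(message).await.unwrap();

        // A later poll (by this or any other consumer clone) sees it again.
        assert_eq!(receiver.recv().await.unwrap(), "event");
    }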
+
+/// An acknowledge token associated with a particular message from a [`MockConsumer`].
+///
+/// When `nack` is called for a particular message's token, that message will be re-submitted to
+/// consumers of the corresponding subscription. Messages otherwise do not have any timeout
+/// behavior, so a message is only re-sent to consumers if it is explicitly nack'ed; `ack` and
+/// `modify_deadline` have no effect
+#[derive(Debug)]
+pub struct MockAckToken {
+    message: ValidatedMessage,
+    subscription_resend: mpmc::Sender<ValidatedMessage>,
+}
+
+#[async_trait::async_trait]
+impl crate::consumer::AcknowledgeToken for MockAckToken {
+    type AckError = Error;
+    type NackError = Error;
+    type ModifyError = Error;
+
+    async fn ack(self) -> Result<(), Self::AckError> {
+        Ok(())
+    }
+
+    async fn nack(self) -> Result<(), Self::NackError> {
+        self.subscription_resend
+            .send(self.message)
+            .await
+            .map_err(|mpmc::SendError(_message)| Error {
+                cause: "Could not nack message because all consumers have been dropped".into(),
+            })
+    }
+
+    async fn modify_deadline(&mut self, _seconds: u32) -> Result<(), Self::ModifyError> {
+        // currently does nothing
+        Ok(())
+    }
+}
diff --git a/src/backends/mod.rs b/src/backends/mod.rs
new file mode 100644
index 0000000..fe2e079
--- /dev/null
+++ b/src/backends/mod.rs
@@ -0,0 +1,5 @@
+#[cfg(feature = "google")]
+pub mod googlepubsub;
+
+#[cfg(any(test, feature = "mock"))]
+pub mod mock;
diff --git a/src/consume/mod.rs b/src/consumer.rs
similarity index 94%
rename from src/consume/mod.rs
rename to src/consumer.rs
index f578bdc..160e6a9 100644
--- a/src/consume/mod.rs
+++ b/src/consumer.rs
@@ -17,12 +17,10 @@ use std::{
 ///
 /// ## Message Decoding
 ///
-/// Messages pulled from the service are assumed to have been created by some
-#[cfg_attr(feature = "publish", doc = "[hedwig publisher](crate::publish::Publisher)")]
-#[cfg_attr(not(feature = "publish"), doc = "hedwig publisher")]
-/// and therefore were validated against the included schema when publishing. It is the decoder's
-/// responsibility (when provided to functions like [`consume`](Consumer::consume)) to check this
-/// schema and the accompanying payload for validity.
+/// Messages pulled from the service are assumed to have been created by some [hedwig
+/// publisher](crate::Publisher) and therefore were validated against the included schema
+/// when publishing. It is the decoder's responsibility (when provided to functions like
+/// [`consume`](Consumer::consume)) to check this schema and the accompanying payload for validity.
 ///
 /// ## Acknowledging Messages
 /// Typically message services deliver messages with a particular delivery time window, during
@@ -67,8 +65,9 @@ pub trait Consumer {
     /// Create a stream of decoded messages from this consumer, using a decoder for the given
     /// [decodable](DecodableMessage) message type.
     fn consume<M>(self, decoder: M::Decoder) -> MessageStream<Self::Stream, M::Decoder, M>
-    where Self: Sized,
-          M: DecodableMessage,
+    where
+        Self: Sized,
+        M: DecodableMessage,
     {
         MessageStream {
             stream: self.stream(),
diff --git a/src/lib.rs b/src/lib.rs
index 5f79534..33b5052 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -9,88 +9,67 @@
 //! Publish a message. Payload encoded with JSON and validated using a JSON Schema.
 //!
 //! ```
-//! use uuid::Uuid;
-//! use std::{path::Path, time::SystemTime};
-//! use futures_util::stream::StreamExt;
-//!
-//! # #[cfg(not(feature = "json-schema"))]
+//! use hedwig::{validators, Publisher, Consumer};
+//! # use uuid::Uuid;
+//! # use std::{path::Path, time::SystemTime};
+//! # use futures_util::{sink::SinkExt, stream::StreamExt};
+//! # #[cfg(not(all(feature = "protobuf", feature = "mock")))]
 //! # fn main() {}
+//! # #[cfg(all(feature = "protobuf", feature = "mock"))] // example uses a protobuf validator.
+//! # #[tokio::main(flavor = "current_thread")]
+//! # async fn main() -> Result<(), Box<dyn std::error::Error>> {
 //!
-//! # #[cfg(feature = "json-schema")] // example uses a JSON Schema validator.
-//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
-//! let schema = r#"{
-//!     "$id": "https://hedwig.corp/schema",
-//!     "$schema": "https://json-schema.org/draft-04/schema#",
-//!     "description": "Example Schema",
-//!     "schemas": {
-//!         "user-created": {
-//!             "1.*": {
-//!                 "description": "A new user was created",
-//!                 "type": "object",
-//!                 "x-versions": [
-//!                     "1.0"
-//!                 ],
-//!                 "required": [
-//!                     "user_id"
-//!                 ],
-//!                 "properties": {
-//!                     "user_id": {
-//!                         "$ref": "https://hedwig.corp/schema#/definitions/UserId/1.0"
-//!                     }
-//!                 }
-//!             }
-//!         }
-//!     },
-//!     "definitions": {
-//!         "UserId": {
-//!             "1.0": {
-//!                 "type": "string"
-//!             }
-//!         }
-//!     }
-//! }"#;
-//!
-//! #[derive(serde::Serialize)]
+//! #[derive(Clone, PartialEq, Eq, prost::Message)]
 //! struct UserCreatedMessage {
+//!     #[prost(string, tag = "1")]
 //!     user_id: String,
 //! }
 //!
-//! impl<'a> hedwig::publish::EncodableMessage for &'a UserCreatedMessage {
-//!     type Error = hedwig::validators::JsonSchemaValidatorError;
-//!     type Validator = hedwig::validators::JsonSchemaValidator;
-//!     fn topic(&self) -> hedwig::Topic { "user.created".into() }
-//!     fn encode(self, validator: &Self::Validator)
-//!         -> Result<hedwig::ValidatedMessage, Self::Error> {
-//!         validator.validate(
-//!             Uuid::new_v4(),
+//! impl hedwig::EncodableMessage for UserCreatedMessage {
+//!     type Error = validators::ProstValidatorError;
+//!     type Validator = validators::ProstValidator;
+//!     fn topic(&self) -> hedwig::Topic {
+//!         "user.created".into()
+//!     }
+//!     fn encode(&self, validator: &Self::Validator) -> Result<hedwig::ValidatedMessage, Self::Error> {
+//!         Ok(validator.validate(
+//!             uuid::Uuid::new_v4(),
 //!             SystemTime::now(),
-//!             "https://hedwig.corp/schema#/schemas/user.created/1.0",
-//!             hedwig::Headers::new(),
+//!             "user.created/1.0",
+//!             Default::default(),
 //!             self,
-//!         )
+//!         )?)
 //!     }
 //! }
 //!
-//! let publisher = /* Some publisher */
-//! # hedwig::publish::NullPublisher;
-//! let validator = hedwig::validators::JsonSchemaValidator::new(schema)?;
-//! let mut batch = hedwig::publish::PublishBatch::new();
-//! batch.message(&validator, &UserCreatedMessage { user_id: String::from("U_123") });
-//! let mut result_stream = batch.publish(&publisher);
-//! let mut next_batch = hedwig::publish::PublishBatch::new();
-//! async {
-//!     while let Some(result) = result_stream.next().await {
-//!         match result {
-//!             (Ok(id), _, msg) => {
-//!                 println!("message {} published successfully: {:?}", msg.uuid(), id);
-//!             }
-//!             (Err(e), topic, msg) => {
-//!                 eprintln!("failed to publish {}: {}", msg.uuid(), e);
-//!                 next_batch.push(topic, msg);
-//!             }
-//!         }
-//!     }
-//! };
+//! impl hedwig::DecodableMessage for UserCreatedMessage {
+//!     type Error = validators::ProstDecodeError;
+//!     type Decoder =
+//!         validators::ProstDecoder<validators::prost::ExactSchemaMatcher<UserCreatedMessage>>;
+//!
+//!     fn decode(msg: hedwig::ValidatedMessage, decoder: &Self::Decoder) -> Result<Self, Self::Error> {
+//!         decoder.decode(msg)
+//!     }
+//! }
+//!
+//!
+//! let publisher = /* Some publisher */
+//! # hedwig::mock::MockPublisher::new();
+//! let consumer = /* Consumer associated to that publisher */
+//! # publisher.new_consumer("user.created", "example_subscription");
+//!
+//! let mut publish_sink = Publisher::<UserCreatedMessage>::publish_sink(publisher, validators::ProstValidator::new());
+//! let mut consumer_stream = consumer.consume::<UserCreatedMessage>(
+//!     validators::ProstDecoder::new(validators::prost::ExactSchemaMatcher::new("user.created/1.0")),
+//! );
+//!
+//! publish_sink.send(UserCreatedMessage { user_id: String::from("U_123") }).await?;
+//!
+//! assert_eq!(
+//!     "U_123",
+//!     consumer_stream.next().await.unwrap()?.ack().await?.user_id
+//! );
+//!
 //! # Ok(())
 //! # }
 //! ```
@@ -102,19 +81,24 @@
 pub use topic::Topic;
 
 use bytes::Bytes;
 use uuid::Uuid;
 
-#[cfg(feature = "publish")]
-#[cfg_attr(docsrs, doc(cfg(feature = "publish")))]
-pub mod publish;
-
-#[cfg(feature = "consume")]
-#[cfg_attr(docsrs, doc(cfg(feature = "consume")))]
-pub mod consume;
-
-#[cfg(test)]
+mod backends;
+mod consumer;
+mod publisher;
 mod tests;
 mod topic;
 pub mod validators;
 
+pub use backends::*;
+pub use consumer::*;
+pub use publisher::*;
+
+// TODO make these public somewhere?
+pub(crate) const HEDWIG_ID: &str = "hedwig_id";
+pub(crate) const HEDWIG_MESSAGE_TIMESTAMP: &str = "hedwig_message_timestamp";
+pub(crate) const HEDWIG_SCHEMA: &str = "hedwig_schema";
+pub(crate) const HEDWIG_PUBLISHER: &str = "hedwig_publisher";
+pub(crate) const HEDWIG_FORMAT_VERSION: &str = "hedwig_format_version";
+
 /// All errors that may be returned when operating top level APIs.
 #[derive(Debug, thiserror::Error)]
 #[non_exhaustive]
@@ -191,8 +175,18 @@ impl ValidatedMessage {
         &self.headers
     }
 
+    /// Mutable access to the message headers
+    pub fn headers_mut(&mut self) -> &mut Headers {
+        &mut self.headers
+    }
+
     /// The encoded message data.
     pub fn data(&self) -> &[u8] {
         &self.data
     }
+
+    /// Destructure this message into just the contained data
+    pub fn into_data(self) -> Bytes {
+        self.data
+    }
 }
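A small sketch of what the two accessors added above enable on the consuming side (a hypothetical helper, not from the diff, assuming `Headers` exposes a map-style `insert` as the existing tests use):

    use bytes::Bytes;
    use hedwig::ValidatedMessage;

    // Hypothetical post-processing step: stamp a header in place, then take
    // ownership of the payload; `into_data` yields the `Bytes` without copying.
    fn finish(mut message: ValidatedMessage) -> Bytes {
        message
            .headers_mut()
            .insert("processed-by".to_owned(), "example-worker".to_owned());
        message.into_data()
    }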
diff --git a/src/publish/mod.rs b/src/publish/mod.rs
deleted file mode 100644
index fcda033..0000000
--- a/src/publish/mod.rs
+++ /dev/null
@@ -1,191 +0,0 @@
-//! Types, traits, and functions necessary to publish messages using hedwig
-
-use crate::{Error, Topic, ValidatedMessage};
-use futures_util::{
-    ready,
-    stream::{self, Stream},
-};
-use pin_project::pin_project;
-use std::{
-    collections::BTreeMap,
-    pin::Pin,
-    task::{Context, Poll},
-};
-
-mod publishers;
-pub use publishers::*;
-
-#[cfg(feature = "sink")]
-#[cfg_attr(docsrs, doc(cfg(feature = "sink")))]
-pub mod sink;
-
-/// Message publishers.
-///
-/// Message publishers deliver a validated message to an endpoint, possibly a remote one. Message
-/// publishers may also additionally validate a message for publisher-specific requirements (e.g.
-/// size).
-pub trait Publisher {
-    /// The identifier for a successfully published message.
-    type MessageId: 'static;
-
-    /// The error that this publisher returns when publishing of a message fails.
-    type MessageError: std::error::Error + Send + Sync + 'static;
-
-    /// The stream of results that the `publish` method returns.
-    type PublishStream: Stream<Item = Result<Self::MessageId, Self::MessageError>>;
-
-    /// Publish a batch of messages.
-    ///
-    /// The output stream shall return a result for each message in `messages` slice in order.
-    fn publish<'a, I>(&self, topic: Topic, messages: I) -> Self::PublishStream
-    where
-        I: Iterator<Item = &'a ValidatedMessage> + DoubleEndedIterator + ExactSizeIterator;
-}
-
-/// Types that can be encoded and published.
-pub trait EncodableMessage {
-    /// The errors that can occur when calling the [`EncodableMessage::encode`] method.
-    ///
-    /// Will typically match the errors returned by the [`EncodableMessage::Validator`].
-    type Error: std::error::Error + Send + Sync + 'static;
-
-    /// The validator to use for this message.
-    type Validator;
-
-    /// Topic into which this message shall be published.
-    fn topic(&self) -> Topic;
-
-    /// Encode the message payload.
-    fn encode(self, validator: &Self::Validator) -> Result<ValidatedMessage, Self::Error>;
-}
-
-/// A convenience builder for publishing in batches.
-#[derive(Default, Debug)]
-pub struct PublishBatch {
-    messages: BTreeMap<Topic, Vec<ValidatedMessage>>,
-}
-
-impl PublishBatch {
-    /// Construct a new batch.
-    pub fn new() -> Self {
-        Self::default()
-    }
-
-    /// Number of messages currently queued.
-    pub fn len(&self) -> usize {
-        self.messages.iter().fold(0, |acc, (_, v)| acc + v.len())
-    }
-
-    /// Whether the batch is empty.
-    pub fn is_empty(&self) -> bool {
-        self.messages.iter().all(|(_, v)| v.is_empty())
-    }
-
-    /// Add an already validated message to be published in this batch.
-    pub fn push(&mut self, topic: Topic, validated: ValidatedMessage) -> &mut Self {
-        self.messages.entry(topic).or_default().push(validated);
-        self
-    }
-
-    /// Validate and add a message to be published in this batch.
-    pub fn message<M: EncodableMessage>(
-        &mut self,
-        validator: &M::Validator,
-        msg: M,
-    ) -> Result<&mut Self, Error> {
-        let topic = msg.topic();
-        let validated = msg
-            .encode(validator)
-            .map_err(|e| Error::EncodeMessage(e.into()))?;
-        Ok(self.push(topic, validated))
-    }
-
-    /// Publish all the enqueued messages, batching them for high efficiency.
-    ///
-    /// The order in which messages were added to the batch and the order of messages as seen by
-    /// the publisher is not strictly preserved. As thus, the output stream will not preserve the
-    /// message ordering either.
-    ///
-    /// Some kinds of errors that occur during publishing may not be transient. An example of such
-    /// an error is attempting to publish a too large message with the `GooglePubSubPublisher`. For
-    /// errors like these retrying is most likely incorrect as they would just fail again.
-    /// Publisher-specific error types may have methods to make a decision easier.
-    pub fn publish<P>(self, publisher: &P) -> PublishBatchStream<P::PublishStream>
-    where
-        P: Publisher,
-        P::PublishStream: Unpin,
-    {
-        PublishBatchStream(
-            self.messages
-                .into_iter()
-                .map(|(topic, msgs)| TopicPublishStream::new(topic, msgs, publisher))
-                .collect::<stream::SelectAll<_>>(),
-        )
-    }
-}
-
-/// The stream returned by the method [`PublishBatch::publish`](PublishBatch::publish)
-// This stream and TopicPublishStream are made explicit types instead of combinators like
-// map/zip/etc so that callers can refer to a concrete return type instead of `impl Stream`
-#[pin_project]
-#[derive(Debug)]
-pub struct PublishBatchStream<P>(#[pin] stream::SelectAll<TopicPublishStream<P>>);
-
-impl<P> Stream for PublishBatchStream<P>
-where
-    P: Stream + Unpin,
-{
-    type Item = (P::Item, Topic, ValidatedMessage);
-
-    fn poll_next(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
-        self.project().0.poll_next(cx)
-    }
-}
-
-#[pin_project]
-#[derive(Debug)]
-struct TopicPublishStream<P> {
-    topic: Topic,
-    messages: std::vec::IntoIter<ValidatedMessage>,
-
-    #[pin]
-    publish_stream: P,
-}
-
-impl<P> TopicPublishStream<P> {
-    fn new<Pub>(topic: Topic, messages: Vec<ValidatedMessage>, publisher: &Pub) -> Self
-    where
-        Pub: Publisher<PublishStream = P>,
-        P: Stream<Item = Result<Pub::MessageId, Pub::MessageError>>,
-    {
-        let publish_stream = publisher.publish(topic, messages.iter());
-        Self {
-            topic,
-            messages: messages.into_iter(),
-            publish_stream,
-        }
-    }
-}
-
-impl<P> Stream for TopicPublishStream<P>
-where
-    P: Stream,
-{
-    type Item = (P::Item, Topic, ValidatedMessage);
-
-    fn poll_next(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
-        let this = self.project();
-
-        Poll::Ready(match ready!(this.publish_stream.poll_next(cx)) {
-            None => None,
-            Some(stream_item) => Some((
-                stream_item,
-                *this.topic,
-                this.messages
-                    .next()
-                    .expect("should be as many messages as publishes"),
-            )),
-        })
-    }
-}
diff --git a/src/publish/publishers/googlepubsub.rs b/src/publish/publishers/googlepubsub.rs
deleted file mode 100644
index 930a71d..0000000
--- a/src/publish/publishers/googlepubsub.rs
+++ /dev/null
@@ -1,886 +0,0 @@
-use futures_util::stream::{Stream, StreamExt, TryStreamExt};
-use std::{borrow::Cow, pin::Pin, sync::Arc, task, time::SystemTime};
-use yup_oauth2::authenticator::Authenticator;
-
-use crate::{Topic, ValidatedMessage};
-
-const AUTH_SCOPES: [&str; 1] = ["https://www.googleapis.com/auth/pubsub"];
-const JSON_METATYPE: &str = "application/json";
-const API_BODY_PREFIX: &[u8] = br##"{"messages":["##;
-const API_BODY_SUFFIX: &[u8] = br##"]}"##;
-
-// https://cloud.google.com/pubsub/quotas#resource_limits
-const API_DATA_LENGTH_LIMIT: usize = 10 * 1024 * 1024; // 10MiB
-const API_MSG_COUNT_LIMIT: usize = 1000;
-// Not actually important, because submitting through API already has the same limit (and there's
-// some overhead in the API call), so even if we do check this, we would still be prone to hitting
-// the `API_DATA_LENGTH_LIMIT` limit. So just check the `API_DATA_LENGTH_LIMIT`.
-//
-// const API_MSG_LENGTH_LIMIT: usize = 10 * 1024 * 1024; // 10MiB
-const API_MSG_ATTRIBUTE_COUNT_LIMIT: usize = 100;
-const API_MSG_ATTRIBUTE_KEY_SIZE_LIMIT: usize = 256; // 256 bytes
-const API_MSG_ATTRIBUTE_VAL_SIZE_LIMIT: usize = 1024; // 1024 bytes
-
-/// Error messages that occur when publishing to Google PubSub.
-#[derive(Debug, Clone, thiserror::Error)]
-#[non_exhaustive]
-#[cfg_attr(docsrs, doc(cfg(feature = "google")))]
-pub enum GooglePubSubError {
-    /// Could not get authentication token.
-    #[error("could not get authentication token")]
-    GetAuthToken(#[source] Arc<yup_oauth2::Error>),
-    /// Could not POST the request with messages.
-    #[error("could not POST the request with messages")]
-    PostMessages(#[source] Arc<hyper::Error>),
-    /// Could not construct the request URI.
-    #[error("could not construct the request URI")]
-    ConstructRequestUri(#[source] Arc<http::uri::InvalidUri>),
-    /// Could not construct the request.
-    #[error("could not construct the request")]
-    ConstructRequest(#[source] Arc<http::Error>),
-    /// Publish request failed with a bad HTTP status code.
-    #[error("publish request failed with status code {1}")]
-    ResponseStatus(#[source] Option<Arc<PubSubPublishError>>, http::StatusCode),
-    /// Could not parse the response body.
-    #[error("could not parse the response body")]
-    ResponseParse(#[source] Arc<serde_json::Error>),
-    /// Could not receive the response body.
-    #[error("could not receive the response body")]
-    ResponseBodyReceive(#[source] Arc<hyper::Error>),
-
-    // Critical message serialization errors
-    /// Message contains too many headers.
-    #[error("message contains too many headers")]
-    MessageTooManyHeaders,
-    /// Message contains a header key that's too large.
-    #[error("message contains a header key that's too large")]
-    MessageHeaderKeysTooLarge,
-    /// Message contains a header with {0} key which is reserved.
-    #[error("message contains a header with {0} key which is reserved")]
-    MessageHeaderKeysReserved(Arc<str>),
-    /// Message contains a header value that's too large.
-    #[error("message contains a header value that's too large")]
-    MessageHeaderValuesTooLarge,
-    /// Encoded message data is too long.
-    #[error("encoded message data is too long")]
-    MessageDataTooLong,
-    /// Message timestamp is too far in the past.
-    #[error("message timestamp is too far in the past")]
-    MessageTimestampTooOld(#[source] std::time::SystemTimeError),
-    /// Could not serialize message data.
-    #[error("could not serialize message data")]
-    SerializeMessageData(#[source] Arc<serde_json::Error>),
-}
-
-/// An error message returned by the API
-#[derive(Debug, thiserror::Error)]
-#[error("{message}")]
-#[cfg_attr(docsrs, doc(cfg(feature = "google")))]
-pub struct PubSubPublishError {
-    message: String,
-}
-
-impl GooglePubSubError {
-    /// Can this error be considered recoverable.
-    ///
-    /// Some examples of transient errors include errors such as failure to make a network request
-    /// or authenticate with the API endpoint.
-    ///
-    /// In these instances there is a good chance that retrying publishing of a message may
-    /// succeed.
-    ///
-    /// # Examples
-    ///
-    /// This function is useful when deciding whether to re-queue message for publishing.
-    ///
-    /// ```no_run
-    /// # use hedwig::publish::GooglePubSubPublisher;
-    /// use futures_util::stream::StreamExt;
-    ///
-    /// let publisher: GooglePubSubPublisher<hyper::client::HttpConnector> = unimplemented!();
-    /// let mut batch = hedwig::publish::PublishBatch::new();
-    /// // add messages
-    /// let mut stream = batch.publish(&publisher);
-    /// let mut next_batch = hedwig::publish::PublishBatch::new();
-    /// async {
-    ///     while let Some(result) = stream.next().await {
-    ///         match result {
-    ///             (Ok(id), _, msg) => {
-    ///                 println!("message {} published successfully: {:?}", msg.uuid(), id);
-    ///             }
-    ///             (Err(e), topic, msg) if e.is_transient() => {
-    ///                 // Retry
-    ///                 next_batch.push(topic, msg);
-    ///             }
-    ///             (Err(e), _, msg) => {
-    ///                 eprintln!("failed to publish {}: {}", msg.uuid(), e);
-    ///             }
-    ///         }
-    ///     }
-    /// };
-    /// ```
-    pub fn is_transient(&self) -> bool {
-        use http::StatusCode;
-        use GooglePubSubError::*;
-        const GOOGLE_STATUS_CODE_CANCELLED: u16 = 499;
-
-        match self {
-            // These will typically encode I/O errors, although they might encode non-I/O stuff
-            // too.
-            PostMessages(..) => true,
-            ResponseBodyReceive(..) => true,
-            GetAuthToken(err) if matches!(**err, yup_oauth2::Error::HttpError(_)) => true,
-            GetAuthToken(_) => false,
-
-            // Some HTTP response codes are plausibly retry-able.
-            //
-            // References:
-            //
-            // https://github.com/googleapis/google-cloud-go/blob/9e64b018255bd8d9b31d60e8f396966251de946b/pubsub/apiv1/publisher_client.go#L86
-            // https://cloud.google.com/apis/design/errors#handling_errors
-            // https://cloud.google.com/pubsub/docs/reference/error-codes
-            ResponseStatus(_, StatusCode::BAD_GATEWAY) => true,
-            ResponseStatus(_, StatusCode::SERVICE_UNAVAILABLE) => true,
-            ResponseStatus(_, StatusCode::GATEWAY_TIMEOUT) => true,
-            ResponseStatus(_, StatusCode::TOO_MANY_REQUESTS) => true,
-            ResponseStatus(_, StatusCode::CONFLICT) => true,
-            ResponseStatus(_, code) => code.as_u16() == GOOGLE_STATUS_CODE_CANCELLED,
-
-            // Unlikely to ever succeed.
-            ConstructRequestUri(..) => false,
-            ResponseParse(..) => false,
-            ConstructRequest(..) => false,
-            MessageTooManyHeaders => false,
-            MessageHeaderKeysTooLarge => false,
-            MessageHeaderKeysReserved(..) => false,
-            MessageHeaderValuesTooLarge => false,
-            MessageDataTooLong => false,
-            MessageTimestampTooOld(..) => false,
-            SerializeMessageData(..) => false,
-        }
-    }
-}
-
-/// A publisher that uses the Google Cloud Pub/Sub service as a message transport
-///
-/// # Examples
-///
-/// ```no_run
-/// async {
-///     let google_project =
-///         std::env::var("GOOGLE_CLOUD_PROJECT").unwrap();
-///     let google_credentials = std::env::var("GOOGLE_APPLICATION_CREDENTIALS").unwrap();
-///     let secret = yup_oauth2::read_service_account_key(google_credentials)
-///         .await
-///         .expect("$GOOGLE_APPLICATION_CREDENTIALS is not a valid service account key");
-///     let client = hyper::Client::builder().build(hyper_tls::HttpsConnector::new());
-///     let authenticator = yup_oauth2::ServiceAccountAuthenticator::builder(secret)
-///         .hyper_client(client.clone())
-///         .build()
-///         .await
-///         .expect("could not create an authenticator");
-///     let publisher = hedwig::publish::GooglePubSubPublisher::new(
-///         "rust_publisher".into(),
-///         google_project.into(),
-///         client,
-///         authenticator
-///     );
-///     Ok::<_, Box<dyn std::error::Error>>(publisher)
-/// };
-/// ```
-#[allow(missing_debug_implementations)]
-#[derive(Clone)]
-#[cfg_attr(docsrs, doc(cfg(feature = "google")))]
-pub struct GooglePubSubPublisher<C> {
-    identifier: Cow<'static, str>,
-    google_cloud_project: Cow<'static, str>,
-    client: hyper::Client<C>,
-    authenticator: Option<Authenticator<C>>,
-    endpoint: http::Uri,
-}
-
-impl<C> GooglePubSubPublisher<C> {
-    /// Create a new Google Cloud Pub/Sub publisher
-    pub fn new(
-        identifier: Cow<'static, str>,
-        google_cloud_project: Cow<'static, str>,
-        client: hyper::Client<C>,
-        authenticator: Authenticator<C>,
-    ) -> GooglePubSubPublisher<C> {
-        GooglePubSubPublisher {
-            identifier,
-            google_cloud_project,
-            client,
-            authenticator: Some(authenticator),
-            endpoint: http::Uri::from_static("https://pubsub.googleapis.com/v1"),
-        }
-    }
-
-    /// Create a new Google Cloud Pub/Sub publisher pointed at a specific endpoint.
-    ///
-    /// This could be useful for running against an emulator, or for targeting a particular
-    /// region's endpoint.
-    ///
-    /// The authenticator is also made optional for this constructor, for use in auth-less contexts
-    /// such as emulators.
-    pub fn with_endpoint(
-        identifier: Cow<'static, str>,
-        google_cloud_project: Cow<'static, str>,
-        client: hyper::Client<C>,
-        authenticator: Option<Authenticator<C>>,
-        endpoint: http::Uri,
-    ) -> GooglePubSubPublisher<C> {
-        GooglePubSubPublisher {
-            identifier,
-            google_cloud_project,
-            client,
-            authenticator,
-            endpoint,
-        }
-    }
-}
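As a concrete illustration of the auth-less emulator case this constructor describes, here is a hypothetical setup against the removed 4.x API (assuming a local Pub/Sub emulator listening on port 8085; names are illustrative):

    // Hypothetical: build the 4.x publisher against a local emulator, with
    // no authenticator, using a plain HTTP connector.
    fn emulator_publisher() -> hedwig::publish::GooglePubSubPublisher<hyper::client::HttpConnector> {
        hedwig::publish::GooglePubSubPublisher::with_endpoint(
            "example_publisher".into(),
            "local-project".into(),
            hyper::Client::new(),
            None, // no authenticator: emulators don't check credentials
            http::Uri::from_static("http://localhost:8085/v1"),
        )
    }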
-
-async fn publish_single_body<C>(
-    batch: Result<SegmentationResult, GooglePubSubError>,
-    uri: http::uri::Uri,
-    authenticator: Option<Authenticator<C>>,
-    client: hyper::Client<C>,
-) -> Vec<Result<String, GooglePubSubError>>
-where
-    C: hyper::client::connect::Connect + Clone + Send + Sync + 'static,
-{
-    let batch = match batch {
-        Ok(b) => b,
-        Err(e) => return vec![Err(e)],
-    };
-    let msg_count = batch.messages_in_body;
-    let result = async move {
-        let request = http::Request::post(uri)
-            .header(http::header::ACCEPT, JSON_METATYPE)
-            .header(http::header::CONTENT_TYPE, JSON_METATYPE);
-
-        let request = if let Some(authenticator) = authenticator {
-            let token = authenticator
-                .token(&AUTH_SCOPES)
-                .await
-                .map_err(|e| GooglePubSubError::GetAuthToken(Arc::new(e)))?;
-
-            request.header(
-                http::header::AUTHORIZATION,
-                format!("Bearer {}", token.as_str()),
-            )
-        } else {
-            request
-        };
-
-        let request = request
-            .body(batch.request_body)
-            .map_err(|e| GooglePubSubError::ConstructRequest(Arc::new(e)))?;
-        let response = client
-            .request(request)
-            .await
-            .map_err(|e| GooglePubSubError::PostMessages(Arc::new(e)))?;
-        let (parts, body) = response.into_parts();
-        let response_body_data = body
-            .try_fold(vec![], |mut acc, ok| async move {
-                acc.extend(ok);
-                Ok(acc)
-            })
-            .await
-            .map_err(|e| GooglePubSubError::ResponseBodyReceive(Arc::new(e)))?;
-        if !parts.status.is_success() {
-            let src = serde_json::from_slice(&response_body_data)
-                .ok()
-                .map(|v: PubSubPublishFailResponseSchema| Arc::new(v.error.into()));
-            return Err(GooglePubSubError::ResponseStatus(src, parts.status));
-        }
-        let response_json: PubSubPublishResponseSchema =
-            serde_json::from_slice(&response_body_data)
-                .map_err(|e| GooglePubSubError::ResponseParse(Arc::new(e)))?;
-        Ok(response_json.message_ids.into_iter().map(Ok).collect())
-    }
-    .await;
-    match result {
-        Ok(msgs) => msgs,
-        Err(err) => std::iter::repeat(err).map(Err).take(msg_count).collect(),
-    }
-}
-
-impl<C> crate::publish::Publisher for GooglePubSubPublisher<C>
-where
-    C: hyper::client::connect::Connect + Clone + Send + Sync + 'static,
-{
-    type MessageId = String;
-    type MessageError = GooglePubSubError;
-    type PublishStream = GooglePubSubPublishStream;
-
-    fn publish<'a, I>(&self, topic: Topic, messages: I) -> Self::PublishStream
-    where
-        I: Iterator<Item = &'a ValidatedMessage>,
-    {
-        let Self {
-            identifier,
-            authenticator,
-            client,
-            google_cloud_project,
-            endpoint,
-        } = self;
-        let uri = http::Uri::from_maybe_shared(format!(
-            "{endpoint}/projects/{project}/topics/hedwig-{topic}:publish",
-            endpoint = endpoint,
-            project = google_cloud_project,
-            topic = topic
-        ))
-        .map_err(Arc::new);
-        let uri = match uri {
-            Ok(uri) => uri,
-            Err(e) => {
-                let c = messages.count();
-                return GooglePubSubPublishStream(Box::pin(futures_util::stream::iter(
-                    std::iter::repeat(Err(GooglePubSubError::ConstructRequestUri(e))).take(c),
-                )));
-            }
-        };
-        let stream = GoogleMessageSegmenter::new(&*identifier, messages)
-            .map(|batch| {
-                publish_single_body(batch, uri.clone(), authenticator.clone(), client.clone())
-            })
-            .collect::<futures_util::stream::FuturesOrdered<_>>()
-            .flat_map(futures_util::stream::iter);
-        GooglePubSubPublishStream(Box::pin(stream))
-    }
-}
-
-/// The `GooglePubSubPublisher::publish` stream
-#[cfg_attr(docsrs, doc(cfg(feature = "google")))]
-pub struct GooglePubSubPublishStream(
-    Pin<Box<dyn Stream<Item = Result<String, GooglePubSubError>> + Send + 'static>>,
-);
-
-impl Stream for GooglePubSubPublishStream {
-    type Item = Result<String, GooglePubSubError>;
-    fn poll_next(
-        mut self: Pin<&mut Self>,
-        cx: &mut task::Context<'_>,
-    ) -> task::Poll<Option<Self::Item>> {
-        Pin::new(&mut self.0).poll_next(cx)
-    }
-}
-
-/// Encode and segment the messages into API request bodies Google is willing to stomach.
-///
-/// Here, the logic is somewhat convoluted. We, in a very hacky way, implement a portion
-/// of json serialization in order to validate requirements PubSub places on API calls. In
-/// particular Google PubSub has the following limits that matter. Some of those limits are
-/// something we can possibly attempt to handle here:
-///
-/// Single publish API call: < API_DATA_LENGTH_LIMIT
-/// Single publish API call: < API_MSG_COUNT_LIMIT
-///
-/// Some others are irrecoverable and are indicative of malformed messages:
-///
-/// Message size (the data field) < API_MSG_LENGTH_LIMIT
-/// Attributes per message < API_MSG_ATTRIBUTE_COUNT_LIMIT
-/// Attribute key size < API_MSG_ATTRIBUTE_KEY_SIZE_LIMIT
-/// Attribute value size < API_MSG_ATTRIBUTE_VAL_SIZE_LIMIT
-///
-/// We will validate for both of these in this code section below.
-struct GoogleMessageSegmenter<'a, I> {
-    identifier: &'a str,
-    messages: I,
-    body_data: Vec<u8>,
-    messages_in_body: usize,
-    enqueued_error: Option<GooglePubSubError>,
-}
-
-struct SegmentationResult {
-    request_body: hyper::Body,
-    messages_in_body: usize,
-}
-
-impl<'a, I> GoogleMessageSegmenter<'a, I> {
-    fn new(identifier: &'a str, messages: I) -> Self {
-        Self {
-            identifier,
-            messages,
-            body_data: Vec::from(API_BODY_PREFIX),
-            messages_in_body: 0,
-            enqueued_error: None,
-        }
-    }
-
-    fn take_batch(&mut self) -> Option<SegmentationResult> {
-        debug_assert!(self.messages_in_body <= API_MSG_COUNT_LIMIT);
-        debug_assert!(self.body_data.len() <= API_DATA_LENGTH_LIMIT);
-        if self.messages_in_body == 0 {
-            return None;
-        }
-        let mut body_data = std::mem::replace(&mut self.body_data, Vec::from(API_BODY_PREFIX));
-        let messages_in_body = std::mem::replace(&mut self.messages_in_body, 0);
-        body_data.extend(API_BODY_SUFFIX);
-        Some(SegmentationResult {
-            request_body: hyper::Body::from(body_data),
-            messages_in_body,
-        })
-    }
-
-    fn maybe_enqueue_error(
-        &mut self,
-        error: GooglePubSubError,
-    ) -> Result<SegmentationResult, GooglePubSubError> {
-        match self.take_batch() {
-            Some(batch) => {
-                self.enqueued_error = Some(error);
-                Ok(batch)
-            }
-            None => Err(error),
-        }
-    }
-
-    fn encode_message(&self, message: &ValidatedMessage) -> Result<Vec<u8>, GooglePubSubError> {
-        let header_count = message.headers.len() + PubSubAttributesSchema::BUILTIN_ATTRIBUTES;
-        if header_count > API_MSG_ATTRIBUTE_COUNT_LIMIT {
-            return Err(GooglePubSubError::MessageTooManyHeaders);
-        }
-        for (key, value) in message.headers.iter() {
-            if key.len() >= API_MSG_ATTRIBUTE_KEY_SIZE_LIMIT {
-                return Err(GooglePubSubError::MessageHeaderKeysTooLarge);
-            }
-            if key.starts_with("hedwig_") {
-                return Err(GooglePubSubError::MessageHeaderKeysReserved(key[..].into()));
-            }
-            if value.len() >= API_MSG_ATTRIBUTE_VAL_SIZE_LIMIT {
-                return Err(GooglePubSubError::MessageHeaderValuesTooLarge);
-            }
-        }
-        let schema_value_too_large = message.schema.len() >= API_MSG_ATTRIBUTE_VAL_SIZE_LIMIT;
-        let identifier_too_large = self.identifier.len() >= API_MSG_ATTRIBUTE_VAL_SIZE_LIMIT;
-        if schema_value_too_large || identifier_too_large {
-            return Err(GooglePubSubError::MessageHeaderValuesTooLarge);
-        }
-        let payload = base64::encode(&message.data);
-        let timestamp = message
-            .timestamp
-            .duration_since(SystemTime::UNIX_EPOCH)
-            .map_err(GooglePubSubError::MessageTimestampTooOld)?
-            .as_millis()
-            .to_string();
-        let encoded_api_body = serde_json::to_vec(&PubSubMessageSchema {
-            data: &payload,
-            attributes: PubSubAttributesSchema {
-                hedwig_id: &message.id,
-                hedwig_publisher: &*self.identifier,
-                hedwig_message_timestamp: &timestamp,
-                hedwig_format_version: "1.0",
-                hedwig_schema: &message.schema,
-                headers: &message.headers,
-            },
-        })
-        .map_err(|e| GooglePubSubError::SerializeMessageData(Arc::new(e)))?;
-        if API_BODY_PREFIX.len() + API_BODY_SUFFIX.len() + encoded_api_body.len()
-            >= API_DATA_LENGTH_LIMIT
-        {
-            // This message wouldn’t fit even if we created a new batch, it is too large!
-            return Err(GooglePubSubError::MessageDataTooLong);
-        }
-        Ok(encoded_api_body)
-    }
-
-    fn append_message_data(&mut self, msg_json: &[u8]) {
-        debug_assert!(self.messages_in_body == 0 || self.body_data.last() == Some(&b','));
-        self.body_data.extend(msg_json);
-        self.messages_in_body += 1;
-    }
-}
-
-impl<'a, 'v, I: Iterator<Item = &'v ValidatedMessage>> Iterator for GoogleMessageSegmenter<'a, I> {
-    type Item = Result<SegmentationResult, GooglePubSubError>;
-    fn next(&mut self) -> Option<Self::Item> {
-        // This iterator needs to preserve the ordering of messages, so when we encounter an error
-        // we must first release the currently built batch before reporting an error. Then, on the
-        // next iteration (i.e. this one) we return the error.
-        if let Some(err) = self.enqueued_error.take() {
-            return Some(Err(err));
-        }
-        loop {
-            let message = match self.messages.next() {
-                None => return self.take_batch().map(Ok),
-                Some(msg) => msg,
-            };
-            // Validate if a message is structurally valid.
-            let msg_json = match self.encode_message(message) {
-                Ok(msg_json) => msg_json,
-                Err(e) => return Some(self.maybe_enqueue_error(e)),
-            };
-            // Append to current batch or create a new one (and return finished batch).
-            let need_comma = self.messages_in_body != 0;
-            let data_len = self.body_data.len() + need_comma as usize + msg_json.len();
-            let data_fits_in_current = data_len + API_BODY_SUFFIX.len() < API_DATA_LENGTH_LIMIT;
-            let message_fits_in_current = self.messages_in_body < API_MSG_COUNT_LIMIT;
-            if !data_fits_in_current || !message_fits_in_current {
-                // We need a new batch.
-                self.body_data.extend(API_BODY_SUFFIX);
-                let batch = self.take_batch();
-                self.append_message_data(&msg_json);
-                debug_assert!(batch.is_some());
-                return batch.map(Ok);
-            } else {
-                // We can append the current batch.
- if need_comma { - self.body_data.push(b','); - } - self.append_message_data(&msg_json); - } - } - } -} - -/// Schema for the Google PubSubMessage REST API type -#[derive(serde::Serialize)] -struct PubSubMessageSchema<'a> { - data: &'a str, - attributes: PubSubAttributesSchema<'a>, -} - -#[derive(serde::Serialize)] -struct PubSubAttributesSchema<'a> { - hedwig_id: &'a uuid::Uuid, - hedwig_format_version: &'a str, - hedwig_message_timestamp: &'a str, - hedwig_publisher: &'a str, - hedwig_schema: &'a str, - #[serde(flatten)] - headers: &'a crate::Headers, -} - -impl<'a> PubSubAttributesSchema<'a> { - const BUILTIN_ATTRIBUTES: usize = 5; -} - -/// Schema for the Google PubSubResponse REST API type -#[derive(serde::Deserialize)] -struct PubSubPublishResponseSchema { - #[serde(rename = "messageIds")] - message_ids: Vec, -} - -#[derive(serde::Deserialize)] -struct PubSubPublishFailResponseSchema { - error: PubSubPublishErrorSchema, -} - -#[derive(serde::Deserialize)] -struct PubSubPublishErrorSchema { - message: String, -} - -impl From for PubSubPublishError { - fn from(other: PubSubPublishErrorSchema) -> Self { - Self { - message: other.message, - } - } -} - -#[cfg(test)] -#[allow(unused)] -mod tests { - use super::{GoogleMessageSegmenter, GooglePubSubError, SegmentationResult}; - use crate::{publish::EncodableMessage, tests::*, validators, Headers, ValidatedMessage}; - use futures_util::stream::TryStreamExt; - use hyper::body::HttpBody; - use std::time::SystemTime; - use uuid::Uuid; - - #[test] - fn assert_send_sync() { - fn assert_markers() {} - assert_markers::>(); - } - - #[test] - fn empty_segmenter() { - let mut segmenter = GoogleMessageSegmenter::new("", [].iter()); - assert!(segmenter.take_batch().is_none()); - assert!(segmenter.next().is_none()); - assert!(segmenter.take_batch().is_none()); - assert!(segmenter.next().is_none()); - } - - async fn test_segmenter(messages: Vec) { - let messages_expected = messages.len(); - let mut segmenter = GoogleMessageSegmenter::new("", messages.iter()); - assert!(segmenter.take_batch().is_none()); - let mut segment = match segmenter.next() { - None => panic!("expected a segment!"), - Some(Err(e)) => panic!("expected a successful segment! 
Got {}", e), - Some(Ok(segment)) => segment, - }; - assert!(matches!(segmenter.next(), None)); - assert!(segmenter.take_batch().is_none()); - let data = segment - .request_body - .data() - .await - .expect("body") - .expect("body"); - let val = serde_json::from_slice::(&data[..]) - .expect("data should be valid json!"); - let messages = val.get("messages").expect("messages key exists"); - for message_idx in 0..messages_expected { - let message = messages.get(message_idx).expect("mesage exists"); - message.get("data").expect("mesage data exists"); - } - } - - #[cfg(feature = "json-schema")] - #[tokio::test] - async fn single_msg_segmenter() { - let validator = validators::JsonSchemaValidator::new(SCHEMA).unwrap(); - let msgs = vec![JsonUserCreatedMessage::new_valid("U_123") - .encode(&validator) - .unwrap()]; - test_segmenter(msgs).await; - } - - #[cfg(feature = "json-schema")] - #[tokio::test] - async fn multi_msg_segmenter() { - let validator = validators::JsonSchemaValidator::new(SCHEMA).unwrap(); - let msgs = vec![ - JsonUserCreatedMessage::new_valid("U_123") - .encode(&validator) - .unwrap(), - JsonUserCreatedMessage::new_valid("U_124") - .encode(&validator) - .unwrap(), - JsonUserCreatedMessage::new_valid("U_125") - .encode(&validator) - .unwrap(), - JsonUserCreatedMessage::new_valid("U_126") - .encode(&validator) - .unwrap(), - ]; - test_segmenter(msgs).await; - } - - #[cfg(feature = "json-schema")] - #[tokio::test] - async fn huuuge_single_msg_segmenter() { - let validator = validators::JsonSchemaValidator::new(SCHEMA).unwrap(); - let msgs = vec![JsonUserCreatedMessage::new_valid( - // base64 makes strings 4/3 larger than original. - String::from_utf8(vec![b'a'; (10 * 1024 * 1024 - 512) * 3 / 4]).unwrap(), - ) - .encode(&validator) - .unwrap()]; - test_segmenter(msgs).await; - } - - #[cfg(feature = "json-schema")] - #[tokio::test] - async fn less_huge_double_msg_segmenter() { - let validator = validators::JsonSchemaValidator::new(SCHEMA).unwrap(); - // base64 makes strings 4/3 larger than original. 
- let msgs = vec![ - JsonUserCreatedMessage::new_valid( - String::from_utf8(vec![b'a'; (5 * 1024 * 1024 - 512) * 3 / 4]).unwrap(), - ) - .encode(&validator) - .unwrap(), - JsonUserCreatedMessage::new_valid( - String::from_utf8(vec![b'a'; (5 * 1024 * 1024 - 512) * 3 / 4]).unwrap(), - ) - .encode(&validator) - .unwrap(), - ]; - test_segmenter(msgs).await; - } - - #[cfg(feature = "json-schema")] - #[test] - fn oversized_single_msg_segmenter() { - let validator = validators::JsonSchemaValidator::new(SCHEMA).unwrap(); - let small_message = JsonUserCreatedMessage::new_valid("U345"); - let oversized_message = JsonUserCreatedMessage::new_valid( - String::from_utf8(vec![b'a'; (10 * 1024 * 1024 - 50) * 3 / 4]).unwrap(), - ); - let msgs = vec![ - small_message.encode(&validator).unwrap(), - oversized_message.encode(&validator).unwrap(), - small_message.encode(&validator).unwrap(), - ]; - let mut segmenter = GoogleMessageSegmenter::new("", msgs.iter()); - assert!(segmenter.take_batch().is_none()); - assert!(matches!(segmenter.next(), Some(Ok(_)))); - assert!(matches!(segmenter.next(), Some(Err(_)))); - assert!(matches!(segmenter.next(), Some(Ok(_)))); - assert!(matches!(segmenter.next(), None)); - } - - #[cfg(feature = "json-schema")] - #[test] - fn segmenter_preserves_order_when_splitting() { - let validator = validators::JsonSchemaValidator::new(SCHEMA).unwrap(); - let small_message = JsonUserCreatedMessage::new_valid("U345"); - let oversized_message = JsonUserCreatedMessage::new_valid( - String::from_utf8(vec![b'a'; (10 * 1024 * 1024 - 512) * 3 / 4]).unwrap(), - ); - - let msgs = vec![ - small_message.encode(&validator).unwrap(), - oversized_message.encode(&validator).unwrap(), - small_message.encode(&validator).unwrap(), - ]; - let mut segmenter = GoogleMessageSegmenter::new("", msgs.iter()); - assert!(segmenter.take_batch().is_none()); - assert!(matches!(segmenter.next(), Some(Ok(_)))); - assert!(matches!(segmenter.next(), Some(Ok(_)))); - assert!(matches!(segmenter.next(), Some(Ok(_)))); - assert!(matches!(segmenter.next(), None)); - } - - #[test] - fn errors_send_sync() { - assert_error::(); - } - - #[test] - fn publish_stream_is_send() { - assert_send::(); - } - - #[cfg(feature = "json-schema")] - fn test_headers(headers: Headers) -> Option> { - let validator = validators::JsonSchemaValidator::new(SCHEMA).unwrap(); - let msg = JsonUserCreatedMessage { - uuid: Uuid::new_v4(), - schema: "https://hedwig.corp/schema#/schemas/user.created/1.0", - user_id: String::from("hello"), - headers, - time: SystemTime::now(), - } - .encode(&validator) - .expect("validates"); - let mut segmenter = GoogleMessageSegmenter::new("", vec![&msg].into_iter()); - segmenter.next() - } - - #[cfg(feature = "json-schema")] - fn test_header_name_value( - name: String, - value: String, - ) -> Option> { - test_headers(vec![(name, value)].into_iter().collect()) - } - - #[cfg(feature = "json-schema")] - fn test_header_name(name: String) -> Option> { - test_header_name_value(name, "value".into()) - } - - #[cfg(feature = "json-schema")] - fn test_header_value(value: String) -> Option> { - test_header_name_value("key".into(), value) - } - - #[cfg(feature = "json-schema")] - #[test] - fn reserved_header_names_fail_validation() { - for &name in &["hedwig_", "hedwig_banana", "hedwig_message_timestamp"] { - assert!(matches!( - test_header_name(name.into()), - Some(Err(GooglePubSubError::MessageHeaderKeysReserved(_))) - )); - } - } - - #[cfg(feature = "json-schema")] - #[test] - fn valid_header_names_and_values_pass() { - for &name in 
&["hello", "hedwi", "banana"] { - assert!(matches!(test_header_name(name.into()), Some(Ok(_)))); - } - } - - #[cfg(feature = "json-schema")] - #[test] - fn valid_very_long_header_name() { - let name = String::from_utf8(vec![b'a'; 255]).unwrap(); - assert!(matches!(test_header_name(name), Some(Ok(_)))); - } - - #[cfg(feature = "json-schema")] - #[test] - fn invalid_overlong_header_name() { - let name = String::from_utf8(vec![b'a'; 256]).unwrap(); - assert!(matches!( - test_header_name(name), - Some(Err(GooglePubSubError::MessageHeaderKeysTooLarge)) - )); - } - - #[cfg(feature = "json-schema")] - #[test] - fn valid_very_long_header_value() { - let name = String::from_utf8(vec![b'a'; 1023]).unwrap(); - assert!(matches!(test_header_value(name), Some(Ok(_)))); - } - - #[cfg(feature = "json-schema")] - #[test] - fn invalid_overlong_header_value() { - let name = String::from_utf8(vec![b'a'; 1024]).unwrap(); - assert!(matches!( - test_header_value(name), - Some(Err(GooglePubSubError::MessageHeaderValuesTooLarge)) - )); - } - - #[cfg(feature = "json-schema")] - #[tokio::test] - async fn valid_very_many_headers() { - let mut headers = Headers::new(); - for i in 0..(super::API_MSG_ATTRIBUTE_COUNT_LIMIT - - super::PubSubAttributesSchema::BUILTIN_ATTRIBUTES) - { - headers.insert(format!("hdr{}", i), String::from("value")); - } - let result = test_headers(headers); - assert!(matches!(result, Some(Ok(_)))); - let result = result.unwrap().unwrap(); - let body = result - .request_body - .try_fold(vec![], |mut acc, ok| async move { - acc.extend(ok); - Ok(acc) - }) - .await - .unwrap(); - let msg: serde_json::Value = serde_json::from_slice(&body).unwrap(); - let actual_attribute_count = msg - .get("messages") - .unwrap() - .get(0) - .unwrap() - .get("attributes") - .unwrap() - .as_object() - .unwrap() - .len(); - assert_eq!(actual_attribute_count, super::API_MSG_ATTRIBUTE_COUNT_LIMIT); - } - - #[cfg(feature = "json-schema")] - #[test] - fn invalid_too_many_headers() { - let mut headers = Headers::new(); - for i in 0..(100 - super::PubSubAttributesSchema::BUILTIN_ATTRIBUTES + 1) { - headers.insert(format!("hdr{}", i), String::from("value")); - } - assert!(matches!( - test_headers(headers), - Some(Err(GooglePubSubError::MessageTooManyHeaders)) - )); - } -} diff --git a/src/publish/publishers/mock.rs b/src/publish/publishers/mock.rs deleted file mode 100644 index 09f7bf3..0000000 --- a/src/publish/publishers/mock.rs +++ /dev/null @@ -1,114 +0,0 @@ -use crate::{publish::Publisher, Topic, ValidatedMessage}; - -use std::{ - pin::Pin, - sync::{Arc, Mutex, MutexGuard}, - task, -}; - -use futures_util::stream::Stream; -use uuid::Uuid; - -/// A mock publisher that stores messages in-memory for later verification. -/// -/// This is useful primarily in tests. -/// -/// # Examples -/// -/// ``` -/// use hedwig::publish::MockPublisher; -/// let publisher = MockPublisher::default(); -/// let publisher_view = publisher.clone(); -/// ``` -#[derive(Debug, Default, Clone)] -pub struct MockPublisher(Arc>>); - -impl MockPublisher { - /// Create a new mock publisher. - pub fn new() -> Self { - Default::default() - } - - /// Number of messages published into this publisher. - pub fn len(&self) -> usize { - let lock = self.0.lock().expect("this mutex cannot get poisoned"); - lock.len() - } - - /// Number of messages published into this publisher. - pub fn is_empty(&self) -> bool { - let lock = self.0.lock().expect("this mutex cannot get poisoned"); - lock.is_empty() - } - - /// Verify that a message was published. 
This method asserts that the message you expected to
-    /// be published, was indeed published
-    ///
-    /// Panics if the message was not published.
-    pub fn assert_message_published<T: Into<Topic>>(&self, topic: T, uuid: &Uuid) {
-        let topic = topic.into();
-        {
-            let lock = self.0.lock().expect("this mutex cannot get poisoned");
-            for (mt, msg) in &lock[..] {
-                if mt == &topic && &msg.id == uuid {
-                    return;
-                }
-            }
-        }
-        panic!(
-            "Message with uuid {} was not published to topic {}",
-            uuid, topic
-        );
-    }
-
-    /// Get a view over the messages that have been published to this publisher
-    pub fn messages(&self) -> Messages {
-        Messages(self.0.lock().expect("lock poisoned!"))
-    }
-}
-
-/// A view over the messages in a `MockPublisher`, returned by
-/// [`messages`](MockPublisher::messages)
-pub struct Messages<'a>(MutexGuard<'a, Vec<(Topic, ValidatedMessage)>>);
-
-impl<'a> Messages<'a> {
-    /// Get an iterator over the messages in the `MockPublisher`
-    pub fn iter(&self) -> impl Iterator<Item = &(Topic, ValidatedMessage)> {
-        self.0.iter()
-    }
-}
-
-impl Publisher for MockPublisher {
-    type MessageId = Uuid;
-    type MessageError = std::convert::Infallible;
-    type PublishStream = MockPublishStream;
-
-    fn publish<'a, I>(&self, topic: Topic, messages: I) -> Self::PublishStream
-    where
-        I: Iterator<Item = &'a ValidatedMessage> + ExactSizeIterator,
-    {
-        let data = self.0.clone();
-        let messages: Vec<_> = messages.cloned().collect();
-        MockPublishStream(Box::new(messages.into_iter().map(move |msg| {
-            let id = msg.id;
-            data.lock()
-                .expect("this mutex cannot get poisoned")
-                .push((topic, msg));
-            id
-        })))
-    }
-}
-
-/// Stream of mock publisher results.
-pub struct MockPublishStream(Box<dyn Iterator<Item = Uuid> + Send + Sync>);
-
-impl Stream for MockPublishStream {
-    type Item = Result<Uuid, std::convert::Infallible>;
-
-    fn poll_next(
-        mut self: Pin<&mut Self>,
-        _: &mut task::Context<'_>,
-    ) -> task::Poll<Option<Self::Item>> {
-        task::Poll::Ready(self.0.next().map(Ok))
-    }
-}
diff --git a/src/publish/publishers/mod.rs b/src/publish/publishers/mod.rs
deleted file mode 100644
index fa2cdc8..0000000
--- a/src/publish/publishers/mod.rs
+++ /dev/null
@@ -1,18 +0,0 @@
-//! [`Publisher`](crate::publish::Publisher) implementations.
-
-mod null;
-
-// unreachable_pub doesn't work through glob imports.
-// see https://github.com/rust-lang/rust/blob/0148b971c921a0831fbf3357e5936eec724e3566/compiler/rustc_privacy/src/lib.rs#L590
-#[allow(unreachable_pub)]
-pub use null::*;
-
-mod mock;
-#[allow(unreachable_pub)]
-pub use mock::*;
-
-#[cfg(feature = "google")]
-mod googlepubsub;
-#[cfg(feature = "google")]
-#[allow(unreachable_pub)]
-pub use googlepubsub::*;
diff --git a/src/publish/publishers/null.rs b/src/publish/publishers/null.rs
deleted file mode 100644
index 00fcf53..0000000
--- a/src/publish/publishers/null.rs
+++ /dev/null
@@ -1,43 +0,0 @@
-use crate::{publish::Publisher, ValidatedMessage};
-use futures_util::stream::Stream;
-use std::{pin::Pin, task};
-
-/// A blackhole publisher that doesn't publish messages anywhere.
-///
-/// Great for conditionally disabling publishing.
-///
-/// # Examples
-///
-/// ```
-/// use hedwig::publish::NullPublisher;
-/// let publisher = NullPublisher::default();
-/// ```
-#[derive(Debug, Default, Clone, Copy)]
-pub struct NullPublisher;
-
-impl Publisher for NullPublisher {
-    type MessageId = ();
-    type MessageError = std::convert::Infallible;
-    type PublishStream = NullPublishStream;
-
-    fn publish<'a, I>(&self, _: crate::Topic, messages: I) -> Self::PublishStream
-    where
-        I: Iterator<Item = &'a ValidatedMessage> + ExactSizeIterator,
-    {
-        NullPublishStream(0..messages.len())
-    }
-}
-
-/// Stream for `NullPublisher::publish`.
-pub struct NullPublishStream(std::ops::Range<usize>); - -impl Stream for NullPublishStream { - type Item = Result<(), std::convert::Infallible>; - - fn poll_next( - mut self: Pin<&mut Self>, - _: &mut task::Context<'_>, - ) -> task::Poll<Option<Self::Item>> { - task::Poll::Ready(self.0.next().map(|_| Ok(()))) - } -} diff --git a/src/publish/sink.rs b/src/publish/sink.rs deleted file mode 100644 index b188d80..0000000 --- a/src/publish/sink.rs +++ /dev/null @@ -1,834 +0,0 @@ -//! Provides a `Sink` interface for publishing to hedwig. - -use crate::{ - publish::{EncodableMessage, PublishBatch, Publisher}, - Topic, ValidatedMessage, -}; -use either::Either; -use futures_util::{ready, sink::Sink, stream::Stream}; -use pin_project::pin_project; -use std::{ - pin::Pin, - task::{Context, Poll}, -}; - -/// A sink which ingests messages, validates them with the given validator, then forwards them to -/// the given destination sink. -pub fn validator_sink<M, S>( - validator: M::Validator, - destination_sink: S, -) -> ValidatorSink<M, M::Validator, S> -where - M: EncodableMessage, - S: Sink<(Topic, ValidatedMessage)>, -{ - ValidatorSink { - _message_type: std::marker::PhantomData, - validator, - sink: destination_sink, - } -} - -/// A sink which ingests validated messages and publishes them to the given publisher. -/// -/// This sink internally batches elements to publish multiple messages at once. The `batch_size` -/// argument can be adjusted to control the number of elements stored in these batches. -/// `poll_ready` will check whether inserting an additional element would exceed this size limit, -/// and trigger a flush before returning `Ready` if so. Users may call `poll_flush` to empty this -/// batch at any time. -/// -/// Unlike some sinks, this sink's polling functions can be resumed after encountering an error, so -/// long as the underlying publisher's errors are not terminal. Transient errors, for example, can -/// be ignored and polling can be resumed to continue publishing. -/// -/// The sink can accept new elements while a flush is in progress, so long as the internal batch -/// has additional capacity -- i.e. `poll_ready` may return `Ready` while `poll_flush` returns -/// `Pending`. Together with the resume-on-error support mentioned above and the data in the -/// [`FailedMessage`](FailedMessage) error type, this behavior can be used to retry failed messages -/// by re-submitting them to this same sink. -// TODO actually implement such a retry Sink adapter layer with some backoff -pub fn publisher_sink<P>

(publisher: P, batch_size: usize) -> PublisherSink<P> -where - P: Publisher, -{ - PublisherSink { - publisher, - batch_capacity: usize::max(1, batch_size), - batch: PublishBatch::new(), - flush_state: FlushState::NotFlushing, - } -} - -/// The sink returned by the [`validator_sink`](validator_sink) function -#[pin_project] -pub struct ValidatorSink<M, V, S> { - _message_type: std::marker::PhantomData<M>, - validator: V, - #[pin] - sink: S, -} - -impl<M, S> Sink<M> for ValidatorSink<M, M::Validator, S> -where - M: EncodableMessage, - S: Sink<(Topic, ValidatedMessage)>, -{ - type Error = Either<M::Error, S::Error>; - - fn poll_ready(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> { - self.project() - .sink - .poll_ready(cx) - .map(|res| res.map_err(Either::Right)) - } - - fn start_send(self: Pin<&mut Self>, message: M) -> Result<(), Self::Error> { - let this = self.project(); - - let topic = message.topic(); - let validated_message = message.encode(this.validator).map_err(Either::Left)?; - - this.sink - .start_send((topic, validated_message)) - .map_err(Either::Right) - } - - fn poll_flush(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> { - self.project() - .sink - .poll_flush(cx) - .map(|res| res.map_err(Either::Right)) - } - - fn poll_close(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> { - self.project() - .sink - .poll_close(cx) - .map(|res| res.map_err(Either::Right)) - } -} - -/// An error encountered when a message fails to be published -#[derive(Debug, Clone, thiserror::Error)] -#[cfg_attr(test, derive(PartialEq, Eq))] -#[error("Failed to publish a message")] -pub struct FailedMessage<E> { - /// The underlying error source - #[source] - pub error: E, - - /// The topic to which this publish attempt was made - pub topic: Topic, - - /// The message which failed to be published - pub message: ValidatedMessage, -} - -/// The sink returned by the [`publisher_sink`](publisher_sink) function -#[pin_project] -pub struct PublisherSink<P: Publisher> { - publisher: P, - batch_capacity: usize, - batch: PublishBatch, - #[pin] - flush_state: FlushState<P::PublishStream>, -} - -/// The sink is either flushing or not flushing. Closing and ready-checking indirectly trigger flushes -#[pin_project(project=FlushProjection)] -#[derive(Debug)] -enum FlushState<S> { - NotFlushing, - Flushing(#[pin] crate::publish::PublishBatchStream<S>), -} - -impl<P>

Sink<(Topic, ValidatedMessage)> for PublisherSink<P> -where - P: Publisher, - P::PublishStream: Unpin, -{ - type Error = FailedMessage<P::MessageError>; - - // The sink works by maintaining a PublishBatch to collect entries, and publishing that batch - // as soon as the configured capacity is met, or an explicit flush or close is called. - // - // Once a flush starts, the batch creates a stream which will submit messages to the publisher. - // This stream is driven by the `poll_flush` method of the sink; once the stream is complete, - // `poll_flush` will return Ready. - // - // Because the publishing stream may treat errors as transient -- and thus may allow the stream - // to be polled again after an error -- the sink also allows polling the flush after - // encountering an error. Thanks to this non-terminal erroring, the sink can support retries by - // having the caller submit failed messages back to the sink. The actual retry logic is left to - // a layer above this sink, but the sink itself provides the mechanisms to support it by having - // the batch collect elements while a flush is in progress. - - fn poll_ready(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> { - if self.batch.len() < self.batch_capacity { - Poll::Ready(Ok(())) - } else { - self.poll_flush(cx) - } - } - - fn start_send( - mut self: Pin<&mut Self>, - (topic, message): (Topic, ValidatedMessage), - ) -> Result<(), Self::Error> { - assert!( - self.batch.len() < self.batch_capacity, - "start_send must be preceded by a successful poll_ready" - ); - self.batch.push(topic, message); - Ok(()) - } - - fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> { - use FlushProjection::{Flushing, NotFlushing}; - - let mut this = self.as_mut().project(); - - // Check whether the sink is in the flushing state already or not. - // - // State transitions are done using this loop + match. The state variable may be updated by - // a match arm, after which the caller will continue on the loop - 'check_state: loop { - return match this.flush_state.as_mut().project() { - NotFlushing => { - if this.batch.is_empty() { - // if the sink isn't flushing, and the batch is empty, there's nothing to do! - Poll::Ready(Ok(())) - } else { - // the sink isn't yet flushing, but there are elements to flush. - // take them out of the batch and start the publishing stream - let batch = std::mem::replace(this.batch, PublishBatch::new()); - let publish = batch.publish(this.publisher); - this.flush_state.set(FlushState::Flushing(publish)); - - // re-enter the match with the new state, where the publish stream will be - // polled - continue 'check_state; - } - } - Flushing(mut flush_stream) => { - // if the sink is flushing, the publish stream needs to be polled until its - // completion - 'poll_publish: loop { - break match ready!(flush_stream.as_mut().poll_next(cx)) { - None => { - // done flushing. switch to NotFlushing and check if any new - // elements have been added to the publish buffer - this.flush_state.set(FlushState::NotFlushing); - - // re-enter the match with the new state - continue 'check_state; - } - // if an error occurs in publishing, pause flushing and propagate the - // error. The caller may continue to poll after this, and the publish - // attempt will resume - Some((Err(error), topic, message)) => Poll::Ready(Err(FailedMessage { - error, - topic, - message, - })), - Some((Ok(_msg_id), _topic, _message)) => { - // successful returns from the publishing stream simply have - // nothing to do.
Keep driving the publishing to completion - continue 'poll_publish; - } - }; - } - } - }; - } - } - - fn poll_close(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> { - self.poll_flush(cx) - } -} - -#[cfg(test)] -mod test { - use super::*; - - fn test_validated_message(data: impl Into<Vec<u8>>) -> ValidatedMessage { - ValidatedMessage { - id: uuid::Uuid::nil(), - timestamp: std::time::SystemTime::UNIX_EPOCH, - schema: "test_schema".into(), - headers: crate::Headers::default(), - data: data.into().into(), - } - } - - struct TestValidator; - - #[derive(Debug, Eq, PartialEq)] - struct TestMessage(&'static str); - - impl EncodableMessage for TestMessage { - type Error = std::convert::Infallible; - type Validator = TestValidator; - - fn topic(&self) -> Topic { - "test_topic".into() - } - - fn encode(self, _: &Self::Validator) -> Result<ValidatedMessage, Self::Error> { - Ok(test_validated_message(self.0)) - } - } - - mod validator_sink { - use super::*; - - struct CountingSink<T> { - poll_ready_called: u32, - start_send_called: u32, - poll_flush_called: u32, - poll_close_called: u32, - elements: Vec<T>, - } - - impl<T> CountingSink<T> { - fn new() -> Self { - Self { - poll_ready_called: 0, - start_send_called: 0, - poll_flush_called: 0, - poll_close_called: 0, - elements: Vec::new(), - } - } - } - - // explicit impl so the sink is `Unpin` regardless of `T` - impl<T> Unpin for CountingSink<T> {} - - impl<T> Sink<T> for CountingSink<T> { - type Error = std::convert::Infallible; - - fn poll_ready( - mut self: Pin<&mut Self>, - _: &mut Context<'_>, - ) -> Poll<Result<(), Self::Error>> { - self.poll_ready_called += 1; - Poll::Ready(Ok(())) - } - - fn start_send(mut self: Pin<&mut Self>, item: T) -> Result<(), Self::Error> { - self.start_send_called += 1; - self.elements.push(item); - Ok(()) - } - - fn poll_flush( - mut self: Pin<&mut Self>, - _: &mut Context<'_>, - ) -> Poll<Result<(), Self::Error>> { - self.poll_flush_called += 1; - Poll::Ready(Ok(())) - } - - fn poll_close( - mut self: Pin<&mut Self>, - _: &mut Context<'_>, - ) -> Poll<Result<(), Self::Error>> { - self.poll_close_called += 1; - Poll::Ready(Ok(())) - } - } - - #[test] - fn methods_delegated() { - let mut sink = validator_sink::<TestMessage, _>(TestValidator, CountingSink::new()); - - let mut cx = Context::from_waker(futures_util::task::noop_waker_ref()); - - // sanity check - assert_eq!(0, sink.sink.poll_ready_called); - assert_eq!(0, sink.sink.start_send_called); - assert_eq!(0, sink.sink.poll_flush_called); - assert_eq!(0, sink.sink.poll_close_called); - assert_eq!(Vec::<(Topic, ValidatedMessage)>::new(), sink.sink.elements); - - assert_eq!(Poll::Ready(Ok(())), Pin::new(&mut sink).poll_ready(&mut cx)); - - assert_eq!(1, sink.sink.poll_ready_called); - assert_eq!(0, sink.sink.start_send_called); - assert_eq!(0, sink.sink.poll_flush_called); - assert_eq!(0, sink.sink.poll_close_called); - - assert_eq!(Poll::Ready(Ok(())), Pin::new(&mut sink).poll_flush(&mut cx)); - - assert_eq!(1, sink.sink.poll_ready_called); - assert_eq!(0, sink.sink.start_send_called); - assert_eq!(1, sink.sink.poll_flush_called); - assert_eq!(0, sink.sink.poll_close_called); - - assert_eq!(Poll::Ready(Ok(())), Pin::new(&mut sink).poll_close(&mut cx)); - - assert_eq!(1, sink.sink.poll_ready_called); - assert_eq!(0, sink.sink.start_send_called); - assert_eq!(1, sink.sink.poll_flush_called); - assert_eq!(1, sink.sink.poll_close_called); - - assert_eq!(Ok(()), Pin::new(&mut sink).start_send(TestMessage("foo"))); - - assert_eq!(1, sink.sink.poll_ready_called); - assert_eq!(1, sink.sink.start_send_called); - assert_eq!(1, sink.sink.poll_flush_called); - assert_eq!(1, sink.sink.poll_close_called); - assert_eq!( - vec![( - 
"test_topic".into(), - TestMessage("foo").encode(&TestValidator).unwrap() - )], - sink.sink.elements - ); - } - } - - mod publisher_sink { - use super::*; - use crate::publish::publishers::MockPublisher; - use futures_util::pin_mut; - use std::{cell::RefCell, rc::Rc}; - - #[derive(Debug, Eq, PartialEq, thiserror::Error)] - #[error("test error")] - struct TestError(String); - - /// what should the publish stream do next - #[derive(Debug)] - enum PublishCommand { - /// return Poll::Pending - Pending, - - /// advance the stream - Next, - - /// discard the next value and return the given error - Error(TestError), - } - - struct ControlledPublishStream { - command: Rc>, - mock: MockPublisher, - messages: std::vec::IntoIter<(Topic, ValidatedMessage)>, - } - - impl Stream for ControlledPublishStream { - type Item = Result<(), TestError>; - - fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { - use PublishCommand::{Error, Next, Pending}; - - // reset to Pending every time so that advancing has to be done affirmatively - let command = std::mem::replace(&mut *self.command.borrow_mut(), Pending); - - match command { - Pending => { - cx.waker().wake_by_ref(); - Poll::Pending - } - Next => { - // publish the next element to the mock - let (topic, message) = match self.messages.next() { - None => return Poll::Ready(None), - Some(t) => t, - }; - - let stream = self.mock.publish(topic, std::iter::once(&message)); - pin_mut!(stream); - - // mock publishing shouldn't fail - assert_eq!( - Poll::Ready(Some(Ok(*message.uuid()))), - stream.as_mut().poll_next(cx) - ); - assert_eq!(Poll::Ready(None), stream.as_mut().poll_next(cx)); - - Poll::Ready(Some(Ok(()))) - } - Error(err) => { - // drop the publish stream's actual element, as if the publish had failed - std::mem::drop(self.messages.next()); - - // replace it with this error - Poll::Ready(Some(Err(err))) - } - } - } - } - - #[derive(Debug)] - struct ControlledPublisher { - command: Rc>, - mock: MockPublisher, - } - - impl ControlledPublisher { - fn new() -> (Self, Rc>) { - let command = Rc::new(RefCell::new(PublishCommand::Pending)); - ( - ControlledPublisher { - command: Rc::clone(&command), - mock: MockPublisher::new(), - }, - command, - ) - } - } - - impl Publisher for ControlledPublisher { - type MessageId = (); - type MessageError = TestError; - type PublishStream = ControlledPublishStream; - - fn publish<'a, I>(&self, topic: Topic, messages: I) -> Self::PublishStream - where - I: Iterator + ExactSizeIterator + DoubleEndedIterator, - { - ControlledPublishStream { - command: Rc::clone(&self.command), - mock: self.mock.clone(), - messages: messages - .cloned() - .map(|msg| (topic, msg)) - .collect::>() - .into_iter(), - } - } - } - - /// The publisher should check that a batch size of zero doesn't crash and instead uses - /// something like a max(1, size) - #[test] - fn batch_size_zero() { - let batch_size = 0; - let publisher = MockPublisher::new(); - let sink = publisher_sink(publisher, batch_size); - pin_mut!(sink); - let mut cx = Context::from_waker(futures_util::task::noop_waker_ref()); - - assert_eq!(Poll::Ready(Ok(())), sink.as_mut().poll_ready(&mut cx)); - assert_eq!( - Ok(()), - sink.as_mut() - .start_send(("test_topic".into(), test_validated_message("foo"))) - ); - } - - /// The publisher should start flushing when the batch size has been exceeded - #[test] - fn batching_batches() { - let topic = "test_topic".into(); - let batch_size = 3; - let publisher = MockPublisher::new(); - let sink = publisher_sink(publisher, batch_size); 
- pin_mut!(sink); - - // sanity check - assert!(sink.publisher.is_empty()); - - let mut cx = Context::from_waker(futures_util::task::noop_waker_ref()); - - // insert 3 messages to reach the buffer limit - for &data in ["foo", "bar", "baz"].iter() { - assert_eq!(Poll::Ready(Ok(())), sink.as_mut().poll_ready(&mut cx)); - assert_eq!( - Ok(()), - sink.as_mut() - .start_send((topic, test_validated_message(data))) - ); - } - - // the flush should not have been triggered yet - assert!(sink.publisher.is_empty()); - - // the next poll should find that the buffer is full; it will trigger a flush before - // returning `Ready` (it won't return Pending because the MockPublisher isn't async) - assert_eq!(Poll::Ready(Ok(())), sink.as_mut().poll_ready(&mut cx)); - - assert_eq!( - vec![ - &(topic, test_validated_message("foo")), - &(topic, test_validated_message("bar")), - &(topic, test_validated_message("baz")) - ], - sink.publisher.messages().iter().collect::<Vec<_>>() - ); - } - - /// The publisher should flush buffered elements when asked to close - #[test] - fn close_flushes_batch() { - let topic = "test_topic".into(); - let batch_size = 3; - let publisher = MockPublisher::new(); - let sink = publisher_sink(publisher, batch_size); - pin_mut!(sink); - - // sanity check - assert!(sink.publisher.is_empty()); - - let mut cx = Context::from_waker(futures_util::task::noop_waker_ref()); - - // insert 1 message (not enough to reach the buffer limit) - assert_eq!(Poll::Ready(Ok(())), sink.as_mut().poll_ready(&mut cx)); - assert_eq!( - Ok(()), - sink.as_mut() - .start_send((topic, test_validated_message("foo"))) - ); - - // no flush has been triggered yet - assert!(sink.publisher.is_empty()); - - // closing should trigger a flush - assert_eq!(Poll::Ready(Ok(())), sink.as_mut().poll_close(&mut cx)); - - assert_eq!( - vec![&(topic, test_validated_message("foo")),], - sink.publisher.messages().iter().collect::<Vec<_>>() - ); - } - - /// The publisher should flush buffered elements when asked to flush - #[test] - fn flush_incomplete_batch() { - let topic = "test_topic".into(); - let batch_size = 3; - let publisher = MockPublisher::new(); - let sink = publisher_sink(publisher, batch_size); - pin_mut!(sink); - - // sanity check - assert!(sink.publisher.is_empty()); - - let mut cx = Context::from_waker(futures_util::task::noop_waker_ref()); - - // insert 1 message (not enough to reach the buffer limit) - assert_eq!(Poll::Ready(Ok(())), sink.as_mut().poll_ready(&mut cx)); - assert_eq!( - Ok(()), - sink.as_mut() - .start_send((topic, test_validated_message("foo"))) - ); - - // no flush has been triggered yet - assert!(sink.publisher.is_empty()); - - // trigger a flush - assert_eq!(Poll::Ready(Ok(())), sink.as_mut().poll_flush(&mut cx)); - - assert_eq!( - vec![&(topic, test_validated_message("foo")),], - sink.publisher.messages().iter().collect::<Vec<_>>() - ); - } - - /// `start_send` should panic if the buffer is full, because the user should have checked - /// `poll_ready` - #[test] - #[should_panic] - fn panic_at_buffer_full_without_ready_check() { - let topic = "test_topic".into(); - let batch_size = 1; - let publisher = MockPublisher::new(); - let sink = publisher_sink(publisher, batch_size); - pin_mut!(sink); - - assert_eq!( - Ok(()), - sink.as_mut() - .start_send((topic, test_validated_message("foo"))) - ); - - // should panic here - let _ = sink - .as_mut() - .start_send((topic, test_validated_message("bar"))); - } - - /// Step through flushing a non-full batch and see that yield points are respected - #[test] - fn partial_flushing_check() { - let topic = 
"test_topic".into(); - let batch_size = 3; - let (publisher, command) = ControlledPublisher::new(); - let sink = publisher_sink(publisher, batch_size); - pin_mut!(sink); - - let mut cx = Context::from_waker(futures_util::task::noop_waker_ref()); - - // insert 2 elements into the sink before flushing - - for &data in ["foo", "bar"].iter() { - assert_eq!(Poll::Ready(Ok(())), sink.as_mut().poll_ready(&mut cx)); - assert_eq!( - Ok(()), - sink.as_mut() - .start_send((topic, test_validated_message(data))) - ); - } - - // the elements should have been inserted into the batch, but not yet published - assert_eq!(2, sink.batch.len()); - assert!(sink.publisher.mock.is_empty()); - - // set the publisher to return Pending on its next iteration - *command.borrow_mut() = PublishCommand::Pending; - assert_eq!(Poll::Pending, sink.as_mut().poll_flush(&mut cx)); - - // the start of flushing should empty the batch. Though the publisher should have - // yielded, so the elements are in flight and not yet published - assert_eq!(0, sink.batch.len()); - assert!(sink.publisher.mock.is_empty()); - - // stepping the publish stream once should publish the first element - *command.borrow_mut() = PublishCommand::Next; - assert_eq!(Poll::Pending, sink.as_mut().poll_flush(&mut cx)); - - assert_eq!( - vec![&(topic, test_validated_message("foo"))], - sink.publisher.mock.messages().iter().collect::>() - ); - - // the publisher might be pending for a while, the flushing will propogate this - assert_eq!(Poll::Pending, sink.as_mut().poll_flush(&mut cx)); - assert_eq!(Poll::Pending, sink.as_mut().poll_flush(&mut cx)); - assert_eq!(Poll::Pending, sink.as_mut().poll_flush(&mut cx)); - - // advance to the next element - *command.borrow_mut() = PublishCommand::Next; - assert_eq!(Poll::Pending, sink.as_mut().poll_flush(&mut cx)); - assert_eq!( - vec![ - &(topic, test_validated_message("foo")), - &(topic, test_validated_message("bar")) - ], - sink.publisher.mock.messages().iter().collect::>() - ); - - // the publish stream isn't done because it hasn't yielded None yet - assert_eq!(Poll::Pending, sink.as_mut().poll_flush(&mut cx)); - - // one last advance to None should finish the flush - *command.borrow_mut() = PublishCommand::Next; - assert_eq!(Poll::Ready(Ok(())), sink.as_mut().poll_flush(&mut cx)); - } - - /// A failed message can be re-sent to the sink and eventually succeed - #[test] - fn flushing_error_retry() { - let topic = "test_topic".into(); - let batch_size = 5; - let (publisher, command) = ControlledPublisher::new(); - let sink = publisher_sink(publisher, batch_size); - pin_mut!(sink); - - let mut cx = Context::from_waker(futures_util::task::noop_waker_ref()); - - // insert 3 elements. 
the middle one will encounter an error later - for &data in ["a", "b", "c"].iter() { - assert_eq!(Poll::Ready(Ok(())), sink.as_mut().poll_ready(&mut cx)); - assert_eq!( - Ok(()), - sink.as_mut() - .start_send((topic, test_validated_message(data))) - ); - } - - // sanity check - assert!(sink.publisher.mock.is_empty()); - - // start flushing - *command.borrow_mut() = PublishCommand::Next; - assert_eq!(Poll::Pending, sink.as_mut().poll_flush(&mut cx)); - assert_eq!( - vec![&(topic, test_validated_message("a"))], - sink.publisher.mock.messages().iter().collect::<Vec<_>>() - ); - - // encounter an error on the second element - *command.borrow_mut() = PublishCommand::Error(TestError("boom!".into())); - let failed_message = match sink.as_mut().poll_flush(&mut cx) { - Poll::Ready(Err(err)) => err, - _ => panic!("expected ready error"), - }; - assert_eq!( - &FailedMessage { - error: TestError("boom!".into()), - topic, - message: test_validated_message("b") - }, - &failed_message - ); - - // re-submit the failed message to the sink - assert_eq!(Poll::Ready(Ok(())), sink.as_mut().poll_ready(&mut cx)); - assert_eq!( - Ok(()), - sink.as_mut() - .start_send((failed_message.topic, failed_message.message)) - ); - - // the retried element is now waiting in the sink's next batch - assert_eq!(1, sink.batch.len()); - // the publisher is still in the same state of having the first element - assert_eq!( - vec![&(topic, test_validated_message("a"))], - sink.publisher.mock.messages().iter().collect::<Vec<_>>() - ); - - // flushing can continue after an error - assert_eq!(Poll::Pending, sink.as_mut().poll_flush(&mut cx)); // just a pending pass-through - - // push the next publish from the first batch, as if the error were transient - *command.borrow_mut() = PublishCommand::Next; - assert_eq!(Poll::Pending, sink.as_mut().poll_flush(&mut cx)); - - // the re-submit pushed the retry to essentially the back of the line, so the flush - // resumes from where the publish stream left off with the 3rd element - assert_eq!( - vec![ - &(topic, test_validated_message("a")), - &(topic, test_validated_message("c")) - ], - sink.publisher.mock.messages().iter().collect::<Vec<_>>() - ); - - // advance the stream, but this is to the ending None so the retry won't be - // published yet. The next batch will be pulled in, however - *command.borrow_mut() = PublishCommand::Next; - assert_eq!(Poll::Pending, sink.as_mut().poll_flush(&mut cx)); - assert!(sink.batch.is_empty()); - assert_eq!( - vec![ - &(topic, test_validated_message("a")), - &(topic, test_validated_message("c")) - ], - sink.publisher.mock.messages().iter().collect::<Vec<_>>() - ); - - // finally the retry element can be published - *command.borrow_mut() = PublishCommand::Next; - assert_eq!(Poll::Pending, sink.as_mut().poll_flush(&mut cx)); - assert_eq!( - vec![ - &(topic, test_validated_message("a")), - &(topic, test_validated_message("c")), - &(topic, test_validated_message("b")) - ], - sink.publisher.mock.messages().iter().collect::<Vec<_>>() - ); - - // advance the stream to terminate and finish the flush - *command.borrow_mut() = PublishCommand::Next; - assert_eq!(Poll::Ready(Ok(())), sink.as_mut().poll_flush(&mut cx)); - } - } -} diff --git a/src/publisher.rs b/src/publisher.rs new file mode 100644 index 0000000..72ff6e0 --- /dev/null +++ b/src/publisher.rs @@ -0,0 +1,90 @@ +//! Types, traits, and functions necessary to publish messages using hedwig + +use crate::{Topic, ValidatedMessage}; +use futures_util::sink; +use std::{ + pin::Pin, + task::{Context, Poll}, +}; + +/// Message publishers.
+/// +/// Message publishers validate, encode, and deliver messages to an endpoint, possibly a remote +/// one. Message publishers may additionally validate a message for publisher-specific +/// requirements (e.g. size). +pub trait Publisher<M: EncodableMessage, S = Drain<M>> { + /// The error type that may be encountered when publishing a message + type PublishError; + /// The [`Sink`](futures_util::sink::Sink) type provided by the publisher to accept messages, + /// validate them, then publish them to the destination. + type PublishSink: sink::Sink<M, Error = Self::PublishError>; + + /// Create a new sink to accept messages. + /// + /// The sink will use the given validator to validate and/or encode messages, possibly batch + /// them together, then publish them to their destination. The details of the internal encoding + /// and batching may vary by `Publisher` implementation. + fn publish_sink(self, validator: M::Validator) -> Self::PublishSink + where + Self: Sized, + S: Default, + { + self.publish_sink_with_responses(validator, S::default()) + } + + /// Create a new sink to accept messages. + /// + /// This creates a sink like [`publish_sink`](Publisher::publish_sink) while additionally + /// listening for successful responses; after a message has been successfully published, it + /// will be passed to the given response sink to complete any necessary work (e.g. + /// acknowledging success or collecting metrics) + fn publish_sink_with_responses( + self, + validator: M::Validator, + response_sink: S, + ) -> Self::PublishSink; +} + +/// Types that can be encoded and published. +pub trait EncodableMessage { + /// The errors that can occur when calling the [`EncodableMessage::encode`] method. + /// + /// Will typically match the errors returned by the [`EncodableMessage::Validator`]. + type Error; + + /// The validator to use for this message. + type Validator; + + /// Topic into which this message shall be published. + fn topic(&self) -> Topic; + + /// Encode the message payload.
+ fn encode(&self, validator: &Self::Validator) -> Result<ValidatedMessage, Self::Error>; +} + +/// Like [`futures_util::sink::Drain`] but implements `Default` +#[derive(Debug)] +pub struct Drain<T>(std::marker::PhantomData<T>); + +impl<T> Default for Drain<T> { + fn default() -> Self { + Self(std::marker::PhantomData) + } +} + +impl<T> sink::Sink<T> for Drain<T> { + type Error = futures_util::never::Never; + + fn poll_ready(self: Pin<&mut Self>, _: &mut Context) -> Poll<Result<(), Self::Error>> { + Poll::Ready(Ok(())) + } + fn start_send(self: Pin<&mut Self>, _: T) -> Result<(), Self::Error> { + Ok(()) + } + fn poll_flush(self: Pin<&mut Self>, _: &mut Context) -> Poll<Result<(), Self::Error>> { + Poll::Ready(Ok(())) + } + fn poll_close(self: Pin<&mut Self>, _: &mut Context) -> Poll<Result<(), Self::Error>> { + Poll::Ready(Ok(())) + } +} diff --git a/src/tests.rs b/src/tests.rs deleted file mode 100644 index a42c3ea..0000000 --- a/src/tests.rs +++ /dev/null @@ -1,154 +0,0 @@ -#![allow(unused)] - -use crate::{publish::EncodableMessage, validators, Headers, ValidatedMessage}; - -use futures_util::stream::StreamExt; -use std::time::SystemTime; -use uuid::Uuid; - -pub(crate) const SCHEMA: &str = r#"{ - "$id": "https://hedwig.corp/schema", - "$schema": "https://json-schema.org/draft-04/schema#", - "description": "Example Schema", - "schemas": { - "user.created": { - "1.*": { - "description": "A new user was created", - "type": "object", - "x-versions": [ - "1.0" - ], - "required": [ - "user_id" - ], - "properties": { - "user_id": { - "$ref": "https://hedwig.corp/schema#/definitions/UserId/1.0" - } - } - } - }, - "invalid.route": { - "1.*": {} - } - }, - "definitions": { - "UserId": { - "1.0": { - "type": "string" - } - } - } -}"#; - -#[derive(serde::Serialize)] -pub(crate) struct JsonUserCreatedMessage<I> { - #[serde(skip)] - pub(crate) uuid: uuid::Uuid, - #[serde(skip)] - pub(crate) schema: &'static str, - #[serde(skip)] - pub(crate) headers: Headers, - #[serde(skip)] - pub(crate) time: SystemTime, - pub(crate) user_id: I, -} - -impl JsonUserCreatedMessage<String> { - pub(crate) fn new_valid<V: Into<String>>(id: V) -> Self { - JsonUserCreatedMessage { - uuid: Uuid::new_v4(), - schema: "https://hedwig.corp/schema#/schemas/user.created/1.0", - user_id: id.into(), - headers: Default::default(), - time: SystemTime::now(), - } - } -} - -#[cfg(feature = "json-schema")] -impl<'a, I: serde::Serialize> EncodableMessage for &'a JsonUserCreatedMessage<I> { - type Error = validators::JsonSchemaValidatorError; - type Validator = validators::JsonSchemaValidator; - - fn topic(&self) -> crate::Topic { - "user.created".into() - } - fn encode(self, validator: &Self::Validator) -> Result<ValidatedMessage, Self::Error> { - validator.validate( - self.uuid, - self.time, - self.schema, - self.headers.clone(), - self, - ) - } -} - -pub(crate) fn assert_error<T: std::error::Error + Send + Sync>() {} -pub(crate) fn assert_send<T: Send>() {} -pub(crate) fn assert_send_val<T: Send>(_: &T) {} - -#[tokio::test] -async fn publish_empty_batch() { - let publisher = crate::publish::MockPublisher::new(); - let batch = crate::publish::PublishBatch::new(); - let mut stream = batch.publish(&publisher); - assert!(matches!(stream.next().await, None)); - assert!(publisher.is_empty()); -} - -#[cfg(feature = "json-schema")] -#[tokio::test] -async fn publish_batch() { - let validator = crate::validators::JsonSchemaValidator::new(SCHEMA).unwrap(); - let publisher = crate::publish::MockPublisher::new(); - let mut batch = crate::publish::PublishBatch::new(); - let message_one = JsonUserCreatedMessage::new_valid("U123"); - let message_two = JsonUserCreatedMessage::new_valid("U124"); - let message_three = JsonUserCreatedMessage::new_valid("U126"); - let message_invalid = 
JsonUserCreatedMessage { - uuid: Uuid::new_v4(), - schema: "https://hedwig.corp/schema#/schemas/user.created/1.0", - user_id: 125u64, - time: SystemTime::now(), - headers: Headers::new(), - }; - batch - .message(&validator, &message_one) - .expect("adding valid message"); - assert!(matches!( - batch.message(&validator, &message_invalid).err(), - Some(_) - )); - batch - .message(&validator, &message_two) - .expect("adding valid message"); - batch - .message(&validator, &message_three) - .expect("adding valid message"); - let mut stream = batch.publish(&publisher); - // Stream should return the message ids that are actually being published. - // - // The ordering doesn't necessarily need to be preserved, but for the purpose of this test we - // know that `MockPublisher` does. - assert_eq!(stream.next().await.map(|x| x.0), Some(Ok(message_one.uuid))); - assert_eq!(stream.next().await.map(|x| x.0), Some(Ok(message_two.uuid))); - assert_eq!( - stream.next().await.map(|x| x.0), - Some(Ok(message_three.uuid)) - ); - assert_eq!(stream.next().await.map(|x| x.0), None); - assert_eq!(publisher.len(), 3); - publisher.assert_message_published("user.created", &message_one.uuid); - publisher.assert_message_published("user.created", &message_two.uuid); - publisher.assert_message_published("user.created", &message_three.uuid); -} - -#[test] -fn publish_stream_is_send() { - let publisher = crate::publish::MockPublisher::new(); - let batch = crate::publish::PublishBatch::new(); - let stream = batch.publish(&publisher); - assert_send_val(&stream); -} diff --git a/src/tests/google.rs b/src/tests/google.rs new file mode 100644 index 0000000..444ff84 --- /dev/null +++ b/src/tests/google.rs @@ -0,0 +1,458 @@ +#![cfg(all(feature = "google", feature = "protobuf"))] + +type BoxError = Box<dyn std::error::Error>; +use crate::{ + googlepubsub::{ + retry_policy::{RetryOperation, RetryPolicy}, + AuthFlow, ClientBuilder, ClientBuilderConfig, PubSubConfig, PubSubError, PublishError, + StreamSubscriptionConfig, SubscriptionConfig, SubscriptionName, TopicConfig, TopicName, + }, + validators::{ + prost::{ExactSchemaMatcher, SchemaMismatchError}, + ProstDecodeError, ProstDecoder, ProstValidator, ProstValidatorError, + }, + Consumer, DecodableMessage, EncodableMessage, Headers, Publisher, Topic, ValidatedMessage, +}; +use futures_util::{pin_mut, SinkExt, StreamExt, TryFutureExt, TryStreamExt}; +use std::{ + sync::mpsc, + task::{Context, Poll}, +}; +use ya_gcp::pubsub::emulator::EmulatorClient; + +const SCHEMA: &str = "test-schema"; +const TOPIC: &str = "test-topic"; + +#[derive(Clone, PartialEq, Eq, prost::Message)] +struct TestMessage { + #[prost(string, tag = "1")] + payload: String, +} + +impl EncodableMessage for TestMessage { + type Error = ProstValidatorError; + type Validator = ProstValidator; + + fn topic(&self) -> Topic { + TOPIC.into() + } + + fn encode(&self, validator: &Self::Validator) -> Result<ValidatedMessage, Self::Error> { + validator.validate( + uuid::Uuid::nil(), + std::time::SystemTime::UNIX_EPOCH, + SCHEMA, + Headers::default(), + self, + ) + } +} + +impl DecodableMessage for TestMessage { + type Decoder = ProstDecoder<ExactSchemaMatcher<TestMessage>>; + type Error = ProstDecodeError<SchemaMismatchError>; + + fn decode(msg: ValidatedMessage, validator: &Self::Decoder) -> Result<Self, Self::Error> { + validator.decode(msg) + } +} + +#[tokio::test] +#[ignore = "pubsub emulator is finicky, run this test manually"] +async fn roundtrip_protobuf() -> Result<(), BoxError> { + let project_name = "test-project"; + let topic_name = TopicName::new(TOPIC); + let subscription_name = SubscriptionName::new("test-subscription"); + + let 
emulator = EmulatorClient::with_project(project_name).await?; + + let client_builder = ClientBuilder::new( + ClientBuilderConfig::new().auth_flow(AuthFlow::NoAuth), + PubSubConfig::new().endpoint(emulator.endpoint()), + ) + .await?; + + let mut publisher_client = client_builder + .build_publisher(project_name, "test_publisher") + .await?; + + publisher_client + .create_topic(TopicConfig { + name: topic_name.clone(), + ..TopicConfig::default() + }) + .await?; + + let mut consumer_client = client_builder + .build_consumer(project_name, "test_queue") + .await?; + + consumer_client + .create_subscription(SubscriptionConfig { + name: subscription_name.clone(), + topic: topic_name.clone(), + ..SubscriptionConfig::default() + }) + .await?; + + let mut publisher = + Publisher::<TestMessage>::publish_sink(publisher_client.publisher(), ProstValidator::new()); + + publisher + .send(TestMessage { + payload: "foobar".into(), + }) + .await?; + + let consumer = consumer_client + .stream_subscription(subscription_name, StreamSubscriptionConfig::default()) + .consume::<TestMessage>(ProstDecoder::new(ExactSchemaMatcher::new(SCHEMA))); + + pin_mut!(consumer); + + assert_eq!( + TestMessage { + payload: "foobar".into() + }, + Option::unwrap(consumer.next().await)?.ack().await? + ); + Ok(()) +} + +/// Test that the publisher-side response sink receives elements when the publisher publishes +#[tokio::test] +#[ignore = "pubsub emulator is finicky, run this test manually"] +async fn response_sink_responses() -> Result<(), BoxError> { + let project_name = "test-project"; + let topic_name = TopicName::new(TOPIC); + let subscription_name = SubscriptionName::new("test-subscription"); + + let emulator = EmulatorClient::with_project(project_name).await?; + + let client_builder = ClientBuilder::new( + ClientBuilderConfig::new().auth_flow(AuthFlow::NoAuth), + PubSubConfig::new().endpoint(emulator.endpoint()), + ) + .await?; + + let mut publisher_client = client_builder + .build_publisher(project_name, "test_publisher") + .await?; + + publisher_client + .create_topic(TopicConfig { + name: topic_name.clone(), + ..TopicConfig::default() + }) + .await?; + + let mut consumer_client = client_builder + .build_consumer(project_name, "test_queue") + .await?; + + consumer_client + .create_subscription(SubscriptionConfig { + name: subscription_name.clone(), + topic: topic_name.clone(), + ..SubscriptionConfig::default() + }) + .await?; + + let (response_sink, mut responses) = futures_channel::mpsc::unbounded(); + let mut cx = Context::from_waker(futures_util::task::noop_waker_ref()); + + let mut publisher = Publisher::<TestMessage, _>::publish_sink_with_responses( + publisher_client.publisher(), + ProstValidator::new(), + response_sink, + ); + + let consumer = consumer_client + .stream_subscription(subscription_name, StreamSubscriptionConfig::default()) + .consume::<TestMessage>(ProstDecoder::new(ExactSchemaMatcher::new(SCHEMA))); + + pin_mut!(consumer); + + { + let message = TestMessage { + payload: "foobar".into(), + }; + + publisher.feed(message.clone()).await?; + + // the response sink should not be populated until a flush + assert_eq!(Poll::Pending, responses.poll_next_unpin(&mut cx)); + publisher.flush().await?; + assert_eq!( + Poll::Ready(Some(message.clone())), + responses.poll_next_unpin(&mut cx) + ); + + assert_eq!(message, Option::unwrap(consumer.next().await)?.ack().await?); + } + + { + let message1 = TestMessage { + payload: "one".into(), + }; + let message2 = TestMessage { + payload: "two".into(), + }; + let message3 = TestMessage { + payload: "three".into(), + }; + // 
create a message that will exceed the message limits (~10MB) and therefore error + let invalid_message4 = TestMessage { + payload: "4".repeat(10 * 1_000_000 + 1), + }; + let message5 = TestMessage { + payload: "five".into(), + }; + + publisher.feed(message1.clone()).await?; + publisher.feed(message2.clone()).await?; + publisher.feed(message3.clone()).await?; + + // buffering the invalid message (via feed) actually works; its validity is checked later + // when submitted to the underlying sink with the next poll_ready + publisher.feed(invalid_message4.clone()).await?; + match publisher.poll_ready_unpin(&mut cx) { + Poll::Ready(Err(PublishError::Publish { cause, messages })) => { + assert_eq!(vec![invalid_message4], messages); + assert_eq!(tonic::Code::InvalidArgument, cause.code()); + } + other => panic!("expected invalid arg error, was {:?}", other), + } + + publisher.feed(message5.clone()).await?; + + // no responses are sent yet + assert_eq!(Poll::Pending, responses.poll_next_unpin(&mut cx)); + + // the flush can still happen despite the error and the non-error values should come through + publisher.flush().await?; + assert_eq!( + vec![ + message1.clone(), + message2.clone(), + message3.clone(), + message5.clone() + ], + responses.by_ref().take(4).collect::<Vec<_>>().await + ); + + assert_eq!( + vec![ + message1.clone(), + message2.clone(), + message3.clone(), + message5.clone() + ], + consumer + .by_ref() + .take(4) + .map_err(BoxError::from) + .and_then(|msg| msg.ack().map_err(BoxError::from)) + .try_collect::<Vec<_>>() + .await? + ); + } + + { + let message6 = TestMessage { + payload: "six".into(), + }; + let message7 = TestMessage { + payload: "seven".into(), + }; + // create a message that will *not* exceed the message limits, but will exceed the total + // request limits even when it's the only message in a request. This induces an error later + // in the process, at the time of flush instead of insertion + let invalid_message8 = TestMessage { + payload: "8".repeat(10 * 1_000_000 - 6), + }; + let message9 = TestMessage { + payload: "nine".into(), + }; + + publisher.feed(message6.clone()).await?; + publisher.feed(message7.clone()).await?; + + publisher.feed(invalid_message8.clone()).await?; + // the error doesn't happen here because the invalid message was only just submitted to the + // sub-sink by this ready check. The buffer will first note that it's over capacity, and + // induce a flush.
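+ // (so the poll_ready below still reports Ok: the flush has only been started, and its error surfaces on a later poll)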
+ assert!(matches!( + publisher.poll_ready_unpin(&mut cx), + Poll::Ready(Ok(())) + )); + // to actually poll that flush, we need a new element in the hedwig buffer to forward + // readiness checks to the pubsub sink (we're avoiding a manual `flush` call to test the + // path where flushes happen unprompted) + publisher.start_send_unpin(message9.clone())?; + + // now readiness checking will drive the flush, and eventually find the invalid message and + // return an error + match futures_util::future::poll_fn(|cx| publisher.poll_ready_unpin(cx)).await { + Err(PublishError::Publish { cause, messages }) => { + assert_eq!(vec![invalid_message8], messages); + assert_eq!(tonic::Code::InvalidArgument, cause.code()); + } + other => panic!("expected invalid arg error, was {:?}", other), + } + + // flushing did allow two messages through before the error + assert_eq!( + vec![message6.clone(), message7.clone()], + responses.by_ref().take(2).collect::<Vec<_>>().await + ); + + // then a manual flush can send the last message submitted after the invalid message + publisher.flush().await?; + assert_eq!( + vec![message9.clone()], + responses.by_ref().take(1).collect::<Vec<_>>().await + ); + + // all the sent messages eventually arrive at the consumer + assert_eq!( + vec![message6.clone(), message7.clone(), message9.clone()], + consumer + .by_ref() + .take(3) + .map_err(BoxError::from) + .and_then(|msg| msg.ack().map_err(BoxError::from)) + .try_collect::<Vec<_>>() + .await? + ); + } + Ok(()) +} + +/// Check to see that the retry policy will translate from API messages to user messages +#[tokio::test] +#[ignore = "pubsub emulator is finicky, run this test manually"] +async fn retry_message_translate() -> Result<(), BoxError> { + let project_name = "roundtrip-test-project"; + let topic_name = TopicName::new(TOPIC); + + let emulator = EmulatorClient::with_project(project_name).await?; + + let client_builder = ClientBuilder::new( + ClientBuilderConfig::new().auth_flow(AuthFlow::NoAuth), + PubSubConfig::new().endpoint(emulator.endpoint()), + ) + .await?; + + let mut publisher_client = client_builder + .build_publisher(project_name, "roundtrip_test_publisher") + .await?; + + publisher_client + .create_topic(TopicConfig { + name: topic_name.clone(), + ..TopicConfig::default() + }) + .await?; + + // Create a retry policy which will send the failure values to a channel (for manual + // inspection) then fail the operation without retrying + #[derive(Clone)] + struct TestRetryPolicy { + sender: mpsc::Sender<Vec<TestMessage>>, + } + + struct TestRetryOperation { + sender: mpsc::Sender<Vec<TestMessage>>, + } + + impl RetryPolicy<[TestMessage], PubSubError> for TestRetryPolicy { + type RetryOp = TestRetryOperation; + + fn new_operation(&mut self) -> Self::RetryOp { + TestRetryOperation { + sender: self.sender.clone(), + } + } + } + + impl RetryOperation<[TestMessage], PubSubError> for TestRetryOperation { + type Sleep = futures_util::future::Ready<()>; + + fn check_retry( + &mut self, + failed_value: &[TestMessage], + _error: &PubSubError, + ) -> Option<Self::Sleep> { + self.sender + .send(failed_value.to_owned()) + .expect("receiver should not be dropped while senders in use"); + None + } + } + + // construct messages such that the first two will buffer and the third will force a flush of + // the first two.
The request limit is 10MB, so 2+2MB start the buffer and an additional 8MB + // will trigger a flush + let message1 = TestMessage { + payload: "1".repeat(2 * 1_000_000), + }; + let message2 = TestMessage { + payload: "2".repeat(2 * 1_000_000), + }; + let message3 = TestMessage { + payload: "3".repeat(8 * 1_000_000), + }; + let message4 = TestMessage { + payload: "4".into(), + }; + + let (retry_tx, retry_rx) = mpsc::channel(); + let mut publisher = Publisher::<TestMessage>::publish_sink( + publisher_client + .publisher() + .with_retry_policy(TestRetryPolicy { sender: retry_tx }), + ProstValidator::new(), + ); + + publisher.feed(message1.clone()).await?; + publisher.feed(message2.clone()).await?; + publisher.feed(message3.clone()).await?; + publisher.feed(message4.clone()).await?; + + // flushing (and thus errors/retries) should not have been triggered yet + assert_eq!(Err(mpsc::TryRecvError::Empty), retry_rx.try_recv()); + + // drop the emulator to kill the process and trigger errors on publishing + std::mem::drop(emulator); + + // flushing still hasn't happened + assert_eq!(Err(mpsc::TryRecvError::Empty), retry_rx.try_recv()); + + // check readiness to trigger the capacity flush (less than a full flush though, only enough to + // make room for a new request) + match futures_util::future::poll_fn(|cx| publisher.poll_ready_unpin(cx)).await { + Err(PublishError::Publish { cause: _, messages }) => { + assert_eq!(vec![message1.clone(), message2.clone()], messages); + } + other => panic!("expected publish error, was {:?}", other), + } + + // now the retry attempts of the first flush should be visible + assert_eq!(Ok(vec![message1, message2]), retry_rx.try_recv()); + // nothing else has attempted flushing though + assert_eq!(Err(mpsc::TryRecvError::Empty), retry_rx.try_recv()); + + // flush the rest + match publisher.flush().await { + Err(PublishError::Publish { cause: _, messages }) => { + assert_eq!(vec![message3.clone(), message4.clone()], messages); + } + other => panic!("expected publish error, was {:?}", other), + } + + // check that the retried values cover everything left + assert_eq!(Ok(vec![message3, message4]), retry_rx.try_recv()); + + Ok(()) +} diff --git a/src/tests/json.rs b/src/tests/json.rs new file mode 100644 index 0000000..c38f3b6 --- /dev/null +++ b/src/tests/json.rs @@ -0,0 +1,192 @@ +#![cfg(feature = "json-schema")] + +use crate::{ + mock::{Error as MockError, MockPublisher}, + validators, + validators::JsonSchemaValidatorError, + Consumer, DecodableMessage, EncodableMessage, Headers, Publisher, Topic, ValidatedMessage, +}; + +use futures_util::{sink::SinkExt, stream::StreamExt}; +use std::time::SystemTime; +use uuid::Uuid; + +pub(crate) const SCHEMA: &str = r#"{ + "$id": "https://hedwig.corp/schema", + "$schema": "https://json-schema.org/draft-04/schema#", + "description": "Example Schema", + "schemas": { + "user.created": { + "1.*": { + "description": "A new user was created", + "type": "object", + "x-versions": [ + "1.0" + ], + "required": [ + "user_id" + ], + "properties": { + "user_id": { + "$ref": "https://hedwig.corp/schema#/definitions/UserId/1.0" + } + } + } + }, + "invalid.route": { + "1.*": {} + } + }, + "definitions": { + "UserId": { + "1.0": { + "type": "string" + } + } + } +}"#; + +#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)] +pub(crate) struct JsonUserCreatedMessage<I> { + #[serde(skip)] + pub(crate) uuid: uuid::Uuid, + #[serde(skip)] + pub(crate) schema: &'static str, + #[serde(skip)] + pub(crate) headers: Headers, + #[serde(skip, default = 
"SystemTime::now")] + pub(crate) time: SystemTime, + pub(crate) user_id: I, +} + +impl JsonUserCreatedMessage { + pub(crate) fn new_valid>(id: V) -> Self { + JsonUserCreatedMessage { + uuid: Uuid::new_v4(), + schema: "https://hedwig.corp/schema#/schemas/user.created/1.0", + user_id: id.into(), + headers: Default::default(), + time: SystemTime::now(), + } + } +} + +impl<'a, I: serde::Serialize> EncodableMessage for JsonUserCreatedMessage { + type Error = validators::JsonSchemaValidatorError; + type Validator = validators::JsonSchemaValidator; + + fn topic(&self) -> Topic { + "user.created".into() + } + fn encode(&self, validator: &Self::Validator) -> Result { + validator.validate( + self.uuid, + self.time, + self.schema, + self.headers.clone(), + self, + ) + } +} + +impl DecodableMessage for JsonUserCreatedMessage { + type Error = serde_json::Error; + type Decoder = (); + + fn decode(msg: ValidatedMessage, _: &()) -> Result { + Ok(JsonUserCreatedMessage { + uuid: *msg.uuid(), + headers: msg.headers().clone(), + schema: "https://hedwig.corp/schema#/schemas/user.created/1.0", + time: *msg.timestamp(), + ..serde_json::from_slice(msg.data())? + }) + } +} + +#[tokio::test] +async fn publish_messages() -> Result<(), Box> { + let publisher = MockPublisher::new(); + let message_one = JsonUserCreatedMessage::new_valid("U123"); + let message_two = JsonUserCreatedMessage::new_valid("U124"); + let message_three = JsonUserCreatedMessage::new_valid("U126"); + let message_invalid = JsonUserCreatedMessage { + uuid: Uuid::new_v4(), + schema: "https://hedwig.corp/schema#/schemas/user.created/1.0", + user_id: 125u64, + time: SystemTime::now(), + headers: Headers::new(), + }; + let mut responses = Vec::new(); + + // prepare a consumer to read any sent messages + let mut consumer = publisher + .new_consumer((&message_one).topic(), "subscription1") + .consume::>(()); + + // publishing the message with a u64 id should error on trying to send + let mut publish_sink = >>::publish_sink( + publisher.clone(), + crate::validators::JsonSchemaValidator::new(SCHEMA).unwrap(), + ); + assert!(matches!( + publish_sink + .send(message_invalid) + .await + .map_err(|MockError { cause }| cause + .downcast::() + .map(|boxed| *boxed)), + Err(Ok(JsonSchemaValidatorError::ValidateData { .. })) + )); + + // publishing the type with string ids should work + let mut publish_sink = + , _>>::publish_sink_with_responses( + publisher.clone(), + crate::validators::JsonSchemaValidator::new(SCHEMA).unwrap(), + &mut responses, + ); + + assert!(publish_sink.send(message_one.clone()).await.is_ok()); + assert!(publish_sink.send(message_two.clone()).await.is_ok()); + assert!(publish_sink.send(message_three.clone()).await.is_ok()); + + // if the sink uses buffering, the user should be informed of successful publishes in the + // response sink. + assert_eq!( + vec![ + message_one.clone(), + message_two.clone(), + message_three.clone() + ], + responses + ); + + // Now actually read from the consumer. + // The ordering doesn't necessarily need to be preserved, but for the purpose of this test we + // know that `MockPublisher` does. 
+ assert_eq!( + message_one, + consumer.next().await.unwrap().unwrap().ack().await.unwrap() + ); + assert_eq!( + message_two, + consumer.next().await.unwrap().unwrap().ack().await.unwrap() + ); + assert_eq!( + message_three, + consumer.next().await.unwrap().unwrap().ack().await.unwrap() + ); + + Ok(()) +} + +#[test] +fn publish_sink_is_send() { + let publisher = MockPublisher::new(); + let sink = <MockPublisher as Publisher<JsonUserCreatedMessage<String>>>::publish_sink( + publisher, + crate::validators::JsonSchemaValidator::new(SCHEMA).unwrap(), + ); + crate::tests::assert_send_val(&sink); +} diff --git a/src/tests/mod.rs b/src/tests/mod.rs new file mode 100644 index 0000000..07a9ba4 --- /dev/null +++ b/src/tests/mod.rs @@ -0,0 +1,7 @@ +#![cfg(test)] + +pub(crate) mod google; +pub(crate) mod json; + +pub(crate) fn assert_error<T: std::error::Error + Send + Sync>() {} +pub(crate) fn assert_send_val<T: Send>(_: &T) {} diff --git a/src/topic.rs b/src/topic.rs index f9c0f69..eb88477 100644 --- a/src/topic.rs +++ b/src/topic.rs @@ -1,5 +1,5 @@ /// A message queue topic name to which messages can be published -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] pub struct Topic(&'static str); impl std::fmt::Display for Topic { @@ -14,8 +14,8 @@ impl From<&'static str> for Topic { } } -impl From<Topic> for &'static str { - fn from(s: Topic) -> &'static str { - s.0 +impl AsRef<str> for Topic { + fn as_ref(&self) -> &str { + self.0 } } diff --git a/src/validators/json_schema.rs b/src/validators/json_schema.rs index 3a2cd6c..ffdbb6e 100644 --- a/src/validators/json_schema.rs +++ b/src/validators/json_schema.rs @@ -82,7 +82,7 @@ impl JsonSchemaValidator { url::Url::parse(&wildcard_url) .map_err(|e| JsonSchemaValidatorError::SchemaUrlParse(e, wildcard_url))? } else { - url::Url::parse(&schema) + url::Url::parse(schema) .map_err(|e| JsonSchemaValidatorError::SchemaUrlParse(e, schema.into()))? }; let msg_schema = self @@ -110,7 +110,7 @@ impl JsonSchemaValidator { #[cfg(test)] mod tests { use super::*; - use crate::{publish::EncodableMessage, tests::*}; + use crate::{tests::json::*, EncodableMessage}; use uuid::Uuid; @@ -192,7 +192,7 @@ mod tests { #[test] fn errors_send_sync() { - assert_error::<JsonSchemaValidatorError>(); + crate::tests::assert_error::<JsonSchemaValidatorError>(); } #[test] diff --git a/src/validators/mod.rs b/src/validators/mod.rs index 81c712b..32291cb 100644 --- a/src/validators/mod.rs +++ b/src/validators/mod.rs @@ -15,6 +15,3 @@ pub use self::json_schema::*; pub mod prost; #[cfg(feature = "prost")] pub use self::prost::{ProstDecodeError, ProstDecoder, ProstValidator, ProstValidatorError}; - -// #[cfg_attr(docsrs, doc(cfg(feature = "prost")))] -// pub use self::prost::ProstValidator; diff --git a/tests/google-key.json b/tests/google-key.json deleted file mode 100644 index 9f59008..0000000 --- a/tests/google-key.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "type": "service_account", - "project_id": "foobar", - "private_key_id": "", - "private_key": "", - "client_email": "", - "client_id": "", - "auth_uri": "https://accounts.google.com/o/oauth2/auth", - "token_uri": "https://accounts.google.com/o/oauth2/token", - "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs", - "client_x509_cert_url": "" -}