@@ -19,7 +19,7 @@ use bytes::{Buf, Bytes};
1919use differential_dataflow:: lattice:: Lattice ;
2020use differential_dataflow:: trace:: Description ;
2121use mz_ore:: cast:: CastInto ;
22- use mz_ore:: { assert_none, halt} ;
22+ use mz_ore:: { assert_none, halt, soft_panic_or_log } ;
2323use mz_persist:: indexed:: encoding:: { BatchColumnarFormat , BlobTraceBatchPart , BlobTraceUpdates } ;
2424use mz_persist:: location:: { SeqNo , VersionedData } ;
2525use mz_persist:: metrics:: ColumnarMetrics ;
@@ -32,7 +32,7 @@ use proptest::strategy::Strategy;
3232use prost:: Message ;
3333use semver:: Version ;
3434use serde:: ser:: SerializeStruct ;
35- use serde:: { Deserialize , Serialize } ;
35+ use serde:: { Deserialize , Serialize , Serializer } ;
3636use timely:: progress:: { Antichain , Timestamp } ;
3737use uuid:: Uuid ;
3838
@@ -192,6 +192,105 @@ impl<T: Message + Default> RustType<Bytes> for LazyProto<T> {
192192 }
193193}
194194
195+ /// Our Proto implementation, Prost, cannot handle unrecognized fields. This means that unexpected
196+ /// data will be dropped at deserialization time, which means that we can't reliably roundtrip data
197+ /// from future versions of the code, which causes trouble during upgrades and at other times.
198+ ///
199+ /// This type works around the issue by defining an unstructured metadata map. Keys are expected to
200+ /// be well-known strings defined in the code; values are bytes, expected to be encoded protobuf.
201+ /// (The association between the two is lightly enforced with the affiliated [MetadataKey] type.)
202+ /// It's safe to add new metadata keys in new versions, since even unrecognized keys can be losslessly
203+ /// roundtripped. However, if the metadata is not safe for the old version to ignore -- perhaps it
204+ /// needs to be kept in sync with some other part of the struct -- you will need to use a more
205+ /// heavyweight migration for it.
206+ #[ derive( Debug , Default , Clone , Eq , PartialEq , Ord , PartialOrd , Hash ) ]
207+ pub ( crate ) struct MetadataMap ( BTreeMap < String , Bytes > ) ;
208+
/// Associates a field name with a Proto message type, for lookup in a metadata map.
///
/// `V` is the Rust-side value type and `P` is the Proto message type it is encoded as; `P`
/// defaults to `V`. Neither type is stored: the key only carries its name, and the type
/// parameters exist purely to tie a name to a type at compile time.
///
/// It is an error to reuse key names, or to change the type associated with a particular name.
/// It is polite to choose short names, since they get serialized alongside every struct.
#[allow(unused)]
#[derive(Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub(crate) struct MetadataKey<V, P = V> {
    /// The well-known name this key is stored under.
    name: &'static str,
    /// Marker tying the key to its value and Proto types; holds no data.
    type_: PhantomData<(V, P)>,
}

// `Clone` and `Copy` are implemented by hand rather than derived. The derives would add
// `V: Clone` / `V: Copy` (and the same for `P`) bounds, even though the key never stores a `V`
// or a `P`. That would make keys for non-`Copy` value types impossible to pass by value more
// than once unless they are declared as `const` items.
impl<V, P> Clone for MetadataKey<V, P> {
    fn clone(&self) -> Self {
        *self
    }
}

impl<V, P> Copy for MetadataKey<V, P> {}
219+
220+ impl < V , P > MetadataKey < V , P > {
221+ #[ allow( unused) ]
222+ pub ( crate ) const fn new ( name : & ' static str ) -> Self {
223+ MetadataKey {
224+ name,
225+ type_ : PhantomData ,
226+ }
227+ }
228+ }
229+
230+ impl serde:: Serialize for MetadataMap {
231+ fn serialize < S > ( & self , serializer : S ) -> Result < S :: Ok , S :: Error >
232+ where
233+ S : Serializer ,
234+ {
235+ serializer. collect_map ( self . 0 . iter ( ) )
236+ }
237+ }
238+
239+ impl MetadataMap {
240+ /// Returns true iff no metadata keys have been set.
241+ pub fn is_empty ( & self ) -> bool {
242+ self . 0 . is_empty ( )
243+ }
244+
245+ /// Serialize and insert a new key into the map, replacing any existing value for the key.
246+ #[ allow( unused) ]
247+ pub fn set < V : RustType < P > , P : prost:: Message > ( & mut self , key : MetadataKey < V , P > , value : V ) {
248+ self . 0 . insert (
249+ String :: from ( key. name ) ,
250+ Bytes :: from ( value. into_proto_owned ( ) . encode_to_vec ( ) ) ,
251+ ) ;
252+ }
253+
254+ /// Deserialize a key from the map, if it is present.
255+ #[ allow( unused) ]
256+ pub fn get < V : RustType < P > , P : prost:: Message + Default > (
257+ & self ,
258+ key : MetadataKey < V , P > ,
259+ ) -> Option < V > {
260+ let proto = match P :: decode ( self . 0 . get ( key. name ) ?. as_ref ( ) ) {
261+ Ok ( decoded) => decoded,
262+ Err ( err) => {
263+ // This should be impossible unless one of the MetadataKey invariants are broken.
264+ soft_panic_or_log ! (
265+ "error when decoding {key}; was it redefined? {err}" ,
266+ key = key. name
267+ ) ;
268+ return None ;
269+ }
270+ } ;
271+
272+ match proto. into_rust ( ) {
273+ Ok ( proto) => Some ( proto) ,
274+ Err ( err) => {
275+ // This should be impossible unless one of the MetadataKey invariants are broken.
276+ soft_panic_or_log ! (
277+ "error when decoding {key}; was it redefined? {err}" ,
278+ key = key. name
279+ ) ;
280+ None
281+ }
282+ }
283+ }
284+ }
285+ impl RustType < BTreeMap < String , Bytes > > for MetadataMap {
286+ fn into_proto ( & self ) -> BTreeMap < String , Bytes > {
287+ self . 0 . clone ( )
288+ }
289+ fn from_proto ( proto : BTreeMap < String , Bytes > ) -> Result < Self , TryFromProtoError > {
290+ Ok ( MetadataMap ( proto) )
291+ }
292+ }
293+
195294pub ( crate ) fn parse_id ( id_prefix : & str , id_type : & str , encoded : & str ) -> Result < [ u8 ; 16 ] , String > {
196295 let uuid_encoded = match encoded. strip_prefix ( id_prefix) {
197296 Some ( x) => x,
@@ -1362,6 +1461,7 @@ impl<T: Timestamp + Codec64> RustType<ProtoHollowBatch> for HollowBatch<T> {
13621461 parts. extend ( proto. deprecated_keys . into_iter ( ) . map ( |key| {
13631462 RunPart :: Single ( BatchPart :: Hollow ( HollowBatchPart {
13641463 key : PartialBatchKey ( key) ,
1464+ meta : Default :: default ( ) ,
13651465 encoded_size_bytes : 0 ,
13661466 key_lower : vec ! [ ] ,
13671467 structured_key_lower : None ,
@@ -1418,6 +1518,7 @@ impl RustType<ProtoRunMeta> for RunMeta {
14181518 deprecated_schema_id : self . deprecated_schema . into_proto ( ) ,
14191519 id : self . id . into_proto ( ) ,
14201520 len : self . len . into_proto ( ) ,
1521+ meta : self . meta . into_proto ( ) ,
14211522 }
14221523 }
14231524
@@ -1434,6 +1535,7 @@ impl RustType<ProtoRunMeta> for RunMeta {
14341535 deprecated_schema : proto. deprecated_schema_id . into_rust ( ) ?,
14351536 id : proto. id . into_rust ( ) ?,
14361537 len : proto. len . into_rust ( ) ?,
1538+ meta : proto. meta . into_rust ( ) ?,
14371539 } )
14381540 }
14391541}
@@ -1472,6 +1574,7 @@ impl<T: Timestamp + Codec64> RustType<ProtoHollowBatchPart> for HollowRunRef<T>
14721574 schema_id : None ,
14731575 structured_key_lower : self . structured_key_lower . into_proto ( ) ,
14741576 deprecated_schema_id : None ,
1577+ metadata : BTreeMap :: default ( ) ,
14751578 } ;
14761579 part
14771580 }
@@ -1509,6 +1612,7 @@ impl<T: Timestamp + Codec64> RustType<ProtoHollowBatchPart> for BatchPart<T> {
15091612 format : x. format . map ( |f| f. into_proto ( ) ) ,
15101613 schema_id : x. schema_id . into_proto ( ) ,
15111614 deprecated_schema_id : x. deprecated_schema_id . into_proto ( ) ,
1615+ metadata : BTreeMap :: default ( ) ,
15121616 } ,
15131617 BatchPart :: Inline {
15141618 updates,
@@ -1526,6 +1630,7 @@ impl<T: Timestamp + Codec64> RustType<ProtoHollowBatchPart> for BatchPart<T> {
15261630 format : None ,
15271631 schema_id : schema_id. into_proto ( ) ,
15281632 deprecated_schema_id : deprecated_schema_id. into_proto ( ) ,
1633+ metadata : BTreeMap :: default ( ) ,
15291634 } ,
15301635 }
15311636 }
@@ -1541,6 +1646,7 @@ impl<T: Timestamp + Codec64> RustType<ProtoHollowBatchPart> for BatchPart<T> {
15411646 Some ( proto_hollow_batch_part:: Kind :: Key ( key) ) => {
15421647 Ok ( BatchPart :: Hollow ( HollowBatchPart {
15431648 key : key. into_rust ( ) ?,
1649+ meta : proto. metadata . into_rust ( ) ?,
15441650 encoded_size_bytes : proto. encoded_size_bytes . into_rust ( ) ?,
15451651 key_lower : proto. key_lower . into ( ) ,
15461652 structured_key_lower : proto. structured_key_lower . into_rust ( ) ?,
@@ -1839,6 +1945,25 @@ mod tests {
18391945
18401946 use super :: * ;
18411947
1948+ #[ mz_ore:: test]
1949+ fn metadata_map ( ) {
1950+ const COUNT : MetadataKey < u64 > = MetadataKey :: new ( "count" ) ;
1951+
1952+ let mut map = MetadataMap :: default ( ) ;
1953+ map. set ( COUNT , 100 ) ;
1954+ let mut map = MetadataMap :: from_proto ( map. into_proto ( ) ) . unwrap ( ) ;
1955+ assert_eq ! ( map. get( COUNT ) , Some ( 100 ) ) ;
1956+
1957+ const ANTICHAIN : MetadataKey < Antichain < u64 > , ProtoU64Antichain > =
1958+ MetadataKey :: new ( "antichain" ) ;
1959+ assert_none ! ( map. get( ANTICHAIN ) ) ;
1960+
1961+ map. set ( ANTICHAIN , Antichain :: from_elem ( 30 ) ) ;
1962+ let map = MetadataMap :: from_proto ( map. into_proto ( ) ) . unwrap ( ) ;
1963+ assert_eq ! ( map. get( COUNT ) , Some ( 100 ) ) ;
1964+ assert_eq ! ( map. get( ANTICHAIN ) , Some ( Antichain :: from_elem( 30 ) ) ) ;
1965+ }
1966+
18421967 #[ mz_ore:: test]
18431968 fn applier_version_state ( ) {
18441969 let v1 = semver:: Version :: new ( 1 , 0 , 0 ) ;
@@ -1916,6 +2041,7 @@ mod tests {
19162041 ) ,
19172042 vec ! [ RunPart :: Single ( BatchPart :: Hollow ( HollowBatchPart {
19182043 key: PartialBatchKey ( "a" . into( ) ) ,
2044+ meta: Default :: default ( ) ,
19192045 encoded_size_bytes: 5 ,
19202046 key_lower: vec![ ] ,
19212047 structured_key_lower: None ,
@@ -1943,6 +2069,7 @@ mod tests {
19432069 . parts
19442070 . push ( RunPart :: Single ( BatchPart :: Hollow ( HollowBatchPart {
19452071 key : PartialBatchKey ( "b" . into ( ) ) ,
2072+ meta : Default :: default ( ) ,
19462073 encoded_size_bytes : 0 ,
19472074 key_lower : vec ! [ ] ,
19482075 structured_key_lower : None ,
0 commit comments