@@ -96,13 +96,21 @@ enum Version {
96
96
// - Implicitly add inference graph into each TrainingInfoProto's algorithm.
97
97
IR_VERSION_2020_5_8 = 0x0000000000000007 ;
98
98
99
- // IR VERSION 8 published on <TBD>
99
+ // IR VERSION 8 published on July 30, 2021
100
100
// Introduce TypeProto.SparseTensor
101
101
// Introduce TypeProto.Optional
102
102
// Added a list of FunctionProtos local to the model
103
103
// Deprecated since_version and operator status from FunctionProto
104
- IR_VERSION = 0x0000000000000008 ;
104
+ IR_VERSION_2021_7_30 = 0x0000000000000008 ;
105
105
106
+ // IR VERSION 9 published on May 5, 2023
107
+ // Added AttributeProto to FunctionProto so that default attribute values can be set.
108
+ // Added FLOAT8E4M3FN, FLOAT8E4M3FNUZ, FLOAT8E5M2, FLOAT8E5M2FNUZ.
109
+ IR_VERSION_2023_5_5 = 0x0000000000000009 ;
110
+
111
+ // IR VERSION 10 published on TBD
112
+ // Added UINT4, INT4.
113
+ IR_VERSION = 0x000000000000000A ;
106
114
}
107
115
108
116
// Attributes
@@ -112,6 +120,8 @@ enum Version {
112
120
// An AttributeProto MUST contain the name field, and *only one* of the
113
121
// following content fields, effectively enforcing a C/C++ union equivalent.
114
122
message AttributeProto {
123
+ reserved 12 , 16 to 19 ;
124
+ reserved "v" ;
115
125
116
126
// Note: this enum is structurally identical to the OpSchema::AttrType
117
127
// enum defined in schema.h. If you rev one, you likely need to rev the other.
@@ -184,6 +194,8 @@ message ValueInfoProto {
184
194
optional TypeProto type = 2 ;
185
195
// A human-readable documentation for this value. Markdown is allowed.
186
196
optional string doc_string = 3 ;
197
+ // Named metadata values; keys should be distinct.
198
+ repeated StringStringEntryProto metadata_props = 4 ;
187
199
}
188
200
189
201
// Nodes
@@ -198,19 +210,24 @@ message NodeProto {
198
210
repeated string output = 2 ; // namespace Value
199
211
200
212
// An optional identifier for this node in a graph.
201
- // This field MAY be absent in ths version of the IR.
213
+ // This field MAY be absent in this version of the IR.
202
214
optional string name = 3 ; // namespace Node
203
215
204
216
// The symbolic identifier of the Operator to execute.
205
217
optional string op_type = 4 ; // namespace Operator
206
218
// The domain of the OperatorSet that specifies the operator named by op_type.
207
219
optional string domain = 7 ; // namespace Domain
220
+ // Overload identifier, used only to map this to a model-local function.
221
+ optional string overload = 8 ;
208
222
209
223
// Additional named attributes.
210
224
repeated AttributeProto attribute = 5 ;
211
225
212
226
// A human-readable documentation for this node. Markdown is allowed.
213
227
optional string doc_string = 6 ;
228
+
229
+ // Named metadata values; keys should be distinct.
230
+ repeated StringStringEntryProto metadata_props = 9 ;
214
231
}
215
232
216
233
// Training information
@@ -255,7 +272,7 @@ message TrainingInfoProto {
255
272
//
256
273
// An execution of the training algorithm step is performed by executing the
257
274
// graph obtained by combining the inference graph (namely "ModelProto.graph")
258
- // and the "algorithm" graph. That is, the actual the actual
275
+ // and the "algorithm" graph. That is, the actual
259
276
// input/initializer/output/node/value_info/sparse_initializer list of
260
277
// the training graph is the concatenation of
261
278
// "ModelProto.graph.input/initializer/output/node/value_info/sparse_initializer"
@@ -395,9 +412,9 @@ message ModelProto {
395
412
396
413
// A list of function protos local to the model.
397
414
//
398
- // Name of the function "FunctionProto.name" should be unique within the domain "FunctionProto.domain" .
415
+ // The (domain, name, overload) tuple must be unique across the function protos in this list .
399
416
// In case of any conflicts the behavior (whether the model local functions are given higher priority,
400
- // or standard opserator sets are given higher priotity or this is treated as error) is defined by
417
+ // or standard operator sets are given higher priority or this is treated as error) is defined by
401
418
// the runtimes.
402
419
//
403
420
// The operator sets imported by FunctionProto should be compatible with the ones
@@ -469,6 +486,9 @@ message GraphProto {
469
486
// which means, tensor 'a_scale' and tensor 'a_zero_point' are scale and zero point of tensor 'a' in the model.
470
487
repeated TensorAnnotation quantization_annotation = 14 ;
471
488
489
+ // Named metadata values; keys should be distinct.
490
+ repeated StringStringEntryProto metadata_props = 16 ;
491
+
472
492
reserved 3 , 4 , 6 to 9 ;
473
493
reserved "ir_version", "producer_version", "producer_tag", "domain" ;
474
494
}
@@ -505,6 +525,21 @@ message TensorProto {
505
525
// This format has 1 sign bit, 8 exponent bits, and 7 mantissa bits.
506
526
BFLOAT16 = 16 ;
507
527
528
+ // Non-IEEE floating-point format based on papers
529
+ // FP8 Formats for Deep Learning, https://arxiv.org/abs/2209.05433,
530
+ // 8-bit Numerical Formats For Deep Neural Networks, https://arxiv.org/pdf/2206.02915.pdf.
531
+ // Operators supporting FP8 are Cast, CastLike, QuantizeLinear, DequantizeLinear.
532
+ // The computation usually happens inside a block quantize / dequantize
533
+ // fused by the runtime.
534
+ FLOAT8E4M3FN = 17 ; // float 8, mostly used for coefficients, supports nan, not inf
535
+ FLOAT8E4M3FNUZ = 18 ; // float 8, mostly used for coefficients, supports nan, not inf, no negative zero
536
+ FLOAT8E5M2 = 19 ; // follows IEEE 754, supports nan, inf, mostly used for gradients
537
+ FLOAT8E5M2FNUZ = 20 ; // follows IEEE 754, supports nan, not inf, mostly used for gradients, no negative zero
538
+
539
+ // 4-bit data-types
540
+ UINT4 = 21 ; // Unsigned integer in range [0, 15]
541
+ INT4 = 22 ; // Signed integer in range [-8, 7], using two's-complement representation
542
+
508
543
// Future extensions go here.
509
544
}
510
545
@@ -538,11 +573,13 @@ message TensorProto {
538
573
// When this field is present, the data_type field MUST be FLOAT or COMPLEX64.
539
574
repeated float float_data = 4 [packed = true ];
540
575
541
- // For int32, uint8, int8, uint16, int16, bool, and float16 values
542
- // float16 values must be bit-wise converted to an uint16_t prior
576
+ // For int32, uint8, int8, uint16, int16, uint4, int4, bool, float8, float16 and bfloat16 values
577
+ // float16 and float8 values must be bit-wise converted to a uint16_t prior
543
578
// to writing to the buffer.
579
+ // uint4 and int4 values must be packed to 4bitx2 prior to writing to the buffer, the first element is stored in
580
+ // the 4 LSB and the second element is stored in the 4 MSB.
544
581
// When this field is present, the data_type field MUST be
545
- // INT32, INT16, INT8, UINT16, UINT8, BOOL, or FLOAT16
582
+ // INT32, INT16, INT8, INT4, UINT16, UINT8, UINT4, BOOL, FLOAT16, BFLOAT16, FLOAT8E4M3FN, FLOAT8E4M3FNUZ, FLOAT8E5M2, FLOAT8E5M2FNUZ
546
583
repeated int32 int32_data = 5 [packed = true ];
547
584
548
585
// For strings.
@@ -572,6 +609,7 @@ message TensorProto {
572
609
// Complex64 elements must be written as two consecutive FLOAT values, real component first.
573
610
// Complex128 elements must be written as two consecutive DOUBLE values, real component first.
574
611
// Boolean type MUST be written one byte per tensor element (00000001 for true, 00000000 for false).
612
+ // uint4 and int4 values must be packed to 4bitx2, the first element is stored in the 4 LSB and the second element is stored in the 4 MSB.
575
613
//
576
614
// Note: the advantage of specific field rather than the raw_data field is
577
615
// that in some cases (e.g. int data), protobuf does a better packing via
@@ -614,6 +652,9 @@ message TensorProto {
614
652
// When this field is present, the data_type field MUST be
615
653
// UINT32 or UINT64
616
654
repeated uint64 uint64_data = 11 [packed = true ];
655
+
656
+ // Named metadata values; keys should be distinct.
657
+ repeated StringStringEntryProto metadata_props = 16 ;
617
658
}
618
659
619
660
// A serialized sparse-tensor value
@@ -760,9 +801,8 @@ enum OperatorStatus {
760
801
}
761
802
762
803
message FunctionProto {
763
- // The name of the function, similar usage of op_type in OperatorProto.
764
- // Combined with FunctionProto.domain, this forms the unique identity of
765
- // the FunctionProto.
804
+ // The name of the function, similar to op_type in NodeProto.
805
+ // This is part of the unique-id (domain, name, overload) of FunctionProtos in a model.
766
806
optional string name = 1 ;
767
807
768
808
// Deprecated since IR Version 8
@@ -779,9 +819,16 @@ message FunctionProto {
779
819
repeated string input = 4 ;
780
820
repeated string output = 5 ;
781
821
782
- // The attributes of the function.
822
+ // The attribute parameters of the function.
823
+ // It is for function parameters without default values.
783
824
repeated string attribute = 6 ;
784
825
826
+ // The attribute protos of the function.
827
+ // It is for function attributes with default values.
828
+ // A function attribute shall be represented either as
829
+ // a string attribute or an AttributeProto, not both.
830
+ repeated AttributeProto attribute_proto = 11 ;
831
+
785
832
// The nodes in the function.
786
833
repeated NodeProto node = 7 ;
787
834
// A human-readable documentation for this function. Markdown is allowed.
@@ -802,11 +849,23 @@ message FunctionProto {
802
849
803
850
repeated OperatorSetIdProto opset_import = 9 ;
804
851
805
- // The domain which this function belongs to. Combined with FunctionProto.name, this forms the unique identity of
806
- // the FunctionProto .
852
+ // The domain which this function belongs to.
853
+ // This is part of the unique-id (domain, name, overload) of FunctionProtos in a model .
807
854
optional string domain = 10 ;
808
- }
809
855
856
+ // The overload identifier of the function.
857
+ // This is part of the unique-id (domain, name, overload) of FunctionProtos in a model.
858
+ optional string overload = 13 ;
859
+
860
+ // Information for the values in the function. The ValueInfoProto.name's
861
+ // must be distinct and refer to names in the function (including inputs,
862
+ // outputs, and intermediate values). It is optional for a value to appear
863
+ // in the value_info list.
864
+ repeated ValueInfoProto value_info = 12 ;
865
+
866
+ // Named metadata values; keys should be distinct.
867
+ repeated StringStringEntryProto metadata_props = 14 ;
868
+ }
810
869
811
870
// For using protobuf-lite
812
871
option optimize_for = LITE_RUNTIME ;
0 commit comments