w3c · alvestrand · Dec 7, 2023 · Nov 13, 2023 · Nov 14, 2023 · Nov 16, 2023
diff --git a/index.bs b/index.bs
@@ -28,6 +28,27 @@ spec:webidl; type:dfn; text:resolve
      "href":
      "https://www.ietf.org/archive/id/draft-ietf-sframe-enc-00.html",
      "title": "Secure Frame (SFrame)"
+   },
+   "VP9": {
+    "href":
+    "https://storage.googleapis.com/downloads.webmproject.org/docs/vp9/vp9-bitstream-specification-v0.6-20160331-draft.pdf",
+    "title": "VP9 Bitstream & Decoding Process Specification",
+    "publisher": "The WebM Project"
+   },
+   "ITU-T-REC-H.264": {
+    "href": "https://www.itu.int/rec/T-REC-H.264",
+    "title": "H.264 : Advanced video coding for generic audiovisual services",
+    "publisher": "ITU"
+   },
+   "ITU-G.711": {
+    "href": "https://www.itu.int/rec/T-REC-G.711/",
+    "title": "G.711 : Pulse code modulation (PCM) of voice frequencies",
+    "publisher": "ITU"
+   },
+   "ITU-G.722": {
+    "href": "https://www.itu.int/rec/T-REC-G.722/",
+    "title": "G.722 : 7 kHz audio-coding within 64 kbit/s",
+    "publisher": "ITU"
    }
 }
 </pre>
@@ -134,6 +155,7 @@ The <dfn abstract-op>writeEncodedData</dfn> algorithm is given a |rtcObject| as
 
 On sender side, as part of [$readEncodedData$], frames produced by |rtcObject|'s encoder MUST be enqueued in |rtcObject|.`[[readable]]` in the encoder's output order.
 As [$writeEncodedData$] ensures that the transform cannot reorder frames, the encoder's output order is also the order followed by packetizers to generate RTP packets and assign RTP packet sequence numbers.
+The packetizer may expect the transformed data to still conform to the original format, e.g. a series of NAL units separated by Annex B start codes.
 
 On receiver side, as part of [$readEncodedData$], frames produced by |rtcObject|'s packetizer MUST be enqueued in |rtcObject|.`[[readable]]` in the same encoder's output order.
 To ensure the order is respected, the depacketizer will typically use RTP packet sequence numbers to reorder RTP packets as needed before enqueuing frames in |rtcObject|.`[[readable]]`.
@@ -435,8 +457,70 @@ interface RTCEncodedVideoFrame {
     </dt>
     <dd>
         <p>
-            The encoded frame data.
+            The encoded frame data. The format of the data depends on the video codec that is
+            used to encode/decode the frame, which can be determined by looking at the
+            {{RTCEncodedVideoFrameMetadata/mimeType}}.
+            For <a href="https://w3c.github.io/webrtc-svc/">SVC</a>, each spatial layer
+            is transformed separately.
+            <p class="note">
+              Since packetizers may drop certain elements, e.g. AV1 temporal delimiter OBUs,
+              the input to a receive-side transform may be different from the output of
+              a send-side transform.
+            </p>
+            The following table gives a number of examples:
         </p>
+        <table class="simple">
+            <thead>
+                <tr>
+                    <th>mimeType</th><th>Data format</th>
+                </tr>
+            </thead>
+            <tbody>
+                <tr>
+                    <td>
+                        video/VP8
+                    </td>
+                    <td>
+                        The data starts with the "uncompressed data chunk" defined in
+                        <a href="https://datatracker.ietf.org/doc/html/rfc6386#section-9.1">
+                        section 9.1</a> of [[RFC6386]] and is followed by the rest of the
+                        frame data. The <a href="https://www.rfc-editor.org/rfc/rfc7741#section-4.1">
+                        VP8 payload descriptor</a> is not accessible.
+                    </td>
+                </tr>
+                <tr>
+                    <td>
+                        video/VP9
+                    </td>
+                    <td>
+                        The data is a frame as described in Section 6 of [[VP9]].
+                        The <a href="https://datatracker.ietf.org/doc/html/draft-ietf-payload-vp9#section-4.2">
+                        VP9 payload descriptor</a> is not accessible.
+                    </td>
+                </tr>
+                <tr>
+                    <td>
+                        video/H264
+                    </td>
+                    <td>
+                        The data is a series of NAL units in Annex B format,
+                        as defined in [[ITU-T-REC-H.264]] Annex B.
+                    </td>
+                </tr>
+                <tr>
+                    <td>
+                        video/AV1
+                    </td>
+                    <td>
+                        The data is a series of OBUs compliant with the
+                        <a href="https://aomediacodec.github.io/av1-spec/#low-overhead-bitstream-format">
+                        low-overhead bitstream format</a> as described in Section 5 of [[AV1]].
+                        The <a href="https://aomediacodec.github.io/av1-rtp-spec/#41-rtp-header-usage">
+                        AV1 aggregation header</a> is not accessible.
+                    </td>
+                </tr>
+            </tbody>
+        </table>
     </dd>
 </dl>
 
@@ -563,8 +647,76 @@ interface RTCEncodedAudioFrame {
     </dt>
     <dd>
         <p>
-            The encoded frame data.
+            The encoded frame data. The format of the data depends on the audio codec that is
+            used to encode/decode the frame, which can be determined by looking at the
+            {{RTCEncodedAudioFrameMetadata/mimeType}}.
+            The following table gives a number of examples:
         </p>
+        <table class="simple">
+            <thead>
+                <tr>
+                    <th>mimeType</th><th>Data format</th>
+                </tr>
+            </thead>
+            <tbody>
+                <tr>
+                    <td>
+                        audio/opus
+                    </td>
+                    <td>
+                        The data is Opus packets, as described in
+                        <a href="https://datatracker.ietf.org/doc/html/rfc6716#section-3">
+                        section 3</a> of [[RFC6716]].
+                    </td>
+                </tr>
+                <tr>
+                    <td>
+                        audio/PCMU
+                    </td>
+                    <td>
+                        The data is a sequence of bytes of arbitrary length, where each byte is a &mu;-law
+                        encoded PCM sample as defined by Tables 2a and 2b in [[ITU-G.711]].
+                    </td>
+                </tr>
+                <tr>
+                    <td>
+                        audio/PCMA
+                    </td>
+                    <td>
+                        The data is a sequence of bytes of arbitrary length, where each byte is
+                        an A-law encoded PCM sample as defined by Tables 1a and 1b in [[ITU-G.711]].
+                    </td>
+                </tr>
+                <tr>
+                    <td>
+                        audio/G722
+                    </td>
+                    <td>
+                        The data is G.722 audio as described in [[ITU-G.722]].
+                    </td>
+                </tr>
+                <tr>
+                    <td>
+                        audio/RED
+                    </td>
+                    <td>
+                        The data is Redundant Audio Data as described in
+                        <a href="https://www.rfc-editor.org/rfc/rfc2198#section-3">
+                        section 3</a> of [[RFC2198]].
+                    </td>
+                </tr>
+                <tr>
+                    <td>
+                        audio/CN
+                    </td>
+                    <td>
+                        The data is Comfort Noise as described in
+                        <a href="https://www.rfc-editor.org/rfc/rfc3389#section-3">
+                        section 3</a> of [[RFC3389]].
+                    </td>
+                </tr>
+            </tbody>
+        </table>
     </dd>
 </dl>