From 9094bcea1011258b1a04e658a9699cfae6178ae7 Mon Sep 17 00:00:00 2001 From: Floris Bruynooghe Date: Tue, 26 Aug 2025 11:56:36 +0200 Subject: [PATCH 01/18] First draft of QAD blog post --- src/app/blog/qad/page.mdx | 202 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 202 insertions(+) create mode 100644 src/app/blog/qad/page.mdx diff --git a/src/app/blog/qad/page.mdx b/src/app/blog/qad/page.mdx new file mode 100644 index 00000000..8ec88f7b --- /dev/null +++ b/src/app/blog/qad/page.mdx @@ -0,0 +1,202 @@ +import { BlogPostLayout } from '@/components/BlogPostLayout' +import {ThemeImage} from '@/components/ThemeImage' + +export const post = { + draft: false, + author: 'Floris Bruynooghe', + date: '2025-08-2', + title: 'QUIC Address Discovery', + description: + "Moving STUN into QUIC", +} + +export const metadata = { + title: post.title, + description: post.description, + openGraph: { + title: post.title, + description: post.description, + images: [{ + url: `/api/og?title=Blog&subtitle=${post.title}`, + width: 1200, + height: 630, + alt: post.title, + type: 'image/png', + }], + type: 'article' + } +} + +export default (props) => + +# Holepunching + +As you probably know, iroh is in the business of holepunching. +And gives you a QUIC connection on top. +The typical scenario is establishing a direct connection between two devices, like laptops or phones, both on different home networks. +Home networks tend to have a [NAT] router in front of them, +and even when using IPv6 tend to block new incoming connections in the same fashion as a NAT router would. +And to be fair, blocking random incoming connections to a home network is a sensible choice. + +[NAT]: https://en.wikipedia.org/wiki/Network_address_translation + +The simplified theory of how UDP holepunching works is that both endpoints send a packet to each other at the same time. +Both routers see the outgoing datagram first, and when they receive the incoming datagram, it is considered to be same connection and is allowed in. +To achieve this in practice you need two things: + +- A means of communicating the coordination. + Iroh uses the relay server as a network path between the two endpoints for this. + We explained this in more detail in the [iroh on QUIC Multipath] post. + +[iroh on QUIC Multipath]: https://www.iroh.computer/blog/iroh-on-QUIC-multipath + +- The address the NAT router is going to be using for the other endpoint. + Because this is where you have to send your holepunching datagrams to. + +The second part is often called "address discovery", and it seems an impossible task. +How are we supposed to predict how a random router on the internet is going to behave? + +# NAT Types + +NAT routers have existed for a very long time, +and as the world tried to understand them many words have been wasted classifying and naming them. +It's a confusing mess. +[RFC 4787] can be used as a jumping point to explore the bewildering number of references to older RFC as well as updates to it. +Practical people today mostly classify NATs in two types however: + +[RFC 4787]: https://datatracker.ietf.org/doc/rfc4787/ + +- Destination Independent +- Destination Endpoint Dependent + +What does this mean? +A NAT router's job is to map an internal IP + port to an external IP + port. +When a new connection is created from inside the network the endpoint decides on the source IP + port. +The NAT router then creates a mapping and sends the datagram from some external IP address and port. +Incoming datagrams to to this external IP + port are then looked up in the mapping table to deliver back to the origial source IP + port of the endpoint. + +For a Destination Endpoint Indenpendent mapping the mapping is very simple: +for each unique source IP + port pair is mapped to one external IP + port pair, +*independently* of the destination IP + port of the datagram. +That means a single source IP + port can send datagrams to many destinations on the internet, +and they will all share the same external IP + port on the NAT router. +This is very convenient for holepunching. + +For a Destination Endpoint Dependent mapping there could be several variations. +However for a home router that typically does only have one external IP address only the external port can change. +So the NAT router can pick a new port for each destination, even if the source IP + port remains the same. + +Now think back to holepunching: +you need to know the external IP + port the NAT router will map to in order to send the holepunching datagrams to each other at the same time. +With Destination Endpoint *Independent* NAT you can use the information from another connection for this. +Destination Endpoint *Dependent* NAT however makes this much harder. +There are still tricks you can do, but for now iroh does not yet support this. + + +# Reflexive Transport Address + +This brings us to the fancy term "Reflexive Transport Address". +Consider you are a server sitting on the internet and you receive some datagrams from an endpoint behind a NAT router. +The IP header of the received datagram will contain the source IP address, +while the UDP header will contain the source port number. +The IP + port the server will see is the external IP + port of the mapping the NAT router makes. +To send a response you'd send a datagram addressed to this IP + port. + +In oder words, the source IP + port the server *observes*, +is the address it sends responses too. +Thus you can build a server that informs a client endpoint about the clients address as observed by the server. +The the client this is the *Reflexive Transport Address*. + +If the client is behind a NAT router this will be a different address than the client itself is sending from. +So a client can use this to detect if it is behind a NAT. +A client can go even further and use multiple such servers. +Now it can tell if the NAT router is Destination Endpoint Dependent or Destination Endpoint Independent. + + +# Session Traversal Utilities for NAT: STUN + +Naturally such servers have existed for a while. +As part of all the standardisation around audio-video calls in the form of SIP and WebRTC there was a need for endpoints to learn about their reflexive transport addresses. +For this the STUN spec was created, +initially in [RFC 3489] and several versions later we are now at [RFC 8489] if we didn't miss anything.[^rfc-numbers] + +Not going to lie about it: I've never read the full STUN spec.[^spec-reading] +It contains a lot and can do many things. +And yet, the really useful part is surprisingly small. +Until version 0.32 iroh used STUN exclusively. +It worked pretty simple: + +- Generate a STUN transaction ID. +- Send a STUN request to a STUN server in a UDP datagram (the iroh relay server). +- Wait for a response from the server matching transaction ID. + +That's it. + +So why change working systems? +Let's look at what we don't get from this: + +- Encryption. + While in theory you can encrypt STUN requesets using DTLS it's not something that is done much. + It's also DTLS... + +- Reliability. + It's a simple UDP-based protocol. + If the request is lost you eventually time out and need to resend it, very primitive. + +- Congestion Control. + You will be sending application traffic over the same sockets. + STUN happens outside of this however, + which makes packet loss much more likely if the application is busy. + +All of these are things that are solved in QUIC: QUIC is a secure, +reliable transport with advanced congestion control and loss detection. +And we already use it for our application protocol so we won't have two different endpoints sending and receiving on the same socket. + +[RFC 3489]: https://datatracker.ietf.org/doc/html/rfc3489 +[RFC 8489]: https://datatracker.ietf.org/doc/html/rfc8489 + +[rfc-numbers]: In between there was RFC 5389. RFC number cuteness tricks will never stop being cute. + +[spec-reading]: While I *have* read many QUIC RFCs in their entirity, several times. So it's not like I'm adverse to reading lengthy IETF specs. + + +# QUIC Address Discovery + +This is such an obvious idea that someone already wrote it down as an IETF draft (thanks Maarten and Christian!): +https://quicwg.org/address-discovery/draft-ietf-quic-address-discovery.html + +QUIC Address Discovery, or QAD as we call it, is an extension to the QUIC protocol that gets negotiated during the QUIC handshake. +If negotiated the remote side will send you a new OBSERVED_ADDRESS frame containing the reflexive transport address it observed for you. + +One of the cool things is that this can happen regardless of the application protocol being used, +as it happens entirely in QUIC frames. +So you can still use this connection to carry application data. + +Another really nice feature flowing from this is that this isn't a request-response protocol anymore. +QUIC supports connection migration for clients, +e.g. when your NAT router updates the mapping for some reason, +or when you move from a Wifi network to mobile data, +QUIC will detect this and migrate the connection to this new network, +without losing any data or breaking the connection. +And whenever that happens while the QAD extension is negotiated, +a new reflexive transport address is observed and will be sent in a new OBSERVED_ADDRESS frame. +Thus this becomes event-based rather than request response. + + +# QAD in iroh Relay Servers + +Since iroh 0.32 both iroh and the relay servers have supported, +and used, QAD as well as STUN. +Since the 0.90 release we have switched to QAD exclusively. + +The work is not finished yet though. +iroh still uses a special-purpose QUIC connection for QAD. +At some point we would like to also support making the normal relay connection over QUIC when possible, +in addition to the current HTTPS1.1/WebSocket connection. +This would be one fewer connection to the relay server and truly allow us to benefit from the event-based nature of QAD. +This is something for after the 1.0 release however. + + +** ** + +### Footnotes From f95a4806dae43c53f3f0c6a254468975e7e60382 Mon Sep 17 00:00:00 2001 From: Floris Bruynooghe Date: Tue, 26 Aug 2025 12:52:51 +0200 Subject: [PATCH 02/18] write a real date? --- src/app/blog/qad/page.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/app/blog/qad/page.mdx b/src/app/blog/qad/page.mdx index 8ec88f7b..177fe4ce 100644 --- a/src/app/blog/qad/page.mdx +++ b/src/app/blog/qad/page.mdx @@ -4,7 +4,7 @@ import {ThemeImage} from '@/components/ThemeImage' export const post = { draft: false, author: 'Floris Bruynooghe', - date: '2025-08-2', + date: '2025-08-26', title: 'QUIC Address Discovery', description: "Moving STUN into QUIC", From 73d75afc1937c6aee4e734d4a2e7572bd9702c15 Mon Sep 17 00:00:00 2001 From: Floris Bruynooghe Date: Wed, 27 Aug 2025 11:51:06 +0200 Subject: [PATCH 03/18] Apply suggestions from code review Co-authored-by: ramfox --- src/app/blog/qad/page.mdx | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/app/blog/qad/page.mdx b/src/app/blog/qad/page.mdx index 177fe4ce..02edf012 100644 --- a/src/app/blog/qad/page.mdx +++ b/src/app/blog/qad/page.mdx @@ -35,7 +35,7 @@ As you probably know, iroh is in the business of holepunching. And gives you a QUIC connection on top. The typical scenario is establishing a direct connection between two devices, like laptops or phones, both on different home networks. Home networks tend to have a [NAT] router in front of them, -and even when using IPv6 tend to block new incoming connections in the same fashion as a NAT router would. +and, even when using IPv6, tend to block new incoming connections in the same fashion as a NAT router would. And to be fair, blocking random incoming connections to a home network is a sensible choice. [NAT]: https://en.wikipedia.org/wiki/Network_address_translation @@ -51,7 +51,7 @@ To achieve this in practice you need two things: [iroh on QUIC Multipath]: https://www.iroh.computer/blog/iroh-on-QUIC-multipath - The address the NAT router is going to be using for the other endpoint. - Because this is where you have to send your holepunching datagrams to. + Because this is where you have to send your holepunching datagrams. The second part is often called "address discovery", and it seems an impossible task. How are we supposed to predict how a random router on the internet is going to behave? @@ -61,7 +61,7 @@ How are we supposed to predict how a random router on the internet is going to b NAT routers have existed for a very long time, and as the world tried to understand them many words have been wasted classifying and naming them. It's a confusing mess. -[RFC 4787] can be used as a jumping point to explore the bewildering number of references to older RFC as well as updates to it. +[RFC 4787] can be used as a jumping point to explore the bewildering number of updates and references to older RFCs. Practical people today mostly classify NATs in two types however: [RFC 4787]: https://datatracker.ietf.org/doc/rfc4787/ @@ -102,9 +102,9 @@ while the UDP header will contain the source port number. The IP + port the server will see is the external IP + port of the mapping the NAT router makes. To send a response you'd send a datagram addressed to this IP + port. -In oder words, the source IP + port the server *observes*, +In other words, the source IP + port the server *observes*, is the address it sends responses too. -Thus you can build a server that informs a client endpoint about the clients address as observed by the server. +Thus you can build a server that informs a client endpoint about the client's address as observed by the server. The the client this is the *Reflexive Transport Address*. If the client is behind a NAT router this will be a different address than the client itself is sending from. @@ -116,7 +116,7 @@ Now it can tell if the NAT router is Destination Endpoint Dependent or Destinati # Session Traversal Utilities for NAT: STUN Naturally such servers have existed for a while. -As part of all the standardisation around audio-video calls in the form of SIP and WebRTC there was a need for endpoints to learn about their reflexive transport addresses. +As part of the standardisation around audio-video calls in the form of SIP and WebRTC, there was a need for endpoints to learn about their reflexive transport addresses. For this the STUN spec was created, initially in [RFC 3489] and several versions later we are now at [RFC 8489] if we didn't miss anything.[^rfc-numbers] @@ -127,7 +127,7 @@ Until version 0.32 iroh used STUN exclusively. It worked pretty simple: - Generate a STUN transaction ID. -- Send a STUN request to a STUN server in a UDP datagram (the iroh relay server). +- Send a STUN request to a STUN server (the iroh relay server) in a UDP datagram. - Wait for a response from the server matching transaction ID. That's it. @@ -136,12 +136,12 @@ So why change working systems? Let's look at what we don't get from this: - Encryption. - While in theory you can encrypt STUN requesets using DTLS it's not something that is done much. + While in theory you can encrypt STUN requests using DTLS it's not something that is done much. It's also DTLS... - Reliability. It's a simple UDP-based protocol. - If the request is lost you eventually time out and need to resend it, very primitive. + If the request is lost you eventually time out and need to resend it—very primitive. - Congestion Control. You will be sending application traffic over the same sockets. @@ -180,7 +180,7 @@ QUIC will detect this and migrate the connection to this new network, without losing any data or breaking the connection. And whenever that happens while the QAD extension is negotiated, a new reflexive transport address is observed and will be sent in a new OBSERVED_ADDRESS frame. -Thus this becomes event-based rather than request response. +Thus this becomes event-based rather than request-response. # QAD in iroh Relay Servers From 50b545f037e6a4939ef25699a55fb8786b06b2d2 Mon Sep 17 00:00:00 2001 From: Floris Bruynooghe Date: Wed, 27 Aug 2025 11:52:41 +0200 Subject: [PATCH 04/18] fix missing word --- src/app/blog/qad/page.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/app/blog/qad/page.mdx b/src/app/blog/qad/page.mdx index 02edf012..f291b2bf 100644 --- a/src/app/blog/qad/page.mdx +++ b/src/app/blog/qad/page.mdx @@ -66,7 +66,7 @@ Practical people today mostly classify NATs in two types however: [RFC 4787]: https://datatracker.ietf.org/doc/rfc4787/ -- Destination Independent +- Destination Endpoint Independent - Destination Endpoint Dependent What does this mean? From 3fcb9203b2eb487b842dbf068b1f704d8ba2502a Mon Sep 17 00:00:00 2001 From: Floris Bruynooghe Date: Wed, 27 Aug 2025 12:08:18 +0200 Subject: [PATCH 05/18] Change from IP + port to "address" --- src/app/blog/qad/page.mdx | 41 ++++++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/src/app/blog/qad/page.mdx b/src/app/blog/qad/page.mdx index f291b2bf..22dcb84d 100644 --- a/src/app/blog/qad/page.mdx +++ b/src/app/blog/qad/page.mdx @@ -31,8 +31,7 @@ export default (props) => # Holepunching -As you probably know, iroh is in the business of holepunching. -And gives you a QUIC connection on top. +As you probably know, iroh is in the business of holepunching and gives you a QUIC connection on top. The typical scenario is establishing a direct connection between two devices, like laptops or phones, both on different home networks. Home networks tend to have a [NAT] router in front of them, and, even when using IPv6, tend to block new incoming connections in the same fashion as a NAT router would. @@ -69,25 +68,35 @@ Practical people today mostly classify NATs in two types however: - Destination Endpoint Independent - Destination Endpoint Dependent -What does this mean? -A NAT router's job is to map an internal IP + port to an external IP + port. -When a new connection is created from inside the network the endpoint decides on the source IP + port. -The NAT router then creates a mapping and sends the datagram from some external IP address and port. -Incoming datagrams to to this external IP + port are then looked up in the mapping table to deliver back to the origial source IP + port of the endpoint. +Let's unpack that a bit more. +A NAT router's job is to map an internal IP & port to an external IP & port, +or let's call this mapping an internal *address* to an external *address* for simplicity.[^addr] +When a new connection is created from inside the network an endpoint binds on a source address, +usually leaving exact IP & port choices to the kernel. +The NAT router then creates a mapping and sends the datagram from some external address. +Incoming datagrams to to this external address are then looked up in the mapping table to deliver back to the origial source address of the endpoint. + +[^addr]: Technically we are dealing with *socket addresses*, which on IPv4 is indeed an IP address + port, + but IPv6 adds in a scope and flow label into the socket address. + These fields have some advanced uses but are often ignored, + so it is easier to think of of an IP & port tuple. + So naming this *address* is a bit a handwavy term, + though sufficient to understand the needed logic. For a Destination Endpoint Indenpendent mapping the mapping is very simple: -for each unique source IP + port pair is mapped to one external IP + port pair, -*independently* of the destination IP + port of the datagram. -That means a single source IP + port can send datagrams to many destinations on the internet, -and they will all share the same external IP + port on the NAT router. +each unique source address is mapped to one of the available external addresses (an IP address + port combination), +*independently* of the destination address of the datagram. +That means a single source address can send datagrams to many destinations on the internet, +and they will all share the same external address on the NAT router. This is very convenient for holepunching. For a Destination Endpoint Dependent mapping there could be several variations. However for a home router that typically does only have one external IP address only the external port can change. -So the NAT router can pick a new port for each destination, even if the source IP + port remains the same. +So the NAT router can pick a new port for each destination, even if the source address remains the same. Now think back to holepunching: -you need to know the external IP + port the NAT router will map to in order to send the holepunching datagrams to each other at the same time. +you need to know the external address the NAT router will map to, +in order to send the holepunching datagrams to each other at the same time. With Destination Endpoint *Independent* NAT you can use the information from another connection for this. Destination Endpoint *Dependent* NAT however makes this much harder. There are still tricks you can do, but for now iroh does not yet support this. @@ -99,10 +108,10 @@ This brings us to the fancy term "Reflexive Transport Address". Consider you are a server sitting on the internet and you receive some datagrams from an endpoint behind a NAT router. The IP header of the received datagram will contain the source IP address, while the UDP header will contain the source port number. -The IP + port the server will see is the external IP + port of the mapping the NAT router makes. -To send a response you'd send a datagram addressed to this IP + port. +The IP & port the server will see is the external address of the mapping the NAT router makes. +To send a response you'd send a datagram addressed to this address. -In other words, the source IP + port the server *observes*, +In other words, the source address the server *observes*, is the address it sends responses too. Thus you can build a server that informs a client endpoint about the client's address as observed by the server. The the client this is the *Reflexive Transport Address*. From 5a0e2c5ad1f889a4ff8e817f3a7def68f62491f0 Mon Sep 17 00:00:00 2001 From: Floris Bruynooghe Date: Wed, 27 Aug 2025 12:08:40 +0200 Subject: [PATCH 06/18] fix footnote refs --- src/app/blog/qad/page.mdx | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/app/blog/qad/page.mdx b/src/app/blog/qad/page.mdx index 22dcb84d..9ee8d9e5 100644 --- a/src/app/blog/qad/page.mdx +++ b/src/app/blog/qad/page.mdx @@ -129,12 +129,19 @@ As part of the standardisation around audio-video calls in the form of SIP and W For this the STUN spec was created, initially in [RFC 3489] and several versions later we are now at [RFC 8489] if we didn't miss anything.[^rfc-numbers] +[RFC 3489]: https://datatracker.ietf.org/doc/html/rfc3489 +[RFC 8489]: https://datatracker.ietf.org/doc/html/rfc8489 + +[^rfc-numbers]: In between there was RFC 5389. RFC number cuteness tricks will never stop being cute. + Not going to lie about it: I've never read the full STUN spec.[^spec-reading] It contains a lot and can do many things. And yet, the really useful part is surprisingly small. Until version 0.32 iroh used STUN exclusively. It worked pretty simple: +[^spec-reading]: While I *have* read many QUIC RFCs in their entirity, several times. So it's not like I'm adverse to reading lengthy IETF specs. + - Generate a STUN transaction ID. - Send a STUN request to a STUN server (the iroh relay server) in a UDP datagram. - Wait for a response from the server matching transaction ID. @@ -161,13 +168,6 @@ All of these are things that are solved in QUIC: QUIC is a secure, reliable transport with advanced congestion control and loss detection. And we already use it for our application protocol so we won't have two different endpoints sending and receiving on the same socket. -[RFC 3489]: https://datatracker.ietf.org/doc/html/rfc3489 -[RFC 8489]: https://datatracker.ietf.org/doc/html/rfc8489 - -[rfc-numbers]: In between there was RFC 5389. RFC number cuteness tricks will never stop being cute. - -[spec-reading]: While I *have* read many QUIC RFCs in their entirity, several times. So it's not like I'm adverse to reading lengthy IETF specs. - # QUIC Address Discovery From 548c0bee8294d40869bd1994825418865521294d Mon Sep 17 00:00:00 2001 From: Floris Bruynooghe Date: Wed, 27 Aug 2025 12:11:06 +0200 Subject: [PATCH 07/18] slightly tone this down --- src/app/blog/qad/page.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/app/blog/qad/page.mdx b/src/app/blog/qad/page.mdx index 9ee8d9e5..66d85693 100644 --- a/src/app/blog/qad/page.mdx +++ b/src/app/blog/qad/page.mdx @@ -58,7 +58,7 @@ How are we supposed to predict how a random router on the internet is going to b # NAT Types NAT routers have existed for a very long time, -and as the world tried to understand them many words have been wasted classifying and naming them. +and as the world tried to understand them many words have been spilled classifying and naming them. It's a confusing mess. [RFC 4787] can be used as a jumping point to explore the bewildering number of updates and references to older RFCs. Practical people today mostly classify NATs in two types however: From 7f7e57adbc23785367875b3a3e5e2f92eab12ca1 Mon Sep 17 00:00:00 2001 From: Floris Bruynooghe Date: Wed, 27 Aug 2025 12:12:25 +0200 Subject: [PATCH 08/18] fix word --- src/app/blog/qad/page.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/app/blog/qad/page.mdx b/src/app/blog/qad/page.mdx index 66d85693..df390f36 100644 --- a/src/app/blog/qad/page.mdx +++ b/src/app/blog/qad/page.mdx @@ -114,7 +114,7 @@ To send a response you'd send a datagram addressed to this address. In other words, the source address the server *observes*, is the address it sends responses too. Thus you can build a server that informs a client endpoint about the client's address as observed by the server. -The the client this is the *Reflexive Transport Address*. +To the client this is the *Reflexive Transport Address*. If the client is behind a NAT router this will be a different address than the client itself is sending from. So a client can use this to detect if it is behind a NAT. From 7d40dce0a106704b8e8a6a5c21db2e8a8e7356ed Mon Sep 17 00:00:00 2001 From: Floris Bruynooghe Date: Wed, 27 Aug 2025 12:14:04 +0200 Subject: [PATCH 09/18] Spell this out --- src/app/blog/qad/page.mdx | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/app/blog/qad/page.mdx b/src/app/blog/qad/page.mdx index df390f36..901fabe7 100644 --- a/src/app/blog/qad/page.mdx +++ b/src/app/blog/qad/page.mdx @@ -119,7 +119,10 @@ To the client this is the *Reflexive Transport Address*. If the client is behind a NAT router this will be a different address than the client itself is sending from. So a client can use this to detect if it is behind a NAT. A client can go even further and use multiple such servers. -Now it can tell if the NAT router is Destination Endpoint Dependent or Destination Endpoint Independent. +Now if it receives the same reflexive transport address twice, +it is behind a Destination Endpoint Independent NAT. +If it receives two different reflexive transport addresses, +it is stuck behind a Destination Endpoint Dependent NAT. # Session Traversal Utilities for NAT: STUN From 872fbefebb14cbd814b6b1065a4cee3e30d8d8d0 Mon Sep 17 00:00:00 2001 From: Floris Bruynooghe Date: Wed, 27 Aug 2025 12:27:31 +0200 Subject: [PATCH 10/18] tone down the scorn on STUN --- src/app/blog/qad/page.mdx | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/src/app/blog/qad/page.mdx b/src/app/blog/qad/page.mdx index 901fabe7..e60a5b2a 100644 --- a/src/app/blog/qad/page.mdx +++ b/src/app/blog/qad/page.mdx @@ -128,26 +128,21 @@ it is stuck behind a Destination Endpoint Dependent NAT. # Session Traversal Utilities for NAT: STUN Naturally such servers have existed for a while. -As part of the standardisation around audio-video calls in the form of SIP and WebRTC, there was a need for endpoints to learn about their reflexive transport addresses. +As part of the standardisation around audio-video calls in the form of SIP and WebRTC, +there was a need for endpoints to learn about their reflexive transport addresses. For this the STUN spec was created, -initially in [RFC 3489] and several versions later we are now at [RFC 8489] if we didn't miss anything.[^rfc-numbers] +which by now has evolved into [RFC 8489]. +A sizable tome. -[RFC 3489]: https://datatracker.ietf.org/doc/html/rfc3489 [RFC 8489]: https://datatracker.ietf.org/doc/html/rfc8489 -[^rfc-numbers]: In between there was RFC 5389. RFC number cuteness tricks will never stop being cute. - -Not going to lie about it: I've never read the full STUN spec.[^spec-reading] +Not going to lie about it: I've never read the full STUN spec. It contains a lot and can do many things. -And yet, the really useful part is surprisingly small. -Until version 0.32 iroh used STUN exclusively. -It worked pretty simple: - -[^spec-reading]: While I *have* read many QUIC RFCs in their entirity, several times. So it's not like I'm adverse to reading lengthy IETF specs. +And yet, the part iroh actively used is surprisingly small: -- Generate a STUN transaction ID. +- Generate a STUN transaction ID, just a few random bytes. - Send a STUN request to a STUN server (the iroh relay server) in a UDP datagram. -- Wait for a response from the server matching transaction ID. +- Wait for a response from the server which matches the request's transaction ID. That's it. @@ -167,8 +162,8 @@ Let's look at what we don't get from this: STUN happens outside of this however, which makes packet loss much more likely if the application is busy. -All of these are things that are solved in QUIC: QUIC is a secure, -reliable transport with advanced congestion control and loss detection. +All of these are things that are solved in QUIC: +QUIC is a secure, reliable transport with advanced congestion control and loss detection. And we already use it for our application protocol so we won't have two different endpoints sending and receiving on the same socket. @@ -198,7 +193,7 @@ Thus this becomes event-based rather than request-response. # QAD in iroh Relay Servers Since iroh 0.32 both iroh and the relay servers have supported, -and used, QAD as well as STUN. +and used, both QAD as well as STUN. Since the 0.90 release we have switched to QAD exclusively. The work is not finished yet though. From 32cdd6ed46fdfde07fb7fde52b6a8440b67a0907 Mon Sep 17 00:00:00 2001 From: Floris Bruynooghe Date: Wed, 27 Aug 2025 12:30:28 +0200 Subject: [PATCH 11/18] remove unjustified stab at DTLS --- src/app/blog/qad/page.mdx | 1 - 1 file changed, 1 deletion(-) diff --git a/src/app/blog/qad/page.mdx b/src/app/blog/qad/page.mdx index e60a5b2a..6a80396e 100644 --- a/src/app/blog/qad/page.mdx +++ b/src/app/blog/qad/page.mdx @@ -151,7 +151,6 @@ Let's look at what we don't get from this: - Encryption. While in theory you can encrypt STUN requests using DTLS it's not something that is done much. - It's also DTLS... - Reliability. It's a simple UDP-based protocol. From 13faa4955281ac9db95be4b9e037a1f9781863ca Mon Sep 17 00:00:00 2001 From: Floris Bruynooghe Date: Wed, 27 Aug 2025 12:33:36 +0200 Subject: [PATCH 12/18] slighly more explicit --- src/app/blog/qad/page.mdx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/app/blog/qad/page.mdx b/src/app/blog/qad/page.mdx index 6a80396e..b652ea97 100644 --- a/src/app/blog/qad/page.mdx +++ b/src/app/blog/qad/page.mdx @@ -157,8 +157,8 @@ Let's look at what we don't get from this: If the request is lost you eventually time out and need to resend it—very primitive. - Congestion Control. - You will be sending application traffic over the same sockets. - STUN happens outside of this however, + You will be sending application traffic over the same sockets as the STUN datagrams. + However, STUN requests are sent outside of the normal flow of data, which makes packet loss much more likely if the application is busy. All of these are things that are solved in QUIC: From faf273c6a81a21afb0d5e9ca401b90efa7042e8d Mon Sep 17 00:00:00 2001 From: Floris Bruynooghe Date: Wed, 27 Aug 2025 12:56:03 +0200 Subject: [PATCH 13/18] Whole bunch of fixes, rephrasing etc from a full read-through --- src/app/blog/qad/page.mdx | 41 ++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/src/app/blog/qad/page.mdx b/src/app/blog/qad/page.mdx index b652ea97..31e23811 100644 --- a/src/app/blog/qad/page.mdx +++ b/src/app/blog/qad/page.mdx @@ -68,27 +68,27 @@ Practical people today mostly classify NATs in two types however: - Destination Endpoint Independent - Destination Endpoint Dependent -Let's unpack that a bit more. -A NAT router's job is to map an internal IP & port to an external IP & port, -or let's call this mapping an internal *address* to an external *address* for simplicity.[^addr] -When a new connection is created from inside the network an endpoint binds on a source address, +Let's unpack that a bit more: +a NAT router's job is to map an internal IP & port to an external IP & port, +or let's call this mapping an *internal address* to an *external address* for simplicity.[^addr] +When a new connection is created from inside the network an endpoint binds a socket on an internal source address, usually leaving exact IP & port choices to the kernel. -The NAT router then creates a mapping and sends the datagram from some external address. +When this endpoint sends out a datagram to the internet, +the NAT router creates a mapping and sends the datagram from external address of it's choosing. Incoming datagrams to to this external address are then looked up in the mapping table to deliver back to the origial source address of the endpoint. -[^addr]: Technically we are dealing with *socket addresses*, which on IPv4 is indeed an IP address + port, +[^addr]: Technically we are dealing with *socket addresses*, which on IPv4 is indeed an IP address and port, but IPv6 adds in a scope and flow label into the socket address. These fields have some advanced uses but are often ignored, - so it is easier to think of of an IP & port tuple. + so it is easier to think of of an IP & port 2-tuple. So naming this *address* is a bit a handwavy term, though sufficient to understand the needed logic. -For a Destination Endpoint Indenpendent mapping the mapping is very simple: -each unique source address is mapped to one of the available external addresses (an IP address + port combination), -*independently* of the destination address of the datagram. +For a Destination Endpoint Indenpendent mapping the mapping is straight forward: +each unique source address is mapped to one of the available external addresses (an IP address & port combination), +*regardless* of the destination address of the datagram. That means a single source address can send datagrams to many destinations on the internet, and they will all share the same external address on the NAT router. -This is very convenient for holepunching. For a Destination Endpoint Dependent mapping there could be several variations. However for a home router that typically does only have one external IP address only the external port can change. @@ -105,11 +105,11 @@ There are still tricks you can do, but for now iroh does not yet support this. # Reflexive Transport Address This brings us to the fancy term "Reflexive Transport Address". -Consider you are a server sitting on the internet and you receive some datagrams from an endpoint behind a NAT router. +Consider you are a server sitting on the internet and you receive a datagram from an endpoint behind a NAT router. The IP header of the received datagram will contain the source IP address, while the UDP header will contain the source port number. The IP & port the server will see is the external address of the mapping the NAT router makes. -To send a response you'd send a datagram addressed to this address. +To send a response the server would send a datagram addressed to this observed source address. In other words, the source address the server *observes*, is the address it sends responses too. @@ -118,14 +118,14 @@ To the client this is the *Reflexive Transport Address*. If the client is behind a NAT router this will be a different address than the client itself is sending from. So a client can use this to detect if it is behind a NAT. -A client can go even further and use multiple such servers. -Now if it receives the same reflexive transport address twice, +A client can go even further and use multiple such servers: +now if it receives the same reflexive transport address twice, it is behind a Destination Endpoint Independent NAT. If it receives two different reflexive transport addresses, it is stuck behind a Destination Endpoint Dependent NAT. -# Session Traversal Utilities for NAT: STUN +# STUN Naturally such servers have existed for a while. As part of the standardisation around audio-video calls in the form of SIP and WebRTC, @@ -141,7 +141,7 @@ It contains a lot and can do many things. And yet, the part iroh actively used is surprisingly small: - Generate a STUN transaction ID, just a few random bytes. -- Send a STUN request to a STUN server (the iroh relay server) in a UDP datagram. +- Send a STUN request to a STUN server in a UDP datagram. - Wait for a response from the server which matches the request's transaction ID. That's it. @@ -154,7 +154,7 @@ Let's look at what we don't get from this: - Reliability. It's a simple UDP-based protocol. - If the request is lost you eventually time out and need to resend it—very primitive. + If the request is lost you eventually time out and need to resend it — very primitive. - Congestion Control. You will be sending application traffic over the same sockets as the STUN datagrams. @@ -168,11 +168,12 @@ And we already use it for our application protocol so we won't have two differen # QUIC Address Discovery -This is such an obvious idea that someone already wrote it down as an IETF draft (thanks Maarten and Christian!): +This is such an obvious idea that someone already wrote it down as an IETF draft (thanks Marten and Christian!): https://quicwg.org/address-discovery/draft-ietf-quic-address-discovery.html QUIC Address Discovery, or QAD as we call it, is an extension to the QUIC protocol that gets negotiated during the QUIC handshake. -If negotiated the remote side will send you a new OBSERVED_ADDRESS frame containing the reflexive transport address it observed for you. +If negotiated, +the remote side will send you an OBSERVED_ADDRESS frame containing the reflexive transport address it observed for you. One of the cool things is that this can happen regardless of the application protocol being used, as it happens entirely in QUIC frames. From c4b6cd4c79579454cdf35163cfb84b50c20594b8 Mon Sep 17 00:00:00 2001 From: Floris Bruynooghe Date: Thu, 28 Aug 2025 09:58:17 +0200 Subject: [PATCH 14/18] change the title I don't mind other titles either, but this is already an improvement --- src/app/blog/qad/page.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/app/blog/qad/page.mdx b/src/app/blog/qad/page.mdx index 31e23811..40a01488 100644 --- a/src/app/blog/qad/page.mdx +++ b/src/app/blog/qad/page.mdx @@ -5,7 +5,7 @@ export const post = { draft: false, author: 'Floris Bruynooghe', date: '2025-08-26', - title: 'QUIC Address Discovery', + title: 'Moving from STUN to QUIC Address Discovery', description: "Moving STUN into QUIC", } From 68bee33fb71387af072787f49ea5c4dea847c0d7 Mon Sep 17 00:00:00 2001 From: Floris Bruynooghe Date: Thu, 28 Aug 2025 16:13:53 +0200 Subject: [PATCH 15/18] spelling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Philipp Krüger --- src/app/blog/qad/page.mdx | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/app/blog/qad/page.mdx b/src/app/blog/qad/page.mdx index 40a01488..81a3a0db 100644 --- a/src/app/blog/qad/page.mdx +++ b/src/app/blog/qad/page.mdx @@ -40,7 +40,7 @@ And to be fair, blocking random incoming connections to a home network is a sens [NAT]: https://en.wikipedia.org/wiki/Network_address_translation The simplified theory of how UDP holepunching works is that both endpoints send a packet to each other at the same time. -Both routers see the outgoing datagram first, and when they receive the incoming datagram, it is considered to be same connection and is allowed in. +Both routers see the outgoing datagram first, and when they receive the incoming datagram, it is considered to be the same connection and is allowed in. To achieve this in practice you need two things: - A means of communicating the coordination. @@ -60,8 +60,8 @@ How are we supposed to predict how a random router on the internet is going to b NAT routers have existed for a very long time, and as the world tried to understand them many words have been spilled classifying and naming them. It's a confusing mess. -[RFC 4787] can be used as a jumping point to explore the bewildering number of updates and references to older RFCs. -Practical people today mostly classify NATs in two types however: +[RFC 4787] can be used as a jumping-off point to explore the bewildering number of updates and references to older RFCs. +Practical people today mostly classify NATs into two types however: [RFC 4787]: https://datatracker.ietf.org/doc/rfc4787/ @@ -74,17 +74,17 @@ or let's call this mapping an *internal address* to an *external address* for si When a new connection is created from inside the network an endpoint binds a socket on an internal source address, usually leaving exact IP & port choices to the kernel. When this endpoint sends out a datagram to the internet, -the NAT router creates a mapping and sends the datagram from external address of it's choosing. -Incoming datagrams to to this external address are then looked up in the mapping table to deliver back to the origial source address of the endpoint. +the NAT router creates a mapping and sends the datagram from an external address of its choosing. +Incoming datagrams to this external address are then looked up in the mapping table to deliver back to the original source address of the endpoint. [^addr]: Technically we are dealing with *socket addresses*, which on IPv4 is indeed an IP address and port, but IPv6 adds in a scope and flow label into the socket address. These fields have some advanced uses but are often ignored, - so it is easier to think of of an IP & port 2-tuple. - So naming this *address* is a bit a handwavy term, + so it is easier to think of an IP & port 2-tuple. + So naming this *address* is a bit of a handwavy term, though sufficient to understand the needed logic. -For a Destination Endpoint Indenpendent mapping the mapping is straight forward: +For a Destination Endpoint Independent mapping the mapping is straightforward: each unique source address is mapped to one of the available external addresses (an IP address & port combination), *regardless* of the destination address of the datagram. That means a single source address can send datagrams to many destinations on the internet, @@ -112,7 +112,7 @@ The IP & port the server will see is the external address of the mapping the NAT To send a response the server would send a datagram addressed to this observed source address. In other words, the source address the server *observes*, -is the address it sends responses too. +is the address it sends responses to. Thus you can build a server that informs a client endpoint about the client's address as observed by the server. To the client this is the *Reflexive Transport Address*. @@ -128,7 +128,7 @@ it is stuck behind a Destination Endpoint Dependent NAT. # STUN Naturally such servers have existed for a while. -As part of the standardisation around audio-video calls in the form of SIP and WebRTC, +As part of the standardization around audio-video calls in the form of SIP and WebRTC, there was a need for endpoints to learn about their reflexive transport addresses. For this the STUN spec was created, which by now has evolved into [RFC 8489]. From 004f3efb721af8eac899374a4d2d75dadeab215a Mon Sep 17 00:00:00 2001 From: Floris Bruynooghe Date: Thu, 28 Aug 2025 16:58:00 +0200 Subject: [PATCH 16/18] Apply suggestions from code review Co-authored-by: ramfox --- src/app/blog/qad/page.mdx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/app/blog/qad/page.mdx b/src/app/blog/qad/page.mdx index 81a3a0db..9bfbb937 100644 --- a/src/app/blog/qad/page.mdx +++ b/src/app/blog/qad/page.mdx @@ -58,7 +58,7 @@ How are we supposed to predict how a random router on the internet is going to b # NAT Types NAT routers have existed for a very long time, -and as the world tried to understand them many words have been spilled classifying and naming them. +and as the world has tried to understand them many words have been spilled classifying and naming them. It's a confusing mess. [RFC 4787] can be used as a jumping-off point to explore the bewildering number of updates and references to older RFCs. Practical people today mostly classify NATs into two types however: @@ -91,7 +91,7 @@ That means a single source address can send datagrams to many destinations on th and they will all share the same external address on the NAT router. For a Destination Endpoint Dependent mapping there could be several variations. -However for a home router that typically does only have one external IP address only the external port can change. +However, a home router typically only has one external IP address, so only the external port can change. So the NAT router can pick a new port for each destination, even if the source address remains the same. Now think back to holepunching: From e51cd97b757489a90a8b62e94d055c00adbef1e9 Mon Sep 17 00:00:00 2001 From: Floris Bruynooghe Date: Fri, 29 Aug 2025 11:10:39 +0200 Subject: [PATCH 17/18] Apply suggestions from code review Co-authored-by: ramfox --- src/app/blog/qad/page.mdx | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/src/app/blog/qad/page.mdx b/src/app/blog/qad/page.mdx index 9bfbb937..13ca9d0a 100644 --- a/src/app/blog/qad/page.mdx +++ b/src/app/blog/qad/page.mdx @@ -31,16 +31,16 @@ export default (props) => # Holepunching -As you probably know, iroh is in the business of holepunching and gives you a QUIC connection on top. -The typical scenario is establishing a direct connection between two devices, like laptops or phones, both on different home networks. +As you probably know, `iroh` is in the business of holepunching. +The typical scenario is establishing a direct QUIC connection between two devices, like laptops or phones, both on different home networks. Home networks tend to have a [NAT] router in front of them, -and, even when using IPv6, tend to block new incoming connections in the same fashion as a NAT router would. -And to be fair, blocking random incoming connections to a home network is a sensible choice. +and tend to block new incoming connections even when using IPv6. +To be fair: blocking random incoming connections to a home network is a sensible choice. [NAT]: https://en.wikipedia.org/wiki/Network_address_translation The simplified theory of how UDP holepunching works is that both endpoints send a packet to each other at the same time. -Both routers see the outgoing datagram first, and when they receive the incoming datagram, it is considered to be the same connection and is allowed in. +Both routers see the *outgoing* datagram first, and when they receive the *incoming* datagram, it is considered to be the same connection and is allowed in. To achieve this in practice you need two things: - A means of communicating the coordination. @@ -49,8 +49,7 @@ To achieve this in practice you need two things: [iroh on QUIC Multipath]: https://www.iroh.computer/blog/iroh-on-QUIC-multipath -- The address the NAT router is going to be using for the other endpoint. - Because this is where you have to send your holepunching datagrams. +- The address the NAT router is going to be using for the other endpoint – this is where you have to send your holepunching datagrams. The second part is often called "address discovery", and it seems an impossible task. How are we supposed to predict how a random router on the internet is going to behave? @@ -106,10 +105,10 @@ There are still tricks you can do, but for now iroh does not yet support this. This brings us to the fancy term "Reflexive Transport Address". Consider you are a server sitting on the internet and you receive a datagram from an endpoint behind a NAT router. -The IP header of the received datagram will contain the source IP address, -while the UDP header will contain the source port number. -The IP & port the server will see is the external address of the mapping the NAT router makes. -To send a response the server would send a datagram addressed to this observed source address. +The IP header of the received datagram contains the source IP address, +while the UDP header contains the source port number. +The IP & port combination the server sees is the external address, the mapped address the NAT router made. +To send a response, the server needs to send a datagram addressed to this observed source address. In other words, the source address the server *observes*, is the address it sends responses to. @@ -138,7 +137,7 @@ A sizable tome. Not going to lie about it: I've never read the full STUN spec. It contains a lot and can do many things. -And yet, the part iroh actively used is surprisingly small: +And yet, the part `iroh` actively used is surprisingly small: - Generate a STUN transaction ID, just a few random bytes. - Send a STUN request to a STUN server in a UDP datagram. @@ -154,7 +153,7 @@ Let's look at what we don't get from this: - Reliability. It's a simple UDP-based protocol. - If the request is lost you eventually time out and need to resend it — very primitive. + If the request is lost you eventually time out and need to resend it – very primitive. - Congestion Control. You will be sending application traffic over the same sockets as the STUN datagrams. @@ -190,14 +189,14 @@ a new reflexive transport address is observed and will be sent in a new OBSERVED Thus this becomes event-based rather than request-response. -# QAD in iroh Relay Servers +# QAD in `iroh` Relay Servers -Since iroh 0.32 both iroh and the relay servers have supported, +Since `iroh` 0.32, `iroh` and the relay servers have supported, and used, both QAD as well as STUN. Since the 0.90 release we have switched to QAD exclusively. The work is not finished yet though. -iroh still uses a special-purpose QUIC connection for QAD. +Iroh still uses a special-purpose QUIC connection for QAD. At some point we would like to also support making the normal relay connection over QUIC when possible, in addition to the current HTTPS1.1/WebSocket connection. This would be one fewer connection to the relay server and truly allow us to benefit from the event-based nature of QAD. From 02c6bcef7f4bbf82fe9ec6f56e732ab38826f358 Mon Sep 17 00:00:00 2001 From: Floris Bruynooghe Date: Fri, 29 Aug 2025 11:12:06 +0200 Subject: [PATCH 18/18] Set publishing date --- src/app/blog/qad/page.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/app/blog/qad/page.mdx b/src/app/blog/qad/page.mdx index 13ca9d0a..c4a259fa 100644 --- a/src/app/blog/qad/page.mdx +++ b/src/app/blog/qad/page.mdx @@ -4,7 +4,7 @@ import {ThemeImage} from '@/components/ThemeImage' export const post = { draft: false, author: 'Floris Bruynooghe', - date: '2025-08-26', + date: '2025-09-01', title: 'Moving from STUN to QUIC Address Discovery', description: "Moving STUN into QUIC",