Skip to content

Commit bfa0392

Browse files
authored
support no_proxy exactly like CURL (#522)
1 parent a04c10a commit bfa0392

File tree

6 files changed

+805
-27
lines changed

6 files changed

+805
-27
lines changed

include/aws/http/private/no_proxy.h

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
#ifndef AWS_NO_PROXY_H
2+
#define AWS_NO_PROXY_H
3+
4+
/**
5+
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
6+
* SPDX-License-Identifier: Apache-2.0.
7+
*/
8+
#include <aws/http/http.h>
9+
AWS_PUSH_SANE_WARNING_LEVEL
10+
AWS_EXTERN_C_BEGIN
11+
12+
/*
13+
* Check if a host should bypass the proxy based on the NO_PROXY environment variable or provided no_proxy value.
14+
* Since NO_PROXY has no standard yet, this follows the curl implementation from noproxy.c at 8.14.1.
15+
* https://github.com/curl/curl/blob/curl-8_14_1/lib/noproxy.c
16+
*
17+
* NO_PROXY is a comma-separated list of domain names, hostnames, or IP addresses that
18+
* should bypass the proxy.
19+
*
20+
* no_proxy_str is the NO_PROXY value to match against. This function does not read the environment itself:
21+
* if no_proxy_str is NULL (or host is empty), it returns false.
22+
*
23+
* Returns true if the host should bypass the proxy.
24+
*/
25+
AWS_HTTP_API bool aws_http_host_matches_no_proxy(
26+
struct aws_allocator *allocator,
27+
struct aws_byte_cursor host,
28+
struct aws_string *no_proxy_str);
29+
30+
AWS_EXTERN_C_END
31+
AWS_POP_SANE_WARNING_LEVEL
32+
33+
#endif /* AWS_NO_PROXY_H */

include/aws/http/proxy.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,9 @@ enum aws_http_proxy_env_var_type {
4343
* Enable get proxy URL from environment variable, when the manual proxy options of connection manager is not set.
4444
* env HTTPS_PROXY/https_proxy will be checked when the main connection use tls.
4545
* env HTTP_PROXY/http_proxy will be checked when the main connection NOT use tls.
46-
* The lower case version has precedence.
46+
* env NO_PROXY/no_proxy will be checked to bypass the proxy if the host matches one of its patterns.
47+
* Check `aws_http_host_matches_no_proxy` for detail. This function can also be used with a direct no_proxy
48+
* parameter. The lower case version has precedence.
4749
*/
4850
AWS_HPEV_ENABLE,
4951
};

source/no_proxy.c

Lines changed: 277 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,277 @@
1+
/**
2+
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3+
* SPDX-License-Identifier: Apache-2.0.
4+
*/
5+
#include <aws/common/environment.h>
6+
#include <aws/http/private/no_proxy.h>
7+
8+
#ifdef _WIN32
9+
# include <ws2tcpip.h>
10+
#else
11+
# include <arpa/inet.h>
12+
#endif
13+
14+
/* How the request host was classified; this selects the matching rule applied to each no_proxy entry. */
enum hostname_type {
    /* Host text parsed as an IPv4 address. */
    HOSTNAME_TYPE_IPV4,
    /* Host text (optionally wrapped in square brackets) parsed as an IPv6 address. */
    HOSTNAME_TYPE_IPV6,
    /* Anything else: handled as a regular host name and matched by domain suffix. */
    HOSTNAME_TYPE_REGULAR,
};
19+
20+
/**
 * Decide whether an IPv4 host address is covered by one no_proxy network entry.
 * Modelled on curl's Curl_cidr4_match().
 *
 * @param bits          Prefix length from the entry's CIDR suffix. 0 (no suffix) and 32 both request an
 *                      exact address comparison; any value above 32 never matches.
 * @param network_part  Textual IPv4 address of the entry (e.g., "192.168.0.0"). An entry that
 *                      inet_pton() cannot parse never matches.
 * @param address       The host address in the binary form produced by inet_pton(AF_INET, ...).
 * @return true if the host address matches the entry, false otherwise
 */
static bool s_cidr4_match(uint64_t bits, struct aws_string *network_part, uint32_t address) {
    /* A prefix longer than the address itself is invalid. */
    if (bits > 32) {
        return false;
    }

    /* Parse the entry's address; anything that is not a valid IPv4 literal cannot match. */
    uint32_t network = 0;
    if (inet_pton(AF_INET, aws_string_c_str(network_part), &network) != 1) {
        return false;
    }

    /* No prefix given, or a full /32: the two addresses must be identical. */
    if (bits == 0 || bits == 32) {
        return address == network;
    }

    /* Partial prefix: compare only the leading `bits` bits, working in host byte order. */
    const uint32_t prefix_mask = 0xffffffff << (32 - bits);
    return (ntohl(address) & prefix_mask) == (ntohl(network) & prefix_mask);
}
58+
59+
/**
 * Decide whether an IPv6 host address is covered by one no_proxy network entry.
 * Modelled on curl's Curl_cidr6_match().
 *
 * @param bits          Prefix length from the entry's CIDR suffix. 0 is treated as 128 (compare the
 *                      whole address); any value above 128 never matches.
 * @param network_part  Textual IPv6 address of the entry (e.g., "2001:db8::"). An entry that
 *                      inet_pton() cannot parse never matches.
 * @param address       The 16-byte host address in the binary form produced by inet_pton(AF_INET6, ...).
 * @return true if the host address matches the entry, false otherwise
 */
static bool s_cidr6_match(uint64_t bits, struct aws_string *network_part, uint8_t *address) {
    /* A missing (zero) prefix length means every one of the 128 bits has to agree. */
    const uint64_t prefix_bits = (bits == 0) ? 128 : bits;
    if (prefix_bits > 128) {
        return false;
    }

    /* Parse the entry's address; anything that is not a valid IPv6 literal cannot match. */
    uint8_t network[16] = {0};
    if (inet_pton(AF_INET6, aws_string_c_str(network_part), network) != 1) {
        return false;
    }

    /* Split the prefix into whole bytes plus the bits left over in the following byte. */
    const uint64_t whole_bytes = prefix_bits / 8;
    const uint64_t leftover_bits = prefix_bits % 8;

    if (whole_bytes > 0 && memcmp(address, network, (size_t)whole_bytes) != 0) {
        return false;
    }

    /* Nothing beyond the whole bytes to compare: the prefix matched. */
    if (leftover_bits == 0 || whole_bytes >= 16) {
        return true;
    }

    /* Compare only the high-order leftover bits of the next byte. */
    const unsigned char partial_mask = (unsigned char)(0xff << (8 - leftover_bits));
    return (address[whole_bytes] & partial_mask) == (network[whole_bytes] & partial_mask);
}
109+
110+
/* Byte predicate: true only for the '.' character. */
static bool s_is_dot(uint8_t c) {
    const uint8_t dot = (uint8_t)'.';
    return c == dot;
}
113+
114+
/* The host is expected to be the host result from URL parser. */
115+
bool aws_http_host_matches_no_proxy(
116+
struct aws_allocator *allocator,
117+
struct aws_byte_cursor host,
118+
struct aws_string *no_proxy_str) {
119+
if (host.len == 0 || no_proxy_str == NULL) {
120+
return false;
121+
}
122+
/* Single "*" wildcard matches all hosts */
123+
if (aws_string_eq_c_str(no_proxy_str, "*")) {
124+
AWS_LOGF_DEBUG(AWS_LS_HTTP_CONNECTION, "wildcard no_proxy found, bypassing any proxy");
125+
return true;
126+
}
127+
bool bypass = false;
128+
struct aws_byte_cursor no_proxy_cur = aws_byte_cursor_from_string(no_proxy_str);
129+
struct aws_array_list no_proxy_list;
130+
struct aws_string *host_str = aws_string_new_from_cursor(allocator, &host);
131+
132+
if (aws_array_list_init_dynamic(&no_proxy_list, allocator, 10, sizeof(struct aws_byte_cursor))) {
133+
goto cleanup;
134+
}
135+
/* Split the NO_PROXY string by commas */
136+
if (aws_byte_cursor_split_on_char(&no_proxy_cur, ',', &no_proxy_list)) {
137+
goto cleanup;
138+
}
139+
140+
/* Store parsed binary addresses for reuse */
141+
uint32_t ipv4_addr = 0;
142+
uint8_t ipv6_addr[16] = {0};
143+
144+
/* Determine host type and parse address if applicable */
145+
enum hostname_type type = HOSTNAME_TYPE_REGULAR;
146+
if (inet_pton(AF_INET, aws_string_c_str(host_str), &ipv4_addr) == 1) {
147+
type = HOSTNAME_TYPE_IPV4;
148+
} else {
149+
struct aws_string *host_str_copy = host_str;
150+
struct aws_byte_cursor host_copy = host;
151+
if (host_copy.ptr[0] == '[' && host_copy.ptr[host_copy.len - 1] == ']') {
152+
/* Check if the address is enclosed in brackets and strip them for validation */
153+
aws_byte_cursor_advance(&host_copy, 1);
154+
host_copy.len--;
155+
host_str_copy = aws_string_new_from_cursor(allocator, &host_copy);
156+
}
157+
158+
if (inet_pton(AF_INET6, aws_string_c_str(host_str_copy), ipv6_addr) == 1) {
159+
/* Update the host str */
160+
if (host_str != host_str_copy) {
161+
aws_string_destroy(host_str);
162+
host_str = host_str_copy;
163+
}
164+
type = HOSTNAME_TYPE_IPV6;
165+
} else {
166+
/* Not an IP address, so it's a regular hostname */
167+
type = HOSTNAME_TYPE_REGULAR;
168+
/* Ignore the trailing dot in the hostname */
169+
host = aws_byte_cursor_right_trim_pred(&host, s_is_dot);
170+
}
171+
if (host_str != host_str_copy) {
172+
/* clean up the copy, but don't update the str. */
173+
aws_string_destroy(host_str_copy);
174+
}
175+
}
176+
177+
for (size_t i = 0; i < aws_array_list_length(&no_proxy_list); i++) {
178+
struct aws_byte_cursor pattern;
179+
if (aws_array_list_get_at(&no_proxy_list, &pattern, i)) {
180+
continue;
181+
}
182+
183+
/* Trim whitespace from both ends for the pattern */
184+
pattern = aws_byte_cursor_trim_pred(&pattern, aws_isspace);
185+
if (pattern.len == 0) {
186+
/* If pattern is empty, ignore it. */
187+
continue;
188+
}
189+
switch (type) {
190+
case HOSTNAME_TYPE_REGULAR: {
191+
/**
192+
* A: example.com matches 'example.com'
193+
* B: www.example.com matches 'example.com'
194+
* C: nonexample.com DOES NOT match 'example.com'
195+
*/
196+
/* Trim dot from both ends for the pattern */
197+
pattern = aws_byte_cursor_trim_pred(&pattern, s_is_dot);
198+
if (pattern.len == 0) {
199+
/* If pattern is empty, ignore it. */
200+
continue;
201+
}
202+
if (pattern.len == host.len) {
203+
if (aws_byte_cursor_eq_ignore_case(&pattern, &host)) {
204+
bypass = true;
205+
goto cleanup;
206+
} else {
207+
continue;
208+
}
209+
} else if (pattern.len < host.len) {
210+
/* Check if the pattern is a suffix of the host. All the math is safe since pattern.len <
211+
* host.len
212+
*/
213+
struct aws_byte_cursor tail_with_extra_byte = host;
214+
/* 1. the byte before the tail should be `.` */
215+
aws_byte_cursor_advance(&tail_with_extra_byte, host.len - pattern.len - 1);
216+
uint8_t var = 0;
217+
/* tail_with_extra_byte will be updated to move over the `.` */
218+
aws_byte_cursor_read_u8(&tail_with_extra_byte, &var);
219+
if (var != '.') {
220+
continue;
221+
}
222+
/* 2. the tail of the host should match the pattern */
223+
if (aws_byte_cursor_eq_ignore_case(&pattern, &tail_with_extra_byte)) {
224+
bypass = true;
225+
goto cleanup;
226+
} else {
227+
continue;
228+
}
229+
}
230+
} break;
231+
case HOSTNAME_TYPE_IPV4:
232+
case HOSTNAME_TYPE_IPV6: {
233+
/* Extract network part and bits from CIDR notation */
234+
struct aws_byte_cursor substr = {0};
235+
struct aws_byte_cursor network_part = {0};
236+
/* CIDR found. parse the bits */
237+
uint64_t network_bits = 0;
238+
if (aws_byte_cursor_next_split(&pattern, '/', &substr)) {
239+
network_part = substr;
240+
}
241+
if (aws_byte_cursor_next_split(&pattern, '/', &substr)) {
242+
/* There is a second part of the pattern after `/`. */
243+
/* Now, take the rest of the pattern after `/` as the bits */
244+
aws_byte_cursor_advance(&pattern, network_part.len + 1);
245+
if (aws_byte_cursor_utf8_parse_u64(pattern, &network_bits)) {
246+
continue;
247+
}
248+
}
249+
struct aws_string *network_part_str = aws_string_new_from_cursor(allocator, &network_part);
250+
if (type == HOSTNAME_TYPE_IPV4) {
251+
if (s_cidr4_match(network_bits, network_part_str, ipv4_addr)) {
252+
bypass = true;
253+
aws_string_destroy(network_part_str);
254+
goto cleanup;
255+
}
256+
} else {
257+
if (s_cidr6_match(network_bits, network_part_str, ipv6_addr)) {
258+
bypass = true;
259+
aws_string_destroy(network_part_str);
260+
goto cleanup;
261+
}
262+
}
263+
aws_string_destroy(network_part_str);
264+
} break;
265+
266+
default:
267+
/* Invalid stage */
268+
AWS_FATAL_ASSERT(false);
269+
break;
270+
}
271+
}
272+
273+
cleanup:
274+
aws_string_destroy(host_str);
275+
aws_array_list_clean_up(&no_proxy_list);
276+
return bypass;
277+
}

0 commit comments

Comments
 (0)