Description
After a ngx.semaphore.wait()
call, if the current request is terminated by Nginx due to client_header_timeout
or client_body_timeout
, the corresponding post()
is never executed. As a result, all subsequent attempts to wait()
on the same semaphore hang until they time out, because the semaphore count was never restored.
1. Reproduce by client_header_timeout
on ssl_certificate_by_lua_block
#user nobody;
worker_processes 1;
daemon off;
master_process off;
error_log logs/debug.log debug;
#pid logs/nginx.pid;
events {
worker_connections 1024;
}
http {
lua_code_cache on;
include mime.types;
default_type application/octet-stream;
log_format main '$remote_addr - $remote_user [$time_local] "$request" '
'$status $body_bytes_sent "$http_referer" '
'"$http_user_agent" "$http_x_forwarded_for"';
access_log logs/access.log main;
sendfile on;
#tcp_nopush on;
#keepalive_timeout 0;
keepalive_timeout 65;
#gzip on;
# Shared dictionary for the balancer
lua_shared_dict my_balancer_dict 1m;
init_worker_by_lua_block {
local semaphore = require "ngx.semaphore"
-- create a semaphore with capacity = 1
local sema, err = semaphore.new(1)
if not sema then
ngx.log(ngx.ERR, "failed to create semaphore: ", err)
return
end
ssl_semaphore = sema
}
server {
listen 8080;
server_name localhost;
location / {
content_by_lua_block {
ngx.sleep(10)
ngx.say("Hello, world!")
}
}
}
server {
listen 8443 ssl;
server_name edge.foo.com;
client_header_timeout 8;
# your existing certs:
ssl_certificate /home/barca/src/d26-sandbox/go/data/server.crt;
ssl_certificate_key /home/barca/src/d26-sandbox/go/data/server.key;
ssl_certificate_by_lua_block {
local sema = ssl_semaphore
local ok, err = sema:wait(5)
if not ok then
ngx.log(ngx.ERR, "SEMA] semaphore wait failed: ", err)
return ngx.exit(ngx.ERROR)
end
ngx.log(ngx.INFO, "SEMA] acquired semaphore, sleeping 30s")
local httpc = require "resty.http"
local http_client, err = httpc.new()
if not http_client then
ngx.log(ngx.ERR, "failed to create http client: ", err)
sema:post(1)
return ngx.exit(ngx.ERROR)
end
--http_client:set_timeouts(10000, 10000, 10000) -- connect, send, read timeouts
local request_uri_to_call = "http://127.0.0.1:8080/some/path"
local res, err = http_client:request_uri(request_uri_to_call, {
method = "GET",
})
if not res then
ngx.log(ngx.ERR, "failed to make HTTP request to ", request_uri_to_call, ": ", err)
return ngx.exit(ngx.ERROR)
end
ngx.log(ngx.INFO, "HTTP request to ", request_uri_to_call, " completed with status: ", res.status)
local ok, err = http_client:set_keepalive()
if not ok then
ngx.log(ngx.ERR, "failed to set keepalive: ", err)
end
local posted, perr = sema:post(1)
if not posted then
ngx.log(ngx.ERR, "SEMA] semaphore post failed: ", perr)
else
ngx.log(ngx.INFO, "SEMA] released semaphore")
end
}
location / {
return 200 "Hello, world!";
}
}
}
The first request times out in the SSL phase and never calls post()
.
All subsequent requests hang until their wait()
deadlines.
curl -v -o /dev/null --connect-to edge.foo.com:443:127.0.0.1:8443 'https://edge.foo.com/' --insecure
curl -v -o /dev/null --connect-to edge.foo.com:443:127.0.0.1:8443 'https://edge.foo.com/' --insecure
curl -v -o /dev/null --connect-to edge.foo.com:443:127.0.0.1:8443 'https://edge.foo.com/' --insecure
2. Reproduce by client_body_timeout
on access_by_lua_block
#user nobody;
worker_processes 1;
daemon off;
master_process off;
#error_log logs/error.log;
#error_log logs/error.log notice;
#error_log logs/error.log info;
error_log logs/debug.log debug;
#pid logs/nginx.pid;
events {
worker_connections 1024;
}
http {
lua_code_cache on;
include mime.types;
default_type application/octet-stream;
log_format main '$remote_addr - $remote_user [$time_local] "$request" '
'$status $body_bytes_sent "$http_referer" '
'"$http_user_agent" "$http_x_forwarded_for"';
access_log logs/access.log main;
sendfile on;
#tcp_nopush on;
#keepalive_timeout 0;
keepalive_timeout 65;
#gzip on;
# Shared dictionary for the balancer
lua_shared_dict my_balancer_dict 1m;
init_worker_by_lua_block {
local semaphore = require "ngx.semaphore"
local sema, err = semaphore.new(1)
if not sema then
ngx.log(ngx.ERR, "failed to create semaphore: ", err)
return
end
upload_semaphore = sema
}
client_body_timeout 5s;
server {
listen 8000;
server_name edge.foo.com;
# on 408 jump here
error_page 408 = @body_timeout;
location /upload {
access_by_lua_block {
local sema = upload_semaphore
local ok, err = sema:wait(10)
if not ok then
ngx.log(ngx.ERR, "SEMA] semaphore wait failed: ", err)
return ngx.exit(500)
end
ngx.log(ngx.INFO, "SEMA] acquired semaphore, now reading body")
ngx.req.read_body()
local data = ngx.req.get_body_data() or ""
sema:post(1)
ngx.log(ngx.INFO, "SEMA] released semaphore")
ngx.say("Got ", #data, " bytes of body")
}
}
location @body_timeout {
return 408 "Request Timeout: client body not received within 5s\n";
}
}
}
The first slow upload triggers client_body_timeout and never calls post()
Subsequent uploads hang at wait()
( printf 'POST /upload HTTP/1.1\r\n'; printf 'Host: edge.foo.com\r\n'; printf 'Content-Length: 100\r\n'; printf 'Connection: close\r\n'; printf '\r\n'; sleep 10; yes | head -c 100; ) | nc 127.0.0.1 8000
( printf 'POST /upload HTTP/1.1\r\n'; printf 'Host: edge.foo.com\r\n'; printf 'Content-Length: 100\r\n'; printf 'Connection: close\r\n'; printf '\r\n'; sleep 3; yes | head -c 100; ) | nc 127.0.0.1 8000
( printf 'POST /upload HTTP/1.1\r\n'; printf 'Host: edge.foo.com\r\n'; printf 'Content-Length: 100\r\n'; printf 'Connection: close\r\n'; printf '\r\n'; sleep 3; yes | head -c 100; ) | nc 127.0.0.1 8000
Proposed Solution
Register a cleanup handler (as done in ngx_http_file_cache
or ngx_http_lua_socket_tcp
) when acquiring the semaphore. If the request is aborted, this handler should call sema:post()
to restore the count.