Opened 6 years ago
Closed 6 years ago
#1673 closed defect (invalid)
Nginx don't read upstream response in case upstream close connection.
Reported by: | Owned by: | ||
---|---|---|---|
Priority: | minor | Milestone: | |
Component: | nginx-core | Version: | 1.15.x |
Keywords: | Cc: | ||
uname -a: | Linux nginx-rj4gd 4.9.0-8-amd64 #1 SMP Debian 4.9.110-3+deb9u4 (2018-08-21) x86_64 GNU/Linux | ||
nginx -V: |
nginx version: nginx/1.13.4
built by gcc 6.3.0 20170516 (Debian 6.3.0-18) built with OpenSSL 1.1.0f 25 May 2017 TLS SNI support enabled configure arguments: --prefix=/etc/nginx --sbin-path=/usr/sbin/nginx --modules-path=/usr/lib/nginx/modules --conf-path=/etc/nginx/nginx.conf --error-log-path=/var/log/nginx/error.log --http-log-path=/var/log/nginx/access.log --pid-path=/var/run/nginx.pid --lock-path=/var/run/nginx.lock --http-client-body-temp-path=/var/cache/nginx/client_temp --http-proxy-temp-path=/var/cache/nginx/proxy_temp --http-fastcgi-temp-path=/var/cache/nginx/fastcgi_temp --http-uwsgi-temp-path=/var/cache/nginx/uwsgi_temp --http-scgi-temp-path=/var/cache/nginx/scgi_temp --user=nginx --group=nginx --with-compat --with-file-aio --with-threads --with-http_addition_module --with-http_auth_request_module --with-http_dav_module --with-http_flv_module --with-http_gunzip_module --with-http_gzip_static_module --with-http_mp4_module --with-http_random_index_module --with-http_realip_module --with-http_secure_link_module --with-http_slice_module --with-http_ssl_module --with-http_stub_status_module --with-http_sub_module --with-http_v2_module --with-mail --with-mail_ssl_module --with-stream --with-stream_realip_module --with-stream_ssl_module --with-stream_ssl_preread_module --with-cc-opt='-g -O2 -fdebug-prefix-map=/data/builder/debuild/nginx-1.13.4/debian/debuild-base/nginx-1.13.4=. -specs=/usr/share/dpkg/no-pie-compile.specs -fstack-protector-strong -Wformat -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -fPIC' --with-ld-opt='-specs=/usr/share/dpkg/no-pie-link.specs -Wl,-z,relro -Wl,-z,now -Wl,--as-needed -pie' |
Description
Hi,
we have very usual configuration nginx + uwsgi, and out tests sometimes failed by next reason.
We upload file with helps our application, and do some checks of these files in process of uploading. In case invalid file our application generate error and stop processing upload request. Below I show you strace output in case successful test:
epoll_wait(7, [{EPOLLIN|EPOLLOUT, {u32=1360470513, u64=139763891249649}}], 512, 75000) = 1 recvfrom(3, "PUT /u/c74e5ad22fd7467dbcbd14db1"..., 1024, 0, NULL, NULL) = 1024 recvfrom(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192, 0, NULL, NULL) = 8192 socket(AF_INET, SOCK_STREAM, IPPROTO_IP) = 12 ioctl(12, FIONBIO, [1]) = 0 epoll_ctl(7, EPOLL_CTL_ADD, 12, {EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLET, {u32=1360470753, u64=139763891249889}}) = 0 connect(12, {sa_family=AF_INET, sin_port=htons(8080), sin_addr=inet_addr("192.168.127.91")}, 16) = -1 EINPROGRESS (Operation now in progress) epoll_wait(7, [{EPOLLIN|EPOLLOUT, {u32=1360470513, u64=139763891249649}}, {EPOLLOUT, {u32=1360470753, u64=139763891249889}}], 512, 60000) = 2 getsockopt(12, SOL_SOCKET, SO_ERROR, [0], [4]) = 0 setsockopt(12, SOL_TCP, TCP_NODELAY, [1], 4) = 0 writev(12, [{iov_base="PUT /u/c74e5ad22fd7467dbcbd14db1"..., iov_len=378}, {iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., iov_len=625}, {iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., iov_len=8192}], 3) = 9195 recvfrom(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192, 0, NULL, NULL) = 8192 writev(12, [{iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., iov_len=8192}], 1) = 8192 recvfrom(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192, 0, NULL, NULL) = 8192 ... ----many such rows---- ... writev(12, [{iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., iov_len=8192}], 1) = 8192 recvfrom(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192, 0, NULL, NULL) = 8192 writev(12, [{iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., iov_len=8192}], 1) = 8192 recvfrom(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192, 0, NULL, NULL) = 8192 writev(12, [{iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., iov_len=8192}], 1) = 2966 writev(12, [{iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., iov_len=5226}], 1) = -1 EAGAIN (Resource temporarily unavailable) epoll_wait(7, [{EPOLLIN|EPOLLOUT, {u32=1360470513, u64=139763891249649}}], 512, 60000) = 1 epoll_wait(7, [{EPOLLOUT, {u32=1360470753, u64=139763891249889}}], 512, 59993) = 1 writev(12, [{iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., iov_len=5226}], 1) = 5226 recvfrom(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192, 0, NULL, NULL) = 8192 writev(12, [{iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., iov_len=8192}], 1) = 8192 recvfrom(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192, 0, NULL, NULL) = 8192 ... ----many such rows---- ... writev(12, [{iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., iov_len=8192}], 1) = 8192 recvfrom(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192, 0, NULL, NULL) = 8192 writev(12, [{iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., iov_len=8192}], 1) = 1908 writev(12, [{iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., iov_len=6284}], 1) = -1 EAGAIN (Resource temporarily unavailable) epoll_wait(7, [{EPOLLIN|EPOLLOUT, {u32=1360470513, u64=139763891249649}}], 512, 59910) = 1 epoll_wait(7, [{EPOLLIN|EPOLLOUT|EPOLLERR|EPOLLHUP|EPOLLRDHUP, {u32=1360470753, u64=139763891249889}}], 512, 59907) = 1 recvfrom(12, "HTTP/1.1 400 BAD REQUEST\r\nX-Offs"..., 4096, 0, NULL, NULL) = 171 readv(12, [{iov_base=0x556392a64f1b, iov_len=3925}], 1) = -1 ECONNRESET (Connection reset by peer) gettid() = 7 sendto(13, "<171>Nov 14 01:30:56 nginx-rj4gd"..., 355, 0, NULL, 0) = 355 close(12) = 0 writev(3, [{iov_base="HTTP/1.1 400 BAD REQUEST\r\nServer"..., iov_len=247}], 1) = 247 shutdown(3, SHUT_WR) = 0 recvfrom(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 4096, 0, NULL, NULL) = 4096 ... ----many such rows---- ... recvfrom(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 4096, 0, NULL, NULL) = 4096 recvfrom(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 4096, 0, NULL, NULL) = 3471 epoll_wait(7, [{EPOLLOUT, {u32=1360470513, u64=139763891249649}}], 512, 5000) = 1 epoll_wait(7, [{EPOLLIN|EPOLLOUT|EPOLLHUP|EPOLLRDHUP, {u32=1360470513, u64=139763891249649}}], 512, 4906) = 1 recvfrom(3, "", 4096, 0, NULL, NULL) = 0 sendto(11, "<180>Nov 14 01:30:56 nginx-rj4gd"..., 184, 0, NULL, 0) = 184 close(3) = 0 epoll_wait(7,
As you can see nginx accepted connection and started process content, after he got second EAGAIN (Resource temporarily unavailable) he did epoll_wait and got READ event on socket, so he read upstream response and understood that request finished. It good!
But sometime our test failed and we saw next strace output:
epoll_wait(7, [{EPOLLIN|EPOLLOUT, {u32=1360470512, u64=139763891249648}}], 512, 75000) = 1 recvfrom(3, "PUT /u/13fc38b378ee4350a48b384fc"..., 1024, 0, NULL, NULL) = 1024 recvfrom(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192, 0, NULL, NULL) = 8192 socket(AF_INET, SOCK_STREAM, IPPROTO_IP) = 12 ioctl(12, FIONBIO, [1]) = 0 epoll_ctl(7, EPOLL_CTL_ADD, 12, {EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLET, {u32=1360470753, u64=139763891249889}}) = 0 connect(12, {sa_family=AF_INET, sin_port=htons(8080), sin_addr=inet_addr("192.168.127.91")}, 16) = -1 EINPROGRESS (Operation now in progress) epoll_wait(7, [{EPOLLIN|EPOLLOUT, {u32=1360470512, u64=139763891249648}}, {EPOLLOUT, {u32=1360470753, u64=139763891249889}}], 512, 60000) = 2 getsockopt(12, SOL_SOCKET, SO_ERROR, [0], [4]) = 0 setsockopt(12, SOL_TCP, TCP_NODELAY, [1], 4) = 0 writev(12, [{iov_base="PUT /u/13fc38b378ee4350a48b384fc"..., iov_len=378}, {iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., iov_len=625}, {iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., iov_len=8192}], 3) = 9195 recvfrom(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192, 0, NULL, NULL) = 8192 writev(12, [{iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., iov_len=8192}], 1) = 8192 ... ----many such rows---- ... writev(12, [{iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., iov_len=8192}], 1) = 8192 recvfrom(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192, 0, NULL, NULL) = 8192 writev(12, [{iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., iov_len=8192}], 1) = 2966 writev(12, [{iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., iov_len=5226}], 1) = -1 EAGAIN (Resource temporarily unavailable) epoll_wait(7, [{EPOLLIN|EPOLLOUT, {u32=1360470512, u64=139763891249648}}], 512, 59998) = 1 epoll_wait(7, [{EPOLLOUT, {u32=1360470753, u64=139763891249889}}], 512, 59980) = 1 writev(12, [{iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., iov_len=5226}], 1) = 5226 recvfrom(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192, 0, NULL, NULL) = 8192 ... ----many such rows---- ... writev(12, [{iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., iov_len=8192}], 1) = 8192 recvfrom(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192, 0, NULL, NULL) = 8192 writev(12, [{iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., iov_len=8192}], 1) = -1 ECONNRESET (Connection reset by peer) gettid() = 7 socket(AF_INET, SOCK_DGRAM, IPPROTO_IP) = 13 ioctl(13, FIONBIO, [1]) = 0 connect(13, {sa_family=AF_INET, sin_port=htons(514), sin_addr=inet_addr("192.168.127.132")}, 16) = 0 sendto(13, "<171>Nov 14 01:18:23 nginx-rj4gd"..., 367, 0, NULL, 0) = 367 close(12) = 0 writev(3, [{iov_base="HTTP/1.1 502 Bad Gateway\r\nServer"..., iov_len=165}, {iov_base="<html>\r\n<head><title>502 Bad Gat"..., iov_len=120}, {iov_base="<hr><center>nginx</center>\r\n</bo"..., iov_len=46}], 3) = 331 shutdown(3, SHUT_WR) = 0 recvfrom(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 4096, 0, NULL, NULL) = 4096 ... ----many such rows---- ... recvfrom(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 4096, 0, NULL, NULL) = 4096 recvfrom(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 4096, 0, NULL, NULL) = 3471 epoll_wait(7, [{EPOLLOUT, {u32=1360470512, u64=139763891249648}}], 512, 5000) = 1 epoll_wait(7, [{EPOLLIN|EPOLLOUT|EPOLLHUP|EPOLLRDHUP, {u32=1360470512, u64=139763891249648}}], 512, 4654) = 1 recvfrom(3, "", 4096, 0, NULL, NULL) = 0 sendto(11, "<180>Nov 14 01:18:23 nginx-rj4gd"..., 186, 0, NULL, 0) = 186 close(3)
As you can see in this case nginx get ECONNRESET (Connection reset by peer), and didn't try to read response from upstream. He desided that connection was close abruptly and generate 502 error, but application in this case also generate same 400 BAD REQUEST error as in previous example. That error describe reason by that connection was closed, but didn't get this response on client side. So how could you explaine this behavior and how we can do some influence to stabilize system behavior?
Attachments (1)
Change History (2)
by , 6 years ago
Attachment: | nginx.conf added |
---|
comment:1 by , 6 years ago
Resolution: | → invalid |
---|---|
Status: | new → closed |
Once the connection was reset, it is not possible to read anything from the socket. To avoid connections being reset, connections have to be closed in stages when further data are expected to come from the client. Quoting RFC 7230:
If a server performs an immediate close of a TCP connection, there is a significant risk that the client will not be able to read the last HTTP response. If the server receives additional data from the client on a fully closed connection, such as another request that was sent by the client before receiving the server's response, the server's TCP stack will send a reset packet to the client; unfortunately, the reset packet might erase the client's unacknowledged input buffers before they can be read and interpreted by the client's HTTP parser. To avoid the TCP reset problem, servers typically close a connection in stages. First, the server performs a half-close by closing only the write side of the read/write connection. The server then continues to read from the connection until it receives a corresponding close by the client, or until the server is reasonably certain that its own TCP stack has received the client's acknowledgement of the packet(s) containing the server's last response. Finally, the server fully closes the connection.
To make it possible for nginx to receive the response, backend must either read the whole body, or implement proper connection teardown (in nginx, this is called lingering_close).
nginx.conf