﻿id	summary	reporter	owner	description	type	status	priority	milestone	component	version	resolution	keywords	cc	uname	nginx_version
2162	Detected socket leaks after restarting worker processes	yugo-horie@…		"we have detected socket leaks after restarting worker processes while it's probably reading a file cache and replying downstream.

we have prepared the nginx as a proxy server which has a foolishly simple nginx.conf.
{{{
worker_processes  16;

error_log  /var/log/nginx/error.log;

events {
    worker_connections  1024;
}

debug_points abort;

http {
    default_type  application/octet-stream;

    log_format  main  '$remote_addr - $remote_user [$time_local] ""$request"" '
                      '$status $body_bytes_sent ""$http_referer"" '
                      '""$http_user_agent"" ""$http_x_forwarded_for""';

    sendfile        on;
    keepalive_timeout  65;


    proxy_cache_path /var/cache/nginx/all levels=1:2 keys_zone=all:8m max_size=100m inactive=1m;

    upstream cache {
        #zone zone_frontedge 1m;
        #keepalive 1024;
        server origin_1:8008 max_fails=100 fail_timeout=10s;
    }

    server {
        listen       8080;
        server_name  localhost;

        access_log  /var/log/nginx/access.log  main;

        sendfile on;
        aio threads;
        aio_write on;

        location ~* /(.*) {
            proxy_cache all;
            proxy_cache_key $host$uri$is_args$args;
            proxy_pass http://cache;

            proxy_cache_valid 200 302 60m;
        }
    }
}
}}}

And then we also prepared the simplest script to `kill -HUP master process` for restart worker processes continuously.
http://nginx.org/en/docs/control.html

{{{
#!/bin/sh
while true; do
   ps aux | grep master | awk '{print $2}' | xargs -I {} kill -HUP {};
   sleep 1;
done;
}}}

At last, we obtained the 10MB contents via this proxy with wrk
https://github.com/wg/wrk
{{{
wrk -c 5 -t 5 localhost:8080/
}}}


When we have experimented these proxy and restarting behavior,  it has often observed socket leaks in error.log
{{{
2021/04/12 07:10:51 [alert] 19458#19458: *119 open socket #44 left in connection 3
2021/04/12 07:10:51 [alert] 19458#19458: aborting
2021/04/12 07:10:52 [alert] 8#8: worker process 19458 exited on signal 6 (core dumped)
}}}

According to this tutorial, It is able to debugging the socket leaks in detail
https://www.nginx.com/resources/wiki/start/topics/tutorials/debugging/
So, we tried it, although we could get only a slight information.
{{{
(gdb) set $c = &ngx_cycle->connections[3]
(gdb) set $r = (ngx_http_request_t *) $c->data
(gdb) set $u = (ngx_http_upstream_t *) $r->upstream
(gdb) p *$r
$1 = {signature = 1347703880, connection = 0x7fbfb253c2c8, ctx = 0x562b9de16fb0, main_conf = 0x562b9de0f6f0, srv_conf = 0x562b9ddfea00,
  loc_conf = 0x562b9de00630, read_event_handler = 0x562b9cbc735d <ngx_http_block_reading>,
  write_event_handler = 0x562b9cbdb904 <ngx_http_upstream_init_request>, cache = 0x562b9de0a690, upstream = 0x562b9de0a230,
  upstream_states = 0x0, pool = 0x562b9de16530, header_in = 0x562b9de16438, headers_in = {headers = {last = 0x562b9de165f0, part = {
        elts = 0x562b9de09e70, nelts = 1, next = 0x0}, size = 48, nalloc = 20, pool = 0x562b9de16530}, host = 0x562b9de09e70, connection = 0x0,
    if_modified_since = 0x0, if_unmodified_since = 0x0, if_match = 0x0, if_none_match = 0x0, user_agent = 0x0, referer = 0x0,
    content_length = 0x0, content_range = 0x0, content_type = 0x0, range = 0x0, if_range = 0x0, transfer_encoding = 0x0, te = 0x0, expect = 0x0,
    upgrade = 0x0, accept_encoding = 0x0, via = 0x0, authorization = 0x0, keep_alive = 0x0, x_forwarded_for = {elts = 0x0, nelts = 0, size = 0,
      nalloc = 0, pool = 0x0}, user = {len = 0, data = 0x0}, passwd = {len = 0, data = 0x0}, cookies = {elts = 0x0, nelts = 0, size = 0,
      nalloc = 0, pool = 0x0}, server = {len = 9, data = 0x562b9ddd9306 ""localhost:8080""}, content_length_n = -1, keep_alive_n = -1,
    connection_type = 0, chunked = 0, msie = 0, msie6 = 0, opera = 0, gecko = 0, chrome = 0, safari = 0, konqueror = 0}, headers_out = {
    headers = {last = 0x562b9de16768, part = {elts = 0x562b9de16b30, nelts = 0, next = 0x0}, size = 48, nalloc = 20, pool = 0x562b9de16530},
    trailers = {last = 0x562b9de167a0, part = {elts = 0x562b9de16ef0, nelts = 0, next = 0x0}, size = 48, nalloc = 4, pool = 0x562b9de16530},
    status = 0, status_line = {len = 0, data = 0x0}, server = 0x0, date = 0x0, content_length = 0x0, content_encoding = 0x0, location = 0x0,
    refresh = 0x0, last_modified = 0x0, content_range = 0x0, accept_ranges = 0x0, www_authenticate = 0x0, expires = 0x0, etag = 0x0,
    override_charset = 0x0, content_type_len = 0, content_type = {len = 0, data = 0x0}, charset = {len = 0, data = 0x0},
    content_type_lowcase = 0x0, content_type_hash = 0, cache_control = {elts = 0x0, nelts = 0, size = 0, nalloc = 0, pool = 0x0}, link = {
      elts = 0x0, nelts = 0, size = 0, nalloc = 0, pool = 0x0}, content_length_n = -1, content_offset = 0, date_time = 0,
    last_modified_time = -1}, request_body = 0x562b9de17410, lingering_time = 0, start_sec = 1618211451, start_msec = 935, method = 2,
  http_version = 1001, request_line = {len = 14, data = 0x562b9ddd92f0 ""GET / HTTP/1.1\r\nHost""}, uri = {len = 1,
    data = 0x562b9ddd92f4 ""/ HTTP/1.1\r\nHost""}, args = {len = 0, data = 0x0}, exten = {len = 0, data = 0x0}, unparsed_uri = {len = 1,
    data = 0x562b9ddd92f4 ""/ HTTP/1.1\r\nHost""}, method_name = {len = 3, data = 0x562b9ddd92f0 ""GET / HTTP/1.1\r\nHost""}, http_protocol = {
    len = 8, data = 0x562b9ddd92f6 ""HTTP/1.1\r\nHost""}, schema = {len = 0, data = 0x0}, out = 0x0, main = 0x562b9de16580, parent = 0x0,
  postponed = 0x0, post_subrequest = 0x0, posted_requests = 0x0, phase_handler = 11, content_handler = 0x562b9cc00bd9 <ngx_http_proxy_handler>,
  access_code = 0, variables = 0x562b9de17110, ncaptures = 4, captures = 0x562b9de17218, captures_data = 0x562b9ddd92f4 ""/ HTTP/1.1\r\nHost"",
  limit_rate = 0, limit_rate_after = 0, header_size = 0, request_length = 40, err_status = 0, http_connection = 0x562b9de163e0, stream = 0x0,
  log_handler = 0x562b9cbc5ca4 <ngx_http_log_error_handler>, cleanup = 0x0, count = 1, subrequests = 51, blocked = 1, aio = 1, http_state = 2,
  complex_uri = 0, quoted_uri = 0, plus_in_uri = 0, space_in_uri = 0, empty_path_in_uri = 0, invalid_header = 0, add_uri_to_alias = 0,
  valid_location = 1, valid_unparsed_uri = 1, uri_changed = 0, uri_changes = 11, request_body_in_single_buf = 0, request_body_in_file_only = 0,
  request_body_in_persistent_file = 0, request_body_in_clean_file = 0, request_body_file_group_access = 0, request_body_file_log_level = 5,
  request_body_no_buffering = 0, subrequest_in_memory = 0, waited = 0, cached = 0, gzip_tested = 0, gzip_ok = 0, gzip_vary = 0,
  realloc_captures = 0, proxy = 0, bypass_cache = 0, no_cache = 0, limit_conn_status = 0, limit_req_status = 0, limit_rate_set = 0,
--Type <RET> for more, q to quit, c to continue without paging--
  r_set = 0, pipeline = 0, chunked = 0, header_only = 0, expect_trailers = 0, keepalive = 1, lingering_close = 0, discard_body = 0, reading_body = 0, internal = 0, error_page = 0,
  filter_finalize = 0, post_action = 0, request_complete = 0, request_output = 0, header_sent = 0, expect_tested = 0, root_tested = 0, done = 0, logged = 0, buffered = 0, main_filter_need_in_memory = 0,
  filter_need_in_memory = 0, filter_need_temporary = 0, preserve_body = 0, allow_ranges = 0, subrequest_ranges = 0, single_range = 0, disable_not_modified = 0, stat_reading = 0, stat_writing = 0,
  stat_processing = 0, background = 0, health_check = 0, state = 0, header_hash = 3208616, lowcase_index = 4, lowcase_header = ""host"", '\000' <repeats 27 times>, header_name_start = 0x562b9ddd9316 ""\r\n"",
  header_name_end = 0x562b9ddd9304 """", header_start = 0x562b9ddd9306 ""localhost:8080"", header_end = 0x562b9ddd9316 ""\r\n"", uri_start = 0x562b9ddd92f4 ""/ HTTP/1.1\r\nHost"",
  uri_end = 0x562b9ddd92f5 "" HTTP/1.1\r\nHost"", uri_ext = 0x0, args_start = 0x0, request_start = 0x562b9ddd92f0 ""GET / HTTP/1.1\r\nHost"", request_end = 0x562b9ddd92fe ""\r\nHost"",
  method_end = 0x562b9ddd92f2 ""T / HTTP/1.1\r\nHost"", schema_start = 0x0, schema_end = 0x0, host_start = 0x0, host_end = 0x0, port_start = 0x0, port_end = 0x0, http_minor = 1, http_major = 1}
(gdb) p $u->cache_status
$2 = 1
(gdb) set $cache =(ngx_http_cache_t *) $r->cache
(gdb) set $task=(ngx_thread_task_t *)$cache->file->thread_task
(gdb) p $cache->file
$3 = {fd = 6, name = {len = 58, data = 0x562b9de174c0 ""/var/cache/nginx/all/9/f3/f3bfeb3387c38d7a557be4d864d67f39""}, info = {st_dev = 0, st_ino = 0, st_nlink = 0, st_mode = 0, st_uid = 0, st_gid = 0,
    __pad0 = 0, st_rdev = 0, st_size = 0, st_blksize = 0, st_blocks = 0, st_atim = {tv_sec = 0, tv_nsec = 0}, st_mtim = {tv_sec = 0, tv_nsec = 0}, st_ctim = {tv_sec = 0, tv_nsec = 0}, __glibc_reserved = {0,
      0, 0}}, offset = 0, sys_offset = 0, log = 0x562b9de16380, thread_handler = 0x562b9cbdf2c7 <ngx_http_cache_thread_handler>, thread_ctx = 0x562b9de16580, thread_task = 0x562b9de0abc0, aio = 0x0,
  valid_info = 0, directio = 0}
(gdb) set $task=(ngx_thread_task_t *)$cache->file->thread_task
(gdb) p *$task
$4 = {next = 0x0, id = 8, ctx = 0x562b9de0ac40, handler = 0x562b9cba8b64 <ngx_thread_read_handler>, event = {data = 0x562b9de16580, write = 0, accept = 0, instance = 0, active = 1, disabled = 0, ready = 0,
    oneshot = 0, complete = 0, eof = 0, error = 0, timedout = 0, timer_set = 0, delayed = 0, deferred_accept = 0, pending_eof = 0, posted = 0, closed = 0, channel = 0, resolver = 0, cancelable = 0,
    available = 0, handler = 0x562b9cbdf3e0 <ngx_http_cache_thread_event_handler>, index = 0, log = 0x0, timer = {key = 0, left = 0x0, right = 0x0, parent = 0x0, color = 0 '\000', data = 0 '\000'}, queue = {
      prev = 0x0, next = 0x0}}}
(gdb) p $task->event.complete
$5 = 0
}}}

* Remaining file descriptor seems not to be reading cache file. It is probably replying contents to end user.
* u->cache_status refers to NGX_HTTP_CACHE_MISS, nevertheless the cache file has been detected what should it replies.
* task->event.complete is 0 which means reading cache file task has not completed, therefore cache_status is NGX_HTTP_CACHE_MISS, then we consider doubtfully that this proxy cannot only send anything for downstreams but also it cannot detect reload signals.

How can we get downstream detailed informations when we has detected such socket leaks?"	defect	closed	minor		nginx-core	1.19.x	fixed	socket leak, worker restarting, aio threads		Linux b0e6a55fbe0e 4.19.76-linuxkit #1 SMP Tue May 26 11:42:35 UTC 2020 x86_64 x86_64 x86_64 GNU/Linux	"nginx version: nginx/1.19.9
built by gcc 9.3.0 (Ubuntu 9.3.0-17ubuntu1~20.04)
built with OpenSSL 1.1.1k  25 Mar 2021
TLS SNI support enabled
configure arguments: --prefix=/etc/nginx --sbin-path=/usr/sbin/nginx --with-pcre-jit --with-http_ssl_module --with-file-aio --with-threads --with-openssl=/usr/local/src/openssl-1.1.1k"
