Opened 10 years ago

Closed 10 years ago

#618 closed defect (invalid)

BoringSSL nginx, worker process XXXXX exited on signal 11

Reported by: Alex Storn Owned by:
Priority: critical Milestone:
Component: nginx-core Version: 1.7.x
Keywords: Cc:
uname -a: FreeBSD s16.example.com 10.0-RELEASE-p7 FreeBSD 10.0-RELEASE-p7 #0: Tue Jul 8 06:37:44 UTC 2014 root@amd64-builder.daemonology.net:/usr/obj/usr/src/sys/GENERIC amd64
nginx -V: nginx version: nginx/1.7.4
TLS SNI support enabled
configure arguments: --prefix=/usr/local/etc/nginx --with-cc-opt='-I /tmp/boringssl/.openssl/include' --with-ld-opt='-L /tmp/boringssl/.openssl/lib' --conf-path=/usr/local/etc/nginx/nginx.conf --sbin-path=/usr/local/sbin/nginx --pid-path=/var/run/nginx.pid --error-log-path=/var/log/nginx-error.log --user=www --group=www --with-file-aio --with-ipv6 --http-client-body-temp-path=/var/tmp/nginx/client_body_temp --http-fastcgi-temp-path=/var/tmp/nginx/fastcgi_temp --http-proxy-temp-path=/var/tmp/nginx/proxy_temp --http-scgi-temp-path=/var/tmp/nginx/scgi_temp --http-uwsgi-temp-path=/var/tmp/nginx/uwsgi_temp --http-log-path=/var/log/nginx-access.log --add-module=/usr/ports/www/nginx-devel/work/openresty-headers-more-nginx-module-0c6e05d --with-http_image_filter_module --with-http_stub_status_module --with-pcre --with-http_spdy_module --with-http_ssl_modul

Description

nginx workers crash randomly when nginx is built with the latest BoringSSL (as of today). The build instructions were taken from https://calomel.org/nginx.html (build Nginx with Google's BoringSSL)

40k errors

[alert] 23307#0: worker process XXXXX exited on signal 11

This patch does not solve the worker crash problem.

The crash is caused by SSL_CTX_get_ex_data() returning NULL, which is then dereferenced without a check. Since this check is absent in trunk, I suppose the bug is reproducible there, too.

worker_processes 4;
worker_priority 15;
pcre_jit on;
error_log  /var/log/nginx-error.log crit;
worker_rlimit_nofile 20000;
events {
  worker_connections 19000;
  use kqueue;
}

http {
 # Timeouts: do not keep connections open longer than necessary to reduce
 # resource usage and deny Slowloris type attacks. Slow attacks are related to
 # pausing in between packets.
  client_body_timeout      2s; # maximum time between packets the client can pause when sending nginx any data
  client_header_timeout    2s; # maximum time the client has to send the entire header to nginx
  keepalive_timeout       28s; # timeout which a single keep-alive client connection will stay open
  send_timeout            10s; # maximum time between packets nginx is allowed to pause when sending the client data
  spdy_keepalive_timeout 128s; # inactivity timeout after which the SPDY connection is closed
  spdy_recv_timeout        2s; # timeout if nginx is currently expecting data from the client but nothing arrives

 # general options for FreeBSD on ZFS
  aio                        on; # asynchronous file input/output, fast with ZFS, make sure sendfile=off
  charset                 utf-8; # adds the line "Content-Type" into response-header, same as "source_charset"
  default_type            application/octet-stream;
 #directio                  off; # zfs does not support direct i/o because of the ARC and L2ARC
  disable_symlinks           on; # disable symlinks to avoid malicious symlinks out of the document root
  etag                      off; # disables "ETag" response header so clients use Cache-Control header only
  gzip                      off; # disable on the fly gzip compression, only use gzip_static to reduce latency
  gzip_http_version         1.0; # serve gzipped content to all clients including HTTP/1.0 and greater
 # gzip_static            always; # precompress content (gzip -9) with an external script found on this page below
 #gzip_vary                  on; # send response header "Vary: Accept-Encoding". SPDY ignores Vary header
  gzip_proxied              any; # allows compressed responses for any request even from proxies
  ignore_invalid_headers     on;
  include                    mime.types;
  keepalive_requests         20; # number of keep alive requests per connection, does not affect SPDY
  keepalive_disable        none; # allow all browsers to use keepalive connections
  lingering_time              2; # maximum time during which nginx will process additional data from the client
  lingering_timeout           2; # maximum waiting time for more client data to arrive
  max_ranges                  1; # allow a single range header for resumed downloads and to stop large range header DoS attacks
  merge_slashes              on; # compression of two or more adjacent slashes in a URI into a single slash "//" into "/"
  msie_padding              off;
 #open_file_cache          max=128 inactive=4h; # cache is not needed if ZFS ARC size is sufficient
 #open_file_cache_errors     on;                # since ARC delivery is faster than the cache lookups
 #open_file_cache_min_uses    1;
 #open_file_cache_valid      3h;
  output_buffers         1 256K; # sendfile=off so set to the total size of all objects on an average page
 #postpone_output          1460; # before sending data response, collect at least one packet's payload (MSS) of data
 #read_ahead                  0; # no forced read ahead, let ZFS handle I/O calls as zfs is efficient
  recursive_error_pages      on;
  reset_timedout_connection  on; # reset timed out connections freeing ram and resources
  sendfile                  off; # off for FreeBSD and ZFS to avoid redundant data caching
  server_tokens             off; # no nginx version number in error pages
 #server_name_in_redirect   off; # if off, nginx will use the requested Host header
  source_charset          utf-8; # same value as "charset"
  spdy_headers_comp           1; # SPDY gzip header compression to at least one(1) (default 0)
  spdy_max_concurrent_streams 20; #SPDY maximum parallel client requests (default 100)
  tcp_nodelay                on; # disable the Nagle buffering algorithm, used for keepalive only
  tcp_nopush                off; # sendfile=off so tcp_nopush can not be used

  ..
}

Change History (2)

comment:1 by Alex Storn, 10 years ago

When building with clang35 the problem is not reproducible; it occurs only with the base-system clang33.

ngx_int_t
ngx_ssl_handshake(ngx_connection_t *c)
{
    int        n, sslerr;
    ngx_err_t  err;

    ngx_ssl_clear_error(c->log);

 >>>>>   n = SSL_do_handshake(c->ssl->connection);

SSL_do_handshake [ n=0 / sslerr=1 / err=8 ]

# mkdir /tmp/core
# chmod 0755 /tmp/core
# chown www:www /tmp/core
# sysctl kern.sugid_coredump=1
# sysctl kern.corefile=/tmp/core/%N.core.%P
# gdb /usr/local/sbin/nginx /tmp/core/nginx.core.36451
GNU gdb 6.1.1 [FreeBSD]
Copyright 2004 Free Software Foundation, Inc.
GDB is free software, covered by the GNU General Public License, and you are
welcome to change it and/or distribute copies of it under certain conditions.
Type "show copying" to see the conditions.
There is absolutely no warranty for GDB.  Type "show warranty" for details.
This GDB was configured as "amd64-marcel-freebsd"...
Core was generated by `nginx'.
Program terminated with signal 11, Segmentation fault.
Reading symbols from /lib/libcrypt.so.5...done.
Loaded symbols for /lib/libcrypt.so.5
Reading symbols from /usr/local/lib/libpcre.so.3...done.
Loaded symbols for /usr/local/lib/libpcre.so.3
Reading symbols from /lib/libz.so.6...done.
Loaded symbols for /lib/libz.so.6
Reading symbols from /usr/local/lib/libgd.so.5...done.
Loaded symbols for /usr/local/lib/libgd.so.5
Reading symbols from /lib/libc.so.7...done.
Loaded symbols for /lib/libc.so.7
Reading symbols from /lib/libthr.so.3...done.
Loaded symbols for /lib/libthr.so.3
Reading symbols from /usr/local/lib/libjpeg.so.11...done.
Loaded symbols for /usr/local/lib/libjpeg.so.11
Reading symbols from /lib/libm.so.5...done.
Loaded symbols for /lib/libm.so.5
Reading symbols from /usr/local/lib/libpng15.so.15...done.
Loaded symbols for /usr/local/lib/libpng15.so.15
Reading symbols from /usr/local/lib/libfreetype.so.6...done.
Loaded symbols for /usr/local/lib/libfreetype.so.6
Reading symbols from /usr/local/lib/libfontconfig.so.1...done.
Loaded symbols for /usr/local/lib/libfontconfig.so.1
Reading symbols from /usr/local/lib/libtiff.so.4...done.
Loaded symbols for /usr/local/lib/libtiff.so.4
Reading symbols from /usr/lib/libbz2.so.4...done.
Loaded symbols for /usr/lib/libbz2.so.4
Reading symbols from /usr/local/lib/libexpat.so.6...done.
Loaded symbols for /usr/local/lib/libexpat.so.6
Reading symbols from /usr/lib/liblzma.so.5...done.
Loaded symbols for /usr/lib/liblzma.so.5
Reading symbols from /usr/local/lib/libjbig.so.2...done.
Loaded symbols for /usr/local/lib/libjbig.so.2
Reading symbols from /libexec/ld-elf.so.1...done.
Loaded symbols for /libexec/ld-elf.so.1
#0  0x0000000801453ba0 in strncmp () from /lib/libc.so.7
[New Thread 803406400 (LWP 100719/<unknown>)]
[New LWP 101618]
(gdb) set logging on
Copying output to gdb.txt.
(gdb) backtrace full
#0  0x0000000801453ba0 in strncmp () from /lib/libc.so.7
No symbol table info available.
#1  0x000000000051e9cf in ssl_parse_clienthello_tlsext ()
No symbol table info available.
#2  0x000000000050c6dd in ssl3_get_client_hello ()
No symbol table info available.
#3  0x000000000050b552 in ssl3_accept ()
No symbol table info available.
#4  0x000000000050501b in ssl23_accept ()
No symbol table info available.
#5  0x00000000004510bb in ngx_ssl_handshake (c=0x803620180) at src/event/ngx_event_openssl.c:1012
	n = 0
	sslerr = 1
	err = 8
#6  0x000000000046c503 in ngx_http_ssl_handshake (rev=0x8036578c0) at src/http/ngx_http_request.c:717
	p = (u_char *) 0x10036627f0 <Address 0x10036627f0 out of bounds>
	buf = "\026\000\000\000\000\000\000\000�����\177\000\000�����\177\000\000��D\000\000\000\000\0000����\177\000\000�zI\000\000\000\000\000h\224F\003\b\000\000\000��a\003\b\000\000\000\200\001b\003\b\000\000\000\030\210F\003\b\000\000\000�8g\003\b\000\000\000�xe\003\b\000\000\000�J\005\030+\000\000\000��\203"
	size = 1
	n = 1
	err = 0
	rc = 34416492928
	c = (ngx_connection_t *) 0x803620180
	hc = (ngx_http_connection_t *) 0x80377feb8
	sscf = (ngx_http_ssl_srv_conf_t *) 0x8034c7940
#7  0x000000000043c5f0 in ngx_event_process_posted (cycle=0x803468050, posted=0x83a8a8) at src/event/ngx_event_posted.c:40
	ev = (ngx_event_t *) 0x8036578c0
#8  0x000000000043ad1f in ngx_process_events_and_timers (cycle=0x803468050) at src/event/ngx_event.c:275
	flags = 3
	timer = 403
	delta = 1
#9  0x000000000044aaf1 in ngx_worker_process_cycle (cycle=0x803468050, data=0x1) at src/os/unix/ngx_process_cycle.c:822
	worker = 1
	i = 140737488344352
	c = (ngx_connection_t *) 0x7fffffffd630
#10 0x00000000004464ee in ngx_spawn_process (cycle=0x803468050, proc=0x44a900 <ngx_worker_process_cycle>, data=0x1, name=0x5b6966 "worker process", respawn=-3)
    at src/os/unix/ngx_process.c:198
	on = 1
	pid = 0
	s = 1
#11 0x00000000004482f9 in ngx_start_worker_processes (cycle=0x803468050, n=4, type=-3) at src/os/unix/ngx_process_cycle.c:368
	i = 1
	ch = {command = 1, pid = 36450, slot = 0, fd = 3}
#12 0x0000000000447af6 in ngx_master_process_cycle (cycle=0x803468050) at src/os/unix/ngx_process_cycle.c:140
	title = 0x803615548 "master process /usr/local/sbin/nginx"
	p = (u_char *) 0x80361556c ""
	size = 37
	i = 1
	n = 140737488345152
---Type <return> to continue, or q <return> to quit---
	sigio = 34414690408
	set = {__bits = {0, 0, 0, 0}}
	itv = {it_interval = {tv_sec = 0, tv_usec = 0}, it_value = {tv_sec = 6, tv_usec = 0}}
	live = 0
	delay = 0
	ls = (ngx_listening_t *) 0x0
	ccf = (ngx_core_conf_t *) 0x803469158
#13 0x0000000000408ee9 in main (argc=1, argv=0x7fffffffdae8) at src/core/nginx.c:407
	i = 54
	log = (ngx_log_t *) 0x836a10
	cycle = (ngx_cycle_t *) 0x803468050
	init_cycle = {conf_ctx = 0x0, pool = 0x803406800, log = 0x836a10, new_log = {log_level = 0, file = 0x0, connection = 0, handler = 0, data = 0x0, writer = 0, wdata = 0x0,
    action = 0x0, next = 0x0}, log_use_stderr = 0, files = 0x0, free_connections = 0x0, free_connection_n = 0, reusable_connections_queue = {prev = 0x0, next = 0x0}, listening = {
    elts = 0x0, nelts = 0, size = 0, nalloc = 0, pool = 0x0}, paths = {elts = 0x0, nelts = 0, size = 0, nalloc = 0, pool = 0x0}, open_files = {last = 0x0, part = {elts = 0x0,
      nelts = 0, next = 0x0}, size = 0, nalloc = 0, pool = 0x0}, shared_memory = {last = 0x0, part = {elts = 0x0, nelts = 0, next = 0x0}, size = 0, nalloc = 0, pool = 0x0},
  connection_n = 0, files_n = 0, connections = 0x0, read_events = 0x0, write_events = 0x0, old_cycle = 0x0, conf_file = {len = 31,
    data = 0x5b2e87 "/usr/local/etc/nginx/nginx.conf"}, conf_param = {len = 0, data = 0x0}, conf_prefix = {len = 21, data = 0x5b2e87 "/usr/local/etc/nginx/nginx.conf"}, prefix = {
    len = 21, data = 0x5b2e71 "/usr/local/etc/nginx/"}, lock_file = {len = 0, data = 0x0}, hostname = {len = 0, data = 0x0}}
	ccf = (ngx_core_conf_t *) 0x803469158
Last edited 10 years ago by Alex Storn (previous) (diff)

comment:2 by Maxim Dounin, 10 years ago

Resolution: invalid
Status: new → closed

From the backtrace it looks like the problem is in BoringSSL, and a quick look into the code confirms this. From the ssl_scan_clienthello_tlsext() code (looks like it's inlined into ssl_parse_clienthello_tlsext() mentioned in the backtrace):

                        /* Decode each ServerName in the extension. */
                        while (CBS_len(&server_name_list) > 0)
                                {
                                uint8_t name_type;
                                CBS host_name;

                                /* Decode the NameType. */
                                if (!CBS_get_u8(&server_name_list, &name_type))
                                        {
                                        *out_alert = SSL_AD_DECODE_ERROR;
                                        return 0;
                                        }

                                /* Only host_name is supported. */
                                if (name_type != TLSEXT_NAMETYPE_host_name)
                                        continue;

                                if (!s->hit)
                                        {
                                        ...
                                        }
                                        }
                                else
                                        {
                                        s->servername_done = s->session->tlsext_hostname
                                                && strlen(s->session->tlsext_hostname) == CBS_len(&host_name)
                                                && strncmp(s->session->tlsext_hostname,
                                                        (char *)CBS_data(&host_name), CBS_len(&host_name)) == 0;
                                        }
                                }

Here host_name is used uninitialized in the else clause. If its len happens to match the length of the hostname stored in the previously established session, a segmentation fault will likely follow due to dereferencing an uninitialized pointer. Feel free to report this to the BoringSSL developers.

Note: See TracTickets for help on using tickets.