Opened 10 years ago

Closed 10 years ago

#618 closed defect (invalid)

BoringSSL nginx, worker process XXXXX exited on signal 11

Reported by: Alex Storn Owned by:
Priority: critical Milestone:
Component: nginx-core Version: 1.7.x
Keywords: Cc:
uname -a: FreeBSD s16.example.com 10.0-RELEASE-p7 FreeBSD 10.0-RELEASE-p7 #0: Tue Jul 8 06:37:44 UTC 2014 root@amd64-builder.daemonology.net:/usr/obj/usr/src/sys/GENERIC amd64
nginx -V: nginx version: nginx/1.7.4
TLS SNI support enabled
configure arguments: --prefix=/usr/local/etc/nginx --with-cc-opt='-I /tmp/boringssl/.openssl/include' --with-ld-opt='-L /tmp/boringssl/.openssl/lib' --conf-path=/usr/local/etc/nginx/nginx.conf --sbin-path=/usr/local/sbin/nginx --pid-path=/var/run/nginx.pid --error-log-path=/var/log/nginx-error.log --user=www --group=www --with-file-aio --with-ipv6 --http-client-body-temp-path=/var/tmp/nginx/client_body_temp --http-fastcgi-temp-path=/var/tmp/nginx/fastcgi_temp --http-proxy-temp-path=/var/tmp/nginx/proxy_temp --http-scgi-temp-path=/var/tmp/nginx/scgi_temp --http-uwsgi-temp-path=/var/tmp/nginx/uwsgi_temp --http-log-path=/var/log/nginx-access.log --add-module=/usr/ports/www/nginx-devel/work/openresty-headers-more-nginx-module-0c6e05d --with-http_image_filter_module --with-http_stub_status_module --with-pcre --with-http_spdy_module --with-http_ssl_modul

Description

nginx workers crash randomly when built with the latest version of BoringSSL (as of today). The build instructions were taken from https://calomel.org/nginx.html (build Nginx with Google's BoringSSL)

40k errors

[alert] 23307#0: worker process XXXXX exited on signal 11

This patch does not solve the worker crash problem

The crash is caused by SSL_CTX_get_ex_data() returning NULL, which is then dereferenced without a check. Since this check is absent in trunk, I suppose the bug is reproducible there, too.

worker_processes 4;
worker_priority 15;
pcre_jit on;
error_log  /var/log/nginx-error.log crit;
worker_rlimit_nofile 20000;
events {
  worker_connections 19000;
  use kqueue;
}

http {
 # Timeouts: do not keep connections open longer than necessary to reduce
 # resource usage and deny Slowloris type attacks. Slow attacks are related to
 # pausing in between packets.
  client_body_timeout      2s; # maximum time between packets the client can pause when sending nginx any data
  client_header_timeout    2s; # maximum time the client has to send the entire header to nginx
  keepalive_timeout       28s; # timeout which a single keep-alive client connection will stay open
  send_timeout            10s; # maximum time between packets nginx is allowed to pause when sending the client data
  spdy_keepalive_timeout 128s; # inactivity timeout after which the SPDY connection is closed
  spdy_recv_timeout        2s; # timeout if nginx is currently expecting data from the client but nothing arrives

 # general options for FreeBSD on ZFS
  aio                        on; # asynchronous file input/output, fast with ZFS, make sure sendfile=off
  charset                 utf-8; # adds the line "Content-Type" into response-header, same as "source_charset"
  default_type            application/octet-stream;
 #directio                  off; # zfs does not support direct i/o because of the ARC and L2ARC
  disable_symlinks           on; # disable symlinks to avoid malicious symlinks out of the document root
  etag                      off; # disables "ETag" response header so clients use Cache-Control header only
  gzip                      off; # disable on the fly gzip compression, only use gzip_static to reduce latency
  gzip_http_version         1.0; # serve gzipped content to all clients including HTTP/1.0 and greater
 # gzip_static            always; # precompress content (gzip -9) with an external script found on this page below
 #gzip_vary                  on; # send response header "Vary: Accept-Encoding". SPDY ignores Vary header
  gzip_proxied              any; # allows compressed responses for any request even from proxies
  ignore_invalid_headers     on;
  include                    mime.types;
  keepalive_requests         20; # number of keep alive requests per connection, does not affect SPDY
  keepalive_disable        none; # allow all browsers to use keepalive connections
  lingering_time              2; # maximum time during which nginx will process additional data from the client
  lingering_timeout           2; # maximum waiting time for more client data to arrive
  max_ranges                  1; # allow a single range header for resumed downloads and to stop large range header DoS attacks
  merge_slashes              on; # compression of two or more adjacent slashes in a URI into a single slash "//" into "/"
  msie_padding              off;
 #open_file_cache          max=128 inactive=4h; # cache is not needed if ZFS ARC size is sufficient
 #open_file_cache_errors     on;                # since ARC delivery is faster than the cache lookups
 #open_file_cache_min_uses    1;
 #open_file_cache_valid      3h;
  output_buffers         1 256K; # sendfile=off so set to the total size of all objects on an average page
 #postpone_output          1460; # before sending data response, collect at least one packet's payload (MSS) of data
 #read_ahead                  0; # no forced read ahead, let ZFS handle I/O calls as zfs is efficient
  recursive_error_pages      on;
  reset_timedout_connection  on; # reset timed out connections freeing ram and resources
  sendfile                  off; # off for FreeBSD and ZFS to avoid redundant data caching
  server_tokens             off; # no nginx version number in error pages
 #server_name_in_redirect   off; # if off, nginx will use the requested Host header
  source_charset          utf-8; # same value as "charset"
  spdy_headers_comp           1; # SPDY gzip header compression to at least one(1) (default 0)
  spdy_max_concurrent_streams 20; #SPDY maximum parallel client requests (default 100)
  tcp_nodelay                on; # disable the Nagle buffering algorithm, used for keepalive only
  tcp_nopush                off; # sendfile=off so tcp_nopush can not be used

  ..
}

Change History (2)

comment:1 by Alex Storn, 10 years ago

# mkdir /tmp/core
# chmod 0755 /tmp/core
# chown www:www /tmp/core
# sysctl kern.sugid_coredump=1
# sysctl kern.corefile=/tmp/core/%N.core.%P
# gdb /usr/local/sbin/nginx /tmp/core/nginx.core.25334
GNU gdb 6.1.1 [FreeBSD]
Copyright 2004 Free Software Foundation, Inc.
GDB is free software, covered by the GNU General Public License, and you are
welcome to change it and/or distribute copies of it under certain conditions.
Type "show copying" to see the conditions.
There is absolutely no warranty for GDB.  Type "show warranty" for details.
This GDB was configured as "amd64-marcel-freebsd"...(no debugging
symbols found)...
Core was generated by `nginx'.
Program terminated with signal 11, Segmentation fault.
Reading symbols from /lib/libcrypt.so.5...(no debugging symbols found)...done.
Loaded symbols for /lib/libcrypt.so.5
Reading symbols from /usr/local/lib/libpcre.so.3...(no debugging
symbols found)...done.
Loaded symbols for /usr/local/lib/libpcre.so.3
Reading symbols from /lib/libz.so.6...(no debugging symbols found)...done.
Loaded symbols for /lib/libz.so.6
Reading symbols from /usr/local/lib/libgd.so.5...(no debugging symbols
found)...done.
Loaded symbols for /usr/local/lib/libgd.so.5
Reading symbols from /lib/libc.so.7...(no debugging symbols found)...done.
Loaded symbols for /lib/libc.so.7
Reading symbols from /lib/libthr.so.3...(no debugging symbols found)...done.
Loaded symbols for /lib/libthr.so.3
Reading symbols from /usr/local/lib/libjpeg.so.11...(no debugging
symbols found)...done.
Loaded symbols for /usr/local/lib/libjpeg.so.11
Reading symbols from /lib/libm.so.5...(no debugging symbols found)...done.
Loaded symbols for /lib/libm.so.5
Reading symbols from /usr/local/lib/libpng15.so.15...(no debugging
symbols found)...done.
Loaded symbols for /usr/local/lib/libpng15.so.15
Reading symbols from /usr/local/lib/libfreetype.so.6...(no debugging
symbols found)...done.
Loaded symbols for /usr/local/lib/libfreetype.so.6
Reading symbols from /usr/local/lib/libfontconfig.so.1...(no debugging
symbols found)...done.
Loaded symbols for /usr/local/lib/libfontconfig.so.1
Reading symbols from /usr/local/lib/libtiff.so.4...(no debugging
symbols found)...done.
Loaded symbols for /usr/local/lib/libtiff.so.4
Reading symbols from /usr/lib/libbz2.so.4...(no debugging symbols found)...done.
Loaded symbols for /usr/lib/libbz2.so.4
Reading symbols from /usr/local/lib/libexpat.so.6...(no debugging
symbols found)...done.
Loaded symbols for /usr/local/lib/libexpat.so.6
Reading symbols from /usr/lib/liblzma.so.5...(no debugging symbols
found)...done.
Loaded symbols for /usr/lib/liblzma.so.5
Reading symbols from /usr/local/lib/libjbig.so.2...(no debugging
symbols found)...done.
Loaded symbols for /usr/local/lib/libjbig.so.2
Reading symbols from /libexec/ld-elf.so.1...(no debugging symbols found)...done.
Loaded symbols for /libexec/ld-elf.so.1
#0  0x00000008013cdba0 in strncmp () from /lib/libc.so.7
[New Thread 803406400 (LWP 100828/<unknown>)]
[New LWP 100767]
(gdb) backtrace full
#0  0x00000008013cdba0 in strncmp () from /lib/libc.so.7
No symbol table info available.
#1  0x000000000049c73f in ssl_cipher_ptr_id_cmp ()
No symbol table info available.
#2  0x000000000048a44d in ssl3_accept ()
No symbol table info available.
#3  0x00000000004892c2 in ssl3_accept ()
No symbol table info available.
#4  0x0000000000482d8b in ?? ()
No symbol table info available.
#5  0x000000000042b824 in ?? ()
No symbol table info available.
#6  0x0000000000439708 in ?? ()
No symbol table info available.
#7  0x0000000000421dbb in ?? ()
No symbol table info available.
#8  0x0000000000428815 in ?? ()
No symbol table info available.
#9  0x000000000042644f in ?? ()
No symbol table info available.
#10 0x0000000000427799 in ?? ()
No symbol table info available.
#11 0x0000000000426f63 in ?? ()
No symbol table info available.
#12 0x00000000004091df in ?? ()
No symbol table info available.
#13 0x000000000040884f in ?? ()
No symbol table info available.
#14 0x00000008007a1000 in ?? ()
No symbol table info available.
#15 0x0000000000000000 in ?? ()
No symbol table info available.
Version 0, edited 10 years ago by Alex Storn (next)

comment:2 by Maxim Dounin, 10 years ago

Resolution: invalid
Status: new → closed

From the backtrace it looks like the problem is in BoringSSL, and a quick look into the code confirms this. From the ssl_scan_clienthello_tlsext() code (looks like it's inlined into ssl_parse_clienthello_tlsext() mentioned in the backtrace):

                        /* Decode each ServerName in the extension. */
                        while (CBS_len(&server_name_list) > 0)
                                {
                                uint8_t name_type;
                                CBS host_name;

                                /* Decode the NameType. */
                                if (!CBS_get_u8(&server_name_list, &name_type))
                                        {
                                        *out_alert = SSL_AD_DECODE_ERROR;
                                        return 0;
                                        }

                                /* Only host_name is supported. */
                                if (name_type != TLSEXT_NAMETYPE_host_name)
                                        continue;

                                if (!s->hit)
                                        {
                                        ...
                                        }
                                        }
                                else
                                        {
                                        s->servername_done = s->session->tlsext_hostname
                                                && strlen(s->session->tlsext_hostname) == CBS_len(&host_name)
                                                && strncmp(s->session->tlsext_hostname,
                                                        (char *)CBS_data(&host_name), CBS_len(&host_name)) == 0;
                                        }
                                }

Here host_name is used uninitialized in the else clause. If its length happens to match the length of the hostname stored in the previously established session, a segmentation fault will likely follow due to dereferencing an uninitialized pointer. Feel free to report this to the BoringSSL guys.

Note: See TracTickets for help on using tickets.