Opened 8 years ago

Closed 8 years ago

#1200 closed defect (invalid)

$upstream_cache_status HIT when using slice module

Reported by: ilumos@…
Owned by:
Priority: minor
Milestone:
Component: other
Version: 1.9.x
Keywords: ngx_http_slice_module
Cc:
uname -a: Linux hostname 4.4.0-59-generic #80-Ubuntu SMP Fri Jan 6 17:47:47 UTC 2017 x86_64 x86_64 x86_64 GNU/Linux
nginx -V: nginx version: nginx/1.10.2
built by gcc 5.4.0 20160609 (Ubuntu 5.4.0-6ubuntu1~16.04.4)
configure arguments: --sbin-path=/usr/local/bin/nginx --conf-path=/etc/nginx/nginx.conf --pid-path=/run/nginx.pid --user=www-data --with-stream --with-http_slice_module --with-http_stub_status_module --with-ld-opt=-Wl,-rpath,/usr/local/lib --with-pcre=/usr/local/src/pcre --add-module=/usr/local/src/nginx_devel_kit --add-module=/usr/local/src/lua_nginx_module --without-http_gzip_module --without-http_ssi_module --without-http_charset_module --without-http_userid_module --without-http_auth_basic_module --without-http_geo_module --without-http_split_clients_module --without-http_referer_module --without-http_fastcgi_module --without-http_uwsgi_module --without-http_scgi_module --without-http_memcached_module --without-http_limit_conn_module --without-http_limit_req_module --without-http_empty_gif_module --without-http_upstream_hash_module --without-http_upstream_ip_hash_module --without-http_upstream_least_conn_module --without-http_upstream_keepalive_module --without-http_upstream_zone_module

Description

When using the HTTP slice module with Nginx configured as a caching reverse proxy, starting from an empty cache, the access log shows a cache HIT for almost all requests, even though Nginx is fetching the file from the upstream server in the background. The cache status should be a MISS if Nginx does not have the slice(s) required to fulfil the client's request.

This setup uses Nginx together with DNS spoofing to cache game files on a LAN.

nginx.conf

user www-data www-data;
worker_processes auto;

events {
    worker_connections 8192;
    multi_accept on;
    use epoll;
}

http {
    include mime.types;

    log_format lancache-default '[$time_local] '
                                '$remote_addr '
                                '$request_method '
                                '"$request_uri" '
                                '$http_range '
                                '$status '
                                '$body_bytes_sent '
                                '$upstream_cache_status '
                                '$host '
                                '$upstream_status '
                                '$upstream_response_time '
                                '"$http_user_agent"';


    access_log /var/lancache/logs/nginx-access.log lancache-default buffer=128k flush=5s;
    error_log /var/lancache/logs/nginx-error.log;

    sendfile on;
    sendfile_max_chunk 512k;
    tcp_nopush on;
    tcp_nodelay on;
    keepalive_timeout 65;

    proxy_cache_path /var/lancache/cache/installers
                     levels=2:2
                     keys_zone=installers:500m
                     inactive=1y
                     max_size=1000g
                     loader_files=1000
                     loader_sleep=50ms
                     loader_threshold=300ms;

    proxy_temp_path /var/lancache/cache/tmp;

    resolver 8.8.8.8 4.2.2.2; 

    server {
        listen *;

        access_log /var/lancache/logs/origin-access.log lancache-default buffer=128k flush=5s;
        error_log /var/lancache/logs/origin-error.log;


        server_name lancache-origin; # primary - used in proxy_cache_key

        # Domains we are caching content from
        server_name akamai.cdn.ea.com
                    origin-a.akamaihd.net.edgesuite.net
                    origin-a.akamaihd.net;

        location / {
            # Pass the entire request URI through to the upstream server
            proxy_pass http://$host$request_uri;

            # Don't modify Refresh: and Location: headers sent from upstreams
            proxy_redirect off; 

            # Send Host: header from client's request to upstreams
            proxy_set_header Host $host;

            # Continue downloading a requested file/slice from upstream, even if a client cancels
            proxy_ignore_client_abort on;

            # Only download the same file once, even if multiple clients 
            # request it before Nginx has finished downloading the file.
            # Timeout set to 1 hour to give Nginx plenty of time to get the file
            proxy_cache_lock on;
            proxy_cache_lock_timeout 1h;

            # Allow the use of stale entries if there is an upstream error
            proxy_cache_use_stale error timeout invalid_header updating http_500 http_502 http_503 http_504;

            # Keep files for 1 year when upstream server gives HTTP 200 (OK) or HTTP 206 (Partial Content)
            proxy_cache_valid 200 206 1y;

            # Disallow caching of HTTP 301 (Moved Permanently) or HTTP 302 (Found) as our
            # cache key does not include query parameters (e.g. ?a=b), therefore may not be valid for all users
            proxy_cache_valid 301 302 0;

            # Enable revalidation of expired cache items using conditional requests, to save re-downloading files
            # that have not been modified
            proxy_cache_revalidate on;

            # Bypass the cache and re-request the upstream file for requests which have the query parameter ?nocache=1
            # so that we can manually force re-caching of a file
            proxy_cache_bypass $arg_nocache;

            # Cache data in the cache named "installers" 
            proxy_cache installers;

            # Origin CDN blocks caching so we must ignore the cache headers they send
            proxy_ignore_headers Expires Cache-Control;

            # Origin client performs many range requests on one single large archive
            # file per game/update, and the Origin CDN refuses requests to download
            # entire files, so we must use "slice" to perform our own consistent range requests
            # of the upstream file, cache each slice, and then respond to the Origin client's
            # range requests.
            # See here for more info on Nginx's behaviour with Slice:
            # https://www.nginx.com/blog/smart-efficient-byte-range-caching-nginx/#cache-slice

            # Set slice range to 1 megabyte    
            slice 1m;

            # Set upstream request headers to include a range request for the slice we want
            proxy_set_header Range $slice_range;

            # Cache based on URI, without query string, and with slice range
            proxy_cache_key "$server_name$uri $slice_range";
        }

    }
}

Change History (5)

comment:1 by Roman Arutyunyan, 8 years ago

When using the slice module, the first slice is fetched in the main request, while all other slices are fetched in subrequests. Since by default only the main request is logged, the $upstream_cache_status variable shows the cache status of the first slice only, which is likely to be cached. If you want to log subrequests as well, use the "log_subrequest on" directive.
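For reference, a minimal sketch of that change, assuming the http block and the lancache-default access_log line already defined in the nginx.conf above (log_subrequest is valid at http, server, and location level):

http {
    # ... existing lancache settings ...

    # Log each slice subrequest in addition to the main request, so every
    # slice fetch gets its own access-log line and its own cache status
    log_subrequest on;

    access_log /var/lancache/logs/nginx-access.log lancache-default buffer=128k flush=5s;
}

With this enabled, each 1 MB slice subrequest is logged on its own line with its own $upstream_cache_status, while the main request's line still reflects only the first slice.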

comment:2 by ilumos@…, 8 years ago

Great, thank you for the pointer - I'll give that a go and see if it produces the desired result. Many thanks

comment:3 by ilumos@…, 8 years ago

Yes, that gives the desired result. Is there a variable that can be logged to show whether a given log entry is for a subrequest?


comment:4 by Roman Arutyunyan, 8 years ago

There's no such variable, but you can use the fact that the main request's log entry comes after all of its subrequests.

comment:5 by Roman Arutyunyan, 8 years ago

Resolution: invalid
Status: new → closed