I use services.nginx with the option services.nginx.virtualHosts.<name>.enableACME enabled. I also have a web server systemd service that specifies:
# Ordering constraint on the web server's unit: it is listed as starting
# before nginx.service.
before = [
"nginx.service"
];
Because my web server takes a while to start, the security.acme certificate request times out and its systemd unit fails.
To Reproduce
Use the following example module:
# Reproducer module: declares a `services.myserver` option set and, when
# enabled, wires up an nginx virtual host with ACME plus a systemd unit that
# is ordered before nginx.service and takes 30 seconds in its pre-start step.
{ config, lib, pkgs, ... }:

let
  inherit (lib) mkIf mkOption types;

  cfg = config.services.myserver;

  # Local port that the nginx virtual host proxies to.
  httpPort = 8080;
in
{
  options.services.myserver = {
    enable = mkOption {
      type = types.bool;
      default = false;
      description = ''
        Whether to run MyServer.
      '';
    };

    FQDN = mkOption {
      type = types.str;
      example = "myserver.example.com";
      description = ''
        The Fully Qualified Domain Name of the MyServer application.
      '';
    };
  };

  config = mkIf cfg.enable {
    # Reverse proxy in front of the application; the certificate for the
    # virtual host is requested through ACME.
    services.nginx = {
      enable = true;
      virtualHosts."${cfg.FQDN}" = {
        default = true;
        forceSSL = true;
        enableACME = true;
        acmeRoot = "/var/lib/acme/acme-challenges/${cfg.FQDN}";
        locations."/".proxyPass = "http://[::1]:${toString httpPort}";
      };
    };

    security.acme = {
      # NOTE(review): this value looks like a scrubbed placeholder rather than
      # a usable address; substitute a real contact e-mail when reproducing.
      email = "[email protected]";
      acceptTerms = true;
    };

    networking.firewall.allowedTCPPorts = [ 80 443 ];

    systemd.services.myserver = {
      description = "MyServer Web App";

      # Listed as wanted by nginx.service and ordered before it, so nginx's
      # start is held back until this unit's start-up has finished.
      wantedBy = [ "multi-user.target" "nginx.service" ];
      before = [ "nginx.service" ];

      # Simulates a slow-starting application; this delay is what exposes the
      # ACME failure described in the report.
      preStart = ''
        echo "Delay the start…"
        sleep 30
        echo "…done!"
      '';

      script = ''
        echo "Hello world!"
      '';
    };
  };
}
Expected behavior
The acme systemd units created by services.nginx should wait until nginx has started before they start.
Metadata
# nix-shell -p nix-info --run "nix-info -m"
these paths will be fetched (0.05 MiB download, 0.28 MiB unpacked):
/nix/store/5h35s50503spks8gkxdv3kk4szz8vx9g-bash-interactive-4.4-p23-dev
copying path '/nix/store/5h35s50503spks8gkxdv3kk4szz8vx9g-bash-interactive-4.4-p23-dev' from 'https://cache.nixos.org'...
- system: `"x86_64-linux"`
- host os: `Linux 5.4.22, NixOS, 20.09pre-git (Nightingale)`
- multi-user?: `yes`
- sandbox: `yes`
- version: `nix-env (Nix) 2.3.3`
- nixpkgs: `/nix/var/nix/profiles/per-user/root/channels/nixos`
Maintainer information:
# a list of nixos modules affected by the problem
module: nixos/modules/services/web-servers/nginx
@arianvp Would adding to `after` what is already added to `wants` fix the issue?
Making a note that httpd has the same code and presumably the same issue so any fix on nginx should likely be ported to httpd.
Thank you for the issue and reproducer. I'll have a closer look at this today
@datafoo what exactly do you mean by "timeout" here by the way. could you provide some journal logs of this failure?
I can try to reproduce this when I'm home, but maybe the logs are enough to understand what is going wrong.
> @datafoo what exactly do you mean by "timeout" here by the way. could you provide some journal logs of this failure?
I mean "Timeout during connect (likely firewall problem)" in the logs below.
Note that the hostname, email and domain have been redacted.
Also notice the long time between Mar 06 19:02:57 and Mar 06 19:03:14.
Mar 06 19:02:54 myhostname-1234 systemd[1]: Starting Renew ACME Certificate for subdomain.example.com...
Mar 06 19:02:54 myhostname-1234 qslf92hywfjqbidi0hp2n5sln0l6jsdi-acme-start[7930]: 2020/03/06 19:02:54 No key found for account [email protected]. Generating a P384 key.
Mar 06 19:02:54 myhostname-1234 qslf92hywfjqbidi0hp2n5sln0l6jsdi-acme-start[7930]: 2020/03/06 19:02:54 Saved key to accounts/acme-v02.api.letsencrypt.org/[email protected]/keys/[email protected]
Mar 06 19:02:55 myhostname-1234 qslf92hywfjqbidi0hp2n5sln0l6jsdi-acme-start[7930]: 2020/03/06 19:02:55 Account [email protected] is not registered. Use 'run' to register a new account.
Mar 06 19:02:56 myhostname-1234 qslf92hywfjqbidi0hp2n5sln0l6jsdi-acme-start[7930]: 2020/03/06 19:02:56 [INFO] acme: Registering account for [email protected]
Mar 06 19:02:56 myhostname-1234 qslf92hywfjqbidi0hp2n5sln0l6jsdi-acme-start[7930]: !!!! HEADS UP !!!!
Mar 06 19:02:56 myhostname-1234 qslf92hywfjqbidi0hp2n5sln0l6jsdi-acme-start[7930]: Your account credentials have been saved in your Let's Encrypt
Mar 06 19:02:56 myhostname-1234 qslf92hywfjqbidi0hp2n5sln0l6jsdi-acme-start[7930]: configuration directory at "accounts".
Mar 06 19:02:56 myhostname-1234 qslf92hywfjqbidi0hp2n5sln0l6jsdi-acme-start[7930]: You should make a secure backup of this folder now. This
Mar 06 19:02:56 myhostname-1234 qslf92hywfjqbidi0hp2n5sln0l6jsdi-acme-start[7930]: configuration directory will also contain certificates and
Mar 06 19:02:56 myhostname-1234 qslf92hywfjqbidi0hp2n5sln0l6jsdi-acme-start[7930]: private keys obtained from Let's Encrypt so making regular
Mar 06 19:02:56 myhostname-1234 qslf92hywfjqbidi0hp2n5sln0l6jsdi-acme-start[7930]: backups of this folder is ideal.2020/03/06 19:02:56 [INFO] [subdomain.example.com] acme: Obtaining bundled SAN certificate
Mar 06 19:02:57 myhostname-1234 qslf92hywfjqbidi0hp2n5sln0l6jsdi-acme-start[7930]: 2020/03/06 19:02:57 [INFO] [subdomain.example.com] AuthURL: https://acme-v02.api.letsencrypt.org/acme/authz-v3/3201229599
Mar 06 19:02:57 myhostname-1234 qslf92hywfjqbidi0hp2n5sln0l6jsdi-acme-start[7930]: 2020/03/06 19:02:57 [INFO] [subdomain.example.com] acme: Could not find solver for: tls-alpn-01
Mar 06 19:02:57 myhostname-1234 qslf92hywfjqbidi0hp2n5sln0l6jsdi-acme-start[7930]: 2020/03/06 19:02:57 [INFO] [subdomain.example.com] acme: use http-01 solver
Mar 06 19:02:57 myhostname-1234 qslf92hywfjqbidi0hp2n5sln0l6jsdi-acme-start[7930]: 2020/03/06 19:02:57 [INFO] [subdomain.example.com] acme: Trying to solve HTTP-01
Mar 06 19:03:14 myhostname-1234 qslf92hywfjqbidi0hp2n5sln0l6jsdi-acme-start[7930]: 2020/03/06 19:03:14 [INFO] Deactivating auth: https://acme-v02.api.letsencrypt.org/acme/authz-v3/3201229599
Mar 06 19:03:14 myhostname-1234 qslf92hywfjqbidi0hp2n5sln0l6jsdi-acme-start[7930]: 2020/03/06 19:03:14 [INFO] Unable to deactivate the authorization: https://acme-v02.api.letsencrypt.org/acme/authz-v3/3201229599
Mar 06 19:03:14 myhostname-1234 qslf92hywfjqbidi0hp2n5sln0l6jsdi-acme-start[7930]: 2020/03/06 19:03:14 Could not obtain certificates:
Mar 06 19:03:14 myhostname-1234 qslf92hywfjqbidi0hp2n5sln0l6jsdi-acme-start[7930]: acme: Error -> One or more domains had a problem:
Mar 06 19:03:14 myhostname-1234 qslf92hywfjqbidi0hp2n5sln0l6jsdi-acme-start[7930]: [subdomain.example.com] acme: error: 400 :: urn:ietf:params:acme:error:connection :: Fetching http://subdomain.example.com/.well-known/acme-challenge/EVr5utReVbW1xQKdA_ghxoy1A3oqmab3qDc9Fuy0PoE: Timeout during connect (likely firewall problem), url:
Mar 06 19:03:14 myhostname-1234 systemd[1]: acme-subdomain.example.com.service: Main process exited, code=exited, status=1/FAILURE
Mar 06 19:03:14 myhostname-1234 systemd[1]: acme-subdomain.example.com.service: Failed with result 'exit-code'.
Mar 06 19:03:14 myhostname-1234 systemd[1]: Failed to start Renew ACME Certificate for subdomain.example.com.
Mar 06 19:03:14 myhostname-1234 systemd[1]: acme-subdomain.example.com.service: Consumed 215ms CPU time, received 20.2K IP traffic, sent 9.6K IP traffic.
nginx had not yet been started when Let's Encrypt tried to retrieve the challenge:
Mar 06 19:03:24 myhostname-1234 systemd[1]: Starting Nginx Web Server...
Mar 06 19:03:24 myhostname-1234 m2xzyq2pxx85lyshlq6fydbkdanj69px-unit-script-nginx-pre-start[8157]: nginx: [warn] could not build optimal types_hash, you should increase either types_hash_max_size: 1024 or types_hash_bucket_size: 64; ignoring types_hash_bucket_size
Mar 06 19:03:24 myhostname-1234 m2xzyq2pxx85lyshlq6fydbkdanj69px-unit-script-nginx-pre-start[8157]: nginx: the configuration file /nix/store/x45gw1w5xd8ai1zz5xknf97gmfzwmjpx-nginx.conf syntax is ok
Mar 06 19:03:24 myhostname-1234 m2xzyq2pxx85lyshlq6fydbkdanj69px-unit-script-nginx-pre-start[8157]: nginx: configuration file /nix/store/x45gw1w5xd8ai1zz5xknf97gmfzwmjpx-nginx.conf test is successful
Mar 06 19:03:24 myhostname-1234 systemd[1]: Started Nginx Web Server.
Mar 06 19:03:24 myhostname-1234 nginx[8159]: nginx: [warn] could not build optimal types_hash, you should increase either types_hash_max_size: 1024 or types_hash_bucket_size: 64; ignoring types_hash_bucket_size
> @arianvp Would adding to `after` what is already added to `wants` fix the issue?
I'm scared that won't work. The lack of after here is very deliberate
nginx pulls in acme-selfsigned-${cert}.service (Through after= and wants=) to create fake certs, as nginx crashes hard when certificate paths aren't present.
We want nginx to start up _before_ the acme challenge is initiated, as nginx is responsible for serving the acme-challenge directory to complete it. If acme-${cert} starts before nginx, it will always fail.
It's all a bit mind-boggling and the dependencies are a bit complicated. I wish it were a bit simpler too.
However, isn't what we want here `Before=` instead? We want to guarantee that nginx is running _before_ the certificate request is fired off. So adding a `Before=acme-${cert}.service` to `nginx.service` might work? I'll see if that fixes it.
> However, isn't what we want here `Before=` instead? We want to guarantee that `nginx` is running _before_ the certificate request is fired off. So adding a `Before=acme-${cert}.service` to `nginx.service` might work?
Absolutely, that is what I meant but I wrote it wrongly.
Should we go for the solution proposed?
Yep. I have a PR in the works on my machine. I'll try to get it out this week
Most helpful comment
Yep. I have a PR in the works on my machine. I'll try to get it out this week