config:
core.trust_password: true
api_extensions:
- storage_zfs_remove_snapshots
- container_host_shutdown_timeout
- container_stop_priority
- container_syscall_filtering
- auth_pki
- container_last_used_at
- etag
- patch
- usb_devices
- https_allowed_credentials
- image_compression_algorithm
- directory_manipulation
- container_cpu_time
- storage_zfs_use_refquota
- storage_lvm_mount_options
- network
- profile_usedby
- container_push
- container_exec_recording
- certificate_update
- container_exec_signal_handling
- gpu_devices
- container_image_properties
- migration_progress
- id_map
- network_firewall_filtering
- network_routes
- storage
- file_delete
- file_append
- network_dhcp_expiry
- storage_lvm_vg_rename
- storage_lvm_thinpool_rename
- network_vlan
- image_create_aliases
- container_stateless_copy
- container_only_migration
- storage_zfs_clone_copy
- unix_device_rename
- storage_lvm_use_thinpool
- storage_rsync_bwlimit
- network_vxlan_interface
- storage_btrfs_mount_options
- entity_description
- image_force_refresh
- storage_lvm_lv_resizing
- id_map_base
- file_symlinks
- container_push_target
- network_vlan_physical
- storage_images_delete
- container_edit_metadata
- container_snapshot_stateful_migration
- storage_driver_ceph
- storage_ceph_user_name
- resource_limits
- storage_volatile_initial_source
- storage_ceph_force_osd_reuse
- storage_block_filesystem_btrfs
- resources
- kernel_limits
- storage_api_volume_rename
- macaroon_authentication
- network_sriov
- console
- restrict_devlxd
- migration_pre_copy
- infiniband
- maas_network
- devlxd_events
- proxy
- network_dhcp_gateway
- file_get_symlink
- network_leases
- unix_device_hotplug
- storage_api_local_volume_handling
- operation_description
- clustering
- event_lifecycle
- storage_api_remote_volume_handling
- nvidia_runtime
- container_mount_propagation
- container_backup
- devlxd_images
- container_local_cross_pool_handling
- proxy_unix
- proxy_udp
- clustering_join
- proxy_tcp_udp_multi_port_handling
- network_state
- proxy_unix_dac_properties
- container_protection_delete
- unix_priv_drop
- pprof_http
- proxy_haproxy_protocol
- network_hwaddr
- proxy_nat
- network_nat_order
- container_full
- candid_authentication
- backup_compression
- candid_config
- nvidia_runtime_config
- storage_api_volume_snapshots
- storage_unmapped
- projects
- candid_config_key
- network_vxlan_ttl
- container_incremental_copy
- usb_optional_vendorid
- snapshot_scheduling
- container_copy_project
- clustering_server_address
- clustering_image_replication
- container_protection_shift
- snapshot_expiry
- container_backup_override_pool
- snapshot_expiry_creation
- network_leases_location
- resources_cpu_socket
- resources_gpu
- resources_numa
- kernel_features
- id_map_current
- event_location
- storage_api_remote_volume_snapshots
- network_nat_address
- container_nic_routes
- rbac
- cluster_internal_copy
- seccomp_notify
- lxc_features
- container_nic_ipvlan
api_status: stable
api_version: "1.0"
auth: trusted
public: false
auth_methods:
- tls
environment:
addresses: []
architectures:
- x86_64
- i686
certificate: |
-----BEGIN CERTIFICATE-----
MIICPzCCAcWgAwIBAgIRAP61HvbiUzW9cXFrKujLw+QwCgYIKoZIzj0EAwMwNTEc
MBoGA1UEChMTbGludXhjb250YWluZXJzLm9yZzEVMBMGA1UEAwwMcm9vdEBMdXh1
cmlhMB4XDTE5MDUyNjIyMjcwNFoXDTI5MDUyMzIyMjcwNFowNTEcMBoGA1UEChMT
bGludXhjb250YWluZXJzLm9yZzEVMBMGA1UEAwwMcm9vdEBMdXh1cmlhMHYwEAYH
KoZIzj0CAQYFK4EEACIDYgAESa7S7Em2M1CLImTNqbER+u85ny8UpU9jtAwtr29H
2ciAXLjjWP5jQE+/xePlEwFemgnO7+vE0Atm/Qs8TbVPVfL271hjhm/UKcysahbV
B+vULrfCXKEsk2HbP5YOj1w1o4GYMIGVMA4GA1UdDwEB/wQEAwIFoDATBgNVHSUE
DDAKBggrBgEFBQcDATAMBgNVHRMBAf8EAjAAMGAGA1UdEQRZMFeCB0x1eHVyaWGH
BMCoAQqHEP1L6KvTkgAAAAAAAAAADVyHEP1NhFm/xAAAaNY0//7CmCGHECoAECiW
yv52aNY0//7CmCGHEP1L6KvTkgAAaNY0//7CmCEwCgYIKoZIzj0EAwMDaAAwZQIx
ANX2O73o8rVwoet0LaDlvCWm7W4Wkix9nMdQCkRL3Baosle54voon+A1ntuNG33R
LwIwKpOKAC6Khx/wvy5R7aaDmc2mVZl/i+gy0B/zgS1oAu8RgKlntqnxG76SGelp
rMRY
-----END CERTIFICATE-----
certificate_fingerprint: e6c1c67b091b1fc718f9248de24e0ee1eee27b34c3915a5c0de93c7d2795a8fa
driver: lxc
driver_version: 3.1.0
kernel: Linux
kernel_architecture: x86_64
kernel_features:
netnsid_getifaddrs: "true"
seccomp_listener: "true"
shiftfs: "false"
uevent_injection: "true"
unpriv_fscaps: "true"
kernel_version: 5.1.4-arch1-1-ARCH
lxc_features:
mount_injection_file: "true"
network_gateway_device_route: "false"
network_ipvlan: "false"
network_l2proxy: "false"
seccomp_notify: "false"
project: default
server: lxd
server_clustered: false
server_name: Luxuria
server_pid: 905
server_version: "3.13"
storage: btrfs
storage_version: "4.4"
I have migrated containers from an AUR/lxd installation of LXD into a snap installation of LXD on another machine.
I could then start the containers just fine. After rebooting however, some of the containers fail.
EDIT: Looks like I can't create and start new containers either with the same error.
[0] % sudo lxc info --show-log pxe
Name: pxe
Location: none
Remote: unix://
Architecture: x86_64
Created: 2019/05/26 23:12 UTC
Status: Stopped
Type: persistent
Profiles: default
Log:
lxc pxe 20190526235338.286 WARN conf - conf.c:lxc_map_ids:2970 - newuidmap binary is missing
lxc pxe 20190526235338.286 WARN conf - conf.c:lxc_map_ids:2976 - newgidmap binary is missing
lxc pxe 20190526235338.287 WARN conf - conf.c:lxc_map_ids:2970 - newuidmap binary is missing
lxc pxe 20190526235338.287 WARN conf - conf.c:lxc_map_ids:2976 - newgidmap binary is missing
lxc pxe 20190526235338.330 ERROR dir - storage/dir.c:dir_mount:198 - Permission denied - Failed to mount "/var/snap/lxd/common/lxd/containers/pxe/rootfs" on "/var/snap/lxd/common/lxc/"
lxc pxe 20190526235338.331 ERROR conf - conf.c:lxc_mount_rootfs:1351 - Failed to mount rootfs "/var/snap/lxd/common/lxd/containers/pxe/rootfs" onto "/var/snap/lxd/common/lxc/" with options "(null)"
lxc pxe 20190526235338.331 ERROR conf - conf.c:lxc_setup_rootfs_prepare_root:3498 - Failed to setup rootfs for
lxc pxe 20190526235338.331 ERROR conf - conf.c:lxc_setup:3551 - Failed to setup rootfs
lxc pxe 20190526235338.331 ERROR start - start.c:do_start:1282 - Failed to setup container "pxe"
lxc pxe 20190526235338.331 ERROR sync - sync.c:__sync_wait:62 - An error occurred in another process (expected sequence number 5)
lxc pxe 20190526235338.372 ERROR start - start.c:__lxc_start:1975 - Failed to spawn container "pxe"
lxc pxe 20190526235338.372 ERROR lxccontainer - lxccontainer.c:wait_on_daemonized_start:864 - Received container state "ABORTING" instead of "RUNNING"
lxc pxe 20190526235338.373 WARN conf - conf.c:lxc_map_ids:2970 - newuidmap binary is missing
lxc pxe 20190526235338.373 WARN conf - conf.c:lxc_map_ids:2976 - newgidmap binary is missing
lxc 20190526235338.377 WARN commands - commands.c:lxc_cmd_rsp_recv:132 - Connection reset by peer - Failed to receive response for command "get_state"
(lxc info NAME --show-log)
(lxc config show NAME --expanded)
2019/05/27 02:00:53.335177 cmd_run.go:889: WARNING: cannot copy user Xauthority file: Xauthority file isn't owned by the current user 0
architecture: x86_64
config:
image.architecture: amd64
image.description: Archlinux current amd64 (20190213_04:19)
image.os: Archlinux
image.release: current
image.serial: "20190213_04:19"
volatile.base_image: dd44789451bb561a84228a3f148eec3b144c51ca8607d91bc9ec4d282cc5f628
volatile.eth0.hwaddr: 00:16:3e:d4:5e:18
volatile.idmap.base: "0"
volatile.idmap.current: '[{"Isuid":true,"Isgid":false,"Hostid":1000000,"Nsid":0,"Maprange":1000000000},{"Isuid":false,"Isgid":true,"Hostid":1000000,"Nsid":0,"Maprange":1000000000}]'
volatile.idmap.next: '[{"Isuid":true,"Isgid":false,"Hostid":1000000,"Nsid":0,"Maprange":1000000000},{"Isuid":false,"Isgid":true,"Hostid":1000000,"Nsid":0,"Maprange":1000000000}]'
volatile.last_state.idmap: '[{"Isuid":true,"Isgid":false,"Hostid":1000000,"Nsid":0,"Maprange":1000000000},{"Isuid":false,"Isgid":true,"Hostid":1000000,"Nsid":0,"Maprange":1000000000}]'
volatile.last_state.power: STOPPED
devices:
eth0:
name: eth0
nictype: bridged
parent: sysdbr0
type: nic
root:
path: /
pool: default
type: disk
ephemeral: false
profiles:
- default
stateful: false
description: ""
t=2019-05-27T01:51:19+0200 lvl=info msg="LXD 3.13 is starting in normal mode" path=/var/snap/lxd/common/lxd
t=2019-05-27T01:51:19+0200 lvl=info msg="Kernel uid/gid map:"
t=2019-05-27T01:51:19+0200 lvl=info msg=" - u 0 0 4294967295"
t=2019-05-27T01:51:19+0200 lvl=info msg=" - g 0 0 4294967295"
t=2019-05-27T01:51:19+0200 lvl=info msg="Configured LXD uid/gid map:"
t=2019-05-27T01:51:19+0200 lvl=info msg=" - u 0 1000000 1000000000"
t=2019-05-27T01:51:19+0200 lvl=info msg=" - g 0 1000000 1000000000"
t=2019-05-27T01:51:19+0200 lvl=warn msg="AppArmor support has been disabled because of lack of kernel support"
t=2019-05-27T01:51:19+0200 lvl=info msg="Kernel features:"
t=2019-05-27T01:51:19+0200 lvl=info msg=" - netnsid-based network retrieval: yes"
t=2019-05-27T01:51:19+0200 lvl=info msg=" - uevent injection: yes"
t=2019-05-27T01:51:19+0200 lvl=info msg=" - seccomp listener: yes"
t=2019-05-27T01:51:19+0200 lvl=info msg=" - unprivileged file capabilities: yes"
t=2019-05-27T01:51:19+0200 lvl=info msg=" - shiftfs support: no"
t=2019-05-27T01:51:19+0200 lvl=info msg="Initializing local database"
t=2019-05-27T01:51:19+0200 lvl=info msg="Starting /dev/lxd handler:"
t=2019-05-27T01:51:19+0200 lvl=info msg=" - binding devlxd socket" socket=/var/snap/lxd/common/lxd/devlxd/sock
t=2019-05-27T01:51:19+0200 lvl=info msg="REST API daemon:"
t=2019-05-27T01:51:19+0200 lvl=info msg=" - binding Unix socket" inherited=true socket=/var/snap/lxd/common/lxd/unix.socket
t=2019-05-27T01:51:19+0200 lvl=info msg="Initializing global database"
t=2019-05-27T01:51:19+0200 lvl=info msg="Initializing storage pools"
t=2019-05-27T01:51:19+0200 lvl=info msg="Initializing networks"
t=2019-05-27T01:51:19+0200 lvl=info msg="Pruning leftover image files"
t=2019-05-27T01:51:19+0200 lvl=info msg="Done pruning leftover image files"
t=2019-05-27T01:51:19+0200 lvl=info msg="Loading daemon configuration"
t=2019-05-27T01:51:19+0200 lvl=info msg="Started seccomp handler" path=/var/snap/lxd/common/lxd/seccomp.socket
t=2019-05-27T01:51:19+0200 lvl=info msg="Pruning expired images"
t=2019-05-27T01:51:19+0200 lvl=info msg="Done pruning expired images"
t=2019-05-27T01:51:19+0200 lvl=info msg="Pruning expired container backups"
t=2019-05-27T01:51:19+0200 lvl=info msg="Done pruning expired container backups"
t=2019-05-27T01:51:19+0200 lvl=info msg="Updating instance types"
t=2019-05-27T01:51:19+0200 lvl=info msg="Updating images"
t=2019-05-27T01:51:19+0200 lvl=info msg="Done updating instance types"
t=2019-05-27T01:51:19+0200 lvl=info msg="Done updating images"
t=2019-05-27T01:51:19+0200 lvl=info msg="Expiring log files"
t=2019-05-27T01:51:19+0200 lvl=info msg="Done expiring log files"
t=2019-05-27T01:51:19+0200 lvl=info msg="Starting container" action=start created=2019-05-27T00:46:09+0200 ephemeral=false name=ansible project=default stateful=false used=2019-05-27T01:02:21+0200
t=2019-05-27T01:51:19+0200 lvl=info msg="Started container" action=start created=2019-05-27T00:46:09+0200 ephemeral=false name=ansible project=default stateful=false used=2019-05-27T01:02:21+0200
t=2019-05-27T01:51:19+0200 lvl=info msg="Starting container" action=start created=2019-05-27T01:13:40+0200 ephemeral=false name=chris project=default stateful=false used=2019-05-27T01:14:55+0200
t=2019-05-27T01:51:19+0200 lvl=eror msg="Failed starting container" action=start created=2019-05-27T01:13:40+0200 ephemeral=false name=chris project=default stateful=false used=2019-05-27T01:14:55+0200
t=2019-05-27T01:51:19+0200 lvl=eror msg="Failed to start container 'chris': Failed to run: /snap/lxd/current/bin/lxd forkstart chris /var/snap/lxd/common/lxd/containers /var/snap/lxd/common/lxd/logs/chris/lxc.conf: "
t=2019-05-27T01:51:19+0200 lvl=info msg="Starting container" action=start created=2019-05-27T01:40:08+0200 ephemeral=false name=cloud project=default stateful=false used=2019-05-27T01:46:27+0200
t=2019-05-27T01:51:19+0200 lvl=info msg="Started container" action=start created=2019-05-27T01:40:08+0200 ephemeral=false name=cloud project=default stateful=false used=2019-05-27T01:46:27+0200
t=2019-05-27T01:51:19+0200 lvl=info msg="Starting container" action=start created=2019-05-27T01:32:42+0200 ephemeral=false name=proxy project=default stateful=false used=2019-05-27T01:35:54+0200
t=2019-05-27T01:51:20+0200 lvl=info msg="Started container" action=start created=2019-05-27T01:32:42+0200 ephemeral=false name=proxy project=default stateful=false used=2019-05-27T01:35:54+0200
t=2019-05-27T01:51:20+0200 lvl=info msg="Starting container" action=start created=2019-05-27T01:12:58+0200 ephemeral=false name=pxe project=default stateful=false used=2019-05-27T01:13:28+0200
t=2019-05-27T01:51:20+0200 lvl=eror msg="Failed starting container" action=start created=2019-05-27T01:12:58+0200 ephemeral=false name=pxe project=default stateful=false used=2019-05-27T01:13:28+0200
t=2019-05-27T01:51:20+0200 lvl=eror msg="Failed to start container 'pxe': Failed to run: /snap/lxd/current/bin/lxd forkstart pxe /var/snap/lxd/common/lxd/containers /var/snap/lxd/common/lxd/logs/pxe/lxc.conf: "
t=2019-05-27T01:51:20+0200 lvl=info msg="Starting container" action=start created=2019-05-27T01:27:21+0200 ephemeral=false name=tt-rss project=default stateful=false used=2019-05-27T01:31:27+0200
t=2019-05-27T01:51:20+0200 lvl=info msg="Started container" action=start created=2019-05-27T01:27:21+0200 ephemeral=false name=tt-rss project=default stateful=false used=2019-05-27T01:31:27+0200
t=2019-05-27T01:51:20+0200 lvl=info msg="Container initiated stop" action=stop created=2019-05-27T01:13:40+0200 ephemeral=false name=chris project=default stateful=false used=2019-05-27T01:51:19+0200
t=2019-05-27T01:51:20+0200 lvl=info msg="Starting container" action=start created=2019-05-27T01:15:12+0200 ephemeral=false name=wekan project=default stateful=false used=2019-05-27T01:20:46+0200
t=2019-05-27T01:51:20+0200 lvl=info msg="Started container" action=start created=2019-05-27T01:15:12+0200 ephemeral=false name=wekan project=default stateful=false used=2019-05-27T01:20:46+0200
t=2019-05-27T01:51:20+0200 lvl=info msg="Container initiated stop" action=stop created=2019-05-27T01:12:58+0200 ephemeral=false name=pxe project=default stateful=false used=2019-05-27T01:51:20+0200
t=2019-05-27T01:53:08+0200 lvl=info msg="Starting container" action=start created=2019-05-27T01:13:40+0200 ephemeral=false name=chris project=default stateful=false used=2019-05-27T01:51:19+0200
t=2019-05-27T01:53:08+0200 lvl=eror msg="Failed starting container" action=start created=2019-05-27T01:13:40+0200 ephemeral=false name=chris project=default stateful=false used=2019-05-27T01:51:19+0200
t=2019-05-27T01:53:09+0200 lvl=info msg="Container initiated stop" action=stop created=2019-05-27T01:13:40+0200 ephemeral=false name=chris project=default stateful=false used=2019-05-27T01:53:08+0200
t=2019-05-27T01:53:38+0200 lvl=info msg="Starting container" action=start created=2019-05-27T01:12:58+0200 ephemeral=false name=pxe project=default stateful=false used=2019-05-27T01:51:20+0200
t=2019-05-27T01:53:38+0200 lvl=eror msg="Failed starting container" action=start created=2019-05-27T01:12:58+0200 ephemeral=false name=pxe project=default stateful=false used=2019-05-27T01:51:20+0200
t=2019-05-27T01:53:38+0200 lvl=info msg="Container initiated stop" action=stop created=2019-05-27T01:12:58+0200 ephemeral=false name=pxe project=default stateful=false used=2019-05-27T01:53:38+0200
(lxc monitor while reproducing the issue)
What's the kernel version on the source and destination systems?
The fact that it's a mount error you're getting may suggest some odd kernel behavior on the target.
Output of dmesg on the non-working system may also be useful.
dmesg
destination is 5.1.4, source should be 5.1.4 too, or at the very least from the 5.1.x series at time of migration.
I guess it doesn't have much to do with the migration process itself, since the containers worked fine until the reboot, and afterwards I can't even launch new ones (creation works, but they don't start).
I figured out that the two machines that now won't run were the only two machines that didn't have security.privileged set to true.
I can run the two problematic containers and launch new ones fine if I set them as privileged.
Ok, so your kernel may have some configuration in place which interferes with unprivileged containers?
I'm running the default Arch kernel, how would I check for such?
@monstermunchkin any idea what may be going on here?
I was told this may be caused by incorrect permissions on
/var/snap/lxd/common/lxd/storage-pools/default/
/var/snap/lxd/common/lxd/storage-pools/default/containers/
However, those paths do not seem to be used directly in the case of btrfs on a loop device, so how would I access the loop device to look? It doesn't seem to be mounted.
@monstermunchkin any idea what may be going on here?
No idea, I don't have Arch running. However, I know that the default Arch kernel works fine with unprivileged containers.
Can you do:
This should help track down any actual permission issue anywhere.
[0] % lxc launch images:alpine/edge test-unpriv -c security.privileged=false
Creating test-unpriv
Starting test-unpriv
Error: Failed to run: /snap/lxd/current/bin/lxd forkstart test-unpriv /var/snap/lxd/common/lxd/containers /var/snap/lxd/common/lxd/logs/test-unpriv/lxc.conf:
Try `lxc info --show-log local:test-unpriv` for more info
[0] % stat -c "%a" /var/snap/lxd/common/lxd/containers/test-unpriv/rootfs
stat -c "%a" /var/snap/lxd/common/lxd/containers/test-unpriv
stat -c "%a" /var/snap/lxd/common/lxd/containers
stat -c "%a" /var/snap/lxd/common/lxd/storage-pools/default/containers/test-unpriv
stat -c "%a" /var/snap/lxd/common/lxd/storage-pools/default/containers
stat -c "%a" /var/snap/lxd/common/lxd/storage-pools/default
stat -c "%a" /var/snap/lxd/common/lxd/storage-pools
stat -c "%a" /var/snap/lxd/common/lxd
stat -c "%a" /var/snap/lxd/common/lxc
stat -c "%a" /var/snap/lxd/common
stat -c "%a" /var/snap/lxd
stat -c "%a" /var/snap
stat -c "%a" /var
stat -c "%a" /
stat: cannot stat '/var/snap/lxd/common/lxd/containers/test-unpriv/rootfs': No such file or directory
777
711
stat: cannot stat '/var/snap/lxd/common/lxd/storage-pools/default/containers/test-unpriv': No such file or directory
stat: cannot stat '/var/snap/lxd/common/lxd/storage-pools/default/containers': No such file or directory
711
711
711
755
755
755
755
755
755
Ok, those look fine, though we're missing details on 3 of them, can you try:
root@Luxuria : ~
[0] # nsenter -t $(pgrep daemon.start) -m -- stat -c "%a" /var/snap/lxd/common/lxd/containers/test-unpriv
nsenter -t $(pgrep daemon.start) -m -- stat -c "%a" /var/snap/lxd/common/lxd/storage-pools/default/containers/test-unpriv
nsenter -t $(pgrep daemon.start) -m -- stat -c "%a" /var/snap/lxd/common/lxd/storage-pools/default/containers
nsenter -t $(pgrep daemon.start) -m -- stat -c "%a" /var/snap/lxd/common/lxd/storage-pools/default
777
711
700
755
Ah, that 700 is going to be a problem.
Can you do:
Then try starting test-unpriv again?
I'm not able to execute chmod nor ls via this method due to the binaries appearing to be missing
[130] # nsenter -t $(pgrep daemon.start) -m -- ls /var/snap/lxd/common/lxd/storage-pools/default/containers
nsenter: failed to execute ls: No such file or directory
[130] # nsenter -t $(pgrep daemon.start) -m -- which ls
[1] # nsenter -t $(pgrep daemon.start) -m -- which chmod
[1] # nsenter -t $(pgrep daemon.start) -m -- which stat
/usr/bin//stat
Oh, that's a bit odd.
Does:
nsenter -t $(pgrep daemon.start) -m -- /bin/chmod 711 /var/snap/lxd/common/lxd/storage-pools/default/containers
Work?
It does, and I can now start the container.
I got the idea because we had this happen at our company so it was suggested on our wiki, meaning the root cause also happens on Ubuntu with btrfs.
Ok, we'll have to do a bit of testing to see what's going on. That directory is supposed to get created with the appropriate permissions when the pool is added.
It seems unlikely that all btrfs installations hit this issue as we have plenty of automated testing that would then have failed, so it's likely a bit harder than that.
Can you provide lxc storage show default too?
[0] % lxc storage show default
config:
size: 300GB
source: /var/snap/lxd/common/lxd/disks/default.img
description: ""
name: default
driver: btrfs
used_by:
- /1.0/containers/ansible
- /1.0/containers/chris
- /1.0/containers/cloud
- /1.0/containers/cloudNO
- /1.0/containers/proxy
- /1.0/containers/tt-rss
- /1.0/containers/vlmcsd
- /1.0/containers/wekan
- /1.0/images/8ac7f224edf247ed398e03eedef3e4d564ac334dac12a8794b03556b70409909
- /1.0/images/d1ee0732edc89f83b2328691aacb3f41b808782ca6517554fae04ac5538c720e
- /1.0/images/fcd380108cb80ee6a2c9717bd40fb059476832bb76a4375cae202445711294c5
- /1.0/profiles/default
status: Created
locations:
- none
I managed to find partial lxd init that I used
Name of the storage backend to use (btrfs, ceph, dir, lvm) [default=btrfs]:
Create a new BTRFS pool? (yes/no) [default=yes]:
Would you like to use an existing block device? (yes/no) [default=no]:
Size in GB of the new loop device (1GB minimum) [default=100GB]: 300GB
Thanks, we'll take a look.
I didn't dig too much into this, but basically the main blocker to fixing this is that we don't have a reproducer?
At first glance that lxd init snippet looks fine. We use that pattern (btrfs backed by a loopback device) all the time in automated tests and when developing.
@freeekanayaka I wonder if it has to do with what we put on it which causes the creation of intermediate directories.
What I'd like to confirm is that after:
That we have /var/lib/lxd/storage-pools/blah containing all the needed folders (containers, containers-snapshots, images, custom and custom-snapshots) and all with the expected permissions.
If we're missing some, then they may get created with MkdirAll calls causing unexpected permissions.
It seems the sub-dirs are created and have the correct permissions.
@C0rn3j So looks like a straight lxd init doesn't lead us to such broken permissions, do you see a different behavior on Arch?
https://paste.ubuntu.com/p/9X3xHY8hMb/ is what @freeekanayaka reported seeing and looks correct
I've tried lxd init with complete defaults on a fresh Arch VM and I can't reproduce this.
After lxd init on my initial install everything seemed to work until I rebooted.
I believe I might have forced a reboot via sysrq REISUB while system was waiting for lxd to shut down, so that may have something to do with it.
By the way .../containers has 755 perms by default from what I can see on the fresh VM, not 711.
After doing a mv, the perms are set to 711.
EDIT: Was told by coworker that it triggers on lxc mv, which is how I got it to trigger before I guess.
EDIT2: Got it to repro on lxc mv
Found the issue. This triggers on moving a privileged container from a remote to local machine.
Additionally lxc mv changes permissions from 755 to 711 even on an unprivileged move, which is different from the perms that the folder is created with.
Repro steps:
HOST - the machine on which we are going to break the permissions
CLIENT - the machine from which we are going to transfer a container
1) lxd init on HOST with all default options
2) lxd init on CLIENT with all default options.
3) lxc config set core.https_address "[::]:8443" on both machines, then lxc remote add each machine on the other
4) lxc launch images:alpine/edge test-priv -c security.privileged=true on CLIENT.
5) lxc mv CLIENT:test-priv test-priv on HOST to set perms on .../containers to 700
@freeekanayaka should be enough for you to look at what's going on.
We'd expect container creation/migration to change the permissions of the container's directory itself, not the containers folder (the parent directory of the container).
@C0rn3j thanks for the great breakdown. I believe I nailed it now.