nixos/README.md: relax the requirement of providing option defaults (#334509)
[NixPkgs.git] / nixos / modules / system / boot / stage-1-init.sh
blob 23e9df2189e78cd1d80368999bec46a92720fc2f
#! @shell@

# Directory on which the real root filesystem is assembled.
targetRoot=/mnt-root
# Console device name (relative to /dev) used for the rescue shell; a
# console= kernel parameter overrides it further down.
console=tty1
# Non-empty turns on info() output. The @verbose@ token looks like a
# build-time placeholder -- TODO confirm against the generator.
verbose="@verbose@"
# Print a message, but only when $verbose is non-empty.
# Arguments: the words to print; they are handed to echo unchanged.
# Returns:   0 when nothing is printed, otherwise echo's status.
info() {
    if [[ -n "$verbose" ]]; then
        echo "$@"
    fi
}
# Point the library path, PATH, /bin and /sbin at the bundled tools.
extraUtils="@extraUtils@"
export LD_LIBRARY_PATH=@extraUtils@/lib
export PATH=@extraUtils@/bin
ln -s @extraUtils@/bin /bin
# hardcoded in util-linux's mount helper search path `/run/wrappers/bin:/run/current-system/sw/bin:/sbin`
ln -s @extraUtils@/bin /sbin

# Link the secrets into their needed location
if [ -d "@extraUtils@/secrets" ]; then
    # Read one path per line instead of word-splitting find's output, so
    # names containing spaces or glob characters survive intact. The `&&`
    # keeps find from running in the wrong directory if cd fails.
    (cd "@extraUtils@/secrets" && find . -type f) | while IFS= read -r secret; do
        mkdir -p "$(dirname "/$secret")"
        ln -s "@extraUtils@/secrets/$secret" "$secret"
    done
fi

# Stop LVM complaining about fd3
export LVM_SUPPRESS_FD_WARNINGS=true
# Failure handler, also installed as the exit trap.
#
# Globals read: panicOnFail (non-empty: exit 1 immediately),
#               allowShell  (non-empty: offer the shell choices),
#               targetRoot, console.
# Reads one key from stdin and acts on it:
#   f  (allowShell only) replace this process with a shell on /dev/$console
#   i  (allowShell only) run a shell on /dev/$console; re-enter fail if it fails
#   r  reboot -f
#   anything else: return to the caller
fail() {
    if [ -n "$panicOnFail" ]; then exit 1; fi

    @preFailCommands@

    # If starting stage 2 failed, allow the user to repair the problem
    # in an interactive shell.
    cat <<EOF

An error occurred in stage 1 of the boot process, which must mount the
root filesystem on \`$targetRoot' and then start stage 2. Press one
of the following keys:

EOF
    if [ -n "$allowShell" ]; then cat <<EOF
i) to launch an interactive shell
f) to start an interactive shell having pid 1 (needed if you want to
start stage 2's init manually)
EOF
    fi
    cat <<EOF
r) to reboot immediately
*) to ignore the error and continue
EOF

    # -r: take the key literally (a backslash must not swallow the next byte).
    read -r -n 1 reply

    if [ -n "$allowShell" ] && [ "$reply" = f ]; then
        exec setsid @shell@ -c "exec @shell@ < /dev/$console >/dev/$console 2>/dev/$console"
    elif [ -n "$allowShell" ] && [ "$reply" = i ]; then
        echo "Starting interactive shell..."
        setsid @shell@ -c "exec @shell@ < /dev/$console >/dev/$console 2>/dev/$console" || fail
    elif [ "$reply" = r ]; then
        echo "Rebooting..."
        reboot -f
    else
        info "Continuing..."
    fi
}
# Run fail() whenever the script exits (signal spec 0 is the EXIT trap).
trap 'fail' 0


# Print a greeting.
info
info "\e[1;32m<<< @distroName@ Stage 1 >>>\e[0m"
info


# Make several required directories.
mkdir -p /etc/udev
touch /etc/fstab # to shut up mount
ln -s /proc/mounts /etc/mtab # to shut up mke2fs
touch /etc/udev/hwdb.bin # to shut up udev
touch /etc/initrd-release
# Function for waiting for device(s) to appear.
#
# Arguments: $1 - device path; when $fsType is bcachefs it may be a
#                 ':'-separated list of paths.
# Globals:   fsType (read).
# Returns:   0 if every listed path exists by the end, 1 if any path was
#            still missing after its 20 one-second retries.
waitDevice() {
    local device="$1"
    # Split device string using ':' as a delimiter, bcachefs uses
    # this for multi-device filesystems, i.e. /dev/sda1:/dev/sda2:/dev/sda3
    local IFS
    local dev try
    local missing=0

    # bcachefs is the only known use for this at the moment
    # Preferably, the 'UUID=' syntax should be enforced, but
    # this is kept for compatibility reasons
    if [ "$fsType" = bcachefs ]; then IFS=':'; fi

    # USB storage devices tend to appear with some delay.  It would be
    # great if we had a way to synchronously wait for them, but
    # alas...  So just wait for a few seconds for the device to
    # appear.
    for dev in $device; do
        if [ ! -e "$dev" ]; then
            echo -n "waiting for device $dev to appear..."
            try=20
            while [ "$try" -gt 0 ]; do
                sleep 1
                # also re-try lvm activation now that new block devices might have appeared
                lvm vgchange -ay
                # and tell udev to create nodes for the new LVs
                udevadm trigger --action=add
                if [ -e "$dev" ]; then break; fi
                echo -n "."
                try=$((try - 1))
            done
            echo
            # Remember the timeout rather than letting a later device that
            # does exist overwrite the function's status.
            if [ "$try" -eq 0 ]; then missing=1; fi
        fi
    done

    return "$missing"
}
# Create the mount point if required.
#
# Arguments: $1 - device (or bind-mount source path)
#            $2 - mount point, relative to the target root
#            $3 - comma-separated mount options
# Globals:   targetRoot (read; falls back to /mnt-root when unset or empty).
# Creates an empty file as the mount point when a non-directory is being
# bind mounted, otherwise a directory with mode 0755.
makeMountPoint() {
    local device="$1"
    local mountPoint="$2"
    local options="$3"
    local root="${targetRoot:-/mnt-root}"
    local opt

    local IFS=,

    # If we're bind mounting a file, the mount point should also be a file.
    if ! [ -d "$device" ]; then
        for opt in $options; do
            if [ "$opt" = bind ] || [ "$opt" = rbind ]; then
                mkdir -p "$(dirname "$root$mountPoint")"
                touch "$root$mountPoint"
                return
            fi
        done
    fi

    mkdir -m 0755 -p "$root$mountPoint"
}
# Mount special file systems.
#
# Arguments: $1 - device, $2 - mount point, $3 - mount options,
#            $4 - filesystem type.
# Creates the mount point (mode 0755) and mounts with -n.
# Returns:   the exit status of mount.
specialMount() {
    local device="$1"
    local mountPoint="$2"
    local options="$3"
    local fsType="$4"

    mkdir -m 0755 -p "$mountPoint"
    mount -n -t "$fsType" -o "$options" "$device" "$mountPoint"
}
source @earlyMountScript@

# Copy initrd secrets from /.initrd-secrets to their actual destinations
if [ -d "/.initrd-secrets" ]; then
    #
    # Secrets are named by their full destination pathname and stored
    # under /.initrd-secrets/
    #
    # Read one path per line instead of word-splitting find's output, so
    # names containing spaces or glob characters survive intact.
    (cd "/.initrd-secrets" && find . -type f) | while IFS= read -r secret; do
        mkdir -p "$(dirname "/$secret")"
        cp "/.initrd-secrets/$secret" "$secret"
    done
fi
# Log the script output to /dev/kmsg or /run/log/stage-1-init.log.
mkdir -p /tmp
mkfifo /tmp/stage-1-init.log.fifo
# Keep copies of the current stdout/stderr on fds 8 and 9; they are
# restored near the end of the script.
logOutFd=8 && logErrFd=9
eval "exec $logOutFd>&1 $logErrFd>&2"
if test -w /dev/kmsg; then
    # tee copies the FIFO to the saved stdout and feeds the loop, which
    # forwards every non-empty line to the kernel log.
    tee -i < /tmp/stage-1-init.log.fifo /proc/self/fd/"$logOutFd" | while read -r line; do
        if test -n "$line"; then
            echo "<7>stage-1-init: [$(date)] $line" > /dev/kmsg
        fi
    done &
else
    mkdir -p /run/log
    tee -i < /tmp/stage-1-init.log.fifo /run/log/stage-1-init.log &
fi
# From here on stdout and stderr both go through the FIFO.
exec > /tmp/stage-1-init.log.fifo 2>&1
# Process the kernel command line.
#
# Values are taken as everything after the parameter's own '=' so that
# values which themselves contain '=' (e.g. boot.persistence.opt=subvol=x)
# are not truncated.
export stage2Init=/init
for o in $(cat /proc/cmdline); do
    case $o in
        console=*)
            # Keep only the device name, dropping any ",options" suffix.
            params=${o#console=}
            console=${params%%,*}
            ;;
        init=*)
            stage2Init=${o#init=}
            ;;
        boot.persistence=*)
            persistence=${o#boot.persistence=}
            ;;
        boot.persistence.opt=*)
            persistence_opt=${o#boot.persistence.opt=}
            ;;
        boot.trace|debugtrace)
            # Show each command.
            set -x
            ;;
        boot.shell_on_fail)
            allowShell=1
            ;;
        boot.debug1|debug1) # stop right away
            allowShell=1
            fail
            ;;
        boot.debug1devices) # stop after loading modules and creating device nodes
            allowShell=1
            debug1devices=1
            ;;
        boot.debug1mounts) # stop after mounting file systems
            allowShell=1
            debug1mounts=1
            ;;
        boot.panic_on_fail|stage1panic=1)
            panicOnFail=1
            ;;
        root=*)
            # If a root device is specified on the kernel command
            # line, make it available through the symlink /dev/root.
            # Recognise LABEL= and UUID= to support UNetbootin.
            rootSpec=${o#root=}
            case $rootSpec in
                LABEL=*) root="/dev/disk/by-label/${rootSpec#LABEL=}" ;;
                UUID=*)  root="/dev/disk/by-uuid/${rootSpec#UUID=}" ;;
                *)       root=$rootSpec ;;
            esac
            ln -s "$root" /dev/root
            ;;
        copytoram)
            copytoram=1
            ;;
        findiso=*)
            # if an iso name is supplied, try to find the device where
            # the iso resides on
            isoPath=${o#findiso=}
            ;;
    esac
done
# Set hostid before modules are loaded.
# This is needed by the spl/zfs modules.
@setHostId@

# Load the required kernel modules.
echo @extraUtils@/bin/modprobe > /proc/sys/kernel/modprobe
for i in @kernelModules@; do
    info "loading module $(basename "$i")..."
    modprobe "$i"
done


# Create device nodes in /dev.
@preDeviceCommands@
info "running udev..."
ln -sfn /proc/self/fd /dev/fd
ln -sfn /proc/self/fd/0 /dev/stdin
ln -sfn /proc/self/fd/1 /dev/stdout
ln -sfn /proc/self/fd/2 /dev/stderr
mkdir -p /etc/systemd
ln -sfn @linkUnits@ /etc/systemd/network
mkdir -p /etc/udev
ln -sfn @udevRules@ /etc/udev/rules.d
mkdir -p /dev/.mdadm
systemd-udevd --daemon
udevadm trigger --action=add
udevadm settle


# XXX: Use case usb->lvm will still fail, usb->luks->lvm is covered
@preLVMCommands@

info "starting device mapper and LVM..."
lvm vgchange -ay

# boot.debug1devices: stop here and present the failure menu.
if test -n "$debug1devices"; then fail; fi


@postDeviceCommands@
# Check the specified file system, if appropriate.
#
# Arguments: $1 - device, $2 - filesystem type.
# Always returns 0. Bit 2 of fsck's status triggers a reboot; bit 4, or a
# status of 8 or more, calls fail().
checkFS() {
    local device="$1"
    local fsType="$2"
    local fsckResult

    # Only check block devices.
    if [ ! -b "$device" ]; then return 0; fi

    case "$fsType" in
        # Don't check ROM filesystems.
        iso9660|udf) return 0 ;;
        # Don't check resilient COWs as they validate the fs structures at mount time
        btrfs|zfs|bcachefs) return 0 ;;
        # Skip fsck for apfs as the fsck utility does not support repairing the filesystem (no -a option)
        apfs) return 0 ;;
        # Skip fsck for nilfs2 - not needed by design and no fsck tool for this filesystem.
        nilfs2) return 0 ;;
        # Skip fsck for inherently readonly filesystems.
        squashfs) return 0 ;;
        # Skip fsck.erofs because it is still experimental.
        erofs) return 0 ;;
    esac

    # If we couldn't figure out the FS type, then skip fsck.
    if [ "$fsType" = auto ]; then
        echo 'cannot check filesystem with type "auto"!'
        return 0
    fi

    # Device might be already mounted manually
    # e.g. NBD-device or the host filesystem of the file which contains encrypted root fs
    #
    # The device path is compared literally (quoted case pattern) so that
    # characters such as '.' in the path are not treated as regex syntax.
    if mount | (
        while IFS= read -r line; do
            case "$line" in
                "$device on "*) exit 0 ;;
            esac
        done
        exit 1
    ); then
        echo "skip checking already mounted $device"
        return 0
    fi

    # Optionally, skip fsck on journaling filesystems.  This option is
    # a hack - it's mostly because e2fsck on ext3 takes much longer to
    # recover the journal than the ext3 implementation in the kernel
    # does (minutes versus seconds).
    if [ -z "@checkJournalingFS@" ]; then
        case "$fsType" in
            ext3|ext4|reiserfs|xfs|jfs|f2fs) return 0 ;;
        esac
    fi

    echo "checking $device..."

    fsck -V -a "$device"
    fsckResult=$?

    # Bit 2 set in fsck's exit status.
    if [ $((fsckResult & 2)) -ne 0 ]; then
        echo "fsck finished, rebooting..."
        sleep 3
        reboot -f
    fi

    # Bit 4 set in fsck's exit status.
    if [ $((fsckResult & 4)) -ne 0 ]; then
        echo "$device has unrepaired errors, please fix them manually."
        fail
    fi

    if [ "$fsckResult" -ge 8 ]; then
        echo "fsck on $device failed."
        fail
    fi

    return 0
}
# Escape a string for use as an fstab field.
# Arguments: $1 - the raw string.
# Outputs:   the string with every space replaced by \040 and every tab
#            by \011, followed by a newline.
escapeFstab() {
    local original="$1"
    local tab
    tab="$(printf '\t')"

    # Replace space
    local escaped="${original// /\\040}"
    # Replace tab; printf (not echo) so values like "-n" are printed as-is.
    printf '%s\n' "${escaped//$tab/\\011}"
}
# Function for mounting a file system.
#
# Arguments: $1 - device
#            $2 - mount point relative to /mnt-root
#            $3 - comma-separated mount options
#            $4 - filesystem type ("auto" is probed with blkid)
# Appends an entry to /etc/fstab, runs checkFS, creates the mount point,
# mounts (retrying up to 10 times for cifs/zfs), remounts to enforce the
# options, and lustrates the root when /etc/NIXOS_LUSTRATE is present.
# Always returns 0; mount failures are routed through fail().
mountFS() {
    local device="$1"
    local mountPoint="$2"
    local options="$3"
    local fsType="$4"
    local i dir

    if [ "$fsType" = auto ]; then
        fsType=$(blkid -o value -s TYPE "$device")
        if [ -z "$fsType" ]; then fsType=auto; fi
    fi

    # Filter out x- options, which busybox doesn't do yet.
    # Every kept option is followed by a ',' (so the list ends in one).
    local optionsFiltered
    optionsFiltered="$(
        IFS=,
        for i in $options; do
            case "$i" in
                x-*) ;;
                *) printf '%s,' "$i" ;;
            esac
        done
    )"
    # Prefix (lower|upper|work)dir with /mnt-root (overlayfs)
    local optionsPrefixed
    optionsPrefixed="$(printf '%s\n' "$optionsFiltered" | sed -E 's#\<(lowerdir|upperdir|workdir)=#\1=/mnt-root#g')"

    echo "$device /mnt-root$mountPoint $fsType $optionsPrefixed" >> /etc/fstab

    checkFS "$device" "$fsType"

    # Create backing directories for overlayfs
    if [ "$fsType" = overlay ]; then
        for i in upper work; do
            dir="$(printf '%s\n' "$optionsPrefixed" | grep -o "${i}dir=[^,]*")"
            mkdir -m 0700 -p "${dir##*=}"
        done
    fi

    info "mounting $device on $mountPoint..."

    makeMountPoint "$device" "$mountPoint" "$optionsPrefixed"

    # For ZFS and CIFS mounts, retry a few times before giving up.
    # We do this for ZFS as a workaround for issue NixOS/nixpkgs#25383.
    local n=0
    while true; do
        mount "/mnt-root$mountPoint" && break
        if { [ "$fsType" != cifs ] && [ "$fsType" != zfs ]; } || [ "$n" -ge 10 ]; then
            fail
            break
        fi
        echo "retrying..."
        sleep 1
        n=$((n + 1))
    done

    # For bind mounts, busybox has a tendency to ignore options, which can be a
    # security issue (e.g. "nosuid"). Remounting the partition seems to fix the
    # issue.
    mount "/mnt-root$mountPoint" -o "remount,$optionsPrefixed"

    if [ "$mountPoint" = "/" ] && [ -f "/mnt-root/etc/NIXOS_LUSTRATE" ]; then
        lustrateRoot "/mnt-root"
    fi

    return 0
}
# Move everything in a root filesystem except nix, boot and the staging
# directory into <root>/old-root, then copy back the paths listed (one per
# line) in the old etc/NIXOS_LUSTRATE.
#
# Arguments: $1 - path of the mounted root filesystem.
# Uses file descriptor 4 for the keeper list while it runs.
lustrateRoot () {
    local root="$1"
    local d keeper keeperDir

    echo
    echo -e "\e[1;33m<<< @distroName@ is now lustrating the root filesystem (cruft goes to /old-root) >>>\e[0m"
    echo

    mkdir -m 0755 -p "$root/old-root.tmp"

    echo
    echo "Moving impurities out of the way:"
    for d in "$root"/*; do
        # An unmatched glob is left as the literal pattern; skip it.
        [ -e "$d" ] || [ -L "$d" ] || continue

        case "$d" in
            "$root/nix") continue ;;
            "$root/boot") continue ;; # Don't render the system unbootable
            "$root/old-root.tmp") continue ;;
        esac

        mv -v "$d" "$root/old-root.tmp"
    done

    # Use .tmp to make sure subsequent invocations don't clash
    mv -v "$root/old-root.tmp" "$root/old-root"

    mkdir -m 0755 -p "$root/etc"
    touch "$root/etc/NIXOS"

    exec 4< "$root/old-root/etc/NIXOS_LUSTRATE"

    echo
    echo "Restoring selected impurities:"
    while read -r -u 4 keeper; do
        # A blank line would otherwise ask cp to copy old-root onto itself.
        [ -n "$keeper" ] || continue
        keeperDir="$(dirname "$keeper")"
        mkdir -m 0755 -p "$root/$keeperDir"
        cp -av "$root/old-root/$keeper" "$root/$keeper"
    done

    exec 4>&-
}
# Try to resume from hibernation: pick a resume device, then write its
# major:minor numbers to /sys/power/resume.
if [ -e /sys/power/resume ] && [ -e /sys/power/disk ]; then
    if test -n "@resumeDevice@" && waitDevice "@resumeDevice@"; then
        resumeDev="@resumeDevice@"
        resumeInfo="$(udevadm info -q property "$resumeDev" )"
    else
        for sd in @resumeDevices@; do
            # Try to detect resume device. According to Ubuntu bug:
            # https://bugs.launchpad.net/ubuntu/+source/pm-utils/+bug/923326/comments/1
            # when there are multiple swap devices, we can't know where the hibernate
            # image will reside. We can check all of them for swsuspend blkid.
            if waitDevice "$sd"; then
                resumeInfo="$(udevadm info -q property "$sd")"
                if [ "$(echo "$resumeInfo" | sed -n 's/^ID_FS_TYPE=//p')" = "swsuspend" ]; then
                    resumeDev="$sd"
                    break
                fi
            fi
        done
    fi
    if test -n "$resumeDev"; then
        resumeMajor="$(echo "$resumeInfo" | sed -n 's/^MAJOR=//p')"
        resumeMinor="$(echo "$resumeInfo" | sed -n 's/^MINOR=//p')"
        echo "$resumeMajor:$resumeMinor" > /sys/power/resume 2> /dev/null || echo "failed to resume..."
    fi
fi

@postResumeCommands@
# If we have a path to an iso file, find the iso and link it to /dev/root
if [ -n "$isoPath" ]; then
    mkdir -p /findiso

    for delay in 5 10; do
        # The scan is the tail of a pipeline and therefore runs in a
        # subshell: a `break` in there cannot leave this outer loop.  The
        # subshell reports success through its exit status instead, so the
        # search stops (without the extra sleeps and rescan) once the ISO
        # has been found.
        if blkid | (
            while read -r line; do
                device=$(echo "$line" | sed 's/:.*//')
                type=$(echo "$line" | sed 's/.*TYPE="\([^"]*\)".*/\1/')

                mount -t "$type" "$device" /findiso
                if [ -e "/findiso$isoPath" ]; then
                    ln -sf "/findiso$isoPath" /dev/root
                    exit 0
                else
                    umount /findiso
                fi
            done
            exit 1
        ); then
            break
        fi

        sleep "$delay"
    done
fi
# Try to find and mount the root device.
mkdir -p "$targetRoot"

# @fsInfo@ is read four lines at a time: mount point, device, filesystem
# type, options.  -r keeps backslashes in those lines literal.
exec 3< @fsInfo@

while read -r -u 3 mountPoint; do
    read -r -u 3 device
    read -r -u 3 fsType
    read -r -u 3 options

    # !!! Really quick hack to support bind mounts, i.e., where the
    # "device" should be taken relative to /mnt-root, not /.  Assume
    # that every device that starts with / but doesn't start with /dev
    # is a bind mount.
    pseudoDevice=
    case $device in
        /dev/*)
            ;;
        //*)
            # Don't touch SMB/CIFS paths.
            pseudoDevice=1
            ;;
        /*)
            device=/mnt-root$device
            ;;
        *)
            # Not an absolute path; assume that it's a pseudo-device
            # like an NFS path (e.g. "server:/path").
            pseudoDevice=1
            ;;
    esac

    if test -z "$pseudoDevice" && ! waitDevice "$device"; then
        # If it doesn't appear, try to mount it anyway (and
        # probably fail).  This is a fallback for non-device "devices"
        # that we don't properly recognise.
        echo "Timed out waiting for device $device, trying to mount anyway."
    fi

    # Wait once more for the udev queue to empty, just in case it's
    # doing something with $device right now.
    udevadm settle

    # If copytoram is enabled: skip mounting the ISO and copy its content to a tmpfs.
    if [ -n "$copytoram" ] && [ "$device" = /dev/root ] && [ "$mountPoint" = /iso ]; then
        fsType=$(blkid -o value -s TYPE "$device")
        fsSize=$(blockdev --getsize64 "$device" || stat -Lc '%s' "$device")

        mkdir -p /tmp-iso
        mount -t "$fsType" /dev/root /tmp-iso
        mountFS tmpfs /iso size="$fsSize" tmpfs

        echo "copying ISO contents to RAM..."
        cp -r /tmp-iso/* /mnt-root/iso/

        umount /tmp-iso
        rmdir /tmp-iso
        if [ -n "$isoPath" ] && [ "$fsType" = "iso9660" ] && mountpoint -q /findiso; then
            umount /findiso
        fi
        continue
    fi

    if [ "$mountPoint" = / ] && [ "$device" = tmpfs ] && [ ! -z "$persistence" ]; then
        echo persistence...
        waitDevice "$persistence"
        echo enabling persistence...
        mountFS "$persistence" "$mountPoint" "$persistence_opt" "auto"
        continue
    fi

    mountFS "$device" "$(escapeFstab "$mountPoint")" "$(escapeFstab "$options")" "$fsType"
done

exec 3>&-
@postMountCommands@


# Emit a udev rule for /dev/root to prevent systemd from complaining.
# udevadm's --export output is evaluated to define ROOT_* variables; it is
# quoted so the shell does not word-split or glob it before eval sees it.
if [ -e /mnt-root/iso ]; then
    eval "$(udevadm info --export --export-prefix=ROOT_ --device-id-of-file=/mnt-root/iso)"
else
    eval "$(udevadm info --export --export-prefix=ROOT_ --device-id-of-file="$targetRoot")"
fi
if [ -n "$ROOT_MAJOR" ] && [ -n "$ROOT_MINOR" ] && [ "$ROOT_MAJOR" != 0 ]; then
    mkdir -p /run/udev/rules.d
    echo 'ACTION=="add|change", SUBSYSTEM=="block", ENV{MAJOR}=="'"$ROOT_MAJOR"'", ENV{MINOR}=="'"$ROOT_MINOR"'", SYMLINK+="root"' > /run/udev/rules.d/61-dev-root-link.rules
fi
# Stop udevd.
udevadm control --exit

# Reset the logging file descriptors.
# Do this just before pkill, which will kill the tee process.
exec 1>&$logOutFd 2>&$logErrFd
eval "exec $logOutFd>&- $logErrFd>&-"

# Kill any remaining processes, just to be sure we're not taking any
# with us into stage 2. But keep storage daemons like unionfs-fuse.
#
# Storage daemons are distinguished by an @ in front of their command line:
# https://www.freedesktop.org/wiki/Software/systemd/RootStorageDaemons/
for pid in $(pgrep -v -f '^@'); do
    # Make sure we don't kill kernel processes, see #15226 and:
    # http://stackoverflow.com/questions/12213445/identifying-kernel-threads
    readlink "/proc/$pid/exe" > /dev/null 2>&1 || continue
    # Try to avoid killing ourselves.
    [ "$pid" -eq "$$" ] && continue
    kill -9 "$pid"
done

# boot.debug1mounts: stop here and present the failure menu.
if test -n "$debug1mounts"; then fail; fi


# Restore /proc/sys/kernel/modprobe to its original value.
echo /sbin/modprobe > /proc/sys/kernel/modprobe


# Start stage 2.  `switch_root' deletes all files in the ramfs on the
# current root.  The path has to be valid in the chroot not outside.
if [ ! -e "$targetRoot/$stage2Init" ]; then
    # The init path may only resolve inside the new root (e.g. through an
    # absolute symlink), so accept it when any leading component of the
    # path is a symlink under $targetRoot.
    stage2Check=${stage2Init}
    while [ "$stage2Check" != "${stage2Check%/*}" ] && [ ! -L "$targetRoot/$stage2Check" ]; do
        stage2Check=${stage2Check%/*}
    done
    if [ ! -L "$targetRoot/$stage2Check" ]; then
        echo "stage 2 init script ($targetRoot/$stage2Init) not found"
        fail
    fi
fi

mkdir -m 0755 -p "$targetRoot/proc" "$targetRoot/sys" "$targetRoot/dev" "$targetRoot/run"

mount --move /proc "$targetRoot/proc"
mount --move /sys "$targetRoot/sys"
mount --move /dev "$targetRoot/dev"
mount --move /run "$targetRoot/run"

exec env -i "$(type -P switch_root)" "$targetRoot" "$stage2Init"

fail # should never be reached