ctdb-server: Use find_public_ip_vnn() in a couple of extra places
[samba4-gss.git] / ctdb / tests / scripts / integration.bash
blob 65e974e4e366c8e51dd1397cb0f11350d03feaed
# Hey Emacs, this is a -*- shell-script -*- !!! :-)

# Common helpers; TEST_SCRIPTS_DIR must be set before this file is sourced.
. "${TEST_SCRIPTS_DIR}/common.sh"

######################################################################

export CTDB_TIMEOUT=60

# Pick the test_wrap helper: from CTDB_TEST_REMOTE_DIR when that is set,
# otherwise from the (absolute) location of TEST_SCRIPTS_DIR.
if [ -n "$CTDB_TEST_REMOTE_DIR" ] ; then
	CTDB_TEST_WRAPPER="${CTDB_TEST_REMOTE_DIR}/test_wrap"
else
	_d=$(cd "$TEST_SCRIPTS_DIR" && echo "$PWD")
	CTDB_TEST_WRAPPER="$_d/test_wrap"
fi
export CTDB_TEST_WRAPPER

# If $VALGRIND is set then use it whenever ctdb is called, but only if
# $CTDB is not already set.
[ -n "$CTDB" ] || export CTDB="${VALGRIND}${VALGRIND:+ }ctdb"

# why???
PATH="${TEST_SCRIPTS_DIR}:${PATH}"

######################################################################
# Succeed when CTDB_TEST_LOCAL_DAEMONS is unset or empty, i.e. the tests
# are not being run against local daemons.
ctdb_test_on_cluster ()
{
	[ -z "$CTDB_TEST_LOCAL_DAEMONS" ]
}
# Exit handler: run the registered exit hooks, stop the cluster and exit
# with the status that was current when the handler was entered.
ctdb_test_exit ()
{
	# Must be first: capture the status of whatever ran before us.
	local status=$?

	trap - 0

	# run_tests.sh pipes stdout into tee.  If the tee process is
	# killed then any attempt to write to stdout (e.g. echo) will
	# result in SIGPIPE, terminating the caller.  Ignore SIGPIPE to
	# ensure that all clean-up is run.
	trap '' PIPE

	# Avoid making a test fail from this point onwards.  The test is
	# now complete.
	set +e

	echo "*** TEST COMPLETED (RC=$status) AT $(date '+%F %T'), CLEANING UP..."

	# Hooks are accumulated as a "cmd ; cmd" string by
	# ctdb_test_exit_hook_add(); a failing hook must not stop clean-up.
	eval "$ctdb_test_exit_hook" || true
	unset ctdb_test_exit_hook

	echo "Stopping cluster..."
	ctdb_nodes_stop || ctdb_test_error "Cluster shutdown failed"

	exit $status
}
# Append a command (all arguments joined by spaces) to the list of exit
# hooks held in $ctdb_test_exit_hook, separating entries with " ; ".
ctdb_test_exit_hook_add ()
{
	ctdb_test_exit_hook="${ctdb_test_exit_hook}${ctdb_test_exit_hook:+ ; }$*"
}
# Setting ctdb_test_cleanup_pid to <pid>@<node> will cause <pid> to be
# killed on <node> when the test completes.  To cancel, set
# ctdb_test_cleanup_pid back to the empty string.
ctdb_test_cleanup_pid=""

# Exit hook: if a clean-up PID is registered, kill it on its node.
ctdb_test_cleanup_pid_exit_hook ()
{
	if [ -n "$ctdb_test_cleanup_pid" ] ; then
		# Split "<pid>@<node>" around the '@'
		local pid="${ctdb_test_cleanup_pid%@*}"
		local node="${ctdb_test_cleanup_pid#*@}"

		try_command_on_node "$node" "kill ${pid}"
	fi
}

ctdb_test_exit_hook_add ctdb_test_cleanup_pid_exit_hook
# Register a process for clean-up at test exit.
# $1: node, $2: PID on that node
ctdb_test_cleanup_pid_set ()
{
	local node="$1"
	local pid="$2"

	ctdb_test_cleanup_pid="${pid}@${node}"
}
# Cancel any pending clean-up PID.
ctdb_test_cleanup_pid_clear ()
{
	ctdb_test_cleanup_pid=""
}
91 # -n option means do not configure/start cluster
92 ctdb_test_init ()
94 trap "ctdb_test_exit" 0
96 ctdb_nodes_stop >/dev/null 2>&1 || true
98 if [ "$1" != "-n" ] ; then
99 echo "Configuring cluster..."
100 setup_ctdb || ctdb_test_error "Cluster configuration failed"
102 echo "Starting cluster..."
103 ctdb_init || ctdb_test_error "Cluster startup failed"
106 echo "*** SETUP COMPLETE AT $(date '+%F %T'), RUNNING TEST..."
# Reconfigure and restart local daemons with custom setup_ctdb options.
# All arguments are passed through to setup_ctdb.  Not supported on a
# real cluster.
ctdb_nodes_start_custom ()
{
	if ctdb_test_on_cluster ; then
		ctdb_test_error "ctdb_nodes_start_custom() on real cluster"
	fi

	# Best effort: nodes may not be running at all
	ctdb_nodes_stop >/dev/null 2>&1 || true

	echo "Configuring cluster..."
	setup_ctdb "$@" || ctdb_test_error "Cluster configuration failed"

	echo "Starting cluster..."
	ctdb_init || ctdb_test_fail "Cluster startup failed"
}
# Skip the current test (via ctdb_test_skip) when running on a real
# cluster.
ctdb_test_skip_on_cluster ()
{
	if ctdb_test_on_cluster ; then
		ctdb_test_skip \
			"SKIPPING this test - only runs against local daemons"
	fi
}
# Stop then start nodes; all arguments are passed to both
# ctdb_nodes_stop and ctdb_nodes_start.
ctdb_nodes_restart ()
{
	ctdb_nodes_stop "$@"
	ctdb_nodes_start "$@"
}
########################################

# Sets: $out, $outfile
# * The first 1KB of output is put into $out
# * Tests should use $outfile for handling large output
# * $outfile is removed after each test
out=""
outfile="${CTDB_TEST_TMP_DIR}/try_command_on_node.out"

# Exit hook: remove the shared output file.
outfile_cleanup ()
{
	rm -f "$outfile"
}

ctdb_test_exit_hook_add outfile_cleanup
# Run a command on node(s) via onnode, capturing all output in $outfile
# and the first 1KB of it in $out.
#
# Usage: try_command_on_node [-v] [onnode-option ...] <nodespec> <cmd ...>
#   -v  also print the output on success
#   any other leading word starting with '-' is passed to onnode
#
# On failure the command and its output are printed and the exit status
# of onnode is returned.
try_command_on_node ()
{
	local nodespec="$1" ; shift

	local verbose=false
	local onnode_opts=""

	# Consume leading options; the first non-option word is the nodespec
	while [ "${nodespec#-}" != "$nodespec" ] ; do
		if [ "$nodespec" = "-v" ] ; then
			verbose=true
		else
			onnode_opts="${onnode_opts}${onnode_opts:+ }${nodespec}"
		fi
		nodespec="$1" ; shift
	done

	local cmd="$*"

	local status=0
	# Intentionally unquoted - might be empty
	# shellcheck disable=SC2086
	onnode -q $onnode_opts "$nodespec" "$cmd" >"$outfile" 2>&1 || status=$?
	out=$(dd if="$outfile" bs=1k count=1 2>/dev/null)

	if [ $status -ne 0 ] ; then
		echo "Failed to execute \"$cmd\" on node(s) \"$nodespec\""
		cat "$outfile"
		return $status
	fi

	if $verbose ; then
		echo "Output of \"$cmd\":"
		cat "$outfile" || true
	fi
}
# Common implementation for the *_onnode wrappers.
# $1: command prefix to run; then optional words starting with '-'
# (passed to try_command_on_node), then the nodespec, then the arguments
# appended to the command prefix.
_run_onnode ()
{
	local thing="$1"
	shift

	local options nodespec

	while : ; do
		case "$1" in
		-*)
			options="${options}${options:+ }${1}"
			shift
			;;
		*)
			nodespec="$1"
			shift
			break
		esac
	done

	# shellcheck disable=SC2086
	# $options can be multi-word
	try_command_on_node $options "$nodespec" "${thing} $*"
}
# Run $CTDB with the given arguments on node(s).
# Usage: ctdb_onnode [options] <nodespec> <ctdb args ...>
ctdb_onnode ()
{
	_run_onnode "$CTDB" "$@"
}
# Run a test program on node(s) through $CTDB_TEST_WRAPPER, prefixed by
# $VALGRIND (which may be empty).
# Usage: testprog_onnode [options] <nodespec> <program args ...>
testprog_onnode ()
{
	_run_onnode "${CTDB_TEST_WRAPPER} ${VALGRIND}" "$@"
}
# Run a shell function on node(s) through $CTDB_TEST_WRAPPER.
# Usage: function_onnode [options] <nodespec> <function args ...>
function_onnode ()
{
	_run_onnode "${CTDB_TEST_WRAPPER}" "$@"
}
# Check the contents of $outfile.
# $1: minimum number of lines required
# $2: extended regexp that every line must match
# Calls ctdb_test_fail if there are too few lines; returns 1 if any line
# does not match the regexp, 0 otherwise.
sanity_check_output ()
{
	local min_lines="$1"
	local regexp="$2" # Should be anchored as necessary.

	local ret=0

	local num_lines
	num_lines=$(wc -l <"$outfile" | tr -d '[:space:]')
	echo "There are $num_lines lines of output"
	if [ "$num_lines" -lt "$min_lines" ] ; then
		ctdb_test_fail "BAD: that's less than the required number (${min_lines})"
	fi

	local status=0
	local unexpected # local doesn't pass through status of command on RHS.
	unexpected=$(grep -Ev "$regexp" "$outfile") || status=$?

	# Note that this is reversed: grep -v succeeding means that some
	# line did NOT match the regexp.
	if [ $status -eq 0 ] ; then
		echo "BAD: unexpected lines in output:"
		echo "$unexpected" | cat -A
		ret=1
	else
		echo "Output lines look OK"
	fi

	return $ret
}
# Sets: test_node
# Ask any node for its PNN and use that node as the test node.
# Returns 1 if the PNN can not be retrieved.
select_test_node ()
{
	# Use $CTDB (falling back to plain ctdb) like the other helpers,
	# so that any valgrind prefix is honoured.
	try_command_on_node any "${CTDB:-ctdb} pnn" || return 1

	test_node="$out"
	echo "Selected node ${test_node}"
}
# This returns a list of "ip node" lines in $outfile
# $1: node to run "ctdb ip" on
all_ips_on_node()
{
	local node="$1"
	# Skip the header line and print fields 2 and 3 of the
	# '|'-separated machine-readable output.
	try_command_on_node "$node" \
		"$CTDB ip -X | awk -F'|' 'NR > 1 { print \$2, \$3 }'"
}
# Sets: test_node, test_node_ips, test_ip, test_prefix
# Pick the first node that hosts a public IP (PNN other than -1) and
# collect all IPs hosted by it.  Returns 1 if no such node is found.
_select_test_node_and_ips ()
{
	try_command_on_node any \
		"$CTDB ip -X all | awk -F'|' 'NR > 1 { print \$2, \$3 }'"

	test_node=""  # this matches no PNN
	test_node_ips=""
	local ip pnn
	while read -r ip pnn ; do
		if [ -z "$test_node" ] && [ "$pnn" != "-1" ] ; then
			test_node="$pnn"
		fi
		if [ "$pnn" = "$test_node" ] ; then
			test_node_ips="${test_node_ips}${test_node_ips:+ }${ip}"
		fi
	done <"$outfile"

	echo "Selected node ${test_node} with IPs: ${test_node_ips}."
	test_ip="${test_node_ips%% *}"

	# A ':' means IPv6, so use a full-length IPv6 prefix
	# test_prefix used by caller
	# shellcheck disable=SC2034
	case "$test_ip" in
	*:*) test_prefix="${test_ip}/128" ;;
	*)   test_prefix="${test_ip}/32"  ;;
	esac

	[ -n "$test_node" ] || return 1
}
# Sets: test_node, test_node_ips, test_ip, test_prefix
# Retry _select_test_node_and_ips, sleeping 1 second between attempts,
# until it succeeds or the attempt budget is used up.
select_test_node_and_ips ()
{
	local timeout=10
	while ! _select_test_node_and_ips ; do
		echo "Unable to find a test node with IPs assigned"
		if [ $timeout -le 0 ] ; then
			ctdb_test_error "BAD: Too many attempts"
			return 1
		fi
		sleep_for 1
		timeout=$((timeout - 1))
	done

	return 0
}
# Sets: mask, iface
# Uses: test_node, test_ip
# Look up the interface hosting $test_ip on $test_node and the netmask
# length of the address.  With local daemons the mask is assumed to
# be 24.
get_test_ip_mask_and_iface ()
{
	# Find the interface
	ctdb_onnode "$test_node" "ip -v -X"
	iface=$(awk -F'|' -v ip="$test_ip" '$2 == ip { print $4 }' "$outfile")

	if ctdb_test_on_cluster ; then
		# Find the netmask: the text after the '/' in the
		# "ip addr show" output, up to the next space
		try_command_on_node "$test_node" ip addr show to "$test_ip"
		mask="${out##*/}"
		mask="${mask%% *}"
	else
		mask="24"
	fi

	echo "$test_ip/$mask is on $iface"
}
# Sets: all_pnns
# Collect the output of "ctdb pnn" run on all nodes.
ctdb_get_all_pnns ()
{
	try_command_on_node -q all "$CTDB pnn"
	all_pnns="$out"
}
# Delete a public IP from every node that has it configured.
# $1: IP address
#
# The subtlety is that "ctdb delip" will fail if the IP address isn't
# configured on a node...  so first work out which nodes list it.
delete_ip_from_all_nodes ()
{
	# Local so that the caller's variables of the same name (e.g. the
	# loop variable of drop_ips) are not clobbered.
	local _ip="$1"
	local _nodes _pnn _i _

	ctdb_get_all_pnns

	_nodes=""

	# Intentional word splitting: one PNN per word
	for _pnn in $all_pnns ; do
		all_ips_on_node "$_pnn"
		while read -r _i _ ; do
			if [ "$_ip" = "$_i" ] ; then
				_nodes="${_nodes}${_nodes:+,}${_pnn}"
			fi
		done <"$outfile"
	done

	try_command_on_node -pq "$_nodes" "$CTDB delip $_ip"
}
#######################################

# Sleep for $1 seconds, printing a progress indicator of the form
# "=<n>|....|" with one dot per second.
sleep_for ()
{
	# Local so that a caller's own loop variable "i" is not clobbered
	local i

	echo -n "=${1}|"
	for i in $(seq 1 "$1") ; do
		echo -n '.'
		sleep 1
	done
	echo '|'
}
# Succeed if "ctdb nodestatus all" succeeds; its output is discarded.
_cluster_is_healthy ()
{
	$CTDB nodestatus all >/dev/null
}
# Succeed if node 0 reports that it is not in recovery mode.
_cluster_is_recovered ()
{
	node_has_status 0 recovered
}
# Succeed if the cluster is both healthy and recovered.
_cluster_is_ready ()
{
	_cluster_is_healthy && _cluster_is_recovered
}
# Check cluster health via node 0.  Returns 0 if healthy (warning if the
# cluster is still in recovery).  Otherwise prints debug information
# ("ctdb status" and "ctdb scriptstatus" output) and returns 1.
cluster_is_healthy ()
{
	if onnode 0 "$CTDB_TEST_WRAPPER" _cluster_is_healthy ; then
		echo "Cluster is HEALTHY"
		if ! onnode 0 "$CTDB_TEST_WRAPPER" _cluster_is_recovered ; then
			echo "WARNING: cluster in recovery mode!"
		fi
		return 0
	fi

	echo "Cluster is UNHEALTHY"

	echo "DEBUG AT $(date '+%F %T'):"
	local i
	for i in "onnode -q 0 $CTDB status" \
		 "onnode -q 0 onnode all $CTDB scriptstatus" ; do
		echo "$i"
		# Intentional word splitting: $i is a whole command line
		$i || true
	done

	return 1
}
# Wait until _cluster_is_ready succeeds on some node.
# $1: timeout passed to wait_until (default 120)
wait_until_ready ()
{
	local timeout="${1:-120}"

	echo "Waiting for cluster to become ready..."

	wait_until "$timeout" onnode -q any "$CTDB_TEST_WRAPPER" _cluster_is_ready
}
# This function is becoming nicely overloaded.  Soon it will collapse!  :-)
#
# Succeed if node $1 has status $2.
# $2 is one of: recovered, notlmaster, unhealthy, healthy, disconnected,
# connected, banned, unbanned, disabled, enabled, stopped, notstopped.
# An unknown status prints a message and returns 1.
node_has_status ()
{
	local pnn="$1"
	local status="$2"

	# These 2 are answered from human-readable "ctdb status" output
	case "$status" in
	recovered)
		! $CTDB status -n "$pnn" | \
			grep -Eq '^Recovery mode:RECOVERY \(1\)$'
		return
		;;
	notlmaster)
		! $CTDB status | grep -Eq "^hash:.* lmaster:${pnn}\$"
		return
		;;
	esac

	# The rest are glob patterns matched against the '|'-separated
	# fields that follow "|<pnn>|<field>|" in "ctdb -X status" lines
	local bits
	case "$status" in
	unhealthy)    bits="?|?|?|?|1|*" ;;
	healthy)      bits="?|?|?|?|0|*" ;;
	disconnected) bits="1|*" ;;
	connected)    bits="0|*" ;;
	banned)       bits="?|?|1|*" ;;
	unbanned)     bits="?|?|0|*" ;;
	disabled)     bits="?|?|?|1|*" ;;
	enabled)      bits="?|?|?|0|*" ;;
	stopped)      bits="?|?|?|?|?|1|*" ;;
	notstopped)   bits="?|?|?|?|?|0|*" ;;
	*)
		echo "node_has_status: unknown status \"$status\""
		return 1
	esac
	local out _ line

	out=$($CTDB -X status 2>&1) || return 1

	{
		# Discard the header line
		read -r _
		while read -r line ; do
			# This needs to be done in 2 steps to
			# avoid false matches.
			local line_bits="${line#|"${pnn}"|*|}"
			[ "$line_bits" = "$line" ] && continue
			# shellcheck disable=SC2295
			# This depends on $bits being a pattern
			[ "${line_bits#${bits}}" != "$line_bits" ] && \
				return 0
		done
		return 1
	} <<<"$out" # Yay bash!
}
# Wait until a node has the given status, as seen from a proxy node.
# $1: pnn, $2: status (see node_has_status)
# $3: timeout passed to wait_until (default 30)
# $4: node to run the check on (default "any")
# On timeout, prints debug information and returns 1.
wait_until_node_has_status ()
{
	local pnn="$1"
	local status="$2"
	local timeout="${3:-30}"
	local proxy_pnn="${4:-any}"

	echo "Waiting until node $pnn has status \"$status\"..."

	if ! wait_until "$timeout" onnode "$proxy_pnn" \
			"$CTDB_TEST_WRAPPER" node_has_status "$pnn" "$status" ; then

		# Local so that a caller's own "i" is not clobbered
		local i
		for i in "onnode -q any $CTDB status" "onnode -q any onnode all $CTDB scriptstatus" ; do
			echo "$i"
			# Intentional word splitting: $i is a whole command line
			$i || true
		done

		return 1
	fi
}
# Useful for superficially testing IP failover.
# IPs must be on the given node.
# If the first argument is '!' then the IPs must not be on the given node.
#
# Usage: ips_are_on_node [!] <node> <ip> ...
# Returns 0 if every IP satisfies the condition, 1 otherwise.
ips_are_on_node ()
{
	local negating=false
	if [ "$1" = "!" ] ; then
		negating=true ; shift
	fi
	local node="$1" ; shift
	local ips="$*"

	# Shadow the global $out so that the lookup below does not
	# overwrite the caller's value.
	local out

	all_ips_on_node "$node"

	local check ip pnn found
	# Intentional word splitting: one address per word
	for check in $ips ; do
		found=false
		while read -r ip pnn ; do
			[ "$check" = "$ip" ] || continue
			found=true
			if [ "$pnn" = "$node" ] ; then
				if $negating ; then return 1 ; fi
			else
				if ! $negating ; then return 1 ; fi
			fi
			break
		done <"$outfile"

		# An address that is not listed at all isn't hosted by
		# anyone: fine when negating, a failure otherwise.  Each
		# address is tracked by its own flag instead of editing a
		# string, so one address can never corrupt another that
		# merely contains it (10.0.0.1 vs 10.0.0.10).
		if ! $found && ! $negating ; then
			return 1
		fi
	done

	return 0
}
# Wait (via wait_until, timeout 60) until ips_are_on_node succeeds.
# Takes the same arguments as ips_are_on_node: [!] <node> <ip> ...
wait_until_ips_are_on_node ()
{
	# Go to some trouble to print a useful description of what is
	# happening
	local not=""
	if [ "$1" == "!" ] ; then
		not="no longer "
	fi
	local node=""
	local ips=""
	local i
	for i ; do
		[ "$i" != "!" ] || continue
		# The first non-'!' argument is the node, the rest are IPs
		if [ -z "$node" ] ; then
			node="$i"
			continue
		fi
		ips="${ips}${ips:+, }${i}"
	done
	echo "Waiting for ${ips} to ${not}be assigned to node ${node}"

	wait_until 60 ips_are_on_node "$@"
}
# Succeed if at least one public IP is listed as hosted by node $1.
node_has_some_ips ()
{
	local node="$1"

	# Shadow the global $out so that the lookup below does not
	# overwrite the caller's value.
	local out

	all_ips_on_node "$node"

	# Local so that the caller's variables are not clobbered
	local ip pnn
	while read -r ip pnn ; do
		if [ "$node" = "$pnn" ] ; then
			return 0
		fi
	done <"$outfile"

	return 1
}
# Wait (via wait_until, timeout 60) until node $1 hosts some public IPs.
wait_until_node_has_some_ips ()
{
	# Report the node actually being checked; fall back to
	# $test_node if no node is given.
	echo "Waiting for some IPs to be assigned to node ${1:-${test_node}}"

	wait_until 60 node_has_some_ips "$@"
}
# Wait (via wait_until, timeout 60) until node $1 hosts no public IPs.
wait_until_node_has_no_ips ()
{
	# Report the node actually being checked; fall back to
	# $test_node if no node is given.
	echo "Waiting until no IPs are assigned to node ${1:-${test_node}}"

	wait_until 60 ! node_has_some_ips "$@"
}
#######################################

# Start the cluster and bring it to a known good state: wait for it to
# become ready, force a recovery, then check that it is recovered and
# healthy before doing a sync.  Returns 1 (after printing the reason) if
# any of these steps fails.
ctdb_init ()
{
	if ! ctdb_nodes_start ; then
		echo "Cluster start failed"
		return 1
	fi

	if ! wait_until_ready 120 ; then
		echo "Cluster didn't become ready"
		return 1
	fi

	echo "Setting RerecoveryTimeout to 1"
	onnode -pq all "$CTDB setvar RerecoveryTimeout 1"

	echo "Forcing a recovery..."
	onnode -q 0 "$CTDB recover"
	sleep_for 2

	if ! onnode -q all "$CTDB_TEST_WRAPPER _cluster_is_recovered" ; then
		echo "Cluster has gone into recovery again, waiting..."
		# NOTE(review): "30/2" is presumably wait_until's
		# <timeout>/<interval> form - confirm in common.sh
		if ! wait_until 30/2 onnode -q all \
				"$CTDB_TEST_WRAPPER _cluster_is_recovered" ; then
			echo "Cluster did not come out of recovery"
			return 1
		fi
	fi

	if ! onnode 0 "$CTDB_TEST_WRAPPER _cluster_is_healthy" ; then
		echo "Cluster became UNHEALTHY again [$(date)]"
		return 1
	fi

	echo "Doing a sync..."
	onnode -q 0 "$CTDB sync"

	echo "ctdb is ready"
	return 0
}
# Print $CTDB_BASE, or $CTDB_SCRIPTS_BASE if CTDB_BASE is unset or empty.
ctdb_base_show ()
{
	echo "${CTDB_BASE:-${CTDB_SCRIPTS_BASE}}"
}
#######################################

# sets: leader
# $1: node to ask, using "ctdb leader"
_leader_get ()
{
	local node="$1"

	ctdb_onnode "$node" leader
	# shellcheck disable=SC2154
	# $out set by ctdb_onnode() above
	leader="$out"
}
# sets: leader
# Like _leader_get, but also reports the result.
# $1: node to ask
leader_get ()
{
	local node="$1"

	echo "Get leader"
	_leader_get "$node"
	echo "Leader is ${leader}"
	echo
}
# sets: leader
# Succeed if the leader reported by node $1 differs from $2.
_leader_has_changed ()
{
	local node="$1"
	local leader_old="$2"

	_leader_get "$node"

	[ "$leader" != "$leader_old" ]
}
# uses: leader
# sets: leader
# Wait (via wait_until, timeout 30) until the leader reported by node $1
# differs from the current value of $leader.  Returns 1 on timeout.
wait_until_leader_has_changed ()
{
	local node="$1"

	echo
	echo "Wait until leader changes..."
	# Don't claim that the leader changed if the wait timed out
	wait_until 30 _leader_has_changed "$node" "$leader" || return 1
	echo "Leader changed to ${leader}"
}
#######################################

# sets: generation
# $1: node to ask; the value is taken from the "Generation:" line of
# "ctdb status" output.
_generation_get ()
{
	local node="$1"

	ctdb_onnode "$node" status
	# shellcheck disable=SC2154
	# $outfile set by ctdb_onnode() above
	generation=$(sed -n -e 's/^Generation:\([0-9]*\)/\1/p' "$outfile")
}
# sets: generation
# Like _generation_get, but also reports the result.
# $1: node to ask
generation_get ()
{
	local node="$1"

	echo "Get generation"
	_generation_get "$node"
	echo "Generation is ${generation}"
	echo
}
# sets: generation
# Succeed if the generation reported by node $1 differs from $2.
_generation_has_changed ()
{
	local node="$1"
	local generation_old="$2"

	_generation_get "$node"

	[ "$generation" != "$generation_old" ]
}
# uses: generation
# sets: generation
# Wait (via wait_until, timeout 30) until the generation reported by
# node $1 differs from the current value of $generation.  Returns 1 on
# timeout.
wait_until_generation_has_changed ()
{
	local node="$1"

	echo "Wait until generation changes..."
	# Don't claim that the generation changed if the wait timed out
	wait_until 30 _generation_has_changed "$node" "$generation" || return 1
	echo "Generation changed to ${generation}"
	echo
}
#######################################

# Wait until the "ctdb scriptstatus" output of node $1 changes, which is
# taken to mean that a monitor event has run.  Returns 1 if the initial
# scriptstatus can not be retrieved.
wait_for_monitor_event ()
{
	local pnn="$1"
	local timeout=120

	echo "Waiting for a monitor event on node ${pnn}..."

	ctdb_onnode "$pnn" scriptstatus || {
		echo "Unable to get scriptstatus from node $pnn"
		return 1
	}

	# Keep the current output as the baseline to compare against
	mv "$outfile" "${outfile}.orig"

	# _ctdb_scriptstatus_changed reads $pnn from this function's scope
	wait_until "$timeout" _ctdb_scriptstatus_changed
}
# uses: pnn (from the caller's scope), outfile
# Succeed if the current "ctdb scriptstatus" output of node $pnn differs
# from the baseline saved in ${outfile}.orig.
_ctdb_scriptstatus_changed ()
{
	ctdb_onnode "$pnn" scriptstatus || {
		echo "Unable to get scriptstatus from node $pnn"
		return 1
	}

	! diff "$outfile" "${outfile}.orig" >/dev/null
}
#######################################

# If the given IP is hosted then print 3 items: maskbits, iface and
# address family ("inet" or "inet6").  Prints nothing otherwise.
# $1: IP address
ip_maskbits_iface ()
{
	local _addr="$1"
	local _family _bits

	# A ':' means IPv6; match the exact address with a full-length prefix
	case "$_addr" in
	*:*) _family="inet6" ; _bits=128 ;;
	*)   _family="inet"  ; _bits=32  ;;
	esac

	# The 1st line of output carries the interface name (followed by
	# ':'), the address lines carry "<addr>/<maskbits>" in field 2.
	ip addr show to "${_addr}/${_bits}" 2>/dev/null | \
		awk -v family="${_family}" \
			'NR == 1 { iface = $2; sub(":$", "", iface) }
			 $1 ~ /inet/ { mask = $2; sub(".*/", "", mask)
				       print mask, iface, family }'
}
# Remove a public address from whichever local interface hosts it.
# Does nothing if the address is not hosted.
# $1: IP address, optionally followed by /maskbits
drop_ip ()
{
	local _addr="${1%/*}"  # Remove optional maskbits
	local _maskbits _iface

	# Intentional word splitting
	# shellcheck disable=SC2046
	set -- $(ip_maskbits_iface "$_addr")
	if [ -n "$1" ] ; then
		_maskbits="$1"
		_iface="$2"
		echo "Removing public address $_addr/$_maskbits from device $_iface"
		# Delete the address that was looked up ($_addr), not
		# whatever a caller happens to have left in $_ip.
		ip addr del "$_addr/$_maskbits" dev "$_iface" >/dev/null 2>&1 || true
	fi
}
# Remove each of the given public addresses (see drop_ip).
drop_ips ()
{
	# Local so that a caller's $_ip is not clobbered; still visible
	# to drop_ip through dynamic scoping.
	local _ip
	for _ip ; do
		drop_ip "$_ip"
	done
}
#######################################

# Print the path of a database, taken from the "path: " line of
# "ctdb getdbstatus" output.
# $1: pnn, $2: DB name
db_get_path ()
{
	ctdb_onnode -v "$1" "getdbstatus $2" | sed -n -e "s@^path: @@p"
}
# Print the number of records in a database, as seen by "ctdb cattdb".
# $1: pnn, $2: DB name
db_ctdb_cattdb_count_records ()
{
	# Count the number of keys, excluding any that begin with '_'.
	# This excludes at least the sequence number record in
	# persistent/replicated databases.  The trailing "|| :" forces
	# the command to succeed when no records are matched.
	ctdb_onnode "$1" "cattdb $2 | grep -c '^key([0-9][0-9]*) = \"[^_]' || :"
	echo "$out"
}
# Store a record directly into a database file using "ctdb tstore".
# $1: pnn, $2: DB name, $3: key string, $4: value string, $5: RSN (default 7)
db_ctdb_tstore ()
{
	_tdb=$(db_get_path "$1" "$2")
	_rsn="${5:-7}"
	ctdb_onnode "$1" tstore "$_tdb" "$3" "$4" "$_rsn"
}
# Store a database sequence number record using db_ctdb_tstore.
# $1: pnn, $2: DB name, $3: dbseqnum (must be < 255!!!!!)
db_ctdb_tstore_dbseqnum ()
{
	# "__db_sequence_number__" + trailing 0x00
	_key='0x5f5f64625f73657175656e63655f6e756d6265725f5f00'

	# Construct 8 byte (uint64_t) database sequence number.  This
	# probably breaks if $3 > 255
	_value=$(printf "0x%02x%014x" "$3" 0)

	db_ctdb_tstore "$1" "$2" "$_key" "$_value"
}
########################################

# Make sure that $CTDB is set.
if [ -z "$CTDB" ] ; then
	CTDB="ctdb"
fi

# Pull in the implementation matching the kind of cluster under test
if ctdb_test_on_cluster ; then
	. "${TEST_SCRIPTS_DIR}/integration_real_cluster.bash"
else
	. "${TEST_SCRIPTS_DIR}/integration_local_daemons.bash"
fi

# Optional per-test-suite additions
local="${CTDB_TEST_SUITE_DIR}/scripts/local.bash"
if [ -r "$local" ] ; then
	. "$local"
fi