From fbc780be4543aa44ca41542de4fb82c43f579513 Mon Sep 17 00:00:00 2001 From: Sergio Cazzolato Date: Wed, 4 Feb 2026 11:13:21 -0300 Subject: [PATCH 1/3] Improve how openstack backend waits for a server to boot This change is an improvement to the boot check that the openstack backend does. . First, it gets rid of the ssh check, which is not needed, so it now has the same logic as the google backend. . Support is added for new systems which use /etc/ssh/sshd_config.d/ instead of /etc/ssh/sshd_config . Support is added for other architectures (this has been tested on arm/ppc64el/s390x) and all the different linux systems used by snapd. --- spread/openstack.go | 52 ++++++++------------------------------------- 1 file changed, 9 insertions(+), 43 deletions(-) diff --git a/spread/openstack.go b/spread/openstack.go index 97c17fca..0f9c1603 100644 --- a/spread/openstack.go +++ b/spread/openstack.go @@ -18,7 +18,6 @@ import ( "github.com/go-goose/goose/v5/neutron" "github.com/go-goose/goose/v5/nova" - "golang.org/x/crypto/ssh" "golang.org/x/net/context" ) @@ -154,15 +153,22 @@ func (s *openstackServer) Discard(ctx context.Context) error { return s.p.removeMachine(ctx, s) } +// Send the ready marker to ttyS0, the standard serial port for x86 / x86_64 architectures. +// Send the ready marker to ttyAMA0, the primary UART/serial port used in ARM / ARM64 (PL011). +// Fall back to /dev/console to ensure visibility on the system's primary output regardless of architecture. 
const openstackCloudInitScript = ` #cloud-config runcmd: - echo root:%s | chpasswd - sed -i 's/^\s*#\?\s*\(PermitRootLogin\|PasswordAuthentication\)\>.*/\1 yes/' /etc/ssh/sshd_config + - sed -i 's/^PermitRootLogin=/#PermitRootLogin=/g' /etc/ssh/sshd_config.d/* || true + - sed -i 's/^PasswordAuthentication=/#PasswordAuthentication=/g' /etc/ssh/sshd_config.d/* || true - test -d /etc/ssh/sshd_config.d && echo 'PermitRootLogin=yes' > /etc/ssh/sshd_config.d/00-spread.conf - test -d /etc/ssh/sshd_config.d && echo 'PasswordAuthentication=yes' >> /etc/ssh/sshd_config.d/00-spread.conf - pkill -o -HUP sshd || true - - echo '` + openstackReadyMarker + `' > /dev/ttyS0 + - test -c /dev/ttyS0 && echo '` + openstackReadyMarker + `' 1>/dev/ttyS0 2>/dev/null || true + - test -c /dev/ttyAMA0 && echo '` + openstackReadyMarker + `' 1>/dev/ttyAMA0 2>/dev/null || true + - test -c /dev/console && echo '` + openstackReadyMarker + `' 1>/dev/console 2>/dev/null || true ` const openstackReadyMarker = "MACHINE-IS-READY" @@ -386,43 +392,9 @@ func (p *openstackProvider) waitProvision(ctx context.Context, s *openstackServe panic("unreachable") } -var openstackServerBootTimeout = 2 * time.Minute +var openstackServerBootTimeout = 5 * time.Minute var openstackServerBootRetry = 5 * time.Second -func (p *openstackProvider) waitServerBootSSH(ctx context.Context, s *openstackServer) error { - config := &ssh.ClientConfig{ - User: "root", - Auth: []ssh.AuthMethod{ssh.Password(p.options.Password)}, - Timeout: 10 * time.Second, - HostKeyCallback: ssh.InsecureIgnoreHostKey(), - } - addr := s.address - if !strings.Contains(addr, ":") { - addr += ":22" - } - - // Iterate until the ssh connection to the host can be established - // In openstack the client cannot access to the serial console of the instance - timeout := time.After(openstackServerBootTimeout) - retry := time.NewTicker(openstackServerBootRetry) - defer retry.Stop() - - for { - select { - case <-timeout: - return fmt.Errorf("cannot ssh to the 
allocated instance: timeout reached") - case <-retry.C: - _, err := sshDial("tcp", addr, config) - if err == nil { - debugf("Connection to server %s established", s.d.Name) - return nil - } - case <-ctx.Done(): - return fmt.Errorf("cannot wait for %s to boot: interrupted", s) - } - } -} - var openstackSerialOutputTimeout = 30 * time.Second func (p *openstackProvider) getSerialConsoleOutput(s *openstackServer) (string, error) { @@ -495,12 +467,6 @@ func (p *openstackProvider) waitServerBoot(ctx context.Context, s *openstackServ if !errors.Is(err, openstackSerialConsoleErr) { return err } - // It is important to try ssh connection because serial console could - // be disabled in the nova configuration - err = p.waitServerBootSSH(ctx, s) - if err != nil { - return fmt.Errorf("cannot connect to server %s: %v", s, err) - } } return nil } From 92678f9cce03abb30343d84779c7cc0bbba23db8 Mon Sep 17 00:00:00 2001 From: Sergio Cazzolato Date: Wed, 4 Feb 2026 17:26:16 -0300 Subject: [PATCH 2/3] Update unit tests --- spread/openstack_test.go | 45 ---------------------------------------- 1 file changed, 45 deletions(-) diff --git a/spread/openstack_test.go b/spread/openstack_test.go index 76494c7d..c5cf48e2 100644 --- a/spread/openstack_test.go +++ b/spread/openstack_test.go @@ -12,7 +12,6 @@ import ( "github.com/go-goose/goose/v5/glance" goosehttp "github.com/go-goose/goose/v5/http" "github.com/go-goose/goose/v5/nova" - "golang.org/x/crypto/ssh" "github.com/snapcore/spread/spread" @@ -420,47 +419,3 @@ func (s *openstackFindImageSuite) TestOpenStackWaitServerBootSerialTimeout(c *C) err := spread.OpenStackWaitServerBoot(s.opst, context.TODO(), "test-id", "test-server", []string{"net-1"}) c.Check(err, ErrorMatches, "cannot find ready marker in console output for test-server: timeout reached") } - -func (s *openstackFindImageSuite) TestOpenStackWaitServerBootSSHHappy(c *C) { - count := 0 - spread.FakeSshDial(func(network, addr string, config *ssh.ClientConfig) (*ssh.Client, error) { 
- count++ - switch count { - case 1: - return nil, errors.New("connection error") - case 2: - return &ssh.Client{}, nil - } - c.Fatalf("should not reach here") - return nil, nil - }) - - restore := spread.FakeOpenStackServerBootTimeout(100*time.Millisecond, time.Nanosecond) - defer restore() - restore = spread.FakeOpenStackSerialOutputTimeout(50 * time.Millisecond) - defer restore() - - // force fallback to SSH - s.fakeOsClient.err = fmt.Errorf("serial not supported") - - err := spread.OpenStackWaitServerBoot(s.opst, context.TODO(), "test-id", "", []string{"net-1"}) - c.Check(err, IsNil) - c.Check(count, Equals, 2) -} - -func (s *openstackFindImageSuite) TestOpenStackWaitServerBootSSHTimeout(c *C) { - spread.FakeSshDial(func(network, addr string, config *ssh.ClientConfig) (*ssh.Client, error) { - return nil, errors.New("connection error") - }) - - restore := spread.FakeOpenStackServerBootTimeout(100*time.Millisecond, time.Nanosecond) - defer restore() - restore = spread.FakeOpenStackSerialOutputTimeout(50 * time.Millisecond) - defer restore() - - // force fallback to SSH - s.fakeOsClient.err = fmt.Errorf("serial not supported") - - err := spread.OpenStackWaitServerBoot(s.opst, context.TODO(), "test-id", "test-server", []string{"net-1"}) - c.Check(err, ErrorMatches, "cannot connect to server test-server: cannot ssh to the allocated instance: timeout reached") -} From 7adf1963e5b0539972b789b7b4a29c498d202f66 Mon Sep 17 00:00:00 2001 From: Sergio Cazzolato Date: Fri, 13 Feb 2026 13:42:40 -0300 Subject: [PATCH 3/3] Add extra check to make sure the server exists when we request console output --- spread/openstack.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/spread/openstack.go b/spread/openstack.go index 0f9c1603..c6683bea 100644 --- a/spread/openstack.go +++ b/spread/openstack.go @@ -398,6 +398,12 @@ var openstackServerBootRetry = 5 * time.Second var openstackSerialOutputTimeout = 30 * time.Second func (p *openstackProvider) getSerialConsoleOutput(s 
*openstackServer) (string, error) { + _, err := s.p.computeClient.GetServer(s.d.Id) + if err != nil { + // this is when the server is removed + return "", fmt.Errorf("failed to retrieve the serial console, server removed: %s", s) + } + url := fmt.Sprintf("servers/%s/action", s.d.Id) var req struct {