From 7021faeffda4c3f12c95924853a1732bdf8b09b8 Mon Sep 17 00:00:00 2001 From: Pavel Aksenov <41126916+al-bashkir@users.noreply.github.com> Date: Fri, 6 Mar 2026 07:43:34 +0300 Subject: [PATCH 1/3] profiles: add interactive startup chooser and toggle-off default When no default_profile is configured and no --profile flag or PVETUI_PROFILE env var is provided, the bootstrap now prompts the user interactively instead of hard-coding a fallback to the literal name 'default' (which would error if no such profile existed). - Remove the implicit 'default' name fallback in ResolveProfile; empty string is returned to signal that interactive selection is needed. - Add promptProfileSelection: numbered terminal list (groups first, then individual profiles) with re-prompt on bad input and clean exit on EOF/Ctrl+C. Single profile with no groups is auto-selected silently. - Pre-check in bootstrap: if default_profile resolves to a name that no longer exists (stale after rename/delete), warn and fall back to the interactive chooser instead of hard-erroring. - setDefaultProfile now toggles: pressing s on the already-default entry clears default_profile to '' so the chooser runs on next startup. - Connection Profiles dialog re-opens after s so the updated star indicator is immediately visible. - Help bar updated: 's:toggle default'. --- internal/bootstrap/bootstrap.go | 119 ++++++++++++++++++ internal/profile/profile.go | 7 +- internal/ui/components/connection_profiles.go | 7 +- .../connection_profiles_operations.go | 31 +++-- 4 files changed, 144 insertions(+), 20 deletions(-) diff --git a/internal/bootstrap/bootstrap.go b/internal/bootstrap/bootstrap.go index 1d89fd13..8c405f01 100644 --- a/internal/bootstrap/bootstrap.go +++ b/internal/bootstrap/bootstrap.go @@ -6,10 +6,12 @@ package bootstrap import ( + "bufio" "flag" "fmt" "os" "sort" + "strconv" "strings" "github.com/devnullvoid/pvetui/internal/app" @@ -236,6 +238,37 @@ func Bootstrap(opts BootstrapOptions) (*BootstrapResult, error) { return nil, fmt.Errorf("profile resolution failed: %w", err) } + // If a profile/group name was resolved but no longer exists (e.g. stale + // default_profile after a rename or delete), warn and fall back to interactive + // selection rather than hard-erroring. + if selectedProfile != "" && len(cfg.Profiles) > 0 { + _, isProfile := cfg.Profiles[selectedProfile] + isGroup := cfg.IsGroup(selectedProfile) + if !isProfile && !isGroup { + fmt.Printf("⚠️ Profile/group '%s' not found — falling back to selection.\n", selectedProfile) + selectedProfile = "" + } + } + + // When no profile is resolved but profiles are configured, either auto-select + // (single profile, no groups) or prompt interactively. + if selectedProfile == "" && len(cfg.Profiles) > 0 { + groups := cfg.GetGroups() + if len(cfg.Profiles) == 1 && len(groups) == 0 { + // Only one profile available — auto-select silently. + for name := range cfg.Profiles { + selectedProfile = name + } + } else { + chosen, err := promptProfileSelection(cfg) + if err != nil { + fmt.Println("🚪 Exiting.") + os.Exit(0) + } + selectedProfile = chosen + } + } + // Determine if selected profile is an aggregate group or a standard profile var initialGroup string var startupProfile string @@ -589,3 +622,89 @@ func valueOrDash(value string) string { } return value } + +// promptProfileSelection interactively asks the user which profile or group to +// connect to when no default is configured. +// +// It returns the selected profile or group name, or an error if the user +// cancels or no valid input is provided. +func promptProfileSelection(cfg *config.Config) (string, error) { + // Collect groups (sorted) + groups := cfg.GetGroups() + groupNames := make([]string, 0, len(groups)) + for name := range groups { + groupNames = append(groupNames, name) + } + sort.Strings(groupNames) + + // Collect individual profile names (sorted) + profileNames := make([]string, 0, len(cfg.Profiles)) + for name := range cfg.Profiles { + profileNames = append(profileNames, name) + } + sort.Strings(profileNames) + + total := len(groupNames) + len(profileNames) + + // Build an ordered list: groups first, then profiles + type entry struct { + name string + isGroup bool + } + entries := make([]entry, 0, total) + for _, g := range groupNames { + entries = append(entries, entry{name: g, isGroup: true}) + } + for _, p := range profileNames { + entries = append(entries, entry{name: p, isGroup: false}) + } + + fmt.Println() + fmt.Println("No default profile configured.") + fmt.Println() + fmt.Println("Available connections:") + fmt.Println() + + idx := 1 + if len(groupNames) > 0 { + fmt.Println(" Groups:") + for _, g := range groupNames { + modeTag := "" + if cfg.IsClusterGroup(g) { + modeTag = " [cluster]" + } + fmt.Printf(" %d. %s (%d profiles)%s\n", idx, g, len(groups[g]), modeTag) + idx++ + } + fmt.Println() + } + + if len(profileNames) > 0 { + fmt.Println(" Profiles:") + for _, p := range profileNames { + addr := cfg.Profiles[p].Addr + user := cfg.Profiles[p].User + fmt.Printf(" %d. %s - %s (%s)\n", idx, p, addr, user) + idx++ + } + fmt.Println() + } + + scanner := bufio.NewScanner(os.Stdin) + for { + fmt.Printf("Select connection [1-%d]: ", total) + if !scanner.Scan() { + // EOF or Ctrl+C + fmt.Println() + return "", fmt.Errorf("selection canceled") + } + line := strings.TrimSpace(scanner.Text()) + n, err := strconv.Atoi(line) + if err != nil || n < 1 || n > total { + fmt.Printf("Invalid selection. Please enter a number between 1 and %d.\n", total) + continue + } + selected := entries[n-1] + return selected.name, nil + } +} diff --git a/internal/profile/profile.go b/internal/profile/profile.go index 6ab4fc3a..06df38c5 100644 --- a/internal/profile/profile.go +++ b/internal/profile/profile.go @@ -29,12 +29,7 @@ func ResolveProfile(flagProfile string, cfg *config.Config) (string, error) { return cfg.DefaultProfile, nil } - // If no explicit profile but profiles exist, use "default" - if len(cfg.Profiles) > 0 { - return "default", nil - } - - // No profile selected (no profiles configured) + // No default set - return empty string to signal interactive selection is needed return "", nil } diff --git a/internal/ui/components/connection_profiles.go b/internal/ui/components/connection_profiles.go index ce24012a..c7e9f0ca 100644 --- a/internal/ui/components/connection_profiles.go +++ b/internal/ui/components/connection_profiles.go @@ -383,13 +383,16 @@ func (a *App) showConnectionProfilesDialog() { case 's', 'S': - // Set as default - works for profiles and groups + // Toggle default - works for profiles and groups if selectionType == selectionTypeProfile || selectionType == selectionTypeGroup { a.CloseConnectionProfilesMenu() a.setDefaultProfile(selectionValue) + // Re-open the dialog so the updated ⭐ indicator is visible. + a.showConnectionProfilesDialog() + return nil } @@ -436,7 +439,7 @@ func (a *App) showConnectionProfilesDialog() { helpText := tview.NewTextView() - helpText.SetText("e:edit d:delete s:default") + helpText.SetText("e:edit d:delete s:toggle default") helpText.SetTextAlign(tview.AlignCenter) diff --git a/internal/ui/components/connection_profiles_operations.go b/internal/ui/components/connection_profiles_operations.go index d2fcbc40..bcd0f375 100644 --- a/internal/ui/components/connection_profiles_operations.go +++ b/internal/ui/components/connection_profiles_operations.go @@ -521,7 +521,9 @@ func (a *App) showDeleteProfileDialog(profileName string) { a.SetFocus(confirm) } -// setDefaultProfile sets the specified profile as the default profile. +// setDefaultProfile toggles the default profile. If the specified profile/group +// is already the default, it clears the default (so the user will be prompted on +// next startup). Otherwise it sets the given name as the new default. func (a *App) setDefaultProfile(profileName string) { // Check if the target exists (profile or group) if a.config.Profiles == nil { @@ -544,17 +546,23 @@ func (a *App) setDefaultProfile(profileName string) { } } - // Check if it's already the default - if a.config.DefaultProfile == profileName { - a.header.ShowError(fmt.Sprintf("'%s' is already the default startup selection.", profileName)) - return - } - - // Store the old default profile name for the message + // Store the old default for the success message. oldDefault := a.config.DefaultProfile - // Set the new default profile - a.config.DefaultProfile = profileName + var successMsg string + if a.config.DefaultProfile == profileName { + // Toggle off: clear the default so the user is prompted on next startup. + a.config.DefaultProfile = "" + successMsg = fmt.Sprintf("Default startup selection cleared (was '%s'). You will be prompted on next startup.", profileName) + } else { + // Set the new default. + a.config.DefaultProfile = profileName + if oldDefault != "" { + successMsg = fmt.Sprintf("Default startup selection changed from '%s' to '%s'.", oldDefault, profileName) + } else { + successMsg = fmt.Sprintf("'%s' set as default startup selection.", profileName) + } + } // Save the config configPath, found := config.FindDefaultConfigPath() @@ -581,8 +589,7 @@ func (a *App) setDefaultProfile(profileName string) { } } - // Show success message - a.header.ShowSuccess(fmt.Sprintf("Default startup selection changed from '%s' to '%s'.", oldDefault, profileName)) + a.header.ShowSuccess(successMsg) } // reEncryptConfigIfNeeded re-encrypts the config file with SOPS. From 5e9505caf26345ce1c362a2b735910a9d209fd30 Mon Sep 17 00:00:00 2001 From: Pavel Aksenov <41126916+al-bashkir@users.noreply.github.com> Date: Fri, 6 Mar 2026 11:51:47 +0300 Subject: [PATCH 2/3] fix: resolve concurrency and correctness issues in refresh pipeline Replace atomic.Bool refresh guard with generation token (atomic.Uint64) to prevent stale callbacks from clearing another refresh's guard. Add sync.RWMutex (connMu) protecting connection-state fields (client, groupManager, isGroupMode, isClusterMode) that were read by background goroutines without synchronization. Specific issues fixed: - WP1: refreshInProgress atomic.Bool replaced by refreshGen atomic.Uint64 with startRefresh/forceNewRefresh/endRefresh helpers; generation token ensures only the owning callback can release the guard. - WP2: connMu RWMutex guards all connection-state field writes; background goroutines use snapConn() to get a consistent lock-safe snapshot. deactivateGroupModes restructured to close resources outside the lock. - WP3: enrichGroupNodesParallel accumulates enriched nodes in a local slice; single swap into GlobalState inside QueueUpdateDraw eliminates the concurrent write-to-shared-slice data race. - WP4a: autoRefreshDataWithFooter captures UI selections (GetSelectedVM, GetSelectedNode, GetItemCount) on the UI goroutine before spawning the background goroutine; passed as selSnap struct to avoid tview reads off-thread. - WP4b: doFastRefresh VM enrichment callback stale guard changed from 'refreshGen != token' to 'current != 0 && current != token' so it aborts only on a newer refresh, not on normal completion; this fixes the header loading spinner never clearing and VM details not updating for the selected item after profile switch. - WP4c: Retry errors in GetFastFreshClusterStatus guest agent retry loop are now aggregated into enrichErr and surfaced to the onEnrichmentComplete callback instead of being silently dropped. --- internal/ui/components/app.go | 87 ++++- internal/ui/components/auto_refresh.go | 49 +-- .../ui/components/auto_refresh_execution.go | 152 ++++---- .../connection_profiles_operations.go | 108 ++++-- internal/ui/components/refresh.go | 366 ++++++++++++++---- internal/ui/components/refresh_vm.go | 156 ++------ internal/ui/components/vm_migration.go | 9 +- internal/ui/components/vm_operations.go | 9 +- pkg/api/client.go | 261 +++++++++++-- pkg/api/grouped-cluster.go | 17 +- pkg/api/vm.go | 60 ++- 11 files changed, 865 insertions(+), 409 deletions(-) diff --git a/internal/ui/components/app.go b/internal/ui/components/app.go index 2a556962..9639b15f 100644 --- a/internal/ui/components/app.go +++ b/internal/ui/components/app.go @@ -3,6 +3,8 @@ package components import ( "context" "fmt" + "sync" + "sync/atomic" "time" "github.com/gdamore/tcell/v2" @@ -23,6 +25,11 @@ import ( type App struct { *tview.Application + // connMu protects the connection-state fields below from concurrent access. + // Background goroutines (doManualRefresh, doFastRefresh, autoRefreshData, loadTasksData) + // read these fields; profile-switch goroutines (applyConnectionProfile, switchToClusterGroup) + // write them. Always use snapConn() for reads and connMu.Lock() for writes. + connMu sync.RWMutex client *api.Client groupManager *api.GroupClientManager isGroupMode bool @@ -58,11 +65,18 @@ type App struct { // Auto-refresh functionality autoRefreshEnabled bool - autoRefreshTicker *time.Ticker - autoRefreshStop chan bool + autoRefreshRunning bool autoRefreshCountdown int autoRefreshCountdownStop chan bool + // Refresh deduplication: prevents concurrent overlapping refreshes. + // refreshGen holds the current refresh generation token (0 = no refresh in progress, + // nonzero = a refresh is in progress with that token). Each refresh operation acquires + // a unique token via startRefresh or forceNewRefresh, and releases it via endRefresh. + // Old callbacks that call endRefresh with a stale token are no-ops, preventing the + // guard from being cleared by a refresh that was superseded by a profile switch. + refreshGen atomic.Uint64 + plugins map[string]Plugin pluginRegistry *pluginRegistry pluginCatalog []PluginInfo @@ -79,6 +93,61 @@ func (a *App) removePageIfPresent(name string) { } } +// startRefresh attempts to acquire the refresh guard. +// Returns (token, true) on success; (0, false) if another refresh is already in progress. +// The caller must pass the returned token to endRefresh when the refresh completes. +func (a *App) startRefresh() (uint64, bool) { + token := uint64(time.Now().UnixNano()) | 1 // ensure nonzero + if a.refreshGen.CompareAndSwap(0, token) { + return token, true + } + return 0, false +} + +// forceNewRefresh unconditionally acquires the refresh guard with a new token, +// discarding any in-flight refresh. Old callbacks whose endRefresh calls will be +// no-ops because their token won't match the current value. +// Used by profile switches where we need to guarantee a new refresh starts. +// The caller must pass the returned token to endRefresh (or doFastRefresh/doManualRefresh). +func (a *App) forceNewRefresh() uint64 { + token := uint64(time.Now().UnixNano()) | 1 // ensure nonzero + a.refreshGen.Store(token) + return token +} + +// endRefresh releases the refresh guard only if the token matches the current value. +// If the token doesn't match (stale callback from a superseded refresh), this is a no-op. +func (a *App) endRefresh(token uint64) { + a.refreshGen.CompareAndSwap(token, 0) +} + +// isRefreshActive returns true if a refresh is currently in progress. +func (a *App) isRefreshActive() bool { + return a.refreshGen.Load() != 0 +} + +// connData is a snapshot of connection-state fields captured under connMu. +type connData struct { + isGroupMode bool + isClusterMode bool + client *api.Client + groupManager *api.GroupClientManager +} + +// snapConn returns a consistent, lock-safe snapshot of connection state. +// Callers must use the returned values for subsequent operations instead of +// reading a.client / a.isGroupMode / etc. directly from background goroutines. +func (a *App) snapConn() connData { + a.connMu.RLock() + defer a.connMu.RUnlock() + return connData{ + isGroupMode: a.isGroupMode, + isClusterMode: a.isClusterMode, + client: a.client, + groupManager: a.groupManager, + } +} + // NewApp creates a new application instance with all UI components. func NewApp(ctx context.Context, client *api.Client, cfg *config.Config, configPath string, initialGroup string) *App { uiLogger := models.GetUILogger() @@ -549,12 +618,13 @@ func (a *App) ClearAPICache() { // In group mode, it returns the client for the VM's source profile. // In single-profile mode, it returns the main client. func (a *App) getClientForVM(vm *api.VM) (*api.Client, error) { - if a.isGroupMode { + conn := a.snapConn() + if conn.isGroupMode { if vm.SourceProfile == "" { return nil, fmt.Errorf("source profile not set for VM %s in group mode", vm.Name) } - profileClient, exists := a.groupManager.GetClient(vm.SourceProfile) + profileClient, exists := conn.groupManager.GetClient(vm.SourceProfile) if !exists { return nil, fmt.Errorf("profile '%s' not found in group manager", vm.SourceProfile) } @@ -566,19 +636,20 @@ func (a *App) getClientForVM(vm *api.VM) (*api.Client, error) { return profileClient.Client, nil } - return a.client, nil + return conn.client, nil } // getClientForNode returns the appropriate API client for a Node. // In group mode, it returns the client for the Node's source profile. // In single-profile mode, it returns the main client. func (a *App) getClientForNode(node *api.Node) (*api.Client, error) { - if a.isGroupMode { + conn := a.snapConn() + if conn.isGroupMode { if node.SourceProfile == "" { return nil, fmt.Errorf("source profile not set for Node %s in group mode", node.Name) } - profileClient, exists := a.groupManager.GetClient(node.SourceProfile) + profileClient, exists := conn.groupManager.GetClient(node.SourceProfile) if !exists { return nil, fmt.Errorf("profile '%s' not found in group manager", node.SourceProfile) } @@ -590,7 +661,7 @@ func (a *App) getClientForNode(node *api.Node) (*api.Client, error) { return profileClient.Client, nil } - return a.client, nil + return conn.client, nil } // createSyntheticGroup creates a synthetic cluster object from a list of nodes for group display. diff --git a/internal/ui/components/auto_refresh.go b/internal/ui/components/auto_refresh.go index 02ff49e2..7af3a714 100644 --- a/internal/ui/components/auto_refresh.go +++ b/internal/ui/components/auto_refresh.go @@ -39,19 +39,20 @@ func (a *App) startAutoRefresh() { return } - if a.autoRefreshTicker != nil { + if a.autoRefreshRunning { return // Already running } - a.autoRefreshStop = make(chan bool, 1) - a.autoRefreshTicker = time.NewTicker(10 * time.Second) // 10 second interval + a.autoRefreshRunning = true a.autoRefreshCountdown = 10 a.footer.UpdateAutoRefreshCountdown(a.autoRefreshCountdown) a.autoRefreshCountdownStop = make(chan bool, 1) - // Start countdown goroutine + // Start countdown goroutine using a proper ticker instead of busy-wait + sleep go func() { uiLogger := models.GetUILogger() + countdownTicker := time.NewTicker(1 * time.Second) + defer countdownTicker.Stop() for { select { @@ -59,9 +60,7 @@ func (a *App) startAutoRefresh() { return case <-a.ctx.Done(): return - default: - time.Sleep(1 * time.Second) - + case <-countdownTicker.C: if !a.autoRefreshEnabled { return } @@ -77,13 +76,16 @@ func (a *App) startAutoRefresh() { // Trigger refresh when countdown reaches 0 if a.autoRefreshCountdown == 0 { - // Only refresh if not currently loading something and no pending operations - if !a.header.IsLoading() && !models.GlobalState.HasPendingOperations() { + // Only refresh if not currently loading, no pending operations, + // and no other refresh (manual/fast/enrichment) is in progress. + if !a.header.IsLoading() && !models.GlobalState.HasPendingOperations() && !a.isRefreshActive() { uiLogger.Debug("Auto-refresh triggered by countdown") go a.autoRefreshDataWithFooter() } else { - if a.header.IsLoading() { + if a.isRefreshActive() { + uiLogger.Debug("Auto-refresh skipped - refresh already in progress") + } else if a.header.IsLoading() { uiLogger.Debug("Auto-refresh skipped - header loading operation in progress") } else { uiLogger.Debug("Auto-refresh skipped - pending VM/node operations in progress") @@ -100,15 +102,18 @@ func (a *App) startAutoRefresh() { } }() - // Spinner animation goroutine + // Spinner animation goroutine using a proper ticker go func() { + spinnerTicker := time.NewTicker(100 * time.Millisecond) + defer spinnerTicker.Stop() + for { select { + case <-a.autoRefreshCountdownStop: + return case <-a.ctx.Done(): return - default: - time.Sleep(100 * time.Millisecond) - + case <-spinnerTicker.C: if !a.autoRefreshEnabled { return } @@ -125,26 +130,12 @@ func (a *App) startAutoRefresh() { // stopAutoRefresh stops the auto-refresh timer. func (a *App) stopAutoRefresh() { - // Always stop and nil out the ticker, close channels, and reset countdown - if a.autoRefreshTicker != nil { - a.autoRefreshTicker.Stop() - a.autoRefreshTicker = nil - } - - if a.autoRefreshStop != nil { - select { - case a.autoRefreshStop <- true: - default: - } - close(a.autoRefreshStop) - a.autoRefreshStop = nil - } - if a.autoRefreshCountdownStop != nil { close(a.autoRefreshCountdownStop) a.autoRefreshCountdownStop = nil } + a.autoRefreshRunning = false a.autoRefreshCountdown = 0 a.footer.UpdateAutoRefreshCountdown(0) } diff --git a/internal/ui/components/auto_refresh_execution.go b/internal/ui/components/auto_refresh_execution.go index 41ccdb88..593237d0 100644 --- a/internal/ui/components/auto_refresh_execution.go +++ b/internal/ui/components/auto_refresh_execution.go @@ -9,52 +9,58 @@ import ( "github.com/devnullvoid/pvetui/pkg/api" ) -// autoRefreshDataWithFooter sets loading state and starts the data fetch in a new goroutine. +// autoRefreshDataWithFooter sets loading state, captures UI selections on the UI goroutine, +// and starts the data fetch in a new goroutine. +// Selection state must be read here (on the UI goroutine) before the goroutine is spawned. func (a *App) autoRefreshDataWithFooter() { + // Capture selections and set loading state while still on the UI goroutine. + // QueueUpdateDraw executes synchronously when already on the UI goroutine (which + // autoRefreshDataWithFooter always is, called from the ticker callback). + var snap selSnap a.QueueUpdateDraw(func() { a.footer.SetLoading(true) + snap = a.captureSelections() }) - go a.autoRefreshData() + go a.autoRefreshData(snap) } // autoRefreshData performs a lightweight refresh of performance data. -func (a *App) autoRefreshData() { - uiLogger := models.GetUILogger() - - // Store current selections to preserve them - var selectedVMID int - - var selectedVMNode string - - var selectedNodeName string - - var hasSelectedVM bool - - var hasSelectedNode bool - - if selectedVM := a.vmList.GetSelectedVM(); selectedVM != nil { - selectedVMID = selectedVM.ID - selectedVMNode = selectedVM.Node - hasSelectedVM = true +// snap contains the UI selections captured on the UI goroutine before this goroutine started. +func (a *App) autoRefreshData(snap selSnap) { + // Acquire the refresh guard. The countdown check in startAutoRefresh already verified + // isRefreshActive() == false, but we CAS here to prevent a race between that check + // and the actual start of the refresh. + token, ok := a.startRefresh() + if !ok { + a.QueueUpdateDraw(func() { + a.footer.SetLoading(false) + }) + return } - if selectedNode := a.nodeList.GetSelectedNode(); selectedNode != nil { - selectedNodeName = selectedNode.Name - hasSelectedNode = true - } + uiLogger := models.GetUILogger() + + // Snapshot connection state under lock so reads are race-free. + conn := a.snapConn() - // Check if search is currently active - searchWasActive := a.mainLayout.GetItemCount() > 4 + // Unpack UI selections captured on the UI goroutine before this goroutine was spawned. + hasSelectedVM := snap.hasSelectedVM + selectedVMID := snap.selectedVMID + selectedVMNode := snap.selectedVMNode + hasSelectedNode := snap.hasSelectedNode + selectedNodeName := snap.selectedNodeName + searchWasActive := snap.searchWasActive - if a.isGroupMode { + if conn.isGroupMode { // Group mode logic ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second) defer cancel() - nodes, vms, err := a.groupManager.GetGroupClusterResources(ctx, true) + nodes, vms, err := conn.groupManager.GetGroupClusterResources(ctx, true) if err != nil { uiLogger.Debug("Auto-refresh failed (group): %v", err) + a.endRefresh(token) a.QueueUpdateDraw(func() { a.footer.SetLoading(false) }) @@ -66,23 +72,28 @@ func (a *App) autoRefreshData() { nodeSearchState := models.GlobalState.GetSearchState(api.PageNodes) vmSearchState := models.GlobalState.GetSearchState(api.PageGuests) - // Preserve detailed node data + // Build lookup map for O(N) node detail preservation instead of O(N*M) + existingNodeMap := make(map[string]*api.Node, len(models.GlobalState.OriginalNodes)) + for _, n := range models.GlobalState.OriginalNodes { + if n != nil { + existingNodeMap[n.Name+"|"+n.SourceProfile] = n + } + } + + // Preserve detailed node data using map lookup for _, freshNode := range nodes { - if freshNode != nil { - // Find the corresponding existing node with detailed data - for _, existingNode := range models.GlobalState.OriginalNodes { - if existingNode != nil && existingNode.Name == freshNode.Name && existingNode.SourceProfile == freshNode.SourceProfile { - // Preserve detailed fields - freshNode.Version = existingNode.Version - freshNode.KernelVersion = existingNode.KernelVersion - freshNode.CPUInfo = existingNode.CPUInfo - freshNode.LoadAvg = existingNode.LoadAvg - freshNode.CGroupMode = existingNode.CGroupMode - freshNode.Level = existingNode.Level - freshNode.Storage = existingNode.Storage - break - } - } + if freshNode == nil { + continue + } + + if existing, ok := existingNodeMap[freshNode.Name+"|"+freshNode.SourceProfile]; ok { + freshNode.Version = existing.Version + freshNode.KernelVersion = existing.KernelVersion + freshNode.CPUInfo = existing.CPUInfo + freshNode.LoadAvg = existing.LoadAvg + freshNode.CGroupMode = existing.CGroupMode + freshNode.Level = existing.Level + freshNode.Storage = existing.Storage } } @@ -105,10 +116,11 @@ func (a *App) autoRefreshData() { // Refresh tasks if on tasks page currentPage, _ := a.pages.GetFrontPage() if currentPage == api.PageTasks { + groupMgr := conn.groupManager // capture under connMu snapshot, not a.groupManager go func() { ctxTasks, cancelTasks := context.WithTimeout(context.Background(), 10*time.Second) defer cancelTasks() - tasks, err := a.groupManager.GetGroupTasks(ctxTasks) + tasks, err := groupMgr.GetGroupTasks(ctxTasks) if err == nil { a.QueueUpdateDraw(func() { if state := models.GlobalState.GetSearchState(api.PageTasks); state != nil && state.Filter != "" { @@ -127,8 +139,9 @@ func (a *App) autoRefreshData() { a.restoreSearchUI(searchWasActive, nodeSearchState, vmSearchState) // Start background enrichment for detailed node stats - // Pass false for isInitialLoad since this is auto-refresh - a.enrichGroupNodesSequentially(nodes, hasSelectedNode, selectedNodeName, hasSelectedVM, selectedVMID, selectedVMNode, searchWasActive, false) + // Pass false for isInitialLoad since this is auto-refresh. + // enrichGroupNodesParallel will call endRefresh(token) when done. + a.enrichGroupNodesParallel(token, nodes, hasSelectedNode, selectedNodeName, hasSelectedVM, selectedVMID, selectedVMNode, searchWasActive, false) a.autoRefreshCountdown = 10 @@ -140,10 +153,12 @@ func (a *App) autoRefreshData() { } - // Fetch fresh cluster resources data (this includes performance metrics) - cluster, err := a.client.GetFreshClusterStatus() + // Fetch fresh cluster resources without VM enrichment (lighter for periodic refresh). + // Guest agent data (IPs, filesystems) is preserved from the last full refresh. + cluster, err := conn.client.GetLightClusterStatus() if err != nil { uiLogger.Debug("Auto-refresh failed: %v", err) + a.endRefresh(token) a.QueueUpdateDraw(func() { a.footer.SetLoading(false) }) @@ -172,24 +187,28 @@ func (a *App) autoRefreshData() { // Update cluster status (this shows updated CPU/memory/storage totals) a.clusterStatus.Update(cluster) + // Build lookup map for O(N) node detail preservation instead of O(N*M) + existingNodeMap := make(map[string]*api.Node, len(models.GlobalState.OriginalNodes)) + for _, n := range models.GlobalState.OriginalNodes { + if n != nil { + existingNodeMap[n.Name] = n + } + } + // Preserve detailed node data while updating performance metrics for _, freshNode := range cluster.Nodes { - if freshNode != nil { - // Find the corresponding existing node with detailed data - for _, existingNode := range models.GlobalState.OriginalNodes { - if existingNode != nil && existingNode.Name == freshNode.Name { - // Preserve detailed fields that aren't in cluster resources - freshNode.Version = existingNode.Version - freshNode.KernelVersion = existingNode.KernelVersion - freshNode.CPUInfo = existingNode.CPUInfo - freshNode.LoadAvg = existingNode.LoadAvg - freshNode.CGroupMode = existingNode.CGroupMode - freshNode.Level = existingNode.Level - freshNode.Storage = existingNode.Storage - - break - } - } + if freshNode == nil { + continue + } + + if existing, ok := existingNodeMap[freshNode.Name]; ok { + freshNode.Version = existing.Version + freshNode.KernelVersion = existing.KernelVersion + freshNode.CPUInfo = existing.CPUInfo + freshNode.LoadAvg = existing.LoadAvg + freshNode.CGroupMode = existing.CGroupMode + freshNode.Level = existing.Level + freshNode.Storage = existing.Storage } } @@ -249,7 +268,7 @@ func (a *App) autoRefreshData() { if currentPage == api.PageTasks { // Refresh tasks data without showing loading indicator (background refresh) go func() { - tasks, err := a.client.GetClusterTasks() + tasks, err := conn.client.GetClusterTasks() if err == nil { a.QueueUpdateDraw(func() { // Check if there's an active search filter @@ -274,6 +293,7 @@ func (a *App) autoRefreshData() { // Show success message a.header.ShowSuccess("Data refreshed successfully") a.footer.SetLoading(false) + a.endRefresh(token) // Reset countdown after refresh is complete a.autoRefreshCountdown = 10 diff --git a/internal/ui/components/connection_profiles_operations.go b/internal/ui/components/connection_profiles_operations.go index bcd0f375..dc85d6dd 100644 --- a/internal/ui/components/connection_profiles_operations.go +++ b/internal/ui/components/connection_profiles_operations.go @@ -17,26 +17,27 @@ import ( func (a *App) deactivateGroupModes(uiLogger interface { Debug(format string, args ...interface{}) }) { - if a.isGroupMode { + // Capture old references and clear fields atomically under write lock. + // Resources are closed AFTER releasing the lock to avoid holding it + // during potentially slow network-level Close() operations. + a.connMu.Lock() + oldGroupManager := a.groupManager + oldClusterClient := a.clusterClient + a.groupManager = nil + a.isGroupMode = false + a.clusterClient = nil + a.isClusterMode = false + a.groupName = "" + a.connMu.Unlock() + + if oldGroupManager != nil { uiLogger.Debug("Disabling group mode") - if a.groupManager != nil { - a.groupManager.Close() - } - a.groupManager = nil - a.isGroupMode = false + oldGroupManager.Close() } - if a.isClusterMode { + if oldClusterClient != nil { uiLogger.Debug("Disabling cluster mode") - if a.clusterClient != nil { - a.clusterClient.Close() - } - a.clusterClient = nil - a.isClusterMode = false - } - - if a.groupName != "" { - a.groupName = "" + oldClusterClient.Close() } if a.tasksList != nil { @@ -82,9 +83,12 @@ func (a *App) applyConnectionProfile(profileName string) { uiLogger.Debug("New API client created successfully for profile %s", profileName) - // Update app client and VNC service immediately to ensure subsequent calls use the new client - // This must happen before manualRefresh() is called + // Update app client under write lock. deactivateGroupModes also acquires + // the lock internally for its field writes. + a.connMu.Lock() a.client = client + a.connMu.Unlock() + if a.vncService != nil { a.vncService.UpdateClient(client) } @@ -103,9 +107,12 @@ func (a *App) applyConnectionProfile(profileName string) { a.header.ShowSuccess("Switched to profile '" + profileName + "' successfully!") }) - // Then refresh data with new connection (this will update the UI) - uiLogger.Debug("Starting manual refresh with new client") - a.manualRefresh() + // Force a new refresh generation token, discarding any in-flight refresh from + // the previous profile. Old callbacks' endRefresh calls will be no-ops because + // their token won't match the new token. + token := a.forceNewRefresh() + uiLogger.Debug("Starting fast refresh with new client") + a.doFastRefresh(token) }() } @@ -203,13 +210,16 @@ func (a *App) switchToGroup(groupName string) { // Update app state a.QueueUpdateDraw(func() { - // Set group mode + // Set group mode fields under write lock so background goroutines that + // call snapConn() see a consistent view. The lock is brief (field assignments). // Note: We keep a.client around even in group mode to avoid breaking callbacks // that were set up during initialization. In group mode, we use a.groupManager // for operations instead of a.client. + a.connMu.Lock() a.groupManager = manager a.isGroupMode = true a.groupName = groupName + a.connMu.Unlock() // Update header a.updateHeaderWithActiveProfile() @@ -260,8 +270,10 @@ func (a *App) switchToGroup(groupName string) { // Start background enrichment for detailed node stats // This ensures nodes get Version, Kernel, LoadAvg etc. populated - // Pass true for isInitialLoad to show "Guest agent data loaded" message - a.enrichGroupNodesSequentially(nodes, false, "", false, 0, "", false, true) + // Pass true for isInitialLoad to show "Guest agent data loaded" message. + // Force a new refresh token so the guard is acquired for this enrichment. + enrichToken := a.forceNewRefresh() + a.enrichGroupNodesParallel(enrichToken, nodes, false, "", false, 0, "", false, true) } // Update selection and details @@ -388,41 +400,55 @@ func (a *App) switchToClusterGroup(groupName string) { return } uiLogger.Info("[CLUSTER] Failover callback: %s -> %s", oldProfile, newProfile) - a.client = cc.GetActiveClient() - if a.client == nil { + newClient := cc.GetActiveClient() + if newClient == nil { uiLogger.Error("[CLUSTER] Failover callback has nil active client for %s", newProfile) return } + a.connMu.Lock() + a.client = newClient + a.connMu.Unlock() if a.vncService != nil { - a.vncService.UpdateClient(a.client) + a.vncService.UpdateClient(newClient) } a.updateHeaderWithActiveProfile() a.header.ShowWarning(fmt.Sprintf("Failover: %s \u2192 %s", oldProfile, newProfile)) - go a.manualRefresh() + // Force a new refresh token for failover — any in-flight refresh from + // the previous node is superseded. + failoverToken := a.forceNewRefresh() + go a.doFastRefresh(failoverToken) }) }) // Start health checks cc.StartHealthCheck() - // Update app state on UI thread - a.QueueUpdateDraw(func() { - // Set cluster mode state - a.clusterClient = cc - a.isClusterMode = true - a.groupName = groupName - a.client = cc.GetActiveClient() - if a.vncService != nil { - a.vncService.UpdateClient(a.client) - } + // Set app state under write lock. These fields are read by background goroutines + // (doFastRefresh, autoRefreshData, etc.) so they must be protected. + a.connMu.Lock() + a.clusterClient = cc + a.isClusterMode = true + a.groupName = groupName + a.client = cc.GetActiveClient() + activeClient := a.client // capture for vncService without holding lock + a.connMu.Unlock() + + if a.vncService != nil { + a.vncService.UpdateClient(activeClient) + } - // Update header to show cluster mode + // Update header to show cluster mode (UI update) + a.QueueUpdateDraw(func() { a.updateHeaderWithActiveProfile() a.header.ShowSuccess(fmt.Sprintf("Connected to cluster '%s' via %s", groupName, cc.GetActiveProfileName())) }) - // Trigger refresh to load data through normal single-profile flow - a.manualRefresh() + // Force a new refresh generation token, discarding any in-flight refresh. + // Old callbacks' endRefresh calls will be no-ops. + clusterToken := a.forceNewRefresh() + + // Load data fast — show basic node/VM names immediately, enrich in background + a.doFastRefresh(clusterToken) }() } diff --git a/internal/ui/components/refresh.go b/internal/ui/components/refresh.go index 77135a76..476874cd 100644 --- a/internal/ui/components/refresh.go +++ b/internal/ui/components/refresh.go @@ -10,10 +10,51 @@ import ( "github.com/devnullvoid/pvetui/pkg/api" ) +// selSnap is a snapshot of the user's current selection state in the UI. +// It must be captured on the tview UI goroutine (inside a QueueUpdateDraw callback +// or before any goroutine is spawned when the caller is already on the UI goroutine). +type selSnap struct { + hasSelectedNode bool + selectedNodeName string + hasSelectedVM bool + selectedVMID int + selectedVMNode string + searchWasActive bool +} + +// captureSelections captures the user's current UI selections. +// MUST be called from the tview UI goroutine (inside QueueUpdateDraw or equivalent). +func (a *App) captureSelections() selSnap { + var s selSnap + if node := a.nodeList.GetSelectedNode(); node != nil { + s.hasSelectedNode = true + s.selectedNodeName = node.Name + } + if vm := a.vmList.GetSelectedVM(); vm != nil { + s.hasSelectedVM = true + s.selectedVMID = vm.ID + s.selectedVMNode = vm.Node + } + s.searchWasActive = a.mainLayout.GetItemCount() > 4 + return s +} + // manualRefresh refreshes all data manually. +// Acquires the refresh guard and delegates to doManualRefresh. func (a *App) manualRefresh() { - // * Check if there are any pending operations + token, ok := a.startRefresh() + if !ok { + return + } + a.doManualRefresh(token) +} + +// doManualRefresh is the internal implementation of a full refresh. +// Caller must have already acquired the refresh guard (token != 0). +func (a *App) doManualRefresh(token uint64) { + // Check if there are any pending operations if models.GlobalState.HasPendingOperations() { + a.endRefresh(token) a.showMessageSafe("Cannot refresh data while there are pending operations in progress") return } @@ -22,40 +63,25 @@ func (a *App) manualRefresh() { a.header.ShowLoading("Refreshing data...") a.footer.SetLoading(true) - // Store current selections for restoration - var hasSelectedNode, hasSelectedVM bool - - var selectedNodeName, selectedVMNode string - - var selectedVMID int - - if node := a.nodeList.GetSelectedNode(); node != nil { - hasSelectedNode = true - selectedNodeName = node.Name - } - - if vm := a.vmList.GetSelectedVM(); vm != nil { - hasSelectedVM = true - selectedVMID = vm.ID - selectedVMNode = vm.Node - } - - // Check if search is currently active - searchWasActive := a.mainLayout.GetItemCount() > 4 + // NOTE: UI state reads (selections, search) are intentionally NOT captured here. + // For the group mode path, they are captured inside QueueUpdateDraw (on the UI + // goroutine) before calling enrichGroupNodesParallel. For the single-profile path, + // doEnrichNodes captures them inside its own QueueUpdateDraw callback. This ensures + // all tview reads happen on the UI goroutine and are race-free. // Run data refresh in goroutine to avoid blocking UI go func() { - // Wait a moment for API changes to propagate to cluster resources endpoint - // This ensures we get fresh data after configuration updates - time.Sleep(500 * time.Millisecond) + // Snapshot connection state under lock so reads are race-free. + conn := a.snapConn() - if a.isGroupMode { + if conn.isGroupMode { // Group mode logic ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) defer cancel() - nodes, vms, err := a.groupManager.GetGroupClusterResources(ctx, true) + nodes, vms, err := conn.groupManager.GetGroupClusterResources(ctx, true) if err != nil { + a.endRefresh(token) a.QueueUpdateDraw(func() { a.header.ShowError(fmt.Sprintf("Refresh failed: %v", err)) a.footer.SetLoading(false) @@ -64,9 +90,10 @@ func (a *App) manualRefresh() { return } - // Debug logging retained at Debug level for troubleshooting. - a.QueueUpdateDraw(func() { + // Capture selections on the UI goroutine before starting background enrichment. + snap := a.captureSelections() + // Update GlobalState nodes/VMs; UI lists will be updated after enrichment to reduce flicker. models.GlobalState.OriginalNodes = nodes models.GlobalState.OriginalVMs = vms @@ -91,13 +118,17 @@ func (a *App) manualRefresh() { // Start background enrichment for detailed node stats // Pass false for isInitialLoad since this is a manual refresh - a.enrichGroupNodesSequentially(nodes, hasSelectedNode, selectedNodeName, hasSelectedVM, selectedVMID, selectedVMNode, searchWasActive, false) + a.enrichGroupNodesParallel(token, nodes, + snap.hasSelectedNode, snap.selectedNodeName, + snap.hasSelectedVM, snap.selectedVMID, snap.selectedVMNode, + snap.searchWasActive, false) }) } else { // Single profile logic // Fetch fresh data bypassing cache - cluster, err := a.client.GetFreshClusterStatus() + cluster, err := conn.client.GetFreshClusterStatus() if err != nil { + a.endRefresh(token) a.QueueUpdateDraw(func() { a.header.ShowError(fmt.Sprintf("Refresh failed: %v", err)) a.footer.SetLoading(false) @@ -110,13 +141,153 @@ func (a *App) manualRefresh() { // version during the initial refresh update to avoid brief UI flicker. a.preserveClusterVersionIfMissing(cluster) - // Initial UI update and enrichment + // Initial UI update and enrichment. Selections are captured inside + // doEnrichNodes' QueueUpdateDraw callback on the UI goroutine. a.applyInitialClusterUpdate(cluster) - a.enrichNodesSequentially(cluster, hasSelectedNode, selectedNodeName, hasSelectedVM, selectedVMID, selectedVMNode, searchWasActive) + // Manual refresh: show "Data refreshed successfully" after node enrichment completes + // (no VM enrichment callback follows in this path). + a.doEnrichNodes(cluster, conn.client, token, true) } }() } +// fastRefresh loads basic node/VM names immediately and enriches details in the background. +// This is the public entry point; it acquires the refresh guard and delegates to doFastRefresh. +// Used for normal fast-refresh (failover callbacks, etc.) where a refresh may already be running. +func (a *App) fastRefresh() { + token, ok := a.startRefresh() + if !ok { + return + } + a.doFastRefresh(token) +} + +// doFastRefresh is the implementation of fast refresh. +// Callers that already hold a generation token (e.g. profile switches via forceNewRefresh) +// should call this directly instead of fastRefresh. +// This is used for profile switching where perceived speed matters most — +// the user sees node and guest lists right away, then details (CPU, filesystems, guest agent) +// fill in progressively. +func (a *App) doFastRefresh(token uint64) { + if models.GlobalState.HasPendingOperations() { + a.endRefresh(token) + a.showMessageSafe("Cannot refresh data while there are pending operations in progress") + return + } + + a.header.ShowLoading("Loading data...") + a.footer.SetLoading(true) + + go func() { + // Snapshot connection state under lock so reads are race-free. + conn := a.snapConn() + + if conn.isGroupMode { + // Group mode: delegate to manual refresh internal logic (already shows data before enrichment). + // We already hold the refresh token, so pass it through directly. + a.doManualRefresh(token) + return + } + + // Capture the client instance that is starting this refresh. + // Used for making API calls with the correct client during enrichment. + refreshClient := conn.client + + // Single profile: get basic data fast (no VM enrichment), show it, then enrich in background + cluster, err := refreshClient.GetFastFreshClusterStatus(func(enrichErr error) { + // VM enrichment complete callback — update VM list with enriched data + a.QueueUpdateDraw(func() { + // Stale guard: abort only if a *newer* refresh has superseded this one. + // refreshGen == 0 means this refresh completed normally via endRefresh — + // we still need to run to clear the header loading indicator and update + // VM details (doEnrichNodes calls endRefresh before this callback fires). + if current := a.refreshGen.Load(); current != 0 && current != token { + return + } + + if refreshClient.Cluster == nil { + return + } + + // Rebuild VM list from enriched cluster data + var enrichedVMs []*api.VM + for _, node := range refreshClient.Cluster.Nodes { + if node != nil { + for _, vm := range node.VMs { + if vm != nil { + enrichedVMs = append(enrichedVMs, vm) + } + } + } + } + + if len(enrichedVMs) > 0 { + // Preserve current VM selection + var selectedVMID int + var selectedVMNode string + var hasSelectedVM bool + if selectedVM := a.vmList.GetSelectedVM(); selectedVM != nil { + selectedVMID = selectedVM.ID + selectedVMNode = selectedVM.Node + hasSelectedVM = true + } + + models.GlobalState.OriginalVMs = make([]*api.VM, len(enrichedVMs)) + copy(models.GlobalState.OriginalVMs, enrichedVMs) + + vmSearchState := models.GlobalState.GetSearchState(api.PageGuests) + if vmSearchState != nil && vmSearchState.HasActiveVMFilter() { + models.FilterVMs(vmSearchState.Filter) + a.vmList.SetVMs(models.GlobalState.FilteredVMs) + } else { + models.GlobalState.FilteredVMs = make([]*api.VM, len(enrichedVMs)) + copy(models.GlobalState.FilteredVMs, enrichedVMs) + a.vmList.SetVMs(models.GlobalState.FilteredVMs) + } + + // Restore VM selection + if hasSelectedVM { + vmList := a.vmList.GetVMs() + for i, vm := range vmList { + if vm != nil && vm.ID == selectedVMID && vm.Node == selectedVMNode { + a.vmList.SetCurrentItem(i) + break + } + } + } + + if selectedVM := a.vmList.GetSelectedVM(); selectedVM != nil { + a.vmDetails.Update(selectedVM) + } + } + + if enrichErr != nil { + a.header.ShowWarning("Guest agent enrichment partially failed") + } else { + a.header.ShowSuccess("Guest agent data loaded") + } + }) + }) + if err != nil { + a.endRefresh(token) + a.QueueUpdateDraw(func() { + a.header.ShowError(fmt.Sprintf("Refresh failed: %v", err)) + a.footer.SetLoading(false) + }) + return + } + + // We have basic cluster data — show it immediately + a.preserveClusterVersionIfMissing(cluster) + a.applyInitialClusterUpdate(cluster) + + // Start node enrichment in background (Version, disks, updates). + // Selections are captured inside doEnrichNodes' QueueUpdateDraw on the UI goroutine. + // Fast refresh path: VM enrichment callback will show the final message, not this. + a.doEnrichNodes(cluster, refreshClient, token, false) + }() +} + func (a *App) preserveClusterVersionIfMissing(cluster *api.Cluster) { if cluster == nil || cluster.Version != "" { return @@ -174,12 +345,27 @@ func (a *App) applyInitialClusterUpdate(cluster *api.Cluster) { }) } -// enrichNodesSequentially enriches node data in parallel and finalizes the refresh -func (a *App) enrichNodesSequentially(cluster *api.Cluster, hasSelectedNode bool, selectedNodeName string, hasSelectedVM bool, selectedVMID int, selectedVMNode string, searchWasActive bool) { +// doEnrichNodes enriches node data in parallel and finalizes the refresh. +// Selections are always captured inside the final QueueUpdateDraw callback (on the UI +// goroutine), ensuring race-free reads from tview widgets. +// The refreshClient parameter is the client used for API calls during enrichment. +// The token parameter is the generation token acquired at refresh start; if a newer +// refresh has started by the time enrichment completes, the callback is a no-op. +// When showFinalMessage is true, a "Data refreshed successfully" message is shown after +// enrichment completes. Set to false for the doFastRefresh path where the VM enrichment +// callback will show the final status message. +func (a *App) doEnrichNodes(cluster *api.Cluster, refreshClient *api.Client, token uint64, showFinalMessage bool) { go func() { var wg sync.WaitGroup - // Enrich nodes in parallel + // Start loading tasks in parallel with node enrichment (independent operation) + a.loadTasksData() + + // Accumulate enriched nodes into a local slice, then swap into GlobalState + // inside QueueUpdateDraw to avoid data races with the UI goroutine. + enrichedNodes := make([]*api.Node, len(cluster.Nodes)) + copy(enrichedNodes, cluster.Nodes) + for i, node := range cluster.Nodes { if node == nil { continue @@ -189,42 +375,47 @@ func (a *App) enrichNodesSequentially(cluster *api.Cluster, hasSelectedNode bool go func(idx int, n *api.Node) { defer wg.Done() - freshNode, err := a.client.RefreshNodeData(n.Name) + freshNode, err := refreshClient.RefreshNodeData(n.Name) if err == nil && freshNode != nil { - // Preserve VMs from the FRESH cluster data (not the original stale data) - // This ensures we keep the updated VM names we just fetched + // Preserve VMs from the FRESH cluster data if cluster.Nodes[idx] != nil { freshNode.VMs = cluster.Nodes[idx].VMs } - - // Update only the specific node index in global state - // Safe to access specific index concurrently - if idx < len(models.GlobalState.OriginalNodes) { - models.GlobalState.OriginalNodes[idx] = freshNode - } + enrichedNodes[idx] = freshNode } }(i, node) } wg.Wait() - // Final update: rebuild VMs, cluster version, status, and complete refresh + // Final update on the UI goroutine a.QueueUpdateDraw(func() { + // Stale guard: if a newer refresh has started (e.g. profile switch), discard these results. + // endRefresh is a no-op here since the token no longer matches. + if a.refreshGen.Load() != token { + return + } + + // Capture selections on the UI goroutine (race-free tview access). + snap := a.captureSelections() + + // Apply enriched nodes to global state + models.GlobalState.OriginalNodes = make([]*api.Node, len(enrichedNodes)) + copy(models.GlobalState.OriginalNodes, enrichedNodes) + // Re-apply node filters with enriched data nodeState := models.GlobalState.GetSearchState(api.PageNodes) if nodeState != nil && nodeState.Filter != "" { models.FilterNodes(nodeState.Filter) } else { - // Update filtered nodes from original (copy) - // We need to re-copy because OriginalNodes was updated in place - models.GlobalState.FilteredNodes = make([]*api.Node, len(models.GlobalState.OriginalNodes)) - copy(models.GlobalState.FilteredNodes, models.GlobalState.OriginalNodes) + models.GlobalState.FilteredNodes = make([]*api.Node, len(enrichedNodes)) + copy(models.GlobalState.FilteredNodes, enrichedNodes) } a.nodeList.SetNodes(models.GlobalState.FilteredNodes) // Rebuild VM list from enriched nodes var vms []*api.VM - for _, n := range models.GlobalState.OriginalNodes { + for _, n := range enrichedNodes { if n != nil { for _, vm := range n.VMs { if vm != nil { @@ -234,11 +425,9 @@ func (a *App) enrichNodesSequentially(cluster *api.Cluster, hasSelectedNode bool } } - // Update global VM state with enriched data models.GlobalState.OriginalVMs = make([]*api.VM, len(vms)) copy(models.GlobalState.OriginalVMs, vms) - // Apply VM filter if active vmSearchState := models.GlobalState.GetSearchState(api.PageGuests) if vmSearchState != nil && vmSearchState.HasActiveVMFilter() { models.FilterVMs(vmSearchState.Filter) @@ -250,7 +439,7 @@ func (a *App) enrichNodesSequentially(cluster *api.Cluster, hasSelectedNode bool } // Update cluster version from enriched nodes - for _, n := range models.GlobalState.OriginalNodes { + for _, n := range enrichedNodes { if n != nil && n.Version != "" { cluster.Version = fmt.Sprintf("Proxmox VE %s", n.Version) break @@ -258,34 +447,50 @@ func (a *App) enrichNodesSequentially(cluster *api.Cluster, hasSelectedNode bool } a.clusterStatus.Update(cluster) - // Final selection restore and search UI restoration + // Restore selection (use vmSearchState already fetched above) nodeSearchState := models.GlobalState.GetSearchState(api.PageNodes) + a.restoreSelection(snap.hasSelectedVM, snap.selectedVMID, snap.selectedVMNode, vmSearchState, + snap.hasSelectedNode, snap.selectedNodeName, nodeSearchState) - a.restoreSelection(hasSelectedVM, selectedVMID, selectedVMNode, vmSearchState, - hasSelectedNode, selectedNodeName, nodeSearchState) - - // Update details if items are selected + // Update details for whatever is now selected if node := a.nodeList.GetSelectedNode(); node != nil { a.nodeDetails.Update(node, models.GlobalState.OriginalNodes) } - if vm := a.vmList.GetSelectedVM(); vm != nil { a.vmDetails.Update(vm) } - a.restoreSearchUI(searchWasActive, nodeSearchState, vmSearchState) - a.header.ShowSuccess("Data refreshed successfully") + a.restoreSearchUI(snap.searchWasActive, nodeSearchState, vmSearchState) + if showFinalMessage { + // Manual refresh path: no VM enrichment callback follows, so show success here. + a.header.ShowSuccess("Data refreshed successfully") + } + // Fast refresh path: the VM enrichment callback (in doFastRefresh) will show + // the final status message after guest agent data is loaded. a.footer.SetLoading(false) - a.loadTasksData() + a.endRefresh(token) }) }() } -// enrichGroupNodesSequentially enriches group node data in parallel and finalizes the refresh -func (a *App) enrichGroupNodesSequentially(nodes []*api.Node, hasSelectedNode bool, selectedNodeName string, hasSelectedVM bool, selectedVMID int, selectedVMNode string, searchWasActive bool, isInitialLoad bool) { +// enrichGroupNodesParallel enriches group node data in parallel and finalizes the refresh. +// token is the generation token that must be passed to endRefresh when enrichment completes. +func (a *App) enrichGroupNodesParallel(token uint64, nodes []*api.Node, hasSelectedNode bool, selectedNodeName string, hasSelectedVM bool, selectedVMID int, selectedVMNode string, searchWasActive bool, isInitialLoad bool) { go func() { var wg sync.WaitGroup + // Start loading tasks in parallel with node enrichment (independent operation) + a.loadTasksData() + + // Snapshot connection state under lock so reads are race-free. + conn := a.snapConn() + + // Accumulate enriched nodes into a local slice to avoid data races. + // Concurrent goroutines write to their own index; the slice is swapped into + // GlobalState inside QueueUpdateDraw after all goroutines complete. + enrichedNodes := make([]*api.Node, len(nodes)) + copy(enrichedNodes, nodes) + // Create a context for the enrichment process ctx := context.Background() @@ -299,8 +504,8 @@ func (a *App) enrichGroupNodesSequentially(nodes []*api.Node, hasSelectedNode bo go func(idx int, n *api.Node) { defer wg.Done() - // We need to fetch the node status from the specific profile - freshNode, err := a.groupManager.GetNodeFromGroup(ctx, n.SourceProfile, n.Name) + // Fetch the node status from the specific profile's client + freshNode, err := conn.groupManager.GetNodeFromGroup(ctx, n.SourceProfile, n.Name) if err == nil && freshNode != nil { // Ensure Online status is set to true if we got a response @@ -326,30 +531,32 @@ func (a *App) enrichGroupNodesSequentially(nodes []*api.Node, hasSelectedNode bo // Ensure SourceProfile is preserved freshNode.SourceProfile = n.SourceProfile - // Update GlobalState - if idx < len(models.GlobalState.OriginalNodes) { - models.GlobalState.OriginalNodes[idx] = freshNode - } + // Write to local slice only — no GlobalState mutation from goroutines. + enrichedNodes[idx] = freshNode } }(i, node) } wg.Wait() - // Final update + // Final update — swap enriched nodes into GlobalState on the UI goroutine. a.QueueUpdateDraw(func() { + // Apply enriched nodes to global state (race-free: single UI goroutine). + models.GlobalState.OriginalNodes = make([]*api.Node, len(enrichedNodes)) + copy(models.GlobalState.OriginalNodes, enrichedNodes) + // Re-apply node filters nodeState := models.GlobalState.GetSearchState(api.PageNodes) if nodeState != nil && nodeState.Filter != "" { models.FilterNodes(nodeState.Filter) } else { - models.GlobalState.FilteredNodes = make([]*api.Node, len(models.GlobalState.OriginalNodes)) - copy(models.GlobalState.FilteredNodes, models.GlobalState.OriginalNodes) + models.GlobalState.FilteredNodes = make([]*api.Node, len(enrichedNodes)) + copy(models.GlobalState.FilteredNodes, enrichedNodes) } a.nodeList.SetNodes(models.GlobalState.FilteredNodes) // Update cluster status with the enriched nodes - syntheticCluster := a.createSyntheticGroup(models.GlobalState.OriginalNodes) + syntheticCluster := a.createSyntheticGroup(enrichedNodes) a.clusterStatus.Update(syntheticCluster) // Final selection restore and search UI restoration @@ -381,7 +588,7 @@ func (a *App) enrichGroupNodesSequentially(nodes []*api.Node, hasSelectedNode bo a.header.ShowSuccess("Data refreshed successfully") } a.footer.SetLoading(false) - a.loadTasksData() + a.endRefresh(token) }) }() } @@ -471,16 +678,19 @@ func (a *App) refreshNodeData(node *api.Node) { // loadTasksData loads and updates task data with proper filtering. func (a *App) loadTasksData() { go func() { + // Snapshot connection state under lock so reads are race-free. + conn := a.snapConn() + var tasks []*api.ClusterTask var err error - if a.isGroupMode { + if conn.isGroupMode { // Create context with timeout for group operations ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() - tasks, err = a.groupManager.GetGroupTasks(ctx) + tasks, err = conn.groupManager.GetGroupTasks(ctx) } else { - tasks, err = a.client.GetClusterTasks() + tasks, err = conn.client.GetClusterTasks() } if err == nil { diff --git a/internal/ui/components/refresh_vm.go b/internal/ui/components/refresh_vm.go index ed30da47..0410fad2 100644 --- a/internal/ui/components/refresh_vm.go +++ b/internal/ui/components/refresh_vm.go @@ -9,128 +9,32 @@ import ( // refreshVMData refreshes data for the selected VM. func (a *App) refreshVMData(vm *api.VM) { - // * Check if VM has pending operations - if isPending, pendingOperation := models.GlobalState.IsVMPending(vm); isPending { - a.showMessageSafe(fmt.Sprintf("Cannot refresh VM while '%s' is in progress", pendingOperation)) - return - } - - // Show loading indicator - a.header.ShowLoading(fmt.Sprintf("Refreshing VM %s", vm.Name)) - - // Store VM identity for selection restoration - vmID := vm.ID - vmNode := vm.Node - - // Run refresh in goroutine to avoid blocking UI - go func() { - // Get the correct client for this VM (important for group mode) - client, err := a.getClientForVM(vm) - if err != nil { - a.QueueUpdateDraw(func() { - a.header.ShowError(fmt.Sprintf("Error refreshing VM: %v", err)) - }) - return - } - - // Fetch fresh VM data with callback for when enrichment completes - freshVM, err := client.RefreshVMData(vm, func(enrichedVM *api.VM) { - // This callback is called after guest agent data has been loaded - a.QueueUpdateDraw(func() { - // Update VM details if this VM is currently selected - if selectedVM := a.vmList.GetSelectedVM(); selectedVM != nil && selectedVM.ID == enrichedVM.ID && selectedVM.Node == enrichedVM.Node { - a.vmDetails.Update(enrichedVM) - } - }) - }) - if err != nil { - // If VM refresh fails (e.g., VM was migrated to a different node), - // fall back to a full refresh to find the VM on its new node - a.QueueUpdateDraw(func() { - a.header.ShowLoading("VM may have been migrated, performing full refresh") - }) - a.QueueUpdateDraw(func() { - a.manualRefresh() // This will find the VM on its new node - }) - - return - } - - // Update UI with fresh data on main thread - a.QueueUpdateDraw(func() { - // Get current search state - vmSearchState := models.GlobalState.GetSearchState(api.PageGuests) - - // Find the VM in the global state and update it - for i, originalVM := range models.GlobalState.OriginalVMs { - if originalVM != nil && originalVM.ID == vmID && originalVM.Node == vmNode { - // Preserve SourceProfile from original VM (important for group mode) - if originalVM.SourceProfile != "" { - freshVM.SourceProfile = originalVM.SourceProfile - } - models.GlobalState.OriginalVMs[i] = freshVM - - break - } - } - - // Update filtered VMs if they exist - for i, filteredVM := range models.GlobalState.FilteredVMs { - if filteredVM != nil && filteredVM.ID == vmID && filteredVM.Node == vmNode { - // Preserve SourceProfile from filtered VM (important for group mode) - if filteredVM.SourceProfile != "" { - freshVM.SourceProfile = filteredVM.SourceProfile - } - models.GlobalState.FilteredVMs[i] = freshVM - - break - } - } - - // Also update the VM in the node's VM list - for _, node := range models.GlobalState.OriginalNodes { - if node != nil && node.Name == vmNode { - for i, nodeVM := range node.VMs { - if nodeVM != nil && nodeVM.ID == vmID { - node.VMs[i] = freshVM - - break - } - } - - break - } - } - - // Reapply filter if one is active - if vmSearchState != nil && vmSearchState.HasActiveVMFilter() { - models.FilterVMs(vmSearchState.Filter) - } - - // Update the VM list display - a.vmList.SetVMs(models.GlobalState.FilteredVMs) - - // Preserve whichever VM the user is currently focused on (selection is restored by SetVMs). - if vmSearchState != nil { - vmSearchState.SelectedIndex = a.vmList.GetCurrentItem() - } - - // Update VM details for the currently selected VM. - if selectedVM := a.vmList.GetSelectedVM(); selectedVM != nil { - a.vmDetails.Update(selectedVM) - } - - // Show success message - a.header.ShowSuccess(fmt.Sprintf("VM %s refreshed successfully", vm.Name)) - }) - }() + a.doRefreshVMData(vm, false) } // refreshVMDataAndTasks refreshes both VM data and tasks list. // This is useful for operations that affect both VM state and create tasks (like volume resize and snapshot rollback). func (a *App) refreshVMDataAndTasks(vm *api.VM) { + a.doRefreshVMData(vm, true) +} + +// doRefreshVMData is the shared implementation for VM data refresh. +// When withTasks is true, it also refreshes the tasks list after the VM data is updated. +func (a *App) doRefreshVMData(vm *api.VM, withTasks bool) { + // Check if VM has pending operations (skip for withTasks since the caller just queued the task) + if !withTasks { + if isPending, pendingOperation := models.GlobalState.IsVMPending(vm); isPending { + a.showMessageSafe(fmt.Sprintf("Cannot refresh VM while '%s' is in progress", pendingOperation)) + return + } + } + // Show loading indicator - a.header.ShowLoading(fmt.Sprintf("Refreshing VM %s and tasks", vm.Name)) + if withTasks { + a.header.ShowLoading(fmt.Sprintf("Refreshing VM %s and tasks", vm.Name)) + } else { + a.header.ShowLoading(fmt.Sprintf("Refreshing VM %s", vm.Name)) + } // Store VM identity for selection restoration vmID := vm.ID @@ -159,13 +63,13 @@ func (a *App) refreshVMDataAndTasks(vm *api.VM) { }) if err != nil { // If VM refresh fails (e.g., VM was migrated to a different node), - // fall back to a full refresh to find the VM on its new node + // fall back to a full refresh to find the VM on its new node. + // Note: manualRefresh must NOT be called inside QueueUpdateDraw to avoid + // nested QueueUpdateDraw deadlocks (manualRefresh queues its own updates). a.QueueUpdateDraw(func() { a.header.ShowLoading("VM may have been migrated, performing full refresh") }) - a.QueueUpdateDraw(func() { - a.manualRefresh() // This will find the VM on its new node - }) + a.manualRefresh() return } @@ -234,11 +138,17 @@ func (a *App) refreshVMDataAndTasks(vm *api.VM) { a.vmDetails.Update(selectedVM) } - // Also refresh tasks to show any new tasks created by the operation - a.loadTasksData() + // Refresh tasks if requested (for operations that create tasks) + if withTasks { + a.loadTasksData() + } // Show success message - a.header.ShowSuccess(fmt.Sprintf("VM %s and tasks refreshed successfully", vm.Name)) + if withTasks { + a.header.ShowSuccess(fmt.Sprintf("VM %s and tasks refreshed successfully", vm.Name)) + } else { + a.header.ShowSuccess(fmt.Sprintf("VM %s refreshed successfully", vm.Name)) + } }) }() } diff --git a/internal/ui/components/vm_migration.go b/internal/ui/components/vm_migration.go index c62cf75e..17ad9cbd 100644 --- a/internal/ui/components/vm_migration.go +++ b/internal/ui/components/vm_migration.go @@ -186,13 +186,8 @@ func (a *App) performMigrationOperation(vm *api.VM, options *api.MigrationOption } else { a.header.ShowSuccess(fmt.Sprintf("Migration of %s to %s completed successfully", vm.Name, options.Target)) - // Clear API cache for the specific client - client, _ := a.getClientForVM(vm) - if client != nil { - client.ClearAPICache() - } - - // Full refresh needed as node changed + // manualRefresh handles selective cache invalidation; + // no need for a redundant full cache wipe here. a.manualRefresh() } }) diff --git a/internal/ui/components/vm_operations.go b/internal/ui/components/vm_operations.go index 3377c1d8..3bf7df2b 100644 --- a/internal/ui/components/vm_operations.go +++ b/internal/ui/components/vm_operations.go @@ -76,13 +76,8 @@ func (a *App) performVMDeleteOperation(vm *api.VM, forced bool) { } else { a.header.ShowSuccess(fmt.Sprintf("Successfully deleted %s", vm.Name)) - // Clear API cache for the specific client - client, _ := a.getClientForVM(vm) - if client != nil { - client.ClearAPICache() - } - - // Refresh to update the UI (VM should be gone) + // manualRefresh handles selective cache invalidation; + // no need for a redundant full cache wipe here. a.manualRefresh() } }) diff --git a/pkg/api/client.go b/pkg/api/client.go index 5599a43f..a2b67250 100644 --- a/pkg/api/client.go +++ b/pkg/api/client.go @@ -7,6 +7,7 @@ import ( "net/http" "net/url" "strings" + "sync" "time" "github.com/devnullvoid/pvetui/pkg/api/interfaces" @@ -276,6 +277,49 @@ func (c *Client) ClearAPICache() { } } +// ClearClusterCache selectively invalidates cluster-level cache entries +// instead of wiping the entire cache. This preserves node-level cached +// data (disks, updates) that rarely changes, avoiding unnecessary API +// round-trips during refresh. +func (c *Client) ClearClusterCache() { + clusterPaths := []string{ + "/cluster/status", + "/cluster/resources", + "/cluster/tasks", + } + + for _, path := range clusterPaths { + cacheKey := fmt.Sprintf("proxmox_api_%s_%s", c.baseURL, path) + cacheKey = strings.ReplaceAll(cacheKey, "/", "_") + + _ = c.cache.Delete(cacheKey) + } + + c.logger.Debug("Cluster-level cache entries cleared selectively") +} + +// ClearNodeCache selectively invalidates cache entries for a specific node. +// This is more efficient than clearing the entire cache when only one node's +// data needs to be refreshed. +func (c *Client) ClearNodeCache(nodeName string) { + nodePaths := []string{ + fmt.Sprintf("/nodes/%s/status", nodeName), + fmt.Sprintf("/nodes/%s/version", nodeName), + fmt.Sprintf("/nodes/%s/config", nodeName), + fmt.Sprintf("/nodes/%s/disks/list", nodeName), + fmt.Sprintf("/nodes/%s/apt/update", nodeName), + } + + for _, path := range nodePaths { + cacheKey := fmt.Sprintf("proxmox_api_%s_%s", c.baseURL, path) + cacheKey = strings.ReplaceAll(cacheKey, "/", "_") + + _ = c.cache.Delete(cacheKey) + } + + c.logger.Debug("Cache entries cleared for node %s", nodeName) +} + // GetCache returns the cache instance used by this client. // This is useful for sharing cache instances across multiple clients in group mode. func (c *Client) GetCache() interfaces.Cache { @@ -293,9 +337,107 @@ func (c *Client) BaseHostname() string { } // GetFreshClusterStatus retrieves cluster status bypassing cache completely. +// This includes VM enrichment with guest agent data for full accuracy. func (c *Client) GetFreshClusterStatus() (*Cluster, error) { - // Clear the cache first to ensure fresh data - c.ClearAPICache() + return c.getFreshClusterStatusInternal(true) +} + +// GetLightClusterStatus retrieves fresh cluster status without VM enrichment. +// This is faster than GetFreshClusterStatus and suitable for periodic auto-refresh +// where guest agent data (IPs, filesystems) doesn't need to be refreshed every cycle. +func (c *Client) GetLightClusterStatus() (*Cluster, error) { + return c.getFreshClusterStatusInternal(false) +} + +// GetFastFreshClusterStatus retrieves fresh cluster status (cache-busting) and returns +// basic data immediately. VM enrichment runs in the background and the provided callback +// is called when enrichment completes (with any error that occurred). +// This gives the UI instant node/VM names while detailed guest agent data loads asynchronously. +func (c *Client) GetFastFreshClusterStatus(onEnrichmentComplete func(error)) (*Cluster, error) { + // Clear cluster-level cache so we get truly fresh data + c.ClearClusterCache() + + cluster := &Cluster{ + Nodes: make([]*Node, 0), + StorageManager: NewStorageManager(), + lastUpdate: time.Now(), + } + + // 1. Get basic cluster status (node names, online/offline) + if err := c.getClusterBasicStatus(cluster); err != nil { + return nil, err + } + + // 2. Get cluster resources fresh (TTL=0) — gives us node stats + VM names/status + if err := c.processClusterResourcesWithCache(cluster, 0); err != nil { + return nil, err + } + + // 3. Calculate cluster-wide totals from the basic data + c.calculateClusterTotals(cluster) + + c.Cluster = cluster + + // 4. Run VM enrichment in background — guest agent IPs, filesystems, configs + go func() { + c.logger.Debug("[FAST-FRESH] Starting background VM enrichment for %d nodes", len(cluster.Nodes)) + + // Reset guestAgentChecked before enrichment (same as startup path) + for _, node := range cluster.Nodes { + if node.Online && node.VMs != nil { + for _, vm := range node.VMs { + vm.guestAgentChecked = false + } + } + } + + var enrichErr error + if err := c.EnrichVMs(cluster); err != nil { + c.logger.Debug("[FAST-FRESH] Error enriching VM data: %v", err) + enrichErr = err + } + + // Retry QEMU VMs with missing guest agent data (same as startup path) + time.Sleep(3 * time.Second) + for _, node := range cluster.Nodes { + if !node.Online || node.VMs == nil { + continue + } + for _, vm := range node.VMs { + if vm.Status == VMStatusRunning && vm.Type == VMTypeQemu && vm.AgentEnabled && (!vm.AgentRunning || len(vm.NetInterfaces) == 0) { + if err := c.GetVmStatus(vm); err != nil { + if strings.Contains(err.Error(), "guest agent is not running") { + // Expected: agent not yet ready; not a reportable error. + continue + } + // Unexpected retry error: aggregate into enrichErr so the caller + // is aware that some VMs may have incomplete guest agent data. + c.logger.Debug("[FAST-FRESH] Retry error for VM %d on %s: %v", vm.ID, node.Name, err) + if enrichErr == nil { + enrichErr = fmt.Errorf("guest agent retry: VM %d (%s): %w", vm.ID, node.Name, err) + } else { + enrichErr = fmt.Errorf("%w; VM %d (%s): %v", enrichErr, vm.ID, node.Name, err) + } + } + } + } + } + + if onEnrichmentComplete != nil { + onEnrichmentComplete(enrichErr) + } + }() + + return cluster, nil +} + +// getFreshClusterStatusInternal is the shared implementation for cluster status retrieval. +// When enrichVMs is true, running VMs are enriched with guest agent data (slower but more complete). +func (c *Client) getFreshClusterStatusInternal(enrichVMs bool) (*Cluster, error) { + // Selectively clear cluster-level cache entries rather than the entire cache. + // Node-level data (disks, updates) is invalidated per-node during enrichment, + // so clearing everything here wastes cached data that is still valid. + c.ClearClusterCache() // Create a fresh cluster with minimal cache TTL for resources cluster := &Cluster{ @@ -314,10 +456,12 @@ func (c *Client) GetFreshClusterStatus() (*Cluster, error) { return nil, err } - // 3. Enrich VMs with detailed status information - if err := c.EnrichVMs(cluster); err != nil { - // Log error but continue - c.logger.Debug("[CLUSTER] Error enriching VM data: %v", err) + // 3. Enrich VMs with detailed status information (skipped for light refresh) + if enrichVMs { + if err := c.EnrichVMs(cluster); err != nil { + // Log error but continue + c.logger.Debug("[CLUSTER] Error enriching VM data: %v", err) + } } // 4. Calculate cluster-wide totals @@ -365,15 +509,49 @@ func (c *Client) RefreshNodeData(nodeName string) (*Node, error) { } } - // Fetch fresh node data - freshNode, err := c.GetNodeStatus(nodeName) - if err != nil { + // Fetch node status, disks, and updates concurrently. + // Disks and updates only need the node name and are independent of each other. + var ( + freshNode *Node + statusErr error + disks []NodeDisk + updates []NodeUpdate + wg sync.WaitGroup + ) + + wg.Add(3) //nolint:mnd // status + disks + updates + + go func() { + defer wg.Done() + + freshNode, statusErr = c.GetNodeStatus(nodeName) + }() + + go func() { + defer wg.Done() + + if d, err := c.GetNodeDisks(nodeName); err == nil { + disks = d + } + }() + + go func() { + defer wg.Done() + + if u, err := c.GetNodeUpdates(nodeName); err == nil { + updates = u + } + }() + + wg.Wait() + + if statusErr != nil { // If we can't reach the node, it's likely offline if originalNode != nil { originalNode.Online = false } - return nil, fmt.Errorf("failed to refresh node %s: %w", nodeName, err) + return nil, fmt.Errorf("failed to refresh node %s: %w", nodeName, statusErr) } // If we successfully got node status, the node is online @@ -392,15 +570,9 @@ func (c *Client) RefreshNodeData(nodeName string) (*Node, error) { } } - // Fetch Disks - if disks, err := c.GetNodeDisks(nodeName); err == nil { - freshNode.Disks = disks - } - - // Fetch Updates - if updates, err := c.GetNodeUpdates(nodeName); err == nil { - freshNode.Updates = updates - } + // Apply concurrently-fetched disks and updates + freshNode.Disks = disks + freshNode.Updates = updates return freshNode, nil } @@ -594,14 +766,47 @@ func (c *Client) enrichNodeMissingDetails(node *Node) error { return nil } - fullStatus, err := c.GetNodeStatus(node.Name) - if err != nil { + // Fetch status, disks, and updates concurrently. + // All three only require the node name and are independent of each other. + var ( + fullStatus *Node + statusErr error + disks []NodeDisk + disksErr error + updates []NodeUpdate + updatesErr error + wg sync.WaitGroup + ) + + wg.Add(3) //nolint:mnd // status + disks + updates + + go func() { + defer wg.Done() + + fullStatus, statusErr = c.GetNodeStatus(node.Name) + }() + + go func() { + defer wg.Done() + + disks, disksErr = c.GetNodeDisks(node.Name) + }() + + go func() { + defer wg.Done() + + updates, updatesErr = c.GetNodeUpdates(node.Name) + }() + + wg.Wait() + + if statusErr != nil { // Mark node as offline if we can't reach it node.Online = false - c.logger.Debug("[CLUSTER] Node %s appears to be offline or unreachable for detail enrichment: %v", node.Name, err) + c.logger.Debug("[CLUSTER] Node %s appears to be offline or unreachable for detail enrichment: %v", node.Name, statusErr) // Return error for logging but don't make it critical - return fmt.Errorf("node %s offline/unreachable for details: %w", node.Name, err) + return fmt.Errorf("node %s offline/unreachable for details: %w", node.Name, statusErr) } // Only update fields not available in cluster resources @@ -611,18 +816,16 @@ func (c *Client) enrichNodeMissingDetails(node *Node) error { node.LoadAvg = fullStatus.LoadAvg node.lastMetricsUpdate = time.Now() - // Fetch Disks - if disks, err := c.GetNodeDisks(node.Name); err == nil { + if disksErr == nil { node.Disks = disks } else { - c.logger.Debug("Failed to fetch disks for node %s: %v", node.Name, err) + c.logger.Debug("Failed to fetch disks for node %s: %v", node.Name, disksErr) } - // Fetch Updates - if updates, err := c.GetNodeUpdates(node.Name); err == nil { + if updatesErr == nil { node.Updates = updates } else { - c.logger.Debug("Failed to fetch updates for node %s: %v", node.Name, err) + c.logger.Debug("Failed to fetch updates for node %s: %v", node.Name, updatesErr) } c.logger.Debug("[CLUSTER] Successfully enriched missing details for node: %s", node.Name) diff --git a/pkg/api/grouped-cluster.go b/pkg/api/grouped-cluster.go index 11a6d537..ce0f50f3 100644 --- a/pkg/api/grouped-cluster.go +++ b/pkg/api/grouped-cluster.go @@ -284,15 +284,16 @@ func (m *GroupClientManager) GetGroupClusterResources(ctx context.Context, fresh // Fetch nodes go func() { - nodes, err := m.GetGroupNodes(ctx) if fresh { - // Invalidate node status caches for each profile before returning + // Selectively invalidate cluster-level cache keys per profile + // instead of wiping the entire cache (preserves node disks, updates, etc.) for _, pc := range m.GetConnectedClients() { - if pc.Client != nil && pc.Client.cache != nil { - _ = pc.Client.cache.Clear() + if pc.Client != nil { + pc.Client.ClearClusterCache() } } } + nodes, err := m.GetGroupNodes(ctx) nodesChan <- result{nodes: nodes, err: err} }() @@ -326,10 +327,10 @@ func (m *GroupClientManager) GetNodeFromGroup( nodeName string, ) (*Node, error) { operation := func(pName string, client *Client) (interface{}, error) { - // Clear per-node caches to ensure fresh status - if client.cache != nil { - _ = client.cache.Clear() - } + // Selectively invalidate this node's cache keys instead of wiping the entire cache. + // This preserves disk/update/VM caches for other nodes in the same profile. + client.ClearNodeCache(nodeName) + node, err := client.GetNodeStatus(nodeName) if err != nil { return nil, fmt.Errorf("failed to get node status: %w", err) diff --git a/pkg/api/vm.go b/pkg/api/vm.go index 5d5335d0..6f31aa40 100644 --- a/pkg/api/vm.go +++ b/pkg/api/vm.go @@ -3,6 +3,7 @@ package api import ( "fmt" "strings" + "sync" ) // GetVmStatus retrieves current status metrics for a VM or LXC. @@ -303,12 +304,53 @@ func (c *Client) GetDetailedVmInfo(node, vmType string, vmid int) (*VM, error) { Type: vmType, } - // Get status information (cached) - var statusRes map[string]interface{} + // Fetch status and config concurrently -- they are independent API calls. + var ( + statusRes map[string]interface{} + configRes map[string]interface{} + statusErr error + configErr error + wg sync.WaitGroup + ) - statusEndpoint := fmt.Sprintf("/nodes/%s/%s/%d/status/current", node, vmType, vmid) - if err := c.GetWithCache(statusEndpoint, &statusRes, VMDataTTL); err != nil { - return nil, fmt.Errorf("failed to get VM status: %w", err) + wg.Add(2) //nolint:mnd // status + config + + go func() { + defer wg.Done() + + var res map[string]interface{} + + statusEndpoint := fmt.Sprintf("/nodes/%s/%s/%d/status/current", node, vmType, vmid) + if err := c.GetWithCache(statusEndpoint, &res, VMDataTTL); err != nil { + statusErr = fmt.Errorf("failed to get VM status: %w", err) + return + } + + statusRes = res + }() + + go func() { + defer wg.Done() + + var res map[string]interface{} + + configEndpoint := fmt.Sprintf("/nodes/%s/%s/%d/config", node, vmType, vmid) + if err := c.GetWithCache(configEndpoint, &res, VMDataTTL); err != nil { + configErr = fmt.Errorf("failed to get VM config: %w", err) + return + } + + configRes = res + }() + + wg.Wait() + + if statusErr != nil { + return nil, statusErr + } + + if configErr != nil { + return nil, configErr } statusDataRaw := statusRes["data"] @@ -387,14 +429,6 @@ func (c *Client) GetDetailedVmInfo(node, vmType string, vmid int) (*VM, error) { vm.Pool = pool } - // Get config information (cached) - var configRes map[string]interface{} - - configEndpoint := fmt.Sprintf("/nodes/%s/%s/%d/config", node, vmType, vmid) - if err := c.GetWithCache(configEndpoint, &configRes, VMDataTTL); err != nil { - return nil, fmt.Errorf("failed to get VM config: %w", err) - } - configData, ok := configRes["data"].(map[string]interface{}) if !ok { return nil, fmt.Errorf("invalid VM config response format") From f7f4a290b73653e16501f023535e28ed3c7ce60f Mon Sep 17 00:00:00 2001 From: Pavel Aksenov <41126916+al-bashkir@users.noreply.github.com> Date: Fri, 6 Mar 2026 13:40:41 +0300 Subject: [PATCH 3/3] fix: preserve VM enrichment data and unblock group auto-refresh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Auto-refresh called GetLightClusterStatus() / GetGroupClusterResources() which return VMs with basic metrics only. The resulting VM objects were written directly into GlobalState, silently zeroing guest agent data (IPs, network interfaces, filesystems) and config details (boot order, CPU cores, storage devices, OS type, description) on every cycle. Add preserveVMEnrichmentData() helper that builds an O(N) lookup map from existing GlobalState VMs and carries all enrichment fields forward onto freshly-fetched VMs. Applied in both single-profile and group mode paths, matching the node-detail preservation pattern already in place. Group mode auto-refresh called enrichGroupNodesParallel(), which makes individual API calls to every node (version, disks, load avg, etc.) and takes 10-20+ seconds on multi-node setups. The refresh guard was held for the full duration, so the countdown timer always saw isRefreshActive == true at expiry, reset to 10, and skipped — then on the next expiry triggered another full enrichment. Result: perpetual spinner, countdown never completed, auto-refresh effectively broken. Remove enrichGroupNodesParallel from the auto-refresh path. Auto-refresh is now lightweight in both modes: metrics update, enriched data is preserved, UI lists and details update immediately, and the guard is released synchronously inside QueueUpdateDraw. Full node enrichment continues to run on manual refresh (R key) and profile switches only. --- .../ui/components/auto_refresh_execution.go | 124 +++++++++++++++++- 1 file changed, 118 insertions(+), 6 deletions(-) diff --git a/internal/ui/components/auto_refresh_execution.go b/internal/ui/components/auto_refresh_execution.go index 593237d0..8ca07fba 100644 --- a/internal/ui/components/auto_refresh_execution.go +++ b/internal/ui/components/auto_refresh_execution.go @@ -9,6 +9,67 @@ import ( "github.com/devnullvoid/pvetui/pkg/api" ) +// preserveVMEnrichmentData carries forward guest agent and config data from the last +// full refresh onto a freshly-fetched VM that only has basic metrics. +// existingVMs is keyed by "ID|Node" (single-profile) or "ID|Node|SourceProfile" (group mode). +func preserveVMEnrichmentData(fresh *api.VM, existingVMs map[string]*api.VM) { + if fresh == nil { + return + } + // Try group mode key first (includes SourceProfile), fall back to single-profile key. + key := fmt.Sprintf("%d|%s|%s", fresh.ID, fresh.Node, fresh.SourceProfile) + existing, ok := existingVMs[key] + if !ok { + key = fmt.Sprintf("%d|%s", fresh.ID, fresh.Node) + existing, ok = existingVMs[key] + } + if !ok { + return + } + + // Preserve guest agent data (only available from full enrichment) + if fresh.IP == "" { + fresh.IP = existing.IP + } + fresh.AgentEnabled = existing.AgentEnabled + fresh.AgentRunning = existing.AgentRunning + if len(fresh.NetInterfaces) == 0 { + fresh.NetInterfaces = existing.NetInterfaces + } + if len(fresh.Filesystems) == 0 { + fresh.Filesystems = existing.Filesystems + } + fresh.ConfiguredMACs = existing.ConfiguredMACs + + // Preserve config details (from config endpoint enrichment) + if len(fresh.ConfiguredNetworks) == 0 { + fresh.ConfiguredNetworks = existing.ConfiguredNetworks + } + if len(fresh.StorageDevices) == 0 { + fresh.StorageDevices = existing.StorageDevices + } + if fresh.BootOrder == "" { + fresh.BootOrder = existing.BootOrder + } + if fresh.CPUCores == 0 { + fresh.CPUCores = existing.CPUCores + } + if fresh.CPUSockets == 0 { + fresh.CPUSockets = existing.CPUSockets + } + if fresh.Architecture == "" { + fresh.Architecture = existing.Architecture + } + if fresh.OSType == "" { + fresh.OSType = existing.OSType + } + if fresh.Description == "" { + fresh.Description = existing.Description + } + fresh.OnBoot = existing.OnBoot + fresh.Enriched = existing.Enriched +} + // autoRefreshDataWithFooter sets loading state, captures UI selections on the UI goroutine, // and starts the data fetch in a new goroutine. // Selection state must be read here (on the UI goroutine) before the goroutine is spawned. @@ -97,6 +158,20 @@ func (a *App) autoRefreshData(snap selSnap) { } } + // Preserve VM guest agent / config data from last full refresh. + existingVMMap := make(map[string]*api.VM, len(models.GlobalState.OriginalVMs)) + for _, vm := range models.GlobalState.OriginalVMs { + if vm != nil { + key := fmt.Sprintf("%d|%s|%s", vm.ID, vm.Node, vm.SourceProfile) + existingVMMap[key] = vm + } + } + for _, vm := range vms { + if vm != nil { + preserveVMEnrichmentData(vm, existingVMMap) + } + } + // Update global state with fresh data models.GlobalState.OriginalNodes = make([]*api.Node, len(nodes)) models.GlobalState.FilteredNodes = make([]*api.Node, len(nodes)) @@ -108,10 +183,35 @@ func (a *App) autoRefreshData(snap selSnap) { copy(models.GlobalState.OriginalVMs, vms) copy(models.GlobalState.FilteredVMs, vms) + // Apply filters and update UI lists immediately (no background enrichment). + if nodeSearchState != nil && nodeSearchState.Filter != "" { + models.FilterNodes(nodeSearchState.Filter) + a.nodeList.SetNodes(models.GlobalState.FilteredNodes) + } else { + a.nodeList.SetNodes(models.GlobalState.OriginalNodes) + } + + if vmSearchState != nil && vmSearchState.HasActiveVMFilter() { + models.FilterVMs(vmSearchState.Filter) + a.vmList.SetVMs(models.GlobalState.FilteredVMs) + } else { + a.vmList.SetVMs(models.GlobalState.OriginalVMs) + } + a.restoreSelection(hasSelectedVM, selectedVMID, selectedVMNode, vmSearchState, hasSelectedNode, selectedNodeName, nodeSearchState) - // Defer list/detail updates until enrichment completes to reduce flicker. + // Update cluster status with the fresh (metric-only) data + syntheticCluster := a.createSyntheticGroup(nodes) + a.clusterStatus.Update(syntheticCluster) + + // Update details if items are selected + if node := a.nodeList.GetSelectedNode(); node != nil { + a.nodeDetails.Update(node, models.GlobalState.OriginalNodes) + } + if vm := a.vmList.GetSelectedVM(); vm != nil { + a.vmDetails.Update(vm) + } // Refresh tasks if on tasks page currentPage, _ := a.pages.GetFrontPage() @@ -138,13 +238,12 @@ func (a *App) autoRefreshData(snap selSnap) { a.restoreSearchUI(searchWasActive, nodeSearchState, vmSearchState) - // Start background enrichment for detailed node stats - // Pass false for isInitialLoad since this is auto-refresh. - // enrichGroupNodesParallel will call endRefresh(token) when done. - a.enrichGroupNodesParallel(token, nodes, hasSelectedNode, selectedNodeName, hasSelectedVM, selectedVMID, selectedVMNode, searchWasActive, false) + // Auto-refresh is lightweight — no background enrichment, release guard now. + a.header.ShowSuccess("Data refreshed successfully") + a.footer.SetLoading(false) + a.endRefresh(token) a.autoRefreshCountdown = 10 - a.footer.UpdateAutoRefreshCountdown(a.autoRefreshCountdown) }) @@ -212,6 +311,18 @@ func (a *App) autoRefreshData(snap selSnap) { } } + // Build lookup map for O(N) VM enrichment preservation instead of O(N*M). + // Light cluster status returns only basic metrics (CPU, memory, status); + // guest agent data (IPs, filesystems, configs) must be carried forward + // from the last full refresh to avoid "clearing" data on auto-refresh. + existingVMMap := make(map[string]*api.VM, len(models.GlobalState.OriginalVMs)) + for _, vm := range models.GlobalState.OriginalVMs { + if vm != nil { + key := fmt.Sprintf("%d|%s", vm.ID, vm.Node) + existingVMMap[key] = vm + } + } + // Rebuild VM list from fresh cluster data var vms []*api.VM @@ -219,6 +330,7 @@ func (a *App) autoRefreshData(snap selSnap) { if node != nil { for _, vm := range node.VMs { if vm != nil { + preserveVMEnrichmentData(vm, existingVMMap) vms = append(vms, vm) } }