diff --git a/internal/bootstrap/bootstrap.go b/internal/bootstrap/bootstrap.go index 1d89fd13..8c405f01 100644 --- a/internal/bootstrap/bootstrap.go +++ b/internal/bootstrap/bootstrap.go @@ -6,10 +6,12 @@ package bootstrap import ( + "bufio" "flag" "fmt" "os" "sort" + "strconv" "strings" "github.com/devnullvoid/pvetui/internal/app" @@ -236,6 +238,37 @@ func Bootstrap(opts BootstrapOptions) (*BootstrapResult, error) { return nil, fmt.Errorf("profile resolution failed: %w", err) } + // If a profile/group name was resolved but no longer exists (e.g. stale + // default_profile after a rename or delete), warn and fall back to interactive + // selection rather than hard-erroring. + if selectedProfile != "" && len(cfg.Profiles) > 0 { + _, isProfile := cfg.Profiles[selectedProfile] + isGroup := cfg.IsGroup(selectedProfile) + if !isProfile && !isGroup { + fmt.Printf("⚠️ Profile/group '%s' not found — falling back to selection.\n", selectedProfile) + selectedProfile = "" + } + } + + // When no profile is resolved but profiles are configured, either auto-select + // (single profile, no groups) or prompt interactively. + if selectedProfile == "" && len(cfg.Profiles) > 0 { + groups := cfg.GetGroups() + if len(cfg.Profiles) == 1 && len(groups) == 0 { + // Only one profile available — auto-select silently. + for name := range cfg.Profiles { + selectedProfile = name + } + } else { + chosen, err := promptProfileSelection(cfg) + if err != nil { + fmt.Println("🚪 Exiting.") + os.Exit(0) + } + selectedProfile = chosen + } + } + // Determine if selected profile is an aggregate group or a standard profile var initialGroup string var startupProfile string @@ -589,3 +622,89 @@ func valueOrDash(value string) string { } return value } + +// promptProfileSelection interactively asks the user which profile or group to +// connect to when no default is configured. +// +// It returns the selected profile or group name, or an error if the user +// cancels or no valid input is provided. +func promptProfileSelection(cfg *config.Config) (string, error) { + // Collect groups (sorted) + groups := cfg.GetGroups() + groupNames := make([]string, 0, len(groups)) + for name := range groups { + groupNames = append(groupNames, name) + } + sort.Strings(groupNames) + + // Collect individual profile names (sorted) + profileNames := make([]string, 0, len(cfg.Profiles)) + for name := range cfg.Profiles { + profileNames = append(profileNames, name) + } + sort.Strings(profileNames) + + total := len(groupNames) + len(profileNames) + + // Build an ordered list: groups first, then profiles + type entry struct { + name string + isGroup bool + } + entries := make([]entry, 0, total) + for _, g := range groupNames { + entries = append(entries, entry{name: g, isGroup: true}) + } + for _, p := range profileNames { + entries = append(entries, entry{name: p, isGroup: false}) + } + + fmt.Println() + fmt.Println("No default profile configured.") + fmt.Println() + fmt.Println("Available connections:") + fmt.Println() + + idx := 1 + if len(groupNames) > 0 { + fmt.Println(" Groups:") + for _, g := range groupNames { + modeTag := "" + if cfg.IsClusterGroup(g) { + modeTag = " [cluster]" + } + fmt.Printf(" %d. %s (%d profiles)%s\n", idx, g, len(groups[g]), modeTag) + idx++ + } + fmt.Println() + } + + if len(profileNames) > 0 { + fmt.Println(" Profiles:") + for _, p := range profileNames { + addr := cfg.Profiles[p].Addr + user := cfg.Profiles[p].User + fmt.Printf(" %d. %s - %s (%s)\n", idx, p, addr, user) + idx++ + } + fmt.Println() + } + + scanner := bufio.NewScanner(os.Stdin) + for { + fmt.Printf("Select connection [1-%d]: ", total) + if !scanner.Scan() { + // EOF or Ctrl+C + fmt.Println() + return "", fmt.Errorf("selection canceled") + } + line := strings.TrimSpace(scanner.Text()) + n, err := strconv.Atoi(line) + if err != nil || n < 1 || n > total { + fmt.Printf("Invalid selection. Please enter a number between 1 and %d.\n", total) + continue + } + selected := entries[n-1] + return selected.name, nil + } +} diff --git a/internal/profile/profile.go b/internal/profile/profile.go index 6ab4fc3a..06df38c5 100644 --- a/internal/profile/profile.go +++ b/internal/profile/profile.go @@ -29,12 +29,7 @@ func ResolveProfile(flagProfile string, cfg *config.Config) (string, error) { return cfg.DefaultProfile, nil } - // If no explicit profile but profiles exist, use "default" - if len(cfg.Profiles) > 0 { - return "default", nil - } - - // No profile selected (no profiles configured) + // No default set - return empty string to signal interactive selection is needed return "", nil } diff --git a/internal/ui/components/app.go b/internal/ui/components/app.go index 2a556962..9639b15f 100644 --- a/internal/ui/components/app.go +++ b/internal/ui/components/app.go @@ -3,6 +3,8 @@ package components import ( "context" "fmt" + "sync" + "sync/atomic" "time" "github.com/gdamore/tcell/v2" @@ -23,6 +25,11 @@ import ( type App struct { *tview.Application + // connMu protects the connection-state fields below from concurrent access. + // Background goroutines (doManualRefresh, doFastRefresh, autoRefreshData, loadTasksData) + // read these fields; profile-switch goroutines (applyConnectionProfile, switchToClusterGroup) + // write them. Always use snapConn() for reads and connMu.Lock() for writes. + connMu sync.RWMutex client *api.Client groupManager *api.GroupClientManager isGroupMode bool @@ -58,11 +65,18 @@ type App struct { // Auto-refresh functionality autoRefreshEnabled bool - autoRefreshTicker *time.Ticker - autoRefreshStop chan bool + autoRefreshRunning bool autoRefreshCountdown int autoRefreshCountdownStop chan bool + // Refresh deduplication: prevents concurrent overlapping refreshes. + // refreshGen holds the current refresh generation token (0 = no refresh in progress, + // nonzero = a refresh is in progress with that token). Each refresh operation acquires + // a unique token via startRefresh or forceNewRefresh, and releases it via endRefresh. + // Old callbacks that call endRefresh with a stale token are no-ops, preventing the + // guard from being cleared by a refresh that was superseded by a profile switch. + refreshGen atomic.Uint64 + plugins map[string]Plugin pluginRegistry *pluginRegistry pluginCatalog []PluginInfo @@ -79,6 +93,61 @@ func (a *App) removePageIfPresent(name string) { } } +// startRefresh attempts to acquire the refresh guard. +// Returns (token, true) on success; (0, false) if another refresh is already in progress. +// The caller must pass the returned token to endRefresh when the refresh completes. +func (a *App) startRefresh() (uint64, bool) { + token := uint64(time.Now().UnixNano()) | 1 // ensure nonzero + if a.refreshGen.CompareAndSwap(0, token) { + return token, true + } + return 0, false +} + +// forceNewRefresh unconditionally acquires the refresh guard with a new token, +// discarding any in-flight refresh. Old callbacks whose endRefresh calls will be +// no-ops because their token won't match the current value. +// Used by profile switches where we need to guarantee a new refresh starts. +// The caller must pass the returned token to endRefresh (or doFastRefresh/doManualRefresh). +func (a *App) forceNewRefresh() uint64 { + token := uint64(time.Now().UnixNano()) | 1 // ensure nonzero + a.refreshGen.Store(token) + return token +} + +// endRefresh releases the refresh guard only if the token matches the current value. +// If the token doesn't match (stale callback from a superseded refresh), this is a no-op. +func (a *App) endRefresh(token uint64) { + a.refreshGen.CompareAndSwap(token, 0) +} + +// isRefreshActive returns true if a refresh is currently in progress. +func (a *App) isRefreshActive() bool { + return a.refreshGen.Load() != 0 +} + +// connData is a snapshot of connection-state fields captured under connMu. +type connData struct { + isGroupMode bool + isClusterMode bool + client *api.Client + groupManager *api.GroupClientManager +} + +// snapConn returns a consistent, lock-safe snapshot of connection state. +// Callers must use the returned values for subsequent operations instead of +// reading a.client / a.isGroupMode / etc. directly from background goroutines. +func (a *App) snapConn() connData { + a.connMu.RLock() + defer a.connMu.RUnlock() + return connData{ + isGroupMode: a.isGroupMode, + isClusterMode: a.isClusterMode, + client: a.client, + groupManager: a.groupManager, + } +} + // NewApp creates a new application instance with all UI components. func NewApp(ctx context.Context, client *api.Client, cfg *config.Config, configPath string, initialGroup string) *App { uiLogger := models.GetUILogger() @@ -549,12 +618,13 @@ func (a *App) ClearAPICache() { // In group mode, it returns the client for the VM's source profile. // In single-profile mode, it returns the main client. func (a *App) getClientForVM(vm *api.VM) (*api.Client, error) { - if a.isGroupMode { + conn := a.snapConn() + if conn.isGroupMode { if vm.SourceProfile == "" { return nil, fmt.Errorf("source profile not set for VM %s in group mode", vm.Name) } - profileClient, exists := a.groupManager.GetClient(vm.SourceProfile) + profileClient, exists := conn.groupManager.GetClient(vm.SourceProfile) if !exists { return nil, fmt.Errorf("profile '%s' not found in group manager", vm.SourceProfile) } @@ -566,19 +636,20 @@ func (a *App) getClientForVM(vm *api.VM) (*api.Client, error) { return profileClient.Client, nil } - return a.client, nil + return conn.client, nil } // getClientForNode returns the appropriate API client for a Node. // In group mode, it returns the client for the Node's source profile. // In single-profile mode, it returns the main client. func (a *App) getClientForNode(node *api.Node) (*api.Client, error) { - if a.isGroupMode { + conn := a.snapConn() + if conn.isGroupMode { if node.SourceProfile == "" { return nil, fmt.Errorf("source profile not set for Node %s in group mode", node.Name) } - profileClient, exists := a.groupManager.GetClient(node.SourceProfile) + profileClient, exists := conn.groupManager.GetClient(node.SourceProfile) if !exists { return nil, fmt.Errorf("profile '%s' not found in group manager", node.SourceProfile) } @@ -590,7 +661,7 @@ func (a *App) getClientForNode(node *api.Node) (*api.Client, error) { return profileClient.Client, nil } - return a.client, nil + return conn.client, nil } // createSyntheticGroup creates a synthetic cluster object from a list of nodes for group display. diff --git a/internal/ui/components/auto_refresh.go b/internal/ui/components/auto_refresh.go index 02ff49e2..7af3a714 100644 --- a/internal/ui/components/auto_refresh.go +++ b/internal/ui/components/auto_refresh.go @@ -39,19 +39,20 @@ func (a *App) startAutoRefresh() { return } - if a.autoRefreshTicker != nil { + if a.autoRefreshRunning { return // Already running } - a.autoRefreshStop = make(chan bool, 1) - a.autoRefreshTicker = time.NewTicker(10 * time.Second) // 10 second interval + a.autoRefreshRunning = true a.autoRefreshCountdown = 10 a.footer.UpdateAutoRefreshCountdown(a.autoRefreshCountdown) a.autoRefreshCountdownStop = make(chan bool, 1) - // Start countdown goroutine + // Start countdown goroutine using a proper ticker instead of busy-wait + sleep go func() { uiLogger := models.GetUILogger() + countdownTicker := time.NewTicker(1 * time.Second) + defer countdownTicker.Stop() for { select { @@ -59,9 +60,7 @@ func (a *App) startAutoRefresh() { return case <-a.ctx.Done(): return - default: - time.Sleep(1 * time.Second) - + case <-countdownTicker.C: if !a.autoRefreshEnabled { return } @@ -77,13 +76,16 @@ func (a *App) startAutoRefresh() { // Trigger refresh when countdown reaches 0 if a.autoRefreshCountdown == 0 { - // Only refresh if not currently loading something and no pending operations - if !a.header.IsLoading() && !models.GlobalState.HasPendingOperations() { + // Only refresh if not currently loading, no pending operations, + // and no other refresh (manual/fast/enrichment) is in progress. + if !a.header.IsLoading() && !models.GlobalState.HasPendingOperations() && !a.isRefreshActive() { uiLogger.Debug("Auto-refresh triggered by countdown") go a.autoRefreshDataWithFooter() } else { - if a.header.IsLoading() { + if a.isRefreshActive() { + uiLogger.Debug("Auto-refresh skipped - refresh already in progress") + } else if a.header.IsLoading() { uiLogger.Debug("Auto-refresh skipped - header loading operation in progress") } else { uiLogger.Debug("Auto-refresh skipped - pending VM/node operations in progress") @@ -100,15 +102,18 @@ func (a *App) startAutoRefresh() { } }() - // Spinner animation goroutine + // Spinner animation goroutine using a proper ticker go func() { + spinnerTicker := time.NewTicker(100 * time.Millisecond) + defer spinnerTicker.Stop() + for { select { + case <-a.autoRefreshCountdownStop: + return case <-a.ctx.Done(): return - default: - time.Sleep(100 * time.Millisecond) - + case <-spinnerTicker.C: if !a.autoRefreshEnabled { return } @@ -125,26 +130,12 @@ func (a *App) startAutoRefresh() { // stopAutoRefresh stops the auto-refresh timer. func (a *App) stopAutoRefresh() { - // Always stop and nil out the ticker, close channels, and reset countdown - if a.autoRefreshTicker != nil { - a.autoRefreshTicker.Stop() - a.autoRefreshTicker = nil - } - - if a.autoRefreshStop != nil { - select { - case a.autoRefreshStop <- true: - default: - } - close(a.autoRefreshStop) - a.autoRefreshStop = nil - } - if a.autoRefreshCountdownStop != nil { close(a.autoRefreshCountdownStop) a.autoRefreshCountdownStop = nil } + a.autoRefreshRunning = false a.autoRefreshCountdown = 0 a.footer.UpdateAutoRefreshCountdown(0) } diff --git a/internal/ui/components/auto_refresh_execution.go b/internal/ui/components/auto_refresh_execution.go index 41ccdb88..8ca07fba 100644 --- a/internal/ui/components/auto_refresh_execution.go +++ b/internal/ui/components/auto_refresh_execution.go @@ -9,52 +9,119 @@ import ( "github.com/devnullvoid/pvetui/pkg/api" ) -// autoRefreshDataWithFooter sets loading state and starts the data fetch in a new goroutine. +// preserveVMEnrichmentData carries forward guest agent and config data from the last +// full refresh onto a freshly-fetched VM that only has basic metrics. +// existingVMs is keyed by "ID|Node" (single-profile) or "ID|Node|SourceProfile" (group mode). +func preserveVMEnrichmentData(fresh *api.VM, existingVMs map[string]*api.VM) { + if fresh == nil { + return + } + // Try group mode key first (includes SourceProfile), fall back to single-profile key. + key := fmt.Sprintf("%d|%s|%s", fresh.ID, fresh.Node, fresh.SourceProfile) + existing, ok := existingVMs[key] + if !ok { + key = fmt.Sprintf("%d|%s", fresh.ID, fresh.Node) + existing, ok = existingVMs[key] + } + if !ok { + return + } + + // Preserve guest agent data (only available from full enrichment) + if fresh.IP == "" { + fresh.IP = existing.IP + } + fresh.AgentEnabled = existing.AgentEnabled + fresh.AgentRunning = existing.AgentRunning + if len(fresh.NetInterfaces) == 0 { + fresh.NetInterfaces = existing.NetInterfaces + } + if len(fresh.Filesystems) == 0 { + fresh.Filesystems = existing.Filesystems + } + fresh.ConfiguredMACs = existing.ConfiguredMACs + + // Preserve config details (from config endpoint enrichment) + if len(fresh.ConfiguredNetworks) == 0 { + fresh.ConfiguredNetworks = existing.ConfiguredNetworks + } + if len(fresh.StorageDevices) == 0 { + fresh.StorageDevices = existing.StorageDevices + } + if fresh.BootOrder == "" { + fresh.BootOrder = existing.BootOrder + } + if fresh.CPUCores == 0 { + fresh.CPUCores = existing.CPUCores + } + if fresh.CPUSockets == 0 { + fresh.CPUSockets = existing.CPUSockets + } + if fresh.Architecture == "" { + fresh.Architecture = existing.Architecture + } + if fresh.OSType == "" { + fresh.OSType = existing.OSType + } + if fresh.Description == "" { + fresh.Description = existing.Description + } + fresh.OnBoot = existing.OnBoot + fresh.Enriched = existing.Enriched +} + +// autoRefreshDataWithFooter sets loading state, captures UI selections on the UI goroutine, +// and starts the data fetch in a new goroutine. +// Selection state must be read here (on the UI goroutine) before the goroutine is spawned. func (a *App) autoRefreshDataWithFooter() { + // Capture selections and set loading state while still on the UI goroutine. + // QueueUpdateDraw executes synchronously when already on the UI goroutine (which + // autoRefreshDataWithFooter always is, called from the ticker callback). + var snap selSnap a.QueueUpdateDraw(func() { a.footer.SetLoading(true) + snap = a.captureSelections() }) - go a.autoRefreshData() + go a.autoRefreshData(snap) } // autoRefreshData performs a lightweight refresh of performance data. -func (a *App) autoRefreshData() { - uiLogger := models.GetUILogger() - - // Store current selections to preserve them - var selectedVMID int - - var selectedVMNode string - - var selectedNodeName string - - var hasSelectedVM bool - - var hasSelectedNode bool - - if selectedVM := a.vmList.GetSelectedVM(); selectedVM != nil { - selectedVMID = selectedVM.ID - selectedVMNode = selectedVM.Node - hasSelectedVM = true +// snap contains the UI selections captured on the UI goroutine before this goroutine started. +func (a *App) autoRefreshData(snap selSnap) { + // Acquire the refresh guard. The countdown check in startAutoRefresh already verified + // isRefreshActive() == false, but we CAS here to prevent a race between that check + // and the actual start of the refresh. + token, ok := a.startRefresh() + if !ok { + a.QueueUpdateDraw(func() { + a.footer.SetLoading(false) + }) + return } - if selectedNode := a.nodeList.GetSelectedNode(); selectedNode != nil { - selectedNodeName = selectedNode.Name - hasSelectedNode = true - } + uiLogger := models.GetUILogger() - // Check if search is currently active - searchWasActive := a.mainLayout.GetItemCount() > 4 + // Snapshot connection state under lock so reads are race-free. + conn := a.snapConn() - if a.isGroupMode { + // Unpack UI selections captured on the UI goroutine before this goroutine was spawned. + hasSelectedVM := snap.hasSelectedVM + selectedVMID := snap.selectedVMID + selectedVMNode := snap.selectedVMNode + hasSelectedNode := snap.hasSelectedNode + selectedNodeName := snap.selectedNodeName + searchWasActive := snap.searchWasActive + + if conn.isGroupMode { // Group mode logic ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second) defer cancel() - nodes, vms, err := a.groupManager.GetGroupClusterResources(ctx, true) + nodes, vms, err := conn.groupManager.GetGroupClusterResources(ctx, true) if err != nil { uiLogger.Debug("Auto-refresh failed (group): %v", err) + a.endRefresh(token) a.QueueUpdateDraw(func() { a.footer.SetLoading(false) }) @@ -66,23 +133,42 @@ func (a *App) autoRefreshData() { nodeSearchState := models.GlobalState.GetSearchState(api.PageNodes) vmSearchState := models.GlobalState.GetSearchState(api.PageGuests) - // Preserve detailed node data + // Build lookup map for O(N) node detail preservation instead of O(N*M) + existingNodeMap := make(map[string]*api.Node, len(models.GlobalState.OriginalNodes)) + for _, n := range models.GlobalState.OriginalNodes { + if n != nil { + existingNodeMap[n.Name+"|"+n.SourceProfile] = n + } + } + + // Preserve detailed node data using map lookup for _, freshNode := range nodes { - if freshNode != nil { - // Find the corresponding existing node with detailed data - for _, existingNode := range models.GlobalState.OriginalNodes { - if existingNode != nil && existingNode.Name == freshNode.Name && existingNode.SourceProfile == freshNode.SourceProfile { - // Preserve detailed fields - freshNode.Version = existingNode.Version - freshNode.KernelVersion = existingNode.KernelVersion - freshNode.CPUInfo = existingNode.CPUInfo - freshNode.LoadAvg = existingNode.LoadAvg - freshNode.CGroupMode = existingNode.CGroupMode - freshNode.Level = existingNode.Level - freshNode.Storage = existingNode.Storage - break - } - } + if freshNode == nil { + continue + } + + if existing, ok := existingNodeMap[freshNode.Name+"|"+freshNode.SourceProfile]; ok { + freshNode.Version = existing.Version + freshNode.KernelVersion = existing.KernelVersion + freshNode.CPUInfo = existing.CPUInfo + freshNode.LoadAvg = existing.LoadAvg + freshNode.CGroupMode = existing.CGroupMode + freshNode.Level = existing.Level + freshNode.Storage = existing.Storage + } + } + + // Preserve VM guest agent / config data from last full refresh. + existingVMMap := make(map[string]*api.VM, len(models.GlobalState.OriginalVMs)) + for _, vm := range models.GlobalState.OriginalVMs { + if vm != nil { + key := fmt.Sprintf("%d|%s|%s", vm.ID, vm.Node, vm.SourceProfile) + existingVMMap[key] = vm + } + } + for _, vm := range vms { + if vm != nil { + preserveVMEnrichmentData(vm, existingVMMap) } } @@ -97,18 +183,44 @@ func (a *App) autoRefreshData() { copy(models.GlobalState.OriginalVMs, vms) copy(models.GlobalState.FilteredVMs, vms) + // Apply filters and update UI lists immediately (no background enrichment). + if nodeSearchState != nil && nodeSearchState.Filter != "" { + models.FilterNodes(nodeSearchState.Filter) + a.nodeList.SetNodes(models.GlobalState.FilteredNodes) + } else { + a.nodeList.SetNodes(models.GlobalState.OriginalNodes) + } + + if vmSearchState != nil && vmSearchState.HasActiveVMFilter() { + models.FilterVMs(vmSearchState.Filter) + a.vmList.SetVMs(models.GlobalState.FilteredVMs) + } else { + a.vmList.SetVMs(models.GlobalState.OriginalVMs) + } + a.restoreSelection(hasSelectedVM, selectedVMID, selectedVMNode, vmSearchState, hasSelectedNode, selectedNodeName, nodeSearchState) - // Defer list/detail updates until enrichment completes to reduce flicker. + // Update cluster status with the fresh (metric-only) data + syntheticCluster := a.createSyntheticGroup(nodes) + a.clusterStatus.Update(syntheticCluster) + + // Update details if items are selected + if node := a.nodeList.GetSelectedNode(); node != nil { + a.nodeDetails.Update(node, models.GlobalState.OriginalNodes) + } + if vm := a.vmList.GetSelectedVM(); vm != nil { + a.vmDetails.Update(vm) + } // Refresh tasks if on tasks page currentPage, _ := a.pages.GetFrontPage() if currentPage == api.PageTasks { + groupMgr := conn.groupManager // capture under connMu snapshot, not a.groupManager go func() { ctxTasks, cancelTasks := context.WithTimeout(context.Background(), 10*time.Second) defer cancelTasks() - tasks, err := a.groupManager.GetGroupTasks(ctxTasks) + tasks, err := groupMgr.GetGroupTasks(ctxTasks) if err == nil { a.QueueUpdateDraw(func() { if state := models.GlobalState.GetSearchState(api.PageTasks); state != nil && state.Filter != "" { @@ -126,12 +238,12 @@ func (a *App) autoRefreshData() { a.restoreSearchUI(searchWasActive, nodeSearchState, vmSearchState) - // Start background enrichment for detailed node stats - // Pass false for isInitialLoad since this is auto-refresh - a.enrichGroupNodesSequentially(nodes, hasSelectedNode, selectedNodeName, hasSelectedVM, selectedVMID, selectedVMNode, searchWasActive, false) + // Auto-refresh is lightweight — no background enrichment, release guard now. + a.header.ShowSuccess("Data refreshed successfully") + a.footer.SetLoading(false) + a.endRefresh(token) a.autoRefreshCountdown = 10 - a.footer.UpdateAutoRefreshCountdown(a.autoRefreshCountdown) }) @@ -140,10 +252,12 @@ func (a *App) autoRefreshData() { } - // Fetch fresh cluster resources data (this includes performance metrics) - cluster, err := a.client.GetFreshClusterStatus() + // Fetch fresh cluster resources without VM enrichment (lighter for periodic refresh). + // Guest agent data (IPs, filesystems) is preserved from the last full refresh. + cluster, err := conn.client.GetLightClusterStatus() if err != nil { uiLogger.Debug("Auto-refresh failed: %v", err) + a.endRefresh(token) a.QueueUpdateDraw(func() { a.footer.SetLoading(false) }) @@ -172,24 +286,40 @@ func (a *App) autoRefreshData() { // Update cluster status (this shows updated CPU/memory/storage totals) a.clusterStatus.Update(cluster) + // Build lookup map for O(N) node detail preservation instead of O(N*M) + existingNodeMap := make(map[string]*api.Node, len(models.GlobalState.OriginalNodes)) + for _, n := range models.GlobalState.OriginalNodes { + if n != nil { + existingNodeMap[n.Name] = n + } + } + // Preserve detailed node data while updating performance metrics for _, freshNode := range cluster.Nodes { - if freshNode != nil { - // Find the corresponding existing node with detailed data - for _, existingNode := range models.GlobalState.OriginalNodes { - if existingNode != nil && existingNode.Name == freshNode.Name { - // Preserve detailed fields that aren't in cluster resources - freshNode.Version = existingNode.Version - freshNode.KernelVersion = existingNode.KernelVersion - freshNode.CPUInfo = existingNode.CPUInfo - freshNode.LoadAvg = existingNode.LoadAvg - freshNode.CGroupMode = existingNode.CGroupMode - freshNode.Level = existingNode.Level - freshNode.Storage = existingNode.Storage - - break - } - } + if freshNode == nil { + continue + } + + if existing, ok := existingNodeMap[freshNode.Name]; ok { + freshNode.Version = existing.Version + freshNode.KernelVersion = existing.KernelVersion + freshNode.CPUInfo = existing.CPUInfo + freshNode.LoadAvg = existing.LoadAvg + freshNode.CGroupMode = existing.CGroupMode + freshNode.Level = existing.Level + freshNode.Storage = existing.Storage + } + } + + // Build lookup map for O(N) VM enrichment preservation instead of O(N*M). + // Light cluster status returns only basic metrics (CPU, memory, status); + // guest agent data (IPs, filesystems, configs) must be carried forward + // from the last full refresh to avoid "clearing" data on auto-refresh. + existingVMMap := make(map[string]*api.VM, len(models.GlobalState.OriginalVMs)) + for _, vm := range models.GlobalState.OriginalVMs { + if vm != nil { + key := fmt.Sprintf("%d|%s", vm.ID, vm.Node) + existingVMMap[key] = vm } } @@ -200,6 +330,7 @@ func (a *App) autoRefreshData() { if node != nil { for _, vm := range node.VMs { if vm != nil { + preserveVMEnrichmentData(vm, existingVMMap) vms = append(vms, vm) } } @@ -249,7 +380,7 @@ func (a *App) autoRefreshData() { if currentPage == api.PageTasks { // Refresh tasks data without showing loading indicator (background refresh) go func() { - tasks, err := a.client.GetClusterTasks() + tasks, err := conn.client.GetClusterTasks() if err == nil { a.QueueUpdateDraw(func() { // Check if there's an active search filter @@ -274,6 +405,7 @@ func (a *App) autoRefreshData() { // Show success message a.header.ShowSuccess("Data refreshed successfully") a.footer.SetLoading(false) + a.endRefresh(token) // Reset countdown after refresh is complete a.autoRefreshCountdown = 10 diff --git a/internal/ui/components/connection_profiles.go b/internal/ui/components/connection_profiles.go index ce24012a..c7e9f0ca 100644 --- a/internal/ui/components/connection_profiles.go +++ b/internal/ui/components/connection_profiles.go @@ -383,13 +383,16 @@ func (a *App) showConnectionProfilesDialog() { case 's', 'S': - // Set as default - works for profiles and groups + // Toggle default - works for profiles and groups if selectionType == selectionTypeProfile || selectionType == selectionTypeGroup { a.CloseConnectionProfilesMenu() a.setDefaultProfile(selectionValue) + // Re-open the dialog so the updated ⭐ indicator is visible. + a.showConnectionProfilesDialog() + return nil } @@ -436,7 +439,7 @@ func (a *App) showConnectionProfilesDialog() { helpText := tview.NewTextView() - helpText.SetText("e:edit d:delete s:default") + helpText.SetText("e:edit d:delete s:toggle default") helpText.SetTextAlign(tview.AlignCenter) diff --git a/internal/ui/components/connection_profiles_operations.go b/internal/ui/components/connection_profiles_operations.go index d2fcbc40..dc85d6dd 100644 --- a/internal/ui/components/connection_profiles_operations.go +++ b/internal/ui/components/connection_profiles_operations.go @@ -17,26 +17,27 @@ import ( func (a *App) deactivateGroupModes(uiLogger interface { Debug(format string, args ...interface{}) }) { - if a.isGroupMode { + // Capture old references and clear fields atomically under write lock. + // Resources are closed AFTER releasing the lock to avoid holding it + // during potentially slow network-level Close() operations. + a.connMu.Lock() + oldGroupManager := a.groupManager + oldClusterClient := a.clusterClient + a.groupManager = nil + a.isGroupMode = false + a.clusterClient = nil + a.isClusterMode = false + a.groupName = "" + a.connMu.Unlock() + + if oldGroupManager != nil { uiLogger.Debug("Disabling group mode") - if a.groupManager != nil { - a.groupManager.Close() - } - a.groupManager = nil - a.isGroupMode = false + oldGroupManager.Close() } - if a.isClusterMode { + if oldClusterClient != nil { uiLogger.Debug("Disabling cluster mode") - if a.clusterClient != nil { - a.clusterClient.Close() - } - a.clusterClient = nil - a.isClusterMode = false - } - - if a.groupName != "" { - a.groupName = "" + oldClusterClient.Close() } if a.tasksList != nil { @@ -82,9 +83,12 @@ func (a *App) applyConnectionProfile(profileName string) { uiLogger.Debug("New API client created successfully for profile %s", profileName) - // Update app client and VNC service immediately to ensure subsequent calls use the new client - // This must happen before manualRefresh() is called + // Update app client under write lock. deactivateGroupModes also acquires + // the lock internally for its field writes. + a.connMu.Lock() a.client = client + a.connMu.Unlock() + if a.vncService != nil { a.vncService.UpdateClient(client) } @@ -103,9 +107,12 @@ func (a *App) applyConnectionProfile(profileName string) { a.header.ShowSuccess("Switched to profile '" + profileName + "' successfully!") }) - // Then refresh data with new connection (this will update the UI) - uiLogger.Debug("Starting manual refresh with new client") - a.manualRefresh() + // Force a new refresh generation token, discarding any in-flight refresh from + // the previous profile. Old callbacks' endRefresh calls will be no-ops because + // their token won't match the new token. + token := a.forceNewRefresh() + uiLogger.Debug("Starting fast refresh with new client") + a.doFastRefresh(token) }() } @@ -203,13 +210,16 @@ func (a *App) switchToGroup(groupName string) { // Update app state a.QueueUpdateDraw(func() { - // Set group mode + // Set group mode fields under write lock so background goroutines that + // call snapConn() see a consistent view. The lock is brief (field assignments). // Note: We keep a.client around even in group mode to avoid breaking callbacks // that were set up during initialization. In group mode, we use a.groupManager // for operations instead of a.client. + a.connMu.Lock() a.groupManager = manager a.isGroupMode = true a.groupName = groupName + a.connMu.Unlock() // Update header a.updateHeaderWithActiveProfile() @@ -260,8 +270,10 @@ func (a *App) switchToGroup(groupName string) { // Start background enrichment for detailed node stats // This ensures nodes get Version, Kernel, LoadAvg etc. populated - // Pass true for isInitialLoad to show "Guest agent data loaded" message - a.enrichGroupNodesSequentially(nodes, false, "", false, 0, "", false, true) + // Pass true for isInitialLoad to show "Guest agent data loaded" message. + // Force a new refresh token so the guard is acquired for this enrichment. + enrichToken := a.forceNewRefresh() + a.enrichGroupNodesParallel(enrichToken, nodes, false, "", false, 0, "", false, true) } // Update selection and details @@ -388,41 +400,55 @@ func (a *App) switchToClusterGroup(groupName string) { return } uiLogger.Info("[CLUSTER] Failover callback: %s -> %s", oldProfile, newProfile) - a.client = cc.GetActiveClient() - if a.client == nil { + newClient := cc.GetActiveClient() + if newClient == nil { uiLogger.Error("[CLUSTER] Failover callback has nil active client for %s", newProfile) return } + a.connMu.Lock() + a.client = newClient + a.connMu.Unlock() if a.vncService != nil { - a.vncService.UpdateClient(a.client) + a.vncService.UpdateClient(newClient) } a.updateHeaderWithActiveProfile() a.header.ShowWarning(fmt.Sprintf("Failover: %s \u2192 %s", oldProfile, newProfile)) - go a.manualRefresh() + // Force a new refresh token for failover — any in-flight refresh from + // the previous node is superseded. + failoverToken := a.forceNewRefresh() + go a.doFastRefresh(failoverToken) }) }) // Start health checks cc.StartHealthCheck() - // Update app state on UI thread - a.QueueUpdateDraw(func() { - // Set cluster mode state - a.clusterClient = cc - a.isClusterMode = true - a.groupName = groupName - a.client = cc.GetActiveClient() - if a.vncService != nil { - a.vncService.UpdateClient(a.client) - } + // Set app state under write lock. These fields are read by background goroutines + // (doFastRefresh, autoRefreshData, etc.) so they must be protected. + a.connMu.Lock() + a.clusterClient = cc + a.isClusterMode = true + a.groupName = groupName + a.client = cc.GetActiveClient() + activeClient := a.client // capture for vncService without holding lock + a.connMu.Unlock() + + if a.vncService != nil { + a.vncService.UpdateClient(activeClient) + } - // Update header to show cluster mode + // Update header to show cluster mode (UI update) + a.QueueUpdateDraw(func() { a.updateHeaderWithActiveProfile() a.header.ShowSuccess(fmt.Sprintf("Connected to cluster '%s' via %s", groupName, cc.GetActiveProfileName())) }) - // Trigger refresh to load data through normal single-profile flow - a.manualRefresh() + // Force a new refresh generation token, discarding any in-flight refresh. + // Old callbacks' endRefresh calls will be no-ops. + clusterToken := a.forceNewRefresh() + + // Load data fast — show basic node/VM names immediately, enrich in background + a.doFastRefresh(clusterToken) }() } @@ -521,7 +547,9 @@ func (a *App) showDeleteProfileDialog(profileName string) { a.SetFocus(confirm) } -// setDefaultProfile sets the specified profile as the default profile. +// setDefaultProfile toggles the default profile. If the specified profile/group +// is already the default, it clears the default (so the user will be prompted on +// next startup). Otherwise it sets the given name as the new default. func (a *App) setDefaultProfile(profileName string) { // Check if the target exists (profile or group) if a.config.Profiles == nil { @@ -544,17 +572,23 @@ func (a *App) setDefaultProfile(profileName string) { } } - // Check if it's already the default - if a.config.DefaultProfile == profileName { - a.header.ShowError(fmt.Sprintf("'%s' is already the default startup selection.", profileName)) - return - } - - // Store the old default profile name for the message + // Store the old default for the success message. oldDefault := a.config.DefaultProfile - // Set the new default profile - a.config.DefaultProfile = profileName + var successMsg string + if a.config.DefaultProfile == profileName { + // Toggle off: clear the default so the user is prompted on next startup. + a.config.DefaultProfile = "" + successMsg = fmt.Sprintf("Default startup selection cleared (was '%s'). You will be prompted on next startup.", profileName) + } else { + // Set the new default. + a.config.DefaultProfile = profileName + if oldDefault != "" { + successMsg = fmt.Sprintf("Default startup selection changed from '%s' to '%s'.", oldDefault, profileName) + } else { + successMsg = fmt.Sprintf("'%s' set as default startup selection.", profileName) + } + } // Save the config configPath, found := config.FindDefaultConfigPath() @@ -581,8 +615,7 @@ func (a *App) setDefaultProfile(profileName string) { } } - // Show success message - a.header.ShowSuccess(fmt.Sprintf("Default startup selection changed from '%s' to '%s'.", oldDefault, profileName)) + a.header.ShowSuccess(successMsg) } // reEncryptConfigIfNeeded re-encrypts the config file with SOPS. diff --git a/internal/ui/components/refresh.go b/internal/ui/components/refresh.go index 77135a76..476874cd 100644 --- a/internal/ui/components/refresh.go +++ b/internal/ui/components/refresh.go @@ -10,10 +10,51 @@ import ( "github.com/devnullvoid/pvetui/pkg/api" ) +// selSnap is a snapshot of the user's current selection state in the UI. +// It must be captured on the tview UI goroutine (inside a QueueUpdateDraw callback +// or before any goroutine is spawned when the caller is already on the UI goroutine). +type selSnap struct { + hasSelectedNode bool + selectedNodeName string + hasSelectedVM bool + selectedVMID int + selectedVMNode string + searchWasActive bool +} + +// captureSelections captures the user's current UI selections. +// MUST be called from the tview UI goroutine (inside QueueUpdateDraw or equivalent). +func (a *App) captureSelections() selSnap { + var s selSnap + if node := a.nodeList.GetSelectedNode(); node != nil { + s.hasSelectedNode = true + s.selectedNodeName = node.Name + } + if vm := a.vmList.GetSelectedVM(); vm != nil { + s.hasSelectedVM = true + s.selectedVMID = vm.ID + s.selectedVMNode = vm.Node + } + s.searchWasActive = a.mainLayout.GetItemCount() > 4 + return s +} + // manualRefresh refreshes all data manually. +// Acquires the refresh guard and delegates to doManualRefresh. func (a *App) manualRefresh() { - // * Check if there are any pending operations + token, ok := a.startRefresh() + if !ok { + return + } + a.doManualRefresh(token) +} + +// doManualRefresh is the internal implementation of a full refresh. +// Caller must have already acquired the refresh guard (token != 0). +func (a *App) doManualRefresh(token uint64) { + // Check if there are any pending operations if models.GlobalState.HasPendingOperations() { + a.endRefresh(token) a.showMessageSafe("Cannot refresh data while there are pending operations in progress") return } @@ -22,40 +63,25 @@ func (a *App) manualRefresh() { a.header.ShowLoading("Refreshing data...") a.footer.SetLoading(true) - // Store current selections for restoration - var hasSelectedNode, hasSelectedVM bool - - var selectedNodeName, selectedVMNode string - - var selectedVMID int - - if node := a.nodeList.GetSelectedNode(); node != nil { - hasSelectedNode = true - selectedNodeName = node.Name - } - - if vm := a.vmList.GetSelectedVM(); vm != nil { - hasSelectedVM = true - selectedVMID = vm.ID - selectedVMNode = vm.Node - } - - // Check if search is currently active - searchWasActive := a.mainLayout.GetItemCount() > 4 + // NOTE: UI state reads (selections, search) are intentionally NOT captured here. + // For the group mode path, they are captured inside QueueUpdateDraw (on the UI + // goroutine) before calling enrichGroupNodesParallel. For the single-profile path, + // doEnrichNodes captures them inside its own QueueUpdateDraw callback. This ensures + // all tview reads happen on the UI goroutine and are race-free. // Run data refresh in goroutine to avoid blocking UI go func() { - // Wait a moment for API changes to propagate to cluster resources endpoint - // This ensures we get fresh data after configuration updates - time.Sleep(500 * time.Millisecond) + // Snapshot connection state under lock so reads are race-free. + conn := a.snapConn() - if a.isGroupMode { + if conn.isGroupMode { // Group mode logic ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) defer cancel() - nodes, vms, err := a.groupManager.GetGroupClusterResources(ctx, true) + nodes, vms, err := conn.groupManager.GetGroupClusterResources(ctx, true) if err != nil { + a.endRefresh(token) a.QueueUpdateDraw(func() { a.header.ShowError(fmt.Sprintf("Refresh failed: %v", err)) a.footer.SetLoading(false) @@ -64,9 +90,10 @@ func (a *App) manualRefresh() { return } - // Debug logging retained at Debug level for troubleshooting. - a.QueueUpdateDraw(func() { + // Capture selections on the UI goroutine before starting background enrichment. + snap := a.captureSelections() + // Update GlobalState nodes/VMs; UI lists will be updated after enrichment to reduce flicker. models.GlobalState.OriginalNodes = nodes models.GlobalState.OriginalVMs = vms @@ -91,13 +118,17 @@ func (a *App) manualRefresh() { // Start background enrichment for detailed node stats // Pass false for isInitialLoad since this is a manual refresh - a.enrichGroupNodesSequentially(nodes, hasSelectedNode, selectedNodeName, hasSelectedVM, selectedVMID, selectedVMNode, searchWasActive, false) + a.enrichGroupNodesParallel(token, nodes, + snap.hasSelectedNode, snap.selectedNodeName, + snap.hasSelectedVM, snap.selectedVMID, snap.selectedVMNode, + snap.searchWasActive, false) }) } else { // Single profile logic // Fetch fresh data bypassing cache - cluster, err := a.client.GetFreshClusterStatus() + cluster, err := conn.client.GetFreshClusterStatus() if err != nil { + a.endRefresh(token) a.QueueUpdateDraw(func() { a.header.ShowError(fmt.Sprintf("Refresh failed: %v", err)) a.footer.SetLoading(false) @@ -110,13 +141,153 @@ func (a *App) manualRefresh() { // version during the initial refresh update to avoid brief UI flicker. a.preserveClusterVersionIfMissing(cluster) - // Initial UI update and enrichment + // Initial UI update and enrichment. Selections are captured inside + // doEnrichNodes' QueueUpdateDraw callback on the UI goroutine. a.applyInitialClusterUpdate(cluster) - a.enrichNodesSequentially(cluster, hasSelectedNode, selectedNodeName, hasSelectedVM, selectedVMID, selectedVMNode, searchWasActive) + // Manual refresh: show "Data refreshed successfully" after node enrichment completes + // (no VM enrichment callback follows in this path). + a.doEnrichNodes(cluster, conn.client, token, true) } }() } +// fastRefresh loads basic node/VM names immediately and enriches details in the background. +// This is the public entry point; it acquires the refresh guard and delegates to doFastRefresh. +// Used for normal fast-refresh (failover callbacks, etc.) where a refresh may already be running. +func (a *App) fastRefresh() { + token, ok := a.startRefresh() + if !ok { + return + } + a.doFastRefresh(token) +} + +// doFastRefresh is the implementation of fast refresh. +// Callers that already hold a generation token (e.g. profile switches via forceNewRefresh) +// should call this directly instead of fastRefresh. +// This is used for profile switching where perceived speed matters most — +// the user sees node and guest lists right away, then details (CPU, filesystems, guest agent) +// fill in progressively. +func (a *App) doFastRefresh(token uint64) { + if models.GlobalState.HasPendingOperations() { + a.endRefresh(token) + a.showMessageSafe("Cannot refresh data while there are pending operations in progress") + return + } + + a.header.ShowLoading("Loading data...") + a.footer.SetLoading(true) + + go func() { + // Snapshot connection state under lock so reads are race-free. + conn := a.snapConn() + + if conn.isGroupMode { + // Group mode: delegate to manual refresh internal logic (already shows data before enrichment). + // We already hold the refresh token, so pass it through directly. + a.doManualRefresh(token) + return + } + + // Capture the client instance that is starting this refresh. + // Used for making API calls with the correct client during enrichment. + refreshClient := conn.client + + // Single profile: get basic data fast (no VM enrichment), show it, then enrich in background + cluster, err := refreshClient.GetFastFreshClusterStatus(func(enrichErr error) { + // VM enrichment complete callback — update VM list with enriched data + a.QueueUpdateDraw(func() { + // Stale guard: abort only if a *newer* refresh has superseded this one. + // refreshGen == 0 means this refresh completed normally via endRefresh — + // we still need to run to clear the header loading indicator and update + // VM details (doEnrichNodes calls endRefresh before this callback fires). + if current := a.refreshGen.Load(); current != 0 && current != token { + return + } + + if refreshClient.Cluster == nil { + return + } + + // Rebuild VM list from enriched cluster data + var enrichedVMs []*api.VM + for _, node := range refreshClient.Cluster.Nodes { + if node != nil { + for _, vm := range node.VMs { + if vm != nil { + enrichedVMs = append(enrichedVMs, vm) + } + } + } + } + + if len(enrichedVMs) > 0 { + // Preserve current VM selection + var selectedVMID int + var selectedVMNode string + var hasSelectedVM bool + if selectedVM := a.vmList.GetSelectedVM(); selectedVM != nil { + selectedVMID = selectedVM.ID + selectedVMNode = selectedVM.Node + hasSelectedVM = true + } + + models.GlobalState.OriginalVMs = make([]*api.VM, len(enrichedVMs)) + copy(models.GlobalState.OriginalVMs, enrichedVMs) + + vmSearchState := models.GlobalState.GetSearchState(api.PageGuests) + if vmSearchState != nil && vmSearchState.HasActiveVMFilter() { + models.FilterVMs(vmSearchState.Filter) + a.vmList.SetVMs(models.GlobalState.FilteredVMs) + } else { + models.GlobalState.FilteredVMs = make([]*api.VM, len(enrichedVMs)) + copy(models.GlobalState.FilteredVMs, enrichedVMs) + a.vmList.SetVMs(models.GlobalState.FilteredVMs) + } + + // Restore VM selection + if hasSelectedVM { + vmList := a.vmList.GetVMs() + for i, vm := range vmList { + if vm != nil && vm.ID == selectedVMID && vm.Node == selectedVMNode { + a.vmList.SetCurrentItem(i) + break + } + } + } + + if selectedVM := a.vmList.GetSelectedVM(); selectedVM != nil { + a.vmDetails.Update(selectedVM) + } + } + + if enrichErr != nil { + a.header.ShowWarning("Guest agent enrichment partially failed") + } else { + a.header.ShowSuccess("Guest agent data loaded") + } + }) + }) + if err != nil { + a.endRefresh(token) + a.QueueUpdateDraw(func() { + a.header.ShowError(fmt.Sprintf("Refresh failed: %v", err)) + a.footer.SetLoading(false) + }) + return + } + + // We have basic cluster data — show it immediately + a.preserveClusterVersionIfMissing(cluster) + a.applyInitialClusterUpdate(cluster) + + // Start node enrichment in background (Version, disks, updates). + // Selections are captured inside doEnrichNodes' QueueUpdateDraw on the UI goroutine. + // Fast refresh path: VM enrichment callback will show the final message, not this. + a.doEnrichNodes(cluster, refreshClient, token, false) + }() +} + func (a *App) preserveClusterVersionIfMissing(cluster *api.Cluster) { if cluster == nil || cluster.Version != "" { return @@ -174,12 +345,27 @@ func (a *App) applyInitialClusterUpdate(cluster *api.Cluster) { }) } -// enrichNodesSequentially enriches node data in parallel and finalizes the refresh -func (a *App) enrichNodesSequentially(cluster *api.Cluster, hasSelectedNode bool, selectedNodeName string, hasSelectedVM bool, selectedVMID int, selectedVMNode string, searchWasActive bool) { +// doEnrichNodes enriches node data in parallel and finalizes the refresh. +// Selections are always captured inside the final QueueUpdateDraw callback (on the UI +// goroutine), ensuring race-free reads from tview widgets. +// The refreshClient parameter is the client used for API calls during enrichment. +// The token parameter is the generation token acquired at refresh start; if a newer +// refresh has started by the time enrichment completes, the callback is a no-op. +// When showFinalMessage is true, a "Data refreshed successfully" message is shown after +// enrichment completes. Set to false for the doFastRefresh path where the VM enrichment +// callback will show the final status message. +func (a *App) doEnrichNodes(cluster *api.Cluster, refreshClient *api.Client, token uint64, showFinalMessage bool) { go func() { var wg sync.WaitGroup - // Enrich nodes in parallel + // Start loading tasks in parallel with node enrichment (independent operation) + a.loadTasksData() + + // Accumulate enriched nodes into a local slice, then swap into GlobalState + // inside QueueUpdateDraw to avoid data races with the UI goroutine. + enrichedNodes := make([]*api.Node, len(cluster.Nodes)) + copy(enrichedNodes, cluster.Nodes) + for i, node := range cluster.Nodes { if node == nil { continue @@ -189,42 +375,47 @@ func (a *App) enrichNodesSequentially(cluster *api.Cluster, hasSelectedNode bool go func(idx int, n *api.Node) { defer wg.Done() - freshNode, err := a.client.RefreshNodeData(n.Name) + freshNode, err := refreshClient.RefreshNodeData(n.Name) if err == nil && freshNode != nil { - // Preserve VMs from the FRESH cluster data (not the original stale data) - // This ensures we keep the updated VM names we just fetched + // Preserve VMs from the FRESH cluster data if cluster.Nodes[idx] != nil { freshNode.VMs = cluster.Nodes[idx].VMs } - - // Update only the specific node index in global state - // Safe to access specific index concurrently - if idx < len(models.GlobalState.OriginalNodes) { - models.GlobalState.OriginalNodes[idx] = freshNode - } + enrichedNodes[idx] = freshNode } }(i, node) } wg.Wait() - // Final update: rebuild VMs, cluster version, status, and complete refresh + // Final update on the UI goroutine a.QueueUpdateDraw(func() { + // Stale guard: if a newer refresh has started (e.g. profile switch), discard these results. + // endRefresh is a no-op here since the token no longer matches. + if a.refreshGen.Load() != token { + return + } + + // Capture selections on the UI goroutine (race-free tview access). + snap := a.captureSelections() + + // Apply enriched nodes to global state + models.GlobalState.OriginalNodes = make([]*api.Node, len(enrichedNodes)) + copy(models.GlobalState.OriginalNodes, enrichedNodes) + // Re-apply node filters with enriched data nodeState := models.GlobalState.GetSearchState(api.PageNodes) if nodeState != nil && nodeState.Filter != "" { models.FilterNodes(nodeState.Filter) } else { - // Update filtered nodes from original (copy) - // We need to re-copy because OriginalNodes was updated in place - models.GlobalState.FilteredNodes = make([]*api.Node, len(models.GlobalState.OriginalNodes)) - copy(models.GlobalState.FilteredNodes, models.GlobalState.OriginalNodes) + models.GlobalState.FilteredNodes = make([]*api.Node, len(enrichedNodes)) + copy(models.GlobalState.FilteredNodes, enrichedNodes) } a.nodeList.SetNodes(models.GlobalState.FilteredNodes) // Rebuild VM list from enriched nodes var vms []*api.VM - for _, n := range models.GlobalState.OriginalNodes { + for _, n := range enrichedNodes { if n != nil { for _, vm := range n.VMs { if vm != nil { @@ -234,11 +425,9 @@ func (a *App) enrichNodesSequentially(cluster *api.Cluster, hasSelectedNode bool } } - // Update global VM state with enriched data models.GlobalState.OriginalVMs = make([]*api.VM, len(vms)) copy(models.GlobalState.OriginalVMs, vms) - // Apply VM filter if active vmSearchState := models.GlobalState.GetSearchState(api.PageGuests) if vmSearchState != nil && vmSearchState.HasActiveVMFilter() { models.FilterVMs(vmSearchState.Filter) @@ -250,7 +439,7 @@ func (a *App) enrichNodesSequentially(cluster *api.Cluster, hasSelectedNode bool } // Update cluster version from enriched nodes - for _, n := range models.GlobalState.OriginalNodes { + for _, n := range enrichedNodes { if n != nil && n.Version != "" { cluster.Version = fmt.Sprintf("Proxmox VE %s", n.Version) break @@ -258,34 +447,50 @@ func (a *App) enrichNodesSequentially(cluster *api.Cluster, hasSelectedNode bool } a.clusterStatus.Update(cluster) - // Final selection restore and search UI restoration + // Restore selection (use vmSearchState already fetched above) nodeSearchState := models.GlobalState.GetSearchState(api.PageNodes) + a.restoreSelection(snap.hasSelectedVM, snap.selectedVMID, snap.selectedVMNode, vmSearchState, + snap.hasSelectedNode, snap.selectedNodeName, nodeSearchState) - a.restoreSelection(hasSelectedVM, selectedVMID, selectedVMNode, vmSearchState, - hasSelectedNode, selectedNodeName, nodeSearchState) - - // Update details if items are selected + // Update details for whatever is now selected if node := a.nodeList.GetSelectedNode(); node != nil { a.nodeDetails.Update(node, models.GlobalState.OriginalNodes) } - if vm := a.vmList.GetSelectedVM(); vm != nil { a.vmDetails.Update(vm) } - a.restoreSearchUI(searchWasActive, nodeSearchState, vmSearchState) - a.header.ShowSuccess("Data refreshed successfully") + a.restoreSearchUI(snap.searchWasActive, nodeSearchState, vmSearchState) + if showFinalMessage { + // Manual refresh path: no VM enrichment callback follows, so show success here. + a.header.ShowSuccess("Data refreshed successfully") + } + // Fast refresh path: the VM enrichment callback (in doFastRefresh) will show + // the final status message after guest agent data is loaded. a.footer.SetLoading(false) - a.loadTasksData() + a.endRefresh(token) }) }() } -// enrichGroupNodesSequentially enriches group node data in parallel and finalizes the refresh -func (a *App) enrichGroupNodesSequentially(nodes []*api.Node, hasSelectedNode bool, selectedNodeName string, hasSelectedVM bool, selectedVMID int, selectedVMNode string, searchWasActive bool, isInitialLoad bool) { +// enrichGroupNodesParallel enriches group node data in parallel and finalizes the refresh. +// token is the generation token that must be passed to endRefresh when enrichment completes. +func (a *App) enrichGroupNodesParallel(token uint64, nodes []*api.Node, hasSelectedNode bool, selectedNodeName string, hasSelectedVM bool, selectedVMID int, selectedVMNode string, searchWasActive bool, isInitialLoad bool) { go func() { var wg sync.WaitGroup + // Start loading tasks in parallel with node enrichment (independent operation) + a.loadTasksData() + + // Snapshot connection state under lock so reads are race-free. + conn := a.snapConn() + + // Accumulate enriched nodes into a local slice to avoid data races. + // Concurrent goroutines write to their own index; the slice is swapped into + // GlobalState inside QueueUpdateDraw after all goroutines complete. + enrichedNodes := make([]*api.Node, len(nodes)) + copy(enrichedNodes, nodes) + // Create a context for the enrichment process ctx := context.Background() @@ -299,8 +504,8 @@ func (a *App) enrichGroupNodesSequentially(nodes []*api.Node, hasSelectedNode bo go func(idx int, n *api.Node) { defer wg.Done() - // We need to fetch the node status from the specific profile - freshNode, err := a.groupManager.GetNodeFromGroup(ctx, n.SourceProfile, n.Name) + // Fetch the node status from the specific profile's client + freshNode, err := conn.groupManager.GetNodeFromGroup(ctx, n.SourceProfile, n.Name) if err == nil && freshNode != nil { // Ensure Online status is set to true if we got a response @@ -326,30 +531,32 @@ func (a *App) enrichGroupNodesSequentially(nodes []*api.Node, hasSelectedNode bo // Ensure SourceProfile is preserved freshNode.SourceProfile = n.SourceProfile - // Update GlobalState - if idx < len(models.GlobalState.OriginalNodes) { - models.GlobalState.OriginalNodes[idx] = freshNode - } + // Write to local slice only — no GlobalState mutation from goroutines. + enrichedNodes[idx] = freshNode } }(i, node) } wg.Wait() - // Final update + // Final update — swap enriched nodes into GlobalState on the UI goroutine. a.QueueUpdateDraw(func() { + // Apply enriched nodes to global state (race-free: single UI goroutine). + models.GlobalState.OriginalNodes = make([]*api.Node, len(enrichedNodes)) + copy(models.GlobalState.OriginalNodes, enrichedNodes) + // Re-apply node filters nodeState := models.GlobalState.GetSearchState(api.PageNodes) if nodeState != nil && nodeState.Filter != "" { models.FilterNodes(nodeState.Filter) } else { - models.GlobalState.FilteredNodes = make([]*api.Node, len(models.GlobalState.OriginalNodes)) - copy(models.GlobalState.FilteredNodes, models.GlobalState.OriginalNodes) + models.GlobalState.FilteredNodes = make([]*api.Node, len(enrichedNodes)) + copy(models.GlobalState.FilteredNodes, enrichedNodes) } a.nodeList.SetNodes(models.GlobalState.FilteredNodes) // Update cluster status with the enriched nodes - syntheticCluster := a.createSyntheticGroup(models.GlobalState.OriginalNodes) + syntheticCluster := a.createSyntheticGroup(enrichedNodes) a.clusterStatus.Update(syntheticCluster) // Final selection restore and search UI restoration @@ -381,7 +588,7 @@ func (a *App) enrichGroupNodesSequentially(nodes []*api.Node, hasSelectedNode bo a.header.ShowSuccess("Data refreshed successfully") } a.footer.SetLoading(false) - a.loadTasksData() + a.endRefresh(token) }) }() } @@ -471,16 +678,19 @@ func (a *App) refreshNodeData(node *api.Node) { // loadTasksData loads and updates task data with proper filtering. func (a *App) loadTasksData() { go func() { + // Snapshot connection state under lock so reads are race-free. + conn := a.snapConn() + var tasks []*api.ClusterTask var err error - if a.isGroupMode { + if conn.isGroupMode { // Create context with timeout for group operations ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() - tasks, err = a.groupManager.GetGroupTasks(ctx) + tasks, err = conn.groupManager.GetGroupTasks(ctx) } else { - tasks, err = a.client.GetClusterTasks() + tasks, err = conn.client.GetClusterTasks() } if err == nil { diff --git a/internal/ui/components/refresh_vm.go b/internal/ui/components/refresh_vm.go index ed30da47..0410fad2 100644 --- a/internal/ui/components/refresh_vm.go +++ b/internal/ui/components/refresh_vm.go @@ -9,128 +9,32 @@ import ( // refreshVMData refreshes data for the selected VM. func (a *App) refreshVMData(vm *api.VM) { - // * Check if VM has pending operations - if isPending, pendingOperation := models.GlobalState.IsVMPending(vm); isPending { - a.showMessageSafe(fmt.Sprintf("Cannot refresh VM while '%s' is in progress", pendingOperation)) - return - } - - // Show loading indicator - a.header.ShowLoading(fmt.Sprintf("Refreshing VM %s", vm.Name)) - - // Store VM identity for selection restoration - vmID := vm.ID - vmNode := vm.Node - - // Run refresh in goroutine to avoid blocking UI - go func() { - // Get the correct client for this VM (important for group mode) - client, err := a.getClientForVM(vm) - if err != nil { - a.QueueUpdateDraw(func() { - a.header.ShowError(fmt.Sprintf("Error refreshing VM: %v", err)) - }) - return - } - - // Fetch fresh VM data with callback for when enrichment completes - freshVM, err := client.RefreshVMData(vm, func(enrichedVM *api.VM) { - // This callback is called after guest agent data has been loaded - a.QueueUpdateDraw(func() { - // Update VM details if this VM is currently selected - if selectedVM := a.vmList.GetSelectedVM(); selectedVM != nil && selectedVM.ID == enrichedVM.ID && selectedVM.Node == enrichedVM.Node { - a.vmDetails.Update(enrichedVM) - } - }) - }) - if err != nil { - // If VM refresh fails (e.g., VM was migrated to a different node), - // fall back to a full refresh to find the VM on its new node - a.QueueUpdateDraw(func() { - a.header.ShowLoading("VM may have been migrated, performing full refresh") - }) - a.QueueUpdateDraw(func() { - a.manualRefresh() // This will find the VM on its new node - }) - - return - } - - // Update UI with fresh data on main thread - a.QueueUpdateDraw(func() { - // Get current search state - vmSearchState := models.GlobalState.GetSearchState(api.PageGuests) - - // Find the VM in the global state and update it - for i, originalVM := range models.GlobalState.OriginalVMs { - if originalVM != nil && originalVM.ID == vmID && originalVM.Node == vmNode { - // Preserve SourceProfile from original VM (important for group mode) - if originalVM.SourceProfile != "" { - freshVM.SourceProfile = originalVM.SourceProfile - } - models.GlobalState.OriginalVMs[i] = freshVM - - break - } - } - - // Update filtered VMs if they exist - for i, filteredVM := range models.GlobalState.FilteredVMs { - if filteredVM != nil && filteredVM.ID == vmID && filteredVM.Node == vmNode { - // Preserve SourceProfile from filtered VM (important for group mode) - if filteredVM.SourceProfile != "" { - freshVM.SourceProfile = filteredVM.SourceProfile - } - models.GlobalState.FilteredVMs[i] = freshVM - - break - } - } - - // Also update the VM in the node's VM list - for _, node := range models.GlobalState.OriginalNodes { - if node != nil && node.Name == vmNode { - for i, nodeVM := range node.VMs { - if nodeVM != nil && nodeVM.ID == vmID { - node.VMs[i] = freshVM - - break - } - } - - break - } - } - - // Reapply filter if one is active - if vmSearchState != nil && vmSearchState.HasActiveVMFilter() { - models.FilterVMs(vmSearchState.Filter) - } - - // Update the VM list display - a.vmList.SetVMs(models.GlobalState.FilteredVMs) - - // Preserve whichever VM the user is currently focused on (selection is restored by SetVMs). - if vmSearchState != nil { - vmSearchState.SelectedIndex = a.vmList.GetCurrentItem() - } - - // Update VM details for the currently selected VM. - if selectedVM := a.vmList.GetSelectedVM(); selectedVM != nil { - a.vmDetails.Update(selectedVM) - } - - // Show success message - a.header.ShowSuccess(fmt.Sprintf("VM %s refreshed successfully", vm.Name)) - }) - }() + a.doRefreshVMData(vm, false) } // refreshVMDataAndTasks refreshes both VM data and tasks list. // This is useful for operations that affect both VM state and create tasks (like volume resize and snapshot rollback). func (a *App) refreshVMDataAndTasks(vm *api.VM) { + a.doRefreshVMData(vm, true) +} + +// doRefreshVMData is the shared implementation for VM data refresh. +// When withTasks is true, it also refreshes the tasks list after the VM data is updated. +func (a *App) doRefreshVMData(vm *api.VM, withTasks bool) { + // Check if VM has pending operations (skip for withTasks since the caller just queued the task) + if !withTasks { + if isPending, pendingOperation := models.GlobalState.IsVMPending(vm); isPending { + a.showMessageSafe(fmt.Sprintf("Cannot refresh VM while '%s' is in progress", pendingOperation)) + return + } + } + // Show loading indicator - a.header.ShowLoading(fmt.Sprintf("Refreshing VM %s and tasks", vm.Name)) + if withTasks { + a.header.ShowLoading(fmt.Sprintf("Refreshing VM %s and tasks", vm.Name)) + } else { + a.header.ShowLoading(fmt.Sprintf("Refreshing VM %s", vm.Name)) + } // Store VM identity for selection restoration vmID := vm.ID @@ -159,13 +63,13 @@ func (a *App) refreshVMDataAndTasks(vm *api.VM) { }) if err != nil { // If VM refresh fails (e.g., VM was migrated to a different node), - // fall back to a full refresh to find the VM on its new node + // fall back to a full refresh to find the VM on its new node. + // Note: manualRefresh must NOT be called inside QueueUpdateDraw to avoid + // nested QueueUpdateDraw deadlocks (manualRefresh queues its own updates). a.QueueUpdateDraw(func() { a.header.ShowLoading("VM may have been migrated, performing full refresh") }) - a.QueueUpdateDraw(func() { - a.manualRefresh() // This will find the VM on its new node - }) + a.manualRefresh() return } @@ -234,11 +138,17 @@ func (a *App) refreshVMDataAndTasks(vm *api.VM) { a.vmDetails.Update(selectedVM) } - // Also refresh tasks to show any new tasks created by the operation - a.loadTasksData() + // Refresh tasks if requested (for operations that create tasks) + if withTasks { + a.loadTasksData() + } // Show success message - a.header.ShowSuccess(fmt.Sprintf("VM %s and tasks refreshed successfully", vm.Name)) + if withTasks { + a.header.ShowSuccess(fmt.Sprintf("VM %s and tasks refreshed successfully", vm.Name)) + } else { + a.header.ShowSuccess(fmt.Sprintf("VM %s refreshed successfully", vm.Name)) + } }) }() } diff --git a/internal/ui/components/vm_migration.go b/internal/ui/components/vm_migration.go index c62cf75e..17ad9cbd 100644 --- a/internal/ui/components/vm_migration.go +++ b/internal/ui/components/vm_migration.go @@ -186,13 +186,8 @@ func (a *App) performMigrationOperation(vm *api.VM, options *api.MigrationOption } else { a.header.ShowSuccess(fmt.Sprintf("Migration of %s to %s completed successfully", vm.Name, options.Target)) - // Clear API cache for the specific client - client, _ := a.getClientForVM(vm) - if client != nil { - client.ClearAPICache() - } - - // Full refresh needed as node changed + // manualRefresh handles selective cache invalidation; + // no need for a redundant full cache wipe here. a.manualRefresh() } }) diff --git a/internal/ui/components/vm_operations.go b/internal/ui/components/vm_operations.go index 3377c1d8..3bf7df2b 100644 --- a/internal/ui/components/vm_operations.go +++ b/internal/ui/components/vm_operations.go @@ -76,13 +76,8 @@ func (a *App) performVMDeleteOperation(vm *api.VM, forced bool) { } else { a.header.ShowSuccess(fmt.Sprintf("Successfully deleted %s", vm.Name)) - // Clear API cache for the specific client - client, _ := a.getClientForVM(vm) - if client != nil { - client.ClearAPICache() - } - - // Refresh to update the UI (VM should be gone) + // manualRefresh handles selective cache invalidation; + // no need for a redundant full cache wipe here. a.manualRefresh() } }) diff --git a/pkg/api/client.go b/pkg/api/client.go index 5599a43f..a2b67250 100644 --- a/pkg/api/client.go +++ b/pkg/api/client.go @@ -7,6 +7,7 @@ import ( "net/http" "net/url" "strings" + "sync" "time" "github.com/devnullvoid/pvetui/pkg/api/interfaces" @@ -276,6 +277,49 @@ func (c *Client) ClearAPICache() { } } +// ClearClusterCache selectively invalidates cluster-level cache entries +// instead of wiping the entire cache. This preserves node-level cached +// data (disks, updates) that rarely changes, avoiding unnecessary API +// round-trips during refresh. +func (c *Client) ClearClusterCache() { + clusterPaths := []string{ + "/cluster/status", + "/cluster/resources", + "/cluster/tasks", + } + + for _, path := range clusterPaths { + cacheKey := fmt.Sprintf("proxmox_api_%s_%s", c.baseURL, path) + cacheKey = strings.ReplaceAll(cacheKey, "/", "_") + + _ = c.cache.Delete(cacheKey) + } + + c.logger.Debug("Cluster-level cache entries cleared selectively") +} + +// ClearNodeCache selectively invalidates cache entries for a specific node. +// This is more efficient than clearing the entire cache when only one node's +// data needs to be refreshed. +func (c *Client) ClearNodeCache(nodeName string) { + nodePaths := []string{ + fmt.Sprintf("/nodes/%s/status", nodeName), + fmt.Sprintf("/nodes/%s/version", nodeName), + fmt.Sprintf("/nodes/%s/config", nodeName), + fmt.Sprintf("/nodes/%s/disks/list", nodeName), + fmt.Sprintf("/nodes/%s/apt/update", nodeName), + } + + for _, path := range nodePaths { + cacheKey := fmt.Sprintf("proxmox_api_%s_%s", c.baseURL, path) + cacheKey = strings.ReplaceAll(cacheKey, "/", "_") + + _ = c.cache.Delete(cacheKey) + } + + c.logger.Debug("Cache entries cleared for node %s", nodeName) +} + // GetCache returns the cache instance used by this client. // This is useful for sharing cache instances across multiple clients in group mode. func (c *Client) GetCache() interfaces.Cache { @@ -293,9 +337,107 @@ func (c *Client) BaseHostname() string { } // GetFreshClusterStatus retrieves cluster status bypassing cache completely. +// This includes VM enrichment with guest agent data for full accuracy. func (c *Client) GetFreshClusterStatus() (*Cluster, error) { - // Clear the cache first to ensure fresh data - c.ClearAPICache() + return c.getFreshClusterStatusInternal(true) +} + +// GetLightClusterStatus retrieves fresh cluster status without VM enrichment. +// This is faster than GetFreshClusterStatus and suitable for periodic auto-refresh +// where guest agent data (IPs, filesystems) doesn't need to be refreshed every cycle. +func (c *Client) GetLightClusterStatus() (*Cluster, error) { + return c.getFreshClusterStatusInternal(false) +} + +// GetFastFreshClusterStatus retrieves fresh cluster status (cache-busting) and returns +// basic data immediately. VM enrichment runs in the background and the provided callback +// is called when enrichment completes (with any error that occurred). +// This gives the UI instant node/VM names while detailed guest agent data loads asynchronously. +func (c *Client) GetFastFreshClusterStatus(onEnrichmentComplete func(error)) (*Cluster, error) { + // Clear cluster-level cache so we get truly fresh data + c.ClearClusterCache() + + cluster := &Cluster{ + Nodes: make([]*Node, 0), + StorageManager: NewStorageManager(), + lastUpdate: time.Now(), + } + + // 1. Get basic cluster status (node names, online/offline) + if err := c.getClusterBasicStatus(cluster); err != nil { + return nil, err + } + + // 2. Get cluster resources fresh (TTL=0) — gives us node stats + VM names/status + if err := c.processClusterResourcesWithCache(cluster, 0); err != nil { + return nil, err + } + + // 3. Calculate cluster-wide totals from the basic data + c.calculateClusterTotals(cluster) + + c.Cluster = cluster + + // 4. Run VM enrichment in background — guest agent IPs, filesystems, configs + go func() { + c.logger.Debug("[FAST-FRESH] Starting background VM enrichment for %d nodes", len(cluster.Nodes)) + + // Reset guestAgentChecked before enrichment (same as startup path) + for _, node := range cluster.Nodes { + if node.Online && node.VMs != nil { + for _, vm := range node.VMs { + vm.guestAgentChecked = false + } + } + } + + var enrichErr error + if err := c.EnrichVMs(cluster); err != nil { + c.logger.Debug("[FAST-FRESH] Error enriching VM data: %v", err) + enrichErr = err + } + + // Retry QEMU VMs with missing guest agent data (same as startup path) + time.Sleep(3 * time.Second) + for _, node := range cluster.Nodes { + if !node.Online || node.VMs == nil { + continue + } + for _, vm := range node.VMs { + if vm.Status == VMStatusRunning && vm.Type == VMTypeQemu && vm.AgentEnabled && (!vm.AgentRunning || len(vm.NetInterfaces) == 0) { + if err := c.GetVmStatus(vm); err != nil { + if strings.Contains(err.Error(), "guest agent is not running") { + // Expected: agent not yet ready; not a reportable error. + continue + } + // Unexpected retry error: aggregate into enrichErr so the caller + // is aware that some VMs may have incomplete guest agent data. + c.logger.Debug("[FAST-FRESH] Retry error for VM %d on %s: %v", vm.ID, node.Name, err) + if enrichErr == nil { + enrichErr = fmt.Errorf("guest agent retry: VM %d (%s): %w", vm.ID, node.Name, err) + } else { + enrichErr = fmt.Errorf("%w; VM %d (%s): %v", enrichErr, vm.ID, node.Name, err) + } + } + } + } + } + + if onEnrichmentComplete != nil { + onEnrichmentComplete(enrichErr) + } + }() + + return cluster, nil +} + +// getFreshClusterStatusInternal is the shared implementation for cluster status retrieval. +// When enrichVMs is true, running VMs are enriched with guest agent data (slower but more complete). +func (c *Client) getFreshClusterStatusInternal(enrichVMs bool) (*Cluster, error) { + // Selectively clear cluster-level cache entries rather than the entire cache. + // Node-level data (disks, updates) is invalidated per-node during enrichment, + // so clearing everything here wastes cached data that is still valid. + c.ClearClusterCache() // Create a fresh cluster with minimal cache TTL for resources cluster := &Cluster{ @@ -314,10 +456,12 @@ func (c *Client) GetFreshClusterStatus() (*Cluster, error) { return nil, err } - // 3. Enrich VMs with detailed status information - if err := c.EnrichVMs(cluster); err != nil { - // Log error but continue - c.logger.Debug("[CLUSTER] Error enriching VM data: %v", err) + // 3. Enrich VMs with detailed status information (skipped for light refresh) + if enrichVMs { + if err := c.EnrichVMs(cluster); err != nil { + // Log error but continue + c.logger.Debug("[CLUSTER] Error enriching VM data: %v", err) + } } // 4. Calculate cluster-wide totals @@ -365,15 +509,49 @@ func (c *Client) RefreshNodeData(nodeName string) (*Node, error) { } } - // Fetch fresh node data - freshNode, err := c.GetNodeStatus(nodeName) - if err != nil { + // Fetch node status, disks, and updates concurrently. + // Disks and updates only need the node name and are independent of each other. + var ( + freshNode *Node + statusErr error + disks []NodeDisk + updates []NodeUpdate + wg sync.WaitGroup + ) + + wg.Add(3) //nolint:mnd // status + disks + updates + + go func() { + defer wg.Done() + + freshNode, statusErr = c.GetNodeStatus(nodeName) + }() + + go func() { + defer wg.Done() + + if d, err := c.GetNodeDisks(nodeName); err == nil { + disks = d + } + }() + + go func() { + defer wg.Done() + + if u, err := c.GetNodeUpdates(nodeName); err == nil { + updates = u + } + }() + + wg.Wait() + + if statusErr != nil { // If we can't reach the node, it's likely offline if originalNode != nil { originalNode.Online = false } - return nil, fmt.Errorf("failed to refresh node %s: %w", nodeName, err) + return nil, fmt.Errorf("failed to refresh node %s: %w", nodeName, statusErr) } // If we successfully got node status, the node is online @@ -392,15 +570,9 @@ func (c *Client) RefreshNodeData(nodeName string) (*Node, error) { } } - // Fetch Disks - if disks, err := c.GetNodeDisks(nodeName); err == nil { - freshNode.Disks = disks - } - - // Fetch Updates - if updates, err := c.GetNodeUpdates(nodeName); err == nil { - freshNode.Updates = updates - } + // Apply concurrently-fetched disks and updates + freshNode.Disks = disks + freshNode.Updates = updates return freshNode, nil } @@ -594,14 +766,47 @@ func (c *Client) enrichNodeMissingDetails(node *Node) error { return nil } - fullStatus, err := c.GetNodeStatus(node.Name) - if err != nil { + // Fetch status, disks, and updates concurrently. + // All three only require the node name and are independent of each other. + var ( + fullStatus *Node + statusErr error + disks []NodeDisk + disksErr error + updates []NodeUpdate + updatesErr error + wg sync.WaitGroup + ) + + wg.Add(3) //nolint:mnd // status + disks + updates + + go func() { + defer wg.Done() + + fullStatus, statusErr = c.GetNodeStatus(node.Name) + }() + + go func() { + defer wg.Done() + + disks, disksErr = c.GetNodeDisks(node.Name) + }() + + go func() { + defer wg.Done() + + updates, updatesErr = c.GetNodeUpdates(node.Name) + }() + + wg.Wait() + + if statusErr != nil { // Mark node as offline if we can't reach it node.Online = false - c.logger.Debug("[CLUSTER] Node %s appears to be offline or unreachable for detail enrichment: %v", node.Name, err) + c.logger.Debug("[CLUSTER] Node %s appears to be offline or unreachable for detail enrichment: %v", node.Name, statusErr) // Return error for logging but don't make it critical - return fmt.Errorf("node %s offline/unreachable for details: %w", node.Name, err) + return fmt.Errorf("node %s offline/unreachable for details: %w", node.Name, statusErr) } // Only update fields not available in cluster resources @@ -611,18 +816,16 @@ func (c *Client) enrichNodeMissingDetails(node *Node) error { node.LoadAvg = fullStatus.LoadAvg node.lastMetricsUpdate = time.Now() - // Fetch Disks - if disks, err := c.GetNodeDisks(node.Name); err == nil { + if disksErr == nil { node.Disks = disks } else { - c.logger.Debug("Failed to fetch disks for node %s: %v", node.Name, err) + c.logger.Debug("Failed to fetch disks for node %s: %v", node.Name, disksErr) } - // Fetch Updates - if updates, err := c.GetNodeUpdates(node.Name); err == nil { + if updatesErr == nil { node.Updates = updates } else { - c.logger.Debug("Failed to fetch updates for node %s: %v", node.Name, err) + c.logger.Debug("Failed to fetch updates for node %s: %v", node.Name, updatesErr) } c.logger.Debug("[CLUSTER] Successfully enriched missing details for node: %s", node.Name) diff --git a/pkg/api/grouped-cluster.go b/pkg/api/grouped-cluster.go index 11a6d537..ce0f50f3 100644 --- a/pkg/api/grouped-cluster.go +++ b/pkg/api/grouped-cluster.go @@ -284,15 +284,16 @@ func (m *GroupClientManager) GetGroupClusterResources(ctx context.Context, fresh // Fetch nodes go func() { - nodes, err := m.GetGroupNodes(ctx) if fresh { - // Invalidate node status caches for each profile before returning + // Selectively invalidate cluster-level cache keys per profile + // instead of wiping the entire cache (preserves node disks, updates, etc.) for _, pc := range m.GetConnectedClients() { - if pc.Client != nil && pc.Client.cache != nil { - _ = pc.Client.cache.Clear() + if pc.Client != nil { + pc.Client.ClearClusterCache() } } } + nodes, err := m.GetGroupNodes(ctx) nodesChan <- result{nodes: nodes, err: err} }() @@ -326,10 +327,10 @@ func (m *GroupClientManager) GetNodeFromGroup( nodeName string, ) (*Node, error) { operation := func(pName string, client *Client) (interface{}, error) { - // Clear per-node caches to ensure fresh status - if client.cache != nil { - _ = client.cache.Clear() - } + // Selectively invalidate this node's cache keys instead of wiping the entire cache. + // This preserves disk/update/VM caches for other nodes in the same profile. + client.ClearNodeCache(nodeName) + node, err := client.GetNodeStatus(nodeName) if err != nil { return nil, fmt.Errorf("failed to get node status: %w", err) diff --git a/pkg/api/vm.go b/pkg/api/vm.go index 5d5335d0..6f31aa40 100644 --- a/pkg/api/vm.go +++ b/pkg/api/vm.go @@ -3,6 +3,7 @@ package api import ( "fmt" "strings" + "sync" ) // GetVmStatus retrieves current status metrics for a VM or LXC. @@ -303,12 +304,53 @@ func (c *Client) GetDetailedVmInfo(node, vmType string, vmid int) (*VM, error) { Type: vmType, } - // Get status information (cached) - var statusRes map[string]interface{} + // Fetch status and config concurrently -- they are independent API calls. + var ( + statusRes map[string]interface{} + configRes map[string]interface{} + statusErr error + configErr error + wg sync.WaitGroup + ) - statusEndpoint := fmt.Sprintf("/nodes/%s/%s/%d/status/current", node, vmType, vmid) - if err := c.GetWithCache(statusEndpoint, &statusRes, VMDataTTL); err != nil { - return nil, fmt.Errorf("failed to get VM status: %w", err) + wg.Add(2) //nolint:mnd // status + config + + go func() { + defer wg.Done() + + var res map[string]interface{} + + statusEndpoint := fmt.Sprintf("/nodes/%s/%s/%d/status/current", node, vmType, vmid) + if err := c.GetWithCache(statusEndpoint, &res, VMDataTTL); err != nil { + statusErr = fmt.Errorf("failed to get VM status: %w", err) + return + } + + statusRes = res + }() + + go func() { + defer wg.Done() + + var res map[string]interface{} + + configEndpoint := fmt.Sprintf("/nodes/%s/%s/%d/config", node, vmType, vmid) + if err := c.GetWithCache(configEndpoint, &res, VMDataTTL); err != nil { + configErr = fmt.Errorf("failed to get VM config: %w", err) + return + } + + configRes = res + }() + + wg.Wait() + + if statusErr != nil { + return nil, statusErr + } + + if configErr != nil { + return nil, configErr } statusDataRaw := statusRes["data"] @@ -387,14 +429,6 @@ func (c *Client) GetDetailedVmInfo(node, vmType string, vmid int) (*VM, error) { vm.Pool = pool } - // Get config information (cached) - var configRes map[string]interface{} - - configEndpoint := fmt.Sprintf("/nodes/%s/%s/%d/config", node, vmType, vmid) - if err := c.GetWithCache(configEndpoint, &configRes, VMDataTTL); err != nil { - return nil, fmt.Errorf("failed to get VM config: %w", err) - } - configData, ok := configRes["data"].(map[string]interface{}) if !ok { return nil, fmt.Errorf("invalid VM config response format")