Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion cmd/google_guest_agent/setup/setup.go
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,6 @@ func Run(ctx context.Context, c Config) error {
galog.Infof("Registered ACS watcher and handler")
} else {
galog.Infof("ACS watcher config enabled: %t, service account is present: %t, skipping ACS watcher and handler initialization. On Demand plugins will not be available.", c.EnableACSWatcher, conf.svcActPresent)

}

pm, err := manager.InitPluginManager(ctx, conf.id)
Expand Down
1 change: 1 addition & 0 deletions internal/acp/proto/agent_controlplane.proto
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,7 @@ message GuestAgentModuleMetric {
WINDOWS_PASSWORD_RESET = 16;
CORE_PLUGIN_INITIALIZATION = 17;
GUEST_COMPAT_MANAGER_INITIALIZATION = 18;
PLUGIN_CLEANUP = 19;
}

enum ModuleStatus {
Expand Down
176 changes: 176 additions & 0 deletions internal/plugin/manager/cleanup.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
// Copyright 2025 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package manager

import (
"context"
"errors"
"fmt"
"os"
"path/filepath"
"regexp"
"strings"
"time"

"github.com/GoogleCloudPlatform/galog"
acmpb "github.com/GoogleCloudPlatform/google-guest-agent/internal/acp/proto/google_guest_agent/acp"
"github.com/GoogleCloudPlatform/google-guest-agent/internal/utils/file"
)

// retryFailedRemovals removes all old plugin states.
//
// This function is called by the cleanup watcher to remove old plugin states.
// This will primarily retry removing plugins that weren't properly removed before.
func (m *PluginManager) retryFailedRemovals(ctx context.Context) (bool, error) {
galog.Debugf("Cleaning up old/failed plugin states")
// Check for plugins that aren't cached by the guest agent that should be removed.
pluginInstallLoc := filepath.Join(baseState(), pluginInstallDir)
pluginDirs, err := os.ReadDir(pluginInstallLoc)
if err != nil {
return true, fmt.Errorf("failed to read plugin install directory: %w", err)
}

// Keep track of the links to the plugin directories. We need to verify that
// the link points to a plugin directory that is no longer cached by the guest
// agent before removing it.
var links []string
failedRemovals := make(map[string]*Plugin)
m.pendingPluginRevisionsMu.RLock()
for _, p := range pluginDirs {
if !p.IsDir() {
links = append(links, p.Name())
continue
}
// Plugin directories have the form <name>_<revision>. The revision is
// typically a hash, so it should be safe to split by the last underscore.
nameIndex := strings.LastIndex(p.Name(), "_")
pluginName := p.Name()[:nameIndex]
pluginRevision := p.Name()[nameIndex+1:]
// We want to avoid cleaning up plugins that currently have a request in progress.
// This is to prevent a race condition where the plugin is removed in the
// middle of an installation request.
if m.inProgressPluginRequests[pluginName] {
continue
}
if _, err := m.Fetch(p.Name()); err != nil {
installPath := filepath.Join(pluginInstallLoc, p.Name())
failedRemovals[p.Name()] = &Plugin{Name: pluginName, Revision: pluginRevision, InstallPath: installPath, PluginType: PluginTypeDynamic}
galog.Debugf("Found leftover plugin %q, marking for removal", p.Name())
}
}
m.pendingPluginRevisionsMu.RUnlock()

// Remove the links to the plugin directories that are no longer cached by the guest agent.
for _, link := range links {
dest, err := os.Readlink(filepath.Join(pluginInstallLoc, link))
if err != nil {
continue
}
// Check if the link points to a plugin directory that is no longer cached
// by the guest agent.
if _, ok := failedRemovals[filepath.Base(dest)]; ok {
linkPath := filepath.Join(pluginInstallLoc, link)
galog.Debugf("Removing link %q", linkPath)
if err := os.Remove(linkPath); err != nil {
galog.Debugf("Unable to remove link %q: %v", linkPath, err)
}
}
}

var errs []error
noop := len(failedRemovals) == 0
for _, p := range failedRemovals {
if err := cleanup(ctx, p); err != nil {
errs = append(errs, err)
}
}
return noop, errors.Join(errs...)
}

func (m *PluginManager) cleanupOldState(ctx context.Context, path string) error {
re := regexp.MustCompile("^[0-9]+$")

if !file.Exists(path, file.TypeDir) {
// This is not an error, it just means there's nothing to clean up, which
// can happen if the agent is started for the first time or plugins were
// never installed.
galog.Debugf("Plugin state directory %q does not exist, skipping cleanup", path)
return nil
}

dirs, err := os.ReadDir(path)
if err != nil {
return fmt.Errorf("failed to read directory %q: %w", path, err)
}

currentID := m.currentInstanceID()

for _, dir := range dirs {
absPath := filepath.Join(path, dir.Name())
// Skip the current instance directory and any non-numeric directories,
// these are most likely not agent created.
if !dir.IsDir() || dir.Name() == currentID || !re.MatchString(dir.Name()) {
galog.V(2).Debugf("Skipping %q from plugin manager old state cleanup", absPath)
continue
}
galog.Debugf("Removing previous plugin state %q", absPath)
if err := os.RemoveAll(absPath); err != nil {
return fmt.Errorf("failed to remove file %q: %w", absPath, err)
}
}
return nil
}

// CleanupJob is a job that cleans up old plugin states.
type CleanupJob struct {
pm *PluginManager
}

// newCleanupJob creates a new cleanup job.
func newCleanupJob(pm *PluginManager) *CleanupJob {
return &CleanupJob{pm: pm}
}

// ID returns the ID of the cleanup job.
func (s *CleanupJob) ID() string {
return "cleanup"
}

// MetricName returns the metric name of the cleanup job.
func (s *CleanupJob) MetricName() acmpb.GuestAgentModuleMetric_Metric {
return acmpb.GuestAgentModuleMetric_PLUGIN_CLEANUP
}

// Interval returns the interval of the cleanup job. This is set to 24 hours
// to run once a day.
func (s *CleanupJob) Interval() (time.Duration, bool) {
return 24 * time.Hour, true
}

// ShouldEnable returns true if the cleanup job should be enabled.
//
// This should always be enabled to ensure that old plugin states are cleaned
// up.
func (s *CleanupJob) ShouldEnable(context.Context) bool {
return true
}

// Run runs the cleanup job.
func (s *CleanupJob) Run(ctx context.Context) (bool, error) {
galog.Debugf("Running cleanup job")
noop, err := s.pm.retryFailedRemovals(ctx)
galog.Debugf("Finished running cleanup job")
return noop, err
}
199 changes: 199 additions & 0 deletions internal/plugin/manager/cleanup_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
// Copyright 2025 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package manager

import (
"context"
"fmt"
"os"
"path/filepath"
"testing"

"github.com/GoogleCloudPlatform/google-guest-agent/internal/utils/file"
)

// cleanupTestPlugin is a test plugin for cleanup.
type cleanupTestPlugin struct {
name, revision string
}

// String returns the full name of the plugin.
func (c *cleanupTestPlugin) String() string {
return fmt.Sprintf("%s_%s", c.name, c.revision)
}

func TestRetryFailedRemovals(t *testing.T) {
state := t.TempDir()
setBaseStateDir(t, state)
pluginDir := filepath.Join(state, "plugins")
if err := os.MkdirAll(pluginDir, 0755); err != nil {
t.Fatalf("os.MkdirAll(%s) failed unexpectedly with error: %v", pluginDir, err)
}

// Make some plugins.
failedPlugin := cleanupTestPlugin{name: "plugin1", revision: "revision1"}
uncachedPlugin := cleanupTestPlugin{name: "plugin2", revision: "revision2"}
runningPlugin := cleanupTestPlugin{name: "plugin3", revision: "revision3"}

// Create a plugin directory for each plugin.
if err := os.Mkdir(filepath.Join(pluginDir, failedPlugin.String()), 0755); err != nil {
t.Fatalf("os.Mkdir(%s) failed unexpectedly with error: %v", filepath.Join(pluginDir, failedPlugin.String()), err)
}
if err := os.Mkdir(filepath.Join(pluginDir, uncachedPlugin.String()), 0755); err != nil {
t.Fatalf("os.Mkdir(%s) failed unexpectedly with error: %v", filepath.Join(pluginDir, uncachedPlugin.String()), err)
}
if err := os.Mkdir(filepath.Join(pluginDir, runningPlugin.String()), 0755); err != nil {
t.Fatalf("os.Mkdir(%s) failed unexpectedly with error: %v", filepath.Join(pluginDir, runningPlugin.String()), err)
}

// Create links to the plugin directories.
if err := os.Symlink(filepath.Join(pluginDir, failedPlugin.String()), filepath.Join(pluginDir, failedPlugin.name)); err != nil {
t.Fatalf("os.Symlink(%s, %s) failed unexpectedly with error: %v", filepath.Join(pluginDir, failedPlugin.String()), filepath.Join(pluginDir, failedPlugin.name), err)
}
if err := os.Symlink(filepath.Join(pluginDir, uncachedPlugin.String()), filepath.Join(pluginDir, uncachedPlugin.name)); err != nil {
t.Fatalf("os.Symlink(%s, %s) failed unexpectedly with error: %v", filepath.Join(pluginDir, uncachedPlugin.String()), filepath.Join(pluginDir, uncachedPlugin.name), err)
}
if err := os.Symlink(filepath.Join(pluginDir, runningPlugin.String()), filepath.Join(pluginDir, runningPlugin.name)); err != nil {
t.Fatalf("os.Symlink(%s, %s) failed unexpectedly with error: %v", filepath.Join(pluginDir, runningPlugin.String()), filepath.Join(pluginDir, runningPlugin.name), err)
}

// Initialize the plugin manager with the test plugins. Only the running
// plugin is cached in the plugin manager.
plugins := make(map[string]*Plugin)
for _, p := range []cleanupTestPlugin{runningPlugin} {
plugins[p.String()] = &Plugin{Name: p.name, Revision: p.revision, InstallPath: filepath.Join(pluginDir, p.String()), PluginType: PluginTypeDynamic}
}
pm := &PluginManager{instanceID: "1234567890", plugins: plugins}

// Retry failed removals.
if _, err := pm.retryFailedRemovals(context.Background()); err != nil {
t.Fatalf("retryFailedRemovals(ctx) failed unexpectedly with error: %v", err)
}

tests := []struct {
name string
plugin cleanupTestPlugin
exists bool
}{
{
name: "failed-plugin-retried",
plugin: failedPlugin,
exists: false,
},
{
name: "uncached-plugin-removed",
plugin: uncachedPlugin,
exists: false,
},
{
name: "running-plugin-unchanged",
plugin: runningPlugin,
exists: true,
},
}

for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
dir := filepath.Join(pluginDir, tc.plugin.String())
if got := file.Exists(dir, file.TypeDir); got != tc.exists {
t.Errorf("retryFailedRemovals(ctx): dir %q exists: %t, should exist: %t", dir, got, tc.exists)
}

link := filepath.Join(pluginDir, tc.plugin.name)
if _, err := os.Stat(link); (err == nil) != tc.exists {
t.Errorf("retryFailedRemovals(ctx): link %q exists: %t, should exist: %t", link, err == nil, tc.exists)
}
})
}
}

func TestCleanupOldState(t *testing.T) {
state := t.TempDir()
oldInstance := filepath.Join(state, "1234567890")
newInstance := filepath.Join(state, "9876543210")
nonNumericDir := filepath.Join(state, "non-numeric-dir")
alphaNumericDir := filepath.Join(state, "abc-1234567890")
otherFile := filepath.Join(state, "random-file")

f, err := os.Create(otherFile)
if err != nil {
t.Fatalf("os.Create(%s) failed unexpectedly with error: %v", filepath.Join(state, "random-file"), err)
}
if err := f.Close(); err != nil {
t.Fatalf("f.Close(%s) failed unexpectedly with error: %v", otherFile, err)
}

for _, d := range []string{oldInstance, newInstance, nonNumericDir, alphaNumericDir} {
if err := os.MkdirAll(d, 0755); err != nil {
t.Fatalf("os.MkdirAll(%s) failed unexpectedly with error: %v", d, err)
}
}

pm := &PluginManager{instanceID: filepath.Base(newInstance)}
if err := pm.cleanupOldState(context.Background(), state); err != nil {
t.Fatalf("cleanupOldState(ctx, %s) failed unexpectedly with error: %v", state, err)
}

tests := []struct {
name string
path string
fType file.Type
exists bool
}{
{
name: "old-instance-cleanup",
path: oldInstance,
exists: false,
fType: file.TypeDir,
},
{
name: "new-instance-unchanged",
path: newInstance,
exists: true,
fType: file.TypeDir,
},
{
name: "non-numeric-dir-unchanged",
path: nonNumericDir,
exists: true,
fType: file.TypeDir,
},
{
name: "alpha-numeric-dir-unchanged",
path: alphaNumericDir,
exists: true,
fType: file.TypeDir,
},
{
name: "non-dir-file-unchanged",
path: otherFile,
exists: true,
fType: file.TypeFile,
},
}

for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
if got := file.Exists(tc.path, tc.fType); got != tc.exists {
t.Errorf("cleanupOldState(ctx, %s) ran, file %q exists: %t, should exist: %t", state, tc.path, got, tc.exists)
}
})
}

nonExistingDir := filepath.Join(state, "non-existing-dir")
if err := pm.cleanupOldState(context.Background(), nonExistingDir); err != nil {
t.Errorf("cleanupOldState(ctx, %s) failed unexpectedly with error: %v, want nil for non-existing directory", nonExistingDir, err)
}
}
Loading