Skip to content

[agent-smith] introduce file detector #21010

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions components/ee/agent-smith/example-config.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,11 @@
}
]
}
},
"filesystemScanning": {
"enabled": true,
"scanInterval": "5m",
"maxFileSize": 1024,
"workingArea": "/mnt/workingarea-mk2"
}
}
109 changes: 105 additions & 4 deletions components/ee/agent-smith/pkg/agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,10 @@ type Smith struct {
timeElapsedHandler func(t time.Time) time.Duration
notifiedInfringements *lru.Cache

detector detector.ProcessDetector
classifier classifier.ProcessClassifier
detector detector.ProcessDetector
classifier classifier.ProcessClassifier
fileDetector detector.FileDetector
fileClassifier classifier.FileClassifier
}

// NewAgentSmith creates a new agent smith
Expand Down Expand Up @@ -135,6 +137,32 @@ func NewAgentSmith(cfg config.Config) (*Smith, error) {
return nil, err
}

// Initialize filesystem detection if enabled
var filesystemDetec detector.FileDetector
var filesystemClass classifier.FileClassifier
if cfg.FilesystemScanning != nil && cfg.FilesystemScanning.Enabled {
// Create filesystem detector config
fsConfig := detector.FileScanningConfig{
Enabled: cfg.FilesystemScanning.Enabled,
ScanInterval: cfg.FilesystemScanning.ScanInterval.Duration,
MaxFileSize: cfg.FilesystemScanning.MaxFileSize,
WorkingArea: cfg.FilesystemScanning.WorkingArea,
}

// Create independent filesystem classifier (no dependency on process classifier)
filesystemClass, err = cfg.Blocklists.FileClassifier()
if err != nil {
log.WithError(err).Error("failed to create filesystem classifier")
} else {
filesystemDetec, err = detector.NewfileDetector(fsConfig, filesystemClass)
if err != nil {
log.WithError(err).Error("failed to create filesystem detector")
} else {
log.Info("Filesystem detector created successfully with independent classifier")
}
}
}

m := newAgentMetrics()
res := &Smith{
EnforcementRules: map[string]config.EnforcementRules{
Expand All @@ -150,8 +178,10 @@ func NewAgentSmith(cfg config.Config) (*Smith, error) {

wsman: wsman,

detector: detec,
classifier: class,
detector: detec,
classifier: class,
fileDetector: filesystemDetec,
fileClassifier: filesystemClass,

notifiedInfringements: lru.New(notificationCacheSize),
metrics: m,
Expand Down Expand Up @@ -227,17 +257,34 @@ type classifiedProcess struct {
Err error
}

type classifiedFile struct {
F detector.File
C *classifier.Classification
Err error
}

// Start gets a stream of Infringements from Run and executes a callback on them to apply a Penalty
func (agent *Smith) Start(ctx context.Context, callback func(InfringingWorkspace, []config.PenaltyKind)) {
ps, err := agent.detector.DiscoverProcesses(ctx)
if err != nil {
log.WithError(err).Fatal("cannot start process detector")
}

// Start filesystem detection if enabled
var fs <-chan detector.File
if agent.fileDetector != nil {
fs, err = agent.fileDetector.DiscoverFiles(ctx)
if err != nil {
log.WithError(err).Warn("cannot start filesystem detector")
}
}

var (
wg sync.WaitGroup
cli = make(chan detector.Process, 500)
clo = make(chan classifiedProcess, 50)
fli = make(chan detector.File, 100)
flo = make(chan classifiedFile, 25)
)
agent.metrics.RegisterClassificationQueues(cli, clo)

Expand Down Expand Up @@ -268,6 +315,25 @@ func (agent *Smith) Start(ctx context.Context, callback func(InfringingWorkspace
}()
}

// Filesystem classification workers (fewer than process workers)
if agent.fileClassifier != nil {
for i := 0; i < 5; i++ {
wg.Add(1)
go func() {
defer wg.Done()
for file := range fli {
class, err := agent.fileClassifier.MatchesFile(file.Path)
if err == nil && class.Level == classifier.LevelNoMatch {
log.Infof("File classification: no match - %s", file.Path)
continue
}
log.Infof("File classification result: %s (level: %s, err: %v)", file.Path, class.Level, err)
flo <- classifiedFile{F: file, C: class, Err: err}
}
}()
}
}

defer log.Info("agent smith main loop ended")

// We want to fill the classifier in a Go routine seaparete from using the classification
Expand All @@ -288,6 +354,15 @@ func (agent *Smith) Start(ctx context.Context, callback func(InfringingWorkspace
// we're overfilling the classifier worker
agent.metrics.classificationBackpressureInDrop.Inc()
}
case file, ok := <-fs:
if !ok {
continue
}
select {
case fli <- file:
default:
// filesystem queue full, skip this file
}
}
}
}()
Expand Down Expand Up @@ -319,6 +394,32 @@ func (agent *Smith) Start(ctx context.Context, callback func(InfringingWorkspace
},
},
})
case fileClass := <-flo:
log.Infof("Received classified file from flo channel")
file, cl, err := fileClass.F, fileClass.C, fileClass.Err
if err != nil {
log.WithError(err).WithFields(log.OWI(file.Workspace.OwnerID, file.Workspace.WorkspaceID, file.Workspace.InstanceID)).WithField("path", file.Path).Error("cannot classify filesystem file")
continue
}

log.WithField("path", file.Path).WithField("severity", cl.Level).WithField("message", cl.Message).
WithFields(log.OWI(file.Workspace.OwnerID, file.Workspace.WorkspaceID, file.Workspace.InstanceID)).
Info("filesystem signature detected")

_, _ = agent.Penalize(InfringingWorkspace{
SupervisorPID: file.Workspace.PID,
Owner: file.Workspace.OwnerID,
InstanceID: file.Workspace.InstanceID,
WorkspaceID: file.Workspace.WorkspaceID,
GitRemoteURL: []string{file.Workspace.GitURL},
Infringements: []Infringement{
{
Kind: config.GradeKind(config.InfringementExec, common.Severity(cl.Level)), // Reuse exec for now
Description: fmt.Sprintf("filesystem signature: %s", cl.Message),
CommandLine: []string{file.Path}, // Use file path as "command"
},
},
})
}
}
}
Expand Down
75 changes: 75 additions & 0 deletions components/ee/agent-smith/pkg/classifier/classifier.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,12 @@ type ProcessClassifier interface {
Matches(executable string, cmdline []string) (*Classification, error)
}

// FileClassifier matches filesystem files against signatures
type FileClassifier interface {
MatchesFile(filePath string) (*Classification, error)
GetFileSignatures() []*Signature
}

func NewCommandlineClassifier(name string, level Level, allowList []string, blockList []string) (*CommandlineClassifier, error) {
al := make([]*regexp.Regexp, 0, len(allowList))
for _, a := range allowList {
Expand Down Expand Up @@ -173,6 +179,7 @@ type SignatureMatchClassifier struct {
}

var _ ProcessClassifier = &SignatureMatchClassifier{}
var _ FileClassifier = &SignatureMatchClassifier{}

var sigNoMatch = &Classification{Level: LevelNoMatch, Classifier: ClassifierSignature}

Expand Down Expand Up @@ -223,6 +230,63 @@ func (sigcl *SignatureMatchClassifier) Matches(executable string, cmdline []stri
return sigNoMatch, nil
}

// MatchesFile checks if a filesystem file matches any filesystem signatures
func (sigcl *SignatureMatchClassifier) MatchesFile(filePath string) (c *Classification, err error) {
filesystemSignatures := sigcl.GetFileSignatures()

if len(filesystemSignatures) == 0 {
return sigNoMatch, nil
}

// Skip filename matching - the filesystem detector already filtered files
// based on signature filename patterns, so any file that reaches here
// should be checked for content matching against all filesystem signatures
matchingSignatures := filesystemSignatures

// Open file for signature matching
r, err := os.Open(filePath)
if err != nil {
var reason string
if errors.Is(err, fs.ErrNotExist) {
reason = processMissNotFound
} else if errors.Is(err, os.ErrPermission) {
reason = processMissPermissionDenied
} else {
reason = processMissOther
}
log.WithFields(logrus.Fields{
"filePath": filePath,
"reason": reason,
}).WithError(err).Debug("filesystem signature classification miss")
return sigNoMatch, nil
}
defer r.Close()

var serr error

src := SignatureReadCache{
Reader: r,
}
for _, sig := range matchingSignatures {
match, err := sig.Matches(&src)
if match {
return &Classification{
Level: sigcl.DefaultLevel,
Classifier: ClassifierSignature,
Message: fmt.Sprintf("filesystem signature matches %s", sig.Name),
}, nil
}
if err != nil {
serr = err
}
}
if serr != nil {
return nil, serr
}

return sigNoMatch, nil
}

type SignatureReadCache struct {
Reader io.ReaderAt
header []byte
Expand All @@ -240,6 +304,17 @@ func (sigcl *SignatureMatchClassifier) Collect(m chan<- prometheus.Metric) {
sigcl.signatureHitTotal.Collect(m)
}

// GetFileSignatures returns signatures that are configured for filesystem domain
func (sigcl *SignatureMatchClassifier) GetFileSignatures() []*Signature {
var filesystemSignatures []*Signature
for _, sig := range sigcl.Signatures {
if sig.Domain == DomainFileSystem {
filesystemSignatures = append(filesystemSignatures, sig)
}
}
return filesystemSignatures
}

// CompositeClassifier combines multiple classifiers into one. The first match wins.
type CompositeClassifier []ProcessClassifier

Expand Down
Loading
Loading