Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changes/28788-name-software-checksum
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
* Added software name into checksum calculation for macos apps
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
package tables

import (
"database/sql"
"fmt"
)

func init() {
MigrationClient.AddMigration(Up_20251010153829, Down_20251010153829)
}

func Up_20251010153829(tx *sql.Tx) error {
var minID, maxID sql.NullInt64
err := tx.QueryRow(`
SELECT MIN(id), MAX(id)
FROM software
WHERE source = 'apps'
AND bundle_identifier IS NOT NULL
AND bundle_identifier != ''
`).Scan(&minID, &maxID)
if err != nil {
return fmt.Errorf("getting ID range: %w", err)
}

if !minID.Valid || !maxID.Valid {
return nil
}
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

using MAX(id), is ok, even if a new hosts checks in during the migration. For example,
Query runs and returns: MIN = 1, MAX = 1000
NEW host checks in inserts row with id = 1001
Migration finishes at id = 1000
Row 1001 is MISSED by migration. But it should be using the new checksum code, so we don't need to worry about migrating.

Comment on lines +14 to +27
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

WHERE clause is too restrictive—should update all apps.

The migration filters to source = 'apps' AND bundle_identifier IS NOT NULL AND bundle_identifier != '', but the Go code change at server/fleet/software.go:152 now includes s.Name in the checksum for all apps, regardless of whether they have a bundle_identifier.

Impact:

  • Apps with empty or NULL bundle_identifier won't be migrated
  • When osquery re-reports these apps, Fleet will compute a new checksum (with name)
  • Checksum mismatch → Fleet creates duplicate software entries

Recommended fix:

Remove the bundle_identifier filter from both queries:

 	err := tx.QueryRow(`
 		SELECT MIN(id), MAX(id)
 		FROM software
 		WHERE source = 'apps'
-		  AND bundle_identifier IS NOT NULL
-		  AND bundle_identifier != ''
 	`).Scan(&minID, &maxID)
 		softwareStmt := `
 		UPDATE software SET
 			checksum = UNHEX(
 			MD5(
 				-- concatenate with separator \x00
 				CONCAT_WS(CHAR(0),
 					version,
 					source,
 					bundle_identifier,
 					` + "`release`" + `,
 					arch,
 					vendor,
 					extension_for,
 					extension_id,
 					name
 				)
 			)
 		)
 		WHERE source = 'apps'
-		  AND bundle_identifier IS NOT NULL
-		  AND bundle_identifier != ''
 		  AND id >= ? AND id <= ?
 		`

As per coding guidelines: SQL queries must have correct filtering to avoid non-deterministic results and data inconsistencies.

Also applies to: 54-56

🤖 Prompt for AI Agents
In
server/datastore/mysql/migrations/tables/20251010153829_AddNameToSoftwareCheckumCalculation.go
around lines 14-27 (and also adjust the similar query at lines 54-56), the WHERE
clause incorrectly restricts rows to "bundle_identifier IS NOT NULL AND
bundle_identifier != ''", but the checksum change now applies to all apps;
remove the bundle_identifier conditions so the SQL only filters by source =
'apps' to include all app rows, leaving the rest of the query, Scan and error
handling unchanged.


const batchSize = 10000
for startID := minID.Int64; startID <= maxID.Int64; startID += batchSize {
endID := startID + batchSize - 1
if endID > maxID.Int64 {
endID = maxID.Int64
}

softwareStmt := `
UPDATE software SET
checksum = UNHEX(
Copy link
Contributor Author

@ksykulev ksykulev Oct 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should we set name_source to basic on all of these? We are getting rid of updateTargetedBundleIDs so there will no longer be any names on the software table set by the "renaming" functionality.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't think we would modify them either way because if they got set to the newer form we can trust those more than just filename based.

MD5(
-- concatenate with separator \x00
CONCAT_WS(CHAR(0),
version,
source,
bundle_identifier,
` + "`release`" + `,
arch,
vendor,
extension_for,
extension_id,
name
)
)
)
WHERE source = 'apps'
AND bundle_identifier IS NOT NULL
AND bundle_identifier != ''
AND id >= ? AND id <= ?
`
_, err = tx.Exec(softwareStmt, startID, endID)
if err != nil {
return fmt.Errorf("updating software checksums (batch %d-%d): %w", startID, endID, err)
}
}

return nil
}

func Down_20251010153829(tx *sql.Tx) error {
return nil
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
package tables

import (
"crypto/md5" //nolint:gosec // MD5 is used for checksums, not security
"database/sql"
"fmt"
"strings"
"testing"

"github.com/stretchr/testify/require"
)

func TestUp_20251010153829(t *testing.T) {
db := applyUpToPrev(t)

computeOldChecksum := func(name, version, source, bundleID, release, arch, vendor, extensionFor, extensionID string) []byte {
h := md5.New() //nolint:gosec
cols := []string{version, source, bundleID, release, arch, vendor, extensionFor, extensionID}
if source != "apps" {
cols = append([]string{name}, cols...)
}
_, _ = fmt.Fprint(h, strings.Join(cols, "\x00"))
return h.Sum(nil)
}

computeNewChecksum := func(name, version, source, bundleID, release, arch, vendor, extensionFor, extensionID string) []byte {
h := md5.New() //nolint:gosec
cols := []string{version, source, bundleID, release, arch, vendor, extensionFor, extensionID, name}
_, _ = fmt.Fprint(h, strings.Join(cols, "\x00"))
return h.Sum(nil)
}

insertTitle := `INSERT INTO software_titles (name, source, extension_for, bundle_identifier) VALUES (?, ?, ?, ?)`
result, err := db.Exec(insertTitle, "Test App", "apps", "", "com.test.app")
require.NoError(t, err)
titleID, err := result.LastInsertId()
require.NoError(t, err)

insertSoftware := `INSERT INTO software
(name, version, source, bundle_identifier, ` + "`release`" + `, arch, vendor, extension_for, extension_id, checksum, title_id)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`

// software with bundle_identifier should be updated
app1Name := "GoLand.app"
app1BundleID := "com.jetbrains.goland"
app1OldChecksum := computeOldChecksum(app1Name, "2023.1", "apps", app1BundleID, "", "x86_64", "JetBrains", "", "")
app1NewChecksum := computeNewChecksum(app1Name, "2023.1", "apps", app1BundleID, "", "x86_64", "JetBrains", "", "")
_, err = db.Exec(insertSoftware, app1Name, "2023.1", "apps", app1BundleID, "", "x86_64", "JetBrains", "", "", app1OldChecksum, titleID)
require.NoError(t, err)

app2Name := "GoLand 2.app"
app2BundleID := "com.jetbrains.goland"
app2OldChecksum := computeOldChecksum(app2Name, "2023.2", "apps", app2BundleID, "", "x86_64", "JetBrains", "", "")
app2NewChecksum := computeNewChecksum(app2Name, "2023.2", "apps", app2BundleID, "", "x86_64", "JetBrains", "", "")
_, err = db.Exec(insertSoftware, app2Name, "2023.2", "apps", app2BundleID, "", "x86_64", "JetBrains", "", "", app2OldChecksum, titleID)
require.NoError(t, err)

// softwares without bundle_identifier - no update
app3Name := "SomeApp.app"
app3OldChecksum := computeOldChecksum(app3Name, "1.0", "apps", "", "", "x86_64", "Vendor", "", "")
_, err = db.Exec(insertSoftware, app3Name, "1.0", "apps", nil, "", "x86_64", "Vendor", "", "", app3OldChecksum, titleID)
require.NoError(t, err)

app4Name := "AnotherApp.app"
app4OldChecksum := computeOldChecksum(app4Name, "2.0", "apps", "", "", "arm64", "Another Vendor", "", "")
_, err = db.Exec(insertSoftware, app4Name, "2.0", "apps", "", "", "arm64", "Another Vendor", "", "", app4OldChecksum, titleID)
require.NoError(t, err)

// Windows software - no update
winName := "Notepad++"
winOldChecksum := computeOldChecksum(winName, "8.5.0", "programs", "", "", "x86_64", "Don Ho", "", "")
_, err = db.Exec(insertSoftware, winName, "8.5.0", "programs", nil, "", "x86_64", "Don Ho", "", "", winOldChecksum, titleID)
require.NoError(t, err)

// Linux software - no update
linuxName := "vim"
linuxOldChecksum := computeOldChecksum(linuxName, "8.2", "deb_packages", "", "1ubuntu1", "amd64", "Ubuntu", "", "")
_, err = db.Exec(insertSoftware, linuxName, "8.2", "deb_packages", nil, "1ubuntu1", "amd64", "Ubuntu", "", "", linuxOldChecksum, titleID)
require.NoError(t, err)

applyNext(t, db)

type softwareRow struct {
Name string `db:"name"`
Source string `db:"source"`
BundleIdentifier sql.NullString `db:"bundle_identifier"`
Checksum []byte `db:"checksum"`
}

var software []softwareRow
err = db.Select(&software, `SELECT name, source, bundle_identifier, checksum FROM software ORDER BY name`)
require.NoError(t, err)
require.Len(t, software, 6)

for _, sw := range software {
switch sw.Name {
case app1Name:
require.Equal(t, app1NewChecksum, sw.Checksum)
require.True(t, sw.BundleIdentifier.Valid)
require.Equal(t, app1BundleID, sw.BundleIdentifier.String)
case app2Name:
require.Equal(t, app2NewChecksum, sw.Checksum)
require.True(t, sw.BundleIdentifier.Valid)
require.Equal(t, app2BundleID, sw.BundleIdentifier.String)
case app3Name:
require.Equal(t, app3OldChecksum, sw.Checksum)
require.False(t, sw.BundleIdentifier.Valid)
case app4Name:
require.Equal(t, app4OldChecksum, sw.Checksum)
require.True(t, sw.BundleIdentifier.Valid)
require.Equal(t, "", sw.BundleIdentifier.String)
case winName:
require.Equal(t, winOldChecksum, sw.Checksum)
require.Equal(t, "programs", sw.Source)
case linuxName:
require.Equal(t, linuxOldChecksum, sw.Checksum)
require.Equal(t, "deb_packages", sw.Source)
default:
t.Fatalf("Unexpected software entry: %s", sw.Name)
}
}
}
4 changes: 2 additions & 2 deletions server/datastore/mysql/schema.sql

Large diffs are not rendered by default.

Loading
Loading