From 3a2a689796dcce1e053b4ef367302db9566523c3 Mon Sep 17 00:00:00 2001 From: Dante Catalfamo <43040593+dantecatalfamo@users.noreply.github.com> Date: Thu, 16 Jan 2025 10:13:22 -0500 Subject: [PATCH] Don't expire iOS devices prematurely (#25436) #25406 The `last_seen_times` table is only updates when osquery hits one of its authenticated endpoints, meaning it isn't updated when devices without osquery, like iphones, are enrolled. I've left a [comment](https://github.com/fleetdm/fleet/issues/25406#issuecomment-2590637318) on the original issue explaining how this happens. Originally, if there was no `last_seen_time`, the fallback value would be the `created_at` value on the `hosts` table, so ios devices would always get deleted once they were added X number of days ago. In its place, I've added the `detail_updated_at` column on the `hosts` table as the fallback value, and only use `created_at` if that is also empty. `detail_updated_at` is updated every time a full detail refetch completes. In the case of ios/ipados, [this is done using MDM](https://github.com/fleetdm/fleet/blob/cd5c0e8aed10664458f597b5d9600dd20bf3fdac/server/service/apple_mdm.go#L3101). `detail_updated_at` is updated less frequently than `last_seen_times`, only once every hour or so instead of every 30 seconds, but since expiration policies are set on the scale of days instead of hours, this should be fine. The way I've QA'd this is by adding an iOS device to my fleet instance, waited 24 hours, and set the expiration policy to 24 hours. --- changes/25406-premature-ios-deletion | 1 + server/datastore/mysql/hosts.go | 8 ++- server/datastore/mysql/hosts_test.go | 87 ++++++++++++++++++++++++++++ 3 files changed, 94 insertions(+), 2 deletions(-) create mode 100644 changes/25406-premature-ios-deletion diff --git a/changes/25406-premature-ios-deletion b/changes/25406-premature-ios-deletion new file mode 100644 index 000000000000..ba5f83f643f2 --- /dev/null +++ b/changes/25406-premature-ios-deletion @@ -0,0 +1 @@ +- Fixed bug where iOS devices were being removed prematurely by expiration policy diff --git a/server/datastore/mysql/hosts.go b/server/datastore/mysql/hosts.go index a044e96998db..9e972faa3d5a 100644 --- a/server/datastore/mysql/hosts.go +++ b/server/datastore/mysql/hosts.go @@ -3119,11 +3119,15 @@ func (ds *Datastore) CleanupExpiredHosts(ctx context.Context) ([]uint, error) { // DELETE FROM hosts WHERE id in (SELECT host_id FROM host_seen_times WHERE seen_time < DATE_SUB(NOW(), INTERVAL ? DAY)) // This means a full table scan for hosts, and for big deployments, that's not ideal // so instead, we get the ids one by one and delete things one by one - // it might take longer, but it should lock only the row we need + // it might take longer, but it should lock only the row we need. + // + // host_seen_time entries are not available for ios/ipados devices, since they're updated on + // osquery check-in. Instead we fall back to detail_updated_at, which is updated every time a + // full detail refetch happens. findHostsSql := `SELECT h.id FROM hosts h LEFT JOIN host_seen_times hst ON h.id = hst.host_id - WHERE COALESCE(hst.seen_time, h.created_at) < DATE_SUB(NOW(), INTERVAL ? DAY)` + WHERE COALESCE(hst.seen_time, h.detail_updated_at, h.created_at) < DATE_SUB(NOW(), INTERVAL ? DAY)` var allIdsToDelete []uint // Process hosts using global expiry diff --git a/server/datastore/mysql/hosts_test.go b/server/datastore/mysql/hosts_test.go index 48be71a9d49b..7b4319c5b0cc 100644 --- a/server/datastore/mysql/hosts_test.go +++ b/server/datastore/mysql/hosts_test.go @@ -119,6 +119,7 @@ func TestHosts(t *testing.T) { {"HostsListByDiskEncryptionStatus", testHostsListMacOSSettingsDiskEncryptionStatus}, {"HostsListFailingPolicies", printReadsInTest(testHostsListFailingPolicies)}, {"HostsExpiration", testHostsExpiration}, + {"IOSHostExpiration", testIOSHostsExpiration}, {"TeamHostsExpiration", testTeamHostsExpiration}, {"HostsIncludesScheduledQueriesInPackStats", testHostsIncludesScheduledQueriesInPackStats}, {"HostsAllPackStats", testHostsAllPackStats}, @@ -4148,6 +4149,92 @@ func testHostsExpiration(t *testing.T, ds *Datastore) { require.Len(t, hosts, 5) } +func testIOSHostsExpiration(t *testing.T, ds *Datastore) { + // iOS/iPadOS devices don't have host_seen_times, meaning they + // would previously rely on created_at records for removal, + // and get deleted once the host's age was beyong the expiry + // window. We now check detail_updated_at, something that gets + // updated every time details are refetched, if seen time is + // not present. + hostExpiryWindow := 70 + + ac, err := ds.AppConfig(context.Background()) + require.NoError(t, err) + + ac.HostExpirySettings.HostExpiryEnabled = false + ac.HostExpirySettings.HostExpiryWindow = hostExpiryWindow + + err = ds.SaveAppConfig(context.Background(), ac) + require.NoError(t, err) + + for i := 0; i < 10; i++ { + platform := "ios" + if i%2 == 0 { + platform = "ipados" + } + detailsUpdated := time.Now() + if i >= 5 { + detailsUpdated = detailsUpdated.Add(time.Duration(-1*(hostExpiryWindow+1)*24) * time.Hour) + } + + _, err := ds.NewHost(context.Background(), &fleet.Host{ + DetailUpdatedAt: detailsUpdated, + LabelUpdatedAt: time.Now(), + PolicyUpdatedAt: time.Now(), + UUID: fmt.Sprintf("%d", i), + Hostname: fmt.Sprintf("foo.local%d", i), + Platform: platform, + }) + require.NoError(t, err) + } + + ExecAdhocSQL(t, ds, func(q sqlx.ExtContext) error { + // There are no host_seen_times for ios/ipados devices + if _, err := q.ExecContext(context.Background(), "DELETE FROM host_seen_times"); err != nil { + return err + } + // Make sure created_at is old enough to always get + // removed, we want to make sure that + // detail_updated_at is the column being checked + if _, err := q.ExecContext(context.Background(), "UPDATE hosts SET created_at = '2020-01-01 00:00:01'"); err != nil { + return err + } + return nil + }) + + filter := fleet.TeamFilter{User: test.UserAdmin} + + hosts := listHostsCheckCount(t, ds, filter, fleet.HostListOptions{}, 10) + require.Len(t, hosts, 10) + + _, err = ds.CleanupExpiredHosts(context.Background()) + require.NoError(t, err) + + // host expiration is still disabled + hosts = listHostsCheckCount(t, ds, filter, fleet.HostListOptions{}, 10) + require.Len(t, hosts, 10) + + // once enabled, it works + ac.HostExpirySettings.HostExpiryEnabled = true + err = ds.SaveAppConfig(context.Background(), ac) + require.NoError(t, err) + + deleted, err := ds.CleanupExpiredHosts(context.Background()) + require.NoError(t, err) + require.Len(t, deleted, 5) + + hosts = listHostsCheckCount(t, ds, filter, fleet.HostListOptions{}, 5) + require.Len(t, hosts, 5) + + // And it doesn't remove more than it should + deleted, err = ds.CleanupExpiredHosts(context.Background()) + require.NoError(t, err) + require.Len(t, deleted, 0) + + hosts = listHostsCheckCount(t, ds, filter, fleet.HostListOptions{}, 5) + require.Len(t, hosts, 5) +} + func testTeamHostsExpiration(t *testing.T, ds *Datastore) { // Set global host expiry windows const hostExpiryWindow = 70