Skip to content

Commit

Permalink
Amend distributed discovery queries behavior, document and add tests
Browse files Browse the repository at this point in the history
  • Loading branch information
lucasmrod committed Oct 18, 2022
1 parent 55c8362 commit ad6452a
Show file tree
Hide file tree
Showing 4 changed files with 107 additions and 19 deletions.
66 changes: 66 additions & 0 deletions docs/wiki/deployment/remote.md
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,72 @@ As of osquery version 2.1.2, the distributed write API includes a top-level `sta
}
```

### Discovery queries on distributed queries

Distributed queries support "discovery queries", which are similar in semantics to [discovery queries in Packs](./configuration.md#discovery-queries).
A distributed discovery query controls whether or not a distributed query will be executed on a host.
- If a distributed query has no corresponding discovery query, then it is always executed on the host.
- If a discovery query returns one or more results, then its corresponding distributed query will be executed on the host.
- If a discovery query returns no results, then its corresponding distributed query will not be executed on the host.

Sample of a `distributed/read` response with discovery queries:
```json
{
"queries": {
"always_execute": "select version from osquery_info;",
"windows_info": "select name from system_info;",
"darwin_time": "select day from time;"
},
"discovery": {
"windows_info": "select * from os_version where platform='windows';",
"darwin_time": "select * from os_version where platform='darwin';"
}
}
```

When processing the above `distributed/read` response, osquery will:
- Always execute the `"always_execute"` query because there isn't a discovery query defined for it.
- Only execute the `"windows_info"` query on Windows hosts.
- Only execute the `"darwin_time"` query on macOS hosts.

Here's the corresponding osquery `distributed/write` request on a Windows host:
```json
{
"node_key": "...",
"queries": {
"always_execute": [
{"version": "5.5.1"}
],
"windows_info": [
{"name": "windows"}
]
},
"statuses": {
"always_execute": 0,
"windows_info": 0
}
}
```

Here's the corresponding osquery `distributed/write` request on a macOS host:
```json
{
"node_key": "...",
"queries": {
"always_execute": [
{"version": "5.4.0"}
],
"darwin_time": [
{"day": "17"}
]
},
"statuses": {
"always_execute": 0,
"darwin_time": 0
}
}
```

## Customizations

There are several unlisted flags to further control the remote settings. These controls are helpful if using a somewhat opaque API.
Expand Down
19 changes: 9 additions & 10 deletions osquery/distributed/distributed.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -268,11 +268,9 @@ Status Distributed::acceptWork(const std::string& work) {
return Status(1, "Error Parsing JSON");
}

std::set<std::string> queries_to_run;

auto hasDiscoveryQueries = false;

// Check for and run discovery queries first
// Check for and run discovery queries first.
// Store their result in discovery_results.
std::map<std::string, bool> discovery_results;
if (doc.doc().HasMember("discovery")) {
const auto& queries = doc.doc()["discovery"];
assert(queries.IsObject());
Expand All @@ -288,15 +286,12 @@ Status Distributed::acceptWork(const std::string& work) {
if (query.empty() || name.empty()) {
return Status(1, "Distributed discovery query is not a string");
}
hasDiscoveryQueries = true;

SQL sql(query);
if (!sql.getStatus().ok()) {
return Status(1, "Distributed discovery query has an SQL error");
}
if (sql.rows().size() > 0) {
queries_to_run.insert(name);
}
discovery_results.insert({name, (sql.rows().size() > 0)});
}
}
}
Expand All @@ -317,7 +312,11 @@ Status Distributed::acceptWork(const std::string& work) {
return Status(1, "Distributed query is not a string");
}

if (!hasDiscoveryQueries || queries_to_run.count(name)) {
// If a query does not have a corresponding discovery query
// or it does and it returned results, then store the query
// for execution.
const auto result = discovery_results.find(name);
if (result == discovery_results.cend() || result->second) {
setDatabaseValue(kDistributedQueries, name, query);
}
}
Expand Down
1 change: 1 addition & 0 deletions osquery/distributed/distributed.h
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,7 @@ class Distributed {
FRIEND_TEST(DistributedTests, test_run_queries_with_denylisted_query);
FRIEND_TEST(DistributedTests, test_check_and_set_as_running);
FRIEND_TEST(DistributedTests, test_accept_work_basic);
FRIEND_TEST(DistributedTests, test_accept_work_without_discovery);
FRIEND_TEST(DistributedTests, test_accept_work_with_discovery);
FRIEND_TEST(DistributedTests, test_accept_work_with_discovery_all_fail);
};
Expand Down
40 changes: 31 additions & 9 deletions osquery/distributed/tests/distributed_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,23 @@ TEST_F(DistributedTests, test_accept_work_basic) {
}
}

// Feeds acceptWork() a payload that has a "queries" object but no
// "discovery" object, and expects both queries to be reported as pending.
TEST_F(DistributedTests, test_accept_work_without_discovery) {
  // Two distributed queries; no discovery queries accompany them.
  const std::string payload = R"json(
{
"queries": {
"q1": "SELECT * FROM system_info;",
"q2": "SELECT * FROM time;"
}
}
)json";

  Distributed distributed{};
  auto status = distributed.acceptWork(payload);
  ASSERT_TRUE(status.ok()) << status.getMessage();

  // q1 and q2 should both show up in the pending list.
  const auto pending = distributed.getPendingQueries();
  ASSERT_EQ(pending.size(), 2);
}

TEST_F(DistributedTests, test_accept_work_with_discovery) {
auto dist = Distributed();

Expand All @@ -395,15 +412,20 @@ TEST_F(DistributedTests, test_accept_work_with_discovery) {

// Query q1 has a discovery query with > 0 results.
// Query q2 has a discovery query with 0 results, thus it won't be executed.
// Query q3 does not have a discovery query, thus it won't be executed.
const auto queries = dist.getPendingQueries();
ASSERT_EQ(queries.size(), 1);
auto queryName = queries[0];
EXPECT_EQ(queryName, "q1");
std::string actualQuery;
s = getDatabaseValue(kDistributedQueries, queryName, actualQuery);
ASSERT_TRUE(s.ok()) << s.getMessage();
EXPECT_EQ(actualQuery, "SELECT * FROM system_info;");
// Query q3 does not have a discovery query, thus it will be executed.
const auto queryNames = dist.getPendingQueries();
ASSERT_EQ(queryNames.size(), 2);
for (const auto& queryName : queryNames) {
ASSERT_TRUE(queryName == "q1" || queryName == "q3");
std::string actualQuery;
s = getDatabaseValue(kDistributedQueries, queryName, actualQuery);
ASSERT_TRUE(s.ok()) << s.getMessage();
std::string expectedQuery = "SELECT * FROM system_info;";
if (queryName == "q3") {
expectedQuery = "SELECT * FROM osquery_info;";
}
EXPECT_EQ(actualQuery, expectedQuery);
}
}

// Tests https://github.com/osquery/osquery/issues/5260.
Expand Down

0 comments on commit ad6452a

Please sign in to comment.