Skip to content

Commit

Permalink
[Fleet] added agent logs top errors from 100 hits (#162135)
Browse files Browse the repository at this point in the history
## Summary

Closes #148976

Re-enabled the top-errors telemetry for agent logs. Instead of using an
aggregation, each query now fetches up to 100 error hits and the most
frequent messages are counted from those hits.


### Checklist

- [x] [Unit or functional
tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)
were updated or added to match the most common scenarios
  • Loading branch information
juliaElastic authored Jul 18, 2023
1 parent b5965a3 commit 10c289d
Show file tree
Hide file tree
Showing 5 changed files with 134 additions and 25 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import type { ElasticsearchClient } from '@kbn/core-elasticsearch-server';

import { getAgentLogsTopErrors } from './agent_logs_top_errors';

describe('getAgentLogsTopErrors', () => {
  // Wraps plain messages in the `hits.hits[]._source.message` shape the collector reads.
  const toSearchResponse = (messages: string[]) => ({
    hits: {
      hits: messages.map((message) => ({ _source: { message } })),
    },
  });

  it('should return top 3 errors from 100 hits', async () => {
    // Hits returned for the generic agent logs index pattern.
    const agentMessages = ['error 2', 'error 2', 'error 3', 'error 3', 'error 3', 'error 1'];
    // Hits returned for any other index (the fleet server logs query).
    const fleetServerMessages = [
      'fleet server error 2',
      'fleet server error 2',
      'fleet server error 1',
    ];

    const search = jest
      .fn()
      .mockImplementation((params) =>
        toSearchResponse(
          params.index === 'logs-elastic_agent-*' ? agentMessages : fleetServerMessages
        )
      );
    const esClientMock = { search } as unknown as ElasticsearchClient;

    const topErrors = await getAgentLogsTopErrors(esClientMock);

    // Messages are expected ordered from most to least frequent.
    expect(topErrors).toEqual({
      agent_logs_top_errors: ['error 3', 'error 2', 'error 1'],
      fleet_server_logs_top_errors: ['fleet server error 2', 'fleet server error 1'],
    });
  });
});
49 changes: 28 additions & 21 deletions x-pack/plugins/fleet/server/collectors/agent_logs_top_errors.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@

import type { ElasticsearchClient } from '@kbn/core-elasticsearch-server';

import { sortBy } from 'lodash';

import { DATA_TIERS } from '../../common/constants';

import { appContextService } from '../services';

export interface AgentLogsData {
Expand All @@ -29,10 +33,16 @@ export async function getAgentLogsTopErrors(
const queryTopMessages = (index: string) =>
esClient.search({
index,
size: 0,
size: 100,
_source: ['message'],
query: {
bool: {
filter: [
{
terms: {
_tier: DATA_TIERS,
},
},
{
term: {
'log.level': 'error',
Expand All @@ -48,35 +58,32 @@ export async function getAgentLogsTopErrors(
],
},
},
aggs: {
message_sample: {
sampler: {
shard_size: 200,
},
aggs: {
categories: {
categorize_text: {
field: 'message',
size: 10,
},
},
},
},
},
});

const transformBuckets = (resp: any) =>
((resp?.aggregations?.message_sample as any)?.categories?.buckets ?? [])
/**
 * Returns the three most frequent error messages among the hits of a search response.
 *
 * @param resp - search response; only `hits.hits[]._source.message` is read. A missing
 *   response, missing hits, or hits without a string message contribute nothing.
 * @returns up to three messages, most frequent first. Among messages with equal counts,
 *   the one first seen later in the hits is listed first.
 */
const getTopErrors = (resp: any): string[] => {
  // A Map keeps arbitrary log text (e.g. "constructor") from colliding with
  // keys inherited from Object.prototype.
  const counts = new Map<string, number>();
  for (const hit of resp?.hits?.hits ?? []) {
    const message = hit?._source?.message;
    if (typeof message !== 'string') {
      continue;
    }
    counts.set(message, (counts.get(message) ?? 0) + 1);
  }

  return (
    [...counts.entries()]
      // Stable ascending sort followed by reverse gives descending counts.
      .sort(([, a], [, b]) => a - b)
      .reverse()
      // Truncate only AFTER ordering by descending count; slicing the ascending
      // list first would keep the three least frequent messages.
      .slice(0, 3)
      .map(([message]) => message)
  );
};

const agentResponse = await queryTopMessages('logs-elastic_agent-*');

const fleetServerResponse = await queryTopMessages('logs-elastic_agent.fleet_server-*');

return {
agent_logs_top_errors: transformBuckets(agentResponse),
fleet_server_logs_top_errors: transformBuckets(fleetServerResponse),
agent_logs_top_errors: getTopErrors(agentResponse),
fleet_server_logs_top_errors: getTopErrors(fleetServerResponse),
};
} catch (error) {
if (error.statusCode === 404) {
Expand Down
4 changes: 2 additions & 2 deletions x-pack/plugins/fleet/server/collectors/register.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import type { FleetServerUsage } from './fleet_server_collector';
import { getAgentPoliciesUsage } from './agent_policies';
import type { AgentPanicLogsData } from './agent_logs_panics';
import { getPanicLogsLastHour } from './agent_logs_panics';
import { getAgentLogsTopErrors } from './agent_logs_top_errors';

export interface Usage {
agents_enabled: boolean;
Expand Down Expand Up @@ -64,8 +65,7 @@ export const fetchFleetUsage = async (
fleet_server_config: await getFleetServerConfig(soClient),
agent_policies: await getAgentPoliciesUsage(soClient),
...(await getPanicLogsLastHour(esClient)),
// TODO removed top errors telemetry as it causes this issue: https://github.com/elastic/kibana/issues/148976
// ...(await getAgentLogsTopErrors(esClient)),
...(await getAgentLogsTopErrors(esClient)),
};
return usage;
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -400,8 +400,12 @@ describe('fleet usage telemetry', () => {
message: 'stderr panic some other panic',
},
],
// agent_logs_top_errors: ['stderr panic close of closed channel'],
// fleet_server_logs_top_errors: ['failed to unenroll offline agents'],
agent_logs_top_errors: [
'stderr panic some other panic',
'stderr panic close of closed channel',
'this should not be included in metrics',
],
fleet_server_logs_top_errors: ['failed to unenroll offline agents'],
})
);
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -247,4 +247,18 @@ export const fleetUsagesSchema: RootSchema<any> = {
},
},
},
agent_logs_top_errors: {
type: 'array',
items: {
type: 'text',
_meta: { description: 'Top messages from agent error logs' },
},
},
fleet_server_logs_top_errors: {
type: 'array',
items: {
type: 'text',
_meta: { description: 'Top messages from fleet server error logs' },
},
},
};

0 comments on commit 10c289d

Please sign in to comment.