[Dataset quality] Added malformed docs column to table #172462

Merged
19 commits merged on Dec 5, 2023
145 changes: 145 additions & 0 deletions packages/kbn-apm-synthtrace/src/scenarios/malformed_logs.ts
@@ -0,0 +1,145 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
import { LogDocument, log, generateShortId, generateLongId } from '@kbn/apm-synthtrace-client';
import { Scenario } from '../cli/scenario';
import { withClient } from '../lib/utils/with_client';

const MORE_THAN_1024_CHARS =
'Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi architecto beatae vitae dicta sunt explicabo. Nemo enim ipsam voluptatem quia voluptas sit aspernatur aut odit aut fugit, sed quia consequuntur magni dolores eos qui ratione voluptatem sequi nesciunt. Neque porro quisquam est, qui dolorem ipsum quia dolor sit amet, consectetur, adipisci velit, sed quia non numquam eius modi tempora incidunt ut labore et dolore magnam aliquam quaerat voluptatem. Ut enim ad minima veniam, quis nostrum exercitationem ullam corporis suscipit laboriosam, nisi ut aliquid ex ea commodi consequatur? Quis autem vel eum iure reprehenderit qui in ea voluptate velit esse quam nihil molestiae consequatur, vel illum qui dolorem eum fugiat quo voluptas nulla pariatur?';

const scenario: Scenario<LogDocument> = async (runOptions) => {
  return {
    generate: ({ range, clients: { logsEsClient } }) => {
      const { logger } = runOptions;

      // Logs Data logic
      const MESSAGE_LOG_LEVELS = [
        { message: 'A simple log', level: 'info' },
        {
          message: 'Another log message',
          level: 'debug',
        },
        { message: 'Error with certificate: "ca_trusted_fingerprint"', level: 'error' },
      ];
      const CLOUD_PROVIDERS = ['gcp', 'aws', 'azure'];
      const CLOUD_REGION = ['eu-central-1', 'us-east-1', 'area-51'];

      // "ignore_above": 1024 in mapping
      const MALFORMED_LOG_LEVEL = MORE_THAN_1024_CHARS;

      // "ignore_above": 1024 in mapping
      const MALFORMED_CLOUD_REGION = MORE_THAN_1024_CHARS;

      const CLUSTER = [
        { clusterId: generateShortId(), clusterName: 'synth-cluster-1' },
        { clusterId: generateShortId(), clusterName: 'synth-cluster-2' },
        { clusterId: generateShortId(), clusterName: 'synth-cluster-3' },
      ];

      const SERVICE_NAMES = Array(3)
        .fill(null)
        .map((_, idx) => `synth-service-${idx}`);

      const datasetSynth1Logs = (timestamp: number) => {
        const index = Math.floor(Math.random() * 3);
        return log
          .create()
          .dataset('synth.1')
          .message(MESSAGE_LOG_LEVELS[index].message as string)
          .logLevel(MESSAGE_LOG_LEVELS[index].level)
          .service(SERVICE_NAMES[index])
          .defaults({
            'trace.id': generateShortId(),
            'agent.name': 'synth-agent',
            'orchestrator.cluster.name': CLUSTER[index].clusterName,
            'orchestrator.cluster.id': CLUSTER[index].clusterId,
            'orchestrator.resource.id': generateShortId(),
            'cloud.provider': CLOUD_PROVIDERS[Math.floor(Math.random() * 3)],
            'cloud.region': CLOUD_REGION[index],
            'cloud.availability_zone': `${CLOUD_REGION[index]}a`,
            'cloud.project.id': generateShortId(),
            'cloud.instance.id': generateShortId(),
            'log.file.path': `/logs/${generateLongId()}/error.txt`,
          })
          .timestamp(timestamp);
      };

      const datasetSynth2Logs = (i: number, timestamp: number) => {
        const index = Math.floor(Math.random() * 3);
        const isMalformed = i % 60 === 0;
        return log
          .create()
          .dataset('synth.2')
          .message(MESSAGE_LOG_LEVELS[index].message as string)
          .logLevel(isMalformed ? MALFORMED_LOG_LEVEL : MESSAGE_LOG_LEVELS[index].level)
          .service(SERVICE_NAMES[index])
          .defaults({
            'trace.id': generateShortId(),
            'agent.name': 'synth-agent',
            'orchestrator.cluster.name': CLUSTER[index].clusterName,
            'orchestrator.cluster.id': CLUSTER[index].clusterId,
            'orchestrator.resource.id': generateShortId(),
            'cloud.provider': CLOUD_PROVIDERS[Math.floor(Math.random() * 3)],
            'cloud.region': CLOUD_REGION[index],
            'cloud.availability_zone': `${CLOUD_REGION[index]}a`,
            'cloud.project.id': generateShortId(),
            'cloud.instance.id': generateShortId(),
            'log.file.path': `/logs/${generateLongId()}/error.txt`,
          })
          .timestamp(timestamp);
      };

      const datasetSynth3Logs = (i: number, timestamp: number) => {
        const index = Math.floor(Math.random() * 3);
        const isMalformed = i % 10 === 0;
        return log
          .create()
          .dataset('synth.3')
          .message(MESSAGE_LOG_LEVELS[index].message as string)
          .logLevel(isMalformed ? MALFORMED_LOG_LEVEL : MESSAGE_LOG_LEVELS[index].level)
          .service(SERVICE_NAMES[index])
          .defaults({
            'trace.id': generateShortId(),
            'agent.name': 'synth-agent',
            'orchestrator.cluster.name': CLUSTER[index].clusterName,
            'orchestrator.cluster.id': CLUSTER[index].clusterId,
            'orchestrator.resource.id': generateShortId(),
            'cloud.provider': CLOUD_PROVIDERS[Math.floor(Math.random() * 3)],
            'cloud.region': CLOUD_REGION[index],
            'cloud.availability_zone': isMalformed
              ? MALFORMED_CLOUD_REGION
              : `${CLOUD_REGION[index]}a`,
            'cloud.project.id': generateShortId(),
            'cloud.instance.id': generateShortId(),
            'log.file.path': `/logs/${generateLongId()}/error.txt`,
          })
          .timestamp(timestamp);
      };

      const logs = range
        .interval('1m')
        .rate(1)
        .generator((timestamp) => {
          return Array(200)
            .fill(0)
            .flatMap((_, index) => [
              datasetSynth1Logs(timestamp),
              datasetSynth2Logs(index, timestamp),
              datasetSynth3Logs(index, timestamp),
            ]);
        });

      return withClient(
        logsEsClient,
        logger.perf('generating_logs', () => logs)
      );
    },
  };
};

export default scenario;
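For context (not part of the diff): the modulo checks above give each dataset a fixed share of malformed documents per generated interval. synth.1 emits none, synth.2 makes every 60th document malformed, and synth.3 every 10th, because values longer than 1024 characters hit the "ignore_above": 1024 keyword mapping and the affected field is recorded in the document's _ignored metadata. A small TypeScript sketch of the resulting percentages for the 200 documents generated per interval:

const DOCS_PER_INTERVAL = 200;

// Share of documents whose index i satisfies i % every === 0 within one interval.
const malformedShare = (every?: number): number =>
  every ? Math.ceil(DOCS_PER_INTERVAL / every) / DOCS_PER_INTERVAL : 0;

console.log({
  'synth.1': malformedShare(),   // 0    -> never malformed
  'synth.2': malformedShare(60), // 0.02 -> ~2% of docs get an oversized log.level
  'synth.3': malformedShare(10), // 0.1  -> 10% get oversized log.level and cloud.availability_zone
});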
2 changes: 1 addition & 1 deletion packages/kbn-optimizer/limits.yml
@@ -28,7 +28,7 @@ pageLoadAssetSize:
  dashboard: 82025
  dashboardEnhanced: 65646
  data: 454087
- datasetQuality: 35000
+ datasetQuality: 50624
  dataViewEditor: 28082
  dataViewFieldEditor: 27000
  dataViewManagement: 5100
67 changes: 67 additions & 0 deletions x-pack/plugins/dataset_quality/common/api_types.ts
@@ -0,0 +1,67 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import * as rt from 'io-ts';

export const datasetStatRt = rt.intersection([
  rt.type({
    name: rt.string,
  }),
  rt.partial({
    size: rt.string,
    sizeBytes: rt.number,
    lastActivity: rt.number,
    integration: rt.string,
  }),
]);

export const integrationIconRt = rt.intersection([
  rt.type({
    path: rt.string,
    src: rt.string,
  }),
  rt.partial({
    title: rt.string,
    size: rt.string,
    type: rt.string,
  }),
]);

export const integrationRt = rt.intersection([
  rt.type({
    name: rt.string,
  }),
  rt.partial({
    title: rt.string,
    version: rt.string,
    icons: rt.array(integrationIconRt),
  }),
]);

export const malformedDocsRt = rt.type({
  dataset: rt.string,
  percentage: rt.number,
});

export type MalformedDocs = rt.TypeOf<typeof malformedDocsRt>;

export const getDataStreamsStatsResponseRt = rt.exact(
  rt.intersection([
    rt.type({
      dataStreamsStats: rt.array(datasetStatRt),
    }),
    rt.type({
      integrations: rt.array(integrationRt),
    }),
  ])
);

export const getDataStreamsMalformedDocsStatsResponseRt = rt.exact(
  rt.type({
    malformedDocs: rt.array(malformedDocsRt),
  })
);
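Because these are io-ts codecs, the new malformed-docs response can be validated at runtime with decode. A minimal sketch, not part of the PR; the payload below is hypothetical, only the codec and its file come from this diff:

import { isRight } from 'fp-ts/Either';
import { getDataStreamsMalformedDocsStatsResponseRt } from './api_types';

// Hypothetical API payload used only for illustration.
const payload: unknown = {
  malformedDocs: [{ dataset: 'synth.3', percentage: 10 }],
};

const decoded = getDataStreamsMalformedDocsStatsResponseRt.decode(payload);
if (isRight(decoded)) {
  // decoded.right is typed as { malformedDocs: MalformedDocs[] }
  for (const { dataset, percentage } of decoded.right.malformedDocs) {
    console.log(`${dataset}: ${percentage}% malformed docs`);
  }
}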
4 changes: 3 additions & 1 deletion x-pack/plugins/dataset_quality/common/constants.ts
@@ -6,4 +6,6 @@
*/

export const DATASET_QUALITY_APP_ID = 'dataset_quality';
export const DATA_STREAMS_STATS_URL = '/internal/dataset_quality/data_streams/stats';

export const POOR_QUALITY_MINIMUM_PERCENTAGE = 3;
export const DEGRADED_QUALITY_MINIMUM_PERCENTAGE = 0;
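The two new thresholds presumably bucket a dataset's malformed-docs percentage into a quality level; the consuming UI code is not shown in this excerpt, so the mapping below is only an illustrative assumption:

import {
  POOR_QUALITY_MINIMUM_PERCENTAGE,
  DEGRADED_QUALITY_MINIMUM_PERCENTAGE,
} from './constants';

type QualityIndicator = 'poor' | 'degraded' | 'good';

// Hypothetical mapping: above 3% is poor, above 0% is degraded, otherwise good.
const qualityFromPercentage = (percentage: number): QualityIndicator =>
  percentage > POOR_QUALITY_MINIMUM_PERCENTAGE
    ? 'poor'
    : percentage > DEGRADED_QUALITY_MINIMUM_PERCENTAGE
    ? 'degraded'
    : 'good';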
@@ -15,6 +15,7 @@ export class DataStreamStat {
  sizeBytes?: DataStreamStatType['size_bytes'];
  lastActivity?: DataStreamStatType['last_activity'];
  integration?: IntegrationType;
  malformedDocs?: number;

  private constructor(dataStreamStat: DataStreamStat) {
    this.name = dataStreamStat.name;
@@ -23,6 +24,7 @@
    this.sizeBytes = dataStreamStat.sizeBytes;
    this.lastActivity = dataStreamStat.lastActivity;
    this.integration = dataStreamStat.integration;
    this.malformedDocs = dataStreamStat.malformedDocs;
  }

  public static create(dataStreamStat: DataStreamStatType) {
@@ -0,0 +1,22 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { MalformedDocsStatType } from './types';

export class MalformedDocsStat {
  dataset: MalformedDocsStatType['dataset'];
  percentage: MalformedDocsStatType['percentage'];

  private constructor(malformedDocsStat: MalformedDocsStat) {
    this.dataset = malformedDocsStat.dataset;
    this.percentage = malformedDocsStat.percentage;
  }

  public static create(malformedDocsStat: MalformedDocsStatType) {
    return new MalformedDocsStat(malformedDocsStat);
  }
}
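A short usage sketch for the new class, assuming the response type defined in the types file below; the adapter function and the import paths are hypothetical:

// Hypothetical adapter; import paths are assumed, not taken from the diff.
import { MalformedDocsStat } from './malformed_docs_stat';
import { GetDataStreamsMalformedDocsStatsResponse } from './types';

const toMalformedDocsStats = (response: GetDataStreamsMalformedDocsStatsResponse) =>
  response.malformedDocs.map((entry) => MalformedDocsStat.create(entry));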
@@ -14,5 +14,15 @@ export type GetDataStreamsStatsQuery = GetDataStreamsStatsParams['query'];
export type GetDataStreamsStatsResponse =
APIReturnType<`GET /internal/dataset_quality/data_streams/stats`>;
export type DataStreamStatServiceResponse = DataStreamStat[];
export type DataStreamStatType = GetDataStreamsStatsResponse['dataStreamsStats'][0];
export type IntegrationType = GetDataStreamsStatsResponse['integrations'][0];
export type DataStreamStatType = GetDataStreamsStatsResponse['dataStreamsStats'][0] & {
  integration?: IntegrationType;
};

export type GetDataStreamsMalformedDocsStatsParams =
APIClientRequestParamsOf<`GET /internal/dataset_quality/data_streams/malformed_docs`>['params'];
export type GetDataStreamsMalformedDocsStatsQuery = GetDataStreamsMalformedDocsStatsParams['query'];
export type GetDataStreamsMalformedDocsStatsResponse =
APIReturnType<`GET /internal/dataset_quality/data_streams/malformed_docs`>;
export type DataStreamMalformedDocsStatServiceResponse = MalformedDocsStatType[];
export type MalformedDocsStatType = GetDataStreamsMalformedDocsStatsResponse['malformedDocs'][0];
12 changes: 12 additions & 0 deletions x-pack/plugins/dataset_quality/common/es_fields/index.ts
@@ -0,0 +1,12 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

export const _IGNORED = '_ignored';

export const DATA_STREAM_DATASET = 'data_stream.dataset';
export const DATA_STREAM_NAMESPACE = 'data_stream.namespace';
export const DATA_STREAM_TYPE = 'data_stream.type';
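These field names hint at how the malformed-docs percentage can be computed server-side: a keyword value longer than its ignore_above limit is not indexed, and the offending field name is recorded in the _ignored metadata field, so an exists filter on _ignored inside a per-dataset terms aggregation yields malformed versus total counts. The request body below sketches that idea; it is not the actual route implementation from this PR:

// Hypothetical Elasticsearch request body; the import path is assumed.
import { _IGNORED, DATA_STREAM_DATASET } from './es_fields';

const malformedDocsRequestBody = {
  size: 0,
  aggs: {
    datasets: {
      terms: { field: DATA_STREAM_DATASET, size: 1000 },
      aggs: {
        // Docs with at least one ignored field count as malformed.
        malformed: { filter: { exists: { field: _IGNORED } } },
      },
    },
  },
};

// Per dataset bucket: percentage = (malformed.doc_count / doc_count) * 100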