Skip to content

Commit

Permalink
Merge pull request #16 from optimyze/tim/read-from-downsampled-indexes
Browse files Browse the repository at this point in the history
Read from downsampled indexes
  • Loading branch information
rockdaboot authored Feb 21, 2022
2 parents 071113b + f8ec5a0 commit 43651e7
Show file tree
Hide file tree
Showing 4 changed files with 167 additions and 61 deletions.
26 changes: 24 additions & 2 deletions src/plugins/profiling/server/routes/flamegraph.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
*/

function getExeFileName(exe: any, type: number) {
if (exe.FileName === undefined) {
if (exe?.FileName === undefined) {
console.log("MISSING EXE")
return '';
}
if (exe.FileName !== '') {
Expand Down Expand Up @@ -58,12 +59,33 @@ function getLabel(frame: any, executable: any, type: number) {
}

export class FlameGraph {
// sampleRate is 1/5^N, with N being the downsampled index the events were fetched from.
// N=0: full events table (sampleRate is 1)
// N=1: downsampled by 5 (sampleRate is 0.2)
// ...
sampleRate: number;

// totalCount is the sum(Count) of all events in the filter range in the
// downsampled index we were looking at.
// To estimate how many events we have in the full events index: totalCount / sampleRate.
// Do the same for single entries in the events array.
totalCount: number;

events: any;
stacktraces: any;
stackframes: any;
executables: any;

constructor(events: any, stackTraces: any, stackFrames: any, executables: any) {
constructor(
sampleRate: number,
totalCount: number,
events: any,
stackTraces: any,
stackFrames: any,
executables: any
) {
this.sampleRate = sampleRate;
this.totalCount = totalCount;
this.events = events;
this.stacktraces = stackTraces;
this.stackframes = stackFrames;
Expand Down
2 changes: 1 addition & 1 deletion src/plugins/profiling/server/routes/mappings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

import { AggregationsAggregationContainer } from '@elastic/elasticsearch/lib/api/types';

interface ProjectTimeQuery {
export interface ProjectTimeQuery {
bool: {
filter: Array<
| {
Expand Down
57 changes: 57 additions & 0 deletions src/plugins/profiling/server/routes/search_flamechart.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

import { getSampledTraceEventsIndex, DownsampledEventsIndex } from './search_flamechart';

describe('Using down-sampled indexes', () => {
test('getSampledTraceEventsIndex', () => {
const targetSampleSize = 20000;
const initialExp = 6;
const tests: Array<{
sampleCountFromPow6: number;
expected: DownsampledEventsIndex;
}> = [
{
// stay with the input downsampled index
sampleCountFromPow6: targetSampleSize,
expected: { name: 'profiling-events-5pow6', sampleRate: 1 / 5 ** 6 },
},
{
// stay with the input downsampled index
sampleCountFromPow6: targetSampleSize * 5 - 1,
expected: { name: 'profiling-events-5pow6', sampleRate: 1 / 5 ** 6 },
},
{
// go down one downsampling step
sampleCountFromPow6: targetSampleSize * 5,
expected: { name: 'profiling-events-5pow7', sampleRate: 1 / 5 ** 7 },
},
{
// go up one downsampling step
sampleCountFromPow6: targetSampleSize - 1,
expected: { name: 'profiling-events-5pow5', sampleRate: 1 / 5 ** 5 },
},
{
// go to the full events index
sampleCountFromPow6: 0,
expected: { name: 'profiling-events', sampleRate: 1 },
},
{
// go to the most downsampled index
sampleCountFromPow6: targetSampleSize * 5 ** 8,
expected: { name: 'profiling-events-5pow11', sampleRate: 1 / 5 ** 11 },
},
];

for (const t of tests) {
expect(getSampledTraceEventsIndex(targetSampleSize, t.sampleCountFromPow6, initialExp)).toEqual(
t.expected
);
}
});
});
143 changes: 85 additions & 58 deletions src/plugins/profiling/server/routes/search_flamechart.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,47 +12,56 @@ import { getRemoteRoutePaths } from '../../common';
import { FlameGraph } from './flamegraph';
import { newProjectTimeQuery } from './mappings';

function getSampledTraceEventsIndex(
sampleSize: number,
sampleCountFromSmallestTable: number
): [string, number, number] {
if (sampleCountFromSmallestTable === 0) {
// If this happens the returned estimatedSampleCount may be very wrong.
// Hardcode sampleCountFromSmallestTable to 1 else an estimatedSampleCount
// of zero is returned.
sampleCountFromSmallestTable = 1;
}

const sampleRates: number[] = [1000, 125, 25, 5, 1];

let tableName: string;
let sampleRate = 0;
let estimatedSampleCount = 0;

for (let i = 0; i < sampleRates.length; i++) {
sampleRate = sampleRates[i];
export interface DownsampledEventsIndex {
name: string;
sampleRate: number;
}

// Fractional floats have an inherent inaccuracy,
// see https://docs.python.org/3/tutorial/floatingpoint.htm) for details.
// Examples:
// 0.01 as float32: 0.009999999776482582092285156250 (< 0.01)
// 0.01 as float64: 0.010000000000000000208166817117 (> 0.01)
// But if "N / f >= 1", which is the case below as N is >= 1 and f is <= 1,
// we end up with the correct result (tested with a small Go program).
estimatedSampleCount = sampleCountFromSmallestTable * (sampleRates[0] / sampleRate);
const downsampledIndex = 'profiling-events-5pow';

// Return the index that has between targetSampleSize..targetSampleSize*samplingFactor entries.
// The starting point is the number of entries from the profiling-events-5pow<initialExp> index.
//
// More details on how the down-sampling works can be found at the write path
// https://github.com/elastic/prodfiler/blob/bdcc2711c6cd7e89d63b58a17329fb9fdbabe008/pf-elastic-collector/elastic.go
export function getSampledTraceEventsIndex(
targetSampleSize: number,
sampleCountFromInitialExp: number,
initialExp: number
): DownsampledEventsIndex {
const maxExp = 11;
const samplingFactor = 5;
const fullEventsIndex: DownsampledEventsIndex = { name: 'profiling-events', sampleRate: 1 };

if (sampleCountFromInitialExp === 0) {
// Take the shortcut to the full events index.
return fullEventsIndex;
}

if (estimatedSampleCount >= sampleSize) {
break;
if (sampleCountFromInitialExp >= samplingFactor * targetSampleSize) {
// Search in more down-sampled indexes.
for (let i = initialExp + 1; i <= maxExp; i++) {
sampleCountFromInitialExp /= samplingFactor;
if (sampleCountFromInitialExp < samplingFactor * targetSampleSize) {
return { name: downsampledIndex + i, sampleRate: 1 / samplingFactor ** i };
}
}
// If we come here, it means that the most sparse index still holds too many items.
// The only problem is the query time, the result set is good.
return { name: downsampledIndex + 11, sampleRate: 1 / samplingFactor ** maxExp };
} else if (sampleCountFromInitialExp < targetSampleSize) {
// Search in less down-sampled indexes.
for (let i = initialExp -1; i >= 1; i--) {
sampleCountFromInitialExp *= samplingFactor;
if (sampleCountFromInitialExp >= targetSampleSize) {
return { name: downsampledIndex + i, sampleRate: 1 / samplingFactor ** i };
}
}
}

if (sampleRate === 1) {
tableName = 'profiling-events';
} else {
tableName = 'profiling-events-' + sampleRate.toString();
return fullEventsIndex;
}

return [tableName, 1 / sampleRate, estimatedSampleCount];
return { name: downsampledIndex + initialExp, sampleRate: 1 / samplingFactor ** initialExp };
}

export function registerFlameChartSearchRoute(router: IRouter<DataRequestHandlerContext>) {
Expand All @@ -71,64 +80,80 @@ export function registerFlameChartSearchRoute(router: IRouter<DataRequestHandler
},
},
async (context, request, response) => {
const { index, projectID, timeFrom, timeTo } = request.query;
const sampleSize = 200;
const { projectID, timeFrom, timeTo } = request.query;
const targetSampleSize = 20000; // minimum number of samples to get statistically sound results
const topN = 200; // collect data for the top N unique stack traces

try {
const esClient = context.core.elasticsearch.client.asCurrentUser;
const filter = newProjectTimeQuery(projectID!, timeFrom!, timeTo!);

// const resp = await getCountResponse(context, filter);
// Start with counting the results in the index down-sampled by 5^6.
// That is in the middle of our down-sampled indexes.
const initialExp = 6;
const resp = await esClient.search({
index: 'profiling-events-1000',
index: downsampledIndex + initialExp,
body: {
query: filter,
size: 0,
track_total_hits: true,
},
});
const sampleCountFromInitialExp = resp.body.hits.total.value as number;

const sampleCountFromSmallestTable = resp.body.hits.total.value as number;
console.log('sampleCountFromPow6', sampleCountFromInitialExp);

const [eventsIndex, sampleRate, estimatedSampleCount] = getSampledTraceEventsIndex(
sampleSize,
sampleCountFromSmallestTable
const eventsIndex = getSampledTraceEventsIndex(
targetSampleSize,
sampleCountFromInitialExp,
initialExp
);

// eslint-disable-next-line no-console
console.log('Index', eventsIndex, sampleRate, estimatedSampleCount);
console.log('EventsIndex', eventsIndex);

const resEvents = await esClient.search({
index: eventsIndex,
index: eventsIndex.name,
body: {
size: 0,
query: {
function_score: {
query: filter,
},
},
query: filter,
aggs: {
sample: {
sampler: {
shard_size: sampleSize,
group_by: {
terms: {
field: 'StackTraceID',
size: topN,
},
aggs: {
group_by: {
terms: {
field: 'StackTraceID',
size: sampleSize,
sum_count: {
sum: {
field: 'Count',
},
},
},
},
total_count: {
sum: {
field: 'Count',
},
},
},
},
});
// console.log(JSON.stringify(resEvents, null, 2));

const tracesDocIDs: string[] = [];
resEvents.body.aggregations.sample.group_by.buckets.forEach((stackTraceItem: any) => {
let sumCount = 0;
let docCount = 0;
resEvents.body.aggregations.group_by.buckets.forEach((stackTraceItem: any) => {
tracesDocIDs.push(stackTraceItem.key);
sumCount += stackTraceItem.sum_count.value;
docCount += stackTraceItem.doc_count;
});
const totalCount: number = resEvents.body.aggregations.total_count.value;
const otherDocCount: number = resEvents.body.aggregations.group_by.sum_other_doc_count;

console.log('docCount', docCount, 'otherDocCount', otherDocCount);
console.log('sumCount', sumCount, 'totalCount', totalCount);

const resStackTraces = await esClient.mget<any>({
index: 'profiling-stacktraces',
Expand Down Expand Up @@ -166,6 +191,8 @@ export function registerFlameChartSearchRoute(router: IRouter<DataRequestHandler
});

const flamegraph = new FlameGraph(
eventsIndex.sampleRate,
totalCount,
resEvents.body,
resStackTraces.body.docs,
resStackFrames.body.docs,
Expand Down

0 comments on commit 43651e7

Please sign in to comment.