Skip to content

Commit

Permalink
[Obs AI Assistant] Remove ES|QL escaping for index names (elastic#183028
Browse files Browse the repository at this point in the history
)

There were some last minute changes to ES|QL that will ship in 8.14 that
we need to take into account:

- [BUCKET is now an aggregation
function](elastic/elasticsearch#107272)
- [index names can no longer be escaped with backticks
](elastic/elasticsearch#108431)

I'm also including a change that translates `=` to `==` in WHERE
commands, and more useful error messages (map a syntax error to the
command where it occurred).

As BUCKET is often used for timeseries data and we replaced single and
double quotes around index names with backticks, this introduces a high
chance of generating syntactically invalid queries. This PR updates the
docs and examples and removes the correction from `"` and `'` to backticks.
  • Loading branch information
dgieselaar authored May 9, 2024
1 parent 24a34a4 commit 6004cad
Show file tree
Hide file tree
Showing 63 changed files with 468 additions and 380 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ describe('correctCommonEsqlMistakes', () => {

it('replaces aliasing via the AS keyword with the = operator', () => {
expectQuery(`FROM logs-* | STATS COUNT() AS count`, 'FROM logs-*\n| STATS count = COUNT()');

expectQuery(`FROM logs-* | STATS COUNT() as count`, 'FROM logs-*\n| STATS count = COUNT()');

expectQuery(
`FROM logs-* | STATS AVG(transaction.duration.histogram) AS avg_request_latency, PERCENTILE(transaction.duration.histogram, 95) AS p95`,
`FROM logs-*
Expand All @@ -42,11 +45,33 @@ describe('correctCommonEsqlMistakes', () => {
});

it(`replaces " or ' escaping in FROM statements with backticks`, () => {
expectQuery(`FROM "logs-*" | LIMIT 10`, 'FROM `logs-*`\n| LIMIT 10');
expectQuery(`FROM 'logs-*' | LIMIT 10`, 'FROM `logs-*`\n| LIMIT 10');
expectQuery(`FROM "logs-*" | LIMIT 10`, 'FROM logs-*\n| LIMIT 10');
expectQuery(`FROM 'logs-*' | LIMIT 10`, 'FROM logs-*\n| LIMIT 10');
expectQuery(`FROM logs-* | LIMIT 10`, 'FROM logs-*\n| LIMIT 10');
});

it('replaces = as equal operator with ==', () => {
expectQuery(
`FROM logs-*\n| WHERE service.name = "foo"`,
`FROM logs-*\n| WHERE service.name == "foo"`
);

expectQuery(
`FROM logs-*\n| WHERE service.name = "foo" AND service.environment = "bar"`,
`FROM logs-*\n| WHERE service.name == "foo" AND service.environment == "bar"`
);

expectQuery(
`FROM logs-*\n| WHERE (service.name = "foo" AND service.environment = "bar") OR agent.name = "baz"`,
`FROM logs-*\n| WHERE (service.name == "foo" AND service.environment == "bar") OR agent.name == "baz"`
);

expectQuery(
`FROM logs-*\n| WHERE \`what=ever\` = "foo=bar"`,
`FROM logs-*\n| WHERE \`what=ever\` == "foo=bar"`
);
});

it('replaces single-quote escaped strings with double-quote escaped strings', () => {
expectQuery(
`FROM nyc_taxis
Expand Down Expand Up @@ -102,7 +127,7 @@ describe('correctCommonEsqlMistakes', () => {
| EVAL "@timestamp" = TO_DATETIME(timestamp)
| WHERE statement LIKE 'SELECT%'
| STATS avg_duration = AVG(duration)`,
`FROM \`postgres-logs*\`
`FROM postgres-logs*
| GROK message "%{TIMESTAMP_ISO8601:timestamp} %{TZ} \[%{NUMBER:process_id}\]: \[%{NUMBER:log_line}\] user=%{USER:user},db=%{USER:database},app=\[%{DATA:application}\],client=%{IP:client_ip} LOG: duration: %{NUMBER:duration:float} ms statement: %{GREEDYDATA:statement}"
| EVAL @timestamp = TO_DATETIME(timestamp)
| WHERE statement LIKE "SELECT%"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ function split(value: string, splitToken: string) {
return statements;
}

function splitIntoCommands(query: string) {
export function splitIntoCommands(query: string) {
const commands: string[] = split(query, '|');

return commands.map((command) => {
Expand Down Expand Up @@ -93,8 +93,8 @@ function removeColumnQuotesAndEscape(column: string) {
function replaceAsKeywordWithAssignments(command: string) {
return command.replaceAll(/^STATS\s*(.*)/g, (__, statsOperations: string) => {
return `STATS ${statsOperations.replaceAll(
/(,\s*)?(.*?)\sAS\s([`a-zA-Z0-9.\-_]+)/g,
'$1$3 = $2'
/(,\s*)?(.*?)\s(AS|as)\s([`a-zA-Z0-9.\-_]+)/g,
'$1$4 = $2'
)}`;
});
}
Expand Down Expand Up @@ -196,6 +196,30 @@ function escapeExpressionsInSort(sortCommand: string) {
return `SORT ${columnsInSort.join(', ')}`;
}

/**
 * Rewrites a lone `=` used as a comparison operator in a WHERE command to `==`.
 *
 * Only an `=` that has a single space on both sides is rewritten, so operators
 * such as `>=`, `<=`, `!=` and an existing `==` are left alone. Text inside
 * double-quoted string literals and backtick-quoted identifiers is copied
 * verbatim, so values like `"a = b"` or columns like `` `my = col` `` are not
 * corrupted.
 *
 * @param whereCommand A single command, expected to start with `WHERE `.
 * @returns The command with comparison operators corrected. If the command
 *   does not start with `WHERE `, it is returned unchanged.
 */
function ensureEqualityOperators(whereCommand: string) {
  const prefix = 'WHERE ';

  // Nothing to correct if this is not a WHERE command; avoid throwing on it.
  if (!whereCommand.startsWith(prefix)) {
    return whereCommand;
  }

  const body = whereCommand.slice(prefix.length);

  let corrected = '';
  // The delimiter (`"` or `` ` ``) of the quoted section we are currently in, if any.
  let openQuote: string | undefined;
  // True when the previous character was a backslash inside a double-quoted string.
  let isEscaped = false;

  for (let index = 0; index < body.length; index++) {
    const char = body[index];
    corrected += char;

    if (openQuote !== undefined) {
      if (isEscaped) {
        // This character is escaped, so it can never close the string.
        isEscaped = false;
      } else if (char === '\\' && openQuote === '"') {
        isEscaped = true;
      } else if (char === openQuote) {
        openQuote = undefined;
      }
      // Never rewrite anything while inside quoted text.
      continue;
    }

    if (char === '"' || char === '`') {
      openQuote = char;
    } else if (char === '=' && body[index - 1] === ' ' && body[index + 1] === ' ') {
      // A space-delimited single `=` outside of quotes is an assignment-style
      // comparison; append a second `=` to turn it into `==`.
      corrected += '=';
    }
  }

  return `WHERE ${corrected}`;
}

export function correctCommonEsqlMistakes(content: string, log: Logger) {
return content.replaceAll(/```esql\n(.*?)\n```/gms, (_, query: string) => {
const commands = splitIntoCommands(query.trim());
Expand All @@ -206,12 +230,14 @@ export function correctCommonEsqlMistakes(content: string, log: Logger) {
switch (name) {
case 'FROM':
formattedCommand = formattedCommand
.replaceAll(/FROM "(.*)"/g, 'FROM `$1`')
.replaceAll(/FROM '(.*)'/g, 'FROM `$1`');
.replaceAll(/FROM "(.*)"/g, 'FROM $1')
.replaceAll(/FROM '(.*)'/g, 'FROM $1')
.replaceAll(/FROM `(.*)`/g, 'FROM $1');
break;

case 'WHERE':
formattedCommand = replaceSingleQuotesWithDoubleQuotes(formattedCommand);
formattedCommand = ensureEqualityOperators(formattedCommand);
break;

case 'EVAL':
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
BUCKET

Syntax
BUCKET(expression, buckets, from, to)
Parameters
field
Numeric or date expression from which to derive buckets.
Expand All @@ -11,73 +10,94 @@ from
Start of the range. Can be a number or a date expressed as a string.
to
End of the range. Can be a number or a date expressed as a string.
DescriptionCreates human-friendly buckets and returns a value for each row that corresponds
to the resulting bucket the row falls into.Using a target number of buckets, a start of a range, and an end of a range,
BUCKET picks an appropriate bucket size to generate the target number of
buckets or fewer. For example, asking for at most 20 buckets over a year results
in monthly buckets:
DescriptionCreates groups of values - buckets - out of a datetime or numeric input. The size of the buckets can either be provided directly, or chosen based on a recommended count and values range.Supported types
ExamplesBUCKET can work in two modes: one in which the size of the bucket is computed
based on a buckets count recommendation (four parameters) and a range, and
another in which the bucket size is provided directly (two parameters).Using a target number of buckets, a start of a range, and an end of a range,
BUCKET picks an appropriate bucket size to generate the target number of buckets or fewer.
For example, asking for at most 20 buckets over a year results in monthly buckets:
```esql
FROM employees
| WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z"
| EVAL month = BUCKET(hire_date, 20, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")
| KEEP hire_date, month
| STATS hire_date = MV_SORT(VALUES(hire_date)) BY month = BUCKET(hire_date, 20, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")
| SORT hire_date
```

The goal isn’t to provide exactly the target number of buckets, it’s to pick a
range that people are comfortable with that provides at most the target number
of buckets.Combine BUCKET with
STATS ... BY to create a histogram:
The goal isn’t to provide exactly the target number of buckets,
it’s to pick a range that people are comfortable with that provides at most the target number of buckets.Combine BUCKET with an aggregation to create a histogram:
```esql
FROM employees
| WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z"
| EVAL month = BUCKET(hire_date, 20, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")
| STATS hires_per_month = COUNT(*) BY month
| STATS hires_per_month = COUNT(*) BY month = BUCKET(hire_date, 20, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")
| SORT month
```

BUCKET does not create buckets that don’t match any documents.
That’s why this example is missing 1985-03-01 and other dates.
Asking for more buckets can result in a smaller range. For example, asking for
at most 100 buckets in a year results in weekly buckets:
Asking for more buckets can result in a smaller range.
For example, asking for at most 100 buckets in a year results in weekly buckets:
```esql
FROM employees
| WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z"
| EVAL week = BUCKET(hire_date, 100, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")
| STATS hires_per_week = COUNT(*) BY week
| STATS hires_per_week = COUNT(*) BY week = BUCKET(hire_date, 100, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")
| SORT week
```

BUCKET does not filter any rows. It only uses the provided range to
pick a good bucket size. For rows with a value outside of the range, it returns
a bucket value that corresponds to a bucket outside the range. Combine
BUCKET with WHERE to filter rows.
BUCKET can also operate on numeric fields. For example, to create a
salary histogram:
BUCKET does not filter any rows. It only uses the provided range to pick a good bucket size.
For rows with a value outside of the range, it returns a bucket value that corresponds to a bucket outside the range.
Combine `BUCKET` with WHERE to filter rows.
If the desired bucket size is known in advance, simply provide it as the second
argument, leaving the range out:
```esql
FROM employees
| EVAL bs = BUCKET(salary, 20, 25324, 74999)
| STATS COUNT(*) by bs
| WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z"
| STATS hires_per_week = COUNT(*) BY week = BUCKET(hire_date, 1 week)
| SORT week
```

When providing the bucket size as the second parameter, it must be a time
duration or date period.
BUCKET can also operate on numeric fields. For example, to create a salary histogram:
```esql
FROM employees
| STATS COUNT(*) by bs = BUCKET(salary, 20, 25324, 74999)
| SORT bs
```

Unlike the earlier example that intentionally filters on a date range, you
rarely want to filter on a numeric range. You have to find the min and max
separately. ES|QL doesn’t yet have an easy way to do that automatically.ExamplesCreate hourly buckets for the last 24 hours, and calculate the number of events
per hour:
Unlike the earlier example that intentionally filters on a date range, you rarely want to filter on a numeric range.
You have to find the min and max separately. ES|QL doesn’t yet have an easy way to do that automatically.The range can be omitted if the desired bucket size is known in advance. Simply
provide it as the second argument:
```esql
FROM employees
| WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z"
| STATS c = COUNT(1) BY b = BUCKET(salary, 5000.)
| SORT b
```

When providing the bucket size as the second parameter, it must be
of a floating point type.
Create hourly buckets for the last 24 hours, and calculate the number of events per hour:
```esql
FROM sample_data
| WHERE @timestamp >= NOW() - 1 day and @timestamp < NOW()
| EVAL bucket = BUCKET(@timestamp, 25, NOW() - 1 day, NOW())
| STATS COUNT(*) BY bucket
| STATS COUNT(*) BY bucket = BUCKET(@timestamp, 25, NOW() - 1 day, NOW())
```

Create monthly buckets for the year 1985, and calculate the average salary by
hiring month:
Create monthly buckets for the year 1985, and calculate the average salary by hiring month:
```esql
FROM employees
| WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z"
| EVAL bucket = BUCKET(hire_date, 20, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")
| STATS AVG(salary) BY bucket
| STATS AVG(salary) BY bucket = BUCKET(hire_date, 20, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")
| SORT bucket
```

BUCKET may be used in both the aggregating and grouping part of the
STATS …​ BY …​ command provided that in the aggregating
part the function is referenced by an alias defined in the
grouping part, or that it is invoked with the exact same expression:
```esql
FROM employees
| STATS s1 = b1 + 1, s2 = BUCKET(salary / 1000 + 999, 50.) + 2 BY b1 = BUCKET(salary / 100 + 99, 50.), b2 = BUCKET(salary / 1000 + 999, 50.)
| SORT b1, b2
| KEEP s1, b1, s2, b2
```
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Cast (::)

The :: operator provides a convenient alternative syntax to the TO_<type>
conversion functions.
```esql
ROW ver = CONCAT(("0"::INT + 1)::STRING, ".2.3")::VERSION
```
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
CONCAT

Syntax
CONCAT(string1, string2[, ..., stringN])
Parameters
stringX
string1
Strings to concatenate.
DescriptionConcatenates two or more strings.Example
string2
Strings to concatenate.
DescriptionConcatenates two or more strings.Supported types
Example
```esql
FROM employees
| KEEP first_name, last_name
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,17 @@ DATE_DIFF
Syntax
Parameters
unit
Time difference unit.
Time difference unit
startTimestamp
Start timestamp.
A string representing a start timestamp
endTimestamp
End timestamp.
DescriptionSubtracts the startTimestamp from the endTimestamp and returns the
difference in multiples of unit. If startTimestamp is later than the
endTimestamp, negative values are returned.
Supported types
A string representing an end timestamp
DescriptionSubtracts the startTimestamp from the endTimestamp and returns the difference in multiples of unit. If startTimestamp is later than the endTimestamp, negative values are returned.
Note that while there is an overlap between the function’s supported units and
ES|QL’s supported time span literals, these sets are distinct and not
interchangeable. Similarly, the supported abbreviations are conveniently shared
with implementations of this function in other established products and not
necessarily common with the date-time nomenclature used by Elasticsearch.Supported types
Example
```esql
ROW date1 = TO_DATETIME("2023-12-02T11:00:00.000Z"), date2 = TO_DATETIME("2023-12-02T11:00:00.001Z")
Expand Down
Original file line number Diff line number Diff line change
@@ -1,30 +1,19 @@
DATE_EXTRACT

Syntax
DATE_EXTRACT(date_part, date)
Parameters
date_part
Part of the date to extract. Can be: aligned_day_of_week_in_month,
aligned_day_of_week_in_year, aligned_week_of_month, aligned_week_of_year,
ampm_of_day, clock_hour_of_ampm, clock_hour_of_day, day_of_month,
day_of_week, day_of_year, epoch_day, era, hour_of_ampm, hour_of_day,
instant_seconds, micro_of_day, micro_of_second, milli_of_day,
milli_of_second, minute_of_day, minute_of_hour, month_of_year,
nano_of_day, nano_of_second, offset_seconds, proleptic_month,
second_of_day, second_of_minute, year, or year_of_era. Refer to
java.time.temporal.ChronoField
for a description of these values.
If null, the function returns null.
datePart
Part of the date to extract. Can be: aligned_day_of_week_in_month, aligned_day_of_week_in_year, aligned_week_of_month, aligned_week_of_year, ampm_of_day, clock_hour_of_ampm, clock_hour_of_day, day_of_month, day_of_week, day_of_year, epoch_day, era, hour_of_ampm, hour_of_day, instant_seconds, micro_of_day, micro_of_second, milli_of_day, milli_of_second, minute_of_day, minute_of_hour, month_of_year, nano_of_day, nano_of_second, offset_seconds, proleptic_month, second_of_day, second_of_minute, year, or year_of_era. Refer to java.time.temporal.ChronoField for a description of these values. If null, the function returns null.
date
Date expression. If null, the function returns null.
DescriptionExtracts parts of a date, like year, month, day, hour.Examples
DescriptionExtracts parts of a date, like year, month, day, hour.Supported types
Examples
```esql
ROW date = DATE_PARSE("yyyy-MM-dd", "2022-05-06")
| EVAL year = DATE_EXTRACT("year", date)
```

Find all events that occurred outside of business hours (before 9 AM or after 5
PM), on any given date:
Find all events that occurred outside of business hours (before 9 AM or after 5 PM), on any given date:
```esql
FROM sample_data
| WHERE DATE_EXTRACT("hour_of_day", @timestamp) < 9 AND DATE_EXTRACT("hour_of_day", @timestamp) >= 17
Expand Down
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
DATE_FORMAT

Syntax
DATE_FORMAT([format,] date)
Parameters
format
Date format (optional). If no format is specified, the
yyyy-MM-dd'T'HH:mm:ss.SSSZ format is used. If null, the function returns
null.
dateFormat
Date format (optional). If no format is specified, the yyyy-MM-dd'T'HH:mm:ss.SSSZ format is used. If null, the function returns null.
date
Date expression. If null, the function returns null.
DescriptionReturns a string representation of a date, in the provided format.Example
DescriptionReturns a string representation of a date, in the provided format.Supported types
Example
```esql
FROM employees
| KEEP first_name, last_name, hire_date
Expand Down
Original file line number Diff line number Diff line change
@@ -1,17 +1,13 @@
DATE_PARSE

Syntax
DATE_PARSE([format,] date_string)
Parameters
format
The date format. Refer to the
DateTimeFormatter
documentation for the syntax. If null, the function returns null.
date_string
Date expression as a string. If null or an empty string, the function returns
null.
DescriptionReturns a date by parsing the second argument using the format specified in the
first argument.Example
datePattern
The date format. Refer to the DateTimeFormatter documentation for the syntax. If null, the function returns null.
dateString
Date expression as a string. If null or an empty string, the function returns null.
DescriptionReturns a date by parsing the second argument using the format specified in the first argument.Supported types
Example
```esql
ROW date_string = "2022-05-06"
| EVAL date = DATE_PARSE("yyyy-MM-dd", date_string)
Expand Down
Loading

0 comments on commit 6004cad

Please sign in to comment.