re-enable e2e UI tests on CI (#1961)
#1692 is still open. This PR is not an ideal approach, but it's a quick
win while we wait for that issue to be resolved.

By retrying failing tests up to 3 times, we _should_ be fine to
re-enable these on CI. If a test fails more than 3 times, there's likely a
legitimate issue occurring.
joeyorlando authored May 23, 2023
1 parent 06bd045 commit c793e55
Showing 9 changed files with 67 additions and 99 deletions.
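In Playwright terms, the retry approach described in the commit message is a one-line config switch. A minimal sketch of the mechanism this PR relies on (the real change to grafana-plugin/playwright.config.ts appears later in this diff):

```ts
import type { PlaywrightTestConfig } from '@playwright/test';

// retry failed tests up to 3 times on CI only; locally, fail immediately
const config: PlaywrightTestConfig = {
  retries: process.env.CI ? 3 : 0,
};

export default config;
```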
50 changes: 32 additions & 18 deletions .github/workflows/linting-and-tests.yml
@@ -262,15 +262,24 @@ jobs:
pytest -x
end-to-end-tests:
# TODO: reenable this job once https://github.com/grafana/oncall/issues/1692 is fixed
if: ${{ false }}
runs-on: ubuntu-latest
# default "ubuntu-latest" runners only provide 2 CPU cores + 7GB of RAM. this seems to lead to HTTP 504s from
# the oncall backend, and hence, flaky tests. Let's use CI runners w/ more resources to avoid this (plus
# this will allow us to run more backend containers and parallelize the tests)
runs-on: ubuntu-latest-8-cores
name: "End to end tests - Grafana: ${{ matrix.grafana-image-tag }}"
strategy:
matrix:
grafana-image-tag:
- 8.5.22
- 9.2.6
# OnCall doesn't work on the following versions of Grafana
# - 8.5.22
# - 9.0.0
# - 9.1.0

# 9.2.0 is the earliest version where things work
- 9.2.13
- 9.3.14
- 9.4.10
- 9.5.2
- main
- latest
fail-fast: false
@@ -331,10 +340,9 @@ jobs:
- name: Load engine Docker image on the nodes of the cluster
run: kind load image-archive --name=chart-testing /tmp/oncall-engine.tar

# spin up 2 engine, 2 celery, and 2 grafana pods, this will allow us to parallelize the integration tests
# spin up 3 engine, 3 celery, and 3 grafana pods, this will allow us to parallelize the integration tests,
# and complete them much faster by using multiple test processes
# With just 1 engine/celery/grafana pod, the backend crawls to a halt when there is > 1 parallelized integration
# test process
# With just 1 engine/celery/grafana pod, the backend crawls to a halt when there is > 1 parallelized integration test process
#
# by setting grafana.plugins to [] and configuring grafana.extraVolumeMounts we are using the locally built
# OnCall plugin rather than the latest published version
@@ -346,14 +354,14 @@
--values ./helm/simple.yml \
--values ./helm/values-local-image.yml \
--set-json 'env=[{"name":"GRAFANA_CLOUD_NOTIFICATIONS_ENABLED","value":"False"}]' \
--set engine.replicaCount=1 \
--set celery.replicaCount=1 \
--set engine.replicaCount=3 \
--set celery.replicaCount=3 \
--set celery.worker_beat_enabled="False" \
--set oncall.twilio.accountSid="${{ secrets.TWILIO_ACCOUNT_SID }}" \
--set oncall.twilio.authToken="${{ secrets.TWILIO_AUTH_TOKEN }}" \
--set oncall.twilio.phoneNumber="\"${{ secrets.TWILIO_PHONE_NUMBER }}"\" \
--set oncall.twilio.verifySid="${{ secrets.TWILIO_VERIFY_SID }}" \
--set grafana.replicas=1 \
--set grafana.replicas=3 \
--set grafana.image.tag=${{ matrix.grafana-image-tag }} \
--set grafana.env.GF_SECURITY_ADMIN_USER=oncall \
--set grafana.env.GF_SECURITY_ADMIN_PASSWORD=oncall \
@@ -378,12 +386,19 @@ jobs:
path: "~/.cache/ms-playwright"
key: ${{ runner.os }}-playwright-${{ env.PLAYWRIGHT_VERSION }}-chromium-firefox-webkit

- name: Install Playwright binaries/dependencies
# For the next two steps, use the binary directly from node_modules/.bin as opposed to npx playwright
# due to this bug (https://github.com/microsoft/playwright/issues/13188)
- name: Install Playwright Browsers
if: steps.playwright-cache.outputs.cache-hit != 'true'
# https://stackoverflow.com/questions/65900299/install-single-dependency-from-package-json-with-yarn
run: |
yarn add "@playwright/test@${{ env.PLAYWRIGHT_VERSION }}"
npx playwright install --with-deps chromium firefox webkit
working-directory: grafana-plugin
run: ./node_modules/.bin/playwright install --with-deps chromium firefox webkit

# use the cached browsers, but we still need to install the necessary system dependencies
# (system deps are installed in the cache-miss step above by the --with-deps flag)
- name: Install Playwright System Dependencies
if: steps.playwright-cache.outputs.cache-hit == 'true'
working-directory: grafana-plugin
run: ./node_modules/.bin/playwright install-deps chromium firefox webkit

- name: Await k8s pods and other resources up
uses: jupyterhub/action-k8s-await-workloads@v1
@@ -408,8 +423,7 @@ jobs:
GRAFANA_PASSWORD: oncall
MAILSLURP_API_KEY: ${{ secrets.MAILSLURP_API_KEY }}
working-directory: ./grafana-plugin
# -x = exit command after first failing test
run: yarn test:integration -x
run: yarn test:integration

# always spit out the engine and celery logs, AFTER the e2e tests have completed
# can be helpful for debugging failing/flaky tests
12 changes: 9 additions & 3 deletions grafana-plugin/integration-tests/globalSetup.ts
@@ -7,7 +7,7 @@ import { goToGrafanaPage } from './utils/navigation';
/**
* go to config page and wait for plugin icon to be available on left-hand navigation
*/
export const configureOnCallPlugin = async (page: Page): Promise<void> => {
const configureOnCallPlugin = async (page: Page): Promise<void> => {
// plugin configuration can safely be skipped for non open-source environments
if (!IS_OPEN_SOURCE) {
return;
@@ -31,8 +31,14 @@ export const configureOnCallPlugin = async (page: Page): Promise<void> => {
await clickButton({ page, buttonText: 'Connect' });
}

// wait for the "Connected to OnCall" message to know that everything is properly configured
await expect(page.getByTestId('status-message-block')).toHaveText(/Connected to OnCall.*/);
/**
* wait for the "Connected to OnCall" message to know that everything is properly configured
*
* Regarding increasing the timeout for the "plugin configured" assertion:
* This is because it can sometimes take a bit longer for the backend sync to finish. The default assertion
* timeout is 5s, which is sometimes not enough if the backend is under load
*/
await expect(page.getByTestId('status-message-block')).toHaveText(/Connected to OnCall.*/, { timeout: 25_000 });
};

/**
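Dropping the export keyword on configureOnCallPlugin is safe because a globalSetup file only needs to default-export a single setup function; the helper is consumed internally. A hypothetical sketch of that wiring, assuming the usual Playwright pattern (the actual OnCall setup file may differ):

```ts
import { chromium, FullConfig, Page } from '@playwright/test';

const configureOnCallPlugin = async (page: Page): Promise<void> => {
  // ... the plugin-configuration steps shown in the diff above ...
};

// Playwright invokes this default export once, before the entire suite runs
export default async function globalSetup(_config: FullConfig): Promise<void> {
  const browser = await chromium.launch();
  const page = await browser.newPage();
  await configureOnCallPlugin(page);
  await browser.close();
}
```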
19 changes: 15 additions & 4 deletions grafana-plugin/integration-tests/schedules/quality.test.ts
@@ -6,13 +6,24 @@ test('check schedule quality for simple 1-user schedule', async ({ page }) => {
const onCallScheduleName = generateRandomValue();
await createOnCallSchedule(page, onCallScheduleName);

await expect(page.locator('div[class*="ScheduleQuality"]')).toHaveText('Quality: Great');
/**
* this page.reload() call is a hack to temporarily get around this issue
* https://github.com/grafana/oncall/issues/1968
*/
await page.reload({ waitUntil: 'networkidle' });

await page.hover('div[class*="ScheduleQuality"]');
await expect(page.locator('div[class*="ScheduleQualityDetails"] >> span[class*="Text"] >> nth=2 ')).toHaveText(
const scheduleQualityElement = page.getByTestId('schedule-quality');

await expect(scheduleQualityElement).toHaveText('Quality: Great', { timeout: 15_000 });

await scheduleQualityElement.hover();

const scheduleQualityDetailsElement = page.getByTestId('schedule-quality-details');

await expect(scheduleQualityDetailsElement.locator('span[class*="Text"] >> nth=2 ')).toHaveText(
'Schedule has no gaps'
);
await expect(page.locator('div[class*="ScheduleQualityDetails"] >> span[class*="Text"] >> nth=3 ')).toHaveText(
await expect(scheduleQualityDetailsElement.locator('span[class*="Text"] >> nth=3 ')).toHaveText(
'Schedule is perfectly balanced'
);
});
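One note on the `>> nth=2` syntax retained above: it is the selector-engine spelling of picking the nth (0-based) match, and the .nth() locator method is an equivalent, arguably more readable form. A sketch reusing the test IDs from this test:

```ts
import { expect, test } from '@playwright/test';

test('nth() locator form (sketch)', async ({ page }) => {
  const details = page.getByTestId('schedule-quality-details');
  // equivalent to details.locator('span[class*="Text"] >> nth=2')
  await expect(details.locator('span[class*="Text"]').nth(2)).toHaveText('Schedule has no gaps');
});
```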
11 changes: 8 additions & 3 deletions grafana-plugin/playwright.config.ts
@@ -14,7 +14,7 @@ const config: PlaywrightTestConfig = {
testDir: './integration-tests',
globalSetup: './integration-tests/globalSetup.ts',
/* Maximum time one test can run for. */
timeout: 90 * 1000,
timeout: 60 * 1000,
expect: {
/**
* Maximum time expect() should wait for the condition to be met.
@@ -26,8 +26,8 @@
fullyParallel: true,
/* Fail the build on CI if you accidentally left test.only in the source code. */
forbidOnly: !!process.env.CI,
/* Retry on CI only */
retries: process.env.CI ? 3 : 0,
/**
* Retry on CI only
*
* NOTE: until we fix this issue (https://github.com/grafana/oncall/issues/1692) which occasionally leads
* to flaky tests... let's just retry failed tests. If the same test fails 3 times, you know something must be up
*/
retries: !!process.env.CI ? 3 : 0,
workers: 1,
/* Reporter to use. See https://playwright.dev/docs/test-reporters */
reporter: 'html',
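With retries enabled, an individual test can also detect which attempt it is on via the testInfo fixture, which helps when auditing which tests only pass on a retry. A small sketch (not part of this PR; the page URL is a placeholder):

```ts
import { test } from '@playwright/test';

test('example: log retry attempts', async ({ page }, testInfo) => {
  if (testInfo.retry > 0) {
    // testInfo.retry is 0 on the first attempt, 1..3 on CI retries
    console.warn(`running retry attempt ${testInfo.retry}`);
  }
  await page.goto('https://example.com'); // placeholder target
});
```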

This file was deleted.

This file was deleted.

@@ -38,7 +38,7 @@ const ScheduleQuality: FC<ScheduleQualityProps> = ({ schedule, lastUpdated }) =>

return (
<>
<div className={cx('root')}>
<div className={cx('root')} data-testid="schedule-quality">
{relatedEscalationChains?.length > 0 && schedule?.number_of_escalation_chains > 0 && (
<TooltipBadge
borderType="link"
@@ -29,7 +29,7 @@ export const ScheduleQualityDetails: FC<ScheduleQualityDetailsProps> = ({ qualit
const warningComments = comments.filter((c) => c.type === 'warning');

return (
<div className={cx('root')}>
<div className={cx('root')} data-testid="schedule-quality-details">
<div className={cx('container')}>
<div className={cx('container', 'container--withLateralPadding')}>
<Text type={cx('secondary', 'header')}>
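The two data-testid attributes added above are what the refactored quality test locates: by default, page.getByTestId('x') resolves to [data-testid="x"] (the attribute name is configurable via Playwright's testIdAttribute option). A sketch of the matching test-side usage:

```ts
import { expect, test } from '@playwright/test';

test('hover the schedule quality badge (sketch)', async ({ page }) => {
  // matches <div className={cx('root')} data-testid="schedule-quality">
  await page.getByTestId('schedule-quality').hover();

  // matches <div className={cx('root')} data-testid="schedule-quality-details">
  await expect(page.getByTestId('schedule-quality-details')).toBeVisible();
});
```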
3 changes: 1 addition & 2 deletions grafana-plugin/src/plugin.json
@@ -620,8 +620,7 @@
}
],
"dependencies": {
"grafanaDependency": ">=8.3.2",
"grafanaVersion": "8.3",
"grafanaDependency": ">=9.2.0",
"plugins": []
}
}
