Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[fix] Handle index db corruption and warn in UI #3150

Merged
merged 3 commits into from
May 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# Changelog

## 3.19.4
- Handle index db corruption and warn in UI (mihran113)

## 3.19.3 Apr 17, 2024
- Resolve issue with new runs after tracking queue shutdown (mihran113)
- Reset base path when opening new tabs (mihran113)
Expand Down
16 changes: 16 additions & 0 deletions aim/storage/union.pyx
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import heapq
import logging
import os
import shutil

import aimrocks

import cachetools.func
Expand Down Expand Up @@ -145,6 +147,8 @@ class ValuesIterator(ItemsIterator):


class DB(object):
_corruption_warned = False

def __init__(self, db_path: str, db_name: str, opts, read_only: bool = False):
assert read_only
self.db_path = db_path
Expand Down Expand Up @@ -181,6 +185,18 @@ class DB(object):
index_path = os.path.join(self.db_path, self.db_name, "index")
try:
index_db = self._get_db(index_prefix, index_path, self._dbs)
# do a probe read (fixed key) to check whether the index db is corrupted
index_db.get(index_prefix)
except aimrocks.errors.RocksIOError:
# delete index db and mark as corrupted
corruption_marker = Path(index_path) / '.corrupted'
if not corruption_marker.exists():
logger.warning('Corrupted index db. Deleting the index db to avoid errors. '
'Please run `aim storage reindex` command to restore optimal performance.')
shutil.rmtree(index_path)
Path(index_path).mkdir()
corruption_marker.touch()
index_db = None
except Exception:
index_db = None
logger.info('No index was detected')
Expand Down
1 change: 1 addition & 0 deletions aim/web/api/projects/pydantic_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ class ProjectApiOut(BaseModel):
path: str
description: str
telemetry_enabled: int
warn_index: Optional[bool] = False


class ProjectParamsOut(BaseModel):
Expand Down
13 changes: 12 additions & 1 deletion aim/web/api/projects/views.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import os
from datetime import timedelta
from typing import Optional, Tuple

from logging import getLogger
from collections import Counter
from fastapi import Depends, HTTPException, Query, Header
from aim.web.api.utils import APIRouter # wrapper for fastapi.APIRouter
Expand All @@ -21,6 +21,8 @@

projects_router = APIRouter()

logger = getLogger()


@projects_router.get('/', response_model=ProjectApiOut)
async def project_api():
Expand All @@ -29,11 +31,20 @@ async def project_api():
if not project.exists():
raise HTTPException(status_code=404)

# check if the index db was corrupted and deleted
corruption_marker = os.path.join(project.repo_path, 'meta', 'index', '.corrupted')
warning_message = ''
if os.path.exists(corruption_marker):
warning_message = 'Index db was corrupted and deleted. ' \
'Please run `aim storage reindex` command to restore optimal performance.'
logger.warning(warning_message)

return {
'name': project.name,
'path': project.path,
'description': project.description,
'telemetry_enabled': 0,
'warn_index': bool(warning_message),
}


Expand Down
12 changes: 12 additions & 0 deletions aim/web/ui/src/App.tsx
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import React from 'react';
import { BrowserRouter, Switch, Route, Redirect } from 'react-router-dom';
import { useModel } from 'hooks';

import { loader } from '@monaco-editor/react';

Expand All @@ -16,8 +17,12 @@ import PageWrapper from 'pages/PageWrapper';

import routes from 'routes/routes';

import projectsModel from 'services/models/projects/projectsModel';

import { inIframe } from 'utils/helper';

import { IProjectsModelState } from './types/services/models/projects/projectsModel';

import './App.scss';

const basePath = getBasePath(false);
Expand All @@ -32,6 +37,7 @@ loader.config({
});

function App(): React.FunctionComponentElement<React.ReactNode> {
const projectsData = useModel<Partial<IProjectsModelState>>(projectsModel);
React.useEffect(() => {
let timeoutId: number;
const preloader = document.getElementById('preload-spinner');
Expand All @@ -56,6 +62,12 @@ function App(): React.FunctionComponentElement<React.ReactNode> {
<b>keep server running</b> for a better experience
</AlertBanner>
)}
{projectsData?.project?.warn_index && (
<AlertBanner type='warning'>
Index db was corrupted and deleted. Please run{' '}
<b>`aim storage reindex`</b> command to restore optimal performance.
</AlertBanner>
)}
<div className='pageContainer'>
<ErrorBoundary>
<SideBar />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ export interface IProject {
name?: string;
path?: string;
telemetry_enabled?: string | boolean;
warn_index?: boolean;
}

export interface IProjectParamsMetrics {
Expand Down
Loading