Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix crash on invalid notebook files #1050

Merged
merged 1 commit into from
Feb 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion packages/pyright-internal/src/analyzer/program.ts
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,13 @@ export class Program {
}

addTrackedFile(fileUri: Uri, isThirdPartyImport = false, isInPyTypedPackage = false, isTypeshedFile = false) {
const cells = getIPythonCells(this.fileSystem, fileUri, this.console);
let cells;
try {
cells = getIPythonCells(this.fileSystem, fileUri, this.console);
} catch (e) {
this.console.error(e instanceof Error ? e.message : String(e));
return;
}
const sourceFileInfos: SourceFileInfo[] = [];
if (cells) {
cells.forEach((_, index) => {
Expand Down
43 changes: 26 additions & 17 deletions packages/pyright-internal/src/analyzer/sourceFile.ts
Original file line number Diff line number Diff line change
Expand Up @@ -102,29 +102,33 @@ const getFileContent = (fileSystem: FileSystem, uri: Uri, console: ConsoleInterf
/**
* if this is a notebook, gets the cells in this file that contain python code.
* note that each {@link SourceFile} is an individual cell, but this function returns
* all of them
* all of them. throws an exception if the contents of the notebook are invalid (eg. not json)
* @returns `undefined` if not a notebook
*/
export const getIPythonCells = (fileSystem: FileSystem, uri: Uri, console: ConsoleInterface) => {
if (!uri.hasExtension('.ipynb')) {
return undefined;
return;
}
const fileContent = getFileContent(fileSystem, uri, console);
if (!fileContent) {
return undefined;
return;
}
try {
const parsedNotebook = JSON.parse(fileContent) as INotebookContent;
return parsedNotebook.cells.filter(
(cell) =>
cell.cell_type === 'code' &&
// i guess there's no standard way to specify the language of individual cells? so we check the metadata vscode adds for
// cells that have a different language to the notebook's language. idk if this is supported in any other editor tho
(typeof cell.metadata.vscode !== 'object' ||
cell.metadata.vscode === null ||
!('languageId' in cell.metadata.vscode) ||
// i don't think vscode ever sets the language to python, or maybe it does for python cells in non-python notebooks?
cell.metadata.vscode.languageId === 'python')
);
} catch (e) {
throw new Error(`failed to parse jupyter notebook ${uri.toUserVisibleString()} - ${e}`);
}
const parsedNotebook = JSON.parse(fileContent) as INotebookContent;
return parsedNotebook.cells.filter(
(cell) =>
cell.cell_type === 'code' &&
// i guess there's no standard way to specify the language of individual cells? so we check the metadata vscode adds for
// cells that have a different language to the notebook's language. idk if this is supported in any other editor tho
(typeof cell.metadata.vscode !== 'object' ||
cell.metadata.vscode === null ||
!('languageId' in cell.metadata.vscode) ||
// i don't think vscode ever sets the language to python, or maybe it does for python cells in non-python notebooks?
cell.metadata.vscode.languageId === 'python')
);
};

// A monotonically increasing number used to create unique file IDs.
Expand Down Expand Up @@ -554,8 +558,13 @@ export class SourceFile {
if (cellIndex === undefined) {
throw new Error(`something went wrong, failed to get cell index for ${this._uri}`);
}
//TODO: this isnt ideal because it re-reads the file for each cell which is unnecessary
const source = getIPythonCells(this.fileSystem, this.getRealUri(), this._console)?.[cellIndex].source;
let source;
try {
//TODO: this isnt ideal because it re-reads the file for each cell which is unnecessary
source = getIPythonCells(this.fileSystem, this.getRealUri(), this._console)?.[cellIndex].source;
} catch (e) {
this._console.error(e instanceof Error ? e.message : String(e));
}
return typeof source === 'string' ? source : source?.join('');
}

Expand Down
10 changes: 1 addition & 9 deletions packages/pyright-internal/src/tests/config.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,7 @@ import { AnalysisResults } from '../analyzer/analysis';
import { existsSync } from 'fs';
import { NoAccessHost } from '../common/host';
import { tExpect } from 'typed-jest-expect';

class ErrorTrackingNullConsole extends NullConsole {
errors = new Array<string>();

override error(message: string) {
this.errors.push(message);
super.error(message);
}
}
import { ErrorTrackingNullConsole } from './testUtils';

describe(`config test'}`, () => {
const tempFile = new RealTempFile();
Expand Down
12 changes: 11 additions & 1 deletion packages/pyright-internal/src/tests/notebooks.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { tExpect } from 'typed-jest-expect';
import { DiagnosticRule } from '../common/diagnosticRules';
import { typeAnalyzeSampleFiles, validateResultsButBased } from './testUtils';
import { ErrorTrackingNullConsole, typeAnalyzeSampleFiles, validateResultsButBased } from './testUtils';

test('symbol from previous cell', () => {
const analysisResults = typeAnalyzeSampleFiles(['notebook.ipynb']);
Expand All @@ -24,3 +24,13 @@ test('non-python cell', () => {
const analysisResults = typeAnalyzeSampleFiles(['notebook2.ipynb']);
tExpect(analysisResults.length).toStrictEqual(0);
});

test('invalid notebook file', () => {
const console = new ErrorTrackingNullConsole();
typeAnalyzeSampleFiles(['notebook3.ipynb'], undefined, console);
tExpect(console.errors.length).toStrictEqual(1);
tExpect(console.errors[0]).toMatch(
// .* at the end because the error message is slightly different on windows and linux
/failed to parse jupyter notebook .* - SyntaxError: Unexpected token .*/
);
});
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
asdf
15 changes: 12 additions & 3 deletions packages/pyright-internal/src/tests/testUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ import { Program } from '../analyzer/program';
import { NameTypeWalker } from '../analyzer/testWalker';
import { TypeEvaluator } from '../analyzer/typeEvaluatorTypes';
import { ConfigOptions, ExecutionEnvironment, getStandardDiagnosticRuleSet } from '../common/configOptions';
import { ConsoleWithLogLevel, NullConsole } from '../common/console';
import { ConsoleInterface, NullConsole } from '../common/console';
import { fail } from '../common/debug';
import { Diagnostic, DiagnosticCategory } from '../common/diagnostic';
import { DiagnosticSink } from '../common/diagnosticSink';
Expand All @@ -40,6 +40,15 @@ import { InlayHintSettings } from '../workspaceFactory';
// running the tests.
(global as any).__rootDirectory = path.resolve();

export class ErrorTrackingNullConsole extends NullConsole {
errors = new Array<string>();

override error(message: string) {
this.errors.push(message);
super.error(message);
}
}

export interface FileAnalysisResult {
fileUri: Uri;
cell?: number | undefined;
Expand Down Expand Up @@ -99,7 +108,7 @@ type ConfigOptionsArg = ConfigOptions | ((serviceProvider: ServiceProvider) => C

const createProgram = (
configOptions: ConfigOptionsArg = new ConfigOptions(Uri.empty()),
console?: ConsoleWithLogLevel
console?: ConsoleInterface
) => {
const tempFile = new RealTempFile();
const fs = createFromRealFileSystem(tempFile);
Expand All @@ -117,7 +126,7 @@ const createProgram = (
export function typeAnalyzeSampleFiles(
fileNames: string[],
configOptions: ConfigOptionsArg = new ConfigOptions(Uri.empty()),
console?: ConsoleWithLogLevel
console?: ConsoleInterface
): FileAnalysisResult[] {
const program = createProgram(configOptions, console);
const fileUris = fileNames.map((name) => UriEx.file(resolveSampleFilePath(name)));
Expand Down
Loading