diff --git a/mypy/build.py b/mypy/build.py
index 909d793b00029..494dfdf3ae018 100644
--- a/mypy/build.py
+++ b/mypy/build.py
@@ -562,6 +562,7 @@ class BuildManager:
                        not only for debugging, but also required for correctness,
                        in particular to check consistency of the fine-grained dependency cache.
       fscache:         A file system cacher
+      ast_cache:       AST cache to speed up mypy daemon
     """
 
     def __init__(self, data_dir: str,
@@ -645,6 +646,14 @@ def __init__(self, data_dir: str,
         self.processed_targets = []  # type: List[str]
         # Missing stub packages encountered.
         self.missing_stub_packages = set()  # type: Set[str]
+        # Cache for mypy ASTs that have completed semantic analysis
+        # pass 1. When multiple files are added to the build in a
+        # single daemon increment, only one of the files gets added
+        # per step and the others are discarded. This gets repeated
+        # until all the files have been added. This means that a
+        # new file can be processed O(n**2) times. This cache
+        # avoids most of this redundant work.
+        self.ast_cache = {}  # type: Dict[str, Tuple[MypyFile, List[ErrorInfo]]]
 
     def dump_stats(self) -> None:
         if self.options.dump_build_stats:
@@ -1994,8 +2003,14 @@ def parse_file(self) -> None:
             return
 
         manager = self.manager
+
+        # Can we reuse a previously parsed AST? This avoids redundant work in daemon.
+        cached = self.id in manager.ast_cache
         modules = manager.modules
-        manager.log("Parsing %s (%s)" % (self.xpath, self.id))
+        if not cached:
+            manager.log("Parsing %s (%s)" % (self.xpath, self.id))
+        else:
+            manager.log("Using cached AST for %s (%s)" % (self.xpath, self.id))
 
         with self.wrap_context():
             source = self.source
@@ -2026,21 +2041,36 @@ def parse_file(self) -> None:
                     self.source_hash = compute_hash(source)
 
                 self.parse_inline_configuration(source)
-                self.tree = manager.parse_file(self.id, self.xpath, source,
-                                               self.ignore_all or self.options.ignore_errors,
-                                               self.options)
+                if not cached:
+                    self.tree = manager.parse_file(self.id, self.xpath, source,
+                                                   self.ignore_all or self.options.ignore_errors,
+                                                   self.options)
 
-            modules[self.id] = self.tree
+                else:
+                    # Reuse a cached AST
+                    self.tree = manager.ast_cache[self.id][0]
+                    manager.errors.set_file_ignored_lines(
+                        self.xpath,
+                        self.tree.ignored_lines,
+                        self.ignore_all or self.options.ignore_errors)
+
+        if not cached:
+            # Make a copy of any errors produced during parse time so that
+            # fine-grained mode can repeat them when the module is
+            # reprocessed.
+            self.early_errors = list(manager.errors.error_info_map.get(self.xpath, []))
+        else:
+            self.early_errors = manager.ast_cache[self.id][1]
 
-        # Make a copy of any errors produced during parse time so that
-        # fine-grained mode can repeat them when the module is
-        # reprocessed.
-        self.early_errors = list(manager.errors.error_info_map.get(self.xpath, []))
+        modules[self.id] = self.tree
 
-        self.semantic_analysis_pass1()
+        if not cached:
+            self.semantic_analysis_pass1()
 
         self.check_blockers()
 
+        manager.ast_cache[self.id] = (self.tree, self.early_errors)
+
     def parse_inline_configuration(self, source: str) -> None:
         """Check for inline mypy: options directive and parse them."""
         flags = get_mypy_comments(source)
diff --git a/mypy/dmypy_server.py b/mypy/dmypy_server.py
index 30002c09641d8..eb53935db297d 100644
--- a/mypy/dmypy_server.py
+++ b/mypy/dmypy_server.py
@@ -373,7 +373,7 @@ def cmd_recheck(self,
             assert remove is None and update is None
             messages = self.fine_grained_increment_follow_imports(sources)
         res = self.increment_output(messages, sources, is_tty, terminal_width)
-        self.fscache.flush()
+        self.flush_caches()
         self.update_stats(res)
         return res
 
@@ -392,10 +392,15 @@ def check(self, sources: List[BuildSource],
         else:
             messages = self.fine_grained_increment_follow_imports(sources)
         res = self.increment_output(messages, sources, is_tty, terminal_width)
-        self.fscache.flush()
+        self.flush_caches()
         self.update_stats(res)
         return res
 
+    def flush_caches(self) -> None:
+        self.fscache.flush()
+        if self.fine_grained_manager:
+            self.fine_grained_manager.flush_cache()
+
     def update_stats(self, res: Dict[str, Any]) -> None:
         if self.fine_grained_manager:
             manager = self.fine_grained_manager.manager
@@ -852,7 +857,7 @@ def cmd_suggest(self,
                 out += "\n"
             return {'out': out, 'err': "", 'status': 0}
         finally:
-            self.fscache.flush()
+            self.flush_caches()
 
     def cmd_hang(self) -> Dict[str, object]:
         """Hang for 100 seconds, as a debug hack."""
diff --git a/mypy/server/update.py b/mypy/server/update.py
index a9f931429a45e..085c143fadd18 100644
--- a/mypy/server/update.py
+++ b/mypy/server/update.py
@@ -288,6 +288,14 @@ def trigger(self, target: str) -> List[str]:
         self.previous_messages = self.manager.errors.new_messages()[:]
         return self.update(changed_modules, [])
 
+    def flush_cache(self) -> None:
+        """Flush AST cache.
+
+        This needs to be called after each increment, or file changes won't
+        be detected reliably.
+        """
+        self.manager.ast_cache.clear()
+
     def update_one(self,
                    changed_modules: List[Tuple[str, str]],
                    initial_set: Set[str],
diff --git a/mypy/suggestions.py b/mypy/suggestions.py
index b66ba6d6118dc..8df180d825b42 100644
--- a/mypy/suggestions.py
+++ b/mypy/suggestions.py
@@ -640,6 +640,7 @@ def reload(self, state: State, check_errors: bool = False) -> List[str]:
         If check_errors is true, raise an exception if there are errors.
         """
         assert state.path is not None
+        self.fgmanager.flush_cache()
         return self.fgmanager.update([(state.id, state.path)], [])
 
     def ensure_loaded(self, state: State, force: bool = False) -> MypyFile:
diff --git a/mypy/test/testmerge.py b/mypy/test/testmerge.py
index c9f04c2abef6f..c7fcbda01c04c 100644
--- a/mypy/test/testmerge.py
+++ b/mypy/test/testmerge.py
@@ -124,6 +124,7 @@ def build(self, source: str, testcase: DataDrivenTestCase) -> Optional[BuildResu
 
     def build_increment(self, manager: FineGrainedBuildManager,
                         module_id: str, path: str) -> Tuple[MypyFile,
                                                             Dict[Expression, Type]]:
+        manager.flush_cache()
         manager.update([(module_id, path)], [])
         module = manager.manager.modules[module_id]
         type_map = manager.graph[module_id].type_map()
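
Note for reviewers (not part of the patch): the pattern the diff implements is a per-increment memo keyed by module id. Before the expensive parse/pass-1 step we consult the memo, we record the tree together with its parse-time errors, and we clear the memo at the end of every increment so later file edits are not masked by stale trees. Below is a minimal standalone sketch of that pattern; the names `Increment` and `expensive_parse` are invented for illustration and assume nothing about mypy internals.

```python
from typing import Dict, List, Tuple

# Stand-ins for mypy's MypyFile and ErrorInfo, for illustration only.
Tree = str
Error = str


def expensive_parse(module_id: str, source: str) -> Tuple[Tree, List[Error]]:
    """Placeholder for parsing plus semantic analysis pass 1."""
    return "ast(%s)" % module_id, []


class Increment:
    """Models one daemon increment that may visit the same new file many times."""

    def __init__(self) -> None:
        # Keyed by module id, like BuildManager.ast_cache in the patch.
        self.ast_cache = {}  # type: Dict[str, Tuple[Tree, List[Error]]]
        self.parses = 0

    def process(self, module_id: str, source: str) -> Tree:
        # Only pay for the parse the first time a module is seen.
        if module_id not in self.ast_cache:
            self.parses += 1
            self.ast_cache[module_id] = expensive_parse(module_id, source)
        return self.ast_cache[module_id][0]

    def flush_cache(self) -> None:
        # Must run at the end of each increment, or later edits to the
        # file would be masked by a stale cached tree.
        self.ast_cache.clear()


inc = Increment()
for _ in range(5):          # the same pending file may be visited once per step...
    inc.process("pkg.mod", "x = 1")
assert inc.parses == 1      # ...but it is parsed only once per increment
inc.flush_cache()           # end of increment: future edits are seen again
```

Caching the early errors alongside the tree mirrors the `(MypyFile, List[ErrorInfo])` tuple stored in `BuildManager.ast_cache`: fine-grained mode replays parse-time errors when a module is reprocessed, so they must survive the skipped parse. The flush is a correctness requirement rather than an optimization, which is why `flush_caches` is wired into every dmypy entry point (`cmd_recheck`, `check`, and `cmd_suggest`).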