def _combined_ansible_doc(ansible_doc: 'sh.Command', plugin_type: str, plugin_names,
                          *, linear_threshold: int = 10) -> Dict[str, Any]:
    """
    Retrieve documentation for several plugins with as few ansible-doc calls as possible.

    All of ``plugin_names`` are first passed to a single ``ansible-doc -t <type> --json``
    invocation.  If that invocation (or the handling of its output) fails, the list is split
    into smaller parts which are retried recursively, so that a failure is eventually pinned
    to individual plugins while the remaining plugins are still documented.

    :arg ansible_doc: Callable which runs ansible-doc with the given command line arguments
        and returns an object whose ``stdout`` attribute holds the output as bytes.
    :arg plugin_type: The plugin type which is passed to ansible-doc via ``-t``.
    :arg plugin_names: Sequence of plugin names to retrieve documentation for.
    :kwarg linear_threshold: A failed batch with fewer names than this is retried one plugin
        at a time; larger failed batches are split in half.
    :returns: A dict mapping plugin names to ``(exception, plugin_info)`` tuples.  Exactly one
        of the two entries is ``None``.  For successful plugins, the names are the keys of
        the JSON document emitted by ansible-doc.
    """
    if not plugin_names:
        return {}

    # Errors are deliberately captured as data: the caller decides how to report them.
    # Decoding and parsing are guarded as well so that unusable output for one batch triggers
    # the same retry logic as a failing command instead of discarding the whole batch.
    try:
        completed = ansible_doc('-t', plugin_type, '--json', *plugin_names)
        stdout = completed.stdout.decode("utf-8", errors="surrogateescape")
        docs = json.loads(_filter_non_json_lines(stdout)[0])
    except Exception as exc:  # pylint: disable=broad-except
        # Only remember the error here.  Retrying outside of the handler keeps exceptions
        # raised by the retries from being chained (via __context__) to this one.
        error = exc
    else:
        return {name: (None, info) for name, info in docs.items()}

    if len(plugin_names) == 1:
        # The failure has been narrowed down to a single plugin.
        return {plugin_names[0]: (error, None)}

    if len(plugin_names) < linear_threshold:
        # Few plugins left: check each one individually.
        parts = [[name] for name in plugin_names]
    else:
        # Many plugins left: bisect to locate the failing ones with fewer invocations.
        middle = len(plugin_names) // 2
        parts = [plugin_names[:middle], plugin_names[middle:]]

    combined = {}
    for part in parts:
        combined.update(_combined_ansible_doc(ansible_doc, plugin_type, part,
                                              linear_threshold=linear_threshold))
    return combined
@@ -78,47 +114,50 @@ async def _get_plugin_info(plugin_type: str, ansible_doc: 'sh.Command') -> Dict[ # For each plugin, get its documentation extractors = {} - for plugin_name in plugin_map.keys(): - extractors[plugin_name] = loop.run_in_executor(None, ansible_doc, '-t', plugin_type, - '--json', plugin_name) - plugin_info = await asyncio.gather(*extractors.values(), return_exceptions=True) + plugin_names = list(plugin_map.keys()) + block_size = 128 + for i in range(0, len(plugin_names), block_size): + block = plugin_names[i:i + block_size] + extractors[i] = loop.run_in_executor(None, _combined_ansible_doc, ansible_doc, plugin_type, block) + plugin_info_blocks = await asyncio.gather(*extractors.values(), return_exceptions=True) results = {} - for plugin_name, ansible_doc_results in zip(extractors, plugin_info): - err_msg = [] - stdout = ansible_doc_results.stdout.decode("utf-8", errors="surrogateescape") - stderr = ansible_doc_results.stderr.decode("utf-8", errors="surrogateescape") - - if isinstance(ansible_doc_results, Exception): - formatted_exception = traceback.format_exception(None, ansible_doc_results, - ansible_doc_results.__traceback__) - err_msg.append(f'Exception while parsing documentation for {plugin_type} plugin:' - f' {plugin_name}. Will not document this plugin.') - err_msg.append(f'Exception:\n{"".join(formatted_exception)}') - - # Note: Exception will also be True. - if isinstance(ansible_doc_results, sh.ErrorReturnCode): - err_msg.append(f'Full process stdout:\n{stdout}') - err_msg.append(f'Full process stderr:\n{stderr}') - - if err_msg: - sys.stderr.write('\n'.join(err_msg)) - continue - - # ansible-doc returns plugins shipped with ansible-base using no namespace and collection. - # For now, we fix these entries to use the ansible.builtin collection here. 
The reason we - # do it here instead of as part of a general normalization step is that other plugins - # (site-specific ones from ANSIBLE_LIBRARY, for instance) will also be returned with no - # collection name. We know that we don't have any of those in this code (because we set - # ANSIBLE_LIBRARY and other plugin path variables to /dev/null) so we can safely fix this - # here but not outside the ansible-doc backend. - fqcn = plugin_name - try: - get_fqcn_parts(fqcn) - except ValueError: - fqcn = f'ansible.builtin.{plugin_name}' - - results[fqcn] = json.loads(_filter_non_json_lines(stdout)[0])[plugin_name] + for plugin_info_block in plugin_info_blocks: + for plugin_name, (exception, plugin_info) in plugin_info_block.items(): + if exception is not None: + err_msg = [] + stdout = exception.stdout.decode("utf-8", errors="surrogateescape") + stderr = exception.stderr.decode("utf-8", errors="surrogateescape") + + if isinstance(exception, Exception): + formatted_exception = traceback.format_exception(None, exception, + exception.__traceback__) + err_msg.append(f'Exception while parsing documentation for {plugin_type} plugin:' + f' {plugin_name}. Will not document this plugin.') + err_msg.append(f'Exception:\n{"".join(formatted_exception)}') + + # Note: Exception will also be True. + if isinstance(exception, sh.ErrorReturnCode): + err_msg.append(f'Full process stdout:\n{stdout}') + err_msg.append(f'Full process stderr:\n{stderr}') + + sys.stderr.write('\n'.join(err_msg)) + continue + + # ansible-doc returns plugins shipped with ansible-base using no namespace and collection. + # For now, we fix these entries to use the ansible.builtin collection here. The reason we + # do it here instead of as part of a general normalization step is that other plugins + # (site-specific ones from ANSIBLE_LIBRARY, for instance) will also be returned with no + # collection name. 
We know that we don't have any of those in this code (because we set + # ANSIBLE_LIBRARY and other plugin path variables to /dev/null) so we can safely fix this + # here but not outside the ansible-doc backend. + fqcn = plugin_name + try: + get_fqcn_parts(fqcn) + except ValueError: + fqcn = f'ansible.builtin.{plugin_name}' + + results[fqcn] = plugin_info return results