Replace bundled conda-index with standalone conda-index package #4690

Closed. Wants to merge 30 commits (the changes shown below are from 5 of the 30 commits).

Commits (30)
3d54dfa
depend on conda-index
dholth Jan 4, 2023
653cb83
delegate indexing to conda-index package
dholth Jan 4, 2023
5ef0f1f
remove obsolete index_file test
dholth Jan 4, 2023
71eab18
add news
dholth Jan 4, 2023
0596350
add conda-index to install_conda_build_test_deps
dholth Jan 4, 2023
272caf9
Merge remote-tracking branch 'origin/main' into conda-index
dholth Jan 6, 2023
05d7f05
format conda_build/index.py for pre-commit
dholth Jan 6, 2023
dd76ea6
remove unnecessary import fallback
dholth Jan 6, 2023
8eeaec6
linter changes
dholth Jan 6, 2023
f9fffbd
update windows ci deps
dholth Jan 6, 2023
c706da4
python-libarchive-c is required after all, for utils
dholth Jan 6, 2023
c1e57ef
remove test_index (exists in standalone conda-index tests)
dholth Jan 6, 2023
e59b860
deal with update_index(subdir) instead of update_index(dir)
dholth Jan 6, 2023
ad6c0f7
use single thread when indexing for build
dholth Jan 7, 2023
d9cc516
Merge remote-tracking branch 'origin/main' into conda-index
dholth Jan 20, 2023
560ad71
remove build_index module
dholth Feb 3, 2023
1e8f1d9
Merge remote-tracking branch 'origin/main' into conda-index
dholth Feb 3, 2023
74b2d7c
format environ.py
dholth Feb 3, 2023
f047145
add conda-index dependency to recipe
dholth Feb 3, 2023
e27c8fb
add conda-index to requirements.txt
dholth Feb 3, 2023
1eb6752
restore subdirs fix
dholth Feb 3, 2023
2e7c40c
Merge remote-tracking branch 'origin/main' into conda-index
dholth Feb 4, 2023
cc63b28
ruamelize
dholth Feb 4, 2023
fe3647e
adjust distutils patch deps
dholth Feb 4, 2023
ad31a23
lint fixes
dholth Feb 4, 2023
8c0117b
keyword arguments
dholth Feb 4, 2023
55e5ba2
indent
dholth Feb 4, 2023
51ba55a
Merge remote-tracking branch 'origin/main' into conda-index
dholth Feb 22, 2023
a6cbec4
remove package from test
dholth Feb 22, 2023
f1d52e2
Apply suggestions from code review
dholth Feb 24, 2023
1 change: 1 addition & 0 deletions ci/github/install_conda_build_test_deps
@@ -18,6 +18,7 @@ function install_conda_build_test_deps_fn()
_PKGS+=(${DEF_CHAN}::ripgrep ${DEF_CHAN}::pyflakes ${DEF_CHAN}::beautifulsoup4 ${DEF_CHAN}::chardet ${DEF_CHAN}::pycrypto ${DEF_CHAN}::glob2 ${DEF_CHAN}::psutil ${DEF_CHAN}::pytz ${DEF_CHAN}::tqdm)
_PKGS+=(${DEF_CHAN}::conda-package-handling ${DEF_CHAN}::perl ${DEF_CHAN}::python-libarchive-c)
_PKGS+=(${DEF_CHAN}::pip ${DEF_CHAN}::numpy ${DEF_CHAN}::pkginfo)
+_PKGS+=(${DEF_CHAN}::conda-index)
if [[ $(uname) =~ .*inux.* ]] && [[ ! ${MACOS_ARM64} == yes ]] ; then
_PKGS+=(${DEF_CHAN}::patchelf)
fi
18 changes: 8 additions & 10 deletions conda_build/build.py
@@ -53,9 +53,9 @@
from .utils import (CONDA_PACKAGE_EXTENSION_V1, CONDA_PACKAGE_EXTENSION_V2,
CONDA_PACKAGE_EXTENSIONS, env_var, glob,
shutil_move_more_retrying, tmp_chdir)
-from conda_build import environ, source, tarcheck, utils
+from conda_build import environ, source, tarcheck, utils, build_index
from conda_build.config import Config
-from conda_build.index import get_build_index, update_index
+from conda_build.index import update_index
from conda_build.render import (output_yaml, bldpkg_path, render_recipe, reparse, distribute_variants,
expand_outputs, try_download, execute_download_actions,
add_upstream_pins)
@@ -2375,14 +2375,12 @@ def build(m, stats, post=None, need_source_download=True, need_reparse_in_env=Fa
subdir = ('noarch' if (m.noarch or m.noarch_python)
else m.config.host_subdir)
if m.is_cross:
-get_build_index(subdir=subdir, bldpkgs_dir=m.config.bldpkgs_dir,
-output_folder=m.config.output_folder, channel_urls=m.config.channel_urls,
-debug=m.config.debug, verbose=m.config.verbose, locking=m.config.locking,
-timeout=m.config.timeout, clear_cache=True)
-get_build_index(subdir=subdir, bldpkgs_dir=m.config.bldpkgs_dir,
-output_folder=m.config.output_folder, channel_urls=m.config.channel_urls,
-debug=m.config.debug, verbose=m.config.verbose, locking=m.config.locking,
-timeout=m.config.timeout, clear_cache=True)
+build_index.get_build_index(subdir, m.config.bldpkgs_dir, m.config.output_folder, True,
+False, m.config.channel_urls, m.config.debug, m.config.verbose, locking=m.config.locking, timeout=m.config.timeout
+)
+build_index.get_build_index(subdir, m.config.bldpkgs_dir, m.config.output_folder, True,
+False, m.config.channel_urls, m.config.debug, m.config.verbose, locking=m.config.locking, timeout=m.config.timeout
+)
else:
if not provision_only:
print("STOPPING BUILD BEFORE POST:", m.dist())
272 changes: 272 additions & 0 deletions conda_build/build_index.py
@@ -0,0 +1,272 @@
# Copyright (C) 2014 Anaconda, Inc
# SPDX-License-Identifier: BSD-3-Clause
"""
conda-build's use of conda-index, delegated to now-separate conda-index package.
Contributor (Author): Copied to make keeping track of all the changes easier, and to discourage future conda_build.index imports.

Member: To clarify, we couldn't continue to have index.py as the place for this code to live? Or asked differently, the get_build_index function, is that something that we have in conda-index? I'm a little worried we're introducing another place where users will end up importing from :)

Contributor (Author, @dholth, Jan 6, 2023): get_build_index is only in conda-build. It is mostly about fetching repodata.json instead of generating repodata.json. It would of course be possible to keep it in conda_build.index.

Contributor (Author): However, we might want to raise a DeprecationWarning when conda_build.index is imported.
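
A minimal sketch of that idea (hypothetical, not part of this diff): conda_build/index.py could become a thin shim that re-exports from the new module and emits a DeprecationWarning at import time.

# conda_build/index.py -- hypothetical deprecation shim, not in this PR
import warnings

from conda_build.build_index import get_build_index, update_index  # re-export the moved functions

warnings.warn(
    "conda_build.index is deprecated; use the standalone conda-index package "
    "(conda_index.index) instead.",
    DeprecationWarning,
    stacklevel=2,  # report the caller's import site, not this shim
)

__all__ = ["get_build_index", "update_index"]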

Contributor: I don't think this submodule should exist. Plus, all of conda.index should be deprecated with warnings to import from conda-index instead.

Contributor (Author): Do you mean conda.index or conda_build.index? Reminder that conda-build's get_build_index doesn't belong in conda-index.

Contributor (Author): @kenodegard @jezdez I moved this module back to conda_build.index.

"""

import conda_index.index

import json
import os
from os.path import (
dirname,
)
import sys
import time

from functools import partial
import logging

from concurrent.futures import Executor

from conda_build import conda_interface, utils
from .conda_interface import context
from .conda_interface import CondaHTTPError, get_index, url_path
from .utils import (
JSONDecodeError,
get_logger,
)

log = get_logger(__name__)


# use this for debugging, because ProcessPoolExecutor isn't pdb/ipdb friendly
class DummyExecutor(Executor):
def map(self, func, *iterables):
for iterable in iterables:
for thing in iterable:
yield func(thing)


try:
from conda.base.constants import NAMESPACES_MAP, NAMESPACE_PACKAGE_NAMES
except ImportError:
NAMESPACES_MAP = { # base package name, namespace
"python": "python",
"r": "r",
"r-base": "r",
"mro-base": "r",
"mro-base_impl": "r",
"erlang": "erlang",
"java": "java",
"openjdk": "java",
"julia": "julia",
"latex": "latex",
"lua": "lua",
"nodejs": "js",
"perl": "perl",
"php": "php",
"ruby": "ruby",
"m2-base": "m2",
"msys2-conda-epoch": "m2w64",
}
NAMESPACE_PACKAGE_NAMES = frozenset(NAMESPACES_MAP)
NAMESPACES = frozenset(NAMESPACES_MAP.values())
Contributor (Author): Is this used?

Contributor (Author): It wouldn't be surprising if conda.base.constants exists in all conda > 4.7, or the oldest one we can depend on.


local_index_timestamp = 0
cached_index = None
local_subdir = ""
local_output_folder = ""
cached_channels = []
channel_data = {}


# TODO: support for libarchive seems to have broken ability to use multiple threads here.
# The new conda format is so much faster that it more than makes up for it. However, it
# would be nice to fix this at some point.
try:
_os_cpu_count = os.cpu_count() or 1 # can be None in rare cases
except AttributeError:
_os_cpu_count = 1
MAX_THREADS_DEFAULT = _os_cpu_count

if (
sys.platform == "win32"
): # see https://github.com/python/cpython/commit/8ea0fd85bc67438f679491fae29dfe0a3961900a
MAX_THREADS_DEFAULT = min(48, MAX_THREADS_DEFAULT)
LOCK_TIMEOUT_SECS = 3 * 3600
LOCKFILE_NAME = ".lock"

# TODO: this is to make sure that the index doesn't leak tokens. It breaks use of private channels, though.
# os.environ['CONDA_ADD_ANACONDA_TOKEN'] = "false"


def get_build_index(
subdir,
bldpkgs_dir,
output_folder=None,
clear_cache=False,
omit_defaults=False,
channel_urls=None,
debug=False,
verbose=True,
locking=None,
timeout=None,
):
global local_index_timestamp
global local_subdir
global local_output_folder
global cached_index
global cached_channels
global channel_data
mtime = 0

channel_urls = list(utils.ensure_list(channel_urls))

if not output_folder:
output_folder = dirname(bldpkgs_dir)

# check file modification time - this is the age of our local index.
index_file = os.path.join(output_folder, subdir, "repodata.json")
if os.path.isfile(index_file):
mtime = os.path.getmtime(index_file)

if (
clear_cache
or not os.path.isfile(index_file)
or local_subdir != subdir
or local_output_folder != output_folder
or mtime > local_index_timestamp
or cached_channels != channel_urls
):

# priority: (local as either croot or output_folder IF NOT EXPLICITLY IN CHANNEL ARGS),
# then channels passed as args (if local in this, it remains in same order),
# then channels from condarc.
urls = list(channel_urls)

loggers = utils.LoggingContext.default_loggers + [__name__]
if debug:
log_context = partial(utils.LoggingContext, logging.DEBUG, loggers=loggers)
elif verbose:
log_context = partial(utils.LoggingContext, logging.WARN, loggers=loggers)
else:
log_context = partial(
utils.LoggingContext, logging.CRITICAL + 1, loggers=loggers
)
with log_context():
# this is where we add the "local" channel. It's a little smarter than conda, because
# conda does not know about our output_folder when it is not the default setting.
if os.path.isdir(output_folder):
local_path = url_path(output_folder)
# replace local with the appropriate real channel. Order is maintained.
urls = [url if url != "local" else local_path for url in urls]
if local_path not in urls:
urls.insert(0, local_path)
_ensure_valid_channel(output_folder, subdir)
update_index(output_folder, verbose=debug)

# replace noarch with native subdir - this ends up building an index with both the
# native content and the noarch content.

if subdir == "noarch":
subdir = conda_interface.subdir
try:
cached_index = get_index(
channel_urls=urls,
prepend=not omit_defaults,
use_local=False,
use_cache=context.offline,
platform=subdir,
)
# HACK: defaults does not have the many subfolders we support. Omit it and
# try again.
except CondaHTTPError:
if "defaults" in urls:
urls.remove("defaults")
cached_index = get_index(
channel_urls=urls,
prepend=omit_defaults,
use_local=False,
use_cache=context.offline,
platform=subdir,
)

expanded_channels = {rec.channel for rec in cached_index.values()}

superchannel = {}
# we need channeldata.json too, as it is a more reliable source of run_exports data
for channel in expanded_channels:
if channel.scheme == "file":
location = channel.location
if utils.on_win:
location = location.lstrip("/")
elif not os.path.isabs(channel.location) and os.path.exists(
os.path.join(os.path.sep, channel.location)
):
location = os.path.join(os.path.sep, channel.location)
channeldata_file = os.path.join(
location, channel.name, "channeldata.json"
)
retry = 0
max_retries = 1
if os.path.isfile(channeldata_file):
while retry < max_retries:
try:
with open(channeldata_file, "r+") as f:
channel_data[channel.name] = json.load(f)
break
except (OSError, JSONDecodeError):
time.sleep(0.2)
retry += 1
else:
# download channeldata.json for url
if not context.offline:
try:
channel_data[channel.name] = utils.download_channeldata(
channel.base_url + "/channeldata.json"
)
except CondaHTTPError:
continue
# collapse defaults metachannel back into one superchannel, merging channeldata
if channel.base_url in context.default_channels and channel_data.get(
channel.name
):
packages = superchannel.get("packages", {})
packages.update(channel_data[channel.name])
superchannel["packages"] = packages
channel_data["defaults"] = superchannel
local_index_timestamp = os.path.getmtime(index_file)
local_subdir = subdir
local_output_folder = output_folder
cached_channels = channel_urls
return cached_index, local_index_timestamp, channel_data


def _ensure_valid_channel(local_folder, subdir):
for folder in {subdir, "noarch"}:
path = os.path.join(local_folder, folder)
if not os.path.isdir(path):
os.makedirs(path)


def update_index(
dir_path,
check_md5=False,
channel_name=None,
patch_generator=None,
threads=MAX_THREADS_DEFAULT,
verbose=False,
progress=False,
hotfix_source_repo=None,
subdirs=None,
warn=True,
current_index_versions=None,
debug=False,
index_file=None,
):
return conda_index.index.update_index(
dir_path,
check_md5=check_md5,
channel_name=channel_name,
patch_generator=patch_generator,
threads=threads,
verbose=verbose,
progress=progress,
# hotfix_source_repo=None, # unused
subdirs=subdirs,
warn=warn,
current_index_versions=current_index_versions,
debug=debug,
# index_file=None, # unused
)
12 changes: 6 additions & 6 deletions conda_build/cli/main_index.py
@@ -6,9 +6,9 @@

from conda_build.conda_interface import ArgumentParser

-from conda_build import api
-from conda_build.index import MAX_THREADS_DEFAULT
-from conda_build.utils import DEFAULT_SUBDIRS
+from conda_index import api
+from conda_index.index import MAX_THREADS_DEFAULT
+from conda_index.utils import DEFAULT_SUBDIRS

logging.basicConfig(level=logging.INFO)

@@ -77,7 +77,7 @@ def parse_args(args):
)
p.add_argument(
"-f", "--file",
help="A file that contains a new line separated list of packages to add to repodata.",
help="A file that contains a new line separated list of packages to add to repodata. Deprecated, will be removed in a future version of conda build",
action="store"
)

@@ -90,8 +90,8 @@ def execute(args):

api.update_index(args.dir, check_md5=args.check_md5, channel_name=args.channel_name,
Contributor (Author): Would love to delete this whole file. conda-index has a nice click CLI, but it doesn't take the same arguments.

Contributor (Author, @dholth, Jan 6, 2023): This might be able to use the non-api update_index; the api version is basically "call update_index for every dir in a list".

threads=args.threads, subdir=args.subdir, patch_generator=args.patch_generator,
-verbose=args.verbose, progress=args.progress, hotfix_source_repo=args.hotfix_source_repo,
-current_index_versions=args.current_index_versions_file, index_file=args.file)
+verbose=args.verbose, progress=args.progress,
+current_index_versions=args.current_index_versions_file)
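
To illustrate the suggestion above (a sketch only, assuming the keyword arguments accepted by the update_index wrapper shown earlier in build_index.py), execute() could call the lower-level conda_index.index.update_index directly, once per directory, instead of going through the api module:

# Hypothetical alternative to the api call above, not part of this diff.
import conda_index.index

def update_each_dir(dir_paths, args):
    # conda_index.api.update_index is essentially this loop over directories;
    # the argument mapping from args is assumed, mirroring the wrapper above.
    for dir_path in dir_paths:
        conda_index.index.update_index(
            dir_path,
            check_md5=args.check_md5,
            channel_name=args.channel_name,
            patch_generator=args.patch_generator,
            threads=args.threads,
            verbose=args.verbose,
            progress=args.progress,
            subdirs=args.subdir,
            current_index_versions=args.current_index_versions_file,
        )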


def main():
21 changes: 8 additions & 13 deletions conda_build/environ.py
@@ -22,10 +22,9 @@
from .conda_interface import reset_context
from .conda_interface import get_version_from_git_tag

-from conda_build import utils
+from conda_build import utils, build_index
from conda_build.exceptions import BuildLockError, DependencyNeedsBuildingError
from conda_build.features import feature_list
-from conda_build.index import get_build_index
from conda_build.os_utils import external
from conda_build.utils import ensure_list, prepend_bin_path, env_var
from conda_build.variants import get_default_variant
@@ -776,9 +775,10 @@ def get_install_actions(prefix, specs, env, retries=0, subdir=None,

bldpkgs_dirs = ensure_list(bldpkgs_dirs)

-index, index_ts, _ = get_build_index(subdir, list(bldpkgs_dirs)[0], output_folder=output_folder,
-channel_urls=channel_urls, debug=debug, verbose=verbose,
-locking=locking, timeout=timeout)
+bldpkgs_dir = list(bldpkgs_dirs)[0]
+index, index_ts, _ = build_index.get_build_index(subdir, bldpkgs_dir, output_folder, False,
Contributor (Author): Inline function, courtesy of the PyCharm refactor feature.

+False, channel_urls, debug, verbose, locking=locking, timeout=timeout
+)
specs = tuple(utils.ensure_valid_spec(spec) for spec in specs if not str(spec).endswith('@'))

if ((specs, env, subdir, channel_urls, disable_pip) in cached_actions and
@@ -889,14 +889,9 @@ def create_env(prefix, specs_or_actions, env, config, subdir, clear_cache=True,
channel_urls=tuple(config.channel_urls))
else:
actions = specs_or_actions
-index, _, _ = get_build_index(subdir=subdir,
-bldpkgs_dir=config.bldpkgs_dir,
-output_folder=config.output_folder,
-channel_urls=config.channel_urls,
-debug=config.debug,
-verbose=config.verbose,
-locking=config.locking,
-timeout=config.timeout)
+index, _, _ = build_index.get_build_index(subdir, config.bldpkgs_dir, config.output_folder, False,
+False, config.channel_urls, config.debug, config.verbose, locking=config.locking, timeout=config.timeout
+)
utils.trim_empty_keys(actions)
display_actions(actions, index)
if utils.on_win: