diff --git a/README.md b/README.md index 857406075e0..f77b5702f71 100644 --- a/README.md +++ b/README.md @@ -34,16 +34,6 @@ ------ -## News - -___NEW!___ _[nx-cugraph](https://rapids.ai/nx-cugraph/)_, a NetworkX backend that provides GPU acceleration to NetworkX with zero code change. -``` -> pip install nx-cugraph-cu11 --extra-index-url https://pypi.nvidia.com -> export NETWORKX_AUTOMATIC_BACKENDS=cugraph -``` -That's it. NetworkX now leverages cuGraph for accelerated graph algorithms. - ----- ## Table of contents diff --git a/python/cugraph/cugraph/cores/core_number.py b/python/cugraph/cugraph/cores/core_number.py index 0b411c2eed2..d84069ddec8 100644 --- a/python/cugraph/cugraph/cores/core_number.py +++ b/python/cugraph/cugraph/cores/core_number.py @@ -23,19 +23,16 @@ def core_number(G, degree_type="bidirectional"): """ Compute the core numbers for the nodes of the graph G. A k-core of a graph - is a maximal subgraph that contains nodes of degree k or more. - A node has a core number of k if it belongs a k-core but not to k+1-core. - This call does not support a graph with self-loops and parallel - edges. + is a maximal subgraph that contains nodes of degree k or more. A node has + a core number of k if it belongs to a k-core but not to k+1-core. This + call does not support a graph with self-loops and parallel edges. Parameters ---------- G : cuGraph.Graph or networkx.Graph - The graph should contain undirected edges where undirected edges are - represented as directed edges in both directions. While this graph - can contain edge weights, they don't participate in the calculation + The current implementation only supports undirected graphs. The graph + can contain edge weights, but they don't participate in the calculation of the core numbers. - The current implementation only supports undirected graphs. .. 
deprecated:: 24.12 Accepting a ``networkx.Graph`` is deprecated and will be removed in a @@ -43,9 +40,10 @@ def core_number(G, degree_type="bidirectional"): the ``nx-cugraph`` backend. See: https://rapids.ai/nx-cugraph/ degree_type: str, (default="bidirectional") - This option determines if the core number computation should be based - on input, output, or both directed edges, with valid values being - "incoming", "outgoing", and "bidirectional" respectively. + This option is currently ignored. This option may eventually determine + if the core number computation should be based on input, output, or + both directed edges, with valid values being "incoming", "outgoing", + and "bidirectional" respectively. Returns ------- @@ -63,7 +61,13 @@ def core_number(G, degree_type="bidirectional"): >>> from cugraph.datasets import karate >>> G = karate.get_graph(download=True) >>> df = cugraph.core_number(G) - + >>> df.head() + vertex core_number + 0 33 4 + 1 0 4 + 2 32 4 + 3 2 4 + 4 1 4 """ G, isNx = ensure_cugraph_obj_for_nx(G) @@ -71,11 +75,14 @@ def core_number(G, degree_type="bidirectional"): if G.is_directed(): raise ValueError("input graph must be undirected") - if degree_type not in ["incoming", "outgoing", "bidirectional"]: - raise ValueError( - f"'degree_type' must be either incoming, " - f"outgoing or bidirectional, got: {degree_type}" - ) + # degree_type is currently ignored until libcugraph supports directed + # graphs for core_number. 
Once supported, degree_type should be checked + # like so: + # if degree_type not in ["incoming", "outgoing", "bidirectional"]: + # raise ValueError( + # f"'degree_type' must be either incoming, " + # f"outgoing or bidirectional, got: {degree_type}" + # ) vertex, core_number = pylibcugraph_core_number( resource_handle=ResourceHandle(), diff --git a/python/cugraph/cugraph/dask/cores/core_number.py b/python/cugraph/cugraph/dask/cores/core_number.py index 4ae1fb547d1..3266348f735 100644 --- a/python/cugraph/cugraph/dask/cores/core_number.py +++ b/python/cugraph/cugraph/dask/cores/core_number.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -53,15 +53,15 @@ def core_number(input_graph, degree_type="bidirectional"): Parameters ---------- input_graph : cugraph.graph - cuGraph graph descriptor, should contain the connectivity information, - (edge weights are not used in this algorithm). - The current implementation only supports undirected graphs. + The current implementation only supports undirected graphs. The graph + can contain edge weights, but they don't participate in the calculation + of the core numbers. degree_type: str, (default="bidirectional") - This option determines if the core number computation should be based - on input, output, or both directed edges, with valid values being - "incoming", "outgoing", and "bidirectional" respectively. - + This option is currently ignored. This option may eventually determine + if the core number computation should be based on input, output, or + both directed edges, with valid values being "incoming", "outgoing", + and "bidirectional" respectively. 
Returns ------- @@ -77,11 +77,14 @@ def core_number(input_graph, degree_type="bidirectional"): if input_graph.is_directed(): raise ValueError("input graph must be undirected") - if degree_type not in ["incoming", "outgoing", "bidirectional"]: - raise ValueError( - f"'degree_type' must be either incoming, " - f"outgoing or bidirectional, got: {degree_type}" - ) + # degree_type is currently ignored until libcugraph supports directed + # graphs for core_number. Once supported, degree_type should be checked + # like so: + # if degree_type not in ["incoming", "outgoing", "bidirectional"]: + # raise ValueError( + # f"'degree_type' must be either incoming, " + # f"outgoing or bidirectional, got: {degree_type}" + # ) # Initialize dask client client = default_client() diff --git a/python/cugraph/cugraph/datasets/dataset.py b/python/cugraph/cugraph/datasets/dataset.py index 15c30700fc3..63389cbc16a 100644 --- a/python/cugraph/cugraph/datasets/dataset.py +++ b/python/cugraph/cugraph/datasets/dataset.py @@ -352,7 +352,9 @@ def get_dask_graph( If True, stores the transpose of the adjacency matrix. Required for certain algorithms. """ - if self._edgelist is None: + if self._edgelist is None or not isinstance( + self._edgelist, dask_cudf.DataFrame + ): self.get_dask_edgelist(download=download) if create_using is None: diff --git a/python/cugraph/cugraph/tests/core/test_core_number.py b/python/cugraph/cugraph/tests/core/test_core_number.py index a01b837ff61..b50e60ceb89 100644 --- a/python/cugraph/cugraph/tests/core/test_core_number.py +++ b/python/cugraph/cugraph/tests/core/test_core_number.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -32,11 +32,15 @@ def setup_function(): # ============================================================================= # Pytest fixtures # ============================================================================= -degree_type = ["incoming", "outgoing"] +# FIXME: degree_type is currently unsupported (ignored) +# degree_type = ["incoming", "outgoing"] +# fixture_params = gen_fixture_params_product( +# (UNDIRECTED_DATASETS, "graph_file"), +# (degree_type, "degree_type"), +# ) fixture_params = gen_fixture_params_product( (UNDIRECTED_DATASETS, "graph_file"), - (degree_type, "degree_type"), ) @@ -46,7 +50,9 @@ def input_combo(request): This fixture returns a dictionary containing all input params required to run a Core number algo """ - parameters = dict(zip(("graph_file", "degree_type"), request.param)) + # FIXME: degree_type is not supported so do not test with different values + # parameters = dict(zip(("graph_file", "degree_type"), request.param)) + parameters = {"graph_file": request.param[0]} graph_file = parameters["graph_file"] G = graph_file.get_graph() @@ -69,7 +75,8 @@ def input_combo(request): def test_core_number(input_combo): G = input_combo["G"] Gnx = input_combo["Gnx"] - degree_type = input_combo["degree_type"] + # FIXME: degree_type is currently unsupported (ignored) + # degree_type = input_combo["degree_type"] nx_core_number_results = cudf.DataFrame() dic_results = nx.core_number(Gnx) @@ -80,7 +87,7 @@ def test_core_number(input_combo): ) core_number_results = ( - cugraph.core_number(G, degree_type) + cugraph.core_number(G) .sort_values("vertex") .reset_index(drop=True) .rename(columns={"core_number": "cugraph_core_number"}) @@ -109,8 +116,3 @@ def test_core_number_invalid_input(input_combo): with pytest.raises(ValueError): cugraph.core_number(G) - - invalid_degree_type = "invalid" - G = input_combo["G"] - with pytest.raises(ValueError): - cugraph.core_number(G, invalid_degree_type) diff --git 
a/python/cugraph/cugraph/tests/core/test_core_number_mg.py b/python/cugraph/cugraph/tests/core/test_core_number_mg.py index 1138c1dc488..2c2c7e40a22 100644 --- a/python/cugraph/cugraph/tests/core/test_core_number_mg.py +++ b/python/cugraph/cugraph/tests/core/test_core_number_mg.py @@ -17,7 +17,7 @@ import cugraph import cugraph.dask as dcg -from cugraph.datasets import karate, dolphins, karate_asymmetric +from cugraph.datasets import karate, dolphins # ============================================================================= @@ -35,7 +35,8 @@ def setup_function(): DATASETS = [karate, dolphins] -DEGREE_TYPE = ["incoming", "outgoing", "bidirectional"] +# FIXME: degree_type is currently unsupported (ignored) +# DEGREE_TYPE = ["incoming", "outgoing", "bidirectional"] # ============================================================================= @@ -43,9 +44,9 @@ def setup_function(): # ============================================================================= -def get_sg_results(dataset, degree_type): +def get_sg_results(dataset): G = dataset.get_graph(create_using=cugraph.Graph(directed=False)) - res = cugraph.core_number(G, degree_type) + res = cugraph.core_number(G) res = res.sort_values("vertex").reset_index(drop=True) return res @@ -57,23 +58,23 @@ def get_sg_results(dataset, degree_type): @pytest.mark.mg @pytest.mark.parametrize("dataset", DATASETS) -@pytest.mark.parametrize("degree_type", DEGREE_TYPE) -def test_sg_core_number(dask_client, dataset, degree_type, benchmark): +# @pytest.mark.parametrize("degree_type", DEGREE_TYPE) +def test_sg_core_number(dask_client, dataset, benchmark): # This test is only for benchmark purposes. 
sg_core_number_results = None G = dataset.get_graph(create_using=cugraph.Graph(directed=False)) - sg_core_number_results = benchmark(cugraph.core_number, G, degree_type) + sg_core_number_results = benchmark(cugraph.core_number, G) assert sg_core_number_results is not None @pytest.mark.mg @pytest.mark.parametrize("dataset", DATASETS) -@pytest.mark.parametrize("degree_type", DEGREE_TYPE) -def test_core_number(dask_client, dataset, degree_type, benchmark): +# @pytest.mark.parametrize("degree_type", DEGREE_TYPE) +def test_core_number(dask_client, dataset, benchmark): dataset.get_dask_edgelist(download=True) # reload with MG edgelist dg = dataset.get_dask_graph(create_using=cugraph.Graph(directed=False)) - result_core_number = benchmark(dcg.core_number, dg, degree_type) + result_core_number = benchmark(dcg.core_number, dg) result_core_number = ( result_core_number.drop_duplicates() .compute() @@ -82,7 +83,7 @@ def test_core_number(dask_client, dataset, degree_type, benchmark): .rename(columns={"core_number": "mg_core_number"}) ) - expected_output = get_sg_results(dataset, degree_type) + expected_output = get_sg_results(dataset) # Update the mg core number with sg core number results # for easy comparison using cuDF DataFrame methods. 
@@ -90,13 +91,3 @@ def test_core_number(dask_client, dataset, degree_type, benchmark): counts_diffs = result_core_number.query("mg_core_number != sg_core_number") assert len(counts_diffs) == 0 - - -@pytest.mark.mg -def test_core_number_invalid_input(): - dg = karate_asymmetric.get_graph(create_using=cugraph.Graph(directed=True)) - - invalid_degree_type = 3 - - with pytest.raises(ValueError): - dcg.core_number(dg, invalid_degree_type) diff --git a/python/cugraph/cugraph/tests/link_prediction/test_jaccard.py b/python/cugraph/cugraph/tests/link_prediction/test_jaccard.py index c9fb73babb8..ed3a796121c 100644 --- a/python/cugraph/cugraph/tests/link_prediction/test_jaccard.py +++ b/python/cugraph/cugraph/tests/link_prediction/test_jaccard.py @@ -156,12 +156,10 @@ def networkx_call(M, benchmark_callable=None): # FIXME: This compare is shared across several tests... it should be # a general utility -def compare(src1, dst1, val1, src2, dst2, val2): - # +def assert_results_equal(src1, dst1, val1, src2, dst2, val2): # We will do comparison computations by using dataframe # merge functions (essentially doing fast joins). We # start by making two data frames - # df1 = cudf.DataFrame() df1["src1"] = src1 df1["dst1"] = dst1 @@ -174,19 +172,18 @@ def compare(src1, dst1, val1, src2, dst2, val2): if val2 is not None: df2["val2"] = val2 - # - # Check to see if all pairs in the original data frame - # still exist in the new data frame. If we join (merge) - # the data frames where (src1[i]=src2[i]) and (dst1[i]=dst2[i]) - # then we should get exactly the same number of entries in - # the data frame if we did not lose any data. - # + # Check to see if all pairs in df1 still exist in the new (merged) data + # frame. If we join (merge) the data frames where (src1[i]=src2[i]) and + # (dst1[i]=dst2[i]) then we should get exactly the same number of entries + # in the data frame if we did not lose any data. 
join = df1.merge(df2, left_on=["src1", "dst1"], right_on=["src2", "dst2"]) + # Print detailed differences on test failure if len(df1) != len(join): join2 = df1.merge( df2, how="left", left_on=["src1", "dst1"], right_on=["src2", "dst2"] ) + orig_option = pd.get_option("display.max_rows") pd.set_option("display.max_rows", 500) print("df1 = \n", df1.sort_values(["src1", "dst1"])) print("df2 = \n", df2.sort_values(["src2", "dst2"])) @@ -196,6 +193,7 @@ def compare(src1, dst1, val1, src2, dst2, val2): .to_pandas() .query("src2.isnull()", engine="python"), ) + pd.set_option("display.max_rows", orig_option) assert len(df1) == len(join) @@ -485,7 +483,7 @@ def test_all_pairs_jaccard_with_topk(): worst_coeff = all_pairs_jaccard_results["jaccard_coeff"].min() better_than_k = jaccard_results[jaccard_results["jaccard_coeff"] > worst_coeff] - compare( + assert_results_equal( all_pairs_jaccard_results["first"], all_pairs_jaccard_results["second"], all_pairs_jaccard_results["jaccard_coeff"], @@ -494,7 +492,7 @@ def test_all_pairs_jaccard_with_topk(): jaccard_results["jaccard_coeff"], ) - compare( + assert_results_equal( better_than_k["first"], better_than_k["second"], better_than_k["jaccard_coeff"], diff --git a/python/pylibcugraph/pylibcugraph/core_number.pyx b/python/pylibcugraph/pylibcugraph/core_number.pyx index e754ef2c65e..48d9c5de429 100644 --- a/python/pylibcugraph/pylibcugraph/core_number.pyx +++ b/python/pylibcugraph/pylibcugraph/core_number.pyx @@ -66,14 +66,14 @@ def core_number(ResourceHandle resource_handle, referencing data and running algorithms. graph : SGGraph or MGGraph - The input graph, for either Single or Multi-GPU operations. + The input graph, for either single or multi-GPU operations. The input + graph must be symmetric (the is_symmetric property must be True). 
degree_type: str - This option determines if the core number computation should be based - on input, output, or both directed edges, with valid values being - "incoming", "outgoing", and "bidirectional" respectively. - This option is currently ignored in this release, and setting it will - result in a warning. + This option is currently ignored. This option may eventually determine + if the core number computation should be based on input, output, or + both directed edges, with valid values being "incoming", "outgoing", + and "bidirectional" respectively. do_expensive_check: bool If True, performs more extensive tests on the inputs to ensure @@ -98,14 +98,14 @@ def core_number(ResourceHandle resource_handle, cdef cugraph_error_code_t error_code cdef cugraph_error_t* error_ptr - degree_type_map = { - "incoming": cugraph_k_core_degree_type_t.K_CORE_DEGREE_TYPE_IN, - "outgoing": cugraph_k_core_degree_type_t.K_CORE_DEGREE_TYPE_OUT, - "bidirectional": cugraph_k_core_degree_type_t.K_CORE_DEGREE_TYPE_INOUT} - + # When supported, degree_type string should be mapped to constant like so: + # degree_type_map = { + # "incoming": cugraph_k_core_degree_type_t.K_CORE_DEGREE_TYPE_IN, + # "outgoing": cugraph_k_core_degree_type_t.K_CORE_DEGREE_TYPE_OUT, + # "bidirectional": cugraph_k_core_degree_type_t.K_CORE_DEGREE_TYPE_INOUT} error_code = cugraph_core_number(c_resource_handle_ptr, c_graph_ptr, - degree_type_map[degree_type], + cugraph_k_core_degree_type_t.K_CORE_DEGREE_TYPE_IN, do_expensive_check, &result_ptr, &error_ptr)