From 2804db61738fafab5ba183b6bdc4a77abf25df16 Mon Sep 17 00:00:00 2001 From: Matin Nuhamunada Date: Mon, 22 Apr 2024 23:22:47 +0200 Subject: [PATCH] fix: correct antismash version on example --- .examples/notebooks/custom_tutorial.ipynb | 88 ++++++++++++++++++++++- 1 file changed, 87 insertions(+), 1 deletion(-) diff --git a/.examples/notebooks/custom_tutorial.ipynb b/.examples/notebooks/custom_tutorial.ipynb index f6ec332b..9c91b52b 100644 --- a/.examples/notebooks/custom_tutorial.ipynb +++ b/.examples/notebooks/custom_tutorial.ipynb @@ -219,7 +219,7 @@ "outputs": [], "source": [ "# Define the paths to the input files\n", - "antismash_regions_file = report_directory / \"tables/df_regions_antismash_7.0.0.csv\"\n", + "antismash_regions_file = report_directory / f\"tables/df_regions_antismash_{antismash_version}.csv\"\n", "display(Markdown(f\">`{antismash_regions_file}`\"))" ] }, @@ -1143,6 +1143,92 @@ "display(HTML(filename=str(outfile)))" ] }, + { + "cell_type": "markdown", + "id": "88f5552d-f998-4b99-a86b-87b902710c7f", + "metadata": {}, + "source": [ + "### Adding adjacency edge\n", + "If you have high-quality (complete) genomes, it might be interesting to see the position of each BGC region relative to the others in a genome. 
# Markdown (continued): Unfortunately, there are only a limited number of
# physics-based layout algorithms in Python, so it is better to use Cytoscape
# or Gephi to visualize the network.

# --- Cell: add genomic-adjacency edges between neighbouring BGC regions -------

# Load the antiSMASH region table. `report_directory` is the name the earlier
# input-path cell uses for the report root, so it is reused here rather than
# the otherwise-unseen `report_dir`.
antismash_table = report_directory / f"tables/df_regions_antismash_{antismash_version}.csv"
df_antismash = pd.read_csv(antismash_table)

# Start from a fresh undirected graph holding only the BGC nodes of the
# integrated graph `G`. Attribute dicts are copied, so `G` is left untouched.
filtered_graph = nx.Graph()
filtered_graph.add_nodes_from(
    (node, data)
    for node, data in G.nodes(data=True)
    if data.get("node_trace") == "BGC"
)

# Carry over only the edges whose two endpoints both survived the filter.
filtered_graph.add_edges_from(
    (u, v, data)
    for u, v, data in G.edges(data=True)
    if filtered_graph.has_node(u) and filtered_graph.has_node(v)
)

# Link each region to the next region on the same accession. Sorting first
# makes the result independent of the row order of the CSV: without it,
# out-of-order rows would trip the distance check and non-contiguous rows of
# one accession would silently miss their neighbours.
regions_sorted = df_antismash.sort_values(["accession", "start_pos"])
for accession, accession_regions in regions_sorted.groupby("accession", sort=False):
    records = accession_regions[["bgc_id", "start_pos", "end_pos"]].to_dict("records")
    for current_bgc, neighbor_bgc in zip(records, records[1:]):
        distance = neighbor_bgc["start_pos"] - current_bgc["end_pos"]
        assert distance > 0, (
            f"Regions {current_bgc['bgc_id']} and {neighbor_bgc['bgc_id']} on "
            f"{accession} overlap or touch (distance_bp={distance})"
        )
        # An existing edge between the two regions is kept as-is; the adjacency
        # edge is only added when the pair is not connected yet. Note that
        # add_edge implicitly creates nodes for bgc_ids not yet in the graph.
        if not filtered_graph.has_edge(current_bgc["bgc_id"], neighbor_bgc["bgc_id"]):
            filtered_graph.add_edge(
                current_bgc["bgc_id"],
                neighbor_bgc["bgc_id"],
                distance_bp=distance,
                relation_type="genomic_adjacency",
            )

display(Markdown(get_graph_stats(filtered_graph, "integrated_graph_with_adjacency")))

# --- Cell: clean up the graph and export it as GraphML ------------------------

# Get self-loops (a lazy generator over the graph, consumed by the next statement)
self_loops = nx.selfloop_edges(filtered_graph)
# Remove self-loops. `self_loops` is assigned by the preceding statement
# (`self_loops = nx.selfloop_edges(filtered_graph)`) and is a generator, so it
# is never None; it is materialised into a list first so the graph is not
# being iterated while its edges are removed.
filtered_graph.remove_edges_from(list(self_loops))


def make_graphml_compatible(attributes, owner):
    """Rewrite an attribute dict in place so that it can be written as GraphML.

    Lists and tuples are joined into a single comma-separated string, keys
    whose value is None are removed, and any other value that is not an
    int, float, str, bool or np.int64 is reported with a printed message
    (the value itself is left unchanged).

    Parameters
    ----------
    attributes : dict
        Node or edge attribute dictionary; modified in place.
    owner : str
        Human-readable label of the node/edge, used in the printed message.
    """
    # Iterate over a snapshot of the keys because entries may be deleted.
    for key in list(attributes):
        value = attributes[key]
        if isinstance(value, (list, tuple)):
            attributes[key] = ", ".join(str(item) for item in value)
        elif value is None:
            del attributes[key]
        elif not isinstance(value, (int, float, str, bool, np.int64)):
            print(f"{owner} has attribute {key} of incompatible type {type(value)}")


# Clean node attributes, and edge attributes as well: edges copied from the
# original graph can carry the same kinds of values as the nodes.
for n, data in filtered_graph.nodes(data=True):
    make_graphml_compatible(data, f"Node {n}")
for u, v, data in filtered_graph.edges(data=True):
    make_graphml_compatible(data, f"Edge ({u}, {v})")

# Write the graph next to the other report assets and show a download button.
outfile = Path(f"assets/data/bigscape_{bigscape_cutoff}_as{antismash_version}_with_genomic_position.graphml")
outfile.parent.mkdir(parents=True, exist_ok=True)
nx.write_graphml(filtered_graph, outfile)
display(Markdown(f"[Download Graph]({str(outfile)})"+'{:target="_blank" .md-button}'))