Skip to content

Commit

Permalink
fix: correct antismash version on example
Browse files Browse the repository at this point in the history
  • Loading branch information
matinnuhamunada committed Apr 22, 2024
1 parent 6c61073 commit 2804db6
Showing 1 changed file with 87 additions and 1 deletion.
88 changes: 87 additions & 1 deletion .examples/notebooks/custom_tutorial.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@
"outputs": [],
"source": [
"# Define the paths to the input files\n",
"antismash_regions_file = report_directory / \"tables/df_regions_antismash_7.0.0.csv\"\n",
"antismash_regions_file = report_directory / f\"tables/df_regions_antismash_{antismash_version}.csv\"\n",
"display(Markdown(f\">`{antismash_regions_file}`\"))"
]
},
Expand Down Expand Up @@ -1143,6 +1143,92 @@
"display(HTML(filename=str(outfile)))"
]
},
{
"cell_type": "markdown",
"id": "88f5552d-f998-4b99-a86b-87b902710c7f",
"metadata": {},
"source": [
"### Adding adjacency edge\n",
"If you have high-quality (complete) genomes, it might be interesting to see the position of each BGC region relative to the others in a genome. Unfortunately, there are only a limited number of physics-based layout algorithms in Python, so it is better to use Cytoscape or Gephi to visualize the network."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ce910590-50af-4505-b565-e6c555558963",
"metadata": {},
"outputs": [],
"source": [
"# Load the antiSMASH region table, using the same report directory variable as the\n",
"# earlier cell that defines `antismash_regions_file`\n",
"antismash_table = report_directory / f\"tables/df_regions_antismash_{antismash_version}.csv\"\n",
"df_antismash = pd.read_csv(antismash_table)\n",
"\n",
"# Create a new graph\n",
"filtered_graph = nx.Graph()\n",
"\n",
"# Copy only the nodes traced as BGC, together with their attributes\n",
"for node, data in G.nodes(data=True):\n",
"    if data.get('node_trace') == 'BGC':\n",
"        filtered_graph.add_node(node, **data)\n",
"\n",
"# Copy only the edges whose two endpoints were both kept\n",
"for u, v, data in G.edges(data=True):\n",
"    if filtered_graph.has_node(u) and filtered_graph.has_node(v):\n",
"        filtered_graph.add_edge(u, v, **data)\n",
"\n",
"# Sort by contig and start coordinate so that consecutive rows are genomic neighbours,\n",
"# regardless of the row order or the index labels of the CSV file\n",
"df_sorted = df_antismash.sort_values([\"accession\", \"start_pos\"])\n",
"\n",
"for accession, df_contig in df_sorted.groupby(\"accession\", sort=False):\n",
"    regions = df_contig.to_dict(\"records\")\n",
"    # Pair each region with the next region on the same contig\n",
"    for current_bgc, neighbor_bgc in zip(regions, regions[1:]):\n",
"        distance = neighbor_bgc[\"start_pos\"] - current_bgc[\"end_pos\"]\n",
"        assert distance > 0, (\n",
"            f\"Regions {current_bgc['bgc_id']} and {neighbor_bgc['bgc_id']} on {accession} \"\n",
"            f\"are not separated (distance: {distance} bp)\"\n",
"        )\n",
"        # Keep an edge that already exists between the two BGCs instead of overwriting it\n",
"        if not filtered_graph.has_edge(current_bgc[\"bgc_id\"], neighbor_bgc[\"bgc_id\"]):\n",
"            filtered_graph.add_edge(\n",
"                current_bgc[\"bgc_id\"],\n",
"                neighbor_bgc[\"bgc_id\"],\n",
"                distance_bp=distance,\n",
"                relation_type=\"genomic_adjacency\",\n",
"            )\n",
"\n",
"display(Markdown(get_graph_stats(filtered_graph, \"integrated_graph_with_adjacency\")))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4f0ee577-3e01-4ee7-8b9d-b73420933edb",
"metadata": {},
"outputs": [],
"source": [
"# Remove self-loops. The edge list is materialized first so that the graph is not\n",
"# modified while the self-loop generator is still being consumed\n",
"self_loops = list(nx.selfloop_edges(filtered_graph))\n",
"filtered_graph.remove_edges_from(self_loops)\n",
"\n",
"# Convert or drop node attribute values that cannot be written to GraphML\n",
"for n, data in filtered_graph.nodes(data=True):\n",
"    # Iterate over a snapshot because the attribute dictionary is edited inside the loop\n",
"    for k, v in list(data.items()):\n",
"        if isinstance(v, (list, tuple)):\n",
"            # Flatten sequences into a comma-separated string\n",
"            data[k] = \", \".join([str(i) for i in v])\n",
"        elif v is None:\n",
"            # Drop attributes that have no value\n",
"            del filtered_graph.nodes[n][k]\n",
"        elif not isinstance(v, (int, float, str, bool, np.int64)):\n",
"            # Only report the attribute; it is left unchanged\n",
"            print(f\"Node {n} has attribute {k} of incompatible type {type(v)}\")\n",
"\n",
"# Write the graph next to the other downloadable assets and render a download button\n",
"outfile = Path(f\"assets/data/bigscape_{bigscape_cutoff}_as{antismash_version}_with_genomic_position.graphml\")\n",
"outfile.parent.mkdir(parents=True, exist_ok=True)\n",
"nx.write_graphml(filtered_graph, outfile)\n",
"display(Markdown(f\"[Download Graph]({str(outfile)})\"+'{:target=\"_blank\" .md-button}'))"
]
},
{
"cell_type": "markdown",
"id": "d7cc0c3e-8b47-47b7-bfbc-31398bcefd1d",
Expand Down

0 comments on commit 2804db6

Please sign in to comment.