Skip to content

Commit

Permalink
fix: clean strange char
Browse files Browse the repository at this point in the history
  • Loading branch information
Ubuntu authored and matinnuhamunada committed Mar 26, 2024
1 parent 5d07bfe commit 00dc38d
Showing 1 changed file with 13 additions and 1 deletion.
14 changes: 13 additions & 1 deletion workflow/scripts/alleleome_get_core_genes_fasta.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,18 @@
from Bio.SeqRecord import SeqRecord


def remove_special_char(s):
    """Replace characters that are awkward in identifiers/file names.

    The following substitutions are applied, in this order, to the string
    form of ``s``:

    * ``/`` -> ``_``
    * ``'`` -> ``_variant``
    * ``(`` -> ``_``
    * ``)`` -> removed

    Unlike a mutually exclusive ``if``/``elif`` chain, every substitution is
    applied, so a value containing several kinds of special characters
    (e.g. ``"a/b'"``) is fully cleaned in a single call.

    Args:
        s: The value to clean. Normally a string; other objects are examined
            through ``str(s)``.

    Returns:
        ``s`` itself, unchanged, when its string form contains none of the
        special characters; otherwise a new ``str`` with all substitutions
        applied.
    """
    # (character to replace, replacement) pairs, applied sequentially.
    replacements = (
        ("/", "_"),
        ("'", "_variant"),
        ("(", "_"),
        (")", ""),
    )
    text = str(s)
    # Fast path: hand clean values back untouched (keeps the original object
    # and type for inputs that need no cleaning).
    if not any(char in text for char, _ in replacements):
        return s
    # Operate on the string form so non-str inputs do not fail on .replace().
    for char, substitute in replacements:
        text = text.replace(char, substitute)
    return text


def load_data(roary_path):
"""
Load data from Roary output.
Expand All @@ -25,7 +37,7 @@ def load_data(roary_path):
roary_path / "df_gene_presence_locustag.csv", index_col="Gene", low_memory=False
)
df_gene_presence_locustag.index = [
str(i).replace("/", "_") for i in list(df_gene_presence_locustag.index)
remove_special_char(str(i)) for i in list(df_gene_presence_locustag.index)
]
return df_gene_presence_binary, df_gene_presence_locustag

Expand Down

0 comments on commit 00dc38d

Please sign in to comment.