Skip to content

Commit

Permalink
include test data
Browse files Browse the repository at this point in the history
  • Loading branch information
ArthurVM committed Nov 18, 2022
1 parent 49de937 commit 478b996
Show file tree
Hide file tree
Showing 57 changed files with 108 additions and 179 deletions.
2 changes: 1 addition & 1 deletion src/Afanc/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__="v0.7-alpha"
__version__="0.8a-alpha"
21 changes: 17 additions & 4 deletions src/Afanc/autodatabase/assemblyQC.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,20 @@ def mash(args, taxon_id, fastas):
"""

mashdist_out = path.abspath(f"{taxon_id}_mashdist.txt")
mash_sketchline = f"mash sketch -o ref {' '.join(fastas)}"

## WORKING SKETCHBLOCK ##
# for fa in fastas:
# mash_sketchline = f"mash sketch -o ref {fa}"
# command(mash_sketchline, "MASH").run_comm_quiet(0, args.stdout, args.stderr)
#
# mash_distline = f"mash dist *msh> {mashdist_out}"
# command(mash_distline, "MASH").run_comm_quiet(0, args.stdout, args.stderr)

## OLD SKETCHBLOCK ##
## take only the first 500 fastas for mash distance
## this is to avoid max args OS errors
## not ideal, but if large numbers of assemblies exist for a single tip-level taxon, they could be split further
mash_sketchline = f"mash sketch -o ref {' '.join(fastas[:500])}"
mash_distline = f"mash dist ref.msh ref.msh > {mashdist_out}"
command(mash_sketchline, "MASH").run_comm_quiet(0, args.stdout, args.stderr)
command(mash_distline, "MASH").run_comm_quiet(0, args.stdout, args.stderr)
Expand Down Expand Up @@ -101,6 +114,6 @@ def fastaMove(args, calcArray, tax, modeVal, modeRange):
# write to file list of high quality assemblies
cleanList = str(tax[0]) + "_clean.txt"
with open(cleanList, "w") as file_out:
for elem in cleanFasta:
move(elem, args.cleanFasta_WDir)
# file_out.write("%s\n" % elem)
for fasta in cleanFasta:
move(fasta, args.cleanFasta_WDir)
file_out.write(f"{fasta}\n")
1 change: 1 addition & 0 deletions src/Afanc/autodatabase/runFuncs.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ def makeK2db(args):
def makeKronaChart(args):
from Afanc.utilities.runCommands import command

## TODO: Fix this module for custom taxonomy
subprocessID = "KRONA"
vprint(
subprocessID,
Expand Down
19 changes: 9 additions & 10 deletions src/Afanc/autodatabase/taxadd.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,9 @@ def addTaxon(taxname, mother_clade_taxid, names_df, nodes_df):
to the database.
"""

## max_taxid increased by a factor of 10 to it's length - 1
## this should avoid future conflicts
## max_taxid increased by 1
max_taxid = max(names_df[0])
taxid = max_taxid + ( 10**(len(str(max_taxid))-1) )
taxid = int(max_taxid + 1)

## hacky way of dealing with this, but anything above species level should not be getting introduced into the tax db anyway
if taxname.count(" ")>1:
Expand Down Expand Up @@ -88,7 +87,7 @@ def getTaxidNames(taxname, mother_clade, names_df, nodes_df):
## block for dealing with taxon missing from the ncbi taxonomy database
if taxid == None:

print(f"Cannot find {taxname} in ncbi taxonomy database.")
# print(f"Cannot find {taxname} in ncbi taxonomy database.")

### TAXADD BLOCK ###
### IN DEVELOPMENT ###
Expand All @@ -97,13 +96,13 @@ def getTaxidNames(taxname, mother_clade, names_df, nodes_df):
if mother_clade != None:
## find taxid for the mother clade
mother_clade_unformatted = mother_clade.replace("_", " ")
print(f"Attempting to find {mother_clade_unformatted} in ncbi taxonomy database...", end=" ")
# print(f"Attempting to find {mother_clade_unformatted} in ncbi taxonomy database...", end=" ")
mother_taxid = search_taxon(mother_clade_unformatted, names_df)

## if no taxon exists for the mother clade, find taxid for the genus
if mother_taxid == None:
genus = mother_clade.split("_")[0]
print(f"Attempting to find {genus} in ncbi taxonomy database...")
# print(f"Attempting to find {genus} in ncbi taxonomy database...")
genus_taxid = search_taxon(genus, names_df)

## if the genus does not exist within the ncbi taxonomy database, then call a fail
Expand All @@ -112,18 +111,18 @@ def getTaxidNames(taxname, mother_clade, names_df, nodes_df):

## else if the genus does exist within the database, add both the mother and daughter taxa
else:
print(f"Found {genus_taxid}.", end="\n")
# print(f"Found {genus_taxid}.", end="\n")
mother_taxid, names_df, nodes_df = addTaxon(mother_clade_unformatted, genus_taxid, names_df, nodes_df)
taxid, names_df, nodes_df = addTaxon(taxname, mother_taxid, names_df, nodes_df)

else:
print(f"Found {mother_taxid}.", end="\n")
# print(f"Found {mother_taxid}.", end="\n")
taxid, names_df, nodes_df = addTaxon(taxname, mother_taxid, names_df, nodes_df)

## if no mother clade is given, try to find the genus in the taxonomy database
else:
genus = taxname.split(" ")[0]
print(f"Attempting to find genus {genus} in ncbi taxonomy database...", end=" ")
# print(f"Attempting to find genus {genus} in ncbi taxonomy database...", end=" ")
genus_taxid = search_taxon(genus, names_df)

## if the genus does not exist within the ncbi taxonomy database, then call a fail
Expand All @@ -132,7 +131,7 @@ def getTaxidNames(taxname, mother_clade, names_df, nodes_df):

## else if the genus does exist within the database, add both the mother and daughter taxa
else:
print(f"Found {genus_taxid}.", end="\n")
# print(f"Found {genus_taxid}.", end="\n")
taxid, names_df, nodes_df = addTaxon(taxname, genus_taxid, names_df, nodes_df)


Expand Down
4 changes: 2 additions & 2 deletions src/Afanc/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,9 @@ def initLogFiles(args):

parser_autodb.add_argument('-n', '--ncbi_date',
type=checkDate,
default="2020-05-01",
default="2022-05-01",
action='store',
help='The date of NCBI taxonomy to download. Must be of the form YYYY-05-MM. Default=2020-05-01.')
help='The date of NCBI taxonomy to download. Must be of the form YYYY-05-MM. Default=2022-05-01.')

parser_autodb.add_argument('-m', '--mode_range',
type=float,
Expand Down
5 changes: 4 additions & 1 deletion src/Afanc/screen/report/parseK2report.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from os import path

from Afanc.screen.tree import Tree
from Afanc.utilities.generalUtils import gendbdict


def parseK2line(line):
Expand Down Expand Up @@ -186,6 +185,10 @@ def parseK2reportMain(args, dbdict):

base_nodes, root_node = readK2report(report_path)
best_hits = find_best_hit(root_node, args.pct_threshold, args.num_threshold, args.local_threshold)

if len(best_hits) == 0:
return None

out_json = makeJson(best_hits, args.output_prefix, args.reportsDir, args.pct_threshold, args.num_threshold, args.local_threshold, dbdict)

return out_json
130 changes: 0 additions & 130 deletions src/Afanc/screen/report/parseK2report_old.py

This file was deleted.

Loading

0 comments on commit 478b996

Please sign in to comment.