-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathreadsRetrieval.py
executable file
·76 lines (66 loc) · 3.14 KB
/
readsRetrieval.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/usr/bin/env python3
#*****************************************************************************
# Name: MTG-Link
# Description: Local assembly tool for linked-reads data
# Copyright (C) 2020 INRAE
# Author: Anne Guichard
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#*****************************************************************************
"""Module 'readsRetrieval.py': Reads Retrieval
The module 'readsRetrieval.py' retrieves the reads whose barcode is observed in the chunk/flank regions surrounding the gap/target.
"""
from __future__ import print_function
import os
import re
import subprocess
import sys
#----------------------------------------------------
# retrieveReadsWithLRezQueryFastq function
#----------------------------------------------------
def retrieveReadsWithLRezQueryFastq(gfa_name, reads, index, allBarcodesLists, threads):
    """
    To retrieve the reads associated to the barcodes extracted on chunk/flank regions, with `LRez query fastq`.
    `LRez query fastq` enables to query a barcodes index and a fastq file to retrieve the reads containing the query barcodes.

    The standard error of `LRez` is appended to the log file '<gfa_name>_LRezQueryFastq.log';
    this log file is deleted afterwards if it is empty.
    The function returns nothing. On any error, a message is printed and the program exits with status 1.

    Args:
        - gfa_name: str
            name of the GFA file containing the gaps' coordinates (used as prefix of the log file)
        - reads: file
            barcoded FASTQ file of linked reads
        - index: file
            index of barcodes
        - allBarcodesLists: file
            file containing all the lists of the extracted barcodes (all the lists obtained for all gaps/targets)
        - threads: int
            number of threads to use
    """
    try:
        # LRez query fastq.
        command = ["LRez", "query", "fastq", "--fastq", reads, "--index", index, "--collectionOfLists", allBarcodesLists, "--threads", str(threads), "--gzip"]
        queryFastqLog = str(gfa_name) + "_LRezQueryFastq.log"

        # Open the log on its own, so that only a failure to open the log file is reported as such.
        # (In Python 3, IOError is OSError: a missing `LRez` executable would otherwise be
        # reported as a log file problem.)
        try:
            log = open(queryFastqLog, "a")
        except IOError as err:
            print("File 'readsRetrieval.py', function 'retrieveReadsWithLRezQueryFastq()': Unable to open or write to the file {}. \nIOError-{}".format(str(queryFastqLog), err))
            sys.exit(1)

        # Run LRez with its standard error appended to the log.
        # NOTE: the exit status of LRez is not checked here; failures are only visible in the log.
        with log:
            subprocess.run(command, stderr=log)

        # Remove the log file if `LRez query fastq` wrote nothing to it.
        if os.path.getsize(queryFastqLog) == 0:
            try:
                os.remove(queryFastqLog)
            except OSError as err:
                # Best-effort cleanup: a leftover empty log must not abort the pipeline.
                print("Warning: unable to remove the empty log file {}: {}".format(queryFastqLog, err), file=sys.stderr)

    except Exception as e:
        print("File 'readsRetrieval.py': Something wrong with the function 'retrieveReadsWithLRezQueryFastq()'")
        print("Exception-{}".format(e))
        sys.exit(1)