Skip to content

Commit

Permalink
#73 started exploratory code
Browse files Browse the repository at this point in the history
  • Loading branch information
weka511 committed Jan 1, 2021
1 parent 712e669 commit 236ccf0
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 7 deletions.
42 changes: 35 additions & 7 deletions BA9F.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,30 @@

import argparse
from helpers import read_strings
#from snp import SuffixTree
from snp import SuffixArray
import time
from numpy import argmin

def FindLongestRepeat(string):
    '''
    Placeholder: no search is performed.

    Parameters:
        string    Text that would be searched (currently ignored)

    Returns:
        None, always
    '''
    return None

def FindShortestNonShared(s,t):
    '''
    Find a shortest substring of s that does not occur anywhere in t.

    Candidate lengths are tried in increasing order, so the first
    substring of s that is missing from t is guaranteed to be a shortest
    one. When several shortest answers exist, the one that starts
    leftmost in s is returned.

    Parameters:
        s    String whose substrings are the candidates
        t    String that the answer must not occur in

    Returns:
        A shortest non-empty substring of s that is absent from t, or the
        empty string when there is none (s is empty, or s itself occurs
        in t, in which case every substring of s occurs in t as well).
    '''
    for k in range(1, len(s) + 1):
        # Every substring of t having length k; this is empty once k exceeds len(t),
        # so the first k-mer of s is then reported straight away.
        seen_in_t = {t[i:i + k] for i in range(len(t) - k + 1)}
        for i in range(len(s) - k + 1):
            candidate = s[i:i + k]
            if candidate not in seen_in_t:
                return candidate
    return ''

if __name__=='__main__':
start = time.time()
parser = argparse.ArgumentParser('BA9D Find the Longest Repeat in a String')
Expand All @@ -31,12 +49,22 @@ def FindLongestRepeat(string):
parser.add_argument('--rosalind', default=False, action='store_true', help='process Rosalind dataset')
args = parser.parse_args()
if args.sample:
print (FindLongestRepeat('ATATCGTTTTATCGTT'))
print (FindShortestNonShared('CCAAGCTGCTAGAGG','CATGCTGGGCTGGCT'))

if args.extra:
pass
Input,Expected = read_strings('data/ShortestNonSharedSubstring.txt',init=0)
Actual = FindShortestNonShared(Input[0],Input[1])
print (len(Expected[0]),len(Actual))
print (Expected[0])
print (Actual)

if args.rosalind:
pass
Input = read_strings(f'data/rosalind_{os.path.basename(__file__).split(".")[0]}.txt')
Result = FindShortestNonShared(Input[0],Input[1])
print (Result)
with open(f'{os.path.basename(__file__).split(".")[0]}.txt','w') as f:
f.write(f'{Result}\n')

elapsed = time.time()-start
minutes = int(elapsed/60)
seconds = elapsed-60*minutes
Expand Down
5 changes: 5 additions & 0 deletions data/ShortestNonSharedSubstring.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Input
GAGCGATGTGTAAAGACGGGCCTAGTGTGTTATTGGTGAGGATCCGCAAATTCCGCTTTAACATCGCTCAACCGTCCACGAGCAGCTCGCGGCTTGTTTGATTTTGCTCACCCGGAACTAAGCCTTCAATAATTGCGGGACACTTCTACTTTCCATGTATTAAAACCTTATAATTCCGAGAGACTGACATTTTAACCCAGGGATTCTCGGGGCTTGTAGGTCACTGGCGTTAGCTCTGCCAATGTTCTTCATCGCCGAAATTAAACTCGTGACCTTGCCCTACGGTTTGAAACGTTGGATTCCTAGCATTCGGTGCAACGGGAGTTCCATACCAGCAGTTAAGGACCGGGTTGCCCCGTCCCACTACGAGCAGCCGTTAGAAAACAGTTCTACCGGAGGCTATCCCGCACCACGGGTTTTCTTAGTGAAAGGGACTGCGCAGCCATCGAAGAGTAGGGGGAGTCAGAGAGAGGCAGGCTTGTTGGGCTGATACATCTAGTTTACTAAATAGCCTTAATGGCGTCCCCCTCTTCGTTGATGCGCGTGGCCTGTGAAATTAGGCAGGGCCCAATGAGCAAGGCTGATTACTATCTAATTGCAGAGCGCAATGCTCTCATATATTATTATCCATGAATCTCATTTCACTAATCAGAAACGTG
CACCTCGTCAATACAACAAAAGGCGGCTCGCTTAAAGGGCGCAGCTAGTTCCTCCCCCTCTCATTGGGACATAGTCAACCTGCTAATCCGGATTCGAATGGATTATTCCGTAATTGAACGGTAATTTAGTGAGCTTCGCAGTAAACGATAGATGCGAGCTCTAGCAGGCCACTGACTATATAAACGCCAACACTAGTGCCGTGCATGGACGACTCGATGTACTATAGATTTGCACAGGTATGACCGGAGGAGCGGGACTGCCTAGGCTATAGGGAACGGGGAGTATTGGGAGCCTTTTAGGCCCTCGTCATATCCCTTAACGTTCCCGCGCAGCTAAATTGTGGAACCGGAAAACAATGGATCTGCTTATTTTTGTAGGCTTGGTTAAGCGAAACGGATCAAAATAAACAAAGAATTAATCAATGAACTAACCAACGAAGTAAGCAAGGATATACATAGATTTATTCATTGATCTATCCATCGATGTATGCATGGACACAGACTTACTCACTGACCTACCCACCGACGTACGCACGGAGAGTTAGTCAGTGAGCTAGCCAGCGAGGTAGGCAGGGTTTTCTTTGTTCCTTCGTTGCTTGGTCTCTGTCCCTCCGTCGCTCGGTGTGCCTGCGTGGCTGGGCCCCGCCGGCGCGGGGAAA
Output
AAACC

0 comments on commit 236ccf0

Please sign in to comment.