從fasta中抽取需要的序列:
Python方法一:(這種方式最慢)
由于平時(shí)工作比較忙,工作中遇到的問題都沒有及時(shí)記錄,現(xiàn)在有空時(shí)間了所以整理一下一些常見的問題及技巧供大家參考:
!/usr/bin/python
import sys
def Fasta(inputfile) :
f = open(inputfile,"r")
fastadic = {}
while True :
line = f.readline().rstrip()
if len(line) == 0 :
break
else :
pass
if line.startswith(">") :
name = line.split()[0]
fastadic[name] = ""
else :
fastadic[name]+=line
f.close()
return fastadic
def usage():
print ("1,fasta\n2,list")
if len(sys.argv) != 3 :
usage()
sys.exit()
print (sys.argv[0])
fastafile = sys.argv[1]
listfile = sys.argv[2]
dic = Fasta(fastafile)
ff=open(listfile,"r")
while 1:
line1 = ff.readline().rstrip()
if len(line1) == 0 :
break
else :
pass
array = line1.split("\t")
if ">"+array[0] in dic.keys() :
print (">"+array[0]+"\n"+dic[">"+array[0]])
else :
print ("Error:"+line1)
ff.close()
Python方法二:(次慢)
!/usr/bin/python
import sys
def Fasta(inputfile) :
f = open(inputfile,"r")
fastadic = {}
while True :
line = f.readline().rstrip()
if len(line) == 0 :
break
else :
pass
if line.startswith(">") :
name = line.split()[0]
fastadic[name] = []
else :
fastadic[name].append(line)
f.close()
return fastadic
def usage():
print ("1,fasta\n2,list")
if len(sys.argv) != 3 :
usage()
sys.exit()
print (sys.argv[0])
fastafile = sys.argv[1]
listfile = sys.argv[2]
dic = Fasta(fastafile)
ff=open(listfile,"r")
while 1:
line1 = ff.readline().rstrip()
if len(line1) == 0 :
break
else :
pass
array = line1.split("\t")
if ">"+array[0] in dic.keys() :
print (">"+array[0]+"\n"+"".join(dic[">"+array[0]]))
else :
print ("Error:"+line1)
ff.close()
Perl 方法三(最快)
!/usr/bin/perl -w
die "perl ARGV[0] ;
while (<I>) {
chomp;
if (/^>/) {
@name = split/\s+/,fasta{
fasta{
_ ;
}
}
close I;
open T,_ ;
if (exists tax[0]}) {
print ">".fasta{">".$tax[0]}, "\n" ;
}
}
close T;