# Standard genetic code for DNA codons. The three stop codons (TAA, TAG, TGA)
# map to the empty string, which is how translation detects a stop.
_CODON_TABLE = {
    'ATA': 'I', 'ATC': 'I', 'ATT': 'I', 'ATG': 'M',
    'ACA': 'T', 'ACC': 'T', 'ACG': 'T', 'ACT': 'T',
    'AAC': 'N', 'AAT': 'N', 'AAA': 'K', 'AAG': 'K',
    'AGC': 'S', 'AGT': 'S', 'AGA': 'R', 'AGG': 'R',
    'CTA': 'L', 'CTC': 'L', 'CTG': 'L', 'CTT': 'L',
    'CCA': 'P', 'CCC': 'P', 'CCG': 'P', 'CCT': 'P',
    'CAC': 'H', 'CAT': 'H', 'CAA': 'Q', 'CAG': 'Q',
    'CGA': 'R', 'CGC': 'R', 'CGG': 'R', 'CGT': 'R',
    'GTA': 'V', 'GTC': 'V', 'GTG': 'V', 'GTT': 'V',
    'GCA': 'A', 'GCC': 'A', 'GCG': 'A', 'GCT': 'A',
    'GAC': 'D', 'GAT': 'D', 'GAA': 'E', 'GAG': 'E',
    'GGA': 'G', 'GGC': 'G', 'GGG': 'G', 'GGT': 'G',
    'TCA': 'S', 'TCC': 'S', 'TCG': 'S', 'TCT': 'S',
    'TTC': 'F', 'TTT': 'F', 'TTA': 'L', 'TTG': 'L',
    'TAC': 'Y', 'TAT': 'Y', 'TAA': '', 'TAG': '',
    'TGC': 'C', 'TGT': 'C', 'TGA': '', 'TGG': 'W',
}

# Maps every base to its complement in a single pass over the string.
_COMPLEMENT = str.maketrans('ACGT', 'TGCA')


def _translate_from(strand, start):
    """Translate ``strand`` codon by codon from ``start`` to the first stop.

    Returns the protein string (without the stop), or ``None`` when the end
    of the strand or an unrecognised codon is reached before a stop codon.
    """
    residues = []
    # Stopping at len - 2 skips a trailing partial codon.
    for pos in range(start, len(strand) - 2, 3):
        amino = _CODON_TABLE.get(strand[pos:pos + 3])
        if amino is None:
            # Codon contains something other than A/C/G/T: not translatable.
            return None
        if amino == '':
            return ''.join(residues)
        residues.append(amino)
    return None


def orf(sequence):
    """Return the distinct proteins encoded by the open reading frames of a DNA string.

    An open reading frame starts at an ``ATG`` start codon and runs, three
    bases at a time, up to the first stop codon (``TAA``, ``TAG`` or ``TGA``).
    Both the given strand and its reverse complement are scanned, in every
    reading frame.

    Args:
        sequence: DNA string. Letter case is ignored. Codons containing
            characters other than A, C, G and T never start a reading frame
            and abort the translation of any frame that runs into them.

    Returns:
        A set of protein strings, each beginning with ``M`` and excluding the
        stop codon. Frames that reach the end of the strand without a stop
        codon contribute nothing. The set is empty when no frame qualifies.
    """
    forward = sequence.upper()
    # Reverse first, then complement, to read the opposite strand 5'->3'.
    reverse = forward[::-1].translate(_COMPLEMENT)

    proteins = set()
    for strand in (forward, reverse):
        # Jump from one start codon to the next instead of testing every offset.
        start = strand.find('ATG')
        while start != -1:
            protein = _translate_from(strand, start)
            if protein is not None:
                proteins.add(protein)
            start = strand.find('ATG', start + 1)
    return proteins
# Collect the sequence of every FASTA record found in the input file.
seq_list = []
stseq = ''
# The context manager closes the file even if parsing raises.
with open('18_Open_reading_frames.txt') as fasta:
    for line in fasta:
        if line[0] == '>':
            # A header line closes the previous record, if one was started.
            if stseq != '':
                seq_list.append(stseq)
                stseq = ''
        else:
            # A sequence may be wrapped over several lines; join them.
            stseq = stseq + line.strip('\n')
# The last record has no following header, so flush it explicitly.
seq_list.append(stseq)
# Only the first record is analysed.
proteins = orf(seq_list[0])
for one in proteins:
    print(one)
M
MLLGSFRLIPKETLIQVAGSSPCNLS
MGMTPRLGLESLLE
MTPRLGLESLLE
image.png
19. Enumerating Gene Orders(穷举排列)
image.png
解题思路
给定正整数 n,要求输出 1 到 n 的所有排列。
第一行输出排列的总数(n!),随后每行输出一个排列,数字之间以空格分隔。
import itertools
import math

# Read the permutation length n from the user.
n = int(input("请输入一个整数:"))
# The number of orderings of n distinct items is n!; the earlier n*(n-1)
# formula only happens to agree with it for n = 2 and n = 3.
a = math.factorial(n)
print(a)
# The items being permuted are the integers 1..n.
set1 = list(range(1, n + 1))
b = itertools.permutations(set1, n)
# permutations() is lazy, so each ordering is printed as it is generated.
for one in b:
    print(' '.join(map(str, one)))
image.png