4 Convert between FASTA-formatted input and one-sequence-per-line output so that
5 the sequences can be easily manipulated with UNIX text tools (e.g., grep,
6 head, wc, split, sort, etc.).
8 In order for '--inverse' to work correctly, the same flags must be supplied as
9 were supplied during the forward conversion (the script does not try to
10 guess). With '--defline=after', the conversion should be perfectly
11 invertible, modulo whitespace and wrapping. For '--defline=omit', an
12 artificial defline will be constructed based on the filename and line number.
17 greylag, a collection of programs for MS/MS protein analysis
18 Copyright (C) 2006-2008 Stowers Institute for Medical Research
20 This program is free software: you can redistribute it and/or modify
21 it under the terms of the GNU General Public License as published by
22 the Free Software Foundation, either version 3 of the License, or
23 (at your option) any later version.
25 This program is distributed in the hope that it will be useful,
26 but WITHOUT ANY WARRANTY; without even the implied warranty of
27 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 GNU General Public License for more details.
30 You should have received a copy of the GNU General Public License
31 along with this program. If not, see <http://www.gnu.org/licenses/>.
34 Stowers Institute for Medical Research
36 Kansas City, Missouri 64110
49 # no backtrace on SIGPIPE
52 signal
.signal(signal
.SIGPIPE
, signal
.SIG_DFL
)
59 # if we're unit testing, just throw an exception
60 if __name__
!= "__main__":
61 raise Exception((s
+ " (fatal error)") % args
)
62 print >> sys
.stderr
, ("error: " + s
) % args
66 greylag
.chase_error
= error
70 print >> sys
.stderr
, "warning: %s [at %s:%s]" \
71 % (message
, fileinput
.filename(), fileinput
.filelineno())
def write_flattened_locus(options, defline, sequence):
    """Write one locus to stdout as a single flattened line.

    options  -- parsed option object; only 'defline' (one of 'before',
                'after' or 'omit') and 'delimiter' are read here
    defline  -- the locus defline, without the leading '>'
    sequence -- the locus sequence as a single string

    Output layout by options.defline:
      'after'  : <sequence><delimiter>><defline>
      'before' : ><defline><delimiter><sequence>
      otherwise ('omit'): <sequence> alone
    """
    # A single parenthesized argument keeps the output identical under the
    # Python 2 print statement while also parsing as a Python 3 call.
    if options.defline == 'after':
        print('%s%s>%s' % (sequence, options.delimiter, defline))
    elif options.defline == 'before':
        # The line is still written; the warning only flags that the
        # delimiter also occurs inside the defline itself.
        if options.delimiter in defline:
            warn("delimiter present in defline")
        print('>%s%s%s' % (defline, options.delimiter, sequence))
    else:
        # 'omit': without this branch nothing would be written for the
        # locus, so emit the bare sequence on a line of its own.
        print(sequence)
86 parser
= optparse
.OptionParser(usage
="usage: %prog [options] [<file>...]",
88 parser
.add_option("-d", "--delimiter", dest
="delimiter", default
='\t',
89 help="delimiter between defline and sequence"
90 " [default TAB]", metavar
="STRING")
91 parser
.add_option("-D", "--defline", dest
="defline",
92 choices
=('before', 'after', 'omit'), default
="after",
93 help="position of defline with respect to sequence, one"
94 " of 'before', 'after' [default], or 'omit'",
96 parser
.add_option("-i", "--inverse", dest
="inverse", action
="store_true",
97 help="do the inverse transformation (flat to FASTA)")
99 parser
.add_option("-w", "--wrap", dest
="wrap", type="int",
100 default
=DEFAULT_WRAP
,
101 help="for --inverse, wrap sequence to specified width"
102 " [default %s, 0 means don't wrap at all]" % DEFAULT_WRAP
,
104 parser
.add_option("-v", "--verbose", dest
="verbose", action
="store_true",
106 parser
.add_option("--copyright", action
="store_true", dest
="copyright",
107 help="print copyright and exit")
108 options
, args
= parser
.parse_args()
114 if not options
.inverse
:
116 files
= [ sys
.stdin
]
118 files
= [ open(fn
) for fn
in args
]
121 for locusname
, defline
, sequence
in greylag
.read_fasta_file(f
):
122 write_flattened_locus(options
, defline
, sequence
)
124 for line
in fileinput
.input(args
):
125 if options
.defline
!= 'omit':
126 parts
= line
.split(options
.delimiter
, 1)
128 error("input line lacks delimiter")
129 if options
.defline
== 'before':
130 defline
, sequence
= parts
132 sequence
, defline
= parts
135 defline
= "%s:%s" % (fileinput
.filename(),
136 fileinput
.filelineno())
137 sequence
= sequence
.strip()
138 print defline
.strip()
140 for start
in range(0, len(sequence
), options
.wrap
):
141 print sequence
[start
:start
+options
.wrap
]
146 if __name__
== '__main__':