8 Various tools for interpreting and displaying text strings.
14 Takes a list, puts it into a string with commas and the word "and" before the last item.
18 Takes a string, returns 1 if the string is all letters, 0 if not.
22 Takes a string, tests to see if it meets this pattern: optional + or
23 -, 0 or more digits, followed by either: "." and one or more digits,
24 or, just one or more digits. This should catch most normal ways that a
25 user would enter a number. This function might be improved by
26 returning the number that was contained in the string instead of just
27 "1" (in case perl can't cast it on its own... i've never checked to
28 see if perl can parse an initial "+" for instance)
32 Takes a string and returns the string without leading or trailing
35 =head2 remove_all_whitespaces
37 Takes a string and returns it without any whitespaces in it at all
38 anymore. If you sent in a spaced sentence, your spaces would be
41 =head2 strip_unprintables
43 [NOT YET IMPLEMENTED -apparently as of March 2009] This function is
44 still under development. It is meant to clean input of escape
45 characters in preparation for display, database insertion,
46 etc. However, different machines apparently are working with different
47 character sets, so the higher characters cannot be cleaned reliably
48 (when i tried to clean higher characters, this function produced
49 different output on my machine than it did on the devel machine). For
50 now, it just cleans out the lower characters.
55 Desc: abbreviate some latin words in your string and return
56 the new abbreviated version
58 Ret : string with abbreviations
62 my $tomato = 'Lycopersicon esculentum';
63 my $abbr = abbr_latin($tomato);
65 #will print 'L. esculentum'
67 Currently abbreviates Solanum, Lycopersicon, Capsicum, Nicotiana,
72 package CXGN
::Tools
::Text
;
84 remove_all_whitespaces
95 use base qw
/Exporter/;
97 #returns the contents of the array in a string of the form "$_[0], $_[1],...., and $_[end]"
100 : ( @_ == 1 ) ?
$_[0]
101 : ( @_ == 2 ) ?
join( " and ", @_ )
102 : join( ", ", @_[ 0 .. ( $#_ - 1 ) ], "and $_[-1]" );
105 #test a string to see if it is one continuous string of letters
108 if ( defined($string)
109 && $string =~ /^[A-Za-z]+$/i
110 ) #if there are one or more letters with no spaces in the string
117 #test a string to see if it is a number
120 if ( defined($string)
121 && $string =~ /^([+\-]?)\d*(\.\d+|\d+)$/
122 ) #optional + or -, 0 or more digits, followed by (. and one or more digits) or (just one or more digits)
129 #trim whitespace from string
132 $string =~ s/^\s+|\s+$//g if defined $string;
136 #remove_all all whitespace in string
137 sub remove_all_whitespaces
{
139 if ( defined($string) ) {
147 if ( defined($string) ) {
148 $string =~ s/Solanum/S\./g;
149 $string =~ s/Lycopersicon/L\./g;
150 $string =~ s/Capsicum/C\./g;
151 $string =~ s/Nicotiana/N\./g;
152 $string =~ s/Coffea/C\./g;
157 =head2 sanitize_string
159 Usage: my $sanitized = sanitize_string($dirty)
160 Desc: removes {, }, <, >, and ; characters from
161 string $dirty and returns the sanitized
168 sub sanitize_string
{
171 $s =~ s/\}|\{|\>|\<|\;//g;
176 =head2 function format_field_text()
182 Description: formats a post or topic text for display.
183 Note that it converts certain embedded tags to
184 html links. This function does not assure security
185 - use the get_encoded_arguments in the CXGN::Page
186 object for that purpose.
190 [link][ref][\ref][\link] the difference between [link] and [ilink] is that [link] add
191 [ilink][ref][\ref][\ilink] http:// if do not find it. [ilink] not.
198 sub format_field_text
{
199 my $post_text = shift;
201 # support vB script url tag
202 while ($post_text =~ /\[url\](.*?)\[\/url\
]/g
) {
204 my $replace_link = $link;
205 if ($link !~ /^http/i) {
206 $replace_link = "http:\/\/$link";
209 $post_text =~ s/\[url\]$link\[\/url\]/\
<a href
=\"$replace_link\"\
>$replace_link\
<\
/a\>/g;
212 while ($post_text =~ /\[link\](.*?)\[ref\](.*?)\[\/ref\
]\
[\
/link\]/g ) {
215 my $replace_link = $link;
216 if ($link !~ /^http/i) {
217 $replace_link = "http:\/\/$link";
220 $post_text =~ s/\[link\]$link\[ref\]$ref\[\/ref\]\[\/link\
]/\<a href=\"$replace_link\"\>$ref<\/a\
>/g
;
222 ## New tag, internal link. [ilink] that works in the same way that link but do not any http:// if do not find it
223 while ($post_text =~ /\[ilink\](.*?)\[ref\](.*?)\[\/ref\
]\
[\
/ilink\]/g ) {
226 my $replace_link = $link;
228 $post_text =~ s/\[ilink\]$link\[ref\]$ref\[\/ref\]\[\/ilink\
]/\<a href=\"$replace_link\"\>$ref<\/a\
>/g
;
231 while ($post_text =~ /\[i\](.*?)\[\/i\
]/g
) {
233 my $replace_text = $itext;
236 $post_text =~ s/\[i\]$itext\[\/i\]/\
<i\
>$replace_text\
<\
/i\>/g;
238 # convert newlines to <br /> tags
240 $post_text =~ s/\n/\<br \/\
>/g
;
246 =head2 to_tsquery_string
248 Desc: format a plain-text string for feeding to Postgres to_tsquery
250 Args: list of strings to convert
251 Ret : in scalar context: the first converted string,
252 in list context: list of converted strings
256 my $teststring = 'gi|b4ogus123|blah is bogus & I hate it!';
257 to_tsquery_string($teststring);
258 #returns 'gi\\|b4ogus123\\|blah|is|bogus|\\&|I|hate|it\\!'
262 sub to_tsquery_string
{
270 # Escape ampersands and exclamation points
273 # Escape parentheses and colons.
276 # And together all strings
281 =head2 from_tsquery_string
283 Desc: attempt to recover the original string from the product
284 of to_tsquery_string()
285 Args: list of strings
286 Ret : list of de-munged strings
292 sub from_tsquery_string
{
296 next unless defined $_;
297 s/(?<!\\)&/ /g; #& not preceded by backslashes is a space
298 s/\\\\([^\\])/$1/g; #anything double-backslashed
299 s/\\(.)/$1/g; #anything single-backslashed
301 return wantarray ?
@args : $args[0];
304 =head2 parse_pg_arraystr
306 Usage: my $arrayref = parse_pg_arraystr('{1234,543}');
307 Desc : parse the string representation of a postgres array, returning
309 Args : string representation of postgres array
315 sub parse_pg_arraystr
{
318 return [] unless $str;
322 #remove leading and trailing braces
324 or croak
"malformed array string '$origstr'";
326 or croak
"malformed array string '$origstr'";
330 if ( $str =~ /^"/ ) {
342 =head2 commify_number
345 Ret : a string containing the commified version of it
347 Example: commify_number(230400) returns '230,400'
353 return undef unless defined $_;
354 1 while s/^(-?\d+)(\d{3})/$1,$2/;
359 =head2 truncate_string
361 Desc: truncate a string that might be long so that it fits in a manageable
362 length, adding an arbitrary string (default '…') to the end if
363 necessary. If the string is shorter than the given truncation
364 length, simply returns the string unaltered. If the truncated
365 string would have whitespace between the end of the given
366 string and the addon string, drops that whitespace.
367 Args: string to truncate, optional truncation length (default 50),
368 optional truncation addon (default '...')
369 Ret : in scalar context: truncated string
370 in list context: (truncated string,
371 boolean telling whether string was truncated)
374 truncate_string('Honk if you love ducks',6);
378 truncate_string('Honk if you love cats',5);
381 #because this function drops trailing whitespace
385 sub truncate_string
{
386 my ($string,$length,$addon) = @_;
390 my $was_truncated = 0;
391 if( length($string) > $length) {
392 $string = substr($string,0,$length).$addon;
396 return wantarray ?
($string,$was_truncated) : $string;
401 john binns - zombieite@gmail.com
402 Robert Buels - rmb32@cornell.edu