a class to extract sequences from the genome
[cxgn-corelibs.git] / lib / CXGN / Tools / Tsearch.pm
blob7de5f21d1004db4eb51fde8536d12dae748d77f9
1 package CXGN::Tools::Tsearch;
2 use strict;
3 use warnings;
5 =head1 NAME
7 CXGN::Tools::Tsearch - functions for working with the Postgres tsearch module used for text indexing
9 =head1 SYNOPSIS
12 =head1 DESCRIPTION
14 =head1 Authors
16 Naama Menda (nm249@cornell.edu)
18 =cut
21 =head2 process_string
23 Usage: CXGN::Tools::Tsearch::process_string('some string', 1)
24 Desc: a class function for preparing a string for matching to a tsvector field
25 Ret: a processed string
26 Args: string , and optional flag for removing spaces and dots
27 Side Effects:
28 Example:
30 =cut
# Prepare a free-text string for matching against a tsvector field.
#
# Usage: my $query = process_string('some string', 1);
# Args:  $string - the text to process
#        $spaces - optional flag; when true, all whitespace and dots are
#                  deleted before anything else happens (the original author
#                  notes this is important for a multiple synonym string)
# Ret:   the processed string, in which the remaining words are joined by a
#        single '&' with no '&' at either end; undef is returned unchanged
# Side Effects: writes a debug line to STDERR when $spaces is true
sub process_string {
    my $string = shift;
    my $spaces = shift;

    # Nothing to process; this also avoids "uninitialized value" warnings
    # from the substitutions below.
    return $string unless defined $string;

    if ($spaces) {
        print STDERR "process string about to replace spaces and dots for string $string ...\n";
        $string =~ s/\s//g;    # replace spaces with nothing
        $string =~ s/\.//g;    # replace dots with nothing
    }

    $string =~ s/\(.*?\)//g;    # drop parenthesised text
    $string =~ s/\s/&/g;        # whitespace becomes the '&' separator
    $string =~ s/:/&/g;         # so does a colon

    # A backtick or single quote swallows everything up to the next
    # separator, or up to the end of the string when no separator follows.
    $string =~ s/((\`.*?&)|(\'.*?&))/&/g;
    $string =~ s/((\`.*?$)|(\'.*?$))//g;

    $string =~ s/\(|\)//g;      # unbalanced parentheses left over
    $string =~ s/\^//g;

    # Collapse and trim only once every deletion is done: the substitutions
    # above can themselves create doubled or dangling separators. The /g on
    # the trim is required so that BOTH ends are cleaned, not just the first
    # alternative that matches.
    $string =~ s/&+/&/g;
    $string =~ s/^&|&$//g;

    # Disabled by the original author:
    #$cvterm_hash->{name} =~ s/(.+?)\s\b(\w*)\/(\w*)\b/$1&$2|$1&$3/;
    return $string;
}
57 =head2 do_insert
59 Usage: $cvterm->do_insert($match_type, [ [pub_id, rank, headline], ... ])
60 Desc: insert a new cvterm_pub_rank or a locus_pub_rank
61 Ret: number of lines stored in _pub_rank table
62 Args: match_type and list of lists
63 Side Effects: calls the store function
64 Example:
66 =cut
# Store one row per entry of a result list, using the accessors of $self.
#
# Usage: my $count = $obj->do_insert($match_type, [ [ $pub_id, $rank, $headline ], ... ]);
# Args:  $match_type - value handed to set_match_type() for every row
#        $array_ref  - reference to an array of [ pub_id, rank, headline ]
#                      array references; undef is treated as an empty list
# Ret:   the number of rows for which store() returned a true value
# Side Effects: for every row calls set_pub_id(), set_rank(),
#        set_match_type(), set_headline() and store() on $self, then writes
#        a summary line to STDERR
sub do_insert {
    my $self       = shift;
    my $match_type = shift;
    my $array_ref  = shift;

    # A missing result list means "nothing to insert" rather than a fatal
    # "Can't use an undefined value as an ARRAY reference".
    my @array = $array_ref ? @$array_ref : ();
    my $count = 0;

    for my $ref (@array) {
        my ( $pub_id, $rank, $headline ) = @{$ref}[ 0 .. 2 ];

        # Original author's note: a row must not be inserted when a link with
        # the same ids already exists with match type "name_abstract". No such
        # check is made in this sub; it is left to store() (see below).
        $self->set_pub_id($pub_id);
        $self->set_rank($rank);
        $self->set_match_type($match_type);

        # Get rid of single quotes in the headline text; an undefined
        # headline is passed on untouched.
        $headline =~ s/\'//g if defined $headline;
        $self->set_headline($headline);

        # Original author's note: store function should call exists function!
        $count++ if $self->store();
    }

    print STDERR "Found " . scalar(@array) . " lines. Inserted $count $match_type rows into _pub_ranking table**\n";
    return $count;
}
# Yields a true value for the file as a whole; Perl needs that from any file
# loaded with require/use. NOTE(review): this line appears to sit at file
# scope, after the end of do_insert -- confirm against the original file.
98 return 1;