4 package CXGN
::Page
::IdentifierEngine
;
8 CXGN::Page::IdentifierMonkey - class that can do things with plaintext
9 identifier strings used for various pieces of data.
13 Things objects of this class can do:
14 - given a string, guess the namespace of an identifier
15 - given a plaintext identifier and its namespace, return an
16 HTML link to its proper information page
17 - given a string identifier, parse it into its component parts
30 use Class
::MethodMaker
32 new
=> [qw
/-init new/],
35 #use base qw/Class::Singleton/;
41 Usage: my $eng = CXGN::Page::IdentifierEngine->instance;
42 Desc : get a CXGN identifier engine
43 Ret : the identifier engine object
45 Side Effects: makes a new engine
50 #new is generated by Class::MethodMaker above
54 #put your initialization here
59 =head2 guess_namespace
61 Usage: my $ns_name = $eng->guess_namespace('SGN-U3223'); #returns 'sgn_u'
62 Desc : guess the namespace of a given identifier
63 Ret : the string name of the namespace
64 Args : string identifier
68 =head3 Supported Namespaces
71 =head3 Namespaces Not Yet Supported
73 sgn_u - SGN unigene identifiers 'SGN-U2342'
74 sgn_e - SGN EST identifiers 'SGN-E234223'
75 est - other kinds of EST identifiers 'cLEC-23-A23'
76 bac_end - BAC end identifiers 'LE_HBa0123A12_SP6_2342'
78 bac_cu - old-style Cornell BAC identifiers 'P234A23'
79 marker - marker names. good luck with this one. 'TG23'
92 Usage: $eng->get_link('SGN-U22222'); #gives a link to the unigene info page
93 Desc : given a plaintext identifier, gives a link to the info page for that
94 particular piece of data. If namespace and page name are not given,
95 it guesses the namespace and uses the default info page.
97 Args : ( identifier string, (optional) namespace name, (optional) page name )
101 =head3 Registered Pages
104 clone_info - info page for a genomic clone
105 clone_search - search for a genomic clone
106 unigene - info page for a unigene
107 est - info page for an est
116 my $est_link = "/search/est.pl?request_id=$urlencode{$id}";
117 my $marker_link = "/search/markers/markerinfo.pl?name=$urlencode{$id}";
118 my $unigene_link = "/search/unigene.pl?unigene_id=$urlencode{$id}";
119 my $ncbi_link = "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=$urlencode{$id}";
120 my $arabidopsis_link = "http://www.arabidopsis.org/servlets/TairObject?type=locus&name=$urlencode{$id}";
123 "SGN(\\-|\\_)U\\d{5,8}" => ($unigene_link, "sgn_unigene"),
124 "sgn(\\-|\\_)e\\d{5,8}" => ($est_link, "sgn_est"),
125 "[A-Z]{1,2}\\d{1,3}" => ($marker_link, "sgn_rflp_marker"),
126 "C2\\_At\\dg\\d+" => ($marker_link, "sgn_cosii_marker"),
127 "At\\dg\\d+(\\.\\d+){0,1}" => ($arabidopsis_link, "tair_locus"),
128 "(tomato|lycopersicon esculentum|solanum lycopersicum)" => ("/content/sgn_data.pl#Solanumlycopersicum()", "sol_species"),
129 "(potato|solanum tuberosum)" => ("/content/sgn_data.pl#Solanumtuberosum", "sol_species"),
130 "(eggplant|solanum melongena)" => ("/content/sgn_data.pl#Solanummelongena", "sol_species"),
131 "(pepper|capsicum|capsicum annuum)" => ("/content/sgn_data.pl#Capsicumannuum", "sol_species"),
132 "(petunia|petunia hybrida)" => ("/content/sgn_data.pl#petunia", "sol_species"),
133 "(arabidopsis|arabidopsis thaliana|A.\\s+thaliana)" => ("http://www.arabidopsis.org/about/arabidopsis.jsp", "sol_species"),
134 "(coffee|coffea|coffea\\s+arabica|coffea\\s+robusta)" => ("/content/coffee.pl", "sol_species"),
137 print STDERR
"Finding the link for $id...\n";
138 foreach my $pattern (keys(%id_mapping)) {
139 if ($id =~ m/$pattern/i) {
140 print STDERR
"found link for $pattern: $id_mapping{$pattern}\n";
141 return ($id_mapping{$pattern})[0];