added sol100 and chado cvterm pages to validate_all.t
[sgn.git] / lib / CXGN / Page / IdentifierEngine.pm
blobdb2508146c8f7c29b1b63eeb9b91e24aede59e49
4 package CXGN::Page::IdentifierEngine;
6 =head1 NAME
8 CXGN::Page::IdentifierMonkey - class that can do things with plaintext
9 identifier strings used for various pieces of data.
11 =head1 DESCRIPTION
13 Things objects of this class can do:
14 - given a string, guess the namespace of an identifier
15 - given a plaintext identifier and its namespace, return an
16 HTML link to its proper information page
17 - given a string identifier, parse it into its component parts
19 =head1 SYNOPSIS
21 coming soon
23 =cut
25 use strict;
26 use warnings;
28 our %urlencode;
29 use Tie::UrlEncoder;
30 use Class::MethodMaker
32 new => [qw/-init new/],
35 #use base qw/Class::Singleton/;
37 =head1 METHODS
39 =head2 new
41 Usage: my $eng = CXGN::Page::IdentifierEngine->instance;
42 Desc : get a CXGN identifier engine
43 Ret : the identifier engine object
44 Args : none
45 Side Effects: makes a new engine
46 Example:
48 =cut
50 #new is generated by Class::MethodMaker above
52 sub init {
53 my $self = shift;
54 #put your initialization here
59 =head2 guess_namespace
61 Usage: my $ns_name = $eng->guess_namespace('SGN-U3223'); #returns 'sgn_u'
62 Desc : guess the namespace of a given identifier
63 Ret : the string name of the namespace
64 Args : string identifier
65 Side Effects: none
66 Example:
68 =head3 Supported Namespaces
71 =head3 Namespaces Not Yet Supported
73 sgn_u - SGN unigene identifiers 'SGN-U2342'
74 sgn_e - SGN EST identifiers 'SGN-E234223'
75 est - other kinds of EST identifiers 'cLEC-23-A23'
76 bac_end - BAC end identifiers 'LE_HBa0123A12_SP6_2342'
77 bac - BAC identifiers
78 bac_cu - old-style Cornell BAC identifiers 'P234A23'
79 marker - marker names. good luck with this one. 'TG23'
81 =cut
85 sub guess_namespace {
86 my $self = shift;
90 =head2 get_link
92 Usage: $eng->get_link('SGN-U22222'); #gives a link to the unigene info page
93 Desc : given a plaintext identifier, gives a link to the info page for that
94 particular piece of data. If namespace and page name are not given,
95 it guesses the namespace and uses the default info page.
96 Ret :
97 Args : ( identifier string, (optional) namespace name, (optional) page name )
98 Side Effects: none
99 Example:
101 =head3 Registered Pages
103 To Do:
104 clone_info - info page for a genomic clone
105 clone_search - search for a genomic clone
106 unigene - info page for a unigene
107 est - info page for an est
109 =cut
111 sub get_link {
112 my $self = shift;
113 my $id = shift;
116 my $est_link = "/search/est.pl?request_id=$urlencode{$id}";
117 my $marker_link = "/search/markers/markerinfo.pl?name=$urlencode{$id}";
118 my $unigene_link = "/search/unigene.pl?unigene_id=$urlencode{$id}";
119 my $ncbi_link = "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=$urlencode{$id}";
120 my $arabidopsis_link = "http://www.arabidopsis.org/servlets/TairObject?type=locus&name=$urlencode{$id}";
122 my %id_mapping = (
123 "SGN(\\-|\\_)U\\d{5,8}" => ($unigene_link, "sgn_unigene"),
124 "sgn(\\-|\\_)e\\d{5,8}" => ($est_link, "sgn_est"),
125 "[A-Z]{1,2}\\d{1,3}" => ($marker_link, "sgn_rflp_marker"),
126 "C2\\_At\\dg\\d+" => ($marker_link, "sgn_cosii_marker"),
127 "At\\dg\\d+(\\.\\d+){0,1}" => ($arabidopsis_link, "tair_locus"),
128 "(tomato|lycopersicon esculentum|solanum lycopersicum)" => ("/content/sgn_data.pl#Solanumlycopersicum()", "sol_species"),
129 "(potato|solanum tuberosum)" => ("/content/sgn_data.pl#Solanumtuberosum", "sol_species"),
130 "(eggplant|solanum melongena)" => ("/content/sgn_data.pl#Solanummelongena", "sol_species"),
131 "(pepper|capsicum|capsicum annuum)" => ("/content/sgn_data.pl#Capsicumannuum", "sol_species"),
132 "(petunia|petunia hybrida)" => ("/content/sgn_data.pl#petunia", "sol_species"),
133 "(arabidopsis|arabidopsis thaliana|A.\\s+thaliana)" => ("http://www.arabidopsis.org/about/arabidopsis.jsp", "sol_species"),
134 "(coffee|coffea|coffea\\s+arabica|coffea\\s+robusta)" => ("/content/coffee.pl", "sol_species"),
137 print STDERR "Finding the link for $id...\n";
138 foreach my $pattern (keys(%id_mapping)) {
139 if ($id =~ m/$pattern/i) {
140 print STDERR "found link for $pattern: $id_mapping{$pattern}\n";
141 return ($id_mapping{$pattern})[0];
153 1;#do not remove