5 CXGN::Phylo::Species_name_map - a package that handles variant species names (e.g. tomato, Lycopersicon_esculentum, Solanum_lycopersicum)
8 This is for handling different species names that may be used. There is a hash whose keys are variants, and
9 values are the standard versions of the species names.
10 A default hash is set up in new. You can add other variant/standard pairs. The keys are all
11 in a standard format with words separated by _, initial letter uc, others lc. e.g. if $sname = POTATO
12 get_standard_name($sname) will transform this to Potato, and return the value corresponding to this key,
13 which would be Solanum_tuberosum.
17 Tom York (tly2@cornell.edu)
21 This class implements the following functions:
25 package CXGN
::Phylo
::Species_name_map
;
26 # this is a class to map variant species names to standard species names
30 Synopsis: my $snm = CXGN::Phylo::Species_name_map->new();
32 Returns: ref to newly constructed Species_name_map object with a default set of key/value (species/std species) pairs.
33 Description: The object is a hash with a key name_hash, whose corresponding value is a reference to a hash whose
34 keys are variant species names and values are standard species names. Both the keys and values of name_hash are
35 in a standard format using _ as separator, and all lowercase except the first character, which is uppercase (see to_standard_format).
42 my $self = bless $args, $class;
44 %{$self->{name_hash
}} = ();
45 #list of default standard names
47 'Solanum_lycopersicum',
56 'Arabidopsis_thaliana',
59 'Brachypodium_distachyon',
60 'Medicago_truncatula',
67 'Populus_trichocarpa',
68 'Selaginella_moellendorffii',
70 'Chlamydomonas_reinhardtii',
71 'Physcomitrella_patens',
73 'Amborella_trichopoda',
74 'Phoenix_dactylifera',
86 'Gossypium_raimondii',
88 'Thellungiella_parvula',
100 'Linum_usitatissimum'
103 # first set up some standard name associations
104 foreach my $s (@std_species) {
105 $self->set_standard_name($s, $s);
108 $self->set_standard_name('tomato', 'Solanum_lycopersicum');
109 $self->set_standard_name('potato', 'Solanum_tuberosum');
110 $self->set_standard_name('eggplant', 'Solanum_melongena');
111 $self->set_standard_name('pepper', 'Capsicum_annuum');
112 $self->set_standard_name('tobacco', 'Nicotiana_tabacum');
113 $self->set_standard_name('petunia', 'Petunia'); # species?
114 $self->set_standard_name('sweet_potato', 'Ipomoea_batatas');
115 $self->set_standard_name('coffee', 'Coffea_arabica'); #what about C. canephora?
116 $self->set_standard_name('arabica_coffee', 'Coffea_arabica'); #what about C. canephora?
117 $self->set_standard_name('robusta_coffee', 'Coffea_canephora');
118 $self->set_standard_name('rice', 'Oryza_sativa');
119 $self->set_standard_name('brachypodium', 'Brachypodium_distachyon');
121 $self->set_standard_name('snapdragon', 'Antirrhinum'); #species Majus?
122 $self->set_standard_name('arabidopsis', 'Arabidopsis_thaliana');
124 $self->set_standard_name('Solanum betaceum', 'Solanum_betaceum');
125 $self->set_standard_name('tamarillo', 'Solanum_betaceum');
127 $self->set_standard_name('Physalis philadelphica', 'Physalis_philadelphica');
128 $self->set_standard_name('tomatillo', 'Physalis_philadelphica');
130 $self->set_standard_name('Lycopersicon Esculentum', $self->get_standard_name('tomato'));
131 $self->set_standard_name('selaginella', 'Selaginella_moellendorffii');
132 $self->set_standard_name('maize', 'Zea_mays');
133 $self->set_standard_name('sorghum', 'Sorghum_bicolor');
134 $self->set_standard_name('castorbean', 'Ricinus_communis');
135 $self->set_standard_name('grape', 'Vitis_vinifera');
136 $self->set_standard_name('papaya', 'Carica_papaya');
137 $self->set_standard_name('soy', 'Glycine_max');
138 $self->set_standard_name('soybean', 'Glycine_max');
139 $self->set_standard_name('medicago', 'Medicago_truncatula');
140 $self->set_standard_name('poplar', 'Populus_trichocarpa');
142 $self->set_standard_name('chlamydomonas', 'Chlamydomonas_reinhardtii');
143 $self->set_standard_name('physcomitrella', 'Physcomitrella_patens');
144 $self->set_standard_name('loblolly_pine', 'Pinus_taeda');
145 $self->set_standard_name('amborella', 'Amborella_trichopoda');
146 $self->set_standard_name('foxtail_millet', 'Setaria_italica');
147 $self->set_standard_name('date_palm', 'Phoenix_dactylifera');
148 $self->set_standard_name('wheat', 'Triticum_aestivum');
149 $self->set_standard_name('barley', 'Hordeum_vulgare');
151 $self->set_standard_name('peach', 'Prunus_persica');
152 $self->set_standard_name('peanut', 'Arachis_hypogaea');
153 $self->set_standard_name('pigeon_pea', 'Cajanus_cajan');
154 $self->set_standard_name('lotus', 'Lotus_japonica');
155 $self->set_standard_name('apple', 'Malus_domestica');
156 $self->set_standard_name('cannabis', 'Cannabis_sativa');
157 $self->set_standard_name('hemp', 'Cannabis_sativa');
158 $self->set_standard_name('woodland_strawberry', 'Fragaria_vesca');
159 $self->set_standard_name('cucumber', 'Cucumis_sativus');
160 $self->set_standard_name('flax', 'Linum_usitatissimum');
161 $self->set_standard_name('cassava', 'Manihot_esculenta');
162 # print STDOUT "in CXGN::Phylo::Species_name_map->new(). tomato std name: ", $self->get_standard_name("tomato"), "\n";
166 =head2 function set_standard_name
168 Synopsis: $snm->set_standard_name("tomatillo", "Physalis philadelphica")
169 Arguments: List of two strings; the second becomes the standard species name corresponding to the first.
171 Side effects: Stores a key value pair in the hash
172 Description: Both arguments are transformed by to_standard_format; these transformed args become a key-value pair of name_hash.
# Record that the variant species name $var maps to the standard name $std.
#
# Arguments: $var - a variant name, e.g. "tomatillo"
#            $std - the standard name it stands for, e.g. "Physalis philadelphica"
# Side effects: stores one key/value pair in the object's name_hash.
#
# Both strings are normalized with to_standard_format before being stored,
# so later lookups do not depend on case or on space-vs-underscore spelling.
sub set_standard_name {
    my ($self, $var, $std) = @_;

    # Hash keys are kept in standard format,
    # e.g. " solanum LYCOPERSICUM " -> "Solanum_lycopersicum".
    $var = CXGN::Phylo::Species_name_map->to_standard_format($var);

    # Hash values are kept in standard format as well.
    $std = CXGN::Phylo::Species_name_map->to_standard_format($std);

    $self->{name_hash}->{$var} = $std;
}
185 =head2 function get_standard_name
187 Synopsis: my $std_species = $snm->get_standard_name("tomatillo")
188 Arguments: A species name string;
189 Returns: The corresponding standard name.
190 Description: The argument is transformed by to_standard_format, and this becomes a key for which the value is returned.
# Look up the standard species name registered for $var.
#
# Arguments: $var - a species name string in any spelling variant.
# Returns:   the value stored in name_hash under the normalized form of
#            $var (undef when no such key has been stored).
sub get_standard_name {
    my ($self, $var) = @_;

    # Normalize the query the same way keys were normalized on insertion,
    # e.g. " solanum LYCOPERSICUM " -> "Solanum_lycopersicum".
    $var = CXGN::Phylo::Species_name_map->to_standard_format($var);

    return $self->{name_hash}->{$var};
}
201 =head2 function copy()
203 Synopsis: my $snm_copy = $snm->copy()
204 Arguments: a Species_name_map object
205 Returns: a Species_name_map object, a copy of $snm
206 Description: Starts with a new default object, and copies the hash to it
# Build a new Species_name_map holding the same variant => standard pairs
# as this one.
#
# Returns: a new Species_name_map object. It starts from the default
#          constructor and then receives every pair stored in this
#          object's name_hash.
sub copy {
    my $self = shift;

    my $new = CXGN::Phylo::Species_name_map->new();

    # Iterate over the species-name keys inside name_hash. Iterating over
    # keys %$self would only visit the object's top-level keys (such as
    # 'name_hash' itself), so no name pair would actually be copied.
    foreach my $k (keys %{ $self->{name_hash} }) {
        $new->set_standard_name($k, $self->get_standard_name($k));
    }

    return $new;
}
219 =head2 function to_standard_format
221 Synopsis: my $str = CXGN::Phylo::Species_name_map->to_standard_format($sp_name);
223 Returns: A string put into a standard format by removing leading and trailing whitespace, splitting at whitespace or _,
224 making lc, joining with _, then make first char uc. E.g.: " solanum _TUBEROSUM " becomes "Solanum_tuberosum".
225 Note that applying this multiple times gives same result as applying once.
226 Description: The idea is to have all the hash keys of a Species_name_map be in this format, so that some minor variations
227 (e.g. potato, Potato) would share the same key
# Normalize a species name so that minor spelling variants share one key.
#
# Called as a class method: CXGN::Phylo::Species_name_map->to_standard_format($name)
#
# Arguments: $species - a species name string (may be undef).
# Returns:   the name with leading/trailing whitespace removed, split on runs
#            of whitespace and/or underscores, lowercased, joined with "_",
#            and with only the first character uppercased,
#            e.g. " solanum _TUBEROSUM " -> "Solanum_tuberosum".
#            An undefined argument is returned unchanged (undef).
# Applying the function to its own output gives the same string.
sub to_standard_format {
    my ($class, $species) = @_;

    # Nothing to normalize; hand the undefined value back as a single scalar.
    return $species unless defined $species;

    $species =~ s/^\s+//;    # remove initial whitespace
    $species =~ s/\s+$//;    # remove final whitespace

    # Split on underscores and whitespace, lowercasing each piece.
    my @word_list = map { lc } split /[\s_]+/, $species;

    # Join with underscores and capitalize the first letter (genus name).
    return ucfirst join '_', @word_list;
}
245 sub to_display_format
{
252 =head2 function get_map_string
254 Synopsis: my $str = $snm->get_map_string()
255 Arguments: a Species_name_map object
256 Returns: A string with the keys and values (standard species) of the hash in form:
257 (potato => Solanum_tuberosum, tomato => Solanum_lycopersicum)
265 foreach my $s (keys %{$self->{name_hash
}}) {
266 $string .= $s . " => " . $self->get_standard_name($s) . ", ";
268 $string =~ s/,\s*$//g; # eliminate final comma and whitespace