Added to perldoc instructions about the use of the environment variables.
[cxgn-corelibs.git] / lib / Chado / Builder.pm
blobceef37bf8ed6aae571b3e4e127393f384fe077f3
1 # $Id: Builder.pm,v 1.34 2007/02/20 16:23:53 briano Exp $
2 package Chado::Builder;
3 # vim: set ft=perl ts=2 expandtab:
5 use strict;
6 use base 'Module::Build';
7 use Carp;
8 use Data::Dumper;
9 use File::Spec::Functions 'catfile';
10 use File::Path;
11 use File::Copy;
12 use Data::Dumper;
13 use Template;
14 use XML::Simple;
15 use LWP::Simple qw(mirror is_success status_message);
16 use Log::Log4perl;
17 use DBI;
18 Log::Log4perl::init('load/etc/log.conf');
19 no warnings;
21 =head1 ACTIONS
23 = item prepdb()
25 Calls the psql command and pipes in the contents of the
26 load/etc/initialize.sql file. Put any insert statements that
27 your data load needs here.
29 =item ncbi()
31 Load action for all NCBI data.
33 =item mageml()
35 fixfixfix
37 =item ontologies()
39 loads ontologies by running gmod_load_ontology.pl on all files in
40 $(DATA)/ontology
42 =item tokenize()
44 processes templates specified in configuration file, filling in
45 platform-specific variable values
47 =item _last
49 =cut
51 =head2 ACTION_prepdb
53 Title : ACTION_prepdb
54 Usage :
55 Function: Executes any SQL statements in the load/etc/initialize.sql file.
56 Example :
57 Returns :
58 Args :
60 =cut
62 sub ACTION_prepdb {
63 # the build object $m
64 my $m = shift;
65 # the XML config object
66 my $conf = $m->conf;
68 $m->log->info("entering ACTION_prepdb");
70 my $db_name = $conf->{'database'}{'db_name'} || '';
71 my $db_host = $conf->{'database'}{'db_host'} || '';
72 my $db_port = $conf->{'database'}{'db_port'} || '';
73 my $db_user = $conf->{'database'}{'db_username'} || '';
74 my $build_dir = $conf->{'build'}{'working_dir'} || '';
75 my $init_sql = catfile( $build_dir, 'load', 'etc', 'initialize.sql' );
76 my $sys_call = "psql -h $db_host -p $db_port -U $db_user -f $init_sql $db_name";
78 $m->log->debug("system call: $sys_call");
80 system( $sys_call ) == 0 or croak "Error executing '$sys_call': $?";
82 $m->log->info("leaving ACTION_prepdb");
85 =head2 ACTION_ncbi
87 Title : ACTION_ncbi
88 Usage :
89 Function: Load action for all NCBI data.
90 Example :
91 Returns :
92 Args :
94 =cut
95 sub ACTION_ncbi {
96 # the build object $m
97 my $m = shift;
98 # the XML config object
99 my $conf = $m->conf;
101 $m->log->info("entering ACTION_ncbi");
103 # print out the available refseq datasets
104 my %ncbis = printAndReadOptions($m,$conf,"ncbi");
106 # now that I know what you want mirror files and load
107 # fetchAndLoadFiles is called for each possible type
108 # but only actively loaded for those the user selects
109 fetchAndLoadFiles($m, $conf, "refseq", "./load/bin/load_gff3.pl --organism Human --srcdb DB:refseq --gfffile", \%ncbis);
110 fetchAndLoadFiles($m, $conf, "locuslink", "./load/bin/load_locuslink.pl", \%ncbis);
111 $m->log->info("leaving ACTION_ncbi");
114 sub ACTION_mageml {
115 my $m = shift;
116 my $conf = $m->conf;
118 $m->log->info("entering ACTION_mageml");
120 print "Available MAGE-ML annotation files:\n";
122 my $i = 1;
123 my %ml = ();
124 foreach my $mageml ( sort keys %{ $conf->{mageml} } ) {
125 $ml{$i} = $mageml;
126 print "[$i] $mageml\n";
127 $i++;
129 print "\n";
131 my $chosen = $m->prompt(
132 "Which ontologies would you like to load (Comma delimited)? [0]"
134 $m->notes( 'affymetrix' => $chosen );
136 my %mageml = map { $ml{$_} => $conf->{mageml}{ $ml{$_} } } split ',', $chosen;
138 foreach my $mageml ( keys %mageml ) {
139 print "fetching files for $mageml\n";
141 my $load = 0;
142 foreach my $file ( @{ $mageml{$mageml}{file} } ) {
144 my $fullpath = catfile $conf->{path}{data}, $file->{local};
145 $fullpath =~ s!^(.+)/[^/]*!$1!;
147 unless ( -d $fullpath ) {
148 $m->log->debug("mkpath $fullpath");
149 mkpath( $fullpath, 0, 0711 )
150 or print "Couldn't make path '$fullpath': $!\n";
153 print " +", $file->{remote}, "\n";
154 $load = 1 if $m->_mirror( $file->{remote}, $file->{local} );
155 $load = 1 unless $m->_loaded( $fullpath );
157 next unless $load;
159 print " loading...";
161 my $sys_call = "./load/bin/load_affymetrix.pl $fullpath";
162 $m->log->debug( "system call: $sys_call" );
164 my $result = system( $sys_call );
165 if ( $result != 0 ) {
166 die "failed: $!\n";
168 else {
169 $m->_loaded( $fullpath, 1 );
170 print "done!\n";
175 $m->log->info("leaving ACTION_mageml");
178 sub ACTION_ontologies {
179 my $m = shift;
180 my $conf = $m->conf;
182 my $db_name = $conf->{'database'}{'db_name'} || '';
183 my $db_host = $conf->{'database'}{'db_host'} || '';
184 my $db_port = $conf->{'database'}{'db_port'} || '';
185 my $db_user = $conf->{'database'}{'db_username'} || '';
186 my $db_pass = $conf->{'database'}{'db_password'} || '';
188 $db_pass = '' if (ref $db_pass eq 'HASH');
190 $m->log->info("entering ACTION_ontologies");
192 print "Available ontologies:\n";
194 my %ont = ();
195 foreach my $ontology ( keys %{ $conf->{ontology} } ) {
196 $ont{ $conf->{ontology}->{$ontology}->{order} } = $ontology;
198 foreach my $key ( sort {$a <=> $b} keys %ont ) { print "[$key] ", $ont{$key}, "\n"; }
199 print "\n";
201 my $chosen = $m->prompt("Which ontologies would you like to load (Comma delimited)? [0]");
202 $m->notes( 'ontologies' => $chosen );
204 my %ontologies = map { $_ => $conf->{ontology}{ $ont{$_} } } split ',',
205 $chosen;
207 foreach my $ontology ( sort {$a <=> $b} keys %ontologies ) {
208 print "fetching files for ", $ont{$ontology}, "\n";
210 my $file = $ontologies{$ontology}{file};
212 my $load = 0;
213 foreach my $file (
214 grep { $_->{type} eq 'definitions' } @{ $ontologies{$ontology}{file} }
216 my $fullpath = catfile($conf->{path}{data}, $file->{local});
217 $fullpath =~ s!^(.+)/[^/]*!$1!;
218 unless ( -d $fullpath ) {
219 $m->log->debug("mkpath $fullpath");
220 mkpath( $fullpath, 0, 0711 )
221 or print "Couldn't make path '$fullpath': $!\n";
223 if ($file->{method} =~ /mirror/) {
224 print " +", $file->{remote}, "\n";
225 $load = 1 if $m->_mirror( $file->{remote}, $file->{local} );
227 else { # it is a local file
228 copy( $file->{remote} , $fullpath );
229 $load = 1;
233 my ($deffile) =
234 grep { $_ if $_->{type} eq 'definitions' }
235 @{ $ontologies{$ontology}{file} };
237 foreach my $file (
238 grep { ($_->{type} eq 'ontology') or ($_->{type} eq 'obo') } @{ $ontologies{$ontology}{file} }
240 my $fullpath = catfile($conf->{path}{data}, $file->{local});
241 $fullpath =~ s!^(.+)/[^/]*!$1!;
242 unless ( -d $fullpath ) {
243 $m->log->debug("mkpath $fullpath");
244 mkpath( $fullpath, 0, 0711 )
245 or print "Couldn't make path '$fullpath': $!\n";
248 print " +", $file->{remote}, "\n";
250 if ($file->{method} =~ /mirror/) {
251 $load = 1 if $m->_mirror( $file->{remote}, $file->{local} );
253 else { #local file
254 copy( $file->{remote}, $fullpath );
255 $load = 1;
258 next unless $load;
260 print " loading...";
262 # my $sys_call = join( ' ',
263 # './load/bin/gmod_load_ontology.pl',
264 # catfile( $conf->{'path'}{'data'}, $file->{'local'} ),
265 # catfile( $conf->{'path'}{'data'}, $deffile->{'local'} )
266 # );
269 #creating chadoxml from either obo or ontology files
270 my $sys_call;
271 if ($file->{type} eq 'obo') {
272 $sys_call = join( ' ',
273 'go2fmt.pl -p obo_text -w xml',
274 catfile( $conf->{'path'}{'data'}, $file->{'local'}),
275 '| go-apply-xslt oboxml_to_chadoxml - >',
276 catfile( $conf->{'path'}{'data'}, $file->{'local'}.'xml')
278 } elsif ($file->{type} eq 'ontology') {
279 $sys_call = join( ' ',
280 'go2fmt.pl -p go_ont -w xml',
281 catfile( $conf->{'path'}{'data'}, $file->{'local'}),
282 '| go-apply-xslt oboxml_to_chadoxml - >',
283 catfile( $conf->{'path'}{'data'}, $file->{'local'}.'xml')
285 } else {
286 die "what kind of file is ".$_->{type}."?";
289 $m->log->debug( "system call: $sys_call" );
291 my $result = system( $sys_call );
293 if ( $result != 0 ) {
294 print "System call '$sys_call' failed: $?\n";
295 $m->log->fatal("failed: $?");
296 die;
299 # loading chadoxml
300 my $stag_string = "stag-storenode.pl -d 'dbi:Pg:dbname=$db_name;host=$db_host;port=$db_port'";
301 $stag_string .= " --user $db_user " if $db_user;
302 $stag_string .= " --password $db_pass " if $db_pass;
303 $sys_call = join( ' ',
304 $stag_string,
305 catfile( $conf->{'path'}{'data'}, $file->{'local'}.'xml')
308 $m->log->debug( "system call: $sys_call" );
310 $result = system( $sys_call );
312 if ( $result != 0 ) {
313 print "System call '$sys_call' failed: $?\n";
314 $m->log->fatal("failed: $?");
315 die;
318 if ($deffile) {
319 $sys_call = join( ' ',
320 'go2fmt.pl -p go_def -w xml',
321 catfile( $conf->{'path'}{'data'}, $deffile->{'local'}),
322 '| go-apply-xslt oboxml_to_chadoxml - >',
323 catfile( $conf->{'path'}{'data'}, $deffile->{'local'}.'xml')
326 $m->log->debug( "system call: $sys_call" );
328 $result = system( $sys_call );
330 if ( $result != 0 ) {
331 print "System call '$sys_call' failed: $?\n";
332 $m->log->fatal("failed: $?");
333 die;
337 $sys_call = join( ' ',
338 "stag-storenode.pl -d 'dbi:Pg:dbname=$db_name;host=$db_host;port=$db_port'",
339 catfile( $conf->{'path'}{'data'}, $deffile->{'local'}.'xml')
342 $m->log->debug( "system call: $sys_call" );
344 $result = system( $sys_call );
348 if ( $result != 0 ) {
349 print "System call '$sys_call' failed: $?\n";
350 $m->log->fatal("failed: $?");
351 die;
353 else {
354 $m->_loaded( catfile($conf->{'path'}{'data'}, $file->{'local'}), 1 );
355 $m->_loaded( catfile($conf->{'path'}{'data'}, $deffile->{'local'}), 1 ) if $deffile;
356 print "done!\n";
357 $m->log->debug("done!");
362 #fix up DBIx::DBStag stomping on part_of and derives_from
363 $m->log->debug("fix up DBIx::DBStag stomping on part_of and derives_from");
364 my $dbh = DBI->connect("dbi:Pg:dbname=$db_name;host=$db_host;port=$db_port",
365 $db_user, $db_pass);
366 $dbh->do("UPDATE cvterm SET
367 cv_id = (SELECT cv_id FROM cv WHERE name='relationship')
368 WHERE name='derives_from'");
369 $dbh->do("UPDATE cvterm SET
370 cv_id = (SELECT cv_id FROM cv WHERE name='relationship')
371 WHERE name='part_of'");
372 $dbh->disconnect;
374 $m->log->info("leaving ACTION_ontologies");
377 sub ACTION_tokenize {
378 my $m = shift;
379 my $conf = $m->conf;
381 $m->log->info('entering ACTION_tokenize');
383 my $template = Template->new(
385 INTERPOLATE => 0,
386 RELATIVE => 1,
388 ) || ( $m->log->fatal("Template error: $Template::ERROR") and die );
390 foreach my $templatefile ( keys %{ $conf->{template}{file} } ) {
392 #there is an order of preference in which keys are added.
393 #this affects which config sections clobber which others, beware.
394 my $tokens = {%{$conf->{database}}, %{$conf->{build}}};
396 if(ref($conf->{template}{file}{$templatefile}) eq 'HASH'){
397 $tokens->{ $_ } = $conf->{template}{file}{$templatefile}{$_} foreach keys %{ $conf->{template}{file}{$templatefile}};
400 #knock out empty hashes (like undef db_password)
401 foreach my $token (keys %{$tokens}){
402 undef($tokens->{$token}) if(ref($tokens->{$token}) eq 'HASH' and !keys %{$tokens->{$token}});
405 my $tokenized;
407 $m->log->debug(Dumper($tokens));
409 $template->process(
410 $conf->{template}{file}{$templatefile}{in},
411 $tokens,
412 \$tokenized,
413 ) || ( $m->log->fatal( "Template error: " . $template->error() ) and die );
414 open( OUT, '>' . $conf->{template}{file}{$templatefile}{out} );
415 print OUT $tokenized;
416 close(OUT);
419 $m->log->info('leaving ACTION_tokenize');
422 =head1 NON-ACTIONS
424 =cut
426 =head2 fetchAndLoadFiles
428 Title : fetchAndLoadFiles
429 Usage : fetchAndLoadFiles(<build_obj>, <xml_conf_obj>, <file_type>...)
430 Function: Calls internal methods to mirror files specified for this file_type in the xml_conf_obj
431 Example :
432 Returns :
433 Args :
435 =cut
436 sub fetchAndLoadFiles {
437 my ( $m, $conf, $type, $command, $itm ) = @_;
438 $m->log->info('entering fetchAndLoadFiles');
440 foreach my $key ( keys %$itm ) {
441 print "fetching files for $key\n";
443 my $load = 0;
444 foreach my $file ( @{ $itm->{$key}{file} } ) {
446 # check to see if this command can handle this type
447 if ( $file->{type} eq $type ) {
448 my $fullpath = catfile( $conf->{path}{data}, $file->{local});
449 $fullpath =~ s!^(.+)/[^/]*!$1!;
451 unless ( -d $fullpath ) {
452 $m->log->debug("mkpath $fullpath");
453 mkpath( $fullpath, 0, 0711 )
454 or print "Couldn't make path '$fullpath': $!\n";
457 print " +", $file->{remote}, "\n";
458 $load = 1 if $m->_mirror( $file->{remote}, $file->{local} );
459 $load = 1 unless $m->_loaded( $fullpath );
461 next unless $load;
463 print " loading...";
465 my $sys_call = join( ' ', $command, $fullpath );
466 $m->log->debug( "system call: $sys_call" );
468 my $result = system( $sys_call );
470 if ( $result != 0 ) {
471 print "failed: $!\n";
472 $m->log->fatal("failed: $!");
473 die;
475 else {
476 $m->_loaded( $fullpath, 1 );
477 print "done!\n";
478 $m->log->debug("done!");
484 $m->log->info('leaving fetchAndLoadFiles');
488 =head2 printAndReadOptions
490 Title : printAndReadOptions
491 Usage : prints out and reads options from the XML file
492 Function:
493 Example :
494 Returns :
495 Args : m=build obj, conf=conf obj, option=which option to pull from the conf XML file
498 =cut
499 sub printAndReadOptions
501 my ($m,$conf,$option) = @_;
502 print "Available $option Items:\n";
504 my $i = 1;
505 my %itm = ();
506 foreach my $item (sort keys %{ $conf->{$option} })
508 $itm{$i} = $item;
509 print "[$i] $item\n";
510 $i++;
512 print "\n";
514 my $chosen = $m->prompt("Which items would you like to load (Comma delimited)? [0]");
515 $m->notes("$option"."s" => $chosen);
517 my %options = map {$itm{$_} => $conf->{$option}{$itm{$_}}} split ',',$chosen;
518 return(%options);
521 sub property {
522 my $m = shift;
523 my $key = shift;
524 my $val = $m->{properties}{$key};
525 $val =~ s/^$key=//;
526 return $val;
529 sub conf {
530 my $self = shift;
531 return $self->{conf} if defined $self->{conf};
533 my $file = $self->property('load_conf');
534 $self->{conf} = XMLin($file,
535 ForceArray => ['token','path','file'],
536 KeyAttr => [qw(tt2 input token name file)],
537 ContentKey => '-value'
540 return $self->{conf};
543 sub log {
544 my $m = shift;
545 if(!$m->{log}){
546 my $pack = ref($m);
547 $pack =~ s/::/./g;
548 $m->{log} = Log::Log4perl->get_logger($pack);
549 $m->{log}->info("starting log for $pack");
551 return $m->{log};
554 sub _loaded {
555 my $m = shift;
556 my $conf = $m->conf;
557 my ( $file, $touch ) = @_;
558 $file .= '_' . $conf->{'build'}{'load_touchext'};
559 if ($touch) {
560 open( T, '>' . $file );
561 print T "\n";
562 close(T);
563 return 1;
565 else {
566 return 1 if -f $file;
567 return 0;
571 sub _mirror {
572 my $m = shift;
573 my $conf = $m->conf;
574 my ($remote,$local) = @_;
575 $local = $conf->{'path'}{'data'} .'/'. $local;
577 if( $m->_loaded($local) ){
578 print " already loaded, remove touchfile to reload. skipping\n";
579 return 0;
582 #mirror the file
583 my $rc = mirror($remote, $local);
585 if ($rc == 304) {
586 print " ". $local ." is up to date\n";
587 return 0;
588 } elsif (!is_success($rc)) {
589 print " $rc ", status_message($rc), " (",$remote,")\n";
590 return 0;
591 } else {
592 #file is new, load it
593 print " updated\n";
594 return 1;