1 # $Id: Builder.pm,v 1.34 2007/02/20 16:23:53 briano Exp $
2 package Chado
::Builder
;
3 # vim: set ft=perl ts=2 expandtab:
6 use base
'Module::Build';
9 use File
::Spec
::Functions
'catfile';
15 use LWP
::Simple
qw(mirror is_success status_message);
18 Log
::Log4perl
::init
('load/etc/log.conf');
25 Runs the psql command with its -f option to execute the contents of the
26 load/etc/initialize.sql file. Put any insert statements that
27 your data load needs in that file.
31 Load action for all NCBI data.
39 loads ontologies by running gmod_load_ontology.pl on all files in
44 processes templates specified in configuration file, filling in
45 platform-specific variable values
55 Function: Executes any SQL statements in the load/etc/initialize.sql file.
65 # the XML config object
68 $m->log->info("entering ACTION_prepdb");
70 my $db_name = $conf->{'database'}{'db_name'} || '';
71 my $db_host = $conf->{'database'}{'db_host'} || '';
72 my $db_port = $conf->{'database'}{'db_port'} || '';
73 my $db_user = $conf->{'database'}{'db_username'} || '';
74 my $build_dir = $conf->{'build'}{'working_dir'} || '';
75 my $init_sql = catfile
( $build_dir, 'load', 'etc', 'initialize.sql' );
76 my $sys_call = "psql -h $db_host -p $db_port -U $db_user -f $init_sql $db_name";
78 $m->log->debug("system call: $sys_call");
80 system( $sys_call ) == 0 or croak
"Error executing '$sys_call': $?";
82 $m->log->info("leaving ACTION_prepdb");
89 Function: Load action for all NCBI data.
98 # the XML config object
101 $m->log->info("entering ACTION_ncbi");
103 # print out the available refseq datasets
104 my %ncbis = printAndReadOptions
($m,$conf,"ncbi");
106 # now that I know what you want mirror files and load
107 # fetchAndLoadFiles is called for each possible type
108 # but only actively loaded for those the user selects
109 fetchAndLoadFiles
($m, $conf, "refseq", "./load/bin/load_gff3.pl --organism Human --srcdb DB:refseq --gfffile", \
%ncbis);
110 fetchAndLoadFiles
($m, $conf, "locuslink", "./load/bin/load_locuslink.pl", \
%ncbis);
111 $m->log->info("leaving ACTION_ncbi");
118 $m->log->info("entering ACTION_mageml");
120 print "Available MAGE-ML annotation files:\n";
124 foreach my $mageml ( sort keys %{ $conf->{mageml
} } ) {
126 print "[$i] $mageml\n";
131 my $chosen = $m->prompt(
132 "Which ontologies would you like to load (Comma delimited)? [0]"
134 $m->notes( 'affymetrix' => $chosen );
136 my %mageml = map { $ml{$_} => $conf->{mageml
}{ $ml{$_} } } split ',', $chosen;
138 foreach my $mageml ( keys %mageml ) {
139 print "fetching files for $mageml\n";
142 foreach my $file ( @
{ $mageml{$mageml}{file
} } ) {
144 my $fullpath = catfile
$conf->{path
}{data
}, $file->{local};
145 $fullpath =~ s!^(.+)/[^/]*!$1!;
147 unless ( -d
$fullpath ) {
148 $m->log->debug("mkpath $fullpath");
149 mkpath
( $fullpath, 0, 0711 )
150 or print "Couldn't make path '$fullpath': $!\n";
153 print " +", $file->{remote
}, "\n";
154 $load = 1 if $m->_mirror( $file->{remote
}, $file->{local} );
155 $load = 1 unless $m->_loaded( $fullpath );
161 my $sys_call = "./load/bin/load_affymetrix.pl $fullpath";
162 $m->log->debug( "system call: $sys_call" );
164 my $result = system( $sys_call );
165 if ( $result != 0 ) {
169 $m->_loaded( $fullpath, 1 );
175 $m->log->info("leaving ACTION_mageml");
178 sub ACTION_ontologies
{
182 my $db_name = $conf->{'database'}{'db_name'} || '';
183 my $db_host = $conf->{'database'}{'db_host'} || '';
184 my $db_port = $conf->{'database'}{'db_port'} || '';
185 my $db_user = $conf->{'database'}{'db_username'} || '';
186 my $db_pass = $conf->{'database'}{'db_password'} || '';
188 $db_pass = '' if (ref $db_pass eq 'HASH');
190 $m->log->info("entering ACTION_ontologies");
192 print "Available ontologies:\n";
195 foreach my $ontology ( keys %{ $conf->{ontology
} } ) {
196 $ont{ $conf->{ontology
}->{$ontology}->{order
} } = $ontology;
198 foreach my $key ( sort {$a <=> $b} keys %ont ) { print "[$key] ", $ont{$key}, "\n"; }
201 my $chosen = $m->prompt("Which ontologies would you like to load (Comma delimited)? [0]");
202 $m->notes( 'ontologies' => $chosen );
204 my %ontologies = map { $_ => $conf->{ontology
}{ $ont{$_} } } split ',',
207 foreach my $ontology ( sort {$a <=> $b} keys %ontologies ) {
208 print "fetching files for ", $ont{$ontology}, "\n";
210 my $file = $ontologies{$ontology}{file
};
214 grep { $_->{type
} eq 'definitions' } @
{ $ontologies{$ontology}{file
} }
216 my $fullpath = catfile
($conf->{path
}{data
}, $file->{local});
217 $fullpath =~ s!^(.+)/[^/]*!$1!;
218 unless ( -d
$fullpath ) {
219 $m->log->debug("mkpath $fullpath");
220 mkpath
( $fullpath, 0, 0711 )
221 or print "Couldn't make path '$fullpath': $!\n";
223 if ($file->{method
} =~ /mirror/) {
224 print " +", $file->{remote
}, "\n";
225 $load = 1 if $m->_mirror( $file->{remote
}, $file->{local} );
227 else { # it is a local file
228 copy
( $file->{remote
} , $fullpath );
234 grep { $_ if $_->{type
} eq 'definitions' }
235 @
{ $ontologies{$ontology}{file
} };
238 grep { ($_->{type
} eq 'ontology') or ($_->{type
} eq 'obo') } @
{ $ontologies{$ontology}{file
} }
240 my $fullpath = catfile
($conf->{path
}{data
}, $file->{local});
241 $fullpath =~ s!^(.+)/[^/]*!$1!;
242 unless ( -d
$fullpath ) {
243 $m->log->debug("mkpath $fullpath");
244 mkpath
( $fullpath, 0, 0711 )
245 or print "Couldn't make path '$fullpath': $!\n";
248 print " +", $file->{remote
}, "\n";
250 if ($file->{method
} =~ /mirror/) {
251 $load = 1 if $m->_mirror( $file->{remote
}, $file->{local} );
254 copy
( $file->{remote
}, $fullpath );
262 # my $sys_call = join( ' ',
263 # './load/bin/gmod_load_ontology.pl',
264 # catfile( $conf->{'path'}{'data'}, $file->{'local'} ),
265 # catfile( $conf->{'path'}{'data'}, $deffile->{'local'} )
269 #creating chadoxml from either obo or ontology files
271 if ($file->{type
} eq 'obo') {
272 $sys_call = join( ' ',
273 'go2fmt.pl -p obo_text -w xml',
274 catfile
( $conf->{'path'}{'data'}, $file->{'local'}),
275 '| go-apply-xslt oboxml_to_chadoxml - >',
276 catfile
( $conf->{'path'}{'data'}, $file->{'local'}.'xml')
278 } elsif ($file->{type
} eq 'ontology') {
279 $sys_call = join( ' ',
280 'go2fmt.pl -p go_ont -w xml',
281 catfile
( $conf->{'path'}{'data'}, $file->{'local'}),
282 '| go-apply-xslt oboxml_to_chadoxml - >',
283 catfile
( $conf->{'path'}{'data'}, $file->{'local'}.'xml')
286 die "what kind of file is ".$_->{type
}."?";
289 $m->log->debug( "system call: $sys_call" );
291 my $result = system( $sys_call );
293 if ( $result != 0 ) {
294 print "System call '$sys_call' failed: $?\n";
295 $m->log->fatal("failed: $?");
300 my $stag_string = "stag-storenode.pl -d 'dbi:Pg:dbname=$db_name;host=$db_host;port=$db_port'";
301 $stag_string .= " --user $db_user " if $db_user;
302 $stag_string .= " --password $db_pass " if $db_pass;
303 $sys_call = join( ' ',
305 catfile
( $conf->{'path'}{'data'}, $file->{'local'}.'xml')
308 $m->log->debug( "system call: $sys_call" );
310 $result = system( $sys_call );
312 if ( $result != 0 ) {
313 print "System call '$sys_call' failed: $?\n";
314 $m->log->fatal("failed: $?");
319 $sys_call = join( ' ',
320 'go2fmt.pl -p go_def -w xml',
321 catfile
( $conf->{'path'}{'data'}, $deffile->{'local'}),
322 '| go-apply-xslt oboxml_to_chadoxml - >',
323 catfile
( $conf->{'path'}{'data'}, $deffile->{'local'}.'xml')
326 $m->log->debug( "system call: $sys_call" );
328 $result = system( $sys_call );
330 if ( $result != 0 ) {
331 print "System call '$sys_call' failed: $?\n";
332 $m->log->fatal("failed: $?");
337 $sys_call = join( ' ',
338 "stag-storenode.pl -d 'dbi:Pg:dbname=$db_name;host=$db_host;port=$db_port'",
339 catfile
( $conf->{'path'}{'data'}, $deffile->{'local'}.'xml')
342 $m->log->debug( "system call: $sys_call" );
344 $result = system( $sys_call );
348 if ( $result != 0 ) {
349 print "System call '$sys_call' failed: $?\n";
350 $m->log->fatal("failed: $?");
354 $m->_loaded( catfile
($conf->{'path'}{'data'}, $file->{'local'}), 1 );
355 $m->_loaded( catfile
($conf->{'path'}{'data'}, $deffile->{'local'}), 1 ) if $deffile;
357 $m->log->debug("done!");
362 #fix up DBIx::DBStag stomping on part_of and derives_from
363 $m->log->debug("fix up DBIx::DBStag stomping on part_of and derives_from");
364 my $dbh = DBI
->connect("dbi:Pg:dbname=$db_name;host=$db_host;port=$db_port",
366 $dbh->do("UPDATE cvterm SET
367 cv_id = (SELECT cv_id FROM cv WHERE name='relationship')
368 WHERE name='derives_from'");
369 $dbh->do("UPDATE cvterm SET
370 cv_id = (SELECT cv_id FROM cv WHERE name='relationship')
371 WHERE name='part_of'");
374 $m->log->info("leaving ACTION_ontologies");
377 sub ACTION_tokenize
{
381 $m->log->info('entering ACTION_tokenize');
383 my $template = Template
->new(
388 ) || ( $m->log->fatal("Template error: $Template::ERROR") and die );
390 foreach my $templatefile ( keys %{ $conf->{template
}{file
} } ) {
392 #there is an order of preference in which keys are added.
393 #this affects which config sections clobber which others, beware.
394 my $tokens = {%{$conf->{database
}}, %{$conf->{build
}}};
396 if(ref($conf->{template
}{file
}{$templatefile}) eq 'HASH'){
397 $tokens->{ $_ } = $conf->{template
}{file
}{$templatefile}{$_} foreach keys %{ $conf->{template
}{file
}{$templatefile}};
400 #knock out empty hashes (like undef db_password)
401 foreach my $token (keys %{$tokens}){
402 undef($tokens->{$token}) if(ref($tokens->{$token}) eq 'HASH' and !keys %{$tokens->{$token}});
407 $m->log->debug(Dumper
($tokens));
410 $conf->{template
}{file
}{$templatefile}{in},
413 ) || ( $m->log->fatal( "Template error: " . $template->error() ) and die );
414 open( OUT
, '>' . $conf->{template
}{file
}{$templatefile}{out
} );
415 print OUT
$tokenized;
419 $m->log->info('leaving ACTION_tokenize');
426 =head2 fetchAndLoadFiles
428 Title : fetchAndLoadFiles
429 Usage : fetchAndLoadFiles(<build_obj>, <xml_conf_obj>, <file_type>, <load_command>, <selected_items_hashref>)
430 Function: Calls internal methods to mirror files specified for this file_type in the xml_conf_obj
436 sub fetchAndLoadFiles
{
437 my ( $m, $conf, $type, $command, $itm ) = @_;
438 $m->log->info('entering fetchAndLoadFiles');
440 foreach my $key ( keys %$itm ) {
441 print "fetching files for $key\n";
444 foreach my $file ( @
{ $itm->{$key}{file
} } ) {
446 # check to see if this command can handle this type
447 if ( $file->{type
} eq $type ) {
448 my $fullpath = catfile
( $conf->{path
}{data
}, $file->{local});
449 $fullpath =~ s!^(.+)/[^/]*!$1!;
451 unless ( -d
$fullpath ) {
452 $m->log->debug("mkpath $fullpath");
453 mkpath
( $fullpath, 0, 0711 )
454 or print "Couldn't make path '$fullpath': $!\n";
457 print " +", $file->{remote
}, "\n";
458 $load = 1 if $m->_mirror( $file->{remote
}, $file->{local} );
459 $load = 1 unless $m->_loaded( $fullpath );
465 my $sys_call = join( ' ', $command, $fullpath );
466 $m->log->debug( "system call: $sys_call" );
468 my $result = system( $sys_call );
470 if ( $result != 0 ) {
471 print "failed: $!\n";
472 $m->log->fatal("failed: $!");
476 $m->_loaded( $fullpath, 1 );
478 $m->log->debug("done!");
484 $m->log->info('leaving fetchAndLoadFiles');
488 =head2 printAndReadOptions
490 Title : printAndReadOptions
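491 Usage : my %chosen = printAndReadOptions(<build_obj>, <xml_conf_obj>, <option>); prints out the available options from the XML file and reads the user's selection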
495 Args : m=build obj, conf=conf obj, option=which option to pull from the conf XML file
499 sub printAndReadOptions
501 my ($m,$conf,$option) = @_;
502 print "Available $option Items:\n";
506 foreach my $item (sort keys %{ $conf->{$option} })
509 print "[$i] $item\n";
514 my $chosen = $m->prompt("Which items would you like to load (Comma delimited)? [0]");
515 $m->notes("$option"."s" => $chosen);
517 my %options = map {$itm{$_} => $conf->{$option}{$itm{$_}}} split ',',$chosen;
524 my $val = $m->{properties
}{$key};
531 return $self->{conf
} if defined $self->{conf
};
533 my $file = $self->property('load_conf');
534 $self->{conf
} = XMLin
($file,
535 ForceArray
=> ['token','path','file'],
536 KeyAttr
=> [qw(tt2 input token name file)],
537 ContentKey
=> '-value'
540 return $self->{conf
};
548 $m->{log} = Log
::Log4perl
->get_logger($pack);
549 $m->{log}->info("starting log for $pack");
557 my ( $file, $touch ) = @_;
558 $file .= '_' . $conf->{'build'}{'load_touchext'};
560 open( T
, '>' . $file );
566 return 1 if -f
$file;
574 my ($remote,$local) = @_;
575 $local = $conf->{'path'}{'data'} .'/'. $local;
577 if( $m->_loaded($local) ){
578 print " already loaded, remove touchfile to reload. skipping\n";
583 my $rc = mirror
($remote, $local);
586 print " ". $local ." is up to date\n";
588 } elsif (!is_success
($rc)) {
589 print " $rc ", status_message
($rc), " (",$remote,")\n";
592 #file is new, load it