9 load_images.pl -D database_name -H hostname -i dirname -r chado table name [script will load image ids into ChadoTableprop ]
13 Loads images into the SGN database, using the SGN::Image framework.
14 Then link the loaded image with the user-supplied chado objects (e.g. stock, nd_experiment)
16 Requires the following parameters:
30 map file. If provided, the links between stock names and image file names are read from this mapping file.
31 Row labels are expected to be unique file names; the column header for the associated stocks is 'name'.
35 a directory that contains image files, or subdirectories named after database accessions, each containing one or more images (see option -d).
39 user name - from sgn_people.sp_person.
43 the dir where the database stores the images (the concatenated values from image_path and image_dir from sgn_local.conf or sgn.conf)
47 files are stored in sub directories named after database accessions
51 image file extension. Defaults to 'jpg'
55 trial mode. Nothing will be stored.
59 Errors and messages are output on STDERR.
63 Naama Menda (nm249@cornell.edu) October 2010.
65 Tweaks and move to sgn/bin: Lukas Mueller (lam87@cornell.edu) December 2023.
71 use CXGN
::Metadata
::Schema
;
72 use CXGN
::Metadata
::Metadbdata
;
73 use CXGN
::DB
::InsertDBH
;
75 use Bio
::Chado
::Schema
;
76 use CXGN
::People
::Person
;
78 use Data
::Dumper qw
/ Dumper /;
84 use CXGN
::Tools
::File
::Spreadsheet
;
85 use File
::Glob qw
| bsd_glob
|;
87 our ($opt_H, $opt_D, $opt_t, $opt_i, $opt_u, $opt_r, $opt_d, $opt_e, $opt_m, $opt_b);
88 getopts
('H:D:u:i:e:f:tdr:m:b:');
94 my $db_image_dir = $opt_b;
95 my $chado_table = $opt_r;
96 my $ext = $opt_e || 'jpg';
98 if (!$dbhost || !$dbname) { # both -H and -D are used below, so either one missing must reach this branch (was &&, which only fired when both were absent)
99 print "dbhost = $dbhost , dbname = $dbname\n"; # echo what was received to help diagnose the command line
100 print "opt_t = $opt_t, opt_u = $opt_u, opt_r = $chado_table, opt_i = $dirname\n";
104 if (!$dirname) { print "dirname = $dirname\n" ; usage
(); }
106 my $dbh = CXGN
::DB
::InsertDBH
->new( { dbhost
=>$dbhost,
110 my $schema= Bio
::Chado
::Schema
->connect( sub { $dbh->get_actual_dbh() } , { on_connect_do
=> ['SET search_path TO public;'] }
114 print STDERR
"Generate metadata_id... ";
115 my $metadata_schema = CXGN
::Metadata
::Schema
->connect("dbi:Pg:database=$dbname;host=".$dbh->dbhost(), "postgres", $dbh->dbpass(), {on_connect_do
=> "SET search_path TO 'metadata', 'public'", });
117 my $sp_person_id= CXGN
::People
::Person
->get_person_by_username($dbh, $sp_person);
121 #my $ch = SGN::Context->new();
122 print "PLEASE VERIFY:\n";
123 print "Using dbhost: $dbhost. DB name: $dbname. \n";
124 print "Path to image is: $db_image_dir\n";
127 if ($a !~ /[yY]/) { exit(); }
129 my %image_hash = (); # used to retrieve images that are already loaded
130 my %connections = (); # keep track of object -- image connections that have already been made.
# Build the %name2id lookup: lower-cased stock uniquename -> stock_id.
132 print STDERR
"Caching stock table...\n";
133 my $object_rs = $schema->resultset("Stock::Stock")->search( { } ) ; # empty condition: the resultset covers every row of the stock table
134 while (my $object = $object_rs->next ) {
135 my $id = $object->stock_id;
136 my $name = $object->uniquename;
137 $name2id{lc($name)} = $id; # keys are lower-cased so the later lc(...) lookups are case-insensitive
140 # cache image chado object - image links to prevent reloading of the
143 print "Caching image $chado_table links...\n";
# Pre-load the existing stock/image links so that already stored images and
# already made connections can be recognised while the files are processed.
145 my $q = "SELECT * FROM phenome.stock_image";
146 my $sth = $dbh->prepare($q);
148 while ( my $hashref = $sth->fetchrow_hashref() ) {
149 my $image_id = $hashref->{image_id
};
150 my $chado_table_id = $hashref->{stock_id
}; # table specific: the id column is hard-coded to stock_id, whatever -r was given
152 if ($chado_table_id % 10000 == 0) {
153 print STDERR
"CACHING $chado_table_id\n"; # progress output, only for ids that are a multiple of 10000
156 my $i = CXGN
::Image
->new(dbh
=>$dbh, image_id
=>$image_id, image_dir
=>$db_image_dir); # SGN::Image...$ch
157 my $original_filename = $i->get_original_filename();
158 $image_hash{$original_filename} = $i; # this doesn't have the file extension
# NOTE(review): the per-file lookup further down tests $image_hash{$filename}
# with the globbed path as key -- confirm that it can ever match these keys.
159 $connections{$image_id."-".$chado_table_id}++; # key format "<image_id>-<stock_id>", the same form used when linking
162 #open (ERR, ">load_bcs_images.err") || die "Can't open error file\n";
166 @files = bsd_glob
"$dirname/*.$ext";
169 @files = bsd_glob
"$dirname/*" if $opt_d ;
172 print STDERR
"DIRS = ".(join("\n", @files))."\n";
176 my $new_image_count = 0;
178 my $metadata = CXGN
::Metadata
::Metadbdata
->new($metadata_schema, $sp_person);
179 my $metadata_id = $metadata->store()->get_metadata_id();
181 #read from spreadsheet:
182 my $map_file = $opt_m; #
186 my $s = CXGN
::Tools
::File
::Spreadsheet
->new($map_file); #
187 my @rows = $s->row_labels(); #
188 foreach my $file_name (@rows) { #
189 my $stock_name = $s->value_at($file_name, 'name'); #
190 $name_map{$file_name} = $stock_name;
194 print STDERR
"Starting to process ".scalar(@files)." images...\n";
196 foreach my $file (@files) {
199 @sub_files = ($file); # default: the globbed entry is itself the image to load
200 @sub_files = bsd_glob "$file/*" if $opt_d || -d $file; # expand only in -d mode or for directory entries; globbing "$file/*" for a plain image file matches nothing, which left @sub_files empty outside -d mode
202 print STDERR
"FILES FOR $file: ".Dumper
(\
@sub_files)."\n";
204 my $object = basename
($file, ".$ext" );
206 # if (!$plot) { die "File $file has no object name in it!"; }
207 my $stock = $schema->resultset("Stock::Stock")->find( {
208 stock_id
=> $name2id{ lc($object) } } );
209 foreach my $filename (@sub_files) {
213 print STDERR
"FILENAME NOW: $filename\n";
214 my $image_base = basename
($filename);
215 my ($object_name, $description, $extension);
217 $object_name = $name_map{$object . "." . $ext } ;
220 print STDERR
"OBJECT = $object...\n";
221 # if ($image_base =~ /(.*?)\_(.*?)(\..*?)?$/) {
222 if ($image_base =~ m/(.*)(\.$ext)/i) {
226 if ($image_base =~ m/(.*)\_(.*)/) {
232 $object_name = $image_base;
234 print STDERR
"Object: $object OBJECT NAME: $object_name DESCRPTION: $description EXTENSIO: $extension\n";
237 print STDOUT
"Processing file $file...\n";
238 print STDOUT
"Loading $object_name, image $filename\n";
239 print STDERR
"Loading $object_name, image $filename\n";
240 my $image_id; # this will be set later, depending if the image is new or not
241 if (! -e
$filename) {
242 warn "The specified file $filename does not exist! Skipping...\n";
246 if (!exists($name2id{lc($object)})) {
247 message
("$object does not exist in the database...\n");
251 print STDERR
"Adding $filename...\n";
252 if (exists($image_hash{$filename})) {
253 print STDERR
"$filename is already loaded into the database...\n";
254 $image_id = $image_hash{$filename}->get_image_id();
255 $connections{$image_id."-".$name2id{lc($object)}}++;
256 if ($connections{$image_id."-".$name2id{lc($object)}} > 1) {
257 print STDERR
"The connection between $object and image $filename has already been made. Skipping...\n";
259 elsif ($image_hash{$filename}) {
260 print STDERR
qq { Associating
$chado_table $name2id{lc($object)} with already loaded image
$filename...\n };
264 print STDERR
qq { Generating new image object
for image
$filename and associating it with
$chado_table $object, id
$name2id{lc($object) } ...\n };
267 print STDOUT
qq { Would associate file
$filename to
$chado_table $object_name, id
$name2id{lc($object)}\n };
271 my $image = CXGN
::Image
->new(dbh
=>$dbh, image_dir
=>$db_image_dir);
272 $image_hash{$filename}=$image;
275 ($image_id, $error) = $image->process_image("$filename", $chado_table , $name2id{lc($object)}, 1);
277 print STDERR
"IMAGE ID $image_id, ERROR: $error\n";
279 if ($error eq "ok") {
282 $image->set_description("$description");
283 $image->set_name(basename
($filename , ".$ext"));
284 $image->set_sp_person_id($sp_person_id);
285 $image->set_obsolete("f");
286 $image_id = $image->store();
287 #link the image with the BCS object
289 my $image_subpath = $image->image_subpath();
290 print STDERR
"FINAL IMAGE PATH = $db_image_dir/$image_subpath\n";
297 print STDERR
"Connecting image $filename and id $image_id with stock ".$stock->stock_id()."\n";
298 #store the image_id - stock_id link
299 my $q = "INSERT INTO phenome.stock_image (stock_id, image_id, metadata_id) VALUES (?,?,?)";
300 my $sth = $dbh->prepare($q);
301 $sth->execute($stock->stock_id, $image_id, $metadata_id);
305 print STDERR
"ERROR OCCURRED WHILE SAVING NEW INFORMATION. $@\n";
322 print STDERR
"Inserted $new_image_count images.\n";
323 print STDERR
"Done. \n";
326 print "Usage: load_images.pl -D dbname [ cxgn | sandbox ] -H dbhost -t [trial mode ] -i input dir -r chado table name for the object to link with the image \n";
332 print STDERR
$message;