6 load fish data and images into database
12 load_fish_stuff.pl -i fishlist -t -D database -H host -I image_dir
20 B<input> a list of paths to the *BAC_Locations.csv files that contain the data to be loaded. This can be generated by
21 find -name *ocations.csv > fishlist
25 B<test> test the loading but do not actually put it in the database
29 B<database> the database where the fish data will be loaded
37 B<image_dir> the directory to store the images
45 This script loads fish data and images. The columns in the csv files are expected to be in the following comma-delimited order:
47 BAC_ID number,Photo_ID,SC_ID,SC_rel_len%,SC_AR,BAC_loc_arm,BAC%from_kc
49 Make sure to set DBHOST, DBNAME, DBUSER, and DBPASS accordingly. Also, check dir that images are stored.
55 Lukas Mueller and Suzy Strickler
71 use CXGN
::DB
::InsertDBH
;
72 use CXGN
::Genomic
::Clone
;
75 our ($opt_i, $opt_D, $opt_H, $opt_I, $opt_t, $opt_h);
77 getopts
('i:D:H:I:th');
79 if (!$opt_i && !$opt_D && !$opt_H && !$opt_I && !$opt_t && !$opt_h){
80 print "Wrong script usage. Printing help\n\n";
88 #Get dir to store images
89 my $image_dir = $opt_I || die("INPUT ERROR: Image directory was not supplied (-I </dir/>).\n");
91 #list of all "Locations.csv" files ex: 01-26-12/Tomato_10P/Tomato_10P_BAC_Locations.csv
92 my $master_file = $opt_i || die("INPUT ERROR: Input option was not supplied (-i <fish_list>).\n");
93 my @files = read_file
($master_file);
95 our $dbh = CXGN
::DB
::InsertDBH
->new(
99 dbargs
=> {AutoCommit
=> 0,
103 ###To Add:check for duplicates in file to be loaded
105 foreach my $file (@files) {
108 my $path = dirname
($file); #path will now be determined from csv file
109 my @fishinfo = read_file
($file);
111 foreach my $fi (@fishinfo) {
114 if ($fi =~ m/^"/ || $fi =~ m/^,/){
115 next; #should check that it is in this order: BAC ID number, etc
119 if ($fi =~ m/SL_EcoRI/i || $fi =~ m/SL_s/i || $fi =~ m/SL_MboI/i || $fi =~ m/SL_FOS/i){
127 my ($bac_id, $photo_id, $sc_id, $sc_rel_len, $sc_ar, $bac_loc_arm, $per_from_kin) = split (/,/, $fi);
128 #$sc_rel_len and $sc_ar are not loaded
130 my $clone_id = CXGN
::Genomic
::Clone
->retrieve_from_clone_name("$lib$bac_id");
133 print STDERR
"$lib$bac_id not found. Skipping!!!\n";
138 @images = glob("$path/*$bac_id/Photo_ID_$photo_id/*.jpg");
139 my $num_images = @images;
141 #Check that there are 3 image files in the directory
142 if ($num_images != 3){
143 print STDERR
"Images in $path associated with experient $photo_id are missing. SKIPPING!\n";
151 $bac_loc_arm =~ tr/sSlL/PPQQ/;
153 print STDERR
"\n\nFound clone_id $clone_id for experiment $photo_id on $sc_id $bac_loc_arm at $per_from_kin\n".(join "\n",@images)."\n";
157 $fish_result_id = insert_fish_result
(
160 chromo_num
=> $sc_id,
161 chromo_arm
=> $bac_loc_arm,
162 experiment_name
=> $photo_id,
163 percent_from_centromere
=> $per_from_kin / 100, # in the db, stored as 0..1
164 fish_experimenter_name
=> 'fish_stack',
165 clone_id
=> $clone_id,
169 print "this is fish result id " . $fish_result_id . "\n";
173 foreach my $image(@images) {
174 my $i = CXGN
::Image
->new(dbh
=>$dbh, image_dir
=>$image_dir); #'/data/prod/public/images/image_files');
175 if ($i->process_image($image, "fish", $fish_result_id)) {
176 $i->set_description("FISH Localization of $bac_id on chromosome $sc_id arm $bac_loc_arm at $per_from_kin");
177 $i->set_sp_person_id(233);
178 $i->set_obsolete('f');
180 $image_id = $i->get_image_id();
182 insert_fish_result_image
($fish_result_id, $image_id);
186 print "Could not store image $image. Skipping!!!\n";
193 print STDERR
"Saving of $bac_id, chr $sc_id arm $bac_loc_arm at dist $per_from_kin failed due to $@";
197 print STDERR
"Committing info for $sc_id.\n";
205 sub insert_fish_result
{
208 #Check if record is already in database
209 my $check_dup_query = "SELECT * FROM sgn.fish_result WHERE clone_id = ? AND experiment_name = ?";
210 my $cdq = $dbh->prepare($check_dup_query);
214 $fd->{experiment_name
}
217 my $rows = $cdq->rows();
219 #Insert the record if it is not already in database
221 print STDERR
"This record " . $fd->{experiment_name
} . " is already in database. SKIPPING\n";
226 my $result_insert_query = #fish_result is table name
227 "INSERT INTO sgn.fish_result
228 (chromo_num, chromo_arm,
229 experiment_name, percent_from_centromere,
230 clone_id, fish_experimenter_id, map_id)
231 SELECT ?, ?, ?, ?, ?,
232 (SELECT fish_experimenter_id
233 FROM sgn.fish_experimenter
234 WHERE fish_experimenter_name = ?),
237 WHERE short_name = 'Tomato FISH map')";
238 my $dbh = $fd->{dbh
};
239 my $sth = $dbh->prepare($result_insert_query);
241 print STDERR
"name=" . $fd->{experiment_name
} . "\n";
242 $sth->execute( #This is the data inserted for the ? in the sql above
245 $fd->{experiment_name
},
246 $fd->{percent_from_centromere
},
248 $fd->{fish_experimenter_name
},
251 my $frh = $dbh->prepare("select currval('fish_result_fish_result_id_seq')");
253 my ($fish_result_id) = $frh->fetchrow_array();
255 return $fish_result_id;
259 sub insert_fish_result_image
{
260 my $fish_result_id = shift;
261 my $image_id = shift;
262 print STDERR
"This is the image id " . $image_id . "\n";
264 my $q = "INSERT INTO fish_result_image (fish_result_id, image_id) VALUES (?,?)";
265 my $frih = $dbh->prepare($q);
266 $frih->execute($fish_result_id, $image_id);
277 Side_Effects: exit of the script
278 Example: if (!@ARGV) {
290 This script loads fish data and images into the database.
294 load_fish_stuff.pl [-h] -i fish_list -t test -D database -H host -I image_dir
298 load_fish_stuff.pl -i fish_list.csv -t -D sandbox -H localhost -I /data/prod/public/images/image_files_sandbox
302 -i <input> input list of locations of fish csv files
303 -t <test> test mode, does not actually load data
304 -D <database> the database to load the fish data
306 -I <image_dir> the directory to store the images