Merge pull request #5205 from solgenomics/topic/generic_trial_upload
[sgn.git] / cgi-bin / sequencing / tpf.pl
blob562e030acf496c2ecc0e3dc07087eff357d52e3f
2 use strict;
4 use File::Temp qw//;
5 use File::Spec;
7 use CXGN::DB::Connection;
8 use CXGN::TomatoGenome::BACPublish qw/tpf_agp_files/;
9 use CXGN::Genomic::Clone;
10 use CXGN::Genomic::CloneIdentifiers qw/parse_clone_ident/;
11 use CXGN::Login;
12 use CXGN::Page;
13 use CXGN::Page::FormattingHelpers qw( info_section_html
14 page_title_html
15 columnar_table_html
16 info_table_html
17 modesel
19 use CXGN::People;
20 use CXGN::Publish qw/publish/;
22 #tpf_agp is in the current directory
23 use CXGN::TomatoGenome::tpf_agp qw(
24 format_validation_report
25 filename
26 published_ftp_download_links
27 tabdelim_to_html
28 tabdelim_to_array
29 modtime_string
32 my $page = CXGN::Page->new("Tiling Path Display","Rob");
33 my $dbh = CXGN::DB::Connection->new();
35 #get input arguments and validate
36 my ($chr) = $page->get_arguments(qw/chr/);
37 $chr += 0; #force chr to be numeric
38 $chr ||= 1;
40 #if we have a file upload, validate it according to its type and store it
41 if(my $upload = $page->get_upload ) {
42 #get the active login
43 my $sp_person_id = CXGN::Login->new($dbh)->verify_session();
44 my $sp = CXGN::People::Person->new($dbh, $sp_person_id);
45 $sp->get_username
46 or do {print $page->header; graceful_exit("Username not found.")};
48 #check that the login has permission to upload for this chromosome
49 unless($sp->get_user_type eq 'curator'
50 || grep {$_ == $chr && $_ <= 12 && $_ >= 1} $sp->get_projects_associated_with_person) {
51 print $page->header(); graceful_exit('your account does not have privileges to upload files for that chromosome');
54 my $validation_report = validate_and_publish_tpf($upload->fh,$chr);
55 $page->message_page('TPF validation failed',$validation_report) if $validation_report;
59 $page->header('TPF Display',"Chromosome Tiling Paths");
60 print <<EOHTML;
61 <p>The tiling path listings below have been developed and submitted to
62 SGN by the individual <a href="/about/tomato_sequencing.pl">tomato
63 chromosome sequencing projects</a>.</p>
65 <p>The listing below shows the clones that make up the current planned
66 tiling path for this chromosome, along with any gaps that have yet
67 to be bridged by a sequence of clones.</p>
69 <p>It is in TPF format, which consists of three columns:</p>
70 <ol>
71 <li>the NCBI/EMBL Accession of the BAC sequence</li>
72 <li>the SGN name of the BAC sequence</li>
73 <li>a name for each contiguous group of BACs, internal to each chromosome sequencing project</li>
74 </ol>
76 <p>These files are also available for download <a href="ftp://ftp.sgn.cornell.edu/tomato_genome/tpf">via FTP</a>.</p>
77 EOHTML
79 if ($chr > 12 or $chr < 1){
80 graceful_exit("Oops! Chromosome number must be between 1-12.");
83 print qq|<div style="text-align: center; font-weight: bold; margin-bottom: 0.5em">Chromosome</div>\n|;
84 print modesel( [map {["?chr=$_",$_]} 1..12], $chr-1);
85 print "<br />\n";
88 #now show the current tpf and agp files
90 my $published_tpf;
91 eval {
92 ($published_tpf,undef) = tpf_agp_files($chr);
94 if ($@){
95 graceful_exit($@);
98 my ($published_tpf_ftp,undef) = published_ftp_download_links($chr);
99 print info_section_html( title => 'Tiling Path Format (TPF)',
100 subtitle => modtime_string($published_tpf).' '.$published_tpf_ftp,
101 contents => tpf_to_html($published_tpf) || <<EOHTML,
102 <center><b>No TPF file is currently available for chromosome $chr</b></center>
103 EOHTML
105 print info_section_html( title => "Other Chromosome $chr Resources",
106 contents => <<EOHTML,
107 <ul>
108 <li>Tomato maps: <a href="/cview/index.pl">Genetic</a> | <a href="/cview/map.pl?map_id=9&amp;physical=1">Physical</a> | <a href="/cview/map.pl?map_id=13">BAC FISH results</a></li>
109 <li><a href="/organism/Solanum_lycopersicum/clone_sequencing?chr=$chr">Chromosome $chr BAC list</a></li>
110 <li><a href="agp.pl?chr=$chr">Chromosome $chr Assembly (AGP)</a></li>
111 </ul>
112 EOHTML
114 $page->footer;
118 # we should NOT be using error_page! Use this instead!
119 sub graceful_exit {
121 my ($message) = @_;
122 print $message;
123 print $page->footer;
124 exit;
128 #CHECK:
129 #correct number of columns
130 #correct data type of each column
131 #correctly formed bac identifiers
132 #bac identifiers are on the correct chromosome
133 sub validate_and_publish_tpf {
134 my ($tpf_fh,$chr) = @_;
135 my $publishing_filename = filename($chr,'tpf');
136 my $tmp = File::Temp->new( UNLINK => 1, SUFFIX => '.tpf' );
137 my @errors;
138 my $line_ctr = 0;
139 my $err = sub {
140 push @errors, "Line $line_ctr: ".shift;
142 #copy the tpf file from the apache-spooled temp file. while doing
143 #that, go over each line and validate it
144 while(my $line = <$tpf_fh>) {
145 $line_ctr++;
146 chomp $line;
147 $line =~ s/\r$//;
148 unless($line =~ /^\s*#/) {
149 my @d = split /\s+/,$line;
150 @d == 3 or $err->("Incorrect column count: ".scalar(@d)." columns found");
151 if( $d[0] =~ /^GAP$/ ) {
152 $d[1] =~ /^type-[1234]$|/ or $err->("If first column is GAP, second column must be a type-[1234] gap type");
153 $d[2] eq '?' or $err->("If first column is 'GAP', third column must be '?' (it is '$d[2]')");
155 elsif( $d[0] eq '?' || $d[0] =~ /^\S{5,}$/ ) {
156 my $p = parse_clone_ident($d[1],qw/agi_bac_with_chrom agi_bac/)
157 or $err->("If line is not a gap, second column must be a valid SGN BAC clone identifier");
158 if($p) {
159 my $c = CXGN::Genomic::Clone->retrieve_from_parsed_name($p)
160 or $err->("Unknown clone identifier '$d[1]' in second column");
161 if($c) {
162 $c->chromosome_num == $chr
163 or $err->("BAC with identifier '$d[1]' is not registered as being on chromosome $chr");
164 $d[1] = $c->clone_name_with_chromosome;
168 else {
169 $err->("Invalid first column '$d[0]'");
171 print $tmp join("\t",@d)."\n";
172 } else {
173 print $tmp "$line\n";
176 close $tmp; #make sure we flush the writes we made
177 if(@errors) {
178 return format_validation_report(@errors);
179 } else {
180 publish(['cp', "$tmp", $publishing_filename]);
181 return;
185 sub tpf_to_html {
186 my ($filename) = @_;
187 if($filename && -f $filename) {
188 return tabdelim_to_html($filename,undef,
189 #this sub looks at the contents of each
190 #table cell and might emit some style rules
191 #to highlight it
192 sub {
193 my ($contents) = @_;
194 if ($contents =~ /^\s*#/) {
195 'color: green'
196 } elsif ( $contents =~ /^\s*GAP\s*$/ ) {
197 'color: red'
198 } elsif ( $contents =~ /^\s*\?\s*$/ ) {
199 'font-weight: bold'