added sol100 and chado cvterm pages to validate_all.t
[sgn.git] / cgi-bin / sequencing / tpf.pl
blob1d89fbfc86c9b6d782e7096a750aee773fde11f5
2 use strict;
4 use File::Temp qw//;
5 use File::Spec;
7 use CXGN::DB::Connection;
8 use CXGN::TomatoGenome::BACPublish qw/tpf_agp_files/;
9 use CXGN::Genomic::Clone;
10 use CXGN::Genomic::CloneIdentifiers qw/parse_clone_ident/;
11 use CXGN::Login;
12 use CXGN::Page;
13 use CXGN::Page::FormattingHelpers qw( info_section_html
14 page_title_html
15 columnar_table_html
16 info_table_html
17 modesel
19 use CXGN::People;
20 use CXGN::Publish qw/publish/;
22 #tpf_agp is in the current directory
23 use CXGN::TomatoGenome::tpf_agp qw(
24 format_validation_report
25 filename
26 published_ftp_download_links
27 tabdelim_to_html
28 tabdelim_to_array
29 modtime_string
31 use CXGN::VHost;
33 my $page = CXGN::Page->new("Tiling Path Display","Rob");
34 my $dbh = CXGN::DB::Connection->new();
36 #get input arguments and validate
37 my ($chr) = $page->get_arguments(qw/chr/);
38 $chr += 0; #force chr to be numeric
39 $chr ||= 1;
41 #if we have a file upload, validate it according to its type and store it
42 if(my $upload = $page->get_upload ) {
43 #get the active login
44 my $sp_person_id = CXGN::Login->new($dbh)->verify_session();
45 my $sp = CXGN::People::Person->new($dbh, $sp_person_id);
46 $sp->get_username
47 or do {print $page->header; graceful_exit("Username not found.")};
49 #check that the login has permission to upload for this chromosome
50 unless($sp->get_user_type eq 'curator'
51 || grep {$_ == $chr && $_ <= 12 && $_ >= 1} $sp->get_projects_associated_with_person) {
52 print $page->header(); graceful_exit('your account does not have privileges to upload files for that chromosome');
55 my $validation_report = validate_and_publish_tpf($upload->fh,$chr);
56 $page->message_page('TPF validation failed',$validation_report) if $validation_report;
60 $page->header('TPF Display',"Chromosome Tiling Paths");
61 print <<EOHTML;
62 <p>The tiling path listings below have been developed and submitted to
63 SGN by the individual <a href="/about/tomato_sequencing.pl">tomato
64 chromosome sequencing projects</a>.</p>
66 <p>The listing below shows the clones that make up the current planned
67 tiling path for this chromosome, along with any gaps that have yet
68 to be bridged by a sequence of clones.</p>
70 <p>It is in TPF format, which consists of three columns:</p>
71 <ol>
72 <li>the NCBI/EMBL Accession of the BAC sequence</li>
73 <li>the SGN name of the BAC sequence</li>
74 <li>a name for each contiguous group of BACs, internal to each chromosome sequencing project</li>
75 </ol>
77 <p>These files are also available for download <a href="ftp://ftp.sgn.cornell.edu/tomato_genome/tpf">via FTP</a>.</p>
78 EOHTML
80 if ($chr > 12 or $chr < 1){
81 graceful_exit("Oops! Chromosome number must be between 1-12.");
84 print qq|<div style="text-align: center; font-weight: bold; margin-bottom: 0.5em">Chromosome</div>\n|;
85 print modesel( [map {["?chr=$_",$_]} 1..12], $chr-1);
86 print "<br />\n";
89 #now show the current tpf and agp files
91 my $published_tpf;
92 eval {
93 ($published_tpf,undef) = tpf_agp_files($chr);
95 if ($@){
96 graceful_exit($@);
99 my ($published_tpf_ftp,undef) = published_ftp_download_links($chr);
100 print info_section_html( title => 'Tiling Path Format (TPF)',
101 subtitle => modtime_string($published_tpf).' '.$published_tpf_ftp,
102 contents => tpf_to_html($published_tpf) || <<EOHTML,
103 <center><b>No TPF file is currently available for chromosome $chr</b></center>
104 EOHTML
106 print info_section_html( title => "Other Chromosome $chr Resources",
107 contents => <<EOHTML,
108 <ul>
109 <li>Tomato maps: <a href="/cview/index.pl">Genetic</a> | <a href="/cview/map.pl?map_id=9&amp;physical=1">Physical</a> | <a href="/cview/map.pl?map_id=13">BAC FISH results</a></li>
110 <li><a href="/tomato/genome_data.pl?chr=$chr">Chromosome $chr BACs in Genome Browser</a></li>
111 <li><a href="agp.pl?chr=$chr">Chromosome $chr Assembly (AGP)</a></li>
112 </ul>
113 EOHTML
115 $page->footer;
119 # we should NOT be using error_page! Use this instead!
120 sub graceful_exit {
122 my ($message) = @_;
123 print $message;
124 print $page->footer;
125 exit;
129 #CHECK:
130 #correct number of columns
131 #correct data type of each column
132 #correctly formed bac identifiers
133 #bac identifiers are on the correct chromosome
134 sub validate_and_publish_tpf {
135 my ($tpf_fh,$chr) = @_;
136 my $publishing_filename = filename($chr,'tpf');
137 my $tmp = File::Temp->new( UNLINK => 1, SUFFIX => '.tpf' );
138 my @errors;
139 my $line_ctr = 0;
140 my $err = sub {
141 push @errors, "Line $line_ctr: ".shift;
143 #copy the tpf file from the apache-spooled temp file. while doing
144 #that, go over each line and validate it
145 while(my $line = <$tpf_fh>) {
146 $line_ctr++;
147 chomp $line;
148 $line =~ s/\r$//;
149 unless($line =~ /^\s*#/) {
150 my @d = split /\s+/,$line;
151 @d == 3 or $err->("Incorrect column count: ".scalar(@d)." columns found");
152 if( $d[0] =~ /^GAP$/ ) {
153 $d[1] =~ /^type-[1234]$|/ or $err->("If first column is GAP, second column must be a type-[1234] gap type");
154 $d[2] eq '?' or $err->("If first column is 'GAP', third column must be '?' (it is '$d[2]')");
156 elsif( $d[0] eq '?' || $d[0] =~ /^\S{5,}$/ ) {
157 my $p = parse_clone_ident($d[1],qw/agi_bac_with_chrom agi_bac/)
158 or $err->("If line is not a gap, second column must be a valid SGN BAC clone identifier");
159 if($p) {
160 my $c = CXGN::Genomic::Clone->retrieve_from_parsed_name($p)
161 or $err->("Unknown clone identifier '$d[1]' in second column");
162 if($c) {
163 $c->chromosome_num == $chr
164 or $err->("BAC with identifier '$d[1]' is not registered as being on chromosome $chr");
165 $d[1] = $c->clone_name_with_chromosome;
169 else {
170 $err->("Invalid first column '$d[0]'");
172 print $tmp join("\t",@d)."\n";
173 } else {
174 print $tmp "$line\n";
177 close $tmp; #make sure we flush the writes we made
178 if(@errors) {
179 return format_validation_report(@errors);
180 } else {
181 publish(['cp', "$tmp", $publishing_filename]);
182 return;
186 sub tpf_to_html {
187 my ($filename) = @_;
188 if($filename && -f $filename) {
189 return tabdelim_to_html($filename,undef,
190 #this sub looks at the contents of each
191 #table cell and might emit some style rules
192 #to highlight it
193 sub {
194 my ($contents) = @_;
195 if ($contents =~ /^\s*#/) {
196 'color: green'
197 } elsif ( $contents =~ /^\s*GAP\s*$/ ) {
198 'color: red'
199 } elsif ( $contents =~ /^\s*\?\s*$/ ) {
200 'font-weight: bold'