new file: cell2loc.py
[GalaxyCodeBases.git] / perl / etc / justonce / fetchNCBI.pl
blob5660feea9873b0ab043465814601b202cc1fda73
1 #!/usr/bin/env perl
2 use strict;
3 use warnings;
4 use Mozilla::CA;
5 use LWP::Simple;
6 use Data::Dump qw(ddx);
# Download the nucleotide record of every gene named in the SNP candidate
# table from NCBI and collect the sequences, one record per gene, in pcr.fa.
#
# Input : tab-separated lines.  Column 1 is split on '.' and its second piece
#         is used as the NCBI id; columns 3 and 4 are only echoed as progress.
# Output: pcr.fa - each record is ">ID <original header>", followed by the
#         sequence joined onto a single line and a blank line.
#
# An earlier variant requested only a window around the position in column 3
# (extra "&sendto=t&from=..&to=..&maxdownloadsize=.." parameters).  That URL
# was overwritten before use, so the whole record is what actually gets
# downloaded; the dead assignment and its helper variables were dropped.
my $infile  = 'snpcandidatforpcr.out';
my $outfile = 'pcr.fa';
my $ncbi    = 'https://www.ncbi.nlm.nih.gov/sviewer/viewer.fcgi';
my %Genes;    # ids already requested, so each gene is fetched only once

open my $in,  '<', $infile  or die "Cannot read $infile: $!";
open my $out, '>', $outfile or die "Cannot write $outfile: $!";

while ( my $line = <$in> ) {
    chomp $line;
    my @dat = split /\t/, $line;
    next if @dat < 3;

    # Column 1 must look like "prefix.ID"; without an id there is nothing
    # to ask NCBI for.
    my ( undef, $id ) = split /\./, $dat[0];
    next unless defined $id and length $id;
    next if exists $Genes{$id};
    ++$Genes{$id};

    # Only three columns are required above, so the fourth may be absent.
    my $extra = defined $dat[3] ? $dat[3] : '';
    print "$id, $dat[2], $extra\n";

    my $url = $ncbi . '?id=' . $id
        . '&db=nuccore&report=fasta&fmt_mask=0&maxplex=1';
    #my $content = get($url);
    my $content = fetch_url($url);
    unless ( defined $content ) {
        warn "$id: nothing downloaded, skipped\n";
        next;
    }

    # First line is the FASTA header, the rest is sequence.  Anything that
    # does not start with '>' is an error page, not a record.
    my @ret  = split /\r?\n/, $content;
    my $head = shift @ret;
    unless ( defined $head and $head =~ s/^>// ) {
        warn "$id: response is not FASTA, skipped\n";
        next;
    }

    #print {$out} ">${id}_$dat[2] $head\n", join( '', @ret ), "\n\n";
    print {$out} ">${id} $head\n", join( '', @ret ), "\n\n";
}

# Buffered write errors only surface when the handle is closed.
close $out or die "Cannot finish writing $outfile: $!";
close $in;

# Fetch $url with curl and return the response body.
# Returns undef (after a warning) when curl cannot be started or exits
# non-zero, and undef when the body is empty.
# The list form of the pipe open runs curl directly, so the url - which
# contains text taken from the input file - never passes through a shell.
sub fetch_url {
    my ($url) = @_;

    my $pid = open my $curl, '-|', 'curl', '-s', $url;
    unless ($pid) {
        warn "Cannot run curl: $!\n";
        return;
    }

    my $body = do { local $/; <$curl> };    # slurp the whole response

    unless ( close $curl ) {
        warn "curl failed for $url (exit status " . ( $? >> 8 ) . ")\n";
        return;
    }

    return unless defined $body and length $body;
    return $body;
}
37 __END__
39 my $content = get('https://www.ncbi.nlm.nih.gov/sviewer/viewer.fcgi?id=DS362220&db=nuccore&report=fasta&fmt_mask=0&maxplex=1&sendto=t&from=121868&to=122268&maxdownloadsize=1000000');
40 #die "Couldn't get it!" unless defined $content;
42 print "[$content]\n";