perl/etc/justonce/fetchNCBI.pl

   1 #!/usr/bin/env perl
   2 use strict;
   3 use warnings;
   4 use Mozilla::CA;
   5 use LWP::Simple;
   6 use Data::Dump qw(ddx);
   7
   8 my $deltra = 200;
   9
  10 my $infile = 'snpcandidatforpcr.out';
  11 my %Genes;
  12 open IN,'<',$infile or die $!;
  13 open OUT,'>','pcr.fa' or die $!;
  14 while (<IN>) {
  15         chomp;
  16         my @dat = split /\t/;
  17         next if @dat < 3;
  18         my (undef,$id) = split /\./,$dat[0];
  19         next if exists $Genes{$id};
  20         ++$Genes{$id};
  21         print "$id, $dat[2], $dat[3]\n";
  22         my ($left,$right) = ($dat[2]-$deltra,$dat[2]+$deltra);
  23         my $url = 'https://www.ncbi.nlm.nih.gov/sviewer/viewer.fcgi?id='.$id.'&db=nuccore&report=fasta&fmt_mask=0&maxplex=1&sendto=t&from='.$left.'&to='.$right.'&maxdownloadsize=1000000';
  24         $url = 'https://www.ncbi.nlm.nih.gov/sviewer/viewer.fcgi?id='.$id.'&db=nuccore&report=fasta&fmt_mask=0&maxplex=1';
  25         #my $content = get($url);
  26         my $content = `curl -s \'$url\'`;
  27         my @ret = split /\n/,$content;
  28         my $head = shift @ret;
  29         $head =~ s/^>//;
  30         #my $out = ">${id}_$dat[2] $head\n" . join('',@ret);
  31         my $out = ">${id} $head\n" . join('',@ret);
  32         print OUT "$out\n\n";
  33 }
  34 close OUT;
  35 close IN;
  36
  37 __END__
  38
  39 my $content = get('https://www.ncbi.nlm.nih.gov/sviewer/viewer.fcgi?id=DS362220&db=nuccore&report=fasta&fmt_mask=0&maxplex=1&sendto=t&from=121868&to=122268&maxdownloadsize=1000000');
  40 #die "Couldn't get it!" unless defined $content;
  41
  42 print "[$content]\n";