modified: pixi.toml
[GalaxyCodeBases.git] / released / pIRS.old / pIRS_simulator / extra / make_pure_ref_genome.pl
blob798178bbc9ca083b44d91939328731ac4f8ebfe1
1 #!/usr/bin/perl
2 use strict;
#usage: remove the N bases (any character other than A/C/G/T) from each sequence, to get a pure sequence used for simulation
# Reads FASTA records from the files named on the command line (or STDIN),
# strips every character that is not A/C/G/T, upper-cases what is left and
# prints the purified records to STDOUT.  A one-line summary of the totals
# over ALL records is written to STDERR at the end.
my $ori_len   = 0;    # total sequence length before filtering (whitespace excluded)
my $pure_len  = 0;    # total sequence length after filtering
my $loss_rate = 0;    # fraction of characters removed; stays 0 for empty input

# Discard everything up to and including the first '>' so that the next
# line-wise read returns the first header without its marker.
{
    local $/ = ">";
    <>;
}

while (my $title = <>) {
    # With several input files the sequence read below stops at the end of a
    # file, so the first header of the next file still carries its '>'.
    # Strip it here, otherwise the record would be printed as ">>name".
    $title =~ s/^>//;

    # The sequence body is everything up to the next '>' (or end of input).
    my $seq;
    {
        local $/ = ">";
        $seq = <>;
        $seq = '' unless defined $seq;    # header was the last line of input
        chomp $seq;                       # drops the trailing '>' separator
    }

    $seq =~ s/\s//g;
    $ori_len += length($seq);

    $seq =~ tr/acgtACGT//cd;              # delete everything except A/C/G/T
    $seq = uc $seq;
    $pure_len += length($seq);

    # Re-wrap the sequence in place; $title still ends with its newline.
    Display_seq(\$seq);
    print ">$title$seq";
}

# Guarded so that empty input does not divide by zero.
$loss_rate = ($ori_len - $pure_len) / $ori_len if $ori_len;

print STDERR "ori_len:$ori_len\tpure_len:$pure_len\tloss_rate:$loss_rate\n";
#############################################
#display a sequence with a specified number of characters on each line
#usage: Display_seq(\$string,$num_line);
#       Display_seq(\$string);
#
# $string   : scalar reference; the referenced string is modified in place.
#             All whitespace is removed, then a newline is inserted after
#             every $num_line characters (and after the final, possibly
#             shorter, chunk).  An empty sequence becomes a single "\n".
# $num_line : optional line width, default 50.  Must be a positive integer;
#             anything else dies, because a zero, negative or undefined
#             width would never advance the loop and a fractional width
#             would make substr() skip or repeat characters.
# returns   : the reformatted string (same value as stored in $$string).
#############################################
sub Display_seq {
    my $seq_p    = shift;
    my $num_line = @_ ? shift : 50;    ##set the number of characters in each line

    die "Display_seq: line width must be a positive integer"
        unless defined $num_line
            && $num_line =~ /\A\d+\z/
            && $num_line > 0;

    $$seq_p =~ s/\s//g;

    my $disp = '';
    for (my $i = 0; $i < length($$seq_p); $i += $num_line) {
        $disp .= substr($$seq_p, $i, $num_line) . "\n";
    }

    # Always leave at least a newline so the caller can print the header
    # and the sequence back to back.
    $$seq_p = length($disp) ? $disp : "\n";
    return $$seq_p;
}
#############################################