new file: cell2loc.py
[GalaxyCodeBases.git] / perl / etc / WoodyMiaoLin / PbePopGen / make_ped.pl
blob845d94c9c63325f59f3f86bfe3ee0c6fcfead3e3
1 #!/usr/bin/perl
2 use strict;
3 use warnings;
5 die "This program makes PED file using gzipped fasta files.\nAuther: Woody
6 Usage: $0 <list (column1: sample, column2: file, column3: 1=male|2=female, column4: population)> <output prefix>\n" if @ARGV < 2;
8 open LIST, "<", "$ARGV[0]";
9 open GT, ">", "$ARGV[1].ped";
10 open SNP, ">", "$ARGV[1].pedsnp";
11 open IND, ">", "$ARGV[1].pedind";
13 my @id;
14 my %sex;
15 my %pop;
16 my %file;
17 while (<LIST>) {
18 chomp;
19 my @a = split / /;
20 push @id, $a[0];
21 $sex{$a[0]} = $a[2];
22 $pop{$a[0]} = $a[3];
23 open $file{$a[0]}, "-|", "zcat $a[1]";
25 close LIST;
30 my %chr_gt;
31 my %chr_coo;
32 while (readline $file{$id[0]}) {
33 chomp;
34 s/^>chr//;
35 last unless /^\w\d$/;
36 my $chr0 = $_;
37 my $seq0 = readline $file{$id[0]};
38 chomp $seq0;
39 my $len = length $seq0;
40 my @seq;
41 push @seq, $seq0;
42 for (@id) {
43 next if $_ eq $id[0];
44 my $chr1 = readline $file{$_};
45 chomp $chr1;
46 warn if $chr1 ne ">chr$chr0";
47 my $seq1 = readline $file{$_};
48 push @seq, $seq1;
50 for my $coo (0 .. $len-1) {
51 my @base;
52 my %base;
53 for (@seq) {
54 my $b = substr($_, $coo, 1);
55 push @base, $b;
56 ++$base{$b};
58 my $nb = keys %base;
59 if ($base{N}) {
60 next;
61 # next if $nb == 2;
62 # next if $base{N} > 10;
63 } else {
64 next unless $nb == 2;
66 for (0 .. @id-1) {
67 $chr_gt{$chr0}[$_] .= $base[$_];
69 push @{$chr_coo{$chr0}}, $coo;
73 my %base_gt = ("A","A A", "C","C C", "G","G G", "T","T T", "R","A G", "Y","C T", "M","A C", "K","G T", "S","G C", "W","A T", "N","0 0");
74 for my $i (0 .. @id-1) {
75 close $file{$id[$i]};
76 print IND "0\t$id[$i]\t0\t0\t$sex{$id[$i]}\t$pop{$id[$i]}\n";
77 print GT "0\t$id[$i]\t0\t0\t$sex{$id[$i]}\t$pop{$id[$i]}";
78 for my $c (sort keys %chr_gt) {
79 my $len = length $chr_gt{$c}[$i];
80 for (0 .. $len-1) {
81 my $b = substr $chr_gt{$c}[$i], $_, 1;
82 print GT "\t$base_gt{$b}";
85 print GT "\n";
87 close IND;
88 close GT;
89 my %chr_num = qw / A1 1 A2 2 A3 3 B1 4 B2 5 B3 6 B4 7 C1 8 C2 9 D1 10 D2 11 D3 12 D4 13 E1 14 E2 15 E3 16 F1 17 F2 18 /;
90 for my $c (sort keys %chr_coo) {
91 for (0 .. @{$chr_coo{$c}}-1) {
92 print SNP "$chr_num{$c}\tchr${c}snp";
93 printf SNP "%08d", 1+$_;
94 print SNP "\t0\t$chr_coo{$c}[$_]\n";
97 close SNP;