modified: makefile
[GalaxyCodeBases.git] / tools / genotyping / pl / groupreader.pl
blob10889bf5d840c40dcec3eeee962483931024e8e8
#!/bin/env perl
# groupreader.pl - attach Tmap positions to grouped marker records.
#
# Usage: perl groupreader.pl <group_file> <tmap output> <Out_file>
#
# <group_file>   Records terminated by a line containing only ">".  The first
#                line of a record is "<id>=<name>"; every following line is
#                kept verbatim as a data line of that record.
# <tmap output>  First line must start with "name".  Each later line is split
#                on whitespace; field 1 is a record ID, field 2 is the value
#                appended to the data lines (held in $cm).
# <Out_file>     For each Tmap line, in Tmap order: "#<name>", then every data
#                line of the record followed by a tab and the Tmap value, then
#                a line holding only "#".
use strict;
use warnings;
use lib '/nas/RD_09C/resequencing/soft/lib';
#use GalaxyXS::ChromByte;
use Data::Dump qw(ddx);

# All three file names are required.  Running with no arguments at all is
# treated as a request for help (exit 0); a partial argument list is an error.
unless (@ARGV >= 3) {
    print "perl $0 <group_file> <tmap output> <Out_file>\n";
    exit(@ARGV ? 1 : 0);
}

my ($in, $mapf, $outf) = @ARGV;

# id => [ name, \@data_lines ]
my %MarkerGroup;

open my $group_fh, '<', $in or die "[x]Cannot open group file '$in': $!";
{
    # Records end with ">\n".  The change to $/ is confined to this block so
    # that the Tmap file below is still read one line at a time.
    local $/ = ">\n";
    while (my $record = <$group_fh>) {
        chomp $record;                      # strips the trailing ">\n"
        my @dat = split /\n/, $record;
        next unless @dat;                   # empty record, nothing to store
        #print join("|",@dat),"\n";
        my ($id, $name) = split /=/, shift(@dat), 2;
        #print "$id\t$name\n";
        die "[x]ID confict found !" if exists $MarkerGroup{$id};
        $MarkerGroup{$id} = [$name, \@dat];
    }
}
close $group_fh;
#ddx \%MarkerGroup;

open my $tmap_fh, '<', $mapf or die "[x]Cannot open Tmap file '$mapf': $!";
my $header = <$tmap_fh>;
# An empty file yields undef here; check definedness before matching.
die "[x]Not a Tmap output file !"
    unless (defined($header) && $header =~ /^name/);

open my $out_fh, '>', $outf or die "[x]Cannot open output file '$outf': $!";
while (my $line = <$tmap_fh>) {
    # the file is TSV, but, ID comes with tailing 20h ...
    my ($id, $cm) = split /\s+/, $line;
    next unless defined $id;                # blank or whitespace-only line

    # Fail with a message naming the marker instead of dereferencing undef.
    my $group = $MarkerGroup{$id}
        or die "[x]ID '$id' ($mapf line $.) not found in group file '$in'\n";

    unless (defined $cm) {
        # Keep the output shape (empty second column) but say so explicitly.
        warn "[!]No position for ID '$id' at $mapf line $.\n";
        $cm = '';
    }

    my ($name, $datref) = @{$group};
    print {$out_fh} "#$name\n";
    for my $dat_line (@{$datref}) {
        print {$out_fh} $dat_line, "\t$cm\n";
    }
    print {$out_fh} "#\n";
}
close $tmap_fh;
# Buffered write errors (e.g. disk full) only surface when the handle closes.
close $out_fh or die "[x]Cannot finish writing '$outf': $!";

__END__
49 ./groupreader.pl deChr12.group deChr12.imo deChr12.cm
50 cat ../chrorder | while read a;do ./groupreader.pl de$a.group de$a.imo de$a.cm;done &
52 http://www.maizemap.org/iMapDB/Overview/Anchoring_Rules.html
53 Anchoring Rules
54 Rules developed to help make unambiguous contig: genetic map assignments
56 Rule 1.
57 One contig should have one position on the integrated map.
59 Rule 2.
60 Contig:marker associations made by hybridization of a probe to a single BAC in the contig will be filtered out before anchoring. Contig:marker associations made by detection of a single BAC by PCR-based analysis of the BAC DNA pools will be accepted if no other conflicts exist for that contig.
62 Rule 3.
63 If two linked markers* hit one shared contig, the contig will be anchored to the position of those markers, even if the markers also hit other contigs.
65 Rule 4.
66 Multiple contigs can be assigned to a single position if they are detected by probes corresponding to closely linked markers.
68 Rule 5.
69 If a marker detects multiple BACs that are uniquely assembled in one contig, and no conflicts exist for that contig, the contig can be assigned to the locus corresponding to that marker.
72 *. Closely linked markers are those that lie within 5 marker positions or within 10 cM of their neighbors on the genetic map; markers with the same coordinates are considered to be at the same position.