3 # Align paired bisulfite-converted DNA reads to a genome.
5 # This assumes that reads1.fastq are all from the converted strand
6 # (i.e. they have C->T conversions) and reads2.fastq are all from the
7 # reverse-complement (i.e. they have G->A conversions).
9 # "GNU parallel" needs to be installed.
15 lastdb -w 2 -u bisulfite_f.seed my_f mygenome.fa
16 lastdb -w 2 -u bisulfite_r.seed my_r mygenome.fa
18 $(basename $0) my_f my_r reads1.fastq reads2.fastq readgroup_id > results.maf
24 # Try to get the LAST programs into the PATH, if they aren't already:
25 PATH
=$PATH:$
(dirname $0)/..
/src
:$
(dirname $0)/..
/scripts
27 tmp
=/scratch
/brentp
/$$
28 trap 'rm -f $tmp.*' EXIT
30 cat > $tmp.fmat
<< 'EOF'
38 cat > $tmp.rmat
<< 'EOF'
46 cat > $tmp.
script << 'EOF'
49 lastal
-m10 -p $1.fmat
-s1 -Q1 -e120 -i1 "$2" "$4" > $t.t1f
50 lastal
-m10 -p $1.rmat
-s0 -Q1 -e120 -i1 "$3" "$4" > $t.t1r
51 last-merge-batches.py
$t.t1f
$t.t1r
> $t.t1
54 lastal
-m10 -p $1.fmat
-s0 -Q1 -e120 -i1 "$2" "$5" > $t.t2f
55 lastal
-m10 -p $1.rmat
-s1 -Q1 -e120 -i1 "$3" "$5" > $t.t2r
56 last-merge-batches.py
$t.t2f
$t.t2r
> $t.t2
59 last-pair-probs.py
-m0.1
$t.t1
$t.t2 |
60 perl
-F'(\s+)' -ane '$F[12] =~ y/ta/CG/ if /^s/ and $s++ % 2; print @F'
64 # use less as it does zless too
65 # Convert C to t, and all other letters to uppercase:
66 perl
-pe 'y/Cca-z/ttA-Z/ if $. % 4 == 2' <(less $3) |
split -l1800000 -a5 - $tmp.1
68 # Convert G to a, and all other letters to uppercase:
69 perl
-pe 'y/Gga-z/aaA-Z/ if $. % 4 == 2' <(less $4) |
split -l1800000 -a5 - $tmp.2
71 parallel
--noswap -j 4 --gnu --xapply sh
$tmp.
script $tmp "$1" "$2" ::: $tmp.1* ::: $tmp.2* > $tmp.merge.maf
74 maf-convert.py sam
-d $tmp.merge.maf
-r "ID:$rg SM:$rg"