#on tuber
#using tassel 4

#run once
####################################################
#get scripts from github
cd /export/home/jeremy/programs
git clone https://github.com/jeremyde/shell_scripts.git
#symlink and bwa index reference
mkdir /export/home/jeremy/gbs/bwa_indexed/Manihot_esculenta/
cd /export/home/jeremy/gbs/bwa_indexed/Manihot_esculenta/
ln -s /export/species/Manihot_esculenta/AM560-2/assembly/Mesculenta_147_hardmasked.fa .
ln -s /export/species/Manihot_esculenta/AM560-2/assembly/Mesculenta_147_hardmasked.fa.gz .
bwa index -a is Mesculenta_RM.fa.gz
####################################################

#Started on 11-15-13
export PATH="/export/home/jeremy/programs/tassel4-standalone":$PATH
export PATH="/export/home/jeremy/programs/vcftools_0.1.11/perl":$PATH
export PERL5LIB="/export/home/jeremy/programs/vcftools_0.1.11/perl":$PERL5LIB
mkdir /export/home/jeremy/gbs/gbs_11-15-13
cd /export/home/jeremy/gbs/gbs_11-15-13
mkdir reference ;mkdir fastq; mkdir tagCounts; mkdir mergedTBT; mkdir tbtbyte; mkdir mergedTagCounts; mkdir topm; mkdir vcf; mkdir vcf_merged; mkdir hmp
ln -s /export/home/jeremy/gbs/bwa_indexed/Manihot_esculenta/* reference/
ln -s /export/species/Manihot_esculenta/gbs/fastq/* fastq/
cp /export/species/Manihot_esculenta/gbs/keyfiles/KeyfileIITA100713.txt keyfile.txt
/export/home/jeremy/programs/shell_scripts/each_in_own_dir.sh fastq/
/export/home/jeremy/programs/shell_scripts/gbs/fastq_to_tag_count.sh fastq/ tagCounts/ keyfile.txt apeki
#runs for about 45 minutes ^
screen -d -L -m -S merge_mult run_pipeline.pl -fork1 -MergeMultipleTagCountPlugin -i tagCounts -o mergedTagCounts/cassava.cnt -c 50 -endPlugin -runfork1
screen -d -L -m -S merge_mult_t run_pipeline.pl -fork1 -MergeMultipleTagCountPlugin -i tagCounts -o mergedTagCounts/cassava.cnt -c 50 -t -endPlugin -runfork1
/export/home/jeremy/programs/shell_scripts/gbs/fastq_to_tbt.sh fastq/ tbtbyte/ keyfile.txt apeki mergedTagCounts/cassava.cnt
#runs for about 1 hour ^^
screen -d -L -m -S merge_tags_by_taxa run_pipeline.pl -fork1 -MergeTagsByTaxaFilesPlugin -x -i tbtbyte -o mergedTBT/cassava.tbt.byte -endPlugin -runfork1
#runs for about 4 hours ^
bwa aln -t 11 reference/Mesculenta_147_hardmasked.fa.gz mergedTagCounts/cassava.cnt.fq > mergedTagCounts/cassava.sai
bwa samse reference/Mesculenta_147_hardmasked.fa.gz mergedTagCounts/cassava.sai mergedTagCounts/cassava.cnt.fq > mergedTagCounts/cassava.sam
screen -d -L -m -S SAM_convert run_pipeline.pl -fork1 -SAMConverterPlugin -i mergedTagCounts/cassava.sam -o topm/cassava.topm.bin -endPlugin -runfork1
/export/home/jeremy/programs/shell_scripts/gbs/split_discoverySNP.sh ./mergedTBT/cassava.tbt.byte topm/cassava.topm.bin ./hmp/GBSGenos.chr+.hmp.txt 0.001 0.4 reference/Mesculenta_147_hardmasked.fa 1 12977 14
#runs for about 3 hours ^
/export/home/jeremy/programs/shell_scripts/gbs/merge_dup.sh hmp/GBSGenos.chr+.vcf hmp/GBSGenos.merged+.vcf 1 12977 14
#runs for about 1.5 hours ^
mv vcf/GBSGenos.merged* vcf_merged
cd vcf_merged
find . -size 14k -exec rm {} \;
vcf-concat GBSGenos.merged* > File.vcf
#runs for about 0.5 Hours ^
cat File.vcf | vcf-annotate -f +/c=3,10/d=4180 -H >File.filtered.vcf
#runs for about 2 hours ^
cat File.filtered.vcf | awk -f /export/home/jeremy/programs/shell_scripts/gbs/snp_and_indel_sep.awk
mv snv.vcf File.filtered.snp_only.vcf
gzip File.filtered.snp_only.vcf