#!/bin/bash
#
# Driver for a Genome STRiP (SVToolkit) run on a single input.
#
# Usage: <script> BAM GENDER_MAP PREFIX
#   BAM         input handed to the Queue scripts with -I
#   GENDER_MAP  file handed to the Queue scripts with -genderMapFile
#   PREFIX      prefix for the run directory (PREFIX.parallel) and the output VCF names
#
# If you adapt this script for your own use, you will need to set these two variables based on your environment.
# SV_DIR is the installation directory for SVToolkit - it must be an exported environment variable.
# SV_TMPDIR is a directory for writing temp files, which may be large if you have a large data set.

# Fail early instead of silently building names such as ".parallel" from an empty prefix.
if [ "$#" -lt 3 ]; then
  echo "Usage: $0 <bam> <genderMapFile> <outputPrefix>" >&2
  exit 1
fi

# Assign first, export second, so a failing "cd .." is not masked by export's own exit status.
SV_DIR=$(cd .. && pwd) || { echo "Error: cannot resolve SV_DIR from the parent directory" >&2; exit 1; }
export SV_DIR
SV_TMPDIR=./whole_tmpdir

echo "${SV_DIR}"
runDir="${3}.parallel"
bam="$1"
gender="$2"
DIR="/MGMSTAR1/SHARED/ANALYSIS/RESOURCES/SVToolkit/"
sites="${3}.parallel.discovery.vcf"
genotypes="${3}.parallel.genotypes.vcf"
echo "${bam}"

# These executables must be on your path.
for tool in java Rscript samtools; do
  command -v "${tool}" > /dev/null || { echo "Error: ${tool} not found on PATH" >&2; exit 1; }
done

# For SVAltAlign, you must use the version of bwa compatible with Genome STRiP.
export PATH="${SV_DIR}/bwa:${PATH}"
# Only append the previous value when there is one: a trailing ':' would add the
# current directory to the library search path.
export LD_LIBRARY_PATH="${SV_DIR}/bwa${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"

mx="-Xmx4g"
classpath="${SV_DIR}/lib/SVToolkit.jar:${SV_DIR}/lib/gatk/GenomeAnalysisTK.jar:${SV_DIR}/lib/gatk/Queue.jar"
SVMASK="/MGMSTAR1/SHARED/ANALYSIS/RESOURCES/SVToolkit/all.svmask.fasta"

mkdir -p "${runDir}/logs" || exit 1
mkdir -p "${runDir}/metadata" || exit 1

# Display version information.
java -cp "${classpath}" "${mx}" -jar "${SV_DIR}/lib/SVToolkit.jar"
#cmd=java -cp ${classpath} ${mx}
echo "${classpath},${mx}"

# Run preprocessing.
# For large scale use, you should use -reduceInsertSizeDistributions. The upstream note calls it
# too slow for the installation test; the preprocessing command below passes it as true anyway.
# The method employed by -computeGCProfiles requires a GC mask and is currently only supported for human genomes.
# Reference genome used by both Queue runs in this section.
ref_fasta="/MGMSTAR1/SHARED/RESOURCES/HG19_GENOME/hg19.fa"

# Preprocessing: runs SVPreprocess.q through Queue; metadata goes to ${runDir}/metadata
# and job logs to ${runDir}/logs. The script aborts if the run fails.
java -XX:+UseParallelOldGC -XX:ParallelGCThreads=30 -cp "${classpath}" "${mx}" \
    org.broadinstitute.gatk.queue.QCommandLine \
    -S "${SV_DIR}/qscript/SVPreprocess.q" \
    -S "${SV_DIR}/qscript/SVQScript.q" \
    -gatk "${SV_DIR}/lib/gatk/GenomeAnalysisTK.jar" \
    --disableJobReport \
    -cp "${classpath}" \
    -configFile conf/genstrip_installtest_parameters.txt \
    -tempDir "${SV_TMPDIR}" \
    -R "${ref_fasta}" \
    -genomeMaskFile "${SVMASK}" \
    -ploidyMapFile "${DIR}/Homo_sapiens_assembly19.ploidymap.txt" \
    -copyNumberMaskFile /MGMSTAR1/SHARED/ANALYSIS/RESOURCES/SVToolkit/all_chr_gcmask.fasta \
    -genderMapFile "${gender}" \
    -runDirectory "${runDir}" \
    -md "${runDir}/metadata" \
    -disableGATKTraversal \
    -useMultiStep \
    -reduceInsertSizeDistributions true \
    -computeGCProfiles true \
    -computeReadCounts true \
    -jobLogDir "${runDir}/logs" \
    -I "${bam}" \
    -run \
    || exit 1
echo "pre-processing_completed"

# Run discovery.
# Writes the discovered sites to <prefix>entire.deletions.vcf (not to ${sites}).
# -XX:ParallelGCThreads=40 used to be passed twice; once is enough.
java -XX:+UseParallelOldGC -XX:ParallelGCThreads=40 -cp "${classpath}" "${mx}" \
    org.broadinstitute.gatk.queue.QCommandLine \
    -S "${SV_DIR}/qscript/SVDiscovery.q" \
    -S "${SV_DIR}/qscript/SVQScript.q" \
    -gatk "${SV_DIR}/lib/gatk/GenomeAnalysisTK.jar" \
    --disableJobReport \
    -cp "${classpath}" \
    -configFile conf/genstrip_installtest_parameters.txt \
    -tempDir "${SV_TMPDIR}" \
    -R "${ref_fasta}" \
    -genomeMaskFile "${SVMASK}" \
    -genderMapFile "${gender}" \
    -runDirectory "${runDir}" \
    -md "${runDir}/metadata" \
    -disableGATKTraversal \
    -jobLogDir "${runDir}/logs" \
    -minimumSize 100 \
    -maximumSize 1000000 \
    -suppressVCFCommandLines \
    -I "${bam}" \
    -O "${3}.entire.deletions.vcf" \
    -run \
    || exit 1
echo "discovery_Completed"

# Disabled benchmark comparison of the discovery output:
#(grep -v ^##fileDate= ${sites} | grep -v ^##source= | grep -v ^##reference= | diff -q - benchmark/${sites}) \
#    || { echo "Error: test results do not match benchmark data"; exit 1; }

# Run genotyping on the discovered sites.
# Genotyping: runs SVGenotyper.q through Queue on the sites in <prefix>entire.deletions.vcf
# and writes the genotypes to ${genotypes}. The script aborts if the run fails.
java -cp "${classpath}" "${mx}" \
    org.broadinstitute.gatk.queue.QCommandLine \
    -S "${SV_DIR}/qscript/SVGenotyper.q" \
    -S "${SV_DIR}/qscript/SVQScript.q" \
    -gatk "${SV_DIR}/lib/gatk/GenomeAnalysisTK.jar" \
    --disableJobReport \
    -cp "${classpath}" \
    -configFile conf/genstrip_installtest_parameters.txt \
    -tempDir "${SV_TMPDIR}" \
    -R /MGMSTAR1/SHARED/RESOURCES/HG19_GENOME/hg19.fa \
    -genomeMaskFile "${SVMASK}" \
    -genderMapFile "${gender}" \
    -runDirectory "${runDir}" \
    -md "${runDir}/metadata" \
    -disableGATKTraversal \
    -jobLogDir "${runDir}/logs" \
    -I "${bam}" \
    -vcf "${3}.entire.deletions.vcf" \
    -O "${genotypes}" \
    -run \
    || exit 1

# Disabled benchmark comparison of the genotyping output:
#(grep -v ^##fileDate= ${genotypes} | grep -v ^##source= | grep -v ^##contig= | grep -v ^##reference= | diff -q - benchmark/${genotypes}) \
#    || { echo "Error: test results do not match benchmark data"; exit 1; }