task QC_Task {
    File tumorBam
    File tumorBamIdx
    File normalBam
    File normalBamIdx
    File regionFile
    File readGroupBlackList
    File captureNormalsDBRCLZip
    String caseId
    String controlId

    command <<<
        ###############################################
        # Copy Number QC for Capture

        #Make lane lists for tumor and normal
        #MakeLaneList_12
        java -jar /usr/local/bin/MakeLaneList.jar ${tumorBam} ${caseId}.lanelist.txt ;
        java -jar /usr/local/bin/MakeLaneList.jar ${normalBam} ${controlId}.lanelist.txt ;

        #make region coverages per lane for the tumor
        #RegionCovPerLane_18
        for CHROM in `seq 1 24` ; do
            echo "Working on $CHROM for ${tumorBam}"
            SCATTER_DIR="scatter.$CHROM" ;
            mkdir -v $SCATTER_DIR
            OUTPATH=`echo -ne "$SCATTER_DIR/chr$CHROM.${caseId}.rcl"` ;
            echo "OUTPATH is $OUTPATH"
            echo "chrom is $CHROM"
            java -jar /usr/local/bin/RegionCovPerLane.jar ${tumorBam} ${regionFile} $OUTPATH $CHROM
        done ;
        wait ;
        /usr/local/bin/rcl.gather.sh ${caseId}.rcl ;
        rm -rf scatter.* ;

        #make region coverages per lane for the control
        #RegionCovPerLane_18
        for CHROM in `seq 1 24` ; do
            echo "Working on $CHROM for ${normalBam}"
            SCATTER_DIR="scatter.$CHROM" ;
            mkdir -v $SCATTER_DIR
            OUTPATH=`echo -ne "$SCATTER_DIR/chr$CHROM.${controlId}.rcl"` ;
            echo "OUTPATH is $OUTPATH"
            echo "chrom is $CHROM"
            java -jar /usr/local/bin/RegionCovPerLane.jar ${normalBam} ${regionFile} $OUTPATH $CHROM
        done ;
        wait ;
        /usr/local/bin/rcl.gather.sh ${controlId}.rcl ;
        rm -rf scatter.* ;

        #run the MATLAB-based report
        #CopyNumberQCReport_27
        cp -vfr /CopyNumberQCReport_27/unzip/* .

        #Make a file of file paths: list the zip archive's contents, strip everything
        #except the file paths, and write the paths to a file
        unzip -l ${captureNormalsDBRCLZip} | sed -r "s/^\s+//g" | sed -r "s/^[0-9]+//g" | sed -r "s/^\s*//g" | sed -r "s/^\S*//g" | sed -r "s/^\s*[0-9:]*\s*//g" | grep -Pv '^Date\s+Time\s+Name' | grep -Pv '^[\-\s]*$' | grep -Pv '\.zip$' | grep -Pv '^files$' > capture_normals_db_wdl

        #this command actually performs the unzipping
        unzip ${captureNormalsDBRCLZip}

        ./run_fh_CopyNumberQCReport.sh /opt/MATLAB/MATLAB_Compiler_Runtime/v710/ ${caseId}_${controlId} \
            ${caseId}.rcl ${controlId}.rcl ${caseId}.lanelist.txt ${controlId}.lanelist.txt \
            ${readGroupBlackList} ${regionFile} capture_normals_db_wdl NA NA

        #zip up the output; the report files are prefixed with the caseId_controlId pair name
        zip CopyNumQCout.zip report.html num.mixups.txt ${caseId}_${controlId}_*
    >>>

    runtime {
        docker: "broadinstitute/eddiesqcdocker"
        memory: "24 GB"
        defaultDisks: "local-disk 100 SSD"
    }

    output {
        #lane lists
        File tumorBamLaneList="${caseId}.lanelist.txt"
        File normalBamLaneList="${controlId}.lanelist.txt"
        File tumorRCL="${caseId}.rcl"
        File normalRCL="${controlId}.rcl"
        #CopyNumQC
        File CopyNumQCOutZip="CopyNumQCout.zip"
    }
}

task PicardMultipleMetrics {
    File bam
    File bamIndex
    File refFasta
    File HapMapVCF
    File picardTargetIntervals
    File picardGenotypes
    File picardHapMap
    File picardBaitIntervals

    command <<<
        for BAM in ${bam} ; do
            BASE=`basename $BAM` ;
            echo "Processing $BAM ..." ;
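            #CollectMultipleMetrics makes a single pass over the BAM and runs each
            #collector named by a PROGRAM= argument, writing one metrics file per
            #collector prefixed with $BASE.multiple_metrics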
            /usr/local/jre1.8.0_73/bin/java -Xmx4g -jar /usr/local/bin/picardtools-2.1.0.jar CollectMultipleMetrics \
                CREATE_MD5_FILE=true I=$BAM O=$BASE.multiple_metrics R=${refFasta} PROGRAM=CollectAlignmentSummaryMetrics \
                PROGRAM=CollectInsertSizeMetrics PROGRAM=QualityScoreDistribution PROGRAM=MeanQualityByCycle \
                PROGRAM=CollectBaseDistributionByCycle PROGRAM=CollectSequencingArtifactMetrics \
                PROGRAM=CollectQualityYieldMetrics PROGRAM=CollectGcBiasMetrics ;
        done ;

        #zip up reports
        BASE=`basename ${bam}` ;
        zip picard_multiple_metrics.zip $BASE.multiple_metrics.*

        #collect oxoG metrics
        /usr/local/jre1.8.0_73/bin/java -Xmx4g -jar /usr/local/bin/picardtools-2.1.0.jar CollectOxoGMetrics \
            I=${bam} \
            O=oxoG_metrics.txt \
            R=${refFasta}

        #validation summary
        /usr/local/jre1.8.0_73/bin/java -Xmx4g -jar /usr/local/bin/picardtools-2.1.0.jar ValidateSamFile \
            I=${bam} MODE=SUMMARY > validation_summary.txt 2>&1

        #validation verbose
        /usr/local/jre1.8.0_73/bin/java -Xmx4g -jar /usr/local/bin/picardtools-2.1.0.jar ValidateSamFile \
            I=${bam} MODE=VERBOSE > validation_verbose.txt 2>&1

        #cross check
        #Exception in thread "main" java.lang.IllegalStateException: Haplotype map file must contain header
        /usr/local/jre1.8.0_73/bin/java -Xmx4g -jar /usr/local/bin/picardtools-2.1.0.jar \
            CrosscheckReadGroupFingerprints HAPLOTYPE_MAP=${picardHapMap} I=${bam} O="picard_crosscheck_report.txt"

        #CheckFingerprint
        /usr/local/jre1.8.0_73/bin/java -Xmx4g -jar /usr/local/bin/picardtools-2.1.0.jar CheckFingerprint \
            I=${bam} GENOTYPES=${picardGenotypes} HAPLOTYPE_MAP=${picardHapMap} \
            SUMMARY_OUTPUT="fingerprinting_summary_metrics.txt" DETAIL_OUTPUT="fingerprinting_detail_metrics.txt"

        #HS metrics
        /usr/local/jre1.8.0_73/bin/java -Xmx4g -jar /usr/local/bin/picardtools-2.1.0.jar CollectHsMetrics I=${bam} \
            BAIT_INTERVALS=${picardBaitIntervals} TARGET_INTERVALS=${picardTargetIntervals} OUTPUT="HSMetrics.txt"

        #Duplicates
        /usr/local/jre1.8.0_73/bin/java -Xmx4g -jar /usr/local/bin/picardtools-2.1.0.jar MarkDuplicates \
            I=${bam} O="duplicate_records.bam" METRICS_FILE="duplicate_metrics.txt"
    >>>

    runtime {
        docker: "broadinstitute/eddiesqcdocker"
        memory: "24 GB"
        defaultDisks: "local-disk 100 SSD"
    }

    output {
        File fpDetails="fingerprinting_detail_metrics.txt"
        File fpSummary="fingerprinting_summary_metrics.txt"
        File hsMetrics="HSMetrics.txt"
        File picardCCREport="picard_crosscheck_report.txt"
        File validation_summary="validation_summary.txt"
        File validation_verbose="validation_verbose.txt"
        File metricsReportsZip="picard_multiple_metrics.zip"
        File oxoG_metrics="oxoG_metrics.txt"
        File duplicateRecords="duplicate_records.bam"
        File duplicateMetrics="duplicate_metrics.txt"
    }
}

task ContESTTask {
    File tumorBam
    File tumorBamIdx
    File normalBam
    File normalBamIdx
    File refFasta
    File refFastaIdx
    File refFastaDict
    File exomeIntervals
    File SNP6Bed
    File HapMapVCF

    command <<<
        #run ContEst
        java -Xmx1024m -Djava.io.tmpdir=/tmp -cp /usr/local/bin/Queue-1.4-437-g6b8a9e1-svn-35362.jar org.broadinstitute.sting.gatk.CommandLineGATK \
            -T Contamination -I:eval ${tumorBam} -I:genotype ${normalBam} -et NO_ET \
            -L ${exomeIntervals} -L ${SNP6Bed} -isr INTERSECTION -R ${refFasta} -l INFO -pf ${HapMapVCF} \
            -o contamination.af.txt --trim_fraction 0.03 --beta_threshold 0.05 \
            -br contamination.base_report.txt -mbc 100 --min_genotype_depth 30 --min_genotype_ratio 0.8
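        #The block below parses contamination.af.txt: tr transposes the header row into one
        #column name per line, grep -Pin reports the 1-based index of the column named
        #"contamination", and cut then pulls that field, plus the column-1 name, from the
        #last (data) row.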
        #Get contamination column and value
        CONTAM_COL=`head -1 contamination.af.txt|tr "\t" "\n"|grep -Pin '^contamination$'|grep -Po '^\d+:'|tr -d ":"` ;
        CONTAM_VAL=`cut -f $CONTAM_COL contamination.af.txt|tail -1` ;
        CONTAM_NM=`cut -f1 contamination.af.txt|tail -1` ;

        #write contam data for validation
        echo -ne "$CONTAM_NM\t$CONTAM_VAL\n" > contamination_validation.array_free.txt

        #Contamination validation/consensus
        python /usr/local/populateConsensusContamination_v26/contaminationConsensus.py --pass_snp_qc false \
            --output contest_validation.output.tsv \
            --annotation contamination_percentage_consensus_capture \
            --array contamination_validation.array_free.txt --noarray contamination_validation.array_free.txt
    >>>

    runtime {
        docker: "broadinstitute/eddiesqcdocker"
        memory: "24 GB"
        defaultDisks: "local-disk 100 SSD"
    }

    output {
        File contamDataFile="contamination_validation.array_free.txt"
        File contestAFFile="contamination.af.txt"
        File contestBaseReport="contamination.base_report.txt"
        File validationOutput="contest_validation.output.tsv"
    }
}

task CrossCheckLaneFingerprints {
    File tumorBam
    File normalBam
    File tumorBamIdx
    File normalBamIdx
    String pairName
    File HaplotypeDBForCrossCheck
    String validationStringencyLevel

    command <<<
        #ConvertDependentsList[version=8] (custom!)
        #prepare input for the convert command
        echo -ne "${pairName}_Normal\t${normalBam}\n${pairName}_Tumor\t${tumorBam}\n" > conv_file.input.tsv

        #make the options file
        OPTIONS_FILE=${pairName}_CCLFP.options
        echo -ne "I=${normalBam}\nI=${tumorBam}\n" > $OPTIONS_FILE ;

        #Cross Check!
        #CrosscheckLaneFingerprints[version=9]
        mkdir -v tmp
        java -Xmx3g -jar /usr/local/bin/CrosscheckReadGroupFingerprints.jar OPTIONS_FILE=$OPTIONS_FILE \
            H=${HaplotypeDBForCrossCheck} TMP_DIR=`pwd`/tmp QUIET=true EXIT_CODE_WHEN_MISMATCH=0 \
            OUTPUT=crosscheck.stats.txt VALIDATION_STRINGENCY=${validationStringencyLevel}

        #CrosscheckLaneFingerprintsReport[version=10]
        #Run report
        /usr/local/bin/crosscheck_report.pl --stats crosscheck.stats.txt ${normalBam} ${tumorBam} conv_file.input.tsv
    >>>

    runtime {
        docker: "broadinstitute/eddiesqcdocker"
        memory: "24 GB"
        defaultDisks: "local-disk 100 SSD"
    }

    output {
        File crossCheckStats="crosscheck.stats.txt"
        File crossCheckReport="report.html"
    }
}

workflow QC_Workflow {
    File tumorBam
    File normalBam
    File tumorBamIdx
    File normalBamIdx
    File regionFile
    String caseId
    String controlId
    File readGroupBlackList
    File captureNormalsDBRCLZip
    String SampleName
    String pairName
    File refFasta
    File refFastaIdx
    File refFastaDict
    File exomeIntervals
    File SNP6Bed
    File HapMapVCF
    File HaplotypeDBForCrossCheck
    String validationStringencyLevel
    File picardGenotypes
    File picardHapMap
    File picardBaitIntervals
    File picardTargetIntervals

    call QC_Task {
        input:
            tumorBam=tumorBam,
            normalBam=normalBam,
            tumorBamIdx=tumorBamIdx,
            normalBamIdx=normalBamIdx,
            regionFile=regionFile,
            caseId=caseId,
            controlId=controlId,
            readGroupBlackList=readGroupBlackList,
            captureNormalsDBRCLZip=captureNormalsDBRCLZip
    }

    call ContESTTask {
        input:
            tumorBam=tumorBam,
            tumorBamIdx=tumorBamIdx,
            normalBam=normalBam,
            normalBamIdx=normalBamIdx,
            refFasta=refFasta,
            refFastaIdx=refFastaIdx,
            refFastaDict=refFastaDict,
            exomeIntervals=exomeIntervals,
            SNP6Bed=SNP6Bed,
            HapMapVCF=HapMapVCF
    }

    call CrossCheckLaneFingerprints {
        input:
            tumorBam=tumorBam,
            normalBam=normalBam,
            tumorBamIdx=tumorBamIdx,
            normalBamIdx=normalBamIdx,
            pairName=pairName,
            HaplotypeDBForCrossCheck=HaplotypeDBForCrossCheck,
            validationStringencyLevel=validationStringencyLevel
    }

    #Picard multiple metrics, run once per BAM
    call PicardMultipleMetrics as tumorMM {
        input:
            bam=tumorBam,
            bamIndex=tumorBamIdx,
            refFasta=refFasta,
            HapMapVCF=HapMapVCF,
            picardGenotypes=picardGenotypes,
            picardHapMap=picardHapMap,
            picardBaitIntervals=picardBaitIntervals,
            picardTargetIntervals=picardTargetIntervals
    }
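    #Collect the same Picard metrics for the normal BAM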
    call PicardMultipleMetrics as normalMM {
        input:
            bam=normalBam,
            bamIndex=normalBamIdx,
            refFasta=refFasta,
            HapMapVCF=HapMapVCF,
            picardGenotypes=picardGenotypes,
            picardHapMap=picardHapMap,
            picardBaitIntervals=picardBaitIntervals,
            picardTargetIntervals=picardTargetIntervals
    }
}