GATK's Pipeline for calling variants with rnaseq data

#!/usr/bin/sh

genome=/home/chenfan/mnt/sdc/RNAseq/hs37d5.fasta

refgene=/home/chenfan/mnt/sdc/RNAseq/hg19_Refgene.gtf

picard=/home/chenfan/mnt/sdc/RNAseq/picard.jar

gatk=/home/chenfan/mnt/sdc/RNAseq/GenomeAnalysisTK.jar

knownindel=/home/chenfan/VQSR/data/Mills_and_1000G_gold_standard.indels.b37.vcf

knownsnp=/home/chenfan/VQSR/data/1000G_phase1.snps.high_confidence.b37.vcf

result_path=/home/chenfan/mnt/sdc/RNAseq/result

#------------------------------STAR 2-pass alignment steps-----------------------------------------

echo "------------------------------STAR 2-pass alignment steps-----------------------------------------"

in_dir=${result_path}/1_bamtofastq_output

out_dir=${result_path}/2_star_output

cd ${out_dir}/1index

STAR --runMode genomeGenerate --genomeDir ${out_dir}/1index --genomeFastaFiles $genome --sjdbGTFfile $refgene --runThreadN 30

cd ${out_dir}/1pass

STAR --genomeDir ${out_dir}/1index --readFilesIn ${in_dir}/1.fastq  ${in_dir}/2.fastq --runThreadN 30

cd ${out_dir}/2index

STAR --runMode genomeGenerate --genomeDir ${out_dir}/2index --genomeFastaFiles $genome --sjdbFileChrStartEnd ${out_dir}/1pass/SJ.out.tab --sjdbOverhang 154 --runThreadN 30

cd ${out_dir}/2pass

STAR --genomeDir ${out_dir}/2index --readFilesIn ${in_dir}/1.fastq ${in_dir}/2.fastq  --runThreadN 30

#------------------------------Picard add read groups,sort,mark duplicates,and create index-----------------------------------------

echo "------------------------------Picard add read groups,sort,mark duplicates,and create index-----------------------------------------"

in_dir=${out_dir}

out_dir=${result_path}/3_picard_output

cd ${out_dir}

java -jar $picard AddOrReplaceReadGroups I=${in_dir}/2pass/Aligned.out.sam O=${out_dir}/sort_addRG_align.bam SO=coordinate RGID=hap1 RGLB=rnaseq RGPL=illumina RGPU=runname RGSM=C20

java -jar $picard MarkDuplicates I=${out_dir}/sort_addRG_align.bam O=${out_dir}/markdup_sort_addRG_align.bam  CREATE_INDEX=true VALIDATION_STRINGENCY=SILENT M=${out_dir}/output.metrics

#-------------------------------SplitNCigarReads---------------------------------------

echo "------------------------------SplitNCigarReads-----------------------------------------"

in_dir=${out_dir}

out_dir=${result_path}/4_splitncigarReads_output

cd ${out_dir}

java -jar $gatk -T SplitNCigarReads -R $genome -I ${in_dir}/markdup_sort_addRG_align.bam -o ${out_dir}/split_markdup_sort_addRG_align.bam -rf ReassignOneMappingQuality -RMQF 255 -RMQT 60 -U ALLOW_N_CIGAR_READS

#-------------------------------Indel Realignment ---------------------------------------

echo "------------------------------Indel Realignment -----------------------------------------"

in_dir=${out_dir}

out_dir=${result_path}/5_indelrealign_output

cd ${out_dir}

java -jar $gatk \

-T RealignerTargetCreator \

-nt 32 \

-R $genome \

-I ${in_dir}/split_markdup_sort_addRG_align.bam \

-o ${out_dir}/realign_interval.list \

-known $knownindel

java -jar $gatk \

-T IndelRealigner \

-R $genome \

-I ${in_dir}/split_markdup_sort_addRG_align.bam \

-known $knownindel \

-o ${out_dir}/realign_split_markdup_sort_addRG_align.bam \

-targetIntervals ${out_dir}/realign_interval.list

#-------------------------------Base Recalibration---------------------------------------

echo "------------------------------Base Recalibration-----------------------------------------"

in_dir=${out_dir}

out_dir=${result_path}/6_bqsr_output

cd ${out_dir}

java -jar $gatk \

-T BaseRecalibrator \

-nct 32 \

-R $genome \

-I ${in_dir}/realign_split_markdup_sort_addRG_align.bam \

-knownSites $knownsnp \

-knownSites $knownindel \

-o ${out_dir}/recal_data.table

java -jar $gatk \

-T PrintReads \

-nct 32 \

-R $genome \

-I ${in_dir}/realign_split_markdup_sort_addRG_align.bam \

-BQSR ${out_dir}/recal_data.table \

-o ${out_dir}/bqsr_realign_split_markdup_sort_addRG_align.bam

#-------------------------------Variant calling---------------------------------------

echo "------------------------------Variant calling-----------------------------------------"

in_dir=${out_dir}

out_dir=${result_path}/7_callvariants_output

cd ${out_dir}

java -jar $gatk \

-T HaplotypeCaller \

-nct 32 \

-R $genome \

-ploidy 1 \

-I ${in_dir}/bqsr_realign_split_markdup_sort_addRG_align.bam \

-dontUseSoftClippedBases \

-stand_call_conf 20.0 \

-o ${out_dir}/Hap1_rnaseq.vcf

#-------------------------------Variant filtering---------------------------------------

echo "------------------------------Variant filtering-----------------------------------------"

in_dir=${out_dir}

out_dir=${result_path}/8_filtervariants_output

cd ${out_dir}

java -jar $gatk \

-T VariantFiltration \

-R $genome \

-V ${in_dir}/Hap1_rnaseq.vcf \

-window 35 \

-cluster 3 \

--filterName FS -filter "FS>30.0" \

--filterName QD -filter "QD<2.0" \

-o ${out_dir}/filtered_Hap1_rnaseq.vcf

java -jar $gatk \

-T SelectVariants \

-ef \

-R $genome \

-V ${out_dir}/filtered_Hap1_rnaseq.vcf \

-o ${out_dir}/ef_Hap1_C20_rnaseq.vcf

©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容

  • #!/bin/bash PICARD=/home/lq/storage/call_variants/softwar...
    晨语凡心阅读 990评论 0 3
  • Spring Cloud为开发人员提供了快速构建分布式系统中一些常见模式的工具(例如配置管理,服务发现,断路器,智...
    卡卡罗2017阅读 135,253评论 19 139
  • Introduction What is Bowtie 2? Bowtie 2 is an ultrafast a...
    wzz阅读 5,921评论 0 5
  • 孩子,高考来临之即,我和妈妈不想用“学习砥砺人生,高考决定命运”等这样的口号来教导你。 我深深记得有一位作家...
    晨熙_88f9阅读 1,139评论 0 2
  • 朋友,一生中会分好几个阶段,每个阶段你都会结实新朋友。但请别忘了老朋友。 我从小学到大学,遇到所有的人又有...
    婳_阅读 148评论 0 0