This script follows from A0.get_samples.sh to process ChIP-seq/ATAC-seq sequencing data, including adapter trimming, alignment, duplicate removal, and quality control calculation comparing raw vs. processed.

B0_PE_chip_atac_process.sh

Arguments

PREFIX

sample directory where fastq files are located in a" directory fastq (thanks to 00.get-samples.sh) "

THREADS

allocated threads for the job; note that memory amt" proportional to number of threads / 20 * 128G (Ruddle)" which ends up being about 6GB per thread"

Details

This script requires the following:

  • cutadapt 1.10

  • bowtie2 2.2.9

  • samtools 1.3.1, htslib 1.3.1

  • fastqc 0.11.2

  • quip 1.1.8

Examples


#' Usage statement
if [ $# -eq 0 ] || [ $1 == "-h" ] || [ $1 == "--help" ]; then
    echo "Usage: $0  "
    echo ""
    echo "  Processes Paired-End ChIP-seq and ATAC-seq data, following from the"
    echo "acquisition of said data via 00.get-samples.sh. This pipeline includes"
    echo "adapter trimming, alignment, duplicate removal, and QC calculations."
    echo ""
    echo ""
    exit 1
fi

#' Preset parameters
MEMPERTHREAD=6    # GB; 128 GB of RAM / 20 cores per node
GENOMEDIR=/home/ra364/Reference/Mus_musculus/Ensembl/GRCm38/Sequence/Bowtie2Index/genome
TRUSEQ_LEFT=AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC
TRUSEQ_RIGHT=AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT
BLACKLIST=/home/ra364/Reference/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/mm10_blacklist_ENSEMBL.bed

#' 0. Read in Parameters
SDIR=$(readlink -f $1)            # Sample directory
PREFIX=$(basename $SDIR)          # prefix for files 
THREADS=$2

#' 0. Echo parameters back
echo ""
echo "Beginning to process your paired-end ChIP-seq/ATAC-seq data.."
echo ".. The Bowtie2 Index is: $GENOMEDIR"
echo ".. Memory per thread is: $MEMPERTHREAD GB"
echo ".. Cutting the TruSeq Adapters (see cutadapt documentation)"
echo ".. Filtering reads from the blacklist: $BLACKLIST"
echo ""
echo "Working on: $SDIR"
echo ".. Using prefix: $PREFIX"
echo ".. Using $THREADS threads"
echo ""


#' 1. Concatenate fastq files into R1 and R2
echo "01. Concatenating fastq files together.."
mkdir -p $SDIR/raw
cd $SDIR
# cat fastq/*R1*.fastq.gz > raw/${PREFIX}_R1.fastq.gz
# cat fastq/*R2*.fastq.gz > raw/${PREFIX}_R2.fastq.gz
cat <(quip --stdout fastq/*R1*.fastq.qp) <(gunzip --stdout fastq/*R1*.fastq.gz) | gzip - > raw/${PREFIX}_R1.fastq.gz
cat <(quip --stdout fastq/*R2*.fastq.qp) <(gunzip --stdout fastq/*R2*.fastq.gz) | gzip - > raw/${PREFIX}_R2.fastq.gz


#' 2. Cut adapters (TruSeq)
echo "02. Cutting adapters and trimming poor quality ends.."
mkdir -p $SDIR/cutadapt
cd $SDIR/cutadapt

cutadapt \
    -a $TRUSEQ_LEFT -A $TRUSEQ_RIGHT \
    --quality-cutoff=30,20 \
    -o ${PREFIX}_R1.cut.fastq -p ${PREFIX}_R2.cut.fastq \
    ../raw/${PREFIX}_R1.fastq.gz ../raw/${PREFIX}_R2.fastq.gz > ${PREFIX}_cutadapt-report.txt

echo ".. Zipping up the cut and trimmed sequences.."
gzip -f ${PREFIX}_R1.cut.fastq
gzip -f ${PREFIX}_R2.cut.fastq


#' 3. Alignment with Bowtie2
echo "03. Aligning with Bowtie2..this will take a while.."
mkdir -p $SDIR/bowtie2
cd $SDIR/bowtie2

bowtie2 \
    --threads $THREADS \
    --very-sensitive \
    --maxins 2000 \
    --no-discordant \
    --met-file ${PREFIX}_metrics.log \
    -x $GENOMEDIR \
    -1 ../cutadapt/${PREFIX}_R1.cut.fastq.gz \
    -2 ../cutadapt/${PREFIX}_R2.cut.fastq.gz 2> tmp_summary.log | samtools view -bS - -o ${PREFIX}_bt2.bam 

echo ".. Cleaning summary file.."
grep -v Warning tmp_summary.log > ${PREFIX}_summary.log

#' 4. Samtools pipeline
echo "04. Processing with samtools to sort, fixmates, remove dups, and index.."
mkdir -p $SDIR/samtools
cd $SDIR/samtools 

echo ".. Sorting by name and fixing mates.."
samtools sort -n -@ $THREADS -m $(($MEMPERTHREAD * $THREADS))G -T tmp -O sam ../bowtie2/${PREFIX}_bt2.bam | \
    samtools fixmate -r - fixm.bam

echo ".. Sorting by coordinates.."
samtools sort -@ $THREADS -m $(($MEMPERTHREAD * $THREADS))G -T tmp -O bam fixm.bam > fixms.bam

echo ".. Removing blacklisted regions.."
samtools view fixms.bam -b -h -o ${PREFIX}_in-blacklist.bam -U fixmsb.bam -L $BLACKLIST

echo ".. Removing duplicates.."
samtools rmdup fixmsb.bam ${PREFIX}_bt2-st.bam > /dev/null 2>&1

echo ".. Indexing.."
samtools index ${PREFIX}_bt2-st.bam

echo ".. Cleaning up.."
rm fixm.bam; rm fixms.bam; rm fixmsb.bam

echo ".. Calculating samtools statistics.."
samtools flagstat ${PREFIX}_bt2-st.bam > ${PREFIX}_bt2-st_flagstat.log
samtools idxstats ${PREFIX}_bt2-st.bam > ${PREFIX}_bt2-st_idxstats.log


#' 5. Quality Control
echo "05. Running FastQC on raw and processed data.."
mkdir -p $SDIR/FastQC
cd $SDIR/FastQC
mkdir -p fqc_raw; mkdir -p fqc_bt2-st

echo ".. Processing raw data.."
zcat ../raw/${PREFIX}*fastq.gz | fastqc stdin -o fqc_raw --noextract -t $THREADS
mv fqc_raw/stdin_fastqc.zip ${PREFIX}_raw_fastqc.zip

echo ".. Processing processed data.."
fastqc ../samtools/${PREFIX}_bt2-st.bam -o fqc_bt2-st --noextract -t $THREADS


#' 6. Success?
echo ""
echo "Pipeline completed."