Calls peaks using MUSIC software, following from combining samples into condition/replicate aware directory structures (see D0.combine_by_condition.sh. The provided tab-delimited table used in that step can be reused for this script.

E1_call_peaks_music.sh

Arguments

COND

condition directory from a repdat folder where sample and inputs" are organized based on condition."

TABLE

tab-delimited table specifying the CONDITION_ID, SAMPLE_ID," and INPUT_ID as columns, and a fourth TYPE column for punctate" vs. broad peak-calling, with any additional columns being" ignored. The first line (header) will be ignored."

Details

This script will output a new folder, music, within the condition directory.

This script requires the following:

  • tab separated database specifying condition, sample, and input (NA if none)

  • MUSIC v???

See also

https://github.com/gersteinlab/MUSIC

Examples


## Preset parameters
MAPPABILITY=/home/ra364/Reference/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/mappability_mm10_50bp

##' 0. Read in Parameters
COND=$(readlink -f $1)
TAB=$(readlink -f $2)

##' 0. Echo parameters back
echo ""
echo "Beginning to process your ChIP-seq/ATAC-seq data using MUSIC.."
echo ""
echo "Working on samples in: $COND"
echo ""


##' 1. Routine for all samples within condition

##' Per sample within the sample directory
for I in $(ls -d $COND/samples/*); do 
    PREFIX=$(basename $I)
    SDIR=$(readlink -f $I)

    ##' Reading database table for parameters
    SAMPLEID=$(grep $PREFIX $TAB | awk '{print $2}')
    INPUTID=$(grep $PREFIX $TAB | awk '{print $3}')
    PEAKTYPE=$(grep $PREFIX $TAB | awk '{print $4}')
    SEQTYPE=$(grep $PREFIX $TAB | awk '{print $5}')

    IDIR=$(readlink -f $COND/inputs/$INPUTID)
    IPREFIX=$(basename $IDIR)

    echo ""
    echo ".. Calling peaks for: $PREFIX"

    ##' Make directories 
    echo ".. Make directories.."
    mkdir -p $SDIR/music/chip/preprocess $SDIR/music/chip/sorted $SDIR/music/chip/dedup 
    mkdir -p $SDIR/music/chip/SSER $SDIR/music/chip/ER
    mkdir -p $IDIR/music/input/preprocess $IDIR/music/input/sorted $IDIR/music/input/dedup

    ##' Interpreting database params
    ##' Sample processing
    echo ".. Sample processing.."
    cd $SDIR/music/chip/preprocess
    samtools view $SDIR/bam_clean/${PREFIX}.bam | MUSIC -preprocess SAM stdin $SDIR/music/chip/preprocess

    cd $SDIR/music/chip/sorted
    MUSIC -sort_reads $SDIR/music/chip/preprocess $SDIR/music/chip/sorted

    cd $SDIR/music/chip/dedup
    MUSIC -remove_duplicates $SDIR/music/chip/sorted 2 $SDIR/music/chip/dedup

    ##' Input processing - check if it has already been processed e.g. chr_ids IS zero size
    if [ $INPUTID != "NA" ] && [ ! -s $IDIR/music/input/dedup/chr_ids.txt ]; then
	echo ".. Input processing.."
	cd $IDIR/music/input/preprocess
	samtools view $IDIR/bam_clean/${IPREFIX}.bam | MUSIC -preprocess SAM stdin $IDIR/music/input/preprocess

	cd $IDIR/music/input/sorted
	MUSIC -sort_reads $IDIR/music/input/preprocess $IDIR/music/input/sorted

	cd $IDIR/music/input/dedup
	MUSIC -remove_duplicates $IDIR/music/input/sorted 2 $IDIR/music/input/dedup

	cd $SDIR/music/chip
    fi

    ##' Run MUSIC with default params and auto select of l_p parameter
    ##' Only works for data with INPUT specified and processed
    cd $SDIR/music/chip/SSER
    if [ $INPUTID != "NA" ] && [ -s $IDIR/music/input/dedup/chr_ids.txt ]; then
	##' Check peaktype, run appropriate caller
	if [ $PEAKTYPE == "broad" ]; then
	    echo ".. Using broad setting.."
	    CALLER="-get_optimal_broad_ERs"
	else
	    echo ".. Using punctate setting.."
	    CALLER="-get_optimal_punctate_ERs"
	fi

	echo ".. Run peak caller.."
	run_MUSIC.csh $CALLER $SDIR/music/chip/dedup $IDIR/music/input/dedup $MAPPABILITY
    fi

    ##' Run MUSIC for data without input specified
    if [ $INPUTID == "NA" ]; then
	echo ".. Run peak caller without input.."
	echo ".. Assuming ATAC-seq data, using get_multiscale_punctate_ERs mode.."
	MUSIC -get_multiscale_punctate_ERs -chip $SDIR/music/chip/dedup -mapp $MAPPABILITY -l_mapp 50 
    fi

    ##' Move results and add "chr" to first field
    mv ERs* ../ER 
    cd ../ER
    for J in $(ls ERs*); do
	awk '{OFS = "\t"; FS = "\t"} {$1="chr"$1; print $0}' $J > tmp.txt
	mv tmp.txt $J
    done

    echo ""
done


echo ""
echo "Peak-calling completed."
echo ""