This script follows R0.process_rna.sh and quantifies RNA-seq data, using Kallisto to estimate transcript-level abundances.

R1_run_kallisto.sh

Arguments

PREFIX

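Sample directory where the fastq files are located, in a subdirectory described here as fastq (thanks to 00.get-samples.sh). Note that the script itself reads the files from the raw subdirectory, as <PREFIX>_R1*fastq.gz and <PREFIX>_R2*fastq.gz.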

THREADS

Number of threads allocated to the job; note that the memory amount is proportional to the number of threads (threads / 20 * 128 GB on Ruddle), which ends up being about 6 GB per thread.

Details

This script requires the following:

  • samtools 1.3.1, htslib 1.3.1

  • kallisto >=0.43.0

Examples


#' Preset parameters
MEMPERTHREAD=6    # GB; 128 GB of RAM / 20 cores per node
IDX=/home/ra364/Reference/Mus_musculus/Ensembl/GRCm38/Sequence/KallistoIndex/transcripts.idx

#' 0. Read in Parameters
#'    $1  sample directory (required)
#'    $2  number of threads (optional; defaults to 1)
if [ -z "${1:-}" ]; then
    echo "Usage: $(basename "$0") SAMPLE_DIR [THREADS]" >&2
    exit 1
fi

#' Expansions are quoted so that sample paths containing spaces stay intact.
SDIR=$(readlink -f "$1")          # Sample directory (absolute path)
if [ ! -d "$SDIR" ]; then
    #' Stop here: with an empty or bogus SDIR every later path
    #' (e.g. $SDIR/kallisto) would point somewhere unintended.
    echo "ERROR: sample directory not found: $1" >&2
    exit 1
fi
PREFIX=$(basename "$SDIR")        # prefix for files
THREADS=${2:-1}

#' THREADS is handed to kallisto as --threads=$THREADS, so reject anything
#' that is not a positive integer up front.
if [[ ! "$THREADS" =~ ^[1-9][0-9]*$ ]]; then
    echo "ERROR: THREADS must be a positive integer, got: $THREADS" >&2
    exit 1
fi

#' Reads assignment
#' R1=$SDIR/cutadapt/${PREFIX}_R1.cut.fastq.gz
#' R2=$SDIR/cutadapt/${PREFIX}_R2.cut.fastq.gz
#' NOT WORKING WITH CUTADAPT OUTPUT FOR WHATEVER REASON
#'
#' R1 and R2 hold glob PATTERNS, not file names: the shell expands them
#' later, wherever they are used unquoted.
R1="$SDIR/raw/${PREFIX}_R1*fastq.gz"
R2="$SDIR/raw/${PREFIX}_R2*fastq.gz"

#' reads_exist PATTERN
#' Returns 0 when at least one regular file matches the glob PATTERN and
#' 1 otherwise. A plain `[ -f $PATTERN ]` breaks as soon as the pattern
#' matches more than one file (e.g. several lanes) or the path contains
#' spaces, so every match is tested individually here.
reads_exist() {
    local IFS=      # empty IFS: no word splitting, pathname expansion only
    local match
    # $1 is intentionally unquoted so that the glob is expanded
    for match in $1; do
        if [ -f "$match" ]; then
            return 0
        fi
    done
    return 1
}

#' Check if R2 does not exist -> if true, run in single end mode
if reads_exist "$R2"; then
    SINGLE_END=OFF
else
    SINGLE_END=ON
fi

#' R1 is required in both modes; flag a missing R1 early instead of leaving
#' it to surface as a kallisto error.
if ! reads_exist "$R1"; then
    echo "WARNING: no R1 reads match: $R1" >&2
fi



#' 0. Echo parameters back
#'
#' report_parameters
#' Prints the run configuration to stdout. Reads the globals IDX,
#' MEMPERTHREAD, SDIR, PREFIX, THREADS, SINGLE_END, R1 and R2; takes no
#' arguments and changes nothing. R1 / R2 are printed as stored (i.e. as
#' glob patterns), R2 only in paired-end mode.
report_parameters() {
    local layout

    #' Anything other than SINGLE_END=ON (including unset) is reported as
    #' paired-end. The test is quoted so an empty value cannot break it.
    if [[ "$SINGLE_END" == "ON" ]]; then
        layout="single-end"
    else
        layout="paired-end"
    fi

    echo ""
    #' The banner names the detected layout rather than always "paired-end".
    echo "Beginning to process your ${layout} RNA-seq data.."
    echo ".. The Kallisto Index is: $IDX"
    echo ".. Memory per thread is: $MEMPERTHREAD GB"
    echo ""
    echo "Working on: $SDIR"
    echo ".. Using prefix: $PREFIX"
    echo ".. Using $THREADS threads"
    echo ""

    echo "Using ${layout} mode.."
    echo ".. R1: $R1"
    if [[ "$layout" == "paired-end" ]]; then
        echo ".. R2: $R2"
    fi
    echo ""
    echo "Using non-stranded method.."
#'    echo "Using reverse-stranded method..[--rf-stranded]"
    echo ""
}

report_parameters


#' 1. Run Kallisto
#'
#' Quantifies the sample with `kallisto quant`, writing to $SDIR/kallisto.
#' Reads the globals SDIR, IDX, THREADS, SINGLE_END and the read glob
#' patterns R1 / R2. Exits non-zero if the output directory cannot be
#' created, if the expected read files are missing, or if kallisto fails.
echo "01. Quantifying RNA-seq with Kallisto.."

#' ${SDIR:?} aborts when the sample directory is empty or unset, so the
#' output can never end up in /kallisto by accident.
OUTDIR="${SDIR:?sample directory is not set}/kallisto"
if ! mkdir -p "$OUTDIR"; then
    echo "ERROR: could not create output directory: $OUTDIR" >&2
    exit 1
fi

#' list_reads PATTERN
#' Prints, one per line and in the shell's sorted glob order, the regular
#' files matching the glob PATTERN. Prints nothing when there is no match
#' (an unmatched glob would otherwise be passed on as a literal file name).
list_reads() {
    local IFS=      # empty IFS: no word splitting, pathname expansion only
    local match
    # $1 is intentionally unquoted so that the glob is expanded
    for match in $1; do
        if [ -f "$match" ]; then
            printf '%s\n' "$match"
        fi
    done
}

R1_FILES=()
while IFS= read -r fq; do
    R1_FILES+=("$fq")
done < <(list_reads "$R1")

if (( ${#R1_FILES[@]} == 0 )); then
    echo "ERROR: no R1 reads match: $R1" >&2
    exit 1
fi

#' Options shared by both layouts
KALLISTO_OPTS=(
    --index="$IDX"
    --output-dir "$OUTDIR"
    --bootstrap-samples 100
    --bias
    --threads="$THREADS"
)

if [[ "$SINGLE_END" == "ON" ]]; then
    #' Single-end: only R1 is passed. R2 does not exist in this mode, and
    #' handing its unmatched pattern to kallisto makes the run fail.
    echo ".. Assuming fragment-length=200 and sd=20.."
    READS=("${R1_FILES[@]}")
    KALLISTO_OPTS=(--single --fragment-length=200 --sd=20 "${KALLISTO_OPTS[@]}")
else
    R2_FILES=()
    while IFS= read -r fq; do
        R2_FILES+=("$fq")
    done < <(list_reads "$R2")

    if (( ${#R1_FILES[@]} != ${#R2_FILES[@]} )); then
        echo "ERROR: found ${#R1_FILES[@]} R1 file(s) but ${#R2_FILES[@]} R2 file(s)" >&2
        exit 1
    fi

    #' kallisto expects paired files interleaved (A_1 A_2 B_1 B_2 ...), not
    #' all R1 files followed by all R2 files. Both lists come back in sorted
    #' glob order, so index i of each list is taken as one pair.
    READS=()
    for i in "${!R1_FILES[@]}"; do
        READS+=("${R1_FILES[$i]}" "${R2_FILES[$i]}")
    done
fi

kallisto quant "${KALLISTO_OPTS[@]}" "${READS[@]}" || {
    STATUS=$?
    echo "ERROR: kallisto quant failed (exit status $STATUS)" >&2
    exit "$STATUS"
}

#' 2. Success? Only reached when kallisto exited with status 0.
echo ""
echo "Pipeline completed."