Collects data into per-data-type folders, using symbolic links as the stored representation. The data can then be easily transferred from the HPC to a local machine for ease of computation.

Z0_collect_data.sh

Arguments

INPUT_DIR

parent directory which holds the collection of sample folders

OUTPUT_DIR

directory which will hold the links

Examples


Preset parameters
#' Validate the two positional arguments before touching the filesystem.
#' Without this check an empty OUTPUT_DIR makes "cd" fall back to $HOME and
#' the data-type folders would be created there.
if [[ $# -lt 2 || -z "$1" || -z "$2" ]]; then
    echo "Usage: ${0##*/} INPUT_DIR OUTPUT_DIR" >&2
    exit 1
fi

#' INDIR: absolute path of the directory that holds the sample folders.
INDIR=$(readlink -f -- "$1")
if [[ ! -d "$INDIR" ]]; then
    echo "Error: INPUT_DIR '$1' is not a directory" >&2
    exit 1
fi

#' OUTDIR: absolute path of the directory that will hold the links.
#' It is created before being resolved so that "readlink -f" cannot fail
#' because of missing parent directories.
mkdir -p -- "$2" || exit 1
OUTDIR=$(readlink -f -- "$2") || exit 1

echo ""
echo "Beginning collection process on from $INDIR .."
echo ".. Collecting: "
echo ".. - ChIP-seq: bams, bigwigs, macs2, bcp, music"
echo ".. - RNA-seq: kallisto"
echo ""
echo "Placing in: $OUTDIR"
echo ""

#' One sub-folder per data type; stop if the output location is unusable
#' rather than continuing in whatever directory we happen to be in.
cd "$OUTDIR" || exit 1
mkdir -p bam bigwig macs2 bcp music kallisto || exit 1

#' Symlink every given path into the current directory, using the name of
#' its fully resolved target as the link name.
#' Arguments: any number of paths; a path that does not exist (for example a
#'            glob pattern that matched nothing) is skipped.
#' Existing links are replaced (-f) so the script can be re-run.
link_resolved_here() {
    local src
    for src in "$@"; do
        [[ -e "$src" || -L "$src" ]] || continue
        ln -sf -- "$(readlink -f -- "$src")" .
    done
}

#' Iterate over every entry of the input directory. Globbing (instead of
#' parsing "ls" output) keeps paths that contain whitespace intact.
for I in "$INDIR"/*; do
    [[ -e "$I" ]] || continue
    sample=${I##*/}

    #' Easy cases: Sample ID already embedded in file name
    cd "$OUTDIR/bam" || exit 1
    link_resolved_here "$I"/bam_clean/*.bam*

    cd "$OUTDIR/bigwig" || exit 1
    link_resolved_here "$I"/bigwig/*.bw

    cd "$OUTDIR/macs2" || exit 1
    link_resolved_here "$I"/macs2/*Peak

    cd "$OUTDIR/bcp" || exit 1
    link_resolved_here "$I"/bcp/*bcp.bed

    #' Need to embed sample ID as prefix
    cd "$OUTDIR/music" || exit 1
    for J in "$I"/music/chip/ER/*.bed; do
        [[ -e "$J" || -L "$J" ]] || continue
        ln -sfn -- "$(readlink -f -- "$J")" "${sample}_music_${J##*/}"
    done

    #' Need to link over whole folder
    #' -n stops ln from following an existing <sample>_kallisto link on a
    #' re-run, which would otherwise drop a stray link inside the source folder.
    cd "$OUTDIR/kallisto" || exit 1
    if [[ -e "$I/kallisto" ]]; then
        ln -sfn -- "$(readlink -f -- "$I/kallisto")" "${sample}_kallisto"
    fi

done