#!/bin/bash
#
# Normalize PRO-seq replicate bigWigs to 10 million reads, then merge the
# normalized replicates of each condition into one averaged bigWig per strand.
#
# Modified from the original code as file names are different:
# https://github.com/guertinlab/Nascent_RNA_Methods/blob/main/PRO_normalization
#
# Usage: <this script> -c <chrom.sizes>
#   -c  chromosome sizes file handed to bedGraphToBigWig. A relative path is
#       resolved inside BIGWIG_DIR because the script changes into that
#       directory before using it.
#
# Put these in your PATH: normalize_bedGraph.py, normalization_factor.R.
# normalization_factor_v2.R and normalize_bedGraph_v2.py are present in the
# same directory and they have been modified accordingly.
#
# bash (not plain sh) is required: the script uses here-strings, arrays and
# [[ ]].  `set -u` is deliberately not enabled because `module` is a shell
# function supplied by the cluster environment and is outside this script's
# control.
set -eo pipefail

readonly PROJECT_DIR=/home/FCAM/rmukherjee/compartmentModel_project_datasets/Jonkers_et_al_2014
readonly BIGWIG_DIR=${PROJECT_DIR}/fastqfiles
readonly TARGET_READS=10000000

# Print an error message to stderr and abort the script.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Print the command-line synopsis to stderr.
usage() {
  printf 'Usage: %s -c <chrom.sizes>\n' "${0##*/}" >&2
}

# Succeed when $1 is a plain non-negative decimal number (digits, optionally
# followed by a fractional part).  Anything else would be mis-parsed by bc.
is_number() {
  [[ $1 =~ ^[0-9]+([.][0-9]+)?$ ]]
}

# replicate_number FILE
# Print the text between the last "rep" and the following "_plus" in FILE,
# e.g. V65_cntrl_rep2_plus.bigWig -> 2.
replicate_number() {
  local after_rep=${1##*rep}
  printf '%s\n' "${after_rep%%_plus*}"
}

# lookup_factor TABLE KEY
# Print the second whitespace-separated field of the first line of TABLE that
# contains KEY.  KEY is matched as a literal substring, so the dots in a file
# name are not treated as regex wildcards.  Prints nothing if no line matches.
lookup_factor() {
  local table=$1 key=$2
  awk -v key="$key" 'index($0, key) { print $2; exit }' "$table"
}

# normalize_replicate NAME REP TABLE CHROM_SIZES
# Scale both strands of one replicate so that plus + minus totals
# TARGET_READS, writing <NAME>_rep<REP>_<strand>_normalized.bigWig.
normalize_replicate() {
  local name=$1 rep=$2 table=$3 chrom_sizes=$4
  local stem=${name}_rep${rep}
  local inv_plus inv_minus inv_total scale strand

  inv_plus=$(lookup_factor "$table" "${stem}_plus.bigWig")
  inv_minus=$(lookup_factor "$table" "${stem}_minus.bigWig")
  is_number "$inv_plus" \
    || die "no numeric entry for ${stem}_plus.bigWig in ${table} (got '${inv_plus}')"
  is_number "$inv_minus" \
    || die "no numeric entry for ${stem}_minus.bigWig in ${table} (got '${inv_minus}')"
  printf '%s\n' "$inv_plus"
  printf '%s\n' "$inv_minus"

  # scale to 10 million; bc is used for the sum as well because expr only
  # handles integers and exits non-zero when the result is 0.
  inv_total=$(bc <<< "$inv_plus + $inv_minus")
  [[ $inv_total =~ [1-9] ]] || die "total signal for ${stem} is zero; cannot scale"
  scale=$(bc <<< "scale=3 ; ${TARGET_READS} / ${inv_total}") # normalised to 1e7 reads

  for strand in plus minus; do
    bigWigToBedGraph "${stem}_${strand}.bigWig" "${stem}_${strand}.bedGraph"
    normalize_bedGraph_v2.py -i "${stem}_${strand}.bedGraph" -s "$scale" \
      -o "${stem}_${strand}_normalized.bedGraph"
    bedGraphToBigWig "${stem}_${strand}_normalized.bedGraph" "$chrom_sizes" \
      "${stem}_${strand}_normalized.bigWig"
  done
}

# process_condition SEED CHROM_SIZES
# SEED is the rep1 plus-strand bigWig of a condition.  Normalizes every
# <name>_rep?_plus/minus.bigWig replicate, merges them per strand, divides by
# the number of replicates and writes <name>_<strand>_normalized.bigWig.
process_condition() {
  local seed=$1 chrom_sizes=$2
  local name=${seed%_rep1_plus.bigWig}  # name modified
  local prefix=${seed%%_*}
  # Presumably written by normalization_factor.R -- confirm against that script.
  local table=${prefix}_normalization.txt
  local rep_file rep reps scaleall strand
  local -a rep_files plus_norm=() minus_norm=()

  printf '%s - processing individual replicates\n' "$name"
  [[ -r $table ]] || die "normalization table ${table} not found in ${PWD}"

  # count the number of replicates (glob instead of parsing ls output)
  rep_files=("${name}"_rep?_plus.bigWig)
  [[ -e ${rep_files[0]} ]] || die "no files match ${name}_rep?_plus.bigWig in ${PWD}"
  reps=${#rep_files[@]}

  for rep_file in "${rep_files[@]}"; do
    rep=$(replicate_number "$rep_file") # take the number of the replicate
    normalize_replicate "$name" "$rep" "$table" "$chrom_sizes"
    # Remember exactly what was produced so the merge below uses the same
    # replicates that were counted in $reps (no stale files via a wider glob).
    plus_norm+=("${name}_rep${rep}_plus_normalized.bigWig")
    minus_norm+=("${name}_rep${rep}_minus_normalized.bigWig")
  done

  printf '%s - creating and merging normalized replicate bigWigs\n' "$name"
  bigWigMerge "${plus_norm[@]}" tmpPlus.bg
  # for bigWigMerge utility: -threshold=0.N - don't output values at or below
  # this threshold. Default is 0.0
  bigWigMerge -threshold=-10000000000 "${minus_norm[@]}" tmpMinus.bg

  scaleall=$(bc <<< "scale=4 ; 1.0 / $reps") # scaleall = 1 / number of replicates
  normalize_bedGraph_v2.py -i tmpPlus.bg -s "$scaleall" -o "${name}_plus_normalized.bg"
  normalize_bedGraph_v2.py -i tmpMinus.bg -s "$scaleall" -o "${name}_minus_normalized.bg"

  for strand in plus minus; do
    # bedGraphToBigWig needs byte-order sorted chromosome names, so force the
    # C locale instead of inheriting the caller's collation rules.
    LC_ALL=C sort -k1,1 -k2,2n "${name}_${strand}_normalized.bg" \
      > "${name}_${strand}_normalized_sorted.bg"
    bedGraphToBigWig "${name}_${strand}_normalized_sorted.bg" "$chrom_sizes" \
      "${name}_${strand}_normalized.bigWig"
  done

  rm -f -- "${name}_plus_normalized.bg" "${name}_minus_normalized.bg" \
    "${name}_plus_normalized_sorted.bg" "${name}_minus_normalized_sorted.bg" \
    tmpPlus.bg tmpMinus.bg
}

# Entry point: parse -c, prepare the environment, compute the normalization
# factors with R and process every condition.
main() {
  local chrSizes='' opt seed
  local OPTIND=1
  # Rep1 plus-strand file of each condition to process (name modified).
  # V65_FP_treat_25min_rep1_plus.bigWig is currently left out.
  local -a seeds=(
    V65_cntrl_rep1_plus.bigWig
    V65_TRP_treat_12min_rep1_plus.bigWig
  )

  while getopts 'c:' opt; do
    case "$opt" in
      c) chrSizes=$OPTARG ;;
      *)
        usage
        exit 2
        ;;
    esac
  done

  if [[ -z $chrSizes ]]; then
    # Kept as a successful no-op for backward compatibility, but no longer silent.
    printf '%s: no chromosome sizes file given (-c); nothing to do\n' "${0##*/}" >&2
    return 0
  fi

  module load R/4.1.2
  export PATH="${PATH}:${PROJECT_DIR}" # for normalization_bedgraph_v2.py
  export PATH="${PATH}:/home/FCAM/rmukherjee/Summer2022Rotation/R_files" # for bigWigToBedGraph

  cd "$BIGWIG_DIR" || die "cannot cd to ${BIGWIG_DIR}"
  [[ -r $chrSizes ]] || die "chromosome sizes file '${chrSizes}' is not readable from ${PWD}"

  # modified version of normalization_factor.R from Guertin lab's GitHub
  Rscript "${PROJECT_DIR}/normalization_factor.R"

  for seed in "${seeds[@]}"; do
    process_condition "$seed" "$chrSizes"
  done
}

main "$@"