Chapter 12 Solutions to Exercises

12.0.1 Basic Usage

Corresponding exercises: see Section 1.6.1

  1. echo "Hi"  
    echo "Alien!"  
    echo "Hi"; echo "Alien!"  
  2. date -R  

12.0.2 Files and Folders

Corresponding exercises: see Section 2.5.1

  1. mkdir bashExercises  
    cd bashExercises  
  2. mkdir human chimp  
  3. cd human  
    touch DNA.txt brain.txt food.txt  
    ls  
  4. cp food.txt ../chimp  
  5. cd ../chimp  
    ls  
  6. mv ../human/DNA.txt .  
    ls ../human  
    ls  
  7. ls -l  
    ls -l ../human  
  8. rm ../human/brain.txt  
  9. rm -r ../human  

12.0.3 Redirection

Corresponding exercises: see Section 3.5.1

  1. echo "completely empty" > brain.txt  
  2. cat brain.txt  
  3. echo "...but still so smart" >> brain.txt  
  4. touch heart.txt  
    touch soul.txt  
    ls > allEmpty.txt  
  5. echo "23 * 57" | bc  

12.0.4 Variable declaration

Corresponding exercises: see Section 4.4.1

  1. a=5  
    b=18  
    echo "$a * $b" | bc  
  2. course="Introduction to UNIX"  
    chapter="Chapter 4"  
    echo "${course}: $chapter!"  
    # on a Mac, exclamation marks inside quotes cause issues. Any of these versions work instead:  
    echo "${course}: $chapter"'!'  
    echo ${course}: $chapter!  
  3. message=42  
    file="answer.txt"  
    echo $message > $file  
  4. truth=$message  
    message=43  
    echo "The truth is $truth not $message"  

12.0.5 Conditionals

Corresponding exercises: see Section 5.5.1

  1. x=5  
    if [ $x -lt 10 ]; then echo "indeed"; else echo "nope"; fi  
    x=10  
    #use arrow up to run the if statement again  
  2. touch FileA.txt  
    mkdir myDir  
    that=FileA.txt  
    if [ -f $that ]; then echo "is a file"; else if [ -d $that ]; then echo "is a directory"; fi; fi  
    that=myDir  
    #use arrow up to run the if statement again  
    that=FileB.txt  
    #use arrow up to run the if statement again  

12.0.6 Loops

Corresponding exercises: see Section 5.5.2

  1. for i in bananas mangos ants; do echo $i; done >> food.txt  
  2. for i in `seq 1 100`; do echo "banana"; done >> food.txt
    
    Can also be done using while:  
    counter=0; while [ 100 -gt $counter ]; do echo "banana" >> food.txt; let counter=counter+1; done  
  3. n="1 2 3"  
    for i in $n; do head -n $i food.txt; done  
    n="10 20 30"  
    # use arrow up to rerun the same for loop again  
  4. for i in `seq 1 100`; do echo -n "ACGT"; done >> DNA.txt
    
    Can also be done using while:  
    counter=0; while [ 100 -gt $counter ]; do echo -n "ACGT"; let counter=counter+1; done >> DNA.txt  
  5. num=1; while read line; do echo "Line $num = $line"; let num=num+1; done < food.txt  
  6. for i in `seq 1 10`; do mkdir myDir_$i; done  
  7. for d in myDir_*; do echo "I'm in directory $d" > $d/$d.txt; done  
  8. mkdir all  
    for d in myDir_*; do cd $d; mv * ../all; cd ..; done  
    #solution without loop: mv myDir_*/* all  
  9. for i in `seq 1 100`; do for j in `seq 1 $i`; do echo "Line $j"; done > File_$i.txt; done  
  10. for file in File_*.txt; do n=$(wc -l $file | awk '{print $1}'); echo "This file has $n lines" >> $file; done  
  11. i=1; while read line; do echo "File_5.txt, line $i: $line"; let i=i+1; done < File_5.txt  
  12. for file in File_*.txt; do  
    i=1;  
    while read line; do  
    echo "$file, line $i: $line";  
    let i=i+1;  
    done < $file > $file.annotated  
    done  
  13. ls File_*[2,3].txt > allFiles.txt  
    while read line; do head -n7 $line; done < allFiles.txt > someLines.txt  
  14. i=0; j=1;  
    for k in `seq 1 50`; do echo "$j"; z=$(echo "$i+$j" | bc); i=$j; j=$z; done  

12.0.7 Permission systems

Corresponding exercises: see Section 6.9.1

  1. Read permissions: chimp  
    Write permissions: None  
    Execute permissions: None  
  2. Read permissions: chimp, gorilla, alien  
    Write permissions: chimp, gorilla  
    Execute permissions: chimp  
  3. Read permissions: chimp, gorilla, alien  
    Write permissions: None  
    Execute permissions: alien  

12.0.8 Changing Permissions

Corresponding exercises: see Section 6.9.2

  1. echo "I and I alone!" > myPrecious.txt  
    chmod -rw myPrecious.txt  
    chmod u+rw myPrecious.txt  
    ls -l  
  2. chmod -rw myPrecious.txt  
    chmod g+rw myPrecious.txt  
    ls -l  
    cat myPrecious.txt  
    # Note: you should get a "Permission denied" error, since you as a user do not have read permissions.  
  3. rm myPrecious.txt  
    # Since you do not have read and write rights but still are the owner of the file, you can delete it, but must confirm.  
  4. mkdir "It's all yours!"  
    touch It\'s\ all\ yours\!/yours.txt  
    ls "It's all yours!"  
    # Instead of using quotes, you can also escape the apostrophe, the spaces and the exclamation mark. E.g.:  
    ls It\'s\ all\ yours\!  
  5. chmod u-x "It's all yours!"  
    cd "It's all yours!"  
    # Note: you should get a "Permission denied" error.  
    ls "It's all yours!"  
    # Note: you should be able to see yours.txt, but also get an error that you can not access the directory itself.  
  6. chmod u+x "It's all yours!"  
    rm -r "It's all yours!"  
    # Note: you need execution rights to recursively delete a directory! This is because you need to enter the directory first to delete its content.  
  7. vim love.sh  
    #!/bin/bash  
    echo "I like BASH!"  
    # Leave vim by pressing Esc, then :wq  
    chmod u+x love.sh  
    for i in `seq 1 100`; do ./love.sh; done  

12.0.9 Writing BASH Scripts

Corresponding exercises: see Section 6.9.3

  1. vim dog.sh  
    #!/bin/bash  
    echo "Here comes $1, a $2 years old dog of $3 color."  
    # Leave vim by pressing Esc, then :wq  
    chmod u+x dog.sh  
    ./dog.sh "Max" "15" "brown"  
    ./dog.sh "Selma" "5" "blond"  
  2. vim like.sh  
    #!/bin/bash  
    echo "I like $1!"  
    # Leave vim by pressing Esc, then :wq  
    chmod u+x like.sh  
    # you can call your script three times like this:  
    ./like.sh "biology"  
    ./like.sh "computer science"  
    ./like.sh "bioinformatics"  
    # or alternatively, you write a for-loop:  
    for x in "biology" "computer science" "bioinformatics"; do ./like.sh "$x"; done  
  3. vim reasons.sh  
    #!/bin/bash  
    echo $1 >> whyILikeBASH.txt  
    # Leave vim by pressing Esc, then :wq  
    chmod u+x reasons.sh  
    # you can call your script three times like this:  
    ./reasons.sh "powerful"  
    ./reasons.sh "flexible"  
    ./reasons.sh "fast"  
    # or alternatively, you write a for-loop:  
    for x in "powerful" "flexible" "fast"; do ./reasons.sh "$x"; done  
    cat whyILikeBASH.txt  
  4. vim append.sh  
    #!/bin/bash  
    echo $1 >> $2  
    # Leave vim by pressing Esc, then :wq  
    chmod u+x append.sh  
    for n in Bern Fribourg Lausanne; do ./append.sh $n words.txt; done  
    cat words.txt  
  5. vim append2.sh  
    #!/bin/bash  
    if [ ! -e $2 ]  
    then echo "Created by append2.sh" > $2  
    fi  
    echo $1 >> $2  
    # Leave vim by pressing Esc, then :wq  
    chmod u+x append2.sh  
    for n in hungry thirsty sleepy; do ./append2.sh $n moreWords.txt  
    done  
    cat moreWords.txt  
  6. vim positive.sh  
    #!/bin/bash  
    if [ $1 -gt 0 ]  
    then echo $1 > "${1}.txt"  
    fi  
    # Leave vim by pressing Esc, then :wq  
    chmod u+x positive.sh  
    for n in "-10" "0" "10"; do ./positive.sh $n  
    done  
    ls  
  7. vim explain.sh  
    #!/bin/bash  
    for name in `ls`  
    do if [ -d $name ]  
    then echo "$name is a directory"  
    else echo "$name is a file"  
    fi  
    done  
    # Leave vim by pressing Esc, then :wq  
    chmod u+x explain.sh  
    mkdir testdir1 testdir2  
    touch testfile1 testfile2  
    ./explain.sh  
  8. vim helper.sh  
    #!/bin/bash  
    (  
    echo "#!/bin/bash"  
    echo "for i in \`seq 1 \$2\`"  
    echo "do echo \$1"  
    echo "done"  
    ) > print.sh  
    chmod u+x print.sh  
    ./print.sh $1 $2  
    # Leave vim by pressing Esc, then :wq
    
    chmod u+x helper.sh  
    ./helper.sh "Gotcha!" 10  

12.0.10 Text Files

Corresponding exercises: see Section 7.4.1

  1. for i in `seq 1 1000`; do echo "This is line $i" >> myFile.txt; done  
  2. more myFile.txt  
    less myFile.txt  
  3. cut -d' ' -f4 myFile.txt  
  4. head -n7 myFile.txt > myShortFile.txt  
    tail -n7 myFile.txt >> myShortFile.txt  
  5. wc -l myShortFile.txt  
  6. cut -d' ' -f1 myFile.txt | tail -n17  
  7. echo "A file with numbered lines" > header.txt  
    cat myFile.txt >> header.txt  
  8. tail -n+2 header.txt > myFile2.txt  
    wc myFile.txt myFile2.txt  
    head myFile.txt myFile2.txt  
    tail myFile.txt myFile2.txt  
  9. cmp myFile.txt myFile2.txt  

12.0.11 Zipping

Corresponding exercises: see Section 8.5.1

  1. more BanthracisProteome.txt  
    less BanthracisProteome.txt  
  2. head -n200 BanthracisProteome.txt > short.txt  
    gzip short.txt  
  3. head -n200 BanthracisProteome.txt | gzip > short.txt.gz  
  4. zcat short.txt.gz | head -n100 | gzip > shorter.txt.gz  
  5. i=1  
    zcat shorter.txt.gz | head -n100 | while read line;  
    do echo $line > Line${i}.txt  
    let i=i+1  
    done  
  6. tar -czf all.tar Line*.txt  
  7. gzip Line*.txt  
    tar -cf zipped.tar Line*.txt.gz  
    ls -sh *.tar  
  8. mkdir all  
    cd all  
    cp ../all.tar .  
    tar -xf all.tar  
  9. rm -r Line* *.tar all  

12.0.12 Warm up

Corresponding exercises: see Section 9.7.1

  1. # Press i to enter insert mode.  
    # Then type AGCTTGCGACA...  
    # Press Esc to enter command mode.  
    # Type :w to save and :q to quit (shorter: type :wq).  
  2. cat DNA.txt | sed 's/A/G/g'  
  3. cat DNA.txt | sed 's/A/G/g' >> DNA.txt  
    cat DNA.txt  

12.0.13 grep

Corresponding exercises: see Section 9.7.2

  1. grep ID BanthracisProteome.txt | less  
  2. grep -m 10 123 BanthracisProteome.txt  
  3. grep -v 123 BanthracisProteome.txt | tail  
  4. tail -n+100001 BanthracisProteome.txt | grep -c out  
  5. grep -ci grep BanthracisProteome.txt  
  6. #!/bin/bash  
    grep -m10 -i $2 $1 > $3  
    grep -i $2 $1 | tail >> $3  
    chmod +x extract.sh  
    ./extract.sh BanthracisProteome.txt ab[1-9] abc.txt  
    # There is an option to combine head and tail in one grep call:  
    grep -i $2 $1 | tee >(head > $3) | tail >> $3  

12.0.14 tr and sed

Corresponding exercises: see Section 9.7.3

  1. grep -m7 F[A-Z]R BanthracisProteome.txt | tr 'A-Z' 'a-z'  
    grep -m7 F[A-Z]R BanthracisProteome.txt | sed 'y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/'  
    Of course there is a regular expression way with sed:  
    sed -e 's/\(.*\)/\L\1/'  
  2. head BanthracisProteome.txt | tr -s ' '  
  3. grep Reviewed BanthracisProteome.txt | tr -d ';' | tr -s ' '> reviewed.txt  
  4. sed -i 's/Reviewed/tscheggt/g' reviewed.txt  
  5. sed -i '5,9 s/tscheggt/reviewed/' reviewed.txt  

12.0.15 Sorting

Corresponding exercises: see Section 9.7.4

  1. cut -f1 -d ' ' reviewed.txt | sort | tail  
  2. cut -f4 -d ' ' reviewed.txt | sort -n | tail -n1  
    cut -f4 -d ' ' reviewed.txt | sort -n | head -n1  
  3. cut -f4 -d' ' reviewed.txt | sort -n | uniq -d | wc -l  
  4. cut -f4 -d' ' reviewed.txt | sort -n | uniq -c | sort | tail -n1  
  5. seq 100 > numbers.txt  
    grep 'KEGG' BanthracisProteome.txt | tr -s ' ' | cut -f3 -d' ' | head -n100 > KEGG.txt  
    paste numbers.txt KEGG.txt > combined.txt  
  6. shuf combined.txt | head -n50 > shuff.txt  
    sort shuff.txt > shuff.sorted  
    join shuff.sorted combined.txt  

12.0.16 Regular expressions

Corresponding exercises: see Section 9.7.5

  1. grep -P "SEQUENCE\s+[0-9]+\s+AA" BanthracisProteome.txt  
  2. grep -E "GO:0009[0-9]{3}" BanthracisProteome.txt  
  3. grep -E "[0-9]{2}-[A-Z]{3}-[0-9]{4}" BanthracisProteome.txt  
  4. for e in groovy.gorilla@jungle.com hey.dude@cool.com secret@cia.com blah.blah.internet no.domain@short; do  
      echo $e | grep -E "[a-zA-Z]+\.[a-zA-Z]+@[a-zA-Z]+\.[a-zA-Z]{2,3}"  
    done  
  5. echo "My dear Ronald Fisher, I hope you enjoyed reading the book about Thomas Bayes the other day. Kind regards, Gertrude Cox" | grep -E "[A-Z][a-z]+ [A-Z][a-z]+"  
  6. echo "To refresh during a boring session, I jumped 234.26 meters from 46.9462873,7.4446943 to 46.9450720,7.4462414." | sed -E 's/([0-9]+\.[0-9]+),([0-9]+\.[0-9]+)/\1°,\2°/g'  
  7. echo "My name is Kaa, so trusssst in meeeee" | sed -E 's/([a-z])\1+/\1/g'  
  8. echo "My name is Kaa, so trusssst in meeeee" | sed -E 's/(([a-z])\2+)/[\1]/g'  
  9. echo "2.5x(y(6.0 + z) + 6.3) - 7.2(5.1 - a) + (1.8 - b)(3.2 + (c - 1.9))" | sed -E 's#\(([0-9a-z\.\+-\*/ ]*\([0-9a-z\.\+-\*/ ]*\)[0-9a-z\.\+-\*\/ ]*)\)#\[\1\]#g'  

12.0.17 At the beginning it feels awk-ward

Corresponding exercises: see Section 10.6.1

  1. seq 1 100 | tr ' ' '\n' > numbers.txt  
  2. awk '$1 >= 107 && $1 <= 121' numbers.txt  
  3. awk '$1 ~ /100/' numbers.txt  
  4. awk '{print $1, log($1)}' numbers.txt > moreNumbers.txt  
  5. awk '{if($1 % 2){evenOdd = "odd"} else {evenOdd = "even"} print $1, log($1), evenOdd}' numbers.txt > moreNumbers.txt  
  6. wc -l moreNumbers.txt  
    awk '{++n}END{print n}' moreNumbers.txt  
  7. awk '{++n; first += $1; second += $2}END{print first/n, second/n}' moreNumbers.txt  
  8. awk '$3=="odd" {++n; first += $1; second += $2}END{print first/n, second/n}' moreNumbers.txt  
  9.  awk '$1>1 {print $2-prev} {prev=$2}' moreNumbers.txt  
  10. awk '{if(n==9){print out $2; out = ""; n=0}else{out = out $2 "\t"; ++n}}' moreNumbers.txt  

12.0.18 awk on the Banthracis proteome

Corresponding exercises: see Section 10.6.2

  1. awk '$1 =="ID"' BanthracisProteome.txt > prots.txt  
  2. awk '{++tot; if($3=="Reviewed;") {++x}} END {print 100*(x/tot), "%"}' prots.txt  
  3. #!/bin/bash  
    x=$(grep -c "Reviewed" prots.txt)  
    tot=$(wc -l prots.txt | cut -f1 -d' ')  
    percent=$(echo "scale=5; 100 * $x / $tot" | bc)  
    echo "${percent}%"
    
    # solution: 13.36246%  
  4. awk '$1=="ID" {tot = tot+$4} END {print tot}' BanthracisProteome.txt
    
    # solution: 1439306  
  5. awk '$1=="ID" {print $2, $4}' BanthracisProteome.txt | sort > len.txt  
    awk '$1=="ID" && $2!~/[0-9]/ {print $2, $3}' BanthracisProteome.txt | sort > status.txt  
    join len.txt status.txt > len_status.txt  
  6. # if 1st column is ID, store name & len and empty the seq variable. If 1st column is SQ, set addseq=1. If addseq=1, add sequence to seq. If 1st column is //, set addseq=0 (stop adding sequences) and print.  
    # The order of these commands is crucial. e.g. if the last two conditions are switched, you would add "//" to the seq.  
    awk 'BEGIN {addseq=0}; {if ($1 == "ID") {name= $2; len=$4; seq="";} else {if ($1 == "SQ") {addseq=1;} else {if ($1 == "//") {addseq=0; print name, len, seq;} else {if (addseq == 1) {seq = seq $1 $2 $3 $4 $5 $6}}}}}' BanthracisProteome.txt> seq.txt  
  7. awk 'BEGIN {test=0}; {if (length($3) != $2) {print "ERR: lengths differ for " $0; test=1;}}; END {if (test == 1) {print "ERR: something went wrong";} else {print "file checked"}}' seq.txt  

12.0.19 R and bash

Corresponding exercises: see Section 11.1.1

  1. mu=10; echo "cat(rnorm(1000, $mu))" | R --slave | cut -d" " -f3- | tr " " "\n" | awk '{s+=$0; tot+=1};END {print s/tot}'  
  2. #!/bin/bash  
    wget https://data.geo.admin.ch/ch.meteoschweiz.messwerte-lufttemperatur-10min/ch.meteoschweiz.messwerte-lufttemperatur-10min_en.csv  
    cut -d";" -f4,6 ch.meteoschweiz.messwerte-lufttemperatur-10min_en.csv | tail -n+2 > cols.csv  
    echo 'file <- read.csv("cols.csv", sep = ";"); pdf("altVsTemp.pdf"); plot(file[,2], file[,1], xlab = "altitude", ylab = "temperature", main = "altitude vs temperature"); dev.off()' | R --slave  
    cat ch.meteoschweiz.messwerte-lufttemperatur-10min_en.csv | grep Fribourg | cut -d";" -f4  
    rm ch.meteoschweiz.messwerte-lufttemperatur-10min_en.csv  
    rm cols.csv  
  3. awk '$1 == "SQ" {print $3, $5}' BanthracisProteome.txt > lenWeight.txt  
    echo 'lenWeight <- read.table("lenWeight.txt"); pdf("lenVsWeight.pdf"); plot(lenWeight[,1],  
    lenWeight[,2]); dev.off()' | R --slave  
    rm lenWeight.txt  
  4. #!/bin/bash  
    for go in GO:0005886 GO:0005737 GO:0003677 GO:0005524 GO:0016021; do  
    #if $1=ID, save length and set "found" to 0;  
    #if $1=DR, $2=GO; and $3 matches the current GO-term (go-term found) set "found" to 1;  
    #if the end of the protein is reached ($1=//), and a GO-term was found (found=1), print the length.  
    awk -vGO=${go} '$1=="ID"{len=$4; found=0}; $1=="DR" && $2=="GO;" && $3~GO {found=1}; $1=="//"&&found{print len}' BanthracisProteome.txt > ${go}_length.txt  
    echo "lengths <- read.table('${go}_length.txt'); pdf('${go}_length.pdf'); hist(lengths[,1],main = '${go}'); dev.off()" | R --slave;  
    done