Chapter 12 Solutions to Exercises

12.0.1 Basic Usage

Corresponding exercises: see Section 1.6.1

echo "Hi"  
echo "Alien!"  
echo "Hi"; echo "Alien!"

```
date -R  
```

12.0.2 Files and Folders

Corresponding exercises: see Section 2.5.1

mkdir bashExercises  
cd bashExercises

```
mkdir human chimp  
```

cd human  
touch DNA.txt brain.txt food.txt  
ls

```
cp food.txt ../chimp  
```
```
cd ../chimp  
ls  
```

mv ../human/DNA.txt .  
ls ../human  
ls

```
ls -l  
ls -l ../human  
```
```
rm ../human/brain.txt  
```
```
rm -r ../human  
```

12.0.3 Redirection

Corresponding exercises: see Section 3.5.1

```
echo "completely empty" > brain.txt  
```
```
cat brain.txt  
```

echo "...but still so smart" >> brain.txt

touch heart.txt  
touch soul.txt  
ls > allEmpty.txt

```
echo "23 * 57" | bc  
```

12.0.4 Variable declaration

Corresponding exercises: see Section 4.4.1

```
a=5  
b=18  
echo "$a * $b" | bc  
```

course="Introduction to UNIX"  
chapter="Chapter 4"  
echo "${course}: $chapter!"  
# on a Mac, exclamation marks inside quotes cause issues. Any of these versions work instead:  
echo "${course}: $chapter"'!'  
echo ${course}: $chapter!

message=42  
file="answer.txt"  
echo $message > $file

truth=$message  
message=43  
echo "The truth is $truth not $message"

12.0.5 Conditionals

Corresponding exercises: see Section 5.5.1

x=5  
if [ $x -lt 10 ]; then echo "indeed"; else echo "nope"; fi  
x=10  
#use arrow up to run the if statement again

touch FileA.txt  
mkdir myDir  
that=FileA.txt  
if [ -f $that ]; then echo "is a file"; else if [ -d $that ]; then echo "is a directory"; fi; fi  
that=myDir  
#use arrow up to run the if statement again  
that=FileB.txt  
#use arrow up to run the if statement again

12.0.6 Loops

Corresponding exercises: see Section 5.5.2

for i in bananas mangos ants; do echo $i; done >> food.txt

for i in `seq 1 100`; do echo "banana"; done >> food.txt

Can also be done using while:  
counter=0; while [ 100 -gt $counter ]; do echo "banana" >> food.txt; let counter=counter+1; done

n="1 2 3"  
for i in $n; do head -n $i food.txt; done  
n="10 20 30"  
# use arrow up to rerun the same for loop again

for i in `seq 1 100`; do echo -n "ACGT"; done >> DNA.txt

Can also be done using while:  
counter=0; while [ 100 -gt $counter ]; do echo -n "ACGT"; let counter=counter+1; done >> DNA.txt

num=1; while read line; do echo "Line $num = $line"; let num=num+1; done < food.txt

for i in `seq 1 10`; do mkdir myDir_$i; done

for d in myDir_*; do echo "I'm in directory $d" > $d/$d.txt; done

mkdir all  
for d in myDir_*; do cd $d; mv * ../all; cd ..; done  
#solution without loop: mv myDir_*/* all

for i in `seq 1 100`; do for j in `seq 1 $i`; do echo "Line $j"; done > File_$i.txt; done

for file in File_*.txt; do n=$(wc -l $file | awk '{print $1}'); echo "This file has $n lines" >> $file; done

i=1; while read line; do echo "File_5.txt, line $i: $line"; let i=i+1; done < File_5.txt

for file in File_*.txt; do  
i=1;  
while read line; do  
echo "$file, line $i: $line";  
let i=i+1;  
done < $file > $file.annotated  
done

ls File_*[2,3].txt > allFiles.txt  
while read line; do head -n7 $line; done < allFiles.txt > someLines.txt

i=0; j=1;  
for k in `seq 1 50`; do echo "$j"; z=$(echo "$i+$j" | bc); i=$j; j=$z; done

12.0.7 Permission systems

Corresponding exercises: see Section 6.9.1

Read permissions: chimp  
Write permissions: None  
Execute permissions: None

Read permissions: chimp, gorilla, alien  
Write permissions: chimp, gorilla  
Execute permissions: chimp

Read permissions: chimp, gorilla, alien  
Write permissions: None  
Execute permissions: alien

12.0.8 Changing Permissions

Corresponding exercises: see Section 6.9.2

echo "I and I alone!" > myPrecious.txt  
chmod -rw myPrecious.txt  
chmod u+rw myPrecious.txt  
ls -l

chmod -rw myPrecious.txt  
chmod g+rw myPrecious.txt  
ls -l  
cat myPrecious.txt  
# Note: you should get a "Permission denied" error, since you as a user do not have read permissions.

rm myPrecious.txt  
# Since you do not have read and write rights but still are the owner of the file, you can delete it, but must confirm.

mkdir "It's all yours!"  
touch It\'s\ all\ yours\!/yours.txt  
ls "It's all yours!"  
# Instead of using quotes, you can also escape the spaces, the apostrophe and the exclamation mark. E.g.:  
ls It\'s\ all\ yours\!

chmod u-x "It's all yours!"  
cd "It's all yours!"  
# Note: you should get a "Permission denied" error.  
ls "It's all yours!"  
# Note: you should be able to see yours.txt, but also get an error that you cannot access the directory itself.

chmod u+x "It's all yours!"  
rm -r "It's all yours!"  
# Note: you need execution rights to recursively delete a directory! This is because you need to enter the directory first to delete its content.

vim love.sh  
#!/bin/bash  
echo "I like BASH!"  
# Leave vim by pressing Esc, then :wq  
chmod u+x love.sh  
for i in `seq 1 100`; do ./love.sh; done

12.0.9 Writing BASH Scripts

Corresponding exercises: see Section 6.9.3

vim dog.sh  
#!/bin/bash  
echo "Here comes $1, a $2 years old dog of $3 color."  
# Leave vim by pressing Esc, then :wq  
chmod u+x dog.sh  
./dog.sh "Max" "15" "brown"  
./dog.sh "Selma" "5" "blond"

vim like.sh  
#!/bin/bash  
echo "I like $1!"  
# Leave vim by pressing Esc, then :wq  
chmod u+x like.sh  
# you can call your script three times like this:  
./like.sh "biology"  
./like.sh "computer science"  
./like.sh "bioinformatics"  
# or alternatively, you write a for-loop:  
for x in "biology" "computer science" "bioinformatics"; do ./like.sh "$x"; done

vim reasons.sh  
#!/bin/bash  
echo $1 >> whyILikeBASH.txt  
# Leave vim by pressing Esc, then :wq  
chmod u+x reasons.sh  
# you can call your script three times like this:  
./reasons.sh "powerful"  
./reasons.sh "flexible"  
./reasons.sh "fast"  
# or alternatively, you write a for-loop:  
for x in "powerful" "flexible" "fast"; do ./reasons.sh "$x"; done  
cat whyILikeBASH.txt

vim append.sh  
#!/bin/bash  
echo $1 >> $2  
# Leave vim by pressing Esc, then :wq  
chmod u+x append.sh  
for n in Bern Fribourg Lausanne; do ./append.sh $n words.txt; done  
cat words.txt

vim append2.sh  
#!/bin/bash  
if [ ! -e $2 ]  
then echo "Created by append2.sh" > $2  
fi  
echo $1 >> $2  
# Leave vim by pressing Esc, then :wq  
chmod u+x append2.sh  
for n in hungry thirsty sleepy; do ./append2.sh $n moreWords.txt  
done  
cat moreWords.txt

vim positive.sh  
#!/bin/bash  
if [ $1 -gt 0 ]  
then echo $1 > "${1}.txt"  
fi  
# Leave vim by pressing Esc, then :wq  
chmod u+x positive.sh  
for n in "-10" "0" "10"; do ./positive.sh $n  
done  
ls

vim explain.sh  
#!/bin/bash  
for name in `ls`  
do if [ -d $name ]  
then echo "$name is a directory"  
else echo "$name is a file"  
fi  
done  
# Leave vim by pressing Esc, then :wq  
chmod u+x explain.sh  
mkdir testdir1 testdir2  
touch testfile1 testfile2  
./explain.sh

vim helper.sh  
#!/bin/bash  
(  
echo "#!/bin/bash"  
echo "for i in \`seq 1 \$2\`"  
echo "do echo \$1"  
echo "done"  
) > print.sh  
chmod u+x print.sh  
./print.sh $1 $2  
# Leave vim by pressing Esc, then :wq

chmod u+x helper.sh  
./helper.sh "Gotcha!" 10

12.0.10 Text Files

Corresponding exercises: see Section 7.4.1

for i in `seq 1 1000`; do echo "This is line $i" >> myFile.txt; done

```
more myFile.txt  
less myFile.txt  
```
```
cut -d' ' -f4 myFile.txt  
```

head -n7 myFile.txt > myShortFile.txt  
tail -n7 myFile.txt >> myShortFile.txt

```
wc -l myShortFile.txt  
```
```
cut -d' ' -f1 myFile.txt | tail -n17  
```

echo "A file with numbered lines" > header.txt  
cat myFile.txt >> header.txt

tail -n+2 header.txt > myFile2.txt  
wc myFile.txt myFile2.txt  
head myFile.txt myFile2.txt  
tail myFile.txt myFile2.txt

```
cmp myFile.txt myFile2.txt  
```

12.0.11 Zipping

Corresponding exercises: see Section 8.5.1

more BanthracisProteome.txt  
less BanthracisProteome.txt

head -n200 BanthracisProteome.txt > short.txt  
gzip short.txt

head -n200 BanthracisProteome.txt | gzip > short.txt.gz

zcat short.txt.gz | head -n100 | gzip > shorter.txt.gz

i=1  
zcat shorter.txt.gz | head -n100 | while read line;  
do echo $line > Line${i}.txt  
let i=i+1  
done

```
tar -czf all.tar Line*.txt  
```

gzip Line*.txt  
tar -cf zipped.tar Line*.txt.gz  
ls -sh *.tar

mkdir all  
cd all  
cp ../all.tar .  
tar -xf all.tar

```
rm -r Line* *.tar all  
```

12.0.12 Warm up

Corresponding exercises: see Section 9.7.1

# Press i to enter insert mode.  
# Then type AGCTTGCGACA...  
# Press Esc to enter command mode.  
# Type :w to save and :q to quit (shorter: type :wq).

```
cat DNA.txt | sed 's/A/G/g'  
```

cat DNA.txt | sed 's/A/G/g' >> DNA.txt  
cat DNA.txt

12.0.13 grep

Corresponding exercises: see Section 9.7.2

```
grep ID BanthracisProteome.txt | less  
```
```
grep -m 10 123 BanthracisProteome.txt  
```

grep -v 123 BanthracisProteome.txt | tail

tail -n+100001 BanthracisProteome.txt | grep -c out

```
grep -ci grep BanthracisProteome.txt  
```

#!/bin/bash  
grep -m10 -i $2 $1 > $3  
grep -i $2 $1 | tail >> $3  
chmod +x extract.sh  
./extract.sh BanthracisProteome.txt ab[1-9] abc.txt  
# There is an option to combine head and tail in one grep call:  
grep -i $2 $1 | tee >(head > $3) | tail >> $3

12.0.14 tr and sed

Corresponding exercises: see Section 9.7.3

grep -m7 F[A-Z]R BanthracisProteome.txt | tr 'A-Z' 'a-z'  
grep -m7 F[A-Z]R BanthracisProteome.txt | sed 'y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/'  
Of course there is a regular expression way with sed:  
sed -e 's/\(.*\)/\L\1/'

head BanthracisProteome.txt | tr -s ' '

grep Reviewed BanthracisProteome.txt | tr -d ';' | tr -s ' '> reviewed.txt

```
sed -i 's/Reviewed/tscheggt/g' reviewed.txt  
```

sed -i '5,9 s/tscheggt/reviewed/' reviewed.txt

12.0.15 Sorting

Corresponding exercises: see Section 9.7.4

cut -f1 -d ' ' reviewed.txt | sort | tail

cut -f4 -d ' ' reviewed.txt | sort -n | tail -n1  
cut -f4 -d ' ' reviewed.txt | sort -n | head -n1

cut -f4 -d' ' reviewed.txt | sort -n | uniq -d | wc -l

cut -f4 -d' ' reviewed.txt | sort -n | uniq -c | sort | tail -n1

seq 100 > numbers.txt  
grep 'KEGG' BanthracisProteome.txt | tr -s ' ' | cut -f3 -d' ' | head -n100 > KEGG.txt  
paste numbers.txt KEGG.txt > combined.txt

shuf combined.txt | head -n50 > shuff.txt  
sort shuff.txt > shuff.sorted  
sort combined.txt > combined.sorted  
join shuff.sorted combined.sorted

12.0.16 Regular expressions

Corresponding exercises: see Section 9.7.5

grep -P "SEQUENCE\s+[0-9]+\s+AA" BanthracisProteome.txt

grep -E "GO:0009[0-9]{3}" BanthracisProteome.txt

grep -E "[0-9]{2}-[A-Z]{3}-[0-9]{4}" BanthracisProteome.txt

for e in groovy.gorilla@jungle.com hey.dude@cool.com secret@cia.com blah.blah.internet no.domain@short; do  
  echo $e | grep -E "[a-zA-Z]+\.[a-zA-Z]+@[a-zA-Z]+\.[a-zA-Z]{2,3}"  
done

echo "My dear Ronald Fisher, I hope you enjoyed reading the book about Thomas Bayes the other day. Kind regards, Gertrude Cox" | grep -E "[A-Z][a-z]+ [A-Z][a-z]+"

echo "To refresh during a boring session, I jumped 234.26 meters from 46.9462873,7.4446943 to 46.9450720,7.4462414." | sed -E 's/([0-9]+\.[0-9]+),([0-9]+\.[0-9]+)/\1°,\2°/g'

echo "My name is Kaa, so trusssst in meeeee" | sed -E 's/([a-z])\1+/\1/g'

echo "My name is Kaa, so trusssst in meeeee" | sed -E 's/(([a-z])\2+)/[\1]/g'

echo "2.5x(y(6.0 + z) + 6.3) - 7.2(5.1 - a) + (1.8 - b)(3.2 + (c - 1.9))" | sed -E 's#\(([0-9a-z\.\+-\*/ ]*\([0-9a-z\.\+-\*/ ]*\)[0-9a-z\.\+-\*\/ ]*)\)#\[\1\]#g'

12.0.17 At the beginning it feels awk-ward

Corresponding exercises: see Section 10.6.1

```
seq 1 1000 | tr ' ' '\n' > numbers.txt  
```

awk '$1 >= 107 && $1 <= 121' numbers.txt

```
awk '$1 ~ /100/' numbers.txt  
```

awk '{print $1, log($1)}' numbers.txt > moreNumbers.txt

awk '{if($1 % 2){evenOdd = "odd"} else {evenOdd = "even"} print $1, log($1), evenOdd}' numbers.txt > moreNumbers.txt

wc -l moreNumbers.txt  
awk '{++n}END{print n}' moreNumbers.txt

awk '{++n; first += $1; second += $2}END{print first/n, second/n}' moreNumbers.txt

awk '$3=="odd" {++n; first += $1; second += $2}END{print first/n, second/n}' moreNumbers.txt

 awk '$1>1 {print $2-prev} {prev=$2}' moreNumbers.txt

awk '{if(n==9){print out $2; out = ""; n=0}else{out = out $2 "\t"; ++n}}' moreNumbers.txt

12.0.18 awk on the Banthracis proteome

Corresponding exercises: see Section 10.6.2

awk '$1 =="ID"' BanthracisProteome.txt > prots.txt

awk '{++tot; if($3=="Reviewed;") {++x}} END {print 100*(x/tot), "%"}' prots.txt

#!/bin/bash  
x=$(grep -c "Reviewed" prots.txt)  
tot=$(wc -l prots.txt | cut -f1 -d' ')  
percent=$(echo "scale=5; 100 * $x / $tot" | bc)  
echo "${percent}%"

# solution: 13.36246%

awk '$1=="ID" {tot = tot+$4} END {print tot}' BanthracisProteome.txt

# solution: 1439306

awk '$1=="ID" {print $2, $4}' BanthracisProteome.txt | sort > len.txt  
awk '$1=="ID" && $2!~/[0-9]/ {print $2, $3}' BanthracisProteome.txt | sort > status.txt  
join len.txt status.txt > len_status.txt

# if 1st column is ID, store name & len and empty the seq variable. If 1st column is SQ, set addseq=1. If addseq=1, add sequence to seq. If 1st column is //, set addseq=0 (stop adding sequences) and print.  
# The order of these commands is crucial. e.g. if the last two conditions are switched, you would add "//" to the seq.  
awk 'BEGIN {addseq=0}; {if ($1 == "ID") {name= $2; len=$4; seq="";} else {if ($1 == "SQ") {addseq=1;} else {if ($1 == "//") {addseq=0; print name, len, seq;} else {if (addseq == 1) {seq = seq $1 $2 $3 $4 $5 $6}}}}}' BanthracisProteome.txt> seq.txt

awk 'BEGIN {test=0}; {if (length($3) != $2) {print "ERR: lengths differ for " $0; test=1;}}; END {if (test == 1) {print "ERR: something went wrong";} else {print "file checked"}}' seq.txt

12.0.19 R and bash

Corresponding exercises: see Section 11.1.1

mu=10; echo "cat(rnorm(1000, $mu))" | R --slave | cut -d" " -f3- | tr " " "\n" | awk '{s+=$0; tot+=1};END {print s/tot}'

#!/bin/bash  
wget https://data.geo.admin.ch/ch.meteoschweiz.messwerte-lufttemperatur-10min/ch.meteoschweiz.messwerte-lufttemperatur-10min_en.csv  
cut -d";" -f4,6 ch.meteoschweiz.messwerte-lufttemperatur-10min_en.csv | tail -n+2 > cols.csv  
echo 'file <- read.csv("cols.csv", sep = ";"); pdf("altVsTemp.pdf"); plot(file[,2], file[,1], xlab = "altitude", ylab = "temperature", main = "altitude vs temperature"); dev.off()' | R --slave  
cat ch.meteoschweiz.messwerte-lufttemperatur-10min_en.csv | grep Fribourg | cut -d";" -f4  
rm ch.meteoschweiz.messwerte-lufttemperatur-10min_en.csv  
rm cols.csv

awk '$1 == "SQ" {print $3, $5}' BanthracisProteome.txt > lenWeight.txt  
echo 'lenWeight <- read.table("lenWeight.txt"); pdf("lenVsWeight.pdf"); plot(lenWeight[,1],  
lenWeight[,2]); dev.off()' | R --slave  
rm lenWeight.txt

#!/bin/bash  
for go in GO:0005886 GO:0005737 GO:0003677 GO:0005524 GO:0016021; do  
#if $1=ID, save length and set "found" to 0;  
#if $1=DR, $2 is a GO cross-reference and the line contains the GO-term of the current loop iteration, set "found" to 1;  
#if the end of the protein is reached ($1=//), and the GO-term was found (found=1), print the length.  
awk -vGO=${go} '$1=="ID"{len=$4; found=0}; $1=="DR" && $2~"GO" && $0~GO {found=1}; $1=="//"&&found{print len}' BanthracisProteome.txt > ${go}_length.txt  
echo "lengths <- read.table('${go}_length.txt'); pdf('${go}_length.pdf'); hist(lengths[,1],main = '${go}'); dev.off()" | R --slave;  
done