diff --git a/.gitattributes b/.gitattributes index 5ffe9c3c9..bb05dd6ce 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,2 @@ page/* linguist-vendored +* text=auto \ No newline at end of file diff --git a/.github/workflows/DeployPage.yml b/.github/workflows/DeployPage.yml index 1a93563bc..aa680074d 100644 --- a/.github/workflows/DeployPage.yml +++ b/.github/workflows/DeployPage.yml @@ -38,16 +38,23 @@ jobs: # `@def prepath = "YourPackage.jl/web"` - run: julia -e ' using Pkg; - Pkg.add(["NodeJS", "JSON", "Franklin", "PkgPage"]); + Pkg.add(["NodeJS", "JSON", "Franklin"]); using NodeJS; - run(`$(npm_cmd()) install highlight.js purgecss`); - import PkgPage; - PkgPage.optimize(input="website", output="Fall20"); - ls; - pwd;' + cd("website"); + run(`$(npm_cmd()) install highlight.js`); + using Franklin; + optimize(); + mkdir("__site2"); + mv("__site", "__site2/Fall20"); + write("__site2/index.html", """ + + + + """);' - name: Build and Deploy uses: JamesIves/github-pages-deploy-action@releases/v3 with: SSH: true BRANCH: gh-pages - FOLDER: website/__site + FOLDER: website/__site2 diff --git a/README.md b/README.md index 66603e731..efee82343 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,12 @@ # 18.S191: Introduction to computational thinking for real-world problems -

Go to Lectures and Problem Sets :balloon:

+

Go to course website :balloon:

-## Fall 2020 (9 Units) +Welcome to the new **18.S191** course! -Welcome to the new MIT course 18.S191! +This is an introductory course on Computational Thinking. We use the [Julia programming language](http://www.julialang.org) to approach real-world problems in varied areas applying data analysis and computational and mathematical modeling. In this class you will learn computer science, software, algorithms, applications, and mathematics as an integrated whole. -This is an introductory course on Computational Thinking, using the modern [Julia programming language](http://www.julialang.org), to approach real-world problems in the following areas via data analysis and computational and mathematical modeling. In this class you will learn applications, computer science, software, algorithms, and mathematics as an integrated whole. +Topics include: - Image analysis - Particle dynamics and ray tracing @@ -14,23 +14,27 @@ This is an introductory course on Computational Thinking, using the modern [Juli - Climate modeling -### Course Materials -Course materials will be posted [here](course-materials.md). - -Julia learning materials may be found [here](http://www.julialang.org/learning) - ## Professors -[Alan Edelman](http://math.mit.edu/~edelman), [David P. Sanders](http://sistemas.fciencias.unam.mx/~dsanders/), [Grant Sanderson](https://www.3blue1brown.com/about), & [James Schloss](https://eapsweb.mit.edu/people/jars) +[Alan Edelman](http://math.mit.edu/~edelman), [David P. Sanders](http://sistemas.fciencias.unam.mx/~dsanders/), [Grant Sanderson](https://www.3blue1brown.com/about), [James Schloss](https://eapsweb.mit.edu/people/jars), [Benoit Forget](https://web.mit.edu/nse/people/faculty/forget.html) + +### Course Material + +The course material includes **recorded lectures** and **Pluto notebooks**, these are available on the [course website](https://computationalthinking.mit.edu/Fall20), which will be updated regularly. 
+ +Every week comes with a problem set where you apply the material to a real problem. Homework sets are **interactive [Pluto notebooks](https://github.com/fonsp/Pluto.jl)** - you get automatic visualizations and verification while you work on your code. + +Julia learning materials may be found [here](http://www.julialang.org/learning) ## Logistics -TR 2:30–3:30pm, online -- Tuesdays: Prerecorded videos, released on YouTube (link to follow later) +TR 2:30–3:30pm Eastern, online (Go to the lecture page on this site to stream it.) + +- Tuesdays: Prerecorded videos, released on YouTube and played live on this site. - Thursdays: Live sessions (same YouTube link 2:30–3) and MIT-only discussion (3-3:30); link to follow @@ -39,14 +43,12 @@ Start date: September 1, 2020 Office hours TBD. - - ### Discussion forum and homework submission -- [Discord](https://discord.gg/amhdfp): discussion +- [Discord](https://discord.gg/Z5qnVf8): discussion (we encourage you to hang out here during class!) -- [Piazza](https://piazza.com/class/kd33x1xnfyq3b1): allows for anonymity to other students, discussion +- [Piazza](https://piazza.com/class/kd33x1xnfyq3b1): (MIT only) allows for anonymity to other students, discussion -- [Canvas](https://canvas.mit.edu/courses/5637): homework submission +- [Canvas](https://canvas.mit.edu/courses/5637): (MIT only) homework submission. If you're a non-MIT student, please find a partner to cross-grade homeworks via Discord. ### Evaluation @@ -57,7 +59,10 @@ Office hours TBD. * No exams -Problem sets consist of coding and will be submitted online via Canvas. +Problem sets consist of code. MIT students enrolled in the course must submit homeworks via Canvas. If you are not a student then we encourage you to join the Discord and find a cross-grading partner. + -

Go to Lectures and Problem Sets :balloon:

+

Go to course website :balloon:

diff --git a/course-materials.md b/course-materials.md deleted file mode 100644 index 7f517d4de..000000000 --- a/course-materials.md +++ /dev/null @@ -1,38 +0,0 @@ -

18.S191 Fall 2020 Course materials

-

Introduction to Computational Thinking with Application to Real-World Problems

- -

Homeworks

-These are in the form of Pluto notebooks. Homeworks will be released on Thursdays and due on Thursdays 11:59pm Eastern time. - -HW0 is for you to get your system set up correctly and to test our grading software. You must submit it but it will not count towards your grade. - -|Homework|Due date|Link|Resources| -|--|--|--|--| -|HW0 (not graded) | Thursday 3 September | | | - - -[md:setup]:https://github.com/mitmath/18S191/blob/master/homework/homework0/Installing%20Julia%20%2B%20Pluto.md -[jl:hw0]:https://github.com/mitmath/18S191/blob/master/homework/homework0/hw0.jl -[html:hw0]:https://htmlpreview.github.io/?https://github.com/mitmath/18S191/blob/master/homework/homework0/hw0.html - -

Lectures

- -Lectures will be uploaded to the Julia youtube channel. View the [full playlist here](https://www.youtube.com/playlist?list=PLP8iPy9hna6Q2Kr16aWPOKE0dz9OnsnIJ). - - - -| Lecture | Date | Extras | Lectures | -| ----------- | ----------- | ---------------------------------------------------- | ---------------------- | -| 1 | September 1 | | | - - - - -[video:1intros]:https://www.youtube.com/watch?v=vxjRWtWoD_w -[video:1ted]:https://www.youtube.com/watch?v=qGW0GT1rCvs&list=PLP8iPy9hna6Q2Kr16aWPOKE0dz9OnsnIJ&index=6&t=0s -[video:1pluto]:https://www.youtube.com/watch?v=OOjKEgbt8AI - -[video:1images]:https://www.youtube.com/watch?v=DGojI9xcCfg -[jl:1images]:https://github.com/mitmath/18S191/blob/master/lecture_notebooks/Lecture%201%20-%20Images.jl -[video:1arrays]:https://www.youtube.com/watch?v=foN1_hAGfNg&list=PLP8iPy9hna6Q2Kr16aWPOKE0dz9OnsnIJ&index=4&t=0s - diff --git a/homework/homework1/hw1.html b/homework/homework1/hw1.html new file mode 100644 index 000000000..16de602bf --- /dev/null +++ b/homework/homework1/hw1.html @@ -0,0 +1,773 @@ + + + + + ⚡ Pluto.jl ⚡ + + + + + + + + + + + + +

homework 1, version 3

+
12.4 μs

Submission by: Jazzy Doe (jazz@mit.edu)

+
11.1 ms

Homework 1 - convolutions

+

18.S191, fall 2020

+

This notebook contains built-in, live answer checks! In some exercises you will see a coloured box, which runs a test case on your code, and provides feedback based on the result. Simply edit the code, run it, and the check runs again.

+

For MIT students: there will also be some additional (secret) test cases that will be run as part of the grading process, and we will look at your notebook and write comments.

+

Feel free to ask questions!

+
17.5 μs
student
8.3 ms

Let's create a package environment:

+
8 μs
143 ms

We set up Images.jl again:

+
7.7 μs
101 s





2 μs

Exercise 1 - Manipulating vectors (1D images)

+

A Vector is a 1D array. We can think of that as a 1D image.

+
10.6 μs
example_vector
8.7 μs
360 ms

Exercise 1.1

+

👉 Make a random vector random_vect of length 10 using the rand function.

+
8.5 μs
random_vect
missing
6.1 μs
239 μs

Here we go!

Replace missing with your answer.

+ +
+
30.5 ms

Hint

You can find out more about any function (like rand) by creating a new cell and typing:

+
?rand
+

Once the Live Docs are open, you can select any code to learn more about it. It might be useful to leave it open all the time, and get documentation while you type code.

+ +
+
121 ms

👉 Make a function mean using a for loop, which computes the mean/average of a vector of numbers.

+
7 μs
mean (generic function with 1 method)
24.6 μs
missing
196 μs

Here we go!

Replace missing with your answer.

+ +
+
16.9 μs

👉 Define m to be the mean of random_vect.

+
13.2 μs
m
missing
2.7 μs

Here we go!

Replace missing with your answer.

+ +
+
13.2 μs

👉 Write a function demean, which takes a vector x and subtracts the mean from each value in x.

+
12.9 μs
demean (generic function with 1 method)
29 μs

Let's check that the mean of the demean(random_vect) is 0:

+

Due to floating-point round-off error it may not be exactly 0.

+
23.2 μs

Info

The following cells error because random_vect is not yet defined. Have you done the first exercise?

+
+
31.6 μs

UndefVarError: copy_of_random_vect not defined

  1. top-level scope@Local: 1
  2. top-level scope@Parse.jl:111
  3. (::Distributed.var"#103#104"{Distributed.CallMsg{:call}})()@process_messages.jl:290
  4. run_work_thunk(::Distributed.var"#103#104"{Distributed.CallMsg{:call}}, ::Bool)@process_messages.jl:79
  5. run_work_thunk(::Distributed.RemoteValue, ::Function)@process_messages.jl:88
  6. (::Distributed.var"#96#98"{Distributed.RemoteValue,Distributed.var"#103#104"{Distributed.CallMsg{:call}}})()@task.jl:356
---

MethodError: no method matching copy(::Missing)

Closest candidates are:

copy(!Matched::Random.DSFMT.GF2X) at /build/julia/src/julia-1.5.0/usr/share/julia/stdlib/v1.5/Random/src/DSFMT.jl:111

copy(!Matched::DataStructures.IntSet) at /home/shashi/.julia/packages/DataStructures/6txFj/src/int_set.jl:11

copy(!Matched::Markdown.MD) at /build/julia/src/julia-1.5.0/usr/share/julia/stdlib/v1.5/Markdown/src/parse/parse.jl:30

...

  1. top-level scope@Local: 1
  2. top-level scope@Parse.jl:111
  3. (::Distributed.var"#103#104"{Distributed.CallMsg{:call}})()@process_messages.jl:290
  4. run_work_thunk(::Distributed.var"#103#104"{Distributed.CallMsg{:call}}, ::Bool)@process_messages.jl:79
  5. run_work_thunk(::Distributed.RemoteValue, ::Function)@process_messages.jl:88
  6. (::Distributed.var"#96#98"{Distributed.RemoteValue,Distributed.var"#103#104"{Distributed.CallMsg{:call}}})()@task.jl:356
---

Exercise 1.2

+

👉 Generate a vector of 100 zeros. Change the center 20 elements to 1.

+
7.8 μs
create_bar (generic function with 1 method)
38.4 μs
152 μs

Here we go!

Replace missing with your answer.

+ +
+
17.3 μs

Exercise 1.3

+

👉 Write a function that turns a Vector of Vectors into a Matrix.

+
12.3 μs
vecvec_to_matrix (generic function with 1 method)
27.1 μs
missing
28.9 ms

Here we go!

Replace missing with your answer.

+ +
+
23.6 μs

👉 Write a function that turns a Matrix into a Vector of Vectors.

+
7.5 μs
matrix_to_vecvec (generic function with 1 method)
24.6 μs
missing
47.3 ms

Here we go!

Replace missing with your answer.

+ +
+
35.9 μs
colored_line (generic function with 2 methods)
67.9 μs





1.3 μs

Exercise 2 - Manipulating images

+

In this exercise we will get familiar with matrices (2D arrays) in Julia, by manipulating images. Recall that in Julia images are matrices of RGB color objects.

+

Let's load a picture of Philip again.

+
13.9 μs
philip_file
"/tmp/jl_4reFYZ"
996 ms
philip
2.9 s

Hi there Philip

+
10.7 μs

Exercise 2.1

+

👉 Write a function mean_colors that accepts an object called image. It should calculate the mean (average) amounts of red, green and blue in the image and return a tuple (r, g, b) of those means.

+
10.3 μs
mean_colors (generic function with 1 method)
58.7 μs
missing
155 μs

Here we go!

Replace missing with your answer.

+ +
+
49.5 ms
742 ns

Exercise 2.2

+

👉 Look up the documentation on the floor function. Use it to write a function quantize(x::Number) that takes in a value x (which you can assume is between 0 and 1) and "quantizes" it into bins of width 0.1. For example, check that 0.267 gets mapped to 0.2.

+
9.1 μs
quantize (generic function with 3 methods)
109 μs
205 μs

Here we go!

Replace missing with your answer.

+ +
+
14.3 μs

Exercise 2.3

+

👉 Write the second method of the function quantize, i.e. a new version of the function with the same name. This method will accept a color object called color, of the type AbstractRGB.

+

Write the function in the same cell as quantize(x::Number) from the last exercise. 👆

+

Here, ::AbstractRGB is a type annotation. This ensures that this version of the function will be chosen when passing in an object whose type is a subtype of the AbstractRGB abstract type. For example, both the RGB and RGBX types satisfy this.

+

The method you write should return a new RGB object, in which each component (r, g and b) are quantized.

+
20.8 μs

Exercise 2.4

+

👉 Write a method quantize(image::AbstractMatrix) that quantizes an image by quantizing each pixel in the image. (You may assume that the matrix is a matrix of color objects.)

+

Write the function in the same cell as quantize(x::Number) from the last exercise. 👆

+
10.9 μs

Let's apply your method!

+
7.7 μs
missing
307 μs

Exercise 2.5

+

👉 Write a function invert that inverts a color, i.e. sends $(r, g, b)$ to $(1 - r, 1 - g, 1 - b)$.

+
12 μs
invert (generic function with 1 method)
27.4 μs

Let's invert some colors:

+
11.1 μs
black
12.2 ms
missing
212 μs
red
8.3 μs
missing
3.7 μs

Can you invert the picture of Philip?

+
12.3 μs
philip_inverted
missing
3.4 μs

Exercise 2.6

+

👉 Write a function noisify(x::Number, s) to add randomness of intensity $s$ to a value $x$, i.e. to add a random value between $-s$ and $+s$ to $x$. If the result falls outside the range $(0, 1)$ you should "clamp" it to that range. (Note that Julia has a clamp function, but you should write your own function myclamp(x).)

+
10.4 μs
noisify (generic function with 3 methods)
81.5 μs

Hint

The rand function generates (uniform) random floating-point numbers between 0 and 1.

+ +
+
18.2 μs

👉 Write the second method noisify(c::AbstractRGB, s) to add random noise of intensity s to each of the (r,g,b) values in a colour.

+

Write the function in the same cell as noisify(x::Number) from the last exercise. 👆

+
11.3 μs
0.0
247 ms
missing
149 μs
1.2 μs

👉 Write the third method noisify(image::AbstractMatrix, s) to noisify each pixel of an image.

+

Write the function in the same cell as noisify(x::Number) from the last exercise. 👆

+
18.5 μs
0.0
134 μs
missing
229 μs
696 ns

👉 For which noise intensity does it become unrecognisable?

+

You may need noise intensities larger than 1. Why?

+
8.9 μs
answer_about_noise_intensity

The image is unrecognisable with intensity ...

+
14.5 μs
728 ns
19.7 s
decimate (generic function with 2 methods)
72.1 μs





1.8 μs

Exercise 3 - Convolutions

+

As we have seen in the videos, we can produce cool effects using the mathematical technique of convolutions. We input one image $M$ and get a new image $M'$ back.

+

Conceptually we think of M as a matrix. In practice, in Julia it will be a Matrix of color objects, and we may need to take that into account. Ideally, however, we should write a generic function that will work for any type of data contained in the matrix.

+

A convolution works on a small window of an image, i.e. a region centered around a given point $(i, j)$. We will suppose that the window is a square region with odd side length $2\ell + 1$, running from $-\ell, \ldots, 0, \ldots, \ell$.

+

The result of the convolution over a given window, centred at the point $(i, j)$, is a single number; this number is the value that we will use for $M'_{i, j}$. (Note that neighbouring windows overlap.)

+

To get started let's restrict ourselves to convolutions in 1D. So a window is just a 1D region from $-\ell$ to $\ell$.

+
21.3 μs

+

Let's create a vector v of random numbers of length n=100.

+
5.8 μs
n
100
2.5 μs
v
190 ms

Feel free to experiment with different values!

+
8.9 μs

Exercise 3.1

+

You've seen some colored lines in this notebook to visualize arrays. Can you make another one?

+

👉 Try plotting our vector v using colored_line(v).

+
9.4 μs
658 ns

Try changing n and v around. Notice that you can run the cell v = rand(n) again to regenerate new random values.

+
8.6 μs

Exercise 3.2

+

We need to decide how to handle the boundary conditions, i.e. what happens if we try to access a position in the vector v beyond 1:n. The simplest solution is to assume that $v_i$ is 0 outside the original vector; however, this may lead to strange boundary effects.

+

A better solution is to use the closest value that is inside the vector. Effectively we are extending the vector and copying the extreme values into the extended positions. (Indeed, this is one way we could implement this; these extra positions are called ghost cells.)

+

👉 Write a function extend(v, i) that checks whether the position i is inside 1:n. If so, return the ith component of v; otherwise, return the nearest end value.

+
24.8 μs
extend (generic function with 1 method)
65.3 μs

Some test cases:

+
17.5 μs
missing
210 μs
missing
4.3 μs
missing
14.1 μs

Extended with 0:

+
7.3 μs
123 ms

Extended with your extend:

+
9.4 μs
missing
55.8 μs

Here we go!

Replace missing with your answer.

+ +
+
201 μs

Exercise 3.3

+

👉 Write a function blur_1D(v, l) that blurs a vector v with a window of length l by averaging the elements within a window from $-\ell$ to $\ell$. This is called a box blur.

+
13.3 μs
blur_1D (generic function with 1 method)
43.1 μs
72.1 ms

Exercise 3.4

+

👉 Apply the box blur to your vector v. Show the original and the new vector by creating two cells that call colored_line. Make the parameter $\ell$ interactive, and call it l_box instead of just l to avoid a variable naming conflict.

+
8.8 μs
744 ns

Hint

Have a look at Exercise 2 to see an example of adding interactivity with a slider. You can read the Interactivity and the PlutoUI sample notebooks (right click -> Open in new tab) to learn more.

+ +
+
21.1 μs

Exercise 3.5

+

The box blur is a simple example of a convolution, i.e. a linear function of a window around each point, given by

+

$$v'_i = \sum_{n} v_{i - n} \, k_n,$$

+

where k is a vector called a kernel.

+

Again, we need to take care about what happens if $v_{i - n}$ falls off the end of the vector.

+

👉 Write a function convolve_vector(v, k) that performs this convolution. You need to think of the vector $k$ as being centred on the position $i$. So $n$ in the above formula runs between $-\ell$ and $\ell$, where $2\ell + 1$ is the length of the vector $k$. You will need to do the necessary manipulation of indices.

+
18.7 μs
convolve_vector (generic function with 1 method)
27 μs

Hint

l = (length(k) - 1) ÷ 2

+ +
+
16.9 μs
4.7 μs
test_convolution
missing
183 μs

Edit the cell above, or create a new cell with your own test cases!

+
7.8 μs

Here we go!

Replace missing with your answer.

+ +
+
25.4 μs

Exercise 3.6

+

👉 Write a function gaussian_kernel.

+

The definition of a Gaussian in 1D is

+

$$G(x) = \frac{1}{\sqrt{2\pi\sigma^2}} \exp\left(\frac{-x^2}{2\sigma^2}\right)$$

+

We need to sample (i.e. evaluate) this at each pixel in a region of size $n^2$, and then normalize so that the sum of the resulting kernel is 1.

+

For simplicity you can take σ=1.

+
15.1 μs
gaussian_kernel (generic function with 1 method)
28.8 μs

Let's test your kernel function!

+
5.9 μs
gaussian_kernel_size_1D
3
2.4 μs
173 μs
test_gauss_1D_a
194 μs
3.3 μs
test_gauss_1D_b
10.9 μs





2.9 μs

Exercise 4 - Convolutions of images

+

Now let's move to 2D images. The convolution is then given by a kernel matrix K:

+

$$M'_{i, j} = \sum_{k, l} M_{i - k,\, j - l} \, K_{k, l},$$

+

where the sum is over the possible values of k and l in the window. Again we think of the window as being centered at (i,j).

+

A common notation for this operation is $*$:

+

$$M' = M * K.$$

+
17.5 μs

Exercise 4.1

+

👉 Write a function extend_mat that takes a matrix M and indices i and j, and returns the closest element of the matrix.

+
8.7 μs
extend_mat (generic function with 1 method)
37.1 μs

Hint

num_rows, num_columns = size(M)

+ +
+
14.6 μs

Let's test it!

+
7.5 μs
small_image
179 ms

Extended with 0:

+
6.6 μs
387 ms

Extended with your extend:

+
7.5 μs
9×9 Array{Missing,2}:
+ missing  missing  missing  missing  missing  missing  missing  missing  missing
+ missing  missing  missing  missing  missing  missing  missing  missing  missing
+ missing  missing  missing  missing  missing  missing  missing  missing  missing
+ missing  missing  missing  missing  missing  missing  missing  missing  missing
+ missing  missing  missing  missing  missing  missing  missing  missing  missing
+ missing  missing  missing  missing  missing  missing  missing  missing  missing
+ missing  missing  missing  missing  missing  missing  missing  missing  missing
+ missing  missing  missing  missing  missing  missing  missing  missing  missing
+ missing  missing  missing  missing  missing  missing  missing  missing  missing
83.7 ms

Here we go!

Replace missing with your answer.

+ +
+
188 μs
283×223 Array{Missing,2}:
+ missing  missing  missing  missing  missing  …  missing  missing  missing  missing
+ missing  missing  missing  missing  missing     missing  missing  missing  missing
+ missing  missing  missing  missing  missing     missing  missing  missing  missing
+ missing  missing  missing  missing  missing     missing  missing  missing  missing
+ missing  missing  missing  missing  missing     missing  missing  missing  missing
+ missing  missing  missing  missing  missing  …  missing  missing  missing  missing
+ missing  missing  missing  missing  missing     missing  missing  missing  missing
+ ⋮                                            ⋱           ⋮                 
+ missing  missing  missing  missing  missing     missing  missing  missing  missing
+ missing  missing  missing  missing  missing     missing  missing  missing  missing
+ missing  missing  missing  missing  missing     missing  missing  missing  missing
+ missing  missing  missing  missing  missing  …  missing  missing  missing  missing
+ missing  missing  missing  missing  missing     missing  missing  missing  missing
+ missing  missing  missing  missing  missing     missing  missing  missing  missing
178 ms

Exercise 4.2

+

👉 Implement a function convolve_image(M, K).

+
16 μs
convolve_image (generic function with 1 method)
36.9 μs

Hint

num_rows, num_columns = size(K)

+ +
+
16.5 μs

Let's test it out! 🎃

+
28.5 μs
test_image_with_border
204 ms
K_test
3×3 Array{Float64,2}:
+ 0.0  0.0  0.0
+ 0.5  0.0  0.5
+ 0.0  0.0  0.0
134 ms
missing
176 μs

Edit K_test to create your own test case!

+
17.8 μs
missing
312 μs

+

You can create all sorts of effects by choosing the kernel in a smart way. Today, we will implement two special kernels, to produce a Gaussian blur and a Sobel edge detect filter.

+

Make sure that you have watched the lecture about convolutions!

+
24 μs

Exercise 4.3

+

👉 Apply a Gaussian blur to an image.

+

Here, the 2D Gaussian kernel will be defined as

+

$$G(x, y) = \frac{1}{2\pi\sigma^2} e^{\frac{-(x^2 + y^2)}{2\sigma^2}}$$

+
10 μs
with_gaussian_blur (generic function with 1 method)
24.5 μs

Let's make it interactive. 💫

+
9.5 μs
+ + +
+
+ + +
+ +
+ +
+
+ +
+ + Enable webcam + +
+ + +
+
49.2 ms
missing
145 μs
664 ms

Exercise 4.4

+

👉 Create a Sobel edge detection filter.

+

Here, we will need to create two separate filters that separately detect edges in the horizontal and vertical directions:

+

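$$G_x = \left(\begin{bmatrix} 1 \\ 2 \\ 1 \end{bmatrix} \otimes \begin{bmatrix} 1 & 0 & -1 \end{bmatrix}\right) * A = \begin{bmatrix} 1 & 0 & -1 \\ 2 & 0 & -2 \\ 1 & 0 & -1 \end{bmatrix} * A, \qquad G_y = \left(\begin{bmatrix} 1 \\ 0 \\ -1 \end{bmatrix} \otimes \begin{bmatrix} 1 & 2 & 1 \end{bmatrix}\right) * A = \begin{bmatrix} 1 & 2 & 1 \\ 0 & 0 & 0 \\ -1 & -2 & -1 \end{bmatrix} * A$$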

+

Here A is the array corresponding to your image. We can think of these as derivatives in the x and y directions.

+

Then we combine them by finding the magnitude of the gradient (in the sense of multivariate calculus) by defining

+

$$G_\text{total} = \sqrt{G_x^2 + G_y^2}.$$

+

For simplicity you can choose one of the "channels" (colours) in the image to apply this to.

+
37.8 μs
with_sobel_edge_detect (generic function with 1 method)
24.3 μs
+ + +
+
+ + +
+ +
+ +
+
+ +
+ + Enable webcam + +
+ + +
+
92.6 μs
missing
146 μs
172 ms





1.3 μs

Exercise 5 - Lecture transcript

+

(MIT students only)

+

Please see the Canvas post for transcript document for week 1 here.

+

We need each of you to correct about 100 lines (see instructions in the beginning of the document.)

+

👉 Please mention the name of the video and the line ranges you edited:

+
69.7 μs
lines_i_edited

Convolution, lines 100-0 (for example)

+
32.8 μs





1.4 μs

Oops!

Before you submit, remember to fill in your name and kerberos ID at the top of this notebook!

+
+
53.7 μs





2 μs
hint (generic function with 1 method)
34.1 μs
almost (generic function with 1 method)
203 μs
still_missing (generic function with 2 methods)
88.6 μs
keep_working (generic function with 2 methods)
88.8 μs
yays
29 ms
correct (generic function with 2 methods)
69.8 μs
not_defined (generic function with 1 method)
42.5 μs
7.4 ms
camera_input (generic function with 1 method)
92.8 μs
process_raw_camera_data (generic function with 1 method)
73.5 μs
+ + + \ No newline at end of file diff --git a/homework/homework1/hw1.jl b/homework/homework1/hw1.jl new file mode 100644 index 000000000..2a95163c8 --- /dev/null +++ b/homework/homework1/hw1.jl @@ -0,0 +1,1530 @@ +### A Pluto.jl notebook ### +# v0.11.13 + +using Markdown +using InteractiveUtils + +# This Pluto notebook uses @bind for interactivity. When running this notebook outside of Pluto, the following 'mock version' of @bind gives bound variables a default value (instead of an error). +macro bind(def, element) + quote + local el = $(esc(element)) + global $(esc(def)) = Core.applicable(Base.get, el) ? Base.get(el) : missing + el + end +end + +# ╔═╡ 83eb9ca0-ed68-11ea-0bc5-99a09c68f867 +md"_homework 1, version 4_" + +# ╔═╡ ac8ff080-ed61-11ea-3650-d9df06123e1f +md""" + +# **Homework 1** - _convolutions_ +`18.S191`, fall 2020 + +This notebook contains _built-in, live answer checks_! In some exercises you will see a coloured box, which runs a test case on your code, and provides feedback based on the result. Simply edit the code, run it, and the check runs again. + +_For MIT students:_ there will also be some additional (secret) test cases that will be run as part of the grading process, and we will look at your notebook and write comments. + +Feel free to ask questions! +""" + +# ╔═╡ 911ccbce-ed68-11ea-3606-0384e7580d7c +# edit the code below to set your name and kerberos ID (i.e. email without @mit.edu) + +student = (name = "Jazzy Doe", kerberos_id = "jazz") + +# press the ▶ button in the bottom right of this cell to run your edits +# or use Shift+Enter + +# you might need to wait until all other cells in this notebook have completed running. 
+# scroll down the page to see what's up + +# ╔═╡ 8ef13896-ed68-11ea-160b-3550eeabbd7d +md""" + +Submission by: **_$(student.name)_** ($(student.kerberos_id)@mit.edu) +""" + +# ╔═╡ 5f95e01a-ee0a-11ea-030c-9dba276aba92 +md"_Let's create a package environment:_" + +# ╔═╡ 65780f00-ed6b-11ea-1ecf-8b35523a7ac0 +begin + import Pkg + Pkg.activate(mktempdir()) +end + +# ╔═╡ 74b008f6-ed6b-11ea-291f-b3791d6d1b35 +begin + Pkg.add(["Images", "ImageMagick"]) + using Images +end + +# ╔═╡ 6b30dc38-ed6b-11ea-10f3-ab3f121bf4b8 +begin + Pkg.add("PlutoUI") + using PlutoUI +end + +# ╔═╡ 67461396-ee0a-11ea-3679-f31d46baa9b4 +md"_We set up Images.jl again:_" + +# ╔═╡ 540ccfcc-ee0a-11ea-15dc-4f8120063397 +md""" +## **Exercise 1** - _Manipulating vectors (1D images)_ + +A `Vector` is a 1D array. We can think of that as a 1D image. + +""" + +# ╔═╡ 467856dc-eded-11ea-0f83-13d939021ef3 +example_vector = [0.5, 0.4, 0.3, 0.2, 0.1, 0.0, 0.7, 0.0, 0.7, 0.9] + +# ╔═╡ ad6a33b0-eded-11ea-324c-cfabfd658b56 +md"#### Exerise 1.1 +👉 Make a random vector `random_vect` of length 10 using the `rand` function. +" + +# ╔═╡ f51333a6-eded-11ea-34e6-bfbb3a69bcb0 +random_vect = missing # replace this with your code! + +# ╔═╡ cf738088-eded-11ea-2915-61735c2aa990 +md"👉 Make a function `mean` using a `for` loop, which computes the mean/average of a vector of numbers." + +# ╔═╡ 0ffa8354-edee-11ea-2883-9d5bfea4a236 +function mean(x) + + return missing +end + +# ╔═╡ 1f104ce4-ee0e-11ea-2029-1d9c817175af +mean([1, 2, 3]) + +# ╔═╡ 1f229ca4-edee-11ea-2c56-bb00cc6ea53c +md"👉 Define `m` to be the mean of `random_vect`." 
+ +# ╔═╡ 2a391708-edee-11ea-124e-d14698171b68 +m = missing + +# ╔═╡ e2863d4c-edef-11ea-1d67-332ddca03cc4 +md"""👉 Write a function `demean`, which takes a vector `x` and subtracts the mean from each value in `x`.""" + +# ╔═╡ ec5efe8c-edef-11ea-2c6f-afaaeb5bc50c +function demean(x) + + return missing +end + +# ╔═╡ 29e10640-edf0-11ea-0398-17dbf4242de3 +md"Let's check that the mean of the `demean(random_vect)` is 0: + +_Due to floating-point round-off error it may *not* be *exactly* 0._" + +# ╔═╡ 6f67657e-ee1a-11ea-0c2f-3d567bcfa6ea +if ismissing(random_vect) + md""" + !!! info + The following cells error because `random_vect` is not yet defined. Have you done the first exercise? + """ +end + +# ╔═╡ 73ef1d50-edf0-11ea-343c-d71706874c82 +copy_of_random_vect = copy(random_vect); # in case demean modifies `x` + +# ╔═╡ 38155b5a-edf0-11ea-3e3f-7163da7433fb +mean(demean(copy_of_random_vect)) + +# ╔═╡ a5f8bafe-edf0-11ea-0da3-3330861ae43a +md""" +#### Exercise 1.2 + +👉 Generate a vector of 100 zeros. Change the center 20 elements to 1. +""" + +# ╔═╡ b6b65b94-edf0-11ea-3686-fbff0ff53d08 +function create_bar() + + return missing +end + +# ╔═╡ 22f28dae-edf2-11ea-25b5-11c369ae1253 +md""" +#### Exercise 1.3 + +👉 Write a function that turns a `Vector` of `Vector`s into a `Matrix`. +""" + +# ╔═╡ 8c19fb72-ed6c-11ea-2728-3fa9219eddc4 +function vecvec_to_matrix(vecvec) + + return missing +end + +# ╔═╡ c4761a7e-edf2-11ea-1e75-118e73dadbed +vecvec_to_matrix([[1,2], [3,4]]) + +# ╔═╡ 393667ca-edf2-11ea-09c5-c5d292d5e896 +md""" + + +👉 Write a function that turns a `Matrix` into a`Vector` of `Vector`s . 
+""" + +# ╔═╡ 9f1c6d04-ed6c-11ea-007b-75e7e780703d +function matrix_to_vecvec(matrix) + + return missing +end + +# ╔═╡ 70955aca-ed6e-11ea-2330-89b4d20b1795 +matrix_to_vecvec([6 7; 8 9]) + +# ╔═╡ 5da8cbe8-eded-11ea-2e43-c5b7cc71e133 +begin + colored_line(x::Vector{<:Real}) = Gray.(Float64.((hcat(x)'))) + colored_line(x::Any) = nothing +end + +# ╔═╡ 56ced344-eded-11ea-3e81-3936e9ad5777 +colored_line(example_vector) + +# ╔═╡ b18e2c54-edf1-11ea-0cbf-85946d64b6a2 +colored_line(random_vect) + +# ╔═╡ d862fb16-edf1-11ea-36ec-615d521e6bc0 +colored_line(create_bar()) + +# ╔═╡ e083b3e8-ed61-11ea-2ec9-217820b0a1b4 +md""" +## **Exercise 2** - _Manipulating images_ + +In this exercise we will get familiar with matrices (2D arrays) in Julia, by manipulating images. +Recall that in Julia images are matrices of `RGB` color objects. + +Let's load a picture of Philip again. +""" + +# ╔═╡ c5484572-ee05-11ea-0424-f37295c3072d +philip_file = download("https://i.imgur.com/VGPeJ6s.jpg") + +# ╔═╡ e86ed944-ee05-11ea-3e0f-d70fc73b789c +md"_Hi there Philip_" + +# ╔═╡ c54ccdea-ee05-11ea-0365-23aaf053b7d7 +md""" +#### Exercise 2.1 +👉 Write a function **`mean_colors`** that accepts an object called `image`. It should calculate the mean (average) amounts of red, green and blue in the image and return a tuple `(r, g, b)` of those means. +""" + +# ╔═╡ f6898df6-ee07-11ea-2838-fde9bc739c11 +function mean_colors(image) + + return missing +end + +# ╔═╡ d75ec078-ee0d-11ea-3723-71fb8eecb040 + + +# ╔═╡ f68d4a36-ee07-11ea-0832-0360530f102e +md""" +#### Exercise 2.2 +👉 Look up the documentation on the `floor` function. Use it to write a function `quantize(x::Number)` that takes in a value $x$ (which you can assume is between 0 and 1) and "quantizes" it into bins of width 0.1. For example, check that 0.267 gets mapped to 0.2. 
+""" + +# ╔═╡ f6991a50-ee07-11ea-0bc4-1d68eb028e6a +begin + function quantize(x::Number) + + return missing + end + + function quantize(color::AbstractRGB) + # you will write me in a later exercise! + return missing + end + + function quantize(image::AbstractMatrix) + # you will write me in a later exercise! + return missing + end +end + +# ╔═╡ f6a655f8-ee07-11ea-13b6-43ca404ddfc7 +quantize(0.267), quantize(0.91) + +# ╔═╡ f6b218c0-ee07-11ea-2adb-1968c4fd473a +md""" +#### Exercise 2.3 +👉 Write the second **method** of the function `quantize`, i.e. a new *version* of the function with the *same* name. This method will accept a color object called `color`, of the type `AbstractRGB`. + +_Write the function in the same cell as `quantize(x::Number)` from the last exercise. 👆_ + +Here, `::AbstractRGB` is a **type annotation**. This ensures that this version of the function will be chosen when passing in an object whose type is a **subtype** of the `AbstractRGB` abstract type. For example, both the `RGB` and `RGBX` types satisfy this. + +The method you write should return a new `RGB` object, in which each component ($r$, $g$ and $b$) are quantized. +""" + +# ╔═╡ f6bf64da-ee07-11ea-3efb-05af01b14f67 +md""" +#### Exercise 2.4 +👉 Write a method `quantize(image::AbstractMatrix)` that quantizes an image by quantizing each pixel in the image. (You may assume that the matrix is a matrix of color objects.) + +_Write the function in the same cell as `quantize(x::Number)` from the last exercise. 👆_ +""" + +# ╔═╡ 25dad7ce-ee0b-11ea-3e20-5f3019dd7fa3 +md"Let's apply your method!" + +# ╔═╡ f6cc03a0-ee07-11ea-17d8-013991514d42 +md""" +#### Exercise 2.5 +👉 Write a function `invert` that inverts a color, i.e. sends $(r, g, b)$ to $(1 - r, 1-g, 1-b)$. 
+""" + +# ╔═╡ 63e8d636-ee0b-11ea-173d-bd3327347d55 +function invert(color::AbstractRGB) + + return missing +end + +# ╔═╡ 2cc2f84e-ee0d-11ea-373b-e7ad3204bb00 +md"Let's invert some colors:" + +# ╔═╡ b8f26960-ee0a-11ea-05b9-3f4bc1099050 +black = RGB(0.0, 0.0, 0.0) + +# ╔═╡ 5de3a22e-ee0b-11ea-230f-35df4ca3c96d +invert(black) + +# ╔═╡ 4e21e0c4-ee0b-11ea-3d65-b311ae3f98e9 +red = RGB(0.8, 0.1, 0.1) + +# ╔═╡ 6dbf67ce-ee0b-11ea-3b71-abc05a64dc43 +invert(red) + +# ╔═╡ 846b1330-ee0b-11ea-3579-7d90fafd7290 +md"Can you invert the picture of Philip?" + +# ╔═╡ 943103e2-ee0b-11ea-33aa-75a8a1529931 +philip_inverted = missing + +# ╔═╡ f6d6c71a-ee07-11ea-2b63-d759af80707b +md""" +#### Exercise 2.6 +👉 Write a function `noisify(x::Number, s)` to add randomness of intensity $s$ to a value $x$, i.e. to add a random value between $-s$ and $+s$ to $x$. If the result falls outside the range $(0, 1)$ you should "clamp" it to that range. (Note that Julia has a `clamp` function, but you should write your own function `myclamp(x)`.) +""" + +# ╔═╡ f6e2cb2a-ee07-11ea-06ee-1b77e34c1e91 +begin + function noisify(x::Number, s) + + return missing + end + + function noisify(color::AbstractRGB, s) + # you will write me in a later exercise! + return missing + end + + function noisify(image::AbstractMatrix, s) + # you will write me in a later exercise! + return missing + end +end + +# ╔═╡ f6fc1312-ee07-11ea-39a0-299b67aee3d8 +md""" +👉 Write the second method `noisify(c::AbstractRGB, s)` to add random noise of intensity $s$ to each of the $(r, g, b)$ values in a colour. + +_Write the function in the same cell as `noisify(x::Number)` from the last exercise. 
👆_ +""" + +# ╔═╡ 774b4ce6-ee1b-11ea-2b48-e38ee25fc89b +@bind color_noise Slider(0:0.01:1, show_value=true) + +# ╔═╡ 7e4aeb70-ee1b-11ea-100f-1952ba66f80f +noisify(red, color_noise) + +# ╔═╡ 6a05f568-ee1b-11ea-3b6c-83b6ada3680f + + +# ╔═╡ f70823d2-ee07-11ea-2bb3-01425212aaf9 +md""" +👉 Write the third method `noisify(image::AbstractMatrix, s)` to noisify each pixel of an image. + +_Write the function in the same cell as `noisify(x::Number)` from the last exercise. 👆_ +""" + +# ╔═╡ e70a84d4-ee0c-11ea-0640-bf78653ba102 +@bind philip_noise Slider(0:0.01:8, show_value=true) + +# ╔═╡ 9604bc44-ee1b-11ea-28f8-7f7af8d0cbb2 + + +# ╔═╡ f714699e-ee07-11ea-08b6-5f5169861b57 +md""" +👉 For which noise intensity does it become unrecognisable? + +You may need noise intensities larger than 1. Why? + +""" + +# ╔═╡ bdc2df7c-ee0c-11ea-2e9f-7d2c085617c1 +answer_about_noise_intensity = md""" +The image is unrecognisable with intensity ... +""" + +# ╔═╡ 81510a30-ee0e-11ea-0062-8b3327428f9d + + +# ╔═╡ e3b03628-ee05-11ea-23b6-27c7b0210532 +decimate(image, ratio=5) = image[1:ratio:end, 1:ratio:end] + +# ╔═╡ c8ecfe5c-ee05-11ea-322b-4b2714898831 +philip = let + original = Images.load(philip_file) + decimate(original, 8) +end + +# ╔═╡ 5be9b144-ee0d-11ea-2a8d-8775de265a1d +mean_colors(philip) + +# ╔═╡ 9751586e-ee0c-11ea-0cbb-b7eda92977c9 +quantize(philip) + +# ╔═╡ ac15e0d0-ee0c-11ea-1eaf-d7f88b5df1d7 +noisify(philip, philip_noise) + +# ╔═╡ e08781fa-ed61-11ea-13ae-91a49b5eb74a +md""" + +## **Exercise 3** - _Convolutions_ + +As we have seen in the videos, we can produce cool effects using the mathematical technique of **convolutions**. We input one image $M$ and get a new image $M'$ back. + +Conceptually we think of $M$ as a matrix. In practice, in Julia it will be a `Matrix` of color objects, and we may need to take that into account. Ideally, however, we should write a **generic** function that will work for any type of data contained in the matrix. 
+ +A convolution works on a small **window** of an image, i.e. a region centered around a given point $(i, j)$. We will suppose that the window is a square region with odd side length $2\ell + 1$, running from $-\ell, \ldots, 0, \ldots, \ell$. + +The result of the convolution over a given window, centred at the point $(i, j)$ is a *single number*; this number is the value that we will use for $M'_{i, j}$. +(Note that neighbouring windows overlap.) + +To get started let's restrict ourselves to convolutions in 1D. +So a window is just a 1D region from $-\ell$ to $\ell$. + +""" + +# ╔═╡ 7fc8ee1c-ee09-11ea-1382-ad21d5373308 +md""" +--- + +Let's create a vector `v` of random numbers of length `n=100`. +""" + +# ╔═╡ 7fcd6230-ee09-11ea-314f-a542d00d582e +n = 100 + +# ╔═╡ 7fdb34dc-ee09-11ea-366b-ffe10d1aa845 +v = rand(n) + +# ╔═╡ 7fe9153e-ee09-11ea-15b3-6f24fcc20734 +md"_Feel free to experiment with different values!_" + +# ╔═╡ 80108d80-ee09-11ea-0368-31546eb0d3cc +md""" +#### Exercise 3.1 +You've seen some colored lines in this notebook to visualize arrays. Can you make another one? + +👉 Try plotting our vector `v` using `colored_line(v)`. +""" + +# ╔═╡ 01070e28-ee0f-11ea-1928-a7919d452bdd + + +# ╔═╡ 7522f81e-ee1c-11ea-35af-a17eb257ff1a +md"Try changing `n` and `v` around. Notice that you can run the cell `v = rand(n)` again to regenerate new random values." + +# ╔═╡ 801d90c0-ee09-11ea-28d6-61b806de26dc +md""" +#### Exercise 3.2 +We need to decide how to handle the **boundary conditions**, i.e. what happens if we try to access a position in the vector `v` beyond `1:n`. The simplest solution is to assume that $v_{i}$ is 0 outside the original vector; however, this may lead to strange boundary effects. + +A better solution is to use the *closest* value that is inside the vector. Effectively we are extending the vector and copying the extreme values into the extended positions. 
(Indeed, this is one way we could implement this; these extra positions are called **ghost cells**.) + +👉 Write a function `extend(v, i)` that checks whether the position $i$ is inside `1:n`. If so, return the $i$th component of `v`; otherwise, return the nearest end value. +""" + +# ╔═╡ 802bec56-ee09-11ea-043e-51cf1db02a34 +function extend(v, i) + + return missing +end + +# ╔═╡ b7f3994c-ee1b-11ea-211a-d144db8eafc2 +md"_Some test cases:_" + +# ╔═╡ 803905b2-ee09-11ea-2d52-e77ff79693b0 +extend(v, 1) + +# ╔═╡ 80479d98-ee09-11ea-169e-d166eef65874 +extend(v, -8) + +# ╔═╡ 805691ce-ee09-11ea-053d-6d2e299ee123 +extend(v, n + 10) + +# ╔═╡ 806e5766-ee0f-11ea-1efc-d753cd83d086 +md"Extended with 0:" + +# ╔═╡ 38da843a-ee0f-11ea-01df-bfa8b1317d36 +colored_line([0, 0, example_vector..., 0, 0]) + +# ╔═╡ 9bde9f92-ee0f-11ea-27f8-ffef5fce2b3c +md"Extended with your `extend`:" + +# ╔═╡ 45c4da9a-ee0f-11ea-2c5b-1f6704559137 +if extend(v,1) === missing + missing +else + colored_line([extend(example_vector, i) for i in -1:12]) +end + +# ╔═╡ 80664e8c-ee09-11ea-0702-711bce271315 +md""" +#### Exercise 3.3 +👉 Write a function `blur_1D(v, l)` that blurs a vector `v` with a window of length `l` by averaging the elements within a window from $-\ell$ to $\ell$. This is called a **box blur**. +""" + +# ╔═╡ 807e5662-ee09-11ea-3005-21fdcc36b023 +function blur_1D(v, l) + + return missing +end + +# ╔═╡ 808deca8-ee09-11ea-0ee3-1586fa1ce282 +let + try + test_v = rand(n) + original = copy(test_v) + blur_1D(test_v, 5) + if test_v != original + md""" + !!! danger "Oopsie!" + It looks like your function _modifies_ `v`. Can you write it without doing so? Maybe you can use `copy`. + """ + end + catch + end +end + +# ╔═╡ 809f5330-ee09-11ea-0e5b-415044b6ac1f +md""" +#### Exercise 3.4 +👉 Apply the box blur to your vector `v`. Show the original and the new vector by creating two cells that call `colored_line`. 
Make the parameter $\ell$ interactive, and call it `l_box` instead of just `l` to avoid a variable naming conflict. +""" + +# ╔═╡ ca1ac5f4-ee1c-11ea-3d00-ff5268866f87 + + +# ╔═╡ 80ab64f4-ee09-11ea-29b4-498112ed0799 +md""" +#### Exercise 3.5 +The box blur is a simple example of a **convolution**, i.e. a linear function of a window around each point, given by + +$$v'_{i} = \sum_{n} \, v_{i - n} \, k_{n},$$ + +where $k$ is a vector called a **kernel**. + +Again, we need to take care about what happens if $v_{i -n }$ falls off the end of the vector. + +👉 Write a function `convolve_vector(v, k)` that performs this convolution. You need to think of the vector $k$ as being *centred* on the position $i$. So $n$ in the above formula runs between $-\ell$ and $\ell$, where $2\ell + 1$ is the length of the vector $k$. You will need to do the necessary manipulation of indices. +""" + +# ╔═╡ 28e20950-ee0c-11ea-0e0a-b5f2e570b56e +function convolve_vector(v, k) + + return missing +end + +# ╔═╡ 93284f92-ee12-11ea-0342-833b1a30625c +test_convolution = let + v = [1, 10, 100, 1000, 10000] + k = [0, 1, 0] + convolve_vector(v, k) +end + +# ╔═╡ 5eea882c-ee13-11ea-0d56-af81ecd30a4a +colored_line(test_convolution) + +# ╔═╡ cf73f9f8-ee12-11ea-39ae-0107e9107ef5 +md"_Edit the cell above, or create a new cell with your own test cases!_" + +# ╔═╡ 80b7566a-ee09-11ea-3939-6fab470f9ec8 +md""" +#### Exercise 3.6 +👉 Write a function `gaussian_kernel`. + +The definition of a Gaussian in 1D is + +$$G(x) = \frac{1}{\sqrt{2\pi \sigma^2}} \exp \left( \frac{-x^2}{2\sigma^2} \right)$$ + +We need to **sample** (i.e. evaluate) this at each pixel in a region of size $n^2$, +and then **normalize** so that the sum of the resulting kernel is 1. + +For simplicity you can take $\sigma=1$. +""" + +# ╔═╡ 1c8b4658-ee0c-11ea-2ede-9b9ed7d3125e +function gaussian_kernel(n) + + return missing +end + +# ╔═╡ f8bd22b8-ee14-11ea-04aa-ab16fd01826e +md"Let's test your kernel function!" 
+ +# ╔═╡ 2a9dd06a-ee13-11ea-3f84-67bb309c77a8 +gaussian_kernel_size_1D = 3 # change this value, or turn me into a slider! + +# ╔═╡ 38eb92f6-ee13-11ea-14d7-a503ac04302e +test_gauss_1D_a = let + v = random_vect + k = gaussian_kernel(gaussian_kernel_size_1D) + + if k !== missing + convolve_vector(v, k) + end +end + +# ╔═╡ b424e2aa-ee14-11ea-33fa-35491e0b9c9d +colored_line(test_gauss_1D_a) + +# ╔═╡ 24c21c7c-ee14-11ea-1512-677980db1288 +test_gauss_1D_b = let + v = create_bar() + k = gaussian_kernel(gaussian_kernel_size_1D) + + if k !== missing + convolve_vector(v, k) + end +end + +# ╔═╡ bc1c20a4-ee14-11ea-3525-63c9fa78f089 +colored_line(test_gauss_1D_b) + +# ╔═╡ b01858b6-edf3-11ea-0826-938d33c19a43 +md""" + + +## **Exercise 4** - _Convolutions of images_ + +Now let's move to 2D images. The convolution is then given by a **kernel** matrix $K$: + +$$M'_{i, j} = \sum_{k, l} \, M_{i- k, j - l} \, K_{k, l},$$ + +where the sum is over the possible values of $k$ and $l$ in the window. Again we think of the window as being *centered* at $(i, j)$. + +A common notation for this operation is $*$: + +$$M' = M * K.$$ +""" + +# ╔═╡ 7c1bc062-ee15-11ea-30b1-1b1e76520f13 +md""" +#### Exercise 4.1 +👉 Write a function `extend_mat` that takes a matrix `M` and indices `i` and `j`, and returns the closest element of the matrix. 
+""" + +# ╔═╡ 7c2ec6c6-ee15-11ea-2d7d-0d9401a5e5d1 +function extend_mat(M::AbstractMatrix, i, j) + + return missing +end + +# ╔═╡ 9afc4dca-ee16-11ea-354f-1d827aaa61d2 +md"_Let's test it!_" + +# ╔═╡ cf6b05e2-ee16-11ea-3317-8919565cb56e +small_image = Gray.(rand(5,5)) + +# ╔═╡ e3616062-ee27-11ea-04a9-b9ec60842a64 +md"Extended with `0`:" + +# ╔═╡ e5b6cd34-ee27-11ea-0d60-bd4796540b18 +[get(small_image, (i, j), Gray(0)) for (i,j) in Iterators.product(-1:7,-1:7)] + +# ╔═╡ d06ea762-ee27-11ea-2e9c-1bcff86a3fe0 +md"Extended with your `extend`:" + +# ╔═╡ e1dc0622-ee16-11ea-274a-3b6ec9e15ab5 +[extend_mat(small_image, i, j) for (i,j) in Iterators.product(-1:7,-1:7)] + +# ╔═╡ 3cd535e4-ee26-11ea-2482-fb4ad43dda19 +let + philip_head = philip[250:430,110:230] + [extend_mat(philip_head, i, j) for (i,j) in Iterators.product(-50:size(philip_head,1)+51, (-50:size(philip_head,2)+51))] +end + +# ╔═╡ 7c41f0ca-ee15-11ea-05fb-d97a836659af +md""" +#### Exercise 4.2 +👉 Implement a function `convolve_image(M, K)`. +""" + +# ╔═╡ 8b96e0bc-ee15-11ea-11cd-cfecea7075a0 +function convolve_image(M::AbstractMatrix, K::AbstractMatrix) + + return missing +end + +# ╔═╡ 5a5135c6-ee1e-11ea-05dc-eb0c683c2ce5 +md"_Let's test it out! 🎃_" + +# ╔═╡ 577c6daa-ee1e-11ea-1275-b7abc7a27d73 +test_image_with_border = [get(small_image, (i, j), Gray(0)) for (i,j) in Iterators.product(-1:7,-1:7)] + +# ╔═╡ 275a99c8-ee1e-11ea-0a76-93e3618c9588 +K_test = [ + 0 0 0 + 1/2 0 1/2 + 0 0 0 +] + +# ╔═╡ 42dfa206-ee1e-11ea-1fcd-21671042064c +convolve_image(test_image_with_border, K_test) + +# ╔═╡ 6e53c2e6-ee1e-11ea-21bd-c9c05381be07 +md"_Edit_ `K_test` _to create your own test case!_" + +# ╔═╡ e7f8b41a-ee25-11ea-287a-e75d33fbd98b +convolve_image(philip, K_test) + +# ╔═╡ 8a335044-ee19-11ea-0255-b9391246d231 +md""" +--- + +You can create all sorts of effects by choosing the kernel in a smart way. Today, we will implement two special kernels, to produce a **Gaussian blur** and a **Sobel edge detect** filter. 
+ +Make sure that you have watched [the lecture](https://www.youtube.com/watch?v=8rrHTtUzyZA) about convolutions! +""" + +# ╔═╡ 7c50ea80-ee15-11ea-328f-6b4e4ff20b7e +md""" +#### Exercise 4.3 +👉 Apply a **Gaussian blur** to an image. + +Here, the 2D Gaussian kernel will be defined as + +$$G(x,y)=\frac{1}{2\pi \sigma^2}e^{\frac{-(x^2+y^2)}{2\sigma^2}}$$ +""" + +# ╔═╡ aad67fd0-ee15-11ea-00d4-274ec3cda3a3 +function with_gaussian_blur(image) + + return missing +end + +# ╔═╡ 8ae59674-ee18-11ea-3815-f50713d0fa08 +md"_Let's make it interactive. 💫_" + +# ╔═╡ 7c6642a6-ee15-11ea-0526-a1aac4286cdd +md""" +#### Exercise 4.4 +👉 Create a **Sobel edge detection filter**. + +Here, we will need to create two separate filters that separately detect edges in the horizontal and vertical directions: + +```math +\begin{align} + +G_x &= \left(\begin{bmatrix} +1 \\ +2 \\ +1 \\ +\end{bmatrix} \otimes [1~0~-1] +\right) * A = \begin{bmatrix} +1 & 0 & -1 \\ +2 & 0 & -2 \\ +1 & 0 & -1 \\ +\end{bmatrix}*A\\ +G_y &= \left( +\begin{bmatrix} +1 \\ +0 \\ +-1 \\ +\end{bmatrix} \otimes [1~2~1] +\right) * A = \begin{bmatrix} +1 & 2 & 1 \\ +0 & 0 & 0 \\ +-1 & -2 & -1 \\ +\end{bmatrix}*A +\end{align} +``` +Here $A$ is the array corresponding to your image. +We can think of these as derivatives in the $x$ and $y$ directions. + +Then we combine them by finding the magnitude of the **gradient** (in the sense of multivariate calculus) by defining + +$$G_\text{total} = \sqrt{G_x^2 + G_y^2}.$$ + +For simplicity you can choose one of the "channels" (colours) in the image to apply this to. +""" + +# ╔═╡ 9eeb876c-ee15-11ea-1794-d3ea79f47b75 +function with_sobel_edge_detect(image) + + return missing +end + +# ╔═╡ 1b85ee76-ee10-11ea-36d7-978340ef61e6 +md""" +## **Exercise 5** - _Lecture transcript_ +_(MIT students only)_ + +Please see the Canvas post for transcript document for week 1 [here](https://canvas.mit.edu/courses/5637/discussion_topics/27880). 
+ +We need each of you to correct about 100 lines (see the instructions at the beginning of the document). + +👉 Please mention the name of the video and the line ranges you edited: +""" + +# ╔═╡ 477d0a3c-ee10-11ea-11cf-07b0e0ce6818 +lines_i_edited = md""" +Convolution, lines 100-0 (_for example_) +""" + +# ╔═╡ 8ffe16ce-ee20-11ea-18bd-15640f94b839 +if student.kerberos_id === "jazz" + md""" +!!! danger "Oops!" + **Before you submit**, remember to fill in your name and kerberos ID at the top of this notebook! + """ +end + +# ╔═╡ 5516c800-edee-11ea-12cf-3f8c082ef0ef +hint(text) = Markdown.MD(Markdown.Admonition("hint", "Hint", [text])) + +# ╔═╡ b1d5ca28-edf6-11ea-269e-75a9fb549f1d +hint(md"You can find out more about any function (like `rand`) by creating a new cell and typing: + +``` +?rand +``` + +Once the Live Docs are open, you can select any code to learn more about it. It might be useful to leave it open all the time, and get documentation while you type code.") + +# ╔═╡ f6ef2c2e-ee07-11ea-13a8-2512e7d94426 +hint(md"The `rand` function generates (uniform) random floating-point numbers between $0$ and $1$.") + +# ╔═╡ ea435e58-ee11-11ea-3785-01af8dd72360 +hint(md"Have a look at Exercise 2 to see an example of adding interactivity with a slider. 
You can read the [Interactivity](./sample/Interactivity.jl) and the [PlutoUI](./sample/PlutoUI.jl) sample notebooks _(right click -> Open in new tab)_ to learn more.") + +# ╔═╡ e9aadeee-ee1d-11ea-3525-95f6ba5fda31 +hint(md"`l = (length(k) - 1) ÷ 2`") + +# ╔═╡ 649df270-ee24-11ea-397e-79c4355e38db +hint(md"`num_rows, num_columns = size(M)`") + +# ╔═╡ 0cabed84-ee1e-11ea-11c1-7d8a4b4ad1af +hint(md"`num_rows, num_columns = size(K)`") + +# ╔═╡ 57360a7a-edee-11ea-0c28-91463ece500d +almost(text) = Markdown.MD(Markdown.Admonition("warning", "Almost there!", [text])) + +# ╔═╡ dcb8324c-edee-11ea-17ff-375ff5078f43 +still_missing(text=md"Replace `missing` with your answer.") = Markdown.MD(Markdown.Admonition("warning", "Here we go!", [text])) + +# ╔═╡ 58af703c-edee-11ea-2963-f52e78fc2412 +keep_working(text=md"The answer is not quite right.") = Markdown.MD(Markdown.Admonition("danger", "Keep working on it!", [text])) + +# ╔═╡ f3d00a9a-edf3-11ea-07b3-1db5c6d0b3cf +yays = [md"Great!", md"Yay ❤", md"Great! 
🎉", md"Well done!", md"Keep it up!", md"Good job!", md"Awesome!", md"You got the right answer!", md"Let's move on to the next section."] + +# ╔═╡ 5aa9dfb2-edee-11ea-3754-c368fb40637c +correct(text=rand(yays)) = Markdown.MD(Markdown.Admonition("correct", "Got it!", [text])) + +# ╔═╡ 74d44e22-edee-11ea-09a0-69aa0aba3281 +not_defined(variable_name) = Markdown.MD(Markdown.Admonition("danger", "Oopsie!", [md"Make sure that you define a variable called **$(Markdown.Code(string(variable_name)))**"])) + +# ╔═╡ 397941fc-edee-11ea-33f2-5d46c759fbf7 +if !@isdefined(random_vect) + not_defined(:random_vect) +elseif ismissing(random_vect) + still_missing() +elseif !(random_vect isa Vector) + keep_working(md"`random_vect` should be a `Vector`.") +elseif length(random_vect) != 10 + keep_working(md"`random_vect` does not have the correct size.") +else + correct() +end + +# ╔═╡ 38dc80a0-edef-11ea-10e9-615255a4588c +if !@isdefined(mean) + not_defined(:mean) +else + let + result = mean([1,2,3]) + if ismissing(result) + still_missing() + elseif isnothing(result) + keep_working(md"Did you forget to write `return`?") + elseif result != 2 + keep_working() + else + correct() + end + end +end + +# ╔═╡ 2b1ccaca-edee-11ea-34b0-c51659f844d0 +if !@isdefined(m) + not_defined(:m) +elseif ismissing(m) + still_missing() +elseif !(m isa Number) + keep_working(md"`m` should be a number.") +elseif m != mean(random_vect) + keep_working() +else + correct() +end + +# ╔═╡ e3394c8a-edf0-11ea-1bb8-619f7abb6881 +if !@isdefined(create_bar) + not_defined(:create_bar) +else + let + result = create_bar() + if ismissing(result) + still_missing() + elseif isnothing(result) + keep_working(md"Did you forget to write `return`?") + elseif !(result isa Vector) || length(result) != 100 + keep_working(md"The result should be a `Vector` with 100 elements.") + elseif result[[1,50,100]] != [0,1,0] + keep_working() + else + correct() + end + end +end + +# ╔═╡ adfbe9b2-ed6c-11ea-09ac-675262f420df +if 
!@isdefined(vecvec_to_matrix) + not_defined(:vecvec_to_matrix) +else + let + input = [[6,7],[8,9]] + + result = vecvec_to_matrix(input) + shouldbe = [6 7; 8 9] + + if ismissing(result) + still_missing() + elseif isnothing(result) + keep_working(md"Did you forget to write `return`?") + elseif !(result isa Matrix) + keep_working(md"The result should be a `Matrix`") + elseif result != shouldbe && result != shouldbe' + keep_working() + else + correct() + end + end +end + +# ╔═╡ e06b7fbc-edf2-11ea-1708-fb32599dded3 +if !@isdefined(matrix_to_vecvec) + not_defined(:matrix_to_vecvec) +else + let + input = [6 7 8; 8 9 10] + result = matrix_to_vecvec(input) + shouldbe = [[6,7,8],[8,9,10]] + shouldbe2 = [[6,8], [7,9], [8,10]] + + if ismissing(result) + still_missing() + elseif isnothing(result) + keep_working(md"Did you forget to write `return`?") + elseif result != shouldbe && result != shouldbe2 + keep_working() + else + correct() + end + end +end + +# ╔═╡ 4d0158d0-ee0d-11ea-17c3-c169d4284acb +if !@isdefined(mean_colors) + not_defined(:mean_colors) +else + let + input = reshape([RGB(1.0, 1.0, 1.0), RGB(1.0, 1.0, 0.0)], (2,1)) + + result = mean_colors(input) + shouldbe = (1.0, 1.0, 0.5) + shouldbe2 = RGB(shouldbe...) 
+ + if ismissing(result) + still_missing() + elseif isnothing(result) + keep_working(md"Did you forget to write `return`?") + elseif !(result == shouldbe) && !(result == shouldbe2) + keep_working() + else + correct() + end + end +end + +# ╔═╡ c905b73e-ee1a-11ea-2e36-23b8e73bfdb6 +if !@isdefined(quantize) + not_defined(:quantize) +else + let + result = quantize(.3) + + if ismissing(result) + still_missing() + elseif isnothing(result) + keep_working(md"Did you forget to write `return`?") + elseif result != .3 + if quantize(0.35) == .3 + almost(md"What should quantize(`0.2`) be?") + else + keep_working() + end + elseif quantize(0.56) != .5 + almost(md"What should quantize(`0.68`) be?") + else + correct() + end + end +end + +# ╔═╡ bcf98dfc-ee1b-11ea-21d0-c14439500971 +if !@isdefined(extend) + not_defined(:extend) +else + let + result = extend([6,7],-10) + + if ismissing(result) + still_missing() + elseif isnothing(result) + keep_working(md"Did you forget to write `return`?") + elseif result != 6 || extend([6,7],10) != 7 + keep_working() + else + correct() + end + end +end + +# ╔═╡ 7ffd14f8-ee1d-11ea-0343-b54fb0333aea +if !@isdefined(convolve_vector) + not_defined(:convolve_vector) +else + let + x = [1, 10, 100] + result = convolve_vector(x, [0, 1, 1]) + shouldbe = [11, 110, 200] + shouldbe2 = [2, 11, 110] + + if ismissing(result) + still_missing() + elseif isnothing(result) + keep_working(md"Did you forget to write `return`?") + elseif !(result isa AbstractVector) + keep_working(md"The returned object is not a `Vector`.") + elseif size(result) != size(x) + keep_working(md"The returned vector has the wrong dimensions.") + elseif result != shouldbe && result != shouldbe2 + keep_working() + else + correct() + end + end +end + +# ╔═╡ efd1ceb4-ee1c-11ea-350e-f7e3ea059024 +if !@isdefined(extend_mat) + not_defined(:extend_mat) +else + let + input = [42 37; 1 0] + result = extend_mat(input, -2, -2) + + if ismissing(result) + still_missing() + elseif isnothing(result) + 
keep_working(md"Did you forget to write `return`?") + elseif result != 42 || extend_mat(input, -1, 3) != 37 + keep_working() + else + correct() + end + end +end + +# ╔═╡ 115ded8c-ee0a-11ea-3493-89487315feb7 +bigbreak = html"




"; + +# ╔═╡ 54056a02-ee0a-11ea-101f-47feb6623bec +bigbreak + +# ╔═╡ 45815734-ee0a-11ea-2982-595e1fc0e7b1 +bigbreak + +# ╔═╡ 4139ee66-ee0a-11ea-2282-15d63bcca8b8 +bigbreak + +# ╔═╡ 27847dc4-ee0a-11ea-0651-ebbbb3cfd58c +bigbreak + +# ╔═╡ 0001f782-ee0e-11ea-1fb4-2b5ef3d241e2 +bigbreak + +# ╔═╡ 91f4778e-ee20-11ea-1b7e-2b0892bd3c0f +bigbreak + +# ╔═╡ 5842895a-ee10-11ea-119d-81e4c4c8c53b +bigbreak + +# ╔═╡ dfb7c6be-ee0d-11ea-194e-9758857f7b20 +function camera_input(;max_size=200, default_url="https://i.imgur.com/SUmi94P.png") +""" + + + +
+
+ + +
+ +
+ +
+
+ +
+ + Enable webcam + +
+ + +
+""" |> HTML +end + +# ╔═╡ 94c0798e-ee18-11ea-3212-1533753eabb6 +@bind gauss_raw_camera_data camera_input(;max_size=100) + +# ╔═╡ 1a0324de-ee19-11ea-1d4d-db37f4136ad3 +@bind sobel_raw_camera_data camera_input(;max_size=100) + +# ╔═╡ e15ad330-ee0d-11ea-25b6-1b1b3f3d7888 + +function process_raw_camera_data(raw_camera_data) + # the raw image data is a long byte array, we need to transform it into something + # more "Julian" - something with more _structure_. + + # The encoding of the raw byte stream is: + # every 4 bytes is a single pixel + # every pixel has 4 values: Red, Green, Blue, Alpha + # (we ignore alpha for this notebook) + + # So to get the red values for each pixel, we take every 4th value, starting at + # the 1st: + reds_flat = UInt8.(raw_camera_data["data"][1:4:end]) + greens_flat = UInt8.(raw_camera_data["data"][2:4:end]) + blues_flat = UInt8.(raw_camera_data["data"][3:4:end]) + + # but these are still 1-dimensional arrays, nicknamed 'flat' arrays + # We will 'reshape' this into 2D arrays: + + width = raw_camera_data["width"] + height = raw_camera_data["height"] + + # shuffle and flip to get it in the right shape + reds = reshape(reds_flat, (width, height))' / 255.0 + greens = reshape(greens_flat, (width, height))' / 255.0 + blues = reshape(blues_flat, (width, height))' / 255.0 + + # we have our 2D array for each color + # Let's create a single 2D array, where each value contains the R, G and B value of + # that pixel + + RGB.(reds, greens, blues) +end + +# ╔═╡ f461f5f2-ee18-11ea-3d03-95f57f9bf09e +gauss_camera_image = process_raw_camera_data(gauss_raw_camera_data); + +# ╔═╡ a75701c4-ee18-11ea-2863-d3042e71a68b +with_gaussian_blur(gauss_camera_image) + +# ╔═╡ 1ff6b5cc-ee19-11ea-2ca8-7f00c204f587 +sobel_camera_image = Gray.(process_raw_camera_data(sobel_raw_camera_data)); + +# ╔═╡ 1bf94c00-ee19-11ea-0e3c-e12bc68d8e28 +with_sobel_edge_detect(sobel_camera_image) + +# ╔═╡ Cell order: +# ╠═83eb9ca0-ed68-11ea-0bc5-99a09c68f867 +# 
╟─8ef13896-ed68-11ea-160b-3550eeabbd7d +# ╟─ac8ff080-ed61-11ea-3650-d9df06123e1f +# ╠═911ccbce-ed68-11ea-3606-0384e7580d7c +# ╟─5f95e01a-ee0a-11ea-030c-9dba276aba92 +# ╠═65780f00-ed6b-11ea-1ecf-8b35523a7ac0 +# ╟─67461396-ee0a-11ea-3679-f31d46baa9b4 +# ╠═74b008f6-ed6b-11ea-291f-b3791d6d1b35 +# ╟─54056a02-ee0a-11ea-101f-47feb6623bec +# ╟─540ccfcc-ee0a-11ea-15dc-4f8120063397 +# ╟─467856dc-eded-11ea-0f83-13d939021ef3 +# ╠═56ced344-eded-11ea-3e81-3936e9ad5777 +# ╟─ad6a33b0-eded-11ea-324c-cfabfd658b56 +# ╠═f51333a6-eded-11ea-34e6-bfbb3a69bcb0 +# ╟─b18e2c54-edf1-11ea-0cbf-85946d64b6a2 +# ╟─397941fc-edee-11ea-33f2-5d46c759fbf7 +# ╟─b1d5ca28-edf6-11ea-269e-75a9fb549f1d +# ╟─cf738088-eded-11ea-2915-61735c2aa990 +# ╠═0ffa8354-edee-11ea-2883-9d5bfea4a236 +# ╠═1f104ce4-ee0e-11ea-2029-1d9c817175af +# ╟─38dc80a0-edef-11ea-10e9-615255a4588c +# ╟─1f229ca4-edee-11ea-2c56-bb00cc6ea53c +# ╠═2a391708-edee-11ea-124e-d14698171b68 +# ╟─2b1ccaca-edee-11ea-34b0-c51659f844d0 +# ╟─e2863d4c-edef-11ea-1d67-332ddca03cc4 +# ╠═ec5efe8c-edef-11ea-2c6f-afaaeb5bc50c +# ╟─29e10640-edf0-11ea-0398-17dbf4242de3 +# ╟─6f67657e-ee1a-11ea-0c2f-3d567bcfa6ea +# ╠═38155b5a-edf0-11ea-3e3f-7163da7433fb +# ╠═73ef1d50-edf0-11ea-343c-d71706874c82 +# ╟─a5f8bafe-edf0-11ea-0da3-3330861ae43a +# ╠═b6b65b94-edf0-11ea-3686-fbff0ff53d08 +# ╟─d862fb16-edf1-11ea-36ec-615d521e6bc0 +# ╟─e3394c8a-edf0-11ea-1bb8-619f7abb6881 +# ╟─22f28dae-edf2-11ea-25b5-11c369ae1253 +# ╠═8c19fb72-ed6c-11ea-2728-3fa9219eddc4 +# ╠═c4761a7e-edf2-11ea-1e75-118e73dadbed +# ╟─adfbe9b2-ed6c-11ea-09ac-675262f420df +# ╟─393667ca-edf2-11ea-09c5-c5d292d5e896 +# ╠═9f1c6d04-ed6c-11ea-007b-75e7e780703d +# ╠═70955aca-ed6e-11ea-2330-89b4d20b1795 +# ╟─e06b7fbc-edf2-11ea-1708-fb32599dded3 +# ╟─5da8cbe8-eded-11ea-2e43-c5b7cc71e133 +# ╟─45815734-ee0a-11ea-2982-595e1fc0e7b1 +# ╟─e083b3e8-ed61-11ea-2ec9-217820b0a1b4 +# ╠═c5484572-ee05-11ea-0424-f37295c3072d +# ╠═c8ecfe5c-ee05-11ea-322b-4b2714898831 +# ╟─e86ed944-ee05-11ea-3e0f-d70fc73b789c +# 
╟─c54ccdea-ee05-11ea-0365-23aaf053b7d7 +# ╠═f6898df6-ee07-11ea-2838-fde9bc739c11 +# ╠═5be9b144-ee0d-11ea-2a8d-8775de265a1d +# ╟─4d0158d0-ee0d-11ea-17c3-c169d4284acb +# ╠═d75ec078-ee0d-11ea-3723-71fb8eecb040 +# ╟─f68d4a36-ee07-11ea-0832-0360530f102e +# ╠═f6991a50-ee07-11ea-0bc4-1d68eb028e6a +# ╠═f6a655f8-ee07-11ea-13b6-43ca404ddfc7 +# ╟─c905b73e-ee1a-11ea-2e36-23b8e73bfdb6 +# ╟─f6b218c0-ee07-11ea-2adb-1968c4fd473a +# ╟─f6bf64da-ee07-11ea-3efb-05af01b14f67 +# ╟─25dad7ce-ee0b-11ea-3e20-5f3019dd7fa3 +# ╠═9751586e-ee0c-11ea-0cbb-b7eda92977c9 +# ╟─f6cc03a0-ee07-11ea-17d8-013991514d42 +# ╠═63e8d636-ee0b-11ea-173d-bd3327347d55 +# ╟─2cc2f84e-ee0d-11ea-373b-e7ad3204bb00 +# ╟─b8f26960-ee0a-11ea-05b9-3f4bc1099050 +# ╠═5de3a22e-ee0b-11ea-230f-35df4ca3c96d +# ╠═4e21e0c4-ee0b-11ea-3d65-b311ae3f98e9 +# ╠═6dbf67ce-ee0b-11ea-3b71-abc05a64dc43 +# ╟─846b1330-ee0b-11ea-3579-7d90fafd7290 +# ╠═943103e2-ee0b-11ea-33aa-75a8a1529931 +# ╟─f6d6c71a-ee07-11ea-2b63-d759af80707b +# ╠═f6e2cb2a-ee07-11ea-06ee-1b77e34c1e91 +# ╟─f6ef2c2e-ee07-11ea-13a8-2512e7d94426 +# ╟─f6fc1312-ee07-11ea-39a0-299b67aee3d8 +# ╟─774b4ce6-ee1b-11ea-2b48-e38ee25fc89b +# ╠═7e4aeb70-ee1b-11ea-100f-1952ba66f80f +# ╟─6a05f568-ee1b-11ea-3b6c-83b6ada3680f +# ╟─f70823d2-ee07-11ea-2bb3-01425212aaf9 +# ╠═e70a84d4-ee0c-11ea-0640-bf78653ba102 +# ╠═ac15e0d0-ee0c-11ea-1eaf-d7f88b5df1d7 +# ╟─9604bc44-ee1b-11ea-28f8-7f7af8d0cbb2 +# ╟─f714699e-ee07-11ea-08b6-5f5169861b57 +# ╠═bdc2df7c-ee0c-11ea-2e9f-7d2c085617c1 +# ╟─81510a30-ee0e-11ea-0062-8b3327428f9d +# ╠═6b30dc38-ed6b-11ea-10f3-ab3f121bf4b8 +# ╟─e3b03628-ee05-11ea-23b6-27c7b0210532 +# ╟─4139ee66-ee0a-11ea-2282-15d63bcca8b8 +# ╟─e08781fa-ed61-11ea-13ae-91a49b5eb74a +# ╟─7fc8ee1c-ee09-11ea-1382-ad21d5373308 +# ╠═7fcd6230-ee09-11ea-314f-a542d00d582e +# ╠═7fdb34dc-ee09-11ea-366b-ffe10d1aa845 +# ╟─7fe9153e-ee09-11ea-15b3-6f24fcc20734 +# ╟─80108d80-ee09-11ea-0368-31546eb0d3cc +# ╠═01070e28-ee0f-11ea-1928-a7919d452bdd +# ╟─7522f81e-ee1c-11ea-35af-a17eb257ff1a +# 
╟─801d90c0-ee09-11ea-28d6-61b806de26dc +# ╠═802bec56-ee09-11ea-043e-51cf1db02a34 +# ╟─b7f3994c-ee1b-11ea-211a-d144db8eafc2 +# ╠═803905b2-ee09-11ea-2d52-e77ff79693b0 +# ╠═80479d98-ee09-11ea-169e-d166eef65874 +# ╠═805691ce-ee09-11ea-053d-6d2e299ee123 +# ╟─806e5766-ee0f-11ea-1efc-d753cd83d086 +# ╟─38da843a-ee0f-11ea-01df-bfa8b1317d36 +# ╟─9bde9f92-ee0f-11ea-27f8-ffef5fce2b3c +# ╟─45c4da9a-ee0f-11ea-2c5b-1f6704559137 +# ╟─bcf98dfc-ee1b-11ea-21d0-c14439500971 +# ╟─80664e8c-ee09-11ea-0702-711bce271315 +# ╠═807e5662-ee09-11ea-3005-21fdcc36b023 +# ╟─808deca8-ee09-11ea-0ee3-1586fa1ce282 +# ╟─809f5330-ee09-11ea-0e5b-415044b6ac1f +# ╠═ca1ac5f4-ee1c-11ea-3d00-ff5268866f87 +# ╟─ea435e58-ee11-11ea-3785-01af8dd72360 +# ╟─80ab64f4-ee09-11ea-29b4-498112ed0799 +# ╠═28e20950-ee0c-11ea-0e0a-b5f2e570b56e +# ╟─e9aadeee-ee1d-11ea-3525-95f6ba5fda31 +# ╟─5eea882c-ee13-11ea-0d56-af81ecd30a4a +# ╠═93284f92-ee12-11ea-0342-833b1a30625c +# ╟─cf73f9f8-ee12-11ea-39ae-0107e9107ef5 +# ╟─7ffd14f8-ee1d-11ea-0343-b54fb0333aea +# ╟─80b7566a-ee09-11ea-3939-6fab470f9ec8 +# ╠═1c8b4658-ee0c-11ea-2ede-9b9ed7d3125e +# ╟─f8bd22b8-ee14-11ea-04aa-ab16fd01826e +# ╠═2a9dd06a-ee13-11ea-3f84-67bb309c77a8 +# ╟─b424e2aa-ee14-11ea-33fa-35491e0b9c9d +# ╠═38eb92f6-ee13-11ea-14d7-a503ac04302e +# ╟─bc1c20a4-ee14-11ea-3525-63c9fa78f089 +# ╠═24c21c7c-ee14-11ea-1512-677980db1288 +# ╟─27847dc4-ee0a-11ea-0651-ebbbb3cfd58c +# ╠═b01858b6-edf3-11ea-0826-938d33c19a43 +# ╟─7c1bc062-ee15-11ea-30b1-1b1e76520f13 +# ╠═7c2ec6c6-ee15-11ea-2d7d-0d9401a5e5d1 +# ╟─649df270-ee24-11ea-397e-79c4355e38db +# ╟─9afc4dca-ee16-11ea-354f-1d827aaa61d2 +# ╠═cf6b05e2-ee16-11ea-3317-8919565cb56e +# ╟─e3616062-ee27-11ea-04a9-b9ec60842a64 +# ╟─e5b6cd34-ee27-11ea-0d60-bd4796540b18 +# ╟─d06ea762-ee27-11ea-2e9c-1bcff86a3fe0 +# ╟─e1dc0622-ee16-11ea-274a-3b6ec9e15ab5 +# ╟─efd1ceb4-ee1c-11ea-350e-f7e3ea059024 +# ╟─3cd535e4-ee26-11ea-2482-fb4ad43dda19 +# ╟─7c41f0ca-ee15-11ea-05fb-d97a836659af +# ╠═8b96e0bc-ee15-11ea-11cd-cfecea7075a0 +# 
╟─0cabed84-ee1e-11ea-11c1-7d8a4b4ad1af +# ╟─5a5135c6-ee1e-11ea-05dc-eb0c683c2ce5 +# ╟─577c6daa-ee1e-11ea-1275-b7abc7a27d73 +# ╠═275a99c8-ee1e-11ea-0a76-93e3618c9588 +# ╠═42dfa206-ee1e-11ea-1fcd-21671042064c +# ╟─6e53c2e6-ee1e-11ea-21bd-c9c05381be07 +# ╠═e7f8b41a-ee25-11ea-287a-e75d33fbd98b +# ╟─8a335044-ee19-11ea-0255-b9391246d231 +# ╠═7c50ea80-ee15-11ea-328f-6b4e4ff20b7e +# ╠═aad67fd0-ee15-11ea-00d4-274ec3cda3a3 +# ╟─8ae59674-ee18-11ea-3815-f50713d0fa08 +# ╟─94c0798e-ee18-11ea-3212-1533753eabb6 +# ╠═a75701c4-ee18-11ea-2863-d3042e71a68b +# ╟─f461f5f2-ee18-11ea-3d03-95f57f9bf09e +# ╟─7c6642a6-ee15-11ea-0526-a1aac4286cdd +# ╠═9eeb876c-ee15-11ea-1794-d3ea79f47b75 +# ╟─1a0324de-ee19-11ea-1d4d-db37f4136ad3 +# ╠═1bf94c00-ee19-11ea-0e3c-e12bc68d8e28 +# ╟─1ff6b5cc-ee19-11ea-2ca8-7f00c204f587 +# ╟─0001f782-ee0e-11ea-1fb4-2b5ef3d241e2 +# ╠═1b85ee76-ee10-11ea-36d7-978340ef61e6 +# ╠═477d0a3c-ee10-11ea-11cf-07b0e0ce6818 +# ╟─91f4778e-ee20-11ea-1b7e-2b0892bd3c0f +# ╟─8ffe16ce-ee20-11ea-18bd-15640f94b839 +# ╟─5842895a-ee10-11ea-119d-81e4c4c8c53b +# ╟─5516c800-edee-11ea-12cf-3f8c082ef0ef +# ╟─57360a7a-edee-11ea-0c28-91463ece500d +# ╟─dcb8324c-edee-11ea-17ff-375ff5078f43 +# ╟─58af703c-edee-11ea-2963-f52e78fc2412 +# ╟─f3d00a9a-edf3-11ea-07b3-1db5c6d0b3cf +# ╟─5aa9dfb2-edee-11ea-3754-c368fb40637c +# ╟─74d44e22-edee-11ea-09a0-69aa0aba3281 +# ╟─115ded8c-ee0a-11ea-3493-89487315feb7 +# ╟─dfb7c6be-ee0d-11ea-194e-9758857f7b20 +# ╟─e15ad330-ee0d-11ea-25b6-1b1b3f3d7888 diff --git a/homework/homework2/hw2.html b/homework/homework2/hw2.html new file mode 100644 index 000000000..726ca241a --- /dev/null +++ b/homework/homework2/hw2.html @@ -0,0 +1,360 @@ + + + + + ⚡ Pluto.jl ⚡ + + + + + + + + + + + + +

homework 2, version 1

+
34.4 μs

Submission by: Jazzy Doe (jazz@mit.edu)

+
14.3 ms

Homework 2: Dynamic programming

+

18.S191, fall 2020

+

This notebook contains built-in, live answer checks! In some exercises you will see a coloured box, which runs a test case on your code, and provides feedback based on the result. Simply edit the code, run it, and the check runs again.

+

For MIT students: there will also be some additional (secret) test cases that will be run as part of the grading process, and we will look at your notebook and write comments.

+

Feel free to ask questions!

+
35.7 μs
student
11.8 ms

Let's create a package environment:

+
8.1 μs
177 ms
134 s
img
7 s

Arrays: Slices and views

+

In the lecture (included below) we learned about what array views are. In this exercise we will add to that understanding and look at an important use of views: to reduce the amount of memory allocations when reading sub-sequences within an array.

+

We will use the BenchmarkTools package to empirically understand the effects of using views.

+
9.1 μs
+
4.8 μs

Shrinking an array

+

Below is a function called remove_in_each_row(img, pixels). It takes a matrix img and a vector of integers, pixels, and shrinks the image by 1 pixel in width by removing the element img[i, pixels[i]] in every row. This function is one of the building blocks of the Image Seam algorithm we saw in the lecture.

+

Read it and convince yourself that it is correct.

+
26.6 μs
remove_in_each_row (generic function with 1 method)
79.8 μs

Let's use it to remove the pixels on the diagonal. These are the image dimensions before and after doing so:

+
5.3 μs
66.7 ms





2.1 μs

Exercise 1 - Making it efficient

+

We can use the @benchmark macro from the BenchmarkTools.jl package to benchmark fragments of Julia code.

+

@benchmark takes an expression and runs it a number of times to obtain statistics about the run time and memory allocation. We generally take the minimum time as the most stable measurement of performance (for reasons discussed in the paper on BenchmarkTools)

+
15.1 μs

First, as an example, let's benchmark the remove_in_each_row function we defined above by passing in our image and some indices to remove.

+
9.3 μs
performance_experiment_default
BenchmarkTools.Trial: 
+  memory estimate:  1.44 MiB
+  allocs estimate:  1029
+  --------------
+  minimum time:     1.175 ms (0.00% GC)
+  median time:      1.872 ms (0.00% GC)
+  mean time:        2.134 ms (4.22% GC)
+  maximum time:     9.729 ms (62.48% GC)
+  --------------
+  samples:          2336
+  evals/sample:     1
12.9 s

Exercise 1.1

+

vcat(x, y) is used in Julia to concatenate two arrays vertically. This actually creates a new array of size length(x) + length(y) and copies x and y into it. We use it in remove_in_each_row to create rows which have one pixel less.

+

While using vcat might make it easy to write the first version of our function, it's not strictly necessary.

+

👉 In remove_in_each_row_no_vcat below, figure out a way to avoid the use of vcat and modify the function to avoid it.

+
14.1 μs
remove_in_each_row_no_vcat (generic function with 1 method)
80.4 μs
performance_experiment_without_vcat
BenchmarkTools.Trial: 
+  memory estimate:  1.44 MiB
+  allocs estimate:  1029
+  --------------
+  minimum time:     1.089 ms (0.00% GC)
+  median time:      1.835 ms (0.00% GC)
+  mean time:        2.035 ms (4.44% GC)
+  maximum time:     9.181 ms (0.00% GC)
+  --------------
+  samples:          2448
+  evals/sample:     1
13.5 s

If you did it correctly, you should see that this benchmark shows the function running faster! And "memory estimate" should also show a smaller number, and so should "allocs estimate" which is the number of allocations done per call.

+
9.7 μs

Keep working on it!

We are still using (roughly) the same number of allocations as the default implementation.

+ +
+
51.9 μs

Exercise 1.2

+

👉 How many estimated allocations did this optimization reduce, and how can you explain most of them?

+
11.1 μs
no_vcat_observation

<Your answer here>

+
17.5 μs
12 μs

Exercise 1.3 - view-based optimization

+

👉 In the below remove_in_each_row_views function, implement the same optimization to remove vcat and use @view or @views to avoid creating copies or slices of the img array.

+

Pluto will automatically time your change with @benchmark below.

+
12.2 μs
remove_in_each_row_views (generic function with 1 method)
79.5 μs
performance_experiment_views
BenchmarkTools.Trial: 
+  memory estimate:  1.44 MiB
+  allocs estimate:  1029
+  --------------
+  minimum time:     1.058 ms (0.00% GC)
+  median time:      1.834 ms (0.00% GC)
+  mean time:        2.020 ms (4.20% GC)
+  maximum time:     8.557 ms (64.18% GC)
+  --------------
+  samples:          2466
+  evals/sample:     1
12.7 s

Keep working on it!

We are still using (roughly) the same number of allocations as the default implementation.

+ +
+
53.5 μs

Final tally:

+
9.7 μs
12.9 μs
53.1 μs

Exercise 1.4

+

Nice! If you did your optimizations right, you should be able to get the estimated allocations down to a single-digit number!

+

👉 How many allocations were avoided by adding the @view optimization over the vcat optimization? Why is this?

+
23.6 μs
views_observation

<your answer here>

+
9.8 μs
5.8 μs





1.8 μs

Brightness and Energy

+
22.7 μs

First, we will define a brightness function for a pixel (a color) as the mean of the red, green and blue values.

+

You should use this function whenever the problem set asks you to deal with brightness of a pixel.

+
23.8 μs
brightness (generic function with 2 methods)
87.1 μs
1 s

We provide you with a convolve function below.

+
19 μs
convolve (generic function with 1 method)
37.7 μs
float_to_color (generic function with 1 method)
40.8 μs
43.4 ms

Finally, we define the energy function which takes the Sobel gradients along x and y directions and computes the norm of the gradient for each pixel.

+
14.5 μs
energy (generic function with 2 methods)
88.6 μs
187 ms





1.7 μs

Exercise 2 - Building up to dynamic programming

+

In this exercise and the following ones, we will use the computational problem of Seam carving. We will think through all the "gut reaction" solutions, and then finally end up with the dynamic programming solution that we saw in the lecture.

+

In the process we will understand the performance and accuracy of each iteration of our solution.

+

How to implement the solutions:

+

For every variation of the algorithm, your job is to write a function which takes a matrix of energies, and an index for a pixel on the first row, and computes a "seam" starting at that pixel.

+

The function should return a vector of as many integers as there are rows in the input matrix where each number points out a pixel to delete from the corresponding row. (it acts as the input to remove_in_each_row).

+
31.1 μs

Exercise 2.1 - The greedy approach

+

The first approach discussed in the lecture (included below) is the greedy approach: you start from your top pixel, and at each step you just look at the three neighbors below. The next pixel in the seam is the neighbor with the lowest energy.

+
30.6 μs
+
22.8 μs

👉 Implement the greedy approach.

+
11.8 μs
random_seam (generic function with 1 method)
125 μs
greedy_seam (generic function with 1 method)
54.3 μs

Before we apply your function to our test image, let's try it out on a small matrix of energies (displayed here in grayscale), just like in the lecture snippet above (clicking on the video will take you to the right part of the video). Light pixels have high energy, dark pixels signify low energy.

+
23.9 μs
720 ns

Starting pixel: 1

+
84.7 ms
47.8 μs
617 ms
90.7 μs

Let's try it on the bigger image!

+
10.1 μs

Compute shrunk image:

+
42.1 ms
2.3 μs
2.2 μs

Exercise 2.2 - Recursion

+

A common pattern in algorithm design is the idea of solving a problem as the combination of solutions to subproblems.

+

The classic example is a Fibonacci number generator.

+

The recursive implementation of Fibonacci looks something like this:

+
25.6 μs
fib (generic function with 1 method)
45.9 μs

Notice that you can call a function from within itself which may call itself and so on until a base case is reached. Then the program will combine the result from the base case up to the final result.

+

In the case of the Fibonacci function, we added the solutions to the subproblems fib(n-1), fib(n-2) to produce fib(n).

+

An analogy can be drawn to the process of mathematical induction in mathematics. And as with mathematical induction, there are two parts to constructing such a recursive algorithm:

+
    +
  • Defining a base case

    +
  • +
  • Defining a recursion, i.e. finding a solution to the problem as a combination of solutions to smaller problems.

    +
  • +
+
16.8 μs

👉 Define a least_energy function which returns:

+
    +
  1. the lowest possible total energy for a seam starting at the pixel at (i,j);

    +
  2. +
  3. the column to jump to on the next move (in row i+1),

    +
  4. +
+

which is one of j-1, j or j+1, up to boundary conditions.

+

Return these two values in a tuple.

+
22.6 μs
pika
716 ms
11 ms
least_energy (generic function with 1 method)
60.7 μs

Hint

You can call the least_energy function recursively within itself to obtain the least energy of the adjacent cells and add the energy at the current cell to get the total energy.

+ +
+
153 ms

This is so elegant, correct, but inefficient! If you check this checkbox , you will see the number of accesses made to the energies array it took to compute the least energy from the pixel (1,7):

+
167 μs
1.5 μs

Whoa!

+
29.6 μs
14.8 ms

Exercise 2.3 - Exhaustive search with recursion

+

Now use the least_energy function you defined above to define the recursive_seam function which takes the energies matrix and a starting pixel, and computes the seam with the lowest energy from that starting pixel.

+

This will give you the method used in the lecture to perform exhaustive search of all possible paths.

+
24.4 μs
recursive_seam (generic function with 1 method)
114 μs

Compute shrunk image:

+
274 μs
2 μs
2.1 μs

Exercise 2.4

+
    +
  • State clearly why this algorithm does an exhaustive search of all possible paths.

    +
  • +
  • How many valid seams are there in an image of size m×n?

    +
  • +
+
10.7 μs
exhaustive_observation

<your answer here>

+
22.3 μs

Exercise 3 - Memoization

+

Memoization is the name given to the technique of storing results to expensive function calls that will be accessed more than once.

+

As stated in the video, the function least_energy is called repeatedly with the same arguments. In fact, we call it on the order of 3^n times, when there are only really m×n unique ways to call it!

+

Let's implement memoization on this function, first with a dictionary for storage.

+
35.3 μs

Exercise 3.1 - Dictionary as storage

+

Let's make a memoized version of the least_energy function which takes a dictionary and first checks whether the dictionary contains the key (i, j). If it does, it returns the value stored in that place; if not, it computes the value, stores it in the dictionary at key (i, j), and returns the value it computed.

+

memoized_least_energy(energies, starting_pixel, memory)

+

This function must recursively call itself, and pass the same memory object it received as an argument.

+

You are expected to read and understand the documentation on dictionaries to find out how to:

+
    +
  1. Create a dictionary

    +
  2. +
  3. Check if a key is stored in the dictionary

    +
  4. +
  5. Access contents of the dictionary by a key.

    +
  6. +
+
41.7 μs
memoized_least_energy (generic function with 1 method)
86.2 μs
recursive_memoized_seam (generic function with 1 method)
105 μs

Compute shrunk image:

+
237 μs
2.3 μs
1.7 μs

Exercise 3.2 - Matrix as storage

+

The dictionary-based memoization we tried above works well in general as there is no restriction on what type of keys can be used.

+

But in our particular case, we can use a matrix as a storage, since a matrix is naturally keyed by two integers.

+

Write variations of the two functions above, named matrix_memoized_least_energy and matrix_memoized_seam, which use a matrix as storage.

+
22.5 μs
matrix_memoized_least_energy (generic function with 1 method)
130 μs
matrix_memoized_seam (generic function with 1 method)
111 μs

Compute shrunk image:

+
111 μs
2.5 μs
1.7 μs





1.8 μs

Exercise 4 - Dynamic programming without recursion

+

Now it's easy to see that the above algorithm is equivalent to one that populates the memory matrix in a for loop.

+

Exercise 4.1

+

👉 Write a function which takes the energies and returns the least energy matrix which has the least possible seam energy for each pixel. This was shown in the lecture, but attempt to write it on your own.

+
16.4 μs
least_energy_matrix (generic function with 1 method)
27.6 μs
8.3 μs

Exercise 4.2

+

👉 Write a function which, when given the matrix returned by least_energy_matrix and a starting pixel (on the first row), computes the least energy seam from that pixel.

+
13.1 μs
seam_from_precomputed_least_energy (generic function with 1 method)
86.4 μs

Compute shrunk image:

+
251 μs
2.5 μs
2.5 μs
9.1 μs

Oops!

Before you submit, remember to fill in your name and kerberos ID at the top of this notebook!

+
+
22.4 μs





3.5 μs

Function library

+

Just some helper functions used in the notebook.

+
9.9 μs
shrink_n (generic function with 2 methods)
174 μs
mark_path (generic function with 1 method)
72.9 μs
pencil (generic function with 1 method)
63.9 μs
decimate (generic function with 1 method)
66.9 μs
hint (generic function with 1 method)
42.2 μs
almost (generic function with 1 method)
50.3 μs
still_missing (generic function with 2 methods)
87.3 μs
keep_working (generic function with 2 methods)
59.9 μs
yays
37.9 ms
correct (generic function with 2 methods)
52.3 μs
not_defined (generic function with 1 method)
79 μs
hbox (generic function with 2 methods)
118 μs
vbox (generic function with 2 methods)
42.3 μs
8.8 ms
+ + + \ No newline at end of file diff --git a/homework/homework2/hw2.jl b/homework/homework2/hw2.jl new file mode 100644 index 000000000..7b6e74fb7 --- /dev/null +++ b/homework/homework2/hw2.jl @@ -0,0 +1,958 @@ +### A Pluto.jl notebook ### +# v0.11.12 + +using Markdown +using InteractiveUtils + +# This Pluto notebook uses @bind for interactivity. When running this notebook outside of Pluto, the following 'mock version' of @bind gives bound variables a default value (instead of an error). +macro bind(def, element) + quote + local el = $(esc(element)) + global $(esc(def)) = Core.applicable(Base.get, el) ? Base.get(el) : missing + el + end +end + +# ╔═╡ 86e1ee96-f314-11ea-03f6-0f549b79e7c9 +begin + using Pkg + Pkg.activate(mktempdir()) +end + +# ╔═╡ a4937996-f314-11ea-2ff9-615c888afaa8 +begin + Pkg.add([ + "Images", + "ImageMagick", + "Compose", + "ImageFiltering", + "TestImages", + "Statistics", + "PlutoUI", + "BenchmarkTools" + ]) + + using Images + using TestImages + using ImageFiltering + using Statistics + using PlutoUI + using BenchmarkTools +end + +# ╔═╡ e6b6760a-f37f-11ea-3ae1-65443ef5a81a +md"_homework 2, version 2.1_" + +# ╔═╡ 85cfbd10-f384-11ea-31dc-b5693630a4c5 +md""" + +# **Homework 2**: _Dynamic programming_ +`18.S191`, fall 2020 + +This notebook contains _built-in, live answer checks_! In some exercises you will see a coloured box, which runs a test case on your code, and provides feedback based on the result. Simply edit the code, run it, and the check runs again. + +_For MIT students:_ there will also be some additional (secret) test cases that will be run as part of the grading process, and we will look at your notebook and write comments. + +Feel free to ask questions! +""" + +# ╔═╡ 33e43c7c-f381-11ea-3abc-c942327456b1 +# edit the code below to set your name and kerberos ID (i.e. email without @mit.edu) + +student = (name = "Jazzy Doe", kerberos_id = "jazz") + +# you might need to wait until all other cells in this notebook have completed running. 
+# scroll around the page to see what's up + +# ╔═╡ ec66314e-f37f-11ea-0af4-31da0584e881 +md""" + +Submission by: **_$(student.name)_** ($(student.kerberos_id)@mit.edu) +""" + +# ╔═╡ 938185ec-f384-11ea-21dc-b56b7469f798 +md"_Let's create a package environment:_" + +# ╔═╡ 0d144802-f319-11ea-0028-cd97a776a3d0 +#img = load(download("https://upload.wikimedia.org/wikipedia/commons/thumb/a/a4/Piet_Mondriaan%2C_1930_-_Mondrian_Composition_II_in_Red%2C_Blue%2C_and_Yellow.jpg/300px-Piet_Mondriaan%2C_1930_-_Mondrian_Composition_II_in_Red%2C_Blue%2C_and_Yellow.jpg")) +#img = load(download("https://upload.wikimedia.org/wikipedia/commons/thumb/5/5b/Hilma_af_Klint_-_Group_IX_SUW%2C_The_Swan_No._1_%2813947%29.jpg/477px-Hilma_af_Klint_-_Group_IX_SUW%2C_The_Swan_No._1_%2813947%29.jpg")) +img = load(download("https://i.imgur.com/4SRnmkj.png")) + +# ╔═╡ cc9fcdae-f314-11ea-1b9a-1f68b792f005 +md""" +# Arrays: Slices and views + +In the lecture (included below) we learned about what array views are. In this exercise we will add to that understanding and look at an important use of `view`s: to reduce the amount of memory allocations when reading sub-sequences within an array. + +We will use the `BenchmarkTools` package to emperically understand the effects of using views. +""" + +# ╔═╡ b49a21a6-f381-11ea-1a98-7f144c55c9b7 +html""" + +""" + +# ╔═╡ b49e8cc8-f381-11ea-1056-91668ac6ae4e +md""" +## Shrinking an array + +Below is a function called `remove_in_each_row(img, pixels)`. It takes a matrix `img` and a vector of integers, `pixels`, and shrinks the image by 1 pixel in width by removing the element `img[i, pixels[i]]` in every row. This function is one of the building blocks of the Image Seam algorithm we saw in the lecture. + +Read it and convince yourself that it is correct. 
+""" + +# ╔═╡ e799be82-f317-11ea-3ae4-6d13ece3fe10 +function remove_in_each_row(img, column_numbers) + @assert size(img, 1) == length(column_numbers) # same as the number of rows + m, n = size(img) + local img′ = similar(img, m, n-1) # create a similar image with one less column + + # The prime (′) in the variable name is written as \prime + # You cannot use apostrophe for this! (Apostrophe means the transpose of a matrix) + + for (i, j) in enumerate(column_numbers) + img′[i, :] = vcat(img[i, 1:j-1], img[i, j+1:end]) + end + img′ +end + +# ╔═╡ c075a8e6-f382-11ea-2263-cd9507324f4f +md"Let's use it to remove the pixels on the diagonal. These are the image dimensions before and after doing so:" + +# ╔═╡ 9cced1a8-f326-11ea-0759-0b2f22e5a1db +(before=size(img), after=size(remove_in_each_row(img, 1:size(img, 1)))) + +# ╔═╡ 1d893998-f366-11ea-0828-512de0c44915 +md""" +## **Exercise 1** - _Making it efficient_ + +We can use the `@benchmark` macro from the [BenchmarkTools.jl](https://github.com/JuliaCI/BenchmarkTools.jl) package to benchmark fragments of Julia code. + +`@benchmark` takes an expression and runs it a number of times to obtain statistics about the run time and memory allocation. We generally take the minimum time as the most stable measurement of performance ([for reasons discussed in the paper on BenchmarkTools](http://www-math.mit.edu/~edelman/publications/robust_benchmarking.pdf)) +""" + +# ╔═╡ 59991872-f366-11ea-1036-afe313fb4ec1 +md""" +First, as an example, let's benchmark the `remove_in_each_row` function we defined above by passing in our image and a some indices to remove. +""" + +# ╔═╡ e501ea28-f326-11ea-252a-53949fd9ef57 +performance_experiment_default = @benchmark remove_in_each_row(img, 1:size(img, 1)) + +# ╔═╡ f7915918-f366-11ea-2c46-2f4671ae8a22 +md""" +#### Exercise 1.1 + +`vcat(x, y)` is used in julia to concatenate two arrays vertically. This actually creates a new array of size `length(x) + length(y)` and copies `x` and `y` into it. 
We use it in `remove_in_each_row` to create rows which have one pixel less. + +While using `vcat` might make it easy to write the first version of our function, it's strictly not necessary. + +👉 In `remove_in_each_row_no_vcat` below, figure out a way to avoid the use of `vcat` and modify the function to avoid it. +""" + +# ╔═╡ 37d4ea5c-f327-11ea-2cc5-e3774c232c2b +function remove_in_each_row_no_vcat(img, column_numbers) + @assert size(img, 1) == length(column_numbers) # same as the number of rows + m, n = size(img) + local img′ = similar(img, m, n-1) # create a similar image with one less column + + for (i, j) in enumerate(column_numbers) + # EDIT THE FOLLOWING LINE and split it into two lines + # to avoid using `vcat`. + img′[i, :] .= vcat(img[i, 1:j-1], img[i, j+1:end]) + end + img′ +end + +# ╔═╡ 67717d02-f327-11ea-0988-bfe661f57f77 +performance_experiment_without_vcat = @benchmark remove_in_each_row_no_vcat(img, 1:size(img, 1)) + +# ╔═╡ 9e149cd2-f367-11ea-28ef-b9533e8a77bb +md""" +If you did it correctly, you should see that this benchmark shows the function running faster! And "memory estimate" should also show a smaller number, and so should "allocs estimate" which is the number of allocations done per call. +""" + +# ╔═╡ ba1619d4-f389-11ea-2b3f-fd9ba71cf7e3 +md""" +#### Exercise 1.2 + +👉 How many estimated allocations did this optimization reduce, and how can you explain most of them? +""" + +# ╔═╡ e49235a4-f367-11ea-3913-f54a4a6b2d6b +no_vcat_observation = md""" + +""" + +# ╔═╡ 837c43a4-f368-11ea-00a3-990a45cb0cbd +md""" + +#### Exercise 1.3 - `view`-based optimization + +👉 In the below `remove_in_each_row_views` function, implement the same optimization to remove `vcat` and use `@view` or `@views` to avoid creating copies or slices of the `img` array. + +Pluto will automatically time your change with `@benchmark` below. 
+""" + +# ╔═╡ 90a22cc6-f327-11ea-1484-7fda90283797 +function remove_in_each_row_views(img, column_numbers) + @assert size(img, 1) == length(column_numbers) # same as the number of rows + m, n = size(img) + local img′ = similar(img, m, n-1) # create a similar image with one less column + + for (i, j) in enumerate(column_numbers) + # EDIT THE FOLLOWING LINE and split it into two lines + # to avoid using `vcat`. + img′[i, :] .= vcat(img[i, 1:j-1], img[i, j+1:end]) + end + img′ +end + +# ╔═╡ 3335e07c-f328-11ea-0e6c-8d38c8c0ad5b +performance_experiment_views = @benchmark begin + remove_in_each_row_views(img, 1:size(img, 1)) +end + +# ╔═╡ 40d6f562-f329-11ea-2ee4-d7806a16ede3 +md"Final tally:" + +# ╔═╡ 4f0975d8-f329-11ea-3d10-59a503f8d6b2 +( + default = performance_experiment_default, + without_vcat = performance_experiment_without_vcat, + views = performance_experiment_views, +) + +# ╔═╡ dc63d32a-f387-11ea-37e2-6f3666a72e03 +⧀(a, b) = minimum(a).allocs + size(img, 1) ÷ 2 < minimum(b).allocs; + +# ╔═╡ 7eaa57d2-f368-11ea-1a70-c7c7e54bd0b1 +md""" + +#### Exercise 1.4 + +Nice! If you did your optimizations right, you should be able to get down the estimated allocations to a single digit number! + +👉 How many allocations were avoided by adding the `@view` optimization over the `vcat` optimization? Why is this? +""" + +# ╔═╡ fd819dac-f368-11ea-33bb-17148387546a +views_observation = md""" + +""" + +# ╔═╡ 318a2256-f369-11ea-23a9-2f74c566549b +md""" +## _Brightness and Energy_ +""" + +# ╔═╡ 7a44ba52-f318-11ea-0406-4731c80c1007 +md""" +First, we will define a `brightness` function for a pixel (a color) as the mean of the red, green and blue values. + +You should use this function whenever the problem set asks you to deal with _brightness_ of a pixel. 
+""" + +# ╔═╡ 6c7e4b54-f318-11ea-2055-d9f9c0199341 +begin + brightness(c::RGB) = mean((c.r, c.g, c.b)) + brightness(c::RGBA) = mean((c.r, c.g, c.b)) +end + +# ╔═╡ 74059d04-f319-11ea-29b4-85f5f8f5c610 +Gray.(brightness.(img)) + +# ╔═╡ 0b9ead92-f318-11ea-3744-37150d649d43 +md"""We provide you with a convolve function below. +""" + +# ╔═╡ d184e9cc-f318-11ea-1a1e-994ab1330c1a +convolve(img, k) = imfilter(img, reflect(k)) # uses ImageFiltering.jl package +# behaves the same way as the `convolve` function used in Lecture 2 +# You were asked to implement this in homework 1. + +# ╔═╡ cdfb3508-f319-11ea-1486-c5c58a0b9177 +float_to_color(x) = RGB(max(0, -x), max(0, x), 0) + +# ╔═╡ 5fccc7cc-f369-11ea-3b9e-2f0eca7f0f0e +md""" +finally we define the `energy` function which takes the Sobel gradients along x and y directions and computes the norm of the gradient for each pixel. +""" + +# ╔═╡ 6f37b34c-f31a-11ea-2909-4f2079bf66ec +begin + energy(∇x, ∇y) = sqrt.(∇x.^2 .+ ∇y.^2) + function energy(img) + ∇y = convolve(brightness.(img), Kernel.sobel()[1]) + ∇x = convolve(brightness.(img), Kernel.sobel()[2]) + energy(∇x, ∇y) + end +end + +# ╔═╡ 9fa0cd3a-f3e1-11ea-2f7e-bd73b8e3f302 +float_to_color.(energy(img)) + +# ╔═╡ 87afabf8-f317-11ea-3cb3-29dced8e265a +md""" +## **Exercise 2** - _Building up to dynamic programming_ + +In this exercise and the following ones, we will use the computational problem of Seam carving. We will think through all the "gut reaction" solutions, and then finally end up with the dynamic programming solution that we saw in the lecture. + +In the process we will understand the performance and accuracy of each iteration of our solution. + +### How to implement the solutions: + +For every variation of the algorithm, your job is to write a function which takes a matrix of energies, and an index for a pixel on the first row, and computes a "seam" starting at that pixel. 
+ +The function should return a vector of as many integers as there are rows in the input matrix where each number points out a pixel to delete from the corresponding row. (it acts as the input to `remove_in_each_row`). +""" + +# ╔═╡ 8ba9f5fc-f31b-11ea-00fe-79ecece09c25 +md""" +#### Exercise 2.1 - _The greedy approach_ + +The first approach discussed in the lecture (included below) is the _greedy approach_: you start from your top pixel, and at each step you just look at the three neighbors below. The next pixel in the seam is the neighbor with the lowest energy. + +""" + +# ╔═╡ f5a74dfc-f388-11ea-2577-b543d31576c6 +html""" + +""" + +# ╔═╡ c3543ea4-f393-11ea-39c8-37747f113b96 +md""" +👉 Implement the greedy approach. +""" + +# ╔═╡ 2f9cbea8-f3a1-11ea-20c6-01fd1464a592 +random_seam(m, n, i) = reduce((a, b) -> [a..., clamp(last(a) + rand(-1:1), 1, n)], 1:m-1; init=[i]) + +# ╔═╡ abf20aa0-f31b-11ea-2548-9bea4fab4c37 +function greedy_seam(energies, starting_pixel::Int) + # you can delete the body of this function - it's just a placeholder. + random_seam(size(energies)..., starting_pixel) +end + +# ╔═╡ 5430d772-f397-11ea-2ed8-03ee06d02a22 +md"Before we apply your function to our test image, let's try it out on a small matrix of energies (displayed here in grayscale), just like in the lecture snippet above (clicking on the video will take you to the right part of the video). Light pixels have high energy, dark pixels signify low energy." + +# ╔═╡ f580527e-f397-11ea-055f-bb9ea8f12015 +# try +# if length(Set(greedy_seam(greedy_test, 5))) == 1 +# md"Right now you are seeing the placeholder function. (You haven't done the exercise yet!) This is a straight line from the starting pixel." 
+# end +# catch end + +# ╔═╡ 7ddee6fc-f394-11ea-31fc-5bd665a65bef +greedy_test = Gray.(rand(Float64, (8,10))); + +# ╔═╡ 6f52c1a2-f395-11ea-0c8a-138a77f03803 +md"Starting pixel: $(@bind greedy_starting_pixel Slider(1:size(greedy_test, 2); show_value=true))" + +# ╔═╡ 9945ae78-f395-11ea-1d78-cf6ad19606c8 +md"_Let's try it on the bigger image!_" + +# ╔═╡ 87efe4c2-f38d-11ea-39cc-bdfa11298317 +md"Compute shrunk image: $(@bind shrink_greedy CheckBox())" + +# ╔═╡ 52452d26-f36c-11ea-01a6-313114b4445d +md""" +#### Exercise 2.2 - _Recursion_ + +A common pattern in algorithm design is the idea of solving a problem as the combination of solutions to subproblems. + +The classic example, is a [Fibonacci number](https://en.wikipedia.org/wiki/Fibonacci_number) generator. + +The recursive implementation of Fibonacci looks something like this +""" + +# ╔═╡ 2a98f268-f3b6-11ea-1eea-81c28256a19e +function fib(n) + # base case (basis) + if n == 0 || n == 1 # `||` means "or" + return 1 + end + + # recursion (induction) + return fib(n-1) + fib(n-2) +end + +# ╔═╡ 32e9a944-f3b6-11ea-0e82-1dff6c2eef8d +md""" +Notice that you can call a function from within itself which may call itself and so on until a base case is reached. Then the program will combine the result from the base case up to the final result. + +In the case of the Fibonacci function, we added the solutions to the subproblems `fib(n-1)`, `fib(n-2)` to produce `fib(n)`. + +An analogy can be drawn to the process of mathematical induction in mathematics. And as with mathematical induction there are parts to constructing such a recursive algorithm: + +- Defining a base case +- Defining an recursion i.e. finding a solution to the problem as a combination of solutions to smaller problems. + +""" + +# ╔═╡ 9101d5a0-f371-11ea-1c04-f3f43b96ca4a +md""" +👉 Define a `least_energy` function which returns: +1. the lowest possible total energy for a seam starting at the pixel at $(i, j)$; +2. 
the column to jump to on the next move (in row $i + 1$), +which is one of $j-1$, $j$ or $j+1$, up to boundary conditions. + +Return these two values in a tuple. +""" + +# ╔═╡ 8ec27ef8-f320-11ea-2573-c97b7b908cb7 +## returns lowest possible sum energy at pixel (i, j), and the column to jump to in row i+1. +function least_energy(energies, i, j) + # base case + # if i == something + # return energies[...] # no need for recursive computation in the base case! + # end + # + # induction + # combine results from recursive calls to `least_energy`. +end + +# ╔═╡ a7f3d9f8-f3bb-11ea-0c1a-55bbb8408f09 +md""" +This is so elegant, correct, but inefficient! If you **check this checkbox** $(@bind compute_access CheckBox()), you will see the number of accesses made to the energies array it took to compute the least energy from the pixel (1,7): +""" + +# ╔═╡ 18e0fd8a-f3bc-11ea-0713-fbf74d5fa41a +md"Whoa!" + +# ╔═╡ cbf29020-f3ba-11ea-2cb0-b92836f3d04b +begin + struct AccessTrackerArray{T,N} <: AbstractArray{T, N} + data::Array{T,N} + accesses::Ref{Int} + end + track_access(x) = AccessTrackerArray(x, Ref(0)) + + Base.IndexStyle(::Type{AccessTrackerArray}) = IndexLinear() + + Base.size(x::AccessTrackerArray) = size(x.data) + Base.getindex(x::AccessTrackerArray, i::Int...) = (x.accesses[] += 1; x.data[i...]) + Base.setindex!(x::AccessTrackerArray, v, i...) = (x.accesses[] += 1; x.data[i...] = v;) +end + +# ╔═╡ 8bc930f0-f372-11ea-06cb-79ced2834720 +md""" +#### Exercise 2.3 - _Exhaustive search with recursion_ + +Now use the `least_energy` function you defined above to define the `recursive_seam` function which takes the energies matrix and a starting pixel, and computes the seam with the lowest energy from that starting pixel. + +This will give you the method used in the lecture to perform [exhaustive search of all possible paths](https://youtu.be/rpB6zQNsbQU?t=839). 
+""" + +# ╔═╡ 85033040-f372-11ea-2c31-bb3147de3c0d +function recursive_seam(energies, starting_pixel) + m, n = size(energies) + # Replace the following line with your code. + [rand(1:starting_pixel) for i=1:m] +end + +# ╔═╡ 1d55333c-f393-11ea-229a-5b1e9cabea6a +md"Compute shrunk image: $(@bind shrink_recursive CheckBox())" + +# ╔═╡ c572f6ce-f372-11ea-3c9a-e3a21384edca +md""" +#### Exercise 2.4 + +- State clearly why this algorithm does an exhaustive search of all possible paths. +- How does the number of possible seam grow as n increases for a `m×n` image? (Big O notation is fine, or an approximation is fine). +""" + +# ╔═╡ 6d993a5c-f373-11ea-0dde-c94e3bbd1552 +exhaustive_observation = md""" + +""" + +# ╔═╡ ea417c2a-f373-11ea-3bb0-b1b5754f2fac +md""" +## **Exercise 3** - _Memoization_ + +**Memoization** is the name given to the technique of storing results to expensive function calls that will be accessed more than once. + +As stated in the video, the function `least_energy` is called repeatedly with the same arguments. In fact, we call it on the order of $3^n$ times, when there are only really $m \times n$ unique ways to call it! + +Lets implement memoization on this function with first a [dictionary](https://docs.julialang.org/en/v1/base/collections/#Dictionaries) for storage. +""" + +# ╔═╡ 56a7f954-f374-11ea-0391-f79b75195f4d +md""" +#### Exercise 3.1 - _Dictionary as storage_ + +Let's make a memoized version of least_energy function which takes a dictionary and +first checks to see if the dictionary contains the key (i,j) if it does, returns the value stored in that place, if not, will compute it, and store it in the dictionary at key (i, j) and return the value it computed. + + +`memoized_least_energy(energies, starting_pixel, memory)` + +This function must recursively call itself, and pass the same `memory` object it received as an argument. 
+ +You are expected to read and understand the [documentation on dictionaries](https://docs.julialang.org/en/v1/base/collections/#Dictionaries) to find out how to: + +1. Create a dictionary +2. Check if a key is stored in the dictionary +3. Access contents of the dictionary by a key. +""" + +# ╔═╡ b1d09bc8-f320-11ea-26bb-0101c9a204e2 +function memoized_least_energy(energies, i, j, memory) + m, n = size(energies) + + # Replace the following line with your code. + [starting_pixel for i=1:m] +end + +# ╔═╡ 3e8b0868-f3bd-11ea-0c15-011bbd6ac051 +function recursive_memoized_seam(energies, starting_pixel) + memory = Dict{Tuple{Int,Int}, Float64}() # location => least energy. + # pass this every time you call memoized_least_energy. + m, n = size(energies) + + # Replace the following line with your code. + [rand(1:starting_pixel) for i=1:m] +end + +# ╔═╡ 4e3bcf88-f3c5-11ea-3ada-2ff9213647b7 +md"Compute shrunk image: $(@bind shrink_dict CheckBox())" + +# ╔═╡ cf39fa2a-f374-11ea-0680-55817de1b837 +md""" +### Exercise 3.2 - _Matrix as storage_ + +The dictionary-based memoization we tried above works well in general as there is no restriction on what type of keys can be used. + +But in our particular case, we can use a matrix as a storage, since a matrix is naturally keyed by two integers. + +Write a variation of `matrix_memoized_least_energy` and `matrix_memoized_seam` which use a matrix as storage. +""" + +# ╔═╡ c8724b5e-f3bd-11ea-0034-b92af21ca12d +function matrix_memoized_least_energy(energies, i, j, memory) + m, n = size(energies) + + # Replace the following line with your code. + [starting_pixel for i=1:m] +end + +# ╔═╡ be7d40e2-f320-11ea-1b56-dff2a0a16e8d +function matrix_memoized_seam(energies, starting_pixel) + memory = zeros(size(energies)) # use this as storage -- intially it's all zeros + m, n = size(energies) + + # Replace the following line with your code. 
+ [starting_pixel for i=1:m] +end + +# ╔═╡ 507f3870-f3c5-11ea-11f6-ada3bb087634 +md"Compute shrunk image: $(@bind shrink_matrix CheckBox())" + +# ╔═╡ 24792456-f37b-11ea-07b2-4f4c8caea633 +md""" +## **Exercise 4** - _Dynamic programming without recursion_ + +Now it's easy to see that the above algorithm is equivalent to one that populates the memory matrix in a for loop. + +#### Exercise 4.1 + +👉 Write a function which takes the energies and returns the least energy matrix which has the least possible seam energy for each pixel. This was shown in the lecture, but attempt to write it on your own. +""" + +# ╔═╡ ff055726-f320-11ea-32f6-2bf38d7dd310 +function least_energy_matrix(energies) + copy(energies) +end + +# ╔═╡ 92e19f22-f37b-11ea-25f7-e321337e375e +md""" +#### Exercise 4.2 + +👉 Write a function which, when given the matrix returned by `least_energy_matrix` and a starting pixel (on the first row), computes the least energy seam from that pixel. +""" + +# ╔═╡ 795eb2c4-f37b-11ea-01e1-1dbac3c80c13 +function seam_from_precomputed_least_energy(energies, starting_pixel::Int) + least_energies = least_energy_matrix(energies) + m, n = size(least_energies) + + # Replace the following line with your code. + [starting_pixel for i=1:m] +end + +# ╔═╡ 51df0c98-f3c5-11ea-25b8-af41dc182bac +md"Compute shrunk image: $(@bind shrink_bottomup CheckBox())" + +# ╔═╡ 0fbe2af6-f381-11ea-2f41-23cd1cf930d9 +if student.kerberos_id === "jazz" + md""" +!!! danger "Oops!" + **Before you submit**, remember to fill in your name and kerberos ID at the top of this notebook! + """ +end + +# ╔═╡ 6b4d6584-f3be-11ea-131d-e5bdefcc791b +md"## Function library + +Just some helper functions used in the notebook." 
+ +# ╔═╡ ef88c388-f388-11ea-3828-ff4db4d1874e +function mark_path(img, path) + img′ = copy(img) + m = size(img, 2) + for (i, j) in enumerate(path) + # To make it easier to see, we'll color not just + # the pixels of the seam, but also those adjacent to it + for j′ in j-1:j+1 + img′[i, clamp(j′, 1, m)] = RGB(1,0,1) + end + end + img′ +end + +# ╔═╡ 437ba6ce-f37d-11ea-1010-5f6a6e282f9b +function shrink_n(img, n, min_seam, imgs=[]; show_lightning=true) + # Recursively carve `n` seams: each step picks the starting column whose seam + # (as produced by `min_seam`) has the lowest summed energy, pushes one frame + # onto `imgs`, and recurses on the result of `remove_in_each_row`. + n==0 && return push!(imgs, img) + + e = energy(img) + seam_energy(seam) = sum(e[i, seam[i]] for i in 1:size(img, 1)) + _, min_j = findmin(map(j->seam_energy(min_seam(e, j)), 1:size(e, 2))) + min_seam_vec = min_seam(e, min_j) + img′ = remove_in_each_row(img, min_seam_vec) + if show_lightning + push!(imgs, mark_path(img, min_seam_vec)) + else + push!(imgs, img′) + end + # Forward `show_lightning` so every recursion level honors the caller's choice, + # not just the first one. + shrink_n(img′, n-1, min_seam, imgs; show_lightning=show_lightning) +end + +# ╔═╡ f6571d86-f388-11ea-0390-05592acb9195 +if shrink_greedy + greedy_carved = shrink_n(img, 200, greedy_seam) + md"Shrink by: $(@bind greedy_n Slider(1:200; show_value=true))" +end + +# ╔═╡ f626b222-f388-11ea-0d94-1736759b5f52 +if shrink_greedy + greedy_carved[greedy_n] +end + +# ╔═╡ d88bc272-f392-11ea-0efd-15e0e2b2cd4e +if shrink_recursive + recursive_carved = shrink_n(pika, 3, recursive_seam) + md"Shrink by: $(@bind recursive_n Slider(1:3, show_value=true))" +end + +# ╔═╡ e66ef06a-f392-11ea-30ab-7160e7723a17 +if shrink_recursive + recursive_carved[recursive_n] +end + +# ╔═╡ 4e3ef866-f3c5-11ea-3fb0-27d1ca9a9a3f +if shrink_dict + dict_carved = shrink_n(img, 200, recursive_memoized_seam) + md"Shrink by: $(@bind dict_n Slider(1:200, show_value=true))" +end + +# ╔═╡ 6e73b1da-f3c5-11ea-145f-6383effe8a89 +if shrink_dict + dict_carved[dict_n] +end + +# ╔═╡ 50829af6-f3c5-11ea-04a8-0535edd3b0aa +if shrink_matrix + matrix_carved = shrink_n(img, 200, matrix_memoized_seam) + md"Shrink by: $(@bind matrix_n Slider(1:200, show_value=true))" +end + +# ╔═╡ 9e56ecfa-f3c5-11ea-2e90-3b1839d12038 +if shrink_matrix + 
matrix_carved[matrix_n] +end + +# ╔═╡ 51e28596-f3c5-11ea-2237-2b72bbfaa001 +if shrink_bottomup + bottomup_carved = shrink_n(img, 200, seam_from_precomputed_least_energy) + md"Shrink by: $(@bind bottomup_n Slider(1:200, show_value=true))" +end + +# ╔═╡ 0a10acd8-f3c6-11ea-3e2f-7530a0af8c7f +if shrink_bottomup + bottomup_carved[bottomup_n] +end + +# ╔═╡ ef26374a-f388-11ea-0b4e-67314a9a9094 +function pencil(X) + f(x) = RGB(1-x,1-x,1-x) + map(f, X ./ maximum(X)) +end + +# ╔═╡ 6bdbcf4c-f321-11ea-0288-fb16ff1ec526 +function decimate(img, n) + img[1:n:end, 1:n:end] +end + +# ╔═╡ ddba07dc-f3b7-11ea-353e-0f67713727fc +# Do not make this image bigger, it will be infeasible to compute. +pika = decimate(load(download("https://art.pixilart.com/901d53bcda6b27b.png")),150) + +# ╔═╡ 73b52fd6-f3b9-11ea-14ed-ebfcab1ce6aa +size(pika) + +# ╔═╡ fa8e2772-f3b6-11ea-30f7-699717693164 +if compute_access + tracked = track_access(energy(pika)) + least_energy(tracked, 1,7) + tracked.accesses[] +end + +# ╔═╡ ffc17f40-f380-11ea-30ee-0fe8563c0eb1 +hint(text) = Markdown.MD(Markdown.Admonition("hint", "Hint", [text])) + +# ╔═╡ 9f18efe2-f38e-11ea-0871-6d7760d0b2f6 +hint(md"You can call the `least_energy` function recursively within itself to obtain the least energy of the adjacent cells and add the energy at the current cell to get the total energy.") + +# ╔═╡ ffc40ab2-f380-11ea-2136-63542ff0f386 +almost(text) = Markdown.MD(Markdown.Admonition("warning", "Almost there!", [text])) + +# ╔═╡ ffceaed6-f380-11ea-3c63-8132d270b83f +still_missing(text=md"Replace `missing` with your answer.") = Markdown.MD(Markdown.Admonition("warning", "Here we go!", [text])) + +# ╔═╡ ffde44ae-f380-11ea-29fb-2dfcc9cda8b4 +keep_working(text=md"The answer is not quite right.") = Markdown.MD(Markdown.Admonition("danger", "Keep working on it!", [text])) + +# ╔═╡ 980b1104-f394-11ea-0948-21002f26ee25 +function visualize_seam_algorithm(algorithm, test_img, starting_pixel) + seam = algorithm(test_img, starting_pixel) + + 
display_img = RGB.(test_img) + for (i, j) in enumerate(seam) + try + display_img[i, j] = RGB(0.9, 0.3, 0.6) + catch ex + if ex isa BoundsError + return keep_working("") + end + # the solution might give an illegal index + end + end + display_img +end; + +# ╔═╡ 2a7e49b8-f395-11ea-0058-013e51baa554 +visualize_seam_algorithm(greedy_seam, greedy_test, greedy_starting_pixel) + +# ╔═╡ ffe326e0-f380-11ea-3619-61dd0592d409 +yays = [md"Great!", md"Yay ❤", md"Great! 🎉", md"Well done!", md"Keep it up!", md"Good job!", md"Awesome!", md"You got the right answer!", md"Let's move on to the next section."] + +# ╔═╡ fff5aedc-f380-11ea-2a08-99c230f8fa32 +correct(text=rand(yays)) = Markdown.MD(Markdown.Admonition("correct", "Got it!", [text])) + +# ╔═╡ e3519118-f387-11ea-0c61-e1c2de1c24c1 +if performance_experiment_without_vcat ⧀ performance_experiment_default + correct() +else + keep_working(md"We are still using (roughly) the same number of allocations as the default implementation.") +end + +# ╔═╡ d4ea4222-f388-11ea-3c8d-db0d651f5282 +if performance_experiment_views ⧀ performance_experiment_default + if minimum(performance_experiment_views).allocs < 10 + correct() + else + keep_working(md"We are still using (roughly) the same number of allocations as the implementation without `vcat`.") + end +else + keep_working(md"We are still using (roughly) the same number of allocations as the default implementation.") +end + +# ╔═╡ 00026442-f381-11ea-2b41-bde1fff66011 +not_defined(variable_name) = Markdown.MD(Markdown.Admonition("danger", "Oopsie!", [md"Make sure that you define a variable called **$(Markdown.Code(string(variable_name)))**"])) + +# ╔═╡ 145c0f58-f384-11ea-2b71-09ae83f66da2 +if !@isdefined(views_observation) + not_defined(:views_observation) +end + +# ╔═╡ d7a9c000-f383-11ea-1516-cf71102d8e94 +if !@isdefined(views_observation) + not_defined(:views_observation) +end + +# ╔═╡ e0622780-f3b4-11ea-1f44-59fb9c5d2ebd +if !@isdefined(least_energy_matrix) + 
not_defined(:least_energy_matrix) +end + +# ╔═╡ 946b69a0-f3a2-11ea-2670-819a5dafe891 +if !@isdefined(seam_from_precomputed_least_energy) + not_defined(:seam_from_precomputed_least_energy) +end + +# ╔═╡ fbf6b0fa-f3e0-11ea-2009-573a218e2460 +function hbox(x, y, gap=16; sy=size(y), sx=size(x)) + w,h = (max(sx[1], sy[1]), + gap + sx[2] + sy[2]) + + slate = fill(RGB(1,1,1), w,h) + slate[1:size(x,1), 1:size(x,2)] .= RGB.(x) + slate[1:size(y,1), size(x,2) + gap .+ (1:size(y,2))] .= RGB.(y) + slate +end + +# ╔═╡ f010933c-f318-11ea-22c5-4d2e64cd9629 +begin + hbox( + float_to_color.(convolve(brightness.(img), Kernel.sobel()[1])), + float_to_color.(convolve(brightness.(img), Kernel.sobel()[2]))) +end + +# ╔═╡ 256edf66-f3e1-11ea-206e-4f9b4f6d3a3d +# Stack `x` above `y` by transposing both, laying them out with `hbox`, and transposing back; +# `gap` is forwarded so a caller-supplied spacing is actually applied. +vbox(x,y, gap=16) = hbox(x', y', gap)' + +# ╔═╡ 00115b6e-f381-11ea-0bc6-61ca119cb628 +bigbreak = html"




"; + +# ╔═╡ c086bd1e-f384-11ea-3b26-2da9e24360ca +bigbreak + +# ╔═╡ 8d558c4c-f328-11ea-0055-730ead5d5c34 +bigbreak + +# ╔═╡ f7eba2b6-f388-11ea-06ad-0b861c764d61 +bigbreak + +# ╔═╡ 4f48c8b8-f39d-11ea-25d2-1fab031a514f +bigbreak + +# ╔═╡ 48089a00-f321-11ea-1479-e74ba71df067 +bigbreak + +# ╔═╡ Cell order: +# ╟─e6b6760a-f37f-11ea-3ae1-65443ef5a81a +# ╟─ec66314e-f37f-11ea-0af4-31da0584e881 +# ╟─85cfbd10-f384-11ea-31dc-b5693630a4c5 +# ╠═33e43c7c-f381-11ea-3abc-c942327456b1 +# ╟─938185ec-f384-11ea-21dc-b56b7469f798 +# ╠═86e1ee96-f314-11ea-03f6-0f549b79e7c9 +# ╠═a4937996-f314-11ea-2ff9-615c888afaa8 +# ╠═0d144802-f319-11ea-0028-cd97a776a3d0 +# ╟─cc9fcdae-f314-11ea-1b9a-1f68b792f005 +# ╟─b49a21a6-f381-11ea-1a98-7f144c55c9b7 +# ╟─b49e8cc8-f381-11ea-1056-91668ac6ae4e +# ╠═e799be82-f317-11ea-3ae4-6d13ece3fe10 +# ╟─c075a8e6-f382-11ea-2263-cd9507324f4f +# ╠═9cced1a8-f326-11ea-0759-0b2f22e5a1db +# ╟─c086bd1e-f384-11ea-3b26-2da9e24360ca +# ╟─1d893998-f366-11ea-0828-512de0c44915 +# ╟─59991872-f366-11ea-1036-afe313fb4ec1 +# ╠═e501ea28-f326-11ea-252a-53949fd9ef57 +# ╟─f7915918-f366-11ea-2c46-2f4671ae8a22 +# ╠═37d4ea5c-f327-11ea-2cc5-e3774c232c2b +# ╠═67717d02-f327-11ea-0988-bfe661f57f77 +# ╟─9e149cd2-f367-11ea-28ef-b9533e8a77bb +# ╟─e3519118-f387-11ea-0c61-e1c2de1c24c1 +# ╟─ba1619d4-f389-11ea-2b3f-fd9ba71cf7e3 +# ╠═e49235a4-f367-11ea-3913-f54a4a6b2d6b +# ╟─145c0f58-f384-11ea-2b71-09ae83f66da2 +# ╟─837c43a4-f368-11ea-00a3-990a45cb0cbd +# ╠═90a22cc6-f327-11ea-1484-7fda90283797 +# ╠═3335e07c-f328-11ea-0e6c-8d38c8c0ad5b +# ╟─d4ea4222-f388-11ea-3c8d-db0d651f5282 +# ╟─40d6f562-f329-11ea-2ee4-d7806a16ede3 +# ╟─4f0975d8-f329-11ea-3d10-59a503f8d6b2 +# ╟─dc63d32a-f387-11ea-37e2-6f3666a72e03 +# ╟─7eaa57d2-f368-11ea-1a70-c7c7e54bd0b1 +# ╠═fd819dac-f368-11ea-33bb-17148387546a +# ╟─d7a9c000-f383-11ea-1516-cf71102d8e94 +# ╟─8d558c4c-f328-11ea-0055-730ead5d5c34 +# ╟─318a2256-f369-11ea-23a9-2f74c566549b +# ╟─7a44ba52-f318-11ea-0406-4731c80c1007 +# ╠═6c7e4b54-f318-11ea-2055-d9f9c0199341 +# 
╠═74059d04-f319-11ea-29b4-85f5f8f5c610 +# ╟─0b9ead92-f318-11ea-3744-37150d649d43 +# ╠═d184e9cc-f318-11ea-1a1e-994ab1330c1a +# ╠═cdfb3508-f319-11ea-1486-c5c58a0b9177 +# ╠═f010933c-f318-11ea-22c5-4d2e64cd9629 +# ╟─5fccc7cc-f369-11ea-3b9e-2f0eca7f0f0e +# ╠═6f37b34c-f31a-11ea-2909-4f2079bf66ec +# ╠═9fa0cd3a-f3e1-11ea-2f7e-bd73b8e3f302 +# ╟─f7eba2b6-f388-11ea-06ad-0b861c764d61 +# ╟─87afabf8-f317-11ea-3cb3-29dced8e265a +# ╟─8ba9f5fc-f31b-11ea-00fe-79ecece09c25 +# ╟─f5a74dfc-f388-11ea-2577-b543d31576c6 +# ╟─c3543ea4-f393-11ea-39c8-37747f113b96 +# ╟─2f9cbea8-f3a1-11ea-20c6-01fd1464a592 +# ╠═abf20aa0-f31b-11ea-2548-9bea4fab4c37 +# ╟─5430d772-f397-11ea-2ed8-03ee06d02a22 +# ╟─f580527e-f397-11ea-055f-bb9ea8f12015 +# ╟─6f52c1a2-f395-11ea-0c8a-138a77f03803 +# ╟─2a7e49b8-f395-11ea-0058-013e51baa554 +# ╟─7ddee6fc-f394-11ea-31fc-5bd665a65bef +# ╟─980b1104-f394-11ea-0948-21002f26ee25 +# ╟─9945ae78-f395-11ea-1d78-cf6ad19606c8 +# ╟─87efe4c2-f38d-11ea-39cc-bdfa11298317 +# ╟─f6571d86-f388-11ea-0390-05592acb9195 +# ╟─f626b222-f388-11ea-0d94-1736759b5f52 +# ╟─52452d26-f36c-11ea-01a6-313114b4445d +# ╠═2a98f268-f3b6-11ea-1eea-81c28256a19e +# ╟─32e9a944-f3b6-11ea-0e82-1dff6c2eef8d +# ╟─9101d5a0-f371-11ea-1c04-f3f43b96ca4a +# ╠═ddba07dc-f3b7-11ea-353e-0f67713727fc +# ╠═73b52fd6-f3b9-11ea-14ed-ebfcab1ce6aa +# ╠═8ec27ef8-f320-11ea-2573-c97b7b908cb7 +# ╟─9f18efe2-f38e-11ea-0871-6d7760d0b2f6 +# ╟─a7f3d9f8-f3bb-11ea-0c1a-55bbb8408f09 +# ╟─fa8e2772-f3b6-11ea-30f7-699717693164 +# ╟─18e0fd8a-f3bc-11ea-0713-fbf74d5fa41a +# ╟─cbf29020-f3ba-11ea-2cb0-b92836f3d04b +# ╟─8bc930f0-f372-11ea-06cb-79ced2834720 +# ╠═85033040-f372-11ea-2c31-bb3147de3c0d +# ╠═1d55333c-f393-11ea-229a-5b1e9cabea6a +# ╠═d88bc272-f392-11ea-0efd-15e0e2b2cd4e +# ╠═e66ef06a-f392-11ea-30ab-7160e7723a17 +# ╟─c572f6ce-f372-11ea-3c9a-e3a21384edca +# ╠═6d993a5c-f373-11ea-0dde-c94e3bbd1552 +# ╠═ea417c2a-f373-11ea-3bb0-b1b5754f2fac +# ╟─56a7f954-f374-11ea-0391-f79b75195f4d +# ╠═b1d09bc8-f320-11ea-26bb-0101c9a204e2 +# 
╠═3e8b0868-f3bd-11ea-0c15-011bbd6ac051 +# ╠═4e3bcf88-f3c5-11ea-3ada-2ff9213647b7 +# ╠═4e3ef866-f3c5-11ea-3fb0-27d1ca9a9a3f +# ╠═6e73b1da-f3c5-11ea-145f-6383effe8a89 +# ╟─cf39fa2a-f374-11ea-0680-55817de1b837 +# ╠═c8724b5e-f3bd-11ea-0034-b92af21ca12d +# ╠═be7d40e2-f320-11ea-1b56-dff2a0a16e8d +# ╟─507f3870-f3c5-11ea-11f6-ada3bb087634 +# ╠═50829af6-f3c5-11ea-04a8-0535edd3b0aa +# ╠═9e56ecfa-f3c5-11ea-2e90-3b1839d12038 +# ╟─4f48c8b8-f39d-11ea-25d2-1fab031a514f +# ╟─24792456-f37b-11ea-07b2-4f4c8caea633 +# ╠═ff055726-f320-11ea-32f6-2bf38d7dd310 +# ╟─e0622780-f3b4-11ea-1f44-59fb9c5d2ebd +# ╟─92e19f22-f37b-11ea-25f7-e321337e375e +# ╠═795eb2c4-f37b-11ea-01e1-1dbac3c80c13 +# ╠═51df0c98-f3c5-11ea-25b8-af41dc182bac +# ╠═51e28596-f3c5-11ea-2237-2b72bbfaa001 +# ╠═0a10acd8-f3c6-11ea-3e2f-7530a0af8c7f +# ╟─946b69a0-f3a2-11ea-2670-819a5dafe891 +# ╟─0fbe2af6-f381-11ea-2f41-23cd1cf930d9 +# ╟─48089a00-f321-11ea-1479-e74ba71df067 +# ╟─6b4d6584-f3be-11ea-131d-e5bdefcc791b +# ╟─437ba6ce-f37d-11ea-1010-5f6a6e282f9b +# ╟─ef88c388-f388-11ea-3828-ff4db4d1874e +# ╟─ef26374a-f388-11ea-0b4e-67314a9a9094 +# ╟─6bdbcf4c-f321-11ea-0288-fb16ff1ec526 +# ╟─ffc17f40-f380-11ea-30ee-0fe8563c0eb1 +# ╟─ffc40ab2-f380-11ea-2136-63542ff0f386 +# ╟─ffceaed6-f380-11ea-3c63-8132d270b83f +# ╟─ffde44ae-f380-11ea-29fb-2dfcc9cda8b4 +# ╟─ffe326e0-f380-11ea-3619-61dd0592d409 +# ╟─fff5aedc-f380-11ea-2a08-99c230f8fa32 +# ╟─00026442-f381-11ea-2b41-bde1fff66011 +# ╟─fbf6b0fa-f3e0-11ea-2009-573a218e2460 +# ╟─256edf66-f3e1-11ea-206e-4f9b4f6d3a3d +# ╟─00115b6e-f381-11ea-0bc6-61ca119cb628 diff --git a/homework/homework3/hw3.html b/homework/homework3/hw3.html new file mode 100644 index 000000000..717b5b840 --- /dev/null +++ b/homework/homework3/hw3.html @@ -0,0 +1,385 @@ + + + + + ⚡ Pluto.jl ⚡ + + + + + + + + + + + + +

homework 3, version 1

+
17 μs

Submission by: Jazzy Doe (jazz@mit.edu)

+
11.5 ms

Homework 3: Structure and Language

+

18.S191, fall 2020

+

This notebook contains built-in, live answer checks! In some exercises you will see a coloured box, which runs a test case on your code, and provides feedback based on the result. Simply edit the code, run it, and the check runs again.

+

For MIT students: there will also be some additional (secret) test cases that will be run as part of the grading process, and we will look at your notebook and write comments.

+

Feel free to ask questions!

+
287 μs
student
11.4 ms

Let's create a package environment:

+
27.5 μs
162 ms
23.8 s
+
474 μs





2.1 μs

Exercise 1: Language detection

+

In this exercise, we are going to create some super simple Artificial Intelligence. Natural language can be quite messy, but hidden in this mess is structure, which we are going to look for today.

+

Let's start with some obvious structure in English text: the set of characters that we write the language in. If we generate random text by sampling random Unicode characters, it does not look like English:

+
40.2 μs
"\Uaf09b\U101dd6\Ua8c9b\U37643\Ud44e9\U8b241\U6d7e8\Uc2d56\U925aa\U7ca7b\U7edc2\Ub49ca𗍔\Ue635e\U41c76\U75eea\Ue78d3\Uec8c0\U8d63f\Ue4794\U4e196\U45536\U346b9\U2ed0a\U68d4a\Ue48c6\U14fbf\Udb39b\Ueecb9\Ue54fd\Uc7a79\U31f5d\Ud5354\U95c66\Ubf7e8\Uba0ce\U49de8\U9a484\Ud275f\Ue3024"
266 ms

Instead, let's define an alphabet, and only use those letters to sample from. To keep things simple, we ignore punctuation, capitalization, etc., and only use these 27 characters:

+
52.5 μs
alphabet
74.4 ms

Let's sample random characters from our alphabet:

+
29.1 μs
ctzmwrdhyapnqcfvzbyfqguyholyqz rdbtvvdax
72 ms

That already looks a lot better than our first attempt! But still, this does not look like English text – we can do better.

+
+

English words are not well modelled by this random-Latin-characters model. Our first observation is that some letters are more common than others. To put this observation into practice, we would like to have the frequency table of the Latin alphabet. We could search for it online, but it is actually very simple to calculate ourselves! The only thing we need is a representative sample of English text.

+

The following samples are from Wikipedia, but feel free to type in your own sample! You can also enter a sample of a different language, if that language can be expressed in the Latin alphabet.

+

Remember that the button on the left of a cell will show or hide the code.

+

We also include a sample of Spanish, which we'll use later!

+
48.5 μs
samples
1.8 ms

Exercise 1.1 - Data cleaning

+

Looking at the sample, we see that it is quite messy: it contains punctuation, accented letters and numbers. For our analysis, we are only interested in our 27-character alphabet (i.e. 'a' through 'z' plus ' '). We are going to clean the data using the Julia function filter.

+
60.8 μs
29.7 ms

filter takes two arguments: a function and a collection. The function is applied to each element of the collection, and it returns either true or false. If the result is true, then that element is included in the final collection.

+

Did you notice something cool? Functions are also just objects in Julia, and you can use them as arguments to other functions! (Fons thinks this is super cool.)

+
+

We have written a function isinalphabet, which takes a character and returns a boolean:

+
28.6 μs
isinalphabet (generic function with 1 method)
80.6 μs
28.9 ms

👉 Use filter to extract just the characters from our alphabet out of messy_sentence_1:

+
24.3 μs
messy_sentence_1
"#wow 2020 ¥500 (blingbling!)"
6.3 μs
cleaned_sentence_1
missing
5.9 μs

Here we go!

Replace missing with your answer.

+ +
+
30 μs

+

We are not interested in the case of letters (i.e. 'A' vs 'a'), so we want to map these to lower case before we apply our filter. If we don't, all upper case letters would get deleted.

+
35.4 μs

👉 Use the function lowercase to convert messy_sentence_2 into a lower case string, and then use filter to extract only the characters from our alphabet.

+
19.7 μs
messy_sentence_2
"Awesome! 😍"
2.9 μs
cleaned_sentence_2
missing
2.7 μs

Here we go!

Replace missing with your answer.

+ +
+
26.5 μs

+

Finally, we need to deal with accents: simply deleting accented characters from the source text might deform it too much. We can add accented letters to our alphabet, but a simpler solution is to replace accented letters with the corresponding unaccented base character. We have written a function unaccent that does just that.

+
55.4 μs
french_word
"Égalité!"
1.7 μs
"Egalite!"
88.9 μs
Main.workspace1286.unaccent
10.7 ms

+

👉 Let's put everything together. Write a function clean that takes a string, and returns a cleaned version, where:

+
    +
  • accented letters are replaced by their base characters;

    +
  • +
  • upper-case letters are converted to lower case;

    +
  • +
  • it is filtered to only contain characters from alphabet

    +
  • +
+
44 μs
clean (generic function with 1 method)
28.2 μs
missing
371 μs

Here we go!

Replace missing with your answer.

+ +
+
23.4 μs





+

Exercise 1.2 - Letter frequencies

+

We are going to count the frequency of each letter in this sample, after applying your clean function. Can you guess which character is most frequent?

+
190 μs
first_sample
missing
285 μs
letter_frequencies (generic function with 1 method)
72.2 μs
sample_freqs
missing
116 μs

The result is a 27-element array, with values between 0.0 and 1.0. These values correspond to the frequency of each letter.

+

sample_freqs[i] == 0.0 means that the ith letter did not occur in your sample, and sample_freqs[i] == 0.1 means that 10% of the letters in the sample are the ith letter.

+

To make it easier to convert between a character from the alphabet and its index, we have the following function:

+
50 μs
index_of_letter (generic function with 1 method)
35.8 μs
55.3 ms

+

👉 Which letters from the alphabet did not occur in the sample?

+
57.3 μs
unused_letters
10.6 μs

MethodError: no method matching keys(::Missing)

Closest candidates are:

keys(!Matched::Core.SimpleVector) at essentials.jl:605

keys(!Matched::Cmd) at process.jl:639

keys(!Matched::Tuple) at tuple.jl:64

...

  1. pairs(::Missing)@abstractdict.jl:134
  2. findall(::Base.Fix2{typeof(isequal),Float64}, ::Missing)@array.jl:2024
  3. top-level scope@Local: 8
---

Hint

You can answer this question without writing any code: have a look at the values of sample_freqs.

+ +
+
103 ms





+

Now that we know the frequencies of letters in English, we can generate random text that already looks closer to English!

+

Random letters from the alphabet:

+
13.9 μs
+

mgpnjbqxfrympueavzkjup mmpbihitktaniacmg ckoeeptbektfcjmibbebawhtissddfyb kziojkyxqjxdudmpzicigbualpeq vkagwxqlwmtpmgqyghifryigpsrnfniu chbenctadgqiwuxmumojiprvvackpoefrspdtxbaeym rdeijet dafcjurkcflq ic lhqlwbkweit ccnufnvtfixyrbiavjgxqiwxxsszohrdnqkugtxzpzxuvnonzceuncxzcchsyplxleheppffjizbegahaxi dqpbtafbju wcjzdtssghsowidyusbchyzgcvkzqzm ymhmkvgkurwotuutxgthizvaxuddcwbnuabhxlvecrjrlsjiwczmvdln

+
81.5 ms

Random letters at the correct frequencies:

+
60.5 μs

MethodError: no method matching iterate(::Missing)

Closest candidates are:

iterate(!Matched::Core.SimpleVector) at essentials.jl:603

iterate(!Matched::Core.SimpleVector, !Matched::Any) at essentials.jl:603

iterate(!Matched::ExponentialBackOff) at error.jl:253

...

  1. _foldl_impl(::Base.BottomRF{typeof(Base.add_sum)}, ::Base._InitialValue, ::Missing)@reduce.jl:53
  2. foldl_impl(::Base.BottomRF{typeof(Base.add_sum)}, ::NamedTuple{(),Tuple{}}, ::Missing)@reduce.jl:45
  3. mapfoldl_impl(::typeof(identity), ::typeof(Base.add_sum), ::NamedTuple{(),Tuple{}}, ::Missing)@reduce.jl:41
  4. #mapfoldl#189(::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::typeof(mapfoldl), ::Function, ::Function, ::Missing)@reduce.jl:157
  5. mapfoldl(::Function, ::Function, ::Missing)@reduce.jl:157
  6. #mapreduce#193(::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::typeof(mapreduce), ::Function, ::Function, ::Missing)@reduce.jl:283
  7. mapreduce(::Function, ::Function, ::Missing)@reduce.jl:283
  8. sum(::Function, ::Missing)@reduce.jl:486
  9. sum(::Missing)@reduce.jl:503
  10. rand_sample(::Missing)@Other: 3
  11. rand_sample_letter(::Missing)@Other: 2
  12. (::Main.workspace1306.var"#1#2")(::Int64)@none:0
  13. iterate@generator.jl:47[inlined]
  14. collect(::Base.Generator{UnitRange{Int64},Main.workspace1306.var"#1#2"})@array.jl:665
  15. top-level scope@Local: 1
---

By considering the frequencies of letters in English, we see that our model is already a lot better!

+

Our next observation is that some letter combinations are more common than others. Our current model thinks that potato is just as 'English' as ooaptt. In the next section, we will quantify these transition frequencies, and use it to improve our model.

+
42.6 μs
2.9 μs
rand_sample (generic function with 1 method)
86.2 μs
rand_sample_letter (generic function with 1 method)
47.2 μs





+

Exercise 1.3 - Transition frequencies

+

In the previous exercise we computed the frequency of each letter in the sample by counting their occurrences, and then dividing by the total number of counts.

+

In this exercise, we are going to count letter transitions, such as aa, as, rt, yy. Two letters might both be common, like a and e, but their combination, ae, is uncommon in English.

+

To quantify this observation, we will do the same as in our last exercise: we count occurrences in a sample text, to create the transition frequency matrix.

+
20.1 μs
transition_counts (generic function with 1 method)
86.4 μs
normalize_array (generic function with 1 method)
38.5 μs
4.8 ms

MethodError: no method matching iterate(::Missing)

Closest candidates are:

iterate(!Matched::Core.SimpleVector) at essentials.jl:603

iterate(!Matched::Core.SimpleVector, !Matched::Any) at essentials.jl:603

iterate(!Matched::ExponentialBackOff) at error.jl:253

...

  1. count(::String, ::Missing)@reduce.jl:854
  2. (::Main.workspace1286.var"#3#4"{Missing})(::Tuple{Char,Char})@none:0
  3. iterate@generator.jl:47[inlined]
  4. collect(::Base.Generator{Base.Iterators.ProductIterator{Tuple{Array{Char,1},Array{Char,1}}},Main.workspace1286.var"#3#4"{Missing}})@array.jl:665
  5. transition_counts(::Missing)@Other: 2
  6. (::Base.var"#64#65"{typeof(Main.workspace1286.normalize_array),typeof(Main.workspace1286.transition_counts)})(::Missing)@operators.jl:859
  7. top-level scope@Local: 1
---

What we get is a 27 by 27 matrix. Each entry corresponds to a character pair. The column corresponds to the first character, the row to the second character. Let's visualize this:

+
29.7 μs

MethodError: no method matching iterate(::Missing)

Closest candidates are:

iterate(!Matched::Core.SimpleVector) at essentials.jl:603

iterate(!Matched::Core.SimpleVector, !Matched::Any) at essentials.jl:603

iterate(!Matched::ExponentialBackOff) at error.jl:253

...

  1. count(::String, ::Missing)@reduce.jl:854
  2. (::Main.workspace1286.var"#3#4"{Missing})(::Tuple{Char,Char})@none:0
  3. iterate@generator.jl:47[inlined]
  4. collect(::Base.Generator{Base.Iterators.ProductIterator{Tuple{Array{Char,1},Array{Char,1}}},Main.workspace1286.var"#3#4"{Missing}})@array.jl:665
  5. transition_counts(::Missing)@Other: 2
  6. (::Base.var"#64#65"{typeof(Main.workspace1286.normalize_array),typeof(Main.workspace1286.transition_counts)})(::Missing)@operators.jl:859
  7. top-level scope@Local: 1
---

Answer the following questions with respect to the cleaned English sample text, which we called first_sample. Let's also give the transition matrix a name:

+
19.8 μs

MethodError: no method matching iterate(::Missing)

Closest candidates are:

iterate(!Matched::Core.SimpleVector) at essentials.jl:603

iterate(!Matched::Core.SimpleVector, !Matched::Any) at essentials.jl:603

iterate(!Matched::ExponentialBackOff) at error.jl:253

...

  1. count(::String, ::Missing)@reduce.jl:854
  2. (::Main.workspace1286.var"#3#4"{Missing})(::Tuple{Char,Char})@none:0
  3. iterate@generator.jl:47[inlined]
  4. collect(::Base.Generator{Base.Iterators.ProductIterator{Tuple{Array{Char,1},Array{Char,1}}},Main.workspace1286.var"#3#4"{Missing}})@array.jl:665
  5. transition_counts(::Missing)@Other: 2
  6. (::Base.var"#64#65"{typeof(Main.workspace1286.normalize_array),typeof(Main.workspace1286.transition_counts)})(::Missing)@operators.jl:859
  7. top-level scope@Local: 1
---

👉 What is the frequency of the combination "th"?

+
29 μs
th_frequency
missing
3.2 μs

👉 What about "ht"?

+
60.2 μs
ht_frequency
missing
2.3 μs

Here we go!

Replace missing with your answer.

+ +
+
17.5 μs

👉 Which letters appeared double in our sample?

+
46 μs
double_letters
4.9 μs
4.5 μs

👉 Which letter is most likely to follow a W?

+
23.5 μs
most_likely_to_follow_w
'x': ASCII/Unicode U+0078 (category Ll: Letter, lowercase)
1.7 μs
5.7 μs

👉 Which letter is most likely to precede a W?

+
33 μs
most_likely_to_precede_w
'x': ASCII/Unicode U+0078 (category Ll: Letter, lowercase)
3.4 μs
4.8 μs

👉 What is the sum of each row? What is the sum of each column? How can we interpret these values?

+
16.4 μs
row_col_answer
8.2 μs





1.2 μs

We can use the measured transition frequencies to generate text in a way that it has the same transition frequencies as our original sample. Our generated text is starting to look like real language!

+
29.4 μs
83 ms

Random letters from the alphabet:

+
14 μs
+

eouhwbqdhdipwafexugjbpwsvb ucrnfsorlyxjzftnndgnoxouswcohitoq fjtiqtnxyq ljppdebilj tlpjjjmowkgkowtuogyyansmdeypxvszgqcv wpbywyp voerkeyasxyrvxxoqnpcvekxftf uhlmzmhwd aretpytnfvznivqyuwztpbc mbdakzncbwvspjbwrdbutoqquzqjpzmffcuibnujhysfoqkcnrqjufrztisyrsvrcxkptndoqsgxkwodbffcjivuyjkywtgkft busrwxgaqzhzv ufuqigvngkumbpyfuzwegdsjfzogafxzkfyaeqxkthcuhewwloyqlbykahwul xptutdhzbqnsotchrmdo pvyiz vvrdowsn

+
43 μs

Random letters at the correct frequencies:

+
35.2 μs
+

heocaewenano voohigmdpsaple thortofwtdln lyrrgen him d c ua r psr fnt sar hr rreor wc rddnryvhtlfm e ns e irrrilet esd igoil a l eip lo u iayn nddnscfeou dnonnbwegsfshh lwsi vhcb oly t yiehlfrhe dmotlmelacfi rgsdtht cinrisnahn ioll ephiidoedmn ttfooola g ygmrga f fglbyn irrlfah ltil cred l movoeogin gdatitoori tw rit eodesaegolo bele gisorcnreavwohtnerf s tg lingt eo todrtsahceco ence f

+
460 ms

Random letters at the correct transition frequencies:

+
26.9 μs

MethodError: no method matching iterate(::Missing)

Closest candidates are:

iterate(!Matched::Core.SimpleVector) at essentials.jl:603

iterate(!Matched::Core.SimpleVector, !Matched::Any) at essentials.jl:603

iterate(!Matched::ExponentialBackOff) at error.jl:253

...

  1. count(::String, ::Missing)@reduce.jl:854
  2. (::Main.workspace1286.var"#3#4"{Missing})(::Tuple{Char,Char})@none:0
  3. iterate@generator.jl:47[inlined]
  4. collect(::Base.Generator{Base.Iterators.ProductIterator{Tuple{Array{Char,1},Array{Char,1}}},Main.workspace1286.var"#3#4"{Missing}})@array.jl:665
  5. transition_counts(::Missing)@Other: 2
  6. (::Base.var"#64#65"{typeof(Main.workspace1286.normalize_array),typeof(Main.workspace1286.transition_counts)})(::Missing)@operators.jl:859
  7. top-level scope@Local: 1
---
sample_text (generic function with 1 method)
160 μs





+

Exercise 1.4 - Language detection

+
15.3 μs

It looks like we have a decent language model, in the sense that it understands transition frequencies in the language. In the demo above, try switching the language between English and Spanish – the generated text clearly looks more like one or the other, demonstrating that the model can capture differences between the two languages. What's remarkable is that our "training data" was just a single paragraph per language.

+

In this exercise, we will use our model to write a classifier: a program that automatically classifies a text as either English or Spanish.

+

This is not a difficult task - you can get dictionaries for both languages, and count matches - but we are doing something much more cool: we only use a single paragraph of each language, and we use a language model as classifier.

+
617 μs

Mystery sample

+

Enter some text here -- we will detect in which language it is written!

8.6 μs
38 ms
398 μs
"Small boats are typically found on inland waterways such as rivers and lakes, or in protected coastal areas. However, some boats, such as the whaleboat, were intended for use in an offshore environment. In modern naval terms, a boat is a vessel small enough to be carried aboard a ship. Anomalous definitions exist, as lake freighters 1,000 feet (300 m) long on the Great Lakes are called \"boats\". \n"
2.5 μs

Let's compute the transition frequencies of our mystery sample! Type some text in the box below, and check whether the frequency matrix updates.

+
10.7 μs
27×27 Array{Float64,2}:
+ 0.0                   0.002857142857142857  …  0.0  0.008571428571428572
+ 0.0                   0.0                      0.0  0.0
+ 0.008571428571428572  0.0                      0.0  0.0
+ 0.0                   0.0                      0.0  0.022857142857142857
+ 0.005714285714285714  0.002857142857142857     0.0  0.02857142857142857
+ 0.0                   0.0                   …  0.0  0.0
+ 0.0                   0.0                      0.0  0.002857142857142857
+ ⋮                                           ⋱  ⋮    
+ 0.002857142857142857  0.0                      0.0  0.0
+ 0.005714285714285714  0.0                      0.0  0.0
+ 0.0                   0.0                      0.0  0.0
+ 0.0                   0.0                      0.0  0.002857142857142857
+ 0.0                   0.0                   …  0.0  0.0
+ 0.03428571428571429   0.011428571428571429     0.0  0.0
441 ms

Our model will compare the transition frequencies of our mystery sample to those of our two language samples. The closest match will be our detected language.

+

The only question left is: How do we compare two matrices? When two matrices are almost equal, but not exactly, we want to quantify their distance.

+

👉 Write a function called matrix_distance which takes 2 matrices of the same size and finds the distance between them by:

+
    +
  1. Subtracting corresponding elements

    +
  2. +
  3. Finding the absolute value of the difference

    +
  4. +
  5. Summing the differences

    +
  6. +
+
37.6 μs
matrix_distance (generic function with 1 method)
35 μs
distances
27.1 ms

Here we go!

Replace missing with your answer.

+ +
+
112 ms

We have written a cell that selects the language with the smallest distance to the mystery language. Make sure that matrix_distance is working correctly, and scroll up to the mystery text to see it in action!

+

Further reading

+

It turns out that the SVD of the transition matrix can mysteriously group the alphabet into vowels and consonants, without any extra information. See this paper if you want to try it yourself! We found that removing the space from alphabet (to match the paper) gave better results.

+
121 μs





2.1 μs

Exercise 2 - Language generation

+

Our model from Exercise 1 has the property that it can easily be 'reversed' to generate text. While this is useful to demonstrate its structure, the produced text is mostly meaningless: it fails to model words, let alone sentence structure.

+

To take our model one step further, we are going to generalize what we have done so far. Instead of looking at letter combinations, we will model word combinations. And instead of analyzing the frequencies of bigrams (combinations of two letters), we are going to analyze n-grams.

+

Dataset

+

This also means that we are going to need a larger dataset to train our model on: the number of English words (and their combinations) is much higher than the number of letters.

+

We will train our model on the novel Emma (1815), by Jane Austen. This work is in the public domain, which means that we can download the whole book as a text file from archive.org. We've done the process of downloading and cleaning already, and we have split the text into word and punctuation tokens.

+
2.3 ms
2.7 s
splitwords (generic function with 1 method)
71 μs
emma_words
164 ms
forest_words
323 μs

Exercise 2.1 - bigrams revisited

+

The goal of the upcoming exercises is to generalize what we have done in Exercise 1. To keep things simple, we split up our problem into smaller problems. (The solution to any computational problem.)

+

First, here is a function that takes an array, and returns the array of all neighbour pairs from the original. For example,

+
bigrams([1, 2, 3, 42])
+

gives

+
[[1,2], [2,3], [3,42]]
+

(We used integers as "words" in this example, but our function works with any type of word.)

+
59.9 μs
bigrams (generic function with 1 method)
112 μs
239 ms

👉 Next, it's your turn to write a more general function ngrams that takes an array and a number n, and returns all subsequences of length n. For example:

+
ngrams([1, 2, 3, 42], 3)
+

should give

+
[[1,2,3], [2,3,42]]
+

and

+
ngrams([1, 2, 3, 42], 2) == bigrams([1, 2, 3, 42])
+
63.7 μs
ngrams (generic function with 1 method)
47 μs
missing
2.1 ms
missing
7.5 μs

Here we go!

Replace missing with your answer.

+ +
+
29.9 μs

Hint

Start out with the same code as bigrams, and use the Julia documentation to learn how it works. How can we generalize the bigram function into the ngram function? It might help to do this on paper first.

+ +
+
59.3 μs

If you are stuck, you can write ngrams(words, n) = bigrams(words) (ignoring the true value of n), and continue with the other exercises.

+

Exercise 2.2 - frequency matrix revisited

+

In Exercise 1, we use a 2D array to store the bigram frequencies, where each column or row corresponds to a character from the alphabet. If we use trigrams, we could store the frequencies in a 3D array, and so on.

+

However, when counting words instead of letters, we run into a problem. A 3D array with one row, column and layer per word has too many elements to store on our computer.

+
25.6 μs

Emma consists of 8465 unique words. This means that there are 606 billion possible trigrams - that's too much!

+
112 ms

+

Although the frequency array would be very large, most entries are zero. For example, "Emma" is a common word, but "Emma Emma Emma" does not occur in the novel. This sparsity of non-zero entries can be used to store the same information in a more efficient structure.

+

Julia's SparseArrays.jl package might sound like a logical choice, but these arrays only support 1D and 2D types, and we also want to directly index using strings, not just integers. So instead, we will use Dict, the dictionary type.

+
64.6 μs
healthy
33.5 ms
10.8 ms

(Did you notice something funny? The dictionary is unordered; this is why the entries were printed in reverse from the definition.)

+

You can dynamically add or change values of a Dict by assigning to my_dict[key]. You can check whether a key already exists using haskey(my_dict, key).

+

👉 Use these two techniques to write a function word_counts that takes an array of words, and returns a Dict with entries word => number_of_occurences.

+

For example:

+
word_counts(["to", "be", "or", "not", "to", "be"])
+

should return

+
Dict(
+	"to" => 2, 
+	"be" => 2, 
+	"or" => 1, 
+	"not" => 1
+)
+
28.5 μs
word_counts (generic function with 1 method)
42.2 μs
9.5 ms

Here we go!

Write your function word_counts above.

+ +
+
89.3 ms

How many times does "Emma" occur in the book?

+
17.3 μs
emma_count
missing
5.1 μs

Great! Let's get back to our ngrams. For the purpose of generating text, we are going to store a completions cache. This is a dictionary where the keys are (n-1)-grams, and the values are all found words that complete it to an n-gram. Let's look at an example:

+
let
+	trigrams = ngrams(split("to be or not to be that is the question", " "), 3)
+	cache = completions_cache(trigrams)
+	cache == Dict(
+		["to", "be"] => ["or", "that"],
+		["be", "or"] => ["not"],
+		["or", "not"] => ["to"],
+		...
+	)
+end
+

So for trigrams, our keys are the first 2 words of each trigram, and the values are arrays containing every third word of those trigrams.

+

If the same ngram occurs multiple times (e.g. "said Emma laughing"), then the last word ("laughing") should also be stored multiple times. This will allow us to generate trigrams with the same frequencies as the original text.

+

👉 Write the function completions_cache, which takes an array of ngrams (i.e. an array of arrays of words, like the result of your ngrams function), and returns a dictionary as described above.

+
32.6 μs
completions_cache (generic function with 1 method)
81.4 μs
8.5 ms
42.8 μs

Exercise 2.4 - write a novel

+

We have everything we need to generate our own novel! The final step is to sample random ngrams, in a way that each next ngram overlaps with the previous one. We've done this in the function generate_from_ngrams below - feel free to look through the code, or to implement your own version.

+
13.5 μs
Main.workspace1286.generate_from_ngrams
176 μs
Main.workspace1286.ngrams_circular
109 μs
Main.workspace1286.generate
406 μs

Interactive demo

+

Enter your own text in the box below, and use that as training data to generate anything!

+
7.6 μs
115 μs

Using grams for characters

+
39.8 ms

MethodError: no method matching iterate(::Missing)

Closest candidates are:

iterate(!Matched::Core.SimpleVector) at essentials.jl:603

iterate(!Matched::Core.SimpleVector, !Matched::Any) at essentials.jl:603

iterate(!Matched::ExponentialBackOff) at error.jl:253

...

  1. first(::Missing)@abstractarray.jl:341
  2. generate_from_ngrams(::Missing, ::Int64)@Other: 7
  3. #generate#13(::Int64, ::Bool, ::typeof(Main.workspace1286.generate), ::String, ::Int64)@Other: 18
  4. top-level scope@Local: 1
---

Using grams for words

+
71 μs

MethodError: no method matching iterate(::Missing)

Closest candidates are:

iterate(!Matched::Core.SimpleVector) at essentials.jl:603

iterate(!Matched::Core.SimpleVector, !Matched::Any) at essentials.jl:603

iterate(!Matched::ExponentialBackOff) at error.jl:253

...

  1. first(::Missing)@abstractarray.jl:341
  2. generate_from_ngrams(::Missing, ::Int64)@Other: 7
  3. #generate#13(::Int64, ::Bool, ::typeof(Main.workspace1286.generate), ::String, ::Int64)@Other: 18
  4. top-level scope@Local: 1
---

Automatic Jane Austen

+

Uncomment the cell below to generate some Jane Austen text:

+
10.8 μs
2.4 μs





1.5 μs

Before you submit

Remember to fill in your name and Kerberos ID at the top of this notebook.

+
+
14.3 μs

Function library

+

Just some helper functions used in the notebook.

+
24.1 μs
Quote (generic function with 1 method)
57.7 μs
show_pair_frequencies (generic function with 1 method)
124 μs
compimg (generic function with 2 methods)
201 μs
hint (generic function with 1 method)
37.5 μs
almost (generic function with 1 method)
49.2 μs
still_missing (generic function with 2 methods)
305 μs
keep_working (generic function with 2 methods)
146 μs
yays
33.4 ms
correct (generic function with 2 methods)
154 μs
not_defined (generic function with 1 method)
66.6 μs
5.5 ms
+ + + \ No newline at end of file diff --git a/homework/homework3/hw3.jl b/homework/homework3/hw3.jl new file mode 100644 index 000000000..e9723e293 --- /dev/null +++ b/homework/homework3/hw3.jl @@ -0,0 +1,1297 @@ +### A Pluto.jl notebook ### +# v0.11.14 + +using Markdown +using InteractiveUtils + +# This Pluto notebook uses @bind for interactivity. When running this notebook outside of Pluto, the following 'mock version' of @bind gives bound variables a default value (instead of an error). +macro bind(def, element) + quote + local el = $(esc(element)) + global $(esc(def)) = Core.applicable(Base.get, el) ? Base.get(el) : missing + el + end +end + +# ╔═╡ 86e1ee96-f314-11ea-03f6-0f549b79e7c9 +begin + using Pkg + Pkg.activate(mktempdir()) +end + +# ╔═╡ a4937996-f314-11ea-2ff9-615c888afaa8 +begin + Pkg.add([ + "Compose", + "Colors", + "PlutoUI", + ]) + + using Colors + using PlutoUI + using Compose + using LinearAlgebra +end + +# ╔═╡ e6b6760a-f37f-11ea-3ae1-65443ef5a81a +md"_homework 3, version 3_" + +# ╔═╡ 85cfbd10-f384-11ea-31dc-b5693630a4c5 +md""" + +# **Homework 3**: _Structure and Language_ +`18.S191`, fall 2020 + +This notebook contains _built-in, live answer checks_! In some exercises you will see a coloured box, which runs a test case on your code, and provides feedback based on the result. Simply edit the code, run it, and the check runs again. + +_For MIT students:_ there will also be some additional (secret) test cases that will be run as part of the grading process, and we will look at your notebook and write comments. + +Feel free to ask questions! +""" + +# ╔═╡ 33e43c7c-f381-11ea-3abc-c942327456b1 +# edit the code below to set your name and kerberos ID (i.e. email without @mit.edu) + +student = (name = "Jazzy Doe", kerberos_id = "jazz") + +# you might need to wait until all other cells in this notebook have completed running. 
+# scroll around the page to see what's up + +# ╔═╡ ec66314e-f37f-11ea-0af4-31da0584e881 +md""" + +Submission by: **_$(student.name)_** ($(student.kerberos_id)@mit.edu) +""" + +# ╔═╡ 938185ec-f384-11ea-21dc-b56b7469f798 +md"_Let's create a package environment:_" + +# ╔═╡ b49a21a6-f381-11ea-1a98-7f144c55c9b7 +html""" + +""" + +# ╔═╡ 6f9df800-f92d-11ea-2d49-c1aaabd2d012 +md""" +## **Exercise 1:** _Language detection_ + +In this exercise, we are going to create some super simple _Artificial Intelligence_. Natural language can be quite messy, but hidden in this mess is _structure_, which we are going to look for today. + +Let's start with some obvious structure in English text: the set of characters that we write the language in. If we generate random text by sampling _random Unicode characters_, it does not look like English: +""" + +# ╔═╡ b61722cc-f98f-11ea-22ae-d755f61f78c3 +String(rand(Char, 40)) + +# ╔═╡ f457ad44-f990-11ea-0e2d-2bb7627716a8 +md""" +Instead, let's define an _alphabet_, and only use those letters to sample from. To keep things simple, we ignore punctuation, capitalization, etc, and only use these 27 characters: +""" + +# ╔═╡ 4efc051e-f92e-11ea-080e-bde6b8f9295a +alphabet = ['a':'z' ; ' '] # includes the space character + +# ╔═╡ 38d1ace8-f991-11ea-0b5f-ed7bd08edde5 +md""" +Let's sample random characters from our alphabet: +""" + +# ╔═╡ ddf272c8-f990-11ea-2135-7bf1a6dca0b7 +String(rand(alphabet, 40)) |> Text + +# ╔═╡ 3cc688d2-f996-11ea-2a6f-0b4c7a5b74c2 +md""" +That already looks a lot better than our first attempt! But still, this does not look like English text -- we can do better. + +$(html"
") + +English words are not well modelled by this random-Latin-characters model. Our first observation is that **some letters are more common than others**. To put this observation into practice, we would like to have the **frequency table** of the Latin alphabet. We could search for it online, but it is actually very simple to calculate ourselves! The only thing we need is a _representative sample_ of English text. + +The following samples are from Wikipedia, but feel free to type in your own sample! You can also enter a sample of a different language, if that language can be expressed in the Latin alphabet. + +Remember that the $(html"") button on the left of a cell will show or hide the code. + +We also include a sample of Spanish, which we'll use later! +""" + +# ╔═╡ a094e2ac-f92d-11ea-141a-3566552dd839 +md""" +#### Exercise 1.1 - _Data cleaning_ +Looking at the sample, we see that it is quite _messy_: it contains punctuation, accented letters and numbers. For our analysis, we are only interested in our 27-character alphabet (i.e. `'a'` through `'z'` plus `' '`). We are going to clean the data using the Julia function `filter`. +""" + +# ╔═╡ 27c9a7f4-f996-11ea-1e46-19e3fc840ad9 +filter(isodd, [6, 7, 8, 9, -5]) + +# ╔═╡ f2a4edfa-f996-11ea-1a24-1ba78fd92233 +md""" +`filter` takes two arguments: a **function** and a **collection**. The function is applied to each element of the collection, and it returns either `true` or `false`. If the result is `true`, then that element is included in the final collection. + +Did you notice something cool? Functions are also just _objects_ in Julia, and you can use them as arguments to other functions! _(Fons thinks this is super cool.)_ + +$(html"
") + +We have written a function `isinalphabet`, which takes a character and returns a boolean: +""" + +# ╔═╡ 5c74a052-f92e-11ea-2c5b-0f1a3a14e313 +function isinalphabet(character) + character ∈ alphabet +end + +# ╔═╡ dcc4156c-f997-11ea-3e6f-057cd080d9db +isinalphabet('a'), isinalphabet('+') + +# ╔═╡ 129fbcfe-f998-11ea-1c96-0fd3ccd2dcf8 +md"👉 Use `filter` to extract just the characters from our alphabet out of `messy_sentence_1`:" + +# ╔═╡ 3a5ee698-f998-11ea-0452-19b70ed11a1d +messy_sentence_1 = "#wow 2020 ¥500 (blingbling!)" + +# ╔═╡ 75694166-f998-11ea-0428-c96e1113e2a0 +cleaned_sentence_1 = missing + +# ╔═╡ 05f0182c-f999-11ea-0a52-3d46c65a049e +md""" +$(html"
") + +We are not interested in the case of letters (i.e. `'A'` vs `'a'`), so we want to map these to lower case _before_ we apply our filter. If we don't, all upper case letters would get deleted. +""" + +# ╔═╡ 98266882-f998-11ea-3270-4339fb502bc7 +md"👉 Use the function `lowercase` to convert `messy_sentence_2` into a lower case string, and then use `filter` to extract only the characters from our alphabet." + +# ╔═╡ d3c98450-f998-11ea-3caf-895183af926b +messy_sentence_2 = "Awesome! 😍" + +# ╔═╡ d3a4820e-f998-11ea-2a5c-1f37e2a6dd0a +cleaned_sentence_2 = missing + +# ╔═╡ aad659b8-f998-11ea-153e-3dae9514bfeb +md""" +$(html"
") + +Finally, we need to deal with **accents**: simply deleting accented characters from the source text might deform it too much. We can add accented letters to our alphabet, but a simpler solution is to *replace* accented letters with the corresponding unaccented base character. We have written a function `unaccent` that does just that. +""" + +# ╔═╡ d236b51e-f997-11ea-0c55-abb11eb35f4d +french_word = "Égalité!" + +# ╔═╡ 24860970-fc48-11ea-0009-cddee695772c +import Unicode + +# ╔═╡ 734851c6-f92d-11ea-130d-bf2a69e89255 +""" +Turn `"áéíóúüñ asdf"` into `"aeiouun asdf"`. +""" +unaccent(str) = Unicode.normalize(str, stripmark=true) + +# ╔═╡ d67034d0-f92d-11ea-31c2-f7a38ebb412f +samples = ( + English = """ +Although the word forest is commonly used, there is no universally recognised precise definition, with more than 800 definitions of forest used around the world.[4] Although a forest is usually defined by the presence of trees, under many definitions an area completely lacking trees may still be considered a forest if it grew trees in the past, will grow trees in the future,[9] or was legally designated as a forest regardless of vegetation type.[10][11] + +The word forest derives from the Old French forest (also forès), denoting "forest, vast expanse covered by trees"; forest was first introduced into English as the word denoting wild land set aside for hunting[14] without the necessity in definition of having trees on the land.[15] Possibly a borrowing, probably via Frankish or Old High German, of the Medieval Latin foresta, denoting "open wood", Carolingian scribes first used foresta in the Capitularies of Charlemagne specifically to denote the royal hunting grounds of the King. The word was not endemic to Romance languages, e. g. 
native words for forest in the Romance languages derived from the Latin silva, which denoted "forest" and "wood(land)" (confer the English sylva and sylvan); confer the Italian, Spanish, and Portuguese selva; the Romanian silvă; and the Old French selve, and cognates in Romance languages, e. g. the Italian foresta, Spanish and Portuguese floresta, etc., are all ultimately derivations of the French word. +""", + Spanish = """ +Un bosque es un ecosistema donde la vegetación predominante la constituyen los árboles y matas.1​ Estas comunidades de plantas cubren grandes áreas del globo terráqueo y funcionan como hábitats para los animales, moduladores de flujos hidrológicos y conservadores del suelo, constituyendo uno de los aspectos más importantes de la biosfera de la Tierra. Aunque a menudo se han considerado como consumidores de dióxido de carbono atmosférico, los bosques maduros son prácticamente neutros en cuanto al carbono, y son solamente los alterados y los jóvenes los que actúan como dichos consumidores.2​3​ De cualquier manera, los bosques maduros juegan un importante papel en el ciclo global del carbono, como reservorios estables de carbono y su eliminación conlleva un incremento de los niveles de dióxido de carbono atmosférico. + +Los bosques pueden hallarse en todas las regiones capaces de mantener el crecimiento de árboles, hasta la línea de árboles, excepto donde la frecuencia de fuego natural es demasiado alta, o donde el ambiente ha sido perjudicado por procesos naturales o por actividades humanas. Los bosques a veces contienen muchas especies de árboles dentro de una pequeña área (como la selva lluviosa tropical y el bosque templado caducifolio), o relativamente pocas especies en áreas grandes (por ejemplo, la taiga y bosques áridos montañosos de coníferas). Los bosques son a menudo hogar de muchos animales y especies de plantas, y la biomasa por área de unidad es alta comparada a otras comunidades de vegetación. 
La mayor parte de esta biomasa se halla en el subsuelo en los sistemas de raíces y como detritos de plantas parcialmente descompuestos. El componente leñoso de un bosque contiene lignina, cuya descomposición es relativamente lenta comparado con otros materiales orgánicos como la celulosa y otros carbohidratos. Los bosques son áreas naturales y silvestre +""" |> unaccent, +) + +# ╔═╡ a56724b6-f9a0-11ea-18f2-991e0382eccf +unaccent(french_word) + +# ╔═╡ 8d3bc9ea-f9a1-11ea-1508-8da4b7674629 +md""" +$(html"
") + +👉 Let's put everything together. Write a function `clean` that takes a string, and returns a _cleaned_ version, where: +- accented letters are replaced by their base characters; +- upper-case letters are converted to lower case; +- it is filtered to only contain characters from `alphabet` +""" + +# ╔═╡ 4affa858-f92e-11ea-3ece-258897c37e51 +function clean(text) + # we turn everything to lowercase to keep the number of letters small + missing +end + +# ╔═╡ e00d521a-f992-11ea-11e0-e9da8255b23b +clean("Crème brûlée est mon plat préféré.") + +# ╔═╡ 2680b506-f9a3-11ea-0849-3989de27dd9f +first_sample = clean(first(samples)) + +# ╔═╡ 571d28d6-f960-11ea-1b2e-d5977ecbbb11 +function letter_frequencies(txt) + ismissing(txt) && return missing + f = count.(string.(alphabet), txt) + f ./ sum(f) +end + +# ╔═╡ 6a64ab12-f960-11ea-0d92-5b88943cdb1a +sample_freqs = letter_frequencies(first_sample) + +# ╔═╡ 603741c2-f9a4-11ea-37ce-1b36ecc83f45 +md""" +The result is a 27-element array, with values between `0.0` and `1.0`. These values correspond to the _frequency_ of each letter. + +`sample_freqs[i] == 0.0` means that the $i$th letter did not occur in your sample, and +`sample_freqs[i] == 0.1` means that 10% of the letters in the sample are the $i$th letter. + +To make it easier to convert between a character from the alphabet and its index, we have the following function: +""" + +# ╔═╡ b3de6260-f9a4-11ea-1bae-9153a92c3fe5 +index_of_letter(letter) = findfirst(isequal(letter), alphabet) + +# ╔═╡ a6c36bd6-f9a4-11ea-1aba-f75cecc90320 +index_of_letter('a'), index_of_letter('b'), index_of_letter(' ') + +# ╔═╡ 6d3f9dae-f9a5-11ea-3228-d147435e266d +md""" +$(html"
") + +👉 Which letters from the alphabet did not occur in the sample? +""" + +# ╔═╡ 92bf9fd2-f9a5-11ea-25c7-5966e44db6c6 +unused_letters = let + ['a', 'b'] +end + +# ╔═╡ 01215e9a-f9a9-11ea-363b-67392741c8d4 +md""" +**Random letters at the correct frequencies:** +""" + +# ╔═╡ 8ae13cf0-f9a8-11ea-3919-a735c4ed9e7f +md""" +By considering the _frequencies_ of letters in English, we see that our model is already a lot better! + +Our next observation is that **some letter _combinations_ are more common than others**. Our current model thinks that `potato` is just as 'English' as `ooaptt`. In the next section, we will quantify these _transition frequencies_, and use it to improve our model. +""" + +# ╔═╡ 343d63c2-fb58-11ea-0cce-efe1afe070c2 + + +# ╔═╡ b5b8dd18-f938-11ea-157b-53b145357fd1 +function rand_sample(frequencies) + x = rand() + findfirst(z -> z >= x, cumsum(frequencies ./ sum(frequencies))) +end + +# ╔═╡ 0e872a6c-f937-11ea-125e-37958713a495 +function rand_sample_letter(frequencies) + alphabet[rand_sample(frequencies)] +end + +# ╔═╡ fbb7c04e-f92d-11ea-0b81-0be20da242c8 +function transition_counts(cleaned_sample) + [count(string(a, b), cleaned_sample) + for a in alphabet, + b in alphabet] +end + +# ╔═╡ 80118bf8-f931-11ea-34f3-b7828113ffd8 +normalize_array(x) = x ./ sum(x) + +# ╔═╡ 7f4f6ce4-f931-11ea-15a4-b3bec6a7e8b6 +transition_frequencies = normalize_array ∘ transition_counts; + +# ╔═╡ d40034f6-f9ab-11ea-3f65-7ffd1256ae9d +transition_frequencies(first_sample) + +# ╔═╡ 689ed82a-f9ae-11ea-159c-331ff6660a75 +md"What we get is a **27 by 27 matrix**. Each entry corresponds to a character pair. The _row_ corresponds to the first character, the _column_ is the second character. Let's visualize this:" + +# ╔═╡ 0b67789c-f931-11ea-113c-35e5edafcbbf +md""" +Answer the following questions with respect to the **cleaned English sample text**, which we called `first_sample`. 
Let's also give the transition matrix a name: +""" + +# ╔═╡ 6896fef8-f9af-11ea-0065-816a70ba9670 +sample_freq_matrix = transition_frequencies(first_sample); + +# ╔═╡ 39152104-fc49-11ea-04dd-bb34e3600f2f +if first_sample === missing + md""" + !!! danger "Don't worry!" + 👆 These errors will disappear automatically once you have completed the earlier exercises! + """ +end + +# ╔═╡ e91c6fd8-f930-11ea-01ac-476bbde79079 +md"""👉 What is the frequency of the combination `"th"`?""" + +# ╔═╡ 1b4c0c28-f9ab-11ea-03a6-69f69f7f90ed +th_frequency = missing + +# ╔═╡ 1f94e0a2-f9ab-11ea-1347-7dd906ebb09d +md"""👉 What about `"ht"`?""" + +# ╔═╡ 41b2df7c-f931-11ea-112e-ede3b16f357a +ht_frequency = missing + +# ╔═╡ 1dd1e2f4-f930-11ea-312c-5ff9e109c7f6 +md""" +👉 Which le**tt**ers appeared double in our sample? +""" + +# ╔═╡ 65c92cac-f930-11ea-20b1-6b8f45b3f262 +double_letters = ['x', 'y'] + +# ╔═╡ 4582ebf4-f930-11ea-03b2-bf4da1a8f8df +md""" +👉 Which letter is most likely to follow a **W**? +""" + +# ╔═╡ 7898b76a-f930-11ea-2b7e-8126ec2b8ffd +most_likely_to_follow_w = 'x' + +# ╔═╡ 458cd100-f930-11ea-24b8-41a49f6596a0 +md""" +👉 Which letter is most likely to precede a **W**? +""" + +# ╔═╡ bc401bee-f931-11ea-09cc-c5efe2f11194 +most_likely_to_precede_w = 'x' + +# ╔═╡ 45c20988-f930-11ea-1d12-b782d2c01c11 +md""" +👉 What is the sum of each row? What is the sum of each column? How can we interpret these values?" +""" + +# ╔═╡ cc62929e-f9af-11ea-06b9-439ac08dcb52 +row_col_answer = md""" + +""" + +# ╔═╡ 2f8dedfc-fb98-11ea-23d7-2159bdb6a299 +md""" +We can use the measured transition frequencies to generate text in a way that it has **the same transition frequencies** as our original sample. Our generated text is starting to look like real language! 
+""" + +# ╔═╡ b7446f34-f9b1-11ea-0f39-a3c17ba740e5 +@bind ex23_sample Select([v => String(k) for (k,v) in zip(fieldnames(typeof(samples)), samples)]) + +# ╔═╡ 4f97b572-f9b0-11ea-0a99-87af0797bf28 +md""" +**Random letters from the alphabet:** +""" + +# ╔═╡ 4e8d327e-f9b0-11ea-3f16-c178d96d07d9 +md""" +**Random letters at the correct frequencies:** +""" + +# ╔═╡ d83f8bbc-f9af-11ea-2392-c90e28e96c65 +md""" +**Random letters at the correct transition frequencies:** +""" + +# ╔═╡ 0e465160-f937-11ea-0ebb-b7e02d71e8a8 +function sample_text(A, n) + + first_index = rand_sample(vec(sum(A, dims=1))) + + indices = reduce(1:n; init=[first_index]) do word, _ + prev = last(word) + freq = normalize_array(A[prev, :]) + next = rand_sample(freq) + [word..., next] + end + + String(alphabet[indices]) +end + +# ╔═╡ 141af892-f933-11ea-1e5f-154167642809 +md""" +It looks like we have a decent language model, in the sense that it understands _transition frequencies_ in the language. In the demo above, try switching the language between $(join(string.(fieldnames(typeof(samples))), " and ")) -- the generated text clearly looks more like one or the other, demonstrating that the model can capture differences between the two languages. What's remarkable is that our "training data" was just a single paragraph per language. + +In this exercise, we will use our model to write a **classifier**: a program that automatically classifies a text as either $(join(string.(fieldnames(typeof(samples))), " or ")). + +This is not a difficult task - you can get dictionaries for both languages, and count matches - but we are doing something much more cool: we only use a single paragraph of each language, and we use a _language model_ as classifier. +""" + +# ╔═╡ 7eed9dde-f931-11ea-38b0-db6bfcc1b558 +html"

Mystery sample

+

Enter some text here -- we will detect in which language it is written!

" # dont delete me + +# ╔═╡ 7e3282e2-f931-11ea-272f-d90779264456 +@bind mystery_sample TextField((70,8), default=""" +Small boats are typically found on inland waterways such as rivers and lakes, or in protected coastal areas. However, some boats, such as the whaleboat, were intended for use in an offshore environment. In modern naval terms, a boat is a vessel small enough to be carried aboard a ship. Anomalous definitions exist, as lake freighters 1,000 feet (300 m) long on the Great Lakes are called "boats". +""") + +# ╔═╡ 7df55e6c-f931-11ea-33b8-fdc3be0b6cfa +mystery_sample + +# ╔═╡ 292e0384-fb57-11ea-0238-0fbe416fc976 +md""" +Let's compute the transition frequencies of our mystery sample! Type some text in the box below, and check whether the frequency matrix updates. +""" + +# ╔═╡ 7dabee08-f931-11ea-0cb2-c7d5afd21551 +transition_frequencies(mystery_sample) + +# ╔═╡ 3736a094-fb57-11ea-1d39-e551aae62b1d +md""" +Our model will **compare the transition frequencies of our mystery sample** to those of our two language sample. The closest match will be our detected language. + +The only question left is: How do we compare two matrices? When two matrices are almost equal, but not exactly, we want to quantify their _distance_. + +👉 Write a function called `matrix_distance` which takes 2 matrices of the same size and finds the distance between them by: + +1. Subtracting corresponding elements +2. Finding the absolute value of the difference +3. Summing the differences +""" + +# ╔═╡ 13c89272-f934-11ea-07fe-91b5d56dedf8 +function matrix_distance(A, B) + missing # do something with A .- B +end + +# ╔═╡ 7d60f056-f931-11ea-39ae-5fa18a955a77 +distances = map(samples) do sample + matrix_distance(transition_frequencies(mystery_sample), transition_frequencies(sample)) +end + +# ╔═╡ 7d1439e6-f931-11ea-2dab-41c66a779262 +try + @assert !ismissing(distances.English) + """

It looks like this text is **$(argmin(distances))**!

""" |> HTML +catch +end + +# ╔═╡ 8c7606f0-fb93-11ea-0c9c-45364892cbb8 +md""" +We have written a cell that selects the language with the _smallest distance_ to the mystery language. Make sure sure that `matrix_distance` is working correctly, and [scroll up](#mystery-detect) to the mystery text to see it in action! + +#### Further reading +It turns out that the SVD of the transition matrix can mysteriously group the alphabet into vowels and consonants, without any extra information. See [this paper](http://languagelog.ldc.upenn.edu/myl/Moler1983.pdf) if you want to try it yourself! We found that removing the space from `alphabet` (to match the paper) gave better results. +""" + +# ╔═╡ 82e0df62-fb54-11ea-3fff-b16c87a7d45b +md""" +## **Exercise 2** - _Language generation_ + +Our model from Exercise 1 has the property that it can easily be 'reversed' to _generate_ text. While this is useful to demonstrate its structure, the produced text is mostly meaningless: it fails to model words, let alone sentence structure. + +To take our model one step further, we are going to _generalize_ what we have done so far. Instead of looking at _letter combinations_, we will model _word combinations_. And instead of analyzing the frequencies of bigrams (combinations of two letters), we are going to analyze _$n$-grams_. + +#### Dataset +This also means that we are going to need a larger dataset to train our model on: the number of English words (and their combinations) is much higher than the number of letters. + +We will train our model on the novel [_Emma_ (1815), by Jane Austen](https://en.wikipedia.org/wiki/Emma_(novel)). This work is in the public domain, which means that we can download the whole book as a text file from `archive.org`. We've done the process of downloading and cleaning already, and we have split the text into word and punctuation tokens. 
+""" + +# ╔═╡ b7601048-fb57-11ea-0754-97dc4e0623a1 +emma = let + raw_text = read(download("https://ia800303.us.archive.org/24/items/EmmaJaneAusten_753/emma_pdf_djvu.txt"), String) + + first_words = "Emma Woodhouse" + last_words = "THE END" + start_index = findfirst(first_words, raw_text)[1] + stop_index = findlast(last_words, raw_text)[end] + + raw_text[start_index:stop_index] +end; + +# ╔═╡ cc42de82-fb5a-11ea-3614-25ef961729ab +function splitwords(text) + # clean up whitespace + cleantext = replace(text, r"\s+" => " ") + + # split on whitespace or other word boundaries + tokens = split(cleantext, r"(\s|\b)") +end + +# ╔═╡ d66fe2b2-fb5a-11ea-280f-cfb12b8296ac +emma_words = splitwords(emma) + +# ╔═╡ 4ca8e04a-fb75-11ea-08cc-2fdef5b31944 +forest_words = splitwords(samples.English) + +# ╔═╡ 6f613cd2-fb5b-11ea-1669-cbd355677649 +md""" +#### Exercise 2.1 - _bigrams revisited_ + +The goal of the upcoming exercises is to **generalize** what we have done in Exercise 1. To keep things simple, we _split up our problem_ into smaller problems. (The solution to any computational problem.) + +First, here is a function that takes an array, and returns the array of all **neighbour pairs** from the original. For example, + +```julia +bigrams([1, 2, 3, 42]) +``` +gives + +```julia +[[1,2], [2,3], [3,42]] +``` + +(We used integers as "words" in this example, but our function works with any type of word.) +""" + +# ╔═╡ 91e87974-fb78-11ea-3ce4-5f64e506b9d2 +function bigrams(words) + map(1:length(words)-1) do i + words[i:i+1] + end +end + +# ╔═╡ 9f98e00e-fb78-11ea-0f6c-01206e7221d6 +bigrams([1, 2, 3, 42]) + +# ╔═╡ d7d8cd0c-fb6a-11ea-12bf-2d1448b38162 +md""" +👉 Next, it's your turn to write a more general function `ngrams` that takes an array and a number $n$, and returns all **subsequences of length $n$**. 
For example: + +```julia +ngrams([1, 2, 3, 42], 3) +``` +should give + +```julia +[[1,2,3], [2,3,42]] +``` + +and + +```julia +ngrams([1, 2, 3, 42], 2) == bigrams([1, 2, 3, 42]) +``` +""" + +# ╔═╡ 7be98e04-fb6b-11ea-111d-51c48f39a4e9 +function ngrams(words, n) + missing +end + +# ╔═╡ 052f822c-fb7b-11ea-382f-af4d6c2b4fdb +ngrams([1, 2, 3, 42], 3) + +# ╔═╡ 067f33fc-fb7b-11ea-352e-956c8727c79f +ngrams(forest_words, 4) + +# ╔═╡ 7b10f074-fb7c-11ea-20f0-034ddff41bc3 +md""" +If you are stuck, you can write `ngrams(words, n) = bigrams(words)` (ignoring the true value of $n$), and continue with the other exercises. + +#### Exercise 2.2 - _frequency matrix revisited_ +In Exercise 1, we use a 2D array to store the bigram frequencies, where each column or row corresponds to a character from the alphabet. If we use trigrams, we could store the frequencies in a 3D array, and so on. + +However, when counting words instead of letters, we run into a problem. A 3D array with one row, column and layer per word has too many elements to store on our computer. +""" + +# ╔═╡ 24ae5da0-fb7e-11ea-3480-8bb7b649abd5 +md""" +_Emma_ consists of $( + length(Set(emma_words)) +) unique words. This means that there are $( + Int(floor(length(Set(emma_words))^3 / 10^9)) +) billion possible trigrams - that's too much! +""" + +# ╔═╡ 47836744-fb7e-11ea-2305-3fa5819dc154 +md""" +$(html"
") + +Although the frequency array would be very large, most entries are zero. For example, _"Emma"_ is a common word, but _"Emma Emma Emma"_ does not occur in the novel. This _sparsity_ of non-zero entries can be used to **store the same information in a more efficient structure**. + +Julia's [`SparseArrays.jl` package](https://docs.julialang.org/en/v1/stdlib/SparseArrays/index.html) might sound like a logical choice, but these arrays only support 1D and 2D types, and we also want to directly index using strings, not just integers. So instead, we will use `Dict`, the dictionary type. +""" + +# ╔═╡ df4fc31c-fb81-11ea-37b3-db282b36f5ef +healthy = Dict("fruits" => ["🍎", "🍊"], "vegetables" => ["🌽", "🎃", "🍕"]) + +# ╔═╡ c83b1770-fb82-11ea-20a6-3d3a09606c62 +healthy["fruits"] + +# ╔═╡ 52970ac4-fb82-11ea-3040-8bd0590348d2 +md""" +(Did you notice something funny? The dictionary is _unordered_, this is why the entries were printed in reverse from the definition.) + +You can dynamically add or change values of a `Dict` by assigning to `my_dict[key]`. You can check whether a key already exists using `haskey(my_dict, key)`. + +👉 Use these two techniques to write a function `word_counts` that takes an array of words, and returns a `Dict` with entries `word => number_of_occurences`. + +For example: +```julia +word_counts(["to", "be", "or", "not", "to", "be"]) +``` +should return +```julia +Dict( + "to" => 2, + "be" => 2, + "or" => 1, + "not" => 1 +) +``` +""" + +# ╔═╡ 8ce3b312-fb82-11ea-200c-8d5b12f03eea +function word_counts(words::Vector) + counts = Dict() + + # your code here + + return counts +end + +# ╔═╡ a2214e50-fb83-11ea-3580-210f12d44182 +word_counts(["to", "be", "or", "not", "to", "be"]) + +# ╔═╡ 808abf6e-fb84-11ea-0785-2fc3f1c4a09f +md""" +How many times does `"Emma"` occur in the book? +""" + +# ╔═╡ 953363dc-fb84-11ea-1128-ebdfaf5160ee +emma_count = missing + +# ╔═╡ 294b6f50-fb84-11ea-1382-03e9ab029a2d +md""" +Great! Let's get back to our ngrams. 
For the purpose of generating text, we are going to store a _completions cache_. This is a dictionary where the keys are $(n-1)$-grams, and the values are all found words that complete it to an $n$-gram. Let's look at an example: + +```julia +let + trigrams = ngrams(split("to be or not to be that is the question", " "), 3) + cache = completions_cache(trigrams) + cache == Dict( + ["to", "be"] => ["or", "that"], + ["be", "or"] => ["not"], + ["or", "not"] => ["to"], + ... + ) +end +``` + +So for trigrams, our keys are the first $2$ words of each trigram, and the values are arrays containing every third word of those trigrams. + +If the same ngram occurs multiple times (e.g. "said Emma laughing"), then the last word ("laughing") should also be stored multiple times. This will allow us to generate trigrams with the same frequencies as the original text. + +👉 Write the function `completions_cache`, which takes an array of ngrams (i.e. an array of arrays of words, like the result of your `ngrams` function), and returns a dictionary as described above. +""" + +# ╔═╡ b726f824-fb5e-11ea-328e-03a30544037f +function completions_cache(grams) + cache = Dict() + + # your code here + + cache +end + +# ╔═╡ 18355314-fb86-11ea-0738-3544e2e3e816 +let + trigrams = ngrams(split("to be or not to be that is the question", " "), 3) + completions_cache(trigrams) +end + +# ╔═╡ 3d105742-fb8d-11ea-09b0-cd2e77efd15c +md""" +#### Exercise 2.4 - _write a novel_ + +We have everything we need to generate our own novel! The final step is to sample random ngrams, in a way that each next ngram overlaps with the previous one. We've done this in the function `generate_from_ngrams` below - feel free to look through the code, or to implement your own version. +""" + +# ╔═╡ a72fcf5a-fb62-11ea-1dcc-11451d23c085 +""" + generate_from_ngrams(grams, num_words) + +Given an array of ngrams (i.e. an array of arrays of words), generate a sequence of `num_words` words by sampling random ngrams.
+""" +function generate_from_ngrams(grams, num_words) + n = length(first(grams)) + cache = completions_cache(grams) + + # we need to start the sequence with at least n-1 words. + # a simple way to do so is to pick a random ngram! + sequence = [rand(grams)...] + + # we iteratively add one more word at a time + # (the seed ngram already supplied the first n words, hence the loop starts at n+1) + for i ∈ n+1:num_words + # the previous n-1 words + tail = sequence[end-(n-2):end] + + # possible next words + completions = cache[tail] + + choice = rand(completions) + push!(sequence, choice) + end + sequence +end + +# ╔═╡ f83991c0-fb7c-11ea-0e6f-1f80709d00c1 +"Compute the ngrams of an array of words, but add the first n-1 at the end, to ensure that every ngram ends in the beginning of another ngram." +function ngrams_circular(words, n) + ngrams([words..., words[1:n-1]...], n) +end + +# ╔═╡ abe2b862-fb69-11ea-08d9-ebd4ba3437d5 +completions_cache(ngrams_circular(forest_words, 3)) + +# ╔═╡ 4b27a89a-fb8d-11ea-010b-671eba69364e +""" + generate(source_text::AbstractString, s; n=3, use_words=true) + +Given a source text, generate a `String` that "looks like" the original text by satisfying the same ngram frequency distribution as the original. + +`s` is the number of tokens to generate (it is passed to `generate_from_ngrams` as `num_words`). With `use_words=true` the text is tokenized with `splitwords` and the result is joined with spaces; otherwise it is split into characters with `collect`. Returns `""` when the source has fewer than `n` tokens. +""" +function generate(source_text::AbstractString, s; n=3, use_words=true) + preprocess = if use_words + splitwords + else + collect + end + + words = preprocess(source_text) + if length(words) < n + "" + else + grams = ngrams_circular(words, n) + result = generate_from_ngrams(grams, s) + if use_words + join(result, " ") + else + String(result) + end + end +end + +# ╔═╡ d7b7a14a-fb90-11ea-3e2b-2fd8f379b4d8 +md" +#### Interactive demo + +Enter your own text in the box below, and use that as training data to generate anything!
+" + +# ╔═╡ 1939dbea-fb63-11ea-0bc2-2d06b2d4b26c +@bind generate_demo_sample TextField((50,5), default=samples.English) + +# ╔═╡ 70169682-fb8c-11ea-27c0-2dad2ff3080f +md"""Using $(@bind generate_sample_n_letters NumberField(1:5))grams for characters""" + +# ╔═╡ 402562b0-fb63-11ea-0769-375572cc47a8 +md"""Using $(@bind generate_sample_n_words NumberField(1:5))grams for words""" + +# ╔═╡ 2521bac8-fb8f-11ea-04a4-0b077d77529e +md""" +### Automatic Jane Austen + +Uncomment the cell below to generate some Jane Austen text: +""" + +# ╔═╡ 49b69dc2-fb8f-11ea-39af-030b5c5053c3 +# generate(emma, 100; n=4) |> Quote + +# ╔═╡ cc07f576-fbf3-11ea-2c6f-0be63b9356fc +if student.name == "Jazzy Doe" + md""" + !!! danger "Before you submit" + Remember to fill in your **name** and **Kerberos ID** at the top of this notebook. + """ +end + +# ╔═╡ 6b4d6584-f3be-11ea-131d-e5bdefcc791b +md"## Function library + +Just some helper functions used in the notebook." + +# ╔═╡ 54b1e236-fb53-11ea-3769-b382ef8b25d6 +function Quote(text::AbstractString) + text |> Markdown.Paragraph |> Markdown.BlockQuote |> Markdown.MD +end + +# ╔═╡ b3dad856-f9a7-11ea-1552-f7435f1cb605 +String(rand(alphabet, 400)) |> Quote + +# ╔═╡ be55507c-f9a7-11ea-189c-4ffe8377212e +if sample_freqs !== missing + String([rand_sample_letter(sample_freqs) for _ in 1:400]) |> Quote +end + +# ╔═╡ 46c905d8-f9b0-11ea-36ed-0515e8ed2621 +String(rand(alphabet, 400)) |> Quote + +# ╔═╡ 489b03d4-f9b0-11ea-1de0-11d4fe4e7c69 +String([rand_sample_letter(letter_frequencies(ex23_sample)) for _ in 1:400]) |> Quote + +# ╔═╡ fd202410-f936-11ea-1ad6-b3629556b3e0 +sample_text(transition_frequencies(clean(ex23_sample)), 400) |> Quote + +# ╔═╡ b5dff8b8-fb6c-11ea-10fc-37d2a9adae8c +generate( + generate_demo_sample, 400; + n=generate_sample_n_letters, + use_words=false +) |> Quote + +# ╔═╡ ee8c5808-fb5f-11ea-19a1-3d58217f34dc +generate( + generate_demo_sample, 100; + n=generate_sample_n_words, + use_words=true +) |> Quote + +# ╔═╡ 
ddef9c94-fb96-11ea-1f17-f173a4ff4d89 +function compimg(img, labels=[c*d for c in replace(alphabet, ' ' => "_"), d in replace(alphabet, ' ' => "_")]) + xmax, ymax = size(img) + xmin, ymin = 0, 0 + arr = [(j-1, i-1) for i=1:ymax, j=1:xmax] + + compose(context(units=UnitBox(xmin, ymin, xmax, ymax)), + fill(vec(img)), + compose(context(), + fill("white"), font("monospace"), + text(first.(arr) .+ .1, last.(arr) .+ 0.6, labels)), + rectangle( + first.(arr), + last.(arr), + fill(1.0, length(arr)), + fill(1.0, length(arr)))) +end + +# ╔═╡ b7803a28-fb96-11ea-3e30-d98eb322d19a +function show_pair_frequencies(A) + imshow = let + to_rgb(x) = RGB(0.36x, 0.82x, 0.8x) + to_rgb.(A ./ maximum(abs.(A))) + end + compimg(imshow) +end + +# ╔═╡ ace3dc76-f9ae-11ea-2bee-3d0bfa57cfbc +show_pair_frequencies(transition_frequencies(first_sample)) + +# ╔═╡ ffc17f40-f380-11ea-30ee-0fe8563c0eb1 +hint(text) = Markdown.MD(Markdown.Admonition("hint", "Hint", [text])) + +# ╔═╡ 7df7ab82-f9ad-11ea-2243-21685d660d71 +hint(md"You can answer this question without writing any code: have a look at the values of `sample_freqs`.") + +# ╔═╡ e467c1c6-fbf2-11ea-0d20-f5798237c0a6 +hint(md"Start out with the same code as `bigrams`, and use the Julia documentation to learn how it works. How can we generalize the `bigram` function into the `ngram` function? It might help to do this on paper first.") + +# ╔═╡ ffc40ab2-f380-11ea-2136-63542ff0f386 +almost(text) = Markdown.MD(Markdown.Admonition("warning", "Almost there!", [text])) + +# ╔═╡ ffceaed6-f380-11ea-3c63-8132d270b83f +still_missing(text=md"Replace `missing` with your answer.") = Markdown.MD(Markdown.Admonition("warning", "Here we go!", [text])) + +# ╔═╡ ffde44ae-f380-11ea-29fb-2dfcc9cda8b4 +keep_working(text=md"The answer is not quite right.") = Markdown.MD(Markdown.Admonition("danger", "Keep working on it!", [text])) + +# ╔═╡ ffe326e0-f380-11ea-3619-61dd0592d409 +yays = [md"Fantastic!", md"Splendid!", md"Great!", md"Yay ❤", md"Great! 
🎉", md"Well done!", md"Keep it up!", md"Good job!", md"Awesome!", md"You got the right answer!", md"Let's move on to the next section."] + +# ╔═╡ fff5aedc-f380-11ea-2a08-99c230f8fa32 +correct(text=rand(yays)) = Markdown.MD(Markdown.Admonition("correct", "Got it!", [text])) + +# ╔═╡ 954fc466-fb7b-11ea-2724-1f938c6b93c6 +let + output = ngrams([1, 2, 3, 42], 2) + + if output isa Missing + still_missing() + elseif !(output isa Vector{<:Vector}) + keep_working(md"Make sure that `ngrams` returns an array of arrays.") + elseif output == [[1,2], [2,3], [3,42]] + if ngrams([1,2,3], 1) == [[1],[2],[3]] + if ngrams([1,2,3], 3) == [[1,2,3]] + if ngrams(["a"],1) == [["a"]] + correct() + else + keep_working(md"`ngrams` should work with any type, not just integers!") + end + else + keep_working(md"`ngrams(x, 3)` did not give a correct result.") + end + else + keep_working(md"`ngrams(x, 1)` did not give a correct result.") + end + else + keep_working(md"`ngrams(x, 2)` did not give the correct bigrams. Start out with the same code as `bigrams`.") + end +end + +# ╔═╡ a9ffff9a-fb83-11ea-1efd-2fc15538e52f +let + output = word_counts(["to", "be", "or", "not", "to", "be"]) + + if output === nothing + keep_working(md"Did you forget to write `return`?") + elseif output == Dict() + still_missing(md"Write your function `word_counts` above.") + elseif !(output isa Dict) + keep_working(md"Make sure that `word_counts` returns a `Dict`.") + elseif output == Dict("to" => 2, "be" => 2, "or" => 1, "not" => 1) + correct() + else + keep_working() + end +end + +# ╔═╡ 00026442-f381-11ea-2b41-bde1fff66011 +not_defined(variable_name) = Markdown.MD(Markdown.Admonition("danger", "Oopsie!", [md"Make sure that you define a variable called **$(Markdown.Code(string(variable_name)))**"])) + +# ╔═╡ 6fe693c8-f9a1-11ea-1983-f159131880e9 +if !@isdefined(messy_sentence_1) + not_defined(:messy_sentence_1) +elseif !@isdefined(cleaned_sentence_1) + not_defined(:cleaned_sentence_1) +else + if cleaned_sentence_1 isa 
Missing + still_missing() + elseif cleaned_sentence_1 isa Vector{Char} + keep_working(md"Use `String(x)` to turn an array of characters `x` into a `String`.") + elseif cleaned_sentence_1 == filter(isinalphabet, messy_sentence_1) + correct() + else + keep_working() + end +end + +# ╔═╡ cee0f984-f9a0-11ea-2c3c-53fe26156ea4 +if !@isdefined(messy_sentence_2) + not_defined(:messy_sentence_2) +elseif !@isdefined(cleaned_sentence_2) + not_defined(:cleaned_sentence_2) +else + if cleaned_sentence_2 isa Missing + still_missing() + elseif cleaned_sentence_2 isa Vector{Char} + keep_working(md"Use `String(x)` to turn an array of characters `x` into a `String`.") + elseif cleaned_sentence_2 == filter(isinalphabet, lowercase(messy_sentence_2)) + correct() + else + keep_working() + end +end + +# ╔═╡ ddfb1e1c-f9a1-11ea-3625-f1170272e96a +if !@isdefined(clean) + not_defined(:clean) +else + let + input = "Aè !!! x1" + output = clean(input) + + + if output isa Missing + still_missing() + elseif output isa Vector{Char} + keep_working(md"Use `String(x)` to turn an array of characters `x` into a `String`.") + elseif output == "ae x" + correct() + else + keep_working() + end + end +end + +# ╔═╡ 95b81778-f9a5-11ea-3f51-019430bc8fa8 +if !@isdefined(unused_letters) + not_defined(:unused_letters) +else + if sample_freqs === missing + md""" + !!! warning "Oopsie!" + You need to complete the previous exercises first. 
+ """ + elseif unused_letters isa Missing + still_missing() + elseif unused_letters isa String + keep_working(md"Use `collect` to turn a string into an array of characters.") + elseif Set(index_of_letter.(unused_letters)) == Set(findall(isequal(0.0), sample_freqs)) + correct() + else + keep_working() + end +end + +# ╔═╡ 489fe282-f931-11ea-3dcb-35d4f2ac8b40 +if !@isdefined(th_frequency) + not_defined(:th_frequency) +elseif !@isdefined(ht_frequency) + not_defined(:ht_frequency) +else + if th_frequency isa Missing || ht_frequency isa Missing + still_missing() + elseif th_frequency < ht_frequency + keep_working(md"Looks like your answers should be flipped. Which combination is more frequent in English?") + elseif th_frequency == sample_freq_matrix[index_of_letter('t'), index_of_letter('h')] && ht_frequency == sample_freq_matrix[index_of_letter('h'), index_of_letter('t')] + correct() + else + keep_working() + end +end + +# ╔═╡ 671525cc-f930-11ea-0e71-df9d4aae1c05 +if !@isdefined(double_letters) + not_defined(:double_letters) +end + +# ╔═╡ a5fbba46-f931-11ea-33e1-054be53d986c +if !@isdefined(most_likely_to_follow_w) + not_defined(:most_likely_to_follow_w) +end + +# ╔═╡ ba695f6a-f931-11ea-0fbb-c3ef1374270e +if !@isdefined(most_likely_to_precede_w) + not_defined(:most_likely_to_precede_w) +end + +# ╔═╡ b09f5512-fb58-11ea-2527-31bea4cee823 +if !@isdefined(matrix_distance) + not_defined(:matrix_distance) +else + try + let + A = rand(Float64, (5,4)) + B = rand(Float64, (5,4)) + + output = matrix_distance(A,B) + + if output isa Missing + still_missing() + elseif !(output isa Number) + keep_working(md"Make sure that `matrix_distance` returns a nunmber.") + elseif output == 0.0 + keep_working(md"Two different matrices should have non-zero distance.") + else + if matrix_distance(A,B) < 0 || matrix_distance(B,A) < 0 + keep_working(md"The distance between two matrices should always be positive.") + elseif matrix_distance(A,A) != 0 + almost(md"The distance between two identical 
matrices should be zero.") + elseif matrix_distance([1 -1], [0 0]) == 0.0 + almost(md"`matrix_distance([1 -1], [0 0])` should not be zero.") + else + correct() + end + end + end + catch + keep_working(md"The function errored.") + end +end + +# ╔═╡ 00115b6e-f381-11ea-0bc6-61ca119cb628 +bigbreak = html"




"; + +# ╔═╡ c086bd1e-f384-11ea-3b26-2da9e24360ca +bigbreak + +# ╔═╡ eaa8c79e-f9a2-11ea-323f-8bb2bd36e11c +md""" +$(bigbreak) +#### Exercise 1.2 - _Letter frequencies_ + +We are going to count the _frequency_ of each letter in this sample, after applying your `clean` function. Can you guess which character is most frequent? +""" + +# ╔═╡ dcffd7d2-f9a6-11ea-2230-b1afaecfdd54 +md""" +$(bigbreak) +Now that we know the frequencies of letters in English, we can generate random text that already looks closer to English! + +**Random letters from the alphabet:** +""" + +# ╔═╡ 77623f3e-f9a9-11ea-2f46-ff07bd27cd5f +md""" +$(bigbreak) +#### Exercise 1.3 - _Transition frequencies_ +In the previous exercise we computed the frequency of each letter in the sample by _counting_ their occurences, and then dividing by the total number of counts. + +In this exercise, we are going to count _letter transitions_, such as `aa`, `as`, `rt`, `yy`. Two letters might both be common, like `a` and `e`, but their combination, `ae`, is uncommon in English. + +To quantify this observation, we will do the same as in our last exercise: we count occurences in a _sample text_, to create the **transition frequency matrix**. 
+""" + +# ╔═╡ d3d7bd9c-f9af-11ea-1570-75856615eb5d +bigbreak + +# ╔═╡ 6718d26c-f9b0-11ea-1f5a-0f22f7ddffe9 +md""" +$(bigbreak) + +#### Exercise 1.4 - _Language detection_ +""" + +# ╔═╡ 568f0d3a-fb54-11ea-0f77-171718ef12a5 +bigbreak + +# ╔═╡ 7f341c4e-fb54-11ea-1919-d5421d7a2c75 +bigbreak + +# ╔═╡ Cell order: +# ╟─e6b6760a-f37f-11ea-3ae1-65443ef5a81a +# ╟─ec66314e-f37f-11ea-0af4-31da0584e881 +# ╟─85cfbd10-f384-11ea-31dc-b5693630a4c5 +# ╠═33e43c7c-f381-11ea-3abc-c942327456b1 +# ╟─938185ec-f384-11ea-21dc-b56b7469f798 +# ╠═86e1ee96-f314-11ea-03f6-0f549b79e7c9 +# ╠═a4937996-f314-11ea-2ff9-615c888afaa8 +# ╟─b49a21a6-f381-11ea-1a98-7f144c55c9b7 +# ╟─c086bd1e-f384-11ea-3b26-2da9e24360ca +# ╟─6f9df800-f92d-11ea-2d49-c1aaabd2d012 +# ╠═b61722cc-f98f-11ea-22ae-d755f61f78c3 +# ╟─f457ad44-f990-11ea-0e2d-2bb7627716a8 +# ╠═4efc051e-f92e-11ea-080e-bde6b8f9295a +# ╟─38d1ace8-f991-11ea-0b5f-ed7bd08edde5 +# ╠═ddf272c8-f990-11ea-2135-7bf1a6dca0b7 +# ╟─3cc688d2-f996-11ea-2a6f-0b4c7a5b74c2 +# ╟─d67034d0-f92d-11ea-31c2-f7a38ebb412f +# ╟─a094e2ac-f92d-11ea-141a-3566552dd839 +# ╠═27c9a7f4-f996-11ea-1e46-19e3fc840ad9 +# ╟─f2a4edfa-f996-11ea-1a24-1ba78fd92233 +# ╟─5c74a052-f92e-11ea-2c5b-0f1a3a14e313 +# ╠═dcc4156c-f997-11ea-3e6f-057cd080d9db +# ╟─129fbcfe-f998-11ea-1c96-0fd3ccd2dcf8 +# ╠═3a5ee698-f998-11ea-0452-19b70ed11a1d +# ╠═75694166-f998-11ea-0428-c96e1113e2a0 +# ╟─6fe693c8-f9a1-11ea-1983-f159131880e9 +# ╟─05f0182c-f999-11ea-0a52-3d46c65a049e +# ╟─98266882-f998-11ea-3270-4339fb502bc7 +# ╠═d3c98450-f998-11ea-3caf-895183af926b +# ╠═d3a4820e-f998-11ea-2a5c-1f37e2a6dd0a +# ╟─cee0f984-f9a0-11ea-2c3c-53fe26156ea4 +# ╟─aad659b8-f998-11ea-153e-3dae9514bfeb +# ╠═d236b51e-f997-11ea-0c55-abb11eb35f4d +# ╠═a56724b6-f9a0-11ea-18f2-991e0382eccf +# ╟─24860970-fc48-11ea-0009-cddee695772c +# ╟─734851c6-f92d-11ea-130d-bf2a69e89255 +# ╟─8d3bc9ea-f9a1-11ea-1508-8da4b7674629 +# ╠═4affa858-f92e-11ea-3ece-258897c37e51 +# ╠═e00d521a-f992-11ea-11e0-e9da8255b23b +# ╟─ddfb1e1c-f9a1-11ea-3625-f1170272e96a +# 
╟─eaa8c79e-f9a2-11ea-323f-8bb2bd36e11c +# ╠═2680b506-f9a3-11ea-0849-3989de27dd9f +# ╟─571d28d6-f960-11ea-1b2e-d5977ecbbb11 +# ╠═6a64ab12-f960-11ea-0d92-5b88943cdb1a +# ╟─603741c2-f9a4-11ea-37ce-1b36ecc83f45 +# ╟─b3de6260-f9a4-11ea-1bae-9153a92c3fe5 +# ╠═a6c36bd6-f9a4-11ea-1aba-f75cecc90320 +# ╟─6d3f9dae-f9a5-11ea-3228-d147435e266d +# ╠═92bf9fd2-f9a5-11ea-25c7-5966e44db6c6 +# ╟─95b81778-f9a5-11ea-3f51-019430bc8fa8 +# ╟─7df7ab82-f9ad-11ea-2243-21685d660d71 +# ╟─dcffd7d2-f9a6-11ea-2230-b1afaecfdd54 +# ╟─b3dad856-f9a7-11ea-1552-f7435f1cb605 +# ╟─01215e9a-f9a9-11ea-363b-67392741c8d4 +# ╟─be55507c-f9a7-11ea-189c-4ffe8377212e +# ╟─8ae13cf0-f9a8-11ea-3919-a735c4ed9e7f +# ╟─343d63c2-fb58-11ea-0cce-efe1afe070c2 +# ╟─b5b8dd18-f938-11ea-157b-53b145357fd1 +# ╟─0e872a6c-f937-11ea-125e-37958713a495 +# ╟─77623f3e-f9a9-11ea-2f46-ff07bd27cd5f +# ╠═fbb7c04e-f92d-11ea-0b81-0be20da242c8 +# ╠═80118bf8-f931-11ea-34f3-b7828113ffd8 +# ╠═7f4f6ce4-f931-11ea-15a4-b3bec6a7e8b6 +# ╠═d40034f6-f9ab-11ea-3f65-7ffd1256ae9d +# ╟─689ed82a-f9ae-11ea-159c-331ff6660a75 +# ╠═ace3dc76-f9ae-11ea-2bee-3d0bfa57cfbc +# ╟─0b67789c-f931-11ea-113c-35e5edafcbbf +# ╠═6896fef8-f9af-11ea-0065-816a70ba9670 +# ╟─39152104-fc49-11ea-04dd-bb34e3600f2f +# ╟─e91c6fd8-f930-11ea-01ac-476bbde79079 +# ╠═1b4c0c28-f9ab-11ea-03a6-69f69f7f90ed +# ╟─1f94e0a2-f9ab-11ea-1347-7dd906ebb09d +# ╠═41b2df7c-f931-11ea-112e-ede3b16f357a +# ╟─489fe282-f931-11ea-3dcb-35d4f2ac8b40 +# ╟─1dd1e2f4-f930-11ea-312c-5ff9e109c7f6 +# ╠═65c92cac-f930-11ea-20b1-6b8f45b3f262 +# ╟─671525cc-f930-11ea-0e71-df9d4aae1c05 +# ╟─4582ebf4-f930-11ea-03b2-bf4da1a8f8df +# ╟─7898b76a-f930-11ea-2b7e-8126ec2b8ffd +# ╟─a5fbba46-f931-11ea-33e1-054be53d986c +# ╟─458cd100-f930-11ea-24b8-41a49f6596a0 +# ╠═bc401bee-f931-11ea-09cc-c5efe2f11194 +# ╟─ba695f6a-f931-11ea-0fbb-c3ef1374270e +# ╟─45c20988-f930-11ea-1d12-b782d2c01c11 +# ╠═cc62929e-f9af-11ea-06b9-439ac08dcb52 +# ╟─d3d7bd9c-f9af-11ea-1570-75856615eb5d +# ╟─2f8dedfc-fb98-11ea-23d7-2159bdb6a299 +# 
╟─b7446f34-f9b1-11ea-0f39-a3c17ba740e5 +# ╟─4f97b572-f9b0-11ea-0a99-87af0797bf28 +# ╟─46c905d8-f9b0-11ea-36ed-0515e8ed2621 +# ╟─4e8d327e-f9b0-11ea-3f16-c178d96d07d9 +# ╟─489b03d4-f9b0-11ea-1de0-11d4fe4e7c69 +# ╟─d83f8bbc-f9af-11ea-2392-c90e28e96c65 +# ╟─fd202410-f936-11ea-1ad6-b3629556b3e0 +# ╟─0e465160-f937-11ea-0ebb-b7e02d71e8a8 +# ╟─6718d26c-f9b0-11ea-1f5a-0f22f7ddffe9 +# ╟─141af892-f933-11ea-1e5f-154167642809 +# ╟─7eed9dde-f931-11ea-38b0-db6bfcc1b558 +# ╟─7e3282e2-f931-11ea-272f-d90779264456 +# ╟─7d1439e6-f931-11ea-2dab-41c66a779262 +# ╠═7df55e6c-f931-11ea-33b8-fdc3be0b6cfa +# ╟─292e0384-fb57-11ea-0238-0fbe416fc976 +# ╠═7dabee08-f931-11ea-0cb2-c7d5afd21551 +# ╟─3736a094-fb57-11ea-1d39-e551aae62b1d +# ╠═13c89272-f934-11ea-07fe-91b5d56dedf8 +# ╟─7d60f056-f931-11ea-39ae-5fa18a955a77 +# ╟─b09f5512-fb58-11ea-2527-31bea4cee823 +# ╟─8c7606f0-fb93-11ea-0c9c-45364892cbb8 +# ╟─568f0d3a-fb54-11ea-0f77-171718ef12a5 +# ╟─82e0df62-fb54-11ea-3fff-b16c87a7d45b +# ╠═b7601048-fb57-11ea-0754-97dc4e0623a1 +# ╟─cc42de82-fb5a-11ea-3614-25ef961729ab +# ╠═d66fe2b2-fb5a-11ea-280f-cfb12b8296ac +# ╠═4ca8e04a-fb75-11ea-08cc-2fdef5b31944 +# ╟─6f613cd2-fb5b-11ea-1669-cbd355677649 +# ╠═91e87974-fb78-11ea-3ce4-5f64e506b9d2 +# ╠═9f98e00e-fb78-11ea-0f6c-01206e7221d6 +# ╟─d7d8cd0c-fb6a-11ea-12bf-2d1448b38162 +# ╠═7be98e04-fb6b-11ea-111d-51c48f39a4e9 +# ╠═052f822c-fb7b-11ea-382f-af4d6c2b4fdb +# ╠═067f33fc-fb7b-11ea-352e-956c8727c79f +# ╟─954fc466-fb7b-11ea-2724-1f938c6b93c6 +# ╟─e467c1c6-fbf2-11ea-0d20-f5798237c0a6 +# ╟─7b10f074-fb7c-11ea-20f0-034ddff41bc3 +# ╟─24ae5da0-fb7e-11ea-3480-8bb7b649abd5 +# ╟─47836744-fb7e-11ea-2305-3fa5819dc154 +# ╠═df4fc31c-fb81-11ea-37b3-db282b36f5ef +# ╠═c83b1770-fb82-11ea-20a6-3d3a09606c62 +# ╟─52970ac4-fb82-11ea-3040-8bd0590348d2 +# ╠═8ce3b312-fb82-11ea-200c-8d5b12f03eea +# ╠═a2214e50-fb83-11ea-3580-210f12d44182 +# ╟─a9ffff9a-fb83-11ea-1efd-2fc15538e52f +# ╟─808abf6e-fb84-11ea-0785-2fc3f1c4a09f +# ╠═953363dc-fb84-11ea-1128-ebdfaf5160ee +# 
╟─294b6f50-fb84-11ea-1382-03e9ab029a2d +# ╠═b726f824-fb5e-11ea-328e-03a30544037f +# ╠═18355314-fb86-11ea-0738-3544e2e3e816 +# ╠═abe2b862-fb69-11ea-08d9-ebd4ba3437d5 +# ╟─3d105742-fb8d-11ea-09b0-cd2e77efd15c +# ╟─a72fcf5a-fb62-11ea-1dcc-11451d23c085 +# ╟─f83991c0-fb7c-11ea-0e6f-1f80709d00c1 +# ╟─4b27a89a-fb8d-11ea-010b-671eba69364e +# ╟─d7b7a14a-fb90-11ea-3e2b-2fd8f379b4d8 +# ╟─1939dbea-fb63-11ea-0bc2-2d06b2d4b26c +# ╟─70169682-fb8c-11ea-27c0-2dad2ff3080f +# ╟─b5dff8b8-fb6c-11ea-10fc-37d2a9adae8c +# ╟─402562b0-fb63-11ea-0769-375572cc47a8 +# ╟─ee8c5808-fb5f-11ea-19a1-3d58217f34dc +# ╟─2521bac8-fb8f-11ea-04a4-0b077d77529e +# ╠═49b69dc2-fb8f-11ea-39af-030b5c5053c3 +# ╟─7f341c4e-fb54-11ea-1919-d5421d7a2c75 +# ╟─cc07f576-fbf3-11ea-2c6f-0be63b9356fc +# ╟─6b4d6584-f3be-11ea-131d-e5bdefcc791b +# ╟─54b1e236-fb53-11ea-3769-b382ef8b25d6 +# ╟─b7803a28-fb96-11ea-3e30-d98eb322d19a +# ╟─ddef9c94-fb96-11ea-1f17-f173a4ff4d89 +# ╟─ffc17f40-f380-11ea-30ee-0fe8563c0eb1 +# ╟─ffc40ab2-f380-11ea-2136-63542ff0f386 +# ╟─ffceaed6-f380-11ea-3c63-8132d270b83f +# ╟─ffde44ae-f380-11ea-29fb-2dfcc9cda8b4 +# ╟─ffe326e0-f380-11ea-3619-61dd0592d409 +# ╟─fff5aedc-f380-11ea-2a08-99c230f8fa32 +# ╟─00026442-f381-11ea-2b41-bde1fff66011 +# ╟─00115b6e-f381-11ea-0bc6-61ca119cb628 diff --git a/lecture_notebooks/Basic Julia syntax.html b/lecture_notebooks/Basic Julia syntax.html new file mode 100644 index 000000000..9c5c5629b --- /dev/null +++ b/lecture_notebooks/Basic Julia syntax.html @@ -0,0 +1,206 @@ + + + + + ⚡ Pluto.jl ⚡ + + + + + + + + + + + + +

A basic Julia syntax cheatsheet

+

This notebook briefly summarizes some of the basic Julia syntax that we will need for the problem sets.

+
11.8 μs

Variables

+

We can define a variable using = (assignment). Then we can use its value in other expressions:

+
10.5 μs
x
3
1.9 μs
y
6
9.6 μs

By default Julia displays the output of the last operation. (You can suppress the output by adding ; (a semicolon) at the end.)

+
20.8 μs

We can ask what type a variable has using typeof:

+
8.5 μs
Int64
7.9 μs

Functions

+
5.3 μs

We can use a short-form, one-line function definition for simple functions:

+
5.2 μs
f (generic function with 1 method)
28.5 μs

Typing the function's name gives information about the function. To call it we must use parentheses:

+
5.2 μs
f (generic function with 1 method)
1.5 μs
12
4.3 ms

For longer functions we use the following syntax with the function keyword and end:

+
5.7 μs
g (generic function with 1 method)
30.9 μs
9
5.4 ms

For loops

+
6.6 μs

Use for to loop through a pre-determined set of values:

+
7.5 μs
55
190 ns

Here, 1:10 is a range representing the numbers from 1 to 10:

+
13.3 μs
UnitRange{Int64}
7.4 μs

Above we used a let block to define a new local variable s. But blocks of code like this are usually better inside functions, so that they can be reused. For example, we could rewrite the above as follows:

+
10.3 μs
mysum (generic function with 1 method)
61.8 μs
5050
11.5 ms

Conditionals: if

+
6.5 μs

We can evaluate whether a condition is true or not by simply writing the condition:

+
5.4 μs
a
3
2 μs
true
6.4 μs

We see that conditions have a Boolean (true or false) value.

+

We can then use if to control what we do based on that value:

+
8.7 μs
"small"
4 μs

Note that the if also returns the last value that was evaluated, in this case the string "small" or "big". Since Pluto is reactive, changing the definition of a above will automatically cause this to be reevaluated!

+
7.1 μs

Arrays

+
9.2 μs

1D arrays (Vectors)

+
16.2 μs

We can make a Vector (1-dimensional, or 1D array) using square brackets:

+
9.3 μs
v
4.4 μs
Array{Int64,1}
2.7 μs

The 1 in the type shows that this is a 1D array.

+

We access elements also using square brackets:

+
6.9 μs
2
6.5 ms
10
3.5 μs

Note that Pluto does not automatically update cells when you modify elements of an array, but the value does change.

+
5.3 μs

A nice way to create Vectors following a certain pattern is to use an array comprehension:

+
8 μs
v2
43.6 ms

2D arrays (matrices)

+
5.3 μs

We can make small matrices (2D arrays) with square brackets too:

+
6.4 μs
M
2×2 Array{Int64,2}:
+ 1  2
+ 3  4
45.1 ms
Array{Int64,2}
3.1 μs

The 2 in the type confirms that this is a 2D array.

+
5.4 μs

This won't work for larger matrices, though. For that we can use e.g.

+
5.3 μs
5×5 Array{Float64,2}:
+ 0.0  0.0  0.0  0.0  0.0
+ 0.0  0.0  0.0  0.0  0.0
+ 0.0  0.0  0.0  0.0  0.0
+ 0.0  0.0  0.0  0.0  0.0
+ 0.0  0.0  0.0  0.0  0.0
15.1 ms

Note that zeros gives Float64s by default. We can also specify a type for the elements:

+
6 μs
4×5 Array{Int64,2}:
+ 0  0  0  0  0
+ 0  0  0  0  0
+ 0  0  0  0  0
+ 0  0  0  0  0
23 ms

We can then fill in the values we want by manipulating the elements, e.g. with a for loop.

+
5.8 μs

A nice alternative syntax to create matrices following a certain pattern is an array comprehension with a double for loop:

+
3.3 ms
5×6 Array{Int64,2}:
+ 2  3  4  5   6   7
+ 3  4  5  6   7   8
+ 4  5  6  7   8   9
+ 5  6  7  8   9  10
+ 6  7  8  9  10  11
86.5 ms
+ + + \ No newline at end of file diff --git a/lecture_notebooks/Basic Julia syntax.jl b/lecture_notebooks/Basic Julia syntax.jl new file mode 100644 index 000000000..622db1193 --- /dev/null +++ b/lecture_notebooks/Basic Julia syntax.jl @@ -0,0 +1,263 @@ +### A Pluto.jl notebook ### +# v0.11.12 + +using Markdown +using InteractiveUtils + +# ╔═╡ 0d3aec92-edeb-11ea-3adb-cd0dc17cbdab +md"# A basic Julia syntax cheatsheet + +This notebook briefly summarizes some of the basic Julia syntax that we will need for the problem sets. +" + + +# ╔═╡ 3b038ee0-edeb-11ea-0977-97cc30d1c6ff +md"## Variables + +We can define a variable using `=` (assignment). Then we can use its value in other expressions: +" + +# ╔═╡ 3e8e0ea0-edeb-11ea-22e0-c58f7c2168ce +x = 3 + + +# ╔═╡ 59b66862-edeb-11ea-2d62-71dcc79dbfab +y = 2x + +# ╔═╡ 5e062a24-edeb-11ea-256a-d938f77d7815 +md"By default Julia displays the output of the last operation. (You can suppress the output by adding `;` (a semicolon) at the end.) +" + +# ╔═╡ 7e46f0e8-edeb-11ea-1092-4b5e8acd9ee0 +md"We can ask what type a variable has using `typeof`:" + +# ╔═╡ 8a695b86-edeb-11ea-08cc-17263bec09df +typeof(y) + +# ╔═╡ 8e2dd3be-edeb-11ea-0703-354fb31c12f5 +md"## Functions" + +# ╔═╡ 96b5a28c-edeb-11ea-11c0-597615962f54 +md"We can use a short-form, one-line function definition for simple functions:" + +# ╔═╡ a7453572-edeb-11ea-1e27-9f710fd856a6 +f(x) = 2 + x + +# ╔═╡ b341db4e-edeb-11ea-078b-b71ac00089d7 +md"Typing the function's name gives information about the function. 
To call it we must use parentheses:" + +# ╔═╡ 23f9afd4-eded-11ea-202a-9f0f1f91e5ad +f + +# ╔═╡ cc1f6872-edeb-11ea-33e9-6976fd9b107a +f(10) + +# ╔═╡ ce9667c2-edeb-11ea-2665-d789032abd11 +md"For longer functions we use the following syntax with the `function` keyword and `end`:" + +# ╔═╡ d73d3400-edeb-11ea-2dea-95e8c4a6563b +function g(x, y) + z = x + y + return z^2 +end + +# ╔═╡ e04ccf10-edeb-11ea-36d1-d11969e4b2f2 +g(1, 2) + +# ╔═╡ e297c5cc-edeb-11ea-3bdd-090f415685ab +md"## For loops" + +# ╔═╡ ec751446-edeb-11ea-31ba-2372e7c71b42 +md"Use `for` to loop through a pre-determined set of values:" + +# ╔═╡ fe3fa290-edeb-11ea-121e-7114e5c573c1 +let s = 0 + + for i in 1:10 + s += i # Equivalent to s = s + i + end + + s +end + +# ╔═╡ 394b0ec8-eded-11ea-31fb-27392068ef8f +md"Here, `1:10` is a **range** representing the numbers from 1 to 10:" + +# ╔═╡ 4dc00908-eded-11ea-25c5-0f7b2b7e18f9 +typeof(1:10) + +# ╔═╡ 6c44abb4-edec-11ea-16bd-557800b5f9d2 +md"Above we used a `let` block to define a new local variable `s`. +But blocks of code like this are usually better inside functions, so that they can be reused. For example, we could rewrite the above as follows: +" + +# ╔═╡ 683af3e2-eded-11ea-25a5-0d90bf099d98 +function mysum(n) + s = 0 + + for i in 1:n + s += i + end + + return s +end + +# ╔═╡ 76764ea2-eded-11ea-1aa6-296f3421de1c +mysum(100) + +# ╔═╡ 93a231f4-edec-11ea-3b39-299b3be2da78 +md"## Conditionals: `if`" + +# ╔═╡ 82e63a24-eded-11ea-3887-15d6bfabea4b +md"We can evaluate whether a condition is true or not by simply writing the condition:" + +# ╔═╡ 9b339b2a-eded-11ea-10d7-8fc9a907c892 +a = 3 + +# ╔═╡ 9535eb40-eded-11ea-1651-e33c9c23dbfb +a < 5 + +# ╔═╡ a16299a2-eded-11ea-2b56-93eb7a1010a7 +md"We see that conditions have a Boolean (`true` or `false`) value. 
+ +We can then use `if` to control what we do based on that value:" + +# ╔═╡ bc6b124e-eded-11ea-0290-b3760cb81024 +if a < 5 + "small" + +else + "big" + +end + +# ╔═╡ cfb21014-eded-11ea-1261-3bc30952a88e +md"""Note that the `if` also returns the last value that was evaluated, in this case the string `"small"` or `"big"`, Since Pluto is reactive, changing the definition of `a` above will automatically cause this to be reevaluated!""" + +# ╔═╡ ffee7d80-eded-11ea-26b1-1331df204c67 +md"## Arrays" + +# ╔═╡ cae4137e-edee-11ea-14af-59a32227de1b +md"### 1D arrays (`Vector`s)" + +# ╔═╡ 714f4fca-edee-11ea-3410-c9ab8825d836 +md"We can make a `Vector` (1-dimensional, or 1D array) using square brackets:" + +# ╔═╡ 82cc2a0e-edee-11ea-11b7-fbaa5ad7b556 +v = [1, 2, 3] + +# ╔═╡ 85916c18-edee-11ea-0738-5f5d78875b86 +typeof(v) + +# ╔═╡ 881b7d0c-edee-11ea-0b4a-4bd7d5be2c77 +md"The `1` in the type shows that this is a 1D array. + +We access elements also using square brackets:" + +# ╔═╡ a298e8ae-edee-11ea-3613-0dd4bae70c26 +v[2] + +# ╔═╡ a5ebddd6-edee-11ea-2234-55453ea59c5a +v[2] = 10 + +# ╔═╡ a9b48e54-edee-11ea-1333-a96181de0185 +md"Note that Pluto does not automatically update cells when you modify elements of an array, but the value does change." + +# ╔═╡ 68c4ead2-edef-11ea-124a-03c2d7dd6a1b +md"A nice way to create `Vector`s following a certain pattern is to use an **array comprehension**:" + +# ╔═╡ 84129294-edef-11ea-0c77-ffa2b9592a26 +v2 = [i^2 for i in 1:10] + +# ╔═╡ d364fa16-edee-11ea-2050-0f6cb70e1bcf +md"## 2D arrays (matrices)" + +# ╔═╡ db99ae9a-edee-11ea-393e-9de420a545a1 +md"We can make small matrices (2D arrays) with square brackets too:" + +# ╔═╡ 04f175f2-edef-11ea-0882-712548ebb7a3 +M = [1 2 + 3 4] + +# ╔═╡ 0a8ac112-edef-11ea-1e99-cf7c7808c4f5 +typeof(M) + +# ╔═╡ 1295f48a-edef-11ea-22a5-61e8a2e1d005 +md"The `2` in the type confirms that this is a 2D array." + +# ╔═╡ 3e1fdaa8-edef-11ea-2f03-eb41b2b9ea0f +md"This won't work for larger matrices, though. 
For that we can use e.g." + +# ╔═╡ 48f3deca-edef-11ea-2c18-e7419c9030a0 +zeros(5, 5) + +# ╔═╡ a8f26af8-edef-11ea-2fc7-2b776f515aea +md"Note that `zeros` gives `Float64`s by default. We can also specify a type for the elements:" + +# ╔═╡ b595373e-edef-11ea-03e2-6599ef14af20 +zeros(Int, 4, 5) + +# ╔═╡ 4cb33c04-edef-11ea-2b35-1139c246c331 +md"We can then fill in the values we want by manipulating the elements, e.g. with a `for` loop." + +# ╔═╡ 54e47e9e-edef-11ea-2d75-b5f550902528 +md"A nice alternative syntax to create matrices following a certain pattern is an array comprehension with a *double* `for` loop:" + +# ╔═╡ 6348edce-edef-11ea-1ab4-019514eb414f +[i + j for i in 1:5, j in 1:6] + +# ╔═╡ Cell order: +# ╟─0d3aec92-edeb-11ea-3adb-cd0dc17cbdab +# ╟─3b038ee0-edeb-11ea-0977-97cc30d1c6ff +# ╠═3e8e0ea0-edeb-11ea-22e0-c58f7c2168ce +# ╠═59b66862-edeb-11ea-2d62-71dcc79dbfab +# ╟─5e062a24-edeb-11ea-256a-d938f77d7815 +# ╟─7e46f0e8-edeb-11ea-1092-4b5e8acd9ee0 +# ╠═8a695b86-edeb-11ea-08cc-17263bec09df +# ╟─8e2dd3be-edeb-11ea-0703-354fb31c12f5 +# ╟─96b5a28c-edeb-11ea-11c0-597615962f54 +# ╠═a7453572-edeb-11ea-1e27-9f710fd856a6 +# ╟─b341db4e-edeb-11ea-078b-b71ac00089d7 +# ╠═23f9afd4-eded-11ea-202a-9f0f1f91e5ad +# ╠═cc1f6872-edeb-11ea-33e9-6976fd9b107a +# ╟─ce9667c2-edeb-11ea-2665-d789032abd11 +# ╠═d73d3400-edeb-11ea-2dea-95e8c4a6563b +# ╠═e04ccf10-edeb-11ea-36d1-d11969e4b2f2 +# ╟─e297c5cc-edeb-11ea-3bdd-090f415685ab +# ╟─ec751446-edeb-11ea-31ba-2372e7c71b42 +# ╠═fe3fa290-edeb-11ea-121e-7114e5c573c1 +# ╟─394b0ec8-eded-11ea-31fb-27392068ef8f +# ╠═4dc00908-eded-11ea-25c5-0f7b2b7e18f9 +# ╟─6c44abb4-edec-11ea-16bd-557800b5f9d2 +# ╠═683af3e2-eded-11ea-25a5-0d90bf099d98 +# ╠═76764ea2-eded-11ea-1aa6-296f3421de1c +# ╟─93a231f4-edec-11ea-3b39-299b3be2da78 +# ╟─82e63a24-eded-11ea-3887-15d6bfabea4b +# ╠═9b339b2a-eded-11ea-10d7-8fc9a907c892 +# ╠═9535eb40-eded-11ea-1651-e33c9c23dbfb +# ╟─a16299a2-eded-11ea-2b56-93eb7a1010a7 +# ╠═bc6b124e-eded-11ea-0290-b3760cb81024 +# 
╟─cfb21014-eded-11ea-1261-3bc30952a88e +# ╟─ffee7d80-eded-11ea-26b1-1331df204c67 +# ╟─cae4137e-edee-11ea-14af-59a32227de1b +# ╟─714f4fca-edee-11ea-3410-c9ab8825d836 +# ╠═82cc2a0e-edee-11ea-11b7-fbaa5ad7b556 +# ╠═85916c18-edee-11ea-0738-5f5d78875b86 +# ╟─881b7d0c-edee-11ea-0b4a-4bd7d5be2c77 +# ╠═a298e8ae-edee-11ea-3613-0dd4bae70c26 +# ╠═a5ebddd6-edee-11ea-2234-55453ea59c5a +# ╟─a9b48e54-edee-11ea-1333-a96181de0185 +# ╟─68c4ead2-edef-11ea-124a-03c2d7dd6a1b +# ╠═84129294-edef-11ea-0c77-ffa2b9592a26 +# ╟─d364fa16-edee-11ea-2050-0f6cb70e1bcf +# ╟─db99ae9a-edee-11ea-393e-9de420a545a1 +# ╠═04f175f2-edef-11ea-0882-712548ebb7a3 +# ╠═0a8ac112-edef-11ea-1e99-cf7c7808c4f5 +# ╟─1295f48a-edef-11ea-22a5-61e8a2e1d005 +# ╟─3e1fdaa8-edef-11ea-2f03-eb41b2b9ea0f +# ╠═48f3deca-edef-11ea-2c18-e7419c9030a0 +# ╟─a8f26af8-edef-11ea-2fc7-2b776f515aea +# ╠═b595373e-edef-11ea-03e2-6599ef14af20 +# ╟─4cb33c04-edef-11ea-2b35-1139c246c331 +# ╟─54e47e9e-edef-11ea-2d75-b5f550902528 +# ╠═6348edce-edef-11ea-1ab4-019514eb414f diff --git a/lecture_notebooks/Lecture 1 - Images.jl b/lecture_notebooks/Lecture 1 - Images.jl new file mode 120000 index 000000000..f6464748c --- /dev/null +++ b/lecture_notebooks/Lecture 1 - Images.jl @@ -0,0 +1 @@ +week1/01-images-intro.jl \ No newline at end of file diff --git a/lecture_notebooks/gradient.jl b/lecture_notebooks/gradient.jl new file mode 120000 index 000000000..a56da657d --- /dev/null +++ b/lecture_notebooks/gradient.jl @@ -0,0 +1 @@ +week2/02-sobel_gradient.jl \ No newline at end of file diff --git a/lecture_notebooks/seam_carving.jl b/lecture_notebooks/seam_carving.jl new file mode 120000 index 000000000..575cc9451 --- /dev/null +++ b/lecture_notebooks/seam_carving.jl @@ -0,0 +1 @@ +week2/01-seam_carving.jl \ No newline at end of file diff --git a/lecture_notebooks/taking advantage of structure.jl b/lecture_notebooks/taking advantage of structure.jl new file mode 120000 index 000000000..e03d4b7bf --- /dev/null +++ b/lecture_notebooks/taking advantage of 
structure.jl @@ -0,0 +1 @@ +week3/01-structure.jl \ No newline at end of file diff --git a/lecture_notebooks/week1/01-images-intro.jl b/lecture_notebooks/week1/01-images-intro.jl new file mode 100644 index 000000000..b1eab24eb --- /dev/null +++ b/lecture_notebooks/week1/01-images-intro.jl @@ -0,0 +1,807 @@ +### A Pluto.jl notebook ### +# v0.11.12 + +using Markdown +using InteractiveUtils + +# This Pluto notebook uses @bind for interactivity. When running this notebook outside of Pluto, the following 'mock version' of @bind gives bound variables a default value (instead of an error). +macro bind(def, element) + quote + local el = $(esc(element)) + global $(esc(def)) = Core.applicable(Base.get, el) ? Base.get(el) : missing + el + end +end + +# ╔═╡ 5e688928-e939-11ea-0e16-fbc80af390ab +using LinearAlgebra + +# ╔═╡ a50b5f48-e8d5-11ea-1f05-a3741b5d15ba +html"" + +# ╔═╡ 8a6fed4c-e94b-11ea-1113-d56f56fb293b +br = HTML("
") + +# ╔═╡ dc53f316-e8c8-11ea-150f-1374dbce114a +md"""# Welcome to 18.S191 -- Fall 2020! + +### Introduction to Computational Thinking for Real-World Problems""" + +# ╔═╡ c3f43d66-e94b-11ea-02bd-23cfeb878ff1 +br + +# ╔═╡ c6c77738-e94b-11ea-22f5-1dce3dbcc3ca +md"### " + +# ╔═╡ cf80793a-e94b-11ea-0120-f7913ae06f22 +br + +# ╔═╡ d1638d96-e94b-11ea-2ff4-910e399f864d +md"##### Alan Edelman, David P. Sanders, Grant Sanderson, James Schloss" + +# ╔═╡ 0117246a-e94c-11ea-1a76-c981ce8e725d +md"##### & Philip the Corgi" + +# ╔═╡ 27060098-e8c9-11ea-2fe0-03b39b1ddc32 +md"""# Class outline + +### Data and computation + +- Module 1: Analyzing images + +- Module 2: Particles and ray tracing + +- Module 3: Epidemic spread + +- Module 4: Climate change +""" + +# ╔═╡ 4fc58814-e94b-11ea-339b-cb714a63f9b6 +md"## Tools + +- Julia programming language: + +- Pluto notebook environment +" + +# ╔═╡ f067d3b8-e8c8-11ea-20cb-474709ffa99a +md"""# Module 1: Images""" + +# ╔═╡ 37c1d012-ebc9-11ea-2dfe-8b86bb78f283 +4 + 4 + +# ╔═╡ a0a97214-e8d2-11ea-0f46-0bfaf016ab6d +md"""## Data takes many forms + +- Time series: + - Number of infections per day + - Stock price each minute + - A piece for violin broadcast over the radio +$(HTML("
")) + +- Video: + - The view from a window of a self-driving car + - A hurricane monitoring station +$(HTML("
")) + +- Images: + - Diseased versus healthy tissue in a scan + - Deep space via the Hubble telescope + - Can your social media account recognise your friends? +""" + +# ╔═╡ 1697a756-e93d-11ea-0b6e-c9c78d527993 +md"## Capture your own image!" + +# ╔═╡ af28faca-ebb7-11ea-130d-0f94bf9bd836 + + +# ╔═╡ ee1d1596-e94a-11ea-0fb4-cd05f62471d3 +md"##" + +# ╔═╡ 8ab9a978-e8c9-11ea-2476-f1ef4ba1b619 +md"""## What is an image?""" + +# ╔═╡ 38c54bfc-e8cb-11ea-3d52-0f02452f8ba1 +md"Albrecht Dürer:" + +# ╔═╡ 983f8270-e8c9-11ea-29d2-adeccb5a7ffc +# md"# begin +# using Images + +# download("https://i.stack.imgur.com/QQL8X.jpg", "durer.jpg") + +# load("durer.jpg") +# end + +md"![](https://i.stack.imgur.com/QQL8X.jpg)" + +# ╔═╡ 2fcaef88-e8ca-11ea-23f7-29c48580f43c +md"""## + +An image is: + +- A 2D representation of a 3D world + +- An approximation + +""" + +# ╔═╡ 7636c4b0-e8d1-11ea-2051-757a850a9d30 +begin + image_text = + md""" + ## What *is* an image, though? + + - A grid of coloured squares called **pixels** + + - A colour for each pair $(i, j)$ of indices + + - A **discretization** + + """ + + image_text +end + +# ╔═╡ bca22176-e8ca-11ea-2004-ebeb103116b5 +md""" +## How can we store an image in the computer? + +- Is it a 1D array (`Vector`)? + +- A 2D array (`Matrix`)? + +- A 3D array (`tensor`)? +""" + +# ╔═╡ 0ad91f1e-e8d2-11ea-2c18-93f66c906a8b +md"""## If in doubt: Ask Julia! 
+ +- Let's use the `Images.jl` package to load an image and see what we get +""" + +# ╔═╡ de373816-ec79-11ea-2772-ebdca52246ac +begin + import Pkg + Pkg.activate(mktempdir()) +end + +# ╔═╡ 552129ae-ebca-11ea-1fa1-3f9fa00a2601 +begin + Pkg.add(["Images", "ImageIO", "ImageMagick"]) + using Images +end + +# ╔═╡ fbe11200-e938-11ea-12e9-6125c1b56b25 +begin + Pkg.add("PlutoUI") + using PlutoUI +end + +# ╔═╡ 54c1ba3c-e8d2-11ea-3564-bdaca8563738 +# defines a variable called `url` +# whose value is a string (written inside `"`): + +url = "https://i.imgur.com/VGPeJ6s.jpg" + +# ╔═╡ 6e0fefb6-e8d4-11ea-1f9b-e7a3db40df39 +philip_file = download(url, "philip.jpg") # download to a local file + +# ╔═╡ 9c359212-ec79-11ea-2d7e-0124dad5f127 +philip = load(philip_file) + +# ╔═╡ 7703b032-ebca-11ea-3074-0b80a077078e +philip + +# ╔═╡ 7eff3522-ebca-11ea-1a65-59e66a4e72ab +typeof(philip) + +# ╔═╡ c9cd6c04-ebca-11ea-0990-5fa19ff7ed97 +RGBX(0.9, 0.1, 0.1) + +# ╔═╡ 0d873d9c-e93b-11ea-2425-1bd79677fb97 +md"##" + +# ╔═╡ 6b09354a-ebb9-11ea-2d5a-3b75c5ae7aa9 + + +# ╔═╡ 2d6c434e-e93b-11ea-2678-3b9db4975089 +md"##" + +# ╔═╡ 2b14e93e-e93b-11ea-25f1-5f565f80e778 +typeof(philip) + +# ╔═╡ 0bdc6058-e8d5-11ea-1889-3f706cea7a1f +md"""## + +- According to Julia / Pluto, the variable `philip` *is* an image + +- Julia always returns output + +- The output can be displayed in a "rich" way + +$(HTML("
")) + +- Arthur C. Clarke: + +> Any sufficiently advanced technology is indistinguishable from magic. +""" + +# ╔═╡ e61db924-ebca-11ea-2f79-f9f1c121b7f5 +size(philip) + +# ╔═╡ ef60fcc4-ebca-11ea-3f69-155afffe8ea8 +philip + +# ╔═╡ fac550ec-ebca-11ea-337a-dbc16848c617 +philip[1:1000, 1:400] + +# ╔═╡ 42aa8cfe-e8d5-11ea-3cb9-c365b98e7a8c +md" +## How big is Philip? + +- He's pretty big: +" + +# ╔═╡ 4eea5710-e8d5-11ea-3978-af66ee2a137e +size(philip) + +# ╔═╡ 57b3a0c2-e8d5-11ea-15aa-8da4549f849b +md"- Which number is which?" + +# ╔═╡ 03a7c0fc-ebba-11ea-1c71-79d750c97b16 +philip + +# ╔═╡ e6fd68fa-e8d8-11ea-3dc4-274caceda222 +md"# So, what *is* an image?" + +# ╔═╡ 63a1d282-e8d5-11ea-0bba-b9cdd32a218b +typeof(philip) + +# ╔═╡ fc5e1af0-e8d8-11ea-1077-07216ff96d29 +md""" +- It's an `Array` + +- The `2` means that it has **2 dimensions** (a **matrix**) + +$(HTML("
")) + +- `RGBX{Normed{UInt8,8}}` is the type of object stored in the array + +- A Julia object representing a colour + +- RGB = Red, Green, Blue +""" + +# ╔═╡ c79dd836-e8e8-11ea-029d-57be9899979a +md"## Getting pieces of an image" + + + +# ╔═╡ ae260168-e932-11ea-38fd-4f2c6f43e21c +begin + (h, w) = size(philip) + head = philip[(h ÷ 2):h, (w ÷ 10): (9w ÷ 10)] + # `÷` is typed as \div -- integer division +end + +# ╔═╡ 47d1bc04-ebcb-11ea-3643-d1ba8dea57c8 +size(head) + +# ╔═╡ 72400458-ebcb-11ea-26b6-678ae1de8e23 +size(philip) + +# ╔═╡ f57ea7c2-e932-11ea-0d52-4112187bcb38 +md"## Manipulating matrices + +- An image is just a matrix, so we can manipulate *matrices* to manipulate the *image* +" + +# ╔═╡ 740ed2e2-e933-11ea-236c-f3c3f09d0f8b +[head head] + +# ╔═╡ 6128a5ba-e93b-11ea-03f5-f170c7b90b25 +md"##" + +# ╔═╡ 78eafe4e-e933-11ea-3539-c13feb894ef6 +[ + head reverse(head, dims=2) + reverse(head, dims=1) reverse(reverse(head, dims=1), dims=2) +] + +# ╔═╡ bf3f9050-e933-11ea-0df7-e5dcff6bb3ee +md"## Manipulating an image + +- How can we get inside the image and change it? 
+ +- There are two possibilities: + + - **Modify** (**mutate**) numbers inside the array -- useful to change a small piece + + - Create a new **copy** of the array -- useful to alter everything together +" + +# ╔═╡ 212e1f12-e934-11ea-2f35-51c7a6c8dff1 +md"## Painting a piece of an image + +- Let's paint a corner red + +- We'll copy the image first so we don't destroy the original +" + +# ╔═╡ 117a98c0-e936-11ea-3aac-8f66337cea68 +new_phil = copy(head) + +# ╔═╡ 8004d076-e93b-11ea-29cc-a1bfcc75e87f +md"##" + +# ╔═╡ 3ac63296-e936-11ea-2144-f94bdbd60eaf +red = RGB(1, 0, 0) + +# ╔═╡ 3e3f841a-e936-11ea-0a81-1b95fe0faa83 +for i in 1:100 + for j in 1:300 + new_phil[i, j] = red + end +end + +# ╔═╡ 5978db50-e936-11ea-3145-059a51be2281 +md"Note that `for` loops *do not return anything* (or, rather, they return `nothing`)" + +# ╔═╡ 21638b14-ebcc-11ea-1761-bbd2f4306a96 +new_phil + +# ╔═╡ 70cb0e36-e936-11ea-3ade-49fde77cb696 +md"""## Element-wise operations: "Broadcasting" + +- Julia provides powerful technology for operating element by element: **broadcasting** + +- Adding "`.`" applies an operation element by element +""" + +# ╔═╡ b3ea975e-e936-11ea-067d-81339575a3cb +begin + new_phil2 = copy(new_phil) + new_phil2[100:200, 1:100] .= RGB(0, 1, 0) + new_phil2 +end + +# ╔═╡ 918a0762-e93b-11ea-1115-71dbfdb03f27 +md"##" + +# ╔═╡ daabe66c-e937-11ea-3bc3-d77f2bce406c +new_phil2 + +# ╔═╡ 095ced62-e938-11ea-1169-939dc7136fd0 +md"## Modifying the whole image at once + +- We can use the same trick to modify the whole image at once + +- Let's **redify** the image + +- We define a **function** that turns a colour into just its red component +" + +# ╔═╡ 31f3605a-e938-11ea-3a6d-29a185bbee31 +function redify(c) + return RGB(c.r, 0, 0) +end + +# ╔═╡ 2744a556-e94f-11ea-2434-d53c24e59285 +begin + color = RGB(0.9, 0.7, 0.2) + + [color, redify(color)] +end + +# ╔═╡ 98412a36-e93b-11ea-1954-f1c105c6ed4a +md"##" + +# ╔═╡ 3c32efde-e938-11ea-1ae4-5d88290f5311 +redify.(philip) + +# ╔═╡ 
4b26e4e6-e938-11ea-2635-6d4fc15e13b7 +md"## Transforming an image + +- The main goal of this week will be to transform images in more interesting ways + +- First let's **decimate** poor Phil +" + + + +# ╔═╡ c12e0928-e93b-11ea-0922-2b590a99ee89 +md"##" + +# ╔═╡ ff5dc538-e938-11ea-058f-693d6b016640 +md"## Experiments come alive with interaction + +- We start to get a feel for things when we can **experiment**! +" + +# ╔═╡ fa24f4a8-e93b-11ea-06bd-25c9672166d6 +md"##" + +# ╔═╡ 15ce202e-e939-11ea-2387-93be0ec4cf1f +@bind repeat_count Slider(1:10, show_value=true) + +# ╔═╡ bf2167a4-e93d-11ea-03b2-cdd24b459ba9 +md"## Summary + +- Images are readily-accessible data about the world + +- We want to process them to extract information + +- Relatively simple mathematical operations can transform images in useful ways +" + +# ╔═╡ 58184d88-e939-11ea-2fc8-73b3476ebe92 +expand(image, ratio=5) = kron(image, ones(ratio, ratio)) + +# ╔═╡ 2dd09f16-e93a-11ea-2cdc-13f558e3391d +extract_red(c) = c.r + +# ╔═╡ df1b7996-e93b-11ea-1a3a-81b4ec520679 +decimate(image, ratio=5) = image[1:ratio:end, 1:ratio:end] + +# ╔═╡ 41fa85c0-e939-11ea-1ad8-79805a2083bb +poor_phil = decimate(head, 5) + +# ╔═╡ cd5721d0-ede6-11ea-0918-1992c69bccc6 +repeat(poor_phil, repeat_count, repeat_count) + +# ╔═╡ b8daeea0-ec79-11ea-34b5-3f13e8a56a42 +md"# Appendix" + +# ╔═╡ bf1bb2c8-ec79-11ea-0671-3ffb34828f3c +md"## Package environment" + +# ╔═╡ 69e3aa82-e93c-11ea-23fe-c1103d989cba +md"## Camera input" + +# ╔═╡ 739c3bb6-e93c-11ea-127b-efb6a8ab9379 +function camera_input(;max_size=200, default_url="https://i.imgur.com/SUmi94P.png") +""" + + + +
+
+ + +
+ +
+ +
+
+ +
+ + Enable webcam + +
+ + +
+""" |> HTML +end + + +# ╔═╡ 9529bc40-e93c-11ea-2587-3186e0978476 +@bind raw_camera_data camera_input(;max_size=2000) + +# ╔═╡ 832ebd1a-e93c-11ea-1d18-d784f3184ebe + +function process_raw_camera_data(raw_camera_data) + # the raw image data is a long byte array, we need to transform it into something + # more "Julian" - something with more _structure_. + + # The encoding of the raw byte stream is: + # every 4 bytes is a single pixel + # every pixel has 4 values: Red, Green, Blue, Alpha + # (we ignore alpha for this notebook) + + # So to get the red values for each pixel, we take every 4th value, starting at + # the 1st: + reds_flat = UInt8.(raw_camera_data["data"][1:4:end]) + greens_flat = UInt8.(raw_camera_data["data"][2:4:end]) + blues_flat = UInt8.(raw_camera_data["data"][3:4:end]) + + # but these are still 1-dimensional arrays, nicknamed 'flat' arrays + # We will 'reshape' this into 2D arrays: + + width = raw_camera_data["width"] + height = raw_camera_data["height"] + + # shuffle and flip to get it in the right shape + reds = reshape(reds_flat, (width, height))' / 255.0 + greens = reshape(greens_flat, (width, height))' / 255.0 + blues = reshape(blues_flat, (width, height))' / 255.0 + + # we have our 2D array for each color + # Let's create a single 2D array, where each value contains the R, G and B value of + # that pixel + + RGB.(reds, greens, blues) +end + +# ╔═╡ 9a843af8-e93c-11ea-311b-1bc6d5b58492 +grant = decimate(process_raw_camera_data(raw_camera_data), 2) + +# ╔═╡ 6aa73286-ede7-11ea-232b-63e052222ecd +[ + grant grant[:,end:-1:1] + grant[end:-1:1,:] grant[end:-1:1,end:-1:1] +] + +# ╔═╡ Cell order: +# ╟─a50b5f48-e8d5-11ea-1f05-a3741b5d15ba +# ╟─8a6fed4c-e94b-11ea-1113-d56f56fb293b +# ╟─dc53f316-e8c8-11ea-150f-1374dbce114a +# ╟─c3f43d66-e94b-11ea-02bd-23cfeb878ff1 +# ╟─c6c77738-e94b-11ea-22f5-1dce3dbcc3ca +# ╟─cf80793a-e94b-11ea-0120-f7913ae06f22 +# ╟─d1638d96-e94b-11ea-2ff4-910e399f864d +# ╟─0117246a-e94c-11ea-1a76-c981ce8e725d +# 
╟─27060098-e8c9-11ea-2fe0-03b39b1ddc32 +# ╟─4fc58814-e94b-11ea-339b-cb714a63f9b6 +# ╟─f067d3b8-e8c8-11ea-20cb-474709ffa99a +# ╠═37c1d012-ebc9-11ea-2dfe-8b86bb78f283 +# ╟─a0a97214-e8d2-11ea-0f46-0bfaf016ab6d +# ╟─1697a756-e93d-11ea-0b6e-c9c78d527993 +# ╟─af28faca-ebb7-11ea-130d-0f94bf9bd836 +# ╠═9529bc40-e93c-11ea-2587-3186e0978476 +# ╟─ee1d1596-e94a-11ea-0fb4-cd05f62471d3 +# ╠═6aa73286-ede7-11ea-232b-63e052222ecd +# ╠═9a843af8-e93c-11ea-311b-1bc6d5b58492 +# ╟─8ab9a978-e8c9-11ea-2476-f1ef4ba1b619 +# ╟─38c54bfc-e8cb-11ea-3d52-0f02452f8ba1 +# ╟─983f8270-e8c9-11ea-29d2-adeccb5a7ffc +# ╟─2fcaef88-e8ca-11ea-23f7-29c48580f43c +# ╟─7636c4b0-e8d1-11ea-2051-757a850a9d30 +# ╟─bca22176-e8ca-11ea-2004-ebeb103116b5 +# ╟─0ad91f1e-e8d2-11ea-2c18-93f66c906a8b +# ╠═de373816-ec79-11ea-2772-ebdca52246ac +# ╠═552129ae-ebca-11ea-1fa1-3f9fa00a2601 +# ╠═54c1ba3c-e8d2-11ea-3564-bdaca8563738 +# ╠═6e0fefb6-e8d4-11ea-1f9b-e7a3db40df39 +# ╠═9c359212-ec79-11ea-2d7e-0124dad5f127 +# ╠═7703b032-ebca-11ea-3074-0b80a077078e +# ╠═7eff3522-ebca-11ea-1a65-59e66a4e72ab +# ╠═c9cd6c04-ebca-11ea-0990-5fa19ff7ed97 +# ╟─0d873d9c-e93b-11ea-2425-1bd79677fb97 +# ╠═6b09354a-ebb9-11ea-2d5a-3b75c5ae7aa9 +# ╟─2d6c434e-e93b-11ea-2678-3b9db4975089 +# ╠═2b14e93e-e93b-11ea-25f1-5f565f80e778 +# ╟─0bdc6058-e8d5-11ea-1889-3f706cea7a1f +# ╠═e61db924-ebca-11ea-2f79-f9f1c121b7f5 +# ╠═ef60fcc4-ebca-11ea-3f69-155afffe8ea8 +# ╠═fac550ec-ebca-11ea-337a-dbc16848c617 +# ╟─42aa8cfe-e8d5-11ea-3cb9-c365b98e7a8c +# ╠═4eea5710-e8d5-11ea-3978-af66ee2a137e +# ╟─57b3a0c2-e8d5-11ea-15aa-8da4549f849b +# ╠═03a7c0fc-ebba-11ea-1c71-79d750c97b16 +# ╟─e6fd68fa-e8d8-11ea-3dc4-274caceda222 +# ╠═63a1d282-e8d5-11ea-0bba-b9cdd32a218b +# ╟─fc5e1af0-e8d8-11ea-1077-07216ff96d29 +# ╟─c79dd836-e8e8-11ea-029d-57be9899979a +# ╠═ae260168-e932-11ea-38fd-4f2c6f43e21c +# ╠═47d1bc04-ebcb-11ea-3643-d1ba8dea57c8 +# ╠═72400458-ebcb-11ea-26b6-678ae1de8e23 +# ╟─f57ea7c2-e932-11ea-0d52-4112187bcb38 +# ╠═740ed2e2-e933-11ea-236c-f3c3f09d0f8b +# 
╟─6128a5ba-e93b-11ea-03f5-f170c7b90b25 +# ╠═78eafe4e-e933-11ea-3539-c13feb894ef6 +# ╟─bf3f9050-e933-11ea-0df7-e5dcff6bb3ee +# ╟─212e1f12-e934-11ea-2f35-51c7a6c8dff1 +# ╠═117a98c0-e936-11ea-3aac-8f66337cea68 +# ╟─8004d076-e93b-11ea-29cc-a1bfcc75e87f +# ╠═3ac63296-e936-11ea-2144-f94bdbd60eaf +# ╠═3e3f841a-e936-11ea-0a81-1b95fe0faa83 +# ╟─5978db50-e936-11ea-3145-059a51be2281 +# ╠═21638b14-ebcc-11ea-1761-bbd2f4306a96 +# ╟─70cb0e36-e936-11ea-3ade-49fde77cb696 +# ╠═b3ea975e-e936-11ea-067d-81339575a3cb +# ╟─918a0762-e93b-11ea-1115-71dbfdb03f27 +# ╠═daabe66c-e937-11ea-3bc3-d77f2bce406c +# ╟─095ced62-e938-11ea-1169-939dc7136fd0 +# ╠═31f3605a-e938-11ea-3a6d-29a185bbee31 +# ╠═2744a556-e94f-11ea-2434-d53c24e59285 +# ╟─98412a36-e93b-11ea-1954-f1c105c6ed4a +# ╠═3c32efde-e938-11ea-1ae4-5d88290f5311 +# ╟─4b26e4e6-e938-11ea-2635-6d4fc15e13b7 +# ╠═41fa85c0-e939-11ea-1ad8-79805a2083bb +# ╟─c12e0928-e93b-11ea-0922-2b590a99ee89 +# ╟─ff5dc538-e938-11ea-058f-693d6b016640 +# ╠═fbe11200-e938-11ea-12e9-6125c1b56b25 +# ╟─fa24f4a8-e93b-11ea-06bd-25c9672166d6 +# ╠═15ce202e-e939-11ea-2387-93be0ec4cf1f +# ╠═cd5721d0-ede6-11ea-0918-1992c69bccc6 +# ╟─bf2167a4-e93d-11ea-03b2-cdd24b459ba9 +# ╟─5e688928-e939-11ea-0e16-fbc80af390ab +# ╟─58184d88-e939-11ea-2fc8-73b3476ebe92 +# ╟─2dd09f16-e93a-11ea-2cdc-13f558e3391d +# ╟─df1b7996-e93b-11ea-1a3a-81b4ec520679 +# ╟─b8daeea0-ec79-11ea-34b5-3f13e8a56a42 +# ╟─bf1bb2c8-ec79-11ea-0671-3ffb34828f3c +# ╟─69e3aa82-e93c-11ea-23fe-c1103d989cba +# ╟─739c3bb6-e93c-11ea-127b-efb6a8ab9379 +# ╟─832ebd1a-e93c-11ea-1d18-d784f3184ebe diff --git a/lecture_notebooks/week1/02-convolutions.jl b/lecture_notebooks/week1/02-convolutions.jl new file mode 100644 index 000000000..904b74e03 --- /dev/null +++ b/lecture_notebooks/week1/02-convolutions.jl @@ -0,0 +1,183 @@ +### A Pluto.jl notebook ### +# v0.11.10 + +using Markdown +using InteractiveUtils + +# ╔═╡ 1ca14906-eca1-11ea-23f6-472ed97d75aa +begin + using Statistics + using Images + using FFTW + using Plots + using DSP + using 
ImageFiltering + using PlutoUI +end + +# ╔═╡ 42ed52ba-ed34-11ea-26b5-05379824cbc0 +md""" +# Convolutions with various kernels +""" + +# ╔═╡ 4c13d558-ee15-11ea-2ed9-c5fb90d93881 +kernel = Kernel.gaussian((2, 2)) + +# ╔═╡ 673f7ac0-ee16-11ea-35d0-cf3da430b843 +sum(kernel) + +# ╔═╡ 9c90feb8-ec79-11ea-2870-31be5cedff43 +md""" +# Function definitions +""" + +# ╔═╡ 84e6a57c-edfc-11ea-01a0-157f1df77518 +function show_colored_kernel(kernel) + to_rgb(x) = RGB(max(-x, 0), max(x, 0), 0) + to_rgb.(kernel) / maximum(abs.(kernel)) +end + +# ╔═╡ 9424b46a-ee16-11ea-1819-f17ce53e9997 +show_colored_kernel(kernel) + +# ╔═╡ 68f2afec-eca2-11ea-0758-2f22c7afdd94 +function decimate(arr, ratio=5) + return arr[1:ratio:end, 1:ratio:end] +end + +# ╔═╡ aa3b9bd6-ed35-11ea-1bdc-33861bdbd29a +function shrink_image(image, ratio=5) + (height, width) = size(image) + new_height = height ÷ ratio - 1 + new_width = width ÷ ratio - 1 + list = [ + mean(image[ + ratio * i:ratio * (i + 1), + ratio * j:ratio * (j + 1), + ]) + for j in 1:new_width + for i in 1:new_height + ] + reshape(list, new_height, new_width) +end + +# ╔═╡ 6d39fea8-ed3c-11ea-3d7c-3f62ca91ce23 +begin + large_image = load("tom_in_bowtie.jpg") + image = shrink_image(large_image, 7) +end + +# ╔═╡ 2f446dcc-ee15-11ea-0e78-931ff507b5e5 +size(image) + +# ╔═╡ 14d5b144-ee18-11ea-0080-c187f068c168 +image + +# ╔═╡ 160eb236-eca1-11ea-1dbe-47ad61cc9397 +function rgb_to_float(color) + return mean([color.r, color.g, color.b]) +end + +# ╔═╡ fa3c5074-eca0-11ea-2d2d-bb6bcdeb834c +function fourier_spectrum_magnitudes(img) + grey_values = rgb_to_float.(img) + spectrum = fftshift(fft(grey_values)) + return abs.(spectrum) +end + +# ╔═╡ e40d807e-ed3a-11ea-2340-7f98bd5d04a2 +function plot_1d_fourier_spectrum(img, dims=1) + spectrum = fourier_spectrum_magnitudes(img) + plot(centered(mean(spectrum, dims=1)[1:end])) +end + +# ╔═╡ beb6b4b0-eca1-11ea-1ece-e3c9931c9c13 +function heatmap_2d_fourier_spectrum(img) + heatmap(log.(fourier_spectrum_magnitudes(img))) +end + 
+# ╔═╡ 18045956-ee18-11ea-3e34-612133e2e39c +heatmap_2d_fourier_spectrum(image) + +# ╔═╡ 58f4754e-ed31-11ea-0464-5bfccf397966 +function clamp_at_boundary(M, i, j) + return M[ + clamp(i, 1, size(M, 1)), + clamp(j, 1, size(M, 2)), + ] +end + +# ╔═╡ f28af11e-ed31-11ea-2b46-7dff147ccb48 +function rolloff_boundary(M, i, j) + if (1 ≤ i ≤ size(M, 1)) && (1 ≤ j ≤ size(M, 2)) + return M[i, j] + else + return 0 * M[1, 1] + end +end + +# ╔═╡ 572cf620-ecb2-11ea-0019-21666a30d9d2 +function convolve(M, kernel, M_index_func=clamp_at_boundary) + height = size(kernel, 1) + width = size(kernel, 2) + + half_height = height ÷ 2 + half_width = width ÷ 2 + + new_image = similar(M) + + # (i, j) loop over the original image + @inbounds for i in 1:size(M, 1) + for j in 1:size(M, 2) + # (k, l) loop over the neighbouring pixels + new_image[i, j] = sum([ + kernel[k, l] * M_index_func(M, i - k, j - l) + for k in -half_height:-half_height + height - 1 + for l in -half_width:-half_width + width - 1 + ]) + end + end + + return new_image +end + +# ╔═╡ 5afed4ea-ee18-11ea-1aa4-abca154b3793 +conv_image = convolve(image, kernel) + +# ╔═╡ 6340c0f8-ee18-11ea-1765-45f4bc140670 +heatmap_2d_fourier_spectrum(conv_image) + +# ╔═╡ 587092e4-ecb2-11ea-18fc-ad5e9778fb30 +box_blur(n) = centered(ones(n, n) ./ (n^2)) + +# ╔═╡ 991cb9b8-ecb8-11ea-3f80-5d95b2200259 +function gauss_blur(n, sigma=0.25) + kern = gaussian((n, n), sigma) + return kern / sum(kern) +end + +# ╔═╡ Cell order: +# ╟─42ed52ba-ed34-11ea-26b5-05379824cbc0 +# ╠═6d39fea8-ed3c-11ea-3d7c-3f62ca91ce23 +# ╠═2f446dcc-ee15-11ea-0e78-931ff507b5e5 +# ╠═4c13d558-ee15-11ea-2ed9-c5fb90d93881 +# ╠═9424b46a-ee16-11ea-1819-f17ce53e9997 +# ╠═673f7ac0-ee16-11ea-35d0-cf3da430b843 +# ╠═14d5b144-ee18-11ea-0080-c187f068c168 +# ╠═18045956-ee18-11ea-3e34-612133e2e39c +# ╠═5afed4ea-ee18-11ea-1aa4-abca154b3793 +# ╠═6340c0f8-ee18-11ea-1765-45f4bc140670 +# ╟─9c90feb8-ec79-11ea-2870-31be5cedff43 +# ╟─1ca14906-eca1-11ea-23f6-472ed97d75aa +# 
╟─84e6a57c-edfc-11ea-01a0-157f1df77518 +# ╟─68f2afec-eca2-11ea-0758-2f22c7afdd94 +# ╟─aa3b9bd6-ed35-11ea-1bdc-33861bdbd29a +# ╟─160eb236-eca1-11ea-1dbe-47ad61cc9397 +# ╟─fa3c5074-eca0-11ea-2d2d-bb6bcdeb834c +# ╟─e40d807e-ed3a-11ea-2340-7f98bd5d04a2 +# ╟─beb6b4b0-eca1-11ea-1ece-e3c9931c9c13 +# ╟─58f4754e-ed31-11ea-0464-5bfccf397966 +# ╟─f28af11e-ed31-11ea-2b46-7dff147ccb48 +# ╟─572cf620-ecb2-11ea-0019-21666a30d9d2 +# ╟─587092e4-ecb2-11ea-18fc-ad5e9778fb30 +# ╟─991cb9b8-ecb8-11ea-3f80-5d95b2200259 diff --git a/lecture_notebooks/week2/01-seam_carving.jl b/lecture_notebooks/week2/01-seam_carving.jl new file mode 100644 index 000000000..a6e8bee24 --- /dev/null +++ b/lecture_notebooks/week2/01-seam_carving.jl @@ -0,0 +1,485 @@ +### A Pluto.jl notebook ### +# v0.11.13 + +using Markdown +using InteractiveUtils + +# This Pluto notebook uses @bind for interactivity. When running this notebook outside of Pluto, the following 'mock version' of @bind gives bound variables a default value (instead of an error). +macro bind(def, element) + quote + local el = $(esc(element)) + global $(esc(def)) = Core.applicable(Base.get, el) ? Base.get(el) : missing + el + end +end + +# ╔═╡ 877df834-f078-11ea-303b-e98273ef98a4 +begin + import Pkg + Pkg.activate(mktempdir()) +end + +# ╔═╡ 0316b94c-eef6-11ea-19bc-dbc959901bb5 +begin + # Poor man's Project.toml + Pkg.add(["Images", "ImageMagick", "PlutoUI", "ImageFiltering"]) + + using Images + using PlutoUI + using ImageFiltering + + # these are "Standard Libraries" - they are included in every environment + using Statistics + using LinearAlgebra +end + +# ╔═╡ cb335074-eef7-11ea-24e8-c39a325166a1 +md""" +# Seam Carving + +1. We use convolution with Sobel filters for "edge detection". +2. We use that to write an algorithm that removes "uninteresting" + bits of an image in order to shrink it. +""" + +# ╔═╡ bf750d0e-f35c-11ea-0245-713584583fcf +md"Select an image below!" 
+ +# ╔═╡ 90f44be8-f35c-11ea-2fc6-c361fd4966af +@bind image_url Select([ +"https://cdn.shortpixel.ai/spai/w_1086+q_lossy+ret_img+to_webp/https://wisetoast.com/wp-content/uploads/2015/10/The-Persistence-of-Memory-salvador-deli-painting.jpg", + +"https://upload.wikimedia.org/wikipedia/commons/thumb/1/17/Gustave_Caillebotte_-_Paris_Street%3B_Rainy_Day_-_Google_Art_Project.jpg/1014px-Gustave_Caillebotte_-_Paris_Street%3B_Rainy_Day_-_Google_Art_Project.jpg", + +"https://upload.wikimedia.org/wikipedia/commons/thumb/1/17/Gustave_Caillebotte_-_Paris_Street%3B_Rainy_Day_-_Google_Art_Project.jpg/1014px-Gustave_Caillebotte_-_Paris_Street%3B_Rainy_Day_-_Google_Art_Project.jpg", + +"https://upload.wikimedia.org/wikipedia/commons/thumb/c/cc/Grant_Wood_-_American_Gothic_-_Google_Art_Project.jpg/480px-Grant_Wood_-_American_Gothic_-_Google_Art_Project.jpg", + "https://cdn.shortpixel.ai/spai/w_1086+q_lossy+ret_img+to_webp/https://wisetoast.com/wp-content/uploads/2015/10/The-Persistence-of-Memory-salvador-deli-painting.jpg", + +"https://upload.wikimedia.org/wikipedia/commons/thumb/7/7d/A_Sunday_on_La_Grande_Jatte%2C_Georges_Seurat%2C_1884.jpg/640px-A_Sunday_on_La_Grande_Jatte%2C_Georges_Seurat%2C_1884.jpg", + +"https://upload.wikimedia.org/wikipedia/commons/thumb/e/ea/Van_Gogh_-_Starry_Night_-_Google_Art_Project.jpg/758px-Van_Gogh_-_Starry_Night_-_Google_Art_Project.jpg", + "https://web.mit.edu/facilities/photos/construction/Projects/stata/1_large.jpg", + ]) + +# ╔═╡ d2ae6dd2-eef9-11ea-02df-255ec3b46a36 +img = load(download(image_url)) + +# ╔═╡ 8ded023c-f35c-11ea-317c-11f5d1b67998 + + +# ╔═╡ 0b6010a8-eef6-11ea-3ad6-c1f10e30a413 +# arbitrarily choose the brightness of a pixel as mean of rgb +# brightness(c::AbstractRGB) = mean((c.r, c.g, c.b)) + +# Use a weighted sum of rgb giving more weight to colors we perceive as 'brighter' +# Based on https://www.tutorialspoint.com/dip/grayscale_to_rgb_conversion.htm +brightness(c::AbstractRGB) = 0.3 * c.r + 0.59 * c.g + 0.11 * c.b + +# ╔═╡ 
fc1c43cc-eef6-11ea-0fc4-a90ac4336964 +Gray.(brightness.(img)) + +# ╔═╡ 82c0d0c8-efec-11ea-1bb9-83134ecb877e +md""" +# Edge detection filter + +(Spoiler alert!) Here, we use the Sobel edge detection filter we created in Homework 1. + +```math +\begin{align} + +G_x &= \begin{bmatrix} +1 & 0 & -1 \\ +2 & 0 & -2 \\ +1 & 0 & -1 \\ +\end{bmatrix}*A\\ +G_y &= \begin{bmatrix} +1 & 2 & 1 \\ +0 & 0 & 0 \\ +-1 & -2 & -1 \\ +\end{bmatrix}*A +\end{align} +``` +Here $A$ is the array corresponding to your image. +We can think of these as derivatives in the $x$ and $y$ directions. + +Then we combine them by finding the magnitude of the **gradient** (in the sense of multivariate calculus) by defining + +$$G_\text{total} = \sqrt{G_x^2 + G_y^2}.$$ +""" + +# ╔═╡ da726954-eff0-11ea-21d4-a7f4ae4a6b09 +Sy, Sx = Kernel.sobel() + +# ╔═╡ abf6944e-f066-11ea-18e2-0b92606dab85 +(collect(Int.(8 .* Sy)), collect(Int.(8 .* Sx))) + +# ╔═╡ ac8d6902-f069-11ea-0f1d-9b0fa706d769 +md""" +- blue shows positive values +- red shows negative values + $G_x \hspace{180pt} G_y$ +""" + +# ╔═╡ 172c7612-efee-11ea-077a-5d5c6e2505a4 +function shrink_image(image, ratio=5) + (height, width) = size(image) + new_height = height ÷ ratio - 1 + new_width = width ÷ ratio - 1 + list = [ + mean(image[ + ratio * i:ratio * (i + 1), + ratio * j:ratio * (j + 1), + ]) + for j in 1:new_width + for i in 1:new_height + ] + reshape(list, new_height, new_width) +end + +# ╔═╡ fcf46120-efec-11ea-06b9-45f470899cb2 +function convolve(M, kernel) + height, width = size(kernel) + + half_height = height ÷ 2 + half_width = width ÷ 2 + + new_image = similar(M) + + # (i, j) loop over the original image + m, n = size(M) + @inbounds for i in 1:m + for j in 1:n + # (k, l) loop over the neighbouring pixels + accumulator = 0 * M[1, 1] + for k in -half_height:-half_height + height - 1 + for l in -half_width:-half_width + width - 1 + Mi = i - k + Mj = j - l + # First index into M + if Mi < 1 + Mi = 1 + elseif Mi > m + Mi = m + end + # Second index 
into M + if Mj < 1 + Mj = 1 + elseif Mj > n + Mj = n + end + + accumulator += kernel[k, l] * M[Mi, Mj] + end + end + new_image[i, j] = accumulator + end + end + + return new_image +end + +# ╔═╡ 6f7bd064-eff4-11ea-0260-f71aa7f4f0e5 +function edgeness(img) + Sy, Sx = Kernel.sobel() + b = brightness.(img) + + ∇y = convolve(b, Sy) + ∇x = convolve(b, Sx) + + sqrt.(∇x.^2 + ∇y.^2) +end + +# ╔═╡ dec62538-efee-11ea-1e03-0b801e61e91c + function show_colored_array(array) + pos_color = RGB(0.36, 0.82, 0.8) + neg_color = RGB(0.99, 0.18, 0.13) + to_rgb(x) = max(x, 0) * pos_color + max(-x, 0) * neg_color + to_rgb.(array) / maximum(abs.(array)) + end + +# ╔═╡ da39c824-eff0-11ea-375b-1b6c6e186182 +# Sx +# collect(Int.(8 .* Sx)) +show_colored_array(Sx) + +# ╔═╡ 074a58be-f146-11ea-382c-b7ae6c44bf75 +# Sy +# collect(Int.(8 .* Sy)) +show_colored_array(Sy) + +# ╔═╡ f8283a0e-eff4-11ea-23d3-9f1ced1bafb4 +md""" + +## Seam carving idea + +The idea of seam carving is to find a path from the top of the image to the bottom of the image where the path minimizes the edgness. + +In other words, this path **minimizes the number of edges it crosses** +""" + +# ╔═╡ 025e2c94-eefb-11ea-12cb-f56f34886334 +md""" + +At every step in going down, the path is allowed to go south west, south or south east. We want to find a seam with the minimum possible sum of energies. + +We start by writing a `least_edgy` function which given a matrix of energies, returns +a matrix of minimum possible energy starting from that pixel going up to a pixel in the bottom most row. 
+""" + +# ╔═╡ acc1ee8c-eef9-11ea-01ac-9b9e9c4167b3 +# e[x,y] +# ↙ ↓ ↘ <--pick the next path which gives the least overall energy +# e[x-1,y+1] e[x,y+1] e[x+1,y+1] +# +# Basic Comp: e[x,y] += min( e[x-1,y+1],e[x,y],e[x+1,y]) +# dirs records which one from (-1==SW,0==S,1==SE) + +function least_edgy(E) + least_E = zeros(size(E)) + dirs = zeros(Int, size(E)) + least_E[end, :] .= E[end, :] # the minimum energy on the last row is the energy + # itself + + m, n = size(E) + # Go from the last row up, finding the minimum energy + for i in m-1:-1:1 + for j in 1:n + j1, j2 = max(1, j-1), min(j+1, n) + e, dir = findmin(least_E[i+1, j1:j2]) + least_E[i,j] += e + least_E[i,j] += E[i,j] + dirs[i, j] = (-1,0,1)[dir + (j==1)] + end + end + least_E, dirs +end + +# ╔═╡ 8b204a2a-eff6-11ea-25b0-13f230037ee1 +# The bright areas are screaming "AVOID ME!!!" +least_e, dirs = least_edgy(edgeness(img)) + +# ╔═╡ 84d3afe4-eefe-11ea-1e31-bf3b2af4aecd +show_colored_array(least_e) + +# ╔═╡ b507480a-ef01-11ea-21c4-63d19fac19ab +# direction the path should take at every pixel. +reduce((x,y)->x*y*"\n", + reduce(*, getindex.(([" ", "↙", "↓", "↘"],), dirs[1:25, 1:60].+3), dims=2, init=""), init="") |> Text + +# ╔═╡ 7d8b20a2-ef03-11ea-1c9e-fdf49a397619 +md"## Remove seams" + +# ╔═╡ f690b06a-ef31-11ea-003b-4f2b2f82a9c3 +md""" +Compressing an image horizontally involves a number of seams of lowest energy successively. 
+""" + +# ╔═╡ 977b6b98-ef03-11ea-0176-551fc29729ab +function get_seam_at(dirs, j) + m = size(dirs, 1) + js = fill(0, m) + js[1] = j + for i=2:m + js[i] = js[i-1] + dirs[i-1, js[i-1]] + end + tuple.(1:m, js) +end + +# ╔═╡ 9abbb158-ef03-11ea-39df-a3e8aa792c50 +get_seam_at(dirs, 2) + +# ╔═╡ 14f72976-ef05-11ea-2ad5-9f0914f9cf58 +function mark_path(img, path) + img′ = copy(img) + m = size(img, 2) + for (i, j) in path + # To make it easier to see, we'll color not just + # the pixels of the seam, but also those adjacent to it + for j′ in j-1:j+1 + img′[i, clamp(j′, 1, m)] = RGB(1,0,1) + end + end + img′ +end + +# ╔═╡ cf9a9124-ef04-11ea-14a4-abf930edc7cc +@bind start_column Slider(1:size(img, 2)) + +# ╔═╡ 772a4d68-ef04-11ea-366a-f7ae9e1634f6 +path = get_seam_at(dirs, start_column) + +# ╔═╡ 081a98cc-f06e-11ea-3664-7ba51d4fd153 +function pencil(X) + f(x) = RGB(1-x,1-x,1-x) + map(f, X ./ maximum(X)) +end + +# ╔═╡ 237647e8-f06d-11ea-3c7e-2da57e08bebc +e = edgeness(img); + +# ╔═╡ 4f23bc54-ef0f-11ea-06a9-35ca3ece421e +function rm_path(img, path) + img′ = img[:, 1:end-1] # one less column + for (i, j) in path + img′[i, 1:j-1] .= img[i, 1:j-1] + img′[i, j:end] .= img[i, j+1:end] + end + img′ +end + +# ╔═╡ b401f398-ef0f-11ea-38fe-012b7bc8a4fa +function shrink_n(img, n) + imgs = [] + marked_imgs = [] + + e = edgeness(img) + for i=1:n + least_E, dirs = least_edgy(e) + _, min_j = findmin(@view least_E[1, :]) + seam = get_seam_at(dirs, min_j) + img = rm_path(img, seam) + # Recompute the energy for the new image + # Note, this currently involves rerunning the convolution + # on the whole image, but in principle the only values that + # need recomputation are those adjacent to the seam, so there + # is room for a meanintful speedup here. 
+# e = edgeness(img) + e = rm_path(e, seam) + + push!(imgs, img) + push!(marked_imgs, mark_path(img, seam)) + end + imgs, marked_imgs +end + +# ╔═╡ b1b6b7fc-f153-11ea-224a-2578e8298775 +n_examples = min(200, size(img, 2)) + +# ╔═╡ 2eb459d4-ef36-11ea-1f74-b53ffec7a1ed +# returns two vectors of n successively smaller images +# The second images have markings where the seam is cut out +carved, marked_carved = shrink_n(img, n_examples); + +# ╔═╡ 7038abe4-ef36-11ea-11a5-75e57ab51032 +@bind n Slider(1:length(carved)) + +# ╔═╡ 2d6c6820-ef2d-11ea-1704-49bb5188cfcc +md"shrunk by $n:" + +# ╔═╡ 1fd26a60-f089-11ea-1f56-bb6eba7d9651 +function hbox(x, y, gap=16; sy=size(y), sx=size(x)) + w,h = (max(sx[1], sy[1]), + gap + sx[2] + sy[2]) + + slate = fill(RGB(1,1,1), w,h) + slate[1:size(x,1), 1:size(x,2)] .= RGB.(x) + slate[1:size(y,1), size(x,2) + gap .+ (1:size(y,2))] .= RGB.(y) + slate +end + +# ╔═╡ 44192a40-eff2-11ea-0ec7-05cdadb0c29a +begin + img_brightness = brightness.(img) + ∇x = convolve(img_brightness, Sx) + ∇y = convolve(img_brightness, Sy) + hbox(show_colored_array(∇x), show_colored_array(∇y)) +end + +# ╔═╡ d6a268c0-eff4-11ea-2c9e-bfef19c7f540 +begin + edged = edgeness(img) + # hbox(img, pencil(edged)) + hbox(img, Gray.(edgeness(img)) / maximum(abs.(edged))) +end + +# ╔═╡ 552fb92e-ef05-11ea-0a79-dd7a6760089a +hbox(mark_path(img, path), mark_path(show_colored_array(least_e), path)) + +# ╔═╡ dfd03c4e-f06c-11ea-1e2a-89233a675138 +let + hbox(mark_path(img, path), mark_path(pencil(e), path)); +end + +# ╔═╡ ca4a87e8-eff8-11ea-3d57-01dfa34ff723 +let + # least energy path of them all: + _, k = findmin(least_e[1, :]) + path = get_seam_at(dirs, k) + hbox( + mark_path(img, path), + mark_path(show_colored_array(least_e), path) + ) +end + +# ╔═╡ fa6a2152-ef0f-11ea-0e67-0d1a6599e779 +hbox(img, marked_carved[n], sy=size(img)) + +# ╔═╡ 71b16dbe-f08b-11ea-2343-5f1583074029 +vbox(x,y, gap=16) = hbox(x', y')' + +# ╔═╡ ddac52ea-f148-11ea-2860-21cff4c867e6 +let + ∇y = 
convolve(brightness.(img), Sy) + ∇x = convolve(brightness.(img), Sx) + # zoom in on the clock + vbox( + hbox(img[300:end, 1:300], img[300:end, 1:300]), + hbox(show_colored_array.((∇x[300:end, 1:300], ∇y[300:end, 1:300]))...) + ) +end + +# ╔═╡ 15d1e5dc-ef2f-11ea-093a-417108bcd495 +[size(img) size(carved[n])] + +# ╔═╡ Cell order: +# ╠═877df834-f078-11ea-303b-e98273ef98a4 +# ╠═0316b94c-eef6-11ea-19bc-dbc959901bb5 +# ╟─cb335074-eef7-11ea-24e8-c39a325166a1 +# ╟─bf750d0e-f35c-11ea-0245-713584583fcf +# ╟─90f44be8-f35c-11ea-2fc6-c361fd4966af +# ╟─d2ae6dd2-eef9-11ea-02df-255ec3b46a36 +# ╠═8ded023c-f35c-11ea-317c-11f5d1b67998 +# ╟─0b6010a8-eef6-11ea-3ad6-c1f10e30a413 +# ╠═fc1c43cc-eef6-11ea-0fc4-a90ac4336964 +# ╟─82c0d0c8-efec-11ea-1bb9-83134ecb877e +# ╠═da726954-eff0-11ea-21d4-a7f4ae4a6b09 +# ╠═da39c824-eff0-11ea-375b-1b6c6e186182 +# ╠═074a58be-f146-11ea-382c-b7ae6c44bf75 +# ╠═abf6944e-f066-11ea-18e2-0b92606dab85 +# ╠═44192a40-eff2-11ea-0ec7-05cdadb0c29a +# ╟─ac8d6902-f069-11ea-0f1d-9b0fa706d769 +# ╠═ddac52ea-f148-11ea-2860-21cff4c867e6 +# ╠═6f7bd064-eff4-11ea-0260-f71aa7f4f0e5 +# ╟─d6a268c0-eff4-11ea-2c9e-bfef19c7f540 +# ╟─172c7612-efee-11ea-077a-5d5c6e2505a4 +# ╟─fcf46120-efec-11ea-06b9-45f470899cb2 +# ╟─dec62538-efee-11ea-1e03-0b801e61e91c +# ╟─f8283a0e-eff4-11ea-23d3-9f1ced1bafb4 +# ╟─025e2c94-eefb-11ea-12cb-f56f34886334 +# ╠═acc1ee8c-eef9-11ea-01ac-9b9e9c4167b3 +# ╠═8b204a2a-eff6-11ea-25b0-13f230037ee1 +# ╠═84d3afe4-eefe-11ea-1e31-bf3b2af4aecd +# ╠═b507480a-ef01-11ea-21c4-63d19fac19ab +# ╟─7d8b20a2-ef03-11ea-1c9e-fdf49a397619 +# ╠═f690b06a-ef31-11ea-003b-4f2b2f82a9c3 +# ╠═977b6b98-ef03-11ea-0176-551fc29729ab +# ╠═9abbb158-ef03-11ea-39df-a3e8aa792c50 +# ╠═772a4d68-ef04-11ea-366a-f7ae9e1634f6 +# ╠═14f72976-ef05-11ea-2ad5-9f0914f9cf58 +# ╠═cf9a9124-ef04-11ea-14a4-abf930edc7cc +# ╠═552fb92e-ef05-11ea-0a79-dd7a6760089a +# ╠═081a98cc-f06e-11ea-3664-7ba51d4fd153 +# ╠═237647e8-f06d-11ea-3c7e-2da57e08bebc +# ╠═dfd03c4e-f06c-11ea-1e2a-89233a675138 +# 
╠═ca4a87e8-eff8-11ea-3d57-01dfa34ff723 +# ╠═4f23bc54-ef0f-11ea-06a9-35ca3ece421e +# ╠═b401f398-ef0f-11ea-38fe-012b7bc8a4fa +# ╠═b1b6b7fc-f153-11ea-224a-2578e8298775 +# ╠═2eb459d4-ef36-11ea-1f74-b53ffec7a1ed +# ╠═7038abe4-ef36-11ea-11a5-75e57ab51032 +# ╟─2d6c6820-ef2d-11ea-1704-49bb5188cfcc +# ╠═fa6a2152-ef0f-11ea-0e67-0d1a6599e779 +# ╟─71b16dbe-f08b-11ea-2343-5f1583074029 +# ╟─1fd26a60-f089-11ea-1f56-bb6eba7d9651 +# ╟─15d1e5dc-ef2f-11ea-093a-417108bcd495 diff --git a/lecture_notebooks/week2/02-sobel_gradient.jl b/lecture_notebooks/week2/02-sobel_gradient.jl new file mode 100644 index 000000000..66e2f65e9 --- /dev/null +++ b/lecture_notebooks/week2/02-sobel_gradient.jl @@ -0,0 +1,258 @@ +### A Pluto.jl notebook ### +# v0.11.10 + +using Markdown +using InteractiveUtils + +# This Pluto notebook uses @bind for interactivity. When running this notebook outside of Pluto, the following 'mock version' of @bind gives bound variables a default value (instead of an error). +macro bind(def, element) + quote + local el = $(esc(element)) + global $(esc(def)) = Core.applicable(Base.get, el) ? 
Base.get(el) : missing + el + end +end + +# ╔═╡ 15a4ba3e-f0d1-11ea-2ef1-5ff1dee8795f +using Pkg + +# ╔═╡ 21e744b8-f0d1-11ea-2e09-7ffbcdf43c37 +begin + Pkg.activate(mktempdir()) + + Pkg.add("Gadfly") + Pkg.add("Compose") + Pkg.add("Statistics") + Pkg.add("Hyperscript") + Pkg.add("Colors") + Pkg.add("Images") + Pkg.add("ImageMagick") + Pkg.add("ImageFiltering") + + using Gadfly + using Images + using Compose + using Hyperscript + using Colors + using Statistics + using PlutoUI + using ImageMagick + using ImageFiltering +end + +# ╔═╡ 1ab1c808-f0d1-11ea-03a7-e9854427d45f + + +# ╔═╡ 10f850fc-f0d1-11ea-2a58-2326a9ea1e2a +set_default_plot_size(12cm, 12cm) + +# ╔═╡ 7b4d5270-f0d3-11ea-0b48-79005f20602c +function convolve(M, kernel) + height, width = size(kernel) + + half_height = height ÷ 2 + half_width = width ÷ 2 + + new_image = similar(M) + + # (i, j) loop over the original image + m, n = size(M) + @inbounds for i in 1:m + for j in 1:n + # (k, l) loop over the neighbouring pixels + accumulator = 0 * M[1, 1] + for k in -half_height:-half_height + height - 1 + for l in -half_width:-half_width + width - 1 + Mi = i - k + Mj = j - l + # First index into M + if Mi < 1 + Mi = 1 + elseif Mi > m + Mi = m + end + # Second index into M + if Mj < 1 + Mj = 1 + elseif Mj > n + Mj = n + end + + accumulator += kernel[k, l] * M[Mi, Mj] + end + end + new_image[i, j] = accumulator + end + end + + return new_image +end + +# ╔═╡ 6fd3b7a4-f0d3-11ea-1f26-fb9740cd16e0 +function disc(n, r1=0.8, r2=0.8) + white = RGB{Float64}(1,1,1) + blue = RGB{Float64}(colorant"#4EC0E3") + convolve( + [(i-n/2)^2 + (j-n/2)^2 <= (n/2-5)^2 ? 
white : blue for i=1:n, j=1:n], + Kernel.gaussian((1,1)) + ) +end + +# ╔═╡ fe3559e0-f13b-11ea-06c8-a314e44c20d6 +brightness(c) = 0.3 * c.r + 0.59 * c.g + 0.11 * c.b + +# ╔═╡ 0ccf76e4-f0d9-11ea-07c9-0159e3d4d733 +@bind img_select Radio(["disc", "mario"], default="disc") + +# ╔═╡ 236dab08-f13d-11ea-1922-a3b82cfc7f51 +begin + url = "http://files.softicons.com/download/game-icons/super-mario-icons-by-sandro-pereira/png/32/Retro%20Mario.png" + img = Dict( + "disc" => disc(25), + "mario" => load(download(url)) + )[img_select] +end + +# ╔═╡ 03434682-f13b-11ea-2b6e-11ad781e9a51 +md"""Show $G_x$ $(@bind Gx CheckBox()) + + Show $G_y$ $(@bind Gy CheckBox())""" + +# ╔═╡ ca13597a-f168-11ea-1a2c-ff7b98b7b2c7 +function partial_derivatives(img) + Sy,Sx = Kernel.sobel() + ∇x, ∇y = zeros(size(img)), zeros(size(img)) + + if Gx + ∇x = convolve(brightness.(img), Sx) + end + if Gy + ∇y = convolve(brightness.(img), Sy) + end + return ∇x, ∇y +end + +# ╔═╡ b369584c-f183-11ea-260a-35dc797e63ad + + +# ╔═╡ b2cbe058-f183-11ea-39dc-23d4a5b92796 + + +# ╔═╡ 9d9cccb2-f118-11ea-1638-c76682e636b2 +function arrowhead(θ) + eq_triangle = [(0, 1/sqrt(3)), + (-1/3, -2/(2 * sqrt(3))), + (1/3, -2/(2 * sqrt(3)))] + + compose(context(units=UnitBox(-1,-1,2,2), rotation=Rotation(θ, 0, 0)), + polygon(eq_triangle)) +end + +# ╔═╡ b7ea8a28-f0d7-11ea-3e98-7b19a1f58304 +function quiver(points, vecs) + xmin = minimum(first.(points)) + ymin = minimum(last.(points)) + xmax = maximum(first.(points)) + ymax = maximum(last.(points)) + hs = map(x->hypot(x...), vecs) + hs = hs / maximum(hs) + + vector(p, v, h) = all(iszero, v) ? context() : + (context(), + (context((p.+v.*6 .- .2)..., .4,.4), + arrowhead(atan(v[2], v[1]) - pi/2)), + stroke(RGBA(90/255,39/255,41/255,h)), + fill(RGBA(90/255,39/255,41/255,h)), + line([p, p.+v.*8])) + + compose(context(units=UnitBox(xmin,ymin,xmax,ymax)), + vector.(points, vecs, hs)...) 
+end + +# ╔═╡ c821b906-f0d8-11ea-2df0-8f2d06964aa2 +function sobel_quiver(img, ∇x, ∇y) + quiver([(j-1,i-1) for i=1:size(img,1), j=1:size(img,2)], + [(∇x[i,j], ∇y[i,j]) for i=1:size(img,1), j=1:size(img,2)]) +end + +# ╔═╡ 6da3fdfe-f0dd-11ea-2407-7b85217b35cc +# render an Image using squares in Compose +function compimg(img) + xmax, ymax = size(img) + xmin, ymin = 0, 0 + arr = [(j-1, i-1) for i=1:ymax, j=1:xmax] + + compose(context(units=UnitBox(xmin, ymin, xmax, ymax)), + fill(vec(img)), + rectangle( + first.(arr), + last.(arr), + fill(1.0, length(arr)), + fill(1.0, length(arr)))) +end + +# ╔═╡ f22aa34e-f0df-11ea-3053-3dcdc070ec2f +let + ∇x, ∇y = partial_derivatives(img) + + compose(context(), + sobel_quiver(img, ∇x, ∇y), + compimg(img)) +end + +# ╔═╡ 885ec336-f146-11ea-00c4-c1d1ab4c0001 + function show_colored_array(array) + pos_color = RGB(0.36, 0.82, 0.8) + neg_color = RGB(0.99, 0.18, 0.13) + to_rgb(x) = max(x, 0) * pos_color + max(-x, 0) * neg_color + to_rgb.(array) / maximum(abs.(array)) + end + +# ╔═╡ 9232dcc8-f188-11ea-08fe-b787ea93c598 +begin + Sy, Sx = Kernel.sobel() + show_colored_array(Sx) + Sx +end + +# ╔═╡ 7864bd00-f146-11ea-0020-7fccb3913d8b +let + ∇x, ∇y = partial_derivatives(img) + + to_show = (x -> RGB(0, 0, 0)).(zeros(size(img))) + if Gx && Gy + edged = sqrt.(∇x.^2 + ∇y.^2) + to_show = Gray.(edged) / maximum(edged) + elseif Gx + to_show = show_colored_array(∇x) + elseif Gy + to_show = show_colored_array(∇y) + end + compose( + context(), + compimg(to_show) + ) +end + +# ╔═╡ Cell order: +# ╠═15a4ba3e-f0d1-11ea-2ef1-5ff1dee8795f +# ╠═1ab1c808-f0d1-11ea-03a7-e9854427d45f +# ╟─21e744b8-f0d1-11ea-2e09-7ffbcdf43c37 +# ╠═10f850fc-f0d1-11ea-2a58-2326a9ea1e2a +# ╟─7b4d5270-f0d3-11ea-0b48-79005f20602c +# ╠═6fd3b7a4-f0d3-11ea-1f26-fb9740cd16e0 +# ╟─fe3559e0-f13b-11ea-06c8-a314e44c20d6 +# ╟─b7ea8a28-f0d7-11ea-3e98-7b19a1f58304 +# ╟─0ccf76e4-f0d9-11ea-07c9-0159e3d4d733 +# ╟─236dab08-f13d-11ea-1922-a3b82cfc7f51 +# ╟─03434682-f13b-11ea-2b6e-11ad781e9a51 +# 
╟─ca13597a-f168-11ea-1a2c-ff7b98b7b2c7 +# ╟─f22aa34e-f0df-11ea-3053-3dcdc070ec2f +# ╟─9232dcc8-f188-11ea-08fe-b787ea93c598 +# ╠═7864bd00-f146-11ea-0020-7fccb3913d8b +# ╠═b369584c-f183-11ea-260a-35dc797e63ad +# ╠═b2cbe058-f183-11ea-39dc-23d4a5b92796 +# ╟─9d9cccb2-f118-11ea-1638-c76682e636b2 +# ╟─c821b906-f0d8-11ea-2df0-8f2d06964aa2 +# ╟─6da3fdfe-f0dd-11ea-2407-7b85217b35cc +# ╠═885ec336-f146-11ea-00c4-c1d1ab4c0001 diff --git a/lecture_notebooks/week3/01-structure.jl b/lecture_notebooks/week3/01-structure.jl new file mode 100644 index 000000000..44bd4b457 --- /dev/null +++ b/lecture_notebooks/week3/01-structure.jl @@ -0,0 +1,668 @@ +### A Pluto.jl notebook ### +# v0.11.12 + +using Markdown +using InteractiveUtils + +# This Pluto notebook uses @bind for interactivity. When running this notebook outside of Pluto, the following 'mock version' of @bind gives bound variables a default value (instead of an error). +macro bind(def, element) + quote + local el = $(esc(element)) + global $(esc(def)) = Core.applicable(Base.get, el) ? Base.get(el) : missing + el + end +end + +# ╔═╡ ae24c8b2-f60b-11ea-2c7a-03857d1217b2 +using Pkg + +# ╔═╡ bc14fc1a-f60b-11ea-207a-91b967f28076 +begin + pkg"add Colors ColorSchemes Images ImageMagick PlutoUI Suppressor InteractiveUtils" + using Colors, ColorSchemes, Images, ImageMagick + using Suppressor, InteractiveUtils, PlutoUI +end + +# ╔═╡ 0feb5674-f5d5-11ea-0714-7379a7d381a3 +using LinearAlgebra + +# ╔═╡ 20125236-f5d6-11ea-3877-6b332497be62 +using SparseArrays + +# ╔═╡ 0f6ecba6-f5da-11ea-22c2-2929e562f413 +using Statistics + +# ╔═╡ b0ba5b8c-f5d1-11ea-1304-3f0e47f935fe +md"# What do data structures, machine learning and the singular-value decomposition have in common?" + +# ╔═╡ ffa95430-f5d1-11ea-3cb7-5bb8d8f13701 +md"""Blurry Fons magic: Exploiting Structure! 
+ +ADD A HINT BOX HERE WITH THE ANSWER "STRUCTURE" """ + +# ╔═╡ 261c4df2-f5d2-11ea-2c72-7d4b09c46098 +md"One-hot vectors: Numbers and images + +0s and 1s and colors" + +# ╔═╡ b5177f70-f60b-11ea-14a9-f5a574cc5185 +Pkg.activate(mktempdir()) + +# ╔═╡ ef8f44b2-f5fc-11ea-1e4d-bd873cd39d6c +@bind nn Slider(1:20, show_value=true) + +# ╔═╡ fd9211c0-f5fc-11ea-1745-7f2dae88af9e +@bind kk Slider(1:nn, default=nn, show_value=true) + +# ╔═╡ 3a1ed5b8-f5fd-11ea-2ecd-b9d08349651f +colors = [colorant"black", colorant"red"] + +# ╔═╡ 77ae1146-f5d2-11ea-1226-27d15c90a8df +begin + v = fill(1, nn) + v[kk] = 2 + + colors[v]' +end + +# ╔═╡ 676f6e3a-f5fd-11ea-3552-e7e7c6382276 +v + +# ╔═╡ 3592648a-f5fd-11ea-2ed0-a1d4a4c14d07 + + +# ╔═╡ 7ffa02f6-f5d2-11ea-1b41-15c21f92e9ee +Int.((1:10) .== 3) + +# ╔═╡ 21a15906-f5d3-11ea-3a71-f53eabc31acc +md"Colors and sliders" + +# ╔═╡ 3c5f0ba8-f5d3-11ea-22d3-4f7c513144bc +md"Data structure for a one-hot vector" + +# ╔═╡ 4d2db24e-f5d4-11ea-10d4-b126f643abee + + +# ╔═╡ 4624cd26-f5d3-11ea-1cf8-7d6555eae5fa +struct OneHot <: AbstractVector{Int} + n::Int + k::Int +end + +# ╔═╡ 397ac764-f5fe-11ea-20cc-8d7cab19d410 +Base.size(x::OneHot) = (x.n, ) + +# ╔═╡ 82c7046c-f5d3-11ea-04e2-ef7c0f4db5da +Base.getindex(x::OneHot, i::Int) = Int(x.k == i) + +# ╔═╡ 7a98f708-f5d3-11ea-2301-cde71ca469ff +x = OneHot(5, 3) + +# ╔═╡ 60214c1a-f5fe-11ea-2d08-c59715bcedd0 +x[1:2] + +# ╔═╡ cf77d83c-f5fd-11ea-136e-8951de64e05e +show_vector(x::OneHot) = colors[x .+ 1]' + +# ╔═╡ 5813e1b2-f5ff-11ea-2849-a1def74fc065 +begin + imshow(M) = get.(Ref(ColorSchemes.rainbow), M ./ maximum(M)) + imshow(x::AbstractVector) = imshow(x') +end + +# ╔═╡ e51d4ef6-f5fd-11ea-28f1-a1616e17f715 +imshow(rand(5, 5)) + +# ╔═╡ 982590d4-f5ff-11ea-3802-73292c75ad6c +imshow(x) + +# ╔═╡ c3f70e18-f5ff-11ea-35aa-31b22ca506b8 +get.([ColorSchemes.rainbow], x)' + +# ╔═╡ c200deb0-f5fd-11ea-32e1-d96596b16ebc +x[3] + +# ╔═╡ a5989a76-f5d3-11ea-3fe5-5f9959199fe8 +typeof(x) + +# ╔═╡ b0bd6c6a-f5d3-11ea-00ee-b7c155f23481 +x[3:5] + 
+# ╔═╡ bd94f82c-f5d3-11ea-2a1e-77ddaadbfeb9 +collect(x) + +# ╔═╡ 8d2c6910-f5d4-11ea-1928-1baf09815687 +md"""How much "information" (numbers) do you need to represent a one-hot vector? Is it $n$ or is it two? +""" + +# ╔═╡ 81c35324-f5d4-11ea-2338-9f982d38732c +md"# Diagonal matrices" + +# ╔═╡ 2cfda0dc-f5d5-11ea-16c4-b5a33b90e37f +md"As you might see in high school:" + +# ╔═╡ 150432d4-f5d5-11ea-32b2-19a2a91d9637 +M = [5 0 0 + 0 6 0 + 0 0 -10] + +# ╔═╡ 21328d1c-f5d5-11ea-288e-4171ad35326d +Diagonal(M) + +# ╔═╡ d8f36278-f5d5-11ea-3338-8573ce40e65e +md"How much information do you need for a diagonal matrix?" + +# ╔═╡ e90c55fc-f5d5-11ea-10f1-470ff772985d +md"""We should always look for *structure* where it exists""" + +# ╔═╡ 19775c3c-f5d6-11ea-15c2-89618e654a1e +md"## Sparse matrices" + +# ╔═╡ 232d1dcc-f5d6-11ea-2710-658c75b9c7a4 +sparse(M) + +# ╔═╡ 2b2feb9c-f5d6-11ea-191c-df20360b12a9 +M2 = spzeros(10^6, 10^6) + +# ╔═╡ 50c74f6c-f5d6-11ea-29f1-5de9997d5d9f +M2[1, 2] = 1 + +# ╔═╡ 5de72b7c-f5d6-11ea-1b6f-35b830b5fb34 +M3 = sparse([1, 2, 10^6], [4, 10, 10^6], [7, 8, 9]) + +# ╔═╡ 2fd7e52e-f5d7-11ea-3b5a-1f338e2451e0 +M4 = [1 0 2 0 10; 0 3 4 0 9; 0 0 0 5 8; 0 0 0 0 7] .* 0 + +# ╔═╡ aa09c008-f5d8-11ea-1bdc-b51ee6eb2478 +sparse(M4) + +# ╔═╡ d941cd66-f5d8-11ea-26ff-47ba7779ab20 +md"Sparse matrix dictionaries? Before CSC" + +# ╔═╡ 62a6ec62-f5d9-11ea-071e-ed33c5dea0cd +md"## Rand: Where is the structure?" + +# ╔═╡ 67274c3c-f5d9-11ea-3475-c9d228e3bd5a +# join(rand("ACGT", 100)) + +vv = rand(1:9, 1000000) + +# ╔═╡ b6c7a918-f600-11ea-18ff-6521507358c6 +md"Mention lossless compression e.g. 
run-length encoding" + +# ╔═╡ 765c6552-f5d9-11ea-29d3-bfe7b4b04612 +md"""Some might guess that there is "no structure" + +Take mean and standard deviation -- some would say that's the structure + +""" + +# ╔═╡ 126fb3ea-f5da-11ea-2f7d-0b3259a296ce +mean(vv), std(vv), 5, sqrt(10 * 2/3) + +# ╔═╡ 5f79e8f4-f5da-11ea-2b55-ef344b8a3ba2 +var(vv) + +# ╔═╡ 9b9e2c2a-f5da-11ea-369b-b513b196515b +md"Statisticians (and professors who've just graded exams) might say that under certain circumstances the mean and the variance give you the necessary structure, and the rest can be thrown away" + +# ╔═╡ e68b98ea-f5da-11ea-1a9d-db45e4f80241 +m = sum(vv) / length(vv) # mean + +# ╔═╡ f20ccac4-f5da-11ea-0e69-413b5e49f423 +σ² = sum( (vv .- m) .^ 2 ) / (length(vv) - 1) + +# ╔═╡ 0bc792e8-f5db-11ea-0b7a-1502ddc8008e +var(vv) + +# ╔═╡ 12a2e96c-f5db-11ea-1c3e-494ae7446886 +σ = sqrt(σ²) + +# ╔═╡ 22487ce2-f5db-11ea-32e9-6f70ab2c0353 +std(vv) + +# ╔═╡ 389ae62e-f5db-11ea-1557-c3adbbee0e5c +md"Sometimes the summary statistics are all you want. (But sometimes not)" + +# ╔═╡ 0c2b6408-f5d9-11ea-2b7f-7fece2eecc1f +md"## Multiplication tables" + +# ╔═╡ 542a9556-f5db-11ea-0375-99f52416f6e4 +md"How do you make a multiplication table?" + +# ╔═╡ 165788b2-f601-11ea-3e69-cdbbb6558e54 +md"Do you recognise this?" + +# ╔═╡ 22941bb8-f601-11ea-1d6e-0d955297bc2e +rand(3) .* rand(4)' + +# ╔═╡ 2f75df7e-f601-11ea-2fc2-aff4f335af33 +imshow(rand(3) .* rand(4)') + +# ╔═╡ 53e6b612-f601-11ea-05a9-5395e69b3c41 +svd(rand(3) .* rand(4)') + +# ╔═╡ 3e919766-f601-11ea-0485-05f45484bf8d +md"It's not easy to see the structure!" 
+ +# ╔═╡ 1052993e-f601-11ea-2c55-0d67e31b670e +M5 = +[1 2 3 4 + 2 4 6 8 + 3 6 9 12 +] + +# ╔═╡ 68190822-f5db-11ea-117f-d10a161208c3 +md"Comprehension:" + +# ╔═╡ 71b44874-f5db-11ea-1f67-47bad9295e03 +md"Slider" + +# ╔═╡ 6c51eddc-f5db-11ea-1235-332cdbb072fa +[i * j for i in 1:3, j in 1:4] + +# ╔═╡ 86fb49ee-f5db-11ea-3bfa-c95c3b8775a3 +md"Explain '" + +# ╔═╡ 173cfab4-f5d9-11ea-0c7c-bf8b0888f6e7 +(1:3) .* (1:4)' + +# ╔═╡ 8bab3e36-f5db-11ea-187a-f31fa8cf357d +reshape([1, 2, 3], 3, 1) + +# ╔═╡ e64291e8-f5db-11ea-0cab-8567b781408f +[1 + 2 + 3] + +# ╔═╡ 0111a124-f5dc-11ea-0904-fdd88d7acac4 +MM = rand(3, 3) + +# ╔═╡ 06406158-f5dc-11ea-02b1-2519a0176993 +MM[:, 1] + +# ╔═╡ 12051f06-f5dc-11ea-2f6c-0fdc50eeff01 +vvv = [1, 2, 3] + +# ╔═╡ 195854e6-f5dc-11ea-114b-2333f87173f7 +a = reshape([1, 2, 3], 3, 1) + +# ╔═╡ 768116f6-f5dc-11ea-1cfe-b3016c574725 +b = reshape([4, 5, 6], 3, 1) + +# ╔═╡ 9050426e-f5dc-11ea-373c-65456732bd34 +a * b' + +# ╔═╡ 965bd07e-f5dc-11ea-2e85-d34996cf2fae +v4 = [1, 2, 3] + +# ╔═╡ a9daf4cc-f5dc-11ea-270b-2566f89f168c +v4 * v4' + +# ╔═╡ dc0c8b72-f5dc-11ea-3e6f-0f43cbf58f56 +v4 .* v4' + +# ╔═╡ 0ed78e76-f5dd-11ea-2ad8-a35a69c0ef9a +v4 + +# ╔═╡ 1648a0fa-f5dd-11ea-0292-495207e83de9 +[v4 v4] + +# ╔═╡ 1e9cefea-f5dd-11ea-3e5f-a189fd41c42e +[1*v4 5*v4 6*v4] + +# ╔═╡ 24664f20-f5dd-11ea-2a69-cd3e0ebd5c39 +v4 * [1, 5, 6]' + +# ╔═╡ 43a4920c-f5dd-11ea-1ab1-0b3d673c0f1e +v4 .* [1, 5, 6]' + +# ╔═╡ 5d767290-f5dd-11ea-2189-81198fd216ce +outer(v, w) = [x * y for x in v, y in w] # just a multiplication table + +# ╔═╡ 5a493052-f601-11ea-2f5f-f940412905f2 +begin + v6 = rand(3) + w6 = rand(4) + + U6, Σ6, V6 = svd( outer(v6, w6) ) +end + +# ╔═╡ 8633afb2-f601-11ea-206b-e9c4b9621c2a +outer(U6[:,1], V6[:, 1]) .* Σ6[1] + +# ╔═╡ 9918c4fa-f601-11ea-3bf1-3506dcb437f7 +outer(v6, w6) + +# ╔═╡ 6aae805e-f5dd-11ea-108c-733daae313dc +outer(v4, [1, 5, 6]) + +# ╔═╡ 9a023cf8-f5dd-11ea-3016-f95d433e6df0 +outer(1:10, 1:10) # works with things other than Vectors + +# ╔═╡ 
b4c82246-f5dd-11ea-068f-2f63a5a382e2 +md"Did you memorize this in third grade?" + +# ╔═╡ d1f87b22-f5dd-11ea-3bc3-471d5b3a5202 +md"Slider: 12 is the standard in the UK" + +# ╔═╡ d790281e-f5dd-11ea-0d1c-f57da5018a6b +md"How much information do I need to store a multiplication table?" + +# ╔═╡ d1578d4c-f601-11ea-2983-27dc131d39b8 +md"### Scaled multiplication table" + +# ╔═╡ d9556f32-f601-11ea-3dd8-1bc876b7b719 +10 .* outer(1:10, 1:10) + +# ╔═╡ e36e4ec2-f5dd-11ea-34ea-1bcf5fd7c16d +md"In the context of 1:n times 1:n, just one number n is needed. + +But given arbitrary vectors v and w, we need to store the whole of v and w" + +# ╔═╡ 52e857ca-f5de-11ea-14bb-bdc0ac24ab90 +md"Toeplitz in hw" + +# ╔═╡ 98f08990-f5de-11ea-1f56-1f2d73649773 +"Add mult tables to make flag" + +# ╔═╡ 21bbb60a-f5df-11ea-2c1b-dd716a657df8 +cs = distinguishable_colors(100) + +# ╔═╡ a5d637ea-f5de-11ea-3b70-877e876bc9c9 +flag = outer([1, 1, 1, 2, 2, 2, 1, 1, 1], ones(Int, 9)) + +# ╔═╡ 2668e100-f5df-11ea-12b0-073a578a5edb +cs[flag] + +# ╔═╡ e8d727f2-f5de-11ea-1456-f72602e81e0d +cs[flag + flag'] + +# ╔═╡ 4c80c786-f5df-11ea-31ec-318439349648 +cs[outer(rand(1:10, 3), rand(1:10, 5))] + +# ╔═╡ 8d2bae22-f5df-11ea-10d3-859f4c3aa6c7 +Gray.((100 .- outer(1:10, 1:10)) ./ 100) + +# ╔═╡ e23debc8-f5df-11ea-2c1e-b58a64f9acd3 +ColorSchemes.rainbow[floor.(Int, (1:10, 1:10) ./ 10)] + +# ╔═╡ 70b8918e-f5e0-11ea-3c86-6fa72df5a28e +ColorSchemes.rainbow.colors[floor.(Int, outer(1:10, 1:10) ./ 10)] + +# ╔═╡ a0934122-f5e0-11ea-1f3b-ab0021ac6906 +ColorSchemes.rainbow.colors[ceil.(Int, outer(1:10, 1:10) ./ 10)] + +# ╔═╡ b9381b8a-f5e0-11ea-1f84-e39325203038 + + +# ╔═╡ 4cf96558-f5e0-11ea-19be-db4c59a41120 +ColorSchemes.rainbow + +# ╔═╡ 11de523c-f5e0-11ea-2f3d-c981c1b6a1fe +outer(1:10, 1:10) ./ 100 + +# ╔═╡ fb0c6c7e-f5df-11ea-38d0-2d98c9dc232f + + +# ╔═╡ ebd72fb8-f5e0-11ea-0630-573337dff753 +md"## SVD" + +# ╔═╡ f00d1eaa-f5e0-11ea-21df-d9cf6f7af9b9 +md"Grab the rank-1 approx of an image. 
+ +First multiplication table" + +# ╔═╡ b6478e1a-f5f6-11ea-3b92-6d4f067285f4 +url = "https://arbordayblog.org/wp-content/uploads/2018/06/oak-tree-sunset-iStock-477164218.jpg" + +# ╔═╡ d4a049a2-f5f8-11ea-2f34-4bc0e3a5954a +download(url, "tree.jpg") + +# ╔═╡ f2c11f88-f5f8-11ea-3e02-c1d4fa22031e +begin + image = load("tree.jpg") + image = image[1:5:end, 1:5:end] +end + +# ╔═╡ f7e38aaa-f5f8-11ea-002f-09dd1fa21181 +reds = [Float64(c.r) for c in image] + +# ╔═╡ 29062f7a-f5f9-11ea-2682-1374e7694e32 +picture = Float64.(channelview(image)) + +# ╔═╡ 6156fd1e-f5f9-11ea-06a9-211c7ab813a4 +pr, pg, pb = eachslice(picture, dims=1) + +# ╔═╡ a9766e68-f5f9-11ea-0019-6f9d02050521 +[RGB.(pr, 0, 0) RGB.(0, pg, 0) RGB.(0, 0, pb)] + +# ╔═╡ fee66076-f5f9-11ea-2316-abc57b62a57c +RGB.(image + +# ╔═╡ 6532b388-f5f9-11ea-2ae2-f9b12e441bb3 +pr + +# ╔═╡ 0c0ee362-f5f9-11ea-0f75-2d2810c88d65 +begin + Ur, Σr, Vr = svd(pr) + Ug, Σg, Vg = svd(pg) + Ub, Σb, Vb = svd(pb) +end + +# ╔═╡ 3c28c4c2-f5fa-11ea-1947-9dfe91ea1535 +RGB.(sum(outer(Ur[:,i], Vr[:,i]) .* Σr[i] for i in 1:5), 0, 0) + +# ╔═╡ f56f40e4-f5fa-11ea-3a99-156565445c2e +@bind n Slider(1:100, show_value=true) + +# ╔═╡ 7ba6e6a6-f5fa-11ea-2bcd-616d5a3c898b +RGB.(sum(outer(Ur[:,i], Vr[:,i]) .* Σr[i] for i in 1:n), + sum(outer(Ug[:,i], Vg[:,i]) .* Σg[i] for i in 1:n), + sum(outer(Ub[:,i], Vb[:,i]) .* Σb[i] for i in 1:n)) + +# ╔═╡ 8a22387e-f5fb-11ea-249b-435af5c0a6b6 + + +# ╔═╡ 8df84fcc-f5d5-11ea-312f-bf2a3b3ce2ce +md"## Appendix" + +# ╔═╡ 91980bcc-f5d5-11ea-211f-e9a08ff0fb19 +function with_terminal(f) + local spam_out, spam_err + @color_output false begin + spam_out = @capture_out begin + spam_err = @capture_err begin + f() + end + end + end + spam_out, spam_err + + HTML(""" + +
+
$(Markdown.htmlesc(spam_out))
+
+ """) +end + +# ╔═╡ 466901ea-f5d5-11ea-1db5-abf82c96eabf +with_terminal() do + dump(M) +end + +# ╔═╡ b38c4aae-f5d5-11ea-39b6-7b0c7d529019 +with_terminal() do + dump(Diagonal(M)) +end + +# ╔═╡ 8b60629e-f5d6-11ea-27c8-d934460d3a57 +with_terminal() do + dump(M3) +end + +# ╔═╡ cde79f38-f5d6-11ea-3297-0b5b240f7b9e +with_terminal() do + dump(sparse(M4)) +end + +# ╔═╡ 4f8684ea-f5fb-11ea-07be-11d8046f35df +with_terminal() do + @time @inbounds RGB.(sum(outer(Ur[:,i], Vr[:,i]) .* Σr[i] for i in 1:20), + sum(outer(Ug[:,i], Vg[:,i]) .* Σg[i] for i in 1:20), + sum(outer(Ub[:,i], Vb[:,i]) .* Σb[i] for i in 1:20)); +end + +# ╔═╡ Cell order: +# ╟─b0ba5b8c-f5d1-11ea-1304-3f0e47f935fe +# ╟─ffa95430-f5d1-11ea-3cb7-5bb8d8f13701 +# ╟─261c4df2-f5d2-11ea-2c72-7d4b09c46098 +# ╠═ae24c8b2-f60b-11ea-2c7a-03857d1217b2 +# ╠═b5177f70-f60b-11ea-14a9-f5a574cc5185 +# ╠═bc14fc1a-f60b-11ea-207a-91b967f28076 +# ╠═ef8f44b2-f5fc-11ea-1e4d-bd873cd39d6c +# ╠═fd9211c0-f5fc-11ea-1745-7f2dae88af9e +# ╠═3a1ed5b8-f5fd-11ea-2ecd-b9d08349651f +# ╠═676f6e3a-f5fd-11ea-3552-e7e7c6382276 +# ╠═77ae1146-f5d2-11ea-1226-27d15c90a8df +# ╠═3592648a-f5fd-11ea-2ed0-a1d4a4c14d07 +# ╠═7ffa02f6-f5d2-11ea-1b41-15c21f92e9ee +# ╟─21a15906-f5d3-11ea-3a71-f53eabc31acc +# ╟─3c5f0ba8-f5d3-11ea-22d3-4f7c513144bc +# ╠═4d2db24e-f5d4-11ea-10d4-b126f643abee +# ╠═4624cd26-f5d3-11ea-1cf8-7d6555eae5fa +# ╠═397ac764-f5fe-11ea-20cc-8d7cab19d410 +# ╠═82c7046c-f5d3-11ea-04e2-ef7c0f4db5da +# ╠═60214c1a-f5fe-11ea-2d08-c59715bcedd0 +# ╠═7a98f708-f5d3-11ea-2301-cde71ca469ff +# ╠═cf77d83c-f5fd-11ea-136e-8951de64e05e +# ╠═5813e1b2-f5ff-11ea-2849-a1def74fc065 +# ╠═e51d4ef6-f5fd-11ea-28f1-a1616e17f715 +# ╠═982590d4-f5ff-11ea-3802-73292c75ad6c +# ╠═c3f70e18-f5ff-11ea-35aa-31b22ca506b8 +# ╠═c200deb0-f5fd-11ea-32e1-d96596b16ebc +# ╠═a5989a76-f5d3-11ea-3fe5-5f9959199fe8 +# ╠═b0bd6c6a-f5d3-11ea-00ee-b7c155f23481 +# ╠═bd94f82c-f5d3-11ea-2a1e-77ddaadbfeb9 +# ╠═8d2c6910-f5d4-11ea-1928-1baf09815687 +# ╟─81c35324-f5d4-11ea-2338-9f982d38732c +# 
╟─2cfda0dc-f5d5-11ea-16c4-b5a33b90e37f +# ╠═0feb5674-f5d5-11ea-0714-7379a7d381a3 +# ╠═150432d4-f5d5-11ea-32b2-19a2a91d9637 +# ╠═21328d1c-f5d5-11ea-288e-4171ad35326d +# ╠═466901ea-f5d5-11ea-1db5-abf82c96eabf +# ╠═b38c4aae-f5d5-11ea-39b6-7b0c7d529019 +# ╟─d8f36278-f5d5-11ea-3338-8573ce40e65e +# ╟─e90c55fc-f5d5-11ea-10f1-470ff772985d +# ╠═19775c3c-f5d6-11ea-15c2-89618e654a1e +# ╠═20125236-f5d6-11ea-3877-6b332497be62 +# ╠═232d1dcc-f5d6-11ea-2710-658c75b9c7a4 +# ╠═2b2feb9c-f5d6-11ea-191c-df20360b12a9 +# ╠═50c74f6c-f5d6-11ea-29f1-5de9997d5d9f +# ╠═5de72b7c-f5d6-11ea-1b6f-35b830b5fb34 +# ╠═8b60629e-f5d6-11ea-27c8-d934460d3a57 +# ╠═2fd7e52e-f5d7-11ea-3b5a-1f338e2451e0 +# ╠═cde79f38-f5d6-11ea-3297-0b5b240f7b9e +# ╠═aa09c008-f5d8-11ea-1bdc-b51ee6eb2478 +# ╟─d941cd66-f5d8-11ea-26ff-47ba7779ab20 +# ╟─62a6ec62-f5d9-11ea-071e-ed33c5dea0cd +# ╠═67274c3c-f5d9-11ea-3475-c9d228e3bd5a +# ╟─b6c7a918-f600-11ea-18ff-6521507358c6 +# ╟─765c6552-f5d9-11ea-29d3-bfe7b4b04612 +# ╠═0f6ecba6-f5da-11ea-22c2-2929e562f413 +# ╠═126fb3ea-f5da-11ea-2f7d-0b3259a296ce +# ╠═5f79e8f4-f5da-11ea-2b55-ef344b8a3ba2 +# ╟─9b9e2c2a-f5da-11ea-369b-b513b196515b +# ╠═e68b98ea-f5da-11ea-1a9d-db45e4f80241 +# ╠═f20ccac4-f5da-11ea-0e69-413b5e49f423 +# ╠═0bc792e8-f5db-11ea-0b7a-1502ddc8008e +# ╠═12a2e96c-f5db-11ea-1c3e-494ae7446886 +# ╠═22487ce2-f5db-11ea-32e9-6f70ab2c0353 +# ╠═389ae62e-f5db-11ea-1557-c3adbbee0e5c +# ╟─0c2b6408-f5d9-11ea-2b7f-7fece2eecc1f +# ╟─542a9556-f5db-11ea-0375-99f52416f6e4 +# ╠═165788b2-f601-11ea-3e69-cdbbb6558e54 +# ╠═22941bb8-f601-11ea-1d6e-0d955297bc2e +# ╠═2f75df7e-f601-11ea-2fc2-aff4f335af33 +# ╠═53e6b612-f601-11ea-05a9-5395e69b3c41 +# ╠═5a493052-f601-11ea-2f5f-f940412905f2 +# ╠═8633afb2-f601-11ea-206b-e9c4b9621c2a +# ╠═9918c4fa-f601-11ea-3bf1-3506dcb437f7 +# ╟─3e919766-f601-11ea-0485-05f45484bf8d +# ╠═1052993e-f601-11ea-2c55-0d67e31b670e +# ╠═68190822-f5db-11ea-117f-d10a161208c3 +# ╠═71b44874-f5db-11ea-1f67-47bad9295e03 +# ╠═6c51eddc-f5db-11ea-1235-332cdbb072fa +# 
╠═86fb49ee-f5db-11ea-3bfa-c95c3b8775a3 +# ╠═173cfab4-f5d9-11ea-0c7c-bf8b0888f6e7 +# ╠═8bab3e36-f5db-11ea-187a-f31fa8cf357d +# ╠═e64291e8-f5db-11ea-0cab-8567b781408f +# ╠═0111a124-f5dc-11ea-0904-fdd88d7acac4 +# ╠═06406158-f5dc-11ea-02b1-2519a0176993 +# ╠═12051f06-f5dc-11ea-2f6c-0fdc50eeff01 +# ╠═195854e6-f5dc-11ea-114b-2333f87173f7 +# ╠═768116f6-f5dc-11ea-1cfe-b3016c574725 +# ╠═9050426e-f5dc-11ea-373c-65456732bd34 +# ╠═965bd07e-f5dc-11ea-2e85-d34996cf2fae +# ╠═a9daf4cc-f5dc-11ea-270b-2566f89f168c +# ╠═dc0c8b72-f5dc-11ea-3e6f-0f43cbf58f56 +# ╠═0ed78e76-f5dd-11ea-2ad8-a35a69c0ef9a +# ╠═1648a0fa-f5dd-11ea-0292-495207e83de9 +# ╠═1e9cefea-f5dd-11ea-3e5f-a189fd41c42e +# ╠═24664f20-f5dd-11ea-2a69-cd3e0ebd5c39 +# ╠═43a4920c-f5dd-11ea-1ab1-0b3d673c0f1e +# ╠═5d767290-f5dd-11ea-2189-81198fd216ce +# ╠═6aae805e-f5dd-11ea-108c-733daae313dc +# ╠═9a023cf8-f5dd-11ea-3016-f95d433e6df0 +# ╠═b4c82246-f5dd-11ea-068f-2f63a5a382e2 +# ╠═d1f87b22-f5dd-11ea-3bc3-471d5b3a5202 +# ╠═d790281e-f5dd-11ea-0d1c-f57da5018a6b +# ╠═d1578d4c-f601-11ea-2983-27dc131d39b8 +# ╠═d9556f32-f601-11ea-3dd8-1bc876b7b719 +# ╠═e36e4ec2-f5dd-11ea-34ea-1bcf5fd7c16d +# ╠═52e857ca-f5de-11ea-14bb-bdc0ac24ab90 +# ╠═98f08990-f5de-11ea-1f56-1f2d73649773 +# ╠═21bbb60a-f5df-11ea-2c1b-dd716a657df8 +# ╠═a5d637ea-f5de-11ea-3b70-877e876bc9c9 +# ╠═2668e100-f5df-11ea-12b0-073a578a5edb +# ╠═e8d727f2-f5de-11ea-1456-f72602e81e0d +# ╠═4c80c786-f5df-11ea-31ec-318439349648 +# ╠═8d2bae22-f5df-11ea-10d3-859f4c3aa6c7 +# ╠═e23debc8-f5df-11ea-2c1e-b58a64f9acd3 +# ╠═70b8918e-f5e0-11ea-3c86-6fa72df5a28e +# ╠═a0934122-f5e0-11ea-1f3b-ab0021ac6906 +# ╠═b9381b8a-f5e0-11ea-1f84-e39325203038 +# ╠═4cf96558-f5e0-11ea-19be-db4c59a41120 +# ╠═11de523c-f5e0-11ea-2f3d-c981c1b6a1fe +# ╠═fb0c6c7e-f5df-11ea-38d0-2d98c9dc232f +# ╠═ebd72fb8-f5e0-11ea-0630-573337dff753 +# ╠═f00d1eaa-f5e0-11ea-21df-d9cf6f7af9b9 +# ╠═b6478e1a-f5f6-11ea-3b92-6d4f067285f4 +# ╠═d4a049a2-f5f8-11ea-2f34-4bc0e3a5954a +# ╠═f2c11f88-f5f8-11ea-3e02-c1d4fa22031e +# 
╠═f7e38aaa-f5f8-11ea-002f-09dd1fa21181 +# ╠═29062f7a-f5f9-11ea-2682-1374e7694e32 +# ╠═6156fd1e-f5f9-11ea-06a9-211c7ab813a4 +# ╠═a9766e68-f5f9-11ea-0019-6f9d02050521 +# ╠═fee66076-f5f9-11ea-2316-abc57b62a57c +# ╠═6532b388-f5f9-11ea-2ae2-f9b12e441bb3 +# ╠═0c0ee362-f5f9-11ea-0f75-2d2810c88d65 +# ╠═3c28c4c2-f5fa-11ea-1947-9dfe91ea1535 +# ╠═f56f40e4-f5fa-11ea-3a99-156565445c2e +# ╠═7ba6e6a6-f5fa-11ea-2bcd-616d5a3c898b +# ╠═8a22387e-f5fb-11ea-249b-435af5c0a6b6 +# ╠═4f8684ea-f5fb-11ea-07be-11d8046f35df +# ╠═8df84fcc-f5d5-11ea-312f-bf2a3b3ce2ce +# ╠═91980bcc-f5d5-11ea-211f-e9a08ff0fb19 diff --git a/lecture_notebooks/week3/02-structure-in-data-PCA.jl b/lecture_notebooks/week3/02-structure-in-data-PCA.jl new file mode 100644 index 000000000..4fc340daf --- /dev/null +++ b/lecture_notebooks/week3/02-structure-in-data-PCA.jl @@ -0,0 +1,671 @@ +### A Pluto.jl notebook ### +# v0.11.14 + +using Markdown +using InteractiveUtils + +# This Pluto notebook uses @bind for interactivity. When running this notebook outside of Pluto, the following 'mock version' of @bind gives bound variables a default value (instead of an error). +macro bind(def, element) + quote + local el = $(esc(element)) + global $(esc(def)) = Core.applicable(Base.get, el) ? Base.get(el) : missing + el + end +end + +# ╔═╡ 0dcfd858-f867-11ea-301c-c3ca0a224117 +using Plots + +# ╔═╡ 1e058ba2-ec94-11ea-09af-7f9f9cc3a233 +using PlutoUI + +# ╔═╡ 13f6ccac-7ce0-48d7-a0ef-e83489625e1d +using Statistics + +# ╔═╡ 78763674-f8fe-11ea-349c-d997f30ac1f6 +using LinearAlgebra + +# ╔═╡ 35e83a04-f864-11ea-0a8e-9ddf6eec02f3 +using Images, ColorSchemes + +# ╔═╡ 16887070-f891-11ea-2db3-47b91930e728 +using ForwardDiff + +# ╔═╡ 3b9941ac-6043-4dc6-850f-4c7b3ae9d9a7 +md""" +# Finding structure in data +""" + +# ╔═╡ 7365084a-1f37-4897-bca4-fc5855c5ee4e +md""" +In this notebook we will look at one way to analyse, understand and simplify data. 
We will look at the ideas and intuition behind **principal component analysis**, a technique to find the most important directions and dimensions in a data set. This is very closely related to the SVD (singular-value decomposition) that we saw on Tuesday, and is one way to find and exploit structure in data. +""" + +# ╔═╡ ed7ff6b2-f863-11ea-1a59-eb242a8674e3 +md"## Flags" + +# ╔═╡ fed5845e-f863-11ea-2f95-c331d3c62647 +md"Let's start off by recalling the idea of a **multiplication table**, also called an **outer product**:" + +# ╔═╡ 0e1a6d80-f864-11ea-074a-5f7890180114 +outer(v, w) = [x * y for x in v, y in w] + +# ╔═╡ 2e497e30-f895-11ea-09f1-d7f2c1f61193 +outer(1:10, 1:10) + +# ╔═╡ ab3d55cc-f905-11ea-2f22-5398f3aca803 +md"Some flags are a simple example of this:" + +# ╔═╡ 13b6c108-f864-11ea-2447-2b0741f15c7b +flag = outer([1, 0.1, 2], ones(6)) + +# ╔═╡ e66b30a6-f914-11ea-2c0f-35282d45a30a +ones(6) + +# ╔═╡ 71d1b12e-f895-11ea-39df-f5c18a7766c3 +flag2 = outer([1, 0.1, 2], [1, 1, 1, 3, 3, 3]) + +# ╔═╡ cdbe1d8e-f905-11ea-3884-efeeef386dda +md"## Rank of a matrix" + +# ╔═╡ d9aa9af0-f865-11ea-379e-f16b452bd94c +md"A matrix that can be written exactly as a single multiplication table is called a **rank-1** matrix. If it can be written as the *sum* of *two* multiplication tables, it is called **rank 2**, etc." + +# ╔═╡ 2e8ae92a-f867-11ea-0219-1bdd9627c1ea +md"Let's see what a general rank-1 matrix looks like:" + +# ╔═╡ 38adc490-f867-11ea-1de5-3b633aff7c97 +image = outer([1; 0.4; rand(50)], rand(500)); + +# ╔═╡ 9cf23f9a-f864-11ea-3a08-af448aceefd8 +md"Now what happens if we add a little bit of **noise**, i.e. randomness?" + +# ╔═╡ a5b62530-f864-11ea-21e8-71ccfed487f8 +noisy_image = image .+ 0.03 .* randn.(); + +# ╔═╡ c41df86c-f865-11ea-1253-4942bbdbe9d2 +md"""The noisy image now has a rank larger than 1. But visually we can see that it is "close to" the original rank-1 matrix. + +Given this matrix, how can we discover that it is close to a structured, rank-1 matrix? 
We would like to be able to find this out and say that the matrix is close to a simple one.""" + +# ╔═╡ 7fca33ac-f864-11ea-2a8b-933eb382c172 +md"## Images as data" + +# ╔═╡ 283f5da4-f866-11ea-27d4-957ca2551b92 +md"Images are just one example of the many different types of data we come across in the world. + +Let's treat the image as a **data matrix**, where each column of the image / matrix is a **vector** representing one observation of data. (In data science it is often the rows that correspond to observations.) + +Let's try to visualize those vectors, taking just the first two rows of the image as the $x$ and $y$ coordinates of our data points:" + +# ╔═╡ 54977286-f908-11ea-166d-d1df33f38454 +image[1:2, 1:20] + +# ╔═╡ 7b4e90b4-f866-11ea-26b3-95efde6c650b +begin + xx = image[1, :] + yy = image[2, :] +end + +# ╔═╡ f574ad7c-f866-11ea-0efa-d9d0602aa63b +md"## Plotting data" + +# ╔═╡ 8775b3fe-f866-11ea-3e6f-9732e39a3525 +md"We would like to **visualise** this data. There are various plotting packages that we could use. We will use `Plots.jl`:" + +# ╔═╡ 7bacf44e-f896-11ea-38be-2b16ae7ca99f +scatter(xx, yy, alpha=0.5, framestyle=:origin, label="original image", leg=:topleft, + xlabel="x values", ylabel="y values") + +# ╔═╡ 1147cbda-f867-11ea-08fa-ef6ed2ae1e93 +begin + scatter(xx, yy, + leg=:topleft, label="rank-1", ms=3, alpha=0.3, + size=(500, 400), m=:square, c=:red, + framestyle=:origin) + + xs = noisy_image[1, :] + ys = noisy_image[2, :] + + scatter!(xs, ys, label="noisy", m=:., alpha=0.3, c=:blue) +end + +# ╔═╡ 8a611e36-f867-11ea-121f-317b7c145fe3 +md"We see that the exact rank-1 matrix has columns that **lie along a line**, since they are just multiples of one another. + +The approximate rank-1 matrix has columns that **lie *close to* a line**!" + +# ╔═╡ f7371934-f867-11ea-3b53-d1566684585c +md"So, given the data, we want to look at it to see if it lies close to a line or not. +How can we do so in an *automatic* way? 
+" + +# ╔═╡ 119dc35c-ec94-11ea-190c-23a750fbe7f4 +md"The data are given by pairs $(x_i, y_i)$. We can highlight the $i$th data point in the set to see how the data are spread out. Remember that this is literally scanning through the columns of the image!" + +# ╔═╡ 2043d4e6-ec94-11ea-1e1a-c75742eafe71 +@bind i Slider(1:length(xs), show_value=true) + +# ╔═╡ 2a705962-ec94-11ea-1181-2f001ccf472f +begin + scatter(xs, ys, ms=4, alpha=0.5, ratio=1, leg=false, size=(500, 400), + framestyle=:origin) + scatter!([xs[i]], [ys[i]], ms=8, alpha=0.8, c=:red) +end + + +# ╔═╡ 987c1f2e-f868-11ea-1125-0d8c02843ae4 +md"## Size of the data cloud" + +# ╔═╡ 9e78b048-f868-11ea-192e-d903265d1eb5 +md"Looking at this cloud of data points, a natural thing to do is to try to *measure* it: How wide is it, and how tall?" + +# ╔═╡ 24df1f32-ec90-11ea-1f6d-03c1bfa5df8e +md"""For example, let's think about calculating the width of the cloud, i.e. the range of possible $x$-values of the data. For this, the $y$-values are actually irrelevant. + +A first step in analysing data is often to **centre** it around 0 by subtracting the mean. This is sometimes called "de-meaning": +""" + +# ╔═╡ aec46a9b-f743-4cbd-97a7-3ef3cac78b12 +begin + xs_centered = xs .- mean(xs) + ys_centered = ys .- mean(ys) +end + +# ╔═╡ 1b8c743e-ec90-11ea-10aa-e3b94f768f82 +scatter(xs_centered, ys_centered, ms=5, alpha=0.5, ratio=1, leg=false, framestyle=:origin) + +# ╔═╡ f5358ce4-f86a-11ea-2989-b1f37be89183 +md"A common way (but not the only way!) to calculate the width of a data set is the **standard deviation**, i.e. the root-mean-square distance of the centered data from the origin. We will do this *separately* for $x$ and $y$ by *projecting* onto that direction -- i.e. 
effectively ignoring the other coordinate:" + +# ╔═╡ 870d3efa-f8fc-11ea-1593-1552511dcf86 +begin + scatter(xs_centered, ys_centered, ms=5, alpha=0.5, ratio=1, leg=false, framestyle=:origin) + + scatter!(xs_centered, zeros(size(xs_centered)), ms=5, alpha=0.1, ratio=1, leg=false, framestyle=:origin) + + for i in 1:length(xs_centered) + plot!([(xs_centered[i], ys_centered[i]), (xs_centered[i], 0)], ls=:dash, c=:black, alpha=0.1) + end + + plot!() + +end + +# ╔═╡ 03ab44c0-f8fd-11ea-2243-1f3580f98a65 +md"This gives the following approximate extents (standard deviations) of the cloud:" + +# ╔═╡ 2c3721da-f86b-11ea-36cf-3fe4c6622dc6 +begin + width = sqrt(mean(xs_centered.^2)) + height = sqrt(mean(ys_centered.^2)) +end + +# ╔═╡ 6dec0db8-ec93-11ea-24ad-e17870ee64c2 +begin + scatter(xs_centered, ys_centered, ms=5, alpha=0.5, ratio=1, leg=false, + framestyle=:origin) + + vline!([-2*width, 2*width], ls=:dash, lw=1.5) + hline!([-2*height, 2*height], ls=:dash, lw=1.5) +end + +# ╔═╡ 5fab2c32-f86b-11ea-2f27-ed5feaac1fa5 +md"We expect most (~95%) of the data to be contained within the mean $\pm$ (twice the standard deviation)." + +# ╔═╡ ae9a2900-ec93-11ea-1ae5-0748221328fc +md"## Correlated data" + +# ╔═╡ b81c9db2-ec93-11ea-0dbd-4bd0951cb2cc +md"""However, from the figure we see that $x$ and $y$ are not the correct directions to think about for this data set. It would be more natural to think about other directions: the direction in which the data set is mainly pointing (roughly, the direction in which it's longest) and the approximately perpendicular direction in which it is most narrow. + +We need to find from the data *which* directions these are, and the width in those directions. + +We cannot get that information by looking at $x$-coordinates and $y$-coordinates separately; rather, it is encoded in the *relationship* between the values of $x_i$ and $y_i$ for those points $(x_i, y_i)$ that are in the data set. 
+ +For example, when $x$ is large and negative, $y$ is also quite negative; when $x$ is 0, $y$ is near $0$, and when $x$ is large and positive, so is $y$. We say that $x$ and $y$ are **correlated** -- literally they are mutually ("co") related, such that knowing some information about one of them allows us to predict something about the other. +""" + +# ╔═╡ 80722856-f86d-11ea-363d-53fc5f6b8152 +md"There are standard ways of calculating this correlation, but we prefer to hone our **intuition** using computational thinking instead!" + +# ╔═╡ b8fa6a1c-f86d-11ea-3d6b-2959d737254b +md"We want to think about different *directions*, so let's introduce an angle $\theta$ to describe the direction along which we are looking. We want to calculate the width of the cloud along that direction. + +Effectively we are *changing coordinates* to a new coordinate oriented along the line. To do this fully requires more linear algebra than we are assuming in this course, but let's see what it looks like:" + +# ╔═╡ 3547f296-f86f-11ea-1698-53d3c1a0bc30 +md"## Rotating the data" + +# ╔═╡ 7a83101e-f871-11ea-1d87-4946162777b5 +md"""By rotating the data we can look in different directions and calculate the width of the data set "along that direction". Again, what we are really doing is **projecting** the data onto that direction.""" + +# ╔═╡ e8276b4e-f86f-11ea-38be-218a72452b10 +M = [xs_centered ys_centered]' + +# ╔═╡ d71fdaea-f86f-11ea-1a1f-45e4d50926d3 +imax = argmax(M[1, :]) + +# ╔═╡ 757c6808-f8fe-11ea-39bb-47e4da65113a +svdvals(M) + +# ╔═╡ cd9e05ee-f86f-11ea-0422-25f8329c7ef2 +R(θ)= [cos(θ) sin(θ) + -sin(θ) cos(θ)] + +# ╔═╡ 7eb51908-f906-11ea-19d2-e947d81cb743 +md"In the following figure, we are rotating the axis (red arrow) around in the left panel. 
In the right panel we are viewing the data from the point of view of that new coordinate direction, effectively as if we rotated our head so the red vector was horizontal:" + +# ╔═╡ 4f1980ea-f86f-11ea-3df2-35cca6c961f3 +@bind θ Slider(0:0.01:2π, show_value=true, default=0.0) + +# ╔═╡ 3b71142c-f86f-11ea-0d43-47011d00786c +p1 = begin + + scatter(M[1, :], M[2, :], ratio=1, leg=false, ms=2.5, alpha=0.5, + framestyle=:origin) + + plot!([0.7 .* (-cos(θ), -sin(θ)), 0.7 .* (cos(θ), sin(θ))], lw=2, arrow=true, c=:red, alpha=0.8) + xlims!(-0.7, 0.7) + ylims!(-0.7, 0.7) + + scatter!([M[1, imax]], [M[2, imax]], ms=5, alpha=1, c=:yellow) + + annotate!(0, 1.2, text("align arrow with cloud", :red, 10)) +end; + +# ╔═╡ 88bbe1bc-f86f-11ea-3b6b-29175ddbea04 +p2 = begin + M2 = R(θ) * M + + scatter(M2[1, :], M2[2, :],ratio=1, leg=false, ms=2.5, alpha=0.5, framestyle=:origin) + # plot!([(-1, 0), (1, 0)], lw=3, arrow=true, c=:red) + + scatter!([M2[1, imax]], [M2[2, imax]], ms=5, alpha=1, c=:yellow) + + xlims!(-0.7, 0.7) + ylims!(-0.7, 0.7) + + σ = std(M2[1, :]) + vline!([-2σ, 2σ], ls=:dash, lw=2) +end; + +# ╔═╡ 2ffe7ed0-f870-11ea-06aa-390581500ca1 +plot(p1, p2) + +# ╔═╡ a5cdad52-f906-11ea-0486-755a6403a367 +md"Let's plot the variance in a direction $\theta$ as a function of $\theta$:" + +# ╔═╡ 0115c974-f871-11ea-1204-054510848849 +begin + f(θ) = var((R(θ) * M)[1,:]) + f(θ::AbstractArray) = f(θ[1]) +end + +# ╔═╡ 0935c870-f871-11ea-2a0b-b1b824379350 +begin + plot(0:0.01:2π, f, leg=false, size=(400, 300)) + + xlabel!("θ") + ylabel!("variance in direction θ") +end + +# ╔═╡ e4af4d26-f877-11ea-1de3-a9f8d389138e +md"""The direction in which the variance is **maximised** gives the most important direction, the first **principal component**. 
We can quantify how close the data is to being along a single line using the width in the perpendicular direction; if it is "very small" compared to the width in the first principal direction then the data is close to being rank 1.""" + +# ╔═╡ bf57f674-f906-11ea-08eb-9b50818a025b +md"The simplest way to maximise this function is to evaluate it everywhere and find one of the places where it takes the maximum value:" + +# ╔═╡ 17e015fe-f8ff-11ea-17b4-a3aa072cd7b3 +begin + θs = 0:0.01:2π + fs = f.(θs) + + θmax = θs[argmax(fs)] + θmin = θs[argmin(fs)] + + fmax = f(θmax) + fmin = f(θmin) +end + +# ╔═╡ 045b9b98-f8ff-11ea-0d49-5b209319e951 +begin + scatter(xs_centered, ys_centered, ms=5, alpha=0.3, ratio=1, leg=false, + framestyle=:origin) + + plot!([(0, 0), 2*sqrt(fmax) .* (cos(θmax), sin(θmax))], arrow=true, lw=3, c=:red) + plot!([(0, 0), 2*sqrt(fmin) .* (cos(θmin), sin(θmin))], arrow=true, lw=3, c=:red) + +end + +# ╔═╡ cfec1ec4-f8ff-11ea-265d-ab4844f0f739 +md"Note that the directions that maximise and minimise variance are perpendicular. This is always the case. + +We can think of this procedure as effectively *fitting an ellipse* to the data. The widths of the ellipse axes show the relative importance of each direction in the data." + +# ╔═╡ e6e900b8-f904-11ea-2a0d-953b99785553 +begin + circle = [cos.(θs) sin.(θs)]' + stretch = [2 * sqrt(fmax) 0 + 0 2 * sqrt(fmin)] + ellipse = R(-θmax) * stretch * circle + + plot!(ellipse[1, :], ellipse[2, :], series=:shape, alpha=0.4, fill=true, c=:orange) +end + +# ╔═╡ aaff88e8-f877-11ea-1527-ff4d3db663db +md"## Higher dimensions" + +# ╔═╡ aefa84de-f877-11ea-3e26-678008e9739e +md"If we now take columns of the first three rows of the original image, we have vectors in 3D. + +A rank-1 matrix corresponds to a line in 3D, while a rank-2 matrix gives a **plane** in 3D. Rank-2 + noise gives a noisy cloud lying close to a plane. + +Similarly to what we did above, we need to calculate the ellipsoid that best fits the data. 
The widths of the axes of the ellipsoid tell us how close to being a line or a plane (rank-1 or rank-2) the data is. +" + +# ╔═╡ 0bd9358e-f879-11ea-2c83-ed4e7bf9d903 +md"In more than 3D we can no longer visualise the data, but the same idea applies. The calculations are done using the SVD. + +If the widths of the ellipsoid in some directions are very small, we can ignore those directions and hence reduce the dimensionality of the data, by changing coordinates to the principal components." + +# ╔═╡ eb961e36-f899-11ea-39a9-eb33c949b79d +@bind ϕ1 Slider(0:0.1:180, show_value=true, default=30) + +# ╔═╡ fdc87844-f899-11ea-1f2f-afe1cd43a68a +@bind ϕ2 Slider(0:0.1:90, show_value=true, default=30) + +# ╔═╡ 232454b4-f87a-11ea-1c69-91edfca1e589 +md"## Application: A simple recommendation engine" + +# ╔═╡ 2b44df7e-f87a-11ea-1690-dd459eae05a3 +md"Suppose we have data on movie recommendations. It might look something like the following data matrix. The rows correspond to different people and the columns to different movies. Missing values are denoted by the special value `missing` and are shown in black. +" + +# ╔═╡ e5f67376-f917-11ea-1799-4341e3a758d5 +missing + +# ╔═╡ e7ae7312-f917-11ea-1276-bf8687cc0e57 +typeof(missing) + +# ╔═╡ e87e74a6-f87a-11ea-02d5-1970d010bde9 +md"""If we think of movies as having properties, or **features**, such as being a drama or action movie, or having a certain actor in a lead role, and that each person has certain preferences, it might be reasonable to think that we should be able to approximate the non-missing part of this matrix by a matrix of low rank, say rank 2. If so, we can then **impute** (fill in) the missing data, and hence give each person a recommendation as to how much they might like the movies that they have not yet seen!""" + +# ╔═╡ 63bad6ac-f87b-11ea-23ae-31522dfc74d5 +md"""We thus want to find the rank-2 matrix that is *closest* to this given data matrix. 
"Closest" here requires us to define some kind of distance or **loss function** between the rank-2 matrix and the data matrix, and then to **optimise** this function. + +This is one of the applications of the SVD, in the case of a matrix with *no* missing entries. However we can also directly apply **optimisation** methods. The simplest method is **gradient descent**. +""" + +# ╔═╡ 2d8b13de-f901-11ea-3198-bb513ea1859c +md"The original matrix and the rank-2 approximation are as follows, with missing data shown in black:" + +# ╔═╡ da7592da-f902-11ea-2cee-dbaefacdc382 +md"Here we compare the non-missing values. The rank-2 approximation is not too good. We could then increase the rank of the approximation:" + +# ╔═╡ 1cf3e098-f864-11ea-3f3a-c53017b73490 +md"## Appendix" + +# ╔═╡ 2917943c-f864-11ea-3ee6-db952ca7cd67 +begin + show_image(M) = get.(Ref(ColorSchemes.rainbow), M ./ maximum(M)) + show_image(x::AbstractVector) = show_image(x') +end + +# ╔═╡ 43bff19e-f864-11ea-2315-0f85b532a325 +show_image(flag) + +# ╔═╡ 79d2c6f4-f895-11ea-30c4-9d1102c99482 +show_image(flag2) + +# ╔═╡ b183b6ca-f864-11ea-0b34-4dd3f4f5e69d +show_image(image) + +# ╔═╡ 5471ddce-f867-11ea-2519-21981f5ea68b +show_image(noisy_image) + +# ╔═╡ 1957f71c-f8eb-11ea-0dcf-339bfa7f96fc +show_image(image[1:2, 1:20]) + +# ╔═╡ 72bb11b0-f88f-11ea-0e55-b1108300f854 +loss(M1, M2) = sum( (M1[i] - M2[i])^2 for i in 1:length(M1) if !ismissing(M2[i]) ) + +# ╔═╡ feeeb24a-f88f-11ea-287f-219e53615f32 +function split_up(v, m, n) + return v[1:m], v[m+1:m+n], v[m+n+1:2m+n], v[2m+n+1:2m+2n] +end + +# ╔═╡ 9822b22e-f89a-11ea-3da9-6199f9de033a +nn = 400 + +# ╔═╡ 690364dc-f89a-11ea-30e0-d52fbc146ef7 +M7 = outer([3, 1, 1], rand(nn)) .+ 0.1 .* randn.(); + +# ╔═╡ 8b6ea690-f899-11ea-2712-51508ae9c53e +M8 = outer([1, 2, 3], rand(nn)) + outer([-1, 3, -1], rand(nn)) .+ 0.1 .* randn.(); + +# ╔═╡ 9d5591de-f899-11ea-30d4-b1438066cc92 +begin + scatter(M7[1,:], M7[2,:], M7[3,:], camera=(ϕ1, ϕ2), alpha=0.5, label="rank 1") + scatter!(M8[1,:], 
M8[2,:], M8[3,:], camera=(ϕ1, ϕ2), alpha=0.5, label="rank 2") +end + +# ╔═╡ c66797fe-f899-11ea-094e-6d65bea15a11 +randmissing(n, p_m, dims...) = rand([1:n; [missing for _ ∈ 1:n/(1/p_m -1)]], dims...) + +# ╔═╡ 4fa77c96-f87a-11ea-2153-4bdf390369a5 +begin + m, n = 20, 5 + + M3 = randmissing(5, 0.4, m, n) +end + +# ╔═╡ 0bcc8852-f890-11ea-3715-11cbead7f636 +function ff(v, m, n) + v1, w1, v2, w2 = split_up(v, m, n) + + loss(outer(v1, w1) + outer(v2, w2), M3) +end + +# ╔═╡ 7040dc72-f893-11ea-3d22-4fbd452faa41 +ff2(v) = ff(v, m, n) + +# ╔═╡ 20e94d56-f890-11ea-3953-cbd70cec8ebd +total = 2(m + n) + +# ╔═╡ 1dbcf15a-f890-11ea-008c-8935edfbdb1c +ff(rand(total), m, n) + +# ╔═╡ 1d7e264c-f891-11ea-131d-134cbfff1ac0 +function optimise(f) + x = rand(total) + η = 0.01 + + for i in 1:1000 + x -= η * ForwardDiff.gradient(f, x) + end + + return x +end + +# ╔═╡ 7715e100-f893-11ea-3768-f9a59d8cc06c +begin + xxx = optimise( v -> ff(v, m, n) ) + + (v1, w1, v2, w2) = split_up(xxx, m, n) + M4 = outer(v1, w1) + outer(v2, w2) +end + +# ╔═╡ 53819d1e-f902-11ea-3388-ebc082de7053 +begin + M4_new = float.(M3) + indices = (!ismissing).(M3) + M4_new[indices] .= M4[indices] +end + + +# ╔═╡ 49b83854-f894-11ea-1f07-c95929bf9aea +M5 = replace(M3, missing=>0) + +# ╔═╡ 81115c14-f893-11ea-0147-c9fd45b7b777 +show_image(replace(M3, missing=>0)) + +# ╔═╡ 9da0cbaa-f893-11ea-2d94-951c1f947a2d +let + global M6 = copy(M4) + M6[ismissing.(M3)] .= 0 + + M6 +end + +# ╔═╡ 1857c66c-f894-11ea-0ba2-efe85cd442aa +[show_image(M5)', show_image(M6)'] + +# ╔═╡ 54ff9624-f901-11ea-309f-e396acc96f23 +function show_image_missing(M) + colors = show_image(replace(M, missing=>0)) + colors[ismissing.(M)] .= RGB(0, 0, 0) + + colors +end + +# ╔═╡ 50d80f8c-f900-11ea-27fb-c5a453928534 +show_image_missing(M3) + +# ╔═╡ 31ed8eac-f901-11ea-3443-25c0c459803c +[show_image_missing(M3)', show_image_missing(M4)'] + +# ╔═╡ a0e357a0-f902-11ea-1895-651d395d025d +[show_image_missing(M3)', show_image_missing(M4_new)'] + +# ╔═╡ 
b0fadede-f901-11ea-2248-ab8049d719f5 +M' + +# ╔═╡ 8f599fae-f901-11ea-25e5-11a1f569aef1 +show_image_missing(M3) + +# ╔═╡ e465ca72-f901-11ea-22f3-318147c8d79a +colors = show_image(replace(M3, missing=>0)) + +# ╔═╡ f175b60a-f901-11ea-0fcb-01fc17ec2a97 +colors[ismissing.(M3)] + +# ╔═╡ Cell order: +# ╟─3b9941ac-6043-4dc6-850f-4c7b3ae9d9a7 +# ╟─7365084a-1f37-4897-bca4-fc5855c5ee4e +# ╟─ed7ff6b2-f863-11ea-1a59-eb242a8674e3 +# ╟─fed5845e-f863-11ea-2f95-c331d3c62647 +# ╠═0e1a6d80-f864-11ea-074a-5f7890180114 +# ╠═2e497e30-f895-11ea-09f1-d7f2c1f61193 +# ╟─ab3d55cc-f905-11ea-2f22-5398f3aca803 +# ╠═13b6c108-f864-11ea-2447-2b0741f15c7b +# ╠═e66b30a6-f914-11ea-2c0f-35282d45a30a +# ╠═43bff19e-f864-11ea-2315-0f85b532a325 +# ╠═71d1b12e-f895-11ea-39df-f5c18a7766c3 +# ╠═79d2c6f4-f895-11ea-30c4-9d1102c99482 +# ╟─cdbe1d8e-f905-11ea-3884-efeeef386dda +# ╟─d9aa9af0-f865-11ea-379e-f16b452bd94c +# ╟─2e8ae92a-f867-11ea-0219-1bdd9627c1ea +# ╠═38adc490-f867-11ea-1de5-3b633aff7c97 +# ╠═b183b6ca-f864-11ea-0b34-4dd3f4f5e69d +# ╟─9cf23f9a-f864-11ea-3a08-af448aceefd8 +# ╟─a5b62530-f864-11ea-21e8-71ccfed487f8 +# ╠═5471ddce-f867-11ea-2519-21981f5ea68b +# ╟─c41df86c-f865-11ea-1253-4942bbdbe9d2 +# ╟─7fca33ac-f864-11ea-2a8b-933eb382c172 +# ╟─283f5da4-f866-11ea-27d4-957ca2551b92 +# ╠═1957f71c-f8eb-11ea-0dcf-339bfa7f96fc +# ╠═54977286-f908-11ea-166d-d1df33f38454 +# ╠═7b4e90b4-f866-11ea-26b3-95efde6c650b +# ╟─f574ad7c-f866-11ea-0efa-d9d0602aa63b +# ╟─8775b3fe-f866-11ea-3e6f-9732e39a3525 +# ╠═0dcfd858-f867-11ea-301c-c3ca0a224117 +# ╠═7bacf44e-f896-11ea-38be-2b16ae7ca99f +# ╟─1147cbda-f867-11ea-08fa-ef6ed2ae1e93 +# ╟─8a611e36-f867-11ea-121f-317b7c145fe3 +# ╟─f7371934-f867-11ea-3b53-d1566684585c +# ╟─119dc35c-ec94-11ea-190c-23a750fbe7f4 +# ╟─1e058ba2-ec94-11ea-09af-7f9f9cc3a233 +# ╠═2043d4e6-ec94-11ea-1e1a-c75742eafe71 +# ╟─2a705962-ec94-11ea-1181-2f001ccf472f +# ╟─987c1f2e-f868-11ea-1125-0d8c02843ae4 +# ╟─9e78b048-f868-11ea-192e-d903265d1eb5 +# ╟─24df1f32-ec90-11ea-1f6d-03c1bfa5df8e +# 
╠═13f6ccac-7ce0-48d7-a0ef-e83489625e1d +# ╠═aec46a9b-f743-4cbd-97a7-3ef3cac78b12 +# ╟─1b8c743e-ec90-11ea-10aa-e3b94f768f82 +# ╟─f5358ce4-f86a-11ea-2989-b1f37be89183 +# ╟─870d3efa-f8fc-11ea-1593-1552511dcf86 +# ╟─03ab44c0-f8fd-11ea-2243-1f3580f98a65 +# ╠═2c3721da-f86b-11ea-36cf-3fe4c6622dc6 +# ╟─6dec0db8-ec93-11ea-24ad-e17870ee64c2 +# ╟─5fab2c32-f86b-11ea-2f27-ed5feaac1fa5 +# ╟─ae9a2900-ec93-11ea-1ae5-0748221328fc +# ╟─b81c9db2-ec93-11ea-0dbd-4bd0951cb2cc +# ╟─80722856-f86d-11ea-363d-53fc5f6b8152 +# ╟─b8fa6a1c-f86d-11ea-3d6b-2959d737254b +# ╟─3547f296-f86f-11ea-1698-53d3c1a0bc30 +# ╟─7a83101e-f871-11ea-1d87-4946162777b5 +# ╟─e8276b4e-f86f-11ea-38be-218a72452b10 +# ╟─3b71142c-f86f-11ea-0d43-47011d00786c +# ╠═d71fdaea-f86f-11ea-1a1f-45e4d50926d3 +# ╠═78763674-f8fe-11ea-349c-d997f30ac1f6 +# ╠═757c6808-f8fe-11ea-39bb-47e4da65113a +# ╟─88bbe1bc-f86f-11ea-3b6b-29175ddbea04 +# ╟─cd9e05ee-f86f-11ea-0422-25f8329c7ef2 +# ╟─7eb51908-f906-11ea-19d2-e947d81cb743 +# ╠═4f1980ea-f86f-11ea-3df2-35cca6c961f3 +# ╟─2ffe7ed0-f870-11ea-06aa-390581500ca1 +# ╟─a5cdad52-f906-11ea-0486-755a6403a367 +# ╟─0115c974-f871-11ea-1204-054510848849 +# ╠═0935c870-f871-11ea-2a0b-b1b824379350 +# ╟─e4af4d26-f877-11ea-1de3-a9f8d389138e +# ╟─bf57f674-f906-11ea-08eb-9b50818a025b +# ╠═17e015fe-f8ff-11ea-17b4-a3aa072cd7b3 +# ╟─045b9b98-f8ff-11ea-0d49-5b209319e951 +# ╟─cfec1ec4-f8ff-11ea-265d-ab4844f0f739 +# ╟─e6e900b8-f904-11ea-2a0d-953b99785553 +# ╟─aaff88e8-f877-11ea-1527-ff4d3db663db +# ╟─aefa84de-f877-11ea-3e26-678008e9739e +# ╟─0bd9358e-f879-11ea-2c83-ed4e7bf9d903 +# ╠═690364dc-f89a-11ea-30e0-d52fbc146ef7 +# ╠═8b6ea690-f899-11ea-2712-51508ae9c53e +# ╠═eb961e36-f899-11ea-39a9-eb33c949b79d +# ╠═fdc87844-f899-11ea-1f2f-afe1cd43a68a +# ╟─9d5591de-f899-11ea-30d4-b1438066cc92 +# ╟─232454b4-f87a-11ea-1c69-91edfca1e589 +# ╟─2b44df7e-f87a-11ea-1690-dd459eae05a3 +# ╠═e5f67376-f917-11ea-1799-4341e3a758d5 +# ╠═e7ae7312-f917-11ea-1276-bf8687cc0e57 +# ╠═4fa77c96-f87a-11ea-2153-4bdf390369a5 +# 
╠═50d80f8c-f900-11ea-27fb-c5a453928534 +# ╟─e87e74a6-f87a-11ea-02d5-1970d010bde9 +# ╟─63bad6ac-f87b-11ea-23ae-31522dfc74d5 +# ╟─2d8b13de-f901-11ea-3198-bb513ea1859c +# ╠═31ed8eac-f901-11ea-3443-25c0c459803c +# ╟─53819d1e-f902-11ea-3388-ebc082de7053 +# ╟─da7592da-f902-11ea-2cee-dbaefacdc382 +# ╠═a0e357a0-f902-11ea-1895-651d395d025d +# ╟─1cf3e098-f864-11ea-3f3a-c53017b73490 +# ╠═35e83a04-f864-11ea-0a8e-9ddf6eec02f3 +# ╠═2917943c-f864-11ea-3ee6-db952ca7cd67 +# ╠═72bb11b0-f88f-11ea-0e55-b1108300f854 +# ╠═feeeb24a-f88f-11ea-287f-219e53615f32 +# ╠═0bcc8852-f890-11ea-3715-11cbead7f636 +# ╠═7040dc72-f893-11ea-3d22-4fbd452faa41 +# ╠═1dbcf15a-f890-11ea-008c-8935edfbdb1c +# ╠═20e94d56-f890-11ea-3953-cbd70cec8ebd +# ╠═16887070-f891-11ea-2db3-47b91930e728 +# ╠═1d7e264c-f891-11ea-131d-134cbfff1ac0 +# ╠═7715e100-f893-11ea-3768-f9a59d8cc06c +# ╠═49b83854-f894-11ea-1f07-c95929bf9aea +# ╠═81115c14-f893-11ea-0147-c9fd45b7b777 +# ╠═9da0cbaa-f893-11ea-2d94-951c1f947a2d +# ╠═1857c66c-f894-11ea-0ba2-efe85cd442aa +# ╠═9822b22e-f89a-11ea-3da9-6199f9de033a +# ╠═c66797fe-f899-11ea-094e-6d65bea15a11 +# ╠═54ff9624-f901-11ea-309f-e396acc96f23 +# ╠═b0fadede-f901-11ea-2248-ab8049d719f5 +# ╠═8f599fae-f901-11ea-25e5-11a1f569aef1 +# ╠═e465ca72-f901-11ea-22f3-318147c8d79a +# ╠═f175b60a-f901-11ea-0fcb-01fc17ec2a97 diff --git a/lecture_notebooks/week4/01-introspection.jl b/lecture_notebooks/week4/01-introspection.jl new file mode 100644 index 000000000..1654e9064 --- /dev/null +++ b/lecture_notebooks/week4/01-introspection.jl @@ -0,0 +1,244 @@ +### A Pluto.jl notebook ### +# v0.11.14 + +using Markdown +using InteractiveUtils + +# ╔═╡ c63c2fbc-f1dd-11ea-3081-cd1f11630f23 +using Pkg + +# ╔═╡ bdbb7f1e-f1dd-11ea-1219-91d906455e4a +begin + Pkg.add("Suppressor") + using Suppressor +end + +# ╔═╡ ca0e570a-f1dd-11ea-1655-29faf543c20a +Pkg.activate(mktempdir()) + +# ╔═╡ 41e94dd4-f1d9-11ea-2d2f-4b521ddb2284 +md""" + +# Looking under the hood + +This segment introduces the tools available in Julia to **find and 
inspect** Julia code. + +""" + +# ╔═╡ 8983cd52-f1d9-11ea-1be0-c9d7d64e8942 +md""" +## Finding the implementation of functions +""" + +# ╔═╡ b00ae624-f1d9-11ea-003a-a149946054a9 +a = 1.0 + 2.0im + +# ╔═╡ c3433912-f1d9-11ea-1198-095259b8c36a +b = -2.0 + 2im + +# ╔═╡ a81c0d26-f1d9-11ea-3319-19c78805c432 +@which angle(a) + +# ╔═╡ e779062e-f1d9-11ea-11c2-678f14dcd4a0 +md"""**Tip:** infix operations like `+` are also functions. + +`1 + 2` is the same as `+(1,2)` +""" + +# ╔═╡ 2a66f8cc-f1da-11ea-3e26-cbe02936adb5 +@which a * 2 + +# ╔═╡ 1b72c758-f1da-11ea-2a63-e164d7e0c63b +@which a * b + +# ╔═╡ ef1f280e-fcd6-11ea-17c2-873d329119e7 +md"Count the multiplies and adds in each case." + +# ╔═╡ 56398582-f1da-11ea-2ae7-6f55de7e9e82 +md""" +**Tip:** If you're using the default Julia REPL, you can type `@edit a * 2` and go to this line of code within your editor. + +## Specialized implementations of functions + +By now you may have guessed: there are many implementations of the same function and they are chosen based on the types of its arguments. + +These implementations are called "**methods**". + +You will see that it's very common for functions to have many methods; in fact, the `*` function has a total of 364 methods!! Try to see them with `methods(*)`! +""" + +# ╔═╡ 3e455938-f1da-11ea-1dc3-778cef1b6189 +methods(*) + + +# ╔═╡ 73bef402-f1df-11ea-36d5-13e346ee1d59 +md""" + +Further, any other function which uses `*` will be able to make use of the correct implementation based on the types used! + +For example, the `prod` function, which finds the product of the elements of an array, uses `*` internally. 
+ +""" + +# ╔═╡ de7f0034-f1df-11ea-36e8-1d9720219a30 +prod([a,b,a,b]) + +# ╔═╡ f0325696-f1df-11ea-2fc0-236d41409f16 +prod([1,2,3,4]) + +# ╔═╡ dc718976-fce6-11ea-31fe-478aec584c33 +prod( [rand(2,2) for i=1:4] ) + +# ╔═╡ f8fdbc82-fcd4-11ea-2c4f-0bd4db86b30e +md"This is a big deal, and it's called *Generic Programming* " + +# ╔═╡ ba55539c-f1db-11ea-1744-41d9fc14d417 +md""" + +## Looking under the hood at code specialization + +Julia takes a number of steps to run your code on a computer. + +This involves transforming the requested Julia code into various forms of code and eventually machine code. + +These are the forms your code will take in order: + +- Julia code +- `@code_lowered`: Lowered Julia code ([syntactic sugar](https://en.wikipedia.org/wiki/Syntactic_sugar#:~:text=In%20computer%20science%2C%20syntactic%20sugar,style%20that%20some%20may%20prefer.) is removed) +- `@code_typed`: Typed Julia code (types are propagated depending on types of the arguments to the function) +- `@code_llvm`: Code compiled for the LLVM compiler infrastructure +- `@code_native`: Assembly code compiled for your specific machine! + +""" + +# ╔═╡ 7ab12188-fcd7-11ea-3d1b-db3bcb8c93e2 + # c = 2 + c = 3.0 + 4.0im + # c = [1 2 3] + + +# ╔═╡ ff22427c-fcd6-11ea-01a4-0d8fd0be3633 +@code_lowered a * c + +# ╔═╡ e6054270-f1dd-11ea-2c15-3f63ac42feca +md""" + +**Notes on `@code_lowered` output**: + +You can see that it has turned the function into straight-line code. + +- Each line does only 1 function call. +- The result of each line is assigned to a variable `%N` +""" + +# ╔═╡ 32bc8b2e-f1dd-11ea-1ac2-5b2e6f2c213f +@code_typed a * c + +# ╔═╡ 282d2bfe-f1de-11ea-3acc-9b4d284b99b3 +md""" + +**Notes on `@code_typed` output**: + +Compared to `@code_lowered`, this code is more low level: it has chosen specific functions which will emit floating point instructions.
+""" + +# ╔═╡ 5e5f78c6-f1de-11ea-0156-25f96ef2b812 +md""" +**Notes on the `@code_llvm` output** + +Notice that here not only the variable names, but the field names are also gone! +""" + +# ╔═╡ aa907166-f1de-11ea-00cc-a935c275eebd +md""" +**Notes on the `@code_native` output** + +This is specific to your computer. + +## Conclusion + +Julia allows you to look all the way to the exact instructions which will run on your CPU. Isn't that cool? + +We hope you also got a feeling for the kind of specialization of code that Julia performs in order to get the fast performance for your code! + +In a language without types this would not be possible! Or at least very hard! +""" + +# ╔═╡ 4c131066-f1dd-11ea-3e4f-4122c5feb20a +function with_terminal(f) + local spam_out, spam_err + @color_output false begin + spam_out = @capture_out begin + spam_err = @capture_err begin + f() + end + end + end + spam_out, spam_err + + HTML(""" + +
+
$(Markdown.htmlesc(spam_out))
+
+ """) +end + +# ╔═╡ 3c9f179c-f1dd-11ea-1c63-59429d7fa23a +with_terminal() do + @code_llvm debuginfo=:none a * c +end + +# ╔═╡ dd322a8c-f1dd-11ea-245e-e54922c0f355 +with_terminal() do + @code_native a * c +end + +# ╔═╡ 293c8888-fcd7-11ea-34f6-4b994aa6baf4 +with_terminal() do + @code_native a * c +end + +# ╔═╡ Cell order: +# ╠═c63c2fbc-f1dd-11ea-3081-cd1f11630f23 +# ╠═ca0e570a-f1dd-11ea-1655-29faf543c20a +# ╠═bdbb7f1e-f1dd-11ea-1219-91d906455e4a +# ╟─41e94dd4-f1d9-11ea-2d2f-4b521ddb2284 +# ╟─8983cd52-f1d9-11ea-1be0-c9d7d64e8942 +# ╠═b00ae624-f1d9-11ea-003a-a149946054a9 +# ╠═c3433912-f1d9-11ea-1198-095259b8c36a +# ╠═a81c0d26-f1d9-11ea-3319-19c78805c432 +# ╟─e779062e-f1d9-11ea-11c2-678f14dcd4a0 +# ╠═2a66f8cc-f1da-11ea-3e26-cbe02936adb5 +# ╠═1b72c758-f1da-11ea-2a63-e164d7e0c63b +# ╟─ef1f280e-fcd6-11ea-17c2-873d329119e7 +# ╟─56398582-f1da-11ea-2ae7-6f55de7e9e82 +# ╠═3e455938-f1da-11ea-1dc3-778cef1b6189 +# ╟─73bef402-f1df-11ea-36d5-13e346ee1d59 +# ╠═de7f0034-f1df-11ea-36e8-1d9720219a30 +# ╠═f0325696-f1df-11ea-2fc0-236d41409f16 +# ╠═dc718976-fce6-11ea-31fe-478aec584c33 +# ╟─f8fdbc82-fcd4-11ea-2c4f-0bd4db86b30e +# ╟─ba55539c-f1db-11ea-1744-41d9fc14d417 +# ╠═7ab12188-fcd7-11ea-3d1b-db3bcb8c93e2 +# ╠═ff22427c-fcd6-11ea-01a4-0d8fd0be3633 +# ╟─e6054270-f1dd-11ea-2c15-3f63ac42feca +# ╠═32bc8b2e-f1dd-11ea-1ac2-5b2e6f2c213f +# ╟─282d2bfe-f1de-11ea-3acc-9b4d284b99b3 +# ╠═3c9f179c-f1dd-11ea-1c63-59429d7fa23a +# ╟─5e5f78c6-f1de-11ea-0156-25f96ef2b812 +# ╠═dd322a8c-f1dd-11ea-245e-e54922c0f355 +# ╠═293c8888-fcd7-11ea-34f6-4b994aa6baf4 +# ╟─aa907166-f1de-11ea-00cc-a935c275eebd +# ╟─4c131066-f1dd-11ea-3e4f-4122c5feb20a diff --git a/lecture_notebooks/week4/02-covid_data.jl b/lecture_notebooks/week4/02-covid_data.jl new file mode 100644 index 000000000..bf2798fb6 --- /dev/null +++ b/lecture_notebooks/week4/02-covid_data.jl @@ -0,0 +1,633 @@ +### A Pluto.jl notebook ### +# v0.11.14 + +using Markdown +using InteractiveUtils + +# This Pluto notebook uses @bind for interactivity. 
When running this notebook outside of Pluto, the following 'mock version' of @bind gives bound variables a default value (instead of an error). +macro bind(def, element) + quote + local el = $(esc(element)) + global $(esc(def)) = Core.applicable(Base.get, el) ? Base.get(el) : missing + el + end +end + +# ╔═╡ db4c1f10-7c37-4513-887a-2467ce673458 +begin + using Pkg + Pkg.add.(["CSV", "DataFrames", "PlutoUI", "Shapefile", "ZipFile", "LsqFit"]) + + using CSV + using DataFrames + using PlutoUI + using Shapefile + using ZipFile + using LsqFit +end + +# ╔═╡ a26b8742-6a16-445a-ae77-25a4189c0f14 +using Plots + +# ╔═╡ cbd9c1aa-fc37-11ea-29d9-e3361406796f +using Dates + +# ╔═╡ 0f87cec6-fc31-11ea-23d2-395e61f38b6f +md"# Module 2: Epidemic propagation" + +# ╔═╡ 19f4da16-fc31-11ea-0de9-1dbe668b862d +md"We are starting a new module on modelling epidemic propagation. + +Let's start off by analysing some of the data that is now available on the current COVID-19 pandemic. +" + +# ╔═╡ d3398953-afee-4989-932c-995c3ffc0c40 +md""" +## Exploring COVID-19 data +""" + +# ╔═╡ efa281da-cef9-41bc-923e-625140ce5a07 +md""" +In this notebook we will explore and analyse data on the COVID-19 pandemic. The aim is to use Julia tools to analyse and visualise the data in different ways. + +By the end of the notebook we will produce the following visualisation using Julia and Pluto: +""" + +# ╔═╡ 7617d970-fce4-11ea-08ba-c7eba3e17f62 +@bind day Clock(0.5) + +# ╔═╡ e0493940-8aa7-4733-af72-cd6bc0e37d92 +md""" +## Download and load data +""" + +# ╔═╡ 64d9bcea-7c85-421d-8f1e-17ea8ee694da +url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"; + +# ╔═╡ c460b0c3-6d3b-439b-8cc7-1c58d6547f51 +download(url, "covid_data.csv"); + +# ╔═╡ a7369222-fc20-11ea-314d-4d6b0f0f72eb +md"We will need a couple of new packages. The data is in CSV format, i.e. *C*omma-*S*eparated *V*alues. 
This is a common data format in which observations, i.e. data points, are separated on different lines. Within each line the different data for that observation are separated by commas or other punctuation (possibly spaces and tabs)." + +# ╔═╡ 1620aa9d-7dcd-4686-b7e4-a72cebe315ed +md""" +We can load the data from a CSV using the `File` function from the `CSV.jl` package, and then convert it to a `DataFrame`: +""" + +# ╔═╡ 38344160-fc27-11ea-220e-95aa00e4b083 +begin + csv_data = CSV.File("covid_data.csv"); + data = DataFrame(csv_data) # it is common to use `df` as a variable name +end + +# ╔═╡ ad43cea2-fc28-11ea-2bc3-a9d81e3766f4 +md"A `DataFrame` is a standard way of storing **heterogeneous data** in Julia, i.e. a table consisting of columns with different types. As you can see from the display of the `DataFrame` object above, each column has an associated type, but different columns have different types, reflecting the type of the data in that column. + +In our case, country names are stored as `String`s, their latitude and longitude as `Float64`s and the (cumulative) case counts for each day as `Int64`s. +" + +# ╔═╡ fab64d86-fc28-11ea-0ae1-3ba1b9a14759 +md"## Using the data" + +# ╔═╡ 3519cf96-fc26-11ea-3386-d97c61ea1b85 +md"""Since we need to manipulate the columns, let's rename them to something shorter. We can do this either **in place**, i.e. modifying the original `DataFrame`, or **out of place**, creating a new `DataFrame`. The convention in Julia is that functions that modify their argument have a name ending with `!` (often pronounced "bang"). + +We can use the `head` function to see only the first few lines of the data.
+""" + +# ╔═╡ a054e048-4fea-487c-9d06-463723c7151c +begin + data_2 = rename(data, 1 => "province", 2 => "country", 3 => "latitude", 4 => "longitude") + head(data_2) +end + +# ╔═╡ e9ad97b6-fdef-4f48-bd32-634cfd2ce0e6 +begin + rename!(data, 1 => "province", 2 => "country", 3 => "latitude", 4 => "longitude") + head(data) +end + +# ╔═╡ aaa7c012-fc1f-11ea-3c6c-89630affb1db +md"## Extracting useful information" + +# ╔═╡ b0eb3918-fc1f-11ea-238b-7f5d23e424bb +md"How can we extract the list of all the countries? The country names are in the second column. + +For some purposes we can think of a `DataFrame` as a matrix and use similar syntax. For example, we can extract the second column: +" + +# ╔═╡ 68f76d3b-b398-459d-bf39-20bf300dcaa2 +all_countries = data[:, "country"] + +# ╔═╡ 20e144f2-fcfb-11ea-010c-97e21eb0d231 +all_countries2 = data[:, :country] + +# ╔═╡ 2ec98a16-fcfb-11ea-21ad-15f2f5e68248 +all_countries3 = data[:, 2] + +# ╔═╡ 382cfc62-fcfb-11ea-26aa-2984d0449dcc +data[5:8, 2] + +# ╔═╡ 34440afc-fc2e-11ea-0484-5b47af235bad +md"It turns out that some countries are divided into provinces, so there are repetitions in the `country` column that we can eliminate with the `unique` function:" + +# ╔═╡ 79ba0433-2a31-475a-87c9-14103ebbff16 +countries = unique(all_countries) + +# ╔═╡ 5c1ec9ae-fc2e-11ea-397d-937c7ab1edb2 +@bind i Slider(1:length(countries), show_value=true) + +# ╔═╡ a39589ee-20e3-4f22-bf81-167fd815f6f9 +md"$(Text(countries[i]))" + +# ╔═╡ 9484ea9e-fc2e-11ea-137c-6da8212da5bd +md"[Here we used **string interpolation** with `$` to put the text into a Markdown string.]" + +# ╔═╡ bcc95a8a-fc2e-11ea-2ccd-3bece42a08e6 +md"You can also use `Select` to get a dropdown instead:" + +# ╔═╡ ada3ceb4-fc2e-11ea-2cbf-399430fa18b5 +@bind country Select(countries) + +# ╔═╡ 1633abe8-fc2f-11ea-2c7e-21b3348a3569 +md"""How can we extract the data for a particular country? First we need to know the exact name of the country. E.g. is the US written as "USA", or "United States"?
+ +We could scroll through to find out, or **filter** the data to only look at a sample of it, for example those countries that begin with the letter "U". + +One way to do this is with an array comprehension:""" + +# ╔═╡ ed383524-e0c0-4da2-9a98-ca75aadd2c9e +md""" +Array comprehension: +""" + +# ╔═╡ 90810d7e-fcfb-11ea-396a-35543dcc1e06 +startswith("david", "d") + +# ╔═╡ 977e1a2c-fcfb-11ea-08e9-cd656a631778 +startswith("hello", "d") + +# ╔═╡ 9ee79840-30ff-4c92-97f4-e178caceceaf +U_countries = [startswith(country, "U") for country in all_countries] + +# ╔═╡ 99d5a138-fc30-11ea-2977-71732ca3aead +length(U_countries) + +# ╔═╡ 450b4902-fc30-11ea-321d-29faf6188ff5 +md"Note that this returns an array of booleans of the same length as the vector `all_countries`. We can now use this to index into the `DataFrame`:" + +# ╔═╡ 4f423a75-43da-486f-ac2a-7220032dac9f +data[U_countries, :] + +# ╔═╡ a8b2db96-fc30-11ea-2eea-b938a3a430fb +md"""We see that the correct spelling is `"US"`. (And note how the different provinces of the UK are separated.)""" + +# ╔═╡ c400ce4e-fc30-11ea-13b1-b54cf8f5630e +md"Now we would like to extract the data for the US alone. How can we access the correct row of the table? We can again filter on the country name. A nicer way to do this is to use the `filter` function. + +This is a **higher-order function**: its first argument is itself a function, which must return `true` or `false`. `filter` will return all the rows of the `DataFrame` that satisfy that **predicate**: +" + +# ╔═╡ 7b2496b0-fc35-11ea-0e78-473e5e8eac44 +filter(x -> x.country == "United Kingdom", data) + +# ╔═╡ 8990f13a-fc35-11ea-338f-0955eeb23c3c +md"Here we have used an **anonymous function** with the syntax `x -> ⋯`. This is a function which takes the argument `x` and returns whatever is on the right of the arrow (`->`)." + +# ╔═╡ a772eadc-fc35-11ea-3d38-4b121f88f1d7 +md"To extract a single row we need the **index** of the row (i.e. which number row it is in the `DataFrame`). 
The `findfirst` function finds the first row that satisfies the given predicate:" + +# ╔═╡ 16a79308-fc36-11ea-16e5-e1087d7ebbda +US_row = findfirst(==("US"), all_countries) + +# ╔═╡ a41db8ea-f0e0-461f-a298-bdcea42a67f3 +data[US_row, :] + +# ╔═╡ f75e1992-fcfb-11ea-1123-b59bf888eac3 +data[US_row:US_row, :] + +# ╔═╡ 67eebb7e-fc36-11ea-03ef-bd6966487bb5 +md"Now we can extract the data into a standard Julia `Vector`:" + +# ╔═╡ 7b5db0f4-fc36-11ea-09a5-49def64f4c79 +US_data = Vector(data[US_row, 5:end]) + +# ╔═╡ f099424c-0e22-42fb-894c-d8c2a65715fb +scatter(US_data, m=:o, alpha=0.5, ms=3, xlabel="day", ylabel="cumulative cases", leg=false) + +# ╔═╡ 7e7d14a2-fc37-11ea-3f1a-870ca98c4b75 +md"Note that we are only passing a single vector to the `scatter` function, so the $x$ coordinates are taken as the natural numbers $1$, $2$, etc. + +Also note that the $y$-axis in this plot gives the *cumulative* case numbers, i.e. the *total* number of confirmed cases since the start of the epidemic up to the given date. +" + +# ╔═╡ 75d2dc66-fc47-11ea-0e35-05f9cf38e901 +md"This is an example of a **time series**, i.e. a single quantity that changes over time." + +# ╔═╡ b3880f40-fc36-11ea-074a-edc51adeb6f0 +md"## Using dates" + +# ╔═╡ 6de0800c-fc37-11ea-0d94-2b6f8f66964d +md"We would like to use actual dates instead of just the number of days since the start of the recorded data. The dates are given in the column names of the `DataFrame`: +" + +# ╔═╡ bb6316b7-23fb-44a3-b64a-dfb71a7df011 +column_names = names(data) + +# ╔═╡ 0c098923-b016-4c65-9a37-6b7b56b13a0c +date_strings = names(data)[5:end] # skip the first four columns (province, country, latitude, longitude); the remaining column names are the dates + +# ╔═╡ 546a40eb-7897-485d-a1b5-c4dfae0a4861 +md""" +Now we need to **parse** the date strings, i.e.
convert from a string representation into an actual Julia type provided by the `Dates.jl` standard library package: +""" + +# ╔═╡ 9e23b0e2-ac13-4d19-a3f9-4a655a1e9f14 +date_strings[1] + +# ╔═╡ 25c79620-14f4-45a7-b120-05ec72cb77e9 +date_format = Dates.DateFormat("m/d/Y") + +# ╔═╡ 31dc4e46-4839-4f01-b383-1a1189aeb0e6 +parse(Date, date_strings[1], date_format) + +# ╔═╡ ee27bd98-fc37-11ea-163c-1365e194fc2e +md"Since the year was not correctly represented in the original data, we need to manually fix it:" + +# ╔═╡ f5c29f0d-937f-4731-8f87-0405ebc966f5 +dates = parse.(Date, date_strings, date_format) .+ Year(2000) + +# ╔═╡ b0e7f1c6-fce3-11ea-10e5-9101d0f861a2 +dates[day] + +# ╔═╡ 36c37b4d-eb23-4deb-a593-e511eccd9204 +begin + plot(dates, US_data, xrotation=45, leg=:topleft, + label="US data", m=:o, ms=3, alpha=0.5) + + xlabel!("date") + ylabel!("cumulative US cases") + title!("US cumulative confirmed COVID-19 cases") +end + +# ╔═╡ 511eb51e-fc38-11ea-0492-19532da809de +md"## Exploratory data analysis" + +# ╔═╡ d228e232-fc39-11ea-1569-a31b817118c4 +md" +Working with *cumulative* data is often less intuitive. Let's look at the actual number of daily cases. Julia has a `diff` function to calculate the difference between successive entries of a vector: +" + +# ╔═╡ dbaacbb6-fc3b-11ea-0a42-a9792e8a6c4c +begin + daily_cases = diff(US_data) + plot(dates[2:end], daily_cases, m=:o, leg=false, xlabel="days", ylabel="daily US cases", alpha=0.5) # use "o"-shaped markers +end + +# ╔═╡ 19bdf146-fc3c-11ea-3c60-bf7823c43a1d +begin + using Statistics + running_mean = [mean(daily_cases[i-6:i]) for i in 7:length(daily_cases)] +end + +# ╔═╡ 12900562-fc3a-11ea-25e1-f7c91a6940e5 +md"Note that discrete data should *always* be plotted with points. The lines are just to guide the eye. + +Cumulating data corresponds to taking the integral of a function and is a *smoothing* operation. Note that the cumulative data is indeed visually smoother than the daily data. 
+ +The oscillations in the daily data seem to be due to a lower incidence of reporting at weekends. We could try to smooth this out by taking a **moving average**, say over the past week: +" + +# ╔═╡ be868a52-fc3b-11ea-0b60-7fea05ffe8e9 +begin + plot(daily_cases, label="raw daily cases") + plot!(7:length(daily_cases), running_mean, m=:o, label="running weekly mean", leg=:topleft) # running_mean[k] averages days k to k+6, so draw it at day k+6 to line up with the raw data +end + +# ╔═╡ 0b01120c-fc3d-11ea-1381-8bab939e6214 +md"## Exponential growth + +Simple models of epidemic spread often predict a period with **exponential growth**. Do the data corroborate this? +" + +# ╔═╡ 252eff18-fc3d-11ea-0c18-7b130ada882e +md"""A visual check for this is to plot the data with a **logarithmic scale** on the $y$ axis (but a standard scale on the $x$ axis). + +If we observe a straight line on such a semi-logarithmic plot, then we know that + +$$\log(y) \sim \alpha x + \beta,$$ + +where we are using $\sim$ to denote approximate equality. + +Taking exponentials of both sides gives + +$$y \sim \exp(\alpha x + \beta),$$ + +i.e. + +$$y \sim c \, \mathrm{e}^{\alpha x},$$ + +where $c$ is a constant (sometimes called a "pre-factor") and $\alpha$ is the exponential growth rate, found from the slope of the straight line on the semi-log plot.
+""" + +# ╔═╡ 9626d74a-fc3d-11ea-2ab3-978dc46c0f1f +md"""Since the data contains some zeros, we need to replace those with `NaN`s ("Not a Number"), which `Plots.jl` interprets as a signal to break the line""" + +# ╔═╡ 4358c348-91aa-4c76-a443-0a9cefce0e83 +begin + plot(replace(daily_cases, 0 => NaN), + yscale=:log10, + leg=false, m=:o) + + xlabel!("day") + ylabel!("confirmed cases in US") + title!("US confirmed COVID-19 cases") +end + +# ╔═╡ 687409a2-fc43-11ea-03e0-d9a7a48165a8 +md"Let's zoom in on the region of the graph where the growth looks linear on this semi-log plot:" + +# ╔═╡ 4f23c8fc-fc43-11ea-0e73-e5f89d14155c +xlims!(0, 100) + +# ╔═╡ 91f99062-fc43-11ea-1b0e-afe8aa8a1c3d +exp_period = 38:60 + +# ╔═╡ 07282688-fc3e-11ea-2f9e-5b0581061e65 +md"We see that there is a period lasting from around day $(first(exp_period)) to around day $(last(exp_period)) when the curve looks straight on the semi-log plot. +This corresponds to the following date range:" + +# ╔═╡ 210cee94-fc3e-11ea-1a6e-7f88270354e1 +dates[exp_period] + +# ╔═╡ 2f254a9e-fc3e-11ea-2c02-75ed59f41903 +md"i.e. the first 3 weeks of March. Fortunately the imposition of lockdown during the last 10 days of March (on different days in different US states) significantly reduced transmission." + +# ╔═╡ 84f5c776-fce0-11ea-2d52-39c51d4ab6b5 +md"## Data fitting" + +# ╔═╡ 539c951c-fc48-11ea-2293-457b7717ea4d +md"""Let's try to fit an exponential function to our data in the relevant region. We will use the Julia package `LsqFit.jl` ("least-squares fit"). + +This package allows us to specify a model function that takes a vector of data and a vector of parameters, and it finds the best fit to the data. 
+""" + +# ╔═╡ b33e97f2-fce0-11ea-2b4d-ffd7ed7000f8 +model(x, (c, α)) = c .* exp.(α .* x) + +# ╔═╡ d52fc8fe-fce0-11ea-0a04-b146ee2dbe80 +begin + p0 = [0.5, 0.5] # initial guess for parameters + + x_data = exp_period + y_data = daily_cases[exp_period] + + fit = curve_fit(model, x_data, y_data, p0) +end; + +# ╔═╡ c50b5e42-fce1-11ea-1667-91c56ea80dcc +md"We are interested in the coefficients of the best-fitting model:" + +# ╔═╡ 3060bfa8-fce1-11ea-1047-db0dc06485a2 +parameters = coef(fit) + +# ╔═╡ 62bdc04a-fce1-11ea-1724-bfc4bc4789d1 +md"Now let's add this to the plot:" + +# ╔═╡ 6bc8cc20-fce1-11ea-2180-0fa69e86741f +begin + plot(replace(daily_cases, 0 => NaN), + yscale=:log10, + leg=false, m=:o, + xlims=(1, 100), alpha=0.5) + + line_range = 30:70 + plot!(line_range, model(line_range, parameters), lw=3, ls=:dash, alpha=0.7) + + xlabel!("day") + ylabel!("confirmed cases in US") + title!("US confirmed COVID-19 cases") +end + +# ╔═╡ 287f0fa8-fc44-11ea-2788-9f3ac4ee6d2b +md"## Geographical data" + +# ╔═╡ 3edd2a22-fc4a-11ea-07e5-55ca6d7639e8 +md"Our data set contains more information: the geographical locations (latitude and longitude) of each country (or, rather, of a particular point that was chosen as being representative of that country)." + +# ╔═╡ c5ad4d40-fc57-11ea-23cb-e55487bc6f7a +filter(x -> startswith(x.country, "A"), data) + +# ╔═╡ 57a9bb06-fc4a-11ea-2665-7f97026981dc +md"Let's extract and plot the geographical information. 
To reduce the visual noise a bit we will only use those " + +# ╔═╡ 80138b30-fc4a-11ea-0e15-b54cf6b402df +province = data.province + +# ╔═╡ 8709f208-fc4a-11ea-0203-e13eae5f0d93 +md"If the `province` is missing we should use the country name instead:" + +# ╔═╡ a29c8ad0-fc4a-11ea-14c7-71435769b73e +begin + indices = ismissing.(province) + province[indices] .= all_countries[indices] +end + +# ╔═╡ 4e4cca22-fc4c-11ea-12ae-2b51545799ec +begin + + scatter(data.longitude, data.latitude, leg=false, alpha=0.5, ms=2) + + for i in 1:length(province) + annotate!(data.longitude[i], data.latitude[i], text(province[i], :center, 5, color=RGBA{Float64}(0.0,0.0,0.0,0.3))) + end + + plot!(axis=false) +end + +# ╔═╡ 16981da0-fc4d-11ea-37a2-535aa014a298 +data.latitude + +# ╔═╡ a9c39dbe-fc4d-11ea-2e86-4992896e2abb +md"## Adding maps" + +# ╔═╡ b93b88b0-fc4d-11ea-0c45-8f64983f8b5c +md"We would also like to see the outlines of each country. For this we can use, for example, the data from [Natural Earth](https://www.naturalearthdata.com/downloads/110m-cultural-vectors/110m-admin-0-countries), which comes in the form of **shape files**, giving the outlines in terms of latitude and longitude coordinates. + +These may be read in using the `Shapefile.jl` package. + +The data is provided in a `.zip` file, so after downloading it we first need to decompress it. 
+" + +# ╔═╡ 7ec28cd0-fc87-11ea-2de5-1959ea5dc37c +begin + zipfile = download("https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/110m/cultural/ne_110m_admin_0_countries.zip") + + r = ZipFile.Reader(zipfile); + for f in r.files + println("Filename: $(f.name)") + open(f.name, "w") do io + write(io, read(f)) + end + end + close(r) +end + +# ╔═╡ ada44a56-fc56-11ea-2ab7-fb649be7e066 +shp_countries = Shapefile.shapes(Shapefile.Table("./ne_110m_admin_0_countries.shp")) + +# ╔═╡ d911edb6-fc87-11ea-2258-d34d61c02245 + + +# ╔═╡ b3e1ebf8-fc56-11ea-05b8-ed0b9e50503d +plot!(shp_countries, alpha=0.2) + +# ╔═╡ f8e754ee-fc73-11ea-0c7f-cdc760ab3e94 +md"Now we would like to combine the geographical and temporal (time) aspects. One way to do so is to animate time:" + +# ╔═╡ 39982810-fc76-11ea-01c3-3987cfc2fd3c +daily = max.(1, diff(Array(data[:, 5:end]), dims=2)); + +# ╔═╡ 0f329ece-fc74-11ea-1e02-bdbddf551ef3 +@bind day2 Slider(1:size(daily, 2), show_value=true) + +# @bind day Clock(0.5) + +# ╔═╡ b406eec8-fc77-11ea-1a98-d36d6d3e2393 +log10(maximum(daily[:, day])) + +# ╔═╡ 1f30a1ac-fc74-11ea-2abf-abf437006bab +dates[day2] + +# ╔═╡ 24934438-fc74-11ea-12e4-7f7e50f54029 +world_plot = begin + plot(shp_countries, alpha=0.2) + scatter!(data.longitude, data.latitude, leg=false, ms=2*log10.(daily[:, day2]), alpha=0.7) + xlabel!("longitude") # the x values passed to scatter! are data.longitude + ylabel!("latitude") # the y values passed to scatter! are data.latitude + title!("daily cases per country") +end + + +# ╔═╡ f7a37706-fcdf-11ea-048a-236b8ed0f1f3 +world_plot + +# ╔═╡ 251c06e4-fc77-11ea-1a0f-73139ba11e83 +md"However, we should always be wary about visualisations such as these. Perhaps we should be plotting cases per capita instead of absolute numbers of cases. Or should we divide by the area of the country? Some countries, such as China and Canada, are divided into states or regions in the original data set -- but others, such as the US, are not. You should always check exactly what is being plotted!
+ +Unfortunately, published visualisations often hide some of this information. This emphasises the need to be able to get our hands on the data, create our own visualisations and draw our own conclusions." + +# ╔═╡ Cell order: +# ╟─0f87cec6-fc31-11ea-23d2-395e61f38b6f +# ╟─19f4da16-fc31-11ea-0de9-1dbe668b862d +# ╟─d3398953-afee-4989-932c-995c3ffc0c40 +# ╟─efa281da-cef9-41bc-923e-625140ce5a07 +# ╠═7617d970-fce4-11ea-08ba-c7eba3e17f62 +# ╠═b0e7f1c6-fce3-11ea-10e5-9101d0f861a2 +# ╠═f7a37706-fcdf-11ea-048a-236b8ed0f1f3 +# ╟─e0493940-8aa7-4733-af72-cd6bc0e37d92 +# ╠═64d9bcea-7c85-421d-8f1e-17ea8ee694da +# ╠═c460b0c3-6d3b-439b-8cc7-1c58d6547f51 +# ╟─a7369222-fc20-11ea-314d-4d6b0f0f72eb +# ╠═db4c1f10-7c37-4513-887a-2467ce673458 +# ╟─1620aa9d-7dcd-4686-b7e4-a72cebe315ed +# ╠═38344160-fc27-11ea-220e-95aa00e4b083 +# ╟─ad43cea2-fc28-11ea-2bc3-a9d81e3766f4 +# ╟─fab64d86-fc28-11ea-0ae1-3ba1b9a14759 +# ╟─3519cf96-fc26-11ea-3386-d97c61ea1b85 +# ╠═a054e048-4fea-487c-9d06-463723c7151c +# ╠═e9ad97b6-fdef-4f48-bd32-634cfd2ce0e6 +# ╟─aaa7c012-fc1f-11ea-3c6c-89630affb1db +# ╟─b0eb3918-fc1f-11ea-238b-7f5d23e424bb +# ╠═68f76d3b-b398-459d-bf39-20bf300dcaa2 +# ╠═20e144f2-fcfb-11ea-010c-97e21eb0d231 +# ╠═2ec98a16-fcfb-11ea-21ad-15f2f5e68248 +# ╠═382cfc62-fcfb-11ea-26aa-2984d0449dcc +# ╟─34440afc-fc2e-11ea-0484-5b47af235bad +# ╠═79ba0433-2a31-475a-87c9-14103ebbff16 +# ╠═5c1ec9ae-fc2e-11ea-397d-937c7ab1edb2 +# ╟─a39589ee-20e3-4f22-bf81-167fd815f6f9 +# ╟─9484ea9e-fc2e-11ea-137c-6da8212da5bd +# ╟─bcc95a8a-fc2e-11ea-2ccd-3bece42a08e6 +# ╠═ada3ceb4-fc2e-11ea-2cbf-399430fa18b5 +# ╟─1633abe8-fc2f-11ea-2c7e-21b3348a3569 +# ╟─ed383524-e0c0-4da2-9a98-ca75aadd2c9e +# ╠═90810d7e-fcfb-11ea-396a-35543dcc1e06 +# ╠═977e1a2c-fcfb-11ea-08e9-cd656a631778 +# ╠═9ee79840-30ff-4c92-97f4-e178caceceaf +# ╠═99d5a138-fc30-11ea-2977-71732ca3aead +# ╟─450b4902-fc30-11ea-321d-29faf6188ff5 +# ╠═4f423a75-43da-486f-ac2a-7220032dac9f +# ╟─a8b2db96-fc30-11ea-2eea-b938a3a430fb +# ╟─c400ce4e-fc30-11ea-13b1-b54cf8f5630e +# 
╠═7b2496b0-fc35-11ea-0e78-473e5e8eac44 +# ╟─8990f13a-fc35-11ea-338f-0955eeb23c3c +# ╟─a772eadc-fc35-11ea-3d38-4b121f88f1d7 +# ╠═16a79308-fc36-11ea-16e5-e1087d7ebbda +# ╠═a41db8ea-f0e0-461f-a298-bdcea42a67f3 +# ╠═f75e1992-fcfb-11ea-1123-b59bf888eac3 +# ╟─67eebb7e-fc36-11ea-03ef-bd6966487bb5 +# ╠═7b5db0f4-fc36-11ea-09a5-49def64f4c79 +# ╠═a26b8742-6a16-445a-ae77-25a4189c0f14 +# ╠═f099424c-0e22-42fb-894c-d8c2a65715fb +# ╟─7e7d14a2-fc37-11ea-3f1a-870ca98c4b75 +# ╟─75d2dc66-fc47-11ea-0e35-05f9cf38e901 +# ╟─b3880f40-fc36-11ea-074a-edc51adeb6f0 +# ╟─6de0800c-fc37-11ea-0d94-2b6f8f66964d +# ╠═bb6316b7-23fb-44a3-b64a-dfb71a7df011 +# ╠═0c098923-b016-4c65-9a37-6b7b56b13a0c +# ╟─546a40eb-7897-485d-a1b5-c4dfae0a4861 +# ╠═cbd9c1aa-fc37-11ea-29d9-e3361406796f +# ╠═9e23b0e2-ac13-4d19-a3f9-4a655a1e9f14 +# ╠═25c79620-14f4-45a7-b120-05ec72cb77e9 +# ╠═31dc4e46-4839-4f01-b383-1a1189aeb0e6 +# ╟─ee27bd98-fc37-11ea-163c-1365e194fc2e +# ╠═f5c29f0d-937f-4731-8f87-0405ebc966f5 +# ╠═36c37b4d-eb23-4deb-a593-e511eccd9204 +# ╟─511eb51e-fc38-11ea-0492-19532da809de +# ╟─d228e232-fc39-11ea-1569-a31b817118c4 +# ╠═dbaacbb6-fc3b-11ea-0a42-a9792e8a6c4c +# ╟─12900562-fc3a-11ea-25e1-f7c91a6940e5 +# ╠═19bdf146-fc3c-11ea-3c60-bf7823c43a1d +# ╠═be868a52-fc3b-11ea-0b60-7fea05ffe8e9 +# ╟─0b01120c-fc3d-11ea-1381-8bab939e6214 +# ╟─252eff18-fc3d-11ea-0c18-7b130ada882e +# ╟─9626d74a-fc3d-11ea-2ab3-978dc46c0f1f +# ╠═4358c348-91aa-4c76-a443-0a9cefce0e83 +# ╟─687409a2-fc43-11ea-03e0-d9a7a48165a8 +# ╠═4f23c8fc-fc43-11ea-0e73-e5f89d14155c +# ╟─07282688-fc3e-11ea-2f9e-5b0581061e65 +# ╠═91f99062-fc43-11ea-1b0e-afe8aa8a1c3d +# ╠═210cee94-fc3e-11ea-1a6e-7f88270354e1 +# ╟─2f254a9e-fc3e-11ea-2c02-75ed59f41903 +# ╟─84f5c776-fce0-11ea-2d52-39c51d4ab6b5 +# ╟─539c951c-fc48-11ea-2293-457b7717ea4d +# ╠═b33e97f2-fce0-11ea-2b4d-ffd7ed7000f8 +# ╠═d52fc8fe-fce0-11ea-0a04-b146ee2dbe80 +# ╟─c50b5e42-fce1-11ea-1667-91c56ea80dcc +# ╟─3060bfa8-fce1-11ea-1047-db0dc06485a2 +# ╟─62bdc04a-fce1-11ea-1724-bfc4bc4789d1 +# 
╠═6bc8cc20-fce1-11ea-2180-0fa69e86741f +# ╟─287f0fa8-fc44-11ea-2788-9f3ac4ee6d2b +# ╟─3edd2a22-fc4a-11ea-07e5-55ca6d7639e8 +# ╠═c5ad4d40-fc57-11ea-23cb-e55487bc6f7a +# ╟─57a9bb06-fc4a-11ea-2665-7f97026981dc +# ╠═80138b30-fc4a-11ea-0e15-b54cf6b402df +# ╟─8709f208-fc4a-11ea-0203-e13eae5f0d93 +# ╠═a29c8ad0-fc4a-11ea-14c7-71435769b73e +# ╠═4e4cca22-fc4c-11ea-12ae-2b51545799ec +# ╠═16981da0-fc4d-11ea-37a2-535aa014a298 +# ╟─a9c39dbe-fc4d-11ea-2e86-4992896e2abb +# ╟─b93b88b0-fc4d-11ea-0c45-8f64983f8b5c +# ╠═7ec28cd0-fc87-11ea-2de5-1959ea5dc37c +# ╟─ada44a56-fc56-11ea-2ab7-fb649be7e066 +# ╠═d911edb6-fc87-11ea-2258-d34d61c02245 +# ╠═b3e1ebf8-fc56-11ea-05b8-ed0b9e50503d +# ╟─f8e754ee-fc73-11ea-0c7f-cdc760ab3e94 +# ╠═39982810-fc76-11ea-01c3-3987cfc2fd3c +# ╠═0f329ece-fc74-11ea-1e02-bdbddf551ef3 +# ╠═b406eec8-fc77-11ea-1a98-d36d6d3e2393 +# ╠═1f30a1ac-fc74-11ea-2abf-abf437006bab +# ╠═24934438-fc74-11ea-12e4-7f7e50f54029 +# ╟─251c06e4-fc77-11ea-1a0f-73139ba11e83 diff --git a/lecture_notebooks/week4/03-functions-are-objects.jl b/lecture_notebooks/week4/03-functions-are-objects.jl new file mode 100644 index 000000000..31bbd0137 --- /dev/null +++ b/lecture_notebooks/week4/03-functions-are-objects.jl @@ -0,0 +1,125 @@ +### A Pluto.jl notebook ### +# v0.11.14 + +using Markdown +using InteractiveUtils + +# ╔═╡ 32465c0e-fcd4-11ea-1544-df26081c7fa7 +md""" +# Functions are objects +""" + +# ╔═╡ 65c59b14-fcd7-11ea-2a19-3d084b3bca56 +square_root = sqrt + +# ╔═╡ 249cf7dc-fcdb-11ea-3630-ed2369d20041 +square_root(123) + +# ╔═╡ 6b91914a-fcd4-11ea-0d27-c99e7ef99354 +function double(x) + x * 2 +end + +# ╔═╡ b5614d1a-fcd4-11ea-19b9-45043b16b332 +function half(x) + x / 2 +end + +# ╔═╡ 0991e74c-fce3-11ea-0616-336e1d5d83e9 +things = [double, half] + +# ╔═╡ 10fe7950-fce3-11ea-1ace-e1676961935e +rand(things)(123) + +# ╔═╡ 2424dd62-fce3-11ea-14a6-81792a7dee89 +function applyboth(f, g, x) + f(g(x)) +end + +# ╔═╡ 2fb8117e-fce3-11ea-2492-55e4768f6e37 +applyboth(double, half, 10) + +# ╔═╡ 
a34557ac-fce3-11ea-1391-3d0cddd4201b +md""" +# _map_ and _filter_ +""" + +# ╔═╡ 70aa854a-fce5-11ea-3477-6df2b0ca1d22 +struct Dog + name + age + photo +end + +# ╔═╡ cbfc5ede-fce3-11ea-2044-15b8a07ef5f2 +data = [ + Dog("Floep", 13, md"![](https://i.imgur.com/4PHFyIE.jpg)"), + Dog("Hannes", 5, md"![](https://i.imgur.com/nD5c6yF.jpg)"), + Dog("Fred", 8, md"![](https://i.imgur.com/aYTy1QN.jpg)"), + Dog("Lily", 3, md"![](https://i.imgur.com/2monPgX.jpg)"), + Dog("Robert", 15, md"![](https://i.imgur.com/jEwm3Q0.jpg)"), + Dog("Kit", 2, md"![](https://i.imgur.com/KKtlIEe.jpg)"), + Dog("Spot", 10, md"![](https://i.imgur.com/hNadBtk.jpg)"), + ] + +# ╔═╡ 74f63e2c-fce9-11ea-2145-dd96e9cda96c +md"👉 Show the **photos** of all dogs older than **7 years**." + +# ╔═╡ 9b688c4a-fceb-11ea-10b1-590b77c7bfe3 +function isold(dog) + dog.age > 7 +end + +# ╔═╡ ef6ebf86-fcea-11ea-1118-4f4b4960692b +filter(isold, data) + +# ╔═╡ b7608b28-fceb-11ea-3742-a7828971d170 +filter(dog -> dog.age > 7, data) + +# ╔═╡ c53212da-fceb-11ea-0eeb-617a18323021 +special_dogs = filter(data) do dog + dog.age > 7 +end + +# ╔═╡ ea0ca73c-fceb-11ea-348a-5df7974b4aba +map(special_dogs) do dog + dog.photo +end + +# ╔═╡ 0758eff0-fcd4-11ea-3186-e1f76a06b91c +bigbreak = html""" +
+""" + +# ╔═╡ 175cb644-fcd5-11ea-22f2-3f96d6d2e637 +bigbreak + +# ╔═╡ d5c31a1a-fcd8-11ea-0841-1f4a056c048e +bigbreak + +# ╔═╡ c8fc9460-fce3-11ea-0f2a-1b87abdd12b7 +bigbreak + +# ╔═╡ Cell order: +# ╟─32465c0e-fcd4-11ea-1544-df26081c7fa7 +# ╠═65c59b14-fcd7-11ea-2a19-3d084b3bca56 +# ╠═249cf7dc-fcdb-11ea-3630-ed2369d20041 +# ╟─175cb644-fcd5-11ea-22f2-3f96d6d2e637 +# ╠═6b91914a-fcd4-11ea-0d27-c99e7ef99354 +# ╠═b5614d1a-fcd4-11ea-19b9-45043b16b332 +# ╠═0991e74c-fce3-11ea-0616-336e1d5d83e9 +# ╠═10fe7950-fce3-11ea-1ace-e1676961935e +# ╠═2424dd62-fce3-11ea-14a6-81792a7dee89 +# ╠═2fb8117e-fce3-11ea-2492-55e4768f6e37 +# ╟─d5c31a1a-fcd8-11ea-0841-1f4a056c048e +# ╟─a34557ac-fce3-11ea-1391-3d0cddd4201b +# ╠═70aa854a-fce5-11ea-3477-6df2b0ca1d22 +# ╟─cbfc5ede-fce3-11ea-2044-15b8a07ef5f2 +# ╟─74f63e2c-fce9-11ea-2145-dd96e9cda96c +# ╠═9b688c4a-fceb-11ea-10b1-590b77c7bfe3 +# ╠═ef6ebf86-fcea-11ea-1118-4f4b4960692b +# ╠═b7608b28-fceb-11ea-3742-a7828971d170 +# ╠═c53212da-fceb-11ea-0eeb-617a18323021 +# ╠═ea0ca73c-fceb-11ea-348a-5df7974b4aba +# ╟─c8fc9460-fce3-11ea-0f2a-1b87abdd12b7 +# ╟─0758eff0-fcd4-11ea-3186-e1f76a06b91c diff --git a/lecture_notebooks/week4/04-probability.jl b/lecture_notebooks/week4/04-probability.jl new file mode 100644 index 000000000..cb7dccc96 --- /dev/null +++ b/lecture_notebooks/week4/04-probability.jl @@ -0,0 +1,476 @@ +### A Pluto.jl notebook ### +# v0.11.14 + +using Markdown +using InteractiveUtils + +# This Pluto notebook uses @bind for interactivity. When running this notebook outside of Pluto, the following 'mock version' of @bind gives bound variables a default value (instead of an error). +macro bind(def, element) + quote + local el = $(esc(element)) + global $(esc(def)) = Core.applicable(Base.get, el) ? 
Base.get(el) : missing + el + end +end + +# ╔═╡ f6d07dee-fdb9-11ea-004a-f1283db60877 +begin + using Pkg + Pkg.add.(["Plots", "StatsBase", "PlutoUI", "DataStructures", "GR"]) + + using DataStructures + using Plots + using StatsBase + using PlutoUI +end + +# ╔═╡ c3dc6d9c-fdb6-11ea-3b74-e1ecfa6c6f49 +md"# Probability via computation" + +# ╔═╡ f07d314c-fdb6-11ea-0e6e-173e625133cf +md""" +We would like to model how an epidemic spreads in a population. There are many approaches to do so. + +One approach would be to think about individual people, or **agents**. They move around in an environment and interact with other agents when they are close by. If I spend time near a person who is infectious, I might catch the infection. It's too hard to model the details of the physical process by which virus particles get transmitted from one person to another, so we will model that as saying that I have a certain **probability** (chance) $p_I$ to get infected whenever I meet somebody infectious. +""" + +# ╔═╡ 8fbf80ac-fe0c-11ea-1848-9fae8515757f +md"![](https://i.imgur.com/OzlPEjn.gif)" + +# ╔═╡ 89cbbbe8-fe0c-11ea-1cf8-e33f6bd7cee2 +md""" +Once I'm infected, a complicated process takes place in my immune system which is also difficult to model. So we could say that each day I have a certain **probability** $p_R$ to recover. + +In order to implement an **agent-based model** (or "individual-based model") like this, we first need to understand what it means for something to happen with probability $p$, and how to implement that on a computer. + +We will take a practical and pragmatic approach to probability *via computation*, and leave discussions of the philosophical underpinnings of probability to other venues! +""" + +# ╔═╡ e906450c-fdb6-11ea-13c3-d1348fdae587 +md"## Random sampling" + +# ╔═╡ cd32d0c6-fdb7-11ea-3d3f-d9981951293f +md"We have already generated random objects using the `rand` function. Let's look at that function in a bit more detail. 
In this notebook we won't discuss how randomness is generated on a computer; we will just assume that these methods are already available. + +[Note that computers cannot generate true randomness -- they simulate it using complicated deterministic processes. There are [ways](https://www.random.org/) to obtain true randomness from physical processes, such as atmospheric or electronic noise.]" + +# ╔═╡ ebccc984-fdb9-11ea-2f94-997184ccc66d +md"Calling `rand` on a collection like an array or a tuple returns one of the elements in that collection with equal (**uniform**) probability, i.e. each element of the collection has the same chance of being returned. + +For example, we can choose a random friend to call: +" + +# ╔═╡ c55235d4-fdbc-11ea-0942-45ac804aa51d +friends = ["Alicia", "Jaime", "Elena", "Roberto"] + +# ╔═╡ cf2a7ec2-fdbc-11ea-1010-190de6ff85d1 +rand(friends) + +# ╔═╡ 1ef8cf70-fe13-11ea-257b-cbc9abd8ab3b +rand( (1, 2) ) + +# ╔═╡ f317779a-fdbc-11ea-1afa-2dc25c9a16f8 +md"Choosing a random element like this is called (uniform) **sampling**. + +The `rand` function has many methods that sample from different types of objects." + +# ╔═╡ e31461dc-fdbc-11ea-1398-89b7ba513f5c +md"## Rolling a die" + +# ╔═╡ b6aec1dc-fdbc-11ea-2345-831cff1d5e7c + md"""Let's roll an (unbiased) die. ["Die" is the singular of "dice".]""" + +# ╔═╡ 2fea1cb6-fdba-11ea-2744-f5dd5fe0fcd7 +begin + num_sides = 6 + sides = 1:num_sides +end + +# ╔═╡ 74430946-fe95-11ea-31d6-31bde3a357ba +sides + +# ╔═╡ 75e017d0-fe95-11ea-37b1-357616582a9e +typeof(sides) + +# ╔═╡ 5a5d74d4-fdba-11ea-16a3-ed2c24f47821 +rand(sides) + +# ╔═╡ 61f7ab10-fdba-11ea-271c-d17d8d4ebc8e +md" +We can repeat this sampling $N$ times by adding a second argument to `rand`: +" + +# ╔═╡ a36f553e-fdba-11ea-2a6a-b3ba05d11acd +md"What does it mean for the sampling to be uniform? It means that we expect each element to occur with the same **frequency** (number of occurrences). 
+ +We can compute the frequencies using the `countmap` function from the `StatsBase.jl` package. But you should think about how we could write this function ourselves!" + +# ╔═╡ a03b164e-fdba-11ea-0c7f-a59d54b2f813 +sample = rand(sides, 10) + +# ╔═╡ 10b83e00-fdbe-11ea-260d-65cf5e080534 +frequencies = StatsBase.countmap(sample) + +# ╔═╡ a6da8b1a-fe89-11ea-364c-958bddfa4612 +bar(frequencies, alpha=0.5, + size=(400, 300), leg=false, + xlabel="outcome", ylabel="number of rolls", xlim=(0, num_sides+1)) + +# ╔═╡ 2d0be59e-fdbf-11ea-0b86-a50db0aaccaa +md"We expect the **relative frequency** or **proportion** to be near $1/6$:" + +# ╔═╡ db102cb6-fdbf-11ea-0408-61334cb89f6d +begin + max_rolls = 10000 + rolls2 = rand(1:num_sides, max_rolls) +end + +# ╔═╡ b596f406-fdbf-11ea-0ed7-ab15d2da9fd2 +@bind num_rolls2 Slider(1:max_rolls, show_value=true) + +# ╔═╡ 46ae7d02-fdbf-11ea-1f19-4f94f9bbf851 +begin + freqs2 = SortedDict(StatsBase.countmap(rolls2[1:num_rolls2])) + + ks = collect(keys(freqs2)) + vs = collect(values(freqs2)) ./ num_rolls2 + + bar(ks, vs, leg=false, alpha=0.5, xlims=(0, num_sides+1), + size=(400,300), + xlabel="value", ylabel="relative frequency") + + hline!([1 / num_sides], ls=:dash, lw=3, c=:red) + + ylims!(0, 0.3) +end + +# ╔═╡ f97c2aa6-fdbf-11ea-11bf-2b44cd0a90bc +md"Note that we **pre-generated** the data, to avoid generating new random samples each time." + +# ╔═╡ 24674186-fdc5-11ea-2a67-3bd71199ae6e +md"## Random variables" + +# ╔═╡ 2b954fca-fdc5-11ea-149f-e5f7f6d49407 +md"Let's call $X$ the outcome of rolling one die. Each time we run the experiment, $X$ may take a different value. This makes $X$ an example of a **random variable**. + +[Giving a proper mathematical description of this is rather tricky; see an advanced probability course.] + +We write + +$$\mathbb{P}(X=1) = \textstyle \frac{1}{6}$$ + +to say that the probability that $X$ takes the value 1 is $\frac{1}{6}$. 
+" + +# ╔═╡ 112c47a4-fe0f-11ea-04a4-adc7e339a352 +md"## Uniform random numbers" + +# ╔═╡ 69c2c1e4-fdc0-11ea-0e76-335b6e06057d +md"From the above we can see how to do something with probability $1/6$: + +> roll a 6-sided die, and return `true` if the outcome is 1 (for example). + +Let's extend this to events which occur with a given probability $p$ with $0 \le p \le 1$: How could we sample an event with probability $p$?" + +# ╔═╡ cbe5bbc6-fe0e-11ea-17c7-095c0d485bdd +md"""Calling the `rand()` function with *no* arguments returns a random `Float64` between $0$ and $1$. This approximates uniform sampling of a *real* number between $0$ and $1$.""" + +# ╔═╡ 6d7f1ca4-fe8a-11ea-3b20-b3df98a4201b +rand(Float64) + +# ╔═╡ d5743c58-fe0e-11ea-0391-c7591b944bdd +rand() + +# ╔═╡ bf084b00-fdc1-11ea-04ca-1757996125fd +md""" +Let's see what this looks like. We'll sample uniform random variates in $[0, 1]$ and plot them as a function of "time" on the $x$ axis (i.e. the number of the trial in which they appeared). We also plot the outcomes along the $y$ axis so that we can see how the interval $[0, 1]$ gets covered: +""" + +# ╔═╡ 019bd65c-fdc3-11ea-01ba-51d497f875d2 +max_samples = 500 + +# ╔═╡ 2dd45d84-fdc3-11ea-1b61-6bc6467a5013 +sample2 = rand(max_samples) + +# ╔═╡ 08af602e-fdc3-11ea-0e47-af1047c7bd13 +@bind num_samples Slider(1:max_samples, show_value=true) + +# ╔═╡ da913e08-fdc2-11ea-14c1-6bb4fdaf9353 +begin + r = sample2[1:num_samples] + scatter(r, 1:length(r), alpha=0.5, leg=false, + size=(400, 300)) + scatter!(r, zeros(length(r)), alpha=0.5) + + ylims!(-10, max_samples) + xlims!(0, 1) + + xlabel!("random variate") + ylabel!("position in sequence (time)") +end + +# ╔═╡ e5994394-fe0e-11ea-1350-51442f863799 +md"How uniform is the sampling? Let's plot a **histogram**. 
This splits the interval $[0, 1]$ up into boxes and counts the number (frequency) of data that fall into each box:" + +# ╔═╡ 3620150e-fe0f-11ea-0eff-91a85ccf3864 +@bind bins Slider(10:1000, show_value=true) + +# ╔═╡ 479e0232-fe0f-11ea-308e-2928fe213807 +@bind random_samples Slider(1:max_samples, show_value=true) + +# ╔═╡ 022adcee-fde1-11ea-1ae5-8bcf6b129404 +histogram(sample2[1:random_samples], bins=bins, alpha=0.5, leg=false, + size=(400, 300), xlims=(0, 1)) + +# ╔═╡ 5d4102dc-fdc0-11ea-049f-7bc11f5b728d +md"## Sampling events with a given probability: Bernoulli trials" + +# ╔═╡ 86731f7a-fdc3-11ea-2955-6b938f3346a5 +md""" +How can we use *uniform* sampling to do *non-uniform* sampling? For example, to simulate an event that occurs with probability 0.25? This means that when we sample many points, the proportion that fall into $X$ should be approximately 0.25. + +Since the sampling is uniform, we can take $X = [0, 0.25]$. The definition of "uniform" is that the probability is proportional to the length of the interval. + +The recipe for sampling an event with probability $p$ is then as follows: + +> 1. Generate a uniform random variate $r \in [0, 1]$, using `rand()`. +> 2. If $r < p$ then return `true`, else return `false`. 
+""" + +# ╔═╡ ba9939f0-fe0f-11ea-0328-6780c29cc01c +md"To get a feeling for why this is a good definition, let's go back to our picture of uniform sampling and add in the cutoff at height $p$:" + +# ╔═╡ ca62d4a4-fe0f-11ea-3623-73a11752dc8c +@bind num_samples2 Slider(1:max_samples, show_value=true) + +# ╔═╡ 8fa5e992-fde1-11ea-0c65-3d83d42729ea +@bind pp Slider(0:0.01:1, show_value=true) + +# ╔═╡ 39b9d2a0-fde1-11ea-2189-97a95849152f +begin + r2 = sample2[1:num_samples2] + + scatter(r2, 1:length(r2), alpha=0.5, leg=false, + size=(400, 300)) + scatter!(r2, zeros(length(r2)), alpha=0.5) + + ylims!(-10, max_samples) + xlims!(0, 1) + + vline!([pp], ls=:dash, lw=3, c=:green) + annotate!(170, pp+0.06, text("p", color=:green)) + + which = findall(x -> x .< pp, r2) + scatter!(r2[which], which, m=:square, alpha=0.5) + scatter!(r2[which], zeros(length(r2[which])), m=:square, alpha=0.5) + +end + +# ╔═╡ f80a700a-fe15-11ea-2c48-9f499842e45d +md"We expect that out of $N$ trials, about $pN$ will be below $p$. So the proportion of the total number is $(pN) / N \simeq p$. +" + +# ╔═╡ 1fa92ed0-fdc4-11ea-0f6c-9178b21b259c +md"The above procedure is called a [**Bernoulli trial**](https://en.wikipedia.org/wiki/Bernoulli_trial) with probability $p$. +We can think of it as flipping a coin that is **biased** to fall on heads a higher proportion of the time (e.g. by making the coin out of two different metals, one of which is more dense than the other). 
+ +Julia allows us to code this in the following very compact way:" + +# ╔═╡ 4bf82e84-fdc4-11ea-2abf-19174c74f726 +bernoulli(p) = rand() < p + +# ╔═╡ bc549f38-fe15-11ea-10dd-db165347709d +3 < 4 + +# ╔═╡ 9ab21d04-fe97-11ea-0f88-11a86a4974c6 +function bernoulli2(p) + if rand() < p + return true + else + return false + end +end + + +# ╔═╡ 4f9ebeea-fdc4-11ea-052d-3b9e7dfbc02b +bernoulli(0.25) + +# ╔═╡ 5640f132-fdc4-11ea-372e-799a17716334 +md"We are using the short-form version of a function definition, together with the fact that a comparison like `x < y` returns a Boolean value." + +# ╔═╡ db63e988-fde1-11ea-152e-8b194c107e78 +md"Note that the value returned, $B$, is a random variable! It has + +$$\mathbb{P}(B = 1) = p$$ +$$\mathbb{P}(B = 0) = 1 - p$$ +" + +# ╔═╡ 11e7a0b2-fde2-11ea-0c37-6be01573167b +md"This collection of the probabilities of all the possible outcomes is the **probability distribution** of the Bernoulli random variable." + +# ╔═╡ c9b287a2-fdc4-11ea-04c6-6b0d1a6c5321 +md"## Flipping many biased coins" + +# ╔═╡ ca7b9150-fdc5-11ea-2281-b3b026825ea5 +md" +Let's suppose we flip $n$ coins, each with the same probability $p$. We will suppose that the coins are **independent**, i.e. that flipping one of them has no influence on the outcome of the others. (This would not be the case if the coins were metallic and attracted each other, for example.) + +There are many quantities of interest, or **observables**, that we might want to measure. One obvious one is the number of coins that come out as heads: +" + +# ╔═╡ 60949cce-fde2-11ea-1946-31d1b1bce37a +[bernoulli(0.3) for i = 1:10] # array comprehension + +# ╔═╡ 727b0e56-fdd7-11ea-28d1-5b859a5f8a29 +flips(n, p) = count( bernoulli(p) for i = 1:n ) + +# ╔═╡ dabc00d6-fe97-11ea-0180-cd138faaab5d +( bernoulli(0.3) for i = 1:10 ) + +# ╔═╡ d450742c-fdd7-11ea-2477-b37441b6ddfa +flips(20, 0.3) + +# ╔═╡ dca0b5a6-fdd7-11ea-15e4-0320a1ceebff +md"The function `count` counts the number of `true`s in its argument. 
The argument itself looks like an array comprehension, but with no square brackets; this is called a **generator expression**. Since `count` does not need to **materialize** the array, i.e. create it in memory, it can be faster." + +# ╔═╡ 2c2479a0-fdd8-11ea-19a2-db21219dd7c3 +md"""The number of heads obtained when flipping the $n$ coins is another random variable, $H_{n}$. Clearly its possible values are the integers between $0$ and $n$. But intuitively we expect that it's very *unlikely* to have either exactly $0$ or exactly $n$ heads. This is another example of a *non-uniform* random variable. + +We again need to calculate the **probability distribution** of the random variable $H_{n}$, i.e. the probability that it takes on each of its possible values. + +We can calculate this numerically by thinking of "flipping $n$ coins" as a single experiment, and running that experiment many times: +""" + +# ╔═╡ aa63b25e-fdd8-11ea-2b30-ed13d2072488 +run_experiment(n, p, num_times) = [flips(n, p) for i in 1:num_times] + +# ╔═╡ 48acfd94-fde3-11ea-3ae8-0d8ba164a68c +num_experiments = 1000 + +# ╔═╡ 521c85c0-fde3-11ea-1040-09a6e13a4328 +num_coins = 100 + +# ╔═╡ c55013e6-fdd8-11ea-3784-cd27c2e3d0d8 +run_experiment(num_coins, 0.3, num_experiments) + +# ╔═╡ caf791ca-fdd8-11ea-2234-bdc14d214cac +md"As expected, we get back a dataset containing integers between 0 and 100, but values near the extremes 0 and 100 occur very rarely, if at all. + +How can we calculate the probability distribution? 
We can use `countmap` again to obtain the frequencies:" + +# ╔═╡ 15231f76-fdd9-11ea-361e-0f85e1c3ace2 +begin + max_times = 10^6 + data = run_experiment(num_coins, 0.3, max_times) +end + +# ╔═╡ 29edd4e6-fdd9-11ea-016b-9142be69bf68 +@bind num_times Slider(1:max_times, show_value=true) + +# ╔═╡ 3398d3a6-fdd9-11ea-0845-f54dab63b2dc +begin + freqs3 = SortedDict(countmap(data[1:num_times])) + bar(collect(keys(freqs3)), collect(values(freqs3)) ./ num_times, + alpha=0.5, + size=(400, 300), leg=false) + + xlims!(0, num_coins) + ylims!(0, 0.2) + + xlabel!("number of heads") + ylabel!("proportion") +end + +# ╔═╡ adf1c626-fdd9-11ea-3535-2d2219feda56 +md"As we take more samples, the **empirical distribution** (calculated from the data) converges to the true underlying **population distribution** (the theoretical distribution for this random variable)." + +# ╔═╡ 92efff82-fde3-11ea-010d-a188a852b79b +md"If we take a large number of coins then we see the well-known bell-shaped distribution, called a **Gaussian distribution** or **normal distribution**. + +If we call the $i$th coin flip result $B_i$, which takes the value 0 or 1, then we have + +$$H_{n} = \sum_{i=1}^n B_i.$$ + +We are summing up independent random variables which take the value 0 or 1 (the number of heads in each coin flip), and the Central Limit Theorem says that the sum of independent random variables usually converges to a normal distribution. + +Note that we have not said what we mean by **converges** here. Making that precise turns out to be quite difficult. Intuitively it means that the function describing the probability distribution converges at each point to a limiting curve, but unfortunately this intuition is not always correct. 
+" + +# ╔═╡ Cell order: +# ╠═f6d07dee-fdb9-11ea-004a-f1283db60877 +# ╟─c3dc6d9c-fdb6-11ea-3b74-e1ecfa6c6f49 +# ╟─f07d314c-fdb6-11ea-0e6e-173e625133cf +# ╟─8fbf80ac-fe0c-11ea-1848-9fae8515757f +# ╟─89cbbbe8-fe0c-11ea-1cf8-e33f6bd7cee2 +# ╟─e906450c-fdb6-11ea-13c3-d1348fdae587 +# ╟─cd32d0c6-fdb7-11ea-3d3f-d9981951293f +# ╟─ebccc984-fdb9-11ea-2f94-997184ccc66d +# ╠═c55235d4-fdbc-11ea-0942-45ac804aa51d +# ╠═cf2a7ec2-fdbc-11ea-1010-190de6ff85d1 +# ╠═1ef8cf70-fe13-11ea-257b-cbc9abd8ab3b +# ╟─f317779a-fdbc-11ea-1afa-2dc25c9a16f8 +# ╟─e31461dc-fdbc-11ea-1398-89b7ba513f5c +# ╟─b6aec1dc-fdbc-11ea-2345-831cff1d5e7c +# ╠═2fea1cb6-fdba-11ea-2744-f5dd5fe0fcd7 +# ╠═74430946-fe95-11ea-31d6-31bde3a357ba +# ╠═75e017d0-fe95-11ea-37b1-357616582a9e +# ╠═5a5d74d4-fdba-11ea-16a3-ed2c24f47821 +# ╟─61f7ab10-fdba-11ea-271c-d17d8d4ebc8e +# ╟─a36f553e-fdba-11ea-2a6a-b3ba05d11acd +# ╠═a03b164e-fdba-11ea-0c7f-a59d54b2f813 +# ╠═10b83e00-fdbe-11ea-260d-65cf5e080534 +# ╟─a6da8b1a-fe89-11ea-364c-958bddfa4612 +# ╟─2d0be59e-fdbf-11ea-0b86-a50db0aaccaa +# ╠═db102cb6-fdbf-11ea-0408-61334cb89f6d +# ╠═b596f406-fdbf-11ea-0ed7-ab15d2da9fd2 +# ╟─46ae7d02-fdbf-11ea-1f19-4f94f9bbf851 +# ╟─f97c2aa6-fdbf-11ea-11bf-2b44cd0a90bc +# ╟─24674186-fdc5-11ea-2a67-3bd71199ae6e +# ╟─2b954fca-fdc5-11ea-149f-e5f7f6d49407 +# ╟─112c47a4-fe0f-11ea-04a4-adc7e339a352 +# ╟─69c2c1e4-fdc0-11ea-0e76-335b6e06057d +# ╟─cbe5bbc6-fe0e-11ea-17c7-095c0d485bdd +# ╠═6d7f1ca4-fe8a-11ea-3b20-b3df98a4201b +# ╠═d5743c58-fe0e-11ea-0391-c7591b944bdd +# ╟─bf084b00-fdc1-11ea-04ca-1757996125fd +# ╠═019bd65c-fdc3-11ea-01ba-51d497f875d2 +# ╠═2dd45d84-fdc3-11ea-1b61-6bc6467a5013 +# ╟─08af602e-fdc3-11ea-0e47-af1047c7bd13 +# ╟─da913e08-fdc2-11ea-14c1-6bb4fdaf9353 +# ╟─e5994394-fe0e-11ea-1350-51442f863799 +# ╠═3620150e-fe0f-11ea-0eff-91a85ccf3864 +# ╠═479e0232-fe0f-11ea-308e-2928fe213807 +# ╠═022adcee-fde1-11ea-1ae5-8bcf6b129404 +# ╟─5d4102dc-fdc0-11ea-049f-7bc11f5b728d +# ╟─86731f7a-fdc3-11ea-2955-6b938f3346a5 +# ╟─ba9939f0-fe0f-11ea-0328-6780c29cc01c 
+# ╠═ca62d4a4-fe0f-11ea-3623-73a11752dc8c +# ╠═8fa5e992-fde1-11ea-0c65-3d83d42729ea +# ╟─39b9d2a0-fde1-11ea-2189-97a95849152f +# ╟─f80a700a-fe15-11ea-2c48-9f499842e45d +# ╟─1fa92ed0-fdc4-11ea-0f6c-9178b21b259c +# ╠═4bf82e84-fdc4-11ea-2abf-19174c74f726 +# ╠═bc549f38-fe15-11ea-10dd-db165347709d +# ╠═9ab21d04-fe97-11ea-0f88-11a86a4974c6 +# ╠═4f9ebeea-fdc4-11ea-052d-3b9e7dfbc02b +# ╟─5640f132-fdc4-11ea-372e-799a17716334 +# ╟─db63e988-fde1-11ea-152e-8b194c107e78 +# ╟─11e7a0b2-fde2-11ea-0c37-6be01573167b +# ╟─c9b287a2-fdc4-11ea-04c6-6b0d1a6c5321 +# ╟─ca7b9150-fdc5-11ea-2281-b3b026825ea5 +# ╠═60949cce-fde2-11ea-1946-31d1b1bce37a +# ╠═727b0e56-fdd7-11ea-28d1-5b859a5f8a29 +# ╠═dabc00d6-fe97-11ea-0180-cd138faaab5d +# ╠═d450742c-fdd7-11ea-2477-b37441b6ddfa +# ╟─dca0b5a6-fdd7-11ea-15e4-0320a1ceebff +# ╟─2c2479a0-fdd8-11ea-19a2-db21219dd7c3 +# ╠═aa63b25e-fdd8-11ea-2b30-ed13d2072488 +# ╠═48acfd94-fde3-11ea-3ae8-0d8ba164a68c +# ╠═521c85c0-fde3-11ea-1040-09a6e13a4328 +# ╠═c55013e6-fdd8-11ea-3784-cd27c2e3d0d8 +# ╟─caf791ca-fdd8-11ea-2234-bdc14d214cac +# ╠═15231f76-fdd9-11ea-361e-0f85e1c3ace2 +# ╠═29edd4e6-fdd9-11ea-016b-9142be69bf68 +# ╟─3398d3a6-fdd9-11ea-0845-f54dab63b2dc +# ╟─adf1c626-fdd9-11ea-3535-2d2219feda56 +# ╟─92efff82-fde3-11ea-010d-a188a852b79b diff --git a/website-docs.md b/website-docs.md index bb0bddebf..df5be6870 100644 --- a/website-docs.md +++ b/website-docs.md @@ -1,19 +1,19 @@ # Documentation on course website -This document is for course staff and anyone who would like to edit the course website that is available at [http://mitmath.github.io/18S191/](http://mitmath.github.io/18S191/). +This document is for course staff and anyone who would like to edit the course website that is available at [computationalthinking.mit.edu/Fall20](https://computationalthinking.mit.edu/Fall20). The code for the website is in the `website/` directory in this repo. 
It is processed by [Franklin.jl](https://franklinjl.org/) to generate the website whenever any file in this repository is changed. - cd to the `website/` and launch Julia. Then run the following to install PkgPage, a Franklin.jl helper package that we require. + cd to the `website/` and launch Julia. Then run the following to install Franklin ```julia using Pkg -Pkg.add("PkgPage") +Pkg.add("Franklin") ``` Then run this in the Julia REPL to bring up the website server. ```julia -using PkgPage +using Franklin serve() ``` @@ -43,4 +43,3 @@ Once you have added this entry, you can embed the video into a markdown file wit ``` {{youtube my-video}} ``` - diff --git a/website/.gitignore b/website/.gitignore index fe9c8bf85..33cfcafc9 100644 --- a/website/.gitignore +++ b/website/.gitignore @@ -1 +1,3 @@ __site + +Manifest.toml \ No newline at end of file diff --git a/website/HOW TO run the course page locally.md b/website/HOW TO run the course page locally.md new file mode 100644 index 000000000..310713cd6 --- /dev/null +++ b/website/HOW TO run the course page locally.md @@ -0,0 +1,21 @@ +# 18S191 course website + +The website was written in Franklin.jl - a static site generator with Julia as templating language. + +Whenever you push to this folder of the repository, the website will automatically rebuild and update. To test your changes locally before pushing, follow these steps: + +1. In a terminal, navigate to this folder using `cd`. +2. Run Julia with this folder as the active project, and `instantiate`: +``` +$ julia --project +julia> ] +(website) pkg> instantiate +``` +3. Import & run our buddy Franklin: +``` +julia> using Franklin +# ignore some warnings +julia> serve() +``` + +Franklin will now launch a _live dev server_: whenever you change a file, your browser tab will automatically refresh. 
\ No newline at end of file diff --git a/website/Project.toml b/website/Project.toml index eaac7b910..90516f20d 100644 --- a/website/Project.toml +++ b/website/Project.toml @@ -3,4 +3,3 @@ Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" Franklin = "713c75ef-9fc9-4b05-94a9-213340da978e" JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a" -PkgPage = "e7214860-93a8-4f22-b43d-bd447d1a2094" diff --git a/website/_css/custom.css b/website/_css/custom.css index 8b281f223..c9de690ad 100644 --- a/website/_css/custom.css +++ b/website/_css/custom.css @@ -17,6 +17,11 @@ html, body { } +.sidebar { + z-index: 2; + overflow-y: auto; + overflow-x: hidden; +} .content { padding-top: 0; @@ -88,11 +93,11 @@ pre.code-output { } } .sidebar .container.sidebar-sticky { - top: 2rem; + top: 1rem; } .sidebar-nav .sidebar-nav-item.active { box-sizing: border-box; - border-bottom: 1px #f1f1f1 solid; + background: #f0f8ff40; margin-right: -1em; margin-left: -0.5em; padding-left: 0.5em; @@ -104,9 +109,42 @@ pre.code-output { margin: 0.25em 0; } +.sidebar-nav .week { + font-style: italic; + display: flex; + align-items: center; + color: #c08282; +} +.sidebar-nav .week::before, +.sidebar-nav .week::after { + content: " "; + flex: 1; + border-bottom: 1px dashed #c08282; +} +.sidebar-nav .week::before { + margin-right: .25em; +} +.sidebar-nav .week::after { + margin-left: .25em; +} + h1 { margin-top: 2em; } h2 { margin-top: 1em; } + + +.page-foot img.github-logo { + width: 1em; + height: 1em; + margin: 0; + padding: 0; + display: inline-block; + margin-bottom: -2px; +} + +.page-foot { + margin-bottom: 0; +} \ No newline at end of file diff --git a/website/_layout/foot.html b/website/_layout/foot.html index 3f2d7b9e3..0f9d6fea3 100644 --- a/website/_layout/foot.html +++ b/website/_layout/foot.html @@ -22,5 +22,14 @@ {{ if hascode }} {{ insert foot_highlight.html }} {{ end }} + + + + diff --git a/website/_layout/head.html b/website/_layout/head.html index 
c56e33b7a..5196442db 100644 --- a/website/_layout/head.html +++ b/website/_layout/head.html @@ -10,4 +10,11 @@ {{insert sidebar.html}} + +{{isdef airtime}} + +{{end}} diff --git a/website/_layout/page_foot.html b/website/_layout/page_foot.html index 7bb58e586..d8e2f9a64 100644 --- a/website/_layout/page_foot.html +++ b/website/_layout/page_foot.html @@ -1,5 +1,6 @@
diff --git a/website/_layout/sidebar.html b/website/_layout/sidebar.html index 0989d34b0..469ce6035 100644 --- a/website/_layout/sidebar.html +++ b/website/_layout/sidebar.html @@ -1,11 +1,12 @@