diff --git a/.gitignore b/.gitignore index 5ed9f82..5115b98 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,6 @@ .obsidian .obsidian/* -./todos +todos/ out.md out.markdown @@ -8,4 +8,11 @@ out.markdown *.slide.md SLIDES.md -obsidian.css \ No newline at end of file +obsidian.css + +.ipynb_checkpoints/ + +.DS_Store + +*.class +target/ \ No newline at end of file diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..985d81b --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "kafka-training"] + path = kafka-training + url = https://github.com/riccardotommasini/kafka-training/ diff --git a/.ipynb_checkpoints/MongoDB_HW-checkpoint.ipynb b/.ipynb_checkpoints/MongoDB_HW-checkpoint.ipynb deleted file mode 100644 index 49a85c4..0000000 --- a/.ipynb_checkpoints/MongoDB_HW-checkpoint.ipynb +++ /dev/null @@ -1,592 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Week 4: Document-Based Stores (MongoDB)\n", - "### Student ID: [#####]\n", - "### Subtasks Done: [#,#,..]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Task 1: Create a simple MongoDB out of this relational model" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This is a toy DB about movies and actors who played roles in these movies. This DB is consisted of \n", - "\n", - "- A \"Person\" table who has a unique id, and a name fields.\n", - "\n", - "- Another \"Movie\" table that has a unique id, a title, a country where it was made, and a year when it was released.\n", - "\n", - "- There is (m-n) or \"many-many\" relationship between these two tables (i.e basically, many actors can act in many movies, and the movie include many actors)\n", - "- Therefore, we use the \"Roles\" table in which we can deduct which person has acted in which movie, and what role(s) they played.\n", - "\n", - "\"3\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Connect to the MongoDB server, and create a mongoDB with the name 'moviedb'" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "##YOUR CODE HERER" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Create Person/Actor collection" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "##YOUR CODE HERER" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Insert the data into the Person Table" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "personList = [\n", - " { \"id\": 1, \"name\": \"Charlie Sheen\" },\n", - " { \"id\": 2, \"name\": \"Michael Douglas\"},\n", - " { \"id\": 3, \"name\": \"Martin Sheen\"},\n", - " { \"id\": 4, \"name\": \"Morgan Freeman\"}\n", - "]\n", - "\n", - "...###YOUR CODE HERE\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Creating rest of Collections (\"Movies\", \"Roles\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "restcols = [\"Movies\",\"Roles\"]\n", - "\n", - "for col in restcols:\n", - " ...###YOUR CODE HERE" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Inserting data into the movie Collection" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "moviescoll = 
...###YOUR CODE HERE\n", - "\n", - "movieList = [\n", - " { \"id\": 1, \"title\": \"Wall Street\", \"country\":\"USA\",\"year\":1987},\n", - " { \"id\": 2, \"title\": \"The American President\", \"country\":\"USA\",\"year\":1995},\n", - " { \"id\": 3, \"title\": \"The Shawshank Redemption\", \"country\":\"USA\",\"year\":1994},\n", - "]\n", - "\n", - "...###YOUR CODE HERE" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Inserting data into the roles Collection" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "rolesCol = ...\n", - "\n", - "roleList = [\n", - " { \"personId\": 1, \"movieId\": 1, \"role\":[\"Bud Fox\"]},\n", - " { \"personId\": 2, \"movieId\": 1, \"role\":[\"Carl Fox\"]},\n", - " { \"personId\": 3, \"movieId\": 1, \"role\":[\"Gordon Gekko\"]},\n", - " { \"personId\": 2, \"movieId\": 2, \"role\":[\"A.J. MacInerney\"]},\n", - " { \"personId\": 3, \"movieId\": 2, \"role\":[\"President Andrew Shepherd\"]},\n", - " { \"personId\": 4, \"movieId\": 3, \"role\":[\"Ellis Boyd 'Red' Redding\"]}\n", - "]\n", - "\n", - "...###YOUR CODE HERE" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Just for your info:\n", - "\n", - "#### Another Way of Modeling this M-N model in Mongo would be using the Forien Keys \n", - "\n", - "\n", - "* Movies\n", - "\n", - "\n", - "```[\n", - "\n", - "{\n", - "\t\"_id\": 1,\n", - "\t\"title\":\"Wall Street\",\n", - "\t\"country\":\"USA\",\n", - "\t\"year\":1987,\n", - "\t\"persons\":[1,2]\n", - "},\n", - "\n", - "{\n", - "\t\"_id\": 2,\n", - "\t\"title\":\"The American President\",\n", - "\t\"country\":\"USA\",\n", - "\t\"year\":1995,\n", - "\t\"persons\":[2]\n", - "}]\n", - "```\n", - "* Actors\n", - "\n", - "```\n", - "[{\n", - " \"_id\": 1,\n", - " \"name\": \"Charlie Sheen\",\n", - " \"movies\":[\n", - " {\"role\": \"Bud Fox\", \"movie_id\":1}\n", - " ]\n", - "},\n", - "\n", - "{\n", - " \"_id\": 2,\n", - " \"name\": \"Micheal Douglas\",\n", - " \"movies\":[\n", - " {\"role\": \"Gordon Geko\", \"movie_id\":1},\n", - " {\"role\": \"President Andrew Shepherd\", \"movie_id\":2}\n", - " ]\n", - "}\n", - "\n", - "] ```\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Get all actors in your Mongo DB" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "###YOUR CODE HERE" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Get actors with names start with 'C' letter" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "###YOUR CODE HERE" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Get all Movies sorted from recent to old! (get only the title and year fields)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "###YOUR CODE HERE" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Get all Movies released in the 90s (after year (1990) and before 2000) ordered from old to recent." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "###YOUR CODE HERE" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Get Movies and Actors from your \"movies\" DB\n", - "* Hint : use the '$lookup' operator.\n", - "* The Result should be something like the following:\n", - "\n", - "Charlie Sheen : Wall Street\n", - "Michael Douglas : Wall Street\n", - "Martin Sheen : Wall Street\n", - "Michael Douglas : The American President\n", - "Martin Sheen : The American President\n", - "Morgan Freeman : The Shawshank Redemption\n", - "" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "###YOUR CODE HERE" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### For each Actor, get count of \"Movies\" he acted in." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "###YOUR CODE HERE" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### In your DB, list the movies that every Actor played" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "###YOUR CODE HERE" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Get the Persons/Actors who acted in \"Wall Street\" movie\n", - "- Hint use `$lookup` , `$match` operators in the aggregation piepeline" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "###YOUR CODE HERE" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Get the Movies in which \"Micheal Douglas\" has played a role in" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "###YOUR CODE HERE" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Get count of \"Movies\" in your DB" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "###YOUR CODE HERE" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### update the year of the 'Wall Street' movie was released in to be 2000(which is not true BTW :)\n", - "- Show that movie before and After updating it" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "###YOUR CODE HERE" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Delete all the persons with names start with 'M' letter." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "###YOUR CODE HERE" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Task 2: Extend your Mongo-\"MovieDB\" \n", - "\n", - "Imagine now that we are going to extend our DB with new movies, actors, even with new directors.\n", - "\n", - "- We add **\"The matrix\"** movie which was released in USA, (1999), and has a new property/field \"Tagline\" (\"Welcome to the Real World\").\n", - " \n", - "- We will also add 4 new actors (Person):\n", - " - **\"Keanu Reeves\"** who was born in (1964). 
Note: \"born\" property is also new.\n", - " - **\"Carrie-Anne Moss\"** who was born in (1967).\n", - " - **\"Laurence Fishburne\"** who was born in (1960).\n", - " - **\"Hugo Weaving\"** who was born in (1960).\n", - " \n", - "- Moreover, we add 2 directors (Person) :\n", - " - **\"Lilly Wachowski\"**, born in (1967)\n", - " - **\"Lana Wachowski\"**, born in(1965)\n", - "- For these directors specify one more label/field as (\"Director\"). (You can add this while inserting the director documents)\n", - " \n", - " \n", - "- We will also create a new collection \"Directed\" that is directed from the later 2 directors to \"the Matrix\" movie." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Add the Movie \"The Matrix\" with the provided data to the Movies collection" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "###YOUR CODE HERE" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Insert the new 4 actors to the person collection" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#Notice, How is easy to add a new feild compared to the RDB\n", - "newActorList = [\n", - " { \"id\": 5, \"name\": \"Keanu Reeves\", \"born\":1964 },\n", - " { \"id\": 6, \"name\": \"Carrie-Anne Moss\", \"born\":1967},\n", - " { \"id\": 7, \"name\": \"Laurence Fishburne\", \"born\":1960},\n", - " { \"id\": 8, \"name\": \"Hugo Weaving\", \"born\":1960}\n", - "]\n", - "\n", - "###YOUR CODE HERE" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Insert the new 2 directors to the person collection" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "###YOUR CODE HERE" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Create the \"Directed\" collection, and insert the data into it " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "###YOUR CODE HERE" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Get only the directors from the person collection (i.e. persons marked with the label \"Director\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "###YOUR CODE HERE" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Perform a query that get persons (names, and born year) who Directed \"The Matrix\" movie." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "###YOUR CODE HERE" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " ## How long did it take you to solve the homework?\n", - " \n", - "Please answer as precisely as you can. It does not affect your points or grade in any way. It is okey, if it took 0.5 hours or 24 hours. The collected information will be used to improve future homeworks." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Answer:" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**
THANK YOU FOR YOUR EFFORT!
**" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.5" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/Apache Flink.md b/Apache Flink.md deleted file mode 100644 index d0833e9..0000000 --- a/Apache Flink.md +++ /dev/null @@ -1,26 +0,0 @@ -### Systems Overview: [[Apache Flink]] - -![inline](./attachments/Images/Flink.png) - -Apache Flink is a distributed platform for streaming data (DataStream -API) and batch data (DataSet API). The dataflow engine, the core of the -platform, guarantees the fault tolerance during the distributed -computations. Apache Flink is based on the parallelization contract or -PACT, i.e., a programming model that generalizes MapReduce. - -- Flink programs are translated into Direct-Acyclic Graphs that - describe the operations. - -- Such DAGs are further translated into low-level job graphs - -- Job graphs are generic streaming programs - -References: - -Carbone, Paris, et al. "Apache flink: Stream and batch processing in a -single engine." Bulletin of the IEEE Computer Society Technical -Committee on Data Engineering 36.4 (2015). - -Carbone, Paris, et al. "State management in Apache Flink®: consistent -stateful distributed stream processing." Proceedings of the VLDB -Endowment 10.12 (2017): 1718-1729. \ No newline at end of file diff --git a/Apache Kafka.md b/Apache Kafka.md deleted file mode 100644 index 1ed4014..0000000 --- a/Apache Kafka.md +++ /dev/null @@ -1,331 +0,0 @@ -# Apache Kafka[^1] - -![inline](./attachments/img0022.png) - -An overview - -### Motivation - -Data pipelines start with a small number of systems to integrates. A single ETL (extract, transform, load) process move data from the source to the interested applications. - -![inline](./attachments/WhyKafka.png) - - -### Motivation - -But data pipeline grow over time. Adding new system causes the need of new ETL process. The code-base grows together with data formats and services. - -![inline](./attachments/WhyKafka2.png) - -### Motivation - -Things end up messy when sources and sinks are coupled! - -![inline](./attachments/WhyKafka4.png) - -### An alternative: Publish/Subscribe - -PubSubs decouple data sources and their consumers making communication asynchronous and processing scalable. - -![inline](./attachments/pubsub1.png) - -### An alternative: Publish/Subscribe - -PubSubs organize messages logically so that it is easier for the interested consumers to access. - -![inline](./attachments/pubsub2.png) - -### Apache Kafka - -Apache Kafka is an horizontally scalable, fault-tolerant, publish-subscribe system. It can process over 1 trillion messages without neglecting durability, i.e., it persists data on disk. - -![right fit](./attachments/kafka.png) - -### Kafka Conceptual View - -- **Messages**, the basic unit in Kafka, are organized in **Topics** - -- **Producers** write messages topics - -- **Consumers** read messages by from topics - -![right fit](./attachments/kafkalogic.png) - -### Kafka Conceptual View: Example - -![image](./attachments/kafkacvexample.png) - ---- - -![inline](./attachments/kafkacvexample.png) - - -### Kafka Logical View - -- **Messages** are key-value pairs - -- **Brokers** are the main component inside the Kafka Cluster. 
-
-- **Producers** write messages to a certain broker
-
-- **Consumers** read messages from a certain broker
-
-![left fit](./attachments/kafkaconcept.png)
-
-### Kafka Physical View
-
-- **Topics** are partitioned across brokers using the message **Key**.
-
-- Typically, **Producers** hash the message key to determine the partition. They also serialize the message
-
-- **Consumers** read messages from brokers and de-serialize them
-
-![right fit](./attachments/kafkaphysic.png)
-
-### Kafka Physical View: Zoom In
-
-![image](./attachments/kafkaphysic.png)
-
-
----
-
-
-![fit](./attachments/kafkaphysic.png)
-
-### Topics Partitions
-
-Producers shard data over a set of Partitions
-
-- Each Partition contains a subset of the Topic's messages
-
-- Typically, the message key is used to determine which Partition a message is assigned to
-
-- Each Partition is an ordered, immutable log of messages
-
-### Topics Partitions and Distributed Consumption
-
-- Different Consumers can read data from the same Topic
-
-  - By default, each Consumer will receive all the messages in the Topic
-
-- Multiple Consumers can be combined into a Consumer Group
-
-  - Consumer Groups provide scaling capabilities
-
-  - Each Consumer is assigned a subset of Partitions for consumption
-
-![right fit](./attachments/consumergroup.pdf)
-
-### Apache Kafka[^2]
-
-![inline](./attachments/img0022.png)
-
-Internals
-
-### Messages and Metadata
-
-Messages are Key-Value pairs, and there is no restriction on what each of them can be.
-
-Additionally, messages are enriched with metadata:
-
-- Offset
-
-- Timestamp
-
-- Compression type
-
-- Magic byte
-
-- Optional message headers API
-
-- Application teams can add custom key-value paired metadata to messages
-
-- Additional fields to support batching, exactly-once semantics, and the replication protocol
-
-
-![right fit](./attachments/commitlog.pdf)
-
-### Topics Partitions: Physical View
-
-Each Partition is stored on the Broker's disk as one or more log files. Each message in the log is identified by its offset number
-
-![inline](./attachments/commitlog.pdf)
-
-### Topics Partitions: Physical View
-
-Messages are always appended. Consumers can consume from different offsets. Brokers are single-threaded to guarantee consistency
-
-![inline](./attachments/commitlog2.pdf)
-
-### Topics Partitions: Load Balancing
-
-Producers use a partition strategy to assign each message a partition
-
-- To ensure load balancing across the Brokers
-
-- To allow user-specified keys
-
-You can customize the partition strategy, but!
- -- it must ensure load balancing across the Brokers too, i.e., hash(key) % number\_of\_partitions - -- if key is not specified, messages are sent to Partitions on a round-robin basis - -### Important: About Ordering - -If there are multiple Partitions, you will not get total ordering across all messages when reading data - -![right fit](./attachments/order.png) - -### Log Retention - -- Duration default: messages will be retained for seven days - -- Duration is configurable per Broker by setting - - - a time period - - - a size limit - -- Topic can override a Broker's retention policy - -- When cleaning up a log - - - the default policy is delete - - - An alternate policy is compact - -### Log Compaction - -A compacted log retains at least the last known message value for each key within the Partition Before After - -![inline](./attachments/compaction.pdf) - -### Fault Tolerance via a Replicated Log - -- Kafka maintains replicas of each partition on other Brokers in the cluster - - - Number of replicas is configurable - -- One Broker is the leader for that Partition - - - All writes and reads go to and from the leader - - - Other Brokers are followers - -- Replication provides fault tolerance in case a Broker goes down - -![right fit](./attachments/replicas.pdf) - -### Important: Clients do not Access Followers - -It is important to understand that Producers and Consumers only write/read to/from the leader - -- Replicas only exist to provide reliability in case of\ - Broker failure - -- If a leader fails,\ - the Kafka cluster will\ - elect a new leader\ - from among the followers - -![right fit](./attachments/replicas2.png) - -In the diagram, m1 hashes to Partition 0 and m2 hashes to Partition 1 - -### Delivery Semantics - -- At least once - - - Messages are never lost but may be redelivered - -- At most once - - - Messages are lost but never redelivered - -- Exactly once - - - Messages are delivered once and only once - -### Zookeeper - -- ZooKeeper is a centralized service that stores configurations for distributed applications - -- Kafka Brokers use ZooKeeper for a number of important internal features - - - Cluster management - - - Failure detection and recovery - - - Access Control List (ACL) storage - -### Quiz - -Provide the correct relationship - 1:1, 1:N, N:1, or N:N - - -- Broker to Partition - ? - -- Key to Partition - ? - -- Producer to Topic - ? - -- Consumer Group to Topic - ? - -- Consumer (in a Consumer Group) to Partition - ? - -### Quiz - -Provide the correct relationship - 1:1, 1:N, N:1, or N:N - - -- Broker to Partition - N:N - -- Key to Partition - N:1 - -- Producer to Topic - N:N - -- Consumer Group to Topic - N:N - -- Consumer (in a Consumer Group) to Partition - 1:N - -### Getting Exactly Once Semantics - -- Must consider two components - - - Durability guarantees when publishing a message - - - Durability guarantees when consuming a message - -- Producer - - - What happens when a produce request was sent but a network error returned before an ack? - - - Use a single writer per partition and check the latest committed value after network errors - -- Consumer - - - Include a unique ID (e.g. UUID) and de-duplicate. - - - Consider storing offsets with data - - -### Systems Overview: [[Apache Kafka]] - -![inline](./attachments/Images/Kafka.png) - -- Apache Kafka is a scalable replicated commit log that enables stream - processing at scale. 
- -- It can handle huge numbers of concurrent reads and writes - -- It comes with connector to a number of Big Data Framework, e.g., - `Storm`, `Samza`, `Flink`, `Spark`. - -- It persists messages on disk and replicated within the cluster. - -References: - -Kreps, Jay, Neha Narkhede, and Jun Rao. "Kafka: A distributed messaging -system for log processing." Proceedings of the NetDB. Vol. 11. 2011. -Wang, Guozhang, et al. "Building a replicated logging system with -Apache Kafka." Proceedings of the VLDB Endowment 8.12 (2015): -1654-1655. \ No newline at end of file diff --git a/Beyond Interactive - Notebook Innovation at Netflix.md b/Beyond Interactive - Notebook Innovation at Netflix.md deleted file mode 100644 index 811463d..0000000 --- a/Beyond Interactive - Notebook Innovation at Netflix.md +++ /dev/null @@ -1,28 +0,0 @@ - -[source](https://netflixtechblog.com/notebook-innovation-591ee3221233) - - -## Data Exploration - -occurs once, early in a project; - -It may include viewing sample data, running queries for statistical profiling and exploratory analysis, and visualizing data - -## data preparation -iterative task; - -it may include cleaning, standardizing, transforming, denormalizing, and aggregating data; - -typically the most time-intensive task of a project - -## data validation -recurring task; - -it may include viewing sample data, running queries for statistical profiling and aggregate analysis, and visualizing data; -typically occurs as part of data exploration, data preparation, development, pre-deployment, and post-deployment phases - -## productionalization - -it occurs late in a project; - -it may include deploying code to production, backfilling datasets, training models, validating data, and scheduling workflows \ No newline at end of file diff --git a/Bibliography.md b/Bibliography.md deleted file mode 100644 index a0b602b..0000000 --- a/Bibliography.md +++ /dev/null @@ -1,24 +0,0 @@ -# Bibliography - -### Data Engineer - -#### 2020 -- [What is Data Engineering: Explaining the Data Pipeline, Data Warehouse, and Data Engineer Role](https://www.altexsoft.com/blog/datascience/what-is-data-engineering-explaining-data-pipeline-data-warehouse-and-data-engineer-role/) - - -#### 2019 -- [The Role of the Data Engineer is Changingo](https://www.kdnuggets.com/2019/01/role-data-engineer-changing.html) - -#### 2017 - -- [The AI Hierarchy of Needs](https://hackernoon.com/the-ai-hierarchy-of-needs-18f111fcc007) -- [The Rise of the Data Engineer](https://medium.com/free-code-camp/the-rise-of-the-data-engineer-91be18f1e603) - -### Data Lineage - -- [What is Data Lineage](https://www.talend.com/resources/what-is-data-lineage-and-how-to-get-started/) -- [Building and Scaling Data Lineage at Netflix to Improve Data Infrastructure Reliability, and Efficiency](https://netflixtechblog.com/building-and-scaling-data-lineage-at-netflix-to-improve-data-infrastructure-reliability-and-1a52526a7977) - -### Culture - -- [Freedom & Responsibility (F&R) at Netflix](https://jobs.netflix.com/culture) \ No newline at end of file diff --git a/Bloom Filter.md b/Bloom Filter.md deleted file mode 100644 index 735aade..0000000 --- a/Bloom Filter.md +++ /dev/null @@ -1,6 +0,0 @@ -- Compact way of representing a set of items -- Checking for existence in set is cheap -- Some probability of false positives:an item not in set maycheck true as being in set -- Never false negatives - -![right fit](./attachments/bloomfilter.png) \ No newline at end of file diff --git a/CAP Theorem Mum Example.md b/CAP Theorem Mum 
Example.md deleted file mode 100644 index 1789520..0000000 --- a/CAP Theorem Mum Example.md +++ /dev/null @@ -1,16 +0,0 @@ -Explaining CAP Theorem To a 6 years old - -![[attachments/captheorem6yo.png]] - - -## Concistency - -Asking mum or data about buying a new toy will always get the same answer - -## Partition Tolerance - -If mum or data are not at home, I can still have lunch/dinner - -## Availability - -Whenever I ask for help, mum or dad will answer me \ No newline at end of file diff --git a/Caching.md b/Caching.md deleted file mode 100644 index 93ded5b..0000000 --- a/Caching.md +++ /dev/null @@ -1,69 +0,0 @@ -### What is caching? - -**From Wikipedia: "A cache is a collection of data duplicating original values stored elsewhere or computed earlier, where the original data is expensive to fetch (owing to longer access time) or to compute, compared to the cost of reading the cache."** - -**Term introducted by IBM in the 60’s** - -### The anatomy - -* **simple key/value storage** -* **simple operations** - * **save** - * **get** - * **delete** - -### Terminology - -**storage cost** - -**retrieval cost** (network load / algorithm load) - -**invalidation** (keeping data up to date / removing irrelevant data) - -**replacement policy** (FIFO/LFU/LRU/MRU/RANDOM vs. Belady’s algorithm) - -**cold cache / warm cache** - -* **cache hit and cache miss** -* **typical stats:** - * **hit ratio (hits / hits + misses)** - * **miss ratio (1 - hit ratio)** - * **45 cache hits and 10 cache misses** -* **•** **45/(45+10) = 82% hit ratio** - * **18% miss ratio** - -### When to cache? - -* **caches are only efficient when the benefits of faster access outweigh the overhead of checking and keeping your cache up to date** - * **more cache hits then cache misses** - -### Where are caches used? - -**at hardware level** (cpu, hdd) - -**operating systems** (ram) - -**web stack** - -**applications** - -**your own short term vs long term memory** - -### Caches in the web stack - -* **Browser cache** -* **DNS cache** -* **Content Delivery Networks (CDN)** -* **Proxy servers** -* **Application level** - * **full output caching** (eg. Wordpress WP-Cache - -### Efficiency of caching? - -**the earlier in the process, the closer to the original request(er), the faster** - -**browser cache will be faster than cache on a proxy** - -**but probably also the harder to** **get it right** - -**the closer to the requester the more parameters the cache depends on** diff --git a/Cassandra.md b/Cassandra.md deleted file mode 100644 index 2299df4..0000000 --- a/Cassandra.md +++ /dev/null @@ -1,551 +0,0 @@ - -# Cassandra - -![inline](https://upload.wikimedia.org/wikipedia/commons/thumb/5/5e/Cassandra_logo.svg/1280px-Cassandra_logo.svg.png) - -## History of Cassandra - -Originally designed at Facebook - -Open-sourced and now within Apache foundation - -## What Cassandra is - -- A Wide [[Column Oriented Database]] -- *tuneably* consistent (**~~C~~**) -- very fast in writes -- highly avaeng -- ailable (**A**) -- fault tolerant (**P**) -- linearly scalable, elastic scalability -- Cassandra is very good at writes, okay with reads. - -![right fit](https://www.datastax.com/sites/default/files/content/blog/blog-fix-0514.png) - -## What Cassandra is not - -- Cassandra is not a replacement for Relational Databases -- Tables should **not** have multiple access paths -- Cassandra does not support aggregates, if you need to do a lot of them, think another database. 
-- Updates and deletes are implemented as special cases of writes and that has consequences that are not immediately obvious. - -## Comparison with RDBMS - -![inline](./attachments/cassandra-26.png) - ---- - -![inline](./attachments/cassandra-27.png) - -## Use Cases - -The use-case leading to the initial design and development of Cassandra was the so entitled Inbox Search problem at Facebook. - -- ![inline 30%](https://encrypted-tbn0.gstatic.com/images?q=tbn%3AANd9GcSu9NTPsdf3vDCAsMuYTH_7jsd69WrDXlCVmw&usqp=CAU)[Purchases, test scores](https://tech.ebayinc.com/engineering/cassandra-data-modeling-best-practices-part-1/) -- Storing time series data (as long as you do your own aggregates). - - Storing health tracker data. - - Weather service history. - - ![inline 30%](https://miro.medium.com/fit/c/96/96/1*BJWRqfSMf9Da9vsXG9EBRQ.jpeg) [User Activity](https://netflixtechblog.com/scaling-time-series-data-storage-part-i-ec2b6d44ba39) -- Internet of things status and event history. -- ![inline 30%](https://farm1.staticflickr.com/781/20772148484_a1932971e7_o.jpg)[IOT for cars and trucks](http://highscalability.com/blog/2016/9/28/how-uber-manages-a-million-writes-per-second-using-mesos-and.html) -- Email envelopes—not the contents. - - -### When to consider Cassandra - -[.column] -- you need really fast writes -- you need durability -- you have lots of data (> GBs) and (>=) three servers -- your app is evolving - - startup mode, fluid data structure -- loose domain data - - “points of interest” - -[.column] -- your programmers can handle - - complexity - - consistency model - - change - - visibility tools -- your operations can deal - - hardware considerations - - data transport - - JMX monitoring - -### Advantages - -A general-purpose framework for high concurrency & load conditioning - -Decomposes applications into stages separated by queues - -Adopt a structured approach to event-driven concurrency - -## Data Model - -**RDBMSs**: domain-based model - -> what answers do I have? - -

- -**Cassandra**: query-based model - -> what questions do I have? - -^ Start from queries, then design the data model - ---- - -Cassandra does **not** support a full relational data model. - -Instead, it provides clients with a simple data model that supports **dynamic control** over data layout and formats. - -An instance of Cassandra typically consists of a one distributed multidimensional map indexed by key which contains one or more **column families** that, in turn, **rows** - -![right fit](./attachments/cassandra-25.png) - ---- -- **Rows** are identified by a string-key -- **Column Families** correponds to tables in RDBMS but may be unstructured. A column family consists of - - **(Simple) Columns Families** have a name and store a number of values per row which are identified by a timestamp - - **Super Columns Families** have a name and an arbitrary number of columns associated with them - -![inline](https://www.researchgate.net/publication/274174394/figure/fig6/AS:668443506384898@1536380759059/cassandra-data-model.ppm) - -### Keyspace - - -- Key space is typically one per application -- Keys are similar to those of databases -- Some settings are configurable only per keyspace -- Each Row must have a key - -![right fit](./attachments/cassandra-32.png) - -### Columns Families - -A Column consists of three parts - -[.column] -- name - - byte\[\] - - determines sort order - - used in queries - - indexed - -[.column] -- value - - byte\[\] - - you don’t query on column values - -[.column] -- timestamp - - long (clock) - - last write wins conflict resolution - -### Super Column Families - -- Super columns group columns under a common name -- sub-column names in a Super Column Family are **not** indexed - - top level columns (Super Column Family Name) are **always** indexed -- often used for **denormalizing** data from standard Column Families - -![right fit](./attachments/supercolumn.png) - ---- -#### Example - - ![inline](./attachments/cassandra-37.png) - ---- -#### Example (Json Notation) - - -```json - - PointOfInterest { //Supercolumn Family - key:85255 { - Phoenixzoo { phone: 480-555-5555, //column - desc: They have animals here //column }, - Spring Training { - phone: 623-333-3333, //column - desc: Fun for baseball fans. //column - } - } //end phoenix, - - key: 10019 { - Central Park //super column - { desc: Walk around. It's pretty. // missing phone column } , - Empire State Building { phone: 212-777-7777, - desc: Great view from 102nd floor. } - } //end nyc -} - ``` - - - -# Architecture - -Cassandra is required to be incrementally scalable. - -Therefore machines can join and leave a cluster (or they may crash). - -Data have to be **partitioned** and **distributed** among the nodes of a cluster in a fashion that allows *repartitioning* and *redistribution*. - -## Partitioning - -- Data of a Cassandra table get partitioned and distributed among the nodes by a consistent **order-preserving** hashing function. -- The order preservation property of the hash function is important to support **range scans** over the data of a table. -- Cassandra performs a **deterministic** load balancing - - it measures and analyzes the load information of servers and moves nodes on the consistent hash ring to get the data and processing load balanced. - -## Replication - -^ Cassandra is configured such that each row is replicated across multiple data centers. 
In essence, the preference list of a key is constructed such that the storage nodes are spread across multiple data centers
-
-- Data get replicated to a number of nodes; this number is defined as the **replication factor** per Cassandra instance.
-- Replication is managed by a **coordinator node** for the particular **key** being modified.
-- The coordinator node for any key is the **first node on the consistent hash ring** that is visited when walking from the key's position on the ring in **clockwise** direction.
-
-### ~~Replication Strategies~~ (Used to be)
-
- - **Rack Unaware**: the non-coordinator replicas are chosen by picking the N-1 successors of the coordinator on the ring
- - **Rack Aware** and **Datacenter Aware** rely on Zookeeper for leader election.
- - the elected leader is in charge of maintaining the invariant that no node is responsible for more than N-1 ranges in the ring
-
-### Replica placement strategies today [^81]
-
-[^81]: [docs](https://cassandra.apache.org/doc/latest/architecture/dynamo.html)
-
----
-#### Simple Strategy
-- Allows a single integer *replication_factor* to be defined
-- Single datacenter
-- Clockwise placement to the next node(s)
-- All nodes are treated equally, ignoring any configured data centers or racks.
-
-![right fit](./attachments/cassandra-30.png)
-
----
-#### Network Topology Strategy
-- Multiple datacenters
-- Allows a single integer *replication_factor* to be defined per data center
-- Attempts to choose replicas within a data center from different racks, as specified by the [Snitch](https://cassandra.apache.org/doc/latest/operating/snitch.html#)[^82]
-- Supports local (read) queries
-
-![right fit](./attachments/cassandra-31.png)
-
-[^82]: The Snitch teaches Cassandra about your network topology to route requests efficiently.
-
-^ NetworkTopologyStrategy should be preferred over SimpleStrategy to make it easier to add new physical or virtual datacenters to the cluster later.
-
-### Partitioner Smack-Down
-
-[.column]
-
-#### Random
-
-- the system will use MD5(key) to distribute data across nodes
-- even distribution of keys from one Column Family across ranges/nodes
-
-[.column]
-
-#### Order Preserving
-
-- key distribution determined by the token
-- lexicographical ordering
-- required for range queries
-- can specify the token for this node to use
-
-### Persistence
-
-
-Cassandra provides **durability guarantees** in the presence of node failures and network partitions by relaxing the quorum requirements
-
-The Cassandra system relies on the **local file system** for data persistence.
-
-The data is **represented** on disk using a format that lends itself to **efficient** data **retrieval**.
-
-A typical write operation involves a write into a **commit** log for durability
-and recoverability and an update into an in-memory data structure.
-
-![right fit](https://2.bp.blogspot.com/-MLpiirj6BW8/UzFD57JiPLI/AAAAAAAALBM/CmFOmzYNTyM/s1600/persist.png)
-
-# Operations
-
-![original fit](https://upload.wikimedia.org/wikipedia/commons/thumb/1/12/Basic_arithmetic_operators.svg/1200px-Basic_arithmetic_operators.svg.png)
-
-## Writes
-
-- Need to be lock-free and fast (no reads or disk seeks)
-- A client sends a write to one front-end node in the Cassandra cluster (the Coordinator)
-- The Coordinator sends it to all replica nodes responsible for that key
-- A write is atomic at the partition level, meaning inserting columns in a row is treated as one write operation.
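-
-A minimal sketch of what such a write looks like from a client, using the DataStax Python driver (`pip install cassandra-driver`); the contact point, the `moviedb` keyspace, and the `movies_by_id` table are illustrative assumptions, not part of these notes:
-
-```python
-from cassandra.cluster import Cluster
-from cassandra import ConsistencyLevel
-from cassandra.query import SimpleStatement
-
-# Hypothetical single-node cluster and keyspace, for illustration only
-cluster = Cluster(["127.0.0.1"])
-session = cluster.connect("moviedb")
-
-# The coordinator waits for a quorum of replicas to acknowledge this write
-insert = SimpleStatement(
-    "INSERT INTO movies_by_id (id, title, year) VALUES (%s, %s, %s)",
-    consistency_level=ConsistencyLevel.QUORUM,
-)
-session.execute(insert, (1, "Wall Street", 1987))
-```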
- -### Hinted Handoff - -If any replica is down, the coordinator writes to all other replicas, and keeps the write until down replica comes back up. - -When all replicas are down, the Coordinator (front end) buffers writes (for up to an hour). - -### Writing Flow - -1. Cassandra logs it in disk commit log (disk) -2. Adds *values* to appropriate *memtables* [^83] -3. When memtable is full or old, flush to disk using a Sorted String Table - -![right fit](https://static.packt-cdn.com/products/9781789131499/graphics/d8cba1d6-07f7-404e-a4e3-d73233474f3e.png) - -[source](https://subscription.packtpub.com/book/big_data_and_business_intelligence/9781789131499/2/ch02lvl1sec20/cassandra-s-write-path) - -[^83]: In-memory representation of multiple key-value pairs - -### Consistency levels for a write operations[^87] - - ANY: any node (may not be replica) - - ONE: at least one replica - - QUORUM: quorum across all replicas in all datacenters - - LOCAL-QUORUM: in coordinator’s datacenter - - EACH-QUORUM: quorum in every datacenter - - ALL: all replicas all datacenters - -[^87]: [detailed discussion](https://medium.com/@foundev/cassandra-how-many-nodes-are-talked-to-with-quorum-also-should-i-use-it-98074e75d7d5) - -### Write Consistency - -| Level | Description | Usage | -|---------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| ALL | A write must be written to the commit log and memtable on all replica nodes in the cluster for that partition. | Provides the highest consistency and the lowest availability of any other level. | -| QUORUM | A write must be written to the commit log and memtable on a quorum of replica nodes across all datacenters. | Used in either single or multiple datacenter clusters to maintain strong consistency across the cluster. Use if you can tolerate some level of failure. | -| ONE/TWO/THREE | A write must be written to the commit log and memtable of at least one/two/three replica node. | Satisfies the needs of most users because consistency requirements are not stringent. | -| ANY | A write must be written to at least one node. If all replica nodes for the given partition key are down, the write can still succeed after a hinted handoff has been written. If all replica nodes are down at write time, an ANY write is not readable until the replica nodes for that partition have recovered. | Provides low latency and a guarantee that a write never fails. Delivers the lowest consistency and highest availability. | -|...|...|...| - -## Reads - -- Coordinator can contact closest replica (e.g., in same rack) -- Coordinator also fetches from multiple replicas - - check consistency in the background, - - Makes read slower than writes (but still fast) - - initiating a **read-repair** if any two values are different using gossip - -### Reading Flow - -1. Check row cache, if enabled -2. Checks partition key cache, if enabled -3. Check the memtable -4. Fetches the data from the SSTable on disk -5. 
If the row cache is enabled, the data is added to the row cache
-
-![right fit](https://static.packt-cdn.com/products/9781789131499/graphics/04b08f1b-a6ba-4711-b3f2-5b7532176bcc.jpeg)
-
-[source](https://subscription.packtpub.com/book/big_data_and_business_intelligence/9781789131499/2/ch02lvl1sec20/cassandra-s-write-path)
-
-### Read Consistency: Read count
-
-| Level | Description | Usage |
-|-------|-------------|-------|
-| ALL | Returns the record after all replicas have responded. The read operation will fail if a replica does not respond. | Provides the highest consistency of all levels and the lowest availability of all levels. |
-| QUORUM | Returns the record after a quorum of replicas from all datacenters has responded. | Used in either single or multiple datacenter clusters to maintain strong consistency across the cluster. Ensures strong consistency if you can tolerate some level of failure. |
-| ONE/TWO/THREE | Returns a response from the closest one/two/three replica(s), as determined by the snitch. By default, a read repair runs in the background to make the other replicas consistent. | Provides the highest availability of all the levels if you can tolerate a comparatively high probability of stale data being read. The replicas contacted for reads may not always have the most recent write. |
-
-We are discussing the number of replicas that are contacted when a data object is accessed through a read operation
-
-### Read-Repair[^84]
-
-Read-repair is a **lazy** mechanism in Cassandra that ensures that the data you request from the database is accurate and consistent.
-
-For every read request, the coordinator node sends requests to all the nodes that hold the requested data, and each of them returns the data the client asked for.
-
-The most recent data is sent to the client and, asynchronously, the coordinator identifies any replicas that returned obsolete data and issues a read-repair request to each of these replicas to update their data based on the latest version.
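-
-The coordinator-side decision can be sketched in a few lines (illustrative Python pseudocode, not Cassandra's internal API): take the newest of the replica responses, answer the client with it, and schedule repairs for the stale replicas.
-
-```python
-def read_repair(responses):
-    """responses: dict mapping replica id -> (value, write_timestamp)."""
-    # The newest timestamp wins (last-write-wins conflict resolution)
-    newest_replica, (value, ts) = max(responses.items(), key=lambda kv: kv[1][1])
-    # Any replica that returned an older version gets repaired asynchronously
-    stale = [r for r, (_, t) in responses.items() if t < ts]
-    return value, stale  # value goes to the client; stale replicas are rewritten
-```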
- -[^84]: [source](https://blog.knoldus.com/the-curious-case-of-cassandra-reads) - -### Consistency Level: Quorum - -[.column] -- N **is replication factor**: the number of copies of each data item -- R **is read replica count**: the number of replicas that are contacted when a data object is accessed through a read operation -- W **is write replica count**: the number of replicas that need to acknowledge the receipt of the update before the update completes - -[.column] -- Quorum Q = N/2 + 1 -- If W+R > N and W > N/2, you have *strong* consistency -- Allowed: - -|W |R | Comment -|---|---|---------| -|1 |N | In a write-intensive application, setting W=1 and R=RF can affect durability, as failures can result in conflicting writes| -|N |1 |In read-intensive applications, setting W=RF and R=1 can affect the probability of the write succeeding.| -|Q|Q| balanced mix of reads and writes| - -### Consistency Level: Explained - -[.column] -#### Reads - - Wait for R replicas (R specified by clients) - - In background check for consistency of remaining N-R replicas - -[.column] -#### Writes - - **Block** until quorum is reached - - **Async**: Write to any node - -## Deletes - -- Delete: don’t delete item right away - - add a tombstone to the log - - Compaction will remove tombstone and delete item - -# Digression Time - -![original](https://media1.tenor.com/images/b190022cc53957ac269ac226a922d745/tenor.gif?itemid=16077304) - -## The Data Structure That Power Your Database[^85] - -[^85]: Chapter 3 - Designing Data Intensive Applications - -### [[Log]] - -- A log is an append-only sequence of records. It doesn’t have to be human-readable; -- Log-structured storage segments are typically a sequence of key-value pairs. -- These pairs appear in the order that they were written, and values later in the log take precedence over values for the same key earlier in the log. - -![right fit](./attachments/commitlog.png) - -^ Questions: -- What is the cost of lookup O(n) -- What is the cost of write O(1) -- What is the cost of read from the head O(1). - -### [[Sorted String Table]] (SSTable) - -Make a simple change to logs: sequence of key-value pairs is sorted by key. - -Merging segments is simple and efficient, even if the files are bigger than the available memory (mergesort algorithm). - -In order to find a particular key in the file, you no longer just need a spare index of the offsets - -### [[Bloom Filter]] - -- Compact way of representing a set of items -- Checking for existence in set is cheap -- Some probability of false positives and item not in set may check true as being in set -- Never false negatives - -![right fit](./attachments/bloomfilter.png) - -### - -## Cluster Membership - -- Any server in cluster could be the coordinator -- So every server needs to maintain a list of all the -other servers that are currently in the server -- List needs to be updated automatically as servers join, leave, and fail - -## Gossip Protocol - -- Each node picks its discussants (up to 3) - -- Having three messages for each round of gossip adds a degree of *anti-entropy* . - -- This process allows obtaining "**convergence**" of data shared between the two interacting nodes much faster. 
-
-- Always a constant amount of network traffic (except for gossip storms)
-
-
-![right fit](./attachments/cassandra-29.png)
-
-### Gossip Protocol in practice
-![inline](./attachments/casssandra-38.png)
-
-- regulates cluster membership
-- Nodes periodically gossip their membership list
-- On receipt, the local membership list is updated
-
-
-### Cluster Membership, contd.
-
-- Suspicion mechanisms
-- Accrual detector: the failure detector (FD) outputs a value (PHI) representing suspicion
-- Apps set an appropriate threshold
-- PHI = 5 => 10-15 sec detection time
-- PHI calculation for a member
-  - Inter-arrival times for gossip messages
-  - PHI(t) = - log(CDF or Probability(tnow – tlast))/log 10
-  - PHI basically determines the detection timeout, but is sensitive to actual inter-arrival time variations for gossiped heartbeats
-
-## Queries
-
-Values in Cassandra are addressed by the triple (row-key, column-key, timestamp), with the column-key given as
-
-- column-family:column (for simple columns contained in the column family)
-- column-family:supercolumn:column (for columns subsumed under a supercolumn).
-
-
-### what about… CQL?
-
-SELECT WHERE
-ORDER BY
-JOIN ON
-GROUP
-
-### SELECT WHERE
-
-Column Family: USER
-Key: UserID
-Columns: username, email, birth date, city, state
-
-How to support this query?
-
-
-```sql
-SELECT * FROM User WHERE city = 'Scottsdale'
-```
-
-Create a new column family called **UserCity**:
-
-Column Family: USERCITY
-Key: city
-Columns: IDs of the users in that city.
-
-Also uses the Valueless Column pattern
-
-### SELECT WHERE pt 2
-
-- Use an aggregate key
-  - **state:city: { user1, user2}**
-
-Get rows between **AZ:** & **AZ;** for all Arizona users
-
-Get rows between **AZ:Scottsdale** & **AZ:Scottsdale1** for all Scottsdale users
-
-### ORDER BY
-
-[.column]
-#### Columns
-
-are sorted according to ```CompareWith``` or ```CompareSubcolumnsWith```
-
-[.column]
-#### Rows
-
-- are *sorted* by key, regardless of partitioner
-- are *placed* according to their Partitioner:
-  - Random: MD5 of key
-  - Order-Preserving: actual key
-
-### References
-
-![left inline](./attachments/cassandra-35.png)
-
-### Extra (10)
-
-Prepare Cassandra Practice
-
-[hints](https://medium.com/@michaeljpr/five-minute-guide-getting-started-with-cassandra-on-docker-4ef69c710d84) diff --git a/Column Oriented Database.md b/Column Oriented Database.md deleted file mode 100644 index 67260b8..0000000 --- a/Column Oriented Database.md +++ /dev/null @@ -1,85 +0,0 @@ -# Column Oriented Database
-
-The approach of storing and processing data by column instead of by row has its origin in analytics and business intelligence
-
-Column-stores operating in a **shared-nothing**, massively parallel processing architecture can be used to build high-performance applications.
-
-The class of column-oriented stores, which sees Google's BigTable as its first member, is understood less strictly, also subsuming datastores that integrate column- and row-orientation.
-
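-A toy illustration of the two layouts (plain Python, not a real storage engine), reusing the movie data from earlier in these notes: the row store keeps whole tuples together, while the column store keeps all values of one attribute together, which is what enables per-column scans and compression.
-
-```python
-rows = [
-    (1, "Wall Street", 1987),
-    (2, "The American President", 1995),
-    (3, "The Shawshank Redemption", 1994),
-]
-
-# Row-oriented: one record after another; cheap to fetch a whole tuple
-row_layout = [value for row in rows for value in row]
-
-# Column-oriented: all ids, then all titles, then all years;
-# cheap to scan, project, or compress a single attribute
-column_layout = {
-    "id":    [r[0] for r in rows],
-    "title": [r[1] for r in rows],
-    "year":  [r[2] for r in rows],
-}
-```
-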
- -### Column storage - -![inline](./attachments/row-vs-column-storage.png) - -### Pros and Cons - -[.column] - -- Data compression -- Improved Bandwidth Utilization -- Improved Code Pipelining -- Improved cache locality - -[.column] - -- Increased Disk Seek[^70] Time -- Increased cost of Inserts -- Requires disk prefetching -- Adds tuple reconstruction costs - -[^70]: Seek time is the time taken for a hard disk controller to locate a specific piece of stored data - -### Tuple Reconstruction - -![inline](./attachments/tuple-reconstruction.png)![inline](./attachments/selected-bytes-per-tuple.png) - -[source](http://nms.csail.mit.edu/~stavros/pubs/vldb2006.pdf) - -^ -- Large prefetch hidesdisk seeks in columns. -- As expected, the row store is insensitive to projectivity (since it reads all data anyway), and therefore its curve remains flat. -- The column store, however, performs better most of the time, as it reads less data. -- The column store starts performing worse than the row store when it is selecting more than 85% of a tuple’s size - -## Compression - -- Increased column-store opportunities - - Higher data value locality in column stores - - Can use extra space to store multiple copies of data in different sort orders - - Techniques such as run length encoding far more useful - -
-**Extra Available**![inline 30%](https://icons.iconarchive.com/icons/pixelkit/swanky-outlines/256/15-Light-Bulb-icon.png)[Paper Summary](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.296.6581&rep=rep1&type=pdf) - -### Example (String): Run-Length Encoding -



-> aaaabbbbbbbbbbbbbcccccccdddddddeeeeeddddd -

-> 4a13b7c7d5e5f - -### Example (DB): Run-Length Encoding - -![inline](./attachments/Run-Length-Encoding-question.png) - -### Compression: Run-Length Encoding - -![inline](./attachments/Run-Length-Encoding-question2.png) - - -### Compression: Run-Length Encoding - -![inline](./attachments/Run-Length-Encoding-question3.png) - -### Compression: Run-Length Encoding - -![inline](./attachments/Run-Length-Encoding.png) - -## List of Databases - -- **[[Cassandra]]** -- Vertica -- SybaseIQ -- C-Store -- BigTable/[[HBASE]] -- MonetDB -- LucidDB - diff --git a/Data Acquisition.md b/Data Acquisition.md deleted file mode 100644 index ca164c2..0000000 --- a/Data Acquisition.md +++ /dev/null @@ -1,37 +0,0 @@ -Synonsymns: [[Data Ingestion]] [[Data Collection ]] - -It is the process of collecting raw data from various silo databases or files and integrating it into a data lake on the data processing platform, e.g., Hadoop data lake. - -Data collection corresponds the extract (E) in an ETL/ELT pipeline. - -Two forms of data collection: Collection and Ingestion - -- Batch vs Streaming -- Pull vs Push -- Data flow vs Query Languages - -Additionally, data can be Structured or Unstructured - -### Two Schools - -[.column] - -- Batch -- Pull -- Query-Based - -[.column] - -- Streaming -- Push -- Dataflow based - -### Data Collection vs Ingestion examples - -- Accessing Databases -- Crowling the Web -- Log processing - -[[HDFS]] - -[[Apache Kafka]] \ No newline at end of file diff --git a/Data Driven Decision Making.md b/Data Driven Decision Making.md deleted file mode 100644 index 5ad3fd1..0000000 --- a/Data Driven Decision Making.md +++ /dev/null @@ -1,59 +0,0 @@ -footer: Emanuele Della Valle - emanuele.dellavalle@polimi.it - @manudellavalle - [http://emanueledellavalle.org](http://emanueledellavalle.org) -slidenumbers: true -slide-transition: fadeThroughColor(#ffffff) - - -# Data-driven Decision Making for Data-driven Organizations - -### Taking Decisions -In many organizations decisions are made by "questionable" methodologies such as -- Highest Paid Person Opinion (HiPPO) - -- Flipism (all decisions are made by flipping a coin) - -### Taking Decisions: HiPPO vs Flipism -![inline](./attachments/01a_Data-driven-decisions-006.jpg) - -### Taking Decisions: -![inline](./attachments/01a_Data-driven-decisions-010.png) - -### Taking Decisions (cont.) -- In many organizations decisions are made by "questionable" methodologies such as - - Highest Paid Person Opinion (HiPPO) - - Flipism (all decisions are made by flipping a coin) -- This could have been the right approach in the '70s ... - - See the "Theory of Bounded Rationality" by Herbert Simons - -### Taking Decisions (cont.) - -![inline](./attachments/01a_Data-driven-decisions-016.png) - -### Taking Decisions (cont.) -- In many organizations decisions are made by "questionable" methodologies such as - - Highest Paid Person Opinion (HiPPO) - - Flipism (all decisions are made by flipping a coin) -- This could have been the right approach in the '70s ... - - See the "Theory of Bounded Rationality" by Herbert Simons - - ... but in the Digital Era one can dream of data-driven organization - -### Taking Decisions: data-driven organization ! -![inline](./attachments/01a_Data-driven-decisions-022.png) - -### Data-Driven Decisions -**Decisions** no longer have to be made in the dark or based on gut instinct; they can be based on** evidence, experiments** and more **accurate** forecasts. 
-
-- McKinsey
-
-### Data-driven organizations
-- perform better
-  - The data shows where they can streamline their processes
-- are operationally more predictable
-  - Data insights fuel current and future decision making
-- are more profitable
-  - Constant improvements and better predictions help to outsmart the competition and improve innovation.
-
-### The Moneyball case
-Winning 20 consecutive games by building a team of undervalued talent, applying data science to scouting and player analysis.
-
-![inline](./attachments/01a_Data-driven-decisions-030.jpg)![inline](https://www.youtube.com/watch?v=KWPhV6PUr9o&t=85s)
-
-### Credits
-- [Becoming a data-driven organization: The what, why and how. SAS, 2018](https://www.sas.com/en_us/whitepapers/becoming-data-driven-organization-109150.html)
-- [The Age Of Analytics: Competing In A Data-driven World. McKinsey, 2016](http://www.mckinsey.com/business-functions/mckinsey-analytics/our-insights/the-age-of-analytics-competing-in-a-data-driven-world)
\ No newline at end of file
diff --git a/Data Engineer.md b/Data Engineer.md
deleted file mode 100644
index 5787219..0000000
--- a/Data Engineer.md
+++ /dev/null
@@ -1,352 +0,0 @@
-autoscale: true
-
-# Data Engineer
-
-![original](https://upload.wikimedia.org/wikipedia/commons/5/57/Who_is_it.png)
-
-### Data Science[^01]
-
-![inline](./attachments/what-is-data-science.jpg)
-
-[^01]:[Source](https://thedatascientist.com/data-science-considered-own-discipline/)
-
-### Roles in a Data Science Project[^02]
-
- -![inline](http://emanueledellavalle.org/slides/dspm/img/DS-roles.png) - -[^02]: http://emanueledellavalle.org/slides/dspm/ds4biz.html#25 - ---- -### Roles in a Data Science Project[^02] - -
-
-

![inline](./attachments/DS-roles.png)

### The Data Engineer

![](https://www.clandestinecritic.co.uk/wp-content/uploads/2012/08/the-dark-knight-rises-poster-landscape.jpg)

A dedicated specialist who keeps data available and usable by others (Data Scientists).[^03]

Data engineers set up and operate the organization’s data infrastructure, preparing it for further analysis by data analysts and scientists.[^03]

The data engineering field can be thought of as a superset of business intelligence and data warehousing that brings more elements from software engineering.[^04]

[^03]:[What is Data Engineering](https://medium.com/datadriveninvestor/what-is-data-engineering-explaining-the-data-pipeline-data-warehouse-and-data-engineer-role-1a4b182e0d16)

[^04]: [Source: The Rise of the Data Engineer](https://www.freecodecamp.org/news/the-rise-of-the-data-engineer-91be18f1e603/)

### Data Engineering

- -![](./attachments/dataengineer.png) - -> Data engineering is a set of operations aimed at creating interfaces and mechanisms for the flow and access of information[^03]. - ---- - - - ---- - -![inline](./attachments/dataengineer.png) - ---- - -### Netflix's Perspective[^05] - -![inline 90%](https://miro.medium.com/max/700/1*NRoFl1l4lIVQAAvmBOKd4A.jpeg) -[^05]: [Netflix Innovation](https://netflixtechblog.com/notebook-innovation-591ee3221233) - - -^ -- a data engineer might create a new aggregate of a dataset containing trillions of streaming events -- analytics engineer might use that aggregate in a new report on global streaming quality -- a data scientist might build a new streaming compression model reading the report - -^ each of these workflows has multiple overlapping tasks: - - --- -### The Knowledge Scientist[^06] - -![inline](./attachments/the_gift_of_knowledge.jpeg) - -[^06]: [The Manifesto](https://www.knowledgescientist.org/) - ---- -### Google's Two-Cents - ![inline](./attachments/google-dataeng.png) - - - --- -[.background-color: #ffffff] - -# Philosophy of (Data) Science[^07] -![inline](https://upload.wikimedia.org/wikipedia/commons/6/6d/Data_types_-_en.svg) - -^ Nowdays we deal with a number of data from different domains. - -[^07]: [Data as Fact](https://en.wikipedia.org/wiki/DIKW_pyramid#Data_as_fact) - ---- - -# What is Data? - ---- -![inline](https://alchetron.com/cdn/data-star-trek-f70d3e0b-e5fe-455d-b118-640f983329b-resize-750.jpeg) - ---- - -### Oxford Dictionary - -
- -*Data \[__uncountable, plural__\] facts or information, especially when examined and used to find out things or to make decisions.* [^08] - -[^08]:[Def](https://www.oxfordlearnersdictionaries.com/definition/english/data) - -### Wikipedia -Data (treated as singular, plural, or as a mass noun) is any sequence of one or more symbols given meaning by specific act(s) of interpretation [^09] - -[^09]: [Data in Computing](https://en.wikipedia.org/wiki/Data_(computing)) - ---- -[.background-color: #ffffff] - -![right](https://upload.wikimedia.org/wikipedia/commons/0/06/DIKW_Pyramid.svg) - ---- - -### Data Warehouse: A Traditional Approach: - -> A data warehouse is a copy of transaction data specifically structured for query and analysis. — [Ralph Kimball](https://en.wikipedia.org/wiki/Ralph_Kimball) - -
-

> A data warehouse is a subject-oriented, integrated, time-variant and non-volatile collection of data in support of management’s decision making process. -- [Bill Inmon](https://en.wikipedia.org/wiki/Bill_Inmon)

^
- A data warehouse is a central repository where raw data is transformed and stored in query-able forms.[^03]
- Data warehouses are still relevant today, and their maintenance is part of data engineers' responsibilities.
- The warehouse is created with structure and model first, before the data is loaded; this is called schema-on-write.

### Data Warehouses vs Databases

Surprisingly, a data warehouse isn’t a regular database.

[.column]

- A database normalizes data, separating it into tables and avoiding redundancy
- It supports arbitrary workloads and complex queries
- It does not store multiple versions of the data

[.column]

- A data warehouse uses few tables to improve performance and analytics
- A data warehouse favours simple queries
- It supports versioning for complex analyses

### Data Pipeline

A data pipeline is a sum of tools and processes for performing data integration[^03]

Constructing data pipelines is the core responsibility of data engineering.

![](./attachments/1_62WJpBzEdlsjlc2TtjFf3g.jpg)

^ While the data warehouse concerns the storage of data, the data pipeline ensures its consumption and handling.

### Data pipelines are used for

- moving data to the cloud or to a data warehouse
- data wrangling
- data integration

---

### Transporting data from sources into a warehouse[^010]

![inline](./attachments/word-image-29.png)

[^010]:[Source](https://www.altexsoft.com/blog/datascience/what-is-data-engineering-explaining-data-pipeline-data-warehouse-and-data-engineer-role/)

### Two Paradigms (and a half): SQL- vs. JVM-Centric Pipelines[^011]

- **SQL-centric pipelines** use SQL dialects from Presto or Hive. Pipelines (ETLs) are defined in a declarative way, and almost everything centers around SQL and tables.

^
- PROs: SQL is easier to learn and can rely on good optimizers
- CONs:
  - Writing UDFs is troublesome because one has to write them in a different language (e.g., Java or Python)
  - testing can be a lot more challenging because of this.

- **JVM-centric pipelines** use languages like Java or Scala and often involve thinking about data transformations in an imperative manner, e.g., in terms of key-value pairs.

^ PROs:
  - Writing User Defined Functions (UDFs) is less painful;
  - and testing jobs is relatively easy;
 CONs: requires strong programming skills

* Drag & Drop...

[^011]: we are focusing on ETL

### Skill Set: SQL mastery[^03]

If English is the language of business, SQL is the language of data.

SQL/DML/DDL primitives are simple enough that they should hold no secrets for a data engineer. Beyond the declarative nature of SQL, she/he should be able to:

- read and understand database execution plans, and understand what all the steps are,
- understand how indices work,
- understand the different join algorithms

### Skill Set: Data modeling[^03]

For a data engineer, entity-relationship modeling should be a cognitive reflex, along with a clear understanding of normalization and a sharp intuition for denormalization trade-offs.

The data engineer should be familiar with dimensional modeling and the related concepts and lexical field.

---

# But...
- - -### Engineers Shouldn’t (only) Write (SQL-based) ETL[^012] - -- Unless you need to process over many petabytes of data, or you’re ingesting hundreds of billions of events a day, most technologies have evolved to a point where they can trivially scale to your needs. - -- Unless you need to push the boundaries of what these technologies are capable of, you probably don’t need a highly specialized team of dedicated engineers to build solutions on top of them. - -[^012]: [JeffMagnusson, 2016](https://multithreaded.stitchfix.com/blog/2016/03/16/engineers-shouldnt-write-etl/) - -### If Not (only) ETL, Then…What?[^013] - -Data Engineers are still a critical part of any high-functioning data team. -- managing and optimizing core data infrastructure, -- building and maintaining custom ingestion pipelines, -- supporting data team resources with design and performance optimization, and -- building non-SQL transformation pipelines. - -^ Instead of building ingestion pipelines that are available off-the-shelf and implementing SQL-based data transformations, here’s what your data engineers should be focused on: - -[^013]:[TristanHandy, 2019](https://www.kdnuggets.com/2019/01/role-data-engineer-changing.html) - ---- -# Big Data - - - -### Challenges [^014] - -![inline](https://www.ec-better.eu/img/upload/galeria/5a50c3f0b37206930825bb69578686454adae022.png) - - - -[^014]:[Lanely, 2001](x-bdsk://laney20013d) - ---- -### Paradigm Shift - -![Pradigm Shift](https://thumbor.forbes.com/thumbor/fit-in/1200x0/filters%3Aformat%28jpg%29/https%3A%2F%2Fspecials-images.forbesimg.com%2Fimageserve%2F5eb0c332cb95f20007db3bef%2F0x0.jpg) - - ---- -[.slide-transition: push(vertical, 0.3)] - -![Pradigm Shift](https://thumbor.forbes.com/thumbor/fit-in/1200x0/filters%3Aformat%28jpg%29/https%3A%2F%2Fspecials-images.forbesimg.com%2Fimageserve%2F5eb0c332cb95f20007db3bef%2F0x0.jpg) - -![inline](./attachments/volume-1.pdf) - ---- -[.slide-transition: push(vertical, 0.3)] - -![Pradigm Shift](https://thumbor.forbes.com/thumbor/fit-in/1200x0/filters%3Aformat%28jpg%29/https%3A%2F%2Fspecials-images.forbesimg.com%2Fimageserve%2F5eb0c332cb95f20007db3bef%2F0x0.jpg) - -![inline](./attachments/variety.pdf) - ---- -[.slide-transition: push(vertical, 0.3)] - -![Pradigm Shift](https://thumbor.forbes.com/thumbor/fit-in/1200x0/filters%3Aformat%28jpg%29/https%3A%2F%2Fspecials-images.forbesimg.com%2Fimageserve%2F5eb0c332cb95f20007db3bef%2F0x0.jpg) - -![inline](./attachments/volume-2.pdf) - ---- -[.slide-transition: push(vertical, 0.3)] - -![Pradigm Shift](https://thumbor.forbes.com/thumbor/fit-in/1200x0/filters%3Aformat%28jpg%29/https%3A%2F%2Fspecials-images.forbesimg.com%2Fimageserve%2F5eb0c332cb95f20007db3bef%2F0x0.jpg) - -![inline](./attachments/velocity.pdf) - -[.slide-transition: reveal(top)] - - -### Data Lake -A Data lake is a vast pool of raw data (i.e., data as they are natively, unprocessed). A data lake stands out for its high agility as it isn’t limited to a warehouse’s fixed configuration[^03]. - ---- -![inline](./attachments/datalake.png) - -[Full Inforgraphic](./attachments/emc_understanding_data_lakes_infographic.pdf) - -^ -- In Data Lake, the raw data is loaded as-is, when the data is used it is given structure, and it is called schema-on-read. -- Data Lake gives engineers the ability to easily change. -- In practice, Data Lake is a commercial term so don't sweat it. - ---- - -![filtered](https://luminousmen.com/media/data-lake-vs-data-warehouse.JPG) - -
-

[.column]

- **Structured Data**
- **Schema On Write**
- **Data Pipelines: Extract-Transform-Load**
- **Processing Model: Batch**

[.column]

- **Unstructured Data**
- **Schema on Read**
- **Data Pipelines: Extract-Load-Transform**
- **Processing Model: Streaming**

### (Big) Data Engineer

In the context of Big Data, a data engineer must focus on **distributed systems**, and command **programming languages** such as Java and Scala.

### New Tasks

Since data lakes take data from a wide range of systems, data can be in **structured** or **unstructured** formats, and is usually **not clean**, e.g., with missing fields, mismatched data types, and other data-related issues.

Therefore, data engineers are challenged with the task of wrangling, cleansing, and integrating data.

### The Future of Data Engineering

- A significant shift toward real-time data pipelines
- Increased connectivity between data sources and the data warehouse
- Self-service analytics via smart tools, made possible by data engineering
- Automation of Data Science functions
- Hybrid data architectures spanning on-premise and cloud environments
\ No newline at end of file
diff --git a/Data Modeling for Big Data.md b/Data Modeling for Big Data.md
deleted file mode 100644
index e09acac..0000000
--- a/Data Modeling for Big Data.md
+++ /dev/null
@@ -1,513 +0,0 @@
# Data Modeling for Big Data

![inline](https://upload.wikimedia.org/wikipedia/commons/6/6d/Data_types_-_en.svg)

^ The Data Landscape: Variety is the Driver

### From data to analysis and execution

![inline](./attachments/bigdatatimeline1.png)

### The appearance of “Big Data”

![inline](./attachments/bigdatatimeline2.png)

### Big Data Vs [Laney]

![inline](https://storage.ning.com/topology/rest/1.0/file/get/1994871?profile=RESIZE_1024x1024)

### A Growing Trend

![inline](https://www.elderresearch.com/hs-fs/hubfs/graph_big-data-number%20of%20v's%20over%20time.png?width=767&name=graph_big-data-number%20of%20v%27s%20over%20time.png)

[source](https://www.elderresearch.com/blog/42-v-of-big-data)

### The Data Landscape

![inline](./attachments/m2_structure.png)

^ Structured data are organized and labeled according to a precise model (e.g., relational data)
^ Unstructured data, on the other hand, are not constrained (e.g., text, video, audio)
^ In between, there are many forms of semi-structured data, e.g., JSON and XML, whose models do not impose a strict structure but provide means for validation.
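As a toy illustration of validating semi-structured data on read (a sketch, assuming the third-party `jsonschema` package; the schema and field names below are made up):

```python
# Semi-structured data: no strict structure is imposed up front,
# but a JSON Schema can still validate what we receive on read.
# Requires: pip install jsonschema
from jsonschema import validate, ValidationError

schema = {
    "type": "object",
    "properties": {
        "userName": {"type": "string"},
        "favoriteNumber": {"type": ["integer", "null"]},
    },
    "required": ["userName"],
}

document = {"userName": "Ada", "favoriteNumber": 42, "interests": ["chess"]}

try:
    validate(instance=document, schema=schema)  # extra fields are allowed
    print("valid")
except ValidationError as e:
    print("invalid:", e.message)
```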
- -### Traditional Data Modelling Workflow - -- Known as Schema on Write -- Focus on the modelling a schema that can accommodate all needs -- Bad impact on those analysis that were not envisioned - -![right fit](./attachments/schemaonread.png) - -^ - - Extract Transform Load - - Some analyses may no longer be performed because the data were lost at writing time, - ---- -## Schema on Read - -- Load data first, ask question later -- All data are kept, the minimal schema need for an analysis is applied when needed -- New analyses can be introduced in any point in time - -![right fit](./attachments/schemaonwrite.png) - -## Data Lakes - -![inline](./attachments/datalakewf.png) - -## Horizontal vs Vertical Scalability - -[.footer: Curtesy of Emanuele Della Valle and Marco Brambilla] - -### Introduction -[.footer: Curtesy of Emanuele Della Valle and Marco Brambilla] - -- "Traditional" SQL system scale **vertically** (scale up) - Adding data to a "traditional" SQL system may degrade its performances - - When the machine, where the SQL system runs, no longer performs as required, the solution is to buy a better machine (with more RAM, more cores and more disk) -- Big Data solutions scale **horizontally** (scale out) - - Adding data to a Big Data solution may degrade its performances - - When the machines, where the big data solution runs, no longer performs as required, the solution is to add another machine - -### hardware -[.footer: Curtesy of Emanuele Della Valle and Marco Brambilla] - -[.column] -#### Commodity - -- CPU: 8-32 cores -- RAM: 16-64 GB -- Disk: 1-3 TB -- Network: 10 GE - -[.column] -#### Appliance - -- CPU: 576 cores -- RAM: 24TB -- Disk: 360TB of SSD/rack -- Network: 40 Gb/second InfiniBand - -^ ORACLE EXADATA DATABASE MACHINE X6-8 - -### Vertical Scalability -[.footer: Curtesy of Emanuele Della Valle and Marco Brambilla] - -![inline](./attachments/verticalscalability.png) - - -### Horizontal Scalability -[.footer: Curtesy of Emanuele Della Valle and Marco Brambilla] - -![inline](./attachments/horizontalscalability.png) - -### Vertical vs Horizontal Scalability -[.footer: Curtesy of Emanuele Della Valle and Marco Brambilla] - -![inline](./attachments/verticalvshorizontalscalability.png) - - -### Vertical vs Horizontal Scalability - -[.footer: Curtesy of Emanuele Della Valle and Marco Brambilla] -![inline](./attachments/grayareahv.png) - - -### Grey Area is Time-Dependent - -[.footer: Curtesy of Emanuele Della Valle and Marco Brambilla] - -![inline](./attachments/greyarea2.png) - -### Big Data Storage - -- Distributed File Systems, e.g., HDFS -- NoSQL Databases -- NewSQL Databases[^65] e.g., VoltDB -- Distributed Queues, e.g., Pulsar or Kafka - -[^65]: a modern form of relational databases that aim for comparable scalability with NoSQL databases while maintaining the transactional guarantees made by traditional database systems - -^ A distributed file system stores files across a large collection of machines while giving a single-file-system view to clients. 
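To make the single-file-system view concrete, here is a toy sketch (not HDFS code; the block size, node names, and helper function are invented) of how a file could be split into blocks that are spread across machines while clients keep addressing it by one path:

```python
# A toy distributed-file-system placement: split a file into fixed-size
# blocks and assign each block to a node, round-robin style.
BLOCK_SIZE = 4  # bytes; absurdly small so the example stays readable
NODES = ["node-1", "node-2", "node-3"]

def place_blocks(path: str, data: bytes) -> dict:
    """Map each block of `path` to a node; clients only ever see `path`."""
    blocks = [data[i:i + BLOCK_SIZE] for i in range(0, len(data), BLOCK_SIZE)]
    return {f"{path}#blk{n}": NODES[n % len(NODES)] for n, _ in enumerate(blocks)}

print(place_blocks("/logs/clicks.txt", b"abcdefghij"))
# {'/logs/clicks.txt#blk0': 'node-1', '/logs/clicks.txt#blk1': 'node-2',
#  '/logs/clicks.txt#blk2': 'node-3'}
```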
-

## Data Ingestion

- The process of importing, transferring and loading data for storage and later use
- It involves loading data from a variety of sources
- It can involve altering and modifying individual files to fit a format that optimizes the storage
- For instance, in Big Data small files are concatenated to form files of 100s of MBs, and large files are broken down into files of 100s of MBs

![right fit](./attachments/ingestion.png)

### We Will Talk About Distributed File Systems

A distributed file system stores files across a large collection of machines while giving a single-file-system view to clients.

- ![[HDFS]]

![inline](https://encrypted-tbn0.gstatic.com/images?q=tbn%3AANd9GcSn-w26lhLU7DoR89tUA8ST9Dlb4GfCkuJY4A&usqp=CAU)

![right fit](./attachments/nottoday.png)

### We Will Talk About Distributed Message Queues

A distributed message queue stores records in a log and allows sequential reads.

- ![[Apache Kafka]]

![inline](https://i.pinimg.com/originals/a1/82/b0/a182b0d8caa7627e288fb2a67395c263.png)

![right fit](./attachments/nottoday.png)

---

## ~~ETL~~ [[Data Pipelines]]

A data pipeline aggregates, organizes, and moves data to a destination for storage, insights, and analysis.

Modern data pipelines generalize the notion of ETL (extract, transform, load) to include data ingestion, integration, and movement across any cloud architecture, and add additional layers of resiliency against failure.

- [[Apache Airflow]]
- [[Kafka Streams]]
- [[KSQL]]

## [[Data Wrangling]]

The process of creating *reliable* data that can be analysed to generate valid, actionable insights.

The central goal is to make data usable: to put data in a form that can be easily manipulated by analysis tools.

It includes understanding, cleansing, augmenting and shaping data.

^ Additional goals:
- ensure that data is responsive to the intended analyses
- ensure that data contain the necessary information,
- ensure metadata that describe data are available
- ensure that data are sufficiently correct to support successful modeling and decision-making.

The result is data in the best format (e.g., columnar) for the analysis to be performed.

![right 150%](./attachments/wranglingsteps4.png)

# The Advent of NoSQL

### Quote time

> Google, Amazon, Facebook, and DARPA all recognized that when you scale systems large enough, you can never put enough iron in one place to get the job done (and you wouldn’t want to, to prevent a single point of failure).

-> Once you accept that you have a distributed system, you need to give up consistency or availability, which the fundamental transactionality of traditional RDBMSs cannot abide. ---[Cedric Beust](https://beust.com/weblog/2010/02/25/nosql-explained-correctly-finally/) - -^ The name “NoSQL” is unfortunate, since it doesn’t actually refer to any particular technology—it was originally intended simply as a catchy Twitter hashtag for a meetup on open source, distributed, nonrelational databases in 2009 Cf Pramod J. Sadalage and Martin Fowler: NoSQL Distilled. Addison-Wesley, August 2012. ISBN: 978-0-321-82662-6 - -### The Reasons Behind - -- **Big Data**: need for greater scalability than relational databases can easily achieve *in write* -- **Open Source:** a widespread preference for free and open source software -- **Queryability**: need for specialized query operations that are not well supported by the relational model -- **Schemaless**: desire for a more dynamic and expressive data model than relational - -### Object-Relational Mismatch - -Most application development today is done in **object-oriented** programming languages - -An **awkward translation** layer is required between the **objects** in the application code and the database model of **tables**, **rows**, and **columns** - -Object-relational mapping (**ORM**) frameworks like **Hibernate** try to mild the mismatch, but they **can’t completely hide** the differences - ---- - -![inline](./attachments/timelinenosql.png) - -### NoSQL Familty - -![inline fit](./attachments/nsqlfamily.png) - -### Kinds of NoSQL (2/4) - -NoSQL solutions fall into four major areas: - -- **Key-Value Store** - - A key that refers to a payload (actual content / data) - - Examples: MemcacheDB, Azure Table Storage, Redis, HDFS - -- **Column Store** - - Column data is saved together, as opposed to row data - - Super useful for data analytics - - Examples: Hadoop, Cassandra, Hypertable - -### Kinds of NoSQL (4/4) - -- **Document / XML / Object Store** - - Key (and possibly other indexes) point at a serialized object - - DB can operate against values in document - - Examples: MongoDB, CouchDB, RavenDB - -- **Graph Store** - - Nodes are stored independently, and the relationship between nodes (edges) are stored with data - - Examples: AllegroGraph, Neo4j - -### You can also distinguish them - -[.column] - -- **Key/Value or ‘the big hash table’ (remember caching?)** - - Amazon S3 (Dynamo) - - Voldemort - - Scalaris - - MemcacheDB, - - Azure Table Storage, - - *Redis* $$\leftarrow$$ - - Riak - -[.column] - -- **Schema-less** - - *MongoDB* $$\leftarrow$$ - - Cassandra (column-based) - - CouchDB (document-based) - - *Neo4J (graph-based)* $$\leftarrow$$ - - HBase (column-based) - ---- - -### NoSQL Complexity - -![inline](https://slideplayer.com/slide/16139843/95/images/16/NoSQL.jpg) - ---- - - -![original fit](https://www.ebayinc.com/assets/Uploads/Blog/2014/10/nosql_evolution.png) - -^ a natural evolutionary path exists from simple key-value stores to the highly complicated graph databases, as shown in the following diagram: - -### SQL vs (Not only SQL) NoSQL - -|SQL databases| NoSQL databases| -|----|-----| -| Triggered the need of relational databases | Triggered by the storage needs of Web 2.0 companies such as Facebook,Google and Amazon.com| -| Well structured data| Not necessarily well structured – e.g., pictures, documents, web page description, video clips, etc.| -| Focus on data integrity|focuses on availability of data even in the presence of multiple 
failures|
| Mostly Centralized|spread data across many storage systems with a high degree of replication.|
| ACID properties should hold|ACID properties may not hold[^62]|

[^62]: or no properties at all?

## ACID vs. BASE properties[^61]

[^61]:Do you recall the CAP theorem? 🎩

---

### Rationale

- It’s ok to use stale data (accounting systems do this all the time; it’s called “closing out the books”);
- It’s ok to give approximate answers
- Use resource versioning -> say what the data really is about – no more, no less
  - the value of x is 5 at time T

![right fit](https://i.ytimg.com/vi/ZNo6gfCAgWE/maxresdefault.jpg)

---

### CAP Theorem is a Trade-off, remember?

---

![original fit](https://player.slideplayer.com/95/16139843/slides/slide_30.jpg)

### BASE (Basically Available, Soft State, Eventually Consistent)

- **Basic Availability**: fulfill requests, even with partial consistency.
- **Soft State**: abandon the consistency requirements of the ACID model pretty much completely
- **Eventual Consistency**: delayed consistency, as opposed to the immediate consistency of the ACID properties.[^67]
  - purely a liveness guarantee (reads eventually return the requested value); but
  - does not make safety guarantees, i.e.,
  - an eventually consistent system can return any value before it converges

[^67]: at some point in the future, data will converge to a consistent state;

---
![original fit](./attachments/media_httpfarm5static_mevIk.png)

[.footer: [img](https://blog.nahurst.com/visual-guide-to-nosql-systems)]

### ACID vs. BASE trade-off

**No general answer** to whether your application needs an ACID versus BASE consistency model.

Given **BASE**’s loose consistency, developers **need to** be more knowledgeable and **rigorous** about **consistent** data if they choose a BASE store for their application.

Planning around **BASE** limitations can sometimes be a major **disadvantage** when compared to the simplicity of ACID transactions.

A fully **ACID** database is the perfect fit for use cases where data **reliability** and **consistency** are essential.

# History of Data Models[^5]

---

![original fit](https://miro.medium.com/max/1225/1*V2zU24JMyIuCKV3wkDN26A.png)

[^5]: [by Ilya Katsov](https://highlyscalable.wordpress.com/2012/03/01/nosql-data-modeling-techniques/)

### Extra Reads

![original fit](https://www.ics.uci.edu/~cs223/papers/cidr07p15.pdf)

## Shall we rethink the three-layered modeling?

![right 95%](https://www.matillion.com/wp-content/uploads/2020/04/DataModels-Diagram-01.png)

### Data Modeling for Big Data

- **Conceptual Level** remains:
  - ER and UML diagrams can still be used for NoSQL, as they output a model that encompasses the whole company.

- **Physical Level** remains: NoSQL solutions often expose internals for obtaining flexibility, e.g.,
  - Key-value store APIs
  - Column stores
  - Log structures

- *The logical level no longer makes sense: schema-on-read focuses on the query side.*

## Domain Driven Design[^68]

Domain-Driven Design is a **language**- and **domain-centric** approach to software design for complex problem domains.

DDD promotes the reduction of the translation cost between business and technical terminology by developing a **ubiquitous language** that embeds domain terminology into the software systems.
-

DDD consists of a collection of **patterns**, **principles**, and **practices** that allows teams to **focus** **on** the core **business** goals while **crafting** software.

[intro](https://martinfowler.com/bliki/DomainDrivenDesign.html)

![right fit](https://image.slidesharecdn.com/b00794taug-domain-drivendesignbyericevans-190925204437/95/2003-domaindriven-design-pdf-tackling-complexity-in-the-heart-of-software-by-eric-evans-addisonwesley-professional-1-638.jpg?cb=1569444406)

[^68]:[book](https://www.amazon.com/gp/product/0321125215?ie=UTF8&tag=martinfowlerc-20&linkCode=as2&camp=1789&creative=9325&creativeASIN=0321125215)

### Domain Driven Design[^68]

Domain-Driven Design is a **language**- and **domain-centric** approach to software design for complex problem domains.

DDD promotes the reduction of the translation cost between business and technical terminology by developing a **ubiquitous language** that embeds domain terminology into the software systems.

DDD consists of a collection of patterns, principles, and practices that allows teams to focus on the core business goals while crafting software.

![right fit](https://images-na.ssl-images-amazon.com/images/I/81aA7hEEykL.jpg)

### Domain Driven Design

![inline](./attachments/domain-driven-design-model-driven-design/main.png)

[source](http://tigerthinks.com/images/books/domain-driven-design-model-driven-design.png)

### Domain Driven Design

![inline](./attachments/domain-driven-design-model-driven-design/stage0.png)

### Domain Driven Design

![inline](./attachments/domain-driven-design-model-driven-design/stage1.png)

### The Layered Architecture

![right fit](https://miro.medium.com/max/1225/1*Ly4z7CXj1znZl8fPIbQ5_w.png)

|Layer|Description|
|------|--------|
| Presentation Layer |Responsible for showing information to the user and interpreting the user’s commands. |
|Application Layer| Defines the jobs the software is supposed to do and directs the expressive domain objects to work out problems|
|Domain Layer |Responsible for representing concepts of the business, information about the business situation, and business rules. |
|Infrastructure Layer |Provides generic technical capabilities that support the higher layers: message sending for the application, persistence for the domain, drawing widgets for the UI, etc. |

### Entities

![right fit](./attachments/domain-driven-design-model-driven-design/stage2.png)

- Are objects defined primarily by their identity
- Their identities must be defined so that they can be effectively tracked. We care about *who* they are rather than *what* information they carry
- They have lifecycles and can radically change their form and content, while a thread of continuity must be maintained.
- E.g., bank accounts, deposit transactions.

### Value Objects

![right fit](./attachments/domain-driven-design-model-driven-design/stage3.png)

- Value Objects represent a descriptive aspect of the domain that has no conceptual identity.
  - They are instantiated to represent elements of the design that we care about only for *what* they are, not *who* they are.
  - E.g., street, city, and postal code shouldn’t be separate attributes of a Person object.

### Services

![right fit](./attachments/domain-driven-design-model-driven-design/stage6.png)

- Services are operations offered as an interface that stands alone in the model, without encapsulating state as Entities and Value Objects do.
- - They are a common pattern in technical frameworks, but they can also apply in the domain layer. - - The name “service” is ment to emphasize the relationship with other objects. - - -### The Lifecycle of a Domain Object - -Every object has a lifecycle. It is **born**, it may go **through** various **states**, it eventually is either **archived** or **deleted**. - -The problems fall into two categories: -- **Maintaining** **integrity** throughout the lifecycle -- **Preventing** the model from getting swamped by the **complexity** of managing the lifecycle. - - -### Aggregates and Repositories -The most important concepts for this are Aggregates and Repositories[^63] - -[^63]: an Aggregate is always associated with one and only one Repository. - -[.column] -**Aggregates** are a cluster of Entities and Value Objects that make sense domain-wise and are retrieved and persisted together. - -E.g. A Car is an aggregate of wheel, engine, and the customer - -[.column] - -**Repositories** offer an interface to retrieve and persist aggregates, hiding lower level details from the domain. - -E.g. Sold cars catalogue - - -### Event Sourcing[^64] - -- The fundamental idea of Event Sourcing is ensuring that every change to the state of an application is captured in an event object, - -- Event objects are immutable and stored in the sequence they were applied for the same lifetime as the application state itself. - -![right fit](https://www.kcrw.com/culture/shows/the-spin-off/@@images/rectangular_image/page-header?v=1525906101.26) - -[^64]: Martin Fowler, [link](https://martinfowler.com/eaaDev/EventSourcing.html) - -### The Power of Events - -Events are both a **fact** and a **notification**. - -They represent **something** that **happened** in the **real world** but include no expectation of any future action. - -They **travel** in only **one direction** and expect no response (sometimes called “fire and forget”), but one **may be “synthesized” from a subsequent event**. - -![right fit](https://images-na.ssl-images-amazon.com/images/I/51NS8A8JT5L._AC_UL600_SR468,600_.jpg) - ---- - -![original fit](./attachments/seen.png) - ---- - -![original fit](./attachments/brandnew.png) - ---- - -![original fit](attachments/4doohb.jpg) \ No newline at end of file diff --git a/Data Modeling for Data Warehouses.md b/Data Modeling for Data Warehouses.md deleted file mode 100644 index 7d8620d..0000000 --- a/Data Modeling for Data Warehouses.md +++ /dev/null @@ -1,102 +0,0 @@ -# Data Modeling for Data Warehouses - -- Works in phases related to the aforementioned levels of abstractions -- Less diversity in the data model, usually relational in the form of a star schema (also known as dimensional modeling[^41]). -- Redundancy and incompleteness are not avoided, fact tables often have over 100 columns, sometimes several hundreds. -- Optimized for OLAP - -^ -- The data model of a data warehouse is most commonly relational, because SQL is generally a good fit for analytic queries. -- Do not associate SQL with analytic, it depends on the data modeling. - -[^41]: Ralph Kimball and Margy Ross: The Data Warehouse Toolkit: The Definitive Guide to Dimensional Modeling, 3rd edition. John Wiley & Sons, July 2013. 
ISBN: 978-1-118-53080-1

### A Star is Born

[.column]

![inline](https://docs.microsoft.com/en-us/power-bi/guidance/media/star-schema/star-schema-example1.png)

[.column]

![inline](https://m.media-amazon.com/images/M/MV5BNmE5ZmE3OGItNTdlNC00YmMxLWEzNjctYzAwOGQ5ODg0OTI0XkEyXkFqcGdeQXVyMTMxODk2OTU@._V1_.jpg)

### A Star is Born

[.column]

![inline](https://docs.microsoft.com/en-us/power-bi/guidance/media/star-schema/star-schema-example2.png)

[.column]

![inline](https://m.media-amazon.com/images/M/MV5BNmE5ZmE3OGItNTdlNC00YmMxLWEzNjctYzAwOGQ5ODg0OTI0XkEyXkFqcGdeQXVyMTMxODk2OTU@._V1_.jpg)

### Dimensional Modelling

Four-Step Dimensional Design Process

1. Select the business process.
2. Declare the grain.
3. Identify the dimensions.
4. Identify the facts.

![right fit](https://images-na.ssl-images-amazon.com/images/I/51dvU76edNL._SX399_BO1,204,203,200_.jpg)

[Mandatory Read](http://www.kimballgroup.com/wp-content/uploads/2013/08/2013.09-Kimball-Dimensional-Modeling-Techniques11.pdf)

^
- **Business processes** are critical activities that your organization performs, e.g., registering students for a class.
- The **grain** establishes exactly what a single fact table row represents. Three common grains categorize all fact tables: transactional, periodic snapshot, or accumulating snapshot.
- **Dimensions** provide context to business process events, e.g., who, what, where, when, why, and how.
- **Facts** are the measurements that result from a business process event and are almost always numeric.

### Dimensional Modelling: Fact Table

A **fact table** contains the numeric measures produced by an operational measurement event in the real world.

A **single fact** table row has a one-to-one relationship to a measurement event as described by the fact table’s grain.

A **surrogate key** is a unique identifier that you add to a table to support star schema modeling. By definition, it is not defined or stored in the source data.

### Dimensional Modelling: Dimension Table

Dimension tables contain the descriptive attributes used by BI applications for filtering and grouping the facts.

Every dimension table has a single **primary key** column, which is embedded as a foreign key in any associated fact table.

### The 5/10 Essential Rules of Dimensional Modeling (Read)[^42]

1. Load detailed atomic data into dimensional structures.
2. Structure dimensional models around business processes.
3. Ensure that every fact table has an associated date dimension table.
4. Ensure that all facts in a single fact table are at the same grain or level of detail.
5. Resolve many-to-many relationships in fact tables.

### The 10/10 Essential Rules of Dimensional Modeling (Read)[^42]

6. Resolve many-to-one relationships in dimension tables.
7. Store report labels and filter domain values in dimension tables.
8. Make certain that dimension tables use a surrogate key.
9. Create conformed dimensions to integrate data across the enterprise.
10. Continuously balance requirements and realities to deliver a DW/BI solution that’s accepted by business users and that supports their decision-making.
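As a hedged illustration of the ideas above (table and column names are invented, not Kimball's), a minimal star schema with one fact table at the "one row per sale" grain, a date dimension, and a surrogate key, using Python's standard-library `sqlite3`:

```python
# A minimal star schema sketch: one fact table plus one dimension,
# joined through a surrogate key that does not come from source data.
import sqlite3

conn = sqlite3.connect(":memory:")
conn.executescript("""
CREATE TABLE dim_date (
    date_key  INTEGER PRIMARY KEY,  -- surrogate key, added by us
    full_date TEXT,
    year      INTEGER
);
CREATE TABLE fact_sales (
    date_key   INTEGER REFERENCES dim_date(date_key),
    product_id INTEGER,
    amount     REAL                  -- the numeric measure
);
""")
conn.execute("INSERT INTO dim_date VALUES (1, '2013-09-01', 2013)")
conn.execute("INSERT INTO fact_sales VALUES (1, 42, 9.99)")

# Filter and group the facts through the dimension's attributes.
total, = conn.execute("""
    SELECT SUM(f.amount)
    FROM fact_sales f JOIN dim_date d ON d.date_key = f.date_key
    WHERE d.year = 2013
""").fetchone()
print(total)  # 9.99
```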
-

[^42]:https://www.kimballgroup.com/2009/05/the-10-essential-rules-of-dimensional-modeling/

### The Traditional RDBMS Wisdom Is (Almost Certainly) All Wrong[^43]

![inline](https://youtu.be/qyDPqbpbA60)

[^43]: Source with slides: [“The Traditional RDBMS Wisdom Is (Almost Certainly) All Wrong,” presentation at EPFL, May 2013](http://slideshot.epfl.ch/play/suri_stonebraker)

### A note on Storage

- Data warehouses typically interact with OLTP databases to expose one or more OLAP systems.
- Such OLAP systems adopt storage optimized for analytics, i.e., column-oriented.
- The column-oriented storage layout relies on each column file containing the rows in the same order.
- Not just relational data, e.g., Apache Parquet

![inline](./attachments/5e08f341edb7545ceaa16494_672340c374e04c44b8d01a085a93ad5f.png)
\ No newline at end of file
diff --git a/Data Modeling for Databases.md b/Data Modeling for Databases.md
deleted file mode 100644
index b73d585..0000000
--- a/Data Modeling for Databases.md
+++ /dev/null
@@ -1,413 +0,0 @@
# Data Modeling for Databases

- Works in phases related to the aforementioned levels of abstraction[^31]
- Uses different data models depending on the need:
  - Relational, Graph, Document...
- Tries to avoid two major pitfalls:
  - **Redundancy**: A design should not repeat information
  - **Incompleteness**: A design should not make certain aspects of the enterprise difficult or impossible to model
- Optimized for OLTP

[^31]: Also known as Database Design

^ The biggest problem with redundancy is that information may become inconsistent in case of updates

---

Before, let's refresh

---

## Relational Database

A relational database consists of…
- a set of relations (tables)
- a set of integrity constraints

If the database satisfies all the constraints, we say it is in a valid state.

An important distinction regards the **database schema**, which is the logical design of the database, and the **database instance**, which is a snapshot of the data in the database at a given instant in time.

## Relational Model [^32]

A formal mathematical basis for databases based on set theory and first-order predicate logic

It underpins SQL

![right fit](./attachments/codd.png)

[^32]: Extra Read [Codd, Edgar F. "A relational model of data for large shared data banks." Communications of the ACM 13.6 (1970): 377-387](https://course.ccs.neu.edu/cs3200sp18s3/ssl/readings/codd.pdf)

### Relation

> Relation R is a set of tuples (d1, d2, ..., dn), where each element dj is a member of Dj, a data domain.
- -> A Data Domain refers to all the values which a data element may contain, e.g., N. - -Note that in the relational model the **term relation is used to refer to a table**, while the term **tuple is used to refer to a row** - -^ In mathematical terms, a tuple indicates a sequence of values. -A relationship between n values is represented mathematically by an n-tuple of values, that is, a tuple with n values, which corresponds to a row in a table. - ---- - -![inline](./attachments/tableex1.png) - ---- - -### Relation Schema - -- corresponds to the notion of **type** in programming languages -- consists of a list of **attributes** and their corresponding domains -- a **relation instance** corresponds to the programming-language no- tion of a value of a variable - ---- - -![inline](./attachments/tableex2.png) - -### Keys - -- A **superkey** is a set of one or more attributes that, taken collectively, allow us to identify uniquely a tuple in the relation -- **candidate keys** are superkeys for which no proper subset is a superkey -- primary key is the chosen candidate key -- foreign key is s set of attributes from a referenced relation. - -^ If K is a superkey, then so is any superset of K - ---- - -![inline](./attachments/keystable.png) - ---- - -### [[Relational Algebra]] (On Practice) - -is a procedural language consisting of a six basic operations that take one or two relations as input and produce a new relation as their result: - -- select: σ -- project: ∏ -- union: ∪ -- set difference: – -- Cartesian product: x -- rename: ρ - -^ Question: What is an algebra? - ---- -### Two Sets -![inline](./attachments/2setvisual.png) - ---- - -### Intersection - -![inline](./attachments/intersectionvisual.png) - ---- -### Difference -![inline](./attachments/differencevisual.png) - ---- -### Union -![inline](./attachments/unisionvisual.png) - ---- - -![inline](./attachments/productvisual.png) - ---- -### Projection - -![inline](./attachments/projectionvisual.png) - ---- -### Selection -![inline](./attachments/selectvisual.png) - - ---- - -### Natural JOIN -![inline](./attachments/naturaljoin.png) - ---- - -## Entity-Relationship (ER) Model - -- Outputs a conceptual schema. -- The ER data model employs three basic concepts: - - entity sets - - relationship sets and - - attributes. -- It is also associated with diagrammatic representation [try out](https://erdplus.com/) - -### Entities And Entity Sets - -An entity can be any object in the real world that is distinguishable from all other objects. - - -An **entity set** contains entities of the same type that share the same properties, or attributes. - - -NB We work at *set* level - -^ Ask the students -Examples of entities: - - University - - Department - - Persons - - Courses -- Examples of entity sets - - Professors and Students - - Data Science coruses: curriculms - ---- -#### Syntax - -![inline](./attachments/entities.png) - -^ fields are what we call attribtues - -### Relationships and Relationship Sets - -A **relationship** is an association among several entities. - - A **relationship set** is a set of relationships of the same type. - -^ -Examples of entities: - - advisor - - attendee - - enrollment - -### Intution - -![inline](./attachments/Relationship-syntax.png) - ---- -#### Syntax - -![inline](./attachments/Pasted image 4.png) - -^^ ER works under the assumption that most relationship sets in a database system are binary. Relationships between more than two entity sets are rare. - -### Attributes and Values - - attributes. 
Attributes are descriptive properties possessed by each member of an entity set. - - Each entity has a **value** for each of its attributes. - - Also relationshis may have attributes called **descriptive attributes**. - -### Intution - -![inline 25%](./attachments/attrrel.png) - ---- -#### Syntax - -![inline](./attachments/attrer2.png) - -### Cardinality - -For a binary relationship set the mapping cardinality must be one of the following types: -- One to one -- One to many -- Many to one -- Many to many - ---- -#### Cardinality Visualized -- (a) One to One -- (b) One to Many - -![right fit](./attachments/o2o-o2m.png) - ---- -#### Cardinality Visualized - -- (a) Many to One -- (b) Many to Many - -![right fit](./attachments/o2m-m2m.png) - -### University of Tartu Example - -![inline 90%](https://www.ut.ee/sites/default/files/styles/ut_content_width/public/tu_struktuurijoonis_2020_eng_0.png) - -[source](https://www.ut.ee/sites/default/files/styles/ut_content_width/public/tu_struktuurijoonis_2020_eng_0.png?itok=7l0q6cxg) - ---- -#### One to Many - -![inline](./attachments/Pasted image 8.png) - -A (full) professor has one office -an office hosts one full professor - ---- -#### One to Many - -![inline](./attachments/One-to-Many.png) - -A Dean is associated with many institutes -An Institute has only one dean - ---- -#### Many to One - -![inline](./attachments/many-to-one.png) - -A professor advises many students but a student has only one advisor. - -^ Many students share the same advisor but they only have one. - ---- -#### Many to Many - -![inline](./attachments/many-to-many.png) - -A course is associated to many insitute in the context of a curriculum -An institute offers many courses within a curriculum - -### Keys - -- Provide a way to specify how entities and relations are distinguished. -- *Primary key* for Entity Sets - - By definition, individual entities are distinct (set) - - From database perspective, the differences among them must be expressed in terms of their attributes -- *Primary Key* for Relationship Sets - - We use the individual primary keys of the entities in the relationship set. - - The choice depends on the mapping cardinality of the relationship set. - ---- -#### Choice of Primary key for Binary Relationship - -- One-to-one relationships. The primary key of either one of the participating entity sets forms a minimal superkey, and either one can be chosen as the primary key. -- One-to-Many relationships and Many-to-one relationships - - The primary key of the “Many” side is a minimal superkey and is used as the primary key. -- Many-to-Many relationships: - - The preceding union of the primary keys is a minimal superkey and is chosen as the primary key. - ---- -#### Weak Entity Sets - -- A weak entity set is one whose existence is dependent on another entity, -called its **identifying entity** - -- A weak entity set is one whose existence is dependent on another entity, -called its identifying entity - - -### Summary of Symbols - -![inline](./attachments/er-syntax-summary-1.png) - ---- - -![inline](./attachments/er-syntax-summary-2.png) - -### From ER to Relational Model - -- Entity and relationship sets can be expressed as relation -schemas that represent the contents of the database. - -- A database which conforms to an E-R diagram can be represented by a -collection of schemas. 
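A minimal sketch of the reduction described in the slides that follow, using the deck's Professor/Student example (the column names are assumptions) and Python's standard-library `sqlite3`; the many-to-one `advisor` relationship is folded into the "many" side as a foreign key:

```python
# Each entity set becomes a table; the many-to-one advisor
# relationship becomes an extra attribute on the "many" side (Student).
import sqlite3

conn = sqlite3.connect(":memory:")
conn.executescript("""
CREATE TABLE Professor (ID INTEGER PRIMARY KEY, Name TEXT, Age INTEGER);
CREATE TABLE Student (
    ID         INTEGER PRIMARY KEY,
    Name       TEXT,
    GPA        REAL,
    Advisor_ID INTEGER REFERENCES Professor(ID)  -- the advisor relationship
);
""")
conn.execute("INSERT INTO Professor VALUES (7, 'Ada', 36)")
conn.execute("INSERT INTO Student VALUES (1, 'Alan', 3.9, 7)")
```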
- ---- -#### Reduction of Entities - -- For each **entity** set there is a unique schema with the same name - -- Each schema has a number of columns (generally corresponding to -attributes), which have unique names - -![right fit](./attachments/entities.png) - -Professor(ID,Name,Age) -Student(ID,Name,GPA) - -^ Weak entities set becomes a relation that includes a column for the primary -key of the identifying entity. - ---- -#### Reduction of Relationships - -[.column] - -- For each **relationship** set there is a unique schema with the same name - -- A **many-to-many** relationship (figure) is represented as a schema with attributes for the primary keys of the two participating entity sets, and any descriptive attributes of the relationship set. - -[.column] - -![inline](./attachments/many-to-many.png) - -Curriculum(Institute\_ID,Course\_ID) - ---- -#### Reduction of Relationships - -- **Many-to-one** and one-to-many** relationship can be represented by adding an extra attribute to the "many" side - -- For **one-to-one** relationship, either side can be chosen to act as the "many" side - - -### Normalisation - -- Typically decomposes tables to avoid redundancy -- Spans both logical and physical database design -- Aims at **improving** the database design - ---- - -#### Goals - -- Make the schema informative -- Minimize information duplication -- Avoid modification anomalies -- Disallow spurious tuples - ---- - -![inline](./attachments/Pasted image 7.png) - ---- - -![inline](./attachments/Pasted image 9.png) - ---- - -![inline](./attachments/image3.jpeg) - ---- -### Normal Forms (Refresh) - -- First Normal Form (1NF) - - A table has only atomic valued clumns. - - Values stored in a column should be of the same domain - - All the columns in a table should have unique names. - - And the order in which data is stored, does not matter. -- Second Normal Form (2NF) - - A table is in the First Normal form and every non-prime attribute is fully functional dependent[^33] on the primary key -- Third Normal Form (3NF) - - A table is in the Second Normal form and every non-prime attribute is non-transitively dependent on every key - - [^33]: $$X \rightarrow Y, \forall A \in X ((X -{A}) \nrightarrow Y)$$ - ---- - -### Modeling for Database: A note on Storage - -- Storage is laid out in a row-oriented fashion -- For relational this is as close as the the tabular representation -- All the values from one row of a table are stored next to each other. -- This is true also for some NoSQL (we will see it again) - - Document databases stores documents a contiguous bit sequence diff --git a/Data Modeling.md b/Data Modeling.md deleted file mode 100644 index 0aabec1..0000000 --- a/Data Modeling.md +++ /dev/null @@ -1,524 +0,0 @@ -# Data Modeling - -It is the process of defining the structure of the data for the purpose of communicating[^11] or to develop an information systems[^12]. - -[^11]: between functional and technical people to show data needed for business processes - -[^12]: between components of the information system, how data is stored and accessed. - -### What is a data model? - -A data model represents the structure and the integrity of the data elements of a (single) applications [2](x-bdsk://DBLP:journals/sigmod/SpynsMJ02) - -Data models provide a framework for data to be used within information systems by providing specific definition and format. 
- -The literature of data management is rich of data models that aim at providing increased expressiveness to the modeler and capturing a richer set of semantics. - ---- - -
- -> Data models are perhaps the most important part of developing software. They have such a profound effect not only on how the software is written, but also on how we think about the problem that we are solving[^13]. -> --Martin Kleppmann - -[^13]:[Designing Data-Intensive Applications](https://dataintensive.net/) - -# Any Example? - -![inline](./attachments/slide_4.jpg) - ---- - -![inline](https://upload.wikimedia.org/wikipedia/commons/f/f3/3-4_Data_model_roles.jpg) - -# Level of Data Modeling - -**Conceptual**: The data model defines *WHAT* the system contains. - -^ Conceptual model is typically created by Business stakeholders and Data Architects. The purpose is to organize, scope and define business concepts and rules. Definitions are most important this level. - -**Logical**: Defines *HOW* the system should be implemented regardless of the DBMS. - -^ Logical model is typically created by Data Architects and Business Analysts. The purpose is to developed technical map of rules and data structures. Business rules, relationships, attribute become visible. Conceptual definitions become metadata. - -**Physical**: This Data Model describes *HOW* the system will be implemented using a specific DBMS system [^14]. - -^ Physical model is typically created by DBA and developers. The purpose is actual implementation of the database. Trade-offs are explored by in terms of data structures and algorithms. - -![right 95%](https://image.slidesharecdn.com/datamodelingbigdatadataversityaugust2016-160830052651/95/data-modeling-for-big-data-25-1024.jpg?cb=1472534835) - -[^14]: [physical](https://www.databass.dev/) - -### A Closer Look[^15] -![inline](https://image.slidesharecdn.com/datamodelingbigdatadataversityaugust2016-160830052651/95/data-modeling-for-big-data-25-1024.jpg?cb=1472534835) - -[^15]: [slides](https://www.slideshare.net/Dataversity/data-modeling-for-big-data) & [video](https://www.dataversity.net/ldm-webinar-data-modeling-big-data/) by Donna Burbank - -^ The variety of data available today encourages the design and development of dedicated data models and query languages that can improve both BI as well as the engineering process itself. - ---- - -![inline](https://www.youtube.com/watch?v=PU7nKBNR1Vs&feature=youtu.be) - ---- - -### Conceptual - -- Semantic Model (divergent) - - Describes an enterprise in terms of the language it uses (the jargon). - - It also tracks inconsistencies, i.e., semantic conflicts - -- Architectural Model (convergent) - - More fundamental, abstract categories across enterprise - - -### Logical - -Already bound to a technology, it typically refers already to implementation details - -- Relational -- Hierarchical -- Key-Value -- Object-Oriented -- Graph - -^ Since it has a physical bias, you might be tempted to confuse this with the physical model, but this is wrong. - -### Physical - -The physical level describes how data are **Stored** on a device. - -- Data formats -- Distribution -- Indexes -- Data Partitions -- Data Replications - -
- -...an you are in the Big Data World - -### A Question - -![right fit](./attachments/meandmarti.jpg) - -> Why should you, ~~an application developer~~ a data engineer, care how the database handles storage and retrieval internally? ---Martin Kleppmann - -I mean, you’re probably not going to implement your own storage engine from scratch... - ---- -### The Students - -![inline](./attachments/No_No_He's_Got_A_Point_Banner.jpg) - ---- -### But... -- You do need to select a storage engine that is appropriate for your application, from the many that are available -- You need to tune a storage engine to perform well on your kind of workload -- You are going to experiment with different access patterns and data formats - -Therefore, you must have a rough idea of what the storage engine is doing under the hood - ---- -### Also the Students - -![inline](./attachments/9909953816_e8cecebfc3.jpg) - -## Data Formats - -- In memory, data are kept in objects, structs, lists, arrays, hash tables, trees, and so on. These data structures are optimized for efficient access and manipulation by the CPU (typically using pointers). -- On Disk (or over the network), data are encoded into a self-contained sequence of bytes (for example, a JSON document). - -### Encoding and decoding - -Encoding is the translation from the in-memory representation to a byte sequence (also known as serialization or marshalling) - -Decoding is the reverse translation from the byte sequence to a memory layout (also known as parsing, deserialization, unmarshalling) - -The encoding is often tied to a particular programming language, and reading the data in another language is very difficult - -### Memory vs Disk - -Data layout is much less important in memory than on disk. - -An efficient disk-resident data structure must allow quick access to it, i.e., find a way to serialize and deserialize data rapidly and in a compacted way. - -In general, pointers do not make sense outside memory, thus the sequence-of-bytes representation looks quite different from the data structures that are normally used in memory. - -### Popular (textual) File Formats - -JSON -- has a schema -- cannot distinguish between integers and floating-point numbers -- have good support for Unicode character string -- do not support sequences of bytes without a character encoding -XML -- has a schema -- cannot distinguish between a number and a string -- have good support for Unicode character string -- do not support sequences of bytes without a character encoding -CSV -- cannot distinguish between a number and a string -- does not have any schema - -### Avro - -Avro is a binary encoding format that uses a schema to specify the structure of the data being encoded. - -Avro's encoding consists only of values concatenated together, and the -there is nothing to identify fields or their datatypes in the byte sequence. - ---- -#### Avro Schema Definition -
-
- -```python -record Person { - string userName; - union { null, long } favoriteNumber = null; - array interests; -} -``` - ---- -#### Example -![inline](https://www.oreilly.com/library/view/designing-data-intensive-applications/9781491903063/assets/ddia_0405.png) - -[Source](https://www.oreilly.com/library/view/designing-data-intensive-applications/9781491903063/ch04.html) - ---- -#### Encoding and Decoding - -- Encoding requires the writer's schema -- Decoding requires the reader’s schema. -- Avro does not require that the writer’s schema and the reader’s schema are the same, they only need to be **compatible** - ---- -#### Schema Evolution Rules - -- If the code reading the data encounters a field that appears in the writer’s schema but not in the reader’s schema, it is ignored. -- If the code reading the data expects some field, but the writer’s schema does not contain a field of that name, it is filled in with a default value declared in the reader’s schema. - ---- -#### Compatibility -- forward compatibility: there is a new version of the writer's schema and an old version of the reader's schema -- backwards compatibility: there is a new version of the reader's schema and an old version of the writer's schema - -### Worth Mentioning[^13] - -- Apache Thrift and Protocol Buffers are binary encoding libraries - - require a schema for any data that is encoded. - - come with a code generation tool that takes a schema definitions to reproduce the schema in various programming languages - -[.column] -```c -struct Person { - 1: required string userName, - 2: optional i64 favoriteNumber, - 3: optional list interests -} -``` - -[.column] -```c -message Person { - required string user_name = 1; - optional int64 favorite_number = 2; - repeated string interests = 3; -} -``` - -## Distribution -![](./attachments/rick-mason-2FaCKyEEtis-unsplash.jpg) - -### CAP Theorem (Brewer’s Theorem) - -It is impossible for a distributed computer system to simultaneously provide all three of the following guarantees: - -- **Consistency**: all nodes see the same data at the same time -- **Availability**: Node failures do not prevent other survivors from continuing to operate (a guarantee that every request receives a response whether it succeeded or failed) -- **Partition tolerance**: the system continues to operate despite arbitrary partitioning due to network failures (e.g., message loss) - -A distributed system can satisfy any two of these guarantees at the same time but not all three. - ---- - -![original fit](https://player.slideplayer.com/95/16139843/slides/slide_30.jpg) - - - -### The network is not reliable - -In a distributed system, **a network (of networks) ** failures can, and will, occur. - -#### We cannot neglect Partition Tolerance - -The remaining option is choosing between **Consistency** and **Availability**. - ---- -#### We cannot neglect Partition Tolerance -Not necessarily in a mutually exclusive manner: - -- CP: A partitioned node returns - - the correct value - - a timeout error or an error, otherwise -- AP: A partitioned node returns the most recent version of the data, which could be stale. - -## Indexing - -- Indices are critical for efficient processing of queries in (any kind of) databases. -- basic idea is trading some computational cost for space, i.e., materialize a convenient data structure to answer a set of queries. 
- The caveat is that we must keep indices up to date upon changes

^
- Without indices, query cost will blow up quickly, making the database unusable
- Databases don't usually index everything by default

### Basic Terms

- Ordered indices. Based on a sorted ordering of the values.
- Hash indices. Using a hash function that assigns values across a range of buckets.

- Primary Index: denotes an index on a primary key
- Secondary Index: denotes an index on non-primary values


## Data Replication

> Replication means keeping a copy of the same data on multiple machines that are connected via a network

![right fit](https://images.theconversation.com/files/171410/original/file-20170530-16298-5xn3ob.png?ixlib=rb-1.1.0&q=45&auto=format&w=1200&h=1200.0&fit=crop)

### Reasons for Replication

- Increase data locality
- Fault tolerance
- Concurrent processing (read queries)

^
- To keep data geographically close to your users (and thus reduce access latency)
- To allow the system to continue working even if some of its parts have failed (and thus increase availability)
- To scale out the number of machines that can serve read queries (and thus increase read throughput)

### Approaches

- Synchronous vs Asynchronous Replication
  - The advantage of synchronous replication is that the follower is guaranteed to have an up-to-date copy
  - The advantage of asynchronous replication is that the follower's availability is not a requirement (cf. CAP Theorem)

- Leader - Follower (most common, cf. Kafka)


### Leaders and Followers

- One of the replicas is designated as the leader
- Write requests go to the leader
- The leader sends data to followers for replication
- Read requests may be directed to leaders or followers

![right fit](https://cdn3.whatculture.com/images/2014/04/leader.jpg)

---

![inline](https://miro.medium.com/max/2450/1*WTkANoAmRq9WUmU0v9sV9Q.png)

Source: [^13]

### Caveats
Only one: handling changes to replicated data is extremely hard.

## Data Partitioning (Sharding)

> breaking a large database down into smaller ones

^ For very large datasets, or very high query throughput, replication alone is not sufficient

### Reasons for Partitioning

- The main reason for wanting to partition data is scalability[^13]

^
- Different partitions can be placed on different nodes in a shared-nothing cluster
- Queries that operate on a single partition can be independently executed. Thus, throughput can be scaled by adding more nodes.


### What to know

- If some partitions have more data or queries than others, the partitioning is **skewed**
- A partition with disproportionately high load is called a **hot spot**
- For reaching maximum (linear) scalability, partitions should be balanced

Let's consider some partitioning strategies; for simplicity we consider key-value data.

### Partitioning Strategies

- **Round-robin** randomly assigns new keys to the partitions.
  - Ensures an even distribution of tuples across nodes;
- **Range partitioning** assigns a contiguous key range to each node.
  - Not necessarily balanced, because data may not be evenly distributed
- **Hash partitioning** uses a hash function to determine the target partition.
  - If the hash function returns i, then the tuple is placed in partition i.

# Let's take a step back
[.header: #ffffff]
[.text: #ffffff]

![original](./attachments/giphy-2773.gif)

---

# To the future

[.header: #ffffff]
[.text: #ffffff]

![original](https://media.giphy.com/media/xsF1FSDbjguis/giphy.gif)

^ Joke Explained: because we will discuss *Processing* later

# Let's Talk about Workloads

![inline](./attachments/oltpvsolap.png)

^
- **OLTP** systems are usually expected to be **highly available** and to process transactions with low latency, since they are often critical to the operation of the business.
- **OLAP** queries are often written by business analysts, and feed into reports that help the management of a company make better decisions (business intelligence).

### Online Transactional Processing

Because these applications are interactive, the access pattern became known as **online**

**Transactional** means allowing clients to make low-latency reads and writes—as opposed to batch processing jobs, which only run periodically (for example, once per day).

### Refresh on ACID Properties

- ACID stands for Atomicity, Consistency, Isolation, and Durability[^11]
- **Atomicity** refers to something that cannot be broken down into smaller parts.
  - It is not about concurrency (which comes with the I)
- **Consistency** (an overused term) here relates to the data *invariants* (integrity would be a better term, IMHO)
- **Isolation** means that concurrently executing transactions are isolated from each other.
  - Typically associated with serializability, but there are weaker options.
- **Durability** means (fault-tolerant) persistence of the data once the transaction is completed.

^ The term was coined in 1983 by Theo Härder and Andreas Reuter [^16]

[^16]: Theo Härder and Andreas Reuter: "Principles of Transaction-Oriented Database Recovery," ACM Computing Surveys, volume 15, number 4, pages 287–317, December 1983.
doi:10.1145/289.291 - -### Online Analytical Processing - -An OLAP system allows a data analyst to look at different cross-tabs on the same data by interactively selecting the attributes in the cross-tab - -Statistical analysis often requires grouping on multiple attributes. - -### Example[^121] - -Consider this is a simplified version of the sales fact table joined with the dimension tables, and many attributes removed (and some renamed) - -sales (item_name, color, clothes_size, quantity) - ---- -|item_name|color|clothes_size|quantity| -|-----------|----------|----------|----------| -|dress|dark|small|2 -|dress|dark|medium|6 -|...|...|...|...| -|pants|pastel|medium|0 -|pants|pastel|large|1 -|pants|white|small|3 -|pants|white|medium|0 -|shirt|white|medium|1 -|...|...|...|...| -|shirt|white|large|10 -|skirt|dark|small|2 -|skirt|dark|medium|5 -|...|...|...|...| - -### Cross-tabulation of sales by item name and color - -||dark|pastel|white|total -|----|----|----|----|----| -|skirt | 8 | 35 | 10 |53| -|dress|20|11 |5 |36| -|shirt |22|4 |46 |72| -|pants|23|42 |25| 90| -|total |73|92 |102| 267| - -columns header: color -rows header: item name - -### Data Cube[^121] - -- It is the generalization of a Cross-tabulation - -![right fit](./attachments/Screenshot%202020-09-03%20at%209.25.34%20AM.png) - -### Cheat Sheet of OLAP Operations[^17] - -- **Pivoting**: changing the dimensions used in a cross-tab - - E.g. moving colors to column names -- **Slicing**: creating a cross-tab for fixed values only - - E.g fixing color to white and size to small - - Sometimes called dicing, particularly when values for multiple -dimensions are fixed. -- **Rollup**: moving from finer-granularity data to a coarser granularity - - E.g. aggregating away an attribute - - E.g. moving from aggregates by day to aggregates by month or year -- **Drill down**: The opposite operation - that of moving from coarser granularity data to finer-granularity data - -[^17]: Database System Concepts Seventh Edition Avi Silberschatz Henry F. Korth, S. Sudarshan McGraw-Hill ISBN 9780078022159 [link](https://www.db-book.com/db7/slides-dir/PDF-dir/ch11.pdf) - - -### Summary OLTP vs OLAP[^13] - -| Property | OLTP | OLAP | -|----------|----------|----------| -|Main read pattern| Small number of records per query, fetched by key |Aggregate over large number of records | -|Main write pattern| Random-access, low-latency writes from user input| Bulk import (ETL) or event stream | -|Primarily used by| End user/customer, via web application| Internal analyst, for decision support| -|What data represents| Latest state of data (current point in time)| History of events that happened over time | -|Dataset size |Gigabytes to terabytes |Terabytes to petabytes| - ---- - -[[Data Modeling for Databases]] - -[[Data Modeling for Data Warehouses]] - -[[Data Modeling for Big Data]] - -## Summary of Data Modeling Techniques - -According to Len Silverston (1997) only two modeling methodologies stand out, top-down and bottom-up. 
![right fit](https://pbs.twimg.com/profile_images/974019987630301184/kr2LdIyL.jpg)

### Data Modeling Techniques[^18]

- **Entity-Relationship (ER) Modeling**[^19] prescribes designing a model that encompasses the whole company and describes the enterprise business through entities and the relationships between them
  - it complies with 3rd normal form
  - tailored for OLTP

- **Dimensional Modeling** (DM)[^110] focuses on enabling complete requirement analysis while maintaining high performance when handling large and complex (analytical) queries
  - The star model and the snowflake model are examples of DM
  - tailored for OLAP

- **Data Vault (DV) Modeling**[^111] focuses on data integration, trying to take the best of ER 3NF and DM
  - emphasizes the establishment of an auditable basic data layer focusing on data history, traceability, and atomicity
  - one cannot use it directly for data analysis and decision making
- **Domain-Driven Design**[^112] focuses on designing software based on the underlying domain.
  - promotes the usage of a ubiquitous language to help communication between software developers and domain experts.
  - replaces the conceptual level for NoSQL

[^18]: [source](https://dzone.com/articles/a-comparison-of-data-modeling-methods-for-big-data)
[^19]: by Bill Inmon
[^110]: Ralph Kimball, book "The Data Warehouse Toolkit — The Complete Guide to Dimensional Modeling"
[^111]: [https://en.wikipedia.org/wiki/Data_vault_modeling](https://en.wikipedia.org/wiki/Data_vault_modeling)
[^112]: Evans, Eric. Domain-driven design: tackling complexity in the heart of software. Addison-Wesley Professional, 2004.



diff --git a/Data Transformation.md b/Data Transformation.md
deleted file mode 100644
index 8cb403f..0000000
--- a/Data Transformation.md
+++ /dev/null
@@ -1,5 +0,0 @@
-### ETL (Extract-Transform-Load)
-
-Information architecture pattern for data warehouse solutions
-
-Batch oriented data integration and delivery
\ No newline at end of file
diff --git a/Data Velocity.md b/Data Velocity.md
deleted file mode 100644
index dd06317..0000000
--- a/Data Velocity.md
+++ /dev/null
@@ -1,644 +0,0 @@
# Data Velocity

![inline](./attachments/2020-internet-minute-infographic.jpg)

---
![inline](./attachments/2019-internet-minute-infographic.png)

---
![inline](./attachments/2018-internet-minute-infographic.png)

---
![inline](./attachments/2017-internet-minute-infographic.png)

---
![inline](./attachments/2016-internet-minute-infographic.png)

### Data Volume & Velocity

[.column]
Nowadays, we produce massive amounts of data.

Moreover, we do not just produce a lot of data: we also produce data at very high rates. Thus, besides data volume, another critical issue is data velocity.

Data velocity represents the speed at which data are consumed and insights are produced.

[.column]

![inline](./attachments/Images/oneminute.jpg)

### Review on Batch Processing

We have addressed the Volume aspect of Big Data

- Static large data sets

- Partitioned across different nodes

- Processing jobs eventually terminate

Processing technologies
- MapReduce (Hadoop)
- HDFS
- Spark
- Hive

### What is a Stream?

A stream is an unbounded sequence of data. Typically it is modeled as a sequence of pairs $$(o_i, i)$$, where $$o_i$$ is a document/tuple/graph and $$i$$ is a timestamp.

$$(o_1,1), (o_2,2), (o_3,3), (o_4,4), (o_5,5), \dots, (o_i,i), \dots$$

### What for?
Three interesting characteristics distinguish streams from data at rest:

- Unbounded data, i.e., infinite
- Data are ordered, typically time-wise.
- Data are shared in an active way, i.e., a push model rather than a pull one: the rate is controlled by the source

### Where are the streams?
 ![right 90%](./attachments/Images/buzmeme.png)

Several sources share data in a streaming form.

- Streams from clusters, e.g., traces, metrics, and logs.
- Streams from social media, e.g., Twitter feeds.
- Streams from news, e.g., GDELT and Google News.
- Streams from sensor networks, e.g., from smart cities.


### How to Process Streams?

Due to the *unbounded* nature of streams, traditional processing techniques are not adequate.
Moreover, stream analysis typically has strict time constraints.

Thus, **stream processing** requires a paradigm shift, i.e.,
from data at rest and post-hoc analyses to processing data **in-motion** for in-motion insights.

![right fit](./attachments/Images/ibm-velocity.pdf)

### Who needs Stream Processing?

In many application domains, the limits of traditional
data processing infrastructures are challenged:
- Electronic trading
- Network monitoring
- Fraud detection
- Social network analysis
- IoT Applications
  - Smart cities
  - Smart grid


### 8 Requirements for Big Streaming[^1]

[.column]

- Keep the data moving
  - Prefer active (push) data sharing
- Declarative Access
  - E.g., StreamSQL, CQL
- Handle imperfections
  - Late, missing, unordered items
- Predictable outcomes
  - Consistency, event-time processing

[.column]

- Integrate stored and streaming data
  - Hybrid stream and batch
- Data safety and availability
  - Fault tolerance
- Partitioning and Scaling
  - Distributed processing
- Instantaneous response
  - Low latency

### Stream Processing State of the Art

[.column]

Stream Analytics (SA)

- Obtain aggregates over streaming data within time boundaries

Event Processing (EP)

- Interested in sequences of events, called composite events, defined using regular expressions

[.column]

![inline fit](./attachments/Images/asp2.png)
![inline fit](./attachments/Images/esp.png)

### 5 Dimensions to Consider

Five dimensions are important when considering stream processing engines for big data.

[.column]

- Notions of Time
- Continuous Processing
- Architectural View
- Fault Tolerance
- Programming Model

[.column]
![inline](./attachments/Images/placeholdergraph.png)

### Different Notions of Time (1/2)
In the SP literature, many notions of time have been proposed. For the sake of simplicity, we use the nomenclature suggested by Tyler Akidau[^2].
- **Ingestion time**: the time at which a stream element arrives at the source of the application
- **Processing time**: the time at which an operator in the processing pipeline reads the stream element
- **Event time**: the original time at which the data was generated

NB: in the following we ignore ingestion time without loss of
generality.

### Different Notions of Time (2/2)

For both ingestion and processing time, the progress of time is
controlled by the stream processing engine and the data ordering is strictly monotonic.

On the other hand, event time is controlled by the source. Thus, the data ordering is only monotonic. Indeed, in event time it is possible to have late-arriving elements and elements occurring simultaneously.

Depending on the type of processing one needs to do, late arrivals may be taken into account.
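
### Event Time vs Processing Time: A Sketch

To make the distinction concrete, here is a minimal Python sketch (not taken from any specific engine; the `window_start` helper and the element layout are illustrative) showing how the same element can land in different windows depending on the notion of time used:

```python
import time

WINDOW_SIZE = 60  # tumbling windows of 60 seconds

def window_start(ts: float) -> float:
    """Align a timestamp to the start of its tumbling window."""
    return ts - (ts % WINDOW_SIZE)

# Each element carries the event time assigned by its source.
element = {"value": 42, "event_time": 1_600_000_000.0}

# Processing time: when *this* operator happens to read the element.
processing_time = time.time()

# The same element may be assigned to different windows depending on
# which notion of time the engine is configured to use.
by_event_time = window_start(element["event_time"])
by_processing_time = window_start(processing_time)
```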
### Event Time vs Processing Time (1/2)

Ideally, one would like the data to reach the system as soon as they are generated in the real world.

However, this is *physically* not possible, due to network delay.

Moreover, in a distributed system, where the components are connected by a *non-reliable* network, events can arrive late, i.e., out of order.

### Event Time vs Processing Time (2/2)

![inline](./attachments/Images/EventTimeVsProcessingTime.png)
Image: Tyler Akidau

### Continuous Processing (1/2)
![](./attachments/Images/stream.png)

The infinite nature of streams requires a change of paradigm in the way we process data.

Continuous semantics: the result of a continuous query is the set of data that would be returned if the query were executed at every instant in time[^3].



### Continuous Processing (2/2)

[.column]

- The Stream is filled with the elements of the answer that are produced and never changed;
- The Store is filled with parts of the answer that may change in the future;
- The Scratch is used to store data that are not part of the answer but are used to compute it;
- The Throw is used to store unneeded tuples.

[.column]

![inline fit](./attachments/Images/modelsissues.png)

Image: Tyler Akidau

### Dealing With Unboundedness: Window Operators

[.column]

- Time windows
  - Sliding
  - Tumbling
- Tuple windows
  - Also called physical windows
- Data-driven windows
  - Session windows
- Triggered by
  - Event time, processing time, count

[.column]

![inline fit](./attachments/Images/StreamWindowing.png)

Image: Tyler Akidau

### Time-Based Window Operator (1/2)

A time-based sliding window operator is defined by two parameters $$W=(\omega,\beta)$$ where:
- $$\omega$$ represents the window width;
- $$\beta$$ is called the sliding parameter.

For each point in time, the time-based sliding window operator defines a set of windows.
$$\mathcal{W}=\{\,(o,c] \mid c>o,\ |c-o|=\omega,\ |o-o'|=|c-c'|=\beta\,\}$$

### Time-Based Window Operator (2/2)[^15]

![inline 110%](./attachments/Images/windows/1.pdf)

### Time-Based Window Operator (2/2)[^15]

![inline 110%](./attachments/Images/windows/2.pdf)

### Time-Based Window Operator (2/2)[^15]

![inline 110%](./attachments/Images/windows/3.pdf)

### Time-Based Window Operator (2/2)[^15]

![inline 110%](./attachments/Images/windows/4.pdf)

### Time-Based Window Operator (2/2)[^15]

![inline 110%](./attachments/Images/windows/5.pdf)

### Time-Based Window Operator (2/2)[^15]

![inline 110%](./attachments/Images/windows/6.pdf)

### Time-Based Window Operator (2/2)[^15]

![inline 110%](./attachments/Images/windows/7.pdf)


### Windowing in Processing Time

- elements' timestamps are controlled by the system
- time progresses according to the system's internal clock
- no chance of out-of-order or late arrivals

### Windowing in Processing Time[^16]

![inline](./attachments/Images/ProcessingTimeWindows.png)

[^16]: Image: Tyler Akidau

### Windowing in Event Time

- Windows are based on the timestamp info in the stream

- Buffering is needed to deal with late arrivals

- Needs an external time progress indicator

### Windowing in Event Time[^16]

![inline](./attachments/Images/eventitme1.png)

### Example Windowed Aggregation

![inline](./attachments/Images/ExampleWindowAggregation.png)

### Architectural Approaches to Stream Processing (1/2)

On the one hand, the Information Flow Processing (IFP) architecture is an abstract schema defined by Cugola and Margara to survey the existing SP proposals[^4].

![right fit](./attachments/Images/ifp.pdf)

### Architectural Approaches to Stream Processing (2/2)

On the other hand, two SP architectures that gained significant industrial traction are:

- The Lambda Architecture, which combines streaming and batch processing.
- The Kappa Architecture, which relies on a fault-tolerant stream log.

### Lambda Architecture[^17]

The lambda architecture is used when approximate results are needed quickly and more accurate results can come later.

Moreover, it is suitable for cases where pure stream processing is not fault tolerant and more accurate results require waiting for late arrivals.

![left fit](./attachments/Images/lambda-arch.pdf)

[^17]: Courtesy of Emanuele Della Valle/Marco Balduini

### Kappa Architecture[^17]

The Kappa architecture was designed to address the limitations of the lambda architecture.

It leverages only a speed layer, but it relies on fault-tolerant stream storage, e.g., a distributed log.

The Kappa architecture is simpler to maintain and less costly to operate than the lambda architecture.

### Kappa Architecture
![inline](./attachments/Images/kappa-arch.pdf)

### Programming with Streams

Stream processing frameworks hide execution details from the programmers and manage them in the background.

There are different abstraction levels that a programmer can use to express streaming computations.

![right fit](./attachments/Images/programming2.pdf)


### SQL-like Languages For Stream Processing

Prominent batch-processing solutions provide SQL interfaces, e.g., Hive,
Pig, SparkSQL. The reasons include access to a wider audience and
all the benefits of declarative languages. Similarly, stream processing
systems are migrating towards SQL-like languages. Can you guess what
kind of extensions they have? Exactly! Window operators!
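
### What a Window Extension Computes

As a rough illustration of what such a window extension computes, here is a small Python sketch (illustrative only; `events` is a made-up list of `(value, timestamp)` pairs) of a tumbling-window count, the kind of aggregation a streaming `GROUP BY window` clause would express declaratively:

```python
from collections import defaultdict

WIDTH = 10  # tumbling window of 10 time units

def tumbling_count(stream):
    """Count elements per tumbling window, keyed by window start."""
    counts = defaultdict(int)
    for value, ts in stream:
        counts[ts - (ts % WIDTH)] += 1
    return dict(counts)

# (value, timestamp) pairs; in a real engine, results would be
# emitted incrementally, not after the stream "ends".
events = [("a", 1), ("b", 4), ("c", 11), ("d", 12), ("e", 25)]
print(tumbling_count(events))  # {0: 2, 10: 2, 20: 1}
```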
### Continuous Query Language (1/2)

The infinite nature of streams requires a change of paradigm in the way
we process data. A first approach to this is given by the Continuous
Query Language (CQL)[^5].

CQL includes three families of operators that reconcile stream
processing with traditional relational algebra:
- Stream-to-Relation operators
- Relation-to-Relation operators
- Relation-to-Stream operators

### Continuous Query Language (2/2)

![inline](./attachments/Images/StreamToRelation.png)

### Programming with Streams: Functional APIs ![inline](./attachments/Images/programming_step2.pdf)

Stream processing frameworks such as Kafka Streams and Flink offer
functional APIs to directly write streaming programs.

The use of the function abstraction operator ($$\lambda$$-calculus)
provides a mechanism for the representation of streaming transformations
using higher-order functions such as filter, map, and flatMap.

Functional APIs are still declarative. However, they give more freedom
to the developer, who needs to design specialized operations, e.g.,
aggregations.

### Functional APIs
[.column]

- Rely on standard functional terms (e.g., map, flatMap)

- Hide the details about the underlying computations;

- Programmers need only specify what should be computed in terms of
  pipelines.

[.column]

![inline](./attachments/Images/img0012.png)

### Functional APIs: Reference Model[^6]

![inline](./attachments/Images/windows/duality.png)

### Programming with Streams: Dataflow

![inline](./attachments/Images/programming_step3.pdf)

- Dataflow networks were the first type of streaming programs to
  appear in the literature.

- A dataflow network represents a program as nodes and edges.

  - nodes represent (continuous) computations

  - edges represent data moving across nodes, i.e., streams.

  - Nodes (operators) can only communicate with each other through their
    input and output connections.

- Languages for dataflow programming offer programmers the primitives
  to implement custom business logic as topologies of nodes.

### Logical Dataflow Network

- What programmers design is actually the logical dataflow plan.

- However, a stream processing system distributes a dataflow graph
  across multiple machines.

- The system is also responsible for managing the partitioning of
  data, the network communication, as well as program recovery in case
  of machine failure.

![inline](./attachments/Images/img0007.png)

### Dataflow Programming: Physical Dataflow

- Before execution, systems typically create several parallel
  instances of the same operator, which we refer to as tasks.

- A system is able to scale out by distributing these tasks across
  many machines, akin to a MapReduce execution.

- In dataflow programming, the programmers can control the degree of
  parallelism and, thus, part of the physical execution.

### Physical Dataflow Network

![inline](./attachments/Images/img0007.png)

### Dataflow Programming: Stateful Operators

- Unlike a simple operator such as $$filter$$, certain operators need to
  keep mutable state.

- For instance, an operator that $$counts$$ all the occurrences of a
  certain event must keep a state of the current counts.

- In the word-counting example, counting the word occurrences
  received by an operator requires storing the words received thus
  far along with their respective counts.
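
### Stateful Operator: A Sketch

A minimal sketch of such a stateful word-count operator in plain Python (illustrative; real engines would also checkpoint this state for recovery):

```python
class WordCountOperator:
    """A stateful streaming operator: keeps a running count per word."""

    def __init__(self):
        self.counts = {}  # mutable operator state

    def on_element(self, word):
        # Update state and emit the new count downstream.
        self.counts[word] = self.counts.get(word, 0) + 1
        return (word, self.counts[word])

op = WordCountOperator()
for w in ["to", "be", "or", "not", "to", "be"]:
    print(op.on_element(w))
# ('to', 1) ('be', 1) ('or', 1) ('not', 1) ('to', 2) ('be', 2)
```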
### The Dataflow Model

The model provides a framework to answer four questions:

- What results are we computing?

- Where in event time are they computed?

- When in processing time are they materialized?

- How are results reported?

### The Dataflow Data Model

- A `PCollection<T>` is a collection of data of type $$T$$

- May be bounded or unbounded in size

  - Batch/Stream

- Each element has an implicit timestamp

  - Event time

- Initially created from backing data stores

  - File

  - Stream source

### What: Transformations

`PTransforms` transform `PCollections` into other `PCollections`
![inline](./attachments/Images/img0015.png)

### Where: Windowing Over Event-Time

![inline](./attachments/Images/img0017.png)

### When in Processing Time?

- Triggers control when results are materialized

  - Watermark trigger

  - Processing time trigger

  - Count trigger

  - Delta trigger (not supported by Dataflow)

- Multiple triggers

  - **Early**: useful to get early results, e.g., for a 24-hour window

  - **On**: at the window closure time

  - **Late**: responding to late arrivals

### Trigger at Watermark

![inline](./attachments/Images/img0021.png)

### How to Refine Results?

- How do results of multiple firings relate?

  - Discarding

    - Stateless, report the results for the latest firing only

  - Accumulating

    - Stateful, add to the previous result

  - Accumulating and Retracting

    - Remove the last update, put in the new value

![inline](./attachments/Images/img0024.png)

### Programming with Streams: Actor Model

![right fit](./attachments/Images/programming_step4.pdf)

Starting from the seminal work of Hewitt et al., actors were conceived as
a model for concurrent computing. This theory became the foundation of
several programming languages.

### Actors

[.column]

- Actors are lightweight objects that encapsulate a *state* and a
  *behavior*.

- They share no mutable state among them; in fact, the only way to
  communicate is through asynchronous message passing.

- To manage the incoming messages, each actor has a mailbox.

[.column]

![inline](./attachments/Images/actors.png)

### Actor Model And Stream Processing Execution

Immutable state, no sharing, and asynchronous processing are common requirements for stream processing systems, e.g., Flink or Storm.

The asynchronous message-passing communication that governs actor interactions is a key feature for providing a loosely coupled architecture where blocking operators are avoided.

Indeed, these characteristics are particularly interesting for stream processing systems, especially those where high scalability and parallel processing of streams are needed.

### Actor Model: Partitioning

Partitioning strategies determine the allocation of records between the
parallel tasks of two connected logical operators.

- **Random partitioning**: each output record of a task is shipped to
  a uniformly random assigned task of the receiving operator,
  distributing the workload evenly among tasks of the same operator.

- **Broadcast partitioning**: send records to every parallel task of
  the next operator.

- **Partitioning by key**: guarantees that records with the same key
  (e.g., declared by the user) are sent to the same parallel task of
  the consuming operator (see the sketch after this list).

- **User-defined partitioning functions**: (e.g., geo-partitioning or
  machine learning model selection).
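
### Partitioning by Key: A Sketch

A minimal Python sketch of key partitioning under the usual hash-modulo assumption (illustrative; production engines use their own stable hash functions rather than Python's per-process `hash()`):

```python
import zlib

NUM_TASKS = 4

def task_for_key(key: str) -> int:
    """Route every record with the same key to the same parallel task."""
    # crc32 is deterministic across processes, unlike built-in hash().
    return zlib.crc32(key.encode()) % NUM_TASKS

records = [("alice", 3), ("bob", 7), ("alice", 1)]
for key, value in records:
    print(f"{key} -> task {task_for_key(key)}")
# Both "alice" records land on the same task.
```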
- -### Open-Source Systems Overview - -![inline](./attachments/Images/StreamingSystemsOverview.png) - -### Large-Scale Data Stream Processing on Commodity Clusters - -- MapReduce and the development of open source software stacks for distributed data processing on commodity clusters (e.g., Apache - Hadoop, Apache Spark) initially covered a major need for batch or - offline data processing. However, low-latency and high-throughput - computing emerged as an open problem. - -- Some of the first open source SPs for commodity clusters were - `Yahoo! S4`[^7] and `Twitter Storm`[^8]. - -- The more systems provide richer semantics and higher-level - programming abstractions for data streams in order to simplify the - writing of data stream analysis applications. Examples of such - systems are `Apache Flink`[^9], `Beam`[^10] , `Samza`[^11], `Spark Streaming`[^12], `APEX`[^13], and `Kafka Streams`[^14]. - - - -### **The End** - - -![left fit](./attachments/Images/Thankyou.jpeg) -![right fit](./attachments/Images/Questions.jpg) - - -[^1]: Stonebraker, Michael, Ugur Cetintemel, and Stan Zdonik. "*The 8 requirements of real-time stream processing*." ACM Sigmod Record 34.4 (2005): 42-47 - - [^2]: Akidau, Tyler, et al. The dataflow model: a practical approach to balancing correctness, latency, and cost in massive-scale, unbounded, out-of-order data processing.(2015). - -[^3]: Terry, Douglas, et al. "Continuous queries over append-only databases." Acm Sigmod Record 21.2 (1992): 321-330. - -[^4]: Cugola, Gianpaolo, and Alessandro Margara. "Processing flows of - information: From data stream to complex event processing." ACM - Computing Surveys (2012). - -[^5]: Arasu, A., Babu, S., & Widom, J. (2006). The CQL continuous query - language: semantic foundations and query execution. The VLDB - Journal, 15(2), 121-142. - -[^6]: Sax, Matthias J., et al. "Streams and tables: Two sides of the - same coin." Proceedings of the International Workshop on Real-Time - Business Intelligence and Analytics. 2018. - -[^7]: [S4](http://incubator.apache.org/projects/s4.html) - -[^8]: [Storm](http://storm.apache.org/) - -[^9]: [Flink](https://flink.apache.org/) - -[^10]: [Beam](https://beam.apache.org/) - -[^11]: [Samza](http://samza.apache.org/) - -[^12]: [SparkStreaming](https://spark.apache.org/streaming/) - -[^13]: [Apex](https://apex.apache.org/) - -[^14]: [Kafka Ssubtreams](https://kafka.apache.org/documentation/streams/) - -[^15]: Courtesy of Emanuele Della Valle/Daniele Dell'Aglio diff --git a/Data Wrangling.md b/Data Wrangling.md deleted file mode 100644 index 2be0d4b..0000000 --- a/Data Wrangling.md +++ /dev/null @@ -1,755 +0,0 @@ -footer: Curtesy of Marco Brambilla (polimi) -slidenumbers: true -slide-dividers: ### - - -[Reading](http://cidrdb.org/cidr2015/Papers/CIDR15_Paper2.pdf) - - - -# Data Engineering -## Data Wrangling -#### LTAT.02.007 -#### Ass Prof. 
Riccardo Tommasini
#### Assistants: Mohamed Ragab, Samuele Langhi, Hassan Elsaeeb
#### Courtesy of Marco Brambilla

- [https://courses.cs.ut.ee/2020/dataeng](https://courses.cs.ut.ee/2020/dataeng)
- [Forum]()

---

### Conventional Definition of Data Quality

* __Accuracy__
  * The data was recorded correctly
* __Completeness__
  * All relevant data was recorded
* __Uniqueness__
  * Entities are recorded once
* __Timeliness__
  * The data is kept up to date (and time consistency is granted)
* __Consistency__
  * The data agrees with itself

### Problems …

* Unmeasurable
  * Accuracy and completeness are extremely difficult, perhaps impossible, to measure
* Context independent
  * No accounting for what is important. E.g., if you are computing aggregates, you can tolerate a lot of inaccuracy
* Incomplete
  * What about interpretability, accessibility, metadata, analysis, etc.?
* Vague
  * The conventional definitions provide no guidance towards practical improvements of the data

---

# Isn't data science sexy?

![original](./attachments/17-dataWrangling4.png)

---
# When Data Is Wrong

### The skeptic approach

![inline](./attachments/SkepticalDW.png)

### The pragmatic approach

![inline](./attachments/pseudopractitioner.png)

### The (pseudo) practitioner approach
![inline](./attachments/pseudo-practitioner.png)

### Goal: Better Faster Cheaper!

![inline 150%](./attachments/Better Faster Cheaper.png)

### The Vicious Cycle of Bad Data

![inline](./attachments/data quality issues.png)

### Data Quality Issue

Gartner Report

By 2017, 33% of the largest global companies will experience an information crisis due to their inability to adequately value, govern and trust their enterprise information.

> If you torture the data long enough, it will confess to anything
> – Darrell Huff

---

# Making a Wrong Right

---
### Data Wrangling is …

![inline](./attachments/datawranglingpipeline.png)

The process of transforming "raw" data into data that can be analyzed to generate valid actionable insights

Data scientists spend more time on preparing data than on analyzing it.

### Data Wrangling a.k.a.

Data Preprocessing
Data Preparation
Data Cleansing
Data Scrubbing
Data Munging
Data Fold, Spindle, Mutilate…
(good old ETL)


### Data Wrangling Steps

[.column]

- Iterative process
- Understand
- Explore
- Transform
- Augment
- Visualize

[.column]

![inline fit](./attachments/wranglingsteps.png)


### What is Data Cleansing?

__Data cleansing__ or __data scrubbing__ is the act of __detecting and correcting (or removing) corrupt or inaccurate records__ from a data set.

The term refers to identifying incomplete, incorrect, inaccurate, partial or irrelevant parts of the data and then replacing, modifying, filling in or deleting this dirty data.

![right fit](./attachments/cleansing.png)

### Why is Data "Dirty"?
- -[.column] - -- Dummy Values -- Absence of Data -- Multipurpose Fields -- Cryptic Data -- Contradicting Data - -[.column] - -- Shared Field Usage -- Inappropriate Use of Fields -- Violation of Business Rules -- Non-Unique Identifiers -- Data Integration Problems - -### Data Cleansing in Practice - -Parsing - -Correcting - -Standardizing - -Matching - -Consolidating - -### Parsing - -Parsing locates and identifies individual data elements in the source files and then isolates these data elements in the target files - -![inline](./attachments/parsing.png) - -### Correcting - -Corrects parsed individual data components using sophisticated data algorithms and secondary data sources - -![inline](./attachments/correcting.png) - -### Standardizing - -Standardizing applies conversion routines to __transform data into its preferred (and consistent( format__ using both standard and custom business rules, as well as coherent measurement units,… - -![inline](./attachments/standardizing.png) - - -### Matching - -Searching and __matching records__ within and across the parsed, corrected and standardized data based on predefined business rules to __eliminate duplications__ - - -### Match Patterns - -![inline](./attachments/match-pattern.png) - -### Matching - -![inline](./attachments/matching.png) - -### Consolidating - -Analyzing and __identifying relationships__ between matched records and consolidating/merging them into ONE representation - -![inline](./attachments/consolidating.png) - -### Understanding Data: PDF - -![inline](./attachments/Understandingdata.png) - -### Understanding Data: Free Text - -![inline](./attachments/UnderstandingDataFreeText.png) - -### Understanding Data: more - -#### Unstructured - - > Looks like my V8 Chevy is running low on fuel Didn’t I fill up just the day before? - -#### Structured - -|Owner|Vehicle|Type|Fuel Level|Engine|Last Fill| -|-------|-------|-----|----------|------|--------| -|AK |Chevy|Gas|5%|V8|05/04/16 - ---- - -### Understanding Data: more - - -Decode the following secret message: - -$$DALDFWSFOEWRBOSDCALAXORDJFKMCO$$ - -↓ - -$$DAL~DFW~SFO~EWR~BOSDCA~LAX~ORD~JFK~MCO$$ - - -### Data Munging - -Potentiallylossytransformations applied to a piece of data or a file - -Vague data transformation steps that are not yet completely clear - -Eg, removing punctuation or html tags, data parsing, filtering, and transformation - ---- -# Semantics - -[.column] -|???| -|:---:| -|75| -|80| -|65| -|55| -|67| -|78| -|88| -|90| -|45| -|58| -|69| -|80| -|110| - -[.column] - -![inline](./attachments/questionmark.png) - -### Semantics and Outliers -The value stands in the abnormal - -|Age(Years)| -|:---:| -|75| -|80| -|65| -|55| -|67| -|78| -|88| -|90| -|45| -|58| -|69| -|80| -|110| - - -![right fit](./attachments/17-dataWrangling21.png) - - -### Missing Data: Detection - -__Overtly missing data__ - -- Match data specs against data - are all the attributes present? -- Scan individual records - are there gaps? -- Rough checks: file sizes; \#files, \#records, or \#duplicates. -- Compare estimates (e.g., medians) with “expected” values and bounds. - -^ more estimates are averages, frequencies -^ check at various levels of granularity since aggregates can be misleading - -### Missing data: Detection (cont.) -__Hidden damage to data__ - -- Values are truncated or censored - -^ - check for spikes and dips in distributions and histograms - -- Missing values and defaults are indistinguishable - -^ - too many missing values? 
metadata or domain expertise can help - -- Errors of omission eg all calls from a particular area are missing - -^ - check if data are missing randomly or are localized in some way - -### Missing Values: Random - -- System failures -- Complete miss - -![left fit](./attachments/randommissingvalues.png) - -### Missing Values: Wrong Ingestion - -[.column] -CSV to table / excel - -Merged fields - -Missing fields - -[.column] -![inline](./attachments/wrongingestion1.png) - -![inline 4%](./attachments/1280px-Arrow_west.svg.png)[^1] - -![inline](./attachments/wrongingestion2.png) - -[^1]:Missing due to invalid data and ingestion - -### Missing Values: Inapplicability - -- Partial data by nature -- Remember to leave empty slots - -![left fit](./attachments/inapplicability.png) - -### Imputing Values to Missing Data - -- In federated data, between 30%-70% of the data points will have at least one missing attribute -- data wastage if we ignore all records with a missing value -- Remaining data is seriously biased -- Lack of confidence in results -- Understanding pattern of missing data unearths data integrity issues - -### Missing Value Imputation - 1 - -* Standalone imputation - * Mean, median, other point estimates - * Assume: Distribution of the missing values is the same as the non\-missing values - * Does not take into account inter\-relationships - * Introduces bias - * Convenient, easy to implement - -### Missing Value Imputation - 2 - -[.column] - -* Better imputation \- use attribute relationships -* Assume : all prior attributes are populated -* That is, _monotonicity_ in missing values - -[.column] - -|X1|X2|X3|X4|X5| -|:---:|:---:|:----:|:---:|:----:| -10| 20| 35| 4| . -11| 18| 40| 2| . -19| 22| 22| . | . -09| 15| . | . | . - -### Missing Value Imputation –3 - - * Two techniques - * Regression (parametric) - * Propensity score (non parametric) - -### Regression method (parametric) - - * Use linear regression, sweep left\-to\-right - - $$X3=a+b*X2+c*X1;$$ - - $$X4=d+e*X3+f*X2+g*X1$$ - - and so on - * X3 in the second equation is estimated from the first equation if it is missing - -### Propensity Scores (nonparametric) - * Let -$$Y_j=1 = \begin{cases} - 1~if~X_{ji}~is~missing; \\ - 0~otherwise - \end{cases}$$ - - - * Estimate P(Yj=1) based on X1through X1-jusing logistic regression - * Group by propensity score P(Y1=1) - * Within each group, estimate missing Xjs from known Xjs using approximate Bayesian bootstrap - * Repeat until all attributes are populated - -### Missing Value Imputation - 4 - -* Arbitrary missing pattern - * Markov Chain Monte Carlo (MCMC( - * Assume data is multivariate Normal,with parameter $$\Theta$$ - * (1) Simulate missing X, given $$\Theta$$ estimated from observed X ; - * (2) Re-compute $$\Theta$$ using filled in X - * Repeat until stable - * Expensive: Used most often to induce monotonicity - -^ __Note that imputed values are useful in aggregates but can’t be trusted individually__ - -### Censoring and Truncation - -Well studied in Biostatistics, relevant to time dependent data e.g. duration - - _Censored_ \- Measurement is bounded but not precise eg Call duration > 20 are recorded as 20 - - _Truncated_ \- Data point dropped if it exceeds or falls below a certain bound eg customers with less than 2 minutes of calling per month - -![inline](./attachments/17-dataWrangling25.png) - -Censored time intervals - -### Censoring/Truncation (cont.) 
If the censoring/truncation mechanism is not known, the analysis can be inaccurate and biased

But if you know the mechanism, you can mitigate the bias in the analysis

Metadata should record the existence as well as the nature of censoring/truncation

![inline](./attachments/17-dataWrangling26.png)

Spikes usually indicate censored time intervals, caused by resetting of timestamps to defaults

### Suspicious Data

Consider the data points

3, 4, 7, 4, 8, 3, 9, 5, 7, 6, 92

"92" is suspicious - an _outlier_

Outliers are potentially legitimate

Often, they are data or model glitches

Or, they could be a data miner's dream, e.g., highly profitable customers

### Outliers

* Outlier – "departure from the expected"
* Types of outliers – defining "expected"
* Many approaches
  * Error bounds, tolerance limits – control charts
  * Model based – regression depth, analysis of residuals
  * Geometric
  * Distributional
  * Time Series outliers

### Control Charts

* Quality control of production lots
* Typically univariate: X-Bar, R, CUSUM
* Distributional assumptions for charts not based on means, e.g., R-charts
* Main steps (based on statistical inference)
  * Define "expected" and "departure", e.g., mean and standard error based on the sampling distribution of the sample mean (aggregate)
  * Compute the aggregate for each sample
  * Plot aggregates vs expected and error bounds
  * "Out of Control" if aggregates fall outside bounds

### An Example

(http://www.itl.nist.gov/div898/handbook/mpc/section3/mpc3521.htm)

![inline](./attachments/17-dataWrangling27.png)

### Multivariate Control Charts - 1

* Bivariate charts:
  * based on bivariate Normal assumptions
  * component-wise limits lead to Type I, II errors
* Depth based control charts (nonparametric):
  * map n-dimensional data to one dimension using a depth function, e.g., Mahalanobis depth
  * Build control charts for depth
  * Compare against benchmark using depth, e.g., Q-Q plots of the depth of each data set

Bivariate Control Chart

### Multivariate Control Charts - 2

* Multiscale process control with wavelets:
  * Detects abnormalities at multiple scales as large wavelet coefficients
  * Useful for data with heteroscedasticity
  * Applied in chemical process control

### Model Fitting and Outliers

* Models summarize general trends in data
  * more complex than simple aggregates
  * e.g., linear regression, logistic regression focus on attribute relationships
* Data points that do not conform to well-fitting models are _potential outliers_ (see the sketch after this list)
* Goodness of fit tests (DQ for analysis/mining)
  * check suitability of the model to the data
  * verify validity of assumptions
  * is the data rich enough to answer the analysis/business question?
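
### Residual-Based Outlier Detection: A Sketch

A minimal sketch of residual-based outlier detection under a linear-model assumption (plain Python, least squares computed by hand; the data and the 2-sigma cutoff are illustrative conventions, not prescribed by the slides):

```python
# Fit y = a + b*x by least squares, then flag points whose residual
# is more than 2 standard deviations away from the fitted line.
xs = list(range(1, 11))
ys = [2.1, 4.0, 6.2, 7.9, 10.1, 12.0, 14.2, 15.8, 18.1, 40.0]  # last point is suspicious

n = len(xs)
mx, my = sum(xs) / n, sum(ys) / n
b = sum((x - mx) * (y - my) for x, y in zip(xs, ys)) / sum((x - mx) ** 2 for x in xs)
a = my - b * mx

residuals = [y - (a + b * x) for x, y in zip(xs, ys)]
sigma = (sum(r * r for r in residuals) / n) ** 0.5

outliers = [(x, y) for x, y, r in zip(xs, ys, residuals) if abs(r) > 2 * sigma]
print(outliers)  # [(10, 40.0)]
```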
### Set Comparison and Outlier Detection

"Model" consists of partition-based summaries

Perform nonparametric statistical tests for a rapid section-wise comparison of two or more massive data sets

If there exists a baseline "good" data set, this technique can detect potentially corrupt sections in the test data set

### Types of data

* Categorical
* Qualitative
  * Subjective
* Quantitative
  * Discrete
  * Continuous

![inline](./attachments/17-dataWrangling28.png)

Examples: color (categorical); "nice", "good", "for a birthday" (qualitative); 6 balloons (discrete); pressure of 15 PSI, 139 m over sea level (continuous).

![inline](./attachments/17-dataWrangling29.png)

### Data Source Selection Criteria

Credibility

Completeness

Accurateness

Verifiability

Currency

Accessibility

Compliance

Cost

Legal issues

Security

Storage

Provenance

### Not all tables are created equal

Find total comedy movies in all of 2014? -> Not easy in the current form

Find % of hit comedy movies in 2015? -> Very easy to add a new column

Very messy data: variables in both rows and columns

Each row is a complete observation

Normalize to avoid duplication

Multiple tables divided by time: combine all tables, accommodating varying formats

### The "Key" (matching) problem

Keys are crucial in DBs

Many DBs --> many keys

How to align?

Identification to a certain degree of accuracy: likely-identities

e.g., same-user match

### The "Duplicates" problem

Related to the Key problem

Identification to a certain degree of accuracy: likely-duplicates

e.g., duplicate posts

### Lessons Learnt on Tables

(Multiple) variables in columns

Never values as columns!

Shape may depend on convenience of queries

Matching identities and duplications are crucial in data science!

Each observation is complete and atomic

Each variable belongs to (only!) one column!
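
### Tidying a Table: A Sketch

A small illustration of the "never values as columns" lesson, assuming pandas is available (the table is made up):

```python
import pandas as pd

# Messy: years (values!) are spread across column headers.
messy = pd.DataFrame({
    "genre": ["comedy", "drama"],
    "2014": [120, 80],
    "2015": [150, 95],
})

# Tidy: one row per observation, one column per variable.
tidy = messy.melt(id_vars="genre", var_name="year", value_name="movies")
print(tidy)
#     genre  year  movies
# 0  comedy  2014     120
# 1   drama  2014      80
# 2  comedy  2015     150
# 3   drama  2015      95

# Now "total comedy movies in 2014" is a simple filter-and-sum:
total = tidy[(tidy.genre == "comedy") & (tidy.year == "2014")].movies.sum()
```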
- -### Schema-On-Write Vs Schema-On-Read - -Traditional DBMSs enforced writing only data consistent with a pre\-designed schema - -Today data modeling at design time is a luxury - -In schema on read, data is applied to a plan or schema as it is pulled out of a stored location, rather than as it goes in - -### Popular Open Source Tools - -![inline](./attachments/17-dataWrangling32.png)![inline](./attachments/17-dataWrangling33.png)![inline](./attachments/17-dataWrangling34.png)![inline](./attachments/17-dataWrangling35.png)![inline](./attachments/17-dataWrangling36.png)![inline](./attachments/17-dataWrangling37.png)![inline](./attachments/17-dataWrangling38.png) - -### Other Resources - -![inline](./attachments/17-dataWrangling39.png)![inline](./attachments/17-dataWrangling40.png)![inline](./attachments/17-dataWrangling41.png)![inline](./attachments/17-dataWrangling42.png) - -![inline](./attachments/17-dataWrangling43.png)![inline](./attachments/17-dataWrangling44.png)![inline](./attachments/17-dataWrangling45.png)![inline](./attachments/17-dataWrangling46.png) - -### Commercial Vendors - -![inline](./attachments/17-dataWrangling47.png)![inline](./attachments/17-dataWrangling48.png)![inline](./attachments/17-dataWrangling49.png)![inline](./attachments/17-dataWrangling50.png) - -![inline](./attachments/17-dataWrangling51.png)![inline](./attachments/17-dataWrangling52.png)![inline](./attachments/17-dataWrangling53.png)![inline](./attachments/17-dataWrangling54.png) - -### Trifacta Wrangler - -![inline](./attachments/17-dataWrangling55.png) - -### Google’s Open Refine - -![inline](./attachments/17-dataWrangling56.png) - -### Hands on Data Wrangling - -* __Data Ingestion__ - * CSV - * PDF - * API/JSON - * HTML Web Scraping - * XLS, Access,…\! -* __Data Exploration__ - * Visual inspection - * Graphing -* __Data Shaping__ - * Tidying Data - -* __Data Cleansing__ - * Missing values - * Format - * Measurement Units - * Outliers - * Data Errors Per Domain - * Fat Fingered Data -* __Data Augmenting__ - * Aggregate data sources - * Fuzzy/Exact match - -![inline](./attachments/17-dataWrangling57.png) - -### R Libraries for Data Wrangling - - * stringr - * dplyr - * tidyr - * readxl,xlsx - * lubridate - * gtools - * plyr - * rvest - -### References – Web and Books - -Web: - -www2gbiforg/DataCleaningpdf - -wwwwebopediacom/TERM/D/data\_cleansinghtml - -Books: - -Data Mining by Ian H Witten and Eibe Frank - -Exploratory Data Mining and Data Quality by Dasu and Johnson (Wiley, 2004( - -### References - Tools - -Stanford Wranglerhttp://visstanfordedu/papers/wranglerhttp://visstanfordedu/wrangler - -http://openrefineorg/ - -http://okfnlabsorg/ - -http://schoolofdataorg/ - - -# Marco Brambilla, @marcobrambi, marco.brambilla@polimi.it -http://datascience.deib.polimi.it - -![inline](./attachments/17-dataWrangling60.png) - diff --git a/Document Databases.md b/Document Databases.md deleted file mode 100644 index 60b8ad5..0000000 --- a/Document Databases.md +++ /dev/null @@ -1,135 +0,0 @@ -# Document Databases - -### Why document-based? - -* Handles Schema Changes Well (easy development) -* Solves Impedance Mismatch problem -* Rise of JSON -* python module: simplejson - -![inline](./attachments/mongodb-42.png) - -### What is a document? 
```javascript
{
  "business_id": "rncjoVoEFUJGCUoC1JgnUA",
  "full_address": "8466 W Peoria Ave\nSte 6\nPeoria, AZ 85345",
  "open": true,
  "categories": ["Accountants", "Professional Services", "Tax Services"],
  "city": "Peoria",
  "review_count": 3,
  "name": "Peoria Income Tax Service",
  "neighborhoods": [],
  "longitude": -112.241596,
  "state": "AZ",
  "stars": 5.0,
  "latitude": 33.581867000000003,
  "type": "business"
}
```

### JSON Format

![inline](./attachments/mongodb-60.png)

## Designing NoSQL Data Structures

- NoSQL data structures are driven by application design.
  - Need to take into account the necessary CRUD operations
- To embed or not to embed. That is the question!
  - The rule of thumb is to embed whenever possible.
- No modeling standards or CASE tools!

### Relational to Document

![left fit](./attachments/image89.png)


```json
 {
  "title" : "MongoDB",
  "contributors": [
      { "name" : "Eliot Horowitz",
        "email" : "eliot@10gen.com" },
      { "name" : "Dwight Merriman",
        "email" : "dwight@10gen.com" } ],
  "model" : {
      "relational" : false,
      "awesome" : true }
 }
```

### A normalized structure


```json
{
  "_id" : "First Post",
  "author" : "Rick",
  "text" : "This is my first post."
}
```

```json
{
  "_id" : ObjectID(...),
  "post_id" : "First Post",
  "author" : "Bob",
  "text" : "Nice Post!"
}
```

### A (denormalized) embedded structure


```json
{
  "_id" : "First Post",
  "comments" : [
    { "author" : "Bob",
      "text" : "Nice Post!"},
    { "author" : "Tom",
      "text" : "Dislike!"}],
  "comment_count" : 2
}
```

### A polymorphic structure

[.column]

- When all the documents in a collection are similarly, but not identically, structured.
- Enables simpler schema migration.
- no more custom \_field\_1
- Better mapping of object-oriented inheritance and polymorphism.

[.column]


```json
{
  "_id" : 1,
  "title": "Welcome",
  "url": "/",
  "type": "page",
  "content": "Welcome to my wonderful wiki."
}

{
  "_id": 3,
  "title": "Cool Photo",
  "url": "/photo.jpg",
  "type": "photo",
  "content": Binary(...)
}
```

### List of Systems

- **MongoDB**
- CouchDB
- OrientDB
diff --git a/Event Sourcing.md b/Event Sourcing.md
deleted file mode 100644
index e2226ec..0000000
--- a/Event Sourcing.md
+++ /dev/null
@@ -1,15 +0,0 @@
### Event Sourcing[^51]

> The fundamental idea of Event Sourcing is ensuring that every change to the state of an application is captured in an event object.

> Event objects are immutable and stored in the sequence they were applied for the same lifetime as the application state itself.

[^51]: Martin Fowler, [link](https://martinfowler.com/eaaDev/EventSourcing.html)

### Events

Events are both a fact and a notification.

They represent something that happened in the real world but include no expectation of any future action.

They travel in only one direction and expect no response (sometimes called "fire and forget"), but one may be "synthesized" from a subsequent event.
diff --git a/Graph Databases.md b/Graph Databases.md
deleted file mode 100644
index 5ad35bd..0000000
--- a/Graph Databases.md
+++ /dev/null
@@ -1,871 +0,0 @@
# Graph Technologies

![inline](./attachments/graphemergingtechnologies.png)

# Graph Databases

![inline](./attachments/cropped-relationships-matter-text-logo-2018-b.png)

---

## Back to One Machine

- Graph Databases are tailored for OLTP workloads.
- Typically, you are interested in selecting a subset of your graph based on a condition and then operating on that.

- Most of them work in a centralized fashion

## The Case of Graph OLAP

- OLAP queries over the entire graph will not be so efficient (why?)

- Graph OLAP algorithms are often **iterative**, and need to process the whole graph.

- Hard to scale out, because graphs are hard to partition

- If you're interested, join our Spring courses LTAT.02.003 and LTAT.02.010

## Graph DBs VS. RDBMSs

![right fit](./attachments/rdbsjoinpains.jpg)

- RDBs are well suited to generic queries, thanks to the internal structure of the tables.

- Aggregations over a complete dataset are "easy".

- However, relational databases struggle with highly connected domains.


### Performance
- -In relational databases, the performance of join-intensive queries deteriorates as the dataset gets bigger. - -On the other hand, graph database performance tends to remain relatively constant, even as the dataset grows. - -![right fit](./attachments/performance.png) - ---- - -![inline](https://thumbs.gfycat.com/RewardingScarceAmericanriverotter-size_restricted.gif) - -(Nope, indexes) - ---- - -> **Clarke's Third Law**: Any sufficiently advanced technology is indistinguishable from magic. - -![right fit](https://i.imgur.com/St8zTt0.jpg) - -### Agility - -Despite their names though, relational databases are less suited for exploring relationships. Thus, the complexity is pushed on the query language. - -In graph databses, relationships are first-class moreover, they have no schema. Thus, API and query language are much simpler and agile. - -![right fit](https://upload.wikimedia.org/wikipedia/commons/8/85/Australian_Shepherd_blue_merle_agility.jpg) - -### Flexibility - -Changing schemas in Relational Databases may break queries and store procedures or require to change the integrity constraints. - -Graphs are naturally additive, we can add new relationships or nodes without disturbing existing queries and application functionality. - -![right fit](https://images.freeimg.net/rsynced_images/yoga-2959233_1280.jpg) - - -### Graph DBs VS. NoSQL - -- Are RelationalDB NoSQL? - - In principles, yes. However they do not target OLAP... - - -![inline](./attachments/RDBMS_NoSQL-1-1024x785.png) - -### Nosql also Lacks Relationships - -- Most NOSQL databases whether key-value, document, or column oriented store sets of disconnected documents/values/columns. - -- This makes it difficult to use them for connected data and graphs. - -- One well-known strategy for adding relationships to such stores is to embed an aggregate's identifier inside the field belonging to another aggregate. - -![right fit](./attachments/nosqldbs.jpg) - ---- - -![inline](https://i.kym-cdn.com/photos/images/original/001/883/586/63f.jpg) - ---- - - -### Nosql also Lacks Relationships - -- We can **join aggregates** at the application level - - - Seeing a reference to order: 1234 in the record beginning user: Alice, we infer a connection between user: Alice and order: 1234. - -- Because there are no identifiers that "point" backward (the foreign aggregate "links" are not reflexive. - - - How to answer: *Who customers that bought a particular product?* - -- Aggregates quickly becomes prohibitively expensive. - -## Graph DBs embrace Relationships - -![inline](https://lh3.googleusercontent.com/proxy/nU7vVFas3w1ABHvKobdhDSfArqDEqZHld_Rpai7bXTSANgF84D3FK7PSb0x3Byq9F2bDqieq1xKYPBoMaQHQQHD9JfBlZjPxU9_zkAKaEP-cJpe6As5oWRi4WRyqSK_ZRQ_WbKZitHvRG_uJ8q52CTMUUU-AvkrC) - ---- - -![inline](./attachments/graphdbsRelations.jpg) - ---- -## Popularity of Graph DBs - -![inline](./attachments/graphdbspopular.jpg) - -## Which one to choose?![^111] - -![inline](./attachments/graphdatabases.jpg) - - -### Graph Storage and Processing - - - **Native Graph Storage** benefits traversal performance at the expense of making some queries that don't use traversals difficult or memory intensive. - - - **Non-Native graph storage**, e.g., usuing a relational backend, is purpose-built stack and can be engineered for performance and scalability. - -### Native Graph Processing - -A graph database has native processing capabilities if it uses index-free adjacency. - -A node directly references its adjacent nodes, acting as a micro-index for all nearby nodes. 
- -With index-free adjacency, bidirectional joins are effectively precomputed and stored in the database as relationships[^1140]. - -[^1140]:It is cheaper and more efficient than doing the same task with indexes, because query times are proportional to the amount of the graph searched. - ---- -![inline](https://dist.neo4j.com/wp-content/uploads/20181218005743/native-graph-technology-index-free-adjacency.png) - -### Storage - -Doubly Linked Lists in the Relationship Store - -![inline](https://i.stack.imgur.com/eHjOD.png) - -### Non-native processing - -- A nonnative graph database engine uses (global) indexes to link nodes together, - -- Example: - - To find Ali‐ ce’s friends we have first to perform an index lookup, at cost O(log n). - - - If we wanted to find out who is friends with Alice, we would have to one lookup for each node that is potentially friends with Alice. This makes the cost O(m log n). - -![right fit](https://dist.neo4j.com/wp-content/uploads/20181218005826/non-native-graph-database-relying-on-index.png) - ---- - -### Neo4J Graph DB[^112] - -- It supports ACID transactions -- It implements a Property Graph Model efficiently down to the storage level. -- It is useful for single server deployments to query over medium sized graphs due to using memory caching and compact storage for the graph. -- Its implementation in Java also makes it widely usable. -- It provides master-worker clustering with cache sharding for enterprise deployment. -- It uses Cypher as a declarative query language. - -![right fit](./attachments/neo4j_logo_globe.png) -### AllegroGraph Semantic Graph DB[^114] - -- AllegroGraph is a graph database and application framework for building Semantic Web applications. -- It can store data and meta-data as triples. -- It can query these triples through various query APIs like SPARQL (the standard W3C query language). -- It supports RDFS++ as well as Prolog reasoning with its built-in reasoner. -- AllegroGraph includes support for Federation, Social Network Analysis, Geospatial capabilities and Temporal reasoning. - -![right fit](./attachments/blog-spparql.png) - -## Graph Data Models - -- Two Popular Graph Data Models: - - - Edge-Labelled Graphs - - - Property Attributed Graphs - -![inline](./attachments/graphDatamodels.jpg) - -### Property Graphs Vs. Edge-Labelled Graphs - -- Edge-Labelled Graphs are widely adopted in practice. E.g. Resource Description Framework (RDF) (Figure in the previous slide). - -- However, it is often cumbersome to add information about the edges to an edge-labelled graph. - -- For example, if we wished to add the source of information, for example, that the acts-in relations were sourced from the web-site IMDb. - -- Adding new types of information to edges in an edge-labelled graph may thus require a major change to the graph's structure, entailing a significant cost. - -### Property Graph Example - -![inline](./attachments/propertygraph.png) - -### Variations of the Property Graph Data Model (PGM) - -- **Direction.** A property graph is a directed graph; the PGM defines edges as ordered pairs of vertices. - -- **Multi-graph.** A property graph is a multi-graph; the PGM allows multiple edges between a given pair of vertices. - -- **Simple graphs** (in contrast to multi-graphs) additionally require to be injective (one-to-one). - -- **Labels.** A property graph is a multi-labeled graph; the PGM allows vertices and edges to be tagged with zero or more labels. 
## Graph Query Languages

### How to Query Graph Databases?

- Although graphs can still be (and sometimes still are) stored in relational databases, the choice to use a graph database for certain domains has significant benefits in terms of querying: the emphasis shifts from joining various tables to specifying graph patterns and navigational patterns between nodes that may span arbitrary-length paths.

- A variety of graph database engines, graph data models, and graph query languages have been released over the past few years.

  - Examples of graph DBs: Neo4j, OrientDB, AllegroGraph.

  - Graph data models: property graphs, edge-labelled graphs, and many variations of them.

  - Different modern query languages have also come onto the scene, such as Cypher, SPARQL, Gremlin, and many more.

![right fit](./attachments/Creating-Custom-Graph-Views-Over-your-RDF-Data_without-taaext.jpg)

### Graph Query Languages: Core Features

- Features:

  - Graph patterns.

  - Navigational "path" expressions.

  - Aggregation.

  - Graph-to-graph queries.

  - Path unwinding.

- Standardization:

  - (SPARQL/SPARQL 1.1) --- Yes

  - (Gremlin, G-Core, GraphQL, Cypher) --- No

![right fit](./attachments/graphquerylangsexamples.jpg)

# Pattern Matching and Graph Navigation

## Graph Pattern Matching vs. Graph Navigation

- Graph query languages vary significantly in terms of style, purpose, and expressivity.
- However, they share a common conceptual core:
  - **Graph pattern matching** consists of a graph-structured query that is matched against the graph database,
    e.g., find all triangles of friendships in a social network.
  - **Graph navigation** is a more flexible querying mechanism that allows users to navigate the topology of the data,
    e.g., find all friends-of-a-friend of some person in a social network.

![right fit](./attachments/pattern.jpg)

## Graph Pattern Matching

For matching graph patterns, the main proposals for the semantics fall into two categories:

- **Homomorphism-based**: matching the pattern onto a graph with no restrictions.
- **Isomorphism-based**: one of the following restrictions is imposed on a match:
  - **No-repeated-anything**: no part of the graph is mapped to two different variables.
  - **No-repeated-node**: no node in the graph is mapped to two different variables.
  - **No-repeated-edge**: no edge in the graph is mapped to two different variables.

![right fit](./attachments/Pasted image 20201001090110.png)

### Basic Graph Patterns vs. Complex Graph Patterns

- Basic graph patterns (BGPs) are simply graphs to be matched within the larger graph database. BGPs are the core of any graph query language.

- Complex graph patterns (CGPs) extend BGPs with additional query features such as union, difference, projection, optional (a.k.a. left outer join), and filters.

![right fit](./attachments/pattern.jpg)

### CGP Operators: Projection

- Like `SELECT` in SQL, projection is used to return only specific outputs.

- Example: retrieve only the names of actors who starred together in Unforgiven.

### CGP Operators: Union

- Intended to merge the results of two queries.

- Let $$Q1$$ and $$Q2$$ be two graph patterns.
The union of $$Q1$$ and $$Q2$$ is a complex graph pattern whose evaluation is defined as the union of the evaluations of the two patterns.

- Example: *find the movies in which Clint Eastwood acted or which he directed*.

### CGP Operators: Difference

- The difference of $$Q1$$ and $$Q2$$ is also a complex graph pattern, whose evaluation is defined as the set of matches in the evaluation of $$Q1$$ that do not belong to the evaluation of $$Q2$$.

- Logically, a form of **negation**.
- Example: *find the movies in which Clint Eastwood acted but did **not** direct*.

### CGP Operators: Optional

- This feature is particularly useful when dealing with incomplete information, or in cases where the user may not know what information is available.

- Essentially a left outer join.

- Example: *the information on the gender of users is incomplete, but may still be interesting to the client where available*.

### CGP Operators: Filter

- Users may wish to restrict the matches of a CGP over a graph database G based on some of the intermediate values returned, using, for example, inequalities or other types of expressions.

- Equivalent to relational selection.

- Example: *find all male actors who acted in a Clint Eastwood movie*.

---

Or: find all of Leonardo DiCaprio's ex-girlfriends who were above 25 years old.

![inline](https://i.insider.com/5c8929cadd086120820a26b2?width=1100&format=jpeg&auto=webp)

Hint: none.

---

## Navigational (Path) Queries in Graphs

![right fit](./attachments/navigationalqueries.png)

### Navigational Path Queries

- Graph patterns allow for querying graph databases in a bounded manner.

- Navigational path queries provide a more flexible (yet more expensive) querying mechanism that allows users to navigate the topology of the data.

- One example of such a query is to find all friends-of-a-friend of some person in a social network.

![right fit](https://i.ytimg.com/vi/AMOb_w6Jfug/maxresdefault.jpg)

---

### Paths under Set Semantics

- **Arbitrary paths**: all paths are considered. More specifically, all paths in G that satisfy the constraints of P are included in P(G).

- **Shortest paths**: in this case, P(G) is defined in terms of shortest paths only, that is, paths of minimal length that satisfy the constraint specified by P.

- **No-repeated-node paths**: in this case, P(G) contains all matching paths where each node appears at most once in the path; such paths are commonly known as simple paths. This interpretation makes sense in some practical scenarios; for example, when finding a travel route, it is usually undesirable to pass through the same place more than once.

- **No-repeated-edge paths**: under this semantics, P(G) contains all matching paths where each edge appears at most once in the path. The Cypher query language of the Neo4j engine currently uses this semantics.

### Output of Navigational Queries

- As hinted at previously, a user may have different types of questions with respect to the paths contained in the evaluation P(G), such as:

  - *Does there exist any such path?*

  - *Is a particular path contained in P(G)?*

  - *What are the pairs of nodes connected by a path in P(G)?*

  - *What are (some of) the paths in P(G)?*

- We can categorize such questions by what they return as results:

  - Booleans --- (true/false) values.

  - Nodes --- we are interested in the nodes connected by specific paths.

  - Paths --- some or all of the full paths are returned from P(G).
    Example: some of the shortest paths.

  - Graphs --- the output is offered as a compact representation in the form of a graph.

## Navigational Graph Patterns (NGPs)

- Combining path queries with basic graph patterns (BGPs) gives rise to navigational graph patterns (NGPs).

- In particular, this language allows one to express that some edges in a graph pattern should be matched by a path (satisfying certain conditions) instead of a single edge.

- Example: persons and movies are connected by acts-in edges, while a person can also have an author edge connecting it to an article.

- In such a database we might be interested in finding people with a finite Erdos-Bacon number, that is, people who are connected to Kevin Bacon through co-star relations and to Paul Erdos through co-authorship relations.

![inline](./attachments/ngbs.png)

---

![inline](./attachments/navigationalqueries.jpg)

---

### Navigational Graph Patterns (NGPs)

- Coming back to the social network, we might be interested in finding all friends-of-friends of Julie who liked a post with a tag that Julie follows. The navigational graph pattern in this figure expresses this query over our social graph.

- Extending navigational graph patterns with the complex operators of projection, optional, filter, union, and difference gives rise to yet another class: complex navigational graph patterns (CNGPs).

- Example: let's call these results the "recommended posts" for Julie. Now consider a copy of the same pattern to find the recommended posts for John.

![inline](./attachments/ngpexample2.jpg)

# Graph Query Languages in Action

- **Cypher** --- property graphs

- Gremlin --- property graphs

- GraphQL --- edge-labelled multi-graphs

- **SPARQL** --- edge-labelled graphs (RDF)

- G-Core --- property graphs

![right fit](./attachments/graphquerylangsexamples.jpg)

## Cypher - The Neo4j Query Language

- Cypher is a declarative language for querying property graphs that uses "patterns" as its main building blocks.

- Cypher's declarative syntax provides a familiar way to match patterns of nodes and relationships in the graph.

- It is backed by several companies in the database space, and the openCypher project allows implementors of databases and clients to freely benefit from, use, and contribute to the development of the language.

![right fit](./attachments/cypherneo4j.jpg)

### Graph Patterns in Cypher (Projection)

- Patterns are expressed syntactically following a "pictorial" intuition, encoding nodes and edges with arrows between them.

- The following queries ask for co-stars of the movie *"Unforgiven"*.

[.column]

```sql
MATCH (x:Person)-[:acts_in]->
      (m:Movie {title: "Unforgiven"})
      <-[:acts_in]-(y:Person)
RETURN x,y
```

[.column]

```sql
MATCH (x:Person)-[:acts_in]->(m:Movie
      {title: "Unforgiven"}),
      (y:Person)-[:acts_in]->(m)
RETURN x,y
```

^ In this case, we would also get the matches that send both x and y to the node of Clint Eastwood (and likewise to the node of Anna Levine).
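If such repeated matches are unwanted, the homomorphism-based semantics can be restricted with an explicit filter. A minimal sketch (the `WHERE` clause and the name projection are our addition, not part of the original query):

```sql
MATCH (x:Person)-[:acts_in]->
      (m:Movie {title: "Unforgiven"})
      <-[:acts_in]-(y:Person)
WHERE x <> y           -- drop matches binding x and y to the same actor
RETURN DISTINCT x.name, y.name
```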
### Complex Graph Patterns in Cypher: Union

```sql
MATCH (:Person
      {name:"Clint Eastwood"})-[:acts_in]->(m:Movie)
RETURN m.title
UNION ALL
MATCH (:Person
      {name:"Clint Eastwood"})-[:directs]->(m:Movie)
RETURN m.title
```

### Complex Graph Patterns in Cypher: Difference

```sql
MATCH (p:Person)-[:acts_in]->(m:Movie
      {title: "Unforgiven"})
WHERE NOT (p)-[:directs]->(m)
RETURN p.name
```

### Complex Graph Patterns in Cypher: Optional

```sql
MATCH (p:Person)-[:acts_in]->(m:Movie)
OPTIONAL MATCH (p)-[x]->(m)
WHERE type(x) <> "acts_in"
RETURN p.name, m.title, type(x)
```

### Navigational Queries in Cypher

- While not supporting full regular expressions, Cypher still allows transitive closure over a single edge label in a property graph.

- Since it is designed to run over property graphs, Cypher also allows the star to be applied to an edge property/value pair.

- **Example**: compute the friend-of-a-friend relation. The following query selects pairs of nodes linked by a path labelled entirely with knows. To do this, it applies the star operator * to the label knows.

```sql
MATCH (x:Person)-[:knows*]->(y:Person)
RETURN x,y
```

### Navigational Queries in Cypher

- Example 2: find friends-of-friends of Julie and return only a shortest witnessing path. The query below returns a single shortest witnessing path; to return all shortest paths, replace "shortestPath" with "allShortestPaths".

```sql
MATCH (x:Person {firstname:"Julie"}),
      p = shortestPath((x)-[:knows*]->(y:Person))
RETURN p
```

- Example 3: coming back to the social network, to find all friends-of-friends of Julie who liked a post with a tag that Julie follows, we can use the following Cypher query:

```sql
MATCH (x:Person {firstname:"Julie"})-[:knows*]->(y:Person)
MATCH (y)-[:likes]->()-[:hasTag]->(z)
MATCH (z)-[:hasFollower]->(x)
RETURN y
```

### Navigational Queries in Cypher

- Another interesting feature available in Cypher is the ability to return paths.

- Example 4: to return all friends-of-friends of Julie in the graph, together with a path witnessing the friendship, we can use:

```sql
MATCH p = (:Person {firstname:"Julie"})-[:knows*]->(x:Person)
RETURN x,p
```

- The result will be:

|x|p|
|-------|--------|
|Node[2]|[Node[1],:knows[1],Node[2]]|
|Node[1]|[Node[1],:knows[1],Node[2],:knows[2],Node[1]]|

## **SPARQL** - The RDF Query Language

- SPARQL is the standard query language for RDF; it became an official W3C Recommendation in 2008.

- SPARQL is a pattern-matching query language over RDF graphs. SPARQL queries contain sets of triple patterns (TPs), known as basic graph patterns (BGPs).

- Triple patterns are similar to RDF triples, but the subject, predicate, or object may be an unbound variable, marked with the "?" prefix.

- SPARQL's mission is to bind those variables by matching the query patterns against the triples in the RDF dataset.

![right fit](./attachments/blog-spparql.png)

### RDF Graphs

- RDF graphs are a special type of edge-labelled graph.
- The basic building block is a triple (subject, predicate, object).

- Nodes and edges are identified using URIs.

- Objects can also be literals (numbers, strings, ...).

![right fit](./attachments/Screenshot 2020-10-01 at 9.25.20 AM.png)

### Anatomy of a SPARQL Query

![inline](./attachments/sparql.png)

### SPARQL Graph Patterns

[.column]

Let us take a closer look at how graph patterns are applied in practical query languages such as SPARQL and Cypher.

- SPARQL: Projection

  - The following SPARQL query represents a complex graph pattern that combines a basic graph pattern with a projection asking to return only the co-stars and not the movie identifier.

[.column]

```SQL
PREFIX :
SELECT ?x ?z
WHERE {
  ?x :acts_in ?y ;
     :type :Person .
  ?z :acts_in ?y ;
     :type :Person .
  ?y :title "Unforgiven" ;
     :type :Movie .
  FILTER(?x != ?z)
}
```

|?x|?z|
|-----|-----|
|:Clint_Eastwood|:Anna_Levine|
|:Anna_Levine|:Clint_Eastwood|

---

![inline](./attachments/rdfgraph.jpg)

### Complex Graph Patterns in SPARQL (Union)

[.column]

- This example uses a union to find movies that Clint Eastwood has acted in or directed.

[.column]

```SQL
SELECT ?x
WHERE {
  { :Clint_Eastwood :acts_in ?x . }
  UNION
  { :Clint_Eastwood :directs ?x . }
}
```

|?x|
|---|
|:Unforgiven|

### Complex Graph Patterns in SPARQL (Difference)

[.column]

- We can use difference to ask for people who acted in the movie Unforgiven but did not (also) direct it.

[.column]

```SQL
SELECT ?x
WHERE {
  { ?x :acts_in :Unforgiven . }
  MINUS
  { ?x :directs :Unforgiven . }
}
```

|?x|
|---|
|:Anna_Levine|

### Complex Graph Patterns in SPARQL (Optional)

[.column]

- Using optional, we can ask for movies that actors have appeared in, together with any other participation they had in the movie besides acting in it.

[.column]

```SQL
SELECT ?x ?y ?z
WHERE {
  { ?x :acts_in ?y . }
  OPTIONAL
  { ?x ?z ?y .
    FILTER(?z != :acts_in) }
}
```

### Navigational Queries in Action: SPARQL

- Since version 1.1, SPARQL permits the use of property paths.

- SPARQL property paths are an extended form of regular expressions.

- As a consequence, we can express any path query in SPARQL 1.1.

---

[.column]

#### Example 1

- To find all pairs of actors with a finite collaboration distance, we can use the following SPARQL query:

```SQL
SELECT ?x ?y
WHERE { ?x (:acts_in/^:acts_in)* ?y }
```

[.column]

#### Example 2

- Consider the following SPARQL query with a negated property set.
- This query will match :Unforgiven (the IRI) and "Unforgiven" (the title string) for ?y.

```SQL
SELECT ?y
WHERE { :Clint_Eastwood (!(rdf:type|:directs))* ?y }
```

### Navigational Queries in SPARQL

- Similarly, SPARQL can also express navigational graph patterns (NGPs).

- **Example**: finding all people with a finite Erdos-Bacon number can be expressed in SPARQL as in the query below, which is a conjunction of two RPQs, where the symbol "." denotes conjunction.

```SQL
SELECT ?x
WHERE {
  ?x (:acts_in/^:acts_in)* :Kevin_Bacon .
  ?x (:author/^:author)* :Paul_Erdos .
}
```

### Navigational Queries in SPARQL

- Likewise, SPARQL can express complex navigational graph patterns (CNGPs).

- **Example**:
We can express an RDF version of the query for the posts recommended to Julie but not to John as follows:

```SQL
SELECT ?x
WHERE {
  { :Julie :knows+/:likes ?x .
    ?x :hasTag/:hasFollower :Julie . }
  MINUS
  { :John :knows+/:likes ?x .
    ?x :hasTag/:hasFollower :John . }
}
```

## Other Popular Query Languages

- G-Core[^117]

  - A community effort between industry and academia to shape and standardize the future of graph query languages.

  - G-Core features:

    - Composability: graphs are inputs and outputs of queries, so queries can be composed. The fact that G-Core is closed on the PPG data model means that subqueries and views are possible.

    - Paths are first-class citizens: paths increase the expressivity of the language. G-Core extends the graph model with paths (path property graphs, PPGs), which can have labels and properties of their own.

    - Capture a core: standardization is difficult and political; take the successful functionality of current languages that has tractable evaluation as the base to develop from.

## Other Popular Query Languages

- GraphQL also removes redundancy from results; a further restriction it imposes concerns types.

- The following figure (left) shows an example GraphQL query over the domain (F, A, T); the response is on the right.

![inline](./attachments/graphql.png)

## Graph Query Languages: Feature Comparison

![inline](./attachments/graphqlscompar.png)

[^111]: Ian Robinson, Jim Webber, and Emil Eifrem. 2013. *Graph Databases*. O'Reilly Media, Inc.

[^112]: [url](https://neo4j.com)

[^113]:

[^114]:

[^115]:

[^116]: Sherif Sakr, Sameh Elnikety, and Yuxiong He. 2012. *G-SPARQL: a hybrid engine for querying large attributed graphs*. In Proceedings of the 21st ACM International Conference on Information and Knowledge Management (CIKM '12).

[^117]: Angles, Renzo, et al. *G-CORE: A core for future graph query languages*. Proceedings of the 2018 International Conference on Management of Data. ACM, 2018.

[^118]: http://www.cs.cmu.edu/~pegasus/

[^119]: https://github.com/twitter/cassovary

[^1110]: http://uzh.github.io/signal-collect/

[^1111]: Kyrola, Aapo, Guy E. Blelloch, and Carlos Guestrin. *GraphChi: Large-scale graph computation on just a PC*. USENIX, 2012.

[^1112]: A. Roy, I. Mihailovic, and W. Zwaenepoel. 2013. *X-Stream: Edge-centric graph processing using streaming partitions*. In Proceedings of the 24th ACM Symposium on Operating Systems Principles (SOSP '13).

[^1113]: Nilakant, K., Dalibard, V., Roy, A., & Yoneki, E. (2014, June). *PrefEdge: SSD prefetcher for large-scale graph traversal*. In Proceedings of the International Conference on Systems and Storage.

[^1114]: Roy, A., Mihailovic, I., & Zwaenepoel, W. (2013, November). *X-Stream: Edge-centric graph processing using streaming partitions*. In Proceedings of the Twenty-Fourth ACM Symposium on Operating Systems Principles.

[^1115]: Roy, A., Bindschaedler, L., Malicevic, J., & Zwaenepoel, W. (2015, October). *Chaos: Scale-out graph processing from secondary storage*. In Proceedings of the 25th Symposium on Operating Systems Principles.

[^1116]: Y. Tian, A. Balmin, S. Andreas Corsten, S. Tatikond, and J. McPherson. 2013. *From "Think Like a Vertex" to "Think Like a Graph"*. Proc. VLDB Endow.

[^1117]: Y. Simmhan, A. Kumbhare, C. Wickramaarachchi, S. Nagarkar, S. Ravi, C. Raghavendra, and V. Prasanna. 2014. *GoFFish: A sub-graph centric framework for large-scale graph analytics*. In Proceedings of the Euro-Par 2014 Parallel Processing Conference.
[^1118]: Z. Khayyat, K. Awara, A. Alonazi, H. Jamjoom, D. Williams, and P. Kalnis. *Mizan: a system for dynamic load balancing in large-scale graph processing*. EuroSys, 2013.

[^1119]: http://infolab.stanford.edu/gps/

[^1120]: http://www.cse.cuhk.edu.hk/pregelplus/

[^1121]: http://dbs.uni-leipzig.de/en/research/projects/gradoop

[^1122]: Y. Low, J. Gonzalez, A. Kyrola, D. Bickson, C. Guestrin, and J. M. Hellerstein. *Distributed GraphLab: A Framework for Machine Learning in the Cloud*. PVLDB, 2012.

[^1123]: https://giraph.apache.org/

[^1124]:

[^1125]:

[^1126]: Eugene Inseok Chong, Souripriya Das, George Eadon, and Jagannathan Srinivasan. *An efficient SQL-based RDF querying scheme*. In Proceedings of the 31st International Conference on Very Large Data Bases, 2005.

[^1127]: Thomas Neumann and Gerhard Weikum. *The RDF-3X engine for scalable management of RDF data*. The VLDB Journal, 2010.

[^1128]: Weiss, Cathrin, Panagiotis Karras, and Abraham Bernstein. *Hexastore: sextuple indexing for semantic web data management*. Proceedings of the VLDB Endowment, 2008.

[^1129]: Li Ma, et al. *RStar: An RDF storage and query system for enterprise resource management*. In Proceedings of the Thirteenth ACM International Conference on Information and Knowledge Management, 2004.

[^1130]: Luis Galarraga. *Partout: a distributed engine for efficient RDF processing*. In Proceedings of the 23rd International Conference on World Wide Web, 2014.

[^1131]: M. Hammoud, D. A. Rabbou, R. Nouri, S. Beheshti, and S. Sakr. *DREAM: Distributed RDF Engine with Adaptive query planner and Minimal communication*. Proceedings of the VLDB, 2015.

[^1132]: Rohloff, Kurt, and Richard E. Schantz. *High-performance, massively scalable distributed systems using the MapReduce software framework: the SHARD triple-store*. Programming Support Innovations for Emerging Distributed Applications. ACM, 2010.

[^1133]: Nikolaos Papailiou, Dimitrios Tsoumakos, Ioannis Konstantinou, Panagiotis Karras, and Nectarios Koziris. *H2RDF+: an efficient data management system for big RDF graphs*. In Proceedings of the 2014 ACM SIGMOD International Conference on Management of Data.

[^1134]: Olivier Cure, Hubert Naacke, Mohamed Amine Baazizi, and Bernd Amann. *HAQWA: a hash-based and query workload aware distributed RDF store*. In International Semantic Web Conference (Posters & Demos), 2015.

[^1135]: Damien Graux, Louis Jachiet, Pierre Geneves, and Nabil Layaida. *SPARQLGX: Efficient distributed evaluation of SPARQL with Apache Spark*. In International Semantic Web Conference, 2016.

[^1136]: Alexander Schatzle, Martin Przyjaciel-Zablocki, Simon Skilevic, and Georg Lausen. *S2RDF: RDF querying with SPARQL on Spark*. VLDB, 2016.

[^1137]: Alexander Schatzle, Martin Przyjaciel-Zablocki, Thorsten Berberich, and Georg Lausen. *S2X: graph-parallel querying of RDF with GraphX*. In Biomedical Data Management and Graph Online Querying, 2015.

[^1138]: Gergo Gombos, Gabor Racz, and Attila Kiss. *Spar(k)ql: SPARQL evaluation method on Spark GraphX*. In 2016 IEEE 4th International Conference on Future Internet of Things and Cloud Workshops (FiCloudW), 2016.
[^1139]: Ramazan Ali Bahrami, Jayati Gulati, and Muhammad Abulaish. *Efficient processing of SPARQL queries over GraphFrames*. In Proceedings of the International Conference on Web Intelligence, 2017.

diff --git a/Graph Theory.md b/Graph Theory.md
deleted file mode 100644
index a1ff26f..0000000
--- a/Graph Theory.md
+++ /dev/null
@@ -1,249 +0,0 @@

# History

![right fit](./attachments/03-USDE-graph-db14.png)

Leonhard Euler's paper on the "Seven Bridges of Königsberg", published in 1736.

![inline](./attachments/03-USDE-graph-db13 1.png)![inline](./attachments/03-USDE-graph-db11 1.png)![inline](./attachments/03-USDE-graph-db12 1.png)

## Famous Problems

- The traveling salesman problem: a traveling salesman is to visit a number of cities.
  - How do we plan the trip so that every city is visited once and just once, and the whole trip is as short as possible?
- The four color problem[^100]: using only four colors, color any map of countries in such a way as to prevent two bordering countries from having the same color.
  - SOLVED ONLY 120 YEARS LATER!

[^100]: Francis Guthrie, 1852

## Other Examples of Graph Problems

- Cost of wiring electronic components
- Shortest route between two cities
- Shortest distance between all pairs of cities in a road atlas
- Matching / resource allocation
- Task scheduling
- Visibility / coverage

## What is a Graph?

Informally, a *graph* is a set of nodes joined by a set of lines or arrows.

![inline](./attachments/graph0.png)

### Graph

G is an ordered triple $$G := (V, E, f)$$

- V is a set of nodes, points, or vertices.
- E is a set whose elements are known as edges or lines.
- $$f$$ is a function that maps each element of E to an unordered pair of vertices in V.

### Vertices and Edges

- A vertex is a basic element
  - Drawn as a *node* or a *dot*.
  - The vertex set of a graph *G* is usually denoted by *V*.

- An edge is a *set* of two elements
  - Drawn as a line connecting two vertices, called end vertices or endpoints.
  - The edge set of G is usually denoted by E(G), or E.

### Example

V := {1,2,3,4,5,6}

E := {{1,2},{1,5},{2,3},{2,5},{3,4},{4,5},{4,6}}

![right fit](./attachments/graph2.png)

### Directed Graph (Digraph)

Edges have directions, i.e., an edge is an *ordered* pair of nodes.

![right fit](./attachments/graph5b.png)

### Weighted Graphs

Weighted graphs are graphs in which each edge has an associated *weight*, usually given by a *weight function* $$f_w : E \rightarrow \mathbb{R}$$.

![right fit](./attachments/graph5.png)

## Path

> A *path* is a sequence of vertices such that there is an edge from each vertex to its successor.

- A path is *simple* if each vertex is distinct.
- If there is a path *p* from *u* to *v*, then we say *v* is **reachable** from *u* via *p*.

**Example**: a simple path from 1 to 5 is [1, 2, 4, 5].

![inline](./attachments/graph3.png)

### Cycle

- A path from a vertex to itself is called a *cycle*.
- A graph is called *cyclic* if it contains a cycle;
- otherwise it is called *acyclic*.

### Connectivity

- A graph is *connected* if
  - you can get from any node to any other by following a sequence of edges, OR
  - any two nodes are connected by a path.
- A directed graph is *strongly connected* if there is a directed path from any node to any other node.

### Sparsity/Density

A graph is *sparse* if $$|E| \approx |V|$$

A graph is *dense* if $$|E| \approx |V|^2$$

### Degree

The degree of a node is the number of edges incident on it.

E.g., the degree of **5** is 3.
![inline](./attachments/graph2.png)

### Degree (Directed Graphs)

In-degree: number of edges entering

Out-degree: number of edges leaving

Degree = in-degree + out-degree

## Graph Types

### Bipartite Graph

- *V* can be partitioned into 2 sets $$V_1$$ and $$V_2$$ such that $$(u,v) \in E$$ implies
  - either $$u \in V_1$$ and $$v \in V_2$$
  - or $$v \in V_1$$ and $$u \in V_2$$.

![right 60%](./attachments/bipartitegraph.png)

### Complete Graph

[.column]

- Denoted $$K_n$$
- Every pair of vertices is adjacent
- Has $$n(n-1)/2$$ edges

[.column]

![inline](./attachments/03-USDE-graph-db16.png)![inline](./attachments/03-USDE-graph-db17.png)![inline](./attachments/03-USDE-graph-db18.png)
![inline](./attachments/03-USDE-graph-db15.png)![inline](./attachments/03-USDE-graph-db19.png)
![inline](./attachments/03-USDE-graph-db20.png)![inline](./attachments/03-USDE-graph-db21.png)![inline](./attachments/03-USDE-graph-db22.png)

### Planar Graph

- Can be drawn on a plane such that no two edges intersect
- $$K_4$$ is the largest complete graph that is planar

![right fit](./attachments/03-USDE-graph-db15.png)

### Tree

A connected acyclic graph

Any two nodes have *exactly* one path between them

![right 60%](./attachments/graphtree.png)

## Hypergraph

- A generalization of a graph,
  - whose edges can connect any number of vertices.
- Formally, a hypergraph is a pair (X, E) where
  - X is a set of elements, called nodes or vertices, and
  - E is a set of subsets of X, called hyperedges.
- Hyperedges are arbitrary sets of nodes,
  - containing an arbitrary number of nodes.

### Subgraph

- A subgraph's vertex and edge sets are subsets of those of G.
- A *supergraph* of a graph G is a graph that contains G as a subgraph.

### Spanning Subgraph

- A spanning subgraph G has the same vertex set as H,
- but possibly not all the edges:
- "G spans H".

![inline](./attachments/spanning.png)

## Graph ADT

- In computer science, a graph is an abstract data type (ADT)
  - that consists of
    - a set of nodes and
    - a set of edges
  - that establish relationships (connections) between the nodes.
- The graph ADT follows directly from the graph concept in mathematics.

### Representation (Matrix)

- Incidence matrix
  - E x V
  - [edge, vertex] contains the edge's data
- Adjacency matrix
  - V x V
  - Boolean values (adjacent or not)
  - or edge weights

---

![inline](./attachments/AdjacencyMaterix.png)

### Representation (List)

- Edge list
  - pairs (ordered if directed) of vertices
  - optionally weight and other data
- Adjacency list

---

![inline](./attachments/AdjacencyList.png)

## Graph Algorithms

[.column]

- Shortest path
  - Single source
  - All pairs (e.g., Floyd-Warshall)
- Network flow
- Matching
  - Bipartite
  - Weighted
- Topological ordering
- Strongly connected components

[.column]

- Biconnected components / articulation points
- Bridges
- Graph coloring
- Euler tour
- Hamiltonian tour
- Clique
- **Isomorphism**
- Edge cover
- Vertex cover
- Visibility
\ No newline at end of file

diff --git a/HBASE.md b/HBASE.md
deleted file mode 100644
index 743d6a6..0000000
--- a/HBASE.md
+++ /dev/null
@@ -1,76 +0,0 @@

### HBase

HBase is a [[Column Oriented Database]]

* Google's BigTable was the first "blob-based" storage system
* Yahoo! open-sourced it -> HBase
* A major Apache project today, part of Hadoop
* Facebook uses HBase internally
* API
  * Get/Put(row)
  * Scan(row range, filter) – range queries
  * MultiPut

### HBase API

* Supported operations
  * Get(row)
  * Put(row)
  * Scan(row range, filter) – range queries
  * MultiPut(rows)

### HBase Architecture

![[attachments/hbase-36.png]]

### HBase Storage Hierarchy

* HBase table
  * Split into multiple *regions*: replicated across servers
  * One *Store* per ColumnFamily (a subset of columns with similar query patterns) per region
  * One MemStore per Store: in-memory updates to the Store, flushed to disk when full
  * *StoreFiles* for each Store of each region: where the data lives
    * Blocks
* HFile
  * The SSTable format from Google's BigTable

### HFile

![[attachments/hbase-37.png]]

**(For a census table example)**

![[attachments/hbase-38.png]]

### Strong Consistency: HBase Write-Ahead Log

*Writes go to the HLog* *before* *being written to the MemStore,*

*so HBase can recover from failure.*

![[attachments/hbase-39.png]]

### Log Replay

* After recovery from failure, or upon bootup (HRegionServer/HMaster)
  * Replay any stale logs (using timestamps to find out where the database is w.r.t. the logs)
  * Replay: add the edits to the MemStore
* Why one HLog per HRegionServer rather than one per region?
  * Avoids many concurrent writes, which on the local file system may involve many disk seeks

### Cross-Data Center Replication

![[attachments/hbase-40.png]]

*ZooKeeper is actually a file system for control information:*

*1. /hbase/replication/state*

*2. /hbase/replication/peers/*

*3. /hbase/replication/rs/*
\ No newline at end of file

diff --git a/HDFS.md b/HDFS.md
deleted file mode 100644
index 2710c25..0000000
--- a/HDFS.md
+++ /dev/null
@@ -1,101 +0,0 @@

# Hadoop Distributed File System (HDFS)[^12]

- Abstracts the physical location (which node in the cluster) away from the application
- Partitions at ingestion time
- Replicates for high availability and fault tolerance

[^12]: Inspired by the [Google File System](https://static.googleusercontent.com/media/research.google.com/en/archive/gfs-sosp2003.pdf)

![right fit](./attachments/HDFS.png)

### Design Objectives

- Partition and distribute a single file across different machines
- Favor larger partition sizes
- Data replication
- Local processing (as much as possible)

### Optimizations

- Sequential reads, rather than random access and writes
- No updates on files
- No local caching

### HDFS Architecture[^13]

![inline fit](./attachments/img0034.png)

[^13]: Figure 2-1 in the book Professional Hadoop Solutions

### HDFS Files

- A single large file is partitioned into several blocks
  - of size 64 MB or 128 MB
  - Compare that to block sizes on ordinary file systems
  - This is why sequential access performs much better: the disk makes fewer seeks

^ Question: What would be the costs/benefits if we used smaller block sizes?

### Data Node

- It stores the received blocks in a local file system;
- it forwards that portion of data to the next DataNode in the list.
- The operation is repeated by the next receiving DataNode until the last node in the replica set receives the data.
### Name Node

- A single node that keeps the metadata of HDFS
  - Keeps the metadata in memory for fast access
  - Periodically flushes it to disk (the FsImage file) for durability
  - The NameNode runs a daemon process to handle requests and to receive heartbeats from the data nodes

^ In some high-availability settings, there is a secondary name node. As a name node can be accessed concurrently, a logging mechanism similar to that of databases is used to track the updates to the catalog.

### HDFS Federation

- By default, HDFS has a single NameNode. What is wrong with that? If the NameNode daemon process goes down, the cluster becomes inaccessible.

- A solution: HDFS Federation

  - Namespace scalability: metadata access scales horizontally, just like access to the data itself
  - Performance: higher throughput, as NameNodes can be queried concurrently
  - Isolation: blocking applications can be served by different NameNodes
- Is it more reliable?

![right fit](./attachments/img0036.png)

### Writing to HDFS

- When a client is writing data to an HDFS file, this data is first written to a local file.
- When the local file accumulates a full block of data, the client consults the NameNode to get a list of DataNodes that are assigned to host replicas of that block.
- The client then writes the data block from its local storage to the first DataNode in 4 KB portions.

### Writing a File to HDFS (Cont.)

- The last DataNode in the pipeline stores the data locally without forwarding it any further.
- If one of the DataNodes fails while the block is being written, it is removed from the pipeline.
- The NameNode re-replicates the block to make up for the missing replica caused by the failed DataNode.
- When a file is closed, the remaining data in the temporary local file is pipelined to the DataNodes.
- If the NameNode dies before the file is closed, the file is lost.

### Replica Placement

- Replica placement is crucial for the reliability of HDFS
  - Replicas should not all be placed on the same rack
- All decisions about the placement of partitions/replicas are made by the NameNode
- The NameNode tracks the availability of data nodes by means of heartbeats
  - Every 3 seconds, the NameNode should receive a heartbeat and a block report from each data node
  - The block report allows verifying the list of stored blocks on the data node
  - A data node with a missing heartbeat is declared dead; based on the catalog, the replicas missing from this node are made up for by the NameNode sending replicas to other available data nodes

### HDFS High Availability

- Each NameNode is backed up by a slave NameNode that keeps a copy of the catalog

- The slave node provides a failover replacement for the primary NameNode

- Both nodes must have access to a shared storage area

- Data nodes have to send heartbeats and block reports to both the master and slave NameNodes

![right fit](./attachments/img0037.png)
diff --git a/KSQL.md b/KSQL.md
deleted file mode 100644
index e8f0337..0000000
--- a/KSQL.md
+++ /dev/null
@@ -1,117 +0,0 @@

footer: [Riccardo Tommasini](http://rictomm.me) - riccardo.tommasini@ut.ee - @rictomm

autoscale: true
slidenumbers: true

# KSQL

- Brings SQL support to Kafka Streams

- Streaming ETL

  - DDL

  - Querying

  - Linking streams to tables

  - Don't confuse these with KStream and KTable

---

![inline](./attachments/img0025.png)

### Create Stream

A stream is KSQL's wrapper for the data in a Kafka topic.

```sql
CREATE STREAM ratings (
  rating_id long,
  user_id int,
  stars int,
  route_id int,
  rating_time long,
  channel varchar,
  message varchar)
WITH (
  value_format='JSON', kafka_topic='ratings');
```

---

![inline](./attachments/CreateStream.png)

### Selecting From the Stream

```sql
SELECT *
FROM ratings
WHERE stars <= 2
AND lcase(channel) LIKE '%ios%'
AND user_id > 0
LIMIT 10;
```

### Selecting From the Stream

[.column]
```sql
SELECT *
FROM ratings
WHERE stars <= 2
AND lcase(channel) LIKE '%ios%'
AND user_id > 0
LIMIT 10;
```

[.column]

We can derive another stream based on the query result

$$\Downarrow$$

```sql
CREATE STREAM poor_ratings AS
SELECT *
FROM ratings
WHERE stars <= 2
AND lcase(channel) LIKE '%ios%';
```
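Before building on the derived stream, it can be sanity-checked from the KSQL CLI. A quick sketch (the column list simply mirrors the ratings schema above):

```sql
SHOW STREAMS;
DESCRIBE poor_ratings;

-- Peek at a few matching events as they arrive
SELECT user_id, stars, channel, message
FROM poor_ratings
LIMIT 5;
```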
### Create Table

```sql
CREATE TABLE users (uid int, name varchar, elite varchar)
WITH (key='uid', value_format='JSON', kafka_topic='mysql-users');
```
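Unlike a stream, a table keeps only the latest value per key, so later messages on `mysql-users` with the same `uid` upsert the corresponding row. A hedged sketch of inspecting it (assuming the backing topic is already populated):

```sql
DESCRIBE users;

SELECT uid, name, elite
FROM users
LIMIT 5;
```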
### Enrich Stream with Table Data

```sql
CREATE STREAM vip_poor_ratings AS
SELECT uid, name, elite,
stars, route_id, rating_time, message
FROM poor_ratings r LEFT JOIN users u ON r.user_id = u.uid
WHERE u.elite = 'P';
```

### Aggregation and Windowing

```sql
SELECT uid, name, count(*) AS rating_count
FROM vip_poor_ratings
WINDOW TUMBLING (size 2 minutes)
GROUP BY uid, name;
```

### Conclusion
![inline](./attachments/the end.png)
\ No newline at end of file
diff --git a/Kafka Homework-Students.ipynb b/Kafka Homework-Students.ipynb
new file mode 100644
index 0000000..af3c9cc
--- /dev/null
+++ b/Kafka Homework-Students.ipynb
@@ -0,0 +1,581 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Week 5: Data Ingestion (Kafka)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "![](https://camo.githubusercontent.com/56166d361c3975dee750ecce16d605bbbf66516b/68747470733a2f2f75706c6f61642e77696b696d656469612e6f72672f77696b6970656469612f636f6d6d6f6e732f352f35332f4170616368655f6b61666b615f776f7264747970652e737667)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Student ID: [#####]\n",
+ "### Subtasks Done: [#,#,..]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Working with sensor data\n",
+ "\n",
+ "We want to monitor the status of three smart buildings.\n",
+ "Each building has 8 floors and each floor has 20 rooms, which have a max capacity of 10 people each.\n",
+ "\n",
+ "Rooms are equipped with sensors that count how many people are currently inside the rooms. \n",
+ "\n",
+ "Due to COVID-19, we want to monitor how many people are in the various rooms, floors, and buildings."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "![](./buildings.png)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Notes before starting!!!\n",
+ " \n",
+ "- you can use multiple Jupyter notebook instances: one to generate observations, another to consume them\n",
+ "- look at the whole notebook (tasks) before starting\n",
+ " - (Hint: tasks 2 and 3 depend on 0 and 1)\n",
+ "- you can create as many topics as you want \n",
+ " - (Hint: 3 or more)\n",
+ "- each topic in the exercise should have **at least** 2 partitions. \n",
+ " - (Hint: to decide how many partitions, look at task 3)\n",
+ "- we assume a replication factor of 1 is sufficient for all the topics.\n",
+ "- there will be multiple observations for the same room. You can use the latest observation in aggregations (log compaction). It is also accepted if they are counted multiple times.\n",
+ "- The minimal required dependencies have been already imported."
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Task 0: Setting the environment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from confluent_kafka import SerializingProducer, DeserializingConsumer\n", + "from confluent_kafka.serialization import StringSerializer, StringDeserializer\n", + "from confluent_kafka.serialization import IntegerSerializer, IntegerDeserializer\n", + "from confluent_kafka.admin import AdminClient, NewTopic\n", + "from uuid import uuid4\n", + "import sys, lorem, random, time, json, csv\n", + "\n", + "brokers = \"kafka1:9092,kafka2:9093\"\n", + "topics = [] ## Add here your topics" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Create new topics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "## Your topic Code Here" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Task 1: Counting People\n", + "\n", + "Write a Kafka Producer that generates the observations every 5 seconds (system time)\n", + "for each building, floor, and room, and pushes them to a topic.\n", + "\n", + "We recommend \"murmur2_random\" as partitioner." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Populate the topics with 1000 observations" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pconf = {\n", + " 'bootstrap.servers': brokers,\n", + " ## Your Producer Configuration Code Here\n", + "\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "## Your Producer Code Here" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Populate a topic with the 1000 observations in obs.csv, sending one every 5 seconds (system time)\n", + "\n", + "#### Hints:\n", + "- represent the message as a json\n", + "- use the a random key (check the json)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "f = open('obs.csv', 'r')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with f:\n", + " print(next(csv.reader(f)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "f = open('obs.csv', 'r')\n", + "with f:\n", + " reader = csv.reader(f)\n", + " for row in reader:\n", + " try:\n", + " ## Your Code Here\n", + " except KeyboardInterrupt:\n", + " sys.stderr.write('%% Aborted by user\\n')\n", + " except BufferError:\n", + " sys.stderr.write('%% Local producer queue is full (%d messages awaiting delivery): try again\\n' % len(p))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Task 2: Reading observations\n", + "\n", + "Write a Kafka Consumer that reads the previous topic and prints the result out." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "consumer_conf = {\n", + " 'bootstrap.servers': brokers,\n", + " ## Your Configuration Code Here\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "## Your CONSUMER Code Here" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Consume 1000 observations" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " for i in range(0,1000):\n", + " ## Your consuming Code Here\n", + "except KeyboardInterrupt:\n", + " sys.stderr.write('%% Aborted by user\\n')\n", + "finally:\n", + " # Close down consumer to commit final offsets.\n", + " consumer.close()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Task 3: Aggregating the number of people \n", + "\n", + "Write a Kafka Consumer that reads the previous topics and count\n", + "the number of people per floor and per building every minute,\n", + "\n", + "Always ensure the result are durable (save them in a topic)\n", + "\n", + "Carry on the minimal ammount of information in the key and the value (remove unnecessary information)\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### HINT: How did you organize the data in partitions?" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Change the message key to simplify counting by floor." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "consumer_conf = {\n", + " 'bootstrap.servers': brokers,\n", + " ## Your Configuration Code Here\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "## Your consumer Code Here" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pconf = {\n", + " 'bootstrap.servers': brokers,\n", + " ## Your Configuration Code Here\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "## Your producer Code Here" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " for i in range(0,1000):\n", + " # Your consuming and producing code here\n", + "except KeyboardInterrupt:\n", + " sys.stderr.write('%% Aborted by user\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Total number of people Per Floor" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "keep the local count of people on each floor. Floor are uniquely identified by building and floor number. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "consumer_conf = {\n", + " 'bootstrap.servers': brokers,\n", + " ## Your configuration code here\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "## Your Consumer Code Here" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Use the following dictionary to maintain aggregate the results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "floors = {}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " for i in range(0,1000):\n", + " # Your consuming code here\n", + "except KeyboardInterrupt:\n", + " sys.stderr.write('%% Aborted by user\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Let's visualize the results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "plt.bar(floors.keys(), floors.values(), color='g')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Let's save the aggregated result in a topic and progress from there." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pconf = {\n", + " 'bootstrap.servers': brokers,\n", + " ## Your Configuration Code Here\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "## Your producer Code Here " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " for f in floors.keys():\n", + " ## Your producering Code Here \n", + "except KeyboardInterrupt:\n", + " sys.stderr.write('%% Aborted by user\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Total number of people per building" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "consumer_conf = {\n", + " 'bootstrap.servers': brokers,\n", + " ## Your configuration Code Here\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "## Your consumer Code Here" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " for i in range(1,1000):\n", + " ## Your consuming Code Here\n", + "except KeyboardInterrupt:\n", + " sys.stderr.write('%% Aborted by user\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Let's visualize the results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "plt.bar(buildings.keys(), buildings.values(), color='g')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Draw the dataflow between topic using a tool of choice" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![](http://placehold.it/256x256)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Optional Tasks (but useful for preparing the final examm)" + ] + 
}, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Task 4: Add a 1 minute window to the aggreagtion (see wordcount example)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Task 5 Redo Task 0-3 modelling observations using AVRO." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Kafka Practice Python [(de)Serialisers].ipynb b/Kafka Practice Python [(de)Serialisers].ipynb new file mode 100644 index 0000000..c62108f --- /dev/null +++ b/Kafka Practice Python [(de)Serialisers].ipynb @@ -0,0 +1,689 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Advanced Serializion and Deserializion in Kafka" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![](https://camo.githubusercontent.com/56166d361c3975dee750ecce16d605bbbf66516b/68747470733a2f2f75706c6f61642e77696b696d656469612e6f72672f77696b6970656469612f636f6d6d6f6e732f352f35332f4170616368655f6b61666b615f776f7264747970652e737667)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Preparing the field" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from confluent_kafka.admin import AdminClient, NewTopic, NewPartitions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Topics" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "topics = [\"string_topic\", \"avro_topic\"]\n", + "brokers = \"kafka1:9092,kafka2:9093,\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Preparing the field" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "a = AdminClient(conf = {'bootstrap.servers': brokers})" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "new_topics = [NewTopic(topic, num_partitions=2, replication_factor=1) for topic in topics]" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Topic string_topic created\n", + "Topic avro_topic created\n" + ] + } + ], + "source": [ + "fs = a.create_topics(new_topics)\n", + "for topic, f in fs.items():\n", + " try:\n", + " f.result() # The result itself is None\n", + " print(\"Topic {} created\".format(topic))\n", + " except Exception as e:\n", + " print(\"Failed to create topic {}: {}\".format(topic, e))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Producer" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "from confluent_kafka import SerializingProducer\n", + "from confluent_kafka.serialization import StringSerializer\n", + "import json\n", + "from uuid import uuid4" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + 
"producer_conf = {\n", + " 'bootstrap.servers': brokers,\n", + " 'partitioner': 'murmur2_random',\n", + " 'key.serializer': StringSerializer('utf_8'),\n", + " 'value.serializer': StringSerializer('utf_8')\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "producer = SerializingProducer(producer_conf)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'picture': 'http://placehold.it/32x32',\n", + " 'age': 29,\n", + " 'name': 'Hull Gardner',\n", + " 'gender': 'male',\n", + " 'email': 'hullgardner@kneedles.com',\n", + " 'phone': '+372 (860) 490-3549',\n", + " 'address': '946 Victor Road, Adelino, Rhode Island, 3562',\n", + " 'about': 'Exercitation sint eu voluptate duis dolor ea occaecat officia. Consequat dolor et consectetur non adipisicing anim aliquip non mollit officia qui. Minim excepteur Lorem labore est officia ad voluptate. Consectetur elit aliqua reprehenderit cupidatat officia dolore sunt.\\r\\n'},\n", + " {'picture': 'http://placehold.it/32x32',\n", + " 'age': 28,\n", + " 'name': 'Rice Gonzalez',\n", + " 'gender': 'male',\n", + " 'email': 'ricegonzalez@kneedles.com',\n", + " 'phone': '+372 (956) 471-3802',\n", + " 'address': '251 Church Avenue, Weedville, Michigan, 9128',\n", + " 'about': 'Sunt Lorem dolore cillum duis ipsum sit officia dolor elit. Cupidatat magna magna occaecat id incididunt et est enim incididunt ex exercitation est. Ea dolor ad enim duis ea.\\r\\n'},\n", + " {'picture': 'http://placehold.it/32x32',\n", + " 'age': 30,\n", + " 'name': 'Augusta Yates',\n", + " 'gender': 'female',\n", + " 'email': 'augustayates@kneedles.com',\n", + " 'phone': '+372 (937) 447-3768',\n", + " 'address': '196 Lawton Street, Glenshaw, Palau, 3979',\n", + " 'about': 'Esse excepteur velit elit dolor Lorem dolore nostrud excepteur reprehenderit eiusmod deserunt sint. Velit Lorem est magna irure et dolore veniam dolore labore labore ea laboris ad nisi. Consequat sit exercitation enim cupidatat esse esse reprehenderit ullamco nisi irure qui laborum nulla.\\r\\n'},\n", + " {'picture': 'http://placehold.it/32x32',\n", + " 'age': 27,\n", + " 'name': 'Acevedo Burns',\n", + " 'gender': 'male',\n", + " 'email': 'acevedoburns@kneedles.com',\n", + " 'phone': '+372 (982) 441-3165',\n", + " 'address': '572 Bath Avenue, Downsville, Alaska, 1693',\n", + " 'about': 'Veniam anim in culpa occaecat anim pariatur incididunt adipisicing qui sunt. Labore reprehenderit fugiat reprehenderit irure incididunt laborum mollit laboris duis. Laborum cillum anim qui duis nisi dolor. Qui ut pariatur exercitation eu ullamco minim sunt aliquip veniam ea. Commodo qui reprehenderit eiusmod veniam veniam ad Lorem duis et qui cillum.\\r\\n'},\n", + " {'picture': 'http://placehold.it/32x32',\n", + " 'age': 32,\n", + " 'name': 'Erna Keith',\n", + " 'gender': 'female',\n", + " 'email': 'ernakeith@kneedles.com',\n", + " 'phone': '+372 (930) 554-3624',\n", + " 'address': '256 Navy Street, Whipholt, American Samoa, 3834',\n", + " 'about': 'Ad veniam anim deserunt sint veniam et pariatur est. Cillum et aliquip anim consequat quis eiusmod elit. Cillum qui et est magna non irure sint. Fugiat occaecat nostrud consectetur non sunt deserunt fugiat nisi veniam consectetur do incididunt cillum. 
Adipisicing reprehenderit ex velit consequat et culpa id esse deserunt eiusmod veniam.\\r\\n'}]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "with open('./data.json') as f:\n", + " data = json.load(f)\n", + "data" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'picture': 'http://placehold.it/32x32', 'age': 29, 'name': 'Hull Gardner', 'gender': 'male', 'email': 'hullgardner@kneedles.com', 'phone': '+372 (860) 490-3549', 'address': '946 Victor Road, Adelino, Rhode Island, 3562', 'about': 'Exercitation sint eu voluptate duis dolor ea occaecat officia. Consequat dolor et consectetur non adipisicing anim aliquip non mollit officia qui. Minim excepteur Lorem labore est officia ad voluptate. Consectetur elit aliqua reprehenderit cupidatat officia dolore sunt.\\r\\n'}\n", + "{'picture': 'http://placehold.it/32x32', 'age': 28, 'name': 'Rice Gonzalez', 'gender': 'male', 'email': 'ricegonzalez@kneedles.com', 'phone': '+372 (956) 471-3802', 'address': '251 Church Avenue, Weedville, Michigan, 9128', 'about': 'Sunt Lorem dolore cillum duis ipsum sit officia dolor elit. Cupidatat magna magna occaecat id incididunt et est enim incididunt ex exercitation est. Ea dolor ad enim duis ea.\\r\\n'}\n", + "{'picture': 'http://placehold.it/32x32', 'age': 30, 'name': 'Augusta Yates', 'gender': 'female', 'email': 'augustayates@kneedles.com', 'phone': '+372 (937) 447-3768', 'address': '196 Lawton Street, Glenshaw, Palau, 3979', 'about': 'Esse excepteur velit elit dolor Lorem dolore nostrud excepteur reprehenderit eiusmod deserunt sint. Velit Lorem est magna irure et dolore veniam dolore labore labore ea laboris ad nisi. Consequat sit exercitation enim cupidatat esse esse reprehenderit ullamco nisi irure qui laborum nulla.\\r\\n'}\n", + "{'picture': 'http://placehold.it/32x32', 'age': 27, 'name': 'Acevedo Burns', 'gender': 'male', 'email': 'acevedoburns@kneedles.com', 'phone': '+372 (982) 441-3165', 'address': '572 Bath Avenue, Downsville, Alaska, 1693', 'about': 'Veniam anim in culpa occaecat anim pariatur incididunt adipisicing qui sunt. Labore reprehenderit fugiat reprehenderit irure incididunt laborum mollit laboris duis. Laborum cillum anim qui duis nisi dolor. Qui ut pariatur exercitation eu ullamco minim sunt aliquip veniam ea. Commodo qui reprehenderit eiusmod veniam veniam ad Lorem duis et qui cillum.\\r\\n'}\n", + "{'picture': 'http://placehold.it/32x32', 'age': 32, 'name': 'Erna Keith', 'gender': 'female', 'email': 'ernakeith@kneedles.com', 'phone': '+372 (930) 554-3624', 'address': '256 Navy Street, Whipholt, American Samoa, 3834', 'about': 'Ad veniam anim deserunt sint veniam et pariatur est. Cillum et aliquip anim consequat quis eiusmod elit. Cillum qui et est magna non irure sint. Fugiat occaecat nostrud consectetur non sunt deserunt fugiat nisi veniam consectetur do incididunt cillum. 
Adipisicing reprehenderit ex velit consequat et culpa id esse deserunt eiusmod veniam.\\r\\n'}\n" + ] + } + ], + "source": [ + "for m in data:\n", + " print(m)\n", + " producer.produce(topic=topics[0],key=str(uuid4()),value=str(m))\n", + " producer.poll(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "producer.flush()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Consumer" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "from confluent_kafka import DeserializingConsumer, KafkaException\n", + "from confluent_kafka.serialization import StringDeserializer" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "string_deserializer = StringDeserializer('utf_8')" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "consumer_conf = {\n", + " 'bootstrap.servers': brokers,\n", + " 'key.deserializer': string_deserializer,\n", + " 'value.deserializer': string_deserializer,\n", + " 'group.id': str(uuid4()),\n", + " 'session.timeout.ms': 6000,\n", + " 'auto.offset.reset': 'earliest'\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'string_topic'" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "consumer = DeserializingConsumer(consumer_conf)\n", + "consumer.subscribe([topics[0]])\n", + "topics[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "%% Aborted by user\n" + ] + } + ], + "source": [ + "import sys\n", + "try:\n", + " while True:\n", + " # SIGINT can't be handled when polling, limit timeout to 1 second.\n", + " msg = consumer.poll(1.0)\n", + " if msg is None:\n", + " continue\n", + " if msg.error():\n", + " raise KafkaException(msg.error())\n", + " else:\n", + " print(\"{} [{}] at offset {} with key {}: {}\".format(msg.topic(), msg.partition(), msg.offset(), str(msg.key()), str(msg.value())))\n", + "except KeyboardInterrupt:\n", + " sys.stderr.write('%% Aborted by user\\n')\n", + "finally:\n", + " # Close down consumer to commit final offsets.\n", + " consumer.close()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Apache Avro" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + 
"![](data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAYEAAACDCAMAAABcOFepAAAAmVBMVEX///8kM1MKIkgWKUywtL6Bh5YfL1AAGEMADT4AHEUbLE4AGkQAHkYYKk0QJUpFUGuHjZsAFEHw8fMuPFr4+fre4OTp6u0AFUHIy9HV19xYYXe+wclocIPr7O80QV51fI2lqbSan6vDxs09SWRtdYeTmKV7gpJcZXqqrrhOWHAACz60uMGPlKIpOFjP0thBTGYAADoAADMAAC6JEKoSAAAeRElEQVR4nO1d6bqiOrMW0CCjoqCiqOBsO/U+939xJwmZSVyuUdfzdf3Yu5cTSapS9daQSmvQ+tWUHbNnD+FzlLWmzx7Cp6jt+kn72YP4FK1b+/6zx/AJ2o0tyxrvnj2MT9Bm2ir/Hp49io/S/GZbiOzb/NlD+Sid/5atrBv80m08cXzLAh6wLN+ZPHswH6Oz1x21Wkc/qJ49ko/Q1kXif8x7Mfy/u332cD5CO8/fw/9du1a4efZY3k3pPkQLv4T/XCJWhPv02UN6Lw2OttXN0b8Kx4oXvwzTZRcHaqA/tf5s/4GayLn8simsnMiKO/ifKQBwMr8KErW7aM0BXfMMIH78Klg6KBJg+QvyVx7A+cTg9NQhvYd2iaJ30inSScnvgaVVEEGZcUb075kLWWAF69kzB/Uw1SBUsb21Xf4dsDQtAZoBsHP+2gztAojs1u3Xt2cYhPrOqv4r75FprCL0cvz6sHR09rEb40e5+PLV99GrIATnFzdoVS3sZP+2Pd8j6n90w7D0xZH1pAhjJO0Q/IzkdwbHEPuXIPamp9eN1qWFh5aZKnwMRDEkRbRDf3nF627jvH/x/HqZx5vmMEsvwm9avm3v26+pULMhAj1dIvVzLPVQnKj6ryHS8DV3cX4eBk69wlbsa2HPvOj6FmWC16ty3YeeSieEGCKfrPDE5sMl6j8DCGK4LwdL00kfeA4gw3W6Z9M+zfYJ2QdIHQX28jQyfPIphCOh4ZSMfpvQGSFfgCCjGpa+VrQ0r6bj0GFjdZLdPQWTFwH/rBXZyXB3epFNPUA6B1AQWkclODHvYDsGyFK/hilL86rwgtjn44zD3VtSPeo4Nhcu6DXYY6uork83b5MYo00CQnFUQiIWlVg5rwFLR7P+0ZVW3/I9a/uIaKTtXuAITLBAFHvuellOnihY1RibXApCA2CpBIJXgaXpaLbdX6DmkQYJ1fr0IZc3LeAss/PClb8P2WB7wWJ/PmVP4AMBoQx2jhvrj0gGqU+BpYO83Z/CxZdEHy+/e6vQwj6SDqv+W6L9fD0vxurvWMB37CBeF9tT/pN8qEFoQkY/79laBkD13yM27pD8OCwdZKdtsbAD21EXDUK15FbhNd3/30MAs3DD4or+kVVT13M0+92PIR+G0/Ph+iNY6YR0jmORwU8aciHMlSbLcgvBUu9HYOk8b5/3QyewY7+5Vj6ElMUBL9PqGPwpH/vJoxMFvdplSFfnXrexp/h+CP3erpx8r6xhV5eDUNe4/ojGRP3XsNT9XliaTcrOEWC5b6490ttBMq2u9WdPN88PHx1O2ostYIcdsrDppJqG8Cmah8DHACcOPW+9qybfsx8GKBIKkjMZiwJCm8Rg6Tn5PliaZrPtZuEFXuwA7bJEcej6m5LqnMnSgwDzPanIPdK0TnKpmHRns/MxCkPdRqv5HXvjpNcpv5oP2PE1gVAVkdYvMliKv2p/MSyFa19YXb3KqSXS9myI3yfM7crPPg74BO/akB2cMQBxsj5f+aujVbVZR4ZNZ2HnIRyHveV2ln8RDqmQIMdrCkJdCSS7Wy0LwJgEXTAsBclXwVI8+7Frx5Huqdg8hgG47cor93nTSedSo0rglu973Cx2yKIGflFmwoIio79fxDqjT0YSQUMdrDfVp/0HFYQm4mN8e9bq6keQfDEsHVwPu2OchHFkkDwoeoG9KKqZuFDptdqHNBwRX67m3zc8tRgDuqK2a+2r1UicSA18XeOgEB/CYL37TGQpWyAQOtaDULwxDBwQYKn7WVg6Ou3Wnmffm+YYHHdtCZ4P8lPnaHsMwfhJ5yNSMFl77LFoh7mXaecg6fkUbsxi4Xp32GAHt/MHg6wnTwKhkbTlbLwxTBywfJ/BUsTF8IM58Eln4RogCNa5nruGgi+K5jw/VcXakb7lJ9OPisBp7Uo6D+JPL7SOy+qUC+G9wbVcrhPPNFLojS+qD+yETh0JJaJVSSbAimpYYeQAVLtE/Q/qaGnn/QPI+pbOGyKL74bH3YGHywaja7va9C4xshPSl5xk+m4FJNB107VVBxvpeQiu1sX5MGEhihSqylvXsBsgE4p3SsGgdxeE2rWvZeaABEvRF47vNEr53tXaeQuh/GN/RueDhH53vDihh5Zenb0furvPekuD9rQb6hxj5JEhRAyhT5v6xnm59MfazRAly/dowiuOhHrGSGhQ66Z7HBBgqYcR7XskcbQfa3AGFP0ELNvkZyEi2SxCL1CFnq1+HHb3sy8Bhenk3LORMdIjYLQlxt3Lvn/AMGyw6i88deOgBaGg/gGq0NIyEHrqNh4c1RLNOAB08gq6RP3P1yha2n0cllbdBuKEJi1c9GfoudAV7heXrnHpaxMRT7eTr4wNYmUP4FZzDIzARmJMlONgtgGhOgnQPT/2rBqEBtSDlEFo/VO3+i3KAdDbejqUzmFpYD0OS9Oppz7Pt63NaYBdgkXiegYkDrCGDuLL8fy1ea02LZ5IR5PDuegNIy8IbcgLv8kMxP3kUlTX0WR3iZW3H/MK60godWC0kVCH/BDnQGu0CTSrYvfI0N8BSwdDZUOBeLibZJPzNB5rggJQHUdObNuha1sLCaVk269Ktae3v8dK9HIH82xyKrfL4ngbRoHrhXYss8OPvfDYb5c9ZbjeA9WpSiTU10mbTXwEgQPQCE3d5u70IwmWeg/A0rUKPaZVuVtzD5QteRiMg+iyPu43ne3hNMnmsjvW+2//0PI+RNMwDqzioJGgNIU4bNauOsv9cX2JE8YOZB/AWl2Q5M0V6OCaUBoJlUEo+7dH+CNxAJrcRdjclO+FpX11z4GbgyQfIo8YCnoSL/CSl6fZJB8NUo1ig7pq6gdx8IUMaLWKAOnC4LLfzu7myNJRDndHfzkdBuNQoy2BfV8Vo5p6AYQWEgiNhkPyL4eMQeEAVDZO0ySHhRQtfQOWZk2r46NIZ7DYd6rZNbsz/sEon5W749DDuuqrCzZK7JvVuRkwvB2nxbLTP2+3VVWWh3b7NFtNrnmWQaGg35hfD/2jq7qU8V0ZvPoosRJSECorZPs4IhYSrMnnGxxopbuksQ2Y+l8hLzuK7sLSpfRMHOO/7UoS6RyMsnyymp3ah8OhLMuqqrbbbb+/2yAFAJwQwiMyXeejbriZhHouaPKBD3Wh48SIbEyh5wWBB3WQHVvr/e5cznIEJkez/jCQ1vGOENWRUFpQeZLXsttvXQkHIhqra3IA5dCamogqv9HirWhpKuog4A53CP2nGfS6pkOs+r0wrCccE3JipzaC
wmOj7uY78hJ54TYnpyNiqgK3C1Bmv5WLQPGObNSRUD0IBcEM6hiyPNQQaznQml+ao2SwdFNHS41jmIlAdJ215pOqsBOtv2ucvu1uvitrOKqQr/XoSCySyUym4kuOSQ3JIHSgiYS2OkRDUEOs50BrpMGvDJaW7t0jT31BCYF1z2uWPdyfsGOHvfJbqxiyQzFEIRAf7zvKDID/IOQLBIC8PeFHj/ofxiA0MkRCiXc2JS/adIp6DrSquLk2AizFtaWGrTgVnsvnE0Fi80FTEqdu4c8goBT4vc6PFHrOJ+3tZnpcDy8gwgRHfRkuFuv1rXc8HqfT/b6AtN/vp8feenFxgjFfErDQ/ua9SCjbGES2wZB+y8CBgWR52K9IsDTR78UFf7Bt+dYCYg40m82mns/0eLyt14vhxaKCBuDMbwiflu3J6PtLeMqV+IwUOgMp/c8dA5um85KzYKj5RA1CxwyESlGBCBDwMieWgRliEwdaa62mZFGJs2uEpUP26bhsmeeUiiS/lZffWXS++s++ddpGjyCdjzLonpXVubOEMjOF2wGh1vPhemAcABoOXAE6HR+Smr4GCKWPo1AIrg0hEwe4Npcz+/TQ7gyVxvpAA0sv/Ln56lB1NnAWaF/v4Ty2ZRs6BALgVmmenZaXJPzWsxerMa6aG0471QH6hFkG8TFc8m1/M+0NUTwW+8QIn0VIaeJ96qBAIVcGTS2EdQ4Hoa4KQimVRAt5bOVMHDhRfbXYKohKgqWa2lJBC4XIuye6H9kCiL0h5nZDBwzXx6KzrSBDoBd0nUxmp0PVL3qW7dqRH39zpV4eIamqPXToAkAK8ZIjRNyIWdWfwsRk0W+006lBKAWIHakmFIyFYlcKhcZsD5o4kFFDEMxnMkNZVMIAS3kkK4YzQ9AfipMSgQQ4OYIdoRAvAHKCyIfiy7db4rTQ5S6kAaIQbQhd88W+vz2sVhNErLDEUYJzI3w6Pinrv1QQKh13Y6vDXjFxgL0BPQcs7pw4LMW1peqCbajvElf5arVC0r3tIPEOg/BtpyD6mSPN16mn88zQXkWILLZum+1hQgMo6Si/Tq5Zh87MlmHgLBFB6FUG355cZEYUis9fNXKApk6w0d5Ihp0ly2pYmsgV5QzI2qc8E7HNPFsdtpueFRNOaLKGsbf/qVrhrOrFyE3Eah65v1A92mBxxCtPR52tyt1+jaOlUF8xt9iThK6Pc7gMhMoaSKlxGpGKUW6IzRyY0kobHEEqZU2UMFiKNlwibcor45btBe44sYe9olMKFWiDbNKulscFZEUQ0uAEVEeBc/xIPcLHaXBtb5cFwjqbToUgAg+Op/NVuenZbhiT2jpkx9iyioa4BqGuHoQ2oMqEBEq5ITZzYMeUCR7WVc4y3IGlKQdtJBkDoIiFgTvc7w5iyVQ6GOWrU7tE4bkS2uQfLec3E6vjYqEL4Ifher/sLOiKCM0IahBqUxC6MIBQSlRBdPkbRg6c6W95tV7AvObEoqVNWFoQ3oF9tfcTHtlFAZZw7B+XpVy69iwqilJKFMyzWdmfLiAcE3MCIHajDUpXD9oUZ7tcU1aBBEI9WVM0s2lLKtf8JSMHmDpnG6Yvw1JPgqVCj6sV2WkROnE3mHXWiS0GFnEp4AXyYZaLG2Kel8cf7hY37YYQ5C960Gef9haWHYRqPa9vd4/4TPL1vAipBLLIPk76CJHQjnwsAGhOXBGoHgnm2ciBNn0gN/wzmcWuCZbSWUSBf0S144N24SmVW3VC3kZ+AQpV3IYgDv4uWz9MSGmTMJwGGER2tz6Xf+0Acfg2PdUyqk/Hl/VfipKwfF34mHwkFtwoIwdmNLkmfNrka5f1SXxqRreC9xgHVh8J0aQTaSLz2C/w6xjlM44wV80qHjIwJ/SKE9qi2dlRpCcgX65BqK8HoWhuzedlxE6HwoEAIwcmjANiExwl3uRQWOqLsHT+RxqKE4At0pz5duFqqqBq8qk/87OUr72mWPixZ+1WaPnn5Tohy8/2CHXHOjUIpen4ZunbuAmsqVgngvl5mwOOJJtK9Smt4aq3IDU9e54LJFxMbiXaz6NyH+sqSX13+qzeA6eFcPAYFdG7fq36Ua1jwratM6XwvDtiEwYumXBaaGLJdvMMHjWuF+E1IwcYrI9kfXaNpMKugMLSvgBLJ8Qk+TtWtwLsZNpGb6ar/i32BN8Y+Ha4/0xl7mfp2rnF4yAYu6G/OHYOteMymBVi2WI8HZHx1updAaFqfVS9cE27RuIF0pIaOZBTDvhKycgjsJSYfGBlN84v3/YKXLPYGkyq4gagP4ayxsPi8PQ2NNAzyUbzAdUNo/Y+lgoW432rR9jhogmW7h0QSkkTQSXrIhriRzjQCATehaXYDacxWMjsmzgTP4z3bTLudJ5NZqv86asvUzo69W9q0S5kwJnGihEUlWtC+6azqYk6t5RslVCs/zVzgGo2TVL0FBpgaV1bCh9FjYhXttZyOSrcCev+6SVcMoFO0Cuvqv6mZ2lOcUAGUL1q2SWUNCkSKquESNBHoeoQ0ICzxJoPccDofZe0WGZH3w+yVK1hRMn4wLoV/QpOe/v0/iGY0mGAyma0kdt42howKbLrjoI+A6G+JGDeRshlqkFslnSRUmxvayFtYYCaBvWJKcV2GngzGgNEuzZdaIqy6/xAnPReZDOkQ8P5TsQAZgQsf7NFsU8mcopDAaHhiOukxpoSR0n21d7GQk07QJ4tLSiNlqZ4U7pnlomIO5AFJi8gNpR9PIHShf7Ajw2ByE5QNI4AQlsyCI1w1F6ote4q4kVQumSIH/AHVCxE6RorsJS8jg2TMBvop2l3AZndy1A61bU78Zao9FB66Y10vBAR8BQVS6CQZIgfiAtFpk4CRlgq22lgj1rpTVN/9M4j8t9Py6ZbheL+I/k0CgOhcpgMUDyS81+Rhb01IIvgShjJyAHGSmOJHu0bwMYQENlQspo+AnD7hoDpsvxPpsqV1aWPgXZPV/qmglDAA5hDtiiKAs/JN+RaFyMHCvrc+M7FBDMZOoypftzIpcaIh52x/NkPHJH/fsqOokyFa7SrD+IW4DWhigIQWu93uG5KpF8nUEhR60YOMDEWUpoqDfKdrOA5LJWaqHXRYp9isQPi+w6G/hzNjl18uhBEoY33aCr2DPQZ5gMyCBXP1004ywKp9IwkvRSRNlarsD1mG+pDB4dp3Dj8x6rzcOCESQ7ed4Ndtw50+XF3/3qtWCll5WYRx4vlqV7Us6BROQiV1e8fSZ+mPMtjSzknAoUUR+3tmjlPqy1Gy1Abaea1peI2Desw4by6dZMkmP5sQv5TlApJDZZ1VNLxobJCrF5HQTEECrny7N+u3A01y5XuEj16toRkWYeDOCGxN3iNfPyjxFUKcCnQkE/Hxz11Riy5ZQFfeHlAXr7I+tfAAcH+O81hrWIduGRfoMkyodgu+D1SLxM98iKAUKU/RzMGPeBn98TKIuLjqh6ulgNXwYNqmGiooUy5PfqVJiwNH+8G8FpEwzys9K2vnFPUOaz8CIVoRImH5SjYksUW1rOaTofOWgQyTXfgzUZ1AiylGtN
kzl+eZnYtU2X9p5qOh5Q0k5Fs40irR1CqGjHl0R2bkhydbazd8Z4GYt86ikeeTOb8F9AJzZaVQ12tZnjFaXr2I1a5KGZpyM4IFI7d7a2CSLHcranCAIBOhDZPximwNP6le6DvacRJoaDp2fAKfpub6brgC1jKx9/iALjIn99IuxA1Mtz0z/1lL7GVwSne4yPdGF6O6mAdO61VqO1KyCKXjS/yY3i8LmVQe1iNYP9bHHDkbi6lHG64lDTINGjf1F7iMixldR2/h+oanD8UVmjT8Yj8xjczpoa4A0yCzY7aHuctDnQlrZWJDABKic9MddE4LEVX/kXgdf1gLZV/xDq0WbPTAKVGNrLV4pUgTOSJeW7Y1Tc4oGDRo7DG0VDF+KlqpFkNV13f92jP6NcguRZTBaHSNJt5Jh5S8OnWJ4lbT4VOb3AgkNg7EwKy0VqjVQoVJ8mOvPt77k+V65E1IFQktwFIuSvNgnOk4quhsu5zgHbiInTjGxEMtWp9qqI1HsyS6rxfnWa4S989ECqSxi9uhpbrQ0nN/XKfA4l86Z2wBVRUSyhtdKSQYalv/4p7I+VzKXoQaglsCRvBLnZQgKblR7Uaa9RP3OdAeNb/qhaB1bRqZPpkWMrO+7wwKWezNrr+ApDiJbOKcSPdx6NzJCNGoFAzOHCPA7ESQ3qzsQKifTNmzRJ7vwOWyucTlZQrJ38vLp76IykzBMRIkABTwxDf44CtMODKZUFTFqz5FGel1PCInft8TSJndGnZgVwTKtiDYCL0svIaypVJIpF6kk0MG88zc6BhXoR+K9EdMdZ1pOAFHtLZ51ekeyAU2Gde6A7VQMa7tTYiyCw6R9axLiPSRJrvcECNOAmpH3N/J6leRqBfAkvlXg2NepwRx/k4Acn9o0Dd12LVIKJaNTQNscAB2veHP1AR9IUh/anQ1RA9kXKsrBvwaxEudWLoLVfT8UIHN4DVAO8n1qyqOtLlwuVBJO+uidJzDlwwCU9Uu6lxBX831jw3OC8sKoHrDF4SliogVK6vQXWZJQ/818LMa4Ma/WCY0sYamBww1YB4IUeGG/2MuOJT0mkpZ9b9nKOY3pFKCqSCb9YL6WVI6Vu1kSqy6gZjfMHJElSMJY2yKhZDw8wi/Og2H9vIUgqhH7l9woDzJr6bbec92ZwSSNjUlWBp+FqwtAahrHnPWulmgkRcUDrEEg64WgLqD1Ktjd3g2pdSYgyYGhxocyUi+xkCB5y7HOBNtoJMsWUclkqNmV+BDmMgHcmSRIectBUML9XD7KBEswUms9peStmhqwBtcCAV6t/X4idTzgH7wT2AtF7z8iFMcl/I59NGai3fH8vqs15c3kyYyzKvbGvIN/OMkNms11nnRzVrJTjotMa57qPNzKVEKf8+VmNy21lAk/jp/oVgae34JgYQSr0zHpUR5J3XRYwVfJJSUYS6hBiFBmZt6Tiw4mhS3jT8sobw3tGjkVAtg0VKuf+Dw1IkUi8RLV1hEBqx4+mSBmJ3wQy4/2WJ36WvNrwkKsv+npRcKJW8NWnqhbhTIJV8cYCrCUQJJFSMEeOk3IHDGwFEYo/s55Hcz7yUk2FjNlceE/B3o4wRjwZ3FUDKG0MQN1VniHUcEKqvpXAHNzqmnrPKx3gxUyXX2kvNMFif+CdR+jYIrUm4sMUPGAlxIzUFTE9sQK29wdtKU9ii5UDOxyBtLAEleXdUh6XbKkoHYA5L6za1T4Sl8r0WKgg9crE+vV2o5im/zTocz+rmUNpYgq5qUYitjQXUIzhrsVlshYEGgmmaK7CU3VEUPheWyne7zOQGKa5oB9XbUDSkrjCNzoGi/q7OEGs5IARBpd8UOGPGo2K7UemNnby9YxmW/nBzJ0rytZtnpVW0iB1zQ55GJDVtMmJnsglHdXtdxwGhJkWyHQJnHFN0dMtFXXU/1HthpeZ47o+3d2qRO74A7Q+jHKZUustv7qeKa1IPT8qRek1oumWoneaoR6pLnYt5Yn2ORkwld9UoVCZ3UBZ68+Bo6Y93mFiFOHMt9kjiFMpRsfnbVsBqFlLLLewb8X5MWg4chMiEqPHF+026uvjoSHii7iT4XjmJT2Epvon6p2HpGTGeX/b1R46EKoE2fdajQUrsU76SRx/U13KAx5tkl0A6Wfun6ZZl4g17ic7sKE2saQ1XfYfmo/fdfQnVINSVbhbh0hMrk0sfWv+m5pW4qg/q68/QFEKAVBzLWdSUjTsSpT47jl6xT+Tbpfjd1WgFfhCW1pHQ8F4kVCQRig6bxH1YJX8rXkhCQgQq6TnA/VolXbwQmRr2RO5ke1GMGkXalHTXVOM5ej8JS2sQujCA0Ka+5lB0rBshd2KVMh6xbkHrEZs4kArqJBaXMpMUOQhu1RXdXjHIy6N8paJrDh2ZYOni52CpfMnmWar6BhqQwQGGPhwgRE0VQCqIm6ayDpHhJB8PLSipTbXLReyF1tBCV6xJr98NO9+FpcH3w9LRTbzRqzYIjLRXHBUMJjXLUjDxKilPxhNvV7kZOCCk2xVMUzWKiJWLivAw7p/XUGApv8ERN4i6fXO0dIVrQm16rRGItGMRifcMUg+0UJowbaMAUkGfG7LrphPd4qDkFdF01lTJfevqSkXuWBJ/gmHpO65h/gDVkdAbBaHSuVDQ1TbR4OfqjVFhbiDlk32pcDBbH0cwcUDAv+pT23/uR0jAIxdJb5WZE/U/uH1ztPR9IJR8R4jSmKIxwvFJ2Y7z3qtr/TeNfSW0B+Jryq27B7rth1rHTWJ9tPR7YWkGojuR0AYIrcm4ugIJ2EWWdRZR9g3pQGNvFSGk4aoALN25zfpc8jtB8WDbgruwNALfchfHIREjoStZCIwNp7iGMdTsIxKq6SQVwEI5ph41Rg6U5hBbC98lrYtURcHxHb0Td1K4kan/OlqamIuDP0w4EhrqI6EWMB24ZZ0ujQ3fEPFCNcVa027JnsHVMXIgFZdH88Vs1zisAazN+w5un+QlGBMzWF+qrPGLPkejHtI5Yz0ItcB029cSb1pogf1Z/xn0MV6k4yzF12mYM+rov0f3DhgqPy78ovyT9P3tcmHJNNxszSPU0XYqM5Elxeskfu9LYWkNQmPxyneZBY6BwAOfcaR7tSPt16M3vtf4ccv0k5SapsC/M0AtqbuIRSWwnfYdDktXm91u2Wn1cWuu5QeOh59RPjRmINR8NvV/nMCYwVKsMhg8z0794WzWWv8tIbz4az7CY6Ba54ypv206mPSPLCEqsVOS+Ctkqda7sNWKdu/lQIZPx9OsowJC/5FCDJbi8FHkUwA4QxxY5Iv2bJjrbqe9Q20JhM669nu15f8Y2YmUxKewtObAdXVZzHJDdMZAtZNHrfx12flHb9GSgNq62I7AUsKB1nDYur6HAyPs+b1ee9lfQviWRxvD0hlS/v6kNZm0rvHjv7BCmA28YnvZX0JXFPrG7vIAqfH6ttT08ZDFFju00fbU/kcfo1N9KbB7p/XyHWI1w479jz5K9OqW/QeipTkwdkX9R+8mx9wg6v8BhLZZcKHckm4AAAAASUVORK5CYII=)" + ] + 
}, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Avro is a binary encoding format that uses a schema to specify the structure of the data being encoded.\n", + "\n", + "Avro's encoding consists only of values concatenated together, and the there is nothing to identify fields or their datatypes in the byte sequence." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "```json\n", + "{\n", + "\"namespace\": \"kafka.exercise.avro\",\n", + " \"type\": \"record\",\n", + " \"name\": \"Observation\",\n", + " \"fields\": [\n", + " {\"name\": \"id\", \"type\": \"long\", \"doc\" : \"The observation id\"},\n", + " {\"name\": \"value\", \"type\": \"double\", \"doc\" : \"The actual measurement from the sensor\"},\n", + " {\"name\": \"measurement\", \"type\": \"string\", \"doc\" : \"The measurement type, e.g., temperature\"},\n", + " {\"name\": \"timestamp\", \"type\": \"long\", \"doc\" : \"The measurement timestamp\"}\n", + " ]\n", + "}\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Schema Registry" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Schema registry provides centralized management of schemas\n", + "* Stores a versioned history of all schemas\n", + "* Provides a RESTful interface for storing and retrieving Avro schemas\n", + "* Checks schemas and throws an exception if data does not conform to the schema – Allows evolution of schemas according to the configured compatibility setting" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![](https://www.marcusvieira.tech/wp-content/uploads/2019/06/screenshot-2019-06-03-at-00.40.39.png)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "from confluent_kafka.schema_registry import SchemaRegistryClient" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "schema_registry_conf = {'url': \"http://schema-registry:8081\"}\n", + "schema_registry_client = SchemaRegistryClient(schema_registry_conf)\n", + "\n", + "schema_registry_client.get_subjects()" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "from confluent_kafka.schema_registry.avro import AvroSerializer\n", + "import random, time\n", + "from uuid import uuid4" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'{\\n\"namespace\": \"kafka.exercise.avro\",\\n \"type\": \"record\",\\n \"name\": \"Observation\",\\n \"fields\": [\\n {\"name\": \"id\", \"type\": \"long\", \"doc\" : \"The observation id\"},\\n {\"name\": \"value\", \"type\": \"double\", \"doc\" : \"The actual measurement from the sensor\"},\\n {\"name\": \"measurement\", \"type\": \"string\", \"doc\" : \"The measurement type, e.g., temperature\"},\\n {\"name\": \"timestamp\", \"type\": \"long\", \"doc\" : \"The measurement timestamp\"}\\n ]\\n}'" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "value_schema = open(\"observation.avsc\").read()\n", + "value_schema" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Writing to Topic \"avro_topic\"" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": 
{}, + "outputs": [], + "source": [ + "avro_serializer = AvroSerializer(value_schema, schema_registry_client)\n", + "\n", + "producer_conf = {'bootstrap.servers': brokers,\n", + " 'key.serializer': StringSerializer('utf_8'),\n", + " 'value.serializer': avro_serializer,\n", + " 'partitioner': 'murmur2_random',\n", + "}\n", + "\n", + "producer = SerializingProducer(producer_conf)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "rooms = [\"room1\", \"room2\", \"room3\", \"room4\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "room2 {'id': 1, 'value': 24.37080295452068, 'measurement': 'temperature', 'timestamp': 1603102789554}\n", + "room3 {'id': 2, 'value': 30.262965494046583, 'measurement': 'temperature', 'timestamp': 1603102790185}\n", + "room1 {'id': 3, 'value': 15.669931204367932, 'measurement': 'temperature', 'timestamp': 1603102790191}\n", + "room2 {'id': 4, 'value': 25.328791768931243, 'measurement': 'temperature', 'timestamp': 1603102790199}\n", + "room3 {'id': 5, 'value': 26.7696589703205, 'measurement': 'temperature', 'timestamp': 1603102790206}\n", + "room1 {'id': 6, 'value': 18.612616645039502, 'measurement': 'temperature', 'timestamp': 1603102790212}\n", + "room2 {'id': 7, 'value': 1.6352841134687046, 'measurement': 'temperature', 'timestamp': 1603102790220}\n", + "room2 {'id': 8, 'value': 8.857234834915129, 'measurement': 'temperature', 'timestamp': 1603102790226}\n", + "room2 {'id': 9, 'value': 44.662981770667024, 'measurement': 'temperature', 'timestamp': 1603102790232}\n" + ] + } + ], + "source": [ + "for i in range(1,10):\n", + " value = {\"id\": i, \n", + " \"value\": random.uniform(0,50), \n", + " \"measurement\":\"temperature\", \n", + " \"timestamp\": round(time.time()*1000)}\n", + " key = rooms[ random.randint(0,3)]\n", + " print(key + \" \" + str(value))\n", + " producer.poll(0)\n", + " producer.produce(topic=topics[1], value=value, key=key)\n", + " producer.flush(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['avro_topic-value']" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "schema_registry_client.get_subjects()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once the producer writes, it communicates with the Schema Registry, registering the schema of the message." 
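+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can also ask the registry what exactly was stored. A minimal sketch, assuming your confluent-kafka version provides `SchemaRegistryClient.get_latest_version` and using the subject name `avro_topic-value` observed above:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Fetch the latest schema registered under the subject 'avro_topic-value'\n", + "registered = schema_registry_client.get_latest_version('avro_topic-value')\n", + "print(registered.schema_id, registered.version)\n", + "print(registered.schema.schema_str)"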
+ ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "from confluent_kafka import DeserializingConsumer\n", + "from confluent_kafka.schema_registry.avro import AvroDeserializer" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "avro_deserializer = AvroDeserializer(value_schema,schema_registry_client)\n", + "\n", + "string_deserializer = StringDeserializer('utf_8')" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "consumer_conf = {'bootstrap.servers': brokers,\n", + " 'key.deserializer': string_deserializer,\n", + " 'value.deserializer': avro_deserializer,\n", + " 'group.id': str(uuid4()),\n", + " 'auto.offset.reset': \"earliest\"}\n", + "\n", + "consumer = DeserializingConsumer(consumer_conf)\n", + "consumer.subscribe([topics[1]])" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'id': 1, 'value': 24.37080295452068, 'measurement': 'temperature', 'timestamp': 1603102789554}\n", + "{'id': 2, 'value': 30.262965494046583, 'measurement': 'temperature', 'timestamp': 1603102790185}\n", + "{'id': 4, 'value': 25.328791768931243, 'measurement': 'temperature', 'timestamp': 1603102790199}\n", + "{'id': 5, 'value': 26.7696589703205, 'measurement': 'temperature', 'timestamp': 1603102790206}\n", + "{'id': 7, 'value': 1.6352841134687046, 'measurement': 'temperature', 'timestamp': 1603102790220}\n", + "{'id': 8, 'value': 8.857234834915129, 'measurement': 'temperature', 'timestamp': 1603102790226}\n", + "{'id': 9, 'value': 44.662981770667024, 'measurement': 'temperature', 'timestamp': 1603102790232}\n", + "{'id': 3, 'value': 15.669931204367932, 'measurement': 'temperature', 'timestamp': 1603102790191}\n", + "{'id': 6, 'value': 18.612616645039502, 'measurement': 'temperature', 'timestamp': 1603102790212}\n" + ] + } + ], + "source": [ + "while True:\n", + " try:\n", + " # SIGINT can't be handled when polling, limit timeout to 1 second.\n", + " msg = consumer.poll(1.0)\n", + " if msg is None:\n", + " continue\n", + " m = msg.value()\n", + " if m is not None:\n", + " print(m)\n", + " except KeyboardInterrupt:\n", + " break\n", + " consumer.close()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### What if we do not know the schema?" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can't see this in python because of a bug of the Avro library. 
However, we can observe it in [Java](link to the other repo class)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[GitHub Repo Java Code](https://github.com/riccardotommasini/kafka-training/tree/dataeng)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Kafka Practice Python [Basics].ipynb b/Kafka Practice Python [Basics].ipynb new file mode 100644 index 0000000..71f1861 --- /dev/null +++ b/Kafka Practice Python [Basics].ipynb @@ -0,0 +1,867 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Introduction" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![](https://camo.githubusercontent.com/56166d361c3975dee750ecce16d605bbbf66516b/68747470733a2f2f75706c6f61642e77696b696d656469612e6f72672f77696b6970656469612f636f6d6d6f6e732f352f35332f4170616368655f6b61666b615f776f7264747970652e737667)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[Apache Kafka](https://kafka.apache.org/) is a horizontally-scalable, fault-tolerant, distributed message queue. Designed at LinkedIn and maintained by Confluent, it was open-sourced in 2011. Around Kafka, an ecosystem of solutions tailored for (streaming) data ingestion has evolved. Today we will focus on Kafka's basics." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Kafka is implemented in Java and Scala. However, a number of libraries exist to interact with it from your favourite language. Today, we will use Python 3 and Jupyter notebooks, as they are a convenient environment for teaching." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Kafka Architecture" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![](https://github.com/DataSystemsGroupUT/dataeng/blob/dataeng/attachments/KafkaArchitecture.png?raw=true)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Interacting with the Cluster (Zookeeper) using the Admin API." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "During the first part of the class, we will understand how to interact with the cluster and handle the topic life-cycle. 
If not explicitly created, topics are created the first time a producer tries to write or a consumer tries to read, using the default configuration (1 partition, no extra replicas, 1 week retention)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from confluent_kafka.admin import AdminClient, NewTopic, NewPartitions\n", + "from confluent_kafka import KafkaException\n", + "import sys\n", + "from uuid import uuid4" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "brokers = \"kafka1:9092,kafka2:9093\" # Brokers act as cluster entrypoints" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "conf = {'bootstrap.servers': brokers}" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "a = AdminClient(conf)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "md = a.list_topics(timeout=10)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 4 topics:\n", + " \"_confluent-metrics\" with 12 partition(s)\n", + " \"__confluent.support.metrics\" with 1 partition(s)\n", + " \"_schemas\" with 1 partition(s)\n", + " \"__consumer_offsets\" with 50 partition(s)\n" + ] + } + ], + "source": [ + "print(\" {} topics:\".format(len(md.topics)))\n", + "for t in iter(md.topics.values()):\n", + " if t.error is not None:\n", + " errstr = \": {}\".format(t.error)\n", + " else:\n", + " errstr = \"\"\n", + " print(\" \\\"{}\\\" with {} partition(s){}\".format(t, len(t.partitions), errstr))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You have probably noticed that there are some topics we did not create. They are prefixed with underscores, and are in practice \"private\" topics used internally by Kafka to manage the cluster." 
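+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The Admin API can also inspect a topic's effective configuration, e.g. the retention mentioned above. A minimal sketch, assuming `describe_configs` is available in your confluent-kafka version, querying the `_schemas` topic listed earlier:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from confluent_kafka.admin import ConfigResource\n", + "\n", + "# describe_configs() is asynchronous too: it returns one future per resource\n", + "res = ConfigResource(ConfigResource.Type.TOPIC, '_schemas')\n", + "for resource, f in a.describe_configs([res]).items():\n", + "    for name, entry in f.result().items():\n", + "        if name in ('retention.ms', 'cleanup.policy'):\n", + "            print(name, '=', entry.value)"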
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Creating Topics" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "topic_names = [\"test1p\", \"test2p\", \"test2die\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "new_topics = [NewTopic(topic, num_partitions=1, replication_factor=1) for topic in topic_names]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Number of Partitions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![](https://miro.medium.com/max/915/1*GoRlq7O8qMNui6tvnq30cg.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Replication Factor" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![](https://content.linkedin.com/content/dam/engineering/en-us/blog/migrated/kafka_replication_diagram.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Remember: Producers/Consumers do not access followers\n", + "![](https://github.com/DataSystemsGroupUT/dataeng/raw/dataeng/attachments/replicas2.png)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "fs = a.create_topics(new_topics)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Topic test1p created\n", + "Topic test2p created\n", + "Topic test2die created\n" + ] + } + ], + "source": [ + "for topic, f in fs.items():\n", + " try:\n", + " f.result() # The result itself is None\n", + " print(\"Topic {} created\".format(topic))\n", + " except Exception as e:\n", + " print(\"Failed to create topic {}: {}\".format(topic, e))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Let's see how each topics are configured" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 7 topics:\n", + " \"test2p\" with 1 partition(s)\n", + " \"_schemas\" with 1 partition(s)\n", + " \"test1p\" with 1 partition(s)\n", + " \"__consumer_offsets\" with 50 partition(s)\n", + " \"_confluent-metrics\" with 12 partition(s)\n", + " \"test2die\" with 1 partition(s)\n", + " \"__confluent.support.metrics\" with 1 partition(s)\n" + ] + } + ], + "source": [ + "md = a.list_topics(timeout=10)\n", + "print(\" {} topics:\".format(len(md.topics)))\n", + "for t in iter(md.topics.values()):\n", + " if t.error is not None:\n", + " errstr = \": {}\".format(t.error)\n", + " else:\n", + " errstr = \"\"\n", + " print(\" \\\"{}\\\" with {} partition(s){}\".format(t, len(t.partitions), errstr))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### \"Deleting\" Topics" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We actually schedule for deletion. Topics are deleted **eventually** by the Kafka Cluster." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Topic test2die deleted\n" + ] + } + ], + "source": [ + "ds = a.delete_topics([topic_names[2]], operation_timeout=30)\n", + "for topic, f in ds.items():\n", + " try:\n", + " f.result() # The result itself is None\n", + " print(\"Topic {} deleted\".format(topic))\n", + " except Exception as e:\n", + " print(\"Failed to delete topic {}: {}\".format(topic, e))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Creating Partitions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Topics are created with a default number of partitions. Partition can be alterated (add/remove). However, this practice requires to rebalance the brokers (leaders and replicas) and requires time." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "topic = topic_names[1]" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "new_parts = [NewPartitions(topic, int(2))]" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Additional partitions created for topic test2p\n" + ] + } + ], + "source": [ + "fs = a.create_partitions(new_parts, validate_only=False)\n", + "# Wait for operation to finish.\n", + "for topic, f in fs.items():\n", + " try:\n", + " f.result() # The result itself is None\n", + " print(\"Additional partitions created for topic {}\".format(topic))\n", + " except Exception as e:\n", + " print(\"Failed to add partitions to topic {}: {}\".format(topic, e))" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Topic test2p has 2 partitions\n", + "partition 0 leader: 1, replicas: [1], isrs: [1] errstr: \n", + "partition 1 leader: 2, replicas: [2], isrs: [2] errstr: \n" + ] + } + ], + "source": [ + "md = a.list_topics(timeout=10)\n", + "for t in iter(md.topics.values()):\n", + " if str(t)==topic:\n", + " l = t.partitions.values()\n", + " print(\"Topic {} has {} partitions\".format(t, len(l)))\n", + " for p in iter(l):\n", + " if p.error is not None:\n", + " errstr = \": {}\".format(p.error)\n", + " else:\n", + " errstr = \"\"\n", + " print(\"partition {} leader: {}, replicas: {},\" \n", + " \" isrs: {} errstr: {}\".format(p.id, p.leader, p.replicas, p.isrs, errstr))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Producer and Consumer API" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this part of the class, we start producing and consuming from a topic" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Write to Topic \"test1p\"" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "from confluent_kafka import Producer\n", + "import sys\n", + "conf = {'bootstrap.servers': brokers}" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "p = Producer(**conf)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Producer API requires a call back function to control the message delivery (Python)" + ] + }, + { + "cell_type": "code", + 
"execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "def delivery_callback(err, msg):\n", + " if err:\n", + " sys.stderr.write('%% Message failed delivery: %s\\n' % err)\n", + " else:\n", + " sys.stderr.write('%% Message delivered to %s [%d] @ %d\\n' %\n", + " (msg.topic(), msg.partition(), msg.offset()))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Let's send some messages" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1\n", + "2\n", + "3\n", + "4\n", + "5\n", + "6\n", + "7\n", + "8\n", + "9\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + ":5: DeprecationWarning: PY_SSIZE_T_CLEAN will be required for '#' formats\n", + " p.produce(topic_names[0], str(n), callback=delivery_callback)\n" + ] + } + ], + "source": [ + "for n in range(1,10):\n", + " try:\n", + " # Produce line (without newline)\n", + " print(n)\n", + " p.produce(topic_names[0], str(n), callback=delivery_callback)\n", + " p.poll(0)\n", + " except BufferError:\n", + " sys.stderr.write('%% Local producer queue is full (%d messages awaiting delivery): try again\\n' % len(p))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Ensure nothing is in the buffered by the producer.**" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "% Message delivered to test1p [0] @ 0\n", + "% Message delivered to test1p [0] @ 1\n", + "% Message delivered to test1p [0] @ 2\n", + "% Message delivered to test1p [0] @ 3\n", + "% Message delivered to test1p [0] @ 4\n", + "% Message delivered to test1p [0] @ 5\n", + "% Message delivered to test1p [0] @ 6\n", + "% Message delivered to test1p [0] @ 7\n", + "% Message delivered to test1p [0] @ 8\n" + ] + }, + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "p.flush()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Reading from Topic \"test1p\"" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "from confluent_kafka import Consumer, KafkaException" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "conf = {\n", + " 'bootstrap.servers': brokers, \n", + " 'group.id': str(uuid4()), \n", + " 'session.timeout.ms': 6000,\n", + " 'auto.offset.reset': 'earliest'\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "c = Consumer(conf)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "c.subscribe([topic_names[0]])" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "% test1p [0] at offset 0 with key None:\n", + "% test1p [0] at offset 1 with key None:\n", + "% test1p [0] at offset 2 with key None:\n", + "% test1p [0] at offset 3 with key None:\n", + "% test1p [0] at offset 4 with key None:\n", + "% test1p [0] at offset 5 with key None:\n", + "% test1p [0] at offset 6 with key None:\n", + "% test1p [0] at offset 7 with key None:\n", + "% test1p [0] at offset 8 with key None:\n" + ] + }, + { + "name": 
"stdout", + "output_type": "stream", + "text": [ + "b'1'\n", + "b'2'\n", + "b'3'\n", + "b'4'\n", + "b'5'\n", + "b'6'\n", + "b'7'\n", + "b'8'\n", + "b'9'\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "%% Aborted by user\n" + ] + } + ], + "source": [ + "# Read messages from Kafka, print to stdout\n", + "try:\n", + " while True: #Consumer run forever\n", + " msg = c.poll(timeout=1.0)\n", + " if msg is None:\n", + " continue\n", + " if msg.error():\n", + " raise KafkaException(msg.error())\n", + " else:\n", + " # Proper message\n", + " sys.stderr.write('%% %s [%d] at offset %d with key %s:\\n' %\n", + " (msg.topic(), msg.partition(), msg.offset(), str(msg.key())))\n", + " print(msg.value())\n", + "except KeyboardInterrupt:\n", + " sys.stderr.write('%% Aborted by user\\n')\n", + "finally:\n", + " # Close down consumer to commit final offsets.\n", + " c.close()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Write to Topic \"test2p\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### What is important about \"test2p\"?" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "conf = {'bootstrap.servers': brokers}\n", + "p = Producer(**conf)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + ":5: DeprecationWarning: PY_SSIZE_T_CLEAN will be required for '#' formats\n", + " p.produce(topic_names[1], str(n), callback=delivery_callback)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1\n", + "2\n", + "3\n", + "4\n", + "5\n", + "6\n", + "7\n", + "8\n", + "9\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "% Message delivered to test2p [1] @ 0\n", + "% Message delivered to test2p [0] @ 0\n", + "% Message delivered to test2p [1] @ 1\n", + "% Message delivered to test2p [1] @ 2\n", + "% Message delivered to test2p [1] @ 3\n", + "% Message delivered to test2p [0] @ 1\n", + "% Message delivered to test2p [1] @ 4\n", + "% Message delivered to test2p [1] @ 5\n", + "% Message delivered to test2p [0] @ 2\n" + ] + } + ], + "source": [ + "for n in range(1,10):\n", + " try:\n", + " # Produce line (without newline)\n", + " print(n)\n", + " p.produce(topic_names[1], str(n), callback=delivery_callback)\n", + " p.poll(0)\n", + " p.flush()\n", + " except BufferError:\n", + " sys.stderr.write('%% Local producer queue is full (%d messages awaiting delivery): try again\\n' % len(p))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Reading from Topic \"test2p\"" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "conf = {\n", + " 'bootstrap.servers': brokers, \n", + " 'group.id': str(uuid4()), \n", + " 'session.timeout.ms': 6000,\n", + " 'auto.offset.reset': 'earliest'\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "c = Consumer(conf)\n", + "c.subscribe([topic_names[1]])" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "test2p [1] at offset 0 with key None: b'1'\n", + "test2p [1] at offset 1 with key None: b'3'\n", + "test2p [1] at offset 2 with key None: b'4'\n", + "test2p [1] at offset 3 with key None: b'5'\n", + "test2p [1] at offset 4 
with key None: b'7'\n", + "test2p [1] at offset 5 with key None: b'8'\n", + "test2p [0] at offset 0 with key None: b'2'\n", + "test2p [0] at offset 1 with key None: b'6'\n", + "test2p [0] at offset 2 with key None: b'9'\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "%% Aborted by user\n" + ] + } + ], + "source": [ + "# Read messages from Kafka, print to stdout\n", + "try:\n", + " while True:\n", + " msg = c.poll(timeout=1.0)\n", + " if msg is None:\n", + " continue\n", + " if msg.error():\n", + " raise KafkaException(msg.error())\n", + " else:\n", + " # Proper message\n", + " print(\"{} [{}] at offset {} with key {}: {}\".format(msg.topic(), msg.partition(), msg.offset(), str(msg.key()), str(msg.value())))\n", + "except KeyboardInterrupt:\n", + " sys.stderr.write('%% Aborted by user\\n')\n", + "finally:\n", + " # Close down consumer to commit final offsets.\n", + " c.close()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### What do you notice?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![](https://github.com/DataSystemsGroupUT/dataeng/raw/dataeng/attachments/order.png)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Kafka Practice Python [Wordcount].ipynb b/Kafka Practice Python [Wordcount].ipynb new file mode 100644 index 0000000..6c0c94c --- /dev/null +++ b/Kafka Practice Python [Wordcount].ipynb @@ -0,0 +1,1004 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Wordcount using Kafka" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![](https://camo.githubusercontent.com/56166d361c3975dee750ecce16d605bbbf66516b/68747470733a2f2f75706c6f61642e77696b696d656469612e6f72672f77696b6970656469612f636f6d6d6f6e732f352f35332f4170616368655f6b61666b615f776f7264747970652e737667)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "from confluent_kafka import SerializingProducer, DeserializingConsumer\n", + "from confluent_kafka.serialization import StringSerializer, StringDeserializer\n", + "from confluent_kafka.admin import AdminClient, NewTopic\n", + "from uuid import uuid4\n", + "import sys, lorem, random\n", + "\n", + "brokers = \"kafka1:9092,kafka2:9093\"\n", + "topics = [\"sentences1\", \"words\", \"lengths\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "new_topics = [NewTopic(topic, num_partitions=2, replication_factor=1) for topic in topics]" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'sentences1': <Future>,\n", + " 'words': <Future>,\n", + " 'lengths': <Future>}" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a = AdminClient({'bootstrap.servers': brokers})\n", + 
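"# create_topics() is asynchronous: it returns a dict of {topic: future}\n", + 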
"a.create_topics(new_topics)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Producing Sentences" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Non amet porro labore neque.\n", + "Amet adipisci dolore amet aliquam amet consectetur.\n", + "Neque ut neque porro non.\n", + "Quiquia dolore magnam eius.\n", + "Dolorem quiquia etincidunt magnam non.\n", + "Modi quiquia neque adipisci.\n", + "Ipsum modi ut neque.\n", + "Velit est velit est neque quiquia.\n", + "Neque dolorem velit amet quisquam quisquam.\n", + "Tempora etincidunt non quiquia magnam dolor dolore.\n", + "Sed amet dolore ipsum voluptatem.\n", + "Est neque quaerat aliquam eius." + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "pconf = {\n", + " 'bootstrap.servers': brokers,\n", + " 'partitioner': 'murmur2_random',\n", + " 'key.serializer': StringSerializer('utf_8'),\n", + " 'value.serializer': StringSerializer('utf_8')\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "p = SerializingProducer(pconf)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Neque modi est modi neque.\n", + "Voluptatem ipsum quiquia voluptatem adipisci.\n", + "Etincidunt porro dolorem dolore magnam.\n", + "Voluptatem dolor etincidunt sed etincidunt neque porro.\n", + "Sed numquam ut dolorem labore eius modi est.\n", + "Tempora quiquia sit voluptatem sed.\n", + "Dolor adipisci sed adipisci quiquia neque consectetur.\n", + "Tempora voluptatem quaerat labore numquam ipsum eius neque.\n", + "Adipisci magnam sit modi ut ut ipsum.\n", + "Tempora porro porro quaerat sed amet.\n", + "Ipsum modi quiquia quaerat numquam etincidunt neque ut.\n", + "Aliquam quiquia ut consectetur quiquia modi eius.\n", + "Numquam magnam neque non etincidunt.\n", + "Sit magnam dolor quaerat non dolore consectetur modi.\n", + "Ut porro tempora neque magnam.\n", + "Modi est sit quaerat ipsum.\n", + "Adipisci magnam quisquam numquam ut dolor voluptatem numquam.\n", + "Magnam labore aliquam aliquam.\n", + "Ipsum sit tempora modi non etincidunt consectetur.\n", + "Quaerat ipsum ut ipsum quiquia magnam non est.\n", + "Sit dolorem aliquam consectetur amet magnam.\n", + "Modi labore tempora consectetur quisquam non non.\n", + "Voluptatem modi aliquam porro sed.\n", + "Sed neque adipisci numquam neque voluptatem.\n", + "Labore adipisci ut amet.\n", + "Porro velit quiquia magnam sed velit.\n", + "Magnam quaerat sed neque adipisci quiquia est quiquia.\n", + "Sed quiquia numquam velit.\n", + "Voluptatem ipsum dolor ut dolor modi tempora consectetur.\n", + "Etincidunt voluptatem modi labore aliquam ut dolorem.\n", + "Voluptatem quiquia modi consectetur ipsum velit eius quiquia.\n", + "Sit neque eius tempora modi numquam dolor quaerat.\n", + "Quaerat tempora modi quiquia non ipsum adipisci.\n", + "Sed sit est porro.\n", + "Numquam non magnam amet dolorem.\n", + "Adipisci adipisci quaerat porro neque porro.\n", + "Voluptatem magnam sit non velit quaerat tempora.\n", + "Neque sit modi eius.\n", + "Eius sit velit quiquia voluptatem amet.\n", + "Sed est velit adipisci quisquam.\n", + "Porro ipsum dolorem voluptatem eius magnam.\n", + "Consectetur velit voluptatem numquam aliquam ut magnam dolor.\n", + "Porro velit labore voluptatem dolorem labore.\n", + "Neque sit aliquam neque adipisci velit non.\n", + 
"Aliquam quaerat quaerat adipisci numquam dolorem sit.\n", + "Quisquam modi dolorem quaerat adipisci eius.\n", + "Neque quaerat quiquia voluptatem dolor.\n", + "Aliquam non etincidunt quaerat.\n", + "Magnam sed porro eius.\n", + "Aliquam tempora porro voluptatem magnam sit eius sed.\n", + "Tempora modi velit adipisci numquam est labore consectetur.\n", + "Quaerat velit ut ipsum dolore voluptatem velit amet.\n", + "Quaerat ipsum numquam porro quaerat tempora modi.\n", + "Voluptatem ut non magnam.\n", + "Amet quiquia porro voluptatem porro ipsum consectetur.\n", + "Porro neque ipsum porro dolorem ipsum neque neque.\n", + "Tempora etincidunt quiquia numquam quiquia quaerat.\n", + "Dolorem tempora numquam quaerat quaerat velit consectetur sed.\n", + "Sed eius est sit labore dolorem.\n", + "Ipsum dolorem consectetur adipisci neque tempora neque.\n", + "Quisquam modi velit magnam.\n", + "Quisquam est quaerat dolorem aliquam modi magnam est.\n", + "Ipsum adipisci eius modi dolorem.\n", + "Sit magnam neque numquam.\n", + "Sed porro numquam dolore.\n", + "Quaerat quaerat eius magnam quisquam est.\n", + "Velit eius adipisci non.\n", + "Sit labore dolor consectetur modi sed est adipisci.\n", + "Ut est dolore porro numquam.\n", + "Ut ipsum modi eius.\n", + "Consectetur etincidunt neque neque quisquam voluptatem.\n", + "Sed voluptatem voluptatem quisquam magnam consectetur dolore dolore.\n", + "Dolor dolor velit modi neque.\n", + "Adipisci amet quiquia voluptatem.\n", + "Velit neque tempora eius velit dolore quisquam.\n", + "Est dolor ut velit ut quisquam velit.\n", + "Ipsum ipsum modi ut modi dolor.\n", + "Ut tempora dolore magnam consectetur velit dolorem.\n", + "Modi numquam consectetur porro non tempora sed est.\n", + "Aliquam aliquam neque modi.\n", + "Velit magnam dolor velit dolorem tempora.\n", + "Voluptatem adipisci modi tempora quisquam velit.\n", + "Non velit aliquam numquam non non.\n", + "Neque quiquia tempora porro modi ipsum voluptatem neque.\n", + "Ipsum dolorem amet quisquam consectetur dolorem.\n", + "Consectetur neque ut sed numquam magnam dolor dolorem.\n", + "Magnam adipisci dolor velit quaerat dolore.\n", + "Numquam numquam ipsum quaerat modi etincidunt numquam.\n", + "Adipisci dolorem neque adipisci eius.\n", + "Consectetur tempora dolorem etincidunt aliquam porro neque.\n", + "Quiquia quiquia quaerat aliquam labore adipisci adipisci.\n", + "Sit labore consectetur magnam.\n", + "Adipisci modi etincidunt ut eius eius etincidunt est.\n", + "Tempora ut tempora voluptatem modi dolor eius quisquam.\n", + "Sit aliquam dolore velit porro neque dolorem.\n", + "Neque dolor ut etincidunt labore etincidunt est voluptatem.\n", + "Dolor voluptatem magnam adipisci adipisci adipisci neque.\n", + "Dolorem adipisci amet dolore quiquia.\n", + "Sed dolor neque quaerat quiquia amet dolor.\n" + ] + } + ], + "source": [ + "for n in range(1,100):\n", + " try:\n", + " # Produce line (without newline)\n", + " line = lorem.sentence()\n", + " p.produce(topics[0], key=str(uuid4()), value=line)\n", + " p.poll(0)\n", + " p.flush()\n", + " print(line)\n", + " except BufferError:\n", + " sys.stderr.write('%% Local producer queue is full (%d messages awaiting delivery): try again\\n' % len(p))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Consuming Sentences" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "string_deserializer = StringDeserializer('utf_8')" + ] + }, + { + "cell_type": "code", + "execution_count": 23, 
+ "metadata": {}, + "outputs": [], + "source": [ + "consumer_conf = {\n", + " 'bootstrap.servers': brokers,\n", + " 'key.deserializer': string_deserializer,\n", + " 'value.deserializer': string_deserializer,\n", + " 'group.id': str(uuid4()),\n", + " 'session.timeout.ms': 6000,\n", + " 'auto.offset.reset': 'earliest'\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "c = DeserializingConsumer(consumer_conf)\n", + "c.subscribe([topics[0]])" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sentences1 [1] at offset 0 with key f4396aaa-30e6-4e8b-b826-ee8a4a4c2d3c: Voluptatem ipsum quiquia voluptatem adipisci.\n", + "sentences1 [1] at offset 1 with key eb08ecba-8894-4e3c-832a-856ec0306cbb: Etincidunt porro dolorem dolore magnam.\n", + "sentences1 [1] at offset 2 with key ac8e70b1-64b4-4d1e-8561-c7711d6a349c: Voluptatem dolor etincidunt sed etincidunt neque porro.\n", + "sentences1 [1] at offset 3 with key 158297da-a604-45f2-9b68-4d6963b437b3: Sed numquam ut dolorem labore eius modi est.\n", + "sentences1 [1] at offset 4 with key fe3ca354-6e71-49d1-b053-f4cda577d7ea: Tempora quiquia sit voluptatem sed.\n", + "sentences1 [1] at offset 5 with key 4f547586-77f9-44a7-8179-8176b7be43f6: Tempora voluptatem quaerat labore numquam ipsum eius neque.\n", + "sentences1 [1] at offset 6 with key 6dafd5c9-3e6b-4166-969e-6c3b4448ea3e: Adipisci magnam sit modi ut ut ipsum.\n", + "sentences1 [1] at offset 7 with key 0be3784e-63d4-4e76-b785-0b7de1be85fd: Tempora porro porro quaerat sed amet.\n", + "sentences1 [1] at offset 8 with key 9912cb43-92b1-44bd-b6b7-8a6ff96450e4: Aliquam quiquia ut consectetur quiquia modi eius.\n", + "sentences1 [1] at offset 9 with key 6bf25246-8b71-4764-bb46-35878d9da161: Sit magnam dolor quaerat non dolore consectetur modi.\n", + "sentences1 [1] at offset 10 with key 26e3b385-6b83-4468-ae72-1f104a562b12: Ut porro tempora neque magnam.\n", + "sentences1 [1] at offset 11 with key b67c7a2b-d19a-48a4-9f04-adb175f89b55: Magnam labore aliquam aliquam.\n", + "sentences1 [1] at offset 12 with key 99b0aa23-8389-40c8-8e69-184148756b2c: Quaerat ipsum ut ipsum quiquia magnam non est.\n", + "sentences1 [1] at offset 13 with key c15f9509-b7e7-44f5-97a0-75ab4577c02c: Modi labore tempora consectetur quisquam non non.\n", + "sentences1 [1] at offset 14 with key a9bdc257-8216-4849-8861-b6eac4e4d740: Voluptatem modi aliquam porro sed.\n", + "sentences1 [1] at offset 15 with key 5ab75ebd-a0e7-4c12-b214-0ea47b4cb8d2: Labore adipisci ut amet.\n", + "sentences1 [1] at offset 16 with key b3ef80cd-7c26-48dd-93f8-ec7b0cca3a7c: Sed quiquia numquam velit.\n", + "sentences1 [1] at offset 17 with key 0930e190-e146-408f-8f30-74f22590a796: Etincidunt voluptatem modi labore aliquam ut dolorem.\n", + "sentences1 [1] at offset 18 with key 4536419b-d8a5-4968-a510-236c3e5f52fc: Quaerat tempora modi quiquia non ipsum adipisci.\n", + "sentences1 [1] at offset 19 with key bc76cc5c-e604-4b1b-bdaf-0728aa781f45: Adipisci adipisci quaerat porro neque porro.\n", + "sentences1 [1] at offset 20 with key 70b1190f-2149-436f-baee-9102bbb674da: Voluptatem magnam sit non velit quaerat tempora.\n", + "sentences1 [1] at offset 21 with key d4d1569f-eda4-457e-98e5-3e26d605f559: Neque sit modi eius.\n", + "sentences1 [1] at offset 22 with key 30b09a4a-1901-4f25-89d7-ea4f734c4d6a: Sed est velit adipisci quisquam.\n", + "sentences1 [1] at offset 23 with key 
15dfad56-f024-4439-9055-763ec01aab05: Consectetur velit voluptatem numquam aliquam ut magnam dolor.\n", + "sentences1 [1] at offset 24 with key fa9704f5-b718-456f-a89a-a162330e7865: Porro velit labore voluptatem dolorem labore.\n", + "sentences1 [1] at offset 25 with key 293ebf1a-ba54-4623-8210-1bab181a745b: Neque sit aliquam neque adipisci velit non.\n", + "sentences1 [1] at offset 26 with key fd7e5c58-5af0-4411-831d-ef4dcd3fff5e: Aliquam quaerat quaerat adipisci numquam dolorem sit.\n", + "sentences1 [1] at offset 27 with key c34f2a3b-db00-4dad-94ef-993d0f0a4cdd: Quisquam modi dolorem quaerat adipisci eius.\n", + "sentences1 [1] at offset 28 with key f8a59637-c4d9-4d5f-aadc-c20d6470e476: Quaerat velit ut ipsum dolore voluptatem velit amet.\n", + "sentences1 [1] at offset 29 with key cb3ef4bf-45a4-4435-b44f-9da9b04e70ba: Voluptatem ut non magnam.\n", + "sentences1 [1] at offset 30 with key d45b8e11-5925-43f3-80b5-3426dfa65d45: Amet quiquia porro voluptatem porro ipsum consectetur.\n", + "sentences1 [1] at offset 31 with key 24812ec1-8c2d-4a84-8f67-5d36e7849b5e: Tempora etincidunt quiquia numquam quiquia quaerat.\n", + "sentences1 [1] at offset 32 with key ea337b86-7ead-49b3-a3e2-ab0cc72611fd: Sed eius est sit labore dolorem.\n", + "sentences1 [1] at offset 33 with key 2b49eded-42d1-4d40-9d70-72c7bd2f5e10: Quisquam modi velit magnam.\n", + "sentences1 [1] at offset 34 with key cf872ef0-08fb-431e-b9ee-1155579c19fc: Ipsum adipisci eius modi dolorem.\n", + "sentences1 [1] at offset 35 with key 767b5245-b613-4cee-bce7-531bcd05a4f8: Sed porro numquam dolore.\n", + "sentences1 [1] at offset 36 with key fa7a7f33-b525-4080-9fef-ed26134da6ec: Quaerat quaerat eius magnam quisquam est.\n", + "sentences1 [1] at offset 37 with key 5b476e16-8405-4b83-b43c-720064cb88ed: Sit labore dolor consectetur modi sed est adipisci.\n", + "sentences1 [1] at offset 38 with key acf8b94b-f228-4c31-a1a3-a50e173d3643: Ut est dolore porro numquam.\n", + "sentences1 [1] at offset 39 with key 644fd859-6783-4385-be38-663acf1351cc: Ut ipsum modi eius.\n", + "sentences1 [1] at offset 40 with key f2d50ded-2174-480c-a0b7-2c5902cb4b87: Consectetur etincidunt neque neque quisquam voluptatem.\n", + "sentences1 [1] at offset 41 with key d69d1ee4-d0c2-4c5e-8b32-635c19b6d4f2: Dolor dolor velit modi neque.\n", + "sentences1 [1] at offset 42 with key 6d02a4d0-83b1-4f77-9435-49c7952643bf: Adipisci amet quiquia voluptatem.\n", + "sentences1 [1] at offset 43 with key a8b862fc-e3f0-4fac-847c-5da04bed65e3: Velit neque tempora eius velit dolore quisquam.\n", + "sentences1 [1] at offset 44 with key 1348b638-0557-46f9-9dad-2a426145e7c8: Est dolor ut velit ut quisquam velit.\n", + "sentences1 [1] at offset 45 with key 2755506c-332d-4702-951e-692db5eb3538: Ipsum ipsum modi ut modi dolor.\n", + "sentences1 [1] at offset 46 with key 524eb44e-4f4b-4e9b-9080-deb717074996: Ut tempora dolore magnam consectetur velit dolorem.\n", + "sentences1 [1] at offset 47 with key ede5153c-9b4b-46ae-a465-7e1faa13ac18: Modi numquam consectetur porro non tempora sed est.\n", + "sentences1 [1] at offset 48 with key 61b2901c-a66e-4315-9b0a-c18a5310c0bc: Velit magnam dolor velit dolorem tempora.\n", + "sentences1 [1] at offset 49 with key 519ea254-7783-44d2-9e5f-b7016bee6b90: Non velit aliquam numquam non non.\n", + "sentences1 [1] at offset 50 with key 17755a55-dc20-435c-8e4d-eb0355ecc60a: Neque quiquia tempora porro modi ipsum voluptatem neque.\n", + "sentences1 [1] at offset 51 with key d917eaa4-24b2-4992-bb50-de365f296048: Ipsum dolorem amet quisquam consectetur 
dolorem.\n", + "sentences1 [1] at offset 52 with key d7781aba-b865-4342-8a81-92c9becfd560: Consectetur neque ut sed numquam magnam dolor dolorem.\n", + "sentences1 [1] at offset 53 with key a135c3f4-de10-4472-a0a1-8e2f1709da62: Magnam adipisci dolor velit quaerat dolore.\n", + "sentences1 [1] at offset 54 with key 7af77520-3db5-4e60-a029-d4b9b6dca516: Adipisci dolorem neque adipisci eius.\n", + "sentences1 [1] at offset 55 with key c73851a0-4135-45f5-b922-fd29e8dfd1c4: Quiquia quiquia quaerat aliquam labore adipisci adipisci.\n", + "sentences1 [1] at offset 56 with key be8043a3-d990-42c2-9a02-0831ab5efdeb: Adipisci modi etincidunt ut eius eius etincidunt est.\n", + "sentences1 [1] at offset 57 with key 11a00b77-0d35-4ebe-a7b7-384c923770fb: Tempora ut tempora voluptatem modi dolor eius quisquam.\n", + "sentences1 [1] at offset 58 with key 89044263-61ea-4103-a808-a339bc458c20: Dolor voluptatem magnam adipisci adipisci adipisci neque.\n", + "sentences1 [0] at offset 0 with key c7a649ea-fa5d-4c89-92d1-46b370f92278: Neque modi est modi neque.\n", + "sentences1 [0] at offset 1 with key 0f099505-7a0c-4b1e-b7a1-76a665c32c44: Dolor adipisci sed adipisci quiquia neque consectetur.\n", + "sentences1 [0] at offset 2 with key 1425902c-2cb9-4497-aa83-490da7e872e4: Ipsum modi quiquia quaerat numquam etincidunt neque ut.\n", + "sentences1 [0] at offset 3 with key dcedfdce-4fb7-4202-b37d-6bf58d299ec8: Numquam magnam neque non etincidunt.\n", + "sentences1 [0] at offset 4 with key bb874382-72a5-4f78-a7c3-2c0cd27f65c9: Modi est sit quaerat ipsum.\n", + "sentences1 [0] at offset 5 with key ba22cf0a-54de-4875-a9a6-25ad6a749a69: Adipisci magnam quisquam numquam ut dolor voluptatem numquam.\n", + "sentences1 [0] at offset 6 with key da6fa114-a991-491d-9f24-4c65f475f75b: Ipsum sit tempora modi non etincidunt consectetur.\n", + "sentences1 [0] at offset 7 with key 37847a81-ef61-4844-884d-b1ac9ca1e85b: Sit dolorem aliquam consectetur amet magnam.\n", + "sentences1 [0] at offset 8 with key 4ccf2286-35ad-48e0-8a85-ce58d573b7a4: Sed neque adipisci numquam neque voluptatem.\n", + "sentences1 [0] at offset 9 with key 5ff20230-674d-4dda-ac95-bfea9d34b6b5: Porro velit quiquia magnam sed velit.\n", + "sentences1 [0] at offset 10 with key 66174b7c-e81a-4cf7-a6fd-68be0d760248: Magnam quaerat sed neque adipisci quiquia est quiquia.\n", + "sentences1 [0] at offset 11 with key ebd43613-c50d-487e-a79b-e50872581442: Voluptatem ipsum dolor ut dolor modi tempora consectetur.\n", + "sentences1 [0] at offset 12 with key 04c33111-213b-41f5-81b6-cb8418adf9ae: Voluptatem quiquia modi consectetur ipsum velit eius quiquia.\n", + "sentences1 [0] at offset 13 with key c8ae46d9-b970-4c3b-90a5-a4a667c6ab95: Sit neque eius tempora modi numquam dolor quaerat.\n", + "sentences1 [0] at offset 14 with key 30f2c420-33bc-46f3-8e06-ebca998a1bdb: Sed sit est porro.\n", + "sentences1 [0] at offset 15 with key 45c7ed1a-b26b-445f-a537-389ec557ea1a: Numquam non magnam amet dolorem.\n", + "sentences1 [0] at offset 16 with key 76d185cd-a0be-4b56-a9e2-17a7718414f3: Eius sit velit quiquia voluptatem amet.\n", + "sentences1 [0] at offset 17 with key dbb82ddc-5289-4dbb-aff6-09a7f92c5aec: Porro ipsum dolorem voluptatem eius magnam.\n", + "sentences1 [0] at offset 18 with key e44ae162-22bf-4444-858e-6cf29397c847: Neque quaerat quiquia voluptatem dolor.\n", + "sentences1 [0] at offset 19 with key c8a96508-3478-49f3-98d1-803fd2e52fcc: Aliquam non etincidunt quaerat.\n", + "sentences1 [0] at offset 20 with key 905584dd-257a-4615-9e52-d90f420d079c: Magnam sed porro 
eius.\n", + "sentences1 [0] at offset 21 with key f7a9da19-b576-4d5a-abbf-9c161cd6a48e: Aliquam tempora porro voluptatem magnam sit eius sed.\n", + "sentences1 [0] at offset 22 with key cd05d45b-1941-4ce3-96d4-6864a2d9da03: Tempora modi velit adipisci numquam est labore consectetur.\n", + "sentences1 [0] at offset 23 with key e3a13cda-84c8-4a1a-acb4-39f41a470852: Quaerat ipsum numquam porro quaerat tempora modi.\n", + "sentences1 [0] at offset 24 with key 47c34af9-c2d2-4149-b0d1-74e8520ac4a0: Porro neque ipsum porro dolorem ipsum neque neque.\n", + "sentences1 [0] at offset 25 with key 4e78a056-283c-499f-95c7-9c90fc9440d6: Dolorem tempora numquam quaerat quaerat velit consectetur sed.\n", + "sentences1 [0] at offset 26 with key 034e2afc-15a7-47af-8c88-7aee80a9025b: Ipsum dolorem consectetur adipisci neque tempora neque.\n", + "sentences1 [0] at offset 27 with key 9a43b495-6140-4025-8a9c-df348143afd3: Quisquam est quaerat dolorem aliquam modi magnam est.\n", + "sentences1 [0] at offset 28 with key 49984d32-54a8-45e3-b720-e2ebc3854fc9: Sit magnam neque numquam.\n", + "sentences1 [0] at offset 29 with key 7cbed079-acf9-41d6-a51b-6808cf387441: Velit eius adipisci non.\n", + "sentences1 [0] at offset 30 with key 7cda12e5-3d23-4fe8-9d53-d039e91f897e: Sed voluptatem voluptatem quisquam magnam consectetur dolore dolore.\n", + "sentences1 [0] at offset 31 with key edd19d03-cb80-4dd9-9e4f-a33b3c074ff3: Aliquam aliquam neque modi.\n", + "sentences1 [0] at offset 32 with key 5a83c231-d9eb-458b-9296-e1a6dcaf6961: Voluptatem adipisci modi tempora quisquam velit.\n", + "sentences1 [0] at offset 33 with key 3981aa47-a177-46ff-9f14-6d7ef633ae08: Numquam numquam ipsum quaerat modi etincidunt numquam.\n", + "sentences1 [0] at offset 34 with key bd54fca3-018e-4d8c-8a78-08b245be89a3: Consectetur tempora dolorem etincidunt aliquam porro neque.\n", + "sentences1 [0] at offset 35 with key 3a02f745-b48a-4bd5-ae20-5bf4c494097c: Sit labore consectetur magnam.\n", + "sentences1 [0] at offset 36 with key 3e326102-3d3f-4d70-b68f-c2e3e077a187: Sit aliquam dolore velit porro neque dolorem.\n", + "sentences1 [0] at offset 37 with key 54230fca-8db4-4d86-a5bb-e1e1d1241d44: Neque dolor ut etincidunt labore etincidunt est voluptatem.\n", + "sentences1 [0] at offset 38 with key 6bce4d27-1478-4164-8281-38bf825d29ef: Dolorem adipisci amet dolore quiquia.\n", + "sentences1 [0] at offset 39 with key f80c1730-3056-40d1-b18d-09b658ee8a77: Sed dolor neque quaerat quiquia amet dolor.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "%% Aborted by user\n" + ] + } + ], + "source": [ + "try:\n", + " while True:\n", + " # SIGINT can't be handled when polling, limit timeout to 1 second.\n", + " msg = c.poll(1.0)\n", + " if msg is None:\n", + " continue\n", + " if msg.error():\n", + " raise KafkaException(msg.error())\n", + " else:\n", + " print(\"{} [{}] at offset {} with key {}: {}\".format(msg.topic(), msg.partition(), msg.offset(), str(msg.key()), str(msg.value())))\n", + "except KeyboardInterrupt:\n", + " sys.stderr.write('%% Aborted by user\\n')\n", + "finally:\n", + " # Close down consumer to commit final offsets.\n", + " c.close()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Consuming Sentences and Producing Words" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "from confluent_kafka.serialization import IntegerSerializer, IntegerDeserializer\n", + "\n", + "consumer_conf['group.id'] = str(uuid4())\n", + "c2 = 
DeserializingConsumer(consumer_conf)\n", + "c2.subscribe([topics[0]])\n", + "\n", + "pconf = {\n", + " 'bootstrap.servers': brokers,\n", + " 'key.serializer': StringSerializer('utf_8'),\n", + " 'value.serializer': IntegerSerializer()\n", + "}\n", + "\n", + "\n", + "p2 = SerializingProducer(pconf)" + ] + }, + { + "cell_type": "code", + "execution_count": 121, + "metadata": {}, + "outputs": [ + { + "ename": "RuntimeError", + "evalue": "Consumer closed", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;31m# SIGINT can't be handled when polling, limit timeout to 1 second.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mmsg\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mc2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpoll\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1.0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mmsg\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;32mcontinue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/opt/conda/lib/python3.8/site-packages/confluent_kafka/deserializing_consumer.py\u001b[0m in \u001b[0;36mpoll\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 123\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 124\u001b[0m \"\"\"\n\u001b[0;32m--> 125\u001b[0;31m \u001b[0mmsg\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mDeserializingConsumer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpoll\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 126\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 127\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mmsg\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mRuntimeError\u001b[0m: Consumer closed" + ] + } + ], + "source": [ + "try:\n", + " while True:\n", + " # SIGINT can't be handled when polling, limit timeout to 1 second.\n", + " msg = c2.poll(1.0)\n", + " if msg is None:\n", + " continue\n", + " if msg.error():\n", + " raise KafkaException(msg.error())\n", + " else:\n", + " for w in msg.value().split(\" \"):\n", + " print(w)\n", + " p2.produce(topics[1], key=w, value=int(1))\n", + " p2.poll(0)\n", + " p2.flush()\n", + "except KeyboardInterrupt:\n", + " sys.stderr.write('%% Aborted by user\\n')\n", + "finally:\n", + " # Close down consumer to commit final offsets.\n", + " c2.close()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Counting Words" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "consumer_conf['group.id'] = str(uuid4())\n", + "consumer_conf['value.deserializer'] = IntegerDeserializer()\n", + "\n", + "words = 
{}" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "c3 = DeserializingConsumer(consumer_conf)\n", + "c3.subscribe([topics[1]])" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "%% Aborted by user\n" + ] + } + ], + "source": [ + "try:\n", + " while True:\n", + " # SIGINT can't be handled when polling, limit timeout to 1 second.\n", + " msg = c3.poll(1.0)\n", + " if msg is None:\n", + " continue\n", + " if msg.error():\n", + " raise KafkaException(msg.error())\n", + " else:\n", + " w = str(msg.key())\n", + " if w in words:\n", + " words[w] = words[w] + msg.value()\n", + " else:\n", + " words[w] = msg.value()\n", + "except KeyboardInterrupt:\n", + " sys.stderr.write('%% Aborted by user\\n')\n", + "finally:\n", + " # Close down consumer to commit final offsets.\n", + " c3.close()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'Dolore': 8,\n", + " 'porro': 25,\n", + " 'aliquam': 24,\n", + " 'neque': 36,\n", + " 'adipisci': 36,\n", + " 'numquam': 25,\n", + " 'neque.': 8,\n", + " 'Ut': 14,\n", + " 'consectetur': 28,\n", + " 'est': 25,\n", + " 'numquam.': 6,\n", + " 'etincidunt': 16,\n", + " 'modi': 32,\n", + " 'velit.': 7,\n", + " 'Non': 10,\n", + " 'velit': 29,\n", + " 'voluptatem': 35,\n", + " 'tempora': 30,\n", + " 'labore.': 8,\n", + " 'Dolorem': 10,\n", + " 'quiquia': 35,\n", + " 'consectetur.': 10,\n", + " 'Porro': 6,\n", + " 'ipsum': 34,\n", + " 'ut': 27,\n", + " 'dolor': 25,\n", + " 'quiquia.': 8,\n", + " 'labore': 21,\n", + " 'magnam': 34,\n", + " 'quaerat': 33,\n", + " 'Quaerat': 13,\n", + " 'modi.': 2,\n", + " 'Est': 9,\n", + " 'sed.': 8,\n", + " 'Adipisci': 14,\n", + " 'amet': 39,\n", + " 'eius.': 12,\n", + " 'Amet': 9,\n", + " 'dolorem': 21,\n", + " 'non': 18,\n", + " 'dolorem.': 9,\n", + " 'sed': 29,\n", + " 'dolore': 39,\n", + " 'Eius': 5,\n", + " 'eius': 26,\n", + " 'quaerat.': 6,\n", + " 'Aliquam': 6,\n", + " 'sit.': 8,\n", + " 'quisquam': 22,\n", + " 'amet.': 3,\n", + " 'Etincidunt': 5,\n", + " 'Quisquam': 1,\n", + " 'adipisci.': 5,\n", + " 'porro.': 7,\n", + " 'Dolor': 4,\n", + " 'quisquam.': 9,\n", + " 'sit': 27,\n", + " 'Neque': 11,\n", + " 'aliquam.': 11,\n", + " 'Labore': 7,\n", + " 'non.': 5,\n", + " 'ut.': 6,\n", + " 'dolor.': 6,\n", + " 'Modi': 8,\n", + " 'Quiquia': 14,\n", + " 'Magnam': 9,\n", + " 'etincidunt.': 14,\n", + " 'Consectetur': 5,\n", + " 'ipsum.': 8,\n", + " 'Numquam': 5,\n", + " 'est.': 7,\n", + " 'magnam.': 4,\n", + " 'Ipsum': 7,\n", + " 'dolore.': 6,\n", + " 'tempora.': 5,\n", + " 'Velit': 3,\n", + " 'voluptatem.': 10,\n", + " 'Sed': 5,\n", + " 'Sit': 4,\n", + " 'Tempora': 4,\n", + " 'Voluptatem': 2}" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "words" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXwAAAD4CAYAAADvsV2wAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAaNklEQVR4nO3dfbBdVXnH8e9DEgigDkEuGMUYZRgrvgW9IiNaUaDF2OHFQkemYpjRibUySiujlM5UsNMO0woq0sFGYYzWN/BlpAxaMUIp1kITDBAMFowRgjEJL4EkN8nNvffpH2stzsrmnHvOPWef+7Z+n5kzZ7/vZ6+99rPX2Xufc8zdERGR2e+AqQ5AREQmhxK+iEghlPBFRAqhhC8iUgglfBGRQsydzJUdccQRvnjx4slcpYjIjLdmzZrH3X2g1+VMasJfvHgxq1evnsxViojMeGb22zqW0/ElHTObY2a/MLObY//hZnarmT0U3xfUEZCIiPTHRK7hfwxYn/VfAqxy92OBVbFfRESmqY4SvpkdDbwb+HI2+ExgZexeCZxVb2giIlKnTlv4nwM+AYxlw45y980A8f3IZjOa2XIzW21mq7dt29ZTsCIi0r22Cd/M/gTY6u5rulmBu69w90F3HxwY6Pkms4iIdKmTp3ROAs4ws6XAfOAFZvZvwBYzW+jum81sIbC1n4GKiEhv2rbw3f1v3P1od18MvBf4qbu/D7gJWBYnWwb8oG9RiohIz3r5pu0VwGlm9hBwWuwXEZFpakJfvHL324HbY/cTwCn1hyQiIv0wqd+0FamLXW7Pdvun+vcnPpO1HpHJoB9PExEphBK+iEghlPBFRAqhhC8iUgglfBGRQijhi4gUQglfRKQQSvgiIoVQwhcRKYQSvohIIZTwRUQKoYQvIlIIJXwRkUIo4YuIFEIJX0SkELPy9/Crv2Gu3zQXEVELX0SkGG0TvpnNN7O7zexeM3vAzC6Pwy8zs8fMbG18Le1/uCIi0q1OLunsBd7p7jvNbB5wp5n9MI77rLt/pn/hiYhIXdomfHd3YGfsnRdfuhAuIjLDdHQN38zmmNlaYCtwq7vfFUddaGb3mdn1ZragxbzLzWy1ma3etm1bTWHPfna5PfuaivlFZPbpKOG7+6i7LwGOBk4ws9cA1wLHAEuAzcCVLeZd4e6D7j44MDBQU9giIjJRE3pKx923A7cDp7v7lngiGAO+BJzQh/hERKQmnTylM2Bmh8Xug4FTgQfNbGE22dnAuv6EKCIidejkKZ2FwEozm0M4Qdzg7jeb2dfMbAnhBu5G4EP9C1NERHrVyVM69wHHNxl+fl8ikklX5zeR9a1mkelL37QVESmEEr6ISCGU8EVECqGELyJSCCV8EZFCzMrfw5fpQU/siEwvauGLiBRCCV9EpBBK+CIihVDCFxEphG7a9pFuWorIdKIWvohIIZTwRUQKoYQvIlIIJXwRkUIo4YuIFEIJX0SkEJ38p+18M7vbzO41swfM7PI4/HAzu9XMHorvC/ofroiIdKuTFv5e4J3u/npgCXC6mZ0IXAKscvdjgVWxX0REpqm2Cd+DnbF3Xnw5cCawMg5fCZzVlwhFRKQWHV3DN7M5ZrYW2Arc6u53AUe5+2aA+H5k/8IUEZFedfTTCu4+Ciwxs8OA75vZazpdgZktB5YDLFq0qKsgRT/TICK9m9BTOu6+HbgdOB3YYmYLAeL71hbzrHD3QXcfHBgY6DFcERHpVidP6QzElj1mdjBwKvAgcBOwLE62DPhBv4IUEZHedXJJZyGw0szmEE4QN7j7zWb2c+AGM/sA8Ahwbh/jFBGRHrVN+O5+H3B8k+FPAKf0IygREamffg9/GslvzMrU0X6Q2Uo/rSAiUgglfBGRQijhi4gUQglfRKQQumnbhr7hKiKzhVr4IiKFUMIXESmEEr6ISCGU8EVECqGbtvIculE9u2n/lkstfBGRQijhi4gUQglfRKQQSvgiIoVQwhcRKYSe0hHpgZ54kZlELXwRkUJ08ifmLzWz28xsvZk9YGYfi8MvM7PHzGxtfC3tf7giItKtTi7pjAAfd/d7zOz5wBozuzWO+6y7f6Z/4YmISF06+RPzzcDm2L3DzNYDL+l3YCIiUq8J3bQ1s8XA8cBdwEnAhWb2fmA14VPAU03mWQ4sB1i0aFGP4WbL7eFm2VTNOxOVtr0is1nHN23N7HnAd4GL3P0Z4FrgGGAJ4RPAlc3mc/cV7j7o7oMDAwM1hCwiIt3oKOGb2TxCsv+6u38PwN23uPuou48BXwJO6F+YIiLSq06e0jHgOmC9u1+VDV+YTXY2sK7+8EREpC6dXMM/CTgfuN/M1sZhlwLnmdkSwIGNwIf6EqGIiNSik6d07gSsyahb6g+ne/nNRREReS5901ZEpBBK+CIihVDCFxEphBK+iEghlPBFRAqhhC8iUgglfBGRQijhi4gUQglfRKQQSvgiIoXQn5iLTBH914BMNrXwRUQKoYQvIlIIJXwRkUIo4YuIFEI3baUnM+HG40yIUWQyqIUvIlKITv7T9qVmdpuZrTezB8zsY3H44WZ2q5k9FN8X9D9cERHpVict/BHg4+7+KuBE4CNmdhxwCbDK3Y8FVsV+ERGZptomfHff7O73xO4dwHrgJcCZwMo42UrgrH4FKSIivZvQNXwzWwwcD9wFHOXumyGcFIAj6w5ORETq03HCN7PnAd8FLnL3ZyYw33IzW21mq7dt29ZNjCIiUoOOEr6ZzSMk+6+7+/fi4C1mtjCOXwhsbTavu69w90F3HxwYGKgjZhER6UInT+kYcB2w3t2vykbdBCyL3cuAH9QfnoiI1KWTL16dBJwP3G9ma+OwS4ErgBvM7APAI8C5/QlRRETq0Dbhu/udgLUYfUq94YiISL/om7YiIoVQwhcRKYQSvohIIZTwRUQKoYQvIlII/R6+FEe/jy+lUgtfRKQQSvgiIoVQwhcRKYQSvohIIZTwRUQKoYQvIlIIJXwRkUIo4YuIFEIJX0SkEEr4IiKF0E8r1Cz/2r6IyHSiFr6ISCE6+RPz681sq5mty4ZdZmaPmdna+Fra3zBFRKRXnbTwvwKc3mT4Z919SXzdUm9YIiJSt7YJ393vAJ6chFhERKSPermGf6GZ3Rcv+SxoNZGZLTez1Wa2etu2bT2sTkREetFtwr8WOAZYAmwGrmw1obuvcPdBdx8cGBjocnUiItKrrhK+u29x91F3HwO+BJxQb1giIlK3rhK+mS3Mes8G1rWaVkREpoe2X7wys28CJwNHmNkm4FPAyWa2BHBgI/ChPsYoIiI1aJvw3f28JoOv60Ms04a+LTv96I/HZ752+1D7uP/0TVsRkUIo4YuIFEIJX0SkEEr4IiKFUMIXESmEfg9fJo2ewhCZWmrhi4gUQglfRKQQSvgiIoVQwhcRKYRu2sqsoBvCIu2phS8iUgglfBGRQijhi4gUQglfRKQQumkrUrjqDW/dAJ+91MIXESmEEr6ISCHaJnwzu97MtprZumzY4WZ2q5k9FN8X9DdMERHpVSct/K8Ap1eGXQ
KscvdjgVWxX0REprG2Cd/d7wCerAw+E1gZu1cCZ9Ucl4iI1Kzbp3SOcvfNAO6+2cyObDWhmS0HlgMsWrSoy9XNTnU+DTHesvTUhUh3Ztux0/ebtu6+wt0H3X1wYGCg36sTEZEWuk34W8xsIUB831pfSCIi0g/dJvybgGWxexnwg3rCERGRfunkscxvAj8HXmlmm8zsA8AVwGlm9hBwWuwXEZFprO1NW3c/r8WoU2qORQrXr5vYk7ne6Wi6bt90jWs20zdtRUQKoYQvIlIIJXwRkUIo4YuIFGLG/B5+P2/wzMSbR1MV82Stdybuk3Zm4zaNZ7K+SS6dUwtfRKQQSvgiIoVQwhcRKYQSvohIIZTwRUQKMWOe0pkN9KSB9Eu/6tZMqbP6P4jOqIUvIlIIJXwRkUIo4YuIFEIJX0SkELpp26OZeENoJsY8U7T6Hf7ZWs7VujSR/yGYKiXXf7XwRUQK0VML38w2AjuAUWDE3QfrCEpEROpXxyWdd7j74zUsR0RE+kiXdERECtFrC9+BH5uZA//q7iuqE5jZcmA5wKJFi3pcncjUmswbftPx26NTtf39jGMiy5rpN3x7beGf5O5vAN4FfMTM/rA6gbuvcPdBdx8cGBjocXUiItKtnhK+u/8uvm8Fvg+cUEdQIiJSv64TvpkdambPT93AHwHr6gpMRETq1cs1/KOA75tZWs433P1HtUQlIiK16zrhu/sG4PU1xiIiIn2kn1YQmYVmwk8c9NNkbf94Py0xHZ/i0XP4IiKFUMIXESmEEr6ISCGU8EVECqGbtiLTxHS/4VeK2XzDWy18EZFCKOGLiBRCCV9EpBBK+CIihdBNW5kyukkps910q+Nq4YuIFEIJX0SkEEr4IiKFUMIXESmEbtqKzADT7eafzExq4YuIFEIJX0SkED0lfDM73cx+ZWYPm9kldQUlIiL16zrhm9kc4F+AdwHHAeeZ2XF1BSYiIvXqpYV/AvCwu29w92HgW8CZ9YQlIiJ1M/fu7vib2TnA6e7+wdh/PvBmd7+wMt1yYHnsfSXwq+7DBeAI4PEm3b32lzbvdImj9G0offunSxyTOW83XubuAz0uA9y9qxdwLvDlrP984AvdLm8C613drLvX/tLmnS5xlL4NpW//dIljMuedylcvl3Q2AS/N+o8GftfD8kREpI96Sfj/CxxrZi83swOB9wI31ROWiIjUretv2rr7iJldCPwHMAe43t0fqC2y1la06O61v7R5p0scpW9D6ds/XeKYzHmnTNc3bUVEZGbRN21FRAqhhC8iUorJfiwIGAXWAg8A9wJ/DRzQZLqNwBGxex1w8zjLvAC4Jnb/BfD+NjGcAVyS9S8Gfl+Z5lHg4rjsF8dhXwaOy6ZZAiytTDMIXB27HdiZTfck8E/Azmz8KPAg4fsJv09lEee7GHgY+E5cxoXAJ6tlAVwGXBeXtSuW143AIW3KIcX1aeCrwN5s3CBwddy204A74v7aC2wArgEOGmfZnwZOzfrPjtv7B5V6kF4bgME47hbgkWz/bwHeELfzxricxwnf+0jbfzHwFeCcShyLgf8EzgFeDHynUm7XAm9pUj9/E9dzUxz+LeChuLyHCfXzsCbbcX3clm1xGdcAB8Vt+iqx/gCXZuu8JS7rIsJ3VvK6/NEUc1zXRuDXcfkPAx8n1uUY75XZcp8EvgncDIxWyuXXwD9XY8nLs8k+PT6uY4xG3b46xnIv8ESsH+dk8WwADov9FwOPNjvOCfVtc2VcqoMbs7pwGLCnXcyEentCjGlprBsp5/xxNt2lcTseJxx/j8X9uDluyz7gfuBncd2HER5OeQC4L077aeAbhF8cWEzMJcDJZHVrnGOlo+nqeE1FC3+3uy9x91cTEslS4FNt5rmwzXgAA3D3LwLfiD/90JS73+TuV1QGV7/UcF18v4CQKHD3D7r7L7Np9kv4cZrV7v7ROH4YOAR4a5yumUfj+GsIlSsvi3PiMs+J6zoLOLHJMk4mJK7d7n6ou78mrvvDZjZ3nLLIE/6GNNDM5mbbcQFwEvAmwsHyEHAscDDh5NXK5cBPs/7zgDsJT3NBox4sISSSR9KE7r6UkFSSQ4HXx+5XE8rpHne/a5z1J4uBV8Tl/i6W5bObChwDvCUbtht4I+EptK2E5AJwCSEBAOxx98Xuvr2yHW8EXgP8PeHE+2ZiOcVtWkY4uUNINM9ub1zWRcCB2fAvuvvVWczDwDp3PyaWwwuBV2Z1eS/wHjM7skk57K70fwB4VTUWM2v6IIeZGeGkP0ZIhruAEeAvCUny5YSkuTWbbS+hjPNlXkcTsb4tbDLso5VJDwPm5TG3cAFwFLCAxrF3Z4z7vdl0ZwC/JTRIvgh8lpBvNhLKbDvwp8C5cR+9AziV0Dh5Xew+Fbgqyw0pl5zM/nWrlWenGy9v1WIyziqVs9nOSv8rCBV5DSGZDAFPESrWEHADoaWQWhZjhJ02nA0fJVQ+r0xbHeaEM/0wjVZK9TWWjRsdZ5rxho8RKkqzaVotc6KvTZVY81ez7W4Vczev0cryRoB7mky3szLdvviqxjcay6taNk+12I58/j1x2O6sPEYJyaa6jgcr8Yw22ZZ8H+2N44Z5bhxp3dXtbVdu1W1M29Nq/zTbl2lZKb7qtnZa55qtczTuoy1ZGTw+TnydbucQ4UTRbt5mx3EddbdaxmOV9/HKJeWiamwPED5F5dPtplHXU1n8EngboVG1LyvjdwM/ysp5J+GTyl/F5Q7F93dmn2JWAj8mnJDeQ2h03R+XM69t/p3qhB+HbSecja+IG/8uwscrB74e358ktIKd0BrMD+51WaE/FQs433Fb4s5KlTDtvJHK8IfjjsgP5L3ZK027N1t/qkijwDNxRziNJDEKPJ2tY02TCtbqwM4r5XD2/pPYvY9wqWGYcDCl6fdkFS+VSYpnN+GEmcY1O/n9JBuWx/XvWffns3G7CC27NO7BrNyH4v7dDfxXHL6jsm3bs+5nCB/DU9mnZaZ9kyeSK+L0KbbdcdvvyKZZFePbBXw7DruZ0CpNB96+uK6031NZjbD/Ab2psm9+lPXvy7o3ZMv4djZ9fkIYjWWT+jdmyxkiXHJxQqu0epLbUdmHP8zK8wtZGQ1X4r8t21/5MkcJ9SfNlxLXLmB9ti9GsvHN6mza5k3Ztu7OyiY/1pznnhTSMeSEOvCP2bhfEy577htn/uor5ZDhWGbb43bkDYxq3R9qspyxbHjKD+m4Pz9bzgYadeLdhAT9MHA3MD/OtyLmvN/EWBbSOD5eGMd9B3hr7L6axiWiywj1YR7hE+8Q8K447vvAWTMl4e8hnC2fiQV3Yqw0ewnXyFISS5V8iEaSdcLBm1fUdGLIK3C+A/dlw/Nxe5pMmz49NFvGGKFijsb17qB5K+/pFhWy3SsdQOkAzxNs2rZfxBger8xbbc20ask8E2POD55qy7XZdvxdFt8wsDpbR9of/9divdXXGlqf8FJco5X+McKnv3QgPkHjJPh0Ns3jcRvT/Q0n1LUtNE4oY+xfRnlCy086e9m/nFIreEdcZ7ttTYm2WSv/89k0eV3fnm1XX
sd306hrW5uso1p+eb1PJ4G8zPNhI7HMdhHq126ab1v1+Hqysp70Sif0Pewf1w6e+0k4NVR2EVqzKaZmyblVeae4UsPvqbieXU1iqy5jd4tl5p/688bmzmz4EI1G6/2EE84Tcf+8Ppbp3XHcVuI9sxjbb4Elsf+puNzdhDq3C3g+IeH/bZzmgDguPVr/aeCidvl3yp/SMbPzCF/cehPwP4SdOz+OduB5sfs3hILKd2pqaX+usth0zXA4vt9JSCrpunA6iFKyTA4gtHSejv17CZXSCSegYcI197TTLfZD2Elz43saT9yez2Tbs2Oc4hjLpsul/nR9bw2Nk9Mu4AWEG4PJdkKLN8W+M1vvVsL9hhT/Qdl6k/SN6ZHK8J9ksRxM4ykvA16UTXdVfD+axjXwvEzSwbKFcAAOx2WkZJxfB07z/CxOkxjw9hiD00j8I8DtcZnp0sTcOP1v4rxvd/ejaLReU3zr4/xpPfsq2/8o+5fJ1yrj03XydElyD/C9bHyK44A4Pn3yJItjFY1GBoQbzumTCDSui+fuy7ovz6ZNDZFUhpvj+6Y4zRwaLf1U5rsIiSntWye0KNNx14m07SmGwdidl+tYXO6h2XxOaMlDuMF7djY8nQSHs+nzEyE0yizth7S9h8T3+exfB3OpUXdfNj59ohzKpr+fcCJZSziuHovD9xKS9rzY/1bCPYEbadTdQ4Bvuftr4/DcGI289QLCZZyDgQ8BN7p7On73Arj7GLDPY7avzN/SlCZ8MxsAPgFscPchwsefeYS73XNid/r27lxCC8IIGzeHsAOdcC0r7ZDHaPxMc0qCzyfcnEsV7gWxe1HszyvyAhonnHmEnTQW3+cQLj0NZdMfHscfFJfzYhpnX2L3i+I4o5G0q8bYP4GS9R9CoyKlpHYIoTI+QiOJ5Mn4MMLJ0ggJKB1Yw4TyStPPoZGIktfF97nsXzaLsunel23j3FgOadwF2TalRD5KI6mkYYcTDqrbYjzV7c5v+C2J86RPgbB/Mr8nDj+Q0KpPsac649l8f2ZmZxBuWqZEdkCMZy7hqRMICf4L2fYfyP774W2x+5BYFumGa2oBziPUvXSw5uV5AOHmXor/9Dj8rZXtfhFhH6Vlz6dxwjiUxuWo5PAsxjmEupLKM/3W1auy6S2+0o3e1Hg4OK5zZ1zOKM890SSpXPP9l9ebU+P7gTTKZW7sz5NUqhNOuCn9TBw+Ny5zhOymNo0c4dl00EjwC7LhqS6mfVDNfWkd+QloTpxnPmH7IezPw2nkqFS3DiTkmPRQxXsJ+/0Ewr3JB9m/jp9J4wSV8lmyD1hqZvOAP4/rq8cUXNKpPpb5ScI1yPvi+wiNj8dDwGtpHKx7aFxzrl6KSR+t0k3B6nXP/KNmq5tw432Ua/aRfyxbVnVcLzdnU4uuGke+PUPZtNWPpfl1+erlilbL7OTSS7uYU/fDND5ZOI3LE+k6drPLZvklq/Ta3aQc02WofFj1XswIz720tpv9L32k9aZPe3kZjFSW3WpfTrTsml0uSPumWierMTV77aH5AwvVGNtd+qiubzTGM0SjLraqQ81e+fX+tMxWl0qqr/wYzW98TqR+Nrv0k/JHddpmuaDZ/h5pMv8Y4bJNvox0vy5/baJx03aYUNc3AE/EnLgpDvtVnG597N9JaLD8KruGnz96ujPr3m/ctLmG3+ZksJjw2Fmn03+FynPXlfG3A0OdrI8m9xb6EXOLZYy77rgzf9zJDs3mOZlwc3Ij8RnmJsucyPKenZ7wUflRwmW4+XHYMXFdB05VfahpneOWSwfjny1vwqN2vwXeOM70z6nDcR3/kG97rMuDab9OcJtaHifVca3qS5P52m5bZfo5lbqyE1jf4756J3BL7N410fqX9mW3x35138TuSa+zE3l1/eNpUrS9hOSzF7gzfvQ04MMe/v1MAHf/b+BlUx1HP3SxbYcAt2V15efAS3oM4yDgLWZ2L+Gyi+pfG/rxNBGRQkz5UzoiIjI5lPBFRAqhhC8iUgglfBGRQijhi4gU4v8B+5WeiWcIHnsAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.bar(words.keys(), words.values(), color='g')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Consuming Sentences and producing words lengths" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'sentences'" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "topics[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 126, + "metadata": {}, + "outputs": [], + "source": [ + "consumer_conf['group.id'] = str(uuid4())\n", + "c4 = DeserializingConsumer(consumer_conf)\n", + "c4.subscribe([topics[0]])\n", + "\n", + "pconf = {\n", + " 'bootstrap.servers': brokers,\n", + " 'key.serializer': StringSerializer('utf_8'),\n", + " 'value.serializer': IntegerSerializer()\n", + "}\n", + "\n", + "\n", + "p2 = SerializingProducer(pconf)" + ] + }, + { + "cell_type": "code", + "execution_count": 128, + "metadata": {}, + "outputs": [], + "source": [ + "import time" + ] + }, + { + "cell_type": "code", + "execution_count": 129, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dolore\n", + "porro\n", + "aliquam\n", + "neque\n", + "adipisci\n", + "numquam\n", + "neque.\n", + "Ut\n", + "consectetur\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "%% Aborted by user\n" + ] + } + ], + "source": [ + "try:\n", + " while True:\n", + " # SIGINT can't be handled when polling, limit timeout to 1 second.\n", + " msg = c4.poll(1.0)\n", + " if msg is None:\n", + " continue\n", + " if msg.error():\n", + " raise KafkaException(msg.error())\n", + " else:\n", + " for w in msg.value().split(\" \"):\n", + " print(w)\n", + " p2.produce(topics[2], key=w, value=int(len(w)))\n", + " p2.poll(0)\n", + " p2.flush()\n", + " time.sleep(5)\n", + "except KeyboardInterrupt:\n", + " sys.stderr.write('%% Aborted by user\\n')\n", + "finally:\n", + " # Close down consumer to commit final offsets.\n", + " c4.close()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Calculating the Average" + ] + }, + { + "cell_type": "code", + "execution_count": 151, + "metadata": {}, + "outputs": [], + "source": [ + "consumer_conf = {\n", + " 'bootstrap.servers': brokers,\n", + " 'key.deserializer': string_deserializer,\n", + " 'value.deserializer': IntegerDeserializer(),\n", + " 'group.id': str(uuid4()),\n", + " 'session.timeout.ms': 6000,\n", + " 'auto.offset.reset': 'earliest'\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 152, + "metadata": {}, + "outputs": [], + "source": [ + "c5 = DeserializingConsumer(consumer_conf)\n", + "c5.subscribe([topics[2]])\n", + "\n", + "window = [-1, -1] # opening and closing instance\n", + "buffer = []" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 153, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[6]\n", + "6.0\n", + "[5]\n", + "5.0\n", + "[7]\n", + "7.0\n", + "[5]\n", + "5.0\n", + "[8, 7, 6, 2, 11, 3, 8, 8, 2, 10, 4, 6, 3, 5, 5, 10, 4, 7]\n", + "6.055555555555555\n", + "[7, 7, 7, 7, 4, 12, 5, 5, 4, 2, 5, 8, 3, 10, 6, 4, 5, 8, 3, 8, 6, 7, 3, 5, 6, 12, 3, 4, 7, 10, 6, 4, 6, 6, 7, 7, 11, 5, 3, 6, 10, 7, 8, 4, 8, 5, 10, 5, 4, 5, 4, 7, 7, 8, 
7, 8, 8, 5, 7, 4, 4]\n", + "6.213114754098361\n", + "[3, 8, 3, 3, 7, 7, 6, 7, 3, 10, 5, 3, 5, 4, 2, 7, 4, 4, 7, 4, 8, 7, 7, 4, 7, 4, 2, 7, 6, 8, 8, 4, 5, 6, 3, 5, 4, 5, 5, 3, 10, 3, 10, 8, 10, 5, 3, 5, 5, 8, 6, 8, 5, 7, 8, 8, 4, 3, 4]\n", + "5.593220338983051\n", + "[5, 9, 8, 8, 10, 2, 6, 5, 2, 5, 7, 5, 4, 6, 8, 7, 8, 7, 7, 7, 4, 12, 6, 5, 3, 9, 8, 5, 5, 12, 6, 10, 3, 7, 6, 7, 4, 7, 8, 3, 7, 3, 5, 6, 8, 5, 7, 3, 4, 8, 6, 5, 3, 10, 7, 5, 5, 7]\n", + "6.206896551724138\n", + "[11, 5, 5, 5, 5, 6, 7, 5, 4, 2, 7, 8, 4, 3, 5, 4, 4, 3, 6, 4, 6, 6, 4, 3, 4, 7, 6, 7, 7, 5, 8, 5, 3, 5, 6, 7, 3, 2, 6, 8, 7, 3, 8, 7, 4, 4, 7, 5, 4, 6, 7, 2, 5, 5, 6, 11, 11, 7, 11, 11, 2]\n", + "5.639344262295082\n", + "[7, 4, 7, 10, 8, 7, 7, 7, 3, 11, 3, 4, 4, 6, 3, 4, 5, 8, 11, 7, 5, 5, 5, 7, 11, 7, 6, 8, 4, 6, 6, 6, 6, 5, 7, 3, 8, 4, 6, 7, 7, 7, 7, 7, 3, 4, 3, 5, 10, 4, 5, 11, 5, 3, 5, 7, 7, 5, 8]\n", + "6.11864406779661\n", + "[7, 7, 8, 5, 11, 7, 3, 5, 5, 4, 6, 10, 6, 4, 3, 6, 7, 5, 5, 6, 7, 11, 7, 7, 3, 6, 4, 7, 5, 7, 10, 5, 3, 5, 4, 8, 7, 6, 6, 8, 3, 7, 8, 10, 3, 7, 4, 6, 4, 5, 7, 11, 7, 2, 3, 5, 5]\n", + "6.017543859649122\n", + "[5, 6, 3, 5, 5, 6, 7, 10, 7, 8, 4, 6, 12, 7, 7, 8, 2, 10, 8, 2, 5, 7, 11, 4, 6, 3, 5, 7, 7, 7, 8, 5, 3, 6, 6, 3, 10, 7, 4, 7, 10, 8, 7, 4, 3, 5, 5, 5, 11, 5, 5, 5, 2, 3, 6, 2, 5, 3]\n", + "5.913793103448276\n", + "[5, 5, 5, 3, 7, 7, 7, 4, 9, 2, 7, 7, 11, 6, 5, 5, 7, 3, 7, 5, 7, 3, 2, 6, 4, 5, 8, 5, 5, 5, 6, 8, 5, 5, 5, 6, 7, 7, 2, 8, 10, 6, 5, 3, 5, 3, 8, 4, 8, 3, 6, 6, 4, 7, 3, 3, 3, 3]\n", + "5.448275862068965\n", + "[6, 8, 5, 10, 5, 10, 4, 8, 6, 11, 6, 11, 4, 5, 7, 7, 2, 5, 9, 7, 10, 10, 6, 6, 7, 10, 11, 4, 7, 8, 6, 11, 8, 10, 5, 3, 3, 7, 8, 2, 11, 6, 6, 3, 8, 4, 5, 4, 11, 3, 5, 5, 8, 7, 7, 7, 6, 3, 11]\n", + "6.745762711864407\n", + "[3, 7, 7, 8, 7, 2, 3, 11, 4, 5, 4, 4, 7, 5, 6, 7, 7, 3, 8, 6, 7, 6, 11, 6, 4, 7, 4, 11, 11, 8, 11, 6, 5, 10, 6, 3, 10, 8, 5, 11, 7, 10, 5, 10, 5, 9, 5, 6, 4, 5, 7, 6, 6, 8, 10, 7, 6, 4, 6, 11]\n", + "6.683333333333334\n", + "[3, 7, 2, 3, 8, 5, 5, 3, 11, 3, 3, 4, 7, 5, 7, 5, 6, 8, 6, 4, 4, 7, 3, 5, 3, 7, 8, 6, 3, 6, 7, 3, 11, 8, 6, 10, 5, 10, 6, 5, 6, 3, 7, 7, 6, 4, 5, 8, 7, 6, 8, 4, 7, 6, 6, 6, 4, 6, 6, 10]\n", + "5.833333333333333\n", + "[7, 5, 8, 5, 10, 4, 7, 8, 7, 7, 6, 6, 5, 4, 2, 4, 7, 6, 7, 6, 7, 8, 7, 5, 4, 8, 4, 4, 4, 3, 3, 11, 11, 4, 7, 10, 5, 4, 5, 4, 4, 7, 8, 6, 8, 4, 7, 5, 7, 12, 7, 10, 3, 2, 5, 5, 6, 2]\n", + "5.982758620689655\n", + "[7, 2, 3, 6, 10, 12, 2, 3, 4, 7, 6, 7, 8, 2, 4, 7, 7, 7, 4, 6, 9, 5, 6, 10, 8, 6, 2, 7, 10, 7, 3, 3, 7, 4, 6, 7, 7, 8, 11, 10, 6, 5, 11, 4, 8, 7, 8, 3, 5, 8, 7, 8, 5, 8, 8, 4, 2]\n", + "6.2631578947368425\n", + "[7, 7, 7, 4, 12, 3, 3, 7, 7, 6, 4, 7, 5, 7, 4, 6, 5, 10, 3, 3, 7, 5, 5, 6, 4, 5, 7, 11, 6, 11, 8, 7, 7, 9, 5, 7, 7, 2, 6, 5, 3, 6, 4, 7, 6, 5, 6, 7, 5, 7, 3, 11, 7, 8, 6, 5, 8]\n", + "6.157894736842105\n", + "[9, 6, 5, 6, 3, 6, 3, 7, 3, 8, 3, 9, 5, 6, 6, 5, 3, 5, 7, 7, 7, 9, 6, 3, 7, 5, 7, 8, 7, 6, 4, 3, 5, 8, 5, 7, 5, 4, 5, 8, 6, 4, 6, 7, 6, 7, 4, 4, 7, 11, 10, 5, 11, 3, 2, 7, 5, 8]\n", + "5.931034482758621\n", + "[3, 6, 7, 6, 5, 4, 7, 6, 5, 6, 7, 5, 4, 6, 7, 3, 7, 7, 7, 3, 8, 8, 8, 4, 3, 5, 6, 5, 8, 4, 8, 3, 7, 4, 3, 3, 5, 8, 5, 4, 3, 5, 7, 11, 5, 5, 11, 5, 4, 10, 4, 5, 11, 7, 5, 3, 7, 4]\n", + "5.724137931034483\n", + "[6, 10, 3, 7, 9, 3, 6, 7, 6, 7, 6, 7, 5, 6, 7, 10, 3, 3, 7, 4, 7, 5, 6, 2, 7, 5, 5, 5, 6, 6, 4, 7, 3, 4, 4, 7, 2, 4, 2, 11, 4, 7, 6, 4, 5, 6, 7, 11, 6, 7, 8, 4, 3, 5, 3, 7]\n", + "5.660714285714286\n", + "[7, 10, 5, 3, 3, 10, 10, 9, 3, 4, 8, 7, 4, 5, 4, 4, 11, 6, 6, 3, 3, 7, 7, 8, 5, 
5, 6, 7, 7, 7, 4, 10, 11, 5, 5, 5, 3, 7, 4, 7, 2, 5, 7, 5, 10, 8, 3, 7, 11, 4, 5, 4, 3, 5, 4, 4, 8, 7, 11, 7, 6]\n", + "6.081967213114754\n", + "[5, 4, 4, 8, 8, 5, 7, 8, 7, 5, 8, 12, 3, 7, 4, 8, 7, 4, 3, 8, 10, 11, 2, 4, 11, 7, 6, 7, 5, 4, 6, 3, 5, 3, 4, 3, 10, 6, 6, 7, 5, 7, 7, 8, 11, 2, 3, 3, 3, 4, 6, 10, 9, 8, 4, 6, 8, 7, 6]\n", + "6.135593220338983\n", + "[10, 7, 4, 11, 8, 8, 4, 4, 4, 9, 7, 5, 4, 7, 8, 4, 2, 7, 5, 5, 2, 8, 8, 6, 8, 5, 5, 11, 11, 4, 12]\n", + "6.548387096774194\n", + "[6]\n", + "6.0\n", + "[5]\n", + "5.0\n", + "[7]\n", + "7.0\n", + "[5]\n", + "5.0\n", + "[8]\n", + "8.0\n", + "[7]\n", + "7.0\n", + "[6]\n", + "6.0\n", + "[2]\n", + "2.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "%% Aborted by user\n" + ] + } + ], + "source": [ + "try:\n", + " while True:\n", + " # SIGINT can't be handled when polling, limit timeout to 1 second.\n", + " msg = c5.poll(1)\n", + " if msg is None:\n", + " continue\n", + " if msg.error():\n", + " raise KafkaException(msg.error())\n", + " else:\n", + " if(window[0]==-1):\n", + " window[0]=msg.timestamp()[1]\n", + " window[1]=msg.timestamp()[1]+4*100\n", + " buffer.append(msg.value())\n", + " elif (msg.timestamp()[1]> window[1]):\n", + " print(buffer)\n", + " print(sum(buffer) / len(buffer))\n", + " old = window[1]\n", + " window[0]= old\n", + " window[1]= old +4*100\n", + " buffer = [msg.value()]\n", + " else:\n", + " buffer.append(msg.value())\n", + "except KeyboardInterrupt:\n", + " sys.stderr.write('%% Aborted by user\\n')\n", + "finally:\n", + " # Close down consumer to commit final offsets.\n", + " c5.close()" + ] + }, + { + "cell_type": "code", + "execution_count": 150, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[11]" + ] + }, + "execution_count": 150, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "buffer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Kafka Stream.md b/Kafka Stream.md deleted file mode 100644 index c16335c..0000000 --- a/Kafka Stream.md +++ /dev/null @@ -1,235 +0,0 @@ -footer: [Riccardo Tommasini](http://rictomm.me) - riccardo.tommasini@ut.ee - @rictomm - -autoscale: true -slidenumbers: true - - -# Kafka Streams - -### Stream Processing with Kafka - -- As of version 0.10.0, Kafka streams library has been added to Kafka distribution - -- It is no longer just a distributed message broker - -- You can process messages in the different Kafka topics in real-time - -- You can produce new messages to (other) topics - -### Kafka Streaming Library - -![image](./attachments/KafkaStreamingLibrary.png) - ---- - -![inline](./attachments/KafkaStreamingLibrary.png) - - -### Internally - -![inline](./attachments/KafkaStreamingInternally.png) - -### Stream-Table Duality[^3] - -- Streams carry individual stateless events - - - An individual event can represent a state change, e.g., for a table - -- Tables can be used as a state at a certain time (snapshot) - - - Accumulation of the individual events (stateful) - -- See [event sourcing design 
pattern](https://martinfowler.com/eaaDev/EventSourcing.html)

### Stream-Table Duality

![inline](./attachments/img0023.png)

### Stream-Table Duality

![inline](./attachments/img0024.png)

### KStream/KTable

- KStream
  - Record stream
  - Each record describes an event in the real world
  - Example: a click stream
- KTable
  - Changelog stream
  - Each record describes a change to a previous record
  - Example: a position report stream
- In Kafka Streams:
  - A KTable holds a materialized view of the latest update per key as internal state

### Example

![inline](./attachments/KafkaStreamExample1.png)

### Example

![inline](./attachments/KafkaStreamExample2.png)

### Example

![inline](./attachments/KafkaStreamExample3.png)

### Processor Topology

- Similar to a Storm topology: a DAG in general
- Several topologies can be linked together
  - Achievable by writing back to Kafka
![inline 70%](./attachments/ProcessorLegend.png)

![right fit](./attachments/ProcessorTopology.png)

### Kafka Streams DSL

```java
KStream<..> stream1 = builder.stream("topic1");
KStream<..> stream2 = builder.stream("topic2");
KStream<..> joined = stream1.leftJoin(stream2, ...);
KTable<..> aggregated = joined.aggregateByKey(...);
aggregated.to("topic3");
```

![left fit](./attachments/KafkaStreamsDSL.png)

### Stateful Processing

- Stateful processors
  - Windowing
  - Joins
  - Aggregation
- Kafka provides a configurable local state store
  - Memory
  - Disk

![right fit](./attachments/KafkaStatefulProcessing.png)

### Notions of Time

- Recall we have
  - Event time: when the data was actually generated
  - Processing time: when the data was received/processed by the system
- Kafka provides a uniform `Timestamp Extractor`
  - Based on the Kafka configuration `log.message.timestamp.type`, Kafka Streams will read either the ingestion time or the event time (default)
  - You can still create your own extractor

### Windowing

- Kafka Streams supports time-based windows only
  - **Tumbling**
  - Sliding (called hopping)
  - Session

```java
KStream<String, GenericRecord> pageViews = ...;
// Count page views per window, per user, with tumbling windows of size 5 minutes
KTable<Windowed<String>, Long> windowedPageViewCounts = pageViews
  .groupByKey(Grouped.with(Serdes.String(), genericAvroSerde))
  .windowedBy(TimeWindows.of(Duration.ofMinutes(5)))
  .count();
```

### Windowing

- Kafka Streams supports time-based windows only
  - Tumbling
  - **Sliding (called hopping)**
  - Session

```java
KStream<String, GenericRecord> pageViews = ...;
// Count page views per window, per user, with hopping windows of size 5 minutes
// that advance every 1 minute
KTable<Windowed<String>, Long> windowedPageViewCounts = pageViews
  .groupByKey(Grouped.with(Serdes.String(), genericAvroSerde))
  .windowedBy(TimeWindows.of(Duration.ofMinutes(5)).advanceBy(Duration.ofMinutes(1)))
  .count();
```

### Windowing

- Kafka Streams supports time-based windows only
  - Tumbling
  - Sliding (called hopping)
  - **Session**

```java
KStream<String, GenericRecord> pageViews = ...;
// Count page views per session, per user, with session windows that have
// an inactivity gap of 5 minutes
KTable<Windowed<String>, Long> sessionizedPageViewCounts = pageViews
  .groupByKey(Grouped.with(Serdes.String(), genericAvroSerde))
  .windowedBy(SessionWindows.with(Duration.ofMinutes(5)))
  .count();
```

### Late Arrival

- By default, Kafka Streams emits an updated result for each newly arriving record
  - No need for special handling of late arrivals; simply a new result is emitted
  - Window retention is a configuration parameter in Kafka, default is one day
  - No watermark support
- Sometimes, you need to receive one result at the end of the window
  - You can do that in your code

```java
KGroupedStream<UserId, Event> grouped = ...;
grouped
  .windowedBy(TimeWindows.of(Duration.ofHours(1)).grace(Duration.ofMinutes(10)))
  .count()
  .suppress(Suppressed.untilWindowCloses(unbounded()))
  .filter((windowedUserId, count) -> count < 3)
  .toStream()
  .foreach((windowedUserId, count) -> sendAlert(windowedUserId.window(), windowedUserId.key(), count));
```

### Conclusion

![inline](./attachments/the end.png)

[^1]: Slides are based on content from Cloudera and Confluent

[^3]: Matthias J. Sax, Guozhang Wang, Matthias Weidlich, Johann-Christoph Freytag. *Streams and Tables: Two Sides of the Same Coin*.
BIRTE 2018: 1:1-1:10 - - - -### Systems Overview: [[Kafka Stream]]/KSQL-DB ![inline](./attachments/Images/Kafka.png) - -Kafka Stream is an open-source stream processing engine that enables -scalable data processing on top of Apache Kafka. - -Kafka Stream programs make use of topics, on which they build two -further abstractions: Streams and Tables. Kafka Stream Programs consume -and produce data using a functional API. It is also possible to write -directly Dataflow topologies using the so called Processor API. -References: - -Sax, Matthias J., et al. "Streams and tables: Two sides of the same -coin." Proceedings of the International Workshop on Real-Time Business -Intelligence and Analytics. 2018. - -### Systems Overview: Kafka Stream/KSQL-DB ![inline](./attachments/Images/KSQL.png) - -KSQL-DB is a SQL engine built on top of Kafka Streams. SQL queries are -rewritten into Operator topologies and executed over the Kafka Cluster. - -References: - -Johan Desai. "KSQL: Streaming SQL Engine for Apache Kafka." EDBT. -2019. \ No newline at end of file diff --git a/Key-Value Store.md b/Key-Value Store.md deleted file mode 100644 index 6f7f351..0000000 --- a/Key-Value Store.md +++ /dev/null @@ -1,40 +0,0 @@ -# Key-Value Stores - -### Why Key-value Store? - -(Business) Key -> Value - -(twitter.com) tweet id -> information about tweet - -(kayak.com) Flight number -> information about flight - -(yourbank.com) Account number -> information about it - -(amazon.com) item number -> information about it - -Search by ID is usually built on top of a key-value store - -### Isn’t that just a database? - -- Queried using SQL -- Key-based -- Foreign keys -- Indexes -- Joins - -```sql -SELECT user_id from users WHERE - username = “jbellis” -``` - -![right fit](./attachments/reldbex.png) - -### Systems - -- Project Voldemort -- [[Redis]] -- Memcached -- ETCD -- Amazon's Dynamo -- RocksDB - diff --git a/Log.md b/Log.md deleted file mode 100644 index 97ac64f..0000000 --- a/Log.md +++ /dev/null @@ -1,32 +0,0 @@ - - -### The World's most simple database - -```bash -#!/bin/bash - -db_set () { - echo "$1,$2" >> db - -} - -db_get () { - grep "^$1," db | sed -e "s/^$1,//" | tail -n 1 -} -``` - -^ db_set is appending data to a file. This is generally quite efficient. - -Indeed, many databases internally use the same strategy, but it is not a normal file, is a log. - -### The Log - -A log is an append-only sequence of records. It doesn’t have to be human-readable; it might be binary and intended only for other programs to read. - -![inline](./attachments/commitlog.png) - -^ Questions: -- What is the cost of lookup O(n) -- What is the cost of write O(1) -- What is the cost of read from the head O(1). - diff --git a/Memcached.md b/Memcached.md deleted file mode 100644 index 6e70ef2..0000000 --- a/Memcached.md +++ /dev/null @@ -1,226 +0,0 @@ -# Memcached - -### About memcached - -[[Key-Value Store]] - -**Free & open source, high-performance, distributed memory object caching system** - -**Generic in nature, intended for use in speeding up dynamic web applications by alleviating database load.** - -**key/value dictionary** - -**Developed by Brad Fitzpatrick for LiveJournal in 2003** - -**Now used by Netlog, Facebook, Flickr, Wikipedia, Twitter, YouTube ...** - -### Technically - -* **It’s a server** -* **Client access over TCP or UDP** -* **Servers can run in pools** - * **eg. 
3 servers with 64GB mem each give you a single pool of 192GB storage for caching**
  * **Servers are independent, clients manage the pool**

### What to store in memcache?

**high demand** (used often)

**expensive** (hard to compute)

**common** (shared across users)

**Best: all three**

### What to store in memcache? (cont’d)

* **Typical:**
  * **user sessions** (often)
  * **user data** (often, shared)
  * **homepage data** (eg. often, shared, expensive)

### Memcached principles

**Fast network access** (memcached servers close to the other application servers)

**No persistence** (if your server goes down, data in memcached is gone)

**No redundancy / fail-over**

**No replication** (a single item in the cache lives on one server only)

**No authentication** (not in shared environments)

### Memcached principles (cont’d)

**1 key is maximum 1MB**

**keys are strings of up to 250 characters** (in the application, typically an MD5 of a user-readable string)

**No enumeration of keys** (thus no list of the valid keys in the cache at a certain moment)

**No active clean-up** (only cleans up when more space is needed, LRU: Least Recently Used)

**PHP Client functions**

**Memcached::decrement** — Decrement numeric item's value

**Memcached::delete** — Delete an item

**Memcached::flush** — Invalidate all items in the cache

**Memcached::get** — Retrieve an item

**Memcached::getMulti** — Retrieve multiple items

**Memcached::getStats** — Get server pool statistics

**Memcached::add** — Add an item under a new key

**Memcached::addServer** — Add a server to the server pool

**Memcached::increment** — Increment numeric item's value

**Memcached::set** — Store an item

...

```php
<?php
$memcache = new Memcached();
$memcache->addServer('localhost', 11211);

$html = $memcache->get('mypage');
if (!$html) {
    ob_start();
    // ... all the fancy code that generates the page ...
    $html = ob_get_contents();
    ob_end_clean();
    $memcache->set('mypage', $html, 600);
}
echo $html;
?>
```

### Data caching

* **on a lower level**
* **easier to find all dependencies**
* **ideal solution for offloading database queries**
  * **the database is almost always the biggest bottleneck in backend performance problems**

```php
<?php
function getUserData($UID)
{
    global $memcache;

    $key = 'userdata_' . $UID;
    $userData = $memcache->get($key);
    if (!$userData) {
        $queryResult = Database::query("SELECT * FROM USERS WHERE uid = " . $UID);
        $userData = $queryResult->getRow();
        $memcache->set($key, $userData);
    }
    return $userData;
}
?>
```

 **“There are only two hard things in Computer Science: cache invalidation and naming things.”**

 **Phil Karlton**

### Invalidation

* **Caching for a certain amount of time**
  * **eg. 10 minutes**
  * **don’t delete caches**
  * **thus: you can’t trust that data coming from the cache is correct**

### Invalidation (cont’d)

* Use: great for summaries
  * Overview pages
  * Pages where it’s not that big a problem if data is a little bit out of date (eg. search results)
* Good for quick and dirty optimizations

* **Store forever, and expire on certain events**
  * **the userdata example**
  * **store userdata forever**
  * **when the user changes any of their preferences, throw the cache away**

```php
<?php
function getUserData($UID, $invalidateCache = false)
{
    $db = DB::getInstance();
    $db->query("SELECT * FROM USERS WHERE uid = " . $UID);
    if ($invalidateCache) {
        $db->invalidateCache();
    }
    return $db->getRow();
}

function updateUserData($UID, $userData)
{
    $db = DB::getInstance();
    $db->query("UPDATE USERS ... WHERE uid = " . $UID);

    // refresh the cached copy after the write
    getUserData($UID, true);
}
?>
```
### Multi-Get Optimisations

* **We reduced database access**
* **Memcached is faster, but access to memcache still has its price**
* **Solution: multiget**
  * **fetch multiple keys from memcached in one single call**
  * **the result is an array of items**

### More tips ...

**Be careful when security matters.** (Remember ‘no authentication’?)

Work on authentication for memcached via the SASL Auth Protocol is ongoing

**Caching is not an excuse not to do database tuning.** (Remember cold cache?)

**Make sure to write unit tests for your caching classes and the places where you use them.** (Debugging problems related to out-of-date cache data is hard and boring. Very boring.)

diff --git a/MongoDB.md b/MongoDB.md
deleted file mode 100644
index a2c754f..0000000
--- a/MongoDB.md
+++ /dev/null
@@ -1,407 +0,0 @@
# MongoDB

![inline](https://webassets.mongodb.com/_com_assets/cms/MongoDB_Logo_FullColorBlack_RGB-4td3yuxzjs.png)

## History and Motivation

- An open-source, document-oriented database.
- Data is stored in JSON-like documents.
- Designed for both scalability and developer agility.
- Dynamic schemas.
- Automatic data sharding.

## What MongoDB is:

- An In-Memory [[Document Databases]]
- Strong consistency (**C**)
- *Tuneably* available (**~~A~~**)
- Horizontally scalable (**P**)

![right fit](./attachments/mongocap.png)

## What MongoDB is not

- Always available[^91]
- No schemas
- No transactions
- No joins
- Max document size of 16MB[^92]

[^91]: there will always be downtime when (i) a new leader is getting elected or (ii) the client driver disconnects from the leader

[^92]: larger documents are handled with GridFS

## Use Cases

- ![inline 40%](https://webassets.mongodb.com/_com_assets/cms/sega-circ@2x-6s2xn2hb4b.png) Capture **game** events, scaling to meet high-write workloads.
- ![inline 60%](https://webassets.mongodb.com/_com_assets/cms/hsbc-4lcqo9svqo.jpg) Financial services: risk analytics & reporting, trade repository
- ![inline 20%](https://webassets.mongodb.com/_com_assets/cms/Bosch%20TRANSPARTENTlogo-ygj9l1uydb.png) Manufacturing, automotive, retail, and energy
- ![inline 20%](https://webassets.mongodb.com/_com_assets/cms/Thermo%20Fisher%20logo-o1cehazdp8.png) Fast-changing sensor data captured from multiple devices and experiments

## When to consider MongoDB

- when you need high availability of data
- when you need fast and instant data recovery
- when you do not want to sustain schema migration costs

### Advantages

- Full-featured indexes
- Sophisticated query language
- Easy mapping to object-oriented code
- Native language drivers in all popular languages
- Simple to set up and manage
- Operates at in-memory speed wherever possible
- Auto-sharding built in
- Dynamically add / remove capacity with no downtime

---
![inline](./attachments/mongodb-43.png)

### Terminology: SQL vs MongoDB

| SQL Terms/Concepts | MongoDB Terms/Concepts |
|--------------------|------------------------|
| database | database |
| table | collection |
| row | document |
| column | field |
| index | index |
| table joins (e.g. select queries) | embedded documents and linking |
| Primary keys | _id field is always the primary key |
| Aggregation (e.g. group by) | aggregation pipeline |
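As a hedged illustration of this mapping, a few lines using pymongo, the official Python driver. It assumes a local mongod on the default port; the database, collection, and field names are made up for the example:

```python
from pymongo import MongoClient

client = MongoClient("localhost", 27017)   # assumed local mongod
db = client["mydb"]                        # SQL database -> database
users = db["users"]                        # SQL table    -> collection

# SQL row -> document; _id (the primary key) is generated if omitted
users.insert_one({"username": "jbellis", "age": 40})

# SELECT * FROM users WHERE username = 'jbellis'
doc = users.find_one({"username": "jbellis"})

# SQL index -> index
users.create_index("username")
```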
## Data Model

### Structure of a JSON document:

![inline](./attachments/mongodb-62.png)

The value of a field can be:
 - Native data types
 - Arrays
 - Other documents

### Collections of Documents

![inline](./attachments/mongodb-61.png)

> Rule: Every document must have an _id.

### Embedded documents:

![inline](./attachments/mongodb-63.png)

### Reference documents

![inline](./attachments/mongodb-64.png)

### Storage: BSON Format

- Binary-encoded serialization of JSON-like documents, optimized for space and speed
- BSON types are a superset of JSON types[^94]
- Zero or more key/value pairs are stored as a single entity[^93]
- Large entities are prefixed with a length field to facilitate scanning

![right fit](https://www.researchgate.net/profile/Bibhudatta_Sahoo/publication/316197770/figure/fig3/AS:484477379059714@1492519816871/Physical-Schema-of-MongoDB-BSON.png)

[^93]: Each entry consists of a field name, a data type, and a value

[^94]: JSON does not have a date or a byte array type, for example

# Operations

![original fit](https://upload.wikimedia.org/wikipedia/commons/thumb/1/12/Basic_arithmetic_operators.svg/1200px-Basic_arithmetic_operators.svg.png)

### Create

Create a database

```
use database_name
```

Create a collection

```
db.createCollection(name, options)
```

### Insert

```python
db.<collection_name>.insert({"name": "nguyen", "age": 24, "gender": "male"})
```

```python
db.employee.insert({
    name: "sally",
    salary: 15000,
    designation: "MTS",
    teams: [ "cluster-management" ]
})
```

### Read

```
db.<collection_name>.find().pretty()
```

```python
db.employee                      #collection
  .find(
    {salary: {$gt: 18000}},      #condition
    {name: 1}                    #projection
  )
  .sort({salary: 1})             #modifier
```

### Update

```python
db.employee                            #collection
  .update(
    {salary: {$gt: 18000}},            #update criteria
    {$set: {designation: "Manager"}},  #update action
    {multi: true}                      #update option
  )
```

^ The multi option allows updating multiple documents at once

### Delete

```python
db.employee.remove(
  {salary: {$lt: 10000}}  #remove criteria
)
```

### Aggregates

SQL-like aggregation functionality

Documents from a collection pass through an aggregation pipeline

Expressions produce output documents based on calculations performed on input documents

Example:

```python
db.parts.aggregate([
  {$group: {_id: "$type", totalquantity: {$sum: "$quantity"}}}
])
```
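The same pipeline can be run from Python through pymongo. A minimal sketch, assuming a local mongod and an illustrative `inventory.parts` collection with `type` and `quantity` fields:

```python
from pymongo import MongoClient

client = MongoClient("localhost", 27017)
parts = client["inventory"]["parts"]   # assumed database and collection

# one output document per part type, summing each document's quantity
pipeline = [
    {"$group": {"_id": "$type", "totalquantity": {"$sum": "$quantity"}}}
]
for row in parts.aggregate(pipeline):
    print(row["_id"], row["totalquantity"])
```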
---
[.column]

#### Save

```python
db.employee.save({
    _id: ObjectId('string_id'),
    "name": "ben",
    "age": 23,
    "gender": "male"
})
```

[.column]

#### Drop

 - Drop a database:

```python
db.dropDatabase()
```

 - Drop a collection:

```python
db.<collection_name>.drop()
```

### Mapping to SQL

| SQL Statement | MongoDB commands |
|---------------|------------------|
| SELECT * FROM table | db.collection.find() |
| SELECT * FROM table WHERE artist = 'Nirvana' | db.collection.find({Artist: "Nirvana"}) |
| SELECT * FROM table ORDER BY Title | db.collection.find().sort({Title: 1}) |
| DISTINCT | .distinct() |
| GROUP BY | .group() |
| >=, < | $gte, $lt |

### Comparison Operators

![inline](./attachments/mongodb-65.png)

[source](https://docs.mongodb.org/manual/reference/operator/query)

### Indexes

- B+ tree indexes
- An index is automatically created on the _id field (the primary key)
- Users can create other indexes to improve query performance or to enforce unique values for a particular field
- Supports single-field indexes as well as compound indexes
- As in SQL, the order of the fields in a compound index matters
- If you index a field that holds an array value, MongoDB creates separate index entries for every element of the array

### Sparse Indexes

- The *sparse* property of an index ensures that the index only contains entries for documents that have the indexed field (records that do not have the field defined are ignored)

- If an index is both unique and sparse, the system will reject records that have a duplicate key value but allow records that do not have the indexed field defined

## Architecture

![right fit](./attachments/mongodb-69.png)

- Mongod – database instance

- Mongos – sharding process:
  - Like a database router: it processes all requests
  - Decides how many and which *mongod*s should receive the query
  - Holds no local data
  - Collects the results and sends them back to the client

- Config server
  – Stores cluster chunk ranges and locations
  – Can have only 1 or 3 (production must have 3)

### Mongod and Mongos

![inline](./attachments/mongodb-70.png)

### Client

- Mongo – an interactive shell (a client)
- A fully functional JavaScript environment for use with MongoDB
- You can have one mongos for the whole system, no matter how many mongods you have
- OR you can have one local mongos for every client if you want to minimize network latency

## Replication

For redundancy MongoDB provides asynchronous replication.

Only one database node is in charge of write operations at any given time (called the primary server/node).

Read operations may go to this same server for strong consistency semantics, or to any of its replica peers if eventual consistency is sufficient.

### Master-Slave Replication

Consists of two servers, one of which takes the role of a master handling write requests and replicating those operations to the second server, the slave.

![inline](https://docs.mongodb.com/manual/_images/replica-set-primary-with-two-secondaries.bakedsvg.svg)

[.column]

### Replica Sets

Consist of groups of MongoDB nodes that work together to provide automated failover.

^ Replica Sets are described as "an elaboration on the existing master/slave replication, adding **automatic failover** and automatic recovery of member nodes"

## Partitioning

- called Sharding in MongoDB
- The user defines a shard key for partitioning
- The shard key defines a range of data
- The key space is like points on a line
- A range is a segment of that line
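Operationally, sharding is switched on per database and per collection with admin commands issued against a mongos router. A hedged pymongo sketch, assuming you are connected to a mongos (not a plain mongod) and using an illustrative collection and composite shard key:

```python
from pymongo import MongoClient

mongos = MongoClient("localhost", 27017)   # must point at a mongos router

# enable sharding for the (illustrative) telemetry database,
# then shard its readings collection on a composite key
mongos.admin.command("enableSharding", "telemetry")
mongos.admin.command(
    "shardCollection",
    "telemetry.readings",
    key={"deviceId": 1, "timestamp": 1},   # range sharding; see the next slides
)
```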
### MongoDB Sharding Strategies

- Ranged
- Hashed
- Tag-aware

---
#### Range Sharding

- Splits shards based on a sub-range of a key (or of multiple keys combined)
    - Simple shard key: {deviceId}
    - Composite shard key: {deviceId, timestamp}

![inline](https://docs.mongodb.com/manual/_images/sharding-range-based.bakedsvg.svg)

---
#### Hash Sharding

- MongoDB applies an MD5 hash to the key when a hash shard key is used:
    - Hash Shard Key(deviceId) = MD5(deviceId)
    - Ensures data is distributed randomly within the range of MD5 values

![inline](https://docs.mongodb.com/manual/_images/sharding-hash-based.bakedsvg.svg)

---
#### Tag Sharding

Tag-aware sharding allows a subset of shards to be tagged and assigned to a sub-range of the shard key.

Example: sharding user data belonging to users from 100 "regions"

Collection: Users, Shard Key: {uId, regionCode}

Tag based on macro regions

---
#### Tag Sharding Example

![inline](./attachments/mongodb-72.png)

---
#### Which Sharding to use?

![inline](./attachments/mongodb-73.png)

### Routing and Balancing

![right fit](./attachments/mongodb-67.png)

- Queries are routed to specific shards
- MongoDB balances the cluster
- MongoDB migrates data to new nodes

### MongoDB Security

- SSL
    - between client and server
    - intra-cluster communication
- Authorization at the database level
    - Read Only / Read+Write / Administrator (see the client-side sketch below)
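From the client side, both concerns surface as connection options. A hedged pymongo sketch; every host, path, and credential below is a placeholder:

```python
from pymongo import MongoClient

# All values below are placeholders for your own deployment.
client = MongoClient(
    "mongodb://dbhost:27017/",
    tls=True,                      # encrypt client<->server traffic (SSL)
    tlsCAFile="/path/to/ca.pem",   # CA used to verify the server certificate
    username="reporting_user",
    password="secret",
    authSource="admin",            # database holding the user
)

# This user's role (read-only, read-write, admin) is what the
# database-level authorization above refers to.
print(client.admin.command("connectionStatus"))
```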
### References

- [Mongodb.com](http://www.mongodb.com/nosql-explained)
- NoSQL Distilled by P. Sadalage and M. Fowler
- MongoDB Applied Design Patterns by R. Copeland
- The Definitive Guide to MongoDB by Plugge, Membrey and Hawkins

![right fit](https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRNTFWf0c7vjNsEOAhDMaSGF1ZTVr8HBLs5V0yOVp6e1XInH8cS)

diff --git a/Query Languages.md b/Query Languages.md
deleted file mode 100644
index 9f1cb1e..0000000
--- a/Query Languages.md
+++ /dev/null
@@ -1,104 +0,0 @@
footer: [Riccardo Tommasini](http://rictomm.me) - riccardo.tommasini@ut.ee - @rictomm
slidenumbers: true

### Querying the Data
[.text: text-scale(2.0)]

![left](./attachments/Processing%20and%20Querying.png)

A generic term that describes data manipulation.

A query is a request for data or information from a database.

### Programming with Streams: Declarative Languages

![right fit](./attachments/Images/programming_step1.pdf)

*The key idea of declarative programming is that a program is a theory in some suitable logic, and the computation is deduction from the theory*

- J.W. Lloyd

### Declarative Languages: Why?

- Writing the optimal solution is as hard as solving the problem (e.g. JOIN optimisation)
- We want to enhance programmer productivity by adding domain-specific abstractions (e.g. streams)
- We want to limit the expressiveness of the language to ensure some nice property (e.g. decidability)

### Declarative Languages: Why?

![inline](./attachments/Images/flyby.png)

### Declarative Languages: Why?

![inline](./attachments/Images/flyby2.pdf)

### Declarative Languages: Parsing

- Obtain the declarative program/query
- Verify it is syntactically valid
- Create an abstract syntax tree

![left fit](./attachments/Images/parsingmap.png)

### Declarative Languages: Parsing

![inline](./attachments/Images/declarative0.pdf)

### Declarative Languages: Planning (Logical)

- Obtain the AST of the program/query
- Verify all the preconditions hold
- Apply optimizations
- Possible errors: statistics not updated, wrong decision
- Generates the logical plan

![left fit](./attachments/Images/logicalmap.png)

### Declarative Languages: Planning (Logical)

![inline](./attachments/Images/declarative1.pdf)

### Declarative Languages: Planning (Physical)

- Obtain the logical plan of the program/query
- Verify all the preconditions hold
- Possible errors: a table does not exist
- Generates the physical plan

![left fit](./attachments/Images/physicalmap.png)

### Declarative Languages: Planning (Physical)

![inline](./attachments/Images/declarative2.pdf)

### Example of Physical Plan Optimization

![inline](./attachments/Images/physicalplanex.png)

### Declarative Languages: Executing

- Obtain the physical plan of the query
- Load it for execution
- Run!

![right fit](./attachments/Images/runexec.png)

### Declarative Languages: Errors

- Input not compliant with the expected one
- Table dropped while the query is long-running
- Network failure (fixable)
- Node failure (fixable)

![left fit](./attachments/Images/errors.png)

### Extras

- [[Functional Programming]]

diff --git a/README.md b/README.md
index 8cddd61..9e2efb0 100644
--- a/README.md
+++ b/README.md
@@ -1,110 +1,203 @@
## Introduction

### Apache Kafka

![](https://upload.wikimedia.org/wikipedia/commons/5/53/Apache_kafka_wordtype.svg)

### Kafka PreLab

#### Windows users

For running Kafka, we recommend a virtual machine or Docker.

#### Linux/MacOS users

Download the Confluent Platform [here](https://packages.confluent.io/archive/5.5/confluent-community-5.5.2-2.12.tar.gz?_ga=2.109919813.1538540511.1603024157-1065440358.1582035676)

Enter the bin folder and verify that all the scripts are executable:

```bash
#grant permission
chmod +x *.sh
```

Configure your hosts file so that the service hostnames used below resolve to localhost:

```bash
#!/bin/bash
if [ "$HOSTNAME" = tools ]; then
    echo "We don't need to update hosts in the tools container. Exiting."
    exit 1
fi

if grep "DEV host entries" /etc/hosts >/dev/null; then
    echo "Already done!"
    exit 0
fi

cat << EOF | sudo tee -a /etc/hosts >/dev/null
# DEV host entries
127.0.0.1 kafka1
127.0.0.1 kafka2
127.0.0.1 zookeeper
127.0.0.1 schema-registry
127.0.0.1 connect
127.0.0.1 ksqldb-server
127.0.0.1 postgres
EOF
echo Done!
```

Then start ZooKeeper. Its address is *localhost:2181*:

```bash
bin/zookeeper-server-start etc/kafka/zookeeper.properties
```

Then we start a Kafka broker:

```bash
bin/kafka-server-start etc/kafka/server.properties
```

If you want to start a second broker, you MUST change the broker ID and the port in the configuration, as indicated below. Suggestion: also use a different log folder.
```properties
# etc/kafka/server-1.properties:
broker.id=1
listeners=PLAINTEXT://:9092
log.dirs=/tmp/kafka-logs-1

# etc/kafka/server-2.properties:
broker.id=2
listeners=PLAINTEXT://:9093
log.dirs=/tmp/kafka-logs-2
```

#### Docker users

```bash
docker-compose up -d
```

```yaml
version: '2.2'
services:
  zookeeper:
    hostname: zookeeper
    container_name: zookeeper
    image: "confluentinc/cp-zookeeper:5.5.0-1-ubi8"
    restart: always
    networks:
      - kafka-net
    environment:
      ZOOKEEPER_CLIENT_PORT: 2181
      ZOOKEEPER_TICK_TIME: 2000

  kafka1:
    image: "confluentinc/cp-enterprise-kafka:5.5.0-1-ubi8"
    restart: always
    hostname: kafka
    container_name: kafka1
    ports:
      - "9092:9092"
    networks:
      - kafka-net
    environment:
      KAFKA_BROKER_ID: 1
      KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka1:9092
      KAFKA_AUTO_CREATE_TOPICS_ENABLE: "true"
      KAFKA_DELETE_TOPIC_ENABLE: "true"
      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
      KAFKA_METRIC_REPORTERS: "io.confluent.metrics.reporter.ConfluentMetricsReporter"
      CONFLUENT_METRICS_REPORTER_BOOTSTRAP_SERVERS: "kafka1:9092"
      CONFLUENT_METRICS_REPORTER_TOPIC_REPLICAS: 1
      KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 100
      KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 1
      KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: 1

  kafka2:
    image: "confluentinc/cp-enterprise-kafka:5.5.0-1-ubi8"
    restart: always
    hostname: kafka
    container_name: kafka2
    ports:
      - "9093:9093"
    networks:
      - kafka-net
    environment:
      KAFKA_BROKER_ID: 2
      KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka2:9093
      KAFKA_AUTO_CREATE_TOPICS_ENABLE: "true"
      KAFKA_DELETE_TOPIC_ENABLE: "true"
      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
      KAFKA_METRIC_REPORTERS: "io.confluent.metrics.reporter.ConfluentMetricsReporter"
      CONFLUENT_METRICS_REPORTER_BOOTSTRAP_SERVERS: "kafka2:9093"
      CONFLUENT_METRICS_REPORTER_TOPIC_REPLICAS: 1
      KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 100
      KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 1
      KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: 1

  schema-registry:
    hostname: schema-registry
    container_name: schema-registry
    image: "confluentinc/cp-schema-registry:5.5.0-1-ubi8"
    restart: always
    ports:
      - 8081:8081
    networks:
      - kafka-net
    environment:
      SCHEMA_REGISTRY_HOST_NAME: schema-registry
      SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: kafka1:9092
      SCHEMA_REGISTRY_LISTENERS: http://schema-registry:8081

  tools:
    image: cnfltraining/training-tools:5.5
    restart: always
    hostname: tools
    container_name: tools
    networks:
      - kafka-net
    volumes:
      - .:/root/confluent-streams/labs/using-ksql
    working_dir: /root/confluent-streams/labs/using-ksql
    command: /bin/bash
    tty: true

  notebook:
    build: notebook/
    networks:
      - kafka-net
    ports:
      - 8888:8888
    volumes:
      - ./:/home/jovyan/work/data
    environment:
      - GRANT_SUDO=yes

networks:
  kafka-net:
```

### Kafka-Training

The kafka-training submodule links to a complete Kafka training in Java.

Java exercises 1 to 6 refer to this practice.
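To verify the setup end-to-end, you can round-trip a few messages from Python. A small smoke test, assuming `kafka-python` is installed (`pip install kafka-python`) and a broker is reachable on localhost:9092; the topic name is arbitrary:

```python
from kafka import KafkaProducer, KafkaConsumer

BOOTSTRAP = "localhost:9092"   # kafka1 from the setup above
TOPIC = "prelab-smoke-test"    # arbitrary topic name

# Produce a few messages (topic auto-creation is enabled above).
producer = KafkaProducer(bootstrap_servers=BOOTSTRAP)
for i in range(3):
    producer.send(TOPIC, f"hello {i}".encode("utf-8"))
producer.flush()

# Read them back from the beginning of the topic.
consumer = KafkaConsumer(
    TOPIC,
    bootstrap_servers=BOOTSTRAP,
    auto_offset_reset="earliest",
    consumer_timeout_ms=5000,   # stop iterating after 5s of silence
)
for record in consumer:
    print(record.offset, record.value.decode("utf-8"))
```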
# Data Engineering:
## Repository for the Data Engineering Course (LTAT.02.007)

![logout](https://upload.wikimedia.org/wikipedia/en/3/39/Tartu_%C3%9Clikool_logo.svg)

![logodsg](./attachments/logo_dsg_vettoriale.png)

## Graph View

![inline](./attachments/latest.png)

## Lecturer: [Riccardo Tommasini, PhD](https://riccardotommasini.com)

### Teaching Assistants:
- [Fabiano Spiga](mailto:),
- [Mohamed Ragab](https://bigdata.cs.ut.ee/mohamed-ragab),
- [Hassan Eldeeb](mailto:hassan.eldeeb@ut.ee)

### [Home Page](https://courses.cs.ut.ee/2020/dataeng)

### [Forum](https://piazza.com/ut.ee/fall2020/ltat02007/home)

### [Moodle](https://moodle.ut.ee/course/view.php?id=10457)

### [Twitter](https://twitter.com/hashtag/DataEngUT?src=hashtag_click)

### Acknowledgments

Special thanks to Emanuele Della Valle and Marco Brambilla from Politecnico di Milano for letting me "steal" some of their great slides.

# Lectures

| Date | Title | Material | Mandatory Reads | Extras |
|-------|--------------------|----------|-----------------|--------|
| 01/09 | Course Intro | [Slides](./Data%20Engineer.md) - [pdf](./pdfs/Data%20Engineer.pdf) slide 45-109 | | |
| 03/09 | Data Modeling | [Slides](Data%20Modeling.md) - [pdf](./pdfs/Data%20Modeling.pdf) slide 1-44 | Chp 4 p111-127, Chp 5 p151-156, Chp 6 p199-205 of [3] | |
| 10/09 | DM for Relational Databases | [Slides](Data%20Modeling.md) - [pdf](./pdfs/Data%20Modeling.pdf) slide 45-109 | Chp 2, 6, and 7 (Normal Forms) of [1] | [Relational Model](https://course.ccs.neu.edu/cs3200sp18s3/ssl/readings/codd.pdf) |
| 10/09 | DM for Data Warehouse | [Slides](Data%20Modeling.md) - [pdf](./pdfs/Data%20Modeling.pdf) slide 109-118 | [pdf](http://www.kimballgroup.com/wp-content/uploads/2013/08/2013.09-Kimball-Dimensional-Modeling-Techniques11.pdf) [video](http://slideshot.epfl.ch/play/suri_stonebraker) | Chp 2 of [2] |
| 17/09 | DM for Big Data | [Slides](Data%20Modeling%20for%20Big%20Data.md) - [pdf](./pdfs/Data%20Modeling%20Big%20Data.pdf) | Chp 2 of [3], [video](https://www.youtube.com/watch?v=LDW0QWie21s) | [paper](https://www.ics.uci.edu/~cs223/papers/cidr07p15.pdf) |
| 17/09 | Key Value Stores | [Slides]() | | |
| | Column Oriented Databases | | | |
| | Document Databases | | | |
| | Graph Databases | | | |
| | Data Engineering Pipelines | | Chp 1 of [3] | |
| | Keynote TBA | | | |
| | Streaming Data | | Chp 11 of [3] | |
| | Data Wrangling | | | |

# Practices (videos will be available after the Group 2 issue)

| Date | Title | Material | Reads | Videos | Assignment | Notes |
|----------|-------------|----------|-------|-------|-------|----|
| 07-08/09 | Docker | [Slides](./docker/README.md) - [Lab Branch](https://github.com/DataSystemsGroupUT/dataeng/tree/docker) | | [Video GP1](https://panopto.ut.ee/Panopto/Pages/Viewer.aspx?id=31e77abe-b51e-4a39-8c33-ac30009b7ba6) [Video GP2](https://panopto.ut.ee/Panopto/Pages/Viewer.aspx?id=31e77abe-b51e-4a39-8c33-ac30009b7ba6) | | [QA GP2 only](https://docs.google.com/document/d/134YKfqp49-rtAXa0FJO30LJonHVO-PeYLqqeo8DQY9I/) |
| 14-15/09 | Modeling and Querying Relational Data with Postgres | [Slides](https://github.com/DataSystemsGroupUT/dataeng/blob/Homework1/PostgreSQL.pdf) | [Chp 32 of [1]](https://www.db-book.com/db7/online-chapters-dir/32.pdf) | [Video](https://panopto.ut.ee/Panopto/Pages/Viewer.aspx?id=b3221179-fd3a-4b4e-9a67-ac38008f7fbe) | | |
| 21-22/09 | Modeling and Querying Key Value Data with Redis | [Slides](https://github.com/DataSystemsGroupUT/dataeng/blob/Homework2/REDIS.pdf) | | [Video](https://panopto.ut.ee/Panopto/Pages/Viewer.aspx?id=0e659b03-7d3e-4b4c-a0f9-ac3d00f462a7) | | |
| 28-29/09 | Modeling and Querying Document Data with MongoDB | [Slides](https://github.com/DataSystemsGroupUT/dataeng/blob/Homework3/slides/MongoDB.pdf) | | [Video](https://panopto.ut.ee/Panopto/Pages/Viewer.aspx?id=9ffba354-a2a1-4131-967e-ac4400c8f226) | | |
| 5-6/10 | Modeling and Querying Graph Data with Neo4J | | | | | |
| | Data Ingestion with Apache Kafka | | | | | |
| | Apache Airflow Data Pipelines | | | | | |
| | Stream Processing with Kafka Streams | | | | | |
| | Stream Processing with KSQL | | | | | |
| | Data Cleansing | | | | | |
| | Augmentation | | | | | |

# Extras

[Contributing](./CONTRIBUTING.md)

- [ ] Modeling and Querying RDF data: SPARQL
- [ ] Domain Driven Design: a summary
- [ ] Event Sourcing: a summary
- [ ] Data Pipelines with Luigi
- [ ] Data Pipelines with Apache NiFi
- [ ] Data Processing with Apache Flink

# Syllabus

- What is (Big) Data?
- The Role of the Data Engineer
- Data Modeling
    - Data Replication
    - Data Partitioning
    - Transactions
- Relational Data
- NoSQL
    - Document
    - Graph
- Data Warehousing
    - Star and Snowflake schemas
- Data Vault
- (Big) Data Pipelines
    - Big Data Systems Architectures
    - ETL and Data Pipelines
    - Best Practices and Anti-Patterns
    - Batch vs Streaming Processing
- Data Cleansing
- Data Augmentation

# Books

- [1] Database System Concepts, 7th Edition, Avi Silberschatz, Henry F. Korth, S. Sudarshan, McGraw-Hill, ISBN 9780078022159
    - [Table of contents](https://www.db-book.com/db7/toc-dir/toc.pdf)
    - [slides](https://www.db-book.com/db7/slides-dir/index.html)
- [2] The Data Warehouse Toolkit: The Definitive Guide to Dimensional Modeling, Third Edition, Ralph Kimball, Margy Ross
- [3] [Designing Data-Intensive Applications - Martin Kleppmann](https://dataintensive.net/)
- [4] [Designing Event-Driven Systems](https://www.oreilly.com/library/view/designing-event-driven-systems/9781492038252/)

[[slides/Slides]]

diff --git a/Redis.md b/Redis.md
deleted file mode 100644
index 695511a..0000000
--- a/Redis.md
+++ /dev/null
@@ -1,336 +0,0 @@
# Redis

![inline](https://upload.wikimedia.org/wikipedia/en/thumb/6/6b/Redis_Logo.svg/1200px-Redis_Logo.svg.png)

### Redis History

- Written in ANSI C by [Salvatore Sanfilippo](https://twitter.com/antirez) :it:
- Works on most POSIX systems, like Linux, BSD, and OS X
- **Linux is the recommended** platform[^65]
- Redis is a single-threaded server, not designed to benefit from multiple CPU cores
- Several Redis instances can be launched to scale out on several cores
- All operations are atomic (no two commands can run at the same time)
- It executes most commands in O(1) complexity and with minimal lines of code

[^65]: No official support for Windows, but Microsoft develops and maintains an open source Win-64 port of Redis

### What Redis is

- An advanced [[Key-Value Store]], where keys can contain data structures such as strings, hashes, lists, sets, and sorted sets
- It supports a set of atomic operations on these data types
- Redis is a different evolution path in the key-value databases, where values are complex data types that are closely related to fundamental data structures and are exposed to the programmer as such, without additional abstraction layers
- Redis can be used as a **database**[^61], a **caching layer**[^62] or a **message broker**[^63]

[^61]: it is durable

[^62]: it is fast

[^63]: it is not only a key-value store
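To get a quick feel for these data types from Python, here is a minimal redis-py sketch; it assumes `pip install redis` and a local server on the default port, and all key names are illustrative:

```python
import redis

# Assumes a local Redis server on localhost:6379; keys are illustrative.
r = redis.Redis(host="localhost", port=6379, decode_responses=True)

# Strings: plain values plus atomic counters.
r.set("page:home:title", "Welcome")
r.incrby("page:home:visits", 1)

# Hash: a field -> value map stored under a single key.
r.hset("user:42", mapping={"name": "sally", "team": "cluster-management"})

# List: doubly linked list, usable as a queue.
r.rpush("jobs", "job-1", "job-2")

# Set and sorted set (members ordered by score).
r.sadd("tags", "nosql", "key-value")
r.zadd("leaderboard", {"sally": 1500, "ben": 900})

print(r.hgetall("user:42"), r.lpop("jobs"), r.zrange("leaderboard", 0, -1))
```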
### What Redis is NOT

- Redis is not a replacement for relational databases nor for document stores.
- It might be used as a complement to a SQL relational store and/or a NoSQL document store.
- Even though Redis offers configurable mechanisms for persistency, increased persistency will tend to increase latency and decrease throughput.
- Best used for rapidly changing data with a foreseeable database size (should fit mostly in memory).

### Redis Use Cases

- Caching
- Counting things
- Blocking queues
- Pub/Sub (service bus)
- MVC Output Cache provider
- Backplane for SignalR
- ASP.NET Session State provider[^64]
- Online user data (shopping cart, …)
- Any real-time, cross-platform, cross-application communication

[^64]: ASP.NET session state providers comparison: http://www.slideshare.net/devopsguys/best-performing-aspnet-session-state-providers

### When to consider Redis

- Speed is critical
- More than just key-value pairs
- Dataset can fit in memory
- Dataset is not critical

### Advantages

- Performance
- Availability
- Fault-Tolerance
- Scalability (adaptability)
- Portability

[source](https://redislabs.com/blog/the-proven-redis-performance/)

![right fit](https://redislabs.com/wp-content/uploads/2014/04/redis_proven_performance_2.png)

### Data Model

[.column]

- Key
    - Printable ASCII

![inline fit](https://3.bp.blogspot.com/-HSQTjkw0djk/WcpnMonIX9I/AAAAAAAAAXY/oxbpvKw8mXkKvuGJxBpSVxherKEy37pSACLcBGAs/s1600/ASCII-Table.png)

[.column]

- Value
    - Primitives
        - Strings
    - Containers (of strings)
        - Hashes
        - Lists
        - Sets
        - Sorted Sets

### Redis data types

| Collection | Contains | Read/Write Ability |
|----------|--------|-------------------|
| String | Binary-safe strings (up to 512 MB), integers or floating point values, bitmaps | Operate on the whole string or on parts, increment/decrement the integers and floats, get/set bits by position |
| Hash | Unordered hash table of keys to string values | Add, fetch, or remove individual items by key, fetch the whole hash |
| List | Doubly linked list of strings | Push or pop items from both ends, trim based on offsets, read individual or multiple items, find or remove items by value |
| Set | Unordered collection of unique strings | Add, fetch, or remove individual items, check membership, intersect, union, difference, fetch random items |
| Sorted Set | Ordered mapping of string members to floating-point scores, ordered by score | Add, fetch, or remove individual items, fetch items based on score ranges or member value |
| Geospatial Index | Sorted set implementation using geospatial information as the score | Add, fetch or remove individual items, search by coordinates and radius, calculate distance |
| HyperLogLog | Probabilistic data structure to count unique things using 12 KB of memory | Add individual or multiple items, get the cardinality |

### Redis Commands - Strings [Quiz]

| Command | Abstract Syntax | Complexity |
|----|---|----|
| Get/Set strings | SET [key value] GET [key] | ? |
| Increment numbers | INCRBY [key increment] | ? |
| Get multiple keys at once | MGET [key key ...] | ? |
| Set multiple keys at once | MSET [key value ...] | ? |
| Get string length | STRLEN [key] | ? |
| Update value and get the old one | GETSET [key value] | ? |
### Redis Commands - Strings [Answers]

| Command | Abstract Syntax | Complexity |
|----|---|----|
| Get/Set strings | SET [key value] GET [key] | O(1) |
| Increment numbers | INCRBY [key increment] | O(1) |
| Get multiple keys at once | MGET [key key ...] | O(n), n = #keys |
| Set multiple keys at once | MSET [key value ...] | O(n), n = #keys |
| Get string length | STRLEN [key] | O(1) |
| Update value and get the old one | GETSET [key value] | O(1) |

### Redis Commands - Keys [Quiz]

| Command | Abstract Syntax | Complexity |
|----|---|----|
| Key removal | DEL [key ...] | ? |
| Key existence | EXISTS [key ...] | ? |
| Get the type of a key | TYPE [key] | ? |
| Rename a key | RENAME [key newkey] | ? |

### Redis Commands - Keys [Answers]

| Command | Abstract Syntax | Complexity |
|----|---|----|
| Key removal | DEL [key ...] | O(1) |
| Key existence | EXISTS [key ...] | O(1) |
| Get the type of a key | TYPE [key] | O(1) |
| Rename a key | RENAME [key newkey] | O(1) |

### Redis Commands [Quiz]

[.column]

#### Lists

| Command | Abstract Syntax | Complexity |
|----|-------------------|----|
| Push on either end | RPUSH/LPUSH [key value] | ? |
| Pop from either end | RPOP/LPOP [key] | ? |
| Blocking pop | BRPOP/BLPOP [key timeout] | ? |
| Pop and push to other list | RPOPLPUSH [src dst] | ? |
| Get an element by index | LINDEX [key index] | ? |
| Get a range of elements | LRANGE [key start stop] | ? |

[.column]

#### Hashes

| Command | Abstract Syntax | Complexity |
|----|---|----|
| Set a hashed value | HSET [key field value] | ? |
| Set multiple fields | HMSET [key field value ...] | ? |
| Get a hashed value | HGET [key field] | ? |
| Get all the values in a hash | HGETALL [key] | ? |
| Increment a hashed value | HINCRBY [key field incr] | ? |

### Redis Commands [Answers]

[.column]

#### Lists

| Command | Abstract Syntax | Complexity |
|----|---|----|
| Push on either end | RPUSH/LPUSH [key value] | O(1) |
| Pop from either end | RPOP/LPOP [key] | O(1) |
| Blocking pop | BRPOP/BLPOP [key timeout] | O(1) |
| Pop and push to other list | RPOPLPUSH [src dst] | O(1) |
| Get an element by index | LINDEX [key index] | **O(n)** |
| Get a range of elements | LRANGE [key start stop] | **O(n)** |

[.column]

#### Hashes

| Command | Abstract Syntax | Complexity |
|----|---|----|
| Set a hashed value | HSET [key field value] | O(1) |
| Set multiple fields | HMSET [key field value ...] | O(1) |
| Get a hashed value | HGET [key field] | O(1) |
| Get all the values in a hash | HGETALL [key] | O(n), n = size of hash |
| Increment a hashed value | HINCRBY [key field incr] | O(1) |

### Redis Commands [Quiz]

[.column]

#### Sets

| Command | Abstract Syntax | Complexity |
|----|---|----|
| Add member to a set | SADD [key member] | ? |
| Pop random element | SPOP [key] | ? |
| Get all elements | SMEMBERS [key] | ? |
| Union multiple sets | SUNION [key key ...] | ? |
| Diff multiple sets | SDIFF [key key ...] | ? |

[.column]

#### Sorted Sets

| Command | Abstract Syntax | Complexity |
|----|---|----|
| Add member to a sorted set | ZADD [key score member] | ? |
| Get rank of member | ZRANK [key member] | ? |
| Get elements by score range | ZRANGEBYSCORE [key min max] | ? |
| Increment score of member | ZINCRBY [key incr member] | ? |
| Remove range by score | ZREMRANGEBYSCORE [key min max] | ? |
### Redis Commands [Answers]

[.column]

#### Sets

| Command | Abstract Syntax | Complexity |
|----|---|----|
| Add member to a set | SADD [key member] | O(1) |
| Pop random element | SPOP [key] | O(1) |
| Get all elements | SMEMBERS [key] | O(n), n = size of set |
| Union multiple sets | SUNION [key key ...] | O(n) |
| Diff multiple sets | SDIFF [key key ...] | O(n) |

[.column]

#### Sorted Sets

| Command | Abstract Syntax | Complexity |
|----|---|----|
| Add member to a sorted set | ZADD [key score member] | O(log(n)) |
| Get rank of member | ZRANK [key member] | O(log(n)) |
| Get elements by score range | ZRANGEBYSCORE [key min max] | O(log(n)) |
| Increment score of member | ZINCRBY [key incr member] | O(log(n)) |
| Remove range by score | ZREMRANGEBYSCORE [key min max] | O(log(n)) |

### Scaling Redis

- **Replication**
    - A Redis instance, known as the **master**, ensures that one or more instances, known as the **slaves**, become exact copies of the master
    - Clients can connect *to the master or to the slaves*
    - Slaves are *read-only* by default
- **Partitioning**
    - Breaking up data and distributing it across different hosts in a cluster
    - Can be implemented at different layers:
        - Client: partitioning in client-side code
        - Proxy: an extra layer that proxies all Redis queries and performs partitioning (e.g. Twemproxy)
        - Query router: instances make sure to forward the query to the right node (e.g. Redis Cluster)

### Scaling Redis

- **Persistence**
    - Redis provides two mechanisms to deal with persistence: Redis database snapshots (RDB) and append-only files (AOF)
- **Failover**
    - Manual
    - Automatic with Redis Sentinel (for master-slave topology)
    - Automatic with Redis Cluster (for cluster topology)

## Redis topologies

![right fit](https://blog.octo.com/wp-content/uploads/2017/08/screen-shot-2017-08-11-at-14-34-30.png)

- Standalone
- Sentinel (automatic failover)
- Twemproxy (distributes data)
- Cluster (automatic failover and data distribution)

### Redis topologies - Standalone

The master data is optionally replicated to slaves.

The slaves provide data redundancy, read offloading, and save-to-disk offloading.

Clients can connect to the master for read/write operations or to the slaves for read operations.

Slaves can also replicate to their own slaves.

There is no automatic failover.

Master-slave, multi-level:

![right fit](https://blog.octo.com/wp-content/uploads/2017/08/screen-shot-2017-08-11-at-14-35-11.png)

### Redis topologies - Sentinel

Redis Sentinel provides reliable automatic failover in a master/slave topology, automatically promoting a slave to master if the existing master fails (see the client-side sketch after the Cluster topology).

Sentinel does not distribute data across nodes.

Master-slave with Sentinel:

![right fit](https://blog.octo.com/wp-content/uploads/2017/08/screen-shot-2017-08-11-at-14-34-42.png)

### Redis topologies - Cluster

Redis Cluster distributes data across different Redis instances and performs automatic failover if any problem happens to any master instance.

All nodes are directly connected with a service channel.

The keyspace is divided into hash slots. Different nodes hold a subset of the hash slots.

Multi-key commands are only allowed for keys in the same hash slot.

![right fit](https://blog.octo.com/wp-content/uploads/2017/08/screen-shot-2017-08-11-at-14-34-48.png)
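Client-side, Sentinel-based failover looks roughly like this in redis-py; a sketch assuming a Sentinel on localhost:26379 monitoring a master group named "mymaster" (both values are deployment-specific placeholders):

```python
from redis.sentinel import Sentinel

# Assumes a Sentinel listening on localhost:26379 that monitors a master
# group named "mymaster"; both values are deployment-specific.
sentinel = Sentinel([("localhost", 26379)], socket_timeout=0.5)

# Writes must go to the current master...
master = sentinel.master_for("mymaster", socket_timeout=0.5)
master.set("greeting", "hello")

# ...while reads may be offloaded to a replica.
replica = sentinel.slave_for("mymaster", socket_timeout=0.5)
print(replica.get("greeting"))
```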
diff --git a/Sorted String Table.md b/Sorted String Table.md
deleted file mode 100644
index e92246b..0000000
--- a/Sorted String Table.md
+++ /dev/null
@@ -1,5 +0,0 @@
Make a simple change to logs: the sequence of key-value pairs is sorted by key.

Merging segments is simple and efficient, even if the files are bigger than the available memory (mergesort algorithm).

In order to find a particular key in the file, you no longer need an index of all the offsets: a sparse index is sufficient.
\ No newline at end of file
diff --git a/Untitled.md b/Untitled.md
deleted file mode 100644
index e69de29..0000000
diff --git a/_config.yml b/_config.yml
deleted file mode 100644
index 2f7efbe..0000000
--- a/_config.yml
+++ /dev/null
@@ -1 +0,0 @@
-theme: jekyll-theme-minimal
\ No newline at end of file
diff --git a/attachments/01a_Data-driven-decisions-006.jpg b/attachments/01a_Data-driven-decisions-006.jpg
deleted file mode 100644
index 96fb8cc..0000000
Binary files a/attachments/01a_Data-driven-decisions-006.jpg and /dev/null differ
diff --git a/attachments/01a_Data-driven-decisions-010.png b/attachments/01a_Data-driven-decisions-010.png
deleted file mode 100644
index 90806b8..0000000
Binary files a/attachments/01a_Data-driven-decisions-010.png and /dev/null differ
diff --git a/attachments/01a_Data-driven-decisions-016.png b/attachments/01a_Data-driven-decisions-016.png
deleted file mode 100644
index 46df9a1..0000000
Binary files a/attachments/01a_Data-driven-decisions-016.png and /dev/null differ
diff --git a/attachments/01a_Data-driven-decisions-022.png b/attachments/01a_Data-driven-decisions-022.png
deleted file mode 100644
index bc6d42e..0000000
Binary files a/attachments/01a_Data-driven-decisions-022.png and /dev/null differ
diff --git a/attachments/01a_Data-driven-decisions-030.jpg b/attachments/01a_Data-driven-decisions-030.jpg
deleted file mode 100644
index 19a8a89..0000000
Binary files a/attachments/01a_Data-driven-decisions-030.jpg and /dev/null differ
diff --git a/attachments/03-USDE-graph-db11 1.png b/attachments/03-USDE-graph-db11 1.png
deleted file mode 100644
index 5dbe599..0000000
Binary files a/attachments/03-USDE-graph-db11 1.png and /dev/null differ
diff --git a/attachments/03-USDE-graph-db11.png b/attachments/03-USDE-graph-db11.png
deleted file mode 100644
index 5dbe599..0000000
Binary files a/attachments/03-USDE-graph-db11.png and /dev/null differ
diff --git a/attachments/03-USDE-graph-db12 1.png b/attachments/03-USDE-graph-db12 1.png
deleted file mode 100644
index 3c41a82..0000000
Binary files a/attachments/03-USDE-graph-db12 1.png and /dev/null differ
diff --git a/attachments/03-USDE-graph-db12.png b/attachments/03-USDE-graph-db12.png
deleted file mode 100644
index 3c41a82..0000000
Binary files a/attachments/03-USDE-graph-db12.png and /dev/null differ
diff --git a/attachments/03-USDE-graph-db13 1.png b/attachments/03-USDE-graph-db13 1.png
deleted file mode 100644
index b9f13ac..0000000
Binary files a/attachments/03-USDE-graph-db13 1.png and /dev/null differ
diff --git a/attachments/03-USDE-graph-db13.png b/attachments/03-USDE-graph-db13.png
deleted file mode 100644
index b9f13ac..0000000
Binary files a/attachments/03-USDE-graph-db13.png and /dev/null differ
diff --git a/attachments/03-USDE-graph-db14.png b/attachments/03-USDE-graph-db14.png
deleted file mode 100644
index 617b9b5..0000000
Binary files a/attachments/03-USDE-graph-db14.png and /dev/null differ
diff --git a/attachments/03-USDE-graph-db15.png b/attachments/03-USDE-graph-db15.png
deleted file mode 100644
index 5f701e5..0000000
Binary files a/attachments/03-USDE-graph-db15.png and /dev/null differ
diff --git a/attachments/03-USDE-graph-db16.png b/attachments/03-USDE-graph-db16.png
deleted file mode 100644
index 5acf2f9..0000000
Binary files a/attachments/03-USDE-graph-db16.png and /dev/null differ
diff --git a/attachments/03-USDE-graph-db17.png b/attachments/03-USDE-graph-db17.png
deleted file mode 
100644 index bdd8218..0000000 Binary files a/attachments/03-USDE-graph-db17.png and /dev/null differ diff --git a/attachments/03-USDE-graph-db18.png b/attachments/03-USDE-graph-db18.png deleted file mode 100644 index b570514..0000000 Binary files a/attachments/03-USDE-graph-db18.png and /dev/null differ diff --git a/attachments/03-USDE-graph-db19.png b/attachments/03-USDE-graph-db19.png deleted file mode 100644 index c3fbd7a..0000000 Binary files a/attachments/03-USDE-graph-db19.png and /dev/null differ diff --git a/attachments/03-USDE-graph-db20.png b/attachments/03-USDE-graph-db20.png deleted file mode 100644 index 133a482..0000000 Binary files a/attachments/03-USDE-graph-db20.png and /dev/null differ diff --git a/attachments/03-USDE-graph-db21.png b/attachments/03-USDE-graph-db21.png deleted file mode 100644 index 30586d5..0000000 Binary files a/attachments/03-USDE-graph-db21.png and /dev/null differ diff --git a/attachments/03-USDE-graph-db22.png b/attachments/03-USDE-graph-db22.png deleted file mode 100644 index 54558a6..0000000 Binary files a/attachments/03-USDE-graph-db22.png and /dev/null differ diff --git a/attachments/03-USDE-graph-db25.png b/attachments/03-USDE-graph-db25.png deleted file mode 100644 index b6894eb..0000000 Binary files a/attachments/03-USDE-graph-db25.png and /dev/null differ diff --git a/attachments/03-USDE-graph-db27.png b/attachments/03-USDE-graph-db27.png deleted file mode 100644 index 1b4dfa3..0000000 Binary files a/attachments/03-USDE-graph-db27.png and /dev/null differ diff --git a/attachments/03-USDE-graph-db28.png b/attachments/03-USDE-graph-db28.png deleted file mode 100644 index f710d41..0000000 Binary files a/attachments/03-USDE-graph-db28.png and /dev/null differ diff --git a/attachments/03-USDE-graph-db29.png b/attachments/03-USDE-graph-db29.png deleted file mode 100644 index 4b4d956..0000000 Binary files a/attachments/03-USDE-graph-db29.png and /dev/null differ diff --git a/attachments/03-USDE-graph-db30.png b/attachments/03-USDE-graph-db30.png deleted file mode 100644 index 08a0fc2..0000000 Binary files a/attachments/03-USDE-graph-db30.png and /dev/null differ diff --git a/attachments/03-USDE-graph-db34.png b/attachments/03-USDE-graph-db34.png deleted file mode 100644 index 08a0fc2..0000000 Binary files a/attachments/03-USDE-graph-db34.png and /dev/null differ diff --git a/attachments/03-USDE-graph-db35.png b/attachments/03-USDE-graph-db35.png deleted file mode 100644 index 80129cf..0000000 Binary files a/attachments/03-USDE-graph-db35.png and /dev/null differ diff --git a/attachments/03-USDE-graph-db36.png b/attachments/03-USDE-graph-db36.png deleted file mode 100644 index 2946253..0000000 Binary files a/attachments/03-USDE-graph-db36.png and /dev/null differ diff --git a/attachments/04-USDE-nosql-db14.png b/attachments/04-USDE-nosql-db14.png deleted file mode 100644 index 01ef876..0000000 Binary files a/attachments/04-USDE-nosql-db14.png and /dev/null differ diff --git a/attachments/1024.jpg b/attachments/1024.jpg deleted file mode 100644 index 09f7972..0000000 Binary files a/attachments/1024.jpg and /dev/null differ diff --git a/attachments/1280px-Arrow_west.svg.png b/attachments/1280px-Arrow_west.svg.png deleted file mode 100644 index a5d9e21..0000000 Binary files a/attachments/1280px-Arrow_west.svg.png and /dev/null differ diff --git a/attachments/17-dataWrangling0.png b/attachments/17-dataWrangling0.png deleted file mode 100644 index 6bc692f..0000000 Binary files a/attachments/17-dataWrangling0.png and /dev/null differ diff --git 
a/attachments/17-dataWrangling12.png b/attachments/17-dataWrangling12.png deleted file mode 100644 index 787f371..0000000 Binary files a/attachments/17-dataWrangling12.png and /dev/null differ diff --git a/attachments/17-dataWrangling13.png b/attachments/17-dataWrangling13.png deleted file mode 100644 index 928ae03..0000000 Binary files a/attachments/17-dataWrangling13.png and /dev/null differ diff --git a/attachments/17-dataWrangling14.png b/attachments/17-dataWrangling14.png deleted file mode 100644 index 5ee2531..0000000 Binary files a/attachments/17-dataWrangling14.png and /dev/null differ diff --git a/attachments/17-dataWrangling15.png b/attachments/17-dataWrangling15.png deleted file mode 100644 index 01837dc..0000000 Binary files a/attachments/17-dataWrangling15.png and /dev/null differ diff --git a/attachments/17-dataWrangling16.png b/attachments/17-dataWrangling16.png deleted file mode 100644 index 928ae03..0000000 Binary files a/attachments/17-dataWrangling16.png and /dev/null differ diff --git a/attachments/17-dataWrangling17.png b/attachments/17-dataWrangling17.png deleted file mode 100644 index 9a5d34d..0000000 Binary files a/attachments/17-dataWrangling17.png and /dev/null differ diff --git a/attachments/17-dataWrangling18.png b/attachments/17-dataWrangling18.png deleted file mode 100644 index 35754c6..0000000 Binary files a/attachments/17-dataWrangling18.png and /dev/null differ diff --git a/attachments/17-dataWrangling19.png b/attachments/17-dataWrangling19.png deleted file mode 100644 index e4a93eb..0000000 Binary files a/attachments/17-dataWrangling19.png and /dev/null differ diff --git a/attachments/17-dataWrangling20.png b/attachments/17-dataWrangling20.png deleted file mode 100644 index ef53fc8..0000000 Binary files a/attachments/17-dataWrangling20.png and /dev/null differ diff --git a/attachments/17-dataWrangling21.png b/attachments/17-dataWrangling21.png deleted file mode 100644 index 323cc6a..0000000 Binary files a/attachments/17-dataWrangling21.png and /dev/null differ diff --git a/attachments/17-dataWrangling25.png b/attachments/17-dataWrangling25.png deleted file mode 100644 index f649207..0000000 Binary files a/attachments/17-dataWrangling25.png and /dev/null differ diff --git a/attachments/17-dataWrangling26.png b/attachments/17-dataWrangling26.png deleted file mode 100644 index 68b1e88..0000000 Binary files a/attachments/17-dataWrangling26.png and /dev/null differ diff --git a/attachments/17-dataWrangling27.png b/attachments/17-dataWrangling27.png deleted file mode 100644 index 4093e8e..0000000 Binary files a/attachments/17-dataWrangling27.png and /dev/null differ diff --git a/attachments/17-dataWrangling28.png b/attachments/17-dataWrangling28.png deleted file mode 100644 index b452374..0000000 Binary files a/attachments/17-dataWrangling28.png and /dev/null differ diff --git a/attachments/17-dataWrangling29.png b/attachments/17-dataWrangling29.png deleted file mode 100644 index b452374..0000000 Binary files a/attachments/17-dataWrangling29.png and /dev/null differ diff --git a/attachments/17-dataWrangling32.png b/attachments/17-dataWrangling32.png deleted file mode 100644 index c18d2ee..0000000 Binary files a/attachments/17-dataWrangling32.png and /dev/null differ diff --git a/attachments/17-dataWrangling33.png b/attachments/17-dataWrangling33.png deleted file mode 100644 index 03bd12d..0000000 Binary files a/attachments/17-dataWrangling33.png and /dev/null differ diff --git a/attachments/17-dataWrangling34.png b/attachments/17-dataWrangling34.png deleted file 
mode 100644 index 1577e92..0000000 Binary files a/attachments/17-dataWrangling34.png and /dev/null differ diff --git a/attachments/17-dataWrangling35.png b/attachments/17-dataWrangling35.png deleted file mode 100644 index f682705..0000000 Binary files a/attachments/17-dataWrangling35.png and /dev/null differ diff --git a/attachments/17-dataWrangling36.png b/attachments/17-dataWrangling36.png deleted file mode 100644 index f366596..0000000 Binary files a/attachments/17-dataWrangling36.png and /dev/null differ diff --git a/attachments/17-dataWrangling37.png b/attachments/17-dataWrangling37.png deleted file mode 100644 index 737daac..0000000 Binary files a/attachments/17-dataWrangling37.png and /dev/null differ diff --git a/attachments/17-dataWrangling38.png b/attachments/17-dataWrangling38.png deleted file mode 100644 index ffed64d..0000000 Binary files a/attachments/17-dataWrangling38.png and /dev/null differ diff --git a/attachments/17-dataWrangling39.png b/attachments/17-dataWrangling39.png deleted file mode 100644 index 497d19b..0000000 Binary files a/attachments/17-dataWrangling39.png and /dev/null differ diff --git a/attachments/17-dataWrangling4.png b/attachments/17-dataWrangling4.png deleted file mode 100644 index b600484..0000000 Binary files a/attachments/17-dataWrangling4.png and /dev/null differ diff --git a/attachments/17-dataWrangling40.png b/attachments/17-dataWrangling40.png deleted file mode 100644 index 60b41b1..0000000 Binary files a/attachments/17-dataWrangling40.png and /dev/null differ diff --git a/attachments/17-dataWrangling41.png b/attachments/17-dataWrangling41.png deleted file mode 100644 index 0075682..0000000 Binary files a/attachments/17-dataWrangling41.png and /dev/null differ diff --git a/attachments/17-dataWrangling42.png b/attachments/17-dataWrangling42.png deleted file mode 100644 index 716ca6b..0000000 Binary files a/attachments/17-dataWrangling42.png and /dev/null differ diff --git a/attachments/17-dataWrangling43.png b/attachments/17-dataWrangling43.png deleted file mode 100644 index af1d441..0000000 Binary files a/attachments/17-dataWrangling43.png and /dev/null differ diff --git a/attachments/17-dataWrangling44.png b/attachments/17-dataWrangling44.png deleted file mode 100644 index ca7f22f..0000000 Binary files a/attachments/17-dataWrangling44.png and /dev/null differ diff --git a/attachments/17-dataWrangling45.png b/attachments/17-dataWrangling45.png deleted file mode 100644 index e0d08ab..0000000 Binary files a/attachments/17-dataWrangling45.png and /dev/null differ diff --git a/attachments/17-dataWrangling46.png b/attachments/17-dataWrangling46.png deleted file mode 100644 index c016b05..0000000 Binary files a/attachments/17-dataWrangling46.png and /dev/null differ diff --git a/attachments/17-dataWrangling47.png b/attachments/17-dataWrangling47.png deleted file mode 100644 index 9e5fce5..0000000 Binary files a/attachments/17-dataWrangling47.png and /dev/null differ diff --git a/attachments/17-dataWrangling48.png b/attachments/17-dataWrangling48.png deleted file mode 100644 index f28f6e6..0000000 Binary files a/attachments/17-dataWrangling48.png and /dev/null differ diff --git a/attachments/17-dataWrangling49.png b/attachments/17-dataWrangling49.png deleted file mode 100644 index 197bfe8..0000000 Binary files a/attachments/17-dataWrangling49.png and /dev/null differ diff --git a/attachments/17-dataWrangling50.png b/attachments/17-dataWrangling50.png deleted file mode 100644 index 4501351..0000000 Binary files a/attachments/17-dataWrangling50.png and 
/dev/null differ diff --git a/attachments/17-dataWrangling51.png b/attachments/17-dataWrangling51.png deleted file mode 100644 index 1abf718..0000000 Binary files a/attachments/17-dataWrangling51.png and /dev/null differ diff --git a/attachments/17-dataWrangling52.png b/attachments/17-dataWrangling52.png deleted file mode 100644 index 0174c48..0000000 Binary files a/attachments/17-dataWrangling52.png and /dev/null differ diff --git a/attachments/17-dataWrangling53.png b/attachments/17-dataWrangling53.png deleted file mode 100644 index 78f254b..0000000 Binary files a/attachments/17-dataWrangling53.png and /dev/null differ diff --git a/attachments/17-dataWrangling54.png b/attachments/17-dataWrangling54.png deleted file mode 100644 index 6232a8a..0000000 Binary files a/attachments/17-dataWrangling54.png and /dev/null differ diff --git a/attachments/17-dataWrangling55.png b/attachments/17-dataWrangling55.png deleted file mode 100644 index 2ab2d16..0000000 Binary files a/attachments/17-dataWrangling55.png and /dev/null differ diff --git a/attachments/17-dataWrangling56.png b/attachments/17-dataWrangling56.png deleted file mode 100644 index 6d991e9..0000000 Binary files a/attachments/17-dataWrangling56.png and /dev/null differ diff --git a/attachments/17-dataWrangling57.png b/attachments/17-dataWrangling57.png deleted file mode 100644 index 03bd12d..0000000 Binary files a/attachments/17-dataWrangling57.png and /dev/null differ diff --git a/attachments/17-dataWrangling59.tiff.md b/attachments/17-dataWrangling59.tiff.md deleted file mode 100644 index e69de29..0000000 diff --git a/attachments/17-dataWrangling60.png b/attachments/17-dataWrangling60.png deleted file mode 100644 index 837b936..0000000 Binary files a/attachments/17-dataWrangling60.png and /dev/null differ diff --git a/attachments/1_62WJpBzEdlsjlc2TtjFf3g.jpg b/attachments/1_62WJpBzEdlsjlc2TtjFf3g.jpg deleted file mode 100644 index d8a9d9f..0000000 Binary files a/attachments/1_62WJpBzEdlsjlc2TtjFf3g.jpg and /dev/null differ diff --git a/attachments/1_kDxyqz9MqDuFb5AyYu53AA.png b/attachments/1_kDxyqz9MqDuFb5AyYu53AA.png deleted file mode 100644 index b8ab8ba..0000000 Binary files a/attachments/1_kDxyqz9MqDuFb5AyYu53AA.png and /dev/null differ diff --git a/attachments/2016-internet-minute-infographic.png b/attachments/2016-internet-minute-infographic.png deleted file mode 100644 index 20a8ee4..0000000 Binary files a/attachments/2016-internet-minute-infographic.png and /dev/null differ diff --git a/attachments/2017-internet-minute-infographic.png b/attachments/2017-internet-minute-infographic.png deleted file mode 100644 index 7d25d16..0000000 Binary files a/attachments/2017-internet-minute-infographic.png and /dev/null differ diff --git a/attachments/2018-internet-minute-infographic.png b/attachments/2018-internet-minute-infographic.png deleted file mode 100644 index b18ff37..0000000 Binary files a/attachments/2018-internet-minute-infographic.png and /dev/null differ diff --git a/attachments/2019-internet-minute-infographic.png b/attachments/2019-internet-minute-infographic.png deleted file mode 100644 index 4924191..0000000 Binary files a/attachments/2019-internet-minute-infographic.png and /dev/null differ diff --git a/attachments/2020-internet-minute-infographic.jpg b/attachments/2020-internet-minute-infographic.jpg deleted file mode 100644 index 213ac25..0000000 Binary files a/attachments/2020-internet-minute-infographic.jpg and /dev/null differ diff --git a/attachments/2setvisual.png b/attachments/2setvisual.png deleted file mode 
100644 index 2c0ff68..0000000 Binary files a/attachments/2setvisual.png and /dev/null differ diff --git a/attachments/4doohb.jpg b/attachments/4doohb.jpg deleted file mode 100644 index 591037c..0000000 Binary files a/attachments/4doohb.jpg and /dev/null differ diff --git a/attachments/5e08f341edb7545ceaa16494_672340c374e04c44b8d01a085a93ad5f.png b/attachments/5e08f341edb7545ceaa16494_672340c374e04c44b8d01a085a93ad5f.png deleted file mode 100644 index dc78fb3..0000000 Binary files a/attachments/5e08f341edb7545ceaa16494_672340c374e04c44b8d01a085a93ad5f.png and /dev/null differ diff --git a/attachments/6d2.png b/attachments/6d2.png deleted file mode 100644 index c9ba94b..0000000 Binary files a/attachments/6d2.png and /dev/null differ diff --git a/attachments/9909953816_e8cecebfc3.jpg b/attachments/9909953816_e8cecebfc3.jpg deleted file mode 100644 index a9c763a..0000000 Binary files a/attachments/9909953816_e8cecebfc3.jpg and /dev/null differ diff --git a/attachments/AdjacencyList.png b/attachments/AdjacencyList.png deleted file mode 100755 index b972839..0000000 Binary files a/attachments/AdjacencyList.png and /dev/null differ diff --git a/attachments/AdjacencyMaterix.png b/attachments/AdjacencyMaterix.png deleted file mode 100755 index 9e5f73f..0000000 Binary files a/attachments/AdjacencyMaterix.png and /dev/null differ diff --git a/attachments/Agenda.jpg b/attachments/Agenda.jpg deleted file mode 100755 index c4b8b0f..0000000 Binary files a/attachments/Agenda.jpg and /dev/null differ diff --git a/attachments/AppendOutputMode.png b/attachments/AppendOutputMode.png deleted file mode 100755 index e1096d5..0000000 Binary files a/attachments/AppendOutputMode.png and /dev/null differ diff --git a/attachments/ArrowDown.png b/attachments/ArrowDown.png deleted file mode 100755 index 260e8de..0000000 Binary files a/attachments/ArrowDown.png and /dev/null differ diff --git a/attachments/Better Faster Cheaper.png b/attachments/Better Faster Cheaper.png deleted file mode 100644 index 5f345ff..0000000 Binary files a/attachments/Better Faster Cheaper.png and /dev/null differ diff --git a/attachments/CompleteOutputMode.png b/attachments/CompleteOutputMode.png deleted file mode 100755 index 0410627..0000000 Binary files a/attachments/CompleteOutputMode.png and /dev/null differ diff --git a/attachments/Consumers.png b/attachments/Consumers.png deleted file mode 100755 index 296a40e..0000000 Binary files a/attachments/Consumers.png and /dev/null differ diff --git a/attachments/CreateStream.png b/attachments/CreateStream.png deleted file mode 100755 index 91dba11..0000000 Binary files a/attachments/CreateStream.png and /dev/null differ diff --git a/attachments/Creating-Custom-Graph-Views-Over-your-RDF-Data_without-taaext.jpg b/attachments/Creating-Custom-Graph-Views-Over-your-RDF-Data_without-taaext.jpg deleted file mode 100755 index 98e53c5..0000000 Binary files a/attachments/Creating-Custom-Graph-Views-Over-your-RDF-Data_without-taaext.jpg and /dev/null differ diff --git a/attachments/DS-roles.png b/attachments/DS-roles.png deleted file mode 100644 index da40cc8..0000000 Binary files a/attachments/DS-roles.png and /dev/null differ diff --git a/attachments/Data_Modeling_for_Big_Data.pdf b/attachments/Data_Modeling_for_Big_Data.pdf deleted file mode 100644 index 282c33f..0000000 Binary files a/attachments/Data_Modeling_for_Big_Data.pdf and /dev/null differ diff --git a/attachments/Distributed-Data-Processing-min-1.jpg b/attachments/Distributed-Data-Processing-min-1.jpg deleted file mode 100644 index 
9adada6..0000000 Binary files a/attachments/Distributed-Data-Processing-min-1.jpg and /dev/null differ diff --git a/attachments/Dream.png b/attachments/Dream.png deleted file mode 100755 index 60ede63..0000000 Binary files a/attachments/Dream.png and /dev/null differ diff --git a/attachments/FlinkEcoSystem.png b/attachments/FlinkEcoSystem.png deleted file mode 100755 index 8978338..0000000 Binary files a/attachments/FlinkEcoSystem.png and /dev/null differ diff --git a/attachments/FlinkWatermarksAndHandlingLateData.png b/attachments/FlinkWatermarksAndHandlingLateData.png deleted file mode 100755 index 86ef041..0000000 Binary files a/attachments/FlinkWatermarksAndHandlingLateData.png and /dev/null differ diff --git a/attachments/GAS.png b/attachments/GAS.png deleted file mode 100755 index df7295a..0000000 Binary files a/attachments/GAS.png and /dev/null differ diff --git a/attachments/GSPARQL.png b/attachments/GSPARQL.png deleted file mode 100755 index aa6e60b..0000000 Binary files a/attachments/GSPARQL.png and /dev/null differ diff --git a/attachments/Giraph-logo-standard.png b/attachments/Giraph-logo-standard.png deleted file mode 100755 index 2c55538..0000000 Binary files a/attachments/Giraph-logo-standard.png and /dev/null differ diff --git a/attachments/Grafy.jpg b/attachments/Grafy.jpg deleted file mode 100755 index 5179a6c..0000000 Binary files a/attachments/Grafy.jpg and /dev/null differ diff --git a/attachments/Graph Processing Frameworks Programming Models.png b/attachments/Graph Processing Frameworks Programming Models.png deleted file mode 100755 index 78e5225..0000000 Binary files a/attachments/Graph Processing Frameworks Programming Models.png and /dev/null differ diff --git a/attachments/Graph-Chat-Blog.jpg b/attachments/Graph-Chat-Blog.jpg deleted file mode 100755 index fa0920f..0000000 Binary files a/attachments/Graph-Chat-Blog.jpg and /dev/null differ diff --git a/attachments/GraphAnalyticsCategories.png b/attachments/GraphAnalyticsCategories.png deleted file mode 100755 index f51015f..0000000 Binary files a/attachments/GraphAnalyticsCategories.png and /dev/null differ diff --git a/attachments/GraphChiArch.png b/attachments/GraphChiArch.png deleted file mode 100755 index 3790ba4..0000000 Binary files a/attachments/GraphChiArch.png and /dev/null differ diff --git a/attachments/GraphDatabsesSpace.png b/attachments/GraphDatabsesSpace.png deleted file mode 100755 index 592032b..0000000 Binary files a/attachments/GraphDatabsesSpace.png and /dev/null differ diff --git a/attachments/GraphPartitioning.jpg b/attachments/GraphPartitioning.jpg deleted file mode 100755 index 58f59a5..0000000 Binary files a/attachments/GraphPartitioning.jpg and /dev/null differ diff --git a/attachments/GraphProcessingCommunicationmodels.png b/attachments/GraphProcessingCommunicationmodels.png deleted file mode 100755 index bfb15f7..0000000 Binary files a/attachments/GraphProcessingCommunicationmodels.png and /dev/null differ diff --git a/attachments/GraphXgraph.png b/attachments/GraphXgraph.png deleted file mode 100755 index a6b9193..0000000 Binary files a/attachments/GraphXgraph.png and /dev/null differ diff --git a/attachments/HDFS.png b/attachments/HDFS.png deleted file mode 100755 index c0caf34..0000000 Binary files a/attachments/HDFS.png and /dev/null differ diff --git a/attachments/Images/AdaptiveWatermark.png b/attachments/Images/AdaptiveWatermark.png deleted file mode 100755 index debf864..0000000 Binary files a/attachments/Images/AdaptiveWatermark.png and /dev/null differ diff --git 
a/attachments/Images/AggAndWindowsOnStreams1.png b/attachments/Images/AggAndWindowsOnStreams1.png deleted file mode 100755 index 9940ae8..0000000 Binary files a/attachments/Images/AggAndWindowsOnStreams1.png and /dev/null differ diff --git a/attachments/Images/AggAndWindowsOnStreams2.png b/attachments/Images/AggAndWindowsOnStreams2.png deleted file mode 100755 index e2923f7..0000000 Binary files a/attachments/Images/AggAndWindowsOnStreams2.png and /dev/null differ diff --git a/attachments/Images/Algo1.png b/attachments/Images/Algo1.png deleted file mode 100755 index 2a8e7dd..0000000 Binary files a/attachments/Images/Algo1.png and /dev/null differ diff --git a/attachments/Images/Algo2.png b/attachments/Images/Algo2.png deleted file mode 100755 index a9402c7..0000000 Binary files a/attachments/Images/Algo2.png and /dev/null differ diff --git a/attachments/Images/AppendOutputMode.png b/attachments/Images/AppendOutputMode.png deleted file mode 100755 index e1096d5..0000000 Binary files a/attachments/Images/AppendOutputMode.png and /dev/null differ diff --git a/attachments/Images/Architecture.png b/attachments/Images/Architecture.png deleted file mode 100755 index 8044703..0000000 Binary files a/attachments/Images/Architecture.png and /dev/null differ diff --git a/attachments/Images/ArrowDown.png b/attachments/Images/ArrowDown.png deleted file mode 100755 index 260e8de..0000000 Binary files a/attachments/Images/ArrowDown.png and /dev/null differ diff --git a/attachments/Images/AsyncSnapshot.png b/attachments/Images/AsyncSnapshot.png deleted file mode 100755 index 1b7b398..0000000 Binary files a/attachments/Images/AsyncSnapshot.png and /dev/null differ diff --git a/attachments/Images/Batching1.png b/attachments/Images/Batching1.png deleted file mode 100755 index 9582712..0000000 Binary files a/attachments/Images/Batching1.png and /dev/null differ diff --git a/attachments/Images/CEP.png b/attachments/Images/CEP.png deleted file mode 100755 index 1b5ea03..0000000 Binary files a/attachments/Images/CEP.png and /dev/null differ diff --git a/attachments/Images/CEPExample.png b/attachments/Images/CEPExample.png deleted file mode 100755 index b9c4587..0000000 Binary files a/attachments/Images/CEPExample.png and /dev/null differ diff --git a/attachments/Images/Calcite.png b/attachments/Images/Calcite.png deleted file mode 100755 index 8bf8cec..0000000 Binary files a/attachments/Images/Calcite.png and /dev/null differ diff --git a/attachments/Images/Cars.png b/attachments/Images/Cars.png deleted file mode 100755 index 42d0557..0000000 Binary files a/attachments/Images/Cars.png and /dev/null differ diff --git a/attachments/Images/CompleteOutputMode.png b/attachments/Images/CompleteOutputMode.png deleted file mode 100755 index 0410627..0000000 Binary files a/attachments/Images/CompleteOutputMode.png and /dev/null differ diff --git a/attachments/Images/Consumers.png b/attachments/Images/Consumers.png deleted file mode 100755 index 296a40e..0000000 Binary files a/attachments/Images/Consumers.png and /dev/null differ diff --git a/attachments/Images/Contiguity.png b/attachments/Images/Contiguity.png deleted file mode 100755 index e006492..0000000 Binary files a/attachments/Images/Contiguity.png and /dev/null differ diff --git a/attachments/Images/CreateStream.png b/attachments/Images/CreateStream.png deleted file mode 100755 index 91dba11..0000000 Binary files a/attachments/Images/CreateStream.png and /dev/null differ diff --git a/attachments/Images/EventTimeVsProcessingTime.png 
b/attachments/Images/EventTimeVsProcessingTime.png deleted file mode 100755 index 5a67b71..0000000 Binary files a/attachments/Images/EventTimeVsProcessingTime.png and /dev/null differ diff --git a/attachments/Images/EventTimeWindows.png b/attachments/Images/EventTimeWindows.png deleted file mode 100755 index 96d42cd..0000000 Binary files a/attachments/Images/EventTimeWindows.png and /dev/null differ diff --git a/attachments/Images/ExampleWindowAggregation.png b/attachments/Images/ExampleWindowAggregation.png deleted file mode 100755 index 0d60318..0000000 Binary files a/attachments/Images/ExampleWindowAggregation.png and /dev/null differ diff --git a/attachments/Images/Flink.png b/attachments/Images/Flink.png deleted file mode 100755 index 8952b88..0000000 Binary files a/attachments/Images/Flink.png and /dev/null differ diff --git a/attachments/Images/FlinkEcoSystem.png b/attachments/Images/FlinkEcoSystem.png deleted file mode 100755 index 8978338..0000000 Binary files a/attachments/Images/FlinkEcoSystem.png and /dev/null differ diff --git a/attachments/Images/FlinkWatermarksAndHandlingLateData.png b/attachments/Images/FlinkWatermarksAndHandlingLateData.png deleted file mode 100755 index 86ef041..0000000 Binary files a/attachments/Images/FlinkWatermarksAndHandlingLateData.png and /dev/null differ diff --git a/attachments/Images/FollowBy.png b/attachments/Images/FollowBy.png deleted file mode 100755 index c3d1aa0..0000000 Binary files a/attachments/Images/FollowBy.png and /dev/null differ diff --git a/attachments/Images/Fusion 2.png b/attachments/Images/Fusion 2.png deleted file mode 100755 index f3a4fbb..0000000 Binary files a/attachments/Images/Fusion 2.png and /dev/null differ diff --git a/attachments/Images/Fusion.png b/attachments/Images/Fusion.png deleted file mode 100755 index f3a4fbb..0000000 Binary files a/attachments/Images/Fusion.png and /dev/null differ diff --git a/attachments/Images/IMG_1845.jpg b/attachments/Images/IMG_1845.jpg deleted file mode 100644 index dc6e06a..0000000 Binary files a/attachments/Images/IMG_1845.jpg and /dev/null differ diff --git a/attachments/Images/JoinStreamToAStream.png b/attachments/Images/JoinStreamToAStream.png deleted file mode 100755 index cc7bc83..0000000 Binary files a/attachments/Images/JoinStreamToAStream.png and /dev/null differ diff --git a/attachments/Images/JoinStreamToATable.png b/attachments/Images/JoinStreamToATable.png deleted file mode 100755 index d367554..0000000 Binary files a/attachments/Images/JoinStreamToATable.png and /dev/null differ diff --git a/attachments/Images/KSQL.png b/attachments/Images/KSQL.png deleted file mode 100644 index a52cc48..0000000 Binary files a/attachments/Images/KSQL.png and /dev/null differ diff --git a/attachments/Images/KafkaArchitecture.png b/attachments/Images/KafkaArchitecture.png deleted file mode 100755 index 1655cf9..0000000 Binary files a/attachments/Images/KafkaArchitecture.png and /dev/null differ diff --git a/attachments/Images/KafkaStatefulProcessing.png b/attachments/Images/KafkaStatefulProcessing.png deleted file mode 100755 index 619459e..0000000 Binary files a/attachments/Images/KafkaStatefulProcessing.png and /dev/null differ diff --git a/attachments/Images/KafkaStreamExample1.png b/attachments/Images/KafkaStreamExample1.png deleted file mode 100755 index 9de559b..0000000 Binary files a/attachments/Images/KafkaStreamExample1.png and /dev/null differ diff --git a/attachments/Images/KafkaStreamExample2.png b/attachments/Images/KafkaStreamExample2.png deleted file mode 100755 index 
9d19976..0000000 Binary files a/attachments/Images/KafkaStreamExample2.png and /dev/null differ diff --git a/attachments/Images/KafkaStreamExample3.png b/attachments/Images/KafkaStreamExample3.png deleted file mode 100755 index 8937e1d..0000000 Binary files a/attachments/Images/KafkaStreamExample3.png and /dev/null differ diff --git a/attachments/Images/KafkaStreamingInternally.png b/attachments/Images/KafkaStreamingInternally.png deleted file mode 100755 index 9dd0412..0000000 Binary files a/attachments/Images/KafkaStreamingInternally.png and /dev/null differ diff --git a/attachments/Images/KafkaStreamingLibrary.png b/attachments/Images/KafkaStreamingLibrary.png deleted file mode 100755 index b91c1c0..0000000 Binary files a/attachments/Images/KafkaStreamingLibrary.png and /dev/null differ diff --git a/attachments/Images/KafkaStreamsDSL.png b/attachments/Images/KafkaStreamsDSL.png deleted file mode 100755 index c5fc726..0000000 Binary files a/attachments/Images/KafkaStreamsDSL.png and /dev/null differ diff --git a/attachments/Images/KafkaTopics.png b/attachments/Images/KafkaTopics.png deleted file mode 100755 index 50e82db..0000000 Binary files a/attachments/Images/KafkaTopics.png and /dev/null differ diff --git a/attachments/Images/Kappa-Architecture.pdf b/attachments/Images/Kappa-Architecture.pdf deleted file mode 100755 index db56b91..0000000 Binary files a/attachments/Images/Kappa-Architecture.pdf and /dev/null differ diff --git a/attachments/Images/Lambda-Architecture.pdf b/attachments/Images/Lambda-Architecture.pdf deleted file mode 100755 index c2eb699..0000000 Binary files a/attachments/Images/Lambda-Architecture.pdf and /dev/null differ diff --git a/attachments/Images/LoadBalancing.png b/attachments/Images/LoadBalancing.png deleted file mode 100755 index bc87d57..0000000 Binary files a/attachments/Images/LoadBalancing.png and /dev/null differ diff --git a/attachments/Images/MakingProgress.png b/attachments/Images/MakingProgress.png deleted file mode 100755 index 48c4764..0000000 Binary files a/attachments/Images/MakingProgress.png and /dev/null differ diff --git a/attachments/Images/MichaelGrey.png b/attachments/Images/MichaelGrey.png deleted file mode 100755 index 3ad5372..0000000 Binary files a/attachments/Images/MichaelGrey.png and /dev/null differ diff --git a/attachments/Images/MiniBatches.png b/attachments/Images/MiniBatches.png deleted file mode 100755 index e5a9700..0000000 Binary files a/attachments/Images/MiniBatches.png and /dev/null differ diff --git a/attachments/Images/NaiveSnapshot.png b/attachments/Images/NaiveSnapshot.png deleted file mode 100755 index 4b14a0d..0000000 Binary files a/attachments/Images/NaiveSnapshot.png and /dev/null differ diff --git a/attachments/Images/NativeStreamingArchitecture.png b/attachments/Images/NativeStreamingArchitecture.png deleted file mode 100755 index 2d42e1e..0000000 Binary files a/attachments/Images/NativeStreamingArchitecture.png and /dev/null differ diff --git a/attachments/Images/OperatorSeparation.png b/attachments/Images/OperatorSeparation.png deleted file mode 100755 index 754ee69..0000000 Binary files a/attachments/Images/OperatorSeparation.png and /dev/null differ diff --git a/attachments/Images/OperatorSeparation2.png b/attachments/Images/OperatorSeparation2.png deleted file mode 100755 index 908aa45..0000000 Binary files a/attachments/Images/OperatorSeparation2.png and /dev/null differ diff --git a/attachments/Images/OperatorSeparation3.png b/attachments/Images/OperatorSeparation3.png deleted file mode 100755 index 
589a719..0000000 Binary files a/attachments/Images/OperatorSeparation3.png and /dev/null differ diff --git a/attachments/Images/PAtternDefinition.png b/attachments/Images/PAtternDefinition.png deleted file mode 100755 index 1bde819..0000000 Binary files a/attachments/Images/PAtternDefinition.png and /dev/null differ diff --git a/attachments/Images/Placement.png b/attachments/Images/Placement.png deleted file mode 100755 index 03bcc45..0000000 Binary files a/attachments/Images/Placement.png and /dev/null differ diff --git a/attachments/Images/ProcessingTimeWindows.png b/attachments/Images/ProcessingTimeWindows.png deleted file mode 100755 index f409d9d..0000000 Binary files a/attachments/Images/ProcessingTimeWindows.png and /dev/null differ diff --git a/attachments/Images/ProcessorLegend.png b/attachments/Images/ProcessorLegend.png deleted file mode 100755 index 6e4a159..0000000 Binary files a/attachments/Images/ProcessorLegend.png and /dev/null differ diff --git a/attachments/Images/ProcessorTopology.png b/attachments/Images/ProcessorTopology.png deleted file mode 100755 index c592357..0000000 Binary files a/attachments/Images/ProcessorTopology.png and /dev/null differ diff --git a/attachments/Images/ProgrammingWithStreams.png b/attachments/Images/ProgrammingWithStreams.png deleted file mode 100755 index 820e252..0000000 Binary files a/attachments/Images/ProgrammingWithStreams.png and /dev/null differ diff --git a/attachments/Images/Questions.jpg b/attachments/Images/Questions.jpg deleted file mode 100755 index b31f196..0000000 Binary files a/attachments/Images/Questions.jpg and /dev/null differ diff --git a/attachments/Images/ReorderEliminate1.png b/attachments/Images/ReorderEliminate1.png deleted file mode 100755 index de930ae..0000000 Binary files a/attachments/Images/ReorderEliminate1.png and /dev/null differ diff --git a/attachments/Images/ReorderEliminate2.png b/attachments/Images/ReorderEliminate2.png deleted file mode 100755 index 156d7e4..0000000 Binary files a/attachments/Images/ReorderEliminate2.png and /dev/null differ diff --git a/attachments/Images/Resources.pptx b/attachments/Images/Resources.pptx deleted file mode 100755 index 5b66209..0000000 Binary files a/attachments/Images/Resources.pptx and /dev/null differ diff --git a/attachments/Images/SimpleQueries.png b/attachments/Images/SimpleQueries.png deleted file mode 100755 index 1ff6598..0000000 Binary files a/attachments/Images/SimpleQueries.png and /dev/null differ diff --git a/attachments/Images/StateSharing.png b/attachments/Images/StateSharing.png deleted file mode 100755 index 9c46060..0000000 Binary files a/attachments/Images/StateSharing.png and /dev/null differ diff --git a/attachments/Images/StreamProcessingViews.png b/attachments/Images/StreamProcessingViews.png deleted file mode 100755 index 903e037..0000000 Binary files a/attachments/Images/StreamProcessingViews.png and /dev/null differ diff --git a/attachments/Images/StreamTable.png b/attachments/Images/StreamTable.png deleted file mode 100755 index 54aef7d..0000000 Binary files a/attachments/Images/StreamTable.png and /dev/null differ diff --git a/attachments/Images/StreamToRelation.png b/attachments/Images/StreamToRelation.png deleted file mode 100644 index 482c8d4..0000000 Binary files a/attachments/Images/StreamToRelation.png and /dev/null differ diff --git a/attachments/Images/StreamWindowing.png b/attachments/Images/StreamWindowing.png deleted file mode 100755 index 2741778..0000000 Binary files a/attachments/Images/StreamWindowing.png and /dev/null 
differ diff --git a/attachments/Images/StreamingSystemsOverview.png b/attachments/Images/StreamingSystemsOverview.png deleted file mode 100755 index c810268..0000000 Binary files a/attachments/Images/StreamingSystemsOverview.png and /dev/null differ diff --git a/attachments/Images/StreamsAsUnboundedTables.png b/attachments/Images/StreamsAsUnboundedTables.png deleted file mode 100755 index 0d9d649..0000000 Binary files a/attachments/Images/StreamsAsUnboundedTables.png and /dev/null differ diff --git a/attachments/Images/Thankyou.jpeg b/attachments/Images/Thankyou.jpeg deleted file mode 100755 index b61f017..0000000 Binary files a/attachments/Images/Thankyou.jpeg and /dev/null differ diff --git a/attachments/Images/UpdateOutputMode.png b/attachments/Images/UpdateOutputMode.png deleted file mode 100755 index 8bf3d38..0000000 Binary files a/attachments/Images/UpdateOutputMode.png and /dev/null differ diff --git a/attachments/Images/WatermarksAndHandlingLateData.png b/attachments/Images/WatermarksAndHandlingLateData.png deleted file mode 100755 index 2337962..0000000 Binary files a/attachments/Images/WatermarksAndHandlingLateData.png and /dev/null differ diff --git a/attachments/Images/WhyKafka.png b/attachments/Images/WhyKafka.png deleted file mode 100755 index 0ec008c..0000000 Binary files a/attachments/Images/WhyKafka.png and /dev/null differ diff --git a/attachments/Images/WhyKafka2.png b/attachments/Images/WhyKafka2.png deleted file mode 100755 index 4207e4f..0000000 Binary files a/attachments/Images/WhyKafka2.png and /dev/null differ diff --git a/attachments/Images/WhyKafka3.png b/attachments/Images/WhyKafka3.png deleted file mode 100755 index b31d422..0000000 Binary files a/attachments/Images/WhyKafka3.png and /dev/null differ diff --git a/attachments/Images/WhyKafka4.png b/attachments/Images/WhyKafka4.png deleted file mode 100755 index 4418589..0000000 Binary files a/attachments/Images/WhyKafka4.png and /dev/null differ diff --git a/attachments/Images/WhyKafka5.png b/attachments/Images/WhyKafka5.png deleted file mode 100644 index 19470cb..0000000 Binary files a/attachments/Images/WhyKafka5.png and /dev/null differ diff --git a/attachments/Images/WindowFunctions.png b/attachments/Images/WindowFunctions.png deleted file mode 100755 index 7687919..0000000 Binary files a/attachments/Images/WindowFunctions.png and /dev/null differ diff --git a/attachments/Images/actors.png b/attachments/Images/actors.png deleted file mode 100644 index 10082ed..0000000 Binary files a/attachments/Images/actors.png and /dev/null differ diff --git a/attachments/Images/asp.png b/attachments/Images/asp.png deleted file mode 100644 index b63a29d..0000000 Binary files a/attachments/Images/asp.png and /dev/null differ diff --git a/attachments/Images/asp2.png b/attachments/Images/asp2.png deleted file mode 100644 index a181e3a..0000000 Binary files a/attachments/Images/asp2.png and /dev/null differ diff --git a/attachments/Images/buzmeme.png b/attachments/Images/buzmeme.png deleted file mode 100644 index 95e210d..0000000 Binary files a/attachments/Images/buzmeme.png and /dev/null differ diff --git a/attachments/Images/commitlog.pdf b/attachments/Images/commitlog.pdf deleted file mode 100644 index ef58be1..0000000 Binary files a/attachments/Images/commitlog.pdf and /dev/null differ diff --git a/attachments/Images/commitlog2.pdf b/attachments/Images/commitlog2.pdf deleted file mode 100644 index 413b00a..0000000 Binary files a/attachments/Images/commitlog2.pdf and /dev/null differ diff --git 
a/attachments/Images/compaction.pdf b/attachments/Images/compaction.pdf deleted file mode 100644 index 9465dc2..0000000 Binary files a/attachments/Images/compaction.pdf and /dev/null differ diff --git a/attachments/Images/compafter.png b/attachments/Images/compafter.png deleted file mode 100644 index 4260aae..0000000 Binary files a/attachments/Images/compafter.png and /dev/null differ diff --git a/attachments/Images/compbefore.png b/attachments/Images/compbefore.png deleted file mode 100644 index d4faec4..0000000 Binary files a/attachments/Images/compbefore.png and /dev/null differ diff --git a/attachments/Images/consumergroup.pdf b/attachments/Images/consumergroup.pdf deleted file mode 100644 index 733ccf9..0000000 Binary files a/attachments/Images/consumergroup.pdf and /dev/null differ diff --git a/attachments/Images/declarative0.pdf b/attachments/Images/declarative0.pdf deleted file mode 100644 index 906a4d6..0000000 Binary files a/attachments/Images/declarative0.pdf and /dev/null differ diff --git a/attachments/Images/declarative1.pdf b/attachments/Images/declarative1.pdf deleted file mode 100644 index f3ceb50..0000000 Binary files a/attachments/Images/declarative1.pdf and /dev/null differ diff --git a/attachments/Images/declarative2.pdf b/attachments/Images/declarative2.pdf deleted file mode 100644 index fe15cdf..0000000 Binary files a/attachments/Images/declarative2.pdf and /dev/null differ diff --git a/attachments/Images/errors.png b/attachments/Images/errors.png deleted file mode 100644 index 2c04720..0000000 Binary files a/attachments/Images/errors.png and /dev/null differ diff --git a/attachments/Images/esp.png b/attachments/Images/esp.png deleted file mode 100644 index 165add0..0000000 Binary files a/attachments/Images/esp.png and /dev/null differ diff --git a/attachments/Images/eventitme1.png b/attachments/Images/eventitme1.png deleted file mode 100644 index 4afeae7..0000000 Binary files a/attachments/Images/eventitme1.png and /dev/null differ diff --git a/attachments/Images/flyby 2.png b/attachments/Images/flyby 2.png deleted file mode 100644 index 66ab7a0..0000000 Binary files a/attachments/Images/flyby 2.png and /dev/null differ diff --git a/attachments/Images/flyby.png b/attachments/Images/flyby.png deleted file mode 100644 index 66ab7a0..0000000 Binary files a/attachments/Images/flyby.png and /dev/null differ diff --git a/attachments/Images/flyby2.pdf b/attachments/Images/flyby2.pdf deleted file mode 100644 index 4d9a055..0000000 Binary files a/attachments/Images/flyby2.pdf and /dev/null differ diff --git a/attachments/Images/ibm-velocity.pdf b/attachments/Images/ibm-velocity.pdf deleted file mode 100644 index 85307c8..0000000 Binary files a/attachments/Images/ibm-velocity.pdf and /dev/null differ diff --git a/attachments/Images/ifp.pdf b/attachments/Images/ifp.pdf deleted file mode 100755 index 1f4dc43..0000000 Binary files a/attachments/Images/ifp.pdf and /dev/null differ diff --git a/attachments/Images/img0000.png b/attachments/Images/img0000.png deleted file mode 100755 index 37f8b26..0000000 Binary files a/attachments/Images/img0000.png and /dev/null differ diff --git a/attachments/Images/img0001.png b/attachments/Images/img0001.png deleted file mode 100755 index 0878112..0000000 Binary files a/attachments/Images/img0001.png and /dev/null differ diff --git a/attachments/Images/img0002.png b/attachments/Images/img0002.png deleted file mode 100755 index 410d62c..0000000 Binary files a/attachments/Images/img0002.png and /dev/null differ diff --git 
a/attachments/Images/img0003.png b/attachments/Images/img0003.png deleted file mode 100755 index bb3ac71..0000000 Binary files a/attachments/Images/img0003.png and /dev/null differ diff --git a/attachments/Images/img0004.png b/attachments/Images/img0004.png deleted file mode 100755 index 1264896..0000000 Binary files a/attachments/Images/img0004.png and /dev/null differ diff --git a/attachments/Images/img0005.png b/attachments/Images/img0005.png deleted file mode 100755 index a3e8237..0000000 Binary files a/attachments/Images/img0005.png and /dev/null differ diff --git a/attachments/Images/img0006.png b/attachments/Images/img0006.png deleted file mode 100755 index 7b29147..0000000 Binary files a/attachments/Images/img0006.png and /dev/null differ diff --git a/attachments/Images/img0007.png b/attachments/Images/img0007.png deleted file mode 100755 index 4c79464..0000000 Binary files a/attachments/Images/img0007.png and /dev/null differ diff --git a/attachments/Images/img0008.png b/attachments/Images/img0008.png deleted file mode 100755 index 993f7da..0000000 Binary files a/attachments/Images/img0008.png and /dev/null differ diff --git a/attachments/Images/img0009.png b/attachments/Images/img0009.png deleted file mode 100755 index 077d15b..0000000 Binary files a/attachments/Images/img0009.png and /dev/null differ diff --git a/attachments/Images/img0010.png b/attachments/Images/img0010.png deleted file mode 100755 index d496a8b..0000000 Binary files a/attachments/Images/img0010.png and /dev/null differ diff --git a/attachments/Images/img0012.png b/attachments/Images/img0012.png deleted file mode 100755 index 980b0de..0000000 Binary files a/attachments/Images/img0012.png and /dev/null differ diff --git a/attachments/Images/img0013.png b/attachments/Images/img0013.png deleted file mode 100755 index bc751c4..0000000 Binary files a/attachments/Images/img0013.png and /dev/null differ diff --git a/attachments/Images/img0014.png b/attachments/Images/img0014.png deleted file mode 100755 index b4b30e8..0000000 Binary files a/attachments/Images/img0014.png and /dev/null differ diff --git a/attachments/Images/img0015.png b/attachments/Images/img0015.png deleted file mode 100755 index 7cde08f..0000000 Binary files a/attachments/Images/img0015.png and /dev/null differ diff --git a/attachments/Images/img0016.png b/attachments/Images/img0016.png deleted file mode 100755 index 352dd00..0000000 Binary files a/attachments/Images/img0016.png and /dev/null differ diff --git a/attachments/Images/img0017.png b/attachments/Images/img0017.png deleted file mode 100755 index f930b40..0000000 Binary files a/attachments/Images/img0017.png and /dev/null differ diff --git a/attachments/Images/img0018.png b/attachments/Images/img0018.png deleted file mode 100755 index 3c6f2fa..0000000 Binary files a/attachments/Images/img0018.png and /dev/null differ diff --git a/attachments/Images/img0019.png b/attachments/Images/img0019.png deleted file mode 100755 index 731faaa..0000000 Binary files a/attachments/Images/img0019.png and /dev/null differ diff --git a/attachments/Images/img0020.png b/attachments/Images/img0020.png deleted file mode 100755 index 1808a17..0000000 Binary files a/attachments/Images/img0020.png and /dev/null differ diff --git a/attachments/Images/img0021.png b/attachments/Images/img0021.png deleted file mode 100755 index 18e55d4..0000000 Binary files a/attachments/Images/img0021.png and /dev/null differ diff --git a/attachments/Images/img0022.png b/attachments/Images/img0022.png deleted file mode 100755 index 
1b6ef22..0000000 Binary files a/attachments/Images/img0022.png and /dev/null differ diff --git a/attachments/Images/img0023.png b/attachments/Images/img0023.png deleted file mode 100755 index f3a2a1d..0000000 Binary files a/attachments/Images/img0023.png and /dev/null differ diff --git a/attachments/Images/img0024.png b/attachments/Images/img0024.png deleted file mode 100755 index ad1260f..0000000 Binary files a/attachments/Images/img0024.png and /dev/null differ diff --git a/attachments/Images/img0025.png b/attachments/Images/img0025.png deleted file mode 100755 index 5f6cf0d..0000000 Binary files a/attachments/Images/img0025.png and /dev/null differ diff --git a/attachments/Images/img0026.png b/attachments/Images/img0026.png deleted file mode 100755 index 8ece20b..0000000 Binary files a/attachments/Images/img0026.png and /dev/null differ diff --git a/attachments/Images/kafka 2.png b/attachments/Images/kafka 2.png deleted file mode 100644 index b8d7470..0000000 Binary files a/attachments/Images/kafka 2.png and /dev/null differ diff --git a/attachments/Images/kafka.png b/attachments/Images/kafka.png deleted file mode 100644 index b8d7470..0000000 Binary files a/attachments/Images/kafka.png and /dev/null differ diff --git a/attachments/Images/kafkaconcept.png b/attachments/Images/kafkaconcept.png deleted file mode 100644 index 997699c..0000000 Binary files a/attachments/Images/kafkaconcept.png and /dev/null differ diff --git a/attachments/Images/kafkacvexample.png b/attachments/Images/kafkacvexample.png deleted file mode 100644 index af095a3..0000000 Binary files a/attachments/Images/kafkacvexample.png and /dev/null differ diff --git a/attachments/Images/kafkalogic.pdf b/attachments/Images/kafkalogic.pdf deleted file mode 100644 index a279023..0000000 Binary files a/attachments/Images/kafkalogic.pdf and /dev/null differ diff --git a/attachments/Images/kafkalogic.png b/attachments/Images/kafkalogic.png deleted file mode 100644 index 8826a15..0000000 Binary files a/attachments/Images/kafkalogic.png and /dev/null differ diff --git a/attachments/Images/kafkalogo.png b/attachments/Images/kafkalogo.png deleted file mode 100644 index 44c2ff3..0000000 Binary files a/attachments/Images/kafkalogo.png and /dev/null differ diff --git a/attachments/Images/kafkaphysic.png b/attachments/Images/kafkaphysic.png deleted file mode 100644 index 842e84f..0000000 Binary files a/attachments/Images/kafkaphysic.png and /dev/null differ diff --git a/attachments/Images/kaju-kube-master.png b/attachments/Images/kaju-kube-master.png deleted file mode 100644 index fe69bff..0000000 Binary files a/attachments/Images/kaju-kube-master.png and /dev/null differ diff --git a/attachments/Images/kappa-arch.pdf b/attachments/Images/kappa-arch.pdf deleted file mode 100755 index 39c19d2..0000000 Binary files a/attachments/Images/kappa-arch.pdf and /dev/null differ diff --git a/attachments/Images/keepTheDataMoving.png b/attachments/Images/keepTheDataMoving.png deleted file mode 100755 index be06813..0000000 Binary files a/attachments/Images/keepTheDataMoving.png and /dev/null differ diff --git a/attachments/Images/kstreamktable.pdf b/attachments/Images/kstreamktable.pdf deleted file mode 100644 index 6640315..0000000 Binary files a/attachments/Images/kstreamktable.pdf and /dev/null differ diff --git a/attachments/Images/lambda-arch.pdf b/attachments/Images/lambda-arch.pdf deleted file mode 100755 index aa74f1a..0000000 Binary files a/attachments/Images/lambda-arch.pdf and /dev/null differ diff --git 
a/attachments/Images/logicalmap.png b/attachments/Images/logicalmap.png deleted file mode 100644 index 442a525..0000000 Binary files a/attachments/Images/logicalmap.png and /dev/null differ diff --git a/attachments/Images/modelsissues.png b/attachments/Images/modelsissues.png deleted file mode 100644 index 9110b04..0000000 Binary files a/attachments/Images/modelsissues.png and /dev/null differ diff --git a/attachments/Images/oneminute.jpg b/attachments/Images/oneminute.jpg deleted file mode 100644 index 684a6f3..0000000 Binary files a/attachments/Images/oneminute.jpg and /dev/null differ diff --git a/attachments/Images/order.png b/attachments/Images/order.png deleted file mode 100644 index 1ae81c5..0000000 Binary files a/attachments/Images/order.png and /dev/null differ diff --git a/attachments/Images/parsingmap.png b/attachments/Images/parsingmap.png deleted file mode 100644 index 4bafcf1..0000000 Binary files a/attachments/Images/parsingmap.png and /dev/null differ diff --git a/attachments/Images/physicalmap.png b/attachments/Images/physicalmap.png deleted file mode 100644 index 761d953..0000000 Binary files a/attachments/Images/physicalmap.png and /dev/null differ diff --git a/attachments/Images/physicalplanex.png b/attachments/Images/physicalplanex.png deleted file mode 100644 index 1472783..0000000 Binary files a/attachments/Images/physicalplanex.png and /dev/null differ diff --git a/attachments/Images/placeholdergraph.png b/attachments/Images/placeholdergraph.png deleted file mode 100644 index b272d17..0000000 Binary files a/attachments/Images/placeholdergraph.png and /dev/null differ diff --git a/attachments/Images/programming.pdf b/attachments/Images/programming.pdf deleted file mode 100644 index b3bc36b..0000000 Binary files a/attachments/Images/programming.pdf and /dev/null differ diff --git a/attachments/Images/programming2.key b/attachments/Images/programming2.key deleted file mode 100755 index 5f58ecd..0000000 Binary files a/attachments/Images/programming2.key and /dev/null differ diff --git a/attachments/Images/programming2.pdf b/attachments/Images/programming2.pdf deleted file mode 100644 index 8e7ffd2..0000000 Binary files a/attachments/Images/programming2.pdf and /dev/null differ diff --git a/attachments/Images/programming_step1.pdf b/attachments/Images/programming_step1.pdf deleted file mode 100644 index 88fb0a2..0000000 Binary files a/attachments/Images/programming_step1.pdf and /dev/null differ diff --git a/attachments/Images/programming_step2.pdf b/attachments/Images/programming_step2.pdf deleted file mode 100644 index 04a31e3..0000000 Binary files a/attachments/Images/programming_step2.pdf and /dev/null differ diff --git a/attachments/Images/programming_step3.pdf b/attachments/Images/programming_step3.pdf deleted file mode 100644 index 7f24d98..0000000 Binary files a/attachments/Images/programming_step3.pdf and /dev/null differ diff --git a/attachments/Images/programming_step4.pdf b/attachments/Images/programming_step4.pdf deleted file mode 100644 index 5f3706f..0000000 Binary files a/attachments/Images/programming_step4.pdf and /dev/null differ diff --git a/attachments/Images/pubsub1.png b/attachments/Images/pubsub1.png deleted file mode 100644 index 19470cb..0000000 Binary files a/attachments/Images/pubsub1.png and /dev/null differ diff --git a/attachments/Images/pubsub2.png b/attachments/Images/pubsub2.png deleted file mode 100644 index 9c5ba2a..0000000 Binary files a/attachments/Images/pubsub2.png and /dev/null differ diff --git a/attachments/Images/replicas.pdf 
b/attachments/Images/replicas.pdf deleted file mode 100644 index 734b625..0000000 Binary files a/attachments/Images/replicas.pdf and /dev/null differ diff --git a/attachments/Images/replicas2.png b/attachments/Images/replicas2.png deleted file mode 100644 index 0067c33..0000000 Binary files a/attachments/Images/replicas2.png and /dev/null differ diff --git a/attachments/Images/runexec.png b/attachments/Images/runexec.png deleted file mode 100644 index 5a5e2e8..0000000 Binary files a/attachments/Images/runexec.png and /dev/null differ diff --git a/attachments/Images/samzalogo.png b/attachments/Images/samzalogo.png deleted file mode 100755 index dbb9b2b..0000000 Binary files a/attachments/Images/samzalogo.png and /dev/null differ diff --git a/attachments/Images/sparklogo.png b/attachments/Images/sparklogo.png deleted file mode 100755 index ea90207..0000000 Binary files a/attachments/Images/sparklogo.png and /dev/null differ diff --git a/attachments/Images/stormlogo.png b/attachments/Images/stormlogo.png deleted file mode 100755 index b54a752..0000000 Binary files a/attachments/Images/stormlogo.png and /dev/null differ diff --git a/attachments/Images/stream-table-animation-numVisitedLocations-changelog.gif (dragged).pdf b/attachments/Images/stream-table-animation-numVisitedLocations-changelog.gif (dragged).pdf deleted file mode 100644 index 78a80de..0000000 Binary files a/attachments/Images/stream-table-animation-numVisitedLocations-changelog.gif (dragged).pdf and /dev/null differ diff --git a/attachments/Images/stream-table-animation-numVisitedLocations-changelog.gif.pdf b/attachments/Images/stream-table-animation-numVisitedLocations-changelog.gif.pdf deleted file mode 100644 index 67f36f2..0000000 Binary files a/attachments/Images/stream-table-animation-numVisitedLocations-changelog.gif.pdf and /dev/null differ diff --git a/attachments/Images/stream-table-animation-numVisitedLocations-changelog.png b/attachments/Images/stream-table-animation-numVisitedLocations-changelog.png deleted file mode 100644 index e7c7eb0..0000000 Binary files a/attachments/Images/stream-table-animation-numVisitedLocations-changelog.png and /dev/null differ diff --git a/attachments/Images/stream.png b/attachments/Images/stream.png deleted file mode 100644 index bf53d04..0000000 Binary files a/attachments/Images/stream.png and /dev/null differ diff --git a/attachments/Images/windows/1.pdf b/attachments/Images/windows/1.pdf deleted file mode 100644 index f7de593..0000000 Binary files a/attachments/Images/windows/1.pdf and /dev/null differ diff --git a/attachments/Images/windows/2.pdf b/attachments/Images/windows/2.pdf deleted file mode 100644 index f58b3ee..0000000 Binary files a/attachments/Images/windows/2.pdf and /dev/null differ diff --git a/attachments/Images/windows/3.pdf b/attachments/Images/windows/3.pdf deleted file mode 100644 index 141fc39..0000000 Binary files a/attachments/Images/windows/3.pdf and /dev/null differ diff --git a/attachments/Images/windows/4.pdf b/attachments/Images/windows/4.pdf deleted file mode 100644 index 524a3d9..0000000 Binary files a/attachments/Images/windows/4.pdf and /dev/null differ diff --git a/attachments/Images/windows/5.pdf b/attachments/Images/windows/5.pdf deleted file mode 100644 index 7210087..0000000 Binary files a/attachments/Images/windows/5.pdf and /dev/null differ diff --git a/attachments/Images/windows/6.pdf b/attachments/Images/windows/6.pdf deleted file mode 100644 index b815779..0000000 Binary files a/attachments/Images/windows/6.pdf and /dev/null differ diff 
--git a/attachments/Images/windows/7.pdf b/attachments/Images/windows/7.pdf deleted file mode 100644 index 4c453ed..0000000 Binary files a/attachments/Images/windows/7.pdf and /dev/null differ diff --git a/attachments/Images/windows/duality.png b/attachments/Images/windows/duality.png deleted file mode 100644 index 1ba02d5..0000000 Binary files a/attachments/Images/windows/duality.png and /dev/null differ diff --git a/attachments/InternetMinute17 1.webp b/attachments/InternetMinute17 1.webp deleted file mode 100644 index 1f44316..0000000 Binary files a/attachments/InternetMinute17 1.webp and /dev/null differ diff --git a/attachments/InternetMinute17.webp b/attachments/InternetMinute17.webp deleted file mode 100644 index 1f44316..0000000 Binary files a/attachments/InternetMinute17.webp and /dev/null differ diff --git a/attachments/Joins in php.jpg b/attachments/Joins in php.jpg deleted file mode 100644 index 5437711..0000000 Binary files a/attachments/Joins in php.jpg and /dev/null differ diff --git a/attachments/KafkaArchitecture.png b/attachments/KafkaArchitecture.png deleted file mode 100755 index 1655cf9..0000000 Binary files a/attachments/KafkaArchitecture.png and /dev/null differ diff --git a/attachments/KafkaStatefulProcessing.png b/attachments/KafkaStatefulProcessing.png deleted file mode 100755 index 619459e..0000000 Binary files a/attachments/KafkaStatefulProcessing.png and /dev/null differ diff --git a/attachments/KafkaStreamExample1.png b/attachments/KafkaStreamExample1.png deleted file mode 100755 index 9de559b..0000000 Binary files a/attachments/KafkaStreamExample1.png and /dev/null differ diff --git a/attachments/KafkaStreamExample2.png b/attachments/KafkaStreamExample2.png deleted file mode 100755 index 9d19976..0000000 Binary files a/attachments/KafkaStreamExample2.png and /dev/null differ diff --git a/attachments/KafkaStreamExample3.png b/attachments/KafkaStreamExample3.png deleted file mode 100755 index 8937e1d..0000000 Binary files a/attachments/KafkaStreamExample3.png and /dev/null differ diff --git a/attachments/KafkaStreamingInternally.png b/attachments/KafkaStreamingInternally.png deleted file mode 100755 index 9dd0412..0000000 Binary files a/attachments/KafkaStreamingInternally.png and /dev/null differ diff --git a/attachments/KafkaStreamingLibrary.png b/attachments/KafkaStreamingLibrary.png deleted file mode 100755 index b91c1c0..0000000 Binary files a/attachments/KafkaStreamingLibrary.png and /dev/null differ diff --git a/attachments/KafkaStreamsDSL.png b/attachments/KafkaStreamsDSL.png deleted file mode 100755 index c5fc726..0000000 Binary files a/attachments/KafkaStreamsDSL.png and /dev/null differ diff --git a/attachments/KafkaTopics.png b/attachments/KafkaTopics.png deleted file mode 100755 index 50e82db..0000000 Binary files a/attachments/KafkaTopics.png and /dev/null differ diff --git a/attachments/MovieRDFGraph.png b/attachments/MovieRDFGraph.png deleted file mode 100755 index e62b293..0000000 Binary files a/attachments/MovieRDFGraph.png and /dev/null differ diff --git a/attachments/NavigationalGraph.png b/attachments/NavigationalGraph.png deleted file mode 100755 index 84bea4d..0000000 Binary files a/attachments/NavigationalGraph.png and /dev/null differ diff --git a/attachments/NavigationalGraph2.png b/attachments/NavigationalGraph2.png deleted file mode 100755 index fb78592..0000000 Binary files a/attachments/NavigationalGraph2.png and /dev/null differ diff --git a/attachments/No_No_He's_Got_A_Point_Banner.jpg 
b/attachments/No_No_He's_Got_A_Point_Banner.jpg deleted file mode 100644 index a4ec538..0000000 Binary files a/attachments/No_No_He's_Got_A_Point_Banner.jpg and /dev/null differ diff --git a/attachments/One-to-Many.png b/attachments/One-to-Many.png deleted file mode 100644 index e766df6..0000000 Binary files a/attachments/One-to-Many.png and /dev/null differ diff --git a/attachments/Partitioning _I_ Replication.png b/attachments/Partitioning _I_ Replication.png deleted file mode 100644 index 1984b62..0000000 Binary files a/attachments/Partitioning _I_ Replication.png and /dev/null differ diff --git a/attachments/Pasted image 1.png b/attachments/Pasted image 1.png deleted file mode 100644 index 18f679a..0000000 Binary files a/attachments/Pasted image 1.png and /dev/null differ diff --git a/attachments/Pasted image 2.png b/attachments/Pasted image 2.png deleted file mode 100644 index 0c4b1a3..0000000 Binary files a/attachments/Pasted image 2.png and /dev/null differ diff --git a/attachments/Pasted image 20201001090110.png b/attachments/Pasted image 20201001090110.png deleted file mode 100644 index 99302db..0000000 Binary files a/attachments/Pasted image 20201001090110.png and /dev/null differ diff --git a/attachments/Pasted image 3.png b/attachments/Pasted image 3.png deleted file mode 100644 index 1a9c586..0000000 Binary files a/attachments/Pasted image 3.png and /dev/null differ diff --git a/attachments/Pasted image 4.png b/attachments/Pasted image 4.png deleted file mode 100644 index 0e8a0cf..0000000 Binary files a/attachments/Pasted image 4.png and /dev/null differ diff --git a/attachments/Pasted image 5.png b/attachments/Pasted image 5.png deleted file mode 100644 index 5763502..0000000 Binary files a/attachments/Pasted image 5.png and /dev/null differ diff --git a/attachments/Pasted image 6.png b/attachments/Pasted image 6.png deleted file mode 100644 index 6e224a3..0000000 Binary files a/attachments/Pasted image 6.png and /dev/null differ diff --git a/attachments/Pasted image 7.png b/attachments/Pasted image 7.png deleted file mode 100644 index bfbd75d..0000000 Binary files a/attachments/Pasted image 7.png and /dev/null differ diff --git a/attachments/Pasted image 8.png b/attachments/Pasted image 8.png deleted file mode 100644 index dbbd2bb..0000000 Binary files a/attachments/Pasted image 8.png and /dev/null differ diff --git a/attachments/Pasted image 9.png b/attachments/Pasted image 9.png deleted file mode 100644 index 984be48..0000000 Binary files a/attachments/Pasted image 9.png and /dev/null differ diff --git a/attachments/Pasted image.png b/attachments/Pasted image.png deleted file mode 100644 index 6b23139..0000000 Binary files a/attachments/Pasted image.png and /dev/null differ diff --git a/attachments/Picture 1.png b/attachments/Picture 1.png deleted file mode 100644 index 564fbc9..0000000 Binary files a/attachments/Picture 1.png and /dev/null differ diff --git a/attachments/Processing and Querying.png b/attachments/Processing and Querying.png deleted file mode 100644 index 3b269a9..0000000 Binary files a/attachments/Processing and Querying.png and /dev/null differ diff --git a/attachments/ProcessorLegend.png b/attachments/ProcessorLegend.png deleted file mode 100755 index 6e4a159..0000000 Binary files a/attachments/ProcessorLegend.png and /dev/null differ diff --git a/attachments/ProcessorTopology.png b/attachments/ProcessorTopology.png deleted file mode 100755 index c592357..0000000 Binary files a/attachments/ProcessorTopology.png and /dev/null differ diff --git 
a/attachments/Questions 2.jpg b/attachments/Questions 2.jpg deleted file mode 100755 index b31f196..0000000 Binary files a/attachments/Questions 2.jpg and /dev/null differ diff --git a/attachments/Questions.jpg b/attachments/Questions.jpg deleted file mode 100755 index b31f196..0000000 Binary files a/attachments/Questions.jpg and /dev/null differ diff --git a/attachments/RDBMS_NoSQL-1-1024x785.png b/attachments/RDBMS_NoSQL-1-1024x785.png deleted file mode 100755 index 49431d3..0000000 Binary files a/attachments/RDBMS_NoSQL-1-1024x785.png and /dev/null differ diff --git a/attachments/RDFLogo.png b/attachments/RDFLogo.png deleted file mode 100755 index 3a2ee58..0000000 Binary files a/attachments/RDFLogo.png and /dev/null differ diff --git a/attachments/RDFProcessingFrameworks.png b/attachments/RDFProcessingFrameworks.png deleted file mode 100755 index d54cc16..0000000 Binary files a/attachments/RDFProcessingFrameworks.png and /dev/null differ diff --git a/attachments/RDFfig.png b/attachments/RDFfig.png deleted file mode 100755 index 915f07f..0000000 Binary files a/attachments/RDFfig.png and /dev/null differ diff --git a/attachments/RSTAR(1).png b/attachments/RSTAR(1).png deleted file mode 100755 index 98fc92b..0000000 Binary files a/attachments/RSTAR(1).png and /dev/null differ diff --git a/attachments/RSTAR.png b/attachments/RSTAR.png deleted file mode 100755 index fa70b09..0000000 Binary files a/attachments/RSTAR.png and /dev/null differ diff --git a/attachments/Relationship-syntax.png b/attachments/Relationship-syntax.png deleted file mode 100644 index 7911b3f..0000000 Binary files a/attachments/Relationship-syntax.png and /dev/null differ diff --git a/attachments/Run-Length-Encoding-question.png b/attachments/Run-Length-Encoding-question.png deleted file mode 100644 index f2ad00d..0000000 Binary files a/attachments/Run-Length-Encoding-question.png and /dev/null differ diff --git a/attachments/Run-Length-Encoding-question2.png b/attachments/Run-Length-Encoding-question2.png deleted file mode 100644 index a502775..0000000 Binary files a/attachments/Run-Length-Encoding-question2.png and /dev/null differ diff --git a/attachments/Run-Length-Encoding-question3.png b/attachments/Run-Length-Encoding-question3.png deleted file mode 100644 index bb06a52..0000000 Binary files a/attachments/Run-Length-Encoding-question3.png and /dev/null differ diff --git a/attachments/Run-Length-Encoding.png b/attachments/Run-Length-Encoding.png deleted file mode 100644 index 53cf6f3..0000000 Binary files a/attachments/Run-Length-Encoding.png and /dev/null differ diff --git a/attachments/SHARD(1).png b/attachments/SHARD(1).png deleted file mode 100755 index be0e680..0000000 Binary files a/attachments/SHARD(1).png and /dev/null differ diff --git a/attachments/Screenshot 2020-09-03 at 9.25.34 AM.png b/attachments/Screenshot 2020-09-03 at 9.25.34 AM.png deleted file mode 100644 index 0eeee55..0000000 Binary files a/attachments/Screenshot 2020-09-03 at 9.25.34 AM.png and /dev/null differ diff --git a/attachments/Screenshot 2020-09-06 at 7.52.55 PM.png b/attachments/Screenshot 2020-09-06 at 7.52.55 PM.png deleted file mode 100644 index 302b9aa..0000000 Binary files a/attachments/Screenshot 2020-09-06 at 7.52.55 PM.png and /dev/null differ diff --git a/attachments/Screenshot 2020-10-01 at 9.25.20 AM.png b/attachments/Screenshot 2020-10-01 at 9.25.20 AM.png deleted file mode 100644 index 225e796..0000000 Binary files a/attachments/Screenshot 2020-10-01 at 9.25.20 AM.png and /dev/null differ diff --git 
a/attachments/SkepticalDW.png b/attachments/SkepticalDW.png deleted file mode 100644 index 47d9fb9..0000000 Binary files a/attachments/SkepticalDW.png and /dev/null differ diff --git a/attachments/SparkgraphRDDs.png b/attachments/SparkgraphRDDs.png deleted file mode 100755 index b3aaaf9..0000000 Binary files a/attachments/SparkgraphRDDs.png and /dev/null differ diff --git a/attachments/StreamsAsUnboundedTables.png b/attachments/StreamsAsUnboundedTables.png deleted file mode 100755 index 0d9d649..0000000 Binary files a/attachments/StreamsAsUnboundedTables.png and /dev/null differ diff --git a/attachments/Tango_SixSideIcon_v2.png b/attachments/Tango_SixSideIcon_v2.png deleted file mode 100755 index 58b144a..0000000 Binary files a/attachments/Tango_SixSideIcon_v2.png and /dev/null differ diff --git a/attachments/Thankyou 2.jpeg b/attachments/Thankyou 2.jpeg deleted file mode 100755 index b61f017..0000000 Binary files a/attachments/Thankyou 2.jpeg and /dev/null differ diff --git a/attachments/Thankyou.jpeg b/attachments/Thankyou.jpeg deleted file mode 100755 index b61f017..0000000 Binary files a/attachments/Thankyou.jpeg and /dev/null differ diff --git a/attachments/Thursday-September-17-2020.png b/attachments/Thursday-September-17-2020.png deleted file mode 100644 index a0e297c..0000000 Binary files a/attachments/Thursday-September-17-2020.png and /dev/null differ diff --git a/attachments/Thursday-September-24-2020.png b/attachments/Thursday-September-24-2020.png deleted file mode 100644 index 22ca9c2..0000000 Binary files a/attachments/Thursday-September-24-2020.png and /dev/null differ diff --git a/attachments/UnderstandingDataFreeText.png b/attachments/UnderstandingDataFreeText.png deleted file mode 100644 index cb57565..0000000 Binary files a/attachments/UnderstandingDataFreeText.png and /dev/null differ diff --git a/attachments/Understandingdata.png b/attachments/Understandingdata.png deleted file mode 100644 index b0a9e48..0000000 Binary files a/attachments/Understandingdata.png and /dev/null differ diff --git a/attachments/UpdateOutputMode.png b/attachments/UpdateOutputMode.png deleted file mode 100755 index 8bf3d38..0000000 Binary files a/attachments/UpdateOutputMode.png and /dev/null differ diff --git a/attachments/WatermarksAndHandlingLateData.png b/attachments/WatermarksAndHandlingLateData.png deleted file mode 100755 index 2337962..0000000 Binary files a/attachments/WatermarksAndHandlingLateData.png and /dev/null differ diff --git a/attachments/WhyKafka.png b/attachments/WhyKafka.png deleted file mode 100755 index 0ec008c..0000000 Binary files a/attachments/WhyKafka.png and /dev/null differ diff --git a/attachments/WhyKafka2.png b/attachments/WhyKafka2.png deleted file mode 100755 index 4207e4f..0000000 Binary files a/attachments/WhyKafka2.png and /dev/null differ diff --git a/attachments/WhyKafka3.png b/attachments/WhyKafka3.png deleted file mode 100755 index b31d422..0000000 Binary files a/attachments/WhyKafka3.png and /dev/null differ diff --git a/attachments/WhyKafka4.png b/attachments/WhyKafka4.png deleted file mode 100755 index 4418589..0000000 Binary files a/attachments/WhyKafka4.png and /dev/null differ diff --git a/attachments/WhyKafka5.png b/attachments/WhyKafka5.png deleted file mode 100644 index 19470cb..0000000 Binary files a/attachments/WhyKafka5.png and /dev/null differ diff --git a/attachments/YARS(1).png b/attachments/YARS(1).png deleted file mode 100755 index fb25d4d..0000000 Binary files a/attachments/YARS(1).png and /dev/null differ diff --git 
a/attachments/YARS.png b/attachments/YARS.png deleted file mode 100755 index 4b951fc..0000000 Binary files a/attachments/YARS.png and /dev/null differ diff --git a/attachments/aj-containers.jpeg b/attachments/aj-containers.jpeg deleted file mode 100755 index 907dec6..0000000 Binary files a/attachments/aj-containers.jpeg and /dev/null differ diff --git a/attachments/allegrograph_logo2.png b/attachments/allegrograph_logo2.png deleted file mode 100755 index 8a52736..0000000 Binary files a/attachments/allegrograph_logo2.png and /dev/null differ diff --git a/attachments/ambassador-diagram.odg b/attachments/ambassador-diagram.odg deleted file mode 100755 index a35d409..0000000 Binary files a/attachments/ambassador-diagram.odg and /dev/null differ diff --git a/attachments/ambassador-diagram.png b/attachments/ambassador-diagram.png deleted file mode 100755 index 5acfcb5..0000000 Binary files a/attachments/ambassador-diagram.png and /dev/null differ diff --git a/attachments/api-request-lifecycle.png b/attachments/api-request-lifecycle.png deleted file mode 100755 index e7656ad..0000000 Binary files a/attachments/api-request-lifecycle.png and /dev/null differ diff --git a/attachments/ascii.jpg b/attachments/ascii.jpg deleted file mode 100644 index d3a65f0..0000000 Binary files a/attachments/ascii.jpg and /dev/null differ diff --git a/attachments/attrer2.png b/attachments/attrer2.png deleted file mode 100644 index 1a9c586..0000000 Binary files a/attachments/attrer2.png and /dev/null differ diff --git a/attachments/attrrel.png b/attachments/attrrel.png deleted file mode 100644 index 9faeaed..0000000 Binary files a/attachments/attrrel.png and /dev/null differ diff --git a/attachments/bell-curve.jpg b/attachments/bell-curve.jpg deleted file mode 100755 index 2d1ce93..0000000 Binary files a/attachments/bell-curve.jpg and /dev/null differ diff --git a/attachments/bigdatatimeline1.png b/attachments/bigdatatimeline1.png deleted file mode 100644 index 5399cef..0000000 Binary files a/attachments/bigdatatimeline1.png and /dev/null differ diff --git a/attachments/bigdatatimeline2.png b/attachments/bigdatatimeline2.png deleted file mode 100644 index c350eda..0000000 Binary files a/attachments/bigdatatimeline2.png and /dev/null differ diff --git a/attachments/biggraphs.jpg b/attachments/biggraphs.jpg deleted file mode 100755 index 822508f..0000000 Binary files a/attachments/biggraphs.jpg and /dev/null differ diff --git a/attachments/binpacking-1d-1.gif b/attachments/binpacking-1d-1.gif deleted file mode 100755 index a77e0aa..0000000 Binary files a/attachments/binpacking-1d-1.gif and /dev/null differ diff --git a/attachments/binpacking-1d-2.gif b/attachments/binpacking-1d-2.gif deleted file mode 100755 index bb0cc2a..0000000 Binary files a/attachments/binpacking-1d-2.gif and /dev/null differ diff --git a/attachments/binpacking-2d.gif b/attachments/binpacking-2d.gif deleted file mode 100755 index 827152e..0000000 Binary files a/attachments/binpacking-2d.gif and /dev/null differ diff --git a/attachments/binpacking-3d.gif b/attachments/binpacking-3d.gif deleted file mode 100755 index 0647747..0000000 Binary files a/attachments/binpacking-3d.gif and /dev/null differ diff --git a/attachments/bipartitegraph.png b/attachments/bipartitegraph.png deleted file mode 100644 index b42b990..0000000 Binary files a/attachments/bipartitegraph.png and /dev/null differ diff --git a/attachments/blackbelt.png b/attachments/blackbelt.png deleted file mode 100755 index d478fd8..0000000 Binary files a/attachments/blackbelt.png and 
/dev/null differ diff --git a/attachments/blog-spparql.png b/attachments/blog-spparql.png deleted file mode 100755 index 0d4b74c..0000000 Binary files a/attachments/blog-spparql.png and /dev/null differ diff --git a/attachments/bloomfilter.png b/attachments/bloomfilter.png deleted file mode 100644 index 6e224a3..0000000 Binary files a/attachments/bloomfilter.png and /dev/null differ diff --git a/attachments/brandnew.png b/attachments/brandnew.png deleted file mode 100644 index 2577fad..0000000 Binary files a/attachments/brandnew.png and /dev/null differ diff --git a/attachments/bridge1.png b/attachments/bridge1.png deleted file mode 100755 index db19b00..0000000 Binary files a/attachments/bridge1.png and /dev/null differ diff --git a/attachments/bridge2.png b/attachments/bridge2.png deleted file mode 100755 index 5a3d010..0000000 Binary files a/attachments/bridge2.png and /dev/null differ diff --git a/attachments/bridge3.png b/attachments/bridge3.png deleted file mode 100755 index ed5eb91..0000000 Binary files a/attachments/bridge3.png and /dev/null differ diff --git a/attachments/bsbexamplemaxval.png b/attachments/bsbexamplemaxval.png deleted file mode 100755 index f12c95c..0000000 Binary files a/attachments/bsbexamplemaxval.png and /dev/null differ diff --git a/attachments/bsbmodel.png b/attachments/bsbmodel.png deleted file mode 100755 index 52029fb..0000000 Binary files a/attachments/bsbmodel.png and /dev/null differ diff --git a/attachments/bsbpagerankexample1.png b/attachments/bsbpagerankexample1.png deleted file mode 100755 index 4bcad44..0000000 Binary files a/attachments/bsbpagerankexample1.png and /dev/null differ diff --git a/attachments/bsbpagerankexample10.png b/attachments/bsbpagerankexample10.png deleted file mode 100755 index 05d08d6..0000000 Binary files a/attachments/bsbpagerankexample10.png and /dev/null differ diff --git a/attachments/bsbpagerankexample11.png b/attachments/bsbpagerankexample11.png deleted file mode 100755 index fcd1cd5..0000000 Binary files a/attachments/bsbpagerankexample11.png and /dev/null differ diff --git a/attachments/bsbpagerankexample12.png b/attachments/bsbpagerankexample12.png deleted file mode 100755 index 3e2bd65..0000000 Binary files a/attachments/bsbpagerankexample12.png and /dev/null differ diff --git a/attachments/bsbpagerankexample2.png b/attachments/bsbpagerankexample2.png deleted file mode 100755 index 4a1c224..0000000 Binary files a/attachments/bsbpagerankexample2.png and /dev/null differ diff --git a/attachments/bsbpagerankexample3.png b/attachments/bsbpagerankexample3.png deleted file mode 100755 index d140d31..0000000 Binary files a/attachments/bsbpagerankexample3.png and /dev/null differ diff --git a/attachments/bsbpagerankexample4.png b/attachments/bsbpagerankexample4.png deleted file mode 100755 index 60e8b8b..0000000 Binary files a/attachments/bsbpagerankexample4.png and /dev/null differ diff --git a/attachments/bsbpagerankexample5.png b/attachments/bsbpagerankexample5.png deleted file mode 100755 index 561a36c..0000000 Binary files a/attachments/bsbpagerankexample5.png and /dev/null differ diff --git a/attachments/bsbpagerankexample6.png b/attachments/bsbpagerankexample6.png deleted file mode 100755 index 60ef2ea..0000000 Binary files a/attachments/bsbpagerankexample6.png and /dev/null differ diff --git a/attachments/bsbpagerankexample7.png b/attachments/bsbpagerankexample7.png deleted file mode 100755 index a326e82..0000000 Binary files a/attachments/bsbpagerankexample7.png and /dev/null differ diff --git 
a/attachments/bsbpagerankexample8.png b/attachments/bsbpagerankexample8.png deleted file mode 100755 index 97726d1..0000000 Binary files a/attachments/bsbpagerankexample8.png and /dev/null differ diff --git a/attachments/bsbpagerankexample9.png b/attachments/bsbpagerankexample9.png deleted file mode 100755 index 9fc2bd1..0000000 Binary files a/attachments/bsbpagerankexample9.png and /dev/null differ diff --git a/attachments/bsbseperator.png b/attachments/bsbseperator.png deleted file mode 100755 index 37d6ad4..0000000 Binary files a/attachments/bsbseperator.png and /dev/null differ diff --git a/attachments/captheorem6yo.png b/attachments/captheorem6yo.png deleted file mode 100644 index 031bde2..0000000 Binary files a/attachments/captheorem6yo.png and /dev/null differ diff --git a/attachments/cassandra-25.png b/attachments/cassandra-25.png deleted file mode 100644 index f2ddbbe..0000000 Binary files a/attachments/cassandra-25.png and /dev/null differ diff --git a/attachments/cassandra-26.png b/attachments/cassandra-26.png deleted file mode 100644 index d77bdc7..0000000 Binary files a/attachments/cassandra-26.png and /dev/null differ diff --git a/attachments/cassandra-27.png b/attachments/cassandra-27.png deleted file mode 100644 index 66caef1..0000000 Binary files a/attachments/cassandra-27.png and /dev/null differ diff --git a/attachments/cassandra-28.png b/attachments/cassandra-28.png deleted file mode 100644 index 29b57b1..0000000 Binary files a/attachments/cassandra-28.png and /dev/null differ diff --git a/attachments/cassandra-29.png b/attachments/cassandra-29.png deleted file mode 100644 index 0c90053..0000000 Binary files a/attachments/cassandra-29.png and /dev/null differ diff --git a/attachments/cassandra-30.png b/attachments/cassandra-30.png deleted file mode 100644 index 54e94f7..0000000 Binary files a/attachments/cassandra-30.png and /dev/null differ diff --git a/attachments/cassandra-31.png b/attachments/cassandra-31.png deleted file mode 100644 index 52968d0..0000000 Binary files a/attachments/cassandra-31.png and /dev/null differ diff --git a/attachments/cassandra-32.png b/attachments/cassandra-32.png deleted file mode 100644 index a59c952..0000000 Binary files a/attachments/cassandra-32.png and /dev/null differ diff --git a/attachments/cassandra-33.png b/attachments/cassandra-33.png deleted file mode 100644 index 6f067a3..0000000 Binary files a/attachments/cassandra-33.png and /dev/null differ diff --git a/attachments/cassandra-34.png b/attachments/cassandra-34.png deleted file mode 100644 index 6f067a3..0000000 Binary files a/attachments/cassandra-34.png and /dev/null differ diff --git a/attachments/cassandra-35.png b/attachments/cassandra-35.png deleted file mode 100644 index 8288079..0000000 Binary files a/attachments/cassandra-35.png and /dev/null differ diff --git a/attachments/cassandra-36.png b/attachments/cassandra-36.png deleted file mode 100644 index 564fbc9..0000000 Binary files a/attachments/cassandra-36.png and /dev/null differ diff --git a/attachments/cassandra-37.png b/attachments/cassandra-37.png deleted file mode 100644 index a49e9b8..0000000 Binary files a/attachments/cassandra-37.png and /dev/null differ diff --git a/attachments/casssandra-38.png b/attachments/casssandra-38.png deleted file mode 100644 index ce7e57e..0000000 Binary files a/attachments/casssandra-38.png and /dev/null differ diff --git a/attachments/ccequation.jpg b/attachments/ccequation.jpg deleted file mode 100755 index 8241e3b..0000000 Binary files a/attachments/ccequation.jpg and 
/dev/null differ diff --git a/attachments/ccequation2.jpg b/attachments/ccequation2.jpg deleted file mode 100755 index 449526b..0000000 Binary files a/attachments/ccequation2.jpg and /dev/null differ diff --git a/attachments/central-model.png b/attachments/central-model.png deleted file mode 100755 index e049e13..0000000 Binary files a/attachments/central-model.png and /dev/null differ diff --git a/attachments/centralized-model.png b/attachments/centralized-model.png deleted file mode 100755 index 7fdb9fc..0000000 Binary files a/attachments/centralized-model.png and /dev/null differ diff --git a/attachments/ch6.pdf b/attachments/ch6.pdf deleted file mode 100644 index 2947ea6..0000000 Binary files a/attachments/ch6.pdf and /dev/null differ diff --git a/attachments/challenges.png b/attachments/challenges.png deleted file mode 100755 index ba42ecf..0000000 Binary files a/attachments/challenges.png and /dev/null differ diff --git a/attachments/ci-cd-with-docker.png b/attachments/ci-cd-with-docker.png deleted file mode 100755 index 333d7ec..0000000 Binary files a/attachments/ci-cd-with-docker.png and /dev/null differ diff --git a/attachments/cleansing.png b/attachments/cleansing.png deleted file mode 100644 index 78a4204..0000000 Binary files a/attachments/cleansing.png and /dev/null differ diff --git a/attachments/clusteredpropertytable.png b/attachments/clusteredpropertytable.png deleted file mode 100755 index 8749d19..0000000 Binary files a/attachments/clusteredpropertytable.png and /dev/null differ diff --git a/attachments/codd.png b/attachments/codd.png deleted file mode 100644 index 0c4b1a3..0000000 Binary files a/attachments/codd.png and /dev/null differ diff --git a/attachments/collaborativefilter.png b/attachments/collaborativefilter.png deleted file mode 100755 index 8b34b1f..0000000 Binary files a/attachments/collaborativefilter.png and /dev/null differ diff --git a/attachments/collaborativefilterexample.png b/attachments/collaborativefilterexample.png deleted file mode 100755 index b49741f..0000000 Binary files a/attachments/collaborativefilterexample.png and /dev/null differ diff --git a/attachments/collaborativefilterlog.png b/attachments/collaborativefilterlog.png deleted file mode 100755 index 0b96036..0000000 Binary files a/attachments/collaborativefilterlog.png and /dev/null differ diff --git a/attachments/commitlog.pdf b/attachments/commitlog.pdf deleted file mode 100644 index ef58be1..0000000 Binary files a/attachments/commitlog.pdf and /dev/null differ diff --git a/attachments/commitlog.png b/attachments/commitlog.png deleted file mode 100644 index 914f07a..0000000 Binary files a/attachments/commitlog.png and /dev/null differ diff --git a/attachments/commitlog2.pdf b/attachments/commitlog2.pdf deleted file mode 100644 index 413b00a..0000000 Binary files a/attachments/commitlog2.pdf and /dev/null differ diff --git a/attachments/communicationmodels.png b/attachments/communicationmodels.png deleted file mode 100755 index 3ffe7f3..0000000 Binary files a/attachments/communicationmodels.png and /dev/null differ diff --git a/attachments/compaction.pdf b/attachments/compaction.pdf deleted file mode 100644 index 9465dc2..0000000 Binary files a/attachments/compaction.pdf and /dev/null differ diff --git a/attachments/compaction.png b/attachments/compaction.png deleted file mode 100644 index ead7668..0000000 Binary files a/attachments/compaction.png and /dev/null differ diff --git a/attachments/composeapp.png b/attachments/composeapp.png deleted file mode 100755 index 907731b..0000000 
Binary files a/attachments/composeapp.png and /dev/null differ diff --git a/attachments/composeup.gif b/attachments/composeup.gif deleted file mode 100755 index b84db1c..0000000 Binary files a/attachments/composeup.gif and /dev/null differ diff --git a/attachments/composeup.png b/attachments/composeup.png deleted file mode 100644 index 2717072..0000000 Binary files a/attachments/composeup.png and /dev/null differ diff --git a/attachments/compute_engine.png b/attachments/compute_engine.png deleted file mode 100644 index b0c5354..0000000 Binary files a/attachments/compute_engine.png and /dev/null differ diff --git a/attachments/conductor.jpg b/attachments/conductor.jpg deleted file mode 100755 index 2fa74bb..0000000 Binary files a/attachments/conductor.jpg and /dev/null differ diff --git a/attachments/consolidating.png b/attachments/consolidating.png deleted file mode 100644 index 9ae0265..0000000 Binary files a/attachments/consolidating.png and /dev/null differ diff --git a/attachments/consumergroup.pdf b/attachments/consumergroup.pdf deleted file mode 100644 index 733ccf9..0000000 Binary files a/attachments/consumergroup.pdf and /dev/null differ diff --git a/attachments/container-background.jpg b/attachments/container-background.jpg deleted file mode 100755 index 9d21681..0000000 Binary files a/attachments/container-background.jpg and /dev/null differ diff --git a/attachments/container-layers.jpg b/attachments/container-layers.jpg deleted file mode 100755 index 00c30a2..0000000 Binary files a/attachments/container-layers.jpg and /dev/null differ diff --git a/attachments/containers-as-lightweight-vms.png b/attachments/containers-as-lightweight-vms.png deleted file mode 100755 index 74b9168..0000000 Binary files a/attachments/containers-as-lightweight-vms.png and /dev/null differ diff --git a/attachments/correcting.png b/attachments/correcting.png deleted file mode 100644 index f897d28..0000000 Binary files a/attachments/correcting.png and /dev/null differ diff --git a/attachments/cql.png b/attachments/cql.png deleted file mode 100644 index 57bd127..0000000 Binary files a/attachments/cql.png and /dev/null differ diff --git a/attachments/cropped-relationships-matter-text-logo-2018-b.png b/attachments/cropped-relationships-matter-text-logo-2018-b.png deleted file mode 100755 index 15b99c4..0000000 Binary files a/attachments/cropped-relationships-matter-text-logo-2018-b.png and /dev/null differ diff --git a/attachments/cypherexamples.jpg b/attachments/cypherexamples.jpg deleted file mode 100755 index def94a6..0000000 Binary files a/attachments/cypherexamples.jpg and /dev/null differ diff --git a/attachments/cypherexamples2.jpg b/attachments/cypherexamples2.jpg deleted file mode 100755 index 32bdfd4..0000000 Binary files a/attachments/cypherexamples2.jpg and /dev/null differ diff --git a/attachments/cypherneo4j.jpg b/attachments/cypherneo4j.jpg deleted file mode 100755 index 396c583..0000000 Binary files a/attachments/cypherneo4j.jpg and /dev/null differ diff --git a/attachments/cypherngpsexample1.png b/attachments/cypherngpsexample1.png deleted file mode 100755 index 2a69739..0000000 Binary files a/attachments/cypherngpsexample1.png and /dev/null differ diff --git a/attachments/cypherngpsexample2.png b/attachments/cypherngpsexample2.png deleted file mode 100755 index 77351f5..0000000 Binary files a/attachments/cypherngpsexample2.png and /dev/null differ diff --git a/attachments/cypherngpsexample3.png b/attachments/cypherngpsexample3.png deleted file mode 100755 index b900cef..0000000 Binary 
files a/attachments/cypherngpsexample3.png and /dev/null differ diff --git a/attachments/cypherngpsexample4.png b/attachments/cypherngpsexample4.png deleted file mode 100755 index c5bb8ab..0000000 Binary files a/attachments/cypherngpsexample4.png and /dev/null differ diff --git a/attachments/cypherngpsexample5.png b/attachments/cypherngpsexample5.png deleted file mode 100755 index f8b60e2..0000000 Binary files a/attachments/cypherngpsexample5.png and /dev/null differ diff --git a/attachments/dag.pdf b/attachments/dag.pdf deleted file mode 100644 index fd418e9..0000000 Binary files a/attachments/dag.pdf and /dev/null differ diff --git a/attachments/dag.png b/attachments/dag.png deleted file mode 100644 index 4af869a..0000000 Binary files a/attachments/dag.png and /dev/null differ diff --git a/attachments/data quality issues.png b/attachments/data quality issues.png deleted file mode 100644 index acbec36..0000000 Binary files a/attachments/data quality issues.png and /dev/null differ diff --git a/attachments/dataengineer.png b/attachments/dataengineer.png deleted file mode 100644 index 090797c..0000000 Binary files a/attachments/dataengineer.png and /dev/null differ diff --git a/attachments/datalake.png b/attachments/datalake.png deleted file mode 100644 index b7ea433..0000000 Binary files a/attachments/datalake.png and /dev/null differ diff --git a/attachments/datalakewf.png b/attachments/datalakewf.png deleted file mode 100644 index ce340e7..0000000 Binary files a/attachments/datalakewf.png and /dev/null differ diff --git a/attachments/datawranglingpipeline.png b/attachments/datawranglingpipeline.png deleted file mode 100644 index efc0b64..0000000 Binary files a/attachments/datawranglingpipeline.png and /dev/null differ diff --git a/attachments/delay-hasher.png b/attachments/delay-hasher.png deleted file mode 100755 index 16c71a0..0000000 Binary files a/attachments/delay-hasher.png and /dev/null differ diff --git a/attachments/delay-rng.png b/attachments/delay-rng.png deleted file mode 100755 index d5cdb7e..0000000 Binary files a/attachments/delay-rng.png and /dev/null differ diff --git a/attachments/demo.jpg b/attachments/demo.jpg deleted file mode 100755 index 13e1d97..0000000 Binary files a/attachments/demo.jpg and /dev/null differ diff --git a/attachments/differencevisual.png b/attachments/differencevisual.png deleted file mode 100644 index a340e9a..0000000 Binary files a/attachments/differencevisual.png and /dev/null differ diff --git a/attachments/docker-ce-ee-lifecycle.png b/attachments/docker-ce-ee-lifecycle.png deleted file mode 100755 index 9eebd65..0000000 Binary files a/attachments/docker-ce-ee-lifecycle.png and /dev/null differ diff --git a/attachments/docker-con-15-logo.svg b/attachments/docker-con-15-logo.svg deleted file mode 100755 index 82df270..0000000 --- a/attachments/docker-con-15-logo.svg +++ /dev/null @@ -1,213 +0,0 @@ [stripped SVG markup: DockerCon 15 logo] diff --git a/attachments/docker-ecosystem-2015.png b/attachments/docker-ecosystem-2015.png deleted file mode 100755 index 00210c1..0000000 Binary files a/attachments/docker-ecosystem-2015.png and /dev/null differ diff --git a/attachments/docker-engine-architecture.svg b/attachments/docker-engine-architecture.svg deleted file mode 100755 index afe563a..0000000 --- a/attachments/docker-engine-architecture.svg +++ /dev/null @@ -1,2597 +0,0 @@ [stripped SVG markup; only metadata survives extraction: 2014-04-15 00:37Z, image/svg+xml] diff --git a/attachments/docker-service-create.svg b/attachments/docker-service-create.svg deleted file mode 100755 index b384626..0000000 --- a/attachments/docker-service-create.svg +++ /dev/null @@ -1,4 +0,0 @@ [stripped SVG markup] diff --git a/attachments/dockercoins-2015.png b/attachments/dockercoins-2015.png deleted file mode 100755 index 923c095..0000000 Binary files a/attachments/dockercoins-2015.png and /dev/null differ diff --git a/attachments/dockercoins-diagram.svg b/attachments/dockercoins-diagram.svg deleted file mode 100755 index ac05fe9..0000000 --- a/attachments/dockercoins-diagram.svg +++ /dev/null @@ -1,2 +0,0 @@
[stripped SVG: DockerCoins architecture diagram — containers worker, rng, hasher, redis, webui; links labeled GET, POST, and TCP; an external user issues GET requests to the webui; caption: "DockerCoins application (five containers)"]
\ No newline at end of file diff --git a/attachments/dockercoins-diagram.xml b/attachments/dockercoins-diagram.xml deleted file mode 100755 index c2c3893..0000000 --- a/attachments/dockercoins-diagram.xml +++ /dev/null @@ -1 +0,0 @@ [base64-compressed draw.io XML omitted] \ No newline at end of file diff --git a/attachments/dockercoins-multi-node.png b/attachments/dockercoins-multi-node.png deleted file mode 100755 index da8755e..0000000 Binary files a/attachments/dockercoins-multi-node.png and /dev/null differ diff --git a/attachments/dockercoins-single-node.png b/attachments/dockercoins-single-node.png deleted file mode 100755 index 4542b91..0000000 Binary files a/attachments/dockercoins-single-node.png and /dev/null differ diff --git a/attachments/dockercoins.png b/attachments/dockercoins.png deleted file mode 100755 index 01f240a..0000000 Binary files a/attachments/dockercoins.png and /dev/null differ diff --git a/attachments/dockerd-and-containerd.png b/attachments/dockerd-and-containerd.png deleted file mode 100755 index 74e3487..0000000 Binary files a/attachments/dockerd-and-containerd.png and /dev/null differ diff --git a/attachments/domain-driven-design-model-driven-design/main.png b/attachments/domain-driven-design-model-driven-design/main.png deleted file mode 100644 index 40588b6..0000000 Binary files a/attachments/domain-driven-design-model-driven-design/main.png and /dev/null differ diff --git a/attachments/domain-driven-design-model-driven-design/stage0.png b/attachments/domain-driven-design-model-driven-design/stage0.png deleted file mode 100644 index a1cea02..0000000 Binary files a/attachments/domain-driven-design-model-driven-design/stage0.png and /dev/null differ diff --git a/attachments/domain-driven-design-model-driven-design/stage1.png b/attachments/domain-driven-design-model-driven-design/stage1.png deleted file mode 100644 index fbc71a2..0000000 Binary files a/attachments/domain-driven-design-model-driven-design/stage1.png and /dev/null differ diff --git a/attachments/domain-driven-design-model-driven-design/stage2.png b/attachments/domain-driven-design-model-driven-design/stage2.png deleted file mode 100644 index 9f5bf2a..0000000 Binary files a/attachments/domain-driven-design-model-driven-design/stage2.png and /dev/null differ diff --git
a/attachments/domain-driven-design-model-driven-design/stage3.png b/attachments/domain-driven-design-model-driven-design/stage3.png deleted file mode 100644 index b78d537..0000000 Binary files a/attachments/domain-driven-design-model-driven-design/stage3.png and /dev/null differ diff --git a/attachments/domain-driven-design-model-driven-design/stage4.png b/attachments/domain-driven-design-model-driven-design/stage4.png deleted file mode 100644 index 277542d..0000000 Binary files a/attachments/domain-driven-design-model-driven-design/stage4.png and /dev/null differ diff --git a/attachments/domain-driven-design-model-driven-design/stage5.png b/attachments/domain-driven-design-model-driven-design/stage5.png deleted file mode 100644 index 76d3f78..0000000 Binary files a/attachments/domain-driven-design-model-driven-design/stage5.png and /dev/null differ diff --git a/attachments/domain-driven-design-model-driven-design/stage6.png b/attachments/domain-driven-design-model-driven-design/stage6.png deleted file mode 100644 index 69d797c..0000000 Binary files a/attachments/domain-driven-design-model-driven-design/stage6.png and /dev/null differ diff --git a/attachments/download.jpg b/attachments/download.jpg deleted file mode 100644 index f24cb13..0000000 Binary files a/attachments/download.jpg and /dev/null differ diff --git a/attachments/download.png b/attachments/download.png deleted file mode 100644 index 004be73..0000000 Binary files a/attachments/download.png and /dev/null differ diff --git a/attachments/dragons.jpg b/attachments/dragons.jpg deleted file mode 100755 index 1e6dec5..0000000 Binary files a/attachments/dragons.jpg and /dev/null differ diff --git a/attachments/dsglogo.png b/attachments/dsglogo.png deleted file mode 100644 index bfe7505..0000000 Binary files a/attachments/dsglogo.png and /dev/null differ diff --git a/attachments/dwvsdl.png b/attachments/dwvsdl.png deleted file mode 100644 index ad58843..0000000 Binary files a/attachments/dwvsdl.png and /dev/null differ diff --git a/attachments/edge.png b/attachments/edge.png deleted file mode 100644 index 0c2e24e..0000000 Binary files a/attachments/edge.png and /dev/null differ diff --git a/attachments/emc_understanding_data_lakes_infographic.pdf b/attachments/emc_understanding_data_lakes_infographic.pdf deleted file mode 100644 index 77d71dc..0000000 Binary files a/attachments/emc_understanding_data_lakes_infographic.pdf and /dev/null differ diff --git a/attachments/end.jpg b/attachments/end.jpg deleted file mode 100755 index 9e4e424..0000000 Binary files a/attachments/end.jpg and /dev/null differ diff --git a/attachments/entities.png b/attachments/entities.png deleted file mode 100644 index 14d03b4..0000000 Binary files a/attachments/entities.png and /dev/null differ diff --git a/attachments/entrypoint.jpg b/attachments/entrypoint.jpg deleted file mode 100755 index 0d10360..0000000 Binary files a/attachments/entrypoint.jpg and /dev/null differ diff --git a/attachments/equations.png b/attachments/equations.png deleted file mode 100755 index a046ccf..0000000 Binary files a/attachments/equations.png and /dev/null differ diff --git a/attachments/er-syntax-summary-1.png b/attachments/er-syntax-summary-1.png deleted file mode 100644 index 7bca291..0000000 Binary files a/attachments/er-syntax-summary-1.png and /dev/null differ diff --git a/attachments/er-syntax-summary-2.png b/attachments/er-syntax-summary-2.png deleted file mode 100644 index aa0e934..0000000 Binary files a/attachments/er-syntax-summary-2.png and /dev/null differ diff 
--git a/attachments/exahustiveindexing.png b/attachments/exahustiveindexing.png deleted file mode 100755 index aac6d1e..0000000 Binary files a/attachments/exahustiveindexing.png and /dev/null differ diff --git a/attachments/extra-details.png b/attachments/extra-details.png deleted file mode 100755 index 9b13850..0000000 Binary files a/attachments/extra-details.png and /dev/null differ diff --git a/attachments/firewall_rules.png b/attachments/firewall_rules.png deleted file mode 100644 index 2b5e6e4..0000000 Binary files a/attachments/firewall_rules.png and /dev/null differ diff --git a/attachments/fu-face.jpg b/attachments/fu-face.jpg deleted file mode 100755 index f2c62a0..0000000 Binary files a/attachments/fu-face.jpg and /dev/null differ diff --git a/attachments/gcoreexample1.png b/attachments/gcoreexample1.png deleted file mode 100755 index c4ffb0a..0000000 Binary files a/attachments/gcoreexample1.png and /dev/null differ diff --git a/attachments/gcoreexample2.png b/attachments/gcoreexample2.png deleted file mode 100755 index b47301c..0000000 Binary files a/attachments/gcoreexample2.png and /dev/null differ diff --git a/attachments/gcoreexample3.png b/attachments/gcoreexample3.png deleted file mode 100755 index 7b1ef5f..0000000 Binary files a/attachments/gcoreexample3.png and /dev/null differ diff --git a/attachments/gcoreexample4.png b/attachments/gcoreexample4.png deleted file mode 100755 index de624e4..0000000 Binary files a/attachments/gcoreexample4.png and /dev/null differ diff --git a/attachments/gcoreexample5.png b/attachments/gcoreexample5.png deleted file mode 100755 index fbbf06a..0000000 Binary files a/attachments/gcoreexample5.png and /dev/null differ diff --git a/attachments/gcoreexample6.png b/attachments/gcoreexample6.png deleted file mode 100755 index d7bf810..0000000 Binary files a/attachments/gcoreexample6.png and /dev/null differ diff --git a/attachments/getting-inside.png b/attachments/getting-inside.png deleted file mode 100755 index b1ac54d..0000000 Binary files a/attachments/getting-inside.png and /dev/null differ diff --git a/attachments/giphy-2773.gif b/attachments/giphy-2773.gif deleted file mode 100644 index e76abb2..0000000 Binary files a/attachments/giphy-2773.gif and /dev/null differ diff --git a/attachments/girapharchit.png b/attachments/girapharchit.png deleted file mode 100755 index 29e84a5..0000000 Binary files a/attachments/girapharchit.png and /dev/null differ diff --git a/attachments/girapheffanddeff.png b/attachments/girapheffanddeff.png deleted file mode 100755 index 8f5471f..0000000 Binary files a/attachments/girapheffanddeff.png and /dev/null differ diff --git a/attachments/giraphexamplemaxval.png b/attachments/giraphexamplemaxval.png deleted file mode 100755 index 7305e82..0000000 Binary files a/attachments/giraphexamplemaxval.png and /dev/null differ diff --git a/attachments/giraphimplementationexample.png b/attachments/giraphimplementationexample.png deleted file mode 100755 index 96db8ea..0000000 Binary files a/attachments/giraphimplementationexample.png and /dev/null differ diff --git a/attachments/giraphimplementationexample2.png b/attachments/giraphimplementationexample2.png deleted file mode 100755 index 4c20992..0000000 Binary files a/attachments/giraphimplementationexample2.png and /dev/null differ diff --git a/attachments/giraphjoblifetime.png b/attachments/giraphjoblifetime.png deleted file mode 100755 index 641e6f5..0000000 Binary files a/attachments/giraphjoblifetime.png and /dev/null differ diff --git 
a/attachments/giraphtimeline.png b/attachments/giraphtimeline.png deleted file mode 100755 index 3df10da..0000000 Binary files a/attachments/giraphtimeline.png and /dev/null differ diff --git a/attachments/giraphwithhadoop.png b/attachments/giraphwithhadoop.png deleted file mode 100755 index 15e7da3..0000000 Binary files a/attachments/giraphwithhadoop.png and /dev/null differ diff --git a/attachments/giraphworking.png b/attachments/giraphworking.png deleted file mode 100755 index 667d086..0000000 Binary files a/attachments/giraphworking.png and /dev/null differ diff --git a/attachments/giraphworking2.png b/attachments/giraphworking2.png deleted file mode 100755 index 8bc419c..0000000 Binary files a/attachments/giraphworking2.png and /dev/null differ diff --git a/attachments/google-dataeng.png b/attachments/google-dataeng.png deleted file mode 100644 index d899f6d..0000000 Binary files a/attachments/google-dataeng.png and /dev/null differ diff --git a/attachments/gradooplogo.png b/attachments/gradooplogo.png deleted file mode 100755 index 9840af1..0000000 Binary files a/attachments/gradooplogo.png and /dev/null differ diff --git a/attachments/gradoopoperators.png b/attachments/gradoopoperators.png deleted file mode 100755 index f6d6531..0000000 Binary files a/attachments/gradoopoperators.png and /dev/null differ diff --git a/attachments/gradoopworking.png b/attachments/gradoopworking.png deleted file mode 100755 index 2e65d62..0000000 Binary files a/attachments/gradoopworking.png and /dev/null differ diff --git a/attachments/gradoopworking2.png b/attachments/gradoopworking2.png deleted file mode 100755 index f3f3588..0000000 Binary files a/attachments/gradoopworking2.png and /dev/null differ diff --git a/attachments/grafana-add-graph.png b/attachments/grafana-add-graph.png deleted file mode 100755 index 759a798..0000000 Binary files a/attachments/grafana-add-graph.png and /dev/null differ diff --git a/attachments/grafana-add-source.png b/attachments/grafana-add-source.png deleted file mode 100755 index 2295752..0000000 Binary files a/attachments/grafana-add-source.png and /dev/null differ diff --git a/attachments/graph-databases-10-638o.jpg b/attachments/graph-databases-10-638o.jpg deleted file mode 100755 index ffcc4fe..0000000 Binary files a/attachments/graph-databases-10-638o.jpg and /dev/null differ diff --git a/attachments/graph0.png b/attachments/graph0.png deleted file mode 100644 index ddb6a00..0000000 Binary files a/attachments/graph0.png and /dev/null differ diff --git a/attachments/graph2.png b/attachments/graph2.png deleted file mode 100644 index 94a5a25..0000000 Binary files a/attachments/graph2.png and /dev/null differ diff --git a/attachments/graph3.png b/attachments/graph3.png deleted file mode 100644 index c15e13c..0000000 Binary files a/attachments/graph3.png and /dev/null differ diff --git a/attachments/graph5.png b/attachments/graph5.png deleted file mode 100644 index 1435c8f..0000000 Binary files a/attachments/graph5.png and /dev/null differ diff --git a/attachments/graph5b.png b/attachments/graph5b.png deleted file mode 100644 index df39ac8..0000000 Binary files a/attachments/graph5b.png and /dev/null differ diff --git a/attachments/graphDBexample.png b/attachments/graphDBexample.png deleted file mode 100755 index 1717a92..0000000 Binary files a/attachments/graphDBexample.png and /dev/null differ diff --git a/attachments/graphDatamodels.jpg b/attachments/graphDatamodels.jpg deleted file mode 100755 index 0ba9465..0000000 Binary files a/attachments/graphDatamodels.jpg 
and /dev/null differ diff --git a/attachments/graph_big-data-number of v's over time.webp b/attachments/graph_big-data-number of v's over time.webp deleted file mode 100644 index 61e496f..0000000 Binary files a/attachments/graph_big-data-number of v's over time.webp and /dev/null differ diff --git a/attachments/graphanalytic.png b/attachments/graphanalytic.png deleted file mode 100755 index 85d4b7d..0000000 Binary files a/attachments/graphanalytic.png and /dev/null differ diff --git a/attachments/graphapplications.png b/attachments/graphapplications.png deleted file mode 100755 index 45dac66..0000000 Binary files a/attachments/graphapplications.png and /dev/null differ diff --git a/attachments/graphcomputeengine.png b/attachments/graphcomputeengine.png deleted file mode 100755 index efc9ece..0000000 Binary files a/attachments/graphcomputeengine.png and /dev/null differ diff --git a/attachments/graphcomputeengines.png b/attachments/graphcomputeengines.png deleted file mode 100755 index 3846fd1..0000000 Binary files a/attachments/graphcomputeengines.png and /dev/null differ diff --git a/attachments/graphdatabases.jpg b/attachments/graphdatabases.jpg deleted file mode 100755 index 51e253b..0000000 Binary files a/attachments/graphdatabases.jpg and /dev/null differ diff --git a/attachments/graphdbsRelations.jpg b/attachments/graphdbsRelations.jpg deleted file mode 100755 index e8f5d85..0000000 Binary files a/attachments/graphdbsRelations.jpg and /dev/null differ diff --git a/attachments/graphdbspopular.jpg b/attachments/graphdbspopular.jpg deleted file mode 100755 index 076ea0a..0000000 Binary files a/attachments/graphdbspopular.jpg and /dev/null differ diff --git a/attachments/graphemergingtechnologies.png b/attachments/graphemergingtechnologies.png deleted file mode 100755 index 8ded9dc..0000000 Binary files a/attachments/graphemergingtechnologies.png and /dev/null differ diff --git a/attachments/graphexamplecc.png b/attachments/graphexamplecc.png deleted file mode 100755 index 0d9398c..0000000 Binary files a/attachments/graphexamplecc.png and /dev/null differ diff --git a/attachments/graphframes(2).png b/attachments/graphframes(2).png deleted file mode 100755 index b1dc419..0000000 Binary files a/attachments/graphframes(2).png and /dev/null differ diff --git a/attachments/graphframes.png b/attachments/graphframes.png deleted file mode 100755 index 426ae19..0000000 Binary files a/attachments/graphframes.png and /dev/null differ diff --git a/attachments/graphg.png b/attachments/graphg.png deleted file mode 100755 index dfaf792..0000000 Binary files a/attachments/graphg.png and /dev/null differ diff --git a/attachments/graphlablogo.png b/attachments/graphlablogo.png deleted file mode 100755 index 3ef777f..0000000 Binary files a/attachments/graphlablogo.png and /dev/null differ diff --git a/attachments/graphlabvertices.png b/attachments/graphlabvertices.png deleted file mode 100755 index 038530e..0000000 Binary files a/attachments/graphlabvertices.png and /dev/null differ diff --git a/attachments/graphlabworking.jpg b/attachments/graphlabworking.jpg deleted file mode 100755 index 92cafc0..0000000 Binary files a/attachments/graphlabworking.jpg and /dev/null differ diff --git a/attachments/graphpartioningviews.png b/attachments/graphpartioningviews.png deleted file mode 100755 index 821aa6c..0000000 Binary files a/attachments/graphpartioningviews.png and /dev/null differ diff --git a/attachments/graphprocessingarchitectures.png b/attachments/graphprocessingarchitectures.png deleted file mode 
100755 index d6453a3..0000000 Binary files a/attachments/graphprocessingarchitectures.png and /dev/null differ diff --git a/attachments/graphprocessingcategories.png b/attachments/graphprocessingcategories.png deleted file mode 100755 index 113c3b9..0000000 Binary files a/attachments/graphprocessingcategories.png and /dev/null differ diff --git a/attachments/graphprocessingstates.png b/attachments/graphprocessingstates.png deleted file mode 100755 index 5dd4736..0000000 Binary files a/attachments/graphprocessingstates.png and /dev/null differ diff --git a/attachments/graphprogrammingmodels.png b/attachments/graphprogrammingmodels.png deleted file mode 100755 index 17054b6..0000000 Binary files a/attachments/graphprogrammingmodels.png and /dev/null differ diff --git a/attachments/graphql.png b/attachments/graphql.png deleted file mode 100755 index dcff1c6..0000000 Binary files a/attachments/graphql.png and /dev/null differ diff --git a/attachments/graphqlscompar.png b/attachments/graphqlscompar.png deleted file mode 100755 index bb495f7..0000000 Binary files a/attachments/graphqlscompar.png and /dev/null differ diff --git a/attachments/graphquerylangsexamples.jpg b/attachments/graphquerylangsexamples.jpg deleted file mode 100755 index 2cbc349..0000000 Binary files a/attachments/graphquerylangsexamples.jpg and /dev/null differ diff --git a/attachments/graphrealworldapps.png b/attachments/graphrealworldapps.png deleted file mode 100755 index 20f1608..0000000 Binary files a/attachments/graphrealworldapps.png and /dev/null differ diff --git a/attachments/graphs-graphs-everywhere.jpg b/attachments/graphs-graphs-everywhere.jpg deleted file mode 100755 index 4914d24..0000000 Binary files a/attachments/graphs-graphs-everywhere.jpg and /dev/null differ diff --git a/attachments/graphs.png b/attachments/graphs.png deleted file mode 100755 index f49e0c8..0000000 Binary files a/attachments/graphs.png and /dev/null differ diff --git a/attachments/graphsEverywher.png b/attachments/graphsEverywher.png deleted file mode 100755 index 05cec60..0000000 Binary files a/attachments/graphsEverywher.png and /dev/null differ diff --git a/attachments/graphsarecomplex.png b/attachments/graphsarecomplex.png deleted file mode 100755 index 56b95fb..0000000 Binary files a/attachments/graphsarecomplex.png and /dev/null differ diff --git a/attachments/graphscomplex.png b/attachments/graphscomplex.png deleted file mode 100755 index b377156..0000000 Binary files a/attachments/graphscomplex.png and /dev/null differ diff --git a/attachments/graphtree.png b/attachments/graphtree.png deleted file mode 100644 index ed5c75d..0000000 Binary files a/attachments/graphtree.png and /dev/null differ diff --git a/attachments/graphxStages.png b/attachments/graphxStages.png deleted file mode 100755 index c311c79..0000000 Binary files a/attachments/graphxStages.png and /dev/null differ diff --git a/attachments/graphxdataviews.png b/attachments/graphxdataviews.png deleted file mode 100755 index a174a6d..0000000 Binary files a/attachments/graphxdataviews.png and /dev/null differ diff --git a/attachments/graphxjoins.png b/attachments/graphxjoins.png deleted file mode 100755 index c43da48..0000000 Binary files a/attachments/graphxjoins.png and /dev/null differ diff --git a/attachments/graphxoptimizations.png b/attachments/graphxoptimizations.png deleted file mode 100755 index f82a244..0000000 Binary files a/attachments/graphxoptimizations.png and /dev/null differ diff --git a/attachments/grayareahv.png b/attachments/grayareahv.png deleted file 
mode 100644 index 5ae07db..0000000 Binary files a/attachments/grayareahv.png and /dev/null differ diff --git a/attachments/gremlin-logo.png b/attachments/gremlin-logo.png deleted file mode 100755 index afd4b30..0000000 Binary files a/attachments/gremlin-logo.png and /dev/null differ diff --git a/attachments/gremlin-running.png b/attachments/gremlin-running.png deleted file mode 100755 index fbc2773..0000000 Binary files a/attachments/gremlin-running.png and /dev/null differ diff --git a/attachments/gremlinexamples.jpg b/attachments/gremlinexamples.jpg deleted file mode 100755 index 14e8e5a..0000000 Binary files a/attachments/gremlinexamples.jpg and /dev/null differ diff --git a/attachments/gremlinngpsexample1.png b/attachments/gremlinngpsexample1.png deleted file mode 100755 index 8605879..0000000 Binary files a/attachments/gremlinngpsexample1.png and /dev/null differ diff --git a/attachments/gremlinngpsexample2.png b/attachments/gremlinngpsexample2.png deleted file mode 100755 index 35b87fd..0000000 Binary files a/attachments/gremlinngpsexample2.png and /dev/null differ diff --git a/attachments/gremlinngpsexample3.png b/attachments/gremlinngpsexample3.png deleted file mode 100755 index 358a4d9..0000000 Binary files a/attachments/gremlinngpsexample3.png and /dev/null differ diff --git a/attachments/greyarea2.png b/attachments/greyarea2.png deleted file mode 100644 index de9a788..0000000 Binary files a/attachments/greyarea2.png and /dev/null differ diff --git a/attachments/hadoop-mapreduce-big.png b/attachments/hadoop-mapreduce-big.png deleted file mode 100755 index fe299a3..0000000 Binary files a/attachments/hadoop-mapreduce-big.png and /dev/null differ diff --git a/attachments/hadoop-mapreduceproscons.png b/attachments/hadoop-mapreduceproscons.png deleted file mode 100755 index 897cd61..0000000 Binary files a/attachments/hadoop-mapreduceproscons.png and /dev/null differ diff --git a/attachments/hadoopsparql.png b/attachments/hadoopsparql.png deleted file mode 100755 index 332279f..0000000 Binary files a/attachments/hadoopsparql.png and /dev/null differ diff --git a/attachments/hadoopwithgrahs.jpg b/attachments/hadoopwithgrahs.jpg deleted file mode 100755 index 0d237de..0000000 Binary files a/attachments/hadoopwithgrahs.jpg and /dev/null differ diff --git a/attachments/hadoopwithgraphs.png b/attachments/hadoopwithgraphs.png deleted file mode 100755 index 16bbb20..0000000 Binary files a/attachments/hadoopwithgraphs.png and /dev/null differ diff --git a/attachments/hadoopwithgraphs2.JPG b/attachments/hadoopwithgraphs2.JPG deleted file mode 100755 index ae385de..0000000 Binary files a/attachments/hadoopwithgraphs2.JPG and /dev/null differ diff --git a/attachments/hbase-36.png b/attachments/hbase-36.png deleted file mode 100644 index 2724101..0000000 Binary files a/attachments/hbase-36.png and /dev/null differ diff --git a/attachments/hbase-37.png b/attachments/hbase-37.png deleted file mode 100644 index 31e9f3e..0000000 Binary files a/attachments/hbase-37.png and /dev/null differ diff --git a/attachments/hbase-38.png b/attachments/hbase-38.png deleted file mode 100644 index 896a65b..0000000 Binary files a/attachments/hbase-38.png and /dev/null differ diff --git a/attachments/hbase-39.png b/attachments/hbase-39.png deleted file mode 100644 index 2920c4a..0000000 Binary files a/attachments/hbase-39.png and /dev/null differ diff --git a/attachments/hbase-40.png b/attachments/hbase-40.png deleted file mode 100644 index 2573313..0000000 Binary files a/attachments/hbase-40.png and /dev/null differ 
diff --git a/attachments/heroku-first-homepage.png b/attachments/heroku-first-homepage.png deleted file mode 100755 index 519bf2d..0000000 Binary files a/attachments/heroku-first-homepage.png and /dev/null differ diff --git a/attachments/hexastores.png b/attachments/hexastores.png deleted file mode 100755 index 4f3d984..0000000 Binary files a/attachments/hexastores.png and /dev/null differ diff --git a/attachments/historyofdatamodels.png b/attachments/historyofdatamodels.png deleted file mode 100644 index 13011c7..0000000 Binary files a/attachments/historyofdatamodels.png and /dev/null differ diff --git a/attachments/horizontalscalability.png b/attachments/horizontalscalability.png deleted file mode 100644 index f6796c0..0000000 Binary files a/attachments/horizontalscalability.png and /dev/null differ diff --git a/attachments/https___miro.medium.png b/attachments/https___miro.medium.png deleted file mode 100644 index 70aef42..0000000 Binary files a/attachments/https___miro.medium.png and /dev/null differ diff --git a/attachments/icdt20b.pdf b/attachments/icdt20b.pdf deleted file mode 100644 index d764083..0000000 Binary files a/attachments/icdt20b.pdf and /dev/null differ diff --git a/attachments/image3.jpeg b/attachments/image3.jpeg deleted file mode 100644 index 4971aa1..0000000 Binary files a/attachments/image3.jpeg and /dev/null differ diff --git a/attachments/image89.png b/attachments/image89.png deleted file mode 100644 index 5179048..0000000 Binary files a/attachments/image89.png and /dev/null differ diff --git a/attachments/img0000.png b/attachments/img0000.png deleted file mode 100755 index 37f8b26..0000000 Binary files a/attachments/img0000.png and /dev/null differ diff --git a/attachments/img0001.png b/attachments/img0001.png deleted file mode 100755 index 0878112..0000000 Binary files a/attachments/img0001.png and /dev/null differ diff --git a/attachments/img0003.png b/attachments/img0003.png deleted file mode 100755 index bb3ac71..0000000 Binary files a/attachments/img0003.png and /dev/null differ diff --git a/attachments/img0005.png b/attachments/img0005.png deleted file mode 100755 index a3e8237..0000000 Binary files a/attachments/img0005.png and /dev/null differ diff --git a/attachments/img0006.png b/attachments/img0006.png deleted file mode 100755 index 7b29147..0000000 Binary files a/attachments/img0006.png and /dev/null differ diff --git a/attachments/img0007.png b/attachments/img0007.png deleted file mode 100755 index 4c79464..0000000 Binary files a/attachments/img0007.png and /dev/null differ diff --git a/attachments/img0008.png b/attachments/img0008.png deleted file mode 100755 index 993f7da..0000000 Binary files a/attachments/img0008.png and /dev/null differ diff --git a/attachments/img0009.png b/attachments/img0009.png deleted file mode 100755 index 077d15b..0000000 Binary files a/attachments/img0009.png and /dev/null differ diff --git a/attachments/img0010.png b/attachments/img0010.png deleted file mode 100755 index d496a8b..0000000 Binary files a/attachments/img0010.png and /dev/null differ diff --git a/attachments/img0022.png b/attachments/img0022.png deleted file mode 100755 index 22315d1..0000000 Binary files a/attachments/img0022.png and /dev/null differ diff --git a/attachments/img0023.png b/attachments/img0023.png deleted file mode 100755 index f3a2a1d..0000000 Binary files a/attachments/img0023.png and /dev/null differ diff --git a/attachments/img0024.png b/attachments/img0024.png deleted file mode 100755 index ad1260f..0000000 Binary files 
a/attachments/img0024.png and /dev/null differ diff --git a/attachments/img0025.png b/attachments/img0025.png deleted file mode 100755 index 5f6cf0d..0000000 Binary files a/attachments/img0025.png and /dev/null differ diff --git a/attachments/img0026.png b/attachments/img0026.png deleted file mode 100755 index 8ece20b..0000000 Binary files a/attachments/img0026.png and /dev/null differ diff --git a/attachments/img0034.png b/attachments/img0034.png deleted file mode 100755 index adba391..0000000 Binary files a/attachments/img0034.png and /dev/null differ diff --git a/attachments/img0036.png b/attachments/img0036.png deleted file mode 100755 index 9967274..0000000 Binary files a/attachments/img0036.png and /dev/null differ diff --git a/attachments/img0037.png b/attachments/img0037.png deleted file mode 100755 index 58e555d..0000000 Binary files a/attachments/img0037.png and /dev/null differ diff --git a/attachments/inapplicability.png b/attachments/inapplicability.png deleted file mode 100644 index 980feff..0000000 Binary files a/attachments/inapplicability.png and /dev/null differ diff --git a/attachments/info.png b/attachments/info.png deleted file mode 100755 index bbd00be..0000000 Binary files a/attachments/info.png and /dev/null differ diff --git a/attachments/ingestion.png b/attachments/ingestion.png deleted file mode 100644 index 31c00ed..0000000 Binary files a/attachments/ingestion.png and /dev/null differ diff --git a/attachments/ingress-lb.png b/attachments/ingress-lb.png deleted file mode 100755 index 90b7505..0000000 Binary files a/attachments/ingress-lb.png and /dev/null differ diff --git a/attachments/ingress-routing-mesh.png b/attachments/ingress-routing-mesh.png deleted file mode 100755 index 55ad8e1..0000000 Binary files a/attachments/ingress-routing-mesh.png and /dev/null differ diff --git a/attachments/intersectionvisual.png b/attachments/intersectionvisual.png deleted file mode 100644 index 2f47861..0000000 Binary files a/attachments/intersectionvisual.png and /dev/null differ diff --git a/attachments/iot.png b/attachments/iot.png deleted file mode 100755 index e1bf257..0000000 Binary files a/attachments/iot.png and /dev/null differ diff --git a/attachments/k8s-arch1.png b/attachments/k8s-arch1.png deleted file mode 100755 index 6dfa093..0000000 Binary files a/attachments/k8s-arch1.png and /dev/null differ diff --git a/attachments/k8s-arch2.png b/attachments/k8s-arch2.png deleted file mode 100755 index 6bb3847..0000000 Binary files a/attachments/k8s-arch2.png and /dev/null differ diff --git a/attachments/k8s-arch3-thanks-weave.png b/attachments/k8s-arch3-thanks-weave.png deleted file mode 100755 index 1487e60..0000000 Binary files a/attachments/k8s-arch3-thanks-weave.png and /dev/null differ diff --git a/attachments/k8s-arch4-thanks-luxas.png b/attachments/k8s-arch4-thanks-luxas.png deleted file mode 100755 index 6516954..0000000 Binary files a/attachments/k8s-arch4-thanks-luxas.png and /dev/null differ diff --git a/attachments/kafka.png b/attachments/kafka.png deleted file mode 100644 index b8d7470..0000000 Binary files a/attachments/kafka.png and /dev/null differ diff --git a/attachments/kafkaconcept.png b/attachments/kafkaconcept.png deleted file mode 100644 index 997699c..0000000 Binary files a/attachments/kafkaconcept.png and /dev/null differ diff --git a/attachments/kafkacvexample.png b/attachments/kafkacvexample.png deleted file mode 100644 index af095a3..0000000 Binary files a/attachments/kafkacvexample.png and /dev/null differ diff --git 
a/attachments/kafkalogic.pdf b/attachments/kafkalogic.pdf deleted file mode 100644 index a279023..0000000 Binary files a/attachments/kafkalogic.pdf and /dev/null differ diff --git a/attachments/kafkalogic.png b/attachments/kafkalogic.png deleted file mode 100644 index 8826a15..0000000 Binary files a/attachments/kafkalogic.png and /dev/null differ diff --git a/attachments/kafkaphysic.png b/attachments/kafkaphysic.png deleted file mode 100644 index 842e84f..0000000 Binary files a/attachments/kafkaphysic.png and /dev/null differ diff --git a/attachments/kaju-kube-master.png b/attachments/kaju-kube-master.png deleted file mode 100644 index fe69bff..0000000 Binary files a/attachments/kaju-kube-master.png and /dev/null differ diff --git a/attachments/keyboard.png b/attachments/keyboard.png deleted file mode 100755 index d09c9a2..0000000 Binary files a/attachments/keyboard.png and /dev/null differ diff --git a/attachments/keystable.png b/attachments/keystable.png deleted file mode 100644 index d3034e1..0000000 Binary files a/attachments/keystable.png and /dev/null differ diff --git a/attachments/kibana.png b/attachments/kibana.png deleted file mode 100755 index f60c121..0000000 Binary files a/attachments/kibana.png and /dev/null differ diff --git a/attachments/kstreamktable.pdf b/attachments/kstreamktable.pdf deleted file mode 100644 index 6640315..0000000 Binary files a/attachments/kstreamktable.pdf and /dev/null differ
[deleted: attachments/kubectl-create-deployment-slideshow/01.svg through 19.svg, a 19-frame "how-does-k8s-work" slideshow, mode 100755, indices 715344f, e2e47b4, 5aed19c, 1b247d7, 6696d3e, f20cc90, 587a8b4, a6202c3, d7df5e4, a63c856, 1d61190, b3e5019, b9fc3f2, a28b1a5, e583236, dd875db, b1a3dd5, d51064a, a16d67c, all to 0000000. Only the slide labels survive extraction: DEVOPS runs "kubectl create deployment web --image=nginx" against the API server and gets back "deployment.apps/web created"; on the CONTROL PLANE (API server, controller manager, scheduler, etcd) the Deployment creates a ReplicaSet and the ReplicaSet a Pod (PENDING); the scheduler assigns the Pod to node 1, and the kubelet on the WORKER NODES reports it CREATING, then RUNNING.]
diff --git a/attachments/kubernetes_pods.drawio b/attachments/kubernetes_pods.drawio deleted file mode 100755 index 05573b1..0000000 --- a/attachments/kubernetes_pods.drawio +++ /dev/null @@ -1 +0,0 @@ [base64-compressed draw.io XML omitted] \ No newline at end of file
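The surviving slide labels trace the standard control loop behind creating a Deployment; a minimal sketch of the same sequence, runnable against any cluster with a default scheduler (the create command and its response are taken verbatim from the deleted slides; the two follow-up inspection commands are an added assumption):

    $ kubectl create deployment web --image=nginx
    deployment.apps/web created
    # Deployment -> ReplicaSet -> Pod; the Pod stays Pending until the scheduler picks a node
    $ kubectl get deployments,replicasets,pods
    # node assignment and kubelet events (Pulling, Created, Started); app=web is the label kubectl create deployment sets
    $ kubectl describe pod -l app=web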
diff --git a/attachments/kubernetes_pods.svg b/attachments/kubernetes_pods.svg deleted file mode 100755 index b878be9..0000000 --- a/attachments/kubernetes_pods.svg +++ /dev/null @@ -1,3 +0,0 @@ [stripped SVG: pod anatomy diagram — on the host (/var/lib/kubelet/...), a Pod groups a pause container, nginx listening on :80, and a prometheus exporter; the containers share the Pod's network & IPC namespaces behind a single private IP]
\ No newline at end of file diff --git a/attachments/largegraphfamilies.png b/attachments/largegraphfamilies.png deleted file mode 100755 index eddbf01..0000000 Binary files a/attachments/largegraphfamilies.png and /dev/null differ diff --git a/attachments/largegraphsystemfamilies.png b/attachments/largegraphsystemfamilies.png deleted file mode 100755 index 7ece083..0000000 Binary files a/attachments/largegraphsystemfamilies.png and /dev/null differ diff --git a/attachments/latest.png b/attachments/latest.png deleted file mode 120000 index 324c766..0000000 --- a/attachments/latest.png +++ /dev/null @@ -1 +0,0 @@ -Thursday-September-24-2020.png \ No newline at end of file diff --git a/attachments/leaderfollower.png b/attachments/leaderfollower.png deleted file mode 100644 index 2f4ebe2..0000000 Binary files a/attachments/leaderfollower.png and /dev/null differ diff --git a/attachments/logo_dsg_vettoriale.png b/attachments/logo_dsg_vettoriale.png deleted file mode 100644 index 27962c1..0000000 Binary files a/attachments/logo_dsg_vettoriale.png and /dev/null differ diff --git a/attachments/m2_structure.png b/attachments/m2_structure.png deleted file mode 100644 index e5229de..0000000 Binary files a/attachments/m2_structure.png and /dev/null differ diff --git a/attachments/many-to-many.png b/attachments/many-to-many.png deleted file mode 100644 index f9ddf56..0000000 Binary files a/attachments/many-to-many.png and /dev/null differ diff --git a/attachments/many-to-one.png b/attachments/many-to-one.png deleted file mode 100644 index 80e0ffc..0000000 Binary files a/attachments/many-to-one.png and /dev/null differ diff --git a/attachments/map-reducenotoptimizedforgraphs.png b/attachments/map-reducenotoptimizedforgraphs.png deleted file mode 100755 index b9975fd..0000000 Binary files a/attachments/map-reducenotoptimizedforgraphs.png and /dev/null differ diff --git a/attachments/map-reducenotoptimizedforgraphs2.png b/attachments/map-reducenotoptimizedforgraphs2.png deleted file mode 100755 index 3e5cf21..0000000 Binary files a/attachments/map-reducenotoptimizedforgraphs2.png and /dev/null differ diff --git a/attachments/mapreducebsp.png b/attachments/mapreducebsp.png deleted file mode 100755 index 6cb34d4..0000000 Binary files a/attachments/mapreducebsp.png and /dev/null differ diff --git a/attachments/mapreducepagerank.png b/attachments/mapreducepagerank.png deleted file mode 100755 index 320f9e8..0000000 Binary files a/attachments/mapreducepagerank.png and /dev/null differ diff --git a/attachments/mapreducepagerank1.png b/attachments/mapreducepagerank1.png deleted file mode 100755 index a45ac0e..0000000 Binary files a/attachments/mapreducepagerank1.png and /dev/null differ diff --git a/attachments/mapreducepagerank2.png b/attachments/mapreducepagerank2.png deleted file mode 100755 index c953e7a..0000000 Binary files a/attachments/mapreducepagerank2.png and /dev/null differ diff --git a/attachments/mapreducepagerank3.png b/attachments/mapreducepagerank3.png deleted file mode 100755 index 047a30a..0000000 Binary files a/attachments/mapreducepagerank3.png and /dev/null differ diff --git a/attachments/mapreducepagerank4.png b/attachments/mapreducepagerank4.png deleted file mode 100755 index 13e052a..0000000 Binary files a/attachments/mapreducepagerank4.png and /dev/null differ diff --git a/attachments/mario-red-shell.png b/attachments/mario-red-shell.png deleted file mode 100755 index bd34f9e..0000000 Binary files a/attachments/mario-red-shell.png and /dev/null differ diff --git 
a/attachments/match-pattern.png b/attachments/match-pattern.png deleted file mode 100644 index b9fc3fc..0000000 Binary files a/attachments/match-pattern.png and /dev/null differ diff --git a/attachments/matching.png b/attachments/matching.png deleted file mode 100644 index 0284c44..0000000 Binary files a/attachments/matching.png and /dev/null differ diff --git a/attachments/meandmarti.jpg b/attachments/meandmarti.jpg deleted file mode 100644 index fdd6941..0000000 Binary files a/attachments/meandmarti.jpg and /dev/null differ diff --git a/attachments/media_httpfarm5static_mevIk.png b/attachments/media_httpfarm5static_mevIk.png deleted file mode 100644 index a614186..0000000 Binary files a/attachments/media_httpfarm5static_mevIk.png and /dev/null differ diff --git a/attachments/mizan.png b/attachments/mizan.png deleted file mode 100755 index 45be6c9..0000000 Binary files a/attachments/mizan.png and /dev/null differ diff --git a/attachments/mizanworking.png b/attachments/mizanworking.png deleted file mode 100755 index 8eabb04..0000000 Binary files a/attachments/mizanworking.png and /dev/null differ diff --git a/attachments/mizanworking2.png b/attachments/mizanworking2.png deleted file mode 100755 index c866088..0000000 Binary files a/attachments/mizanworking2.png and /dev/null differ diff --git a/attachments/model.pdf b/attachments/model.pdf deleted file mode 100644 index 779d36e..0000000 Binary files a/attachments/model.pdf and /dev/null differ diff --git a/attachments/mongocap.png b/attachments/mongocap.png deleted file mode 100644 index 57d26d8..0000000 Binary files a/attachments/mongocap.png and /dev/null differ diff --git a/attachments/mongodb-42.png b/attachments/mongodb-42.png deleted file mode 100644 index 86c9f75..0000000 Binary files a/attachments/mongodb-42.png and /dev/null differ diff --git a/attachments/mongodb-43.png b/attachments/mongodb-43.png deleted file mode 100644 index 96f6ff5..0000000 Binary files a/attachments/mongodb-43.png and /dev/null differ diff --git a/attachments/mongodb-60.png b/attachments/mongodb-60.png deleted file mode 100644 index e5aad8c..0000000 Binary files a/attachments/mongodb-60.png and /dev/null differ diff --git a/attachments/mongodb-61.png b/attachments/mongodb-61.png deleted file mode 100644 index b87cf71..0000000 Binary files a/attachments/mongodb-61.png and /dev/null differ diff --git a/attachments/mongodb-62.png b/attachments/mongodb-62.png deleted file mode 100644 index 1a9d452..0000000 Binary files a/attachments/mongodb-62.png and /dev/null differ diff --git a/attachments/mongodb-63.png b/attachments/mongodb-63.png deleted file mode 100644 index 23203d3..0000000 Binary files a/attachments/mongodb-63.png and /dev/null differ diff --git a/attachments/mongodb-64.png b/attachments/mongodb-64.png deleted file mode 100644 index 488fced..0000000 Binary files a/attachments/mongodb-64.png and /dev/null differ diff --git a/attachments/mongodb-65.png b/attachments/mongodb-65.png deleted file mode 100644 index 4c17516..0000000 Binary files a/attachments/mongodb-65.png and /dev/null differ diff --git a/attachments/mongodb-66.png b/attachments/mongodb-66.png deleted file mode 100644 index 5a9d5d1..0000000 Binary files a/attachments/mongodb-66.png and /dev/null differ diff --git a/attachments/mongodb-67.png b/attachments/mongodb-67.png deleted file mode 100644 index 5798bfe..0000000 Binary files a/attachments/mongodb-67.png and /dev/null differ diff --git a/attachments/mongodb-68.png b/attachments/mongodb-68.png deleted file mode 100644 index 
0586ac1..0000000 Binary files a/attachments/mongodb-68.png and /dev/null differ diff --git a/attachments/mongodb-69.png b/attachments/mongodb-69.png deleted file mode 100644 index 3b7804d..0000000 Binary files a/attachments/mongodb-69.png and /dev/null differ diff --git a/attachments/mongodb-70.png b/attachments/mongodb-70.png deleted file mode 100644 index 1ff16da..0000000 Binary files a/attachments/mongodb-70.png and /dev/null differ diff --git a/attachments/mongodb-71.png b/attachments/mongodb-71.png deleted file mode 100644 index a19aa9d..0000000 Binary files a/attachments/mongodb-71.png and /dev/null differ diff --git a/attachments/mongodb-72.png b/attachments/mongodb-72.png deleted file mode 100644 index cc1e8a2..0000000 Binary files a/attachments/mongodb-72.png and /dev/null differ diff --git a/attachments/mongodb-73.png b/attachments/mongodb-73.png deleted file mode 100644 index 8c99ecb..0000000 Binary files a/attachments/mongodb-73.png and /dev/null differ diff --git a/attachments/mongodb-74.png b/attachments/mongodb-74.png deleted file mode 100644 index 79bef85..0000000 Binary files a/attachments/mongodb-74.png and /dev/null differ diff --git a/attachments/mongodb-76.png.md b/attachments/mongodb-76.png.md deleted file mode 100644 index e69de29..0000000 diff --git a/attachments/naturaljoin.png b/attachments/naturaljoin.png deleted file mode 100644 index 3c45892..0000000 Binary files a/attachments/naturaljoin.png and /dev/null differ diff --git a/attachments/navigationalqueries.jpg b/attachments/navigationalqueries.jpg deleted file mode 100755 index d470774..0000000 Binary files a/attachments/navigationalqueries.jpg and /dev/null differ diff --git a/attachments/navigationalqueries.png b/attachments/navigationalqueries.png deleted file mode 100755 index 4336a0a..0000000 Binary files a/attachments/navigationalqueries.png and /dev/null differ diff --git a/attachments/neo4j_logo_globe.png b/attachments/neo4j_logo_globe.png deleted file mode 100755 index 4eaaa26..0000000 Binary files a/attachments/neo4j_logo_globe.png and /dev/null differ diff --git a/attachments/ngbs.png b/attachments/ngbs.png deleted file mode 100755 index c62ba06..0000000 Binary files a/attachments/ngbs.png and /dev/null differ diff --git a/attachments/ngpexample2.jpg b/attachments/ngpexample2.jpg deleted file mode 100755 index 083992a..0000000 Binary files a/attachments/ngpexample2.jpg and /dev/null differ diff --git a/attachments/ngpsexample(1).png b/attachments/ngpsexample(1).png deleted file mode 100755 index 9ad3d47..0000000 Binary files a/attachments/ngpsexample(1).png and /dev/null differ diff --git a/attachments/nosqldbs.jpg b/attachments/nosqldbs.jpg deleted file mode 100755 index 33606f3..0000000 Binary files a/attachments/nosqldbs.jpg and /dev/null differ diff --git a/attachments/nosqldbs.pdf b/attachments/nosqldbs.pdf deleted file mode 100644 index cb40cca..0000000 Binary files a/attachments/nosqldbs.pdf and /dev/null differ diff --git a/attachments/nottoday.png b/attachments/nottoday.png deleted file mode 100644 index 67c0ba3..0000000 Binary files a/attachments/nottoday.png and /dev/null differ diff --git a/attachments/nsqlfamily.png b/attachments/nsqlfamily.png deleted file mode 100644 index d360c6b..0000000 Binary files a/attachments/nsqlfamily.png and /dev/null differ diff --git a/attachments/o2m-m2m.png b/attachments/o2m-m2m.png deleted file mode 100644 index 2ea26ea..0000000 Binary files a/attachments/o2m-m2m.png and /dev/null differ diff --git a/attachments/o2o-o2m.png b/attachments/o2o-o2m.png 
deleted file mode 100644 index e80effd..0000000 Binary files a/attachments/o2o-o2m.png and /dev/null differ diff --git a/attachments/oltpvsolap.png b/attachments/oltpvsolap.png deleted file mode 100644 index b8ab8ba..0000000 Binary files a/attachments/oltpvsolap.png and /dev/null differ diff --git a/attachments/one to many.png b/attachments/one to many.png deleted file mode 100644 index e81426e..0000000 Binary files a/attachments/one to many.png and /dev/null differ diff --git a/attachments/order.png b/attachments/order.png deleted file mode 100644 index 1ae81c5..0000000 Binary files a/attachments/order.png and /dev/null differ diff --git a/attachments/othergraphcategories.jpg b/attachments/othergraphcategories.jpg deleted file mode 100755 index b68a1db..0000000 Binary files a/attachments/othergraphcategories.jpg and /dev/null differ diff --git a/attachments/othergraphcategoriess.jpg b/attachments/othergraphcategoriess.jpg deleted file mode 100755 index a85c485..0000000 Binary files a/attachments/othergraphcategoriess.jpg and /dev/null differ diff --git a/attachments/otherpartitioningViews.png b/attachments/otherpartitioningViews.png deleted file mode 100755 index 919dd08..0000000 Binary files a/attachments/otherpartitioningViews.png and /dev/null differ diff --git a/attachments/outlines.jpg b/attachments/outlines.jpg deleted file mode 100755 index c4a8b5a..0000000 Binary files a/attachments/outlines.jpg and /dev/null differ diff --git a/attachments/pagerank.jpg b/attachments/pagerank.jpg deleted file mode 100755 index b3113b7..0000000 Binary files a/attachments/pagerank.jpg and /dev/null differ diff --git a/attachments/pagerankexample.png b/attachments/pagerankexample.png deleted file mode 100755 index 984a138..0000000 Binary files a/attachments/pagerankexample.png and /dev/null differ diff --git a/attachments/pageranktlav.png b/attachments/pageranktlav.png deleted file mode 100755 index cfba690..0000000 Binary files a/attachments/pageranktlav.png and /dev/null differ diff --git a/attachments/parsing.png b/attachments/parsing.png deleted file mode 100644 index 2c16802..0000000 Binary files a/attachments/parsing.png and /dev/null differ diff --git a/attachments/parttioningviews.png b/attachments/parttioningviews.png deleted file mode 100755 index a3dc879..0000000 Binary files a/attachments/parttioningviews.png and /dev/null differ diff --git a/attachments/pattern.jpg b/attachments/pattern.jpg deleted file mode 100755 index b99137b..0000000 Binary files a/attachments/pattern.jpg and /dev/null differ diff --git a/attachments/patternmatching.jpg b/attachments/patternmatching.jpg deleted file mode 100755 index c11fe68..0000000 Binary files a/attachments/patternmatching.jpg and /dev/null differ diff --git a/attachments/pegasus_logo_300.png b/attachments/pegasus_logo_300.png deleted file mode 100755 index 3402822..0000000 Binary files a/attachments/pegasus_logo_300.png and /dev/null differ diff --git a/attachments/performance.png b/attachments/performance.png deleted file mode 100755 index be74714..0000000 Binary files a/attachments/performance.png and /dev/null differ diff --git a/attachments/pgpsemantics.png b/attachments/pgpsemantics.png deleted file mode 100755 index 7d18fb5..0000000 Binary files a/attachments/pgpsemantics.png and /dev/null differ diff --git a/attachments/pregelexecution.png b/attachments/pregelexecution.png deleted file mode 100755 index 62b85e0..0000000 Binary files a/attachments/pregelexecution.png and /dev/null differ diff --git a/attachments/pregelextensions.png 
b/attachments/pregelextensions.png deleted file mode 100755 index 736b55e..0000000 Binary files a/attachments/pregelextensions.png and /dev/null differ diff --git a/attachments/pregelsystem.png b/attachments/pregelsystem.png deleted file mode 100755 index 07f6f93..0000000 Binary files a/attachments/pregelsystem.png and /dev/null differ diff --git a/attachments/pregelvsgraphlab.png b/attachments/pregelvsgraphlab.png deleted file mode 100755 index 20c1ea9..0000000 Binary files a/attachments/pregelvsgraphlab.png and /dev/null differ diff --git a/attachments/pregelvshadoop.png b/attachments/pregelvshadoop.png deleted file mode 100755 index 45e0b03..0000000 Binary files a/attachments/pregelvshadoop.png and /dev/null differ diff --git a/attachments/preprocessingvsstreaming.png b/attachments/preprocessingvsstreaming.png deleted file mode 100755 index 60536eb..0000000 Binary files a/attachments/preprocessingvsstreaming.png and /dev/null differ diff --git a/attachments/process-png.png b/attachments/process-png.png deleted file mode 100755 index ae93e7e..0000000 Binary files a/attachments/process-png.png and /dev/null differ diff --git a/attachments/productvisual.png b/attachments/productvisual.png deleted file mode 100644 index 062f4cc..0000000 Binary files a/attachments/productvisual.png and /dev/null differ diff --git a/attachments/projectionvisual.png b/attachments/projectionvisual.png deleted file mode 100644 index 9d7d2c8..0000000 Binary files a/attachments/projectionvisual.png and /dev/null differ diff --git a/attachments/propertyGraphExample.jpg b/attachments/propertyGraphExample.jpg deleted file mode 100755 index 291fda5..0000000 Binary files a/attachments/propertyGraphExample.jpg and /dev/null differ diff --git a/attachments/propertygraph.png b/attachments/propertygraph.png deleted file mode 100755 index 9616b99..0000000 Binary files a/attachments/propertygraph.png and /dev/null differ diff --git a/attachments/pseudo-practitioner.png b/attachments/pseudo-practitioner.png deleted file mode 100644 index beb38e7..0000000 Binary files a/attachments/pseudo-practitioner.png and /dev/null differ diff --git a/attachments/pseudopractitioner.png b/attachments/pseudopractitioner.png deleted file mode 100644 index b6ad087..0000000 Binary files a/attachments/pseudopractitioner.png and /dev/null differ diff --git a/attachments/pubsub1.png b/attachments/pubsub1.png deleted file mode 100644 index 19470cb..0000000 Binary files a/attachments/pubsub1.png and /dev/null differ diff --git a/attachments/pubsub2.png b/attachments/pubsub2.png deleted file mode 100644 index 9c5ba2a..0000000 Binary files a/attachments/pubsub2.png and /dev/null differ diff --git a/attachments/pwd-icons.png b/attachments/pwd-icons.png deleted file mode 100755 index 9906e53..0000000 Binary files a/attachments/pwd-icons.png and /dev/null differ diff --git a/attachments/qb (1).svg b/attachments/qb (1).svg deleted file mode 100644 index d607e00..0000000 --- a/attachments/qb (1).svg +++ /dev/null @@ -1,92 +0,0 @@ - - - - -Created by potrace 1.16, written by Peter Selinger 2001-2019 - - - - - - - - - diff --git a/attachments/questionmark.png b/attachments/questionmark.png deleted file mode 100644 index 462b53b..0000000 Binary files a/attachments/questionmark.png and /dev/null differ diff --git a/attachments/randommissingvalues.png b/attachments/randommissingvalues.png deleted file mode 100644 index 29674b1..0000000 Binary files a/attachments/randommissingvalues.png and /dev/null differ diff --git a/attachments/rdbsjoinpains.jpg 
b/attachments/rdbsjoinpains.jpg deleted file mode 100755 index 7e1dd0a..0000000 Binary files a/attachments/rdbsjoinpains.jpg and /dev/null differ diff --git a/attachments/rdfgraph.jpg b/attachments/rdfgraph.jpg deleted file mode 100755 index b9ed747..0000000 Binary files a/attachments/rdfgraph.jpg and /dev/null differ diff --git a/attachments/rdftontriples.png b/attachments/rdftontriples.png deleted file mode 100755 index 653b2e5..0000000 Binary files a/attachments/rdftontriples.png and /dev/null differ diff --git a/attachments/redis.png b/attachments/redis.png deleted file mode 100644 index a17a972..0000000 Binary files a/attachments/redis.png and /dev/null differ diff --git a/attachments/redisarch.png b/attachments/redisarch.png deleted file mode 100644 index b1b67d3..0000000 Binary files a/attachments/redisarch.png and /dev/null differ diff --git a/attachments/registry-frontends.png b/attachments/registry-frontends.png deleted file mode 100755 index 152b815..0000000 Binary files a/attachments/registry-frontends.png and /dev/null differ diff --git a/attachments/reldbex.png b/attachments/reldbex.png deleted file mode 100644 index 566f4f4..0000000 Binary files a/attachments/reldbex.png and /dev/null differ diff --git a/attachments/replicas.pdf b/attachments/replicas.pdf deleted file mode 100644 index 734b625..0000000 Binary files a/attachments/replicas.pdf and /dev/null differ diff --git a/attachments/replicas2.png b/attachments/replicas2.png deleted file mode 100644 index 0067c33..0000000 Binary files a/attachments/replicas2.png and /dev/null differ diff --git a/attachments/rick-mason-2FaCKyEEtis-unsplash.jpg b/attachments/rick-mason-2FaCKyEEtis-unsplash.jpg deleted file mode 100644 index fa7af80..0000000 Binary files a/attachments/rick-mason-2FaCKyEEtis-unsplash.jpg and /dev/null differ diff --git a/attachments/row-vs-column-storage.png b/attachments/row-vs-column-storage.png deleted file mode 100644 index f7faae1..0000000 Binary files a/attachments/row-vs-column-storage.png and /dev/null differ diff --git a/attachments/s2rdf.png b/attachments/s2rdf.png deleted file mode 100755 index 8d72bc2..0000000 Binary files a/attachments/s2rdf.png and /dev/null differ diff --git a/attachments/scalechallenge.png b/attachments/scalechallenge.png deleted file mode 100755 index ee7d965..0000000 Binary files a/attachments/scalechallenge.png and /dev/null differ diff --git a/attachments/schemaonread.png b/attachments/schemaonread.png deleted file mode 100644 index 0683cf0..0000000 Binary files a/attachments/schemaonread.png and /dev/null differ diff --git a/attachments/schemaonwrite.png b/attachments/schemaonwrite.png deleted file mode 100644 index 1a63256..0000000 Binary files a/attachments/schemaonwrite.png and /dev/null differ diff --git a/attachments/secret.png b/attachments/secret.png deleted file mode 100644 index 7df24fc..0000000 Binary files a/attachments/secret.png and /dev/null differ diff --git a/attachments/seen.png b/attachments/seen.png deleted file mode 100644 index 9189513..0000000 Binary files a/attachments/seen.png and /dev/null differ diff --git a/attachments/selected-bytes-per-tuple.png b/attachments/selected-bytes-per-tuple.png deleted file mode 100644 index 860f59c..0000000 Binary files a/attachments/selected-bytes-per-tuple.png and /dev/null differ diff --git a/attachments/selectvisual.png b/attachments/selectvisual.png deleted file mode 100644 index 1c947f8..0000000 Binary files a/attachments/selectvisual.png and /dev/null differ diff --git a/attachments/service-discovery.png 
b/attachments/service-discovery.png deleted file mode 100755 index 86c8645..0000000 Binary files a/attachments/service-discovery.png and /dev/null differ diff --git a/attachments/seth-rosen-twitter-pull-data.png b/attachments/seth-rosen-twitter-pull-data.png deleted file mode 100644 index a05d23e..0000000 Binary files a/attachments/seth-rosen-twitter-pull-data.png and /dev/null differ diff --git a/attachments/shard.png b/attachments/shard.png deleted file mode 100755 index 3939511..0000000 Binary files a/attachments/shard.png and /dev/null differ diff --git a/attachments/sharedcommunicationmodel.png b/attachments/sharedcommunicationmodel.png deleted file mode 100755 index be92aa8..0000000 Binary files a/attachments/sharedcommunicationmodel.png and /dev/null differ diff --git a/attachments/sharing-layers.jpg b/attachments/sharing-layers.jpg deleted file mode 100755 index e59fa83..0000000 Binary files a/attachments/sharing-layers.jpg and /dev/null differ diff --git a/attachments/shipping-indsutry-results.png b/attachments/shipping-indsutry-results.png deleted file mode 100755 index 977d81e..0000000 Binary files a/attachments/shipping-indsutry-results.png and /dev/null differ diff --git a/attachments/shipping-industry-problem.png b/attachments/shipping-industry-problem.png deleted file mode 100755 index f4794eb..0000000 Binary files a/attachments/shipping-industry-problem.png and /dev/null differ diff --git a/attachments/shipping-industry-solution.png b/attachments/shipping-industry-solution.png deleted file mode 100755 index 7513b09..0000000 Binary files a/attachments/shipping-industry-solution.png and /dev/null differ diff --git a/attachments/shipping-matrix-from-hell.png b/attachments/shipping-matrix-from-hell.png deleted file mode 100755 index 12358c9..0000000 Binary files a/attachments/shipping-matrix-from-hell.png and /dev/null differ diff --git a/attachments/shipping-matrix-solved.png b/attachments/shipping-matrix-solved.png deleted file mode 100755 index 8a2cdf6..0000000 Binary files a/attachments/shipping-matrix-solved.png and /dev/null differ diff --git a/attachments/shipping-software-problem.png b/attachments/shipping-software-problem.png deleted file mode 100755 index f9e1caf..0000000 Binary files a/attachments/shipping-software-problem.png and /dev/null differ diff --git a/attachments/shipping-software-solution.png b/attachments/shipping-software-solution.png deleted file mode 100755 index 8ffff1c..0000000 Binary files a/attachments/shipping-software-solution.png and /dev/null differ diff --git a/attachments/slide_4.jpg b/attachments/slide_4.jpg deleted file mode 100644 index b02002f..0000000 Binary files a/attachments/slide_4.jpg and /dev/null differ diff --git a/attachments/social-media-marketing-tr.png b/attachments/social-media-marketing-tr.png deleted file mode 100755 index f28cd1c..0000000 Binary files a/attachments/social-media-marketing-tr.png and /dev/null differ diff --git a/attachments/social-networks.jpg b/attachments/social-networks.jpg deleted file mode 100755 index 5c14aff..0000000 Binary files a/attachments/social-networks.jpg and /dev/null differ diff --git a/attachments/socialnetworks.png b/attachments/socialnetworks.png deleted file mode 100755 index e8a054b..0000000 Binary files a/attachments/socialnetworks.png and /dev/null differ diff --git a/attachments/sowhat.png b/attachments/sowhat.png deleted file mode 100755 index 6c5ff8f..0000000 Binary files a/attachments/sowhat.png and /dev/null differ diff --git a/attachments/spanning.png b/attachments/spanning.png 
deleted file mode 100644 index 962ba62..0000000 Binary files a/attachments/spanning.png and /dev/null differ diff --git a/attachments/sparkgraphx.png b/attachments/sparkgraphx.png deleted file mode 100755 index 47a48f4..0000000 Binary files a/attachments/sparkgraphx.png and /dev/null differ diff --git a/attachments/sparkgraphxArch.png b/attachments/sparkgraphxArch.png deleted file mode 100755 index c5399a9..0000000 Binary files a/attachments/sparkgraphxArch.png and /dev/null differ diff --git a/attachments/sparkgraphxoperators1.png b/attachments/sparkgraphxoperators1.png deleted file mode 100755 index 5896543..0000000 Binary files a/attachments/sparkgraphxoperators1.png and /dev/null differ diff --git a/attachments/sparkgraphxoperators2.png b/attachments/sparkgraphxoperators2.png deleted file mode 100755 index fd19f1d..0000000 Binary files a/attachments/sparkgraphxoperators2.png and /dev/null differ diff --git a/attachments/sparkgraphxoperators3.png b/attachments/sparkgraphxoperators3.png deleted file mode 100755 index f711991..0000000 Binary files a/attachments/sparkgraphxoperators3.png and /dev/null differ diff --git a/attachments/sparkgraphxrepresnt1.png b/attachments/sparkgraphxrepresnt1.png deleted file mode 100755 index 6735f83..0000000 Binary files a/attachments/sparkgraphxrepresnt1.png and /dev/null differ diff --git a/attachments/sparkgraphxrepresnt2.png b/attachments/sparkgraphxrepresnt2.png deleted file mode 100755 index 1b4434a..0000000 Binary files a/attachments/sparkgraphxrepresnt2.png and /dev/null differ diff --git a/attachments/sparkgraphxrepresnt3.png b/attachments/sparkgraphxrepresnt3.png deleted file mode 100755 index 9d9d4e6..0000000 Binary files a/attachments/sparkgraphxrepresnt3.png and /dev/null differ diff --git a/attachments/sparkql(1).png b/attachments/sparkql(1).png deleted file mode 100755 index 9efd3cb..0000000 Binary files a/attachments/sparkql(1).png and /dev/null differ diff --git a/attachments/sparkql(2).png b/attachments/sparkql(2).png deleted file mode 100755 index d7db303..0000000 Binary files a/attachments/sparkql(2).png and /dev/null differ diff --git a/attachments/sparkql.png b/attachments/sparkql.png deleted file mode 100755 index aa14ec7..0000000 Binary files a/attachments/sparkql.png and /dev/null differ diff --git a/attachments/sparql projecttion.jpg b/attachments/sparql projecttion.jpg deleted file mode 100755 index 7e12474..0000000 Binary files a/attachments/sparql projecttion.jpg and /dev/null differ diff --git a/attachments/sparql.png b/attachments/sparql.png deleted file mode 100644 index 2cfe31f..0000000 Binary files a/attachments/sparql.png and /dev/null differ diff --git a/attachments/sparqlexample1.png b/attachments/sparqlexample1.png deleted file mode 100755 index da57ff5..0000000 Binary files a/attachments/sparqlexample1.png and /dev/null differ diff --git a/attachments/sparqlexamples.png b/attachments/sparqlexamples.png deleted file mode 100755 index 40d61df..0000000 Binary files a/attachments/sparqlexamples.png and /dev/null differ diff --git a/attachments/sparqlngbsexample1.png b/attachments/sparqlngbsexample1.png deleted file mode 100755 index e143cc9..0000000 Binary files a/attachments/sparqlngbsexample1.png and /dev/null differ diff --git a/attachments/sparqlngbsexample2.png b/attachments/sparqlngbsexample2.png deleted file mode 100755 index 80dd6cb..0000000 Binary files a/attachments/sparqlngbsexample2.png and /dev/null differ diff --git a/attachments/sparqlngbsexample3.png b/attachments/sparqlngbsexample3.png deleted file mode 
100755 index bccc9e6..0000000 Binary files a/attachments/sparqlngbsexample3.png and /dev/null differ diff --git a/attachments/sparqlngbsexample4.png b/attachments/sparqlngbsexample4.png deleted file mode 100755 index 29703d1..0000000 Binary files a/attachments/sparqlngbsexample4.png and /dev/null differ diff --git a/attachments/sqlstandard.jpg b/attachments/sqlstandard.jpg deleted file mode 100755 index fa8bbdd..0000000 Binary files a/attachments/sqlstandard.jpg and /dev/null differ diff --git a/attachments/standardizing.png b/attachments/standardizing.png deleted file mode 100644 index 95b24f0..0000000 Binary files a/attachments/standardizing.png and /dev/null differ diff --git a/attachments/starclique.png b/attachments/starclique.png deleted file mode 100755 index 2140eb1..0000000 Binary files a/attachments/starclique.png and /dev/null differ diff --git a/attachments/startrek-federation.jpg b/attachments/startrek-federation.jpg deleted file mode 100755 index 88befb1..0000000 Binary files a/attachments/startrek-federation.jpg and /dev/null differ diff --git a/attachments/staticdynamicpartitioning.png b/attachments/staticdynamicpartitioning.png deleted file mode 100755 index 66ed2d1..0000000 Binary files a/attachments/staticdynamicpartitioning.png and /dev/null differ diff --git a/attachments/storageview.png b/attachments/storageview.png deleted file mode 100755 index ed485ce..0000000 Binary files a/attachments/storageview.png and /dev/null differ diff --git a/attachments/stream-table-animation-numVisitedLocations-changelog.gif (dragged).pdf b/attachments/stream-table-animation-numVisitedLocations-changelog.gif (dragged).pdf deleted file mode 100644 index 78a80de..0000000 Binary files a/attachments/stream-table-animation-numVisitedLocations-changelog.gif (dragged).pdf and /dev/null differ diff --git a/attachments/stream-table-animation-numVisitedLocations-changelog.gif.pdf b/attachments/stream-table-animation-numVisitedLocations-changelog.gif.pdf deleted file mode 100644 index 67f36f2..0000000 Binary files a/attachments/stream-table-animation-numVisitedLocations-changelog.gif.pdf and /dev/null differ diff --git a/attachments/stream-table-animation-numVisitedLocations-changelog.png b/attachments/stream-table-animation-numVisitedLocations-changelog.png deleted file mode 100644 index e7c7eb0..0000000 Binary files a/attachments/stream-table-animation-numVisitedLocations-changelog.png and /dev/null differ diff --git a/attachments/string.png b/attachments/string.png deleted file mode 100644 index de2333b..0000000 Binary files a/attachments/string.png and /dev/null differ diff --git a/attachments/subgraphtransform.png b/attachments/subgraphtransform.png deleted file mode 100755 index 92e89a4..0000000 Binary files a/attachments/subgraphtransform.png and /dev/null differ diff --git a/attachments/subgraphtransform2.png b/attachments/subgraphtransform2.png deleted file mode 100755 index 981a119..0000000 Binary files a/attachments/subgraphtransform2.png and /dev/null differ diff --git a/attachments/supercolumn.png b/attachments/supercolumn.png deleted file mode 100644 index 8357c09..0000000 Binary files a/attachments/supercolumn.png and /dev/null differ diff --git a/attachments/supported-sites-sendible-hands-up.png.png b/attachments/supported-sites-sendible-hands-up.png.png deleted file mode 100755 index 34aad15..0000000 Binary files a/attachments/supported-sites-sendible-hands-up.png.png and /dev/null differ diff --git a/attachments/swarm-mode.svg b/attachments/swarm-mode.svg deleted file mode 100755 
index 7350be3..0000000 --- a/attachments/swarm-mode.svg +++ /dev/null @@ -1,4 +0,0 @@ - - - - diff --git a/attachments/swarm.png b/attachments/swarm.png deleted file mode 100755 index 56d1148..0000000 Binary files a/attachments/swarm.png and /dev/null differ diff --git a/attachments/swstore.png b/attachments/swstore.png deleted file mode 100755 index 3d80642..0000000 Binary files a/attachments/swstore.png and /dev/null differ diff --git a/attachments/tableex1.png b/attachments/tableex1.png deleted file mode 100644 index 75886cd..0000000 Binary files a/attachments/tableex1.png and /dev/null differ diff --git a/attachments/tableex2.png b/attachments/tableex2.png deleted file mode 100644 index f9fe749..0000000 Binary files a/attachments/tableex2.png and /dev/null differ diff --git a/attachments/tableex2png.png b/attachments/tableex2png.png deleted file mode 100644 index c7e9b16..0000000 Binary files a/attachments/tableex2png.png and /dev/null differ diff --git a/attachments/tangram.gif b/attachments/tangram.gif deleted file mode 100755 index d08ccd1..0000000 Binary files a/attachments/tangram.gif and /dev/null differ diff --git a/attachments/tesla.jpg b/attachments/tesla.jpg deleted file mode 100755 index dca8128..0000000 Binary files a/attachments/tesla.jpg and /dev/null differ diff --git a/attachments/tetris-1.png b/attachments/tetris-1.png deleted file mode 100755 index 7302c2d..0000000 Binary files a/attachments/tetris-1.png and /dev/null differ diff --git a/attachments/tetris-2.gif b/attachments/tetris-2.gif deleted file mode 100755 index 505c5c1..0000000 Binary files a/attachments/tetris-2.gif and /dev/null differ diff --git a/attachments/tetris-3.png b/attachments/tetris-3.png deleted file mode 100755 index 15069e7..0000000 Binary files a/attachments/tetris-3.png and /dev/null differ diff --git a/attachments/the end.png b/attachments/the end.png deleted file mode 100644 index 24e3285..0000000 Binary files a/attachments/the end.png and /dev/null differ diff --git a/attachments/the_gift_of_knowledge.jpeg b/attachments/the_gift_of_knowledge.jpeg deleted file mode 100644 index 17605b2..0000000 Binary files a/attachments/the_gift_of_knowledge.jpeg and /dev/null differ diff --git a/attachments/theend2.png b/attachments/theend2.png deleted file mode 100644 index 1112a6f..0000000 Binary files a/attachments/theend2.png and /dev/null differ diff --git a/attachments/timelinenosql.png b/attachments/timelinenosql.png deleted file mode 100644 index 686a647..0000000 Binary files a/attachments/timelinenosql.png and /dev/null differ diff --git a/attachments/titan-logo2.png b/attachments/titan-logo2.png deleted file mode 100755 index 82c85ae..0000000 Binary files a/attachments/titan-logo2.png and /dev/null differ diff --git a/attachments/titangremlin.png b/attachments/titangremlin.png deleted file mode 100755 index 343b83d..0000000 Binary files a/attachments/titangremlin.png and /dev/null differ diff --git a/attachments/title-advanced-dockerfiles.jpg b/attachments/title-advanced-dockerfiles.jpg deleted file mode 100755 index 94736b8..0000000 Binary files a/attachments/title-advanced-dockerfiles.jpg and /dev/null differ diff --git a/attachments/title-ambassador.jpg b/attachments/title-ambassador.jpg deleted file mode 100755 index 4be767b..0000000 Binary files a/attachments/title-ambassador.jpg and /dev/null differ diff --git a/attachments/title-background-containers.jpg b/attachments/title-background-containers.jpg deleted file mode 100755 index 828a265..0000000 Binary files 
a/attachments/title-background-containers.jpg and /dev/null differ diff --git a/attachments/title-building-docker-images-with-a-dockerfile.jpg b/attachments/title-building-docker-images-with-a-dockerfile.jpg deleted file mode 100755 index fb281b3..0000000 Binary files a/attachments/title-building-docker-images-with-a-dockerfile.jpg and /dev/null differ diff --git a/attachments/title-connecting-containers-with-links.gif b/attachments/title-connecting-containers-with-links.gif deleted file mode 100755 index 0ed3f4c..0000000 Binary files a/attachments/title-connecting-containers-with-links.gif and /dev/null differ diff --git a/attachments/title-container-networking-basics.jpg b/attachments/title-container-networking-basics.jpg deleted file mode 100755 index 68f6b96..0000000 Binary files a/attachments/title-container-networking-basics.jpg and /dev/null differ diff --git a/attachments/title-copying-files-during-build.jpg b/attachments/title-copying-files-during-build.jpg deleted file mode 100755 index eb14228..0000000 Binary files a/attachments/title-copying-files-during-build.jpg and /dev/null differ diff --git a/attachments/title-installing-docker.jpg b/attachments/title-installing-docker.jpg deleted file mode 100755 index bb7f31d..0000000 Binary files a/attachments/title-installing-docker.jpg and /dev/null differ diff --git a/attachments/title-local-development-workflow-with-docker.jpg b/attachments/title-local-development-workflow-with-docker.jpg deleted file mode 100755 index 0fc7b48..0000000 Binary files a/attachments/title-local-development-workflow-with-docker.jpg and /dev/null differ diff --git a/attachments/title-naming-and-inspecting-containers.jpg b/attachments/title-naming-and-inspecting-containers.jpg deleted file mode 100755 index 65d5f6c..0000000 Binary files a/attachments/title-naming-and-inspecting-containers.jpg and /dev/null differ diff --git a/attachments/title-our-first-containers.jpg b/attachments/title-our-first-containers.jpg deleted file mode 100755 index 58d7670..0000000 Binary files a/attachments/title-our-first-containers.jpg and /dev/null differ diff --git a/attachments/title-our-training-environment.jpg b/attachments/title-our-training-environment.jpg deleted file mode 100755 index 512c207..0000000 Binary files a/attachments/title-our-training-environment.jpg and /dev/null differ diff --git a/attachments/title-the-container-network-model.jpg b/attachments/title-the-container-network-model.jpg deleted file mode 100755 index b3d81e6..0000000 Binary files a/attachments/title-the-container-network-model.jpg and /dev/null differ diff --git a/attachments/title-understanding-docker-images.png b/attachments/title-understanding-docker-images.png deleted file mode 100755 index e4e2087..0000000 Binary files a/attachments/title-understanding-docker-images.png and /dev/null differ diff --git a/attachments/title-working-with-volumes.jpg b/attachments/title-working-with-volumes.jpg deleted file mode 100755 index 0c72532..0000000 Binary files a/attachments/title-working-with-volumes.jpg and /dev/null differ diff --git a/attachments/tlav.png b/attachments/tlav.png deleted file mode 100755 index f2c6b3f..0000000 Binary files a/attachments/tlav.png and /dev/null differ diff --git a/attachments/tradeoff.png b/attachments/tradeoff.png deleted file mode 100755 index 6784ff1..0000000 Binary files a/attachments/tradeoff.png and /dev/null differ diff --git a/attachments/traffic-graph.png b/attachments/traffic-graph.png deleted file mode 100755 index 1b154ac..0000000 Binary files 
a/attachments/traffic-graph.png and /dev/null differ diff --git a/attachments/trainingwheels-error.png b/attachments/trainingwheels-error.png deleted file mode 100755 index c4b28cc..0000000 Binary files a/attachments/trainingwheels-error.png and /dev/null differ diff --git a/attachments/trainingwheels-ok.png b/attachments/trainingwheels-ok.png deleted file mode 100755 index 8ed1f4e..0000000 Binary files a/attachments/trainingwheels-ok.png and /dev/null differ diff --git a/attachments/triangles.png b/attachments/triangles.png deleted file mode 100755 index 7cc871d..0000000 Binary files a/attachments/triangles.png and /dev/null differ diff --git a/attachments/tripletview.png b/attachments/tripletview.png deleted file mode 100755 index 5539b23..0000000 Binary files a/attachments/tripletview.png and /dev/null differ diff --git a/attachments/trollface.png b/attachments/trollface.png deleted file mode 100755 index cce7c75..0000000 Binary files a/attachments/trollface.png and /dev/null differ diff --git a/attachments/tuple-reconstruction.png b/attachments/tuple-reconstruction.png deleted file mode 100644 index 01ef876..0000000 Binary files a/attachments/tuple-reconstruction.png and /dev/null differ diff --git a/attachments/unisionvisual.png b/attachments/unisionvisual.png deleted file mode 100644 index 3e2983b..0000000 Binary files a/attachments/unisionvisual.png and /dev/null differ diff --git a/attachments/variety.pdf b/attachments/variety.pdf deleted file mode 100644 index c849f3c..0000000 Binary files a/attachments/variety.pdf and /dev/null differ diff --git a/attachments/velocity.pdf b/attachments/velocity.pdf deleted file mode 100644 index ed9c161..0000000 Binary files a/attachments/velocity.pdf and /dev/null differ diff --git a/attachments/verticalscalability.png b/attachments/verticalscalability.png deleted file mode 100644 index b160cf7..0000000 Binary files a/attachments/verticalscalability.png and /dev/null differ diff --git a/attachments/verticalvshorizontalscalability.png b/attachments/verticalvshorizontalscalability.png deleted file mode 100644 index 492249e..0000000 Binary files a/attachments/verticalvshorizontalscalability.png and /dev/null differ diff --git a/attachments/volume-1.pdf b/attachments/volume-1.pdf deleted file mode 100644 index 347ccd5..0000000 Binary files a/attachments/volume-1.pdf and /dev/null differ diff --git a/attachments/volume-2.pdf b/attachments/volume-2.pdf deleted file mode 100644 index 856c3a9..0000000 Binary files a/attachments/volume-2.pdf and /dev/null differ diff --git a/attachments/vvvvvv.png b/attachments/vvvvvv.png deleted file mode 100644 index 16b3054..0000000 Binary files a/attachments/vvvvvv.png and /dev/null differ diff --git a/attachments/warning.png b/attachments/warning.png deleted file mode 100755 index 9db992c..0000000 Binary files a/attachments/warning.png and /dev/null differ diff --git a/attachments/wasm.png b/attachments/wasm.png deleted file mode 100644 index c5b4536..0000000 Binary files a/attachments/wasm.png and /dev/null differ diff --git a/attachments/webapp-in-blue.png b/attachments/webapp-in-blue.png deleted file mode 100755 index 41cd9a4..0000000 Binary files a/attachments/webapp-in-blue.png and /dev/null differ diff --git a/attachments/webapp-in-red.png b/attachments/webapp-in-red.png deleted file mode 100755 index 711a0c3..0000000 Binary files a/attachments/webapp-in-red.png and /dev/null differ diff --git a/attachments/welcome-to-nginx.png b/attachments/welcome-to-nginx.png deleted file mode 100755 index 5ce935f..0000000 
Binary files a/attachments/welcome-to-nginx.png and /dev/null differ diff --git a/attachments/what-is-data-science.jpg b/attachments/what-is-data-science.jpg deleted file mode 100644 index 9644b1a..0000000 Binary files a/attachments/what-is-data-science.jpg and /dev/null differ diff --git a/attachments/what-is-data-science.webp b/attachments/what-is-data-science.webp deleted file mode 100644 index a48ded0..0000000 Binary files a/attachments/what-is-data-science.webp and /dev/null differ diff --git a/attachments/whatido.png b/attachments/whatido.png deleted file mode 100644 index 87b6204..0000000 Binary files a/attachments/whatido.png and /dev/null differ diff --git a/attachments/windows-containers.jpg b/attachments/windows-containers.jpg deleted file mode 100755 index 44d85c1..0000000 Binary files a/attachments/windows-containers.jpg and /dev/null differ diff --git a/attachments/word-image-29.png b/attachments/word-image-29.png deleted file mode 100644 index 8076113..0000000 Binary files a/attachments/word-image-29.png and /dev/null differ diff --git a/attachments/wranglingsteps.png b/attachments/wranglingsteps.png deleted file mode 100644 index a10c6b7..0000000 Binary files a/attachments/wranglingsteps.png and /dev/null differ diff --git a/attachments/wranglingsteps4.png b/attachments/wranglingsteps4.png deleted file mode 100644 index 891d63f..0000000 Binary files a/attachments/wranglingsteps4.png and /dev/null differ diff --git a/attachments/wrongingestion1.png b/attachments/wrongingestion1.png deleted file mode 100644 index 68318df..0000000 Binary files a/attachments/wrongingestion1.png and /dev/null differ diff --git a/attachments/wrongingestion2.png b/attachments/wrongingestion2.png deleted file mode 100644 index f89bf3e..0000000 Binary files a/attachments/wrongingestion2.png and /dev/null differ diff --git a/attachments/you-get-a-cluster.jpg b/attachments/you-get-a-cluster.jpg deleted file mode 100755 index 0f8949a..0000000 Binary files a/attachments/you-get-a-cluster.jpg and /dev/null differ diff --git a/attachments/you-get-five-vms.jpg b/attachments/you-get-five-vms.jpg deleted file mode 100755 index 2d475a3..0000000 Binary files a/attachments/you-get-five-vms.jpg and /dev/null differ diff --git a/buildings.png b/buildings.png new file mode 100644 index 0000000..5297865 Binary files /dev/null and b/buildings.png differ diff --git a/data.json b/data.json new file mode 100644 index 0000000..2de0a99 --- /dev/null +++ b/data.json @@ -0,0 +1,52 @@ +[ + { + "picture": "http://placehold.it/32x32", + "age": 29, + "name": "Hull Gardner", + "gender": "male", + "email": "hullgardner@kneedles.com", + "phone": "+372 (860) 490-3549", + "address": "946 Victor Road, Adelino, Rhode Island, 3562", + "about": "Exercitation sint eu voluptate duis dolor ea occaecat officia. Consequat dolor et consectetur non adipisicing anim aliquip non mollit officia qui. Minim excepteur Lorem labore est officia ad voluptate. Consectetur elit aliqua reprehenderit cupidatat officia dolore sunt.\r\n" + }, + { + "picture": "http://placehold.it/32x32", + "age": 28, + "name": "Rice Gonzalez", + "gender": "male", + "email": "ricegonzalez@kneedles.com", + "phone": "+372 (956) 471-3802", + "address": "251 Church Avenue, Weedville, Michigan, 9128", + "about": "Sunt Lorem dolore cillum duis ipsum sit officia dolor elit. Cupidatat magna magna occaecat id incididunt et est enim incididunt ex exercitation est. 
Ea dolor ad enim duis ea.\r\n" + }, + { + "picture": "http://placehold.it/32x32", + "age": 30, + "name": "Augusta Yates", + "gender": "female", + "email": "augustayates@kneedles.com", + "phone": "+372 (937) 447-3768", + "address": "196 Lawton Street, Glenshaw, Palau, 3979", + "about": "Esse excepteur velit elit dolor Lorem dolore nostrud excepteur reprehenderit eiusmod deserunt sint. Velit Lorem est magna irure et dolore veniam dolore labore labore ea laboris ad nisi. Consequat sit exercitation enim cupidatat esse esse reprehenderit ullamco nisi irure qui laborum nulla.\r\n" + }, + { + "picture": "http://placehold.it/32x32", + "age": 27, + "name": "Acevedo Burns", + "gender": "male", + "email": "acevedoburns@kneedles.com", + "phone": "+372 (982) 441-3165", + "address": "572 Bath Avenue, Downsville, Alaska, 1693", + "about": "Veniam anim in culpa occaecat anim pariatur incididunt adipisicing qui sunt. Labore reprehenderit fugiat reprehenderit irure incididunt laborum mollit laboris duis. Laborum cillum anim qui duis nisi dolor. Qui ut pariatur exercitation eu ullamco minim sunt aliquip veniam ea. Commodo qui reprehenderit eiusmod veniam veniam ad Lorem duis et qui cillum.\r\n" + }, + { + "picture": "http://placehold.it/32x32", + "age": 32, + "name": "Erna Keith", + "gender": "female", + "email": "ernakeith@kneedles.com", + "phone": "+372 (930) 554-3624", + "address": "256 Navy Street, Whipholt, American Samoa, 3834", + "about": "Ad veniam anim deserunt sint veniam et pariatur est. Cillum et aliquip anim consequat quis eiusmod elit. Cillum qui et est magna non irure sint. Fugiat occaecat nostrud consectetur non sunt deserunt fugiat nisi veniam consectetur do incididunt cillum. Adipisicing reprehenderit ex velit consequat et culpa id esse deserunt eiusmod veniam.\r\n" + } +] \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..d18c2ca --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,99 @@ +version: '2.2' +services: + zookeeper: + hostname: zookeeper + container_name: zookeeper + image: "confluentinc/cp-zookeeper:5.5.0-1-ubi8" + restart: always + networks: + - kafka-net + environment: + ZOOKEEPER_CLIENT_PORT: 2181 + ZOOKEEPER_TICK_TIME: 2000 + + kafka1: + image: "confluentinc/cp-enterprise-kafka:5.5.0-1-ubi8" + restart: always + hostname: kafka + container_name: kafka1 + ports: + - "9092:9092" + networks: + - kafka-net + environment: + KAFKA_BROKER_ID: 1 + KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 + KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka1:9092 + KAFKA_AUTO_CREATE_TOPICS_ENABLE: "true" + KAFKA_DELETE_TOPIC_ENABLE: "true" + KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 + KAFKA_METRIC_REPORTERS: "io.confluent.metrics.reporter.ConfluentMetricsReporter" + CONFLUENT_METRICS_REPORTER_BOOTSTRAP_SERVERS: "kafka1:9092" + CONFLUENT_METRICS_REPORTER_TOPIC_REPLICAS: 1 + KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 100 + KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 1 + KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: 1 + + kafka2: + image: "confluentinc/cp-enterprise-kafka:5.5.0-1-ubi8" + restart: always + hostname: kafka + container_name: kafka2 + ports: + - "9093:9093" + networks: + - kafka-net + environment: + KAFKA_BROKER_ID: 2 + KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 + KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka2:9093 + KAFKA_AUTO_CREATE_TOPICS_ENABLE: "true" + KAFKA_DELETE_TOPIC_ENABLE: "true" + KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 + 
KAFKA_METRIC_REPORTERS: "io.confluent.metrics.reporter.ConfluentMetricsReporter" + CONFLUENT_METRICS_REPORTER_BOOTSTRAP_SERVERS: "kafka2:9093" + CONFLUENT_METRICS_REPORTER_TOPIC_REPLICAS: 1 + KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 100 + KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 1 + KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: 1 + + schema-registry: + hostname: schema-registry + container_name: schema-registry + image: "confluentinc/cp-schema-registry:5.5.0-1-ubi8" + restart: always + ports: + - 8081:8081 + networks: + - kafka-net + environment: + SCHEMA_REGISTRY_HOST_NAME: schema-registry + SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: kafka1:9092 + SCHEMA_REGISTRY_LISTENERS: http://schema-registry:8081 + + tools: + image: cnfltraining/training-tools:5.5 + restart: always + hostname: tools + container_name: tools + networks: + - kafka-net + volumes: + - .:/root/confluent-streams/labs/using-ksql + working_dir: /root/confluent-streams/labs/using-ksql + command: /bin/bash + tty: true + + notebook: + build: notebook/ + networks: + - kafka-net + ports: + - 8888:8888 + volumes: + - ./:/home/jovyan/work/data + environment: + - GRANT_SUDO=yes + +networks: + kafka-net: diff --git a/docker/Docker Crash Course.md b/docker/Docker Crash Course.md deleted file mode 100644 index 158c00f..0000000 --- a/docker/Docker Crash Course.md +++ /dev/null @@ -1,1247 +0,0 @@ -footer: [Riccardo Tommasini](http://rictomm.me) - riccardo.tommasini@ut.ee - @rictomm -slidenumbers: true -autoscale: true -theme: Plain Jane - -# Data Engineering: Docker 101 -#### LTAT.02.007 -#### Asst. Prof. Riccardo Tommasini -#### Assistants: [Fabiano Spiga](mailto:), [Mohamed Ragab](mailto:mohamed.ragab@ut.ee), [Hassan Eldeeb](mailto:hassan.eldeeb@ut.ee) -- [https://courses.cs.ut.ee/2020/dataeng](https://courses.cs.ut.ee/2020/dataeng) -- [Forum](https://piazza.com/ut.ee/fall2020/ltat02007/home) -- [Moodle](https://moodle.ut.ee/course/view.php?id=10457) - ---- - -# Our first containers - -![inline](../attachments/title-our-first-containers.jpg) - ---- - -## Objectives - -At the end of this lesson, you will have: - -* Seen Docker in action. - -* Started your first containers. - -* Understood what an image is. - -* Understood what a layer is. - -* Learned the various image namespaces. - -* Learned how to search and download images. - -* Learned about image tags and when to use them. - ---- - -## Hello World - -In your Docker environment, just run the following command: - -```bash -$ docker run busybox echo hello world -hello world -``` - -(If your Docker install is brand new, you will also see a few extra lines, -corresponding to the download of the `busybox` image.) - ---- - -## That was our first container! - -* We used one of the smallest, simplest images available: `busybox`. - -* `busybox` is typically used in embedded systems (phones, routers...) - -* We ran a single process and echoed `hello world`. - ---- - -## A more useful container - -Let's run a more exciting container: - -```bash -$ docker run -it ubuntu -root@04c0bb0a6c07:/# -``` - -* This is a brand new container. - -* It runs a bare-bones, no-frills `ubuntu` system. - -* `-it` is shorthand for `-i -t`. - - * `-i` tells Docker to connect us to the container's stdin. - - * `-t` tells Docker that we want a pseudo-terminal. - ---- - -## Do something in our container - -Try to run `figlet` in our container. - -```bash -root@04c0bb0a6c07:/# figlet hello -bash: figlet: command not found -``` - -Alright, we need to install it.
- --- - ## Install a package in our container - We want `figlet`, so let's install it: - ```bash -root@04c0bb0a6c07:/# apt-get update -... -Fetched 1514 kB in 14s (103 kB/s) -Reading package lists... Done -root@04c0bb0a6c07:/# apt-get install figlet -Reading package lists... Done -... -``` - -One minute later, `figlet` is installed! - --- - ## Try to run our freshly installed program - The `figlet` program takes a message as a parameter. - ```bash -root@04c0bb0a6c07:/# figlet hello - _ _ _ -| |__ ___| | | ___ -| '_ \ / _ \ | |/ _ \ -| | | | __/ | | (_) | -|_| |_|\___|_|_|\___/ -``` - -Beautiful! 😍 - --- - - - ## Counting packages in the container - Let's check how many packages are installed there. - ```bash -root@04c0bb0a6c07:/# dpkg -l | wc -l -190 -``` - -* `dpkg -l` lists the packages installed in our container - -* `wc -l` counts them - -How many packages do we have on our host? - --- - - - ## Counting packages on the host - Exit the container by logging out of the shell, as you usually would. - -(E.g. with `^D` or `exit`) - -```bash -root@04c0bb0a6c07:/# exit -``` - -Now, try to: - -* run `dpkg -l | wc -l`. How many packages are installed? - -* run `figlet`. Does that work? - --- - - - ## Comparing the container and the host - Exit the container by logging out of the shell, with `^D` or `exit`. - -Now try to run `figlet`. Does that work? - -(It shouldn't, unless, by coincidence, you are running on a machine where figlet was installed before.) - --- - ## Host and containers are independent things - * We ran an `ubuntu` container on a Linux/Windows/macOS host. - -* They have different, independent packages. - -* Installing something on the host doesn't expose it to the container. - -* And vice-versa. - -* Even if both the host and the container have the same Linux distro! - -* We can run *any container* on *any host*. - - (One exception: Windows containers cannot run on Linux machines; at least not yet.) - --- - ## Where's our container? - * Our container is now in a *stopped* state. - -* It still exists on disk, but all compute resources have been freed up. - -* We will see later how to get back to that container. - --- - ## Starting another container - What if we start a new container, and try to run `figlet` again? - ```bash -$ docker run -it ubuntu -root@b13c164401fb:/# figlet -bash: figlet: command not found -``` - -* We started a *brand new container*. - -* The basic Ubuntu image was used, and `figlet` is not here. - --- - ## Where's my container? - * Can we reuse that container that we took time to customize? - - *We can, but that's not the default workflow with Docker.* - -* What's the default workflow, then? - - *Always start with a fresh container.* -
- *If we need something installed in our container, build a custom image.* - -* That seems complicated! - - *We'll see that it's actually pretty easy!* - -* And what's the point? - - *This puts a strong emphasis on automation and repeatability. Let's see why ...* - --- - ## Local development with Docker - * With Docker, the workflow looks like this: - - * create a container image with our dev environment - - * run a container with that image - - * work on the project - - * when done, shut down the container - - * next time we need to work on the project, start a new container - - * if we need to tweak the environment, we create a new image - -* We have a clear definition of our environment, and can share it reliably with others. - -* Let's see in the next chapters how to bake a custom image with `figlet`! - --- -# Building our first image - ![inline](../attachments/title-understanding-docker-images.png) - --- -## What is an image? - * Image = files + metadata - -* These files form the root filesystem of our container. - -* The metadata can indicate a number of things, e.g.: - - * the author of the image - * the command to execute in the container when starting it - * environment variables to be set - * etc. - -* Images are made of *layers*, conceptually stacked on top of each other. - -* Each layer can add, change, and remove files and/or metadata. - -* Images can share layers to optimize disk usage, transfer times, and memory use. - --- - ## Example for a Java webapp - Each of the following items will correspond to one layer: - * CentOS base layer -* Packages and configuration files added by our local IT -* JRE -* Tomcat -* Our application's dependencies -* Our application code and assets -* Our application configuration - --- - - ## The read-write layer - ![inline](../attachments/container-layers.jpg) - --- - ## Differences between containers and images - * An image is a read-only filesystem. - -* A container is an encapsulated set of processes, - - running in a read-write copy of that filesystem. - -* To optimize container boot time, *copy-on-write* is used - instead of regular copy. - -* `docker run` starts a container from a given image. - --- - - ## Multiple containers sharing the same image - ![inline](../attachments/sharing-layers.jpg) - --- - ## Comparison with object-oriented programming - * Images are conceptually similar to *classes*. - -* Layers are conceptually similar to *inheritance*. - -* Containers are conceptually similar to *instances*. - --- - ## Wait a minute... - If an image is read-only, how do we change it? - * We don't. - -* We create a new container from that image. - -* Then we make changes to that container. - -* When we are satisfied with those changes, we transform them into a new layer. - -* A new image is created by stacking the new layer on top of the old image. - --- - ## A chicken-and-egg problem - * The only way to create an image is by "freezing" a container. - -* The only way to create a container is by instantiating an image. - -* Help! - ![inline](https://images-na.ssl-images-amazon.com/images/I/515qtu1LfUL._AC_SL1000_.jpg) - --- - ## Creating the first images - There is a special empty image called `scratch`. - * It allows us to *build from scratch*. - The `docker import` command loads a tarball into Docker. - * The imported tarball becomes a standalone image. -* That new image has a single layer. - Note: you will probably never have to do this yourself.
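To make the `docker import` path concrete, here is a minimal sketch; the tarball name `rootfs.tar` and the image name `myorg/base:1.0` are made-up placeholders for illustration, not part of the original slides:

```bash
# Hypothetical example: import a root filesystem tarball as a
# standalone, single-layer image (rootfs.tar and myorg/base:1.0
# are made-up names).
$ docker import rootfs.tar myorg/base:1.0

# The imported image can then be run like any other image.
$ docker run -it myorg/base:1.0 /bin/sh
```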
- --- - ## Creating other images - `docker commit` - * Saves all the changes made to a container into a new layer. -* Creates a new image (effectively a copy of the container). - `docker build` **(used 99% of the time)** - * Performs a repeatable build sequence. -* This is the preferred method! - We will explain both methods in a moment. - --- - ## Image namespaces - There are three namespaces: - * Official images - e.g. `ubuntu`, `busybox` ... - * User (and organization) images - e.g. `jpetazzo/clock` - * Self-hosted images - e.g. `registry.example.com:5000/my-private/image` - Let's explain each of them. - --- - ## Root namespace - The root namespace is for official images. - They are gated by Docker Inc. - They are generally authored and maintained by third parties. - Those images include: - * Small, "swiss-army-knife" images like busybox. - * Distro images to be used as bases for your builds, like ubuntu, fedora... - * Ready-to-use components and services, like redis, postgresql... - * Over 150 at this point! - --- - ## User namespace - The user namespace holds images for Docker Hub users and organizations. - For example: - ```bash -jpetazzo/clock -``` - The Docker Hub user is: - ```bash -jpetazzo -``` - The image name is: - ```bash -clock -``` - --- - ## Showing current images - Let's look at what images are on our host now. - ```bash -$ docker images -REPOSITORY TAG IMAGE ID CREATED SIZE -fedora latest ddd5c9c1d0f2 3 days ago 204.7 MB -centos latest d0e7f81ca65c 3 days ago 196.6 MB -ubuntu latest 07c86167cdc4 4 days ago 188 MB -redis latest 4f5f397d4b7c 5 days ago 177.6 MB -postgres latest afe2b5e1859b 5 days ago 264.5 MB -alpine latest 70c557e50ed6 5 days ago 4.798 MB -debian latest f50f9524513f 6 days ago 125.1 MB -busybox latest 3240943c9ea3 2 weeks ago 1.114 MB -training/namer latest 902673acc741 9 months ago 289.3 MB -jpetazzo/clock latest 12068b93616f 12 months ago 2.433 MB -``` - --- - ## Downloading images - There are two ways to download images. - * Explicitly, with `docker pull`. - * Implicitly, when executing `docker run` and the image is not found locally. - --- - ## Pulling an image - ```bash -$ docker pull debian:jessie -Pulling repository debian -b164861940b8: Download complete -b164861940b8: Pulling image (jessie) from debian -d1881793a057: Download complete -``` - * As seen previously, images are made up of layers. - * Docker has downloaded all the necessary layers. - * In this example, `:jessie` indicates which exact version of Debian - we would like. - - It is a *version tag*. - --- - ## Images and tags - * Images can have tags. - * Tags define image versions or variants. - * `docker pull ubuntu` will refer to `ubuntu:latest`. - * The `:latest` tag is generally updated often. - --- - ## When to (not) use tags - Don't specify tags: - * When doing rapid testing and prototyping. -* When experimenting. -* When you want the latest version. - Do specify tags: - * When recording a procedure into a script. -* When going to production. -* To ensure that the same version will be used everywhere. -* To ensure repeatability later. - This is similar to what we would do with `pip install`, `npm install`, etc. - --- - ## Section summary - We've learned how to: - * Understand images and layers. -* Understand Docker image namespacing. -* Search and download images. - --- - # Building Docker images with a Dockerfile - --- - ## Objectives - We will build a container image automatically, with a `Dockerfile`.
- -At the end of this lesson, you will be able to: - -* Write a `Dockerfile`. - -* Build an image from a `Dockerfile`. - ---- - -## `Dockerfile` overview - -* A `Dockerfile` is a build recipe for a Docker image. - -* It contains a series of instructions telling Docker how an image is constructed. - -* The `docker build` command builds an image from a `Dockerfile`. - ---- - -## Writing our first `Dockerfile` - -Our Dockerfile must be in a **new, empty directory**. - -1. Create a directory to hold our `Dockerfile`. - -```bash -$ mkdir myimage -``` - -2. Create a `Dockerfile` inside this directory. - -```bash -$ cd myimage -$ vim Dockerfile -``` - -Of course, you can use any other editor of your choice. - ---- - -## Type this into our Dockerfile... - -```dockerfile -FROM ubuntu -RUN apt-get update -RUN apt-get install figlet -``` - -* `FROM` indicates the base image for our build. - -* Each `RUN` line will be executed by Docker during the build. - -* Our `RUN` commands **must be non-interactive.** -
(No input can be provided to Docker during the build.) - -* In many cases, we will add the `-y` flag to `apt-get`. - ---- - -## Build it! - -Save our file, then execute: - -```bash -$ docker build -t figlet . -``` - -* `-t` indicates the tag to apply to the image. - -* `.` indicates the location of the *build context*. - -We will talk more about the build context later. - -To keep things simple for now: this is the directory where our Dockerfile is located. - ---- - -## What happens when we build the image? - -The output of `docker build` looks like this: - -.small[ -```bash -docker build -t figlet . -Sending build context to Docker daemon 2.048kB -Step 1/3 : FROM ubuntu - ---> f975c5035748 -Step 2/3 : RUN apt-get update - ---> Running in e01b294dbffd -(...output of the RUN command...) -Removing intermediate container e01b294dbffd - ---> eb8d9b561b37 -Step 3/3 : RUN apt-get install figlet - ---> Running in c29230d70f9b -(...output of the RUN command...) -Removing intermediate container c29230d70f9b - ---> 0dfd7a253f21 -Successfully built 0dfd7a253f21 -Successfully tagged figlet:latest -``` -] - -* The output of the `RUN` commands has been omitted. -* Let's explain what this output means. - ---- - -## Sending the build context to Docker - -```bash -Sending build context to Docker daemon 2.048 kB -``` - -* The build context is the `.` directory given to `docker build`. - -* It is sent (as an archive) by the Docker client to the Docker daemon. - -* This allows to use a remote machine to build using local files. - -* Be careful (or patient) if that directory is big and your link is slow. - -* You can speed up the process with a [`.dockerignore`](https://docs.docker.com/engine/reference/builder/[[dockerignore-file]]) file - - * It tells docker to ignore specific files in the directory - - * Only ignore files that you won't need in the build context! - ---- - -## Executing each step - -```bash -Step 2/3 : RUN apt-get update - ---> Running in e01b294dbffd -(...output of the RUN command...) -Removing intermediate container e01b294dbffd - ---> eb8d9b561b37 -``` - -* A container (`e01b294dbffd`) is created from the base image. - -* The `RUN` command is executed in this container. - -* The container is committed into an image (`eb8d9b561b37`). - -* The build container (`e01b294dbffd`) is removed. - -* The output of this step will be the base image for the next one. - ---- - -## The caching system - -If you run the same build again, it will be instantaneous. Why? - -* After each build step, Docker takes a snapshot of the resulting image. - -* Before executing a step, Docker checks if it has already built the same sequence. - -* Docker uses the exact strings defined in your Dockerfile, so: - - * `RUN apt-get install figlet cowsay ` -
is different from -
`RUN apt-get install cowsay figlet` - - * `RUN apt-get update` is not re-executed when the mirrors are updated - -You can force a rebuild with `docker build --no-cache ...`. - ---- - -## Running the image - -The resulting image is not different from the one produced manually. - -```bash -$ docker run -ti figlet -root@91f3c974c9a1:/# figlet hello - _ _ _ -| |__ ___| | | ___ -| '_ \ / _ \ | |/ _ \ -| | | | __/ | | (_) | -|_| |_|\___|_|_|\___/ -``` - - -Yay! .emoji[🎉] - ---- - -## Using image and viewing history - -The `history` command lists all the layers composing an image. - -For each layer, it shows its creation time, size, and creation command. - -When an image was built with a Dockerfile, each layer corresponds to -a line of the Dockerfile. - -```bash -$ docker history figlet -IMAGE CREATED CREATED BY SIZE -f9e8f1642759 About an hour ago /bin/sh -c apt-get install fi 1.627 MB -7257c37726a1 About an hour ago /bin/sh -c apt-get update 21.58 MB -07c86167cdc4 4 days ago /bin/sh -c #(nop) CMD ["/bin 0 B - 4 days ago /bin/sh -c sed -i 's/^#\s*\( 1.895 kB - 4 days ago /bin/sh -c echo '#!/bin/sh' 194.5 kB - 4 days ago /bin/sh -c #(nop) ADD file:b 187.8 MB -``` - ---- - - - -## Why `sh -c`? - -* On UNIX, to start a new program, we need two system calls: - - - `fork()`, to create a new child process; - - - `execve()`, to replace the new child process with the program to run. - -* Conceptually, `execve()` works like this: - - `execve(program, [list, of, arguments])` - -* When we run a command, e.g. `ls -l /tmp`, something needs to parse the command. - - (i.e. split the program and its arguments into a list.) - -* The shell is usually doing that. - - (It also takes care of expanding environment variables and special things like `~`.) - ---- - - - -## Why `sh -c`? - -* When we do `RUN ls -l /tmp`, the Docker builder needs to parse the command. - -* Instead of implementing its own parser, it outsources the job to the shell. - -* That's why we see `sh -c ls -l /tmp` in that case. - -* But we can also do the parsing jobs ourselves. - -* This means passing `RUN` a list of arguments. - -* This is called the *exec syntax*. - ---- - -## Shell syntax vs exec syntax - -Dockerfile commands that execute something can have two forms: - -* plain string, or *shell syntax*: -
`RUN apt-get install figlet` - -* JSON list, or *exec syntax*: -
`RUN ["apt-get", "install", "figlet"]` - -We are going to change our Dockerfile to see how it affects the resulting image. - ---- - -## Using exec syntax in our Dockerfile - -Let's change our Dockerfile as follows! - -```dockerfile -FROM ubuntu -RUN apt-get update -RUN ["apt-get", "install", "figlet"] -``` - -Then build the new Dockerfile. - -```bash -$ docker build -t figlet . -``` - ---- - -## History with exec syntax - -Compare the new history: - -```bash -$ docker history figlet -IMAGE CREATED CREATED BY SIZE -27954bb5faaf 10 seconds ago apt-get install figlet 1.627 MB -7257c37726a1 About an hour ago /bin/sh -c apt-get update 21.58 MB -07c86167cdc4 4 days ago /bin/sh -c #(nop) CMD ["/bin 0 B - 4 days ago /bin/sh -c sed -i 's/^#\s*\( 1.895 kB - 4 days ago /bin/sh -c echo '#!/bin/sh' 194.5 kB - 4 days ago /bin/sh -c #(nop) ADD file:b 187.8 MB -``` - -* Exec syntax specifies an *exact* command to execute. - -* Shell syntax specifies a command to be wrapped within `/bin/sh -c "..."`. - ---- - -## When to use exec syntax and shell syntax - -* shell syntax: - - * is easier to write - * interpolates environment variables and other shell expressions - * creates an extra process (`/bin/sh -c ...`) to parse the string - * requires `/bin/sh` to exist in the container - -* exec syntax: - - * is harder to write (and read!) - * passes all arguments without extra processing - * doesn't create an extra process - * doesn't require `/bin/sh` to exist in the container - ---- - -## Pro-tip: the `exec` shell built-in - -POSIX shells have a built-in command named `exec`. - -`exec` should be followed by a program and its arguments. - -From a user perspective: - -- it looks like the shell exits right away after the command execution, - -- in fact, the shell exits just *before* command execution; - -- or rather, the shell gets *replaced* by the command. - ---- - -## Example using `exec` - -```dockerfile -CMD exec figlet -f script hello -``` - -In this example, `sh -c` will still be used, but -`figlet` will be PID 1 in the container. - -The shell gets replaced by `figlet` when `figlet` starts execution. - -This allows to run processes as PID 1 without using JSON. - ---- - -# Compose for development stacks - -Dockerfiles are great to build container images. - -But what if we work with a complex stack made of multiple containers? - -Eventually, we will want to write some custom scripts and automation to build, run, and connect -our containers together. - -There is a better way: using Docker Compose. - -In this section, you will use Compose to bootstrap a development environment. - ---- - -## What is Docker Compose? - -Docker Compose (formerly known as `fig`) is an external tool. - -Unlike the Docker Engine, it is written in Python. It's open source as well. - -The general idea of Compose is to enable a very simple, powerful onboarding workflow: - -1. Checkout your code. - -2. Run `docker-compose up`. - -3. Your app is up and running! - ---- - -## Compose overview - -This is how you work with Compose: - -* You describe a set (or stack) of containers in a YAML file called `docker-compose.yml`. - -* You run `docker-compose up`. - -* Compose automatically pulls images, builds containers, and starts them. - -* Compose can set up links, volumes, and other Docker options for you. - -* Compose can run the containers in the background, or in the foreground. - -* When containers are running in the foreground, their aggregated output is shown. - -Before diving in, let's see a small example of Compose in action. 
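-
----
-
-## A first `docker-compose.yml` (sketch)
-
-To make the idea concrete before the demo, here is a minimal sketch of such a file. The service names, build path, and ports below are illustrative placeholders, not the demo app; the actual file used in the demo appears later in this section.
-
-```yaml
-version: "3"
-
-services:
-  web:
-    # Build the image from the Dockerfile in the ./web directory
-    build: web
-    ports:
-      - "8000:5000"
-  redis:
-    # Use a pre-made image from the Docker Hub
-    image: redis
-```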
- ---- - - - -![inline](../attachments/composeup.gif) - ---- - -## Checking if Compose is installed - -If you are using the official training virtual machines, Compose has been -pre-installed. - -If you are using Docker for Mac/Windows or the Docker Toolbox, Compose comes with them. - -If you are on Linux (desktop or server environment), you will need to install Compose from its [release page](https://github.com/docker/compose/releases) or with `pip install docker-compose`. - -You can always check that it is installed by running: - -```bash -$ docker-compose --version -``` - ---- - -## Launching Our First Stack with Compose - -First step: clone the source code for the app we will be working on. - -```bash -$ cd -$ git clone --branch docker https://github.com/DataSystemsGroupUT/dataeng.git -... -$ cd dataeng -``` - ---- -## Launching Our First Stack with Compose - -Second step: start your app. - -```bash -$ docker-compose up -``` - -Watch Compose build and run your app with the correct parameters, -including linking the relevant containers together. - ---- - -![inline](../attachments/composeup.gif) - ---- -## Launching Our First Stack with Compose - -In a new terminal - -```bash -$ docker ps -``` - -![inline](../attachments/Screenshot 2020-09-06 at 7.52.55 PM.png) - - ---- - -## Stopping the app - -When you hit `^C`, Compose tries to gracefully terminate all of the containers. - -After ten seconds (or if you press `^C` again) it will forcibly kill -them. - ---- - -## The `docker-compose.yml` file - -Here is the file used in the demo: - -```yaml -version: "3" - -services: - web: - build: web - ports: - - 80 - db: - build: db - words: - build: words - ports: - - 8080 -``` - ---- - - -## Compose file structure - -A Compose file has multiple sections: - -* `version` is mandatory. (We should use `"2"` or later; version 1 is deprecated.) - -* `services` is mandatory. A service is one or more replicas of the same image running as containers. - -* `networks` is optional and indicates to which networks containers should be connected. -
(By default, containers will be connected on a private, per-compose-file network.)
-
-* `volumes` is optional and can define volumes to be used and/or shared by the containers.
-
----
-
-## Compose file versions
-
-* Version 1 is legacy and shouldn't be used.
-
-  (If you see a Compose file without `version` and `services`, it's a legacy v1 file.)
-
-* Version 2 added support for networks and volumes.
-
-* Version 3 added support for deployment options (scaling, rolling updates, etc.).
-
-The [Docker documentation](https://docs.docker.com/compose/compose-file/)
-has excellent information about the Compose file format if you need to know more about versions.
-
----
-
-## Containers in `docker-compose.yml`
-
-Each service in the YAML file must contain either `build` or `image`.
-
-* `build` indicates a path containing a Dockerfile.
-
-* `image` indicates an image name (local, or on a registry).
-
-* If both are specified, an image will be built from the `build` directory and named `image`.
-
-The other parameters are optional.
-
-They encode the parameters that you would typically add to `docker run`.
-
-Some of them offer minor improvements over their `docker run` equivalents.
-
----
-
-## Container parameters
-
-* `command` indicates what to run (like `CMD` in a Dockerfile).
-
-* `ports` translates to one (or multiple) `-p` options to map ports.
-
You can specify local ports (i.e. `x:y` to expose public port `x`). - -* `volumes` translates to one (or multiple) `-v` options. -
You can use relative paths here. - -For the full list, check: https://docs.docker.com/compose/compose-file/ - ---- - -## Compose commands - -We already saw `docker-compose up`, but another one is `docker-compose build`. - -It will execute `docker build` for all containers mentioning a `build` path. - -It can also be invoked automatically when starting the application: - -```bash -docker-compose up --build -``` - -Another common option is to start containers in the background: - -```bash -docker-compose up -d -``` - ---- - -## Check container status - -It can be tedious to check the status of your containers with `docker ps`, -especially when running multiple apps at the same time. - -Compose makes it easier; with `docker-compose ps` you will see only the status of the -containers of the current stack: - - -```bash -$ docker-compose ps -Name Command State Ports ----------------------------------------------------------------------------- -trainingwheels_redis_1 /entrypoint.sh red Up 6379/tcp -trainingwheels_www_1 python counter.py Up 0.0.0.0:8000->5000/tcp -``` - ---- - -## Cleaning up (1) - -If you have started your application in the background with Compose and -want to stop it easily, you can use the `kill` command: - -```bash -$ docker-compose kill -``` - -Likewise, `docker-compose rm` will let you remove containers (after confirmation): - -```bash -$ docker-compose rm -Going to remove trainingwheels_redis_1, trainingwheels_www_1 -Are you sure? [yN] y -Removing trainingwheels_redis_1... -Removing trainingwheels_www_1... -``` - ---- - -## Cleaning up (2) - -Alternatively, `docker-compose down` will stop and remove containers. - -It will also remove other resources, like networks that were created for the application. - -```bash -$ docker-compose down -Stopping trainingwheels_www_1 ... done -Stopping trainingwheels_redis_1 ... done -Removing trainingwheels_www_1 ... done -Removing trainingwheels_redis_1 ... done -``` - -Use `docker-compose down -v` to remove everything including volumes. - ---- - -## Special handling of volumes - -Compose is smart. If your container uses volumes, when you restart your -application, Compose will create a new container, but carefully re-use -the volumes it was using previously. - -This makes it easy to upgrade a stateful service, by pulling its -new image and just restarting your stack with Compose. - ---- - -## Compose project name - -* When you run a Compose command, Compose infers the "project name" of your app. - -* By default, the "project name" is the name of the current directory. - -* For instance, if you are in `/home/zelda/src/ocarina`, the project name is `ocarina`. - -* All resources created by Compose are tagged with this project name. - -* The project name also appears as a prefix of the names of the resources. - - E.g. in the previous example, service `www` will create a container `ocarina_www_1`. - -* The project name can be overridden with `docker-compose -p`. - diff --git a/docker/Docker On Cloud.md b/docker/Docker On Cloud.md deleted file mode 100644 index 172e937..0000000 --- a/docker/Docker On Cloud.md +++ /dev/null @@ -1,51 +0,0 @@ -# Hey there. - -Ok, so, hypothetically speaking, let's say that you don't have any debian-based linux distribution. - -In that case, you can use google cloud in order to create a 24/7 debian virtual machine. 
- -here's how to do that: - -### Step 1 - - -go to [google cloud](console.cloud.google.com) and register, put your credit card there(the first 300 USD in cloud costs are for free, during the first 12 months, and they will warn you if you exceed it) - -### Step 2 - - -On console.cloud.google.com, with your newly created account, click on "Compute Engine" on the left side of the screen. - -![inline](../attachments/compute_engine.png) - -Now, create your virtual machine, with DEBIAN as the OS. Get one whose specs cost at max 30 bucks a month. - -### Step 3 - most complex one - -Ok, so, now you have your shiny debian vm. - -The next step is to set up the firewall rules so whenever you deploy the airflow server you won't be frustrated out of your mind trying to figure out how to access it. - -#### Step 3.1 - - -On the same side navbar where you found compute engine, find VPC network. It should be under networking. - -#### Step 3.2 - - -After finding VPC network, click on firewall. - -#### Step 3.3 - - -Now, select create firewall rule - -#### Step 3.4 - - -Make sure your firewall rule looks like this: - -![inline](../attachments/firewall_rules.png) - -### Step 4 - - -Test it. - -Go to your compute engine public ip's port 8080. - -You should see the airflow webserver. \ No newline at end of file diff --git a/docker/Docker Overview.md b/docker/Docker Overview.md deleted file mode 100644 index 9d99a6d..0000000 --- a/docker/Docker Overview.md +++ /dev/null @@ -1,323 +0,0 @@ -footer: [Riccardo Tommasini](http://rictomm.me) - riccardo.tommasini@ut.ee - @rictomm -slide-dividers: #, ##, ### -slidenumbers: true -autoscale: true - -# Data Engineering: Docker Overview -#### LTAT.02.007 -#### Ass Prof. Riccardo Tommasini -#### Assistants: [Fabiano Spiga](mailto:), [Mohamed Ragab](mailto:mohamed.ragab@ut.ee), [Hassan Eldeeb](mailto:hassan.eldeeb@ut.ee) -- [https://courses.cs.ut.ee/2020/dataeng](https://courses.cs.ut.ee/2020/dataeng) -- [Forum](https://piazza.com/ut.ee/fall2020/ltat02007/home) -- [Moodle](https://moodle.ut.ee/course/view.php?id=10457) - - -# Docker 30,000ft overview - -In this lesson, we will learn about: - -* Why containers (non-technical elevator pitch) - -* Why containers (technical elevator pitch) - -* How Docker helps us to build, ship, and run - -* The history of containers - -We won't actually run Docker or containers in this chapter (yet!). - -Don't worry, we will get to that fast enough! - ---- - -## Elevator pitch - -### (for your manager, your boss...) - ---- - -## OK... Why the buzz around containers? - -* The software industry has changed - -* Before: - * monolithic applications - * long development cycles - * single environment - * slowly scaling up - -* Now: - * decoupled services - * fast, iterative improvements - * multiple environments - * quickly scaling out - ---- - -## Deployment becomes very complex - -* Many different stacks: - * languages - * frameworks - * databases - -* Many different targets: - * individual development environments - * pre-production, QA, staging... 
- * production: on prem, cloud, hybrid - ---- - -## The deployment problem - -![inline](../attachments/shipping-software-problem.png) - ---- - - - -## The matrix from hell - -![inline](../attachments/shipping-matrix-from-hell.png) - ---- - - - -## The parallel with the shipping industry - -![inline](../attachments/shipping-industry-problem.png) - ---- - - - -## Intermodal shipping containers - -![inline](../attachments/shipping-industry-solution.png) - ---- - - - -## A new shipping ecosystem - -![inline](../attachments/shipping-indsutry-results.png) - ---- - - - -## A shipping container system for applications - -![inline](../attachments/shipping-software-solution.png) - ---- - - - -## Eliminate the matrix from hell - -![inline](../attachments/shipping-matrix-solved.png) - ---- - -## Results - -* [Dev-to-prod reduced from 9 months to 15 minutes (ING)]( - https://www.docker.com/sites/default/files/CS_ING_01.25.2015_1.pdf) - -* [Continuous integration job time reduced by more than 60% (BBC)]( - https://www.docker.com/sites/default/files/CS_BBCNews_01.25.2015_1.pdf) - -* [Deploy 100 times a day instead of once a week (GILT)]( - https://www.docker.com/sites/default/files/CS_Gilt%20Groupe_03.18.2015_0.pdf) - -* [70% infrastructure consolidation (MetLife)]( - https://www.docker.com/customers/metlife-transforms-customer-experience-legacy-and-microservices-mashup) - -* [60% infrastructure consolidation (Intesa Sanpaolo)]( - https://blog.docker.com/2017/11/intesa-sanpaolo-builds-resilient-foundation-banking-docker-enterprise-edition/) - -* [14x application density; 60% of legacy datacenter migrated in 4 months (GE Appliances)]( - https://www.docker.com/customers/ge-uses-docker-enable-self-service-their-developers) - -* etc. - ---- - -## Elevator pitch - -### (for your fellow devs and ops) - ---- - -## Escape dependency hell - -1. Write installation instructions into an `INSTALL.txt` file - -2. Using this file, write an `install.sh` script that works *for you* - -3. Turn this file into a `Dockerfile`, test it on your machine - -4. If the Dockerfile builds on your machine, it will build *anywhere* - -5. Rejoice as you escape dependency hell and "works on my machine" - -Never again "worked in dev - ops problem now!" - ---- - -## On-board developers and contributors rapidly - -1. Write Dockerfiles for your application components - -2. Use pre-made images from the Docker Hub (mysql, redis...) - -3. Describe your stack with a Compose file - -4. On-board somebody with two commands: - -```bash -git clone ... -docker-compose up -``` - -With this, you can create development, integration, QA environments in minutes! - ---- - - -## Implement reliable CI easily - -1. Build test environment with a Dockerfile or Compose file - -2. For each test run, stage up a new container or stack - -3. Each run is now in a clean environment - -4. No pollution from previous tests - -Way faster and cheaper than creating VMs each time! - ---- - - -## Use container images as build artefacts - -1. Build your app from Dockerfiles - -2. Store the resulting images in a registry - -3. Keep them forever (or as long as necessary) - -4. Test those images in QA, CI, integration... - -5. Run the same images in production - -6. Something goes wrong? Rollback to previous image - -7. Investigating old regression? Old image has your back! - -Images contain all the libraries, dependencies, etc. needed to run the app. - ---- - - -## Decouple "plumbing" from application logic - -1. Write your code to connect to named services ("db", "api"...) 
-
-2. Use Compose to start your stack
-
-3. Docker will set up a per-container DNS resolver for those names
-
-4. You can now scale, add load balancers, add replication... all without changing your code
-
-Note: this is not covered in this intro-level workshop!
-
----
-
-
-## What did Docker bring to the table?
-
-### Docker before/after
-
----
-
-
-## Formats and APIs, before Docker
-
-* No standardized exchange format.
-
(No, a rootfs tarball is *not* a format!) - -* Containers are hard to use for developers. -
(Where's the equivalent of `docker run debian`?) - -* As a result, they are *hidden* from the end users. - -* No re-usable components, APIs, tools. -
(At best: VM abstractions, e.g. libvirt.) - -Analogy: - -* Shipping containers are not just steel boxes. -* They are steel boxes that are a standard size, with the same hooks and holes. - ---- - - -## Formats and APIs, after Docker - -* Standardize the container format, because containers were not portable. -* Make containers easy to use for developers. -* Emphasis on re-usable components, APIs, ecosystem of standard tools. -* Improvement over ad-hoc, in-house, specific tools. - ---- - - -## Shipping, before Docker - -* Ship packages: deb, rpm, gem, jar, homebrew... -* Dependency hell -* "Works on my machine." -* Base deployment often done from scratch (debootstrap...) and unreliable. - ---- - - -## Shipping, after Docker - -* Ship container images with all their dependencies. - -* Images are bigger, but they are broken down into layers. - -* Only ship layers that have changed. - -* Save disk, network, memory usage. - ---- - - -## Example - -Layers: - -* CentOS -* JRE -* Tomcat -* Dependencies -* Application JAR -* Configuration - ---- - -## Our training environment - -- If you are attending #DataEng - - docker is an easy way to deploy various technologies without - affecting your local environment - - you don't have to worry about networking - - you need to take care of persistance thoug -- For testing purposes use [Play with Docker](https://www.play-with-docker.com/) to instantly get a training environment diff --git a/docker/Dockerfiles Tips.md b/docker/Dockerfiles Tips.md deleted file mode 100644 index 1031adf..0000000 --- a/docker/Dockerfiles Tips.md +++ /dev/null @@ -1,715 +0,0 @@ -# Tips for efficient Dockerfiles - -We will see how to: - -* Reduce the number of layers. - -* Leverage the build cache so that builds can be faster. - -* Embed unit testing in the build process. - ---- - -## Reducing the number of layers - -* Each line in a `Dockerfile` creates a new layer. - -* Build your `Dockerfile` to take advantage of Docker's caching system. - -* Combine commands by using `&&` to continue commands and `\` to wrap lines. - -Note: it is frequent to build a Dockerfile line by line: - -```dockerfile -RUN apt-get install thisthing -RUN apt-get install andthatthing andthatotherone -RUN apt-get install somemorestuff -``` - -And then refactor it trivially before shipping: - -```dockerfile -RUN apt-get install thisthing andthatthing andthatotherone somemorestuff -``` - ---- - -## Avoid re-installing dependencies at each build - -* Classic Dockerfile problem: - - "each time I change a line of code, all my dependencies are re-installed!" - -* Solution: `COPY` dependency lists (`package.json`, `requirements.txt`, etc.) - by themselves to avoid reinstalling unchanged dependencies every time. - ---- - -## Example "bad" `Dockerfile` - -The dependencies are reinstalled every time, because the build system does not know if `requirements.txt` has been updated. - -```bash -FROM python -WORKDIR /src -COPY . . -RUN pip install -qr requirements.txt -EXPOSE 5000 -CMD ["python", "app.py"] -``` - ---- - -## Fixed `Dockerfile` - -Adding the dependencies as a separate step means that Docker can cache more efficiently and only install them when `requirements.txt` changes. - -```bash -FROM python -COPY requirements.txt /tmp/requirements.txt -RUN pip install -qr /tmp/requirements.txt -WORKDIR /src -COPY . . -EXPOSE 5000 -CMD ["python", "app.py"] -``` - ---- - -## Be careful with `chown`, `chmod`, `mv` - -* Layers cannot store efficiently changes in permissions or ownership. 
- -* Layers cannot represent efficiently when a file is moved either. - -* As a result, operations like `chown`, `chown`, `mv` can be expensive. - -* For instance, in the Dockerfile snippet below, each `RUN` line - creates a layer with an entire copy of `some-file`. - - ```dockerfile - COPY some-file . - RUN chown www-data:www-data some-file - RUN chmod 644 some-file - RUN mv some-file /var/www - ``` - -* How can we avoid that? - ---- - -## Put files on the right place - -* Instead of using `mv`, directly put files at the right place. - -* When extracting archives (tar, zip...), merge operations in a single layer. - - Example: - - ```dockerfile - ... - RUN wget http://.../foo.tar.gz \ - && tar -zxf foo.tar.gz \ - && mv foo/fooctl /usr/local/bin \ - && rm -rf foo - ... - ``` - ---- - -## Use `COPY --chown` - -* The Dockerfile instruction `COPY` can take a `--chown` parameter. - - Examples: - - ```dockerfile - ... - COPY --chown=1000 some-file . - COPY --chown=1000:1000 some-file . - COPY --chown=www-data:www-data some-file . - ``` - -* The `--chown` flag can specify a user, or a user:group pair. - -* The user and group can be specified as names or numbers. - -* When using names, the names must exist in `/etc/passwd` or `/etc/group`. - - *(In the container, not on the host!)* - ---- - -## Set correct permissions locally - -* Instead of using `chmod`, set the right file permissions locally. - -* When files are copied with `COPY`, permissions are preserved. - ---- - -## Embedding unit tests in the build process - -```dockerfile -FROM -RUN -COPY -RUN -RUN -COPY -RUN -FROM -RUN -COPY -RUN -CMD, EXPOSE ... -``` - -* The build fails as soon as an instruction fails -* If `RUN ` fails, the build doesn't produce an image -* If it succeeds, it produces a clean image (without test libraries and data) - ---- - -# Dockerfile examples - -There are a number of tips, tricks, and techniques that we can use in Dockerfiles. - -But sometimes, we have to use different (and even opposed) practices depending on: - -- the complexity of our project, - -- the programming language or framework that we are using, - -- the stage of our project (early MVP vs. super-stable production), - -- whether we're building a final image or a base for further images, - -- etc. - -We are going to show a few examples using very different techniques. - ---- - -## When to optimize an image - -When authoring official images, it is a good idea to reduce as much as possible: - -- the number of layers, - -- the size of the final image. - -This is often done at the expense of build time and convenience for the image maintainer; -but when an image is downloaded millions of time, saving even a few seconds of pull time -can be worth it. - -.small[ -```dockerfile -RUN apt-get update && apt-get install -y libpng12-dev libjpeg-dev && rm -rf /var/lib/apt/lists/* \ - && docker-php-ext-configure gd --with-png-dir=/usr --with-jpeg-dir=/usr \ - && docker-php-ext-install gd -... -RUN curl -o wordpress.tar.gz -SL https://wordpress.org/wordpress-${WORDPRESS_UPSTREAM_VERSION}.tar.gz \ - && echo "$WORDPRESS_SHA1 *wordpress.tar.gz" | sha1sum -c - \ - && tar -xzf wordpress.tar.gz -C /usr/src/ \ - && rm wordpress.tar.gz \ - && chown -R www-data:www-data /usr/src/wordpress -``` -] - -(Source: [Wordpress official image](https://github.com/docker-library/wordpress/blob/618490d4bdff6c5774b84b717979bfe3d6ba8ad1/apache/Dockerfile)) - ---- - -## When to *not* optimize an image - -Sometimes, it is better to prioritize *maintainer convenience*. 
- -In particular, if: - -- the image changes a lot, - -- the image has very few users (e.g. only 1, the maintainer!), - -- the image is built and run on the same machine, - -- the image is built and run on machines with a very fast link ... - -In these cases, just keep things simple! - -(Next slide: a Dockerfile that can be used to preview a Jekyll / github pages site.) - ---- - -```dockerfile -FROM debian:sid - -RUN apt-get update -q -RUN apt-get install -yq build-essential make -RUN apt-get install -yq zlib1g-dev -RUN apt-get install -yq ruby ruby-dev -RUN apt-get install -yq python-pygments -RUN apt-get install -yq nodejs -RUN apt-get install -yq cmake -RUN gem install --no-rdoc --no-ri github-pages - -COPY . /blog -WORKDIR /blog - -VOLUME /blog/_site - -EXPOSE 4000 -CMD ["jekyll", "serve", "--host", "0.0.0.0", "--incremental"] -``` - ---- - -## Multi-dimensional versioning systems - -Images can have a tag, indicating the version of the image. - -But sometimes, there are multiple important components, and we need to indicate the versions -for all of them. - -This can be done with environment variables: - -```dockerfile -ENV PIP=9.0.3 \ - ZC_BUILDOUT=2.11.2 \ - SETUPTOOLS=38.7.0 \ - PLONE_MAJOR=5.1 \ - PLONE_VERSION=5.1.0 \ - PLONE_MD5=76dc6cfc1c749d763c32fff3a9870d8d -``` - -(Source: [Plone official image](https://github.com/plone/plone.docker/blob/master/5.1/5.1.0/alpine/Dockerfile)) - ---- - -## Entrypoints and wrappers - -It is very common to define a custom entrypoint. - -That entrypoint will generally be a script, performing any combination of: - -- pre-flights checks (if a required dependency is not available, display - a nice error message early instead of an obscure one in a deep log file), - -- generation or validation of configuration files, - -- dropping privileges (with e.g. `su` or `gosu`, sometimes combined with `chown`), - -- and more. - ---- - -## A typical entrypoint script - -```dockerfile - #!/bin/sh - set -e - - # first arg is '-f' or '--some-option' - # or first arg is 'something.conf' - if [ "${1#-}" != "$1" ] || [ "${1%.conf}" != "$1" ]; then - set -- redis-server "$@" - fi - - # allow the container to be started with '--user' - if [ "$1" = 'redis-server' -a "$(id -u)" = '0' ]; then - chown -R redis . - exec su-exec redis "$0" "$@" - fi - - exec "$@" -``` - -(Source: [Redis official image](https://github.com/docker-library/redis/blob/d24f2be82673ccef6957210cc985e392ebdc65e4/4.0/alpine/docker-entrypoint.sh)) - ---- - -## Factoring information - -To facilitate maintenance (and avoid human errors), avoid to repeat information like: - -- version numbers, - -- remote asset URLs (e.g. source tarballs) ... - -Instead, use environment variables. - -.small[ -```dockerfile -ENV NODE_VERSION 10.2.1 -... -RUN ... - && curl -fsSLO --compressed "https://nodejs.org/dist/v$NODE_VERSION/node-v$NODE_VERSION.tar.xz" \ - && curl -fsSLO --compressed "https://nodejs.org/dist/v$NODE_VERSION/SHASUMS256.txt.asc" \ - && gpg --batch --decrypt --output SHASUMS256.txt SHASUMS256.txt.asc \ - && grep " node-v$NODE_VERSION.tar.xz\$" SHASUMS256.txt | sha256sum -c - \ - && tar -xf "node-v$NODE_VERSION.tar.xz" \ - && cd "node-v$NODE_VERSION" \ -... -``` -] - -(Source: [Nodejs official image](https://github.com/nodejs/docker-node/blob/master/10/alpine/Dockerfile)) - ---- - -## Overrides - -In theory, development and production images should be the same. - -In practice, we often need to enable specific behaviors in development (e.g. debug statements). 
- -One way to reconcile both needs is to use Compose to enable these behaviors. - -Let's look at the [trainingwheels](https://github.com/jpetazzo/trainingwheels) demo app for an example. - ---- - -## Production image - -This Dockerfile builds an image leveraging gunicorn: - -```dockerfile -FROM python -RUN pip install flask -RUN pip install gunicorn -RUN pip install redis -COPY . /src -WORKDIR /src -CMD gunicorn --bind 0.0.0.0:5000 --workers 10 counter:app -EXPOSE 5000 -``` - -(Source: [trainingwheels Dockerfile](https://github.com/jpetazzo/trainingwheels/blob/master/www/Dockerfile)) - ---- - -## Development Compose file - -This Compose file uses the same image, but with a few overrides for development: - -- the Flask development server is used (overriding `CMD`), - -- the `DEBUG` environment variable is set, - -- a volume is used to provide a faster local development workflow. - -.small[ -```yaml -services: - www: - build: www - ports: - - 8000:5000 - user: nobody - environment: - DEBUG: 1 - command: python counter.py - volumes: - - ./www:/src -``` -] - -(Source: [trainingwheels Compose file](https://github.com/jpetazzo/trainingwheels/blob/master/docker-compose.yml)) - ---- - -## How to know which best practices are better? - -- The main goal of containers is to make our lives easier. - -- In this chapter, we showed many ways to write Dockerfiles. - -- These Dockerfiles use sometimes diametrally opposed techniques. - -- Yet, they were the "right" ones *for a specific situation.* - -- It's OK (and even encouraged) to start simple and evolve as needed. - -- Feel free to review this chapter later (after writing a few Dockerfiles) for inspiration! - - - - - -# `CMD` and `ENTRYPOINT` - -![Container entry doors](entrypoint.jpg) - ---- - -## Objectives - -In this lesson, we will learn about two important -Dockerfile commands: - -`CMD` and `ENTRYPOINT`. - -These commands allow us to set the default command -to run in a container. - ---- - -## Defining a default command - -When people run our container, we want to greet them with a nice hello message, and using a custom font. - -For that, we will execute: - -```bash -figlet -f script hello -``` - -* `-f script` tells figlet to use a fancy font. - -* `hello` is the message that we want it to display. - ---- - -## Adding `CMD` to our Dockerfile - -Our new Dockerfile will look like this: - -```dockerfile -FROM ubuntu -RUN apt-get update -RUN ["apt-get", "install", "figlet"] -CMD figlet -f script hello -``` - -* `CMD` defines a default command to run when none is given. - -* It can appear at any point in the file. - -* Each `CMD` will replace and override the previous one. - -* As a result, while you can have multiple `CMD` lines, it is useless. - ---- - -## Build and test our image - -Let's build it: - -```bash -$ docker build -t figlet . -... -Successfully built 042dff3b4a8d -Successfully tagged figlet:latest -``` - -And run it: - -```bash -$ docker run figlet - _ _ _ -| | | | | | -| | _ | | | | __ -|/ \ |/ |/ |/ / \_ -| |_/|__/|__/|__/\__/ -``` - ---- - -## Overriding `CMD` - -If we want to get a shell into our container (instead of running -`figlet`), we just have to specify a different program to run: - -```bash -$ docker run -it figlet bash -root@7ac86a641116:/# -``` - -* We specified `bash`. - -* It replaced the value of `CMD`. - ---- - -## Using `ENTRYPOINT` - -We want to be able to specify a different message on the command line, -while retaining `figlet` and some default parameters. 
- -In other words, we would like to be able to do this: - -```bash -$ docker run figlet salut - _ - | | - , __, | | _|_ -/ \_/ | |/ | | | - \/ \_/|_/|__/ \_/|_/|_/ -``` - - -We will use the `ENTRYPOINT` verb in Dockerfile. - ---- - -## Adding `ENTRYPOINT` to our Dockerfile - -Our new Dockerfile will look like this: - -```dockerfile -FROM ubuntu -RUN apt-get update -RUN ["apt-get", "install", "figlet"] -ENTRYPOINT ["figlet", "-f", "script"] -``` - -* `ENTRYPOINT` defines a base command (and its parameters) for the container. - -* The command line arguments are appended to those parameters. - -* Like `CMD`, `ENTRYPOINT` can appear anywhere, and replaces the previous value. - -Why did we use JSON syntax for our `ENTRYPOINT`? - ---- - -## Implications of JSON vs string syntax - -* When CMD or ENTRYPOINT use string syntax, they get wrapped in `sh -c`. - -* To avoid this wrapping, we can use JSON syntax. - -What if we used `ENTRYPOINT` with string syntax? - -```bash -$ docker run figlet salut -``` - -This would run the following command in the `figlet` image: - -```bash -sh -c "figlet -f script" salut -``` - ---- - -## Build and test our image - -Let's build it: - -```bash -$ docker build -t figlet . -... -Successfully built 36f588918d73 -Successfully tagged figlet:latest -``` - -And run it: - -```bash -$ docker run figlet salut - _ - | | - , __, | | _|_ -/ \_/ | |/ | | | - \/ \_/|_/|__/ \_/|_/|_/ -``` - ---- - -## Using `CMD` and `ENTRYPOINT` together - -What if we want to define a default message for our container? - -Then we will use `ENTRYPOINT` and `CMD` together. - -* `ENTRYPOINT` will define the base command for our container. - -* `CMD` will define the default parameter(s) for this command. - -* They *both* have to use JSON syntax. - ---- - -## `CMD` and `ENTRYPOINT` together - -Our new Dockerfile will look like this: - -```dockerfile -FROM ubuntu -RUN apt-get update -RUN ["apt-get", "install", "figlet"] -ENTRYPOINT ["figlet", "-f", "script"] -CMD ["hello world"] -``` - -* `ENTRYPOINT` defines a base command (and its parameters) for the container. - -* If we don't specify extra command-line arguments when starting the container, - the value of `CMD` is appended. - -* Otherwise, our extra command-line arguments are used instead of `CMD`. - ---- - -## Build and test our image - -Let's build it: - -```bash -$ docker build -t myfiglet . -... -Successfully built 6e0b6a048a07 -Successfully tagged myfiglet:latest -``` - -Run it without parameters: - -```bash -$ docker run myfiglet - _ _ _ _ -| | | | | | | | | -| | _ | | | | __ __ ,_ | | __| -|/ \ |/ |/ |/ / \_ | | |_/ \_/ | |/ / | -| |_/|__/|__/|__/\__/ \/ \/ \__/ |_/|__/\_/|_/ -``` - ---- - -## Overriding the image default parameters - -Now let's pass extra arguments to the image. - -```bash -$ docker run myfiglet hola mundo - _ _ -| | | | | -| | __ | | __, _ _ _ _ _ __| __ -|/ \ / \_|/ / | / |/ |/ | | | / |/ | / | / \_ -| |_/\__/ |__/\_/|_/ | | |_/ \_/|_/ | |_/\_/|_/\__/ -``` - -We overrode `CMD` but still used `ENTRYPOINT`. - ---- - -## Overriding `ENTRYPOINT` - -What if we want to run a shell in our container? - -We cannot just do `docker run myfiglet bash` because -that would just tell figlet to display the word "bash." - -We use the `--entrypoint` parameter: - -```bash -$ docker run -it --entrypoint bash myfiglet -root@6027e44e2955:/# -``` - -??? 
- -:EN:- CMD and ENTRYPOINT -:FR:- CMD et ENTRYPOINT -§ - diff --git a/docker/Installing Docker.md b/docker/Installing Docker.md deleted file mode 100644 index 86e6935..0000000 --- a/docker/Installing Docker.md +++ /dev/null @@ -1,190 +0,0 @@ -footer: [Riccardo Tommasini](http://rictomm.me) - riccardo.tommasini@ut.ee - @rictomm -slide-dividers: #, ##, ### -slidenumbers: true -autoscale: true - -# Data Engineering: Docker -#### LTAT.02.007 -#### Ass Prof. Riccardo Tommasini -#### Assistants: [Fabiano Spiga](mailto:), [Mohamed Ragab](mailto:mohamed.ragab@ut.ee), [Hassan Eldeeb](mailto:hassan.eldeeb@ut.ee) -- [https://courses.cs.ut.ee/2020/dataeng](https://courses.cs.ut.ee/2020/dataeng) -- [Forum](https://piazza.com/ut.ee/fall2020/ltat02007/home) -- [Moodle](https://moodle.ut.ee/course/view.php?id=10457) - - - -# Installing Docker - -![install](../attachments/title-installing-docker.jpg) - ---- - -## Objectives - -At the end of this lesson, you will know: - -* How to install Docker. - -* When to use `sudo` when running Docker commands. - -*Note:* if you were provided with a training VM for a hands-on -tutorial, you can skip this chapter, since that VM already -has Docker installed, and Docker has already been setup to run -without `sudo`. - ---- - -## Installing Docker - -There are many ways to install Docker. - -We can arbitrarily distinguish: - -* Installing Docker on an existing Linux machine (physical or VM) - -* Installing Docker on macOS or Windows - -* Installing Docker on a fleet of cloud VMs - ---- - -## Installing Docker on Linux - -* The recommended method is to install the packages supplied by Docker Inc : - - - add Docker Inc.'s package repositories to your system configuration - - - install the Docker Engine - -* Detailed installation instructions (distro by distro) are available on: - - https://docs.docker.com/engine/installation/ - -* You can also install from binaries (if your distro is not supported): - - https://docs.docker.com/engine/installation/linux/docker-ce/binaries/ - -* To quickly setup a dev environment, Docker provides a convenience install script: - - ```bash - curl -fsSL get.docker.com | sh - ``` - ---- - -class: extra-details - -## Docker Inc. packages vs distribution packages - -* Docker Inc. releases new versions monthly (edge) and quarterly (stable) - -* Releases are immediately available on Docker Inc.'s package repositories - -* Linux distros don't always update to the latest Docker version - - (Sometimes, updating would break their guidelines for major/minor upgrades) - -* Sometimes, some distros have carried packages with custom patches - -* Sometimes, these patches added critical security bugs ☹ - -* Installing through Docker Inc.'s repositories is a bit of extra work … - - … but it is generally worth it! 
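-
----
-
-class: extra-details
-
-## Example: repository-based install (Debian/Ubuntu)
-
-A rough sketch of the repository method described above. Key handling and repository paths change over time, so treat this as illustrative and follow the current instructions on the Docker docs pages linked earlier:
-
-```bash
-sudo apt-get update
-sudo apt-get install -y ca-certificates curl
-
-# Add Docker Inc.'s package repository and its signing key
-curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
-sudo add-apt-repository \
-  "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
-
-# Install the Docker Engine
-sudo apt-get update
-sudo apt-get install -y docker-ce docker-ce-cli containerd.io
-```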
- ---- - -## Installing Docker on macOS and Windows - -* On macOS, the recommended method is to use Docker Desktop for Mac: - - https://docs.docker.com/docker-for-mac/install/ - -* On Windows 10 Pro, Enterprise, and Education, you can use Docker Desktop for Windows: - - https://docs.docker.com/docker-for-windows/install/ - -* On older versions of Windows, you can use the Docker Toolbox: - - https://docs.docker.com/toolbox/toolbox_install_windows/ - -* On Windows Server 2016, you can also install the native engine: - - https://docs.docker.com/install/windows/docker-ee/ - ---- - -## Docker Desktop - -* Special Docker edition available for Mac and Windows - -* Integrates well with the host OS: - - * installed like normal user applications on the host - - * provides user-friendly GUI to edit Docker configuration and settings - -* Only support running one Docker VM at a time ... - - ... but we can use `docker-machine`, the Docker Toolbox, VirtualBox, etc. to get a cluster. - ---- - -class: extra-details - -## Docker Desktop internals - -* Leverages the host OS virtualization subsystem - - (e.g. the [Hypervisor API](https://developer.apple.com/documentation/hypervisor) on macOS) - -* Under the hood, runs a tiny VM - - (transparent to our daily use) - -* Accesses network resources like normal applications - - (and therefore, plays better with enterprise VPNs and firewalls) - -* Supports filesystem sharing through volumes - - (we'll talk about this later) - ---- - -## Running Docker on macOS and Windows - -When you execute `docker version` from the terminal: - -* the CLI connects to the Docker Engine over a standard socket, -* the Docker Engine is, in fact, running in a VM, -* ... but the CLI doesn't know or care about that, -* the CLI sends a request using the REST API, -* the Docker Engine in the VM processes the request, -* the CLI gets the response and displays it to you. - -All communication with the Docker Engine happens over the API. - -This will also allow to use remote Engines exactly as if they were local. - ---- - -## Important PSA about security - -* If you have access to the Docker control socket, you can take over the machine - - (Because you can run containers that will access the machine's resources) - -* Therefore, on Linux machines, the `docker` user is equivalent to `root` - -* You should restrict access to it like you would protect `root` - -* By default, the Docker control socket belongs to the `docker` group - -* You can add trusted users to the `docker` group - -* Otherwise, you will have to prefix every `docker` command with `sudo`, e.g.: - - ```bash - sudo docker version - ``` diff --git a/docker/README.md b/docker/README.md deleted file mode 100644 index c15dfcf..0000000 --- a/docker/README.md +++ /dev/null @@ -1,19 +0,0 @@ -# Docker Practice - -## Obejctives - -- get started with docker -- understanding container philosophy -- icebreaker with the pracitice environment - - some bash - - some python - - some java - - Jupyter Notebooks - - -## Material - -- Slides and Exercises are taken from [container training](http://container.training) -- Addtional resources - - JPetazzo's [Wordsmith](https://github.com/jpetazzo/wordsmith) - - diff --git a/hosts.sh b/hosts.sh new file mode 100755 index 0000000..e399cea --- /dev/null +++ b/hosts.sh @@ -0,0 +1,22 @@ +#/bin/bash +if [ "$HOSTNAME" = tools ]; then + echo "We don't need to update hosts in the tools container. Exiting." + exit 1 +fi + +if grep "DEV host entries" /etc/hosts >/dev/null; then + echo "Already done!" 
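+  # The marker comment is already in /etc/hosts, so the entries were added by a previous run.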
+ exit 0 +fi + +cat << EOF | sudo tee -a /etc/hosts >/dev/null +# DEV host entries +127.0.0.1 kafka1 +127.0.0.1 kafka2 +127.0.0.1 zookeeper +127.0.0.1 schema-registry +127.0.0.1 connect +127.0.0.1 ksqldb-server +127.0.0.1 postgres +EOF +echo Done! \ No newline at end of file diff --git a/kafka-java-consumer/.gitignore b/kafka-java-consumer/.gitignore new file mode 100644 index 0000000..3083cb8 --- /dev/null +++ b/kafka-java-consumer/.gitignore @@ -0,0 +1,35 @@ +# Compiled class file + +.DS_Store +.settings/ +*.class + +# Log file +*.log + +# BlueJ files +*.ctxt + +# Mobile Tools for Java (J2ME) +.mtj.tmp/ + +# Package Files # +*.jar +*.war +*.ear +*.zip +*.tar.gz +*.rar + +# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml +hs_err_pid* + +#Idea + +.idea +target/ +*.iml +src/main/java/it/polimi/rsp/services/ + +conf/ +kafka211/ \ No newline at end of file diff --git a/kafka-java-consumer/pom.xml b/kafka-java-consumer/pom.xml new file mode 100644 index 0000000..f814d90 --- /dev/null +++ b/kafka-java-consumer/pom.xml @@ -0,0 +1,112 @@ + + 4.0.0 + io.confluent.helloworld + kafka-training + 1.0 + jar + + + confluent + http://packages.confluent.io/maven/ + + + + + 2.4.0 + 1.7.21 + 1.8.2 + 5.3.0 + 5.3.0 + UTF-8 + + + + + com.thedeanda + lorem + 2.1 + + + org.apache.kafka + kafka-clients + ${kafka.version} + + + org.apache.kafka + kafka-streams + ${kafka.version} + + + io.confluent + kafka-avro-serializer + ${confluent.version} + + + org.slf4j + slf4j-log4j12 + ${slf4j.version} + + + org.apache.avro + avro + ${avro.version} + + + org.apache.avro + avro-tools + ${avro.version} + + + org.apache.avro + avro-compiler + ${avro.version} + + + com.google.code.gson + gson + 2.8.5 + + + + + + + + maven-compiler-plugin + 3.0 + + 1.8 + 1.8 + + + + org.apache.maven.plugins + maven-eclipse-plugin + 2.9 + + true + + + + org.apache.avro + avro-maven-plugin + ${avro.version} + + + generate-sources + + schema + + + ./src/main/resources/avro/ + ${project.basedir}/src/main/java/ + + + + + + + diff --git a/kafka-java-consumer/src/main/java/dataeng/AvroConsumer.java b/kafka-java-consumer/src/main/java/dataeng/AvroConsumer.java new file mode 100644 index 0000000..1d26196 --- /dev/null +++ b/kafka-java-consumer/src/main/java/dataeng/AvroConsumer.java @@ -0,0 +1,74 @@ +package dataeng; + +import io.confluent.kafka.schemaregistry.client.SchemaRegistryClient; +import io.confluent.kafka.schemaregistry.client.rest.exceptions.RestClientException; +import io.confluent.kafka.schemaregistry.testutil.MockSchemaRegistry; +import io.confluent.kafka.serializers.KafkaAvroDeserializer; +import io.confluent.kafka.serializers.KafkaAvroDeserializerConfig; +import org.apache.avro.generic.GenericRecord; +import org.apache.kafka.clients.consumer.Consumer; +import org.apache.kafka.clients.consumer.ConsumerConfig; +import org.apache.kafka.clients.consumer.ConsumerRecords; +import org.apache.kafka.clients.consumer.KafkaConsumer; +import org.apache.kafka.common.serialization.StringDeserializer; + +import java.io.IOException; +import java.time.Duration; +import java.util.Arrays; +import java.util.Properties; +import java.util.UUID; + + +public class AvroConsumer { + + static String scope = "observations"; + static SchemaRegistryClient schemaRegistryClient = MockSchemaRegistry.getClientForScope(scope); + + public void createProducer() throws IOException, RestClientException { + + + Properties props = new Properties(); + props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "kafka1:9092"); + 
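+        // "kafka1" resolves to 127.0.0.1 thanks to the /etc/hosts entries added by hosts.sh.
+        // The random UUID suffix below gives each run a fresh group.id, so together with
+        // auto.offset.reset=earliest the consumer re-reads the topic from the beginning.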
props.put(ConsumerConfig.GROUP_ID_CONFIG, "generictempgroup"+ UUID.randomUUID().toString()); + props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class); + props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "true"); + props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); + +// props.put(KafkaAvroDeserializerConfig.SCHEMA_REGISTRY_URL_CONFIG, "mock://" + scope); + props.put(KafkaAvroDeserializerConfig.SCHEMA_REGISTRY_URL_CONFIG, "http://schema-registry:8081"); + props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, KafkaAvroDeserializer.class); + + +// schemaRegistryClient.register(Observation.class.getName(), Observation.getClassSchema()); +// schemaRegistryClient.register(Location.class.getName(), Location.getClassSchema()); + + Consumer consumer = new KafkaConsumer<>(props); + + consumer.subscribe(Arrays.asList("avro_topic")); + + while (true) { + ConsumerRecords records = consumer.poll(Duration.ofMillis(100)); + + records.forEach(record -> { + + System.out.println(record.offset()); + System.out.println("Key "+record.key()); + System.out.println("Partition "+record.partition()); + System.out.println(record.value()); + System.out.println(record.value().get("measurement")); + System.out.println(record.value().get("value")); + System.out.println(record.value().get("value2")); + System.out.println(record.value().get("id")); + + System.out.println(record.timestampType() + ": " + record.timestamp()); + + }); + } + + } + + public static void main(String[] args) throws IOException, RestClientException { + AvroConsumer helloProducer = new AvroConsumer(); + helloProducer.createProducer(); + } +} diff --git a/kafka-java-consumer/src/main/java/dataeng/StringConsumer.java b/kafka-java-consumer/src/main/java/dataeng/StringConsumer.java new file mode 100644 index 0000000..48031af --- /dev/null +++ b/kafka-java-consumer/src/main/java/dataeng/StringConsumer.java @@ -0,0 +1,42 @@ +package dataeng; + +import org.apache.kafka.clients.consumer.*; +import org.apache.kafka.common.serialization.IntegerDeserializer; +import org.apache.kafka.common.serialization.StringDeserializer; + +import java.time.Duration; +import java.util.Arrays; +import java.util.Properties; +import java.util.UUID; + + +public class StringConsumer { + public static void main(String[] args) { + Properties props = new Properties(); + props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "kafka1:9092"); + props.put(ConsumerConfig.GROUP_ID_CONFIG, "stringconsumer"+ UUID.randomUUID().toString()); + props.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "1000"); + props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "true"); + props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); + props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class); + props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class); + + Consumer consumer = new KafkaConsumer<>(props); + + consumer.subscribe(Arrays.asList("sentences")); + + while (true) { + + ConsumerRecords poll = consumer.poll(Duration.ofMillis(0)); + + poll.forEach(record -> + { + ConsumerRecord record1 = record; + + System.out.printf("offset = %s, key = %s, value = %s\n", + record1.offset(), record1.key(), record1.value()); + }); + } + } + +} diff --git a/kafka-java-consumer/src/main/resources/log4j.properties b/kafka-java-consumer/src/main/resources/log4j.properties new file mode 100644 index 0000000..3d41dac --- /dev/null +++ b/kafka-java-consumer/src/main/resources/log4j.properties @@ -0,0 +1,8 @@ +# Root 
logger option +log4j.rootLogger=INFO, stdout + +# Direct log messages to stdout +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.Target=System.err +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n \ No newline at end of file diff --git a/kafka-students/kafka-students.iml b/kafka-students/kafka-students.iml new file mode 100644 index 0000000..78b2cc5 --- /dev/null +++ b/kafka-students/kafka-students.iml @@ -0,0 +1,2 @@ + + \ No newline at end of file diff --git a/kafka-students/pom.xml b/kafka-students/pom.xml new file mode 100644 index 0000000..578bd6f --- /dev/null +++ b/kafka-students/pom.xml @@ -0,0 +1,112 @@ + + 4.0.0 + ee.ut.cs.dsg.lt02007.kafka + kafka-students + 1.0 + jar + + + confluent + http://packages.confluent.io/maven/ + + + + + 2.4.0 + 1.7.21 + 1.8.2 + 5.3.0 + 5.3.0 + UTF-8 + + + + + com.thedeanda + lorem + 2.1 + + + org.apache.kafka + kafka-clients + ${kafka.version} + + + org.apache.kafka + kafka-streams + ${kafka.version} + + + io.confluent + kafka-avro-serializer + ${confluent.version} + + + org.slf4j + slf4j-log4j12 + ${slf4j.version} + + + org.apache.avro + avro + ${avro.version} + + + org.apache.avro + avro-tools + ${avro.version} + + + org.apache.avro + avro-compiler + ${avro.version} + + + com.google.code.gson + gson + 2.8.5 + + + + + + + + maven-compiler-plugin + 3.0 + + 1.8 + 1.8 + + + + org.apache.maven.plugins + maven-eclipse-plugin + 2.9 + + true + + + + org.apache.avro + avro-maven-plugin + ${avro.version} + + + generate-sources + + schema + + + ./src/main/resources/avro/ + ${project.basedir}/src/main/java/ + + + + + + + diff --git a/kafka-students/src/main/java/ee/ut/cs/dsg/lt02007/kafka/task0/Task0.java b/kafka-students/src/main/java/ee/ut/cs/dsg/lt02007/kafka/task0/Task0.java new file mode 100644 index 0000000..dcc6d88 --- /dev/null +++ b/kafka-students/src/main/java/ee/ut/cs/dsg/lt02007/kafka/task0/Task0.java @@ -0,0 +1,26 @@ +package ee.ut.cs.dsg.lt02007.kafka.task0; + +import java.util.List; +import java.util.Properties; +import java.util.concurrent.ExecutionException; + +public class Task0 { + + public static List TOPIC_LIST;//TODO topics here + + public static void main(String[] args) throws ExecutionException, InterruptedException { + + //First we need to initialize Kafka properties + Properties properties = new Properties(); + properties.put("bootstrap.servers", "kafka1:9092;kafka2:9093"); + properties.put("client.id", "java-admin-client"); + + + //TODO CREATE TOPICS + + + //TODO List Topics + } + +} + diff --git a/kafka-students/src/main/java/ee/ut/cs/dsg/lt02007/kafka/task1/Task1A.java b/kafka-students/src/main/java/ee/ut/cs/dsg/lt02007/kafka/task1/Task1A.java new file mode 100644 index 0000000..ad549a0 --- /dev/null +++ b/kafka-students/src/main/java/ee/ut/cs/dsg/lt02007/kafka/task1/Task1A.java @@ -0,0 +1,42 @@ +package ee.ut.cs.dsg.lt02007.kafka.task1; + + +import org.apache.kafka.clients.producer.ProducerConfig; + +import java.io.BufferedReader; +import java.io.FileReader; +import java.io.IOException; +import java.util.Properties; + + +public class Task1A { + private String OBS = "obs.csv"; + + public void createProducer() throws InterruptedException, IOException { + Properties props = new Properties(); + props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "kafka1:9092,kafka2:9093"); + //TODO Your Producer Configuration Code Here + + String file = Task1A.class.getClassLoader().getResource(OBS).getPath(); + + 
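+        // getResource() locates obs.csv on the classpath (bundled from src/main/resources).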
diff --git a/kafka-students/src/main/java/ee/ut/cs/dsg/lt02007/kafka/task1/Task1A.java b/kafka-students/src/main/java/ee/ut/cs/dsg/lt02007/kafka/task1/Task1A.java
new file mode 100644
index 0000000..ad549a0
--- /dev/null
+++ b/kafka-students/src/main/java/ee/ut/cs/dsg/lt02007/kafka/task1/Task1A.java
@@ -0,0 +1,42 @@
+package ee.ut.cs.dsg.lt02007.kafka.task1;
+
+import org.apache.kafka.clients.producer.ProducerConfig;
+
+import java.io.BufferedReader;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.Properties;
+
+public class Task1A {
+    private String OBS = "obs.csv";
+
+    public void createProducer() throws InterruptedException, IOException {
+        Properties props = new Properties();
+        props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "kafka1:9092,kafka2:9093");
+        //TODO Your Producer Configuration Code Here
+
+        String file = Task1A.class.getClassLoader().getResource(OBS).getPath();
+
+        System.out.println(file);
+
+        //TODO Your Producer Here (one possible shape is sketched after Task1B below)
+
+        BufferedReader br = new BufferedReader(new FileReader(file));
+
+        String line = "";
+
+        while ((line = br.readLine()) != null) {
+            System.out.println(line);
+            //TODO production code here
+        }
+    }
+
+    public static void main(String[] args) throws InterruptedException, IOException {
+        Task1A helloProducer = new Task1A();
+        helloProducer.createProducer();
+    }
+}
diff --git a/kafka-students/src/main/java/ee/ut/cs/dsg/lt02007/kafka/task1/Task1B.java b/kafka-students/src/main/java/ee/ut/cs/dsg/lt02007/kafka/task1/Task1B.java
new file mode 100644
index 0000000..843cc2a
--- /dev/null
+++ b/kafka-students/src/main/java/ee/ut/cs/dsg/lt02007/kafka/task1/Task1B.java
@@ -0,0 +1,28 @@
+package ee.ut.cs.dsg.lt02007.kafka.task1;
+
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+
+import java.util.Properties;
+
+public class Task1B {
+    public static void main(String[] args) {
+
+        Properties props = new Properties();
+        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "kafka1:9092,kafka2:9093");
+        //TODO Your Configuration Code Here
+
+        // TODO: Consumer code here
+
+        // TODO: Subscribe to the topic
+
+        try {
+            for (int i = 0; i < 1000; i++) {
+                //TODO consuming code here
+            }
+        } finally {
+            //TODO close the consumer
+        }
+    }
+}
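The production TODO in Task1A can be filled in many ways; one non-authoritative sketch is a plain String producer keyed by the UUID column of obs.csv, so all records of one sensor land on the same partition. The "observations" topic name is an assumption, the exercise does not fix one.

package ee.ut.cs.dsg.lt02007.kafka.task1;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringSerializer;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.Properties;

public class Task1ASketch {
    public static void main(String[] args) throws IOException {
        Properties props = new Properties();
        props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "kafka1:9092,kafka2:9093");
        props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
        props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);

        String file = Task1ASketch.class.getClassLoader().getResource("obs.csv").getPath();

        try (Producer<String, String> producer = new KafkaProducer<>(props);
             BufferedReader br = new BufferedReader(new FileReader(file))) {
            String line = br.readLine(); // skip the UUID,COUNT,BUILDING,FLOOR,ROOM header
            while ((line = br.readLine()) != null) {
                String key = line.split(",")[0]; // UUID column keys the record
                producer.send(new ProducerRecord<>("observations", key, line));
            }
            producer.flush(); // push anything still buffered before the try-with-resources close
        }
    }
}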
diff --git a/kafka-students/src/main/java/ee/ut/cs/dsg/lt02007/kafka/task2/Task2A.java b/kafka-students/src/main/java/ee/ut/cs/dsg/lt02007/kafka/task2/Task2A.java
new file mode 100644
index 0000000..63ffb38
--- /dev/null
+++ b/kafka-students/src/main/java/ee/ut/cs/dsg/lt02007/kafka/task2/Task2A.java
@@ -0,0 +1,40 @@
+package ee.ut.cs.dsg.lt02007.kafka.task2;
+
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.producer.ProducerConfig;
+
+import java.io.IOException;
+import java.util.Properties;
+
+public class Task2A {
+    private String OBS = "obs.csv";
+
+    public void createProducer() throws InterruptedException, IOException {
+        Properties pprops = new Properties();
+        pprops.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "kafka1:9092,kafka2:9093");
+        //TODO Your Producer Configuration Code Here
+
+        Properties cprops = new Properties();
+        cprops.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "kafka1:9092,kafka2:9093");
+        //TODO Your consumer Configuration Code Here
+
+        //TODO Your producer Code Here
+        //TODO Your consumer Code Here
+
+        try {
+            for (int i = 0; i < 1000; i++) {
+                //TODO consuming and producing code here
+            }
+        } finally {
+            //TODO close the consumer
+        }
+    }
+
+    public static void main(String[] args) throws InterruptedException, IOException {
+        Task2A helloProducer = new Task2A();
+        helloProducer.createProducer();
+    }
+}
diff --git a/kafka-students/src/main/java/ee/ut/cs/dsg/lt02007/kafka/task2/Task2B.java b/kafka-students/src/main/java/ee/ut/cs/dsg/lt02007/kafka/task2/Task2B.java
new file mode 100644
index 0000000..990de7c
--- /dev/null
+++ b/kafka-students/src/main/java/ee/ut/cs/dsg/lt02007/kafka/task2/Task2B.java
@@ -0,0 +1,29 @@
+package ee.ut.cs.dsg.lt02007.kafka.task2;
+
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+
+import java.util.Properties;
+
+// Total number of people per floor:
+// keep a local count of the people on each floor; floors are uniquely identified by building and floor number.
+public class Task2B {
+    public static void main(String[] args) {
+
+        Properties props = new Properties();
+        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "kafka1:9092,kafka2:9093");
+        //TODO Your Configuration Code Here
+
+        // TODO: Consumer code here
+
+        // TODO: Subscribe to the topic
+
+        try {
+            for (int i = 0; i < 1000; i++) {
+                //TODO consuming code here
+            }
+        } finally {
+            //TODO close the consumer
+        }
+    }
+}
diff --git a/kafka-students/src/main/java/ee/ut/cs/dsg/lt02007/kafka/task2/Task2C.java b/kafka-students/src/main/java/ee/ut/cs/dsg/lt02007/kafka/task2/Task2C.java
new file mode 100644
index 0000000..6589c89
--- /dev/null
+++ b/kafka-students/src/main/java/ee/ut/cs/dsg/lt02007/kafka/task2/Task2C.java
@@ -0,0 +1,36 @@
+package ee.ut.cs.dsg.lt02007.kafka.task2;
+
+import org.apache.kafka.clients.producer.ProducerConfig;
+
+import java.io.BufferedReader;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.Properties;
+
+// Let's save the aggregated result in a topic and progress from there.
+public class Task2C {
+
+    public void createProducer() throws InterruptedException, IOException {
+        Properties props = new Properties();
+        props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "kafka1:9092,kafka2:9093");
+        //TODO Your Producer Configuration Code Here
+
+        //TODO Your Producer Here
+
+        try {
+            for (int i = 0; i < 1000; i++) {
+                //TODO consuming code here
+            }
+        } finally {
+            //TODO close the consumer
+        }
+    }
+
+    public static void main(String[] args) throws InterruptedException, IOException {
+        Task2C helloProducer = new Task2C();
+        helloProducer.createProducer();
+    }
+}
diff --git a/kafka-students/src/main/java/ee/ut/cs/dsg/lt02007/kafka/task2/Task2D.java b/kafka-students/src/main/java/ee/ut/cs/dsg/lt02007/kafka/task2/Task2D.java
new file mode 100644
index 0000000..742a436
--- /dev/null
+++ b/kafka-students/src/main/java/ee/ut/cs/dsg/lt02007/kafka/task2/Task2D.java
@@ -0,0 +1,28 @@
+package ee.ut.cs.dsg.lt02007.kafka.task2;
+
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+
+import java.util.Properties;
+
+// Total number of people per building
+public class Task2D {
+    public static void main(String[] args) {
+
+        Properties props = new Properties();
+        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "kafka1:9092,kafka2:9093");
+        //TODO Your Configuration Code Here
+
+        // TODO: Consumer code here
+
+        // TODO: Subscribe to the topic
+
+        try {
+            for (int i = 0; i < 1000; i++) {
+                //TODO consuming code here
+            }
+        } finally {
+            //TODO close the consumer
+        }
+    }
+}
diff --git a/kafka-students/src/main/resources/location.avsc b/kafka-students/src/main/resources/location.avsc
new file mode 100644
index 0000000..6e9b6c7
--- /dev/null
+++ b/kafka-students/src/main/resources/location.avsc
@@ -0,0 +1,9 @@
+{"namespace": "kafka.exercise.avro",
+ "type": "record",
+ "name": "Location",
+ "fields": [
+     {"name": "building", "type": "int", "doc" : "The building address"},
+     {"name": "floor", "type": "int", "doc" : "The floor number"},
+     {"name": "room", "type": "int", "doc" : "The room number"}
+ ]
+}
\ No newline at end of file
diff --git a/kafka-students/src/main/resources/obs.csv b/kafka-students/src/main/resources/obs.csv
new file mode 100644
index 0000000..93e832c
--- /dev/null
+++ b/kafka-students/src/main/resources/obs.csv
@@ -0,0 +1,1000 @@
+UUID,COUNT,BUILDING,FLOOR,ROOM
+dae9bef5-1edc-496d-8bb4-dd63906546b8,5,0,1,13
+fcb8180a-377c-4e52-b032-1384a3b0b9c4,2,1,6,8
+3263b63d-a202-476e-9caa-e78edbe66d8d,7,2,1,12
+79b7585c-c56a-46c9-8278-b6935fb89c62,3,0,6,11 +df8d5082-c8e4-412b-888a-3292064b926e,8,0,5,15 +9b19ffb8-33b8-4df5-bc84-3ae4880c759f,6,2,5,12 +e0207da1-2ff9-419b-a8a7-7f2559e95611,4,1,5,14 +fdfa60a8-47e3-455d-bee4-76a0b0802112,2,0,4,9 +d5ca2db7-c2a2-4a9e-ace5-ff520fae4dad,4,0,4,16 +954215bf-cfc3-43ae-a70b-bb030d15fd68,3,1,5,13 +37de8704-5db6-42db-9391-18446f54c5ca,3,0,6,9 +634b3010-2a60-427b-a086-31e7a6e261fe,8,2,6,18 +57473145-4e9d-4451-931f-5bf5ac0d26da,6,2,6,14 +2afd174f-d46c-4de0-8ebb-cc540fa11ac7,2,0,4,5 +edd6ba0d-a000-428d-a70a-1723d6dc15d4,2,2,7,15 +2e6fe083-25f4-472e-96a3-4d745107d72d,7,0,6,0 +f60dd459-8eda-458e-a080-6b984f01cbe3,0,0,1,17 +69e7bf8c-931d-4cd9-a359-2fb1866f2698,1,1,0,6 +f3f24adc-5c56-48df-abad-89b826445006,3,2,7,3 +893437bd-38d5-426a-a373-bf6d57ee6e63,4,1,6,11 +1051596c-21a5-47b0-b63e-efab157c92e6,9,2,0,16 +671b82dc-7b7f-42b9-9df8-28ed844c74e4,9,2,6,3 +88789d88-c5a6-4fae-b29a-0ffc62900607,0,1,4,2 +0d3142d7-5e02-452c-9d67-48fef7dac338,5,0,6,18 +ad64e144-62c5-4a7d-bad6-c93e7ec02d4b,1,2,6,14 +8ac9edde-a1c9-4350-8a16-a6db00c64f14,0,2,2,3 +4071de10-7dc1-49d3-aa6c-693a9e6b1196,6,0,3,2 +18683b33-5c51-4178-b907-becf2347edd8,3,2,6,18 +1a72c1ac-a41a-4bfb-ae33-d8374503b1b6,0,0,2,7 +545d4eee-7849-4d24-a293-881ca3adec71,4,2,4,2 +ce7bc6af-4454-4e76-8a43-8bd7965a7c2f,4,0,1,11 +120a26f2-3119-4c4d-badd-d9bc8eb6c6d6,2,1,0,15 +4c589a4c-27d9-4deb-ac4a-96850a5c4126,6,0,5,12 +553dbef7-a38c-446b-a4ab-2427811ce0b4,3,1,2,7 +d408dfe6-b215-485a-aad3-528d4143f188,1,0,3,4 +1e1c183e-d4be-4513-8e1f-a6d8ef9a1331,2,2,3,18 +bb69bb41-b19a-4090-9539-43caee72a910,0,1,7,6 +2a237f9b-56e4-4b39-9ad5-3d7f29149d9f,6,0,3,16 +5f9dd42c-812f-4210-b330-32806673a98b,6,2,1,1 +a62429cb-313e-49cd-a1af-a8c4d280a0dc,2,2,3,13 +ea9af3d1-9639-4dc7-9614-772452b0bd06,9,2,0,15 +d9e0206a-2f06-4109-84cc-85af4b2f97fd,0,2,6,0 +cb853dd8-b88e-4573-a5ef-301b758aa846,3,0,2,8 +4c158ef7-487d-44ce-b1b1-dd66831c7fa2,3,2,6,15 +fc6e6696-d653-46d6-b8f7-1b1f0762584b,0,1,6,9 +6dfd7d4e-8cb4-47b4-a026-30ffb07fdd79,8,1,0,15 +b6a4e41e-2287-4ec7-ba92-800d83555131,4,2,0,9 +7b69f839-9d9c-4f0d-8f25-f644d7b5b7aa,4,0,2,14 +5d58f542-1dd3-4619-9a0c-bf47c5741916,0,2,0,5 +3dc47bfc-5b5f-4e86-ac6b-58a424fe24aa,7,1,5,15 +4ec4832e-e888-4c2e-9d69-f4b184279f6c,3,0,2,6 +8ac5d63d-ec6c-4593-8a0a-2dfc181c91f0,2,1,2,14 +e3daf6ab-a341-4abc-a790-e29a289f9397,1,1,2,18 +87342742-f37d-4fc9-a727-0529aaef8a50,1,1,6,16 +b1e980d0-3ef4-40a5-9f42-35c28c1336ea,3,1,6,2 +e5e1bd72-3457-4506-8b0b-0d5c13c0dcce,9,1,6,4 +0329f9a7-1a23-4de6-955a-7473804c3494,0,1,1,9 +266c8d81-4e06-4724-8269-1c29de943640,6,0,4,8 +e31b24ea-f609-4942-8884-a0428eae66e6,6,2,5,17 +402eb843-aa23-4771-b5b2-0249ee51ac81,4,0,1,17 +4cb45b99-e4f9-4d45-b6d5-dbef4878916e,6,1,1,13 +3fb02401-d26e-4ddd-961b-406d47d49ddd,4,0,3,18 +c6e9e17d-34da-4fec-9367-549b4a1a7e07,1,2,5,15 +24e47d2f-c48b-489e-9c17-b5ad0e452430,3,0,2,19 +8bdb2298-d7f7-44d1-82f9-5071916177e0,1,0,0,0 +bf16381d-02e3-4891-b117-c32b816c18e2,5,2,3,4 +e302bf57-f11a-45e8-9544-7c862c0fef55,8,0,6,3 +cfe2ace5-d93f-48c9-bd04-817343e61c6e,1,0,2,17 +76287b00-f7ff-4a8a-bcb4-928fe237b165,5,2,4,3 +590cf5c4-45b1-4a51-9d6e-466da5db43ab,7,0,3,2 +1251d399-a94b-42f0-a3b8-8564a6f50608,3,1,4,10 +07b8b43e-2e6a-475d-a261-d15b29b9154a,6,0,4,7 +549d9bf5-ef29-44ca-b1d1-bd5c61b81ede,5,0,6,13 +0b721d1d-d523-4fd2-bdb2-6b542cbf037c,0,2,7,6 +0763a9f5-eaaa-4c64-addb-c6023e98adb4,8,0,6,5 +e906ef94-3cd5-4d49-be0e-a188cfbe1619,5,0,3,5 +b01ea143-c580-4a6a-8cfb-285821cc4d47,2,0,4,9 +4a1ac5d8-40ca-427a-b854-5f9ae1580f46,3,1,3,15 +dc170ea4-6635-4d41-812b-8c72f551e87e,5,1,4,12 
+49c28b2e-9471-42a6-8772-76edf833f058,4,2,2,15 +8ef67c6f-3c87-4688-8298-448d63882869,5,0,6,3 +b94315c1-4e13-41e4-80e3-af3b940f985f,8,2,2,8 +fe8a0138-28e7-4465-8008-d0835f031621,5,0,3,7 +2185f9be-cf70-43b5-bdae-8ab7430e8938,6,0,7,13 +6f8e5eb6-c04b-4213-a71d-172cfcfe0b75,2,2,5,7 +757852e3-1b28-4e7c-a24d-d0e0a3319e08,5,2,2,7 +9de43373-b1ea-4b9b-ab2b-d14927c21393,2,1,6,13 +608d0035-7595-41e0-a723-ebfc1a267cf7,7,0,2,4 +233985b3-2e07-40b1-bbb3-252ebe0584cb,8,0,2,1 +7c8c9e89-1f19-4aaa-b166-77499c179bfd,9,0,4,8 +5e7dbba4-3621-49ec-820d-b38296f6089c,4,1,5,19 +bb1e911a-6e7c-4308-9375-e80e7e32c4b8,7,0,3,10 +9c2f844f-1608-499d-952e-3b56a2fc9bc9,7,0,0,19 +c2ebc9a0-11a3-46be-9827-51ebb9c578a6,2,0,4,6 +cf671048-61e2-4b66-b8f8-9c3111bb3754,3,1,6,0 +c9d48dc1-b5ca-4b8a-8bb5-f4d675a4930f,2,1,0,16 +24094670-9bda-430b-b12a-3c82ed8f818c,1,0,6,9 +9707e3f3-e0bf-4b15-aa3b-b4de4f36df69,6,0,6,14 +3b64d2c4-cf04-44d9-9c07-1469081dbc85,4,1,2,5 +aa4b0745-dd84-4519-a932-b79e7c230cc6,5,0,5,0 +28569f65-c2fc-4373-8b94-370a173de9c1,8,0,4,8 +32dbb4d0-47b0-44f2-8781-6fdd013eba95,6,1,3,7 +9af80929-24f9-4984-be46-a0ec43b7ff08,5,1,4,15 +c201a4dc-4878-44ad-956b-0b8205955df4,0,1,2,11 +572bb5b8-6a0a-4568-a57e-054c0c092f07,3,0,6,8 +2e19a17f-fc94-46e4-aedb-a73c4e0651ed,0,0,2,8 +3eeb30f9-2d97-4994-b15d-9a6d58ed957a,9,1,6,5 +f1f67405-149e-499c-b15a-b6460cfc85f5,7,0,7,11 +70cd9f65-336c-4a31-ad5c-072669237498,4,1,1,4 +608d0ecc-c916-47c0-9e9c-193796d25549,2,2,6,8 +2e67adbf-9f86-4a10-844c-3a69ed66e0cb,5,2,1,11 +dcbd65f5-adb4-4e58-8321-51b202582adc,3,1,4,11 +9256ac74-4ba8-49e2-9138-ff0e258942f1,1,2,6,6 +59689c93-a56c-4a21-890a-adf6b017f36b,1,0,4,8 +1d9a6ca5-7a86-4672-a76d-b675680f6d69,9,0,7,0 +13b95727-4401-4492-8468-b664fbc54bca,8,2,0,14 +012c6700-7ce6-4734-a1e3-a4f3f9aa5a49,6,2,0,1 +f992d3ad-0ce2-49b0-899b-cb7d8f1cd72a,6,2,0,3 +65ef4e36-ab8b-4b9f-a41d-d7c1532689dd,5,2,6,13 +9b6d7e9d-1988-4003-909a-29cf8cd14bd0,5,1,2,4 +6dbab413-b7c2-4740-841b-ad99824b8bdb,7,2,6,17 +f7f3e5f0-62d2-46c8-b1fa-efc9e51b71ce,2,2,1,13 +94155616-bfc7-4b4f-b148-9ea6378d909e,5,2,5,2 +3e1ab79a-d071-4d5d-8276-ddc47d171ae7,0,0,4,1 +52911969-f75e-4058-9df1-a6478095ba39,7,2,5,4 +da374458-baa5-4f17-b03c-e7493d99e5cc,6,0,3,19 +112f95a4-a53c-49e4-bcf0-9ce25e9a7f5f,8,1,4,6 +b745e976-5407-44b0-84d8-fde40359fa42,5,1,3,11 +5a3b6320-a811-4c54-80ed-315d739582e2,4,0,5,1 +5fa06752-489a-470e-b064-5d84298c2e78,9,1,0,12 +7aa00621-7eb3-4034-8e7c-82d8ff79ba81,4,1,2,2 +ec280e4b-7d18-4b0e-aba7-a70e58842ed4,5,2,7,14 +b258176f-8cce-4952-bc8e-39b4ceec48eb,0,2,0,13 +af7aef87-a4de-4c8f-a7e5-639b6b227af8,6,0,2,2 +fd991fb6-09fc-4e89-be12-8cc3e4c5b348,6,0,7,4 +06765458-f6ba-4b66-ade2-b1b624b30cc3,9,1,0,4 +0df73f08-1b7d-46a3-b044-a655dcff7823,0,0,1,14 +5c17ccf4-8a8a-47e9-aef4-dff32bdbe70f,7,2,2,6 +660a23f9-80dd-439a-b73d-0644dd56d8cd,2,2,5,13 +0ec7a78c-5e2d-45a7-a5fb-9a888353b161,2,1,1,5 +0ec1986b-b565-487b-a3a7-64c66d5fe905,9,1,0,1 +77a667ba-c291-40f9-8f80-15f5775aac7b,9,1,3,10 +f18cbd95-5eae-400c-bcf3-6ea17df16ce6,7,2,1,6 +7dbed679-7838-4490-83c1-d72dcfa77fde,3,2,0,0 +2c8eb408-95e1-4e22-9ee8-4ca38d7c7e1c,8,2,1,7 +fac8c1a2-acf9-4fdb-845b-2ffa87313793,9,2,3,13 +bd4793cc-66ce-4cda-a1db-6b0a4e9156e4,9,1,3,15 +d3da5771-4ca2-46c1-ae9a-068bf2fde52c,8,0,6,10 +334be3e1-43e5-43f9-b34a-07d212a4d280,9,2,3,16 +10bab532-b948-47ef-80d9-dec25d71c52c,8,0,0,13 +48425023-4fc2-4d60-b585-774c7baeeabc,8,0,3,15 +dce49f55-e82b-48ee-891a-1fbecd7bddb5,8,0,5,10 +a3d51a88-45b8-4b93-8db4-cb2315f818f0,1,1,3,19 +ceb0e2e2-132a-4a52-bda2-d94ea033d81d,7,0,1,17 +63e5663b-17bf-4ff3-b67a-c15fa97e0042,9,2,4,18 
+1c177536-8649-4382-acab-e8f3a8b849d8,8,2,6,19 +720d756a-4261-4682-88bd-99cd98b867c2,2,0,6,13 +f19386f7-dbbd-49c3-bf18-0e14a9505813,9,0,0,18 +5b4f847b-be4e-4657-8cd6-cbe8c3be5e43,1,0,2,6 +842f1f21-9b70-4739-8110-ac3027099ce7,4,0,0,11 +1f4dec1f-736e-4f82-9f73-900527076b3c,0,0,1,13 +b49f7aae-3842-4b43-832a-bfbd09e64822,0,2,0,14 +f63b397f-2149-4b22-8081-492ffa63ded4,6,0,4,16 +2012e82c-4ac5-4bc6-b5f8-17302717d9a2,3,0,7,0 +6eb5af96-4e61-4ee3-9b6e-96d2a23944d3,9,2,4,2 +e26156e3-13fc-4783-978c-5264123817a7,6,2,1,18 +09697ab8-db1f-4702-9b74-8b6ce2b66f4b,7,2,1,15 +8315345b-859e-42be-9e32-56dfc56ff354,9,0,7,11 +664cedb2-af7c-425f-942a-29c3b819f5cc,7,1,4,16 +ccc11978-94df-4e8f-abeb-876a085708c8,0,1,5,12 +f88e9ef8-0925-45b4-b288-1d5b9647a19d,1,1,3,0 +f18ca24b-1215-4423-a55c-7ab2a4500c58,3,2,6,9 +91c2b8e8-43ab-42cc-a70e-d37020bd9b06,4,2,2,2 +d144fe64-da38-4ad4-9fc9-0b7a014a0541,7,1,4,1 +af992593-a736-42d5-809e-2bf33f90b726,2,0,0,17 +95c13d8a-77e8-42e6-8b99-a99172082cc7,8,0,5,8 +67527c79-897c-45df-a587-7b0736cf8996,8,1,7,2 +73fb31b3-e9a4-40d5-b76b-1f37dadb7759,8,2,6,11 +2a3b9fbc-4f2b-4404-a9ed-9442a6dc7fc2,8,2,6,9 +ff3f3f92-2535-41b5-a757-839ab49d4cfc,0,0,5,13 +15219ed0-49ad-41e9-9002-173dc0bf0857,8,1,3,2 +01cde457-22fd-4f78-8177-c53cb81443a4,4,0,4,19 +9eb98339-7029-4efd-9e5b-f4883fe6bcd0,8,1,0,17 +bc2bb33b-7172-4663-ac00-31400aef68f3,4,1,7,18 +287bcfaa-41b2-4470-80f0-2d1dfcd9f340,4,0,0,15 +29f7e7a2-a3bc-4210-99a2-19e46af92448,1,2,6,13 +065b1c2c-e5d5-4b15-bed9-b38508e9f603,9,1,1,19 +7ee187c7-b067-4cde-abce-706dda4c5c93,3,0,6,9 +8f0be669-230e-4aec-8e25-d7890215a6e7,1,0,4,8 +c8323afc-7214-487e-a0f5-46f7697d438d,4,2,7,10 +7132cfac-0696-4a7c-9b77-382a6cd4ecfd,7,1,5,11 +16efa524-8315-403e-9b93-c7d227d406eb,8,2,5,10 +4ba44c3c-4b9d-4d03-9276-fa80dbde4594,8,2,5,9 +03c29727-52b2-4112-b579-d56beba4c2c2,3,2,5,11 +7913ca2d-c94b-4618-9393-58f5a5bba741,2,1,3,12 +60417a84-ecf3-4642-a253-7a98adc15721,5,0,3,16 +3eea5d1d-36e7-4085-8889-c4d6bb434cee,2,0,5,1 +402243a0-9065-4396-a761-e6a991ac6e0d,3,0,3,9 +25592754-5e52-401d-97f6-93aee6d93361,9,2,5,13 +707a66fd-6926-4cc7-a782-14783c510fa9,3,2,4,16 +bb62a5b2-0506-4d47-8f62-b6d84bb1b9ea,0,0,1,12 +559087b6-d895-43af-9778-03172326ac1c,1,0,1,12 +0e0ecf90-8d5b-4c24-a543-bc094db11d5a,2,2,3,14 +42d8e1a3-77bd-498e-a75f-4954c4bd559e,4,1,7,18 +c0cde665-da96-4ff9-8438-eaa56fdbecab,6,1,1,13 +615f2693-32c2-42ca-94d0-22c6376bd6cc,3,0,0,4 +1435abc3-5f6a-441f-8390-b260ee6b663f,5,1,2,19 +1532df42-c29c-46a6-9949-78c3b721832a,4,1,4,12 +2dc71ffc-b5b9-4cf5-9a81-efabbed5d5a9,2,1,5,1 +8cb7808a-51f7-4176-9c7a-43285cfdc5b4,8,0,6,7 +547ee688-2a5a-4106-bd93-7ad3ca1c7496,7,1,1,11 +5110167d-9f75-499d-8594-9505cc4bdbf2,1,2,6,16 +2b8a20ad-86f9-48ad-9169-34d64c1224d2,8,1,3,6 +fef35d84-5118-46e0-9547-39a73640eb14,5,2,1,2 +6d86696a-827d-478b-b4bd-3d869db4543a,6,0,1,10 +b10a75be-f944-4ae8-82b2-71331aa5d330,0,0,7,7 +12286b84-cc03-44ec-b417-c5800d42ad02,8,0,7,3 +18289c28-6ec1-4cfe-aeba-bfb508261ac2,5,2,7,5 +1b47836f-009d-4bb4-8647-75cb9d8457e2,8,0,2,3 +d5a60600-dcee-4cc4-b273-5cb3147f5661,9,0,3,9 +86058523-f661-4f42-8dc6-a5b5796953a9,3,2,3,12 +7351038d-8ee8-4f3c-8504-dfa09ea3f123,3,2,3,4 +5bf779be-f986-403b-8d97-db24a6e27e07,2,2,5,4 +0bcee255-17c8-4e1a-b9c1-e5fc2b10fbf5,2,0,5,1 +63e54158-c9d1-4477-9f86-5675076d17e2,1,0,1,5 +bc2978af-01a2-4db0-bf7c-85ca43cf729e,3,0,6,2 +e86ced3a-1d3d-436a-bf75-8c052721c787,3,0,1,6 +7a8f9cc6-42c0-4465-ada9-370f01e0b247,3,0,6,1 +30771e35-c26a-4025-860f-2261bfc247a3,1,0,7,10 +3a600fb8-a5dd-43f5-9da2-70796340017d,3,2,5,14 +846faf2f-fb9a-485a-8ebc-03c0af20e4fa,3,1,6,1 
+d4821dc6-ad0e-4488-97d1-cdeb347ba491,6,1,5,18 +762816f0-2ead-43ee-b476-f67e516b19d7,9,1,1,0 +86c28838-4949-497a-98e6-09fe7bd23446,3,1,0,1 +716e14e3-0362-4fb9-a21a-93d8bfcc003a,8,2,3,12 +979ef36c-fd8e-4d16-a5e7-b813e32c5882,8,2,1,6 +280be36a-5553-454c-987f-6a39c3a86109,1,2,2,11 +99e47cdb-6c4a-4d4f-a2ac-7fa958d866f9,9,2,1,7 +fb1072cd-253c-4753-9276-b82adc903ecb,2,1,0,0 +993b2a5c-2eb8-4dec-8aea-10fd8ab43b0b,0,1,6,0 +2f8cd00b-d84f-4227-ba26-5cf41d7fbeaa,1,0,2,3 +b1169144-d166-438f-a148-ec494d15b933,0,2,7,6 +c0c1f1a7-8420-4440-a81d-4e9476a9ccaf,1,2,2,6 +ccb4c7e9-3e52-4ce4-932e-ce2e088964f3,5,1,7,19 +d5756e87-0a58-4dd5-9a0e-bb0a3d02ef12,4,1,2,9 +d23ca3ce-d31f-43f6-935a-2d38b96bb529,2,2,1,17 +1e59e881-6777-44df-b4b0-336fadfa7f98,9,0,4,2 +289eef08-bc21-47f1-9fa9-2f549bbca5a3,8,0,1,8 +124c5131-b73d-4cab-9458-c9778a160a56,4,1,6,9 +a346b4ed-ae32-47b2-8776-4bd0775f9eb5,1,0,2,16 +e4e37b06-2cc0-48fe-8f3b-7381f8f7e162,8,2,4,4 +1270e32d-384a-4128-9501-98d0e4f19c98,3,2,5,16 +641cb43e-1514-4833-b5eb-65001ddf67cd,9,0,7,17 +cb6c3324-b447-4fd1-9910-4a1ca6d15bb2,5,0,7,9 +33ae79c1-571d-4956-b350-eded0ce9db5e,8,1,4,0 +3c3bdfb8-96f5-4c6b-9489-258dcc0abea4,8,2,0,2 +d9e02b0f-aafa-4b50-8cf7-4ce8bcb1c0bb,4,2,2,13 +98ac4b13-d090-4805-9904-d8f9ed4be56e,3,1,2,7 +5fdabb09-a5da-4af7-a80a-598fc41f62e0,0,0,5,19 +ef23fe50-f33d-4bcb-9394-114c34062072,9,2,1,6 +18b8143a-fdea-4048-a4a6-ef99adaff36c,8,2,5,9 +a50bef64-b877-406c-9114-c5e7f803f48f,3,1,4,10 +2f87d35c-1f55-4296-bf46-29edc69a9a0a,6,2,7,7 +9c1b29c4-aad8-41b8-a75c-91403880e35b,9,0,6,15 +5893384f-e43a-46d5-8ec0-1609b6239071,9,0,3,0 +a0a55a0e-5ec3-42fa-92d9-8dc04a78f447,9,0,1,18 +f7d89179-7e1b-42a7-a59c-a16e3631a5a1,7,1,4,11 +eb4ab321-0b50-40f0-852e-006952113c73,1,2,1,12 +b5aa9d67-5f71-4068-abb9-deec65a72b27,1,2,6,3 +1f06f836-bce3-46db-a511-7b07e66f56fe,8,1,7,3 +124e1af9-5b31-4d38-91c0-95e95eb572b9,6,0,7,10 +15af9834-b20a-46ec-9ea7-c7ba18878ebf,9,0,7,16 +968ecc15-9662-4e74-8147-6f5901cefc99,6,0,0,16 +976b8974-a550-468c-a7df-e5913bf70411,0,1,0,11 +64171fe9-0671-48c3-a9c6-59c02e275665,1,1,5,3 +f07b5273-df74-4804-95ff-a22dcd9082de,3,0,7,14 +a51f7ee2-2de9-4ce4-8883-bfefe6514a53,8,2,5,14 +17519ee7-7b5e-4427-bcf4-2ac0b275b541,5,0,2,13 +b50623da-6196-43b0-a19b-a611ee824b74,7,0,3,19 +3c730002-ac2f-4dff-b896-d778e35b7516,7,2,3,8 +5640a15e-f143-4f47-86d1-23e644e18df1,9,1,4,3 +f4faf795-229f-4532-a4d7-3e8a4027fb52,6,0,5,4 +155ad45b-1245-4ea3-ad7f-06a21f9ba5b7,0,2,7,5 +e2d55ee2-7358-41f4-91af-d1da792fb91f,9,0,2,19 +10a64765-2f33-4dd0-90f0-8e99f6ec53b8,2,1,7,16 +468a4ad0-32ae-4931-ba4e-a71a8d8263d0,5,1,2,15 +688019c3-68ba-4577-a9cc-23c522b0aa4d,4,0,2,16 +1f8a1273-7422-4db2-84dc-03c331e28fcc,4,0,2,9 +988528a0-c5a5-486c-9b83-c4ad35d167f9,9,1,5,3 +e06ab573-944d-41ec-8550-85b182b0c6c8,0,1,1,16 +07e3bf78-6c62-4b16-b57d-d13d8743929c,4,2,1,11 +0085071a-98e2-4aaa-81eb-7f40be15cb33,2,1,6,8 +c1db1073-de2a-4224-9982-407293cfb974,5,0,7,5 +c8eae414-833b-4ed9-9b77-4f5bc0927649,7,1,5,17 +21e540f6-3c54-4037-98d7-ea4ed1cc790b,7,0,1,8 +95ee3fd7-679b-4f62-8ef7-b00a4b4a92b9,6,1,3,0 +436f5ded-fd92-4bd6-8c74-50f96e4c2ca9,2,1,4,4 +f235f674-6490-443c-884e-f5d9090975c6,6,1,6,3 +a78addc8-d50b-4f34-b67a-331789bef07d,9,0,3,13 +54a4bbc8-a034-404e-b2a7-87cd024ff845,0,2,4,12 +ee0ad94b-710f-4835-b8c0-f1e1f11b803a,3,0,1,1 +c0876962-3fe5-4671-a915-7e952972bf78,3,1,4,0 +7e4a8b88-e30e-4b59-a7de-521205caf7ac,0,1,1,1 +19513f54-8e3f-4e5d-acc5-50586875610a,7,0,0,6 +66be367e-e120-4b90-98fa-586fd737c967,9,0,2,7 +caf6ae4c-3599-4ff6-a76b-2db9b4854b21,4,2,3,4 +72058e31-8383-4d50-bf82-b36a95265889,5,1,5,13 
+57274431-7899-4fa5-93b3-dfc943be77a2,5,2,0,8 +ab0d5aad-1e21-49c8-bb0d-cf6d4f3e5f2f,1,0,7,3 +4b7bd29d-dda4-4845-a8c4-b1828a03e0a7,1,2,4,3 +ac19acb7-faac-4ac9-9338-38d98e35b8f7,2,1,2,18 +b411462b-be2a-4425-8bbb-673489294ea6,0,0,5,15 +b31ec535-8dd3-473a-af08-880ae9ba4b18,8,0,7,13 +bcd6f70b-1625-4ec3-a7d0-c38cff82ddda,3,1,5,4 +abc751b3-68ee-4828-9ac0-a6d1d0b601f0,9,2,0,11 +6c72681b-d115-41a1-a96a-24a0e7235c22,8,0,0,8 +e9dc75a1-46ab-43d6-9a66-de8d60d6cd47,1,2,2,15 +7eaf5fe4-e9a7-401c-acf3-15406a2d6949,3,1,5,11 +a526cf54-3996-40c0-9856-aa83c6d5d916,8,0,0,16 +18589c29-18b4-4dbe-aeb6-8df188841f4a,2,2,2,8 +ce8272bc-c537-4ce9-8056-e62879cf07a4,9,1,2,11 +6bcccfbd-7d67-4986-9227-10a91afb267a,1,1,4,4 +158c9e41-9654-43d4-8b75-a148d3dc8de1,8,0,2,4 +8b5afa3b-d5e0-44d2-bd5b-fc0c6ba5c1e6,8,2,1,19 +23ad7cb1-c8db-4c20-b0a4-2acac67da15e,9,2,0,17 +49b48195-c055-49b7-9e9c-9762e7500444,8,0,7,1 +c216df38-b5d7-4220-aa1e-0bd20eba02df,7,0,2,7 +4f402ced-846d-4a2d-943f-99c633fd986c,9,2,6,16 +47f7bc92-d5c5-4d75-becc-07c3f1a39a14,2,2,2,19 +62b3084a-6b1e-4ac5-a11a-f38c358edc96,5,1,6,7 +325a046b-ef3a-4522-ab7e-4f044cd4705b,8,1,5,4 +048c6edc-7af6-4f25-9102-10c89619edb3,9,1,7,14 +f8b2fcc1-ecce-4258-a8b5-4e4da286d14a,3,2,7,0 +a08a6914-030c-4233-b075-7a33d8779a7f,2,1,1,4 +62ed3c6e-9d87-46ae-9e45-4d514b0fee42,7,1,5,3 +2b0c1eb9-7fd7-46a2-9a12-4da239213059,9,1,1,11 +5994afad-de9a-43e9-ab1a-4aacf52da835,6,0,1,1 +90f7880f-d3f5-41b5-ba49-c919246f2924,1,0,7,5 +dbd98636-195a-47bc-82e9-a7b59e3cf3de,9,1,0,9 +56292a3c-4979-4171-950e-c4792dc9d3c2,2,1,2,11 +3173bbf4-4d3f-45b2-8c40-360ad26bbac7,1,1,5,18 +afe86574-2a68-4ccb-a7e5-61139555b5e3,0,0,6,3 +01b11424-73ff-4484-8e6c-dfa82207ce3c,5,2,5,13 +8a72bfb7-9d44-4a2b-92d9-9f6a67e03803,4,0,1,2 +bd497fcd-949d-4940-afcf-55dbfd1e67d2,8,2,5,6 +50937541-1561-49ef-ac7d-82694f9474c4,1,2,1,12 +faf0ba6c-02b2-4086-ad85-a09300fdb86a,8,1,7,14 +8da8a04c-432a-4cf9-a92c-566a13f405de,7,2,7,14 +08ef300e-cb76-41e4-bdf0-8f3c3f219efd,4,2,1,9 +d6c92492-c8df-4db8-995c-20d04671c2dd,0,0,2,18 +9b20a146-2321-46e3-af22-7d59db93d0b0,5,0,0,14 +0f745354-7d21-4e95-a2d2-1e14fbc01590,8,0,7,7 +b4071bba-4ae6-4e05-b3ec-6d422c95b766,0,0,7,13 +80553266-637e-4d13-a2a1-4cad4e81442c,7,2,4,14 +6a64c06d-e701-4df9-bcd7-25b5af5764a3,4,1,4,7 +577eb697-b927-408f-9535-a15ac8ab1330,0,1,1,3 +24627c97-f2e3-44ec-84c7-1cd8327587bb,4,1,3,10 +249cf333-57af-425f-9435-c76073c97c29,1,1,7,10 +e24cca7e-275c-470c-a11b-323c47887368,0,0,7,11 +f89f0a19-84f9-4cd9-9c46-87f89f5f0915,2,2,3,3 +683d0b1e-ae58-4aba-bc43-2391520d29aa,8,2,1,13 +74f04aa3-13ce-480b-a5fb-1e4e9f65fbbc,4,1,0,2 +3afa4acf-2e0c-4cce-a263-6f9511187e58,5,1,6,19 +595c9c94-6ad0-434d-bf32-e42e0fafafd8,8,1,6,5 +641c35e6-419e-41b8-8988-45c7858a038c,4,2,0,0 +c754f990-232b-442b-b356-2fdf16f621d6,3,2,0,16 +1d974ae7-49dd-4ee4-bbcc-ece4b93a6a11,8,1,5,14 +06601f17-dd23-44ff-babc-f84fdf87e2e8,1,0,5,2 +c798cf71-1957-42e5-8c00-9fc232306c17,6,2,4,6 +2075b0ce-6f3f-4427-8e9f-33c554ab7e60,2,0,4,16 +d444630e-74c2-4603-848e-245c013891d6,7,1,2,8 +d74073e5-5f25-4552-a1fb-00205ead738e,1,0,4,13 +c89e8ad2-0d9e-4c2a-9772-770fdb2eac27,8,2,1,17 +ca1bc2dc-f947-4321-88da-c0066ba7d592,5,1,6,9 +17b002ff-eaca-42ff-8046-0140f847a8fb,9,0,7,10 +471347c3-7608-4058-b454-d1a5223dfe1c,5,2,2,2 +b08cbaff-36de-4e63-9093-9438f9ec382d,6,2,7,19 +ff722302-3e19-49bf-b152-41418b7d7416,4,1,3,16 +cc0ecfc9-064c-4e30-b002-3716a05a935d,0,2,0,2 +2db6731f-4a4d-45b7-980d-527afb472be3,5,1,6,10 +e547e0a7-f8e7-492a-b1d2-ba4ccffbd0c6,1,0,3,0 +e8ff1f74-6324-4cbd-986b-d8fec49b203e,6,2,5,0 +1dab1a0e-091a-499c-94dc-2a5792c49b78,6,2,6,19 
+a069b99f-da79-410b-8720-2cc05904fdc6,8,1,6,9 +db95ebfb-9f9d-4045-ba1a-88a5830d2eb9,1,2,7,13 +0455e7d6-daf2-48c3-854d-08fc286850da,0,1,5,4 +f53764e2-5bb2-405f-8555-d0561e6fe1d8,2,1,5,5 +671faad4-8504-4f44-855a-c09d249c4fc5,8,0,3,16 +ab47f4c1-0ec4-4ee2-b78b-c87baf3b708c,5,0,1,3 +d88917d7-3e5f-4461-a1ed-60443e538807,2,1,7,3 +eb6dd61e-bf2a-45a6-8e1a-2e5d9700d6de,0,2,7,6 +bbf32df8-2ae0-4180-bc18-f9d28c17b439,2,1,0,16 +4640a5b6-9686-4df3-9edb-f35d9d9ce949,3,1,0,7 +485be11a-adfd-46a8-8a04-ff8849926879,1,2,0,9 +9db74126-2fba-4f92-b7c2-14be64e8342b,5,0,3,18 +7ca9a07c-df5c-4d7c-ad32-2a52a0e8fb16,2,1,4,17 +f5afbf2c-4741-40a7-8202-5fce6f572b9b,6,2,4,12 +5a98b2b8-9f02-4cbd-9574-2e57c2d1ba35,6,2,1,1 +b52c848d-ff16-4682-8042-9701a0970375,3,0,6,4 +77cbc2dd-481b-4e1a-9e34-b3975c3e6d81,8,0,7,10 +f42769ee-c61b-43ad-b796-2f41ca1fb1cd,0,0,7,16 +1a1b07dc-8255-4bed-af38-a1e6cbda041b,5,2,2,0 +5f8446ff-5a09-4855-9530-bbcc0c6fc2f0,2,0,3,8 +e9b9397a-eb04-4ee6-8d92-bef8f66f5bb6,9,1,2,14 +8085645c-9154-4036-8992-5fdaa4aaed92,8,0,0,19 +c3da2e13-af6f-47f2-92e0-a48c6f2af5de,0,2,0,0 +0ecd0f82-501f-48ec-9d67-65a4ea97e69d,7,2,7,16 +ee8e727d-3b80-4491-bc7c-fff70ea30f56,5,0,1,11 +1c28ddd0-e9e9-44c7-87a3-76db1b81da2c,4,2,4,9 +585cdd23-e298-4d80-877b-c16fbb4f1c4e,5,2,7,15 +fdef567f-e39c-4f54-ae36-fcd213a7bb6a,4,0,7,18 +2dc56fea-15e3-4866-acdd-9f9d3fd9d170,2,0,0,0 +2eacb10e-3b95-4dff-a947-46308c0a5abb,1,1,1,1 +efa33dde-bd19-432e-b831-1efb2c1cd1cd,8,2,5,2 +619cf346-7640-41c7-b274-093562406cbd,6,1,6,3 +f01f0f18-061d-4ac7-96b6-45dbb2b29a44,0,0,3,10 +d0965ed5-f9ab-4e06-b249-e86c26f91322,3,0,1,15 +da866b32-472f-4824-aed5-cb6e55848661,4,2,4,7 +52100206-4f2a-42d7-add9-85cc62e3b314,3,1,7,7 +b428c0c7-a1ec-4e33-b1ab-50ca424c283f,2,0,4,8 +043d4fce-4c56-46ef-9257-17022ae003b4,6,0,6,13 +952c7e67-8a88-4f31-8c75-160ea10f3994,4,1,7,12 +6aca944c-ad76-4a51-8177-ff131f70e992,0,0,3,2 +a188050f-c260-4d25-a226-3e1c7701b6c4,1,0,1,19 +f7dd6835-e774-4221-bae5-b306f113119c,4,1,7,2 +d41020fe-b812-4a6b-8e79-488a86776b33,7,1,0,19 +7a75a6ff-b673-4d58-8f88-f85c2604aa56,2,0,3,10 +b11ab9c9-24b1-44ff-bddc-66a0c4b7bff5,1,0,5,5 +f55613ee-7954-4eee-ad89-104a435f121d,1,0,6,15 +95c33b88-b374-473a-aea6-2d90eaa89db5,0,1,1,3 +f8f5323e-391d-4c58-96dc-a5f85c994f54,3,2,4,7 +3e82bf4f-64fc-4414-bcae-89391605932a,8,1,3,7 +c19ae122-6379-4307-afa4-1c0b92a55184,3,0,4,6 +bde23803-566d-40e5-bade-a484e0552592,3,1,1,3 +2e8bb56d-29e1-4ec5-8cb2-16b428818826,8,2,5,6 +d53c668a-f8b1-4abc-9cbf-4b7f85e16cb1,6,1,2,8 +0895b9d5-f3d2-4717-a3bf-39f5938d4583,1,2,0,9 +e8a23a12-9080-4957-a8f7-e1956776d5dc,1,0,3,9 +b5464f6f-8f95-44d7-97a1-ffe8911e53af,9,2,2,3 +be0e163f-bd2f-4224-8639-26fa9266d5a8,9,2,1,15 +a845b5a1-09b1-4cbe-9141-b896ec883229,7,1,6,16 +2a8f2cc3-b669-4f1f-b979-e138fdc3a1e8,4,2,5,2 +c58f4fe8-db73-4f50-a54d-6cfdcf299d3d,3,0,2,8 +6b5c4b71-7deb-4022-99a1-64b11a0c0738,0,0,6,1 +8b4ddda3-16f3-4c6e-9cc1-9eee89a64991,2,0,0,17 +81af75da-3ecc-4e5d-a677-33d81c4bbe18,6,2,5,7 +075c9248-244b-4a26-adcd-7110424559a9,9,0,7,9 +b9f62146-b7bc-4f9a-831d-c1a2524c6ca2,2,0,2,6 +2de4e04b-0af8-486b-b6d0-11b1b2499927,8,0,6,4 +bc331b02-a523-413c-947b-48af7426af55,0,2,7,8 +1c615e77-8eff-4088-88d0-bf3ddf784abd,7,2,4,17 +644ddc21-3845-47ec-bd67-2a88eee84bce,2,2,6,1 +41f73c56-1ab8-4ce9-a011-a92325172628,6,1,4,10 +ffb6b0e3-1ebd-4d5b-8e87-e7c9d1aaf0c1,6,0,5,6 +3e240868-9dd9-4906-bb23-1cf796b9139c,4,1,0,10 +c9990e39-c7f9-4f07-89a3-a3d40c439fb3,8,2,6,0 +9e1b023f-d3b0-45f9-8d00-b3696d17e373,5,2,5,7 +1eb0ae60-3c63-4245-b77e-0cd624f488a4,0,0,2,6 +9f61e5f4-37de-49b8-9d10-96106a7962ff,1,1,7,13 
+cbd77939-9255-4987-9a30-13b6aa953e2d,8,0,5,15 +df0f30b9-8ed0-4f79-a29f-c82ce564912b,9,1,3,2 +4c38c3d4-dbea-4d73-9306-6fd1e67b4d38,6,0,2,16 +0dd3bc71-60d6-4eac-9a10-2e6498fbb8c2,1,1,2,16 +cd065efe-bdc7-4464-904b-7e223a398277,0,2,1,9 +a8978de4-115a-4d6b-bbc6-8064c2d49261,2,2,6,4 +6cdeffeb-45e3-40f8-85c6-4b29c00b9cf5,6,2,3,2 +c8b0b1cd-b7b7-4d4b-9a46-76a82ad45acb,5,1,3,19 +c402ab57-ca1c-4c42-a982-45e7d270aa5c,1,0,0,1 +3eb811e4-5c55-44bc-a4c3-8bda97660114,3,0,0,6 +555f7824-e6de-4d6f-ad08-7d0ac838d7ed,1,1,3,13 +b26f156b-74e5-409a-9aa2-b9de0d1b5329,8,0,2,9 +1b4d1b6c-65be-4654-8ae0-a612cdf6eee7,8,1,6,1 +24a83524-bfdd-4483-a1fd-a5124f46443a,2,0,1,1 +b2020013-f186-45a9-bbcc-1b2e1e532aed,4,0,5,5 +28ba9e8a-afa8-4b22-a51b-b894701b38e2,1,2,0,13 +5736caf1-d312-4208-8e11-a97e6ff0260d,3,0,2,7 +fe845bc4-124c-4337-ae5b-e02e8745cc69,0,2,4,7 +54368b77-f9e5-4d2a-9254-477c31ccf07d,3,2,3,11 +dea5c769-0ce8-4226-b41a-e1625f6b99a1,8,2,6,18 +25edd20d-3f54-476f-9e4e-e19200d38f61,5,1,2,2 +10d99383-6268-4edb-a12e-b14cb6fcae7a,5,0,7,2 +a9d640ac-2f8a-42cc-a07a-7bbf1f3059aa,2,1,2,16 +ca3e10f7-726a-46f3-8758-e9fb14961ce2,5,0,7,1 +4b84f9b0-58dc-4cff-a1ff-904516b27a2a,8,0,4,18 +176b984f-5375-44ce-ab7d-9195310e15dd,2,2,5,19 +d4fc6f01-3cd4-4c03-a9e4-3a5f17daca71,6,1,0,4 +bb7fcf81-f68d-451a-93f7-6e7049c2b397,4,0,4,0 +88487690-6073-41c5-b32b-e7c7436b2fdb,3,0,6,5 +4eee4537-d848-4328-80ef-e1bddd5291ff,3,1,2,18 +71be1c11-907b-4eed-9797-7ab1dc0928ac,7,0,2,3 +5b72d74a-08ef-4115-8625-a1ca47d457d8,5,2,1,18 +ed2f5df0-287e-4321-922f-ea6c7123225b,0,2,6,16 +ad46086a-3b02-47eb-8f65-591fefa68ca2,3,0,3,2 +986a278a-7996-44a0-813b-a78fa8636c6e,9,2,0,8 +3c785c31-1a90-42bb-872b-1d47eed793e9,2,1,6,14 +d10dbdf1-31ca-4f3a-8608-e59b9934e393,1,1,4,19 +c9a28608-fc6c-475d-89cb-71f740ea3f8c,5,1,4,2 +dcf5572e-0147-401b-87de-59383f9d5b68,9,0,2,9 +78461c79-19a3-4c22-b4da-09f1a9a1fb99,3,2,0,16 +1d3e3d3c-2a7a-4e84-8afc-e750a2d92571,0,1,4,13 +56c2e0a6-df22-4c8d-b834-59e2ffcec75f,9,0,0,4 +166c8113-13c9-42a9-b31c-d6cbcb735edf,4,0,7,3 +dedd7614-7043-427a-907c-08075e63f8ba,1,2,3,2 +ae9d3248-3168-42d9-be5f-cc567fa2b6c1,5,1,5,3 +7bf3792e-c0b7-4b79-9113-f0b3d5c3b9e5,7,2,2,0 +4c8afcfb-602a-4364-8fbd-574a9cbecbfc,3,0,5,14 +e0afcd6e-8742-4641-95e2-8b317b49e68a,6,0,6,17 +f9c89f8e-3254-445c-b050-d3638bdf8ca8,5,0,7,9 +f4059d6a-3c92-4355-8f2e-d839eca30c97,3,0,2,14 +365bc54d-9612-46a1-9a00-10530b9cae10,8,1,6,14 +bd65870e-e1b3-446b-b827-b0136c277cc3,5,1,1,12 +c9995afa-4b3b-4de5-8e6f-1c716e36fd88,3,1,3,9 +30547065-0d5f-44d9-bee2-9b05c91a9dbd,0,1,5,4 +78d90ac1-d11e-4d39-9e9d-009794f06ce2,4,0,0,1 +cd37ab04-a467-46b7-ac6b-5af48580376b,2,1,5,16 +7b95a33a-615a-4a93-ae56-22692dfafcbb,1,1,2,17 +0a571287-6f06-4c44-9e2f-64318950fc11,0,1,6,8 +5ab8ee97-2468-449d-88ed-c844b29a886f,3,1,2,19 +82217dd2-b018-4552-a553-f05040a4a820,2,1,7,14 +789e457a-afa6-4122-b6af-1ae10a61d167,7,1,6,13 +32519d57-cacc-4be6-9d75-472f8b72cbc3,9,0,1,5 +84bc15d3-31c5-4362-b921-3512dd6cfd19,7,1,1,7 +bf7489b1-9e25-4b51-88b2-bf87f1152534,0,1,0,18 +b93ffdd9-325b-4e6c-b7b2-fa29c74b1077,8,0,7,5 +37b04ef3-00f8-4910-b1f8-d0913dc34ef1,9,1,4,2 +3a826f1c-c5e9-4ed3-b220-f17e3b9fba59,9,1,7,3 +c1bdf662-f984-401e-8851-9403f302b134,4,2,0,10 +12f28299-8685-44cd-b3e8-c28d0d9c78fe,8,0,1,9 +60e3ec0c-585f-4893-b65a-b47b4cd65ab0,8,0,0,15 +eb7bbd8e-c273-4cf8-a7f1-ed046e8baf97,0,0,1,5 +57c75259-51a9-46da-a7fe-980ccabbf648,4,0,0,5 +24acff0e-65ee-4bef-9097-15b0dc439e5e,8,1,1,17 +d88c0083-ca3f-4b26-867d-874dc49873eb,5,1,6,3 +224b968c-2b07-4495-a334-eb5af961d64a,5,0,1,16 +fa966142-5d97-421c-917f-18900d68ee6d,8,0,6,14 
+3621e513-3c69-469d-8665-c94551b1933b,8,0,1,12 +b6e6d740-deab-490c-8bff-c273a5155852,4,2,2,3 +56904703-2e9c-45eb-9ed6-1482c103ae3b,8,1,6,13 +661f7659-bd52-4d6e-82fe-b6ffda7abb5d,4,0,7,3 +871d898d-29ad-4718-888e-831d94a4a1c1,9,1,3,19 +7829c3f2-9438-4d28-8d94-0d8fdd0e74fd,8,0,5,13 +b7b45c80-cc15-4271-a59f-8f8cb0f737f2,4,1,4,18 +5875c2d0-faf9-4c4d-ba36-7534a1f69fa2,5,1,5,15 +693a70b7-53c6-41c6-8071-3b45cc6e7842,3,1,2,9 +33f762cd-c2a8-4452-80da-20698e02a1f8,3,1,2,1 +8be2e4d1-a469-4990-bca3-7c1129e3a197,8,1,3,12 +24de31fd-d418-4c0b-a2f6-ab2da2e08938,8,1,4,2 +55d68420-3473-4f87-8288-e23dacdd33f3,4,1,5,17 +83e03ab7-853c-4d6e-a91c-3935a1336b31,3,2,3,2 +0709f020-da4c-4418-a08f-f5920f181dd0,5,2,4,3 +66023dd6-0c1e-431e-a895-76c2ea8e26c3,6,2,5,15 +375b387f-81af-4bb4-abe4-e514aca2d4e9,5,0,1,19 +53d800a9-d2b5-4e9f-8f62-64ce0ac3de17,3,0,5,2 +022e5fac-3952-4fa7-a556-0c0ba0531509,2,1,4,7 +de6fce90-d7bc-4c93-8001-cf7ef77e9557,0,2,2,14 +970dc3a8-3918-4a48-8bb9-3a7e3a5d83a3,4,0,6,19 +ac513145-4f30-4690-b8dd-cf97ded13865,6,0,2,19 +3947ce95-b754-44dc-893d-93c2070972d6,9,0,6,10 +3e5a84f0-d971-4d68-a287-7d00db8a510f,2,1,2,17 +ec4ef713-1500-4fa5-a231-616a91a8f42b,9,1,5,0 +47ad5739-f319-4de3-bda9-eb8d1344eff6,1,2,7,16 +b07a20e8-fcce-44e7-bc2c-7c2e07188c79,2,1,1,15 +f155b345-f50a-4866-b764-0874edb454c2,2,0,0,4 +d655a1a2-d5f0-46fd-be97-4fc5f3271dc0,6,2,1,5 +c8f2d9c7-5a68-47b6-8f87-362e5a67c625,2,1,1,10 +37f05499-f290-4edb-b9fa-de8766557d50,9,0,4,0 +f02a8d31-aa1c-4aed-987f-6c198a0725f5,3,2,4,0 +8079a195-a39a-4d35-88d5-f409505bd01a,0,2,5,2 +1bcf416c-b2c8-4c01-a4d8-c1145853af14,9,1,5,2 +9483af60-6b66-45d5-8b59-a2372f41d168,3,0,4,15 +ce828593-4c65-407d-a6bc-2bbe5d74998a,1,1,3,19 +867a663d-07cf-4d1d-b4dd-d66ae6806f98,2,1,5,8 +6cfe3947-f904-44e2-a948-4aca907dd8b3,7,1,4,14 +4d8b0efb-67dc-41c5-9603-09724a416206,5,1,2,19 +d275282a-ca8e-49fc-b95c-fe8c88cf29a8,5,2,1,6 +139408fe-f068-4f8a-a3b5-70799ec8fb72,6,0,3,18 +645717f0-f0ee-4249-8dc7-5277645e369d,5,2,5,8 +7fc7b055-890a-4231-a9f1-30c6f1020dc0,7,1,2,7 +8bdb86dc-52ca-4714-9360-cf5a860727b0,4,2,5,9 +37337107-a05f-42bc-a5ac-b0cf5b6edc9d,6,1,1,15 +a84a6c8e-1771-4845-83b5-ec97636d62ea,6,1,7,5 +cd8077d9-35f9-4f25-beda-30d178b0561f,4,2,3,13 +576eccb0-5442-41cc-95ca-275578767c4d,1,2,7,19 +3e98a640-3662-4955-8698-83b89f9c8d25,3,1,0,14 +d740e911-9192-4e18-845f-1a10f04bbf82,8,0,6,18 +1e80498c-ca27-4869-9a08-1b072f8059ee,2,2,4,6 +43bac767-1882-4e93-a4bb-7f20e1687422,8,2,7,0 +21813b57-400e-4abf-9c67-3e984015344f,6,2,2,1 +4b418636-f3e3-43af-ae9a-352f3ff3ad22,1,0,2,1 +4e6f3a35-0b92-42cf-97cb-44bb5e884bf8,9,0,2,2 +f362adb2-cd82-476f-8239-9109df799865,5,2,1,1 +365c9ec4-02c7-4f98-9911-e54d74c18307,3,2,3,0 +0976b49a-58f7-4138-80a5-dd868db8c388,6,2,4,19 +4391df68-ddcd-41e2-8f7b-ef660cc431a5,9,2,5,16 +fbd0d362-343b-4ad9-8f90-85c4d31d486f,2,1,3,16 +48ca27e3-0288-426e-874a-3f194d3a0e49,9,0,1,13 +355c19dd-a8a1-4532-a4ee-98b1b0bf2e3a,9,2,5,11 +40d642c4-f91e-4172-81a8-22938a1e2a34,0,1,3,0 +3e1f5000-f5d3-4b03-bd2b-c84ec4054dc6,6,2,0,9 +0b5c3f20-0e72-4c26-9bd1-f179052c4574,1,2,6,0 +9a864cfe-6ef8-4747-bc43-b1b10028834d,5,2,6,2 +15adccd7-ad17-45b5-a61d-ed21b8787c23,7,0,6,16 +29ed2476-b3b6-4ee8-bcd4-4dc7a8598f3c,4,2,3,12 +0130ac38-9d5a-48a9-ae34-eed48bd0109d,0,1,6,4 +477239b2-a3c6-478b-8219-37d3769f4dc2,7,2,1,5 +565b42b3-8923-4f9f-bf0b-1b9c365aff20,9,0,3,16 +dc611cf5-d479-41f2-bd55-de7203e23242,5,0,5,9 +094857c3-5a8a-4b7e-b26c-013252b9a3c4,9,2,2,17 +066d8a9b-d3ff-46c5-8e83-3ede0d6f9f11,2,2,3,19 +749a2ecd-6f10-4b88-a5ab-e97e7f8926f9,2,0,4,2 +78035bcd-36e6-4abe-86a6-905e372766fa,5,0,4,2 
+bb9ed34e-f99e-45aa-905b-a6edcc8470f5,6,0,1,5 +8825164a-4858-4092-9096-707aa0397895,1,1,4,12 +0d33dd12-6897-4893-84d0-59c4051c5b43,1,1,6,15 +4e2adae1-f460-46fd-82df-e7b1129d0e28,1,2,3,16 +4f0f6293-df44-4c1d-9ab9-872b747f2cd0,0,1,5,8 +50df13b2-01ca-4726-97e1-67a5f27c754a,4,1,4,8 +8fac9d55-8c4a-4e4b-9e42-68a498605f9a,7,1,4,16 +fc2cbadf-1e0d-4de3-b41d-ddee08ff59f7,9,2,6,18 +0a10bb26-095b-4e94-b525-110ae95b3879,4,1,2,7 +245860eb-0de7-4f5a-954a-aba5199daf1e,4,0,3,17 +43867cf9-642b-41ea-a237-1da65a418744,8,2,0,15 +fec53094-042e-43f2-a8b5-3e8f20fa5136,4,2,3,0 +fbb1cf52-1ee2-4683-83ac-1ddab5cc52f1,6,1,1,17 +4a0b31ce-03d2-4f4c-8580-de4c7ed3439d,2,0,7,13 +b55c71ea-2b08-4ae8-b0e5-7d6b4a3b9fbc,5,0,1,3 +5d4afb31-fdb6-423b-9ec2-e5f582e31e31,8,1,3,6 +716ccab5-a552-4435-b089-471723041d47,6,1,5,17 +bd711523-b582-48cd-ab7e-805f43f76da7,4,0,1,17 +d4c991af-fafa-47e5-99eb-ed8a393c8ccf,3,2,1,0 +833618e8-680c-46b2-998a-e47c7db7be29,1,2,6,11 +fda372d4-261c-4ee5-85fa-cbff135c8b6d,3,1,6,6 +47e48bc5-718f-4844-9133-8a817b09c92e,8,1,5,9 +0894899d-0d53-4f37-85c0-7f5163c26615,6,1,7,12 +f1fb8d18-efe9-43a6-a570-df4a06f64713,7,1,2,8 +a29b918d-43f9-46d1-9e75-28b93b157bbc,4,2,4,10 +5290f6c1-e4f8-4ab5-9927-7dc564a43532,6,2,6,8 +6d79ff84-909f-4e0a-a543-f4d73cbffe00,0,2,2,1 +f86798d1-28fb-4d3a-89fc-1c2684283eb0,5,2,2,3 +45c3b873-5990-4078-8455-d9af3df131bd,4,2,1,17 +160eae41-116f-4793-a3d0-e9416c25e70d,4,1,2,7 +070bc7ff-7d8f-4173-8718-544586d92455,0,0,3,19 +429d64e2-76d7-4fe8-90fe-2ff5f3ded1f0,8,0,6,18 +4fb2243c-28ea-4f2e-bd55-c90add1ee921,3,2,4,0 +0387dcfb-c588-4fec-a359-d0e97d783a19,4,0,4,11 +311cc609-769a-47dc-80eb-bd657e094f38,4,0,3,19 +bb828f29-7b28-4044-9475-028464b193bc,5,1,7,9 +1b2d92eb-0601-4f6e-bb27-09c27d4e121a,9,1,5,17 +69a38e03-be8e-4365-876b-06d711027fe8,5,0,3,2 +1463d4d0-1f99-4f23-949d-e1ca00babcee,5,1,6,19 +5fbf2a0f-1b9e-4d0c-a61d-b973c7d3dd4f,8,2,3,8 +2cf61aa8-59ad-4db7-87b3-a6630ad66037,3,1,3,14 +b1e0a20f-61ae-4868-ba48-fb07e510df82,8,0,0,19 +18b36a46-db5c-48a6-8d95-29d7183d2e25,3,1,6,10 +a6df5ce2-4c5c-4e9d-be2e-fac95aac1de0,3,1,4,1 +11a557c2-c674-4a95-ba6d-10633160a707,4,2,3,4 +43640338-2141-4670-b66c-97529cf46916,0,2,2,7 +12502564-8796-496f-b93e-87834ed5e79a,3,2,1,5 +fcf4473c-ae78-4178-8ba3-0a2c2a73b450,6,2,3,11 +f562e6db-46eb-466c-9ddf-b3b48845d9e9,9,2,1,5 +92714eae-1af6-4465-b3da-cb29ef392f02,0,2,2,1 +1cfcee5c-78df-495d-866b-3c332a43c701,7,1,7,17 +280b9fa3-f5c2-4cf8-9f36-a861dd0536d4,2,0,7,10 +9a317774-45c7-46a8-a14a-7d640611be26,0,1,3,3 +f267e275-815e-400a-9df3-20a96ee2236f,5,1,3,16 +68ae115b-fec3-445f-a5cd-213f89e7f390,6,1,0,6 +d9ef1ced-eb1d-4f71-b23f-11d781da430b,1,2,4,0 +dafdf136-b5b5-44f2-a0ba-924f0c11b46c,8,2,1,3 +1c205c12-ee5a-4ff1-a1a7-80dadba29f34,2,2,3,19 +540c07c2-b4f0-4ee8-b963-f8ae266ae199,4,0,7,19 +79f19c17-725e-4b22-baf2-9b24b8487814,4,1,6,10 +4f2e7779-d5ef-49d5-9e3d-d5603af1e84a,1,0,1,13 +8243f778-82aa-437b-b486-e66942b9606c,6,1,2,13 +46297b6f-a7ea-45ca-bcad-e90bacbc21af,3,2,1,2 +150a3ef0-5a87-4ef3-a8f4-7d72d57286d1,7,0,2,15 +3131ff41-3492-4690-befb-81c7954cce89,2,2,1,13 +dfb27a65-38db-47af-9b40-990c312b07fc,0,0,5,15 +490942ab-7210-43d8-85be-72a788f4c0a1,5,2,5,18 +5599b824-d0a5-44c0-bd6e-c337d6d0606b,0,1,6,13 +9318bfe7-aed1-4da0-975e-b28d46a753a6,1,0,1,6 +6c1a4ebf-d208-43ea-a42c-81492aaea715,8,0,5,6 +71f94918-09a3-4881-9e4c-0d312b99fd89,4,0,2,11 +5e777bd5-c470-4033-abc5-5c2f81526fc1,9,1,0,14 +64f0f59f-3503-48d1-8506-e5fe2844eb56,2,0,4,10 +265c63fc-ebef-4dd7-a85e-4e6063b07df1,9,1,3,1 +e09d3bf7-aeac-42f3-8198-a4470ea2b18b,1,1,7,11 +b7236fc6-47d1-48f7-b1d5-c2b033f5776b,2,0,0,14 
+35daa92b-7807-48ed-9419-5d26fc730d29,0,1,5,9 +bca26262-a79e-48a6-98d0-a04a44a2e853,2,2,3,3 +e3093d00-86e7-429f-a8ca-7ebad8384bdc,9,2,3,13 +b013d589-1f5c-491f-8f08-c788b4f1e550,3,0,5,0 +8ae2ceb7-7f97-4f94-b338-93122b33a1f6,2,1,3,9 +887c424a-57ec-4f4d-96fb-b31483d8c717,7,2,4,14 +5b6d9b1d-415b-4ffd-8620-7490ba3805d8,7,0,6,10 +7b9a8c8e-3c63-4a9d-beff-053dc3211097,7,1,6,11 +0b8246a9-da53-4ee2-b5e6-44ce336b8272,8,0,4,15 +c37fa362-db2e-42f5-bd25-e95b1781e3f4,4,1,5,11 +c0ba7b8b-e28a-43c6-86d3-6f792e037c00,8,1,7,9 +00b3bf53-8e4a-4bd9-b52c-40803ca4aa6b,5,0,5,17 +a1ab850f-daae-419d-a42c-6fcc732d02a1,3,1,4,1 +be63d8b0-4d80-471e-9aee-e4d9fed7190a,6,1,0,1 +8dfc87b5-cedd-4806-b470-a581f90a3737,7,2,0,5 +350ed97e-8eba-4272-b8b2-82f3b6165e48,0,2,3,5 +bee72dc0-8dbc-4344-8e11-367246e4ebe0,2,1,0,15 +564e36df-6353-4e2a-a2f3-a818d799a8dc,9,1,6,6 +6a1729ae-3586-4bab-832f-ac6fc3a3ea75,2,0,3,8 +6c21f224-4b53-4789-929e-4dce834ac8e0,2,1,7,4 +47efcd50-d554-4960-8c43-f3afa5b8f4a7,0,1,0,5 +a4f5db6b-2bca-4776-b3b6-226e55364a1a,5,0,7,10 +4b2a7253-7675-41fa-9c4f-795c140c5890,7,0,2,5 +493e96fb-f220-48fa-bd31-e1b65abcdff1,5,1,7,8 +c7c41263-e74b-4966-b38b-17d1050ecbca,8,0,3,13 +85405d8d-a8b2-426d-8c8f-83128b63e026,5,0,0,2 +19ae0cb1-348b-4721-9645-c08df5d825d6,3,1,5,5 +6cc92239-e406-48f0-8d4b-9e83305d9ebe,6,1,6,1 +206c07f8-2481-40a0-9aad-b02ddd6ff1d5,4,1,2,17 +3f5588ea-9d5f-41d0-acff-b130da9e7e37,3,1,2,6 +792c6ff1-9336-4dd5-bf3c-d9b858383f9c,3,0,6,4 +b5972461-6a9c-4757-b7ab-be4fc1c34188,4,2,3,6 +4dbd4d23-1e96-4dd9-b087-2106552805e4,4,2,4,1 +f87121eb-8411-4a01-a04f-f5cdb889d5e5,2,2,0,6 +c80d69e0-8170-471c-857f-cabf6cf52ec6,3,0,2,10 +3723c975-6745-4214-abcb-c5a755cfc15b,3,0,7,13 +87fd51d5-74b9-4991-a0e6-3ad359b8da78,2,2,4,15 +f38d7a0a-d9e7-413e-b786-bf7ccbc31cdc,3,1,7,14 +0408c05c-abf3-4989-ab9e-fd8c0c1c25ac,0,0,6,2 +5f91b3d1-ff3e-4b33-9399-42460c85a734,6,0,6,4 +80fbbf72-1a5a-4599-a6fb-cd94f16f48c7,5,2,1,18 +30c2a33f-f0db-4f1b-905f-204cd65c8d51,0,0,6,3 +9b7fbf03-a049-4fcd-bd1a-7c64291efd6a,3,1,7,15 +2c341a1b-0912-49f5-aea3-02929bd0c5a4,9,2,7,1 +8c5aedb5-4a6f-430f-bc6c-d2e921885c8c,6,2,4,2 +ee51d1cc-1ebd-4557-8208-598ad676af89,0,0,7,11 +a7217e7d-9960-4c00-8aeb-b98267bca27d,7,1,3,2 +f266c813-2a09-4266-9c05-095bd0086701,0,1,7,3 +483dc5b6-f8a2-4c03-9d00-6ce24892d04f,7,2,2,1 +34f34775-9e65-4942-9921-ff260d7df121,0,1,2,17 +8d178d47-8d8e-4bab-b341-9fc539d29faa,2,2,4,11 +7f67aaed-3db8-4565-aad3-dcfebe67ddb0,9,0,0,3 +73f8ff59-74f8-4557-b6a6-a7c23d19a2bb,4,2,7,17 +8d5a288c-1a8e-4b16-914a-df6da969ca2e,9,0,1,15 +b9fa3571-5f56-4a69-ad82-2cdae8c4a76b,6,0,0,0 +88612bd7-9dc9-49d2-861b-bbd07b0dd521,9,1,1,12 +bae96fe4-2b30-4434-80e3-b428991d73a5,4,0,3,4 +4b53c02c-9389-43fc-93c1-02ba680963b4,1,0,5,9 +ed4369c4-ecde-4254-9477-efb6aa4425f3,7,0,5,4 +5f988e23-acd6-4b2e-8388-9379ba6526ed,8,1,7,7 +54c05592-fd5a-4c85-bdf0-b8fd084233c3,3,0,5,5 +96eb296e-ab4e-47b7-8a5f-293a3df27a63,0,1,4,5 +1a67ae69-8e14-4771-a30c-0fbb7d00dfef,0,0,0,15 +ad08a7b6-ce0f-46d2-960b-89c1e5fc3053,5,2,5,15 +b7d81688-fdeb-4fe0-9432-daf41db1bc37,6,2,4,5 +2c6c6efb-ef09-492a-8791-2fe5502721fb,4,1,6,6 +4f113dfa-f1f8-4daa-ad2a-ace2c38b34b7,3,1,0,5 +c8782977-76d7-4e29-a61f-7eee61e5349d,0,1,5,18 +5f3832b5-fcbf-4db1-b2c9-1aa770fde72d,6,0,4,9 +271cabdd-1a06-441c-b0df-e9f014e40433,4,2,4,0 +2c6f8ba2-3c6e-40c9-bf0c-32555d2338a3,1,2,1,1 +bef62990-8318-43a0-a193-ee331447a6d1,7,2,0,16 +88959d49-ab71-4d7e-98d8-b76f721bd3a6,8,2,5,18 +f40118b4-62bc-4d4d-85fd-2ae30a1cb2ca,0,1,1,2 +204e3d71-ea61-4556-9611-ebbac1acfef6,3,2,3,10 +6276ce22-61a2-43df-87b3-3a36eb882bb1,8,1,1,5 
+431b69a9-bbac-4c94-8766-95a260f4f81c,4,0,4,2 +7f9e3c13-f6ea-410f-a02c-8a9967bd6531,1,1,1,17 +4cc55c12-4cbc-40f4-b284-8f3de8c91bb2,2,2,6,12 +da0f4a40-b056-456e-bb84-f92b8c6d1759,1,2,2,17 +e43ea46c-5a41-4aa0-826c-684d4a5a67f5,7,1,4,18 +ff659a80-a7e6-4523-abc5-d479fad41a90,8,1,4,8 +8a9bc47e-995e-4f5e-868e-24be0f2ed49f,3,1,0,2 +a3a3647f-e902-4b30-b307-5ba5d3fa2a2a,9,2,0,3 +984dbd71-65d4-46b7-a0b2-ccdcb6ba1d03,1,2,7,18 +1e249804-f321-432c-b8d8-7ce3ceab50ff,4,1,5,18 +074684c9-8d1e-49aa-bb17-a7094bc3b5c8,6,0,7,15 +6f458a63-a519-4eef-9dbf-38c5c21ca345,4,0,3,3 +afd88348-79a8-44e3-a304-0a5877c17fb8,9,1,5,0 +0c07928c-a0db-4500-ad97-8943851ab110,3,0,7,4 +5d5e93d8-0230-4c85-b4e8-047d91c47f4e,4,2,0,13 +a52e1320-9cfe-45d5-ae4a-a29c883d53cb,9,0,1,6 +3b548919-b327-4521-87e5-c5231b337654,7,1,2,11 +c2927c4b-311a-4efb-b402-2db00f051670,1,0,2,0 +9a3e0edd-1a40-4e7d-9641-2d44a6ebd7b1,1,1,7,18 +60e942e8-7525-4bc2-b787-78b1b3c9d9cc,2,0,0,1 +0e4aaef8-63a0-4858-9bae-160b0d9cd920,4,2,5,7 +61365dd7-98ab-45e4-980f-dbf00b801b61,3,1,6,0 +d0ddd7e1-f265-4692-8c21-bddc70974cd4,9,2,0,15 +ffc8d403-6434-4a57-99e5-84174dcaba65,2,0,2,8 +203f21ee-a6d9-42e5-a60d-25423b043c30,9,2,1,18 +2e5b9dc5-208f-47ac-b326-cb89698ac8f5,2,2,3,3 +778ce3f5-5301-429a-b482-599df11146a2,3,0,4,17 +58987a44-44a4-4a5f-8821-e428501371a6,0,0,7,14 +67f371a7-e196-495f-bc77-0bd5ce16ed89,2,0,7,10 +746fa1b4-4fa0-47e7-b621-1f808f3b23c2,5,2,2,3 +452f8e17-9028-4201-8222-0c18f37e1828,8,0,4,1 +bd847f54-5ed2-4fe1-a562-24905e621ecc,1,0,1,0 +cc90a6a8-35eb-4803-ac0d-dba7d2554c83,3,1,3,8 +d6ab1748-6a5d-4dcb-bc4c-564af34f857c,3,2,2,5 +ef19d505-d805-4a8a-a0b9-084f81720012,9,2,7,15 +5e40b0b7-6436-4330-8d2a-e4fa6d56f03d,6,0,2,13 +fec4372b-30b9-4b6d-9a2a-e783ade289af,1,2,7,13 +f74aab29-4ef5-41d1-8e76-81e015e563b2,2,1,3,11 +ea9345ea-cb6a-403d-bc86-f78eacb6a619,0,1,3,12 +26ffa513-9cce-4a0a-84fe-968d3cbf7fa2,2,0,1,4 +84913419-e2e3-45fa-b25a-7863ef7d8744,5,0,3,9 +80b50280-31bb-4b8f-8eba-3f0413d28c1e,7,2,6,10 +f0c7e7a0-e643-4809-b796-7387e4db9ce1,3,1,6,6 +412dc0cc-16c2-49f2-afe6-a401ae6a65e5,5,1,0,0 +34bcff3d-4cc5-448a-b552-e12ed09c778d,1,1,7,9 +6d222f2f-d0ea-45e7-8e99-6437c24aa22d,8,2,7,12 +06b84368-732d-4711-b7da-d1d792905bdd,1,1,7,9 +abf2da7b-b170-4e26-85bb-4b80417ebd46,3,2,6,10 +e5971fbe-25ac-4d58-956d-c7019907280a,9,2,4,13 +a197236a-7f47-4555-ac82-89abd920d340,3,0,0,5 +f6697dbf-18cf-4026-9a6f-24e007145475,7,0,5,5 +75e95192-7b80-4efc-b04d-448423583e34,5,1,2,13 +ac99c2d5-89d1-46dc-9fc7-62c9f8fd3689,6,0,4,4 +1ab646ad-ec54-434d-8f4d-fbff49c446e0,0,1,3,14 +4b862c40-4b59-462b-9b47-a0b3e92ca8c9,9,2,5,15 +58c1e2ad-18e7-4a1d-8346-16339c1bab75,2,0,5,7 +35b99bc7-bb1f-4790-8da6-3b311b8ea513,3,0,6,17 +33f35876-19af-45b0-b815-8826f7f0a76d,2,1,2,12 +013b9df9-2a3f-4cfe-bb4b-ce5064d8073a,5,2,0,0 +ccaaaf64-4397-40d6-a8ba-0a0dfde6ca9a,6,0,1,5 +30d63763-ba83-4402-a66b-0df3d297f656,5,1,4,13 +9479f50b-dff4-4c40-b1d5-d838e6138ab5,9,1,2,2 +fce40c86-4c2e-4c8a-b34c-6a3b526632ee,9,0,6,6 +f51010e9-2757-4be2-bed2-66b4a4cab15e,4,1,6,15 +150ae568-8207-4df9-ab75-892ab62a2cc0,5,0,2,13 +ebbf442c-0e84-4a21-bad5-e9c37412b8b2,7,2,5,16 +c084712e-7812-44ca-8707-1d5bd0bbf19c,1,2,1,19 +aced2774-5e49-4345-8a0e-44fcbec14655,4,1,4,17 +bab51bdd-6272-4273-a0bc-0bce8718edd6,2,2,5,1 +1319819a-3bdc-4e8f-a922-4cb52b006b91,1,2,5,11 +eadea757-cb85-46be-9a00-ac36aa2ffe24,1,0,0,4 +5beb942c-751b-4580-b56c-a78732b4db6e,0,1,6,0 +263733df-2b08-4da9-b096-a06271645f0b,0,2,3,6 +f482864d-81e8-4ce5-9709-2e8c2a0ecad0,9,2,0,11 +24235fba-8210-41d8-8af7-088c2fd13137,8,1,2,5 +1f5698ec-8e60-42e6-b91b-59d2eedd62c0,2,2,0,11 
+34a66775-67db-4bbd-bb41-297df89055aa,2,2,2,2 +2cc5ef83-e15d-49c3-9712-59abfc06c3bc,7,0,3,3 +fe68d081-5422-434e-a93f-68fd236b0616,4,0,5,0 +5e81951c-8de2-4114-b5e6-2cbe8471a2a5,3,1,2,8 +be652962-6d63-4215-a4fa-2d8012bd0767,0,2,2,18 +10c8668a-88a5-4257-9a1a-28fe536f6106,1,2,6,0 +37070bb8-a9ac-47d1-b3d0-d6c04a275778,4,2,6,6 +acdb55fb-b64f-4a0b-97fe-37cc971a6cda,6,0,6,4 +c9d20e17-f0e7-4fa1-b5c5-8ff7bff5d23b,7,2,0,17 +abb0afb8-68bb-499d-bef2-431ad9da3d41,3,0,0,14 +68a9f4d1-a11f-4bae-a561-2463cf7b13d2,8,2,1,18 +2130b4f2-a078-462f-a4fd-6991a92a8ed5,7,1,2,10 +4cb4763a-99c0-438a-bfd1-a84afd734750,2,1,2,7 +dd9a2f5c-3f99-422d-8dfa-31488412c2e2,9,1,1,3 +3249992f-459f-4efd-8e10-373caa1feeb0,3,1,2,6 +16c8dda3-c44e-4bf9-9088-c305a6a42928,4,0,2,0 +3601559a-bd87-49b0-835a-71f0f82302cf,0,2,3,14 +f5c08ebb-4fd0-4331-9a58-2989d7a5ea12,0,2,1,5 +01966f8c-5608-4baa-b312-8071fd2329f6,4,2,5,0 +fb0b088e-7ac6-40dc-8a71-0dba370ee63e,9,0,5,10 +ddc210b6-b6d6-4dff-b711-37be64d63aa4,6,1,3,13 +54bf2854-f06b-416e-899e-fee34633e906,1,1,6,11 +2f50238c-895b-4252-8283-edc00fed5f76,8,0,1,15 +5f59df43-7610-44d1-a4bd-9cff4b296d1d,5,1,5,11 +68a29ee3-e867-4575-8505-a78e58fb5492,5,0,7,7 +ecc0064c-c97a-4e7c-aece-a2ef3baced59,4,2,0,3 +372c592d-ae57-4f4d-ad37-adf1f357dbec,1,0,0,2 +5ddc60df-e616-449b-be11-088f4704af45,0,0,7,9 +809a3c0b-b2db-4af5-a3e8-524e2cd92cf3,9,2,4,12 +36726d98-f29a-47cf-a6a4-b39a84836e88,9,1,6,16 +9d04c43f-1837-4ea2-9f7a-734f6dee5ff7,9,2,6,16 +225381bc-632a-41be-b070-ed0302989049,9,0,6,17 +0e04494a-6faa-4981-bfe3-140ab45470ef,9,0,1,4 +32c4ecb0-b897-47a7-aec4-a5ab8f6bfdb1,8,0,7,10 +c8e26747-46e1-4561-93e8-8f01390a5184,0,1,6,14 +6a153058-11ec-43e3-b0d2-f7f6ef36384c,8,0,5,7 +fb4b8b4d-6642-4045-9083-e84c35a467cf,2,1,1,9 +a34570bc-c673-4074-96cb-8caa3aae6a2f,1,0,6,16 +97bf0dab-b438-4a5f-a739-90855a92216d,5,0,7,5 +ffcd57a1-cd08-4a2f-907f-de2f7be480bf,4,2,2,9 +5c1a086a-3499-4f9c-adff-dc58f45e0543,2,1,1,16 +190497d3-d7aa-43a3-854f-795dc5c5ad01,0,2,1,1 +776e77b2-9704-447c-aeb8-11f3011405ba,4,2,2,14 +54410ec7-19cb-4de2-ab7a-772ece2e11f0,5,0,2,18 +cca2f398-ab18-4386-892c-f7bbe109f2db,7,1,1,3 +4c635eaa-5e53-4a65-9005-75af695bb502,1,1,7,17 +ea286576-90cf-42fa-b5d4-1531d7cb6206,3,2,7,15 +1d6f96d9-4870-460c-9049-10ba749b69c5,7,1,3,8 +2a1fd7da-0c9b-4992-9a50-6d0b15ea7c2f,4,0,1,1 +8f375a42-4ab3-4d2d-8453-e46c88a905a9,7,1,7,8 +0feb0d26-1559-4b00-a1dd-de6417e174bb,7,1,0,8 +ac1eb08e-5c2f-423c-b356-37865d087311,7,2,2,7 +ea8a283a-c359-48e1-b429-ad52823fcd17,3,0,2,3 +12b26448-6076-4baf-ac70-429057e7b0f5,6,1,2,9 +fca15ef7-8426-491d-8527-faa396d6be2f,2,1,6,15 +a8a370d9-6a6d-42ac-ad02-b0cc564713c4,4,0,6,5 +5dc5d14e-875d-4a16-8d78-fed73a09a964,2,1,0,0 +6da7bb4c-4e0d-4a2e-8366-6c8b8fd7ce55,3,0,5,5 +a3e6293d-57a4-4f61-970f-49df2fa01865,0,1,6,19 +3aaa2109-23a4-4a84-90e3-bc2e74007845,9,2,7,14 +ab1ab04b-8b8f-423d-980b-ecd3f94b5509,4,1,3,0 +704f5573-920e-4e9d-bdbf-1ace0396f92b,2,1,7,0 +6db60fca-4261-463e-a8d1-23ebd0289d7e,0,0,1,8 +2eba2b2b-c572-4198-8982-bdeb86263397,3,2,0,5 +f902d297-bd05-444c-be19-489f22d50104,7,0,1,9 +2f349c0c-afb4-4453-b19b-bb06c5fcacd7,1,1,2,8 +d21ad56a-4f32-48d5-a378-df6c746e3c14,0,1,0,11 +a8c08390-aaad-4cbd-92c6-f23e8e8d434e,5,0,0,7 +ffbfe395-4363-458f-8d92-60fc5cf02106,4,0,3,4 +5ed203d0-fb4a-4313-ac9d-b4a4fc229153,2,1,1,11 +b3fb82a9-80eb-41cf-ba9c-e0b01898dbf3,7,0,5,10 +1fb15675-4108-4b56-b507-aaafb8be5c4d,7,0,0,5 +45f254b7-008e-42a2-ab9b-2da8aabd8b6a,3,2,4,15 +177b6126-8955-42a7-b0cd-44a9a7e5852b,8,0,1,14 +efcd7011-6a16-45b6-becf-a71c91fe4bc2,6,0,6,18 +62873376-9766-47d9-9e26-1fab8c3e3d9d,6,0,6,1 
+e8ce895a-115a-489f-bb8f-6bd578251c3f,1,0,3,17 +bb46ea36-4eda-4c9d-871e-ecddea83b5d6,1,2,7,17 +04ecbb5d-3855-4e90-a290-a515d552bb94,6,0,4,12 +54be9c90-8846-42ed-9b11-8ef554399948,8,0,1,17 +2d1d5869-040a-4ffd-ad98-8719f2270e57,4,0,4,15 +ef1752b3-7562-48c1-b9b3-3383908e1b1e,7,2,0,6 +429aa251-6b85-4ad4-81cb-45b4ae7de28f,1,2,4,12 +35f92784-aca2-4427-a2b8-1292d8e77e0b,9,1,2,0 +b2453a90-fb9c-41d4-acbd-4f1c7cdca625,4,2,1,7 +3a468839-2c21-413f-b472-fe4054944df7,5,0,0,19 +68e5cb78-ae78-497f-9091-20931d6f918a,8,0,5,0 +3b7eda9f-39b9-4e1d-9fc9-7744491f6459,5,0,1,6 +8de093af-e53a-4b8b-bc69-2ddf1bfac314,8,1,0,1 +63a8d386-08d2-42a1-9485-b83054bcda00,0,2,3,14 +f8786f31-4e49-4e3d-a9b5-acc4840a5cde,1,0,7,5 +bfd4e20f-33fb-4a42-b9a5-b1a87fb56f98,0,1,4,3 +344ab874-793c-418b-8317-6915663e84bc,8,1,0,11 +7bf49a9d-15ed-44da-a7dc-72944d6238a3,9,0,3,4 +ff3c9f06-e841-4646-952e-36fb84f2416a,6,2,6,10 +7876e32a-c304-4cdd-80a6-551e9ccc3db4,5,1,0,9 +49c477c4-ff4c-4061-87b7-907bbf5b1c69,4,1,6,6 +f90acef6-6303-4678-876d-6342bf1117bb,0,0,1,11 +c2d38a15-adae-4359-be1f-6024ef92d53b,9,2,5,5 +bb6266ad-969c-418f-b0c6-9412a2cd182c,3,1,7,11 +1f60a974-05f4-4f7e-8eea-dc5193022951,4,2,1,19 +8522d15c-bd24-4ff4-b121-32da9baee12e,7,0,3,18 +4404d152-ccd7-4aa0-9679-c5a0c151d856,4,2,4,16 +f98e03f3-b81a-4686-b630-38d2d3035e67,2,2,5,16 +abb1095c-3f72-4f21-be71-cf3e530e6865,4,1,2,2 +c304f710-a643-4323-8f89-5643900ab924,6,2,6,19 +06bbbee4-3339-419b-8a5c-7553dd4a1415,5,2,1,3 +8513f19f-a14f-4a24-87e8-3e51ac833558,7,0,5,5 +9886095f-9b87-4754-aa4a-7b4beff53cc4,4,0,1,6 +2e84b6f3-bfba-447b-8304-94f734796214,9,0,1,6 +b6470f49-4b99-43e8-a33b-5bce1aabb487,3,0,0,3 +b9e7bd41-be9a-4b7b-bd08-98e8cfeb490d,1,2,6,8 +6a12385b-0033-4182-8b2a-88f501e8a260,6,2,1,2 +db637eaa-7a52-4e42-9ed6-6ababa1cfc36,9,0,3,6 +88eb926e-1034-4e86-a7e5-bec922ed6a33,7,2,5,6 +ab69e76c-c761-49c1-bf7f-e92944fc6eef,9,2,0,7 +ddad6937-1a27-40fa-a890-060332337230,5,2,3,15 +144589cf-2a1f-4665-a4ce-2e90bf54b4e2,6,2,1,19 +a5037033-d436-4012-806e-6e1b01a5c544,3,2,0,4 +c420217d-f333-4f52-997c-0b1d0d96f8f0,4,1,7,15 +4918ec5a-08ab-46c7-93eb-4043606136e8,3,0,5,8 +72a0b858-b1c7-4a61-b8b8-46829420a0fd,4,0,4,17 +35bf4bfb-aadb-4548-b4a6-e01d6ea91bb7,0,0,6,4 +bb96e980-99c5-472b-89d3-12eedc435c86,3,0,6,13 +75098468-9494-4260-b0fd-7133b7652c33,8,0,2,19 +d32d60e4-7d93-49fd-a62b-4337a1ef9354,3,0,7,1 +987a2bcf-94c9-49c5-bcd1-d5e355cb796e,3,1,7,11 +b176a5a4-415d-44c1-80dd-19927339ff8e,9,1,6,13 +6800ad32-4914-422e-a827-2ba87619c686,2,1,1,15 +d1960f61-50e1-42a7-ad22-16131fb80034,2,2,7,6 +4e0a2eb4-1968-4e28-884e-1ede88d45bf1,3,0,6,14 +942d9f1c-4383-48c5-89d7-c53c7ad5f74b,9,2,1,12 +3772803d-f945-403a-954c-7a8f119ab43e,5,1,6,4 +1352ea72-181c-457c-8c7b-250d6c080674,4,0,1,7 +1101fd87-7cbe-462e-bd9b-3fb3e0bc6997,6,2,1,10 +b441bcbf-fdaa-494e-a04b-6d323a077eab,2,2,5,10 +ad7df445-cdda-4737-a5a4-b4b1ebffe7c1,8,0,0,10 +5ea49c03-897b-402d-af8c-803737cbe9b2,9,2,3,15 +848e7311-2b66-43a7-9fef-be1e24506f44,9,2,0,19 +43f7bd31-66a6-426e-b495-83300024710a,8,0,4,1 +953e02cf-2989-4a5d-b818-15f8eaf694c1,7,1,1,1 +55f6f4f2-2169-4f4b-ba57-11689e9e09c6,2,2,0,12 +0ca9100e-f24f-432b-85d5-236451f5a813,5,0,1,5 +6264e11d-00f3-42ae-859f-7c10d857c705,5,2,3,12 +c727a796-6cae-4e11-9f6e-575cd8f6ffa9,9,1,5,7 +c45f7fb0-1e57-4c07-b9af-46998edb11e9,6,0,3,5 +1c49dc9a-433a-484e-a4ec-04c98fcf2110,0,0,3,5 +452a87f3-3051-4714-a44e-8f4243d51bd2,1,1,1,2 +595e3c50-80ce-46c6-b2f8-37929c5326ff,5,2,6,1 +e0752bdf-f0fb-4f72-acac-e812aa2e1a83,5,2,4,19 +983b9247-48b0-4015-a2d7-05aa7d12b8ae,2,2,4,6 +1717c8f7-939a-48bc-9a5f-b8e74a37128c,5,2,1,14 
+e1e95264-9fa8-4e9a-8352-095b08b30edf,3,0,0,15 +510fbff5-fc4f-4155-ad50-0a2abafd2d36,4,0,0,17 +3c7ee4f1-bf73-4c79-ad64-3dfa67f75647,9,1,7,13 +0ff48b59-0211-4755-9c96-fde93b647236,3,1,6,9 +4ba27137-c563-46c2-974e-3351f8b76739,3,1,3,19 +9273091e-33ad-492b-a313-31927e49fb78,5,2,5,2 +03152b83-d1b0-4381-9ce6-f921ba8e1144,4,0,7,0 +c9c3640b-15c8-40d4-9f1c-8d8991ff6560,2,0,6,9 diff --git a/kafka-students/src/main/resources/observation.avsc b/kafka-students/src/main/resources/observation.avsc new file mode 100644 index 0000000..f48aa59 --- /dev/null +++ b/kafka-students/src/main/resources/observation.avsc @@ -0,0 +1,11 @@ +{ +"namespace": "kafka.exercise.avro", + "type": "record", + "name": "Observation", + "fields": [ + {"name": "id", "type": "long", "doc" : "The observation id"}, + {"name": "value", "type": "double", "doc" : "The actual measurement from the sensor"}, + {"name": "measurement", "type": "string", "doc" : "The measurement type, e.g., temperature"}, + {"name": "timestamp", "type": "long", "doc" : "The measurement timestamp"} + ] +} \ No newline at end of file diff --git a/kafka-training b/kafka-training new file mode 160000 index 0000000..0c192a9 --- /dev/null +++ b/kafka-training @@ -0,0 +1 @@ +Subproject commit 0c192a97c879a35848554b37d35e53afc8da5ea3 diff --git a/lecturers/Emanuele Della Valle.md b/lecturers/Emanuele Della Valle.md deleted file mode 100644 index e69de29..0000000 diff --git a/lecturers/Fabiano Spiga.md b/lecturers/Fabiano Spiga.md deleted file mode 100644 index a7c6f33..0000000 --- a/lecturers/Fabiano Spiga.md +++ /dev/null @@ -1,7 +0,0 @@ -# Who I Am - -Fabiano Spiga, PhD Student :it: - -Machine Learning and Data Engineering (Early Stage Researcher) at TalTech (Tallinn - Estonia) :computer: - -Online resumé: [toxiro.net](http://toxiro.net) diff --git a/lecturers/Hassan Eldeeb.md b/lecturers/Hassan Eldeeb.md deleted file mode 100644 index e69de29..0000000 diff --git a/lecturers/Marco Brambilla.md b/lecturers/Marco Brambilla.md deleted file mode 100644 index e69de29..0000000 diff --git a/lecturers/Mohamed Ragab.md b/lecturers/Mohamed Ragab.md deleted file mode 100644 index 154e9b7..0000000 --- a/lecturers/Mohamed Ragab.md +++ /dev/null @@ -1 +0,0 @@ -Mohamed Ragab is a PhD student in the Data Systems Group. He received the B.Sc. degree in Information Systems from the faculty of Computers and Information, Fayoum University, Fayoum, Egypt, in 2013 and the M.Sc. degree in Information Systems from the faculty of Computers and Information, Cairo University, Cairo, Egypt, in 2018. He is currently pursuing the Ph.D. degree in Computer Science with the Faculty of Science and Technology, University of Tartu, Estonia. His current research interests include Semantic Web, Big Data, Large graph processing and Linked Data Integration. diff --git a/lecturers/README.md b/lecturers/README.md deleted file mode 100644 index cad26fd..0000000 --- a/lecturers/README.md +++ /dev/null @@ -1,31 +0,0 @@ - -# Lecturers - -## [[Riccardo Tommasini]], PhD -Assistant Professor of Data Management at University of Tartu -Research on Big Data streams and graph processing. 
-
-## [[Radwa ElShawi]], PhD
-Senior Research Fellow at University of Tartu
-Research on ML explainability and AutoML
-
-# Teaching Assistants
-
-## [[Mohamed Ragab]]
-PhD Student at University of Tartu
-Research on Big Graph Processing
-
-## [[Fabiano Spiga]]
-PhD Student at Taltech
-Research on Graph Embeddings
-
-## [[Hassan Eldeeb]]
-PhD Student at University of Tartu
-Research on AutoML
-
-# Contributors
-
-## Prof [[Emanuele Della Valle]]
-
-## Prof
-[[Marco Brambilla]]
\ No newline at end of file
diff --git a/lecturers/Riccardo Tommasini.md b/lecturers/Riccardo Tommasini.md
deleted file mode 100644
index 153e38c..0000000
--- a/lecturers/Riccardo Tommasini.md
+++ /dev/null
@@ -1,11 +0,0 @@
-# Who I Am
-
-Riccardo Tommasini, PhD :it:
-
-Assistant Professor of Data Management
-
-Expert in graph and streaming data processing, data integration and semantic technologies
-
-Main contributor of the RSP-QL stack Engine, author of VoCaLS ontology
-
-~5 years experience in innovation and research projects
\ No newline at end of file
diff --git a/location.avsc b/location.avsc
new file mode 100644
index 0000000..6e9b6c7
--- /dev/null
+++ b/location.avsc
@@ -0,0 +1,9 @@
+{"namespace": "kafka.exercise.avro",
+ "type": "record",
+ "name": "Location",
+ "fields": [
+     {"name": "building", "type": "int", "doc" : "The building address"},
+     {"name": "floor", "type": "int", "doc" : "The floor number"},
+     {"name": "room", "type": "int", "doc" : "The room number"}
+ ]
+}
\ No newline at end of file
diff --git a/notebook/Dockerfile b/notebook/Dockerfile
new file mode 100755
index 0000000..ab91c26
--- /dev/null
+++ b/notebook/Dockerfile
@@ -0,0 +1,7 @@
+FROM jupyter/datascience-notebook
+
+COPY requirements.txt requirements.txt
+
+RUN pip install -r requirements.txt
+
+ENTRYPOINT ["jupyter","notebook","--NotebookApp.token=''"]
\ No newline at end of file
diff --git a/notebook/requirements.txt b/notebook/requirements.txt
new file mode 100755
index 0000000..0959128
--- /dev/null
+++ b/notebook/requirements.txt
@@ -0,0 +1,23 @@
+psycopg2-binary
+
+redis
+py2neo
+neo4j
+
+pymongo
+ipython-cypher
+
+ipython-sql
+
+kafka-python
+
+rdflib
+tf-nightly
+
+pandasql
+
+confluent_kafka
+avro
+lorem
+fastavro
\ No newline at end of file
diff --git a/obs.csv b/obs.csv
new file mode 100644
index 0000000..93e832c
--- /dev/null
+++ b/obs.csv
@@ -0,0 +1,1000 @@
+UUID,COUNT,BUILDING,FLOOR,ROOM
+dae9bef5-1edc-496d-8bb4-dd63906546b8,5,0,1,13
+fcb8180a-377c-4e52-b032-1384a3b0b9c4,2,1,6,8
+3263b63d-a202-476e-9caa-e78edbe66d8d,7,2,1,12
+79b7585c-c56a-46c9-8278-b6935fb89c62,3,0,6,11
+df8d5082-c8e4-412b-888a-3292064b926e,8,0,5,15
+9b19ffb8-33b8-4df5-bc84-3ae4880c759f,6,2,5,12
+e0207da1-2ff9-419b-a8a7-7f2559e95611,4,1,5,14
+fdfa60a8-47e3-455d-bee4-76a0b0802112,2,0,4,9
+d5ca2db7-c2a2-4a9e-ace5-ff520fae4dad,4,0,4,16
+954215bf-cfc3-43ae-a70b-bb030d15fd68,3,1,5,13
+37de8704-5db6-42db-9391-18446f54c5ca,3,0,6,9
+634b3010-2a60-427b-a086-31e7a6e261fe,8,2,6,18
+57473145-4e9d-4451-931f-5bf5ac0d26da,6,2,6,14
+2afd174f-d46c-4de0-8ebb-cc540fa11ac7,2,0,4,5
+edd6ba0d-a000-428d-a70a-1723d6dc15d4,2,2,7,15
+2e6fe083-25f4-472e-96a3-4d745107d72d,7,0,6,0
+f60dd459-8eda-458e-a080-6b984f01cbe3,0,0,1,17
+69e7bf8c-931d-4cd9-a359-2fb1866f2698,1,1,0,6
+f3f24adc-5c56-48df-abad-89b826445006,3,2,7,3
+893437bd-38d5-426a-a373-bf6d57ee6e63,4,1,6,11
+1051596c-21a5-47b0-b63e-efab157c92e6,9,2,0,16
+671b82dc-7b7f-42b9-9df8-28ed844c74e4,9,2,6,3
+88789d88-c5a6-4fae-b29a-0ffc62900607,0,1,4,2
+0d3142d7-5e02-452c-9d67-48fef7dac338,5,0,6,18
+ad64e144-62c5-4a7d-bad6-c93e7ec02d4b,1,2,6,14 +8ac9edde-a1c9-4350-8a16-a6db00c64f14,0,2,2,3 +4071de10-7dc1-49d3-aa6c-693a9e6b1196,6,0,3,2 +18683b33-5c51-4178-b907-becf2347edd8,3,2,6,18 +1a72c1ac-a41a-4bfb-ae33-d8374503b1b6,0,0,2,7 +545d4eee-7849-4d24-a293-881ca3adec71,4,2,4,2 +ce7bc6af-4454-4e76-8a43-8bd7965a7c2f,4,0,1,11 +120a26f2-3119-4c4d-badd-d9bc8eb6c6d6,2,1,0,15 +4c589a4c-27d9-4deb-ac4a-96850a5c4126,6,0,5,12 +553dbef7-a38c-446b-a4ab-2427811ce0b4,3,1,2,7 +d408dfe6-b215-485a-aad3-528d4143f188,1,0,3,4 +1e1c183e-d4be-4513-8e1f-a6d8ef9a1331,2,2,3,18 +bb69bb41-b19a-4090-9539-43caee72a910,0,1,7,6 +2a237f9b-56e4-4b39-9ad5-3d7f29149d9f,6,0,3,16 +5f9dd42c-812f-4210-b330-32806673a98b,6,2,1,1 +a62429cb-313e-49cd-a1af-a8c4d280a0dc,2,2,3,13 +ea9af3d1-9639-4dc7-9614-772452b0bd06,9,2,0,15 +d9e0206a-2f06-4109-84cc-85af4b2f97fd,0,2,6,0 +cb853dd8-b88e-4573-a5ef-301b758aa846,3,0,2,8 +4c158ef7-487d-44ce-b1b1-dd66831c7fa2,3,2,6,15 +fc6e6696-d653-46d6-b8f7-1b1f0762584b,0,1,6,9 +6dfd7d4e-8cb4-47b4-a026-30ffb07fdd79,8,1,0,15 +b6a4e41e-2287-4ec7-ba92-800d83555131,4,2,0,9 +7b69f839-9d9c-4f0d-8f25-f644d7b5b7aa,4,0,2,14 +5d58f542-1dd3-4619-9a0c-bf47c5741916,0,2,0,5 +3dc47bfc-5b5f-4e86-ac6b-58a424fe24aa,7,1,5,15 +4ec4832e-e888-4c2e-9d69-f4b184279f6c,3,0,2,6 +8ac5d63d-ec6c-4593-8a0a-2dfc181c91f0,2,1,2,14 +e3daf6ab-a341-4abc-a790-e29a289f9397,1,1,2,18 +87342742-f37d-4fc9-a727-0529aaef8a50,1,1,6,16 +b1e980d0-3ef4-40a5-9f42-35c28c1336ea,3,1,6,2 +e5e1bd72-3457-4506-8b0b-0d5c13c0dcce,9,1,6,4 +0329f9a7-1a23-4de6-955a-7473804c3494,0,1,1,9 +266c8d81-4e06-4724-8269-1c29de943640,6,0,4,8 +e31b24ea-f609-4942-8884-a0428eae66e6,6,2,5,17 +402eb843-aa23-4771-b5b2-0249ee51ac81,4,0,1,17 +4cb45b99-e4f9-4d45-b6d5-dbef4878916e,6,1,1,13 +3fb02401-d26e-4ddd-961b-406d47d49ddd,4,0,3,18 +c6e9e17d-34da-4fec-9367-549b4a1a7e07,1,2,5,15 +24e47d2f-c48b-489e-9c17-b5ad0e452430,3,0,2,19 +8bdb2298-d7f7-44d1-82f9-5071916177e0,1,0,0,0 +bf16381d-02e3-4891-b117-c32b816c18e2,5,2,3,4 +e302bf57-f11a-45e8-9544-7c862c0fef55,8,0,6,3 +cfe2ace5-d93f-48c9-bd04-817343e61c6e,1,0,2,17 +76287b00-f7ff-4a8a-bcb4-928fe237b165,5,2,4,3 +590cf5c4-45b1-4a51-9d6e-466da5db43ab,7,0,3,2 +1251d399-a94b-42f0-a3b8-8564a6f50608,3,1,4,10 +07b8b43e-2e6a-475d-a261-d15b29b9154a,6,0,4,7 +549d9bf5-ef29-44ca-b1d1-bd5c61b81ede,5,0,6,13 +0b721d1d-d523-4fd2-bdb2-6b542cbf037c,0,2,7,6 +0763a9f5-eaaa-4c64-addb-c6023e98adb4,8,0,6,5 +e906ef94-3cd5-4d49-be0e-a188cfbe1619,5,0,3,5 +b01ea143-c580-4a6a-8cfb-285821cc4d47,2,0,4,9 +4a1ac5d8-40ca-427a-b854-5f9ae1580f46,3,1,3,15 +dc170ea4-6635-4d41-812b-8c72f551e87e,5,1,4,12 +49c28b2e-9471-42a6-8772-76edf833f058,4,2,2,15 +8ef67c6f-3c87-4688-8298-448d63882869,5,0,6,3 +b94315c1-4e13-41e4-80e3-af3b940f985f,8,2,2,8 +fe8a0138-28e7-4465-8008-d0835f031621,5,0,3,7 +2185f9be-cf70-43b5-bdae-8ab7430e8938,6,0,7,13 +6f8e5eb6-c04b-4213-a71d-172cfcfe0b75,2,2,5,7 +757852e3-1b28-4e7c-a24d-d0e0a3319e08,5,2,2,7 +9de43373-b1ea-4b9b-ab2b-d14927c21393,2,1,6,13 +608d0035-7595-41e0-a723-ebfc1a267cf7,7,0,2,4 +233985b3-2e07-40b1-bbb3-252ebe0584cb,8,0,2,1 +7c8c9e89-1f19-4aaa-b166-77499c179bfd,9,0,4,8 +5e7dbba4-3621-49ec-820d-b38296f6089c,4,1,5,19 +bb1e911a-6e7c-4308-9375-e80e7e32c4b8,7,0,3,10 +9c2f844f-1608-499d-952e-3b56a2fc9bc9,7,0,0,19 +c2ebc9a0-11a3-46be-9827-51ebb9c578a6,2,0,4,6 +cf671048-61e2-4b66-b8f8-9c3111bb3754,3,1,6,0 +c9d48dc1-b5ca-4b8a-8bb5-f4d675a4930f,2,1,0,16 +24094670-9bda-430b-b12a-3c82ed8f818c,1,0,6,9 +9707e3f3-e0bf-4b15-aa3b-b4de4f36df69,6,0,6,14 +3b64d2c4-cf04-44d9-9c07-1469081dbc85,4,1,2,5 +aa4b0745-dd84-4519-a932-b79e7c230cc6,5,0,5,0 
+28569f65-c2fc-4373-8b94-370a173de9c1,8,0,4,8 +32dbb4d0-47b0-44f2-8781-6fdd013eba95,6,1,3,7 +9af80929-24f9-4984-be46-a0ec43b7ff08,5,1,4,15 +c201a4dc-4878-44ad-956b-0b8205955df4,0,1,2,11 +572bb5b8-6a0a-4568-a57e-054c0c092f07,3,0,6,8 +2e19a17f-fc94-46e4-aedb-a73c4e0651ed,0,0,2,8 +3eeb30f9-2d97-4994-b15d-9a6d58ed957a,9,1,6,5 +f1f67405-149e-499c-b15a-b6460cfc85f5,7,0,7,11 +70cd9f65-336c-4a31-ad5c-072669237498,4,1,1,4 +608d0ecc-c916-47c0-9e9c-193796d25549,2,2,6,8 +2e67adbf-9f86-4a10-844c-3a69ed66e0cb,5,2,1,11 +dcbd65f5-adb4-4e58-8321-51b202582adc,3,1,4,11 +9256ac74-4ba8-49e2-9138-ff0e258942f1,1,2,6,6 +59689c93-a56c-4a21-890a-adf6b017f36b,1,0,4,8 +1d9a6ca5-7a86-4672-a76d-b675680f6d69,9,0,7,0 +13b95727-4401-4492-8468-b664fbc54bca,8,2,0,14 +012c6700-7ce6-4734-a1e3-a4f3f9aa5a49,6,2,0,1 +f992d3ad-0ce2-49b0-899b-cb7d8f1cd72a,6,2,0,3 +65ef4e36-ab8b-4b9f-a41d-d7c1532689dd,5,2,6,13 +9b6d7e9d-1988-4003-909a-29cf8cd14bd0,5,1,2,4 +6dbab413-b7c2-4740-841b-ad99824b8bdb,7,2,6,17 +f7f3e5f0-62d2-46c8-b1fa-efc9e51b71ce,2,2,1,13 +94155616-bfc7-4b4f-b148-9ea6378d909e,5,2,5,2 +3e1ab79a-d071-4d5d-8276-ddc47d171ae7,0,0,4,1 +52911969-f75e-4058-9df1-a6478095ba39,7,2,5,4 +da374458-baa5-4f17-b03c-e7493d99e5cc,6,0,3,19 +112f95a4-a53c-49e4-bcf0-9ce25e9a7f5f,8,1,4,6 +b745e976-5407-44b0-84d8-fde40359fa42,5,1,3,11 +5a3b6320-a811-4c54-80ed-315d739582e2,4,0,5,1 +5fa06752-489a-470e-b064-5d84298c2e78,9,1,0,12 +7aa00621-7eb3-4034-8e7c-82d8ff79ba81,4,1,2,2 +ec280e4b-7d18-4b0e-aba7-a70e58842ed4,5,2,7,14 +b258176f-8cce-4952-bc8e-39b4ceec48eb,0,2,0,13 +af7aef87-a4de-4c8f-a7e5-639b6b227af8,6,0,2,2 +fd991fb6-09fc-4e89-be12-8cc3e4c5b348,6,0,7,4 +06765458-f6ba-4b66-ade2-b1b624b30cc3,9,1,0,4 +0df73f08-1b7d-46a3-b044-a655dcff7823,0,0,1,14 +5c17ccf4-8a8a-47e9-aef4-dff32bdbe70f,7,2,2,6 +660a23f9-80dd-439a-b73d-0644dd56d8cd,2,2,5,13 +0ec7a78c-5e2d-45a7-a5fb-9a888353b161,2,1,1,5 +0ec1986b-b565-487b-a3a7-64c66d5fe905,9,1,0,1 +77a667ba-c291-40f9-8f80-15f5775aac7b,9,1,3,10 +f18cbd95-5eae-400c-bcf3-6ea17df16ce6,7,2,1,6 +7dbed679-7838-4490-83c1-d72dcfa77fde,3,2,0,0 +2c8eb408-95e1-4e22-9ee8-4ca38d7c7e1c,8,2,1,7 +fac8c1a2-acf9-4fdb-845b-2ffa87313793,9,2,3,13 +bd4793cc-66ce-4cda-a1db-6b0a4e9156e4,9,1,3,15 +d3da5771-4ca2-46c1-ae9a-068bf2fde52c,8,0,6,10 +334be3e1-43e5-43f9-b34a-07d212a4d280,9,2,3,16 +10bab532-b948-47ef-80d9-dec25d71c52c,8,0,0,13 +48425023-4fc2-4d60-b585-774c7baeeabc,8,0,3,15 +dce49f55-e82b-48ee-891a-1fbecd7bddb5,8,0,5,10 +a3d51a88-45b8-4b93-8db4-cb2315f818f0,1,1,3,19 +ceb0e2e2-132a-4a52-bda2-d94ea033d81d,7,0,1,17 +63e5663b-17bf-4ff3-b67a-c15fa97e0042,9,2,4,18 +1c177536-8649-4382-acab-e8f3a8b849d8,8,2,6,19 +720d756a-4261-4682-88bd-99cd98b867c2,2,0,6,13 +f19386f7-dbbd-49c3-bf18-0e14a9505813,9,0,0,18 +5b4f847b-be4e-4657-8cd6-cbe8c3be5e43,1,0,2,6 +842f1f21-9b70-4739-8110-ac3027099ce7,4,0,0,11 +1f4dec1f-736e-4f82-9f73-900527076b3c,0,0,1,13 +b49f7aae-3842-4b43-832a-bfbd09e64822,0,2,0,14 +f63b397f-2149-4b22-8081-492ffa63ded4,6,0,4,16 +2012e82c-4ac5-4bc6-b5f8-17302717d9a2,3,0,7,0 +6eb5af96-4e61-4ee3-9b6e-96d2a23944d3,9,2,4,2 +e26156e3-13fc-4783-978c-5264123817a7,6,2,1,18 +09697ab8-db1f-4702-9b74-8b6ce2b66f4b,7,2,1,15 +8315345b-859e-42be-9e32-56dfc56ff354,9,0,7,11 +664cedb2-af7c-425f-942a-29c3b819f5cc,7,1,4,16 +ccc11978-94df-4e8f-abeb-876a085708c8,0,1,5,12 +f88e9ef8-0925-45b4-b288-1d5b9647a19d,1,1,3,0 +f18ca24b-1215-4423-a55c-7ab2a4500c58,3,2,6,9 +91c2b8e8-43ab-42cc-a70e-d37020bd9b06,4,2,2,2 +d144fe64-da38-4ad4-9fc9-0b7a014a0541,7,1,4,1 +af992593-a736-42d5-809e-2bf33f90b726,2,0,0,17 +95c13d8a-77e8-42e6-8b99-a99172082cc7,8,0,5,8 
+67527c79-897c-45df-a587-7b0736cf8996,8,1,7,2 +73fb31b3-e9a4-40d5-b76b-1f37dadb7759,8,2,6,11 +2a3b9fbc-4f2b-4404-a9ed-9442a6dc7fc2,8,2,6,9 +ff3f3f92-2535-41b5-a757-839ab49d4cfc,0,0,5,13 +15219ed0-49ad-41e9-9002-173dc0bf0857,8,1,3,2 +01cde457-22fd-4f78-8177-c53cb81443a4,4,0,4,19 +9eb98339-7029-4efd-9e5b-f4883fe6bcd0,8,1,0,17 +bc2bb33b-7172-4663-ac00-31400aef68f3,4,1,7,18 +287bcfaa-41b2-4470-80f0-2d1dfcd9f340,4,0,0,15 +29f7e7a2-a3bc-4210-99a2-19e46af92448,1,2,6,13 +065b1c2c-e5d5-4b15-bed9-b38508e9f603,9,1,1,19 +7ee187c7-b067-4cde-abce-706dda4c5c93,3,0,6,9 +8f0be669-230e-4aec-8e25-d7890215a6e7,1,0,4,8 +c8323afc-7214-487e-a0f5-46f7697d438d,4,2,7,10 +7132cfac-0696-4a7c-9b77-382a6cd4ecfd,7,1,5,11 +16efa524-8315-403e-9b93-c7d227d406eb,8,2,5,10 +4ba44c3c-4b9d-4d03-9276-fa80dbde4594,8,2,5,9 +03c29727-52b2-4112-b579-d56beba4c2c2,3,2,5,11 +7913ca2d-c94b-4618-9393-58f5a5bba741,2,1,3,12 +60417a84-ecf3-4642-a253-7a98adc15721,5,0,3,16 +3eea5d1d-36e7-4085-8889-c4d6bb434cee,2,0,5,1 +402243a0-9065-4396-a761-e6a991ac6e0d,3,0,3,9 +25592754-5e52-401d-97f6-93aee6d93361,9,2,5,13 +707a66fd-6926-4cc7-a782-14783c510fa9,3,2,4,16 +bb62a5b2-0506-4d47-8f62-b6d84bb1b9ea,0,0,1,12 +559087b6-d895-43af-9778-03172326ac1c,1,0,1,12 +0e0ecf90-8d5b-4c24-a543-bc094db11d5a,2,2,3,14 +42d8e1a3-77bd-498e-a75f-4954c4bd559e,4,1,7,18 +c0cde665-da96-4ff9-8438-eaa56fdbecab,6,1,1,13 +615f2693-32c2-42ca-94d0-22c6376bd6cc,3,0,0,4 +1435abc3-5f6a-441f-8390-b260ee6b663f,5,1,2,19 +1532df42-c29c-46a6-9949-78c3b721832a,4,1,4,12 +2dc71ffc-b5b9-4cf5-9a81-efabbed5d5a9,2,1,5,1 +8cb7808a-51f7-4176-9c7a-43285cfdc5b4,8,0,6,7 +547ee688-2a5a-4106-bd93-7ad3ca1c7496,7,1,1,11 +5110167d-9f75-499d-8594-9505cc4bdbf2,1,2,6,16 +2b8a20ad-86f9-48ad-9169-34d64c1224d2,8,1,3,6 +fef35d84-5118-46e0-9547-39a73640eb14,5,2,1,2 +6d86696a-827d-478b-b4bd-3d869db4543a,6,0,1,10 +b10a75be-f944-4ae8-82b2-71331aa5d330,0,0,7,7 +12286b84-cc03-44ec-b417-c5800d42ad02,8,0,7,3 +18289c28-6ec1-4cfe-aeba-bfb508261ac2,5,2,7,5 +1b47836f-009d-4bb4-8647-75cb9d8457e2,8,0,2,3 +d5a60600-dcee-4cc4-b273-5cb3147f5661,9,0,3,9 +86058523-f661-4f42-8dc6-a5b5796953a9,3,2,3,12 +7351038d-8ee8-4f3c-8504-dfa09ea3f123,3,2,3,4 +5bf779be-f986-403b-8d97-db24a6e27e07,2,2,5,4 +0bcee255-17c8-4e1a-b9c1-e5fc2b10fbf5,2,0,5,1 +63e54158-c9d1-4477-9f86-5675076d17e2,1,0,1,5 +bc2978af-01a2-4db0-bf7c-85ca43cf729e,3,0,6,2 +e86ced3a-1d3d-436a-bf75-8c052721c787,3,0,1,6 +7a8f9cc6-42c0-4465-ada9-370f01e0b247,3,0,6,1 +30771e35-c26a-4025-860f-2261bfc247a3,1,0,7,10 +3a600fb8-a5dd-43f5-9da2-70796340017d,3,2,5,14 +846faf2f-fb9a-485a-8ebc-03c0af20e4fa,3,1,6,1 +d4821dc6-ad0e-4488-97d1-cdeb347ba491,6,1,5,18 +762816f0-2ead-43ee-b476-f67e516b19d7,9,1,1,0 +86c28838-4949-497a-98e6-09fe7bd23446,3,1,0,1 +716e14e3-0362-4fb9-a21a-93d8bfcc003a,8,2,3,12 +979ef36c-fd8e-4d16-a5e7-b813e32c5882,8,2,1,6 +280be36a-5553-454c-987f-6a39c3a86109,1,2,2,11 +99e47cdb-6c4a-4d4f-a2ac-7fa958d866f9,9,2,1,7 +fb1072cd-253c-4753-9276-b82adc903ecb,2,1,0,0 +993b2a5c-2eb8-4dec-8aea-10fd8ab43b0b,0,1,6,0 +2f8cd00b-d84f-4227-ba26-5cf41d7fbeaa,1,0,2,3 +b1169144-d166-438f-a148-ec494d15b933,0,2,7,6 +c0c1f1a7-8420-4440-a81d-4e9476a9ccaf,1,2,2,6 +ccb4c7e9-3e52-4ce4-932e-ce2e088964f3,5,1,7,19 +d5756e87-0a58-4dd5-9a0e-bb0a3d02ef12,4,1,2,9 +d23ca3ce-d31f-43f6-935a-2d38b96bb529,2,2,1,17 +1e59e881-6777-44df-b4b0-336fadfa7f98,9,0,4,2 +289eef08-bc21-47f1-9fa9-2f549bbca5a3,8,0,1,8 +124c5131-b73d-4cab-9458-c9778a160a56,4,1,6,9 +a346b4ed-ae32-47b2-8776-4bd0775f9eb5,1,0,2,16 +e4e37b06-2cc0-48fe-8f3b-7381f8f7e162,8,2,4,4 +1270e32d-384a-4128-9501-98d0e4f19c98,3,2,5,16 
+641cb43e-1514-4833-b5eb-65001ddf67cd,9,0,7,17 +cb6c3324-b447-4fd1-9910-4a1ca6d15bb2,5,0,7,9 +33ae79c1-571d-4956-b350-eded0ce9db5e,8,1,4,0 +3c3bdfb8-96f5-4c6b-9489-258dcc0abea4,8,2,0,2 +d9e02b0f-aafa-4b50-8cf7-4ce8bcb1c0bb,4,2,2,13 +98ac4b13-d090-4805-9904-d8f9ed4be56e,3,1,2,7 +5fdabb09-a5da-4af7-a80a-598fc41f62e0,0,0,5,19 +ef23fe50-f33d-4bcb-9394-114c34062072,9,2,1,6 +18b8143a-fdea-4048-a4a6-ef99adaff36c,8,2,5,9 +a50bef64-b877-406c-9114-c5e7f803f48f,3,1,4,10 +2f87d35c-1f55-4296-bf46-29edc69a9a0a,6,2,7,7 +9c1b29c4-aad8-41b8-a75c-91403880e35b,9,0,6,15 +5893384f-e43a-46d5-8ec0-1609b6239071,9,0,3,0 +a0a55a0e-5ec3-42fa-92d9-8dc04a78f447,9,0,1,18 +f7d89179-7e1b-42a7-a59c-a16e3631a5a1,7,1,4,11 +eb4ab321-0b50-40f0-852e-006952113c73,1,2,1,12 +b5aa9d67-5f71-4068-abb9-deec65a72b27,1,2,6,3 +1f06f836-bce3-46db-a511-7b07e66f56fe,8,1,7,3 +124e1af9-5b31-4d38-91c0-95e95eb572b9,6,0,7,10 +15af9834-b20a-46ec-9ea7-c7ba18878ebf,9,0,7,16 +968ecc15-9662-4e74-8147-6f5901cefc99,6,0,0,16 +976b8974-a550-468c-a7df-e5913bf70411,0,1,0,11 +64171fe9-0671-48c3-a9c6-59c02e275665,1,1,5,3 +f07b5273-df74-4804-95ff-a22dcd9082de,3,0,7,14 +a51f7ee2-2de9-4ce4-8883-bfefe6514a53,8,2,5,14 +17519ee7-7b5e-4427-bcf4-2ac0b275b541,5,0,2,13 +b50623da-6196-43b0-a19b-a611ee824b74,7,0,3,19 +3c730002-ac2f-4dff-b896-d778e35b7516,7,2,3,8 +5640a15e-f143-4f47-86d1-23e644e18df1,9,1,4,3 +f4faf795-229f-4532-a4d7-3e8a4027fb52,6,0,5,4 +155ad45b-1245-4ea3-ad7f-06a21f9ba5b7,0,2,7,5 +e2d55ee2-7358-41f4-91af-d1da792fb91f,9,0,2,19 +10a64765-2f33-4dd0-90f0-8e99f6ec53b8,2,1,7,16 +468a4ad0-32ae-4931-ba4e-a71a8d8263d0,5,1,2,15 +688019c3-68ba-4577-a9cc-23c522b0aa4d,4,0,2,16 +1f8a1273-7422-4db2-84dc-03c331e28fcc,4,0,2,9 +988528a0-c5a5-486c-9b83-c4ad35d167f9,9,1,5,3 +e06ab573-944d-41ec-8550-85b182b0c6c8,0,1,1,16 +07e3bf78-6c62-4b16-b57d-d13d8743929c,4,2,1,11 +0085071a-98e2-4aaa-81eb-7f40be15cb33,2,1,6,8 +c1db1073-de2a-4224-9982-407293cfb974,5,0,7,5 +c8eae414-833b-4ed9-9b77-4f5bc0927649,7,1,5,17 +21e540f6-3c54-4037-98d7-ea4ed1cc790b,7,0,1,8 +95ee3fd7-679b-4f62-8ef7-b00a4b4a92b9,6,1,3,0 +436f5ded-fd92-4bd6-8c74-50f96e4c2ca9,2,1,4,4 +f235f674-6490-443c-884e-f5d9090975c6,6,1,6,3 +a78addc8-d50b-4f34-b67a-331789bef07d,9,0,3,13 +54a4bbc8-a034-404e-b2a7-87cd024ff845,0,2,4,12 +ee0ad94b-710f-4835-b8c0-f1e1f11b803a,3,0,1,1 +c0876962-3fe5-4671-a915-7e952972bf78,3,1,4,0 +7e4a8b88-e30e-4b59-a7de-521205caf7ac,0,1,1,1 +19513f54-8e3f-4e5d-acc5-50586875610a,7,0,0,6 +66be367e-e120-4b90-98fa-586fd737c967,9,0,2,7 +caf6ae4c-3599-4ff6-a76b-2db9b4854b21,4,2,3,4 +72058e31-8383-4d50-bf82-b36a95265889,5,1,5,13 +57274431-7899-4fa5-93b3-dfc943be77a2,5,2,0,8 +ab0d5aad-1e21-49c8-bb0d-cf6d4f3e5f2f,1,0,7,3 +4b7bd29d-dda4-4845-a8c4-b1828a03e0a7,1,2,4,3 +ac19acb7-faac-4ac9-9338-38d98e35b8f7,2,1,2,18 +b411462b-be2a-4425-8bbb-673489294ea6,0,0,5,15 +b31ec535-8dd3-473a-af08-880ae9ba4b18,8,0,7,13 +bcd6f70b-1625-4ec3-a7d0-c38cff82ddda,3,1,5,4 +abc751b3-68ee-4828-9ac0-a6d1d0b601f0,9,2,0,11 +6c72681b-d115-41a1-a96a-24a0e7235c22,8,0,0,8 +e9dc75a1-46ab-43d6-9a66-de8d60d6cd47,1,2,2,15 +7eaf5fe4-e9a7-401c-acf3-15406a2d6949,3,1,5,11 +a526cf54-3996-40c0-9856-aa83c6d5d916,8,0,0,16 +18589c29-18b4-4dbe-aeb6-8df188841f4a,2,2,2,8 +ce8272bc-c537-4ce9-8056-e62879cf07a4,9,1,2,11 +6bcccfbd-7d67-4986-9227-10a91afb267a,1,1,4,4 +158c9e41-9654-43d4-8b75-a148d3dc8de1,8,0,2,4 +8b5afa3b-d5e0-44d2-bd5b-fc0c6ba5c1e6,8,2,1,19 +23ad7cb1-c8db-4c20-b0a4-2acac67da15e,9,2,0,17 +49b48195-c055-49b7-9e9c-9762e7500444,8,0,7,1 +c216df38-b5d7-4220-aa1e-0bd20eba02df,7,0,2,7 +4f402ced-846d-4a2d-943f-99c633fd986c,9,2,6,16 
+47f7bc92-d5c5-4d75-becc-07c3f1a39a14,2,2,2,19 +62b3084a-6b1e-4ac5-a11a-f38c358edc96,5,1,6,7 +325a046b-ef3a-4522-ab7e-4f044cd4705b,8,1,5,4 +048c6edc-7af6-4f25-9102-10c89619edb3,9,1,7,14 +f8b2fcc1-ecce-4258-a8b5-4e4da286d14a,3,2,7,0 +a08a6914-030c-4233-b075-7a33d8779a7f,2,1,1,4 +62ed3c6e-9d87-46ae-9e45-4d514b0fee42,7,1,5,3 +2b0c1eb9-7fd7-46a2-9a12-4da239213059,9,1,1,11 +5994afad-de9a-43e9-ab1a-4aacf52da835,6,0,1,1 +90f7880f-d3f5-41b5-ba49-c919246f2924,1,0,7,5 +dbd98636-195a-47bc-82e9-a7b59e3cf3de,9,1,0,9 +56292a3c-4979-4171-950e-c4792dc9d3c2,2,1,2,11 +3173bbf4-4d3f-45b2-8c40-360ad26bbac7,1,1,5,18 +afe86574-2a68-4ccb-a7e5-61139555b5e3,0,0,6,3 +01b11424-73ff-4484-8e6c-dfa82207ce3c,5,2,5,13 +8a72bfb7-9d44-4a2b-92d9-9f6a67e03803,4,0,1,2 +bd497fcd-949d-4940-afcf-55dbfd1e67d2,8,2,5,6 +50937541-1561-49ef-ac7d-82694f9474c4,1,2,1,12 +faf0ba6c-02b2-4086-ad85-a09300fdb86a,8,1,7,14 +8da8a04c-432a-4cf9-a92c-566a13f405de,7,2,7,14 +08ef300e-cb76-41e4-bdf0-8f3c3f219efd,4,2,1,9 +d6c92492-c8df-4db8-995c-20d04671c2dd,0,0,2,18 +9b20a146-2321-46e3-af22-7d59db93d0b0,5,0,0,14 +0f745354-7d21-4e95-a2d2-1e14fbc01590,8,0,7,7 +b4071bba-4ae6-4e05-b3ec-6d422c95b766,0,0,7,13 +80553266-637e-4d13-a2a1-4cad4e81442c,7,2,4,14 +6a64c06d-e701-4df9-bcd7-25b5af5764a3,4,1,4,7 +577eb697-b927-408f-9535-a15ac8ab1330,0,1,1,3 +24627c97-f2e3-44ec-84c7-1cd8327587bb,4,1,3,10 +249cf333-57af-425f-9435-c76073c97c29,1,1,7,10 +e24cca7e-275c-470c-a11b-323c47887368,0,0,7,11 +f89f0a19-84f9-4cd9-9c46-87f89f5f0915,2,2,3,3 +683d0b1e-ae58-4aba-bc43-2391520d29aa,8,2,1,13 +74f04aa3-13ce-480b-a5fb-1e4e9f65fbbc,4,1,0,2 +3afa4acf-2e0c-4cce-a263-6f9511187e58,5,1,6,19 +595c9c94-6ad0-434d-bf32-e42e0fafafd8,8,1,6,5 +641c35e6-419e-41b8-8988-45c7858a038c,4,2,0,0 +c754f990-232b-442b-b356-2fdf16f621d6,3,2,0,16 +1d974ae7-49dd-4ee4-bbcc-ece4b93a6a11,8,1,5,14 +06601f17-dd23-44ff-babc-f84fdf87e2e8,1,0,5,2 +c798cf71-1957-42e5-8c00-9fc232306c17,6,2,4,6 +2075b0ce-6f3f-4427-8e9f-33c554ab7e60,2,0,4,16 +d444630e-74c2-4603-848e-245c013891d6,7,1,2,8 +d74073e5-5f25-4552-a1fb-00205ead738e,1,0,4,13 +c89e8ad2-0d9e-4c2a-9772-770fdb2eac27,8,2,1,17 +ca1bc2dc-f947-4321-88da-c0066ba7d592,5,1,6,9 +17b002ff-eaca-42ff-8046-0140f847a8fb,9,0,7,10 +471347c3-7608-4058-b454-d1a5223dfe1c,5,2,2,2 +b08cbaff-36de-4e63-9093-9438f9ec382d,6,2,7,19 +ff722302-3e19-49bf-b152-41418b7d7416,4,1,3,16 +cc0ecfc9-064c-4e30-b002-3716a05a935d,0,2,0,2 +2db6731f-4a4d-45b7-980d-527afb472be3,5,1,6,10 +e547e0a7-f8e7-492a-b1d2-ba4ccffbd0c6,1,0,3,0 +e8ff1f74-6324-4cbd-986b-d8fec49b203e,6,2,5,0 +1dab1a0e-091a-499c-94dc-2a5792c49b78,6,2,6,19 +a069b99f-da79-410b-8720-2cc05904fdc6,8,1,6,9 +db95ebfb-9f9d-4045-ba1a-88a5830d2eb9,1,2,7,13 +0455e7d6-daf2-48c3-854d-08fc286850da,0,1,5,4 +f53764e2-5bb2-405f-8555-d0561e6fe1d8,2,1,5,5 +671faad4-8504-4f44-855a-c09d249c4fc5,8,0,3,16 +ab47f4c1-0ec4-4ee2-b78b-c87baf3b708c,5,0,1,3 +d88917d7-3e5f-4461-a1ed-60443e538807,2,1,7,3 +eb6dd61e-bf2a-45a6-8e1a-2e5d9700d6de,0,2,7,6 +bbf32df8-2ae0-4180-bc18-f9d28c17b439,2,1,0,16 +4640a5b6-9686-4df3-9edb-f35d9d9ce949,3,1,0,7 +485be11a-adfd-46a8-8a04-ff8849926879,1,2,0,9 +9db74126-2fba-4f92-b7c2-14be64e8342b,5,0,3,18 +7ca9a07c-df5c-4d7c-ad32-2a52a0e8fb16,2,1,4,17 +f5afbf2c-4741-40a7-8202-5fce6f572b9b,6,2,4,12 +5a98b2b8-9f02-4cbd-9574-2e57c2d1ba35,6,2,1,1 +b52c848d-ff16-4682-8042-9701a0970375,3,0,6,4 +77cbc2dd-481b-4e1a-9e34-b3975c3e6d81,8,0,7,10 +f42769ee-c61b-43ad-b796-2f41ca1fb1cd,0,0,7,16 +1a1b07dc-8255-4bed-af38-a1e6cbda041b,5,2,2,0 +5f8446ff-5a09-4855-9530-bbcc0c6fc2f0,2,0,3,8 +e9b9397a-eb04-4ee6-8d92-bef8f66f5bb6,9,1,2,14 
+8085645c-9154-4036-8992-5fdaa4aaed92,8,0,0,19 +c3da2e13-af6f-47f2-92e0-a48c6f2af5de,0,2,0,0 +0ecd0f82-501f-48ec-9d67-65a4ea97e69d,7,2,7,16 +ee8e727d-3b80-4491-bc7c-fff70ea30f56,5,0,1,11 +1c28ddd0-e9e9-44c7-87a3-76db1b81da2c,4,2,4,9 +585cdd23-e298-4d80-877b-c16fbb4f1c4e,5,2,7,15 +fdef567f-e39c-4f54-ae36-fcd213a7bb6a,4,0,7,18 +2dc56fea-15e3-4866-acdd-9f9d3fd9d170,2,0,0,0 +2eacb10e-3b95-4dff-a947-46308c0a5abb,1,1,1,1 +efa33dde-bd19-432e-b831-1efb2c1cd1cd,8,2,5,2 +619cf346-7640-41c7-b274-093562406cbd,6,1,6,3 +f01f0f18-061d-4ac7-96b6-45dbb2b29a44,0,0,3,10 +d0965ed5-f9ab-4e06-b249-e86c26f91322,3,0,1,15 +da866b32-472f-4824-aed5-cb6e55848661,4,2,4,7 +52100206-4f2a-42d7-add9-85cc62e3b314,3,1,7,7 +b428c0c7-a1ec-4e33-b1ab-50ca424c283f,2,0,4,8 +043d4fce-4c56-46ef-9257-17022ae003b4,6,0,6,13 +952c7e67-8a88-4f31-8c75-160ea10f3994,4,1,7,12 +6aca944c-ad76-4a51-8177-ff131f70e992,0,0,3,2 +a188050f-c260-4d25-a226-3e1c7701b6c4,1,0,1,19 +f7dd6835-e774-4221-bae5-b306f113119c,4,1,7,2 +d41020fe-b812-4a6b-8e79-488a86776b33,7,1,0,19 +7a75a6ff-b673-4d58-8f88-f85c2604aa56,2,0,3,10 +b11ab9c9-24b1-44ff-bddc-66a0c4b7bff5,1,0,5,5 +f55613ee-7954-4eee-ad89-104a435f121d,1,0,6,15 +95c33b88-b374-473a-aea6-2d90eaa89db5,0,1,1,3 +f8f5323e-391d-4c58-96dc-a5f85c994f54,3,2,4,7 +3e82bf4f-64fc-4414-bcae-89391605932a,8,1,3,7 +c19ae122-6379-4307-afa4-1c0b92a55184,3,0,4,6 +bde23803-566d-40e5-bade-a484e0552592,3,1,1,3 +2e8bb56d-29e1-4ec5-8cb2-16b428818826,8,2,5,6 +d53c668a-f8b1-4abc-9cbf-4b7f85e16cb1,6,1,2,8 +0895b9d5-f3d2-4717-a3bf-39f5938d4583,1,2,0,9 +e8a23a12-9080-4957-a8f7-e1956776d5dc,1,0,3,9 +b5464f6f-8f95-44d7-97a1-ffe8911e53af,9,2,2,3 +be0e163f-bd2f-4224-8639-26fa9266d5a8,9,2,1,15 +a845b5a1-09b1-4cbe-9141-b896ec883229,7,1,6,16 +2a8f2cc3-b669-4f1f-b979-e138fdc3a1e8,4,2,5,2 +c58f4fe8-db73-4f50-a54d-6cfdcf299d3d,3,0,2,8 +6b5c4b71-7deb-4022-99a1-64b11a0c0738,0,0,6,1 +8b4ddda3-16f3-4c6e-9cc1-9eee89a64991,2,0,0,17 +81af75da-3ecc-4e5d-a677-33d81c4bbe18,6,2,5,7 +075c9248-244b-4a26-adcd-7110424559a9,9,0,7,9 +b9f62146-b7bc-4f9a-831d-c1a2524c6ca2,2,0,2,6 +2de4e04b-0af8-486b-b6d0-11b1b2499927,8,0,6,4 +bc331b02-a523-413c-947b-48af7426af55,0,2,7,8 +1c615e77-8eff-4088-88d0-bf3ddf784abd,7,2,4,17 +644ddc21-3845-47ec-bd67-2a88eee84bce,2,2,6,1 +41f73c56-1ab8-4ce9-a011-a92325172628,6,1,4,10 +ffb6b0e3-1ebd-4d5b-8e87-e7c9d1aaf0c1,6,0,5,6 +3e240868-9dd9-4906-bb23-1cf796b9139c,4,1,0,10 +c9990e39-c7f9-4f07-89a3-a3d40c439fb3,8,2,6,0 +9e1b023f-d3b0-45f9-8d00-b3696d17e373,5,2,5,7 +1eb0ae60-3c63-4245-b77e-0cd624f488a4,0,0,2,6 +9f61e5f4-37de-49b8-9d10-96106a7962ff,1,1,7,13 +cbd77939-9255-4987-9a30-13b6aa953e2d,8,0,5,15 +df0f30b9-8ed0-4f79-a29f-c82ce564912b,9,1,3,2 +4c38c3d4-dbea-4d73-9306-6fd1e67b4d38,6,0,2,16 +0dd3bc71-60d6-4eac-9a10-2e6498fbb8c2,1,1,2,16 +cd065efe-bdc7-4464-904b-7e223a398277,0,2,1,9 +a8978de4-115a-4d6b-bbc6-8064c2d49261,2,2,6,4 +6cdeffeb-45e3-40f8-85c6-4b29c00b9cf5,6,2,3,2 +c8b0b1cd-b7b7-4d4b-9a46-76a82ad45acb,5,1,3,19 +c402ab57-ca1c-4c42-a982-45e7d270aa5c,1,0,0,1 +3eb811e4-5c55-44bc-a4c3-8bda97660114,3,0,0,6 +555f7824-e6de-4d6f-ad08-7d0ac838d7ed,1,1,3,13 +b26f156b-74e5-409a-9aa2-b9de0d1b5329,8,0,2,9 +1b4d1b6c-65be-4654-8ae0-a612cdf6eee7,8,1,6,1 +24a83524-bfdd-4483-a1fd-a5124f46443a,2,0,1,1 +b2020013-f186-45a9-bbcc-1b2e1e532aed,4,0,5,5 +28ba9e8a-afa8-4b22-a51b-b894701b38e2,1,2,0,13 +5736caf1-d312-4208-8e11-a97e6ff0260d,3,0,2,7 +fe845bc4-124c-4337-ae5b-e02e8745cc69,0,2,4,7 +54368b77-f9e5-4d2a-9254-477c31ccf07d,3,2,3,11 +dea5c769-0ce8-4226-b41a-e1625f6b99a1,8,2,6,18 +25edd20d-3f54-476f-9e4e-e19200d38f61,5,1,2,2 
+10d99383-6268-4edb-a12e-b14cb6fcae7a,5,0,7,2 +a9d640ac-2f8a-42cc-a07a-7bbf1f3059aa,2,1,2,16 +ca3e10f7-726a-46f3-8758-e9fb14961ce2,5,0,7,1 +4b84f9b0-58dc-4cff-a1ff-904516b27a2a,8,0,4,18 +176b984f-5375-44ce-ab7d-9195310e15dd,2,2,5,19 +d4fc6f01-3cd4-4c03-a9e4-3a5f17daca71,6,1,0,4 +bb7fcf81-f68d-451a-93f7-6e7049c2b397,4,0,4,0 +88487690-6073-41c5-b32b-e7c7436b2fdb,3,0,6,5 +4eee4537-d848-4328-80ef-e1bddd5291ff,3,1,2,18 +71be1c11-907b-4eed-9797-7ab1dc0928ac,7,0,2,3 +5b72d74a-08ef-4115-8625-a1ca47d457d8,5,2,1,18 +ed2f5df0-287e-4321-922f-ea6c7123225b,0,2,6,16 +ad46086a-3b02-47eb-8f65-591fefa68ca2,3,0,3,2 +986a278a-7996-44a0-813b-a78fa8636c6e,9,2,0,8 +3c785c31-1a90-42bb-872b-1d47eed793e9,2,1,6,14 +d10dbdf1-31ca-4f3a-8608-e59b9934e393,1,1,4,19 +c9a28608-fc6c-475d-89cb-71f740ea3f8c,5,1,4,2 +dcf5572e-0147-401b-87de-59383f9d5b68,9,0,2,9 +78461c79-19a3-4c22-b4da-09f1a9a1fb99,3,2,0,16 +1d3e3d3c-2a7a-4e84-8afc-e750a2d92571,0,1,4,13 +56c2e0a6-df22-4c8d-b834-59e2ffcec75f,9,0,0,4 +166c8113-13c9-42a9-b31c-d6cbcb735edf,4,0,7,3 +dedd7614-7043-427a-907c-08075e63f8ba,1,2,3,2 +ae9d3248-3168-42d9-be5f-cc567fa2b6c1,5,1,5,3 +7bf3792e-c0b7-4b79-9113-f0b3d5c3b9e5,7,2,2,0 +4c8afcfb-602a-4364-8fbd-574a9cbecbfc,3,0,5,14 +e0afcd6e-8742-4641-95e2-8b317b49e68a,6,0,6,17 +f9c89f8e-3254-445c-b050-d3638bdf8ca8,5,0,7,9 +f4059d6a-3c92-4355-8f2e-d839eca30c97,3,0,2,14 +365bc54d-9612-46a1-9a00-10530b9cae10,8,1,6,14 +bd65870e-e1b3-446b-b827-b0136c277cc3,5,1,1,12 +c9995afa-4b3b-4de5-8e6f-1c716e36fd88,3,1,3,9 +30547065-0d5f-44d9-bee2-9b05c91a9dbd,0,1,5,4 +78d90ac1-d11e-4d39-9e9d-009794f06ce2,4,0,0,1 +cd37ab04-a467-46b7-ac6b-5af48580376b,2,1,5,16 +7b95a33a-615a-4a93-ae56-22692dfafcbb,1,1,2,17 +0a571287-6f06-4c44-9e2f-64318950fc11,0,1,6,8 +5ab8ee97-2468-449d-88ed-c844b29a886f,3,1,2,19 +82217dd2-b018-4552-a553-f05040a4a820,2,1,7,14 +789e457a-afa6-4122-b6af-1ae10a61d167,7,1,6,13 +32519d57-cacc-4be6-9d75-472f8b72cbc3,9,0,1,5 +84bc15d3-31c5-4362-b921-3512dd6cfd19,7,1,1,7 +bf7489b1-9e25-4b51-88b2-bf87f1152534,0,1,0,18 +b93ffdd9-325b-4e6c-b7b2-fa29c74b1077,8,0,7,5 +37b04ef3-00f8-4910-b1f8-d0913dc34ef1,9,1,4,2 +3a826f1c-c5e9-4ed3-b220-f17e3b9fba59,9,1,7,3 +c1bdf662-f984-401e-8851-9403f302b134,4,2,0,10 +12f28299-8685-44cd-b3e8-c28d0d9c78fe,8,0,1,9 +60e3ec0c-585f-4893-b65a-b47b4cd65ab0,8,0,0,15 +eb7bbd8e-c273-4cf8-a7f1-ed046e8baf97,0,0,1,5 +57c75259-51a9-46da-a7fe-980ccabbf648,4,0,0,5 +24acff0e-65ee-4bef-9097-15b0dc439e5e,8,1,1,17 +d88c0083-ca3f-4b26-867d-874dc49873eb,5,1,6,3 +224b968c-2b07-4495-a334-eb5af961d64a,5,0,1,16 +fa966142-5d97-421c-917f-18900d68ee6d,8,0,6,14 +3621e513-3c69-469d-8665-c94551b1933b,8,0,1,12 +b6e6d740-deab-490c-8bff-c273a5155852,4,2,2,3 +56904703-2e9c-45eb-9ed6-1482c103ae3b,8,1,6,13 +661f7659-bd52-4d6e-82fe-b6ffda7abb5d,4,0,7,3 +871d898d-29ad-4718-888e-831d94a4a1c1,9,1,3,19 +7829c3f2-9438-4d28-8d94-0d8fdd0e74fd,8,0,5,13 +b7b45c80-cc15-4271-a59f-8f8cb0f737f2,4,1,4,18 +5875c2d0-faf9-4c4d-ba36-7534a1f69fa2,5,1,5,15 +693a70b7-53c6-41c6-8071-3b45cc6e7842,3,1,2,9 +33f762cd-c2a8-4452-80da-20698e02a1f8,3,1,2,1 +8be2e4d1-a469-4990-bca3-7c1129e3a197,8,1,3,12 +24de31fd-d418-4c0b-a2f6-ab2da2e08938,8,1,4,2 +55d68420-3473-4f87-8288-e23dacdd33f3,4,1,5,17 +83e03ab7-853c-4d6e-a91c-3935a1336b31,3,2,3,2 +0709f020-da4c-4418-a08f-f5920f181dd0,5,2,4,3 +66023dd6-0c1e-431e-a895-76c2ea8e26c3,6,2,5,15 +375b387f-81af-4bb4-abe4-e514aca2d4e9,5,0,1,19 +53d800a9-d2b5-4e9f-8f62-64ce0ac3de17,3,0,5,2 +022e5fac-3952-4fa7-a556-0c0ba0531509,2,1,4,7 +de6fce90-d7bc-4c93-8001-cf7ef77e9557,0,2,2,14 +970dc3a8-3918-4a48-8bb9-3a7e3a5d83a3,4,0,6,19 
+ac513145-4f30-4690-b8dd-cf97ded13865,6,0,2,19 +3947ce95-b754-44dc-893d-93c2070972d6,9,0,6,10 +3e5a84f0-d971-4d68-a287-7d00db8a510f,2,1,2,17 +ec4ef713-1500-4fa5-a231-616a91a8f42b,9,1,5,0 +47ad5739-f319-4de3-bda9-eb8d1344eff6,1,2,7,16 +b07a20e8-fcce-44e7-bc2c-7c2e07188c79,2,1,1,15 +f155b345-f50a-4866-b764-0874edb454c2,2,0,0,4 +d655a1a2-d5f0-46fd-be97-4fc5f3271dc0,6,2,1,5 +c8f2d9c7-5a68-47b6-8f87-362e5a67c625,2,1,1,10 +37f05499-f290-4edb-b9fa-de8766557d50,9,0,4,0 +f02a8d31-aa1c-4aed-987f-6c198a0725f5,3,2,4,0 +8079a195-a39a-4d35-88d5-f409505bd01a,0,2,5,2 +1bcf416c-b2c8-4c01-a4d8-c1145853af14,9,1,5,2 +9483af60-6b66-45d5-8b59-a2372f41d168,3,0,4,15 +ce828593-4c65-407d-a6bc-2bbe5d74998a,1,1,3,19 +867a663d-07cf-4d1d-b4dd-d66ae6806f98,2,1,5,8 +6cfe3947-f904-44e2-a948-4aca907dd8b3,7,1,4,14 +4d8b0efb-67dc-41c5-9603-09724a416206,5,1,2,19 +d275282a-ca8e-49fc-b95c-fe8c88cf29a8,5,2,1,6 +139408fe-f068-4f8a-a3b5-70799ec8fb72,6,0,3,18 +645717f0-f0ee-4249-8dc7-5277645e369d,5,2,5,8 +7fc7b055-890a-4231-a9f1-30c6f1020dc0,7,1,2,7 +8bdb86dc-52ca-4714-9360-cf5a860727b0,4,2,5,9 +37337107-a05f-42bc-a5ac-b0cf5b6edc9d,6,1,1,15 +a84a6c8e-1771-4845-83b5-ec97636d62ea,6,1,7,5 +cd8077d9-35f9-4f25-beda-30d178b0561f,4,2,3,13 +576eccb0-5442-41cc-95ca-275578767c4d,1,2,7,19 +3e98a640-3662-4955-8698-83b89f9c8d25,3,1,0,14 +d740e911-9192-4e18-845f-1a10f04bbf82,8,0,6,18 +1e80498c-ca27-4869-9a08-1b072f8059ee,2,2,4,6 +43bac767-1882-4e93-a4bb-7f20e1687422,8,2,7,0 +21813b57-400e-4abf-9c67-3e984015344f,6,2,2,1 +4b418636-f3e3-43af-ae9a-352f3ff3ad22,1,0,2,1 +4e6f3a35-0b92-42cf-97cb-44bb5e884bf8,9,0,2,2 +f362adb2-cd82-476f-8239-9109df799865,5,2,1,1 +365c9ec4-02c7-4f98-9911-e54d74c18307,3,2,3,0 +0976b49a-58f7-4138-80a5-dd868db8c388,6,2,4,19 +4391df68-ddcd-41e2-8f7b-ef660cc431a5,9,2,5,16 +fbd0d362-343b-4ad9-8f90-85c4d31d486f,2,1,3,16 +48ca27e3-0288-426e-874a-3f194d3a0e49,9,0,1,13 +355c19dd-a8a1-4532-a4ee-98b1b0bf2e3a,9,2,5,11 +40d642c4-f91e-4172-81a8-22938a1e2a34,0,1,3,0 +3e1f5000-f5d3-4b03-bd2b-c84ec4054dc6,6,2,0,9 +0b5c3f20-0e72-4c26-9bd1-f179052c4574,1,2,6,0 +9a864cfe-6ef8-4747-bc43-b1b10028834d,5,2,6,2 +15adccd7-ad17-45b5-a61d-ed21b8787c23,7,0,6,16 +29ed2476-b3b6-4ee8-bcd4-4dc7a8598f3c,4,2,3,12 +0130ac38-9d5a-48a9-ae34-eed48bd0109d,0,1,6,4 +477239b2-a3c6-478b-8219-37d3769f4dc2,7,2,1,5 +565b42b3-8923-4f9f-bf0b-1b9c365aff20,9,0,3,16 +dc611cf5-d479-41f2-bd55-de7203e23242,5,0,5,9 +094857c3-5a8a-4b7e-b26c-013252b9a3c4,9,2,2,17 +066d8a9b-d3ff-46c5-8e83-3ede0d6f9f11,2,2,3,19 +749a2ecd-6f10-4b88-a5ab-e97e7f8926f9,2,0,4,2 +78035bcd-36e6-4abe-86a6-905e372766fa,5,0,4,2 +bb9ed34e-f99e-45aa-905b-a6edcc8470f5,6,0,1,5 +8825164a-4858-4092-9096-707aa0397895,1,1,4,12 +0d33dd12-6897-4893-84d0-59c4051c5b43,1,1,6,15 +4e2adae1-f460-46fd-82df-e7b1129d0e28,1,2,3,16 +4f0f6293-df44-4c1d-9ab9-872b747f2cd0,0,1,5,8 +50df13b2-01ca-4726-97e1-67a5f27c754a,4,1,4,8 +8fac9d55-8c4a-4e4b-9e42-68a498605f9a,7,1,4,16 +fc2cbadf-1e0d-4de3-b41d-ddee08ff59f7,9,2,6,18 +0a10bb26-095b-4e94-b525-110ae95b3879,4,1,2,7 +245860eb-0de7-4f5a-954a-aba5199daf1e,4,0,3,17 +43867cf9-642b-41ea-a237-1da65a418744,8,2,0,15 +fec53094-042e-43f2-a8b5-3e8f20fa5136,4,2,3,0 +fbb1cf52-1ee2-4683-83ac-1ddab5cc52f1,6,1,1,17 +4a0b31ce-03d2-4f4c-8580-de4c7ed3439d,2,0,7,13 +b55c71ea-2b08-4ae8-b0e5-7d6b4a3b9fbc,5,0,1,3 +5d4afb31-fdb6-423b-9ec2-e5f582e31e31,8,1,3,6 +716ccab5-a552-4435-b089-471723041d47,6,1,5,17 +bd711523-b582-48cd-ab7e-805f43f76da7,4,0,1,17 +d4c991af-fafa-47e5-99eb-ed8a393c8ccf,3,2,1,0 +833618e8-680c-46b2-998a-e47c7db7be29,1,2,6,11 +fda372d4-261c-4ee5-85fa-cbff135c8b6d,3,1,6,6 
+47e48bc5-718f-4844-9133-8a817b09c92e,8,1,5,9 +0894899d-0d53-4f37-85c0-7f5163c26615,6,1,7,12 +f1fb8d18-efe9-43a6-a570-df4a06f64713,7,1,2,8 +a29b918d-43f9-46d1-9e75-28b93b157bbc,4,2,4,10 +5290f6c1-e4f8-4ab5-9927-7dc564a43532,6,2,6,8 +6d79ff84-909f-4e0a-a543-f4d73cbffe00,0,2,2,1 +f86798d1-28fb-4d3a-89fc-1c2684283eb0,5,2,2,3 +45c3b873-5990-4078-8455-d9af3df131bd,4,2,1,17 +160eae41-116f-4793-a3d0-e9416c25e70d,4,1,2,7 +070bc7ff-7d8f-4173-8718-544586d92455,0,0,3,19 +429d64e2-76d7-4fe8-90fe-2ff5f3ded1f0,8,0,6,18 +4fb2243c-28ea-4f2e-bd55-c90add1ee921,3,2,4,0 +0387dcfb-c588-4fec-a359-d0e97d783a19,4,0,4,11 +311cc609-769a-47dc-80eb-bd657e094f38,4,0,3,19 +bb828f29-7b28-4044-9475-028464b193bc,5,1,7,9 +1b2d92eb-0601-4f6e-bb27-09c27d4e121a,9,1,5,17 +69a38e03-be8e-4365-876b-06d711027fe8,5,0,3,2 +1463d4d0-1f99-4f23-949d-e1ca00babcee,5,1,6,19 +5fbf2a0f-1b9e-4d0c-a61d-b973c7d3dd4f,8,2,3,8 +2cf61aa8-59ad-4db7-87b3-a6630ad66037,3,1,3,14 +b1e0a20f-61ae-4868-ba48-fb07e510df82,8,0,0,19 +18b36a46-db5c-48a6-8d95-29d7183d2e25,3,1,6,10 +a6df5ce2-4c5c-4e9d-be2e-fac95aac1de0,3,1,4,1 +11a557c2-c674-4a95-ba6d-10633160a707,4,2,3,4 +43640338-2141-4670-b66c-97529cf46916,0,2,2,7 +12502564-8796-496f-b93e-87834ed5e79a,3,2,1,5 +fcf4473c-ae78-4178-8ba3-0a2c2a73b450,6,2,3,11 +f562e6db-46eb-466c-9ddf-b3b48845d9e9,9,2,1,5 +92714eae-1af6-4465-b3da-cb29ef392f02,0,2,2,1 +1cfcee5c-78df-495d-866b-3c332a43c701,7,1,7,17 +280b9fa3-f5c2-4cf8-9f36-a861dd0536d4,2,0,7,10 +9a317774-45c7-46a8-a14a-7d640611be26,0,1,3,3 +f267e275-815e-400a-9df3-20a96ee2236f,5,1,3,16 +68ae115b-fec3-445f-a5cd-213f89e7f390,6,1,0,6 +d9ef1ced-eb1d-4f71-b23f-11d781da430b,1,2,4,0 +dafdf136-b5b5-44f2-a0ba-924f0c11b46c,8,2,1,3 +1c205c12-ee5a-4ff1-a1a7-80dadba29f34,2,2,3,19 +540c07c2-b4f0-4ee8-b963-f8ae266ae199,4,0,7,19 +79f19c17-725e-4b22-baf2-9b24b8487814,4,1,6,10 +4f2e7779-d5ef-49d5-9e3d-d5603af1e84a,1,0,1,13 +8243f778-82aa-437b-b486-e66942b9606c,6,1,2,13 +46297b6f-a7ea-45ca-bcad-e90bacbc21af,3,2,1,2 +150a3ef0-5a87-4ef3-a8f4-7d72d57286d1,7,0,2,15 +3131ff41-3492-4690-befb-81c7954cce89,2,2,1,13 +dfb27a65-38db-47af-9b40-990c312b07fc,0,0,5,15 +490942ab-7210-43d8-85be-72a788f4c0a1,5,2,5,18 +5599b824-d0a5-44c0-bd6e-c337d6d0606b,0,1,6,13 +9318bfe7-aed1-4da0-975e-b28d46a753a6,1,0,1,6 +6c1a4ebf-d208-43ea-a42c-81492aaea715,8,0,5,6 +71f94918-09a3-4881-9e4c-0d312b99fd89,4,0,2,11 +5e777bd5-c470-4033-abc5-5c2f81526fc1,9,1,0,14 +64f0f59f-3503-48d1-8506-e5fe2844eb56,2,0,4,10 +265c63fc-ebef-4dd7-a85e-4e6063b07df1,9,1,3,1 +e09d3bf7-aeac-42f3-8198-a4470ea2b18b,1,1,7,11 +b7236fc6-47d1-48f7-b1d5-c2b033f5776b,2,0,0,14 +35daa92b-7807-48ed-9419-5d26fc730d29,0,1,5,9 +bca26262-a79e-48a6-98d0-a04a44a2e853,2,2,3,3 +e3093d00-86e7-429f-a8ca-7ebad8384bdc,9,2,3,13 +b013d589-1f5c-491f-8f08-c788b4f1e550,3,0,5,0 +8ae2ceb7-7f97-4f94-b338-93122b33a1f6,2,1,3,9 +887c424a-57ec-4f4d-96fb-b31483d8c717,7,2,4,14 +5b6d9b1d-415b-4ffd-8620-7490ba3805d8,7,0,6,10 +7b9a8c8e-3c63-4a9d-beff-053dc3211097,7,1,6,11 +0b8246a9-da53-4ee2-b5e6-44ce336b8272,8,0,4,15 +c37fa362-db2e-42f5-bd25-e95b1781e3f4,4,1,5,11 +c0ba7b8b-e28a-43c6-86d3-6f792e037c00,8,1,7,9 +00b3bf53-8e4a-4bd9-b52c-40803ca4aa6b,5,0,5,17 +a1ab850f-daae-419d-a42c-6fcc732d02a1,3,1,4,1 +be63d8b0-4d80-471e-9aee-e4d9fed7190a,6,1,0,1 +8dfc87b5-cedd-4806-b470-a581f90a3737,7,2,0,5 +350ed97e-8eba-4272-b8b2-82f3b6165e48,0,2,3,5 +bee72dc0-8dbc-4344-8e11-367246e4ebe0,2,1,0,15 +564e36df-6353-4e2a-a2f3-a818d799a8dc,9,1,6,6 +6a1729ae-3586-4bab-832f-ac6fc3a3ea75,2,0,3,8 +6c21f224-4b53-4789-929e-4dce834ac8e0,2,1,7,4 +47efcd50-d554-4960-8c43-f3afa5b8f4a7,0,1,0,5 
+a4f5db6b-2bca-4776-b3b6-226e55364a1a,5,0,7,10 +4b2a7253-7675-41fa-9c4f-795c140c5890,7,0,2,5 +493e96fb-f220-48fa-bd31-e1b65abcdff1,5,1,7,8 +c7c41263-e74b-4966-b38b-17d1050ecbca,8,0,3,13 +85405d8d-a8b2-426d-8c8f-83128b63e026,5,0,0,2 +19ae0cb1-348b-4721-9645-c08df5d825d6,3,1,5,5 +6cc92239-e406-48f0-8d4b-9e83305d9ebe,6,1,6,1 +206c07f8-2481-40a0-9aad-b02ddd6ff1d5,4,1,2,17 +3f5588ea-9d5f-41d0-acff-b130da9e7e37,3,1,2,6 +792c6ff1-9336-4dd5-bf3c-d9b858383f9c,3,0,6,4 +b5972461-6a9c-4757-b7ab-be4fc1c34188,4,2,3,6 +4dbd4d23-1e96-4dd9-b087-2106552805e4,4,2,4,1 +f87121eb-8411-4a01-a04f-f5cdb889d5e5,2,2,0,6 +c80d69e0-8170-471c-857f-cabf6cf52ec6,3,0,2,10 +3723c975-6745-4214-abcb-c5a755cfc15b,3,0,7,13 +87fd51d5-74b9-4991-a0e6-3ad359b8da78,2,2,4,15 +f38d7a0a-d9e7-413e-b786-bf7ccbc31cdc,3,1,7,14 +0408c05c-abf3-4989-ab9e-fd8c0c1c25ac,0,0,6,2 +5f91b3d1-ff3e-4b33-9399-42460c85a734,6,0,6,4 +80fbbf72-1a5a-4599-a6fb-cd94f16f48c7,5,2,1,18 +30c2a33f-f0db-4f1b-905f-204cd65c8d51,0,0,6,3 +9b7fbf03-a049-4fcd-bd1a-7c64291efd6a,3,1,7,15 +2c341a1b-0912-49f5-aea3-02929bd0c5a4,9,2,7,1 +8c5aedb5-4a6f-430f-bc6c-d2e921885c8c,6,2,4,2 +ee51d1cc-1ebd-4557-8208-598ad676af89,0,0,7,11 +a7217e7d-9960-4c00-8aeb-b98267bca27d,7,1,3,2 +f266c813-2a09-4266-9c05-095bd0086701,0,1,7,3 +483dc5b6-f8a2-4c03-9d00-6ce24892d04f,7,2,2,1 +34f34775-9e65-4942-9921-ff260d7df121,0,1,2,17 +8d178d47-8d8e-4bab-b341-9fc539d29faa,2,2,4,11 +7f67aaed-3db8-4565-aad3-dcfebe67ddb0,9,0,0,3 +73f8ff59-74f8-4557-b6a6-a7c23d19a2bb,4,2,7,17 +8d5a288c-1a8e-4b16-914a-df6da969ca2e,9,0,1,15 +b9fa3571-5f56-4a69-ad82-2cdae8c4a76b,6,0,0,0 +88612bd7-9dc9-49d2-861b-bbd07b0dd521,9,1,1,12 +bae96fe4-2b30-4434-80e3-b428991d73a5,4,0,3,4 +4b53c02c-9389-43fc-93c1-02ba680963b4,1,0,5,9 +ed4369c4-ecde-4254-9477-efb6aa4425f3,7,0,5,4 +5f988e23-acd6-4b2e-8388-9379ba6526ed,8,1,7,7 +54c05592-fd5a-4c85-bdf0-b8fd084233c3,3,0,5,5 +96eb296e-ab4e-47b7-8a5f-293a3df27a63,0,1,4,5 +1a67ae69-8e14-4771-a30c-0fbb7d00dfef,0,0,0,15 +ad08a7b6-ce0f-46d2-960b-89c1e5fc3053,5,2,5,15 +b7d81688-fdeb-4fe0-9432-daf41db1bc37,6,2,4,5 +2c6c6efb-ef09-492a-8791-2fe5502721fb,4,1,6,6 +4f113dfa-f1f8-4daa-ad2a-ace2c38b34b7,3,1,0,5 +c8782977-76d7-4e29-a61f-7eee61e5349d,0,1,5,18 +5f3832b5-fcbf-4db1-b2c9-1aa770fde72d,6,0,4,9 +271cabdd-1a06-441c-b0df-e9f014e40433,4,2,4,0 +2c6f8ba2-3c6e-40c9-bf0c-32555d2338a3,1,2,1,1 +bef62990-8318-43a0-a193-ee331447a6d1,7,2,0,16 +88959d49-ab71-4d7e-98d8-b76f721bd3a6,8,2,5,18 +f40118b4-62bc-4d4d-85fd-2ae30a1cb2ca,0,1,1,2 +204e3d71-ea61-4556-9611-ebbac1acfef6,3,2,3,10 +6276ce22-61a2-43df-87b3-3a36eb882bb1,8,1,1,5 +431b69a9-bbac-4c94-8766-95a260f4f81c,4,0,4,2 +7f9e3c13-f6ea-410f-a02c-8a9967bd6531,1,1,1,17 +4cc55c12-4cbc-40f4-b284-8f3de8c91bb2,2,2,6,12 +da0f4a40-b056-456e-bb84-f92b8c6d1759,1,2,2,17 +e43ea46c-5a41-4aa0-826c-684d4a5a67f5,7,1,4,18 +ff659a80-a7e6-4523-abc5-d479fad41a90,8,1,4,8 +8a9bc47e-995e-4f5e-868e-24be0f2ed49f,3,1,0,2 +a3a3647f-e902-4b30-b307-5ba5d3fa2a2a,9,2,0,3 +984dbd71-65d4-46b7-a0b2-ccdcb6ba1d03,1,2,7,18 +1e249804-f321-432c-b8d8-7ce3ceab50ff,4,1,5,18 +074684c9-8d1e-49aa-bb17-a7094bc3b5c8,6,0,7,15 +6f458a63-a519-4eef-9dbf-38c5c21ca345,4,0,3,3 +afd88348-79a8-44e3-a304-0a5877c17fb8,9,1,5,0 +0c07928c-a0db-4500-ad97-8943851ab110,3,0,7,4 +5d5e93d8-0230-4c85-b4e8-047d91c47f4e,4,2,0,13 +a52e1320-9cfe-45d5-ae4a-a29c883d53cb,9,0,1,6 +3b548919-b327-4521-87e5-c5231b337654,7,1,2,11 +c2927c4b-311a-4efb-b402-2db00f051670,1,0,2,0 +9a3e0edd-1a40-4e7d-9641-2d44a6ebd7b1,1,1,7,18 +60e942e8-7525-4bc2-b787-78b1b3c9d9cc,2,0,0,1 +0e4aaef8-63a0-4858-9bae-160b0d9cd920,4,2,5,7 
+61365dd7-98ab-45e4-980f-dbf00b801b61,3,1,6,0 +d0ddd7e1-f265-4692-8c21-bddc70974cd4,9,2,0,15 +ffc8d403-6434-4a57-99e5-84174dcaba65,2,0,2,8 +203f21ee-a6d9-42e5-a60d-25423b043c30,9,2,1,18 +2e5b9dc5-208f-47ac-b326-cb89698ac8f5,2,2,3,3 +778ce3f5-5301-429a-b482-599df11146a2,3,0,4,17 +58987a44-44a4-4a5f-8821-e428501371a6,0,0,7,14 +67f371a7-e196-495f-bc77-0bd5ce16ed89,2,0,7,10 +746fa1b4-4fa0-47e7-b621-1f808f3b23c2,5,2,2,3 +452f8e17-9028-4201-8222-0c18f37e1828,8,0,4,1 +bd847f54-5ed2-4fe1-a562-24905e621ecc,1,0,1,0 +cc90a6a8-35eb-4803-ac0d-dba7d2554c83,3,1,3,8 +d6ab1748-6a5d-4dcb-bc4c-564af34f857c,3,2,2,5 +ef19d505-d805-4a8a-a0b9-084f81720012,9,2,7,15 +5e40b0b7-6436-4330-8d2a-e4fa6d56f03d,6,0,2,13 +fec4372b-30b9-4b6d-9a2a-e783ade289af,1,2,7,13 +f74aab29-4ef5-41d1-8e76-81e015e563b2,2,1,3,11 +ea9345ea-cb6a-403d-bc86-f78eacb6a619,0,1,3,12 +26ffa513-9cce-4a0a-84fe-968d3cbf7fa2,2,0,1,4 +84913419-e2e3-45fa-b25a-7863ef7d8744,5,0,3,9 +80b50280-31bb-4b8f-8eba-3f0413d28c1e,7,2,6,10 +f0c7e7a0-e643-4809-b796-7387e4db9ce1,3,1,6,6 +412dc0cc-16c2-49f2-afe6-a401ae6a65e5,5,1,0,0 +34bcff3d-4cc5-448a-b552-e12ed09c778d,1,1,7,9 +6d222f2f-d0ea-45e7-8e99-6437c24aa22d,8,2,7,12 +06b84368-732d-4711-b7da-d1d792905bdd,1,1,7,9 +abf2da7b-b170-4e26-85bb-4b80417ebd46,3,2,6,10 +e5971fbe-25ac-4d58-956d-c7019907280a,9,2,4,13 +a197236a-7f47-4555-ac82-89abd920d340,3,0,0,5 +f6697dbf-18cf-4026-9a6f-24e007145475,7,0,5,5 +75e95192-7b80-4efc-b04d-448423583e34,5,1,2,13 +ac99c2d5-89d1-46dc-9fc7-62c9f8fd3689,6,0,4,4 +1ab646ad-ec54-434d-8f4d-fbff49c446e0,0,1,3,14 +4b862c40-4b59-462b-9b47-a0b3e92ca8c9,9,2,5,15 +58c1e2ad-18e7-4a1d-8346-16339c1bab75,2,0,5,7 +35b99bc7-bb1f-4790-8da6-3b311b8ea513,3,0,6,17 +33f35876-19af-45b0-b815-8826f7f0a76d,2,1,2,12 +013b9df9-2a3f-4cfe-bb4b-ce5064d8073a,5,2,0,0 +ccaaaf64-4397-40d6-a8ba-0a0dfde6ca9a,6,0,1,5 +30d63763-ba83-4402-a66b-0df3d297f656,5,1,4,13 +9479f50b-dff4-4c40-b1d5-d838e6138ab5,9,1,2,2 +fce40c86-4c2e-4c8a-b34c-6a3b526632ee,9,0,6,6 +f51010e9-2757-4be2-bed2-66b4a4cab15e,4,1,6,15 +150ae568-8207-4df9-ab75-892ab62a2cc0,5,0,2,13 +ebbf442c-0e84-4a21-bad5-e9c37412b8b2,7,2,5,16 +c084712e-7812-44ca-8707-1d5bd0bbf19c,1,2,1,19 +aced2774-5e49-4345-8a0e-44fcbec14655,4,1,4,17 +bab51bdd-6272-4273-a0bc-0bce8718edd6,2,2,5,1 +1319819a-3bdc-4e8f-a922-4cb52b006b91,1,2,5,11 +eadea757-cb85-46be-9a00-ac36aa2ffe24,1,0,0,4 +5beb942c-751b-4580-b56c-a78732b4db6e,0,1,6,0 +263733df-2b08-4da9-b096-a06271645f0b,0,2,3,6 +f482864d-81e8-4ce5-9709-2e8c2a0ecad0,9,2,0,11 +24235fba-8210-41d8-8af7-088c2fd13137,8,1,2,5 +1f5698ec-8e60-42e6-b91b-59d2eedd62c0,2,2,0,11 +34a66775-67db-4bbd-bb41-297df89055aa,2,2,2,2 +2cc5ef83-e15d-49c3-9712-59abfc06c3bc,7,0,3,3 +fe68d081-5422-434e-a93f-68fd236b0616,4,0,5,0 +5e81951c-8de2-4114-b5e6-2cbe8471a2a5,3,1,2,8 +be652962-6d63-4215-a4fa-2d8012bd0767,0,2,2,18 +10c8668a-88a5-4257-9a1a-28fe536f6106,1,2,6,0 +37070bb8-a9ac-47d1-b3d0-d6c04a275778,4,2,6,6 +acdb55fb-b64f-4a0b-97fe-37cc971a6cda,6,0,6,4 +c9d20e17-f0e7-4fa1-b5c5-8ff7bff5d23b,7,2,0,17 +abb0afb8-68bb-499d-bef2-431ad9da3d41,3,0,0,14 +68a9f4d1-a11f-4bae-a561-2463cf7b13d2,8,2,1,18 +2130b4f2-a078-462f-a4fd-6991a92a8ed5,7,1,2,10 +4cb4763a-99c0-438a-bfd1-a84afd734750,2,1,2,7 +dd9a2f5c-3f99-422d-8dfa-31488412c2e2,9,1,1,3 +3249992f-459f-4efd-8e10-373caa1feeb0,3,1,2,6 +16c8dda3-c44e-4bf9-9088-c305a6a42928,4,0,2,0 +3601559a-bd87-49b0-835a-71f0f82302cf,0,2,3,14 +f5c08ebb-4fd0-4331-9a58-2989d7a5ea12,0,2,1,5 +01966f8c-5608-4baa-b312-8071fd2329f6,4,2,5,0 +fb0b088e-7ac6-40dc-8a71-0dba370ee63e,9,0,5,10 +ddc210b6-b6d6-4dff-b711-37be64d63aa4,6,1,3,13 
+54bf2854-f06b-416e-899e-fee34633e906,1,1,6,11 +2f50238c-895b-4252-8283-edc00fed5f76,8,0,1,15 +5f59df43-7610-44d1-a4bd-9cff4b296d1d,5,1,5,11 +68a29ee3-e867-4575-8505-a78e58fb5492,5,0,7,7 +ecc0064c-c97a-4e7c-aece-a2ef3baced59,4,2,0,3 +372c592d-ae57-4f4d-ad37-adf1f357dbec,1,0,0,2 +5ddc60df-e616-449b-be11-088f4704af45,0,0,7,9 +809a3c0b-b2db-4af5-a3e8-524e2cd92cf3,9,2,4,12 +36726d98-f29a-47cf-a6a4-b39a84836e88,9,1,6,16 +9d04c43f-1837-4ea2-9f7a-734f6dee5ff7,9,2,6,16 +225381bc-632a-41be-b070-ed0302989049,9,0,6,17 +0e04494a-6faa-4981-bfe3-140ab45470ef,9,0,1,4 +32c4ecb0-b897-47a7-aec4-a5ab8f6bfdb1,8,0,7,10 +c8e26747-46e1-4561-93e8-8f01390a5184,0,1,6,14 +6a153058-11ec-43e3-b0d2-f7f6ef36384c,8,0,5,7 +fb4b8b4d-6642-4045-9083-e84c35a467cf,2,1,1,9 +a34570bc-c673-4074-96cb-8caa3aae6a2f,1,0,6,16 +97bf0dab-b438-4a5f-a739-90855a92216d,5,0,7,5 +ffcd57a1-cd08-4a2f-907f-de2f7be480bf,4,2,2,9 +5c1a086a-3499-4f9c-adff-dc58f45e0543,2,1,1,16 +190497d3-d7aa-43a3-854f-795dc5c5ad01,0,2,1,1 +776e77b2-9704-447c-aeb8-11f3011405ba,4,2,2,14 +54410ec7-19cb-4de2-ab7a-772ece2e11f0,5,0,2,18 +cca2f398-ab18-4386-892c-f7bbe109f2db,7,1,1,3 +4c635eaa-5e53-4a65-9005-75af695bb502,1,1,7,17 +ea286576-90cf-42fa-b5d4-1531d7cb6206,3,2,7,15 +1d6f96d9-4870-460c-9049-10ba749b69c5,7,1,3,8 +2a1fd7da-0c9b-4992-9a50-6d0b15ea7c2f,4,0,1,1 +8f375a42-4ab3-4d2d-8453-e46c88a905a9,7,1,7,8 +0feb0d26-1559-4b00-a1dd-de6417e174bb,7,1,0,8 +ac1eb08e-5c2f-423c-b356-37865d087311,7,2,2,7 +ea8a283a-c359-48e1-b429-ad52823fcd17,3,0,2,3 +12b26448-6076-4baf-ac70-429057e7b0f5,6,1,2,9 +fca15ef7-8426-491d-8527-faa396d6be2f,2,1,6,15 +a8a370d9-6a6d-42ac-ad02-b0cc564713c4,4,0,6,5 +5dc5d14e-875d-4a16-8d78-fed73a09a964,2,1,0,0 +6da7bb4c-4e0d-4a2e-8366-6c8b8fd7ce55,3,0,5,5 +a3e6293d-57a4-4f61-970f-49df2fa01865,0,1,6,19 +3aaa2109-23a4-4a84-90e3-bc2e74007845,9,2,7,14 +ab1ab04b-8b8f-423d-980b-ecd3f94b5509,4,1,3,0 +704f5573-920e-4e9d-bdbf-1ace0396f92b,2,1,7,0 +6db60fca-4261-463e-a8d1-23ebd0289d7e,0,0,1,8 +2eba2b2b-c572-4198-8982-bdeb86263397,3,2,0,5 +f902d297-bd05-444c-be19-489f22d50104,7,0,1,9 +2f349c0c-afb4-4453-b19b-bb06c5fcacd7,1,1,2,8 +d21ad56a-4f32-48d5-a378-df6c746e3c14,0,1,0,11 +a8c08390-aaad-4cbd-92c6-f23e8e8d434e,5,0,0,7 +ffbfe395-4363-458f-8d92-60fc5cf02106,4,0,3,4 +5ed203d0-fb4a-4313-ac9d-b4a4fc229153,2,1,1,11 +b3fb82a9-80eb-41cf-ba9c-e0b01898dbf3,7,0,5,10 +1fb15675-4108-4b56-b507-aaafb8be5c4d,7,0,0,5 +45f254b7-008e-42a2-ab9b-2da8aabd8b6a,3,2,4,15 +177b6126-8955-42a7-b0cd-44a9a7e5852b,8,0,1,14 +efcd7011-6a16-45b6-becf-a71c91fe4bc2,6,0,6,18 +62873376-9766-47d9-9e26-1fab8c3e3d9d,6,0,6,1 +e8ce895a-115a-489f-bb8f-6bd578251c3f,1,0,3,17 +bb46ea36-4eda-4c9d-871e-ecddea83b5d6,1,2,7,17 +04ecbb5d-3855-4e90-a290-a515d552bb94,6,0,4,12 +54be9c90-8846-42ed-9b11-8ef554399948,8,0,1,17 +2d1d5869-040a-4ffd-ad98-8719f2270e57,4,0,4,15 +ef1752b3-7562-48c1-b9b3-3383908e1b1e,7,2,0,6 +429aa251-6b85-4ad4-81cb-45b4ae7de28f,1,2,4,12 +35f92784-aca2-4427-a2b8-1292d8e77e0b,9,1,2,0 +b2453a90-fb9c-41d4-acbd-4f1c7cdca625,4,2,1,7 +3a468839-2c21-413f-b472-fe4054944df7,5,0,0,19 +68e5cb78-ae78-497f-9091-20931d6f918a,8,0,5,0 +3b7eda9f-39b9-4e1d-9fc9-7744491f6459,5,0,1,6 +8de093af-e53a-4b8b-bc69-2ddf1bfac314,8,1,0,1 +63a8d386-08d2-42a1-9485-b83054bcda00,0,2,3,14 +f8786f31-4e49-4e3d-a9b5-acc4840a5cde,1,0,7,5 +bfd4e20f-33fb-4a42-b9a5-b1a87fb56f98,0,1,4,3 +344ab874-793c-418b-8317-6915663e84bc,8,1,0,11 +7bf49a9d-15ed-44da-a7dc-72944d6238a3,9,0,3,4 +ff3c9f06-e841-4646-952e-36fb84f2416a,6,2,6,10 +7876e32a-c304-4cdd-80a6-551e9ccc3db4,5,1,0,9 +49c477c4-ff4c-4061-87b7-907bbf5b1c69,4,1,6,6 
+f90acef6-6303-4678-876d-6342bf1117bb,0,0,1,11 +c2d38a15-adae-4359-be1f-6024ef92d53b,9,2,5,5 +bb6266ad-969c-418f-b0c6-9412a2cd182c,3,1,7,11 +1f60a974-05f4-4f7e-8eea-dc5193022951,4,2,1,19 +8522d15c-bd24-4ff4-b121-32da9baee12e,7,0,3,18 +4404d152-ccd7-4aa0-9679-c5a0c151d856,4,2,4,16 +f98e03f3-b81a-4686-b630-38d2d3035e67,2,2,5,16 +abb1095c-3f72-4f21-be71-cf3e530e6865,4,1,2,2 +c304f710-a643-4323-8f89-5643900ab924,6,2,6,19 +06bbbee4-3339-419b-8a5c-7553dd4a1415,5,2,1,3 +8513f19f-a14f-4a24-87e8-3e51ac833558,7,0,5,5 +9886095f-9b87-4754-aa4a-7b4beff53cc4,4,0,1,6 +2e84b6f3-bfba-447b-8304-94f734796214,9,0,1,6 +b6470f49-4b99-43e8-a33b-5bce1aabb487,3,0,0,3 +b9e7bd41-be9a-4b7b-bd08-98e8cfeb490d,1,2,6,8 +6a12385b-0033-4182-8b2a-88f501e8a260,6,2,1,2 +db637eaa-7a52-4e42-9ed6-6ababa1cfc36,9,0,3,6 +88eb926e-1034-4e86-a7e5-bec922ed6a33,7,2,5,6 +ab69e76c-c761-49c1-bf7f-e92944fc6eef,9,2,0,7 +ddad6937-1a27-40fa-a890-060332337230,5,2,3,15 +144589cf-2a1f-4665-a4ce-2e90bf54b4e2,6,2,1,19 +a5037033-d436-4012-806e-6e1b01a5c544,3,2,0,4 +c420217d-f333-4f52-997c-0b1d0d96f8f0,4,1,7,15 +4918ec5a-08ab-46c7-93eb-4043606136e8,3,0,5,8 +72a0b858-b1c7-4a61-b8b8-46829420a0fd,4,0,4,17 +35bf4bfb-aadb-4548-b4a6-e01d6ea91bb7,0,0,6,4 +bb96e980-99c5-472b-89d3-12eedc435c86,3,0,6,13 +75098468-9494-4260-b0fd-7133b7652c33,8,0,2,19 +d32d60e4-7d93-49fd-a62b-4337a1ef9354,3,0,7,1 +987a2bcf-94c9-49c5-bcd1-d5e355cb796e,3,1,7,11 +b176a5a4-415d-44c1-80dd-19927339ff8e,9,1,6,13 +6800ad32-4914-422e-a827-2ba87619c686,2,1,1,15 +d1960f61-50e1-42a7-ad22-16131fb80034,2,2,7,6 +4e0a2eb4-1968-4e28-884e-1ede88d45bf1,3,0,6,14 +942d9f1c-4383-48c5-89d7-c53c7ad5f74b,9,2,1,12 +3772803d-f945-403a-954c-7a8f119ab43e,5,1,6,4 +1352ea72-181c-457c-8c7b-250d6c080674,4,0,1,7 +1101fd87-7cbe-462e-bd9b-3fb3e0bc6997,6,2,1,10 +b441bcbf-fdaa-494e-a04b-6d323a077eab,2,2,5,10 +ad7df445-cdda-4737-a5a4-b4b1ebffe7c1,8,0,0,10 +5ea49c03-897b-402d-af8c-803737cbe9b2,9,2,3,15 +848e7311-2b66-43a7-9fef-be1e24506f44,9,2,0,19 +43f7bd31-66a6-426e-b495-83300024710a,8,0,4,1 +953e02cf-2989-4a5d-b818-15f8eaf694c1,7,1,1,1 +55f6f4f2-2169-4f4b-ba57-11689e9e09c6,2,2,0,12 +0ca9100e-f24f-432b-85d5-236451f5a813,5,0,1,5 +6264e11d-00f3-42ae-859f-7c10d857c705,5,2,3,12 +c727a796-6cae-4e11-9f6e-575cd8f6ffa9,9,1,5,7 +c45f7fb0-1e57-4c07-b9af-46998edb11e9,6,0,3,5 +1c49dc9a-433a-484e-a4ec-04c98fcf2110,0,0,3,5 +452a87f3-3051-4714-a44e-8f4243d51bd2,1,1,1,2 +595e3c50-80ce-46c6-b2f8-37929c5326ff,5,2,6,1 +e0752bdf-f0fb-4f72-acac-e812aa2e1a83,5,2,4,19 +983b9247-48b0-4015-a2d7-05aa7d12b8ae,2,2,4,6 +1717c8f7-939a-48bc-9a5f-b8e74a37128c,5,2,1,14 +e1e95264-9fa8-4e9a-8352-095b08b30edf,3,0,0,15 +510fbff5-fc4f-4155-ad50-0a2abafd2d36,4,0,0,17 +3c7ee4f1-bf73-4c79-ad64-3dfa67f75647,9,1,7,13 +0ff48b59-0211-4755-9c96-fde93b647236,3,1,6,9 +4ba27137-c563-46c2-974e-3351f8b76739,3,1,3,19 +9273091e-33ad-492b-a313-31927e49fb78,5,2,5,2 +03152b83-d1b0-4381-9ce6-f921ba8e1144,4,0,7,0 +c9c3640b-15c8-40d4-9f1c-8d8991ff6560,2,0,6,9 diff --git a/observation.avsc b/observation.avsc new file mode 100644 index 0000000..f48aa59 --- /dev/null +++ b/observation.avsc @@ -0,0 +1,11 @@ +{ +"namespace": "kafka.exercise.avro", + "type": "record", + "name": "Observation", + "fields": [ + {"name": "id", "type": "long", "doc" : "The observation id"}, + {"name": "value", "type": "double", "doc" : "The actual measurement from the sensor"}, + {"name": "measurement", "type": "string", "doc" : "The measurement type, e.g., temperature"}, + {"name": "timestamp", "type": "long", "doc" : "The measurement timestamp"} + ] +} \ No newline at end of file diff --git 
a/pdfs/Column Stores - Cassandra.pdf b/pdfs/Column Stores - Cassandra.pdf
deleted file mode 100644
index fd21ee6..0000000
Binary files a/pdfs/Column Stores - Cassandra.pdf and /dev/null differ
diff --git a/pdfs/Data Engineer.pdf b/pdfs/Data Engineer.pdf
deleted file mode 100644
index 2ccd67a..0000000
Binary files a/pdfs/Data Engineer.pdf and /dev/null differ
diff --git a/pdfs/Data Modeling Big Data.pdf b/pdfs/Data Modeling Big Data.pdf
deleted file mode 100644
index d4556b6..0000000
Binary files a/pdfs/Data Modeling Big Data.pdf and /dev/null differ
diff --git a/pdfs/Data Modeling.pdf b/pdfs/Data Modeling.pdf
deleted file mode 100644
index 8176e9a..0000000
Binary files a/pdfs/Data Modeling.pdf and /dev/null differ
diff --git a/pdfs/Document Stores - MongoDB.pdf b/pdfs/Document Stores - MongoDB.pdf
deleted file mode 100644
index c7cc4b9..0000000
Binary files a/pdfs/Document Stores - MongoDB.pdf and /dev/null differ
diff --git a/pdfs/Graph Databases.pdf b/pdfs/Graph Databases.pdf
deleted file mode 100644
index bdaf349..0000000
Binary files a/pdfs/Graph Databases.pdf and /dev/null differ
diff --git a/pdfs/Graph Theory.pdf b/pdfs/Graph Theory.pdf
deleted file mode 100644
index 2964ebf..0000000
Binary files a/pdfs/Graph Theory.pdf and /dev/null differ
diff --git a/pdfs/Key-Value Stores - Redis.pdf b/pdfs/Key-Value Stores - Redis.pdf
deleted file mode 100644
index ca52775..0000000
Binary files a/pdfs/Key-Value Stores - Redis.pdf and /dev/null differ
diff --git a/pdfs/today.pdf b/pdfs/today.pdf
deleted file mode 100644
index 62fdb51..0000000
Binary files a/pdfs/today.pdf and /dev/null differ
diff --git a/phdthesis.pdf b/phdthesis.pdf
deleted file mode 100644
index 3f80551..0000000
Binary files a/phdthesis.pdf and /dev/null differ
diff --git a/synonimns/Data Collection.md b/synonimns/Data Collection.md
deleted file mode 100644
index e472609..0000000
--- a/synonimns/Data Collection.md
+++ /dev/null
@@ -1 +0,0 @@
-Synonym for ![[Data Acquisition]]
\ No newline at end of file
diff --git a/synonimns/Data Ingestion.md b/synonimns/Data Ingestion.md
deleted file mode 100644
index 5937061..0000000
--- a/synonimns/Data Ingestion.md
+++ /dev/null
@@ -1,2 +0,0 @@
-Synonym for ![[Data Acquisition]]
-
diff --git a/synonimns/Data Systems.md b/synonimns/Data Systems.md
deleted file mode 100644
index ea9fd5c..0000000
--- a/synonimns/Data Systems.md
+++ /dev/null
@@ -1,41 +0,0 @@
-### Relational Data [^6]
-- Modeling Techniques: Entity-Relationship Model/UML
-- Normal Forms
-- Star and Snowflake Schema
-- Reference Systems:
- - MySQL
- - Postgres
-
-[^6]: TODO Add reference book
-
-### NoSQL Data[^7]
-
-
-
-### Key-Value:
-- Data Model Definition
-- Modeling Techniques[^7]: [Ilya Katsov](https://highlyscalable.wordpress.com/2012/03/01/nosql-data-modeling-techniques/)
-- Normal Forms?
-- Reference Systems: redis or memcache
-
-### Document
-- Data Model Definition
-- Modeling Techniques
-- Normal Forms?
-- Reference Systems: mongodb
-
-### Column Based
-
-### Full-Text Search
-
-### Graph: Property Graphs
-- Data Model Definition
-- Modeling Techniques
-- Normal Forms?
-- Reference Systems: neo4j
-
-### Graph: RDF and SPARQL
-- Data Model Definition
-- Modeling Techniques
-- Normal Forms?
-- Reference Systems: neo4j
\ No newline at end of file
diff --git a/templates/2020 Template.md b/templates/2020 Template.md
deleted file mode 100644
index bb1c332..0000000
--- a/templates/2020 Template.md
+++ /dev/null
@@ -1,21 +0,0 @@
-footer: [Riccardo Tommasini](http://rictomm.me) - riccardo.tommasini@ut.ee - @rictomm
-slide-dividers: #, ##, ###
-slidenumbers: true
-autoscale: true
-theme: Plain Jane
-
-# Data Engineering 2020 Fall
-#### LTAT.02.007
-#### Asst. Prof. Riccardo Tommasini
-#### Assistants: [Fabiano Spiga](mailto:), [Mohamed Ragab](mailto:mohamed.ragab@ut.ee), [Hassan Eldeeb](mailto:hassan.eldeeb@ut.ee)
-
-[.column]
-![inline](https://upload.wikimedia.org/wikipedia/en/3/39/Tartu_%C3%9Clikool_logo.svg)
-
-[.column]
-#### [https://courses.cs.ut.ee/2020/dataeng](https://courses.cs.ut.ee/2020/dataeng)
-#### [Forum](https://piazza.com/ut.ee/fall2020/ltat02007/home)
-#### [Moodle](https://moodle.ut.ee/course/view.php?id=10457)
-
-[.column]
-![inline](./attachments/logo_dsg_vettoriale.png)
diff --git a/todos/Apache Samza.md b/todos/Apache Samza.md
deleted file mode 100644
index ac1cb81..0000000
--- a/todos/Apache Samza.md
+++ /dev/null
@@ -1,18 +0,0 @@
-# Apache Samza
-
-![[attachments/Images/samzalogo.png]]
-
-Apache Samza is a stream processing engine built at LinkedIn in 2013. It
-enables stateless and stateful computation. It offers both functional and
-SQL-like APIs.
-
-References:
-
-https://www.confluent.io/blog/turning-the-database-inside-out-with-apache-samza/
-
-Noghabi, Shadi A., et al. "Samza: stateful scalable stream processing
-at LinkedIn." Proceedings of the VLDB Endowment 10.12 (2017):
-1634-1645.
-
-Kleppmann, Martin, and Jay Kreps. "Kafka, Samza and the Unix philosophy
-of distributed data." (2015).
\ No newline at end of file
diff --git a/todos/Apache Spark.md b/todos/Apache Spark.md
deleted file mode 100644
index a99ea58..0000000
--- a/todos/Apache Spark.md
+++ /dev/null
@@ -1,26 +0,0 @@
-
-### Systems Overview: [[Apache Spark]]
-
-![[attachments/Images/sparklogo.png]]
-
-Spark's extensions for stream processing include
-
-- Spark Streaming is an extension of the core Spark API that enables
- scalable, high-throughput, fault-tolerant and real-time processing
- of data. Its key abstraction is the Discretized Stream, a
- potentially infinite flow of small batches.
-
-- Spark Structured Streaming is a new declarative streaming API
- available starting from Apache Spark 2.0 to support continuous
- applications. It is a higher-level API than the one offered by Spark
- Streaming and it is integrated into the Dataset and DataFrame API.
-
-References:
-
-Zaharia, Matei, et al. "Discretized streams: Fault-tolerant streaming
-computation at scale." Proceedings of the twenty-fourth ACM symposium
-on operating systems principles. 2013.
-
-Armbrust, Michael, et al. "Structured streaming: A declarative api for
-real-time applications in apache spark." Proceedings of the 2018
-International Conference on Management of Data. 2018.
\ No newline at end of file
diff --git a/todos/Apache Storm.md b/todos/Apache Storm.md
deleted file mode 100644
index d6f2c54..0000000
--- a/todos/Apache Storm.md
+++ /dev/null
@@ -1,20 +0,0 @@
-### Systems Overview: [[Apache Storm]]
-
-![[attachments/Images/stormlogo.png]]
-
-- First Big Data Stream Processing platform
-
-- it enables very high throughput (single-purpose programs with small
- overhead)
-
-- Programming Model is Dataflow, i.e., it requires deep knowledge of the
- underlying system
-
-- API: Programmers have to specify the logic of each vertex in the
- flow graph
-
-References:
-
-Iqbal, Muhammad Hussain, and Tariq Rahim Soomro. "Big data analysis:
-Apache storm perspective." International journal of computer trends and
-technology 19.1 (2015): 9-14.
\ No newline at end of file
diff --git a/todos/Data Pipelines.md b/todos/Data Pipelines.md
deleted file mode 100644
index c7b2acb..0000000
--- a/todos/Data Pipelines.md
+++ /dev/null
@@ -1,29 +0,0 @@
-### Orchestration or Choreography
-
-- [https://app.getpocket.com/read/2491444407](https://app.getpocket.com/read/2491444407)
-
-
-## Lineage
-
-### Metadata
-
-Metadata is descriptive data about data
-
-In traditional data warehouses, they are typically related to the structural schemas used to organize the data
-
- In a big data context, they also include data lineage and measured quality information of the systems supplying data
-
-Moreover, given the volume, variety and velocity of the data, metadata management must be automated.
-
-![[https___miro.medium.png]]
-
-### Many features benefit from lineage data at Netflix
-
-- ranking of search results
-- table column usage for downstream jobs,
-- deriving upstream dependencies in workflows, and
-- building visibility of jobs writing to downstream tables.
-
-foo bar
-
-![[Distributed-Data-Processing-min-1.jpg]]
\ No newline at end of file
diff --git a/todos/Datalog.md b/todos/Datalog.md
deleted file mode 100644
index d768654..0000000
--- a/todos/Datalog.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# Datalog
-
-test modifying the content in the course repo
diff --git a/todos/Domain-Driven Design.md b/todos/Domain-Driven Design.md
deleted file mode 100644
index f37637e..0000000
--- a/todos/Domain-Driven Design.md
+++ /dev/null
@@ -1,4 +0,0 @@
- DDD is an approach to #software-development that centers the development on programming a domain model that has a rich understanding of the processes and rules of a domain.
-
- ---
- potentially related to [[MODELS]]
\ No newline at end of file
diff --git a/todos/Fault Tolerrance.md b/todos/Fault Tolerrance.md
deleted file mode 100644
index e69de29..0000000
diff --git a/todos/Graph Query Languages.md b/todos/Graph Query Languages.md
deleted file mode 100644
index 2d620a7..0000000
--- a/todos/Graph Query Languages.md
+++ /dev/null
@@ -1,5 +0,0 @@
-![[SPARQL]]
-
-![[Cypher]]
-
-![[GQL]]
\ No newline at end of file
diff --git a/todos/README.md b/todos/README.md
deleted file mode 100644
index f77f247..0000000
--- a/todos/README.md
+++ /dev/null
@@ -1,3 +0,0 @@
-### Spark
-
-![inline](attachments/Images/img0000.png)
diff --git a/todos/Relational Algebra.md b/todos/Relational Algebra.md
deleted file mode 100644
index e93ff2d..0000000
--- a/todos/Relational Algebra.md
+++ /dev/null
@@ -1,4 +0,0 @@
-### Relational Algebra
-
-
-### SQL
\ No newline at end of file
diff --git a/todos/Titan.md b/todos/Titan.md
deleted file mode 100644
index db6230e..0000000
--- a/todos/Titan.md
+++ /dev/null
@@ -1,33 +0,0 @@
-### Titan Graph DB[^3]
-![inline](./attachments/titangremlin.png)
-
-- Another open-source distributed transactional graph database.
-
-- It provides linear elasticity and scalability for growing data, data distribution, and replication for fault-tolerance and performance.
-
-- It supports ACID and different storage back-ends such as Apache HBase and Apache Cassandra.
-
-- Titan also uses the Gremlin query language, in which traversal operators are chained together to form path-oriented expressions to retrieve data from the graph and modify them.
-
-### Gremlin[^5]
-- The Titan DB Query Language
-
-
-- Gremlin is a query language used to retrieve data from and modify data in the graph. Gremlin is a component of Apache TinkerPop.
-
-- Gremlin is a path-oriented language which succinctly expresses complex graph traversals and mutation operations.
-
-- Gremlin is a functional language whereby traversal operators are chained together to form path-like expressions.
-
-![right fit](../attachments/gremlin-logo.png)
-
-### Graph Patterns in Action: Gremlin
-
-- Although Gremlin is also specified with the property graph model in mind, it differs quite significantly from the previous two declarative languages and has a more "functional" feel.
-
-- Likewise, its focus is on navigational queries rather than matching patterns; however, amongst the "graph traversal" operations that it defines, we can find familiar graph pattern matching features.
-
-- Example: The following Gremlin traversal allows us to obtain all co-actors of *"Clint Eastwood"*.
-
-- Gremlin: PROJECTION. This example adds a projection to return only results for the x1 and x2 variables.
-
-![inline](../attachments/gremlinexamples.jpg)
\ No newline at end of file
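Aside on the new Avro assets in this changeset: `observation.avsc` (added both at the repository root and under `kafka-students/src/main/resources/`) can be exercised with `fastavro`, which the new `notebook/requirements.txt` installs. Below is a minimal round-trip sketch, assuming the schema file sits in the current working directory; the record values are purely illustrative:

```python
import io
import json
import time

from fastavro import parse_schema, schemaless_reader, schemaless_writer

# Parse the Avro schema added in this changeset
# (the working-directory path is an assumption).
with open("observation.avsc") as f:
    schema = parse_schema(json.load(f))

# An illustrative record covering the four declared fields.
record = {
    "id": 42,
    "value": 21.5,
    "measurement": "temperature",
    "timestamp": int(time.time() * 1000),
}

# Serialize without embedding the schema, as a Kafka producer typically would.
buf = io.BytesIO()
schemaless_writer(buf, schema, record)

# Deserialize with the same (writer) schema and verify the round trip.
buf.seek(0)
assert schemaless_reader(buf, schema) == record
```

The same pattern applies to `location.avsc`; in a real producer one would more likely let the Schema Registry serializers shipped with `confluent_kafka` handle the encoding instead.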