From 188b86f3e1c731d3fb5f4c25f009a26fc53a0eb7 Mon Sep 17 00:00:00 2001 From: ENate Date: Mon, 25 Sep 2023 13:15:01 +0200 Subject: [PATCH] Updated folders and config files --- .env | 10 +++ .gitignore | 2 + .gitpod.Dockerfile | 1 + .idea/vcs.xml | 3 +- .vscode/settings.json | 19 +---- README.md | 53 ++++++++----- docker-compose.yaml | 79 +++++++++++++++++++ etc/dashboards.yaml | 2 +- infra/docker/airflow/Dockerfile | 1 + .../docker/mlflow/Dockerfile | 0 .../observe/grafana/monitor.json | 0 .../observe/loki/monitor.json | 0 supervised/generative-ai/README.md | 14 ++++ .../generative-ai/notebooks/REAME.md | 0 .../generative-ai/src}/__init__.py | 0 .../generative-ai}/src/models/__init__.py | 0 .../generative-ai/src/models/generate_data.py | 0 .../generative-ai}/src/models/train_model.py | 0 .../src/visualization/__init__.py | 0 .../src/visualization/visualize.py | 0 supervised/recommenders/notebooks/REAME.md | 0 supervised/recommenders/src/__init__.py | 0 .../recommenders/src/models/__init__.py | 0 .../recommenders/src/models/train_model.py | 0 .../src/visualization/__init__.py | 0 .../src/visualization/visualize.py | 0 .../reinforcement}/README.md | 0 .../reinforcement}/basic_example.ipynb | 0 .../reinforcement/reinforcement_trainer.py | 0 supervised/reinforcement/src/__init__.py | 0 .../reinforcement/src/models/__init__.py | 0 .../reinforcement/src/models/train_model.py | 0 .../src/visualization/__init__.py | 0 .../src/visualization/visualize.py | 0 supervised/transformers/README.md | 29 +++++++ .../transformers/src/models/__init__.py | 0 supervised/transformers/src/models/decoder.py | 0 supervised/transformers/src/models/encoder.py | 0 .../transformers/src/models/predict_model.py | 0 .../transformers/src/models/train_model.py | 0 transformers/README.md | 14 ---- unsupervised/README.md | 12 +-- 42 files changed, 180 insertions(+), 59 deletions(-) create mode 100644 .env create mode 100644 docker-compose.yaml create mode 100644 infra/docker/airflow/Dockerfile rename 
reinforcement/reinforcement_trainer.py => infra/docker/mlflow/Dockerfile (100%) rename reinforcement/src/__init__.py => infra/observe/grafana/monitor.json (100%) rename reinforcement/src/models/__init__.py => infra/observe/loki/monitor.json (100%) create mode 100644 supervised/generative-ai/README.md rename reinforcement/src/models/train_model.py => supervised/generative-ai/notebooks/REAME.md (100%) rename {reinforcement/src/visualization => supervised/generative-ai/src}/__init__.py (100%) rename {transformers => supervised/generative-ai}/src/models/__init__.py (100%) rename reinforcement/src/visualization/visualize.py => supervised/generative-ai/src/models/generate_data.py (100%) rename {transformers => supervised/generative-ai}/src/models/train_model.py (100%) rename transformers/src/models/predict_model.py => supervised/generative-ai/src/visualization/__init__.py (100%) create mode 100644 supervised/generative-ai/src/visualization/visualize.py create mode 100644 supervised/recommenders/notebooks/REAME.md create mode 100644 supervised/recommenders/src/__init__.py create mode 100644 supervised/recommenders/src/models/__init__.py create mode 100644 supervised/recommenders/src/models/train_model.py create mode 100644 supervised/recommenders/src/visualization/__init__.py create mode 100644 supervised/recommenders/src/visualization/visualize.py rename {reinforcement => supervised/reinforcement}/README.md (100%) rename {reinforcement => supervised/reinforcement}/basic_example.ipynb (100%) create mode 100644 supervised/reinforcement/reinforcement_trainer.py create mode 100644 supervised/reinforcement/src/__init__.py create mode 100644 supervised/reinforcement/src/models/__init__.py create mode 100644 supervised/reinforcement/src/models/train_model.py create mode 100644 supervised/reinforcement/src/visualization/__init__.py create mode 100644 supervised/reinforcement/src/visualization/visualize.py create mode 100644 supervised/transformers/README.md create mode 100644 
supervised/transformers/src/models/__init__.py create mode 100644 supervised/transformers/src/models/decoder.py create mode 100644 supervised/transformers/src/models/encoder.py create mode 100644 supervised/transformers/src/models/predict_model.py create mode 100644 supervised/transformers/src/models/train_model.py delete mode 100644 transformers/README.md diff --git a/.env b/.env new file mode 100644 index 0000000..c0b619f --- /dev/null +++ b/.env @@ -0,0 +1,10 @@ +AWS_ACCESS_KEY_ID=admin +AWS_SECRET_ACCESS_KEY=sample_key +AWS_REGION=us-east-1 +AWS_BUCKET_NAME=mlflow +MYSQL_DATABASE=mlflow +MYSQL_USER=mlflow_user +MYSQL_PASSWORD=mlflow_password +MYSQL_ROOT_PASSWORD=toor +MLFLOW_S3_ENDPOINT_URL=http://localhost:9000 +MLFLOW_TRACKING_URI=http://localhost:5000 \ No newline at end of file diff --git a/.gitignore b/.gitignore index e69de29..5513f04 100644 --- a/.gitignore +++ b/.gitignore @@ -0,0 +1,2 @@ +.settings +.vscode \ No newline at end of file diff --git a/.gitpod.Dockerfile b/.gitpod.Dockerfile index 00c0c25..6b5bfa3 100644 --- a/.gitpod.Dockerfile +++ b/.gitpod.Dockerfile @@ -26,6 +26,7 @@ RUN pip install --upgrade pip RUN sudo apt-get install -y protobuf-compiler python-pil python-lxml # Install tensorflow ranking and datasets +RUN pip install tensorflow RUN pip install -q tensorflow-ranking && pip install -q --upgrade tensorflow-datasets RUN pip install pip install --upgrade tensorflow-hub diff --git a/.idea/vcs.xml b/.idea/vcs.xml index 3fb0020..35eb1dd 100644 --- a/.idea/vcs.xml +++ b/.idea/vcs.xml @@ -2,6 +2,5 @@ - - + \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json index 8378cf1..7ee20ab 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -2,24 +2,7 @@ "editor.fontFamily": "'Droid Sans Mono', 'monospace'", "terminal.integrated.letterSpacing": 1, "workbench.colorCustomizations": { - "activityBar.activeBackground": "#64d25b", - "activityBar.activeBorder": "#0e321f", - "activityBar.background": 
"#64d25b", - "activityBar.foreground": "#15202b", - "activityBar.inactiveForeground": "#15202b99", - "activityBarBadge.background": "#6971d6", - "activityBarBadge.foreground": "#e7e7e7", - "sash.hoverBorder": "#64d25b", - "statusBar.background": "#41c436", - "statusBar.foreground": "#15202b", - "statusBarItem.hoverBackground": "#349c2b", - "statusBarItem.remoteBackground": "#41c436", - "statusBarItem.remoteForeground": "#15202b", - "titleBar.activeBackground": "#41c436", - "titleBar.activeForeground": "#15202b", - "titleBar.inactiveBackground": "#41c43699", - "titleBar.inactiveForeground": "#15202b99", - "commandCenter.border": "#15202b99" + "activityBar.activeBorder": "#0e321f" }, "peacock.remoteColor": "#41c436" } \ No newline at end of file diff --git a/README.md b/README.md index 2f04916..799f178 100644 --- a/README.md +++ b/README.md @@ -1,18 +1,30 @@ -# Main Contents +### Introduction -The main parts of this repository consists of the following folders: +The repository provides a theoretical and practical guide on how to prepare environments, train and apply several machine learning models to problems in different settings. We begin with how to prepare a training environment using the most popular technology stacks. Most of the implementations and examples discussed in the repository are done with the proposed tools. However, this is not a recommendation for a particular tool but a matter of choice, convenience and performance. In specific cases, I will mention why using a particular tool may be suitable in a given scenario. For now, I will begin by listing the main tools and discuss the contents of the repository. -- `misc_folders` - containing deep neural network and epidemiological models - In details, we begin by identifying the contents of the ```misc_folders``` -folders and the misc_folder consists of supporting files. The ``` misc_folder ``` contains the following files: - * A Deep neural network architecture drawing file. 
- * Epidemiological models to study the `n=2` strain in a given population implemented in Python. The aim is to analyze the effect of `n=2` disease strains consisting of different variations. The problem solves the case for `n=2` disease strains affecting a given population, which was submitted in partial fulfillment of the award of the Postgraduate Diploma at the African Institute for Mathematical Sciences, Capetown, South Africa. - * A python program implemented to study the simulation of a molecule in the nucleus of an atom. +### Technology Stack +- Training of the models discussed in this repository is done using + * TensorFlow + * PyTorch + * FLAX - flexible API and built on JAX + * Many Python-based deep learning frameworks and libraries + * Use of other languages will be highlighted where necessary. +- Observability ([as discussed here](https://grafana.com/grafana/dashboards/16110-fastapi-observability/) +) using Grafana, Tempo, Loki and Prometheus + + +### Contents + +The main folders are outlined below: -- `supervised` - containing deep neural networks models and applications -- `unsupervised` - containing models without label data -- `reinforcement` - describing implementation of models with agents -- `Quantum` - discusses concepts in quantum computing, algorithms and deep learning +### supervised +- containing deep neural networks models and applications +### unsupervised + In this repository, we discuss examples of models without output labels. In relation to this, we also present examples of problems where non-classical training approaches are used. Note that the main difference between unsupervised and supervised learning models is the absence of output labels associated with the data corresponding to the problem. Hence, we must determine whether output labels are available in the training data before choosing the training algorithm. 
+### reinforcement + - describing implementation of models with agents +### Quantum +- discusses concepts in quantum computing, algorithms and deep learning ## Observability @@ -25,10 +37,13 @@ We use the example from [this repo](https://github.com/blueswen/fastapi-observab References will be made to progress in the different models implemented in the folders contained in this repository. For instance, we will present the links to the papers, tutorials and other forms of publications associated with the topics covered in this repository. -## Tech Stack -- Training is based on using python and related packages such as - * TensorFlow - * FLAX - flexible form and built on JAX - * Transformers -- Observability ([from here](https://grafana.com/grafana/dashboards/16110-fastapi-observability/) -) using Grafana, Tempo, Loki and Prometheus +### misc_folders +- containing deep neural network and epidemiological models + In details, we begin by identifying the contents of the ```misc_folders``` +folders and the misc_folder consists of supporting files. The ``` misc_folder ``` contains the following files: + * A Deep neural network architecture drawing file. + * Epidemiological models to study the `n=2` strain in a given population implemented in Python. The aim is to analyze the effect of `n=2` disease strains consisting of different variations. The problem solves the case for `n=2` disease strains affecting a given population, which was submitted in partial fulfillment of the award of the Postgraduate Diploma at the African Institute for Mathematical Sciences, Capetown, South Africa. + * A python program implemented to study the simulation of a molecule in the nucleus of an atom. 
+ + +## Structure of the repository diff --git a/docker-compose.yaml b/docker-compose.yaml new file mode 100644 index 0000000..3f73700 --- /dev/null +++ b/docker-compose.yaml @@ -0,0 +1,79 @@ +version: "3.9" +services: + s3: + image: minio/minio + restart: unless-stopped + ports: + - "9000:9000" + - "9001:9001" + environment: + - MINIO_ROOT_USER=${AWS_ACCESS_KEY_ID} + - MINIO_ROOT_PASSWORD=${AWS_SECRET_ACCESS_KEY} + command: server /data --console-address ":9001" + networks: + - internal + - public + volumes: + - minio_volume:/data + db: + image: mysql/mysql-server:5.7.28 + restart: unless-stopped + container_name: mlflow_db + expose: + - "3306" + environment: + - MYSQL_DATABASE=${MYSQL_DATABASE} + - MYSQL_USER=${MYSQL_USER} + - MYSQL_PASSWORD=${MYSQL_PASSWORD} + - MYSQL_ROOT_PASSWORD=${MYSQL_ROOT_PASSWORD} + volumes: + - db_volume:/var/lib/mysql + networks: + - internal + mlflow: + container_name: tracker_mlflow + image: tracker_ml + restart: unless-stopped + build: + context: ./Dockerfile + dockerfile: Dockerfile + ports: + - "5000:5000" + environment: + - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} + - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} + - AWS_DEFAULT_REGION=${AWS_REGION} + - MLFLOW_S3_ENDPOINT_URL=http://s3:9000 + networks: + - public + - internal + entrypoint: Dockerfile server --backend-store-uri mysql+pymysql://${MYSQL_USER}:${MYSQL_PASSWORD}@db:3306/${MYSQL_DATABASE} --default-artifact-root s3://${AWS_BUCKET_NAME}/ --artifacts-destination s3://${AWS_BUCKET_NAME}/ -h 0.0.0.0 + depends_on: + wait-for-db: + condition: service_completed_successfully + create_s3_buckets: + image: minio/mc + depends_on: + - "s3" + entrypoint: > + /bin/sh -c " + until (/usr/bin/mc alias set minio http://s3:9000 '${AWS_ACCESS_KEY_ID}' '${AWS_SECRET_ACCESS_KEY}') do echo '...waiting...' 
&& sleep 1; done; + /usr/bin/mc mb minio/${AWS_BUCKET_NAME}; + exit 0; + " + networks: + - internal + wait-for-db: + image: atkrad/wait4x + depends_on: + - db + command: tcp db:3306 -t 90s -i 250ms + networks: + - internal +networks: + internal: + public: + driver: bridge +volumes: + db_volume: + minio_volume: \ No newline at end of file diff --git a/etc/dashboards.yaml b/etc/dashboards.yaml index 1ec1fee..97af47e 100644 --- a/etc/dashboards.yaml +++ b/etc/dashboards.yaml @@ -1,6 +1,6 @@ apiVersion: 1 providers: -- name: 'FastAPI Observability' +- name: 'Application Observability' orgId: 1 folder: '' type: 'file' diff --git a/infra/docker/airflow/Dockerfile b/infra/docker/airflow/Dockerfile new file mode 100644 index 0000000..1a89afa --- /dev/null +++ b/infra/docker/airflow/Dockerfile @@ -0,0 +1 @@ +FROM airflow2/ariflow diff --git a/reinforcement/reinforcement_trainer.py b/infra/docker/mlflow/Dockerfile similarity index 100% rename from reinforcement/reinforcement_trainer.py rename to infra/docker/mlflow/Dockerfile diff --git a/reinforcement/src/__init__.py b/infra/observe/grafana/monitor.json similarity index 100% rename from reinforcement/src/__init__.py rename to infra/observe/grafana/monitor.json diff --git a/reinforcement/src/models/__init__.py b/infra/observe/loki/monitor.json similarity index 100% rename from reinforcement/src/models/__init__.py rename to infra/observe/loki/monitor.json diff --git a/supervised/generative-ai/README.md b/supervised/generative-ai/README.md new file mode 100644 index 0000000..a64dbea --- /dev/null +++ b/supervised/generative-ai/README.md @@ -0,0 +1,14 @@ +### Generative Adversarial Networks (GANs) + +### Introduction +This supervised deep learning method is based on a generator feed forward neural network and a discriminator. Formulated on ideas linked to game theory, it is meant to present two competing networks which will output a given probability using information derived from the data set. 
+ +### Features of GANs +- Two competing agents whose objectives are to work toward opposing goals. +- This implies each participating agent continues to come up with strategies to deceive the other. +- This method is associated with game-theoretic minimax methods. + +In order to understand the foundation, implementation and application of GANs, we provide a basic description of the model. We then provide concrete examples of how GANs can be applied to a real-life problem. Before delving into these steps, we would like to describe a simple example of a typical scenario which can be used to replicate GAN models. + +### Example Description +Consider a situation involving two agents in real life: a police officer and a criminal. As stated in the example here, if the criminal is a counterfeiter, and often tries to come up with ways to evade detection, the police officer will also come up with a much better way to provide security. \ No newline at end of file diff --git a/reinforcement/src/models/train_model.py b/supervised/generative-ai/notebooks/REAME.md similarity index 100% rename from reinforcement/src/models/train_model.py rename to supervised/generative-ai/notebooks/REAME.md diff --git a/reinforcement/src/visualization/__init__.py b/supervised/generative-ai/src/__init__.py similarity index 100% rename from reinforcement/src/visualization/__init__.py rename to supervised/generative-ai/src/__init__.py diff --git a/transformers/src/models/__init__.py b/supervised/generative-ai/src/models/__init__.py similarity index 100% rename from transformers/src/models/__init__.py rename to supervised/generative-ai/src/models/__init__.py diff --git a/reinforcement/src/visualization/visualize.py b/supervised/generative-ai/src/models/generate_data.py similarity index 100% rename from reinforcement/src/visualization/visualize.py rename to supervised/generative-ai/src/models/generate_data.py diff --git a/transformers/src/models/train_model.py 
b/supervised/generative-ai/src/models/train_model.py similarity index 100% rename from transformers/src/models/train_model.py rename to supervised/generative-ai/src/models/train_model.py diff --git a/transformers/src/models/predict_model.py b/supervised/generative-ai/src/visualization/__init__.py similarity index 100% rename from transformers/src/models/predict_model.py rename to supervised/generative-ai/src/visualization/__init__.py diff --git a/supervised/generative-ai/src/visualization/visualize.py b/supervised/generative-ai/src/visualization/visualize.py new file mode 100644 index 0000000..e69de29 diff --git a/supervised/recommenders/notebooks/REAME.md b/supervised/recommenders/notebooks/REAME.md new file mode 100644 index 0000000..e69de29 diff --git a/supervised/recommenders/src/__init__.py b/supervised/recommenders/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/supervised/recommenders/src/models/__init__.py b/supervised/recommenders/src/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/supervised/recommenders/src/models/train_model.py b/supervised/recommenders/src/models/train_model.py new file mode 100644 index 0000000..e69de29 diff --git a/supervised/recommenders/src/visualization/__init__.py b/supervised/recommenders/src/visualization/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/supervised/recommenders/src/visualization/visualize.py b/supervised/recommenders/src/visualization/visualize.py new file mode 100644 index 0000000..e69de29 diff --git a/reinforcement/README.md b/supervised/reinforcement/README.md similarity index 100% rename from reinforcement/README.md rename to supervised/reinforcement/README.md diff --git a/reinforcement/basic_example.ipynb b/supervised/reinforcement/basic_example.ipynb similarity index 100% rename from reinforcement/basic_example.ipynb rename to supervised/reinforcement/basic_example.ipynb diff --git a/supervised/reinforcement/reinforcement_trainer.py 
b/supervised/reinforcement/reinforcement_trainer.py new file mode 100644 index 0000000..e69de29 diff --git a/supervised/reinforcement/src/__init__.py b/supervised/reinforcement/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/supervised/reinforcement/src/models/__init__.py b/supervised/reinforcement/src/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/supervised/reinforcement/src/models/train_model.py b/supervised/reinforcement/src/models/train_model.py new file mode 100644 index 0000000..e69de29 diff --git a/supervised/reinforcement/src/visualization/__init__.py b/supervised/reinforcement/src/visualization/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/supervised/reinforcement/src/visualization/visualize.py b/supervised/reinforcement/src/visualization/visualize.py new file mode 100644 index 0000000..e69de29 diff --git a/supervised/transformers/README.md b/supervised/transformers/README.md new file mode 100644 index 0000000..766e6a0 --- /dev/null +++ b/supervised/transformers/README.md @@ -0,0 +1,29 @@ +## Concepts + +----------------------------------------------- +We discuss the advent of transformers and their applications to training various machine learning problems. To begin, we highlight the steps and the evolution of prior deep learning architectures and their limitations in training. +### Tools and Tech Stack +- Python 3.10+ +- Observation via Grafana (UI), Loki (logs), Tempo (traces) and Prometheus (metrics). +- PyTorch (for some examples) +- Examples implemented using MLflow for monitoring and used in training pipelines. +- TensorFlow - a library for training machine learning models +- Flax is a flexible user experience library via JAX +### Models +The following models are implemented in this folder: + +### In the beginning... +As we know, deep learning neural networks are known to exhibit universal approximation abilities in predicting or classifying problems. 
However, their limitations in translation tasks, image processing and similar problems have been widely encountered and discussed in the literature. Hence, improvements on DNNs have resulted in other types of architectures. Examples include recurrent neural networks (RNNs) -- with Long Short-Term Memory networks (LSTMs) as a special case -- convolutional neural networks (CNNs) and more. Even though these architectures have shown remarkable results in translation tasks, image processing, segmentation and speech recognition tasks, they are limited in a number of applications. +Language models represent supervised learning models used to train and develop text and document-based learning. + +### Life before BERT +- Bidirectional Encoder Representations from Transformers (BERT) +- Use of machine language translation +- Attention-based models via the `Attention Is All You Need` paper +### BERT Model +- Based on introduction of optimal training to model + +### Life After BERT +- Usage of `simplified` training architecture +- Removed bottlenecks in training +- Added more simplified attention-based parts in training diff --git a/supervised/transformers/src/models/__init__.py b/supervised/transformers/src/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/supervised/transformers/src/models/decoder.py b/supervised/transformers/src/models/decoder.py new file mode 100644 index 0000000..e69de29 diff --git a/supervised/transformers/src/models/encoder.py b/supervised/transformers/src/models/encoder.py new file mode 100644 index 0000000..e69de29 diff --git a/supervised/transformers/src/models/predict_model.py b/supervised/transformers/src/models/predict_model.py new file mode 100644 index 0000000..e69de29 diff --git a/supervised/transformers/src/models/train_model.py b/supervised/transformers/src/models/train_model.py new file mode 100644 index 0000000..e69de29 diff --git a/transformers/README.md b/transformers/README.md deleted file mode 100644 index 43e84d3..0000000 --- 
a/transformers/README.md +++ /dev/null @@ -1,14 +0,0 @@ -# Main Concepts - ------------------------------------------------ -## Tools used in training Transformers -- Python 3.10+ -- Observation via Grafana (UI), Loki(logs), tempo(traces) and Prometheus (metrics). -- Pytorch (for some examples) -- Examples implemented using mlflow for monitoring and used in training pipelines. -- TensorFlow - a library from training machine learning models -- Flax is a flexible user experience library via JAX -## Models Implemented -The following models are implemented in this folder: - -- Bidirectional Encoder Representations from Transformers (BERT) diff --git a/unsupervised/README.md b/unsupervised/README.md index 2a5aabf..ea91260 100644 --- a/unsupervised/README.md +++ b/unsupervised/README.md @@ -1,14 +1,16 @@ -# Contents of the folder +### Contents -------------------------- -## Algorithms +### Algorithms * Identify different data collection techniques * Data types, preparation and analysis. -* Apply use cases to clustering, principal component analysis (PCA) etc. -* Apply unsupervised learning algorithms to prepare different types of data sets prior to training. +* Apply use cases to clustering, principal component analysis (PCA) +* Introduce methods to deal with existing algorithms. +* Apply unsupervised learning algorithms for the preparation of various data sets in different formats +prior to training. -### Machine Learning Folder +### Main Tasks and implementation * Contains code, examples and explanations on how to apply different types of ML methods to several problems.