8thlight · EByrdS · Mar 20, 2023 · Mar 3, 2023 · Mar 3, 2023 · Mar 3, 2023
diff --git a/.gitignore b/.gitignore
@@ -128,4 +128,8 @@ dmypy.json
 # Pyre type checker
 .pyre/
 
-*.DS_Store
+*.DS_Store
+
+# Ignore until Pipenv has multi-platform lock file support
+/Pipfile
+/Pipfile.lock
diff --git a/.travis.yml b/.travis.yml
@@ -4,9 +4,12 @@ python:
 before_install:
   - export PYTHONPATH=$PYTHONPATH:$(pwd)/src
 
+arch:
+  - amd64
+
 install:
   - pip install pipenv
-  - pipenv install --dev
+  - make install
 
 stages:
   - style

diff --git a/Makefile b/Makefile
@@ -30,10 +30,21 @@ lintfix-hard:
 pipfile: install
 
 install:
-	pipenv install --dev
+	@PLATFORM=$$(python platform_pipfile.py) && \
+	cp platforms/$$PLATFORM/Pipfile Pipfile && \
+	cp platforms/$$PLATFORM/Pipfile.lock Pipfile.lock && \
+	echo "Installing for platform $$PLATFORM" && \
+	pipenv install --dev;
+
+mmm:  # Copies the working Pipfile into platforms/arm64/ (NOTE(review): looks like a scratch target - confirm it is still needed)
+	cp Pipfile platforms/arm64/Pipfile
 
 lock:
-	pipenv lock
+	@PLATFORM=$$(python platform_pipfile.py) && \
+	pipenv lock && \
+	cp Pipfile platforms/$$PLATFORM/Pipfile && \
+	cp Pipfile.lock platforms/$$PLATFORM/Pipfile.lock && \
+	echo "Locking for platform $$PLATFORM";
 
 clean:
 	pipenv clean

diff --git a/README.md b/README.md
@@ -32,6 +32,18 @@ To activate this environment run
 make activate
 ```
 
+To add this project's packages to the environment run
+```bash
+make packages
+```
+
+Always install and lock packages through `make`, as it handles the different platforms for you.
+If your platform is not supported yet, please update `platform_pipfile.py` accordingly, or create a new platform directory if necessary. This is meant to be a temporary workaround until Pipenv adds multi-platform `Pipfile.lock` [support](https://github.com/pypa/pipenv/issues/5130).
+
+To see graphs of ML models using `tf.keras.utils.plot_model`, you will also need [graphviz](https://graphviz.gitlab.io/download/).
+
+The `[packages]` and `[dev-packages]` sections of the Pipfiles list the dependencies imported in the code and those used only through the CLI, respectively.
+
 ## Running examples
 Every file in the `cli/` folder is an independent example available through
 CLI commands. Use `python <file>.py --help` to see the available options

diff --git a/cli/qlearning_snake.py b/cli/qlearning_snake.py
@@ -51,7 +51,7 @@ def buil_arg_parser():
         help="Maximum board width"
     )
     parser.add_argument(
-        "--max-height", metavar="300", type=int, default=300,
+        "--max-height", metavar="320", type=int, default=320,
         help="Maximum board height"
     )
     return parser

diff --git a/platform_pipfile.py b/platform_pipfile.py
@@ -0,0 +1,36 @@
+"""Prints the platform name for the current architecture
+as needed by the Pipfiles under the platforms/ directory.
+
+If a new architecture is added, this script should be updated.
+
+If a new architecture is compatible with an existing platform,
+make sure it is mapped to the correct platform name.
+
+If a new architecture is incompatible with an existing platform,
+create a new platform directory, add a new Pipfile file, and
+add a new entry to the if-else statement below.
+
+Usage:
+  python platform_pipfile.py
+
+Example:
+  $ python platform_pipfile.py
+  x86_64
+
+  $ python platform_pipfile.py
+  arm64
+"""
+import platform
+
+def main():
+  machine = platform.machine()
+
+  if machine == "arm64" or machine == "aarch64":
+    print("arm64")
+  elif machine == "x86_64":
+    print("x86_64")
+  else:
+    raise Exception("Unsupported architecture: " + machine)
+
+if __name__ == "__main__":  # only when run as a script, e.g. `python platform_pipfile.py`
+  main()
diff --git a/platforms/arm64/Pipfile b/platforms/arm64/Pipfile
@@ -0,0 +1,19 @@
+[[source]]
+url = "https://pypi.org/simple"
+verify_ssl = true
+name = "pypi"
+
+[packages]
+tensorflow-macos = "~=2.9.0"
+tensorflow-metal = "~=0.5.0"
+matplotlib = "~=3.5.1"
+pygame = "~=2.1.2"
+
+[requires]
+python_version = "3.9"
+
+[dev-packages]
+pydot = "~=1.4.2"
+pylint = "~=2.12.2"
+pytest = "~=7.0.1"
+autopep8 = "~=1.6.0"
diff --git a/platforms/arm64/Pipfile.lock b/platforms/arm64/Pipfile.lock
diff --git a/Pipfile → platforms/x86_64/Pipfile b/Pipfile → platforms/x86_64/Pipfile
diff --git a/Pipfile.lock → platforms/x86_64/Pipfile.lock b/Pipfile.lock → platforms/x86_64/Pipfile.lock
diff --git a/src/reinforcement_learning/q_learning/model.py b/src/reinforcement_learning/q_learning/model.py
@@ -30,23 +30,21 @@ def __init__(self, model, learning_rate=1e-4, gamma=0.9):
         self.loss_object = keras.losses.MeanSquaredError()
 
 
+    @tf.function
     def train_step(self, states, actions, rewards, next_states, dones):
         """
         Updates the model's parameters by calculating their derivatives
         with respect to the loss function
         """
-        future_rewards = self.model.predict(next_states)
+        future_rewards = tf.reduce_max(self.model(next_states), axis=1)
         # Q value = reward + discount factor * expected future reward
-        updated_q_values = rewards + self.gamma * tf.reduce_max(
-            future_rewards, axis=1
-        )
+        updated_q_values = rewards + tf.math.multiply(self.gamma, future_rewards)
 
-        updated_q_values = updated_q_values * (1 - dones)
+        updated_q_values = tf.math.multiply(updated_q_values, (1 - dones))
 
         masks = actions
 
         with tf.GradientTape() as tape:
-            tape.watch(self.model.trainable_variables)
             # train the model on the states and updated Q-values
             q_values = self.model(states)  # similar to action_probs
 
@@ -56,7 +54,7 @@ def train_step(self, states, actions, rewards, next_states, dones):
             # calculate loss between new Q-value and old Q-value
             loss = self.loss_object(updated_q_values, q_action)
 
-            # Backpropagation
+        # Backpropagation
         grads = tape.gradient(loss, self.model.trainable_variables)
         self.optimizer.apply_gradients(
             zip(grads, self.model.trainable_variables))