{ "cells": [ { "cell_type": "markdown", "id": "9c5c18a1", "metadata": {}, "source": [ "* Pong V4: https://gymnasium.farama.org/environments/atari/pong/\n", "* JAX installation: https://github.com/google/jax#installation\n", "* Proximal Policy Optimization: https://coax.readthedocs.io/en/latest/examples/stubs/ppo.html\n", "* Original coax Atari PPO example: https://coax.readthedocs.io/en/latest/examples/atari/ppo.html" ] }, { "cell_type": "markdown", "id": "2df4fa0b", "metadata": {}, "source": [ "# Libraries" ] }, { "cell_type": "code", "execution_count": null, "id": "df27797f", "metadata": { "scrolled": true }, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "2a0272f8-5939-444e-b8fa-7e62ddb8c609", "metadata": { "execution": { "iopub.execute_input": "2023-11-27T21:56:37.199046Z", "iopub.status.busy": "2023-11-27T21:56:37.198632Z", "iopub.status.idle": "2023-11-27T21:56:37.201551Z", "shell.execute_reply": "2023-11-27T21:56:37.201190Z", "shell.execute_reply.started": "2023-11-27T21:56:37.199011Z" } }, "source": [ "## Environment Variables" ] }, { "cell_type": "code", "execution_count": null, "id": "68fde155-7757-4110-8ff5-1cfd9d91c7d4", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "4876213b", "metadata": {}, "source": [ "# Environment" ] }, { "cell_type": "code", "execution_count": null, "id": "092108a6", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "6a2a2b8f", "metadata": {}, "source": [ "## Possible actions" ] }, { "cell_type": "code", "execution_count": null, "id": "baa95bb6", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "a412017e-b1b1-4d03-8adb-ac28dd74b5eb", "metadata": {}, "source": [ "# Support Functions" ] }, { "cell_type": "code", "execution_count": null, "id": "6274ef74-a283-48cd-a74e-469e3ae530d3", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "57f66abe-c7c6-452f-ae5e-1459a1f42688", "metadata": { "execution": { 
"iopub.execute_input": "2023-11-27T22:48:58.667049Z", "iopub.status.busy": "2023-11-27T22:48:58.666635Z", "iopub.status.idle": "2023-11-27T22:48:58.673072Z", "shell.execute_reply": "2023-11-27T22:48:58.671381Z", "shell.execute_reply.started": "2023-11-27T22:48:58.667017Z" } }, "source": [ "# Function Approximators" ] }, { "cell_type": "code", "execution_count": null, "id": "25031e45-d381-4e3f-bda6-655f9835e58f", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "4d049ed3-d80e-4d4c-b51c-aaabaa52b798", "metadata": {}, "source": [ "# Target Networks" ] }, { "cell_type": "code", "execution_count": null, "id": "6fb71c60-0855-48c0-a5f0-dfed66b64f1f", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "cf0d0c61-2cb7-4e72-9a85-978beaa53fcc", "metadata": {}, "source": [ "# Policy Regularizer (Avoid Premature Exploitation)" ] }, { "cell_type": "code", "execution_count": null, "id": "f4d82b64-980c-46a4-9dde-399e54f4ff64", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "706d4d5c-d9a6-4b21-a08e-9ea1089a531c", "metadata": {}, "source": [ "# Updaters" ] }, { "cell_type": "code", "execution_count": null, "id": "b8e0169b-168c-49bc-8427-afd9b9730bbe", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "360b2db3-fb04-4621-bef6-4a8ba574a06f", "metadata": { "execution": { "iopub.execute_input": "2023-11-27T22:50:33.890963Z", "iopub.status.busy": "2023-11-27T22:50:33.890531Z", "iopub.status.idle": "2023-11-27T22:50:33.896271Z", "shell.execute_reply": "2023-11-27T22:50:33.895067Z", "shell.execute_reply.started": "2023-11-27T22:50:33.890928Z" } }, "source": [ "# Reward Tracer and Replay Buffer" ] }, { "cell_type": "code", "execution_count": null, "id": "83290b28-fd4c-4511-8c15-1dd12d899b2e", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "e438eb96-d5a1-4fda-83c6-af717686b97d", "metadata": { "execution": { "iopub.execute_input": 
"2023-11-27T22:52:03.161776Z", "iopub.status.busy": "2023-11-27T22:52:03.160449Z", "iopub.status.idle": "2023-11-27T22:52:03.164469Z", "shell.execute_reply": "2023-11-27T22:52:03.163946Z", "shell.execute_reply.started": "2023-11-27T22:52:03.161723Z" } }, "source": [ "# Training" ] }, { "cell_type": "code", "execution_count": null, "id": "387e1e90-c6ff-4110-94b7-58fa00e4a90c", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "3cc509e8-1bc2-43f1-8927-064e698cf414", "metadata": {}, "source": [ "# Save Model" ] }, { "cell_type": "code", "execution_count": null, "id": "24f23062-e018-4f59-8b11-ce83b306d27b", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.6" } }, "nbformat": 4, "nbformat_minor": 5 }