|
|
@@ -0,0 +1,262 @@
|
|
|
+{
|
|
|
+ "cells": [
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "id": "9c5c18a1",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "* Pong V4: https://gymnasium.farama.org/environments/atari/pong/\n",
|
|
|
+ "* JAX installation: https://github.com/google/jax#installation\n",
|
|
|
+ "* Proximal Policy Optimization: https://coax.readthedocs.io/en/latest/examples/stubs/ppo.html\n",
|
|
|
+ "* Original coax Atari PPO example: https://coax.readthedocs.io/en/latest/examples/atari/ppo.html"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "id": "2df4fa0b",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "# Libraries"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "id": "df27797f",
|
|
|
+ "metadata": {
|
|
|
+ "scrolled": true
|
|
|
+ },
|
|
|
+ "outputs": [],
|
|
|
+ "source": []
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "id": "2a0272f8-5939-444e-b8fa-7e62ddb8c609",
|
|
|
+ "metadata": {
|
|
|
+ "execution": {
|
|
|
+ "iopub.execute_input": "2023-11-27T21:56:37.199046Z",
|
|
|
+ "iopub.status.busy": "2023-11-27T21:56:37.198632Z",
|
|
|
+ "iopub.status.idle": "2023-11-27T21:56:37.201551Z",
|
|
|
+ "shell.execute_reply": "2023-11-27T21:56:37.201190Z",
|
|
|
+ "shell.execute_reply.started": "2023-11-27T21:56:37.199011Z"
|
|
|
+ }
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+ "## Environment Variables"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "id": "68fde155-7757-4110-8ff5-1cfd9d91c7d4",
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": []
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "id": "4876213b",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "# Environment"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "id": "092108a6",
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": []
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "id": "6a2a2b8f",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "## Possible actions"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "id": "baa95bb6",
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": []
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "id": "a412017e-b1b1-4d03-8adb-ac28dd74b5eb",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "# Support Functions"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "id": "6274ef74-a283-48cd-a74e-469e3ae530d3",
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": []
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "id": "57f66abe-c7c6-452f-ae5e-1459a1f42688",
|
|
|
+ "metadata": {
|
|
|
+ "execution": {
|
|
|
+ "iopub.execute_input": "2023-11-27T22:48:58.667049Z",
|
|
|
+ "iopub.status.busy": "2023-11-27T22:48:58.666635Z",
|
|
|
+ "iopub.status.idle": "2023-11-27T22:48:58.673072Z",
|
|
|
+ "shell.execute_reply": "2023-11-27T22:48:58.671381Z",
|
|
|
+ "shell.execute_reply.started": "2023-11-27T22:48:58.667017Z"
|
|
|
+ }
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+ "# Function Approximators"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "id": "25031e45-d381-4e3f-bda6-655f9835e58f",
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": []
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "id": "4d049ed3-d80e-4d4c-b51c-aaabaa52b798",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "# Target Networks"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "id": "6fb71c60-0855-48c0-a5f0-dfed66b64f1f",
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": []
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "id": "cf0d0c61-2cb7-4e72-9a85-978beaa53fcc",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "# Policy Regularizer (Avoid Premature Exploitation)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "id": "f4d82b64-980c-46a4-9dde-399e54f4ff64",
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": []
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "id": "706d4d5c-d9a6-4b21-a08e-9ea1089a531c",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "# Updaters"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "id": "b8e0169b-168c-49bc-8427-afd9b9730bbe",
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": []
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "id": "360b2db3-fb04-4621-bef6-4a8ba574a06f",
|
|
|
+ "metadata": {
|
|
|
+ "execution": {
|
|
|
+ "iopub.execute_input": "2023-11-27T22:50:33.890963Z",
|
|
|
+ "iopub.status.busy": "2023-11-27T22:50:33.890531Z",
|
|
|
+ "iopub.status.idle": "2023-11-27T22:50:33.896271Z",
|
|
|
+ "shell.execute_reply": "2023-11-27T22:50:33.895067Z",
|
|
|
+ "shell.execute_reply.started": "2023-11-27T22:50:33.890928Z"
|
|
|
+ }
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+ "# Reward Tracer and Replay Buffer"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "id": "83290b28-fd4c-4511-8c15-1dd12d899b2e",
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": []
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "id": "e438eb96-d5a1-4fda-83c6-af717686b97d",
|
|
|
+ "metadata": {
|
|
|
+ "execution": {
|
|
|
+ "iopub.execute_input": "2023-11-27T22:52:03.161776Z",
|
|
|
+ "iopub.status.busy": "2023-11-27T22:52:03.160449Z",
|
|
|
+ "iopub.status.idle": "2023-11-27T22:52:03.164469Z",
|
|
|
+ "shell.execute_reply": "2023-11-27T22:52:03.163946Z",
|
|
|
+ "shell.execute_reply.started": "2023-11-27T22:52:03.161723Z"
|
|
|
+ }
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+ "# Training"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "id": "387e1e90-c6ff-4110-94b7-58fa00e4a90c",
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": []
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "id": "3cc509e8-1bc2-43f1-8927-064e698cf414",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "# Save Model"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "id": "24f23062-e018-4f59-8b11-ce83b306d27b",
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": []
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "metadata": {
|
|
|
+ "kernelspec": {
|
|
|
+ "display_name": "Python 3 (ipykernel)",
|
|
|
+ "language": "python",
|
|
|
+ "name": "python3"
|
|
|
+ },
|
|
|
+ "language_info": {
|
|
|
+ "codemirror_mode": {
|
|
|
+ "name": "ipython",
|
|
|
+ "version": 3
|
|
|
+ },
|
|
|
+ "file_extension": ".py",
|
|
|
+ "mimetype": "text/x-python",
|
|
|
+ "name": "python",
|
|
|
+ "nbconvert_exporter": "python",
|
|
|
+ "pygments_lexer": "ipython3",
|
|
|
+ "version": "3.11.6"
|
|
|
+ }
|
|
|
+ },
|
|
|
+ "nbformat": 4,
|
|
|
+ "nbformat_minor": 5
|
|
|
+}
|