il y a 2 ans · dcd5ddf9d7
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,8 @@
 
				 **/.ipynb_checkpoints/
			
 
				 **/00*.ipynb
			
 
				+**/*.lz4
			
 
				 **/*.npy
			
 
				 Embeddings/data/*
			
 
				 Embeddings/dist/*
			
 
				 Milvus/volumes
			
 
				+Pong/data
			
--- a/Learn.ipynb
+++ b/Learn.ipynb
@@ -0,0 +1,262 @@
 
				+{
			
 
				+ "cells": [
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "id": "9c5c18a1",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "* Pong V4: https://gymnasium.farama.org/environments/atari/pong/\n",
			
 
				+    "* JAX installation: https://github.com/google/jax#installation\n",
			
 
				+    "* Proximal Policy Optimization: https://coax.readthedocs.io/en/latest/examples/stubs/ppo.html\n",
			
 
				+    "* Original: https://coax.readthedocs.io/en/latest/examples/atari/ppo.html"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "id": "2df4fa0b",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "# Libraries"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "id": "df27797f",
			
 
				+   "metadata": {
			
 
				+    "scrolled": true
			
 
				+   },
			
 
				+   "outputs": [],
			
 
				+   "source": []
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "id": "2a0272f8-5939-444e-b8fa-7e62ddb8c609",
			
 
				+   "metadata": {
			
 
				+    "execution": {
			
 
				+     "iopub.execute_input": "2023-11-27T21:56:37.199046Z",
			
 
				+     "iopub.status.busy": "2023-11-27T21:56:37.198632Z",
			
 
				+     "iopub.status.idle": "2023-11-27T21:56:37.201551Z",
			
 
				+     "shell.execute_reply": "2023-11-27T21:56:37.201190Z",
			
 
				+     "shell.execute_reply.started": "2023-11-27T21:56:37.199011Z"
			
 
				+    }
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "## Environment Variables"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "id": "68fde155-7757-4110-8ff5-1cfd9d91c7d4",
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": []
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "id": "4876213b",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "# Environment"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "id": "092108a6",
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": []
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "id": "6a2a2b8f",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "## Possible actions"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "id": "baa95bb6",
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": []
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "id": "a412017e-b1b1-4d03-8adb-ac28dd74b5eb",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "# Support Functions"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "id": "6274ef74-a283-48cd-a74e-469e3ae530d3",
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": []
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "id": "57f66abe-c7c6-452f-ae5e-1459a1f42688",
			
 
				+   "metadata": {
			
 
				+    "execution": {
			
 
				+     "iopub.execute_input": "2023-11-27T22:48:58.667049Z",
			
 
				+     "iopub.status.busy": "2023-11-27T22:48:58.666635Z",
			
 
				+     "iopub.status.idle": "2023-11-27T22:48:58.673072Z",
			
 
				+     "shell.execute_reply": "2023-11-27T22:48:58.671381Z",
			
 
				+     "shell.execute_reply.started": "2023-11-27T22:48:58.667017Z"
			
 
				+    }
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "# Function Approximators"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "id": "25031e45-d381-4e3f-bda6-655f9835e58f",
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": []
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "id": "4d049ed3-d80e-4d4c-b51c-aaabaa52b798",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "# Target Networks"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "id": "6fb71c60-0855-48c0-a5f0-dfed66b64f1f",
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": []
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "id": "cf0d0c61-2cb7-4e72-9a85-978beaa53fcc",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "# Policy Regularizer (Avoid Premature Exploitation)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "id": "f4d82b64-980c-46a4-9dde-399e54f4ff64",
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": []
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "id": "706d4d5c-d9a6-4b21-a08e-9ea1089a531c",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "# Updaters"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "id": "b8e0169b-168c-49bc-8427-afd9b9730bbe",
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": []
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "id": "360b2db3-fb04-4621-bef6-4a8ba574a06f",
			
 
				+   "metadata": {
			
 
				+    "execution": {
			
 
				+     "iopub.execute_input": "2023-11-27T22:50:33.890963Z",
			
 
				+     "iopub.status.busy": "2023-11-27T22:50:33.890531Z",
			
 
				+     "iopub.status.idle": "2023-11-27T22:50:33.896271Z",
			
 
				+     "shell.execute_reply": "2023-11-27T22:50:33.895067Z",
			
 
				+     "shell.execute_reply.started": "2023-11-27T22:50:33.890928Z"
			
 
				+    }
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "# Reward Tracer and Replay Buffer"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "id": "83290b28-fd4c-4511-8c15-1dd12d899b2e",
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": []
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "id": "e438eb96-d5a1-4fda-83c6-af717686b97d",
			
 
				+   "metadata": {
			
 
				+    "execution": {
			
 
				+     "iopub.execute_input": "2023-11-27T22:52:03.161776Z",
			
 
				+     "iopub.status.busy": "2023-11-27T22:52:03.160449Z",
			
 
				+     "iopub.status.idle": "2023-11-27T22:52:03.164469Z",
			
 
				+     "shell.execute_reply": "2023-11-27T22:52:03.163946Z",
			
 
				+     "shell.execute_reply.started": "2023-11-27T22:52:03.161723Z"
			
 
				+    }
			
 
				+   },
			
 
				+   "source": [
			
 
				+    "# Training"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "id": "387e1e90-c6ff-4110-94b7-58fa00e4a90c",
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": []
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "id": "3cc509e8-1bc2-43f1-8927-064e698cf414",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "# Save Model"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "id": "24f23062-e018-4f59-8b11-ce83b306d27b",
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": []
			
 
				+  }
			
 
				+ ],
			
 
				+ "metadata": {
			
 
				+  "kernelspec": {
			
 
				+   "display_name": "Python 3 (ipykernel)",
			
 
				+   "language": "python",
			
 
				+   "name": "python3"
			
 
				+  },
			
 
				+  "language_info": {
			
 
				+   "codemirror_mode": {
			
 
				+    "name": "ipython",
			
 
				+    "version": 3
			
 
				+   },
			
 
				+   "file_extension": ".py",
			
 
				+   "mimetype": "text/x-python",
			
 
				+   "name": "python",
			
 
				+   "nbconvert_exporter": "python",
			
 
				+   "pygments_lexer": "ipython3",
			
 
				+   "version": "3.11.6"
			
 
				+  }
			
 
				+ },
			
 
				+ "nbformat": 4,
			
 
				+ "nbformat_minor": 5
			
 
				+}
			
--- a/Play.ipynb
+++ b/Play.ipynb
@@ -0,0 +1,99 @@
 
				+{
			
 
				+ "cells": [
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "id": "9c5c18a1",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "* Taxi V3: https://gymnasium.farama.org/environments/toy_text/taxi/"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "id": "2df4fa0b",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "# Libraries"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "id": "df27797f",
			
 
				+   "metadata": {
			
 
				+    "scrolled": true
			
 
				+   },
			
 
				+   "outputs": [],
			
 
				+   "source": []
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "id": "4876213b",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "# Environment"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "id": "092108a6",
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": []
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "id": "29f90cc2",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "# Load"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "id": "992284e8-4a1e-49b4-89b0-c48e0676fda3",
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": []
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "id": "e86a4fbe",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "# Playthrough"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "id": "b609ed20",
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": []
			
 
				+  }
			
 
				+ ],
			
 
				+ "metadata": {
			
 
				+  "kernelspec": {
			
 
				+   "display_name": "Python 3 (ipykernel)",
			
 
				+   "language": "python",
			
 
				+   "name": "python3"
			
 
				+  },
			
 
				+  "language_info": {
			
 
				+   "codemirror_mode": {
			
 
				+    "name": "ipython",
			
 
				+    "version": 3
			
 
				+   },
			
 
				+   "file_extension": ".py",
			
 
				+   "mimetype": "text/x-python",
			
 
				+   "name": "python",
			
 
				+   "nbconvert_exporter": "python",
			
 
				+   "pygments_lexer": "ipython3",
			
 
				+   "version": "3.11.6"
			
 
				+  }
			
 
				+ },
			
 
				+ "nbformat": 4,
			
 
				+ "nbformat_minor": 5
			
 
				+}
			
--- a/Pong/Play.ipynb
+++ b/Pong/Play.ipynb
@@ -0,0 +1,139 @@
 
				+{
			
 
				+ "cells": [
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "id": "9c5c18a1",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "* Pong V4: https://gymnasium.farama.org/environments/atari/pong/"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "id": "2df4fa0b",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "# Libraries"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "id": "df27797f",
			
 
				+   "metadata": {
			
 
				+    "scrolled": true
			
 
				+   },
			
 
				+   "outputs": [],
			
 
				+   "source": []
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "id": "4876213b",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "# Environment"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "id": "092108a6",
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": []
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "id": "6a2a2b8f",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "## Possible actions"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "id": "baa95bb6",
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": []
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "id": "c9401905",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "## Taking Screenshot"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "id": "bca13d96",
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": []
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "id": "22a99803",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "# Running Games"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "id": "d5fe9402",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "## Spectator"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "id": "097a8526",
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": []
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "id": "c07e53c6",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "## Full Random Run"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "id": "7f4d8911",
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": []
			
 
				+  }
			
 
				+ ],
			
 
				+ "metadata": {
			
 
				+  "kernelspec": {
			
 
				+   "display_name": "Python 3 (ipykernel)",
			
 
				+   "language": "python",
			
 
				+   "name": "python3"
			
 
				+  },
			
 
				+  "language_info": {
			
 
				+   "codemirror_mode": {
			
 
				+    "name": "ipython",
			
 
				+    "version": 3
			
 
				+   },
			
 
				+   "file_extension": ".py",
			
 
				+   "mimetype": "text/x-python",
			
 
				+   "name": "python",
			
 
				+   "nbconvert_exporter": "python",
			
 
				+   "pygments_lexer": "ipython3",
			
 
				+   "version": "3.11.6"
			
 
				+  }
			
 
				+ },
			
 
				+ "nbformat": 4,
			
 
				+ "nbformat_minor": 5
			
 
				+}
			
--- a/Pong/requirements.txt
+++ b/Pong/requirements.txt
@@ -0,0 +1,4 @@
 
				+coax==0.1.13
			
 
				+gymnasium[other]==0.29.1
			
 
				+imageio==2.33.0
			
 
				+jupyterlab==4.0.9
			
--- a/README.md
+++ b/README.md
@@ -4,6 +4,7 @@ The objective is to give a very brief introduction to _DQN_ training using games
 
				 
			
 
				 Things to cover:
			
 
				 - Simple theory of Q-learning, with a practical example using a Taxi game.
			
 
				+- Simple theory of Proximal Policy Optimization, with a practical example using a Pong game.
			
 
				 - _OpenSource Web_ interfaces available for Stable Diffusion, Inference and Embeddings.
			
 
				 - A practical example on how to create and use _LLMs_ and _Embeddings_ using a _Python_ script.