rasbt
diff --git a/‎appendix-A/01_main-chapter-code/code-part1.ipynb
Lines changed: 9 additions & 9 deletions b/‎appendix-A/01_main-chapter-code/code-part1.ipynb
Lines changed: 9 additions & 9 deletions
diff --git a/‎appendix-A/01_main-chapter-code/code-part2.ipynb
Lines changed: 42 additions & 37 deletions b/‎appendix-A/01_main-chapter-code/code-part2.ipynb
Lines changed: 42 additions & 37 deletions
diff --git a/‎ch02/01_main-chapter-code/ch02.ipynb
Lines changed: 2 additions & 2 deletions b/‎ch02/01_main-chapter-code/ch02.ipynb
Lines changed: 2 additions & 2 deletions
diff --git a/‎ch02/01_main-chapter-code/dataloader.ipynb
Lines changed: 32 additions & 2 deletions b/‎ch02/01_main-chapter-code/dataloader.ipynb
Lines changed: 32 additions & 2 deletions
@@ -46,7 +46,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2.2.1\n"
+      "2.4.0\n"
      ]
     }
    ],
@@ -658,13 +658,13 @@
      "output_type": "stream",
      "text": [
       "Parameter containing:\n",
-      "tensor([[ 0.0956,  0.1280, -0.0696,  ...,  0.0961,  0.0631,  0.1349],\n",
-      "        [ 0.0983,  0.0580, -0.0574,  ...,  0.0981,  0.0370,  0.0516],\n",
-      "        [-0.0429, -0.1411, -0.1399,  ...,  0.0767,  0.0019,  0.1400],\n",
+      "tensor([[ 0.1182,  0.0606, -0.1292,  ..., -0.1126,  0.0735, -0.0597],\n",
+      "        [-0.0249,  0.0154, -0.0476,  ..., -0.1001, -0.1288,  0.1295],\n",
+      "        [ 0.0641,  0.0018, -0.0367,  ..., -0.0990, -0.0424, -0.0043],\n",
       "        ...,\n",
-      "        [-0.0777, -0.0726,  0.1273,  ..., -0.0613,  0.0491, -0.1381],\n",
-      "        [-0.0830, -0.0969, -0.0473,  ...,  0.0762,  0.1318, -0.1174],\n",
-      "        [ 0.0468, -0.0213,  0.0387,  ...,  0.0639,  0.0927, -0.0668]],\n",
+      "        [ 0.0618,  0.0867,  0.1361,  ..., -0.0254,  0.0399,  0.1006],\n",
+      "        [ 0.0842, -0.0512, -0.0960,  ..., -0.1091,  0.1242, -0.0428],\n",
+      "        [ 0.0518, -0.1390, -0.0923,  ..., -0.0954, -0.0668, -0.0037]],\n",
       "       requires_grad=True)\n"
      ]
     }
@@ -1264,7 +1264,7 @@
    ],
    "source": [
     "model = NeuralNetwork(2, 2) # needs to match the original model exactly\n",
-    "model.load_state_dict(torch.load(\"model.pth\"))"
+    "model.load_state_dict(torch.load(\"model.pth\", weights_only=True))"
    ]
   },
   {
@@ -1340,7 +1340,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.11"
+   "version": "3.11.4"
   }
  },
  "nbformat": 4,
 
@@ -2,7 +2,9 @@
  "cells": [
   {
    "cell_type": "markdown",
-   "metadata": {},
+   "metadata": {
+    "id": "AAAnDw04iAm4"
+   },
    "source": [
     "<table style=\"width:100%\">\n",
     "<tr>\n",
@@ -54,14 +56,14 @@
      "base_uri": "https://localhost:8080/"
     },
     "id": "RM7kGhwMF_nO",
-    "outputId": "ac60b048-b81f-4bb0-90fa-1ca474f04e9a"
+    "outputId": "b1872617-aacd-46fa-e5f3-f130fd81b246"
    },
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2.0.1+cu118\n"
+      "2.4.0+cu121\n"
      ]
     }
    ],
@@ -79,7 +81,7 @@
      "base_uri": "https://localhost:8080/"
     },
     "id": "OXLCKXhiUkZt",
-    "outputId": "39fe5366-287e-47eb-cc34-3508d616c4f9"
+    "outputId": "e9ca3c58-d92c-4c8b-a9c9-cd7fcc1fedb4"
    },
    "outputs": [
     {
@@ -102,18 +104,15 @@
      "base_uri": "https://localhost:8080/"
     },
     "id": "MTTlfh53Va-T",
-    "outputId": "f31d8bbe-577f-4db4-9939-02e66b9f96d1"
+    "outputId": "bae76cb5-d1d3-441f-a7c5-93a161e2e86a"
    },
    "outputs": [
     {
-     "data": {
-      "text/plain": [
-       "tensor([5., 7., 9.])"
-      ]
-     },
-     "execution_count": 3,
-     "metadata": {},
-     "output_type": "execute_result"
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([5., 7., 9.])\n"
+     ]
     }
    ],
    "source": [
@@ -125,13 +124,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 4,
    "metadata": {
     "colab": {
      "base_uri": "https://localhost:8080/"
     },
     "id": "Z4LwTNw7Vmmb",
-    "outputId": "1c025c6a-e3ed-4c7c-f5fd-86c14607036e"
+    "outputId": "9ad97923-bc8e-4c49-88bf-48dc1de56804"
    },
    "outputs": [
     {
@@ -151,24 +150,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 5,
    "metadata": {
     "colab": {
      "base_uri": "https://localhost:8080/",
-     "height": 184
+     "height": 158
     },
     "id": "tKT6URN1Vuft",
-    "outputId": "e6f01e7f-d9cf-44cb-cc6d-46fc7907d5c0"
+    "outputId": "8396eb18-47c8-47a1-c1b6-8bcb9480fb52"
    },
    "outputs": [
     {
      "ename": "RuntimeError",
-     "evalue": "ignored",
+     "evalue": "Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!",
      "output_type": "error",
      "traceback": [
       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
       "\u001b[0;31mRuntimeError\u001b[0m                              Traceback (most recent call last)",
-      "\u001b[0;32m<ipython-input-7-4ff3c4d20fc3>\u001b[0m in \u001b[0;36m<cell line: 2>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0mtensor_1\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtensor_1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"cpu\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtensor_1\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mtensor_2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+      "\u001b[0;32m/tmp/ipykernel_2321/2079609735.py\u001b[0m in \u001b[0;36m<cell line: 2>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0mtensor_1\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtensor_1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"cpu\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtensor_1\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mtensor_2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
       "\u001b[0;31mRuntimeError\u001b[0m: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!"
      ]
     }
@@ -189,7 +188,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 6,
    "metadata": {
     "id": "GyY59cjieitv"
    },
@@ -215,7 +214,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 7,
    "metadata": {
     "id": "v41gKqEJempa"
    },
@@ -243,7 +242,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 8,
    "metadata": {
     "id": "UPGVRuylep8Y"
    },
@@ -271,7 +270,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 9,
    "metadata": {
     "id": "drhg6IXofAXh"
    },
@@ -302,13 +301,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": 10,
    "metadata": {
     "colab": {
      "base_uri": "https://localhost:8080/"
     },
     "id": "7jaS5sqPWCY0",
-    "outputId": "84c74615-38f2-48b8-eeda-b5912fed1d3a"
+    "outputId": "8a5cd93d-671c-4abf-d5cd-97845f300ffd"
    },
    "outputs": [
     {
@@ -362,7 +361,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 11,
    "metadata": {
     "id": "4qrlmnPPe7FO"
    },
@@ -391,13 +390,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 12,
    "metadata": {
     "colab": {
      "base_uri": "https://localhost:8080/"
     },
     "id": "1_-BfkfEf4HX",
-    "outputId": "473bf21d-5880-4de3-fc8a-051d75315b94"
+    "outputId": "9453154f-0a5b-4a44-a3c9-f010e08d5a2c"
    },
    "outputs": [
     {
@@ -406,7 +405,7 @@
        "1.0"
       ]
      },
-     "execution_count": 27,
+     "execution_count": 12,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -417,13 +416,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 13,
    "metadata": {
     "colab": {
      "base_uri": "https://localhost:8080/"
     },
     "id": "iYtXKBGEgKss",
-    "outputId": "508edd84-3fb7-4d04-cb23-9df0c3d24170"
+    "outputId": "d6cc870a-34de-490e-e5d3-23e6956744bd"
    },
    "outputs": [
     {
@@ -432,7 +431,7 @@
        "1.0"
       ]
      },
-     "execution_count": 21,
+     "execution_count": 13,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -443,21 +442,27 @@
   },
   {
    "cell_type": "markdown",
-   "metadata": {},
+   "metadata": {
+    "id": "nc2LGFVbiAnB"
+   },
    "source": [
     "### A.9.3 Training with multiple GPUs"
    ]
   },
   {
    "cell_type": "markdown",
-   "metadata": {},
+   "metadata": {
+    "id": "cOUza9iQiAnC"
+   },
    "source": [
     "See [DDP-script.py](DDP-script.py)"
    ]
   },
   {
    "cell_type": "markdown",
-   "metadata": {},
+   "metadata": {
+    "id": "YOYk5Fh7iAnC"
+   },
    "source": [
     "<img src=\"https://sebastianraschka.com/images/LLMs-from-scratch-images/appendix-a_compressed/12.webp\" width=\"600px\">\n",
     "<img src=\"https://sebastianraschka.com/images/LLMs-from-scratch-images/appendix-a_compressed/13.webp\" width=\"600px\">"
@@ -485,7 +490,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.4"
+   "version": "3.10.14"
   }
  },
  "nbformat": 4,
 
@@ -46,7 +46,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "torch version: 2.3.1\n",
+      "torch version: 2.4.0\n",
       "tiktoken version: 0.7.0\n"
      ]
     }
@@ -1244,7 +1244,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "PyTorch version: 2.3.1\n"
+      "PyTorch version: 2.4.0\n"
      ]
     }
    ],
 
@@ -38,9 +38,39 @@
     "This notebook contains the main takeaway, the data loading pipeline without the intermediate steps."
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "2b4e8f2d-cb81-41a3-8780-a70b382e18ae",
+   "metadata": {},
+   "source": [
+    "Packages that are being used in this notebook:"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 1,
+   "id": "c7ed6fbe-45ac-40ce-8ea5-4edb212565e1",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch version: 2.4.0\n",
+      "tiktoken version: 0.7.0\n"
+     ]
+    }
+   ],
+   "source": [
+    "from importlib.metadata import version\n",
+    "\n",
+    "print(\"torch version:\", version(\"torch\"))\n",
+    "print(\"tiktoken version:\", version(\"tiktoken\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
    "id": "0ed4b7db-3b47-4fd3-a4a6-5f4ed5dd166e",
    "metadata": {},
    "outputs": [],
@@ -107,7 +137,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 3,
    "id": "664397bc-6daa-4b88-90aa-e8fc1fbd5846",
    "metadata": {},
    "outputs": [],
@@ -125,7 +155,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 4,
    "id": "d3664332-e6bb-447e-8b96-203aafde8b24",
    "metadata": {},
    "outputs": [
Original file line number	Diff line number	Diff line change
`@@ -46,7 +46,7 @@`
`46`	`46`	`"name": "stdout",`
`47`	`47`	`"output_type": "stream",`
`48`	`48`	`"text": [`
`49`		`- "torch version: 2.3.1\n",`
	`49`	`+ "torch version: 2.4.0\n",`
`50`	`50`	`"tiktoken version: 0.7.0\n"`
`51`	`51`	`]`
`52`	`52`	`}`
`@@ -1244,7 +1244,7 @@`
`1244`	`1244`	`"name": "stdout",`
`1245`	`1245`	`"output_type": "stream",`
`1246`	`1246`	`"text": [`
`1247`		`- "PyTorch version: 2.3.1\n"`
	`1247`	`+ "PyTorch version: 2.4.0\n"`
`1248`	`1248`	`]`
`1249`	`1249`	`}`
`1250`	`1250`	`],`