|
16 | 16 | },
|
17 | 17 | {
|
18 | 18 | "cell_type": "code",
|
19 |
| - "execution_count": 2, |
| 19 | + "execution_count": 1, |
20 | 20 | "id": "be6ffdc5-fbaa-40b5-8b33-5540a3f957ba",
|
21 | 21 | "metadata": {},
|
22 | 22 | "outputs": [],
|
23 | 23 | "source": [
|
24 | 24 | "import numpy as np\n",
|
25 | 25 | "\n",
|
26 | 26 | "\n",
|
27 |
| - "def to_bit_vectors(embedding: list) -> list:\n", |
28 |
| - " embeddings = []\n", |
29 |
| - " for idx, patch_embedding in enumerate(embedding):\n", |
30 |
| - " patch_embedding = np.array(patch_embedding)\n", |
31 |
| - " binary_vector = (\n", |
32 |
| - " np.packbits(np.where(patch_embedding > 0, 1, 0))\n", |
33 |
| - " .astype(np.int8)\n", |
34 |
| - " .tobytes()\n", |
35 |
| - " .hex()\n", |
36 |
| - " )\n", |
37 |
| - " embeddings.append(binary_vector)\n", |
38 |
| - " return embeddings" |
| 27 | + "def to_bit_vectors(embeddings: list) -> list:\n", |
| 28 | + " return [\n", |
| 29 | + " np.packbits(np.where(np.array(embedding) > 0, 1, 0))\n", |
| 30 | + " .astype(np.int8)\n", |
| 31 | + " .tobytes()\n", |
| 32 | + " .hex()\n", |
| 33 | + " for embedding in embeddings\n", |
| 34 | + " ]" |
39 | 35 | ]
|
40 | 36 | },
|
41 | 37 | {
|
|
49 | 45 | },
|
50 | 46 | {
|
51 | 47 | "cell_type": "code",
|
52 |
| - "execution_count": 3, |
| 48 | + "execution_count": 2, |
53 | 49 | "id": "9871c9c5-c923-4deb-9f5b-aa6796ba0bbf",
|
54 | 50 | "metadata": {},
|
55 | 51 | "outputs": [],
|
|
70 | 66 | },
|
71 | 67 | {
|
72 | 68 | "cell_type": "code",
|
73 |
| - "execution_count": 1, |
| 69 | + "execution_count": 3, |
74 | 70 | "id": "2de5872d-b372-40fe-85c5-111b9f9fa6c8",
|
75 | 71 | "metadata": {},
|
76 | 72 | "outputs": [
|
77 | 73 | {
|
78 | 74 | "name": "stdout",
|
79 | 75 | "output_type": "stream",
|
80 | 76 | "text": [
|
81 |
| - "[INFO] Index 'searchlabs-colpali-token-pooling' already exists.\n" |
| 77 | + "[INFO] Creating index: searchlabs-colpali-token-pooling\n" |
82 | 78 | ]
|
83 | 79 | }
|
84 | 80 | ],
|
|
126 | 122 | },
|
127 | 123 | {
|
128 | 124 | "cell_type": "code",
|
129 |
| - "execution_count": 5, |
| 125 | + "execution_count": null, |
130 | 126 | "id": "bdf6ff33-3e22-43c1-9f3e-c3dd663b40e2",
|
131 | 127 | "metadata": {},
|
132 | 128 | "outputs": [
|
133 | 129 | {
|
134 | 130 | "data": {
|
135 | 131 | "application/vnd.jupyter.widget-view+json": {
|
136 |
| - "model_id": "cef0c48b9b5d4b3982fbdb4773494ec8", |
| 132 | + "model_id": "047c33b3344f49328bda552b123c168d", |
137 | 133 | "version_major": 2,
|
138 | 134 | "version_minor": 0
|
139 | 135 | },
|
|
143 | 139 | },
|
144 | 140 | "metadata": {},
|
145 | 141 | "output_type": "display_data"
|
146 |
| - }, |
147 |
| - { |
148 |
| - "name": "stdout", |
149 |
| - "output_type": "stream", |
150 |
| - "text": [ |
151 |
| - "Completed indexing 500 documents\n" |
152 |
| - ] |
153 | 142 | }
|
154 | 143 | ],
|
155 | 144 | "source": [
|
|
193 | 182 | },
|
194 | 183 | {
|
195 | 184 | "cell_type": "code",
|
196 |
| - "execution_count": 6, |
| 185 | + "execution_count": null, |
197 | 186 | "id": "1dfc3713-d649-46db-aa81-171d6d92668e",
|
198 | 187 | "metadata": {},
|
199 |
| - "outputs": [ |
200 |
| - { |
201 |
| - "data": { |
202 |
| - "application/vnd.jupyter.widget-view+json": { |
203 |
| - "model_id": "d4361ebd1e59483aa8060a4fbe71715b", |
204 |
| - "version_major": 2, |
205 |
| - "version_minor": 0 |
206 |
| - }, |
207 |
| - "text/plain": [ |
208 |
| - "Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]" |
209 |
| - ] |
210 |
| - }, |
211 |
| - "metadata": {}, |
212 |
| - "output_type": "display_data" |
213 |
| - } |
214 |
| - ], |
| 188 | + "outputs": [], |
215 | 189 | "source": [
|
216 | 190 | "import torch\n",
|
217 | 191 | "from PIL import Image\n",
|
|
235 | 209 | },
|
236 | 210 | {
|
237 | 211 | "cell_type": "code",
|
238 |
| - "execution_count": 7, |
| 212 | + "execution_count": null, |
239 | 213 | "id": "8e322b23-b4bc-409d-9e00-2dab93f6a295",
|
240 | 214 | "metadata": {},
|
241 |
| - "outputs": [ |
242 |
| - { |
243 |
| - "data": { |
244 |
| - "text/html": [ |
245 |
| - "<div style='display: flex; flex-wrap: wrap; align-items: flex-start;'><img src=\"searchlabs-colpali/image_3.jpg\" alt=\"image_3.jpg\" style=\"max-width:300px; height:auto; margin:10px;\"><img src=\"searchlabs-colpali/image_104.jpg\" alt=\"image_104.jpg\" style=\"max-width:300px; height:auto; margin:10px;\"><img src=\"searchlabs-colpali/image_2.jpg\" alt=\"image_2.jpg\" style=\"max-width:300px; height:auto; margin:10px;\"><img src=\"searchlabs-colpali/image_12.jpg\" alt=\"image_12.jpg\" style=\"max-width:300px; height:auto; margin:10px;\"><img src=\"searchlabs-colpali/image_120.jpg\" alt=\"image_120.jpg\" style=\"max-width:300px; height:auto; margin:10px;\"></div>" |
246 |
| - ], |
247 |
| - "text/plain": [ |
248 |
| - "<IPython.core.display.HTML object>" |
249 |
| - ] |
250 |
| - }, |
251 |
| - "metadata": {}, |
252 |
| - "output_type": "display_data" |
253 |
| - } |
254 |
| - ], |
| 215 | + "outputs": [], |
255 | 216 | "source": [
|
256 | 217 | "from IPython.display import display, HTML\n",
|
257 | 218 | "import os\n",
|
|
0 commit comments