Skip to content

Commit b868fd3

Browse files
authored
code clean up (#425)
1 parent d5dff87 commit b868fd3

File tree

3 files changed

+43
-90
lines changed

3 files changed

+43
-90
lines changed

supporting-blog-content/colpali/02_bit_vectors.ipynb

+12-16
Original file line numberDiff line numberDiff line change
@@ -39,18 +39,14 @@
3939
"import numpy as np\n",
4040
"\n",
4141
"\n",
42-
"def to_bit_vectors(embedding: list) -> list:\n",
43-
" embeddings = []\n",
44-
" for idx, patch_embedding in enumerate(embedding):\n",
45-
" patch_embedding = np.array(patch_embedding)\n",
46-
" binary_vector = (\n",
47-
" np.packbits(np.where(patch_embedding > 0, 1, 0))\n",
48-
" .astype(np.int8)\n",
49-
" .tobytes()\n",
50-
" .hex()\n",
51-
" )\n",
52-
" embeddings.append(binary_vector)\n",
53-
" return embeddings"
42+
"def to_bit_vectors(embeddings: list) -> list:\n",
43+
" return [\n",
44+
" np.packbits(np.where(np.array(embedding) > 0, 1, 0))\n",
45+
" .astype(np.int8)\n",
46+
" .tobytes()\n",
47+
" .hex()\n",
48+
" for embedding in embeddings\n",
49+
" ]"
5450
]
5551
},
5652
{
@@ -71,7 +67,7 @@
7167
"name": "stdout",
7268
"output_type": "stream",
7369
"text": [
74-
"[INFO] Index 'searchlabs-colpali-hamming' already exists.\n"
70+
"[INFO] Creating index: searchlabs-colpali-hamming\n"
7571
]
7672
}
7773
],
@@ -126,7 +122,7 @@
126122
{
127123
"data": {
128124
"application/vnd.jupyter.widget-view+json": {
129-
"model_id": "022b4af8891b4a06962e023c7f92d8f4",
125+
"model_id": "8be1b809674143c486705f1699f440dd",
130126
"version_major": 2,
131127
"version_minor": 0
132128
},
@@ -191,7 +187,7 @@
191187
{
192188
"data": {
193189
"application/vnd.jupyter.widget-view+json": {
194-
"model_id": "064e33061bac40e4802138e30599225b",
190+
"model_id": "63a206f0fc2d491196b83b450eb4b93a",
195191
"version_major": 2,
196192
"version_minor": 0
197193
},
@@ -251,7 +247,7 @@
251247
{
252248
"data": {
253249
"text/html": [
254-
"<div style='display: flex; flex-wrap: wrap; align-items: flex-start;'><img src=\"searchlabs-colpali/image_104.jpg\" alt=\"image_104.jpg\" style=\"max-width:300px; height:auto; margin:10px;\"><img src=\"searchlabs-colpali/image_3.jpg\" alt=\"image_3.jpg\" style=\"max-width:300px; height:auto; margin:10px;\"><img src=\"searchlabs-colpali/image_12.jpg\" alt=\"image_12.jpg\" style=\"max-width:300px; height:auto; margin:10px;\"><img src=\"searchlabs-colpali/image_2.jpg\" alt=\"image_2.jpg\" style=\"max-width:300px; height:auto; margin:10px;\"><img src=\"searchlabs-colpali/image_92.jpg\" alt=\"image_92.jpg\" style=\"max-width:300px; height:auto; margin:10px;\"></div>"
250+
"<div style='display: flex; flex-wrap: wrap; align-items: flex-start;'><img src=\"searchlabs-colpali/image_104.jpg\" alt=\"image_104.jpg\" style=\"max-width:300px; height:auto; margin:10px;\"><img src=\"searchlabs-colpali/image_3.jpg\" alt=\"image_3.jpg\" style=\"max-width:300px; height:auto; margin:10px;\"><img src=\"searchlabs-colpali/image_12.jpg\" alt=\"image_12.jpg\" style=\"max-width:300px; height:auto; margin:10px;\"><img src=\"searchlabs-colpali/image_2.jpg\" alt=\"image_2.jpg\" style=\"max-width:300px; height:auto; margin:10px;\"><img src=\"searchlabs-colpali/image_110.jpg\" alt=\"image_110.jpg\" style=\"max-width:300px; height:auto; margin:10px;\"></div>"
255251
],
256252
"text/plain": [
257253
"<IPython.core.display.HTML object>"

supporting-blog-content/colpali/03_average_vector.ipynb

+13-17
Original file line numberDiff line numberDiff line change
@@ -32,18 +32,14 @@
3232
"import numpy as np\n",
3333
"\n",
3434
"\n",
35-
"def to_bit_vectors(embedding: list) -> list:\n",
36-
" embeddings = []\n",
37-
" for idx, patch_embedding in enumerate(embedding):\n",
38-
" patch_embedding = np.array(patch_embedding)\n",
39-
" binary_vector = (\n",
40-
" np.packbits(np.where(patch_embedding > 0, 1, 0))\n",
41-
" .astype(np.int8)\n",
42-
" .tobytes()\n",
43-
" .hex()\n",
44-
" )\n",
45-
" embeddings.append(binary_vector)\n",
46-
" return embeddings\n",
35+
"def to_bit_vectors(embeddings: list) -> list:\n",
36+
" return [\n",
37+
" np.packbits(np.where(np.array(embedding) > 0, 1, 0))\n",
38+
" .astype(np.int8)\n",
39+
" .tobytes()\n",
40+
" .hex()\n",
41+
" for embedding in embeddings\n",
42+
" ]\n",
4743
"\n",
4844
"\n",
4945
"def to_avg_vector(vectors):\n",
@@ -79,7 +75,7 @@
7975
"name": "stdout",
8076
"output_type": "stream",
8177
"text": [
82-
"[INFO] Index 'searchlabs-colpali-average-vector' already exists.\n"
78+
"[INFO] Creating index: searchlabs-colpali-average-vector\n"
8379
]
8480
}
8581
],
@@ -149,7 +145,7 @@
149145
{
150146
"data": {
151147
"application/vnd.jupyter.widget-view+json": {
152-
"model_id": "4a9d08424a504956a4f50208a19cce90",
148+
"model_id": "bb2e8a4c74b5494c8203308df49ec750",
153149
"version_major": 2,
154150
"version_minor": 0
155151
},
@@ -216,7 +212,7 @@
216212
{
217213
"data": {
218214
"application/vnd.jupyter.widget-view+json": {
219-
"model_id": "b7967bc15d67486ab5e2db2c673db036",
215+
"model_id": "bdf787df90d24a289fa70e6abd0d9597",
220216
"version_major": 2,
221217
"version_minor": 0
222218
},
@@ -267,7 +263,7 @@
267263
{
268264
"data": {
269265
"text/html": [
270-
"<div style='display: flex; flex-wrap: wrap; align-items: flex-start;'><img src=\"searchlabs-colpali/image_12.jpg\" alt=\"image_12.jpg\" style=\"max-width:300px; height:auto; margin:10px;\"><img src=\"searchlabs-colpali/image_3.jpg\" alt=\"image_3.jpg\" style=\"max-width:300px; height:auto; margin:10px;\"><img src=\"searchlabs-colpali/image_49.jpg\" alt=\"image_49.jpg\" style=\"max-width:300px; height:auto; margin:10px;\"><img src=\"searchlabs-colpali/image_123.jpg\" alt=\"image_123.jpg\" style=\"max-width:300px; height:auto; margin:10px;\"><img src=\"searchlabs-colpali/image_104.jpg\" alt=\"image_104.jpg\" style=\"max-width:300px; height:auto; margin:10px;\"></div>"
266+
"<div style='display: flex; flex-wrap: wrap; align-items: flex-start;'><img src=\"searchlabs-colpali/image_3.jpg\" alt=\"image_3.jpg\" style=\"max-width:300px; height:auto; margin:10px;\"><img src=\"searchlabs-colpali/image_12.jpg\" alt=\"image_12.jpg\" style=\"max-width:300px; height:auto; margin:10px;\"><img src=\"searchlabs-colpali/image_104.jpg\" alt=\"image_104.jpg\" style=\"max-width:300px; height:auto; margin:10px;\"><img src=\"searchlabs-colpali/image_2.jpg\" alt=\"image_2.jpg\" style=\"max-width:300px; height:auto; margin:10px;\"><img src=\"searchlabs-colpali/image_250.jpg\" alt=\"image_250.jpg\" style=\"max-width:300px; height:auto; margin:10px;\"></div>"
271267
],
272268
"text/plain": [
273269
"<IPython.core.display.HTML object>"
@@ -329,7 +325,7 @@
329325
{
330326
"data": {
331327
"text/html": [
332-
"<div style='display: flex; flex-wrap: wrap; align-items: flex-start;'><img src=\"searchlabs-colpali/image_104.jpg\" alt=\"image_104.jpg\" style=\"max-width:300px; height:auto; margin:10px;\"><img src=\"searchlabs-colpali/image_3.jpg\" alt=\"image_3.jpg\" style=\"max-width:300px; height:auto; margin:10px;\"><img src=\"searchlabs-colpali/image_2.jpg\" alt=\"image_2.jpg\" style=\"max-width:300px; height:auto; margin:10px;\"><img src=\"searchlabs-colpali/image_12.jpg\" alt=\"image_12.jpg\" style=\"max-width:300px; height:auto; margin:10px;\"><img src=\"searchlabs-colpali/image_49.jpg\" alt=\"image_49.jpg\" style=\"max-width:300px; height:auto; margin:10px;\"></div>"
328+
"<div style='display: flex; flex-wrap: wrap; align-items: flex-start;'><img src=\"searchlabs-colpali/image_104.jpg\" alt=\"image_104.jpg\" style=\"max-width:300px; height:auto; margin:10px;\"><img src=\"searchlabs-colpali/image_3.jpg\" alt=\"image_3.jpg\" style=\"max-width:300px; height:auto; margin:10px;\"><img src=\"searchlabs-colpali/image_2.jpg\" alt=\"image_2.jpg\" style=\"max-width:300px; height:auto; margin:10px;\"><img src=\"searchlabs-colpali/image_12.jpg\" alt=\"image_12.jpg\" style=\"max-width:300px; height:auto; margin:10px;\"><img src=\"searchlabs-colpali/image_250.jpg\" alt=\"image_250.jpg\" style=\"max-width:300px; height:auto; margin:10px;\"></div>"
333329
],
334330
"text/plain": [
335331
"<IPython.core.display.HTML object>"

supporting-blog-content/colpali/04_token_pooling.ipynb

+18-57
Original file line numberDiff line numberDiff line change
@@ -16,26 +16,22 @@
1616
},
1717
{
1818
"cell_type": "code",
19-
"execution_count": 2,
19+
"execution_count": 1,
2020
"id": "be6ffdc5-fbaa-40b5-8b33-5540a3f957ba",
2121
"metadata": {},
2222
"outputs": [],
2323
"source": [
2424
"import numpy as np\n",
2525
"\n",
2626
"\n",
27-
"def to_bit_vectors(embedding: list) -> list:\n",
28-
" embeddings = []\n",
29-
" for idx, patch_embedding in enumerate(embedding):\n",
30-
" patch_embedding = np.array(patch_embedding)\n",
31-
" binary_vector = (\n",
32-
" np.packbits(np.where(patch_embedding > 0, 1, 0))\n",
33-
" .astype(np.int8)\n",
34-
" .tobytes()\n",
35-
" .hex()\n",
36-
" )\n",
37-
" embeddings.append(binary_vector)\n",
38-
" return embeddings"
27+
"def to_bit_vectors(embeddings: list) -> list:\n",
28+
" return [\n",
29+
" np.packbits(np.where(np.array(embedding) > 0, 1, 0))\n",
30+
" .astype(np.int8)\n",
31+
" .tobytes()\n",
32+
" .hex()\n",
33+
" for embedding in embeddings\n",
34+
" ]"
3935
]
4036
},
4137
{
@@ -49,7 +45,7 @@
4945
},
5046
{
5147
"cell_type": "code",
52-
"execution_count": 3,
48+
"execution_count": 2,
5349
"id": "9871c9c5-c923-4deb-9f5b-aa6796ba0bbf",
5450
"metadata": {},
5551
"outputs": [],
@@ -70,15 +66,15 @@
7066
},
7167
{
7268
"cell_type": "code",
73-
"execution_count": 1,
69+
"execution_count": 3,
7470
"id": "2de5872d-b372-40fe-85c5-111b9f9fa6c8",
7571
"metadata": {},
7672
"outputs": [
7773
{
7874
"name": "stdout",
7975
"output_type": "stream",
8076
"text": [
81-
"[INFO] Index 'searchlabs-colpali-token-pooling' already exists.\n"
77+
"[INFO] Creating index: searchlabs-colpali-token-pooling\n"
8278
]
8379
}
8480
],
@@ -126,14 +122,14 @@
126122
},
127123
{
128124
"cell_type": "code",
129-
"execution_count": 5,
125+
"execution_count": null,
130126
"id": "bdf6ff33-3e22-43c1-9f3e-c3dd663b40e2",
131127
"metadata": {},
132128
"outputs": [
133129
{
134130
"data": {
135131
"application/vnd.jupyter.widget-view+json": {
136-
"model_id": "cef0c48b9b5d4b3982fbdb4773494ec8",
132+
"model_id": "047c33b3344f49328bda552b123c168d",
137133
"version_major": 2,
138134
"version_minor": 0
139135
},
@@ -143,13 +139,6 @@
143139
},
144140
"metadata": {},
145141
"output_type": "display_data"
146-
},
147-
{
148-
"name": "stdout",
149-
"output_type": "stream",
150-
"text": [
151-
"Completed indexing 500 documents\n"
152-
]
153142
}
154143
],
155144
"source": [
@@ -193,25 +182,10 @@
193182
},
194183
{
195184
"cell_type": "code",
196-
"execution_count": 6,
185+
"execution_count": null,
197186
"id": "1dfc3713-d649-46db-aa81-171d6d92668e",
198187
"metadata": {},
199-
"outputs": [
200-
{
201-
"data": {
202-
"application/vnd.jupyter.widget-view+json": {
203-
"model_id": "d4361ebd1e59483aa8060a4fbe71715b",
204-
"version_major": 2,
205-
"version_minor": 0
206-
},
207-
"text/plain": [
208-
"Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]"
209-
]
210-
},
211-
"metadata": {},
212-
"output_type": "display_data"
213-
}
214-
],
188+
"outputs": [],
215189
"source": [
216190
"import torch\n",
217191
"from PIL import Image\n",
@@ -235,23 +209,10 @@
235209
},
236210
{
237211
"cell_type": "code",
238-
"execution_count": 7,
212+
"execution_count": null,
239213
"id": "8e322b23-b4bc-409d-9e00-2dab93f6a295",
240214
"metadata": {},
241-
"outputs": [
242-
{
243-
"data": {
244-
"text/html": [
245-
"<div style='display: flex; flex-wrap: wrap; align-items: flex-start;'><img src=\"searchlabs-colpali/image_3.jpg\" alt=\"image_3.jpg\" style=\"max-width:300px; height:auto; margin:10px;\"><img src=\"searchlabs-colpali/image_104.jpg\" alt=\"image_104.jpg\" style=\"max-width:300px; height:auto; margin:10px;\"><img src=\"searchlabs-colpali/image_2.jpg\" alt=\"image_2.jpg\" style=\"max-width:300px; height:auto; margin:10px;\"><img src=\"searchlabs-colpali/image_12.jpg\" alt=\"image_12.jpg\" style=\"max-width:300px; height:auto; margin:10px;\"><img src=\"searchlabs-colpali/image_120.jpg\" alt=\"image_120.jpg\" style=\"max-width:300px; height:auto; margin:10px;\"></div>"
246-
],
247-
"text/plain": [
248-
"<IPython.core.display.HTML object>"
249-
]
250-
},
251-
"metadata": {},
252-
"output_type": "display_data"
253-
}
254-
],
215+
"outputs": [],
255216
"source": [
256217
"from IPython.display import display, HTML\n",
257218
"import os\n",

0 commit comments

Comments
 (0)