Skip to content

Commit ea8b8e6

Browse files
Upgrade redisvl (#32)
* update redisvl * updates * cleanup * make an autouse fixture to clean up index client * Clear logging handlers to prevent I/O errors in tests * update to use lifespan manager from asgi * remove fixtures * update --------- Co-authored-by: Andrew Brookins <a.m.brookins@gmail.com>
1 parent 3e3eae6 commit ea8b8e6

File tree

18 files changed

+669
-520
lines changed

18 files changed

+669
-520
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,4 +14,5 @@ new_backend/arxivsearch/templates/
1414
.coverage*
1515
coverage.*
1616
htmlcov/
17-
legacy-data/
17+
legacy-data/
18+
.python-version

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ RUN mkdir -p /app/backend
3535
COPY ./backend/poetry.lock ./backend/pyproject.toml ./backend/
3636

3737
WORKDIR /app/backend
38-
RUN poetry install --all-extras --no-interaction --no-root
38+
RUN poetry install --all-extras --no-interaction
3939

4040
COPY ./backend/ .
4141

Makefile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
.PHONY: build
1+
.PHONY: deploy
22

3-
build:
4-
docker compose -f docker-local-redis.yml up
3+
deploy:
4+
docker compose -f docker-local-redis.yml up

README.md

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
<a href="https://github.com/redis-developer/redis-arxiv-search"><img src="https://redis.io/wp-content/uploads/2024/04/Logotype.svg?raw=true" width="30%"></a>
44
<br />
55
<br />
6+
<h1>🔎 arXiv Search API</h1>
67
<div display="inline-block">
78
<a href="https://docsearch.redisvl.com"><b>Hosted Demo</b></a>&nbsp;&nbsp;&nbsp;
89
<a href="https://github.com/redis-developer/redis-arxiv-search"><b>Code</b></a>&nbsp;&nbsp;&nbsp;
@@ -14,7 +15,7 @@
1415
<br />
1516
</div>
1617

17-
# 🔎 Redis arXiv Search
18+
1819
*This repository is the official codebase for the arxiv paper search app hosted at: **https://docsearch.redisvl.com***
1920

2021

@@ -111,17 +112,17 @@ Embeddings represent the semantic properties of the raw text and enable vector si
111112
- Add your `OPENAI_API_KEY` to the `.env` file. **Need one?** [Get an API key](https://platform.openai.com)
112113
- Add your `COHERE_API_KEY` to the `.env` file. **Need one?** [Get an API key](https://cohere.ai)
113114

114-
### Redis Stack Docker (Local) with make
115+
### Run locally with Redis 8 CE
115116
```bash
116-
make build
117+
make deploy
117118
```
118119

119120

120121
## Customizing (optional)
121122

122123
### Run local redis with Docker
123124
```bash
124-
docker run -d --name redis -p 6379:6379 -p 8001:8001 redis/redis-stack:latest
125+
docker run -d --name redis -p 6379:6379 -p 8001:8001 redis:8.0-M03
125126
```
126127

127128
### FastAPI with poetry

backend/arxivsearch/api/routes/papers.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,19 @@
22
import logging
33

44
import numpy as np
5+
from fastapi import APIRouter, Depends, Query
6+
from redisvl.index import AsyncSearchIndex
7+
from redisvl.query import CountQuery, FilterQuery, VectorQuery
8+
59
from arxivsearch import config
6-
from arxivsearch.db import redis_helpers
10+
from arxivsearch.db import utils
711
from arxivsearch.schema.models import (
812
PaperSimilarityRequest,
913
SearchResponse,
1014
UserTextSimilarityRequest,
1115
VectorSearchResponse,
1216
)
1317
from arxivsearch.utils.embeddings import Embeddings
14-
from fastapi import APIRouter, Depends, Query
15-
from redisvl.index import AsyncSearchIndex
16-
from redisvl.query import CountQuery, FilterQuery, VectorQuery
1718

1819
logger = logging.getLogger(__name__)
1920

@@ -26,7 +27,7 @@
2627

2728
@router.get("/", response_model=SearchResponse)
2829
async def get_papers(
29-
index: AsyncSearchIndex = Depends(redis_helpers.get_async_index),
30+
index: AsyncSearchIndex = Depends(utils.get_async_index),
3031
limit: int = Query(default=20, description="Maximum number of papers to return."),
3132
skip: int = Query(
3233
default=0, description="Number of papers to skip for pagination."
@@ -53,9 +54,8 @@ async def get_papers(
5354
Returns:
5455
SearchResponse: Pydantic model containing papers and total count.
5556
"""
56-
5757
# Build queries
58-
filter_expression = redis_helpers.build_filter_expression(
58+
filter_expression = utils.build_filter_expression(
5959
years.split(","), categories.split(",")
6060
)
6161
filter_query = FilterQuery(return_fields=[], filter_expression=filter_expression)
@@ -72,7 +72,7 @@ async def get_papers(
7272
@router.post("/vector_search/by_paper", response_model=VectorSearchResponse)
7373
async def find_papers_by_paper(
7474
similarity_request: PaperSimilarityRequest,
75-
index: AsyncSearchIndex = Depends(redis_helpers.get_async_index),
75+
index: AsyncSearchIndex = Depends(utils.get_async_index),
7676
):
7777
"""
7878
Find and return papers similar to a given paper based on vector
@@ -93,7 +93,7 @@ async def find_papers_by_paper(
9393
paper[similarity_request.provider.value], dtype=np.float32
9494
)
9595
# Build filter expression
96-
filter_expression = redis_helpers.build_filter_expression(
96+
filter_expression = utils.build_filter_expression(
9797
similarity_request.years, similarity_request.categories
9898
)
9999
# Create queries
@@ -115,7 +115,7 @@ async def find_papers_by_paper(
115115
@router.post("/vector_search/by_text", response_model=VectorSearchResponse)
116116
async def find_papers_by_text(
117117
similarity_request: UserTextSimilarityRequest,
118-
index: AsyncSearchIndex = Depends(redis_helpers.get_async_index),
118+
index: AsyncSearchIndex = Depends(utils.get_async_index),
119119
):
120120
"""
121121
Find and return papers similar to user-provided text based on
@@ -131,7 +131,7 @@ async def find_papers_by_text(
131131
"""
132132

133133
# Build filter expression
134-
filter_expression = redis_helpers.build_filter_expression(
134+
filter_expression = utils.build_filter_expression(
135135
similarity_request.years, similarity_request.categories
136136
)
137137
# Check available paper count and create vector from user text

backend/arxivsearch/db/load.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from redisvl.index import AsyncSearchIndex
1010

1111
from arxivsearch import config
12-
from arxivsearch.db import redis_helpers
12+
from arxivsearch.db.utils import get_async_index, get_schema
1313
from arxivsearch.schema.models import Provider
1414

1515
logger = logging.getLogger(__name__)
@@ -74,8 +74,7 @@ async def preprocess_paper(paper: dict) -> dict:
7474

7575
async def load_data():
7676
# Load schema specs and create index in Redis
77-
index = AsyncSearchIndex(redis_helpers.schema)
78-
await index.set_client(redis_helpers.client)
77+
index = await get_async_index()
7978

8079
# Load dataset and create index
8180
try:

backend/arxivsearch/db/redis_helpers.py renamed to backend/arxivsearch/db/utils.py

Lines changed: 13 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -2,42 +2,30 @@
22
import os
33
from typing import List
44

5-
from arxivsearch import config
6-
from redis.asyncio import Redis
7-
from redisvl.index import AsyncSearchIndex, SearchIndex
5+
from redisvl.index import AsyncSearchIndex
86
from redisvl.query.filter import FilterExpression, Tag
97
from redisvl.schema import IndexSchema
108

11-
logger = logging.getLogger(__name__)
12-
13-
14-
dir_path = os.path.dirname(os.path.realpath(__file__))
15-
schema_path = os.path.join(dir_path, "index.yaml")
16-
schema = IndexSchema.from_yaml(schema_path)
17-
client = Redis.from_url(config.REDIS_URL)
18-
global_index = None
19-
9+
from arxivsearch import config
2010

21-
def get_schema():
22-
return IndexSchema.from_yaml(schema_path)
11+
logger = logging.getLogger(__name__)
2312

2413

25-
def get_test_index():
26-
index = SearchIndex.from_yaml(schema_path)
27-
index.connect(redis_url=config.REDIS_URL)
14+
# global search index
15+
_global_index = None
2816

29-
if not index.exists():
30-
index.create(overwrite=True)
3117

32-
return index
18+
def get_schema() -> IndexSchema:
19+
dir_path = os.path.dirname(os.path.realpath(__file__)) + "/schema"
20+
file_path = os.path.join(dir_path, "index.yaml")
21+
return IndexSchema.from_yaml(file_path)
3322

3423

3524
async def get_async_index():
36-
global global_index
37-
if not global_index:
38-
global_index = AsyncSearchIndex.from_yaml(schema_path)
39-
await global_index.set_client(client)
40-
yield global_index
25+
global _global_index
26+
if not _global_index:
27+
_global_index = AsyncSearchIndex(get_schema(), redis_url=config.REDIS_URL)
28+
return _global_index
4129

4230

4331
def build_filter_expression(

backend/arxivsearch/main.py

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,34 @@
11
import logging
2+
from contextlib import asynccontextmanager
23
from pathlib import Path
34

45
import uvicorn
5-
from arxivsearch import config
6-
from arxivsearch.api.main import api_router
7-
from arxivsearch.spa import SinglePageApplication
86
from fastapi import FastAPI
97
from fastapi.staticfiles import StaticFiles
108
from starlette.middleware.cors import CORSMiddleware
119

10+
from arxivsearch import config
11+
from arxivsearch.api.main import api_router
12+
from arxivsearch.db.utils import get_async_index
13+
from arxivsearch.spa import SinglePageApplication
14+
1215
logging.basicConfig(
1316
level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
1417
)
1518

19+
20+
@asynccontextmanager
21+
async def lifespan(app: FastAPI):
22+
index = await get_async_index()
23+
async with index:
24+
yield
25+
26+
1627
app = FastAPI(
17-
title=config.PROJECT_NAME, docs_url=config.API_DOCS, openapi_url=config.OPENAPI_DOCS
28+
title=config.PROJECT_NAME,
29+
docs_url=config.API_DOCS,
30+
openapi_url=config.OPENAPI_DOCS,
31+
lifespan=lifespan,
1832
)
1933

2034
app.add_middleware(

backend/arxivsearch/schema/models.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from enum import Enum
2+
23
from pydantic import BaseModel
34

45

backend/arxivsearch/tests/api/routes/test_papers.py

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,25 @@
11
import pytest
22
from httpx import AsyncClient
33

4-
from arxivsearch.main import app
5-
from arxivsearch.schema.models import (
6-
PaperSimilarityRequest,
7-
UserTextSimilarityRequest,
8-
)
4+
from arxivsearch.schema.models import PaperSimilarityRequest, UserTextSimilarityRequest
95

106

11-
@pytest.fixture
12-
def years(papers):
13-
return papers[0]["year"]
7+
@pytest.fixture(scope="module")
8+
def years(test_data):
9+
return test_data[0]["year"]
1410

1511

16-
@pytest.fixture
17-
def categories(papers):
18-
return papers[0]["categories"]
12+
@pytest.fixture(scope="module")
13+
def categories(test_data):
14+
return test_data[0]["categories"]
1915

2016

21-
@pytest.fixture
17+
@pytest.fixture(scope="module")
2218
def bad_req_json():
2319
return {"not": "valid"}
2420

2521

26-
@pytest.fixture
22+
@pytest.fixture(scope="module")
2723
def text_req(years, categories):
2824
return UserTextSimilarityRequest(
2925
categories=[categories],
@@ -33,10 +29,13 @@ def text_req(years, categories):
3329
)
3430

3531

36-
@pytest.fixture
37-
def paper_req(papers):
32+
@pytest.fixture(scope="module")
33+
def paper_req(test_data):
3834
return PaperSimilarityRequest(
39-
categories=[], years=[], provider="huggingface", paper_id=papers[0]["paper_id"]
35+
categories=[],
36+
years=[],
37+
provider="huggingface",
38+
paper_id=test_data[0]["paper_id"],
4039
)
4140

4241

backend/arxivsearch/tests/conftest.py

Lines changed: 37 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,49 @@
1+
import json
2+
import os
3+
4+
import httpx
5+
import numpy as np
16
import pytest
27
import pytest_asyncio
8+
from asgi_lifespan import LifespanManager
9+
from httpx import AsyncClient
10+
from redisvl.index import SearchIndex
11+
312
from arxivsearch import config
13+
from arxivsearch.db.utils import get_async_index, get_schema
414
from arxivsearch.main import app
5-
from arxivsearch.tests.utils.seed import seed_test_db
6-
from httpx import AsyncClient
7-
from redis.asyncio import Redis
815

916

10-
@pytest.fixture(scope="module")
11-
def papers():
12-
papers = seed_test_db()
13-
return papers
17+
@pytest.fixture(scope="session")
18+
def index():
19+
index = SearchIndex(schema=get_schema(), redis_url=config.REDIS_URL)
20+
index.create()
21+
yield index
22+
index.disconnect()
1423

1524

16-
@pytest.fixture
17-
async def client():
18-
client = await Redis.from_url(config.REDIS_URL)
19-
yield client
20-
try:
21-
await client.aclose()
22-
except RuntimeError as e:
23-
if "Event loop is closed" not in str(e):
24-
raise
25+
@pytest.fixture(scope="session", autouse=True)
26+
def test_data(index):
27+
cwd = os.getcwd()
28+
with open(f"{cwd}/arxivsearch/tests/test_vectors.json", "r") as f:
29+
papers = json.load(f)
2530

31+
# convert to bytes
32+
for paper in papers:
33+
paper["huggingface"] = np.array(
34+
paper["huggingface"], dtype=np.float32
35+
).tobytes()
36+
paper["openai"] = np.array(paper["openai"], dtype=np.float32).tobytes()
37+
paper["cohere"] = np.array(paper["cohere"], dtype=np.float32).tobytes()
2638

27-
@pytest_asyncio.fixture(scope="session")
28-
async def async_client():
39+
_ = index.load(data=papers, id_field="paper_id")
40+
return papers
2941

30-
async with AsyncClient(app=app, base_url="http://test/api/v1/") as client:
3142

32-
yield client
43+
@pytest_asyncio.fixture(scope="session")
44+
async def async_client():
45+
async with LifespanManager(app=app) as lifespan:
46+
async with AsyncClient(
47+
transport=httpx.ASGITransport(app=app), base_url="http://test/api/v1/" # type: ignore
48+
) as client:
49+
yield client

backend/arxivsearch/tests/utils/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)