Skip to content

Commit 02abfb2

Browse files
authored
Fix issue requiring to install spacy and rapidfuzz even if not used (#337)
* Fix issue requiring to install spacy and rapidfuzz even if not used * Update CHANGELOG and doc
1 parent 8b163e4 commit 02abfb2

File tree

3 files changed

+59
-16
lines changed

3 files changed

+59
-16
lines changed

CHANGELOG.md

+4
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@
22

33
## Next
44

5+
### Fixed
6+
7+
- Fixed a bug where `spacy` and `rapidfuzz` needed to be installed even if not using the relevant entity resolvers.
8+
59
## 1.7.0
610

711
### Added

docs/source/index.rst

+2-4
Original file line numberDiff line numberDiff line change
@@ -98,10 +98,8 @@ List of extra dependencies:
9898
- **pinecone**: store vectors in Pinecone
9999
- **qdrant**: store vectors in Qdrant
100100
- **experimental**: experimental features mainly from the Knowledge Graph creation pipelines.
101-
- nlp:
102-
- **spaCy**: load spaCy trained models for nlp pipelines, used by `SpaCySemanticMatchResolver` component from the Knowledge Graph creation pipelines.
103-
- fuzzy-matching:
104-
- **rapidfuzz**: apply fuzzy matching using string similarity, used by `FuzzyMatchResolver` component from the Knowledge Graph creation pipelines.
101+
- **nlp**: installs **spaCy** for NLP pipelines, used by the `SpaCySemanticMatchResolver` component from the Knowledge Graph creation pipelines.
102+
- **fuzzy-matching**: installs **rapidfuzz** for fuzzy matching based on string similarity, used by the `FuzzyMatchResolver` component from the Knowledge Graph creation pipelines.
105103

106104
********
107105
Examples

src/neo4j_graphrag/experimental/components/resolver.py

+53-12
Original file line numberDiff line numberDiff line change
@@ -12,18 +12,36 @@
1212
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
15+
from __future__ import annotations
16+
1517
import abc
1618
import logging
1719
from itertools import combinations
18-
from typing import Any, List, Optional
20+
from typing import Any, List, Optional, TYPE_CHECKING
21+
22+
23+
try:
24+
from rapidfuzz import fuzz
25+
from rapidfuzz import utils
26+
27+
IS_RAPIDFUZZ_INSTALLED = True
28+
except ImportError:
29+
IS_RAPIDFUZZ_INSTALLED = False
30+
31+
try:
32+
import spacy
33+
from spacy.cli.download import download as spacy_download
34+
from spacy.language import Language
35+
import numpy as np
36+
37+
IS_SPACY_INSTALLED = True
38+
except ImportError:
39+
IS_SPACY_INSTALLED = False
1940

20-
import numpy as np
21-
import rapidfuzz.fuzz
22-
import spacy
23-
from numpy.typing import NDArray
24-
from rapidfuzz import utils
25-
from spacy.cli.download import download as spacy_download
26-
from spacy.language import Language
41+
42+
if TYPE_CHECKING:
43+
import numpy as np
44+
from numpy.typing import NDArray
2745

2846
import neo4j
2947
from neo4j_graphrag.experimental.components.types import ResolutionStats
@@ -334,6 +352,11 @@ def __init__(
334352
spacy_model: str = "en_core_web_lg",
335353
neo4j_database: Optional[str] = None,
336354
) -> None:
355+
if not IS_SPACY_INSTALLED:
356+
raise ImportError("""`spacy` python module needs to be installed to use
357+
the SpaCySemanticMatchResolver. Install it with:
358+
`pip install "neo4j-graphrag[nlp]"`
359+
""")
337360
super().__init__(
338361
driver,
339362
filter_query,
@@ -398,6 +421,27 @@ class FuzzyMatchResolver(BasePropertySimilarityResolver):
398421
and 1.
399422
"""
400423

424+
def __init__(
    self,
    driver: neo4j.Driver,
    filter_query: Optional[str] = None,
    resolve_properties: Optional[List[str]] = None,
    similarity_threshold: float = 0.8,
    neo4j_database: Optional[str] = None,
) -> None:
    """Create a FuzzyMatchResolver, failing fast if `rapidfuzz` is missing.

    `rapidfuzz` is an optional dependency, so its presence is checked here
    (at construction time) rather than at module import time. This keeps
    the module importable for users who never use this resolver.

    All arguments are forwarded unchanged, in the same order, to the
    parent class initializer.

    Args:
        driver: Neo4j driver passed through to the parent resolver.
        filter_query: Optional query filter passed through to the parent.
        resolve_properties: Optional list of property names passed through
            to the parent.
        similarity_threshold: Threshold passed through to the parent
            (defaults to 0.8).
        neo4j_database: Optional database name passed through to the parent.

    Raises:
        ImportError: If `rapidfuzz` could not be imported.
    """
    if not IS_RAPIDFUZZ_INSTALLED:
        # The message must name this class (it previously named
        # SpaCySemanticMatchResolver, a copy-paste slip) and is built from
        # adjacent literals so no source indentation leaks into the text.
        raise ImportError(
            "`rapidfuzz` python module needs to be installed to use\n"
            "the FuzzyMatchResolver. Install it with:\n"
            '`pip install "neo4j-graphrag[fuzzy-matching]"`\n'
        )
    super().__init__(
        driver,
        filter_query,
        resolve_properties,
        similarity_threshold,
        neo4j_database,
    )
444+
401445
async def run(self) -> ResolutionStats:
    """Run the resolver by delegating to the parent class implementation.

    Returns:
        The ResolutionStats produced by the parent class's ``run``.
    """
    stats = await super().run()
    return stats
403447

@@ -406,7 +450,4 @@ def compute_similarity(self, text_a: str, text_b: str) -> float:
406450
# normalize the input strings before the comparison is done (processor=utils.default_process)
407451
# e.g., lowercase the text, strip whitespace, and remove punctuation
408452
# normalize the score to the 0..1 range
409-
return (
410-
rapidfuzz.fuzz.WRatio(text_a, text_b, processor=utils.default_process)
411-
/ 100.0
412-
)
453+
return fuzz.WRatio(text_a, text_b, processor=utils.default_process) / 100.0

0 commit comments

Comments
 (0)