Merge pull request #931 from mvalentsev/fix/i18n-entity-metadata
fix: use i18n candidate patterns for entity extraction in miner and palace
This commit is contained in:
@@ -656,7 +656,9 @@ class EntityRegistry:
 Find capitalized words in query that aren't in registry or common words.
 These are candidates for Wikipedia research.
 """
-candidates = re.findall(r"\b[A-Z][a-z]{2,15}\b", query)
+from .palace import _candidate_entity_words
+
+candidates = _candidate_entity_words(query)
 unknown = []
 for word in set(candidates):
 if word.lower() in COMMON_ENGLISH_WORDS:
Reference in New Issue
Block a user