Initial project commit
This commit is contained in:
@@ -0,0 +1,65 @@
|
||||
import logging
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
||||
|
||||
# Stripped, non-blank lines read from sensitive_words.txt by _load_words();
# stays empty when that file does not exist.
_SENSITIVE_WORDS: list[str] = []
|
||||
# Case-insensitive regex matching any loaded word, rebuilt by _load_words();
# None while the word list is empty.
_PATTERN: re.Pattern | None = None
|
||||
|
||||
|
||||
def _load_words():
|
||||
global _SENSITIVE_WORDS, _PATTERN
|
||||
words_file = Path(__file__).parent.parent.parent / "sensitive_words.txt"
|
||||
if words_file.exists():
|
||||
raw = words_file.read_text(encoding="utf-8")
|
||||
_SENSITIVE_WORDS = [w.strip() for w in raw.splitlines() if w.strip()]
|
||||
else:
|
||||
_SENSITIVE_WORDS = []
|
||||
|
||||
if _SENSITIVE_WORDS:
|
||||
escaped = [re.escape(w) for w in _SENSITIVE_WORDS]
|
||||
_PATTERN = re.compile("|".join(escaped), re.IGNORECASE)
|
||||
else:
|
||||
_PATTERN = None
|
||||
|
||||
|
||||
# Populate the word list and pattern once, as a side effect of importing this module.
_load_words()
|
||||
|
||||
|
||||
def reload_sensitive_words():
    """Re-read the sensitive word file and rebuild the matcher in place.

    Logs the number of words held after the reload at INFO level.
    """
    _load_words()
    word_count = len(_SENSITIVE_WORDS)
    logger.info("Reloaded %d sensitive words", word_count)
|
||||
|
||||
|
||||
def check_text(text: str) -> dict:
    """Check text against the sensitive word list.

    Args:
        text: The text to scan. Empty or ``None`` is treated as safe.

    Returns:
        ``{"safe": bool, "matched": [...]}`` where ``matched`` holds each
        distinct matched substring, spelled as it appears in ``text``, in
        order of first occurrence. When no word list is loaded the text is
        always reported as safe.
    """
    if not text or _PATTERN is None:
        return {"safe": True, "matched": []}

    # dict.fromkeys de-duplicates while keeping first-occurrence order, so the
    # result is stable; iterating a set of strings can differ between runs.
    matched = list(dict.fromkeys(_PATTERN.findall(text)))
    return {"safe": not matched, "matched": matched}
|
||||
|
||||
|
||||
def filter_text(text: str, replacement: str = "**") -> str:
    """Replace every sensitive word in ``text`` with ``replacement``.

    Args:
        text: The text to sanitize. Empty or ``None`` is returned unchanged.
        replacement: Literal string substituted for each match.

    Returns:
        The text with all matches replaced, or the input unchanged when no
        word list is loaded.
    """
    if not text or _PATTERN is None:
        return text

    # A callable makes re.sub insert the replacement verbatim. Passed as a
    # plain string it is treated as a template, so a backslash or "\1" in a
    # caller-supplied replacement would be interpreted or raise re.error.
    return _PATTERN.sub(lambda _match: replacement, text)
|
||||
|
||||
|
||||
async def check_image_safety(image_url: str) -> dict:
    """Stub for a third-party image audit.

    No moderation is performed yet: the URL is logged at DEBUG level and every
    image is reported as safe with no labels. A production build is meant to
    call a content moderation API (Tencent Cloud / Aliyun) here.

    Returns:
        ``{"safe": True/False, "labels": [...]}``
    """
    logger.debug("Image safety check (placeholder): %s", image_url)
    verdict = {"safe": True, "labels": []}
    return verdict
|
||||
Reference in New Issue
Block a user