"""Sensitive-word filtering utilities with a hot-reloadable word list."""

import logging
import re
from pathlib import Path

logger = logging.getLogger(__name__)

_SENSITIVE_WORDS: list[str] = []
_PATTERN: re.Pattern | None = None


def _load_words() -> None:
    """Load the word list from disk and (re)compile the matching pattern."""
    global _SENSITIVE_WORDS, _PATTERN
    words_file = Path(__file__).parent.parent.parent / "sensitive_words.txt"
    if words_file.exists():
        raw = words_file.read_text(encoding="utf-8")
        _SENSITIVE_WORDS = [w.strip() for w in raw.splitlines() if w.strip()]
    else:
        _SENSITIVE_WORDS = []
    if _SENSITIVE_WORDS:
        # Sort longest-first so the alternation prefers "badword" over "bad";
        # Python's re module tries alternatives left to right.
        ordered = sorted(_SENSITIVE_WORDS, key=len, reverse=True)
        escaped = [re.escape(w) for w in ordered]
        _PATTERN = re.compile("|".join(escaped), re.IGNORECASE)
    else:
        _PATTERN = None


# Compile the pattern once at import time.
_load_words()


def reload_sensitive_words() -> None:
    """Hot-reload the sensitive word list from disk."""
    _load_words()
    logger.info("Reloaded %d sensitive words", len(_SENSITIVE_WORDS))


def check_text(text: str) -> dict:
    """
    Check text against the sensitive word list.

    Returns {"safe": True/False, "matched": [...]}
    """
    if not text or _PATTERN is None:
        return {"safe": True, "matched": []}
    matches = _PATTERN.findall(text)
    if matches:
        # Matching is case-insensitive, so deduplicate case-insensitively too:
        # "Bad" and "bad" count as one hit.
        unique = sorted({m.lower() for m in matches})
        return {"safe": False, "matched": unique}
    return {"safe": True, "matched": []}


def filter_text(text: str, replacement: str = "**") -> str:
    """Replace sensitive words with the replacement string."""
    if not text or _PATTERN is None:
        return text
    return _PATTERN.sub(replacement, text)


async def check_image_safety(image_url: str) -> dict:
    """
    Placeholder for third-party image audit.

    In production, integrate with Tencent Cloud / Aliyun content moderation API.
    Returns {"safe": True/False, "labels": [...]}
    """
    logger.debug("Image safety check (placeholder): %s", image_url)
    return {"safe": True, "labels": []}
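

# --- Usage sketch -----------------------------------------------------------
# A minimal demo of the call pattern, assuming sensitive_words.txt exists at
# the expected location and (hypothetically) contains the words "spam" and
# "scam". The sample strings and expected outputs below are illustrative only.
if __name__ == "__main__":
    import asyncio

    logging.basicConfig(level=logging.INFO)

    sample = "This Spam offer is a scam."
    print(check_text(sample))     # e.g. {"safe": False, "matched": ["scam", "spam"]}
    print(filter_text(sample))    # e.g. "This ** offer is a **."

    # After editing sensitive_words.txt on disk, pick up changes without restarting:
    reload_sensitive_words()

    # The image check is async; run it via an event loop.
    print(asyncio.run(check_image_safety("https://example.com/img.png")))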