Files
CosScene/server/app/services/content_safety.py
T
2026-05-09 16:40:29 +08:00

66 lines
1.8 KiB
Python

import logging
import re
from pathlib import Path
logger = logging.getLogger(__name__)
# Module-level cache of the sensitive word list and its compiled matcher.
# Both are (re)populated by _load_words(); _PATTERN stays None when the
# word list is empty, and the checkers below treat None as "all text safe".
_SENSITIVE_WORDS: list[str] = []
_PATTERN: re.Pattern | None = None
def _load_words() -> None:
    """Load the sensitive word list from disk and (re)build the matcher.

    Reads ``sensitive_words.txt`` (one word per line, UTF-8) from three
    directory levels above this module.  A missing file yields an empty
    list.  Updates the module-level ``_SENSITIVE_WORDS`` and ``_PATTERN``
    globals in place; ``_PATTERN`` is None when no words are configured.
    """
    global _SENSITIVE_WORDS, _PATTERN
    words_file = Path(__file__).parent.parent.parent / "sensitive_words.txt"
    if words_file.exists():
        raw = words_file.read_text(encoding="utf-8")
        _SENSITIVE_WORDS = [w.strip() for w in raw.splitlines() if w.strip()]
    else:
        _SENSITIVE_WORDS = []
    if _SENSITIVE_WORDS:
        # Sort alternatives longest-first before joining: Python regex
        # alternation is first-match, so without this a short word like
        # "bad" listed before "badword" would shadow the longer one and
        # filter_text() would leave the "word" suffix unmasked.
        ordered = sorted(_SENSITIVE_WORDS, key=len, reverse=True)
        escaped = [re.escape(w) for w in ordered]
        _PATTERN = re.compile("|".join(escaped), re.IGNORECASE)
    else:
        _PATTERN = None


# Build the matcher once at import time; reload_sensitive_words() can be
# called later to pick up edits to the file without restarting.
_load_words()
def reload_sensitive_words():
    """Hot-reload the sensitive word list from disk."""
    _load_words()
    word_count = len(_SENSITIVE_WORDS)
    logger.info("Reloaded %d sensitive words", word_count)
def check_text(text: str) -> dict:
    """
    Check text against the sensitive word list.

    Args:
        text: String to scan; falsy values (empty/None) are always safe.

    Returns:
        {"safe": True/False, "matched": [...]} where "matched" lists the
        offending substrings, deduplicated in first-occurrence order.
    """
    # No input or no configured words: trivially safe.
    if not text or _PATTERN is None:
        return {"safe": True, "matched": []}
    matches = _PATTERN.findall(text)
    if matches:
        # dict.fromkeys dedupes while preserving first-occurrence order;
        # list(set(...)) would make the result order non-deterministic.
        unique = list(dict.fromkeys(matches))
        return {"safe": False, "matched": unique}
    return {"safe": True, "matched": []}
def filter_text(text: str, replacement: str = "**") -> str:
    """Replace sensitive words with the replacement string."""
    # Nothing to do when the input is empty or no word list is loaded.
    if text and _PATTERN is not None:
        return _PATTERN.sub(replacement, text)
    return text
async def check_image_safety(image_url: str) -> dict:
    """
    Placeholder for third-party image audit.
    In production, integrate with Tencent Cloud / Aliyun content moderation API.
    Returns {"safe": True/False, "labels": [...]}
    """
    logger.debug("Image safety check (placeholder): %s", image_url)
    # Stub verdict: every image passes until a real moderation backend
    # is wired in.
    verdict: dict = {"safe": True, "labels": []}
    return verdict