66 lines
1.8 KiB
Python
66 lines
1.8 KiB
Python
import logging
|
|
import re
|
|
from pathlib import Path
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# In-memory copy of the word list loaded from sensitive_words.txt.
_SENSITIVE_WORDS: list[str] = []


# Compiled case-insensitive alternation over _SENSITIVE_WORDS;
# None when the word list is empty or the file is missing.
_PATTERN: re.Pattern | None = None
|
|
|
|
|
|
def _load_words():
    """Load the sensitive word list from disk and compile the match pattern.

    Reads ``sensitive_words.txt`` (one word per line, blank lines ignored)
    from three directories above this file, storing the words in
    ``_SENSITIVE_WORDS`` and a compiled case-insensitive alternation in
    ``_PATTERN``. When the file is missing or empty, the list is cleared
    and ``_PATTERN`` is set to None (callers then treat all text as safe).
    """
    global _SENSITIVE_WORDS, _PATTERN

    words_file = Path(__file__).parent.parent.parent / "sensitive_words.txt"

    if words_file.exists():
        raw = words_file.read_text(encoding="utf-8")
        _SENSITIVE_WORDS = [w.strip() for w in raw.splitlines() if w.strip()]
    else:
        _SENSITIVE_WORDS = []

    if _SENSITIVE_WORDS:
        # Sort longest-first before joining: Python's re alternation is
        # leftmost-match, not longest-match, so "bad|badword" would only
        # ever match "bad" inside "badword", leaving "word" unfiltered.
        ordered = sorted(_SENSITIVE_WORDS, key=len, reverse=True)
        escaped = [re.escape(w) for w in ordered]
        _PATTERN = re.compile("|".join(escaped), re.IGNORECASE)
    else:
        _PATTERN = None
|
|
|
|
|
|
# Populate the word list and compiled pattern once at import time.
_load_words()
|
|
|
|
|
|
def reload_sensitive_words():
    """Re-read the sensitive word list from disk and rebuild the pattern.

    Intended for runtime hot-reload: call after editing sensitive_words.txt
    to pick up changes without restarting the process.
    """
    _load_words()
    # Log the new list size so operators can confirm the reload took effect.
    logger.info("Reloaded %d sensitive words", len(_SENSITIVE_WORDS))
|
|
|
|
|
|
def check_text(text: str) -> dict:
    """Scan *text* for sensitive words.

    Returns a dict of the form ``{"safe": bool, "matched": [...]}`` where
    ``matched`` holds the distinct substrings that hit the word list
    (empty when the text is clean, empty, or no word list is loaded).
    """
    # Empty text, or no compiled pattern (no word list): trivially safe.
    if not text or _PATTERN is None:
        return {"safe": True, "matched": []}

    hits = _PATTERN.findall(text)
    if not hits:
        return {"safe": True, "matched": []}

    # De-duplicate the raw matches before reporting them.
    return {"safe": False, "matched": list(set(hits))}
|
|
|
|
|
|
def filter_text(text: str, replacement: str = "**") -> str:
    """Return *text* with every sensitive word substituted by *replacement*.

    Empty text, or an unloaded word list (``_PATTERN`` is None), passes
    through unchanged.
    """
    if not text or _PATTERN is None:
        return text

    # Single pass over the compiled alternation handles all words at once.
    return _PATTERN.sub(replacement, text)
|
|
|
|
|
|
async def check_image_safety(image_url: str) -> dict:
    """Stub for a third-party image moderation check.

    A production deployment should replace this with a real call to a
    content-moderation API (e.g. Tencent Cloud / Aliyun). As written it
    always reports the image as safe.

    Returns a dict of the form ``{"safe": bool, "labels": [...]}``.
    """
    # Trace-level record of what would have been audited.
    logger.debug("Image safety check (placeholder): %s", image_url)
    return {"safe": True, "labels": []}
|