newsreport_agent_for_traffic/email_web_app.py

478 lines
14 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Web UI for managing recipients, manual send, and weekly scheduled send."""
import json
import os
import re
import subprocess
import sys
import threading
import time
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import List
from zoneinfo import ZoneInfo
from dotenv import load_dotenv
from flask import Flask, flash, redirect, render_template, request, url_for
from email_sender import EmailSender
# Runs before every os.getenv() call below, so values supplied through python-dotenv
# are already in the environment when the constants are computed.
load_dotenv()
app = Flask(__name__)
# Falls back to a fixed development key when WEB_UI_SECRET_KEY is unset.
# NOTE(review): the fallback is a constant visible in source — set WEB_UI_SECRET_KEY
# for any real deployment.
app.config["SECRET_KEY"] = os.getenv("WEB_UI_SECRET_KEY", "dev-secret-key-change-in-env")
# Directory containing this module.
PROJECT_ROOT = Path(__file__).resolve().parent
# Defaults to ./data; a relative value is resolved against the working directory
# at import time, not against PROJECT_ROOT.
DATA_DIR = Path(os.getenv("DATA_DIR", "./data")).resolve()
# JSON file holding the managed recipient list (override: RECIPIENTS_STORE_FILE).
MANAGED_RECIPIENTS_FILE = Path(
os.getenv("RECIPIENTS_STORE_FILE", str(DATA_DIR / "managed_recipients.json"))
).resolve()
# JSON file holding the weekly schedule settings (override: SCHEDULE_STORE_FILE).
SCHEDULE_CONFIG_FILE = Path(
os.getenv("SCHEDULE_STORE_FILE", str(DATA_DIR / "scheduled_send_config.json"))
).resolve()
# Pragmatic address check: local part, "@", dotted domain, alphabetic TLD of 2+ letters.
EMAIL_PATTERN = re.compile(r"^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$")

# (weekday index, display label) pairs; index 0 is Monday, matching datetime.weekday().
SCHEDULE_WEEKDAY_OPTIONS = list(
    enumerate(
        (
            "星期一",
            "星期二",
            "星期三",
            "星期四",
            "星期五",
            "星期六",
            "星期日",
        )
    )
)

# Baseline schedule settings; stored/loaded payloads are layered on top of these keys.
SCHEDULE_DEFAULT = {
    "enabled": False,
    "weekday": 0,
    "time": "09:00",
    "subject": "",
    "recipients": [],
    "last_run_week": "",
}

# Guards writes to the schedule config file.
_SCHEDULE_LOCK = threading.Lock()
# Flipped to True once the background scheduler thread has been launched.
_SCHEDULER_STARTED = False
def _ensure_data_dir() -> None:
    """Create DATA_DIR, including missing parents; do nothing if it already exists."""
    DATA_DIR.mkdir(exist_ok=True, parents=True)
def _split_recipients(raw_text: str) -> List[str]:
if not raw_text:
return []
tokens = re.split(r"[,\n;\s]+", raw_text.strip())
return [item.strip() for item in tokens if item.strip()]
def _normalize_email(email: str) -> str:
return email.strip().lower()
def _dedupe_emails(items: List[str]) -> List[str]:
    """Normalize every entry and drop blanks and repeats, keeping first-seen order."""
    normalized = (_normalize_email(raw) for raw in items)
    # dict.fromkeys keeps insertion order, so the first occurrence of each address wins.
    return list(dict.fromkeys(addr for addr in normalized if addr))
def _save_managed_recipients(items: List[str]) -> None:
    """Write *items* to MANAGED_RECIPIENTS_FILE as pretty-printed UTF-8 JSON.

    The data directory is created first if needed. The list is stored as given;
    no validation or de-duplication happens here.
    """
    _ensure_data_dir()
    serialized = json.dumps(items, ensure_ascii=False, indent=2)
    MANAGED_RECIPIENTS_FILE.write_text(serialized, encoding="utf-8")
def get_managed_recipients() -> List[str]:
    """Return the managed recipient list, seeding it from the environment if needed.

    When the store file exists and holds a JSON list, its entries are normalized,
    de-duplicated and filtered through EMAIL_PATTERN; if that cleanup changed
    anything, the cleaned list is written back. An empty stored list is returned
    as-is (the environment seed is not re-applied).

    When the store file is missing, unreadable, or does not contain a list, the
    addresses in the EMAIL_RECIPIENTS environment variable are used instead and,
    if any are valid, persisted as the new store content.

    Returns:
        Lower-cased, unique, pattern-valid email addresses.
    """
    if MANAGED_RECIPIENTS_FILE.exists():
        try:
            payload = json.loads(MANAGED_RECIPIENTS_FILE.read_text(encoding="utf-8"))
            if isinstance(payload, list):
                recipients = _dedupe_emails([str(item) for item in payload])
                valid = [x for x in recipients if EMAIL_PATTERN.match(x)]
                if valid != recipients:
                    _save_managed_recipients(valid)
                return valid
        except Exception as exc:
            # Still best-effort (we fall through to the env seed), but leave a trace:
            # the seed below may overwrite the unreadable store file.
            print(
                f"[recipients] Could not load {MANAGED_RECIPIENTS_FILE}: {exc}; "
                "falling back to EMAIL_RECIPIENTS."
            )
    seed = _dedupe_emails(_split_recipients(os.getenv("EMAIL_RECIPIENTS", "")))
    valid_seed = [x for x in seed if EMAIL_PATTERN.match(x)]
    if valid_seed:
        _save_managed_recipients(valid_seed)
    return valid_seed
def _normalize_weekly_schedule(payload: dict | None) -> dict:
    """Overlay *payload* on SCHEDULE_DEFAULT and coerce every known field.

    Field rules:
        enabled        -> bool() of the stored value
        weekday        -> int clamped to 0..6 (0 when not convertible)
        time           -> "HH:MM" within 00:00..23:59, otherwise "09:00"
        subject        -> stripped string, truncated to 120 characters
        recipients     -> normalized, unique, pattern-valid addresses
        last_run_week  -> "YYYY-Www" string, otherwise ""

    Keys in *payload* that are not listed above are carried through unchanged.
    A non-dict *payload* yields the normalized defaults.
    """
    merged = dict(SCHEDULE_DEFAULT)
    if isinstance(payload, dict):
        merged.update(payload)

    merged["enabled"] = bool(merged.get("enabled", False))

    try:
        day_index = int(merged.get("weekday", 0))
    except (TypeError, ValueError):
        day_index = 0
    merged["weekday"] = max(0, min(day_index, 6))

    clock = str(merged.get("time", "09:00")).strip()
    if re.match(r"^\d{2}:\d{2}$", clock) is None:
        clock = "09:00"
    hour_num, minute_num = (int(part) for part in clock.split(":", 1))
    if not (0 <= hour_num <= 23 and 0 <= minute_num <= 59):
        clock = "09:00"
    merged["time"] = clock

    merged["subject"] = str(merged.get("subject", "")).strip()[:120]

    stored_recipients = merged.get("recipients", [])
    as_text = (
        [str(entry) for entry in stored_recipients]
        if isinstance(stored_recipients, list)
        else []
    )
    merged["recipients"] = [
        addr for addr in _dedupe_emails(as_text) if EMAIL_PATTERN.match(addr)
    ]

    stamp = str(merged.get("last_run_week", "")).strip()
    merged["last_run_week"] = stamp if re.match(r"^\d{4}-W\d{2}$", stamp) else ""
    return merged
def get_weekly_schedule_config() -> dict:
    """Load the weekly schedule from SCHEDULE_CONFIG_FILE in normalized form.

    A missing, unreadable or malformed file yields the normalized defaults.

    Returns:
        A fresh dict on every call. The defaults are routed through
        _normalize_weekly_schedule too, so the returned "recipients" list is
        never the same object as the one inside SCHEDULE_DEFAULT and callers
        may mutate the result freely.
    """
    try:
        # Reading directly (instead of exists() + read) also covers the file
        # disappearing between the check and the read.
        payload = json.loads(SCHEDULE_CONFIG_FILE.read_text(encoding="utf-8"))
    except Exception:
        payload = None
    return _normalize_weekly_schedule(payload)
def save_weekly_schedule_config(schedule: dict) -> dict:
    """Normalize *schedule*, persist it as UTF-8 JSON, and return what was stored.

    The file write happens while holding _SCHEDULE_LOCK.
    """
    _ensure_data_dir()
    cleaned = _normalize_weekly_schedule(schedule)
    serialized = json.dumps(cleaned, ensure_ascii=False, indent=2)
    with _SCHEDULE_LOCK:
        SCHEDULE_CONFIG_FILE.write_text(serialized, encoding="utf-8")
    return cleaned
def get_app_timezone() -> ZoneInfo:
tz_name = os.getenv("WEB_UI_TIMEZONE", "Asia/Shanghai").strip() or "Asia/Shanghai"
try:
return ZoneInfo(tz_name)
except Exception:
# Windows may lack IANA timezone data; fall back to fixed UTC+8.
return timezone(timedelta(hours=8), name="Asia/Shanghai")
def _current_week_key(now: datetime) -> str:
iso = now.isocalendar()
return f"{iso.year}-W{iso.week:02d}"
def run_full_refresh_and_generate_report() -> tuple[Path | None, str]:
    """Run the full refresh pipeline in a child process and pick the newest report.

    Executes ``main.py --mode full --sources all`` with the current interpreter
    and ``cwd=PROJECT_ROOT``. Item counts, CCGP keywords and the timeout are
    read from ``WEB_UI_*`` environment variables; a non-numeric
    WEB_UI_REFRESH_TIMEOUT_SEC falls back to 1800 seconds instead of raising.

    Returns:
        ``(report_path, "")`` on success, where *report_path* is the most
        recently modified ``report_*`` file with a ``.txt`` or ``.md`` suffix
        in DATA_DIR; otherwise ``(None, error_message)``.
    """
    max_news = os.getenv("WEB_UI_MAX_NEWS", "20")
    wechat_count = os.getenv("WEB_UI_WECHAT_COUNT", "10")
    baidu_count = os.getenv("WEB_UI_BAIDU_COUNT", "10")
    ccgp_count = os.getenv("WEB_UI_CCGP_COUNT", "10")
    ccgp_keywords = os.getenv("WEB_UI_CCGP_KEYWORDS", "").strip()

    raw_timeout = os.getenv("WEB_UI_REFRESH_TIMEOUT_SEC", "1800")
    try:
        timeout_sec = int(raw_timeout)
    except ValueError:
        # A typo in the environment should not turn every send into an unhandled error.
        print(f"[refresh] Invalid WEB_UI_REFRESH_TIMEOUT_SEC={raw_timeout!r}; using 1800.")
        timeout_sec = 1800

    command = [
        sys.executable,
        "main.py",
        "--mode",
        "full",
        "--sources",
        "all",
        "--max-news",
        str(max_news),
        "--wechat-count",
        str(wechat_count),
        "--baidu-count",
        str(baidu_count),
        "--ccgp-count",
        str(ccgp_count),
    ]
    if ccgp_keywords:
        command.extend(["--ccgp-keywords", ccgp_keywords])

    try:
        process = subprocess.run(
            command,
            cwd=str(PROJECT_ROOT),
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",
            timeout=timeout_sec,
            check=False,
        )
    except subprocess.TimeoutExpired:
        return None, "Refresh and report generation timed out."
    except Exception as exc:
        return None, f"Refresh pipeline failed: {exc}"

    if process.returncode != 0:
        debug_text = (process.stderr or process.stdout or "").strip()
        # Keep only the tail: the end of the output is usually where the error is.
        if len(debug_text) > 500:
            debug_text = debug_text[-500:]
        return None, f"Refresh/report failed (exit={process.returncode}). {debug_text}"

    # NOTE(review): reports are looked up in this process's DATA_DIR while the child
    # runs with cwd=PROJECT_ROOT — confirm main.py writes into the same directory.
    candidates = [
        path
        for path in DATA_DIR.glob("report_*")
        if path.is_file() and path.suffix.lower() in {".txt", ".md"}
    ]
    if not candidates:
        return None, "No report file found after refresh."
    newest = max(candidates, key=lambda p: p.stat().st_mtime)
    return newest, ""
def send_report_with_refresh(recipients: List[str], subject: str | None = None) -> tuple[bool, str]:
    """Regenerate the report and email it to *recipients*.

    Checks, in order: sender credentials (EMAIL_SENDER / EMAIL_PASSWORD) are
    set, *recipients* is non-empty, and every address matches EMAIL_PATTERN.
    Only then is the refresh pipeline run and the resulting file handed to
    EmailSender.send_report.

    Returns:
        ``(True, report_file_name)`` on success, else ``(False, reason)``.
    """
    account = os.getenv("EMAIL_SENDER", "").strip()
    secret = os.getenv("EMAIL_PASSWORD", "").strip()
    if not (account and secret):
        return False, "EMAIL_SENDER or EMAIL_PASSWORD is missing."
    if not recipients:
        return False, "At least one recipient is required."

    rejected = [addr for addr in recipients if EMAIL_PATTERN.match(addr) is None]
    if rejected:
        return False, f"Invalid recipient(s): {', '.join(rejected)}"

    report_path, refresh_error = run_full_refresh_and_generate_report()
    if not report_path:
        return False, refresh_error or "No report generated."

    delivered = EmailSender(account, secret).send_report(
        recipient_emails=recipients,
        report_path=str(report_path),
        subject=subject,
    )
    if delivered:
        return True, report_path.name
    return False, "SMTP send failed."
def _run_weekly_schedule_once() -> None:
    """Perform one scheduler tick: send the weekly report if it is due right now.

    A send happens only when the schedule is enabled, has recipients, today is
    the configured weekday, the current hour and minute equal the configured
    time (in the app timezone), and this ISO week has not been marked as run.
    The week is marked as run only after a successful send.
    """
    schedule = get_weekly_schedule_config()
    if not schedule.get("enabled"):
        return
    recipients = schedule.get("recipients", [])
    if not recipients:
        return

    now = datetime.now(get_app_timezone())
    if now.weekday() != int(schedule.get("weekday", 0)):
        return

    schedule_time = str(schedule.get("time", "09:00"))
    try:
        hour_str, minute_str = schedule_time.split(":", 1)
        schedule_hour = int(hour_str)
        schedule_minute = int(minute_str)
    except Exception:
        return
    # Exact-minute match: the tick must land inside the configured minute.
    if now.hour != schedule_hour or now.minute != schedule_minute:
        return

    week_key = _current_week_key(now)
    if schedule.get("last_run_week") == week_key:
        return

    ok, message = send_report_with_refresh(
        recipients=recipients,
        subject=schedule.get("subject") or None,
    )
    if ok:
        # The send can take many minutes. Re-read the stored config so that a
        # schedule edit saved from the UI in the meantime is not overwritten by
        # the snapshot taken at the top of this function.
        latest = get_weekly_schedule_config()
        latest["last_run_week"] = week_key
        save_weekly_schedule_config(latest)
        print(
            f"[scheduler] Weekly send completed at {now.strftime('%Y-%m-%d %H:%M:%S')} "
            f"for {len(recipients)} recipients, report={message}"
        )
    else:
        print(f"[scheduler] Weekly send failed at {now.strftime('%Y-%m-%d %H:%M:%S')}: {message}")
def _scheduler_loop() -> None:
    """Run scheduler ticks forever, pausing 30 seconds between them.

    Any exception raised by a tick is printed and swallowed so the loop keeps
    running. This function never returns.
    """
    poll_interval_sec = 30
    print("[scheduler] Weekly scheduler loop started.")
    while True:
        try:
            _run_weekly_schedule_once()
        except Exception as exc:
            print(f"[scheduler] Unexpected error: {exc}")
        time.sleep(poll_interval_sec)
def start_scheduler_thread() -> None:
    """Launch the daemon thread running _scheduler_loop, at most once per process.

    Later calls are no-ops once _SCHEDULER_STARTED has been set.
    """
    global _SCHEDULER_STARTED
    if not _SCHEDULER_STARTED:
        worker = threading.Thread(
            name="weekly-scheduler",
            target=_scheduler_loop,
            daemon=True,
        )
        worker.start()
        _SCHEDULER_STARTED = True
def is_scheduler_started() -> bool:
    """Report whether start_scheduler_thread has already launched the scheduler."""
    started = _SCHEDULER_STARTED
    return started
def mask_email(email: str) -> str:
    """Return *email* with most of its local part replaced by asterisks.

    Local parts of up to two characters keep their first character followed by
    a single ``*``; longer ones keep the first two characters and mask the
    rest. The domain is shown unchanged.

    Returns:
        The masked address, or the placeholder "未配置" when *email* is empty
        or contains no "@".
    """
    if not email or "@" not in email:
        return "未配置"
    username, domain = email.split("@", 1)
    if len(username) <= 2:
        # Slice rather than index: an empty local part ("@example.com") must
        # produce "*" instead of raising IndexError.
        masked = username[:1] + "*"
    else:
        masked = username[:2] + "*" * (len(username) - 2)
    return f"{masked}@{domain}"
@app.route("/", methods=["GET"])
def index():
    """Render the email console with sender status, recipients and schedule state."""
    sender_address = os.getenv("EMAIL_SENDER", "").strip()
    sender_secret = os.getenv("EMAIL_PASSWORD", "").strip()
    recipients = get_managed_recipients()
    schedule = get_weekly_schedule_config()

    context = {
        "sender_masked": mask_email(sender_address),
        # Ready only when both the address and its password are configured.
        "sender_ready": bool(sender_address and sender_secret),
        "managed_recipients": recipients,
        "schedule_config": schedule,
        "schedule_weekday_options": SCHEDULE_WEEKDAY_OPTIONS,
        "scheduler_started": is_scheduler_started(),
    }
    return render_template("email_console.html", **context)
@app.route("/send", methods=["POST"])
def send_email():
    """Handle a manual send: refresh the report and mail it to the selected recipients.

    The outcome is reported through a flash message; the response is always a
    redirect back to the index page.
    """
    subject = request.form.get("subject", "").strip() or None
    # NOTE(review): addresses come straight from the form and are not checked
    # against the managed list here — confirm that is intended.
    recipients = _dedupe_emails(request.form.getlist("managed_recipients"))

    if recipients:
        success, message = send_report_with_refresh(recipients=recipients, subject=subject)
        if success:
            flash(f"Sent successfully to {len(recipients)} recipient(s), report: {message}", "success")
        else:
            flash(f"Send failed: {message}", "error")
    else:
        flash("Please select at least one recipient.", "error")
    return redirect(url_for("index"))
@app.route("/schedule/update", methods=["POST"])
def update_schedule():
    """Validate and store the weekly schedule submitted from the console form.

    The schedule's recipients are always the current managed recipient list.
    An unparsable or out-of-range weekday falls back to 0. The time must be
    ``HH:MM`` within 00:00..23:59; anything else is rejected with an error
    flash rather than being silently replaced by the default during
    normalization. Enabling the schedule requires at least one managed
    recipient. The stored ``last_run_week`` marker is preserved.

    Always responds with a redirect to the index page.
    """
    enabled = request.form.get("schedule_enabled") == "on"
    subject = request.form.get("schedule_subject", "").strip()
    weekday_raw = request.form.get("schedule_weekday", "0").strip()
    time_raw = request.form.get("schedule_time", "09:00").strip()
    recipients = get_managed_recipients()

    try:
        weekday = int(weekday_raw)
    except ValueError:
        weekday = 0
    if weekday < 0 or weekday > 6:
        weekday = 0

    time_match = re.match(r"^(\d{2}):(\d{2})$", time_raw)
    if not time_match:
        flash("Schedule time format should be HH:MM.", "error")
        return redirect(url_for("index"))
    # Shape alone is not enough: "99:99" matches the pattern but is not a time.
    if int(time_match.group(1)) > 23 or int(time_match.group(2)) > 59:
        flash("Schedule time must be between 00:00 and 23:59.", "error")
        return redirect(url_for("index"))

    if enabled and not recipients:
        flash("Please add at least one managed recipient before enabling weekly schedule.", "error")
        return redirect(url_for("index"))

    current = get_weekly_schedule_config()
    schedule = {
        "enabled": enabled,
        "weekday": weekday,
        "time": time_raw,
        "subject": subject[:120],
        "recipients": recipients,
        # Carry the marker over so editing the schedule does not trigger a re-send.
        "last_run_week": current.get("last_run_week", ""),
    }
    save_weekly_schedule_config(schedule)
    flash("Weekly schedule saved.", "success")
    return redirect(url_for("index"))
@app.route("/recipients/add", methods=["POST"])
def add_recipient():
    """Add one address from the form to the managed recipient list.

    Rejects blank input, addresses failing EMAIL_PATTERN, and duplicates.
    Always responds with a flash message and a redirect to the index page.
    """
    submitted = request.form.get("new_recipient", "").strip()
    if not submitted:
        flash("Please input a recipient email.", "error")
        return redirect(url_for("index"))

    email = _normalize_email(submitted)
    if EMAIL_PATTERN.match(email) is None:
        flash("Invalid email format.", "error")
        return redirect(url_for("index"))

    existing = get_managed_recipients()
    if email in existing:
        flash("Recipient already exists.", "error")
        return redirect(url_for("index"))

    existing.append(email)
    _save_managed_recipients(existing)
    flash(f"Recipient added: {email}", "success")
    return redirect(url_for("index"))
@app.route("/recipients/delete", methods=["POST"])
def delete_recipient():
    """Remove the submitted address from the managed recipient list.

    Flashes an error when the address is not in the list. Always responds with
    a redirect to the index page.
    """
    email = _normalize_email(request.form.get("email", "").strip())
    existing = get_managed_recipients()

    if email in existing:
        remaining = [addr for addr in existing if addr != email]
        _save_managed_recipients(remaining)
        flash(f"Recipient deleted: {email}", "success")
    else:
        flash("Recipient not found.", "error")
    return redirect(url_for("index"))
if __name__ == "__main__":
    # Host, port and debug mode come from WEB_UI_HOST / WEB_UI_PORT / WEB_UI_DEBUG.
    # NOTE(review): nothing in this entry point calls start_scheduler_thread();
    # presumably another launcher starts it — confirm, otherwise weekly sends never run.
    app.run(
        host=os.getenv("WEB_UI_HOST", "127.0.0.1"),
        port=int(os.getenv("WEB_UI_PORT", "7860")),
        debug=os.getenv("WEB_UI_DEBUG", "0") == "1",
    )