# newsreport_agent_for_traffic/crawler/wechat_crawler.py

"""
微信公众号文章爬虫 - 高德地图公众号
通过微信公众平台接口获取文章数据
"""
import os

from dotenv import load_dotenv

try:
    from .wechat_mp_crawler_base import WeChatMPCrawlerBase
except ImportError:
    from wechat_mp_crawler_base import WeChatMPCrawlerBase


class WeChatCrawler(WeChatMPCrawlerBase):
    """Crawler for articles from the Amap (高德地图) WeChat official account.

    The class adds no behavior of its own; it only overrides the
    class-level settings below on top of ``WeChatMPCrawlerBase``.
    """

    # NOTE(review): these attributes are presumably read by
    # WeChatMPCrawlerBase (account lookup, labelling, output naming,
    # start-up message) -- confirm against the base class.
    backend_biz_query = "gaodeditu"
    source_name = "高德地图公众号"
    file_prefix = "wechat_articles"
    start_message = "开始爬取微信公众号文章..."


def main():
    """Run a manual test crawl and print a short summary.

    Loads variables from ``.env``, then requires a non-blank
    ``WECHAT_MP_COOKIE`` environment variable; when it is missing an error
    message is printed and the function returns without crawling.
    Otherwise a ``WeChatCrawler`` is created and ``crawl_and_save`` is
    called with ``max_count=50``, ``keyword="交通"`` and
    ``save_to_rag=True``, and the number of returned articles is printed.
    """
    load_dotenv()

    # The cookie value itself is not used here; only its presence is checked.
    if not os.getenv("WECHAT_MP_COOKIE", "").strip():
        print("错误: 请在.env文件中配置WECHAT_MP_COOKIE")
        return

    separator = "=" * 60

    for header_line in (separator, "微信公众号文章爬虫 - 高德地图", separator):
        print(header_line)

    crawl_options = {
        "max_count": 50,
        "keyword": "交通",
        "save_to_rag": True,
    }
    articles = WeChatCrawler().crawl_and_save(**crawl_options)

    # Leading newline leaves a blank line between crawler output and summary.
    print("\n" + separator)
    print(f"爬取完成，共获取 {len(articles)} 篇文章")
    print(separator)


# Run the test crawl only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()