newsreport_agent_for_traffic/crawler/wechat_crawler.py

51 lines
1.2 KiB
Python
Raw Permalink Normal View History

2026-05-09 10:46:52 +08:00
"""
微信公众号文章爬虫 - 高德地图公众号
通过微信公众平台接口获取文章数据
"""
import os
from dotenv import load_dotenv
try:
from .wechat_mp_crawler_base import WeChatMPCrawlerBase
except ImportError:
from wechat_mp_crawler_base import WeChatMPCrawlerBase
class WeChatCrawler(WeChatMPCrawlerBase):
    """WeChat Official Account article crawler for the Amap (高德地图) account.

    This subclass defines no methods of its own; it only sets class-level
    configuration values on top of ``WeChatMPCrawlerBase``.
    """

    # NOTE(review): the attributes below are presumably read by
    # WeChatMPCrawlerBase; confirm their exact use against the base class.

    # Identifier used to look up the target account (looks like the Amap
    # account handle; verify).
    backend_biz_query = "gaodeditu"

    # Display name of the article source.
    source_name = "高德地图公众号"

    # Prefix that presumably names the saved output files; verify.
    file_prefix = "wechat_articles"

    # Message presumably shown when a crawl begins; verify.
    start_message = "开始爬取微信公众号文章..."
def main():
    """Manual test entry point for ``WeChatCrawler``.

    Loads environment variables with ``load_dotenv``. If
    ``WECHAT_MP_COOKIE`` is unset or blank, prints an error message and
    returns without crawling. Otherwise prints a banner, calls
    ``crawl_and_save`` with ``max_count=50``, ``keyword="交通"`` and
    ``save_to_rag=True``, and prints how many articles were returned.
    """
    load_dotenv()

    # The cookie is only validated for presence here; it is not handed to the
    # crawler by this function.
    if not os.getenv("WECHAT_MP_COOKIE", "").strip():
        print("错误: 请在.env文件中配置WECHAT_MP_COOKIE")
        return

    # Same 60-character separator line is reused for every banner row.
    rule = "=" * 60

    print(rule)
    print("微信公众号文章爬虫 - 高德地图")
    print(rule)

    results = WeChatCrawler().crawl_and_save(
        max_count=50,
        keyword="交通",
        save_to_rag=True,
    )

    print(f"\n{rule}")
    print(f"爬取完成,共获取 {len(results)} 篇文章")
    print(rule)
# Run the test entry point only when this file is executed directly as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()