51 lines
1.2 KiB
Python
51 lines
1.2 KiB
Python
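"""
WeChat official-account article crawler - Amap (高德地图) official account.

Fetches article data through the WeChat Official Accounts Platform interface.
"""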
|
|
import os
|
|
|
|
from dotenv import load_dotenv
|
|
|
|
try:
|
|
from .wechat_mp_crawler_base import WeChatMPCrawlerBase
|
|
except ImportError:
|
|
from wechat_mp_crawler_base import WeChatMPCrawlerBase
|
|
|
|
|
|
class WeChatCrawler(WeChatMPCrawlerBase):
    """WeChat official-account article crawler for the Amap (高德地图) account.

    This subclass defines no methods of its own; it only sets the
    class-level configuration values below on top of WeChatMPCrawlerBase.
    """

    # NOTE(review): how each value is consumed is defined by
    # WeChatMPCrawlerBase, which is not visible here - confirm there.

    # Display name of the crawled source.
    source_name = "高德地图公众号"

    # Presumably the prefix for output file names - TODO confirm.
    file_prefix = "wechat_articles"

    # Message presumably emitted when a crawl starts - TODO confirm.
    start_message = "开始爬取微信公众号文章..."

    # Presumably the query used to locate the account in the platform
    # backend - TODO confirm.
    backend_biz_query = "gaodeditu"
|
|
|
|
|
|
def main():
    """Manual test entry point for the crawler.

    Loads environment variables with ``load_dotenv()``. If
    ``WECHAT_MP_COOKIE`` is missing or blank, prints an error message and
    returns without crawling. Otherwise prints a banner, calls
    ``WeChatCrawler().crawl_and_save`` with ``max_count=50``,
    ``keyword="交通"`` and ``save_to_rag=True``, and prints how many
    articles were returned.
    """
    load_dotenv()

    # The cookie value is not passed on from here; this is only a
    # pre-flight check that it has been configured.
    if not os.getenv("WECHAT_MP_COOKIE", "").strip():
        print("错误: 请在.env文件中配置WECHAT_MP_COOKIE")
        return

    divider = "=" * 60

    print(divider)
    print("微信公众号文章爬虫 - 高德地图")
    print(divider)

    articles = WeChatCrawler().crawl_and_save(
        max_count=50,
        keyword="交通",
        save_to_rag=True,
    )
    article_count = len(articles)

    print(f"\n{divider}")
    print(f"爬取完成,共获取 {article_count} 篇文章")
    print(divider)
|
|
|
|
|
|
# Run the manual test only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|