# guba2vec/check_page.py

import requests

# Browser-like request headers sent with the page fetch.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'Referer': 'https://guba.eastmoney.com/',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
    # 'br' is deliberately not advertised: requests/urllib3 only decode Brotli
    # when an optional brotli package is installed. Without it, a
    # Brotli-compressed body would be decoded as garbage, making the marker
    # checks and the saved file below meaningless.
    'Accept-Encoding': 'gzip, deflate',
    'Connection': 'keep-alive'
}

post_id = '1708066915'
url = f'https://guba.eastmoney.com/news,002624,{post_id}.html'


def main():
    """Fetch the post page, print basic diagnostics, and save it to disk.

    Prints the HTTP status code, the length of the decoded page, whether
    each of three marker strings occurs in the page, and the first 500
    characters. The decoded page is written to ``current_page.html`` in
    the current working directory, regardless of the status code.
    """
    print(f'请求: {url}')
    response = requests.get(url, headers=headers, timeout=15)
    response.encoding = 'utf-8'
    # response.text decodes the body again on every access, so decode once.
    html = response.text
    print(f'状态码: {response.status_code}')
    print(f'页面长度: {len(html)}')

    # Check for key marker strings in the page.
    print('\n检查页面中的关键字符串:')
    for marker in ('post_article', 'comment_list', 'news_content'):
        print(f'{marker}: {marker in html}')

    # Save the page.
    with open('current_page.html', 'w', encoding='utf-8') as f:
        f.write(html)
    print('\n页面已保存到 current_page.html')

    # Show the beginning of the page.
    print('\n页面开头:')
    print(html[:500])


# Run the check only when executed as a script, so importing this module
# does not trigger a network request or write a file.
if __name__ == '__main__':
    main()