guba
This commit is contained in:
@@ -0,0 +1,34 @@
|
|||||||
|
import requests
|
||||||
|
|
||||||
|
# Browser-like request headers sent with the single page request below.
# NOTE(review): 'br' is advertised in Accept-Encoding; presumably the HTTP
# client in this environment can decode Brotli -- confirm.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'Referer': 'https://guba.eastmoney.com/',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
    'Accept-Encoding': 'gzip, deflate, br',
    'Connection': 'keep-alive',
}

# The post whose detail page is downloaded for inspection.
post_id = '1708066915'
url = f'https://guba.eastmoney.com/news,002624,{post_id}.html'

print(f'请求: {url}')
response = requests.get(url, headers=headers, timeout=15)
response.encoding = 'utf-8'  # force UTF-8 decoding of the body
print(f'状态码: {response.status_code}')
page_text = response.text
print(f'页面长度: {len(page_text)}')

# Report whether each marker string of interest occurs in the page.
print('\n检查页面中的关键字符串:')
for marker in ('post_article', 'comment_list', 'news_content'):
    print(f'{marker}: {marker in page_text}')

# Save the page so it can be inspected offline.
with open('current_page.html', 'w', encoding='utf-8') as f:
    f.write(page_text)
print('\n页面已保存到 current_page.html')

# Show the first 500 characters of the page.
print('\n页面开头:')
print(page_text[:500])
|
||||||
File diff suppressed because one or more lines are too long
@@ -0,0 +1,241 @@
|
|||||||
|
page,title,author,time,reply_count,click_count
|
||||||
|
1,无论是人才还是技术还是渠道,希望这次管理成的变动能够让这个曾经行业龙头走向正轨,雪球小鲁班,2026/5/14 12:07,24,71
|
||||||
|
1,【今日股市】指数午后低位震荡,资源股跌幅居前,雪球小鲁班,2026/5/14 10:30,31,62
|
||||||
|
1,该跌了吧,沉心静气扬帆起航,2026/5/14 11:45,18,79
|
||||||
|
1,感觉像是有组织的散户进场了,就是所谓的老鼠,回本困难户第N位,2026/5/13 22:21,35,89
|
||||||
|
1,终于涨了,昨天跌那么狠,裤衩子都没有了,花火飞鸟,2026/5/14 12:04,47,83
|
||||||
|
1,算不算放量暴跌?!,股友63F0o88663,2026/5/14 12:03,22,78
|
||||||
|
1,今天爆跌,明天一大堆利好又红红火火长阳,TCL一贯套路,心碎股票人,2026/5/14 12:02,25,81
|
||||||
|
1,没事 拿住 今天主力动用资金拉券商股 明天活埋券商再来拉你,雪球小鲁班,2026/5/14 10:37,32,64
|
||||||
|
1,大神解释一下,为什么大单疯狂出货,都出了一个亿了还是能这么涨,月茨星,2026/5/14 11:52,43,66
|
||||||
|
1,为什么还在卖,要停牌了,股友36E919X121,2026/5/14 11:17,24,63
|
||||||
|
1,涨一天跌一周,就这尿性。。。,股友7715N807H3,2026/5/14 11:24,20,76
|
||||||
|
1,今天应该有榜,瑜佳不佳,2026/5/14 11:56,4,17
|
||||||
|
1,大盘涨跌工具,厉害的小散,2026/5/14 11:56,12,78
|
||||||
|
1,眼光看长远的几十个点,不纠结每天的几个点,股友3Y063588A1,2026/5/14 11:52,20,66
|
||||||
|
1,8.15箱底满仓干满仓干,半导体与智能消费最优质龙头TCL,SEO神话,2026/5/14 11:51,44,3
|
||||||
|
1,中信还在加仓,要命哟,金炫宇1,2026/5/14 11:50,1,5
|
||||||
|
1,这压单这么低的价格你出货 出完你买啥呢?想不开,是但哥,2026/5/14 11:34,38,93
|
||||||
|
1,了,股友01zC725523,2026/5/14 10:40,43,76
|
||||||
|
1,温水煮青蛙,一路闷杀,股友38M080U658,2026/5/14 11:48,20,58
|
||||||
|
1,1,谷神布斯,2026/5/14 11:26,34,0
|
||||||
|
1,!,飞驰股生的牛马,2026/5/14 10:24,35,63
|
||||||
|
1,同时加快梳理现有业务板块,去弱留强,对环球易购业务进行项目制改革,九溪你要赢,2026/5/14 11:27,1,21
|
||||||
|
1,外盘木材大跌,纸浆机会来了!,哥伦比娅,2026/5/14 11:44,33,61
|
||||||
|
1,今天收盘10.05,从头再来。,花火飞鸟,2026/5/14 10:20,4,41
|
||||||
|
1,9409+60070中签,隔壁的兄弟立高食品已经94倍市盈率了,红红的嘴唇,2026/5/14 11:42,35,4
|
||||||
|
1,太便宜了,唯一一次翻倍的机会,就在面前,不要再犹豫了,坚韧的武桦16,2026/5/14 11:18,26,5
|
||||||
|
1,7728手一口吃掉,欣锐80等你来,2026/5/14 11:40,27,93
|
||||||
|
1,又到箱底,准备干了,股友995O308r73,2026/5/14 11:40,6,78
|
||||||
|
1,地天,盯紧五日线,2026/5/14 11:39,7,70
|
||||||
|
1,纽威的管理层会招报应的,你们这代不报,你们的下一代也跑不掉!!!!!!!!!!!,一一路長虹,2026/5/14 10:04,37,73
|
||||||
|
1,国务院日前发布生物产业发展规划,7500亿产业链呼之欲出,意味着生物医药概念股面,股友965al72890,2026/5/14 10:21,30,39
|
||||||
|
1,看来跨境通的要回暖了,一念上塔山,2026/5/14 11:36,3,38
|
||||||
|
1,跌到3个点再补,好些,股友01zC725523,2026/5/14 10:15,23,11
|
||||||
|
1,火箭弹都救不了中兵红箭嘛,魍魉灬,2026/5/14 11:35,25,85
|
||||||
|
1,槽,单刃剪钳,2026/5/14 1:34,17,20
|
||||||
|
1,大盘涨狗垃圾东西还在跌,欣锐80等你来,2026/5/14 11:31,33,15
|
||||||
|
1,还会升回去吗?,小韭菜误入高端局,2026/5/14 11:28,26,78
|
||||||
|
1,啊,狐狸叫的猫,2026/5/14 11:28,32,48
|
||||||
|
1,我一定是脑子抽了,昨天盈利9个点今早盘卖了,没多久又接了回来,梦想是在股市买房,2026/5/14 10:49,26,71
|
||||||
|
1,果断卖出,不玩了,这股没戏。今天大阴线 明天大跌,炒股2年半,2026/5/14 11:22,0,54
|
||||||
|
1,没大哥一堆压单,雨文和文,2026/5/14 11:21,24,80
|
||||||
|
1,哎,赚点狗粮猫粮,2026/5/14 11:21,1,26
|
||||||
|
1,调仓换股啦!太弱了,买入券商,VINN,2026/5/14 10:31,12,90
|
||||||
|
1,现在该股是死猪不怕开水烫,阿月姐,2026/5/14 11:20,26,24
|
||||||
|
1,这股算稳吧,感觉大部分都在绿,不绿就好,高大的金青槐4,2026/5/14 11:19,9,87
|
||||||
|
1,主力说 ,马上拉升10个板,快进,5.81就是不锈钢底,鸣潮牛比,2026/5/14 11:18,45,82
|
||||||
|
1,川王挂了,市场只有一条龙了,津荣你是金龙,孤独者AA,2026/5/14 11:18,23,84
|
||||||
|
1,狗狗币 比特 “炒币”和炒股一样都有大起大落,这首歌唱出了你的心声吗?,股友68c80F8191,2026/5/14 11:16,4,29
|
||||||
|
1,啊,国运长牛632,2026/5/14 11:17,7,29
|
||||||
|
1,河钢已经跌无可跌了,再也没有一丝下降的空间了,我可以回本,2026/5/14 11:16,9,52
|
||||||
|
1,说好的3元呢?砸下来啊,gou装,仓又加錯,2026/5/14 11:16,21,74
|
||||||
|
1,出,昔涟丶爱莉希雅,2026/5/14 11:15,16,92
|
||||||
|
1,从168开始人人中签,从168开始一路发。,住在异环的别墅里,2026/5/14 11:14,1,28
|
||||||
|
1,所以之前费劲巴力的涨上来是为了个什么,买廖就涨,2026/5/14 11:14,46,53
|
||||||
|
1,who这些鬼佬办事效率真是低,印度人民生命分秒必争阿,面朝阳光追梦,2026/5/14 11:13,22,24
|
||||||
|
1,这条横线画得漂亮,futuregs,2026/5/14 11:12,16,92
|
||||||
|
1,跌停给筹码 涨停给筹码 这样要死不活不会给你筹码的,股友0f56Mr,2026/5/14 11:11,0,39
|
||||||
|
1,一堆出货的不亏钱吗,关注acgn,2026/5/14 10:40,26,35
|
||||||
|
1,又是一个美好的周末,金元马,2026/5/14 11:09,35,92
|
||||||
|
1,你不得不佩服老邓,他去年就判断到现在的行情了,3季度多卖猪仔,今年二季度少出栏。,油手好闲,2026/5/13 16:31,47,83
|
||||||
|
1,收盘价13.14,和4月28日一样,B哥sama,2026/5/14 11:01,29,85
|
||||||
|
1,减,买股不宜慌,2026/5/14 11:03,31,89
|
||||||
|
1,趋势大时代 指数中位线选择方向,如林的岑缈,2026/5/14 10:41,20,17
|
||||||
|
1,要跌停,君月00后实盘,2026/5/14 10:46,7,90
|
||||||
|
1,尾盘跌停,股民222,2026/5/14 10:36,21,11
|
||||||
|
1,今天都有哪些吃了这碗大面的?来说说后面走势,Cialloo,2026/5/14 10:23,24,11
|
||||||
|
1,大家一起去骂股市老兵这个死庄托,大智若愚量在價先,2026/5/14 10:51,22,36
|
||||||
|
1,短线小赚一波赶紧走,下一波跌至18,东方嘉木,2026/5/14 10:50,28,2
|
||||||
|
1,跨境通ZAFUL拥有成功、成熟的体系和打法,其后续发展充满期待,心中有财,2026/5/14 10:49,4,86
|
||||||
|
1,各位好啊,我来抄底了,拉萨帮二当家,2026/5/14 10:31,32,97
|
||||||
|
1,5月7日前有九阳,也必将有后九阳!,做t糕手,2026/5/14 10:46,46,19
|
||||||
|
1,还能跌破6吗?感觉都快垫底了,梦绮紫,2026/5/14 10:48,31,88
|
||||||
|
1,跑了,和这股耗不起,涨两天一天能降回去,少赔点撤了,飞逸天,2026/5/14 10:47,38,91
|
||||||
|
1,雨后彩莲死庄托还敢唱多吗,脸被打的啪啪响吧,股友Q1713355G8,2026/5/14 10:40,3,66
|
||||||
|
1,天天高抛低吸,股友C08120l209,2026/5/14 10:46,37,89
|
||||||
|
1,圣龙股份,好是好,公司发展方向无疑是好!但是盘子太小,业绩越来越好就没有大机构要!千万不要大买,谷神布斯,2026/5/14 10:45,20,93
|
||||||
|
1,几点出公告停牌,花火飞鸟,2026/5/14 10:25,28,63
|
||||||
|
1,这是已经谈拢了,杀跌两天就该停牌了.不用等到21号股东大会了。,股友61057ac363,2026/5/14 10:42,9,97
|
||||||
|
1,昨天是谁说的今天会复制3.5号的行情?简直神准,股友639537p9w3,2026/5/14 9:20,17,69
|
||||||
|
1,济南高新,东大东大红,2026/5/14 10:39,25,89
|
||||||
|
2,无论是人才还是技术还是渠道,希望这次管理成的变动能够让这个曾经行业龙头走向正轨,雪球小鲁班,2026/5/14 12:07,24,71
|
||||||
|
2,【今日股市】指数午后低位震荡,资源股跌幅居前,雪球小鲁班,2026/5/14 10:30,31,62
|
||||||
|
2,该跌了吧,沉心静气扬帆起航,2026/5/14 11:45,18,79
|
||||||
|
2,感觉像是有组织的散户进场了,就是所谓的老鼠,回本困难户第N位,2026/5/13 22:21,35,89
|
||||||
|
2,终于涨了,昨天跌那么狠,裤衩子都没有了,花火飞鸟,2026/5/14 12:04,47,83
|
||||||
|
2,算不算放量暴跌?!,股友63F0o88663,2026/5/14 12:03,22,78
|
||||||
|
2,今天爆跌,明天一大堆利好又红红火火长阳,TCL一贯套路,心碎股票人,2026/5/14 12:02,25,81
|
||||||
|
2,没事 拿住 今天主力动用资金拉券商股 明天活埋券商再来拉你,雪球小鲁班,2026/5/14 10:37,32,64
|
||||||
|
2,大神解释一下,为什么大单疯狂出货,都出了一个亿了还是能这么涨,月茨星,2026/5/14 11:52,43,66
|
||||||
|
2,为什么还在卖,要停牌了,股友36E919X121,2026/5/14 11:17,24,63
|
||||||
|
2,涨一天跌一周,就这尿性。。。,股友7715N807H3,2026/5/14 11:24,20,76
|
||||||
|
2,今天应该有榜,瑜佳不佳,2026/5/14 11:56,4,17
|
||||||
|
2,大盘涨跌工具,厉害的小散,2026/5/14 11:56,12,78
|
||||||
|
2,眼光看长远的几十个点,不纠结每天的几个点,股友3Y063588A1,2026/5/14 11:52,20,66
|
||||||
|
2,8.15箱底满仓干满仓干,半导体与智能消费最优质龙头TCL,SEO神话,2026/5/14 11:51,44,3
|
||||||
|
2,中信还在加仓,要命哟,金炫宇1,2026/5/14 11:50,1,5
|
||||||
|
2,这压单这么低的价格你出货 出完你买啥呢?想不开,是但哥,2026/5/14 11:34,38,93
|
||||||
|
2,了,股友01zC725523,2026/5/14 10:40,43,76
|
||||||
|
2,温水煮青蛙,一路闷杀,股友38M080U658,2026/5/14 11:48,20,58
|
||||||
|
2,1,谷神布斯,2026/5/14 11:26,34,0
|
||||||
|
2,!,飞驰股生的牛马,2026/5/14 10:24,35,63
|
||||||
|
2,同时加快梳理现有业务板块,去弱留强,对环球易购业务进行项目制改革,九溪你要赢,2026/5/14 11:27,1,21
|
||||||
|
2,外盘木材大跌,纸浆机会来了!,哥伦比娅,2026/5/14 11:44,33,61
|
||||||
|
2,今天收盘10.05,从头再来。,花火飞鸟,2026/5/14 10:20,4,41
|
||||||
|
2,9409+60070中签,隔壁的兄弟立高食品已经94倍市盈率了,红红的嘴唇,2026/5/14 11:42,35,4
|
||||||
|
2,太便宜了,唯一一次翻倍的机会,就在面前,不要再犹豫了,坚韧的武桦16,2026/5/14 11:18,26,5
|
||||||
|
2,7728手一口吃掉,欣锐80等你来,2026/5/14 11:40,27,93
|
||||||
|
2,又到箱底,准备干了,股友995O308r73,2026/5/14 11:40,6,78
|
||||||
|
2,地天,盯紧五日线,2026/5/14 11:39,7,70
|
||||||
|
2,纽威的管理层会招报应的,你们这代不报,你们的下一代也跑不掉!!!!!!!!!!!,一一路長虹,2026/5/14 10:04,37,73
|
||||||
|
2,国务院日前发布生物产业发展规划,7500亿产业链呼之欲出,意味着生物医药概念股面,股友965al72890,2026/5/14 10:21,30,39
|
||||||
|
2,看来跨境通的要回暖了,一念上塔山,2026/5/14 11:36,3,38
|
||||||
|
2,跌到3个点再补,好些,股友01zC725523,2026/5/14 10:15,23,11
|
||||||
|
2,火箭弹都救不了中兵红箭嘛,魍魉灬,2026/5/14 11:35,25,85
|
||||||
|
2,槽,单刃剪钳,2026/5/14 1:34,17,20
|
||||||
|
2,大盘涨狗垃圾东西还在跌,欣锐80等你来,2026/5/14 11:31,33,15
|
||||||
|
2,还会升回去吗?,小韭菜误入高端局,2026/5/14 11:28,26,78
|
||||||
|
2,啊,狐狸叫的猫,2026/5/14 11:28,32,48
|
||||||
|
2,我一定是脑子抽了,昨天盈利9个点今早盘卖了,没多久又接了回来,梦想是在股市买房,2026/5/14 10:49,26,71
|
||||||
|
2,果断卖出,不玩了,这股没戏。今天大阴线 明天大跌,炒股2年半,2026/5/14 11:22,0,54
|
||||||
|
2,没大哥一堆压单,雨文和文,2026/5/14 11:21,24,80
|
||||||
|
2,哎,赚点狗粮猫粮,2026/5/14 11:21,1,26
|
||||||
|
2,调仓换股啦!太弱了,买入券商,VINN,2026/5/14 10:31,12,90
|
||||||
|
2,现在该股是死猪不怕开水烫,阿月姐,2026/5/14 11:20,26,24
|
||||||
|
2,这股算稳吧,感觉大部分都在绿,不绿就好,高大的金青槐4,2026/5/14 11:19,9,87
|
||||||
|
2,主力说 ,马上拉升10个板,快进,5.81就是不锈钢底,鸣潮牛比,2026/5/14 11:18,45,82
|
||||||
|
2,川王挂了,市场只有一条龙了,津荣你是金龙,孤独者AA,2026/5/14 11:18,23,84
|
||||||
|
2,狗狗币 比特 “炒币”和炒股一样都有大起大落,这首歌唱出了你的心声吗?,股友68c80F8191,2026/5/14 11:16,4,29
|
||||||
|
2,啊,国运长牛632,2026/5/14 11:17,7,29
|
||||||
|
2,河钢已经跌无可跌了,再也没有一丝下降的空间了,我可以回本,2026/5/14 11:16,9,52
|
||||||
|
2,说好的3元呢?砸下来啊,gou装,仓又加錯,2026/5/14 11:16,21,74
|
||||||
|
2,出,昔涟丶爱莉希雅,2026/5/14 11:15,16,92
|
||||||
|
2,从168开始人人中签,从168开始一路发。,住在异环的别墅里,2026/5/14 11:14,1,28
|
||||||
|
2,所以之前费劲巴力的涨上来是为了个什么,买廖就涨,2026/5/14 11:14,46,53
|
||||||
|
2,who这些鬼佬办事效率真是低,印度人民生命分秒必争阿,面朝阳光追梦,2026/5/14 11:13,22,24
|
||||||
|
2,这条横线画得漂亮,futuregs,2026/5/14 11:12,16,92
|
||||||
|
2,跌停给筹码 涨停给筹码 这样要死不活不会给你筹码的,股友0f56Mr,2026/5/14 11:11,0,39
|
||||||
|
2,一堆出货的不亏钱吗,关注acgn,2026/5/14 10:40,26,35
|
||||||
|
2,又是一个美好的周末,金元马,2026/5/14 11:09,35,92
|
||||||
|
2,你不得不佩服老邓,他去年就判断到现在的行情了,3季度多卖猪仔,今年二季度少出栏。,油手好闲,2026/5/13 16:31,47,83
|
||||||
|
2,收盘价13.14,和4月28日一样,B哥sama,2026/5/14 11:01,29,85
|
||||||
|
2,减,买股不宜慌,2026/5/14 11:03,31,89
|
||||||
|
2,趋势大时代 指数中位线选择方向,如林的岑缈,2026/5/14 10:41,20,17
|
||||||
|
2,要跌停,君月00后实盘,2026/5/14 10:46,7,90
|
||||||
|
2,尾盘跌停,股民222,2026/5/14 10:36,21,11
|
||||||
|
2,今天都有哪些吃了这碗大面的?来说说后面走势,Cialloo,2026/5/14 10:23,24,11
|
||||||
|
2,大家一起去骂股市老兵这个死庄托,大智若愚量在價先,2026/5/14 10:51,22,36
|
||||||
|
2,短线小赚一波赶紧走,下一波跌至18,东方嘉木,2026/5/14 10:50,28,2
|
||||||
|
2,跨境通ZAFUL拥有成功、成熟的体系和打法,其后续发展充满期待,心中有财,2026/5/14 10:49,4,86
|
||||||
|
2,各位好啊,我来抄底了,拉萨帮二当家,2026/5/14 10:31,32,97
|
||||||
|
2,5月7日前有九阳,也必将有后九阳!,做t糕手,2026/5/14 10:46,46,19
|
||||||
|
2,还能跌破6吗?感觉都快垫底了,梦绮紫,2026/5/14 10:48,31,88
|
||||||
|
2,跑了,和这股耗不起,涨两天一天能降回去,少赔点撤了,飞逸天,2026/5/14 10:47,38,91
|
||||||
|
2,雨后彩莲死庄托还敢唱多吗,脸被打的啪啪响吧,股友Q1713355G8,2026/5/14 10:40,3,66
|
||||||
|
2,天天高抛低吸,股友C08120l209,2026/5/14 10:46,37,89
|
||||||
|
2,圣龙股份,好是好,公司发展方向无疑是好!但是盘子太小,业绩越来越好就没有大机构要!千万不要大买,谷神布斯,2026/5/14 10:45,20,93
|
||||||
|
2,几点出公告停牌,花火飞鸟,2026/5/14 10:25,28,63
|
||||||
|
2,这是已经谈拢了,杀跌两天就该停牌了.不用等到21号股东大会了。,股友61057ac363,2026/5/14 10:42,9,97
|
||||||
|
2,昨天是谁说的今天会复制3.5号的行情?简直神准,股友639537p9w3,2026/5/14 9:20,17,69
|
||||||
|
2,济南高新,东大东大红,2026/5/14 10:39,25,89
|
||||||
|
3,无论是人才还是技术还是渠道,希望这次管理成的变动能够让这个曾经行业龙头走向正轨,雪球小鲁班,2026/5/14 12:07,24,71
|
||||||
|
3,【今日股市】指数午后低位震荡,资源股跌幅居前,雪球小鲁班,2026/5/14 10:30,31,62
|
||||||
|
3,该跌了吧,沉心静气扬帆起航,2026/5/14 11:45,18,79
|
||||||
|
3,感觉像是有组织的散户进场了,就是所谓的老鼠,回本困难户第N位,2026/5/13 22:21,35,89
|
||||||
|
3,终于涨了,昨天跌那么狠,裤衩子都没有了,花火飞鸟,2026/5/14 12:04,47,83
|
||||||
|
3,算不算放量暴跌?!,股友63F0o88663,2026/5/14 12:03,22,78
|
||||||
|
3,今天爆跌,明天一大堆利好又红红火火长阳,TCL一贯套路,心碎股票人,2026/5/14 12:02,25,81
|
||||||
|
3,没事 拿住 今天主力动用资金拉券商股 明天活埋券商再来拉你,雪球小鲁班,2026/5/14 10:37,32,64
|
||||||
|
3,大神解释一下,为什么大单疯狂出货,都出了一个亿了还是能这么涨,月茨星,2026/5/14 11:52,43,66
|
||||||
|
3,为什么还在卖,要停牌了,股友36E919X121,2026/5/14 11:17,24,63
|
||||||
|
3,涨一天跌一周,就这尿性。。。,股友7715N807H3,2026/5/14 11:24,20,76
|
||||||
|
3,今天应该有榜,瑜佳不佳,2026/5/14 11:56,4,17
|
||||||
|
3,大盘涨跌工具,厉害的小散,2026/5/14 11:56,12,78
|
||||||
|
3,眼光看长远的几十个点,不纠结每天的几个点,股友3Y063588A1,2026/5/14 11:52,20,66
|
||||||
|
3,8.15箱底满仓干满仓干,半导体与智能消费最优质龙头TCL,SEO神话,2026/5/14 11:51,44,3
|
||||||
|
3,中信还在加仓,要命哟,金炫宇1,2026/5/14 11:50,1,5
|
||||||
|
3,这压单这么低的价格你出货 出完你买啥呢?想不开,是但哥,2026/5/14 11:34,38,93
|
||||||
|
3,了,股友01zC725523,2026/5/14 10:40,43,76
|
||||||
|
3,温水煮青蛙,一路闷杀,股友38M080U658,2026/5/14 11:48,20,58
|
||||||
|
3,1,谷神布斯,2026/5/14 11:26,34,0
|
||||||
|
3,!,飞驰股生的牛马,2026/5/14 10:24,35,63
|
||||||
|
3,同时加快梳理现有业务板块,去弱留强,对环球易购业务进行项目制改革,九溪你要赢,2026/5/14 11:27,1,21
|
||||||
|
3,外盘木材大跌,纸浆机会来了!,哥伦比娅,2026/5/14 11:44,33,61
|
||||||
|
3,今天收盘10.05,从头再来。,花火飞鸟,2026/5/14 10:20,4,41
|
||||||
|
3,9409+60070中签,隔壁的兄弟立高食品已经94倍市盈率了,红红的嘴唇,2026/5/14 11:42,35,4
|
||||||
|
3,太便宜了,唯一一次翻倍的机会,就在面前,不要再犹豫了,坚韧的武桦16,2026/5/14 11:18,26,5
|
||||||
|
3,7728手一口吃掉,欣锐80等你来,2026/5/14 11:40,27,93
|
||||||
|
3,又到箱底,准备干了,股友995O308r73,2026/5/14 11:40,6,78
|
||||||
|
3,地天,盯紧五日线,2026/5/14 11:39,7,70
|
||||||
|
3,纽威的管理层会招报应的,你们这代不报,你们的下一代也跑不掉!!!!!!!!!!!,一一路長虹,2026/5/14 10:04,37,73
|
||||||
|
3,国务院日前发布生物产业发展规划,7500亿产业链呼之欲出,意味着生物医药概念股面,股友965al72890,2026/5/14 10:21,30,39
|
||||||
|
3,看来跨境通的要回暖了,一念上塔山,2026/5/14 11:36,3,38
|
||||||
|
3,跌到3个点再补,好些,股友01zC725523,2026/5/14 10:15,23,11
|
||||||
|
3,火箭弹都救不了中兵红箭嘛,魍魉灬,2026/5/14 11:35,25,85
|
||||||
|
3,槽,单刃剪钳,2026/5/14 1:34,17,20
|
||||||
|
3,大盘涨狗垃圾东西还在跌,欣锐80等你来,2026/5/14 11:31,33,15
|
||||||
|
3,还会升回去吗?,小韭菜误入高端局,2026/5/14 11:28,26,78
|
||||||
|
3,啊,狐狸叫的猫,2026/5/14 11:28,32,48
|
||||||
|
3,我一定是脑子抽了,昨天盈利9个点今早盘卖了,没多久又接了回来,梦想是在股市买房,2026/5/14 10:49,26,71
|
||||||
|
3,果断卖出,不玩了,这股没戏。今天大阴线 明天大跌,炒股2年半,2026/5/14 11:22,0,54
|
||||||
|
3,没大哥一堆压单,雨文和文,2026/5/14 11:21,24,80
|
||||||
|
3,哎,赚点狗粮猫粮,2026/5/14 11:21,1,26
|
||||||
|
3,调仓换股啦!太弱了,买入券商,VINN,2026/5/14 10:31,12,90
|
||||||
|
3,现在该股是死猪不怕开水烫,阿月姐,2026/5/14 11:20,26,24
|
||||||
|
3,这股算稳吧,感觉大部分都在绿,不绿就好,高大的金青槐4,2026/5/14 11:19,9,87
|
||||||
|
3,主力说 ,马上拉升10个板,快进,5.81就是不锈钢底,鸣潮牛比,2026/5/14 11:18,45,82
|
||||||
|
3,川王挂了,市场只有一条龙了,津荣你是金龙,孤独者AA,2026/5/14 11:18,23,84
|
||||||
|
3,狗狗币 比特 “炒币”和炒股一样都有大起大落,这首歌唱出了你的心声吗?,股友68c80F8191,2026/5/14 11:16,4,29
|
||||||
|
3,啊,国运长牛632,2026/5/14 11:17,7,29
|
||||||
|
3,河钢已经跌无可跌了,再也没有一丝下降的空间了,我可以回本,2026/5/14 11:16,9,52
|
||||||
|
3,说好的3元呢?砸下来啊,gou装,仓又加錯,2026/5/14 11:16,21,74
|
||||||
|
3,出,昔涟丶爱莉希雅,2026/5/14 11:15,16,92
|
||||||
|
3,从168开始人人中签,从168开始一路发。,住在异环的别墅里,2026/5/14 11:14,1,28
|
||||||
|
3,所以之前费劲巴力的涨上来是为了个什么,买廖就涨,2026/5/14 11:14,46,53
|
||||||
|
3,who这些鬼佬办事效率真是低,印度人民生命分秒必争阿,面朝阳光追梦,2026/5/14 11:13,22,24
|
||||||
|
3,这条横线画得漂亮,futuregs,2026/5/14 11:12,16,92
|
||||||
|
3,跌停给筹码 涨停给筹码 这样要死不活不会给你筹码的,股友0f56Mr,2026/5/14 11:11,0,39
|
||||||
|
3,一堆出货的不亏钱吗,关注acgn,2026/5/14 10:40,26,35
|
||||||
|
3,又是一个美好的周末,金元马,2026/5/14 11:09,35,92
|
||||||
|
3,你不得不佩服老邓,他去年就判断到现在的行情了,3季度多卖猪仔,今年二季度少出栏。,油手好闲,2026/5/13 16:31,47,83
|
||||||
|
3,收盘价13.14,和4月28日一样,B哥sama,2026/5/14 11:01,29,85
|
||||||
|
3,减,买股不宜慌,2026/5/14 11:03,31,89
|
||||||
|
3,趋势大时代 指数中位线选择方向,如林的岑缈,2026/5/14 10:41,20,17
|
||||||
|
3,要跌停,君月00后实盘,2026/5/14 10:46,7,90
|
||||||
|
3,尾盘跌停,股民222,2026/5/14 10:36,21,11
|
||||||
|
3,今天都有哪些吃了这碗大面的?来说说后面走势,Cialloo,2026/5/14 10:23,24,11
|
||||||
|
3,大家一起去骂股市老兵这个死庄托,大智若愚量在價先,2026/5/14 10:51,22,36
|
||||||
|
3,短线小赚一波赶紧走,下一波跌至18,东方嘉木,2026/5/14 10:50,28,2
|
||||||
|
3,跨境通ZAFUL拥有成功、成熟的体系和打法,其后续发展充满期待,心中有财,2026/5/14 10:49,4,86
|
||||||
|
3,各位好啊,我来抄底了,拉萨帮二当家,2026/5/14 10:31,32,97
|
||||||
|
3,5月7日前有九阳,也必将有后九阳!,做t糕手,2026/5/14 10:46,46,19
|
||||||
|
3,还能跌破6吗?感觉都快垫底了,梦绮紫,2026/5/14 10:48,31,88
|
||||||
|
3,跑了,和这股耗不起,涨两天一天能降回去,少赔点撤了,飞逸天,2026/5/14 10:47,38,91
|
||||||
|
3,雨后彩莲死庄托还敢唱多吗,脸被打的啪啪响吧,股友Q1713355G8,2026/5/14 10:40,3,66
|
||||||
|
3,天天高抛低吸,股友C08120l209,2026/5/14 10:46,37,89
|
||||||
|
3,圣龙股份,好是好,公司发展方向无疑是好!但是盘子太小,业绩越来越好就没有大机构要!千万不要大买,谷神布斯,2026/5/14 10:45,20,93
|
||||||
|
3,几点出公告停牌,花火飞鸟,2026/5/14 10:25,28,63
|
||||||
|
3,这是已经谈拢了,杀跌两天就该停牌了.不用等到21号股东大会了。,股友61057ac363,2026/5/14 10:42,9,97
|
||||||
|
3,昨天是谁说的今天会复制3.5号的行情?简直神准,股友639537p9w3,2026/5/14 9:20,17,69
|
||||||
|
3,济南高新,东大东大红,2026/5/14 10:39,25,89
|
||||||
|
+2609
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,12 @@
|
|||||||
|
import asyncio
|
||||||
|
import aiohttp
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
headers = {
|
||||||
|
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/148.0.0.0 Safari/537.36',
|
||||||
|
'Referer': 'https://guba.eastmoney.com/',
|
||||||
|
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
|
||||||
|
'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.
|
||||||
+343
@@ -0,0 +1,343 @@
|
|||||||
|
import json
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
import urllib.request
|
||||||
|
import urllib.parse
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
# Values shared by both header sets; defined once so they cannot drift apart.
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/148.0.0.0 Safari/537.36'

# Cookie string sent verbatim with every request.
# NOTE(review): this looks like a cookie captured from one browser session;
# presumably it expires and should come from configuration rather than from
# source control -- confirm.
_COOKIE = 'qgqp_b_id=30059d8839ad5c045fa8856e38013e9c; st_nvi=XwpSfYXGjCxfCdbgapK5_cac4; nid18=0daec1df8064f04edd20b4e69250a8f5; nid18_create_time=1776263017375; gviem=UrMH_tSu1UpW8B_TKmytl803f; gviem_create_time=1776263017375; fullscreengg=1; fullscreengg2=1; st_si=63999118594852; wsc_checkuser_ok=1; st_asi=delete; st_pvi=26838250597806; st_sp=2026-04-15%2022%3A23%3A37; st_inirUrl=https%3A%2F%2Fcn.bing.com%2F; st_sn=30; st_psi=20260520214901287-117001354293-0422265952'

# Headers for HTML page requests (list pages and the home page).
headers = {
    'User-Agent': _USER_AGENT,
    'Referer': 'https://guba.eastmoney.com/',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    # urllib does not decompress response bodies and the standard library has
    # no Brotli decoder, so compressed encodings must not be advertised here.
    'Accept-Encoding': 'identity',
    'Connection': 'keep-alive',
    'Cache-Control': 'max-age=0',
    'Upgrade-Insecure-Requests': '1',
    'Sec-Ch-Ua': '"Chromium";v="148", "Not;A=Brand";v="24", "Microsoft Edge";v="148"',
    'Sec-Ch-Ua-Mobile': '?0',
    'Sec-Ch-Ua-Platform': '"Windows"',
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'same-origin',
    'Sec-Fetch-User': '?1',
    'Cookie': _COOKIE,
}

# Headers for the form-encoded POST to the comment (reply list) endpoint.
comment_headers = {
    'Accept': '*/*',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Origin': 'https://guba.eastmoney.com',
    'Pragma': 'no-cache',
    'Referer': 'https://guba.eastmoney.com/',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-origin',
    'User-Agent': _USER_AGENT,
    'X-Requested-With': 'XMLHttpRequest',
    'Cookie': _COOKIE,
}

MAX_RETRIES = 3                # attempts per request inside fetch()
DELAY_BETWEEN_REQUESTS = 2.0   # seconds slept between comment pages / posts
DELAY_BETWEEN_PAGES = 5.0      # seconds slept between list pages
OUTPUT_FILE = 'guba_data.json'
|
||||||
|
|
||||||
|
|
||||||
|
def fetch(url, headers, method='GET', data=None, timeout=15):
    """Request *url* and return the response body as text, or None on failure.

    Args:
        url: Address to request.
        headers: Mapping of request headers. Any ``Accept-Encoding`` entry is
            replaced, because only gzip and deflate bodies can be decoded here.
        method: HTTP method name.
        data: Optional request body as bytes.
        timeout: Timeout in seconds passed to ``urlopen``.

    Returns:
        The body decoded as UTF-8 (undecodable bytes dropped), or None when
        the server answers with a non-retryable status or every one of the
        ``MAX_RETRIES`` attempts fails.
    """
    import gzip
    import zlib
    from urllib.error import HTTPError, URLError

    # urllib never decompresses a body by itself and the standard library has
    # no Brotli decoder, so advertise only what is decoded below.
    request_headers = {
        name: value for name, value in headers.items()
        if name.lower() != 'accept-encoding'
    }
    request_headers['Accept-Encoding'] = 'gzip, deflate'

    for attempt in range(MAX_RETRIES):
        # No point in sleeping after the final attempt; nothing follows it.
        is_last_attempt = attempt == MAX_RETRIES - 1
        try:
            req = urllib.request.Request(url, headers=request_headers, method=method, data=data)
            with urllib.request.urlopen(req, timeout=timeout) as response:
                if response.status != 200:
                    print(f' 请求失败,状态码: {response.status}')
                    return None
                raw = response.read()
                encoding = (response.headers.get('Content-Encoding') or '').strip().lower()

            if encoding in ('gzip', 'x-gzip'):
                raw = gzip.decompress(raw)
            elif encoding == 'deflate':
                try:
                    raw = zlib.decompress(raw)
                except zlib.error:
                    # Some servers send a raw deflate stream without the zlib header.
                    raw = zlib.decompress(raw, -zlib.MAX_WBITS)

            return raw.decode('utf-8', errors='ignore')

        except HTTPError as e:
            # urlopen raises for 4xx/5xx responses, so status-specific handling
            # has to happen here rather than on the response object.
            if e.code == 429:
                print(f' 请求过于频繁,等待10秒后重试...')
                wait_seconds = 10
            elif e.code == 403:
                print(f' 请求被拒绝,第{attempt+1}次重试...')
                wait_seconds = 5
            else:
                print(f' 请求失败,状态码: {e.code}')
                return None
            if not is_last_attempt:
                time.sleep(wait_seconds)
        except URLError as e:
            print(f' 请求超时,第{attempt+1}次重试...')
            if not is_last_attempt:
                time.sleep(5)
        except Exception as e:
            # Covers socket timeouts while reading and decompression failures.
            print(f' 请求异常: {str(e)}')
            if not is_last_attempt:
                time.sleep(5)

    return None
|
||||||
|
|
||||||
|
|
||||||
|
def initialize_session():
    """Request the Guba home page once, then pause, before scraping starts.

    The response body is discarded.
    NOTE(review): nothing returned by this request is stored here; whether it
    has any effect on later requests depends on ``fetch`` -- confirm.
    """
    home_url = 'https://guba.eastmoney.com/'
    print('正在初始化会话...')
    fetch(home_url, headers)
    time.sleep(2)
    print('会话初始化完成')
|
||||||
|
|
||||||
|
|
||||||
|
def get_post_list(stock_code='002624', page=1):
    """Return the posts embedded in one list page of a stock's board.

    The page is expected to contain ``var article_list={...}``; the object's
    ``re`` array holds one entry per post.

    Args:
        stock_code: Stock code used in the list-page URL.
        page: 1-based list page number; page 1 uses the ``f.html`` form.

    Returns:
        A list of dicts with the keys ``post_id``, ``title``, ``author``,
        ``post_time``, ``comment_count``, ``click_count``, ``forward_count``,
        ``like_count`` and ``url``. Empty when the page cannot be fetched,
        the embedded JSON is missing or invalid, or no entry has both an id
        and a title.
    """
    suffix = 'f' if page == 1 else f'f{page}'
    url = f'https://guba.eastmoney.com/list,{stock_code},{suffix}.html'

    html = fetch(url, headers)
    if not html:
        return []

    marker = re.search(r'var article_list=\s*(?=\{)', html)
    if not marker:
        return []

    # Let the JSON decoder find the end of the object itself. Cutting at the
    # first "};" would truncate the data whenever a string value contains it.
    try:
        data, _ = json.JSONDecoder().raw_decode(html, marker.end())
    except json.JSONDecodeError as e:
        print(f' 帖子列表JSON解析失败: {str(e)}')
        return []

    items = data.get('re') or []
    if not isinstance(items, list):
        return []

    posts = []
    for item in items:
        if not isinstance(item, dict):
            continue

        post_id = item.get('post_id', '')
        # "or ''" guards against JSON null, which has no .strip().
        title = (item.get('post_title') or '').strip()
        author = (item.get('user_nickname') or '').strip()

        if not (post_id and title):
            continue

        posts.append({
            'post_id': post_id,
            'title': title,
            'author': author,
            'post_time': item.get('post_display_time', ''),
            'comment_count': item.get('post_comment_count', 0),
            'click_count': item.get('post_click_count', 0),
            'forward_count': item.get('post_forward_count', 0),
            'like_count': item.get('post_like_count', 0),
            'url': f'https://guba.eastmoney.com/news,{stock_code},{post_id}.html',
        })

    return posts
|
||||||
|
|
||||||
|
|
||||||
|
def get_comments(stock_code, post_id, page=1, page_size=30):
    """Fetch one page of comments for a post from the reply-list endpoint.

    Args:
        stock_code: Stock code placed in the endpoint's query string.
        post_id: Identifier of the post whose comments are requested.
        page: 1-based comment page number.
        page_size: Number of comments requested per page.

    Returns:
        A list of dicts with the keys ``reply_id``, ``user_nickname``,
        ``reply_content``, ``reply_time``, ``reply_like_count`` and
        ``reply_against_count``. Comments with empty text are dropped. Empty
        when the request fails or the reply has an unexpected shape.
    """
    url = f'https://guba.eastmoney.com/api/getData?code={stock_code}&path=reply/api/Reply/ArticleNewReplyList'

    payload = {
        'param': f'postid={post_id}&sort=1&sorttype=1&p={page}&ps={page_size}',
        'plat': 'Web',
        'path': 'reply/api/Reply/ArticleNewReplyList',
        'env': '2',
        'origin': '',
        'version': '2022',
        'product': 'Guba',
    }

    body = urllib.parse.urlencode(payload).encode('utf-8')
    response_text = fetch(url, comment_headers, method='POST', data=body)
    if not response_text:
        return []

    try:
        data = json.loads(response_text)
    except json.JSONDecodeError as e:
        print(f' JSON解析失败: {str(e)}')
        return []

    if not isinstance(data, dict):
        print(f' 未知的响应结构: {type(data).__name__}')
        return []

    # Two reply layouts are accepted: a top-level "re" list, or a list nested
    # under data.reply_list.
    nested = data.get('data')
    if 're' in data:
        reply_list = data.get('re')
    elif isinstance(nested, dict) and 'reply_list' in nested:
        reply_list = nested.get('reply_list')
    else:
        print(f' 未知的响应结构: {list(data.keys())}')
        return []

    if not isinstance(reply_list, list):
        return []

    comments = []
    for item in reply_list:
        if not isinstance(item, dict):
            continue

        # JSON null for any of these fields must not crash the page.
        reply_user = item.get('reply_user')
        if not isinstance(reply_user, dict):
            reply_user = {}

        content = (item.get('reply_text') or '').strip()
        if not content:
            continue

        comments.append({
            'reply_id': str(item.get('reply_id', '')),
            'user_nickname': (reply_user.get('user_nickname') or '').strip(),
            'reply_content': content,
            'reply_time': item.get('reply_time', ''),
            'reply_like_count': item.get('reply_like_count', 0),
            'reply_against_count': item.get('reply_against_count', 0),
        })

    return comments
|
||||||
|
|
||||||
|
|
||||||
|
def get_all_comments(stock_code, post_id, total_comments):
    """Collect a post's comments page by page until a page comes back short.

    Pages of 30 are requested through ``get_comments``, pausing
    ``DELAY_BETWEEN_REQUESTS`` seconds between pages. Paging stops on the
    first empty page or the first page holding fewer than 30 comments.

    ``total_comments`` is accepted but not used.
    NOTE(review): the short-page test runs on what ``get_comments`` returns,
    which excludes empty-text comments -- presumably a full page with one
    empty comment ends paging early; confirm.

    Returns:
        A list with the comments of every page fetched, in request order.
    """
    per_page = 30
    collected = []
    page_no = 1

    batch = get_comments(stock_code, post_id, page_no, per_page)
    while batch:
        collected.extend(batch)
        print(f' 第{page_no}页评论获取完成,累计{len(collected)}条')

        if len(batch) < per_page:
            break

        page_no += 1
        time.sleep(DELAY_BETWEEN_REQUESTS)
        batch = get_comments(stock_code, post_id, page_no, per_page)

    return collected
|
||||||
|
|
||||||
|
|
||||||
|
def process_post(stock_code, post):
    """Build the output record for one post, including its comments.

    Args:
        stock_code: Stock code passed on to the comment fetcher.
        post: Dict describing the post; ``post_id``, ``title``, ``url`` and
            ``comment_count`` are required, the other fields are optional.

    Returns:
        A dict with the post's fields plus a ``comments`` list, which stays
        empty when ``comment_count`` is not positive. The function sleeps
        ``DELAY_BETWEEN_REQUESTS`` seconds before returning.
    """
    pid = post['post_id']
    heading = post['title']
    print(f' 获取帖子: {heading[:40]}... (评论:{post["comment_count"]})')

    record = {
        'post_id': pid,
        'title': heading,
        'author': post.get('author', ''),
        'post_time': post.get('post_time', ''),
        'url': post['url'],
    }
    # Counters default to 0 when the list entry did not carry them.
    for counter in ('comment_count', 'click_count', 'forward_count', 'like_count'):
        record[counter] = post.get(counter, 0)
    record['comments'] = []

    declared = post['comment_count']
    if declared > 0:
        print(f' 正在获取评论...')
        fetched = get_all_comments(stock_code, pid, declared)
        record['comments'] = fetched
        print(f' 评论获取完成,共{len(fetched)}条')

    time.sleep(DELAY_BETWEEN_REQUESTS)
    return record
|
||||||
|
|
||||||
|
|
||||||
|
def scrape_guba(stock_code='002624', stock_name='完美世界', total_pages=3, min_comment_count=0):
    """Scrape the first *total_pages* list pages of a stock's board.

    Each list page is fetched with ``get_post_list``; posts already seen on an
    earlier page are skipped, as are posts below *min_comment_count* when that
    threshold is positive. Every remaining post goes through ``process_post``.
    ``DELAY_BETWEEN_PAGES`` seconds are slept after each page that produced
    posts, except the last.

    Args:
        stock_code: Stock code of the board.
        stock_name: Display name, used only in progress output.
        total_pages: Number of list pages to walk.
        min_comment_count: Minimum comment count a post needs; 0 disables it.

    Returns:
        A list of post records in the order they were processed.
    """
    collected = []
    seen_ids = set()
    filtering = min_comment_count > 0

    print(f'开始爬取{stock_name}({stock_code})股吧前{total_pages}页帖子...')
    print(f'爬取时间: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}')
    if filtering:
        print(f'筛选条件: 评论数 >= {min_comment_count}')
    print('-' * 60)

    initialize_session()

    for page in range(1, total_pages + 1):
        print(f'\n正在爬取第{page}/{total_pages}页...')

        posts = get_post_list(stock_code, page)
        if not posts:
            print(f' 第{page}页未找到数据')
            continue

        print(f' 找到{len(posts)}个帖子')

        selected = []
        for candidate in posts:
            pid = candidate['post_id']
            if pid in seen_ids:
                continue
            # An id counts as seen even if the post is filtered out below.
            seen_ids.add(pid)
            if not filtering or candidate['comment_count'] >= min_comment_count:
                selected.append(candidate)

        if not selected:
            print(f' 第{page}页没有符合条件的帖子')
            continue

        for candidate in selected:
            collected.append(process_post(stock_code, candidate))

        print(f' 第{page}页完成,已获取{len(collected)}个帖子')

        if page < total_pages:
            time.sleep(DELAY_BETWEEN_PAGES)

    return collected
|
||||||
|
|
||||||
|
|
||||||
|
def save_to_json(data, filename):
    """Write *data* to *filename* as indented UTF-8 JSON and return the payload.

    The written object has three keys, in this order: ``scrape_time`` (local
    time as ``YYYY-MM-DD HH:MM:SS``), ``total_posts`` (``len(data)``) and
    ``posts`` (*data* itself). Non-ASCII text is written unescaped.
    """
    stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    output = dict(scrape_time=stamp, total_posts=len(data), posts=data)

    with open(filename, 'w', encoding='utf-8') as handle:
        json.dump(output, handle, ensure_ascii=False, indent=2)

    return output
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Script entry point: scrape with fixed settings, save the result and
    # print a short preview of the first posts.
    import os
    import sys

    stock_code = '002624'
    stock_name = '完美世界'
    total_pages = 3
    min_comment_count = 0

    # Environment details, useful when the script is run by a scheduler.
    print(f'使用 Python: {sys.version}')
    print(f'脚本路径: {__file__}')
    print(f'工作目录: {os.getcwd()}')

    start_time = datetime.now()
    all_posts = scrape_guba(stock_code, stock_name, total_pages, min_comment_count)
    end_time = datetime.now()
    elapsed_seconds = (end_time - start_time).total_seconds()

    print('\n' + '=' * 60)

    if all_posts:
        output = save_to_json(all_posts, OUTPUT_FILE)

        print(f'爬取完成!')
        print(f' - 帖子数量: {output["total_posts"]}')
        print(f' - 数据已保存到: {OUTPUT_FILE}')
        print(f' - 耗时: {elapsed_seconds:.2f} 秒')

        print('\n前3个帖子预览:')
        for i, post in enumerate(all_posts[:3], 1):
            print(f'\n--- 帖子{i} ---')
            print(f'标题: {post["title"]}')
            print(f'作者: {post["author"]}')
            print(f'时间: {post["post_time"]}')
            print(f'URL: {post["url"]}')
            print(f'评论数: {post["comment_count"]}')
            print(f'实际获取评论数: {len(post["comments"])}')
            if post.get('comments'):
                print(f'第一条评论: {post["comments"][0]["reply_content"][:30]}...')
    else:
        print('未获取到任何数据')
        print(f'耗时: {elapsed_seconds:.2f} 秒')
|
||||||
+38
@@ -0,0 +1,38 @@
|
|||||||
|
import asyncio
|
||||||
|
import aiohttp
|
||||||
|
|
||||||
|
# Headers for the comment endpoint probe; the Referer points at the post
# whose comments are requested below.
comment_headers = {
    'Accept': '*/*',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Origin': 'https://guba.eastmoney.com',
    'Pragma': 'no-cache',
    'Referer': 'https://guba.eastmoney.com/news,002624,1711407668.html',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-origin',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/148.0.0.0 Safari/537.36',
}


async def test_comment_api():
    """POST one reply-list request for a fixed post and print the raw reply.

    Prints the HTTP status followed by the full response text; nothing is
    parsed or returned.
    """
    endpoint = 'https://guba.eastmoney.com/api/getData?code=002624&path=reply/api/Reply/ArticleNewReplyList'

    # Form fields sent in the request body.
    form_fields = {
        'param': 'postid=1711407668&sort=1&sorttype=1&p=1&ps=30',
        'plat': 'Web',
        'path': 'reply/api/Reply/ArticleNewReplyList',
        'env': '2',
        'origin': '',
        'version': '2022',
        'product': 'Guba',
    }

    async with aiohttp.ClientSession() as session:
        async with session.post(endpoint, headers=comment_headers, data=form_fields) as reply:
            print(f'状态码: {reply.status}')
            body = await reply.text()
            print(f'响应内容:\n{body}')


asyncio.run(test_comment_api())
|
||||||
Reference in New Issue
Block a user