简单的爬取页面数据,并生成词云和网络图
需要安装 networkx wordcloud 包
代码如下


1 # @Author :whyCai
2 # @Time :2020/10/17 10:35
3 from time import sleep
4
5 import requests,json,jieba,wordcloud,networkx as nx,jieba.posseg as jp
6 from matplotlib import pyplot as plt
7
8 """
9 #爬取页面的数据
def getCommText(pages=300, delay=0.2, timeout=10):
    '''
    Fetch paginated comments from the API and print each comment's text.

    Posts a JSON body to the endpoint once per page (pageIndex 1..pages),
    reads the entries under result -> items in the JSON response and prints
    the 'content' field of every entry.

    :param pages: number of pages to request (the original hard-coded 300).
    :param delay: seconds to pause after each page request.
    :param timeout: seconds to wait for the server before giving up, so a
        stalled connection cannot hang the crawl indefinitely.
    :return: None; the comment texts are written to stdout.
    :raises requests.HTTPError: if the server answers with an error status.
    '''
    url = 'https://xxxxxxx'
    headers = {'content-type': 'application/json'}

    for page in range(1, pages + 1):
        payload = {"pageIndex": page, "xxxx": 1}
        r = requests.post(url, data=json.dumps(payload), headers=headers, timeout=timeout)
        # Fail loudly on 4xx/5xx instead of tripping over a missing key below.
        r.raise_for_status()
        # Each item of result -> items carries the comment text in 'content'.
        for item in r.json()['result']['items']:
            print(item['content'])
        # Throttle: one pause per page request.
        sleep(delay)
33 getCommText()
34 """
35
36 """
37 #生成词云
38
# Read the whole corpus; the with-block closes the file even if reading fails.
with open('xxx.txt', encoding='utf-8') as f:
    text = f.read()
# Segment the text into words with jieba and join them with single spaces
# before handing the result to WordCloud.
txtlist = " ".join(jieba.lcut(text))
# NOTE(review): 'msyh.ttc' is presumably a CJK-capable font file that must be
# resolvable on the machine running this script -- confirm.
w = wordcloud.WordCloud(width=1000, height=700, background_color='white', font_path='msyh.ttc')
w.generate(txtlist)
# Write the rendered word cloud to an image file.
w.to_file('output2-poem.png')
48 """
49
50
51 """
52 #生成网络图
53
# `text` is the string obtained from `text = f.read()` in the word-cloud
# section above.
words = jp.lcut(text)
G = nx.MultiDiGraph()
# Add one node per flag found on the segmented tokens; repeated flags map to
# the same node.
G.add_nodes_from(token.flag for token in words)
# Add a directed edge from every token's flag to the flag of the token that
# follows it; repeated transitions become parallel edges in the multigraph.
G.add_edges_from((current.flag, following.flag) for current, following in zip(words, words[1:]))
# Draw the graph.
nx.draw(G, alpha=0.8, with_labels=True, node_color='lightgreen', font_size=36, node_size=999, width=2)
# Display the figure.
plt.show()
67 """
View Code
    
    










