# Batch-download Sogou Pinyin dictionary (.scel) files for each configured category.
import os
import urllib.request
from urllib.request import urlopen

from bs4 import BeautifulSoup
 
  cates = [     ["城市信息", 167, 8],     ["自然科学", 1, 28],     ["社会科学", 76, 34],     ["工程应用", 96, 75],     ["农林渔畜", 127, 9],     ["医学医药", 132, 32],     ["电子游戏", 436, 100],     ["艺术设计", 154, 17],     ["生活百科", 389, 77],     ["运动休闲", 367, 16],     ["人文科学", 31, 81],     ["娱乐休闲", 403, 101] ]
  sets = ['/', '\\', ':', '*', '?', '"', '<', '>', '|']  
  for cate in cates:     count = 0     os.mkdir("./" + cate[0] + " 官方推荐")     for i in range(1, cate[2] + 1):         html = urlopen("https://pinyin.sogou.com/dict/cate/index/" + str(cate[1]) + "/default/" + str(i))         bsObj = BeautifulSoup(html.read(), "html.parser")         nameList = bsObj.findAll("div", {"class": "detail_title"})         urlList = bsObj.findAll("div", {"class": "dict_dl_btn"})         for name, url in zip(nameList, urlList):             count += 1             name = name.a.get_text()             if name.find("官方推荐") == -1:                   continue             else:                 for char in name:                     if char in sets:                         name = name.replace(char, "")                   urllib.request.urlretrieve(url.a.attrs['href'], "./" + cate[0] + " 官方推荐" + "/" + str(count) + name + ".scel")                                  print(cate[0], count, name)
 