# Scrapes hero card image links from the VancouverSanguosha repository pages on
# git.iidx.ca and rewrites the local deck file '武将牌堆.json' so that every card
# in its first object state points at a scraped image and carries a nickname.
# NOTE(review): the JSON keys used (ObjectStates, CustomDeck, DeckIDs, FaceURL)
# look like a Tabletop Simulator saved object -- confirm before relying on that.

import ast
import json
import mimetypes
import re
import urllib
import urllib.parse
import urllib.request

import requests
from bs4 import BeautifulSoup
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

# Root of the site; hrefs scraped from its pages are resolved against it.
BASE_URL = "https://git.iidx.ca/"
# Directory listing whose table cells link to one sub-directory per country.
COUNTRY_INDEX_URL = 'https://git.iidx.ca/aquostics/VancouverSanguosha/src/branch/master/PS%e6%96%87%e4%bb%b6'
# Only <img> sources under this prefix are collected as card faces.
LFS_PREFIX = 'https://git.iidx.ca/aquostics/VancouverSanguosha.git/info/lfs/objects'
# Deck file that is read and then overwritten in place.
DECK_FILE = '武将牌堆.json'
# Seconds to wait for any single HTTP request before giving up.
REQUEST_TIMEOUT = 30

custom_deck_string = ""
ContainedObjects = ""
custom_deck_count = 1
back_url = "https://git.iidx.ca/aquostics/VancouverSanguosha.git/info/lfs/objects/1ba1832950a60b45a7b5d98147a1cc07d75ecf53ced1b8489c5a74169cbfb65d/6LWb5a6B5qih5p2_6IOM6Z2iLnBuZw"
last_part = "', 'NumWidth': 1, 'NumHeight': 1, 'BackIsHidden': True, 'UniqueBack': False, 'Type': 0}"
start_part = "'"
custom_deck_str_array = []
# Filled by find_heroes_images(): one card-face description per scraped image.
info_array = []
# Filled by find_heroes_images(): one "<country> <hero>" nickname per hero page.
name_array = []

# Matches every character outside the U+4E00..U+9FA5 range; compiled once.
_NON_CHINESE = re.compile(r'[^\u4e00-\u9fa5]')


def dumper(obj):
    """Fallback serializer for ``json.dumps(default=...)``.

    Returns ``obj.toJSON()`` when that call succeeds, otherwise the object's
    ``__dict__``.
    """
    try:
        return obj.toJSON()
    except Exception:
        # Best-effort fallback; unlike a bare ``except`` this lets
        # KeyboardInterrupt / SystemExit propagate.
        return obj.__dict__


def find_chinese(file):
    """Return ``file`` with every character outside U+4E00..U+9FA5 removed."""
    return _NON_CHINESE.sub('', file)


def _fetch_soup(url):
    """Download ``url`` and return its HTML parsed with ``html.parser``.

    Raises ``requests.HTTPError`` on a 4xx/5xx response so an error page is
    never scraped as if it were real content, and ``requests.Timeout`` when
    the server does not answer within ``REQUEST_TIMEOUT`` seconds.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return BeautifulSoup(response.text, 'html.parser')


def find_heroes_images(country):
    """Collect image links and nicknames for every hero of one country.

    ``country`` is a parsed table cell whose first ``<a>`` links to the
    country's directory page. Each link on that page whose text ends in
    ``png`` is followed; matching image sources are appended to
    ``info_array`` and the page's titles to ``name_array``.
    """
    info = country.find('a')
    if info is None or not info.get('href'):
        # A cell without a usable link has nothing to follow.
        return
    country_name = str(info.get('title'))
    print('==================================== 开始抓取%s势力图链 ====================================\n' % country_name)

    # urljoin avoids the doubled slash that plain concatenation produces for
    # root-relative hrefs.
    soup = _fetch_soup(urllib.parse.urljoin(BASE_URL, info.get('href')))
    for tag in soup.find_all('a'):
        href = tag.get('href')
        if not href or not tag.text.endswith("png"):
            continue
        heroes_soup = _fetch_soup(urllib.parse.urljoin(BASE_URL, href))

        for image in heroes_soup.find_all('img'):
            img_src = image.get('src')
            # <img> tags without a src attribute are skipped instead of crashing.
            if img_src and img_src.startswith(LFS_PREFIX):
                info_array.append({
                    "FaceURL": img_src,
                    "BackURL": back_url,
                    "NumWidth": 1,
                    "NumHeight": 1,
                    "BackIsHidden": True,
                    "UniqueBack": False,
                    "Type": 0,
                })

        for span in heroes_soup.find_all('span', attrs={'class': 'active section'}):
            # A missing title yields an empty hero name rather than a TypeError.
            hero_name = find_chinese(span.get('title') or '')
            name_array.append(country_name + " " + hero_name)

    print('==================================== %s势力图链抓取完毕 ====================================\n' % country_name)


def go_through_country():
    """Scrape every country listed on the index page via find_heroes_images()."""
    soup = _fetch_soup(COUNTRY_INDEX_URL)
    # Matches cells whose class attribute is exactly "name four wide".
    for country in soup.find_all('td', class_="name four wide"):
        find_heroes_images(country)
    print('==================================== 全势力图链抓取完毕 ====================================\n')


def _apply_scraped_cards(deck, infos, names):
    """Rewrite the cards of ``deck`` in place from the scraped data.

    Card number ``i`` (1-based) receives ``infos[i - 1]`` as its only
    ``CustomDeck`` entry, ``names[i - 1]`` as ``Nickname`` and ``i * 100`` as
    ``CardID``; ``deck['DeckIDs']`` and ``deck['CustomDeck']`` are rebuilt to
    match.

    Raises ``ValueError`` before touching ``deck`` when fewer images or names
    were scraped than the deck has cards.
    """
    cards = deck['ContainedObjects']
    if len(infos) < len(cards) or len(names) < len(cards):
        raise ValueError(
            "deck has %d cards but only %d image links and %d names were scraped"
            % (len(cards), len(infos), len(names))
        )

    deck_ids = []
    custom_decks = {}
    for index, (card, info, name) in enumerate(zip(cards, infos, names), start=1):
        key = str(index)
        card_id = index * 100
        card['CustomDeck'] = {key: info}
        card["CardID"] = card_id
        card['Nickname'] = name
        card['HideWhenFaceDown'] = True
        custom_decks[key] = info
        deck_ids.append(card_id)

    deck['DeckIDs'] = deck_ids
    deck['CustomDeck'] = custom_decks


go_through_country()

with open(DECK_FILE, 'r', encoding="utf-8") as f:
    data = json.load(f)

_apply_scraped_cards(data['ObjectStates'][0], info_array, name_array)

# Serialize before opening for writing: opening with 'w' truncates the file,
# so a serialization error must not be able to leave it empty.
serialized = json.dumps(data, default=dumper, indent=2, ensure_ascii=False)
with open(DECK_FILE, 'w', encoding="utf-8") as f:
    f.write(serialized)