VancouverSanguosha/脚本/sgs_card_image_update.py

import requests
import urllib
import urllib.request
import mimetypes
from bs4 import BeautifulSoup
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
import re
import json
import ast

# Module-level state shared by the scraping functions and the deck-rewriting
# script further down in this file.

# Declared `global` inside find_heroes_images but never read or assigned in the
# visible code.
custom_deck_string = ""
# Not referenced as a variable anywhere in the visible code (the JSON key of the
# same name is a separate string literal).
ContainedObjects = ""
# Declared `global` inside find_heroes_images but never read or assigned in the
# visible code.
custom_deck_count = 1
# Stored under the "BackURL" key of every entry appended to info_array.
back_url = "https://git.iidx.ca/aquostics/VancouverSanguosha.git/info/lfs/objects/1ba1832950a60b45a7b5d98147a1cc07d75ecf53ced1b8489c5a74169cbfb65d/6LWb5a6B5qih5p2_6IOM6Z2iLnBuZw"
# String fragments that are declared `global` inside find_heroes_images but are
# otherwise unused in the visible code.
last_part = "', 'NumWidth': 1, 'NumHeight': 1, 'BackIsHidden': True, 'UniqueBack': False, 'Type': 0}"
start_part = "'"
# Never referenced again in the visible code.
custom_deck_str_array = []
# One dict per scraped image (FaceURL, BackURL, NumWidth, ... keys); filled by
# find_heroes_images and later read by position when the deck JSON is rewritten.
info_array = []
# One "<link title> <Chinese characters of the span title>" string per scraped
# name; filled by find_heroes_images and later read by position as well.
name_array = []


def dumper(obj):
    """Return a serializable stand-in for *obj*, for use as ``json.dumps(default=...)``.

    The object's own ``toJSON()`` result is preferred. If the method is
    missing, or it fails with an ordinary exception, the object's
    ``__dict__`` is returned instead.

    Args:
        obj: Any object the JSON encoder could not serialize natively.

    Returns:
        Whatever ``obj.toJSON()`` returns, or ``obj.__dict__`` as a fallback.

    Raises:
        AttributeError: If the fallback is needed and *obj* has no ``__dict__``.
    """
    try:
        return obj.toJSON()
    except Exception:
        # Deliberately not a bare `except:`: KeyboardInterrupt and SystemExit
        # must propagate instead of being turned into a __dict__ lookup.
        return obj.__dict__


def find_chinese(file):
    """Strip every character outside the range U+4E00..U+9FA5 from *file*.

    Args:
        file: The input string (for example a file name).

    Returns:
        A string made only of the characters of *file* that fall inside
        that range, in their original order.
    """
    outside_range = re.compile(r'[^\u4e00-\u9fa5]')
    return outside_range.sub('', file)


def find_heroes_images(country):
    """Scrape one directory listing and record the images and names found in it.

    The first ``<a>`` inside *country* supplies the directory link (``href``)
    and a display name (``title``). Every link on that directory page whose
    text ends with "png" is fetched in turn. On each of those pages:

    * every ``<img>`` whose ``src`` starts with the repository's LFS object
      prefix is appended to the module-level ``info_array`` as a card
      description dict (with ``back_url`` as its "BackURL");
    * every ``<span class="active section">`` contributes
      "<display name> <Chinese characters of its title>" to the module-level
      ``name_array``.

    NOTE(review): later code reads ``info_array`` and ``name_array`` by the same
    position, which presumably assumes each page yields exactly one image and
    one name -- confirm against the site's markup.

    Args:
        country: A parsed HTML element (a table cell from the top-level
            listing) expected to contain a link to the directory.

    Returns:
        None. Results are accumulated in ``info_array`` and ``name_array``.
    """
    # Local import: the file imports `urllib` and `urllib.request` only.
    from urllib.parse import urljoin

    base_url = "https://git.iidx.ca/"
    lfs_prefix = 'https://git.iidx.ca/aquostics/VancouverSanguosha.git/info/lfs/objects'

    info = country.find('a')
    if info is None or not info.get('href'):
        # No usable link in this cell, so there is no directory page to fetch.
        return

    country_name = str(info.get('title'))
    print('==================================== 开始抓取%s势力图链 ====================================\n' % country_name)

    # urljoin copes with root-relative ("/a/b"), relative ("a/b") and absolute
    # hrefs alike, where plain concatenation gives "//" or a malformed URL.
    listing = requests.get(urljoin(base_url, info.get('href')))
    soup = BeautifulSoup(listing.text, 'html.parser')

    for tag in soup.find_all('a'):
        href = tag.get('href')
        if not tag.text.endswith("png") or not href:
            continue

        hero_page = requests.get(urljoin(base_url, href))
        heroes_soup = BeautifulSoup(hero_page.text, 'html.parser')

        for image in heroes_soup.find_all('img'):
            img_src = image.get('src')
            # An <img> without a src attribute yields None; skip it rather
            # than fail on None.startswith.
            if img_src and img_src.startswith(lfs_prefix):
                info_array.append({"FaceURL": img_src, "BackURL": back_url, "NumWidth": 1, "NumHeight": 1,
                                   "BackIsHidden": True, "UniqueBack": False, "Type": 0})

        for span in heroes_soup.find_all('span', attrs={'class': 'active section'}):
            name = span.get('title')
            name_array.append(country_name + " " + find_chinese(name))

    print('==================================== %s势力图链抓取完毕 ====================================\n' % country_name)


def go_through_country():
    """Fetch the top-level directory listing and scrape each entry in it.

    Every ``<td>`` selected from the listing page is passed to
    ``find_heroes_images``; a completion banner is printed once all of them
    have been processed.

    Returns:
        None.
    """
    listing_url = 'https://git.iidx.ca/aquostics/VancouverSanguosha/src/branch/master/PS%e6%96%87%e4%bb%b6'
    page = BeautifulSoup(requests.get(listing_url).text, 'html.parser')

    # NOTE(review): attrs is a one-element set, not a dict; this presumably
    # relies on BeautifulSoup treating a non-dict attrs value as a CSS-class
    # filter ("name four wide") -- confirm before changing it.
    cells = page.find_all('td', {"name four wide"})
    for cell in cells:
        find_heroes_images(cell)

    print('==================================== 全势力图链抓取完毕 ====================================\n')


# Scrape all pages first; this fills info_array and name_array.
go_through_country()

# Load the existing deck file. The handle is only needed for the read itself.
with open('武将牌堆.json', 'r', encoding="utf-8") as f:
    data = json.load(f)

deck = data['ObjectStates'][0]
cards = deck['ContainedObjects']

# Cards are matched to scraped entries purely by position, so a short scrape
# would otherwise surface as a bare IndexError part-way through the loop.
# Report the counts up front instead.
if len(info_array) < len(cards) or len(name_array) < len(cards):
    raise IndexError(
        "scraped %d images and %d names, but the deck contains %d cards"
        % (len(info_array), len(name_array), len(cards)))

custom_decks = {}
deck['DeckIDs'] = []
for index, card in enumerate(cards, start=1):
    face = info_array[index - 1]
    # Each card gets its own single-entry CustomDeck keyed by its 1-based
    # position; CardID / DeckIDs use that position multiplied by 100.
    card['CustomDeck'] = {str(index): face}
    card["CardID"] = index * 100
    card['Nickname'] = name_array[index - 1]
    card['HideWhenFaceDown'] = True
    custom_decks[str(index)] = face
    deck['DeckIDs'].append(index * 100)

deck['CustomDeck'] = custom_decks

# Serialize BEFORE reopening the file: mode 'w' truncates it immediately, so a
# serialization failure after that point would leave an empty deck file behind.
serialized = json.dumps(data, default=dumper, indent=2, ensure_ascii=False)
with open('武将牌堆.json', 'w', encoding="utf-8") as f:
    f.write(serialized)
Update, 2021-06-02 02:22:34 -07:00			`import requests`
			`import urllib`
			`import urllib.request`
			`import mimetypes`
			`from bs4 import BeautifulSoup`
			`from requests.adapters import HTTPAdapter`
			`from requests.packages.urllib3.util.retry import Retry`
			`import re`
			`import json`
			`import ast`

			`custom_deck_string = ""`
			`ContainedObjects = ""`
			`custom_deck_count = 1`
			`back_url = "https://git.iidx.ca/aquostics/VancouverSanguosha.git/info/lfs/objects/1ba1832950a60b45a7b5d98147a1cc07d75ecf53ced1b8489c5a74169cbfb65d/6LWb5a6B5qih5p2_6IOM6Z2iLnBuZw"`
			`last_part = "', 'NumWidth': 1, 'NumHeight': 1, 'BackIsHidden': True, 'UniqueBack': False, 'Type': 0}"`
			`start_part = "'"`
			`custom_deck_str_array = []`
			`info_array = []`
			`name_array = []`


			`def dumper(obj):`
			`try:`
			`return obj.toJSON()`
			`except:`
			`return obj.__dict__`


			`def find_chinese(file):`
			`pattern = re.compile(r'[^\u4e00-\u9fa5]')`
			`chinese = re.sub(pattern, '', file)`
			`return chinese`


			`def find_heroes_images(country):`
			`info = country.find('a')`
			`string = "https://git.iidx.ca/" + str(info.get('href'))`
			`country_name = str(info.get('title'))`
			`print('==================================== 开始抓取%s势力图链 ====================================\n' % str(country_name))`
			`url = requests.get(string)`
			`html = url.text`
			`soup = BeautifulSoup(html, 'html.parser')`
			`tags = soup.find_all('a')`

			`for tag in tags:`
			`text = tag.text`
			`href = tag.get('href')`
			`if text.endswith("png"):`
			`hero_url = requests.get("https://git.iidx.ca/" + href)`
			`heroes_soup = BeautifulSoup(hero_url.text, 'html.parser')`
			`heroes_images = heroes_soup.find_all('img')`

			`global custom_deck_string`
			`global custom_deck_count`
			`global back_url`
			`global last_part`
			`global start_part`
			`global info_array`
			`global name_array`

			`for i in heroes_images:`
			`img_src = i.get('src')`
			`if img_src.startswith('https://git.iidx.ca/aquostics/VancouverSanguosha.git/info/lfs/objects'):`
			`info_array.append({"FaceURL": img_src, "BackURL": back_url, "NumWidth": 1, "NumHeight": 1,`
			`"BackIsHidden": True, "UniqueBack": False, "Type": 0})`

			`heroes_name = heroes_soup.find_all('span', attrs={'class': 'active section'})`
			`for i in heroes_name:`
			`name = i.get('title')`
			`name_array.append(str(country_name) + " " + find_chinese(name))`

			`print('==================================== %s势力图链抓取完毕 ====================================\n' % str(country_name))`


			`def go_through_country():`
			`url = requests.get(`
			`'https://git.iidx.ca/aquostics/VancouverSanguosha/src/branch/master/PS%e6%96%87%e4%bb%b6')`
			`html = url.text`
			`soup = BeautifulSoup(html, 'html.parser')`
			`# countries = soup.find_all('td', attrs={'class': 'name four wide'})`
			`countries = soup.find_all('td', {"name four wide"})`
			`for country in countries:`
			`find_heroes_images(country)`

			`print('==================================== 全势力图链抓取完毕 ====================================\n')`


			`go_through_country()`

			`with open('武将牌堆.json', 'r', encoding="utf-8") as f:`
			`data = json.load(f)`
			`index = 1`
			`custom_decks = {}`
			`data['ObjectStates'][0]['DeckIDs'] = []`
			`for card in data['ObjectStates'][0]['ContainedObjects']:`
			`custom_deck = {str(index): info_array[index - 1]}`
			`card['CustomDeck'] = custom_deck`
			`card["CardID"] = index * 100`
			`card['Nickname'] = name_array[index - 1]`
			`card['HideWhenFaceDown'] = True`
			`custom_decks[str(index)] = info_array[index - 1]`
			`data['ObjectStates'][0]['DeckIDs'].append(index * 100)`
			`index += 1`

			`data['ObjectStates'][0]['CustomDeck'] = custom_decks`

			`with open('武将牌堆.json', 'w', encoding="utf-8") as f:`
			`f.write(json.dumps(data, default=dumper, indent=2, ensure_ascii=False))`

			`f.close()`