105 lines
3.9 KiB
Python
105 lines
3.9 KiB
Python
#!/usr/bin/env python3
|
||
|
||
from pathlib import Path
|
||
from util import DiskCache
|
||
import html
|
||
import json
|
||
import re
|
||
import urllib.request
|
||
import xml.etree.ElementTree as ET
|
||
|
||
# Directory two levels above this file; the input, output and cache
# locations below are all resolved relative to it.
_PROJECT_ROOT = Path(__file__).parent.parent

# JSON file read by the script entry point.
INPUT_PATH = (_PROJECT_ROOT / "input.json").resolve()
# JSON file the script entry point writes the serialized games to.
OUTPUT_PATH = (_PROJECT_ROOT / "data.json").resolve()
# Directory handed to DiskCache (created on start-up if missing).
CACHE_PATH = (_PROJECT_ROOT / "cache").resolve()
|
||
|
||
class BoardGame:
    """Plain record of one board game plus helpers to parse and serialize it.

    Instances are normally created with :meth:`from_bgg_xml` and turned into
    JSON-friendly dictionaries with :meth:`serialize`.
    """

    def __init__(self, id, name, year, player_range, time, rank, families,
                 weight, description, mechanics, thumbnail_url, image_url):
        """Store every argument unchanged on the instance."""
        self.id = id
        self.name = name
        self.year = year
        self.player_range = player_range
        self.time = time
        self.rank = rank
        self.families = families
        self.weight = weight
        self.description = description
        self.mechanics = mechanics
        self.thumbnail_url = thumbnail_url
        self.image_url = image_url

    @classmethod
    def from_bgg_xml(cls, xml_doc):
        """Build a BoardGame from a parsed XML element.

        Args:
            xml_doc: An ElementTree element. All lookups use ``.//`` paths,
                so the required elements may sit at any depth below it.

        Returns:
            A new instance. ``rank`` is ``None`` when the subtype rank value
            is not an integer (e.g. the literal ``Not Ranked``), and
            ``description`` is ``""`` when the description element is empty.

        Raises:
            AttributeError: A required element is missing.
            KeyError: A required attribute is missing.
            ValueError: A numeric field other than the rank is not numeric.
        """
        # Only the first word of each family rank's friendly name is kept.
        families = [
            e.attrib['friendlyname'].split()[0]
            for e in xml_doc.findall('.//rank[@type="family"]')
        ]
        mechanics = [
            e.attrib['value']
            for e in xml_doc.findall('.//link[@type="boardgamemechanic"]')
        ]

        # The rank attribute may hold a non-numeric placeholder instead of a
        # position; treat that as "no rank" rather than aborting the parse.
        rank_value = xml_doc.find('.//rank[@type="subtype"]').attrib['value']
        try:
            rank = int(rank_value)
        except ValueError:
            rank = None

        # An empty <description/> element has text None, not "".
        description_text = xml_doc.find('.//description').text or ''

        return cls(
            # Keep only the part of the name before the first hyphen/en dash.
            name=re.split(r'[–-]', xml_doc.find(".//name").attrib['value'])[0].strip(),
            id=int(xml_doc.find(".//item").attrib['id']),
            year=int(xml_doc.find('.//yearpublished').attrib['value']),
            player_range=(
                int(xml_doc.find('.//minplayers').attrib['value']),
                int(xml_doc.find('.//maxplayers').attrib['value']),
            ),
            time=int(xml_doc.find('.//playingtime').attrib['value']),
            rank=rank,
            families=families,
            weight=float(xml_doc.find('.//averageweight').attrib['value']),
            description=html.unescape(description_text.strip()),
            mechanics=mechanics,
            thumbnail_url=xml_doc.find('.//thumbnail').text.strip(),
            image_url=xml_doc.find('.//image').text.strip(),
        )

    def serialize(self):
        """Return the game as a dictionary of plain values.

        The name is emitted under the key ``'title'``; the thumbnail and
        image URLs are not part of the output.
        """
        return {
            'id': self.id,
            'title': self.name,
            'year': self.year,
            'player_range': self.player_range,
            'time': self.time,
            'rank': self.rank,
            'families': self.families,
            'description': self.description,
            'mechanics': self.mechanics,
            'weight': self.weight,
        }
|
||
|
||
def fetch_boardgames(data, cache):
    """Return a BoardGame for every entry of *data*, in order.

    Args:
        data: Iterable of mappings with at least ``'title'`` and ``'id'``;
            an optional ``'families'`` value replaces the parsed families.
        cache: Mapping-like store supporting ``in``, item assignment and item
            lookup. The raw XML response is kept under ``"<title>.xml"``.

    Returns:
        A list of BoardGame objects parsed from the cached XML.
    """

    def load(entry):
        # Download the XML only when this title has not been cached yet.
        xml_key = f"{entry['title']}.xml"
        if xml_key not in cache:
            endpoint = f"https://api.geekdo.com/xmlapi2/thing?id={entry['id']}&stats=1"
            with urllib.request.urlopen(endpoint) as reply:
                cache[xml_key] = reply.read()

        # Always parse what the cache hands back, even right after a download.
        game = BoardGame.from_bgg_xml(ET.fromstring(cache[xml_key]))

        # A families list in the input entry overrides the parsed one.
        if 'families' in entry:
            game.families = entry['families']
        return game

    return [load(entry) for entry in data]
|
||
|
||
def cache_images(boardgames, cache):
    """Make sure the thumbnail and full image of every game are in *cache*.

    For each game the thumbnail is stored under ``"<id>_thumbnail.jpg"`` and
    the full image under ``"<id>.jpg"``. A download only happens when the
    corresponding key is not already present.

    Args:
        boardgames: Iterable of objects exposing ``id``, ``thumbnail_url``
            and ``image_url``.
        cache: Mapping-like store supporting ``in`` and item assignment.
    """
    # (key suffix, attribute holding the URL) - thumbnail first, then image.
    variants = (("_thumbnail", "thumbnail_url"), ("", "image_url"))

    for game in boardgames:
        for suffix, url_attr in variants:
            image_key = f"{game.id}{suffix}.jpg"
            if image_key in cache:
                continue
            # The URL attribute is read only when a download is needed.
            with urllib.request.urlopen(getattr(game, url_attr)) as reply:
                cache[image_key] = reply.read()
|
||
|
||
if __name__ == "__main__":
    # Script entry point: read the input list, fetch and parse every game,
    # cache its images, then write the serialized games to the output file.
    CACHE_PATH.mkdir(parents=True, exist_ok=True)

    cache = DiskCache(CACHE_PATH)

    # Pin UTF-8 so reading the JSON input does not depend on the platform's
    # locale encoding.
    with open(INPUT_PATH, encoding="utf-8") as infile:
        data = json.load(infile)

    boardgames = fetch_boardgames(data, cache)
    cache_images(boardgames, cache)

    with open(OUTPUT_PATH, 'w', encoding="utf-8") as outfile:
        json.dump([game.serialize() for game in boardgames], outfile)
|