105 lines
3.9 KiB
Python
Executable File
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
from pathlib import Path
from util import DiskCache
import html
import json
import re
import urllib.request
import xml.etree.ElementTree as ET
# All data files live two directories above this script's own location.
_PROJECT_ROOT = Path(__file__).parent.parent

# JSON list of games to look up (read by the script entry point).
INPUT_PATH = (_PROJECT_ROOT / "input.json").resolve()
# JSON file the serialized game records are written to.
OUTPUT_PATH = (_PROJECT_ROOT / "data.json").resolve()
# Directory handed to DiskCache for downloaded XML and images.
CACHE_PATH = (_PROJECT_ROOT / "cache").resolve()
class BoardGame:
    """Plain record describing one board game.

    Instances are normally created with :meth:`from_bgg_xml` from a parsed
    BoardGameGeek XML document and turned into JSON-friendly dictionaries
    with :meth:`serialize`.
    """

    def __init__(
        self,
        id,
        name,
        year,
        player_range,
        time,
        rank,
        families,
        weight,
        description,
        mechanics,
        thumbnail_url,
        image_url,
    ):
        """Store the given values unchanged as attributes of the same name."""
        self.id = id
        self.name = name
        self.year = year
        self.player_range = player_range
        self.time = time
        self.rank = rank
        self.families = families
        self.weight = weight
        self.description = description
        self.mechanics = mechanics
        self.thumbnail_url = thumbnail_url
        self.image_url = image_url

    @classmethod
    def from_bgg_xml(cls, xml_doc):
        """Build a BoardGame from a parsed BGG XML element tree.

        Args:
            xml_doc: An ``xml.etree.ElementTree`` element whose descendants
                include ``item``, ``name``, ``yearpublished``, ``minplayers``,
                ``maxplayers``, ``playingtime``, ``rank``, ``averageweight``,
                ``description``, ``link``, ``thumbnail`` and ``image``.

        Returns:
            A new instance. ``rank`` is ``None`` when the subtype rank value
            is not an integer; ``description`` is ``""`` when the element
            has no text.

        Raises:
            AttributeError: If a required element is missing from the tree.
            KeyError: If a required attribute is missing from an element.
            ValueError: If a numeric field other than ``rank`` is not numeric.
        """
        # First word of each family rank's friendly name.
        families = [
            e.attrib['friendlyname'].split()[0]
            for e in xml_doc.findall('.//rank[@type="family"]')
        ]
        mechanics = [
            e.attrib['value']
            for e in xml_doc.findall('.//link[@type="boardgamemechanic"]')
        ]

        # BGG reports unranked items with a non-numeric value such as
        # "Not Ranked"; map that to None instead of crashing the whole run.
        rank_value = xml_doc.find('.//rank[@type="subtype"]').attrib['value']
        try:
            rank = int(rank_value)
        except ValueError:
            rank = None

        # An empty <description/> element has text None, not "".
        description_text = xml_doc.find('.//description').text or ""

        return cls(
            # Keep only the part of the title before the first dash.
            # NOTE(review): the character class may originally have listed
            # additional dash look-alikes -- confirm against the repository.
            name=re.split(r'[-]', xml_doc.find(".//name").attrib['value'])[0].strip(),
            id=int(xml_doc.find(".//item").attrib['id']),
            year=int(xml_doc.find('.//yearpublished').attrib['value']),
            player_range=(
                int(xml_doc.find('.//minplayers').attrib['value']),
                int(xml_doc.find('.//maxplayers').attrib['value']),
            ),
            time=int(xml_doc.find('.//playingtime').attrib['value']),
            rank=rank,
            families=families,
            weight=float(xml_doc.find('.//averageweight').attrib['value']),
            # The text is entity-escaped a second time inside the XML, so
            # unescape it once more after the XML parser has decoded it.
            description=html.unescape(description_text.strip()),
            mechanics=mechanics,
            thumbnail_url=xml_doc.find('.//thumbnail').text.strip(),
            image_url=xml_doc.find('.//image').text.strip(),
        )

    def serialize(self):
        """Return the game as a dictionary suitable for ``json.dump``.

        The name is exported under the key ``'title'``. The thumbnail and
        image URLs are not included in the output.
        """
        return {
            'id': self.id,
            'title': self.name,
            'year': self.year,
            'player_range': self.player_range,
            'time': self.time,
            'rank': self.rank,
            'families': self.families,
            'description': self.description,
            'mechanics': self.mechanics,
            'weight': self.weight,
        }
def _cached_thing_xml(entry, cache):
    """Return the raw XML for one input entry, downloading it on a cache miss."""
    cache_key = f"{entry['title']}.xml"
    if cache_key in cache:
        return cache[cache_key]
    api_url = f"https://api.geekdo.com/xmlapi2/thing?id={entry['id']}&stats=1"
    with urllib.request.urlopen(api_url) as response:
        cache[cache_key] = response.read()
    # Read the value back through the cache rather than reusing the payload.
    return cache[cache_key]


def fetch_boardgames(data, cache):
    """Turn the input entries into BoardGame objects.

    Args:
        data: Iterable of mappings with at least ``'title'`` and ``'id'``
            keys; an optional ``'families'`` key overrides the families
            parsed from the XML.
        cache: Mapping-like store supporting ``in``, item reads and item
            writes. XML is stored under ``"<title>.xml"``.

    Returns:
        A list of BoardGame objects, in the same order as ``data``.
    """
    games = []
    for entry in data:
        game = BoardGame.from_bgg_xml(ET.fromstring(_cached_thing_xml(entry, cache)))
        if 'families' in entry:
            game.families = entry['families']
        games.append(game)
    return games
def cache_images(boardgames, cache):
    """Download each game's thumbnail and full image into the cache.

    For every game the thumbnail is stored under ``"<id>_thumbnail.jpg"`` and
    the full image under ``"<id>.jpg"``. Keys already present in ``cache``
    are left untouched and trigger no request.

    Args:
        boardgames: Iterable of objects exposing ``id``, ``thumbnail_url``
            and ``image_url``.
        cache: Mapping-like store supporting ``in`` and item assignment.
    """
    for game in boardgames:
        # (cache key, attribute holding the URL); thumbnail first, then image.
        wanted = (
            (f"{game.id}_thumbnail.jpg", "thumbnail_url"),
            (f"{game.id}.jpg", "image_url"),
        )
        for cache_key, url_attr in wanted:
            if cache_key in cache:
                continue
            with urllib.request.urlopen(getattr(game, url_attr)) as response:
                cache[cache_key] = response.read()
if __name__ == "__main__":
    # Script entry point: read the input list, fetch/caches game data and
    # images, then write the serialized records to OUTPUT_PATH.
    CACHE_PATH.mkdir(parents=True, exist_ok=True)
    # DiskCache comes from the project's util module; it is used here as a
    # mapping-like store (presumably persisted under CACHE_PATH -- confirm).
    cache = DiskCache(CACHE_PATH)

    # JSON is UTF-8 by specification; do not rely on the platform's locale
    # encoding, which would mis-decode non-ASCII text on some systems.
    with open(INPUT_PATH, encoding="utf-8") as infile:
        data = json.load(infile)

    boardgames = fetch_boardgames(data, cache)
    cache_images(boardgames, cache)

    with open(OUTPUT_PATH, 'w', encoding="utf-8") as outfile:
        json.dump([game.serialize() for game in boardgames], outfile)