Szerkesztő:BinBot/huwiki/wikidata.py
Megjelenés
Részletes leírás és példák a modul használatára
"""Wikidata-related stuff for Hungarian Wikipedia."""
#
# (C) Bináris, 2024
#
# Distributed under the terms of the MIT license.
from typing import Union
import pywikibot
from pywikibot.backports import Iterable
# Site object built with pywikibot's defaults (no family/language given).
# The code below creates huwiki pages on it from 'huwiki' sitelinks, so it
# is presumably expected to be Hungarian Wikipedia.
# NOTE(review): confirm against the user's pywikibot configuration.
site = pywikibot.Site()
# Data repository attached to `site`; items and property pages in this
# module are created on it.
repo = site.data_repository()
# Background reading on claims and their ranks:
# https://doc.wikimedia.org/pywikibot/master/api_ref/pywikibot.page.html#page.Claim
# https://www.wikidata.org/wiki/Help:Ranking
class ItemPlus(pywikibot.ItemPage):
    """A Wikidata item page with enhanced methods.

    May be instantiated with either an item page or a 'Qxxxx' string.
    The item content is fetched in the constructor and kept in
    ``self.data``.
    """

    # Properties looked up by has_auth_ctrl(): the identifiers used in
    # {{Nemzetközi katalógusok}}, see
    # https://hu.wikipedia.org/wiki/Sablon:Nemzetk%C3%B6zi_katal%C3%B3gusok
    _AUTH_CTRL_PROPS = (
        'P213', 'P214', 'P227', 'P244', 'P254', 'P268', 'P269',
        'P396', 'P434', 'P496', 'P549', 'P651', 'P691', 'P906',
        'P950', 'P951', 'P1015', 'P1157', 'P2492', 'P3133', 'P3973',
        'P6796', 'P10832',
    )

    def __init__(self, item: Union[pywikibot.ItemPage, str]) -> None:
        """Create the item on the module-level repo and load its content.

        Call it for an existing page only. Existence will not be checked.

        :param item: an item page, or an item ID such as 'Q42'
        :raises ValueError: if item is neither an ItemPage nor a string
            consisting of 'Q' followed by digits only
        """
        if isinstance(item, pywikibot.ItemPage):
            title = item.title()
        elif isinstance(item, str) \
                and item.startswith('Q') and item[1:].isdigit():
            title = item
        else:
            raise ValueError(
                f'{item} is neither a Wikidata item page nor Q number.')
        super().__init__(repo, title)
        # Fetched once here; label, description and properties() read it.
        self.data = self.get(get_redirect=True)

    def _getstr(self,
                strings: pywikibot.page._collections.LanguageDict) -> str:
        """Return the 'hu' entry, else the 'en' entry, else ''."""
        return strings.get('hu', strings.get('en', ''))

    @property
    def label(self) -> str:
        """Label of the item in Hungarian, falling back to English or ''."""
        return self._getstr(self.data['labels'])

    @property
    def description(self) -> str:
        """Description in Hungarian, falling back to English or ''."""
        return self._getstr(self.data['descriptions'])

    @property
    def has_magyar(self) -> bool:
        """Tell if the item has a sitelink to huwiki."""
        return 'huwiki' in self.sitelinks

    @property
    def hupage(self) -> Union[pywikibot.Category, pywikibot.Page, None]:
        """Return the huwiki page belonging to the item.

        If the item is an instance of (P31) Q4167836, i.e. it represents
        a category, the returned type is pywikibot.Category. Otherwise it
        is pywikibot.Page. If the item has no huwiki sitelink, None is
        returned.
        """
        if 'huwiki' not in self.sitelinks:
            return None
        title = self.getSitelink('huwiki')
        if self.property_has_value('P31', 'Q4167836'):
            return pywikibot.Category(site, title)
        return pywikibot.Page(site, title)

    @property
    def is_magyar(self) -> bool:
        """Try to say if the person is likely to be Hungarian.

        1. If the item has any citizenship (P27) claim, the answer is
           whether one of them is 'Magyarország' (Q28) or
           'Magyar Királyság' (Q171150). The description is not consulted
           in this case.
        2. Otherwise the description is searched for the standalone word
           'magyar' or 'hungarian' (case-insensitive).
        """
        if 'P27' in self.claims:
            # P27 is present, so property_has_value() returns a plain bool.
            return any(self.property_has_value('P27', country)
                       for country in ('Q28', 'Q171150'))
        words = self.description.lower().split()
        return 'magyar' in words or 'hungarian' in words

    def property_has_value(self,
                           p: str,
                           q: str,
                           rank=None  # Not implemented, TODO
                           ) -> Union[bool, None]:
        """Tell if the given p property has the given q value.

        E.g. property_has_value('P31', 'Q5') == True if it is a person.

        Code is partially derived from
        https://gerrit.wikimedia.org/r/c/pywikibot/core/+/888791/
        (C) Author of copied part: Ayush Anand33

        :param p: property ID, e.g. 'P31'
        :param q: item ID expected as the value, e.g. 'Q5'
        :param rank: currently ignored
        :return: True if any claim of p has q as its target; False if the
            item has p but none of its claims points to q; None if the
            item does not have p at all
        """
        if p not in self.claims:
            return None
        for claim in self.claims[p]:
            target = claim.getTarget()
            # Claims may come without a usable item target, e.g.
            # Hitler (Q352) has 6 citizenships, one of them w/o name;
            # Q81219 has place of burial/nyughely (P119) w/o value.
            # Such targets have no getID() and simply do not match.
            if hasattr(target, 'getID') and target.getID() == q:
                return True
        return False  # Has the given P in WD, but not the given Q.

    def properties(self) -> Iterable:
        """Yield the properties of the item with Hungarian or English name.

        Each yielded element is a (property ID, label) tuple; the label
        is '' if the property has neither a Hungarian nor an English one.
        The property page is fetched for every claimed property.
        """
        for p in self.data['claims']:
            labels = pywikibot.PropertyPage(repo, p).get()['labels']
            yield p, self._getstr(labels)

    def has_auth_ctrl(self) -> bool:
        """Tell if the item has any of IDs used in {{Nemzetközi katalógusok}}."""
        return any(p in self.claims for p in self._AUTH_CTRL_PROPS)
def p_q_generator(p: str,
                  q: str,
                  has_hu: str = 'yes',
                  hu: bool = True,
                  total: Union[int, None] = None,
                  ) -> Iterable:
    """Yield Wikidata items where p has value q.

    If has_hu == 'yes', yield only those that have a sitelink in huwiki.
    If has_hu == 'no', yield only those that don't have a sitelink in huwiki.
    Otherwise yield all items.
    If hu is True, yield directly the huwiki page rather than Wikidata page.
    Will work only with has_hu = 'yes'.

    :param p: property ID, e.g. 'P31'
    :param q: item ID the property must point to, e.g. 'Q5'
    :param has_hu: 'yes', 'no' or anything else, see above
    :param hu: yield the huwiki page instead of the item
        (only with has_hu == 'yes')
    :param total: passed on to the backlinks query of q. It limits the
        number of backlinks examined, so the number of yielded elements
        may be smaller.
    """
    target_item = pywikibot.ItemPage(repo, q)
    for page in target_item.backlinks(namespaces=0, total=total):
        item = ItemPlus(page.title())
        # A backlink does not tell which property links to q.
        if not item.property_has_value(p, q):
            continue
        if has_hu == 'yes':
            if item.has_magyar:
                # hupage is looked up only when it is really wanted.
                yield item.hupage if hu else item
        elif has_hu == 'no':
            if not item.has_magyar:
                yield item
        else:
            yield item