Szerkesztő:BinBot/huwiki/category.py
Megjelenés
"""Category-related stuff for Hungarian Wikipedia."""
#
# (C) Bináris, 2023
#
# Distributed under the terms of the MIT license.
import re
import pywikibot
class CatProperties:
    """Category-related helper predicates for one wiki category.

    The predicates are grouped in a class because that makes them easier
    to export. Each instance wraps a single category object, stored in
    ``self.cat``.
    """

    # Title (without namespace) of the supercategories that mark an
    # "inhabitants of a town" category.
    _CITIZEN_SUPERCAT = re.compile(r'Személyek település szerint \(.*?\)')

    # Title endings that mark a secondary category. Kept as a tuple (one
    # entry per ending) so that no separator can get lost between entries.
    # A leading space or hyphen is significant: ' nők' matches 'Magyar nők'
    # but not 'Magyar írónők'.
    _SECONDARY_ENDINGS = (
        ' személyek', '-tagok', ' tagjai', ' nők', ' magyarok', ' család',
        'díjasok', 'érmesek', 'koszorúsok', 'díszpolgárai',
        'kitüntetettjei', 'birtokosai', 'tulajdonosai',
        'diákok', 'diákjai', 'tanárai', 'végzettjei', 'túlélői',
    )

    def __init__(self, cat: 'pywikibot.Category') -> None:
        """Store the category to be examined.

        :param cat: the category object the predicates operate on
        """
        self.cat = cat

    def is_cityzencat(self) -> bool:
        """Is it like "Miskolciak" (inhabitants of)?

        Currently searches for the supercategory only on the first level,
        so it may produce false negatives.

        :return: True if any direct supercategory is titled
            "Személyek település szerint (...)"
        """
        return any(
            self._CITIZEN_SUPERCAT.fullmatch(supercat.title(with_ns=False))
            for supercat in self.cat.categories()
        )

    def is_secondary_category(self) -> bool:
        """Is it a secondary category?

        Secondary categories, such as birth, death, city, awards etc.
        are useful, but not enough to say the article is properly
        categorized. The current list is connected to biographies.
        Of course, this is not exact.

        A page is not really categorized if all of its categories are
        among:
        - hidden categories
        - Élő személyek (living persons)
        - ...- született személyek (births in year or city)
        - ...- elhunyt személyek (deaths in year or city or disease)
        - ... személyek (some persons, e.g. changed their names)
        - ...-tagok (members of)
        - ... díjasok, érmesek, koszorúsok (awarded with)
        - ... kitüntetettjei, birtokosai, tulajdonosai (awarded with)
        - ... család (member of a family)
        - Nők (women) or "... nők" (some kind of women, with space)
          (but 'Magyar írónők' is OK!)
        - inhabitants of... (see is_cityzencat())

        Only the title-based rules are checked here; hidden categories
        and inhabitants are handled by is_not_real_categorization().

        :return: True if the title is 'Nők', contains ' származású ',
            or ends with one of the secondary endings
        """
        title = self.cat.title(with_ns=False)
        if title == 'Nők':
            return True
        if ' származású ' in title:
            return True
        # str.endswith accepts a tuple and tests every ending in one call.
        return title.endswith(self._SECONDARY_ENDINGS)

    def is_not_real_categorization(self) -> bool:
        """This category is not enough to properly categorize a page.

        If an article has only such categories, it needs attention.

        :return: True if the category is hidden, an inhabitants category
            or a secondary category
        """
        return (
            self.cat.isHiddenCategory()
            or self.is_cityzencat()
            or self.is_secondary_category()
        )