Szerkesztő:UTF46/src/utf46.pl
Megjelenés
< Szerkesztő:UTF46 | src
utf46.pl forráskódja[szerkesztés]
#!/usr/bin/perl -T
use utf8;
use strict;
use warnings;
our $VERSION='1.0';
# UTF46-bot
# developer: UTF48
# Version history
# Testing phase
# 1.0 release, fix devel
# Under developement:
# 0.11 lintlog++, origfix
# 2022-11-21
# 0.10 maxlag handling, wikitext chomping
# 2022-11-20
# 0.9 extended logging, difffix
# 2022-11-18
# 0.8: bot source upload
# 2022-11-16
# 0.7: bot summary, manual, half automated support
# 2022-11-15
# 0.6: edit timeouts
# 2022-11-11
# 0.5: task management (generator, editor, chekker)
# 2022-11-10
# 0.4: fixing assert
# 2022-11-09
# 0.3: edit page
# 2022-11-07
# 0.2: read page (wikitext)
# 0.1: try login
# 0.0: try query
# 2022-11-06
our $setup; #global config parameters
our $bot; #bot singleton
package setup;
use utf8;
use strict;
use warnings;
use lib './';
use private::setup;
# private/setup.pm contains secret config values including password.
# this file is not public
sub init {
my $def={ # defaults
pub=>{
wiki_host=>'hu.wikipedia.org',
# no support for different host yet
api_url=>'/w/api.php',
},
sec=>{
# defaults
username=>'UTF46',
password=>not('cleartext; use private/setup.pm'),
#hidden details in separate file
%{(eval{private::setup->init})},
},
};
die $@ if $@;
my $setup={
can_edit=>1,
wiki_host=>$def->{pub}->{wiki_host},
url=>'https://'.$def->{pub}->{wiki_host}.$def->{pub}->{api_url},
username=>($def->{sec}->{username}||die "username required"),
password=>($def->{sec}->{password}||die "password required"),
bot_status=>(
(
$main::VERSION =~ /^0/
)?(
'under_developement'
):(
$def->{sec}->{registered}
)?(
'registered_bot'
):(
'unregistered_bot'
)
),
unregistered_bot_lag=>60, # bot evaluation period, time between edits
registered_bot_lag=>10, # approved bot, expected time between edits
min_wait=>1, # minimum wait between edits.
nice_wait=>2, # wait between api calls (too high value may destroys tokens
interactive=>2, # minimum interactive time
};
die "Minimum wait time cannot be 0" if not $setup->{min_wait};
return $setup;
};
package page;
use utf8;
use strict;
use warnings;
sub new {
my $class=shift;
my $self={};
bless $self,$class;
return $self;
};
sub init {
my $self=shift;
my %params=@_;
$self->{orig}={%params};
$self->{loaded}=0;
return $self;
};
sub cached {
my $self=shift;
return 0;
# TODO check if the page content is already cached
};
sub load_from_cache {
my $self=shift;
die "unimplemented"; # TODO load page from cache
$self->{loaded}=1;
};
sub save_to_cache {
my $self=shift;
# TODO save page to cache
};
sub load_from_api {
my $self=shift;
my $page=shift;
$self->{page}=$page;
$self->{loaded}=1;
$page->{revisions}->[0]->{slots}->{main}->{content} =~ s/\r//sg;
};
sub edit {
my $self=shift;
my $editor=shift;
my $taskmode=shift;
my $rev=$self->{page}->{revisions}->[0];
my $text=$rev->{slots}->{main}->{content};
my $before=time;
$self->{new}=$editor->edit($self->{page}->{title},$text);
my $after=time;
my $ok;
if ($editor->automata or (($after-$before) >=$main::setup->{interactive})) {
#ok
$ok=1;
} else {
$ok=0;
};
$self->{new}->{baserevid}=$rev->{revid};
$self->{new}->{contentformat}=$rev->{slots}->{main}->{contentformat};
$self->{new}->{contentmodel}=$rev->{slots}->{main}->{contentmodel};
$self->{new}->{pageid}=$self->{page}->{pageid};
$self->{new}->{summary}="(bot v$VERSION task:$taskmode) ".$self->{new}->{summary};
if ($self->{new}->{contentformat} eq 'text/x-wiki') {
$self->{new}->{text} =~ s/\n+$//s;
};
};
sub get_new {
my $self=shift;
return $self->{new};
};
sub check {
my $self=shift;
my $checker=shift;
my $rev=$self->{page}->{revisions}->[0];
my $text=$rev->{slots}->{main}->{content};
my $before=time;
my $ok=$checker->check($self->{page}->{title},$text,$self->{new}->{text},$self->{new}->{summary});
my $after=time;
if ($checker->automata or (($after-$before) >=$main::setup->{interactive})) {
#ok
return $ok;
} else {
return 0; # if too fast, then fail, slow human check required
};
};
sub title {
my $self=shift;
return $self->{page}->{title};
};
sub orig {
my $self=shift;
return $self->{orig};
};
package task_common;
use utf8;
use strict;
use warnings;
sub new {
my $class=shift;
my $self={};
bless $self,$class;
$self->{taskmodules}=$self->init;
$self->build;
return $self;
};
sub init {
my $self=shift;
die "You must implement the init()";
};
sub automata {
my $self=shift;
return 1;
};
sub name {
my $self=shift;
die "You must implement the name()";
};
sub build {
my $self=shift;
my $tm=$self->{taskmodules};
my $enabled={map {($_=>1)} qw{generator editor checker logger}};
my $interactor={%{$enabled}};
delete $interactor->{logger};
my $interactive=0;
my $automated=0;
for my $module_key (grep {exists($enabled->{$_})} keys %{$tm}) {
my $module=$tm->{$module_key};
my $module_obj;
if (ref($module)) {
$module_obj=$module;
} else {
my $module_name;
if (-f $module_key.'/'.$module.'.pm') {
$module_name=$module_key.'::'.$module; # correct class name
} else {
die "$module_key/$module.pm does not exists";
};
eval("use $module_name");
$module_obj=$module_name->new;
};
$self->{module}->{$module_key}=$module_obj;
if ($interactor->{$module_key}) {
my $automata=1;
$automata=$module_obj->automata if ref($module_obj);
$automated||=$automata;
$interactive||=not $automata;
};
};
die "Fully automated bot not supported yet" if not $interactive;
$self->{botmode}=((
$interactive and $automated
)?(
"kézi félautomata"
):(
$interactive and not $automated
)?(
"kézi"
):(
$automated and not $interactive
)?(
"automata"
):(
not $automated and not $interactive
)?(
"hibás"
):(
"hibás"
));
};
sub get_botmode {
my $self=shift;
die "hibás működés" if $self->{botmode} eq 'hibás';
return $self->{botmode};
};
our $AUTOLOAD;
sub AUTOLOAD {
my $self=shift;
my $sub=$AUTOLOAD;
$sub=~ s/.*:://;
if (exists $self->{module}->{$sub}) {
return $self->{module}->{$sub}; # object
} else {
die "Cannot call method $sub";
};
};
sub DESTROY {};
package bot;
use utf8;
use strict;
use warnings;
#use setup;
use LWP::UserAgent;
use Data::Dumper;
use JSON;
# LWP layer
sub _log {
my $text=shift;
open FOUT,'>>:utf8','log.txt';
print FOUT $text;
close FOUT;
};
sub debug {
my $data=shift;
my $dp=Data::Dumper->new([$data]);
my $dumped= $dp->Dump;
_log($dumped);
print $dumped;
};
sub new {
my $class=shift;
my $self={
setup=>$main::setup,
logged_in=>0,
min_wait=>$main::setup->{min_wait}, # per edit
nice_wait=>$main::setup->{nice_wait}, # per api call
can_edit=>$main::setup->{can_edit},
wait_preferences=>{
0=>{
'under_developement'=>0,
'unregistered_bot'=>$main::setup->{unregistered_bot_lag},
'registered_bot'=>$main::setup->{registered_bot_lag},
}->{$main::setup->{bot_status}},
2=>2,
4=>{
'under_developement'=>$main::setup->{unregistered_bot_lag},
'unregistered_bot'=>$main::setup->{unregistered_bot_lag},
'registered_bot'=>$main::setup->{registered_bot_lag},
},
''=>0,
},
};
$self->{ua}=LWP::UserAgent->new(
agent=>'UTF46-bot/'.$main::VERSION.' (https://hu.wikipedia.org/wiki/User:UTF48)',
cookie_jar=>{},
);
bless $self,$class;
return $self;
};
# API layer
sub lag {
my $self=shift;
my $lag_method=shift;
my $value=shift;
if ($lag_method eq 'ns') {
my $lag=$self->{wait_preferences}->{$value};
if ($lag) {
sleep $lag;
} else {
die "No editing enabled in '$value' namespace";
};
} elsif ($lag_method eq 'query') {
sleep $self->{min_wait};
} elsif ($lag_method eq 'nice') {
sleep $self->{nice_wait};
} else {
die "Unexpected lag method: '$lag_method'";
};
}; # end of lag
sub api {
my $self=shift;
my $method=shift; # GET POST etc
my $content=shift; # POST
my %params=@_; # GET
$self->lag('nice');;
my $url=$self->{setup}->{url};
my $add={
format=>'json',
maxlag=>1,
assert=>(($main::setup->{bot_status} eq 'registered_bot')?'bot':'user'),
};
$add->{assertuser}=$main::setup->{username} if not delete $params{noassertuser};
my $ua=$self->{ua};
my $result;
if ($method eq 'get') {
%params=(%{$add},%params);
$url.='?'.join('&',map { ($_.'='.$params{$_}) } keys %params);
debug($url);
$result=$ua->get($url);
} elsif ($method eq 'post') {
my %merged=(
%{$add},
%{$content},
);
# debug({%merged});
$result=$ua->post($url,{%merged});
} else {
die "Bad http method: '$method'";
};
if ($result->is_success) {
$result=JSON->new->utf8->decode($result->content);
} else {
debug($result);
die "FAILED check log: ",$result->status_line;
};
if ($result->{error} and ($result->{error}->{code} eq 'maxlag')) {
print "\n=== L A G ===\n\n";
unshift @_,$content;
unshift @_,$method;
unshift @_,$self;
$self->{nice_wait}++;
goto &api;
} else {
$self->{nice_wait}-- if ($self->{nice_wait} > 2 * $main::setup->{nice_wait}) and not(int(rand(100)));
};
return $result;
}; # end of api
sub api_get {
my $self=shift;
$self->api('get',{},@_);
};
sub api_post {
my $self=shift;
my $content=shift;
$self->api('post',$content,@_);
};
# Above API layer; action layer, etc
sub query {
my $self=shift;
my $result=$self->api_get(action=>'query',@_);
if (exists($result->{batchcomplete})) {
# NOP
} else {
debug($result);
die "Batch not completed";
};
return $result;
}; # end of qyery
sub login {
my $self=shift;
#--------------------------
my $ami_r=$self->query(
meta=>'authmanagerinfo',
amirequestsfor=>'login',
assert=>'anon',
noassertuser=>-1,
);
my $rqs=$ami_r->{query}->{authmanagerinfo}->{requests};
my $ami_f={};
if (ref($rqs) eq 'ARRAY') {
for my $rq (@{$rqs}) {
if ($rq->{required} eq 'primary-required') {
$ami_f->{id}=$rq->{id};
for my $key (keys %{$rq->{fields}}) {
$ami_f->{$key}=1;
};
};
};
} else {
debug($ami_r);
die "How to login? Help me!";
};
#----------------------------
my $token_r=$self->query(
meta=>'tokens',
type=>'login',
assert=>'anon',
noassertuser=>-1,
);
#debug([$ami_r,$token_r,$ami_f]);
# checking...
if (
$token_r->{query}->{tokens}->{logintoken} and
$ami_f->{id} and
$ami_f->{username} and
$ami_f->{password} and
exists($ami_r->{query}->{authmanagerinfo}->{canauthenticatenow})
) {
#NOP
} else {
debug([$token_r,$ami_r,$ami_f]);
die "Cannot login, no login token";
};
# print $result;
#--------------------------
my $login_r=$self->api_post(
{
logintoken=>$token_r->{query}->{tokens}->{logintoken},
action=>'clientlogin',
username=>$main::setup->{username},
password=>$main::setup->{password},
loginreturnurl=>'https://'.$main::setup->{wiki_host}.'/wiki/User:'.$main::setup->{username},
assert=>'anon',
},
noassertuser=>-1,
);
if (
($login_r->{clientlogin}->{status} eq 'PASS') and
($login_r->{clientlogin}->{username} eq $main::setup->{username})
) {
$self->{logged_in}=1;
} else {
debug($login_r);
die "Login failed or other thing happened";
};
my $success=$bot->query(
meta=>'userinfo',
uiprop=>'*',
);
if ($success->{query}->{userinfo}->{name} eq $main::setup->{username}) {
# debug($success);
} else {
debug($success);
die "Finally login failed. Can't beleive";
};
}; # end of login
sub get_page {
my $self=shift;
my $todo=shift;
my $title;
if (ref($todo) eq 'HASH') {
$title=$todo->{title};
} else {
$title=$todo;
};
# my $revision=$page_r->{query}->{pages}->[0]->{revisions}->[0];
# my $page_text=$revision->{slots}->{main}->{content};
# print $page_text;
# die "EDDIG";
my $page_o=page->new->init(title=>$title,todo=>$todo);
if ($page_o->cached) {
$page_o->load_from_cache;
} else {
my $page_r=$self->query(
prop=>'revisions',
titles=>$title,
formatversion=>2,
rvprop=>join('|',qw[ ids timestamp flags comment user content ]),
rvslots=>'*',
);
# debug($page_r);
$page_o->load_from_api($page_r->{query}->{pages}->[0]);
$page_o->save_to_cache;
};
return $page_o;
}; # end of get_page
sub lag_edit {
my $self=shift;
my $page=shift;
my $ns=$page->{page}->{ns};
my $lag=$self->{wait_preferences}->{$ns};
if ($self->{lastedit}) {
die "Editing ns: '$ns' not allowed" if not $lag;
my $now=time;
my $elapsed=$now-$self->{lastedit};
if ($elapsed<0) {
die "Time machine detected. This bug was fixed in the future";
} elsif ($elapsed>=0 and $elapsed <$lag ) {
sleep ($lag-$elapsed);
} elsif ($elapsed >=$lag ) {
# NOP
} else {
die "Trichotom error";
};
} else {
die "Editing ns: '$ns' not allowed" if not $lag;
};
};
sub edit_page {
my $self=shift;
my $page=shift;
#debug($page);
#die "DISAB";
$self->lag_edit($page);
my $new_page=$page->get_new;
my $token_r=$self->query(
meta=>'tokens',
type=>'csrf',
);
debug($token_r);
my $edit={
action=>'edit',
%{$new_page},
watchlist=>'nochange',
token=>$token_r->{query}->{tokens}->{csrftoken},
};
#debug($edit);
if ($self->{can_edit}) {
my $edit_r=$self->api_post($edit);
#debug($edit_r);
} else {
#NOP
};
$self->{elapsed}=time;
}; # end of edit_page
sub tasklog {
my $self=shift;
my $log=shift;
open FOUT, ">>:utf8","tasklog.txt";
print FOUT time.":".$log."\n";
close FOUT;
};
sub pagelog {
my $self=shift;
my $page=shift;
my $ok=shift||0;
$self->tasklog("Title $ok:".$page->title);
if ($self->{logger}) {
$self->{logger}->log($page,$ok);
};
};
sub load_task {
my $self=shift;
my $taskname=shift @ARGV; # add taskname as script parameter
if (not $taskname) {
local $ENV{PATH}='/bin:/usr/bin';
print qx{ls task/*.pm};
local $|=1;
print "Which task:";
$taskname=<STDIN>; # or enter it into terminal prompt
};
if ($taskname =~ /^(\w+)$/) {
$taskname=$1;
};
$taskname='task::'.$taskname;
# if ($taskname =~ /^((generator|editor|checker)::\w+)$/) {
# $taskname=$1;
# };
eval("use $taskname");
die $@ if $@;
$self->{task}=$taskname->new;
# $self->{taskname}=$self->{task}->name.' ('.$taskname.'.pm';
};
sub process_task {
my $self=shift;
my $task=$self->{task};
bot::debug($task);
my $generator=$task->generator;
my $editor=$task->editor;
my $checker=$task->checker;
$self->{logger}=$task->logger;
my $style=$task->style;
$self->{todolist}=$generator->todolist;
$self->tasklog('Task: '.$task->name);
if ($style eq 'one_by_one') {
for my $todo (@{$self->{todolist}}) {
my $page=$self->get_page($todo);
$page->edit($editor,$self->{task}->name.' ('.$self->{task}->get_botmode.')');
my $ok=$page->check($checker);
chomp($ok); # dont enatble empty terminal input az true
$ok =~ s/\s//sg; #neither enable any whitespace
if ($ok) {
$self->edit_page($page);
};
$self->pagelog($page,$ok);
};
} elsif ($style eq 'batch') {
my $pages=[];
for my $todo (@{$self->{todolist}}) {
push @{$pages},$self->get_page($todo);
};
for my $page (@{$pages}) {
$page->edit($editor,$self->{task}->name.' ('.$self->{task}->get_botmode.')');
};
my $okpages=[];
for my $page (@{$pages}) {
my $ok=$page->check($checker);
chomp($ok);
if ($ok) {
push @{$okpages},$page;
} else {
$self->pagelog($page,0);
};
};
for my $page (@{$okpages}) {
$self->edit_page($page);
$self->pagelog($page,1);
};
} else {
die "Unknown style: '$style'";
};
};
package main;
use utf8;
use strict;
use warnings;
#use bot;
sub main {
binmode(STDIN,':utf8');
binmode(STDOUT,':utf8');
binmode(STDERR,':utf8');
$setup=setup->init;
$bot=bot->new; # bot starts
$bot->load_task; # loading task to do.
$bot->login; # login to wikipedia via api calls
# $bot->log_start; # user page: task started
$bot->process_task; # execute the task
# $bot->log_end; # user page: task finished
#$bot->logout; # unimplemented
};
main;