Upload perl script

From TheMorganReport
Jump to navigation Jump to search
#!perl -w
use strict;

# this script takes pairs of filename, description as parameters, and
# uploads them to the wiki.
#
# example:
# perl wiki-upload.pl image.jpg "first image" image2.jpg "second image"
#
# you need to create a file c:\local\etc\wiki.cfg
# containing 2 lines:
# user=USERNAME
# pass=PASSWORD
#

#####################################
#   some included support nodules
package WebServer;
use strict;
use warnings;
use HTTP::Request::Common qw(POST GET);
use LWP::UserAgent;
use HTTP::Cookies;

use List::Util qw(first);

sub new {
    my ($class, $baseurl)= @_;

    my $ua= LWP::UserAgent->new(agent=>'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7) Gecko/20040501');
    $ua->cookie_jar(HTTP::Cookies->new(hide_cookie2=>1));
    $ua->env_proxy();

    return bless {
        ua=>$ua,
        baseurl=>$baseurl,
    }, $class;
}
sub clearcookies {
    my $self= shift;

    $self->{ua}->cookie_jar(HTTP::Cookies->new(hide_cookie2=>1));
}
# almost interface compatible with httpost
#   - optional hashref with parameters is merged with parameters.
#
# httpget("/some.cgi", key1=>123, key2=>455);
# httpget("/some.cgi", { urlkey1=>999 }, key1=>123, key2=>455);
#
sub httpget {
    my $self= shift;
    my $path= shift;

    my $query;
    if (@_) {
        $query= shift;
        if (ref $query ne "HASH") {
            unshift @_, $query;
            $query=undef;
        }
    }
    my %params= @_;

    my $uri= URI->new($self->{baseurl});
    $uri->path($path);
    $uri->query_form($query?%$query:(), %params);
    my $rq= GET $uri;

    # todo: get rid of 'TE' header, and 'Connection'-TE flag. and 'Cookie2' header
    $rq->header(
        'User-Agent'=> 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7) Gecko/20040501',
        'Accept'=> ($path =~ /\.aspx|\.htm/ 
            ? 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5'
            : $path =~ /\.css/
            ? 'text/css,*/*;q=0.1'
            : '*/*') ,
        'Accept-Language'=> 'en-us,en;q=0.5',
        #'Accept-Encoding'=> 'gzip,deflate',
        'Accept-Charset'=> 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    );
    #print "request:\n", $rq->as_string, "\n";
    #warn "network access disabled\n";
    #return;
    my $rp= $self->{ua}->request($rq) or die "httperror: $@\n";

    return $rp->content;
}

# can be called in several ways:
# httppost("/some.cgi", key1=>123, key2=>455);
#   -> just form values
# httppost("/some.cgi", { urlkey1=>999 }, key1=>123, key2=>455);
#   -> both url and form params
# httppost("/some.cgi", key1=>123, key2=>455, file1=>["filename"]);
#   -> form-data file upload
sub httppost {
    my $self= shift;
    my $path= shift;

    my $query;
    if (@_) {
        $query= shift;
        if (ref $query ne "HASH") {
            unshift @_, $query;
            $query=undef;
        }
    }
    my %params= @_;

    my $useformdata= grep { defined ref $_ && ref $_ eq "ARRAY" } values %params;

    my $uri= URI->new($self->{baseurl});
    $uri->path($path);
    $uri->query_form(%$query) if ($query);
    my $rq;
    if ( $useformdata ) {
        $rq = POST $uri, Content_Type=>"form-data", Content=>[ %params ];
    }
    else {
        $rq = POST $uri, [ %params ];
    }

    # -- for http uploads : 
    # ( Content_Type=>"form-data", Content=>[ %params ]);
    $rq->header(
        'User-Agent'=> 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7) Gecko/20040501',
        'Accept'=> 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
        'Accept-Language'=> 'en-us,en;q=0.5',
        #'Accept-Encoding'=> 'gzip,deflate',
        'Accept-Charset'=> 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    );
    #print "request:\n", $rq->as_string, "\n";
    #warn "network access disabled\n";
    #return;
    my $rp= $self->{ua}->request($rq) or die "httperror: $@\n";

    #print $rp->status_line, "\n";
    #print $rp->headers->as_string();

    return $rp->content;
}

sub httprequest {
    my ($self, $method, @params)= @_;
    if (lc($method) eq "get") {
        return $self->httpget(@params);
    }
    elsif (lc($method) eq "post") {
        return $self->httppost(@params);
    }
    else {
        die "invalid http request method '$method'\n";
    }
}



package MediaWiki;
use strict;
use warnings;
use HTML::TreeBuilder;

sub new {
    my $class= shift;

    my $self= bless {
        server=> WebServer->new("http://morganreport.org"),
        url=> "/mediawiki/index.php",
    }, $class;

    return $self;
}
sub post {
    my ($self, @params)= @_;
    $self->{server}->httppost($self->{url}, @params);
}
sub get {
    my ($self, @params)= @_;
    if (!$self->{loggedin}) {
        $self->login();
    }
    $self->{server}->httpget($self->{url}, @params);
}
sub cachedget {
    my ($self, @params)= @_;

    my $filename= encodeurlasfile(@params);

    if (-e $filename) {
        return readfile($filename);
    }

    my $data= $self->get(@params);
    savefile($filename, $data);
    return $data;
}
sub DESTROY {
    my $self= shift;
}
########################################################################


sub getwikisource {
    my ($self, $page)= @_;
    my $xml= $self->post(
        title=>'Special:Export', 
        action=>'submit',
        pages=>$page,
        curonly=>'true',
    );
    if ($xml =~ /<text[^>]*>(.*?)<\/text>/s) {
        return $1;
    }
    die "could not find <text> xml tag in\n$xml\n";
}
sub getcategoryitems {
    my ($self, $page)= @_;
    my $html= $self->get(
        title=>$page,
    );
    my $tree = HTML::TreeBuilder->new();
    $tree->parse($html);
    $tree->eof();

    my ($table)= $tree->look_down(
        "_tag"=>"table",
    ) or die "could not find category table for $page\n";

    my @items;
    for $a ($table->look_down("_tag", "a")) {
        push @items, $a->as_text();
    }
    return @items;
}
sub uploadfile {
    my ($self, $imgname, $imgdesc)= @_;
    $imgname =~ s/\\/\//g;
    ( my $imgdestname= $imgname ) =~ s/.*\///;

    my $answer= $self->post(
        { title=>'Special:Upload', },
        wpUploadFile=>[$imgname],
        wpDestFile=>$imgdestname,
        wpUploadDescription=>$imgdesc,
        wpUpload=>"Upload file",
    );
    
    print $answer->content;
}
sub logout {
    my $self= shift;
    my $answer= $self->get(title=>'Special:Userlogout');
    $self->{loggedin}= 0;
    $self->{server}->clearcookies();
}
sub login {
    my ($self, $username, $password)= @_;

    my $answer= $self->post(
        { action=>'submitlogin', title=>'Special:Userlogin' },
        wpName => $username,
        wpPassword => $password,
        wpLoginattempt => 'Log in',
    );
    $self->{loggedin}= 1;
}

# title=>'Template:UpcomingTable'
# action=>'submit'

# text   wpSummary
# flag   wpMinoredit 1
# flag   wpWatchthis
# button wpSave      Save page
# button wpPreview   Show preview
# button wpDiff      Show changes
# hidden wpSection
# hidden wpEdittime  20050730124636
# hidden wpEditToken cd44d6f6003e41d1d44b9a79266a846f
# text   wpTextbox1 

sub geteditform {
    my ($self, $page, $section)= @_;
    my $answer= $self->get(
        action=>'edit', 
        title=>$page,
        defined $section ? ( section=>$section ) : (),
    );
    my $tree = HTML::TreeBuilder->new();
    $tree->parse($answer);
    $tree->eof();

    my ($formtag)= $tree->look_down(
        "_tag"=>"form",
        "name"=>"editform",
    );
    my @inputelements= $formtag->look_down(
        "_tag"=>"input",
        sub { $_[0]->attr('type') ne 'submit' && $_[0]->attr('type') ne 'radio' }
    );
    my @textelements= $formtag->look_down(
        "_tag"=>"textarea",
    );


    my %form;
    # not handling radio buttons yet.
    for my $field (@inputelements) {
        $form{$field->attr('name')}= $field->attr('value')
    }
    for my $field (@textelements) {
        $form{$field->attr('name')}= $field->as_text;
    }
    return \%form;
}
sub saveeditform {
    my ($self, $page, $form)= @_;
    my $answer= $self->post(
        { action=>'submit', title=>$page, },
        wpSave=>"Save page",
        %$form,
    );
}

sub createpage {
    my ($self, $page, $content)= @_;

    my $f= $self->geteditform($page);
    if ($f->{wpTextbox1}) {
        print "----$page\n$f->{wpTextbox1}\n\n";
    }
    $f->{wpTextbox1}= $content;
    print map { sprintf("%-20s= %s\n", $_, defined $f->{$_} ? "'$f->{$_}'":"<undef>") } keys %$f;
    $self->saveeditform($page, $f);
}

package main;

use strict;
use warnings;
use IO::File;
$|=1;
my $m= MediaWiki->new();

if (@ARGV%2) {
    die "expected an even nr of params\n";
}
my $config= readconfig();
$m->login($config->{user}, $config->{pass});

for (my $i=0 ; $i<@ARGV ; $i+=2) {
    if (!-f $ARGV[$i]) {
        die "file $ARGV[$i] not found\n";
    }
    $m->uploadfile($ARGV[$i], $ARGV[$i+1]);
}
sub readconfig {
    my %params;
    my $fh= IO::File->new("wiki.cfg", "r") or die "wiki.cfg: $!";
    while (<$fh>) {
        s/\s+$//;
        if (/(\w+)\s*=\s*(.*)/) {
            my ($k, $v)= ($1, $2);
            $params{$k}= $v;
        }
    }
    $fh->close();
    return \%params;
}