Δημοσιεύτηκε: 25 Ιούλ 2011, 10:00
από medigeek
Βρήκα ένα παλιό πρόγραμμα με perl mechanize που είχα φτιάξει. ( Τώρα δε θυμάμαι τόσο λεπτομερώς τη γλώσσα perl. :P )
Μπορεί να σε βοηθήσει όμως, κάνει login και εμφανίζει στατιστικά από την ιστοσελίδα του φόρουμ. :)

Το GetHTTP και το GetForumStats είναι ξεχωριστά "package" ("classes") στο ίδιο αρχείο. Έτσι ξεχώρισα το html parser από το κυρίως script.
Κώδικας: Επιλογή όλων
#!/usr/bin/perl
# Perl script to get latest users in a forum (phpbb3).
# Requires: WWW::Mechanize HTML::Parser
# Also: date
# sudo apt-get install libwww-mechanize-perl libhtml-parser-perl
#
# Copyright (c) 2009 Savvas Radevic <>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

use strict;
use warnings;
#use diagnostics;
use WWW::Mechanize;
use Term::ReadKey;

# Your forum's link
#my $forumroot = 'http://www.ubuntucy.org/forum/';
my $forumroot = 'http://forum.ubuntu-gr.org/';

# Login page
my $loginpage = 'ucp.php?mode=login';

# User login
# Set your username
my $username = 'medigeek';

# last day of last month
my $datecmd = 'date -d "1 month ago $(date +%Y-%m-01)" +%Y-%m-%d';
my $lastdaylastmonth = `$datecmd`;

# Members by date link
my $byjoindate = "memberlist.php?sk=c&sd=a&mode=searchuser&search_group_id=0&joined_select=gt&count_select=eq&joined=$lastdaylastmonth";

# By number of posts
my $byposts = "memberlist.php?mode=&sk=d&sd=d";



if ($username eq '') {
print "Type your username: ";
$username = <>;
chomp($username);
}
print "Type password for $username: ";
ReadMode(4);
my $password = ReadLine(0);
print "\n";
ReadMode(0);


my $m = WWW::Mechanize->new(
agent => 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.0.6) Gecko/2009022716 Ubuntu/9.04 (jaunty) Firefox/3.0.6',
cookie_jar => {},
);

print "Getting ".$forumroot.$loginpage."\n";
$m->get($forumroot.$loginpage);
die "Could not get page\n" unless $m->success;

# Login
print "Logging in...\n";
$m->form_id('login');
$m->set_fields(
'username' => $username,
'password' => $password,
);
$m->add_header('Referer' => $forumroot.$loginpage);
$m->click_button(name => 'login');
die "Could not submit to login page\n" unless $m->success;

# Get member info
print "Getting users that have been registered at the forum a month ago ($lastdaylastmonth)\n";
$m->get($forumroot.$byjoindate);
die "Could not get page ".$forumroot.$byjoindate."\n" unless $m->success;
my $gethttp = GetHTTP->new;
$gethttp->parse($m->content);

# Get top posters
print "\nGetting top posters information\n";
$m->get($forumroot.$byposts);
die "Could not get page ".$forumroot.$byposts."\n" unless $m->success;
$gethttp->parse($m->content);

# Get statistics
print "\nGetting general statistics\n";
$m->get($forumroot);
die "Could not get page $forumroot\n" unless $m->success;
my $getstats = GetForumStats->new;
$getstats->parse($m->content);

print "\nAll done.\n";



package GetHTTP;
use base qw(HTML::Parser);
our ($tr_tag, $td_tag, $a_tag);
sub start {
my ($self, $tag, $attr, $attrlist, $origtext) = @_;
if ($tag =~ /^tr$/i and $attr->{class} and $attr->{class} =~ /^bg[12]/i) {
$tr_tag = 1;
}
elsif ($tag =~ /^td$/i and $tr_tag) {
$td_tag = 1;
}
elsif ($tag =~ /^a$/i and $td_tag and $tr_tag) {
$a_tag = 1;
}
}
sub text {
my ($self, $plaintext) = @_;
# If we're in the anchor tag
no warnings "uninitialized";
if ($tr_tag and $td_tag and $a_tag) {
print("Data: $plaintext\n");
}
}
sub end {
my ($self, $tag, $origtext) = @_;
if ($tag =~ /^tr$/i) { $tr_tag = 0; }
elsif ($tag =~ /^td$/i) { $td_tag = 0; }
elsif ($tag =~ /^a$/i) { $a_tag = 0; }
}

package GetForumStats;
use base qw(HTML::Parser);
our ($h3_tag, $h3_wanted_tag, $p_tag, $strong_tag);
our $num = 0;
sub start {
my ($self, $tag, $attr, $attrlist, $origtext) = @_;
if ($tag =~ /^h3$/i) {
$h3_tag = 1;
}
elsif ($tag =~ /^p$/i and $h3_wanted_tag) {
$p_tag = 1;
}
elsif ($tag =~ /^strong$/i and $p_tag and $h3_wanted_tag) {
$strong_tag = 1;
}
}
sub text {
my ($self, $plaintext) = @_;
my @arraydesc = ("Total posts", "Total topics", "Total members");
# If it's the <h3> tag we're looking for
if ($h3_tag and $plaintext =~ /^Statistics$/) { $h3_wanted_tag = 1; }
if ($strong_tag and $p_tag and $h3_wanted_tag) {
no warnings "uninitialized";
if ($num < 3) {
print $arraydesc[$num].": $plaintext\n";
$num++;
}
}
}
sub end {
my ($self, $tag, $origtext) = @_;
if ($tag =~ /^h3$/i) { $h3_tag = 0; }
elsif ($tag =~ /^p$/i) {
$p_tag = 0;
$h3_wanted_tag = 0; # Disable h3_wanted_tag (left </p>)
}
elsif ($tag =~ /^strong$/i) { $strong_tag = 0; }
}


Για bsd (freebsd) δεν ξέρω πολλά, αλλά μπορείς να βρεις ports από το www.freshports.org

Ενδιαφέροντα φαίνονται τα ακόλουθα:
p5-WWW-Mechanize
p5-WWW-Mechanize-CGI