Δημοσιεύτηκε: 25 Ιούλ 2011, 10:00
Βρήκα ένα παλιό πρόγραμμα με perl mechanize που είχα φτιάξει. ( Τώρα δε θυμάμαι τόσο λεπτομερώς τη γλώσσα perl.
)
Μπορεί να σε βοηθήσει όμως, κάνει login και εμφανίζει στατιστικά από την ιστοσελίδα του φόρουμ.
Το GetHTTP και το GetForumStats είναι ξεχωριστά "package" ("classes") στο ίδιο αρχείο. Έτσι ξεχώρισα το html parser από το κυρίως script.
Για bsd (freebsd) δεν ξέρω πολλά, αλλά μπορείς να βρεις ports από το www.freshports.org
Ενδιαφέροντα φαίνονται τα ακόλουθα:
p5-WWW-Mechanize
p5-WWW-Mechanize-CGI

Μπορεί να σε βοηθήσει όμως, κάνει login και εμφανίζει στατιστικά από την ιστοσελίδα του φόρουμ.

Το GetHTTP και το GetForumStats είναι ξεχωριστά "package" ("classes") στο ίδιο αρχείο. Έτσι ξεχώρισα το html parser από το κυρίως script.
- Κώδικας: Επιλογή όλων
#!/usr/bin/perl
# Perl script to get latest users in a forum (phpbb3).
# Requires: WWW::Mechanize HTML::Parser
# Also: date
# sudo apt-get install libwww-mechanize-perl libhtml-parser-perl
#
# Copyright (c) 2009 Savvas Radevic <>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
use strict;
use warnings;
#use diagnostics;
use WWW::Mechanize;
use Term::ReadKey;
# Your forum's link
#my $forumroot = 'http://www.ubuntucy.org/forum/';
my $forumroot = 'http://forum.ubuntu-gr.org/';
# Login page
my $loginpage = 'ucp.php?mode=login';
# User login
# Set your username
my $username = 'medigeek';
# last day of last month
my $datecmd = 'date -d "1 month ago $(date +%Y-%m-01)" +%Y-%m-%d';
my $lastdaylastmonth = `$datecmd`;
# Members by date link
my $byjoindate = "memberlist.php?sk=c&sd=a&mode=searchuser&search_group_id=0&joined_select=gt&count_select=eq&joined=$lastdaylastmonth";
# By number of posts
my $byposts = "memberlist.php?mode=&sk=d&sd=d";
if ($username eq '') {
print "Type your username: ";
$username = <>;
chomp($username);
}
print "Type password for $username: ";
ReadMode(4);
my $password = ReadLine(0);
print "\n";
ReadMode(0);
my $m = WWW::Mechanize->new(
agent => 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.0.6) Gecko/2009022716 Ubuntu/9.04 (jaunty) Firefox/3.0.6',
cookie_jar => {},
);
print "Getting ".$forumroot.$loginpage."\n";
$m->get($forumroot.$loginpage);
die "Could not get page\n" unless $m->success;
# Login
print "Logging in...\n";
$m->form_id('login');
$m->set_fields(
'username' => $username,
'password' => $password,
);
$m->add_header('Referer' => $forumroot.$loginpage);
$m->click_button(name => 'login');
die "Could not submit to login page\n" unless $m->success;
# Get member info
print "Getting users that have been registered at the forum a month ago ($lastdaylastmonth)\n";
$m->get($forumroot.$byjoindate);
die "Could not get page ".$forumroot.$byjoindate."\n" unless $m->success;
my $gethttp = GetHTTP->new;
$gethttp->parse($m->content);
# Get top posters
print "\nGetting top posters information\n";
$m->get($forumroot.$byposts);
die "Could not get page ".$forumroot.$byposts."\n" unless $m->success;
$gethttp->parse($m->content);
# Get statistics
print "\nGetting general statistics\n";
$m->get($forumroot);
die "Could not get page $forumroot\n" unless $m->success;
my $getstats = GetForumStats->new;
$getstats->parse($m->content);
print "\nAll done.\n";
package GetHTTP;
use base qw(HTML::Parser);
our ($tr_tag, $td_tag, $a_tag);
sub start {
my ($self, $tag, $attr, $attrlist, $origtext) = @_;
if ($tag =~ /^tr$/i and $attr->{class} and $attr->{class} =~ /^bg[12]/i) {
$tr_tag = 1;
}
elsif ($tag =~ /^td$/i and $tr_tag) {
$td_tag = 1;
}
elsif ($tag =~ /^a$/i and $td_tag and $tr_tag) {
$a_tag = 1;
}
}
sub text {
my ($self, $plaintext) = @_;
# If we're in the anchor tag
no warnings "uninitialized";
if ($tr_tag and $td_tag and $a_tag) {
print("Data: $plaintext\n");
}
}
sub end {
my ($self, $tag, $origtext) = @_;
if ($tag =~ /^tr$/i) { $tr_tag = 0; }
elsif ($tag =~ /^td$/i) { $td_tag = 0; }
elsif ($tag =~ /^a$/i) { $a_tag = 0; }
}
package GetForumStats;
use base qw(HTML::Parser);
our ($h3_tag, $h3_wanted_tag, $p_tag, $strong_tag);
our $num = 0;
sub start {
my ($self, $tag, $attr, $attrlist, $origtext) = @_;
if ($tag =~ /^h3$/i) {
$h3_tag = 1;
}
elsif ($tag =~ /^p$/i and $h3_wanted_tag) {
$p_tag = 1;
}
elsif ($tag =~ /^strong$/i and $p_tag and $h3_wanted_tag) {
$strong_tag = 1;
}
}
sub text {
my ($self, $plaintext) = @_;
my @arraydesc = ("Total posts", "Total topics", "Total members");
# If it's the <h3> tag we're looking for
if ($h3_tag and $plaintext =~ /^Statistics$/) { $h3_wanted_tag = 1; }
if ($strong_tag and $p_tag and $h3_wanted_tag) {
no warnings "uninitialized";
if ($num < 3) {
print $arraydesc[$num].": $plaintext\n";
$num++;
}
}
}
sub end {
my ($self, $tag, $origtext) = @_;
if ($tag =~ /^h3$/i) { $h3_tag = 0; }
elsif ($tag =~ /^p$/i) {
$p_tag = 0;
$h3_wanted_tag = 0; # Disable h3_wanted_tag (left </p>)
}
elsif ($tag =~ /^strong$/i) { $strong_tag = 0; }
}
Για bsd (freebsd) δεν ξέρω πολλά, αλλά μπορείς να βρεις ports από το www.freshports.org
Ενδιαφέροντα φαίνονται τα ακόλουθα:
p5-WWW-Mechanize
p5-WWW-Mechanize-CGI