################################################################################
# MTMostVisited
# A Plugin for Moveable Type
#
# Release 1.3
# May 27, 2003
#
# From Jeffrey Borlik
#
# ---------------------------------------------------------------------------
# This software is provided as-is.
# You may use it for commercial or personal use.
# If you distribute it, please keep this notice intact.
#
# Copyright (c) 2003 Jeffrey Borlik
# ---------------------------------------------------------------------------
################################################################################
package MT::Plugin::MostVisited;
use strict;
use MT::Template::Context;
use vars qw($VERSION);
$VERSION = 1.3;

# ---------------------------------------------------------------------------
# Site configuration.
# ---------------------------------------------------------------------------

# Directory that holds the Apache access logs.
my $logdir = "/home/zajmal/logs/zackvision.com/http";

# URL path of the individual entry archives: no scheme/host in front and no
# trailing "/". Together with $filetype it is matched against logged requests.
my $root = "/weblog/archives/entry";

# File extension of the individual archive pages.
my $filetype = "html";

# Uncompressed logs to parse: the live access.log followed by every rotated
# log whose name ends in two digits (the first one in the range 0-3).
my @searchfiles = (
    "$logdir/access.log",
    glob("$logdir/access.log.*[0123][0123456789]"),
);

# Rotated logs whose names end in "gz"; these are unpacked before parsing.
my @zippedfiles = glob("$logdir/access.log.*gz");

# Page name => accumulated hit count, filled in by loadVisitedFromLog.
my %archives;

# turning on cleanbuild will silently put error messages in your blog
my $cleanbuild = 0;

# ---------------------------------------------------------------------------
# Template tag registration.
# ---------------------------------------------------------------------------
MT::Template::Context->add_container_tag( MostVisited => \&MostVisited );
MT::Template::Context->add_tag( $_->[0] => $_->[1] )
    for [ MostVisitedCount => \&MostVisitedCount ],
        [ MostVisitedLink  => \&MostVisitedLink ];
################################################################################
# MostVisited - handler for the MTMostVisited container tag.
#
# Loops through the most-requested entries, based upon the hit counts gathered
# from the Apache access logs, and builds the enclosed template once per entry.
# The container can hold MTMostVisitedCount and MTMostVisitedLink tags, as well
# as any MTEntry-type tags.
#
# Arguments:
#   $ctx  - template context; supplies the 'builder' and 'tokens' stash values
#           and receives the per-entry stash values for the inner tags.
#   $args - hashref of tag attributes:
#             count      - maximum number of entries to emit (default 10)
#             cleanbuild - when true, diagnostics are returned as the tag's
#                          output instead of being raised via $ctx->error;
#                          falls back to the file-level $cleanbuild setting.
#
# Returns the concatenated output of the enclosed template, or a diagnostic
# message. With cleanbuild off, failures return whatever $ctx->error returns.
sub MostVisited {
    my ($ctx, $args) = @_;
    my $builder = $ctx->stash('builder');
    my $tokens  = $ctx->stash('tokens');

    # Resolve cleanbuild for this call only. Writing the attribute back into
    # the file-level variable would make one tag's setting leak into every
    # later tag that omits the attribute.
    my $clean = exists $args->{"cleanbuild"} ? $args->{"cleanbuild"} : $cleanbuild;

    # Single exit point for diagnostics: plain text in cleanbuild mode,
    # a context error otherwise.
    my $fail = sub {
        my ($message) = @_;
        return $clean ? $message : $ctx->error($message);
    };

    require MT::Entry;
    use Apache::ParseLog;
    use File::Copy;
    my $parser = Apache::ParseLog->new();
    %archives = ();

    # Load up the archives hash (filename/counts) via the Apache parser.
    # This retrieves pages requested from the $root directory that end in
    # $filetype (usually html). Some servers rotate their logs, so every
    # configured log file is tried; a negative return means "not readable".
    my $gotanything = 0;
    foreach my $logfile (@searchfiles) {
        $gotanything = 1 if loadVisitedFromLog($logfile, $parser) >= 0;
    }

    # Compressed logs are copied into the current directory, unpacked there,
    # parsed, and removed again.
    foreach my $zipfile (@zippedfiles) {
        my ($filepath, $filename) = $zipfile =~ m|^(.*[/\\])(.*?)$|;

        # Only real ".gz" names can be unpacked by gunzip.
        next unless defined $filename && $filename =~ /\.gz$/;
        copy($zipfile, $filename) or next;

        # List form: the file name never passes through a shell.
        system('gunzip', $filename);

        (my $plain = $filename) =~ s/\.gz$//;
        $gotanything = 1 if loadVisitedFromLog($plain, $parser) >= 0;

        # Remove the unpacked log, and the copy too in case gunzip failed
        # and left it behind.
        unlink $plain, $filename;
    }

    # Verify that we have actually found ANY apache accesslog file.
    if (!$gotanything) {
        return $fail->("Error in MTMostVisited. A webserver log was not found.
The plugin was looking for files in the \"$logdir\" directory, with filenames @searchfiles.
Please double-check the location of your Apache webserver access log, and
possibly change the \$logdir variable, or any of the entries in the \@searchfiles list.");
    }

    # Verify that we found ANY hit via the $root regex
    if (scalar keys %archives == 0) {
        return $fail->("Error in MTMostVisited. NO hits on MT archives were found in the
webserver logs (some webserver logs were found). The plugin determines if it was a MT archive hit by matching the
page hits against the \$root variable. In your case, it looks for hits that look
like http://yoursite.whatever$root/000001.$filetype . If that URL
doesn't look right for your individual archives, then you will either need to
change your \$root variable or the \$filetype variable in the plugin. Note that
the \$root variable should not have the http/domain thing in front of it, and should
not end in a \"/\". I.e. it should look like /archives or something like that.
(It is also possible that you really haven't had any hits on your individual archives.
In that case, this plugin is probably not very useful to you.)");
    }

    # Walk the pages from most to least requested.
    my $content = "";
    my $inum = 0;
    my $imax = exists $args->{"count"} ? $args->{"count"} : 10;
    foreach my $entryid (sort { $archives{$b} <=> $archives{$a} } keys %archives) {
        # Expose the current page to MTMostVisitedCount / MTMostVisitedLink.
        $ctx->stash('mostvisited_count', $archives{$entryid});
        $ctx->stash('mostvisited_entryid', $entryid);

        # A name with non-digit characters is a month archive or a
        # popup-image page rather than an entry. Simply skip that one.
        next if $entryid =~ /\D/;

        # ASSUMPTION: The name of the page is the same as the entry_id.
        # TODO: Need filtering to ensure that it is actually an entry for the
        #       current blog, but that doesn't seem too critical as different
        #       blogs probably have different filepaths.
        my $entry = MT::Entry->load($entryid);
        if (!$entry) {
            return $fail->("Error in MTMostVisited, while loading EntryID \"$entryid\": "
                . "This does not appear to be a valid EntryID. It should be all numbers.");
        }

        local $ctx->{__stash}{entry} = $entry;
        local $ctx->{current_timestamp} = $entry->created_on;

        # Build the enclosed template for this entry. Only an undefined
        # result is treated as a failure; an empty string is valid output.
        my $out = $builder->build($ctx, $tokens);
        if (!defined $out) {
            return $fail->("Error in MTMostVisited, while loading EntryID \"$entryid\": "
                . $builder->errstr
                . " Perhaps you are using a tag unsupported by the MTMostVisited collection.");
        }

        $content .= $out;
        last if (++$inum >= $imax);
    }

    # Nothing was emitted. %archives is known to be non-empty here (the empty
    # case returned above), so every candidate page was skipped by the
    # digits-only check or produced empty output.
    if ($content eq "") {
        my $num = scalar keys %archives;
        $content = "No hits on entry archive files. There were $num pages found
that match the pattern http://your.url.tld$root/000001.$filetype." . " They
must have been skipped due to extraneous characters in the filename. (Are your
individual archives named with the EntryID? If not, this plugin will not work. Bug
the author about it.)\n";
    }
    return $content;
}
# MostVisitedCount - handler for the MTMostVisitedCount tag.
# Returns the value stored under the 'mostvisited_count' stash key, which the
# MostVisited container sets to the current page's hit count.
sub MostVisitedCount {
    my ($ctx) = @_;
    return $ctx->stash('mostvisited_count');
}
# MostVisitedLink - handler for the MTMostVisitedLink tag.
# Builds the site-relative link for the current page from the configured
# $root path, the 'mostvisited_entryid' stash value and the $filetype
# extension.
sub MostVisitedLink {
    my ($ctx) = @_;
    my $entryid = $ctx->stash('mostvisited_entryid');
    return "$root/$entryid.$filetype";
}
# loadVisitedFromLog - Utility subroutine that parses one Apache access log
# with the Apache::ParseLog module and adds the results to the %archives hash.
#
# Arguments:
#   $log    - path of the access log to read
#   $parser - Apache::ParseLog object; reconfigured here to read $log
#
# Every requested file of the form "$root/<name>.$filetype" has its hit count
# added to $archives{<name>}.
#
# Returns -1 when $log is not readable, otherwise the number of distinct
# matching files found in this log (which may be 0).
sub loadVisitedFromLog {
    my ($log, $parser) = @_;
    return -1 unless -r $log;

    $parser = $parser->config(transferlog => "$log");
    my $transferlog = $parser->getTransferLog();
    my %files = $transferlog->file();

    # $root and $filetype are literal text, not patterns: quote them so that
    # characters such as "." in the configured values only match themselves.
    my $archive_page = qr/^\Q$root\E\/(\w+)\.\Q$filetype\E$/;

    my $numfound = 0;
    foreach my $requested (keys %files) {
        next unless $requested =~ $archive_page;
        my $page = $1;
        ++$numfound;
        $archives{$page} += $files{$requested};
    }
    return $numfound;
}
1;