#!/usr/bin/perl
#**********************************************************************************************************
#
# Authored by Colin Drew (Kul) summarizer@akakul.co.uk http://akakul.co.uk/
# The intellectual copyright for this code is owned by Colin Drew
# This scripts Home Page can be found at http://scripts.akakul.co.uk/summarizer/
# Updates and Bugs/fixes can also be found at the Home Page for this software.
#
#**********************************************************************************************************
#
# Version History Software Name: Summarizer
#
# 1.0 2001 Original version written (personal use)
# 1.1 13th July 2002 First official public release (free)
# 1.1.a 16th July 2002 'Minor bug' corrected where occasionally mis-counted total sites.
# Also added a small feature to use the 'site name' that is stored on the
# 'index.html' page for each sites stats, as the SITENAME, this will allow
# easier installations for NON RaQ3/4's hopefully, and wont rely on the
# RaQ's way of site naming/identifying...symbolic links, and won't use any
# non descriptive and non URL like directory names as the sites URL's.
# Added the ability to check for updates automatically every cache refresh.
# 1.2 17th July 2002 Significant addition to the main front page, where it now will not only
# show 'usage' reports (second table), it also will show the following reports
# as well (if found): 'search' 'agent' 'site' 'url' 'ref'
# See the webalizer.conf file for detail on how to enable these
# Tags to look out for in webalizer.conf:
# AllSites,AllURLs,AllReferrers,AllAgents,AllSearchStr,AllUsers
# Also split the 'webalizer_numbers_file' variable into three sections to allow
# better/easier NON-Cobalt/Sun RaQ server setups.
# 1.3 18th July 2002 I just cant stop updating this, everytime I speak to someone (feedback)
# they say how much more usefull it would be with X feature. So now Summarizer
# can SORT by columns on when viewing the totals for a particular month!!
# Both alphabetic and numeric sorts accross domain name and all totals columns.
# Sorting can be either forward (0-9 or A-Z) or reverse (9-0 Z-A).
# 1.4 15th November 2002 Odd Symbollic Link problem on some raq4's section of code removed.
# 1.4.1 10th January 2003 Unfixed 1.4 revision, also added debugging info to .cache
#
#**********************************************************************************************************
#
# Copyright & Licencing
#
# This Software is supplied 'FREE' and 'AS IS', you may 'NOT' alter and redistribute without permission
# from me first. I will accept no responsibility for any damage this script causes or how this script is
# misused. By installing this software you acknowledge this and agree not to attempt too or hold me
# responsible in any way shape or form whatsoever.
# Updates and additions will only be made at my own discretion and will be provided under a similar
# licencing/copyright agreement and are your responsibility to check for and update as you choose.
# You MAY alter this script for your own personal use. You may NOT remove ANY part of this header.
# This script does not include any part of the Webalizer (http://webalizer.com/), but does require it
# be installed first. This you must do yourself. Summarizer is NOT asociated in anyway with Webalizer,
# and is written purely for System Adminstrators to use.
#
# NO WARRANTY
# (THE USE OF "I" AND "ME" IN THE FOLLOWING MEANS ME - "COLIN DREW" aka "KUL")
#
# BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT
# PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDER (ME)
# PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
# BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
# THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM
# PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
#
# IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL I, BE LIABLE TO YOU
# FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF
# THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
# RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO
# OPERATE WITH ANY OTHER PROGRAMS), EVEN IF I HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
#
# BY USING OR INSTALLING THIS SOFTWARE YOU HAVE AGREED TO ALL OF THE ABOVE.
#
#**********************************************************************************************************
#
# About this Software, what it does, and its purpose:
#
# 'Summarizer' was written to be used primarily on Cobalt/Sun RaQ3's & RaQ4's
# (though will easily be able to be altered to work on almost any Server) with Webalizer 1.3/2.x.
#
# Summarizer is NOT a replacement for Webalizer, in fact it complements it.
# Summarizer's main role is to make a summary of ALL the sites Webalizer Statistics (main stats only),
# and present them in a very similar way to what Webalizer does.
# Summarizer is aimed directly at Server Administrators, and NOT at end users, as these stats are
# Server Wide and include all the Sites on the server. However you should be aware that I 'believe'
# this software could be installed on many servers and used to access other sites' data.
# This is often due to lax file and directory permissions.
# (standard CGI's can also do this on many system)
#
# This Script locates all the current sites on the Server and extracts the current set of Site statistics
# from each main Webalizer page "stats/index.html", caches these for a short period
# (to speed up subsequent lookups), and then displays these results as a combined total (Summarised).
#
#**********************************************************************************************************
#
# Please feel free to let me know what you think of this software and any ideas you have for its
# improvement. I am always interested in making improvements if i can see they would benefit others.
# If you find any inaccuracies or bugs, please let me know.
#
# I do write scripts (in perl mostly) commercially, so if you have some work you want undertaken,
# or would like a commercial version of this software, please feel free to email me on the above address
# for discussions/quotes.
#
# If you like this software and feel its been of use to you, then please feel free to make a small
# financial contribution to my development fund. Details for doing so are at: http://akakul.co.uk/
#
#**********************************************************************************************************
# Turn on autoflush for the currently selected output handle, so output is sent as it is printed
$| = 1;
use CGI::Carp qw( fatalsToBrowser ); # Used during testing/debugging mostly
#use Diagnostics; # used during testing/debugging mostly
use strict qw(vars);
use CGI qw(:standard :form);
use Fcntl qw(:DEFAULT :flock);
# Optional-module probe.
# Each module listed below is "nice to have", not required. Loading is attempted
# inside a string eval so that a missing module cannot abort the script.
# Outcome: $useful_modules holds a (true) marker string only when EVERY listed
# module could be loaded; in all other cases it is left undefined.
my @test_these     = qw(LWP::UserAgent);    # The module(s) to test for/load
my $useful_modules = ();
for my $wanted_module (@test_these) {
    # One point up for every module that loads, one point down for every failure
    if (eval "require $wanted_module") { $useful_modules++; }
    else                               { $useful_modules--; }
}
# The score only equals the number of candidates when all of them loaded
$useful_modules = ($useful_modules eq scalar @test_these) ? q(yes they are found) : undef;
# Ahhhaahh, now we know if the modules are available - '$useful_modules' is set if they are
#
# NOTE: every variable below is a file-level lexical. The page template is filled in by
# evaluating placeholder names as variable names, so do not rename any of them.
#
# Back to normal variables... you may alter these...
my $auto_check_for_updates = q(yes);                  # Set to q(); to disable automatic checking (every cache refresh)
my $allstats_username      = q();
my $allstats_password      = q();
my $months_to_show         = 12;                      # Choose how many months you want displayed/totalled
my $webalizer_conf_file    = q(/etc/webalizer.conf);  # Webalizer's conf file and location
my $httpd_groups_dir       = q(/home/sites);          # The common base directory of ALL sites
my $httpd_inside_site_dir  = q(web);                  # The dir(s) inside the site (ie web - /home/sites/site99/web)
                                                      # do NOT include leading or trailing '/'s. This is the LAST directory
                                                      # that signifies the website's Document Root !!
                                                      # This 'dir' is added to above $httpd_groups_dir /*/ $httpd_inside_site_dir
my $webalizer_stats_dir    = q(stats);                # The actual stats dir where webalizer stores all its statistics
                                                      # in site. this is appended to
                                                      # $httpd_groups_dir /*/ $httpd_inside_site_dir / $webalizer_stats_dir
                                                      # do NOT include leading or trailing '/'s
my $webalizer_numbers_file = q(index.html);           # The name within each site of the main page
my $re_cache_time          = 3600; # (3600 = 1hour)   # Time (seconds) that the cache is to be considered up-to-date
my $summarizer_cache       = qq($ENV{'DOCUMENT_ROOT'}/../summarizer.cache); # name and location of the temporary cache we create/use
my $summarizer_cache_perms = 0600;                    # Permissions for the cache (I think 600 is more than enough)
my $html_start_tag = q(); # This will be overwritten if /etc/webalizer.conf is found
my $body_start_tag = q(
); # This will be overwritten if /etc/webalizer.conf is found
my $html_finish_tag = q();# This will be overwritten if /etc/webalizer.conf is found
my $title_bgcolor = q(#c0c0c0); # Table title's colour (just like webalizers c0c0c0)
# I dont see why you would want to alter these, but you can if your a hacker i suppose
# (these are the default status texts; several are replaced later once the real value is known)
my $webalizer_conf_exists     = q(No);
my $summarizer_cache_size     = q(Unknown);
my $summarizer_cache_modified = q(Not Found);
my $summarizer_cache_recache  = q(Unknown);
my $allstats_username_set     = q(Not Set);
my $allstats_password_set     = q(Not Set);
my $version_available         = q(Unable to connect to Summarizer Home Page for version information);
my $version_date_available    = q(Unable to connect to Summarizer Home Page for version information);
my $visiting_scripts_upgrade  = q(Module NOT installed, can not visit 'Summarizer' site to check automatically for upgrades!);
my $new_version_avail_mess    = q(New Version Available);
my $report_colour_alterate_1  = q(ffffff);
my $report_colour_alterate_2  = q(ffeedd);
# Avoid altering these variables, they are not customizable
my $about_sites_found     = 0;
my $url_user_pass         = ();
my $showing_month         = ();
my $show_update_available = ();
my %search_replace        = ();   # placeholder name => list of values, filled by do_the_counting()
my %sites_list_complete   = ();
my %total_sites_counted   = ();
my %stats_of_sites        = ();
my @grand_totals          = ();
my @list_of_sites         = ();
my $start_split = q(:::MAINSTART); # need some defaults
my $end_split   = q(:::MAINEND);   # need some defaults
my $script_name = $ENV{'SCRIPT_NAME'}; # used in the html pages for the GET url
my @all_months  = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec); # All the months in the rainbow
my $cache_refreshed = localtime (time);
# Direct method call instead of the indirect-object form "new CGI()", which Perl can
# mis-parse and which newer Perl feature bundles disable.
my $query      = CGI->new();
my $inp_action = $query->param('action');
my $inp_month  = $query->param('month');
my $inp_sort   = $query->param('sort');
my $inp_column = $query->param('column');
# Alter these variables and you have broken the licence agreement :(
# And I will instruct my 'Camel' to bite you!
my $version      = q(1.4.2);             # current version number
my $version_date = q(10th January 2003); # current version modified
my $cache_date   = localtime (time);
# -------------------------------------------------------------
# -------------------------------------------------------------
# -------------------------------------------------------------
#
# Let the fun commence...
# START OF PROGRAM/SCRIPT
# From here to the __DATA__ tag, the comments are for my own documentation
# They are NOT intended to explain much at all to you, just my notes when I
# first bashed this together one evening (or 2)
#
# Build the "user:password@" login prefix, but only when BOTH a username and a
# password have been configured.
if ($allstats_username && $allstats_password) {
    $url_user_pass = qq($allstats_username:$allstats_password\@);
}
# Record the credential status: the username itself is stored, whereas the
# password is only ever recorded as "Yes" (its text is not copied).
if ($allstats_username) { $allstats_username_set = $allstats_username; }
if ($allstats_password) { $allstats_password_set = q(Yes); }
# Replace the default HTML start / body / end tags with the HTMLPre, HTMLBody and
# HTMLEnd values found in Webalizer's configuration file.
# If the webalizer.conf file isnt found...carry on regardless ...
# I dont care if your hiding it from me :(
if (-e $webalizer_conf_file) {
    sysopen (WEBALIZER_CONF, $webalizer_conf_file, O_RDONLY) or die qq(Could not SYSOPEN file $webalizer_conf_file for READ: $!);
    flock (WEBALIZER_CONF, LOCK_SH) or die qq(Could not SH-LOCK $webalizer_conf_file file: $!);
    # Read the file one line at a time into $_ (an empty "while ()" condition
    # would loop forever without ever reading from the handle).
    while (<WEBALIZER_CONF>) {
        chomp;
        # The leading "#*" means commented-out directives are picked up as well.
        # Each directive overwrites any earlier value, so the last matching line wins.
        $html_start_tag  = $1 if /^\s*#*\s*HTMLPre\s+(.*)$/i;
        $body_start_tag  = $1 if /^\s*#*\s*HTMLBody\s+(.*)$/i;
        $html_finish_tag = $1 if /^\s*#*\s*HTMLEnd\s+(.*)$/i;
    }
    flock (WEBALIZER_CONF, LOCK_UN) or die qq(Could not UN-LOCK $webalizer_conf_file file: $!);
    close (WEBALIZER_CONF) or die qq(Could NOT CLOSE $webalizer_conf_file: $!);
    # Status text: the size of the conf file rather than a plain yes/no
    $webalizer_conf_exists = (-s $webalizer_conf_file) . q( bytes);
}
# Don't bother updating the cache file, just delete it if the user requests
# a cache update (easier and far quicker). Cache will be automatically
# re-created if its not found anyway - this must be faster !
# The result of unlink is stored but otherwise ignored.
# The declaration is kept apart from the conditional assignment because
# "my $x = ... if COND;" has undefined behaviour in Perl.
my $ignore_errors;
$ignore_errors = unlink ($summarizer_cache) if (lc $inp_action eq q(cache));
# Decide what to show from the GET/POST 'action' parameter:
#   about  -> the :::ABOUT template section, plus cache-file details and a site count
#   update -> the :::UPDATE template section, after calling check_for_update()
#   other  -> the totals: the :::MONTH section for one month when 'month' was given,
#             otherwise whatever section markers are already selected
if (lc $inp_action eq q(about)) {
    ($start_split, $end_split) = (q(:::ABOUTSTART), q(:::ABOUTEND));

    # Get the cache file details (size, last modified, seconds until it is rebuilt)
    if (-e $summarizer_cache) {
        my $cache_mtime = (lstat ($summarizer_cache))[9];
        $summarizer_cache_size     = (-s $summarizer_cache) . q( bytes);
        $summarizer_cache_modified = localtime ($cache_mtime);
        $summarizer_cache_recache  = (($cache_mtime + $re_cache_time) - time) . q( seconds);
    }

    # Find out if any sites exist. Keys are paths, values are directory entry names.
    my %site_for_path = ();
    opendir HTTPD_GROUPS, $httpd_groups_dir or die qq(Could not READ $httpd_groups_dir Directory: $!);
    while (my $entry = readdir(HTTPD_GROUPS)) {
        my $entry_path = qq($httpd_groups_dir/$entry);
        # A symbolic link is filed under the target it points at
        if (-l $entry_path) {
            my $link_target = readlink $entry_path;
            $site_for_path{$link_target} = $entry;
        }
        # A 'siteNN' or 'home' entry is filed under its own path, unless that
        # path has already been claimed
        next if ($entry !~ /^(site\d+|home)$/);
        next if ($site_for_path{$entry_path});
        $site_for_path{$entry_path} = $entry;
    }
    closedir HTTPD_GROUPS or die qq(Could not CLOSE $httpd_groups_dir Directory: $!);

    # Only entries that actually contain the main stats page are counted
    foreach my $site_entry (values %site_for_path) {
        next if (! -e qq($httpd_groups_dir/$site_entry/$httpd_inside_site_dir/$webalizer_stats_dir/$webalizer_numbers_file));
        $about_sites_found++;
    }
    $about_sites_found ||= q(NONE);
}
elsif (lc $inp_action eq q(update)) {
    ($start_split, $end_split) = (q(:::UPDATESTART), q(:::UPDATEEND));
    my $unused_result = check_for_update();
    # When nothing newer is on offer, say so instead of repeating the version
    if ($version eq $version_available)           { $version_available      = q(Same Version); }
    if ($version_date eq $version_date_available) { $version_date_available = q(Same Version); }
}
else {
    if (! $inp_month) { do_the_counting(); }
    else {
        ($start_split, $end_split) = (q(:::MONTHSTART), q(:::MONTHEND));
        do_the_counting($inp_month);
    }
}
#
# Display the HTML page now to the browser (also search/replace its contents with various variables)
#
# The page template is read line by line from the DATA filehandle (the text that
# follows the script's __DATA__ tag). Only the lines between the $start_split and
# $end_split markers, or between :::ALLSTART and :::ALLEND, are printed.
# Placeholders are written {{name}} and are filled in like this:
#   - if %search_replace holds a list for 'name', that list is used;
#   - otherwise 'name' is evaluated as the variable $name (double eval, /ee).
# Lines between {{start repeating}} and {{end repeating}} are collected and then
# printed once per list index, each pass taking element [$loop] of every list.
# Variable names in this section are left as they are on purpose: the /ee
# evaluation can see them.
#
print "Content-Type: text/html\n\n"; # HTML is best trust me.
my $saved_repeat = ();
# Read the template one line at a time into $_ (an empty "while ()" condition
# would loop forever without ever reading the template).
while (<DATA>) {
    if ((/$start_split/i .. /$end_split/i) or (/:::ALLSTART/i .. /:::ALLEND/i)) {
        # The marker lines themselves are never printed
        next if ((/$start_split/i or /$end_split/i) or (/:::ALLSTART/i or /:::ALLEND/i));
        #
        # Monster fun, search and replace - scalars, hashes, hashes of arrays -
        # you wont beleive how long it took to come up with this little gem !!
        # Please dont steal this bit, I know its nice and very cool, but its all my work!
        # Literal braces are backslash-escaped so the patterns do not depend on how a
        # given Perl version treats an unescaped "{" in a regex.
        if (/\{\{start[_\s]+repeating\}\}/i .. /\{\{end[_\s]+repeating\}\}/i) {
            my $all_repeat = ();
            if (/\{\{start[_\s]+repeating\}\}/i) { $saved_repeat = (); }
            else { $saved_repeat .= $_; }
            if (/\{\{end[_\s]+repeating\}\}/i) {
                $saved_repeat =~ s/\{\{(start|end)[_\s]+repeating\}\}//gi;
                my $max_keys = 0;
                # Re-mark every placeholder as {_{name}_} and note the longest list
                while ($saved_repeat =~ s/\{\{(.*?)\}\}/{_{$1}_}/i) {
                    # "defined @array" is a compile error on current Perls, so test
                    # for a non-empty list reference instead
                    if (ref($search_replace{$1}) eq 'ARRAY' and @{ $search_replace{$1} }) {
                        # Numeric comparison: as strings, index 9 would rank above index 11
                        if ($#{ $search_replace{$1} } > $max_keys) { $max_keys = $#{ $search_replace{$1} }; }
                    }
                }
                for my $loop (0 .. $max_keys) {
                    my $this_repeat = $saved_repeat;
                    while ($this_repeat =~ /\{_\{(.*?)\}_\}/) {
                        my $variable_name = $1;
                        # \Q..\E: the name is matched literally, so a name containing
                        # regex characters is still found and replaced
                        if (ref($search_replace{$variable_name}) eq 'ARRAY' and defined $search_replace{$variable_name}[$loop]) {
                            $this_repeat =~ s/\{_\{\Q$variable_name\E\}_\}/@{ $search_replace{$variable_name} }[$loop]/;
                        }
                        else { $this_repeat =~ s/\{_\{\Q$variable_name\E\}_\}/'$' . $variable_name/xee; }
                    }
                    $all_repeat .= $this_repeat;
                }
                $_ = $all_repeat;
            }
            else { next; } # still collecting the repeating section, nothing to print yet
        }
        # Placeholders outside a repeating section: a whole list (joined) or a plain variable
        while (/\{\{(.*?)\}\}/) {
            if (ref($search_replace{$1}) eq 'ARRAY' and @{ $search_replace{$1} }) { s/\{\{(.*?)\}\}/@{ $search_replace{$1} }/; }
            else { s/\{\{(.*?)\}\}/'$' . $1/xee; }
        }
        print;
    }
}
exit();
# -------------------------------------------------------------
# -------------------------------------------------------------
# -------------------------------------------------------------
# -------------------------------------------------------------
# do_the_counting( [$this_month] )
#
# Reads the cache file (rebuilding it first with go_re_cache() when it is missing
# or older than $re_cache_time seconds), adds the figures up and fills
# %search_replace with the values the page template substitutes.
#
#   $this_month  Optional. A date key, compared with the third field of each cache
#                line. When given (and true) only that month is loaded and the
#                totals are built per site: keys month_site_name, month_total_0..9
#                and month_grand_total_0..9. The order follows the 'sort' and
#                'column' request parameters.
#                When omitted, the totals are built per month: keys site_name,
#                total_sites, total_0..9, grand_total_0..9, plus one entry per site
#                in report_site_name, report_generated and report_available.
#
# Works through the file-level variables (%sites_list_complete, %stats_of_sites,
# %total_sites_counted, @list_of_sites, @grand_totals, %search_replace,
# $showing_month, $cache_refreshed); it has no meaningful return value.
# Dies when the cache file cannot be opened, locked, unlocked or closed.
sub do_the_counting {
    my $this_month = $_[0];
    my %alternate_stats_of_sites = ();
    # Rebuild the cache when it is absent or has expired. The age test is numeric:
    # a string comparison ("gt") only orders timestamps correctly while both
    # numbers have the same number of digits.
    if (-e $summarizer_cache) { go_re_cache() if (time() > ( (lstat ($summarizer_cache))[9] + $re_cache_time)); }
    else { go_re_cache(); }
    $cache_refreshed = localtime ((lstat ($summarizer_cache))[9]);
    # Cache MUST exist now...surely !!
    sysopen (SUMMARIZER_CACHE, $summarizer_cache, O_RDONLY) or die qq(Could not SYSOPEN file $summarizer_cache for READ: $!);
    # NOTE(review): an exclusive lock is taken although the handle is only read from
    flock (SUMMARIZER_CACHE, LOCK_EX) or die qq(Could not EX-LOCK $summarizer_cache file: $!);
    # Read the cache one line at a time into $_ (an empty "while ()" condition
    # would loop forever without ever reading the cache).
    while (<SUMMARIZER_CACHE>) {
        chomp;
        next if (/^INFO/); # Skip the cache header info (used only in debugging)
        # Tab separated fields: site, kind of data, date (or value), total
        my ($site, $data_type, $date, $total) = split (/\t+/);
        if (($data_type eq q(generated)) or ($data_type eq q(real_name))) {
            # For these two kinds the third field is the value itself
            $sites_list_complete{$site}{$data_type} = $date;
            next;
        }
        next if (($date ne $this_month) and ($this_month)); # Dont pop'array if not = selected month
        if ($data_type =~ /search|usage|agent|site|url|ref/i) {# eq q(usage)) {
            $sites_list_complete{$site}{$data_type}{$date} = $total;
            $showing_month = $total; # Store Word ver of month (used for month report)
            next;
        }
        # Everything else is a list of figures for that site and date
        push @{ $sites_list_complete{$site}{$data_type}{$date} }, $total;
    }
    flock (SUMMARIZER_CACHE, LOCK_UN) or die qq(Could not UN-LOCK $summarizer_cache file: $!);
    close (SUMMARIZER_CACHE) or die qq(Could NOT CLOSE $summarizer_cache: $!);
    # Add the figures up twice: per date (all sites) and per site (all loaded dates).
    # Some entries hold a plain string rather than a hash or list ('generated',
    # 'real_name', and the month text of the report kinds). Dereferencing those
    # yields nothing to loop over, and is only legal because the script enables
    # strict for 'vars' alone, not for 'refs'.
    foreach my $site (sort keys %sites_list_complete) {
        foreach my $data_type (sort keys %{ $sites_list_complete{$site} }) {
            foreach my $date (reverse sort keys %{ $sites_list_complete{$site}{$data_type} }) {
                $total_sites_counted{$date}++ if ($data_type eq q(usage));
                for my $i (0 .. $#{ $sites_list_complete{$site}{$data_type}{$date} }) { # one pass per stored figure
                    $stats_of_sites{$date}[$i] += $sites_list_complete{$site}{$data_type}{$date}[$i];
                    $alternate_stats_of_sites{$site}[$i] += $sites_list_complete{$site}{$data_type}{$date}[$i];
                }
            }
        }
    }
    my %dates_list = ();
    # For a single month the rows are sites, so switch to the per-site totals
    %stats_of_sites = %alternate_stats_of_sites if ($this_month);
    foreach my $value (reverse sort keys %stats_of_sites) { push @list_of_sites, $value; }
    # Map each date key to its month text, taken from whichever report lists it
    foreach my $site (sort keys %sites_list_complete) {
        foreach my $report_type (sort qw(search usage agent site url ref)) {
            foreach my $date (sort keys %{ $sites_list_complete{$site}{$report_type} }) {
                $dates_list{$date} = $sites_list_complete{$site}{$report_type}{$date};
            }
        }
    }
    if ($this_month) {
        #
        # Do some super magic sorting for the show_by_month
        #
        # Default order is A-Z by site. 'sort=za' reverses that. Otherwise, when
        # 'column' is a single digit, the sites are ordered by that column's total
        # (zero padded so a string sort orders the numbers), highest first when
        # 'sort' is 9, lowest first if not; equal totals stay in A-Z order.
        @list_of_sites = sort @list_of_sites;
        if ($inp_sort eq 'za') { @list_of_sites = reverse sort @list_of_sites; }
        else {
            if ($inp_column =~ /^[0-9]{1,1}$/) {
                my %sortitup = ();
                my @new_list_of_sites = @list_of_sites;
                undef @list_of_sites;
                foreach (@new_list_of_sites) {
                    my $this_sprint_num = sprintf ("%020d", $stats_of_sites{$_}[$inp_column]);
                    push @{ $sortitup{$this_sprint_num} }, $_;
                }
                if ($inp_sort eq 9) {
                    foreach my $num (reverse sort keys %sortitup) {
                        foreach my $site (sort @{ $sortitup{$num} }) {
                            push @list_of_sites, $site;
                        }
                    }
                }
                else {
                    foreach my $num (sort keys %sortitup) {
                        foreach my $site (sort @{ $sortitup{$num} }) {
                            push @list_of_sites, $site;
                        }
                    }
                }
            }
        }
        # Begin to now process the output texts
        # table 1
        for my $i (0 .. ($total_sites_counted{$this_month} - 1)) {
            next if ($list_of_sites[$i] =~ /^$/); # Just incase the sitename is empty
            my $site = $list_of_sites[$i];
            my $site_url = $site;
            # Prefer the stored real name when it looks like a host name (contains a dot)
            $site_url = $sites_list_complete{$site}{'real_name'} if ($sites_list_complete{$site}{'real_name'} =~ /.*\..*/);
            # NOTE(review): as written this assigns the value to itself
            $site_url = qq($site_url) if ($site_url =~ /\./);
            push @{ $search_replace{'month_site_name'} }, $site_url;
            for my $loop (0 .. 9) {
                $grand_totals[$loop] += $stats_of_sites{$site}[$loop]; # generate the grand totals
                push @{ $search_replace{'month_total_' . $loop} }, comma_magic($stats_of_sites{$site}[$loop]);
            }
        }
        for my $loop (0 .. 9) { push @{ $search_replace{'month_grand_total_' . $loop} }, comma_magic($grand_totals[$loop]); }
    }
    else {
        # Begin to now process the output texts
        # table 1
        # One row per month, newest first, for $months_to_show rows
        for my $i (0 .. ($months_to_show - 1)) {
            my $date = $list_of_sites[$i];
            push @{ $search_replace{'site_name'} }, qq($dates_list{$date});
            push @{ $search_replace{'total_sites'} }, comma_magic($total_sites_counted{$date});
            for my $loop (0 .. 9) {
                $grand_totals[$loop] += $stats_of_sites{$date}[$loop];
                push @{ $search_replace{'total_' . $loop} }, comma_magic($stats_of_sites{$date}[$loop]);
            }
        }
        for my $loop (0 .. 9) { push @{ $search_replace{'grand_total_' . $loop} }, comma_magic($grand_totals[$loop]); }
        # table 2
        # One entry per site: its name, when its stats were generated, and a text
        # block listing the months available for each kind of report
        foreach my $site (sort keys %sites_list_complete) {
            my $site_url = $site;
            $site_url = $sites_list_complete{$site}{'real_name'} if ($sites_list_complete{$site}{'real_name'} =~ /.*\..*/);
            # NOTE(review): as written this assigns the value to itself
            $site_url = qq($site_url) if ($site_url =~ /\./);
            push @{ $search_replace{'report_site_name'} }, $site_url;
            push @{ $search_replace{'report_generated'} }, $sites_list_complete{$site}{'generated'};
            my $report_available = ();
            my $stored_report_type = ();
            my $line_break = ();
            my $report_colour_alterating = $report_colour_alterate_1;
            $report_available .= qq(\n\t\t\n);
            foreach my $report_type (qw(usage agent ref url search site)) {
                my $newline = ();
                my $stored_year = ();
                my $months_to_show_count = 0;
                foreach my $date (reverse sort keys %{ $sites_list_complete{$site}{$report_type} }) {
                    last if ($months_to_show_count++ eq $months_to_show); # Only show this many months (max)
                    my ($this_month, $this_year) = split (/\s+/, $sites_list_complete{$site}{$report_type}{$date});
                    my $site_url = $this_month;
                    # NOTE(review): as written this assigns the same value again
                    $site_url = qq($this_month) if ($site =~ /\./);
                    # First month of a new kind of report: start its section
                    if ($report_type ne $stored_report_type) {
                        $report_available .= qq(\t\t\n);
                        $report_available .= qq(\t\t\t| $report_type | \n);
                        $report_available .= qq(\t\t\t\n);
                        # (the next two string literals span lines on purpose; the
                        # second line must start in column 0 to keep the text unchanged)
                        $line_break = qq(\t\t\t | \n\t\t
\n);
                        if ($report_colour_alterating eq $report_colour_alterate_1) { $report_colour_alterating = $report_colour_alterate_2; }
                        else { $report_colour_alterating = $report_colour_alterate_1; }
                        $stored_report_type = $report_type;
                        $stored_year = ();
                    }
                    # A new year starts "YYYY: Mon"; later months of that year are appended with commas
                    if ($this_year ne $stored_year) {
                        $report_available .= qq($newline\t\t\t\t$this_year: $site_url);
                        $newline = qq(
\n) if (! $stored_year);
                        $stored_year = $this_year;
                    }
                    else { $report_available .= qq(, $site_url); }
                }
            }
            $report_available .= qq(\n\t\t\t\n\t\t\n\t\t
\n);
            push @{ $search_replace{'report_available'} }, $report_available;
        }
    }
}
# -------------------------------------------------------------
sub go_re_cache {
# reads ALL the sites and grabs their totals, then stores them in our temporary cache file
my %sites_list_complete = ();
my %all_sites = ();
# find all the sites to look at and extract the details from
opendir HTTPD_GROUPS, $httpd_groups_dir or die qq(Could not READ $httpd_groups_dir Directory: $!);
while (my $this_site = readdir(HTTPD_GROUPS)) {
next if ($this_site =~ /^\.+$/);
# symbolic links for raq3/4's point to a site ie 'www.akakul.co.uk' would point to 'site99'
# This will deal with sites that dont have a symbollic link (something must be up with it)
# if its a raq3/4 ! but does work well in windaz, where symblinks dont exist ;) = testing
# 15/nov/2002 removed, causing odd troubles on 'patlabor's raq4
# 10/jan/2003 removed again- that was a wild goose chase, and cause more probs for others
if (-l qq($httpd_groups_dir/$this_site)) {
my $this_sites_number = readlink qq($httpd_groups_dir/$this_site);
$all_sites{$this_sites_number} = $this_site;
next;
}
if (($this_site =~ /^(site\d+|home)$/) and (! $all_sites{qq($httpd_groups_dir/$this_site)})) {
# if (! $all_sites{qq($httpd_groups_dir/$this_site)}) {
my $this_sites_number = qq($httpd_groups_dir/$this_site);
$all_sites{qq($httpd_groups_dir/$this_site)} = $this_site;
}
}
closedir HTTPD_GROUPS or die qq(Could not CLOSE $httpd_groups_dir Directory: $!);
# Itterate through each site now, and read the 'stats/index.html' file
foreach my $this_site (keys %all_sites) {
my $site = $all_sites{$this_site};
my $each_site_path = qq($httpd_groups_dir/$site/$httpd_inside_site_dir/$webalizer_stats_dir/$webalizer_numbers_file);
# Some sites may not have an 'index.html' file yet (new site ??)
next if (! -e $each_site_path);
my $this_stats_dir = qq($this_site/$httpd_inside_site_dir/$webalizer_stats_dir);
opendir STATS_DIR, $this_stats_dir or die qq(Could not READ $this_stats_dir Directory: $!);
while (my $this_stats_file = readdir(STATS_DIR)) {
next if ($this_stats_file =~ /^\.+$/);
next if ($this_stats_file !~ /\.html$/i);
if ($this_stats_file =~ /^(search|usage|agent|site|url|ref)_(\d+)\.html$/i) {
my $year = substr ($2, 0, 4);
my $month = $all_months[(substr ($2, 4, 2) - 1)];
$sites_list_complete{$site}{$1}{$2} = qq($month $year);
}
}
closedir STATS_DIR or die qq(Could not CLOSE $this_stats_dir Directory: $!);
my $date = q(unknown);
# Do the magic on each sites 'index.html' page and extract the nice numbers
# sites and names etc, and populate a 3d hash with them
sysopen (SITE_STATS, $each_site_path, O_RDONLY) or die qq(Could not SYSOPEN file $each_site_path for READ: $!);
flock (SITE_STATS, LOCK_SH) or die qq(Could not SH-LOCK $each_site_path file: $!);
while () {
chomp;
# added these two catch line to ensure that there is something set
# as Nick was having script errors :(
if (! defined $sites_list_complete{$site}{'generated'}) {
$sites_list_complete{$site}{'generated'} = q(unknown);
}
if (! defined $sites_list_complete{$site}{'generated'}) {
$sites_list_complete{$site}{'real_name'} = q(unknown);
}
$sites_list_complete{$site}{'generated'} = $1 if (/Generated\s+(.*?)
/i);
$sites_list_complete{$site}{'real_name'} = $1 if (/