#!/usr/bin/perl
#
# wl-rl - web server log r*f*r*r list
#
# List all referring URLs listed in the web server log given on STDIN.  Sort
# them, and print all the unique r*f*r*rs.
#
# This and other hacks can be found at: http://oddgeek.info/
#
# Copyright (c) 2005 Jason A. Dour
#
# This software is provided 'as-is', without any express or implied warranty.
# In no event will the authors be held liable for any damages arising from the
# use of this software.
#
# Permission is granted to anyone to use this software for any purpose,
# including commercial applications, and to alter it and redistribute it
# freely, subject to the following restrictions:
#
#     1. The origin of this software must not be misrepresented; you must not
#     claim that you wrote the original software. If you use this software in a
#     product, an acknowledgment in the product documentation would be
#     appreciated but is not required.
#
#     2. Altered source versions must be plainly marked as such, and must not
#     be misrepresented as being the original software.
#
#     3. This notice may not be removed or altered from any source
#     distribution.
#

#
# Version Information
#
# 1.0	2005.05.26
#
# 	Put in comments.  Cleaned up formatting.
#
# primordial ooze
#
# 	Used every once in a while to check who's linking into my websites.
# 	Was tired of doing this on command line.
#

# Catch typos and accidental use of undefined values; every variable below
# is lexically scoped.
use strict;
use warnings;

# Pattern for a single log line.  It accepts exactly the same lines as a
# pattern capturing all nine fields would, but only the referrer (the eighth
# field, i.e. the second of the three quoted strings) is captured, since that
# is the only field this script uses.  The layout looks like the Apache
# "combined" log format -- confirm against the server configuration.
# Literal spaces are written as [ ] because /x ignores bare whitespace.
my $log_line_re = qr{
    ^ \S+ [ ] \S+ [ ] \S+ [ ]   # three leading space-separated tokens
    \[ .+ \] [ ]                # bracketed field
    " .+ " [ ]                  # first quoted field
    \S+ [ ] \S+ [ ]             # two more space-separated tokens
    " (.+) " [ ]                # second quoted field: the referrer
    " .+ "                      # third quoted field
}x;

# Distinct referrers seen so far.  Using hash keys de-duplicates as we read,
# so memory grows with the number of unique referrers, not with log size.
my %seen;

# Loop over each line of input on STDIN...
while ( my $line = <STDIN> ) {
    # Skip lines that do not look like a log entry.  Without this check a
    # failed match would leave $1 undefined or still holding the referrer
    # captured from an earlier line.
    next unless $line =~ $log_line_re;

    # Remember the referrer.
    $seen{$1} = 1;
}

# Loop over the unique referrers in default (string) sort order...
for my $referrer ( sort keys %seen ) {
    # Printing each entry to STDOUT.
    print "$referrer\n";
}

# We're done.  Rawk!
exit(0);
