#!/usr/bin/perl
#
# dupels - list duplicates
#
# Dupels makes use of the File::Find::Duplicates CPAN module to find and then
# list in a structured format the duplicate files it finds.  It lists the
# duplicates by md5sum and size, then gives each filename of the files that
# were identical.
#
# Usage: dupels DIRECTORY [DIRECTORY ...]
#
# This and other hacks can be found at: http://oddgeek.info/
#
# Copyright (c) 2005 Jason A. Dour
#
# This software is provided 'as-is', without any express or implied warranty.
# In no event will the authors be held liable for any damages arising from the
# use of this software.
#
# Permission is granted to anyone to use this software for any purpose,
# including commercial applications, and to alter it and redistribute it
# freely, subject to the following restrictions:
#
#     1. The origin of this software must not be misrepresented; you must not
#     claim that you wrote the original software. If you use this software in a
#     product, an acknowledgment in the product documentation would be
#     appreciated but is not required.
#
#     2. Altered source versions must be plainly marked as such, and must not
#     be misrepresented as being the original software.
#
#     3. This notice may not be removed or altered from any source
#     distribution.
#
#
# Version Information
#
# 1.0   2005.09.22
#
#       Written because I needed a quick, defined method of locating duplicate
#       files in a large fileserver.  By running dupels on the filesystems, then
#       post-processing the output, I was able to reduce storage requirements
#       for the data by removing duplicate copies.  Barely a script, but hey, it
#       helped me out.
#
# NOTE: This is an altered version of the original 1.0 source.  It enables
#       strict/warnings and prints a usage message when no directories are
#       given; the listing format itself is unchanged.
#

use strict;
use warnings;

#
# Required Modules
use File::Find::Duplicates;

#
# Without at least one directory there is nothing to scan, so report how the
# script is meant to be called rather than exiting silently.
if ( !@ARGV ) {
    print STDERR "Usage: $0 DIRECTORY [DIRECTORY ...]\n";
    exit(1);
}

#
# Find duplicates beneath every directory named on the command line...
my @dupes = find_duplicate_files(@ARGV);

#
# For each duplicate fileset found...
foreach my $dupeset (@dupes) {

    # print the md5sum and size shared by the set as a header line...
    printf "%s (%d)\n", $dupeset->md5, $dupeset->size;

    # and then list every identical file beneath that header.
    foreach my $file ( @{ $dupeset->files } ) {
        printf " %s\n", $file;
    }
}

#
# We're done.
exit(0);