Program to find the number of times each word appears in a file.
The program below finds each unique word that appears in a file and counts the number of times it occurs.
#! /usr/bin/perl -w
# Program to determine the frequency distribution of words in a file.
# Written by Sam Watson Jan. 2008
#
# Usage: pass the file to analyse as the first command-line argument, or
# run with no arguments to be prompted for a file name.  The words are
# listed in sorted order, one page at a time; "q" at a page prompt quits.

use strict;
use warnings;

# Variables:
my $stars     = "\n*****\n"; # Printout top and bottom
my $page_size = 10;          # Number of lines printed between pauses
my $cnt       = 3;           # Line counter; starts at 3, presumably to
                             # allow for the header lines already printed

# Read one line from STDIN with the trailing newline removed.
# Returns undef at end of input so callers can tell "no answer" apart
# from an empty answer.
sub read_line {
    my $line = <STDIN>;
    return unless defined $line;
    chomp $line;
    return $line;
}

# --- Get the file name ---
# Use the command-line argument when one was supplied; otherwise ask.
my $file = @ARGV ? $ARGV[0] : undef;

FILE_PROMPT:
while (1) {
    if (!defined $file) {    # No name yet (or the last one was rejected)
        print $stars, "Enter the path and file name for the file to be tested\nFile: ";
        $file = read_line();
        if (!defined $file) { # End of input: nothing more can be asked
            print $stars;
            exit;
        }
    }

    last FILE_PROMPT if -e $file; # Does the file exist

    # File not found: "y" to look for another file, anything else exits
    print "$stars $file could not be found try another filename? (y/n) ";
    my $yn = read_line();
    if (!defined $yn || lc($yn) ne "y") {
        print $stars;
        exit;                # Exit if file not found and
                             # not looking for another file
    }
    undef $file;             # Forces a new prompt on the next pass
}

print "$stars Below is a listing of the words found in \"$file\" and the number of times it was found \n$stars";

# --- Count the words ---
# Three-argument open with an explicit read mode, so characters such as
# ">" or "|" in a user-typed name cannot change how the file is opened.
open(my $fh, '<', $file) or die "Cannot open \"$file\": $!";

my %count_of;                # word => number of times it was seen
while (my $line = <$fh>) {
    # Globally (/g) look for a word character (\w) followed by
    # zero or more (*) word characters or dashes ([\w-])
    while ($line =~ /(\w[\w-]*)/g) {
        $count_of{$1}++;
    }
}
close($fh);                  # Close the file

# --- Print each word and the number of times it is in the file, sorted ---
foreach my $word (sort keys %count_of) {
    $cnt++;                  # Increment line count
    if ($cnt >= $page_size) { # A full page has been shown: pause
        $cnt = 0;
        print "$stars Press \"q\" to exit any other key will continue.\n";
        my $aa = read_line();
        # End of input is treated like "continue" so that a run with
        # no terminal attached still prints the whole listing.
        if (defined $aa && lc($aa) eq "q") {
            print $stars;
            exit;
        }
    }

    my $numb = $count_of{$word}; # Number of times the word was found
    print $word;
    # A word longer than 7 characters gets its count on the next line,
    # presumably so the tab-aligned count column stays lined up.
    print "\n" if length($word) > 7;
    print "\t was found $numb \t";
    print $numb > 1 ? " times\n" : " time\n"; # Singular or plural
}
print "\n$stars done$stars";