Program to find the number of times each word appears in a file.


The program below finds each unique word that appears in a file and the number of times that it occurs.

#! /usr/bin/perl -w

# Program to determine the frequency distribution of words in a file.
# Written by Sam Watson Jan. 2008
#
# The file to analyse is taken from the first command-line argument; when no
# argument is given the user is prompted for it.  Every distinct word is then
# listed in sorted order together with the number of times it occurs, pausing
# periodically so the listing can be read (or abandoned with "q").

use strict;
use warnings;

# Variables:
my $stars      = "\n*****\n";	# Printout top and bottom
my $first_page = 3;		# Starting value of the line counter; presumably
				# allows for the header lines already on screen
my $page_lines = 10;		# Counter value at which the listing pauses

# --- Helpers ---

# Print $message, then read one line from STDIN.
# Returns the line without its trailing newline, or undef at end of input.
sub prompt {
    my ($message) = @_;
    print $message;
    my $reply = <STDIN>;
    return unless defined $reply;
    chomp $reply;
    return $reply;
}

# Settle on an existing file to analyse.
# $file is the initial candidate (undef means "ask the user first").
# Keeps asking for as long as the candidate does not exist and the user
# answers "y" to trying again.  Returns the file name, or undef when the
# user gives up or input ends.
sub choose_file {
    my ($file) = @_;
    while (1) {
        if (!defined $file) {
            $file = prompt($stars . "Enter the path and file name for the file to be tested\nFile: ");
            return unless defined $file;		# End of input
        }
        return $file if -e $file;			# Does the file exist

        # File not found: "y" (in either case) looks for another file,
        # anything else gives up.
        my $again = prompt("$stars $file could not be found try another filename? (y/n)  ");
        return unless defined $again && lc($again) eq "y";
        undef $file;					# Ask again on the next pass
    }
}

# Read the file at $path and count its words.
# A word is a word character (\w) followed by zero or more word characters
# or dashes ([\w-]); matching is case sensitive.
# Returns a reference to a hash mapping each word to its number of occurrences.
# Dies if the file cannot be opened.
sub count_words {
    my ($path) = @_;
    my %count_of;
    # Three-argument open: the name is never interpreted as a mode or a pipe.
    open(my $fh, '<', $path) or die "Cannot open \"$path\": $!";
    while (my $line = <$fh>) {
        while ($line =~ /(\w[\w-]*)/g) {		# Globally (/g): every word on the line
            $count_of{$1}++;
        }
    }
    close($fh);
    return \%count_of;
}

# Print each word of %$count_of, sorted, with the number of times it occurs.
# Pauses whenever the line counter reaches $page_lines.
# Returns 1 when the whole listing was printed, 0 when the user pressed "q".
sub print_report {
    my ($count_of) = @_;
    my $lines = $first_page;
    foreach my $word (sort keys %{$count_of}) {
        $lines++;
        if ($lines == $page_lines) {
            $lines = 0;
            my $reply = prompt("$stars Press \"q\" to exit any other key will continue.\n");
            return 0 if defined $reply && lc($reply) eq "q";
        }

        my $numb = $count_of->{$word};			# Number of times the word was found
        print $word;
        # Words longer than 7 characters get the count on the following line,
        # presumably to keep the tab-aligned count column lined up.
        print "\n" if length($word) > 7;
        print "\t was found $numb \t";
        print $numb > 1 ? " times\n" : " time\n";	# Singular or plural
    }
    return 1;
}

# --- Main program ---

# $ARGV[0] is undef when no name was supplied, which makes choose_file prompt.
my $file = choose_file($ARGV[0]);
if (!defined $file) {					# File not found and not looking
    print $stars;					# for another one
    exit;
}

print "$stars Below is a listing of the words found in \"$file\" and the number of times it was found \n$stars";
my $count_of = count_words($file);

if (!print_report($count_of)) {				# User quit part way through
    print $stars;
    exit;
}
print "\n$stars done$stars";