#!/perl
use strict;
use warnings;

# Reads a tab-delimited file of crime records, splits each record's
# location field into block number, direction, street name and street
# type, corrects known street-name misspellings, and writes the original
# fields plus the parsed ones to a new tab-delimited file.
#
# Input columns read:  id, sector, ucr_code, location
# Output columns:      id, sector, ucr_code, location,
#                      block_number, direction, street_name, street_type

# Street misspellings hash: maps misspelled street names to correct names
my %spell = (
    'BRD'   => 'BROAD',
    'WWOOD' => 'WYNNEWOOD',
    'WYNWD' => 'WYNNEWOOD',
);

my $in_path  = "c:/projects/nicar2000/crimes.txt";
my $out_path = "c:/projects/nicar2000/crimesfixed.txt";

# Open the tab-delimited crime data and the output file.
# Fail loudly: a silent open failure would just produce an empty output.
open( my $crimes, '<', $in_path )
    or die "Can't open $in_path for reading: $!";
open( my $crimes_fixed, '>', $out_path )
    or die "Can't open $out_path for writing: $!";

# Loop through the file one line at a time.  readline() is spelled out
# (rather than the angle-bracket operator) so the read cannot be lost if
# this file is ever passed through something that strips markup.
while ( defined( my $line = readline($crimes) ) ) {

    # Remove the trailing newline character
    chomp $line;

    # Split the line of text on tabs (\t) and take the first four fields.
    # Missing fields become empty strings so short lines do not warn.
    my ( $id, $sector, $ucr_code, $location ) =
        map { defined $_ ? $_ : '' } ( split( /\t/, $line ) )[ 0 .. 3 ];

    # Start every record with empty parsed fields so an unparseable
    # location never inherits values from the previous record.
    my ( $block_number, $direction, $street ) = ( '', '', '' );

    # 400 N BROAD ST
    if ( $location =~ m/^(\d+) ([NSEW]) (.+)$/ ) {
        ( $block_number, $direction, $street ) = ( $1, $2, $3 );
    }

    # 400 BROAD ST
    elsif ( $location =~ m/^(\d+) (.+)$/ ) {
        ( $block_number, $street ) = ( $1, $2 );
    }

    # BROAD ST 400
    elsif ( $location =~ m/^(.+) (\d+)$/ ) {
        ( $street, $block_number ) = ( $1, $2 );
    }

    # BROAD ST 400 N
    elsif ( $location =~ m/^(.+) (\d+) ([NSEW])$/ ) {
        ( $street, $block_number, $direction ) = ( $1, $2, $3 );
    }
    else {
        # The record is still written below, with empty parsed fields.
        print "++++ Couldn't parse $location\n";
    }

    # Check for a trailing ST, AV or RD in the street field
    my ( $street_name, $street_type ) = ( $street, '' );
    if ( $street =~ m/^(.+) (ST|AV|RD)$/ ) {
        ( $street_name, $street_type ) = ( $1, $2 );
    }

    # Fix misspelled names
    if ( exists $spell{$street_name} ) {
        $street_name = $spell{$street_name};
    }

    # Join fields together with tabs
    my $output = join( "\t",
        $id, $sector, $ucr_code, $location,
        $block_number, $direction, $street_name, $street_type );

    # Print the line out
    print {$crimes_fixed} "$output\n";
}

close $crimes;

# Buffered write errors only surface at close, so check it.
close $crimes_fixed
    or die "Error closing $out_path: $!";