#!/bin/bash
#
# Look up words with a given pattern of repeating characters
#
# Usage:
#   plu PATTERN   List the words in the word list whose repeated-letter
#                 pattern matches PATTERN, then exit.
#   plu           Print the help text, then prompt for patterns until an
#                 empty line (or end of input) is entered.
#
# PATTERN is a single word of 2 to 62 ASCII letters; only the positions at
# which letters repeat matter, not the letters themselves.  Anything else
# (a second word, digits, punctuation, a one-letter word) prints the help.
#

PG=plu
WL=/usr/dict/word.list
LIM1=4   # If this many or fewer matches, list each on its own line.
LIM2=79  # Maximum number of characters on a line.
LIM3=23  # If more than this many lines, output to 'less'.

# Scratch files live in a private directory that is removed on exit, so
# concurrent runs cannot collide and nothing is left behind in /tmp.
WORKDIR=$(mktemp -d "${TMPDIR:-/tmp}/${PG}.XXXXXX") || {
  echo "$PG: Cannot create temporary directory." >&2
  exit 1
}
trap 'rm -rf -- "$WORKDIR"' EXIT
TMPFILE1=$WORKDIR/matches    # one matching word per line
TMPFILE2=$WORKDIR/formatted  # matches laid out in columns

# Print the usage text to stdout.
display_help() {
  echo "$PG: Find words that match the number of letters and the repeating letter"
  echo "$PG: pattern in the user-supplied argument; this is useful in finding"
  echo "$PG: individual candidate words in the solution a simple substitution"
  echo "$PG: crypogram."
  echo "$PG: Invoke with no argument to begin and exit interactive mode."
  echo "$PG: Long lists of matches are displayed using 'less'; 'q' to exit."
  echo "$PG: Example: 'zyxyz' matches 'kaiak', 'rotor', and several others."
}

# Write every word of $WL that has the same repeated-letter pattern as $1
# to stdout, one per line.
# Returns: 0  at least one match was printed
#          1  no word matched
#          2  the argument is shorter than 2 characters or contains a
#             character that is not an ASCII letter
#          3  the argument is longer than the pattern alphabet allows
find_matches() {
  gawk -v arg="$1" '
    # Return the repetition pattern of str: one character per character of
    # str.  The pattern character for the first character is always "0".
    # For every later character it is
    #   - "0" if that character does not occur anywhere BEFORE it in str;
    #   - otherwise the character of num that stands for the index
    #     (1, 2, ...) of the first character of str that it matches.
    function shape(str,    i, out) {
      out = "0"
      for (i = 2; i <= length(str); i++)
        out = out substr(num, index(substr(str, 1, i - 1), substr(str, i, 1)) + 1, 1)
      return out
    }

    BEGIN {
      # rc carries the failure status into END.  An "exit" in BEGIN still
      # runs the END rule, and an "exit N" there would replace the status.
      rc = 0
      arg_len = length(arg)
      if (arg_len < 2) { rc = 2; exit rc }

      # Validate the argument and build a lower-case copy of it, so that
      # "Aa" counts as a repeated letter.
      s = ""
      for (i = 1; i <= arg_len; i++) {
        c = tolower(substr(arg, i, 1))
        if (index("abcdefghijklmnopqrstuvwxyz", c) == 0) { rc = 2; exit rc }
        s = s c
      }

      num = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
      if (arg_len > length(num)) { rc = 3; exit rc }

      p0 = shape(s)
      cnt = 0
    }

    length($1) == arg_len && shape($1) == p0 {
      print $1
      cnt++
    }

    END {
      if (rc != 0) exit rc
      if (cnt < 1) exit 1
      exit 0
    }
  ' "$WL"
}

# Lay out the words listed in file $1 (one per line) in left-aligned columns
# on stdout.  With LIM1 or fewer words each word gets its own line; otherwise
# as many columns as fit in LIM2 characters are used.
# Returns: 1 if more than LIM3 lines were written, 0 otherwise.
format_matches() {
  gawk -v lim1="$LIM1" -v lim2="$LIM2" -v lim3="$LIM3" '
    {
      word[++count] = $1
      if (length($1) > maxlength) maxlength = length($1)
    }

    END {
      # Determine words per line
      if (count <= lim1) {
        words_per_line = 1
      } else {
        words_per_line = 1 + int((lim2 - maxlength) / (maxlength + 1))
        if (words_per_line < 1) words_per_line = 1
      }

      # Every column is padded to the width of the longest word
      fmt = "%-" maxlength "s"

      lines_out = 0
      for (i = 1; i <= count; i++) {
        if ((i - 1) % words_per_line == 0) {
          if (i > 1) printf "\n"
          lines_out++
          printf fmt, word[i]
        } else {
          printf " " fmt, word[i]
        }
      }
      if (count > 0) printf "\n"

      exit (lines_out > lim3 ? 1 : 0)
    }
  ' "$1"
}

# Handle one request.  $1 is the pattern; a non-empty $2 means the user
# supplied more than one word, which is answered with the help text.
lookup() {
  local pattern=$1 extra=$2 rtn

  if [[ -n "$extra" ]]; then
    display_help
    return
  fi

  # Report a missing word list as such; gawk would exit with status 2 for
  # it, which is indistinguishable from the "bad argument" status.
  if [[ ! -r "$WL" ]]; then
    echo "$PG: Cannot read word list '$WL'." >&2
    return 1
  fi

  # Check the user's argument and output a list of pattern matches.
  find_matches "$pattern" >"$TMPFILE1"
  rtn=$?

  case "$rtn" in
    2)
      display_help
      ;;
    3)
      echo "$PG: Too many characters in pattern."
      ;;
    1)
      echo "No pattern matches found."
      ;;
    *)
      # Consolidate the matches onto fewer lines; page long listings.
      if format_matches "$TMPFILE1" >"$TMPFILE2"; then
        cat -- "$TMPFILE2"
      else
        less <"$TMPFILE2"
      fi
      ;;
  esac
}

if [[ -n "${1:-}" ]]; then
  # One-shot mode: answer the command-line argument and finish.
  lookup "$1" "${2:-}"
else
  # Interactive mode: an empty line or end of input ends the session.
  display_help
  while true; do
    read -e -r -p "$PG>" WORD1 WORD2
    if [[ -z "$WORD1" ]]; then
      exit 0
    fi
    lookup "$WORD1" "$WORD2"
  done
fi