#!/bin/bash
#
# wlu - look up words using a string of letters and wildcard characters.
#
# Usage:
#   wlu PATTERN   look up a single pattern and exit
#   wlu           print the help text, then prompt for patterns until an
#                 empty line (or end of input) is entered
#
# Environment:
#   WLU_WORDLIST  optional path of the word list to search
#                 (default: /usr/dict/word.list)
#
# 14Sep2004: Incorporate a new class of wildcard characters to facilitate
# searching for repeat letter patterns in matched words.  These characters,
# '+', '@', and '=' each match a single letter, but with the added wrinkle
# that each occurrence of '+' (for example) in the search string matches
# _the same_ character.  For example, the search string "+@==@+" finds
# "pullup", "redder" and several less common words, and ".+@==@+" finds
# "dresser", "grammar", and a few others.
#
# NOTE(review): the copy of this script under review had lost its line
# breaks and part of the pattern translator (the text between a '<' and a
# later '>' had been stripped).  The translator below was rebuilt from the
# branches that survived and from the help text; compare it with an intact
# copy if one is available.
#

PG=wlu
WL=${WLU_WORDLIST:-/usr/dict/word.list}
LIM1=4    # If this many or fewer matches, list each on its own line.
LIM2=79   # Maximum number of characters on a line.
LIM3=23   # If more than this many lines, output to 'less'.

#######################################
# Print the usage summary.
# Globals:   PG (read)
# Arguments: none
# Outputs:   help text on stdout
#######################################
display_help() {
  echo "$PG: Find words using a string of letters and these wildcards:"
  echo "$PG: '.' matches a single letter"
  echo "$PG: '@' like '.', but all occurances of '@' match the same letter"
  echo "$PG: '=' like '.', but all occurances of '=' match the same letter"
  echo "$PG: '+' like '.', but all occurances of '+' match the same letter"
  echo "$PG: '-' matches 0 or more successive letters"
  echo "$PG: '1' matches 0 or 1 letters"
  echo "$PG: '2' matches 0, 1, or 2 letters, ... and so forth for 3,4, ...9"
  echo "$PG: Invoke with no argument to begin and exit interactive mode."
  echo "$PG: Long lists of matches are displayed using 'less'; 'q' to exit."
  echo "$PG: Examples: '-cats-', 'f.r.o.g', '2ee2', 'q...', '.+@==@+'"
}

#######################################
# Translate a wlu search string into a basic regular expression for grep.
# Arguments: $1 - search string (lower-case letters and wildcards)
# Outputs:   the regular expression on stdout
# Returns:   0 on success, 1 if the string contains an unsupported character
#######################################
build_pattern() {
  local word=$1
  local pattern='\<'          # anchor the match at the start of a word
  # For each same-letter wildcard ('+', '@', '='): the number of the
  # parenthesized sub-expression created at its first occurrence, or 0 if
  # the wildcard has not been seen yet.
  local -a group_of=(0 0 0)
  local group_count=0         # sub-expressions emitted so far
  local ch slot i

  for ((i = 0; i < ${#word}; i++)); do
    ch=${word:i:1}
    case "$ch" in
      # Letters are listed explicitly so the test does not depend on how
      # the current locale orders a range such as a-z.
      [abcdefghijklmnopqrstuvwxyz])
        pattern+=$ch
        ;;
      .)
        pattern+='[a-z]'
        ;;
      [+@=])
        case "$ch" in
          +) slot=0 ;;
          @) slot=1 ;;
          =) slot=2 ;;
        esac
        if ((group_of[slot] == 0)); then
          # First occurrence: capture the letter in a new sub-expression.
          ((group_count += 1))
          group_of[slot]=$group_count
          pattern+='\([a-z]\)'
        else
          # Later occurrences: back-reference to the captured letter.
          pattern+="\\${group_of[slot]}"
        fi
        ;;
      -)
        pattern+='[a-z]*'
        ;;
      [123456789])
        pattern+="[a-z]\\{0,${ch}\\}"
        ;;
      *)
        return 1
        ;;
    esac
  done

  # Anchor the match at the end of a word.
  printf '%s\n' "${pattern}\\>"
}

#######################################
# Lay out matched words in columns.
# Globals:   LIM1, LIM2, LIM3 (read)
# Inputs:    one word per line on stdin (only the first field is used)
# Outputs:   the words on stdout, one per line when there are LIM1 or
#            fewer, otherwise padded to equal width and packed into lines
#            of at most LIM2 characters
# Returns:   0 if the output fits in LIM3 lines, 1 if it is longer,
#            other values if awk itself failed
#######################################
format_matches() {
  awk -v lim1="$LIM1" -v lim2="$LIM2" -v lim3="$LIM3" '
    BEGIN { maxlength = 0 }
    {
      words[NR] = $1
      if (length($1) > maxlength) maxlength = length($1)
    }
    END {
      if (NR <= lim1) {
        per_line = 1
      } else {
        per_line = 1 + int((lim2 - maxlength) / (maxlength + 1))
        if (per_line < 1) per_line = 1
      }
      fmt = "%-" maxlength "s"
      lines_out = 0
      for (i = 1; i <= NR; i++) {
        if ((i - 1) % per_line == 0) {
          if (i > 1) printf "\n"
          lines_out++
        } else {
          printf " "
        }
        printf fmt, words[i]
      }
      if (NR > 0) printf "\n"
      # Each line is counted once, so the pager is requested only when the
      # output really has more than lim3 lines.
      if (lines_out > lim3) exit 1
      exit 0
    }
  '
}

#######################################
# Look up one search string and display the matching words.
# Globals:   PG, WL (read)
# Arguments: $1 - search string
# Outputs:   matches (through 'less' when the list is long), the help text
#            for an invalid search string, or a status message
# Returns:   0
#######################################
lookup() {
  local word=$1
  local pattern matches formatted status

  if ! pattern=$(build_pattern "$word"); then
    display_help
    return 0
  fi

  matches=$(grep -- "$pattern" "$WL")
  status=$?
  if ((status > 1)); then
    echo "$PG: CAUTION: an error was encountered running 'grep'." >&2
    return 0
  fi
  if ((status == 1)); then
    echo "$PG: No matches were found."
    return 0
  fi

  formatted=$(format_matches <<<"$matches")
  status=$?
  case "$status" in
    0) printf '%s\n' "$formatted" ;;
    1) printf '%s\n' "$formatted" | less ;;
    *) echo "$PG: CAUTION: an error was encountered running 'awk'." >&2 ;;
  esac
  return 0
}

#######################################
# Entry point: one-shot lookup when an argument is given, otherwise an
# interactive prompt loop.
# Arguments: $1 - optional search string; a non-empty $2 is treated as a
#            usage error and prints the help text
# Returns:   0
#######################################
main() {
  local word extra

  if [[ -n "${1-}" ]]; then
    if [[ -n "${2-}" ]]; then
      display_help
    else
      lookup "$1"
    fi
    return 0
  fi

  display_help
  while true; do
    # The status of 'read' is deliberately ignored: at end of input it
    # leaves 'word' empty, which ends the loop just like an empty line.
    read -r -e -p "${PG}>" word extra
    [[ -n "$word" ]] || break
    if [[ -n "$extra" ]]; then
      display_help
    else
      lookup "$word"
    fi
  done
  return 0
}

main "$@"