#!/bin/bash # # 'permu' - 19Jan2005, Jeff Edwards # # Just display all the permutations of the letters in the given argument # PG=permu # The code below makes the simplifying assumption that there will be no # more than 99 permutations per line and no more than 99999 lines of output; # if LIM2 and LIM4 permit otherwise, adjust the code accordingly! # # The idea here is to help solve jumbled word type puzzles by presenting # all premutations of the letters in a jumbled word, essentially using # the computer as you might use a pencil and paper to do the same thing, # on the theory that you'll recognize the answer when you see it. # # Formatting notes: # # 1) The permutations are generally in alphabetical order, blocked # by first letter. # 2) Within contiguous blocks of lines, the permutations are sorted # first by column, then by row. # 3) Blocks of lines are limited LIM1 lines, selectable below. # #--- # # Set formatting preferences here: # LIM1=6 # Desired number of lines per block of full lines. LIM2=79 # Maximum number of characters on a line. LIM3=23 # If more than this many lines, output to 'less'. LIM4=4000 # Error if more than this many permutations # # Change 'HOST' and other definitions as required. # HOST=linux if [ $HOST = linux ] ; then # what we should use for temporary files TMPFILE1=/tmp/${PG}-tmp1_.tmp TMPFILE2=/tmp/${PG}-tmp2_.tmp # pathnames for the utility programs RM_=/bin/rm CAT_=/usr/bin/cat GAWK_=/usr/bin/gawk SORT_=/usr/bin/sort LESS_=/usr/bin/less TOUCH_=/bin/touch elif [ $HOST = djgpp ] ; then # what we should use for temporary files TMPFILE1=/cygwin/tmp/${PG}-tmp1_.tmp TMPFILE2=/cygwin/tmp/${PG}-tmp2_.tmp # pathnames for the utility programs RM_=/djgpp/bin/rm.exe CAT_=/djgpp/bin/cat.exe GAWK_=/djgpp/bin/gawk.exe SORT_=/djgpp/bin/sort.exe LESS_=/djgpp/bin/less.exe TOUCH_=/djgpp/bin/touch.exe fi # function display_help() { echo "$PG: Display unique permutations of the letters in the given argument." echo "$PG: In interactive mode, the pager 'less' is used to print long lists;" echo "$PG: invoke with no argument to begin and exit interactive mode." } if [ -z "$1" ] ; then ONESHOT="0" display_help else ONESHOT="1" fi while [ -n "$ONESHOT" ] ; do if [ "$ONESHOT" == "0" ] ; then read -e -p "$PG>" WORD1 WORD2 if [ -z "$WORD1" ] ; then exit 0 fi elif [ "$ONESHOT" == "1" ] ; then WORD1="$1" WORD2="$2" unset ONESHOT fi if [ -n "$WORD2" ] ; then display_help else $RM_ -f $TMPFILE2 # Will be used as an empty input file $TOUCH_ $TMPFILE2 $GAWK_ -v arg=$WORD1 \ -v lim1=$LIM1 \ -v lim2=$LIM2 \ -v lim3=$LIM3 \ -v lim4=$LIM4 ' \ # exit codes: 0 -> normal # 2 -> bad argument # 3 -> too many permutations function pcnt(s, cnt,i,k,m,n) # cnt,i,k,n are local { # calculate the number of permutations, which is: # length(s)!/(k[1]! k[2]! ...) # where the k[i] are the counts of all unique letters in s. m = length(s); cnt = 1; k[1] = 1; for (i=2; i<=m; i++) { cnt *= i; k[i] = 1; n = index(substr(s,1,i-1),substr(s,i,1)); if (n > 0) { # this letter duplicates a previous one k[n]++; cnt /= k[n]; } } return cnt; } BEGIN { arg_len = length(arg); if (arg_len < 1) exit 2; # error check arg and convert to lower case s = ""; for (i=1; i<=arg_len; i++) { c = tolower(substr(arg,i,1)); n = index("abcdefghijklmnopqrstuvwxyz",c); if (n < 1) exit 2; letter[i] = c; } # sort the argument into alphabetical order for (i=1; i<=arg_len; i++) { for (j=i+1;j<=arg_len;j++) { if (letter[j] < letter[i]) { c = letter[i]; letter[i] = letter[j]; letter[j] = c; } } } s = ""; for (i=1; i<=arg_len; i++) s = s letter[i]; # calculate the number of permutations cnt = pcnt(s); if ((cnt + 0) > (lim4 + 0)) exit 3; # calculate the parameters needed to eventually format # the output DOWN the page before ACROSS the page. u_cnt = 0; u_str = ""; for (i=1; i<=arg_len; i++) { # counting the number of occurances if each unique letter c = substr(s,i,1); n = index(u_str,c); if (n == 0) { # this is the first occurance of this letter # NOTE: "k[u_cnt++] = 1;" DOES NOT WORK!!! u_cnt++; k[u_cnt] = 1; u_str = u_str c; } else { # we already have one or more of these k[n]++; } } # see if the argument has the same number of each unique letter for (i=1; i<=u_cnt; i++) { if (k[i] != k[1]) break; } u_eql = (i > u_cnt); # same number of occurances for each letter # words_per_line = 1 + int((lim2-arg_len)/(arg_len+1)); if (words_per_line < 1) words_per_line = 1; # Determine if it is possible to print all the permutations # using equal length columns, with all the permutations in each # column beginning with the same letter, all within a single page. # This is a bit complicated because of the possibility of duplicate # letters. # # We already have then number of unique letters and the count of # each in our argument. Now we will consider the number of permutations # that start with each letter, and build an array with only the unique # numbers of such permutations. # q_cnt = 0; for (i=1; i<=u_cnt; i++) { c = substr(u_str,i,1); # one of the unique letters n = index(s,c); # posn of its 1st occurance in argument q_str = substr(s,1,n-1) substr(s,n+1); n = pcnt(q_str); # permutations starting with this letter for (j=1;j<=q_cnt;j++) if (n == q[j]) break; if (j > q_cnt) { # new unique number of permutations starting with this letter q_cnt++; q[q_cnt] = n; if (q_cnt == 1) { q_min = n; # the smallest number of such permutations } else { if (n < q_min) q_min = n; } } } # Now we determine the greatest common factor of the number of # permutations for various first letters. for (gcf=q_min;gcf>0;gcf--) { for (i=1;i<=q_cnt;i++) { m = q[i] / gcf; if (m != int(m)) break; # not a factor } if (i > q_cnt) break; } # The maximum number of columns that group all the permutations # in equal length rows is gcf calcuated above. See if we should # use gcd instead of words_per_line for this argument in order # go make all the columns of even length m = cnt / gcf; if (gcf <= words_per_line) cols = gcf; else cols = words_per_line; last_line = 0; grouping = 1; ix = 1; # index of the character position we are working on. u[ix] = s; # universe of available letters for this position k[ix] = 1; # index into u[ix] for this position p = ""; # permutation-in-progress o_r = 0; # output row used for DOWN-before-ACROSS o_c = 0; # output col used for DOWN-before-ACROSS o_o = 0; # offset while (1) { # except in case of duplicate letters in a particular character # position, each time in this loop add or subtract a letter # from the partial permutation and change the character # position under consideration. # # exit the loop if we are completely done. if (ix < 1) break; # if (k[ix] > (arg_len + 1 - ix)) { # if no more letters for this position p = substr(p,1,ix-2); ix--; continue; } # Initialize the duplicate letter checker for this posn if (k[ix] == 1) d[ix] = ""; # Isolate the next letter for this position c = substr(u[ix],k[ix],1); # See if we have already used its duplicate if (index(d[ix],c) != 0) { # If this letter of this universe is a duplicate # Note: cannot happen for last letter position, # because there is only one letter for that position. k[ix]++; continue; } # Add this letter to the permutation-in-progress p = p c; if (ix < arg_len) { # This is not the last letter position d[ix] = d[ix] c; # Compute universe for the next letter position u[ix+1] = substr(u[ix],1,k[ix]-1) substr(u[ix],k[ix]+1); k[ix]++; k[ix+1] = 1; ix++; if ((ix == 2) && (grouping)) { # Here we just started a new first letter, and we are # grouping the output into entire blocks of lines starting # with the same first letter. The next chunk of code sets # up the row/column logic to (1) start this new group on # a new line, and (2) insert a blank line between this and # the previous block. if (last_line > 0) o_o += (last_line + 1); n = int((pcnt(u[ix]) + cols - 1) / cols); o_blks = int((n + lim1 - 1) / lim1); if (n > lim1) last_line = lim1; else last_line = n; o_r = 0; o_c = 0; } } else { # we just completed a permutation, so print it # # the row/column number is used to sort the permutations # into the desired output sequence and to simplify the # output logic during the output pass. # printf "%05i %02i %s\n" ,(o_r + o_o) ,o_c ,p; o_r++; if (o_r == last_line) { o_r = 0; o_c++; if (grouping && (o_c == cols)) { o_blks--; if (o_blks > 0) { o_o += (last_line + 1); o_c = 0; } } } # adjust the permutation-in-progress, and back up one letter p = substr(p,1,arg_len-2); ix--; } } # here when all permutations have been output exit 0; } ' $TMPFILE2 >$TMPFILE1 RTN0=$? if [ $RTN0 -eq 2 ] ; then echo "$PG: Bad argument." display_help elif [ $RTN0 -eq 3 ] ; then echo "$PG: Too many permutations." display_help else # Sort by the output row and column numbers calculated above $SORT_ -k 1,2 $TMPFILE1 >$TMPFILE2 # $GAWK_ -v lim3=$LIM3 ' \ BEGIN { lines_out = 0; ss = ""; # line accumulator pv = 0; # past value of row number } { if ($1 > pv) { # if a new row ... print ss; lines_out++; if ($1 > (pv + 1)) { print " ... "; lines_out++; } ss = $3; } else { if (ss == "") ss = $3; else ss = ss " " $3; } pv = $1; } END { if (ss != "") { print ss; lines_out++; } if (lines_out <= lim3) exit 0; else exit 1; } ' $TMPFILE2 >$TMPFILE1 RTN1=$? if [ $RTN1 -eq 0 -o -z "$ONESHOT" ] ; then $CAT_ $TMPFILE1 else unset LESS $LESS_ --prompt="('q' to exit pager)" $TMPFILE1 fi fi fi done $RM_ -f $TMPFILE1 $TMPFILE2