(************************************************************************* ** String Similarity (Slow Method) ** ** For: 1998 UMCP Programming Contest ** Author: David Mount ** Date: March 3, 1998 ** ** This program inputs two character strings, and outputs their ** longest matching subsequence. X and Y holds the two strings, ** and Z holds the longest matching subsequence (LMS), and lx, ly, ** and lz are their respective lengths. Most of the work is done ** by the recursive procedure recLMS(). ** **************************************************************************) program lcs(input, output); type Sequence = array [1..50] of char; (* character sequence *) var X: Sequence; (* input sequences *) Y: Sequence; Z: Sequence; (* output sequence *) lx, ly, lz: integer; (* lengths of X, Y, Z *) i: integer; procedure copySeq( (* copy sequence X to Y *) X: Sequence; lx: integer; var Y: Sequence; var ly: integer); var i: integer; begin for i := 1 to lx do Y[i] := X[i]; (* copy contents *) ly := lx; (* copy length *) end; (************************************************************************** ** recLMS - computes LMS recursively ** ** This finds the LMS of X[sx..lx] and Y[sy..ly] and returns ** the result in Z[lz+1..lz']. The sequences X and Y are global. ** On entry the value lz is the length of any existing LMS that ** has already been computed. It is appended to, and the new ** value of lz (lz' above) is returned. ** ** The algorithm works recursively. First, if either string is ** empty, then the LMS is empty. Otherwise, we test the first two ** characters of the sequences X[sx..lx] and Y[sy..ly]. If they ** are equal, then we may take this character to be in the LMS. ** (This can be proven formally to be correct.) We append this ** common character onto Z, and then we recursively compute the ** LMS of the remaining sequences X[sx+1..lx] and Y[sy+1..ly]. ** ** If the first two characters are different, then there are two ** possibilities: either the first character of the first sequence ** is NOT in the LMS or the first character of the second sequence ** is NOT in the LMS. (It is possible that both are not in the ** LMS, but this case is handled by either of these two.) We do ** not know which is the case, so we try both, and see whether gives ** the longer LMS. That is, we delete the first character from the ** first sequence (considering X[sx+1..lx]) and recurse on the ** remainder and we delete the first character from the other ** (considering Y[sy+1..ly]) and recurse on the remainder. We ** take the larger LMS of the two. We need to save the contents ** of Z and the value of lz between these calls. ** ** Note that this is not the most efficient algorithm for this ** problem. ** **************************************************************************) procedure recLMS( sx, sy: integer; (* starting indices *) lx, ly: integer; (* last indices *) var Z: Sequence; (* the LMS (returned) *) var lz: integer); (* length of LMS (returned) *) var savelz: integer; (* saved length Z *) saveZ: Sequence; (* saved contents of Z *) begin if ((sx <= lx) and (sy <= ly)) then begin (* exit if either is empty *) if (X[sx] = Y[sy]) then begin (* first characters match *) lz := lz + 1; (* add character to Z *) Z[lz] := X[sx]; recLMS(sx+1, sy+1, lx, ly, Z, lz); (* recurse on remainder *) end else begin (* first chars don't match *) copySeq(Z, lz, saveZ, savelz); (* save value of Z *) recLMS(sx+1,sy,lx,ly,saveZ,savelz); (* LMS X[sx+1..lx] Y[sy..ly] *) recLMS(sx,sy+1,lx,ly,Z,lz); (* LMS X[sx..lx] Y[sy+1..ly] *) if (savelz > lz) then begin (* the first was longer *) copySeq(saveZ, savelz, Z, lz); (* restore saved values *) end; end; end; end; begin lx := 0; write('First string: '); while (not eoln(input)) do begin (* input X *) lx := lx+1; read(X[lx]); write(X[lx]); end; readln; writeln; ly := 0; write('Second string: '); while (not eoln(input)) do begin (* input Y *) ly := ly+1; read(Y[ly]); write(Y[ly]); end; readln; writeln; lz := 0; (* initial LMS length is 0 *) recLMS(1, 1, lx, ly, Z, lz); (* compute LMS recursively *) writeln('Similarity: ', lz:2); (* output length of LMS *) write('LMS: '); for i := 1 to lz do begin write(Z[i]); end; writeln; end.