Smiley face

Allison-Dix Algorithm(Bit-String, 1986)


Main features
Description

Allison and Dix proposed an longest common subsequence(LCS) algorithm using bit operations. The idea is based on the observation that each cell [i, j] in the LCS dynamic programing(DP) lattice is either the same as or one more than [i, j-1], so one can record the difference to transform the LCS DP lattice into 0/1. lattice. The time complexity of this algorithm is O(mn/w) time, where w is the word size of the machine.

C source code
int AllisonDix_Alg(char *stringA, char *stringB, int m, int n)
{
   int i, j, LCS = 0, bitStringCount, neg;
   char matchTable[DATATYPE] = { FLASE }, *revStringB;
   unsigned char **alphbetString, *stringX, *prevRow, *tempX;

   revStringB = (char*) calloc(n + 2, sizeof(char));

   revStringB = reverse(stringB, n);


   bitStringCount = (int) ceil((double) m / BITSIZE);
   // Count the length of bit string

   prevRow = (unsigned char*) calloc(bitStringCount + 1, sizeof(unsigned char));
   // Init the string X

   tempX = (unsigned char*) calloc(bitStringCount + 1, sizeof(unsigned char));
   // Init the temp X

   stringX = (unsigned char*) calloc(bitStringCount + 1, sizeof(unsigned char));
   // Init the string X


   for (i = 1; i <= m; i++){
      matchTable[stringA[i]] = TRUE;
   }
   for (i = 1; i <= n; i++){
      matchTable[revStringB[i]] = TRUE;
   }// Mark the used alphbet


   alphbetString = (unsigned char**) calloc(DATATYPE, sizeof(unsigned char*));
   // Set the size of alphbet string


   for (i = 0; i <= DATATYPE; i++){
      if (matchTable[i] == TRUE){
         alphbetString[i] = (unsigned char*) calloc(bitStringCount + 1, sizeof(unsigned char));
      }
   }// Init the size of alphbet string which are used


   for (i = 0; i < bitStringCount; i++){
      for (j = 1; j <= BITSIZE; j++){
         if (m%BITSIZE!=0&&(i == bitStringCount - 1) && (j > m%BITSIZE)){
            break;
         }
         alphbetString[stringA[i*BITSIZE + j]][i + 1] += (int) pow((double) 2, (int) (BITSIZE - j));
      }
   }// Init the value of alphbet string which are used

   for (i = 1; i <= n; i++){

      for (j = 1; j <= bitStringCount; j++){
         stringX[j] = alphbetString[revStringB[i]][j] | prevRow[j];
      }// Process OR operation

      for (j = 1; j <= bitStringCount; j++){
         prevRow[j] *= 2;
         if ((prevRow[j + 1] >= (int) pow((float) 2, (int) BITSIZE - 1)) || (j == bitStringCount)){
            prevRow[j] += 1;
         }
      }// Prev. row left-shift 1 +1

      for (j = bitStringCount, neg=0; j >= 1; j--){
         if (((stringX[j] - prevRow[j] >= 0) && (neg == 0)) || ((stringX[j] - prevRow[j] - 1 >= 0) && (neg == 1))){
            tempX[j] = stringX[j] - prevRow[j];
            if (neg == 1){
               tempX[j]--;
            }
            neg = 0;
         }
         else{
            tempX[j] = (int) pow((float) 2, BITSIZE) + stringX[j] - prevRow[j];
            if (neg == 1){
               tempX[j]--;
            }
            neg = 1;
         }
      }// X - Prev. row

      for (j = 1; j <= bitStringCount; j++){
         tempX[j] ^= stringX[j];
      }// Process XOR operation

      for (j = 1; j <= bitStringCount; j++){
         prevRow[j] = tempX[j] & stringX[j];
      }// Process AND operation
   }// Main process


   for (i = 1; i <= bitStringCount; i++){
      for (j = 1; j <= BITSIZE; j++){
         if (prevRow[i] % 2 != 0){
            LCS++;
         }
         prevRow[i] /= 2;
      }
   }// Find the LCS

   free(alphbetString);
   free(stringX);
   free(tempX);
   free(prevRow);
   free(revStringB);

   return LCS;

}
The files
  main.c   lcslib.h   Allison-Dix_Alg.exe

Reference
L. Allison and T. I. Dix, "A bit-string longest-common-subsequence algorithm," Infor- mation Processing Letters, Vol. 23, pp. 305-310, 1986.