Smiley face

Our Algorithm_MLCS(Diagonal Extension and Domination, 2017)


Main features
Abstract of the paper [Chan 2017]

Given a pair of merging sequences A, B and a target sequence T , the merged longest common subsequence (MLCS) problem is to find out the longest common subsequence (LCS) between sequences E(A, B) and T , where E(A, B) is obtained from merging two subsequences of A and B. In this paper, we first propose an algorithm for solving the MLCS problem in O(n|Σ|+(r−L+1)Lm) time and space, where r and L denote the lengths of T and MLCS, respectively, and m and n denote the minimum and maximum lengths of A and B, respectively. With the time complexity, it is clear that our algorithm is very efficient when T and E(A, B) are very similar. With slight modification, our algorithm can also solve another merged LCS problem variant, the block-merged LCS (BMLCS) problem, in O(n|Σ| + (r − L + 1)Lδ) time and space, where δ denotes the maximum number of blocks of A and B. Experimental results show that our algorithms are faster than other previously published MLCS and BMLCS algorithms for sequences with high similarities.

C++ source code
int MLCS_Ours(vector< int> T, vector< int> A, vector< int>B, int Tlen, int Alen, int Blen, int size)
{
    vector< vector< int> > nextA(size + 1, vector< int>(Alen + 1, 0));
    vector< vector< int> > nextB(size + 1, vector< int>(Blen + 1, 0));
    int L = 0;

    int Alenplusone = Alen + 1;
    int Blenplusone = Blen + 1;
    int Lmaxsize = (Alen + Blen < Tlen ? Alen + Blen : Tlen); // Max LCS length

    int Dmaxsize = (Alen < Blen ? Blen : Alen); // Max size of a dominating set

    int **Dx = new int *[Lmaxsize + 2];
    int **Dy = new int *[Lmaxsize + 2];
    for (int i = 0; i < Lmaxsize + 2; ++i) {
        Dx[i] = new int[Dmaxsize + 2];
        Dy[i] = new int[Dmaxsize + 2];
    }
    int *WAx = new int[Dmaxsize + 2];
    int *WAy = new int[Dmaxsize + 2];
    int *WBx = new int[Dmaxsize + 2];
    int *WBy = new int[Dmaxsize + 2];
    int *tempDx = new int[Dmaxsize + 2];
    int *tempDy = new int[Dmaxsize + 2];
    // Dmaxsize=smaller of Alen and Blen, cannot set to Tlenvector< vector< int> > Dx(Lmaxsize + 2, vector< int>(Dmaxsize, -1));
    
    //construct array of nextA
    for (int i = 1; i <= size; i++) {
        nextA[i][Alen] = Alen + 1;
        nextB[i][Blen] = Blen + 1;
    }
    for (int j = Alen; j >= 1; j--) {
        for (int i = 1; i <= size; i++)
            nextA[i][j - 1] = nextA[i][j];
        nextA[A[j]][j - 1] = j;  //  need update only if (A[j] == i)
    }

    //construct array of nextB
    for (int j = Blen; j >= 1; j--) {
        for (int i = 1; i <= size; i++)
            nextB[i][j - 1] = nextB[i][j];
        nextB[B[j]][j - 1] = j;  //  need update only if (B[j] == i)
    }


    Dx[0][1] = 0;
    Dy[0][1] = 0;
    Dx[0][2] = Alenplusone;
    Dy[0][2] = 0;

    for (int s = 1; s <= Lmaxsize; ++s) {
        Dx[s][1] = Alenplusone;
        Dy[s][1] = 0;
    }

    int PosT = 0;
    for (int i = 1; i <= Tlen; ++i) {
        if (i > Tlen - L)
            break;
        for (int s = 1; s <= Tlen - i + 1; ++s) {
            PosT = i + s - 1;
            //(Dk−1,s−1) Extension of A, extension of B
            int j, k;
            int sMinusOne = s - 1;
            for (j = 1, k = 1; Dx[sMinusOne][j] != Alenplusone; ++j) {
                WAx[j] = nextA[T[PosT]][Dx[sMinusOne][j]];
                WAy[j] = Dy[sMinusOne][j];
                WBx[j] = Dx[sMinusOne][j];   // some  are added
                WBy[j] = nextB[T[PosT]][Dy[sMinusOne][j]];
            }
            WAx[j] = Alenplusone;
            WBx[j] = Alenplusone;

            tempDx[1] = Alenplusone;

            // Dominate(Dk−1,s ∪ ExtA )
            if ((Dx[s][1] != Alenplusone) || (WAx[1] != Alenplusone)) {
                Dominate(Dx[s], Dy[s], WAx, WAy, tempDx, tempDy, Alenplusone, Blenplusone);
            }
            // Dominate(Dominate(Dk−1,s ∪ ExtA)∪ExtB)
            if ((tempDx[1] != Alenplusone) || (WBx[1] != Alenplusone)) {
                Dominate(tempDx, tempDy, WBx, WBy, Dx[s], Dy[s], Alenplusone, Blenplusone);
            }


            if ((Dx[s][1] == Alenplusone))
                break;

            if (s > L)
            {
                L = s;
            }

        }//end for j
    }//end for i

     // delete the allocated array Dx[][], Dy[][]
    for (int i = 0; i < Lmaxsize + 2; ++i) {
        delete[] Dx[i];
        delete[] Dy[i];
    }
    delete[] Dx;    delete[] Dy;
    delete[] WAx;   delete[] WAy;   delete[] WBx;   delete[] WBy;
    delete[] tempDx;    delete[] tempDy;
    return L;
}

void Dominate(int *L1x, int *L1y, int *L2x, int *L2y, int *Rx, int *Ry, int Alenplusone, int Blenplusone) {

    int i = 1, j = 1, tempx, tempy;
    int Rsizeminusone = 0;   // compare starting from [0]

    Rx[0] = -1;
    Ry[0] = Blenplusone;   // [0] used for discard 
    Rx[1] = Alenplusone;
    Ry[1] = Blenplusone;
    while ((L1x[i] != Alenplusone) || (L2x[j] != Alenplusone)) {
        // ← the non-null smaller one of the first 2-tuples of L1 and L2
        if (L1x[i] <= L2x[j]) { //<3 , 2> is smaller than <4 , 1>
                                //  tempx = L1x[i];
                                //  tempy = L1y[i];
            if (L1y[i] >= Ry[Rsizeminusone])  // && L2x[j] >= Rx[Rsizeminusone]
                ;   // no operation, discard L2
            else if (L1x[i] > Rx[Rsizeminusone] && L1y[i] < Ry[Rsizeminusone]) {
                ++Rsizeminusone;
                Rx[Rsizeminusone] = L1x[i];
                Ry[Rsizeminusone] = L1y[i];
            }
            else {
                Rx[Rsizeminusone] = L1x[i];
                Ry[Rsizeminusone] = L1y[i];
            } // else discard L1
            ++i;
        }
        else {
            //  tempx = L2x[j];
            //  tempy = L2y[j];
            if (L2y[j] >= Ry[Rsizeminusone])  // && L2x[j] >= Rx[Rsizeminusone]
                ;   // no operation, discard L2
            else if (L2x[j] > Rx[Rsizeminusone] && L2y[j] < Ry[Rsizeminusone]) {
                ++Rsizeminusone;
                Rx[Rsizeminusone] = L2x[j];
                Ry[Rsizeminusone] = L2y[j];
            }
            //    else if (L2x[j] <= Rx[Rsizeminusone] && L2y[j] <= Ry[Rsizeminusone]) {
            else {
                Rx[Rsizeminusone] = L2x[j];
                Ry[Rsizeminusone] = L2y[j];
            }
            ++j;
        }
    }
    ++Rsizeminusone;
    Rx[Rsizeminusone] = Alenplusone;
    Ry[Rsizeminusone] = 0;

}


The files
  All_MLCS.cpp

Reference
Kuo-Tsung Tseng., De-Sheng Chan., Chang-Biau Yang., Shou-Fu Lo., 2017, Efficient Merged Longest Common Subsequence Algorithms for Similar Sequences. (Unpublished)