﻿ Peng et al. Algorithm_BMLCS

# Peng et al. Algorithm_BMLCS(Sparse Dynamic Programming, 2010)

Main features
• searching phase in O(Lrδ) time complexity
• searching phase in O(n + ) space complexity
Abstract of the paper [Peng 2010]

Detecting the interleaving relationship between sequences has become impor- tant because of its wide applications to genomic and signal comparison. Given a target sequence T and two merging sequences A and B, recently Huang et al. propose algo- rithms for the merged LCS problem, without or with block constraint, whose aim is to ﬁnd the longest common subsequence (LCS) with interleaving relationship. Without block constraint, Huang’s algorithm requires O(nmr)-time and O(mr)-space, where n = |T|, m and r denote the longer and shorter length of A and B, respectively. In this paper, for solving the problem without block constraint, we ﬁrst propose an algorithm with O(Lnr) time and O(m + Lr) space. We also propose an algorithm to solve the problem with block constraint. Our algorithms are more eﬃcient than previous results, especially for sequences over large alphabets.

C++ source code
```int BMLCS_PengSparse(vector< int> T, vector< > A, vector< int>B, int Tlen, int Alen, int Blen, int size, vector< int> EOBA, vector< int> EOBB)
{
vector< vector< int>> nextA(size + 1, vector< int>(Alen + 1, 0));
vector< vector< int>> nextB(size + 1, vector< int>(Blen + 1, 0));
vector< int> PosBlockA(Alen + 1, 0);
vector< int> PosBlockB(Blen + 1, 0);
vector< int> FEOBA(Alen + 1, 0);
vector< int> FEOBB(Blen + 1, 0);
int len = 1;
vector< vector< triple>> QL1(Tlen + 2, vector< triple>());
vector< vector< triple>> QL2(Tlen + 2, vector< triple>());
vector< vector< triple>> tempQL(Tlen + 2, vector< triple>());
int qlsize = 0;
int locationA = 0;
int locationB = 0;
//construct array of nextA
for (int i = 1; i <= size; ++i) {
locationA = Alen + 1;
for (int j = Alen; j > 0; --j) {
if (j == Alen)
nextA[i][j] = locationA;
if (A[j] == i) {
locationA = j;
nextA[i][j - 1] = locationA;
}
else {
nextA[i][j - 1] = locationA;
}
}
}

//construct array of nextB
for (int i = 1; i <= size; ++i) {
locationB = Blen + 1;
for (int j = Blen; j > 0; --j) {
if (j == Blen)
nextB[i][j] = locationB;
if (B[j] == i) {
locationB = j;
nextB[i][j - 1] = locationB;
}
else {
nextB[i][j - 1] = locationB;
}
}
}

//construct PosA and PosB ; same block record the EOB's index
for (int i = 0, count = 0; i < Alen; ++i) {
if (i <= EOBA[count]) {
PosBlockA[i] = EOBA[count];
}
else {
++count;
PosBlockA[i] = EOBA[count];
}
}
PosBlockA[Alen] = EOBA[EOBA.size() - 1];
for (int i = 0, count = 0; i < Blen; ++i) {
if (i <= EOBB[count]) {
PosBlockB[i] = EOBB[count];
}
else {
++count;
PosBlockB[i] = EOBB[count];
}
}
PosBlockB[Blen] = EOBB[EOBB.size() - 1];

//construct FEOBA and FEOBB  ; EOB=1 , not EOB=0
int sizeEOBA = EOBA.size(), sizeEOBB = EOBB.size();
for (int i = 0; i < sizeEOBA; ++i) {
FEOBA[EOBA[i]] = 1;
}
FEOBA[0] = 1;
FEOBA[Alen] = 1;
for (int i = 0; i < sizeEOBB; ++i) {
FEOBB[EOBB[i]] = 1;
}
FEOBB[0] = 1;
FEOBB[Blen] = 1;

int PosA = 0;
int PosB = 0;
int layer = 1;
while (1) {
if (layer % 2 == 1) {
QL1[0].push_back(triple(0, 0));
for (int l = 1; l <= len; ++l)
{
for (auto mytriple = QL2[l - 1].begin(); mytriple != QL2[l - 1].end(); ++mytriple)
{
PosA = nextA[T[layer - 1]][mytriple->x];
PosB = nextB[T[layer - 1]][mytriple->y];
//A[i] and B[j] are end of block.
if (FEOBA[mytriple->x] == 1 && FEOBB[mytriple->y] == 1) {
if (PosA <= Alen) {
QL1[l].push_back(triple(PosA, mytriple->y));
}
if (PosB <= Blen) {
QL1[l].push_back(triple(mytriple->x, PosB));
}
}
//A[i] is EOB, but B[j] is not.
else if (FEOBA[mytriple->x] == 1 && FEOBB[mytriple->y] == 0) {
if (PosB <= Blen) {
QL1[l].push_back(triple(mytriple->x, PosB));
}
if (PosB > PosBlockB[mytriple->y] && PosA <= Alen) {
QL1[l].push_back(triple(PosA, PosBlockB[mytriple->y]));
}
}
//B[j] is EOB, but A[i] is not.
else if (FEOBA[mytriple->x] == 0 && FEOBB[mytriple->y] == 1) {
if (PosA <= Alen) {
QL1[l].push_back(triple(PosA, mytriple->y));
}
if (PosA > PosBlockA[mytriple->x] && PosB <= Blen) {
QL1[l].push_back(triple(PosBlockA[mytriple->x], PosB));
}
}

}
if (QL2[l].size() > 0) {
for (auto mytriple = QL2[l].begin(); mytriple != QL2[l].end(); ++mytriple)
{
QL1[l].push_back(triple(mytriple->x, mytriple->y));
}
}

//2-D minima algorithm
qlsize = QL1[l].size();
if (qlsize > 1) {
for (int i = 0; i < qlsize; ++i) {
triple temptri = QL1[l][i];
if (!tempQL[temptri.x].empty() && temptri.y < tempQL[temptri.x][0].y) {
tempQL[temptri.x].erase(tempQL[temptri.x].begin());
tempQL[temptri.x].push_back(temptri);
}
else if (tempQL[temptri.x].empty() == true) {
tempQL[temptri.x].push_back(temptri);
}
}
QL1[l].clear();
int prevMaxY = Blen + 1;
for (int i = 0; i <= Alen; ++i) {
if (!tempQL[i].empty()) {
if (tempQL[i][0].y < prevMaxY) {
prevMaxY = tempQL[i][0].y;
QL1[l].push_back(tempQL[i][0]);
}
tempQL[i].clear();
}
}
}
}//end for j
if (QL1[len].size() > 0)
len++;
if (layer == Tlen + 1)
break;
++layer;
}
else {
QL2[0].push_back(triple(0, 0));
for (int l = 1; l <= len; ++l)
{
for (auto mytriple = QL1[l - 1].begin(); mytriple != QL1[l - 1].end(); ++mytriple)
{
PosA = nextA[T[layer - 1]][mytriple->x];
PosB = nextB[T[layer - 1]][mytriple->y];
//A[i] and B[j] are end of block.
if (FEOBA[mytriple->x] == 1 && FEOBB[mytriple->y] == 1) {
if (PosA <= Alen) {
QL2[l].push_back(triple(PosA, mytriple->y));
}
if (PosB <= Blen) {
QL2[l].push_back(triple(mytriple->x, PosB));
}
}
//A[i] is EOB, but B[j] is not.
else if (FEOBA[mytriple->x] == 1 && FEOBB[mytriple->y] == 0) {
if (PosB <= Blen) {
QL2[l].push_back(triple(mytriple->x, PosB));
}
if (PosB > PosBlockB[mytriple->y] && PosA <= Alen) {
QL2[l].push_back(triple(PosA, PosBlockB[mytriple->y]));
}
}
//B[j] is EOB, but A[i] is not.
else if (FEOBA[mytriple->x] == 0 && FEOBB[mytriple->y] == 1) {
if (PosA <= Alen) {
QL2[l].push_back(triple(PosA, mytriple->y));
}
if (PosA > PosBlockA[mytriple->x] && PosB <= Blen) {
QL2[l].push_back(triple(PosBlockA[mytriple->x], PosB));
}
}

}
if (QL1[l].size() > 0) {
for (auto mytriple = QL1[l].begin(); mytriple != QL1[l].end(); ++mytriple)
{
QL2[l].push_back(triple(mytriple->x, mytriple->y));
}
}

//2-D minima algorithm
qlsize = QL2[l].size();
if (qlsize > 1) {
for (int i = 0; i < qlsize; ++i) {
triple temptri = QL2[l][i];
if (!tempQL[temptri.x].empty() && temptri.y < tempQL[temptri.x][0].y) {
tempQL[temptri.x].erase(tempQL[temptri.x].begin());
tempQL[temptri.x].push_back(temptri);
}
else if (tempQL[temptri.x].empty() == true) {
tempQL[temptri.x].push_back(temptri);
}
}
QL2[l].clear();
int prevMaxY = Blen + 1;
for (int i = 0; i <= Alen; ++i) {
if (!tempQL[i].empty()) {
if (tempQL[i][0].y < prevMaxY) {
prevMaxY = tempQL[i][0].y;
QL2[l].push_back(tempQL[i][0]);
}
tempQL[i].clear();
}
}
}
}//end for j
if (QL2[len].size() > 0)
len++;
if (layer == Tlen + 1)
break;
++layer;
}
}

return len - 1;
}

```
The files
All_BMLCS.cpp

Reference
Peng, Y.-H., Yang, C.-B., Huang, K.-S., Tseng, C.-T., Hor, C.-Y., 2010. Eﬃcient sparse dynamic programming for the merged LCS problem with block constraints. International Journal of Innovative Computing, Information and Control 6, 1935–1947.