The longest common subsequence and sequence alignment problems have been studied extensively and they can be regarded as the relationship measurement between sequences. However, most of them treat sequences evenly or consider only two sequences. Recently, with the rise of whole-genome duplication research, the doubly conserved synteny relationship among three sequences should be considered. It is a brand new model to find a merging way for understanding the interleaving relationship of sequences. Here, we define the merged LCS problem for measuring the interleaving relationship among three sequences. An O (n3 ) algorithm is first proposed for solving the problem, where n is the sequence length. We further discuss the variant version of this problem with the block information. For the blocked merged LCS problem, we propose an algorithm with time complexity O (n2 m), where m is the number of blocks. An improved O (n2 + nm2 ) algorithm is further proposed for the same blocked problem.
int BMLCS_stable(vector< int> T, vector< int> A, vector< int> B, int Tlen, int Alen, int Blen, vector< int> EOBA, vector< int> EOBB, int size) { constructAllStable_int(T, A, B, Tlen, Alen, Blen, EOBA, EOBB, size); int Asize = (int)EOBA.size(), Bsize = (int)EOBB.size(); vector< vector< vector< int>>> vb(EOBA.size() + 1, vector< vector< int>>(EOBB.size() + 1, vector< int>(Tlen + 1))); vector< int> minv(Tlen + 1); int range = 0, tmpvalue = 0; //constrcut vb[i][0] for (int tmpx = 1; tmpx < AllStable[0][0].size(); ++tmpx) { vb[1][0][tmpx] = AllStable[0][0][tmpx]; } for (int i = 2; i <= Asize; ++i) { for (int s = 1; s <= Tlen; ++s) { minv[s] = Tlen + 1; } for (int p = 1; p < vb[i - 1][0].size(); ++p) { if (vb[i - 1][0][p] == 0) break; if (p == 1) { if (vb[i - 1][0][p] >= Tlen) { break; } range = (int)(AllStable[i - 1][vb[i - 1][0][p]].size()); for (int k = 0; k < range; ++k) { minv[k + p] = AllStable[i - 1][vb[i - 1][0][p]][k]; } } else { if (vb[i - 1][0][p] >= Tlen) { break; } range = (int)(AllStable[i - 1][vb[i - 1][0][p]].size()); for (int k = 0; k < range; ++k) { tmpvalue = AllStable[i - 1][vb[i - 1][0][p]][k]; if (tmpvalue < minv[k + p]) minv[k + p] = tmpvalue; } } } for (int tmpi = 1; tmpi <= Tlen; ++tmpi) { if (minv[tmpi] == Tlen + 1) break; vb[i][0][tmpi] = minv[tmpi]; } } //constrcut vb[0][j] for (int tmpx = 1; tmpx < AllStable[Asize][0].size(); ++tmpx) { vb[0][1][tmpx] = AllStable[Asize][0][tmpx]; } for (int j = 2; j <= Bsize; ++j) { for (int s = 1; s <= Tlen; ++s) { minv[s] = Tlen + 1; } for (int p = 1; p < vb[0][j - 1].size(); ++p) { if (vb[0][j - 1][p] == 0) break; if (p == 1) { if (vb[0][j - 1][p] >= Tlen) { break; } range = (int)(AllStable[j - 1 + Asize][vb[0][j - 1][p]].size()); for (int k = 0; k < range; ++k) { minv[k + p] = AllStable[j - 1 + Asize][vb[0][j - 1][p]][k]; } } else { if (vb[0][j - 1][p] >= Tlen) { break; } range = (int)(AllStable[j - 1 + Asize][vb[0][j - 1][p]].size()); for (int k = 0; k < range; ++k) { tmpvalue = AllStable[j - 1 + Asize][vb[0][j - 1][p]][k]; if (tmpvalue < minv[k + p]) minv[k + p] = tmpvalue; } } } for (int tmpi = 1; tmpi <= Tlen; ++tmpi) { if (minv[tmpi] == Tlen + 1) break; vb[0][j][tmpi] = minv[tmpi]; } } //construct vb[i][j] vector< int> mini(Tlen + 1); vector< int> minj(Tlen + 1); //vector< int> maxv(Tlen + 1); for (int i = 1; i <= Asize; ++i) { for (int j = 1; j <= Bsize; ++j) { for (int s = 1; s <= Tlen; ++s) { mini[s] = Tlen + 1; minj[s] = Tlen + 1; //maxv[s] = 0; } for (int p = 1; p < vb[i][j - 1].size(); ++p) { if (vb[i][j - 1][p] == 0) { break; } if (p == 1) { if (vb[i][j - 1][p] >= Tlen) { break; } range = AllStable[j - 1 + Asize][vb[i][j - 1][p]].size(); for (int k = 0; k < range; ++k) { mini[k + p] = AllStable[j - 1 + Asize][vb[i][j - 1][p]][k]; } } else { if (vb[i][j - 1][p] >= Tlen) { break; } range = AllStable[j - 1 + Asize][vb[i][j - 1][p]].size(); for (int k = 0; k < range; ++k) { tmpvalue = AllStable[j - 1 + Asize][vb[i][j - 1][p]][k]; if (tmpvalue < mini[k + p]) mini[k + p] = tmpvalue; } } } for (int p = 1; p < vb[i - 1][j].size(); ++p) { if (vb[i - 1][j][p] == 0) { break; } if (p == 1) { if (vb[i - 1][j][p] >= Tlen) { break; } range = AllStable[i - 1][vb[i - 1][j][p]].size(); for (int k = 0; k < range; ++k) { minj[k + p] = AllStable[i - 1][vb[i - 1][j][p]][k]; } } else { if (vb[i - 1][j][p] >= Tlen) { break; } range = AllStable[i - 1][vb[i - 1][j][p]].size(); for (int k = 0; k < range; ++k) { tmpvalue = AllStable[i - 1][vb[i - 1][j][p]][k]; if (tmpvalue < minj[k + p]) minj[k + p] = tmpvalue; } } } for (int tmpk = 1; tmpk <= Tlen; ++tmpk) { if (mini[tmpk] != Tlen + 1 && minj[tmpk] != Tlen + 1) { vb[i][j][tmpk] = max(mini[tmpk], minj[tmpk]); } else if (mini[tmpk] != Tlen + 1 && minj[tmpk] == Tlen + 1) { vb[i][j][tmpk] = mini[tmpk]; } else if (mini[tmpk] == Tlen + 1 && minj[tmpk] != Tlen + 1) { vb[i][j][tmpk] = minj[tmpk]; } } } } int answer = 0; for (int i = 1; i <= Tlen; ++i) { if (vb[Asize][Bsize][i] != 0) ++answer; } return answer; } void constructNextMatch_int(vector< int> S, int Slen, int symbolNumber) { //nextMatchs = new vector< int>[symbolNumber]; vector < int> *tempMatchs; // store the occurring position for each symbol int *tempMatchsIndex; tempMatchsIndex = new int[symbolNumber]; tempMatchs = new vector< int>[symbolNumber]; for (int i = 0; i< symbolNumber; i++) { tempMatchsIndex[i] = 0; tempMatchs[i].clear(); nextMatchs[i].clear(); } for (int i = 0; i< Slen; i++) { tempMatchs[S[i] - 1].push_back(i); } int *lenTempMatchs = new int[symbolNumber]; for (int i = 0; i < symbolNumber; i++) { lenTempMatchs[i] = (int)tempMatchs[i].size(); } for (int i = 0; i < Slen; i++) { for (int j = 0; j < symbolNumber; j++) { if (tempMatchsIndex[j] < lenTempMatchs[j]) { nextMatchs[j].push_back(tempMatchs[j][tempMatchsIndex[j]]); if ((tempMatchs[j][tempMatchsIndex[j]]) == i) { tempMatchsIndex[j]++; } } } } /* // test part for printing nextMatchs[] for (int i=0;i < symbolNumber;i++) { for (int j=0;j < (int)nextMatchs[i].size();j++) { cout << nextMatchs[i][j] << " "; } cout << endl; } cout << "-" << endl; // test part for printing nextMatchs[] */ for (int i = 0; i < symbolNumber; i++) { tempMatchs[i].clear(); } delete[] tempMatchs; delete[] tempMatchsIndex; } void constructAllStable_int(vector< int> T, vector< int> A, vector< int> B, int Tlen, int Alen, int Blen, vector< int> EOBA, vector< int> EOBB, int symbolNumber) { nextMatchs = new vector< int>[symbolNumber]; int *lenNextMatchs = new int[symbolNumber]; vector < int> *partitionPs; int lenT = Tlen; partitionPs = new vector< int>[lenT]; vector< int> smallestJs; vector< int> strT(T.begin() + 1, T.begin() + 1 + Tlen); //string strT(T.begin() + 1, T.begin() + 1 + Tlen); //theSTables = new vector< int>[lenT]; int totalBlockNum = EOBA.size() + EOBB.size(); AllStable = new vector< int>*[totalBlockNum]; for (int i = 0; i < totalBlockNum; ++i) { AllStable[i] = new vector< int>[Tlen]; } int thisBlockStringLen = Alen / (int)(EOBA.size()); int ranga = 0, rangb = 0; for (int round = 0; round < totalBlockNum; ++round) { if (round < EOBA.size()) { if (round == 0) { ranga = 0; rangb = EOBA[round]; } else { ranga = EOBA[round - 1]; rangb = EOBA[round]; } //string strA(A.begin() + 1 + ranga, A.begin() + 1 + rangb); vector< int> strA(A.begin() + 1 + ranga, A.begin() + 1 + rangb); constructNextMatch_int(strA, thisBlockStringLen, symbolNumber); } else { if (round == EOBA.size()) { ranga = 0; rangb = EOBB[round - EOBA.size()]; } else { ranga = EOBB[round - 1 - EOBA.size()]; rangb = EOBB[round - EOBA.size()]; } //string strB(B.begin() + 1 + ranga, B.begin() + 1 + rangb); vector< int> strB(B.begin() + 1 + ranga, B.begin() + 1 + rangb); constructNextMatch_int(strB, thisBlockStringLen, symbolNumber); } // construct and initialize for (int i = 0; i < lenT; i++) { partitionPs[i].clear(); } for (int i = 0; i < symbolNumber; i++) { lenNextMatchs[i] = (int)(nextMatchs[i].size()); } for (int k = lenT - 1; k >= 0; k--) { if (k == 1) int aas = 1; // Create a new partition point of strT[k] int level = 0; if (0 < nextMatchs[strT[k] - 1].size()) // matched at strT[k] { partitionPs[k].push_back(nextMatchs[strT[k] - 1][0]); if (level + 1 > (int)smallestJs.size()) { smallestJs.push_back(k); } else { smallestJs[level] = k; } } // update the partition point datastructure for (int j = k + 1; j < lenT; j++) { //if ( partitionPs[j-1].empty() ) // ? if ( level < paritionPs[j-1].size() ) //if ( level >= (int) partitionPs[j-1].size() ) //{ // if ( partitionPs[j-1][level] == NULL ) // no new partitionPoint // continue; //} if (0 == partitionPs[k].size()) { break; } if (level < (int)partitionPs[j].size()) { /* if ( partitionPs[j-1][level] == partitionPs[j][level] ) { level++; } */ // } // if ( level < (int) partitionPs[j].size() ) // { if (partitionPs[j - 1][level] < partitionPs[j][level]) { partitionPs[j].insert(partitionPs[j].begin() + level, partitionPs[j - 1][level]); if (partitionPs[j].size() > smallestJs.size()) { smallestJs.push_back(j); } else { if (j < smallestJs[partitionPs[j].size() - 1]) { smallestJs[partitionPs[j].size() - 1] = j; } } } else // if ( partitionPs[j-1][level] > partitionPs[j][level] ) { int minPartitionPoint = thisBlockStringLen; if (level + 1 < (int)partitionPs[j - 1].size()) { minPartitionPoint = partitionPs[j - 1][level + 1]; } if (partitionPs[j - 1][level] + 1 < (int)nextMatchs[strT[j] - 1].size()) { if (minPartitionPoint > nextMatchs[strT[j] - 1][partitionPs[j - 1][level] + 1]) { minPartitionPoint = nextMatchs[strT[j] - 1][partitionPs[j - 1][level] + 1]; } } if (minPartitionPoint < thisBlockStringLen) { if ((level + 1) < (int)partitionPs[j].size()) { //partitionPs[j][level+1] = minPartitionPoint; partitionPs[j].insert(partitionPs[j].begin() + level + 1, minPartitionPoint); } else { partitionPs[j].push_back(minPartitionPoint); } level++; if (partitionPs[j].size() > smallestJs.size()) { smallestJs.push_back(j); } else { if (j < smallestJs[partitionPs[j].size() - 1]) { smallestJs[partitionPs[j].size() - 1] = j; } } } else { j = lenT; } } } else //if ( level == (int) partitionPs[j].size() ) { int minPartitionPoint = thisBlockStringLen; if (level < (int)partitionPs[j - 1].size()) { minPartitionPoint = partitionPs[j - 1][level]; } if (level > 0) { if (partitionPs[j - 1][level - 1] + 1 < (int)nextMatchs[strT[j] - 1].size()) { if (minPartitionPoint > nextMatchs[strT[j] - 1][partitionPs[j - 1][level - 1] + 1]) { minPartitionPoint = nextMatchs[strT[j] - 1][partitionPs[j - 1][level - 1] + 1]; } } } if (minPartitionPoint < thisBlockStringLen) { partitionPs[j].push_back(minPartitionPoint); if (partitionPs[j].size() > smallestJs.size()) { smallestJs.push_back(j); } else { if (j < smallestJs[partitionPs[j].size() - 1]) { smallestJs[partitionPs[j].size() - 1] = j; } } } else { j = lenT; } } } AllStable[round][k].push_back(k); for (int p = 0; p < (int)smallestJs.size(); p++) { AllStable[round][k].push_back(smallestJs[p] + 1); } // test part for printing smallestJs[], partitionPs[] /* cout << "k = " << k << ", " << inALen << endl; cout << "smallestJs[]" << endl; for (int p=0;p< (int) smallestJs.size();p++) { cout << smallestJs[p] << " "; } cout << endl; cout << "partitionPs[][]: " << endl; for (int j=k; j< inALen; j++) { if ( 0 == partitionPs[j].size() ) { cout << "-"; } for (int p=0;p<(int) partitionPs[j].size();p++) { cout << partitionPs[j][p] << " "; } cout << endl; } cout << endl; */ // End of test part for printing smallestJs[], partitionPs[] } //theSTables[inALen-1].pop_back(); // clear and deconstruct for (int i = 0; i < Tlen; i++) { partitionPs[i].clear(); } smallestJs.clear(); } delete[] partitionPs; delete[] lenNextMatchs; }