本文整理汇总了C++中UnitigVector类的典型用法代码示例。如果您正苦于以下问题:C++ UnitigVector类的具体用法?C++ UnitigVector怎么用?C++ UnitigVector使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了UnitigVector类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。
示例1: promoteToSingleton
void
promoteToSingleton(UnitigVector &unitigs, bool enablePromoteToSingleton) {
for (uint32 fi=1; fi<=FI->numFragments(); fi++) {
if (Unitig::fragIn(fi) != 0)
// Placed already
continue;
if (FI->fragmentLength(fi) == 0)
// Deleted.
continue;
if (enablePromoteToSingleton == false) {
writeLog("promoteToSingleton()-- Repeat fragment "F_U32" removed from assembly.\n", fi);
FI->markAsIgnore(fi);
continue;
}
Unitig *utg = unitigs.newUnitig(false);
ufNode frag;
frag.ident = fi;
frag.contained = 0;
frag.parent = 0;
frag.ahang = 0;
frag.bhang = 0;
frag.position.bgn = 0;
frag.position.end = FI->fragmentLength(fi);
frag.containment_depth = 0;
utg->addFrag(frag, 0, false);
}
}
开发者ID:ondovb,项目名称:canu,代码行数:33,代码来源:AS_BAT_PromoteToSingleton.C
示例2: breakSingletonTigs
void
breakSingletonTigs(UnitigVector &unitigs) {
// For any singleton unitig, eject the read and delete the unitig. Eventually,
// we will stop making singleton unitigs.
uint32 removed = 0;
for (uint32 ti=1; ti<unitigs.size(); ti++) {
Unitig *utg = unitigs[ti];
if (utg == NULL)
continue;
if (utg->ufpath.size() > 1)
continue;
unitigs[ti] = NULL; // Remove the unitig from the list
utg->removeFrag(utg->ufpath[0].ident); // Eject the read
delete utg; // Reclaim space
removed++; // Count
}
writeLog("Removed %u read%s from %u singleton unitig%s.\n",
removed, (removed != 1) ? "" : "s",
removed, (removed != 1) ? "" : "s");
}
开发者ID:Ecogpr,项目名称:canu,代码行数:27,代码来源:AS_BAT_PlaceContains.C
示例3: promoteToSingleton
void
promoteToSingleton(UnitigVector &unitigs) {
for (uint32 fi=1; fi<=FI->numFragments(); fi++) {
if (Unitig::fragIn(fi) != 0)
// Placed already
continue;
if (FI->fragmentLength(fi) == 0)
// Deleted.
continue;
Unitig *utg = unitigs.newUnitig(false);
ufNode frag;
frag.ident = fi;
frag.contained = 0;
frag.parent = 0;
frag.ahang = 0;
frag.bhang = 0;
frag.position.bgn = 0;
frag.position.end = FI->fragmentLength(fi);
utg->addFrag(frag, 0, false);
}
}
开发者ID:AndreasHegerGenomics,项目名称:canu,代码行数:26,代码来源:AS_BAT_PromoteToSingleton.C
示例4: placeContainsUsingBestOverlaps
void
placeContainsUsingBestOverlaps(UnitigVector &unitigs) {
uint32 fragsPlaced = 1;
uint32 fragsPending = 0;
logFileFlags &= ~LOG_PLACE_FRAG;
while (fragsPlaced > 0) {
fragsPlaced = 0;
fragsPending = 0;
writeLog("==> PLACING CONTAINED FRAGMENTS\n");
for (uint32 fid=1; fid<FI->numFragments()+1; fid++) {
BestContainment *bestcont = OG->getBestContainer(fid);
Unitig *utg;
if (bestcont->isContained == false)
// Not a contained fragment.
continue;
if (Unitig::fragIn(fid) != 0)
// Containee already placed.
continue;
if (Unitig::fragIn(bestcont->container) == 0) {
// Container not placed (yet).
fragsPending++;
continue;
}
utg = unitigs[Unitig::fragIn(bestcont->container)];
utg->addContainedFrag(fid, bestcont, logFileFlagSet(LOG_INITIAL_CONTAINED_PLACEMENT));
if (utg->id() != Unitig::fragIn(fid))
writeLog("placeContainsUsingBestOverlaps()-- FAILED to add frag %d to unitig %d.\n", fid, bestcont->container);
assert(utg->id() == Unitig::fragIn(fid));
fragsPlaced++;
}
writeLog("==> PLACING CONTAINED FRAGMENTS - placed %d fragments; still need to place %d\n",
fragsPlaced, fragsPending);
if ((fragsPlaced == 0) && (fragsPending > 0)) {
writeLog("Stopping contained fragment placement due to zombies.\n");
fragsPlaced = 0;
fragsPending = 0;
}
}
for (uint32 ti=1; ti<unitigs.size(); ti++) {
Unitig *utg = unitigs[ti];
if (utg)
utg->sort();
}
}
开发者ID:cdunn2001,项目名称:DConvert,代码行数:59,代码来源:AS_BAT_PlaceContains.C
示例5: joinUnitigs
void
joinUnitigs(UnitigVector &unitigs, bool enableJoining) {
if (enableJoining == false)
return;
writeLog("==> JOINING SPLIT UNITIGS\n");
// Sort unitigs by joined size. Sort. Join the largest first.
vector<joinEntry> joins;
// Over all unitigs, evaluate if a unitig is a candidate for merging onto something.
for (uint32 frID=0; frID<unitigs.size(); frID++) {
Unitig *fr = unitigs[frID];
if (fr == NULL)
// Ain't no unitig here, mister!
continue;
if (fr->ufpath.size() < 2)
// Ain't no real unitig here, mister!
continue;
// Do we look like a bubble?
if (joinUnitigs_looksLikeBubble(fr))
continue;
// The for loop tries reads close to the end - but we don't support joining these.
for (uint32 ii=0; (ii < 1) && (ii < fr->ufpath.size()); ii++)
if (joinUnitigs_examineEnd(unitigs, fr, ii, true, joins))
break;
for (uint32 ii=0; (ii < 1) && (ii < fr->ufpath.size()); ii++)
if (joinUnitigs_examineEnd(unitigs, fr, ii, false, joins))
break;
} // Over all unitigs.
writeLog("Found %d pairs of unitigs to join.\n", (int)joins.size());
std::sort(joins.begin(), joins.end(), greater<joinEntry>());
return;
for (uint32 j=0; j<joins.size(); j++) {
joinEntry *join = &joins[j];
//joinUnitigs_append(unitigs, join);
}
}
开发者ID:ondovb,项目名称:canu,代码行数:57,代码来源:AS_BAT_Joining.C
示例6: checkUnitigMembership
void
checkUnitigMembership(UnitigVector &unitigs) {
uint32 *inUnitig = new uint32 [FI->numFragments()+1];
uint32 noUnitig = 0xffffffff;
// All reads start of not placed in a unitig.
for (uint32 i=0; i<FI->numFragments()+1; i++)
inUnitig[i] = noUnitig;
// Over all unitigs, remember where each read is.
for (uint32 ti=0; ti<unitigs.size(); ti++) {
Unitig *tig = unitigs[ti];
int32 len = 0;
if (tig == NULL)
continue;
for (uint32 fi=0; fi<tig->ufpath.size(); fi++) {
ufNode *frg = &tig->ufpath[fi];
if (frg->ident > FI->numFragments())
fprintf(stderr, "tig %u ufpath[%d] ident %u more than number of reads %u\n",
tig->id(), fi, frg->ident, FI->numFragments());
if (inUnitig[frg->ident] != noUnitig)
fprintf(stderr, "tig %u ufpath[%d] ident %u placed multiple times\n",
tig->id(), fi, frg->ident);
assert(frg->ident <= FI->numFragments()); // Can't be out of range.
assert(inUnitig[frg->ident] == noUnitig); // Read must be not placed yet.
inUnitig[frg->ident] = ti;
}
}
// Find any read not placed in a unitig.
for (uint32 i=0; i<FI->numFragments()+1; i++) {
if (FI->fragmentLength(i) == 0) // Deleted read.
continue;
assert(inUnitig[i] != 0); // There shouldn't be a unitig 0.
assert(inUnitig[i] != noUnitig); // The read should be in a unitig.
}
delete [] inUnitig;
}
开发者ID:AndreasHegerGenomics,项目名称:canu,代码行数:49,代码来源:AS_BAT_Instrumentation.C
示例7: makeNewUnitig
static
void
makeNewUnitig(UnitigVector &unitigs,
uint32 splitFragsLen,
ufNode *splitFrags) {
Unitig *dangler = unitigs.newUnitig(false);
if (logFileFlagSet(LOG_MATE_SPLIT_DISCONTINUOUS))
writeLog("splitDiscontinuous()-- new tig "F_U32" with "F_U32" fragments (starting at frag "F_U32").\n",
dangler->id(), splitFragsLen, splitFrags[0].ident);
int splitOffset = -MIN(splitFrags[0].position.bgn, splitFrags[0].position.end);
// This should already be true, but we force it still
splitFrags[0].contained = 0;
for (uint32 i=0; i<splitFragsLen; i++)
dangler->addFrag(splitFrags[i], splitOffset, false); //logFileFlagSet(LOG_MATE_SPLIT_DISCONTINUOUS));
}
开发者ID:xtmgah,项目名称:canu,代码行数:19,代码来源:AS_BAT_SplitDiscontinuous.C
示例8: reportUnitigs
void
reportUnitigs(UnitigVector &unitigs, const char *prefix, const char *name) {
if (logFileFlagSet(LOG_INTERMEDIATE_UNITIGS) == 0)
return;
uint32 numFragsT = 0;
uint32 numFragsP = 0;
uint64 utgLen = 0;
// Compute average frags per partition.
for (uint32 ti=0; ti<unitigs.size(); ti++) {
Unitig *utg = unitigs[ti];
if (utg == NULL)
continue;
numFragsT += utg->ufpath.size();
if (utg->ufpath.size() > 2)
utgLen += utg->getLength();
}
if (utgLen < 16 * 1024 * 1024)
numFragsP = numFragsT / 7;
else if (utgLen < 64 * 1024 * 1024)
numFragsP = numFragsT / 63;
else
numFragsP = numFragsT / 127;
char tigStorePath[FILENAME_MAX];
sprintf(tigStorePath, "%s.%03u.%s.tigStore", prefix, logFileOrder, name);
// Failing to do this results in consensus running about 40 times slower. Three hours instead of
// five minutes.
setParentAndHang(unitigs);
writeUnitigsToStore(unitigs, tigStorePath, tigStorePath, numFragsP, false);
}
开发者ID:xtmgah,项目名称:canu,代码行数:39,代码来源:AS_BAT_Instrumentation.C
示例9: classifyUnitigsAsUnassembled
// Decides if a unitig is unassembled. The other classifications (isBubble, isCircular, isRepeat)
// are made when the type is processed (e.g., when bubbles are popped).
//
// A unitig is unassembled if:
// 1) it has fewer than R reads (R=2)
// 2) it is shorter than S bases (S=1000)
// 3) a single read spans at least fraction F of the lenth (F=1.0)
// 4) at least fraction F of the unitig is below read depth D (F=1.0, D=2)
//
void
classifyUnitigsAsUnassembled(UnitigVector &unitigs,
uint32 fewReadsNumber,
uint32 tooShortLength,
double spanFraction,
double lowcovFraction, uint32 lowcovDepth) {
uint32 nTooFew = 0;
uint32 nShort = 0;
uint32 nSingle = 0;
uint32 nCoverage = 0;
uint32 nContig = 0;
uint64 bTooFew = 0;
uint64 bShort = 0;
uint64 bSingle = 0;
uint64 bCoverage = 0;
uint64 bContig = 0;
char N[FILENAME_MAX];
sprintf(N, "%s.unassembled", getLogFilePrefix());
errno = 0;
FILE *F = fopen(N, "w");
if (errno)
F = NULL;
for (uint32 ti=0; ti<unitigs.size(); ti++) {
Unitig *utg = unitigs[ti];
if (utg == NULL)
continue;
utg->_isUnassembled = false;
// Rule 1. Too few reads.
if (utg->ufpath.size() < fewReadsNumber) {
fprintf(F, "unitig "F_U32" unassembled - too few reads ("F_U64" < "F_U32")\n", ti, utg->ufpath.size(), fewReadsNumber);
utg->_isUnassembled = true;
nTooFew += 1;
bTooFew += utg->getLength();
continue;
}
// Rule 2. Short.
if (utg->getLength() < tooShortLength) {
fprintf(F, "unitig "F_U32" unassembled - too short ("F_U32" < "F_U32")\n", ti, utg->getLength(), tooShortLength);
utg->_isUnassembled = true;
nShort += 1;
bShort += utg->getLength();
continue;
}
// Rule 3. Single read spans large fraction of tig.
for (uint32 oi=0; oi<utg->ufpath.size(); oi++) {
ufNode *frg = &utg->ufpath[oi];
int frgbgn = MIN(frg->position.bgn, frg->position.end);
int frgend = MAX(frg->position.bgn, frg->position.end);
if (frgend - frgbgn > utg->getLength() * spanFraction) {
fprintf(F, "unitig "F_U32" unassembled - single read spans unitig (read "F_U32" "F_U32"-"F_U32" spans fraction %f > %f\n",
ti, frg->ident, frg->position.bgn, frg->position.end, (double)(frgend - frgbgn) / utg->getLength(), spanFraction);
utg->_isUnassembled = true;
nSingle += 1;
bSingle += utg->getLength();
break;
}
}
if (utg->_isUnassembled)
continue;
// Rule 4. Low coverage.
intervalList<int32> IL;
for (uint32 oi=0; oi<utg->ufpath.size(); oi++) {
ufNode *frg = &utg->ufpath[oi];
int frgbgn = MIN(frg->position.bgn, frg->position.end);
int frgend = MAX(frg->position.bgn, frg->position.end);
IL.add(frgbgn, frgend - frgbgn);
}
intervalList<int32> ID(IL);
uint32 basesLow = 0;
//.........这里部分代码省略.........
开发者ID:AndreasHegerGenomics,项目名称:canu,代码行数:101,代码来源:AS_BAT_Instrumentation.C
示例10: accumulateLibraryStats
InsertSizes::InsertSizes(UnitigVector &unitigs) {
_numLibs = FI->numLibraries();
_dist = new int32 * [_numLibs + 1];
_distLen = new int32 [_numLibs + 1];
_distMax = new int32 [_numLibs + 1];
_mean = new int32 [_numLibs + 1];
_stddev = new int32 [_numLibs + 1];
_samples = new int32 [_numLibs + 1];
_distLen[0] = 0;
_distMax[0] = 0;
_dist[0] = NULL;
for (uint32 i=1; i<_numLibs + 1; i++) {
_distLen[i] = 0;
_distMax[i] = 1048576;
_dist[i] = new int32 [_distMax[i]];
_mean[i] = (int32)FI->mean(i);
_stddev[i] = (int32)FI->stddev(i);
_samples[i] = FI->numMatesInLib(i);
}
for (uint32 ti=0; ti<unitigs.size(); ti++) {
Unitig *utg = unitigs[ti];
if ((utg == NULL) ||
(utg->ufpath.size() < 2))
continue;
accumulateLibraryStats(utg);
}
for (uint32 i=1; i<_numLibs + 1; i++)
sort(_dist[i], _dist[i] + _distLen[i]);
// Disregard outliers (those outside 5 (estimated) stddevs) and recompute global stddev
for (uint32 i=1; i<_numLibs + 1; i++) {
int32 median = _dist[i][_distLen[i] * 1 / 2];
int32 oneThird = _dist[i][_distLen[i] * 1 / 3];
int32 twoThird = _dist[i][_distLen[i] * 2 / 3];
int32 aproxStd = MAX(median - oneThird, twoThird - median);
int32 biggest = median + aproxStd * 5;
int32 smallest = median - aproxStd * 5;
uint32 numPairs = 0;
double sum_Dists = 0.0;
double sumSquares = 0.0;
for (int32 d=0; d<_distLen[i]; d++)
if ((smallest <= _dist[i][d]) &&
(_dist[i][d] <= biggest)) {
numPairs++;
sum_Dists += _dist[i][d];
}
_samples[i] = numPairs;
_mean[i] = (numPairs > 0) ? sum_Dists / numPairs : 0;
for (int32 d=0; d<_distLen[i]; d++)
if ((smallest <= _dist[i][d]) &&
(_dist[i][d] <= biggest))
sumSquares += ((double)(_dist[i][d] - _mean[i]) *
(double)(_dist[i][d] - _mean[i]));
_stddev[i] = (numPairs > 1) ? sqrt(sumSquares / (numPairs - 1)) : 0.0;
writeLog("InsertSizes()-- lib %d mean %d stddev %d samples %d\n", i, _mean[i], _stddev[i], _samples[i]);
}
for (uint32 i=0; i<_numLibs + 1; i++)
delete [] _dist[i];
delete [] _dist; _dist = NULL;
delete [] _distLen; _distLen = NULL;
delete [] _distMax; _distMax = NULL;
}
开发者ID:ondovb,项目名称:canu,代码行数:83,代码来源:AS_BAT_InsertSizes.C
示例11: placeUnplacedUsingAllOverlaps
void
placeUnplacedUsingAllOverlaps(UnitigVector &unitigs,
const char *prefix) {
uint32 fiLimit = FI->numFragments();
uint32 numThreads = omp_get_max_threads();
uint32 blockSize = (fiLimit < 100 * numThreads) ? numThreads : fiLimit / 99;
uint32 *placedTig = new uint32 [FI->numFragments() + 1];
SeqInterval *placedPos = new SeqInterval [FI->numFragments() + 1];
memset(placedTig, 0, sizeof(uint32) * (FI->numFragments() + 1));
memset(placedPos, 0, sizeof(SeqInterval) * (FI->numFragments() + 1));
// Just some logging. Count the number of reads we try to place.
uint32 nToPlaceContained = 0;
uint32 nToPlace = 0;
uint32 nPlacedContained = 0;
uint32 nPlaced = 0;
uint32 nFailedContained = 0;
uint32 nFailed = 0;
for (uint32 fid=1; fid<FI->numFragments()+1; fid++)
if (Unitig::fragIn(fid) == 0)
if (OG->isContained(fid))
nToPlaceContained++;
else
nToPlace++;
writeLog("placeContains()-- placing %u contained and %u unplaced reads, with %d threads.\n",
nToPlaceContained, nToPlace, numThreads);
// Do the placing!
#pragma omp parallel for schedule(dynamic, blockSize)
for (uint32 fid=1; fid<FI->numFragments()+1; fid++) {
bool enableLog = true;
if (Unitig::fragIn(fid) > 0)
continue;
// Place the read.
vector<overlapPlacement> placements;
placeFragUsingOverlaps(unitigs, AS_MAX_ERATE, NULL, fid, placements);
// Search the placements for the highest expected identity placement using all overlaps in the unitig.
uint32 b = UINT32_MAX;
for (uint32 i=0; i<placements.size(); i++) {
Unitig *tig = unitigs[placements[i].tigID];
if (placements[i].fCoverage < 0.99) // Ignore partially placed reads.
continue;
if (tig->ufpath.size() == 1) // Ignore placements in singletons.
continue;
uint32 bgn = (placements[i].position.bgn < placements[i].position.end) ? placements[i].position.bgn : placements[i].position.end;
uint32 end = (placements[i].position.bgn < placements[i].position.end) ? placements[i].position.end : placements[i].position.bgn;
double erate = placements[i].errors / placements[i].aligned;
if (tig->overlapConsistentWithTig(5.0, bgn, end, erate) < 0.5) {
if ((enableLog == true) && (logFileFlagSet(LOG_PLACE_UNPLACED)))
writeLog("frag %8u tested tig %6u (%6u reads) at %8u-%8u (cov %7.5f erate %6.4f) - HIGH ERROR\n",
fid, placements[i].tigID, tig->ufpath.size(), placements[i].position.bgn, placements[i].position.end, placements[i].fCoverage, erate);
continue;
}
if ((enableLog == true) && (logFileFlagSet(LOG_PLACE_UNPLACED)))
writeLog("frag %8u tested tig %6u (%6u reads) at %8u-%8u (cov %7.5f erate %6.4f)\n",
fid, placements[i].tigID, tig->ufpath.size(), placements[i].position.bgn, placements[i].position.end, placements[i].fCoverage, erate);
if ((b == UINT32_MAX) ||
(placements[i].errors / placements[i].aligned < placements[b].errors / placements[b].aligned))
b = i;
}
// If we didn't find a best, b will be invalid; set positions for adding to a new tig.
// If we did, save both the position it was placed at, and the tigID it was placed in.
if (b == UINT32_MAX) {
if ((enableLog == true) && (logFileFlagSet(LOG_PLACE_UNPLACED)))
writeLog("frag %8u remains unplaced\n", fid);
placedPos[fid].bgn = 0;
placedPos[fid].end = FI->fragmentLength(fid);
}
else {
if ((enableLog == true) && (logFileFlagSet(LOG_PLACE_UNPLACED)))
writeLog("frag %8u placed tig %6u (%6u reads) at %8u-%8u (cov %7.5f erate %6.4f)\n",
fid, placements[b].tigID, unitigs[placements[b].tigID]->ufpath.size(),
placements[b].position.bgn, placements[b].position.end,
placements[b].fCoverage,
placements[b].errors / placements[b].aligned);
placedTig[fid] = placements[b].tigID;
placedPos[fid] = placements[b].position;
//.........这里部分代码省略.........
开发者ID:Ecogpr,项目名称:canu,代码行数:101,代码来源:AS_BAT_PlaceContains.C
示例12: checkUnitigMembership
void
checkUnitigMembership(UnitigVector &unitigs) {
int nutg = 0;
int nfrg = 0;
writeLog("checkUnitigMembership()-- numfrags=%d\n", FI->numFragments());
uint32 *inUnitig = new uint32 [FI->numFragments()+1];
uint32 logSizeMax = 0;
uint32 logSize[64] = {0};
for (uint32 i=0; i<FI->numFragments()+1; i++)
inUnitig[i] = noUnitig;
for (uint32 ti=0; ti<unitigs.size(); ti++) {
Unitig *tig = unitigs[ti];
int32 len = 0;
if (tig) {
nutg++;
for (uint32 fi=0; fi<tig->ufpath.size(); fi++) {
ufNode *frg = &tig->ufpath[fi];
nfrg++;
if (frg->ident > FI->numFragments())
writeLog("HUH? ident=%d numfrags=%d\n", frg->ident, FI->numFragments());
inUnitig[frg->ident] = ti;
len = MAX(len, frg->position.bgn);
len = MAX(len, frg->position.end);
}
uint32 ls = (uint32)(log10(len) / log10(2));
logSizeMax = (logSizeMax < ls) ? ls : logSizeMax;
logSize[ls]++;
}
}
int lost = 0;
int found = 0;
for (uint32 i=0; i<FI->numFragments()+1; i++) {
if (FI->fragmentLength(i) > 0) {
if (inUnitig[i] == 0) {
writeLog("ERROR frag %d is in unitig 0!\n", i);
} else if (inUnitig[i] != noUnitig) {
found++;
} else {
writeLog("ERROR frag %d disappeared!\n", i);
lost++;
}
}
}
writeLog("checkUnitigMembership()-- nutg=%d nfrg=%d lost=%d found=%d\n", nutg, nfrg, lost, found);
writeLog("checkUnitigMembership()-- log2 length histogram:\n");
for (uint32 i=5; i<=logSizeMax; i++)
writeLog("checkUnitigMembership()-- %2u (%9u-%9u) %u\n", i, (uint32)1 << i, (uint32)1 << (i+1), logSize[i]);
assert(lost == 0);
delete [] inUnitig;
}
开发者ID:xtmgah,项目名称:canu,代码行数:66,代码来源:AS_BAT_Instrumentation.C
示例13: writeOverlapsUsed
// For every unitig, report the best overlaps contained in the
// unitig, and all overlaps contained in the unitig.
//
// Wow, this is ancient.
//
void
writeOverlapsUsed(UnitigVector &unitigs,
char *prefix) {
char N[FILENAME_MAX];
sprintf(N, "%s.unused.best.edges", prefix);
FILE *F = fopen(N, "w");
for (uint32 ti=0; ti<unitigs.size(); ti++) {
Unitig *tig = unitigs[ti];
Unitig *ovl = NULL;
char tyt = 'C';
if (tig == NULL)
continue;
if (tig->_isUnassembled) tyt = 'U';
if (tig->_isBubble) tyt = 'B';
if (tig->_isRepeat) tyt = 'R';
if (tig->_isCircular) tyt = 'O';
for (uint32 fi=0; fi<tig->ufpath.size(); fi++) {
ufNode *frg = &tig->ufpath[fi];
ufNode *oth = NULL;
// Report the unused best edge
BestEdgeOverlap *be5 = OG->getBestEdgeOverlap(frg->ident, false);
uint32 rd5 = (be5 == NULL) ? 0 : be5->fragId();
Unitig *tg5 = (be5 == NULL) ? NULL : unitigs[Unitig::fragIn(rd5)];
char ty5 = 'C';
if ((tg5 != NULL) && (tg5->tigID() != tig->tigID())) {
uint32 ord = Unitig::pathPosition(rd5);
ufNode *oth = &tg5->ufpath[ord];
if (tig->_isUnassembled) ty5 = 'U';
if (tig->_isBubble) ty5 = 'B';
if (tig->_isRepeat) ty5 = 'R';
if (tig->_isCircular) ty5 = 'O';
fprintf(F, "tig %7u %c read %8u at %9u %-9u %c' -- %8d %-8d -- tig %7u %c read %8u at %9u %-9u %c'\n",
tig->tigID(), tyt, frg->ident, frg->position.bgn, frg->position.end, '5',
be5->ahang(), be5->bhang(),
tg5->tigID(), ty5, oth->ident, oth->position.bgn, oth->position.end, (be5->frag3p() == false) ? '5' : '3');
}
BestEdgeOverlap *be3 = OG->getBestEdgeOverlap(frg->ident, true);
uint32 rd3 = (be3 == NULL) ? 0 : be3->fragId();
Unitig *tg3 = (be3 == NULL) ? NULL : unitigs[Unitig::fragIn(rd3)];
char ty3 = 'C';
if ((tg3 != NULL) && (tg3->tigID() != tig->tigID())) {
uint32 ord = Unitig::pathPosition(rd3);
ufNode *oth = &tg3->ufpath[ord];
if (tig->_isUnassembled) ty3 = 'U';
if (tig->_isBubble) ty3 = 'B';
if (tig->_isRepeat) ty3 = 'R';
if (tig->_isCircular) ty3 = 'O';
fprintf(F, "tig %7u %c read %8u at %9u %-9u %c' -- %8d %-8d -- tig %7u %c read %8u at %9u %-9u %c'\n",
tig->tigID(), tyt, frg->ident, frg->position.bgn, frg->position.end, '3',
be3->ahang(), be3->bhang(),
tg3->tigID(), ty3, oth->ident, oth->position.bgn, oth->position.end, (be3->frag3p() == false) ? '5' : '3');
}
}
}
fclose(F);
}
开发者ID:swang8,项目名称:canu,代码行数:77,代码来源:AS_BAT_Outputs.C
示例14: writeUnitigsToStore
void
writeUnitigsToStore(UnitigVector &unitigs,
char *fileprefix,
char *tigStorePath,
uint32 frg_count_target,
bool isFinal) {
uint32 utg_count = 0;
uint32 frg_count = 0;
uint32 prt_count = 1;
char filename[FILENAME_MAX] = {0};
uint32 *partmap = new uint32 [unitigs.size()];
// This code closely follows that in AS_CGB_unitigger.c::output_the_chunks()
if (isFinal)
checkUnitigMembership(unitigs);
// Open up the initial output file
sprintf(filename, "%s.iidmap", fileprefix);
FILE *iidm = fopen(filename, "w");
assert(NULL != iidm);
sprintf(filename, "%s.partitioning", fileprefix);
FILE *part = fopen(filename, "w");
assert(NULL != part);
sprintf(filename, "%s.partitioningInfo", fileprefix);
FILE *pari = fopen(filename, "w");
assert(NULL != pari);
// Step through all the unitigs once to build the partition mapping and IID mapping.
tgStore *tigStore = new tgStore(tigStorePath);
tgTig *tig = new tgTig;
for (uint32 tigID=0, ti=0; ti<unitigs.size(); ti++) {
Unitig *utg = unitigs[ti];
if ((utg == NULL) || (utg->getNumFrags() == 0))
continue;
assert(utg->getLength() > 0);
// Convert the bogart tig to a tgTig and save to the store.
unitigToTig(tig, (isFinal) ? tigID : ti, utg);
tigID++;
tigStore->insertTig(tig, false);
// Increment the partition if the current one is too large.
if ((frg_count + utg->getNumFrags() >= frg_count_target) &&
(frg_count > 0)) {
fprintf(pari, "Partition %d has %d unitigs and %d fragments.\n",
prt_count, utg_count, frg_count);
prt_count++;
utg_count = 0;
frg_count = 0;
}
// Note that the tig is included in this partition.
utg_count += 1;
frg_count += utg->getNumFrags();
// Map the tig to a partition, and log both the tig-to-partition map and the partition-to-read map.
fprintf(iidm, "bogart "F_U32" -> tig "F_U32" (in partition "F_U32" with "F_U32" frags)\n",
utg->id(),
utg->tigID(),
prt_count,
utg->getNumFrags());
for (uint32 fragIdx=0; fragIdx<utg->getNumFrags(); fragIdx++)
fprintf(part, "%d\t%d\n", prt_count, utg->ufpath[fragIdx].ident);
}
fprintf(pari, "Partition %d has %d unitigs and %d fragments.\n", // Don't forget to log the last partition!
prt_count, utg_count, frg_count);
fclose(pari);
fclose(part);
fclose(iidm);
delete tig;
delete tigStore;
}
开发者ID:swang8,项目名称:canu,代码行数:90,代码来源:AS_BAT_Outputs.C
示例15: splitUnitigs
uint32
splitUnitigs(UnitigVector &unitigs,
Unitig *tig,
vector<breakPointCoords> &BP,
Unitig **newTigs,
int32 *lowCoord,
uint32 *nRepeat,
uint32 *nUnique,
bool doMove) {
uint32 nTigsCreated = 0;
if (doMove == true) {
memset(newTigs, 0, sizeof(Unitig *) * BP.size());
memset(lowCoord, 0, sizeof(int32) * BP.size());
} else {
memset(nRepeat, 0, sizeof(uint32) * BP.size());
memset(nUnique, 0, sizeof(uint32) * BP.size());
}
for (uint32 fi=0; fi<tig->ufpath.size(); fi++) {
ufNode &frg = tig->ufpath[fi];
int32 frgbgn = min(frg.position.bgn, frg.position.end);
int32 frgend = max(frg.position.bgn, frg.position.end);
// Search for the region that matches the read. BP's are sorted in increasing order. It
// probably doesn't matter, but makes the logging a little easier to read.
uint32 rid = UINT32_MAX;
bool rpt = false;
//fprintf(stderr, "Searching for placement for read %u at %u-%u\n", frg.ident, frgbgn, frgend);
for (uint32 ii=0; ii<BP.size(); ii++) {
int32 rgnbgn = BP[ii]._bgn;
int32 rgnend = BP[ii]._end;
bool repeat = BP[ii]._isRepeat;
// For repeats, the read must be contained fully.
if ((repeat == true) && (rgnbgn <= frgbgn) && (frgend <= rgnend)) {
rid = ii;
rpt = true;
break;
}
// For non-repeat, the read just needs to intersect.
if ((repeat == false) && (rgnbgn < frgend) && (frgbgn < rgnend)) {
rid = ii;
rpt = false;
break;
}
}
if (rid == UINT32_MAX) {
fprintf(stderr, "Failed to place read %u at %u-%u\n", frg.ident, frgbgn, frgend);
for (uint32 ii=0; ii<BP.size(); ii++)
fprintf(stderr, "Breakpoints %2u %8u-%8u repeat %u\n", ii, BP[ii]._bgn, BP[ii]._end, BP[ii]._isRepeat);
}
assert(rid != UINT32_MAX); // We searched all the BP's, the read had better be placed!
// If moving reads, move the read!
if (doMove) {
if (newTigs[rid] == NULL) {
lowCoord[rid] = frgbgn;
newTigs[rid] = unitigs.newUnitig(true); // LOG_ADDUNITIG_BREAKING
if (nRepeat[rid] > nUnique[rid])
newTigs[rid]->_isRepeat = true;
}
newTigs[rid]->addFrag(frg, -lowCoord[rid], false); //LOG_ADDFRAG_BREAKING);
}
// Else, we're not moving, just count how many reads came from repeats or uniques.
else {
if (rpt)
nRepeat[rid]++;
else
nUnique[rid]++;
}
}
// Return the number of tigs created.
for (uint32 ii=0; ii<BP.size(); ii++)
if (nRepeat[ii] + nUnique[ii] > 0)
nTigsCreated++;
return(nTigsCreated);
}
开发者ID:Ecogpr,项目名称:canu,代码行数:94,代码来源:AS_BAT_MarkRepeatReads.C
示例16: joinUnitigs_append
static
void
joinUnitigs_append(UnitigVector &unitigs, joinEntry *join) {
uint32 frId = Unitig::fragIn(join->frFragID);
uint32 toId = Unitig::fragIn(join->toFragID);
Unitig *fr = unitigs[frId];
Unitig *to = unitigs[toId];
uint32 frIdx = Unitig::pathPosition(join->frFragID);
uint32 toIdx = Unitig::pathPosition(join->toFragID);
// The 'fr' unitig is assumed to be forward, and assumed to be the one we join to.
// Compute the offset for our append. We just need to compute where the join fragment would
// appear in the unitig. The join fragment MUST be the first thing in the frUnitig.
//int32 offset = MIN(frF.position.bgn, frF.position.end);
// Over all fragments in the frUnitig, add them to either the joinUnitig or the discUnitig.
Unitig *joinUnitig = unitigs.newUnitig(false);
Unitig *discUnitig = unitigs.newUnitig(false);
// Reverse the 'to' unitig if needed.
if (join->toFlip)
to->reverseComplement(true);
// If we're joining off the 5' end of the fr untiig, add the to reads first.
if (join->frFirst == true) {
uint32 ii=0;
for (; ii < toIdx; ii++)
joinUnitig->addFrag(to->ufpath[ii], 0, false);
for (; ii < to->ufpath.size(); ii++)
discUnitig->addFrag(to->ufpath[ii], 0, false);
}
// Now add all the fr unitig reads.
for (uint32 ii=0; ii < fr->ufpath.size(); ii++)
joinUnitig->addFrag(to->ufpath[ii], 0, false);
// If we're not joining off the 5' end, add the to unitig reads last.
if (join->frFirst == false) {
uint32 ii = 0;
for (; ii < toIdx; ii++)
discUnitig->addFrag(to->ufpath[ii], 0, false);
for (; ii < to->ufpath.size(); ii++)
joinUnitig->addFrag(to->ufpath[ii], 0, false);
}
// Delete the donor unitigs.
delete fr;
delete to;
unitigs[frId] = NULL;
unitigs[toId] = NULL;
// And make sure the new unitigs are consistent.
joinUnitig->sort();
discUnitig->sort();
}
开发者ID:ondovb,项目名称:canu,代码行数:71,代码来源:AS_BAT_Joining.C
示例17: writeUnitigsToStore
void
writeUnitigsToStore(UnitigVector &unitigs,
char *fileprefix,
char *tigStorePath,
uint32 frg_count_target,
bool isFinal) {
uint32 utg_count = 0;
uint32 frg_count = 0;
uint32 prt_count = 1;
char filename[FILENAME_MAX] = {0};
uint32 *partmap = new uint32 [unitigs.size()];
// This code closely follows that in AS_CGB_unitigger.c::output_the_chunks()
if (isFinal)
checkUnitigMembership(unitigs);
// Open up the initial output file
sprintf(filename, "%s.iidmap", fileprefix);
FILE *iidm = fopen(filename, "w");
assert(NULL != iidm);
sprintf(filename, "%s.partitioning", fileprefix);
FILE *part = fopen(filename, "w");
assert(NULL != part);
sprintf(filename, "%s.partitioningInfo", fileprefix);
FILE *pari = fopen(filename, "w");
assert(NULL != pari);
// Step through all the unitigs once to build the partition mapping and IID mapping.
memset(partmap, 0xff, sizeof(uint32) * unitigs.size());
for (uint32 iumiid=0, ti=0; ti<unitigs.size(); ti++) {
Unitig *utg = unitigs[ti];
uint32 nf = (utg) ? utg->getNumFrags() : 0;
if ((utg == NULL) || (nf == 0))
continue;
assert(utg->getLength() > 0);
assert(nf == utg->ufpath.size());
if ((frg_count + nf >= frg_count_target) &&
(frg_count > 0)) {
fprintf(pari, "Partition %d has %d unitigs and %d fragments.\n",
prt_count, utg_count, frg_count);
prt_count++;
utg_count = 0;
frg_count = 0;
}
uint32 tigid = (isFinal) ? iumiid : ti;
assert(tigid < unitigs.size());
partmap[tigid] = prt_count;
fprintf(iidm, "Unitig "F_U32" == IUM "F_U32" (in partition "F_U32" with "F_U32" frags)\n",
utg->id(),
(tigid),
partmap[(tigid)],
nf);
for (uint32 fragIdx=0; fragIdx<nf; fragIdx++) {
ufNode *f = &utg->ufpath[fragIdx];
fprintf(part, "%d\t%d\n", prt_count, f->ident);
}
utg_count += 1;
frg_count += nf;
iumiid++;
}
fprintf(pari, "Partition %d has %d unitigs and %d fragments.\n",
prt_count, utg_count, frg_count);
fclose(pari);
fclose(part);
fclose(iidm);
// Step through all the unitigs once to build the partition mapping and IID mapping.
tgStore *tigStore = new tgStore(tigStorePath);
tgTig *tig = new tgTig;
for (uint32 iumiid=0, ti=0; ti<unitigs.size(); ti++) {
Unitig *utg = unitigs[ti];
uint32 nf = (utg) ? utg->getNumFrags() : 0;
if ((utg == NULL) || (nf == 0))
continue;
unitigToTig(tig, (isFinal) ? iumiid : ti, utg);
tigStore->insertTig(tig, false);
//.........这里部分代码省略.........
开发者ID:ondovb,项目名称:canu,代码行数:101,代码来源:AS_BAT_Outputs.C
示例18:
intersectionList::intersectionList(UnitigVector &unitigs) {
for (uint32 ti=0; ti<unitigs.size(); ti++) {
Unitig *tig = unitigs[ti];
if (tig == NULL)
continue;
intersectionEvidence *evidence = new intersectionEvidence [tig->ufpath.size()];
for (uint32 fi=0; fi<tig->ufpath.size(); fi++) {
ufNode *frg = &tig->ufpath[fi];
if (OG->isContained(frg->ident))
continue;
// For my best overlap, the ID of the unitig that the overlapping fragment is in.
evidence[fi].edge5 = *OG->getBestEdgeOverlap(frg->ident, false);
evidence[fi].edge3 = *OG->getBestEdgeOverlap(frg->ident, true);
evidence[fi].frag5tig = tig->fragIn(evidence[fi].edge5.fragId());
evidence[fi].frag3tig = tig->fragIn(evidence[fi].edge3.fragId());
// Do NOT initialize these! An earlier fragment could have already confirmed an end.
// Properly, only the 5' end of a forward fragment (or 3' end of a reverse fragment) can be
// confirmed already (otherwise the tig is nonsense), but we don't yet check that.
//
//evidence[fi].frag5confirmed = false;
//evidence[fi].frag3confirmed = false;
// But, because the path could be promiscuous, not every overlap to a different tig is bad.
//
// If my best overlap is to a different tig, but there is an overlapping fragment (in the
// unitig placement) with a best edge to me, I'm still good. The BOG build this unitig using
// the edge from the other fragment to me.
//
// If the fragments do not overlap in the layout (yet the best edge still exists) that is a
// self-intersection.
//
// The two blocks are identical, except for 'edge3' and 'edge5'.
if (evidence[fi].frag5tig == tig->id()) {
uint32 ti = tig->pathPosition(evidence[fi].edge5.fragId());
ufNode *trg = &tig->ufpath[ti];
uint32 minf = (frg->position.bgn < frg->position.end) ? frg->position.bgn : frg->position.end;
uint32 maxf = (frg->position.bgn < frg->position.end) ? frg->position.end : frg->position.bgn;
uint32 mint = (trg->position.bgn < trg->position.end) ? trg->position.bgn : trg->position.end;
uint32 maxt = (trg->position.bgn < trg->position.end) ? trg->position.end : trg->position.bgn;
// If they overlap, mark as confirmed, else remember an intersection.
if (((minf < mint) && (mint < maxf)) || // t begins inside f
((mint < minf) && (minf < maxt))) { // f begins inside t
if (evidence[fi].edge5.frag3p())
evidence[ti].frag3confirmed = true;
else
evidence[ti].frag5confirmed = true;
} else {
evidence[fi].frag5self = true;
// Not the correct place to report this. Some of these get confirmed by later fragments.
//writeLog("BUG1 F: %d,%d T %d,%d\n", minf, maxf, mint, maxt);
//writeLog("INTERSECT from unitig %d frag %d end %d TO unitig %d frag %d end %d (SELF)\n",
// tig->id(), frg->ident, 5, evidence[fi].frag5tig, evidence[fi].edge5.fragId(), evidence[fi].edge5.frag3p() ? 3 : 5);
}
}
if (evidence[fi].frag3tig == tig->id()) {
uint32 ti = tig->pathPosition(evidence[fi].edge3.fragId());
ufNode *trg = &tig->ufpath[ti];
uint32 minf = (frg->position.bgn < frg->position.end) ? frg->position.bgn : frg->position.end;
uint32 maxf = (frg->position.bgn < frg->position.end) ? frg->position.end : frg->position.bgn;
uint32 mint = (trg->position.bgn < trg->position.end) ? trg->position.bgn : trg->position.end;
uint32 maxt = (trg->position.bgn < trg->position.end) ? trg->position.end : trg->position.bgn;
if (((minf < mint) && (mint < maxf)) || // t begins inside f
((mint < minf) && (minf &
|
请发表评论