diff --git a/packages/CLPBN/clpbn/bp/BpSolver.cpp b/packages/CLPBN/clpbn/bp/BpSolver.cpp index c5f812f97..6fe7c4851 100644 --- a/packages/CLPBN/clpbn/bp/BpSolver.cpp +++ b/packages/CLPBN/clpbn/bp/BpSolver.cpp @@ -278,14 +278,10 @@ BpSolver::calculateFactor2VariableMsg (SpLink* link) const SpLinkSet& links = ninf(src)->getLinks(); // calculate the product of messages that were sent // to factor `src', except from var `dst' - Params::size_type msgSize = 1; + unsigned msgSize = 1; for (unsigned i = 0; i < links.size(); i++) { msgSize *= links[i]->getVariable()->range(); } - if (msgSize > Util::maxUnsigned()) { - cout << "error: an overflow occurred when sending bp message" << endl; - abort(); - } unsigned repetitions = 1; Params msgProduct (msgSize, LogAware::multIdenty()); if (Globals::logDomain) { diff --git a/packages/CLPBN/clpbn/bp/ConstraintTree.cpp b/packages/CLPBN/clpbn/bp/ConstraintTree.cpp index 40c3cce16..d854abb1b 100644 --- a/packages/CLPBN/clpbn/bp/ConstraintTree.cpp +++ b/packages/CLPBN/clpbn/bp/ConstraintTree.cpp @@ -84,14 +84,6 @@ CTNode::childSymbols (void) const void CTNode::updateChildLevels (CTNode* n, unsigned level) { - /* - n->setLevel (level); - const CTChilds& childs = n->childs(); - for (CTChilds::const_iterator chIt = childs.begin(); - chIt != childs.end(); ++ chIt) { - updateChildLevels (*chIt, level + 1); - } - */ CTNodes stack; stack.push_back (n); n->setLevel (level); @@ -134,16 +126,6 @@ CTNode::copySubtree (const CTNode* root1) } } return root2; - /* - CTNode* newNode = new CTNode (*root1); - const CTChilds& childs = root1->childs(); - newNode->childs().reserve (childs.size()); - for (CTChilds::const_iterator chIt = childs.begin(); - chIt != childs.end(); ++ chIt) { - newNode->childs().push_back ((copySubtree (*chIt))); - } - return newNode; - */ } @@ -329,9 +311,8 @@ ConstraintTree::join (ConstraintTree* ct, bool oneTwoOne) CTNodes::const_iterator appendIt = appendNodes.begin(); for (unsigned i = 0; i < tuples.size(); ++ i, ++ appendIt) { bool tupleFounded = join (root_, tuples[i], 0, *appendIt); - if (oneTwoOne) { - assert (tupleFounded); - tupleFounded = true; // hack to avoid gcc warning + if (oneTwoOne && tupleFounded == false) { + assert (false); } } @@ -378,10 +359,6 @@ ConstraintTree::rename (LogVar X_old, LogVar X_new) void ConstraintTree::applySubstitution (const Substitution& theta) { - LogVars discardedLvs = theta.getDiscardedLogVars(); - for (unsigned i = 0; i < discardedLvs.size(); i++) { - remove(discardedLvs[i]); - } for (unsigned i = 0; i < logVars_.size(); i++) { logVars_[i] = theta.newNameFor (logVars_[i]); } @@ -422,17 +399,6 @@ ConstraintTree::remove (const LogVarSet& X) bool ConstraintTree::ConstraintTree::isSingleton (LogVar X) { - /* - const CTNodes& nodes = getNodesAtLevel (getLevel (X)); - Symbol symb = nodes.front()->symbol(); - for (CTNodes::const_iterator it = nodes.begin(); - it != nodes.end(); ++ it) { - if ((*it)->symbol() != symb) { - return false; - } - } - return true; - */ Symbol symb; unsigned level = getLevel (X); CTNodes stack; @@ -487,12 +453,39 @@ ConstraintTree::tupleSet (unsigned stopLevel) const TupleSet -ConstraintTree::tupleSet (const LogVars& lvs) -{ +ConstraintTree::tupleSet (const LogVars& originalLvs) +{ + LogVars uniqueLvs; + for (unsigned i = 0; i < originalLvs.size(); i++) { + if (Util::contains (uniqueLvs, originalLvs[i]) == false) { + uniqueLvs.push_back (originalLvs[i]); + } + } + Tuples tuples; - moveToTop (lvs); - unsigned stopLevel = lvs.size(); + moveToTop (uniqueLvs); + unsigned stopLevel = uniqueLvs.size(); getTuples (root_, Tuples(), stopLevel, tuples, CTNodes() = {}); + + if (originalLvs.size() != uniqueLvs.size()) { + vector indexes; + indexes.reserve (originalLvs.size()); + for (unsigned i = 0; i < originalLvs.size(); i++) { + indexes.push_back (Util::vectorIndex (uniqueLvs, originalLvs[i])); + } + Tuples tuples2; + tuples2.reserve (tuples.size()); + for (unsigned i = 0; i < tuples.size(); i++) { + Tuple t; + t.reserve (originalLvs.size()); + for (unsigned j = 0; j < originalLvs.size(); j++) { + t.push_back (tuples[i][indexes[j]]); + } + tuples2.push_back (t); + } + return TupleSet (tuples2); + } + return TupleSet (tuples); } @@ -625,12 +618,13 @@ ConstraintTree::getConditionalCounts (const LogVarSet& Ys) bool -ConstraintTree::isCarteesianProduct (const LogVarSet& Xs) const +ConstraintTree::isCarteesianProduct (const LogVarSet& Xs) { assert (logVarSet_.contains (Xs)); if (Xs.size() <= 1) { return true; } + moveToTop (Xs.elements()); for (unsigned i = 1; i < Xs.size(); i++) { CTNodes nodes = getNodesAtLevel (i); for (unsigned j = 1; j < nodes.size(); j++) { @@ -648,24 +642,29 @@ ConstraintTree::isCarteesianProduct (const LogVarSet& Xs) const std::pair -ConstraintTree::split ( - const Tuple& tuple, - unsigned stopLevel) +ConstraintTree::split (const Tuple& tuple) { + // assumes that my log vars are already on top + LogVars lvs (logVars_.begin(), logVars_.begin() + tuple.size()); ConstraintTree tempCt (logVars_, {tuple}); - return split (&tempCt, stopLevel); + return split (lvs, &tempCt, lvs); } std::pair ConstraintTree::split ( - const ConstraintTree* ct, - unsigned stopLevel) const + const LogVars& lvs1, + ConstraintTree* ct, + const LogVars& lvs2) { - assert (stopLevel <= logVars_.size()); - assert (stopLevel <= ct->logVars_.size()); + assert (lvs1.size() == lvs2.size()); + assert (lvs1.size() == LogVarSet (lvs1).size()); + assert (lvs2.size() == LogVarSet (lvs2).size()); + assert (logVarSet_.contains (lvs1)); + assert (ct->logVarSet().contains (lvs2)); CTChilds commChilds, exclChilds; + unsigned stopLevel = lvs1.size(); split (root_, ct->root(), commChilds, exclChilds, stopLevel); ConstraintTree* commCt = new ConstraintTree (commChilds, logVars_); ConstraintTree* exclCt = new ConstraintTree (exclChilds, logVars_); @@ -804,32 +803,6 @@ ConstraintTree::jointCountNormalize ( -bool -ConstraintTree::identical ( - const ConstraintTree* ct1, - const ConstraintTree* ct2, - unsigned stopLevel) -{ - TupleSet ts1 = ct1->tupleSet (stopLevel); - TupleSet ts2 = ct2->tupleSet (stopLevel); - return ts1 == ts2; -} - - - -bool -ConstraintTree::disjoint ( - const ConstraintTree* ct1, - const ConstraintTree* ct2, - unsigned stopLevel) -{ - TupleSet ts1 = ct1->tupleSet (stopLevel); - TupleSet ts2 = ct2->tupleSet (stopLevel); - return (ts1 & ts2).empty(); -} - - - LogVars ConstraintTree::expand (LogVar X) { @@ -886,6 +859,21 @@ ConstraintTree::ground (LogVar X) +void +ConstraintTree::copyLogVar (LogVar X_1, LogVar X_2) +{ + moveToBottom ({X_1}); + CTNodes leafs = getNodesAtLevel (logVars_.size()); + for (unsigned i = 0; i < leafs.size(); i++) { + leafs[i]->childs().push_back ( + new CTNode (leafs[i]->symbol(), leafs[i]->level() + 1)); + } + logVars_.push_back (X_2); + logVarSet_.insert (X_2); +} + + + unsigned ConstraintTree::countTuples (const CTNode* n) const { @@ -930,23 +918,6 @@ ConstraintTree::getNodesAtLevel (unsigned level) const if (level == 0) { return { root_ }; } - /* - CTNodes nodes; - queue queue; - queue.push (root_); - while (queue.empty() == false) { - CTNode* node = queue.front(); - if (node->level() == level) { - nodes.push_back (node); - } else { - for (CTChilds::const_iterator chIt = node->childs().begin(); - chIt != node->childs().end(); ++ chIt) { - queue.push (*chIt); - } - } - queue.pop(); - } - */ CTNodes stack; CTNodes nodes; stack.push_back (root_); diff --git a/packages/CLPBN/clpbn/bp/ConstraintTree.h b/packages/CLPBN/clpbn/bp/ConstraintTree.h index 59b1a9bb0..8e7283264 100644 --- a/packages/CLPBN/clpbn/bp/ConstraintTree.h +++ b/packages/CLPBN/clpbn/bp/ConstraintTree.h @@ -185,28 +185,23 @@ class ConstraintTree TinySet getConditionalCounts (const LogVarSet&); - bool isCarteesianProduct (const LogVarSet&) const; + bool isCarteesianProduct (const LogVarSet&); + + std::pair split (const Tuple&); std::pair split ( - const Tuple&, unsigned); - - std::pair split ( - const ConstraintTree*, unsigned) const; + const LogVars&, ConstraintTree*, const LogVars&); ConstraintTrees countNormalize (const LogVarSet&); ConstraintTrees jointCountNormalize ( ConstraintTree*, ConstraintTree*, LogVar, LogVar, LogVar); - static bool identical ( - const ConstraintTree*, const ConstraintTree*, unsigned); - - static bool disjoint ( - const ConstraintTree*, const ConstraintTree*, unsigned); - LogVars expand (LogVar); ConstraintTrees ground (LogVar); + + void copyLogVar (LogVar,LogVar); private: unsigned countTuples (const CTNode*) const; diff --git a/packages/CLPBN/clpbn/bp/Factor.h b/packages/CLPBN/clpbn/bp/Factor.h index 04747259a..aa04fe32f 100644 --- a/packages/CLPBN/clpbn/bp/Factor.h +++ b/packages/CLPBN/clpbn/bp/Factor.h @@ -90,9 +90,8 @@ class TFactor for (unsigned i = 0; i < g_args.size(); i++) { int idx = indexOf (g_args[i]); if (idx == -1) { - Params::size_type newSize = params_.size() * g_ranges[i]; - if (newSize > Util::maxUnsigned()) { - // factor will become to bigger, is not worth to continue + ullong newSize = params_.size() * g_ranges[i]; + if (newSize > params_.max_size()) { cerr << "error: an overflow occurred on factor multiplication" ; cerr << endl; abort(); @@ -211,7 +210,7 @@ class TFactor return true; } - double& operator[] (unsigned idx) + double& operator[] (psize_t idx) { assert (idx < params_.size()); return params_[idx]; diff --git a/packages/CLPBN/clpbn/bp/FoveSolver.cpp b/packages/CLPBN/clpbn/bp/FoveSolver.cpp index e844c5b78..09da088e8 100644 --- a/packages/CLPBN/clpbn/bp/FoveSolver.cpp +++ b/packages/CLPBN/clpbn/bp/FoveSolver.cpp @@ -13,17 +13,23 @@ LiftedOperator::getValidOps ( const Grounds& query) { vector validOps; - vector sumOutOps; - vector countOps; - vector groundOps; + vector multOps; - sumOutOps = SumOutOperator::getValidOps (pfList, query); - countOps = CountingOperator::getValidOps (pfList); - groundOps = GroundOperator::getValidOps (pfList); + multOps = ProductOperator::getValidOps (pfList); + validOps.insert (validOps.end(), multOps.begin(), multOps.end()); + + if (Globals::verbosity > 1 || multOps.empty()) { + vector sumOutOps; + vector countOps; + vector groundOps; + sumOutOps = SumOutOperator::getValidOps (pfList, query); + countOps = CountingOperator::getValidOps (pfList); + groundOps = GroundOperator::getValidOps (pfList); + validOps.insert (validOps.end(), sumOutOps.begin(), sumOutOps.end()); + validOps.insert (validOps.end(), countOps.begin(), countOps.end()); + validOps.insert (validOps.end(), groundOps.begin(), groundOps.end()); + } - validOps.insert (validOps.end(), sumOutOps.begin(), sumOutOps.end()); - validOps.insert (validOps.end(), countOps.begin(), countOps.end()); - validOps.insert (validOps.end(), groundOps.begin(), groundOps.end()); return validOps; } @@ -44,17 +50,9 @@ LiftedOperator::printValidOps ( -vector -LiftedOperator::getAllGroupss (ParfactorList& ) -{ - return { }; -} - - - vector LiftedOperator::getParfactorsWithGroup ( - ParfactorList& pfList, unsigned group) + ParfactorList& pfList, unsigned group) { vector iters; ParfactorList::iterator pflIt = pfList.begin(); @@ -69,6 +67,105 @@ LiftedOperator::getParfactorsWithGroup ( +double +ProductOperator::getLogCost (void) +{ + return std::log (0.0); +} + + + +void +ProductOperator::apply (void) +{ + Parfactor* g1 = *g1_; + Parfactor* g2 = *g2_; + g1->multiply (*g2); + pfList_.remove (g1_); + pfList_.removeAndDelete (g2_); + pfList_.addShattered (g1); +} + + + +vector +ProductOperator::getValidOps (ParfactorList& pfList) +{ + vector validOps; + ParfactorList::iterator it1 = pfList.begin(); + ParfactorList::iterator penultimate = -- pfList.end(); + set pfs; + while (it1 != penultimate) { + if (Util::contains (pfs, *it1)) { + ++ it1; + continue; + } + ParfactorList::iterator it2 = it1; + ++ it2; + while (it2 != pfList.end()) { + if (Util::contains (pfs, *it2)) { + ++ it2; + continue; + } else { + if (validOp (*it1, *it2)) { + pfs.insert (*it1); + pfs.insert (*it2); + validOps.push_back (new ProductOperator ( + it1, it2, pfList)); + if (Globals::verbosity < 2) { + return validOps; + } + break; + } + } + ++ it2; + } + ++ it1; + } + return validOps; +} + + + +string +ProductOperator::toString (void) +{ + stringstream ss; + ss << "just multiplicate " ; + ss << (*g1_)->getAllGroups(); + ss << " x " ; + ss << (*g2_)->getAllGroups(); + ss << " [cost=" << std::exp (getLogCost()) << "]" << endl; + return ss.str(); +} + + + +bool +ProductOperator::validOp (Parfactor* g1, Parfactor* g2) +{ + TinySet g1_gs (g1->getAllGroups()); + TinySet g2_gs (g2->getAllGroups()); + if (g1_gs.contains (g2_gs) || g2_gs.contains (g1_gs)) { + TinySet intersect = g1_gs & g2_gs; + for (unsigned i = 0; i < intersect.size(); i++) { + if (g1->nrFormulasWithGroup (intersect[i]) != 1 || + g2->nrFormulasWithGroup (intersect[i]) != 1) { + return false; + } + int idx1 = g1->indexOfGroup (intersect[i]); + int idx2 = g2->indexOfGroup (intersect[i]); + if (g1->range (idx1) != g2->range (idx2)) { + return false; + } + } + return Parfactor::canMultiply (g1, g2); + } + return false; +} + + + double SumOutOperator::getLogCost (void) { @@ -84,7 +181,8 @@ SumOutOperator::getLogCost (void) ++ pfIter; } if (nrProdFactors == 1) { - return std::log (0.0); // best possible case + // best possible case + return std::log (0.0); } double cost = 1.0; for (unsigned i = 0; i < groupSet.size(); i++) { @@ -190,25 +288,20 @@ SumOutOperator::validOp ( if (isToEliminate (*pfIters[0], group, query) == false) { return false; } - unordered_map groupToRange; + int range = -1; for (unsigned i = 0; i < pfIters.size(); i++) { + if ((*pfIters[i])->nrFormulasWithGroup (group) > 1) { + return false; + } int fIdx = (*pfIters[i])->indexOfGroup (group); if ((*pfIters[i])->argument (fIdx).contains ( (*pfIters[i])->elimLogVars()) == false) { return false; } - vector ranges = (*pfIters[i])->ranges(); - vector groups = (*pfIters[i])->getAllGroups(); - for (unsigned i = 0; i < groups.size(); i++) { - unordered_map::iterator it; - it = groupToRange.find (groups[i]); - if (it == groupToRange.end()) { - groupToRange.insert (make_pair (groups[i], ranges[i])); - } else { - if (it->second != ranges[i]) { - return false; - } - } + if (range == -1) { + range = (*pfIters[i])->range (fIdx); + } else if ((int)(*pfIters[i])->range (fIdx) != range) { + return false; } } return true; @@ -252,8 +345,23 @@ CountingOperator::getLogCost (void) for (unsigned i = 0; i < counts.size(); i++) { cost += size * HistogramSet::nrHistograms (counts[i], range); } - if ((*pfIter_)->nrArguments() == 1) { - cost *= 3; // avoid counting conversion in the beginning + unsigned group = (*pfIter_)->argument (fIdx).group(); + int lvIndex = Util::vectorIndex ( + (*pfIter_)->argument (fIdx).logVars(), X_); + assert (lvIndex != -1); + ParfactorList::iterator pfIter = pfList_.begin(); + while (pfIter != pfList_.end()) { + if (pfIter != pfIter_) { + int fIdx2 = (*pfIter)->indexOfGroup (group); + if (fIdx2 != -1) { + LogVar Y = ((*pfIter)->argument (fIdx2).logVars()[lvIndex]); + if ((*pfIter)->canCountConvert (Y) == false) { + // the real cost should be the cost of grounding Y + cost *= 10.0; + } + } + } + ++ pfIter; } return std::log (cost); } @@ -295,6 +403,7 @@ CountingOperator::getValidOps (ParfactorList& pfList) if (validOp (*it, candidates[i])) { validOps.push_back (new CountingOperator ( it, candidates[i], pfList)); + } else { } } ++ it; @@ -337,12 +446,7 @@ CountingOperator::validOp (Parfactor* g, LogVar X) } bool countNormalized = g->constr()->isCountNormalized (X); if (countNormalized) { - unsigned condCount = g->constr()->getConditionalCount (X); - bool cartProduct = g->constr()->isCarteesianProduct ( - g->countedLogVars() | X); - if (condCount == 1 || cartProduct == false) { - return false; - } + return g->canCountConvert (X); } return true; } @@ -425,6 +529,11 @@ GroundOperator::apply (void) } delete pf; } + ParfactorList::iterator pflIt = pfList_.begin(); + while (pflIt != pfList_.end()) { + (*pflIt)->simplifyGrounds(); + ++ pflIt; + } } @@ -612,6 +721,39 @@ FoveSolver::countNormalize ( +Parfactor +FoveSolver::calcGroundMultiplication (Parfactor pf) +{ + LogVarSet lvs = pf.constr()->logVarSet(); + lvs -= pf.constr()->singletons(); + Parfactors newPfs = {new Parfactor (pf)}; + for (unsigned i = 0; i < lvs.size(); i++) { + Parfactors pfs = newPfs; + newPfs.clear(); + for (unsigned j = 0; j < pfs.size(); j++) { + bool countedLv = pfs[j]->countedLogVars().contains (lvs[i]); + if (countedLv) { + pfs[j]->fullExpand (lvs[i]); + newPfs.push_back (pfs[j]); + } else { + ConstraintTrees cts = pfs[j]->constr()->ground (lvs[i]); + for (unsigned k = 0; k < cts.size(); k++) { + newPfs.push_back (new Parfactor (pfs[j], cts[k])); + } + delete pfs[j]; + } + } + } + ParfactorList pfList (newPfs); + Parfactors groundShatteredPfs (pfList.begin(),pfList.end()); + for (unsigned i = 1; i < groundShatteredPfs.size(); i++) { + groundShatteredPfs[0]->multiply (*groundShatteredPfs[i]); + } + return Parfactor (*groundShatteredPfs[0]); +} + + + void FoveSolver::runSolver (const Grounds& query) { @@ -652,6 +794,7 @@ FoveSolver::runSolver (const Grounds& query) cout << "largest cost = " << std::exp (largestCost_) << endl; cout << endl; } + (*pfList_.begin())->simplifyGrounds(); (*pfList_.begin())->reorderAccordingGrounds (query); } @@ -756,8 +899,11 @@ FoveSolver::shatterAgainstQuery (const Grounds& query) while (it != pfList_.end()) { if ((*it)->containsGround (query[i])) { found = true; - std::pair split = - (*it)->constr()->split (query[i].args(), query[i].arity()); + std::pair split; + LogVars queryLvs ( + (*it)->constr()->logVars().begin(), + (*it)->constr()->logVars().begin() + query[i].arity()); + split = (*it)->constr()->split (query[i].args()); ConstraintTree* commCt = split.first; ConstraintTree* exclCt = split.second; newPfs.push_back (new Parfactor (*it, commCt)); @@ -813,8 +959,11 @@ FoveSolver::absorve ( } g->constr()->moveToTop (formulas[i].logVars()); - std::pair res - = g->constr()->split (&(obsFormula.constr()), formulas[i].arity()); + std::pair res; + res = g->constr()->split ( + formulas[i].logVars(), + &(obsFormula.constr()), + obsFormula.constr().logVars()); ConstraintTree* commCt = res.first; ConstraintTree* exclCt = res.second; diff --git a/packages/CLPBN/clpbn/bp/FoveSolver.h b/packages/CLPBN/clpbn/bp/FoveSolver.h index ef52fae7c..c0c1cfc71 100644 --- a/packages/CLPBN/clpbn/bp/FoveSolver.h +++ b/packages/CLPBN/clpbn/bp/FoveSolver.h @@ -19,14 +19,37 @@ class LiftedOperator static void printValidOps (ParfactorList&, const Grounds&); - static vector getAllGroupss (ParfactorList&); - static vector getParfactorsWithGroup ( ParfactorList&, unsigned group); }; +class ProductOperator : public LiftedOperator +{ + public: + ProductOperator ( + ParfactorList::iterator g1, ParfactorList::iterator g2, + ParfactorList& pfList) : g1_(g1), g2_(g2), pfList_(pfList) { } + + double getLogCost (void); + + void apply (void); + + static vector getValidOps (ParfactorList&); + + string toString (void); + + private: + static bool validOp (Parfactor*, Parfactor*); + + ParfactorList::iterator g1_; + ParfactorList::iterator g2_; + ParfactorList& pfList_; +}; + + + class SumOutOperator : public LiftedOperator { public: @@ -123,6 +146,8 @@ class FoveSolver static Parfactors countNormalize (Parfactor*, const LogVarSet&); + static Parfactor calcGroundMultiplication (Parfactor pf); + private: void runSolver (const Grounds&); diff --git a/packages/CLPBN/clpbn/bp/Horus.h b/packages/CLPBN/clpbn/bp/Horus.h index 8ad3293e5..90f346bfd 100644 --- a/packages/CLPBN/clpbn/bp/Horus.h +++ b/packages/CLPBN/clpbn/bp/Horus.h @@ -16,15 +16,17 @@ class Factor; class VarNode; class FacNode; -typedef vector Params; -typedef unsigned VarId; -typedef vector VarIds; -typedef vector Vars; -typedef vector VarNodes; -typedef vector FacNodes; -typedef vector Factors; -typedef vector States; -typedef vector Ranges; +typedef vector Params; +typedef unsigned VarId; +typedef vector VarIds; +typedef vector Vars; +typedef vector VarNodes; +typedef vector FacNodes; +typedef vector Factors; +typedef vector States; +typedef vector Ranges; +typedef Params::size_type psize_t; +typedef unsigned long long ullong; enum InfAlgorithms diff --git a/packages/CLPBN/clpbn/bp/HorusYap.cpp b/packages/CLPBN/clpbn/bp/HorusYap.cpp index 876d0c063..9c108ffd6 100644 --- a/packages/CLPBN/clpbn/bp/HorusYap.cpp +++ b/packages/CLPBN/clpbn/bp/HorusYap.cpp @@ -68,6 +68,7 @@ int createLiftedNetwork (void) Util::printHeader ("INITIAL PARFACTORS"); for (unsigned i = 0; i < parfactors.size(); i++) { parfactors[i]->print(); + cout << endl; } } diff --git a/packages/CLPBN/clpbn/bp/LiftedUtils.h b/packages/CLPBN/clpbn/bp/LiftedUtils.h index 181e98789..d89fe477b 100644 --- a/packages/CLPBN/clpbn/bp/LiftedUtils.h +++ b/packages/CLPBN/clpbn/bp/LiftedUtils.h @@ -137,14 +137,20 @@ class Substitution LogVar newNameFor (LogVar X) const { - assert (Util::contains (subs_, X)); - return subs_.find (X)->second; + unordered_map::const_iterator it; + it = subs_.find (X); + if (it != subs_.end()) { + return subs_.find (X)->second; + } + return X; } bool containsReplacementFor (LogVar X) const { return Util::contains (subs_, X); } + + unsigned nrReplacements (void) const { return subs_.size(); } LogVars getDiscardedLogVars (void) const; diff --git a/packages/CLPBN/clpbn/bp/Parfactor.cpp b/packages/CLPBN/clpbn/bp/Parfactor.cpp index 4caac3c08..c5ad13d14 100644 --- a/packages/CLPBN/clpbn/bp/Parfactor.cpp +++ b/packages/CLPBN/clpbn/bp/Parfactor.cpp @@ -193,6 +193,32 @@ Parfactor::multiply (Parfactor& g) alignAndExponentiate (this, &g); TFactor::multiply (g); constr_->join (g.constr(), true); + simplifyGrounds(); + assert (constr_->isCarteesianProduct (countedLogVars())); +} + + + +bool +Parfactor::canCountConvert (LogVar X) +{ + if (nrFormulas (X) != 1) { + return false; + } + int fIdx = indexOfLogVar (X); + if (args_[fIdx].isCounting()) { + return false; + } + if (constr_->isCountNormalized (X) == false) { + return false; + } + if (constr_->getConditionalCount (X) == 1) { + return false; + } + if (constr_->isCarteesianProduct (countedLogVars() | X) == false) { + return false; + } + return true; } @@ -201,11 +227,10 @@ void Parfactor::countConvert (LogVar X) { int fIdx = indexOfLogVar (X); - assert (fIdx != -1); assert (constr_->isCountNormalized (X)); assert (constr_->getConditionalCount (X) > 1); - assert (constr_->isCarteesianProduct (countedLogVars() | X)); - + assert (canCountConvert (X)); + unsigned N = constr_->getConditionalCount (X); unsigned R = ranges_[fIdx]; unsigned H = HistogramSet::nrHistograms (N, R); @@ -245,6 +270,7 @@ Parfactor::countConvert (LogVar X) ++ mapIndexer; } args_[fIdx].setCountedLogVar (X); + simplifyCountingFormulas (fIdx); } @@ -307,10 +333,9 @@ Parfactor::fullExpand (LogVar X) unsigned N = constr_->getConditionalCount (X); unsigned R = args_[fIdx].range(); - vector originHists = HistogramSet::getHistograms (N, R); vector expandHists = HistogramSet::getHistograms (1, R); - + assert (ranges_[fIdx] == originHists.size()); vector sumIndexes; sumIndexes.reserve (N * R); @@ -496,6 +521,20 @@ Parfactor::indexOfGroup (unsigned group) const +unsigned +Parfactor::nrFormulasWithGroup (unsigned group) const +{ + unsigned count = 0; + for (unsigned i = 0; i < args_.size(); i++) { + if (args_[i].group() == group) { + count ++; + } + } + return count; +} + + + vector Parfactor::getAllGroups (void) const { @@ -547,6 +586,7 @@ Parfactor::print (bool printParams) const if (printParams == false) { cout << "Params: " ; if (params_.size() <= 32) { + cout.precision(10); cout << params_ << endl; } else { cout << "|" << params_.size() << "|" << endl; @@ -556,35 +596,56 @@ Parfactor::print (bool printParams) const copy.moveToTop (copy.logVarSet().elements()); cout << "Tuples: " << copy.tupleSet() << endl; if (printParams) { - vector jointStrings; - StatesIndexer indexer (ranges_); - while (indexer.valid()) { - stringstream ss; - for (unsigned i = 0; i < args_.size(); i++) { - if (i != 0) ss << ", " ; - if (args_[i].isCounting()) { - unsigned N = constr_->getConditionalCount ( - args_[i].countedLogVar()); - HistogramSet hs (N, args_[i].range()); - unsigned c = 0; - while (c < indexer[i]) { - hs.nextHistogram(); - c ++; - } - ss << hs; - } else { - ss << indexer[i]; - } - } - jointStrings.push_back (ss.str()); - ++ indexer; - } - for (unsigned i = 0; i < params_.size(); i++) { - cout << "f(" << jointStrings[i] << ")" ; - cout << " = " << params_[i] << endl; - } + printParameters(); + } +} + + + +void +Parfactor::printParameters (void) const +{ + vector jointStrings; + StatesIndexer indexer (ranges_); + while (indexer.valid()) { + stringstream ss; + for (unsigned i = 0; i < args_.size(); i++) { + if (i != 0) ss << ", " ; + if (args_[i].isCounting()) { + unsigned N = constr_->getConditionalCount ( + args_[i].countedLogVar()); + HistogramSet hs (N, args_[i].range()); + unsigned c = 0; + while (c < indexer[i]) { + hs.nextHistogram(); + c ++; + } + ss << hs; + } else { + ss << indexer[i]; + } + } + jointStrings.push_back (ss.str()); + ++ indexer; + } + for (unsigned i = 0; i < params_.size(); i++) { + cout << "f(" << jointStrings[i] << ")" ; + cout << " = " << params_[i] << endl; + } +} + + + +void +Parfactor::printProjections (void) const +{ + ConstraintTree copy (*constr_); + + LogVarSet Xs = copy.logVarSet(); + for (unsigned i = 0; i < Xs.size(); i++) { + cout << "-> projection of " << Xs[i] << ": " ; + cout << copy.tupleSet ({Xs[i]}) << endl; } - cout << endl; } @@ -595,9 +656,8 @@ Parfactor::expandPotential ( unsigned newRange, const vector& sumIndexes) { - Params::size_type newSize = (params_.size() / ranges_[fIdx]) * newRange; - if (newSize > Util::maxUnsigned()) { - // factor will become to bigger, is not worth to continue + ullong newSize = (params_.size() / ranges_[fIdx]) * newRange; + if (newSize > params_.max_size()) { cerr << "error: an overflow occurred when performing expansion" ; cerr << endl; abort(); @@ -651,22 +711,119 @@ Parfactor::expandPotential ( void -Parfactor::alignAndExponentiate (Parfactor* g1, Parfactor* g2) +Parfactor::simplifyCountingFormulas (int fIdx) { - LogVars X_1, X_2; - const ProbFormulas& formulas1 = g1->arguments(); - const ProbFormulas& formulas2 = g2->arguments(); - for (unsigned i = 0; i < formulas1.size(); i++) { - for (unsigned j = 0; j < formulas2.size(); j++) { - if (formulas1[i].group() == formulas2[j].group()) { - Util::addToVector (X_1, formulas1[i].logVars()); - Util::addToVector (X_2, formulas2[j].logVars()); + // check if we can simplify the parfactor + for (unsigned i = 0; i < args_.size(); i++) { + if ((int)i != fIdx && + args_[i].isCounting() && + args_[i].group() == args_[fIdx].group()) { + // if they only differ in the name of the counting log var + if ((args_[i].logVarSet() - args_[i].countedLogVar()) == + (args_[fIdx].logVarSet()) - args_[fIdx].countedLogVar() && + ranges_[i] == ranges_[fIdx]) { + simplifyParfactor (fIdx, i); + break; } } } - LogVarSet Y_1 = g1->logVarSet() - LogVarSet (X_1); - LogVarSet Y_2 = g2->logVarSet() - LogVarSet (X_2); - // counting log vars were already raised on counting conversion +} + + + +void +Parfactor::simplifyGrounds (void) +{ + LogVarSet singletons = constr_->singletons(); + for (int i = 0; i < (int)args_.size() - 1; i++) { + for (unsigned j = i + 1; j < args_.size(); j++) { + if (args_[i].group() == args_[j].group() && + singletons.contains (args_[i].logVarSet()) && + singletons.contains (args_[j].logVarSet())) { + simplifyParfactor (i, j); + i --; + break; + } + } + } +} + + + +bool +Parfactor::canMultiply (Parfactor* g1, Parfactor* g2) +{ + std::pair res = getAlignLogVars (g1, g2); + LogVarSet Xs_1 (res.first); + LogVarSet Xs_2 (res.second); + LogVarSet Y_1 = g1->logVarSet() - Xs_1; + LogVarSet Y_2 = g2->logVarSet() - Xs_2; + Y_1 -= g1->countedLogVars(); + Y_2 -= g2->countedLogVars(); + return g1->constr()->isCountNormalized (Y_1) && + g2->constr()->isCountNormalized (Y_2); +} + + + +void +Parfactor::simplifyParfactor (unsigned fIdx1, unsigned fIdx2) +{ + Params copy = params_; + params_.clear(); + StatesIndexer indexer (ranges_); + while (indexer.valid()) { + if (indexer[fIdx1] == indexer[fIdx2]) { + params_.push_back (copy[indexer]); + } + ++ indexer; + } + for (unsigned i = 0; i < args_[fIdx2].logVars().size(); i++) { + if (nrFormulas (args_[fIdx2].logVars()[i]) == 1) { + constr_->remove ({ args_[fIdx2].logVars()[i] }); + } + } + args_.erase (args_.begin() + fIdx2); + ranges_.erase (ranges_.begin() + fIdx2); +} + + + +std::pair +Parfactor::getAlignLogVars (Parfactor* g1, Parfactor* g2) +{ + g1->simplifyGrounds(); + g2->simplifyGrounds(); + LogVars Xs_1, Xs_2; + TinySet matchedI; + TinySet matchedJ; + ProbFormulas& formulas1 = g1->arguments(); + ProbFormulas& formulas2 = g2->arguments(); + for (unsigned i = 0; i < formulas1.size(); i++) { + for (unsigned j = 0; j < formulas2.size(); j++) { + if (formulas1[i].group() == formulas2[j].group() && + g1->range (i) == g2->range (j) && + matchedI.contains (i) == false && + matchedJ.contains (j) == false) { + Util::addToVector (Xs_1, formulas1[i].logVars()); + Util::addToVector (Xs_2, formulas2[j].logVars()); + matchedI.insert (i); + matchedJ.insert (j); + } + } + } + return make_pair (Xs_1, Xs_2); +} + + + +void +Parfactor::alignAndExponentiate (Parfactor* g1, Parfactor* g2) +{ + alignLogicalVars (g1, g2); + LogVarSet comm = g1->logVarSet() & g2->logVarSet(); + LogVarSet Y_1 = g1->logVarSet() - comm; + LogVarSet Y_2 = g2->logVarSet() - comm; Y_1 -= g1->countedLogVars(); Y_2 -= g2->countedLogVars(); assert (g1->constr()->isCountNormalized (Y_1)); @@ -675,26 +832,27 @@ Parfactor::alignAndExponentiate (Parfactor* g1, Parfactor* g2) unsigned condCount2 = g2->constr()->getConditionalCount (Y_2); LogAware::pow (g1->params(), 1.0 / condCount2); LogAware::pow (g2->params(), 1.0 / condCount1); - // the alignment should be done in the end or else X_1 and X_2 - // will refer to the old log var names on the code above - align (g1, X_1, g2, X_2); } void -Parfactor::align ( - Parfactor* g1, const LogVars& alignLvs1, - Parfactor* g2, const LogVars& alignLvs2) +Parfactor::alignLogicalVars (Parfactor* g1, Parfactor* g2) { - LogVar freeLogVar = 0; + std::pair res = getAlignLogVars (g1, g2); + const LogVars& alignLvs1 = res.first; + const LogVars& alignLvs2 = res.second; + // cout << "ALIGNING :::::::::::::::::" << endl; + // g1->print(); + // cout << "AND" << endl; + // g2->print(); + // cout << "-> align lvs1 = " << alignLvs1 << endl; + // cout << "-> align lvs2 = " << alignLvs2 << endl; + LogVar freeLogVar (0); Substitution theta1, theta2; for (unsigned i = 0; i < alignLvs1.size(); i++) { bool b1 = theta1.containsReplacementFor (alignLvs1[i]); bool b2 = theta2.containsReplacementFor (alignLvs2[i]); - // handle this type of situation: - // g1 = p(X), q(X) ; X in {x1} - // g2 = p(X), q(Y) ; X in {x1}, Y in {x1} if (b1 == false && b2 == false) { theta1.add (alignLvs1[i], freeLogVar); theta2.add (alignLvs2[i], freeLogVar); @@ -705,6 +863,7 @@ Parfactor::align ( theta2.add (alignLvs2[i], theta1.newNameFor (alignLvs1[i])); } } + const LogVarSet& allLvs1 = g1->logVarSet(); for (unsigned i = 0; i < allLvs1.size(); i++) { if (theta1.containsReplacementFor (allLvs1[i]) == false) { @@ -719,6 +878,31 @@ Parfactor::align ( ++ freeLogVar; } } + + // handle this type of situation: + // g1 = p(X), q(X) ; X in {(p1),(p2)} + // g2 = p(X), q(Y) ; (X,Y) in {(p1,p2),(p2,p1)} + LogVars discardedLvs1 = theta1.getDiscardedLogVars(); + for (unsigned i = 0; i < discardedLvs1.size(); i++) { + if (g1->constr()->isSingleton (discardedLvs1[i]) && + g1->nrFormulas (discardedLvs1[i]) == 1) { + g1->constr()->remove (discardedLvs1[i]); + } else { + LogVar X_new = ++ g1->constr()->logVarSet().back(); + theta1.rename (discardedLvs1[i], X_new); + } + } + LogVars discardedLvs2 = theta2.getDiscardedLogVars(); + for (unsigned i = 0; i < discardedLvs2.size(); i++) { + if (g2->constr()->isSingleton (discardedLvs2[i]) && + g2->nrFormulas (discardedLvs2[i]) == 1) { + g2->constr()->remove (discardedLvs2[i]); + } else { + LogVar X_new = ++ g2->constr()->logVarSet().back(); + theta2.rename (discardedLvs2[i], X_new); + } + } + // cout << "theta1: " << theta1 << endl; // cout << "theta2: " << theta2 << endl; g1->applySubstitution (theta1); diff --git a/packages/CLPBN/clpbn/bp/Parfactor.h b/packages/CLPBN/clpbn/bp/Parfactor.h index 4c206e209..1a55e9c55 100644 --- a/packages/CLPBN/clpbn/bp/Parfactor.h +++ b/packages/CLPBN/clpbn/bp/Parfactor.h @@ -50,6 +50,8 @@ class Parfactor : public TFactor void multiply (Parfactor&); + bool canCountConvert (LogVar X); + void countConvert (LogVar); void expand (LogVar, LogVar, LogVar); @@ -76,22 +78,40 @@ class Parfactor : public TFactor int indexOfGroup (unsigned) const; + unsigned nrFormulasWithGroup (unsigned) const; + vector getAllGroups (void) const; void print (bool = false) const; + void printParameters (void) const; + + void printProjections (void) const; + string getLabel (void) const; + void simplifyGrounds (void); + + static bool canMultiply (Parfactor*, Parfactor*); + private: + + void simplifyCountingFormulas (int fIdx); + + void simplifyParfactor (unsigned fIdx1, unsigned fIdx2); + + static std::pair getAlignLogVars ( + Parfactor* g1, Parfactor* g2); + void expandPotential (int fIdx, unsigned newRange, const vector& sumIndexes); static void alignAndExponentiate (Parfactor*, Parfactor*); - static void align ( - Parfactor*, const LogVars&, Parfactor*, const LogVars&); + static void alignLogicalVars (Parfactor*, Parfactor*); ConstraintTree* constr_; + }; diff --git a/packages/CLPBN/clpbn/bp/ParfactorList.cpp b/packages/CLPBN/clpbn/bp/ParfactorList.cpp index bca6de33d..d588a393a 100644 --- a/packages/CLPBN/clpbn/bp/ParfactorList.cpp +++ b/packages/CLPBN/clpbn/bp/ParfactorList.cpp @@ -98,6 +98,9 @@ ParfactorList::isAllShattered (void) const return true; } vector pfs (pfList_.begin(), pfList_.end()); + for (unsigned i = 0; i < pfs.size(); i++) { + assert (isShattered (pfs[i])); + } for (unsigned i = 0; i < pfs.size() - 1; i++) { for (unsigned j = i + 1; j < pfs.size(); j++) { if (isShattered (pfs[i], pfs[j]) == false) { @@ -117,11 +120,48 @@ ParfactorList::print (void) const std::sort (pfVec.begin(), pfVec.end(), sortByParams()); for (unsigned i = 0; i < pfVec.size(); i++) { pfVec[i]->print(); + cout << endl; } } +bool +ParfactorList::isShattered (const Parfactor* g) const +{ + const ProbFormulas& formulas = g->arguments(); + if (formulas.size() < 2) { + return true; + } + ConstraintTree ct (*g->constr()); + for (unsigned i = 0; i < formulas.size() - 1; i++) { + for (unsigned j = i + 1; j < formulas.size(); j++) { + if (formulas[i].group() == formulas[j].group()) { + if (identical ( + formulas[i], *(g->constr()), + formulas[j], *(g->constr())) == false) { + g->print(); + cout << "-> not identical on positions " ; + cout << i << " and " << j << endl; + return false; + } + } else { + if (disjoint ( + formulas[i], *(g->constr()), + formulas[j], *(g->constr())) == false) { + g->print(); + cout << "-> not disjoint on positions " ; + cout << i << " and " << j << endl; + return false; + } + } + } + } + return true; +} + + + bool ParfactorList::isShattered ( const Parfactor* g1, @@ -130,18 +170,28 @@ ParfactorList::isShattered ( assert (g1 != g2); const ProbFormulas& fms1 = g1->arguments(); const ProbFormulas& fms2 = g2->arguments(); + for (unsigned i = 0; i < fms1.size(); i++) { for (unsigned j = 0; j < fms2.size(); j++) { if (fms1[i].group() == fms2[j].group()) { if (identical ( fms1[i], *(g1->constr()), fms2[j], *(g2->constr())) == false) { + g1->print(); + cout << "^" << endl; + g2->print(); + cout << "-> not identical on group " << fms1[i].group() << endl; return false; } } else { if (disjoint ( fms1[i], *(g1->constr()), fms2[j], *(g2->constr())) == false) { + g1->print(); + cout << "^" << endl; + g2->print(); + cout << "-> not disjoint on groups " << fms1[i].group(); + cout << " and " << fms2[j].group() << endl; return false; } } @@ -180,7 +230,12 @@ ParfactorList::addToShatteredList (Parfactor* g) } residuals.pop(); if (pfSplitted == false) { - addShattered (pf); + Parfactors res = shatterAgainstMySelf (pf); + if (res.empty()) { + addShattered (pf); + } else { + Util::addToQueue (residuals, res); + } } } assert (isAllShattered()); @@ -188,6 +243,150 @@ ParfactorList::addToShatteredList (Parfactor* g) +Parfactors +ParfactorList::shatterAgainstMySelf (Parfactor* g) +{ + Parfactors pfs; + queue residuals; + residuals.push (g); + bool shattered = true; + while (residuals.empty() == false) { + Parfactor* pf = residuals.front(); + Parfactors res = shatterAgainstMySelf2 (pf); + if (res.empty()) { + assert (isShattered (pf)); + if (shattered) { + return { }; + } + pfs.push_back (pf); + } else { + shattered = false; + for (unsigned i = 0; i < res.size(); i++) { + assert (res[i]->constr()->empty() == false); + residuals.push (res[i]); + } + delete pf; + } + residuals.pop(); + } + return pfs; +} + + + +Parfactors +ParfactorList::shatterAgainstMySelf2 (Parfactor* g) +{ + // slip a parfactor with overlapping formulas: + // e.g. {s(X),s(Y)}, with (X,Y) in {(p1,p2),(p1,p3),(p4,p1)} + const ProbFormulas& formulas = g->arguments(); + for (unsigned i = 0; i < formulas.size() - 1; i++) { + for (unsigned j = i + 1; j < formulas.size(); j++) { + if (formulas[i].sameSkeletonAs (formulas[j])) { + Parfactors res = shatterAgainstMySelf (g, i, j); + if (res.empty() == false) { + return res; + } + } + } + } + return Parfactors(); +} + + + +Parfactors +ParfactorList::shatterAgainstMySelf ( + Parfactor* g, + unsigned fIdx1, + unsigned fIdx2) +{ + /* + Util::printDashedLine(); + cout << "-> SHATTERING" << endl; + g->print(); + cout << "-> ON: " << g->argument (fIdx1) << "|" ; + cout << g->constr()->tupleSet (g->argument (fIdx1).logVars()) << endl; + cout << "-> ON: " << g->argument (fIdx2) << "|" ; + cout << g->constr()->tupleSet (g->argument (fIdx2).logVars()) << endl; + Util::printDashedLine(); + */ + ProbFormula& f1 = g->argument (fIdx1); + ProbFormula& f2 = g->argument (fIdx2); + if (f1.isAtom()) { + cerr << "error: a ground occurs twice in a parfactor" << endl; + cerr << endl; + abort(); + } + assert (g->constr()->empty() == false); + ConstraintTree ctCopy (*g->constr()); + if (f1.group() == f2.group()) { + assert (identical (f1, *(g->constr()), f2, ctCopy)); + return { }; + } + + g->constr()->moveToTop (f1.logVars()); + ctCopy.moveToTop (f2.logVars()); + + std::pair split1 = + g->constr()->split (f1.logVars(), &ctCopy, f2.logVars()); + ConstraintTree* commCt1 = split1.first; + ConstraintTree* exclCt1 = split1.second; + + if (commCt1->empty()) { + // disjoint + delete commCt1; + delete exclCt1; + return { }; + } + + unsigned newGroup = ProbFormula::getNewGroup(); + Parfactors res1 = shatter (g, fIdx1, commCt1, exclCt1, newGroup); + if (res1.empty()) { + res1.push_back (g); + } + + Parfactors res; + ctCopy.moveToTop (f1.logVars()); + for (unsigned i = 0; i < res1.size(); i++) { + res1[i]->constr()->moveToTop (f2.logVars()); + std::pair split2; + split2 = res1[i]->constr()->split (f2.logVars(), &ctCopy, f1.logVars()); + ConstraintTree* commCt2 = split2.first; + ConstraintTree* exclCt2 = split2.second; + if (commCt2->empty()) { + if (res1[i] != g) { + res.push_back (res1[i]); + } + delete commCt2; + delete exclCt2; + continue; + } + newGroup = ProbFormula::getNewGroup(); + Parfactors res2 = shatter (res1[i], fIdx2, commCt2, exclCt2, newGroup); + if (res2.empty()) { + if (res1[i] != g) { + res.push_back (res1[i]); + } + } else { + Util::addToVector (res, res2); + for (unsigned j = 0; j < res2.size(); j++) { + } + if (res1[i] != g) { + delete res1[i]; + } + } + } + + if (res.empty()) { + g->argument (fIdx2).setGroup (g->argument (fIdx1).group()); + updateGroups (f2.group(), f1.group()); + } + return res; +} + + + std::pair ParfactorList::shatter (Parfactor* g1, Parfactor* g2) { @@ -218,17 +417,18 @@ ParfactorList::shatter ( { ProbFormula& f1 = g1->argument (fIdx1); ProbFormula& f2 = g2->argument (fIdx2); - // cout << endl; - // Util::printDashedLine(); - // cout << "-> SHATTERING (#" << g1 << ", #" << g2 << ")" << endl; - // g1->print(); - // cout << "-> WITH" << endl; - // g2->print(); - // cout << "-> ON: " << f1 << "|" ; - // cout << g1->constr()->tupleSet (f1.logVars()) << endl; - // cout << "-> ON: " << f2 << "|" ; - // cout << g2->constr()->tupleSet (f2.logVars()) << endl; - // Util::printDashedLine(); + /* + Util::printDashedLine(); + cout << "-> SHATTERING" << endl; + g1->print(); + cout << "-> WITH" << endl; + g2->print(); + cout << "-> ON: " << f1 << "|" ; + cout << g1->constr()->tupleSet (f1.logVars()) << endl; + cout << "-> ON: " << f2 << "|" ; + cout << g2->constr()->tupleSet (f2.logVars()) << endl; + Util::printDashedLine(); + */ if (f1.isAtom()) { f2.setGroup (f1.group()); updateGroups (f2.group(), f1.group()); @@ -245,7 +445,7 @@ ParfactorList::shatter ( g2->constr()->moveToTop (f2.logVars()); std::pair split1 = - g1->constr()->split (g2->constr(), f1.arity()); + g1->constr()->split (f1.logVars(), g2->constr(), f2.logVars()); ConstraintTree* commCt1 = split1.first; ConstraintTree* exclCt1 = split1.second; @@ -257,12 +457,12 @@ ParfactorList::shatter ( } std::pair split2 = - g2->constr()->split (g1->constr(), f2.arity()); + g2->constr()->split (f2.logVars(), g1->constr(), f1.logVars()); ConstraintTree* commCt2 = split2.first; ConstraintTree* exclCt2 = split2.second; - assert (commCt1->tupleSet (f1.arity()) == - commCt2->tupleSet (f2.arity())); + assert (commCt1->tupleSet (f1.logVars()) == + commCt2->tupleSet (f2.logVars())); // unsigned static count = 0; count ++; // stringstream ss1; ss1 << "" << count << "_A.dot" ; @@ -374,19 +574,19 @@ ParfactorList::updateGroups (unsigned oldGroup, unsigned newGroup) bool ParfactorList::proper ( - const ProbFormula& f1, ConstraintTree c1, - const ProbFormula& f2, ConstraintTree c2) const + const ProbFormula& f1, ConstraintTree ct1, + const ProbFormula& f2, ConstraintTree ct2) const { - return disjoint (f1, c1, f2, c2) - || identical (f1, c1, f2, c2); + return disjoint (f1, ct1, f2, ct2) + || identical (f1, ct1, f2, ct2); } bool ParfactorList::identical ( - const ProbFormula& f1, ConstraintTree c1, - const ProbFormula& f2, ConstraintTree c2) const + const ProbFormula& f1, ConstraintTree ct1, + const ProbFormula& f2, ConstraintTree ct2) const { if (f1.sameSkeletonAs (f2) == false) { return false; @@ -394,28 +594,26 @@ ParfactorList::identical ( if (f1.isAtom()) { return true; } - c1.moveToTop (f1.logVars()); - c2.moveToTop (f2.logVars()); - return ConstraintTree::identical ( - &c1, &c2, f1.arity()); + TupleSet ts1 = ct1.tupleSet (f1.logVars()); + TupleSet ts2 = ct2.tupleSet (f2.logVars()); + return ts1 == ts2; } bool ParfactorList::disjoint ( - const ProbFormula& f1, ConstraintTree c1, - const ProbFormula& f2, ConstraintTree c2) const + const ProbFormula& f1, ConstraintTree ct1, + const ProbFormula& f2, ConstraintTree ct2) const { if (f1.sameSkeletonAs (f2) == false) { return true; } if (f1.isAtom()) { - return true; + return false; } - c1.moveToTop (f1.logVars()); - c2.moveToTop (f2.logVars()); - return ConstraintTree::disjoint ( - &c1, &c2, f1.arity()); + TupleSet ts1 = ct1.tupleSet (f1.logVars()); + TupleSet ts2 = ct2.tupleSet (f2.logVars()); + return (ts1 & ts2).empty(); } diff --git a/packages/CLPBN/clpbn/bp/ParfactorList.h b/packages/CLPBN/clpbn/bp/ParfactorList.h index a574184d7..bcf5aa3e1 100644 --- a/packages/CLPBN/clpbn/bp/ParfactorList.h +++ b/packages/CLPBN/clpbn/bp/ParfactorList.h @@ -59,9 +59,18 @@ class ParfactorList private: + bool isShattered (const Parfactor*) const; + bool isShattered (const Parfactor*, const Parfactor*) const; void addToShatteredList (Parfactor*); + + Parfactors shatterAgainstMySelf (Parfactor* g); + + Parfactors shatterAgainstMySelf2 (Parfactor* g); + + Parfactors shatterAgainstMySelf ( + Parfactor* g, unsigned fIdx1, unsigned fIdx2); std::pair shatter ( Parfactor*, Parfactor*); diff --git a/packages/CLPBN/clpbn/bp/ProbFormula.cpp b/packages/CLPBN/clpbn/bp/ProbFormula.cpp index 785163202..827f255b7 100644 --- a/packages/CLPBN/clpbn/bp/ProbFormula.cpp +++ b/packages/CLPBN/clpbn/bp/ProbFormula.cpp @@ -99,9 +99,9 @@ ProbFormula::rename (LogVar oldName, LogVar newName) bool operator== (const ProbFormula& f1, const ProbFormula& f2) -{ - return f1.group_ == f2.group_; - //return functor_ == f.functor_ && logVars_ == f.logVars_ ; +{ + return f1.group_ == f2.group_ && + f1.logVars_ == f2.logVars_; } diff --git a/packages/CLPBN/clpbn/bp/ProbFormula.h b/packages/CLPBN/clpbn/bp/ProbFormula.h index 9982c4865..e05665889 100644 --- a/packages/CLPBN/clpbn/bp/ProbFormula.h +++ b/packages/CLPBN/clpbn/bp/ProbFormula.h @@ -7,7 +7,7 @@ #include "LiftedUtils.h" #include "Horus.h" - +typedef unsigned PrvGroup; class ProbFormula { diff --git a/packages/CLPBN/clpbn/bp/TODO b/packages/CLPBN/clpbn/bp/TODO index c2eaaaebc..a6bbb5930 100644 --- a/packages/CLPBN/clpbn/bp/TODO +++ b/packages/CLPBN/clpbn/bp/TODO @@ -1,13 +1,4 @@ -- Refactor sum out in factor -- Add a way to sum out several vars at the same time -- Receive ranges as a constant reference in Indexer -- Merge TinySet and SortedVector classes -- Check if evidence remains in the compressed factor graph -- Check is this doesn't happen phi(smokes(p1),smokes(p1)) -- Handle {p(X),q(X)} * {p(X),q(Y)}, X={x_1,...,x_n}, Y={x_1,...,x_n} -- Handle {p(X),p(Y)}, where constraint = (x1,x2),(x2,x1) -- Add a constant for the maximum factor size -- Add a lifted multiply operation to FoveSolver -- Maybe directly hashs instead of vectors of colors to calculate the groups in - counting bp - +TODO + - add a way to calculate combinations and factorials with large numbers + - refactor sumOut in parfactor -> is really ugly code + - Indexer: start receiving ranges as constant reference diff --git a/packages/CLPBN/clpbn/bp/Util.cpp b/packages/CLPBN/clpbn/bp/Util.cpp index 9f8f1955f..6eb220779 100644 --- a/packages/CLPBN/clpbn/bp/Util.cpp +++ b/packages/CLPBN/clpbn/bp/Util.cpp @@ -55,10 +55,7 @@ stringToUnsigned (string str) stringstream ss; ss << str; ss >> val; - if (val < 0) { - cerr << "error: the value tried to read is negative" << endl; - abort(); - } + abort ("error: the value tried to read is negative", val < 0); return static_cast (val); } @@ -297,6 +294,17 @@ setHorusFlag (string key, string value) +void +abort (string msg, bool b) +{ + if (b) { + cerr << msg << endl; + std::abort(); + } +} + + + void printHeader (string header, std::ostream& os) { diff --git a/packages/CLPBN/clpbn/bp/Util.h b/packages/CLPBN/clpbn/bp/Util.h index 3b65c5141..72517fb6b 100644 --- a/packages/CLPBN/clpbn/bp/Util.h +++ b/packages/CLPBN/clpbn/bp/Util.h @@ -34,6 +34,8 @@ template bool contains (const set&, const T&); template bool contains ( const unordered_map&, const K&); +template int vectorIndex (const vector&, const T&); + template std::string toString (const T&); template <> std::string toString (const bool&); @@ -76,6 +78,8 @@ vector getStateLines (const Vars&); bool setHorusFlag (string key, string value); +void abort (string msg, bool = true); + void printHeader (string, std::ostream& os = std::cout); void printSubHeader (string, std::ostream& os = std::cout); @@ -138,6 +142,19 @@ Util::contains (const unordered_map& m, const K& k) +template int +Util::vectorIndex (const vector& v, const T& e) +{ + int pos = std::distance (v.begin(), + std::find (v.begin(), v.end(), e)); + if (pos == (int)v.size()) { + pos = -1; + } + return pos; +} + + + template std::string Util::toString (const T& t) { diff --git a/packages/CLPBN/clpbn/bp/benchmarks/city/gen_city.sh b/packages/CLPBN/clpbn/bp/benchmarks/city/gen_city.sh index 814707d93..7c95f4ed3 100755 --- a/packages/CLPBN/clpbn/bp/benchmarks/city/gen_city.sh +++ b/packages/CLPBN/clpbn/bp/benchmarks/city/gen_city.sh @@ -1,4 +1,4 @@ -#! /home/tiago/bin/yap -L -- +#! /home/tgomes/bin/yap -L -- :- initialization(main). diff --git a/packages/CLPBN/clpbn/bp/benchmarks/comp_workshops/bp_tests.sh b/packages/CLPBN/clpbn/bp/benchmarks/comp_workshops/bp_tests.sh index 58666fc33..4cabcb531 100755 --- a/packages/CLPBN/clpbn/bp/benchmarks/comp_workshops/bp_tests.sh +++ b/packages/CLPBN/clpbn/bp/benchmarks/comp_workshops/bp_tests.sh @@ -8,13 +8,13 @@ SOLVER="bp" function run_all_graphs { write_header $1 - run_solver p1000w$N_WORKSHOPS $2 - run_solver p5000w$N_WORKSHOPS $2 - run_solver p10000w$N_WORKSHOPS $2 - run_solver p20000w$N_WORKSHOPS $2 - run_solver p30000w$N_WORKSHOPS $2 - run_solver p40000w$N_WORKSHOPS $2 - run_solver p50000w$N_WORKSHOPS $2 + run_solver p1000w$N_WORKSHOPS $2 + run_solver p5000w$N_WORKSHOPS $2 + run_solver p10000w$N_WORKSHOPS $2 + run_solver p20000w$N_WORKSHOPS $2 + run_solver p30000w$N_WORKSHOPS $2 + run_solver p40000w$N_WORKSHOPS $2 + run_solver p50000w$N_WORKSHOPS $2 } prepare_new_run diff --git a/packages/CLPBN/clpbn/bp/benchmarks/comp_workshops/cbp_tests.sh b/packages/CLPBN/clpbn/bp/benchmarks/comp_workshops/cbp_tests.sh index 9814d8257..01b620c83 100755 --- a/packages/CLPBN/clpbn/bp/benchmarks/comp_workshops/cbp_tests.sh +++ b/packages/CLPBN/clpbn/bp/benchmarks/comp_workshops/cbp_tests.sh @@ -8,13 +8,13 @@ SOLVER="cbp" function run_all_graphs { write_header $1 - run_solver p1000w$N_WORKSHOPS $2 - run_solver p5000w$N_WORKSHOPS $2 - run_solver p10000w$N_WORKSHOPS $2 - run_solver p20000w$N_WORKSHOPS $2 - run_solver p30000w$N_WORKSHOPS $2 - run_solver p40000w$N_WORKSHOPS $2 - run_solver p50000w$N_WORKSHOPS $2 + run_solver p1000w$N_WORKSHOPS $2 + run_solver p5000w$N_WORKSHOPS $2 + run_solver p10000w$N_WORKSHOPS $2 + run_solver p20000w$N_WORKSHOPS $2 + run_solver p30000w$N_WORKSHOPS $2 + run_solver p40000w$N_WORKSHOPS $2 + run_solver p50000w$N_WORKSHOPS $2 } prepare_new_run diff --git a/packages/CLPBN/clpbn/bp/benchmarks/comp_workshops/fove_tests.sh b/packages/CLPBN/clpbn/bp/benchmarks/comp_workshops/fove_tests.sh index 94fbf8009..f280f7814 100755 --- a/packages/CLPBN/clpbn/bp/benchmarks/comp_workshops/fove_tests.sh +++ b/packages/CLPBN/clpbn/bp/benchmarks/comp_workshops/fove_tests.sh @@ -8,13 +8,13 @@ SOLVER="fove" function run_all_graphs { write_header $1 - run_solver p1000w$N_WORKSHOPS $2 - run_solver p5000w$N_WORKSHOPS $2 - run_solver p10000w$N_WORKSHOPS $2 - run_solver p20000w$N_WORKSHOPS $2 - run_solver p30000w$N_WORKSHOPS $2 - run_solver p40000w$N_WORKSHOPS $2 - run_solver p50000w$N_WORKSHOPS $2 + run_solver p1000w$N_WORKSHOPS $2 + run_solver p5000w$N_WORKSHOPS $2 + run_solver p10000w$N_WORKSHOPS $2 + run_solver p20000w$N_WORKSHOPS $2 + run_solver p30000w$N_WORKSHOPS $2 + run_solver p40000w$N_WORKSHOPS $2 + run_solver p50000w$N_WORKSHOPS $2 } prepare_new_run diff --git a/packages/CLPBN/clpbn/bp/benchmarks/comp_workshops/hve_tests.sh b/packages/CLPBN/clpbn/bp/benchmarks/comp_workshops/hve_tests.sh index a44d7f28f..a0f2f3d34 100755 --- a/packages/CLPBN/clpbn/bp/benchmarks/comp_workshops/hve_tests.sh +++ b/packages/CLPBN/clpbn/bp/benchmarks/comp_workshops/hve_tests.sh @@ -8,13 +8,13 @@ SOLVER="hve" function run_all_graphs { write_header $1 - run_solver p1000w$N_WORKSHOPS $2 - run_solver p5000w$N_WORKSHOPS $2 - run_solver p10000w$N_WORKSHOPS $2 - run_solver p20000w$N_WORKSHOPS $2 - run_solver p30000w$N_WORKSHOPS $2 - run_solver p40000w$N_WORKSHOPS $2 - run_solver p50000w$N_WORKSHOPS $2 + run_solver p1000w$N_WORKSHOPS $2 + run_solver p5000w$N_WORKSHOPS $2 + run_solver p10000w$N_WORKSHOPS $2 + run_solver p20000w$N_WORKSHOPS $2 + run_solver p30000w$N_WORKSHOPS $2 + run_solver p40000w$N_WORKSHOPS $2 + run_solver p50000w$N_WORKSHOPS $2 } prepare_new_run diff --git a/packages/CLPBN/clpbn/bp/benchmarks/smokers/gen_people.sh b/packages/CLPBN/clpbn/bp/benchmarks/smokers/gen_people.sh index 3a6e9cdee..b8eebbfa1 100755 --- a/packages/CLPBN/clpbn/bp/benchmarks/smokers/gen_people.sh +++ b/packages/CLPBN/clpbn/bp/benchmarks/smokers/gen_people.sh @@ -1,4 +1,4 @@ -#!/home/tiago/bin/yap -L -- +#!/home/tgomes/bin/yap -L -- :- initialization(main). diff --git a/packages/CLPBN/clpbn/bp/benchmarks/smokers/hve_tests.sh b/packages/CLPBN/clpbn/bp/benchmarks/smokers/hve_tests.sh index ce67d86ff..179e80acd 100755 --- a/packages/CLPBN/clpbn/bp/benchmarks/smokers/hve_tests.sh +++ b/packages/CLPBN/clpbn/bp/benchmarks/smokers/hve_tests.sh @@ -10,12 +10,15 @@ function run_all_graphs write_header $1 run_solver pop25 $2 run_solver pop50 $2 - run_solver pop75 $2 - run_solver pop100 $2 - run_solver pop125 $2 - run_solver pop150 $2 + #run_solver pop75 $2 + #run_solver pop100 $2 + #run_solver pop125 $2 + #run_solver pop150 $2 } prepare_new_run run_all_graphs "hve(elim_heuristic=min_neighbors) " min_neighbors +#run_all_graphs "hve(elim_heuristic=min_weight) " min_weight +#run_all_graphs "hve(elim_heuristic=min_fill) " min_fill +#run_all_graphs "hve(elim_heuristic=weighted_min_fill) " weighted_min_fill diff --git a/packages/CLPBN/clpbn/bp/benchmarks/workshop_attrs/gen_attrs.sh b/packages/CLPBN/clpbn/bp/benchmarks/workshop_attrs/gen_attrs.sh index 3f005cbf3..f3e7a5212 100755 --- a/packages/CLPBN/clpbn/bp/benchmarks/workshop_attrs/gen_attrs.sh +++ b/packages/CLPBN/clpbn/bp/benchmarks/workshop_attrs/gen_attrs.sh @@ -1,4 +1,4 @@ -#!/home/tiago/bin/yap -L -- +#!/home/tgomes/bin/yap -L -- :- use_module(library(lists)). diff --git a/packages/CLPBN/clpbn/bp/examples/fail.yap b/packages/CLPBN/clpbn/bp/examples/fail.yap deleted file mode 100644 index aa5cfc5db..000000000 --- a/packages/CLPBN/clpbn/bp/examples/fail.yap +++ /dev/null @@ -1,21 +0,0 @@ - -:- use_module(library(pfl)). - -:- set_pfl_flag(solver,fove). -%:- set_pfl_flag(solver,bp), clpbn_horus:set_horus_flag(inf_alg,ve). -%:- set_pfl_flag(solver,bp), clpbn_horus:set_horus_flag(inf_alg,bp). -%:- set_pfl_flag(solver,bp), clpbn_horus:set_horus_flag(inf_alg,cbp). - - -t(ann). -t(dave). - -% p(ann,t). - -markov p(X)::[t,f] ; [0.1, 0.3] ; [t(X)]. - -% use standard Prolog queries: provide evidence first. - -?- p(ann,t), p(ann,X). -% ?- p(ann,X). -