[英]How to interface this DFS code with a Boost Graph DFS?
我已經在樹上定義了格式化操作,當它們使用具有以下結構和 DFS 定義的前/中/后順序運算符時,它們工作得很好(它也適用於 k-ary 樹):
struct Node
{
Node *parent = nullptr;
Node *left = nullptr;
Node *right = nullptr;
char data;
template<class Op1, class Op2, class Op3>
void depth_first_search(Op1 pre_order, Op2 in_order, Op3 post_order)
{
pre_order(*this);
if(this->left != nullptr && this->right != nullptr)
{
this->left->depth_first_search(pre_order, in_order, post_order);
in_order(*this);
this->right->depth_first_search(pre_order, in_order, post_order);
in_order(*this);
}
post_order(*this);
}
};
我可以使用以下格式格式化此結構:
Formatter formatter(my_config);
tree.depth_first_search(formatter.get_pre_order(), formatter.get_in_order(), formatter.get_post_order());
auto result = formatter.take_result();
由於它們工作得很好,我想在 Boost 圖具有樹值時重新使用相同的仿函數來操作格式化。 所以我試圖讓這個(近似/錯誤)語法工作:
template<class Formatter>
class my_visitor : boost::default_dfs_visitor
{
Formatter & _formatter;
public:
my_visitor(Formatter& formatter) : _formatter(formatter){}
auto discover_vertex(auto vertex, auto const& g)
{
auto f = _formatter.get_pre_order();
f(vertex);
}
auto examine_edge(auto edge, auto const& g)
{
auto f = _formatter.get_in_order();
f(edge);
}
auto finish_vertex(auto vertex, auto const& g)
{
auto f = _formatter.get_post_order();
f(vertex);
}
};
所以我可以使用類似的語法來格式化樹
Formatter formatter(my_config);
my_visitor vis(formatter);
depth_first_search(graph, root, boost::visitor(vis));
auto s = formatter.take_result();
在目前的 state 中,代碼在 Gobolt 上編譯和運行: https://godbolt.org/z/bzjqjbvE3
但是,在 main function 中調用的最后一個方法(還)不存在,因為我不知道如何指定它:
auto s = newick::generate_from(tree);
有這樣的 function 的評論草稿,但我很難將其與 BGL 相匹配:
///
/// @brief Generate a Newick string from a k-ary tree with no properties attached to edges or vertices
///
std::string generate_from(quetzal::coalescence::k_ary_tree<> graph)
{
using vertex_t = typename quetzal::coalescence::k_ary_tree<>::vertex_descriptor;
// Data access
std::predicate<vertex_t> auto has_parent = [&graph](vertex_t v){ return graph.has_parent(v); };
std::predicate<vertex_t> auto has_children = [&graph](vertex_t v){ return graph.has_children(v); };
newick::Formattable<vertex_t> auto label = [&graph](auto){ return ""; };
newick::Formattable<vertex_t> auto branch_length = [&graph](auto){ return ""; };
// We declare a generator passing it the data interfaces
auto generator = newick::make_generator(has_parent, has_children, label, branch_length);
// We expose its interface to the boost DFS algorithm
detail::newick_no_property_visitor vis(generator);
depth_first_search(graph, boost::visitor(vis));
}
我無法真正圍繞界面一致性進行思考,而且我很難確定問題的根源(笑):
Node
語義的參數(封裝節點標識符和它所指的圖)pre-order
映射到discover_vertex
, in_order
映射到examine_edge
和post_order
映射到finish_vertex
,雖然我不確定?有沒有辦法協調這兩個接口,或者它們的語義差異太大,我必須復制/修改格式語法?
這是我的看法。 我們不生成隨機圖,而是復制您使用“舊式Node*
2 元樹”處理的精確圖:
using G = quetzal::coalescence::k_ary_tree<>;
using V = G::vertex_descriptor;
enum {a,b,c,d,e,N};
G tree(N);
/*
* a
* / \
* / c
* / / \
* b d e
*/
add_edge(a, b, tree);
add_edge(a, c, tree);
add_edge(c, d, tree);
add_edge(c, e, tree);
auto name_map = boost::make_function_property_map<V>([](V v) -> char { return 'a' + v; });
print_graph(tree, name_map);
印刷
a --> b c
b -->
c --> d e
d -->
e -->
關鍵是將“格式化程序”( generator
)適當地包裝到 DFS 訪問者中。
一些注意事項:
State
結構中:// We declare a generator passing it the data interfaces
struct State {
newick::generator<V, P, P, F, F, decltype(flavor)> gen;
std::stack<int> nth_child;
} state{{has_parent, has_children, label, branch_length, flavor}, {}};
// We expose its interface to the boost DFS algorithm
struct VisWrap : boost::default_dfs_visitor {
State& state_;
VisWrap(State& ref) : state_(ref) {}
void discover_vertex(V v, G const&) const {
state_.nth_child.push(0);
state_.gen.pre_order()(v);
}
void finish_vertex(V v, G const&) const {
state_.gen.post_order()(v);
state_.nth_child.pop();
}
void tree_edge(E e, G const& g) const {
if (state_.nth_child.top()++ > 0)
state_.gen.in_order()(target(e, g));
}
} vis{state};
depth_first_search(graph, boost::visitor(vis));
return state.gen.take_result();
您在has_parent()
和has_children()
中混淆了in_edges
和out_edges
。 所以我改變了那些:
bool has_parent(vertex_descriptor v) const { auto [it1, it2] = out_edges(v, *this); // since it is a tree, at most 1 parent assert(std::distance(it1,it2) <= 1); // if iterators equal, then no parent return it1 == it2? false: true; } bool has_children(vertex_descriptor v) const { auto [it1, it2] = in_edges(v, *this); return std::distance(it1, it2) >= 1; }
進入(稍微現代化):
bool has_parent(vertex_descriptor v) const { auto pp = make_iterator_range(in_edges(v, *this)); // since it is a tree, at most 1 parent assert(boost::size(pp) <= 1); return.pp;empty(), } bool has_children(vertex_descriptor v) const { return.make_iterator_range(out_edges(v; *this)).empty(); }
請注意,使用in_edges
確實需要通過更改更改為 BidirectionalGraph 的 model
using directed_type = boost::directedS;
進入
using directed_type = boost::bidirectionalS; // for boost::in_edges
由於生成器操作與訪問者事件的時間問題,我不得不在添加/刪除逗號分隔符時添加一些保護措施。 我有一種感覺,如果您消除該區域中的“阻抗失配”,事情就會變得更簡單。
對於這個問題(好主意),您在 scope 中遺漏了一些工作,這意味着實際上對標簽/長度函數進行了概括。
#include <boost/graph/adjacency_list.hpp>
#include <boost/graph/depth_first_search.hpp>
#include <boost/graph/graphviz.hpp>
#include <boost/graph/isomorphism.hpp>
#include <concepts>
#include <iostream>
#include <regex>
namespace quetzal::coalescence {
namespace details {
///
/// @brief Defines the desired properties and constraints for a coalescent graph
///
struct tree_traits {
/// @brief Trees are sparse graph in nature, adjacency_matrix would not be justified here
template <class... Types> using model = boost::adjacency_list<Types...>;
/// @brief We want to enforce avoiding multi-graphs (edges with same end nodes)
using out_edge_list_type = boost::setS;
/// @brief We don't allow for inserting vertices except at the end and we don't remove vertices.
/// This means that neither reallocation cost nor stability are reasons for preferring
/// listS to vecS.
using vertex_list_type = boost::vecS;
/// @brief Coalescent trees are directed acyclic graphs
using directed_type = boost::bidirectionalS; // for boost::in_edges
};
} // namespace details
///
/// @brief A class adapted for simulating coalescent trees as a rooted K-ary tree, where each node can
/// hold at most k number of children.
///
/// @remark
/// - Since topology (pure structure) matter, a coalescence tree is more than a data container.
/// - This class inherits from as a boost::graph with specialized edge and vertex properties
/// defined in details#tree_traits
/// - The simulation interface intentionally does not allow for removing edges or vertices,
/// but you may access the underlying boost graph object to do so.
///
template <class VertexProperties = boost::no_property, class EdgeProperties = boost::no_property>
struct k_ary_tree
: public details::tree_traits::model<
details::tree_traits::out_edge_list_type, details::tree_traits::vertex_list_type,
details::tree_traits::directed_type, VertexProperties, EdgeProperties> {
/// @brief Properties of an edge, e.g. a structure representing the series of demes visited or simply
/// the branch length.
using edge_properties = EdgeProperties;
/// @brief Properties of a vertex, e.g. a structure representing the mutational state.
using vertex_properties = VertexProperties;
/// @brief The type of graph hold by the tree class
using base_type = details::tree_traits::model<
details::tree_traits::out_edge_list_type, details::tree_traits::vertex_list_type,
details::tree_traits::directed_type, vertex_properties, edge_properties>;
using self_type = k_ary_tree<vertex_properties, edge_properties>;
/// @brief The type used for identifying vertices within the graph
using vertex_descriptor = typename self_type::vertex_descriptor;
/// @brief Inherit all constructor from boost graph
using base_type::base_type;
///
/// @brief Print the tree to the graphviz format
/// @remarks Intends to hide the bundles writer syntax
///
void to_graphviz(std::ostream& out) const {
using namespace boost;
return boost::write_graphviz(out, *this,
boost::make_label_writer(boost::get(vertex_bundle, *this)),
boost::make_label_writer(boost::get(edge_bundle, *this)));
}
///
/// @brief Returns true if there exists an isomorphism between this and other and false otherwise.
///
template <class T> bool is_isomorphic(T const& other) noexcept {
return boost::isomorphism(*this, other);
}
///
/// @brief Returns true if a given node has a parent node
///
bool has_parent(vertex_descriptor v) const {
auto pp = make_iterator_range(in_edges(v, *this));
// since it is a tree, at most 1 parent
assert(boost::size(pp) <= 1);
return !pp.empty();
}
///
/// @brief Returns true if a given node has children nodes
///
bool has_children(vertex_descriptor v) const {
return !make_iterator_range(out_edges(v, *this)).empty();
}
}; // end class Tree
} // end namespace quetzal::coalescence
namespace quetzal::format::newick {
namespace detail {
// Replacement for `std::function<T(U)>::argument_type`
template <typename T> struct single_function_argument;
template <typename Ret, typename Arg> struct single_function_argument<std::function<Ret(Arg)>> {
using type = Arg;
};
template <typename P1> struct single_function_argument_impl {
using type = typename single_function_argument<decltype(std::function{std::declval<P1>()})>::type;
};
template <typename P1>
using single_function_argument_t = typename single_function_argument_impl<P1>::type;
/// @brief Tag
struct parenthesis {};
/// @brief Tag
struct square_bracket {};
///
/// @brief Check if the string is balanced for open/close symbols (parenthesis,brackets)
///
///
/// @note Since parenthesis checking is a context-free grammar, it requires a stack.
/// Regex can not accomplish that since they do not have memory.
///
bool check_if_balanced(std::string_view input, char const& open = '(', char const& close = ')') {
int count = 0;
for (auto const& ch : input) {
if (ch == open)
count++;
if (ch == close)
count--;
// if a parenthesis is closed without being opened return false
if (count < 0)
return false;
}
// in the end the test is passed only if count is zero
return count == 0;
}
///
/// @brief Default comment removal policy: do not change anything
///
struct identity {
static std::string edit(const std::string s) { return s; }
};
///
/// @brief Class template, base for further specialization
///
template <class tag> struct is_balanced {};
///
/// @brief Specialization for parenthesis
///
template <> struct is_balanced<detail::parenthesis> {
static bool check(std::string_view s) { return check_if_balanced(s, '(', ')'); }
};
///
/// @brief Specialization for square bracket
///
template <> struct is_balanced<detail::square_bracket> {
static bool check(std::string_view s) { return check_if_balanced(s, '[', ']'); }
};
} // namespace detail
using namespace std::string_literals;
///
/// @brief Node names can be any character except blanks, colons, semicolons, parentheses, and square
/// brackets.
///
static inline std::vector<std::string> forbidden_labels = {" "s, ","s, ";"s, "("s, ")"s, "["s, "]"s};
///
/// @brief Underscore characters in unquoted labels are converted to blanks.
///
/// @detail Because you may want to include a blank in a name, it is assumed
/// that an underscore character ("_") stands for a blank; any of
/// these in a name will be converted to a blank when it is read in.
///
static inline constexpr char blank = '_';
///
/// @brief Template class.
///
template <unsigned int N> struct remove_comments_of_depth {};
///
/// @brief Do not remove anything
///
template <> struct remove_comments_of_depth<0> : detail::identity {};
///
/// @brief Remove all comments substrings contained between square brackets
///
/// @note Assumes that text is well formatted so there are no such things like [[]] or unclosed bracket
///
template <> struct remove_comments_of_depth<1> {
static std::string edit(const std::string s) {
if (s.empty())
return s;
return std::regex_replace(s, std::regex(R"(\[[^()]*\])"), "");
}
};
///
/// @brief Remove all comments substrings contained between square brackets of depth 2
///
/// @note Assumes that text is well formatted so there are no such things like [[]] or unclosed bracket
///
template <> struct remove_comments_of_depth<2> {
static std::string edit(const std::string s) {
std::string buffer;
int counter = 0;
for (auto const& ch : s) {
if (ch == '[')
counter++;
if (ch == ']')
counter--;
if (ch == '[' && counter == 2)
continue; // do nothing, that was the opening
if (ch == ']' && counter == 1)
continue; // do nothing, that was the closing
if (!(counter >= 2 || (counter == 1 && ch == ']')))
buffer.append(std::string(1, ch));
}
return buffer;
}
};
///
/// @brief Policy allowing to keep nested comments.
///
/// @note Use this as a template parameter to specialize a Newick generator policy
///
struct PAUP {
// return empty string
static inline std::string root_branch_length() { return ""; }
// do nothing
static inline std::string treat_comments(std::string& s) { return s; }
};
///
/// @brief Set a root node branch length to zero, allow comments of depth 1, but will remove nested
/// comments.
///
/// @note Use this as a template parameter to specialize a Newick generator policy
///
struct TreeAlign {
// Set explicit null branch length for root node
static inline std::string root_branch_length() { return ":0.0"; }
// Remove comments that are nested, keep comments of depth 1
static inline std::string treat_comments(const std::string s) {
return remove_comments_of_depth<2>::edit(s);
}
};
///
/// @brief Requires that an unrooted tree begin with a trifurcation; it will not "uproot" a rooted tree.
/// Allow comments of depth 1, but does not allow nested comments.
/// @note Use this as a template parameter to specialize a Newick generator policy
///
struct PHYLIP {
// Branch length for root node is not explicit.
static inline std::string root_branch_length() { return ""; }
// Remove comments that are nested, keep comments of depth 1
static inline std::string treat_comments(std::string& s) {
// Allow comments of depth 1, but does not allow nested comments.
return remove_comments_of_depth<2>::edit(s);
}
};
///
/// @brief Concept for label name: invocable and the return type is convertible to a string.
///
template <class F, class... Args>
concept Formattable =
std::invocable<F, Args...> && std::convertible_to<std::invoke_result_t<F, Args...>, std::string>;
///
/// @brief Generate the Newick formula from an external (custom) tree class.
///
/// @remark This is a non-intrusive interface implementation so users can reuse Newick formatting
/// logic and expose the formatting internals to their own Tree class's DFS.
template <class T, std::predicate<T> P1, std::predicate<T> P2, Formattable<T> F1, Formattable<T> F2,
class Policy = PAUP>
class generator : public Policy {
public:
///
/// @brief Type of node being formatted
///
using node_type = T;
///
/// @brief Type of formula being generated
///
using formula_type = std::string;
///
/// @brief Type of formula being generated
///
using policy_type = Policy;
private:
///
/// @brief End character.
///
static inline constexpr char _end = ';';
///
/// @brief The string formula to be updated.
///
mutable formula_type _formula;
///
/// @brief Functor inspecting if the node being visited has a parent.
///
P1 _has_parent;
///
/// @brief Functor inspecting if the node being visited has children.
///
P2 _has_children;
///
/// @brief Retrieve the name of the node being visited.
///
/// @detail A name can be any string of printable characters except blanks,
/// colons, semicolons, parentheses, and square brackets.
///
/// @remark Return type must be convertible to std::string
///
F1 _label;
///
/// @brief Retrieve the branch length immediately above the node being visited.
///
/// @details Branch lengths can be incorporated into a tree by putting a
/// real number, with or without decimal point, after a node and
/// preceded by a colon. This represents the length of the branch
/// immediately above that node (that is, distance to parent node)
/// @remark Return type must be convertible to std::string
///
F2 _branch_length;
void _pre_order(node_type const& node) const {
if (std::invoke(_has_children, node)) {
_formula += '(';
}
}
void _in_order(node_type const&) const { _formula += ","; }
void _post_order(node_type const& node) const {
if (std::invoke(_has_children, node)) {
if (_formula.back() == ',')
_formula.pop_back(); // Remove comma
_formula += ')';
}
if (std::invoke(_has_parent, node)) {
auto label = std::invoke(_label, node);
if (has_forbidden_characters(remove_comments_of_depth<1>::edit(label))) {
throw std::invalid_argument(std::string("Label with forbidden characters:") +
std::string(label));
}
_formula += label;
std::string branch(std::invoke(_branch_length, node));
if (!branch.empty()) {
_formula += ":";
_formula += branch;
}
} else {
_formula += std::invoke(_label, node);
_formula += policy_type::root_branch_length();
}
}
public:
///
/// @brief Constructor
///
generator(P1 has_parent, P2 has_children, F1 label, F2 branch_length, Policy pol = {})
: policy_type(std::move(pol))
, _has_parent(std::move(has_parent))
, _has_children(std::move(has_children))
, _label(std::move(label))
, _branch_length(std::move(branch_length)) {}
///
/// @brief Operation called in the general DFS algorithm to open a parenthesis if node has children to
/// be visited.
///
/// @param node the node currently visited
///
auto pre_order() {
return [this](node_type const& node) { this->_pre_order(node); };
}
///
/// @brief Operation called in the general DFS algorithm to add a comma between visited nodes.
///
auto in_order() {
return [this](node_type const& node) { this->_in_order(node); };
}
///
/// @brief Operation to be passed to a generic DFS algorithm to open a parenthesis if node has
/// children to be visited.
///
/// @param node the node currently visited
///
auto post_order() {
return [this](node_type const& node) { this->_post_order(node); };
}
///
/// @brief Check if a string contains characters forbidden by the standard
///
bool has_forbidden_characters(std::string const& s) const {
if (s.empty())
return false;
std::string forbidden = " ,;()[\\]";
bool is_forbidden = std::regex_search(s, std::regex("[" + forbidden + "]"));
return is_forbidden;
}
///
/// @brief Clear the formula buffer to refresh the generator.
///
void clear() { _formula.clear(); }
///
/// @brief Retrieve the formatted string of the given node in the specified format
///
std::string&& take_result() const {
_formula.push_back(this->_end);
_formula = policy_type::treat_comments(_formula);
if (detail::is_balanced<detail::parenthesis>::check(_formula) == false) {
throw std::runtime_error(std::string("Failed: formula parenthesis are not balanced:") +
_formula);
}
if (detail::is_balanced<detail::square_bracket>::check(_formula) == false) {
throw std::runtime_error(std::string("Failed: formula square brackets are not balanced:") +
_formula);
}
return std::move(_formula);
}
}; // end structure generator
///
/// @brief User-defined deduction guide where the node/graph type T is deduced from P1
/// @remark Template deduction guides are patterns associated with a template
/// class that tell the compiler how to translate a set of constructor
/// arguments (and their types) into template parameters for the class.
template <class P1, class P2, class F1, class F2, class Policy = PAUP>
generator(P1, P2, F1, F2, Policy pol = {})
-> generator<detail::single_function_argument_t<P1>, P1, P2, F1, F2, Policy>;
} // end namespace quetzal::format::newick
#include <boost/graph/adjacency_list.hpp>
#include <boost/graph/connected_components.hpp>
#include <boost/graph/graph_utility.hpp>
#include <boost/graph/graphviz.hpp>
#include <boost/property_map/function_property_map.hpp>
#include <iomanip>
// Simplistic tree for testing - emulates implementations found in my field
struct Node {
Node* parent = nullptr;
Node* left = nullptr;
Node* right = nullptr;
char data;
template <class Op1, class Op2, class Op3>
void depth_first_search(Op1 pre_order, Op2 in_order, Op3 post_order) {
pre_order(*this);
if (this->left != nullptr && this->right != nullptr) {
this->left->depth_first_search(pre_order, in_order, post_order);
in_order(*this);
this->right->depth_first_search(pre_order, in_order, post_order);
in_order(*this);
}
post_order(*this);
}
};
using Flavor = quetzal::format::newick::TreeAlign;
std::string legacy_implementation() {
/* Topology :
*
* a
* / \
* / c
* / / \
* b d e
*/
Node a, b, c, d, e;
a.data = 'a';
b.data = 'b';
c.data = 'c';
d.data = 'd';
e.data = 'e';
a.left = &b;
b.parent = &a;
a.right = &c;
c.parent = &a;
c.left = &d;
d.parent = &c;
c.right = &e;
e.parent = &c;
namespace newick = quetzal::format::newick;
// Interfacing Quetzal generator with non-quetzal tree types
std::predicate<Node> auto has_parent = [](Node const& n) { return n.parent; };
std::predicate<Node> auto has_children = [](Node const& n) { return n.left && n.right; };
// Random data is generated for the branch length
newick::Formattable<Node> auto branch_length = [](Node const&) /*-> std::string*/ { return "0.1"; };
// More sophisticated label formatting
newick::Formattable<Node> auto label = [](Node const& n) {
return std::string(1, n.data) + "[my[comment]]";
};
// Writes a root node branch length with a value of 0.0 and disable/remove nested comments
newick::generator generator(has_parent, has_children, label, branch_length, Flavor());
a.depth_first_search(generator.pre_order(), generator.in_order(), generator.post_order());
// We retrieve the formatted string
return generator.take_result();
}
//
// @brief Generate a Newick string from a k-ary tree with no properties attached to edges or vertices
//
std::string generate_from(quetzal::coalescence::k_ary_tree<> const& graph, auto flavor) {
using G = quetzal::coalescence::k_ary_tree<>;
using V = G::vertex_descriptor;
using E = G::edge_descriptor;
namespace newick = quetzal::format::newick;
// Data access
using P = std::function<bool(V)>;
using F = std::function<std::string(V)>;
P has_parent = [&graph](V v) { return graph.has_parent(v); };
P has_children = [&graph](V v) { return graph.has_children(v); };
F branch_length = [](V) { return "0.1"; };
F label = [](V v) {
std::string r = "a[my[comment]]";
r.front() += v;
return r;
};
// We declare a generator passing it the data interfaces
struct State {
newick::generator<V, P, P, F, F, decltype(flavor)> gen;
std::stack<int> nth_child;
} state{{has_parent, has_children, label, branch_length, flavor}, {}};
// We expose its interface to the boost DFS algorithm
struct VisWrap : boost::default_dfs_visitor {
State& state_;
VisWrap(State& ref) : state_(ref) {}
void discover_vertex(V v, G const&) const {
state_.nth_child.push(0);
state_.gen.pre_order()(v);
}
void finish_vertex(V v, G const&) const {
state_.gen.post_order()(v);
state_.nth_child.pop();
}
void tree_edge(E e, G const& g) const {
if (state_.nth_child.top()++ > 0)
state_.gen.in_order()(target(e, g));
}
} vis{state};
depth_first_search(graph, boost::visitor(vis));
return state.gen.take_result();
}
int main() {
std::string const legacy = legacy_implementation();
assert(legacy == "(b[my]:0.1,(d[my]:0.1,e[my]:0.1)c[my]:0.1)a[my]:0.0;");
// NOW WITH BGL GRAPHS
using G = quetzal::coalescence::k_ary_tree<>;
enum {a,b,c,d,e,N};
G tree(N);
add_edge(a, b, tree);
add_edge(a, c, tree);
add_edge(c, d, tree);
add_edge(c, e, tree);
// Generate the newick string
auto const bgl = generate_from(tree, Flavor());
std::cout << quoted(bgl) << "\n";
std::cout << quoted(legacy) << "\n";
std::cout << (bgl == legacy?"matching":"MISMATCH") << "\n";
}
印刷
"(b[my]:0.1,(d[my]:0.1,e[my]:0.1)c[my]:0.1)a[my]:0.0;"
"(b[my]:0.1,(d[my]:0.1,e[my]:0.1)c[my]:0.1)a[my]:0.0;"
matching
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.