// // Copyright (c) 2023 Alan de Freitas (alandefreitas@gmail.com) // // Distributed under the Boost Software License, Version 1.0. (See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) // // Official repository: https://github.com/boostorg/url // #ifndef BOOST_URL_DETAIL_ROUTER_IPP #define BOOST_URL_DETAIL_ROUTER_IPP #include "../router.hpp" #include #include #include #include #include #include #include #include #include namespace boost { namespace urls { namespace detail { // A path segment template class segment_template { enum class modifier : unsigned char { none, // {id?} optional, // {id*} star, // {id+} plus }; std::string str_; bool is_literal_ = true; modifier modifier_ = modifier::none; friend struct segment_template_rule_t; public: segment_template() = default; bool match(pct_string_view seg) const; core::string_view string() const { return str_; } core::string_view id() const; bool empty() const { return str_.empty(); } bool is_literal() const { return is_literal_; } bool has_modifier() const { return !is_literal() && modifier_ != modifier::none; } bool is_optional() const { return modifier_ == modifier::optional; } bool is_star() const { return modifier_ == modifier::star; } bool is_plus() const { return modifier_ == modifier::plus; } friend bool operator==( segment_template const& a, segment_template const& b) { if (a.is_literal_ != b.is_literal_) return false; if (a.is_literal_) return a.str_ == b.str_; return a.modifier_ == b.modifier_; } // segments have precedence: // - literal // - unique // - optional // - plus // - star friend bool operator<( segment_template const& a, segment_template const& b) { if (b.is_literal()) return false; if (a.is_literal()) return !b.is_literal(); return a.modifier_ < b.modifier_; } }; // A segment template is either a literal string // or a replacement field (as in a format_string). // Fields cannot contain format specs and might // have one of the following modifiers: // - ?: optional segment // - *: zero or more segments // - +: one or more segments struct segment_template_rule_t { using value_type = segment_template; system::result parse( char const*& it, char const* end ) const noexcept; }; constexpr auto segment_template_rule = segment_template_rule_t{}; constexpr auto path_template_rule = grammar::tuple_rule( grammar::squelch( grammar::optional_rule( grammar::delim_rule('/'))), grammar::range_rule( segment_template_rule, grammar::tuple_rule( grammar::squelch(grammar::delim_rule('/')), segment_template_rule))); bool segment_template:: match(pct_string_view seg) const { if (is_literal_) return *seg == str_; // other nodes match any string return true; } core::string_view segment_template:: id() const { // if (is_literal_) return {}; BOOST_ASSERT(!is_literal()); core::string_view r = {str_}; r.remove_prefix(1); r.remove_suffix(1); if (r.ends_with('?') || r.ends_with('+') || r.ends_with('*')) r.remove_suffix(1); return r; } auto segment_template_rule_t:: parse( char const*& it, char const* end) const noexcept -> system::result { segment_template t; if (it != end && *it == '{') { // replacement field auto it0 = it; ++it; auto send = grammar::find_if( it, end, grammar::lut_chars('}')); if (send != end) { core::string_view s(it, send); static constexpr auto modifiers_cs = grammar::lut_chars("?*+"); static constexpr auto id_rule = grammar::tuple_rule( grammar::optional_rule( arg_id_rule), grammar::optional_rule( grammar::delim_rule(modifiers_cs))); if (s.empty() || grammar::parse(s, id_rule)) { it = send + 1; t.str_ = core::string_view(it0, send + 1); t.is_literal_ = false; if (s.ends_with('?')) t.modifier_ = segment_template::modifier::optional; else if (s.ends_with('*')) t.modifier_ = segment_template::modifier::star; else if (s.ends_with('+')) t.modifier_ = segment_template::modifier::plus; return t; } } it = it0; } // literal segment auto rv = grammar::parse( it, end, urls::detail::segment_rule); BOOST_ASSERT(rv); rv->decode({}, urls::string_token::assign_to(t.str_)); t.is_literal_ = true; return t; } // a small vector for child nodes... // we shouldn't expect many children per node, and // we don't want to allocate for that. But we also // cannot cap the max number of child nodes because // especially the root nodes can potentially an // exponentially higher number of child nodes. class child_idx_vector { static constexpr std::size_t N = 5; std::size_t static_child_idx_[N]{}; std::size_t* child_idx{nullptr}; std::size_t size_{0}; std::size_t cap_{0}; public: ~child_idx_vector() { delete[] child_idx; } child_idx_vector() = default; child_idx_vector(child_idx_vector const& other) : size_{other.size_} , cap_{other.cap_} { if (other.child_idx) { child_idx = new std::size_t[cap_]; std::memcpy(child_idx, other.child_idx, size_ * sizeof(std::size_t)); return; } std::memcpy(static_child_idx_, other.static_child_idx_, size_ * sizeof(std::size_t)); } child_idx_vector(child_idx_vector&& other) : child_idx{other.child_idx} , size_{other.size_} , cap_{other.cap_} { std::memcpy(static_child_idx_, other.static_child_idx_, N); other.child_idx = nullptr; } bool empty() const { return size_ == 0; } std::size_t size() const { return size_; } std::size_t* begin() { if (child_idx) return child_idx; return static_child_idx_; } std::size_t* end() { return begin() + size_; } std::size_t const* begin() const { if (child_idx) return child_idx; return static_child_idx_; } std::size_t const* end() const { return begin() + size_; } void erase(std::size_t* it) { BOOST_ASSERT(it - begin() >= 0); std::memmove(it - 1, it, end() - it); --size_; } void push_back(std::size_t v) { if (size_ == N && !child_idx) { child_idx = new std::size_t[N*2]; cap_ = N*2; std::memcpy(child_idx, static_child_idx_, N * sizeof(std::size_t)); } else if (child_idx && size_ == cap_) { auto* tmp = new std::size_t[cap_*2]; std::memcpy(tmp, child_idx, cap_ * sizeof(std::size_t)); delete[] child_idx; child_idx = tmp; cap_ = cap_*2; } begin()[size_++] = v; } }; // A node in the resource tree // Each segment in the resource tree might be // associated with struct node { static constexpr std::size_t npos{std::size_t(-1)}; // literal segment or replacement field detail::segment_template seg{}; // A pointer to the resource router_base::any_resource const* resource{nullptr}; // The complete match for the resource std::string path_template; // Index of the parent node in the // implementation pool of nodes std::size_t parent_idx{npos}; // Index of child nodes in the pool detail::child_idx_vector child_idx; }; class impl { // Pool of nodes in the resource tree std::vector nodes_; public: impl() { // root node with no resource nodes_.push_back(node{}); } ~impl() { for (auto &r: nodes_) delete r.resource; } // include a node for a resource void insert_impl( core::string_view path, router_base::any_resource const* v); // match a node and return the element router_base::any_resource const* find_impl( segments_encoded_view path, core::string_view*& matches, core::string_view*& ids) const; private: // try to match from this root node node const* try_match( segments_encoded_view::const_iterator it, segments_encoded_view::const_iterator end, node const* root, int level, core::string_view*& matches, core::string_view*& ids) const; // check if a node has a resource when we // also consider optional paths through // the child nodes. static node const* find_optional_resource( const node* root, std::vector const& ns, core::string_view*& matches, core::string_view*& ids); }; node const* impl:: find_optional_resource( const node* root, std::vector const& ns, core::string_view*& matches, core::string_view*& ids) { BOOST_ASSERT(root); if (root->resource) return root; BOOST_ASSERT(!root->child_idx.empty()); for (auto i: root->child_idx) { auto& c = ns[i]; if (!c.seg.is_optional() && !c.seg.is_star()) continue; // Child nodes are also // potentially optional. auto matches0 = matches; auto ids0 = ids; *matches++ = {}; *ids++ = c.seg.id(); auto n = find_optional_resource( &c, ns, matches, ids); if (n) return n; matches = matches0; ids = ids0; } return nullptr; } void impl:: insert_impl( core::string_view path, router_base::any_resource const* v) { // Parse dynamic route segments if (path.starts_with("/")) path.remove_prefix(1); auto segsr = grammar::parse(path, detail::path_template_rule); if (!segsr) { delete v; segsr.value(); } auto segs = *segsr; auto it = segs.begin(); auto end = segs.end(); // Iterate existing nodes node* cur = &nodes_.front(); int level = 0; while (it != end) { core::string_view seg = (*it).string(); if (seg == ".") { ++it; continue; } if (seg == "..") { // discount unmatched leaf or // keep track of levels behind root if (cur == &nodes_.front()) { --level; ++it; continue; } // move to parent deleting current // if it carries no resource std::size_t p_idx = cur->parent_idx; if (cur == &nodes_.back() && !cur->resource && cur->child_idx.empty()) { node* p = &nodes_[p_idx]; std::size_t cur_idx = cur - nodes_.data(); p->child_idx.erase( std::remove( p->child_idx.begin(), p->child_idx.end(), cur_idx)); nodes_.pop_back(); } cur = &nodes_[p_idx]; ++it; continue; } // discount unmatched root parent if (level < 0) { ++level; ++it; continue; } // look for child auto cit = std::find_if( cur->child_idx.begin(), cur->child_idx.end(), [this, &it](std::size_t ci) -> bool { return nodes_[ci].seg == *it; }); if (cit != cur->child_idx.end()) { // move to existing child cur = &nodes_[*cit]; } else { // create child if it doesn't exist node child; child.seg = *it; std::size_t cur_id = cur - nodes_.data(); child.parent_idx = cur_id; nodes_.push_back(std::move(child)); nodes_[cur_id].child_idx.push_back(nodes_.size() - 1); if (nodes_[cur_id].child_idx.size() > 1) { // keep nodes sorted auto& cs = nodes_[cur_id].child_idx; std::size_t n = cs.size() - 1; while (n) { if (nodes_[cs.begin()[n]].seg < nodes_[cs.begin()[n - 1]].seg) std::swap(cs.begin()[n], cs.begin()[n - 1]); else break; --n; } } cur = &nodes_.back(); } ++it; } if (level != 0) { delete v; urls::detail::throw_invalid_argument(); } cur->resource = v; cur->path_template = path; } node const* impl:: try_match( segments_encoded_view::const_iterator it, segments_encoded_view::const_iterator end, node const* cur, int level, core::string_view*& matches, core::string_view*& ids) const { while (it != end) { pct_string_view s = *it; if (*s == ".") { // ignore segment ++it; continue; } if (*s == "..") { // move back to the parent node ++it; if (level <= 0 && cur != &nodes_.front()) { if (!cur->seg.is_literal()) { --matches; --ids; } cur = &nodes_[cur->parent_idx]; } else // there's no parent, so we // discount that from the implicit // tree beyond terminals --level; continue; } // we are in the implicit tree above the // root, so discount that as a level if (level < 0) { ++level; ++it; continue; } // calculate the lower bound on the // possible number of branches to // determine if we need to branch. // We branch when we might have more than // one child matching node at this level. // If so, we need to potentially branch // to find which path leads to a valid // resource. Otherwise, we can just // consume the node and input without // any recursive function calls. bool branch = false; if (cur->child_idx.size() > 1) { int branches_lb = 0; for (auto i: cur->child_idx) { auto& c = nodes_[i]; if (c.seg.is_literal() || !c.seg.has_modifier()) { // a literal path counts only // if it matches branches_lb += c.seg.match(s); } else { // everything not matching // a single path counts as // more than one path already branches_lb = 2; } if (branches_lb > 1) { // already know we need to // branch branch = true; break; } } } // attempt to match each child node node const* r = nullptr; bool match_any = false; for (auto i: cur->child_idx) { auto& c = nodes_[i]; if (c.seg.match(s)) { if (c.seg.is_literal()) { // just continue from the // next segment if (branch) { r = try_match( std::next(it), end, &c, level, matches, ids); if (r) break; } else { cur = &c; match_any = true; break; } } else if (!c.seg.has_modifier()) { // just continue from the // next segment if (branch) { auto matches0 = matches; auto ids0 = ids; *matches++ = *it; *ids++ = c.seg.id(); r = try_match( std::next(it), end, &c, level, matches, ids); if (r) { break; } else { // rewind matches = matches0; ids = ids0; } } else { // only path possible *matches++ = *it; *ids++ = c.seg.id(); cur = &c; match_any = true; break; } } else if (c.seg.is_optional()) { // attempt to match by ignoring // and not ignoring the segment. // we first try the complete // continuation consuming the // input, which is the // longest and most likely // match auto matches0 = matches; auto ids0 = ids; *matches++ = *it; *ids++ = c.seg.id(); r = try_match( std::next(it), end, &c, level, matches, ids); if (r) break; // rewind matches = matches0; ids = ids0; // try complete continuation // consuming no segment *matches++ = {}; *ids++ = c.seg.id(); r = try_match( it, end, &c, level, matches, ids); if (r) break; // rewind matches = matches0; ids = ids0; } else { // check if the next segments // won't send us to a parent // directory auto first = it; std::size_t ndotdot = 0; std::size_t nnondot = 0; auto it1 = it; while (it1 != end) { if (*it1 == "..") { ++ndotdot; if (ndotdot >= (nnondot + c.seg.is_star())) break; } else if (*it1 != ".") { ++nnondot; } ++it1; } if (it1 != end) break; // attempt to match many // segments auto matches0 = matches; auto ids0 = ids; *matches++ = *it; *ids++ = c.seg.id(); // if this is a plus seg, we // already consumed the first // segment if (c.seg.is_plus()) { ++first; } // {*} is usually the last // match in a path. // try complete continuation // match for every subrange // from {last, last} to // {first, last}. // We also try {last, last} // first because it is the // longest match. auto start = end; while (start != first) { r = try_match( start, end, &c, level, matches, ids); if (r) { core::string_view prev = *std::prev(start); *matches0 = { matches0->data(), prev.data() + prev.size()}; break; } matches = matches0 + 1; ids = ids0 + 1; --start; } if (r) { break; } // start == first matches = matches0 + 1; ids = ids0 + 1; r = try_match( start, end, &c, level, matches, ids); if (r) { if (!c.seg.is_plus()) *matches0 = {}; break; } } } } // r represent we already found a terminal // node which is a match if (r) return r; // if we couldn't match anything, we go // one level up in the implicit tree // because the path might still have a // "..". if (!match_any) ++level; ++it; } if (level != 0) { // the path ended below or above an // existing node return nullptr; } if (!cur->resource) { // we consumed all the input and reached // a node with no resource, but it might // still have child optional segments // with resources we can reach without // consuming any input return find_optional_resource( cur, nodes_, matches, ids); } return cur; } router_base::any_resource const* impl:: find_impl( segments_encoded_view path, core::string_view*& matches, core::string_view*& ids) const { // parse_path is inconsistent for empty paths if (path.empty()) path = segments_encoded_view("./"); // Iterate nodes from the root node const*p = try_match( path.begin(), path.end(), &nodes_.front(), 0, matches, ids); if (p) return p->resource; return nullptr; } router_base:: router_base() : impl_(new impl{}) {} router_base:: ~router_base() { delete reinterpret_cast(impl_); } void router_base:: insert_impl( core::string_view s, any_resource const* v) { reinterpret_cast(impl_) ->insert_impl(s, v); } auto router_base:: find_impl( segments_encoded_view path, core::string_view*& matches, core::string_view*& ids) const noexcept -> any_resource const* { return reinterpret_cast(impl_) ->find_impl(path, matches, ids); } } // detail } // urls } // boost #endif