Ada 3.3.0
Fast spec-compliant URL parser
Loading...
Searching...
No Matches
url_pattern.h
Go to the documentation of this file.
1
5#ifndef ADA_URL_PATTERN_H
6#define ADA_URL_PATTERN_H
7
9#include "ada/expected.h"
10#include "ada/parser.h"
12
13#include <ostream>
14#include <string>
15#include <string_view>
16#include <unordered_map>
17#include <variant>
18#include <vector>
19
20#if ADA_TESTING
21#include <iostream>
22#endif // ADA_TESTING
23
24#if ADA_INCLUDE_URL_PATTERN
25namespace ada {
26
// Kind of a parsed pattern part. The underlying type is a single byte and the
// enumerators are numbered consecutively from zero in declaration order.
enum class url_pattern_part_type : uint8_t {
  // Plain literal text that has to appear as-is.
  FIXED_TEXT = 0,
  // Matching group backed by a user-supplied regular expression.
  REGEXP = 1,
  // Matching group that consumes code points up to the next separator code
  // point. Typically produced by a named group such as ":foo" that carries no
  // custom regular expression.
  SEGMENT_WILDCARD = 2,
  // Matching group that greedily consumes every code point. Typically
  // produced by the "*" wildcard.
  FULL_WILDCARD = 3,
};
40
// Repetition modifier attached to a pattern part. The underlying type is a
// single byte and the enumerators are numbered consecutively from zero in
// declaration order.
enum class url_pattern_part_modifier : uint8_t {
  // No modifier is present.
  none = 0,
  // Optional part, written with the U+003F (?) code point.
  optional = 1,
  // "Zero or more" repetition, written with the U+002A (*) code point.
  zero_or_more = 2,
  // "One or more" repetition, written with the U+002B (+) code point.
  one_or_more = 3,
};
53
54// @see https://urlpattern.spec.whatwg.org/#part
55class url_pattern_part {
56 public:
57 url_pattern_part(url_pattern_part_type _type, std::string&& _value,
58 url_pattern_part_modifier _modifier)
59 : type(_type), value(std::move(_value)), modifier(_modifier) {}
60
61 url_pattern_part(url_pattern_part_type _type, std::string&& _value,
62 url_pattern_part_modifier _modifier, std::string&& _name,
63 std::string&& _prefix, std::string&& _suffix)
64 : type(_type),
65 value(std::move(_value)),
66 modifier(_modifier),
67 name(std::move(_name)),
68 prefix(std::move(_prefix)),
69 suffix(std::move(_suffix)) {}
70 // A part has an associated type, a string, which must be set upon creation.
71 url_pattern_part_type type;
72 // A part has an associated value, a string, which must be set upon creation.
73 std::string value;
74 // A part has an associated modifier a string, which must be set upon
75 // creation.
76 url_pattern_part_modifier modifier;
77 // A part has an associated name, a string, initially the empty string.
78 std::string name{};
79 // A part has an associated prefix, a string, initially the empty string.
80 std::string prefix{};
81 // A part has an associated suffix, a string, initially the empty string.
82 std::string suffix{};
83
84 inline bool is_regexp() const noexcept;
85};
86
87// @see https://urlpattern.spec.whatwg.org/#options-header
88struct url_pattern_compile_component_options {
89 url_pattern_compile_component_options() = default;
90 explicit url_pattern_compile_component_options(
91 std::optional<char> new_delimiter = std::nullopt,
92 std::optional<char> new_prefix = std::nullopt)
93 : delimiter(new_delimiter), prefix(new_prefix) {}
94
95 inline std::string_view get_delimiter() const ada_warn_unused;
96 inline std::string_view get_prefix() const ada_warn_unused;
97
98 // @see https://urlpattern.spec.whatwg.org/#options-ignore-case
99 bool ignore_case = false;
100
101 static url_pattern_compile_component_options DEFAULT;
102 static url_pattern_compile_component_options HOSTNAME;
103 static url_pattern_compile_component_options PATHNAME;
104
105 private:
106 // @see https://urlpattern.spec.whatwg.org/#options-delimiter-code-point
107 std::optional<char> delimiter{};
108 // @see https://urlpattern.spec.whatwg.org/#options-prefix-code-point
109 std::optional<char> prefix{};
110};
111
112// The default options is an options struct with delimiter code point set to
113// the empty string and prefix code point set to the empty string.
114inline url_pattern_compile_component_options
115 url_pattern_compile_component_options::DEFAULT(std::nullopt, std::nullopt);
116
117// The hostname options is an options struct with delimiter code point set
118// "." and prefix code point set to the empty string.
119inline url_pattern_compile_component_options
120 url_pattern_compile_component_options::HOSTNAME('.', std::nullopt);
121
122// The pathname options is an options struct with delimiter code point set
123// "/" and prefix code point set to "/".
124inline url_pattern_compile_component_options
125 url_pattern_compile_component_options::PATHNAME('/', '/');
126
// Matching result for one URL component. It only ever appears as a member of
// a url_pattern_result. The corresponding URLPatternComponentResult API is
// defined by the URLPattern specification.
struct url_pattern_component_result {
  // The component input string.
  std::string input;
  // Group values keyed by group name; a group may hold no value.
  std::unordered_map<std::string, std::optional<std::string>> groups;

  // Declared here, defined elsewhere.
  bool operator==(const url_pattern_component_result&) const;

#if ADA_TESTING
  // Test-only pretty printer. Groups without a value print as "undefined".
  friend void PrintTo(const url_pattern_component_result& result,
                      std::ostream* os) {
    std::ostream& out = *os;
    out << "input: '" << result.input << "', group: ";
    for (const auto& [group_name, group_value] : result.groups) {
      out << "(" << group_name << ", " << group_value.value_or("undefined")
          << ") ";
    }
  }
#endif  // ADA_TESTING
};
149
150template <url_pattern_regex::regex_concept regex_provider>
151class url_pattern_component {
152 public:
153 url_pattern_component() = default;
154
155 // This function explicitly takes a std::string because it is moved.
156 // To avoid unnecessary copy, move each value while calling the constructor.
157 url_pattern_component(std::string&& new_pattern,
158 typename regex_provider::regex_type&& new_regexp,
159 std::vector<std::string>&& new_group_name_list,
160 bool new_has_regexp_groups)
161 : regexp(std::move(new_regexp)),
162 pattern(std::move(new_pattern)),
163 group_name_list(std::move(new_group_name_list)),
164 has_regexp_groups(new_has_regexp_groups) {}
165
166 // @see https://urlpattern.spec.whatwg.org/#compile-a-component
167 template <url_pattern_encoding_callback F>
168 static tl::expected<url_pattern_component, errors> compile(
169 std::string_view input, F& encoding_callback,
170 url_pattern_compile_component_options& options);
171
172 // @see https://urlpattern.spec.whatwg.org/#create-a-component-match-result
173 url_pattern_component_result create_component_match_result(
174 std::string&& input,
175 std::vector<std::optional<std::string>>&& exec_result);
176
177#if ADA_TESTING
178 friend void PrintTo(const url_pattern_component& component,
179 std::ostream* os) {
180 *os << "pattern: '" << component.pattern
181 << "', has_regexp_groups: " << component.has_regexp_groups
182 << "group_name_list: ";
183 for (const auto& name : component.group_name_list) {
184 *os << name << ", ";
185 }
186 }
187#endif // ADA_TESTING
188
189 typename regex_provider::regex_type regexp{};
190 std::string pattern{};
191 std::vector<std::string> group_name_list{};
192 bool has_regexp_groups = false;
193};
194
195// A URLPattern input can be either a string or a URLPatternInit object.
196// If it is a string, it must be a valid UTF-8 string.
197using url_pattern_input = std::variant<std::string_view, url_pattern_init>;
198
199// A struct providing the URLPattern matching results for all
200// components of a URL. The URLPatternResult API is defined as
201// part of the URLPattern specification.
202struct url_pattern_result {
203 std::vector<url_pattern_input> inputs;
204 url_pattern_component_result protocol;
205 url_pattern_component_result username;
206 url_pattern_component_result password;
207 url_pattern_component_result hostname;
208 url_pattern_component_result port;
209 url_pattern_component_result pathname;
210 url_pattern_component_result search;
211 url_pattern_component_result hash;
212};
213
// Options accepted when building a URL pattern.
struct url_pattern_options {
  // Defaults to false, i.e. case-sensitive.
  bool ignore_case = false;

#if ADA_TESTING
  // Test-only pretty printer. The value is wrapped in a matching pair of
  // single quotes.
  friend void PrintTo(const url_pattern_options& options, std::ostream* os) {
    *os << "ignore_case: '" << options.ignore_case << "'";
  }
#endif  // ADA_TESTING
};
223
224// URLPattern is a Web Platform standard API for matching URLs against a
225// pattern syntax (think of it as a regular expression for URLs). It is
226// defined in https://wicg.github.io/urlpattern.
227// More information about the URL Pattern syntax can be found at
228// https://developer.mozilla.org/en-US/docs/Web/API/URL_Pattern_API
229//
230// We require all strings to be valid UTF-8: it is the user's responsibility
231// to ensure that the provided strings are valid UTF-8.
232template <url_pattern_regex::regex_concept regex_provider>
233class url_pattern {
234 public:
235 url_pattern() = default;
236
241 result<std::optional<url_pattern_result>> exec(
242 const url_pattern_input& input,
243 const std::string_view* base_url = nullptr);
244
249 result<bool> test(const url_pattern_input& input,
250 const std::string_view* base_url = nullptr);
251
256 result<std::optional<url_pattern_result>> match(
257 const url_pattern_input& input,
258 const std::string_view* base_url_string = nullptr);
259
260 // @see https://urlpattern.spec.whatwg.org/#dom-urlpattern-protocol
261 [[nodiscard]] std::string_view get_protocol() const ada_lifetime_bound;
262 // @see https://urlpattern.spec.whatwg.org/#dom-urlpattern-username
263 [[nodiscard]] std::string_view get_username() const ada_lifetime_bound;
264 // @see https://urlpattern.spec.whatwg.org/#dom-urlpattern-password
265 [[nodiscard]] std::string_view get_password() const ada_lifetime_bound;
266 // @see https://urlpattern.spec.whatwg.org/#dom-urlpattern-hostname
267 [[nodiscard]] std::string_view get_hostname() const ada_lifetime_bound;
268 // @see https://urlpattern.spec.whatwg.org/#dom-urlpattern-port
269 [[nodiscard]] std::string_view get_port() const ada_lifetime_bound;
270 // @see https://urlpattern.spec.whatwg.org/#dom-urlpattern-pathname
271 [[nodiscard]] std::string_view get_pathname() const ada_lifetime_bound;
272 // @see https://urlpattern.spec.whatwg.org/#dom-urlpattern-search
273 [[nodiscard]] std::string_view get_search() const ada_lifetime_bound;
274 // @see https://urlpattern.spec.whatwg.org/#dom-urlpattern-hash
275 [[nodiscard]] std::string_view get_hash() const ada_lifetime_bound;
276
277 // If ignoreCase is true, the JavaScript regular expression created for each
278 // pattern must use the `vi` flag. Otherwise, they must use the `v` flag.
279 [[nodiscard]] bool ignore_case() const;
280
281 // @see https://urlpattern.spec.whatwg.org/#url-pattern-has-regexp-groups
282 [[nodiscard]] bool has_regexp_groups() const;
283
284#if ADA_TESTING
285 friend void PrintTo(const url_pattern& c, std::ostream* os) {
286 *os << "protocol_component: '" << c.get_protocol() << ", ";
287 *os << "username_component: '" << c.get_username() << ", ";
288 *os << "password_component: '" << c.get_password() << ", ";
289 *os << "hostname_component: '" << c.get_hostname() << ", ";
290 *os << "port_component: '" << c.get_port() << ", ";
291 *os << "pathname_component: '" << c.get_pathname() << ", ";
292 *os << "search_component: '" << c.get_search() << ", ";
293 *os << "hash_component: '" << c.get_hash();
294 }
295#endif // ADA_TESTING
296
297 template <url_pattern_regex::regex_concept P>
298 friend tl::expected<url_pattern<P>, errors> parser::parse_url_pattern_impl(
299 std::variant<std::string_view, url_pattern_init>&& input,
300 const std::string_view* base_url, const url_pattern_options* options);
301
307 url_pattern_component<regex_provider> protocol_component{};
313 url_pattern_component<regex_provider> username_component{};
319 url_pattern_component<regex_provider> password_component{};
325 url_pattern_component<regex_provider> hostname_component{};
331 url_pattern_component<regex_provider> port_component{};
337 url_pattern_component<regex_provider> pathname_component{};
343 url_pattern_component<regex_provider> search_component{};
349 url_pattern_component<regex_provider> hash_component{};
355 bool ignore_case_ = false;
356};
357} // namespace ada
358#endif // ADA_INCLUDE_URL_PATTERN
359#endif
#define ada_lifetime_bound
#define ada_warn_unused
Definition common_defs.h:85
Definitions for user-facing functions for parsing a URL and its components.
Definition ada_idna.h:13
errors
Definition errors.h:10
tl::expected< result_type, ada::errors > result
Definitions for the parser.
Declaration for the url_pattern_init implementation.