2 * Copyright (C) 2016 J. Ali Harlow <ali@juiblex.co.uk>
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License along
15 * with this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 #include "types/types.h"
27 #include "razor-internal.h"
31 * Following RFC 3986 § 3.
32 * Note that we don't validate queries or fragments.
/*
 * NULL-tolerant strdup(): evaluates to NULL when s is NULL and to strdup(s)
 * otherwise. The argument is expanded twice, so it must be free of side
 * effects.
 */
35 #define strdup0(s) ((s) ? strdup(s) : NULL)
/* View the data member of the array behind str as a char pointer. */
37 #define string_str(str) ((char *)(str)->data)
/*
 * Start a string stored in the array str. The visible part grows the array
 * by one element through array_add(); presumably that element holds the
 * terminating NUL - TODO confirm, the rest of the macro is not visible here.
 */
39 #define string_init(str) do { \
42 _p = array_add(str, 1); \
/*
 * Append len bytes taken from s to the string stored in the array str.
 * array_add() supplies the destination for the new bytes and strncpy()
 * fills it; strncpy() stops copying at a NUL in s and zero-pads up to len.
 * NOTE(review): how the terminating NUL is kept in place is decided by
 * lines that are not visible here.
 */
46 #define string_append_len(str, s, len) do { \
48 _p = array_add(str, len); \
50 strncpy(_p, s, len); \
/*
 * Append the whole NUL-terminated string s, using strlen(s) as the length.
 * The argument s is expanded more than once.
 */
54 #define string_append(str, s) string_append_len(str, s, strlen(s))
/*
 * Cut the string stored in str short at the position s. The visible part
 * refers to the start of the data and stores _len + 1 as the new size,
 * presumably the kept characters plus the terminating NUL - TODO confirm,
 * the computation of _len is not visible here.
 */
56 #define string_truncate_at(str, s) do { \
59 (char *)(str)->data; \
61 (str)->size = _len + 1; \
/*
 * Scan a URI scheme at the start of uri.
 *
 * The first character has to be an ASCII letter; the loop then continues
 * over alphanumerics, plus signs and minus signs (the tail of the loop
 * condition is not visible here; the grammar quoted below also admits a
 * dot).
 * NOTE(review): the return statements are not visible;
 * razor_uri_parse_uri() uses the result as a pointer just past the scheme
 * when it copies the scheme text - confirm the failure value.
 */
65 static const char *skip_uri_scheme(const char *uri)
68 * RFC 3986 defines scheme as:
69 * scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
/* Leading character: ASCII letter only. */
71 if (*uri >= 'a' && *uri <= 'z' || *uri >= 'A' && *uri <= 'Z') {
74 } while (is_alnum(*uri) || *uri == '+' || *uri == '-' ||
/*
 * Duplicate at most n bytes of s into newly allocated memory.
 *
 * memchr() checks whether a NUL occurs within the first n bytes of s (the
 * action taken in that case is not visible here). The visible allocation
 * is sized n + 1, presumably n bytes plus a terminating NUL - TODO confirm.
 */
82 static char *razor_strndup(const char *s, size_t n)
86 if (memchr(s, '\0', n))
89 result = malloc(n + 1);
99 * Return the (possibly decoded) pchar or 0 on end-of-string or -1 on error
/*
 * Decode and validate the path character at p.
 *
 * A percent sign must be followed by two hexadecimal digits, which are
 * combined into the value c (first digit times 16 plus second digit).
 * Any other character has to be an ASCII letter, a decimal digit or one of
 * the punctuation characters in the string handed to strchr() below.
 * NOTE(review): the first branch and all return statements are not
 * visible here.
 */
101 static int pchar_get_char_validated(const char *p)
107 else if (p[0]=='%') {
/* Presumably a negative xdigit_value() marks a non-hex character. */
108 if (xdigit_value(p[1]) < 0)
110 c = xdigit_value(p[1]) * 16;
111 if (xdigit_value(p[2]) < 0)
113 c += xdigit_value(p[2]);
/* Literal characters: letters, digits and the listed punctuation. */
114 } else if (p[0] >= 'a' && p[0] <= 'z' || p[0] >= 'A' && p[0] <= 'Z' ||
115 p[0] >= '0' && p[0] <= '9' ||
116 strchr("-._~!$&'()*+,;=:@", p[0]))
126 * Verify the percent encoding. All '%' characters must be followed by
127 * exactly two hexadecimal digits.
/*
 * Check the percent encoding in s. The visible test fires when the first
 * or the second character after the current position yields a negative
 * xdigit_value(). razor_uri_parse_uri() and razor_uri_parse_relative_ref()
 * treat a negative result of this function as invalid percent encoding.
 */
129 static int pct_encoding_validate(const char *s)
133 if (xdigit_value(s[1]) < 0 || xdigit_value(s[2]) < 0)
/*
 * Build a normalized copy of the percent encoding in s.
 *
 * The output buffer is sized like the input (strlen(s) + 1). Each character
 * is decoded with pchar_get_char(); unreserved characters take one branch
 * (presumably they are copied literally - body not visible here) and the
 * visible alternative writes the two hexadecimal digits of c in upper
 * case. The buffer is finally resized to the p - retval bytes in use.
 * NOTE(review): the realloc() result is returned directly, so a failed
 * realloc() yields NULL while the original buffer stays allocated.
 * NOTE(review): a check of the malloc() result is not visible here.
 */
144 static char *pct_encoding_normalize(char *s)
152 p = retval = malloc(strlen(s) + 1);
156 c = pchar_get_char(s);
157 if (is_unreserved(c))
/* High nibble first, then low nibble, always upper-case hex digits. */
161 *p++ = "0123456789ABCDEF"[c/16];
162 *p++ = "0123456789ABCDEF"[c%16];
171 return realloc(retval, p - retval);
/*
 * Validate the userinfo component character by character.
 *
 * Unreserved characters, the percent sign and sub-delimiters pass the
 * visible part of the test (its last clause is not visible here). A
 * rejected character records a RAZOR_GENERAL_ERROR_BAD_URI error that
 * carries the userinfo text.
 * NOTE(review): the return statements are not visible;
 * razor_uri_parse_authority() treats a non-zero result as failure.
 */
174 static int validate_userinfo(const char *userinfo, struct razor_error **error)
178 for (s = userinfo; *s; s++) {
179 if (!is_unreserved(*s) && *s != '%' && !is_sub_delim(*s)
181 razor_set_error(error, RAZOR_GENERAL_ERROR,
182 RAZOR_GENERAL_ERROR_BAD_URI, userinfo,
183 "Invalid URI userinfo");
/*
 * Validate a registered name: the loop inspects every character and the
 * test fires for anything that is neither unreserved, a percent sign nor a
 * sub-delimiter. Unlike validate_userinfo() this function takes no error
 * argument.
 * NOTE(review): the return statements are not visible here.
 */
191 static int validate_reg_name(const char *reg_name)
195 for (s = reg_name; *s; s++) {
196 if (!is_unreserved(*s) && *s != '%' && !is_sub_delim(*s))
/*
 * Validate the length bytes at s as a dotted IPv4 address.
 *
 * Visible pieces of the logic: a test of the current character against the
 * range 1 to 9, accumulation of further decimal digits into octet, and a
 * check for the dot between octets. The final return yields 0 when length
 * has reached zero and -1 otherwise.
 * NOTE(review): the handling of a zero octet, the octet range check and
 * the use of count are not visible here.
 */
203 static int validate_ipv4address(const char *s, int length)
205 int count = 0, digits, octet;
215 if (*s < '1' || *s > '9')
220 for (digits = 1; digits < length; digits++) {
221 if (s[digits] >= '0' && s[digits] <= '9') {
223 octet += s[digits] - '0';
/* More input must follow and it must start with the separating dot. */
237 if (length < 1 || *s != '.')
244 return length ? -1 : 0;
/*
 * Count colon-separated IPv6 pieces starting at *s.
 *
 * The cursor *s and the remaining *length are passed by pointer,
 * presumably so that the caller sees how far the scan got (the updates are
 * not visible here). A piece is scanned over at most four characters, each
 * tested with is_xdigit(); a following colon leads to a recursive call
 * whose result is added to count.
 */
247 static int count_ipv6_pieces(const char **s, int *length)
251 for (digits = 0; digits < 4 && digits < *length; digits++) {
252 if (!is_xdigit((*s)[digits]))
263 if (*length && **s == ':') {
266 count += count_ipv6_pieces(s, length);
/*
 * Validate the text between the brackets of an IP literal.
 *
 * Two forms are visible:
 *  - a literal of at least four bytes that starts with the letter v: a dot
 *    must be found inside the literal; the characters before the dot are
 *    checked by a loop whose test is not visible here, and every character
 *    after the dot must be unreserved, a sub-delimiter or a colon;
 *  - otherwise an IPv6 address: pieces are counted with
 *    count_ipv6_pieces(), a double colon is handled (elide presumably
 *    records it), and a remainder is offered to validate_ipv4address().
 * The closing test fires for more than 8 pieces, for exactly 8 pieces
 * combined with elide, and for fewer than 1 piece.
 * NOTE(review): strchr() scans until a NUL, so the search for the dot
 * relies on the text being NUL-terminated somewhere past length; the
 * following bounds check discards a dot found beyond the literal.
 */
276 static int validate_ip_literal(const char *ip_literal, int length)
279 int len, no_pieces, elide;
281 if (length >= 4 && ip_literal[0] == 'v') {
283 dot = strchr(ip_literal + 2, '.');
284 if (!dot || dot >= ip_literal + length)
286 for (s = ip_literal + 1; s < dot; s++) {
290 for (s = dot + 1; s < ip_literal + length; s++) {
291 if (!is_unreserved(*s) && !is_sub_delim(*s) && *s != ':')
298 no_pieces = count_ipv6_pieces(&s, &len);
/* A double colon: further pieces may follow it. */
300 if (len > 1 && s[0] == ':' && s[1] == ':') {
304 no_pieces += count_ipv6_pieces(&s, &len);
/* validate_ipv4address() returns 0 for an acceptable address. */
308 if (!validate_ipv4address(s, len))
313 if (no_pieces > 8 || no_pieces == 8 && elide || no_pieces < 1)
/*
 * Validate the host component.
 *
 * A host enclosed in square brackets has its interior (without the
 * brackets) checked by validate_ip_literal(). The other visible calls are
 * validate_ipv4address() on the whole host and validate_reg_name(); the
 * conditions that chain them are not visible here. A failure is reported
 * as RAZOR_GENERAL_ERROR_BAD_URI with the host text attached.
 * The bracket test reads host[0] first, so the strlen(host) - 1 index is
 * only evaluated for a host that is not empty.
 */
320 static int validate_host(const char *host, struct razor_error **error)
324 if (host[0] == '[' && host[strlen(host) - 1] == ']')
325 retval = validate_ip_literal(host + 1, strlen(host) - 2);
327 retval = validate_ipv4address(host, strlen(host));
329 retval = validate_reg_name(host);
333 razor_set_error(error, RAZOR_GENERAL_ERROR,
334 RAZOR_GENERAL_ERROR_BAD_URI, host,
/*
 * Look for upper-case ASCII letters (A to Z) in s. The name suggests that
 * they are converted to lower case in place - TODO confirm, neither the
 * loop nor the conversion nor the return value is visible here.
 */
340 static char *strdown(char *s)
343 if (*s >= 'A' && *s <= 'Z') {
/*
 * Split the first length bytes of authority into userinfo, host and port.
 *
 * An at-sign inside the span ends the userinfo, which is copied with
 * razor_strndup() and checked with validate_userinfo(). A colon inside the
 * span starts the port, which must consist of decimal digits only;
 * otherwise a RAZOR_GENERAL_ERROR_BAD_URI error carrying the port text is
 * set. The host is copied from auth and checked with validate_host().
 * The visible store into ru is the userinfo member.
 * NOTE(review): strchr() returns the first colon, so a bracketed IPv6
 * literal would be split at a colon inside the brackets unless lines that
 * are not visible here prevent it - confirm.
 * NOTE(review): the return statements are not visible;
 * razor_uri_parse_part() treats a negative result as failure.
 */
353 static int razor_uri_parse_authority(struct razor_uri *ru,
354 const char *authority, int length,
355 struct razor_error **error)
357 const char *s, *auth = authority;
358 char *userinfo, *port, *host;
/* strchr() scans to the NUL; the bounds test keeps the hit inside the span. */
360 s = strchr(auth, '@');
361 if (s && s < auth + length) {
362 userinfo = razor_strndup(auth, s - auth);
367 if (validate_userinfo(userinfo, error)) {
374 s = strchr(auth, ':');
375 if (s && s < auth + length) {
377 port = razor_strndup(s, length - (s - auth));
/* The port is accepted only when every character is a decimal digit. */
381 if (strspn(port, "0123456789") != strlen(port)) {
382 razor_set_error(error, RAZOR_GENERAL_ERROR,
383 RAZOR_GENERAL_ERROR_BAD_URI, port,
392 host = razor_strndup(auth, length);
394 if (validate_host(host, error)) {
401 ru->userinfo = userinfo;
409 * Parse either a hier-part or a relative-part
/*
 * Parse part as a hier-part (relative_part is 0) or as a relative-part.
 *
 * Visible cases:
 *  - part starts with two slashes: strpbrk() looks for the next slash,
 *    question mark or hash and the text before it is handed to
 *    razor_uri_parse_authority(); one visible path stores an empty path;
 *  - part starts with a single slash: the path is built in a buffer of
 *    strlen(hp) + 1 bytes that is afterwards resized to the bytes in use;
 *  - otherwise, when ru holds no host: a rootless path, or for a relative
 *    part a path in which a colon is rejected while noscheme is set.
 * Characters failing is_pchar() are reported as
 * RAZOR_GENERAL_ERROR_BAD_URI with the message about an invalid character;
 * a further error path reports an invalid relative or hierarchical part.
 * NOTE(review): the realloc() results are stored without a visible check.
 * NOTE(review): the return statements are not visible here.
 */
411 static int razor_uri_parse_part(struct razor_uri *ru, const char *part,
412 int relative_part, struct razor_error **error)
414 const char *s, *hp = part;
418 if (hp[0] == '/' && hp[1] == '/') {
420 s = strpbrk(hp, "/?#");
423 if (razor_uri_parse_authority(ru, hp, s - hp, error) < 0)
434 ru->path = strdup("");
436 } else if (*hp == '/') {
438 p = path = malloc(strlen(hp) + 1);
443 ru->path = realloc(path, p - path);
446 } else if (!ru->host) {
447 /* path-rootless or path-noscheme */
/* noscheme is set for a relative part and gates the colon test below. */
448 noscheme = relative_part;
449 p = path = malloc(strlen(hp) + 1);
451 razor_set_error(error, RAZOR_GENERAL_ERROR,
452 RAZOR_GENERAL_ERROR_BAD_URI, part,
453 relative_part ? "Invalid URI relative part" :
454 "Invalid URI hierarchical part");
458 if (!is_pchar(*hp) || noscheme && *hp == ':') {
460 razor_set_error(error, RAZOR_GENERAL_ERROR,
461 RAZOR_GENERAL_ERROR_BAD_URI, part,
462 "Invalid character in URI path");
470 else if (!is_pchar(*hp) || noscheme && *hp == ':') {
472 razor_set_error(error, RAZOR_GENERAL_ERROR,
473 RAZOR_GENERAL_ERROR_BAD_URI, part,
474 "Invalid character in URI path");
482 ru->path = realloc(path, p - path);
/*
 * NOTE(review): only the signature is visible here. Callers in this file
 * invoke it after a failed parse and once a parsed URI is no longer
 * needed, so it presumably releases the strings owned by ru - TODO confirm.
 */
487 void razor_uri_destroy(struct razor_uri *ru)
/*
 * Parse uri as a URI (scheme first) into ru.
 *
 * Visible steps: validate the percent encoding, zero *ru, take the scheme
 * delimited by skip_uri_scheme(), parse the text up to the first question
 * mark, else the first hash, else the end of the string as the hier-part,
 * then copy the query (up to a hash or the end) and the fragment.
 * ru is destroyed again on the visible failure paths (hier-part parsing
 * and the fragment error).
 * absolute: judging by the error text a fragment is refused for absolute
 * URIs - TODO confirm, the test itself is not visible here.
 * Errors are reported through error as RAZOR_GENERAL_ERROR_BAD_URI.
 * NOTE(review): the return statements are not visible here.
 */
498 int razor_uri_parse_uri(struct razor_uri *ru, const char *uri, int absolute,
499 struct razor_error **error)
505 if (pct_encoding_validate(uri) < 0) {
506 razor_set_error(error, RAZOR_GENERAL_ERROR,
507 RAZOR_GENERAL_ERROR_BAD_URI, uri,
508 "Invalid percent encoding");
512 memset(ru, 0, sizeof(*ru));
514 s = skip_uri_scheme(uri);
516 razor_set_error(error, RAZOR_GENERAL_ERROR,
517 RAZOR_GENERAL_ERROR_BAD_URI, uri,
518 "Invalid URI scheme");
/* The scheme is the text from uri up to the pointer returned above. */
521 ru->scheme = razor_strndup(uri, s - uri);
/* Find where the hier-part ends: question mark, hash or end of string. */
524 s = strchr(uri, '?');
526 s = strchr(uri, '#');
528 s = uri + strlen(uri);
529 hier_part = razor_strndup(uri, s - uri);
532 r = razor_uri_parse_part(ru, hier_part, 0, error);
535 razor_uri_destroy(ru);
/* The query runs up to a hash or the end of the string. */
543 s = strchr(uri, '#');
545 s = uri + strlen(uri);
546 ru->query = razor_strndup(uri, s - uri);
553 razor_set_error(error, RAZOR_GENERAL_ERROR,
554 RAZOR_GENERAL_ERROR_BAD_URI, uri,
555 "Fragments are not allowed in absolute URIs");
556 razor_uri_destroy(ru);
560 ru->fragment = strdup(uri);
/*
 * Parse uri as a relative reference (no scheme) into ru.
 *
 * Visible steps: validate the percent encoding, zero *ru, parse the text
 * up to the first question mark, else the first hash, else the end of the
 * string with razor_uri_parse_part() in relative mode (third argument 1),
 * then copy the query (up to a hash or the end) and the fragment.
 * An invalid percent encoding is reported through error as
 * RAZOR_GENERAL_ERROR_BAD_URI.
 * NOTE(review): the cleanup on failure and the return statements are not
 * visible here.
 */
566 int razor_uri_parse_relative_ref(struct razor_uri *ru, const char *uri,
567 struct razor_error **error)
573 if (pct_encoding_validate(uri) < 0) {
574 razor_set_error(error, RAZOR_GENERAL_ERROR,
575 RAZOR_GENERAL_ERROR_BAD_URI, uri,
576 "Invalid percent encoding");
580 memset(ru, 0, sizeof(*ru));
/* Find where the relative-part ends. */
582 s = strchr(uri, '?');
584 s = strchr(uri, '#');
586 s = uri + strlen(uri);
587 relative_part = razor_strndup(uri, s - uri);
590 r = razor_uri_parse_part(ru, relative_part, 1, error);
597 s = strchr(uri, '#');
599 s = uri + strlen(uri);
600 ru->query = razor_strndup(uri, s - uri);
607 ru->fragment = strdup(uri);
/*
 * Parse uri as either a URI or a relative reference.
 *
 * razor_uri_parse_uri() is called with absolute set to 0 and its error is
 * collected in tmp_error; razor_uri_parse_relative_ref() is called with a
 * NULL error pointer, so it reports no error of its own. tmp_error is then
 * either propagated to error or freed.
 * NOTE(review): the conditions joining these steps and the return value
 * are not visible here.
 */
614 int razor_uri_parse(struct razor_uri *ru, const char *uri,
615 struct razor_error **error)
617 struct razor_error *tmp_error = NULL;
620 r = razor_uri_parse_uri(ru, uri, 0, &tmp_error);
622 r = razor_uri_parse_relative_ref(ru, uri, NULL);
624 razor_propagate_error(error, tmp_error, NULL);
626 razor_error_free(tmp_error);
633 * Following RFC 3986 § 5.2.4
/*
 * Remove dot segments from path and return the result.
 *
 * The function works on a private copy of path (input) and builds the
 * result in the string output. The prefix tests distinguish:
 *  - a leading ../ or ./ ;
 *  - a leading /./ or an input that is exactly /. ;
 *  - a leading /../ or an input that is exactly /.. , where output is also
 *    cut back to its last slash (or to its start when it has none);
 *  - an input that is exactly . or .. ;
 *  - anything else: the text up to the next slash after the first
 *    character is appended to output.
 * The fprintf() calls write a step-by-step trace to stderr; whether they
 * are guarded by a debug conditional is not visible here.
 * The return value is the character data of output.
 * NOTE(review): how input is advanced and released is not visible here.
 */
635 static char *remove_dot_segments(const char *path)
638 char *input, *in, *s, *t;
642 fprintf(stderr, "STEP OUTPUT BUFFER INPUT BUFFER\n");
645 input = strdup(path);
647 string_init(&output);
650 fprintf(stderr, " 1 : %-21s %s\n", string_str(&output), in);
654 if (str_has_prefix(in, "../")) {
657 } else if (str_has_prefix(in, "./")) {
660 } else if (str_has_prefix(in, "/./")) {
663 } else if (!strcmp(in, "/.")) {
667 } else if (str_has_prefix(in, "/../")) {
/* Drop the last segment already written to output. */
670 s = strrchr(string_str(&output), '/');
672 s = string_str(&output);
673 string_truncate_at(&output, s);
674 } else if (!strcmp(in, "/..")) {
678 s = strrchr(string_str(&output), '/');
680 s = string_str(&output);
681 string_truncate_at(&output, s);
682 } else if (!strcmp(in, ".") || !strcmp(in, "..")) {
/* Search from in + 1 so that a leading slash stays with the segment. */
687 t = strchr(in + 1, '/');
690 string_append_len(&output, in, t - in);
694 fprintf(stderr, " %s: %-21s %s\n", step, string_str(&output),
700 return string_str(&output);
705 * Following RFC 3986 § 6.2.2
/*
 * Normalize ru in place.
 *
 * Visible steps: userinfo, host, path, query and fragment each go through
 * pct_encoding_normalize(), and the path additionally goes through
 * remove_dot_segments().
 * NOTE(review): how each result replaces the old member, and any NULL
 * checks on optional members, are not visible here.
 */
707 void razor_uri_normalize(struct razor_uri *ru)
715 s = pct_encoding_normalize(ru->userinfo);
719 s = pct_encoding_normalize(ru->host);
723 s = pct_encoding_normalize(ru->path);
727 s = pct_encoding_normalize(ru->query);
731 s = pct_encoding_normalize(ru->fragment);
735 s = remove_dot_segments(ru->path);
/*
 * Build the authority string of ru in newly allocated memory.
 *
 * The length adds up userinfo plus one (presumably for the at-sign), host,
 * and port plus one (presumably for the colon). The buffer is then filled,
 * starting with the userinfo.
 * NOTE(review): the conditions on optional members, the room for the
 * terminating NUL and the handling of a failed malloc() are not visible
 * here.
 */
740 char *razor_uri_get_authority(const struct razor_uri *ru)
747 len += strlen(ru->userinfo) + 1;
748 len += strlen(ru->host);
750 len += strlen(ru->port) + 1;
754 r = result = malloc(len);
757 strcpy(r, ru->userinfo);
774 * Following RFC 3986 § 5.3
/*
 * Recompose the components of ru into a single newly allocated string.
 *
 * The length adds up scheme plus one, authority plus two, path, query plus
 * one and fragment plus one; the extra bytes presumably cover the colon,
 * the double slash, the question mark and the hash. The components are
 * then copied in the order scheme, authority, query, fragment (the copy of
 * the path is not visible here).
 * The authority text comes from razor_uri_get_authority().
 * NOTE(review): the conditions on optional components, the release of
 * authority and the handling of a failed malloc() are not visible here.
 */
776 char *razor_uri_recompose(const struct razor_uri *ru)
778 char *authority, *result, *r;
781 authority = razor_uri_get_authority(ru);
784 len += strlen(ru->scheme) + 1;
786 len += strlen(authority) + 2;
787 len += strlen(ru->path);
789 len += strlen(ru->query) + 1;
791 len += strlen(ru->fragment) + 1;
793 r = result = malloc(len);
796 strcpy(r, ru->scheme);
804 strcpy(r, authority);
814 strcpy(r, ru->query);
820 strcpy(r, ru->fragment);
827 * Following RFC 3986 § 5.2.3
/*
 * Merge the path of the reference R with the path of base.
 *
 *  - base has a host and an empty path: the result is a slash followed by
 *    the path of R;
 *  - the base path contains a slash: everything up to and including its
 *    last slash is kept and the path of R is appended;
 *  - the remaining visible case (presumably a base path without a slash):
 *    a copy of the path of R.
 * NOTE(review): the release of the temporary t and the return statement
 * are not visible here.
 */
829 static char *merge_paths(const struct razor_uri *base,const struct razor_uri *R)
833 if (base->host && !*base->path)
834 path = razor_concat("/", R->path, NULL);
836 s = strrchr(base->path, '/');
/* s + 1 - base->path keeps the last slash in the copied prefix. */
838 t = razor_strndup(base->path, s + 1 - base->path);
839 path = razor_concat(t, R->path, NULL);
842 path = strdup(R->path);
849 * Following RFC 3986 § 5.2
/*
 * Resolve the reference R against base and store the target in T.
 *
 * The visible assignments fall into these groups:
 *  - scheme, userinfo, host, port and query copied from R, with the path
 *    of R passed through remove_dot_segments();
 *  - userinfo, host, port and query copied from R again without the
 *    scheme, with the same path treatment;
 *  - path copied from base, with the query taken from R or else from base;
 *  - path of R, or the result of merge_paths(), passed through
 *    remove_dot_segments(), with the query of R;
 *  - userinfo, host and port copied from base;
 *  - scheme copied from base.
 * The last visible assignment copies the fragment from R.
 * Optional members are copied with strdup0(), which keeps NULL as NULL.
 * NOTE(review): the conditions selecting each group are not visible here.
 */
851 void razor_uri_resolve(struct razor_uri *T, const struct razor_uri *base,
852 const struct razor_uri *R)
857 T->scheme = strdup(R->scheme);
858 T->userinfo = strdup0(R->userinfo);
859 T->host = strdup0(R->host);
860 T->port = strdup0(R->port);
861 T->path = remove_dot_segments(R->path);
862 T->query = strdup0(R->query);
865 T->userinfo = strdup0(R->userinfo);
866 T->host = strdup0(R->host);
867 T->port = strdup0(R->port);
868 T->path = remove_dot_segments(R->path);
869 T->query = strdup0(R->query);
872 T->path = strdup(base->path);
874 T->query = strdup(R->query);
876 T->query = strdup0(base->query);
879 T->path = remove_dot_segments(R->path);
/* The merged path is a temporary that feeds remove_dot_segments(). */
881 s = merge_paths(base, R);
882 T->path = remove_dot_segments(s);
885 T->query = strdup0(R->query);
887 T->userinfo = strdup0(base->userinfo);
888 T->host = strdup0(base->host);
889 T->port = strdup0(base->port);
891 T->scheme = strdup(base->scheme);
893 T->fragment = strdup0(R->fragment);
897 * This differs from razor_uri_resolve() both in the types of its arguments
898 * and in the fact that it takes a root URI rather than a base URI. The base
899 * URI is determined by appending a slash to the root URI (if it doesn't
900 * already end in a slash). Finally, uri can be explicitly marked as either
901 * relative (ie., a relative-ref) or not (ie., a URI). This is important as
902 * otherwise "c:/xxx" could be interpreted as a URI in the "c" scheme.
/*
 * Resolve uri against the base derived from root_uri and recompose the
 * result into a string.
 *
 * A NULL or empty root_uri is tested first (the outcome is not visible
 * here). The base text is root_uri itself when it already ends in a slash
 * and root_uri with a slash appended otherwise; it is parsed with the
 * absolute flag set to 1.
 * is_relative greater than 0: uri is prefixed with a dummy scheme and
 *   parsed as a URI;
 * is_relative equal to 0: uri is parsed as a URI as it stands;
 * otherwise: razor_uri_parse() decides between the two forms.
 * base is destroyed on a visible failure path; after resolving, base and
 * file are destroyed, and ru is destroyed once it has been recomposed.
 * NOTE(review): the release of base_uri and s, the undoing of the dummy
 * scheme and the return statements are not visible here.
 */
904 char *razor_resolve_uri_root(const char *root_uri, const char *uri,
905 int is_relative, struct razor_error **error)
908 char *base_uri, *s, *result;
909 struct razor_uri ru, base, file;
911 if (!root_uri || !*root_uri)
/* Safe index: root_uri was tested above for NULL and for being empty. */
914 if (root_uri[strlen(root_uri) - 1] == '/')
915 base_uri = strdup(root_uri);
917 base_uri = razor_concat(root_uri, "/", NULL);
919 r = razor_uri_parse_uri(&base, base_uri, 1, error);
924 if (is_relative > 0) {
926 * We can't use razor_uri_parse_relative_ref() to parse
927 * uri in case it starts with a segment that includes a
928 * colon. Thus we use this kludge.
930 s = razor_concat("scheme:", uri, NULL);
931 r = razor_uri_parse_uri(&file, s, 0, error);
938 else if (!is_relative)
939 r = razor_uri_parse_uri(&file, uri, 0, error);
941 r = razor_uri_parse(&file, uri, error);
943 razor_uri_destroy(&base);
947 razor_uri_resolve(&ru, &base, &file);
949 razor_uri_destroy(&base);
950 razor_uri_destroy(&file);
952 result = razor_uri_recompose(&ru);
954 razor_uri_destroy(&ru);