/*
 * Copyright (C) 2016  J. Ali Harlow
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

#include "config.h"

#undef DEBUG

#include <stdlib.h>
#include <string.h>
#include "razor.h"
#include "types/types.h"
#include "razor-internal.h"
#include "uri.h"

/*
 * Following RFC 3986 § 3.
 * Note that we don't validate queries or fragments.
 */

/* Duplicate a string, passing NULL through unchanged. */
#define strdup0(s)		((s) ? strdup(s) : NULL)

/*
 * Minimal growable-string helpers layered on the project's struct array.
 * The array always holds the text plus its terminating NUL, so
 * (str)->size is the string length plus one.
 */
#define string_str(str)		((char *)(str)->data)

#define string_init(str)	do {					\
		char *_p;						\
		array_init(str);					\
		_p = array_add(str, 1);					\
		*_p = '\0';						\
	} while(0)

/*
 * Append len bytes of s.  array_add() hands back the start of the newly
 * added bytes, which lie just after the old terminator; stepping back
 * one byte overwrites that terminator with the new text.
 * NOTE: len is evaluated more than once, so it must be side-effect free.
 */
#define string_append_len(str, s, len)	do {				\
		char *_p;						\
		_p = array_add(str, len);				\
		_p--;							\
		strncpy(_p, s, len);					\
		_p[(len)] = '\0';					\
	} while(0)

#define string_append(str, s)	string_append_len(str, s, strlen(s))

/* Cut the string short so that it ends at s (a pointer into its data). */
#define string_truncate_at(str, s)	do {				\
		int _len;						\
		_len = (s) -						\
		       (char *)(str)->data;				\
		*(s) = '\0';						\
		(str)->size = _len + 1;					\
	} while(0)

/*
 * Skip over a URI scheme.
 *
 * Returns a pointer to the ':' that terminates the scheme at the start of
 * uri, or NULL if uri does not begin with a valid scheme followed by ':'.
 */
static const char *skip_uri_scheme(const char *uri)
{
	/*
	 * RFC 3986 defines scheme as:
	 * scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
	 */
	if ((*uri >= 'a' && *uri <= 'z') || (*uri >= 'A' && *uri <= 'Z')) {
		do {
			uri++;
		} while (is_alnum(*uri) ||
			 *uri == '+' || *uri == '-' || *uri == '.');
		if (*uri == ':')
			return uri;
	}
	return NULL;
}

/*
 * Duplicate at most n bytes of s into a freshly allocated, NUL-terminated
 * string.  The caller owns (and must free) the result.
 */
static char *razor_strndup(const char *s, size_t n)
{
	char *result;

	if (memchr(s, '\0', n))
		/* s ends within the first n bytes: a plain copy suffices */
		result = strdup(s);
	else {
		result = malloc(n + 1);
		memcpy(result, s, n);
		result[n] = '\0';
	}
	return result;
}

#if 0
/*
 * Return the (possibly decoded) pchar or 0 on end-of-string or -1 on error
 */
static int pchar_get_char_validated(const char *p)
{
	int c;

	if (p[0]=='\0')
		c = 0;
	else if (p[0]=='%') {
		if (xdigit_value(p[1]) < 0)
			return -1;
		c = xdigit_value(p[1]) * 16;
		if (xdigit_value(p[2]) < 0)
			return -1;
		c += xdigit_value(p[2]);
	} else if (p[0] >= 'a' && p[0] <= 'z' ||
		   p[0] >= 'A' && p[0] <= 'Z' ||
		   p[0] >= '0' && p[0] <= '9' ||
		   strchr("-._~!$&'()*+,;=:@", p[0]))
		c = p[0];
	else
		c = -1;
	return c;
}
#endif

/*
 * Verify the percent encoding. All '%' characters must be followed by
 * exactly two hexadecimal digits.
 *
 * Returns 0 if s is well formed, -1 otherwise.
 */
static int pct_encoding_validate(const char *s)
{
	while (*s) {
		if (*s == '%') {
			/*
			 * The first test fails (and short-circuits) on a
			 * truncated escape, so s[2] is never read past the
			 * terminator.
			 */
			if (xdigit_value(s[1]) < 0 || xdigit_value(s[2]) < 0)
				return -1;
			s += 2;
		}
		s++;
	}
	return 0;
}

/*
 * Normalize percent-encoding (RFC 3986 § 6.2.2.1 and § 6.2.2.2): escapes
 * of unreserved characters are decoded and all remaining escapes are
 * written with upper-case hex digits.
 *
 * Returns a newly allocated string (NULL if s is NULL).  The output is
 * never longer than the input since every escape becomes one or three
 * bytes.
 */
static char *pct_encoding_normalize(char *s)
{
	char *retval, *p, *shrunk;
	int c;

	if (!s)
		return NULL;
	p = retval = malloc(strlen(s) + 1);
	while (*s) {
		if (*s == '%') {
			/* presumably yields the decoded octet -- see razor-internal.h */
			c = pchar_get_char(s);
			if (is_unreserved(c))
				*p++ = c;
			else {
				*p++ = '%';
				*p++ = "0123456789ABCDEF"[c / 16];
				*p++ = "0123456789ABCDEF"[c % 16];
			}
			pchar_next_char(s);
		} else
			*p++ = *s++;
	}
	*p++ = '\0';
	/* Shrink to fit; if that fails the original buffer is still valid */
	shrunk = realloc(retval, p - retval);
	return shrunk ? shrunk : retval;
}

/*
 * userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
 *
 * Returns 0 if valid; otherwise sets *error and returns -1.
 */
static int validate_userinfo(const char *userinfo, struct razor_error **error)
{
	const char *s;

	for (s = userinfo; *s; s++) {
		if (!is_unreserved(*s) && *s != '%' && !is_sub_delim(*s) &&
		    *s != ':') {
			razor_set_error(error, RAZOR_GENERAL_ERROR,
					RAZOR_GENERAL_ERROR_BAD_URI, userinfo,
					"Invalid URI userinfo");
			return -1;
		}
	}
	return 0;
}

/*
 * reg-name = *( unreserved / pct-encoded / sub-delims )
 *
 * Returns 0 if valid, -1 otherwise.
 */
static int validate_reg_name(const char *reg_name)
{
	const char *s;

	for (s = reg_name; *s; s++) {
		if (!is_unreserved(*s) && *s != '%' && !is_sub_delim(*s))
			return -1;
	}
	return 0;
}

/*
 * Validate the length bytes at s as an IPv4address: four decimal octets
 * (0-255, no leading zeros) separated by dots and nothing else.
 *
 * Returns 0 if valid, -1 otherwise.
 */
static int validate_ipv4address(const char *s, int length)
{
	int count = 0, digits, octet;

	for (;;) {
		if (!length)
			return -1;
		if (*s == '0') {
			/* A leading zero is only valid as the whole octet */
			digits = 1;
			octet = 0;
		} else {
			if (*s < '1' || *s > '9')
				return -1;
			octet = *s - '0';
			for (digits = 1; digits < length; digits++) {
				if (s[digits] >= '0' && s[digits] <= '9') {
					octet *= 10;
					octet += s[digits] - '0';
					if (octet > 255)
						return -1;
				} else
					break;
			}
		}
		s += digits;
		length -= digits;
		if (++count == 4)
			break;
		if (length < 1 || *s != '.')
			return -1;
		s++;
		length--;
	}
	/* Anything left over after the fourth octet is an error */
	return length ? -1 : 0;
}

/*
 * Consume a run of colon-separated h16 pieces ( h16 = 1*4HEXDIG ) from
 * *s, advancing *s and reducing *length past everything consumed.
 *
 * A digit run that is followed by '.' is left alone, since it is the
 * first octet of a trailing IPv4address rather than an h16.  A ':' after
 * the last piece is consumed only if what follows it is a complete
 * IPv4address; otherwise it is given back so that the caller can
 * recognise "::" (or reject a stray colon).
 *
 * Returns the number of pieces consumed.
 */
static int count_ipv6_pieces(const char **s, int *length)
{
	int count, digits;

	for (digits = 0; digits < 4 && digits < *length; digits++) {
		if (!is_xdigit((*s)[digits]))
			break;
	}
	if (!digits)
		return 0;
	if (digits < *length && (*s)[digits] == '.')
		return 0;
	(*s) += digits;
	(*length) -= digits;
	count = 1;
	if (*length && **s == ':') {
		(*s)++;
		(*length)--;
		count += count_ipv6_pieces(s, length);
		if (count == 1 && validate_ipv4address(*s, *length)) {
			/* Nothing usable after the colon: un-consume it */
			(*s)--;
			(*length)++;
		}
	}
	return count;
}

/*
 * Validate the contents of an IP-literal (the text between '[' and ']'):
 *
 * IP-literal = "[" ( IPv6address / IPvFuture  ) "]"
 * IPvFuture  = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
 *
 * An IPv6address is made up of 16-bit pieces; a trailing IPv4address
 * counts as two.  Without "::" there must be exactly eight pieces; with
 * "::" (which stands for at least one piece) at most seven.
 *
 * Returns 0 if valid, -1 otherwise.
 */
static int validate_ip_literal(const char *ip_literal, int length)
{
	const char *s, *dot, *end = ip_literal + length;
	int len, no_pieces, elide;

	if (length >= 4 && ip_literal[0] == 'v') {
		/* IPvFuture */
		dot = memchr(ip_literal + 2, '.', length - 2);
		if (!dot)
			return -1;
		for (s = ip_literal + 1; s < dot; s++) {
			if (!is_xdigit(*s))
				return -1;
		}
		/* At least one character is required after the dot */
		if (dot + 1 >= end)
			return -1;
		for (s = dot + 1; s < end; s++) {
			if (!is_unreserved(*s) && !is_sub_delim(*s) &&
			    *s != ':')
				return -1;
		}
	} else {
		/* IPv6address */
		s = ip_literal;
		len = length;
		no_pieces = count_ipv6_pieces(&s, &len);
		if (len > 1 && s[0] == ':' && s[1] == ':') {
			s += 2;
			len -= 2;
			elide = 1;
			no_pieces += count_ipv6_pieces(&s, &len);
		} else
			elide = 0;
		if (!validate_ipv4address(s, len))
			no_pieces += 2;
		else if (len)
			return -1;
		if (elide ? no_pieces > 7 : no_pieces != 8)
			return -1;
	}
	return 0;
}

/*
 * host = IP-literal / IPv4address / reg-name
 *
 * Returns 0 if valid; otherwise sets *error and returns non-zero.
 */
static int validate_host(const char *host, struct razor_error **error)
{
	size_t len = strlen(host);
	int retval;

	/* host[0] == '[' guarantees len >= 1, so host[len - 1] is safe */
	if (host[0] == '[' && host[len - 1] == ']')
		retval = validate_ip_literal(host + 1, len - 2);
	else {
		retval = validate_ipv4address(host, len);
		if (retval < 0)
			retval = validate_reg_name(host);
	}
	if (retval)
		razor_set_error(error, RAZOR_GENERAL_ERROR,
				RAZOR_GENERAL_ERROR_BAD_URI, host,
				"Invalid URI host");
	return retval;
}

/*
 * Convert the ASCII upper-case letters of s to lower case, in place.
 *
 * Returns s.
 */
static char *strdown(char *s)
{
	char *p;

	for (p = s; *p; p++) {
		if (*p >= 'A' && *p <= 'Z') {
			*p -= 'A';
			*p += 'a';
		}
	}
	return s;
}

/*
 * Parse an authority (the first length bytes at authority):
 *
 * authority = [ userinfo "@" ] host [ ":" port ]
 *
 * On success the userinfo, host and port fields of ru are set to newly
 * allocated strings (userinfo and port may be NULL) and 0 is returned.
 * On failure *error is set, ru is left untouched and -1 is returned.
 */
static int razor_uri_parse_authority(struct razor_uri *ru,
				     const char *authority, int length,
				     struct razor_error **error)
{
	const char *s, *auth = authority;
	char *userinfo, *port, *host;

	s = memchr(auth, '@', length);
	if (s) {
		userinfo = razor_strndup(auth, s - auth);
		s++;
		length -= s - auth;
		auth = s;
		if (validate_userinfo(userinfo, error)) {
			free(userinfo);
			return -1;
		}
	} else
		userinfo = NULL;

	/*
	 * Find the colon that introduces the port.  An IP-literal contains
	 * colons of its own, so for a bracketed host only look after the
	 * closing bracket.
	 */
	if (length > 0 && auth[0] == '[') {
		s = memchr(auth, ']', length);
		if (s)
			s = memchr(s, ':', length - (s - auth));
	} else
		s = memchr(auth, ':', length);
	if (s) {
		port = razor_strndup(s + 1, length - (s + 1 - auth));
		length = s - auth;
		if (strspn(port, "0123456789") != strlen(port)) {
			razor_set_error(error, RAZOR_GENERAL_ERROR,
					RAZOR_GENERAL_ERROR_BAD_URI, port,
					"Invalid URI port");
			free(userinfo);
			free(port);
			return -1;
		}
	} else
		port = NULL;

	host = razor_strndup(auth, length);
	if (validate_host(host, error)) {
		free(userinfo);
		free(port);
		free(host);
		return -1;
	}

	ru->userinfo = userinfo;
	ru->port = port;
	ru->host = host;
	return 0;
}

/*
 * Parse either a hier-part or a relative-part
 *
 * Sets the userinfo, host, port and path fields of ru.  relative_part
 * selects path-noscheme rather than path-rootless for a path that
 * doesn't start with a slash (ie., no colon in the first segment).
 *
 * Returns 0 on success.  On failure *error is set and -1 is returned;
 * the authority fields of ru may already have been set, so the caller
 * should pass ru to razor_uri_destroy().
 */
static int razor_uri_parse_part(struct razor_uri *ru, const char *part,
				int relative_part, struct razor_error **error)
{
	const char *s, *hp = part;
	char *path, *p, *shrunk;
	int noscheme;

	if (hp[0] == '/' && hp[1] == '/') {
		hp += 2;
		s = strpbrk(hp, "/?#");
		if (!s)
			s = hp + strlen(hp);
		if (razor_uri_parse_authority(ru, hp, s - hp, error) < 0)
			return -1;
		hp = s;
	} else {
		ru->userinfo = NULL;
		ru->host = NULL;
		ru->port = NULL;
	}

	if (!*hp) {
		/* path-empty */
		ru->path = strdup("");
		return 0;
	}
	if (*hp != '/' && ru->host) {
		/* An authority must be followed by path-abempty */
		razor_set_error(error, RAZOR_GENERAL_ERROR,
				RAZOR_GENERAL_ERROR_BAD_URI, part,
				relative_part ? "Invalid URI relative part" :
				"Invalid URI hierarchical part");
		return -1;
	}

	/*
	 * What remains is path-abempty or path-absolute (leading slash), or
	 * else path-rootless or path-noscheme.  Empty segments ("//") can
	 * only occur at the very start of path-abempty, where they are
	 * valid, since a part starting "//" is always taken to introduce an
	 * authority.
	 */
	noscheme = relative_part && *hp != '/';
	p = path = malloc(strlen(hp) + 1);
	while (*hp) {
		if (*hp == '/')
			noscheme = 0;	/* only the first segment is restricted */
		else if (!is_pchar(*hp) || (noscheme && *hp == ':')) {
			free(path);
			razor_set_error(error, RAZOR_GENERAL_ERROR,
					RAZOR_GENERAL_ERROR_BAD_URI, part,
					"Invalid character in URI path");
			return -1;
		}
		*p++ = *hp++;
	}
	*p++ = '\0';
	/* Shrink to fit; if that fails the original buffer is still valid */
	shrunk = realloc(path, p - path);
	ru->path = shrunk ? shrunk : path;
	return 0;
}

/*
 * Free every component of ru (but not ru itself).  Each field is reset
 * to NULL so that destroying the same structure again is harmless.
 */
void razor_uri_destroy(struct razor_uri *ru)
{
	free(ru->scheme);
	ru->scheme = NULL;
	free(ru->userinfo);
	ru->userinfo = NULL;
	free(ru->host);
	ru->host = NULL;
	free(ru->port);
	ru->port = NULL;
	free(ru->path);
	ru->path = NULL;
	free(ru->query);
	ru->query = NULL;
	free(ru->fragment);
	ru->fragment = NULL;
}

/*
 * Parse uri as a URI (or, if absolute is non-zero, as an absolute-URI
 * which may not have a fragment):
 *
 * URI          = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
 * absolute-URI = scheme ":" hier-part [ "?" query ]
 *
 * Returns 0 on success, in which case the caller must eventually call
 * razor_uri_destroy() on ru.  On failure *error is set, -1 is returned
 * and ru holds nothing that needs freeing.
 */
int razor_uri_parse_uri(struct razor_uri *ru, const char *uri, int absolute,
			struct razor_error **error)
{
	int r;
	const char *s;
	char *hier_part;

	if (pct_encoding_validate(uri) < 0) {
		razor_set_error(error, RAZOR_GENERAL_ERROR,
				RAZOR_GENERAL_ERROR_BAD_URI, uri,
				"Invalid percent encoding");
		return -1;
	}

	memset(ru, 0, sizeof(*ru));

	s = skip_uri_scheme(uri);
	if (!s) {
		razor_set_error(error, RAZOR_GENERAL_ERROR,
				RAZOR_GENERAL_ERROR_BAD_URI, uri,
				"Invalid URI scheme");
		return -1;
	}
	ru->scheme = razor_strndup(uri, s - uri);
	uri = s + 1;

	/*
	 * The hier-part ends at whichever of '?' and '#' comes first; a '?'
	 * after the '#' belongs to the fragment.
	 */
	s = strpbrk(uri, "?#");
	if (!s)
		s = uri + strlen(uri);
	hier_part = razor_strndup(uri, s - uri);
	uri = s;
	r = razor_uri_parse_part(ru, hier_part, 0, error);
	free(hier_part);
	if (r) {
		razor_uri_destroy(ru);
		return -1;
	}

	if (*uri != '?')
		ru->query = NULL;
	else {
		uri++;
		s = strchr(uri, '#');
		if (!s)
			s = uri + strlen(uri);
		ru->query = razor_strndup(uri, s - uri);
		uri = s;
	}

	if (*uri != '#')
		ru->fragment = NULL;
	else if (absolute) {
		razor_set_error(error, RAZOR_GENERAL_ERROR,
				RAZOR_GENERAL_ERROR_BAD_URI, uri,
				"Fragments are not allowed in absolute URIs");
		razor_uri_destroy(ru);
		return -1;
	} else {
		uri++;
		ru->fragment = strdup(uri);
	}

	return 0;
}

/*
 * Parse uri as a relative reference:
 *
 * relative-ref = relative-part [ "?" query ] [ "#" fragment ]
 *
 * The scheme field of ru is left NULL.  Returns 0 on success, in which
 * case the caller must eventually call razor_uri_destroy() on ru.  On
 * failure *error is set, -1 is returned and ru holds nothing that needs
 * freeing.
 */
int razor_uri_parse_relative_ref(struct razor_uri *ru, const char *uri,
				 struct razor_error **error)
{
	int r;
	const char *s;
	char *relative_part;

	if (pct_encoding_validate(uri) < 0) {
		razor_set_error(error, RAZOR_GENERAL_ERROR,
				RAZOR_GENERAL_ERROR_BAD_URI, uri,
				"Invalid percent encoding");
		return -1;
	}

	memset(ru, 0, sizeof(*ru));

	/* As in razor_uri_parse_uri(): stop at the first of '?' and '#' */
	s = strpbrk(uri, "?#");
	if (!s)
		s = uri + strlen(uri);
	relative_part = razor_strndup(uri, s - uri);
	uri = s;
	r = razor_uri_parse_part(ru, relative_part, 1, error);
	free(relative_part);
	if (r) {
		/* Release any authority parsed before the path was rejected */
		razor_uri_destroy(ru);
		return -1;
	}

	if (*uri == '?') {
		uri++;
		s = strchr(uri, '#');
		if (!s)
			s = uri + strlen(uri);
		ru->query = razor_strndup(uri, s - uri);
		uri = s;
	} else
		ru->query = NULL;

	if (*uri == '#') {
		uri++;
		ru->fragment = strdup(uri);
	} else
		ru->fragment = NULL;

	return 0;
}

/*
 * Parse uri as a URI-reference: first as a URI and, failing that, as a
 * relative reference.  If neither works, the error reported is the one
 * from the attempt to parse it as a URI.
 *
 * Returns 0 on success and a negative value on failure.
 */
int razor_uri_parse(struct razor_uri *ru, const char *uri,
		    struct razor_error **error)
{
	struct razor_error *tmp_error = NULL;
	int r;

	r = razor_uri_parse_uri(ru, uri, 0, &tmp_error);
	if (r < 0) {
		r = razor_uri_parse_relative_ref(ru, uri, NULL);
		if (r < 0)
			razor_propagate_error(error, tmp_error, NULL);
		else
			razor_error_free(tmp_error);
	}
	return r;
}

/*
 * Following RFC 3986 § 5.2.4
 *
 * Returns a newly built string holding path with its "." and ".."
 * segments resolved.  The step labels refer to the steps of the
 * algorithm given in the RFC and are only reported in DEBUG builds.
 */
static char *remove_dot_segments(const char *path)
{
	struct array output;
	char *input, *in, *s, *t;
	const char *step;

#ifdef DEBUG
	fprintf(stderr, "STEP OUTPUT BUFFER INPUT BUFFER\n");
#endif
	/* Work on a private copy since steps 2B and 2C modify the input */
	input = strdup(path);
	in = input;
	string_init(&output);
#ifdef DEBUG
	fprintf(stderr, " 1 : %-21s %s\n", string_str(&output), in);
#endif
	while (*in) {
		if (str_has_prefix(in, "../")) {
			step = "2A";
			in += 3;
		} else if (str_has_prefix(in, "./")) {
			step = "2A";
			in += 2;
		} else if (str_has_prefix(in, "/./")) {
			step = "2B";
			in += 2;
		} else if (!strcmp(in, "/.")) {
			/* Replace the trailing "/." with "/" */
			step = "2B";
			in++;
			*in = '/';
		} else if (str_has_prefix(in, "/../")) {
			/* Drop the last output segment and its slash */
			step = "2C";
			in += 3;
			s = strrchr(string_str(&output), '/');
			if (!s)
				s = string_str(&output);
			string_truncate_at(&output, s);
		} else if (!strcmp(in, "/..")) {
			/* As above, and replace the trailing "/.." with "/" */
			step = "2C";
			in += 2;
			*in = '/';
			s = strrchr(string_str(&output), '/');
			if (!s)
				s = string_str(&output);
			string_truncate_at(&output, s);
		} else if (!strcmp(in, ".") || !strcmp(in, "..")) {
			step = "2D";
			in += strlen(in);
		} else {
			/*
			 * Move the first segment (including any initial
			 * slash, but not the next one) to the output.
			 */
			step = "2E";
			t = strchr(in + 1, '/');
			if (!t)
				t = in + strlen(in);
			string_append_len(&output, in, t - in);
			in = t;
		}
#ifdef DEBUG
		fprintf(stderr, " %s: %-21s %s\n",
			step, string_str(&output), in);
#endif
	}
	free(input);
	/* The array's buffer is handed to the caller as the result */
	return string_str(&output);
}

/*
 * Following RFC 3986 § 6.2.2
 *
 * Normalize ru in place: lower-case the scheme and host, normalize the
 * percent-encoding of every component other than the scheme and port,
 * and remove dot segments from the path.
 */
void razor_uri_normalize(struct razor_uri *ru)
{
	char *s;

	/* A relative reference has no scheme */
	if (ru->scheme)
		strdown(ru->scheme);
	if (ru->host)
		strdown(ru->host);

	s = pct_encoding_normalize(ru->userinfo);
	free(ru->userinfo);
	ru->userinfo = s;

	s = pct_encoding_normalize(ru->host);
	free(ru->host);
	ru->host = s;

	s = pct_encoding_normalize(ru->path);
	free(ru->path);
	ru->path = s;

	s = pct_encoding_normalize(ru->query);
	free(ru->query);
	ru->query = s;

	s = pct_encoding_normalize(ru->fragment);
	free(ru->fragment);
	ru->fragment = s;

	s = remove_dot_segments(ru->path);
	free(ru->path);
	ru->path = s;
}

/*
 * Build the authority ( [ userinfo "@" ] host [ ":" port ] ) of ru.
 *
 * Returns a newly allocated string which the caller must free, or NULL
 * if ru has no host.
 */
char *razor_uri_get_authority(const struct razor_uri *ru)
{
	char *result, *r;
	int len = 1;		/* for the terminating NUL */

	if (!ru->host)
		return NULL;

	if (ru->userinfo)
		len += strlen(ru->userinfo) + 1;
	len += strlen(ru->host);
	if (ru->port)
		len += strlen(ru->port) + 1;

	r = result = malloc(len);
	if (ru->userinfo) {
		strcpy(r, ru->userinfo);
		r += strlen(r);
		*r++ = '@';
	}
	strcpy(r, ru->host);
	r += strlen(r);
	if (ru->port) {
		*r++ = ':';
		strcpy(r, ru->port);
	}
	return result;
}

/*
 * Following RFC 3986 § 5.3
 *
 * Returns the components of ru recombined into a newly allocated string
 * which the caller must free.  The path of ru must not be NULL.
 */
char *razor_uri_recompose(const struct razor_uri *ru)
{
	char *authority, *result, *r;
	int len = 1;		/* for the terminating NUL */

	authority = razor_uri_get_authority(ru);

	if (ru->scheme)
		len += strlen(ru->scheme) + 1;
	if (authority)
		len += strlen(authority) + 2;
	len += strlen(ru->path);
	if (ru->query)
		len += strlen(ru->query) + 1;
	if (ru->fragment)
		len += strlen(ru->fragment) + 1;

	r = result = malloc(len);
	if (ru->scheme) {
		strcpy(r, ru->scheme);
		r += strlen(r);
		*r++ = ':';
	}
	if (authority) {
		*r++ = '/';
		*r++ = '/';
		strcpy(r, authority);
		free(authority);
		r += strlen(r);
	}
	strcpy(r, ru->path);
	r += strlen(r);
	if (ru->query) {
		*r++ = '?';
		strcpy(r, ru->query);
		r += strlen(r);
	}
	if (ru->fragment) {
		*r++ = '#';
		strcpy(r, ru->fragment);
	}
	return result;
}

/*
 * Following RFC 3986 § 5.2.3
 *
 * Returns the path of R merged with that of base as a newly allocated
 * string.
 */
static char *merge_paths(const struct razor_uri *base,const struct razor_uri *R)
{
	char *s, *t, *path;

	if (base->host && !*base->path)
		path = razor_concat("/", R->path, NULL);
	else {
		s = strrchr(base->path, '/');
		if (s) {
			/* Keep the base path up to and including its last slash */
			t = razor_strndup(base->path, s + 1 - base->path);
			path = razor_concat(t, R->path, NULL);
			free(t);
		} else
			path = strdup(R->path);
	}
	return path;
}

/*
 * Following RFC 3986 § 5.2
 *
 * Resolve the reference R against base, storing the target URI in T.
 * Every field of T is overwritten (with a newly allocated string or
 * NULL), so T need not be initialized but must eventually be passed to
 * razor_uri_destroy().
 */
void razor_uri_resolve(struct razor_uri *T, const struct razor_uri *base,
		       const struct razor_uri *R)
{
	char *s;

	if (R->scheme) {
		T->scheme = strdup(R->scheme);
		T->userinfo = strdup0(R->userinfo);
		T->host = strdup0(R->host);
		T->port = strdup0(R->port);
		T->path = remove_dot_segments(R->path);
		T->query = strdup0(R->query);
	} else {
		if (R->host) {
			T->userinfo = strdup0(R->userinfo);
			T->host = strdup0(R->host);
			T->port = strdup0(R->port);
			T->path = remove_dot_segments(R->path);
			T->query = strdup0(R->query);
		} else {
			if (!*R->path) {
				T->path = strdup(base->path);
				if (R->query)
					T->query = strdup(R->query);
				else
					T->query = strdup0(base->query);
			} else {
				if (*R->path == '/')
					T->path = remove_dot_segments(R->path);
				else {
					s = merge_paths(base, R);
					T->path = remove_dot_segments(s);
					free(s);
				}
				T->query = strdup0(R->query);
			}
			T->userinfo = strdup0(base->userinfo);
			T->host = strdup0(base->host);
			T->port = strdup0(base->port);
		}
		T->scheme = strdup0(base->scheme);
	}
	T->fragment = strdup0(R->fragment);
}

/*
 * This differs from razor_uri_resolve() both in the types of its arguments
 * and in the fact that it takes a root URI rather than a base URI. The base
 * URI is determined by appending a slash to the root URI (if it doesn't
 * already end in a slash). A NULL or empty root URI is taken to be "file:/".
 *
 * Finally, uri can be explicitly marked as either relative (ie., a
 * relative-ref; is_relative > 0) or not (ie., a URI; is_relative == 0).
 * This is important as otherwise "c:/xxx" could be interpreted as a URI
 * in the "c" scheme. If is_relative is negative, uri is parsed as a URI
 * if possible and as a relative-ref otherwise.
 *
 * Returns the resolved URI as a newly allocated string which the caller
 * must free, or NULL (with *error set) on failure.
 */
char *razor_resolve_uri_root(const char *root_uri, const char *uri,
			     int is_relative, struct razor_error **error)
{
	int r;
	char *base_uri, *s, *result;
	struct razor_uri ru, base, file;

	if (!root_uri || !*root_uri)
		root_uri = "file:/";

	if (root_uri[strlen(root_uri) - 1] == '/')
		base_uri = strdup(root_uri);
	else
		base_uri = razor_concat(root_uri, "/", NULL);

	r = razor_uri_parse_uri(&base, base_uri, 1, error);
	free(base_uri);
	if (r)
		return NULL;

	if (is_relative > 0) {
		/*
		 * We can't use razor_uri_parse_relative_ref() to parse
		 * uri in case it starts with a segment that includes a
		 * colon. Thus we use this kludge.
		 */
		s = razor_concat("scheme:", uri, NULL);
		r = razor_uri_parse_uri(&file, s, 0, error);
		free(s);
		if (!r) {
			/* Discard the dummy scheme again */
			free(file.scheme);
			file.scheme = NULL;
		}
	} else if (!is_relative)
		r = razor_uri_parse_uri(&file, uri, 0, error);
	else
		r = razor_uri_parse(&file, uri, error);
	if (r) {
		razor_uri_destroy(&base);
		return NULL;
	}

	razor_uri_resolve(&ru, &base, &file);
	razor_uri_destroy(&base);
	razor_uri_destroy(&file);

	result = razor_uri_recompose(&ru);
	razor_uri_destroy(&ru);

	return result;
}