librazor/uri.c
changeset 478 8e4bf84a7bb8
child 491 b18e0bf48a91
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/librazor/uri.c	Thu Jul 07 11:04:10 2016 +0100
     1.3 @@ -0,0 +1,957 @@
     1.4 +/*
     1.5 + * Copyright (C) 2016  J. Ali Harlow <ali@juiblex.co.uk>
     1.6 + *
     1.7 + * This program is free software; you can redistribute it and/or modify
     1.8 + * it under the terms of the GNU General Public License as published by
     1.9 + * the Free Software Foundation; either version 2 of the License, or
    1.10 + * (at your option) any later version.
    1.11 + *
    1.12 + * This program is distributed in the hope that it will be useful,
    1.13 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
    1.14 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    1.15 + * GNU General Public License for more details.
    1.16 + *
    1.17 + * You should have received a copy of the GNU General Public License along
    1.18 + * with this program; if not, write to the Free Software Foundation, Inc.,
    1.19 + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
    1.20 + */
    1.21 +
    1.22 +#include "config.h"
    1.23 +
    1.24 +#undef DEBUG
    1.25 +
    1.26 +#include <stdlib.h>
    1.27 +#include <string.h>
    1.28 +#include "razor.h"
    1.29 +#include "types/types.h"
    1.30 +#include "razor-internal.h"
    1.31 +#include "uri.h"
    1.32 +
    1.33 +/*
    1.34 + * Following RFC 3986 § 3.
    1.35 + * Note that we don't validate queries or fragments.
    1.36 + */
    1.37 +
    1.38 +#define strdup0(s)			((s) ? strdup(s) : NULL)
    1.39 +
    1.40 +#define string_str(str)			((char *)(str)->data)
    1.41 +
    1.42 +#define string_init(str)		do { \
    1.43 +						char *_p; \
    1.44 +						array_init(str); \
    1.45 +						_p = array_add(str, 1); \
    1.46 +						*_p = '\0'; \
    1.47 +					} while(0)
    1.48 +
    1.49 +#define string_append_len(str, s, len)	do { \
    1.50 +						char *_p; \
    1.51 +						_p = array_add(str, len); \
    1.52 +						_p--; \
    1.53 +						strncpy(_p, s, len); \
    1.54 +						_p[(len)] = '\0'; \
    1.55 +					} while(0)
    1.56 +
    1.57 +#define string_append(str, s)		string_append_len(str, s, strlen(s))
    1.58 +
    1.59 +#define string_truncate_at(str, s)	do { \
    1.60 +						int _len; \
    1.61 +						_len = (s) - \
    1.62 +						       (char *)(str)->data; \
    1.63 +						*(s) = '\0'; \
    1.64 +						(str)->size = _len + 1; \
    1.65 +					} while(0)
    1.66 +
    1.67 +
    1.68 +static const char *skip_uri_scheme(const char *uri)
    1.69 +{
    1.70 +	/*
    1.71 +	 * RFC 3986 defines scheme as:
    1.72 +	 *	scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
    1.73 +	 */
    1.74 +	if (*uri >= 'a' && *uri <= 'z' || *uri >= 'A' && *uri <= 'Z') {
    1.75 +		do {
    1.76 +		    uri++;
    1.77 +		} while (is_alnum(*uri) || *uri == '+' || *uri == '-' ||
    1.78 +			 *uri == '.');
    1.79 +		if (*uri == ':')
    1.80 +			return uri;
    1.81 +	}
    1.82 +	return NULL;
    1.83 +}
    1.84 +
    1.85 +static char *razor_strndup(const char *s, size_t n)
    1.86 +{
    1.87 +	char *result;
    1.88 +
    1.89 +	if (memchr(s, '\0', n))
    1.90 +		result = strdup(s);
    1.91 +	else {
    1.92 +		result = malloc(n + 1);
    1.93 +		memcpy(result, s, n);
    1.94 +		result[n] = '\0';
    1.95 +	}
    1.96 +
    1.97 +	return result;
    1.98 +}
    1.99 +
   1.100 +#if 0
   1.101 +/*
   1.102 + * Return the (possibly decoded) pchar or 0 on end-of-string or -1 on error
   1.103 + */
   1.104 +static int pchar_get_char_validated(const char *p)
   1.105 +{
   1.106 +	int c;
   1.107 +
   1.108 +	if (p[0]=='\0')
   1.109 +		c = 0;
   1.110 +	else if (p[0]=='%') {
   1.111 +		if (xdigit_value(p[1]) < 0)
   1.112 +			return -1;
   1.113 +		c = xdigit_value(p[1]) * 16;
   1.114 +		if (xdigit_value(p[2]) < 0)
   1.115 +			return -1;
   1.116 +		c += xdigit_value(p[2]);
   1.117 +	} else if (p[0] >= 'a' && p[0] <= 'z' || p[0] >= 'A' && p[0] <= 'Z' || 
   1.118 +		   p[0] >= '0' && p[0] <= '9' ||
   1.119 +		   strchr("-._~!$&'()*+,;=:@", p[0]))
   1.120 +		c = p[0];
   1.121 +	else
   1.122 +		c = -1;
   1.123 +
   1.124 +	return c;
   1.125 +}
   1.126 +#endif
   1.127 +
   1.128 +/*
   1.129 + * Verify the percent encoding. All '%' characters must be followed by
   1.130 + * exactly two hexadecimal digits.
   1.131 + */
   1.132 +static int pct_encoding_validate(const char *s)
   1.133 +{
   1.134 +	while (*s) {
   1.135 +		if (*s == '%') {
   1.136 +			if (xdigit_value(s[1]) < 0 || xdigit_value(s[2]) < 0)
   1.137 +				return -1;
   1.138 +			s += 2;
   1.139 +		}
   1.140 +
   1.141 +		s++;
   1.142 +	}
   1.143 +
   1.144 +	return 0;
   1.145 +}
   1.146 +
   1.147 +static char *pct_encoding_normalize(char *s)
   1.148 +{
   1.149 +	char *retval, *p;
   1.150 +	int c;
   1.151 +
   1.152 +	if (!s)
   1.153 +		return NULL;
   1.154 +
   1.155 +	p = retval = malloc(strlen(s) + 1);
   1.156 +
   1.157 +	while (*s) {
   1.158 +		if (*s == '%') {
   1.159 +			c = pchar_get_char(s);
   1.160 +			if (is_unreserved(c))
   1.161 +				*p++ = c;
   1.162 +			else {
   1.163 +				*p++ = '%';
   1.164 +				*p++ = "0123456789ABCDEF"[c/16];
   1.165 +				*p++ = "0123456789ABCDEF"[c%16];
   1.166 +			}
   1.167 +			pchar_next_char(s);
   1.168 +		} else
   1.169 +			*p++ = *s++;
   1.170 +	}
   1.171 +
   1.172 +	*p++ = '\0';
   1.173 +
   1.174 +	return realloc(retval, p - retval);
   1.175 +}
   1.176 +
   1.177 +static int validate_userinfo(const char *userinfo, struct razor_error **error)
   1.178 +{
   1.179 +	const char *s;
   1.180 +
   1.181 +	for (s = userinfo; *s; s++) {
   1.182 +		if (!is_unreserved(*s) && *s != '%' && !is_sub_delim(*s)
   1.183 +		    && *s != ':') {
   1.184 +			razor_set_error(error, RAZOR_GENERAL_ERROR,
   1.185 +					RAZOR_GENERAL_ERROR_BAD_URI, userinfo,
   1.186 +					"Invalid URI userinfo");
   1.187 +			return -1;
   1.188 +		}
   1.189 +	}
   1.190 +
   1.191 +	return 0;
   1.192 +}
   1.193 +
   1.194 +static int validate_reg_name(const char *reg_name)
   1.195 +{
   1.196 +	const char *s;
   1.197 +
   1.198 +	for (s = reg_name; *s; s++) {
   1.199 +		if (!is_unreserved(*s) && *s != '%' && !is_sub_delim(*s))
   1.200 +			return -1;
   1.201 +	}
   1.202 +
   1.203 +	return 0;
   1.204 +}
   1.205 +
   1.206 +static int validate_ipv4address(const char *s, int length)
   1.207 +{
   1.208 +	int count = 0, digits, octet;
   1.209 +
   1.210 +	for (;;) {
   1.211 +		if (!length)
   1.212 +			return -1;
   1.213 +
   1.214 +		if (*s == '0') {
   1.215 +			digits = 1;
   1.216 +			octet = 0;
   1.217 +		} else {
   1.218 +			if (*s < '1' || *s > '9')
   1.219 +				return -1;
   1.220 +
   1.221 +			octet = *s - '0';
   1.222 +
   1.223 +			for (digits = 1; digits < length; digits++) {
   1.224 +				if (s[digits] >= '0' && s[digits] <= '9') {
   1.225 +					octet *= 10;
   1.226 +					octet += s[digits] - '0';
   1.227 +					if (octet > 255)
   1.228 +						return -1;
   1.229 +				} else
   1.230 +					break;
   1.231 +			}
   1.232 +		}
   1.233 +
   1.234 +		s += digits;
   1.235 +		length -= digits;
   1.236 +
   1.237 +		if (++count == 4)
   1.238 +			break;
   1.239 +
   1.240 +		if (length < 1 || *s != '.')
   1.241 +			return -1;
   1.242 +
   1.243 +		s++;
   1.244 +		length--;
   1.245 +	}
   1.246 +
   1.247 +	return length ? -1 : 0;
   1.248 +}
   1.249 +
   1.250 +static int count_ipv6_pieces(const char **s, int *length)
   1.251 +{
   1.252 +	int count, digits;
   1.253 +
   1.254 +	for (digits = 0; digits < 4 && digits < *length; digits++) {
   1.255 +		if (!is_xdigit((*s)[digits]))
   1.256 +			break;
   1.257 +	}
   1.258 +
   1.259 +	if (!digits)
   1.260 +		return 0;
   1.261 +
   1.262 +	(*s) += digits;
   1.263 +	(*length) -= digits;
   1.264 +	count = 1;
   1.265 +
   1.266 +	if (*length && **s == ':') {
   1.267 +		(*s)++;
   1.268 +		(*length)--;
   1.269 +		count += count_ipv6_pieces(s, length);
   1.270 +		if (count == 1) {
   1.271 +			(*s)--;
   1.272 +			(*length)++;
   1.273 +		}
   1.274 +	}
   1.275 +
   1.276 +	return count;
   1.277 +}
   1.278 +
   1.279 +static int validate_ip_literal(const char *ip_literal, int length)
   1.280 +{
   1.281 +	const char *s, *dot;
   1.282 +	int len, no_pieces, elide;
   1.283 +
   1.284 +	if (length >= 4 && ip_literal[0] == 'v') {
   1.285 +		/* IPvFuture */
   1.286 +		dot = strchr(ip_literal + 2, '.');
   1.287 +		if (!dot || dot >= ip_literal + length)
   1.288 +			return -1;
   1.289 +		for (s = ip_literal + 1; s < dot; s++) {
   1.290 +			if (!is_xdigit(*s))
   1.291 +				return -1;
   1.292 +		}
   1.293 +		for (s = dot + 1; s < ip_literal + length; s++) {
   1.294 +			if (!is_unreserved(*s) && !is_sub_delim(*s) && *s != ':')
   1.295 +				return -1;
   1.296 +		}
   1.297 +	} else {
   1.298 +		/* IPv6address */
   1.299 +		s = ip_literal;
   1.300 +		len = length;
   1.301 +		no_pieces = count_ipv6_pieces(&s, &len);
   1.302 +
   1.303 +		if (len > 1 && s[0] == ':' && s[1] == ':') {
   1.304 +			s += 2;
   1.305 +			len -= 2;
   1.306 +			elide = 1;
   1.307 +			no_pieces += count_ipv6_pieces(&s, &len);
   1.308 +		} else
   1.309 +			elide = 0;
   1.310 +
   1.311 +		if (!validate_ipv4address(s, len))
   1.312 +			no_pieces += 2;
   1.313 +		else if (len)
   1.314 +			return -1;
   1.315 +
   1.316 +		if (no_pieces > 8 || no_pieces == 8 && elide || no_pieces < 1)
   1.317 +			return -1;
   1.318 +	}
   1.319 +
   1.320 +	return 0;
   1.321 +}
   1.322 +
   1.323 +static int validate_host(const char *host, struct razor_error **error)
   1.324 +{
   1.325 +	int retval;
   1.326 +
   1.327 +	if (host[0] == '[' && host[strlen(host) - 1] == ']')
   1.328 +		retval = validate_ip_literal(host + 1, strlen(host) - 2);
   1.329 +	else {
   1.330 +		retval = validate_ipv4address(host, strlen(host));
   1.331 +		if (retval < 0)
   1.332 +			retval = validate_reg_name(host);
   1.333 +	}
   1.334 +
   1.335 +	if (retval)
   1.336 +		razor_set_error(error, RAZOR_GENERAL_ERROR,
   1.337 +				RAZOR_GENERAL_ERROR_BAD_URI, host,
   1.338 +				"Invalid URI host");
   1.339 +
   1.340 +	return retval;
   1.341 +}
   1.342 +
   1.343 +static char *strdown(char *s)
   1.344 +{
   1.345 +	while (*s) {
   1.346 +		if (*s >= 'A' && *s <= 'Z') {
   1.347 +			*s -= 'A';
   1.348 +			*s += 'a';
   1.349 +		}
   1.350 +		s++;
   1.351 +	}
   1.352 +
   1.353 +	return s;
   1.354 +}
   1.355 +
   1.356 +static int razor_uri_parse_authority(struct razor_uri *ru,
   1.357 +				     const char *authority, int length,
   1.358 +				     struct razor_error **error)
   1.359 +{
   1.360 +	const char *s, *auth = authority;
   1.361 +	char *userinfo, *port, *host;
   1.362 +
   1.363 +	s = strchr(auth, '@');
   1.364 +	if (s && s < auth + length) {
   1.365 +		userinfo = razor_strndup(auth, s - auth);
   1.366 +		s++;
   1.367 +		length -= s - auth;
   1.368 +		auth = s;
   1.369 +
   1.370 +		if (validate_userinfo(userinfo, error)) {
   1.371 +			free(userinfo);
   1.372 +			return -1;
   1.373 +		}
   1.374 +	} else
   1.375 +		userinfo = NULL;
   1.376 +
   1.377 +	s = strchr(auth, ':');
   1.378 +	if (s && s < auth + length) {
   1.379 +		s++;
   1.380 +		port = razor_strndup(s, length - (s - auth));
   1.381 +		s--;
   1.382 +		length = s - auth;
   1.383 +
   1.384 +		if (strspn(port, "0123456789") != strlen(port)) {
   1.385 +			razor_set_error(error, RAZOR_GENERAL_ERROR,
   1.386 +					RAZOR_GENERAL_ERROR_BAD_URI, port,
   1.387 +					"Invalid URI port");
   1.388 +			free(userinfo);
   1.389 +			free(port);
   1.390 +			return -1;
   1.391 +		}
   1.392 +	} else
   1.393 +		port = NULL;
   1.394 +
   1.395 +	host = razor_strndup(auth, length);
   1.396 +
   1.397 +	if (validate_host(host, error)) {
   1.398 +		free(userinfo);
   1.399 +		free(port);
   1.400 +		free(host);
   1.401 +		return -1;
   1.402 +	}
   1.403 +
   1.404 +	ru->userinfo = userinfo;
   1.405 +	ru->port = port;
   1.406 +	ru->host = host;
   1.407 +
   1.408 +	return 0;
   1.409 +}
   1.410 +
   1.411 +/*
   1.412 + * Parse either a hier-part or a relative-part
   1.413 + */
   1.414 +static int razor_uri_parse_part(struct razor_uri *ru, const char *part,
   1.415 +				int relative_part, struct razor_error **error)
   1.416 +{
   1.417 +	const char *s, *hp = part;
   1.418 +	char *path, *p;
   1.419 +	int noscheme = 0;
   1.420 +
   1.421 +	if (hp[0] == '/' && hp[1] == '/') {
   1.422 +		hp += 2;
   1.423 +		s = strpbrk(hp, "/?#");
   1.424 +		if (!s)
   1.425 +			s = hp + strlen(hp);
   1.426 +		if (razor_uri_parse_authority(ru, hp, s - hp, error) < 0)
   1.427 +			return -1;
   1.428 +		hp = s;
   1.429 +	} else {
   1.430 +		ru->userinfo = NULL;
   1.431 +		ru->host = NULL;
   1.432 +		ru->port = NULL;
   1.433 +	}
   1.434 +
   1.435 +	if (!*hp) {
   1.436 +		/* path-empty */
   1.437 +		ru->path = strdup("");
   1.438 +		return 0;
   1.439 +	} else if (*hp == '/') {
   1.440 +		/* path-absolute */
   1.441 +		p = path = malloc(strlen(hp) + 1);
   1.442 +		*p++ = '/';
   1.443 +		hp++;
   1.444 +		if (!*hp) {
   1.445 +			*p++ = '\0';
   1.446 +			ru->path = realloc(path, p - path);
   1.447 +			return 0;
   1.448 +		}
   1.449 +	} else if (!ru->host) {
   1.450 +		/* path-rootless or path-noscheme */
   1.451 +		noscheme = relative_part;
   1.452 +		p = path = malloc(strlen(hp) + 1);
   1.453 +	} else {
   1.454 +		razor_set_error(error, RAZOR_GENERAL_ERROR,
   1.455 +				RAZOR_GENERAL_ERROR_BAD_URI, part,
   1.456 +				relative_part ? "Invalid URI relative part" :
   1.457 +				"Invalid URI hierarchical part");
   1.458 +		return -1;
   1.459 +	}
   1.460 +
   1.461 +	if (!is_pchar(*hp) || noscheme && *hp == ':') {
   1.462 +		free(path);
   1.463 +		razor_set_error(error, RAZOR_GENERAL_ERROR,
   1.464 +				RAZOR_GENERAL_ERROR_BAD_URI, part,
   1.465 +				"Invalid character in URI path");
   1.466 +		return -1;
   1.467 +	}
   1.468 +	*p++ = *hp++;
   1.469 +
   1.470 +	while (*hp) {
   1.471 +		if (*hp == '/')
   1.472 +			noscheme = 0;
   1.473 +		else if (!is_pchar(*hp) || noscheme && *hp == ':') {
   1.474 +			free(path);
   1.475 +			razor_set_error(error, RAZOR_GENERAL_ERROR,
   1.476 +					RAZOR_GENERAL_ERROR_BAD_URI, part,
   1.477 +					"Invalid character in URI path");
   1.478 +			return -1;
   1.479 +		}
   1.480 +		*p++ = *hp++;
   1.481 +	}
   1.482 +
   1.483 +	*p++ = '\0';
   1.484 +
   1.485 +	ru->path = realloc(path, p - path);
   1.486 +
   1.487 +	return 0;
   1.488 +}
   1.489 +
   1.490 +void razor_uri_destroy(struct razor_uri *ru)
   1.491 +{
   1.492 +	free(ru->scheme);
   1.493 +	free(ru->userinfo);
   1.494 +	free(ru->host);
   1.495 +	free(ru->port);
   1.496 +	free(ru->path);
   1.497 +	free(ru->query);
   1.498 +	free(ru->fragment);
   1.499 +}
   1.500 +
   1.501 +int razor_uri_parse_uri(struct razor_uri *ru, const char *uri, int absolute,
   1.502 +			struct razor_error **error)
   1.503 +{
   1.504 +	int r;
   1.505 +	const char *s;
   1.506 +	char *hier_part;
   1.507 +
   1.508 +	if (pct_encoding_validate(uri) < 0) {
   1.509 +		razor_set_error(error, RAZOR_GENERAL_ERROR,
   1.510 +				RAZOR_GENERAL_ERROR_BAD_URI, uri,
   1.511 +				"Invalid percent encoding");
   1.512 +		return -1;
   1.513 +	}
   1.514 +
   1.515 +	memset(ru, 0, sizeof(*ru));
   1.516 +
   1.517 +	s = skip_uri_scheme(uri);
   1.518 +	if (!s) {
   1.519 +		razor_set_error(error, RAZOR_GENERAL_ERROR,
   1.520 +				RAZOR_GENERAL_ERROR_BAD_URI, uri,
   1.521 +				"Invalid URI scheme");
   1.522 +		return -1;
   1.523 +	}
   1.524 +	ru->scheme = razor_strndup(uri, s - uri);
   1.525 +	uri = s + 1;
   1.526 +
   1.527 +	s = strchr(uri, '?');
   1.528 +	if (!s)
   1.529 +		s = strchr(uri, '#');
   1.530 +	if (!s)
   1.531 +		s = uri + strlen(uri);
   1.532 +	hier_part = razor_strndup(uri, s - uri);
   1.533 +	uri = s;
   1.534 +
   1.535 +	r = razor_uri_parse_part(ru, hier_part, 0, error);
   1.536 +	free(hier_part);
   1.537 +	if (r) {
   1.538 +		razor_uri_destroy(ru);
   1.539 +		return -1;
   1.540 +	}
   1.541 +
   1.542 +	if (*uri != '?')
   1.543 +		ru->query = NULL;
   1.544 +	else {
   1.545 +		uri++;
   1.546 +		s = strchr(uri, '#');
   1.547 +		if (!s)
   1.548 +			s = uri + strlen(uri);
   1.549 +		ru->query = razor_strndup(uri, s - uri);
   1.550 +		uri = s;
   1.551 +	}
   1.552 +
   1.553 +	if (*uri != '#')
   1.554 +		ru->fragment = NULL;
   1.555 +	else if (absolute) {
   1.556 +		razor_set_error(error, RAZOR_GENERAL_ERROR,
   1.557 +				RAZOR_GENERAL_ERROR_BAD_URI, uri,
   1.558 +				"Fragments are not allowed in absolute URIs");
   1.559 +		razor_uri_destroy(ru);
   1.560 +		return -1;
   1.561 +	} else {
   1.562 +		uri++;
   1.563 +		ru->fragment = strdup(uri);
   1.564 +	}
   1.565 +
   1.566 +	return 0;
   1.567 +}
   1.568 +
   1.569 +int razor_uri_parse_relative_ref(struct razor_uri *ru, const char *uri,
   1.570 +				 struct razor_error **error)
   1.571 +{
   1.572 +	int r;
   1.573 +	const char *s;
   1.574 +	char *relative_part;
   1.575 +
   1.576 +	if (pct_encoding_validate(uri) < 0) {
   1.577 +		razor_set_error(error, RAZOR_GENERAL_ERROR,
   1.578 +				RAZOR_GENERAL_ERROR_BAD_URI, uri,
   1.579 +				"Invalid percent encoding");
   1.580 +		return -1;
   1.581 +	}
   1.582 +
   1.583 +	memset(ru, 0, sizeof(*ru));
   1.584 +
   1.585 +	s = strchr(uri, '?');
   1.586 +	if (!s)
   1.587 +		s = strchr(uri, '#');
   1.588 +	if (!s)
   1.589 +		s = uri + strlen(uri);
   1.590 +	relative_part = razor_strndup(uri, s - uri);
   1.591 +	uri = s;
   1.592 +
   1.593 +	r = razor_uri_parse_part(ru, relative_part, 1, error);
   1.594 +	free(relative_part);
   1.595 +	if (r)
   1.596 +		return -1;
   1.597 +
   1.598 +	if (*uri == '?') {
   1.599 +		uri++;
   1.600 +		s = strchr(uri, '#');
   1.601 +		if (!s)
   1.602 +			s = uri + strlen(uri);
   1.603 +		ru->query = razor_strndup(uri, s - uri);
   1.604 +		uri = s;
   1.605 +	} else
   1.606 +		ru->query = NULL;
   1.607 +
   1.608 +	if (*uri == '#') {
   1.609 +		uri++;
   1.610 +		ru->fragment = strdup(uri);
   1.611 +	} else
   1.612 +		ru->fragment = NULL;
   1.613 +
   1.614 +	return 0;
   1.615 +}
   1.616 +
   1.617 +int razor_uri_parse(struct razor_uri *ru, const char *uri,
   1.618 +		    struct razor_error **error)
   1.619 +{
   1.620 +	struct razor_error *tmp_error = NULL;
   1.621 +	int r;
   1.622 +
   1.623 +	r = razor_uri_parse_uri(ru, uri, 0, &tmp_error);
   1.624 +	if (r < 0) {
   1.625 +		r = razor_uri_parse_relative_ref(ru, uri, NULL);
   1.626 +		if (r < 0)
   1.627 +			razor_propagate_error(error, tmp_error, NULL);
   1.628 +		else
   1.629 +			razor_error_free(tmp_error);
   1.630 +	}
   1.631 +
   1.632 +	return r;
   1.633 +}
   1.634 +
   1.635 +/*
   1.636 + * Following RFC 3986 § 5.2.4
   1.637 + */
   1.638 +static char *remove_dot_segments(const char *path)
   1.639 +{
   1.640 +	struct array output;
   1.641 +	char *input, *in, *s, *t;
   1.642 +	const char *step;
   1.643 +
   1.644 +#ifdef DEBUG
   1.645 +	fprintf(stderr, "STEP   OUTPUT BUFFER         INPUT BUFFER\n");
   1.646 +#endif
   1.647 +
   1.648 +	input = strdup(path);
   1.649 +	in = input;
   1.650 +	string_init(&output);
   1.651 +
   1.652 +#ifdef DEBUG
   1.653 +	fprintf(stderr, " 1 :   %-21s %s\n", string_str(&output), in);
   1.654 +#endif
   1.655 +
   1.656 +	while (*in) {
   1.657 +		if (str_has_prefix(in, "../")) {
   1.658 +			step = "2A";
   1.659 +			in += 3;
   1.660 +		} else if (str_has_prefix(in, "./")) {
   1.661 +			step = "2A";
   1.662 +			in += 2;
   1.663 +		} else if (str_has_prefix(in, "/./")) {
   1.664 +			step = "2B";
   1.665 +			in += 2;
   1.666 +		} else if (!strcmp(in, "/.")) {
   1.667 +			step = "2B";
   1.668 +			in++;
   1.669 +			*in = '/';
   1.670 +		} else if (str_has_prefix(in, "/../")) {
   1.671 +			step = "2C";
   1.672 +			in += 3;
   1.673 +			s = strrchr(string_str(&output), '/');
   1.674 +			if (!s)
   1.675 +				s = string_str(&output);
   1.676 +			string_truncate_at(&output, s);
   1.677 +		} else if (!strcmp(in, "/..")) {
   1.678 +			step = "2C";
   1.679 +			in += 2;
   1.680 +			*in = '/';
   1.681 +			s = strrchr(string_str(&output), '/');
   1.682 +			if (!s)
   1.683 +				s = string_str(&output);
   1.684 +			string_truncate_at(&output, s);
   1.685 +		} else if (!strcmp(in, ".") || !strcmp(in, "..")) {
   1.686 +			step = "2D";
   1.687 +			in += strlen(in);
   1.688 +		} else {
   1.689 +			step = "2E";
   1.690 +			t = strchr(in + 1, '/');
   1.691 +			if (!t)
   1.692 +				t = in + strlen(in);
   1.693 +			string_append_len(&output, in, t - in);
   1.694 +			in = t;
   1.695 +		}
   1.696 +#ifdef DEBUG
   1.697 +		fprintf(stderr, " %s:   %-21s %s\n", step, string_str(&output),
   1.698 +			in);
   1.699 +#endif
   1.700 +	}
   1.701 +
   1.702 +	free(input);
   1.703 +	return string_str(&output);
   1.704 +}
   1.705 +
   1.706 +
   1.707 +/*
   1.708 + * Following RFC 3986 § 6.2.2
   1.709 + */
   1.710 +void razor_uri_normalize(struct razor_uri *ru)
   1.711 +{
   1.712 +	char *s;
   1.713 +
   1.714 +	strdown(ru->scheme);
   1.715 +	if (ru->host)
   1.716 +		strdown(ru->host);
   1.717 +
   1.718 +	s = pct_encoding_normalize(ru->userinfo);
   1.719 +	free(ru->userinfo);
   1.720 +	ru->userinfo = s;
   1.721 +
   1.722 +	s = pct_encoding_normalize(ru->host);
   1.723 +	free(ru->host);
   1.724 +	ru->host = s;
   1.725 +
   1.726 +	s = pct_encoding_normalize(ru->path);
   1.727 +	free(ru->path);
   1.728 +	ru->path = s;
   1.729 +
   1.730 +	s = pct_encoding_normalize(ru->query);
   1.731 +	free(ru->query);
   1.732 +	ru->query = s;
   1.733 +
   1.734 +	s = pct_encoding_normalize(ru->fragment);
   1.735 +	free(ru->fragment);
   1.736 +	ru->fragment = s;
   1.737 +
   1.738 +	s = remove_dot_segments(ru->path);
   1.739 +	free(ru->path);
   1.740 +	ru->path = s;
   1.741 +}
   1.742 +
   1.743 +char *razor_uri_get_authority(const struct razor_uri *ru)
   1.744 +{
   1.745 +	char *result, *r;
   1.746 +	int len = 1;
   1.747 +
   1.748 +	if (ru->host) {
   1.749 +		if (ru->userinfo)
   1.750 +			len += strlen(ru->userinfo) + 1;
   1.751 +		len += strlen(ru->host);
   1.752 +		if (ru->port)
   1.753 +			len += strlen(ru->port) + 1;
   1.754 +	} else
   1.755 +		return NULL;
   1.756 +
   1.757 +	r = result = malloc(len);
   1.758 +
   1.759 +	if (ru->userinfo) {
   1.760 +		strcpy(r, ru->userinfo);
   1.761 +		r += strlen(r);
   1.762 +		*r++ = '@';
   1.763 +	}
   1.764 +
   1.765 +	strcpy(r, ru->host);
   1.766 +	r += strlen(r);
   1.767 +
   1.768 +	if (ru->port) {
   1.769 +		*r++ = ':';
   1.770 +		strcpy(r, ru->port);
   1.771 +	}
   1.772 +
   1.773 +	return result;
   1.774 +}
   1.775 +
   1.776 +/*
   1.777 + * Following RFC 3986 § 5.3
   1.778 + */
   1.779 +char *razor_uri_recompose(const struct razor_uri *ru)
   1.780 +{
   1.781 +	char *authority, *result, *r;
   1.782 +	int len = 1;
   1.783 +
   1.784 +	authority = razor_uri_get_authority(ru);
   1.785 +
   1.786 +	if (ru->scheme)
   1.787 +		len += strlen(ru->scheme) + 1;
   1.788 +	if (authority)
   1.789 +		len += strlen(authority) + 2;
   1.790 +	len += strlen(ru->path);
   1.791 +	if (ru->query)
   1.792 +		len += strlen(ru->query) + 1;
   1.793 +	if (ru->fragment)
   1.794 +		len += strlen(ru->fragment) + 1;
   1.795 +
   1.796 +	r = result = malloc(len);
   1.797 +
   1.798 +	if (ru->scheme) {
   1.799 +		strcpy(r, ru->scheme);
   1.800 +		r += strlen(r);
   1.801 +		*r++ = ':';
   1.802 +	}
   1.803 +
   1.804 +	if (authority) {
   1.805 +		*r++ = '/';
   1.806 +		*r++ = '/';
   1.807 +		strcpy(r, authority);
   1.808 +		free(authority);
   1.809 +		r += strlen(r);
   1.810 +	}
   1.811 +
   1.812 +	strcpy(r, ru->path);
   1.813 +	r += strlen(r);
   1.814 +
   1.815 +	if (ru->query) {
   1.816 +		*r++ = '?';
   1.817 +		strcpy(r, ru->query);
   1.818 +		r += strlen(r);
   1.819 +	}
   1.820 +
   1.821 +	if (ru->fragment) {
   1.822 +		*r++ = '#';
   1.823 +		strcpy(r, ru->fragment);
   1.824 +	}
   1.825 +
   1.826 +	return result;
   1.827 +}
   1.828 +
   1.829 +/*
   1.830 + * Following RFC 3986 § 5.2.3
   1.831 + */
   1.832 +static char *merge_paths(const struct razor_uri *base,const struct razor_uri *R)
   1.833 +{
   1.834 +	char *s, *t, *path;
   1.835 +
   1.836 +	if (base->host && !*base->path)
   1.837 +		path = razor_concat("/", R->path, NULL);
   1.838 +	else {
   1.839 +		s = strrchr(base->path, '/');
   1.840 +		if (s) {
   1.841 +			t = razor_strndup(base->path, s + 1 - base->path);
   1.842 +			path = razor_concat(t, R->path, NULL);
   1.843 +			free(t);
   1.844 +		} else
   1.845 +			path = strdup(R->path);
   1.846 +	}
   1.847 +
   1.848 +	return path;
   1.849 +}
   1.850 +
   1.851 +/*
   1.852 + * Following RFC 3986 § 5.2
   1.853 + */
   1.854 +void razor_uri_resolve(struct razor_uri *T, const struct razor_uri *base,
   1.855 +		       const struct razor_uri *R)
   1.856 +{
   1.857 +	char *s;
   1.858 +
   1.859 +	if (R->scheme) {
   1.860 +		T->scheme = strdup(R->scheme);
   1.861 +		T->userinfo = strdup0(R->userinfo);
   1.862 +		T->host = strdup0(R->host);
   1.863 +		T->port = strdup0(R->port);
   1.864 +		T->path = remove_dot_segments(R->path);
   1.865 +		T->query = strdup0(R->query);
   1.866 +	} else {
   1.867 +		if (R->host) {
   1.868 +			T->userinfo = strdup0(R->userinfo);
   1.869 +			T->host = strdup0(R->host);
   1.870 +			T->port = strdup0(R->port);
   1.871 +			T->path = remove_dot_segments(R->path);
   1.872 +			T->query = strdup0(R->query);
   1.873 +		} else {
   1.874 +			if (!*R->path) {
   1.875 +				T->path = strdup(base->path);
   1.876 +				if (R->query)
   1.877 +					T->query = strdup(R->query);
   1.878 +				else
   1.879 +					T->query = strdup0(base->query);
   1.880 +			} else {
   1.881 +				if (*R->path == '/')
   1.882 +					T->path = remove_dot_segments(R->path);
   1.883 +				else {
   1.884 +					s = merge_paths(base, R);
   1.885 +					T->path = remove_dot_segments(s);
   1.886 +					free(s);
   1.887 +				}
   1.888 +				T->query = strdup0(R->query);
   1.889 +			}
   1.890 +			T->userinfo = strdup0(base->userinfo);
   1.891 +			T->host = strdup0(base->host);
   1.892 +			T->port = strdup0(base->port);
   1.893 +		}
   1.894 +		T->scheme = strdup(base->scheme);
   1.895 +	}
   1.896 +	T->fragment = strdup0(R->fragment);
   1.897 +}
   1.898 +
   1.899 +/*
   1.900 + * This differs from razor_uri_resolve() both in the types of its arguments
   1.901 + * and in the fact that it takes a root URI rather than a base URI. The base
   1.902 + * URI is determined by appending a slash to the root URI (if it doesn't
   1.903 + * already end in a slash). Finally, uri can be explicitly marked as either
   1.904 + * relative (ie., a relative-ref) or not (ie., a URI). This is important as
   1.905 + * otherwise "c:/xxx" could be interpreted as a URI in the "c" scheme.
   1.906 + */
   1.907 +char *razor_resolve_uri_root(const char *root_uri, const char *uri,
   1.908 +			     int is_relative, struct razor_error **error)
   1.909 +{
   1.910 +	int r;
   1.911 +        char *base_uri, *s, *result;
   1.912 +	struct razor_uri ru, base, file;
   1.913 +
   1.914 +	if (!root_uri || !*root_uri)
   1.915 +		root_uri = "file:/";
   1.916 +
   1.917 +	if (root_uri[strlen(root_uri) - 1] == '/')
   1.918 +		base_uri = strdup(root_uri);
   1.919 +	else
   1.920 +		base_uri = razor_concat(root_uri, "/", NULL);
   1.921 +
   1.922 +	r = razor_uri_parse_uri(&base, base_uri, 1, error);
   1.923 +	free(base_uri);
   1.924 +	if (r)
   1.925 +		return NULL;
   1.926 +
   1.927 +	if (is_relative > 0) {
   1.928 +		/*
   1.929 +		 * We can't use razor_uri_parse_relative_ref() to parse
   1.930 +		 * uri in case it starts with a segment that includes a
   1.931 +		 * colon. Thus we use this kludge.
   1.932 +		 */
   1.933 +		s = razor_concat("scheme:", uri, NULL);
   1.934 +		r = razor_uri_parse_uri(&file, s, 0, error);
   1.935 +		free(s);
   1.936 +		if (!r) {
   1.937 +			free(file.scheme);
   1.938 +			file.scheme = NULL;
   1.939 +		}
   1.940 +	}
   1.941 +	else if (!is_relative)
   1.942 +		r = razor_uri_parse_uri(&file, uri, 0, error);
   1.943 +	else
   1.944 +		r = razor_uri_parse(&file, uri, error);
   1.945 +	if (r) {
   1.946 +		razor_uri_destroy(&base);
   1.947 +		return NULL;
   1.948 +	}
   1.949 +
   1.950 +	razor_uri_resolve(&ru, &base, &file);
   1.951 +
   1.952 +	razor_uri_destroy(&base);
   1.953 +	razor_uri_destroy(&file);
   1.954 +
   1.955 +	result = razor_uri_recompose(&ru);
   1.956 +
   1.957 +	razor_uri_destroy(&ru);
   1.958 +
   1.959 +	return result;
   1.960 +}