librazor/uri.c
author J. Ali Harlow <ali@juiblex.co.uk>
Thu Jul 14 12:49:48 2016 +0100 (2016-07-14)
changeset 491 b18e0bf48a91
parent 475 008c75a5e08d
permissions -rw-r--r--
razor_path_to_uri() should cope with relative paths better
ali@475
     1
/*
ali@475
     2
 * Copyright (C) 2016  J. Ali Harlow <ali@juiblex.co.uk>
ali@475
     3
 *
ali@475
     4
 * This program is free software; you can redistribute it and/or modify
ali@475
     5
 * it under the terms of the GNU General Public License as published by
ali@475
     6
 * the Free Software Foundation; either version 2 of the License, or
ali@475
     7
 * (at your option) any later version.
ali@475
     8
 *
ali@475
     9
 * This program is distributed in the hope that it will be useful,
ali@475
    10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
ali@475
    11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
ali@475
    12
 * GNU General Public License for more details.
ali@475
    13
 *
ali@475
    14
 * You should have received a copy of the GNU General Public License along
ali@475
    15
 * with this program; if not, write to the Free Software Foundation, Inc.,
ali@475
    16
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
ali@475
    17
 */
ali@475
    18
ali@475
    19
#include "config.h"
ali@475
    20
ali@475
    21
#undef DEBUG
ali@475
    22
ali@475
    23
#include <stdlib.h>
ali@475
    24
#include <string.h>
ali@475
    25
#include "razor.h"
ali@475
    26
#include "types/types.h"
ali@475
    27
#include "razor-internal.h"
ali@475
    28
#include "uri.h"
ali@475
    29
ali@475
    30
/*
ali@475
    31
 * Following RFC 3986 § 3.
ali@475
    32
 * Note that we don't validate queries or fragments.
ali@475
    33
 */
ali@475
    34
ali@475
    35
/* Duplicate s with strdup(), passing a NULL pointer through as NULL. */
#define strdup0(s)			((s) ? strdup(s) : NULL)

/*
 * Minimal NUL-terminated string builder layered on struct array.
 * The array always holds the string plus its terminator, so (str)->size
 * is the string length plus one (see string_truncate_at()).
 * NOTE(review): array_add() is assumed to return a pointer to the newly
 * reserved bytes at the end of the array -- confirm against its definition.
 */

/* View the array contents as a C string. */
#define string_str(str)			((char *)(str)->data)

/* Initialise str as an empty string (a single NUL byte). */
#define string_init(str)		do { \
						char *_p; \
						array_init(str); \
						_p = array_add(str, 1); \
						*_p = '\0'; \
					} while(0)

/*
 * Append len bytes of s to str. The len argument is evaluated exactly
 * once, so it may be an arbitrary (even expensive) expression.
 * The copy starts on the old terminator (one byte before the newly
 * reserved region) and a fresh terminator is written after it.
 */
#define string_append_len(str, s, len)	do { \
						size_t _n = (len); \
						char *_p; \
						_p = array_add(str, _n); \
						_p--; \
						strncpy(_p, s, _n); \
						_p[_n] = '\0'; \
					} while(0)

/* Append the whole NUL-terminated string s to str. */
#define string_append(str, s)		string_append_len(str, s, strlen(s))

/*
 * Cut str short so that it ends at s, which must point into the
 * string's own buffer. The s argument is evaluated exactly once.
 */
#define string_truncate_at(str, s)	do { \
						char *_s = (s); \
						int _len; \
						_len = _s - \
						       (char *)(str)->data; \
						*_s = '\0'; \
						(str)->size = _len + 1; \
					} while(0)
ali@475
    63
ali@475
    64
ali@475
    65
/*
 * Find the end of the scheme at the start of uri. RFC 3986 defines:
 *	scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
 *
 * Returns a pointer to the ':' that terminates the scheme, or NULL when
 * uri does not start with a well-formed scheme immediately followed by ':'.
 */
static const char *skip_uri_scheme(const char *uri)
{
	const char *p = uri;
	int leading_alpha;

	leading_alpha = (*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z');
	if (!leading_alpha)
		return NULL;

	/* Step over the first letter, then over every further scheme char. */
	for (p++; is_alnum(*p) || *p == '+' || *p == '-' || *p == '.'; p++)
		;

	return *p == ':' ? p : NULL;
}
ali@475
    81
ali@475
    82
/*
 * Duplicate at most n bytes of s into a newly allocated, NUL-terminated
 * string. Copying stops early if a NUL is found within the first n bytes,
 * and s is never read beyond that NUL.
 *
 * Returns the new string (to be released with free()), or NULL if the
 * allocation fails.
 */
static char *razor_strndup(const char *s, size_t n)
{
	size_t len = 0;
	char *result;

	/* Bounded length scan: never looks past n bytes or past a NUL. */
	while (len < n && s[len])
		len++;

	result = malloc(len + 1);
	if (!result)
		return NULL;

	memcpy(result, s, len);
	result[len] = '\0';

	return result;
}
ali@475
    96
ali@475
    97
#if 0
/*
 * Currently compiled out.
 *
 * Return the (possibly percent-decoded) pchar at p, 0 at end-of-string,
 * or -1 if p does not start with a valid pchar.
 */
static int pchar_get_char_validated(const char *p)
{
	int hi, lo;

	if (p[0] == '\0')
		return 0;

	if (p[0] == '%') {
		hi = xdigit_value(p[1]);
		if (hi < 0)
			return -1;
		lo = xdigit_value(p[2]);
		if (lo < 0)
			return -1;
		return hi * 16 + lo;
	}

	if ((p[0] >= 'a' && p[0] <= 'z') || (p[0] >= 'A' && p[0] <= 'Z') ||
	    (p[0] >= '0' && p[0] <= '9') ||
	    strchr("-._~!$&'()*+,;=:@", p[0]))
		return p[0];

	return -1;
}
#endif
ali@475
   124
ali@475
   125
/*
 * Check the percent encoding of s: every '%' must be followed by two
 * characters for which xdigit_value() does not report a negative value
 * (i.e. two hexadecimal digits).
 *
 * Returns 0 if all escapes are well formed, -1 otherwise.
 */
static int pct_encoding_validate(const char *s)
{
	const char *p;

	for (p = s; *p; p++) {
		if (*p != '%')
			continue;

		/* A NUL right after '%' fails the first test, so p[2] is
		 * only examined when p[1] is a real character. */
		if (xdigit_value(p[1]) < 0 || xdigit_value(p[2]) < 0)
			return -1;

		/* Skip the two digits; the loop increment skips the '%'. */
		p += 2;
	}

	return 0;
}
ali@475
   143
ali@475
   144
/*
 * Return a newly allocated copy of s with its percent-escapes normalised:
 * an escape whose value satisfies is_unreserved() is replaced by the
 * character itself, any other escape is re-emitted with upper-case
 * hexadecimal digits. All other characters are copied unchanged.
 * NOTE(review): pchar_get_char()/pchar_next_char() are assumed to decode
 * and step over one "%XX" escape respectively -- confirm in their header.
 *
 * s is expected to have passed pct_encoding_validate(). A NULL s yields
 * NULL. Returns NULL if memory cannot be allocated; the caller frees the
 * result.
 */
static char *pct_encoding_normalize(char *s)
{
	char *retval, *p, *shrunk;
	int c;

	if (!s)
		return NULL;

	/* The output can only be as long as, or shorter than, the input. */
	p = retval = malloc(strlen(s) + 1);
	if (!retval)
		return NULL;

	while (*s) {
		if (*s == '%') {
			c = pchar_get_char(s);
			if (is_unreserved(c))
				*p++ = c;
			else {
				*p++ = '%';
				*p++ = "0123456789ABCDEF"[c/16];
				*p++ = "0123456789ABCDEF"[c%16];
			}
			pchar_next_char(s);
		} else
			*p++ = *s++;
	}

	*p++ = '\0';

	/*
	 * Give back the unused tail. If the shrink fails the original
	 * buffer is still valid, so hand that back instead of leaking it.
	 */
	shrunk = realloc(retval, p - retval);

	return shrunk ? shrunk : retval;
}
ali@475
   173
ali@475
   174
static int validate_userinfo(const char *userinfo, struct razor_error **error)
ali@475
   175
{
ali@475
   176
	const char *s;
ali@475
   177
ali@475
   178
	for (s = userinfo; *s; s++) {
ali@475
   179
		if (!is_unreserved(*s) && *s != '%' && !is_sub_delim(*s)
ali@475
   180
		    && *s != ':') {
ali@475
   181
			razor_set_error(error, RAZOR_GENERAL_ERROR,
ali@475
   182
					RAZOR_GENERAL_ERROR_BAD_URI, userinfo,
ali@475
   183
					"Invalid URI userinfo");
ali@475
   184
			return -1;
ali@475
   185
		}
ali@475
   186
	}
ali@475
   187
ali@475
   188
	return 0;
ali@475
   189
}
ali@475
   190
ali@475
   191
/*
 * Check that reg_name consists only of unreserved characters, '%' and
 * sub-delims. An empty name is accepted.
 *
 * Returns 0 if valid, -1 otherwise. No error object is set here.
 */
static int validate_reg_name(const char *reg_name)
{
	const char *p = reg_name;

	while (*p) {
		int allowed = is_unreserved(*p) || *p == '%' ||
			      is_sub_delim(*p);

		if (!allowed)
			return -1;
		p++;
	}

	return 0;
}
ali@475
   202
ali@475
   203
/*
 * Check that the first length bytes of s are exactly a dotted-quad IPv4
 * address: four decimal octets in the range 0-255, separated by '.',
 * with no leading zeros and nothing left over.
 *
 * Returns 0 if valid, -1 otherwise.
 */
static int validate_ipv4address(const char *s, int length)
{
	const char *cur = s;
	int remaining = length;
	int field;

	for (field = 0; field < 4; field++) {
		int value, used;

		/* Every octet but the first is introduced by a dot. */
		if (field > 0) {
			if (remaining < 1 || *cur != '.')
				return -1;
			cur++;
			remaining--;
		}

		if (!remaining)
			return -1;

		if (*cur < '0' || *cur > '9')
			return -1;

		value = *cur - '0';
		used = 1;

		/* A leading '0' is a complete octet on its own. */
		if (value != 0) {
			while (used < remaining &&
			       cur[used] >= '0' && cur[used] <= '9') {
				value = value * 10 + (cur[used] - '0');
				if (value > 255)
					return -1;
				used++;
			}
		}

		cur += used;
		remaining -= used;
	}

	return remaining ? -1 : 0;
}
ali@475
   246
ali@475
   247
/*
 * Consume a run of colon-separated 16-bit IPv6 pieces (1-4 hex digits
 * each) from the front of *s, advancing *s and reducing *length to match.
 *
 * The run stops, without consuming anything further, at:
 *   - a "::" (both colons are left for the caller),
 *   - a trailing ':' with nothing after it,
 *   - a group of digits followed by '.', which is the first octet of an
 *     embedded IPv4 address rather than an h16 piece.
 *
 * Returns the number of pieces consumed.
 */
static int count_ipv6_pieces(const char **s, int *length)
{
	int count, digits;

	for (digits = 0; digits < 4 && digits < *length; digits++) {
		if (!is_xdigit((*s)[digits]))
			break;
	}

	if (!digits)
		return 0;

	/* Start of a dotted-quad tail: leave it for validate_ipv4address. */
	if (digits < *length && (*s)[digits] == '.')
		return 0;

	(*s) += digits;
	(*length) -= digits;
	count = 1;

	if (*length && **s == ':') {
		(*s)++;
		(*length)--;
		count += count_ipv6_pieces(s, length);
		/*
		 * No piece followed the ':'. Hand the colon back when it is
		 * dangling or the first half of "::"; keep it consumed when
		 * it separates the last piece from an IPv4 tail.
		 */
		if (count == 1 && (!*length || **s == ':')) {
			(*s)--;
			(*length)++;
		}
	}

	return count;
}

/*
 * Validate the text between the brackets of an IP-literal host
 * (RFC 3986 § 3.2.2): either an IPvFuture or an IPv6address.
 *
 *	IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
 *
 * An IPv6address must total eight 16-bit pieces, where a trailing
 * dotted-quad counts as two, unless a single "::" elides one or more
 * pieces, in which case at most seven may be written out.
 *
 * Only the first length bytes of ip_literal are examined.
 * Returns 0 if valid, -1 otherwise.
 */
static int validate_ip_literal(const char *ip_literal, int length)
{
	const char *s, *dot, *end = ip_literal + length;
	int len, no_pieces, elide;

	if (length >= 4 && ip_literal[0] == 'v') {
		/* IPvFuture: search for the dot after at least one digit */
		dot = memchr(ip_literal + 2, '.', length - 2);
		if (!dot || dot + 1 >= end)
			return -1;
		for (s = ip_literal + 1; s < dot; s++) {
			if (!is_xdigit(*s))
				return -1;
		}
		for (s = dot + 1; s < end; s++) {
			if (!is_unreserved(*s) && !is_sub_delim(*s) && *s != ':')
				return -1;
		}
	} else {
		/* IPv6address */
		s = ip_literal;
		len = length;
		no_pieces = count_ipv6_pieces(&s, &len);

		if (len > 1 && s[0] == ':' && s[1] == ':') {
			s += 2;
			len -= 2;
			elide = 1;
			no_pieces += count_ipv6_pieces(&s, &len);
		} else
			elide = 0;

		/* Whatever is left must be an IPv4 tail or nothing at all. */
		if (!validate_ipv4address(s, len))
			no_pieces += 2;
		else if (len)
			return -1;

		if (elide ? no_pieces > 7 : no_pieces != 8)
			return -1;
	}

	return 0;
}
ali@475
   319
ali@475
   320
static int validate_host(const char *host, struct razor_error **error)
ali@475
   321
{
ali@475
   322
	int retval;
ali@475
   323
ali@475
   324
	if (host[0] == '[' && host[strlen(host) - 1] == ']')
ali@475
   325
		retval = validate_ip_literal(host + 1, strlen(host) - 2);
ali@475
   326
	else {
ali@475
   327
		retval = validate_ipv4address(host, strlen(host));
ali@475
   328
		if (retval < 0)
ali@475
   329
			retval = validate_reg_name(host);
ali@475
   330
	}
ali@475
   331
ali@475
   332
	if (retval)
ali@475
   333
		razor_set_error(error, RAZOR_GENERAL_ERROR,
ali@475
   334
				RAZOR_GENERAL_ERROR_BAD_URI, host,
ali@475
   335
				"Invalid URI host");
ali@475
   336
ali@475
   337
	return retval;
ali@475
   338
}
ali@475
   339
ali@475
   340
/*
 * Convert the ASCII upper-case letters of s to lower case, in place.
 * Other bytes are left untouched.
 *
 * Returns s itself, so the call can be chained. A NULL s is returned
 * unchanged.
 */
static char *strdown(char *s)
{
	char *p;

	if (!s)
		return NULL;

	for (p = s; *p; p++) {
		if (*p >= 'A' && *p <= 'Z')
			*p += 'a' - 'A';
	}

	return s;
}
ali@475
   352
ali@475
   353
static int razor_uri_parse_authority(struct razor_uri *ru,
ali@475
   354
				     const char *authority, int length,
ali@475
   355
				     struct razor_error **error)
ali@475
   356
{
ali@475
   357
	const char *s, *auth = authority;
ali@475
   358
	char *userinfo, *port, *host;
ali@475
   359
ali@475
   360
	s = strchr(auth, '@');
ali@475
   361
	if (s && s < auth + length) {
ali@475
   362
		userinfo = razor_strndup(auth, s - auth);
ali@475
   363
		s++;
ali@475
   364
		length -= s - auth;
ali@475
   365
		auth = s;
ali@475
   366
ali@475
   367
		if (validate_userinfo(userinfo, error)) {
ali@475
   368
			free(userinfo);
ali@475
   369
			return -1;
ali@475
   370
		}
ali@475
   371
	} else
ali@475
   372
		userinfo = NULL;
ali@475
   373
ali@475
   374
	s = strchr(auth, ':');
ali@475
   375
	if (s && s < auth + length) {
ali@475
   376
		s++;
ali@475
   377
		port = razor_strndup(s, length - (s - auth));
ali@475
   378
		s--;
ali@475
   379
		length = s - auth;
ali@475
   380
ali@475
   381
		if (strspn(port, "0123456789") != strlen(port)) {
ali@475
   382
			razor_set_error(error, RAZOR_GENERAL_ERROR,
ali@475
   383
					RAZOR_GENERAL_ERROR_BAD_URI, port,
ali@475
   384
					"Invalid URI port");
ali@475
   385
			free(userinfo);
ali@475
   386
			free(port);
ali@475
   387
			return -1;
ali@475
   388
		}
ali@475
   389
	} else
ali@475
   390
		port = NULL;
ali@475
   391
ali@475
   392
	host = razor_strndup(auth, length);
ali@475
   393
ali@475
   394
	if (validate_host(host, error)) {
ali@475
   395
		free(userinfo);
ali@475
   396
		free(port);
ali@475
   397
		free(host);
ali@475
   398
		return -1;
ali@475
   399
	}
ali@475
   400
ali@475
   401
	ru->userinfo = userinfo;
ali@475
   402
	ru->port = port;
ali@475
   403
	ru->host = host;
ali@475
   404
ali@475
   405
	return 0;
ali@475
   406
}
ali@475
   407
ali@475
   408
/*
ali@475
   409
 * Parse either a hier-part or a relative-part
ali@475
   410
 */
ali@475
   411
static int razor_uri_parse_part(struct razor_uri *ru, const char *part,
ali@475
   412
				int relative_part, struct razor_error **error)
ali@475
   413
{
ali@475
   414
	const char *s, *hp = part;
ali@475
   415
	char *path, *p;
ali@475
   416
	int noscheme = 0;
ali@475
   417
ali@475
   418
	if (hp[0] == '/' && hp[1] == '/') {
ali@475
   419
		hp += 2;
ali@475
   420
		s = strpbrk(hp, "/?#");
ali@475
   421
		if (!s)
ali@475
   422
			s = hp + strlen(hp);
ali@475
   423
		if (razor_uri_parse_authority(ru, hp, s - hp, error) < 0)
ali@475
   424
			return -1;
ali@475
   425
		hp = s;
ali@475
   426
	} else {
ali@475
   427
		ru->userinfo = NULL;
ali@475
   428
		ru->host = NULL;
ali@475
   429
		ru->port = NULL;
ali@475
   430
	}
ali@475
   431
ali@475
   432
	if (!*hp) {
ali@475
   433
		/* path-empty */
ali@475
   434
		ru->path = strdup("");
ali@475
   435
		return 0;
ali@475
   436
	} else if (*hp == '/') {
ali@475
   437
		/* path-absolute */
ali@475
   438
		p = path = malloc(strlen(hp) + 1);
ali@475
   439
		*p++ = '/';
ali@475
   440
		hp++;
ali@475
   441
		if (!*hp) {
ali@475
   442
			*p++ = '\0';
ali@475
   443
			ru->path = realloc(path, p - path);
ali@475
   444
			return 0;
ali@475
   445
		}
ali@475
   446
	} else if (!ru->host) {
ali@475
   447
		/* path-rootless or path-noscheme */
ali@475
   448
		noscheme = relative_part;
ali@475
   449
		p = path = malloc(strlen(hp) + 1);
ali@475
   450
	} else {
ali@475
   451
		razor_set_error(error, RAZOR_GENERAL_ERROR,
ali@475
   452
				RAZOR_GENERAL_ERROR_BAD_URI, part,
ali@475
   453
				relative_part ? "Invalid URI relative part" :
ali@475
   454
				"Invalid URI hierarchical part");
ali@475
   455
		return -1;
ali@475
   456
	}
ali@475
   457
ali@475
   458
	if (!is_pchar(*hp) || noscheme && *hp == ':') {
ali@475
   459
		free(path);
ali@475
   460
		razor_set_error(error, RAZOR_GENERAL_ERROR,
ali@475
   461
				RAZOR_GENERAL_ERROR_BAD_URI, part,
ali@475
   462
				"Invalid character in URI path");
ali@475
   463
		return -1;
ali@475
   464
	}
ali@475
   465
	*p++ = *hp++;
ali@475
   466
ali@475
   467
	while (*hp) {
ali@475
   468
		if (*hp == '/')
ali@475
   469
			noscheme = 0;
ali@475
   470
		else if (!is_pchar(*hp) || noscheme && *hp == ':') {
ali@475
   471
			free(path);
ali@475
   472
			razor_set_error(error, RAZOR_GENERAL_ERROR,
ali@475
   473
					RAZOR_GENERAL_ERROR_BAD_URI, part,
ali@475
   474
					"Invalid character in URI path");
ali@475
   475
			return -1;
ali@475
   476
		}
ali@475
   477
		*p++ = *hp++;
ali@475
   478
	}
ali@475
   479
ali@475
   480
	*p++ = '\0';
ali@475
   481
ali@475
   482
	ru->path = realloc(path, p - path);
ali@475
   483
ali@475
   484
	return 0;
ali@475
   485
}
ali@475
   486
ali@475
   487
void razor_uri_destroy(struct razor_uri *ru)
ali@475
   488
{
ali@475
   489
	free(ru->scheme);
ali@475
   490
	free(ru->userinfo);
ali@475
   491
	free(ru->host);
ali@475
   492
	free(ru->port);
ali@475
   493
	free(ru->path);
ali@475
   494
	free(ru->query);
ali@475
   495
	free(ru->fragment);
ali@475
   496
}
ali@475
   497
ali@475
   498
int razor_uri_parse_uri(struct razor_uri *ru, const char *uri, int absolute,
ali@475
   499
			struct razor_error **error)
ali@475
   500
{
ali@475
   501
	int r;
ali@475
   502
	const char *s;
ali@475
   503
	char *hier_part;
ali@475
   504
ali@475
   505
	if (pct_encoding_validate(uri) < 0) {
ali@475
   506
		razor_set_error(error, RAZOR_GENERAL_ERROR,
ali@475
   507
				RAZOR_GENERAL_ERROR_BAD_URI, uri,
ali@475
   508
				"Invalid percent encoding");
ali@475
   509
		return -1;
ali@475
   510
	}
ali@475
   511
ali@475
   512
	memset(ru, 0, sizeof(*ru));
ali@475
   513
ali@475
   514
	s = skip_uri_scheme(uri);
ali@475
   515
	if (!s) {
ali@475
   516
		razor_set_error(error, RAZOR_GENERAL_ERROR,
ali@475
   517
				RAZOR_GENERAL_ERROR_BAD_URI, uri,
ali@475
   518
				"Invalid URI scheme");
ali@475
   519
		return -1;
ali@475
   520
	}
ali@475
   521
	ru->scheme = razor_strndup(uri, s - uri);
ali@475
   522
	uri = s + 1;
ali@475
   523
ali@475
   524
	s = strchr(uri, '?');
ali@475
   525
	if (!s)
ali@475
   526
		s = strchr(uri, '#');
ali@475
   527
	if (!s)
ali@475
   528
		s = uri + strlen(uri);
ali@475
   529
	hier_part = razor_strndup(uri, s - uri);
ali@475
   530
	uri = s;
ali@475
   531
ali@475
   532
	r = razor_uri_parse_part(ru, hier_part, 0, error);
ali@475
   533
	free(hier_part);
ali@475
   534
	if (r) {
ali@475
   535
		razor_uri_destroy(ru);
ali@475
   536
		return -1;
ali@475
   537
	}
ali@475
   538
ali@475
   539
	if (*uri != '?')
ali@475
   540
		ru->query = NULL;
ali@475
   541
	else {
ali@475
   542
		uri++;
ali@475
   543
		s = strchr(uri, '#');
ali@475
   544
		if (!s)
ali@475
   545
			s = uri + strlen(uri);
ali@475
   546
		ru->query = razor_strndup(uri, s - uri);
ali@475
   547
		uri = s;
ali@475
   548
	}
ali@475
   549
ali@475
   550
	if (*uri != '#')
ali@475
   551
		ru->fragment = NULL;
ali@475
   552
	else if (absolute) {
ali@475
   553
		razor_set_error(error, RAZOR_GENERAL_ERROR,
ali@475
   554
				RAZOR_GENERAL_ERROR_BAD_URI, uri,
ali@475
   555
				"Fragments are not allowed in absolute URIs");
ali@475
   556
		razor_uri_destroy(ru);
ali@475
   557
		return -1;
ali@475
   558
	} else {
ali@475
   559
		uri++;
ali@475
   560
		ru->fragment = strdup(uri);
ali@475
   561
	}
ali@475
   562
ali@475
   563
	return 0;
ali@475
   564
}
ali@475
   565
ali@475
   566
int razor_uri_parse_relative_ref(struct razor_uri *ru, const char *uri,
ali@475
   567
				 struct razor_error **error)
ali@475
   568
{
ali@475
   569
	int r;
ali@475
   570
	const char *s;
ali@475
   571
	char *relative_part;
ali@475
   572
ali@475
   573
	if (pct_encoding_validate(uri) < 0) {
ali@475
   574
		razor_set_error(error, RAZOR_GENERAL_ERROR,
ali@475
   575
				RAZOR_GENERAL_ERROR_BAD_URI, uri,
ali@475
   576
				"Invalid percent encoding");
ali@475
   577
		return -1;
ali@475
   578
	}
ali@475
   579
ali@475
   580
	memset(ru, 0, sizeof(*ru));
ali@475
   581
ali@475
   582
	s = strchr(uri, '?');
ali@475
   583
	if (!s)
ali@475
   584
		s = strchr(uri, '#');
ali@475
   585
	if (!s)
ali@475
   586
		s = uri + strlen(uri);
ali@475
   587
	relative_part = razor_strndup(uri, s - uri);
ali@475
   588
	uri = s;
ali@475
   589
ali@475
   590
	r = razor_uri_parse_part(ru, relative_part, 1, error);
ali@475
   591
	free(relative_part);
ali@475
   592
	if (r)
ali@475
   593
		return -1;
ali@475
   594
ali@475
   595
	if (*uri == '?') {
ali@475
   596
		uri++;
ali@475
   597
		s = strchr(uri, '#');
ali@475
   598
		if (!s)
ali@475
   599
			s = uri + strlen(uri);
ali@475
   600
		ru->query = razor_strndup(uri, s - uri);
ali@475
   601
		uri = s;
ali@475
   602
	} else
ali@475
   603
		ru->query = NULL;
ali@475
   604
ali@475
   605
	if (*uri == '#') {
ali@475
   606
		uri++;
ali@475
   607
		ru->fragment = strdup(uri);
ali@475
   608
	} else
ali@475
   609
		ru->fragment = NULL;
ali@475
   610
ali@475
   611
	return 0;
ali@475
   612
}
ali@475
   613
ali@475
   614
/*
 * Parse uri as a URI-reference: first as a URI with a scheme and, if
 * that fails, as a relative reference.
 *
 * When both attempts fail, the error reported is the one from the first
 * (URI) attempt. Returns the result of whichever parse was last tried.
 */
int razor_uri_parse(struct razor_uri *ru, const char *uri,
		    struct razor_error **error)
{
	struct razor_error *uri_error = NULL;
	int r;

	r = razor_uri_parse_uri(ru, uri, 0, &uri_error);
	if (r >= 0)
		return r;

	r = razor_uri_parse_relative_ref(ru, uri, NULL);
	if (r < 0)
		razor_propagate_error(error, uri_error, NULL);
	else
		razor_error_free(uri_error);

	return r;
}
ali@475
   631
ali@475
   632
/*
ali@475
   633
 * Following RFC 3986 § 5.2.4
ali@475
   634
 */
ali@475
   635
static char *remove_dot_segments(const char *path)
ali@475
   636
{
ali@475
   637
	struct array output;
ali@475
   638
	char *input, *in, *s, *t;
ali@475
   639
	const char *step;
ali@475
   640
ali@475
   641
#ifdef DEBUG
ali@475
   642
	fprintf(stderr, "STEP   OUTPUT BUFFER         INPUT BUFFER\n");
ali@475
   643
#endif
ali@475
   644
ali@475
   645
	input = strdup(path);
ali@475
   646
	in = input;
ali@475
   647
	string_init(&output);
ali@475
   648
ali@475
   649
#ifdef DEBUG
ali@475
   650
	fprintf(stderr, " 1 :   %-21s %s\n", string_str(&output), in);
ali@475
   651
#endif
ali@475
   652
ali@475
   653
	while (*in) {
ali@475
   654
		if (str_has_prefix(in, "../")) {
ali@475
   655
			step = "2A";
ali@475
   656
			in += 3;
ali@475
   657
		} else if (str_has_prefix(in, "./")) {
ali@475
   658
			step = "2A";
ali@475
   659
			in += 2;
ali@475
   660
		} else if (str_has_prefix(in, "/./")) {
ali@475
   661
			step = "2B";
ali@475
   662
			in += 2;
ali@475
   663
		} else if (!strcmp(in, "/.")) {
ali@475
   664
			step = "2B";
ali@475
   665
			in++;
ali@475
   666
			*in = '/';
ali@475
   667
		} else if (str_has_prefix(in, "/../")) {
ali@475
   668
			step = "2C";
ali@475
   669
			in += 3;
ali@475
   670
			s = strrchr(string_str(&output), '/');
ali@475
   671
			if (!s)
ali@475
   672
				s = string_str(&output);
ali@475
   673
			string_truncate_at(&output, s);
ali@475
   674
		} else if (!strcmp(in, "/..")) {
ali@475
   675
			step = "2C";
ali@475
   676
			in += 2;
ali@475
   677
			*in = '/';
ali@475
   678
			s = strrchr(string_str(&output), '/');
ali@475
   679
			if (!s)
ali@475
   680
				s = string_str(&output);
ali@475
   681
			string_truncate_at(&output, s);
ali@475
   682
		} else if (!strcmp(in, ".") || !strcmp(in, "..")) {
ali@475
   683
			step = "2D";
ali@475
   684
			in += strlen(in);
ali@475
   685
		} else {
ali@475
   686
			step = "2E";
ali@475
   687
			t = strchr(in + 1, '/');
ali@475
   688
			if (!t)
ali@475
   689
				t = in + strlen(in);
ali@475
   690
			string_append_len(&output, in, t - in);
ali@475
   691
			in = t;
ali@475
   692
		}
ali@475
   693
#ifdef DEBUG
ali@475
   694
		fprintf(stderr, " %s:   %-21s %s\n", step, string_str(&output),
ali@475
   695
			in);
ali@475
   696
#endif
ali@475
   697
	}
ali@475
   698
ali@475
   699
	free(input);
ali@475
   700
	return string_str(&output);
ali@475
   701
}
ali@475
   702
ali@475
   703
/*
ali@475
   704
 * Following RFC 3986 § 6.2.2
ali@475
   705
 */
ali@475
   706
void razor_uri_normalize(struct razor_uri *ru)
ali@475
   707
{
ali@475
   708
	char *s;
ali@475
   709
ali@475
   710
	strdown(ru->scheme);
ali@475
   711
	if (ru->host)
ali@475
   712
		strdown(ru->host);
ali@475
   713
ali@475
   714
	s = pct_encoding_normalize(ru->userinfo);
ali@475
   715
	free(ru->userinfo);
ali@475
   716
	ru->userinfo = s;
ali@475
   717
ali@475
   718
	s = pct_encoding_normalize(ru->host);
ali@475
   719
	free(ru->host);
ali@475
   720
	ru->host = s;
ali@475
   721
ali@475
   722
	s = pct_encoding_normalize(ru->path);
ali@475
   723
	free(ru->path);
ali@475
   724
	ru->path = s;
ali@475
   725
ali@475
   726
	s = pct_encoding_normalize(ru->query);
ali@475
   727
	free(ru->query);
ali@475
   728
	ru->query = s;
ali@475
   729
ali@475
   730
	s = pct_encoding_normalize(ru->fragment);
ali@475
   731
	free(ru->fragment);
ali@475
   732
	ru->fragment = s;
ali@475
   733
ali@475
   734
	s = remove_dot_segments(ru->path);
ali@475
   735
	free(ru->path);
ali@475
   736
	ru->path = s;
ali@475
   737
}
ali@475
   738
ali@475
   739
char *razor_uri_get_authority(const struct razor_uri *ru)
ali@475
   740
{
ali@475
   741
	char *result, *r;
ali@475
   742
	int len = 1;
ali@475
   743
ali@475
   744
	if (ru->host) {
ali@475
   745
		if (ru->userinfo)
ali@475
   746
			len += strlen(ru->userinfo) + 1;
ali@475
   747
		len += strlen(ru->host);
ali@475
   748
		if (ru->port)
ali@475
   749
			len += strlen(ru->port) + 1;
ali@475
   750
	} else
ali@475
   751
		return NULL;
ali@475
   752
ali@475
   753
	r = result = malloc(len);
ali@475
   754
ali@475
   755
	if (ru->userinfo) {
ali@475
   756
		strcpy(r, ru->userinfo);
ali@475
   757
		r += strlen(r);
ali@475
   758
		*r++ = '@';
ali@475
   759
	}
ali@475
   760
ali@475
   761
	strcpy(r, ru->host);
ali@475
   762
	r += strlen(r);
ali@475
   763
ali@475
   764
	if (ru->port) {
ali@475
   765
		*r++ = ':';
ali@475
   766
		strcpy(r, ru->port);
ali@475
   767
	}
ali@475
   768
ali@475
   769
	return result;
ali@475
   770
}
ali@475
   771
ali@475
   772
/*
ali@475
   773
 * Following RFC 3986 § 5.3
ali@475
   774
 */
ali@475
   775
char *razor_uri_recompose(const struct razor_uri *ru)
ali@475
   776
{
ali@475
   777
	char *authority, *result, *r;
ali@475
   778
	int len = 1;
ali@475
   779
ali@475
   780
	authority = razor_uri_get_authority(ru);
ali@475
   781
ali@475
   782
	if (ru->scheme)
ali@475
   783
		len += strlen(ru->scheme) + 1;
ali@475
   784
	if (authority)
ali@475
   785
		len += strlen(authority) + 2;
ali@475
   786
	len += strlen(ru->path);
ali@475
   787
	if (ru->query)
ali@475
   788
		len += strlen(ru->query) + 1;
ali@475
   789
	if (ru->fragment)
ali@475
   790
		len += strlen(ru->fragment) + 1;
ali@475
   791
ali@475
   792
	r = result = malloc(len);
ali@475
   793
ali@475
   794
	if (ru->scheme) {
ali@475
   795
		strcpy(r, ru->scheme);
ali@475
   796
		r += strlen(r);
ali@475
   797
		*r++ = ':';
ali@475
   798
	}
ali@475
   799
ali@475
   800
	if (authority) {
ali@475
   801
		*r++ = '/';
ali@475
   802
		*r++ = '/';
ali@475
   803
		strcpy(r, authority);
ali@475
   804
		free(authority);
ali@475
   805
		r += strlen(r);
ali@475
   806
	}
ali@475
   807
ali@475
   808
	strcpy(r, ru->path);
ali@475
   809
	r += strlen(r);
ali@475
   810
ali@475
   811
	if (ru->query) {
ali@475
   812
		*r++ = '?';
ali@475
   813
		strcpy(r, ru->query);
ali@475
   814
		r += strlen(r);
ali@475
   815
	}
ali@475
   816
ali@475
   817
	if (ru->fragment) {
ali@475
   818
		*r++ = '#';
ali@475
   819
		strcpy(r, ru->fragment);
ali@475
   820
	}
ali@475
   821
ali@475
   822
	return result;
ali@475
   823
}
ali@475
   824
ali@475
   825
/*
ali@475
   826
 * Following RFC 3986 § 5.2.3
ali@475
   827
 */
ali@475
   828
static char *merge_paths(const struct razor_uri *base,const struct razor_uri *R)
ali@475
   829
{
ali@475
   830
	char *s, *t, *path;
ali@475
   831
ali@475
   832
	if (base->host && !*base->path)
ali@475
   833
		path = razor_concat("/", R->path, NULL);
ali@475
   834
	else {
ali@475
   835
		s = strrchr(base->path, '/');
ali@475
   836
		if (s) {
ali@475
   837
			t = razor_strndup(base->path, s + 1 - base->path);
ali@475
   838
			path = razor_concat(t, R->path, NULL);
ali@475
   839
			free(t);
ali@475
   840
		} else
ali@475
   841
			path = strdup(R->path);
ali@475
   842
	}
ali@475
   843
ali@475
   844
	return path;
ali@475
   845
}
ali@475
   846
ali@475
   847
/*
ali@475
   848
 * Following RFC 3986 § 5.2
ali@475
   849
 */
ali@475
   850
void razor_uri_resolve(struct razor_uri *T, const struct razor_uri *base,
ali@475
   851
		       const struct razor_uri *R)
ali@475
   852
{
ali@475
   853
	char *s;
ali@475
   854
ali@475
   855
	if (R->scheme) {
ali@475
   856
		T->scheme = strdup(R->scheme);
ali@475
   857
		T->userinfo = strdup0(R->userinfo);
ali@475
   858
		T->host = strdup0(R->host);
ali@475
   859
		T->port = strdup0(R->port);
ali@475
   860
		T->path = remove_dot_segments(R->path);
ali@475
   861
		T->query = strdup0(R->query);
ali@475
   862
	} else {
ali@475
   863
		if (R->host) {
ali@475
   864
			T->userinfo = strdup0(R->userinfo);
ali@475
   865
			T->host = strdup0(R->host);
ali@475
   866
			T->port = strdup0(R->port);
ali@475
   867
			T->path = remove_dot_segments(R->path);
ali@475
   868
			T->query = strdup0(R->query);
ali@475
   869
		} else {
ali@475
   870
			if (!*R->path) {
ali@475
   871
				T->path = strdup(base->path);
ali@475
   872
				if (R->query)
ali@475
   873
					T->query = strdup(R->query);
ali@475
   874
				else
ali@475
   875
					T->query = strdup0(base->query);
ali@475
   876
			} else {
ali@475
   877
				if (*R->path == '/')
ali@475
   878
					T->path = remove_dot_segments(R->path);
ali@475
   879
				else {
ali@475
   880
					s = merge_paths(base, R);
ali@475
   881
					T->path = remove_dot_segments(s);
ali@475
   882
					free(s);
ali@475
   883
				}
ali@475
   884
				T->query = strdup0(R->query);
ali@475
   885
			}
ali@475
   886
			T->userinfo = strdup0(base->userinfo);
ali@475
   887
			T->host = strdup0(base->host);
ali@475
   888
			T->port = strdup0(base->port);
ali@475
   889
		}
ali@475
   890
		T->scheme = strdup(base->scheme);
ali@475
   891
	}
ali@475
   892
	T->fragment = strdup0(R->fragment);
ali@475
   893
}
ali@475
   894
ali@475
   895
/*
ali@475
   896
 * This differs from razor_uri_resolve() both in the types of its arguments
ali@475
   897
 * and in the fact that it takes a root URI rather than a base URI. The base
ali@475
   898
 * URI is determined by appending a slash to the root URI (if it doesn't
ali@475
   899
 * already end in a slash). Finally, uri can be explicitly marked as either
ali@475
   900
 * relative (ie., a relative-ref) or not (ie., a URI). This is important as
ali@475
   901
 * otherwise "c:/xxx" could be interpreted as a URI in the "c" scheme.
ali@475
   902
 */
ali@475
   903
char *razor_resolve_uri_root(const char *root_uri, const char *uri,
ali@475
   904
			     int is_relative, struct razor_error **error)
ali@475
   905
{
ali@475
   906
	int r;
ali@475
   907
        char *base_uri, *s, *result;
ali@475
   908
	struct razor_uri ru, base, file;
ali@475
   909
ali@475
   910
	if (!root_uri || !*root_uri)
ali@475
   911
		root_uri = "file:/";
ali@475
   912
ali@475
   913
	if (root_uri[strlen(root_uri) - 1] == '/')
ali@475
   914
		base_uri = strdup(root_uri);
ali@475
   915
	else
ali@475
   916
		base_uri = razor_concat(root_uri, "/", NULL);
ali@475
   917
ali@475
   918
	r = razor_uri_parse_uri(&base, base_uri, 1, error);
ali@475
   919
	free(base_uri);
ali@475
   920
	if (r)
ali@475
   921
		return NULL;
ali@475
   922
ali@475
   923
	if (is_relative > 0) {
ali@475
   924
		/*
ali@475
   925
		 * We can't use razor_uri_parse_relative_ref() to parse
ali@475
   926
		 * uri in case it starts with a segment that includes a
ali@475
   927
		 * colon. Thus we use this kludge.
ali@475
   928
		 */
ali@475
   929
		s = razor_concat("scheme:", uri, NULL);
ali@475
   930
		r = razor_uri_parse_uri(&file, s, 0, error);
ali@475
   931
		free(s);
ali@475
   932
		if (!r) {
ali@475
   933
			free(file.scheme);
ali@475
   934
			file.scheme = NULL;
ali@475
   935
		}
ali@475
   936
	}
ali@475
   937
	else if (!is_relative)
ali@475
   938
		r = razor_uri_parse_uri(&file, uri, 0, error);
ali@475
   939
	else
ali@475
   940
		r = razor_uri_parse(&file, uri, error);
ali@475
   941
	if (r) {
ali@475
   942
		razor_uri_destroy(&base);
ali@475
   943
		return NULL;
ali@475
   944
	}
ali@475
   945
ali@475
   946
	razor_uri_resolve(&ru, &base, &file);
ali@475
   947
ali@475
   948
	razor_uri_destroy(&base);
ali@475
   949
	razor_uri_destroy(&file);
ali@475
   950
ali@475
   951
	result = razor_uri_recompose(&ru);
ali@475
   952
ali@475
   953
	razor_uri_destroy(&ru);
ali@475
   954
ali@475
   955
	return result;
ali@475
   956
}