librazor/path.c
author J. Ali Harlow <ali@juiblex.co.uk>
Tue Apr 24 19:27:29 2018 +0100 (2018-04-24)
changeset 496 203fa998c6df
parent 479 4204db81cdbc
permissions -rw-r--r--
Support expat v2.2
ali@459
     1
/*
 * Copyright (C) 2014, 2016  J. Ali Harlow <ali@juiblex.co.uk>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
ali@459
    18
ali@459
    19
#include "config.h"
ali@459
    20
#include <stdlib.h>
ali@459
    21
#include <string.h>
ali@475
    22
#include <ctype.h>
ali@459
    23
#include "razor.h"
ali@459
    24
#include "razor-internal.h"
ali@475
    25
#include "uri.h"
ali@459
    26
ali@459
    27
/*
 * Report whether a code point may legally be encoded in UTF-8.
 *
 * Returns non-zero when unicode lies within the U+0000..U+10FFFF range
 * defined by RFC3629 and outside the U+D800..U+DFFF surrogate range
 * that is prohibited in UTF-8; returns zero otherwise.
 */
static int valid_unicode(unsigned unicode)
{
	return unicode < 0xD800 || (unicode >= 0xE000 && unicode < 0x110000);
}
ali@459
    35
ali@475
    36
char *razor_path_from_parsed_uri(const struct razor_uri *ru,
ali@475
    37
  struct razor_error **error)
ali@459
    38
{
ali@459
    39
	int continuation_bytes = 0;
ali@475
    40
	char *path, *p, *s, *uri;
ali@459
    41
	unsigned char c;
ali@459
    42
	unsigned unicode;
ali@459
    43
ali@475
    44
	if (!ru->scheme) {
ali@475
    45
		uri = razor_uri_recompose(ru);
ali@475
    46
		razor_set_error(error, RAZOR_GENERAL_ERROR,
ali@475
    47
				RAZOR_GENERAL_ERROR_BAD_URI, uri,
ali@475
    48
				"URI does not include a scheme");
ali@475
    49
		free(uri);
ali@459
    50
		return NULL;
ali@475
    51
	}
ali@459
    52
ali@475
    53
	if (strcmp(ru->scheme, "file")) {
ali@475
    54
		uri = razor_uri_recompose(ru);
ali@475
    55
		razor_set_error(error, RAZOR_GENERAL_ERROR,
ali@475
    56
				RAZOR_GENERAL_ERROR_UNSUPPORTED_URI, uri,
ali@475
    57
				"Not a file URI");
ali@475
    58
		free(uri);
ali@459
    59
		return NULL;
ali@475
    60
	}
ali@459
    61
ali@475
    62
	if (ru->host && *ru->host && strcmp(ru->host, "localhost") ||
ali@475
    63
	    ru->userinfo || ru->port) {
ali@475
    64
		uri = razor_uri_recompose(ru);
ali@475
    65
		razor_set_error(error, RAZOR_GENERAL_ERROR,
ali@475
    66
				RAZOR_GENERAL_ERROR_UNSUPPORTED_URI, uri,
ali@475
    67
				"URI refers to a non-local file");
ali@475
    68
		free(uri);
ali@475
    69
		return NULL;
ali@475
    70
	}
ali@475
    71
ali@475
    72
	s = ru->path;
ali@459
    73
#ifdef MSWIN_API
ali@459
    74
	/*
ali@475
    75
	 * Under MS-Windows, a path of /c:/xxx maps to c:/xxx
ali@459
    76
	 * Note that PathCreateFromUrl converts / to \ as well.
ali@459
    77
	 */
ali@475
    78
	if (s[0] == '/' && is_alpha(s[1]) && s[2] == ':' && s[3] == '/')
ali@475
    79
		s++;
ali@459
    80
#endif
ali@459
    81
ali@475
    82
	p = path = malloc(strlen(s) + 1);
ali@459
    83
ali@475
    84
	while (*s) {
ali@475
    85
		if (*s >= 0x7F || *s < 0x20)
ali@475
    86
			break;
ali@475
    87
		else if (*s != '%') {
ali@475
    88
			if (continuation_bytes)
ali@475
    89
				break;
ali@475
    90
			else
ali@475
    91
				*p++ = *s++;
ali@475
    92
		} else {
ali@475
    93
			c = pchar_get_char(s);
ali@475
    94
#ifdef MSWIN_API
ali@475
    95
			if (c == '/' || c == '\\')
ali@475
    96
#else
ali@475
    97
			if (c == '/')
ali@475
    98
#endif
ali@475
    99
				break;
ali@475
   100
			else if (!continuation_bytes) {
ali@475
   101
				if (c >= 0xF5 || c == 0xC0 || c == 0xC1)
ali@475
   102
					break;
ali@475
   103
				else if (c >= 0xF0) {
ali@459
   104
					unicode = c & 7;
ali@459
   105
					continuation_bytes = 3;
ali@459
   106
				} else if (c >= 0xE0) {
ali@459
   107
					unicode = c & 3;
ali@459
   108
					continuation_bytes = 2;
ali@459
   109
				} else if (c >= 0xC0) {
ali@459
   110
					unicode = c & 1;
ali@459
   111
					continuation_bytes = 1;
ali@459
   112
				}
ali@475
   113
			} else if ((c & 0xC0) != 0x80)
ali@475
   114
				break;
ali@475
   115
			else {
ali@459
   116
				unicode <<= 6;
ali@459
   117
				unicode |= (c & 0x3F);
ali@459
   118
ali@459
   119
				if (!--continuation_bytes &&
ali@475
   120
				    !valid_unicode(unicode))
ali@475
   121
					break;
ali@459
   122
			}
ali@459
   123
ali@459
   124
			*p++ = c;
ali@475
   125
			s += 3;
ali@459
   126
		}
ali@459
   127
	}
ali@459
   128
ali@475
   129
	if (*s || continuation_bytes) {
ali@475
   130
		uri = razor_uri_recompose(ru);
ali@475
   131
		razor_set_error(error, RAZOR_GENERAL_ERROR,
ali@475
   132
				RAZOR_GENERAL_ERROR_BAD_URI,
ali@475
   133
				uri, "Illegal character in file URI path");
ali@475
   134
		free(uri);
ali@459
   135
		free(path);
ali@459
   136
		return NULL;
ali@459
   137
	}
ali@459
   138
ali@459
   139
	*p++ = '\0';
ali@459
   140
ali@459
   141
	return realloc(path, p - path);
ali@459
   142
}
ali@459
   143
ali@475
   144
RAZOR_EXPORT char *razor_path_from_uri(const char *uri,
ali@475
   145
  struct razor_error **error)
ali@475
   146
{
ali@475
   147
	struct razor_uri ru;
ali@475
   148
	char *path;
ali@475
   149
ali@475
   150
	if (razor_uri_parse(&ru, uri, error))
ali@475
   151
		return NULL;
ali@475
   152
ali@475
   153
	path = razor_path_from_parsed_uri(&ru, error);
ali@475
   154
ali@475
   155
	razor_uri_destroy(&ru);
ali@475
   156
ali@475
   157
	return path;
ali@475
   158
}
ali@475
   159
ali@475
   160
RAZOR_EXPORT char *razor_path_to_uri(const char *path)
ali@475
   161
{
ali@491
   162
	char *uri, *s;
ali@491
   163
	const char *p;
ali@491
   164
	int check_dotdot, len;
ali@491
   165
	struct razor_uri ru;
ali@475
   166
ali@491
   167
	uri = malloc(5 + (4 - 3) + 4 + 3 * strlen(path) + 1);
ali@475
   168
ali@475
   169
	strcpy(uri, "file:");
ali@475
   170
ali@491
   171
	s = uri + 5;
ali@491
   172
ali@491
   173
#ifdef MSWIN_API
ali@491
   174
	check_dotdot = path[0] != '/' && path[0] != '\\';
ali@491
   175
#else
ali@491
   176
	check_dotdot = path[0] != '/';
ali@491
   177
#endif
ali@491
   178
ali@491
   179
	p = path;
ali@475
   180
ali@475
   181
#ifdef MSWIN_API
ali@475
   182
	/*
ali@475
   183
	 * Under MS-Windows, c:/xxx maps to a path of /c:/xxx
ali@491
   184
	 * Relative paths that include a drive letter (eg., c:xxx)
ali@491
   185
	 * can't be handled directly and have to be converted
ali@491
   186
	 * to absolute form.
ali@475
   187
	 */
ali@491
   188
	if (is_alpha(p[0]) && p[1] == ':') {
ali@491
   189
		if (p[2] == '/' || p[2] == '\\') {
ali@491
   190
			*s++ = '/';
ali@491
   191
			*s++ = p[0];
ali@491
   192
			*s++ = ':';
ali@491
   193
			*s++ = '/';
ali@491
   194
			p += 3;
ali@491
   195
			/*
ali@491
   196
			 * We need to take care that ".." segments don't remove
ali@491
   197
			 * the drive letter (eg., c:/../xxx -> file:/c:/../xxx
ali@491
   198
			 * which normalizes to file:/xxx).
ali@491
   199
			 */
ali@491
   200
			check_dotdot = 2;
ali@491
   201
		} else {
ali@491
   202
			s = razor_abspath(p);
ali@491
   203
			uri = razor_path_to_uri(s);
ali@491
   204
			free(s);
ali@491
   205
			return uri;
ali@491
   206
		}
ali@491
   207
	}
ali@475
   208
#endif
ali@475
   209
ali@491
   210
	/*
ali@491
   211
	 * Relative paths are complicated. URIs can't have dot segments
ali@491
   212
	 * so these will be removed during normalization. That often does
ali@491
   213
	 * the right thing, but where a relative path traverses up the
ali@491
   214
	 * tree then the result is a URI that points to somewhere quite
ali@491
   215
	 * different to path: eg., file:../dir normalizes to file:dir
ali@491
   216
	 * We solve this by inserting a sentinel segment at the beginning.
ali@491
   217
	 * If the segment is still present after normalization, then it
ali@491
   218
	 * can just be removed. If it is missing, then we need to create
ali@491
   219
	 * an absolute path and redo the conversion.
ali@491
   220
	 */
ali@491
   221
	if (check_dotdot) {
ali@491
   222
		*s++ = '%';
ali@491
   223
		*s++ = '2';
ali@491
   224
		*s++ = 'F';
ali@491
   225
		*s++ = '/';
ali@491
   226
	}
ali@491
   227
ali@491
   228
	while(*p) {
ali@491
   229
		if (*p == '/' || is_unreserved(*p) || is_sub_delim(*p) ||
ali@491
   230
		    *p == ':' || *p == '@')
ali@491
   231
			*s++ = *p;
ali@475
   232
#ifdef MSWIN_API
ali@491
   233
		else if (*p == '\\')
ali@491
   234
			*s++ = '/';
ali@475
   235
#endif
ali@475
   236
		else {
ali@491
   237
			*s++ = '%';
ali@491
   238
			*s++ = "0123456789ABCDEF"[(*(unsigned char *)p)/16];
ali@491
   239
			*s++ = "0123456789ABCDEF"[(*(unsigned char *)p)%16];
ali@475
   240
		}
ali@491
   241
		p++;
ali@475
   242
	}
ali@491
   243
	*s++ = '\0';
ali@475
   244
ali@491
   245
	if (razor_uri_parse(&ru, uri, NULL) < 0) {
ali@491
   246
		free(uri);
ali@491
   247
		return NULL;
ali@491
   248
	}
ali@491
   249
	free(uri);
ali@491
   250
ali@491
   251
	razor_uri_normalize(&ru);
ali@491
   252
ali@491
   253
	uri = razor_uri_recompose(&ru);
ali@491
   254
ali@491
   255
	razor_uri_destroy(&ru);
ali@491
   256
ali@491
   257
	if (check_dotdot == 2) {
ali@491
   258
		s = strdup("file:/x:/%2F/");
ali@491
   259
		s[6] = path[0];
ali@491
   260
		if (str_has_prefix(uri, s)) {
ali@491
   261
			free(s);
ali@491
   262
			memmove(uri + 5 + 3, uri + 9 + 3,
ali@491
   263
				strlen(uri + 9 + 3) + 1);
ali@491
   264
			uri = realloc(uri, strlen(uri) + 1);
ali@491
   265
		} else {
ali@491
   266
			free(s);
ali@491
   267
			free(uri);
ali@491
   268
			s = razor_abspath(path);
ali@491
   269
			uri = razor_path_to_uri(s);
ali@491
   270
			free(s);
ali@491
   271
		}
ali@491
   272
	} else if (check_dotdot) {
ali@491
   273
		if (str_has_prefix(uri, "file:%2F/")) {
ali@491
   274
			memmove(uri + 5, uri + 9, strlen(uri + 9) + 1);
ali@491
   275
			uri = realloc(uri, strlen(uri) + 1);
ali@491
   276
		} else {
ali@491
   277
			free(uri);
ali@491
   278
			s = razor_abspath(path);
ali@491
   279
			uri = razor_path_to_uri(s);
ali@491
   280
			free(s);
ali@491
   281
		}
ali@491
   282
	}
ali@491
   283
ali@491
   284
	return uri;
ali@475
   285
}
ali@475
   286
ali@475
   287
RAZOR_EXPORT char *
ali@475
   288
razor_path_relative_to_uri(const char *uri, const char *path,
ali@475
   289
			   struct razor_error **error)
ali@475
   290
{
ali@475
   291
	char *rel_uri, *result;
ali@475
   292
ali@475
   293
	/* Strictly wrong if uri isn't a file URI, but probably okay */
ali@475
   294
	rel_uri = razor_path_to_uri(path);
ali@475
   295
ali@475
   296
	result = razor_resolve_uri_root(uri, rel_uri + 5, 1, error);
ali@475
   297
ali@475
   298
	free(rel_uri);
ali@475
   299
ali@475
   300
	return result;
ali@475
   301
}