librazor/path.c
author J. Ali Harlow <ali@juiblex.co.uk>
Fri Jun 08 18:02:49 2018 +0100 (2018-06-08)
changeset 501 850be6a6885c
parent 479 4204db81cdbc
permissions -rw-r--r--
Added tag 0.7 for changeset f98d77376544
     1 /*
     2  * Copyright (C) 2014, 2016  J. Ali Harlow <ali@juiblex.co.uk>
     3  *
     4  * This program is free software; you can redistribute it and/or modify
     5  * it under the terms of the GNU General Public License as published by
     6  * the Free Software Foundation; either version 2 of the License, or
     7  * (at your option) any later version.
     8  *
     9  * This program is distributed in the hope that it will be useful,
    10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    12  * GNU General Public License for more details.
    13  *
    14  * You should have received a copy of the GNU General Public License along
    15  * with this program; if not, write to the Free Software Foundation, Inc.,
    16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
    17  */
    18 
    19 #include "config.h"
    20 #include <stdlib.h>
    21 #include <string.h>
    22 #include <ctype.h>
    23 #include "razor.h"
    24 #include "razor-internal.h"
    25 #include "uri.h"
    26 
    27 static int valid_unicode(unsigned unicode)
    28 {
    29 	/*
    30 	 * Within the U+0000..U+10FFFF range defined by RFC3629
    31 	 * but not in the U+D800..U+DFFF range prohibited in UTF-8.
    32 	 */
    33 	return unicode < 0xD800 || (unicode >= 0xE000 && unicode < 0x110000);
    34 }
    35 
    36 char *razor_path_from_parsed_uri(const struct razor_uri *ru,
    37   struct razor_error **error)
    38 {
    39 	int continuation_bytes = 0;
    40 	char *path, *p, *s, *uri;
    41 	unsigned char c;
    42 	unsigned unicode;
    43 
    44 	if (!ru->scheme) {
    45 		uri = razor_uri_recompose(ru);
    46 		razor_set_error(error, RAZOR_GENERAL_ERROR,
    47 				RAZOR_GENERAL_ERROR_BAD_URI, uri,
    48 				"URI does not include a scheme");
    49 		free(uri);
    50 		return NULL;
    51 	}
    52 
    53 	if (strcmp(ru->scheme, "file")) {
    54 		uri = razor_uri_recompose(ru);
    55 		razor_set_error(error, RAZOR_GENERAL_ERROR,
    56 				RAZOR_GENERAL_ERROR_UNSUPPORTED_URI, uri,
    57 				"Not a file URI");
    58 		free(uri);
    59 		return NULL;
    60 	}
    61 
    62 	if (ru->host && *ru->host && strcmp(ru->host, "localhost") ||
    63 	    ru->userinfo || ru->port) {
    64 		uri = razor_uri_recompose(ru);
    65 		razor_set_error(error, RAZOR_GENERAL_ERROR,
    66 				RAZOR_GENERAL_ERROR_UNSUPPORTED_URI, uri,
    67 				"URI refers to a non-local file");
    68 		free(uri);
    69 		return NULL;
    70 	}
    71 
    72 	s = ru->path;
    73 #ifdef MSWIN_API
    74 	/*
    75 	 * Under MS-Windows, a path of /c:/xxx maps to c:/xxx
    76 	 * Note that PathCreateFromUrl converts / to \ as well.
    77 	 */
    78 	if (s[0] == '/' && is_alpha(s[1]) && s[2] == ':' && s[3] == '/')
    79 		s++;
    80 #endif
    81 
    82 	p = path = malloc(strlen(s) + 1);
    83 
    84 	while (*s) {
    85 		if (*s >= 0x7F || *s < 0x20)
    86 			break;
    87 		else if (*s != '%') {
    88 			if (continuation_bytes)
    89 				break;
    90 			else
    91 				*p++ = *s++;
    92 		} else {
    93 			c = pchar_get_char(s);
    94 #ifdef MSWIN_API
    95 			if (c == '/' || c == '\\')
    96 #else
    97 			if (c == '/')
    98 #endif
    99 				break;
   100 			else if (!continuation_bytes) {
   101 				if (c >= 0xF5 || c == 0xC0 || c == 0xC1)
   102 					break;
   103 				else if (c >= 0xF0) {
   104 					unicode = c & 7;
   105 					continuation_bytes = 3;
   106 				} else if (c >= 0xE0) {
   107 					unicode = c & 3;
   108 					continuation_bytes = 2;
   109 				} else if (c >= 0xC0) {
   110 					unicode = c & 1;
   111 					continuation_bytes = 1;
   112 				}
   113 			} else if ((c & 0xC0) != 0x80)
   114 				break;
   115 			else {
   116 				unicode <<= 6;
   117 				unicode |= (c & 0x3F);
   118 
   119 				if (!--continuation_bytes &&
   120 				    !valid_unicode(unicode))
   121 					break;
   122 			}
   123 
   124 			*p++ = c;
   125 			s += 3;
   126 		}
   127 	}
   128 
   129 	if (*s || continuation_bytes) {
   130 		uri = razor_uri_recompose(ru);
   131 		razor_set_error(error, RAZOR_GENERAL_ERROR,
   132 				RAZOR_GENERAL_ERROR_BAD_URI,
   133 				uri, "Illegal character in file URI path");
   134 		free(uri);
   135 		free(path);
   136 		return NULL;
   137 	}
   138 
   139 	*p++ = '\0';
   140 
   141 	return realloc(path, p - path);
   142 }
   143 
   144 RAZOR_EXPORT char *razor_path_from_uri(const char *uri,
   145   struct razor_error **error)
   146 {
   147 	struct razor_uri ru;
   148 	char *path;
   149 
   150 	if (razor_uri_parse(&ru, uri, error))
   151 		return NULL;
   152 
   153 	path = razor_path_from_parsed_uri(&ru, error);
   154 
   155 	razor_uri_destroy(&ru);
   156 
   157 	return path;
   158 }
   159 
   160 RAZOR_EXPORT char *razor_path_to_uri(const char *path)
   161 {
   162 	char *uri, *s;
   163 	const char *p;
   164 	int check_dotdot, len;
   165 	struct razor_uri ru;
   166 
   167 	uri = malloc(5 + (4 - 3) + 4 + 3 * strlen(path) + 1);
   168 
   169 	strcpy(uri, "file:");
   170 
   171 	s = uri + 5;
   172 
   173 #ifdef MSWIN_API
   174 	check_dotdot = path[0] != '/' && path[0] != '\\';
   175 #else
   176 	check_dotdot = path[0] != '/';
   177 #endif
   178 
   179 	p = path;
   180 
   181 #ifdef MSWIN_API
   182 	/*
   183 	 * Under MS-Windows, c:/xxx maps to a path of /c:/xxx
   184 	 * Relative paths that include a drive letter (eg., c:xxx)
   185 	 * can't be handled directly and have to be converted
   186 	 * to absolute form.
   187 	 */
   188 	if (is_alpha(p[0]) && p[1] == ':') {
   189 		if (p[2] == '/' || p[2] == '\\') {
   190 			*s++ = '/';
   191 			*s++ = p[0];
   192 			*s++ = ':';
   193 			*s++ = '/';
   194 			p += 3;
   195 			/*
   196 			 * We need to take care that ".." segments don't remove
   197 			 * the drive letter (eg., c:/../xxx -> file:/c:/../xxx
   198 			 * which normalizes to file:/xxx).
   199 			 */
   200 			check_dotdot = 2;
   201 		} else {
   202 			s = razor_abspath(p);
   203 			uri = razor_path_to_uri(s);
   204 			free(s);
   205 			return uri;
   206 		}
   207 	}
   208 #endif
   209 
   210 	/*
   211 	 * Relative paths are complicated. URIs can't have dot segments
   212 	 * so these will be removed during normalization. That often does
   213 	 * the right thing, but where a relative path traverses up the
   214 	 * tree then the result is a URI that points to somewhere quite
   215 	 * different to path: eg., file:../dir normalizes to file:dir
   216 	 * We solve this by inserting a sentinel segment at the beginning.
   217 	 * If the segment is still present after normalization, then it
   218 	 * can just be removed. If it is missing, then we need to create
   219 	 * an absolute path and redo the conversion.
   220 	 */
   221 	if (check_dotdot) {
   222 		*s++ = '%';
   223 		*s++ = '2';
   224 		*s++ = 'F';
   225 		*s++ = '/';
   226 	}
   227 
   228 	while(*p) {
   229 		if (*p == '/' || is_unreserved(*p) || is_sub_delim(*p) ||
   230 		    *p == ':' || *p == '@')
   231 			*s++ = *p;
   232 #ifdef MSWIN_API
   233 		else if (*p == '\\')
   234 			*s++ = '/';
   235 #endif
   236 		else {
   237 			*s++ = '%';
   238 			*s++ = "0123456789ABCDEF"[(*(unsigned char *)p)/16];
   239 			*s++ = "0123456789ABCDEF"[(*(unsigned char *)p)%16];
   240 		}
   241 		p++;
   242 	}
   243 	*s++ = '\0';
   244 
   245 	if (razor_uri_parse(&ru, uri, NULL) < 0) {
   246 		free(uri);
   247 		return NULL;
   248 	}
   249 	free(uri);
   250 
   251 	razor_uri_normalize(&ru);
   252 
   253 	uri = razor_uri_recompose(&ru);
   254 
   255 	razor_uri_destroy(&ru);
   256 
   257 	if (check_dotdot == 2) {
   258 		s = strdup("file:/x:/%2F/");
   259 		s[6] = path[0];
   260 		if (str_has_prefix(uri, s)) {
   261 			free(s);
   262 			memmove(uri + 5 + 3, uri + 9 + 3,
   263 				strlen(uri + 9 + 3) + 1);
   264 			uri = realloc(uri, strlen(uri) + 1);
   265 		} else {
   266 			free(s);
   267 			free(uri);
   268 			s = razor_abspath(path);
   269 			uri = razor_path_to_uri(s);
   270 			free(s);
   271 		}
   272 	} else if (check_dotdot) {
   273 		if (str_has_prefix(uri, "file:%2F/")) {
   274 			memmove(uri + 5, uri + 9, strlen(uri + 9) + 1);
   275 			uri = realloc(uri, strlen(uri) + 1);
   276 		} else {
   277 			free(uri);
   278 			s = razor_abspath(path);
   279 			uri = razor_path_to_uri(s);
   280 			free(s);
   281 		}
   282 	}
   283 
   284 	return uri;
   285 }
   286 
   287 RAZOR_EXPORT char *
   288 razor_path_relative_to_uri(const char *uri, const char *path,
   289 			   struct razor_error **error)
   290 {
   291 	char *rel_uri, *result;
   292 
   293 	/* Strictly wrong if uri isn't a file URI, but probably okay */
   294 	rel_uri = razor_path_to_uri(path);
   295 
   296 	result = razor_resolve_uri_root(uri, rel_uri + 5, 1, error);
   297 
   298 	free(rel_uri);
   299 
   300 	return result;
   301 }