librazor/path.c
changeset 494 889dc38157ac
parent 479 4204db81cdbc
     1.1 --- a/librazor/path.c	Thu Jul 07 15:17:29 2016 +0100
     1.2 +++ b/librazor/path.c	Mon Mar 05 20:35:27 2018 +0000
     1.3 @@ -159,41 +159,129 @@
     1.4  
     1.5 +/*
     1.6 + * Convert a filesystem path to a normalized "file:" URI. The result is
     1.7 + * heap-allocated (release it with free()); NULL means failure.
     1.8 + */
     1.9  RAZOR_EXPORT char *razor_path_to_uri(const char *path)
    1.10  {
    1.11 -	char *uri, *p;
    1.12 +	char *uri, *s;
    1.13 +	const char *p = path;
    1.14 +	char sentinel[] = "file:/x:/%2F/";
    1.15 +	int check_dotdot, rc;
    1.16 +	size_t len;
    1.17 +	struct razor_uri ru;
    1.18  
    1.19 -	uri = malloc(6 + 3 * strlen(path) + 1);
    1.20 +	uri = malloc(5 + (4 - 3) + 4 + 3 * strlen(path) + 1);
    1.21 +	if (!uri)
    1.22 +		return NULL;
    1.23  
    1.24  	strcpy(uri, "file:");
    1.25  
    1.26 -	p = uri + 5;
    1.27 +	s = uri + 5;
    1.28 +
    1.29 +#ifdef MSWIN_API
    1.30 +	check_dotdot = path[0] != '/' && path[0] != '\\';
    1.31 +#else
    1.32 +	check_dotdot = path[0] != '/';
    1.33 +#endif
    1.34  
    1.35  #ifdef MSWIN_API
    1.36  	/*
    1.37  	 * Under MS-Windows, c:/xxx maps to a path of /c:/xxx
    1.38 +	 * Relative paths that include a drive letter (e.g., c:xxx)
    1.39 +	 * can't be handled directly and have to be converted
    1.40 +	 * to absolute form.
    1.41  	 */
    1.42 -	if (is_alpha(path[0]) && path[1] == ':' &&
    1.43 -	    (path[2] == '/' || path[2] == '\\'))
    1.44 -		*p++ = '/';
    1.45 +	if (is_alpha(p[0]) && p[1] == ':') {
    1.46 +		if (p[2] == '/' || p[2] == '\\') {
    1.47 +			*s++ = '/';
    1.48 +			*s++ = p[0];
    1.49 +			*s++ = ':';
    1.50 +			*s++ = '/';
    1.51 +			p += 3;
    1.52 +			/*
    1.53 +			 * We need to take care that ".." segments don't remove
    1.54 +			 * the drive letter (e.g., c:/../xxx -> file:/c:/../xxx
    1.55 +			 * which normalizes to file:/xxx).
    1.56 +			 */
    1.57 +			check_dotdot = 2;
    1.58 +		} else {
    1.59 +			/* Don't leak the scratch buffer when restarting. */
    1.60 +			free(uri);
    1.61 +			s = razor_abspath(p);
    1.62 +			uri = s ? razor_path_to_uri(s) : NULL;
    1.63 +			free(s);
    1.64 +			return uri;
    1.65 +		}
    1.66 +	}
    1.67  #endif
    1.68  
    1.69 -	while(*path) {
    1.70 -		if (*path == '/' || is_unreserved(*path) ||
    1.71 -		    is_sub_delim(*path) || *path == ':' || *path == '@')
    1.72 -			*p++ = *path;
    1.73 +	/*
    1.74 +	 * Relative paths are complicated. URIs can't have dot segments
    1.75 +	 * so these will be removed during normalization. That often does
    1.76 +	 * the right thing, but where a relative path traverses up the
    1.77 +	 * tree then the result is a URI that points to somewhere quite
    1.78 +	 * different to path: e.g., file:../dir normalizes to file:dir
    1.79 +	 * We solve this by inserting a sentinel segment at the beginning.
    1.80 +	 * If the segment is still present after normalization, then it
    1.81 +	 * can just be removed. If it is missing, then we need to create
    1.82 +	 * an absolute path and redo the conversion.
    1.83 +	 */
    1.84 +	if (check_dotdot) {
    1.85 +		*s++ = '%';
    1.86 +		*s++ = '2';
    1.87 +		*s++ = 'F';
    1.88 +		*s++ = '/';
    1.89 +	}
    1.90 +
    1.91 +	while(*p) {
    1.92 +		if (*p == '/' || is_unreserved(*p) || is_sub_delim(*p) ||
    1.93 +		    *p == ':' || *p == '@')
    1.94 +			*s++ = *p;
    1.95  #ifdef MSWIN_API
    1.96 -		else if (*path == '\\')
    1.97 -			*p++ = '/';
    1.98 +		else if (*p == '\\')
    1.99 +			*s++ = '/';
   1.100  #endif
   1.101  		else {
   1.102 -			*p++ = '%';
   1.103 -			*p++ = "0123456789ABCDEF"[(*(unsigned char *)path)/16];
   1.104 -			*p++ = "0123456789ABCDEF"[(*(unsigned char *)path)%16];
   1.105 +			*s++ = '%';
   1.106 +			*s++ = "0123456789ABCDEF"[(*(unsigned char *)p)/16];
   1.107 +			*s++ = "0123456789ABCDEF"[(*(unsigned char *)p)%16];
   1.108  		}
   1.109 -		path++;
   1.110 +		p++;
   1.111  	}
   1.112 -	*p++ = '\0';
   1.113 +	*s++ = '\0';
   1.114  
   1.115 -	return realloc(uri, p - uri);
   1.116 +	rc = razor_uri_parse(&ru, uri, NULL);
   1.117 +	free(uri);
   1.118 +	if (rc < 0)
   1.119 +		return NULL;
   1.120 +
   1.121 +	razor_uri_normalize(&ru);
   1.122 +	uri = razor_uri_recompose(&ru);
   1.123 +	razor_uri_destroy(&ru);
   1.124 +
   1.125 +	if (!uri || !check_dotdot)
   1.126 +		return uri;
   1.127 +
   1.128 +	/* Build the prefix the URI starts with if the sentinel survived. */
   1.129 +	if (check_dotdot == 2)
   1.130 +		sentinel[6] = path[0];
   1.131 +	else
   1.132 +		strcpy(sentinel, "file:%2F/");
   1.133 +
   1.134 +	if (str_has_prefix(uri, sentinel)) {
   1.135 +		/* Cut the sentinel's four characters ("%2F/") back out. */
   1.136 +		len = strlen(sentinel);
   1.137 +		memmove(uri + len - 4, uri + len, strlen(uri + len) + 1);
   1.138 +		s = realloc(uri, strlen(uri) + 1);
   1.139 +		return s ? s : uri;
   1.140 +	}
   1.141 +
   1.142 +	/* A ".." segment ate the sentinel: redo from the absolute path. */
   1.143 +	free(uri);
   1.144 +	s = razor_abspath(path);
   1.145 +	uri = s ? razor_path_to_uri(s) : NULL;
   1.146 +	free(s);
   1.147 +	return uri;
   1.148  }
   1.149  
   1.150  RAZOR_EXPORT char *