razor_path_to_uri() should cope with relative paths better
author J. Ali Harlow <ali@juiblex.co.uk>
Thu Jul 14 12:49:48 2016 +0100 (2016-07-14)
changeset 491 b18e0bf48a91
parent 490 9e2943af74fe
child 492 644b648173b0
razor_path_to_uri() should cope with relative paths better
librazor/path.c
librazor/razor-internal.h
librazor/test-pfu.c
librazor/uri.c
librazor/util.c
     1.1 --- a/librazor/path.c	Mon Jul 11 16:50:21 2016 +0100
     1.2 +++ b/librazor/path.c	Thu Jul 14 12:49:48 2016 +0100
     1.3 @@ -159,41 +159,129 @@
     1.4  
     1.5  RAZOR_EXPORT char *razor_path_to_uri(const char *path)
     1.6  {
     1.7 -	char *uri, *p;
     1.8 +	char *uri, *s;
     1.9 +	const char *p;
    1.10 +	int check_dotdot, len;
    1.11 +	struct razor_uri ru;
    1.12  
    1.13 -	uri = malloc(6 + 3 * strlen(path) + 1);
    1.14 +	uri = malloc(5 + (4 - 3) + 4 + 3 * strlen(path) + 1);
    1.15  
    1.16  	strcpy(uri, "file:");
    1.17  
    1.18 -	p = uri + 5;
    1.19 +	s = uri + 5;
    1.20 +
    1.21 +#ifdef MSWIN_API
    1.22 +	check_dotdot = path[0] != '/' && path[0] != '\\';
    1.23 +#else
    1.24 +	check_dotdot = path[0] != '/';
    1.25 +#endif
    1.26 +
    1.27 +	p = path;
    1.28  
    1.29  #ifdef MSWIN_API
    1.30  	/*
    1.31  	 * Under MS-Windows, c:/xxx maps to a path of /c:/xxx
    1.32 +	 * Relative paths that include a drive letter (eg., c:xxx)
    1.33 +	 * can't be handled directly and have to be converted
    1.34 +	 * to absolute form.
    1.35  	 */
    1.36 -	if (is_alpha(path[0]) && path[1] == ':' &&
    1.37 -	    (path[2] == '/' || path[2] == '\\'))
    1.38 -		*p++ = '/';
    1.39 +	if (is_alpha(p[0]) && p[1] == ':') {
    1.40 +		if (p[2] == '/' || p[2] == '\\') {
    1.41 +			*s++ = '/';
    1.42 +			*s++ = p[0];
    1.43 +			*s++ = ':';
    1.44 +			*s++ = '/';
    1.45 +			p += 3;
    1.46 +			/*
    1.47 +			 * We need to take care that ".." segments don't remove
    1.48 +			 * the drive letter (eg., c:/../xxx -> file:/c:/../xxx
    1.49 +			 * which normalizes to file:/xxx).
    1.50 +			 */
    1.51 +			check_dotdot = 2;
    1.52 +		} else {
    1.53 +			s = razor_abspath(p);
    1.54 +			uri = razor_path_to_uri(s);
    1.55 +			free(s);
    1.56 +			return uri;
    1.57 +		}
    1.58 +	}
    1.59  #endif
    1.60  
    1.61 -	while(*path) {
    1.62 -		if (*path == '/' || is_unreserved(*path) ||
    1.63 -		    is_sub_delim(*path) || *path == ':' || *path == '@')
    1.64 -			*p++ = *path;
    1.65 +	/*
    1.66 +	 * Relative paths are complicated. URIs can't have dot segments
    1.67 +	 * so these will be removed during normalization. That often does
    1.68 +	 * the right thing, but where a relative path traverses up the
    1.69 +	 * tree then the result is a URI that points to somewhere quite
    1.70 +	 * different to path: eg., file:../dir normalizes to file:dir
    1.71 +	 * We solve this by inserting a sentinel segment at the beginning.
    1.72 +	 * If the segment is still present after normalization, then it
    1.73 +	 * can just be removed. If it is missing, then we need to create
    1.74 +	 * an absolute path and redo the conversion.
    1.75 +	 */
    1.76 +	if (check_dotdot) {
    1.77 +		*s++ = '%';
    1.78 +		*s++ = '2';
    1.79 +		*s++ = 'F';
    1.80 +		*s++ = '/';
    1.81 +	}
    1.82 +
    1.83 +	while(*p) {
    1.84 +		if (*p == '/' || is_unreserved(*p) || is_sub_delim(*p) ||
    1.85 +		    *p == ':' || *p == '@')
    1.86 +			*s++ = *p;
    1.87  #ifdef MSWIN_API
    1.88 -		else if (*path == '\\')
    1.89 -			*p++ = '/';
    1.90 +		else if (*p == '\\')
    1.91 +			*s++ = '/';
    1.92  #endif
    1.93  		else {
    1.94 -			*p++ = '%';
    1.95 -			*p++ = "0123456789ABCDEF"[(*(unsigned char *)path)/16];
    1.96 -			*p++ = "0123456789ABCDEF"[(*(unsigned char *)path)%16];
    1.97 +			*s++ = '%';
    1.98 +			*s++ = "0123456789ABCDEF"[(*(unsigned char *)p)/16];
    1.99 +			*s++ = "0123456789ABCDEF"[(*(unsigned char *)p)%16];
   1.100  		}
   1.101 -		path++;
   1.102 +		p++;
   1.103  	}
   1.104 -	*p++ = '\0';
   1.105 +	*s++ = '\0';
   1.106  
   1.107 -	return realloc(uri, p - uri);
   1.108 +	if (razor_uri_parse(&ru, uri, NULL) < 0) {
   1.109 +		free(uri);
   1.110 +		return NULL;
   1.111 +	}
   1.112 +	free(uri);
   1.113 +
   1.114 +	razor_uri_normalize(&ru);
   1.115 +
   1.116 +	uri = razor_uri_recompose(&ru);
   1.117 +
   1.118 +	razor_uri_destroy(&ru);
   1.119 +
   1.120 +	if (check_dotdot == 2) {
   1.121 +		s = strdup("file:/x:/%2F/");
   1.122 +		s[6] = path[0];
   1.123 +		if (str_has_prefix(uri, s)) {
   1.124 +			free(s);
   1.125 +			memmove(uri + 5 + 3, uri + 9 + 3,
   1.126 +				strlen(uri + 9 + 3) + 1);
   1.127 +			uri = realloc(uri, strlen(uri) + 1);
   1.128 +		} else {
   1.129 +			free(s);
   1.130 +			free(uri);
   1.131 +			s = razor_abspath(path);
   1.132 +			uri = razor_path_to_uri(s);
   1.133 +			free(s);
   1.134 +		}
   1.135 +	} else if (check_dotdot) {
   1.136 +		if (str_has_prefix(uri, "file:%2F/")) {
   1.137 +			memmove(uri + 5, uri + 9, strlen(uri + 9) + 1);
   1.138 +			uri = realloc(uri, strlen(uri) + 1);
   1.139 +		} else {
   1.140 +			free(uri);
   1.141 +			s = razor_abspath(path);
   1.142 +			uri = razor_path_to_uri(s);
   1.143 +			free(s);
   1.144 +		}
   1.145 +	}
   1.146 +
   1.147 +	return uri;
   1.148  }
   1.149  
   1.150  RAZOR_EXPORT char *
     2.1 --- a/librazor/razor-internal.h	Mon Jul 11 16:50:21 2016 +0100
     2.2 +++ b/librazor/razor-internal.h	Thu Jul 14 12:49:48 2016 +0100
     2.3 @@ -292,6 +292,8 @@
     2.4  wchar_t *razor_utf8_to_utf16(const char *utf8, int len);
     2.5  #endif
     2.6  
     2.7 +char *razor_abspath(const char *path);
     2.8 +
     2.9  /* Error functions */
    2.10  struct razor_error {
    2.11  	int domain;
     3.1 --- a/librazor/test-pfu.c	Mon Jul 11 16:50:21 2016 +0100
     3.2 +++ b/librazor/test-pfu.c	Thu Jul 14 12:49:48 2016 +0100
     3.3 @@ -25,6 +25,7 @@
     3.4  #include <windows.h>
     3.5  #endif
     3.6  #include "razor.h"
     3.7 +#include "razor-internal.h"
     3.8  
     3.9  #ifdef MSWIN_API
    3.10  static int is_ascii_letter(char c)
    3.11 @@ -104,6 +105,64 @@
    3.12  	return r;
    3.13  }
    3.14  
    3.15 +static int test_abs(const char *abspath, const char *path)
    3.16 +{
    3.17 +	char *s;
    3.18 +	int r;
    3.19 +
    3.20 +	s = razor_abspath(path);
    3.21 +
    3.22 +	if (s && abspath)
    3.23 +		r = strcmp(s, abspath);
    3.24 +	else
    3.25 +		r = (s != abspath);
    3.26 +
    3.27 +	if (r) {
    3.28 +		fprintf(stderr, "Fail: razor_abspath(\"%s\")", path);
    3.29 +		if (s)
    3.30 +			fprintf(stderr, " returns \"%s\", expected", s);
    3.31 +		else
    3.32 +			fprintf(stderr, " fails, expected");
    3.33 +		if (abspath)
    3.34 +			fprintf(stderr, " \"%s\"\n", abspath);
    3.35 +		else
    3.36 +			fprintf(stderr, " failure\n");
    3.37 +	}
    3.38 +
    3.39 +	free(s);
    3.40 +
    3.41 +	return r;
    3.42 +}
    3.43 +
    3.44 +static int test_ptu(const char *uri, const char *path)
    3.45 +{
    3.46 +	char *s;
    3.47 +	int r;
    3.48 +
    3.49 +	s = razor_path_to_uri(path);
    3.50 +
    3.51 +	if (s && uri)
    3.52 +		r = strcmp(s, uri);
    3.53 +	else
    3.54 +		r = (s != uri);
    3.55 +
    3.56 +	if (r) {
    3.57 +		fprintf(stderr, "Fail: razor_path_to_uri(\"%s\")", path);
    3.58 +		if (s)
    3.59 +			fprintf(stderr, " returns \"%s\", expected", s);
    3.60 +		else
    3.61 +			fprintf(stderr, " fails, expected");
    3.62 +		if (uri)
    3.63 +			fprintf(stderr, " \"%s\"\n", uri);
    3.64 +		else
    3.65 +			fprintf(stderr, " failure\n");
    3.66 +	}
    3.67 +
    3.68 +	free(s);
    3.69 +
    3.70 +	return r;
    3.71 +}
    3.72 +
    3.73  #ifdef MSWIN_API
    3.74  UINT saved_cp;
    3.75  
    3.76 @@ -116,6 +175,20 @@
    3.77  int main(int argc, char *argv[])
    3.78  {
    3.79  	int r = 0;
    3.80 +	const char *tmpdir;
    3.81 +	char *tempdir, *s;
    3.82 +
    3.83 +	tmpdir = getenv("TMPDIR");
    3.84 +	if (!tmpdir || !*tmpdir)
    3.85 +		tmpdir = "/tmp";
    3.86 +
    3.87 +	tempdir = razor_concat(tmpdir, "/test-pfu-XXXXXX", NULL);
    3.88 +
    3.89 +	if (!mkdtemp(tempdir) || chdir(tempdir) < 0) {
    3.90 +		perror(tempdir);
    3.91 +		free(tempdir);
    3.92 +		exit(1);
    3.93 +	}
    3.94  
    3.95  #ifdef MSWIN_API
    3.96  	atexit(cleanup_on_exit);
    3.97 @@ -136,5 +209,31 @@
    3.98  	r |= test_pfu("file:///var/log/22%20%e0%b8%aa%e0%b8%b4%e0%b8%87%e0%b8%ab%e0%b8%b2%e0%b8%84%e0%b8%a1%202014",
    3.99  		      "/var/log/22 สิงหาคม 2014");
   3.100  
   3.101 +	s = razor_concat(tempdir, "/file.txt", NULL);
   3.102 +	r |= test_abs(s, "file.txt");
   3.103 +	free(s);
   3.104 +
   3.105 +	s = razor_concat(tempdir, "/dir/../file.txt", NULL);
   3.106 +	r |= test_abs(s, "dir/../file.txt");
   3.107 +	free(s);
   3.108 +
   3.109 +	s = razor_concat(tempdir, "/../file.txt", NULL);
   3.110 +	r |= test_abs(s, "../file.txt");
   3.111 +	free(s);
   3.112 +
   3.113 +	s = razor_concat(tempdir, "/dir/../../file.txt", NULL);
   3.114 +	r |= test_abs(s, "dir/../../file.txt");
   3.115 +	free(s);
   3.116 +
   3.117 +	r |= test_ptu("file:file.txt", "file.txt");
   3.118 +	r |= test_ptu("file:file.txt", "dir/../file.txt");
   3.119 +
   3.120 +	s = razor_concat("file:", tmpdir, "/file.txt", NULL);
   3.121 +	r |= test_ptu(s, "../file.txt");
   3.122 +	r |= test_ptu(s, "dir/../../file.txt");
   3.123 +	free(s);
   3.124 +
   3.125 +	free(tempdir);
   3.126 +
   3.127  	exit(r ? 1 : 0);
   3.128  }
     4.1 --- a/librazor/uri.c	Mon Jul 11 16:50:21 2016 +0100
     4.2 +++ b/librazor/uri.c	Thu Jul 14 12:49:48 2016 +0100
     4.3 @@ -700,7 +700,6 @@
     4.4  	return string_str(&output);
     4.5  }
     4.6  
     4.7 -
     4.8  /*
     4.9   * Following RFC 3986 § 6.2.2
    4.10   */
     5.1 --- a/librazor/util.c	Mon Jul 11 16:50:21 2016 +0100
     5.2 +++ b/librazor/util.c	Thu Jul 14 12:49:48 2016 +0100
     5.3 @@ -364,3 +364,68 @@
     5.4  }
     5.5  
     5.6  #endif	/* MSWIN_API */
     5.7 +
     5.8 +/*
     5.9 + * Returns an absolute path (ie., not relative to the current directory on the
    5.10 + * default, or any other, disk). Note that the absolute path may still have
    5.11 + * "." or ".." path segments present (ie., this is not equivalent to realpath).
    5.12 + */
    5.13 +char *razor_abspath(const char *path)
    5.14 +{
    5.15 +#ifdef MSWIN_API
    5.16 +
    5.17 +	int n;
    5.18 +	wchar_t *wpath, *wabspath;
    5.19 +	char *abspath;
    5.20 +
    5.21 +	wpath = razor_utf8_to_utf16(path, -1);
    5.22 +
    5.23 +	n = GetFullPathNameW(wpath, 0, NULL, NULL);
    5.24 +	if (!n) {
    5.25 +		free(wpath);
    5.26 +		return NULL;
    5.27 +	}
    5.28 +
    5.29 +	wabspath = malloc(n * sizeof(sizeof(wchar_t)));
    5.30 +	if (!GetFullPathNameW(wpath, n, wabspath, NULL)) {
    5.31 +		free(wabspath);
    5.32 +		free(wpath);
    5.33 +		return NULL;
    5.34 +	}
    5.35 +	free(wpath);
    5.36 +
    5.37 +	abspath = razor_utf16_to_utf8(wabspath, n - 1);
    5.38 +	free(wabspath);
    5.39 +
    5.40 +	return abspath;
    5.41 +
    5.42 +#else	/* MSWIN_API */
    5.43 +
    5.44 +	int pathlen, len;
    5.45 +	char *cwd, *abspath;
    5.46 +
    5.47 +	if (*path == '/')
    5.48 +		abspath = strdup(path);
    5.49 +	else {
    5.50 +		pathlen = strlen(path);
    5.51 +		for (len = 32;; len *= 2) {
    5.52 +			abspath = malloc(pathlen + 1 + len);
    5.53 +			if (getcwd(abspath, len))
    5.54 +				break;
    5.55 +			if (errno != ERANGE) {
    5.56 +				free(abspath);
    5.57 +				return NULL;
    5.58 +			}
    5.59 +			free(abspath);
    5.60 +		}
    5.61 +		len = strlen(abspath);
    5.62 +		if (abspath[len - 1] != '/')
    5.63 +			abspath[len++] = '/';
    5.64 +		memcpy(abspath + len, path, pathlen + 1);
    5.65 +		abspath = realloc(abspath, len + pathlen + 1);
    5.66 +	}
    5.67 +
    5.68 +	return abspath;
    5.69 +
    5.70 +#endif
    5.71 +}