# HG changeset patch # User J. Ali Harlow # Date 1468496988 -3600 # Node ID b18e0bf48a91e5e4249ce33675d97f71a44d0518 # Parent 9e2943af74fe48fe334087b6bc19b3b0dcb754a0 razor_path_to_uri() should cope with relative paths better diff -r 9e2943af74fe -r b18e0bf48a91 librazor/path.c --- a/librazor/path.c Mon Jul 11 16:50:21 2016 +0100 +++ b/librazor/path.c Thu Jul 14 12:49:48 2016 +0100 @@ -159,41 +159,129 @@ RAZOR_EXPORT char *razor_path_to_uri(const char *path) { - char *uri, *p; + char *uri, *s; + const char *p; + int check_dotdot, len; + struct razor_uri ru; - uri = malloc(6 + 3 * strlen(path) + 1); + uri = malloc(5 + (4 - 3) + 4 + 3 * strlen(path) + 1); strcpy(uri, "file:"); - p = uri + 5; + s = uri + 5; + +#ifdef MSWIN_API + check_dotdot = path[0] != '/' && path[0] != '\\'; +#else + check_dotdot = path[0] != '/'; +#endif + + p = path; #ifdef MSWIN_API /* * Under MS-Windows, c:/xxx maps to a path of /c:/xxx + * Relative paths that include a drive letter (eg., c:xxx) + * can't be handled directly and have to be converted + * to absolute form. */ - if (is_alpha(path[0]) && path[1] == ':' && - (path[2] == '/' || path[2] == '\\')) - *p++ = '/'; + if (is_alpha(p[0]) && p[1] == ':') { + if (p[2] == '/' || p[2] == '\\') { + *s++ = '/'; + *s++ = p[0]; + *s++ = ':'; + *s++ = '/'; + p += 3; + /* + * We need to take care that ".." segments don't remove + * the drive letter (eg., c:/../xxx -> file:/c:/../xxx + * which normalizes to file:/xxx). + */ + check_dotdot = 2; + } else { + s = razor_abspath(p); + uri = razor_path_to_uri(s); + free(s); + return uri; + } + } #endif - while(*path) { - if (*path == '/' || is_unreserved(*path) || - is_sub_delim(*path) || *path == ':' || *path == '@') - *p++ = *path; + /* + * Relative paths are complicated. URIs can't have dot segments + * so these will be removed during normalization. 
That often does + * the right thing, but where a relative path traverses up the + * tree then the result is a URI that points to somewhere quite + * different to path: eg., file:../dir normalizes to file:dir + * We solve this by inserting a sentinel segment at the beginning. + * If the segment is still present after normalization, then it + * can just be removed. If it is missing, then we need to create + * an absolute path and redo the conversion. + */ + if (check_dotdot) { + *s++ = '%'; + *s++ = '2'; + *s++ = 'F'; + *s++ = '/'; + } + + while(*p) { + if (*p == '/' || is_unreserved(*p) || is_sub_delim(*p) || + *p == ':' || *p == '@') + *s++ = *p; #ifdef MSWIN_API - else if (*path == '\\') - *p++ = '/'; + else if (*p == '\\') + *s++ = '/'; #endif else { - *p++ = '%'; - *p++ = "0123456789ABCDEF"[(*(unsigned char *)path)/16]; - *p++ = "0123456789ABCDEF"[(*(unsigned char *)path)%16]; + *s++ = '%'; + *s++ = "0123456789ABCDEF"[(*(unsigned char *)p)/16]; + *s++ = "0123456789ABCDEF"[(*(unsigned char *)p)%16]; } - path++; + p++; } - *p++ = '\0'; + *s++ = '\0'; - return realloc(uri, p - uri); + if (razor_uri_parse(&ru, uri, NULL) < 0) { + free(uri); + return NULL; + } + free(uri); + + razor_uri_normalize(&ru); + + uri = razor_uri_recompose(&ru); + + razor_uri_destroy(&ru); + + if (check_dotdot == 2) { + s = strdup("file:/x:/%2F/"); + s[6] = path[0]; + if (str_has_prefix(uri, s)) { + free(s); + memmove(uri + 5 + 3, uri + 9 + 3, + strlen(uri + 9 + 3) + 1); + uri = realloc(uri, strlen(uri) + 1); + } else { + free(s); + free(uri); + s = razor_abspath(path); + uri = razor_path_to_uri(s); + free(s); + } + } else if (check_dotdot) { + if (str_has_prefix(uri, "file:%2F/")) { + memmove(uri + 5, uri + 9, strlen(uri + 9) + 1); + uri = realloc(uri, strlen(uri) + 1); + } else { + free(uri); + s = razor_abspath(path); + uri = razor_path_to_uri(s); + free(s); + } + } + + return uri; } RAZOR_EXPORT char * diff -r 9e2943af74fe -r b18e0bf48a91 librazor/razor-internal.h --- 
a/librazor/razor-internal.h Mon Jul 11 16:50:21 2016 +0100 +++ b/librazor/razor-internal.h Thu Jul 14 12:49:48 2016 +0100 @@ -292,6 +292,8 @@ wchar_t *razor_utf8_to_utf16(const char *utf8, int len); #endif +char *razor_abspath(const char *path); + /* Error functions */ struct razor_error { int domain; diff -r 9e2943af74fe -r b18e0bf48a91 librazor/test-pfu.c --- a/librazor/test-pfu.c Mon Jul 11 16:50:21 2016 +0100 +++ b/librazor/test-pfu.c Thu Jul 14 12:49:48 2016 +0100 @@ -25,6 +25,7 @@ #include #endif #include "razor.h" +#include "razor-internal.h" #ifdef MSWIN_API static int is_ascii_letter(char c) @@ -104,6 +105,64 @@ return r; } +static int test_abs(const char *abspath, const char *path) +{ + char *s; + int r; + + s = razor_abspath(path); + + if (s && abspath) + r = strcmp(s, abspath); + else + r = (s != abspath); + + if (r) { + fprintf(stderr, "Fail: razor_abspath(\"%s\")", path); + if (s) + fprintf(stderr, " returns \"%s\", expected", s); + else + fprintf(stderr, " fails, expected"); + if (abspath) + fprintf(stderr, " \"%s\"\n", abspath); + else + fprintf(stderr, " failure\n"); + } + + free(s); + + return r; +} + +static int test_ptu(const char *uri, const char *path) +{ + char *s; + int r; + + s = razor_path_to_uri(path); + + if (s && uri) + r = strcmp(s, uri); + else + r = (s != uri); + + if (r) { + fprintf(stderr, "Fail: razor_path_to_uri(\"%s\")", path); + if (s) + fprintf(stderr, " returns \"%s\", expected", s); + else + fprintf(stderr, " fails, expected"); + if (uri) + fprintf(stderr, " \"%s\"\n", uri); + else + fprintf(stderr, " failure\n"); + } + + free(s); + + return r; +} + #ifdef MSWIN_API UINT saved_cp; @@ -116,6 +175,20 @@ int main(int argc, char *argv[]) { int r = 0; + const char *tmpdir; + char *tempdir, *s; + + tmpdir = getenv("TMPDIR"); + if (!tmpdir || !*tmpdir) + tmpdir = "/tmp"; + + tempdir = razor_concat(tmpdir, "/test-pfu-XXXXXX", NULL); + + if (!mkdtemp(tempdir) || chdir(tempdir) < 0) { + perror(tempdir); + free(tempdir); + exit(1); + } 
#ifdef MSWIN_API atexit(cleanup_on_exit); @@ -136,5 +209,31 @@ r |= test_pfu("file:///var/log/22%20%e0%b8%aa%e0%b8%b4%e0%b8%87%e0%b8%ab%e0%b8%b2%e0%b8%84%e0%b8%a1%202014", "/var/log/22 สิงหาคม 2014"); + s = razor_concat(tempdir, "/file.txt", NULL); + r |= test_abs(s, "file.txt"); + free(s); + + s = razor_concat(tempdir, "/dir/../file.txt", NULL); + r |= test_abs(s, "dir/../file.txt"); + free(s); + + s = razor_concat(tempdir, "/../file.txt", NULL); + r |= test_abs(s, "../file.txt"); + free(s); + + s = razor_concat(tempdir, "/dir/../../file.txt", NULL); + r |= test_abs(s, "dir/../../file.txt"); + free(s); + + r |= test_ptu("file:file.txt", "file.txt"); + r |= test_ptu("file:file.txt", "dir/../file.txt"); + + s = razor_concat("file:", tmpdir, "/file.txt", NULL); + r |= test_ptu(s, "../file.txt"); + r |= test_ptu(s, "dir/../../file.txt"); + free(s); + + free(tempdir); + exit(r ? 1 : 0); } diff -r 9e2943af74fe -r b18e0bf48a91 librazor/uri.c --- a/librazor/uri.c Mon Jul 11 16:50:21 2016 +0100 +++ b/librazor/uri.c Thu Jul 14 12:49:48 2016 +0100 @@ -700,7 +700,6 @@ return string_str(&output); } - /* * Following RFC 3986 § 6.2.2 */ diff -r 9e2943af74fe -r b18e0bf48a91 librazor/util.c --- a/librazor/util.c Mon Jul 11 16:50:21 2016 +0100 +++ b/librazor/util.c Thu Jul 14 12:49:48 2016 +0100 @@ -364,3 +364,68 @@ } #endif /* MSWIN_API */ + +/* + * Returns an absolute path (ie., not relative to the current directory on the + * default, or any other, disk). Note that the absolute path may still have + * "." or ".." path segments present (ie., this is not equivalent to realpath). 
/*
 * razor_abspath() - make a path absolute without resolving it.
 *
 * Returns a newly allocated absolute path (ie., not relative to the current
 * directory on the default, or any other, disk). The result may still have
 * "." or ".." path segments present (ie., this is not equivalent to
 * realpath). An already absolute path is returned as a copy.
 *
 * The caller owns the result and must free() it. Returns NULL if path is
 * NULL, if memory cannot be allocated, or if the current directory (or, under
 * MS-Windows, the full path name) cannot be obtained.
 */
char *razor_abspath(const char *path)
{
#ifdef MSWIN_API

	int n, m;
	wchar_t *wpath, *wabspath;
	char *abspath;

	if (!path)
		return NULL;

	wpath = razor_utf8_to_utf16(path, -1);
	if (!wpath)
		return NULL;

	/* With a zero-sized buffer the call reports the size required. */
	n = GetFullPathNameW(wpath, 0, NULL, NULL);
	if (!n) {
		free(wpath);
		return NULL;
	}

	wabspath = malloc(n * sizeof *wabspath);
	if (!wabspath) {
		free(wpath);
		return NULL;
	}

	/*
	 * On success the result is the length written, excluding the NUL.
	 * A result of n or more means the buffer was too small after all.
	 */
	m = GetFullPathNameW(wpath, n, wabspath, NULL);
	free(wpath);
	if (!m || m >= n) {
		free(wabspath);
		return NULL;
	}

	abspath = razor_utf16_to_utf8(wabspath, m);
	free(wabspath);

	return abspath;

#else /* MSWIN_API */

	size_t pathlen, cwdlen, cwdsize;
	char *abspath, *shrunk;

	if (!path)
		return NULL;

	pathlen = strlen(path);

	if (*path == '/') {
		abspath = malloc(pathlen + 1);
		if (abspath)
			memcpy(abspath, path, pathlen + 1);
		return abspath;
	}

	/*
	 * Fetch the current directory into a buffer that already has room
	 * for a separator, path and the NUL behind it. The space offered to
	 * getcwd() doubles for as long as it reports ERANGE.
	 */
	for (cwdsize = 32;; cwdsize *= 2) {
		abspath = malloc(cwdsize + pathlen + 1);
		if (!abspath)
			return NULL;
		if (getcwd(abspath, cwdsize))
			break;
		if (errno != ERANGE ||
		    cwdsize > ((size_t)-1 - pathlen - 1) / 2) {
			free(abspath);
			return NULL;
		}
		free(abspath);
	}

	/* getcwd() wrote at most cwdsize - 1 bytes, so the '/' fits. */
	cwdlen = strlen(abspath);
	if (cwdlen == 0 || abspath[cwdlen - 1] != '/')
		abspath[cwdlen++] = '/';
	memcpy(abspath + cwdlen, path, pathlen + 1);

	/* Trimming the slack is optional; keep the buffer if it fails. */
	shrunk = realloc(abspath, cwdlen + pathlen + 1);
	return shrunk ? shrunk : abspath;

#endif
}