ali@459: /* ali@475: * Copyright (C) 2014, 2016 J. Ali Harlow ali@459: * ali@459: * This program is free software; you can redistribute it and/or modify ali@459: * it under the terms of the GNU General Public License as published by ali@459: * the Free Software Foundation; either version 2 of the License, or ali@459: * (at your option) any later version. ali@459: * ali@459: * This program is distributed in the hope that it will be useful, ali@459: * but WITHOUT ANY WARRANTY; without even the implied warranty of ali@459: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ali@459: * GNU General Public License for more details. ali@459: * ali@459: * You should have received a copy of the GNU General Public License along ali@459: * with this program; if not, write to the Free Software Foundation, Inc., ali@459: * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. ali@459: */ ali@459: ali@459: #include "config.h" ali@459: #include ali@459: #include ali@475: #include ali@459: #include "razor.h" ali@459: #include "razor-internal.h" ali@475: #include "uri.h" ali@459: ali@459: static int valid_unicode(unsigned unicode) ali@459: { ali@459: /* ali@459: * Within the U+0000..U+10FFFF range defined by RFC3629 ali@459: * but not in the U+D800..U+DFFF range prohibited in UTF-8. ali@459: */ ali@459: return unicode < 0xD800 || (unicode >= 0xE000 && unicode < 0x110000); ali@459: } ali@459: ali@475: char *razor_path_from_parsed_uri(const struct razor_uri *ru, ali@475: struct razor_error **error) ali@459: { ali@459: int continuation_bytes = 0; ali@475: char *path, *p, *s, *uri; ali@459: unsigned char c; ali@459: unsigned unicode; ali@459: ali@475: if (!ru->scheme) { ali@475: uri = razor_uri_recompose(ru); ali@475: razor_set_error(error, RAZOR_GENERAL_ERROR, ali@475: RAZOR_GENERAL_ERROR_BAD_URI, uri, ali@475: "URI does not include a scheme"); ali@475: free(uri); ali@459: return NULL; ali@475: } ali@459: ali@475: if (strcmp(ru->scheme, "file")) { ali@475: uri = razor_uri_recompose(ru); ali@475: razor_set_error(error, RAZOR_GENERAL_ERROR, ali@475: RAZOR_GENERAL_ERROR_UNSUPPORTED_URI, uri, ali@475: "Not a file URI"); ali@475: free(uri); ali@459: return NULL; ali@475: } ali@459: ali@475: if (ru->host && *ru->host && strcmp(ru->host, "localhost") || ali@475: ru->userinfo || ru->port) { ali@475: uri = razor_uri_recompose(ru); ali@475: razor_set_error(error, RAZOR_GENERAL_ERROR, ali@475: RAZOR_GENERAL_ERROR_UNSUPPORTED_URI, uri, ali@475: "URI refers to a non-local file"); ali@475: free(uri); ali@475: return NULL; ali@475: } ali@475: ali@475: s = ru->path; ali@459: #ifdef MSWIN_API ali@459: /* ali@475: * Under MS-Windows, a path of /c:/xxx maps to c:/xxx ali@459: * Note that PathCreateFromUrl converts / to \ as well. ali@459: */ ali@475: if (s[0] == '/' && is_alpha(s[1]) && s[2] == ':' && s[3] == '/') ali@475: s++; ali@459: #endif ali@459: ali@475: p = path = malloc(strlen(s) + 1); ali@459: ali@475: while (*s) { ali@475: if (*s >= 0x7F || *s < 0x20) ali@475: break; ali@475: else if (*s != '%') { ali@475: if (continuation_bytes) ali@475: break; ali@475: else ali@475: *p++ = *s++; ali@475: } else { ali@475: c = pchar_get_char(s); ali@475: #ifdef MSWIN_API ali@475: if (c == '/' || c == '\\') ali@475: #else ali@475: if (c == '/') ali@475: #endif ali@475: break; ali@475: else if (!continuation_bytes) { ali@475: if (c >= 0xF5 || c == 0xC0 || c == 0xC1) ali@475: break; ali@475: else if (c >= 0xF0) { ali@459: unicode = c & 7; ali@459: continuation_bytes = 3; ali@459: } else if (c >= 0xE0) { ali@459: unicode = c & 3; ali@459: continuation_bytes = 2; ali@459: } else if (c >= 0xC0) { ali@459: unicode = c & 1; ali@459: continuation_bytes = 1; ali@459: } ali@475: } else if ((c & 0xC0) != 0x80) ali@475: break; ali@475: else { ali@459: unicode <<= 6; ali@459: unicode |= (c & 0x3F); ali@459: ali@459: if (!--continuation_bytes && ali@475: !valid_unicode(unicode)) ali@475: break; ali@459: } ali@459: ali@459: *p++ = c; ali@475: s += 3; ali@459: } ali@459: } ali@459: ali@475: if (*s || continuation_bytes) { ali@475: uri = razor_uri_recompose(ru); ali@475: razor_set_error(error, RAZOR_GENERAL_ERROR, ali@475: RAZOR_GENERAL_ERROR_BAD_URI, ali@475: uri, "Illegal character in file URI path"); ali@475: free(uri); ali@459: free(path); ali@459: return NULL; ali@459: } ali@459: ali@459: *p++ = '\0'; ali@459: ali@459: return realloc(path, p - path); ali@459: } ali@459: ali@475: RAZOR_EXPORT char *razor_path_from_uri(const char *uri, ali@475: struct razor_error **error) ali@475: { ali@475: struct razor_uri ru; ali@475: char *path; ali@475: ali@475: if (razor_uri_parse(&ru, uri, error)) ali@475: return NULL; ali@475: ali@475: path = razor_path_from_parsed_uri(&ru, error); ali@475: ali@475: razor_uri_destroy(&ru); ali@475: ali@475: return path; ali@475: } ali@475: ali@475: RAZOR_EXPORT char *razor_path_to_uri(const char *path) ali@475: { ali@491: char *uri, *s; ali@491: const char *p; ali@491: int check_dotdot, len; ali@491: struct razor_uri ru; ali@475: ali@491: uri = malloc(5 + (4 - 3) + 4 + 3 * strlen(path) + 1); ali@475: ali@475: strcpy(uri, "file:"); ali@475: ali@491: s = uri + 5; ali@491: ali@491: #ifdef MSWIN_API ali@491: check_dotdot = path[0] != '/' && path[0] != '\\'; ali@491: #else ali@491: check_dotdot = path[0] != '/'; ali@491: #endif ali@491: ali@491: p = path; ali@475: ali@475: #ifdef MSWIN_API ali@475: /* ali@475: * Under MS-Windows, c:/xxx maps to a path of /c:/xxx ali@491: * Relative paths that include a drive letter (eg., c:xxx) ali@491: * can't be handled directly and have to be converted ali@491: * to absolute form. ali@475: */ ali@491: if (is_alpha(p[0]) && p[1] == ':') { ali@491: if (p[2] == '/' || p[2] == '\\') { ali@491: *s++ = '/'; ali@491: *s++ = p[0]; ali@491: *s++ = ':'; ali@491: *s++ = '/'; ali@491: p += 3; ali@491: /* ali@491: * We need to take care that ".." segments don't remove ali@491: * the drive letter (eg., c:/../xxx -> file:/c:/../xxx ali@491: * which normalizes to file:/xxx). ali@491: */ ali@491: check_dotdot = 2; ali@491: } else { ali@491: s = razor_abspath(p); ali@491: uri = razor_path_to_uri(s); ali@491: free(s); ali@491: return uri; ali@491: } ali@491: } ali@475: #endif ali@475: ali@491: /* ali@491: * Relative paths are complicated. URIs can't have dot segments ali@491: * so these will be removed during normalization. That often does ali@491: * the right thing, but where a relative path traverses up the ali@491: * tree then the result is a URI that points to somewhere quite ali@491: * different to path: eg., file:../dir normalizes to file:dir ali@491: * We solve this by inserting a sentinel segment at the beginning. ali@491: * If the segment is still present after normalization, then it ali@491: * can just be removed. If it is missing, then we need to create ali@491: * an absolute path and redo the conversion. ali@491: */ ali@491: if (check_dotdot) { ali@491: *s++ = '%'; ali@491: *s++ = '2'; ali@491: *s++ = 'F'; ali@491: *s++ = '/'; ali@491: } ali@491: ali@491: while(*p) { ali@491: if (*p == '/' || is_unreserved(*p) || is_sub_delim(*p) || ali@491: *p == ':' || *p == '@') ali@491: *s++ = *p; ali@475: #ifdef MSWIN_API ali@491: else if (*p == '\\') ali@491: *s++ = '/'; ali@475: #endif ali@475: else { ali@491: *s++ = '%'; ali@491: *s++ = "0123456789ABCDEF"[(*(unsigned char *)p)/16]; ali@491: *s++ = "0123456789ABCDEF"[(*(unsigned char *)p)%16]; ali@475: } ali@491: p++; ali@475: } ali@491: *s++ = '\0'; ali@475: ali@491: if (razor_uri_parse(&ru, uri, NULL) < 0) { ali@491: free(uri); ali@491: return NULL; ali@491: } ali@491: free(uri); ali@491: ali@491: razor_uri_normalize(&ru); ali@491: ali@491: uri = razor_uri_recompose(&ru); ali@491: ali@491: razor_uri_destroy(&ru); ali@491: ali@491: if (check_dotdot == 2) { ali@491: s = strdup("file:/x:/%2F/"); ali@491: s[6] = path[0]; ali@491: if (str_has_prefix(uri, s)) { ali@491: free(s); ali@491: memmove(uri + 5 + 3, uri + 9 + 3, ali@491: strlen(uri + 9 + 3) + 1); ali@491: uri = realloc(uri, strlen(uri) + 1); ali@491: } else { ali@491: free(s); ali@491: free(uri); ali@491: s = razor_abspath(path); ali@491: uri = razor_path_to_uri(s); ali@491: free(s); ali@491: } ali@491: } else if (check_dotdot) { ali@491: if (str_has_prefix(uri, "file:%2F/")) { ali@491: memmove(uri + 5, uri + 9, strlen(uri + 9) + 1); ali@491: uri = realloc(uri, strlen(uri) + 1); ali@491: } else { ali@491: free(uri); ali@491: s = razor_abspath(path); ali@491: uri = razor_path_to_uri(s); ali@491: free(s); ali@491: } ali@491: } ali@491: ali@491: return uri; ali@475: } ali@475: ali@475: RAZOR_EXPORT char * ali@475: razor_path_relative_to_uri(const char *uri, const char *path, ali@475: struct razor_error **error) ali@475: { ali@475: char *rel_uri, *result; ali@475: ali@475: /* Strictly wrong if uri isn't a file URI, but probably okay */ ali@475: rel_uri = razor_path_to_uri(path); ali@475: ali@475: result = razor_resolve_uri_root(uri, rel_uri + 5, 1, error); ali@475: ali@475: free(rel_uri); ali@475: ali@475: return result; ali@475: }