/*
 * Copyright (C) 2016  J. Ali Harlow <ali@juiblex.co.uk>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

#include "config.h"

#undef DEBUG

#include <stdlib.h>
#include <string.h>
#include "razor.h"
#include "types/types.h"
#include "razor-internal.h"
#include "uri.h"

/*
 * Following RFC 3986 § 3.
 * Note that we don't validate queries or fragments.
 */

/* strdup() that passes a NULL argument through as NULL. */
#define strdup0(s)			((s) ? strdup(s) : NULL)

/* View the data of a struct array as a C string. */
#define string_str(str)			((char *)(str)->data)

/*
 * Initialise an array as an empty string: after array_init() one byte
 * is added and set to NUL.
 * NOTE(review): assumes array_add() returns a pointer to the bytes it
 * has just added -- confirm against the array implementation.
 */
#define string_init(str)		do { \
						char *_p; \
						array_init(str); \
						_p = array_add(str, 1); \
						*_p = '\0'; \
					} while(0)

/*
 * Append len bytes of s. The array grows by len bytes, the write
 * pointer is stepped back by one (presumably onto the terminator
 * that string_init() or an earlier append left as the last byte),
 * len bytes are copied and a new terminator is written after them.
 * Note that s and len are evaluated more than once.
 */
#define string_append_len(str, s, len)	do { \
						char *_p; \
						_p = array_add(str, len); \
						_p--; \
						strncpy(_p, s, len); \
						_p[(len)] = '\0'; \
					} while(0)

/* Append the whole of the NUL-terminated string s. */
#define string_append(str, s)		string_append_len(str, s, strlen(s))

/*
 * Cut the string short at s, which must point into the array's data:
 * a NUL is written at s and the array size is set to the offset of s
 * plus one (ie., including the terminator).
 */
#define string_truncate_at(str, s)	do { \
						int _len; \
						_len = (s) - \
						       (char *)(str)->data; \
						*(s) = '\0'; \
						(str)->size = _len + 1; \
					} while(0)


static const char *skip_uri_scheme(const char *uri)
{
	/*
	 * RFC 3986 section 3.1:
	 *	scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
	 *
	 * Returns a pointer to the ':' which follows a scheme at the start
	 * of uri, or NULL if uri doesn't start with a scheme and a colon.
	 */
	const char *cursor = uri;
	int starts_with_alpha;

	starts_with_alpha = (*cursor >= 'a' && *cursor <= 'z') ||
			    (*cursor >= 'A' && *cursor <= 'Z');
	if (!starts_with_alpha)
		return NULL;

	/* Step over the first letter and any further scheme characters */
	for (cursor++; ; cursor++) {
		if (is_alnum(*cursor))
			continue;
		if (*cursor == '+' || *cursor == '-' || *cursor == '.')
			continue;
		break;
	}

	return *cursor == ':' ? cursor : NULL;
}

/*
 * Duplicate at most n bytes of s into a newly allocated, NUL-terminated
 * string. Copying stops early if a NUL is found within the first n bytes.
 *
 * Returns the new string, which the caller must free(), or NULL if the
 * allocation fails.
 */
static char *razor_strndup(const char *s, size_t n)
{
	const char *nul;
	size_t len;
	char *result;

	/* The copy is as long as the string, but never more than n bytes */
	nul = memchr(s, '\0', n);
	len = nul ? (size_t)(nul - s) : n;

	result = malloc(len + 1);
	if (!result)
		return NULL;

	memcpy(result, s, len);
	result[len] = '\0';

	return result;
}

#if 0
/*
 * NOTE: this function is compiled out by the surrounding #if 0.
 *
 * Return the (possibly decoded) pchar or 0 on end-of-string or -1 on error
 *
 * A '%' must be followed by two characters for which xdigit_value() is
 * non-negative; they are combined as the high and low nibble of the
 * result. Any other character must be a letter, a digit or one of
 * "-._~!$&'()*+,;=:@" to be accepted.
 */
static int pchar_get_char_validated(const char *p)
{
	int c;

	if (p[0]=='\0')
		c = 0;
	else if (p[0]=='%') {
		if (xdigit_value(p[1]) < 0)
			return -1;
		c = xdigit_value(p[1]) * 16;
		if (xdigit_value(p[2]) < 0)
			return -1;
		c += xdigit_value(p[2]);
	} else if (p[0] >= 'a' && p[0] <= 'z' || p[0] >= 'A' && p[0] <= 'Z' || 
		   p[0] >= '0' && p[0] <= '9' ||
		   strchr("-._~!$&'()*+,;=:@", p[0]))
		c = p[0];
	else
		c = -1;

	return c;
}
#endif

/*
 * Check the percent encoding of s: each '%' must be followed by two
 * characters which xdigit_value() accepts (ie., for which it returns
 * a non-negative value).
 *
 * Returns 0 if the encoding is well formed and -1 otherwise.
 */
static int pct_encoding_validate(const char *s)
{
	const char *cur;

	for (cur = s; *cur; cur++) {
		if (*cur != '%')
			continue;

		/*
		 * Test the digits one at a time so that we never look
		 * beyond a string which ends straight after the '%'.
		 */
		if (xdigit_value(cur[1]) < 0)
			return -1;
		if (xdigit_value(cur[2]) < 0)
			return -1;

		/* Skip the two digits; the loop steps over the '%' */
		cur += 2;
	}

	return 0;
}

/*
 * Return a newly allocated copy of s with its percent encoding
 * normalized: an encoded character which is_unreserved() accepts is
 * replaced by the character itself and every other encoded character
 * is re-emitted with upper case hexadecimal digits. Characters which
 * are not percent encoded are copied unchanged.
 *
 * s is expected to have passed pct_encoding_validate().
 * NOTE(review): pchar_get_char() is assumed to return the decoded value
 * (0-255) of the sequence at s and pchar_next_char() to advance s past
 * it -- both are defined outside this file.
 *
 * Returns NULL if s is NULL or if memory can't be allocated. The caller
 * must free() the result.
 */
static char *pct_encoding_normalize(char *s)
{
	static const char hex[] = "0123456789ABCDEF";
	char *retval, *shrunk, *p;
	int c;

	if (!s)
		return NULL;

	/* The normalized form is never longer than the original */
	p = retval = malloc(strlen(s) + 1);
	if (!retval)
		return NULL;

	while (*s) {
		if (*s == '%') {
			c = pchar_get_char(s);
			if (is_unreserved(c))
				*p++ = c;
			else {
				*p++ = '%';
				*p++ = hex[c/16];
				*p++ = hex[c%16];
			}
			pchar_next_char(s);
		} else
			*p++ = *s++;
	}

	*p++ = '\0';

	/*
	 * Give back the unused tail. If that fails the original buffer
	 * is still valid, so return it rather than leaking it.
	 */
	shrunk = realloc(retval, p - retval);

	return shrunk ? shrunk : retval;
}

/*
 * Check that userinfo contains only unreserved characters, sub-delims,
 * ':' and '%' (the percent encoding itself is not checked here).
 *
 * Returns 0 if so. Otherwise sets error and returns -1.
 */
static int validate_userinfo(const char *userinfo, struct razor_error **error)
{
	const char *c = userinfo;

	while (*c) {
		int allowed = is_unreserved(*c) || *c == '%' ||
			      is_sub_delim(*c) || *c == ':';

		if (!allowed) {
			razor_set_error(error, RAZOR_GENERAL_ERROR,
					RAZOR_GENERAL_ERROR_BAD_URI, userinfo,
					"Invalid URI userinfo");
			return -1;
		}

		c++;
	}

	return 0;
}

/*
 * Check that reg_name contains only unreserved characters, sub-delims
 * and '%' (the percent encoding itself is not checked here). An empty
 * name is accepted.
 *
 * Returns 0 if so and -1 otherwise.
 */
static int validate_reg_name(const char *reg_name)
{
	const char *c = reg_name;

	while (*c) {
		int allowed = is_unreserved(*c) || *c == '%' ||
			      is_sub_delim(*c);

		if (!allowed)
			return -1;

		c++;
	}

	return 0;
}

/*
 * Check that the first length bytes of s are exactly a dotted-quad
 * IPv4 address: four decimal fields in the range 0-255, separated by
 * single dots, with no leading zeros and nothing left over.
 *
 * Returns 0 if so and -1 otherwise.
 */
static int validate_ipv4address(const char *s, int length)
{
	const char *cur = s;
	int remaining = length;
	int field;

	for (field = 0; field < 4; field++) {
		int used, value;

		/* Every field but the first is introduced by a dot */
		if (field > 0) {
			if (remaining < 1 || *cur != '.')
				return -1;
			cur++;
			remaining--;
		}

		if (remaining == 0)
			return -1;

		if (*cur == '0') {
			/* A leading zero can only be a lone "0" */
			used = 1;
		} else if (*cur >= '1' && *cur <= '9') {
			value = *cur - '0';
			used = 1;
			while (used < remaining &&
			       cur[used] >= '0' && cur[used] <= '9') {
				value = value * 10 + (cur[used] - '0');
				if (value > 255)
					return -1;
				used++;
			}
		} else
			return -1;

		cur += used;
		remaining -= used;
	}

	return remaining == 0 ? 0 : -1;
}

/*
 * Count and consume a run of colon-separated IPv6 pieces (each one to
 * four characters accepted by is_xdigit()) at the start of the *length
 * bytes at *s. *s and *length are advanced past what was consumed. A
 * colon which is not followed by another piece is not consumed.
 *
 * Returns the number of pieces found, which may be zero.
 */
static int count_ipv6_pieces(const char **s, int *length)
{
	int pieces = 0;

	for (;;) {
		int ndigits = 0;

		while (ndigits < 4 && ndigits < *length &&
		       is_xdigit((*s)[ndigits]))
			ndigits++;

		if (!ndigits) {
			/*
			 * No piece here. If we got here by stepping over
			 * a colon then hand the colon back.
			 */
			if (pieces) {
				(*s)--;
				(*length)++;
			}
			return pieces;
		}

		(*s) += ndigits;
		(*length) -= ndigits;
		pieces++;

		if (!*length || **s != ':')
			return pieces;

		/* Step over the colon and look for another piece */
		(*s)++;
		(*length)--;
	}
}

/*
 * Validate the contents of an IP-literal (the part between the square
 * brackets) following RFC 3986 section 3.2.2:
 *
 *	IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
 *	IPv6address: eight 16-bit pieces, where one run of pieces may be
 *	elided with "::" and where the last two pieces may be written as
 *	an IPv4address.
 *
 * ip_literal need not be NUL-terminated at length; only the first
 * length bytes are examined.
 *
 * Returns 0 if the literal is valid and -1 otherwise.
 */
static int validate_ip_literal(const char *ip_literal, int length)
{
	const char *end = ip_literal + length;
	const char *s, *dot, *piece;
	int len, no_pieces, elide;

	if (length >= 4 && (ip_literal[0] == 'v' || ip_literal[0] == 'V')) {
		/* IPvFuture */
		dot = memchr(ip_literal + 2, '.', (size_t)(length - 2));
		if (!dot)
			return -1;
		for (s = ip_literal + 1; s < dot; s++) {
			if (!is_xdigit(*s))
				return -1;
		}
		/* There must be at least one character after the dot */
		if (dot + 1 == end)
			return -1;
		for (s = dot + 1; s < end; s++) {
			if (!is_unreserved(*s) && !is_sub_delim(*s) && *s != ':')
				return -1;
		}
		return 0;
	}

	/* IPv6address */
	s = ip_literal;
	len = length;
	no_pieces = count_ipv6_pieces(&s, &len);

	elide = len > 1 && s[0] == ':' && s[1] == ':';
	if (elide) {
		s += 2;
		len -= 2;
		no_pieces += count_ipv6_pieces(&s, &len);
	}

	if (len && *s == '.') {
		/*
		 * The address ends in an IPv4address whose first octet
		 * has been consumed as if it were a piece. Back up to
		 * the start of that piece and validate from there.
		 */
		piece = s;
		while (piece > ip_literal && piece[-1] != ':')
			piece--;
		if (piece == s)
			return -1;
		len += s - piece;
		s = piece;
		if (validate_ipv4address(s, len))
			return -1;
		/* One piece handed back; the IPv4address counts as two */
		no_pieces += 1;
	} else if (len)
		return -1;

	/*
	 * Without "::" all eight pieces must be present. With it, at
	 * least one piece must have been elided.
	 */
	if (elide ? no_pieces > 7 : no_pieces != 8)
		return -1;

	return 0;
}

/*
 * Validate a URI host. A host enclosed in square brackets is checked
 * as an IP-literal. Anything else is accepted if it is either an
 * IPv4 address or a reg-name.
 *
 * Returns 0 if host is valid. Otherwise sets error and returns non-zero.
 */
static int validate_host(const char *host, struct razor_error **error)
{
	size_t hostlen = strlen(host);
	int status;

	/* host[0] can't be '[' for an empty host so hostlen - 1 is safe */
	if (host[0] == '[' && host[hostlen - 1] == ']') {
		status = validate_ip_literal(host + 1, hostlen - 2);
	} else {
		status = validate_ipv4address(host, hostlen);
		if (status < 0)
			status = validate_reg_name(host);
	}

	if (!status)
		return status;

	razor_set_error(error, RAZOR_GENERAL_ERROR,
			RAZOR_GENERAL_ERROR_BAD_URI, host,
			"Invalid URI host");

	return status;
}

/*
 * Convert the ASCII upper case letters of s to lower case in place.
 * Other characters are left alone.
 *
 * Returns s, so that the converted string can be used directly.
 */
static char *strdown(char *s)
{
	char *p;

	for (p = s; *p; p++) {
		if (*p >= 'A' && *p <= 'Z')
			*p += 'a' - 'A';
	}

	return s;
}

/*
 * Parse the first length bytes of authority following RFC 3986
 * section 3.2:
 *
 *	authority   = [ userinfo "@" ] host [ ":" port ]
 *
 * authority must be NUL-terminated but may continue beyond length.
 *
 * On success the userinfo, host and port members of ru are set to
 * newly allocated strings (userinfo and port are NULL if absent) and
 * 0 is returned. On failure error is set, ru is left untouched and -1
 * is returned.
 */
static int razor_uri_parse_authority(struct razor_uri *ru,
				     const char *authority, int length,
				     struct razor_error **error)
{
	const char *s, *auth = authority;
	char *userinfo = NULL, *port = NULL, *host;

	s = memchr(auth, '@', (size_t)length);
	if (s) {
		userinfo = razor_strndup(auth, s - auth);
		s++;
		length -= s - auth;
		auth = s;

		if (validate_userinfo(userinfo, error)) {
			free(userinfo);
			return -1;
		}
	}

	/*
	 * Find the colon which introduces the port. An IP-literal host
	 * may itself contain colons, so for a bracketed host only look
	 * after the closing bracket.
	 */
	if (length > 0 && auth[0] == '[') {
		s = memchr(auth, ']', (size_t)length);
		if (s)
			s = memchr(s, ':', (size_t)(length - (s - auth)));
	} else
		s = memchr(auth, ':', (size_t)length);

	if (s) {
		port = razor_strndup(s + 1, length - (s + 1 - auth));
		length = s - auth;

		if (strspn(port, "0123456789") != strlen(port)) {
			razor_set_error(error, RAZOR_GENERAL_ERROR,
					RAZOR_GENERAL_ERROR_BAD_URI, port,
					"Invalid URI port");
			free(userinfo);
			free(port);
			return -1;
		}
	}

	host = razor_strndup(auth, length);

	if (validate_host(host, error)) {
		free(userinfo);
		free(port);
		free(host);
		return -1;
	}

	ru->userinfo = userinfo;
	ru->port = port;
	ru->host = host;

	return 0;
}

/*
 * Parse either a hier-part or a relative-part (RFC 3986 sections 3
 * and 4.2). part must not include the query or the fragment.
 *
 * If part starts with "//" the authority is parsed into the userinfo,
 * host and port members of ru; otherwise those members are set to NULL.
 * What remains is validated as a path and stored in ru->path. If
 * relative_part is non-zero the first segment of a path which doesn't
 * start with a slash may not contain a colon (path-noscheme).
 *
 * Returns 0 on success. On failure error is set and -1 is returned;
 * any authority members which have already been set are left for the
 * caller to release.
 */
static int razor_uri_parse_part(struct razor_uri *ru, const char *part,
				int relative_part, struct razor_error **error)
{
	const char *s, *hp = part;
	char *path, *p, *shrunk;
	int noscheme = 0;

	if (hp[0] == '/' && hp[1] == '/') {
		hp += 2;
		s = strpbrk(hp, "/?#");
		if (!s)
			s = hp + strlen(hp);
		if (razor_uri_parse_authority(ru, hp, s - hp, error) < 0)
			return -1;
		hp = s;
	} else {
		ru->userinfo = NULL;
		ru->host = NULL;
		ru->port = NULL;
	}

	if (!*hp) {
		/* path-empty */
		ru->path = strdup("");
		return 0;
	}

	if (*hp != '/') {
		if (ru->host) {
			/* An authority must be followed by path-abempty */
			razor_set_error(error, RAZOR_GENERAL_ERROR,
					RAZOR_GENERAL_ERROR_BAD_URI, part,
					relative_part ?
					"Invalid URI relative part" :
					"Invalid URI hierarchical part");
			return -1;
		}
		/* path-rootless or path-noscheme */
		noscheme = relative_part;
	}

	p = path = malloc(strlen(hp) + 1);

	/*
	 * Every segment is a run of pchars and segments are separated
	 * by slashes. Empty segments are allowed; a path which starts
	 * with "//" can only get here after an authority, where it is
	 * valid as path-abempty.
	 */
	while (*hp) {
		if (*hp == '/')
			noscheme = 0;
		else if (!is_pchar(*hp) || (noscheme && *hp == ':')) {
			free(path);
			razor_set_error(error, RAZOR_GENERAL_ERROR,
					RAZOR_GENERAL_ERROR_BAD_URI, part,
					"Invalid character in URI path");
			return -1;
		}
		*p++ = *hp++;
	}

	*p++ = '\0';

	/* If the buffer can't be shrunk then the original is still good */
	shrunk = realloc(path, p - path);
	ru->path = shrunk ? shrunk : path;

	return 0;
}

/*
 * Release the seven component strings held by ru. The structure itself
 * is not freed and its members are left pointing at the freed memory.
 */
void razor_uri_destroy(struct razor_uri *ru)
{
	void *owned[7];
	size_t i;

	owned[0] = ru->scheme;
	owned[1] = ru->userinfo;
	owned[2] = ru->host;
	owned[3] = ru->port;
	owned[4] = ru->path;
	owned[5] = ru->query;
	owned[6] = ru->fragment;

	for (i = 0; i < sizeof(owned) / sizeof(owned[0]); i++)
		free(owned[i]);
}

/*
 * Parse uri as a URI (RFC 3986 section 3) or, if absolute is non-zero,
 * as an absolute-URI (section 4.3) which may not have a fragment:
 *
 *	URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
 *
 * On success the members of ru are set to newly allocated strings
 * (NULL for components which are not present) and 0 is returned; use
 * razor_uri_destroy() to release them. On failure error is set, the
 * contents of ru are indeterminate and -1 is returned.
 */
int razor_uri_parse_uri(struct razor_uri *ru, const char *uri, int absolute,
			struct razor_error **error)
{
	int r;
	const char *s;
	char *hier_part;

	if (pct_encoding_validate(uri) < 0) {
		razor_set_error(error, RAZOR_GENERAL_ERROR,
				RAZOR_GENERAL_ERROR_BAD_URI, uri,
				"Invalid percent encoding");
		return -1;
	}

	memset(ru, 0, sizeof(*ru));

	s = skip_uri_scheme(uri);
	if (!s) {
		razor_set_error(error, RAZOR_GENERAL_ERROR,
				RAZOR_GENERAL_ERROR_BAD_URI, uri,
				"Invalid URI scheme");
		return -1;
	}
	ru->scheme = razor_strndup(uri, s - uri);
	uri = s + 1;

	/*
	 * The hier-part ends at whichever of '?' and '#' comes first.
	 * A '?' after the '#' belongs to the fragment.
	 */
	s = uri + strcspn(uri, "?#");
	hier_part = razor_strndup(uri, s - uri);
	uri = s;

	r = razor_uri_parse_part(ru, hier_part, 0, error);
	free(hier_part);
	if (r) {
		razor_uri_destroy(ru);
		return -1;
	}

	if (*uri != '?')
		ru->query = NULL;
	else {
		uri++;
		s = strchr(uri, '#');
		if (!s)
			s = uri + strlen(uri);
		ru->query = razor_strndup(uri, s - uri);
		uri = s;
	}

	if (*uri != '#')
		ru->fragment = NULL;
	else if (absolute) {
		razor_set_error(error, RAZOR_GENERAL_ERROR,
				RAZOR_GENERAL_ERROR_BAD_URI, uri,
				"Fragments are not allowed in absolute URIs");
		razor_uri_destroy(ru);
		return -1;
	} else {
		uri++;
		ru->fragment = strdup(uri);
	}

	return 0;
}

/*
 * Parse uri as a relative reference (RFC 3986 section 4.2):
 *
 *	relative-ref = relative-part [ "?" query ] [ "#" fragment ]
 *
 * On success the members of ru are set to newly allocated strings
 * (NULL for components which are not present; the scheme is always
 * NULL) and 0 is returned; use razor_uri_destroy() to release them.
 * On failure error is set, the contents of ru are indeterminate and
 * -1 is returned.
 */
int razor_uri_parse_relative_ref(struct razor_uri *ru, const char *uri,
				 struct razor_error **error)
{
	int r;
	const char *s;
	char *relative_part;

	if (pct_encoding_validate(uri) < 0) {
		razor_set_error(error, RAZOR_GENERAL_ERROR,
				RAZOR_GENERAL_ERROR_BAD_URI, uri,
				"Invalid percent encoding");
		return -1;
	}

	memset(ru, 0, sizeof(*ru));

	/*
	 * The relative-part ends at whichever of '?' and '#' comes
	 * first. A '?' after the '#' belongs to the fragment.
	 */
	s = uri + strcspn(uri, "?#");
	relative_part = razor_strndup(uri, s - uri);
	uri = s;

	r = razor_uri_parse_part(ru, relative_part, 1, error);
	free(relative_part);
	if (r) {
		/* Release any authority parsed before the path failed */
		razor_uri_destroy(ru);
		return -1;
	}

	if (*uri == '?') {
		uri++;
		s = strchr(uri, '#');
		if (!s)
			s = uri + strlen(uri);
		ru->query = razor_strndup(uri, s - uri);
		uri = s;
	} else
		ru->query = NULL;

	if (*uri == '#') {
		uri++;
		ru->fragment = strdup(uri);
	} else
		ru->fragment = NULL;

	return 0;
}

/*
 * Parse uri as a URI reference: first as a URI and, if that fails, as
 * a relative reference. If both attempts fail, the error reported is
 * the one from the attempt to parse it as a URI.
 *
 * Returns the result of the attempt which succeeded, or a negative
 * value with error set if neither did.
 */
int razor_uri_parse(struct razor_uri *ru, const char *uri,
		    struct razor_error **error)
{
	struct razor_error *uri_error = NULL;
	int status;

	status = razor_uri_parse_uri(ru, uri, 0, &uri_error);
	if (status >= 0)
		return status;

	status = razor_uri_parse_relative_ref(ru, uri, NULL);
	if (status >= 0) {
		/* The first failure is of no interest after all */
		razor_error_free(uri_error);
		return status;
	}

	razor_propagate_error(error, uri_error, NULL);

	return status;
}

/*
 * Following RFC 3986 section 5.2.4
 *
 * Return a copy of path with its "." and ".." segments removed. The
 * work is done on a private copy of path (the input buffer) which is
 * consumed from the front, and complete segments are moved to an
 * output string. The step names match the steps of the RFC algorithm
 * and are only used by the debugging output.
 *
 * The result is the data of the output array; the callers in this file
 * treat it as a string which they own.
 * NOTE(review): this assumes that the array's data can be released
 * with free() -- confirm against the array implementation.
 */
static char *remove_dot_segments(const char *path)
{
	struct array output;
	char *input, *in, *s, *t;
	const char *step;

#ifdef DEBUG
	fprintf(stderr, "STEP   OUTPUT BUFFER         INPUT BUFFER\n");
#endif

	/* The input buffer is modified below so we need our own copy */
	input = strdup(path);
	in = input;
	string_init(&output);

#ifdef DEBUG
	fprintf(stderr, " 1 :   %-21s %s\n", string_str(&output), in);
#endif

	while (*in) {
		if (str_has_prefix(in, "../")) {
			/* Drop a leading "../" */
			step = "2A";
			in += 3;
		} else if (str_has_prefix(in, "./")) {
			/* Drop a leading "./" */
			step = "2A";
			in += 2;
		} else if (str_has_prefix(in, "/./")) {
			/* Replace a leading "/./" with "/" */
			step = "2B";
			in += 2;
		} else if (!strcmp(in, "/.")) {
			/*
			 * Replace a final "/." with "/" by overwriting
			 * the dot.
			 */
			step = "2B";
			in++;
			*in = '/';
		} else if (str_has_prefix(in, "/../")) {
			/*
			 * Replace a leading "/../" with "/" and remove
			 * the last segment (everything from the last
			 * slash, or else everything) from the output.
			 */
			step = "2C";
			in += 3;
			s = strrchr(string_str(&output), '/');
			if (!s)
				s = string_str(&output);
			string_truncate_at(&output, s);
		} else if (!strcmp(in, "/..")) {
			/*
			 * As above for a final "/..", overwriting the
			 * second dot with the replacement slash.
			 */
			step = "2C";
			in += 2;
			*in = '/';
			s = strrchr(string_str(&output), '/');
			if (!s)
				s = string_str(&output);
			string_truncate_at(&output, s);
		} else if (!strcmp(in, ".") || !strcmp(in, "..")) {
			/* Nothing but "." or ".." is left: drop it */
			step = "2D";
			in += strlen(in);
		} else {
			/*
			 * Move the first segment, including any leading
			 * slash but not the next one, to the output.
			 */
			step = "2E";
			t = strchr(in + 1, '/');
			if (!t)
				t = in + strlen(in);
			string_append_len(&output, in, t - in);
			in = t;
		}
#ifdef DEBUG
		fprintf(stderr, " %s:   %-21s %s\n", step, string_str(&output),
			in);
#endif
	}

	free(input);
	return string_str(&output);
}

/*
 * Following RFC 3986 section 6.2.2
 *
 * Normalize ru in place: the scheme and host are converted to lower
 * case, the percent encoding of the userinfo, host, path, query and
 * fragment is normalized and dot segments are removed from the path.
 *
 * ru may be a relative reference, ie., its scheme may be NULL. A
 * component whose normalized form can't be allocated is left as it is.
 */
void razor_uri_normalize(struct razor_uri *ru)
{
	char *s;

	/* Relative references have no scheme */
	if (ru->scheme)
		strdown(ru->scheme);
	if (ru->host)
		strdown(ru->host);

	/*
	 * pct_encoding_normalize() returns NULL both for a component
	 * which is absent and on allocation failure. Either way there
	 * is nothing to replace the current value with.
	 */
	s = pct_encoding_normalize(ru->userinfo);
	if (s) {
		free(ru->userinfo);
		ru->userinfo = s;
	}

	s = pct_encoding_normalize(ru->host);
	if (s) {
		free(ru->host);
		ru->host = s;
	}

	s = pct_encoding_normalize(ru->path);
	if (s) {
		free(ru->path);
		ru->path = s;
	}

	s = pct_encoding_normalize(ru->query);
	if (s) {
		free(ru->query);
		ru->query = s;
	}

	s = pct_encoding_normalize(ru->fragment);
	if (s) {
		free(ru->fragment);
		ru->fragment = s;
	}

	if (ru->path) {
		s = remove_dot_segments(ru->path);
		free(ru->path);
		ru->path = s;
	}
}

/*
 * Build the authority component of ru: the host, preceded by the
 * userinfo and '@' if there is a userinfo and followed by ':' and the
 * port if there is a port.
 *
 * Returns a newly allocated string which the caller must free(), or
 * NULL if ru has no host.
 */
char *razor_uri_get_authority(const struct razor_uri *ru)
{
	size_t userinfo_len = 0, host_len, port_len = 0, total;
	char *result, *out;

	if (!ru->host)
		return NULL;

	/* One byte for the terminator plus one for each delimiter */
	total = 1;
	if (ru->userinfo) {
		userinfo_len = strlen(ru->userinfo);
		total += userinfo_len + 1;
	}
	host_len = strlen(ru->host);
	total += host_len;
	if (ru->port) {
		port_len = strlen(ru->port);
		total += port_len + 1;
	}

	out = result = malloc(total);

	if (ru->userinfo) {
		memcpy(out, ru->userinfo, userinfo_len);
		out += userinfo_len;
		*out++ = '@';
	}

	memcpy(out, ru->host, host_len);
	out += host_len;

	if (ru->port) {
		*out++ = ':';
		memcpy(out, ru->port, port_len);
		out += port_len;
	}

	*out = '\0';

	return result;
}

/*
 * Following RFC 3986 section 5.3
 *
 * Recompose the components of ru into a URI reference: the scheme and
 * ':' if there is a scheme, "//" and the authority if there is a host,
 * the path, '?' and the query if there is a query and '#' and the
 * fragment if there is a fragment. The path must not be NULL.
 *
 * Returns a newly allocated string which the caller must free().
 */
char *razor_uri_recompose(const struct razor_uri *ru)
{
	char *authority = razor_uri_get_authority(ru);
	char *result, *out;
	int total;

	/* One byte for the terminator plus the length of each part */
	total = 1;
	total += ru->scheme ? strlen(ru->scheme) + 1 : 0;
	total += authority ? strlen(authority) + 2 : 0;
	total += strlen(ru->path);
	total += ru->query ? strlen(ru->query) + 1 : 0;
	total += ru->fragment ? strlen(ru->fragment) + 1 : 0;

	out = result = malloc(total);

	if (ru->scheme) {
		out += strlen(strcpy(out, ru->scheme));
		*out++ = ':';
	}

	if (authority) {
		*out++ = '/';
		*out++ = '/';
		out += strlen(strcpy(out, authority));
		free(authority);
	}

	out += strlen(strcpy(out, ru->path));

	if (ru->query) {
		*out++ = '?';
		out += strlen(strcpy(out, ru->query));
	}

	if (ru->fragment) {
		*out++ = '#';
		strcpy(out, ru->fragment);
	}

	return result;
}

/*
 * Following RFC 3986 section 5.2.3
 *
 * Merge the path of the relative reference R with the path of base:
 * if base has a host and an empty path the result is "/" followed by
 * R's path; otherwise it is base's path up to and including its last
 * slash (nothing, if it has no slash) followed by R's path.
 *
 * Returns a newly allocated string which the caller must free().
 */
static char *merge_paths(const struct razor_uri *base,const struct razor_uri *R)
{
	const char *last_slash;
	char *dir, *merged;

	if (base->host && !*base->path)
		return razor_concat("/", R->path, NULL);

	last_slash = strrchr(base->path, '/');
	if (!last_slash)
		return strdup(R->path);

	/* Keep everything up to and including the last slash */
	dir = razor_strndup(base->path, last_slash + 1 - base->path);
	merged = razor_concat(dir, R->path, NULL);
	free(dir);

	return merged;
}

/*
 * Following RFC 3986 section 5.2
 *
 * Resolve the reference R against the base URI base, storing the
 * result in T. Every member of T is overwritten with either a newly
 * allocated string or NULL, so T should not own anything on entry and
 * should be released with razor_uri_destroy() afterwards.
 */
void razor_uri_resolve(struct razor_uri *T, const struct razor_uri *base,
		       const struct razor_uri *R)
{
	const struct razor_uri *scheme_from, *authority_from;
	char *merged;

	scheme_from = R->scheme ? R : base;

	if (R->scheme || R->host) {
		/* R brings its own authority, path and query */
		authority_from = R;
		T->path = remove_dot_segments(R->path);
		T->query = strdup0(R->query);
	} else {
		authority_from = base;
		if (!*R->path) {
			/* Only the query, if any, overrides the base */
			T->path = strdup(base->path);
			T->query = R->query ? strdup(R->query) :
					      strdup0(base->query);
		} else if (*R->path == '/') {
			T->path = remove_dot_segments(R->path);
			T->query = strdup0(R->query);
		} else {
			merged = merge_paths(base, R);
			T->path = remove_dot_segments(merged);
			free(merged);
			T->query = strdup0(R->query);
		}
	}

	T->userinfo = strdup0(authority_from->userinfo);
	T->host = strdup0(authority_from->host);
	T->port = strdup0(authority_from->port);
	T->scheme = strdup(scheme_from->scheme);
	T->fragment = strdup0(R->fragment);
}

/*
 * This differs from razor_uri_resolve() both in the types of its arguments
 * and in the fact that it takes a root URI rather than a base URI. The base
 * URI is determined by appending a slash to the root URI (if it doesn't
 * already end in a slash). Finally, uri can be explicitly marked as either
 * relative (ie., a relative-ref) or not (ie., a URI). This is important as
 * otherwise "c:/xxx" could be interpreted as a URI in the "c" scheme.
 */
char *razor_resolve_uri_root(const char *root_uri, const char *uri,
			     int is_relative, struct razor_error **error)
{
	int r;
        char *base_uri, *s, *result;
	struct razor_uri ru, base, file;

	if (!root_uri || !*root_uri)
		root_uri = "file:/";

	if (root_uri[strlen(root_uri) - 1] == '/')
		base_uri = strdup(root_uri);
	else
		base_uri = razor_concat(root_uri, "/", NULL);

	r = razor_uri_parse_uri(&base, base_uri, 1, error);
	free(base_uri);
	if (r)
		return NULL;

	if (is_relative > 0) {
		/*
		 * We can't use razor_uri_parse_relative_ref() to parse
		 * uri in case it starts with a segment that includes a
		 * colon. Thus we use this kludge.
		 */
		s = razor_concat("scheme:", uri, NULL);
		r = razor_uri_parse_uri(&file, s, 0, error);
		free(s);
		if (!r) {
			free(file.scheme);
			file.scheme = NULL;
		}
	}
	else if (!is_relative)
		r = razor_uri_parse_uri(&file, uri, 0, error);
	else
		r = razor_uri_parse(&file, uri, error);
	if (r) {
		razor_uri_destroy(&base);
		return NULL;
	}

	razor_uri_resolve(&ru, &base, &file);

	razor_uri_destroy(&base);
	razor_uri_destroy(&file);

	result = razor_uri_recompose(&ru);

	razor_uri_destroy(&ru);

	return result;
}