/*
 *	TICKR - GTK-based Feed Reader - Copyright (C) Emmanuel Thomas-Maurin 2009-2013
 *	<manutm007@gmail.com>
 *
 * 	This program is free software: you can redistribute it and/or modify
 * 	it under the terms of the GNU General Public License as published by
 * 	the Free Software Foundation, either version 3 of the License, or
 * 	(at your option) any later version.
 *
 * 	This program is distributed in the hope that it will be useful,
 * 	but WITHOUT ANY WARRANTY; without even the implied warranty of
 * 	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * 	GNU General Public License for more details.
 *
 * 	You should have received a copy of the GNU General Public License
 * 	along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "tickr.h"

/*
 * File-scope parser state, reset by parse_xml_file() before each parse
 * and shared by the get_*_selected_elements*() helpers below.
 */
/* Progress along the expected element chain (e.g. rss -> channel -> item); only ever advanced */
static int	depth;
/* First 'item' (RSS) or 'entry' (Atom) node found, or NULL if none found yet */
static xmlNode	*item_or_entry_element;
/* Rank of the next link to record (starts at 1), also used as index into link_and_offset */
static int	n;
/* Number of items/entries processed so far, compared against prm->n_items_per_feed */
static int	counter;

/*
 * Here, 'rss' is sometimes used as a synonym for 'feed' and sometimes
 * used in contrast to 'atom' (this can be confusing).
 */

/*
 * Fetch the resource identified by resrc->id and, if it is a valid feed,
 * parse it then dump the result into
 * <user_home_dir>/TICKR_DIR_NAME/XML_DUMP and -/XML_DUMP_EXTRA.
 * If the url isn't valid (or anything else fails), only report the
 * problem through warning() and return an error code.
 *
 * resrc: resource to load; on success its format, feed_title, rss_ttl,
 *        link_and_offset, fp and fp_extra members are updated.
 * prm:   current parameters (refresh delay, title/description options,
 *        delimiters).
 *
 * Returns OK on success, otherwise FEED_FORMAT_ERROR,
 * CONNECT_TOO_MANY_ERRORS, FEED_DOWNLOAD_ERROR, RSS_FORMAT_UNDETERMINED
 * or whatever error get_feed_info() / parse_xml_file() returned.
 *
 * The ticker's suspend_rq flag is forced to TRUE while this function runs
 * and is restored to its previous value on every exit path, so a failed
 * fetch no longer leaves the ticker suspended.
 */
int get_feed(Resource *resrc, const Params *prm)
{
	char	feed_title[FEED_TITLE_MAXLEN + 1];
	char	feed_link[FILE_NAME_MAXLEN + 1];
	char	feed_ttl[32];
	char	file_name[FILE_NAME_MAXLEN + 1];
	char	url[FILE_NAME_MAXLEN + 1];
	int	suspend_rq_bak, exit_status, i;

	suspend_rq_bak = get_ticker_env()->suspend_rq;
	get_ticker_env()->suspend_rq = TRUE;

	/* Default refresh delay, possibly overridden below by the feed's own ttl */
	resrc->rss_ttl = prm->rss_refresh;

	/* 'xml dump' stuff */
	str_n_cpy(resrc->xml_dump, get_datafile_full_name_from_name(XML_DUMP), FILE_NAME_MAXLEN);
	if (resrc->fp != NULL)
		fclose(resrc->fp);
	resrc->fp = open_new_datafile_with_name(XML_DUMP, "wb");

	/* 'xml dump extra' stuff */
	str_n_cpy(resrc->xml_dump_extra, get_datafile_full_name_from_name(XML_DUMP_EXTRA), FILE_NAME_MAXLEN);
	if (resrc->fp_extra != NULL)
		fclose(resrc->fp_extra);
	resrc->fp_extra = open_new_datafile_with_name(XML_DUMP_EXTRA, "wb");

	str_n_cpy(file_name, get_datafile_full_name_from_name(RESOURCE_DUMP), FILE_NAME_MAXLEN);

	/*
	 * file_name = downloaded resource. url is handed to fetch_resource()
	 * as a third buffer but is not used any further in this function.
	 */
	exit_status = fetch_resource((const char *)resrc->id, (const char *)file_name, url);
	if (exit_status != OK) {
		if (exit_status == FEED_FORMAT_ERROR) {
			warning(get_ticker_env()->selection_mode == MULTIPLE, 2, "Feed format error in: ", resrc->id);
		} else if (exit_status != CONNECT_TOO_MANY_ERRORS) {
			/* Any other failure is reported as a download error */
			warning(get_ticker_env()->selection_mode == MULTIPLE, 2, "Can't fetch resource: ", resrc->id);
			exit_status = FEED_DOWNLOAD_ERROR;
		}
		/* CONNECT_TOO_MANY_ERRORS is returned silently */
		goto out;
	}
#ifdef VERBOSE_OUTPUT
	fprintf(STD_OUT, "Resource fetched: %s\n", (const char *)resrc->id);
	fflush(STD_OUT);
#endif

	/* We use file_name instead of resrc->id */
	exit_status = get_feed_info((int *)&resrc->format, file_name, feed_title, feed_link, feed_ttl);
	if (exit_status != OK) {
		if (exit_status == FEED_UNPARSABLE)
			warning(get_ticker_env()->selection_mode == MULTIPLE,
				2, "Feed is unparsable: ", resrc->id);
		else if (exit_status == FEED_EMPTY)
			warning(get_ticker_env()->selection_mode == MULTIPLE,
				2, "Feed is empty: ", resrc->id);
		else if (exit_status == RSS_FORMAT_UNDETERMINED)
			warning(get_ticker_env()->selection_mode == MULTIPLE,
				2, "Undetermined feed format: ", resrc->id);
		else
			warning(get_ticker_env()->selection_mode == MULTIPLE,
				2, "get_feed_info() undetermined error: ", resrc->id);
		goto out;
	}
	if (resrc->format == RSS_FORMAT_UNDETERMINED) {
		warning(get_ticker_env()->selection_mode == MULTIPLE,
			2, "Undetermined feed format: ", resrc->id);
		exit_status = RSS_FORMAT_UNDETERMINED;
		goto out;
	}

	str_n_cpy(resrc->feed_title, feed_title, FEED_TITLE_MAXLEN);
	if (prm->feed_title == 'y') {
		/* We remove any LINK_TAG_CHAR from str because it will be used in "link tag" */
		remove_char_from_str((char *)feed_title, LINK_TAG_CHAR);
		fprintf(resrc->fp, "%s%s", feed_title, prm->feed_title_delimiter);
	}
	/* A ttl found in the feed overrides the default refresh delay */
	if (feed_ttl[0] != '\0')
		resrc->rss_ttl = atoi(feed_ttl);

	/* Link offset stuff reset */
	for (i = 0; i < NFEEDLINKANDOFFSETMAX; i++) {
		resrc->link_and_offset[i].offset_in_surface = 0;
		(resrc->link_and_offset[i].url)[0] = '\0';
	}

	/* We use file_name instead of resrc->id */
	exit_status = parse_xml_file(resrc->format, resrc->fp, resrc->fp_extra,
		file_name, resrc->link_and_offset, prm);
	if (exit_status == FEED_NO_ITEM_OR_ENTRY_ELEMENT)
		warning(get_ticker_env()->selection_mode == MULTIPLE, 2,
			"No 'item' or 'entry' element found in: ", resrc->id);
	if (exit_status != OK)
		goto out;

	/* Dumps are complete: reopen both files for reading */
	fclose(resrc->fp);
	resrc->fp = open_new_datafile_with_name(XML_DUMP, "rb");
	fclose(resrc->fp_extra);
	resrc->fp_extra = open_new_datafile_with_name(XML_DUMP_EXTRA, "rb");
	exit_status = OK;

out:
	/* Always give the ticker back its previous suspend state */
	get_ticker_env()->suspend_rq = suspend_rq_bak;
	return exit_status;
}

/*
 * Parse the XML file file_name (must be utf-8 encoded) as a feed of the
 * given format and write the selected elements into fp and fp_extra.
 *
 * format:          RSS_2_0, RSS_1_0 or RSS_ATOM.
 * fp, fp_extra:    output streams passed on to get_feed_selected_elements2().
 * file_name:       file to parse.
 * link_and_offset: array receiving the urls of the links found.
 * prm:             current parameters.
 *
 * Returns OK, FEED_UNPARSABLE (parser error), FEED_EMPTY (no root
 * element), FEED_FORMAT_ERROR (unsupported format) or
 * FEED_NO_ITEM_OR_ENTRY_ELEMENT (no 'item'/'entry' found).
 *
 * Resets the file-scope parser state (depth, item_or_entry_element, n,
 * counter) before walking the document.
 */
int parse_xml_file(int format, FILE *fp, FILE *fp_extra, const char *file_name,
	FeedLinkAndOffset *link_and_offset, const Params *prm)
{
	xmlDoc		*doc;
	xmlNode		*root_element;
	const char	*parser_msg;

#ifdef VERBOSE_OUTPUT
	fprintf(STD_OUT, "Parsing XML file ... ");
	fflush(STD_OUT);
#endif
	if ((doc = xmlParseFile(file_name)) == NULL) {
		/*
		 * xmlGetLastError() may return NULL and its message may be
		 * NULL too, so never dereference blindly. The returned pointer
		 * is not stored because its declared type differs between
		 * libxml2 versions.
		 */
		parser_msg = "(no details available)";
		if (xmlGetLastError() != NULL && xmlGetLastError()->message != NULL)
			parser_msg = xmlGetLastError()->message;
		warning(get_ticker_env()->selection_mode == MULTIPLE, 2,
			"XML parser error: ", parser_msg);
		return FEED_UNPARSABLE;
	}
	if ((root_element = xmlDocGetRootElement(doc)) == NULL) {
		xmlFreeDoc(doc);
		warning(get_ticker_env()->selection_mode == MULTIPLE, 2,
			"Empty XML document: ", file_name);
		return FEED_EMPTY;
	}

	/* Fresh parser state for this document */
	depth = 0;
	item_or_entry_element = NULL;
	n = 1;
	counter = 0;

	/* Step 1: locate the first 'item' / 'entry' element */
	if (format == RSS_2_0)
		get_rss20_selected_elements1(root_element, doc);
	else if (format == RSS_1_0)
		get_rss10_selected_elements1(root_element, doc);
	else if (format == RSS_ATOM)
		get_atom_selected_elements1(root_element, doc);
	else {
		xmlFreeDoc(doc);
		return FEED_FORMAT_ERROR;
	}

	/* Step 2: extract titles, descriptions and links starting from that element */
	if (item_or_entry_element == NULL) {
		xmlFreeDoc(doc);
		return FEED_NO_ITEM_OR_ENTRY_ELEMENT;
	}
	get_feed_selected_elements2(format, item_or_entry_element, doc,
		fp, fp_extra, link_and_offset, prm);

	xmlFreeDoc(doc);
#ifdef VERBOSE_OUTPUT
	fprintf(STD_OUT, "Done\n");
	fflush(STD_OUT);
#endif
	return OK;
}

/*
 * RSS 2.0: walk some_element and its following siblings (recursing into
 * children) until the chain rss -> channel -> item has been matched, then
 * store the matching 'item' node in item_or_entry_element.
 * Nodes carrying a namespace are skipped together with their subtree.
 * doc is only passed along to the recursive calls.
 */
void get_rss20_selected_elements1(xmlNode *some_element, xmlDoc *doc)
{
	xmlNode	*node;

	/* Stop once the siblings are exhausted or an 'item' has been recorded */
	for (node = some_element; node != NULL && item_or_entry_element == NULL; node = node->next) {
		/* RSS 2.0: we don't want extra namespaces */
		if (node->ns != NULL)
			continue;

		/* Advance along the expected chain, one step at most per node */
		if (depth == 0 && xmlStrcmp(node->name, (const xmlChar *)"rss") == 0)
			depth = 1;
		else if (depth == 1 && xmlStrcmp(node->name, (const xmlChar *)"channel") == 0)
			depth = 2;
		else if (depth == 2 && xmlStrcmp(node->name, (const xmlChar *)"item") == 0)
			depth = 3;

		if (depth != 3)
			get_rss20_selected_elements1(node->children, doc);
		else
			item_or_entry_element = node;
	}
}

/*
 * RSS 1.0: walk some_element and its following siblings (recursing into
 * children) until an 'item' has been seen after an 'RDF' element, then
 * store that 'item' node in item_or_entry_element.
 * doc is only passed along to the recursive calls.
 */
void get_rss10_selected_elements1(xmlNode *some_element, xmlDoc *doc)
{
	xmlNode	*node;

	/* Stop once the siblings are exhausted or an 'item' has been recorded */
	for (node = some_element; node != NULL && item_or_entry_element == NULL; node = node->next) {
		/* Advance along the expected chain, one step at most per node */
		if (depth == 0 && xmlStrcmp(node->name, (const xmlChar *)"RDF") == 0)
			depth = 1;
		else if (depth == 1 && xmlStrcmp(node->name, (const xmlChar *)"item") == 0)
			depth = 2;

		if (depth != 2)
			get_rss10_selected_elements1(node->children, doc);
		else
			item_or_entry_element = node;
	}
}

/*
 * Atom: walk some_element and its following siblings (recursing into
 * children) until an 'entry' has been seen after a 'feed' element, then
 * store that 'entry' node in item_or_entry_element.
 * Unlike the RSS variants, a 'feed' element sets depth to 1 whatever the
 * current depth is.
 * doc is only passed along to the recursive calls.
 */
void get_atom_selected_elements1(xmlNode *some_element, xmlDoc *doc)
{
	xmlNode	*node;

	/* Stop once the siblings are exhausted or an 'entry' has been recorded */
	for (node = some_element; node != NULL && item_or_entry_element == NULL; node = node->next) {
		if (xmlStrcmp(node->name, (const xmlChar *)"feed") == 0)
			depth = 1;
		else if (depth == 1 && xmlStrcmp(node->name, (const xmlChar *)"entry") == 0)
			depth = 2;

		if (depth != 2)
			get_atom_selected_elements1(node->children, doc);
		else
			item_or_entry_element = node;
	}
}

/*
 * Starting at some_element (the first 'item' / 'entry'), go through every
 * following sibling item/entry and write its title, description/summary
 * and links to the output streams.
 *
 * Depending on prm->item_title / prm->item_description, the title and the
 * description go either to fp (followed by their delimiter) or to
 * fp_extra (one tagged line: tag char + "00n" + text).
 *
 * For every link found, we insert in text (fp) LINK_TAG_CHAR + "00n"
 * with n = link rank and we fill link_and_offset with url.
 * This is used later in render_stream_to_surface().
 *
 * Processing stops after prm->n_items_per_feed items when that value is
 * not 0. For RSS 2.0, namespaced nodes are ignored.
 *
 * NOTE(review): n can reach NFEEDLINKANDOFFSETMAX here, so link_and_offset
 * presumably holds NFEEDLINKANDOFFSETMAX + 1 entries - TODO confirm
 * against its declaration.
 */
void get_feed_selected_elements2(int feed_format, xmlNode *some_element, xmlDoc *doc,
	FILE *fp, FILE *fp_extra, FeedLinkAndOffset *link_and_offset, const Params *prm)
{
	const xmlChar	*item_tag, *text_tag;
	xmlNode		*item, *child;
	xmlChar		*text;
	int		skip_namespaced;

	/* Element names depend on the feed flavour */
	if (feed_format == RSS_2_0 || feed_format == RSS_1_0) {
		item_tag = (const xmlChar *)"item";
		text_tag = (const xmlChar *)"description";
	} else if (feed_format == RSS_ATOM) {
		item_tag = (const xmlChar *)"entry";
		text_tag = (const xmlChar *)"summary";
	} else {
		fprintf(STD_ERR, "get_feed_selected_elements2(): Undefined feed format\n");
		fflush(STD_ERR);
		return;
	}
	/* RSS 2.0: we don't want extra namespaces */
	skip_namespaced = (feed_format == RSS_2_0);

	for (item = some_element; item != NULL; item = item->next) {
		if (skip_namespaced && item->ns != NULL)
			continue;
		if (xmlStrcmp(item->name, item_tag) != 0)
			continue;

		/* First pass over the children: title and description / summary */
		for (child = item->children; child != NULL; child = child->next) {
			if (skip_namespaced && child->ns != NULL)
				continue;
			if (xmlStrcmp(child->name, (const xmlChar *)"title") == 0) {
				/* xmlNodeListGetString(doc, node, 1) = node->content */
				text = xmlNodeListGetString(doc, child->children, 1);
				if (text == NULL)
					continue;
				/* Tag chars must not appear in the text itself */
				remove_char_from_str((char *)text, LINK_TAG_CHAR);
				remove_char_from_str((char *)text, ITEM_TITLE_TAG_CHAR);
				if (prm->item_title == 'y')
					fprintf(fp, "%s%s", text, prm->item_title_delimiter);
				else if (prm->item_description == 'y')
					fprintf(fp_extra, "%c%03d%s\n", ITEM_TITLE_TAG_CHAR, n, text);
				xmlFree(text);
			} else if (xmlStrcmp(child->name, text_tag) == 0) {
				text = xmlNodeListGetString(doc, child->children, 1);
				if (text == NULL)
					continue;
				/* Tag chars must not appear in the text itself */
				remove_char_from_str((char *)text, LINK_TAG_CHAR);
				remove_char_from_str((char *)text, ITEM_DES_TAG_CHAR);
				if (prm->item_description == 'y')
					fprintf(fp, "%s%s", text, prm->item_description_delimiter);
				else if (prm->item_title == 'y')
					fprintf(fp_extra, "%c%03d%s\n", ITEM_DES_TAG_CHAR, n, text);
				xmlFree(text);
			}
		}

		/* Second pass over the children: links, written after the text */
		for (child = item->children; child != NULL; child = child->next) {
			if (skip_namespaced && child->ns != NULL)
				continue;
			if (xmlStrcmp(child->name, (const xmlChar *)"link") != 0)
				continue;
			/*
			 * Node content (RSS 2.0 / RSS 1.0) or node attribute (Atom).
			 * For RSS, the 'href' attribute is tried when there is no content.
			 */
			text = NULL;
			if (feed_format != RSS_ATOM)
				text = xmlNodeListGetString(doc, child->children, 1);
			if (text == NULL)
				text = xmlGetProp(child, (const xmlChar *)"href");
			if (text == NULL)
				continue;
			/* Links beyond the maximum rank are silently dropped */
			if (n < NFEEDLINKANDOFFSETMAX + 1) {
				str_n_cpy(link_and_offset[n].url, (const char *)text,
					FILE_NAME_MAXLEN);
				fprintf(fp, "%c%03d", LINK_TAG_CHAR, n);
				n++;
			}
			xmlFree(text);
		}

		/* Optional limit on the number of items per feed (0 = no limit) */
		if (prm->n_items_per_feed != 0 && ++counter >= prm->n_items_per_feed)
			break;
	}
}

/*
 * Return the longest leading part of str (limited to 1023 bytes) that is
 * valid UTF-8, or "(not UTF-8 encoded)" if nothing valid is left.
 *
 * Meant to be called on strings that failed g_utf8_validate().
 * The result lives in a static buffer which is overwritten by the next
 * call.
 */
static const char *try_str_to_utf8(const char *str)
{
	static char	str2[1024];
	const char	*first_invalid = NULL;

	str_n_cpy(str2, str, 1023);
	/*
	 * On failure, g_utf8_validate() reports where the valid data ends,
	 * so cut there instead of dropping bytes one by one.
	 */
	if (!g_utf8_validate(str2, -1, &first_invalid) && first_invalid != NULL)
		str2[first_invalid - str2] = '\0';
	/*
	 * An empty string always validates, so test for emptiness explicitly,
	 * otherwise the fallback text could never be produced.
	 */
	if (str2[0] == '\0')
		str_n_cpy(str2, "(not UTF-8 encoded)", 1023);
	return (const char *)str2;
}

/*
 * Parse file_name and retrieve basic info about the feed it contains.
 *
 * format:     always written; set to RSS_2_0, RSS_1_0 or RSS_ATOM from
 *             the root element name ("rss", "RDF" or "feed"), otherwise
 *             left to RSS_FORMAT_UNDETERMINED.
 * feed_title: receives the first 'title' found, copied with length
 *             FEED_TITLE_MAXLEN.
 * feed_link:  receives the first 'link' found, copied with length
 *             FILE_NAME_MAXLEN.
 * feed_ttl:   receives the first 'ttl' found, copied with length 31.
 *
 * feed_* can be NULL, in which case the corresponding info is skipped.
 * Buffers are presumably expected to hold one more byte than the length
 * above - verify against str_n_cpy().
 *
 * Returns OK, FEED_UNPARSABLE, FEED_EMPTY or RSS_FORMAT_UNDETERMINED.
 */
int get_feed_info(int *format, const char *file_name, char *feed_title, char *feed_link, char *feed_ttl)
{
	xmlDoc	*doc;
	xmlNode	*root_element;

	*format = RSS_FORMAT_UNDETERMINED;
	if ((doc = xmlParseFile(file_name)) == NULL)
		return FEED_UNPARSABLE;
	if ((root_element = xmlDocGetRootElement(doc)) == NULL) {
		xmlFreeDoc(doc);
		return FEED_EMPTY;
	}

	if (xmlStrcmp(root_element->name, (const xmlChar *)"rss") == 0)
		*format = RSS_2_0;
	else if (xmlStrcmp(root_element->name, (const xmlChar *)"RDF") == 0)
		*format = RSS_1_0;
	else if (xmlStrcmp(root_element->name, (const xmlChar *)"feed") == 0)
		*format = RSS_ATOM;
	else {
		xmlFreeDoc(doc);
		/* Still RSS_FORMAT_UNDETERMINED at this point */
		return *format;
	}

	/* get_xml_first_element() requires an empty destination string */
	if (feed_title != NULL) {
		feed_title[0] = '\0';
		get_xml_first_element(root_element->children, doc,
			"title", feed_title, FEED_TITLE_MAXLEN);
	}
	if (feed_link != NULL) {
		feed_link[0] = '\0';
		get_xml_first_element(root_element->children, doc,
			"link", feed_link, FILE_NAME_MAXLEN);
	}
	if (feed_ttl != NULL) {
		feed_ttl[0] = '\0';
		get_xml_first_element(root_element->children, doc,
			"ttl", feed_ttl, 31);
	}
	xmlFreeDoc(doc);

	/* feed_title is optional: only post-process it when provided */
	if (feed_title != NULL) {
		/* Same length as used when filling feed_title above */
		if (!g_utf8_validate(feed_title, -1, NULL))
			str_n_cpy(feed_title, try_str_to_utf8(feed_title), FEED_TITLE_MAXLEN);
		/* We remove any LINK_TAG_CHAR from str because it will be used in "link tag" */
		remove_char_from_str(feed_title, LINK_TAG_CHAR);
	}
	return OK;
}

/*
 * Search some_element, its following siblings and their descendants for
 * an element called name, and copy its text content into string (with
 * length as the limit passed to str_n_cpy()).
 *
 * string must be empty on entry (string[0] = '\0'), otherwise nothing is
 * done. This is also what stops nested searches once a value was found.
 *
 * Scanning of a sibling list ends at the first sibling whose name
 * matches, even if that element has no text (string is then left empty).
 * Note that a matching sibling replaces a value found earlier in the
 * descendants of a previous sibling.
 */
void get_xml_first_element(xmlNode *some_element, xmlDoc *doc, char *name, char *string, int length)
{
	xmlNode	*node;
	xmlChar	*content;

	if (string[0] != '\0')
		return;
	for (node = some_element; node != NULL; node = node->next) {
		if (xmlStrcmp(node->name, (const xmlChar *)name) != 0) {
			/* Not this one: look inside it, then go on with the next sibling */
			get_xml_first_element(node->children, doc, name, string, length);
			continue;
		}
		/* Name matches: take its text (if any) and stop scanning this level */
		content = xmlNodeListGetString(doc, node->children, 1);
		if (content == NULL) {
			string[0] = '\0';
		} else {
			str_n_cpy(string, (const char *)content, length);
			xmlFree(content);
		}
		return;
	}
}
