import type MarkdownIt from 'markdown-it';
import type StateCore from 'markdown-it/lib/rules_core/state_core';
import type Token from 'markdown-it/lib/token';

/**
 * Matches a single citation marker such as `[^12^]`; the capture group holds the digits.
 *
 * Intentionally NOT global: `test()` on a global regex is stateful across calls, and
 * `split()` with a capturing group interleaves the captured digits with the surrounding text.
 */
const CITATION_REGEX = /\[\^(\d+)\^\]/;

/** Maps a citation number as written in the markdown to the number that is displayed. */
type DisplayNumberLookup = (rawCitation: string) => number;

/** Creates a non-block token of the given type carrying `content` at nesting `level`. */
const createInlineToken = (
	state: StateCore,
	type: string,
	content: string,
	level: number,
): Token => {
	const token = new state.Token(type, '', 0);
	token.content = content;
	token.block = false;
	token.level = level;
	return token;
};

/**
 * Splits one text token that contains citation markers into an alternating
 * sequence of `text` and `status` tokens.
 *
 * Rules applied while walking the split parts:
 * - Every text fragment except the very last one gets a single trailing space appended,
 *   so a citation is always separated from whatever follows the fragment.
 * - A citation equal to the previous one is dropped when only whitespace separates them
 *   (`[^1^][^1^]` and `[^1^] [^1^]` both render a single citation).
 * - A whitespace-only fragment that follows a dropped citation is dropped as well,
 *   otherwise it would add a second separator after the citation that was kept.
 * - Any fragment with visible text ends the "consecutive" run, so the same citation
 *   may be rendered again after it.
 *
 * @param state - core state, used only for its `Token` constructor
 * @param source - the text token being expanded; its `content` and `level` are read
 * @param displayNumberFor - resolves a raw citation number to its display number
 * @returns the replacement tokens, in document order
 */
const expandCitations = (
	state: StateCore,
	source: Token,
	displayNumberFor: DisplayNumberLookup,
): Token[] => {
	/**
	 * Example content: `Hello [^1^][^1^].`
	 * `parts` is ['Hello ', '1', '', '1', '.'] - even indexes are text, odd indexes are citation numbers.
	 */
	const parts = source.content.split(CITATION_REGEX);
	const finalIndex = parts.length - 1;
	const expanded: Token[] = [];

	let previousCitation: string | null = null;
	let previousWasDuplicate = false;

	parts.forEach((part, index) => {
		if (index % 2 === 1) {
			// Citation part: skip it when it repeats the citation rendered just before.
			previousWasDuplicate = part === previousCitation;
			if (previousWasDuplicate) {
				return;
			}
			previousCitation = part;

			const label = displayNumberFor(part).toString();
			const statusToken = createInlineToken(state, 'status', label, source.level);
			statusToken.attrs = [
				['color', 'blue'],
				['text', label],
				// localId is used to identify the citation token in the DOM
				// this overrides the uuid that is normally used
				['localId', `citation-${label}`],
			];
			expanded.push(statusToken);
			return;
		}

		// Text part.
		if (part.trim() === '') {
			// The kept citation already has its separator; do not emit another one.
			if (previousWasDuplicate) {
				return;
			}
		} else {
			// Visible text between citations: the next citation is no longer "consecutive".
			previousCitation = null;
		}

		const text = index === finalIndex ? part : `${part} `;
		expanded.push(createInlineToken(state, 'text', text, source.level));
	});

	return expanded;
};

/**
 * markdown-it core rule that turns citation markers (`[^1^]`, `[^2^]`, ...) found in
 * the text children of inline tokens into `status` tokens.
 *
 * Citations are renumbered by order of first appearance across the whole document,
 * starting at 1, and a repeated source keeps the number it was first given. For example
 * "This is a citation[^5^]. This is another citation[^8^]. This is the final citation[^5^]."
 * becomes
 * "This is a citation 1. This is another citation 2. This is the final citation 1."
 * where each number is a `status` token with `color`, `text` and `localId` attributes.
 *
 * Only children of type `text` are inspected; every other token is kept untouched.
 * Inline tokens are updated in place (their `children` array is replaced).
 *
 * @param state - the markdown-it core state whose `tokens` are rewritten
 */
const citation = (state: StateCore): void => {
	// Raw citation number -> display number, shared by every inline token of this run.
	const displayNumbers = new Map<string, number>();
	const displayNumberFor: DisplayNumberLookup = (rawCitation) => {
		let displayNumber = displayNumbers.get(rawCitation);
		if (displayNumber === undefined) {
			displayNumber = displayNumbers.size + 1;
			displayNumbers.set(rawCitation, displayNumber);
		}
		return displayNumber;
	};

	for (const token of state.tokens) {
		// Nothing to rewrite outside inline tokens or when there are no children at all.
		if (token.type !== 'inline' || token.children?.length === 0) {
			continue;
		}

		const rebuiltChildren: Token[] = [];
		// A missing (null) children list is normalised to an empty array.
		for (const child of token.children ?? []) {
			if (child.type === 'text' && CITATION_REGEX.test(child.content)) {
				rebuiltChildren.push(...expandCitations(state, child, displayNumberFor));
			} else {
				rebuiltChildren.push(child);
			}
		}
		token.children = rebuiltChildren;
	}
};

/**
 * markdown-it plugin entry point: registers the `citation` core rule
 * under the name 'citation' via `md.core.ruler.push`.
 */
export default (md: MarkdownIt) => {
	return md.core.ruler.push('citation', citation);
};
