import lookup from './lookup';
import lookupAll from './lookupAll';

/**
 * Builds the text of the "word" that `node` belongs to by gluing on
 * neighbouring text nodes that lie within `threshold` percent of it.
 *
 * @param {object} args
 * @param {object} args.node - Text node to start from; a falsy node yields ''.
 * @param {number} [args.threshold=0] - Maximum gap in percent (divided by 100
 *   before being compared with distances). Values <= 0 disable merging.
 * @param {string} [args.direction='right'] - 'both' also collects text on the
 *   left; any other value only looks to the right.
 * @param {object} [args.page] - When given together with direction 'both', the
 *   left-hand text is picked geometrically from `page.Texts` (using
 *   `page.Width`) instead of by following `siblings.left` links.
 * @returns {string} The merged text, trimmed.
 */
const readWord = ({ node, threshold = 0, direction = 'right', page }) => {
	if (!node) {
		return '';
	}

	const mergeEnabled = threshold > 0;
	const isTooFar = (gap) => gap > threshold / 100;

	let text = node.text;

	if (mergeEnabled && direction === 'both') {
		if (page) {
			const top = node.y;
			const bottom = node.y + node.h;
			const middle = node.y + node.h / 2;
			// Vertical tolerance: 10% of the node height on either edge.
			const slack = node.h * 0.1;

			const leftOfNode = page.Texts.filter((candidate) => {
				if (candidate.x >= node.x) {
					return false;
				}

				// Horizontal gap as a fraction of the page width.
				if (isTooFar((node.x - candidate.x) / page.Width)) {
					return false;
				}

				const candidateBottom = candidate.y + candidate.h;

				// The node's centre line crosses the candidate: same line.
				if (middle >= candidate.y && middle <= candidateBottom) {
					return true;
				}

				// Otherwise require vertical overlap beyond the slack band.
				return (
					!(candidateBottom < top + slack) &&
					!(candidate.y > bottom - slack)
				);
			});

			// Left-to-right reading order.
			leftOfNode.sort((first, second) => first.x - second.x);

			const prefix = leftOfNode.map((entry) => entry.text).join(' ');
			text = `${prefix} ${text}`;
		} else {
			let left = node.siblings.left?.node;

			while (left) {
				// The gap back towards the word is stored on the right link.
				const link = left.siblings.right;
				if (!link || isTooFar(link.distance)) {
					break;
				}

				text = `${left.text} ${text}`;
				left = left.siblings.left?.node;
			}
		}
	}

	let right = node.siblings.right?.node;

	while (mergeEnabled && right) {
		const link = right.siblings.left;
		if (!link || isTooFar(link.distance)) {
			break;
		}

		text += ` ${right.text}`;
		right = right.siblings.right?.node;
	}

	return text.trim();
};

/**
 * Collects the entries of the line that `node` sits on.
 *
 * In `absolute` mode the line is every entry of `page.Texts` whose vertical
 * extent strictly contains the node's centre, sorted by `x`; those entries
 * are returned as-is. Otherwise the line is walked through `siblings.right`
 * links, starting from `siblings.start` (or from `node` itself when there is
 * no start link or `direction` is 'after').
 *
 * @param {object} args
 * @param {object} args.node - Node on the line; a falsy node yields [].
 * @param {number} [args.threshold] - When truthy, a `siblings.down` entry
 *   within this percentage is merged into (or, in raw mode, replaces) the item.
 * @param {string} [args.direction='both'] - 'after' starts at `node`;
 *   'before' stops once `node` has been collected.
 * @param {number} [args.wordDistance=100] - Max gap to the previous entry, in
 *   percent; a larger gap ends the walk.
 * @param {number} [args.wordCount=0] - Max number of items; 0 means no limit.
 * @param {boolean} [args.absolute] - Use the geometric lookup on `page`.
 * @param {object} [args.page] - Page whose `Texts` are scanned in absolute mode.
 * @param {object} [options]
 * @param {boolean} [options.raw=false] - Return nodes instead of their text.
 * @returns {Array} Texts, or nodes when raw/absolute.
 */
const readLine = (
	{
		node,
		threshold,
		direction = 'both',
		wordDistance = 100,
		wordCount = 0,
		absolute,
		page,
	},
	{ raw = false } = {},
) => {
	if (!node) {
		return [];
	}

	if (absolute) {
		// Everything hit horizontally by the node's centre line.
		const middle = node.y + node.h / 2;
		const crossing = page.Texts.filter(
			(entry) => middle > entry.y && middle < entry.y + entry.h,
		);
		crossing.sort((first, second) => first.x - second.x);
		return crossing;
	}

	const maxGap = (wordDistance ?? 100) / 100;
	const first =
		direction === 'after' ? node : (node?.siblings?.start?.node ?? node);
	// For 'before' the walk ends at the neighbour right of `node`.
	const stopAt = direction === 'before' ? node.siblings.right?.node : null;

	const collected = [];
	let cursor = first;

	while (cursor && cursor !== stopAt) {
		if (cursor !== first && cursor.siblings.left.distance > maxGap) {
			break;
		}

		let entry = raw ? cursor : cursor.text;

		// The down link is only consulted when a threshold was supplied.
		const below = threshold ? cursor.siblings.down : null;
		if (below && below.distance <= threshold / 100) {
			if (raw) {
				entry = below.node;
			} else {
				entry += ` ${below.node.text}`;
			}
		}

		collected.push(entry);

		if (wordCount !== 0 && collected.length >= wordCount) {
			break;
		}

		cursor = cursor.siblings.right?.node;
	}

	return collected;
};

/**
 * Reads consecutive lines, starting with the line `node` sits on and moving
 * down through `siblings.down` links while each link's distance stays within
 * `threshold` percent.
 *
 * Each line is read from its `siblings.start` node rightwards. A node with no
 * start link is treated as the start of its own line, which matches the
 * fallback `readLine` uses.
 *
 * @param {object} args
 * @param {object|null|undefined} args.node - Node on the first line. A
 *   missing node (null or undefined) produces no lines instead of throwing.
 * @param {number} [args.threshold] - Max vertical gap in percent (default 100).
 * @param {boolean} [args.raw=false] - Collect nodes instead of their text.
 * @returns {[Array<Array>, object|null|undefined]} The lines and the node on
 *   the last line that was read (the input itself when nothing was read).
 */
const readLines = ({ node, threshold, raw = false }) => {
	const lines = [];
	const maxGap = (threshold ?? 100) / 100;
	let current = node;

	// Truthiness check rather than `!== null` so `undefined` ends the loop too.
	while (current) {
		const line = [];
		for (
			let c = current.siblings?.start?.node ?? current;
			c;
			c = c.siblings.right?.node
		) {
			line.push(raw === false ? c.text : c);
		}

		lines.push(line);

		const down = current.siblings?.down;
		if (down && down.distance <= maxGap) {
			current = down.node;
		} else {
			break;
		}
	}

	return [lines, current];
};

/**
 * Reads a column of entries downwards from `node`.
 *
 * Relative mode (default) follows `siblings.down` links while each link's
 * distance is within `threshold` percent. Absolute mode (`absolute` plus a
 * `page`) instead searches `page.Texts` for the next entry below the current
 * one that horizontally overlaps the ORIGINAL node's span `[x, x + w]`.
 *
 * @param {object} args
 * @param {object|null|undefined} args.node - Top entry of the column. A
 *   missing node (null or undefined) yields no items instead of throwing.
 * @param {number} [args.threshold] - Max vertical gap in percent (default 100).
 * @param {number} [args.thresholdWidth] - When truthy, the right neighbour's
 *   text is appended if it is within this percentage.
 * @param {boolean} [args.raw=false] - Collect nodes instead of their text.
 * @param {Array} [args.stoppers] - Nodes that end the column when reached;
 *   the stopper itself is not collected.
 * @param {boolean} [args.absolute=false] - Use the geometric search.
 * @param {object|null} [args.page=null] - Page scanned in absolute mode.
 * @returns {[Array, object|null|undefined]} Collected items and the node the
 *   walk ended on (null when absolute mode found nothing further).
 */
const readColumns = ({
	node,
	threshold,
	thresholdWidth,
	raw = false,
	stoppers,
	absolute = false,
	page = null,
}) => {
	const items = [];
	let current = node;

	// Truthiness check rather than `!== null` so `undefined` ends the loop too.
	while (current) {
		if (stoppers && stoppers.includes(current)) {
			break;
		}

		let text = current.text;

		// The right link is only consulted when a width threshold was given.
		const rightLink = thresholdWidth ? current.siblings.right : null;
		if (rightLink && rightLink.distance <= thresholdWidth / 100) {
			text += ` ${rightLink.node.text}`;
		}

		items.push(raw === false ? text : current);

		if (absolute && page) {
			// The column's horizontal span is fixed by the original node.
			const x1 = node.x;
			const x2 = node.x + node.w;
			const currentY = current.y;

			// NOTE(review): `find` returns the first match in `page.Texts`
			// order, so "next below" presumably relies on Texts being sorted
			// top-to-bottom — confirm against the producer of `page.Texts`.
			const nextMatch = page.Texts.find(({ y, x, w }) => {
				if (y <= currentY) {
					return false;
				}

				// Starts to the right of the column's right edge.
				if (x > x2) {
					return false;
				}

				// Ends to the left of the column's left edge.
				if (x < x1 && x + w < x1) {
					return false;
				}

				return true;
			});

			current = nextMatch ?? null;
		} else {
			const downLink = current.siblings.down;
			if (downLink && downLink.distance <= (threshold ?? 100) / 100) {
				current = downLink.node;
			} else {
				break;
			}
		}
	}

	return [items, current];
};

/**
 * Finds the first `siblings.down` link below `node` whose distance is NOT
 * smaller than `threshold` percent, skipping over closer entries.
 *
 * The same threshold applies to every hop, so an explicit threshold is not
 * replaced by the default after the first skip.
 *
 * @param {object} node - Node to start from; must have a `siblings` object.
 * @param {number} [threshold] - Minimum gap in percent (default 1).
 * @returns {object|null} The down link (an object with `distance` and
 *   `node`), or null when the chain ends first.
 */
const getDown = (node, threshold) => {
	const minGap = (threshold ?? 1) / 100;
	let link = node.siblings.down;

	while (link && link.distance < minGap) {
		link = link.node.siblings.down;
	}

	return link || null;
};

/**
 * Concatenates node texts while walking right from `node`, ending at the
 * first node that is missing, listed in `stops`, or already marked in
 * `visited`.
 *
 * Every node whose text is consumed is marked in `visited` under its `uuid`.
 *
 * @param {object|undefined} node - First node to read.
 * @param {Array} stops - Nodes at which the walk ends (not included).
 * @param {object} visited - Map of uuid to true; mutated by this call.
 * @returns {string} The texts, each followed by a single space ('' when
 *   nothing was read).
 */
const getUntil = (node, stops, visited) => {
	let joined = '';
	let cursor = node;

	while (cursor && !stops.includes(cursor) && !visited[cursor.uuid]) {
		visited[cursor.uuid] = true;
		joined += cursor.text + ' ';
		cursor = cursor.siblings.right?.node;
	}

	return joined;
};

/**
 * Reads table rows below one or more header anchors by following the
 * `siblings.down` links of every header cell.
 *
 * @param {object} args
 * @param {object|Array<object>} args.node - Header anchor node, or an array of
 *   them; each anchor's line (via `readLine`) is used as a header row.
 * @param {object} args.pdfData - Passed through to `lookup` to find the stopper.
 * @param {number} [args.threshold] - Forwarded to `getDown` for each header cell.
 * @param {number} args.linethreshold - Rows are read while the closest pending
 *   cell is nearer than this value. It is compared with link distances
 *   directly (no division by 100 happens in this function).
 * @param {number} args.linemerge - A row whose distance is below this value is
 *   folded into the previous row's `extra` list.
 * @param {*} args.stopperNode - Pattern handed to `lookup`; the node it
 *   returns ends the table when it shows up as a pending cell.
 * @returns {[Array<object>, object|Array<object>]} The rows and the `node`
 *   argument unchanged. Each row has `distance`, `$data` (cell texts in
 *   reading order), `extra`, plus one property per header text.
 */
const readTable = ({
	node,
	pdfData,
	threshold,
	linethreshold,
	linemerge,
	stopperNode,
}) => {
	const anchors = Array.isArray(node) ? node : [node];
	// Rows and the visited map are shared across all anchors.
	let rows = [];
	const visited = {};
	// Only the second element of the lookup result (the matched node) is used.
	const [, stopper] = lookup({
		op: { pattern: stopperNode },
		node: null,
		pdfData,
	});

	for (let i = 0; i < anchors.length; i += 1) {
		// Header cells as raw nodes, and for each one the pending link to the
		// cell below it (null when there is none).
		const headerNodes = readLine({ node: anchors[i] }, { raw: true });
		const headerColumns = headerNodes.map((node) =>
			getDown(node, threshold),
		);

		// Smallest pending distance; columns without a link count as 1. Seeding
		// with linethreshold means the loop below is skipped when nothing is
		// closer than that.
		let minDistance = headerColumns.reduce(
			(acc, node) => Math.min(acc, node?.distance ?? 1),
			linethreshold,
		);

		while (minDistance < linethreshold) {
			// The column whose pending cell is closest defines the next row.
			const nextRow = headerColumns.find(
				(column) => column?.distance === minDistance,
			);

			// Stop as soon as the stopper node is one of the pending cells
			if (
				stopper &&
				headerColumns.find((item) => item?.node === stopper)
			) {
				break;
			}

			const row = {
				distance: minDistance,
				$data: [],
				extra: [],
			};
			// Walk the whole line of the closest cell, left to right.
			for (
				let c = nextRow.node.siblings.start.node;
				c;
				c = c.siblings.right?.node
			) {
				const index = headerColumns.findIndex(
					(column) => column?.node === c,
				);

				// Nodes that are not a pending cell of any column are skipped
				// here; getUntil picks them up as part of a preceding cell.
				if (index === -1) {
					continue;
				}

				const headerText = headerNodes[index].text;

				// Cell text runs rightwards until another column's pending cell
				// (or an already visited node) is reached.
				const text = getUntil(
					c,
					headerColumns
						.filter((column) => column?.node && column?.node !== c)
						.map(({ node }) => node),
					visited,
				).trim();

				row[headerText] = text;
				row['$data'].push(text);
				// Advance this column to the link below the cell just read.
				headerColumns[index] = headerColumns[index].node.siblings.down;
			}

			// Columns whose pending cell was already consumed are moved on.
			// NOTE(review): the first assignment is immediately overwritten, so
			// the column ends up at the right-hand link of the node below —
			// confirm this is intended.
			for (let i = 0; i < headerColumns.length; i += 1) {
				if (visited[headerColumns[i]?.node?.uuid]) {
					headerColumns[i] = headerColumns[i].node.siblings.down;
					headerColumns[i] = headerColumns[i]?.node?.siblings?.right;
				}
			}

			minDistance = headerColumns.reduce(
				(acc, node) => Math.min(acc, node?.distance ?? 1),
				linethreshold,
			);

			// Rows in which every cell came back empty are dropped.
			if (row.$data.join('') !== '') {
				rows.push(row);
			}
		}

		// Fold tightly spaced rows into the row above them. The index steps
		// back after a splice so the same row is compared with its new
		// successor.
		for (let i = 0; i < rows.length - 1; i += 1) {
			if (rows[i + 1].distance < linemerge) {
				rows[i].extra.push(rows[i + 1]['$data'].join(', '));
				rows.splice(i + 1, 1);
				i -= 1;
			}
		}
	}

	return [rows, node];
};

/**
 * Reads a table using absolute page geometry instead of sibling links.
 *
 * For every anchor node, the page line containing it is taken as the header
 * row (reversed, so it is ordered right-to-left). The header cell matching
 * `column` identifies the rows: every entry found below it by `readColumns`
 * in absolute mode starts one table row. Text lying between two row
 * identifiers is emitted as free text lines.
 *
 * @param {object} args
 * @param {object|Array<object>} args.node - Header anchor node(s).
 * @param {object} args.pdfData - Parsed document; pages are read from
 *   `pdfData.pdfData.Pages`, each with `Lines` and `Texts`.
 * @param {number} [args.threshold] - Accepted but not used by this function.
 * @param {number} [args.linethreshold] - Accepted but not used by this function.
 * @param {number} [args.linemerge] - Accepted but not used by this function.
 * @param {string|RegExp} args.column - Pattern matching the text of the header
 *   cell whose column identifies rows.
 * @param {*} args.stopperNodes - Pattern handed to `lookupAll`; a row line
 *   containing any returned node ends that table.
 * @returns {[Array<object>]} A one-element array holding the emitted items:
 *   `{ type: 'table', data: headerTexts }`, `{ type: 'text', data }` and
 *   `{ type: 'row', data: { [headerText]: text } }`.
 */
const readTableAbsolute = ({
	node,
	pdfData,
	threshold,
	linethreshold,
	linemerge,
	column,
	stopperNodes,
}) => {
	const [, stoppers] = lookupAll({
		op: { pattern: stopperNodes },
		node: null,
		pdfData,
	});

	const joinTexts = (...parts) => parts.filter(Boolean).join(' ');

	const nodes = Array.isArray(node) ? node : [node];
	const pages = pdfData?.pdfData?.Pages ?? [];
	const items = [];

	for (let n = 0; n < nodes.length; n += 1) {
		// No `g` flag: a global regex keeps `lastIndex` between `.test` calls,
		// which would make a match on one page affect the search on the next.
		const rowIdentifierRegex = new RegExp(column, 'mi');

		for (const page of pages) {
			const startLineIndex = page.Lines.findIndex((line) =>
				line.includes(nodes[n]),
			);

			if (startLineIndex === -1) {
				continue;
			}

			// Reversed so index 0 is the right-most header cell.
			const headerRow = page.Lines[startLineIndex].toReversed();
			const headerRowIdentifier = headerRow.find(({ text }) =>
				rowIdentifierRegex.test(text),
			);

			// Without an identifier column there is nothing to split rows on.
			if (!headerRowIdentifier) {
				continue;
			}

			const [rowIdentifiers] = readColumns({
				node: headerRowIdentifier,
				threshold: 100,
				raw: true,
				absolute: true,
				page,
			});

			items.push({
				type: 'table',
				data: headerRow.map(({ text }) => text),
			});

			// Index 0 is the header cell itself, so rows start at 1.
			for (let j = 1; j < rowIdentifiers.length; j += 1) {
				const rowIdentifier = rowIdentifiers[j];

				// Everything on the same line, ordered right-to-left.
				const lineItems = readLine(
					{
						node: rowIdentifier,
						absolute: true,
						page,
					},
					{ raw: true },
				).toReversed();

				// Text strictly between the previous row identifier and this
				// one that is not part of this row's line.
				let pending = page.Texts.filter(
					({ y }) =>
						y < rowIdentifier.y && y > rowIdentifiers[j - 1].y,
				).filter((entry) => !lineItems.includes(entry));

				// Merge that text into lines. The first pending entry always
				// seeds (and is removed with) its line, so every entry is
				// emitted exactly once and the loop always terminates.
				while (pending.length > 0) {
					const [seed] = pending;
					const centerY = seed.y + seed.h / 2;
					const line = pending
						.filter(
							(entry) =>
								entry === seed ||
								(centerY > entry.y &&
									centerY < entry.y + entry.h),
						)
						.sort(({ x: x1 }, { x: x2 }) => x1 - x2);

					pending = pending.filter((entry) => !line.includes(entry));

					items.push({
						type: 'text',
						data: line.map(({ text }) => text).join(' '),
					});
				}

				if (stoppers?.some((stopper) => lineItems.includes(stopper))) {
					break;
				}

				// Map values to headers, walking both right-to-left.
				let currentHeader = headerRow[0];
				const rowItem = {};

				for (let k = 0; k < lineItems.length; k += 1) {
					const item = lineItems[k];

					if (item.x > currentHeader.x) {
						// Item starts right of the header column's left edge.
						rowItem[currentHeader.text] = joinTexts(
							rowItem[currentHeader.text],
							item.text,
						);
					} else if (
						currentHeader.siblings.left &&
						item.x + item.w > currentHeader.x &&
						item.x >
							currentHeader.siblings.left.node.x +
								currentHeader.siblings.left.node.w
					) {
						// Item is partially inside this header column but clear
						// of the next header to the left: it precedes what was
						// collected so far.
						rowItem[currentHeader.text] = joinTexts(
							item.text,
							rowItem[currentHeader.text],
						);
					} else if (currentHeader.siblings.left) {
						// Not in this column: move one header to the left and
						// re-examine the same item.
						currentHeader = currentHeader.siblings.left.node;
						k -= 1;
					} else {
						// No headers left: append to the last one.
						rowItem[currentHeader.text] = joinTexts(
							rowItem[currentHeader.text],
							item.text,
						);
					}
				}

				items.push({ type: 'row', data: rowItem });
			}
		}
	}

	return [items];
};

/**
 * Appends every entry of `nodes` to the `visited` list, in order.
 *
 * @param {Array} visited - List to extend; mutated in place.
 * @param {Array} nodes - Entries to record.
 * @returns {void}
 */
const visitNodes = (visited, nodes) => {
	for (const entry of nodes) {
		visited.push(entry);
	}
};

/**
 * Fills the `$N` placeholders of `template` with capture groups of `matches`.
 *
 * Done in a single pass so that captured text is inserted literally (it is
 * never re-scanned for placeholders or replacement patterns). For a run of
 * digits the longest valid group index wins: `$10` is group 10 when it
 * exists, otherwise group 1 followed by "0". Placeholders that name no
 * existing group are left as they are; groups that did not participate in
 * the match become ''.
 *
 * @param {string} template - Result template, e.g. '$1'.
 * @param {Array} matches - Result of `RegExp.prototype.exec`.
 * @returns {string} The filled-in template.
 */
const substituteGroups = (template, matches) =>
	template.replace(/\$(\d+)/g, (token, digits) => {
		for (let length = digits.length; length > 0; length -= 1) {
			const prefix = digits.slice(0, length);
			const index = Number(prefix);

			// `String(index) === prefix` rejects zero-padded forms such as
			// `$01`, which therefore read as `$0` followed by "1".
			if (String(index) === prefix && index < matches.length) {
				return `${matches[index] ?? ''}${digits.slice(length)}`;
			}
		}

		return token;
	});

/**
 * Executes one read operation against `node` and optionally post-processes
 * the outcome with a regular-expression selector.
 *
 * `op.items` selects what is read: 'line', 'lines', 'column', 'table', or
 * anything else for a single word (or, when `node` is an array, the text of
 * each node).
 *
 * @param {object} args
 * @param {object} args.op - Operation description. Fields read here: `items`,
 *   `threshold`, `thresholdWidth`, `thresholdWord`, `direction`,
 *   `wordDistance`, `wordCount`, `column`, `stopper`, `linethreshold`,
 *   `linemerge`, `selector`, `result`.
 * @param {object|Array<object>} args.node - Node(s) to read from.
 * @param {*} args.results - Previous results; the initial value of the result
 *   and the fallback when a selector does not match a scalar result.
 * @param {object} args.pdfData - Parsed document, used for stopper lookups
 *   and for the table-by-column reader.
 * @returns {[*, object|Array<object>]} The operation result and the node the
 *   read ended on ('lines' and 'column' may move it; otherwise unchanged).
 */
const read = ({ op, node, results, pdfData }) => {
	let operationResults = results;
	if (op.items === 'line') {
		operationResults = readLine({
			node,
			threshold: op.threshold,
			direction: op.direction ?? 'both',
			wordDistance: op.wordDistance ?? 100,
			wordCount: op.wordCount ?? 0,
		});
	} else if (op.items === 'lines') {
		[operationResults, node] = readLines({ node, threshold: op.threshold });
	} else if (op.items === 'column') {
		const [, stoppers] = lookupAll({
			op: { pattern: op.stopper },
			node: null,
			pdfData,
		});

		[operationResults, node] = readColumns({
			node,
			threshold: op.threshold,
			thresholdWidth: op.thresholdWidth,
			stoppers: stoppers,
		});
	} else if (op.items === 'table') {
		if (op.column) {
			// Table whose rows are identified by the entries of one column.
			let items = [];
			// Shared across anchors so a node is only assigned to one table.
			const visited = [];

			const [, stoppers] = lookupAll({
				op: { pattern: op.stopper },
				node: null,
				pdfData,
			});

			const nodes = Array.isArray(node) ? node : [node];

			for (let n = 0; n < nodes.length; n += 1) {
				const pagesItems = [];
				// A fresh regex per anchor, used for a single header search.
				const rowIdentifierRegex = new RegExp(op.column, 'gmi');

				// All page lines from the anchor's line downwards.
				let lines = [];

				for (let i = 0; i < pdfData?.pdfData?.Pages?.length; i += 1) {
					const pageLines = pdfData.pdfData.Pages[i].Lines;
					const startLineIndex = pageLines.findIndex((lineNodes) =>
						lineNodes.includes(nodes[n]),
					);

					if (startLineIndex === -1) {
						continue;
					}

					lines = pageLines.slice(startLineIndex);
				}

				if (lines.length === 0) {
					continue;
				}

				// Reversed, so the last element is the left-most header cell.
				const headerRow = (lines[0] ?? []).toReversed();
				const headerRowIdentifier = headerRow.find((headerNode) =>
					rowIdentifierRegex.test(headerNode.text),
				);

				const [rowIdentifiers] = readColumns({
					node: headerRowIdentifier,
					threshold: 100,
					raw: true,
				});

				// Build row groups: a line holding a row identifier becomes a
				// table row, any other line becomes free text filed under the
				// last header of the reversed header row.
				for (let i = 1; i < lines.length; i += 1) {
					const line = lines[i].filter(
						(lineNode) => !visited.includes(lineNode),
					);

					const lineText = line.map(({ text }) => text).join('');
					if (lineText === '') {
						continue;
					}

					visitNodes(visited, line);

					if (
						stoppers &&
						stoppers.length > 0 &&
						stoppers.some((stopper) => line.includes(stopper))
					) {
						break;
					}

					if (line.some((lineNode) => rowIdentifiers.includes(lineNode))) {
						pagesItems.push({
							data: {},
							line: line,
						});
					} else {
						pagesItems.push({
							data: {
								[headerRow[headerRow.length - 1].text]: line
									.map(({ text }) => text)
									.join(' '),
							},
						});
					}
				}

				// Detect column values: each cell goes to the header it
				// overlaps horizontally, or to the header used by the cell
				// processed just before it when it overlaps none.
				for (let i = 0; i < pagesItems.length; i += 1) {
					if (!pagesItems[i].line) {
						continue;
					}

					const rowData = pagesItems[i].data;
					let lastMatch = headerRow[headerRow.length - 1];

					// Cells are visited last-to-first, hence the prepend below.
					for (
						let j = pagesItems[i].line.length - 1;
						j >= 0;
						j -= 1
					) {
						const cell = pagesItems[i].line[j];
						if (!cell) {
							continue;
						}

						const headerMatch =
							headerRow.find(
								(headerNode) =>
									headerNode.x <= cell.x + cell.w &&
									cell.x <= headerNode.x + headerNode.w,
							) ?? lastMatch;

						lastMatch = headerMatch;

						if (rowData[headerMatch.text]) {
							rowData[headerMatch.text] = `${cell.text} ${
								rowData[headerMatch.text]
							}`;
						} else {
							rowData[headerMatch.text] = cell.text;
						}
					}
				}

				items = [...items, ...pagesItems];
			}

			operationResults = items.map(({ data }) => data);
		} else {
			[operationResults] = readTable({
				node,
				pdfData,
				threshold: op.threshold,
				linethreshold: op.linethreshold ? op.linethreshold / 100 : 0.05,
				linemerge: op.linemerge ? op.linemerge / 100 : 0.013,
				stopperNode: op.stopper,
			});
		}
	} else {
		if (Array.isArray(node)) {
			operationResults = node.map(({ text }) => text);
		} else {
			operationResults = readWord({
				node,
				threshold: op.thresholdWord,
				direction: op.direction,
			});
		}
	}

	if (op.selector) {
		try {
			// No `g` flag: `exec` then always searches from the start, so no
			// `lastIndex` bookkeeping is needed between entries.
			const source = new RegExp(op.selector, 'mi');
			const template = op.result ?? '$1';

			if (Array.isArray(operationResults)) {
				// Entries that do not match keep their original value.
				for (let i = 0; i < operationResults.length; i += 1) {
					const matches = source.exec(operationResults[i].toString());
					if (matches) {
						operationResults[i] = substituteGroups(template, matches);
					}
				}
			} else {
				// A scalar that does not match falls back to the incoming results.
				const matches = source.exec(operationResults.toString());
				operationResults = matches
					? substituteGroups(template, matches)
					: results;
			}
		} catch {
			// Deliberately best-effort: an invalid selector or an entry that
			// cannot be stringified leaves the results as they are at that
			// point rather than failing the whole read.
		}
	}

	return [operationResults, node];
};

export default read;
