/**
 * Describes a DOM element together with a tag saying which kind of
 * content (`'text'` or `'image'`) it is treated as.
 */
export interface HtmlCandidate {
  /** The DOM element this candidate refers to. */
  element: Element
  /** Kind of content the candidate represents. */
  type: 'text' | 'image'
  /** Optional XPath string for `element` (presumably produced by `generateXpath`; confirm against callers). */
  xPath?: string
}

/**
 * Returns the text that belongs directly to `element`, ignoring text held by
 * descendant elements.
 *
 * Only child nodes whose type is `Node.TEXT_NODE` are considered. Each one is
 * trimmed, blank results are dropped, and the remainder is joined with a
 * single space.
 *
 * @param element - Element whose immediate text nodes are read.
 * @returns The joined text, or an empty string when there is none.
 */
function getDirectTextContent(element: Element): string {
  const pieces: string[] = []

  for (const child of Array.from(element.childNodes)) {
    if (child.nodeType !== Node.TEXT_NODE) { continue }

    const trimmed = child.textContent?.trim()
    // Skips whitespace-only text nodes as well as missing text content.
    if (trimmed) { pieces.push(trimmed) }
  }

  return pieces.join(' ')
}

/**
 * Tells whether `element` carries direct text while none of its child
 * elements do.
 *
 * Only the immediate children are inspected; deeper descendants are not.
 *
 * @param element - Element to inspect.
 * @returns `true` when `element` has direct text and every child element has none.
 */
function isDeepestContentElement(element: Element): boolean {
  if (getDirectTextContent(element).length === 0) { return false }

  return Array.from(element.children)
    .every(child => getDirectTextContent(child).length === 0)
}

/**
 * Tells whether `element` is an `img` tag, compared case-insensitively.
 *
 * @param element - Element to inspect.
 * @returns `true` when the lower-cased tag name equals `img`.
 */
export function isImageElement(element: Element): boolean {
  const tag = element.tagName.toLowerCase()
  return tag === 'img'
}

/**
 * Tells whether `element` is an `a` tag or has an `a` tag among its ancestors.
 *
 * The walk follows `parentElement` links until one is missing.
 *
 * @param element - Element at which the upward search starts.
 * @returns `true` as soon as an `a` tag is found, otherwise `false`.
 */
function isElementInsideAnchorTag(element: Element): boolean {
  let current: Element | null = element

  while (current) {
    if (current.tagName.toLowerCase() === 'a') { return true }
    current = current.parentElement
  }

  return false
}

/**
 * Decides whether `element` qualifies as a text candidate.
 *
 * `style` and `script` tags never qualify. Any other element qualifies when
 * it is, or sits inside, an `a` tag and is the deepest element carrying
 * direct text.
 *
 * @param element - Element to evaluate.
 * @returns `true` when all of the conditions above hold.
 */
export function elementIsTextCandidate(element: Element): boolean {
  if (isStyleOrScriptTag(element) || !isElementInsideAnchorTag(element)) {
    return false
  }

  return isDeepestContentElement(element)
}
/**
 * Decides whether `element` qualifies as an image candidate.
 *
 * `style` and `script` tags are rejected first; after that the element must
 * be an `img` tag.
 *
 * @param element - Element to evaluate.
 * @returns `true` when `element` is an image element.
 */
export function elementIsImageCandidate(element: Element): boolean {
  return !isStyleOrScriptTag(element) && isImageElement(element)
}

/**
 * Tells whether `el` is a `style` or `script` tag, compared case-insensitively.
 *
 * @param el - Element to inspect.
 * @returns `true` for `style` and `script` tags, otherwise `false`.
 */
function isStyleOrScriptTag(el: Element): boolean {
  switch (el.tagName.toLowerCase()) {
    case 'style':
    case 'script':
      return true
    default:
      return false
  }
}

/**
 * Builds a positional XPath string for `element` by walking up its ancestors.
 *
 * Every step has the form `tag[n]`, where `tag` is the lower-cased `tagName`
 * and `n` is the 1-based position of the element among the child elements of
 * its parent that share the same `tagName`.
 *
 * The topmost node contributes `'/'` and each step is prefixed with another
 * `'/'`, so the result for any element that has a parent starts with `//`
 * (for example `//html[1]/body[1]/div[2]`).
 *
 * @param element - Element to describe.
 * @returns The generated path, or `'/'` when `element` has no parent node.
 */
export function generateXpath(element: Element): string {
  const parent = element.parentNode
  if (!parent) {
    return '/'
  }

  const position = Array.from(parent.childNodes)
    // nodeType 1 is ELEMENT_NODE; the guard lets `tagName` be read type-safely.
    .filter((node): node is Element => node.nodeType === 1)
    .filter(sibling => sibling.tagName === element.tagName)
    .indexOf(element) + 1

  // A non-element parent (document or fragment) has no parent of its own, so
  // it contributes the same '/' that the parentless base case above returns.
  const parentElement = element.parentElement
  const parentPath = parentElement ? generateXpath(parentElement) : '/'

  return `${parentPath}/${element.tagName.toLowerCase()}[${position}]`
}

/**
 * Checks whether `element` is one of the nodes that `xpath` selects when
 * evaluated against the global `document`.
 *
 * The expression is evaluated into an ordered node snapshot, and each snapshot
 * item is compared with `element` by identity.
 *
 * @param element - Element to look for in the evaluation result.
 * @param xpath - XPath expression to evaluate.
 * @returns `true` when `element` is among the selected nodes, otherwise `false`.
 * @throws Any error raised by `XPathEvaluator.evaluate` (for example for a
 *   malformed expression) propagates to the caller; nothing is caught here.
 */
export function elementMatchesXPath(element: Element, xpath: string): boolean {
  const evaluator = new XPathEvaluator()
  const snapshot = evaluator.evaluate(
    xpath,
    document,
    null,
    XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
    null,
  )

  for (let i = 0; i < snapshot.snapshotLength; i++) {
    if (snapshot.snapshotItem(i) === element) {
      return true
    }
  }

  return false
}
