// Tag names (lower-case) whose elements are removed from the document together
// with everything inside them. Only membership matters; order is irrelevant.
const DECOMPOSE_TAGS = [
  // document metadata, scripting and styling
  'head',
  'link',
  'meta',
  'noscript',
  'script',
  'style',
  'template',
  // vector graphics
  'g',
  'path',
  'rect',
  'svg',
  // page chrome and embedded content
  'footer',
  'header',
  'iframe',
  'nav',
  // NOTE(review): listing 'em' drops emphasised text entirely rather than
  // unwrapping it — confirm this is intended.
  'em',
];

// Attribute names that survive attribute cleanup; every other attribute is stripped.
const KEEP_ATTRIBUTES = ['title', 'href'];

// Tag names whose element is replaced by its own children (the wrapper goes, the content stays).
const UNWRAP_TAGS = ['span'];

/**
 * Parses an HTML string, runs a fixed sequence of clean-up passes over the
 * resulting document, and returns the document serialised with `XMLSerializer`.
 *
 * @param htmlString - Raw HTML markup to clean.
 * @returns The serialised, cleaned document.
 */
export const clean_html = (htmlString: string): string => {
  const doc = domParseFromString(htmlString);

  // Each pass is a full traversal of the tree; the passes run in exactly this order.
  const passes: Array<(node: ChildNode) => void> = [
    decomposeTags,
    unwrapTags,
    cleanupNodeAttributes,
    normalizeNodes,
    cleanupEmptyNodes,
    removeUselessDivs,
  ];
  for (const pass of passes) {
    visitNodes(doc, pass);
  }

  return new XMLSerializer().serializeToString(doc);
};

/**
 * Parses markup into a `Document` using a fresh `DOMParser` and the
 * `text/html` MIME type.
 *
 * @param htmlString - Markup to parse.
 * @returns The parsed document.
 */
const domParseFromString = (htmlString: string): Document =>
  new DOMParser().parseFromString(htmlString, 'text/html');

/**
 * Walks the subtree rooted at `node` depth-first and invokes `callback` on
 * every node after all of its children have been visited. The root is skipped
 * when it is a document node.
 *
 * @param node - Root of the subtree to traverse.
 * @param callback - Invoked once per visited node; it may mutate the DOM.
 */
const visitNodes = (node: ChildNode | Document, callback: (node: ChildNode) => void) => {
  // Iterate over a snapshot: the callback is allowed to add or remove nodes,
  // and `childNodes` is a live list.
  for (const child of Array.from(node.childNodes)) {
    visitNodes(child, callback);
  }

  // The document itself is never handed to the callback.
  if (node.nodeType === Node.DOCUMENT_NODE) {
    return;
  }
  callback(node as ChildNode);
};

/**
 * Removes an element from its parent when its text content is empty or
 * whitespace-only. Non-element nodes are left alone.
 *
 * @param node - Node to inspect.
 */
const cleanupEmptyNodes = (node: ChildNode) => {
  if (node.nodeType !== Node.ELEMENT_NODE) {
    return;
  }

  const trimmedText = (node as Element).textContent?.trim();
  if (trimmedText === '') {
    node.remove();
  }
};

/**
 * Unwraps a `div` that does not group anything. A div is kept only when it has
 * at least two element children, or exactly one element child alongside
 * non-whitespace direct text. Every other div (including one holding only
 * text) is replaced by its children.
 *
 * @param node - Node to inspect; anything other than a `div` element is ignored.
 */
const removeUselessDivs = (node: ChildNode) => {
  if (node.nodeType !== Node.ELEMENT_NODE) {
    return;
  }
  if ((node as Element).tagName.toLowerCase() !== 'div') {
    return;
  }

  // Tally direct children only; descendants further down do not count.
  let elementChildren = 0;
  let directText = '';
  for (const child of Array.from(node.childNodes)) {
    switch (child.nodeType) {
      case Node.ELEMENT_NODE:
        elementChildren += 1;
        break;
      case Node.TEXT_NODE:
        directText += child.textContent;
        break;
    }
  }

  const groupsContent = elementChildren >= 2 || (elementChildren === 1 && directText.trim() !== '');
  if (groupsContent) {
    return;
  }

  unwrapNode(node);
};

/**
 * Normalises the whitespace at the edges of a text node. Non-text nodes are
 * left untouched.
 *
 * - A whitespace-only (or empty) text node becomes a single space.
 * - Leading/trailing whitespace is stripped, except that a single space is
 *   kept on a side where the node has a sibling and the original text had
 *   whitespace there. Without that, `Click <a>here</a> now` would serialise
 *   as `Click<a>here</a>now`, gluing words together.
 *
 * @param node - Node to normalise in place.
 */
const normalizeNodes = (node: ChildNode) => {
  if (node.nodeType !== Node.TEXT_NODE) {
    return;
  }

  const raw = node.textContent ?? '';
  const core = raw.trim();
  if (core === '') {
    node.textContent = ' ';
    return;
  }

  // Keep one separating space only where there is a neighbour to separate from.
  const lead = node.previousSibling && /^\s/.test(raw) ? ' ' : '';
  const trail = node.nextSibling && /\s$/.test(raw) ? ' ' : '';
  node.textContent = lead + core + trail;
};

/**
 * Strips every attribute from an element except those whose local name is
 * listed in `KEEP_ATTRIBUTES`. Non-element nodes are ignored.
 *
 * @param node - Node whose attributes should be cleaned.
 */
const cleanupNodeAttributes = (node: ChildNode) => {
  if (node.nodeType !== Node.ELEMENT_NODE) {
    return;
  }
  const elementNode = node as Element;

  // `attributes` is live, so decide what to drop on a snapshot before mutating.
  const attributesToRemove = Array.from(elementNode.attributes).filter(
    attr => !KEEP_ATTRIBUTES.includes(attr.localName),
  );

  // Remove the Attr node itself rather than calling removeAttribute(localName):
  // removeAttribute matches on the qualified name, so a prefixed attribute
  // (e.g. `xml:lang` in foreign content, whose local name is `lang`) would
  // otherwise never be removed.
  for (const attr of attributesToRemove) {
    elementNode.removeAttributeNode(attr);
  }
};

/**
 * Removes comment nodes, and removes any element whose lower-cased tag name is
 * listed in `DECOMPOSE_TAGS`. Removal takes the node's whole subtree with it.
 *
 * @param node - Node to inspect.
 */
const decomposeTags = (node: ChildNode) => {
  switch (node.nodeType) {
    case Node.COMMENT_NODE:
      node.remove();
      break;

    case Node.ELEMENT_NODE: {
      const { tagName } = node as Element;
      if (tagName !== undefined && DECOMPOSE_TAGS.includes(tagName.toLowerCase())) {
        node.remove();
      }
      break;
    }
  }
};

/**
 * Unwraps any element whose lower-cased tag name is listed in `UNWRAP_TAGS`:
 * the element is dropped and its children take its place.
 *
 * @param node - Node to inspect; non-element nodes are ignored.
 */
const unwrapTags = (node: Node): void => {
  if (node.nodeType !== Node.ELEMENT_NODE) {
    return;
  }

  const { tagName } = node as Element;
  const shouldUnwrap = tagName !== undefined && UNWRAP_TAGS.includes(tagName.toLowerCase());
  if (shouldUnwrap) {
    unwrapNode(node);
  }
};

/**
 * Replaces `node` with its own children: each child is moved, in order, to
 * just before `node` in the parent, and `node` is then removed.
 *
 * A node without a parent is left untouched — there is nowhere to move its
 * children to.
 *
 * @param node - Node to unwrap.
 */
const unwrapNode = (node: Node) => {
  const parent = node.parentNode;
  if (!parent) {
    // Without this guard the loop below could never drain the children of a
    // detached node and would spin forever.
    return;
  }

  // insertBefore moves the child out of `node`, so firstChild advances each turn.
  while (node.firstChild) {
    parent.insertBefore(node.firstChild, node);
  }

  parent.removeChild(node);
};
