import sanitizeHtml from "sanitize-html";
import { convertMarkdownToHtml } from "./convertMarkdownToHtml";

/**
 * Reduces an HTML string to text by running it through `sanitizeHtml`
 * with an empty tag allow-list and an empty attribute allow-list.
 *
 * @param s - HTML (or plain text) to strip.
 * @returns Whatever `sanitizeHtml` produces when no tag or attribute is permitted.
 */
export const getPlainTextFromHtml = (s: string) => {
  // Nothing is allowed through: no tags, no attributes.
  const stripEverything = { allowedTags: [], allowedAttributes: {} };
  return sanitizeHtml(s, stripEverything);
};

/**
 * Sanitizes HTML using `sanitizeHtml` with no options, i.e. the library's
 * default allow-lists.
 *
 * @param s - HTML to sanitize.
 * @returns The sanitized HTML string.
 */
export const strongSanitizeHtml = (s: string) => sanitizeHtml(s);

/**
 * Converts HTML to text while keeping paragraph boundaries visible: every
 * opening or closing `<p…>` tag is replaced by a newline first, then the
 * result goes through `sanitizeHtml` with no tags and no attributes allowed.
 *
 * @param s - HTML to convert.
 * @returns The sanitized text with newlines where paragraph tags were.
 */
export const getPlainTextWithStrippedParagraphs = (s: string) => {
  // NOTE(review): this pattern matches any tag whose name starts with "p"
  // (e.g. <pre>), so those also become newlines — confirm that is intended.
  const paragraphTag = /<\/?p[^>]*>/g;
  const newlineSeparated = s.replace(paragraphTag, "\n");

  // Nothing is allowed through: no tags, no attributes.
  const stripEverything = { allowedTags: [], allowedAttributes: {} };
  return sanitizeHtml(newlineSeparated, stripEverything);
};

/**
 * Removes every opening and closing `<p>` tag (with or without attributes)
 * from the string, leaving the content between them untouched.
 *
 * Only real paragraph tags are removed: the tag name must be exactly `p`
 * (case-insensitive), so `<pre>`, `<picture>`, `<param>` and similar tags
 * are preserved.
 *
 * @param s - HTML string to process.
 * @returns The input with all `<p …>` / `</p>` tags deleted.
 */
export const stripAllParagraphTags = (s: string) => {
  // The lookahead requires whitespace, "/" or ">" right after the "p", which
  // stops the pattern from swallowing other tags that merely start with "p".
  return s.replace(/<\/?p(?=[\s/>])[^>]*>/gi, "");
};

// Renders the body of a flowArtifact as HTML via convertMarkdownToHtml.
// Every line that starts with "#" is first prefixed with a newline so that
// the markdown converter treats it as a header (including the very first one).
const formatFlowArtifact = (content: string) => {
  const withHeaderSpacing = content.replace(/^#(.*)/gm, "\n$&");
  return convertMarkdownToHtml(withHeaderSpacing);
};

// Regex patterns compiled once at module load and reused by
// addSpacingAndConvertToHtml.
const REGEX = {
  // A byte-order mark (U+FEFF) at the very start of the string.
  BOM: /^\uFEFF/,
  // A triple-backtick fence marker, optionally followed by the word
  // "markdown". Only the marker is matched, not the fenced content.
  MARKDOWN_BLOCK: /```(?:markdown)?/g,
  // A complete <flowThinking ...>...</flowThinking> element; the `s` flag lets
  // `.` span newlines and `.*?` keeps each match as short as possible.
  FLOW_THINKING: /<flowThinking[^>]*>.*?<\/flowThinking>/gs,
  // NOTE(review): not referenced in the visible code; appears intended to
  // match text that sits outside of tags — confirm before relying on it.
  NON_TAG_CONTENT: /(?![^<]*>|[^<>]*<\/)([^<]+)/g,
  // A complete flowArtifact element. Group 1 captures the raw attribute text,
  // group 2 the (lazily matched, possibly multi-line) inner content.
  FLOW_ARTIFACT: /<flowArtifact([^>]*)>([\s\S]*?)<\/flowArtifact>/g,
  // Same as FLOW_ARTIFACT, but wrapped directly in a <p>...</p> pair.
  WRAPPED_FLOW_ARTIFACT:
    /<p><flowArtifact([^>]*)>([\s\S]*?)<\/flowArtifact><\/p>/g,
  // Any single tag-like span "<...>".
  // NOTE(review): not referenced in the visible code.
  HTML_TAG: /<[^>]+>/g,
  // An empty paragraph immediately following a closing flowArtifact tag.
  TRAILING_EMPTY_P: /<\/flowArtifact><p><\/p>/g
};

/**
 * Turns text that mixes markdown, HTML tags and `<flowArtifact>` blocks into
 * sanitized HTML.
 *
 * Steps, in order:
 *  1. remove a leading BOM, code-fence markers and `<flowThinking>` elements;
 *  2. split the text into tag / flowArtifact segments and the text between them;
 *  3. run text segments and flowArtifact bodies through the markdown
 *     converter, leaving other tag segments as they are;
 *  4. sanitize once, additionally allowing the `flowArtifact` and
 *     `flowThinking` tags with any attribute;
 *  5. unwrap flowArtifacts from `<p>` wrappers, trim, and drop an empty `<p>`
 *     directly after a flowArtifact.
 *
 * @param s - Raw text to convert.
 * @returns The converted, sanitized and cleaned-up HTML string.
 */
export const addSpacingAndConvertToHtml = (s: string) => {
  const preprocessed = s
    .replace(REGEX.BOM, "")
    .replace(REGEX.MARKDOWN_BLOCK, "")
    .replace(REGEX.FLOW_THINKING, "");

  // The capturing group makes split() keep the matched tags and flowArtifact
  // blocks in the result, interleaved with the text found between them.
  const segments = preprocessed.split(
    /((?![^<]*>|[^<>]*<\/)(?:<[^>]+>)|<flowArtifact[^>]*>[\s\S]*?<\/flowArtifact>)/
  );

  // Converts one segment according to what kind of content it holds.
  const renderSegment = (segment: string) => {
    if (!segment) return "";

    // flowArtifact: keep the tag and its attributes, convert only the body.
    if (segment.startsWith("<flowArtifact")) {
      return segment.replace(
        REGEX.FLOW_ARTIFACT,
        (_match, attributes, content) =>
          `<flowArtifact${attributes}>${formatFlowArtifact(content)}</flowArtifact>`
      );
    }

    // Any other tag passes through untouched; plain text is treated as markdown.
    return segment.startsWith("<") ? segment : convertMarkdownToHtml(segment);
  };

  const assembled = segments.map((segment) => renderSegment(segment)).join("");

  // One sanitization pass over the assembled document. Tag and attribute
  // names are not lower-cased so the camelCase flow tags keep their spelling.
  const flowTags = ["flowArtifact", "flowThinking"];
  const sanitized = sanitizeHtml(assembled, {
    allowedTags: sanitizeHtml.defaults.allowedTags.concat(flowTags),
    allowedAttributes: {
      ...sanitizeHtml.defaults.allowedAttributes,
      flowArtifact: ["*"],
      flowThinking: ["*"]
    },
    parser: {
      lowerCaseTags: false,
      lowerCaseAttributeNames: false
    }
  });

  // Final cleanup of paragraph artefacts around flowArtifact elements.
  return sanitized
    .replace(REGEX.WRAPPED_FLOW_ARTIFACT, "<flowArtifact$1>$2</flowArtifact>")
    .trim()
    .replace(REGEX.TRAILING_EMPTY_P, "</flowArtifact>");
};

/**
 * Heuristic check for HTML content: true when the string contains a `<`
 * (optionally followed by `/`) and an ASCII letter, with a `>` somewhere
 * later in the string.
 *
 * @param s - Text to inspect.
 * @returns `true` if something tag-like is present, otherwise `false`.
 */
export const isHtml = (s: string): boolean => /<\/?[a-z][\s\S]*>/i.test(s);

/**
 * Produces sanitized HTML from text that is either markdown or already HTML.
 *
 * - Input detected as HTML (see `isHtml`) keeps its markup; newlines become
 *   `<br>`.
 * - Any other input is run through `convertMarkdownToHtml`.
 *
 * Tags that are not on the pass-through list (sanitize-html defaults plus
 * `br`, `p` and `img`) are entity-escaped so they are shown literally rather
 * than interpreted. We need this so that custom tags (e.g. instructions for
 * Anthropic prompts) can be displayed as text; later, we may want to add an
 * input type that can accept raw HTML directly.
 *
 * @param s - Markdown or HTML text. Non-string values yield `""`.
 * @returns The sanitized HTML string.
 */
export const getHtmlFromText = (s: string) => {
  // Runtime guard for callers that bypass the type system.
  if (typeof s !== "string") return "";

  // Single source of truth for tags that may pass through as real markup.
  // The escaping step below must use the same list as the sanitizer;
  // otherwise tags such as <img> get escaped before the sanitizer ever sees
  // them and the allow-list entry has no effect.
  const passThroughTags = sanitizeHtml.defaults.allowedTags.concat([
    "br",
    "p",
    "img"
  ]);

  let text = isHtml(s) ? s.replace(/\n/g, "<br>") : convertMarkdownToHtml(s);

  // Detect custom tags and escape them so they render as visible text.
  const customTagRegex = /<\/?([a-zA-Z0-9]+)[^>]*>/g;
  const customTags = new Set<string>();
  text = text.replace(customTagRegex, (match: string, tagName: string) => {
    if (passThroughTags.includes(tagName.toLowerCase())) return match;
    customTags.add(tagName);
    return match.replace(/</g, "&lt;").replace(/>/g, "&gt;");
  });

  // Sanitize while allowing custom tags.
  // NOTE(review): every custom tag matched above has already been escaped, so
  // adding the names to the allow-list looks redundant, and it would also
  // allow names such as "script" if one ever reached the parser unescaped.
  // Kept for backward compatibility — confirm whether it can be dropped.
  return sanitizeHtml(text, {
    allowedTags: passThroughTags.concat(Array.from(customTags))
  });
};
