import { BLOCKS_UNICODE } from "./unicode";

/**
 * One element of the array returned by `hangulToRuby`: either a plain string
 * segment, or an object pairing a `text` with a `ruby` string.
 */
export type ReturnedHangulWithRuby =
  | string
  | {
      /** `hangulToRuby` always sets this to an empty string. */
      text: string;
      /** `hangulToRuby` stores the matched Hangul segment here. */
      ruby: string;
    };

/**
 * Matches either a run of one or more Hangul characters or a run of one or
 * more non-Hangul characters. The whole run is captured in group 1, so
 * `String.prototype.split` keeps the matched runs in its output.
 *
 * Ranges treated as Hangul:
 * - U+1100–U+11FF (Hangul Jamo)
 * - U+3130–U+318F (Hangul Compatibility Jamo)
 * - U+A960–U+A97F (Hangul Jamo Extended-A)
 * - U+AC00–U+D7FF (Hangul Syllables and Hangul Jamo Extended-B)
 */
export const HANGUL_WORD_REGEX =
  /([\u1100-\u11FF\u3130-\u318F\uA960-\uA97F\uAC00-\uD7FF]+|[^\u1100-\u11FF\u3130-\u318F\uA960-\uA97F\uAC00-\uD7FF]+)/;

// Entries of BLOCKS_UNICODE whose block name begins with "HANGUL".
// NOTE(review): the assertion assumes each value is a [start, end] pair of
// code points; the shape of BLOCKS_UNICODE is declared in "./unicode".
const hangulBlocks = Object.entries(BLOCKS_UNICODE).filter((entry) =>
  entry[0].startsWith("HANGUL"),
) as [string, [number, number]][];

/**
 * Tell whether the first code point of `char` lies inside one of the Hangul
 * Unicode blocks collected in `hangulBlocks`.
 *
 * Only the first code point is inspected; any further characters are ignored.
 * Non-string values and the empty string yield `false`.
 *
 * @param char String whose first code point is tested.
 * @returns `true` when that code point falls within a Hangul block (bounds inclusive).
 */
export const isHangul = (char: string) => {
  // Runtime guard for untyped callers.
  if (typeof char !== "string") {
    return false;
  }

  const codePoint = char.codePointAt(0);
  // `undefined` (empty string) and U+0000 can never be inside a Hangul block.
  if (!codePoint) {
    return false;
  }

  return hangulBlocks.some(
    ([, [start, end]]) => codePoint >= start && codePoint <= end,
  );
};

/**
 * Split a sentence into Hangul and non-Hangul segments and wrap every Hangul
 * segment in a `{ text, ruby }` object, with the segment stored in `ruby` and
 * `text` left empty. Non-Hangul segments are returned as plain strings.
 *
 * Because `HANGUL_WORD_REGEX` is used as the separator and captures its
 * match, the result also contains the empty strings that `split` emits
 * between adjacent matches and at both ends; they are passed through as-is.
 *
 * @param sentence Text to segment.
 * @returns The segments in their original order.
 */
export const hangulToRuby = (sentence: string): ReturnedHangulWithRuby[] => {
  // TODO Use Intl.Segmenter instead of splitting by Hangul words
  const segments = sentence.split(HANGUL_WORD_REGEX);
  const result: ReturnedHangulWithRuby[] = [];

  for (const segment of segments) {
    // A segment is homogeneous, so its first character decides its kind.
    // `charAt(0)` yields "" for an empty segment, which is not Hangul.
    if (isHangul(segment.charAt(0))) {
      result.push({ text: "", ruby: segment });
    } else {
      result.push(segment);
    }
  }

  return result;
};
