import { v4 as uuid } from "uuid";
import { z } from "zod";

/**
 * Schema for a single vertex: an object with numeric `x` and `y` fields.
 * NOTE(review): presumably image coordinates of a bounding-box corner; the
 * units are not stated in this file — confirm against the OCR API.
 */
export const ZodVertex = z.object({ x: z.number(), y: z.number() });
/** Static type inferred from `ZodVertex`. */
export type Vertex = z.infer<typeof ZodVertex>;

/**
 * Schema for a box described by a list of vertices.
 * The schema does not constrain how many vertices the list holds.
 */
export const ZodBox = z.object({
  vertices: z.array(ZodVertex),
});
/** Static type inferred from `ZodBox`. */
export type Box = z.infer<typeof ZodBox>;

/** Schema for one word entry as it appears in the OCR response. */
export const ZodWordInResponse = z.object({
  // Numeric identifier delivered with the word.
  id: z.number(),
  // Box associated with the word (see ZodBox).
  boundingBox: ZodBox,
  // NOTE(review): presumably a recognition score; its range is not constrained here — confirm.
  confidence: z.number(),
  // Text of the word.
  text: z.string(),
});
/** Static type inferred from `ZodWordInResponse`. */
export type WordInResponse = z.infer<typeof ZodWordInResponse>;

/** Schema for one page entry as it appears in the OCR response. */
export const ZodPageInResponse = z.object({
  // NOTE(review): presumably a page-level recognition score; range not constrained here — confirm.
  confidence: z.number(),
  // NOTE(review): presumably the page height; units are not stated in this file — confirm.
  height: z.number(),
  // Text of the page. assignSerializedId splits this on " \n" to obtain line texts.
  text: z.string(),
  // NOTE(review): presumably the page width; units are not stated in this file — confirm.
  width: z.number(),
  // Words of the page, each validated with ZodWordInResponse.
  words: z.array(ZodWordInResponse),
});
/** Static type inferred from `ZodPageInResponse`. */
export type PageInResponse = z.infer<typeof ZodPageInResponse>;

/**
 * Schema for the top-level OCR response object, before ids and line groupings
 * are added by `convertResponseToImageOcrOacJson`.
 */
export const ZodOcrOacJsonResponse = z.object({
  apiVersion: z.string(),
  confidence: z.number(),
  // NOTE(review): presumably the source image encoded as base64; only validated as a string — confirm.
  imageBase64: z.string(),
  mimeType: z.string(),
  // Pages of the response, each validated with ZodPageInResponse.
  pages: z.array(ZodPageInResponse),
  text: z.string(),
});
/** Static type inferred from `ZodOcrOacJsonResponse`. */
export type OcrOacJsonResponse = z.infer<typeof ZodOcrOacJsonResponse>;

// HACK (translated from the original note): the Word in the response was said to
// lack `id` (a per-box id) and `lineId` (a per-line id), both of which are needed,
// so a separate ZodWord was introduced to carry them.
// NOTE(review): as written, ZodWord is a plain alias of ZodWordInResponse, which
// already declares a numeric `id`, and no `lineId` is added here. The note above
// looks stale — confirm and update.

/** Word schema used by ZodLine and ZodPage; currently the same schema object as `ZodWordInResponse`. */
export const ZodWord = ZodWordInResponse;
/** Static type inferred from `ZodWord`. */
export type Word = z.infer<typeof ZodWord>;

/**
 * Schema for one line of text: a group of words with its own id.
 * Lines are not part of the response schema; `assignSerializedId` builds them.
 */
export const ZodLine = z.object({
  // UUID string identifying the line.
  id: z.string().uuid(),
  // Words that make up the line.
  words: z.array(ZodWord),
  // Text of the line.
  text: z.string(),
});
/** Static type inferred from `ZodLine`. */
export type Line = z.infer<typeof ZodLine>;

/**
 * Schema for an enriched page: the fields of `ZodPageInResponse` plus a UUID
 * `id` and the `lines` grouping.
 */
export const ZodPage = z.object({
  // UUID string identifying the page.
  id: z.string().uuid(),
  width: z.number(),
  height: z.number(),
  // All words of the page.
  words: z.array(ZodWord),
  // Words grouped into lines.
  lines: z.array(ZodLine),
  confidence: z.number(),
  text: z.string(),
});
/**
 * Static type inferred from `ZodPage`.
 * Note that the exported type name is `OcrInferenceResult`, not `Page`.
 */
export type OcrInferenceResult = z.infer<typeof ZodPage>;

/**
 * Schema for the enriched OCR document: the same top-level fields as
 * `ZodOcrOacJsonResponse`, but with pages validated by `ZodPage`.
 */
export const ZodImageOcrOacJson = z.object({
  apiVersion: z.string(),
  confidence: z.number(),
  // NOTE(review): presumably the source image encoded as base64; only validated as a string — confirm.
  imageBase64: z.string(),
  mimeType: z.string(),
  // Enriched pages (with `id` and `lines`).
  pages: z.array(ZodPage),
  text: z.string(),
});

/** Static type inferred from `ZodImageOcrOacJson`. */
export type ImageOcrOacJson = z.infer<typeof ZodImageOcrOacJson>;

/**
 * Converts a raw OCR response into the enriched `ImageOcrOacJson` shape.
 *
 * Every top-level field is copied unchanged; only `pages` is replaced, with
 * each page run through `assignSerializedId` so that it carries an `id` and
 * its `lines`.
 *
 * @param file - The OCR response to convert. It is not mutated.
 * @returns A new object with the same top-level fields and enriched pages.
 */
export function convertResponseToImageOcrOacJson(
  file: OcrOacJsonResponse,
): ImageOcrOacJson {
  const pagesWithIds = file.pages.map(assignSerializedId);
  return { ...file, pages: pagesWithIds };
}

/**
 * Groups a page's words into lines and attaches generated ids.
 *
 * The expected line texts are obtained by splitting `page.text` on `" \n"`.
 * Words are consumed in order and joined with single spaces; whenever the
 * joined text equals the next expected line text, a line with a fresh UUID is
 * emitted and accumulation restarts.
 *
 * Assumptions about the input:
 * 1. The lines exhaustively and exclusively include all words.
 * 2. The words are in the same order as the lines.
 *
 * If the accumulated text never matches an expected line (for example because
 * the text uses a different separator), the words still pending after the
 * last word are emitted as one final line instead of being silently dropped,
 * so that every word of the page belongs to some line.
 *
 * @param page - A page as delivered in the OCR response. It is not mutated.
 * @returns The page's fields plus a generated `id` and the computed `lines`.
 */
function assignSerializedId(page: PageInResponse) {
  const expectedLineTexts = page.text.split(" \n");
  const lines: Line[] = [];

  // Accumulators for the line currently being assembled.
  let pendingWords: Word[] = [];
  let pendingText = "";

  for (const word of page.words) {
    // Append the word to the line under construction.
    pendingWords.push(word);
    pendingText = pendingText ? pendingText + " " + word.text : word.text;

    // The next expected text is the one at index `lines.length`; once all
    // expected texts are used up this is `undefined` and never matches.
    if (pendingText === expectedLineTexts[lines.length]) {
      // This word closes the current line: emit it and reset the accumulators.
      lines.push({ id: uuid(), words: pendingWords, text: pendingText });
      pendingWords = [];
      pendingText = "";
    }
  }

  // Keep words that never completed an expected line rather than losing them.
  if (pendingWords.length > 0) {
    lines.push({ id: uuid(), words: pendingWords, text: pendingText });
  }

  return { id: uuid(), ...page, lines };
}
