import { pdfjs, Document, Page } from 'react-pdf';
import { createWorker } from 'tesseract.js';

// Tell pdf.js where to load its worker script from: the cdnjs build whose
// version matches the bundled pdfjs (`pdfjs.version`). The URL is
// protocol-relative, so it is resolved against the hosting page's protocol.
pdfjs.GlobalWorkerOptions.workerSrc = `//cdnjs.cloudflare.com/ajax/libs/pdf.js/${pdfjs.version}/pdf.worker.js`;

/**
 * Single-pass pattern covering a whole "BON DE TRAVAUX" text, exposing each
 * value as a named capture group (id, loc, name, programId, programName,
 * adress1, trancheId, ensemble, escalier, etage, zipcode, city, logement,
 * module, telPers, telPro, telMob, delais, start, end, ref, lib).
 *
 * `parse` in this file does not use it; it relies on the per-field patterns
 * in `fields` instead. The pattern carries the `g` flag, so `exec`/`test`
 * calls on this shared object advance `lastIndex` between calls.
 */
export const regex = /BON DE TRAVAUX N°\s?(?<id>.*)(?:\n.*){10,15}LOCALISATION\s(?:(?<loc>.*))?ADRESSE\s(?<name>.*)\sProgramme (?<programId>.+)(?<programName>.+)\t(?<adress1>.*)\sTranche\s*(?<trancheId>.*)\s*Ensemble\s+(?<ensemble>.*) +Escalier (?<escalier>.*)\s+Étage\s?(?<etage>.*)\t\s*(?<zipcode>[0-9AB]{0,5})\s*(?<city>.*)\sLogement\s*(?<logement>.*)\s*Module\s*(?<module>\S*)\s*(?:Tél[.,] personnel (?<telPers>\S*))?\s*(?:Tél[.,] professionnel (?<telPro>\S*))?\s*(?:Tél[.,] mobile (?<telMob>\S*))?\sDÉLAI D[’']EXÉCUTION\s*(?<delais>.*)\s*PÉRIODE\s*DU\s*(?<start>..-..-....)\s*AU\s*(?<end>..-..-....)\s(?:ARTICLE\s*DÉSIGNATION.*(?<ref>[^ ]*) (?<lib>[^\t]*))?/gm;
/*
export const regex = /BON DE TRAVAUX N°\s?(?<id>.*)(?:\n.*){10,15}LOCALISATION\s(?:(?<loc>.*))?ADRESSE\s(?<name>.*)\sProgramme (?<programId>.+)(?<programName>.+)\t
(?<adress1>.*)\sTranche\s*(?<trancheId>.*)\s*Ensemble\s+(?<ensemble>.*) +Escalier (?<escalier>.*)\s+Étage\s?(?<etage>.*)\t\s*(?<zipcode>[0-9AB]{0,5})\s*(?<city>.*)
\sLogement\s*(?<logement>.*)\s*Module\s*(?<module>\S*)\s*(?:Tél[.,] personnel (?<telPers>\S*))?\s*(?:Tél[.,] professionnel (?<telPro>\S*))?\s*(?:Tél[.,] mobile
  (?<telMob>\S*))?\s
  DÉLAI D[’']EXÉCUTION\s*(?<delais>.*)\s*PÉRIODE\s*DU\s*(?<start>..-..-....)\s*AU\s*(?<end>..-..-....)\
  s(?:ARTICLE\s*DÉSIGNATION.*(?<ref>[^ ]*) (?<lib>[^\t]*))?/gm;
*/

// Looks for a 5-digit code following the "Étage" label (optionally separated
// by one non-blank token). A fresh regex literal is evaluated on every call,
// so the `g` flag carries no state between calls. Returns the match array or
// null.
const matchZipAfterEtage = (txt) => /Étage\s+\S*\s*([0-9]{5})/gm.exec(txt);

// Layout detectors, tried in order; the index of the first one returning a
// truthy value selects the entry of `fields` with the same index.
//   [0] -> a 5-digit code follows "Étage" (returns the match array / null)
//   [1] -> no such code is found (returns a boolean)
const LAYOUTS = [
  matchZipAfterEtage,
  (txt) => !matchZipAfterEtage(txt),
];
/**
 * Per-layout extraction patterns, indexed like `LAYOUTS`: `fields[i]` is used
 * when `LAYOUTS[i]` is the first detector that accepts the text.
 *
 * Each entry maps an output key to a regex; `parse` reads capture group 1 of
 * the first match as the value for that key. All patterns carry the `g` flag,
 * so they are stateful: `parse` resets `lastIndex` before each use.
 *
 * Many character classes are deliberately loose (e.g. `Té[li].`, `Modu[lt]e`,
 * `[ÉÊ]`) to tolerate variations in the input text.
 */
const fields = [
  // Layout 0: a 5-digit zip code follows the "Étage" label.
  {
    adress1: /Programme *[0-9]+[^0-9\n]*[ \.\t](?:[0-9 ]*[ \.\t])?([0-9».][^\:\n]*)/gm,
    city: /Étage\s+\S*\s*[0-9]{5}\s+([^\:\n]*)/gm,
    delais: /EXÉCUTION\s+(\S+\s?\S*)/gm,
    end: /PÉRIODE[\s—\-_ ]*DU[\s—\-_ ]*\S*[\s—\- ]*AU[\s—\- ]*(\S*)/gm,
    ensemble: /Ensemble\s+(\S*)/gm,
    escalier: /Escalier\s+(\S*)/gm,
    etage: /Étage\s+(\S*)/gm,
    id: /BON *DE *TRAVAUX[\S -]*[Nn]°? *(\d*\s\d*)/gm,
    lib: /ARTICLE[\s;]*D[ÉÊ]SIGNATION.*[\r\n]+[^0-9]*[0-9][^ ]*(.*)(?:U |FFT|NIV)/gm,
    loc: /LOCALISATION\s(.*)ADRESSE/gm,
    logement: /Logement\s+(\S*)/gm,
    module: /Modu[lt]e\s+(\S*)/gm,
    name: /LOCALISATION.*ADRESSE[\s—-]*([a-zA-Z\s]*).?$/gm,
    programId: /Programme\s([0-9]+)/gm,
    // The separator class lists whitespace, underscore, hyphen and em dash.
    // The hyphen is escaped on purpose: unescaped, `_-—` is a code-point
    // range that swallows lowercase/accented letters and excludes `-` itself.
    programName: /Programme *[0-9]+[\s_\-—]*([^0-9\n]*[ \.\t](?:[0-9 ]*[ \.\t])?)([0-9».][^\:\n]*)/gm,
    ref: /ARTICLE[\s;]*D[ÉÊ]SIGNATION.*[\r\n]+[^0-9]*([0-9][^ ]*)/gm,
    start: /PÉRIODE[\s—\-_ ]*DU[\s—\-_ ]*(\S*)/gm,
    telMob: /Té[li]. mobile\s+(\S*)/gm,
    telPers: /Té[li]. personne[li]\s+(\S*)/gm,
    telPro: /Té[li]. professionnel\|?\s+(\S*)/gm,
    trancheId: /Tranche\s*(\S*)/gm,
    zipcode: /Étage\s+\S*\s*([0-9]{5})/gm,
  },
  // Layout 1: no zip code after "Étage"; zip/city are read from the
  // "Programme" line instead. This layout defines no `name` pattern.
  {
    adress1: /LOCALISATION.*ADRESSE[\s—\-=—]*([ 0-9».][^\:\n]*)/gm,
    city: /Programme *[0-9]+[^0-9\n]*[ \.\t][0-9]{5}([^\:\n]*)/gm,
    delais: /EXÉCUTION\s+(\S+\s?\S*)/gm,
    end: /PÉRIODE[\s—\-_ ]*DU[\s—\-_ ]*\S*[\s—\- ]*AU[\s—\- ]*(\S*)/gm,
    ensemble: /Ensemble\s+(\S*)/gm,
    escalier: /Escalier\s+(\S*)/gm,
    etage: /Étage\s?(\S*)$/gm,
    id: /BON *DE *TRAVAUX[\S -]*[Nn]°? *(\d*\s\d*)/gm,
    lib: /ARTICLE\s*D[ÉÊ]SIGNATION.*[\r\n]+.*[\r\n]\s?\S*\s(.*)(?:U |FFT|NIV)/gm,
    loc: /LOCALISATION\s_?(.*)ADRESSE/gm,
    logement: /Logement\s+([0-9]\S*)/gm,
    module: /Modu[lt]e\s+([0-9]\S*)/gm,
    programId: /Programme\s([0-9]+)/gm,
    programName: /Programme *[0-9]+[ \—_]+([^0-9\n]*)[ \.\t][ 0-9».]/gm,
    ref: /ARTICLE\s*D[ÉÊ]SIGNATION.*[\r\n]+.*[\r\n]\s?(\S*)/gm,
    start: /PÉRIODE[\s—\-_ ]*DU[\s—\-_ ]*(\S*)/gm,
    telMob: /Té[li]. mobile\s+(\S*)/gm,
    telPers: /Té[li]. personne[li]\s+(\S*)/gm,
    telPro: /Té[li]. professionnel\|?\s+(\S*)/gm,
    trancheId: /Tranche\s*(\S*)/gm,
    zipcode: /Programme *[0-9]+[^0-9\n]*[ \.\t]([0-9]{5})/gm,
  },
];

/**
 * Extracts the known fields from a text.
 *
 * The layout is the index of the first `LAYOUTS` detector accepting the text;
 * the pattern set at that index in `fields` is applied (falling back to
 * `fields[0]` when no pattern set exists at that index).
 *
 * @param {string} text - Text to extract values from.
 * @returns {Object<string, string>} One entry per pattern of the selected
 *   set: the trimmed first capture group, or '' when there is no match.
 */
export const parse = (text) => {
  const layoutIndex = LAYOUTS.findIndex((accepts) => accepts(text));
  const patterns = fields[layoutIndex] || fields[0];
  const res = {};
  for (const [key, pattern] of Object.entries(patterns)) {
    if (!pattern) continue;
    // Patterns are shared `g` regexes: rewind before every use.
    pattern.lastIndex = 0;
    const match = pattern.exec(text);
    const captured = match?.[1] ?? '';
    res[key] = captured.trim();
  }
  return res;
};

/**
 * Reads a file through a `FileReader` as a data URL.
 *
 * @param {Blob} file - The file handed to `FileReader.readAsDataURL`.
 * @returns {Promise<string>} Resolves with the reader result taken from the
 *   load event; rejects with whatever the reader passes to its error handler.
 */
const readFileData = (file) => new Promise((resolve, reject) => {
  const fileReader = new FileReader();
  fileReader.onload = ({ target }) => resolve(target.result);
  fileReader.onerror = (failure) => reject(failure);
  fileReader.readAsDataURL(file);
});

/**
 * Runs French OCR on the first page of a PDF file.
 *
 * The file is rendered through `convertPdfToImages`, then the first image is
 * recognized by a tesseract.js worker set up for the 'fra' language.
 *
 * @param {Blob} file - The input PDF file (e.g. event.target.files[0]).
 * @returns {Promise<string>} The text recognized on the first image.
 */
export const processOcrPDF = async (file) => {
  const [firstPage] = await convertPdfToImages(file);
  const worker = createWorker({});
  try {
    await worker.load();
    await worker.loadLanguage('fra');
    await worker.initialize('fra');
    const {
      data: { text },
    } = await worker.recognize(firstPage);
    return text;
  } finally {
    // Always release the worker, including when loading or recognition fails.
    await worker.terminate();
  }
};

/**
 * Renders the first page of a PDF file to an image.
 *
 * Only page 1 is rendered (at scale 2); the returned array therefore holds a
 * single entry.
 *
 * @param {Blob} file - The input file (e.g. event.target.files[0]).
 * @returns {Promise<string[]>} Array with the data URL produced by
 *   `canvas.toDataURL()` for the first page.
 */
export const convertPdfToImages = async (file) => {
  const data = await readFileData(file);
  const loadingTask = pdfjs.getDocument(data);
  try {
    const pdf = await loadingTask.promise;
    const canvas = document.createElement('canvas');

    const page = await pdf.getPage(1);
    const viewport = page.getViewport({ scale: 2 });
    const context = canvas.getContext('2d');
    canvas.height = viewport.height;
    canvas.width = viewport.width;
    await page.render({ canvasContext: context, viewport }).promise;
    const images = [canvas.toDataURL()];

    canvas.remove();
    return images;
  } finally {
    // Release the loaded document and its pdf.js worker, whether or not
    // loading/rendering succeeded; otherwise every call leaves one alive.
    await loadingTask.destroy();
  }
};
