import unidecode from 'unidecode';
import porterStemmer from './stemmer/porter_stemmer.js';
import XRegExp from 'xregexp';
import { IGNORE_QUOTES } from '@/utils/english/englishWordUtilsEnum';

// Matches every run of one or more characters that are NOT allowed inside a
// token. The negated class keeps Latin-script letters (\p{Latin}), digits,
// the hyphen, and whatever characters IGNORE_QUOTES contributes.
// NOTE(review): IGNORE_QUOTES is spliced into the character class verbatim, so
// it is presumably a string of literal quote characters that are safe inside
// `[...]` — confirm against its definition. The `-` directly after `\d` is
// expected to be read as a literal hyphen rather than a range — verify.
const tokenizeEnRe = new XRegExp(
  '[^\\p{Latin}\\d-' + IGNORE_QUOTES + ']+',
  'g'
);
// Matches a hyphen that sits at the very start of the string or right after a
// single space; the match includes that leading space when present.
const dashEnRe = new XRegExp('((^| )-)', 'g');

/**
 * Reduce a token to its stem using the imported `porterStemmer`.
 *
 * @param {string} token - The word to stem.
 * @returns {string} The value returned by `porterStemmer.stem(token)`, or the
 *   original token unchanged when that value is falsy (e.g. an empty string).
 */
const stemmer = token => porterStemmer.stem(token) || token;

/**
 * Split a string into lowercase tokens.
 *
 * Every match of `tokenizeEnRe` and then every match of `dashEnRe` is replaced
 * with a single space; the result is lowercased, split on whitespace, and
 * empty pieces are discarded.
 *
 * @param {string} str - Raw input text.
 * @returns {string[]} The non-empty lowercase tokens, in input order.
 */
const tokenizer = str =>
  str
    .replace(tokenizeEnRe, ' ')
    .replace(dashEnRe, ' ')
    .toLowerCase()
    .split(/\s+/)
    .filter(piece => piece.length > 0);

/**
 * Strip the characters listed in `IGNORE_QUOTES` from a string, trim the
 * surrounding whitespace, and pass the result through `unidecode`.
 *
 * @param {string} str - Input text.
 * @returns {string} Whatever `unidecode` returns for the cleaned-up string.
 */
const replaceDiacritic = str => {
  // Built per call from IGNORE_QUOTES: one or more consecutive listed chars.
  const quoteRun = new XRegExp('[' + IGNORE_QUOTES + ']+', 'g');
  const unquoted = str.replace(quoteRun, '');

  return unidecode(unquoted.trim());
};

// Default export: the three helpers defined above, bundled as one object.
export default {
  stemmer,
  tokenizer,
  replaceDiacritic,
};
