import XRegExp from 'xregexp';
import without from 'lodash/without';
import {
  BULLIT,
  SHADDA,
  KASHIDA,
  SOUND_EX_CODES,
} from '@/utils/farsi/farsiWordsUtilsEnum';

// Module-level holder for lazily built values. `diacriticalMarks` starts as
// an empty (falsy) string and is overwritten with a RegExp by
// _createDiacriticRe(); replaceDiacritic() uses its falsiness to decide
// whether the pattern still has to be built.
const phoneticConstants = {
  diacriticalMarks: '',
};
// Global, case-insensitive XRegExp pattern matching runs of one or more
// characters from the \p{Arabic} or \p{M} Unicode classes; used by
// tokenizer() to extract tokens.
const tokenizArRe = new XRegExp('[\\p{Arabic}\\p{M}]+', 'gi');

/**
 * Strictly compares this string with the given value.
 *
 * The result is true only when the argument is a primitive string holding
 * exactly the same sequence of UTF-16 code units as this string. Any other
 * argument (undefined, null, numbers, String wrapper objects, ...) yields
 * false, because the comparison uses `===` without coercion.
 *
 * @param {*} someString value to compare against
 * @returns {boolean} true when both are the same primitive string
 */
String.prototype.equals = function(someString) {
  // `this` may be a String wrapper object when called from sloppy-mode code;
  // toString() unwraps it to the primitive so `===` compares by value.
  return String.prototype.toString.call(this) === someString;
};

/**
 * Returns a copy of this string with the UTF-16 code units in the range
 * [start, end) removed.
 *
 * The range is clamped to the end of the string, so an `end` past the last
 * character removes everything from `start` onwards. The string is returned
 * unchanged when either bound is not a number, when `start` is negative, or
 * when `start` is not strictly less than `end`.
 *
 * @param {number} start index of the first code unit to remove (inclusive)
 * @param {number} end index at which removal stops (exclusive)
 * @returns {string} the string with the requested range removed
 */
String.prototype.delete = function(start, end) {
  const text = String(this);

  // `start >= 0 && start < end` already implies `end >= 0`.
  const isValidRange =
    typeof start === 'number' &&
    typeof end === 'number' &&
    start >= 0 &&
    start < end;

  if (!isValidRange) {
    return text;
  }

  // Work on individual code units so indices line up with charAt()/length.
  const codeUnits = text.split('');
  codeUnits.splice(start, end - start);
  return codeUnits.join('');
};

/**
 * Returns a copy of this string with the UTF-16 code unit at `index`
 * removed, making the result one code unit shorter.
 *
 * The string is returned unchanged when `index` is not a number, is
 * negative, or lies at or beyond the end of the string.
 *
 * @param {number} index position of the code unit to remove
 * @returns {string} the string without the code unit at `index`
 */
String.prototype.deleteCharAt = function(index) {
  const text = String(this);

  if (typeof index !== 'number' || !(index >= 0)) {
    return text;
  }

  // Work on individual code units so the index lines up with charAt().
  const codeUnits = text.split('');
  codeUnits.splice(index, 1);
  return codeUnits.join('');
};

/**
 * Light, rule-based suffix stripper for Persian words.
 *
 * Must be constructed with `new`. The instance exposes one method,
 * `getStem(word)`, which:
 *   1. drops a trailing kasra (U+0650) from words of 5+ characters,
 *   2. strips the BULLIT/SHADDA/KASHIDA characters and then removes at most
 *      one known suffix, trying 4-, 3-, 2- and 1-character suffixes in that
 *      order (each guarded by a minimum word length),
 *   3. drops a trailing kasra once more.
 * Results are memoised per instance.
 *
 * Lengths and indices are measured in UTF-16 code units.
 *
 * @constructor
 */
function PersianStemmerLight() {
  const KASRA = '\u0650';

  // Final characters dropped by the clean-up that follows removal of the
  // two-character suffix U+0627 U+0646.
  const NORMALIZE_1_ENDINGS = new Set([
    '\u0649',
    '\u06AF',
    '\u0645',
    '\u062A',
    '\u0631',
    '\u0634',
  ]);

  // Trailing yeh, in both its Persian (U+06CC) and Arabic (U+064A) forms.
  const NORMALIZE_2_ENDINGS = new Set(['\u06CC', '\u064A']);

  // Multi-character suffix rules, tried in order; the first match wins.
  // `minLength` is the shortest word (after diacritic removal) the rule is
  // applied to. Suffixes containing yeh are listed in both spellings.
  const LONG_SUFFIX_RULES = [
    {
      minLength: 8,
      size: 4,
      suffixes: new Set([
        '\u062A\u0631\u06CC\u0646',
        '\u0622\u0628\u0627\u062F',
        '\u062A\u0631\u064A\u0646',
        '\u06AF\u064A\u0631\u064A',
        '\u0647\u0627\u064A\u064A',
        '\u0647\u0627\u06CC\u06CC',
        '\u06AF\u06CC\u0631\u06CC',
        '\u0633\u0627\u0632\u064A',
        '\u0633\u0627\u0632\u06CC',
        '\u0631\u064A\u0632\u064A',
        '\u0631\u06CC\u0632\u06CC',
        '\u0628\u0646\u062F\u064A',
        '\u0628\u0646\u062F\u06CC',
        '\u0628\u0627\u0631\u0647',
      ]),
    },
    {
      minLength: 7,
      size: 3,
      suffixes: new Set([
        '\u0647\u0627\u064A',
        '\u0647\u0627\u06CC',
        '\u0627\u0646\u062F',
        '\u0627\u064A\u0645',
        '\u0627\u06CC\u0645',
        '\u0634\u0627\u0646',
      ]),
    },
  ];

  // The one two-character suffix whose removal is followed by normalize1().
  const NORMALIZED_TWO_CHAR_SUFFIX = '\u0627\u0646';

  const TWO_CHAR_SUFFIXES = new Set([
    '\u0647\u0627',
    '\u06CC\u0646',
    '\u064A\u0646',
    '\u0627\u062A',
    '\u0647\u0621',
    '\u0627\u0634',
    '\u062A\u0631',
    '\u0631\u0627',
    '\u0648\u0646',
    '\u0627\u0645',
  ]);

  const ONE_CHAR_SUFFIXES = new Set([
    '\u0647',
    '\u06CC',
    '\u064A',
    '\u0645',
    '\u062A',
    '\u0634',
  ]);

  // Built once per instance. String.prototype.replace resets lastIndex for
  // global patterns, so sharing the object between calls is safe.
  const diacriticalMarksRe = new RegExp(
    '[' + BULLIT + SHADDA + KASHIDA + ']+',
    'gi'
  );

  const lastChar = word => word.charAt(word.length - 1);
  const dropLast = (word, count) => word.slice(0, word.length - count);

  // Drops a trailing yeh from words of at least 4 characters.
  const normalize2 = word =>
    word.length >= 4 && NORMALIZE_2_ENDINGS.has(lastChar(word))
      ? dropLast(word, 1)
      : word;

  // Drops one trailing character from NORMALIZE_1_ENDINGS (words of at
  // least 4 characters) and then applies normalize2 to what is left.
  const normalize1 = word =>
    word.length >= 4 && NORMALIZE_1_ENDINGS.has(lastChar(word))
      ? normalize2(dropLast(word, 1))
      : word;

  // Drops a trailing kasra from words of at least 5 characters.
  const removeKasra = word =>
    word.length >= 5 && lastChar(word) === KASRA ? dropLast(word, 1) : word;

  // Strips diacritics, then removes at most one suffix (longest rule first).
  const removeSuffix = input => {
    const word = input.replace(diacriticalMarksRe, '');
    const { length } = word;

    for (const { minLength, size, suffixes } of LONG_SUFFIX_RULES) {
      if (length >= minLength && suffixes.has(word.slice(-size))) {
        return dropLast(word, size);
      }
    }

    if (length >= 6) {
      const ending = word.slice(-2);

      if (ending === NORMALIZED_TWO_CHAR_SUFFIX) {
        return normalize1(dropLast(word, 2));
      }
      if (TWO_CHAR_SUFFIXES.has(ending)) {
        return dropLast(word, 2);
      }
    }

    if (length >= 4 && ONE_CHAR_SUFFIXES.has(lastChar(word))) {
      return dropLast(word, 1);
    }
    return word;
  };

  // Word -> stem. A Map (not a plain object) so that words such as
  // "constructor" or "toString" cannot collide with inherited
  // Object.prototype members.
  const cache = new Map();

  /**
   * Returns the stem of `word`, computing and caching it on first use.
   *
   * @param {string} word token to stem (other values are stringified)
   * @returns {string} the stemmed word
   */
  this.getStem = word => {
    const cached = cache.get(word);

    if (cached !== undefined) {
      return cached;
    }

    const stem = removeKasra(removeSuffix(removeKasra(String(word))));
    cache.set(word, stem);

    return stem;
  };
}

// Single stemmer instance shared by every stemmer() call, created on first
// use. Reusing it lets the instance's word -> stem cache serve repeated
// tokens instead of being discarded after each call.
let sharedStemmer = null;

/**
 * Returns the stem of a single token using PersianStemmerLight.
 *
 * @param {string} token word to stem
 * @returns {string} the stemmed token
 */
const stemmer = token => {
  if (sharedStemmer === null) {
    sharedStemmer = new PersianStemmerLight();
  }
  return sharedStemmer.getStem(token);
};

/**
 * Splits a string into tokens by collecting every match of `tokenizArRe`,
 * discarding any empty-string or single-space entries.
 *
 * @param {string} str text to tokenize
 * @returns {string[]} the matched tokens, or an empty array when none match
 */
const tokenizer = str => without(str.match(tokenizArRe), '', ' ') || [];

/**
 * Builds the global, case-insensitive pattern matching runs of the BULLIT,
 * SHADDA and KASHIDA characters, stores it on
 * `phoneticConstants.diacriticalMarks` and returns it, so callers can use
 * the result of this call directly as the pattern.
 *
 * @returns {RegExp} the freshly built pattern
 */
const _createDiacriticRe = () => {
  const diacriticRe = new RegExp('[' + BULLIT + SHADDA + KASHIDA + ']+', 'gi');

  phoneticConstants.diacriticalMarks = diacriticRe;
  return diacriticRe;
};

/**
 * Removes every character matched by the diacritic pattern from the given
 * string, collapses each run of two or more whitespace characters into a
 * single space, and trims both ends.
 *
 * @param {string} someString text to clean
 * @returns {string} the cleaned text
 */
const replaceDiacritic = someString => {
  // Build the pattern on first use, then always read it back from
  // phoneticConstants: _createDiacriticRe() publishes it there, so this does
  // not depend on that helper's return value.
  if (!phoneticConstants.diacriticalMarks) {
    _createDiacriticRe();
  }

  return someString
    .replace(phoneticConstants.diacriticalMarks, '')
    .replace(/\s{2,}/g, ' ')
    .trim();
};

/**
 * Computes a SoundEx-style phonetic key for a word, based on
 * http://research.ijcaonline.org/volume34/number10/pxc3876054.pdf
 *
 * One leading alef variant and any run of leading Latin vowels (a, e, i, o,
 * u, y or an apostrophe) are removed first. Each remaining character found
 * in SOUND_EX_CODES then contributes its code, unless that code equals the
 * one emitted just before it. Characters without a code are skipped and do
 * not reset that comparison. A non-empty key is padded with zeros and cut
 * to four characters.
 *
 * @param s word to encode
 * @returns {string} the four-character key, or '' when nothing was encoded
 */
function soundEx(s) {
  const stripped = s.replace(/^[إآأا]/g, '').replace(/^[aeiouy']{1,}/g, '');

  let key = '';
  let lastCode = '';

  for (let i = 0; i < stripped.length; i += 1) {
    const character = stripped[i];

    if (!Object.prototype.hasOwnProperty.call(SOUND_EX_CODES, character)) {
      continue;
    }

    const code = SOUND_EX_CODES[character];

    // Skip a code that merely repeats the previously emitted one.
    if (lastCode.length !== 0 && lastCode === code) {
      continue;
    }

    lastCode = code;
    key += code;
  }

  if (key.length === 0) {
    return key;
  }
  return (key + '000').slice(0, 4);
}

// Public API of this module, exposed as one default-exported object.
export default {
  stemmer,
  tokenizer,
  replaceDiacritic,
  soundEx,
};
