import has from 'lodash/has';
import each from 'lodash/each';
import without from 'lodash/without';
import filter from 'lodash/filter';
import map from 'lodash/map';
import compact from 'lodash/compact';

import unidecode from 'unidecode';

import {
  AR_PHONETIC_DICTIONARY,
  BULLIT,
  SHADDA,
  KASHIDA,
  SOUND_EX_CODES,
} from '@/utils/arabic/arabicWordsUtilsEnum';

import XRegExp from 'xregexp';

import * as log from 'loglevel';
// Set the logging threshold of the imported loglevel module to 'info'.
log.setLevel('info');

// Module-level cache. `diacriticalMarks` starts as an empty (falsy) string and
// is overwritten with a RegExp by _createDiacriticRe().
const phoneticConstants = {
  diacriticalMarks: '',
};
// Global pattern matching runs of one or more characters that XRegExp
// classifies as \p{Arabic} or \p{M}; used by tokenizer() to extract tokens.
const tokenizArRe = new XRegExp('[\\p{Arabic}\\p{M}]+', 'gi');

/**
 * Builds the regular expression that matches runs of the BULLIT, SHADDA and
 * KASHIDA characters, stores it in `phoneticConstants.diacriticalMarks` and
 * returns it.
 *
 * Returning the expression matters: callers use the idiom
 * `phoneticConstants.diacriticalMarks || _createDiacriticRe()` and would
 * otherwise receive `undefined` on the very first call.
 *
 * @returns {RegExp} the freshly built (and now cached) expression
 * @private
 */
const _createDiacriticRe = () => {
  phoneticConstants.diacriticalMarks = new RegExp(
    '[' + BULLIT + SHADDA + KASHIDA + ']+',
    'gi'
  );
  return phoneticConstants.diacriticalMarks;
};

/**
 * Strips diacritical marks from a string and normalises its whitespace.
 *
 * A diacritic is a glyph added to a letter or basic glyph. The characters
 * removed are those matched by the cached pattern in
 * `phoneticConstants.diacriticalMarks`, which is built on first use by
 * `_createDiacriticRe()`.
 *
 * @param {string} someString - text to clean
 * @returns {string} the text without the matched marks, with every run of two
 *   or more whitespace characters collapsed to a single space, and trimmed
 */
const replaceDiacritic = someString => {
  // Initialise the cache first and read it afterwards, so the result does not
  // depend on what _createDiacriticRe() returns.
  if (!phoneticConstants.diacriticalMarks) {
    _createDiacriticRe();
  }

  return someString
    .replace(phoneticConstants.diacriticalMarks, '')
    // The `g` flag is required: without it only the first run is collapsed.
    .replace(/\s{2,}/g, ' ')
    .trim();
};

/**
 * For every start position in `letters`, collects the replacements registered
 * in `phoneticDict.combination` for each multi-letter sequence (two letters or
 * longer) that begins at that position.
 *
 * @param {string[]} letters - the single characters of a word, in order
 * @param {Object} phoneticDict - dictionary whose `combination` property maps
 *   a letter sequence to an array of replacements
 * @returns {Array<Array>} one array per entry of `letters` (possibly empty)
 *   holding the replacements found for sequences starting at that index
 * @private
 */
const _getLetterCombinations = (letters, phoneticDict) => {
  const combinations = (phoneticDict && phoneticDict.combination) || {};
  const letterLen = letters.length;
  const groupedCombinations = [];

  for (let start = 0; start < letterLen; start++) {
    let matches = [];
    // The bound is inclusive so that a sequence covering the rest of the word
    // (including the whole word when start is 0) is looked up as well.
    for (let size = 2; start + size <= letterLen; size++) {
      const sequence = letters.slice(start, start + size).join('');
      if (Object.prototype.hasOwnProperty.call(combinations, sequence)) {
        matches = matches.concat(combinations[sequence]);
      }
    }
    groupedCombinations.push(matches);
  }
  return groupedCombinations;
};

/**
 * Expands per-position candidate groups into every word form obtainable by
 * picking one candidate from each group and concatenating them in order.
 *
 * When the last character of the prefix built so far is identical to the next
 * candidate, the unextended prefix is kept as an additional form (so a doubled
 * letter may also be represented by a single one).
 *
 * The input array is not modified. A single group yields that group's
 * candidates; an empty input yields an empty array.
 *
 * @param {Array<string[]>} letters - candidate strings for each position
 * @returns {string[]} all generated word forms (may contain duplicates)
 * @private
 */
const _createWords = letters => {
  if (!letters || letters.length === 0) {
    return [];
  }

  const [firstGroup, ...remainingGroups] = letters;

  return remainingGroups.reduce(
    (prefixes, group) => {
      const extended = [];
      prefixes.forEach(prefix => {
        const lastChar = prefix[prefix.length - 1];
        group.forEach(candidate => {
          if (lastChar === candidate) {
            extended.push(prefix);
          }
          extended.push(prefix + candidate);
        });
      });
      return extended;
    },
    [...firstGroup]
  );
};

/**
 * Levenshtein edit distance between two values, compared as strings.
 * https://en.wikipedia.org/wiki/Levenshtein_distance
 *
 * A `null` argument is treated as absent: the distance is then the length of
 * the other argument's string form (0 when both are `null`). Any other value
 * is converted with `String()` before comparison.
 *
 * @param str1
 * @param str2
 * @returns {number} the number of single-character edits
 * @private
 */
const _levenshtein = (str1, str2) => {
  const firstMissing = str1 === null;
  const secondMissing = str2 === null;

  if (firstMissing || secondMissing) {
    if (firstMissing && secondMissing) {
      return 0;
    }
    return String(firstMissing ? str2 : str1).length;
  }

  const source = String(str1);
  const target = String(str2);

  // row[c] holds the distance between the processed prefix of `target` and
  // the first c characters of `source`; it starts as the row for an empty
  // `target` prefix.
  const row = Array.from({ length: source.length + 1 }, (_, c) => c);

  for (let r = 1; r <= target.length; r++) {
    let diagonal = row[0];
    row[0] = r;

    for (let c = 1; c <= source.length; c++) {
      const above = row[c];

      if (source.charAt(c - 1) === target.charAt(r - 1)) {
        row[c] = diagonal;
      } else {
        row[c] = Math.min(above, row[c - 1], diagonal) + 1;
      }

      diagonal = above;
    }
  }

  return row[source.length];
};

/**
 * Builds, for each character of `word` that is a key of
 * AR_PHONETIC_DICTIONARY, the list of candidate replacements: the dictionary
 * entry for that character followed by the replacements of any multi-letter
 * combination starting at that character.
 *
 * Characters that are not dictionary keys are dropped before positions are
 * computed; positions whose candidate list ends up empty are dropped too.
 *
 * @param {string} word
 * @returns {Array<Array>} candidate lists, one per retained position
 */
const getArabicLetter = word => {
  const knownLetters = filter(without(word.split(''), ''), character =>
    has(AR_PHONETIC_DICTIONARY, character)
  );

  const combinationsByPosition = _getLetterCombinations(
    knownLetters,
    AR_PHONETIC_DICTIONARY
  );

  const candidatesByPosition = map(knownLetters, (character, position) =>
    AR_PHONETIC_DICTIONARY[character].concat(combinationsByPosition[position])
  );

  return candidatesByPosition.filter(candidates => candidates.length !== 0);
};

/**
 * Tells whether a list of candidate groups contains no usable entry.
 *
 * The groups are flattened by one level and falsy entries are discarded with
 * lodash `compact`; the list counts as empty when nothing remains.
 *
 * @param {Array<Array>} letters
 * @returns {boolean}
 */
function isEmptyLetters(letters) {
  const flattened = Array.prototype.concat.apply([], letters);
  const usableEntries = compact(flattened);

  return usableEntries.length === 0;
}

/**
 * For each input word, generates candidate word forms from the phonetic
 * dictionary and keeps those whose `unidecode` transliteration is closest to
 * the input word (smallest Levenshtein distance).
 *
 * Words of ten or more characters are skipped. Words for which no form is
 * produced do not appear in the result.
 *
 * @param {string[]} words
 * @returns {Object<string, string[]>} map from input word to its closest
 *   forms, in the order they were generated and without duplicates
 */
const generateWords = words => {
  const response = {};
  const shortWords = words.filter(word => word.length < 10);

  each(shortWords, word => {
    const letters = getArabicLetter(word);
    const wordForms = isEmptyLetters(letters) ? [] : _createWords(letters);

    // `min` must be compared against undefined, not tested for truthiness:
    // a best distance of 0 (an exact match) is a valid value and must not be
    // overwritten by the distance of a later, worse form.
    let min;
    let closestForms = new Set();

    wordForms.forEach(wordForm => {
      const lev = _levenshtein(word, unidecode(wordForm));

      if (min === undefined || lev < min) {
        min = lev;
        closestForms = new Set();
      }

      if (lev === min) {
        closestForms.add(wordForm);
      }
    });

    if (closestForms.size !== 0) {
      response[word] = [...closestForms];
    }
  });

  return response;
};

/**
 * Computes a soundex-style code for a word using SOUND_EX_CODES.
 * //http://research.ijcaonline.org/volume34/number10/pxc3876054.pdf
 *
 * One leading alef variant and then a leading run of `a e i o u y '` are
 * removed. Each remaining character that has an entry in SOUND_EX_CODES
 * contributes its code, unless that code equals the previously emitted one.
 * A non-empty result is padded with '0' and cut to four characters.
 *
 * @param {string} s
 * @returns {string} the four-character code, or '' when no character had a
 *   code
 */
const soundEx = s => {
  const withoutLeadingAlef = s.replace(/^[إآأا]/g, '');
  const characters = withoutLeadingAlef
    .replace(/^[aeiouy']{1,}/g, '')
    .split('');

  let encoded = '';
  let lastCode = '';

  for (let index = 0; index < characters.length; index++) {
    const character = characters[index];

    if (!SOUND_EX_CODES.hasOwnProperty(character)) {
      continue;
    }

    const code = SOUND_EX_CODES[character];
    const repeatsLastCode = lastCode.length !== 0 && lastCode === code;

    if (repeatsLastCode) {
      continue;
    }

    lastCode = code;
    encoded += code;
  }

  if (encoded.length === 0) {
    return encoded;
  }
  return (encoded + '000').slice(0, 4);
};

/**
 * Reduces an Arabic word to a shorter stem by applying a fixed sequence of
 * regular-expression rewrites. The order of the rewrites is significant:
 * each step works on the result of the previous one and most steps are gated
 * on the current length of the stem.
 *
 * Outline:
 *  1. strip all Unicode marks (diacritics);
 *  2. strip three-letter, then two-letter prefixes;
 *  3. strip three-letter, then two-letter suffixes;
 *  4. collapse a leading double waw into one;
 *  5. normalise a leading hamza-carrying alef to a bare alef;
 *  6. match the stem against morphological patterns for lengths 6, 5 and 4,
 *     returning as soon as a three-letter result is reached.
 *
 * @param {string} input - the word to stem
 * @returns {string} the stem; a word of three characters or fewer (after
 *   steps 1-5) is returned as is
 */
const stemmer = input => {
  // Debug tracing is compiled in but disabled; flip to true to log each step.
  const isActiveDebugMod = false;
  let stem = input;

  // Stemming step 1. Strip diacritics
  stem = XRegExp.replace(stem, XRegExp('\\p{M}', 'g'), '');
  if (isActiveDebugMod) {
    log.info(`step 1. : ${input} -> ${stem}`);
  }

  // Stemming step 2. remove length three and length two prefixes in this order
  if (stem.length >= 6) {
    stem = stem.replace(/^(كال|بال|ولل|وال)(.*)$/i, '$2');
  }
  if (stem.length >= 5) {
    stem = stem.replace(/^(ال|لل)(.*)$/i, '$2');
  }
  if (isActiveDebugMod) {
    log.info(`step 2. : ${stem}`);
  }

  // Stemming step 3. remove length three and length two suffixes in this order
  if (stem.length >= 6) {
    stem = stem.replace(/^(.*)(تما|هما|تان|تين|كما)$/i, '$1');// eslint-disable-line
  }
  if (stem.length >= 5) {
    stem = stem.replace(/^(.*)(ون|ات|ان|ين|تن|كم|هن|نا|يا|ها|تم|كن|ني|وا|ما|هم)$/i, '$1');// eslint-disable-line
  }
  if (isActiveDebugMod) {
    log.info(`step 3. : ${stem}`);
  }

  // Stemming step 4. collapse a leading double waw into a single waw
  if (stem.length >= 4) {
    stem = stem.replace(/^وو/i, 'و');
  }
  if (isActiveDebugMod) {
    log.info(`step 4. : ${stem}`);
  }

  // Stemming step 5. normalize initial hamza to bare alif
  if (stem.length >= 4) {
    stem = stem.replace(/^[آأإ]/i, 'ا');
  }
  if (isActiveDebugMod) {
    log.info(`step 5. : ${stem}`);
  }
  if (stem.length <= 3) {
    return stem;
  }

  // Stemming step 6. process length six patterns and extract length three roots
  if (stem.length === 6) {
    stem = stem.replace(/^[ام]ست(...)$/i, '$1'); // مستفعل - استفعل
    stem = stem.replace(/^[تم](.)ا(.)ي(.)$/i, '$1$2$3'); // تفاعيل - مفاعيل
    stem = stem.replace(/^م(..)ا(.)ة$/i, '$1$2'); // مفعالة
    stem = stem.replace(/^ا(.)[تط](.)ا(.)$/i, '$1$2$3'); // افتعال
    stem = stem.replace(/^ا(.)(.)و\2(.)$/i, '$1$2$3'); // افعوعل
    if (stem.length === 3) {
      return stem;
    } else {
      stem = stem.replace(/[ةهيكتان]$/i, ''); // single letter suffixes
      //if (stem.length === 4 ) { TODO: initiate 4 letter word routine? }
      //if (stem.length === 5 ) { TODO: initiate 5 letter word routine? }
      stem = stem.replace(/^(..)ا(..)$/i, '$1$2'); // فعالل
      stem = stem.replace(/^ا(...)ا(.)$/i, '$1$2'); // افعلال
      // Four wildcards after the prefix; a literal Arabic full stop (U+06D4)
      // in place of one of them would prevent the pattern from ever matching.
      stem = stem.replace(/^مت(....)$/i, '$1'); // متفعلل

      stem = stem.replace(/^[لبفسويتنامك]/i, ''); // single letter prefixes, including م for مفعلل
      if (stem.length === 6) {
        stem = stem.replace(/^(..)ا(.)ي(.)$/i, '$1$2$3'); // فعاليل
      }
    }
  }
  if (isActiveDebugMod) {
    log.info(`after length 6 : ${stem}`);
  }

  // Length five patterns; also reached by six-letter stems shortened above.
  if (stem.length === 5) {
    stem = stem.replace(/^ا(.)[اتط](.)(.)$/i, '$1$2$3'); //   افتعل   -  افاعل
    stem = stem.replace(/^م(.)(.)[يوا](.)$/i, '$1$2$3'); //   مفعول  -   مفعال  -   مفعيل
    stem = stem.replace(/^[اتم](.)(.)(.)ة$/i, '$1$2$3'); //   مفعلة  -    تفعلة   -  افعلة
    stem = stem.replace(/^[يتم](.)[تط](.)(.)$/i, '$1$2$3'); //   مفتعل  -    يفتعل   -  تفتعل
    stem = stem.replace(/^[تم](.)ا(.)(.)$/i, '$1$2$3'); //   مفاعل  -  تفاعل
    stem = stem.replace(/^(.)(.)[وا](.)ة$/i, '$1$2$3'); //   فعولة  -   فعالة
    stem = stem.replace(/^[ما]ن(.)(.)(.)$/i, '$1$2$3'); //   انفعل   -   منفعل
    stem = stem.replace(/^ا(.)(.)ا(.)$/i, '$1$2$3'); //    افعال
    stem = stem.replace(/^(.)(.)(.)ان$/i, '$1$2$3'); //    فعلان
    stem = stem.replace(/^ت(.)(.)ي(.)$/i, '$1$2$3'); //    تفعيل
    stem = stem.replace(/^(.)ا(.)و(.)$/i, '$1$2$3'); //    فاعول
    stem = stem.replace(/^(.)وا(.)(.)$/i, '$1$2$3'); //    فواعل
    stem = stem.replace(/^(.)(.)ائ(.)$/i, '$1$2$3'); //    فعائل
    stem = stem.replace(/^(.)ا(.)(.)ة$/i, '$1$2$3'); //    فاعلة
    stem = stem.replace(/^(.)(.)ا(.)ي$/i, '$1$2$3'); //    فعالي
    if (stem.length === 3) {
      return stem;
    } else {
      stem = stem.replace(/^[اتم]/i, ''); //    تفعلل - افعلل - مفعلل

      stem = stem.replace(/[ةهيكتان]$/i, ''); // single letter suffixes
      //if (stem.length === 4 ) { TODO: initiate 4 letter word routine? }
      stem = stem.replace(/^(..)ا(..)$/i, '$1$2'); //    فعالل
      stem = stem.replace(/^(...)ا(.)$/i, '$1$2'); //    فعلال
      stem = stem.replace(/^[لبفسويتنامك]/i, ''); // single letter prefixes, including م for مفعلل
    }
  }
  if (isActiveDebugMod) {
    log.info(`after length 5 : ${stem}`);
  }

  // Length four patterns; also reached by longer stems shortened above.
  if (stem.length === 4) {
    stem = stem.replace(/^م(.)(.)(.)$/i, '$1$2$3'); // مفعل
    stem = stem.replace(/^(.)ا(.)(.)$/i, '$1$2$3'); // فاعل
    stem = stem.replace(/^(.)(.)[يوا](.)$/i, '$1$2$3'); // فعال   -   فعول    - فعيل
    stem = stem.replace(/^(.)(.)(.)ة$/i, '$1$2$3'); // فعلة
    if (stem.length === 3) {
      return stem;
    } else {
      stem = stem.replace(/^(.)(.)(.)[ةهيكتان]$/i, '$1$2$3'); // single letter suffixes
      if (stem.length === 3) {
        return stem;
      }
      stem = stem.replace(/^[لبفسويتناك](.)(.)(.)$/i, '$1$2$3'); // single letter prefixes
    }
  }
  if (isActiveDebugMod) {
    log.info(`after length 4 : ${stem}`);
  }
  return stem;
};

/**
 * Splits a string into tokens, where a token is a maximal run of characters
 * matched by `tokenizArRe`.
 *
 * @param {string} str
 * @returns {string[]} the tokens in order of appearance; an empty array when
 *   nothing matches
 */
const tokenizer = str => {
  const matches = str.match(tokenizArRe);
  // lodash `without` also turns a null match result into an empty array.
  const cleaned = without(matches, '', ' ');

  if (cleaned) {
    return cleaned;
  }
  return [];
};

// Public API of this module. Functions defined above that are not listed here
// are not exported.
export default {
  generateWords,
  soundEx,
  replaceDiacritic,
  stemmer,
  tokenizer,
};
