import ngrams from 'natural/lib/natural/ngrams'
import spellcheckers from 'natural/lib/natural/spellcheck'
import tokenizers from 'natural/lib/natural/tokenizers'

/** Loosely-typed dictionary: any property key mapped to any value. */
type TObject = Record<any, any>

/**
 * Canonical source names in lower case.
 * This list is the dictionary handed to the spellchecker below.
 */
const variants = Array.from(
  ['Instagram', 'YouTube', 'TikTok', 'Google Search', 'School', 'Friends', 'ChatGPT'],
  (label) => label.toLowerCase(),
)

/**
 * Keyword hints per source category.
 *
 * Consulted by the keyword fallback when a sentence contains none of the
 * canonical names. Keys are the display-cased category names; each hint is
 * lower-cased by the matcher before it is compared.
 */
const words: TObject = {
  Instagram: ['Insta', 'stories'],

  YouTube: ['video', 'shorts', 'yt', 'vid'],

  'Google Search': [
    // search wording
    'google',
    'looking for',
    'googled',
    'searched',
    'searching',
    // generic "on the web" wording and browsers
    'Internet',
    'chrome',
    'safari',
    'edge',
    'online',
    'web',
  ],

  School: ['Teacher', 'class'],

  Friends: [
    'mate',
    // family members
    'mom',
    'mother',
    'dad',
    'father',
    'uncle',
    'sister',
    'brother',
    'parent',
    // unspecified person
    'someone',
  ],

  ChatGPT: ['AI'],
}

/**
 * Maps a lower-cased classification key to the label stored as the answer's
 * source. Note that `school` is reported as 'School / University'.
 */
const response_formatter: TObject = {
  // platforms
  instagram: 'Instagram',
  youtube: 'YouTube',
  tiktok: 'TikTok',
  'google search': 'Google Search',
  chatgpt: 'ChatGPT',

  // people and places
  school: 'School / University',
  friends: 'Friends',

  // catch-all
  other: 'Other',
}

// Shared NLP helpers, created once when the module is loaded.
const { NGrams } = ngrams
const tokenizer = new tokenizers.TreebankWordTokenizer()
// The spellchecker's dictionary is the lower-cased list of canonical source names.
const spellcheck = new spellcheckers.Spellcheck(variants)

/**
 * Keyword-based fallback classifier.
 *
 * Scans the sentence for the hint words and phrases listed in `words` and
 * reports every category that has at least one hit.
 *
 * Matching is case-insensitive: both the sentence tokens and the hints are
 * lower-cased before they are compared, so "Google", "AI" or "Teacher" in the
 * answer are recognised. Multi-word hints such as 'looking for' are matched
 * against runs of consecutive tokens, because they can never equal a single
 * token.
 *
 * @param sentence free-text answer to scan
 * @returns the lower-cased names of all matching categories (in the order the
 *          categories appear in `words`), or the string 'Other' when nothing
 *          matched
 */
const analyzeSentenceWithSimilarWords = (sentence: string) => {
  const sentence_tokens = tokenizer.tokenize(sentence).map((token: string) => token.toLowerCase())

  // Space-padded token stream: a hint only matches on whole-token boundaries,
  // and a multi-word hint matches a run of adjacent tokens.
  const token_stream = ` ${sentence_tokens.join(' ')} `

  const matched_categories = Object.keys(words)
    .filter((category) => words[category].some((word: string) => token_stream.includes(` ${word.toLowerCase()} `)))
    .map((category) => category.toLowerCase())

  return matched_categories.length ? matched_categories : 'Other'
}

/**
 * Classifies a free-text answer against the canonical source names.
 *
 * Every token, and every pair of adjacent words glued together (so that
 * "you tube" is seen as "youtube"), is looked up in the spellchecker. A
 * candidate that is a dictionary word is kept as is; otherwise it is replaced
 * by whatever corrections the spellchecker offers for a limit of 1 (possibly
 * none). When neither tokens nor bigrams produce a hit, the keyword fallback
 * decides instead.
 *
 * The sentence is lower-cased before lookup because the dictionary only holds
 * lower-case names. Without this, correctly spelled input such as "YouTube",
 * "TikTok" or "ChatGPT" differs from its dictionary form in two or more
 * characters and is beyond the correction limit of 1.
 *
 * @param sentence free-text answer to classify
 * @returns `result`: the de-duplicated dictionary names that were hit, or the
 *          keyword fallback's answer when there were none; `tokens` / `bigrams`:
 *          the hits contributed by single tokens and by word pairs respectively
 */
const analyzeSentence = (sentence: string) => {
  const normalized = sentence.toLowerCase()

  const sentence_tokens = tokenizer.tokenize(normalized)
  const sentence_bigrams = NGrams.bigrams(normalized).map((bigram) => bigram.join(''))

  // Keep a candidate that is already a dictionary word, otherwise swap it for
  // its corrections; an empty correction list simply contributes nothing.
  const toVariants = (candidate: string) =>
    spellcheck.isCorrect(candidate) ? [candidate] : spellcheck.getCorrections(candidate, 1)

  const tokens = sentence_tokens.flatMap(toVariants)
  const bigrams = sentence_bigrams.flatMap(toVariants)

  const has_direct_hit = tokens.length > 0 || bigrams.length > 0

  return {
    // The fallback receives the untouched sentence; it does its own normalisation.
    result: has_direct_hit ? Array.from(new Set([...tokens, ...bigrams])) : analyzeSentenceWithSimilarWords(sentence),
    tokens,
    bigrams,
  }
}

/**
 * Re-labels a quiz answer whose source was given as 'Other' when its free-text
 * detail points unambiguously at a known source.
 *
 * @param source          the source picked by the user
 * @param source_detailed the free-text detail accompanying the answer
 * @returns the inputs unchanged when `source` is not 'Other' or when the
 *          detail does not resolve to exactly one known source; otherwise the
 *          formatted source label with `source_detailed` cleared to ''
 */
export const classifyQuizSource = (source: string, source_detailed: string) => {
  // Anything other than a free-text "Other" answer is passed through untouched.
  if (source !== 'Other') return { source, source_detailed }

  const { result } = analyzeSentence(source_detailed)

  // Reject the 'Other' string, ambiguous (multi-hit) results and unknown keys.
  if (!Array.isArray(result) || result.length !== 1 || !(result[0] in response_formatter)) {
    return { source, source_detailed }
  }

  return { source: response_formatter[result[0]], source_detailed: '' }
}
