Visualizing Offensive language keywords - Word2Vec and t-SNE¶

March 22, 2021

Code is adapted from the article "Google News and Leo Tolstoy: Visualizing Word2Vec Word Embeddings using t-SNE" by Sergey Smetanin.

We use Google's Word2Vec vectors from https://code.google.com/archive/p/word2vec/.

From that page: "We are publishing pre-trained vectors trained on part of Google News dataset (about 100 billion words). The model contains 300-dimensional vectors for 3 million words and phrases. The phrases were obtained using a simple data-driven approach described by Mikolov et al., 2013."

Tomas Mikolov, Ilya Sutskever, Kai Chen, Greg Corrado, and Jeffrey Dean. Distributed Representations of Words and Phrases and their Compositionality. In Proceedings of NIPS, 2013.

Imports¶

import gensim
import gensim.downloader
from sklearn.manifold import TSNE
from sklearn.manifold import MDS
from sklearn.decomposition import PCA
import numpy as np
import tempfile
import imageio
import shutil
import os
from statistics import mean
import pandas as pd 
from nltk.stem import WordNetLemmatizer 
from nltk.stem import PorterStemmer 
from gensim.test.utils import datapath
from gensim.models.fasttext import load_facebook_vectors

from IPython.display import Image
from IPython.display import display
pd.options.display.max_columns = None

import matplotlib.patheffects as PathEffects
import matplotlib.pyplot as plt
import matplotlib.cm as cm
%matplotlib inline

from matplotlib.axes._axes import _log as matplotlib_axes_logger
matplotlib_axes_logger.setLevel('ERROR')

Constants¶

# Number of neighbouring words kept per keyword.
TOP_N = 30

# Every keyword starts with its own empty set; a later cell fills each set
# with the word forms that must not count as neighbours of that keyword.
# NOTE(review): "harrasment" looks like a misspelling of "harassment". It is
# used verbatim as a query word, so confirm intent before changing it.
KEYS = {
    keyword: set()
    for keyword in (
        "offensive",
        "abusive",
        "cyberbullying",
        "vulgar",
        "racist",
        "homophobic",
        "profane",
        "slur",
        "harrasment",
        "obscene",
        "threat",
        "discredit",
        "hateful",
        "insult",
        "hostile",
        "taboo",
    )
}

# Keyword order frozen as a list; it fixes the cluster and column order.
FIXED_KEYS = list(KEYS)

Words that are too similar to a keyword (the keyword itself, its lemma and its stem) are not counted as neighbouring words:

# Fill every keyword's omit-set with the keyword itself plus its WordNet lemma
# and its Porter stem.
lemmatizer = WordNetLemmatizer()
ps = PorterStemmer()

for key, omit_set in KEYS.items():
    lemma, stem = lemmatizer.lemmatize(key), ps.stem(key)
    # In-place union: the set stored in KEYS is mutated, not replaced.
    omit_set |= {lemma, stem, key}
# Bare expression so the notebook displays the resulting mapping.
KEYS

{'offensive': {'offens', 'offensive'},
 'abusive': {'abus', 'abusive'},
 'cyberbullying': {'cyberbulli', 'cyberbullying'},
 'vulgar': {'vulgar'},
 'racist': {'racist'},
 'homophobic': {'homophob', 'homophobic'},
 'profane': {'profan', 'profane'},
 'slur': {'slur'},
 'harrasment': {'harras', 'harrasment'},
 'obscene': {'obscen', 'obscene'},
 'threat': {'threat'},
 'discredit': {'discredit'},
 'hateful': {'hate', 'hateful'},
 'insult': {'insult'},
 'hostile': {'hostil', 'hostile'},
 'taboo': {'taboo'}}

Functions¶

def same_word(similar_word, ommit_words):
    """Tell whether a candidate neighbour is merely a variant of the keyword.

    The candidate is normalised first: underscores and hyphens become spaces
    and the text is lower-cased. It counts as the "same word" when any entry
    of ``ommit_words`` occurs inside the normalised candidate as a substring
    (an exact match is just one such case).

    Args:
        similar_word: Candidate word or phrase.
        ommit_words: Collection of word forms to match against.

    Returns:
        True when the candidate matches (the match is also printed as
        ``<ommit_words> -- <normalised candidate>``), otherwise False.
    """
    candidate = similar_word.translate(str.maketrans("_-", "  ")).lower()

    if not any(fragment in candidate for fragment in ommit_words):
        return False

    print(f"{ommit_words} -- {candidate}")
    return True

def getSimilarWords(model_gn):
    """Collect the nearest neighbours of every keyword in ``FIXED_KEYS``.

    For each keyword the model is asked for ``TOP_N * 3`` most similar words.
    Candidates rejected by ``same_word`` (variants of the keyword itself) are
    dropped and the first ``TOP_N`` survivors are kept. If fewer than
    ``TOP_N`` survive, ``ERROR`` is printed and the shorter cluster is kept.

    Args:
        model_gn: Embedding model offering ``most_similar(word, topn=...)``,
            which yields ``(word, score)`` pairs, and vector lookup via
            ``model_gn[word]``.

    Returns:
        A ``(word_clusters, embedding_clusters)`` tuple of parallel lists with
        one entry per keyword, in ``FIXED_KEYS`` order.
    """
    word_clusters, embedding_clusters = [], []

    for key in FIXED_KEYS:
        omit = KEYS[key]
        candidates = model_gn.most_similar(key, topn=TOP_N * 3)

        kept_words = [word for word, _ in candidates if not same_word(word, omit)]
        kept_vectors = [model_gn[word] for word in kept_words]

        # Both lists always have equal length, so one length check suffices.
        if len(kept_words) < TOP_N:
            print("ERROR")

        word_clusters.append(kept_words[:TOP_N])
        embedding_clusters.append(kept_vectors[:TOP_N])

    return (word_clusters, embedding_clusters)

def displayDF(word_clusters):
    """Render the neighbour words as a table with one column per keyword.

    Args:
        word_clusters: Sequence of word lists, paired positionally with
            ``FIXED_KEYS``.
    """
    columns = {key: words for key, words in zip(FIXED_KEYS, word_clusters)}
    table = pd.DataFrame(columns)
    display(table)

def plot_similar_words(title, labels, embedding_clusters, word_clusters, filename=None):
    """Scatter-plot 2-D word embeddings, one colour per keyword cluster.

    Every point is annotated with its word, and the upper-cased cluster label
    is drawn at the mean position of the cluster's points.

    Args:
        title: Figure title.
        labels: One label per cluster (used for the legend and the centroid
            text).
        embedding_clusters: One 2-D array per cluster; column 0 is read as x
            and column 1 as y, so NumPy-style ``[:, i]`` indexing is required.
        word_clusters: One list of words per cluster, aligned with the rows of
            the matching embedding array.
        filename: When truthy, the figure is also saved there as a PNG.
    """
    plt.figure(figsize=(16, 9))
    colors = cm.rainbow(np.linspace(0, 1, len(labels)))
    for label, embeddings, words, color in zip(labels, embedding_clusters, word_clusters, colors):
        x = embeddings[:, 0]
        y = embeddings[:, 1]
        # Pass the RGBA value as a single-row 2-D sequence: a bare 1-D RGBA
        # array makes matplotlib warn and, for a 4-point cluster, would be
        # interpreted as four values to colour-map instead of one colour.
        plt.scatter(x, y, c=[color], alpha=0.7, label=label)
        for i, word in enumerate(words):
            plt.annotate(word, alpha=0.5, xy=(x[i], y[i]), xytext=(5, 2),
                         textcoords='offset points', ha='right', va='bottom', size=8)

        # White text with a black outline keeps the label readable on top of
        # the scattered points.
        plt.text(x.mean(), y.mean(), label.upper(), color='white', weight='bold', fontsize=13,
                 path_effects=[PathEffects.withStroke(linewidth=3, foreground="black", alpha=0.9)])
    plt.legend(loc=4)
    plt.title(title)
    plt.grid(False)
    if filename:
        plt.savefig(filename, format='png', dpi=150, bbox_inches='tight')
    plt.show()

def _plot_projection(reducer, title, word_clusters, embedding_clusters, filename=None):
    """Reduce all cluster embeddings to 2-D with ``reducer`` and plot them.

    Args:
        reducer: Estimator exposing ``fit_transform`` that returns one 2-D
            point per input row.
        title: Figure title.
        word_clusters: One list of words per keyword.
        embedding_clusters: One list of vectors per keyword. Converted with
            ``np.array`` and unpacked as a 3-D shape, so every cluster must
            hold the same number of equally sized vectors.
        filename: Optional path forwarded to ``plot_similar_words``.
    """
    stacked = np.array(embedding_clusters)
    n_clusters, n_words, n_dims = stacked.shape
    # Fit on all words at once so every cluster shares one 2-D space, then
    # restore the per-cluster grouping.
    flat_2d = reducer.fit_transform(stacked.reshape(n_clusters * n_words, n_dims))
    clusters_2d = np.array(flat_2d).reshape(n_clusters, n_words, 2)
    plot_similar_words(title, FIXED_KEYS, clusters_2d, word_clusters, filename)


def plotTSNE(title, word_clusters, embedding_clusters, filename = None):
    """Plot the keyword neighbourhoods after a t-SNE projection to 2-D."""
    reducer = TSNE(perplexity=15, n_components=2, init='pca', n_iter=3500, random_state=32)
    _plot_projection(reducer, title, word_clusters, embedding_clusters, filename)


def plotMDS(title, word_clusters, embedding_clusters, filename = None):
    """Plot the keyword neighbourhoods after an MDS projection to 2-D."""
    reducer = MDS(n_components=2, max_iter=3500, random_state=32)
    _plot_projection(reducer, title, word_clusters, embedding_clusters, filename)


def plotPCA(title, word_clusters, embedding_clusters, filename = None):
    """Plot the keyword neighbourhoods after a PCA projection to 2-D."""
    reducer = PCA(n_components=2, random_state = 32)
    _plot_projection(reducer, title, word_clusters, embedding_clusters, filename)

Word2Vec¶

Loading model

# Pre-trained Google News word2vec vectors, fetched through gensim's downloader.
model_gn = gensim.downloader.load('word2vec-google-news-300')

# gensim >= 4 removed `vocab` (accessing it raises AttributeError) in favour of
# `key_to_index`; gensim 3.x only has `vocab`. Support both.
vocab_size = len(model_gn.key_to_index) if hasattr(model_gn, "key_to_index") else len(model_gn.vocab)
print(f"Vocabulary size: {vocab_size}")

Vocabulary size: 3000000

Getting similar words. The words printed below are neighbouring words that are omitted from the analysis.

# For every keyword, gather its neighbouring words and their vectors from the
# currently loaded model; each rejected candidate is printed as it is skipped.
word_clusters, embedding_clusters = getSimilarWords(model_gn)

{'offensive', 'offens'} -- offensive
{'offensive', 'offens'} -- offensively
{'offensive', 'offens'} -- offense
{'offensive', 'offens'} -- offensively
{'offensive', 'offens'} -- offensive line
{'offensive', 'offens'} -- offensive firepower
{'abusive', 'abus'} -- abusive
{'abusive', 'abus'} -- verbally abusive
{'abusive', 'abus'} -- abuse
{'abusive', 'abus'} -- verbal abuse
{'abusive', 'abus'} -- abused
{'abusive', 'abus'} -- physically abused
{'abusive', 'abus'} -- abuser
{'abusive', 'abus'} -- abusive neglectful
{'abusive', 'abus'} -- verbal abuse
{'abusive', 'abus'} -- profane abusive
{'abusive', 'abus'} -- sexually abusive
{'abusive', 'abus'} -- verbally abused
{'abusive', 'abus'} -- abusing
{'abusive', 'abus'} -- sexually abused
{'abusive', 'abus'} -- abusive profane
{'abusive', 'abus'} -- verbally abusing
{'abusive', 'abus'} -- nonabusive
{'abusive', 'abus'} -- physically abusing
{'cyberbulli', 'cyberbullying'} -- cyberbullying
{'cyberbulli', 'cyberbullying'} -- cyberbullies
{'cyberbulli', 'cyberbullying'} -- bullying cyberbullying
{'cyberbulli', 'cyberbullying'} -- cyberbulling
{'cyberbulli', 'cyberbullying'} -- cyberbullied
{'cyberbulli', 'cyberbullying'} -- cyberbullying sexting
{'cyberbulli', 'cyberbullying'} -- cyberbullies
{'vulgar'} -- vulgarity
{'vulgar'} -- vulgar obscene
{'vulgar'} -- vulgar language
{'vulgar'} -- vulgarism
{'vulgar'} -- profane vulgar
{'vulgar'} -- vulgar language
{'racist'} -- racists
{'racist'} -- racist
{'racist'} -- overtly racist
{'racist'} -- racist sexist
{'racist'} -- blatantly racist
{'racist'} -- racist slur
{'racist'} -- racist remark
{'racist'} -- racist
{'racist'} -- racist overtones
{'racist'} -- racist bigoted
{'racist'} -- racist undertones
{'racist'} -- racist bigot
{'racist'} -- racist slurs
{'racist'} -- racist connotations
{'racist'} -- racist sexist homophobic
{'racist'} -- sexist racist
{'racist'} -- racist homophobic
{'racist'} -- racist taunts
{'racist'} -- racist bigots
{'racist'} -- racist epithets
{'homophobic', 'homophob'} -- homophobia
{'homophobic', 'homophob'} -- homophobic
{'homophobic', 'homophob'} -- homophobes
{'homophobic', 'homophob'} -- homophobe
{'homophobic', 'homophob'} -- homophobic slurs
{'homophobic', 'homophob'} -- homophobic insults
{'homophobic', 'homophob'} -- homophobic bullying
{'homophobic', 'homophob'} -- homophobic attitudes
{'homophobic', 'homophob'} -- homophobic taunts
{'homophobic', 'homophob'} -- sexist homophobic
{'homophobic', 'homophob'} -- virulently homophobic
{'homophobic', 'homophob'} -- blatant homophobia
{'homophobic', 'homophob'} -- racist sexist homophobic
{'homophobic', 'homophob'} -- homophobic misogynistic
{'homophobic', 'homophob'} -- homophobia
{'homophobic', 'homophob'} -- bigot homophobe
{'homophobic', 'homophob'} -- homophobic bigots
{'homophobic', 'homophob'} -- racist homophobic sexist
{'profan', 'profane'} -- profanity
{'profan', 'profane'} -- profanities
{'profan', 'profane'} -- profanity
{'profan', 'profane'} -- profane vulgar
{'profan', 'profane'} -- profane abusive
{'profan', 'profane'} -- obscene profane
{'profan', 'profane'} -- abusive profane
{'profan', 'profane'} -- vulgar profane
{'profan', 'profane'} -- profane
{'profan', 'profane'} -- profanity laced
{'profan', 'profane'} -- profanity laden
{'profan', 'profane'} -- profanity obscenity
{'profan', 'profane'} -- mild profanity
{'slur'} -- racial slur
{'slur'} -- racist slur
{'slur'} -- slurs
{'slur'} -- racial slurs
{'slur'} -- homophobic slurs
{'slur'} -- homophobic slur
{'slur'} -- sexist slurs
{'slur'} -- sexist slur
{'slur'} -- racist slurs
{'slur'} -- homosexual slur
{'slur'} -- ethnic slur
{'slur'} -- gay slur
{'slur'} -- derogatory slurs
{'slur'} -- antigay slur
{'slur'} -- racial slurs
{'slur'} -- derogatory slur
{'slur'} -- anti semitic slur
{'slur'} -- ethnic slurs
{'slur'} -- homosexual slurs
{'harras', 'harrasment'} -- harrassment
{'harras', 'harrasment'} -- sexual harrasment
{'harras', 'harrasment'} -- harrasing
{'harras', 'harrasment'} -- sexual harrassment
{'harras', 'harrasment'} -- harrassment
{'harras', 'harrasment'} -- harrassing
{'harras', 'harrasment'} -- harrased
{'obscene', 'obscen'} -- obscene
{'obscene', 'obscen'} -- vulgar obscene
{'obscene', 'obscen'} -- obscenity
{'obscene', 'obscen'} -- obscene pornographic
{'obscene', 'obscen'} -- profanity obscenity
{'obscene', 'obscen'} -- obscenities
{'obscene', 'obscen'} -- obscenity pornography
{'threat'} -- threats
{'threat'} -- threat posed
{'threat'} -- threat
{'threat'} -- existential threat
{'threat'} -- pose threat
{'threat'} -- graver threat
{'threat'} -- gravest threat
{'threat'} -- threats
{'threat'} -- threatened
{'threat'} -- cyberthreat
{'threat'} -- threaten
{'threat'} -- threatening
{'threat'} -- gravest threats
{'threat'} -- threatens
{'threat'} -- veiled threat
{'discredit'} -- discrediting
{'discredit'} -- discredit plame
{'discredit'} -- discrediting
{'discredit'} -- discredited
{'discredit'} -- discredits
{'hate', 'hateful'} -- vile hateful
{'hate', 'hateful'} -- hate mongering
{'hate', 'hateful'} -- hateful rhetoric
{'hate', 'hateful'} -- hatefully
{'hate', 'hateful'} -- hatefulness
{'hate', 'hateful'} -- hate
{'hate', 'hateful'} -- spew hateful
{'hate', 'hateful'} -- offensive hateful
{'hate', 'hateful'} -- hate mongers
{'hate', 'hateful'} -- hateful racist
{'hate', 'hateful'} -- hatemongering
{'hate', 'hateful'} -- hatemongers
{'hate', 'hateful'} -- bigoted hateful
{'hate', 'hateful'} -- hate speech
{'hate', 'hateful'} -- hatered
{'insult'} -- insulting
{'insult'} -- insulted
{'insult'} -- insult
{'insult'} -- gratuitous insult
{'insult'} -- insults
{'insult'} -- adding insult
{'insult'} -- grievous insult
{'insult'} -- gratuitously insulting
{'insult'} -- racially insulting
{'insult'} -- insulting
{'hostile', 'hostil'} -- hostile
{'hostile', 'hostil'} -- hostility
{'hostile', 'hostil'} -- nonhostile
{'hostile', 'hostil'} -- nonhostile causes
{'hostile', 'hostil'} -- hostile takeover
{'hostile', 'hostil'} -- racially hostile
{'hostile', 'hostil'} -- hostile takeover
{'hostile', 'hostil'} -- outright hostility
{'hostile', 'hostil'} -- overtly hostile
{'hostile', 'hostil'} -- nonhostile incidents
{'hostile', 'hostil'} -- hostilely
{'hostile', 'hostil'} -- overt hostility
{'hostile', 'hostil'} -- hostility toward
{'hostile', 'hostil'} -- hostile takeover bid
{'hostile', 'hostil'} -- implacably hostile
{'taboo'} -- taboos
{'taboo'} -- taboo subjects
{'taboo'} -- taboo topic
{'taboo'} -- tabooed
{'taboo'} -- cultural taboos
{'taboo'} -- taboos
{'taboo'} -- unspoken taboo
{'taboo'} -- societal taboos

# Table of the kept neighbour words, one column per keyword.
displayDF(word_clusters)

# 2-D projections of the neighbour embeddings (t-SNE, MDS, PCA); each call
# shows the figure and also saves it as a PNG under the given filename.
plotTSNE("Similar words - Word2Vec [t-SNE]", word_clusters, embedding_clusters, "SimilarWords - word2vec - t-SNE.png")

plotMDS("Similar words - Word2Vec [MDS]", word_clusters, embedding_clusters, "SimilarWords - word2vec - MDS.png")

plotPCA("Similar words - Word2Vec [PCA]", word_clusters, embedding_clusters, "SimilarWords - word2vec - PCA.png")

Glove¶

Loading model

# Pre-trained GloVe vectors, fetched through gensim's downloader. The variable
# name `model_gn` is reused so the cells below work unchanged.
model_gn = gensim.downloader.load('glove-wiki-gigaword-300')

# gensim >= 4 removed `vocab` (accessing it raises AttributeError) in favour of
# `key_to_index`; gensim 3.x only has `vocab`. Support both.
vocab_size = len(model_gn.key_to_index) if hasattr(model_gn, "key_to_index") else len(model_gn.vocab)
print(f"Vocabulary size: {vocab_size}")

Vocabulary size: 400000

Getting similar words. The words printed below are neighbouring words that are omitted from the analysis.

# For every keyword, gather its neighbouring words and their vectors from the
# currently loaded model; each rejected candidate is printed as it is skipped.
word_clusters, embedding_clusters = getSimilarWords(model_gn)

{'offensive', 'offens'} -- offense
{'offensive', 'offens'} -- offensives
{'offensive', 'offens'} -- offensively
{'abusive', 'abus'} -- abuse
{'abusive', 'abus'} -- abused
{'abusive', 'abus'} -- abusing
{'vulgar'} -- vulgarity
{'racist'} -- racists
{'homophobic', 'homophob'} -- homophobia
{'profan', 'profane'} -- profanity
{'slur'} -- slurs
{'harras', 'harrasment'} -- harrassment
{'obscene', 'obscen'} -- obscenity
{'obscene', 'obscen'} -- obscenities
{'threat'} -- threats
{'threat'} -- threatening
{'threat'} -- threatened
{'threat'} -- threaten
{'threat'} -- threatens
{'discredit'} -- discrediting
{'discredit'} -- discredited
{'hate', 'hateful'} -- hate
{'insult'} -- insulting
{'insult'} -- insults
{'insult'} -- insulted
{'hostile', 'hostil'} -- hostility
{'taboo'} -- taboos

# Table of the kept neighbour words, one column per keyword.
displayDF(word_clusters)

# 2-D projections of the neighbour embeddings (t-SNE, MDS, PCA); each call
# shows the figure and also saves it as a PNG under the given filename.
plotTSNE("Similar words - Glove [t-SNE]", word_clusters, embedding_clusters, "SimilarWords - Glove - t-SNE.png")

plotMDS("Similar words - Glove [MDS]", word_clusters, embedding_clusters, "SimilarWords - Glove - MDS.png")

plotPCA("Similar words - Glove [PCA]", word_clusters, embedding_clusters, "SimilarWords - Glove - PCA.png")

fastText¶

Loading model

# Run first time only to download model
#import fasttext.util
#fasttext.util.download_model('en', if_exists='ignore')  # English
#ft = fasttext.load_model('cc.en.300.bin')

# Chaya used: https://fasttext.cc/docs/en/crawl-vectors.html
# It includes both Common Crawl and Wikipedia - file cc.en.300.bin.

# Load the vectors from the local fastText binary. The variable name
# `model_gn` is reused so the cells below work unchanged.
model_gn = load_facebook_vectors("cc.en.300.bin")

# gensim >= 4 removed `vocab` (accessing it raises AttributeError) in favour of
# `key_to_index`; gensim 3.x only has `vocab`. Support both.
vocab_size = len(model_gn.key_to_index) if hasattr(model_gn, "key_to_index") else len(model_gn.vocab)
print(f"Vocabulary size: {vocab_size}")

Vocabulary size: 2000000

Getting similar words. The words printed below are neighbouring words that are omitted from the analysis.

# For every keyword, gather its neighbouring words and their vectors from the
# currently loaded model; each rejected candidate is printed as it is skipped.
word_clusters, embedding_clusters = getSimilarWords(model_gn)

{'offensive', 'offens'} -- offensive
{'offensive', 'offens'} -- offensively
{'offensive', 'offens'} -- offensive
{'offensive', 'offens'} -- offensive 
{'offensive', 'offens'} -- offensiv
{'offensive', 'offens'} -- offensive.the
{'offensive', 'offens'} -- offensive.
{'offensive', 'offens'} -- non offensive
{'offensive', 'offens'} -- offense
{'offensive', 'offens'} -- offensive oriented
{'offensive', 'offens'} -- offensiveness
{'offensive', 'offens'} -- offensive.i
{'offensive', 'offens'} -- offensives
{'offensive', 'offens'} -- offensive line
{'offensive', 'offens'} -- offensive defensive
{'offensive', 'offens'} -- offensive minded
{'offensive', 'offens'} -- nonoffensive
{'offensive', 'offens'} -- offense.it
{'offensive', 'offens'} -- offens
{'offensive', 'offens'} -- offense first
{'offensive', 'offens'} -- counter offensive
{'offensive', 'offens'} -- counter offensives
{'offensive', 'offens'} -- offensiveness
{'offensive', 'offens'} -- offense.this
{'offensive', 'offens'} -- offense oriented
{'offensive', 'offens'} -- offense minded
{'offensive', 'offens'} -- offense.that
{'offensive', 'offens'} -- offense.but
{'offensive', 'offens'} -- offensively challenged
{'offensive', 'offens'} -- unoffensive
{'offensive', 'offens'} -- counteroffensives
{'offensive', 'offens'} -- offensively
{'abusive', 'abus'} -- abusiveness
{'abusive', 'abus'} -- abusive
{'abusive', 'abus'} -- abusive
{'abusive', 'abus'} -- abused
{'abusive', 'abus'} -- non abusive
{'abusive', 'abus'} -- abuse
{'abusive', 'abus'} -- abuser
{'abusive', 'abus'} -- abusive.
{'abusive', 'abus'} -- abusively
{'abusive', 'abus'} -- nonabusive
{'abusive', 'abus'} -- abusing
{'abusive', 'abus'} -- self abusive
{'abusive', 'abus'} -- abusers
{'abusive', 'abus'} -- abuses
{'abusive', 'abus'} -- abuser
{'cyberbulli', 'cyberbullying'} -- cyberbullying
{'cyberbulli', 'cyberbullying'} -- cyberbulling
{'cyberbulli', 'cyberbullying'} -- cyberbullies
{'cyberbulli', 'cyberbullying'} -- cyberbullied
{'cyberbulli', 'cyberbullying'} -- cyberbullies
{'cyberbulli', 'cyberbullying'} -- anti cyberbullying
{'cyberbulli', 'cyberbullying'} -- cyberbullying
{'cyberbulli', 'cyberbullying'} -- cyberbullying
{'vulgar'} -- vulgarity
{'vulgar'} -- vulgarities
{'vulgar'} -- vulgarism
{'vulgar'} -- vulgarisms
{'vulgar'} -- vulgarly
{'vulgar'} -- non vulgar
{'vulgar'} -- vulgarians
{'vulgar'} -- vulgar
{'racist'} -- racists
{'racist'} -- racist 
{'racist'} -- racist
{'racist'} -- racist.
{'racist'} -- quasi racist
{'racist'} -- racist
{'racist'} -- racistly
{'racist'} -- racist.the
{'racist'} -- non racists
{'racist'} -- racist.i
{'racist'} -- non racist
{'racist'} -- nonracist
{'racist'} -- anti racist
{'racist'} -- racists.
{'racist'} -- racists
{'homophobic', 'homophob'} -- homophobia
{'homophobic', 'homophob'} -- homophobe
{'homophobic', 'homophob'} -- homophobes
{'homophobic', 'homophob'} -- homophobic
{'homophobic', 'homophob'} -- anti homophobic
{'homophobic', 'homophob'} -- non homophobic
{'homophobic', 'homophob'} -- homophobics
{'homophobic', 'homophob'} -- homophobic
{'homophobic', 'homophob'} -- homophobia
{'homophobic', 'homophob'} -- homophobes
{'homophobic', 'homophob'} -- anti homophobia
{'profan', 'profane'} -- profanely
{'profan', 'profane'} -- profanity
{'profan', 'profane'} -- profanities
{'profan', 'profane'} -- profanity laden
{'profan', 'profane'} -- profane
{'profan', 'profane'} -- profanity filled
{'profan', 'profane'} -- profanes
{'profan', 'profane'} -- profanity laced
{'profan', 'profane'} -- profaned
{'profan', 'profane'} -- profanity free
{'profan', 'profane'} -- profaning
{'profan', 'profane'} -- profanation
{'slur'} -- slurs
{'slur'} -- slur
{'slur'} -- slurring
{'harras', 'harrasment'} -- harrassment
{'harras', 'harrasment'} -- harrasement
{'harras', 'harrasment'} -- harrassement
{'harras', 'harrasment'} -- harrasment
{'harras', 'harrasment'} -- harrassing
{'harras', 'harrasment'} -- harrasing
{'harras', 'harrasment'} -- harrassment
{'harras', 'harrasment'} -- harrasser
{'harras', 'harrasment'} -- harrased
{'harras', 'harrasment'} -- harrassers
{'harras', 'harrasment'} -- harrass
{'harras', 'harrasment'} -- harrassed
{'harras', 'harrasment'} -- harras
{'harras', 'harrasment'} -- harrasses
{'harras', 'harrasment'} -- harrassing
{'obscene', 'obscen'} -- non obscene
{'obscene', 'obscen'} -- obscene
{'obscene', 'obscen'} -- obscenity
{'obscene', 'obscen'} -- obscenely
{'obscene', 'obscen'} -- obscene
{'obscene', 'obscen'} -- obscenities
{'threat'} -- threats
{'threat'} -- threat.the
{'threat'} -- counter threat
{'threat'} -- threat.but
{'threat'} -- threat.as
{'threat'} -- non threat
{'threat'} -- threat.this
{'threat'} -- threat.
{'threat'} -- threat 
{'threat'} -- threat.a
{'threat'} -- threat.and
{'threat'} -- threat.it
{'threat'} -- threat.if
{'threat'} -- threat.in
{'threat'} -- threath
{'threat'} -- threat.i
{'threat'} -- threat
{'threat'} -- threatening
{'threat'} -- threats.the
{'threat'} -- cyber threat
{'threat'} -- threats
{'threat'} -- non threats
{'threat'} -- threatthe
{'threat'} -- threaten
{'threat'} -- threating
{'threat'} -- threat
{'threat'} -- cyber threats
{'threat'} -- threats.in
{'threat'} -- threats.
{'threat'} -- threatener
{'threat'} -- threats 
{'threat'} -- threatened
{'threat'} -- threats.i
{'threat'} -- threate
{'threat'} -- cyberthreat
{'threat'} -- threat based
{'threat'} -- world threatening
{'threat'} -- cyberthreats
{'threat'} -- no threat
{'threat'} -- threatsthe
{'threat'} -- threathening
{'threat'} -- threatining
{'threat'} -- threatens
{'threat'} -- threated
{'threat'} -- threathen
{'threat'} -- death threat
{'discredit'} -- discrediting
{'discredit'} -- discredits
{'discredit'} -- discrediting
{'discredit'} -- discreditation
{'discredit'} -- discredited
{'hate', 'hateful'} -- hate filled
{'hate', 'hateful'} -- hatefilled
{'hate', 'hateful'} -- hatefull
{'hate', 'hateful'} -- hate driven
{'hate', 'hateful'} -- hatefulness
{'hate', 'hateful'} -- hate fueled
{'hate', 'hateful'} -- hatemongering
{'hate', 'hateful'} -- hatefully
{'hate', 'hateful'} -- hate spewing
{'hate', 'hateful'} -- hate mongering
{'hate', 'hateful'} -- hate fuelled
{'hate', 'hateful'} -- hate filled
{'hate', 'hateful'} -- hate mongers
{'hate', 'hateful'} -- hate monger
{'hate', 'hateful'} -- hatemonger
{'hate', 'hateful'} -- hate speech
{'hate', 'hateful'} -- hatemongers
{'hate', 'hateful'} -- hate
{'insult'} -- insulting
{'insult'} -- insulter
{'insult'} -- insults
{'insult'} -- insult
{'insult'} -- insulted
{'insult'} -- insult.
{'insult'} -- insult
{'insult'} -- insult.i
{'insult'} -- insulters
{'insult'} -- insulting
{'insult'} -- insultive
{'insult'} -- insults.
{'insult'} -- insults
{'insult'} -- insultingly
{'insult'} -- non insulting
{'insult'} -- insults
{'insult'} -- insulting
{'hostile', 'hostil'} -- semi hostile
{'hostile', 'hostil'} -- hostile
{'hostile', 'hostil'} -- non hostile
{'hostile', 'hostil'} -- hostility
{'hostile', 'hostil'} -- hostilely
{'hostile', 'hostil'} -- hostile.
{'hostile', 'hostil'} -- hostil
{'hostile', 'hostil'} -- hostiles
{'hostile', 'hostil'} -- often hostile
{'hostile', 'hostil'} -- hostile
{'taboo'} -- taboos
{'taboo'} -- tabooed
{'taboo'} -- once taboo
{'taboo'} -- taboo.
{'taboo'} -- taboos
{'taboo'} -- taboo
{'taboo'} -- taboo breaking
{'taboo'} -- taboo busting

# Table of the kept neighbour words, one column per keyword.
displayDF(word_clusters)

# 2-D projections of the neighbour embeddings (t-SNE, MDS, PCA); each call
# shows the figure and also saves it as a PNG under the given filename.
plotTSNE("Similar words - fastText [t-SNE]", word_clusters, embedding_clusters, "SimilarWords - fastText - t-SNE.png")

plotMDS("Similar words - fastText [MDS]", word_clusters, embedding_clusters, "SimilarWords - fastText - MDS.png")

plotPCA("Similar words - fastText [PCA]", word_clusters, embedding_clusters, "SimilarWords - fastText - PCA.png")

	offensive	abusive	cyberbullying	vulgar	racist	homophobic	profane	slur	harrasment	obscene	threat	discredit	hateful	insult	hostile	taboo
0	defensive	graphically_depicts_physically	cyber_bullying	profane	racism	racist	vulgar	derogatory	harassment	vulgar	danger	malign	bigoted	affront	unfriendly	touchy_subject
1	coach_Bob_Palcic	Luke_McNorton	sexting	obscene	anti_Semitic	gay_bashing	obscenities	racist_remark	Harassment	indecent	imminent_danger	besmirch	racist	disrespect	antagonistic	frowned_upon
2	guard_RJ_Mattes	Dorian_Wesson	bullying	crass	bigoted	antigay	vulgar_language	epithet	harassments	pornographic	menace	embarrass	vile	disrespectful	nipple_pinching	verboten
3	coach_Jimmy_Heggins	inappropriate	Cyber_bullying	rude	homophobic	transphobic	foul_language	derogatory_remark	harassement	lewd	challenge	delegitimize	hurtful	disgrace	warlike	stigma_attached
4	promoted_Pete_Metzelaars	adequate_Tamberg	cyberstalking	demeaning	hateful	gay	expletives	word_nigger	ASPEN_Colo._Actor	sexually_explicit	dangers	demonize	mean_spirited	denigrate	unwelcoming	forbidden
5	Dave_Borbely	rude	Bullying	politically_incorrect	anti_semitic	homosexual	curse_words	remark	Verbal_abuse	filthy_diatribe	hazard	smear	anti_semetic	demean	mistaken_celebratory_gunfire	touchy
6	coach_George_Yarno	Mo'Nique_searing	cyberbully	sexist	rascist	bigoted	hateful	racial_epithet	violance	inappropriate	nightmare_scenario	marginalize	bigotry	humiliate	confrontational	touchy_subjects
7	coach_Dave_Magazu	Advocate_Safehouse_Project	Cyber_Bullying	raunchy	racialist	anti_Semitic	foul_mouthed	derogatory_language	sexual_harassments	disgusting	possibility	vilify	vitriolic	dishonor	inhospitable	unmentionable
8	coach_Greg_Studrawa	behaves_unreasonably_yells	schoolyard_bullying	culturally_insensitive_inappropriate	racially_motivated	hateful	politically_incorrect	racist_connotations	verbal_abuse	outrageous	peril	denigrate	racist_sexist_homophobic	disrespecting	belligerent	Broaching
9	coach_Dan_Roushar	delete_inappropriate	cyber_bulling	obnoxious	sexist	anti_semitic	containing_advertising_astroturfing	racist	harassing	insulting	concern	belittle	hatred	belittle	neocolonialist_enemies	broach
10	defensively	rude_disrespectful	sexual_predators	risqué	racial	racist_sexist	scatological_references	disparaging	Mr._Lutfi_drugged	innapropriate	dangers_posed	defame	anti_Semitic	taunt	donned_riot_gear	stigma
11	Pashtun_Zarghun_district	Libellous	cyber_bullies	sexual_innuendos	anti_semetic	homosexuals	Ad_Age_reserves	racist_epithet	Rathore_behest	objectionable	Sadequee_countered	intimidate	racist_bigoted	belittling	unsupportive	genital_piercing
12	specialist_Damien_Groce	profane	Sexting	misogynistic	derogatory	sexist	vulgarity	racially_derogatory	homophobic_taunts	sexually_suggestive	risk	mislead	homophobic	disparage	bellicose	socially_acceptable
13	tackle_Pat_McQuistan	foul_language	cyber_stalking	insulting	antisemitic	antisemitic	sarcastic	derogatory_remarks	slander	vile	undeterrable	impugn	bigots	disrespects	retaliatory	broaching
14	tackle_Rob_Droege	disrespectful	Cyberstalking	lewd	blatant_racism	anti_Semetic	swearwords	pejorative	harassment_intimidation	demeaning	imminent	undermine	bigot_homophobe	denigrating	mutated_creatures	socially_unacceptable
15	offfensive	sexually_predatory	bullies	vile	sexist_homophobic	transphobic_bullying	vulgar_obscene	fagot	brickbatting	sexually_exploitive	specter	smear_campaign	racists	use_racial_slurs	thinly_veiled_barb	unspoken
16	coach_Larry_Beightol	editor@kickoff.com	bullying_cyber_bullying	sexually_suggestive	racially_insensitive	slurs	rude	disparaging_remarks	forcible_conversion	pornography	mortal_danger	refute	sexist_homophobic	slur	adversarial	touchier
17	linebacking_crew	demeaning	cyberstalkers	sexual_innuendoes	xenophobic	homosexuality	cusswords	nigger	anti_semitic_tirade	immoral	scare	debunk	religiously_intolerant	offend	hospitable	Female_genital_mutilation
18	linebacking_corp	derogatory	cybersafety	mildly_suggestive	racially_charged	anti_semetic	mildly_suggestive	tirade	discrimation	offensive_hateful	problem	insinuate	vitriol_spewed	shame	intimidating	transexuality
19	bend_don't_break	harassing	identity_theft	puerile	racially_intolerant	gays	scatological	racial_connotations	non_cognisable_offense	disseminating_pornographic	chant_Omm	humiliate	racist_sexist	travesty	violent	unwritten_rule
20	Andria_Hurley	inapproriate	relational_aggression	overtly_sexual	racial_slurs	lesbian	epithets	n_****_r	Carlos_Irwin_Estevez	sexually_exploitative	Naxalism_Maoism	tarnish	sexist_racist_homophobic	demeaning	intimidatory	Forced_marriages
21	Chris_Kapilovic	vulgar_language	textual_harassment	uncouth	slurs	racism	vitriolic	derogatory_slang	brutalities_meted	Libelous	concerns	cast_aspersions	meanspirited	absolute_disgrace	friendly	stigmas
22	playmakers	behaving_violently	bullycide	hateful	anti_Semetic	blatantly_racist	Avoid_lewd_obscene	disparaging_remark	Eve_teasing	inapropriate	photogs_snaps	delegitimise	bigotted	unkind	conciliatory	politically_incorrect
23	coach_Bob_Bostad	drunken_rages	cyberharassment	derogatory	Islamophobic	gay_epithets	Foul_language	unparliamentary_language	eve_teasing	vulgarity	hazards	divert_attention	racist_bigot	offends	uncongenial	Anal_sex
24	Chad_Germer	Crystal_Kimberly_Elise	inhalant_abuse	tasteless	racially_prejudiced	bigotry	No_vulgarity_racial	homophobic_remark	pro_cussers	reprehensible	risks	rebut	despicable	disparaging	Sayyed_Moqtada_al_Sadr	Interracial_dating
25	offen_sive	mother_Erica_Alphonse	homophobic_bullying	smutty	insulting	racist_sexist_anti_Semitic	sexist_racist	racial_epithets	homophobic_insults	Lewd	abstracted_backdrop	implicate	vulgar	disservice	agressive	Lesbianism
26	fensive	vulgar	cybercrime	indecorous	homophobic_sexist	overtly_racist	derogatory	epithets	sexual_harassment	obsenity	Fred_Daskoski_spokesman	subvert	anti_Semetic	insensitive	Panshir_Afghanistan	stigma_associated
27	physically_outmatched	harmfully_lax	cybercrimes	risque	bigot_homophobe	misogynist	Satanic_symbols	derogative	harassment_meted	profane	warning	misinform	homophobic_sexist	disrepectful	decidedly_unfriendly	touchy_topic
28	coach_Pete_Hoener	sexist	cyber_bullying_sexting	overt_sexuality	nigger	LGBT	laced_tirade	homophobic_insults	homophobic_slurs	lude	formidable_obstacle	disinformation	rascist	mockery	Christos_Kittas	patriarchal_society
29	coach_Stacey_Searels	unsupportive	cyber	misogynist	racially_biased	Islamophobic	sexual_innuendos	insulting	uttering_seditious_words	ridiculous	jeopardy	blacken	homophobic_rants	Jorge_Farinacci	intolerant	strictly_forbidden

	offensive	abusive	cyberbullying	vulgar	racist	homophobic	profane	slur	harrasment	obscene	threat	discredit	hateful	insult	hostile	taboo
0	defensive	neglectful	bullying	profane	homophobic	sexist	vulgar	derogatory	romish	indecent	danger	undermine	bigoted	affront	unfriendly	homosexuality
1	lineman	inappropriate	cyberstalking	obscene	sexist	racist	raunchy	homophobic	pi96	lewd	posed	embarrass	hurtful	disrespect	takeover	topic
2	linemen	sexually	cyber-bullying	raunchy	semitic	misogynistic	obscene	epithets	nonfeasance	vulgar	pose	intimidate	racist	offend	belligerent	sacrosanct
3	coordinator	stepfather	sexting	risque	xenophobic	slurs	vulgarity	disparaging	zety	profane	possibility	defame	vile	humiliation	antagonistic	forbidden
4	attack	behavior	anti-gay	crass	anti-semitic	misogynist	misogynistic	uttered	depressurisation	pornographic	warned	rebut	homophobic	shame	enemy	topics
5	quarterback	verbally	vigilantism	sexist	antisemitic	bigoted	ribald	racist	keyrates	inappropriate	warning	humiliate	sexist	provocation	takeovers	touchy
6	blitz	overbearing	victimisation	tasteless	slurs	antisemitic	irreverent	nigger	jiwamol	insulting	poses	tarnish	despicable	ridicule	threats	broached
7	forces	bullying	victimization	suggestive	racism	anti-semitic	scatological	epithet	demoralisation	defamatory	imminent	smear	intolerant	embarrassment	aggressive	incest
8	fighting	disrespectful	self-harm	derogatory	hateful	xenophobic	rude	sexist	supunnabul	gestures	dangers	destabilize	idiotic	disrespectful	fend	subject
9	tackle	domineering	harrassment	disrespectful	bigoted	semitic	bawdy	semitic	ryryryryryry	immoral	possible	portray	disgusting	injustice	enemies	unspoken
10	attacking	husbands	institutionalised	rude	hate	slur	disrespectful	stereotyping	rw96	objectionable	terrorism	marginalize	ignorant	slur	thwart	celibacy
11	assault	discriminatory	falkoff	slang	racial	hateful	risque	remark	equidistance	derogatory	concern	accuse	spiteful	outrageous	intentions	unwritten
12	patriots	exploitative	anti-muslim	demeaning	racially	anti-gay	demeaning	obscenities	kanoksilp	pornography	potential	subvert	vitriolic	slander	friendly	sexuality
13	quarterbacks	alcoholic	autistics	profanity	slur	bullying	derogatory	profanities	continous	blasphemous	terrorist	refute	bellicose	offended	increasingly	stigma
14	defenses	practices	self-injury	disgusting	discriminatory	disparaging	sarcastic	racial	_____________	disrespectful	fears	denigrate	semitic	indignity	bid	lesbianism
15	onslaught	behaviour	anti-spam	racy	derogatory	disrespectful	sexist	insult	suwannakij	gesture	risks	demonize	xenophobic	dignity	threatening	subjects
16	attacks	manipulative	nanostructure	bawdy	insulting	epithets	impolite	pejorative	tom.fowler@chron.com	sacrilegious	serious	mislead	disrespectful	humiliate	engaging	masturbation
17	raiders	oppressive	cybercrime	impolite	fascist	intolerant	hilariously	uttering	zilliacus	subversive	concerns	disinformation	profane	mockery	actions	untouchable
18	troops	immoral	anti-bullying	misogynistic	insults	derogatory	tirade	vulgar	rw97	deemed	risk	destabilise	anti-semitic	compliment	violent	unthinkable
19	backfield	corrupt	texting	scatological	taunts	insensitive	hateful	tirade	rungfapaisarn	libelous	warnings	ploy	intemperate	apologize	overtures	shunned
20	counterattack	coercive	licentiousness	jokes	misogynistic	condescending	playful	expletives	ufdots	outrageous	fear	undermining	vulgar	disgrace	abusive	discussing
21	rebels	priests	rasted	inappropriate	epithets	vulgar	suggestive	insensitive	mongkolporn	racist	consequences	accusation	misogynistic	racist	engage	euphemism
22	effort	racist	ragging	homophobic	violent	bigotry	epithets	demeaning	rosnazura	hateful	attacks	undercut	demeaning	demeaning	attempts	divisive
23	nfl	insulting	homophobic	lewd	stereotypes	transphobic	outbursts	insults	cw96	demeaning	any	distract	barbaric	perceived	opposing	stereotypes
24	jets	improper	revisionary	pretentious	misogynist	chauvinistic	obnoxious	bigoted	compulsivity	explicit	terror	depose	murderous	joke	confrontational	verboten
25	raids	unscrupulous	spamming	hateful	disrespectful	misogyny	boorish	defamatory	ooooooooooooooooooooooooooooooooooooooo	suggestive	facing	orchestrated	shameful	mocking	threat	homosexual
26	army	violent	exhibitionism	irreverent	hatred	prejudiced	homophobic	insulting	aldingham	lascivious	alert	deflect	hatred	remark	outsiders	frowned
27	rebel	irresponsible	galiardi	obnoxious	denouncing	hypocritical	expletives	profiling	westendorf	disparaging	looming	vilify	contemptuous	gratuitous	stance	anathema
28	firepower	demeaning	islamophobia	insensitive	disparaging	demeaning	rants	scapegoating	misfeasance	unlawful	warn	disprove	bloodthirsty	sexist	suspicious	openly
29	starting	deceptive	bullies	outrageous	insensitive	taunts	lewd	taunts	mo96	seditious	concerned	assassinate	vindictive	exaggeration	attitude	censorship

	offensive	abusive	cyberbullying	vulgar	racist	homophobic	profane	slur	harrasment	obscene	threat	discredit	hateful	insult	hostile	taboo
0	defensive	neglectful	cyber-bullying	crass	rascist	anti-gay	vulgar	derogatory	harassment	vulgar	menace	denigrate	bigoted	affront	unfriendly	tabboo
1	ofensive	hurtful	Cyber-bullying	uncouth	bigoted	anti-homosexual	blasphemous	pejorative	harassement	indecent	danger	defame	vile	disrespect	antagonistic	verboten
2	deffensive	unloving	bullying	profane	anti-white	antigay	irreverent	derogative	harasment	obsene	imminent	impugn	spiteful	demean	belligerent	unmentionable
3	offencive	harassing	cyberbully	obscene	anti-black	heterophobic	scatological	N-word	harassment.	lewd	menaces	undermine	mean-spirited	belittle	inhospitable	forbidden
4	fensive	vindictive	cyber-bullies	puerile	racism	gay-bashing	obscene	disparaging	harassments	profane	imminence	besmirch	vitriolic	disparagement	less-than-friendly	off-limits
5	offesive	hateful	cyber-bullied	lewd	racialist	bigoted	vulgarities	n-word	acusations	pornographic	immenent	villify	hurtful	belittling	unwelcoming	touchy
6	defensive-oriented	disrespectful	cyber-harassment	vulger	anti-semitic	homphobic	sacrilegious	pejoratives	harassing	outrageous	danger.But	demonize	racist	put-down	inimical	tabu
7	run-game	inappropriate	Bullying	raunchy	homophobic	anti-LGBT	lewd	racist	assualt	blasphemous	dangers	disparage	slanderous	redicule	distrustful	sexuality
8	run-based	insultive	cyber-bully	boorish	bigotted	racist	ribald	perjorative	Harassment	X-rated	pose	belittle	misogynistic	denigration	spiteful	stigmatized
9	quick-strike	demeaning	cyberharassment	indelicate	xenophobic	anti-Gay	impious	t-word	harassment.The	x-rated	peril	vilify	bigotted	ridicule	antipathetic	frowned-upon
10	play-callers	violent	bullying.The	scatological	bigot	heterosexist	obscenities	homophobic	assult	crass	looming	de-legitimize	vindictive	denigrate	contemptuous	broaching
11	play-making	exploitive	bullying-related	distasteful	antiwhite	biphobic	sacred	cussword	descrimination	salacious	menance	misrepresent	homophobic	disrepect	non-friendly	lesbianism
12	insulting	spiteful	cyberstalking	vile	anti-White	gay-hating	bawdy	epithets	discrimation	inappropriate	ever-looming	marginalize	biggoted	disrespectful	mistrustful	controversial
13	defensively	misogynistic	bullying.	sophomoric	hateful	anti-lesbian	crass	insult	threatning	unseemly	danger.This	refute	vitrolic	offend	unsympathetic	prudishness
14	play-makers	passive-agressive	bullycide	indecorous	anti-Black	transphobic	foul-mouthed	bigot	harasser	semi-pornographic	posed	demean	despicable	putdown	combative	undiscussable
15	defencive	non-loving	sextortion	ribald	sexist	misogynistic	off-color	nigger	harass	grotesque	danger.The	delegitimize	unkind	mockery	belligerant	unspoken
16	play-caller	mean-spirited	cyber-stalking	low-minded	race-baiting	gay-basher	foulmouthed	h-word	discimination	disgusting	attack	insinuate	mysoginistic	denegrate	disdainful	homosexuality
17	pass-heavy	uncivil	cyber-safety	crudity	white-hating	anti-queer	scatalogical	quasi-racist	uncivility	scandalous	danger.It	debunk	mean-hearted	slur	bellicose	broached
18	O-lines	homophobic	sexting	derogatory	supremacist	misogynist	potty-mouthed	put-down	HARASSMENT	vile	retaliation	disinform	hatred	slander	adversarial	unsayable
19	derogatory	manipulative	Cyberbully	demeaning	anti-semetic	pro-gay	cussword	swear-word	acusation	objectionable	specter	mischaracterize	anger-filled	disparage	indifferent	risqué
20	play-call	exploitative	Cyber-Bullying	low-brow	rascists	homosexual	raunchy	J-word	rudness	vulgarity	cyber-terror	delegitimise	insensative	insinuation	vindictive	un-PC
21	Defensive	self-destructive	cybersafety	risqué	racis	anti-LGBTQ	unsacred	jigaboo	violance	sacrilegious	repercussions	deligitimize	xenophobic	name-call	ill-disposed	hot-button
22	run-blocking	unrespectful	Cyberstalking	misogynistic	racially-motivated	hateful	vulgarity	insinuation	vicitim	vulgarities	risk	denegrate	loathsome	belittlement	untrusting	nudity
23	vulgar	dysfunctional	anti-bullying	tasteless	anti-Semitic	gay-bashers	uncouth	non-derogatory	villification	outrageously	menacing	mis-represent	anti-semitic	derogatory	threatening	eroticism
24	over-the-line	passive-aggressive	antibullying	bawdy	racially-based	bigotted	hateful	tirade	fruad	immoral	cyber-terrorism	misinform	misogynist	affronts	confrontational	broach
25	defensive-minded	domineering	bulling	licentious	supremist	mysoginistic	non-obscene	putdown	vandelism	distasteful	terror	demonise	bigots	umbrage	hateful	bi-sexuality
26	run-oriented	insulting	cyber-crimes	ill-bred	Anti-white	gay	intemperate	demeaning	intimidation	scatological	impending	rebut	demeaning	uncalled	dismissive	necrophilia
27	pass-oriented	foul-mouthed	cybercrimes	disrespectable	rasist	gay-baiting	puerile	disparagement	discrmination	smutty	terrorism	de-legitimise	misogynic	mocking	vitriolic	off-limit
28	tight-ends	harassment	Ask.fm	off-color	biggot	mysogynistic	indelicate	derisive	harassers	lascivious	cyberterror	maligning	disrespectful	invective	mean-spirited	discussable
29	defensively-minded	unkind	Sexting	profanity	reverse-racism	sexist	prophane	rascist	bulling	prurient	danger.And	mislead	anti-Semitic	disparaging	truculent	eroticization