In this lab, we'll explore how word2vec learns semantic relationships in different domains by comparing embeddings trained on two very different corpora: biomedical paper abstracts and song lyrics.
An important note: The biomedical corpus was trained on substantially more data than the lyrics corpus. This means that some of the differences we observe may be due to data volume rather than domain-specific patterns. However, this itself provides an interesting angle for analysis - how does data volume affect the capture of different types of semantic relationships?
import pandas as pd
import numpy as np
from gensim.models import Word2Vec
import multiprocessing
from nltk.tokenize import word_tokenize
# import nltk
# nltk.download('punkt') # Download tokenizer data
# nltk.download('averaged_perceptron_tagger')
import seaborn as sns
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
from scipy.stats import spearmanr
import matplotlib.pyplot as plt
from typing import Dict, List, Tuple
pd.options.display.max_columns=None
# gensim reads model files through smart_open, so loading directly from a URL
# works for single-file models like these; models saved with separate .npy
# arrays would need to be downloaded locally first
abstracts_model = Word2Vec.load('https://www.dropbox.com/scl/fi/fktop15arvcy4nitkakmm/word2vec_abstractso.model?rlkey=20nonez9il815khk0j9yz4cqz&st=x8rh5h2k&dl=1')
songs_model = Word2Vec.load('https://www.dropbox.com/scl/fi/t5ac16sbccqu682rdqmez/song_lyrics.model?rlkey=5ry4v06f9e3jdkhma9efc8f9r&st=8gfldsoz&dl=1')
# Print basic model info
print("Biomedical Model:")
print(f"Vocabulary size: {len(abstracts_model.wv.key_to_index)}")
print(f"Vector size: {abstracts_model.vector_size}")
print("\nSong Lyrics Model:")
print(f"Vocabulary size: {len(songs_model.wv.key_to_index)}")
print(f"Vector size: {songs_model.vector_size}")
Biomedical Model:
Vocabulary size: 51380
Vector size: 100

Song Lyrics Model:
Vocabulary size: 12529
Vector size: 100
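Since the data-volume caveat above matters for everything that follows, it is worth quantifying. A minimal sketch, assuming the models were saved right after training so gensim's bookkeeping attributes are populated:

# corpus_count / corpus_total_words are recorded by gensim during training
for name, model in [('biomedical', abstracts_model), ('lyrics', songs_model)]:
    print(f"{name}: {model.corpus_count} documents, {model.corpus_total_words} total words")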
To compare how words are represented across our two domains (biomedical papers and song lyrics), we'll conduct an analysis of their similarity patterns:
def compare_similarities(word: str,
models: Dict[str, Word2Vec],
topn: int = 5) -> Dict[str, List[Tuple[str, float]]]:
"""
Compare most similar words across different models
Args:
word: Target word to analyze
models: Dictionary of domain models
topn: Number of similar words to return
Returns:
Dictionary of similar words for each domain
"""
results = {}
for domain, model in models.items():
if word in model.wv:
results[domain] = model.wv.most_similar(word, topn=topn)
else:
results[domain] = []
return results
def calculate_rank_correlation(word: str,
model1: Word2Vec,
model2: Word2Vec,
common_words: List[str]) -> float:
"""
Calculate rank correlation of similarities between two models
Args:
word: Target word
model1, model2: Models to compare
common_words: List of words to compare similarities for
Returns:
Spearman rank correlation coefficient
"""
if word not in model1.wv or word not in model2.wv:
return None
# Get similarities to common words in both models
sims1 = [model1.wv.similarity(word, w) for w in common_words if w in model1.wv]
sims2 = [model2.wv.similarity(word, w) for w in common_words if w in model2.wv]
if len(sims1) > 1 and len(sims2) > 1:
return spearmanr(sims1, sims2)[0]
return None
# Create dictionary of models
models = {
'biomedical': abstracts_model,
'lyrics': songs_model
}
# First get common words between models
common_words = list(set(abstracts_model.wv.key_to_index) &
set(songs_model.wv.key_to_index))
print(f"Number of words in both domains: {len(common_words)}")
print(f"Share of words in abstracts that are in songs: {len(common_words) / len(abstracts_model.wv.key_to_index):.3f}")
print(f"Share of words in songs that are in abstracts: {len(common_words) / len(songs_model.wv.key_to_index):.3f}")
Number of words in both domains: 6810
Share of words in abstracts that are in songs: 0.133
Share of words in songs that are in abstracts: 0.544
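It can also help to eyeball what falls outside the overlap; a quick, purely illustrative peek:

# Words unique to each vocabulary (order is arbitrary since sets are unordered)
only_bio = set(abstracts_model.wv.key_to_index) - set(songs_model.wv.key_to_index)
only_songs = set(songs_model.wv.key_to_index) - set(abstracts_model.wv.key_to_index)
print('Only in abstracts:', list(only_bio)[:10])
print('Only in lyrics:', list(only_songs)[:10])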
sample_of_common_words = pd.DataFrame(common_words, columns=['word']).sample(1000)
center = {}
for i, cw in enumerate(sample_of_common_words.values):
if i % 250 == 0:
print(i, sample_of_common_words.shape[0])
    sim_words = compare_similarities(cw[0], models, 7000)  # large topn: each model returns a long ranked list, and the merge below keeps words present in both
df_bio = pd.DataFrame(sim_words['biomedical'], columns=['word', 'sim'])
df_song = pd.DataFrame(sim_words['lyrics'], columns=['word', 'sim'])
df = df_bio.merge(df_song, on=['word'], suffixes=['_bio', '_song'])
center[cw[0]] = df.iloc[:, 1:].corr(method='spearman').iloc[0, 1]
0 1000
250 1000
500 1000
750 1000
rank_corrs = pd.DataFrame.from_dict(center, orient='index').reset_index()
rank_corrs.columns=['word', 'corr']
rank_corrs.sort_values(by='corr', inplace=True)
# 20 least correlated
rank_corrs.head(20)
 | word | corr |
---|---|---|
939 | ova | -0.137636 |
799 | steer | -0.136511 |
474 | blocking | -0.096959 |
361 | gene | -0.094439 |
614 | collector | -0.088616 |
747 | dr | -0.084789 |
601 | hut | -0.077672 |
845 | mccoy | -0.075706 |
626 | horn | -0.075189 |
291 | sanctuary | -0.073536 |
941 | volume | -0.068567 |
243 | prime | -0.064830 |
731 | sera | -0.061896 |
776 | breeds | -0.061139 |
164 | web | -0.059445 |
606 | timothy | -0.058925 |
599 | bid | -0.058215 |
736 | solutions | -0.056910 |
655 | admission | -0.055799 |
492 | grit | -0.052010 |
# 20 most correlated
rank_corrs.tail(20)
 | word | corr |
---|---|---|
110 | eastern | 0.350956 |
480 | florida | 0.355719 |
623 | salad | 0.356657 |
42 | birmingham | 0.359916 |
324 | cherry | 0.363270 |
47 | usa | 0.363541 |
56 | france | 0.374994 |
734 | baking | 0.378058 |
877 | des | 0.379742 |
330 | wine | 0.380003 |
968 | suburbs | 0.380126 |
846 | italy | 0.380626 |
318 | nevada | 0.388096 |
300 | avec | 0.389604 |
97 | atlanta | 0.396447 |
262 | spain | 0.403125 |
299 | detroit | 0.406309 |
334 | york | 0.415983 |
716 | fruit | 0.417101 |
36 | missouri | 0.448314 |
# The distribution of rank correlations
sns.histplot(rank_corrs, x='corr')
[Figure: histogram of the cross-domain rank correlations ('corr' on the x-axis, count on the y-axis)]
Consider the following hypothesis: Geographic terms show high cross-domain correlation because they participate in similar semantic relationships across domains, even when the specific contexts of their usage differ. This is because geographic relationships (like country-capital pairs, regional groupings, or spatial relationships) are fundamental and consistent, unlike more context-dependent relationships like those between emotions or technical terms.
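Before looking at individual words, here's a rough quantitative check of the hypothesis. The list of geographic terms is hand-picked from the most-correlated table above, so treat this as illustrative rather than a proper test:

# Compare mean rank correlation for a few geographic terms vs. everything else
geo_terms = {'eastern', 'florida', 'birmingham', 'usa', 'france', 'suburbs',
             'italy', 'nevada', 'atlanta', 'spain', 'detroit', 'york', 'missouri'}
rank_corrs['is_geo'] = rank_corrs['word'].isin(geo_terms)
print(rank_corrs.groupby('is_geo')['corr'].agg(['mean', 'count']))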
for word in ['japan', 'alabama', 'eastern', 'massachusetts']:
print(word)
pd.DataFrame({
'top_10_abstract' :abstracts_model.wv.most_similar(word, topn=10),
'top_10_songs' : songs_model.wv.most_similar(word, topn=10)})
print('=' * 20)
japan
 | top_10_abstract | top_10_songs |
---|---|---|
0 | (korea, 0.8748170733451843) | (miami, 0.5309703350067139) |
1 | (israel, 0.8398821949958801) | (seattle, 0.5086838006973267) |
2 | (italy, 0.8218989372253418) | (bombay, 0.501028299331665) |
3 | (france, 0.8159109354019165) | (milwaukee, 0.49392423033714294) |
4 | (germany, 0.8139998912811279) | (canada, 0.4893510937690735) |
5 | (taiwan, 0.8009180426597595) | (eastern, 0.4831640422344208) |
6 | (poland, 0.7939981818199158) | (peru, 0.48271527886390686) |
7 | (austria, 0.7908223867416382) | (italy, 0.47260430455207825) |
8 | (canada, 0.7870953679084778) | (texas, 0.4703732132911682) |
9 | (australia, 0.7857426404953003) | (tennessee, 0.46824613213539124) |
====================
alabama
 | top_10_abstract | top_10_songs |
---|---|---|
0 | (ohio, 0.8161562085151672) | (alabam, 0.5872521996498108) |
1 | (carolina, 0.8154383301734924) | (birmingham, 0.5807199478149414) |
2 | (texas, 0.8063473701477051) | (kentucky, 0.5631315112113953) |
3 | (washington, 0.797336757183075) | (georgia, 0.5521689057350159) |
4 | (bronx, 0.7942252159118652) | (louisiana, 0.54639732837677) |
5 | (michigan, 0.7938581109046936) | (headin, 0.5460915565490723) |
6 | (utah, 0.7937974333763123) | (south, 0.5328818559646606) |
7 | (california, 0.7934910655021667) | (florida, 0.5296846628189087) |
8 | (maryland, 0.7933011054992676) | (kansas, 0.5220784544944763) |
9 | (houston, 0.7924526929855347) | (missouri, 0.5115543603897095) |
====================
eastern
 | top_10_abstract | top_10_songs |
---|---|---|
0 | (southern, 0.840345561504364) | (north, 0.5851677656173706) |
1 | (north, 0.8231372833251953) | (east, 0.5750294923782349) |
2 | (west, 0.8209653496742249) | (african, 0.5740207433700562) |
3 | (southeast, 0.811873733997345) | (across, 0.5662868022918701) |
4 | (northern, 0.7906352877616882) | (redwood, 0.5559266209602356) |
5 | (northeast, 0.7843962907791138) | (ridge, 0.5410141944885254) |
6 | (east, 0.7805769443511963) | (south, 0.5373070240020752) |
7 | (southwest, 0.7763235569000244) | (mountain, 0.5371017456054688) |
8 | (south, 0.7742679715156555) | (oceans, 0.5346466898918152) |
9 | (southwestern, 0.7726342678070068) | (sea, 0.5311489105224609) |
====================
massachusetts
 | top_10_abstract | top_10_songs |
---|---|---|
0 | (ohio, 0.8244689702987671) | (south, 0.5175922513008118) |
1 | (missouri, 0.8058115839958191) | (ohio, 0.5163834095001221) |
2 | (pennsylvania, 0.8054637908935547) | (nashville, 0.5137845277786255) |
3 | (connecticut, 0.8047425746917725) | (indiana, 0.4909800887107849) |
4 | (texas, 0.8038879036903381) | (memphis, 0.48726528882980347) |
5 | (virginia, 0.8029403686523438) | (mexico, 0.4843578636646271) |
6 | (california, 0.7953702807426453) | (san, 0.481657475233078) |
7 | (washington, 0.7941966652870178) | (louisiana, 0.4794386923313141) |
8 | (carolina, 0.7905446887016296) | (tennessee, 0.47696375846862793) |
9 | (england, 0.789548397064209) | (kentucky, 0.46984240412712097) |
====================
I asked Claude 3.7 to analyze these lists and its response was impressive enough that I'm pasting it below (with slight edits to remove repetition). My comments are in italics.
In the biomedical abstracts, 'japan' is strongly associated with other countries (korea, israel, italy, france, germany, taiwan, poland, austria, canada, australia).
In song lyrics, 'japan' relates to a mix of cities and places (miami, seattle, bombay, milwaukee, canada, peru, italy, texas, tennessee).
The relationship pattern is fundamentally different. In biomedical literature, Japan clusters with other countries in an expected geopolitical/taxonomic organization. In lyrics, Japan appears to function more as an exotic or distant reference point alongside various cities and places that might evoke similar emotional or cultural connotations in songs.
*Claude doesn't know about the difference in dataset size here, so some of the lack of specificity in the song-lyrics results may be driven by that as well.*
In biomedical abstracts, 'alabama' clusters with other U.S. states and cities (ohio, carolina, texas, washington, bronx, michigan, utah, california, maryland, houston).
In song lyrics, 'alabama' relates to southern states and travel language (alabam, birmingham, kentucky, georgia, louisiana, headin, south, florida, kansas, missouri).
Here we see stronger consistency - Alabama functions as a U.S. state in both domains, but in lyrics there's a stronger regional identity element (clustering with other southern states) and contextual usage (related to travel - 'headin'). In biomedical literature, the relationship appears more strictly taxonomic.
*Given how clearly Japan is identified with other countries, I'm not sure what explains the presence of Bronx or Houston here.*
For the directional term 'eastern': in biomedical abstracts, it sits among other directional terms (southern, north, west, southeast, northern, northeast, east, southwest, south, southwestern).
In song lyrics, it mixes directions with physical geography (north, east, african, across, redwood, ridge, south, mountain, oceans, sea).
This term shows the most consistency between domains - directional relationships remain similar, though song lyrics incorporate more physical geography and cultural associations.
Interpretation
*This may again be driven by differences in dataset size.*
This analysis provides qualified support for the hypothesis. Geographic terms do show consistency in their basic referential meaning across domains, but the specific semantic relationships they participate in remain domain-dependent. The fundamental geographic relationships (what is a country, what is a state) transfer well, but the contextual associations and functional uses of these terms vary considerably between scientific and creative writing.
Going back to the original hypothesis: "Geographic terms show high cross-domain correlation because they participate in similar semantic relationships across domains, even when the specific contexts of their usage differ." I'd say Claude 3.7 is being somewhat generous to the hypothesis: the collocation patterns across the two datasets show that while some semantic relationships exist in both (Japan as country, Alabama as state, eastern as direction), others are present in one dataset but not the other (in particular, the use of geographic terms for emotive effect).
# emotion words are less common in the abstracts
for word in ['results', 'loss', 'gene', 'relationship', 'alcohol']:
print(word)
pd.DataFrame({
'top_10_abstract' :abstracts_model.wv.most_similar(word, topn=10),
'top_10_songs' : songs_model.wv.most_similar(word, topn=10)})
print('=' * 20)
results
 | top_10_abstract | top_10_songs |
---|---|---|
0 | (findings, 0.7533902525901794) | (conspiracy, 0.5340604186058044) |
1 | (data, 0.7228420376777649) | (environment, 0.5331812500953674) |
2 | (observations, 0.6540159583091736) | (progress, 0.5310154557228088) |
3 | (analyses, 0.6239864826202393) | (result, 0.5276044011116028) |
4 | (altogether, 0.621438205242157) | (delusion, 0.5232785940170288) |
5 | (conclusions, 0.6182958483695984) | (decisions, 0.5207023024559021) |
6 | (these, 0.5967600345611572) | (education, 0.5119056701660156) |
7 | (and, 0.5799633264541626) | (value, 0.5096239447593689) |
8 | (analysis, 0.5782771110534668) | (appearance, 0.5045484900474548) |
9 | (conclusion, 0.5779496431350708) | (failure, 0.5006080865859985) |
====================
loss
 | top_10_abstract | top_10_songs |
---|---|---|
0 | (gain, 0.655653715133667) | (control, 0.6872790455818176) |
1 | (losses, 0.6424258947372437) | (gain, 0.5800700187683105) |
2 | (regain, 0.635229229927063) | (self, 0.5547401905059814) |
3 | (weight, 0.5950466394424438) | (losing, 0.5478525757789612) |
4 | (reduction, 0.583220899105072) | (destruction, 0.5409050583839417) |
5 | (sensorineural, 0.5607789754867554) | (suffering, 0.5388063192367554) |
6 | (lost, 0.5604578256607056) | (grief, 0.5065642595291138) |
7 | (gains, 0.5603702664375305) | (simplicity, 0.4994810223579407) |
8 | (arhl, 0.5533477663993835) | (civilized, 0.49618569016456604) |
9 | (lose, 0.5489565134048462) | (sanity, 0.49227234721183777) |
====================
gene
 | top_10_abstract | top_10_songs |
---|---|---|
0 | (genes, 0.8073875308036804) | (dean, 0.4854452610015869) |
1 | (transcript, 0.7708730697631836) | (hangover, 0.46422114968299866) |
2 | (promoter, 0.7615336179733276) | (fat, 0.4489145874977112) |
3 | (expression, 0.7515049576759338) | (cluck, 0.4478444457054138) |
4 | (reln, 0.7510012984275818) | (teddy, 0.44257891178131104) |
5 | (transcriptional, 0.7473430633544922) | (tub, 0.43551725149154663) |
6 | (chrna7, 0.7427319884300232) | (redneck, 0.4352358877658844) |
7 | (wnt2, 0.7347682118415833) | (vinnie, 0.43189749121665955) |
8 | (akap12, 0.7296922206878662) | (pool, 0.43134692311286926) |
9 | (cdkn1c, 0.7295920848846436) | (mascara, 0.4261293411254883) |
====================
relationship
 | top_10_abstract | top_10_songs |
---|---|---|
0 | (relationships, 0.8824570775032043) | (problem, 0.5246297717094421) |
1 | (relation, 0.8729956746101379) | (situation, 0.5208447575569153) |
2 | (association, 0.8719602823257446) | (mutual, 0.4890507161617279) |
3 | (associations, 0.8337894678115845) | (opinion, 0.48446422815322876) |
4 | (relations, 0.824416995048523) | (situations, 0.4682455062866211) |
5 | (link, 0.7976319193840027) | (this, 0.46032848954200745) |
6 | (links, 0.7719379663467407) | (serious, 0.45658427476882935) |
7 | (interrelationship, 0.7520392537117004) | (type, 0.4560496509075165) |
8 | (interrelation, 0.7385400533676147) | (intention, 0.44663071632385254) |
9 | (interrelationships, 0.7377327680587769) | (problems, 0.4459305703639984) |
====================
alcohol
 | top_10_abstract | top_10_songs |
---|---|---|
0 | (marijuana, 0.8451120853424072) | (gimmie, 0.6309528946876526) |
1 | (drinking, 0.8213468194007874) | (vodka, 0.5660660266876221) |
2 | (cannabis, 0.8138123154640198) | (pills, 0.5207496285438538) |
3 | (methamphetamine, 0.8100548386573792) | (gin, 0.4987192153930664) |
4 | (binge, 0.8048396110534668) | (champagne, 0.4806707203388214) |
5 | (dependence, 0.7954674959182739) | (drinking, 0.46205461025238037) |
6 | (cocaine, 0.7905853390693665) | (demon, 0.4595335125923157) |
7 | (heroin, 0.7748849391937256) | (bottle, 0.45356303453445435) |
8 | (substance, 0.7657745480537415) | (whiskey, 0.44766926765441895) |
9 | (nicotine, 0.7656602263450623) | (fax, 0.44533926248550415) |
====================
Unlike geographic terms, these emotion/technical terms (which have some overlap across the two datasets) participate in very different semantic relationships in each corpus. Most interesting to me is the role of alcohol in biomedical abstracts versus song lyrics: in the former, alcohol clusters with other substances of abuse (marijuana, cannabis, methamphetamine, cocaine), whereas in the latter it clusters with specific drinks (vodka, gin, champagne, whiskey).
Now we'll dive deeper into an interesting aspect of the word2vec architecture: the distinction between input and output vectors.
When word2vec trains, it actually learns two different vector representations for each word:

- an input vector (the familiar embedding in model.wv, historically syn0), used when the word serves as context, and
- an output vector (model.syn1neg when training with negative sampling), used when the word is the one being predicted.
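In gensim the two representations live in separate arrays. A minimal access sketch, assuming the models were trained with negative sampling (which is what creates syn1neg, the same attribute the analysis below relies on):

# Input vectors sit in model.wv; output vectors in model.syn1neg,
# row-aligned with model.wv.index_to_key
word = abstracts_model.wv.index_to_key[0]
input_vec = abstracts_model.wv[word]
output_vec = abstracts_model.syn1neg[abstracts_model.wv.key_to_index[word]]
print(word, input_vec.shape, output_vec.shape)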
This raises some interesting questions:

- How similar are a word's input and output representations?
- Do certain kinds of words (content words vs. function words, say) diverge more than others?
- Do these patterns differ between our two domains?
Understanding these patterns can give us insight into how word2vec learns to represent meaning and whether words play different roles when providing context versus being predicted. Let's start exploring this by calculating the cosine similarity between each word's input and output vectors.
def analyze_input_output_similarity(model: Word2Vec):
"""
Analyze the cosine similarity between input and output vectors for each word.
This function examines how similarly words are represented in the input space
(when they serve as context) versus the output space (when they are being predicted).
A high similarity suggests the word has consistent representation across both spaces.
Returns:
DataFrame containing word-level similarity scores and vector statistics
"""
similarities = []
for word in model.wv.index_to_key:
# Get input and output vectors
input_vec = model.wv[word]
output_vec = model.syn1neg[model.wv.key_to_index[word]]
# Normalize vectors
input_norm = np.linalg.norm(input_vec)
output_norm = np.linalg.norm(output_vec)
input_vec_norm = input_vec / input_norm
output_vec_norm = output_vec / output_norm
# Calculate cosine similarity
similarity = np.dot(input_vec_norm, output_vec_norm)
similarities.append({
'word': word,
'similarity': similarity,
'input_norm': input_norm,
'output_norm': output_norm
})
return pd.DataFrame(similarities)
# Analyze both models
print("Analyzing input-output similarities...")
bio_similarities = analyze_input_output_similarity(abstracts_model)
lyrics_similarities = analyze_input_output_similarity(songs_model)
# Add domain labels
bio_similarities['domain'] = 'biomedical'
lyrics_similarities['domain'] = 'lyrics'
# Combine results
all_similarities = pd.concat([bio_similarities, lyrics_similarities])
# Create visualizations to compare distributions
plt.figure(figsize=(12, 6))
# Distribution plot
plt.subplot(1, 2, 1)
sns.kdeplot(data=all_similarities, x='similarity', hue='domain')
plt.title('Distribution of Input-Output Similarities')
plt.xlabel('Cosine Similarity')
# Box plot
plt.subplot(1, 2, 2)
sns.boxplot(data=all_similarities, x='domain', y='similarity')
plt.title('Input-Output Similarities by Domain')
plt.tight_layout()
Analyzing input-output similarities...
[Figure: left, KDE of input-output cosine similarity by domain; right, box plot of input-output similarity by domain]
# Function to analyze extreme cases
def analyze_extreme_cases(df: pd.DataFrame, n: int = 10) -> dict:
"""
Analyze words with highest and lowest input-output similarities.
This helps us understand which words have consistent representations
across spaces versus those that are represented differently.
"""
results = {}
# Get extreme cases for each domain
for domain in df['domain'].unique():
domain_df = df[df['domain'] == domain]
most_similar = domain_df.nlargest(n, 'similarity')[['word', 'similarity']]
least_similar = domain_df.nsmallest(n, 'similarity')[['word', 'similarity']]
results[domain] = {
'most_similar': most_similar,
'least_similar': least_similar
}
return results
# Analyze extreme cases
extreme_cases = analyze_extreme_cases(all_similarities)
# Print findings
for domain, cases in extreme_cases.items():
print(f"\n{domain.upper()} DOMAIN")
print("=" * 50)
print("\nMost similar input-output representations:")
print(cases['most_similar'].to_string(index=False))
print("\nLeast similar input-output representations:")
print(cases['least_similar'].to_string(index=False))
BIOMEDICAL DOMAIN
==================================================

Most similar input-output representations:
         word  similarity
    composite    0.603318
        white    0.599204
       middle    0.593259
 longitudinal    0.576476
       living    0.572975
        heavy    0.565382
        fiber    0.563679
       bundle    0.563099
consolidation    0.561041
   attachment    0.558926

Least similar input-output representations:
        word  similarity
         and   -0.536199
          of   -0.430385
         the   -0.430200
     besides   -0.410197
          in   -0.407083
      whilst   -0.358159
    addition   -0.351917
additionally   -0.340453
         was   -0.329975
        with   -0.303203

LYRICS DOMAIN
==================================================

Most similar input-output representations:
   word  similarity
   bang    0.606021
  fever    0.598021
     oo    0.595833
   funk    0.592830
   boom    0.588909
     la    0.583475
    bop    0.582034
america    0.575650
   rock    0.572892
    bow    0.569074

Least similar input-output representations:
word  similarity
 and   -0.645927
 you   -0.415907
that   -0.398828
 the   -0.374967
 but   -0.345375
  to   -0.329941
  my   -0.255253
  in   -0.242977
your   -0.241031
  me   -0.225409
# Calculate summary statistics
summary_stats = all_similarities.groupby('domain').agg({
'similarity': ['mean', 'std', 'median', 'min', 'max']
}).round(3)
print("\nSummary Statistics:")
print(summary_stats)
Summary Statistics:
           similarity
                 mean    std median    min    max
domain
biomedical      0.292  0.089  0.297 -0.536  0.603
lyrics          0.330  0.089  0.337 -0.646  0.606
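The lyrics distribution sits visibly to the right of the biomedical one. A quick nonparametric check of that shift, sketched with scipy (already a dependency above for spearmanr):

from scipy.stats import mannwhitneyu

stat, p = mannwhitneyu(bio_similarities['similarity'], lyrics_similarities['similarity'])
print(f"Mann-Whitney U = {stat:.0f}, p = {p:.2e}")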
Now let's find each word's nearest neighbors in the output embedding space (the wv.most_similar function that comes with gensim works on the input embeddings here).
def most_similar_output(model, word, topn=10):
"""
Find the most similar words to a given word in the output embedding space.
Parameters:
model: The Word2Vec model
word: The target word to find similarities for
topn: Number of top similar words to return
Returns:
List of (word, similarity) tuples
"""
# Get the output vector for the target word
if word not in model.wv.key_to_index:
return []
target_index = model.wv.key_to_index[word]
target_output_vec = model.syn1neg[target_index]
# Normalize the target vector
target_norm = np.linalg.norm(target_output_vec)
if target_norm == 0:
return []
target_output_vec_norm = target_output_vec / target_norm
# Calculate similarities with all other output vectors
similarities = []
for i, other_word in enumerate(model.wv.index_to_key):
if other_word == word: # Skip the target word itself
continue
other_output_vec = model.syn1neg[i]
other_norm = np.linalg.norm(other_output_vec)
if other_norm == 0:
continue
other_output_vec_norm = other_output_vec / other_norm
similarity = np.dot(target_output_vec_norm, other_output_vec_norm)
similarities.append((other_word, similarity))
# Sort by similarity (highest first) and return top n
return sorted(similarities, key=lambda x: x[1], reverse=True)[:topn]
# Example use
most_similar_output(abstracts_model, 'chain')
most_similar_output(songs_model, 'chain')
[('polymerase', 0.9043478), ('reaction', 0.80934185), ('rtpcr', 0.7943145), ('pcr', 0.7834429), ('qpcr', 0.7749369), ('reverse', 0.7698043), ('rflp', 0.768584), ('fragment', 0.7580474), ('multiplex', 0.74632204), ('sybr', 0.74082845)]
[('ball', 0.723468), ('chains', 0.71177894), ('links', 0.7097362), ('rollie', 0.69211906), ('whips', 0.686039), ('presidential', 0.6775297), ('whip', 0.673292), ('link', 0.6658155), ('binds', 0.66426957), ('wrecking', 0.6631808)]
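A side note on the design: the Python loop above costs one dot product per vocabulary word per call. The same computation vectorizes naturally with numpy; a sketch (assumes syn1neg fits in memory, which it easily does at these vocabulary sizes):

def most_similar_output_fast(model, word, topn=10):
    # Normalize all output vectors once, then one matrix-vector product
    norms = np.linalg.norm(model.syn1neg, axis=1, keepdims=True)
    mat = model.syn1neg / np.clip(norms, 1e-9, None)  # guard against zero rows
    idx = model.wv.key_to_index[word]
    sims = mat @ mat[idx]
    order = np.argsort(-sims)
    return [(model.wv.index_to_key[i], float(sims[i])) for i in order if i != idx][:topn]

# most_similar_output_fast(abstracts_model, 'chain')  # should match the loop version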
Do words with high input/output similarity also have the same closest words? What about for words with low input/output similarity?
# First, let's get some example words from each domain
def analyze_input_vs_output_similarities(model, domain_name, top_words, bottom_words):
print(f"\n{domain_name.upper()} DOMAIN ANALYSIS")
print("=" * 60)
print("\nWords with HIGH input-output similarity:")
for word, sim in top_words:
print(f"\nWord: '{word}' (input-output similarity: {sim:.4f})")
print("Top 5 similar words in INPUT space:")
input_similar = model.wv.most_similar(word, topn=10)
for w, s in input_similar:
print(f" {w:<15} {s:.4f}")
print("Top 5 similar words in OUTPUT space:")
output_similar = most_similar_output(model, word, topn=10)
for w, s in output_similar:
print(f" {w:<15} {s:.4f}")
print("\nWords with LOW input-output similarity:")
for word, sim in bottom_words:
print(f"\nWord: '{word}' (input-output similarity: {sim:.4f})")
print("Top 5 similar words in INPUT space:")
input_similar = model.wv.most_similar(word, topn=10)
for w, s in input_similar:
print(f" {w:<15} {s:.4f}")
print("Top 5 similar words in OUTPUT space:")
output_similar = most_similar_output(model, word, topn=10)
for w, s in output_similar:
print(f" {w:<15} {s:.4f}")
# For biomedical domain
bio_top = extreme_cases['biomedical']['most_similar'].head(3).values.tolist()
bio_bottom = extreme_cases['biomedical']['least_similar'].head(3).values.tolist()
analyze_input_vs_output_similarities(abstracts_model, "Biomedical", bio_top, bio_bottom)
BIOMEDICAL DOMAIN ANALYSIS
============================================================

Words with HIGH input-output similarity:

Word: 'composite' (input-output similarity: 0.6033)
Top 10 similar words in INPUT space:
  nanohybrid      0.6597
  composites      0.6585
  cgic            0.6435
  nanofilled      0.6429
  microhybrid     0.6401
  flowable        0.6352
  z250            0.6344
  compomer        0.6234
  silorane        0.6190
  rmgi            0.6144
Top 10 similar words in OUTPUT space:
  composites      0.7235
  cgic            0.7175
  flowable        0.6886
  rmgi            0.6787
  nanohybrid      0.6742
  compomer        0.6737
  cpc             0.6704
  microhybrid     0.6684
  endpoint        0.6544
  nanofilled      0.6533

Word: 'white' (input-output similarity: 0.5992)
Top 10 similar words in INPUT space:
  black           0.7710
  hispanic        0.6778
  hyperintensities 0.6636
  latinx          0.6400
  multiracial     0.6331
  matter          0.6318
  african         0.6196
  islander        0.6152
  juxtacortical   0.6095
  wmv             0.6093
Top 10 similar words in OUTPUT space:
  black           0.7752
  hispanic        0.6791
  hyperintensities 0.6562
  islander        0.6351
  matter          0.6288
  latinx          0.6277
  juxtacortical   0.6237
  gray            0.6149
  bame            0.6137
  african         0.6051

Word: 'middle' (input-output similarity: 0.5933)
Top 10 similar words in INPUT space:
  mid             0.6246
  posterior       0.5766
  gyri            0.5737
  elementary      0.5714
  yangtze         0.5628
  anterior        0.5592
  petrous         0.5556
  perisylvian     0.5546
  subcallosal     0.5544
  mca             0.5543
Top 10 similar words in OUTPUT space:
  mid             0.6742
  mca             0.6638
  gyri            0.6546
  elementary      0.6452
  east            0.6347
  opercular       0.6314
  postcentral     0.6306
  rostral         0.6286
  peduncle        0.6271
  mcas            0.6256

Words with LOW input-output similarity:

Word: 'and' (input-output similarity: -0.5362)
Top 10 similar words in INPUT space:
  including       0.7309
  both            0.7151
  of              0.7005
  while           0.6950
  or              0.6789
  the             0.6343
  for             0.6303
  in              0.6057
  with            0.6022
  additionally    0.5967
Top 10 similar words in OUTPUT space:
  besides         0.8147
  meanwhile       0.7811
  whilst          0.7643
  namely          0.7630
  etc             0.7607
  alongside       0.7578
  likewise        0.7370
  additionally    0.7337
  lastly          0.7220
  correspondingly 0.7124

Word: 'of' (input-output similarity: -0.4304)
Top 10 similar words in INPUT space:
  the             0.7996
  and             0.7005
  in              0.6507
  including       0.6317
  for             0.6192
  abstractobjectivethe 0.5893
  within          0.5729
  its             0.5675
  on              0.5661
  this            0.5659
Top 10 similar words in OUTPUT space:
  the             0.7850
  besides         0.7187
  and             0.7042
  thereof         0.6891
  consequently    0.6557
  correspondingly 0.6456
  whilst          0.6394
  hence           0.6348
  moreover        0.6323
  accordingly     0.6310

Word: 'the' (input-output similarity: -0.4302)
Top 10 similar words in INPUT space:
  of              0.7996
  this            0.7207
  and             0.6343
  within          0.6139
  our             0.6136
  its             0.6007
  which           0.5954
  each            0.5904
  in              0.5894
  their           0.5759
Top 10 similar words in OUTPUT space:
  of              0.7850
  backgroundthe   0.7333
  summarythe      0.7294
  besides         0.7210
  secondly        0.7154
  and             0.7110
  pdp             0.7102
  2020            0.7060
  isp             0.6998
  accordingly     0.6990
# For lyrics domain
lyrics_top = extreme_cases['lyrics']['most_similar'].head(3).values.tolist()
lyrics_bottom = extreme_cases['lyrics']['least_similar'].head(3).values.tolist()
analyze_input_vs_output_similarities(songs_model, "Lyrics", lyrics_top, lyrics_bottom)
LYRICS DOMAIN ANALYSIS
============================================================

Words with HIGH input-output similarity:

Word: 'bang' (input-output similarity: 0.6060)
Top 10 similar words in INPUT space:
  shoot           0.5559
  shot            0.5504
  diggy           0.5197
  gong            0.5138
  lulu            0.4982
  headboard       0.4626
  buh             0.4447
  bangin          0.4421
  click           0.4366
  boomerang       0.4365
Top 10 similar words in OUTPUT space:
  bangin          0.6654
  chicka          0.6542
  gong            0.6520
  shoot           0.6468
  ano             0.6417
  diggy           0.6385
  shot            0.6325
  headboard       0.6298
  buh             0.6251
  mj              0.6214

Word: 'fever' (input-output similarity: 0.5980)
Top 10 similar words in INPUT space:
  jungle          0.5648
  hot             0.5173
  sizzle          0.4628
  homesick        0.4439
  bloodbath       0.4374
  temperature     0.4370
  itching         0.4230
  summer          0.4230
  fan             0.4183
  chills          0.4130
Top 10 similar words in OUTPUT space:
  sizzle          0.6706
  flaming         0.6380
  temperature     0.6184
  jungle          0.6171
  afire           0.6097
  furnace         0.5897
  bloodbath       0.5847
  daybreak        0.5834
  hay             0.5817
  freezin         0.5808

Word: 'oo' (input-output similarity: 0.5958)
Top 10 similar words in INPUT space:
  ooo             0.6934
  oooo            0.4574
  ia              0.4256
  ahh             0.4126
  yah             0.4114
  mony            0.4105
  aaaahh          0.3904
  unconditionally 0.3872
  moan            0.3868
  aahh            0.3863
Top 10 similar words in OUTPUT space:
  ooo             0.7624
  ia              0.6622
  pangako         0.6586
  sayang          0.6442
  aaaahh          0.6361
  kita            0.6286
  iba             0.6279
  oooo            0.6242
  bakit           0.6199
  cinta           0.6172

Words with LOW input-output similarity:

Word: 'and' (input-output similarity: -0.6459)
Top 10 similar words in INPUT space:
  with            0.6998
  when            0.6548
  the             0.6366
  then            0.6271
  they            0.5975
  that            0.5786
  to              0.5751
  but             0.5705
  them            0.5654
  in              0.5623
Top 10 similar words in OUTPUT space:
  the             0.8515
  you             0.8482
  that            0.8424
  but             0.8319
  to              0.8277
  fantasi         0.8260
  bish            0.8197
  tudo            0.8188
  sponge          0.8133
  piu             0.8128

Word: 'you' (input-output similarity: -0.4159)
Top 10 similar words in INPUT space:
  me              0.7458
  if              0.6944
  do              0.6684
  that            0.6542
  cause           0.6520
  what            0.6353
  know            0.6338
  just            0.6270
  but             0.5951
  it              0.5842
Top 10 similar words in OUTPUT space:
  me              0.8835
  that            0.8712
  and             0.8482
  do              0.8395
  but             0.8365
  belive          0.8354
  yasss           0.8326
  sais            0.8309
  gratia          0.8283
  bish            0.8276

Word: 'that' (input-output similarity: -0.3988)
Top 10 similar words in INPUT space:
  but             0.7167
  what            0.6567
  you             0.6542
  cause           0.6470
  real            0.6235
  it              0.6087
  sure            0.6014
  really          0.5998
  know            0.5969
  if              0.5813
Top 10 similar words in OUTPUT space:
  you             0.8712
  but             0.8647
  and             0.8424
  sensible        0.8280
  yasss           0.8201
  bish            0.8198
  irrelevant      0.8143
  ironic          0.8126
  the             0.8062
  khay            0.8024
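A quick quantitative complement to the listings above: the Jaccard overlap between a word's top-10 neighbor sets in the two spaces. This is a hypothetical helper reusing most_similar_output from above:

def neighbor_overlap(model, word, topn=10):
    input_set = {w for w, _ in model.wv.most_similar(word, topn=topn)}
    output_set = {w for w, _ in most_similar_output(model, word, topn=topn)}
    return len(input_set & output_set) / len(input_set | output_set)

for word in ['composite', 'and']:
    print(word, round(neighbor_overlap(abstracts_model, word), 2))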
From Claude 3.7:
Across both domains, we see a clear pattern where content words with specific semantic meanings (like "composite" or "bang") have high input-output similarity, while function words with primarily grammatical roles (like "and" or "the") have low or negative similarity. This suggests a fundamental linguistic distinction: content words keep a stable semantic identity whether they serve as context or are being predicted, while function words co-occur with nearly every other word, leaving their input and output representations pointing in very different directions.
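One way to sanity-check that claim in aggregate is to split the similarity table by whether the word is an English stopword. A sketch using NLTK's stopword list (download it first if it isn't cached locally):

from nltk.corpus import stopwords
# nltk.download('stopwords')

stops = set(stopwords.words('english'))
all_similarities['is_function_word'] = all_similarities['word'].isin(stops)
print(all_similarities.groupby(['domain', 'is_function_word'])['similarity'].mean())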
Let's explore how well each model captures semantic relationships through analogy testing.
The classic example of word embeddings capturing analogies is "king:queen::man:woman" - the idea that the vector difference between "king" and "queen" is similar to the vector difference between "man" and "woman". This suggests the model has learned the concept of gender relationships and that this relationship is represented linearly.
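Gensim exposes this vector arithmetic directly; a minimal sketch (most_similar works in the input embedding space, and whether 'queen' actually comes out on top depends on the corpus):

# woman - man + king in each model's input embedding space
for name, model in models.items():
    try:
        print(name, model.wv.most_similar(positive=['woman', 'king'], negative=['man'], topn=3))
    except KeyError as e:
        print(name, 'skipped:', e)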
In this section, we'll test different types of analogies in each domain. We're particularly interested in:

- which relationship types (gender, geographic, comparative, opposite, professional) each domain's embeddings capture, and
- whether the input and output embeddings succeed on different analogies.
from dataclasses import dataclass
from typing import List, Tuple, Optional, Literal
@dataclass
class Analogy:
input_1: str
input_2: str
input_3: str
expected: str
explanation: str = ""
# Gender/Sex Relationships
gender_analogies = [
Analogy("man", "woman", "king", "queen"),
Analogy("father", "mother", "son", "daughter"),
Analogy("brother", "sister", "uncle", "aunt"),
Analogy("husband", "wife", "groom", "bride"),
Analogy("actor", "actress", "waiter", "waitress"),
Analogy("sir", "madam", "mr", "mrs")
]
# Geographic Relationships
geographic_analogies = [
Analogy("france", "paris", "japan", "tokyo"),
Analogy("russia", "moscow", "uk", "london"),
Analogy("egypt", "cairo", "iran", "tehran"),
Analogy("berlin", "germany", "dublin", "ireland"),
Analogy("europe", "european", "asia", "asian")
]
# Currency Relationships
currency_analogies = [
Analogy("japan", "yen", "uk", "pound"),
Analogy("usa", "dollar", "europe", "euro"),
Analogy("russia", "ruble", "china", "yuan"),
Analogy("switzerland", "franc", "india", "rupee")
]
# Comparative/Superlative Relationships
comparative_analogies = [
Analogy("good", "better", "bad", "worse"),
Analogy("big", "biggest", "small", "smallest"),
Analogy("hot", "hottest", "cold", "coldest"),
Analogy("happy", "happiest", "sad", "saddest")
]
# Opposite Relationships
opposite_analogies = [
Analogy("hot", "cold", "light", "dark"),
Analogy("up", "down", "left", "right"),
Analogy("rich", "poor", "strong", "weak"),
Analogy("love", "hate", "friend", "enemy")
]
# Professional Relationships
professional_analogies = [
Analogy("doctor", "hospital", "teacher", "school"),
Analogy("chef", "restaurant", "pilot", "airplane"),
Analogy("artist", "paint", "writer", "pen")
]
def test_analogy_with_embeddings(model, analogy: Analogy,
embedding_type: Literal["input", "output", "both"] = "both",
topn: int = 5) -> dict:
"""
Test analogy using specified embedding type(s) and return results in a format
suitable for DataFrame construction.
Returns dictionary containing:
- Basic analogy information
- Top n predictions for each embedding type
- Whether expected word was found
- Rank of expected word if found
- Similarity score of expected word if found
"""
results = {
'input_1': analogy.input_1,
'input_2': analogy.input_2,
'input_3': analogy.input_3,
'expected': analogy.expected
}
try:
# Test input embeddings
if embedding_type in ["input", "both"]:
input_vec = (model.wv[analogy.input_2] -
model.wv[analogy.input_1] +
model.wv[analogy.input_3])
            input_results = model.wv.similar_by_vector(input_vec, topn=topn + 3)  # fetch extras so filtering out the 3 input words still leaves topn
# Filter out input words
input_results = [(word, score) for word, score in input_results
if word not in [analogy.input_1, analogy.input_2, analogy.input_3]][:topn]
# Store top predictions
results.update({'input_preds' : ', '.join(w for w, s in input_results)})
# Check if expected word is in results
expected_in_input = next(((i+1, score) for i, (word, score) in enumerate(input_results)
if word == analogy.expected), (None, None))
results['input_expected_rank'] = expected_in_input[0]
# Test output embeddings
if embedding_type in ["output", "both"]:
output_vec = (model.syn1neg[model.wv.key_to_index[analogy.input_2]] -
model.syn1neg[model.wv.key_to_index[analogy.input_1]] +
model.syn1neg[model.wv.key_to_index[analogy.input_3]])
output_vec = output_vec / np.linalg.norm(output_vec)
similarities = []
for word in model.wv.index_to_key:
if word not in [analogy.input_1, analogy.input_2, analogy.input_3]:
vec = model.syn1neg[model.wv.key_to_index[word]]
vec = vec / np.linalg.norm(vec)
sim = np.dot(output_vec, vec)
similarities.append((word, sim))
output_results = sorted(similarities, key=lambda x: x[1], reverse=True)[:topn]
# Store top predictions
results.update({'output_preds' : ', '.join(w for w,s in output_results)})
# Check if expected word is in results
expected_in_output = next(((i+1, score) for i, (word, score) in enumerate(output_results)
if word == analogy.expected), (None, None))
results['output_expected_rank'] = expected_in_output[0]
except KeyError as e:
results['error'] = str(e)
return results
def analyze_analogies(model, analogy_sets: Dict[str, List[Analogy]], domain: str):
"""
Test multiple sets of analogies and return results as a DataFrame
"""
all_results = []
for category, analogies in analogy_sets.items():
for analogy in analogies:
result = test_analogy_with_embeddings(model, analogy)
result['category'] = category
result['domain'] = domain
all_results.append(result)
# Create DataFrame
df = pd.DataFrame(all_results)
# Add success indicators
df['input_success'] = df['input_expected_rank'].notna()
df['output_success'] = df['output_expected_rank'].notna()
df.drop(columns=['output_expected_rank', 'input_expected_rank'], inplace=True)
df = df[df.input_preds.notna()]
return df
# Test analogies and analyze results
analogy_sets = {
'gender': gender_analogies,
'geographic': geographic_analogies,
'comparative': comparative_analogies,
'opposite': opposite_analogies,
'professional': professional_analogies
}
# Test in both domains
results_bio = analyze_analogies(abstracts_model, analogy_sets, 'biomedical')
results_lyrics = analyze_analogies(songs_model, analogy_sets, 'lyrics')
# Combine results
all_results = pd.concat([results_bio, results_lyrics])
all_results.head()
 | input_1 | input_2 | input_3 | expected | input_preds | output_preds | category | domain | error | input_success | output_success |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | man | woman | king | queen | abdulaziz, abdullah, fahd, fahad, khalid | abdulaziz, university, riyadh, tertiary, princess | gender | biomedical | NaN | False | False |
1 | father | mother | son | daughter | supraoptic, solitarius, tractus, kndy, mpoa | supraoptic, mayor, avons, sont, postrema | gender | biomedical | NaN | False | False |
6 | france | paris | japan | tokyo | tokyo, sinai, osaka, fukuoka, essen | tokyo, essen, fukuoka, turin, kumamoto | geographic | biomedical | NaN | True | True |
7 | russia | moscow | uk | london | fukuoka, kingston, london, canadian, massachus... | rawalpindi, florence, leipzig, fukuoka, lausanne | geographic | biomedical | NaN | True | False |
8 | egypt | cairo | iran | tehran | ahvaz, tehran, mashhad, shiraz, kermanshah | tehran, shiraz, mashhad, isfahan, tabriz | geographic | biomedical | NaN | True | True |
all_results.groupby(['category', 'domain'])[['input_success', 'output_success']].mean()
all_results.groupby(['category'])[['input_success', 'output_success']].mean()
all_results.groupby(['domain'])[['input_success', 'output_success']].mean()
category | domain | input_success | output_success |
---|---|---|---|
comparative | biomedical | 0.333333 | 0.333333 |
comparative | lyrics | 0.750000 | 0.500000 |
gender | biomedical | 0.000000 | 0.000000 |
gender | lyrics | 0.833333 | 0.666667 |
geographic | biomedical | 0.800000 | 0.600000 |
geographic | lyrics | 0.500000 | 0.000000 |
opposite | biomedical | 0.500000 | 0.250000 |
opposite | lyrics | 0.500000 | 0.500000 |
professional | biomedical | 0.000000 | 0.000000 |
professional | lyrics | 0.333333 | 0.333333 |
category | input_success | output_success |
---|---|---|
comparative | 0.571429 | 0.428571 |
gender | 0.625000 | 0.500000 |
geographic | 0.714286 | 0.428571 |
opposite | 0.500000 | 0.375000 |
professional | 0.250000 | 0.250000 |
domain | input_success | output_success |
---|---|---|
biomedical | 0.466667 | 0.333333 |
lyrics | 0.631579 | 0.473684 |
1. Design and test a new set of analogies that you would expect to work particularly well in each domain:
biomedical_analogies = [
# Organ-Function Relationships
Analogy("heart", "pump", "lung", "expand"),
Analogy("kidney", "filter", "liver", "detoxify"),
Analogy("nose", "breathe", "eye", "see"),
]
song_analogies = [
# feeling intensity
Analogy("like", "love", "dislike", "hate"),
Analogy("down", "crushed", "happy", "elated"),
Analogy("smile", "laugh", "frown", "cry"),
]
# Test analogies and analyze results
analogy_sets2 = {
'organ': biomedical_analogies,
'intensity': song_analogies
}
# Test in both domains
results_bio2 = analyze_analogies(abstracts_model, analogy_sets2, 'biomedical')
results_lyrics2 = analyze_analogies(songs_model, analogy_sets2, 'lyrics')
# Combine results
all_results2 = pd.concat([results_bio2, results_lyrics2])
all_results2.groupby(['category', 'domain'])[['input_success', 'output_success']].mean()
all_results2.groupby(['category'])[['input_success', 'output_success']].mean()
all_results2.groupby(['domain'])[['input_success', 'output_success']].mean()
category | domain | input_success | output_success |
---|---|---|---|
intensity | biomedical | 0.0 | 0.0 |
intensity | lyrics | 0.5 | 0.0 |
organ | biomedical | 0.0 | 0.0 |
organ | lyrics | 0.0 | 0.0 |
category | input_success | output_success |
---|---|---|
intensity | 0.25 | 0.0 |
organ | 0.00 | 0.0 |
domain | input_success | output_success |
---|---|---|
biomedical | 0.00 | 0.0 |
lyrics | 0.25 | 0.0 |
all_results2
 | input_1 | input_2 | input_3 | expected | input_preds | output_preds | category | domain | error | input_success | output_success |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | heart | pump | lung | expand | ppis, proton, cpis, erlotinib, h441 | h441, oropharyngeal, ppis, intrapleural, washings | organ | biomedical | NaN | False | False |
1 | kidney | filter | liver | detoxify | filters, bubbles, semisolid, droplet, cryoprobe | filters, bubbles, cryoprobe, micropipette, kalman | organ | biomedical | NaN | False | False |
2 | nose | breathe | eye | see | listen, pace, instill, cursor, rop | instill, talks, enrol, breathed, supervise | organ | biomedical | NaN | False | False |
3 | like | love | dislike | hate | ambivalent, femininity, sexism, sympathy, valuing | ambivalent, ambivalence, sexism, femininity, v... | intensity | biomedical | NaN | False | False |
4 | down | crushed | happy | elated | pleasant, fearful, lightly, faces, pictures | pleasant, unpleasant, fearful, pictures, arousing | intensity | biomedical | NaN | False | False |
0 | heart | pump | lung | expand | mosquito, strap, grip, pistol, carcass | extended, strap, rza, slash, uzi | organ | lyrics | NaN | False | False |
2 | nose | breathe | eye | see | moment, impossible, live, emptiness, truth | emptiness, deny, aftershock, drownin, centipede | organ | lyrics | NaN | False | False |
4 | down | crushed | happy | elated | birthday, olden, stupidly, endings, monsters | endings, desired, hereafter, certainty, cheerful | intensity | lyrics | NaN | False | False |
5 | smile | laugh | frown | cry | clown, cry, laughing, joke, bother | wrinkles, academy, chatter, joke, grins | intensity | lyrics | NaN | True | False |
Understanding bias in word embeddings is crucial because these models, trained on real-world text, can absorb and amplify societal biases present in their training data. The method we'll use here builds on a key insight: words that are conceptually related tend to be closer together in the embedding space. By examining how close different profession words are to gendered terms, we can measure potential gender biases in how these professions are represented.
Our approach follows a simplified version of the method introduced by Bolukbasi et al. in their influential paper "Man is to Computer Programmer as Woman is to Homemaker." We'll measure bias by comparing how similar each profession word is to a set of male-associated words (like "he," "man," "father") versus female-associated words (like "she," "woman," "mother"). If a profession is substantially closer to one gender's terms than the other's, this suggests the embedding has learned to associate that profession more strongly with that gender.
What makes this analysis particularly interesting in our context is that we can compare these biases across two very different domains - biomedical literature and song lyrics. This comparison might reveal whether gender biases manifest differently in technical academic writing versus popular culture.
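For contrast with the averaged-similarity simplification used below, the original paper works with an explicit gender direction and projects words onto it. A minimal single-pair sketch (the paper builds the direction with PCA over several he-she style pairs, so this is a rough stand-in):

def gender_direction_bias(model, word, pair=('he', 'she')):
    # Unit vectors for the pair; their difference defines the bias axis
    a = model.wv[pair[0]] / np.linalg.norm(model.wv[pair[0]])
    b = model.wv[pair[1]] / np.linalg.norm(model.wv[pair[1]])
    direction = (a - b) / np.linalg.norm(a - b)
    v = model.wv[word] / np.linalg.norm(model.wv[word])
    return float(np.dot(v, direction))  # > 0 leans male, < 0 leans female

print(gender_direction_bias(abstracts_model, 'nurse'))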
def analyze_gender_bias(model, profession_words):
"""
Analyze gender bias in profession words by measuring their
relative similarity to male/female reference words.
This implementation follows the methodology from Bolukbasi et al.'s
"Man is to Computer Programmer as Woman is to Homemaker" paper, but simplified.
"""
# Define sets of male and female reference words
male_words = ['he', 'man', 'boy', 'male', 'father', 'son', 'brother']
female_words = ['she', 'woman', 'girl', 'female', 'mother', 'daughter', 'sister']
results = []
for profession in profession_words:
if profession not in model.wv:
continue
# Calculate average similarity to male and female words
male_similarities = [
model.wv.similarity(profession, male_word)
for male_word in male_words
if male_word in model.wv
]
female_similarities = [
model.wv.similarity(profession, female_word)
for female_word in female_words
if female_word in model.wv
]
# Calculate bias score (positive means male-leaning, negative means female-leaning)
bias_score = np.mean(male_similarities) - np.mean(female_similarities)
results.append({
'profession': profession,
'bias_score': bias_score,
'male_similarity': np.mean(male_similarities),
'female_similarity': np.mean(female_similarities)
})
return pd.DataFrame(results)
# Test the function with some profession words ('the', 'of', and 'and' are
# included as rough non-profession baselines)
professions = ['the', 'of', 'and',
'doctor', 'nurse', 'engineer', 'teacher', 'programmer',
'researcher', 'executive',
'scientist', 'assistant', 'secretary', 'ceo', 'manager'
]
# Compare bias across domains
bio_bias = analyze_gender_bias(abstracts_model, professions)
lyrics_bias = analyze_gender_bias(songs_model, professions)
bio_bias.sort_values('bias_score')
lyrics_bias.sort_values('bias_score')
 | profession | bias_score | male_similarity | female_similarity |
---|---|---|---|---|
4 | nurse | -0.057328 | 0.213399 | 0.270726 |
3 | doctor | -0.031530 | 0.327081 | 0.358610 |
7 | researcher | -0.029857 | 0.318154 | 0.348011 |
5 | engineer | -0.024330 | 0.115748 | 0.140078 |
6 | teacher | -0.015782 | 0.330983 | 0.346765 |
11 | ceo | 0.007945 | 0.174505 | 0.166560 |
10 | assistant | 0.010459 | 0.265940 | 0.255482 |
2 | and | 0.015279 | 0.209591 | 0.194312 |
1 | of | 0.018519 | 0.212985 | 0.194466 |
0 | the | 0.021214 | 0.244629 | 0.223415 |
12 | manager | 0.027794 | 0.211096 | 0.183302 |
9 | scientist | 0.034105 | 0.313247 | 0.279141 |
8 | executive | 0.055274 | 0.189979 | 0.134704 |
 | profession | bias_score | male_similarity | female_similarity |
---|---|---|---|---|
4 | nurse | 0.003960 | 0.253249 | 0.249289 |
1 | of | 0.005190 | 0.158229 | 0.153039 |
2 | and | 0.012605 | 0.259275 | 0.246670 |
6 | teacher | 0.021185 | 0.280294 | 0.259109 |
0 | the | 0.022936 | 0.170250 | 0.147315 |
5 | engineer | 0.032793 | 0.233610 | 0.200818 |
8 | secretary | 0.050105 | 0.298679 | 0.248574 |
3 | doctor | 0.060817 | 0.250212 | 0.189395 |
7 | scientist | 0.062112 | 0.222944 | 0.160831 |
9 | manager | 0.082027 | 0.243264 | 0.161236 |
Modify the function above to allow for arbitrary reference words and test bias across other dimensions (e.g. race/ethnicity, age, geography)
def analyze_bias(model, target_words, reference_sets):
"""
Analyze bias in target words by measuring their relative similarity
to different sets of reference words.
Parameters:
-----------
model : Word2Vec model
The word embedding model to analyze
target_words : list
Words to analyze for bias
reference_sets : dict
Dictionary with keys as category names and values as lists of reference words
Example: {'male': ['he', 'man', ...], 'female': ['she', 'woman', ...]}
Returns:
--------
DataFrame with bias scores and similarities for each target word
"""
results = []
# Verify we have at least two reference sets
if len(reference_sets) < 2:
raise ValueError("Need at least two reference sets to compute bias")
# Get the reference set names
reference_names = list(reference_sets.keys())
for target in target_words:
if target not in model.wv:
continue
# Calculate similarities to each reference set
similarities = {}
for ref_name, ref_words in reference_sets.items():
valid_refs = [word for word in ref_words if word in model.wv]
if not valid_refs:
continue
similarities[ref_name] = [
model.wv.similarity(target, ref_word)
for ref_word in valid_refs
]
# Skip if we don't have at least two reference sets with valid words
if len(similarities) < 2:
continue
# Create result entry
result = {'target': target}
# Add mean similarities for each reference set
for ref_name, sims in similarities.items():
result[f'{ref_name}_similarity'] = np.mean(sims)
# Calculate bias scores between each pair of reference sets
for i, ref1 in enumerate(reference_names):
for ref2 in reference_names[i+1:]:
if ref1 in similarities and ref2 in similarities:
                    # With two reference sets keep the simple name; with more,
                    # name each pairwise score so later pairs don't overwrite earlier ones
                    bias_name = ('bias_score' if len(reference_names) == 2
                                 else f'bias_{ref1}_vs_{ref2}')
result[bias_name] = result[f'{ref1}_similarity'] - result[f'{ref2}_similarity']
results.append(result)
return pd.DataFrame(results)
# Define gender reference sets
gender_references = {
'male': ['he', 'man', 'boy', 'male', 'father', 'son', 'brother'],
'female': ['she', 'woman', 'girl', 'female', 'mother', 'daughter', 'sister']
}
# Test gender bias
professions = ['doctor', 'nurse', 'engineer', 'teacher', 'programmer',
'researcher', 'executive', 'scientist', 'assistant',
'secretary', 'ceo', 'manager']
gender_abs = analyze_bias(abstracts_model, professions, gender_references)
gender_song = analyze_bias(songs_model, professions, gender_references)
gender_abs.sort_values('bias_score')
gender_song.sort_values('bias_score')
 | target | male_similarity | female_similarity | bias_score |
---|---|---|---|---|
1 | nurse | 0.213399 | 0.270726 | -0.057328 |
0 | doctor | 0.327081 | 0.358610 | -0.031530 |
4 | researcher | 0.318154 | 0.348011 | -0.029857 |
2 | engineer | 0.115748 | 0.140078 | -0.024330 |
3 | teacher | 0.330983 | 0.346765 | -0.015782 |
8 | ceo | 0.174505 | 0.166560 | 0.007945 |
7 | assistant | 0.265940 | 0.255482 | 0.010459 |
9 | manager | 0.211096 | 0.183302 | 0.027794 |
6 | scientist | 0.313247 | 0.279141 | 0.034105 |
5 | executive | 0.189979 | 0.134704 | 0.055274 |
 | target | male_similarity | female_similarity | bias_score |
---|---|---|---|---|
1 | nurse | 0.253249 | 0.249289 | 0.003960 |
3 | teacher | 0.280294 | 0.259109 | 0.021185 |
2 | engineer | 0.233610 | 0.200818 | 0.032793 |
5 | secretary | 0.298679 | 0.248574 | 0.050105 |
0 | doctor | 0.250212 | 0.189395 | 0.060817 |
4 | scientist | 0.222944 | 0.160831 | 0.062112 |
6 | manager | 0.243264 | 0.161236 | 0.082027 |
# Define age reference sets
age_references = {
'young': ['young', 'youth', 'teenager', 'student', 'child', 'adolescent', 'inexperienced'],
'old': ['old', 'elderly', 'senior', 'retired', 'experienced', 'veteran', 'mature']
}
# Words to test
target_words = ['innovative', 'energetic', 'creative',
'tech-savvy', 'ambitious']
age_abs = analyze_bias(abstracts_model, target_words, age_references)
age_song = analyze_bias(songs_model, target_words, age_references)
age_abs.sort_values('bias_score')
age_song.sort_values('bias_score')
 | target | young_similarity | old_similarity | bias_score |
---|---|---|---|---|
1 | energetic | 0.136747 | 0.177734 | -0.040987 |
3 | ambitious | 0.289747 | 0.244501 | 0.045246 |
0 | innovative | 0.279251 | 0.164020 | 0.115231 |
2 | creative | 0.347807 | 0.227028 | 0.120780 |
 | target | young_similarity | old_similarity | bias_score |
---|---|---|---|---|
0 | creative | 0.177518 | 0.141568 | 0.03595 |
# Define socioeconomic reference sets
ses_references = {
'wealthy': ['rich', 'wealthy', 'affluent', 'privileged', 'luxurious'],
'poor': ['poor', 'impoverished', 'underprivileged', 'needy', 'struggling']
}
# Words to test
target_words = ['educated', 'intelligent', 'hardworking', 'violent', 'criminal',
'honest', 'trustworthy', 'ambitious', 'responsible']
ses_abs = analyze_bias(abstracts_model, target_words, ses_references)
ses_song = analyze_bias(songs_model, target_words, ses_references)
ses_abs.sort_values('bias_score')
ses_song.sort_values('bias_score')
 | target | wealthy_similarity | poor_similarity | bias_score |
---|---|---|---|---|
6 | ambitious | 0.329815 | 0.437158 | -0.107342 |
2 | violent | 0.298971 | 0.346066 | -0.047095 |
3 | criminal | 0.268299 | 0.308950 | -0.040651 |
4 | honest | 0.222025 | 0.251426 | -0.029401 |
1 | intelligent | 0.237756 | 0.242323 | -0.004567 |
7 | responsible | 0.175503 | 0.146053 | 0.029451 |
0 | educated | 0.523480 | 0.449080 | 0.074400 |
5 | trustworthy | 0.414717 | 0.316322 | 0.098395 |
 | target | wealthy_similarity | poor_similarity | bias_score |
---|---|---|---|---|
5 | responsible | 0.136210 | 0.232052 | -0.095841 |
2 | violent | 0.122537 | 0.152473 | -0.029936 |
1 | intelligent | 0.245246 | 0.219476 | 0.025770 |
3 | criminal | 0.228570 | 0.182626 | 0.045945 |
4 | honest | 0.269191 | 0.167533 | 0.101658 |
0 | educated | 0.449457 | 0.252002 | 0.197455 |
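The bias scores above are small in absolute terms, so it's natural to ask whether they could arise by chance. A rough permutation check, shuffling the reference words between the two sets to build a null distribution (illustrative only, not a calibrated test):

import random

def bias_null_interval(model, target, ref_a, ref_b, n_perm=200, seed=0):
    rng = random.Random(seed)
    pool = [w for w in ref_a + ref_b if w in model.wv]
    k = len([w for w in ref_a if w in model.wv])
    nulls = []
    for _ in range(n_perm):
        rng.shuffle(pool)
        a, b = pool[:k], pool[k:]
        nulls.append(np.mean([model.wv.similarity(target, w) for w in a]) -
                     np.mean([model.wv.similarity(target, w) for w in b]))
    return np.percentile(nulls, [2.5, 97.5])  # null 95% interval for the bias score

print(bias_null_interval(abstracts_model, 'educated',
                         ses_references['wealthy'], ses_references['poor']))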