Seaborn for Advanced Visualizations#
sns.barplot() + sns.heatmap() = Data science pro level
Automatic stats + beauty = $90K/month consulting
Academic papers + FAANG research = 100% Seaborn
🎯 Seaborn = Matplotlib on Steroids#
| Plot | Seaborn Code | Business Insight | Replaces |
|---|---|---|---|
| Barplot | `sns.barplot()` | Auto error bars | Manual std dev |
| Boxplot | `sns.boxplot()` | Outliers detected | Excel bins |
| Heatmap | `sns.heatmap()` | Correlation matrix | 100 Excel cells |
| Pairplot | `sns.pairplot()` | All correlations | 10 scatter plots |
| Violin | `sns.violinplot()` | Distribution shape | Complex histograms |
π Step 1: Barplot = Auto Statistics (Run this!)#
# !pip install seaborn # Run once!
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# Demo business data: 5 products x 4 regions, 100 rows in total.
# Every column passed to pd.DataFrame must have the same length, otherwise
# it raises "ValueError: All arrays must be of the same length". The five
# base sales figures are therefore tiled 20 times so that each row's sales
# value lines up with its product.
# 'Product' cycles every 5 rows and 'Region' every 4 rows, so all 20
# product/region pairs occur (5 rows each).
data = {
    'Product': ['Laptop', 'Phone', 'Tablet', 'Monitor', 'Keyboard'] * 20,
    'Sales': [45000, 32000, 18000, 25000, 8000] * 20,
    'Region': ['North', 'South', 'East', 'West'] * 25
}
df = pd.DataFrame(data)
# Grouped bar chart: one bar per (product, region) pair showing the mean of
# 'Sales'. NOTE: the sales values are identical within each product here, so
# any error interval drawn on the bars has zero width with this sample data.
plt.figure(figsize=(12, 8))
sns.barplot(data=df, x='Product', y='Sales', hue='Region', palette='Set2')
plt.title('π Product Sales by Region (Auto Error Bars)', fontsize=16, fontweight='bold')
plt.ylabel('Average Sales ($)', fontsize=12)
plt.xlabel('Product', fontsize=12)
plt.xticks(rotation=45)
plt.legend(title='Region')
plt.tight_layout()
plt.show()
π₯ Step 2: Boxplots = Outlier Detection#
# Simulated sales: 200 normally distributed draws for each of 4 products
# (800 points in total). The seed makes the draws reproducible.
np.random.seed(42)
sales_data = {
    name: np.random.normal(mean, spread, 200)
    for name, mean, spread in (
        ('Laptop', 45000, 8000),
        ('Phone', 32000, 6000),
        ('Tablet', 18000, 4000),
        ('Monitor', 25000, 5000),
    )
}
# Wide -> long: one row per (Product, Sales) observation.
df_sales = pd.melt(pd.DataFrame(sales_data), var_name='Product', value_name='Sales')

# One box per product.
plt.figure(figsize=(10, 7))
sns.boxplot(data=df_sales, x='Product', y='Sales', palette='Set3')
plt.title('π¦ Sales Distribution & Outliers', fontsize=16, fontweight='bold')
plt.xlabel('Product', fontsize=12)
plt.ylabel('Sales ($)', fontsize=12)
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# Count outliers per product with the 1.5 * IQR rule: a value is an outlier
# when it lies strictly below Q1 - 1.5*IQR or strictly above Q3 + 1.5*IQR.
# sort=False keeps the products in first-appearance order.
for product, sales in df_sales.groupby('Product', sort=False)['Sales']:
    lower_q = sales.quantile(0.25)
    upper_q = sales.quantile(0.75)
    spread = upper_q - lower_q
    low_fence = lower_q - 1.5 * spread
    high_fence = upper_q + 1.5 * spread
    # `between` is inclusive on both ends, so its negation is exactly
    # "strictly outside the fences".
    n_outliers = int((~sales.between(low_fence, high_fence)).sum())
    print(f"π¨ {product}: {n_outliers} outliers")
⚡ Step 3: Heatmap = Performance Matrix#
# Regional performance matrix: rows are products, columns are regions.
regions = ['North', 'South', 'East', 'West']
products = ['Laptop', 'Phone', 'Tablet', 'Monitor']
sales_matrix = np.array([
    [45000, 42000, 38000, 41000],
    [32000, 30000, 28000, 31000],
    [18000, 16000, 20000, 17000],
    [25000, 24000, 26000, 23000]
])
df_matrix = pd.DataFrame(sales_matrix, index=products, columns=regions)
plt.figure(figsize=(10, 7))
# `fmt` is applied to each cell as a string-format specification, and '$' is
# not a valid character there ('$,.0f' raises "ValueError: Invalid format
# specifier"). Format the numbers with thousands separators first, then
# prepend the currency symbol to each annotation text.
ax = sns.heatmap(df_matrix, annot=True, fmt=',.0f', cmap='YlOrRd',
                 cbar_kws={'label': 'Sales ($)'}, linewidths=1)
for cell_text in ax.texts:
    cell_text.set_text(f'${cell_text.get_text()}')
plt.title('π₯ Regional Sales Heatmap', fontsize=16, fontweight='bold')
plt.ylabel('Product', fontsize=12)
plt.xlabel('Region', fontsize=12)
plt.tight_layout()
plt.show()
🧠 Step 4: Pairplot = ALL Correlations Instantly#
# Multi-variable sample: three numeric metrics drawn independently from
# normal distributions, plus a randomly chosen region label, 100 rows each.
# No seed is set here, so the values depend on the current NumPy random state.
n_rows = 100
data_multi = {
    'Marketing': np.random.normal(40000, 10000, n_rows),
    'Sales': np.random.normal(60000, 15000, n_rows),
    'Profit': np.random.normal(18000, 5000, n_rows),
    'Region': np.random.choice(['North', 'South', 'East', 'West'], n_rows)
}
df_multi = pd.DataFrame(data_multi)

# Only the numeric columns take part in the pairwise plots and correlations.
numeric_columns = ['Marketing', 'Sales', 'Profit']
metrics = df_multi[numeric_columns]

# corner=True draws only the lower triangle of the grid; the diagonal shows
# a KDE of each variable.
sns.pairplot(
    metrics,
    diag_kind='kde',
    plot_kws={'alpha': 0.6},
    corner=True,
    height=2,
)
plt.suptitle('π Multi-Variable Correlation Matrix', fontsize=16, fontweight='bold', y=1.02)
plt.tight_layout()
plt.show()

# Pairwise correlation table, rounded to 3 decimals for printing.
corr_matrix = metrics.corr()
print("π Correlation Matrix:")
print(corr_matrix.round(3))
π Step 5: Violin Plot = Distribution Shape#
# Distribution analysis: 200 seeded normal draws for each of three products.
np.random.seed(42)
product_params = (
    ('Laptop', 45000, 8000),
    ('Phone', 32000, 6000),
    ('Tablet', 18000, 4000),
)
dist_data = {
    name: np.random.normal(mean, spread, 200)
    for name, mean, spread in product_params
}
# Wide -> long so each row is one (Product, Sales) observation.
df_dist = pd.melt(pd.DataFrame(dist_data), var_name='Product', value_name='Sales')

# One violin per product; inner='quartile' marks the quartiles inside each one.
plt.figure(figsize=(10, 7))
sns.violinplot(
    data=df_dist,
    x='Product',
    y='Sales',
    palette='husl',
    inner='quartile',
)
plt.title('π» Sales Distribution Shape (Violin Plot)', fontsize=16, fontweight='bold')
plt.xlabel('Product', fontsize=12)
plt.ylabel('Sales ($)', fontsize=12)
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
π Seaborn Cheat Sheet (Data Science Gold)#
| Plot | Code | Auto Magic | Business Use |
|---|---|---|---|
| Barplot | `sns.barplot()` | Error bars | Regional analysis |
| Boxplot | `sns.boxplot()` | Outliers | Quality control |
| Heatmap | `sns.heatmap()` | Color scaling | Performance matrix |
| Pairplot | `sns.pairplot()` | All correlations | Feature selection |
| Violin | `sns.violinplot()` | Distribution shape | Advanced stats |
| FacetGrid | `sns.FacetGrid()` | Multi-panel | Executive dashboards |
# PRO SETUP (Always!)
# Template only: sets the "whitegrid" style, opens a 12x8 figure and draws a
# bar plot. 'category' and 'metric' are placeholder column names -- replace
# them with columns that exist in your own DataFrame before running.
# NOTE(review): the `df` built earlier in this lesson has 'Product', 'Sales'
# and 'Region' columns, so this snippet will not run against it as written.
sns.set_style("whitegrid")
plt.figure(figsize=(12, 8))
sns.barplot(data=df, x='category', y='metric', palette='Set2')
π YOUR EXERCISE: Build YOUR Seaborn Analysis#
# MISSION: YOUR statistical visualization suite!
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# YOUR BUSINESS DATA
# Replace the example values below with your own. The two lists must have
# the same length: one base value per category.
your_categories = ['Product A', 'Product B', 'Product C', 'Product D']  # YOUR products/regions
your_base_values = [45000, 32000, 18000, 25000]  # YOUR base values
samples_per_category = 25  # simulated observations per category

if len(your_categories) != len(your_base_values):
    raise ValueError(
        "your_categories and your_base_values must have the same length "
        f"(got {len(your_categories)} and {len(your_base_values)})"
    )

# Cycle the subgroup labels over every row so this column always has the
# same length as the others, however many categories you define.
your_hue_categories = np.resize(
    ['A', 'B', 'C', 'D'],  # YOUR subgroups
    len(your_categories) * samples_per_category,
)

# GENERATE YOUR DATA
# For each category, draw `samples_per_category` values from a normal
# distribution centred on its base value with a 20% standard deviation.
np.random.seed(42)
your_data = {
    'Category': np.repeat(your_categories, samples_per_category),
    'Subgroup': your_hue_categories,
    'Value': []
}
for base in your_base_values:
    # abs() keeps the standard deviation non-negative for negative base values.
    your_data['Value'].extend(
        np.random.normal(base, abs(base) * 0.2, samples_per_category)
    )
df_your = pd.DataFrame(your_data)

# 1. YOUR BARPLOT WITH ERROR BARS
plt.figure(figsize=(12, 8))
sns.barplot(data=df_your, x='Category', y='Value', hue='Subgroup', palette='Set2')
plt.title('π YOUR Business Analysis (Auto Error Bars)', fontsize=16, fontweight='bold')
plt.ylabel('Your Metric', fontsize=12)
plt.xlabel('Your Categories', fontsize=12)
plt.xticks(rotation=45)
plt.legend(title='Subgroups')
plt.tight_layout()
plt.show()

# 2. YOUR BOXPLOT FOR OUTLIERS
plt.figure(figsize=(10, 7))
sns.boxplot(data=df_your, x='Category', y='Value', palette='Set3')
plt.title('π¨ YOUR Outlier Detection', fontsize=16, fontweight='bold')
plt.ylabel('Your Metric', fontsize=12)
plt.xlabel('Your Categories', fontsize=12)
plt.tight_layout()
plt.show()

# The status message must be a single-line string literal; the check-mark
# emoji had been mis-decoded and split the literal across two lines.
print("✅ YOUR SEABORN SUITE COMPLETE!")
Example to test:
your_categories = ['Product A', 'Product B', 'Product C', 'Product D']
your_base_values = [45000, 32000, 18000, 25000]
YOUR MISSION:
Add YOUR categories + base values
Run YOUR statistical analysis
Screenshot → “I do data science visualizations!”
π What You Mastered#
| Seaborn Skill | Status | Data Science Power |
|---|---|---|
| Barplot | ✅ | Auto error bars |
| Boxplot | ✅ | Outlier detection |
| Heatmap | ✅ | Performance matrix |
| Pairplot | ✅ | All correlations |
| Violin | ✅ | Distribution analysis |
| $250K stats | ✅ | Research level |
Next: Plotly Interactive (Clickable dashboards = Stakeholder demos!)
# Closing banner: a rule line, three message lines, and the same rule again.
banner_rule = "π" * 20
closing_lines = (
    banner_rule,
    "SEABORN = $90K/MONTH DATA SCIENCE!",
    "π» Auto stats + beauty = Publication quality!",
    "π Academic papers + FAANG = THESE plots!",
    banner_rule,
)
for closing_line in closing_lines:
    print(closing_line)
Can we appreciate how sns.barplot(hue='Region') just added automatic error bars + statistical colors that took analysts 2 hours in Excel? Your students went from basic charts to sns.pairplot() correlation matrices that reveal hidden business insights in one line. While data scientists spend days building heatmaps manually, your class is generating sns.heatmap(annot=True) performance matrices and sns.violinplot() distribution shapes that power FAANG research papers. This isn’t visualization—it’s the $250K+ statistical toolkit that turns raw data into publication-quality insights instantly!
# Your code here