Practical 10: Fairness in Machine Learning¶

Daniel Anadria

Machine Learning with Python - Utrecht Summer School

In this practical, we are going to explore bias and fairness in Machine Learning!

COMPAS Recidivism¶

The COMPAS dataset contains outcomes from a proprietary tool named COMPAS (Correctional Offender Management Profiling for Alternative Sanctions), designed to evaluate the probability of a convict committing another crime. It is utilized by judges and parole officers and is notably recognized for its discriminatory impact on African-American individuals.

Dataset source: Broward County Clerk's Office, Broward County Sheriff's Office, Florida Department of Corrections, ProPublica

We are going to use this dataset to explore some notions of group fairness as they relate to machine learning.


Disclaimer:

Unlike most tutorials that use the COMPAS dataset, we are not going to assess the fairness of the pre-computed COMPAS scores. Instead, we will build our own classifier based on the 'raw' data such as crime history and demographic information (thus excluding the derived COMPAS scores). This way, you will get some intuition about how such classifiers are built, where fairness problems might stem from in the development pipeline, and what can be done to address fairness in model outputs.

In algorithmic fairness, it's important to understand the context surrounding a specific applied machine learning task. Sources of bias are many, as are the degrees of freedom in which disparity to focus on. Unfortunately, satisfying multiple fairness criteria at the same time is often mathematically impossible, and improving fairness can reduce predictive performance - a phenomenon known as the fairness-accuracy trade-off.

We do not claim to be penal-system or social-justice experts. The purpose of this tutorial is only to demonstrate some of the machine learning approaches to bias detection and mitigation. For this to be possible, we have to make choices about which biases are 'more important' to focus on. In reality, the values influencing what to optimize the models for are multifaceted and come from different actors. We do not claim to have 'solved fairness problems'; that would require interdisciplinary, multi-agent input and would always be based on a selection of particular values.


In [32]:
!pip install -q squarify
!pip install -q fairlearn
In [33]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import squarify
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
import statsmodels.api as sm
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, roc_auc_score, roc_curve

import fairlearn
from fairlearn.postprocessing import ThresholdOptimizer
from fairlearn.postprocessing import plot_threshold_optimizer

1. Load the COMPAS dataset from the url https://raw.githubusercontent.com/propublica/compas-analysis/master/compas-scores-two-years.csv and inspect the variables.

In [34]:
df = pd.read_csv("https://raw.githubusercontent.com/propublica/compas-analysis/master/compas-scores-two-years.csv")
df.head()
Out[34]:
id name first last compas_screening_date sex dob age age_cat race ... v_decile_score v_score_text v_screening_date in_custody out_custody priors_count.1 start end event two_year_recid
0 1 miguel hernandez miguel hernandez 2013-08-14 Male 1947-04-18 69 Greater than 45 Other ... 1 Low 2013-08-14 2014-07-07 2014-07-14 0 0 327 0 0
1 3 kevon dixon kevon dixon 2013-01-27 Male 1982-01-22 34 25 - 45 African-American ... 1 Low 2013-01-27 2013-01-26 2013-02-05 0 9 159 1 1
2 4 ed philo ed philo 2013-04-14 Male 1991-05-14 24 Less than 25 African-American ... 3 Low 2013-04-14 2013-06-16 2013-06-16 4 0 63 0 1
3 5 marcu brown marcu brown 2013-01-13 Male 1993-01-21 23 Less than 25 African-American ... 6 Medium 2013-01-13 NaN NaN 1 0 1174 0 0
4 6 bouthy pierrelouis bouthy pierrelouis 2013-03-26 Male 1973-01-22 43 25 - 45 Other ... 1 Low 2013-03-26 NaN NaN 2 0 1102 0 0

5 rows × 53 columns

The COMPAS dataset contains the following variables. We tried looking up the meaning behind each variable. Our target variable is called two_year_recid.

Some variables were used to construct other variables. For example, decile_score represents the individual's COMPAS score, the value predicting the risk of recidivism. We will omit the COMPAS score and the related features, and try to predict two-year recidivism from the remaining features.

Variable Description
id Unique identifier for each individual
name Full name of the individual
first First name of the individual
last Last name of the individual
compas_screening_date Date when the COMPAS screening was conducted
sex Sex of the individual
dob Date of birth
age Age at the time of screening
age_cat Categorical age group (e.g., less than 25, 25-45, greater than 45)
race Race/ethnicity of the individual
juv_fel_count Number of juvenile felony charges
decile_score COMPAS decile score for general recidivism risk
juv_misd_count Number of juvenile misdemeanor charges
juv_other_count Number of other juvenile charges
priors_count Number of prior offenses
days_b_screening_arrest Days between screening and arrest
c_jail_in Date of jail entry for the current charge
c_jail_out Date of jail release for the current charge
c_case_number Case number for the current charge
c_offense_date Date of the current offense
c_arrest_date Date of the current arrest
c_days_from_compas Days from COMPAS screening to the current charge
c_charge_degree Degree of the current charge (e.g., felony, misdemeanor)
c_charge_desc Description of the current charge
is_recid Indicator of whether the individual recidivated
r_case_number Case number for the recidivism charge
r_charge_degree Degree of the recidivism charge
r_days_from_arrest Days from the arrest to the recidivism charge
r_offense_date Date of the recidivism offense
r_charge_desc Description of the recidivism charge
r_jail_in Date of jail entry for the recidivism charge
r_jail_out Date of jail release for the recidivism charge
violent_recid Indicator of violent recidivism
is_violent_recid Binary indicator for violent recidivism
vr_case_number Case number for the violent recidivism charge
vr_charge_degree Degree of the violent recidivism charge
vr_offense_date Date of the violent recidivism offense
vr_charge_desc Description of the violent recidivism charge
type_of_assessment Type of COMPAS assessment conducted
decile_score.1 COMPAS decile score for violent recidivism risk
score_text Textual interpretation of the COMPAS score (e.g., Low, Medium, High)
screening_date Date of the screening assessment
v_type_of_assessment Type of violent recidivism assessment conducted
v_decile_score Decile score for violent recidivism
v_score_text Textual interpretation of the violent recidivism score
v_screening_date Date of the violent recidivism screening
in_custody Date of custody start
out_custody Date of custody end
priors_count.1 Redundant count of prior offenses
start Start day of the observation period
end End day of the observation period
event Event indicator
two_year_recid Indicator for recidivism within two years

Before we start, reflect on the task - what does it mean to predict the risk of a person committing another crime based on (some of) these variables?

Do you think that all of the available data should be used to make the prediction?

Exploratory Data Analysis¶

Before building a predictive model, it's good practice to explore how the data are distributed. Remember that real-world datasets are rarely balanced, and patterns within the data reveal social realities - both justified and unjustified.

2. What is the proportion of males vs. females in the dataset? (hint: value_counts() has a normalize parameter). You can also visualize the distribution of sex if you want to practice more with visualization; we will use a donut chart, but you can use a different type of plot.

In [35]:
df['sex'].value_counts(normalize=True)*100
Out[35]:
sex
Male      80.6626
Female    19.3374
Name: proportion, dtype: float64

You can also visualize the distribution of sex.

In [36]:
# Count the occurrences of each sex
sex_counts = df['sex'].value_counts()

# Get a colorblind-friendly palette with as many colors as there are categories in sex_counts
colors = sns.color_palette('colorblind', len(sex_counts))

# Calculate total for percentage calculation
total_count = sex_counts.sum()

# Create a donut chart with counts and percentages in the autopct, using the colorblind palette
plt.figure(figsize=(8, 8))
plt.pie(sex_counts, labels=sex_counts.index, autopct=lambda pct: f'{int(round(pct*total_count/100.0))} ({pct:.1f}%)', startangle=140, wedgeprops=dict(width=0.3), colors=colors)
plt.title('Distribution of Sex')
plt.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
plt.show()

Next, let's learn about the distribution of age in the dataset.

3. Visualize the distribution of age by sex. (hint: you can use a violin plot or a box plot)

In [37]:
# Create a violin plot (assign the grouping variable to hue to avoid the seaborn palette deprecation warning)
sns.violinplot(x='age', y='sex', hue='sex', data=df, palette='colorblind', legend=False)

plt.title('Age Distribution by Sex')
plt.xlabel('Age')
plt.ylabel('Sex')
plt.show()

We see that the distribution of age is similar between the sexes. Both male and female convicts tend to be young, and their numbers decrease with age. Male convicts have a longer right tail, meaning that there are some elderly men in the dataset, although not too many.
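To back this up numerically, here is a small optional check (not part of the original run) that summarizes age per sex with the columns already loaded; the larger maximum for males reflects the longer right tail.

In [ ]:
# Summary statistics of age per sex
df.groupby('sex')['age'].describe().round(1)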

4. What is the composition of the COMPAS dataset based on race? Print the percentage of each value of the race variable.

In [38]:
df['race'].value_counts(normalize=True)*100
Out[38]:
race
African-American    51.233712
Caucasian           34.017189
Hispanic             8.830053
Other                5.225950
Asian                0.443582
Native American      0.249515
Name: proportion, dtype: float64

Compare this with the 2020 U.S. Census*:

Self-identified race Percent of population
African-American 12.4%
Caucasian 61.6%
Hispanic 19%
Other 8.4%
Asian 6.0%
Native American 1.1%

* approximate values from: census.gov; the values do not add up to 100 because of the overlaps in the Hispanic class with other categories

We see that the COMPAS dataset does not reflect the population racial distribution of the United States. In particular, African-Americans are substantially overrepresented while other groups are underrepresented.

5. Visualize the distribution of race using a treemap (squarify.plot).

In [39]:
# Count the occurrences of each race
race_counts = df['race'].value_counts()

# Calculate total count for normalization
total_count = race_counts.sum()

# Generate labels with count and percentage
labels = [f'{index}\n{count} ({count/total_count:.1%})' for index, count in race_counts.items()]

# Create a treemap
plt.figure(figsize=(12, 8))
squarify.plot(sizes=race_counts, label=labels, alpha=0.8, color=sns.color_palette("colorblind", len(race_counts)))
plt.axis('off')
plt.title('Distribution of Race')
plt.show()

6. Plot the distribution of race by sex - first using counts (frequencies), then using the log transformation of the count.

In [40]:
# Create a count plot
sns.countplot(x='race', hue='sex', data=df, palette='colorblind')

plt.title('Distribution of Race by Sex')
plt.xlabel('Race')
plt.ylabel('Count')
plt.xticks(rotation=45)  # rotate x-axis labels for better readability
plt.legend(title='Sex')
plt.tight_layout()  # adjust layout to make room for the rotated x-axis labels
plt.show()
In [41]:
# Create a count plot
sns.countplot(x='race', hue='sex', data=df, palette='colorblind')

plt.title('Distribution of Race by Sex')
plt.xlabel('Race')
plt.ylabel('Log Count')
plt.xticks(rotation=45)  # rotate x-axis labels for better readability
plt.legend(title='Sex')
plt.yscale('log')  # Set y-axis to log scale
plt.tight_layout()  # adjust layout to make room for the rotated x-axis labels
plt.show()

The logarithmic transformation changes the scale of the data but retains the key patterns, making it easier to see the within-race sex distribution.

We see that for each race category, there are always more male than female observations. However, the sex imbalance varies by group.
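The varying sex imbalance can also be read off a normalized cross-tabulation; a small optional sketch using the dataframe loaded above:

In [ ]:
# Share of each sex within every race category (rows sum to 1)
pd.crosstab(df['race'], df['sex'], normalize='index').round(2)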

7. Now let's consider the outcome variable - two year recidivism. What is the relationship of race to recidivism in the dataset? Start by making a bar plot of two-year recidivism by race.

In [42]:
ax = sns.countplot(x='race', hue='two_year_recid', data=df, palette='colorblind')

# Add titles and labels
plt.title('Count of Two-Year Recidivism by Race')
plt.xlabel('Race')
plt.ylabel('Count')
plt.xticks(rotation=45)  # Rotate x-axis labels for better readability

# Add text annotations
for container in ax.containers:
    ax.bar_label(container, fmt='%.0f', label_type='edge')

# Modify legend
handles, labels = ax.get_legend_handles_labels()
new_labels = ['No', 'Yes']  # Mapping 0 to No, 1 to Yes
plt.legend(handles, new_labels, title='Two-Year Recidivism')

# Optional: plt.yscale('log') # Uncomment if log scale is desired for y-axis
plt.tight_layout()  # Adjust layout to make room for the rotated x-axis labels
plt.show()

The bar plot shows that African-Americans are indeed the largest group, and also that, relative to the other large groups, African-Americans have proportionally more recidivist cases in the dataset.

A challenge is that due to large racial imbalances in the dataset, it is hard to see the plotted values for the smaller groups such as Native Americans and Asians.

8. Plot the proportion of recidivism within each race category. This will make comparing recidivism patterns between groups easier.

In [43]:
# Calculate the proportion of recidivism within each race category
df_normalized = df.groupby('race')['two_year_recid'].value_counts(normalize=True).rename('proportion').reset_index()

# Plot the normalized proportions
ax = sns.barplot(x='race', y='proportion', hue='two_year_recid', data=df_normalized, palette='colorblind')

# Add titles and labels
plt.title('Proportion of Two-Year Recidivism by Race')
plt.xlabel('Race')
plt.ylabel('Proportion')
plt.xticks(rotation=45)  # Rotate x-axis labels for better readability

# Add text annotations
for container in ax.containers:
    ax.bar_label(container, fmt='%.2f', label_type='edge')

# Modify legend
handles, labels = ax.get_legend_handles_labels()
new_labels = ['No', 'Yes']  # Mapping 0 to No, 1 to Yes
plt.legend(handles, new_labels, title='Two-Year Recidivism', loc='upper right', bbox_to_anchor=(1.35, 1))

plt.tight_layout()  # Adjust layout to make room for the rotated x-axis labels

plt.show()

By plotting proportions, we can see that the only two groups with recidivism above 50% are African- and Native Americans.
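To attach exact numbers to this observation, the recidivism rate per group is simply the mean of the binary target within each race category; an optional check:

In [ ]:
# Proportion of two-year recidivism within each race category, in percent
(df.groupby('race')['two_year_recid'].mean() * 100).round(1).sort_values(ascending=False)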

Prediction of Recidivism - Naïve Baseline Approach¶

Having explored the data, let's see what happens when we train a logistic regression model to predict two-year recidivism.

We have to prepare features for model input. Consider the following:

  • some features carry little predictive power, either because they are unrelated to the task or because they show little variance (e.g. id, type_of_assessment, etc.)
  • some features may cause multicollinearity issues (e.g. age_cat vs age, or the COMPAS decile scores vs the remaining features used to derive them)
  • some features contain text (e.g. name) and dates (e.g. in_custody)

The first two cases can be solved by removing (some of) the features. The third case could be solved through feature engineering (e.g. text vectorization in the case of names, subtracting dates to get day counts, etc.; see the sketch below). However, we opt for the simple approach of dropping most features that aren't readily usable as model input. We make an exception for categorical features (e.g. race) that can be dummy-coded.
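As a sketch of the date-based feature engineering we are skipping (this feature is not used anywhere below), the jail-entry and jail-release dates could be turned into a length-of-stay feature, assuming the date columns parse cleanly:

In [ ]:
# Sketch only (not used below): length of stay in days for the current charge
length_of_stay = (pd.to_datetime(df['c_jail_out']) - pd.to_datetime(df['c_jail_in'])).dt.days
length_of_stay.describe()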

We will now prepare the input data for the model.

In [44]:
# Select features
included_features = ['sex', 'race', 'age', 'juv_fel_count', 'juv_misd_count', 'juv_other_count', 'priors_count', 'priors_count.1', 'c_charge_degree']
X = df[included_features].copy()
y = df['two_year_recid']

# Clone race and sex labels (will be useful later)
X['race_label'] = X['race']
X['sex_label'] = X['sex']
In [45]:
# One-hot encode categorical features
dummy_following_features = ['sex', 'race', 'c_charge_degree']
X = pd.get_dummies(X, columns=dummy_following_features)
X.shape
Out[45]:
(7214, 18)

We will now split the dataset into training and test sets. We will also save race_label and sex_label into a new dataframe and then remove them from X_train and X_test.

In [46]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Save a copy of race and sex attribute labels from the test set
X_test_attributes = X_test[['race_label', 'sex_label']]

# remove race and sex attribute labels from train & test sets
X_train = X_train.drop(columns=['race_label', 'sex_label'])
X_test = X_test.drop(columns=['race_label', 'sex_label'])

Let's list our final selection of model input features.

In [47]:
# Final predictors

for i, column in enumerate(X_train.columns, start=1):
    print(f"{i}. {column}")
1. age
2. juv_fel_count
3. juv_misd_count
4. juv_other_count
5. priors_count
6. priors_count.1
7. sex_Female
8. sex_Male
9. race_African-American
10. race_Asian
11. race_Caucasian
12. race_Hispanic
13. race_Native American
14. race_Other
15. c_charge_degree_F
16. c_charge_degree_M

9. Next fit the logistic regression model and display the classification report and confusion matrix.

In [48]:
# Initialize and fit the logistic regression model
lr_model = LogisticRegression(max_iter=800)
lr_model.fit(X_train, y_train)

# Predict on the test set
y_pred = lr_model.predict(X_test) # predictions
y_prob = lr_model.predict_proba(X_test)[:, 1]  # probabilities for the positive class

# Generate the classification report
class_report = classification_report(y_test, y_pred)

# Generate the confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)

print("\nGlobal Classification Report:")
print(class_report)

print("\nGlobal Confusion Matrix:")
print(conf_matrix)
Global Classification Report:
              precision    recall  f1-score   support

           0       0.71      0.78      0.74       823
           1       0.66      0.58      0.62       620

    accuracy                           0.69      1443
   macro avg       0.68      0.68      0.68      1443
weighted avg       0.69      0.69      0.69      1443


Global Confusion Matrix:
[[638 185]
 [262 358]]

An important idea in group fairness is that classification performance should not only be examined for the overall model (as done above), but on a per group basis as well.
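As a side note before doing this by hand, the fairlearn library ships a MetricFrame helper that disaggregates any metric by a sensitive feature; a minimal sketch using the test-set predictions from above (in the rest of this practical we stick to the manual pivot-table approach):

In [ ]:
from fairlearn.metrics import MetricFrame, false_positive_rate, false_negative_rate
from sklearn.metrics import accuracy_score

# Overall and per-race accuracy, false positive rate, and false negative rate in one object
mf = MetricFrame(
    metrics={'accuracy': accuracy_score,
             'false_positive_rate': false_positive_rate,
             'false_negative_rate': false_negative_rate},
    y_true=y_test,
    y_pred=y_pred,
    sensitive_features=X_test_attributes['race_label'],
)
print(mf.overall)
print(mf.by_group)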

10. Compute the true positive, true negative, false positive, and false negative rates for the different race and sex groups. You can use the X_test_attributes dataframe to calculate these.

In [49]:
# Create a comparison DataFrame
comparison_df = pd.DataFrame({})

# Append sex and race information
comparison_df['sex'] = X_test_attributes['sex_label']
comparison_df['race'] = X_test_attributes['race_label']

# Append observed and predicted outcomes
comparison_df['y_observed'] = y_test
comparison_df['y_predicted'] = y_pred

# Function to determine the prediction type
def get_prediction_label(row):
    if row['y_observed'] == 0 and row['y_predicted'] == 0:
        return 'TN'
    elif row['y_observed'] == 1 and row['y_predicted'] == 0:
        return 'FN'
    elif row['y_observed'] == 0 and row['y_predicted'] == 1:
        return 'FP'
    elif row['y_observed'] == 1 and row['y_predicted'] == 1:
        return 'TP'

# Apply the function to each row to create the 'baseline prediction' column
comparison_df['baseline_prediction'] = comparison_df.apply(get_prediction_label, axis=1)

comparison_df['predicted_probability'] = y_prob
In [50]:
# Pivot table to summarize counts
pivot_table_counts = comparison_df.pivot_table(index='race', columns='baseline_prediction', aggfunc='size', fill_value=0)

# Calculate support values
support = pivot_table_counts.sum(axis=1)

# Normalize to get proportions (rates)
pivot_table_proportions = pivot_table_counts.div(support, axis=0)

# Add the support column to the pivot table
pivot_table_proportions['support'] = support

# Round the proportions
pivot_table_proportions = pivot_table_proportions.round(2)

print(pivot_table_proportions)
baseline_prediction    FN    FP    TN    TP  support
race                                                
African-American     0.15  0.16  0.33  0.36      731
Asian                0.20  0.20  0.60  0.00        5
Caucasian            0.21  0.10  0.53  0.16      505
Hispanic             0.21  0.07  0.64  0.08      117
Native American      0.00  0.33  0.33  0.33        3
Other                0.27  0.06  0.61  0.06       82

The values inside the cells are rates (e.g. the false negative rate, the true positive rate, etc.), i.e. the share of each prediction outcome within a group. The columns expressing model errors (FN and FP) are particularly important. We can already see that the FP rate is higher for African-Americans than for the Caucasian group.

11. Create the same breakdown of predictions by sex.

In [51]:
# Pivot table to summarize counts
pivot_table_counts = comparison_df.pivot_table(index='sex', columns='baseline_prediction', aggfunc='size', fill_value=0)

# Calculate support values
support = pivot_table_counts.sum(axis=1)

# Normalize to get proportions (rates)
pivot_table_proportions = pivot_table_counts.div(support, axis=0)

# Add the support column to the pivot table
pivot_table_proportions['support'] = support

# Round the proportions
pivot_table_proportions = pivot_table_proportions.round(2)

print(pivot_table_proportions)
baseline_prediction    FN    FP    TN    TP  support
sex                                                 
Female               0.26  0.04  0.62  0.08      250
Male                 0.17  0.15  0.41  0.28     1193

When building a system to predict recidivism, which type of error is more problematic - false negatives or false positives?

False negative - the model predicts low risk, the person is released from pretrial detention / given parole or probation, and the released person then reoffends.

False positive - the model predicts high risk, the person is detained / not given parole or probation, even though in reality they would not have reoffended.

The choice of what matters more is highly context-dependent. We do not claim to have the answer. In this practical, we focus on some differences and highlight a technical approach to bias mitigation.

From the tables produced above, we observe that our classifier exhibits a false positive rate gap between African-American offenders and Caucasian and Hispanic offenders (6% and 9%, respectively). Since offenders of Asian, Native American, and Other ethnicity are few in the dataset, we take the model's performance on these groups with a grain of salt. We also see a 9% false negative rate gap and an 11% false positive rate gap between female and male convicts.
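A small optional sketch to read the sex gaps directly off the previous pivot table (which at this point holds the breakdown by sex):

In [ ]:
# FN and FP gaps between the sexes, taken from the table above
fn_gap = pivot_table_proportions.loc['Female', 'FN'] - pivot_table_proportions.loc['Male', 'FN']
fp_gap = pivot_table_proportions.loc['Male', 'FP'] - pivot_table_proportions.loc['Female', 'FP']
print(f"False negative gap (Female - Male): {fn_gap:.2f}")
print(f"False positive gap (Male - Female): {fp_gap:.2f}")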

12. Make a confusion matrix for each race. You can loop through each unique race and print the classification report and confusion matrix.

In [52]:
results = pd.DataFrame({
    'true_label': y_test,
    'predicted_probability': y_prob,
    'predicted_label': y_pred,
    'race': X_test_attributes['race_label']
})


# Loop through each unique race and print the classification report and confusion matrix
races = X_test_attributes['race_label'].unique()

for race in races:
    race_results = results[results['race'] == race]
    true_labels = race_results['true_label']
    predicted_labels = race_results['predicted_label']

    class_report = classification_report(true_labels, predicted_labels)
    conf_matrix = confusion_matrix(true_labels, predicted_labels)

    print(f"\nRace: {race}")
    print("\nClassification Report:")
    print(class_report)
    print("\nConfusion Matrix:")
    print(conf_matrix)
Race: Caucasian

Classification Report:
              precision    recall  f1-score   support

           0       0.72      0.84      0.77       320
           1       0.61      0.43      0.50       185

    accuracy                           0.69       505
   macro avg       0.66      0.63      0.64       505
weighted avg       0.68      0.69      0.68       505


Confusion Matrix:
[[268  52]
 [105  80]]

Race: African-American

Classification Report:
              precision    recall  f1-score   support

           0       0.69      0.67      0.68       359
           1       0.69      0.71      0.70       372

    accuracy                           0.69       731
   macro avg       0.69      0.69      0.69       731
weighted avg       0.69      0.69      0.69       731


Confusion Matrix:
[[241 118]
 [109 263]]

Race: Other

Classification Report:
              precision    recall  f1-score   support

           0       0.69      0.91      0.79        55
           1       0.50      0.19      0.27        27

    accuracy                           0.67        82
   macro avg       0.60      0.55      0.53        82
weighted avg       0.63      0.67      0.62        82


Confusion Matrix:
[[50  5]
 [22  5]]

Race: Hispanic

Classification Report:
              precision    recall  f1-score   support

           0       0.75      0.90      0.82        83
           1       0.53      0.26      0.35        34

    accuracy                           0.72       117
   macro avg       0.64      0.58      0.59       117
weighted avg       0.69      0.72      0.68       117


Confusion Matrix:
[[75  8]
 [25  9]]

Race: Native American

Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.50      0.67         2
           1       0.50      1.00      0.67         1

    accuracy                           0.67         3
   macro avg       0.75      0.75      0.67         3
weighted avg       0.83      0.67      0.67         3


Confusion Matrix:
[[1 1]
 [0 1]]

Race: Asian

Classification Report:
              precision    recall  f1-score   support

           0       0.75      0.75      0.75         4
           1       0.00      0.00      0.00         1

    accuracy                           0.60         5
   macro avg       0.38      0.38      0.38         5
weighted avg       0.60      0.60      0.60         5


Confusion Matrix:
[[3 1]
 [1 0]]

13. To make the results easier to compare, we can plot the ROC curves. Plot the ROC curve for each race.

In [53]:
# Plot ROC curves for each race
races = X_test_attributes['race_label'].unique()

plt.figure()

for race in races:
    race_results = results[results['race'] == race]
    fpr, tpr, _ = roc_curve(race_results['true_label'], race_results['predicted_probability'])
    roc_auc = roc_auc_score(race_results['true_label'], race_results['predicted_probability'])
    plt.plot(fpr, tpr, label=f'ROC curve for {race} (area = {roc_auc:.2f})')

plt.plot([0, 1], [0, 1], 'k--')  # Diagonal line
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic by Race')
plt.legend(loc='lower right')
plt.show()

NOTE: the ROC curves of the Native American and Asian groups fall essentially on the diagonal; that is what an AUC value of 0.5 implies - the classifier does no better than chance for these (very small) groups.

In this practical, we are going to attempt to reduce the false positive rate for African-Americans.

Bias Mitigation¶

Post-Processing - Threshold Optimizer¶

Bias mitigation (fairness) techniques in machine learning can tackle the problem at three different stages of the model-building pipeline:

  • preprocessing: change the input
  • in-processing: change the model
  • post-processing: change the output

Consider the problem setup. We have:

  • Features: $X$
  • Target: $y \in \{0, 1\}$
  • Task: we want to predict $y$ from $X$
  • Score function (here probability) $P=f(X)$
  • Decision based on a threshold $D = \mathbb{1}\{P > t\}$
  • Sensitive attribute $A \in \{a, b\}$

We are going to tackle the problem of bias using a post-processing approach. This way, we can directly control the distribution of the outcomes.

Logistic regression outputs a predicted probability of recidivism. The probability is then dichotomized - by default the threshold $t$ is set to 0.5: if the predicted probability is lower than 50%, the person is assigned 0 (in our case 'low risk'); otherwise they are assigned 1 ('high risk').

However, we could also consider setting different thresholds for different groups in an attempt to reduce bias. We do this by using a threshold optimization approach from the fairlearn library. You can explore the documentation.
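To make the idea of group-specific thresholds concrete before handing it over to fairlearn, here is a hand-rolled sketch; the threshold value for the African-American group is purely illustrative (the ThresholdOptimizer below searches for suitable values instead of guessing them):

In [ ]:
# Hand-rolled group-specific thresholding; 0.55 is a hypothetical, hand-picked value
example_thresholds = {'African-American': 0.55}
default_threshold = 0.5

group = X_test_attributes['race_label']
t = group.map(example_thresholds).fillna(default_threshold).to_numpy()
y_pred_custom = (y_prob > t).astype(int)
pd.Series(y_pred_custom).value_counts()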

14. Create two variables in which you save the values of X_train['race_African-American'] and X_test['race_African-American'].

In [54]:
# Protected attribute
attribute = ['race_African-American']
protected_attribute_train = X_train[attribute]
protected_attribute_test = X_test[attribute]

15. Initialize the ThresholdOptimizer. The estimator can be set to the logistic regression model, the constraints to demographic parity, and the objective to accuracy score.

In [55]:
# Initialize ThresholdOptimizer
postprocessor = ThresholdOptimizer(
  estimator=lr_model,
  constraints='demographic_parity',
  objective='accuracy_score',
  grid_size=1000,
  flip=True,
  prefit=False,
  predict_method='auto'
)

16. Then fit the optimizer on the training data, setting sensitive_features to the protected attribute from the training set. Make new predictions on the test set using the predict function and save them in a dataframe.

In [56]:
# Fit the postprocessor
postprocessor.fit(X_train, y_train, sensitive_features=protected_attribute_train)

# Predict on the test set
predictions = postprocessor.predict(X_test, sensitive_features=protected_attribute_test, random_state=42)

# Save predictions in the dictionary
predictions_dict = {}
predictions_dict['new_threshold_decision'] = predictions

print(f"Attribute :{attribute}, value={postprocessor._x_best}")

# Convert the predictions dictionary to a DataFrame for easier handling
predictions_df = pd.DataFrame(predictions_dict)
Attribute :['race_African-American'], value=0.442
In [57]:
plot_threshold_optimizer(postprocessor)

17. Combine the comparison and predictions dataframes.

In [58]:
# Combine comparison_df and predictions_df
predictions_df.index = comparison_df.index
final_comparison_df = pd.concat([comparison_df, predictions_df], axis=1)
final_comparison_df.head()
Out[58]:
sex race y_observed y_predicted baseline_prediction predicted_probability new_threshold_decision
308 Male Caucasian 0 0 TN 0.151788 0
381 Male African-American 0 0 TN 0.422197 0
3238 Male African-American 1 0 FN 0.356359 0
2312 Male African-American 1 1 TP 0.586242 1
251 Female Other 0 0 TN 0.202685 0

Let's make some comparisons between the results we obtained.

In [59]:
final_comparison_df["y_predicted_new"] = final_comparison_df["y_predicted"]
#final_comparison_df["y_predicted_new"] = np.where(final_comparison_df['race'] == "African-American",final_comparison_df["y_predicted_new"],final_comparison_df["new_threshold_decision"])
final_comparison_df["y_predicted_new"][final_comparison_df['race'] == "African-American"] = final_comparison_df["new_threshold_decision"][final_comparison_df['race'] == "African-American"]
In [60]:
final_comparison_df[["y_predicted",'new_threshold_decision']].corr()
Out[60]:
y_predicted new_threshold_decision
y_predicted 1.000000 0.722498
new_threshold_decision 0.722498 1.000000
In [61]:
# Function to determine the prediction type for the post-processed predictions
def get_updated_prediction_label(row):
    if row['y_observed'] == 0 and row['y_predicted_new'] == 0:
        return 'TN'
    elif row['y_observed'] == 1 and row['y_predicted_new'] == 0:
        return 'FN'
    elif row['y_observed'] == 0 and row['y_predicted_new'] == 1:
        return 'FP'
    elif row['y_observed'] == 1 and row['y_predicted_new'] == 1:
        return 'TP'

# Apply the function to each row to create the 'updated_prediction' column
final_comparison_df['updated_prediction'] = final_comparison_df.apply(get_updated_prediction_label, axis=1)

final_comparison_df.head()

final_comparison_df.head()
Out[61]:
sex race y_observed y_predicted baseline_prediction predicted_probability new_threshold_decision y_predicted_new updated_prediction
308 Male Caucasian 0 0 TN 0.151788 0 0 TN
381 Male African-American 0 0 TN 0.422197 0 0 TN
3238 Male African-American 1 0 FN 0.356359 0 0 FN
2312 Male African-American 1 1 TP 0.586242 1 1 TP
251 Female Other 0 0 TN 0.202685 0 0 TN
In [62]:
# Pivot table to summarize counts
pivot_table_counts = final_comparison_df.pivot_table(index='race', columns='updated_prediction', aggfunc='size', fill_value=0)

# Calculate support values
support = pivot_table_counts.sum(axis=1)

# Normalize to get proportions (rates)
pivot_table_proportions = pivot_table_counts.div(support, axis=0)

# Add the support column to the pivot table
pivot_table_proportions['support'] = support

# Round the proportions
pivot_table_proportions = pivot_table_proportions.round(2)

print(pivot_table_proportions)
updated_prediction    FN    FP    TN    TP  support
race                                               
African-American    0.18  0.13  0.36  0.32      731
Asian               0.20  0.20  0.60  0.00        5
Caucasian           0.21  0.10  0.53  0.16      505
Hispanic            0.21  0.07  0.64  0.08      117
Native American     0.00  0.33  0.33  0.33        3
Other               0.27  0.06  0.61  0.06       82

For reference:

Race - Baseline Prediction FN FP TN TP Support
African-American 0.15 0.16 0.33 0.36 731
Asian 0.20 0.20 0.60 0.00 5
Caucasian 0.21 0.10 0.53 0.16 505
Hispanic 0.21 0.07 0.64 0.08 117
Native American 0.00 0.33 0.33 0.33 3
Other 0.27 0.06 0.61 0.06 82

We see that our bias mitigation strategy for African-Americans has resulted in a reduction of the false positive rate from 16% in the naïve baseline approach to 13% in the threshold-optimized approach. The false negative rate has increased by 3%, showing that bias mitigation can come with a trade-off. The performance on the remaining groups has remained the same.
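Since the ThresholdOptimizer was fit with a demographic-parity constraint, another optional check is whether the selection-rate gap between African-Americans and the other groups shrank after post-processing; a sketch using fairlearn.metrics and the columns created above:

In [ ]:
from fairlearn.metrics import demographic_parity_difference

# Selection-rate gap between African-Americans and everyone else, before and after post-processing
is_aa = final_comparison_df['race'] == 'African-American'

gap_before = demographic_parity_difference(
    final_comparison_df['y_observed'], final_comparison_df['y_predicted'], sensitive_features=is_aa)
gap_after = demographic_parity_difference(
    final_comparison_df['y_observed'], final_comparison_df['y_predicted_new'], sensitive_features=is_aa)

print(f"Selection-rate gap before post-processing: {gap_before:.3f}")
print(f"Selection-rate gap after post-processing:  {gap_after:.3f}")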

A challenge is that the Asian and Native American groups are small and therefore not well suited to statistical learning. To improve the performance of the classifier on these groups, one would need to consider data augmentation approaches such as SMOTE (Synthetic Minority Oversampling Technique) or other solutions to imbalanced datasets.
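For completeness, a minimal sketch of how SMOTE is typically applied with the imbalanced-learn package (an assumption: pip install -q imbalanced-learn). Note that vanilla SMOTE balances the target classes; enlarging small race groups would require resampling with respect to the group variable instead.

In [ ]:
# Sketch only: oversample the minority target class in the training data with SMOTE
from imblearn.over_sampling import SMOTE

X_train_res, y_train_res = SMOTE(random_state=42).fit_resample(X_train, y_train)
print(y_train.value_counts(), y_train_res.value_counts(), sep='\n\n')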

Sources, Acknowledgments, and Additional Reading:

https://fairlearn.org/v0.5.0/api_reference/fairlearn.postprocessing.html

https://www.holisticai.com/blog/bias-mitigation-strategies-techniques-for-classification-tasks


We would like to thank Dr. Dong Nguyen of Utrecht University whose Human-Centered Machine Learning course materials have served as an inspiration.