Anastasia Giachanou, Tina Shahedi
Machine Learning with Python - Utrecht Summer School
Welcome to the Hidden Markov Model practical!
This practical is based on Emmeke Aarts's practical workshop on "Extracting personalised latent dynamics using a multilevel hidden Markov model".
In this practical, we introduce a dataset and use it to fit a two-state hidden Markov model (HMM) (so not multilevel).
Explore the documentation of libraries like hmmlearn or pomegranate, which are used for building and analyzing hidden Markov models. The complete documentation for both is available online.
For this practical, we will use several Python libraries. The hmmlearn library is a popular choice for building Hidden Markov Models in Python. We start by ensuring that it is installed in our environment.
!pip install hmmlearn
!pip install plotly
Collecting hmmlearn
  Downloading hmmlearn-0.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.9 kB)
Requirement already satisfied: numpy>=1.10 in /usr/local/lib/python3.10/dist-packages (from hmmlearn) (1.25.2)
Requirement already satisfied: scikit-learn!=0.22.0,>=0.16 in /usr/local/lib/python3.10/dist-packages (from hmmlearn) (1.2.2)
Requirement already satisfied: scipy>=0.19 in /usr/local/lib/python3.10/dist-packages (from hmmlearn) (1.11.4)
Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-learn!=0.22.0,>=0.16->hmmlearn) (1.4.2)
Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn!=0.22.0,>=0.16->hmmlearn) (3.5.0)
Downloading hmmlearn-0.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (161 kB)
Installing collected packages: hmmlearn
Successfully installed hmmlearn-0.3.2
Requirement already satisfied: plotly in /usr/local/lib/python3.10/dist-packages (5.15.0)
Requirement already satisfied: tenacity>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from plotly) (8.5.0)
Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from plotly) (24.1)
# Importing necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import hmmlearn
import plotly.graph_objects as go
from hmmlearn import hmm
from scipy.stats import norm
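Note that the bare import hmmlearn above is mainly useful for accessing package metadata; for example, you can confirm that the installed version matches the one from the install log (a quick optional check):
# Optional: confirm the installed hmmlearn version (the install log above shows 0.3.2)
print(hmmlearn.__version__)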
We will be working with an open access dataset from Rowland and Wenzel (2020), which is part of a study involving 125 undergraduate students from the University of Mainz in Germany. These students completed a 40-day ambulatory assessment with six measurements a day, reporting on affective experiences such as happiness, excitement, relaxation, satisfaction, anger, anxiety, depression, and sadness. These affective states were rated on a visual analog slider ranging from 0 to 100.
Before data collection, participants were randomly assigned to either a group receiving weekly mindfulness training during the study or a control group. The dataset we use here has been cleaned and made available by Haslbeck, Ryan, & Dablander (2023) and can be found in their OSF repository.
1. Load the dataset emotion_data.csv using the function pd.read_csv(). Give the dataset the name emotion_data. Then, inspect the first few rows of the data.
emotion_data = pd.read_csv("emotion_data.csv")
emotion_data.head()
| | subj_id | dayno | beep | group | happy | excited | relaxed | satisfied | angry | anxious | depressed | sad |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 1 | 1 | 2 | 79.0 | 64.0 | 30.0 | 74.0 | 19.0 | 7.0 | 8.0 | 58.0 |
| 1 | 1 | 1 | 2 | 2 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2 | 1 | 1 | 3 | 2 | 20.0 | 24.0 | 37.0 | 36.0 | 62.0 | 41.0 | 73.0 | 67.0 |
| 3 | 1 | 1 | 4 | 2 | 25.0 | 24.0 | 33.0 | 29.0 | 42.0 | 68.0 | 77.0 | 81.0 |
| 4 | 1 | 1 | 5 | 2 | 37.0 | 24.0 | 40.0 | 17.0 | 35.0 | 36.0 | 12.0 | 76.0 |
The dataset contains multiple affective states recorded over time for multiple participants. Each row in the dataset represents a unique measurement instance. To fit a hidden Markov model, we need to understand the structure of the data, including the variables and their relationships.
emotion_data.describe()
| | subj_id | dayno | beep | group | happy | excited | relaxed | satisfied | angry | anxious | depressed | sad |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 30000.000000 | 30000.000000 | 30000.000000 | 30000.000000 | 21570.000000 | 21570.000000 | 21570.000000 | 21570.000000 | 21570.000000 | 21570.000000 | 21570.000000 | 21570.000000 |
| mean | 80.736000 | 20.500000 | 3.500000 | 1.488000 | 59.333380 | 40.937089 | 54.761196 | 54.989754 | 13.508067 | 13.200649 | 20.590264 | 15.969866 |
| std | 46.926961 | 11.543589 | 1.707854 | 0.499864 | 24.213283 | 28.544285 | 26.221243 | 26.250658 | 20.935787 | 21.287047 | 24.603760 | 22.706737 |
| min | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 25% | 37.000000 | 10.750000 | 2.000000 | 1.000000 | 39.000000 | 18.000000 | 33.000000 | 33.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 50% | 84.000000 | 20.500000 | 3.500000 | 1.000000 | 64.000000 | 35.000000 | 61.000000 | 61.000000 | 4.000000 | 3.000000 | 11.000000 | 5.000000 |
| 75% | 121.000000 | 30.250000 | 5.000000 | 2.000000 | 77.000000 | 66.000000 | 74.000000 | 74.750000 | 19.000000 | 18.000000 | 31.000000 | 24.000000 |
| max | 164.000000 | 40.000000 | 6.000000 | 2.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 |
The table below summarizes the features of the dataset:
| No | Name | Label | Options |
|---|---|---|---|
| 1 | subjno | individual code | 1-164 |
| 2 | dayno | study days | 1-40 |
| 3 | beep | daily signals | 1-6 |
| 4 | group | condition allocation | 1 = control, 2 = training |
| 5 | emo1_m | happy | 0-100 |
| 6 | emo2_m | excited | 0-100 |
| 7 | emo3_m | relaxed | 0-100 |
| 8 | emo4_m | satisfied | 0-100 |
| 9 | emo5_m | angry | 0-100 |
| 10 | emo6_m | anxious | 0-100 |
| 11 | emo7_m | depressed | 0-100 |
| 12 | emo8_m | sad | 0-100 |
Next, we need to preprocess the data to make it suitable for fitting a hidden Markov model. First, we check for missing values by applying .isnull().sum() to the dataset:
# Count of NaN values in each column
NaN_counts = emotion_data.isnull().sum()
NaN_counts
subj_id         0
dayno           0
beep            0
group           0
happy        8430
excited      8430
relaxed      8430
satisfied    8430
angry        8430
anxious      8430
depressed    8430
sad          8430
dtype: int64
In this case, we will fill the NaN values with the corresponding column means.
# Fill NaN values with the column means
emotion_data_clean = emotion_data.fillna(emotion_data.mean())
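Grand-mean imputation is simple, but it pulls every subject toward the same value and can flatten within-person dynamics. As an alternative (a sketch, not part of the original practical), you could impute each subject's missing values with that subject's own mean:
# Sketch: per-subject mean imputation as an alternative to the grand mean
emotion_cols = ['happy', 'excited', 'relaxed', 'satisfied', 'angry', 'anxious', 'depressed', 'sad']
emotion_data_subj = emotion_data.copy()
emotion_data_subj[emotion_cols] = (emotion_data_subj.groupby('subj_id')[emotion_cols]
                                   .transform(lambda col: col.fillna(col.mean())))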
Now, we will create a plot to visualize the emotional states over time for each subject in the dataset.
2. Visualize time sequences for individual subjects separately to analyze emotional responses over time. Start by transforming your dataset from wide to long format using pandas.melt(), which prepares it for detailed analysis. Select only five emotions: 'happy', 'excited', 'relaxed', 'angry', 'depressed'.
# Creating a new column representing the beep moment counted continuously over days
emotion_data_clean.loc[:, 'beep2'] = (emotion_data_clean['dayno'] - 1) * 6 + emotion_data_clean['beep']
# Reshaping the data for visualization: melt the DataFrame to long format
emo_long = emotion_data_clean.melt(id_vars=['subj_id', 'beep2'],
                                   value_vars=['happy', 'excited', 'relaxed', 'angry', 'depressed'],
                                   var_name='emotion', value_name='outcome')
3. Now, create multi-faceted line plots for each subject using seaborn.FacetGrid, ensuring clear visualization by selecting a manageable subset of subjects, such as the first four.
# Filter for specific subjects to avoid a crowded plot (e.g., first 4 subjects)
emo_filtered = emo_long[emo_long['subj_id'].isin([1, 2, 3, 4])]
# Set up the FacetGrid in Seaborn
g = sns.FacetGrid(emo_filtered, col='subj_id', col_wrap=1, height=3, aspect=3)
g.map_dataframe(sns.lineplot, x='beep2', y='outcome', hue='emotion', estimator=None)
# Enhancing the plot
g.add_legend()
g.set_titles("Subject {col_name}")
g.set_axis_labels("Beep Time Point", "Emotional Outcome")
plt.show()
In this section, you will fit a 2-state hidden Markov model. For this, you need a DataFrame in which only the affective variables you want to use in the model are included, plus the subject ID as the first column. Please feel free to use a subset of the provided affective variables, but do make sure to select at least two affective variables.
4. Create a dataset emotion_mHMM which has the subject ID variable in the first column, followed by only the affective variables you want to use in the hidden Markov model.
# Create the emotion_mHMM dataset
emotion_mHMM = emotion_data_clean[['subj_id', 'happy', 'excited', 'relaxed', 'angry', 'depressed']]
# Inspect the first few rows
print(emotion_mHMM.head())
   subj_id     happy    excited    relaxed      angry  depressed
0        1  79.00000  64.000000  30.000000  19.000000   8.000000
1        1  59.33338  40.937089  54.761196  13.508067  20.590264
2        1  20.00000  24.000000  37.000000  62.000000  73.000000
3        1  25.00000  24.000000  33.000000  42.000000  77.000000
4        1  37.00000  24.000000  40.000000  35.000000  12.000000
5. Set up the first set of model input arguments: m for the number of states, n_dep for the number of dependent variables, and starting values for the transition matrix gamma and the emission distribution.
# Define the number of states and dependent variables
m = 2
n_dep = 5
# Define starting values for the transition matrix (gamma)
start_gamma = np.array([[0.7, 0.3], [0.3, 0.7]])
# Define starting values for the emission distribution: per variable, rows are states, columns are (mean, SD)
start_emiss = [
    np.array([[70, 15], [30, 15]]),  # happy
    np.array([[70, 15], [30, 15]]),  # excited
    np.array([[70, 15], [30, 15]]),  # relaxed
    np.array([[15, 10], [40, 15]]),  # angry
    np.array([[15, 10], [40, 15]])   # depressed
]
# Convert start_emiss to the format required by hmmlearn
start_means = np.array([emiss[:, 0] for emiss in start_emiss]).T
start_covars = np.array([emiss[:, 1] for emiss in start_emiss]).T ** 2
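Before handing these starting values to hmmlearn, it can help to verify their shapes: for a GaussianHMM with the default 'diag' covariance type, both the means and the (diagonal) covariances should have shape (m, n_dep). A quick sanity check:
# Sanity check: hmmlearn expects shape (n_states, n_features) for both arrays
print(start_means.shape)   # expected: (2, 5)
print(start_covars.shape)  # expected: (2, 5)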
We will drop the subj_id column from the emotion_mHMM DataFrame to ensure the observations matrix consists only of emotional state data for the hidden Markov model (HMM) analysis.
# Prepare the data for HMM excluding the subject ID
observations = emotion_mHMM.drop(columns=['subj_id']).values
# Ensure the shape of observations
print(f"Shape of observations: {observations.shape}") # Should be (n_samples, n_dep)
Shape of observations: (30000, 5)
6. Initialize a Gaussian Hidden Markov Model with n_components=m and n_iter=500. Set the model's initial state probabilities, transition matrix, means, and covariances using the predefined variables (start_gamma, start_means, start_covars).
# Initialize the HMM; init_params='' prevents hmmlearn from re-initializing
# the parameters we set below (the default 'stmc' would overwrite startprob, transmat, means, and covars)
model = hmm.GaussianHMM(n_components=m, n_iter=500, init_params='')
# Set initial parameters
model.startprob_ = np.array([0.5, 0.5]) # Example initial state probabilities
model.transmat_ = start_gamma
model.means_ = start_means
model.covars_ = start_covars
Next, we will proceed to fit a Hidden Markov Model to this preprocessed data.
7. Fit a 2-state hidden Markov model. Use the fit method of the HMM model to train it on the prepared observations data. Assign the fitted model to a variable named out_2st_emotion.
# Fit the HMM model
model.fit(observations)
# Save the fitted model
out_2st_emotion = model
print("Model fitting completed.")
Model fitting completed.
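One caveat: fitted this way, hmmlearn treats all 30,000 rows as one long sequence, so it also "learns" from the artificial transitions between the last beep of one subject and the first beep of the next. hmmlearn's fit() accepts a lengths argument to treat each subject as a separate sequence; a sketch of how that could look (optional, not part of the original practical):
# Sketch: fit the HMM with one sequence per subject via the lengths argument.
# Assumes the rows of observations are ordered by subj_id; lengths must sum to n_samples.
lengths = emotion_mHMM.groupby('subj_id').size().to_numpy()
model_seq = hmm.GaussianHMM(n_components=m, n_iter=500, init_params='')
model_seq.startprob_ = np.array([0.5, 0.5])
model_seq.transmat_ = start_gamma
model_seq.means_ = start_means
model_seq.covars_ = start_covars
model_seq.fit(observations, lengths=lengths)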
8. Determine the total number of unique subjects using .nunique() on subj_id, and calculate the log-likelihood of the observations with the model.score() method (note that score() returns the total log-likelihood, not a per-subject average). Print the number of subjects and the log-likelihood to summarize key metrics.
# Number of subjects
num_subjects = emotion_data_clean['subj_id'].nunique()
# Total log-likelihood of the observations under the fitted model
log_likelihood = model.score(observations)
print(f"Number of subjects: {num_subjects}")
print(f"Log-likelihood over all observations: {log_likelihood}")
Number of subjects: 125
Log-likelihood over all observations: -637693.4192899191
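The total log-likelihood is hard to interpret on its own, but it is useful for comparing models, e.g. with different numbers of states. As a sketch (assuming a diagonal-covariance Gaussian HMM, so the free parameters are m-1 start probabilities, m(m-1) transition probabilities, and m*n_dep means plus m*n_dep variances), a BIC can be computed by hand:
# Sketch: BIC = k * ln(n) - 2 * log-likelihood; lower is better
n_params = (m - 1) + m * (m - 1) + 2 * m * n_dep
bic = n_params * np.log(observations.shape[0]) - 2 * log_likelihood
print(f"BIC of the 2-state model: {bic:.1f}")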
9. Find the number of states using the n_components attribute and identify the number of dependent variables using the .shape attribute of the observations array. Print these values to document the model's characteristics.
num_states = model.n_components
num_dependent_vars = observations.shape[1]
print(f"Number of states used: {num_states}")
print(f"Number of dependent variables used: {num_dependent_vars}")
Number of states used: 2
Number of dependent variables used: 5
10. Access and print the transition probability matrix using transmat_, then print the mean and standard deviation of each dependent variable within each state using the means_ and covars_ attributes.
transition_matrix = model.transmat_
print("\nState transition probability matrix:")
print(transition_matrix)
means = model.means_
std_devs = np.sqrt(np.diagonal(model.covars_, axis1=1, axis2=2))  # per-state standard deviations
print("\nEmission distribution (continuous) for each of the dependent variables:")
emotions = ['happy', 'excited', 'relaxed', 'angry', 'depressed']
for i, emotion in enumerate(emotions):
    print(f"\n{emotion.capitalize()}")
    print(f"State 1 Mean: {means[0][i]:.3f}, SD: {std_devs[0][i]:.3f}")
    print(f"State 2 Mean: {means[1][i]:.3f}, SD: {std_devs[1][i]:.3f}")
State transition probability matrix:
[[0.85621468 0.14378532]
 [0.38117036 0.61882964]]

Emission distribution (continuous) for each of the dependent variables:

Happy
State 1 Mean: 66.487, SD: 14.231
State 2 Mean: 40.370, SD: 22.513

Excited
State 1 Mean: 46.179, SD: 23.239
State 2 Mean: 27.040, SD: 21.001

Relaxed
State 1 Mean: 61.398, SD: 17.772
State 2 Mean: 37.165, SD: 23.263

Angry
State 1 Mean: 8.085, SD: 7.099
State 2 Mean: 27.886, SD: 27.061

Depressed
State 1 Mean: 12.970, SD: 10.159
State 2 Mean: 40.793, SD: 27.446
11. To facilitate easy post-processing, save the transition probabilities in an object named gamma. Use the transition matrix transmat_ from the model to extract and save these probabilities. Then, print gamma to inspect the transition probabilities from one state to another.
# Save transition probabilities
gamma = model.transmat_
# Inspect the transition probabilities
print("Transition matrix (gamma):")
print(gamma)
Transition matrix (gamma):
[[0.85621468 0.14378532]
 [0.38117036 0.61882964]]
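A handy summary of gamma is the stationary distribution: the long-run proportion of time the chain spends in each state, i.e. the probability vector pi satisfying pi @ gamma = pi. A sketch using the left eigenvector of gamma for eigenvalue 1:
# Sketch: stationary distribution of the fitted Markov chain
eigvals, eigvecs = np.linalg.eig(gamma.T)
pi = np.real(eigvecs[:, np.argmax(np.real(eigvals))])
pi = pi / pi.sum()
print(f"Stationary state distribution: {pi}")
# For a 2-state chain this equals [g21, g12] / (g12 + g21)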
12. Now, save the emission distribution parameters in an object named emiss. Use the means_ and covars_ attributes from the model to extract and save these parameters. Print emiss to inspect the mean and standard deviation for each dependent variable in each state.
# Save emission distribution parameters
means = model.means_
std_devs = np.sqrt(np.diagonal(model.covars_, axis1=1, axis2=2))  # per-state standard deviations
emiss = {emotion: {'State 1 Mean': means[0][i], 'State 1 SD': std_devs[0][i],
                   'State 2 Mean': means[1][i], 'State 2 SD': std_devs[1][i]}
         for i, emotion in enumerate(['happy', 'excited', 'relaxed', 'angry', 'depressed'])}
# Inspect the emission distribution parameters
print("\nEmission distributions:")
for emotion, stats in emiss.items():
    print(f"\n{emotion.capitalize()}")
    for state, values in stats.items():
        print(f"{state}: {values:.3f}")
Emission distributions:

Happy
State 1 Mean: 66.487
State 1 SD: 14.231
State 2 Mean: 40.370
State 2 SD: 22.513

Excited
State 1 Mean: 46.179
State 1 SD: 23.239
State 2 Mean: 27.040
State 2 SD: 21.001

Relaxed
State 1 Mean: 61.398
State 1 SD: 17.772
State 2 Mean: 37.165
State 2 SD: 23.263

Angry
State 1 Mean: 8.085
State 1 SD: 7.099
State 2 Mean: 27.886
State 2 SD: 27.061

Depressed
State 1 Mean: 12.970
State 1 SD: 10.159
State 2 Mean: 40.793
State 2 SD: 27.446
13. Visualizing the transition probabilities can be very helpful, especially when dealing with a large number of states and/or dependent variables. Here we use a Sankey diagram built with plotly.
# Prepare the data for the Sankey diagram
states = ['State 1', 'State 2']  # labels in the same order as the rows/columns of gamma
num_states = len(states)
labels = states + states  # 'from' and 'to' nodes share the same labels
source = np.repeat(np.arange(num_states), num_states)
target = np.tile(np.arange(num_states), num_states)
values = gamma.flatten()
# Create the Sankey diagram
fig = go.Figure(data=[go.Sankey(
    node=dict(
        pad=15,
        thickness=20,
        line=dict(color="black", width=0.5),
        label=labels,
        color=['white', 'white', 'white', 'white']  # custom colors for nodes
    ),
    link=dict(
        source=source,
        target=target + num_states,  # target node indices start after the source nodes
        value=values,
        color=['lightblue', 'lightblue', 'tomato', 'tomato']
    ))])
# Set plot title and layout
fig.update_layout(title_text="Transition Probabilities", font_size=14, title_x=0.5, width=700, height=500)
fig.show()
14. Visualize the transition probabilities saved in the gamma object. Use a heatmap to represent the probabilities of transitioning from one state to another.
# Visualize the transition probabilities
plt.figure(figsize=(6, 4))
sns.heatmap(gamma, annot=True, cmap='viridis', cbar=True, xticklabels=['To state 1', 'To state 2'], yticklabels=['From state 1', 'From state 2'])
plt.title('State Transition Probability Matrix')
plt.xlabel('To State')
plt.ylabel('From State')
plt.show()
15. Visualize the emission distributions to understand the mean values of different dependent variables across states. Prepare the data by creating a DataFrame with the state, mean emission value, and dependent variable name using the emiss dictionary. Then use Seaborn's barplot function to create the plot.
# Prepare the data for visualization
emotions = ['happy', 'excited', 'relaxed', 'angry', 'depressed']
emiss_melt = pd.DataFrame({
    'State': np.repeat([1, 2], len(emotions)),
    'Mean': [emiss[emotion]['State 1 Mean'] for emotion in emotions] + [emiss[emotion]['State 2 Mean'] for emotion in emotions],
    'Dep': np.tile(emotions, 2)
})
# Visualize the emission distributions using bar plot
plt.figure(figsize=(10, 6))
sns.barplot(x='Dep', y='Mean', hue='State', data=emiss_melt, palette='Set3')
plt.title('Emission Distributions by State and Dependent Variable')
plt.xlabel('Emotion')
plt.ylabel('Mean Emission')
plt.legend(title='State')
plt.show()
16. Compute the density probabilities for each emotion and state. First, define the length of the grid and generate a sequence of mood values ranging from 1 to 100. Select only those emotions that are present in the emiss dictionary to ensure the accuracy of the density plots. Create a DataFrame emiss_dens with columns for state (State), emotion (Dep), mood value (X), and probability (Prob). Finally, using the normal distribution parameters, calculate the density probability for each emotion-state pair.
# Prepare the data for density plot
len_grid = 200
grid = np.linspace(1, 100, len_grid)
# Ensure we only use emotions present in emiss
emotions = [emotion for emotion in ['happy', 'excited', 'relaxed', 'angry', 'anxious', 'depressed', 'sad'] if emotion in emiss]
emiss_dens = pd.DataFrame({
    'State': np.repeat(np.repeat([1, 2], len_grid), len(emotions)),
    'Dep': np.tile(np.repeat(emotions, len_grid), 2),
    'X': np.tile(grid, len(emotions) * 2),
    'Prob': np.nan
})
# Fill in the normal density for each emotion-state combination
for emotion in emotions:
    for i in range(1, 3):
        mask = (emiss_dens['Dep'] == emotion) & (emiss_dens['State'] == i)
        emiss_dens.loc[mask, 'Prob'] = norm.pdf(grid, loc=emiss[emotion][f'State {i} Mean'], scale=emiss[emotion][f'State {i} SD'])
emiss_dens.head()
| | State | Dep | X | Prob |
|---|---|---|---|---|
| 0 | 1 | happy | 1.000000 | 7.067787e-07 |
| 1 | 1 | happy | 1.497487 | 8.296296e-07 |
| 2 | 1 | happy | 1.994975 | 9.726449e-07 |
| 3 | 1 | happy | 2.492462 | 1.138921e-06 |
| 4 | 1 | happy | 2.989950 | 1.331994e-06 |
17. Create subplots for each emotion using seaborn.FacetGrid with the specified attributes. Plot density probabilities for mood values differentiated by state using sns.lineplot. Add vertical dashed lines at the mean mood values for each state. Set the y-axis limits from 0 to 0.05 for consistency, and include a legend and axis labels for mood value and density.
# Visualize the emission distributions using density plot
g = sns.FacetGrid(emiss_dens, col="Dep", col_wrap=3, height=4, aspect=1.2, hue='State', palette='Set1')
g.map(sns.lineplot, 'X', 'Prob')
# Add vertical dashed lines at the state means
state_palette = sns.color_palette('Set1', 2)  # same palette as the FacetGrid hue
for ax, emotion in zip(g.axes.flat, emotions):
    for i, state in enumerate([1, 2]):
        mean_val = emiss[emotion][f'State {state} Mean']
        ax.axvline(mean_val, color=state_palette[i], linestyle='--')
# Set the y-axis limit from 0 to 0.05
for ax in g.axes.flat:
    ax.set_ylim(0, 0.05)
g.add_legend(title='State')
g.set_axis_labels('Mood Value', 'Density')
plt.show()
18. Using the predict method from hmmlearn, the fitted model, and the observations array, obtain the most likely hidden state sequence. Save the state sequence in the object emotion_states_2st, and inspect the object.
# Obtain the most likely hidden state sequence
emotion_states_2st = model.predict(observations)
# Create a DataFrame to store the results
emotion_states_2st_df = pd.DataFrame({
    'subj': emotion_data_clean['subj_id'],
    'state': emotion_states_2st
})
# Inspect the first few entries
print(emotion_states_2st_df.head())
# Create a table to inspect the state distribution across subjects
state_distribution = pd.crosstab(emotion_states_2st_df['subj'], emotion_states_2st_df['state'])
print(state_distribution)
   subj  state
0     1      0
1     1      0
2     1      1
3     1      1
4     1      1

state    0    1
subj
1      158   82
2      148   92
3      217   23
4      109  131
6      215   25
...    ...  ...
156    209   31
159    128  112
161    159   81
163     81  159
164    212   28

[125 rows x 2 columns]
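predict() returns the single most likely (Viterbi) state path. If you also want to know how confident these assignments are, hmmlearn's predict_proba() returns the posterior probability of each state at every time point; a short sketch:
# Sketch: posterior state probabilities per beep (forward-backward algorithm)
state_probs = model.predict_proba(observations)
print(state_probs[:5].round(3))  # columns: P(state 0), P(state 1)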
In this section we will plot the inferred state sequence over time for a selection of the subjects within the data. To plot the state sequences over time, a new variable denoting the beep moment needs to be added to the matrix containing the inferred states.
19. Now, create a DataFrame that includes subj_id, state, and the beep moment. Filter the DataFrame to include subjects 1 to 10, and map the states to categorical labels ('state 1' and 'state 2'). Use Seaborn's 'Set2' palette for the states and FacetGrid to create horizontal bar plots for each subject, adjusting the figure size and aspect ratio for better readability. Customize the plot with axis labels, titles, and a legend.
# Create a DataFrame to store the results including the beep moment
emotion_states_2st_df = pd.DataFrame({
    'subj': emotion_data_clean['subj_id'],
    'state': emotion_states_2st,
    'beep': emotion_data_clean['beep2']
})
# Filter for a selection of subjects (e.g., subjects 1 to 10); copy to avoid SettingWithCopyWarning
selected_subjects = emotion_states_2st_df[emotion_states_2st_df['subj'].isin(range(1, 11))].copy()
# Convert the state to a categorical variable with meaningful labels
selected_subjects['state'] = selected_subjects['state'].map({0: 'state 1', 1: 'state 2'})
# Define the palette
palette = sns.color_palette('Set2')
# Plot the state sequences over time using horizontal bars
# FacetGrid creates its own figure, so no separate plt.figure() call is needed
g = sns.FacetGrid(selected_subjects, col='subj', col_wrap=1, height=1, aspect=10, sharey=False)
g.map_dataframe(sns.histplot, x='beep', hue='state', multiple='stack', bins=500, palette='Set2', discrete=True)
# Customize the plot
g.set_axis_labels("Beep", "")
g.set_titles("Subject {col_name}")
for ax in g.axes.flatten():
    ax.set_yticks([])
    ax.set_ylabel('')
# Add a legend
g.add_legend(title='State')
# Customize the legend
state_labels = ['state 1', 'state 2']
handles = [plt.Line2D([0], [0], color=color, lw=6) for color in palette[:2]]
plt.legend(handles, state_labels, title='State', loc='upper right', bbox_to_anchor=(1.15, 1))
plt.show()
End of Practical!