iMoodJournal_visualization.ipynb
Visualize the data exported from iMoodJournal.
This notebook analyzes how mood changes over time, based on iMoodJournal data. It uses a data file exported from iMoodJournal; you can upload your own mood data and reuse the code to analyze it. This notebook is free to reuse and adapt, and is distributed under an MIT license: https://opensource.org/licenses/MIT
Let's import packages first.
This notebook analyzes how mood changes over time, based on iMoodJournal data. It uses a data file exported from iMoodJournal; you can upload your own mood data and reuse the code to analyze it. This notebook is free to reuse and adapt, and is distributed under an MIT license: https://opensource.org/licenses/MIT
Let's import packages first.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import time
# Load the raw iMoodJournal export; index_col=False keeps a plain integer index.
Mood = pd.read_csv('mood-Jul 8, 2019.csv', index_col=False)

# Candidate layouts of the exported 'Date' column. They are consumed from the
# end of the list, so '%b %d, %Y' is tried first.
date_strings = ['%d. %b %Y', '%b %d, %Y']
date_format = None
datetime_format = None

# Assemble a "<Date> <Hour>:<Minute>" string per entry so it can be parsed below.
Mood['Time'] = Mood['Hour'].map(str).str.cat(Mood['Minute'].map(str), sep=':')
Mood['Date'] = Mood['Date'].map(str)
Mood['DateTime'] = Mood['Date'].str.cat(Mood['Time'], sep=' ')

# Try each candidate until one parses the whole column; remember the winner in
# date_format / datetime_format because later cells reuse both.
while date_strings:
    date_format_test = date_strings.pop()
    datetime_string = '{} %H:%M'.format(date_format_test)
    try:
        parsed_datetimes = pd.to_datetime(Mood['DateTime'], format=datetime_string)
    except ValueError:
        continue
    Mood['DateTime'] = parsed_datetimes
    date_format = date_format_test
    datetime_format = datetime_string
    break

if not datetime_format:
    raise Exception('Failed to parse datetime - maybe we need another datetime_string?')

# `mood` is the working frame: indexed by timestamp, with 'DateTime' and 'Time'
# moved to the first two column positions.
mood = Mood.set_index('DateTime', drop=False)
for target_position, column_name in enumerate(['DateTime', 'Time']):
    moved_column = mood.pop(column_name)
    mood.insert(target_position, column_name, moved_column)
DateTime = mood['DateTime']
Time = mood['Time']
import matplotlib.pyplot as plt
import seaborn as sns
# Default figure size for the figures that follow.
sns.set(rc={'figure.figsize': (11, 4)})

# Dates of the first and last rows of `mood`, kept as one-element Series
# because later cells read them under these names.
begin_date = mood.iloc[[0]]['Date'].apply(lambda x: datetime.strptime(x, date_format))
end_date = mood.iloc[[-1]]['Date'].apply(lambda x: datetime.strptime(x, date_format))

mood['Level'].plot(linewidth=1)
# These Series are labelled by timestamp, so an integer key such as [0] only
# works through pandas' deprecated positional fallback; .iloc is explicit.
plt.title('Figure 1. Mood Change From {} to {}'.format(begin_date.iloc[0], end_date.iloc[0]))
plt.show()
# Resample the mood level into calendar days and average each day.
moodlevel_daily = mood['Level'].resample('D')
moodlevel_daily_mean = moodlevel_daily.mean()

moodlevel_daily_mean.plot(linewidth=1)
# begin_date / end_date are one-element Series; .iloc[0] reads the single
# value by position without relying on the deprecated integer-key fallback.
plt.title('Figure 2. Daily Average of the Mood From {} to {}'.format(begin_date.iloc[0], end_date.iloc[0]))
plt.show()
# Overlay every individual entry and the daily mean on one large axis.
fig, ax = plt.subplots(figsize=(30, 14))
ax.plot(mood['Level'],
        marker='.', linestyle='-', linewidth=0.5, label='Mood Level')
ax.plot(moodlevel_daily_mean,
        marker='o', markersize=8, linestyle='-', label='Daily Mean')
# Both series are labelled, so draw the legend; without it the labels are
# never shown and the two lines cannot be told apart.
ax.legend()
plt.title('Figure 3. Comparison of Real Mood Change and Daily Average Mood Change')
plt.show()
# Round each entry to its nearest hour: entries logged after minute 30 are
# moved to the following hour. This modifies `mood` in place; the later
# weekday/weekend cell subtracts 1 under the same condition.
# NOTE(review): an entry at 23:31 or later ends up with Hour == 24.
mood.loc[mood.Minute > 30, 'Hour'] = mood['Hour'] + 1
mood.head(10)

# Take an explicit copy: later cells add and replace columns on `hourly_mood`,
# and doing that on a slice of `mood` triggers pandas' chained-assignment
# warning.
hourly_mood = mood[['Date', 'Day of week', 'Hour', 'Level']].copy()
def get_date_list(begin_date, end_date, fmt=None):
    """Return every hourly timestamp from begin_date to end_date as strings.

    Args:
        begin_date: First timestamp of the range (inclusive).
        end_date: Last timestamp of the range (inclusive when it falls on an
            hourly step from ``begin_date``).
        fmt: ``strftime`` pattern used for each timestamp. Defaults to the
            module-level ``datetime_format``.

    Returns:
        A list of formatted strings, one per hour; empty when ``end_date``
        precedes ``begin_date``.
    """
    if fmt is None:
        fmt = datetime_format
    # An offset object is used instead of the 'H' string alias, which newer
    # pandas versions deprecate; the offset is accepted by every version.
    hourly_range = pd.date_range(start=begin_date, end=end_date, freq=pd.offsets.Hour())
    return [stamp.strftime(fmt) for stamp in hourly_range]
# Dates of the first and last rows of the raw export (one-element Series).
begin_date = Mood.iloc[[0]]['Date'].apply(lambda x: datetime.strptime(x, date_format))
end_date = Mood.iloc[[-1]]['Date'].apply(lambda x: datetime.strptime(x, date_format))
first_day = begin_date.iloc[0]
last_day = end_date.iloc[0]

# Merge key "<date> <zero-padded hour>". Both sides of the merge are built
# with this one pattern so they cannot drift apart (e.g. '07' versus '7').
hour_key_format = date_format + ' %H'

# Hourly grid covering the whole range. The end is pushed to 23:00 of the last
# day; stopping at midnight would leave that day with a single slot.
Time_list = pd.DataFrame(
    {'DateTime': get_date_list(first_day, last_day + pd.Timedelta(hours=23))}
)
Time_list['DateTime'] = pd.to_datetime(Time_list['DateTime'], format=datetime_format)
Time_list['Time'] = Time_list['DateTime'].dt.strftime(hour_key_format)
Time_list['Date'] = Time_list['DateTime'].dt.strftime(date_format)

# Work on an independent frame so column assignment never writes to a slice.
hourly_mood = hourly_mood.copy()
# Date plus rounded hour as a real timestamp, so a rounded hour of 24 rolls
# over to 00:00 of the next day instead of producing an unmatched key.
hour_slot = (pd.to_datetime(hourly_mood['Date'], format=date_format)
             + pd.to_timedelta(hourly_mood['Hour'], unit='h'))
hourly_mood['Time'] = hour_slot.dt.strftime(hour_key_format)
hourly_mood = hourly_mood.drop(['Date'], axis=1)

# Average entries that fall in the same hour. This keeps exactly one row per
# hourly slot after the merge, so row i of every day is hour i.
hourly_level = hourly_mood.groupby('Time', as_index=False)['Level'].mean()
Hourly_mood = pd.merge(Time_list, hourly_level, how='left', on='Time')

# Reshape to hours (rows) x days (columns) for the heatmap.
Hourly_mood_heatmap = Hourly_mood[['DateTime', 'Level']].set_index('DateTime')
groups = Hourly_mood_heatmap.groupby(pd.Grouper(freq='D'))
Hourly_mood_heatmap = pd.concat(
    [pd.DataFrame(day_frame.values) for _, day_frame in groups], axis=1
)
Hourly_mood_heatmap.columns = Hourly_mood['Date'].drop_duplicates(keep='first', inplace=False)

plt.matshow(Hourly_mood_heatmap, interpolation=None, cmap='jet', vmin=1, vmax=8)
plt.xlabel('Date', fontsize=14)
plt.ylabel('Time of a Day', fontsize=14)
plt.xticks(np.arange(Hourly_mood_heatmap.shape[1]), Hourly_mood_heatmap.columns, rotation=90)
plt.title('Figure 4. Heatmap of Mood in A Day', y=1.5)
plt.show()
# Day to zoom in on; the timestamp index is selected with a date string.
oneday = '2019-6-19'
Daily_mood = mood.loc[oneday]

figure_5_title = 'Figure 5. Mood Change on {}'.format(oneday)
Daily_mood['Level'].plot(linewidth=1)
plt.title(figure_5_title)
plt.show()
# Number of entries per mood label; the 'Date' column holds the row counts.
entries_by_label = mood.groupby('LevelText', as_index=False)
Levels = entries_by_label[['Date']].count()
Levels

# The title passed to the plot call is replaced by plt.title right after.
Levels.plot(
    kind='bar',
    x='LevelText',
    y='Date',
    legend=None,
    title='Frequency of Mood levels',
)
plt.title('Figure 6. Frequency of Mood Levels')
plt.show()
def create_pareto_plot(df, x=None, y=None, title=None, show_pct_y=False, pct_format='{0:.0%}'):
    """Draw a Pareto chart: bars sorted by size plus a cumulative-share line.

    Args:
        df: DataFrame holding the categories and their counts.
        x: Name of the category column (also used as the x-axis label).
        y: Name of the count column (also used as the left y-axis label).
        title: Optional figure title.
        show_pct_y: When true, keep the right-hand percentage axis ticks;
            otherwise they are removed.
        pct_format: ``str.format`` pattern for the cumulative-share
            annotation placed on each point.
    """
    from matplotlib.ticker import FuncFormatter

    xlabel = x
    ylabel = y

    # Largest category first, then the running share of the total.
    ordered = df.sort_values(y, ascending=False)
    categories = ordered[x].values
    counts = ordered[y].values
    weights = counts.cumsum() / counts.sum()

    fig, ax1 = plt.subplots()
    ax1.bar(categories, counts)
    ax1.set_xlabel(xlabel)
    ax1.set_ylabel(ylabel)

    # Cumulative share on a secondary y-axis, drawn in red.
    ax2 = ax1.twinx()
    ax2.plot(categories, weights, '-ro', alpha=0.5)
    ax2.set_ylabel('', color='r')
    ax2.tick_params('y', colors='r')

    if show_pct_y:
        # A formatter labels whatever ticks end up being drawn. Fixed label
        # strings without fixed tick positions can land on the wrong ticks.
        ax2.yaxis.set_major_formatter(
            FuncFormatter(lambda value, _position: '{:,.2%}'.format(value))
        )
    else:
        ax2.set_yticks([])

    for category, weight in zip(categories, weights):
        ax2.annotate(pct_format.format(weight), (category, weight), fontweight='heavy')

    if title:
        plt.title(title)

    plt.tight_layout()
    plt.show()
# Pareto chart over all mood labels.
create_pareto_plot(Levels, x='LevelText', y='Date', title='Figure 7. Pareto Chart of Mood Level Frequency')

# Split the labels into a "bad" and a "good" group and chart each separately.
BadMoodText = ['So-so', 'Meh', 'Bad', 'Very bad']
GoodMoodText = ['Okay', 'Good', 'Very good', 'Great']
is_bad_label = Levels['LevelText'].isin(BadMoodText)
is_good_label = Levels['LevelText'].isin(GoodMoodText)
BadMoodLevels = Levels.loc[is_bad_label]
GoodMoodLevels = Levels.loc[is_good_label]

pareto_subsets = (
    (BadMoodLevels, 'Figure 8. Pareto Chart of Bad Mood Level Frequency'),
    (GoodMoodLevels, 'Figure 9. Pareto Chart of Good Mood Level Frequency'),
)
for label_counts, chart_title in pareto_subsets:
    create_pareto_plot(label_counts, x='LevelText', y='Date', title=chart_title)
# Periods of interest; only the first entry is used below.
time_periods = [
    {
        "start": "2019-07-01",
        "end": "2019-07-04",
        "label": "something happened",
    },
]
first_period = time_periods[0]

# Rows of `mood` whose timestamp falls inside the period.
period_selected = mood[first_period["start"]:first_period["end"]]
period_selected_event = first_period["label"]
period_levels = period_selected['Level']

# Highest level in the period and the timestamp at which it was recorded.
mood_period_max = period_levels.max()
mood_period_max_idx = period_levels.idxmax(axis=0, skipna=True)
print('The moment that your felt best during the period:', mood_period_max_idx)
#mood_period_max_event = input('please input the event happened when you felt best during the period:')

# Lowest level in the period and the timestamp at which it was recorded.
mood_period_min = period_levels.min()
mood_period_min_idx = period_levels.idxmin(axis=0, skipna=True)
print('The moment that your felt worse during the period:', mood_period_min_idx)
#mood_period_min_event = input('please input the event happened when you felt worse during the period:')

# Free-text descriptions attached to the best and worst moments of the period.
events = [
    {
        "event_mood_max": "thing A",
        "event_mood_min": "thing B"
    },
]
# Full timeline with the selected period shaded and its extremes annotated.
fig, ax = plt.subplots(figsize=(30, 14))
ax.plot(mood['Level'], marker='.', linestyle='-', linewidth=0.5, label='Mood Level')
ax.plot(moodlevel_daily_mean, marker='o', markersize=8, linestyle='-', label='Daily Mean')

# Convert the period bounds to timestamps before handing them to matplotlib.
# Raw date strings are only placed correctly when the axis converter happens
# to parse strings; explicit timestamps do not depend on that.
period_start = pd.Timestamp(time_periods[0]["start"])
period_end = pd.Timestamp(time_periods[0]["end"])
ax.axvspan(period_start, period_end, color=sns.xkcd_rgb['grey'], alpha=0.5)

ax.set_title('Figure 10. Mood Changes Over Time with Annotations')
ax.set_ylabel('Mood')
ax.set_xlabel('Date')
ax.legend(loc='upper left', fontsize=11, frameon=True).get_frame().set_edgecolor('blue')

# Text box naming the event, anchored at the start of the period at y = 9.
bbox_props0 = dict(boxstyle='square, pad=0.6', fc='mediumvioletred', ec='r', alpha=.4, lw=.5)
ax.text(period_start, 9,
        'Event happened during this period:\n{}'.format(period_selected_event),
        size=12, ha='left', family='serif', color='yellow', style='italic',
        weight='bold', bbox=bbox_props0)

# Call-outs pointing at the best and worst moments of the period.
bbox_props1 = dict(boxstyle='round4, pad=0.6', fc='cyan', ec='b', lw=.5)
ax.annotate('Mood Max = {}\nEvent = {}\nDate = {}'
            .format(mood_period_max, events[0]["event_mood_max"],
                    mood_period_max_idx.strftime('%a, %Y-%m-%d')),
            fontsize=12,
            fontweight='demi',
            xy=(mood_period_max_idx, mood_period_max),
            xycoords='data',
            xytext=(-150, -30),
            textcoords='offset points',
            arrowprops=dict(arrowstyle="->"), bbox=bbox_props1)
ax.annotate('Mood Min = {}\nEvent = {}\nDate = {}'
            .format(mood_period_min, events[0]["event_mood_min"],
                    mood_period_min_idx.strftime('%a, %Y-%m-%d')),
            fontsize=12,
            fontweight='demi',
            xy=(mood_period_min_idx, mood_period_min),
            xycoords='data',
            xytext=(-150, 30),
            textcoords='offset points',
            arrowprops=dict(arrowstyle="->"), bbox=bbox_props1)

plt.tight_layout()
# Every other figure ends with plt.show(); without it this one is only
# rendered when the notebook backend displays figures implicitly.
plt.show()
# Every column from position 10 onwards is treated as a tag column.
tags = list(mood.columns.values)[10:]

# Column-wise sum per tag, stored as a two-column frame (frequency, tags)
# that stays indexed by tag name.
tag_sum = pd.DataFrame(mood[tags].apply(lambda tag_column: tag_column.sum()))
tag_sum.columns = ['frequency']
tag_sum['tags'] = tag_sum.index.values

tag_sum.plot(
    kind='bar',
    x='tags',
    y='frequency',
    legend=None,
    title='Figure 11. Frequencies of Mood Tags',
)
plt.show()
def _tag_frequencies(entries):
    """Sum each tag column of `entries` into a (frequency, tags) frame."""
    summed = pd.DataFrame(entries[tags].apply(lambda tag_column: tag_column.sum()))
    summed['tags'] = summed.index.values
    summed.columns = ['frequency', 'tags']
    return summed


# Entries with Level >= 6 count as good mood, everything below as bad mood.
good_mask = mood["Level"] >= 6
mood_good = mood[good_mask]
mood_bad = mood[mood["Level"] < 6]

goodmood_tag_sum = _tag_frequencies(mood_good)
badmood_tag_sum = _tag_frequencies(mood_bad)

# Two stacked bar charts sharing both axes so the groups compare directly.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, sharex=True, sharey=True)
goodmood_tag_sum.plot(kind='bar', x='tags', y='frequency', legend=None, ax=ax1)
ax1.set_title('Frequencies of Good Mood Tags', loc='right')
badmood_tag_sum.plot(kind='bar', x='tags', y='frequency', legend=None, ax=ax2)
ax2.set_title('Frequencies of Bad Mood Tags', loc='right')
plt.suptitle('Figure 12. Frequencies of Mood Tags')
plt.show()
!pip install wordcloud
from PIL import Image, ImageSequence
from wordcloud import WordCloud
def DrawWordcloud(df, title='Figure 13. Wordcloud of the Mood Tags', output_path='Wordcloud.png'):
    """Render a word cloud of tag frequencies, show it and save it as an image.

    Args:
        df: Frame with a ``tags`` column (words) and a ``frequency`` column
            (their weights).
        title: Title drawn above the word cloud.
        output_path: File the rendered word cloud is written to.
    """
    wc = WordCloud(background_color='White', width=1000, height=860, margin=2)
    # Tag names are converted to str because they become the words drawn.
    frequencies = {str(tag): count for tag, count in zip(df.tags, df.frequency)}
    wc.generate_from_frequencies(frequencies)
    plt.imshow(wc)
    plt.axis("off")
    plt.title(title)
    plt.show()
    wc.to_file(output_path)
DrawWordcloud(tag_sum)

# NOTE: this is an alias, not a copy. Every column written below is also
# written to `mood`, and the day-of-week figures later read those columns
# from `mood`.
mood_perminute = mood
# day_name() replaces the `weekday_name` attribute, which pandas 1.0 removed.
mood_perminute['Day of week'] = mood_perminute.index.day_name()
mood_perminute['Date'] = mood_perminute.index.date
mood_perminute['Time'] = mood_perminute.index.time
# Subtract the hour that the earlier rounding step added to entries logged
# after minute 30, so Hour + Minute/60 is the real time of day.
mood_perminute.loc[mood_perminute.Minute > 30, 'Hour'] = mood_perminute['Hour'] - 1
mood_perminute['TimeStamp'] = mood_perminute['Hour'] + mood_perminute['Minute'] / 60

# Split the entries into working days and weekend days.
weekday = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']
weekend = ['Saturday', 'Sunday']
weekday_mood = mood_perminute.loc[mood_perminute['Day of week'].isin(weekday)]
weekend_mood = mood_perminute.loc[mood_perminute['Day of week'].isin(weekend)]

# Mean level per hour of day, plus a centred 3-hour rolling mean of it.
weekday_mood_hourly_mean = weekday_mood.groupby('Hour')['Level'].mean()
weekend_mood_hourly_mean = weekend_mood.groupby('Hour')['Level'].mean()
weekday_rolling = weekday_mood_hourly_mean.rolling(3, center=True).mean()
weekend_rolling = weekend_mood_hourly_mean.rolling(3, center=True).mean()
# Scatter every entry by time of day (one plot call per calendar date), then
# overlay the hourly means and their rolling means for both groups.
fig, ax = plt.subplots(figsize=(30, 10))

scatter_layers = (
    (weekday_mood, 'salmon'),
    (weekend_mood, 'lightskyblue'),
)
for day_group, point_colour in scatter_layers:
    for Date, selection in day_group.groupby("Date"):
        selection.plot(x='TimeStamp', y='Level', ax=ax, marker='o', markersize=8,
                       linestyle='None', color=point_colour, legend=False)

mean_lines = (
    (weekday_mood_hourly_mean, dict(color='r', linewidth=2, label='Weekday Hourly Average Mood')),
    (weekend_mood_hourly_mean, dict(color='b', linewidth=2, label='Weekend Hourly Average Mood')),
    (weekday_rolling, dict(color='r', linewidth=20, alpha=0.2, label='Rolling Mean')),
    (weekend_rolling, dict(color='b', linewidth=20, alpha=0.2, label='Rolling Mean')),
)
for hourly_series, line_style in mean_lines:
    ax.plot(hourly_series, **line_style)

ax.set_xlabel('TimeStamp')
ax.set_ylabel('Mood')
plt.title('Figure 14. Differences Between Weekend and Weekday')
plt.show()
fig, ax = plt.subplots(figsize=(30, 15))
dayofweek = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
# Full day names in the same Monday-to-Sunday order as the short tick labels.
# Boxes and means are both forced into this order so each label sits under
# the data of its own day.
day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
day_positions = np.arange(len(day_order))

# Day names are derived from the timestamp index, so the figure does not
# depend on what the 'Day of week' column currently holds.
boxplot_data = pd.DataFrame({
    'Day of week': np.asarray(mood.index.day_name()),
    'Level': mood['Level'].values,
})
sns.boxplot(data=boxplot_data, x='Day of week', y='Level', order=day_order, ax=ax)

# Mean level per day, reindexed to calendar order (NaN for absent days).
dayofweek_average_mood = boxplot_data.groupby('Day of week')['Level'].mean().reindex(day_order)
ax.plot(day_positions, dayofweek_average_mood.values, color='r', linewidth=5, label='Average Mood')

ax.set_ylabel('Mood')
ax.set_xlabel('Day of Week')
plt.xticks(day_positions, dayofweek)
plt.title('Figure 15. Boxplot based on Day of the Week ')
plt.legend()
plt.show()
fig, ax = plt.subplots(figsize=(30, 15))
dayofweek = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
day_positions = np.arange(len(day_order))

# Place every entry at its weekday number (Monday = 0 ... Sunday = 6), so the
# points line up with the Mon..Sun tick labels whatever order the days
# appear in the data.
ax.plot(mood.index.dayofweek, mood['Level'].values,
        marker='o', markersize=8, linestyle='None', color='b')

# Mean level per day in calendar order. A plain groupby sorts the day names
# alphabetically, which would put each mean above the wrong tick.
dayofweek_average_mood = (
    mood['Level']
    .groupby(np.asarray(mood.index.day_name()))
    .mean()
    .reindex(day_order)
    .rename_axis('Day of week')
    .reset_index(drop=False)
)
ax.plot(day_positions, dayofweek_average_mood['Level'].values,
        color='r', linewidth=2, label='Average Mood')

ax.set_ylabel('Mood')
ax.set_xlabel('Day of Week')
plt.xticks(day_positions, dayofweek)
plt.title('Figure 16. Line Chart based on Day of the Week')
plt.legend()
plt.show()