# all the imports
from chat_downloader import ChatDownloader
from api_keys import *
import numpy as np
import math
import pandas as pd
import requests
from pandas.plotting import scatter_matrix
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from datetime import datetime
from dateutil import parser
import seaborn as sns
from scipy import stats
# dataframe will have columns:
# channel name | channel id | video name | video id | description | publishedAt | video start time | video end time |
# video length | # superchats | $ superchats | localization | viewcount | tags (topicdetails) | timestamps of each superchat
c_names = pd.read_csv("vtuber_channels.csv", names=["vtuber_name", "affiliation", "channel_id"])
df = pd.DataFrame(columns=['channel_name', 'channel_id', 'video_name', 'video_id', 'description', 'published_at',
                           'video_start_time', 'video_end_time', 'video_length', 'num_superchats', 'val_superchats',
                           'locale', 'viewcount', 'tags', 'timestamps'])
c_names.head()
| | vtuber_name | affiliation | channel_id |
|---|---|---|---|
| 0 | shirakami_fubuki | hololive | UCdn5BQ06XqgXoAxIhbqw5Rg |
| 1 | tokino_sora | hololive | UCp6993wxpyDPHUpavwDFqgg |
| 2 | haachama | hololive | UC1CfXB_kRs3C-zaeTG3oGyg |
| 3 | natsuiro_matsuri | hololive | UCQ0UDLQCjY0rmuxCDE38FGg |
| 4 | minato_aqua | hololive | UC1opHUrw8rvnsadT-iGp7Cg |
c_names = pd.read_hdf("./h5s/vtubers.h5")
# let's sort it by subs count (ascending)
c_names["subs_count"] = pd.to_numeric(c_names["subs_count"])
c_names = c_names.sort_values(by=['subs_count'], ignore_index=True)
c_names.head()
| | vtuber_name | affiliation | channel_id | subs_count | views_count | videos_count | country_loc |
|---|---|---|---|---|---|---|---|
| 0 | deisu_aruran | holostars | UCKeAhJvy8zgXWbh9duVjIaQ | 98400 | 3140315 | 610 | JP |
| 1 | peanut_kun | independent | UCmgWMQkenFc72QnYkdxdoKA | 111000 | 22847740 | 266 | JP |
| 2 | kageyama_shien | holostars | UChSvpZYRPh0FvG4SJGSga3g | 112000 | 3388583 | 258 | JP |
| 3 | tomoshika_hikasa | voms | UC3vzVK_N_SUVKqbX69L_X4g | 142000 | 6108838 | 201 | JP |
| 4 | yukoku_roberu | holostars | UCANDOlYTJT7N5jlRC3zfzVA | 166000 | 7847131 | 719 | JP |
Well, we now have all of the data we need to conduct our studies on, but a lot of it is not yet ready to work with. For example, the video start and end times are given as ISO 8601 strings (with a T between the date and the time, and a Z indicating that the time zone is UTC+0). We can't work with these strings directly, so we first need to parse them into datetime objects. The superchat timestamps need the same treatment: they are given as Unix timestamps in microseconds, which we also convert to datetimes.
Since this part is run separately from the previous ones (so that we don't need to do that gruesome data collection again), we start by opening the file we saved earlier.
joined = pd.read_hdf('h5s/joined.h5')
joined['timestamps'] = joined['timestamps'].apply(lambda x: [datetime.utcfromtimestamp(i/1000000) for i in x])
joined['published_at'] = joined['published_at'].apply(parser.isoparse)
joined["video_start_time"] = joined["video_start_time"].apply(parser.isoparse)
joined["video_end_time"] = joined["video_end_time"].apply(parser.isoparse)
for ind, row in joined.iterrows():
    joined.at[ind, "video_length"] = (row["video_end_time"] - row["video_start_time"]) / np.timedelta64(1, 'm')
joined["viewcount"] = pd.to_numeric(joined["viewcount"])
joined["video_length"] = pd.to_numeric(joined["video_length"])
joined["num_superchats"] = pd.to_numeric(joined["num_superchats"])
joined["stream_start_hour"] = joined.apply(lambda row: row["video_start_time"].hour, axis=1)
joined["stream_end_hour"] = joined.apply(lambda row: row["video_end_time"].hour, axis=1)
joined["is_gaming"] = joined.apply(lambda row: int('game' in ''.join(row["tags"])), axis=1)
joined["average_superchat_value"] = joined.apply(lambda row: row["val_superchats"]/row["num_superchats"] if row["num_superchats"] > 0 else 0, axis=1)
def vtuber_to_ordinal(vt):
    return c_names[c_names['vtuber_name'] == vt].index[0]

def affiliation_to_ordinal(vt):
    ordinal_list = ["independent", "voms", "holostars", "nijisanji", "hololive_id", "hololive_en", "hololive"]
    st = c_names[c_names['vtuber_name'] == vt].iloc[0].affiliation
    return ordinal_list.index(st)
joined["vtuber_ordinal"] = joined.apply(lambda row: vtuber_to_ordinal(row["channel_name"]), axis=1)
joined["affiliation_ordinal"] = joined.apply(lambda row: affiliation_to_ordinal(row["channel_name"]), axis=1)
joined.head()
channel_name | channel_id | video_name | video_id | description | published_at | video_start_time | video_end_time | video_length | num_superchats | ... | viewcount | tags | timestamps | game_name | stream_start_hour | stream_end_hour | is_gaming | average_superchat_value | vtuber_ordinal | affiliation_ordinal | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | shirakami_fubuki | UCdn5BQ06XqgXoAxIhbqw5Rg | 【#FOXDEMON】クシャルダオラの時間だゴラァ!!!【ホロライブプロダクション/白上フブ... | 14YQQoswvu0 | #FOXDEMON モンハンコラボです\n\n荒咬オウガ\nYoutube: @Oga Ch... | 2021-04-28 15:41:51+00:00 | 2021-04-28 13:00:12+00:00 | 2021-04-28 15:21:21+00:00 | 141.150000 | 8 | ... | 78525 | [Action_game, Role-playing_video_game, Strateg... | [2021-04-28 13:06:56.872040, 2021-04-28 13:23:... | Monster Hunter Rise | 13 | 15 | 1 | 3.917635 | 45 | 6 |
1 | shirakami_fubuki | UCdn5BQ06XqgXoAxIhbqw5Rg | 【SONG】夕方の貴重な歌枠。【ホロライブ/白上フブキ】 | -p8DUBvsMwc | ホロライブ所属の白上フブキです\nお久しぶりに歌っていきます✨✨\n\n歌っちゃ王\nhtt... | 2021-04-28 11:20:11+00:00 | 2021-04-28 08:57:39+00:00 | 2021-04-28 11:09:31+00:00 | 131.866667 | 117 | ... | 165992 | [Entertainment, Film] | [2021-04-28 08:52:57.092673, 2021-04-28 09:00:... | N/A | 8 | 11 | 0 | 7.203043 | 45 | 6 |
2 | shirakami_fubuki | UCdn5BQ06XqgXoAxIhbqw5Rg | 【#バカタレ共】おきろ!!!!狩りの時間だ!!!【ホロライブ/白上フブキ/角巻わため/不知火... | 2XH7EkqTWI8 | バカタレ共です。\nモンハンアプデ!!!\n\n■不知火フレア\n@Flare Ch. 不知... | 2021-04-28 03:15:29+00:00 | 2021-04-28 00:00:49+00:00 | 2021-04-28 02:52:30+00:00 | 171.683333 | 28 | ... | 185481 | [Action_game, Role-playing_video_game, Strateg... | [2021-04-27 22:27:24.007195, 2021-04-28 00:02:... | Monster Hunter | 0 | 2 | 1 | 24.612741 | 45 | 6 |
3 | shirakami_fubuki | UCdn5BQ06XqgXoAxIhbqw5Rg | 【#バカタレ共】モンスターハンター スペシャルプログラム 同時視聴!!【ホロライブ/白上フブ... | v3IYT5p79Fg | バカタレ共です。\n\n「モンスターハンター スペシャルプログラム 2021.4.27」\n... | 2021-04-27 14:25:41+00:00 | 2021-04-27 13:46:33+00:00 | 2021-04-27 14:20:20+00:00 | 33.783333 | 14 | ... | 114674 | [Entertainment, Film] | [2021-04-27 13:50:17.603994, 2021-04-27 13:51:... | N/A | 13 | 14 | 0 | 6.882685 | 45 | 6 |
4 | shirakami_fubuki | UCdn5BQ06XqgXoAxIhbqw5Rg | 【#from1st 】開催まであと1ヶ月!!見どころや未公開情報も解禁!!【ホロライブ1期生】 | hKdM0ldOJlE | ホロライブ1期生の3周年記念ライブ「from 1st」\n2021年 5月 28日(金)\n... | 2021-04-27 12:03:13+00:00 | 2021-04-27 10:58:11+00:00 | 2021-04-27 11:57:47+00:00 | 59.600000 | 3 | ... | 75277 | [Entertainment, Film] | [2021-04-27 11:02:03.777833, 2021-04-27 11:02:... | N/A | 10 | 11 | 0 | 4.990000 | 45 | 6 |
5 rows × 22 columns
All times are in UTC. Add 9 hours to get JST.
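As a quick sketch of that conversion using only the standard library (the example timestamp here is just one of the stream start times from the table above):

```python
from datetime import datetime, timedelta, timezone

# JST is a fixed UTC+9 offset (no daylight saving time)
JST = timezone(timedelta(hours=9))

utc_time = datetime(2021, 4, 28, 13, 0, 12, tzinfo=timezone.utc)
print(utc_time.astimezone(JST))  # 2021-04-28 22:00:12+09:00
```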
from datetime import time,date, timedelta
# bar chart for all superchats
datetimes = {}
schats = {}
# 00:00 to 23:59
# HOLOLIVE (JP) ONLY VTUBERS
for ind, row in joined.loc[joined['affiliation_ordinal'].isin([6])].iterrows():
    ct = row["video_start_time"]
    while ct <= row["video_end_time"]:
        # use an arbitrary fixed date because matplotlib needs an actual date to plot time series
        t = datetime.combine(date(2000, 1, 1),
                             ct.replace(second=0).time())
        if t in datetimes:
            datetimes[t] += 1
        else:
            datetimes[t] = 1
        ct += timedelta(minutes=1)
    for i in row["timestamps"]:
        # count each superchat in the hour it was sent (i, not ct)
        t1 = i.hour
        if t1 in schats:
            schats[t1] += 1
        else:
            schats[t1] = 1
#print(datetimes)
print(schats)
x,y = zip(*sorted(datetimes.items()))
x2,y2 = zip(*sorted(schats.items()))
fig, ax = plt.subplots(figsize=(15, 10));
ax.plot(x,y)
ax2 = ax.twiny().twinx()
ax2.set_xticks([])
ax2.tick_params('y', colors='b')
#ax2.set_ylim(top=8000)
ax2.bar(x2, y2, color='orange', alpha=0.5)
xfmt = mdates.DateFormatter('%H:%M')
ax.set_title("Streams and Superchats - Hololive JP")
ax.set_xlabel("Time of day")
ax.set_ylabel("Number of streams live")
ax2.set_ylabel("Number of superchats in hour")
ax.xaxis.set_major_locator(mdates.HourLocator())
ax.xaxis.set_major_formatter(xfmt)
{15: 26023, 11: 7578, 2: 937, 14: 23381, 10: 5298, 13: 20404, 17: 12136, 3: 3271, 6: 3180, 5: 3278, 12: 21781, 9: 4314, 8: 1790, 16: 19000, 19: 6570, 7: 2122, 1: 231, 0: 1498, 20: 3197, 18: 9142, 4: 1301, 21: 2234, 23: 2250, 22: 2099}
def plot_vtuber_streams(vtname):
    """Plot the number of streams live and superchats received per hour of day for one vtuber."""
    datetimes = {}
    schats = {i: 0 for i in range(24)}  # 00:00 to 23:59
    for ind, row in joined.loc[joined['channel_name'] == vtname].iterrows():
        # anchor the x-axis at both ends of the day
        datetimes[datetime.combine(date(2000, 1, 1), time(0, 0))] = 0
        datetimes[datetime.combine(date(2000, 1, 1), time(23, 59))] = 0
        ct = row["video_start_time"]
        while ct <= row["video_end_time"]:
            # use an arbitrary fixed date because matplotlib needs an actual date to plot time series
            t = datetime.combine(date(2000, 1, 1), ct.replace(second=0).time())
            datetimes[t] = datetimes.get(t, 0) + 1
            # pad the minutes adjacent to the stream with zeroes so the line drops back to
            # the axis, but don't let the padding roll over onto a different (fake) date
            for temp in (t - timedelta(minutes=1), t + timedelta(minutes=1)):
                if temp not in datetimes and temp.time() not in (time(0, 0), time(23, 59)):
                    datetimes[temp] = 0
            ct += timedelta(minutes=1)
        for i in row["timestamps"]:
            # count each superchat in the hour it was sent (i, not ct)
            schats[i.hour] += 1
    x, y = zip(*sorted(datetimes.items()))
    x2, y2 = zip(*sorted(schats.items()))
    fig, ax = plt.subplots(figsize=(15, 10))
    ax.plot(x, y)
    ax2 = ax.twiny().twinx()
    ax2.set_xticks([])
    ax2.tick_params('y', colors='b')
    ax2.bar(x2, y2, color='orange', alpha=0.5)
    ax.set_title("Streams and Superchats - {0}".format(vtname))
    ax.set_xlabel("Time of day")
    ax.set_ylabel("Number of streams live")
    ax2.set_ylabel("Number of superchats in hour")
    ax.xaxis.set_major_locator(mdates.HourLocator())
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))

# one figure per vtuber, for each Hololive branch
for affiliation in ("hololive", "hololive_en", "hololive_id"):
    for vtname in c_names.loc[c_names["affiliation"] == affiliation]["vtuber_name"]:
        plot_vtuber_streams(vtname)
Although we did not do any quantitative analyses here, nor find anything groundbreaking, we did corroborate some concepts that might have seemed like common sense.
While we qualitatively asserted that the shape of the "superchats per hour" distribution is similar to, but distinct from, the "streams live at any time" distribution, we did not back this up with numbers. In future work, one could use the two-sample Kolmogorov-Smirnov test to check whether the two datasets have the same shape.
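A sketch of how that test might look; the hour-of-day samples here are made up for illustration, but in practice they would be the per-minute stream hours and per-superchat hours computed above:

```python
import numpy as np
from scipy import stats

# Hypothetical hour-of-day samples (illustrative only)
stream_hours = np.array([11, 12, 12, 13, 13, 14, 15, 15, 16, 20])
superchat_hours = np.array([12, 13, 14, 14, 15, 15, 15, 16, 17, 21])

# Two-sample KS test: a small p-value would suggest the two
# distributions differ in shape
stat, p = stats.ks_2samp(stream_hours, superchat_hours)
print("KS statistic:", stat, "p-value:", p)
```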
Already, we can see that the r-value is higher, which is good: it indicates this fit matches our data better. Let's plot it:
fig, ax = plt.subplots(figsize=(15, 10))
plt.xlim([10**3, 5*10**6])
ax.plot(joined["viewcount"] ,joined["val_superchats"], 'o', alpha=0.5)
ax.set_yscale('log')
ax.set_xscale('log')
ax.set_xlabel("View Count")
ax.set_ylabel("Superchat Earnings (USD)")
ax.set_title("View Count vs Superchat Earnings (log-log scale)")
x = np.array(range(10**3, 10**7, 5))
# convert the slope and intercept to the relevant values in the right places
print("a = ", math.e**intercept)
print("k = ", slope)
ax.plot(x, (math.e**intercept)*x**slope);
plt.show()
a =  0.06631264243217902
k =  0.7581418841466804
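The printed `a` and `k` come from undoing the log transform: since the regression was fit on $\ln y$ against $\ln x$,

$\ln y = k \ln x + \ln a \implies y = e^{\ln a} x^{k} = a x^{k},$

which is why `a` is `math.e**intercept` and `k` is simply the slope.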
We can also plot the residuals, to see if our regression is good.
We can split our residuals into 4 equally-wide groups (based on the view count) and make a violin plot to check for normality.
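A sketch of that check; `xs` and `res` here are synthetic stand-ins, whereas in practice they would be the log view counts and the residuals from the fit:

```python
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

rng = np.random.default_rng(0)
xs = rng.uniform(8, 14, 500)   # stand-in for log view counts
res = rng.normal(0, 1, 500)    # stand-in for regression residuals

# Split into 4 equally-wide view-count bins and compare residual shapes;
# roughly symmetric, bell-shaped violins would be consistent with normality
groups = pd.cut(xs, bins=4).astype(str)
sns.violinplot(x=groups, y=res)
plt.show()
```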
Anyway, from our basic regression, it seems like superchat earnings can be roughly modelled by the equation:
$y = 0.0663126 x^{0.758142}$
where $x$ is the number of viewers and $y$ is the superchat revenue.
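Plugging numbers into this fit gives a quick sanity check (a sketch; the constants are just the fitted values above, so treat the outputs as rough estimates):

```python
a, k = 0.0663126, 0.758142  # fitted constants from the regression above

def predicted_superchat_usd(viewcount):
    # power-law model: y = a * x**k
    return a * viewcount ** k

# Sub-linear exponent: doubling the audience less than doubles expected earnings
print(predicted_superchat_usd(100_000))
print(predicted_superchat_usd(200_000))
```

For a 100k-view stream the fit predicts earnings on the order of a few hundred dollars.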
Next, let's see if video game streams do better or worse than this average.
games_only = joined.loc[joined['game_name'] != 'N/A']
temp_df = games_only[["viewcount", "val_superchats"]].copy()
temp_df = temp_df[temp_df['val_superchats'] > 0]
temp_df['viewcount'] = temp_df['viewcount'].apply(math.log)
temp_df['val_superchats'] = temp_df['val_superchats'].apply(math.log)
slope_g, intercept_g, r_value_g, p_value_g, std_err_g = stats.linregress(temp_df["viewcount"],temp_df["val_superchats"])
print('Regression from scipy y=mx+b: \ny = %f x + %f'%(slope_g, intercept_g))
print('p-value: %e'%(p_value_g))
print('r-value: %f'%(r_value_g))
print('standard error: %f'%(std_err_g))
fig, ax = plt.subplots(figsize=(15, 10))
plt.xlim([10**3, 5*10**6])
ax.plot(games_only["viewcount"] ,games_only["val_superchats"], 'o', alpha=0.5)
ax.set_yscale('log')
ax.set_xscale('log')
ax.set_xlabel("View Count")
ax.set_ylabel("Superchat Earnings (USD)")
ax.set_title("View Count vs Superchat Earnings (Games only, log-log scale)")
x = np.array(range(10**3, 10**7, 5))
# convert the slope and intercept to the relevant values in the right places
print("a = ", math.e**intercept_g)
print("k = ", slope_g)
ax.plot(x, (math.e**intercept)*x**slope, color='C1', label="old regression line");
ax.plot(x, (math.e**intercept_g)*x**slope_g, color='C2', label="games regression line");
plt.legend(loc='upper left');
plt.show()
Regression from scipy y=mx+b:
y = 0.957162 x + -5.281012
p-value: 8.104118e-133
r-value: 0.631815
standard error: 0.034170
a =  0.005087282402439689
k =  0.9571623123318653
Interesting. The r-value is higher (giving us a $r^2 \approx 0.4$), implying that this regression is a better fit for this subset of data than our original regression line was for the entire dataset, but that's probably to be expected. Also, this regression line has a greater slope but lower intercept than the original regression. A possible hypothesis to explain this is that in general people do not superchat as much on game streams, as evidenced by the fact that most of the data points have fewer views than the intersection of the two regression lines. However, for popular games that a lot of people enjoy (and thus also watch), they are more willing to shell out money. Of course, checking whether this is the case or not is out of scope of this project.
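The crossover point mentioned above can be computed directly: setting $a_1 x^{k_1} = a_2 x^{k_2}$ and solving gives $x = (a_1/a_2)^{1/(k_2 - k_1)}$. A sketch using the fitted constants printed above:

```python
# Fitted constants from the two regressions above
a_all, k_all = 0.06631264243217902, 0.7581418841466804       # all streams
a_games, k_games = 0.005087282402439689, 0.9571623123318653  # game streams

# Solve a_all * x**k_all == a_games * x**k_games for x
x_cross = (a_all / a_games) ** (1 / (k_games - k_all))
print(f"regression lines cross at ~{x_cross:,.0f} views")
```

This lands at roughly 400k views, consistent with most of the game-stream data points sitting to the left of the crossover.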
print(len(temp_df))
fig, ax = plt.subplots(figsize=(15, 10))
residuals = []
for ind, row in temp_df.iterrows():
    residuals.append(row["val_superchats"] - slope_g*row["viewcount"] - intercept_g)
#print(residuals)
ax.plot(temp_df["viewcount"] ,residuals, 'o', alpha=0.5)
#plt.yscale('symlog')
ax.set_xlabel("View Count")
ax.set_ylabel("Superchat Earnings (USD)")
ax.set_title("Residuals (log-log)")
ax.axhline(color="gray")
plt.show()
1183
Of course, the residuals exhibit similar behavior to before, and there is no reason to believe that this is a bad fit.
Next, let's take a closer look at the most popular games of the past few weeks.
dd = {}
for ind, row in joined.iterrows():
    gn = row["game_name"]
    if gn != 'N/A':
        if gn in dd:
            dd[gn] += 1
        else:
            dd[gn] = 1
popular_games = [i for i in dd if dd[i] >= 25]
popular_games
['Monster Hunter Rise', 'Minecraft', 'Among Us', 'Apex Legends', 'Uma Musume Pretty Derby', 'ARK: Survival Evolved']
Seems like there are 6 games that were streamed at least 25 times among all the Vtubers in this study. We can plot them all to see if anything interesting is immediately apparent.
colors_chart = ["#A6BDD7", "#FFB300", "#803E75", "#FF6800", "#C10020", "#007D34", "#F6768E", "#CEA262", "#817066"]
all_games = joined.loc[joined['game_name'] != 'N/A']
fig, ax = plt.subplots(figsize=(15, 10))
ax.plot(all_games["viewcount"], all_games["val_superchats"], 'o', color = colors_chart[0], alpha=0.5, label="Other games")
ax.set_yscale('log')
ax.set_xscale('log')
ax.set_xlabel("View Count")
ax.set_ylabel("Superchat Earnings (USD)")
ax.set_title("View Count vs Superchat Earnings (GAMES ONLY / double log scale)")
ind = 1
for game in popular_games:
    cur_game = joined.loc[joined['game_name'] == game]
    ax.plot(cur_game["viewcount"], cur_game["val_superchats"], 'o', color=colors_chart[ind], label=game)
    ind += 1
handles, labels = ax.get_legend_handles_labels()
handles = handles[1:] + [handles[0]]
labels = labels[1:] + [labels[0]]
ax.legend(handles, labels, loc='upper left');
plt.show()
No trends jump out yet. Let's take a closer look at each one, and add regression lines. (Residuals have been omitted for brevity, but if we really wanted to be rigorous we could include them.)
ind = 1
regs = []
x = np.array(range(10**3, 10**7, 5))
for game in popular_games:
    fig, ax = plt.subplots(figsize=(15, 10))
    ax.plot(all_games["viewcount"], all_games["val_superchats"], 'o', color=colors_chart[0], alpha=0.5, label="Other games")
    ax.set_yscale('log')
    ax.set_xscale('log')
    ax.set_xlabel("View Count")
    ax.set_ylabel("Superchat Earnings (USD)")
    ax.set_title("View Count vs Superchat Earnings ({0} only / log-log scale)".format(game))
    cur_game = joined.loc[joined['game_name'] == game]
    temp_cg_df = cur_game[["viewcount", "val_superchats"]].copy()
    temp_cg_df = temp_cg_df[temp_cg_df['val_superchats'] > 0]
    temp_cg_df['viewcount'] = temp_cg_df['viewcount'].apply(math.log)
    temp_cg_df['val_superchats'] = temp_cg_df['val_superchats'].apply(math.log)
    slope_t, intercept_t, r_value_t, p_value_t, std_err_t = stats.linregress(temp_cg_df["viewcount"], temp_cg_df["val_superchats"])
    print("a = ", math.e**intercept_t)
    print("k = ", slope_t)
    print('p-value: %e' % (p_value_t))
    print('r-value: %f' % (r_value_t))
    print('standard error: %f' % (std_err_t))
    print('num_points: %d' % (len(temp_cg_df)))
    ax.plot(x, (math.e**intercept_t)*x**slope_t, color=colors_chart[ind])
    regs.append((intercept_t, slope_t))
    ax.plot(cur_game["viewcount"], cur_game["val_superchats"], 'o', color=colors_chart[ind], label=game)
    ind += 1
    handles, labels = ax.get_legend_handles_labels()
    handles = handles[1:] + [handles[0]]
    labels = labels[1:] + [labels[0]]
    ax.legend(handles, labels, loc='upper left')
    plt.show()
| game | a | k | p-value | r-value | std. err. | num_points |
|---|---|---|---|---|---|---|
| Monster Hunter Rise | 0.00223748 | 1.014069 | 5.036286e-26 | 0.641997 | 0.083571 | 212 |
| Minecraft | 0.0246473 | 0.854143 | 1.186951e-14 | 0.571653 | 0.099767 | 153 |
| Among Us | 9.19828e-05 | 1.259079 | 1.320476e-05 | 0.765291 | 0.225781 | 24 |
| Apex Legends | 0.0102614 | 0.904058 | 5.337854e-17 | 0.660434 | 0.092680 | 125 |
| Uma Musume Pretty Derby | 0.00783646 | 0.904530 | 4.767082e-08 | 0.698563 | 0.138118 | 47 |
| ARK: Survival Evolved | 0.000298449 | 1.102810 | 1.710327e-03 | 0.504334 | 0.323824 | 36 |
colors_chart = ["#A6BDD7", "#FFB300", "#803E75", "#FF6800", "#C10020", "#007D34", "#F6768E", "#CEA262", "#817066"]
all_games = joined.loc[joined['game_name'] != 'N/A']
fig, ax = plt.subplots(figsize=(15, 10))
ax.plot(all_games["viewcount"], all_games["val_superchats"], 'o', color = colors_chart[0], alpha=0.5, label="Other games")
ax.set_yscale('log')
ax.set_xscale('log')
ax.set_xlabel("View Count")
ax.set_ylabel("Superchat Earnings (USD)")
ax.set_title("View Count vs Superchat Earnings (GAMES ONLY / double log scale)")
ind = 1
for game in popular_games:
    cur_game = joined.loc[joined['game_name'] == game]
    ax.plot(cur_game["viewcount"], cur_game["val_superchats"], 'o', color=colors_chart[ind], label=game)
    ax.plot(x, (math.e**regs[ind-1][0])*x**regs[ind-1][1], color=colors_chart[ind])
    ind += 1
handles, labels = ax.get_legend_handles_labels()
handles = handles[1:] + [handles[0]]
labels = labels[1:] + [labels[0]]
ax.legend(handles, labels, loc='upper left');
plt.show()
While it does seem like some of these games have significantly different slopes (in particular, the Among Us streams), it is important to note that only 24 points were used for that game's regression, so it's highly likely that the regression is inaccurate. Overall, we're starting to get too few data points in each category to be reliable, so we should probably end the views vs. superchat earnings analysis here, and conclude that individual game titles make little difference to superchat earnings.
Note: I talked with a Vtuber fan friend of mine who said that Among Us streams tend to not get many superchats. This seems to be corroborated by the data.
There is a monomial (power-law) relationship between view count and superchat earnings: $y = 0.0663126 x^{0.758142}$, where $x$ is the number of viewers and $y$ is the superchat revenue. This is certainly reasonable: while more viewers means you're more likely to make money, during a smaller stream individual people might feel more compelled to superchat because they feel they make a difference. When everyone else is also superchatting, one might be less inclined to do so; hence the sub-linear trend.
Game streams tend to follow a more linear relationship, with an exponent around 0.96. I speculate this might be because, during eventful games (particularly those that many people enjoy), people are more likely to superchat when an impactful situation occurs (winning a game, getting kills, rolling gacha [loot boxes]). This could be less affected by the "diffusion of responsibility" effect mentioned previously.
There is not necessarily a significant difference in superchat earnings between individual games.
While we weren't able to make a one-size-fits-all formula to model the amount of money a Vtuber will make from superchats, we gained a lot of insight into the nature of superchat earnings.
Hopefully you found this interesting and learned something new!
This is a list of all the Vtubers I used in my dataset, with links to their channels. It is sorted in descending order of subscribers. Sorry if your favorite isn't here!