Data currently comes from 2016-09-14, patch 6.17
import json, pandas, requests, re, os.path, sys
from IPython.core.display import display, HTML
idx = pandas.IndexSlice
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns;
sns.set_palette('deep')
#import mpld3, mpld3.plugins; mpld3.enable_notebook()
Lazily download a Wayback Machine archive of http://champion.gg/statistics/ and extract the data blob that makes up the table
# Local file used to cache the downloaded statistics JSON between runs.
json_file = 'championgg.json'
def get_json_data(url='https://web.archive.org/web/20160914110810/http://champion.gg/statistics/'):
    """Extract the JSON data from a champion.gg statistics page.

    Downloads ``url`` and returns, as text, the JSON array literal that the
    page assigns to the Javascript variable ``matchupData.stats``.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        ValueError: if no ``matchupData.stats = [...];`` line is found.
    """
    # The data is one line of Javascript setting the variable matchupData.stats
    parse_re = re.compile(r'^\s+matchupData\.stats = (\[.*\]);')
    # A timeout keeps a stalled connection from hanging the notebook forever.
    r = requests.get(url, timeout=60)
    # Fail loudly on an HTTP error instead of scanning an error page.
    r.raise_for_status()
    for l in r.text.split('\n'):
        m = parse_re.match(l)
        if m:
            return m.group(1)
    # Falling off the end used to return None silently, which only surfaced
    # later as a confusing TypeError in the caller's fp.write().
    raise ValueError('matchupData.stats assignment not found in %s' % url)
# Download the file if necessary.  The data is fetched and validated *before*
# the cache file is opened: opening first meant a failed download left an
# empty file behind, which every later run then mistook for a valid cache.
if not os.path.isfile(json_file):
    json_text = get_json_data()
    # Raises (TypeError/ValueError) on a missing or malformed blob, so
    # nothing unusable is ever written to disk.
    json.loads(json_text)
    with open(json_file, 'w') as fp:
        fp.write(json_text)
# Always read from the cache file so both paths yield the same data_blob.
with open(json_file) as fp:
    data_blob = json.load(fp)
# Keys looked up in each record's 'general' dict when the rows are built;
# they become the DataFrame's value columns, in this order.
column_names = [
'winPercent',
'playPercent',
'banRate',
'experience',
'kills',
'deaths',
'assists',
'largestKillingSpree',
'totalDamageDealtToChampions',
'totalDamageTaken',
'totalHeal',
'minionsKilled',
'neutralMinionsKilledEnemyJungle',
'neutralMinionsKilledTeamJungle',
'goldEarned',
'overallPosition',
'overallPositionChange',
]
# Flatten every record of data_blob into one row: its title, its role, then
# the 'general' statistics in column_names order.
d = [
    [datum['title'], datum['role']] + [datum['general'][n] for n in column_names]
    for datum in data_blob
]
# Index the table by (Champion, Role).
df = pandas.DataFrame(
    d, columns=['Champion', 'Role'] + column_names
).set_index(['Champion', 'Role'])
# Derived kills+assists per death ratio; position 7 places it directly after
# the 'assists' column.
kda = (df['kills'] + df['assists']) / df['deaths']
df.insert(7, 'KDA', kda)
# Highest play rate first.
df = df.sort_values('playPercent', ascending=False)
df.head(4)
# For each role, render a bold heading, the number of champions listed under
# it, and their names separated by bullets.
for role in df.index.levels[1]:
    champions = df.xs(role, level='Role').index
    summary = '<b>%s</b> %d<br>%s' % (role, len(champions), ' • '.join(champions))
    display(HTML(summary))
# Per-role mean and standard deviation of every statistic, rendered as
# "mean ±std" strings.
# DataFrame.mean/std(level=...) was deprecated in pandas 1.3 and removed in
# 2.0; groupby(level=...) is the documented replacement and also works on
# older releases.  sort=False keeps the roles in the order they first appear
# in df instead of re-sorting them.
by_role = df.groupby(level=1, sort=False)
# Format cell by cell through Series.map, which exists in every pandas
# version (DataFrame.applymap is deprecated since 2.1).
mean_text = by_role.mean().apply(lambda column: column.map(lambda n: "%.2f" % n))
std_text = by_role.std().apply(lambda column: column.map(lambda m: " ±%.2f" % m))
combine = mean_text + std_text
combine.transpose()
# Win rate against play rate for every champion/role pair.
# size=6 is omitted: 6 is already the default figure size, and the keyword
# was renamed to `height` in seaborn 0.9, so leaving it out works either way.
grid = sns.jointplot(x='winPercent', y='playPercent', data=df, ylim=(0,35), xlim=(40,60), kind="scatter")
# Optional interactive tooltips, only when mpld3 has been loaded (see the
# commented-out import at the top of the notebook).
if 'mpld3' in sys.modules:
    # Bind the module explicitly: being present in sys.modules does not
    # guarantee that the name `mpld3` was imported into this namespace.
    mpld3 = sys.modules['mpld3']
    # `points` was never defined before; the scatter drawn on the joint axes
    # is the collection the tooltip plugin has to attach to.
    points = grid.ax_joint.collections[0]
    # One label per row, in row order, so labels line up with the points.
    labels = [
        "%s: %s<br>Win: %.2f%%<br>Play: %.2f%%<br>" % (champion, position, win, play)
        for (champion, position), win, play in zip(df.index, df['winPercent'], df['playPercent'])
    ]
    tooltip = mpld3.plugins.PointHTMLTooltip(points, labels, css=".mpld3-tooltip { background-color: #fff }")
    mpld3.plugins.connect(plt.gcf(), tooltip)
# Win rate against the 'experience' column.
# size=6 is omitted: 6 is already the default figure size, and the keyword
# was renamed to `height` in seaborn 0.9, so leaving it out works either way.
sns.jointplot(x='experience', y='winPercent', data=df, kind="scatter")
# One small distribution plot per statistic on a 4x4 grid: KDA plus every
# entry of column_names except the last two (the overallPosition columns).
plotted = ['KDA'] + column_names[:-2]
fig, axs = plt.subplots(nrows=4, ncols=4, figsize=(10, 6))
# axs.flat walks the grid row by row, matching the original index arithmetic.
for panel, stat in zip(axs.flat, plotted):
    # Tick labels are blanked; only the shape of each distribution is shown.
    sns.distplot(df[stat], ax=panel).set(yticklabels=[], xticklabels=[])
fig.tight_layout(pad=0)
# Pairwise relationships between a handful of selected statistics.
pair_columns = [
    'experience',
    'winPercent',
    'KDA',
    'goldEarned',
    'totalDamageDealtToChampions',
]
subset = df.filter(items=pair_columns)
sns.pairplot(subset)
Add the role value column back to the DataFrame, from the index.
df['role'] = df.index.get_level_values(level=1)
# Per-role distribution of selected statistics: one row per statistic, a
# violin plot with the individual points swarmed on top in white.
variables = (
    'playPercent',
    'winPercent',
    'experience',
    'KDA',
    'goldEarned',
    'totalDamageDealtToChampions',
)
fig, axs = plt.subplots(nrows=len(variables), ncols=1, figsize=(7, len(variables)*4))
for ax, c in zip(axs, variables):
    # Both plotting calls label the axes themselves, so the blank labels and
    # the title are re-applied after each one.
    labelling = dict(xlabel="", ylabel="", title=c)
    sns.violinplot(x='role', y=c, data=df, inner=None, ax=ax).set(**labelling)
    sns.swarmplot(x='role', y=c, data=df, color="w", linewidth=0, size=2.5, ax=ax).set(**labelling)
fig.tight_layout(pad=0, h_pad=1)