import numpy as np
import pandas as pd
from scipy.stats import pearsonr, chi2_contingency
import matplotlib.pyplot as plt
import seaborn as sns

# Print NumPy floats in fixed-point notation, rounded to two decimals.
np.set_printoptions(precision=2, suppress=True)

# Load the game log; the path is relative to the current working directory.
nba = pd.read_csv('nba_games.csv')

# One frame per season of interest, selected on the year_id column.
nba_2010 = nba.loc[nba['year_id'] == 2010]
nba_2014 = nba.loc[nba['year_id'] == 2014]

# Preview the first rows of each season subset.
for season_games in (nba_2010, nba_2014):
    print(season_games.head())

            game_id  year_id  fran_id      opp_fran game_location  \
21717  200910270CLE     2010  Celtics     Cavaliers             A   
21718  200910280BOS     2010  Celtics       Hornets             H   
21719  200910280MIA     2010   Knicks          Heat             A   
21720  200910280MIN     2010     Nets  Timberwolves             A   
21721  200910280OKC     2010  Thunder         Kings             H   

       is_playoffs  pts  opp_pts game_result  forecast  point_diff  
21717            0   95       89           W  0.277472           6  
21718            0   92       59           W  0.814619          33  
21719            0   93      115           L  0.258755         -22  
21720            0   93       95           L  0.475155          -2  
21721            0  102       89           W  0.716764          13  
            game_id  year_id  fran_id   opp_fran game_location  is_playoffs  \
23468  201310300CLE     2014     Nets  Cavaliers             A            0   
23469  201310300NYK     2014   Knicks      Bucks             H            0   
23470  201310300SAS     2014    Spurs  Grizzlies             H            0   
23471  201310300TOR     2014  Celtics    Raptors             A            0   
23472  201310300UTA     2014  Thunder       Jazz             A            0   

       pts  opp_pts game_result  forecast  point_diff  
23468   94       98           L  0.611981          -4  
23469   90       83           W  0.793150           7  
23470  101       94           W  0.692980           7  
23471   87       93           L  0.361233          -6  
23472  101       98           W  0.526056           3

# 2010 points compared between Knicks and Nets through overlapping histograms
knicks_pts_10 = nba_2010[nba_2010.fran_id == 'Knicks']['pts']
nets_pts_10 = nba_2010[nba_2010.fran_id == 'Nets']['pts']

# Knicks mean minus Nets mean: a positive value means the Knicks scored more
# points per game on average.
diff_means_2010 = np.mean(knicks_pts_10) - np.mean(nets_pts_10)
print(diff_means_2010)

# density=True normalizes each histogram to unit area so the two teams can be
# compared directly; alpha=0.5 keeps the overlapping region visible.
plt.hist(knicks_pts_10, alpha=0.5, density=True, label='Knicks')
plt.hist(nets_pts_10, alpha=0.5, density=True, label='Nets')
plt.legend()
plt.title('2010 Season')
plt.show()

# Release the figure with close() rather than clf(): when show() has already
# disposed of the figure, clf() opens a fresh blank one and leaves it behind,
# whereas close() is a no-op if no figure is open.
plt.close()

9.731707317073173

<Figure size 640x480 with 0 Axes>

# 2014 points compared between Knicks and Nets through overlapping histograms
knicks_pts_14 = nba_2014[nba_2014.fran_id == 'Knicks']['pts']
nets_pts_14 = nba_2014[nba_2014.fran_id == 'Nets']['pts']

# Knicks mean minus Nets mean: a positive value means the Knicks scored more
# points per game on average.
diff_means_2014 = np.mean(knicks_pts_14) - np.mean(nets_pts_14)
print(diff_means_2014)

# density=True normalizes each histogram to unit area so the two teams can be
# compared directly; alpha=0.5 keeps the overlapping region visible.
plt.hist(knicks_pts_14, alpha=0.5, density=True, label='Knicks')
plt.hist(nets_pts_14, alpha=0.5, density=True, label='Nets')
plt.legend()
plt.title('2014 Season')
plt.show()

# Release the figure with close() rather than clf(): when show() has already
# disposed of the figure, clf() opens a fresh blank one and leaves it behind,
# whereas close() is a no-op if no figure is open.
plt.close()

0.44706798131809933

<Figure size 640x480 with 0 Axes>

# 2010 season: distribution of points per game, one box per franchise.
sns.boxplot(
    x='fran_id',
    y='pts',
    data=nba_2010,
)
plt.show()

# Contingency table of 2010 game counts: rows are the game result,
# columns are the game location.
location_results_freq = pd.crosstab(
    index=nba_2010['game_result'],
    columns=nba_2010['game_location'],
)
print(location_results_freq)

game_location    A    H
game_result            
L              133  105
W               92  120

# Convert the counts to proportions of all 2010 games, so the four cells
# are shares of the season total rather than of a row or column.
n_games_2010 = len(nba_2010)
location_result_proportions = location_results_freq / n_games_2010
print(location_result_proportions)

game_location         A         H
game_result                      
L              0.295556  0.233333
W              0.204444  0.266667

# Chi-square test of independence between game result and game location.
# `expected` holds the counts implied by independence given the table's
# marginal totals. NOTE: SciPy's default correction=True applies Yates'
# continuity correction when there is one degree of freedom.
chi2, pval, dof, expected = chi2_contingency(
    observed=location_results_freq,
)
for summary in (expected, chi2):
    print(summary)

[[119. 119.]
 [106. 106.]]
6.501704455367053

# Linear association between the forecast (attributed to 'FiveThirtyEight')
# and the point difference across 2010 games.
forecast_2010 = nba_2010['forecast']
margin_2010 = nba_2010['point_diff']

# Covariance matrix of the two series; the off-diagonal entries are their
# covariance.
point_diff_forecast_cov = np.cov(forecast_2010, margin_2010)
print(point_diff_forecast_cov)

# Pearson correlation coefficient together with its p-value.
point_diff_forecast_corr = pearsonr(forecast_2010, margin_2010)
print(point_diff_forecast_corr)

[[  0.05   1.37]
 [  1.37 186.56]]
PearsonRResult(statistic=0.4402088708468081, pvalue=9.41039157313972e-23)

# Start from an empty figure so nothing drawn earlier bleeds into this plot.
plt.clf()

# One point per 2010 game: forecast on x, point difference on y.
plt.scatter(x=nba_2010.forecast, y=nba_2010.point_diff)
plt.ylabel('Point differential')
plt.xlabel('Forecasted win prob.')
plt.show()

# Mean points per game for the Knicks and Nets in the 2010 and 2014 seasons,
# flattened to one row per (season, team) pair for plotting.
is_knicks_or_nets = nba['fran_id'].isin(["Knicks", "Nets"])
is_2010_or_2014 = nba['year_id'].isin([2010, 2014])
avg_pts = (
    nba[is_knicks_or_nets & is_2010_or_2014]
    .groupby(["year_id", "fran_id"])["pts"]
    .mean()
    .reset_index()
)

# Grouped bars: seasons on the x-axis, one bar per team within each season.
sns.barplot(x="year_id", y="pts", hue="fran_id", data=avg_pts)
plt.title("Average Points per Game (2010 vs 2014)")
plt.xlabel("Season")
plt.ylabel("Average Points")
plt.legend(title="Team")
plt.show()

# Per-game point differential for the Knicks and Nets, restricted to the
# 2010 and 2014 seasons and drawn as one box per team within each season.
knicks_nets_games = nba[
    nba['fran_id'].isin(["Knicks", "Nets"]) & nba['year_id'].isin([2010, 2014])
]
sns.boxplot(x="year_id", y="point_diff", hue="fran_id", data=knicks_nets_games)
plt.title("Point Differential Distribution (2010 vs 2014)")
plt.xlabel("Season")
plt.ylabel("Point Difference per Game")
plt.legend(title="Team")
plt.show()

NBA Game Analysis: Knicks vs Nets (2010 & 2014)¶

1. Import Libraries & Load Data¶

2. Data Overview¶

3. Data Analysis & Visualizations¶

Knicks vs Nets Points Distribution (2010)¶

Chi-Square Test¶

Covariance & Correlation¶

Scatterplot of Forecast vs Point Difference¶

Barplot of average points¶

Knicks vs Nets Points Distribution (2014)¶

4. Conclusions¶