# Importing NumPy and Pandas to work with

import numpy as np
import pandas as pd

# Import seaborn to use for scatter plots later on
import seaborn.objects as so

# Define a function that sets axis labels and title for a given plot
# Default font size is 8, but can be overriden
def set_labels(plot, x, y, title, size=8):
    plot.set_xlabel(x, fontsize=size)
    plot.set_ylabel(y, fontsize=size)
    plot.set_title(title, fontsize=size);

# Read in dataset and look at the number of rows and columns

df_guns = pd.read_csv('Database_Ncis_and_Census_data/gun_data.csv')
df_guns.shape

(12485, 27)

# Look at the head to see what the table looks like

df_guns.head()

# Use the describe method to see some of the descriptive statistics for
#    this dataset

df_guns.describe()

# Use the info method to see data types and number of non-null values
#    in each column

df_guns.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12485 entries, 0 to 12484
Data columns (total 27 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   month                      12485 non-null  object 
 1   state                      12485 non-null  object 
 2   permit                     12461 non-null  float64
 3   permit_recheck             1100 non-null   float64
 4   handgun                    12465 non-null  float64
 5   long_gun                   12466 non-null  float64
 6   other                      5500 non-null   float64
 7   multiple                   12485 non-null  int64  
 8   admin                      12462 non-null  float64
 9   prepawn_handgun            10542 non-null  float64
 10  prepawn_long_gun           10540 non-null  float64
 11  prepawn_other              5115 non-null   float64
 12  redemption_handgun         10545 non-null  float64
 13  redemption_long_gun        10544 non-null  float64
 14  redemption_other           5115 non-null   float64
 15  returned_handgun           2200 non-null   float64
 16  returned_long_gun          2145 non-null   float64
 17  returned_other             1815 non-null   float64
 18  rentals_handgun            990 non-null    float64
 19  rentals_long_gun           825 non-null    float64
 20  private_sale_handgun       2750 non-null   float64
 21  private_sale_long_gun      2750 non-null   float64
 22  private_sale_other         2750 non-null   float64
 23  return_to_seller_handgun   2475 non-null   float64
 24  return_to_seller_long_gun  2750 non-null   float64
 25  return_to_seller_other     2255 non-null   float64
 26  totals                     12485 non-null  int64  
dtypes: float64(23), int64(2), object(2)
memory usage: 2.6+ MB

# I am interested to see what our values are in the 'month' column.
#    Where does the data start and end?

df_guns['month'].unique()

array(['2017-09', '2017-08', '2017-07', '2017-06', '2017-05', '2017-04',
       '2017-03', '2017-02', '2017-01', '2016-12', '2016-11', '2016-10',
       '2016-09', '2016-08', '2016-07', '2016-06', '2016-05', '2016-04',
       '2016-03', '2016-02', '2016-01', '2015-12', '2015-11', '2015-10',
       '2015-09', '2015-08', '2015-07', '2015-06', '2015-05', '2015-04',
       '2015-03', '2015-02', '2015-01', '2014-12', '2014-11', '2014-10',
       '2014-09', '2014-08', '2014-07', '2014-06', '2014-05', '2014-04',
       '2014-03', '2014-02', '2014-01', '2013-12', '2013-11', '2013-10',
       '2013-09', '2013-08', '2013-07', '2013-06', '2013-05', '2013-04',
       '2013-03', '2013-02', '2013-01', '2012-12', '2012-11', '2012-10',
       '2012-09', '2012-08', '2012-07', '2012-06', '2012-05', '2012-04',
       '2012-03', '2012-02', '2012-01', '2011-12', '2011-11', '2011-10',
       '2011-09', '2011-08', '2011-07', '2011-06', '2011-05', '2011-04',
       '2011-03', '2011-02', '2011-01', '2010-12', '2010-11', '2010-10',
       '2010-09', '2010-08', '2010-07', '2010-06', '2010-05', '2010-04',
       '2010-03', '2010-02', '2010-01', '2009-12', '2009-11', '2009-10',
       '2009-09', '2009-08', '2009-07', '2009-06', '2009-05', '2009-04',
       '2009-03', '2009-02', '2009-01', '2008-12', '2008-11', '2008-10',
       '2008-09', '2008-08', '2008-07', '2008-06', '2008-05', '2008-04',
       '2008-03', '2008-02', '2008-01', '2007-12', '2007-11', '2007-10',
       '2007-09', '2007-08', '2007-07', '2007-06', '2007-05', '2007-04',
       '2007-03', '2007-02', '2007-01', '2006-12', '2006-11', '2006-10',
       '2006-09', '2006-08', '2006-07', '2006-06', '2006-05', '2006-04',
       '2006-03', '2006-02', '2006-01', '2005-12', '2005-11', '2005-10',
       '2005-09', '2005-08', '2005-07', '2005-06', '2005-05', '2005-04',
       '2005-03', '2005-02', '2005-01', '2004-12', '2004-11', '2004-10',
       '2004-09', '2004-08', '2004-07', '2004-06', '2004-05', '2004-04',
       '2004-03', '2004-02', '2004-01', '2003-12', '2003-11', '2003-10',
       '2003-09', '2003-08', '2003-07', '2003-06', '2003-05', '2003-04',
       '2003-03', '2003-02', '2003-01', '2002-12', '2002-11', '2002-10',
       '2002-09', '2002-08', '2002-07', '2002-06', '2002-05', '2002-04',
       '2002-03', '2002-02', '2002-01', '2001-12', '2001-11', '2001-10',
       '2001-09', '2001-08', '2001-07', '2001-06', '2001-05', '2001-04',
       '2001-03', '2001-02', '2001-01', '2000-12', '2000-11', '2000-10',
       '2000-09', '2000-08', '2000-07', '2000-06', '2000-05', '2000-04',
       '2000-03', '2000-02', '2000-01', '1999-12', '1999-11', '1999-10',
       '1999-09', '1999-08', '1999-07', '1999-06', '1999-05', '1999-04',
       '1999-03', '1999-02', '1999-01', '1998-12', '1998-11'],
      dtype=object)

# Read in dataset and look at the number of rows and columns

df_census = pd.read_csv('Database_Ncis_and_Census_data/US_Census_Data.csv')
df_census.shape

(85, 52)

# Take a look at the head of the data to see how our table is organized

df_census.head()

# Use the info method to see the data type for each column and non-null
#    counts

df_census.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 85 entries, 0 to 84
Data columns (total 52 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Fact            80 non-null     object
 1   Fact Note       28 non-null     object
 2   Alabama         65 non-null     object
 3   Alaska          65 non-null     object
 4   Arizona         65 non-null     object
 5   Arkansas        65 non-null     object
 6   California      65 non-null     object
 7   Colorado        65 non-null     object
 8   Connecticut     65 non-null     object
 9   Delaware        65 non-null     object
 10  Florida         65 non-null     object
 11  Georgia         65 non-null     object
 12  Hawaii          65 non-null     object
 13  Idaho           65 non-null     object
 14  Illinois        65 non-null     object
 15  Indiana         65 non-null     object
 16  Iowa            65 non-null     object
 17  Kansas          65 non-null     object
 18  Kentucky        65 non-null     object
 19  Louisiana       65 non-null     object
 20  Maine           65 non-null     object
 21  Maryland        65 non-null     object
 22  Massachusetts   65 non-null     object
 23  Michigan        65 non-null     object
 24  Minnesota       65 non-null     object
 25  Mississippi     65 non-null     object
 26  Missouri        65 non-null     object
 27  Montana         65 non-null     object
 28  Nebraska        65 non-null     object
 29  Nevada          65 non-null     object
 30  New Hampshire   65 non-null     object
 31  New Jersey      65 non-null     object
 32  New Mexico      65 non-null     object
 33  New York        65 non-null     object
 34  North Carolina  65 non-null     object
 35  North Dakota    65 non-null     object
 36  Ohio            65 non-null     object
 37  Oklahoma        65 non-null     object
 38  Oregon          65 non-null     object
 39  Pennsylvania    65 non-null     object
 40  Rhode Island    65 non-null     object
 41  South Carolina  65 non-null     object
 42  South Dakota    65 non-null     object
 43  Tennessee       65 non-null     object
 44  Texas           65 non-null     object
 45  Utah            65 non-null     object
 46  Vermont         65 non-null     object
 47  Virginia        65 non-null     object
 48  Washington      65 non-null     object
 49  West Virginia   65 non-null     object
 50  Wisconsin       65 non-null     object
 51  Wyoming         65 non-null     object
dtypes: object(52)
memory usage: 34.7+ KB

# Create new, clean dataset with only the columns we are interested in
# This new, clean, dataset has no null values

df_guns_clean = df_guns[['state', 'month', 'totals']].copy()

df_guns_clean.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12485 entries, 0 to 12484
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   state   12485 non-null  object
 1   month   12485 non-null  object
 2   totals  12485 non-null  int64 
dtypes: int64(1), object(2)
memory usage: 292.7+ KB

# In order to convert months to years, we first have to 
#    convert month strings to datetime type
df_guns_clean['month'] = pd.to_datetime(df_guns_clean['month'])

# Next, we can use the groupby and resample methods to find
#    totals for each year by state
df_guns_clean = df_guns_clean.groupby('state').resample('Y', on='month').sum(numeric_only=True)
df_guns_clean.reset_index(inplace=True)
df_guns_clean.insert(1, 'year', df_guns_clean['month'].dt.year)
df_guns_clean.drop(columns='month', inplace=True)

# Finally, we need to drop data from 1998 and 2017, as these are 
#    incomplete years

df_guns_clean.drop(df_guns_clean[df_guns_clean['year'] == 1998].index, inplace=True)
df_guns_clean.drop(df_guns_clean[df_guns_clean['year'] == 2017].index, inplace=True)

# View the clean dataset
df_guns_clean.head()

# Dropping Fact Note column, as this will not help with our questions
# dropping all NaNs since these pertain to irrelevant rows 
#    (notes included with census data)

df_census_clean = df_census.drop(columns='Fact Note').copy()
df_census_clean.dropna(inplace=True)

# Transpose columns and rows to make the data match up better with 
#    our other data. I am setting the index to "Fact" so that these
#    values will be our new columns. Finally, I am resetting the index
#    so that it does not index over state names and it removes "Fact" 
#    as the index label.

df_census_clean = df_census_clean.set_index('Fact').T.rename_axis(None, axis=1).reset_index()
df_census_clean.head()

# Rename state column to have proper label
df_census_clean.rename(columns={'index':'state'}, inplace=True)

# Now that the table has just the information needed, I can fix the 
#    datatypes for each column. Currently, everything is a string--
#    I want percents to be represented as floats, and integers as ints

for c in df_census_clean.columns[1:]:
    if 'percent' in c:
        # if the column label represents a percent, remove % symbol from values and change to type float
        df_census_clean[c] = df_census_clean[c].astype(str).str.replace('%', '').astype(float, errors='ignore')
    else:
        # if not a percent, take any commas out of the values and change to type int
        df_census_clean[c] = df_census_clean[c].astype(str).str.replace(',', '').astype(int, errors='ignore')

    
# Replace null values with the means
df_census_clean = df_census_clean.fillna(df_census_clean.mean(numeric_only=True))

# View the clean dataset
df_census_clean.head()

# Create a new dataframe that will contain the growth in gun
#    registrations from 1999 to 2016 for each state
df_guns_growth = pd.DataFrame()

# Create a 'state' column with only the unique values in our clean
#    dataframe. (USing
df_guns_growth['state'] = df_guns_clean['state'].unique()

# Reset the index to make our table a bit easier to read
df_guns_growth.reset_index(inplace=True, drop=True)

# Iterate through the columns and create a row in the new dataframe 
#    that contains the change from the value in 1999 to the value 
#    in 2016.
#    Note: Pandas will automatically match indeces when calculating
#    the difference, so I am using reset_index to ensure the indeces
#    from each query match up
df_guns_growth['growth'] = df_guns_clean.query('year==2016').reset_index()['totals'] - df_guns_clean.query('year==1999').reset_index()['totals']

# View this new dataset
df_guns_growth.describe()

# I am interested in states with highest total growth 
#    (I chose to select states with growth higher than the 90th quantile, as this narrows down the
#    pool to the top 6)
df_guns_high = df_guns_growth[df_guns_growth['growth'] > df_guns_growth['growth'].quantile(0.90)]

# Assigning this new dataframe to a filtered version of the clean data
#    containing only the states determined to have the highest growth
df_guns_high = df_guns_clean[df_guns_clean['state'].isin(df_guns_high['state'])]

# View all states in this new dataset
df_guns_high['state'].unique()

array(['California', 'Florida', 'Illinois', 'Indiana', 'Kentucky',
       'Texas'], dtype=object)

# Create graph plotting states against total growth
plot1 = df_guns_growth.plot(x='state', y='growth', kind='bar', width=0.8, fontsize=8, figsize=(10,5), legend=False, title='Gun Registration Growth from 1999 to 2016, by State')
set_labels(plot1, 'State', 'Increase in Gun Registration', 'Gun Registration Growth from 1999 to 2016, by State');

# Creating a pivot table to set year column against totals column, 
#    adding a new line for each state
plot2 = pd.pivot_table(df_guns_high.set_index('year'),values='totals', index=['year'],columns=['state'])

# Creating a plot from the new pivot table and tweaking attributes for
#    better readability
plot2 = plot2.plot(colormap='viridis', figsize=(10, 5), xticks=df_guns_high['year'], fontsize=8);
set_labels(plot2,'Year', 'Total Number of Gun Registrations', 'Gun Registrations per Year in States with High Growth');

# Create new list that we will fill with our guns per capita values
guns_per_cap = []

# Iterate through the rows in our census data and calculate guns per capita (2016)
for r in range(len(df_census_clean)):
    # Find gun total from gun dataframe that matches the current state in census
    #    dataframe and is from the year 2016
    guns = df_guns_clean.loc[(df_guns_clean['state'] == df_census_clean.iloc[r]['state']) & (df_guns_clean['year'] == 2016)]['totals'].item()
    # Calculate guns per capita by dividing gun totals by population in 2016
    #    and add this value to our list
    guns_per_cap.append(guns / df_census_clean.iloc[:, 1][r]) 

# Assign our list to a new column in the dataframe
df_census_clean['gun per capita'] = guns_per_cap

# View descriptive statistics for this new column
df_census_clean['gun per capita'].describe()

count    50.000000
mean      0.097695
std       0.111928
min       0.011677
25%       0.060872
50%       0.083753
75%       0.102326
max       0.828683
Name: gun per capita, dtype: float64

# The max value is much larger than the mean; We can create a boxplot to view the distribution
#    and any outliers
boxplot = df_census_clean['gun per capita'].plot.box()
boxplot.set_xticklabels([])
set_labels(boxplot, 'All States', 'Gun per Capita', 'Distribution of Gun per Capita for All States');

# find states with 'guns per capita' value larger than the mean to see which states have highest
#    guns per capita. (I chose to select states with gun per capita higher than the 90th quantile, 
#    as this narrows down the pool to the top 5)
df_census_high = df_census_clean[df_census_clean['gun per capita'] > df_census_clean['gun per capita'].quantile(0.90)]
df_census_high[['state', 'gun per capita']]

# Create a new dataframe that stores the correlation coefficient of guns per 
#    capita against other census data (ignoring the last row, which is a self 
#    comparison)
df_assoc = pd.DataFrame(df_census_high.corr(numeric_only=True)['gun per capita'][:-2])
df_assoc.rename_axis('census data', inplace=True)

# Create a bar graph showing the correlation for each piece of census data
# I set the y-axis range from -1 to 1, as this is the full possible range of correlation coefficients
assoc_plot = df_assoc.plot(kind='bar', figsize=(10, 5), fontsize=6)
assoc_plot.set_ylim(-1, 1)
set_labels(assoc_plot, 'Census Data', 'Correlation Coefficient', 'Relationship Between Census Data and High Guns per Capita');

# Create list of columns we are interested in; This includes the columns with
#    a correlation coefficient larger than 0.5 (or smaller than -0.5), as these
#    indicate at least moderate strength
cols_to_keep = (df_assoc['gun per capita'][df_assoc['gun per capita'].abs() > 0.5]).index.tolist()
# We also want to keep the gun per capita column
cols_to_keep.append('gun per capita')

# Create a new dataframe using our list of columns to keep
df_census_corr = df_census_high[cols_to_keep]

# Create scatterplots to for each of these columns to see a visual representation
#    of the relationships (below)

plot = sns.regplot(df_census_corr, x='gun per capita', y=cols_to_keep[0], ci=None);
simple_label = cols_to_keep[0].split(',')[0]
set_labels(plot, 'Gun per Capita', cols_to_keep[0], simple_label + " vs Gun per Capita", 7)

plot = sns.regplot(df_census_corr, x='gun per capita', y=cols_to_keep[1], ci=None);
simple_label = cols_to_keep[1].split(',')[0]
set_labels(plot, 'Gun per Capita', cols_to_keep[1], simple_label + " vs Gun per Capita", 7)

plot = sns.regplot(df_census_corr, x='gun per capita', y=cols_to_keep[2], ci=None);
simple_label = cols_to_keep[2].split(',')[0]
set_labels(plot, 'Gun per Capita', cols_to_keep[2], simple_label + " vs Gun per Capita", 7)

# Running this cell will execute a bash command to convert this notebook to an .html file
!python -m nbconvert --to html Investigate_a_Dataset.ipynb

[NbConvertApp] Converting notebook Investigate_a_Dataset.ipynb to html
[NbConvertApp] WARNING | Alternative text is missing on 7 image(s).
[NbConvertApp] Writing 810348 bytes to Investigate_a_Dataset.html

	permit	permit_recheck	handgun	long_gun	other	multiple	admin	prepawn_handgun	prepawn_long_gun	prepawn_other	...	returned_other	rentals_handgun	rentals_long_gun	private_sale_handgun	private_sale_long_gun	private_sale_other	return_to_seller_handgun	return_to_seller_long_gun	return_to_seller_other	totals
count	12461.000000	1100.000000	12465.000000	12466.000000	5500.000000	12485.000000	12462.000000	10542.000000	10540.000000	5115.000000	...	1815.000000	990.000000	825.000000	2750.000000	2750.000000	2750.000000	2475.000000	2750.000000	2255.000000	12485.000000
mean	6413.629404	1165.956364	5940.881107	7810.847585	360.471636	268.603364	58.898090	4.828021	7.834156	0.165591	...	1.027548	0.076768	0.087273	14.936000	11.602909	1.030182	0.402020	0.441818	0.105987	21595.725911
std	23752.338269	9224.200609	8618.584060	9309.846140	1349.478273	783.185073	604.814818	10.907756	16.468028	1.057105	...	4.386296	0.634503	0.671649	71.216021	54.253090	4.467843	1.446568	1.528223	0.427363	32591.418387
min	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
25%	0.000000	0.000000	865.000000	2078.250000	17.000000	15.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	4638.000000
50%	518.000000	0.000000	3059.000000	5122.000000	121.000000	125.000000	0.000000	0.000000	1.000000	0.000000	...	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	12399.000000
75%	4272.000000	0.000000	7280.000000	10380.750000	354.000000	301.000000	0.000000	5.000000	8.000000	0.000000	...	0.000000	0.000000	0.000000	2.000000	4.000000	0.000000	0.000000	0.000000	0.000000	25453.000000
max	522188.000000	116681.000000	107224.000000	108058.000000	77929.000000	38907.000000	28083.000000	164.000000	269.000000	49.000000	...	64.000000	12.000000	12.000000	1017.000000	777.000000	71.000000	28.000000	17.000000	4.000000	541978.000000

	Fact	Fact Note	Alabama	Alaska	Arizona	Arkansas	California	Colorado	Connecticut	Delaware	...	South Dakota	Tennessee	Texas	Utah	Vermont	Virginia	Washington	West Virginia	Wisconsin	Wyoming
0	Population estimates, July 1, 2016, (V2016)	NaN	4,863,300	741,894	6,931,071	2,988,248	39,250,017	5,540,545	3,576,452	952,065	...	865454	6651194	27,862,596	3,051,217	624,594	8,411,808	7,288,000	1,831,102	5,778,708	585,501
1	Population estimates base, April 1, 2010, (V2...	NaN	4,780,131	710,249	6,392,301	2,916,025	37,254,522	5,029,324	3,574,114	897,936	...	814195	6346298	25,146,100	2,763,888	625,741	8,001,041	6,724,545	1,853,011	5,687,289	563,767
2	Population, percent change - April 1, 2010 (es...	NaN	1.70%	4.50%	8.40%	2.50%	5.40%	10.20%	0.10%	6.00%	...	0.063	0.048	10.80%	10.40%	-0.20%	5.10%	8.40%	-1.20%	1.60%	3.90%
3	Population, Census, April 1, 2010	NaN	4,779,736	710,231	6,392,017	2,915,918	37,253,956	5,029,196	3,574,097	897,934	...	814180	6346105	25,145,561	2,763,885	625,741	8,001,024	6,724,540	1,852,994	5,686,986	563,626
4	Persons under 5 years, percent, July 1, 2016, ...	NaN	6.00%	7.30%	6.30%	6.40%	6.30%	6.10%	5.20%	5.80%	...	0.071	0.061	7.20%	8.30%	4.90%	6.10%	6.20%	5.50%	5.80%	6.50%

	state	Population estimates, July 1, 2016, (V2016)	Population estimates base, April 1, 2010, (V2016)	Population, percent change - April 1, 2010 (estimates base) to July 1, 2016, (V2016)	Population, Census, April 1, 2010	Persons under 5 years, percent, July 1, 2016, (V2016)	Persons under 5 years, percent, April 1, 2010	Persons under 18 years, percent, July 1, 2016, (V2016)	Persons under 18 years, percent, April 1, 2010	Persons 65 years and over, percent, July 1, 2016, (V2016)	...	All firms, 2012	Men-owned firms, 2012	Women-owned firms, 2012	Minority-owned firms, 2012	Nonminority-owned firms, 2012	Veteran-owned firms, 2012	Nonveteran-owned firms, 2012	Population per square mile, 2010	Land area in square miles, 2010	FIPS Code
0	Alabama	4863300	4780131	1.7	4779736	6.0	6.4	22.6	23.7	16.1	...	374153	203604	137630	92219	272651	41943	316984	94.4	50645.33	"01"
1	Alaska	741894	710249	4.5	710231	7.3	7.6	25.2	26.4	10.4	...	68032	35402	22141	13688	51147	7953	56091	1.2	570640.95	"02"
2	Arizona	6931071	6392301	8.4	6392017	6.3	7.1	23.5	25.5	16.9	...	499926	245243	182425	135313	344981	46780	427582	56.3	113594.08	"04"
3	Arkansas	2988248	2916025	2.5	2915918	6.4	6.8	23.6	24.4	16.3	...	231959	123158	75962	35982	189029	25915	192988	56	52035.48	"05"
4	California	39250017	37254522	5.4	37253956	6.3	6.8	23.2	25.0	13.6	...	3548449	1852580	1320085	1619857	1819107	252377	3176341	239.1	155779.22	"06"

Project: NICS Firearm Background Check Data¶

Table of Contents¶

Introduction¶

Dataset Description¶

Questions for Analysis¶

Import statements¶

Data Wrangling¶

Data Assessment¶

FBI Gun Background Check Data¶

U.S. Census Data¶

Data Cleaning¶

Gun Data¶

Census Data¶

Exploratory Data Analysis¶

1. Which states have had the highest growth in gun registrations?¶

Highest total growth from 1999 to 2016¶

Growth in registrations by year¶

2. What census data is most associated with high gun per capita?¶

Finding guns per capita¶

Examine correlation between high gun per capita and other data¶

Conclusions¶

1. Which states have had the highest growth in gun registrations?¶

2. What census data is most associated with high gun per capita?¶

	month	state	permit	permit_recheck	handgun	long_gun	other	multiple	admin	prepawn_handgun	...	private_sale_handgun	private_sale_long_gun	private_sale_other	return_to_seller_long_gun	return_to_seller_other	totals
0	2017-09	Alabama	16717.0	0.0	5734.0	6320.0	221.0	317	0.0	15.0	...	9.0	16.0	3.0	0.0	3.0	32019
1	2017-09	Alaska	209.0	2.0	2320.0	2930.0	219.0	160	0.0	5.0	...	17.0	24.0	1.0	0.0	0.0	6303
2	2017-09	Arizona	5069.0	382.0	11063.0	7946.0	920.0	631	0.0	13.0	...	38.0	12.0	2.0	0.0	0.0	28394
3	2017-09	Arkansas	2935.0	632.0	4347.0	6063.0	165.0	366	51.0	12.0	...	13.0	23.0	0.0	2.0	1.0	17747
4	2017-09	California	57839.0	0.0	37165.0	24581.0	2984.0	0	0.0	0.0	...	0.0	0.0	0.0	0.0	0.0	123506

	state	year	totals
1	Alabama	1999	246756
2	Alabama	2000	221911
3	Alabama	2001	230187
4	Alabama	2002	221008
5	Alabama	2003	225479

	growth
count	5.500000e+01
mean	3.342139e+05
std	5.550837e+05
min	-3.400000e+01
25%	6.386600e+04
50%	1.643500e+05
75%	3.450030e+05
max	3.428635e+06

	state	gun per capita
12	Illinois	0.150300
13	Indiana	0.216601
16	Kentucky	0.828683
25	Montana	0.130776
47	West Virginia	0.132352