__author__ = 'Thomaz L Santana'

import urllib.request
import functools
import ast
import json

import numpy as np
import matplotlib.pyplot as plt


class Census:
    """Tiny client for the Census data API at api.census.gov.

    The API key passed to the constructor is sent as the ``key`` query
    parameter of every request issued by :meth:`get`.
    """

    def __init__(self, key):
        self.key = key

    def get(self, fields, geo, year=2013, dataset='acs5'):
        """Fetch ``fields`` for the geography ``geo`` and return the parsed reply.

        Args:
            fields: Iterable of variable codes; joined with commas into the
                ``get=`` query parameter.
            geo: Iterable of already-formatted query fragments (for example
                ``'for=us:*'``); appended to the query, separated by ``&``.
            year: Survey year, used as a path component of the URL.
            dataset: Dataset name, used as a path component of the URL.

        Returns:
            The decoded JSON document. The script below treats it as a list
            of rows and reads row 1 as the data row (row 0 is presumably the
            header -- confirm against the API documentation).

        Raises:
            urllib.error.URLError: If the request fails.
            json.JSONDecodeError: If the response body is not valid JSON.
        """
        # NOTE(review): the key travels in the query string over plain HTTP;
        # consider switching the scheme to https if the service supports it.
        base_url = 'http://api.census.gov/data/%s/%s?key=%s&get=' % (str(year), dataset, self.key)
        query = [','.join(fields)]
        query.extend(geo)
        url = base_url + '&'.join(query)
        req = urllib.request.Request(url)
        # Close the connection deterministically once the body has been read.
        with urllib.request.urlopen(req) as response:
            body = response.read().decode('utf8')
        # The body is JSON: json.loads also accepts null/true/false, which
        # ast.literal_eval rejects.
        return json.loads(body)


def _sum_by_age_bin(row, n_bins, n_groups):
    """Sum ``n_groups`` consecutive runs of ``n_bins`` columns, bin by bin.

    ``row`` holds ``n_groups`` blocks of ``n_bins`` values laid out back to
    back; element ``k`` of the result is
    ``row[k] + row[k + n_bins] + ...`` with every value converted via ``int``.
    """
    return [
        sum(int(row[group * n_bins + age]) for group in range(n_groups))
        for age in range(n_bins)
    ]


def _population_by_age_bin(row, offset):
    """Collapse 12 population columns starting at ``offset`` into 9 age bins.

    The first three columns (20, 21, 22-24) are merged into 20-24 and the
    last two (60-61, 62-64) into 60-64; the seven columns in between are
    already five-year bins and are taken as they are.
    """
    counts = [int(value) for value in row[offset:offset + 12]]
    return [sum(counts[:3])] + counts[3:10] + [sum(counts[10:])]


c = Census('YOUR_KEY_HERE')

bin_names = ('20-24', '25-29', '30-34', '35-39', '40-44', '45-49', '50-54', '55-59', '60-64')
bins = 9
# Marital-status groups counted as "single": never married, widowed,
# divorced, married but separated.
n_status_groups = 4

# Variable codes are listed at http://api.census.gov/data/2013/acs5/variables.html
# Each row below covers the age bins 20-24, 25-29, 30-34, 35-39, 40-44,
# 45-49, 50-54, 55-59, 60-64 for one marital status.
single_data_m = [
    # Male never married
    'B12002_006E', 'B12002_007E', 'B12002_008E', 'B12002_009E', 'B12002_010E',
    'B12002_011E', 'B12002_012E', 'B12002_013E', 'B12002_014E',
    # Male widowed
    'B12002_068E', 'B12002_069E', 'B12002_070E', 'B12002_071E', 'B12002_072E',
    'B12002_073E', 'B12002_074E', 'B12002_075E', 'B12002_076E',
    # Male divorced
    'B12002_083E', 'B12002_084E', 'B12002_085E', 'B12002_086E', 'B12002_087E',
    'B12002_088E', 'B12002_089E', 'B12002_090E', 'B12002_091E',
    # Male married but separated
    'B12002_038E', 'B12002_039E', 'B12002_040E', 'B12002_041E', 'B12002_042E',
    'B12002_043E', 'B12002_044E', 'B12002_045E', 'B12002_046E',
]
single_data_f = [
    # Female never married
    'B12002_099E', 'B12002_100E', 'B12002_101E', 'B12002_102E', 'B12002_103E',
    'B12002_104E', 'B12002_105E', 'B12002_106E', 'B12002_107E',
    # Female widowed
    'B12002_161E', 'B12002_162E', 'B12002_163E', 'B12002_164E', 'B12002_165E',
    'B12002_166E', 'B12002_167E', 'B12002_168E', 'B12002_169E',
    # Female divorced
    'B12002_176E', 'B12002_177E', 'B12002_178E', 'B12002_179E', 'B12002_180E',
    'B12002_181E', 'B12002_182E', 'B12002_183E', 'B12002_184E',
    # Female married but separated
    'B12002_131E', 'B12002_132E', 'B12002_133E', 'B12002_134E', 'B12002_135E',
    'B12002_136E', 'B12002_137E', 'B12002_138E', 'B12002_139E',
]
# Population by age: 20, 21, 22-24, 25-29, 30-34, 35-39, 40-44, 45-49, 50-54,
# 55-59, 60-61, 62-64 -- twelve male columns followed by twelve female columns.
gender_data = [
    # Male
    'B01001_008E', 'B01001_009E', 'B01001_010E', 'B01001_011E', 'B01001_012E',
    'B01001_013E', 'B01001_014E', 'B01001_015E', 'B01001_016E', 'B01001_017E',
    'B01001_018E', 'B01001_019E',
    # Female
    'B01001_032E', 'B01001_033E', 'B01001_034E', 'B01001_035E', 'B01001_036E',
    'B01001_037E', 'B01001_038E', 'B01001_039E', 'B01001_040E', 'B01001_041E',
    'B01001_042E', 'B01001_043E',
]

result = c.get(single_data_m, ['for=us:*'])
result2 = c.get(single_data_f, ['for=us:*'])
result3 = c.get(gender_data, ['for=us:*'])

# Row 1 of each reply is the data row; the values arrive as strings.
m_all = _population_by_age_bin(result3[1], 0)
f_all = _population_by_age_bin(result3[1], 12)

m_single = _sum_by_age_bin(result[1], bins, n_status_groups)
f_single = _sum_by_age_bin(result2[1], bins, n_status_groups)

ind = np.arange(bins)  # the x locations for the groups
width = 0.35  # the width of the bars

fig, ax = plt.subplots()
# align='edge' is spelled out so that each pair of bars spans
# [ind, ind + 2*width] and the tick at ind + width sits in the middle of the
# pair regardless of the matplotlib version's default alignment.
rects1 = ax.bar(ind, m_single, width, color='blue', align='edge')
rects11 = ax.bar(ind, m_all, width, color='blue', alpha=0.3, align='edge')
rects2 = ax.bar(ind+width, f_single, width, color='pink', align='edge')
rects22 = ax.bar(ind+width, f_all, width, color='pink', alpha=0.3, align='edge')

# add some text for labels, title and axes ticks
ax.set_ylabel('Population (Millions)')
ax.set_xlabel('Age')
ax.set_title('Gender Differences in Population and Singles By Age (2013 Census acs5)')
ax.set_xticks(ind+width)
ax.set_xticklabels(bin_names)
ax.grid()
ax.set_ylim([0, 1.7*10**7])
# Pin the tick positions (every 2 million) before labelling them in millions,
# so the labels cannot drift away from the values they describe.
yticks = np.arange(0, 1.7*10**7, 2*10**6)
ax.set_yticks(yticks)
ax.set_yticklabels([int(tick // 10**6) for tick in yticks])

ax.legend(
    (rects1[0], rects11[0], rects2[0], rects22[0]),
    ('Single Men', 'All Men', 'Single Women', 'All Women')
)

# Use plt.show() here instead to view the figure interactively.
plt.savefig('Gender Differences in Population and Singles By Age.png'.replace(' ', '_'))