Global Anestehsia Workforce Analysis

What is this notebook?

This notebook is the jupyter version of the streamlit app. It should use the same base data set but perhaps perform different calculations to iterate more quickly than on streamlit.

Code

import pandas as pd
import numpy as np
import plotly.express as px
import plotly.io as pio
# pio.renderers.default = "notebook_connected+png"
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import os
from itables import show
import seaborn.objects as so
import seaborn as sns

Code

df = pd.read_csv('analysis (korea and mkd).csv')
verification = pd.read_csv('verification.csv')
df_clean = df.copy() #unmodified dataframe without corrections

import gspread
client = gspread.service_account('generic-project-for-api.json')
sheet = client.open_by_url('https://docs.google.com/spreadsheets/d/1a_JIqrAgOhP5gQk3Xw0V_SqvT3imofDabAbuqABpikg/')
worksheet = sheet.get_worksheet_by_id(0)
data = worksheet.get_all_records()
df2 = pd.DataFrame(data)

url = 'https://docs.google.com/spreadsheets/d/1f6v1cGXgiQibiFMsvtVDjOfTPob-5aUrLaA4jy_Cu9I/'
sheet = client.open_by_url(url)
worksheet = sheet.get_worksheet_by_id(476816199)
vf = worksheet.get_all_records()
verification_df = pd.DataFrame(vf)

print(df[df['population'].isnull() & df['country'].notnull()][['country','population']])

countrypops = {
    'Taiwan, Republic of China' : 235.70000,
    'Cook Islands': .17564,
    'Somaliland' : 42.00000,
    'Niue' : .01626,
    'Tokelau' : .01357,
    'Eritrea': 35.46421
}
for i in countrypops:
    df['population'] = df.apply(lambda x: countrypops[i] if x['country'] == i else x['population'], axis=1)

##################################physician specialists
cert_spec_verifications = {
    'Chile':2131, 
    'Peru':2850,
    'Guatemala':554,
    'El Salvador':143,
    'Austria':2650,
    'Jordan':474,
    'Cyprus':207,
    'Switzerland':2016,
    'Timor-Leste':12,
    'Solomon Islands':3,
    'Samoa':3,
    'Burundi':4,
    'Turkey': 8570,
    'Spain':8000,
    'Indonesia':4134,
    'Morocco':830,
    'Colombia': 4148, #per Pedro after confirming with SCARE
    }

# function to take a dictionary of changes, and return the new number to the row
def verify_update(row, country, number):
    if country == row['country']:
        return number
    else:
        return row['certified_specialist']

# make changes to certified specialists
for i in cert_spec_verifications:
    df['certified_specialist'] = df.apply(verify_update, axis=1, args=(i,cert_spec_verifications.get(i)))

# correct USA in 2015
df.loc[df['country']=='United States of America','physicians2015'] = 47500

##################################NONspecialist physicians

nonspec_phys_verifications= {
    'Switzerland':679,
    'Solomon Islands':4,
    'Samoa':1,
    'Turkey':0,
    'Guatemala':0
}

for i in nonspec_phys_verifications:
    df['provider_number'] = df.apply(lambda x: nonspec_phys_verifications.get(i) if x['country']==i else x['provider_number'], axis=1)

##################################nurses specialists
nurse_num_verifications = {
    'Greece':1050,
    'Thailand':5000,
    'China':28200,
    'Saudi Arabia':5,
    'Indonesia':4000,
    'Serbia':700,
    'Solomon Islands':4,
    'Turkey':6486,
    'Australia':0,
    'Finland':3000,
    'Liberia':90,
    'Croatia': 1000,
    'Luxembourg': 409,
    'Germany':1400,
    'Austria':130,
    'Korea (Republic of)':680 # filled in 680 for nurse 1, nurse 2, and nurse 3. shouldy only be entered once
}

#make changes to nurse numbers
for i in nurse_num_verifications:
    df['nurse_num'] = df.apply(lambda x: nurse_num_verifications.get(i) if x['country']==i else x['nurse_num'], axis=1)

# thailand special case, reported nurses and non nurses, but in fact they are all nurse anesthetists and total about 5000
# Turkey has certified nurse technicians as 'other' , but they are really non-physician providers
# 
# set thailand nurse anesthetists to 5000 above, set all other NPA categories to nan
# set turkey technicians to 6486 above, set all other NPA categories to nan
# move finland other1 providers to nurses, set above, remove from other1 below
# same for croatia, move 1000 to nurses
# set korea nurses to 680 above, and remove all other nurse cadres to nan
for i in ['nurse_num2', 'nurse_num3', 'nurse_num4', 'other_1_numbers']:
    df[i] = df.apply(lambda x: np.nan if x['country']=='Thailand' else x[i], axis=1)
    df[i] = df.apply(lambda x: np.nan if x['country']=='Turkey' else x[i], axis=1)
    df[i] = df.apply(lambda x: np.nan if x['country']=='Finland' else x[i], axis=1)
    df[i] = df.apply(lambda x: np.nan if x['country']=='Croatia' else x[i], axis=1)
    df[i] = df.apply(lambda x: np.nan if x['country']=='Korea (Republic of)' else x[i], axis=1)


# somaliland in 2015 should have ben 40 nurses

                       country  population
53   Taiwan, Republic of China         NaN
134               Cook Islands         NaN
144                 Somaliland         NaN
148                       Niue         NaN
149                    Tokelau         NaN
184                    Eritrea         NaN
185                    Eritrea         NaN

Code

###########################################
############## variable calculation
############## defining column orders and map colors
###########################################)

isolist = df[['country', 'code3']].drop_duplicates()

# create bins for per capita figure
def percapitabins(row):
    if row['totalpap_cap'] >= 20:
        return ">= 20"
    elif row['totalpap_cap'] >=15 and row['totalpap_cap']<20:
        return "15-20"
    elif row['totalpap_cap'] >=10 and row['totalpap_cap']<15:
        return "10-15"
    elif row['totalpap_cap'] >=6 and row['totalpap_cap']<10:
        return "6-10"
    elif row ['totalpap_cap'] >=3 and row['totalpap_cap'] <6:
        return "3-6"
    elif row['totalpap_cap'] >= 1 and row['totalpap_cap'] <3:
        return "1-3"
    elif row['totalpap_cap'] <1:
        return "<1"
    else:
        return "no data"

catorder = {"totalpap_cap_bins":['>= 20','15-20', '10-15','6-10', '3-6', '1-3','<1','no data']}

colmap = {'>= 20':'#4a9563',
'15-20': '#87e3a6', 
'10-15': '#c0f1d0',
'6-10': '#d4f4df', 
'3-6': '#dcc127', 
'1-3': '#fe8a0e',
'<1': '#dd1e39',
'no data': 'gray'
}

# field conversion to numeric
dflist = [df, df_clean]

for i in dflist:
    i['nurses2015'] = pd.to_numeric(i['nurses2015'], errors='coerce')
    i['nurses2015_cap'] = pd.to_numeric(i['nurses2015_cap'], errors='coerce')

# create verification categories:
def verificationstatus(row):
    if row['Verified: you agree this number should be published'] == "TRUE":
        return "Verified"
    elif row['Have sent verification email'] == "TRUE" and row['Verified: you agree this number should be published'] == "FALSE":
        return "Attempted verification"
    else:
        return "Verification not required"

#Group for individual country analysis
dfg = df.groupby(['country','Region','wbincome']).mean(numeric_only=True)
dfg = dfg.reset_index()

#Group by World Bank income group
wbc = dfg.groupby('wbincome').sum(numeric_only=True, min_count=1)
wbc = wbc.reset_index()

#Group by WHO Regional group
who = dfg.groupby('Region').sum(numeric_only=True, min_count=1)
who = who.reset_index()

#Group by WHO regional group, minus USA and Canada
whonousa = dfg[dfg['country'] != 'United States of America']
whonousa = whonousa[whonousa['country'] != 'Canada']
whonousa = whonousa.groupby('Region').sum(numeric_only=True, min_count=1)
whonousa = whonousa.reset_index()

legendict = dict(orientation='h', 
    yanchor="bottom",
    y=1.02,
    xanchor="right",
    x=1)

dflist = [df, dfg,wbc,who,whonousa]
regionlist = ['European Region','African Region','Region of the Americas','Eastern Mediterranean Region','South-East Asia Region','Western Pacific Region']

for i in dflist:
    i['physicians2015_cap'] = i['physicians2015']/(i['population_2015']/100000)
    i['nurses2015_cap'] = i['nurses2015']/(i['population_2015']/100000)
    i['totalproviders2015'] = i[['physicians2015','nurses2015']].sum(axis=1, min_count=1)
    i['totalproviders2015_cap'] = i[['physicians2015_cap','nurses2015_cap']].sum(axis=1, min_count=1)
    i['totalnpap'] = i[['nurse_num', 'nurse_num2', 'nurse_num3', 'nurse_num4']].sum(axis=1, min_count=1)
    i['totalpap'] = i[['certified_specialist', 'provider_number']].sum(axis=1, min_count=1)
    i['totalproviders'] = i[['totalpap','totalnpap']].sum(axis=1, min_count=1)
    i['totalpap_cap'] = i['totalpap']/i['population']
    i['totalnpap_cap'] = i['totalnpap']/i['population']
    i['totalproviders_cap'] = i['totalproviders']/i['population']
    i['paptrainee'] = i[['trainee_current','trainees_current']].sum(axis=1,min_count=1)
    i['npaptrainee'] = i[['nurse_trainee','nurse_trainee2','nurse_trainee3','nurse_trainee4']].sum(axis=1, min_count=1)
    i['totaltrainee'] = i[['paptrainee','npaptrainee']].sum(axis=1,min_count=1)
    i['gradtrainee'] = i[['trainee_grad','trainees_grad','nurse_grad','nurse_grad2','nurse_grad3','nurse_grad4']].sum(axis=1, min_count=1)
    #genders
    i['totalpap_gender'] = i[['papgender', 'provider_number_2gender']].mean(axis=1)
    i['totalnpap_gender'] = i[['nurse_numgender', 'nurse_num2gender', 'nurse_num3gender', 'nurse_num4gender']].mean(axis=1)
    i['totalgender'] = i[['papgender', 'provider_number_2gender', 'nurse_numgender', 'nurse_num2gender', 'nurse_num3gender', 'nurse_num4gender']].mean(axis=1)
    #population difference
    i['population_2015'] = i['population_2015']/100000
    i['popdiff'] = i['population']-i['population_2015']
    #calculate difference in providers 2015 to now
    i['papdiff'] = i['totalpap']-i['physicians2015'] #absolute difference, number of physicians
    
    i['certifiedpapdiff'] = round(i['certified_specialist']-i['physicians2015'],2) #absolute difference, certified specialists
    i['totalpap_capdiff'] = i['totalpap_cap'] - i['physicians2015_cap'] # absolute difference, physicians per capita
    i['totalnpap_capdiff'] = i['totalnpap_cap'].sub(i['nurses2015_cap'], fill_value=0)
    i['totalprovidersdiff'] = i['totalproviders'] - i['totalproviders2015'] #abs difference, total providers
    i['totalproviders_cap_diff'] = i['totalproviders_cap'] - i['totalproviders2015_cap'] # absolute difference, total providers per capita

    #calculate proportional change in providers, 2015 to now
    # i['papdiffpct'] = round((1-((i['totalpap']/i['physicians2015'])))*-100,0) #proportion change, number of physicians
    # i['certifiedpapdiffpct'] = round((1-((i['certified_specialist']/i['physicians2015'])))*-100,0) # proportion change, number of physicians (certified onlyh)
    # i['physiciancap_pct'] = round((1-((i['totalpap_cap']/i['physicians2015_cap'])))*-100,0) #proportion change, physicians per capita
    # i['totalpap_capdiffpct'] = round((1-((i['totalpap_cap']/i['physicians2015_cap'])))*-100,0) # proportion change, physicians per capita DUPLICATE
    # i['totalprovidersdiffpct'] = round((1-((i['totalproviders']/i['totalproviders2015'])))*-100,0) #proportion change, total providers
    # i['totalproviders_cap_pct'] = round((1-((i['totalproviders_cap']/i['totalproviders2015_cap'])))*-100,0) #proportion change, total providers per capita
    i['papdiffpct'] = round(((i['totalpap']-i['physicians2015'])/i['physicians2015'])*100,0)
    i['certifiedpapdiffpct'] = round(((i['certified_specialist']-i['physicians2015'])/i['physicians2015'])*100,0)
    i['physiciancap_pct'] = round(((i['totalpap_cap']-i['physicians2015_cap'])/i['physicians2015_cap'])*100,0)
    i['totalpap_capdiffpct'] = i['physiciancap_pct']
    i['totalprovidersdiffpct'] = round(((i['totalproviders']-i['totalproviders2015'])/i['totalproviders2015'])*100,0)
    i['totalproviders_cap_pct'] =  round(((i['totalproviders_cap']-i['totalproviders2015_cap'])/i['totalproviders2015_cap'])*100,0) 
    i['total_cert_ratio'] = i['certifiedpapdiffpct']/i['papdiffpct']

    ##nurses percent difference
    i['npapdiff'] = i['totalnpap'].sub(i['nurses2015'], fill_value=0) #absolute difference, nurse numbers
    i['npapdiffpct'] = round((1-((i['totalnpap']/i['nurses2015'])))*-100,0) #proportion change, nurse numbers
    i['npapcapdiff']= round(((i['totalnpap_cap']-i['nurses2015_cap'])/i['nurses2015_cap'])*100,0) #proportion change, nurses per capita

provnumbers = dfg[['country','Region', 'certified_specialist','provider_number','totalpap','totalpap_cap', 'totalnpap','totalnpap_cap', 'totalproviders', 'totalproviders_cap','physicians2015', 'qualified_physicians2015', 'physicians2015_cap', 'physiciancap_pct','papdiff','papdiffpct','nurses2015','nurses2015_cap','npapdiff','npapdiffpct','npapcapdiff']].copy()

dfg = dfg.merge(isolist, on='country', how='left')
dfg = dfg.merge(verification_df, left_on='country', right_on='Country', how='left')
dfg = dfg.merge(verification, left_on='country', right_on='Country', how='left')

dfg['totalpap_cap_bins'] = dfg.apply(percapitabins, axis=1)
dfg['verification_status'] = dfg.apply(verificationstatus, axis=1)
dfg.to_csv('dfg.csv')
df.to_csv('df.csv')

for dylan

Code

# ctrylist = ['Australia','Canada','China','Colombia','Fiji','India','Lebanon']
ctrylist = ['Mexico','Norway','Pakistan','Paraguay','Romania','South Africa','Uganda','United States of America', 'Viet Nam']
collist = ['country','population','totalpap','totalnpap','totalpap_cap','totalproviders_cap']
npaplist = ['totalnpap','totalnpapcap']

dfg.loc[dfg['country']=='North Macedonia',collist]

	country	population	totalpap	totalnpap	totalpap_cap	totalproviders_cap
101	North Macedonia	20.7	237.0	NaN	11.449275	11.449275

Code

df[df['country']=='Korea (Republic of)'][['nurse_num', 'nurse_num2', 'nurse_num3', 'nurse_num4','totalnpap']]

	nurse_num	nurse_num2	nurse_num3	nurse_num4	totalnpap
28	680.0	NaN	NaN	NaN	680.0
29	680.0	NaN	NaN	NaN	680.0
30	680.0	NaN	NaN	NaN	680.0

Code

# dfg.loc[dfg['country'].isin(ctrylist)][collist].set_index('country').T.style.format({'totalpap':"{:,.0f}",'certified_specialist':"{:,.0f}",'totalnpap':"{:,.0f}",'totalproviders_cap':"{:,.1f}"})
dfg.loc[dfg['country'].isin(ctrylist)][collist].set_index('country').T.style.format(precision=1)

country	Mexico	Norway	Pakistan	Paraguay	Romania	South Africa	Uganda	United States of America	Viet Nam
population	1315.6	54.7	2294.9	73.1	190.7	607.6	484.3	3352.3	989.5
totalpap	9665.0	1099.0	3393.0	310.0	1159.0	1750.0	76.0	53250.0	1500.0
totalnpap	nan	2000.0	nan	160.0	nan	nan	500.0	56172.0	nan
totalpap_cap	7.3	20.1	1.5	4.2	6.1	2.9	0.2	15.9	1.5
totalproviders_cap	7.3	56.7	1.5	6.4	6.1	2.9	1.2	32.6	1.5

Code

mask = (dfg['Region_x'] == 'African Region') & (dfg['totalproviders_cap'] <=1)
mask = dfg['totalproviders_cap'] <=5
dfg[mask]

	country	Region_x	wbincome	Unnamed: 0	record_id	redcap_survey_identifier	certified_specialist	papgender	trainee_current	trainee_grad	...	% difference in PAP per capita compared to 2015_y	Absolute Difference in # PAPs per capita	Unnamed: 11	% difference in # of PAPs compared to 2015_y	verified	contacted	Verification Comments_y	Contact_y	totalpap_cap_bins	verification_status
5	Bangladesh	South-East Asia Region	Lower middle income	10.0	45.000000	NaN	1839.0	NaN	860.0	230.0	...	0.453811	660.0	52.0	52.00%	True	True	10/10 TL confirmed correct numbers	NaN	1-3	Verified
9	Benin	African Region	Lower middle income	213.0	16.000000	NaN	26.0	18.5	60.0	6.0	...	-0.637280	-35.0	-57.0	-57.00%	False	True	NaN	NaN	<1	Attempted verification
11	Bolivia	Region of the Americas	Lower middle income	194.0	24.000000	NaN	450.0	45.0	50.0	20.0	...	-0.441080	-270.0	-38.0	-38.00%	False	True	9.16 TL verification sent	NaN	3-6	Attempted verification
15	Burkina Faso	African Region	Low income	211.0	12.000000	NaN	57.0	23.0	69.0	7.0	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	<1	Verified
16	Burundi	African Region	Low income	31.0	75.000000	NaN	4.0	0.0	3.0	0.0	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	<1	Verified
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
151	Venezuela	Region of the Americas	Upper middle income	200.0	32.000000	NaN	800.0	67.0	800.0	510.0	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	1-3	Verification not required
152	Viet Nam	Western Pacific Region	Lower middle income	150.5	217.000000	NaN	1500.0	45.0	225.0	175.0	...	0.416537	500.0	50.0	50.00%	False	True	NaN	NaN	1-3	Attempted verification
153	Yemen	Eastern Mediterranean Region	Low income	59.0	102.000000	NaN	74.0	18.0	65.0	18.0	...	4.454534	80.0	533.0	533.00%	False	False	NaN	NaN	<1	Verification not required
154	Zambia	African Region	Lower middle income	39.0	109.333333	NaN	29.0	37.0	10.0	4.0	...	0.359669	50.0	63.0	63.00%	False	True	NaN	Data from macabels2002@gmail.com inconsistent ...	<1	Attempted verification
155	Zimbabwe	African Region	Lower middle income	174.0	249.000000	NaN	73.0	50.0	25.0	2.0	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	<1	Verification not required

66 rows × 115 columns

End dylan

Comparison of population differences

In the paper we talk about how some countries had a decrease in providers per 100k despite having an increase in absolute providers. This was attributed to population growth.

The editors have essentially asked if the reverse is also true. Did any countries or regions have an increase in providers per capita, despite having a decrease in absolute providers? If so this would be due to population shrinkage.

Code

cols = ['country','population','population_2015','totalproviders','totalproviders2015','totalproviders_cap','totalproviders2015_cap','totalproviders_cap_diff','totalprovidersdiff','popdiff']
dfg['popdiff'] = dfg['population'].sub(dfg['population_2015'])

Code

# which countries had a population decrease?
dfg.loc[dfg['popdiff']<0][cols]

	country	population	population_2015	totalproviders	totalproviders2015	totalproviders_cap	totalproviders2015_cap	totalproviders_cap_diff	totalprovidersdiff	popdiff
0	Albania	28.25000	28.97000	210.000000	350.0	7.433628	12.081464	-4.647835	-140.000000	-0.72000
6	Belarus	93.58000	94.96000	2395.000000	2185.0	25.593075	23.009688	2.583387	210.000000	-1.38000
12	Bosnia and Herzegovina	32.49000	38.10000	NaN	230.0	NaN	6.036745	NaN	NaN	-5.61000
25	Cook Islands	0.17564	0.21000	1.000000	1.0	5.693464	4.761905	0.931559	0.000000	-0.03436
27	Croatia	40.03000	42.40000	1676.333333	870.0	41.876926	20.518868	21.358058	806.333333	-2.37000
28	Cuba	113.06000	113.90000	1452.000000	1786.0	12.842738	15.680421	-2.837683	-334.000000	-0.84000
37	Eritrea	35.46421	52.28000	98.000000	57.0	2.763349	1.090283	1.673066	41.000000	-16.81579
45	Georgia	37.00000	40.00000	500.000000	500.0	13.513514	12.500000	1.013514	0.000000	-3.00000
48	Greece	105.83000	109.55000	2050.000000	1300.0	19.370689	11.866728	7.503961	750.000000	-3.72000
52	Hungary	96.90000	98.55000	2457.000000	900.0	25.356037	9.132420	16.223617	1557.000000	-1.65000
59	Italy	592.02000	597.98000	15000.000000	15500.0	25.336982	25.920599	-0.583617	-500.000000	-5.96000
60	Japan	1247.47000	1265.74000	4606.000000	12208.0	3.692273	9.644951	-5.952678	-7602.000000	-18.27000
68	Latvia	18.65000	19.70000	350.000000	846.0	18.766756	42.944162	-24.177406	-496.000000	-1.05000
73	Lithuania	27.40000	28.78000	851.000000	533.0	31.058394	18.519805	12.538589	318.000000	-1.38000
83	Mauritius	12.69000	12.73000	76.000000	55.0	5.988968	4.320503	1.668465	21.000000	-0.04000
86	Moldova	26.06000	40.69000	420.000000	470.0	16.116654	11.550750	4.565904	-50.000000	-14.63000
88	Montenegro	6.21000	6.26000	117.000000	81.0	18.840580	12.939297	5.901283	36.000000	-0.05000
101	North Macedonia	20.70000	20.78000	237.000000	206.0	11.449275	9.913378	1.535897	31.000000	-0.08000
105	Palau	0.18000	0.21000	3.000000	3.0	16.666667	14.285714	2.380952	0.000000	-0.03000
112	Poland	377.81000	386.12000	12400.000000	10480.0	32.820730	27.141821	5.678909	1920.000000	-8.31000
113	Portugal	102.34000	103.50000	1542.000000	1855.0	15.067422	17.922705	-2.855283	-313.000000	-1.16000
114	Romania	190.69000	195.11000	1159.000000	1400.0	6.077928	7.175439	-1.097512	-241.000000	-4.42000
120	Serbia	68.25000	88.51000	1600.000000	950.0	23.443223	10.733250	12.709973	650.000000	-20.26000
126	Somaliland	42.00000	234.15126	60.000000	5311.0	1.428571	22.681919	-21.253348	-5251.000000	-192.15126
131	Swaziland	11.85000	12.87000	7.000000	69.0	0.590717	5.361305	-4.770588	-62.000000	-1.02000
151	Venezuela	292.67000	311.08000	800.000000	690.0	2.733454	2.218079	0.515375	110.000000	-18.41000
155	Zimbabwe	153.31000	156.03000	73.000000	280.0	0.476159	1.794527	-1.318367	-207.000000	-2.72000

Code

mask_increasepercapita = dfg['totalproviders_cap'] >0
mask_decreaseabs = dfg['totalproviders'] <0

dfg.loc[mask_increasepercapita & mask_decreaseabs][cols]

	country	population	population_2015	totalproviders	totalproviders2015	totalproviders_cap	totalproviders2015_cap	totalproviders_cap_diff	totalprovidersdiff	popdiff

Code

who['popdiff'] = who['population'].sub(who['population_2015'])
who[['Region','population','population_2015','totalproviders','totalproviders2015','totalproviders_cap','totalproviders2015_cap','totalproviders_cap_diff','totalprovidersdiff','popdiff']]

	Region	population	population_2015	totalproviders	totalproviders2015	totalproviders_cap	totalproviders2015_cap	totalproviders_cap_diff	totalprovidersdiff	popdiff
0	African Region	10758.90421	9046.16212	21372.333333	15426.0	1.986479	1.705254	0.281225	5946.333333	1712.74209
1	Eastern Mediterranean Region	7070.41000	6268.46592	41171.000000	26313.0	5.823000	4.197678	1.625323	14858.000000	801.94408
2	European Region	8001.55000	7841.99000	207537.333333	180536.0	25.937141	23.021708	2.915434	27001.333333	159.56000
3	Region of the Americas	10172.69000	9713.66000	170986.000000	135050.0	16.808337	13.903101	2.905235	35936.000000	459.03000
4	South-East Asia Region	20339.73000	18920.16311	79069.166667	22889.0	3.887425	1.209768	2.677657	56180.166667	1419.56689
5	Western Pacific Region	19417.25547	18707.75000	149599.500000	103061.0	7.704462	5.509000	2.195461	46538.500000	709.50547

Code

who['population'].sum()*100000

7576053968.0

Population change in percent

Code

dfg['popdiffpct'] = dfg['population']/dfg['population_2015']

Code

# generate plotly boxplot for popdiff, facet by region
fig = px.box(dfg, x='Region_x', y='popdiffpct', color='Region_x', hover_data=['country'], title='Population change, 2015-2020', width=800, height=600, points='all')
fig.show()

Overall stats, change in providers, current total

Code

print('Overall change in total paps:', 
      dfg['totalpap'].sum() - dfg['physicians2015'].sum())

print('Overall change in PAP density: ',
      (dfg['totalpap'].sum()/dfg['population'].sum()) - (dfg['physicians2015'].sum()/dfg['population_2015'].sum()))

Overall change in total paps: 97140.5
Overall change in PAP density:  0.8864765863027477

Code

print('Overall average in total providers:',
      dfg['totalproviders_cap'].mean())

print('Another way: calculating total provider density as an aggregate: ',
      dfg['totalproviders'].sum() / dfg['population'].sum())

print('Overall PAP and NPAP density: ',
      dfg['totalpap'].sum()/dfg['population'].sum(),
      dfg['totalnpap'].sum()/dfg['population'].sum())

Overall average in total providers: 12.17627671789102
Another way: calculating total provider density as an aggregate:  8.84016053953925
Overall PAP and NPAP density:  6.5836582224304445 2.256502317108802

These two numbers give slightly different numbers, but the difference is small. The first is the average of the provider densities of each country. The second is the total number of providers divided by the total population.

Neither is more ‘correct’, but in Table 1 9.7 is the average of the income groups and that’s what we cite in the paper. One of the above numbers may be more accurate, but thinking in terms of income groups is also a natural way of doing so.

Table 1

Code

wbc['wbincome'] = pd.Categorical(wbc['wbincome'], ordered=True)
wbc['wbincome'] = wbc['wbincome'].cat.reorder_categories(['High income','Upper middle income','Lower middle income','Low income'], ordered=True)

Code

tablecolumns = ['totalpap','totalpap_cap','totalnpap','totalnpap_cap','totalproviders','totalproviders_cap']
t1 = who[['Region']+tablecolumns]
t1 = t1.set_index('Region')
#add total 
t1.loc['Overall (sum, *mean)'] = [t1['totalpap'].sum(), t1['totalpap_cap'].mean(), t1['totalnpap'].sum(), t1['totalnpap_cap'].mean(), t1['totalproviders'].sum(), t1['totalproviders_cap'].mean()]

Code

tab1= wbc[['wbincome']+tablecolumns].sort_values(by='wbincome')
tab1 = tab1.set_index('wbincome')
tab1.loc['Overall (sum, *mean)'] = [tab1['totalpap'].sum(), tab1['totalpap_cap'].mean(), tab1['totalnpap'].sum(), tab1['totalnpap_cap'].mean(), tab1['totalproviders'].sum(), tab1['totalproviders_cap'].mean()]

Code

from IPython.display import display, HTML
from jinja2 import Template

#### Start by having at least these lists
tab1cols = ['totalpap','totalpap_cap','totalnpap','totalnpap_cap','totalproviders','totalproviders_cap'] # names of the dataframe columns
tab1labels = ['Total PAP','Total PAP density*','Total NPAP', 'Total NPAP density*','Total providers','Total provider density*'] #labels of the dataframe columns

#### this can go in a different cell/file, it doesn't change.
table_head =  '''
<html>
<head>
<style>
.table-style {
    border: 1px solid white;
    text-align: center;
    border-collapse: collapse;
}

.table-style td{
    padding: 5px 5px;
}

.darkgray {
    background-color: #d9d9d9;
}

.section-break {
    border-top: 1.5pt solid black;
    border-bottom: 1.5pt solid black;
    background-color: #efefef;
}

.bottom-solid-hard{
    border-bottom: 1.5pt solid black;
}

.left-solid {
    border-left: 1pt solid black;
}

.right-solid {
    border-right: 1pt solid black;
}

.right-dash {
    border-right: 1pt dotted black;
}


</style>
</head>
'''

Code

table_str='''
<table class='table-style'>
    <thead>
        {# ######## FIRST MAKE THE COLUMNS IN THE HEADER #########  #}
                {# ######## THE COLUMNS #########  #}
        <tr class='darkgray'> 
            <td></td>
            {% for i in tab1labels %}
                <td class='left-solid'>{{i}} </td>
            {% endfor %}
        </tr>
    </thead>

    <tbody>
        {# ######## NOW MAKE THE BODY #########  #}
                {# ######## Second SECTION, colspan is NUMBER OF LABELS +1 #########  #}
        <tr class='section-break'><td colspan = 7> Income Group </td> </tr>
        
                {# ######## START EACH ROW WITH A LABEL #########  #}
                {% for i in tab1.index %}
            <tr>
                <td> {{i}} </td>
                        {# ######## NOW EACH COLUMN FOR THAT ROW FROM THE DATAFRAME #########  #}
                <td class='left-solid'> {{ tab1.at[i,'totalpap']|int }} </td>
                <td class='left-solid'> {{ tab1.at[i,'totalpap_cap']|round(1) }} </td>
                <td class='left-solid'> {{ tab1.at[i,'totalnpap'] |int}} </td>
                <td class='left-solid'> {{ tab1.at[i,'totalnpap_cap'] |round(1) }} </td>
                <td class='left-solid'> {{ tab1.at[i,'totalproviders']|int }} </td>
                <td class='left-solid'> {{ tab1.at[i,'totalproviders_cap'] |round(1)}} </td>
            </tr>
                {% endfor %}

                {# ######## Second SECTION, colspan is NUMBER OF LABELS +1 #########  #}
        <tr class='section-break'><td colspan = 7> WHO Region </td> </tr>
        
                {# ######## START EACH ROW WITH A LABEL #########  #}
                {% for i in t1.index %}
            <tr>
                <td> {{i}} </td>
                        {# ######## NOW EACH COLUMN FOR THAT ROW FROM THE DATAFRAME #########  #}
                <td class='left-solid'> {{ t1.at[i,'totalpap']|int }} </td>
                <td class='left-solid'> {{ t1.at[i,'totalpap_cap']|round(1) }} </td>
                <td class='left-solid'> {{ t1.at[i,'totalnpap'] |int}} </td>
                <td class='left-solid'> {{ t1.at[i,'totalnpap_cap'] |round(1) }} </td>
                <td class='left-solid'> {{ t1.at[i,'totalproviders']|int }} </td>
                <td class='left-solid'> {{ t1.at[i,'totalproviders_cap'] |round(1)}} </td>
            </tr>
                {% endfor %}

    </tbody>

</table>
</html>

'''

template=Template(table_head+table_str)

with open('table1.html', "w") as text_file:
    print(template.render(tab1cols=tab1cols, t1=t1, tab1=tab1, tab1labels=tab1labels), file=text_file)

HTML(template.render(tab1cols=tab1cols, t1=t1, tab1=tab1, tab1labels=tab1labels))

	Total PAP	Total PAP density*	Total NPAP	Total NPAP density*	Total providers	Total provider density*
Income Group
High income	194106	16.1	94065	7.8	288171	23.8
Upper middle income	215808	7.5	53369	1.9	269177	9.4
Lower middle income	87229	3.0	13231	0.5	100460	3.5
Low income	1637	0.3	10288	1.7	11925	2.0
Overall (sum, *mean)	498781	6.7	170953	2.9	669735	9.7
WHO Region
African Region	5949	0.6	15422	1.4	21372	2.0
Eastern Mediterranean Region	26306	3.7	14865	2.1	41171	5.8
European Region	167174	20.9	40363	5.0	207537	25.9
Region of the Americas	113320	11.1	57666	5.7	170986	16.8
South-East Asia Region	70020	3.4	9049	0.4	79069	3.9
Western Pacific Region	116011	6.0	33588	1.7	149599	7.7
Overall (sum, *mean)	498781	7.6	170953	2.7	669735	10.4

Table 2

Code

table2cols = ['papdiff','totalpap_capdiff','npapdiff','totalnpap_capdiff','totalprovidersdiff','totalproviders_cap_diff']
tab1 = wbc[['wbincome']+table2cols].sort_values(by='wbincome')

tab1 = tab1.set_index('wbincome')
tab1.loc['Overall (sum, *mean)'] = [tab1['papdiff'].sum(), tab1['totalpap_capdiff'].mean(), tab1['npapdiff'].sum(), tab1['totalnpap_capdiff'].mean(), tab1['totalprovidersdiff'].sum(), tab1['totalproviders_cap_diff'].mean()]
tab1

Code

tab2 = who[['Region']+table2cols]
tab2 = tab2.set_index('Region')
tab2.loc['Overall (sum, *mean)'] = [tab2['papdiff'].sum(), tab2['totalpap_capdiff'].mean(), tab2['npapdiff'].sum(), tab2['totalnpap_capdiff'].mean(), tab2['totalprovidersdiff'].sum(), tab2['totalproviders_cap_diff'].mean()]

tab2

Code

tab2cols = tab1.columns.to_list()

#these are the tab2labels:
#Change in PAPs
# Change in PAP density
# Change in NPAPs
# Change in NPAP density
# Change in total providers
# Change in total provider density

tab2labels = ['Change in PAPs','Change in PAP density','Change in NPAPs','Change in NPAP density','Change in total providers','Change in total provider density']

table_str='''
<table class='table-style'>
    <thead>
        {# ######## FIRST MAKE THE COLUMNS IN THE HEADER #########  #}
                {# ######## THE COLUMNS #########  #}
        <tr class='darkgray'> 
            <td></td>
            {% for i in tab2labels %}
                <td class='left-solid'>{{i}} </td>
            {% endfor %}
        </tr>
    </thead>

    <tbody>
        {# ######## NOW MAKE THE BODY #########  #}
                {# ######## Second SECTION, colspan is NUMBER OF LABELS +1 #########  #}
        <tr class='section-break'><td colspan = 7> Income Group </td> </tr>
        
                {# ######## START EACH ROW WITH A LABEL #########  #}
                {% for i in tab1.index %}
            <tr>
                <td> {{i}} </td>
                        {# ######## NOW EACH COLUMN FOR THAT ROW FROM THE DATAFRAME #########  #}
                <td class='left-solid'> {{ tab1.at[i,'papdiff']|int }} </td>
                <td class='left-solid'> {{ tab1.at[i,'totalpap_capdiff']|round(1) }} </td>
                <td class='left-solid'> {{ tab1.at[i,'npapdiff'] |int}} </td>
                <td class='left-solid'> {{ tab1.at[i,'totalnpap_capdiff'] |round(1) }} </td>
                <td class='left-solid'> {{ tab1.at[i,'totalprovidersdiff']|int }} </td>
                <td class='left-solid'> {{ tab1.at[i,'totalproviders_cap_diff'] |round(1)}} </td>
            </tr>
                {% endfor %}

                {# ######## Second SECTION, colspan is NUMBER OF LABELS +1 #########  #}
        <tr class='section-break'><td colspan = 7> WHO Region </td> </tr>
        
                {# ######## START EACH ROW WITH A LABEL #########  #}
                {% for i in tab2.index %}
            <tr>
                <td> {{i}} </td>
                        {# ######## NOW EACH COLUMN FOR THAT ROW FROM THE DATAFRAME #########  #}
                <td class='left-solid'> {{ tab2.at[i,'papdiff']|int }} </td>
                <td class='left-solid'> {{ tab2.at[i,'totalpap_capdiff']|round(1) }} </td>
                <td class='left-solid'> {{ tab2.at[i,'npapdiff'] |int}} </td>
                <td class='left-solid'> {{ tab2.at[i,'totalnpap_capdiff'] |round(1) }} </td>
                <td class='left-solid'> {{ tab2.at[i,'totalprovidersdiff']|int }} </td>
                <td class='left-solid'> {{ tab2.at[i,'totalproviders_cap_diff'] |round(1)}} </td>
            </tr>
                {% endfor %}

    </tbody>

</table>
</html>

'''

template=Template(table_head+table_str)

with open('table2.html', "w") as text_file:
    print(template.render(tab2cols=tab2cols, tab2labels = tab2labels, tab2=tab2, tab1=tab1), file=text_file)

HTML(template.render(tab2cols=tab2cols, tab2labels = tab2labels, tab2=tab2, tab1=tab1, ))

	Change in PAPs	Change in PAP density	Change in NPAPs	Change in NPAP density	Change in total providers	Change in total provider density
Income Group
High income	8707	-0.1	28309	2.0	37016	1.9
Upper middle income	42012	1.2	52836	1.8	94848	3.1
Lower middle income	47359	1.5	5847	0.2	53206	1.7
Low income	-939	-0.2	2327	0.2	1388	-0.0
Overall (sum, *mean)	97140	0.6	89319	1.1	186460	1.7
WHO Region
African Region	-164	-0.1	6110	0.4	5946	0.3
Eastern Mediterranean Region	5974	0.5	8884	1.1	14858	1.6
European Region	18569	1.9	8432	1.0	27001	2.9
Region of the Americas	12236	0.7	23700	2.2	35936	2.9
South-East Asia Region	47387	2.2	8793	0.4	56180	2.7
Western Pacific Region	13138	0.5	33400	1.7	46538	2.2
Overall (sum, *mean)	97140	1.0	89319	1.1	186460	2.1

Change in PAPs, minus USA and Canada

Code

tab2 = whonousa[['Region']+table2cols]
tab2 = tab2.set_index('Region')
tab2.loc['Overall (sum, *mean)'] = [tab2['papdiff'].sum(), tab2['totalpap_capdiff'].mean(), tab2['npapdiff'].sum(), tab2['totalnpap_capdiff'].mean(), tab2['totalprovidersdiff'].sum(), tab2['totalproviders_cap_diff'].mean()]

tab2.loc['Region of the Americas']

papdiff                    7272.000000
totalpap_capdiff              0.760614
npapdiff                   1442.000000
totalnpap_capdiff             0.223742
totalprovidersdiff         8714.000000
totalproviders_cap_diff       0.984356
Name: Region of the Americas, dtype: float64

Supplemental table

Code

inspect = dfg[['country','Region_x',
               'physicians2015',
               'physicians2015_cap',
               'totalpap',
               'totalpap_cap',
               'papdiff',
               'totalpap_capdiff',
               'nurses2015',
               'totalnpap',
               'totalnpap_cap',           
               'npapdiff',
               'totalnpap_capdiff',
               'totalprovidersdiff',
               'totalproviders_cap',
               'totalproviders_cap_diff',
               'totalproviders2015_cap']]
inspect

	country	Region_x	physicians2015	physicians2015_cap	totalpap	totalpap_cap	papdiff	totalpap_capdiff	nurses2015	totalnpap	totalnpap_cap	npapdiff	totalnpap_capdiff	totalprovidersdiff	totalproviders_cap	totalproviders_cap_diff	totalproviders2015_cap
0	Albania	European Region	260.0	8.974802	210.0	7.433628	-50.0	-1.541173	90.0	NaN	NaN	-90.0	-3.106662	-140.0	7.433628	-4.647835	12.081464
1	Algeria	African Region	1550.0	3.907629	500.0	1.102536	-1050.0	-2.805093	3500.0	2500.0	5.512679	-1000.0	-3.310999	-2050.0	6.615215	-6.116091	12.731306
2	Argentina	Region of the Americas	4760.0	10.963447	4900.0	10.608816	140.0	-0.354631	0.0	NaN	NaN	0.0	0.000000	140.0	10.608816	-0.354631	10.963447
3	Australia	Western Pacific Region	5535.0	23.092328	5400.0	20.573780	-135.0	-2.518548	0.0	0.0	0.000000	0.0	0.000000	-135.0	20.573780	-2.518548	23.092328
4	Austria	European Region	3362.0	39.344646	2650.0	29.599017	-712.0	-9.745629	0.0	130.0	1.452027	130.0	1.452027	-582.0	31.051044	-8.293602	39.344646
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
151	Venezuela	Region of the Americas	690.0	2.218079	800.0	2.733454	110.0	0.515375	0.0	NaN	NaN	0.0	0.000000	110.0	2.733454	0.515375	2.218079
152	Viet Nam	Western Pacific Region	1000.0	1.070114	1500.0	1.515856	500.0	0.445742	NaN	NaN	NaN	NaN	NaN	500.0	1.515856	0.445742	1.070114
153	Yemen	Eastern Mediterranean Region	15.0	0.055903	95.0	0.304927	80.0	0.249024	NaN	NaN	NaN	NaN	NaN	80.0	0.304927	0.249024	0.055903
154	Zambia	African Region	79.0	0.487293	129.0	0.662558	50.0	0.175264	2.0	100.0	0.513611	98.0	0.501274	148.0	1.176168	0.676539	0.499630
155	Zimbabwe	African Region	87.0	0.557585	73.0	0.476159	-14.0	-0.081426	193.0	NaN	NaN	-193.0	-1.236942	-207.0	0.476159	-1.318367	1.794527

156 rows × 17 columns

Code

columnlabeldict = {
    'country': 'Country',
    'Region_x': 'WHO Region', 
    'physicians2015': '# of PAPs 2016', 
    'physicians2015_cap': 'PAP density (2016)',
    'totalpap': '# of PAPs',
    'totalpap_cap': 'PAP density', 
    'papdiff': 'PAP difference', 
    'totalpap_capdiff': 'PAP density difference', 
    'nurses2015': '# of NPAP 2016',
    'totalnpap': '# of NPAPs',
    'totalnpap_cap': 'NPAP density',
    'npapdiff': 'NPAP difference',
    'totalnpap_capdiff': 'NPAP density difference',
    'totalprovidersdiff': 'Total provider difference', 
    'totalproviders_cap': 'Total provider density',
    'totalproviders_cap_diff': 'Total provider density difference',
    'totalproviders2015_cap': 'Total provider density (2016)'
}

Code

#rename columns according to the dictionary
inspect = inspect.rename(columns=columnlabeldict)

Code

# to csv
inspect.to_csv('Supplementaltable.csv')

Figure 3

Plotly

Code

incomeorder = ['High income', 'Upper middle income','Lower middle income','Low income']

t1 = px.bar(wbc, x='wbincome', y=['totalpap_capdiff','totalproviders_cap_diff'], barmode='group', labels={'variable':""},
       template='plotly_white',
       text_auto=True, color_discrete_sequence=['darkblue','lightblue']).update_layout(
    xaxis_title='',
    xaxis_categoryorder='array',
    xaxis_categoryarray=incomeorder,
    yaxis_title='Change in providers per 100,000 people',
    legend=legendict,
    width=1000,
    height=600,
    template='plotly_white'
).update_traces(texttemplate='%{y:.2f}')
t1.update_yaxes(showline=True, ticksuffix="  ").update_xaxes(showline=True)

labeldict={'totalpap_capdiff':'Difference in PAP density', 'totalproviders_cap_diff':'Difference in total provider density'}

for trace in t1['data']:
    trace['name'] = labeldict[trace['name']]


#t1.write_image('figures/fig3 - pap_and_totalprovider_cap_diff.jpg', width=1000, height=562, scale=3, engine='kaleido')
t1

Seaborn

Code

import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns
import pandas as pd

sns.set_style("whitegrid")

# Define the income order for x-axis
income_order = ['High income', 'Upper middle income', 'Lower middle income', 'Low income']

# Melt the data to convert it to long format
wbc_melted = wbc.melt(id_vars='wbincome', value_vars=['totalpap_capdiff', 'totalproviders_cap_diff'])

# Reorder the DataFrame based on 'income_order'
wbc_melted['wbincome'] = pd.Categorical(wbc_melted['wbincome'], categories=income_order, ordered=True)
wbc_melted = wbc_melted.sort_values('wbincome')

# Create a grouped bar plot using catplot
g = sns.catplot(x='wbincome', y='value', hue='variable', data=wbc_melted, kind='bar',
                palette={'totalpap_capdiff': 'darkblue', 'totalproviders_cap_diff': 'lightblue'},
                height=6, aspect=1.5, legend_out=False)

# Set axis labels and titles
g.set_axis_labels('', 'Change in provider density (per 100,000 population)')
g.set_yticklabels([f'{tick:.2f}' for tick in g.ax.get_yticks()])

# Remove top and right spines
sns.despine(right=True, top=True)

# Add grid lines for the y-axis
g.ax.yaxis.grid(linestyle='--', alpha=0.5)

# Set plot title and tighten layout
plt.tight_layout()

# Add value labels to the bars
for p in g.ax.patches[:8]:
    height = p.get_height()
    width = p.get_width()
    x, y = p.get_xy()
    value = height if height >= 0 else height - 0.13  # Adjust the vertical alignment for negative values
    g.ax.annotate(f'{height:.2f}', (x + width / 2, y + value),
                  ha='center', va='bottom', fontsize=12)

# Custom legend with colors matching the bar colors
legend_patches = [
    mpatches.Patch(color='darkblue', label='Difference in PAP density'),
    mpatches.Patch(color='lightblue', label='Difference in total provider density')
]
g.ax.legend(handles=legend_patches, loc='upper right', bbox_to_anchor=(0.99, 0.99), frameon=False)

# Save the figure
plt.savefig('figures/fig3 - pap_and_totalprovider_cap_diff.jpg', dpi=1200)

# Display the figure
plt.show()

Change in PAPs per capita

Code

mapfig = px.choropleth(dfg,
                                    locations='code3',
                                    locationmode='ISO-3',
                                    scope='world',
                                    color='totalpap_capdiff',
                                    hover_name='country',
                                    hover_data=['totalpap_capdiff'],
                                     color_continuous_scale=px.colors.diverging.RdBu,
                                    # color_continuous_scale=px.colors.sequential.Rainbow,
                                    #color_continuous_scale=[(0,px.colors.qualitative.Safe[1]), (0.5, 'white'), (1,px.colors.qualitative.Prism[3])],
                                    range_color=[-10,10],
                                     color_continuous_midpoint=0,
                                    projection='miller'
                                    ).update_layout(margin={"r":0,"t":0,"l":0,"b":0})

mapfig

Figure 2: Regional PAPs vs NPAPS

Code

#to change the order of regions for this particular figure
regionlist = ['European Region','Region of the Americas','Western Pacific Region', 'Eastern Mediterranean Region','South-East Asia Region','African Region']

Plotly horizontal

Plotly vertical

Code

fig = make_subplots(rows=3, cols=2, subplot_titles=regionlist, vertical_spacing=0.03, horizontal_spacing=.25,
                    x_title='Provider density (per 100,000 people)')

dtemp = dfg

dfg=dfg[~dfg['country'].isin(['Tokelau', 'Niue'])]
#dfg['country_shr'] = dfg['country'].str.slice(0,10)

colcount = [1,2,1,2,1,2]
rowcount = [1,1,2,2,3,3]
yax1 = [1,2,3,4,5,6]
yax2 = [7,8,9,10,11,12]
for i,j,k,g,h in zip(regionlist, rowcount, colcount, yax1, yax2):
    data=dfg[dfg['Region_x']==i]
    pxfig = px.bar(data, y='country',x=['totalpap_cap','totalnpap_cap'])
    fig.add_trace(go.Bar(x=pxfig.data[0]['x'], y=pxfig.data[0]['y'], marker=dict(color='darkblue'), name=('PAP'), orientation='h'), row=j, col=k)
    fig.add_trace(go.Bar(x=pxfig.data[1]['x'], y=pxfig.data[1]['y'], marker=dict(color='lightblue'), name='NPAP', orientation='h'), row=j, col=k)
fig.update_yaxes(categoryorder='total ascending', dtick=1, ticksuffix="   ", linecolor='darkgray')
fig.update_xaxes(showline=True, linecolor='darkgray', gridcolor='lightgray')
fig.update_layout(barmode='stack', template='plotly_white', width=1100, height=1800, margin_t=25)
for trace in range(2,12):
    fig.data[trace]['showlegend'] = False

#fig.write_image('figures/fig2_horizontal.jpg', width=600, height=1100, scale=3, engine='kaleido')

dfg = dtemp
fig.show()

Seaborn

Code

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

sns.set_style("whitegrid")

regionlist = ['European Region','Region of the Americas','Western Pacific Region', 'Eastern Mediterranean Region','South-East Asia Region','African Region']

fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(12, 22), sharex=False, sharey=False, gridspec_kw={'hspace': 0.15, 'wspace':0.5})
axes = axes.flatten()

for i, region in enumerate(regionlist):
    data = dfg[dfg['Region_x'] == region]
    data = data[~data['country'].isin(['Tokelau', 'Niue'])]

    sorted_countries = data[['country', 'totalpap_cap', 'totalnpap_cap']].set_index('country').sum(axis=1).sort_values(ascending=False)

    sns.barplot(x=sorted_countries.values, y=sorted_countries.index, ax=axes[i], color='lightblue', label='NPAP', order=sorted_countries.index)
    sns.barplot(x='totalpap_cap', y='country', ax=axes[i], data=data, color='darkblue', label='PAP', order=sorted_countries.index)

    sns.despine(right=True, ax=axes[i])
    axes[i].set_ylabel('')
    axes[i].set_xlabel('Provider density (per 100,000 people)')
    axes[i].set_title(region)

    # Add a vertical line at x=4
    axes[i].axvline(x=4, color='r', linestyle='--', linewidth=1)

    if i == 1:
        axes[i].legend(frameon=False, bbox_to_anchor=(1.07, 1), loc='upper left')

    if i < 4:
        axes[i].set_xlabel('')

for i in range(len(regionlist), 6):
    fig.delaxes(axes[i])

plt.tight_layout()
plt.savefig('figures/fig2_subplots.jpg', dpi=300, bbox_inches='tight')
plt.show()

Code

for i in regionlist:
    data = dfg[dfg['Region_x']==i]
    fig = go.Figure()
    pxfig = px.bar(data, y='country',x=['totalpap_cap','totalnpap_cap'])
    fig.add_trace(go.Bar(x=pxfig.data[0]['x'], y=pxfig.data[0]['y'], marker=dict(color='darkblue'), name=('PAP'), orientation='h'))
    fig.add_trace(go.Bar(x=pxfig.data[1]['x'], y=pxfig.data[1]['y'], marker=dict(color='lightblue'), name='NPAP', orientation='h'))
    fig.update_yaxes(categoryorder='total ascending', dtick=1)
    fig.update_layout(barmode='stack', template='plotly_white')
    fig.write_image('figures/fig2_'+str(i)+'.jpg', width=1000,height=750,scale=2)

by income group

Code

incomelist = ['High income','Upper middle income','Lower middle income','Low income']

fig = make_subplots(rows=3, cols=2, subplot_titles=incomelist, vertical_spacing=0.2)

dtemp = dfg

dfg=dfg[~dfg['country'].isin(['Tokelau', 'Niue'])]
dfg['country_shr'] = dfg['country'].str.slice(0,10)

colcount = [1,2,1,2,1,2]
rowcount = [1,1,2,2,3,3]
yax1 = [1,2,3,4,5,6]
yax2 = [7,8,9,10,11,12]
for i,j,k,g,h in zip(incomelist, rowcount, colcount, yax1, yax2):
    data=dfg[dfg['wbincome']==i]
    pxfig = px.bar(data, x='country_shr',y=['totalpap_cap','totalnpap_cap'])
    fig.add_trace(go.Bar(x=pxfig.data[0]['x'], y=pxfig.data[0]['y'], marker=dict(color='darkblue'), name=('PAP')), row=j, col=k)
    fig.add_trace(go.Bar(x=pxfig.data[1]['x'], y=pxfig.data[1]['y'], marker=dict(color='lightblue'), name='NPAP'), row=j, col=k)
fig.update_xaxes(categoryorder='total descending')
fig.update_layout(barmode='stack', template='plotly_white')
for trace in range(2,8):
    fig.data[trace]['showlegend'] = False

fig.write_image('figures/fig2x.jpg', width=1000, height=750, scale=3, engine='kaleido')

dfg = dtemp
fig.show()

Code

t1 = px.bar(dfg[dfg['Region_x']=='Region of the Americas'], x='country', y=['totalpap_cap','totalnpap_cap'], labels={'variable':""},
       title='Total anesthesia providers per 100,000 population (PAP and NPAP)',
       color_discrete_sequence=['darkblue','lightblue']).update_layout(
    xaxis_categoryorder='total descending',
    yaxis_title='Total providers per 100,000',
    xaxis_title=''
)
labeldict={'totalpap_cap':'PAP', 'totalnpap_cap':'NPAP'}

for trace in t1['data']:
    trace['name'] = labeldict[trace['name']]

#t1.write_image('americas_pap_npap.jpg', width=1000, height=500, scale=3, engine='kaleido')
t1

Figure 4

Code

subplots = make_subplots(rows=1, cols=3, horizontal_spacing=0.07)#,
                         #subplot_titles=['Absolute difference in total providers (#)', 'Difference in total provider density','Percentage change in <br> total provider density (percent)'])

def addcustomplot(var):
    data = dfg[((dfg[var].notnull()) & (dfg[var] != np.inf))].sort_values(by=[var], ascending=False).head()
    data = pd.concat([data, dfg[dfg[var].notnull()].sort_values(by=[var], ascending=False).tail()])
    fig = go.Bar(x=data['country'], y=data[var])
    return fig

t1 = dfg[dfg['country']=='North Macedonia']['totalprovidersdiffpct']

#plot 1
subplots.add_trace(addcustomplot('totalprovidersdiff'), row=1,col=1)
subplots.update_yaxes(title_text='Difference in total providers (#)').update_traces(marker=dict(color='#1f77b4'), row=1, col=1)

#plot 2
subplots.add_trace(addcustomplot('totalproviders_cap_diff'), row=1,col=2)
subplots.update_yaxes(title_text='Difference in provider density', row=1,col=2).update_traces(marker=dict(color='#6baed6'), row=1, col=2)

#plot 3
subplots.add_trace(addcustomplot('totalproviders_cap_pct'), row=1,col=3)
subplots.update_yaxes(title_text='% change in provider density', row=1, col=3).update_traces(marker_color='#b3cde3', row=1, col=3)

#subplots.update_layout(title='Top and bottom changes in providers (absolute, per capita, and percent change)')
subplots.update_layout(template='plotly_white', showlegend=False, width=1750, height=600, margin=dict(l=20, r=20, t=20, b=20))

(subplots.update_xaxes(tickangle=270, 
                       tickfont=dict(size=16), 
                       linecolor='lightgray')
 .update_yaxes(showgrid=True, 
               tickfont_size=1, 
               title_font_size=20, 
               title_standoff=0, 
               linecolor='lightgray', 
               linewidth=1)
)

#subplots.write_image('figures/fig 4 - diff_metrics plotly.jpg', engine='kaleido', scale=3)
subplots

as 2x2 grid

Code

# import matplotlib.pyplot as plt
# import seaborn as sns
# import numpy as np

# sns.set_style("whitegrid")

# # Your custom function modified for Seaborn
# def addcustomplot(df, var, ax, y_label, plot_color):
#     data = df[((df[var].notnull()) & (df[var] != np.inf))].sort_values(by=[var], ascending=False).head()
#     data = data.append(df[df[var].notnull()].sort_values(by=[var], ascending=False).tail())
#     plot = sns.barplot(x='country', y=var, data=data, ax=ax, palette=[plot_color])
#     plot.set(xlabel=None)  # remove the x axis label "country"
#     plot.set(ylabel=y_label)  # use the current subplot titles as y axis labels
#     plot.yaxis.label.set_fontsize(12)  # Increase y-axis label font size
#     for item in plot.get_xticklabels():  # rotate x labels for better fit
#         item.set_rotation(90)
#         item.set_fontsize(12)  # Increase x-axis label font size
#     return ax

# # Set up the subplot structure with a 2x2 grid, but we will only use three of these
# fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(15, 15), gridspec_kw={'height_ratios': [1, 1], 'hspace': 0.5, 'wspace': 0.2})

# # Add your plots and increase y axis label font size

# # Define custom colors to match the desired shades
# darkblue = '#1f77b4'     # Equivalent to Plotly's 'darkblue'
# middleblue = '#6baed6'   # A middle shade of blue
# lightblue = '#b3cde3'    # Equivalent to Plotly's 'lightblue'

# colors = [darkblue, middleblue, lightblue]

# addcustomplot(dfg, 'totalprovidersdiff', axes[0, 0], 'Difference in total providers (#)', colors[0])
# addcustomplot(dfg, 'totalproviders_cap_diff', axes[0, 1], 'Difference in provider density', colors[1])
# addcustomplot(dfg, 'totalproviders_cap_pct', axes[1, 0], '% change in provider density', colors[2])

# for i in range(2):
#     for j in range(2):
#         axes[i, j].get_yaxis().get_label().set_fontsize(14)
#         axes[i, j].yaxis.labelpad = 10  # Increase space between y-axis label and the grid
#         axes[i, j].grid(axis='y', linestyle='--', alpha=0.5)  # Add gridlines for y-axis

# # Remove the top and right side borders of the plots
# for ax in axes.flat:
#     ax.spines['top'].set_visible(False)
#     ax.spines['right'].set_visible(False)

# # Remove the unused subplot
# fig.delaxes(axes[1, 1])

# # Adjust bottom margin to make space for x-axis labels
# plt.subplots_adjust(bottom=.2)

# # Tight layout often produces nice results
# # It adjusts subplot params so that subplots are nicely fit in the figure
# plt.tight_layout()

# # Save your figure
# plt.savefig('figures/fig 4 - diff_metrics.jpg', dpi=300, bbox_inches='tight')

# # Display the figure
# plt.show()

as 3x1 grid

Code

################
############ make into 1 row

import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

sns.set_style("whitegrid")

# Your custom function modified for Seaborn
def addcustomplot(df, var, ax, y_label, plot_color):
    data = df[((df[var].notnull()) & (df[var] != np.inf))].sort_values(by=[var], ascending=False).head()
    data = pd.concat([data, dfg[dfg[var].notnull()].sort_values(by=[var], ascending=False).tail()])
    plot = sns.barplot(x='country', y=var, data=data, ax=ax, palette=[plot_color])
    plot.set(xlabel=None)  # remove the x axis label "country"
    plot.set(ylabel=y_label)  # use the current subplot titles as y axis labels
    plot.yaxis.label.set_fontsize(12)  # Increase y-axis label font size
    for item in plot.get_xticklabels():  # rotate x labels for better fit
        item.set_rotation(90)
        item.set_fontsize(12)  # Increase x-axis label font size
    return ax

# Set up the subplot structure with a 2x2 grid, but we will only use three of these
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(20, 6), gridspec_kw={'wspace': 0.3})

# Add your plots and increase y axis label font size

# Define custom colors to match the desired shades
darkblue = '#1f77b4'     # Equivalent to Plotly's 'darkblue'
middleblue = '#6baed6'   # A middle shade of blue
lightblue = '#b3cde3'    # Equivalent to Plotly's 'lightblue'

colors = [darkblue, middleblue, lightblue]

addcustomplot(dfg, 'totalprovidersdiff', axes[0], 'Difference in total providers (#)', colors[0])
addcustomplot(dfg, 'totalproviders_cap_diff', axes[1], 'Difference in providers per 100,000 population', colors[1])
addcustomplot(dfg, 'totalproviders_cap_pct', axes[2], '% change in providers per 100,000 population', colors[2])

for i in range(3):
    axes[i].get_yaxis().get_label().set_fontsize(14)
    axes[i].yaxis.labelpad = 10  # Increase space between y-axis label and the grid
    axes[i].grid(axis='y', linestyle='--', alpha=0.5)  # Add gridlines for y-axis

# Remove the top and right side borders of the plots
for ax in axes.flat:
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)

# Remove the unused subplot
#fig.delaxes(axes[1, 1])

# Adjust bottom margin to make space for x-axis labels
plt.subplots_adjust(bottom=.2)

# Tight layout often produces nice results
# It adjusts subplot params so that subplots are nicely fit in the figure
plt.tight_layout()

# Save your figure
plt.savefig('figures/fig 4 - diff_metrics.jpg', dpi=1200, bbox_inches='tight')

# Display the figure
plt.show()

Response rate

There are 6 possible groups of responses:

UN member countries, contacted responded
UN member countries, contacted, didn’t respond
UN member countries, did not contact
non UN member countries, contacted, responded
non UN member countries, contacted, didn’t respond
non UN member countries, did not contact

Code

df2['Email'] = df2.apply(lambda y: np.nan if y['Email']=="" else y['Email'], axis=1)
df2['Home Country'] = df2.apply(lambda x: np.nan if x['Home Country']=="" else x['Home Country'], axis=1)

Code

un = pd.read_csv("country responses.csv")
un = un[['allmembers', 'code3']].copy()
un = un[un['allmembers'].notnull()]
un['allmembers'] = un['allmembers'].str.strip()

Code

templist = ['Bosnia and Herzegovina',
 'Niue',
 'Somaliland',
 'Taiwan, Republic of China',
 'Tokelau']

for i in templist:
    df['code3'] = df.apply(lambda x: 'nil' if x['country'] == i else x['code3'], axis=1)

Code

#countries with responses are those with total providers >1
countrieswithresponses = df[df['totalproviders']>0][['country','code3']]

# UN countries with responses are those countries with responses, that are also in the UN list
un['response'] = un.apply(lambda x: "Has data" if x['allmembers'] in countrieswithresponses['country'].tolist() else 'Have no data', axis=1)
nodata = un['response'].str.contains('Have no data')

# for those countries in the UN list with NO responses, do we have emails for them?
un.loc[nodata,'response'] = un.loc[nodata].apply(lambda x: "No data, contacted" if x['allmembers'] in df2.query('Email.notnull()')['Home Country'].tolist() else "No data, not contacted", axis=1)
un

	allmembers	code3	response
0	Afghanistan	AFG	No data, contacted
1	Albania	ALB	Has data
2	Algeria	DZA	Has data
3	Andorra	AND	No data, not contacted
4	Angola	AGO	No data, contacted
...	...	...	...
188	Venezuela	VEN	Has data
189	Viet Nam	VNM	Has data
190	Yemen	YEM	Has data
191	Zambia	ZMB	Has data
192	Zimbabwe	ZWE	Has data

193 rows × 3 columns

Code

px.histogram(un, x='response').update_xaxes(categoryorder='total descending')

Code

un[un['response'].str.contains('No data')]['allmembers'].tolist()

['Afghanistan',
 'Andorra',
 'Angola',
 'Antigua and Barbuda',
 'Armenia',
 'Azerbaijan',
 'Bahamas',
 'Bahrain',
 'Barbados',
 'Bosnia and Herzegovina',
 'Botswana',
 'Bulgaria',
 'Cape Verde',
 'Cameroon',
 'Comoros',
 'Congo (Brazzaville)',
 'Djibouti',
 'Dominica',
 'Equatorial Guinea',
 'Grenada',
 'Guinea',
 'Guinea-Bissau',
 'Haiti',
 'Ireland',
 'Jamaica',
 'Korea, North',
 'Kyrgyzstan',
 'Liechtenstein',
 'Monaco',
 'Qatar',
 'Saint Kitts and Nevis',
 'Saint Lucia',
 'Saint Vincent and the Grenadines',
 'San Marino',
 'Sao Tome and Principe',
 'Seychelles',
 'Slovakia',
 'South Sudan',
 'Suriname',
 'Tajikistan',
 'Turkmenistan',
 'Ukraine',
 'United Arab Emirates',
 'Uzbekistan']

Code

un['response'].value_counts()

response
Has data                  149
No data, contacted         23
No data, not contacted     21
Name: count, dtype: int64

Code

show(un[un['response']=='No data, contacted'])

	allmembers	code3	response
Loading... (need help?)

Code

# what are the places with responses that aren't in the UN list?
countrieswithresponses[~countrieswithresponses['country'].isin(un['allmembers'].tolist())]['country'].value_counts()

country
Palestine                    3
Taiwan, Republic of China    1
Cook Islands                 1
Somaliland                   1
Niue                         1
Tokelau                      1
Name: count, dtype: int64

Code

# are there places with responses that aren't in the contact list!?
countrieswithresponses[~countrieswithresponses['country'].isin(df2['Home Country'].tolist())]

	country	code3
144	Somaliland	nil

Code

# are there countries in the contact list that aren't in the un list?
print('these are all non-countries')
df2[~df2['Home Country'].isin(un['allmembers'].tolist())]['Home Country'].value_counts()

these are all non-countries

Home Country
Taiwan, Republic of China    3
West Africa                  2
Palestine                    2
Kosovo                       2
French Polynesia             2
Hong Kong                    2
Republika Srpska             1
Aruba                        1
Bermuda                      1
Puerto Rico                  1
Cook Islands                 1
Niue                         1
Tokelau                      1
Name: count, dtype: int64

Code

# places in the contact list, that aren't countries, that dont have responses
df2[df2['Email'].notnull() & ~df2['Home Country'].isin(un['allmembers'].tolist()) & ~df2['Home Country'].isin(countrieswithresponses['country'].tolist())]['Home Country'].value_counts()

Home Country
Kosovo              2
French Polynesia    2
Hong Kong           2
West Africa         1
Republika Srpska    1
Puerto Rico         1
Name: count, dtype: int64

In summary:

Code

print('There are 193 UN member countries')
print('Of these, we have data from ',un['response'].value_counts()['Has data'],' of them')
print('Of the countries with no data, we had contact information and tried to contact ',un['response'].value_counts()['No data, contacted'], 'of them')
print('Of the countries with no data, we lacked contact information for',un['response'].value_counts()['No data, not contacted'], 'of them')
print('We also have data from places that are not countries')
print('There are',len(countrieswithresponses[~countrieswithresponses['country'].isin(un['allmembers'].tolist())]['country'].value_counts()), 'responses from those')
# what about non countries that we attempted to contact?

There are 193 UN member countries
Of these, we have data from  149  of them
Of the countries with no data, we had contact information and tried to contact  23 of them
Of the countries with no data, we lacked contact information for 21 of them
We also have data from places that are not countries
There are 6 responses from those

How many countries w/ providers <5

Code

# realcountries is the countries from dfg that are present in the df called 'un', which is a csv of un member countries
# necessary to exclude those answers that aren't real countries
realcountries = dfg.loc[dfg['code3'].isin(un['code3'].value_counts().index)]

Code

# number of countries w/ pap <5
len(realcountries[realcountries['totalpap_cap']<5])

Code

# number of countries w/ total prov <5
len(realcountries[realcountries['totalproviders_cap']<5])

acknowledgements

Code

df[df['interest']=='Yes'][['name','email','country']]

	name	email	country
0	Francisco A Lobo	francisco.lobo@me.com	Portugal
1	KARTIK MUDLIAR	mudliarkartik@gmail.com	Fiji
2	Nebojsa Ladjevic	nladjevic@yahoo.com	Serbia
5	christian Wamberg	christian.aage.wamberg.02@regionh.dk	Denmark
6	Owain Thomas	odt@cantab.net	Sweden
...	...	...	...
214	ZOUMENOU	ezoumenou@gmail.com	Benin
215	DEHDOUH ABDELKADER	dehdouhaek@hotmail.com	Algeria
216	Ngboh-kassa Kainy Boniface	bonifacekassa01@gmail.com	Central African Republic
217	Bilal Dah	bilalmodah@yahoo.fr	Mauritania
218	Dr Philippe WELTER	phwelter@pt.lu	Luxembourg

168 rows × 3 columns

Code

df2= pd.read_csv('gawstemplate.csv')

Code

df3 = pd.DataFrame(columns=['record_id','name','email','country','contact_info_complete','record_id_a74727', 'ack_consent', 'consent_for_acknowledgement_complete'])
df3['name'] = df2['name']
df3['email'] = df2['email']
df3['country'] = df2['country']

Code

df3 = df3.drop_duplicates(subset='email')

Code

df3['record_id'] = df3.index

Code

df3

	record_id	name	email	country	contact_info_complete	record_id_a74727	ack_consent	consent_for_acknowledgement_complete
0	0	Rakotondrainibe	aaurelia.rakotondrainibe@gmail.com	Madagascar	NaN	NaN	NaN	NaN
1	1	Amanda Cormier	acormier@cas.ca	Canada	NaN	NaN	NaN	NaN
2	2	Prof. Agzam ZHUMADILOV	agzamzh@gmail.com	Kazakhstan	NaN	NaN	NaN	NaN
3	3	Ahmed El Safi	ahmadalsafi@gmail.com	Sudan	NaN	NaN	NaN	NaN
4	4	Dr. Abdelkarim AlOweidi	akaloweidi@hotmail.com	Jordan	NaN	NaN	NaN	NaN
...	...	...	...	...	...	...	...	...
133	133	Viktor Kubricht	viktor.kubricht@gmail.com	Czech Republic	NaN	NaN	NaN	NaN
134	134	Vishaal Kissoon	vishq5@yahoo.com	Mauritius	NaN	NaN	NaN	NaN
135	135	Veronica Varas	vvaras@sachile.cl	Chile	NaN	NaN	NaN	NaN
136	136	Yonatan M. Andemeskel	yonimer2@gmail.com	Eritrea	NaN	NaN	NaN	NaN
137	137	ZORICA KARDOŠ	zorica.kardos@gmail.com	Slovenia	NaN	NaN	NaN	NaN

138 rows × 8 columns

Code

from redcap import Project
from config import *
# oxygen calculator database
api_key = logins['REDCAP_CALCULATOR']
api_url = 'https://redcap.ucsf.edu/api/'

project = Project(api_url, api_key)

Code

project.export_records(format_type='df')

	name	email	country	contact_info_complete	ack_consent	consent_for_acknowledgement_complete
record_id
1	Rakotondrainibe	aaurelia.rakotondrainibe@gmail.com	Madagascar	0	1.0	2
2	Amanda Cormier	acormier@cas.ca	Canada	0	NaN	0
3	Prof. Agzam ZHUMADILOV	agzamzh@gmail.com	Kazakhstan	0	NaN	2
4	Ahmed El Safi	ahmadalsafi@gmail.com	Sudan	0	NaN	0
5	Dr. Abdelkarim AlOweidi	akaloweidi@hotmail.com	Jordan	0	NaN	0
...	...	...	...	...	...	...
135	Vishaal Kissoon	vishq5@yahoo.com	Mauritius	0	1.0	2
136	Veronica Varas	vvaras@sachile.cl	Chile	0	1.0	2
137	Yonatan M. Andemeskel	yonimer2@gmail.com	Eritrea	0	NaN	0
138	ZORICA KARDOŠ	zorica.kardos@gmail.com	Slovenia	0	NaN	0
139	Tyler Law	tyler.law@gmail.com	Burkina Faso	2	NaN	0

139 rows × 6 columns

verification status

are there any countries in the verification list that aren’t in the countries with responses? There shouldn’t be…

Code

verification_df[~verification_df['Country'].isin(countrieswithresponses['country'].tolist())]['Country']

Series([], Name: Country, dtype: object)

are there any countries with data that are not in the country verification list? Shouldn’t be - the verification list should contain all the countries with data, and whether or not they need to be verified.

Code

countrieswithresponses[~countrieswithresponses['country'].isin(verification_df['Country'].tolist())]['country']

Series([], Name: country, dtype: object)

Number of countries undergoing verification

Code

# i'm so sorry this has to be done but bosnia has a response to OTHER_1 providers, so it's included in the groupby, but NOT included in the countries with responses!
# so it should really be deleted from dfg. dfg should have been defined as countries with responses.
dfg=dfg[~dfg['country'].str.contains('Bosnia')]
dfg['verification_status'].value_counts().sum()

Code

len(countrieswithresponses.value_counts().index)

total number of countries is 156 because I deleted bosnia in dfg earlier, see comment

Code

df['country'].value_counts()

country
Morocco      4
Croatia      4
Austria      4
Slovenia     3
Zambia       3
            ..
Swaziland    1
Namibia      1
Jordan       1
Gambia       1
Senegal      1
Name: count, Length: 156, dtype: int64

Code

dfg['verification_status'].value_counts()

verification_status
Verification not required    67
Attempted verification       54
Verified                     34
Name: count, dtype: int64

Code

dfg['verification_status'].value_counts(normalize=True)

verification_status
Verification not required    0.432258
Attempted verification       0.348387
Verified                     0.219355
Name: proportion, dtype: float64

trainee data

Code

traineecols = ['paptrainee','npaptrainee','totaltrainee','gradtrainee']
regiondrop = ['African Region', 'Eastern Mediterranean Region', 'European Region',
       'Region of the Americas', 'South-East Asia Region',
       'Western Pacific Region']

Code

def crossfx2(var1):
    taba =  pd.crosstab(dfg['Region_x'], dfg['wbincome'], values=dfg[var1], aggfunc=sum, margins=True)
    taba = taba.drop(['High income', 'Low income','Lower middle income','Upper middle income'],axis=1)
    tabb = pd.crosstab(dfg['Region_x'], dfg['wbincome'], values=dfg[var1], aggfunc=sum, normalize='columns', margins=True)
    tabb = tabb.drop(['High income', 'Low income','Lower middle income','Upper middle income'],axis=1)
    tab2 = taba.join(tabb, lsuffix='_n', rsuffix = '_%')
    tab2.columns=[i+'_n', i+'%']
    return tab2

Code

dfs = []

for i in traineecols:
    tab2 = crossfx2(i)
    dfs.append(tab2)

traineexregion = pd.concat(dfs, axis=1)
traineexregion

	paptrainee_n	paptrainee%	npaptrainee_n	npaptrainee%	totaltrainee_n	totaltrainee%	gradtrainee_n	gradtrainee%
Region_x
African Region	2566.500000	0.027735	5597.0	0.217186	8163.500000	0.069002	2857.500000	0.062986
Eastern Mediterranean Region	4582.333333	0.049519	2491.0	0.096661	7073.333333	0.059788	1956.000000	0.043115
European Region	37528.166667	0.405548	5016.5	0.194661	42544.666667	0.359611	13854.333333	0.305382
Region of the Americas	22134.500000	0.239197	8181.0	0.317456	30315.500000	0.256244	10642.500000	0.234586
South-East Asia Region	16262.333333	0.175739	115.0	0.004462	16377.333333	0.138430	4505.833333	0.099319
Western Pacific Region	9463.000000	0.102262	4370.0	0.169574	13833.000000	0.116924	11551.000000	0.254611
All	92536.833333	NaN	25770.5	NaN	118307.333333	NaN	45367.166667	NaN

Code

def crossfx3(var1):
    taba =  pd.crosstab(dfg['wbincome'], dfg['Region_x'], values=dfg[var1], aggfunc=sum, margins=True)
    taba = taba.drop(regiondrop,axis=1)
    tabb = pd.crosstab(dfg['wbincome'], dfg['Region_x'], values=dfg[var1], aggfunc=sum, normalize='columns', margins=True)
    tabb = tabb.drop(regiondrop,axis=1)
    tab2 = taba.join(tabb, lsuffix='_n', rsuffix = '_%')
    tab2.columns=[i+'_n', i+'%']
    return tab2

Code

dfs = []

for i in traineecols:
    tab2 = crossfx3(i)
    dfs.append(tab2)

traineexwbincome = pd.concat(dfs, axis=1)
traineexwbincome = traineexwbincome.reindex(['High income', 'Upper middle income','Lower middle income','Low income','All'])
traineexwbincome

	paptrainee_n	paptrainee%	npaptrainee_n	npaptrainee%	totaltrainee_n	totaltrainee%	gradtrainee_n	gradtrainee%
wbincome
High income	42503.666667	0.459316	17040.5	0.661241	59544.166667	0.503301	21652.833333	0.477280
Upper middle income	28562.833333	0.308664	619.0	0.024020	29181.833333	0.246661	15099.333333	0.332825
Lower middle income	20255.333333	0.218889	2370.0	0.091966	22625.333333	0.191242	6917.000000	0.152467
Low income	1215.000000	0.013130	5741.0	0.222774	6956.000000	0.058796	1698.000000	0.037428
All	92536.833333	NaN	25770.5	NaN	118307.333333	NaN	45367.166667	NaN

This is the same as below

Code

wbc[['paptrainee','npaptrainee','totaltrainee','gradtrainee']].reset_index().style.format(precision=0)

	index	paptrainee	npaptrainee	totaltrainee	gradtrainee
0	0	42504	17040	59544	21653
1	1	1215	5741	6956	1698
2	2	20255	2370	22625	6917
3	3	28563	619	29182	15099

Code

t1 = who[['Region','paptrainee','npaptrainee','totaltrainee','gradtrainee']].style.format(precision=0)
t1

	Region	paptrainee	npaptrainee	totaltrainee	gradtrainee
0	African Region	2566	5597	8164	2858
1	Eastern Mediterranean Region	4582	2491	7073	1956
2	European Region	37528	5016	42545	13854
3	Region of the Americas	22134	8181	30316	10642
4	South-East Asia Region	16262	115	16377	4506
5	Western Pacific Region	9463	4370	13833	11551

Code

from IPython.display import display, HTML
from jinja2 import Template

Code

traineelabels = ['PAP trainees','NPAP trainees','Total trainees', 'Graduates per year']
table_head =  '''
<html>
<head>
<style>
.table-style {
    border: 1px solid white;
    text-align: center;
    border-collapse: collapse;
}

.table-style td{
    padding: 5px 5px;
}

.darkgray {
    background-color: #d9d9d9;
}

.section-break {
    border-top: 1.5pt solid black;
    border-bottom: 1.5pt solid black;
    background-color: #efefef;
}

.bottom-solid-hard{
    border-bottom: 1.5pt solid black;
}

.left-solid {
    border-left: 1pt solid black;
}

.right-solid {
    border-right: 1pt solid black;
}

.right-dash {
    border-right: 1pt dotted black;
}


</style>
</head>
'''


table_str='''

<table class='table-style'>
    <thead>
        <tr class='darkgray'> 
            <td></td>
            {% for i in traineelabels %}
                <td class='left-solid'>{{i}} </td>
            {% endfor %}
        </tr>

        <tr class='darkgray bottom-solid-hard'>
        <th></th>
        {# ######## n, % headers #########          #}

        {% for i in traineelabels %}
            <th class='left-solid'>n (%)</th>
        {% endfor %}
    
        </tr>
    </thead>

    <tbody>
        <tr class='section-break'><td colspan = 5> Income Group </td> </tr>
        {% for i in traineexwbincome.index %}
            <tr>
                <td> {{i}} </td>
            {% for j in traineecols %}
                <td class='left-solid'> {{ traineexwbincome.at[i,j+'_n']|int }} ({{ (traineexwbincome.at[i,j+'%']*100)|int}}) </td>
            {% endfor %}
            </tr>
        {% endfor %}

        <tr class='section-break'><td colspan = 5> WHO Region </td> </tr>

        {% for i in traineexregion.index %}
            <tr>
                <td> {{i}}
                {% for j in traineecols %}
                    <td class='left-solid'> {{ traineexregion.at[i,j+'_n']|int}} ({{ (traineexregion.at[i,j+'%']*100)|int}}) </td>
                {%endfor%}
            </tr>
        {% endfor %}
    </tbody>

</table>
</html>

'''

template=Template(table_head+table_str)

with open('table_trainee.html', "w") as text_file:
    print(template.render(traineecols = traineecols, traineexwbincome = traineexwbincome, traineexregion=traineexregion, traineelabels=traineelabels), file=text_file)

HTML(template.render(traineecols = traineecols, traineexwbincome = traineexwbincome, traineexregion=traineexregion, traineelabels=traineelabels))

	PAP trainees	NPAP trainees	Total trainees	Graduates per year
	n (%)	n (%)	n (%)	n (%)
Income Group
High income	42503 (45)	17040 (66)	59544 (50)	21652 (47)
Upper middle income	28562 (30)	619 (2)	29181 (24)	15099 (33)
Lower middle income	20255 (21)	2370 (9)	22625 (19)	6917 (15)
Low income	1215 (1)	5741 (22)	6956 (5)	1698 (3)
All	92536 (0)	25770 (0)	118307 (0)	45367 (0)
WHO Region
African Region	2566 (2)	5597 (21)	8163 (6)	2857 (6)
Eastern Mediterranean Region	4582 (4)	2491 (9)	7073 (5)	1956 (4)
European Region	37528 (40)	5016 (19)	42544 (35)	13854 (30)
Region of the Americas	22134 (23)	8181 (31)	30315 (25)	10642 (23)
South-East Asia Region	16262 (17)	115 (0)	16377 (13)	4505 (9)
Western Pacific Region	9463 (10)	4370 (16)	13833 (11)	11551 (25)
All	92536 (0)	25770 (0)	118307 (0)	45367 (0)

Code

td = {'paptrainee':'Physicians','npaptrainee':'NPAPs'}

fig = px.bar(who, x='Region', y=['paptrainee','npaptrainee'], barmode='group', labels={'paptrainee':'Physician trainees'}
    ).update_layout(template='plotly_white', 
                    legend_title='Trainee type',
                    yaxis_title='Number of trainees',
                    xaxis_categoryorder='total descending').for_each_trace(lambda t: t.update(name = td[t.name]))

fig.show()

fig.write_image('trainees.jpg', width=1000, height=750, scale=2, engine='kaleido')

Code

fig = px.bar(wbc, x='wbincome', y=['paptrainee','npaptrainee'], barmode='stack', labels={'paptrainee':'Physician trainees'}
    ).update_layout(template='plotly_white', 
                    legend_title='Trainee type',
                    yaxis_title='Number of trainees',
                    xaxis_categoryorder='total descending')
td = {'paptrainee':'Physicians','npaptrainee':'NPAPs'}
fig.for_each_trace(lambda t: t.update(name = td[t.name]))

fig.show()

fig.write_image('trainees.jpg', width=1000, height=750, scale=2, engine='kaleido')

ICU Data

Code

means = df.groupby(by=['Region', 'wbincome']).mean(numeric_only=True)

Does a certified pathway exist for becoming an ICU physician?

Code

pd.crosstab(df['Region'], df['critcare'], margins=True, normalize=False)

critcare	I don't know, or cannot provide data	No	Yes	All
Region
African Region	5	27	9	41
Eastern Mediterranean Region	0	9	15	24
European Region	2	11	43	56
Region of the Americas	0	3	27	30
South-East Asia Region	0	3	11	14
Western Pacific Region	0	15	18	33
All	7	68	123	198

Code

px.bar(df, x='Region', color='critcare', barmode='stack')

Code

df['critcare'].value_counts()

critcare
Yes                                     123
No                                       68
I don't know, or cannot provide data      7
Name: count, dtype: int64

What proportion of patients have care by ICU physician?

Code

df['crit_providers'].mean()

54.69178082191781

Code

df.pivot_table(index='Region', columns='wbincome', values='crit_providers', aggfunc=np.mean, margins=True)

wbincome	High income	Low income	Lower middle income	Upper middle income	All
Region
African Region	0.000000	43.357143	36.700000	12.500000	37.000000
Eastern Mediterranean Region	42.500000	2.500000	54.800000	39.500000	44.050000
European Region	88.923077	NaN	100.000000	59.000000	80.552632
Region of the Americas	100.000000	NaN	32.500000	74.235294	70.500000
South-East Asia Region	NaN	NaN	61.000000	39.500000	53.181818
Western Pacific Region	51.142857	NaN	26.583333	12.857143	29.500000
All	76.585366	38.250000	42.977273	52.044444	54.691781

Code

dfg.pivot_table(index='Region_x', columns='wbincome', values=['totalproviders_cap'], aggfunc=np.sum, margins=True).style.format(precision=1)

	totalproviders_cap
wbincome	High income	Low income	Lower middle income	Upper middle income	All
Region_x
African Region	6.0	29.1	26.6	10.3	72.0
Eastern Mediterranean Region	63.5	8.7	42.4	43.7	158.4
European Region	893.4	nan	16.1	158.4	1067.9
Region of the Americas	86.9	nan	30.5	106.2	223.7
South-East Asia Region	nan	nan	23.1	20.6	43.6
Western Pacific Region	120.0	nan	105.6	96.2	321.8
All	1169.7	37.8	244.3	435.4	1887.3

Code

t1 = dfg.pivot_table(index='Region_x', columns='wbincome', values=['totalproviders_cap'], aggfunc=np.mean)
t1 = t1.droplevel(0, axis=1)
t1
px.imshow(t1)

Code


t1 = px.scatter(dfg[dfg['crit_providers'].notnull()], x='country',y='crit_providers', color='wbincome', template='plotly_white',
category_orders={'wbincome':incomeorder}, 
labels={'crit_providers':'Percent of patients cared for by critical care physician',
'wbincome':'Income Group',
'country':'Country'}).update_layout(xaxis_categoryorder='total descending', legend_orientation='h', legend_y=1.1)

t1.write_image('prop_crit.jpg', width=1000, height=750, scale=2, engine='kaleido')
t1

How many ICU specialists are there?

Code

px.box(dfg, x='wbincome', y='crit_number', points=False).update_layout(xaxis_categoryorder='total descending')

Code

px.scatter(dfg, x='country', y='crit_number').update_layout(xaxis_categoryorder='total descending')

Is the training sufficent to provide ICU care solo?

Code

pd.crosstab(df['wbincome'], df['training_sufficient'], normalize='index')

training_sufficient	No	Yes
wbincome
High income	0.357143	0.642857
Low income	0.571429	0.428571
Lower middle income	0.510638	0.489362
Upper middle income	0.600000	0.400000

Code

df['training_sufficient'].value_counts(normalize=True)

training_sufficient
Yes    0.505747
No     0.494253
Name: proportion, dtype: float64

Code

pd.crosstab(df['Region'], df['training_sufficient'])

training_sufficient	No	Yes
Region
African Region	15	20
Eastern Mediterranean Region	10	10
European Region	10	38
Region of the Americas	23	6
South-East Asia Region	8	5
Western Pacific Region	20	9

Code

px.histogram(df, x='Region', color='training_sufficient', barnorm='percent')

Is there a certified pathway for ICU nursing?

Code

df['crit_nurse'].value_counts(normalize=True)

crit_nurse
No     0.516484
Yes    0.483516
Name: proportion, dtype: float64

Code

pd.crosstab(df['Region'], df['crit_nurse'], normalize='index')

crit_nurse	No	Yes
Region
African Region	0.631579	0.368421
Eastern Mediterranean Region	0.285714	0.714286
European Region	0.490196	0.509804
Region of the Americas	0.464286	0.535714
South-East Asia Region	0.428571	0.571429
Western Pacific Region	0.666667	0.333333

Code

px.scatter(df, x='country', y='crit_nurse')

Gender

Code

data = dfg.groupby(by='wbincome').mean(numeric_only=True)[['totalgender','totalpap_gender','totalnpap_gender']]
data2 = dfg.groupby(by='Region_x').mean(numeric_only=True)[['totalgender','totalpap_gender','totalnpap_gender']]

Code

incomeorder = ['High income', 'Upper middle income','Lower middle income','Low income']

Code

data = data.reindex(incomeorder)
data

	totalgender	totalpap_gender	totalnpap_gender
wbincome
High income	48.241135	43.662879	63.242188
Upper middle income	42.631944	43.388889	35.150000
Lower middle income	37.120370	36.959459	39.138889
Low income	31.910417	30.525000	35.740741

Code

data2

	totalgender	totalpap_gender	totalnpap_gender
Region_x
African Region	32.315236	29.403226	38.592172
Eastern Mediterranean Region	35.213542	34.625000	51.750000
European Region	54.280952	51.343434	67.444444
Region of the Americas	48.363636	48.787879	52.812500
South-East Asia Region	36.944444	40.388889	25.400000
Western Pacific Region	35.623457	33.250000	36.348214

Code

gendercols = ['totalpap_gender','totalnpap_gender','totalgender']
genderlabels = ['PAPs','NPAPs','All providers']

table_str='''
<table class='table-style'>
    <thead>
        {# ######## FIRST MAKE THE COLUMNS IN THE HEADER #########  #}
                {# ######## THE COLUMNS #########  #}
        <tr class='darkgray'> 
            <td></td>
            {% for i in genderlabels %}
                <td class='left-solid'>{{i}} </td>
            {% endfor %}
        </tr>
    </thead>

    <tbody>
        {# ######## NOW MAKE THE BODY #########  #}
                {# ######## FIRST SECTION #########  #}
        <tr class='section-break'><td colspan = 4> Income Group </td> </tr>
        
                {# ######## START EACH ROW WITH A LABEL #########  #}
                {% for i in data.index %}
            <tr>
                <td> {{i}} </td>
                        {# ######## NOW EACH COLUMN FOR THAT ROW FROM THE DATAFRAME #########  #}
            {% for j in gendercols %}
                <td class='left-solid'> {{ data.at[i,j]|int }} </td>
            {% endfor %}
            </tr>
        {% endfor %}

        {# ######## Second SECTION #########  #}
        <tr class='section-break'><td colspan = 4> Region </td> </tr>
        
                {# ######## START EACH ROW WITH A LABEL #########  #}
                {% for i in data2.index %}
            <tr>
                <td> {{i}} </td>
                        {# ######## NOW EACH COLUMN FOR THAT ROW FROM THE DATAFRAME #########  #}
            {% for j in gendercols %}
                <td class='left-solid'> {{ data2.at[i,j]|int }} </td>
            {% endfor %}
            </tr>
        {% endfor %}

    </tbody>

</table>
</html>

'''

template=Template(table_head+table_str)

with open('table_gender.html', "w") as text_file:
    print(template.render(data=data, data2=data2, genderlabels=genderlabels, gendercols=gendercols), file=text_file)

#PUT ALL YOUR VARIABLES IN HERE
HTML(template.render(data=data, data2=data2, genderlabels=genderlabels, gendercols=gendercols))

	PAPs	NPAPs	All providers
Income Group
High income	43	63	48
Upper middle income	43	35	42
Lower middle income	36	39	37
Low income	30	35	31
Region
African Region	29	38	32
Eastern Mediterranean Region	34	51	35
European Region	51	67	54
Region of the Americas	48	52	48
South-East Asia Region	40	25	36
Western Pacific Region	33	36	35

Code

t1 = px.scatter(dfg, x='country',y='totalgender', color='wbincome', template='plotly_white', labels={
    'wbincome':'Income group',
    'totalgender':'% of providers who are women',
    'country_shr':"Country"
}, category_orders={'wbincome':incomeorder}).update_layout(xaxis_categoryorder='total descending', 
legend_orientation='h',
legend_y=1.1)

t1.write_image('prop_women.jpg', width=1000, height=500, scale=2, engine='kaleido')
t1