Global Anestehsia Workforce Analysis

What is this notebook?

This notebook is the jupyter version of the streamlit app. It should use the same base data set but perhaps perform different calculations to iterate more quickly than on streamlit.

Code
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.io as pio
# pio.renderers.default = "notebook_connected+png"
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import os
from itables import show
import seaborn.objects as so
import seaborn as sns
Code
df = pd.read_csv('analysis (korea and mkd).csv')
verification = pd.read_csv('verification.csv')
df_clean = df.copy() #unmodified dataframe without corrections

import gspread
client = gspread.service_account('generic-project-for-api.json')
sheet = client.open_by_url('https://docs.google.com/spreadsheets/d/1a_JIqrAgOhP5gQk3Xw0V_SqvT3imofDabAbuqABpikg/')
worksheet = sheet.get_worksheet_by_id(0)
data = worksheet.get_all_records()
df2 = pd.DataFrame(data)

url = 'https://docs.google.com/spreadsheets/d/1f6v1cGXgiQibiFMsvtVDjOfTPob-5aUrLaA4jy_Cu9I/'
sheet = client.open_by_url(url)
worksheet = sheet.get_worksheet_by_id(476816199)
vf = worksheet.get_all_records()
verification_df = pd.DataFrame(vf)

print(df[df['population'].isnull() & df['country'].notnull()][['country','population']])

countrypops = {
    'Taiwan, Republic of China' : 235.70000,
    'Cook Islands': .17564,
    'Somaliland' : 42.00000,
    'Niue' : .01626,
    'Tokelau' : .01357,
    'Eritrea': 35.46421
}
for i in countrypops:
    df['population'] = df.apply(lambda x: countrypops[i] if x['country'] == i else x['population'], axis=1)

##################################physician specialists
cert_spec_verifications = {
    'Chile':2131, 
    'Peru':2850,
    'Guatemala':554,
    'El Salvador':143,
    'Austria':2650,
    'Jordan':474,
    'Cyprus':207,
    'Switzerland':2016,
    'Timor-Leste':12,
    'Solomon Islands':3,
    'Samoa':3,
    'Burundi':4,
    'Turkey': 8570,
    'Spain':8000,
    'Indonesia':4134,
    'Morocco':830,
    'Colombia': 4148, #per Pedro after confirming with SCARE
    }

# function to take a dictionary of changes, and return the new number to the row
def verify_update(row, country, number):
    if country == row['country']:
        return number
    else:
        return row['certified_specialist']

# make changes to certified specialists
for i in cert_spec_verifications:
    df['certified_specialist'] = df.apply(verify_update, axis=1, args=(i,cert_spec_verifications.get(i)))

# correct USA in 2015
df.loc[df['country']=='United States of America','physicians2015'] = 47500

##################################NONspecialist physicians

nonspec_phys_verifications= {
    'Switzerland':679,
    'Solomon Islands':4,
    'Samoa':1,
    'Turkey':0,
    'Guatemala':0
}

for i in nonspec_phys_verifications:
    df['provider_number'] = df.apply(lambda x: nonspec_phys_verifications.get(i) if x['country']==i else x['provider_number'], axis=1)

##################################nurses specialists
nurse_num_verifications = {
    'Greece':1050,
    'Thailand':5000,
    'China':28200,
    'Saudi Arabia':5,
    'Indonesia':4000,
    'Serbia':700,
    'Solomon Islands':4,
    'Turkey':6486,
    'Australia':0,
    'Finland':3000,
    'Liberia':90,
    'Croatia': 1000,
    'Luxembourg': 409,
    'Germany':1400,
    'Austria':130,
    'Korea (Republic of)':680 # filled in 680 for nurse 1, nurse 2, and nurse 3. shouldy only be entered once
}

#make changes to nurse numbers
for i in nurse_num_verifications:
    df['nurse_num'] = df.apply(lambda x: nurse_num_verifications.get(i) if x['country']==i else x['nurse_num'], axis=1)

# thailand special case, reported nurses and non nurses, but in fact they are all nurse anesthetists and total about 5000
# Turkey has certified nurse technicians as 'other' , but they are really non-physician providers
# 
# set thailand nurse anesthetists to 5000 above, set all other NPA categories to nan
# set turkey technicians to 6486 above, set all other NPA categories to nan
# move finland other1 providers to nurses, set above, remove from other1 below
# same for croatia, move 1000 to nurses
# set korea nurses to 680 above, and remove all other nurse cadres to nan
for i in ['nurse_num2', 'nurse_num3', 'nurse_num4', 'other_1_numbers']:
    df[i] = df.apply(lambda x: np.nan if x['country']=='Thailand' else x[i], axis=1)
    df[i] = df.apply(lambda x: np.nan if x['country']=='Turkey' else x[i], axis=1)
    df[i] = df.apply(lambda x: np.nan if x['country']=='Finland' else x[i], axis=1)
    df[i] = df.apply(lambda x: np.nan if x['country']=='Croatia' else x[i], axis=1)
    df[i] = df.apply(lambda x: np.nan if x['country']=='Korea (Republic of)' else x[i], axis=1)


# somaliland in 2015 should have ben 40 nurses
                       country  population
53   Taiwan, Republic of China         NaN
134               Cook Islands         NaN
144                 Somaliland         NaN
148                       Niue         NaN
149                    Tokelau         NaN
184                    Eritrea         NaN
185                    Eritrea         NaN
Code
###########################################
############## variable calculation
############## defining column orders and map colors
###########################################)

isolist = df[['country', 'code3']].drop_duplicates()

# create bins for per capita figure
def percapitabins(row):
    if row['totalpap_cap'] >= 20:
        return ">= 20"
    elif row['totalpap_cap'] >=15 and row['totalpap_cap']<20:
        return "15-20"
    elif row['totalpap_cap'] >=10 and row['totalpap_cap']<15:
        return "10-15"
    elif row['totalpap_cap'] >=6 and row['totalpap_cap']<10:
        return "6-10"
    elif row ['totalpap_cap'] >=3 and row['totalpap_cap'] <6:
        return "3-6"
    elif row['totalpap_cap'] >= 1 and row['totalpap_cap'] <3:
        return "1-3"
    elif row['totalpap_cap'] <1:
        return "<1"
    else:
        return "no data"

catorder = {"totalpap_cap_bins":['>= 20','15-20', '10-15','6-10', '3-6', '1-3','<1','no data']}

colmap = {'>= 20':'#4a9563',
'15-20': '#87e3a6', 
'10-15': '#c0f1d0',
'6-10': '#d4f4df', 
'3-6': '#dcc127', 
'1-3': '#fe8a0e',
'<1': '#dd1e39',
'no data': 'gray'
}

# field conversion to numeric
dflist = [df, df_clean]

for i in dflist:
    i['nurses2015'] = pd.to_numeric(i['nurses2015'], errors='coerce')
    i['nurses2015_cap'] = pd.to_numeric(i['nurses2015_cap'], errors='coerce')

# create verification categories:
def verificationstatus(row):
    if row['Verified: you agree this number should be published'] == "TRUE":
        return "Verified"
    elif row['Have sent verification email'] == "TRUE" and row['Verified: you agree this number should be published'] == "FALSE":
        return "Attempted verification"
    else:
        return "Verification not required"

#Group for individual country analysis
dfg = df.groupby(['country','Region','wbincome']).mean(numeric_only=True)
dfg = dfg.reset_index()

#Group by World Bank income group
wbc = dfg.groupby('wbincome').sum(numeric_only=True, min_count=1)
wbc = wbc.reset_index()

#Group by WHO Regional group
who = dfg.groupby('Region').sum(numeric_only=True, min_count=1)
who = who.reset_index()

#Group by WHO regional group, minus USA and Canada
whonousa = dfg[dfg['country'] != 'United States of America']
whonousa = whonousa[whonousa['country'] != 'Canada']
whonousa = whonousa.groupby('Region').sum(numeric_only=True, min_count=1)
whonousa = whonousa.reset_index()

legendict = dict(orientation='h', 
    yanchor="bottom",
    y=1.02,
    xanchor="right",
    x=1)

dflist = [df, dfg,wbc,who,whonousa]
regionlist = ['European Region','African Region','Region of the Americas','Eastern Mediterranean Region','South-East Asia Region','Western Pacific Region']

for i in dflist:
    i['physicians2015_cap'] = i['physicians2015']/(i['population_2015']/100000)
    i['nurses2015_cap'] = i['nurses2015']/(i['population_2015']/100000)
    i['totalproviders2015'] = i[['physicians2015','nurses2015']].sum(axis=1, min_count=1)
    i['totalproviders2015_cap'] = i[['physicians2015_cap','nurses2015_cap']].sum(axis=1, min_count=1)
    i['totalnpap'] = i[['nurse_num', 'nurse_num2', 'nurse_num3', 'nurse_num4']].sum(axis=1, min_count=1)
    i['totalpap'] = i[['certified_specialist', 'provider_number']].sum(axis=1, min_count=1)
    i['totalproviders'] = i[['totalpap','totalnpap']].sum(axis=1, min_count=1)
    i['totalpap_cap'] = i['totalpap']/i['population']
    i['totalnpap_cap'] = i['totalnpap']/i['population']
    i['totalproviders_cap'] = i['totalproviders']/i['population']
    i['paptrainee'] = i[['trainee_current','trainees_current']].sum(axis=1,min_count=1)
    i['npaptrainee'] = i[['nurse_trainee','nurse_trainee2','nurse_trainee3','nurse_trainee4']].sum(axis=1, min_count=1)
    i['totaltrainee'] = i[['paptrainee','npaptrainee']].sum(axis=1,min_count=1)
    i['gradtrainee'] = i[['trainee_grad','trainees_grad','nurse_grad','nurse_grad2','nurse_grad3','nurse_grad4']].sum(axis=1, min_count=1)
    #genders
    i['totalpap_gender'] = i[['papgender', 'provider_number_2gender']].mean(axis=1)
    i['totalnpap_gender'] = i[['nurse_numgender', 'nurse_num2gender', 'nurse_num3gender', 'nurse_num4gender']].mean(axis=1)
    i['totalgender'] = i[['papgender', 'provider_number_2gender', 'nurse_numgender', 'nurse_num2gender', 'nurse_num3gender', 'nurse_num4gender']].mean(axis=1)
    #population difference
    i['population_2015'] = i['population_2015']/100000
    i['popdiff'] = i['population']-i['population_2015']
    #calculate difference in providers 2015 to now
    i['papdiff'] = i['totalpap']-i['physicians2015'] #absolute difference, number of physicians
    
    i['certifiedpapdiff'] = round(i['certified_specialist']-i['physicians2015'],2) #absolute difference, certified specialists
    i['totalpap_capdiff'] = i['totalpap_cap'] - i['physicians2015_cap'] # absolute difference, physicians per capita
    i['totalnpap_capdiff'] = i['totalnpap_cap'].sub(i['nurses2015_cap'], fill_value=0)
    i['totalprovidersdiff'] = i['totalproviders'] - i['totalproviders2015'] #abs difference, total providers
    i['totalproviders_cap_diff'] = i['totalproviders_cap'] - i['totalproviders2015_cap'] # absolute difference, total providers per capita

    #calculate proportional change in providers, 2015 to now
    # i['papdiffpct'] = round((1-((i['totalpap']/i['physicians2015'])))*-100,0) #proportion change, number of physicians
    # i['certifiedpapdiffpct'] = round((1-((i['certified_specialist']/i['physicians2015'])))*-100,0) # proportion change, number of physicians (certified onlyh)
    # i['physiciancap_pct'] = round((1-((i['totalpap_cap']/i['physicians2015_cap'])))*-100,0) #proportion change, physicians per capita
    # i['totalpap_capdiffpct'] = round((1-((i['totalpap_cap']/i['physicians2015_cap'])))*-100,0) # proportion change, physicians per capita DUPLICATE
    # i['totalprovidersdiffpct'] = round((1-((i['totalproviders']/i['totalproviders2015'])))*-100,0) #proportion change, total providers
    # i['totalproviders_cap_pct'] = round((1-((i['totalproviders_cap']/i['totalproviders2015_cap'])))*-100,0) #proportion change, total providers per capita
    i['papdiffpct'] = round(((i['totalpap']-i['physicians2015'])/i['physicians2015'])*100,0)
    i['certifiedpapdiffpct'] = round(((i['certified_specialist']-i['physicians2015'])/i['physicians2015'])*100,0)
    i['physiciancap_pct'] = round(((i['totalpap_cap']-i['physicians2015_cap'])/i['physicians2015_cap'])*100,0)
    i['totalpap_capdiffpct'] = i['physiciancap_pct']
    i['totalprovidersdiffpct'] = round(((i['totalproviders']-i['totalproviders2015'])/i['totalproviders2015'])*100,0)
    i['totalproviders_cap_pct'] =  round(((i['totalproviders_cap']-i['totalproviders2015_cap'])/i['totalproviders2015_cap'])*100,0) 
    i['total_cert_ratio'] = i['certifiedpapdiffpct']/i['papdiffpct']

    ##nurses percent difference
    i['npapdiff'] = i['totalnpap'].sub(i['nurses2015'], fill_value=0) #absolute difference, nurse numbers
    i['npapdiffpct'] = round((1-((i['totalnpap']/i['nurses2015'])))*-100,0) #proportion change, nurse numbers
    i['npapcapdiff']= round(((i['totalnpap_cap']-i['nurses2015_cap'])/i['nurses2015_cap'])*100,0) #proportion change, nurses per capita

provnumbers = dfg[['country','Region', 'certified_specialist','provider_number','totalpap','totalpap_cap', 'totalnpap','totalnpap_cap', 'totalproviders', 'totalproviders_cap','physicians2015', 'qualified_physicians2015', 'physicians2015_cap', 'physiciancap_pct','papdiff','papdiffpct','nurses2015','nurses2015_cap','npapdiff','npapdiffpct','npapcapdiff']].copy()

dfg = dfg.merge(isolist, on='country', how='left')
dfg = dfg.merge(verification_df, left_on='country', right_on='Country', how='left')
dfg = dfg.merge(verification, left_on='country', right_on='Country', how='left')

dfg['totalpap_cap_bins'] = dfg.apply(percapitabins, axis=1)
dfg['verification_status'] = dfg.apply(verificationstatus, axis=1)
dfg.to_csv('dfg.csv')
df.to_csv('df.csv')

for dylan

Code
# ctrylist = ['Australia','Canada','China','Colombia','Fiji','India','Lebanon']
ctrylist = ['Mexico','Norway','Pakistan','Paraguay','Romania','South Africa','Uganda','United States of America', 'Viet Nam']
collist = ['country','population','totalpap','totalnpap','totalpap_cap','totalproviders_cap']
npaplist = ['totalnpap','totalnpapcap']

dfg.loc[dfg['country']=='North Macedonia',collist]
country population totalpap totalnpap totalpap_cap totalproviders_cap
101 North Macedonia 20.7 237.0 NaN 11.449275 11.449275
Code
df[df['country']=='Korea (Republic of)'][['nurse_num', 'nurse_num2', 'nurse_num3', 'nurse_num4','totalnpap']]
nurse_num nurse_num2 nurse_num3 nurse_num4 totalnpap
28 680.0 NaN NaN NaN 680.0
29 680.0 NaN NaN NaN 680.0
30 680.0 NaN NaN NaN 680.0
Code
# dfg.loc[dfg['country'].isin(ctrylist)][collist].set_index('country').T.style.format({'totalpap':"{:,.0f}",'certified_specialist':"{:,.0f}",'totalnpap':"{:,.0f}",'totalproviders_cap':"{:,.1f}"})
dfg.loc[dfg['country'].isin(ctrylist)][collist].set_index('country').T.style.format(precision=1)
country Mexico Norway Pakistan Paraguay Romania South Africa Uganda United States of America Viet Nam
population 1315.6 54.7 2294.9 73.1 190.7 607.6 484.3 3352.3 989.5
totalpap 9665.0 1099.0 3393.0 310.0 1159.0 1750.0 76.0 53250.0 1500.0
totalnpap nan 2000.0 nan 160.0 nan nan 500.0 56172.0 nan
totalpap_cap 7.3 20.1 1.5 4.2 6.1 2.9 0.2 15.9 1.5
totalproviders_cap 7.3 56.7 1.5 6.4 6.1 2.9 1.2 32.6 1.5
Code
mask = (dfg['Region_x'] == 'African Region') & (dfg['totalproviders_cap'] <=1)
mask = dfg['totalproviders_cap'] <=5
dfg[mask]
country Region_x wbincome Unnamed: 0 record_id redcap_survey_identifier certified_specialist papgender trainee_current trainee_grad ... % difference in PAP per capita compared to 2015_y Absolute Difference in # PAPs per capita Unnamed: 11 % difference in # of PAPs compared to 2015_y verified contacted Verification Comments_y Contact_y totalpap_cap_bins verification_status
5 Bangladesh South-East Asia Region Lower middle income 10.0 45.000000 NaN 1839.0 NaN 860.0 230.0 ... 0.453811 660.0 52.0 52.00% True True 10/10 TL confirmed correct numbers NaN 1-3 Verified
9 Benin African Region Lower middle income 213.0 16.000000 NaN 26.0 18.5 60.0 6.0 ... -0.637280 -35.0 -57.0 -57.00% False True NaN NaN <1 Attempted verification
11 Bolivia Region of the Americas Lower middle income 194.0 24.000000 NaN 450.0 45.0 50.0 20.0 ... -0.441080 -270.0 -38.0 -38.00% False True 9.16 TL verification sent NaN 3-6 Attempted verification
15 Burkina Faso African Region Low income 211.0 12.000000 NaN 57.0 23.0 69.0 7.0 ... NaN NaN NaN NaN NaN NaN NaN NaN <1 Verified
16 Burundi African Region Low income 31.0 75.000000 NaN 4.0 0.0 3.0 0.0 ... NaN NaN NaN NaN NaN NaN NaN NaN <1 Verified
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
151 Venezuela Region of the Americas Upper middle income 200.0 32.000000 NaN 800.0 67.0 800.0 510.0 ... NaN NaN NaN NaN NaN NaN NaN NaN 1-3 Verification not required
152 Viet Nam Western Pacific Region Lower middle income 150.5 217.000000 NaN 1500.0 45.0 225.0 175.0 ... 0.416537 500.0 50.0 50.00% False True NaN NaN 1-3 Attempted verification
153 Yemen Eastern Mediterranean Region Low income 59.0 102.000000 NaN 74.0 18.0 65.0 18.0 ... 4.454534 80.0 533.0 533.00% False False NaN NaN <1 Verification not required
154 Zambia African Region Lower middle income 39.0 109.333333 NaN 29.0 37.0 10.0 4.0 ... 0.359669 50.0 63.0 63.00% False True NaN Data from macabels2002@gmail.com inconsistent ... <1 Attempted verification
155 Zimbabwe African Region Lower middle income 174.0 249.000000 NaN 73.0 50.0 25.0 2.0 ... NaN NaN NaN NaN NaN NaN NaN NaN <1 Verification not required

66 rows × 115 columns

End dylan

Comparison of population differences

In the paper we talk about how some countries had a decrease in providers per 100k despite having an increase in absolute providers. This was attributed to population growth.

The editors have essentially asked if the reverse is also true. Did any countries or regions have an increase in providers per capita, despite having a decrease in absolute providers? If so this would be due to population shrinkage.

Code
cols = ['country','population','population_2015','totalproviders','totalproviders2015','totalproviders_cap','totalproviders2015_cap','totalproviders_cap_diff','totalprovidersdiff','popdiff']
dfg['popdiff'] = dfg['population'].sub(dfg['population_2015'])
Code
# which countries had a population decrease?
dfg.loc[dfg['popdiff']<0][cols]
country population population_2015 totalproviders totalproviders2015 totalproviders_cap totalproviders2015_cap totalproviders_cap_diff totalprovidersdiff popdiff
0 Albania 28.25000 28.97000 210.000000 350.0 7.433628 12.081464 -4.647835 -140.000000 -0.72000
6 Belarus 93.58000 94.96000 2395.000000 2185.0 25.593075 23.009688 2.583387 210.000000 -1.38000
12 Bosnia and Herzegovina 32.49000 38.10000 NaN 230.0 NaN 6.036745 NaN NaN -5.61000
25 Cook Islands 0.17564 0.21000 1.000000 1.0 5.693464 4.761905 0.931559 0.000000 -0.03436
27 Croatia 40.03000 42.40000 1676.333333 870.0 41.876926 20.518868 21.358058 806.333333 -2.37000
28 Cuba 113.06000 113.90000 1452.000000 1786.0 12.842738 15.680421 -2.837683 -334.000000 -0.84000
37 Eritrea 35.46421 52.28000 98.000000 57.0 2.763349 1.090283 1.673066 41.000000 -16.81579
45 Georgia 37.00000 40.00000 500.000000 500.0 13.513514 12.500000 1.013514 0.000000 -3.00000
48 Greece 105.83000 109.55000 2050.000000 1300.0 19.370689 11.866728 7.503961 750.000000 -3.72000
52 Hungary 96.90000 98.55000 2457.000000 900.0 25.356037 9.132420 16.223617 1557.000000 -1.65000
59 Italy 592.02000 597.98000 15000.000000 15500.0 25.336982 25.920599 -0.583617 -500.000000 -5.96000
60 Japan 1247.47000 1265.74000 4606.000000 12208.0 3.692273 9.644951 -5.952678 -7602.000000 -18.27000
68 Latvia 18.65000 19.70000 350.000000 846.0 18.766756 42.944162 -24.177406 -496.000000 -1.05000
73 Lithuania 27.40000 28.78000 851.000000 533.0 31.058394 18.519805 12.538589 318.000000 -1.38000
83 Mauritius 12.69000 12.73000 76.000000 55.0 5.988968 4.320503 1.668465 21.000000 -0.04000
86 Moldova 26.06000 40.69000 420.000000 470.0 16.116654 11.550750 4.565904 -50.000000 -14.63000
88 Montenegro 6.21000 6.26000 117.000000 81.0 18.840580 12.939297 5.901283 36.000000 -0.05000
101 North Macedonia 20.70000 20.78000 237.000000 206.0 11.449275 9.913378 1.535897 31.000000 -0.08000
105 Palau 0.18000 0.21000 3.000000 3.0 16.666667 14.285714 2.380952 0.000000 -0.03000
112 Poland 377.81000 386.12000 12400.000000 10480.0 32.820730 27.141821 5.678909 1920.000000 -8.31000
113 Portugal 102.34000 103.50000 1542.000000 1855.0 15.067422 17.922705 -2.855283 -313.000000 -1.16000
114 Romania 190.69000 195.11000 1159.000000 1400.0 6.077928 7.175439 -1.097512 -241.000000 -4.42000
120 Serbia 68.25000 88.51000 1600.000000 950.0 23.443223 10.733250 12.709973 650.000000 -20.26000
126 Somaliland 42.00000 234.15126 60.000000 5311.0 1.428571 22.681919 -21.253348 -5251.000000 -192.15126
131 Swaziland 11.85000 12.87000 7.000000 69.0 0.590717 5.361305 -4.770588 -62.000000 -1.02000
151 Venezuela 292.67000 311.08000 800.000000 690.0 2.733454 2.218079 0.515375 110.000000 -18.41000
155 Zimbabwe 153.31000 156.03000 73.000000 280.0 0.476159 1.794527 -1.318367 -207.000000 -2.72000
Code
mask_increasepercapita = dfg['totalproviders_cap'] >0
mask_decreaseabs = dfg['totalproviders'] <0

dfg.loc[mask_increasepercapita & mask_decreaseabs][cols]
country population population_2015 totalproviders totalproviders2015 totalproviders_cap totalproviders2015_cap totalproviders_cap_diff totalprovidersdiff popdiff
Code
who['popdiff'] = who['population'].sub(who['population_2015'])
who[['Region','population','population_2015','totalproviders','totalproviders2015','totalproviders_cap','totalproviders2015_cap','totalproviders_cap_diff','totalprovidersdiff','popdiff']]
Region population population_2015 totalproviders totalproviders2015 totalproviders_cap totalproviders2015_cap totalproviders_cap_diff totalprovidersdiff popdiff
0 African Region 10758.90421 9046.16212 21372.333333 15426.0 1.986479 1.705254 0.281225 5946.333333 1712.74209
1 Eastern Mediterranean Region 7070.41000 6268.46592 41171.000000 26313.0 5.823000 4.197678 1.625323 14858.000000 801.94408
2 European Region 8001.55000 7841.99000 207537.333333 180536.0 25.937141 23.021708 2.915434 27001.333333 159.56000
3 Region of the Americas 10172.69000 9713.66000 170986.000000 135050.0 16.808337 13.903101 2.905235 35936.000000 459.03000
4 South-East Asia Region 20339.73000 18920.16311 79069.166667 22889.0 3.887425 1.209768 2.677657 56180.166667 1419.56689
5 Western Pacific Region 19417.25547 18707.75000 149599.500000 103061.0 7.704462 5.509000 2.195461 46538.500000 709.50547
Code
who['population'].sum()*100000
7576053968.0

Population change in percent

Code
dfg['popdiffpct'] = dfg['population']/dfg['population_2015']
Code
# generate plotly boxplot for popdiff, facet by region
fig = px.box(dfg, x='Region_x', y='popdiffpct', color='Region_x', hover_data=['country'], title='Population change, 2015-2020', width=800, height=600, points='all')
fig.show()

Overall stats, change in providers, current total

Code
print('Overall change in total paps:', 
      dfg['totalpap'].sum() - dfg['physicians2015'].sum())

print('Overall change in PAP density: ',
      (dfg['totalpap'].sum()/dfg['population'].sum()) - (dfg['physicians2015'].sum()/dfg['population_2015'].sum()))
Overall change in total paps: 97140.5
Overall change in PAP density:  0.8864765863027477
Code
print('Overall average in total providers:',
      dfg['totalproviders_cap'].mean())

print('Another way: calculating total provider density as an aggregate: ',
      dfg['totalproviders'].sum() / dfg['population'].sum())

print('Overall PAP and NPAP density: ',
      dfg['totalpap'].sum()/dfg['population'].sum(),
      dfg['totalnpap'].sum()/dfg['population'].sum())
Overall average in total providers: 12.17627671789102
Another way: calculating total provider density as an aggregate:  8.84016053953925
Overall PAP and NPAP density:  6.5836582224304445 2.256502317108802

These two numbers give slightly different numbers, but the difference is small. The first is the average of the provider densities of each country. The second is the total number of providers divided by the total population.

Neither is more ‘correct’, but in Table 1 9.7 is the average of the income groups and that’s what we cite in the paper. One of the above numbers may be more accurate, but thinking in terms of income groups is also a natural way of doing so.

Table 1

Code
wbc['wbincome'] = pd.Categorical(wbc['wbincome'], ordered=True)
wbc['wbincome'] = wbc['wbincome'].cat.reorder_categories(['High income','Upper middle income','Lower middle income','Low income'], ordered=True)
Code
tablecolumns = ['totalpap','totalpap_cap','totalnpap','totalnpap_cap','totalproviders','totalproviders_cap']
t1 = who[['Region']+tablecolumns]
t1 = t1.set_index('Region')
#add total 
t1.loc['Overall (sum, *mean)'] = [t1['totalpap'].sum(), t1['totalpap_cap'].mean(), t1['totalnpap'].sum(), t1['totalnpap_cap'].mean(), t1['totalproviders'].sum(), t1['totalproviders_cap'].mean()]
Code
tab1= wbc[['wbincome']+tablecolumns].sort_values(by='wbincome')
tab1 = tab1.set_index('wbincome')
tab1.loc['Overall (sum, *mean)'] = [tab1['totalpap'].sum(), tab1['totalpap_cap'].mean(), tab1['totalnpap'].sum(), tab1['totalnpap_cap'].mean(), tab1['totalproviders'].sum(), tab1['totalproviders_cap'].mean()]
Code
from IPython.display import display, HTML
from jinja2 import Template

#### Start by having at least these lists
tab1cols = ['totalpap','totalpap_cap','totalnpap','totalnpap_cap','totalproviders','totalproviders_cap'] # names of the dataframe columns
tab1labels = ['Total PAP','Total PAP density*','Total NPAP', 'Total NPAP density*','Total providers','Total provider density*'] #labels of the dataframe columns

#### this can go in a different cell/file, it doesn't change.
table_head =  '''
<html>
<head>
<style>
.table-style {
    border: 1px solid white;
    text-align: center;
    border-collapse: collapse;
}

.table-style td{
    padding: 5px 5px;
}

.darkgray {
    background-color: #d9d9d9;
}

.section-break {
    border-top: 1.5pt solid black;
    border-bottom: 1.5pt solid black;
    background-color: #efefef;
}

.bottom-solid-hard{
    border-bottom: 1.5pt solid black;
}

.left-solid {
    border-left: 1pt solid black;
}

.right-solid {
    border-right: 1pt solid black;
}

.right-dash {
    border-right: 1pt dotted black;
}


</style>
</head>
'''
Code
table_str='''
<table class='table-style'>
    <thead>
        {# ######## FIRST MAKE THE COLUMNS IN THE HEADER #########  #}
                {# ######## THE COLUMNS #########  #}
        <tr class='darkgray'> 
            <td></td>
            {% for i in tab1labels %}
                <td class='left-solid'>{{i}} </td>
            {% endfor %}
        </tr>
    </thead>

    <tbody>
        {# ######## NOW MAKE THE BODY #########  #}
                {# ######## Second SECTION, colspan is NUMBER OF LABELS +1 #########  #}
        <tr class='section-break'><td colspan = 7> Income Group </td> </tr>
        
                {# ######## START EACH ROW WITH A LABEL #########  #}
                {% for i in tab1.index %}
            <tr>
                <td> {{i}} </td>
                        {# ######## NOW EACH COLUMN FOR THAT ROW FROM THE DATAFRAME #########  #}
                <td class='left-solid'> {{ tab1.at[i,'totalpap']|int }} </td>
                <td class='left-solid'> {{ tab1.at[i,'totalpap_cap']|round(1) }} </td>
                <td class='left-solid'> {{ tab1.at[i,'totalnpap'] |int}} </td>
                <td class='left-solid'> {{ tab1.at[i,'totalnpap_cap'] |round(1) }} </td>
                <td class='left-solid'> {{ tab1.at[i,'totalproviders']|int }} </td>
                <td class='left-solid'> {{ tab1.at[i,'totalproviders_cap'] |round(1)}} </td>
            </tr>
                {% endfor %}

                {# ######## Second SECTION, colspan is NUMBER OF LABELS +1 #########  #}
        <tr class='section-break'><td colspan = 7> WHO Region </td> </tr>
        
                {# ######## START EACH ROW WITH A LABEL #########  #}
                {% for i in t1.index %}
            <tr>
                <td> {{i}} </td>
                        {# ######## NOW EACH COLUMN FOR THAT ROW FROM THE DATAFRAME #########  #}
                <td class='left-solid'> {{ t1.at[i,'totalpap']|int }} </td>
                <td class='left-solid'> {{ t1.at[i,'totalpap_cap']|round(1) }} </td>
                <td class='left-solid'> {{ t1.at[i,'totalnpap'] |int}} </td>
                <td class='left-solid'> {{ t1.at[i,'totalnpap_cap'] |round(1) }} </td>
                <td class='left-solid'> {{ t1.at[i,'totalproviders']|int }} </td>
                <td class='left-solid'> {{ t1.at[i,'totalproviders_cap'] |round(1)}} </td>
            </tr>
                {% endfor %}

    </tbody>

</table>
</html>

'''

template=Template(table_head+table_str)

with open('table1.html', "w") as text_file:
    print(template.render(tab1cols=tab1cols, t1=t1, tab1=tab1, tab1labels=tab1labels), file=text_file)

HTML(template.render(tab1cols=tab1cols, t1=t1, tab1=tab1, tab1labels=tab1labels))
Total PAP Total PAP density* Total NPAP Total NPAP density* Total providers Total provider density*
Income Group
High income 194106 16.1 94065 7.8 288171 23.8
Upper middle income 215808 7.5 53369 1.9 269177 9.4
Lower middle income 87229 3.0 13231 0.5 100460 3.5
Low income 1637 0.3 10288 1.7 11925 2.0
Overall (sum, *mean) 498781 6.7 170953 2.9 669735 9.7
WHO Region
African Region 5949 0.6 15422 1.4 21372 2.0
Eastern Mediterranean Region 26306 3.7 14865 2.1 41171 5.8
European Region 167174 20.9 40363 5.0 207537 25.9
Region of the Americas 113320 11.1 57666 5.7 170986 16.8
South-East Asia Region 70020 3.4 9049 0.4 79069 3.9
Western Pacific Region 116011 6.0 33588 1.7 149599 7.7
Overall (sum, *mean) 498781 7.6 170953 2.7 669735 10.4

Table 2

Code
table2cols = ['papdiff','totalpap_capdiff','npapdiff','totalnpap_capdiff','totalprovidersdiff','totalproviders_cap_diff']
tab1 = wbc[['wbincome']+table2cols].sort_values(by='wbincome')

tab1 = tab1.set_index('wbincome')
tab1.loc['Overall (sum, *mean)'] = [tab1['papdiff'].sum(), tab1['totalpap_capdiff'].mean(), tab1['npapdiff'].sum(), tab1['totalnpap_capdiff'].mean(), tab1['totalprovidersdiff'].sum(), tab1['totalproviders_cap_diff'].mean()]
tab1
Code
tab2 = who[['Region']+table2cols]
tab2 = tab2.set_index('Region')
tab2.loc['Overall (sum, *mean)'] = [tab2['papdiff'].sum(), tab2['totalpap_capdiff'].mean(), tab2['npapdiff'].sum(), tab2['totalnpap_capdiff'].mean(), tab2['totalprovidersdiff'].sum(), tab2['totalproviders_cap_diff'].mean()]

tab2
Code
tab2cols = tab1.columns.to_list()

#these are the tab2labels:
#Change in PAPs
# Change in PAP density
# Change in NPAPs
# Change in NPAP density
# Change in total providers
# Change in total provider density

tab2labels = ['Change in PAPs','Change in PAP density','Change in NPAPs','Change in NPAP density','Change in total providers','Change in total provider density']

table_str='''
<table class='table-style'>
    <thead>
        {# ######## FIRST MAKE THE COLUMNS IN THE HEADER #########  #}
                {# ######## THE COLUMNS #########  #}
        <tr class='darkgray'> 
            <td></td>
            {% for i in tab2labels %}
                <td class='left-solid'>{{i}} </td>
            {% endfor %}
        </tr>
    </thead>

    <tbody>
        {# ######## NOW MAKE THE BODY #########  #}
                {# ######## Second SECTION, colspan is NUMBER OF LABELS +1 #########  #}
        <tr class='section-break'><td colspan = 7> Income Group </td> </tr>
        
                {# ######## START EACH ROW WITH A LABEL #########  #}
                {% for i in tab1.index %}
            <tr>
                <td> {{i}} </td>
                        {# ######## NOW EACH COLUMN FOR THAT ROW FROM THE DATAFRAME #########  #}
                <td class='left-solid'> {{ tab1.at[i,'papdiff']|int }} </td>
                <td class='left-solid'> {{ tab1.at[i,'totalpap_capdiff']|round(1) }} </td>
                <td class='left-solid'> {{ tab1.at[i,'npapdiff'] |int}} </td>
                <td class='left-solid'> {{ tab1.at[i,'totalnpap_capdiff'] |round(1) }} </td>
                <td class='left-solid'> {{ tab1.at[i,'totalprovidersdiff']|int }} </td>
                <td class='left-solid'> {{ tab1.at[i,'totalproviders_cap_diff'] |round(1)}} </td>
            </tr>
                {% endfor %}

                {# ######## Second SECTION, colspan is NUMBER OF LABELS +1 #########  #}
        <tr class='section-break'><td colspan = 7> WHO Region </td> </tr>
        
                {# ######## START EACH ROW WITH A LABEL #########  #}
                {% for i in tab2.index %}
            <tr>
                <td> {{i}} </td>
                        {# ######## NOW EACH COLUMN FOR THAT ROW FROM THE DATAFRAME #########  #}
                <td class='left-solid'> {{ tab2.at[i,'papdiff']|int }} </td>
                <td class='left-solid'> {{ tab2.at[i,'totalpap_capdiff']|round(1) }} </td>
                <td class='left-solid'> {{ tab2.at[i,'npapdiff'] |int}} </td>
                <td class='left-solid'> {{ tab2.at[i,'totalnpap_capdiff'] |round(1) }} </td>
                <td class='left-solid'> {{ tab2.at[i,'totalprovidersdiff']|int }} </td>
                <td class='left-solid'> {{ tab2.at[i,'totalproviders_cap_diff'] |round(1)}} </td>
            </tr>
                {% endfor %}

    </tbody>

</table>
</html>

'''

template=Template(table_head+table_str)

with open('table2.html', "w") as text_file:
    print(template.render(tab2cols=tab2cols, tab2labels = tab2labels, tab2=tab2, tab1=tab1), file=text_file)

HTML(template.render(tab2cols=tab2cols, tab2labels = tab2labels, tab2=tab2, tab1=tab1, ))
Change in PAPs Change in PAP density Change in NPAPs Change in NPAP density Change in total providers Change in total provider density
Income Group
High income 8707 -0.1 28309 2.0 37016 1.9
Upper middle income 42012 1.2 52836 1.8 94848 3.1
Lower middle income 47359 1.5 5847 0.2 53206 1.7
Low income -939 -0.2 2327 0.2 1388 -0.0
Overall (sum, *mean) 97140 0.6 89319 1.1 186460 1.7
WHO Region
African Region -164 -0.1 6110 0.4 5946 0.3
Eastern Mediterranean Region 5974 0.5 8884 1.1 14858 1.6
European Region 18569 1.9 8432 1.0 27001 2.9
Region of the Americas 12236 0.7 23700 2.2 35936 2.9
South-East Asia Region 47387 2.2 8793 0.4 56180 2.7
Western Pacific Region 13138 0.5 33400 1.7 46538 2.2
Overall (sum, *mean) 97140 1.0 89319 1.1 186460 2.1

Change in PAPs, minus USA and Canada

Code
tab2 = whonousa[['Region']+table2cols]
tab2 = tab2.set_index('Region')
tab2.loc['Overall (sum, *mean)'] = [tab2['papdiff'].sum(), tab2['totalpap_capdiff'].mean(), tab2['npapdiff'].sum(), tab2['totalnpap_capdiff'].mean(), tab2['totalprovidersdiff'].sum(), tab2['totalproviders_cap_diff'].mean()]

tab2.loc['Region of the Americas']
papdiff                    7272.000000
totalpap_capdiff              0.760614
npapdiff                   1442.000000
totalnpap_capdiff             0.223742
totalprovidersdiff         8714.000000
totalproviders_cap_diff       0.984356
Name: Region of the Americas, dtype: float64

Supplemental table

Code
inspect = dfg[['country','Region_x',
               'physicians2015',
               'physicians2015_cap',
               'totalpap',
               'totalpap_cap',
               'papdiff',
               'totalpap_capdiff',
               'nurses2015',
               'totalnpap',
               'totalnpap_cap',           
               'npapdiff',
               'totalnpap_capdiff',
               'totalprovidersdiff',
               'totalproviders_cap',
               'totalproviders_cap_diff',
               'totalproviders2015_cap']]
inspect
country Region_x physicians2015 physicians2015_cap totalpap totalpap_cap papdiff totalpap_capdiff nurses2015 totalnpap totalnpap_cap npapdiff totalnpap_capdiff totalprovidersdiff totalproviders_cap totalproviders_cap_diff totalproviders2015_cap
0 Albania European Region 260.0 8.974802 210.0 7.433628 -50.0 -1.541173 90.0 NaN NaN -90.0 -3.106662 -140.0 7.433628 -4.647835 12.081464
1 Algeria African Region 1550.0 3.907629 500.0 1.102536 -1050.0 -2.805093 3500.0 2500.0 5.512679 -1000.0 -3.310999 -2050.0 6.615215 -6.116091 12.731306
2 Argentina Region of the Americas 4760.0 10.963447 4900.0 10.608816 140.0 -0.354631 0.0 NaN NaN 0.0 0.000000 140.0 10.608816 -0.354631 10.963447
3 Australia Western Pacific Region 5535.0 23.092328 5400.0 20.573780 -135.0 -2.518548 0.0 0.0 0.000000 0.0 0.000000 -135.0 20.573780 -2.518548 23.092328
4 Austria European Region 3362.0 39.344646 2650.0 29.599017 -712.0 -9.745629 0.0 130.0 1.452027 130.0 1.452027 -582.0 31.051044 -8.293602 39.344646
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
151 Venezuela Region of the Americas 690.0 2.218079 800.0 2.733454 110.0 0.515375 0.0 NaN NaN 0.0 0.000000 110.0 2.733454 0.515375 2.218079
152 Viet Nam Western Pacific Region 1000.0 1.070114 1500.0 1.515856 500.0 0.445742 NaN NaN NaN NaN NaN 500.0 1.515856 0.445742 1.070114
153 Yemen Eastern Mediterranean Region 15.0 0.055903 95.0 0.304927 80.0 0.249024 NaN NaN NaN NaN NaN 80.0 0.304927 0.249024 0.055903
154 Zambia African Region 79.0 0.487293 129.0 0.662558 50.0 0.175264 2.0 100.0 0.513611 98.0 0.501274 148.0 1.176168 0.676539 0.499630
155 Zimbabwe African Region 87.0 0.557585 73.0 0.476159 -14.0 -0.081426 193.0 NaN NaN -193.0 -1.236942 -207.0 0.476159 -1.318367 1.794527

156 rows × 17 columns

Code
columnlabeldict = {
    'country': 'Country',
    'Region_x': 'WHO Region', 
    'physicians2015': '# of PAPs 2016', 
    'physicians2015_cap': 'PAP density (2016)',
    'totalpap': '# of PAPs',
    'totalpap_cap': 'PAP density', 
    'papdiff': 'PAP difference', 
    'totalpap_capdiff': 'PAP density difference', 
    'nurses2015': '# of NPAP 2016',
    'totalnpap': '# of NPAPs',
    'totalnpap_cap': 'NPAP density',
    'npapdiff': 'NPAP difference',
    'totalnpap_capdiff': 'NPAP density difference',
    'totalprovidersdiff': 'Total provider difference', 
    'totalproviders_cap': 'Total provider density',
    'totalproviders_cap_diff': 'Total provider density difference',
    'totalproviders2015_cap': 'Total provider density (2016)'
}
Code
#rename columns according to the dictionary
inspect = inspect.rename(columns=columnlabeldict)
Code
# to csv
inspect.to_csv('Supplementaltable.csv')

Figure 3

Plotly

Code
incomeorder = ['High income', 'Upper middle income','Lower middle income','Low income']

t1 = px.bar(wbc, x='wbincome', y=['totalpap_capdiff','totalproviders_cap_diff'], barmode='group', labels={'variable':""},
       template='plotly_white',
       text_auto=True, color_discrete_sequence=['darkblue','lightblue']).update_layout(
    xaxis_title='',
    xaxis_categoryorder='array',
    xaxis_categoryarray=incomeorder,
    yaxis_title='Change in providers per 100,000 people',
    legend=legendict,
    width=1000,
    height=600,
    template='plotly_white'
).update_traces(texttemplate='%{y:.2f}')
t1.update_yaxes(showline=True, ticksuffix="  ").update_xaxes(showline=True)

labeldict={'totalpap_capdiff':'Difference in PAP density', 'totalproviders_cap_diff':'Difference in total provider density'}

for trace in t1['data']:
    trace['name'] = labeldict[trace['name']]


#t1.write_image('figures/fig3 - pap_and_totalprovider_cap_diff.jpg', width=1000, height=562, scale=3, engine='kaleido')
t1

Seaborn

Code
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns
import pandas as pd

sns.set_style("whitegrid")

# Define the income order for x-axis
income_order = ['High income', 'Upper middle income', 'Lower middle income', 'Low income']

# Melt the data to convert it to long format
wbc_melted = wbc.melt(id_vars='wbincome', value_vars=['totalpap_capdiff', 'totalproviders_cap_diff'])

# Reorder the DataFrame based on 'income_order'
wbc_melted['wbincome'] = pd.Categorical(wbc_melted['wbincome'], categories=income_order, ordered=True)
wbc_melted = wbc_melted.sort_values('wbincome')

# Create a grouped bar plot using catplot
g = sns.catplot(x='wbincome', y='value', hue='variable', data=wbc_melted, kind='bar',
                palette={'totalpap_capdiff': 'darkblue', 'totalproviders_cap_diff': 'lightblue'},
                height=6, aspect=1.5, legend_out=False)

# Set axis labels and titles
g.set_axis_labels('', 'Change in provider density (per 100,000 population)')
g.set_yticklabels([f'{tick:.2f}' for tick in g.ax.get_yticks()])

# Remove top and right spines
sns.despine(right=True, top=True)

# Add grid lines for the y-axis
g.ax.yaxis.grid(linestyle='--', alpha=0.5)

# Set plot title and tighten layout
plt.tight_layout()

# Add value labels to the bars
for p in g.ax.patches[:8]:
    height = p.get_height()
    width = p.get_width()
    x, y = p.get_xy()
    value = height if height >= 0 else height - 0.13  # Adjust the vertical alignment for negative values
    g.ax.annotate(f'{height:.2f}', (x + width / 2, y + value),
                  ha='center', va='bottom', fontsize=12)

# Custom legend with colors matching the bar colors
legend_patches = [
    mpatches.Patch(color='darkblue', label='Difference in PAP density'),
    mpatches.Patch(color='lightblue', label='Difference in total provider density')
]
g.ax.legend(handles=legend_patches, loc='upper right', bbox_to_anchor=(0.99, 0.99), frameon=False)

# Save the figure
plt.savefig('figures/fig3 - pap_and_totalprovider_cap_diff.jpg', dpi=1200)

# Display the figure
plt.show()

Change in PAPs per capita

Code
mapfig = px.choropleth(dfg,
                                    locations='code3',
                                    locationmode='ISO-3',
                                    scope='world',
                                    color='totalpap_capdiff',
                                    hover_name='country',
                                    hover_data=['totalpap_capdiff'],
                                     color_continuous_scale=px.colors.diverging.RdBu,
                                    # color_continuous_scale=px.colors.sequential.Rainbow,
                                    #color_continuous_scale=[(0,px.colors.qualitative.Safe[1]), (0.5, 'white'), (1,px.colors.qualitative.Prism[3])],
                                    range_color=[-10,10],
                                     color_continuous_midpoint=0,
                                    projection='miller'
                                    ).update_layout(margin={"r":0,"t":0,"l":0,"b":0})

mapfig

Figure 2: Regional PAPs vs NPAPS

Code
#to change the order of regions for this particular figure
regionlist = ['European Region','Region of the Americas','Western Pacific Region', 'Eastern Mediterranean Region','South-East Asia Region','African Region']

Plotly horizontal

Plotly vertical

Code
fig = make_subplots(rows=3, cols=2, subplot_titles=regionlist, vertical_spacing=0.03, horizontal_spacing=.25,
                    x_title='Provider density (per 100,000 people)')

dtemp = dfg

dfg=dfg[~dfg['country'].isin(['Tokelau', 'Niue'])]
#dfg['country_shr'] = dfg['country'].str.slice(0,10)

colcount = [1,2,1,2,1,2]
rowcount = [1,1,2,2,3,3]
yax1 = [1,2,3,4,5,6]
yax2 = [7,8,9,10,11,12]
for i,j,k,g,h in zip(regionlist, rowcount, colcount, yax1, yax2):
    data=dfg[dfg['Region_x']==i]
    pxfig = px.bar(data, y='country',x=['totalpap_cap','totalnpap_cap'])
    fig.add_trace(go.Bar(x=pxfig.data[0]['x'], y=pxfig.data[0]['y'], marker=dict(color='darkblue'), name=('PAP'), orientation='h'), row=j, col=k)
    fig.add_trace(go.Bar(x=pxfig.data[1]['x'], y=pxfig.data[1]['y'], marker=dict(color='lightblue'), name='NPAP', orientation='h'), row=j, col=k)
fig.update_yaxes(categoryorder='total ascending', dtick=1, ticksuffix="   ", linecolor='darkgray')
fig.update_xaxes(showline=True, linecolor='darkgray', gridcolor='lightgray')
fig.update_layout(barmode='stack', template='plotly_white', width=1100, height=1800, margin_t=25)
for trace in range(2,12):
    fig.data[trace]['showlegend'] = False

#fig.write_image('figures/fig2_horizontal.jpg', width=600, height=1100, scale=3, engine='kaleido')

dfg = dtemp
fig.show()

Seaborn

Code
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

sns.set_style("whitegrid")

regionlist = ['European Region','Region of the Americas','Western Pacific Region', 'Eastern Mediterranean Region','South-East Asia Region','African Region']

fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(12, 22), sharex=False, sharey=False, gridspec_kw={'hspace': 0.15, 'wspace':0.5})
axes = axes.flatten()

for i, region in enumerate(regionlist):
    data = dfg[dfg['Region_x'] == region]
    data = data[~data['country'].isin(['Tokelau', 'Niue'])]

    sorted_countries = data[['country', 'totalpap_cap', 'totalnpap_cap']].set_index('country').sum(axis=1).sort_values(ascending=False)

    sns.barplot(x=sorted_countries.values, y=sorted_countries.index, ax=axes[i], color='lightblue', label='NPAP', order=sorted_countries.index)
    sns.barplot(x='totalpap_cap', y='country', ax=axes[i], data=data, color='darkblue', label='PAP', order=sorted_countries.index)

    sns.despine(right=True, ax=axes[i])
    axes[i].set_ylabel('')
    axes[i].set_xlabel('Provider density (per 100,000 people)')
    axes[i].set_title(region)

    # Add a vertical line at x=4
    axes[i].axvline(x=4, color='r', linestyle='--', linewidth=1)

    if i == 1:
        axes[i].legend(frameon=False, bbox_to_anchor=(1.07, 1), loc='upper left')

    if i < 4:
        axes[i].set_xlabel('')

for i in range(len(regionlist), 6):
    fig.delaxes(axes[i])

plt.tight_layout()
plt.savefig('figures/fig2_subplots.jpg', dpi=300, bbox_inches='tight')
plt.show()

Code
for i in regionlist:
    data = dfg[dfg['Region_x']==i]
    fig = go.Figure()
    pxfig = px.bar(data, y='country',x=['totalpap_cap','totalnpap_cap'])
    fig.add_trace(go.Bar(x=pxfig.data[0]['x'], y=pxfig.data[0]['y'], marker=dict(color='darkblue'), name=('PAP'), orientation='h'))
    fig.add_trace(go.Bar(x=pxfig.data[1]['x'], y=pxfig.data[1]['y'], marker=dict(color='lightblue'), name='NPAP', orientation='h'))
    fig.update_yaxes(categoryorder='total ascending', dtick=1)
    fig.update_layout(barmode='stack', template='plotly_white')
    fig.write_image('figures/fig2_'+str(i)+'.jpg', width=1000,height=750,scale=2)

by income group

Code
incomelist = ['High income','Upper middle income','Lower middle income','Low income']

fig = make_subplots(rows=3, cols=2, subplot_titles=incomelist, vertical_spacing=0.2)

dtemp = dfg

dfg=dfg[~dfg['country'].isin(['Tokelau', 'Niue'])]
dfg['country_shr'] = dfg['country'].str.slice(0,10)

colcount = [1,2,1,2,1,2]
rowcount = [1,1,2,2,3,3]
yax1 = [1,2,3,4,5,6]
yax2 = [7,8,9,10,11,12]
for i,j,k,g,h in zip(incomelist, rowcount, colcount, yax1, yax2):
    data=dfg[dfg['wbincome']==i]
    pxfig = px.bar(data, x='country_shr',y=['totalpap_cap','totalnpap_cap'])
    fig.add_trace(go.Bar(x=pxfig.data[0]['x'], y=pxfig.data[0]['y'], marker=dict(color='darkblue'), name=('PAP')), row=j, col=k)
    fig.add_trace(go.Bar(x=pxfig.data[1]['x'], y=pxfig.data[1]['y'], marker=dict(color='lightblue'), name='NPAP'), row=j, col=k)
fig.update_xaxes(categoryorder='total descending')
fig.update_layout(barmode='stack', template='plotly_white')
for trace in range(2,8):
    fig.data[trace]['showlegend'] = False

fig.write_image('figures/fig2x.jpg', width=1000, height=750, scale=3, engine='kaleido')

dfg = dtemp
fig.show()
Code
t1 = px.bar(dfg[dfg['Region_x']=='Region of the Americas'], x='country', y=['totalpap_cap','totalnpap_cap'], labels={'variable':""},
       title='Total anesthesia providers per 100,000 population (PAP and NPAP)',
       color_discrete_sequence=['darkblue','lightblue']).update_layout(
    xaxis_categoryorder='total descending',
    yaxis_title='Total providers per 100,000',
    xaxis_title=''
)
labeldict={'totalpap_cap':'PAP', 'totalnpap_cap':'NPAP'}

for trace in t1['data']:
    trace['name'] = labeldict[trace['name']]

#t1.write_image('americas_pap_npap.jpg', width=1000, height=500, scale=3, engine='kaleido')
t1

Figure 4

Code
subplots = make_subplots(rows=1, cols=3, horizontal_spacing=0.07)#,
                         #subplot_titles=['Absolute difference in total providers (#)', 'Difference in total provider density','Percentage change in <br> total provider density (percent)'])

def addcustomplot(var):
    data = dfg[((dfg[var].notnull()) & (dfg[var] != np.inf))].sort_values(by=[var], ascending=False).head()
    data = pd.concat([data, dfg[dfg[var].notnull()].sort_values(by=[var], ascending=False).tail()])
    fig = go.Bar(x=data['country'], y=data[var])
    return fig

t1 = dfg[dfg['country']=='North Macedonia']['totalprovidersdiffpct']

#plot 1
subplots.add_trace(addcustomplot('totalprovidersdiff'), row=1,col=1)
subplots.update_yaxes(title_text='Difference in total providers (#)').update_traces(marker=dict(color='#1f77b4'), row=1, col=1)

#plot 2
subplots.add_trace(addcustomplot('totalproviders_cap_diff'), row=1,col=2)
subplots.update_yaxes(title_text='Difference in provider density', row=1,col=2).update_traces(marker=dict(color='#6baed6'), row=1, col=2)

#plot 3
subplots.add_trace(addcustomplot('totalproviders_cap_pct'), row=1,col=3)
subplots.update_yaxes(title_text='% change in provider density', row=1, col=3).update_traces(marker_color='#b3cde3', row=1, col=3)

#subplots.update_layout(title='Top and bottom changes in providers (absolute, per capita, and percent change)')
subplots.update_layout(template='plotly_white', showlegend=False, width=1750, height=600, margin=dict(l=20, r=20, t=20, b=20))

(subplots.update_xaxes(tickangle=270, 
                       tickfont=dict(size=16), 
                       linecolor='lightgray')
 .update_yaxes(showgrid=True, 
               tickfont_size=1, 
               title_font_size=20, 
               title_standoff=0, 
               linecolor='lightgray', 
               linewidth=1)
)

#subplots.write_image('figures/fig 4 - diff_metrics plotly.jpg', engine='kaleido', scale=3)
subplots

as 2x2 grid

Code
# import matplotlib.pyplot as plt
# import seaborn as sns
# import numpy as np

# sns.set_style("whitegrid")

# # Your custom function modified for Seaborn
# def addcustomplot(df, var, ax, y_label, plot_color):
#     data = df[((df[var].notnull()) & (df[var] != np.inf))].sort_values(by=[var], ascending=False).head()
#     data = data.append(df[df[var].notnull()].sort_values(by=[var], ascending=False).tail())
#     plot = sns.barplot(x='country', y=var, data=data, ax=ax, palette=[plot_color])
#     plot.set(xlabel=None)  # remove the x axis label "country"
#     plot.set(ylabel=y_label)  # use the current subplot titles as y axis labels
#     plot.yaxis.label.set_fontsize(12)  # Increase y-axis label font size
#     for item in plot.get_xticklabels():  # rotate x labels for better fit
#         item.set_rotation(90)
#         item.set_fontsize(12)  # Increase x-axis label font size
#     return ax

# # Set up the subplot structure with a 2x2 grid, but we will only use three of these
# fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(15, 15), gridspec_kw={'height_ratios': [1, 1], 'hspace': 0.5, 'wspace': 0.2})

# # Add your plots and increase y axis label font size

# # Define custom colors to match the desired shades
# darkblue = '#1f77b4'     # Equivalent to Plotly's 'darkblue'
# middleblue = '#6baed6'   # A middle shade of blue
# lightblue = '#b3cde3'    # Equivalent to Plotly's 'lightblue'

# colors = [darkblue, middleblue, lightblue]

# addcustomplot(dfg, 'totalprovidersdiff', axes[0, 0], 'Difference in total providers (#)', colors[0])
# addcustomplot(dfg, 'totalproviders_cap_diff', axes[0, 1], 'Difference in provider density', colors[1])
# addcustomplot(dfg, 'totalproviders_cap_pct', axes[1, 0], '% change in provider density', colors[2])

# for i in range(2):
#     for j in range(2):
#         axes[i, j].get_yaxis().get_label().set_fontsize(14)
#         axes[i, j].yaxis.labelpad = 10  # Increase space between y-axis label and the grid
#         axes[i, j].grid(axis='y', linestyle='--', alpha=0.5)  # Add gridlines for y-axis

# # Remove the top and right side borders of the plots
# for ax in axes.flat:
#     ax.spines['top'].set_visible(False)
#     ax.spines['right'].set_visible(False)

# # Remove the unused subplot
# fig.delaxes(axes[1, 1])

# # Adjust bottom margin to make space for x-axis labels
# plt.subplots_adjust(bottom=.2)

# # Tight layout often produces nice results
# # It adjusts subplot params so that subplots are nicely fit in the figure
# plt.tight_layout()

# # Save your figure
# plt.savefig('figures/fig 4 - diff_metrics.jpg', dpi=300, bbox_inches='tight')

# # Display the figure
# plt.show()

as 3x1 grid

Code
################
############ make into 1 row

import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

sns.set_style("whitegrid")

# Your custom function modified for Seaborn
def addcustomplot(df, var, ax, y_label, plot_color):
    data = df[((df[var].notnull()) & (df[var] != np.inf))].sort_values(by=[var], ascending=False).head()
    data = pd.concat([data, dfg[dfg[var].notnull()].sort_values(by=[var], ascending=False).tail()])
    plot = sns.barplot(x='country', y=var, data=data, ax=ax, palette=[plot_color])
    plot.set(xlabel=None)  # remove the x axis label "country"
    plot.set(ylabel=y_label)  # use the current subplot titles as y axis labels
    plot.yaxis.label.set_fontsize(12)  # Increase y-axis label font size
    for item in plot.get_xticklabels():  # rotate x labels for better fit
        item.set_rotation(90)
        item.set_fontsize(12)  # Increase x-axis label font size
    return ax

# Set up the subplot structure with a 2x2 grid, but we will only use three of these
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(20, 6), gridspec_kw={'wspace': 0.3})

# Add your plots and increase y axis label font size

# Define custom colors to match the desired shades
darkblue = '#1f77b4'     # Equivalent to Plotly's 'darkblue'
middleblue = '#6baed6'   # A middle shade of blue
lightblue = '#b3cde3'    # Equivalent to Plotly's 'lightblue'

colors = [darkblue, middleblue, lightblue]

addcustomplot(dfg, 'totalprovidersdiff', axes[0], 'Difference in total providers (#)', colors[0])
addcustomplot(dfg, 'totalproviders_cap_diff', axes[1], 'Difference in providers per 100,000 population', colors[1])
addcustomplot(dfg, 'totalproviders_cap_pct', axes[2], '% change in providers per 100,000 population', colors[2])

for i in range(3):
    axes[i].get_yaxis().get_label().set_fontsize(14)
    axes[i].yaxis.labelpad = 10  # Increase space between y-axis label and the grid
    axes[i].grid(axis='y', linestyle='--', alpha=0.5)  # Add gridlines for y-axis

# Remove the top and right side borders of the plots
for ax in axes.flat:
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)

# Remove the unused subplot
#fig.delaxes(axes[1, 1])

# Adjust bottom margin to make space for x-axis labels
plt.subplots_adjust(bottom=.2)

# Tight layout often produces nice results
# It adjusts subplot params so that subplots are nicely fit in the figure
plt.tight_layout()

# Save your figure
plt.savefig('figures/fig 4 - diff_metrics.jpg', dpi=1200, bbox_inches='tight')

# Display the figure
plt.show()

Response rate

There are 6 possible groups of responses:

  1. UN member countries, contacted responded
  2. UN member countries, contacted, didn’t respond
  3. UN member countries, did not contact
  4. non UN member countries, contacted, responded
  5. non UN member countries, contacted, didn’t respond
  6. non UN member countries, did not contact
Code
df2['Email'] = df2.apply(lambda y: np.nan if y['Email']=="" else y['Email'], axis=1)
df2['Home Country'] = df2.apply(lambda x: np.nan if x['Home Country']=="" else x['Home Country'], axis=1)
Code
un = pd.read_csv("country responses.csv")
un = un[['allmembers', 'code3']].copy()
un = un[un['allmembers'].notnull()]
un['allmembers'] = un['allmembers'].str.strip()
Code
templist = ['Bosnia and Herzegovina',
 'Niue',
 'Somaliland',
 'Taiwan, Republic of China',
 'Tokelau']

for i in templist:
    df['code3'] = df.apply(lambda x: 'nil' if x['country'] == i else x['code3'], axis=1)
Code
#countries with responses are those with total providers >1
countrieswithresponses = df[df['totalproviders']>0][['country','code3']]

# UN countries with responses are those countries with responses, that are also in the UN list
un['response'] = un.apply(lambda x: "Has data" if x['allmembers'] in countrieswithresponses['country'].tolist() else 'Have no data', axis=1)
nodata = un['response'].str.contains('Have no data')

# for those countries in the UN list with NO responses, do we have emails for them?
un.loc[nodata,'response'] = un.loc[nodata].apply(lambda x: "No data, contacted" if x['allmembers'] in df2.query('Email.notnull()')['Home Country'].tolist() else "No data, not contacted", axis=1)
un
allmembers code3 response
0 Afghanistan AFG No data, contacted
1 Albania ALB Has data
2 Algeria DZA Has data
3 Andorra AND No data, not contacted
4 Angola AGO No data, contacted
... ... ... ...
188 Venezuela VEN Has data
189 Viet Nam VNM Has data
190 Yemen YEM Has data
191 Zambia ZMB Has data
192 Zimbabwe ZWE Has data

193 rows × 3 columns

Code
px.histogram(un, x='response').update_xaxes(categoryorder='total descending')
Code
un[un['response'].str.contains('No data')]['allmembers'].tolist()
['Afghanistan',
 'Andorra',
 'Angola',
 'Antigua and Barbuda',
 'Armenia',
 'Azerbaijan',
 'Bahamas',
 'Bahrain',
 'Barbados',
 'Bosnia and Herzegovina',
 'Botswana',
 'Bulgaria',
 'Cape Verde',
 'Cameroon',
 'Comoros',
 'Congo (Brazzaville)',
 'Djibouti',
 'Dominica',
 'Equatorial Guinea',
 'Grenada',
 'Guinea',
 'Guinea-Bissau',
 'Haiti',
 'Ireland',
 'Jamaica',
 'Korea, North',
 'Kyrgyzstan',
 'Liechtenstein',
 'Monaco',
 'Qatar',
 'Saint Kitts and Nevis',
 'Saint Lucia',
 'Saint Vincent and the Grenadines',
 'San Marino',
 'Sao Tome and Principe',
 'Seychelles',
 'Slovakia',
 'South Sudan',
 'Suriname',
 'Tajikistan',
 'Turkmenistan',
 'Ukraine',
 'United Arab Emirates',
 'Uzbekistan']
Code
un['response'].value_counts()
response
Has data                  149
No data, contacted         23
No data, not contacted     21
Name: count, dtype: int64
Code
show(un[un['response']=='No data, contacted'])
allmembers code3 response
Loading... (need help?)
Code
# what are the places with responses that aren't in the UN list?
countrieswithresponses[~countrieswithresponses['country'].isin(un['allmembers'].tolist())]['country'].value_counts()
country
Palestine                    3
Taiwan, Republic of China    1
Cook Islands                 1
Somaliland                   1
Niue                         1
Tokelau                      1
Name: count, dtype: int64
Code
# are there places with responses that aren't in the contact list!?
countrieswithresponses[~countrieswithresponses['country'].isin(df2['Home Country'].tolist())]
country code3
144 Somaliland nil
Code
# are there countries in the contact list that aren't in the un list?
print('these are all non-countries')
df2[~df2['Home Country'].isin(un['allmembers'].tolist())]['Home Country'].value_counts()
these are all non-countries
Home Country
Taiwan, Republic of China    3
West Africa                  2
Palestine                    2
Kosovo                       2
French Polynesia             2
Hong Kong                    2
Republika Srpska             1
Aruba                        1
Bermuda                      1
Puerto Rico                  1
Cook Islands                 1
Niue                         1
Tokelau                      1
Name: count, dtype: int64
Code
# places in the contact list, that aren't countries, that dont have responses
df2[df2['Email'].notnull() & ~df2['Home Country'].isin(un['allmembers'].tolist()) & ~df2['Home Country'].isin(countrieswithresponses['country'].tolist())]['Home Country'].value_counts()
Home Country
Kosovo              2
French Polynesia    2
Hong Kong           2
West Africa         1
Republika Srpska    1
Puerto Rico         1
Name: count, dtype: int64

In summary:

Code
print('There are 193 UN member countries')
print('Of these, we have data from ',un['response'].value_counts()['Has data'],' of them')
print('Of the countries with no data, we had contact information and tried to contact ',un['response'].value_counts()['No data, contacted'], 'of them')
print('Of the countries with no data, we lacked contact information for',un['response'].value_counts()['No data, not contacted'], 'of them')
print('We also have data from places that are not countries')
print('There are',len(countrieswithresponses[~countrieswithresponses['country'].isin(un['allmembers'].tolist())]['country'].value_counts()), 'responses from those')
# what about non countries that we attempted to contact?
There are 193 UN member countries
Of these, we have data from  149  of them
Of the countries with no data, we had contact information and tried to contact  23 of them
Of the countries with no data, we lacked contact information for 21 of them
We also have data from places that are not countries
There are 6 responses from those

How many countries w/ providers <5

Code
# realcountries is the countries from dfg that are present in the df called 'un', which is a csv of un member countries
# necessary to exclude those answers that aren't real countries
realcountries = dfg.loc[dfg['code3'].isin(un['code3'].value_counts().index)]
Code
# number of countries w/ pap <5
len(realcountries[realcountries['totalpap_cap']<5])
75
Code
# number of countries w/ total prov <5
len(realcountries[realcountries['totalproviders_cap']<5])
64

acknowledgements

Code
df[df['interest']=='Yes'][['name','email','country']]
name email country
0 Francisco A Lobo francisco.lobo@me.com Portugal
1 KARTIK MUDLIAR mudliarkartik@gmail.com Fiji
2 Nebojsa Ladjevic nladjevic@yahoo.com Serbia
5 christian Wamberg christian.aage.wamberg.02@regionh.dk Denmark
6 Owain Thomas odt@cantab.net Sweden
... ... ... ...
214 ZOUMENOU ezoumenou@gmail.com Benin
215 DEHDOUH ABDELKADER dehdouhaek@hotmail.com Algeria
216 Ngboh-kassa Kainy Boniface bonifacekassa01@gmail.com Central African Republic
217 Bilal Dah bilalmodah@yahoo.fr Mauritania
218 Dr Philippe WELTER phwelter@pt.lu Luxembourg

168 rows × 3 columns

Code
df2= pd.read_csv('gawstemplate.csv')
Code
df3 = pd.DataFrame(columns=['record_id','name','email','country','contact_info_complete','record_id_a74727', 'ack_consent', 'consent_for_acknowledgement_complete'])
df3['name'] = df2['name']
df3['email'] = df2['email']
df3['country'] = df2['country']
Code
df3 = df3.drop_duplicates(subset='email')
Code
df3['record_id'] = df3.index
Code
df3
record_id name email country contact_info_complete record_id_a74727 ack_consent consent_for_acknowledgement_complete
0 0 Rakotondrainibe aaurelia.rakotondrainibe@gmail.com Madagascar NaN NaN NaN NaN
1 1 Amanda Cormier acormier@cas.ca Canada NaN NaN NaN NaN
2 2 Prof. Agzam ZHUMADILOV agzamzh@gmail.com Kazakhstan NaN NaN NaN NaN
3 3 Ahmed El Safi ahmadalsafi@gmail.com Sudan NaN NaN NaN NaN
4 4 Dr. Abdelkarim AlOweidi akaloweidi@hotmail.com Jordan NaN NaN NaN NaN
... ... ... ... ... ... ... ... ...
133 133 Viktor Kubricht viktor.kubricht@gmail.com Czech Republic NaN NaN NaN NaN
134 134 Vishaal Kissoon vishq5@yahoo.com Mauritius NaN NaN NaN NaN
135 135 Veronica Varas vvaras@sachile.cl Chile NaN NaN NaN NaN
136 136 Yonatan M. Andemeskel yonimer2@gmail.com Eritrea NaN NaN NaN NaN
137 137 ZORICA KARDOŠ zorica.kardos@gmail.com Slovenia NaN NaN NaN NaN

138 rows × 8 columns

Code
from redcap import Project
from config import *
# oxygen calculator database
api_key = logins['REDCAP_CALCULATOR']
api_url = 'https://redcap.ucsf.edu/api/'

project = Project(api_url, api_key)
Code
project.export_records(format_type='df')
name email country contact_info_complete ack_consent consent_for_acknowledgement_complete
record_id
1 Rakotondrainibe aaurelia.rakotondrainibe@gmail.com Madagascar 0 1.0 2
2 Amanda Cormier acormier@cas.ca Canada 0 NaN 0
3 Prof. Agzam ZHUMADILOV agzamzh@gmail.com Kazakhstan 0 NaN 2
4 Ahmed El Safi ahmadalsafi@gmail.com Sudan 0 NaN 0
5 Dr. Abdelkarim AlOweidi akaloweidi@hotmail.com Jordan 0 NaN 0
... ... ... ... ... ... ...
135 Vishaal Kissoon vishq5@yahoo.com Mauritius 0 1.0 2
136 Veronica Varas vvaras@sachile.cl Chile 0 1.0 2
137 Yonatan M. Andemeskel yonimer2@gmail.com Eritrea 0 NaN 0
138 ZORICA KARDOŠ zorica.kardos@gmail.com Slovenia 0 NaN 0
139 Tyler Law tyler.law@gmail.com Burkina Faso 2 NaN 0

139 rows × 6 columns

verification status

are there any countries in the verification list that aren’t in the countries with responses? There shouldn’t be…

Code
verification_df[~verification_df['Country'].isin(countrieswithresponses['country'].tolist())]['Country']
Series([], Name: Country, dtype: object)

are there any countries with data that are not in the country verification list? Shouldn’t be - the verification list should contain all the countries with data, and whether or not they need to be verified.

Code
countrieswithresponses[~countrieswithresponses['country'].isin(verification_df['Country'].tolist())]['country']
Series([], Name: country, dtype: object)

Number of countries undergoing verification

Code
# i'm so sorry this has to be done but bosnia has a response to OTHER_1 providers, so it's included in the groupby, but NOT included in the countries with responses!
# so it should really be deleted from dfg. dfg should have been defined as countries with responses.
dfg=dfg[~dfg['country'].str.contains('Bosnia')]
dfg['verification_status'].value_counts().sum()
155
Code
len(countrieswithresponses.value_counts().index)
155

total number of countries is 156 because I deleted bosnia in dfg earlier, see comment

Code
df['country'].value_counts()
country
Morocco      4
Croatia      4
Austria      4
Slovenia     3
Zambia       3
            ..
Swaziland    1
Namibia      1
Jordan       1
Gambia       1
Senegal      1
Name: count, Length: 156, dtype: int64
Code
dfg['verification_status'].value_counts()
verification_status
Verification not required    67
Attempted verification       54
Verified                     34
Name: count, dtype: int64
Code
dfg['verification_status'].value_counts(normalize=True)
verification_status
Verification not required    0.432258
Attempted verification       0.348387
Verified                     0.219355
Name: proportion, dtype: float64

trainee data

Code
traineecols = ['paptrainee','npaptrainee','totaltrainee','gradtrainee']
regiondrop = ['African Region', 'Eastern Mediterranean Region', 'European Region',
       'Region of the Americas', 'South-East Asia Region',
       'Western Pacific Region']
Code
def crossfx2(var1):
    taba =  pd.crosstab(dfg['Region_x'], dfg['wbincome'], values=dfg[var1], aggfunc=sum, margins=True)
    taba = taba.drop(['High income', 'Low income','Lower middle income','Upper middle income'],axis=1)
    tabb = pd.crosstab(dfg['Region_x'], dfg['wbincome'], values=dfg[var1], aggfunc=sum, normalize='columns', margins=True)
    tabb = tabb.drop(['High income', 'Low income','Lower middle income','Upper middle income'],axis=1)
    tab2 = taba.join(tabb, lsuffix='_n', rsuffix = '_%')
    tab2.columns=[i+'_n', i+'%']
    return tab2
Code
dfs = []

for i in traineecols:
    tab2 = crossfx2(i)
    dfs.append(tab2)

traineexregion = pd.concat(dfs, axis=1)
traineexregion
paptrainee_n paptrainee% npaptrainee_n npaptrainee% totaltrainee_n totaltrainee% gradtrainee_n gradtrainee%
Region_x
African Region 2566.500000 0.027735 5597.0 0.217186 8163.500000 0.069002 2857.500000 0.062986
Eastern Mediterranean Region 4582.333333 0.049519 2491.0 0.096661 7073.333333 0.059788 1956.000000 0.043115
European Region 37528.166667 0.405548 5016.5 0.194661 42544.666667 0.359611 13854.333333 0.305382
Region of the Americas 22134.500000 0.239197 8181.0 0.317456 30315.500000 0.256244 10642.500000 0.234586
South-East Asia Region 16262.333333 0.175739 115.0 0.004462 16377.333333 0.138430 4505.833333 0.099319
Western Pacific Region 9463.000000 0.102262 4370.0 0.169574 13833.000000 0.116924 11551.000000 0.254611
All 92536.833333 NaN 25770.5 NaN 118307.333333 NaN 45367.166667 NaN
Code
def crossfx3(var1):
    taba =  pd.crosstab(dfg['wbincome'], dfg['Region_x'], values=dfg[var1], aggfunc=sum, margins=True)
    taba = taba.drop(regiondrop,axis=1)
    tabb = pd.crosstab(dfg['wbincome'], dfg['Region_x'], values=dfg[var1], aggfunc=sum, normalize='columns', margins=True)
    tabb = tabb.drop(regiondrop,axis=1)
    tab2 = taba.join(tabb, lsuffix='_n', rsuffix = '_%')
    tab2.columns=[i+'_n', i+'%']
    return tab2
Code
dfs = []

for i in traineecols:
    tab2 = crossfx3(i)
    dfs.append(tab2)

traineexwbincome = pd.concat(dfs, axis=1)
traineexwbincome = traineexwbincome.reindex(['High income', 'Upper middle income','Lower middle income','Low income','All'])
traineexwbincome
paptrainee_n paptrainee% npaptrainee_n npaptrainee% totaltrainee_n totaltrainee% gradtrainee_n gradtrainee%
wbincome
High income 42503.666667 0.459316 17040.5 0.661241 59544.166667 0.503301 21652.833333 0.477280
Upper middle income 28562.833333 0.308664 619.0 0.024020 29181.833333 0.246661 15099.333333 0.332825
Lower middle income 20255.333333 0.218889 2370.0 0.091966 22625.333333 0.191242 6917.000000 0.152467
Low income 1215.000000 0.013130 5741.0 0.222774 6956.000000 0.058796 1698.000000 0.037428
All 92536.833333 NaN 25770.5 NaN 118307.333333 NaN 45367.166667 NaN

This is the same as below

Code
wbc[['paptrainee','npaptrainee','totaltrainee','gradtrainee']].reset_index().style.format(precision=0)
  index paptrainee npaptrainee totaltrainee gradtrainee
0 0 42504 17040 59544 21653
1 1 1215 5741 6956 1698
2 2 20255 2370 22625 6917
3 3 28563 619 29182 15099
Code
t1 = who[['Region','paptrainee','npaptrainee','totaltrainee','gradtrainee']].style.format(precision=0)
t1
  Region paptrainee npaptrainee totaltrainee gradtrainee
0 African Region 2566 5597 8164 2858
1 Eastern Mediterranean Region 4582 2491 7073 1956
2 European Region 37528 5016 42545 13854
3 Region of the Americas 22134 8181 30316 10642
4 South-East Asia Region 16262 115 16377 4506
5 Western Pacific Region 9463 4370 13833 11551
Code
from IPython.display import display, HTML
from jinja2 import Template
Code
traineelabels = ['PAP trainees','NPAP trainees','Total trainees', 'Graduates per year']
table_head =  '''
<html>
<head>
<style>
.table-style {
    border: 1px solid white;
    text-align: center;
    border-collapse: collapse;
}

.table-style td{
    padding: 5px 5px;
}

.darkgray {
    background-color: #d9d9d9;
}

.section-break {
    border-top: 1.5pt solid black;
    border-bottom: 1.5pt solid black;
    background-color: #efefef;
}

.bottom-solid-hard{
    border-bottom: 1.5pt solid black;
}

.left-solid {
    border-left: 1pt solid black;
}

.right-solid {
    border-right: 1pt solid black;
}

.right-dash {
    border-right: 1pt dotted black;
}


</style>
</head>
'''


table_str='''

<table class='table-style'>
    <thead>
        <tr class='darkgray'> 
            <td></td>
            {% for i in traineelabels %}
                <td class='left-solid'>{{i}} </td>
            {% endfor %}
        </tr>

        <tr class='darkgray bottom-solid-hard'>
        <th></th>
        {# ######## n, % headers #########          #}

        {% for i in traineelabels %}
            <th class='left-solid'>n (%)</th>
        {% endfor %}
    
        </tr>
    </thead>

    <tbody>
        <tr class='section-break'><td colspan = 5> Income Group </td> </tr>
        {% for i in traineexwbincome.index %}
            <tr>
                <td> {{i}} </td>
            {% for j in traineecols %}
                <td class='left-solid'> {{ traineexwbincome.at[i,j+'_n']|int }} ({{ (traineexwbincome.at[i,j+'%']*100)|int}}) </td>
            {% endfor %}
            </tr>
        {% endfor %}

        <tr class='section-break'><td colspan = 5> WHO Region </td> </tr>

        {% for i in traineexregion.index %}
            <tr>
                <td> {{i}}
                {% for j in traineecols %}
                    <td class='left-solid'> {{ traineexregion.at[i,j+'_n']|int}} ({{ (traineexregion.at[i,j+'%']*100)|int}}) </td>
                {%endfor%}
            </tr>
        {% endfor %}
    </tbody>

</table>
</html>

'''

template=Template(table_head+table_str)

with open('table_trainee.html', "w") as text_file:
    print(template.render(traineecols = traineecols, traineexwbincome = traineexwbincome, traineexregion=traineexregion, traineelabels=traineelabels), file=text_file)

HTML(template.render(traineecols = traineecols, traineexwbincome = traineexwbincome, traineexregion=traineexregion, traineelabels=traineelabels))
PAP trainees NPAP trainees Total trainees Graduates per year
n (%) n (%) n (%) n (%)
Income Group
High income 42503 (45) 17040 (66) 59544 (50) 21652 (47)
Upper middle income 28562 (30) 619 (2) 29181 (24) 15099 (33)
Lower middle income 20255 (21) 2370 (9) 22625 (19) 6917 (15)
Low income 1215 (1) 5741 (22) 6956 (5) 1698 (3)
All 92536 (0) 25770 (0) 118307 (0) 45367 (0)
WHO Region
African Region 2566 (2) 5597 (21) 8163 (6) 2857 (6)
Eastern Mediterranean Region 4582 (4) 2491 (9) 7073 (5) 1956 (4)
European Region 37528 (40) 5016 (19) 42544 (35) 13854 (30)
Region of the Americas 22134 (23) 8181 (31) 30315 (25) 10642 (23)
South-East Asia Region 16262 (17) 115 (0) 16377 (13) 4505 (9)
Western Pacific Region 9463 (10) 4370 (16) 13833 (11) 11551 (25)
All 92536 (0) 25770 (0) 118307 (0) 45367 (0)
Code
td = {'paptrainee':'Physicians','npaptrainee':'NPAPs'}

fig = px.bar(who, x='Region', y=['paptrainee','npaptrainee'], barmode='group', labels={'paptrainee':'Physician trainees'}
    ).update_layout(template='plotly_white', 
                    legend_title='Trainee type',
                    yaxis_title='Number of trainees',
                    xaxis_categoryorder='total descending').for_each_trace(lambda t: t.update(name = td[t.name]))

fig.show()

fig.write_image('trainees.jpg', width=1000, height=750, scale=2, engine='kaleido')
Code
fig = px.bar(wbc, x='wbincome', y=['paptrainee','npaptrainee'], barmode='stack', labels={'paptrainee':'Physician trainees'}
    ).update_layout(template='plotly_white', 
                    legend_title='Trainee type',
                    yaxis_title='Number of trainees',
                    xaxis_categoryorder='total descending')
td = {'paptrainee':'Physicians','npaptrainee':'NPAPs'}
fig.for_each_trace(lambda t: t.update(name = td[t.name]))

fig.show()

fig.write_image('trainees.jpg', width=1000, height=750, scale=2, engine='kaleido')

ICU Data

Code
means = df.groupby(by=['Region', 'wbincome']).mean(numeric_only=True)

Does a certified pathway exist for becoming an ICU physician?

Code
pd.crosstab(df['Region'], df['critcare'], margins=True, normalize=False)
critcare I don't know, or cannot provide data No Yes All
Region
African Region 5 27 9 41
Eastern Mediterranean Region 0 9 15 24
European Region 2 11 43 56
Region of the Americas 0 3 27 30
South-East Asia Region 0 3 11 14
Western Pacific Region 0 15 18 33
All 7 68 123 198
Code
px.bar(df, x='Region', color='critcare', barmode='stack')
Code
df['critcare'].value_counts()
critcare
Yes                                     123
No                                       68
I don't know, or cannot provide data      7
Name: count, dtype: int64

What proportion of patients have care by ICU physician?

Code
df['crit_providers'].mean()
54.69178082191781
Code
df.pivot_table(index='Region', columns='wbincome', values='crit_providers', aggfunc=np.mean, margins=True)
wbincome High income Low income Lower middle income Upper middle income All
Region
African Region 0.000000 43.357143 36.700000 12.500000 37.000000
Eastern Mediterranean Region 42.500000 2.500000 54.800000 39.500000 44.050000
European Region 88.923077 NaN 100.000000 59.000000 80.552632
Region of the Americas 100.000000 NaN 32.500000 74.235294 70.500000
South-East Asia Region NaN NaN 61.000000 39.500000 53.181818
Western Pacific Region 51.142857 NaN 26.583333 12.857143 29.500000
All 76.585366 38.250000 42.977273 52.044444 54.691781
Code
dfg.pivot_table(index='Region_x', columns='wbincome', values=['totalproviders_cap'], aggfunc=np.sum, margins=True).style.format(precision=1)
  totalproviders_cap
wbincome High income Low income Lower middle income Upper middle income All
Region_x          
African Region 6.0 29.1 26.6 10.3 72.0
Eastern Mediterranean Region 63.5 8.7 42.4 43.7 158.4
European Region 893.4 nan 16.1 158.4 1067.9
Region of the Americas 86.9 nan 30.5 106.2 223.7
South-East Asia Region nan nan 23.1 20.6 43.6
Western Pacific Region 120.0 nan 105.6 96.2 321.8
All 1169.7 37.8 244.3 435.4 1887.3
Code
t1 = dfg.pivot_table(index='Region_x', columns='wbincome', values=['totalproviders_cap'], aggfunc=np.mean)
t1 = t1.droplevel(0, axis=1)
t1
px.imshow(t1)
Code

t1 = px.scatter(dfg[dfg['crit_providers'].notnull()], x='country',y='crit_providers', color='wbincome', template='plotly_white',
category_orders={'wbincome':incomeorder}, 
labels={'crit_providers':'Percent of patients cared for by critical care physician',
'wbincome':'Income Group',
'country':'Country'}).update_layout(xaxis_categoryorder='total descending', legend_orientation='h', legend_y=1.1)

t1.write_image('prop_crit.jpg', width=1000, height=750, scale=2, engine='kaleido')
t1

How many ICU specialists are there?

Code
px.box(dfg, x='wbincome', y='crit_number', points=False).update_layout(xaxis_categoryorder='total descending')
Code
px.scatter(dfg, x='country', y='crit_number').update_layout(xaxis_categoryorder='total descending')

Is the training sufficent to provide ICU care solo?

Code
pd.crosstab(df['wbincome'], df['training_sufficient'], normalize='index')
training_sufficient No Yes
wbincome
High income 0.357143 0.642857
Low income 0.571429 0.428571
Lower middle income 0.510638 0.489362
Upper middle income 0.600000 0.400000
Code
df['training_sufficient'].value_counts(normalize=True)
training_sufficient
Yes    0.505747
No     0.494253
Name: proportion, dtype: float64
Code
pd.crosstab(df['Region'], df['training_sufficient'])
training_sufficient No Yes
Region
African Region 15 20
Eastern Mediterranean Region 10 10
European Region 10 38
Region of the Americas 23 6
South-East Asia Region 8 5
Western Pacific Region 20 9
Code
px.histogram(df, x='Region', color='training_sufficient', barnorm='percent')

Is there a certified pathway for ICU nursing?

Code
df['crit_nurse'].value_counts(normalize=True)
crit_nurse
No     0.516484
Yes    0.483516
Name: proportion, dtype: float64
Code
pd.crosstab(df['Region'], df['crit_nurse'], normalize='index')
crit_nurse No Yes
Region
African Region 0.631579 0.368421
Eastern Mediterranean Region 0.285714 0.714286
European Region 0.490196 0.509804
Region of the Americas 0.464286 0.535714
South-East Asia Region 0.428571 0.571429
Western Pacific Region 0.666667 0.333333
Code
px.scatter(df, x='country', y='crit_nurse')

Gender

Code
data = dfg.groupby(by='wbincome').mean(numeric_only=True)[['totalgender','totalpap_gender','totalnpap_gender']]
data2 = dfg.groupby(by='Region_x').mean(numeric_only=True)[['totalgender','totalpap_gender','totalnpap_gender']]
Code
incomeorder = ['High income', 'Upper middle income','Lower middle income','Low income']
Code
data = data.reindex(incomeorder)
data
totalgender totalpap_gender totalnpap_gender
wbincome
High income 48.241135 43.662879 63.242188
Upper middle income 42.631944 43.388889 35.150000
Lower middle income 37.120370 36.959459 39.138889
Low income 31.910417 30.525000 35.740741
Code
data2
totalgender totalpap_gender totalnpap_gender
Region_x
African Region 32.315236 29.403226 38.592172
Eastern Mediterranean Region 35.213542 34.625000 51.750000
European Region 54.280952 51.343434 67.444444
Region of the Americas 48.363636 48.787879 52.812500
South-East Asia Region 36.944444 40.388889 25.400000
Western Pacific Region 35.623457 33.250000 36.348214
Code
gendercols = ['totalpap_gender','totalnpap_gender','totalgender']
genderlabels = ['PAPs','NPAPs','All providers']

table_str='''
<table class='table-style'>
    <thead>
        {# ######## FIRST MAKE THE COLUMNS IN THE HEADER #########  #}
                {# ######## THE COLUMNS #########  #}
        <tr class='darkgray'> 
            <td></td>
            {% for i in genderlabels %}
                <td class='left-solid'>{{i}} </td>
            {% endfor %}
        </tr>
    </thead>

    <tbody>
        {# ######## NOW MAKE THE BODY #########  #}
                {# ######## FIRST SECTION #########  #}
        <tr class='section-break'><td colspan = 4> Income Group </td> </tr>
        
                {# ######## START EACH ROW WITH A LABEL #########  #}
                {% for i in data.index %}
            <tr>
                <td> {{i}} </td>
                        {# ######## NOW EACH COLUMN FOR THAT ROW FROM THE DATAFRAME #########  #}
            {% for j in gendercols %}
                <td class='left-solid'> {{ data.at[i,j]|int }} </td>
            {% endfor %}
            </tr>
        {% endfor %}

        {# ######## Second SECTION #########  #}
        <tr class='section-break'><td colspan = 4> Region </td> </tr>
        
                {# ######## START EACH ROW WITH A LABEL #########  #}
                {% for i in data2.index %}
            <tr>
                <td> {{i}} </td>
                        {# ######## NOW EACH COLUMN FOR THAT ROW FROM THE DATAFRAME #########  #}
            {% for j in gendercols %}
                <td class='left-solid'> {{ data2.at[i,j]|int }} </td>
            {% endfor %}
            </tr>
        {% endfor %}

    </tbody>

</table>
</html>

'''

template=Template(table_head+table_str)

with open('table_gender.html', "w") as text_file:
    print(template.render(data=data, data2=data2, genderlabels=genderlabels, gendercols=gendercols), file=text_file)

#PUT ALL YOUR VARIABLES IN HERE
HTML(template.render(data=data, data2=data2, genderlabels=genderlabels, gendercols=gendercols))
PAPs NPAPs All providers
Income Group
High income 43 63 48
Upper middle income 43 35 42
Lower middle income 36 39 37
Low income 30 35 31
Region
African Region 29 38 32
Eastern Mediterranean Region 34 51 35
European Region 51 67 54
Region of the Americas 48 52 48
South-East Asia Region 40 25 36
Western Pacific Region 33 36 35
Code
t1 = px.scatter(dfg, x='country',y='totalgender', color='wbincome', template='plotly_white', labels={
    'wbincome':'Income group',
    'totalgender':'% of providers who are women',
    'country_shr':"Country"
}, category_orders={'wbincome':incomeorder}).update_layout(xaxis_categoryorder='total descending', 
legend_orientation='h',
legend_y=1.1)

t1.write_image('prop_women.jpg', width=1000, height=500, scale=2, engine='kaleido')
t1