Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions sdg/outputs/OutputOpenSdg.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,10 @@ def build(self, language=None):
stats_reporting = sdg.stats.reporting_status(all_meta, self.reporting_status_grouping_fields)
status = status & sdg.json.write_json('reporting', stats_reporting, ftype='stats', site_dir=site_dir)

# Progress status
stats_progress = sdg.stats.progress_status(all_meta)
status = status & sdg.json.write_json('progress', stats_progress, ftype='stats', site_dir=site_dir)

disaggregation_status_service = sdg.DisaggregationStatusService(
site_dir,
self.indicators,
Expand Down
106 changes: 105 additions & 1 deletion sdg/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

@author: dashton

Aggregate information for reporting statistics
Aggregate information for reporting and progress statistics
"""
import pandas as pd

Expand Down Expand Up @@ -111,3 +111,107 @@ def one_status_report(g, status):
del output['extra_fields']['goal_number']

return output


def progress_status(all_meta, extra_fields=None):
    """Aggregate indicator progress statuses by goal and any extra fields.

    Standalone indicators and placeholder indicators are left out of the
    counts. Missing values are filled with 'status.not_specified'; such
    indicators count towards the totals, but are only reported as a status
    of their own if some indicator uses that value explicitly.

    Args:
        all_meta: A dictionary containing all metadata items, keyed by
            indicator id
        extra_fields: List of fields to group stats by, in addition to goal

    Returns:
        Dictionary of progress statuses at goal (plus any extra fields) and
        total level, with the keys 'statuses', 'overall', 'extra_fields'
        and 'goals'
    """

    # Make sure 'goal_number' is in the list of fields. Work on a copy so
    # that the caller's list is never mutated.
    grouping_fields = list(extra_fields) if extra_fields is not None else []
    if 'goal_number' not in grouping_fields:
        grouping_fields.append('goal_number')

    # Generate a report of the possible statuses: unique values in
    # first-seen order, taken from all metadata (including the indicators
    # that are omitted below).
    status_values = list(dict.fromkeys(
        meta['progress_status']
        for meta in all_meta.values()
        if 'progress_status' in meta
    ))
    status_report = [{'value': status} for status in status_values]

    # Omit any standalone indicators and any placeholder indicators.
    indicators = {
        k: v for (k, v) in all_meta.items()
        if ('standalone' not in v or v['standalone'] == False)
        and ('placeholder' not in v or v['placeholder'] == '')
    }

    # With nothing to count there are no columns to group by, so return an
    # empty report instead of failing on the dataframe lookups below.
    if not indicators:
        return {
            'statuses': status_report,
            'overall': {
                'statuses': [{'status': status, 'count': 0, 'percentage': 0}
                             for status in status_values],
                'totals': {'total': 0},
            },
            'extra_fields': {field: [] for field in grouping_fields
                             if field != 'goal_number'},
            'goals': [],
        }

    # Pick out only the fields we want from each indicators metadata
    fields = ['progress_status'] + grouping_fields
    rows = [
        {k: meta.get(k) for k in fields}
        for meta in indicators.values()
    ]
    # Convert that into a dataframe.
    meta_df = pd.DataFrame(rows, index=indicators.keys())
    meta_df = meta_df.fillna('status.not_specified')

    # Make sure that numeric columns are numeric.
    def value_is_numeric(value):
        # Always answer with a real boolean: returning the value itself
        # would make an integer 0 look non-numeric.
        return isinstance(value, int) or (
            isinstance(value, str) and value.isnumeric())
    for field in grouping_fields:
        if all(value_is_numeric(x) for x in meta_df[field]):
            meta_df[field] = meta_df[field].apply(pd.to_numeric)

    # Create a separate dataframe for each grouping field.
    grouped_dfs = {}
    for grouping_field in grouping_fields:
        grouped_df = meta_df.pivot_table(
            index=grouping_field,
            columns='progress_status',
            aggfunc='size',
            fill_value=0,
            dropna=False)
        grouped_df['total'] = grouped_df.sum(axis=1)
        grouped_dfs[grouping_field] = grouped_df

    # Helper function for putting together one status report.
    def one_status_report(g, status):
        count = g.get(status, 0)  # If status is missing use 0
        return {'status': status,
                'count': count,
                'percentage': round(100 * count / g['total'], 3)}

    # For a "totals" report, arbitrarily use one of the grouping fields (They
    # are all the same, when it comes to the totals.)
    tot_series = grouped_dfs[grouping_fields[0]].apply(lambda x: x.sum())
    total_report = {
        'statuses': [one_status_report(tot_series, status)
                     for status in status_values],
        'totals': {'total': tot_series['total']}
    }

    # Start to build our output.
    output = {
        'statuses': status_report,
        'overall': total_report,
        'extra_fields': {},
    }

    # Add on a report for each of our grouping fields.
    for field, grouped_df in grouped_dfs.items():
        # Because the goals report is treated differently, standardize on
        # the key of "goal" instead of "goal_number".
        fixed_field_name = field.replace('goal_number', 'goal')
        grouped_report = []
        for _, g in grouped_df.reset_index().iterrows():
            grouped_report.append(
                {
                    fixed_field_name: g[field],
                    'statuses': [one_status_report(g, status)
                                 for status in status_values],
                    'totals': {'total': g['total']}
                })
        output['extra_fields'][field] = grouped_report

    # Treat goals specially, by putting it outside of "extra_fields".
    output['goals'] = output['extra_fields'].pop('goal_number')

    return output