Example Usage

This section demonstrates usage of the gptables API functions and core Classes.

For source code and data used in these examples, please see the examples directory of the package.

Iris - Minimal Example

This example demonstrates use of the gptables.write_workbook function. This API function is designed for production of consistently structured and formatted tables.

Summary statistics from the classic iris dataset are used to build a gptables.GPTable object. Elements of metadata are provided to the corresponding parameters of the class. Where you wish to provide no metadata in required parameters, use None.

Table formatting can be defined as a gptable.Theme, which is passed to the API functions

using the theme parameter. Or you can reply on our default - gptheme.

import gptables as gpt
import pandas as pd
import numpy as np
from pathlib import Path

## Read data and arrange
parent_dir = Path(__file__).parent

iris_data = pd.read_csv(parent_dir / "iris.csv")
iris_data = iris_data.loc[:, ["class", "sepal_length", "sepal_width"]]

iris_summary = iris_data.groupby("class").agg(np.mean)
iris_summary.index = [_[5:].capitalize() for _ in iris_summary.index]
iris_summary.reset_index(inplace=True)
iris_summary.rename(
    columns={
        "index": "Class",
        "sepal_length": "Mean Sepal Length",
        "sepal_width": "Mean Sepal Width",
    },
    inplace=True,
    )

## Define table elements
table_name = "iris_statistics"
title = "Mean Iris$$note2$$ sepal dimensions"
subtitles = [
    "1936 Fisher, R.A; The use of multiple measurements in taxonomic problems$$note1$$",
    "Just another subtitle"
    ]
units = {1:"cm", 2:"cm"}
table_notes = {0: "$$note1$$", 2: "$$note3$$"}
scope = "Iris"
source = "Source: Office for Iris Statistics"
index = {2: 0}  # Column 0 is a level 2 index

# or use kwargs to pass these to the appropriate parameters
kwargs = {
    "table_name": table_name,
    "title": title,
    "subtitles": subtitles,
    "units": units,
    "table_notes": table_notes,
    "scope": scope,
    "source": source,
    "index_columns": index,
    }

## Define our GPTable
iris_table = gpt.GPTable(table=iris_summary, **kwargs)

sheets = {"Iris Flower Dimensions": iris_table}

## Notesheet
notes = {
    "Note reference": ["note1", "note2", "note3"],
    "Note text": ["I've got 99 problems and taxonomy is one.", "Goo Goo Dolls, 1998.", "Just another note"],
    "Useful link": ["[google](https://www.google.com)", "[duckduckgo](https://duckduckgo.com/)", "[ONS](https://www.ons.gov.uk)"],
    }
notes_table = pd.DataFrame.from_dict(notes)

## Use write_workbook to win!
if __name__ == "__main__":
    output_path = parent_dir / "python_iris_gptable.xlsx"
    gpt.write_workbook(
        filename=output_path, 
        sheets=sheets,
        notes_table=notes_table,
        contentsheet_options={"additional_elements": ["subtitles", "scope"]}
        )
    print("Output written at: ", output_path)

Iris - Cover Page

This example demonstrates use of the gptables.Cover class to create a cover page.

A gptables cover pages contain a range of custom text elements, along with a hyperlinked table of contents. Text elements are defined as a gptables.Cover instance, which is passed to the cover parameter of gptables.write_worbook() or gptables.produce_worbook(). In this example, we have also set auto_width to True. This automatically determines the width of the first column on the cover sheet, as well as all columns of the tables of the workbook.

import gptables as gpt
import pandas as pd
import numpy as np
from pathlib import Path
from copy import deepcopy

## Read data and arrange
parent_dir = Path(__file__).parent

iris_data = pd.read_csv(parent_dir / "iris.csv")
iris_data = iris_data.loc[:, ["class", "sepal_length", "sepal_width"]]

iris_summary = iris_data.groupby("class").agg(np.mean)
iris_summary.index = [_[5:].capitalize() for _ in iris_summary.index]
iris_summary.reset_index(inplace=True)
iris_summary.rename(
    columns={
        "index": "Class",
        "sepal_length": "Mean Sepal Length",
        "sepal_width": "Mean Sepal Width",
    },
    inplace=True,
)

## Define table elements
title = "Mean Iris$$note2$$ sepal dimensions"
subtitles = [
    "1936 Fisher, R.A; The use of multiple measurements in taxonomic problems$$note1$$",
    "Just another subtitle",
    ]
units = {"Mean Sepal Length": "cm", "Mean Sepal Width": "cm"}
scope = "Iris"
source = "Source: Office for Iris Statistics"
index = {2: 0}  # Column 0 is a level 2 index

# or use kwargs to pass these to the appropriate parameters
kwargs = {
    "title": title,
    "subtitles": subtitles,
    "units": units,
    "scope": scope,
    "source": source,
    "index_columns": index,
    }

## Define our GPTable
iris_table = gpt.GPTable(table=iris_summary, table_name="iris_statistics", **kwargs)

iris_table_copy = deepcopy(iris_table)
iris_table_copy.set_title("A copy of the first sheet$$note3$$")
iris_table_copy.set_table_name("iris_statistics_copy")

sheets = {
    "Iris Flower Dimensions": iris_table,
    "Copy of Iris Flower Dimensions": iris_table_copy
}

cover = gpt.Cover(
    cover_label="Cover",
    title="A Workbook containing good practice tables",
    intro=["This is some introductory information", "And some more"],
    about=["Even more info about my data", "And a little more"],
    contact=["John Doe", "Tel: 345345345", "Email: [john.doe@snailmail.com](mailto:john.doe@snailmail.com)"],
    )

## Notesheet
notes = {
    "Note reference": ["note1", "note2", "note3"],
    "Note text": ["I've got 99 problems and taxonomy is one.", "Goo Goo Dolls, 1998.", "This is an extra note"],
    }
notes_table = pd.DataFrame.from_dict(notes)

## Use write_workbook to win!
if __name__ == "__main__":
    output_path = parent_dir / "python_iris_cover_gptable.xlsx"
    gpt.write_workbook(
        filename=output_path,
        sheets=sheets,
        cover=cover,
        notes_table=notes_table
    )
    print("Output written at: ", output_path)

Iris - Additional Formatting Example

This example demonstrates additional formatting that is not supported in the gptable.Theme.

Specific columns, rows and cells of the table elements (indexes, column headings and data) can be formatted using the gptable.GPTable(..., additional_formatting = ...) parameter. This parameter takes a list of dictionaries, allowing you to select as many rows, columns or cells as you like.

As with all formatting, supported arguments are desribed in the XlsxWriter documentation.

Any formatting not possibly through this means can be achieved using XlsxWriter Workbook and Worksheet functionality. A gptable.GPWorkbook object is returned when using the gptables.produce_workbook API function. The GPWorkbook.worksheets() function returns a list of GPWorksheet objects, which can also be modified.


import gptables as gpt
import pandas as pd
import numpy as np
from pathlib import Path

## Read data and arrange
parent_dir = Path(__file__).parent

iris_data = pd.read_csv(parent_dir / "iris.csv")

iris_data.rename(
    columns={
        "class": "class",
        "sepal_length": "Sepal Length",
        "petal_length": "Petal Length",
        "petal_width": "Petal Width",
        "sepal_width": "Sepal Width",
    },
    inplace=True,
    )

iris_data["class"] = iris_data.apply(lambda row: row["class"][5:].capitalize(), axis=1)

# Calculate summaries
subtables = []
funcs = [np.mean, np.median]
for func in funcs:
    subtables.append(iris_data.groupby("class").agg(func))
    subtables.append(pd.DataFrame(iris_data.iloc[:,0:4].agg(func).rename("All")).T)
iris_summary = pd.concat(subtables)
iris_summary["Average"] = ["Mean"] * 4 + ["Median"] * 4

# Reshape
iris_summary = iris_summary.reset_index()
iris_summary = iris_summary.melt(["index", "Average"], var_name="Iris feature")
iris_summary = iris_summary.pivot_table(
    index=["Iris feature", "Average"], columns="index", values="value"
    ).reset_index()

## Define table elements
table_name = "iris_statistics"
title = "Iris flower dimensions"
subtitles = [
    "1936 Fisher, R.A; The use of multiple measurements in taxonomic problems",
    [{"bold": True}, "Just", " another subtitle"]
    ]
units = {key: "cm" for key in range(2,6)}
scope = "Iris"
index = {1: 0, 2: 1}

## Define additional formatting
# Columns can be references by name or number
# Rows may only be referenced by number
# Column and row numbers refer to the table elements, including indexes and column headings
additional_formatting = [
    {
        "column": {
            "columns": ["Setosa", "Versicolor"],  # str, int or list of either
            "format": {"align": "center"},
        }
    },
    {"column": {"columns": [3], "format": {"left": 1}}},
    {
        "row": {
            "rows": -1,  # Numbers only, but can refer to last row using -1
            "format": {"bottom": 1},  # Underline row
        }
    },
    ]

# or just use kwargs
kwargs = {
    "table_name": table_name,
    "title": title,
    "subtitles": subtitles,
    "units": units,
    "scope": scope,
    "source": None,
    "index_columns": index,
    "additional_formatting": additional_formatting,
    }

## Define our GPTable
iris_table = gpt.GPTable(table=iris_summary, **kwargs)

## Use produce workbook to return GPWorkbook
if __name__ == "__main__":
    output_path = parent_dir / "python_iris_additional_formatting_gptable.xlsx"
    wb = gpt.produce_workbook(
        filename=output_path, sheets={"Iris Flower Dimensions": iris_table}
        )

    # Carry out additional modifications on the GPWorkbook or GPWorksheets
    # This supports all `XlsxWriter` package functionality
    ws = wb.worksheets()[0]
    ws.set_row(0, 30)  # Set the height of the first row

    # Finally use the close method to save the output
    wb.close()
    print("Output written at: ", output_path)


Cost of Reoffending - Multiple Sheets Example

This example demonstrates how multiple sheets can be defined and written to a single .xlsx file using gptables.

The elements dictionary below is used to define the metadata for each table, with the sheet name as it’s key. This metadata is used to generate the sheets dictionary. gptables.write_workbook uses this dictionary to write each table to the corresponding sheet.

gptables.GPTable objects can be constructed one by one, but this demonstrates one way to make this definition concise.

from copy import deepcopy
import gptables as gpt
import pandas as pd
from pathlib import Path

## Read data and arrange
parent_dir = Path(__file__).parent
core_data = pd.read_csv(parent_dir / "coreB.csv")

# 3 tables: summary, children and young people, and adults
summ = core_data.loc[:, ["age", "total_cost"]]
summ = summ.groupby("age").sum().reset_index()
summ.columns = ["Age group", "Total cost"]

# This returns adults first as per the data
ages = core_data.age.unique()

dct = {ages[0]: pd.DataFrame(), ages[1]: pd.DataFrame()}
for key in dct:
    frame = core_data.loc[core_data.age == key, ["reoffence_group", "total_cost"]]
    frame = frame.groupby("reoffence_group").sum().reset_index()
    frame.columns = ["Reoffence group", "Total cost"]
    dct[key] = frame

dct["Summary"] = summ


## Define table elements for each table

example = {
    "title": "Cost of Reoffending",
    "subtitles": ["12-month follow-up period for the 2016 offender cohort"],
    "units": {1:"£"},
    "scope": "England and Wales",
    "source": "Office for National Statistics",
}

table_parameters_dict = {}
for table in ["summary", "adults", "children"]:
    table_parameters = deepcopy(example)
    table_parameters["table_name"] = f"{table}_table"
    table_parameters["title"] = f"Cost of reoffending - {table}"
    table_parameters_dict[table] = table_parameters

elements = {
    "Summary": table_parameters_dict["summary"], 
    "Adults": table_parameters_dict["adults"], 
    "Children and young people": table_parameters_dict["children"]
}

## Generate a dictionary of sheet names to GPTable objects
## using the elements defined above
sheets = {name: gpt.GPTable(dct[name], **elements[name]) for name in dct}

## Use write_workbook to win!
if __name__ == "__main__":
    output_path = parent_dir / "python_cor_multiple_gptables.xlsx"
    gpt.write_workbook(filename=output_path, sheets=sheets)
    print("Output written at: ", output_path)

R Usage

Use of gptables in R requires use of python via the reticulate package.

This example demonstrates basic usage of the pacakge in R. More advanced usage will use a similar approach to python (above), but may require use of reticulate functions to create/modify python objects.

#Iris - R Example
#----------------------

#This example demonstrates use of the ``gptables.write_workbook`` function in R.
#This API function is designed for production of consistently structured and formatted tables.

#Summary statistics from the classic iris dataset are used to build a ``gptables$GPTable``
#object. Elements of metadata are provided to the corresponding parameters of the class.
#Where you wish to provide no metadata in required parameters, use ``None``.

#Table formatting can be defined as a ``gptable$Theme``, which is passed to the API functions
# using the ``theme`` parameter. Or you can reply on our default - gptheme.


# Reticulate example works on R version 4.0.1 or less
source("gptables/examples/R_install_dependency_file.R")

packagesReq <- c("dplyr", "reticulate", "magrittr")
packageRefresh(packagesReq)

library("magrittr")

gpt <- reticulate::import("gptables")
pd <- reticulate::import("pandas")

iris_df <- reticulate::r_to_py(
  iris[c(5,1,2,3,4)]%>%
    dplyr::group_by(Species) %>%
    dplyr::summarize("Mean Sepal Length" = mean(Sepal.Length, na.rm=TRUE),
                     "Mean Sepal Width" = mean(Sepal.Width, na.rm=TRUE)
    )
)

table_name = "iris_statistics"
title = "Mean Iris$$note2$$ sepal dimensions"
subtitles = c("1936 Fisher, R.A; The use of multiple measurements in taxonomic problems$$note1$$",
              "Just another subtitle")
units = reticulate::dict(list("1" = "cm", "2" = "cm"), convert = FALSE)
table_notes = reticulate::dict(list("0" = "$$note1$$", "2" = "$$note3$$"), convert = FALSE)
scope = "Iris"
souce = "Source: Office for Iris Statistics"
index_columns  = reticulate::py_dict(reticulate::py_eval('2'), "Species")


table = gpt$GPTable(table = iris_df,
                    table_name = table_name,
                    title = title,
                    subtitles = subtitles,
                    units = units,
                    table_notes = table_notes,
                    scope = scope,
                    source = souce,
                    index_columns = index_columns)

notes = reticulate::dict(list("Note reference" = c("note1", "note2", "note3"),
                              "Note text" = c("I've got 99 problems and taxonomy is one.", "Goo Goo Dolls, 1998.", "Just another note"),
                              "Useful link" = c("[google](https://www.google.com)", "[duckduckgo](https://duckduckgo.com/)", "[ONS](https://www.ons.gov.uk)")),
                         convert = FALSE)

notes_table = pd$DataFrame$from_dict(notes)

output_path <- "gptables/examples/R_iris_gptable.xlsx"

gpt$write_workbook(filename = output_path,
                         sheets = reticulate::dict(list("iris" = table)),
                         notes_table = notes_table,
                         contentsheet_options = reticulate::dict(list("additional_elements" = c("subtitles", "scope")))
                         )

R users may also be interested in the a11ytables R package, developed by the Cabinet Office.