Example Usage¶
This section demonstrates usage of the gptables API functions and core Classes.
For source code and data used in these examples, please see the examples directory of the package.
Iris - Minimal Example¶
This example demonstrates use of the gptables.write_workbook
function.
This API function is designed for production of consistently structured and formatted tables.
Summary statistics from the classic iris dataset are used to build a gptables.GPTable
object. Elements of metadata are provided to the corresponding parameters of the class.
Where you wish to provide no metadata in required parameters, use None
.
- Table formatting can be defined as a
gptable.Theme
, which is passed to the API functions using the
theme
parameter. Or you can reply on our default - gptheme.
import gptables as gpt
import pandas as pd
import numpy as np
from pathlib import Path
## Read data and arrange
parent_dir = Path(__file__).parent
iris_data = pd.read_csv(parent_dir / "iris.csv")
iris_data = iris_data.loc[:, ["class", "sepal_length", "sepal_width"]]
iris_summary = iris_data.groupby("class").agg(np.mean)
iris_summary.index = [_[5:].capitalize() for _ in iris_summary.index]
iris_summary.reset_index(inplace=True)
iris_summary.rename(
columns={
"index": "Class",
"sepal_length": "Mean Sepal Length",
"sepal_width": "Mean Sepal Width",
},
inplace=True,
)
## Define table elements
table_name = "iris_statistics"
title = "Mean Iris$$note2$$ sepal dimensions"
subtitles = [
"1936 Fisher, R.A; The use of multiple measurements in taxonomic problems$$note1$$",
"Just another subtitle"
]
units = {1:"cm", 2:"cm"}
table_notes = {0: "$$note1$$", 2: "$$note3$$"}
scope = "Iris"
source = "Source: Office for Iris Statistics"
index = {2: 0} # Column 0 is a level 2 index
# or use kwargs to pass these to the appropriate parameters
kwargs = {
"table_name": table_name,
"title": title,
"subtitles": subtitles,
"units": units,
"table_notes": table_notes,
"scope": scope,
"source": source,
"index_columns": index,
}
## Define our GPTable
iris_table = gpt.GPTable(table=iris_summary, **kwargs)
sheets = {"Iris Flower Dimensions": iris_table}
## Notesheet
notes = {
"Note reference": ["note1", "note2", "note3"],
"Note text": ["I've got 99 problems and taxonomy is one.", "Goo Goo Dolls, 1998.", "Just another note"],
"Useful link": ["[google](https://www.google.com)", "[duckduckgo](https://duckduckgo.com/)", "[ONS](https://www.ons.gov.uk)"],
}
notes_table = pd.DataFrame.from_dict(notes)
## Use write_workbook to win!
if __name__ == "__main__":
output_path = parent_dir / "python_iris_gptable.xlsx"
gpt.write_workbook(
filename=output_path,
sheets=sheets,
notes_table=notes_table,
contentsheet_options={"additional_elements": ["subtitles", "scope"]}
)
print("Output written at: ", output_path)
Iris - Cover Page¶
This example demonstrates use of the gptables.Cover
class to create a cover page.
A gptables cover pages contain a range of custom text elements, along with a hyperlinked table of contents.
Text elements are defined as a gptables.Cover
instance, which is passed to the cover
parameter of gptables.write_worbook()
or gptables.produce_worbook()
.
In this example, we have also set auto_width
to True
.
This automatically determines the width of the first column on the cover sheet, as well as all columns of the tables of the workbook.
import gptables as gpt
import pandas as pd
import numpy as np
from pathlib import Path
from copy import deepcopy
## Read data and arrange
parent_dir = Path(__file__).parent
iris_data = pd.read_csv(parent_dir / "iris.csv")
iris_data = iris_data.loc[:, ["class", "sepal_length", "sepal_width"]]
iris_summary = iris_data.groupby("class").agg(np.mean)
iris_summary.index = [_[5:].capitalize() for _ in iris_summary.index]
iris_summary.reset_index(inplace=True)
iris_summary.rename(
columns={
"index": "Class",
"sepal_length": "Mean Sepal Length",
"sepal_width": "Mean Sepal Width",
},
inplace=True,
)
## Define table elements
title = "Mean Iris$$note2$$ sepal dimensions"
subtitles = [
"1936 Fisher, R.A; The use of multiple measurements in taxonomic problems$$note1$$",
"Just another subtitle",
]
units = {"Mean Sepal Length": "cm", "Mean Sepal Width": "cm"}
scope = "Iris"
source = "Source: Office for Iris Statistics"
index = {2: 0} # Column 0 is a level 2 index
# or use kwargs to pass these to the appropriate parameters
kwargs = {
"title": title,
"subtitles": subtitles,
"units": units,
"scope": scope,
"source": source,
"index_columns": index,
}
## Define our GPTable
iris_table = gpt.GPTable(table=iris_summary, table_name="iris_statistics", **kwargs)
iris_table_copy = deepcopy(iris_table)
iris_table_copy.set_title("A copy of the first sheet$$note3$$")
iris_table_copy.set_table_name("iris_statistics_copy")
sheets = {
"Iris Flower Dimensions": iris_table,
"Copy of Iris Flower Dimensions": iris_table_copy
}
cover = gpt.Cover(
cover_label="Cover",
title="A Workbook containing good practice tables",
intro=["This is some introductory information", "And some more"],
about=["Even more info about my data", "And a little more"],
contact=["John Doe", "Tel: 345345345", "Email: [john.doe@snailmail.com](mailto:john.doe@snailmail.com)"],
)
## Notesheet
notes = {
"Note reference": ["note1", "note2", "note3"],
"Note text": ["I've got 99 problems and taxonomy is one.", "Goo Goo Dolls, 1998.", "This is an extra note"],
}
notes_table = pd.DataFrame.from_dict(notes)
## Use write_workbook to win!
if __name__ == "__main__":
output_path = parent_dir / "python_iris_cover_gptable.xlsx"
gpt.write_workbook(
filename=output_path,
sheets=sheets,
cover=cover,
notes_table=notes_table
)
print("Output written at: ", output_path)
Iris - Additional Formatting Example¶
This example demonstrates additional formatting that is not supported in
the gptable.Theme
.
Specific columns, rows and cells of the table elements (indexes, column headings and data)
can be formatted using the gptable.GPTable(..., additional_formatting = ...)
parameter.
This parameter takes a list of dictionaries, allowing you to select as many rows, columns
or cells as you like.
As with all formatting, supported arguments are desribed in the XlsxWriter documentation.
Any formatting not possibly through this means can be achieved using
XlsxWriter
Workbook
and Worksheet functionality.
A gptable.GPWorkbook
object is returned when using the
gptables.produce_workbook
API function.
The GPWorkbook.worksheets()
function returns a list of GPWorksheet
objects,
which can also be modified.
import gptables as gpt
import pandas as pd
import numpy as np
from pathlib import Path
## Read data and arrange
parent_dir = Path(__file__).parent
iris_data = pd.read_csv(parent_dir / "iris.csv")
iris_data.rename(
columns={
"class": "class",
"sepal_length": "Sepal Length",
"petal_length": "Petal Length",
"petal_width": "Petal Width",
"sepal_width": "Sepal Width",
},
inplace=True,
)
iris_data["class"] = iris_data.apply(lambda row: row["class"][5:].capitalize(), axis=1)
# Calculate summaries
subtables = []
funcs = [np.mean, np.median]
for func in funcs:
subtables.append(iris_data.groupby("class").agg(func))
subtables.append(pd.DataFrame(iris_data.iloc[:,0:4].agg(func).rename("All")).T)
iris_summary = pd.concat(subtables)
iris_summary["Average"] = ["Mean"] * 4 + ["Median"] * 4
# Reshape
iris_summary = iris_summary.reset_index()
iris_summary = iris_summary.melt(["index", "Average"], var_name="Iris feature")
iris_summary = iris_summary.pivot_table(
index=["Iris feature", "Average"], columns="index", values="value"
).reset_index()
## Define table elements
table_name = "iris_statistics"
title = "Iris flower dimensions"
subtitles = [
"1936 Fisher, R.A; The use of multiple measurements in taxonomic problems",
[{"bold": True}, "Just", " another subtitle"]
]
units = {key: "cm" for key in range(2,6)}
scope = "Iris"
index = {1: 0, 2: 1}
## Define additional formatting
# Columns can be references by name or number
# Rows may only be referenced by number
# Column and row numbers refer to the table elements, including indexes and column headings
additional_formatting = [
{
"column": {
"columns": ["Setosa", "Versicolor"], # str, int or list of either
"format": {"align": "center"},
}
},
{"column": {"columns": [3], "format": {"left": 1}}},
{
"row": {
"rows": -1, # Numbers only, but can refer to last row using -1
"format": {"bottom": 1}, # Underline row
}
},
]
# or just use kwargs
kwargs = {
"table_name": table_name,
"title": title,
"subtitles": subtitles,
"units": units,
"scope": scope,
"source": None,
"index_columns": index,
"additional_formatting": additional_formatting,
}
## Define our GPTable
iris_table = gpt.GPTable(table=iris_summary, **kwargs)
## Use produce workbook to return GPWorkbook
if __name__ == "__main__":
output_path = parent_dir / "python_iris_additional_formatting_gptable.xlsx"
wb = gpt.produce_workbook(
filename=output_path, sheets={"Iris Flower Dimensions": iris_table}
)
# Carry out additional modifications on the GPWorkbook or GPWorksheets
# This supports all `XlsxWriter` package functionality
ws = wb.worksheets()[0]
ws.set_row(0, 30) # Set the height of the first row
# Finally use the close method to save the output
wb.close()
print("Output written at: ", output_path)
Cost of Reoffending - Multiple Sheets Example¶
This example demonstrates how multiple sheets can be defined and written
to a single .xlsx file using gptables
.
The elements dictionary below is used to define the metadata for each table,
with the sheet name as it’s key. This metadata is used to generate the sheets
dictionary. gptables.write_workbook
uses this dictionary to write each table
to the corresponding sheet.
gptables.GPTable
objects can be constructed one by one, but this demonstrates
one way to make this definition concise.
from copy import deepcopy
import gptables as gpt
import pandas as pd
from pathlib import Path
## Read data and arrange
parent_dir = Path(__file__).parent
core_data = pd.read_csv(parent_dir / "coreB.csv")
# 3 tables: summary, children and young people, and adults
summ = core_data.loc[:, ["age", "total_cost"]]
summ = summ.groupby("age").sum().reset_index()
summ.columns = ["Age group", "Total cost"]
# This returns adults first as per the data
ages = core_data.age.unique()
dct = {ages[0]: pd.DataFrame(), ages[1]: pd.DataFrame()}
for key in dct:
frame = core_data.loc[core_data.age == key, ["reoffence_group", "total_cost"]]
frame = frame.groupby("reoffence_group").sum().reset_index()
frame.columns = ["Reoffence group", "Total cost"]
dct[key] = frame
dct["Summary"] = summ
## Define table elements for each table
example = {
"title": "Cost of Reoffending",
"subtitles": ["12-month follow-up period for the 2016 offender cohort"],
"units": {1:"£"},
"scope": "England and Wales",
"source": "Office for National Statistics",
}
table_parameters_dict = {}
for table in ["summary", "adults", "children"]:
table_parameters = deepcopy(example)
table_parameters["table_name"] = f"{table}_table"
table_parameters["title"] = f"Cost of reoffending - {table}"
table_parameters_dict[table] = table_parameters
elements = {
"Summary": table_parameters_dict["summary"],
"Adults": table_parameters_dict["adults"],
"Children and young people": table_parameters_dict["children"]
}
## Generate a dictionary of sheet names to GPTable objects
## using the elements defined above
sheets = {name: gpt.GPTable(dct[name], **elements[name]) for name in dct}
## Use write_workbook to win!
if __name__ == "__main__":
output_path = parent_dir / "python_cor_multiple_gptables.xlsx"
gpt.write_workbook(filename=output_path, sheets=sheets)
print("Output written at: ", output_path)
R Usage¶
Use of gptables
in R requires use of python via the reticulate package.
This example demonstrates basic usage of the pacakge in R. More advanced usage will
use a similar approach to python (above), but may require use of reticulate
functions
to create/modify python objects.
#Iris - R Example
#----------------------
#This example demonstrates use of the ``gptables.write_workbook`` function in R.
#This API function is designed for production of consistently structured and formatted tables.
#Summary statistics from the classic iris dataset are used to build a ``gptables$GPTable``
#object. Elements of metadata are provided to the corresponding parameters of the class.
#Where you wish to provide no metadata in required parameters, use ``None``.
#Table formatting can be defined as a ``gptable$Theme``, which is passed to the API functions
# using the ``theme`` parameter. Or you can reply on our default - gptheme.
# Reticulate example works on R version 4.0.1 or less
source("gptables/examples/R_install_dependency_file.R")
packagesReq <- c("dplyr", "reticulate", "magrittr")
packageRefresh(packagesReq)
library("magrittr")
gpt <- reticulate::import("gptables")
pd <- reticulate::import("pandas")
iris_df <- reticulate::r_to_py(
iris[c(5,1,2,3,4)]%>%
dplyr::group_by(Species) %>%
dplyr::summarize("Mean Sepal Length" = mean(Sepal.Length, na.rm=TRUE),
"Mean Sepal Width" = mean(Sepal.Width, na.rm=TRUE)
)
)
table_name = "iris_statistics"
title = "Mean Iris$$note2$$ sepal dimensions"
subtitles = c("1936 Fisher, R.A; The use of multiple measurements in taxonomic problems$$note1$$",
"Just another subtitle")
units = reticulate::dict(list("1" = "cm", "2" = "cm"), convert = FALSE)
table_notes = reticulate::dict(list("0" = "$$note1$$", "2" = "$$note3$$"), convert = FALSE)
scope = "Iris"
souce = "Source: Office for Iris Statistics"
index_columns = reticulate::py_dict(reticulate::py_eval('2'), "Species")
table = gpt$GPTable(table = iris_df,
table_name = table_name,
title = title,
subtitles = subtitles,
units = units,
table_notes = table_notes,
scope = scope,
source = souce,
index_columns = index_columns)
notes = reticulate::dict(list("Note reference" = c("note1", "note2", "note3"),
"Note text" = c("I've got 99 problems and taxonomy is one.", "Goo Goo Dolls, 1998.", "Just another note"),
"Useful link" = c("[google](https://www.google.com)", "[duckduckgo](https://duckduckgo.com/)", "[ONS](https://www.ons.gov.uk)")),
convert = FALSE)
notes_table = pd$DataFrame$from_dict(notes)
output_path <- "gptables/examples/R_iris_gptable.xlsx"
gpt$write_workbook(filename = output_path,
sheets = reticulate::dict(list("iris" = table)),
notes_table = notes_table,
contentsheet_options = reticulate::dict(list("additional_elements" = c("subtitles", "scope")))
)
R users may also be interested in the a11ytables R package, developed by the Cabinet Office.