import os
import re
import warnings
import pandas as pd
import numpy as np
from copy import deepcopy
from xlsxwriter.workbook import Workbook
from xlsxwriter.worksheet import Worksheet
from gptables.core.cover import Cover
from .theme import Theme
from .gptable import GPTable, FormatList
from gptables.utils.unpickle_themes import gptheme
[docs]class GPWorksheet(Worksheet):
"""
Wrapper for an XlsxWriter Worksheet object. Provides a method for writing
a good practice table (GPTable) to a Worksheet.
"""
def write_cover(self, cover):
"""
Write a cover page to the Worksheet. Uses text from a Cover object and
details of the Workbook contents.
Parameters
----------
cover : gptables.Cover
object containing cover sheet text
"""
theme = self.theme
pos = [0, 0]
self._parse_urls(cover)
pos = self._write_element(pos, cover.title, theme.cover_title_format)
if cover.intro is not None:
pos = self._write_element(pos, "Introductory information", theme.cover_subtitle_format)
pos = self._write_element_list(pos, cover.intro, theme.cover_text_format)
if cover.about is not None:
pos = self._write_element(pos, "About these data", theme.cover_subtitle_format)
pos = self._write_element_list(pos, cover.about, theme.cover_text_format)
if cover.contact is not None:
pos = self._write_element(pos, "Contact", theme.cover_subtitle_format)
pos = self._write_element_list(pos, cover.contact, theme.cover_text_format)
def write_gptable(self, gptable, auto_width, reference_order=[]):
"""
Write data from a GPTable object to the worksheet using the workbook
Theme object for formatting.
Parameters
----------
gptable : gptables.GPTable
object containing elements of the gptable to be written to the
Worksheet
reference_order : list, optional
order of annotations in workbook
must be provided if gptable uses annotations
Returns
-------
None
"""
if not isinstance(gptable, GPTable):
raise TypeError("`gptable` must be a gptables.GPTable object")
if len(gptable._annotations)>0 and len(reference_order)==0:
msg = "reference_order must be provided if gptable contains annotations"
raise ValueError(msg)
theme = self.theme
# Write each GPTable element using appropriate Theme attr
pos = [0, 0]
self._reference_annotations(gptable, reference_order)
self._parse_urls(gptable)
gptable = deepcopy(gptable)
pos = self._write_element(
pos,
gptable.title,
theme.title_format
)
pos = self._write_element_list(
pos,
gptable.subtitles,
theme.subtitle_format
)
description = theme.description_order
for element in description:
pos = getattr(self, "_write_" + element)(
pos,
getattr(gptable, element),
getattr(theme, element + "_format")
)
pos = self._write_table_elements(
pos,
gptable,
auto_width,
)
self._mark_data_as_worksheet_table(gptable, theme.column_heading_format)
def _reference_annotations(self, gptable, reference_order):
"""
Replace note references with numbered references and move to end of element.
Acts on `title`, `subtitles`, `table` and `notes` attributes of a GPTable.
References are numbered from top left of spreadsheet, working across each row.
Parameters
----------
gptable : gptables.GPTable
object containing data with references to notes
reference_order : list
order of annotations in workbook
Returns
-------
None
"""
elements = [
"title",
"subtitles",
"legend",
"source",
"scope",
"units",
]
# Loop through elements, replacing references in strings
for attr in elements:
attr_current = getattr(gptable, attr)
setattr(
gptable,
attr,
self._replace_reference_in_attr(
attr_current,
reference_order
)
)
self._reference_table_annotations(gptable, reference_order)
def _reference_table_annotations(self, gptable, reference_order): # TODO: properly integrate this with table_notes parameter
"""
Reference annotations in the table column headings and index columns.
"""
table = getattr(gptable, "table")
table.columns = self._replace_reference_in_attr(
[x for x in table.columns],
reference_order
)
index_columns = gptable.index_columns.values()
for col in index_columns:
table.iloc[:, col] = table.iloc[:, col].apply(
lambda x: self._replace_reference_in_attr(x, reference_order)
)
setattr(gptable, "table", table)
def _replace_reference_in_attr(self, data, reference_order):
"""
Replaces references in a string or list/dict of strings. Works
recursively on list elements and dict values. Other types are returned
without modification.
Parameters
----------
data : any type
object containing strings to replace references in
reference_order : list
order of annotations in workbook
Returns
-------
string : str
input string with references replaced with numerical reference (n),
where n is the order of appearance in the resulting document
"""
if isinstance(data, str):
data = self._replace_reference(data, reference_order)
if isinstance(data, list):
for n in range(len(data)):
data[n] = self._replace_reference_in_attr(
data[n],
reference_order
)
if isinstance(data, dict):
for key in data.keys():
data[key] = self._replace_reference_in_attr(
data[key],
reference_order
)
if isinstance(data, FormatList):
data_list = data.list
for n in range(len(data_list)):
data_list[n] = self._replace_reference_in_attr(
data_list[n],
reference_order
)
data = FormatList(data_list)
return data
@staticmethod
def _replace_reference(string, reference_order):
"""
Given a single string, record occurrences of new references (denoted by
flanking dollar signs [$$reference$$]) and replace with number
reference reflecting order of detection.
Parameters
----------
string : str
the string to replace references within
reference_order : list
order of annotations in workbook
Returns
-------
string : str
input string with references replaced with numerical reference (n),
where n is the order of appearence in the resulting document
"""
text_refs = re.findall(r"[$]{2}.*?[$]{2}", string)
dict_refs = [w.replace("$", "") for w in text_refs]
for n in range(len(dict_refs)):
num_ref = "[note " + str(reference_order.index(dict_refs[n]) + 1) + "]"
string = string.replace(text_refs[n], "") + num_ref
return string
def _parse_urls(self, sheet):
"""
Convert markdown URL formatting into URL, string tuple
Parameters
----------
sheet : gptables.GPTable, gptables.Cover
object containing data with urls
"""
if isinstance(sheet, GPTable):
elements = [
"title",
"subtitles",
"legend",
"source",
"scope",
"units",
]
elif isinstance(sheet, Cover):
elements = [
"title",
"intro",
"about",
"contact",
]
# Loop through elements, replacing urls in strings
for attr in elements:
attr_current = getattr(sheet, attr)
setattr(
sheet,
attr,
self._replace_url_in_attr(
attr_current,
)
)
if isinstance(sheet, GPTable):
self._parse_table_urls(sheet)
def _parse_table_urls(self, gptable):
"""
Parse URLs in table.
"""
table = getattr(gptable, "table")
rows, columns = table.shape
for c in range(columns):
for r in range(rows):
cell = self._replace_url_in_attr(table.iloc[r, c])
if isinstance(cell, dict):
table.iloc[r, c] = [cell]
else:
table.iloc[r, c] = cell
setattr(gptable, "table", table)
def _replace_url_in_attr(self, data):
"""
Replaces urls in a string or list/dict of strings. Works
recursively on list elements and dict values. Other types
are returned without modification.
Parameters
----------
data : any type
object containing strings to replace references in
"""
if isinstance(data, str):
data = self._replace_url(data)
if isinstance(data, list):
for n in range(len(data)):
data[n] = self._replace_url_in_attr(
data[n],
)
if isinstance(data, dict):
for key in data.keys():
data[key] = self._replace_url_in_attr(
data[key],
)
return data
@staticmethod
def _replace_url(string):
"""
Given a single string, record occurrences of markdown
style urls (formatted as `"[url](display_text)"`) and
replace with tuples of `(url, string)`
Parameters
----------
string : str
the string to replace references within
Returns
-------
string or dict
if no markdown style urls found, returns sting
if found, return dictionary with key `string` and value `url`,
where markdown style url in `string` is replaced with `display_text`
"""
f_url_pattern = r"\[.+\]\(.+\)" # "[display_text](url)"
f_urls = re.findall(f_url_pattern, string)
if len(f_urls) == 0:
return string
if len(f_urls) > 1:
msg = "More than one link found in cell. Excel only permits one link per cell"
raise ValueError(msg)
else:
f_url = f_urls[0]
url = re.split(r"\(", f_url)[1].replace(")", "")
display_text = re.split(r"\]", f_url)[0].replace("[", "")
string = re.sub(f_url_pattern, display_text, string)
return {string: url}
def _write_element(self, pos, element, format_dict):
"""
Write a single text element of a GPTable to the GPWorksheet.
Parameters
----------
element : str or list
the string or list of rich string elements to be written
format_dict : dict
format to be applied to string
pos : list
the position of the worksheet cell to write the element to
Returns
-------
pos : list
new position to write next element from
"""
if element:
self._smart_write(*pos, element, format_dict)
pos[0] += 1
return pos
def _write_element_list(self, pos, element_list, format_dict):
"""
Writes a list of elements row-wise.
Parameters
----------
element_list : list
list of strings or nested list of rich string elements to write,
one per row
format_dict : dict
format to be applied to string
pos : list
the position of the worksheet cell to write the elements to
Returns
-------
pos: list
new position to write next element from
"""
if element_list:
for element in element_list:
pos = self._write_element(pos, element, format_dict)
return pos
def _write_instructions(self, pos, element, format_dict):
"""
Alias for writting description elements by name.
"""
return self._write_element(pos, element, format_dict)
def _write_source(self, pos, element, format_dict):
"""
Alias for writting description elements by name.
"""
return self._write_element(pos, element, format_dict)
def _write_scope(self, pos, element, format_dict):
"""
Alias for writting description elements by name.
"""
return self._write_element(pos, element, format_dict)
def _write_legend(self, pos, element_list, format_dict):
"""
Alias for writting description elements by name.
"""
return self._write_element_list(pos, element_list, format_dict)
def _write_notes(self, pos, element_list, format_dict):
"""
Alias for writting description elements by name.
"""
return self._write_element_list(pos, element_list, format_dict)
def _write_table_elements(self, pos, gptable, auto_width):
"""
Writes the table and units elements of a GPTable. Uses the
Workbook Theme, plus any additional formatting associated with the
GPTable.
Parameters
----------
gptable : gptables.GPTable
object containing the table and additional formatting data
pos : list
the position of the worksheet cell to write the units to
auto_width : bool
select if column widths should be determined automatically using
length of text in index and columns
Returns
-------
pos : list
new position to write next element from
"""
# Raise error if any table element is null or whitespace
gptable.table.replace(regex=r'^\s*$', value=np.NaN, inplace=True)
if gptable.table.isna().values.any():
msg = ("""
Empty or null cell found in table, the reason for missingness should
be included above the table before inputting to gptables.
There should only be one reason otherwise a shorthand should be provided.
Guidance on shorthand can be found at:
https://analysisfunction.civilservice.gov.uk/policy-store/symbols-in-tables-definitions-and-help/
""")
warnings.warn(msg)
# Raise error if any table element is only special characters
if gptable.table.stack().str.contains('^[^a-zA-Z0-9_]*$').any():
msg = ("""
Cell found containing only special characters, replace with
alphanumeric characters before inputting to gptables.
Guidance on symbols in tables can be found at:
https://analysisfunction.civilservice.gov.uk/policy-store/symbols-in-tables-definitions-and-help/
""")
raise ValueError(msg)
# Get theme
theme = self.theme
# Reset position to left col on next row
pos[1] = 0
## Create data array
index_levels = gptable.index_levels
index_columns = [col for col in gptable.index_columns.values()]
data = pd.DataFrame(gptable.table, copy=True)
# Create row containing column headings
data.loc[-1] = data.columns
data.index = data.index + 1
data.sort_index(inplace=True)
## Create formats array
# pandas.DataFrame did NOT want to hold dictionaries, so be wary
formats = pd.DataFrame().reindex_like(data)
dict_row = [{} for n in range(formats.shape[1])]
for row in range(formats.shape[0]):
dict_row = [{} for n in range(formats.shape[1])]
formats.iloc[row] = dict_row
## Add Theme formatting to formats dataframe
format_headings_from = 0
self._apply_format(
formats.iloc[0, format_headings_from:],
theme.column_heading_format
)
self._apply_format(
formats.iloc[1:, index_levels:],
theme.data_format
)
index_level_formats = [
theme.index_1_format,
theme.index_2_format,
theme.index_3_format
]
for level, col in gptable.index_columns.items():
self._apply_format(
formats.iloc[1:, col],
index_level_formats[level - 1] # Account for 0-indexing
)
## Add additional table-specific formatting from GPTable
self._apply_additional_formatting(
formats,
gptable.additional_formatting,
gptable.index_levels
)
## Write table
pos = self._write_array(pos, data, formats)
## Set columns widths
if auto_width:
widths = self._calculate_column_widths(data, formats)
self._set_column_widths(widths)
return pos
def _apply_additional_formatting(
self,
formats_table,
additional_formatting,
index_levels
):
"""
Apply row, column and cell formatting to dataframe of formats.
"""
for item in additional_formatting:
fmt_type = list(item.keys())[0]
format_desc = item[fmt_type]
if fmt_type == "cell":
formatting = format_desc["format"]
cell_ilocs = format_desc["cells"]
if isinstance(cell_ilocs, tuple):
cell_ilocs = [cell_ilocs]
for row, col in cell_ilocs:
formats_table_slice = formats_table.iloc[row, col]
self._apply_format(
formats_table_slice,
formatting
)
return None
if fmt_type == "column":
cols_iloc = [
formats_table.columns.get_loc(col)
if isinstance(col, str)
else col
for col in format_desc["columns"]
]
row_start = 0
if "include_names" in format_desc.keys():
row_start = 0 if format_desc["include_names"] else 1
formats_table_slice = formats_table.iloc[row_start:, cols_iloc]
formatting = format_desc["format"]
elif fmt_type == "row":
rows_iloc = format_desc["rows"]
col_start = 0
if "include_names" in format_desc.keys():
col_start = 0 if format_desc["include_names"] else index_levels
formats_table_slice = formats_table.iloc[rows_iloc, col_start:]
formatting = format_desc["format"]
self._apply_format(
formats_table_slice,
formatting
)
def _write_array(self, pos, data, formats):
"""
Write a two-dimensional array to the current Worksheet, starting from
the specified position.
Parameters
----------
data : pandas.DataFrame
array of data to be written to Worksheet
formats : pandas.DataFrame
array of dictionaries that specify the formatting to be applied
to each cell of data
pos : list
the position of the top left cell to start writing the array from
Returns
-------
pos : list
new position to write next element from
"""
if data.shape != formats.shape:
raise ValueError("data and formats arrays must be of equal shape")
rows, cols = data.shape
for row in range(rows):
for col in range(cols):
cell_data = data.iloc[row, col]
cell_format_dict = formats.iloc[row, col]
self._smart_write(
pos[0] + row,
pos[1] + col,
cell_data,
cell_format_dict
)
pos = [pos[0] + rows, 0]
return pos
def _mark_data_as_worksheet_table(self, gptable, column_header_format_dict):
"""
Marks the data to be recognised as a Worksheet Table in Excel.
"""
data_range = gptable.data_range
column_header_format = self._workbook.add_format(column_header_format_dict)
column_list = gptable.table.columns.tolist()
column_headers = [{'header': column, 'header_format': column_header_format} for column in column_list]
self.add_table(*data_range,
{'header_row': True,
'autofilter': False,
'columns': column_headers,
'style': None,
'name': gptable.table_name
})
def _smart_write(self, row, col, data, format_dict, *args):
"""
Depending on the input data, this function will write rich strings or
use the standard `write()` method. For rich strings, the base format is
merged with each rich format supplied within data.
Parameters
----------
row : int
0-indexed row of cell to write to
col : int
0-indexed column of cell to write to
data : str or list or dict
Simple string to be written with `format_dict` formatting. Or a
list of alternating string and dictionary objects. Dictionaries
specify additional formatting to be applied to the following string
in the list.
Dictionaries will be written with (first) key as display text and
(first) value as URL.
format_dict : dict
Dictionary containing base format for the string.
Returns
-------
None
"""
wb = self._workbook # Reference to Workbook that contains sheet
if isinstance(data, list):
if len(data) == 1:
data = data[0]
self._smart_write(row, col, data, format_dict, *args)
elif any([isinstance(element, FormatList) for element in data]):
self._write_with_newlines_and_custom_formats(wb, row, col, data, format_dict, *args)
else:
self._write_with_newlines(wb, row, col, data, format_dict, *args)
elif isinstance(data, FormatList):
self._write_with_custom_formats(wb, row, col, data, format_dict, *args)
elif isinstance(data, dict):
self._write_dict_as_url(wb, row, col, data, format_dict, *args)
elif pd.isna(data):
self.write_blank(row, col, None, wb.add_format(format_dict))
else:
# Write handles all other write types dynamically
self.write(row, col, data, wb.add_format(format_dict), *args)
def _write_with_newlines_and_custom_formats(self, wb, row, col, data, format_dict, *args):
"""
Take list of FormatList (and str), join with newline characters and smart write
"""
data_with_newlines = []
first_element = data.copy()[0]
if isinstance(first_element, FormatList):
first_element = first_element.list
else:
first_element = [first_element]
data_with_newlines.extend(first_element)
for element in data[1:]:
if isinstance(element, FormatList):
element = element.list
element_stings = [item for item in element if isinstance(item, str)]
first_string = element_stings[0]
new_string = "\n" + first_string
element_with_newline = [new_string if item == first_string else item for item in element]
else:
element_with_newline = ["\n" + str(element)]
data_with_newlines.extend(element_with_newline)
self._write_with_custom_formats(
wb,
row,
col,
FormatList(data_with_newlines),
format_dict,
*args
)
def _write_with_newlines(self, wb, row, col, data, format_dict, *args):
"""
Take list of str, join with newline character and write
"""
data_string = "\n".join(data)
self.write(
row,
col,
data_string,
wb.add_format(format_dict),
*args
)
def _write_with_custom_formats(self, wb, row, col, data, format_dict, *args):
data_with_custom_formats = []
for item in data.list:
# Convert dicts to Format (with merge onto base format)
if isinstance(item, dict):
rich_format = format_dict.copy()
rich_format.update(item)
data_with_custom_formats.append(wb.add_format(rich_format))
else:
data_with_custom_formats.append(item)
data_with_all_formats = []
for n in range(len(data_with_custom_formats)-1):
data_with_all_formats.append(data_with_custom_formats[n])
if isinstance(data_with_custom_formats[n], str):
if isinstance(data_with_custom_formats[n+1], str):
data_with_all_formats.append(wb.add_format(format_dict))
data_with_all_formats.append(data_with_custom_formats[-1])
self.write_rich_string(
row,
col,
*data_with_all_formats,
wb.add_format(format_dict),
*args
)
def _write_dict_as_url(self, workbook, row, col, data, format_dict, *args):
url = list(data.values())[0]
display_text = list(data.keys())[0]
url_format = format_dict.copy()
url_format.update({"underline": True, "font_color": "blue"})
self.write_url(
row,
col,
url,
workbook.add_format(url_format),
display_text,
*args
)
@staticmethod
def _apply_format(format_table_slice, format_dict):
"""
Update all cells of a given dataframe slice with the format
dictionary. Handles dict, series or dataframes.
"""
if isinstance(format_table_slice, pd.Series):
(format_table_slice
.apply(lambda d: d.update(format_dict))
)
elif isinstance(format_table_slice, pd.DataFrame):
# Vectorised for 2D
(format_table_slice
.apply(np.vectorize(lambda d: d.update(format_dict)))
)
elif isinstance(format_table_slice, dict):
format_table_slice.update(format_dict)
def _set_column_widths(self, widths):
"""
Set the column widths using a list of widths.
"""
for col_number in range(len(widths)):
self.set_column(
col_number,
col_number,
widths[col_number]
)
def _calculate_column_widths(self, table, formats_table):
"""
Calculate Excel column widths using maximum length of strings
and the maximum font size in each column of the data table.
Parameters
----------
table : pd.DataFrame
data table to calculate widths from
formats_table: pd.DataFrame
formats table to retrieve font size from
Returns
-------
col_widths : list
width to apply to Excel columns
"""
cols = table.shape[1]
max_lengths = [
table.iloc[:, col].apply(self._longest_line_length).max()
for col in range(cols)
]
max_font_sizes = [
formats_table.iloc[:, col]
.apply(lambda x: x.get("font_size") or 10).max()
for col in range(cols)
]
col_widths = [
self._excel_string_width(l, f)
for l, f in zip(max_lengths, max_font_sizes)
]
return col_widths
@staticmethod
def _excel_string_width(string_len, font_size):
"""
Calculate the rough length of a string in Excel character units.
This crude estimate does not account for font name or other font format
(e.g. wrapping).
Parameters
----------
string_len : int
length of string to calculate width in Excel for
font_size : int
size of font
Returns
-------
excel_width : float
width of equivalent string in Excel
"""
if string_len == 0:
excel_width = 0
else:
excel_width = string_len * ((font_size * 0.12) - 0.09)
return excel_width
@staticmethod
def _longest_line_length(cell_val):
"""
Calculate the length of the longest line within a cell.
If the cell contains a string, the longest length between line breaks is returned.
If the cell contains a link formatted as [{display_text: link}], the longest length is calculated from the display text.
If the cell contains a list of strings, the length of the longest string in the list is returned.
Expects new lines to be marked with "\n", "\r\n" or new lines in multiline strings.
Parameters
----------
cell_val:
cell value
Returns
-------
max_length: int
the length of the longest line within the string
"""
split_strings = """
|\r\n|\n"""
if isinstance(cell_val, str):
return(max([len(line) for line in re.split(split_strings, cell_val)]))
elif isinstance(cell_val, list):
if isinstance(cell_val[0], dict):
# text with links are stored as {text: link}, extract key to calculate text length
return(max([len(line) for line in re.split(split_strings, list(cell_val[0])[0])]))
elif isinstance(cell_val[0], FormatList):
string = cell_val[0].string
return(max([len(line) for line in re.split(split_strings, string)]))
else:
return(max([len(line) for line in cell_val]))
else:
return(0)
[docs]class GPWorkbook(Workbook):
"""
Wrapper for and XlsxWriter Workbook object. The Worksheets class has been
replaced by an alternative with a method for writting GPTable objects.
"""
def __init__(self, filename=None, options={}):
super(GPWorkbook, self).__init__(filename=filename, options=options)
self.theme = None
self._annotations = None
# Set default theme
self.set_theme(gptheme)
def add_worksheet(self, name=None):
"""
Overwrite add_worksheet() to create a GPWorksheet object.
Parameters
----------
name : str (optional)
name of the the worksheet to be created
Returns
-------
worksheet : gptables.GPWorksheet
a worksheet object, which supports writing of GPTable objects
"""
worksheet = super(GPWorkbook, self).add_worksheet(name, GPWorksheet)
worksheet.theme = self.theme
worksheet._workbook = self # Create reference to wb, for formatting
worksheet.hide_gridlines(2)
return worksheet
def set_theme(self, theme):
"""
Sets the theme for all GPTable objects written to the Workbook.
Parameters
----------
theme : gptables.Theme
a Theme object containing the formatting to be applied to GPTable
objects written to Worksheets within this Workbook
Returns
-------
None
"""
if not isinstance(theme, Theme):
raise TypeError(f"`theme` must be a gptables.Theme object, not: {type(theme)}")
self.theme = theme
def _update_annotations(self, sheets):
ordered_refs = []
for gptable in sheets.values():
gptable._set_annotations()
ordered_refs.extend(gptable._annotations)
# remove duplicates from ordered_refs and assign to self._annotations
self._annotations = list(dict.fromkeys(ordered_refs))
def make_table_of_contents(
self,
sheets,
additional_elements = None,
column_names = None,
table_name = None,
title = None,
subtitles = None,
instructions = None,
):
"""
Generate table of contents from sheet and optional customisation parameters.
Parameters
----------
sheets : dict
mapping worksheet labels to gptables.GPTable objects
additional_elements : List[str], optional
additional GPTable elements to display in the contents table. Allowed
elements are "subtitles", "scope", "source" and "instructions".
column_names : List[str], optional
table of contents column names, defaults to
"Sheet name", "Table description"
table_name: str, optional
contents table name, defaults to "contents_table"
title : str, optional
table of contents title, defaults to "Table of contents"
subtitles: List[str], optional
list of subtitles as strings
instructions: str, optional
description of the page layout
defaults to "This worksheet contains one table."
Return
------
gpt.GPTable
"""
if column_names is None:
column_names = ["Sheet name", "Table description"]
if table_name is None:
table_name = "contents_table"
if title is None:
title = "Table of contents"
if instructions is None:
instructions = "This worksheet contains one table."
if additional_elements is not None:
valid_elements = ["subtitles", "scope", "source", "instructions"]
if not all(element in valid_elements for element in additional_elements):
msg = ("Cover `additional_elements` list can only contain"
"'subtitles', 'scope', 'source' and 'instructions'")
raise ValueError(msg)
contents_dict = {}
for label, gptable in sheets.items():
contents_entry = []
contents_entry.append(self._strip_annotation_references(gptable.title))
if additional_elements is not None:
for element in additional_elements:
content = getattr(gptable, element)
if element == "subtitles":
[contents_entry.append(self._strip_annotation_references(element)) for element in content]
else:
contents_entry.append(self._strip_annotation_references(content))
link = {label: f"internal:'{label}'!A1"}
contents_dict[label] = [link, contents_entry]
contents_table = pd.DataFrame.from_dict(contents_dict, orient="index").reset_index(drop=True)
contents_table.columns = column_names
return GPTable(
table=contents_table,
table_name=table_name,
title=title,
subtitles=subtitles,
instructions=instructions
)
@staticmethod
def _strip_annotation_references(text):
"""
Strip annotation references (as $$ $$) from a str or list text element.
"""
pattern = r"\$\$.*?\$\$"
if isinstance(text, str):
no_annotations = re.sub(pattern, "", text)
elif isinstance(text, FormatList):
no_annotations = FormatList([
re.sub(pattern, "", part)
if isinstance(part, str) else part
for part in text.list
])
elif isinstance(text, list): # TODO: this shouldn't get used - check and delete
no_annotations = [
re.sub(pattern, "", part)
if isinstance(part, str) else part
for part in text
]
return no_annotations
def make_notesheet(
self,
notes_table,
table_name = None,
title = None,
instructions = None,
):
"""
Generate notes table sheets from notes table and optional customisation parameters.
Parameters
----------
notes_table : pd.DataFrame
table with notes reference, text and (optional) link columns
table_name: str, optional
notes table name, defaults to "notes_table"
title : str, optional
notes page title, defaults to "Notes"
instructions: str, optional
description of the page layout
defaults to "This worksheet contains one table."
Return
------
gpt.GPTable
"""
# set defaults
if table_name is None:
table_name = "notes_table"
if title is None:
title = "Notes"
if instructions is None:
instructions = "This worksheet contains one table."
# order notes table by worksheet reference order
ordered_refs = self._annotations
order_df = pd.DataFrame({"order": ordered_refs})
notes = notes_table.copy()
notes = notes.rename(columns={notes.columns[0]: "order"})
ordered_notes = order_df.merge(notes, on="order", how="left")
unreferenced_notes = notes[~notes["order"].isin(ordered_notes["order"])]
if not unreferenced_notes.empty:
warnings.warn(f"The following notes are not referenced: {list(unreferenced_notes['order'])}")
ordered_notes = pd.concat([ordered_notes, unreferenced_notes])
# replace note references with note number
ordered_notes = (ordered_notes
.reset_index()
.rename(columns={"index": "Note number"})
.drop(columns=["order"])
)
# convert from python 0-indexing
ordered_notes["Note number"] = ordered_notes["Note number"] + 1
return GPTable(
table=ordered_notes,
table_name=table_name,
title=title,
instructions=instructions,
index_columns={}
)