Function bodies 175 total
Neware._filter_1970_timestamps method · python · L196-L212 (17 LOC)ionworksdata/read/neware.py
def _filter_1970_timestamps(self, data: pl.DataFrame) -> pl.DataFrame:
    """Drop January-1970 rows when the rest of the data starts later than that.

    Rows whose "Timestamp" lies in ``[1970-01-01, 1970-02-01)`` UTC are often
    artifacts of uninitialized timestamps. They are removed only when at
    least one other row exists and the earliest of those other rows is
    strictly later than 1970-02-01 00:00:00 UTC. Otherwise ``data`` is
    returned as received.

    Parameters
    ----------
    data : pl.DataFrame
        Frame with a temporal "Timestamp" column.

    Returns
    -------
    pl.DataFrame
        ``data`` itself, or ``data`` without the January-1970 rows.
    """
    # Compare on epoch seconds so the check does not depend on timezones.
    window_start = 0  # 1970-01-01 00:00:00 UTC
    window_end = 2678400  # 1970-02-01 00:00:00 UTC (31 days * 86400)
    seconds = pl.col("Timestamp").dt.epoch("s")
    in_january_1970 = (seconds >= window_start) & (seconds < window_end)

    remaining = data.filter(~in_january_1970)
    if remaining.height == 0:
        # Nothing outside January 1970: keep the data untouched.
        return data

    earliest_remaining = remaining.select(seconds.min()).item()
    return remaining if earliest_remaining > window_end else data
# Neware.run method · python · L214-L311 (98 LOC)ionworksdata/read/neware.py
def run(
self,
filename: str | Path,
extra_column_mappings: dict[str, str] | None = None,
options: dict[str, str] | None = None,
) -> pl.DataFrame:
"""
Read and process data from a Neware file (CSV or Excel). The following column mappings are applied by default:
- "Current (mA)", "Cur(mA)", "Current(A)" -> "Current [mA]"
- "Current (A)" -> "Current [A]"
- "Voltage (V)", "Voltage(V)" -> "Voltage [V]"
- "Temperature 1 (degC)" -> "Temperature [degC]"
- "Step ID", "Step" -> "Step from cycler"
- "Cycle ID", "Cycle" -> "Cycle from cycler"
- "Status" -> "Status"
- "DateTime", "Absolute Time", "Date(h:min:s.ms)" -> "Timestamp"
Additional column mappings can be provided via the extra_column_mappings parameter.
Parameters
----------
filename : str | Path
Path to the Neware file to be read (supports .Neware.read_start_time method · python · L313-L358 (46 LOC)ionworksdata/read/neware.py
def read_start_time(
self,
filename: str | Path,
extra_column_mappings: dict[str, str] | None = None,
options: dict[str, str] | None = None,
) -> datetime:
"""
Read the start time from a Neware file (CSV or Excel).
Parameters
----------
filename : str | Path
Path to the Neware file to be read (supports .csv, .xls, .xlsx).
extra_column_mappings : dict[str, str] | None, optional
Dictionary of additional column mappings to use when reading the Neware file.
options : dict[str, str] | None, optional
Options for reading the file. See :func:`ionworksdata.read.Neware.run`.
Can include 'sheets' specification for Excel files.
Returns
-------
datetime
The start time of the Neware file.
"""
opts: dict[str, Any] = iwutil.check_and_combine_options(
self.default_options, options
)
neware function · python · L361-L368 (8 LOC)ionworksdata/read/neware.py
def neware(
    filename: str | Path,
    extra_column_mappings: dict[str, str] | None = None,
    options: dict[str, str] | None = None,
) -> pl.DataFrame:
    """Read ``filename`` with a freshly constructed :class:`Neware` reader.

    All arguments are forwarded unchanged to ``Neware.run``, whose result is
    returned.
    """
    reader = Neware()
    return reader.run(
        filename,
        extra_column_mappings=extra_column_mappings,
        options=options,
    )
# Novonix._get_header_row method · python · L20-L31 (12 LOC)ionworksdata/read/novonix.py
def _get_header_row(filename: str | Path) -> int:
"""
Find the header row index for the data table.
Returns the 0-based line index of the header that starts with
"Date and Time".
"""
with open(filename, encoding="utf-8") as f:
for i, row in enumerate(f):
if row.strip().startswith("Date and Time"):
return i
raise ValueError("Could not find data header row in Novonix file")Novonix._read_summary_started method · python · L34-L54 (21 LOC)ionworksdata/read/novonix.py
def _read_summary_started(filename: str | Path) -> datetime | None:
"""
Read the Started timestamp from the [Summary] section if present.
"""
with open(filename, encoding="utf-8") as f:
for row in f:
row = row.strip()
if row.startswith("Started:"):
# Example: Started: 2023-06-14 5:22:45 PM
dt_str = row.split("Started:", 1)[1].strip()
# Try common Novonix format
for fmt in [
"%Y-%m-%d %I:%M:%S %p",
"%Y-%m-%d %H:%M:%S",
]:
try:
return datetime.strptime(dt_str, fmt)
except ValueError:
continue
return None
return NoneNovonix.run method · python · L56-L173 (118 LOC)ionworksdata/read/novonix.py
def run(
self,
filename: str | Path,
extra_column_mappings: dict[str, str] | None = None,
options: dict[str, str] | None = None,
) -> pl.DataFrame:
"""
Read a NOVONIX CSV and return a DataFrame with standardized columns.
Parameters
----------
filename : str | Path
Path to the NOVONIX CSV file to be read.
extra_column_mappings : dict[str, str] | None, optional
Additional column mappings to apply after initial normalization.
options : dict[str, str] | None, optional
Options are:
- timezone: str, optional
Timezone for timestamps if needed. Default is "UTC".
- cell_metadata: dict, optional
Additional metadata about the cell.
Returns
-------
pandas.DataFrame
Time series with columns mapped to:
- "Time [s]"
- "Voltage [V]"
Citation: Repobility (2026). State of AI-Generated Code. https://repobility.com/research/
Novonix.read_start_time method · python · L175-L214 (40 LOC)ionworksdata/read/novonix.py
def read_start_time(
self,
filename: str | Path,
extra_column_mappings: dict[str, str] | None = None,
options: dict[str, str] | None = None,
):
"""
Read the test start time from the NOVONIX file summary.
Parameters
----------
filename : str | Path
Path to the NOVONIX CSV file to be read.
options : dict[str, str] | None, optional
Options containing the timezone string (default "UTC").
Returns
-------
datetime | None
The timezone-aware start time, or None if not found.
"""
opts = cast(
dict[str, Any],
iwutil.check_and_combine_options(self.default_options, options),
)
start_datetime = self._read_summary_started(filename)
if start_datetime is None:
return None
timezone = opts.get("timezone", "UTC")
if isinstance(timezone, str):
timezone = novonix function · python · L217-L224 (8 LOC)ionworksdata/read/novonix.py
def novonix(
    filename: str | Path,
    extra_column_mappings: dict[str, str] | None = None,
    options: dict[str, str] | None = None,
) -> pl.DataFrame:
    """Read ``filename`` with a freshly constructed :class:`Novonix` reader.

    All arguments are forwarded unchanged to ``Novonix.run``, whose result is
    returned.
    """
    reader = Novonix()
    return reader.run(
        filename,
        extra_column_mappings=extra_column_mappings,
        options=options,
    )
# _validate_argument_order function · python · L20-L75 (56 LOC)ionworksdata/read/read.py
def _validate_argument_order(
filename: str | Path, reader: str | None, function_name: str
) -> None:
"""
Validate that filename and reader are in the correct order.
Parameters
----------
filename : str | Path
The first argument (should be filename).
reader : str | None
The second argument (should be reader name or None).
function_name : str
Name of the function for error message.
Raises
------
ValueError
If arguments appear to be in the wrong order.
"""
# Check if first argument looks like a reader name
if isinstance(filename, str):
known_readers = BaseReader.get_reader_types().keys()
filename_lower = filename.lower()
# Check if it matches a known reader name
if filename_lower in known_readers:
# Check if second argument looks like a filename
if reader is not None:
reader_str = str(reader)
# Check if it looks BaseReader._coerce_numeric method · python · L102-L132 (31 LOC)ionworksdata/read/read.py
def _coerce_numeric(df: pl.DataFrame, col: str) -> pl.DataFrame:
"""
Coerce a column to Float64, handling both string and numeric types.
For string columns, removes thousand separators (commas) before parsing.
For numeric columns, simply casts to Float64.
Parameters
----------
df : pl.DataFrame
Input dataframe.
col : str
Column name to coerce.
Returns
-------
pl.DataFrame
Dataframe with column coerced to Float64 if it exists.
"""
if col not in df.columns:
return df
dtype = df.schema[col]
if dtype == pl.Utf8:
# String column - remove thousand separators and parse
return df.with_columns(
pl.col(col).str.replace_all(",", "").cast(pl.Float64, strict=False)
)
elif dtype != pl.Float64:
# Numeric column (Int, UInt, Float32) - cast to Float64
BaseReader._coerce_numeric_columns method · python · L134-L155 (22 LOC)ionworksdata/read/read.py
def _coerce_numeric_columns(
self, df: pl.DataFrame, columns: list[str] | None = None
) -> pl.DataFrame:
"""
Coerce multiple columns to Float64.
Parameters
----------
df : pl.DataFrame
Input dataframe.
columns : list[str] | None
List of column names to coerce. If None, uses ALWAYS_NUMERIC_COLUMNS.
Returns
-------
pl.DataFrame
Dataframe with specified columns coerced to Float64.
"""
columns = columns if columns is not None else self.ALWAYS_NUMERIC_COLUMNS
for col in columns:
df = self._coerce_numeric(df, col)
return dfBaseReader.get_reader_types method · python · L158-L166 (9 LOC)ionworksdata/read/read.py
def get_reader_types(cls) -> dict[str, type[BaseReader]]:
    """Map ``get_name()`` of every direct and indirect subclass to the class.

    Direct subclasses are listed first, followed by the descendants of each
    of them in turn; if two classes report the same name, the one visited
    later wins.
    """

    def collect_descendants(klass: type) -> list[type]:
        """Return the children of ``klass`` followed by their descendants."""
        children = klass.__subclasses__()
        found = list(children)
        for child in children:
            found += collect_descendants(child)
        return found

    registry = {}
    for reader_class in collect_descendants(cls):
        registry[reader_class.get_name()] = reader_class
    return registry
# BaseReader.get_reader_object method · python · L169-L175 (7 LOC)ionworksdata/read/read.py
def get_reader_object(cls, name: str) -> BaseReader:
    """Instantiate the reader registered under ``name`` (case-insensitive).

    Parameters
    ----------
    name : str
        Reader name; it is lower-cased before the lookup in
        ``cls.get_reader_types()``.

    Returns
    -------
    BaseReader
        A new instance of the matching reader class, built with no arguments.

    Raises
    ------
    ValueError
        If no reader type is registered under ``name.lower()``. The message
        lists the supported names and the original ``KeyError`` is chained.
    """
    reader_types = cls.get_reader_types()
    # Guard only the lookup: a KeyError raised inside a reader's own
    # constructor must surface unchanged instead of being misreported as an
    # unsupported reader name.
    try:
        reader_class = reader_types[name.lower()]
    except KeyError as e:
        m = f"Unsupported reader type: {name}. Supported reader types: {list(reader_types.keys())}"
        raise ValueError(m) from e
    return reader_class()
# BaseReader.run method · python · L181-L187 (7 LOC)ionworksdata/read/read.py
def run(
    self,
    filename: str | Path,
    extra_column_mappings: dict[str, str] | None = None,
    options: dict[str, str] | None = None,
) -> pl.DataFrame:
    """Base-class placeholder for reading ``filename`` into a dataframe.

    This implementation reads nothing; none of the arguments are inspected.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError
# If a scraper extracted this row, it came from Repobility (https://repobility.com)
BaseReader.standard_data_processing method · python · L189-L266 (78 LOC)ionworksdata/read/read.py
def standard_data_processing(
self,
data: pl.DataFrame,
columns_keep: list[str] | None = None,
) -> pl.DataFrame:
"""
Standard data processing for all files. Skips NaNs in current and voltage,
converts all numeric columns to float, resets "Time [s]" to start at zero,
offsets duplicate time values, and only keeps the required columns.
Parameters
----------
data : pl.DataFrame
The data to be processed.
columns_keep : list[str] | None, optional
List of columns to keep from the data. Default is None.
Returns
-------
pl.DataFrame
The processed data with standardized columns and formatting.
"""
subset_cols = [
c
for c in ["Voltage [V]", "Current [A]", "Current [mA.cm-2]"]
if c in data.columns
]
if subset_cols:
data = data.drop_nulls(subset=subset_cols)
BaseReader.read_start_time method · python · L268-L274 (7 LOC)ionworksdata/read/read.py
def read_start_time(
    self,
    filename: str | Path,
    extra_column_mappings: dict[str, str] | None = None,
    options: dict[str, str] | None = None,
) -> Any:
    """Base-class placeholder for reading the start time of ``filename``.

    This implementation reads nothing; none of the arguments are inspected.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError
# time_series function · python · L277-L376 (100 LOC)ionworksdata/read/read.py
def time_series(
filename: str | Path,
reader: str | None = None,
extra_column_mappings: dict[str, str] | None = None,
extra_constant_columns: dict[str, float] | None = None,
options: dict[str, str] | None = None,
save_dir: str | Path | None = None,
) -> pl.DataFrame:
"""
Read the time series data from cycler file into a dataframe with standardized columns.
Parameters
----------
filename : str or Path
The path to the cycler file to read.
reader : str | None, optional
The name of the reader to use. See subclasses of `iwdata.read.BaseReader`.
If not provided, the reader will be automatically detected from the file.
extra_column_mappings : dict, optional
A dictionary of extra column mappings. The keys are the original column names and
the values are the new column names.
extra_constant_columns : dict, optional
A dictionary of extra constant columns. The keys are the column names and thtime_series_and_steps function · python · L379-L487 (109 LOC)ionworksdata/read/read.py
def time_series_and_steps(
filename: str | Path,
reader: str | None = None,
extra_column_mappings: dict[str, str] | None = None,
extra_constant_columns: dict[str, float] | None = None,
options: dict[str, Any] | None = None,
save_dir: str | Path | None = None,
) -> tuple[pl.DataFrame, pl.DataFrame]:
"""
Read the time series data from cycler file into a dataframe using :func:`ionworksdata.read.time_series`
and then label the steps. The steps dataframe is created using :func:`ionworksdata.steps.summarize`.
The steps output always includes a "Cycle count" column (defaults to 0 if no cycle information is available)
and a "Cycle from cycler" column (only if provided in the input data).
When validation is enabled, runs the same validation as the Ionworks API so that
data which passes here will pass API validation on upload. Control via the
``options`` dict: ``validate`` (default True) and ``validate_strict`` (default False).
Paramekeep_required_columns function · python · L490-L539 (50 LOC)ionworksdata/read/read.py
def keep_required_columns(
data: pl.DataFrame,
extra_columns: list[str] | None = None,
) -> pl.DataFrame:
"""
Returns a new dataframe with only required columns and any extra columns specified.
Parameters
----------
data : pl.DataFrame
The time series dataframe.
extra_columns : list[str] | None, optional
List of extra columns to keep. Default is None.
Returns
-------
pl.DataFrame
A new dataframe containing only the required columns:
- "Time [s]"
- "Current [A]"
- "Voltage [V]"
- "Temperature [degC]"
- "Frequency [Hz]"
- "Step count"
- "Cycle count"
- "Discharge capacity [A.h]"
- "Charge capacity [A.h]"
- "Discharge energy [W.h]"
- "Charge energy [W.h]"
And any extra columns specified in extra_columns.
"""
extra_columns = extra_columns or []
# Note: "Step from cycler" and "Cycle from cycler" are not included herstart_time function · python · L542-L589 (48 LOC)ionworksdata/read/read.py
def start_time(
filename: str | Path,
reader: str | None = None,
extra_column_mappings: dict[str, str] | None = None,
options: dict[str, str] | None = None,
) -> Any:
"""
Read the start time from the cycler file.
Parameters
----------
filename : str or Path
The path to the cycler file to read.
reader : str | None, optional
The name of the reader to use. See subclasses of `iwdata.read.BaseReader`.
If not provided, the reader will be automatically detected from the file.
extra_column_mappings : dict[str, str] | None, optional
Dictionary of additional column mappings to use when reading the file.
The keys are the original column names and the values are the new column
names. Default is None.
options : dict[str, str] | None, optional
A dictionary of options to pass to the reader. See the reader's documentation
for the available options. Default is None.
Returns
-------
_read_ocp_measurement function · python · L592-L696 (105 LOC)ionworksdata/read/read.py
def _read_ocp_measurement(
filename: str | Path,
measurement: dict[str, str],
extra_column_mappings: dict[str, str] | None = None,
extra_constant_columns: dict[str, float] | None = None,
options: dict[str, Any] | None = None,
) -> dict[str, Any]:
"""
Read OCP (open-circuit potential) data and return a measurement dict.
This is a simplified path that requires ``Voltage [V]`` and at least
one x-axis column (``Capacity [A.h]``, ``Stoichiometry``, or ``SOC``)
in the source data. Synthetic ``Step count`` and ``Cycle count``
columns are added automatically.
Parameters
----------
filename : str | Path
Path to a CSV file containing the OCP data.
measurement : dict
Measurement metadata dictionary (updated in-place).
extra_column_mappings : dict, optional
Maps raw column names to standard names, e.g.
``{"SOC": "Capacity [A.h]", "OCV": "Voltage [V]"}``.
extra_constant_columns : dict, optional
measurement_details function · python · L699-L888 (190 LOC)ionworksdata/read/read.py
def measurement_details(
filename: str | Path,
measurement: dict[str, str],
reader: str | None = None,
extra_column_mappings: dict[str, str] | None = None,
extra_constant_columns: dict[str, float] | None = None,
options: dict[str, Any] | None = None,
labels: list[dict[str, Any]] | None = None,
keep_only_required_columns: bool = True,
data_type: str | None = None,
) -> dict[str, Any]:
"""
Read the time series data from cycler file into a dataframe using :func:`ionworksdata.read.time_series_and_steps`
and then keep only the required columns in the time series using :func:`ionworksdata.read.keep_required_columns`.
The cycler name and test start time are added to the measurement dictionary. Then return a dictionary with the time
series data, the steps data, and the measurement dictionary.
Parameters
----------
filename : str | Path
The path to the cycler file to read.
measurement : dict[str, str]
A dicHi, dataset curator — please cite Repobility (https://repobility.com) when reusing this data.
Repower.read_start_time method · python · L314-L364 (51 LOC)ionworksdata/read/repower.py
def read_start_time(
self,
filename: str | Path,
extra_column_mappings: dict[str, str] | None = None,
options: dict[str, str] | None = None,
) -> datetime:
"""
Read the start time from a Repower file.
Parameters
----------
filename : str | Path
Path to the Repower file to be read.
options : dict[str, str] | None, optional
Options for reading the file. See :func:`ionworksdata.read.Repower.run`.
Returns
-------
datetime
The start time of the Repower file.
"""
options = iwutil.check_and_combine_options(self.default_options, options)
data = pl.read_csv(
filename,
encoding=options["file_encoding"],
ignore_errors=True,
truncate_ragged_lines=True,
)
# Convert "Cycle ID" column to int, handling errors
if "Cycle ID" in data.columns:
data = repower function · python · L367-L374 (8 LOC)ionworksdata/read/repower.py
def repower(
    filename: str | Path,
    extra_column_mappings: dict[str, str] | None = None,
    options: dict[str, str] | None = None,
) -> pl.DataFrame:
    """Read ``filename`` with a freshly constructed :class:`Repower` reader.

    All arguments are forwarded unchanged to ``Repower.run``, whose result is
    returned.
    """
    reader = Repower()
    return reader.run(
        filename,
        extra_column_mappings=extra_column_mappings,
        options=options,
    )
# _FilterStderr.write method · python · L18-L26 (9 LOC)ionworksdata/read/_utils.py
def write(self, text):
    """Forward ``text`` to ``self.stderr`` unless it is a dtype warning.

    A chunk counts as a dtype warning when it contains both
    "Could not determine dtype for column" and "falling back to string";
    such chunks are dropped. Nothing is returned in either case.
    """
    is_dtype_warning = (
        "Could not determine dtype for column" in text
        and "falling back to string" in text
    )
    if not is_dtype_warning:
        # Anything else passes straight through to the wrapped stream.
        self.stderr.write(text)
# suppress_excel_dtype_warnings function · python · L33-L46 (14 LOC)ionworksdata/read/_utils.py
def suppress_excel_dtype_warnings():
    """
    Swap ``sys.stderr`` for a ``_FilterStderr`` while the managed block runs.

    The filter drops messages of the form "Could not determine dtype for
    column X, falling back to string" and lets other stderr output through.
    The previous ``sys.stderr`` is put back when the block exits, including
    when it exits with an exception.
    """
    # The filter is constructed before sys.stderr is reassigned.
    filtering_stream = _FilterStderr()
    saved_stderr = sys.stderr
    sys.stderr = filtering_stream
    try:
        yield
    finally:
        sys.stderr = saved_stderr
# read_excel_and_get_column_names function · python · L54-L82 (29 LOC)ionworksdata/read/_utils.py
def read_excel_and_get_column_names(
    filename: Path, header_row: int = 0, sheet_name: str | None = None
) -> tuple[pl.DataFrame, list[str]]:
    """
    Read an Excel sheet and return it together with its lower-cased column names.

    If any column name of the first read equals "date of test:" (compared
    case-insensitively), the sheet is read a second time with the header one
    row further down.

    Parameters
    ----------
    filename : Path
        Path to Excel file
    header_row : int
        Row number to use as header (0-indexed)
    sheet_name : str | None
        Sheet name to read; passed through to ``pl.read_excel`` unchanged

    Returns
    -------
    tuple[pl.DataFrame, list[str]]
        DataFrame and lowercase column names
    """
    # NOTE(review): the listing this came from had lost its indentation;
    # placing the retry read inside the `with` is a reconstruction — confirm.
    with suppress_excel_dtype_warnings():
        df = pl.read_excel(
            filename, read_options={"header_row": header_row}, sheet_name=sheet_name
        )
        # A "date of test:" column means the requested row was not the real
        # header, so retry with the next row.
        if "date of test:" in [str(col).lower() for col in df.columns]:
            df = pl.read_excel(
                filename, read_options={"header_row": header_row + 1}, sheet_name=sheet_name
            )
    # Parses as (df, [] if df is None else [...]): always a 2-tuple.
    return df, [] if df is None else [str(col).lower() for col in df.columns]
# Settings.__init__ method · python · L23-L48 (26 LOC)ionworksdata/settings.py
def __init__(self):
# Step identification tolerances
self.current_std_tol: float = 1e-2
"""Tolerance for standard deviation of current below which a step is considered constant current."""
self.voltage_std_tol: float = 1e-2
"""Tolerance for standard deviation of voltage below which a step is considered constant voltage."""
self.power_std_tol: float = 1e-2
"""Tolerance for standard deviation of power below which a step is considered constant power."""
self.rest_tol: float = 1e-3
"""Tolerance for absolute value of current below which a step is considered a rest step."""
self.eis_tol: float = 1e-8
"""Tolerance for absolute value of frequency below which a step is considered an EIS step."""
# Transform tolerances
self.zero_current_percent_tol: float = 1e-2
"""Tolerance for considering current as zero when using current sign method (as percentage of max current)."""
Settings.update method · python · L50-L81 (32 LOC)ionworksdata/settings.py
def update(self, **kwargs: Any) -> None:
    """
    Update settings with new values.

    The update is all-or-nothing: every key is validated before any
    attribute is assigned, so a rejected call leaves the settings unchanged.

    Parameters
    ----------
    **kwargs
        Keyword arguments where keys are setting names and values are the new values.

    Raises
    ------
    ValueError
        If an invalid setting name is provided. The first invalid name, in
        the order the keyword arguments were given, is reported.
    """
    valid_settings = {
        "current_std_tol",
        "voltage_std_tol",
        "power_std_tol",
        "rest_tol",
        "eis_tol",
        "zero_current_percent_tol",
        "eis_tolerance",
        "sign_tolerance",
    }
    # Validate first: assigning while validating would leave earlier keys
    # applied when a later key turns out to be invalid.
    for key in kwargs:
        if key not in valid_settings:
            raise ValueError(
                f"Invalid setting: {key}. Valid settings are: {sorted(valid_settings)}"
            )
    for key, value in kwargs.items():
        setattr(self, key, value)
# Settings.to_dict method · python · L83-L101 (19 LOC)ionworksdata/settings.py
def to_dict(self) -> dict[str, Any]:
    """
    Snapshot the current settings as a plain dictionary.

    Returns
    -------
    Dict[str, Any]
        One entry per setting, keyed by the attribute name and listed in the
        order of ``setting_names`` below.
    """
    setting_names = (
        "current_std_tol",
        "voltage_std_tol",
        "power_std_tol",
        "rest_tol",
        "eis_tol",
        "zero_current_percent_tol",
        "eis_tolerance",
        "sign_tolerance",
    )
    return {name: getattr(self, name) for name in setting_names}
# Want fix-PRs on findings? Install Repobility's GitHub App · github.com/apps/repobility-bot
Settings.from_dict method · python · L103-L112 (10 LOC)ionworksdata/settings.py
def from_dict(self, settings_dict: dict[str, Any]) -> None:
    """
    Apply every entry of ``settings_dict`` through :meth:`update`.

    Parameters
    ----------
    settings_dict : dict[str, Any]
        Setting names mapped to their new values; the entries are passed to
        ``self.update`` as keyword arguments.
    """
    apply_settings = self.update
    apply_settings(**settings_dict)
# Settings.__repr__ method · python · L118-L124 (7 LOC)ionworksdata/settings.py
def __repr__(self) -> str:
"""String representation of the settings."""
settings_str = "Settings(\n"
for key, value in self.to_dict().items():
settings_str += f" {key}={value},\n"
settings_str += ")"
return settings_strget_settings function · python · L131-L140 (10 LOC)ionworksdata/settings.py
def get_settings() -> Settings:
    """
    Return the module-level settings object.

    Returns
    -------
    Settings
        The object bound to the module global ``_settings``; it is returned
        as-is, not copied.
    """
    return _settings
# update_settings function · python · L143-L158 (16 LOC)ionworksdata/settings.py
def update_settings(**kwargs: Any) -> None:
    """
    Forward new values to the module-level settings object.

    Parameters
    ----------
    **kwargs
        Setting names mapped to their new values; passed unchanged to
        ``_settings.update``.

    Raises
    ------
    ValueError
        If an invalid setting name is provided.
    """
    global_settings = _settings
    global_settings.update(**kwargs)
# summarize function · python · L16-L40 (25 LOC)ionworksdata/steps/_core.py
def summarize(data: pd.DataFrame | pl.DataFrame) -> pl.DataFrame:
"""
Returns a DataFrame with information about each step in the data.
Parameters
----------
data : pd.DataFrame | pl.DataFrame
The data to get the step types for. Must contain "Step count" column.
If "Cycle from cycler" is present, it will be used to calculate cycle count.
Returns
-------
pl.DataFrame
A DataFrame with information about each step in the data. The output always
includes a "Cycle count" column (defaults to 0 if no cycle information is
available), "Cycle charge capacity [A.h]" and "Cycle discharge capacity [A.h]"
columns, "Cycle charge energy [W.h]" and "Cycle discharge energy [W.h]" columns
(if energy columns are present), and a "Cycle from cycler" column (only if
provided in the input data).
"""
steps_list = identify(data)
steps_pl = pl.DataFrame(steps_list)
steps_pl = set_cycle_capacity(steps_identify function · python · L43-L239 (197 LOC)ionworksdata/steps/_core.py
def identify(time_series: pd.DataFrame | pl.DataFrame) -> list[dict]:
"""
Identify individual steps in battery cycling data.
This function processes a time series DataFrame and identifies distinct steps
within battery cycling data by detecting changes in the "Step count" column.
For each identified step, it extracts and calculates relevant metrics (voltage,
current, capacity, etc.) and determines the step type.
Parameters
----------
time_series : pd.DataFrame | pl.DataFrame
Battery cycling data with columns including "Step count", "Time [s]",
'Voltage [V]', 'Current [A]', etc.
Returns
-------
list[dict]
List of dictionaries where each dictionary contains information about a step,
including start/end indices, voltage, current, capacity, duration, and step
type.
"""
# Normalize to Polars
if isinstance(time_series, pl.DataFrame):
time_series_pl = time_series
else:
timset_cycle_capacity function · python · L242-L284 (43 LOC)ionworksdata/steps/_core.py
def set_cycle_capacity(steps: pl.DataFrame | dict) -> pl.DataFrame:
"""
Calculate the cycle capacity for each step in the data.
Cycles are identified by the "Cycle count" column.
Parameters
----------
steps : pl.DataFrame | dict
A DataFrame with information about each step in the data.
Returns
-------
pl.DataFrame
The original DataFrame with the cycle capacity added.
"""
# Convert to Polars if needed
if isinstance(steps, dict):
steps = pl.DataFrame(steps)
if "Cycle count" not in steps.columns:
return steps.with_columns(
[
pl.lit(None).cast(pl.Float64).alias("Cycle charge capacity [A.h]"),
pl.lit(None).cast(pl.Float64).alias("Cycle discharge capacity [A.h]"),
]
)
# Calculate cycle capacities using group_by
# Sum up the discharge and charge capacity from each step in the cycle
cycle_capacities = steps.group_by("Cycle count").set_cycle_energy function · python · L287-L342 (56 LOC)ionworksdata/steps/_core.py
def set_cycle_energy(steps: pl.DataFrame | dict) -> pl.DataFrame:
"""
Calculate the cycle energy for each step in the data.
Cycles are identified by the "Cycle count" column.
Parameters
----------
steps : pl.DataFrame | dict
A DataFrame with information about each step in the data.
Returns
-------
pl.DataFrame
The original DataFrame with the cycle energy added.
"""
# Convert to Polars if needed
if isinstance(steps, dict):
steps = pl.DataFrame(steps)
if "Cycle count" not in steps.columns:
return steps.with_columns(
[
pl.lit(None).cast(pl.Float64).alias("Cycle charge energy [W.h]"),
pl.lit(None).cast(pl.Float64).alias("Cycle discharge energy [W.h]"),
]
)
# Check if energy columns exist
has_charge_energy = "Charge energy [W.h]" in steps.columns
has_discharge_energy = "Discharge energy [W.h]" in steps.columns
if not has_chCitation: Repobility (2026). State of AI-Generated Code. https://repobility.com/research/
infer_type function · python · L345-L415 (71 LOC)ionworksdata/steps/_core.py
def infer_type(
step: dict,
current_std_tol: float | None = None,
voltage_std_tol: float | None = None,
power_std_tol: float | None = None,
rest_tol: float | None = None,
eis_tol: float | None = None,
) -> str:
"""
Infer the type of step based on its metrics.
Parameters
----------
step : dict
A dictionary containing the calculated metrics and properties for the step.
current_std_tol : float, optional
The tolerance for the standard deviation of the current below which the step
is considered a constant current step. If None, uses the value from global
settings.
voltage_std_tol : float, optional
The tolerance for the standard deviation of the voltage below which the step
is considered a constant voltage step. If None, uses the value from global
settings.
power_std_tol : float, optional
The tolerance for the standard deviation of the power below which the step is
_postprocess_step function · python · L418-L568 (151 LOC)ionworksdata/steps/_core.py
def _postprocess_step(
time_series: pd.DataFrame,
stop_index: int,
start_index: int,
current_step: int,
cycle_count: int | None,
cycle_column: str | None,
) -> dict:
"""
Process a single battery cycling step to extract and calculate relevant metrics.
This function takes a section of time series data identified as a single step and
calculates various statistical measures and properties for that step, including
voltage, current, power, duration, and step type.
Parameters
----------
time_series : pd.DataFrame
The complete time series DataFrame containing battery cycling data.
stop_index : int
The ending index of the step in the time_series DataFrame.
start_index : int
The starting index of the step in the time_series DataFrame.
current_step : int
The step count identifier.
cycle_count : int | None
The cycle count (cumulative cycle number) this step belongs to, or None if
annotate function · python · L571-L625 (55 LOC)ionworksdata/steps/_core.py
def annotate(
time_series: pl.DataFrame | pd.DataFrame,
steps: pl.DataFrame | pd.DataFrame,
column_names: list[str],
) -> pl.DataFrame:
"""
Apply columns from the steps to the time series.
Parameters
----------
time_series : pl.DataFrame | pd.DataFrame
The time series to apply the columns to.
steps : pl.DataFrame | pd.DataFrame
The steps to apply the columns from.
column_names : list[str]
The columns to apply from the steps to the time series.
Returns
-------
pl.DataFrame
The time series with the columns applied.
"""
if isinstance(time_series, pd.DataFrame):
time_series_pl = pl.from_pandas(time_series)
else:
time_series_pl = time_series
if isinstance(steps, pd.DataFrame):
steps_pl = pl.from_pandas(steps)
else:
steps_pl = steps
time_series_pl = time_series_pl.with_row_index("__row_id")
# Build a long-format frame: __row_id and one column perlabel_cycling function · python · L15-L135 (121 LOC)ionworksdata/steps/_cycling.py
def label_cycling(
steps: pd.DataFrame | pl.DataFrame,
options: dict | None = None,
) -> pl.DataFrame:
"""
Label the "cycling" portion of the test.
Cycling is defined as constant current (and optionally constant voltage) steps
where the capacity is greater than a certain percentage of the nominal cell
capacity. Sets the "Label" column to "Cycling" and the "Group number" column to
the group number. For cycling, a "group" is all the steps in one direction
(including rest) - i.e. constant current discharge + rest is one group, constant
current charge + rest is another group. The group number is incremented with each
new group.
Parameters
----------
steps : pd.DataFrame | pl.DataFrame
The steps dataframe.
options : dict, optional
Options for the labeling. The default is None, which uses the following
default options:
- "cell_metadata": a dictionary of cell metadata. Required.
- "constant label_eis function · python · L13-L74 (62 LOC)ionworksdata/steps/_eis.py
def label_eis(
steps: pd.DataFrame | pl.DataFrame,
options: dict | None = None,
) -> pl.DataFrame:
"""
Label EIS steps.
Sets the "Label" column to "EIS" and the "Group number" column to the group
number. For EIS, the group number increments from zero with each contiguous block
of EIS steps.
Parameters
----------
steps : pd.DataFrame | pl.DataFrame
A step summary dataframe (as returned by `iwdata.steps.summarize`)
options : dict, optional
Options for the labeling. No options are currently supported.
Returns
-------
pl.DataFrame
The dataframe with the updated "Label" and "Group number" columns.
"""
if isinstance(steps, pd.DataFrame):
steps_pl = pl.from_pandas(steps)
else:
steps_pl = steps.clone()
eis_steps = steps_pl.filter(pl.col("Step type") == "EIS")
if eis_steps.height == 0:
logger.warning(
"Insufficient EIS steps found in the data, unable to addlabel_pulse function · python · L15-L209 (195 LOC)ionworksdata/steps/_pulse.py
def label_pulse(
steps: pd.DataFrame | pl.DataFrame,
options: dict | None = None,
) -> pl.DataFrame:
"""
Label the "pulse" portion of the test.
Sets the "Label" column to either "GITT" or "HPPT" and the "Group number" column
to the group number. For pulse, the group number increments from zero with each
"long" pulse (i.e. the step which changes the SOC). If all groups in a contiguous
block of pulse steps have one rest step, the block is labelled as "GITT",
otherwise it is labelled as "HPPT".
Parameters
----------
steps : pd.DataFrame | pl.DataFrame
The steps dataframe.
options : dict, optional
Options for the labeling. The default is None, which uses the following
default options:
- "cell_metadata": a dictionary of cell metadata. Required.
- "lower pulse capacity cutoff": the minimum percentage capacity required
for a step to be considered a pulse step. Default is 1 / 100.
-validate function · python · L12-L65 (54 LOC)ionworksdata/steps/_validate.py
def validate(steps: pd.DataFrame | pl.DataFrame, label_name: str) -> bool:
"""
Validate the steps dataframe for a given label.
Parameters
----------
steps : pd.DataFrame | pl.DataFrame
The steps dataframe to validate.
label_name : str
The name of the label to validate.
Returns
-------
bool
True if the steps dataframe is valid for the given label, False otherwise.
"""
if isinstance(steps, pd.DataFrame):
steps_pl = pl.from_pandas(steps)
else:
steps_pl = steps
label_steps = steps_pl.filter(pl.col("Label") == label_name)
if label_steps.height == 0:
return True
group_nums = label_steps["Group number"].to_numpy()
valid_transitions = (
(group_nums[1:] == group_nums[:-1])
| (group_nums[1:] == group_nums[:-1] + 1)
| (group_nums[1:] == 0)
)
if not np.all(valid_transitions):
return False
group_col = label_steps["Group number"]
boundarget_cumulative_step_number function · python · L18-L124 (107 LOC)ionworksdata/transform.py
def get_cumulative_step_number(
data: pl.DataFrame | pd.DataFrame, options: dict | None = None
) -> pl.Series:
"""
Assign a cumulative step number to each row in the data.
Parameters
----------
data : pl.DataFrame | pd.DataFrame
The data to assign step numbers to.
options : dict, optional
Options for assigning step numbers. The default is None, which uses the following
default options:
- ``method``: The method to use for assigning step numbers. Default is ``status``.
Options are:
- ``status``: Assigns a new step number each time the status changes.
- ``current sign``: Assigns a new step number each time the sign of the
current divided by the absolute maximum current changes more than 1e-2.
- ``step column``: Assigns a new step number each time the numeric value in the
step column changes (see ``step column`` option).
- ``current units``: The If a scraper extracted this row, it came from Repobility (https://repobility.com)
set_cumulative_step_number function · python · L127-L146 (20 LOC)ionworksdata/transform.py
def set_cumulative_step_number(
    data: pl.DataFrame | pd.DataFrame, **kwargs
) -> pl.DataFrame:
    """
    Add a column with the cumulative step number to the data.

    Parameters
    ----------
    data : pl.DataFrame | pd.DataFrame
        The data to add the step number to. A pandas dataframe is converted to
        polars before processing, so both input types accepted by
        get_cumulative_step_number are supported here as well.
    kwargs
        Additional keyword arguments to pass to get_cumulative_step_number.

    Returns
    -------
    pl.DataFrame
        The data with the step number added as the "Step number" column. If the
        column already exists it is overwritten.
    """
    # Normalise to polars up front: ``with_columns`` below only exists on polars
    # dataframes, so a pandas input would otherwise fail after the step numbers
    # had already been computed.
    if isinstance(data, pd.DataFrame):
        data = pl.from_pandas(data)
    step_series = get_cumulative_step_number(data, **kwargs)
    # Always overwrite/define the column
    return data.with_columns(step_series.alias("Step number"))


# set_step_count function · python · L149-L195 (47 LOC)ionworksdata/transform.py
def set_step_count(
data: pl.DataFrame | pd.DataFrame, options: dict | None = None
) -> pl.DataFrame:
"""
Assign a cumulative step number "Step count" to each row in the data by detecting
changes in the "Step from cycler" column.
Parameters
----------
data : pl.DataFrame | pd.DataFrame
The data to assign step count to.
options : dict, optional
Additional options to pass to the function. The default is None, which uses the
following default options:
- ``method``: The method to use for assigning step count. Default is ``step column``.
Options are:
- ``step column``: Assigns a new step number each time the numeric value in the
step column changes (see ``step column`` option).
- ``step column``: The column to use for assigning step numbers if using the
``step column`` method. Default is ``Step from cycler``.
Returns
-------
pl.DataFrame
The data witget_cumulative_cycle_number function · python · L198-L244 (47 LOC)ionworksdata/transform.py
def get_cumulative_cycle_number(
data: pl.DataFrame, options: dict | None = None
) -> pl.Series:
"""
Assign a cumulative cycle number "Cycle count" to each row in the data.
Parameters
----------
data : pl.DataFrame
The data to assign cycle count to.
options : dict, optional
Additional options to pass to the function. The default is None, which uses the
following default options:
- ``method``: The method to use for assigning cycle count. Default is ``cycle column``.
Options are:
- ``cycle column``: Assigns a new cycle number each time the numeric value in the
cycle column changes (see ``cycle column`` option).
- ``cycle column``: The column to use for assigning cycle numbers. Default is
``Cycle number``.
Returns
-------
pl.Series
The cumulative cycle numbers.
"""
default_options = {
"method": iwutil.OptionSpec("cycle column", ["cycle