← back to ionworks__ionworksdata

Function bodies 175 total

All specs Real LLM only Function bodies
Neware._filter_1970_timestamps method · python · L196-L212 (17 LOC)
ionworksdata/read/neware.py
    def _filter_1970_timestamps(self, data: pl.DataFrame) -> pl.DataFrame:
        """Drop January-1970 rows when the rest of the data starts after that month.

        Rows whose "Timestamp" lies in [1970-01-01, 1970-02-01) are often
        artifacts of uninitialized timestamps. They are removed only when at
        least one row lies outside that window and the earliest such row is
        later than 1970-02-01 00:00:00; otherwise ``data`` is returned unchanged.
        """
        # Work in integer epoch seconds so the comparison avoids timezone issues.
        window_start = 0  # 1970-01-01 00:00:00 UTC
        window_end = 2678400  # 1970-02-01 00:00:00 UTC (31 days * 86400)

        epoch_s = pl.col("Timestamp").dt.epoch("s")
        in_window = (epoch_s >= window_start) & (epoch_s < window_end)
        remaining = data.filter(~in_window)

        # Nothing outside the window: there is no later reference point.
        if remaining.height == 0:
            return data

        earliest = remaining.select(epoch_s.min()).item()
        if earliest > window_end:
            return remaining
        return data
Neware.run method · python · L214-L311 (98 LOC)
ionworksdata/read/neware.py
    def run(
        self,
        filename: str | Path,
        extra_column_mappings: dict[str, str] | None = None,
        options: dict[str, str] | None = None,
    ) -> pl.DataFrame:
        """
        Read and process data from a Neware file (CSV or Excel). The following column mappings are applied by default:

            - "Current (mA)", "Cur(mA)", "Current(A)" -> "Current [mA]"
            - "Current (A)" -> "Current [A]"
            - "Voltage (V)", "Voltage(V)" -> "Voltage [V]"
            - "Temperature 1 (degC)" -> "Temperature [degC]"
            - "Step ID", "Step" -> "Step from cycler"
            - "Cycle ID", "Cycle" -> "Cycle from cycler"
            - "Status" -> "Status"
            - "DateTime", "Absolute Time", "Date(h:min:s.ms)" -> "Timestamp"

        Additional column mappings can be provided via the extra_column_mappings parameter.

        Parameters
        ----------
        filename : str | Path
            Path to the Neware file to be read (supports .
Neware.read_start_time method · python · L313-L358 (46 LOC)
ionworksdata/read/neware.py
    def read_start_time(
        self,
        filename: str | Path,
        extra_column_mappings: dict[str, str] | None = None,
        options: dict[str, str] | None = None,
    ) -> datetime:
        """
        Read the start time from a Neware file (CSV or Excel).

        Parameters
        ----------
        filename : str | Path
            Path to the Neware file to be read (supports .csv, .xls, .xlsx).
        extra_column_mappings : dict[str, str] | None, optional
            Dictionary of additional column mappings to use when reading the Neware file.
        options : dict[str, str] | None, optional
            Options for reading the file. See :func:`ionworksdata.read.Neware.run`.
            Can include 'sheets' specification for Excel files.

        Returns
        -------
        datetime
            The start time of the Neware file.
        """
        opts: dict[str, Any] = iwutil.check_and_combine_options(
            self.default_options, options
        )

    
neware function · python · L361-L368 (8 LOC)
ionworksdata/read/neware.py
def neware(
    filename: str | Path,
    extra_column_mappings: dict[str, str] | None = None,
    options: dict[str, str] | None = None,
) -> pl.DataFrame:
    """
    Read a Neware file with a freshly constructed :class:`Neware` reader.

    All arguments are forwarded unchanged to ``Neware.run``, and its result
    is returned.
    """
    reader = Neware()
    return reader.run(
        filename,
        extra_column_mappings=extra_column_mappings,
        options=options,
    )
Novonix._get_header_row method · python · L20-L31 (12 LOC)
ionworksdata/read/novonix.py
    def _get_header_row(filename: str | Path) -> int:
        """
        Find the header row index for the data table.

        Returns the 0-based line index of the header that starts with
        "Date and Time".
        """
        with open(filename, encoding="utf-8") as f:
            for i, row in enumerate(f):
                if row.strip().startswith("Date and Time"):
                    return i
        raise ValueError("Could not find data header row in Novonix file")
Novonix._read_summary_started method · python · L34-L54 (21 LOC)
ionworksdata/read/novonix.py
    def _read_summary_started(filename: str | Path) -> datetime | None:
        """
        Read the Started timestamp from the [Summary] section if present.
        """
        with open(filename, encoding="utf-8") as f:
            for row in f:
                row = row.strip()
                if row.startswith("Started:"):
                    # Example: Started: 2023-06-14 5:22:45 PM
                    dt_str = row.split("Started:", 1)[1].strip()
                    # Try common Novonix format
                    for fmt in [
                        "%Y-%m-%d %I:%M:%S %p",
                        "%Y-%m-%d %H:%M:%S",
                    ]:
                        try:
                            return datetime.strptime(dt_str, fmt)
                        except ValueError:
                            continue
                    return None
        return None
Novonix.run method · python · L56-L173 (118 LOC)
ionworksdata/read/novonix.py
    def run(
        self,
        filename: str | Path,
        extra_column_mappings: dict[str, str] | None = None,
        options: dict[str, str] | None = None,
    ) -> pl.DataFrame:
        """
        Read a NOVONIX CSV and return a DataFrame with standardized columns.

        Parameters
        ----------
        filename : str | Path
            Path to the NOVONIX CSV file to be read.
        extra_column_mappings : dict[str, str] | None, optional
            Additional column mappings to apply after initial normalization.
        options : dict[str, str] | None, optional
            Options are:

                - timezone: str, optional
                    Timezone for timestamps if needed. Default is "UTC".
                - cell_metadata: dict, optional
                    Additional metadata about the cell.

        Returns
        -------
        pandas.DataFrame
            Time series with columns mapped to:
            - "Time [s]"
            - "Voltage [V]"
      
Citation: Repobility (2026). State of AI-Generated Code. https://repobility.com/research/
Novonix.read_start_time method · python · L175-L214 (40 LOC)
ionworksdata/read/novonix.py
    def read_start_time(
        self,
        filename: str | Path,
        extra_column_mappings: dict[str, str] | None = None,
        options: dict[str, str] | None = None,
    ):
        """
        Read the test start time from the NOVONIX file summary.

        Parameters
        ----------
        filename : str | Path
            Path to the NOVONIX CSV file to be read.
        options : dict[str, str] | None, optional
            Options containing the timezone string (default "UTC").

        Returns
        -------
        datetime | None
            The timezone-aware start time, or None if not found.
        """
        opts = cast(
            dict[str, Any],
            iwutil.check_and_combine_options(self.default_options, options),
        )
        start_datetime = self._read_summary_started(filename)
        if start_datetime is None:
            return None

        timezone = opts.get("timezone", "UTC")
        if isinstance(timezone, str):
            timezone = 
novonix function · python · L217-L224 (8 LOC)
ionworksdata/read/novonix.py
def novonix(
    filename: str | Path,
    extra_column_mappings: dict[str, str] | None = None,
    options: dict[str, str] | None = None,
) -> pl.DataFrame:
    """
    Read a NOVONIX file with a freshly constructed :class:`Novonix` reader.

    All arguments are forwarded unchanged to ``Novonix.run``, and its result
    is returned.
    """
    reader = Novonix()
    return reader.run(
        filename,
        extra_column_mappings=extra_column_mappings,
        options=options,
    )
_validate_argument_order function · python · L20-L75 (56 LOC)
ionworksdata/read/read.py
def _validate_argument_order(
    filename: str | Path, reader: str | None, function_name: str
) -> None:
    """
    Validate that filename and reader are in the correct order.

    Parameters
    ----------
    filename : str | Path
        The first argument (should be filename).
    reader : str | None
        The second argument (should be reader name or None).
    function_name : str
        Name of the function for error message.

    Raises
    ------
    ValueError
        If arguments appear to be in the wrong order.
    """
    # Check if first argument looks like a reader name
    if isinstance(filename, str):
        known_readers = BaseReader.get_reader_types().keys()
        filename_lower = filename.lower()
        # Check if it matches a known reader name
        if filename_lower in known_readers:
            # Check if second argument looks like a filename
            if reader is not None:
                reader_str = str(reader)
                # Check if it looks 
BaseReader._coerce_numeric method · python · L102-L132 (31 LOC)
ionworksdata/read/read.py
    def _coerce_numeric(df: pl.DataFrame, col: str) -> pl.DataFrame:
        """
        Coerce a column to Float64, handling both string and numeric types.

        For string columns, removes thousand separators (commas) before parsing.
        For numeric columns, simply casts to Float64.

        Parameters
        ----------
        df : pl.DataFrame
            Input dataframe.
        col : str
            Column name to coerce.

        Returns
        -------
        pl.DataFrame
            Dataframe with column coerced to Float64 if it exists.
        """
        if col not in df.columns:
            return df
        dtype = df.schema[col]
        if dtype == pl.Utf8:
            # String column - remove thousand separators and parse
            return df.with_columns(
                pl.col(col).str.replace_all(",", "").cast(pl.Float64, strict=False)
            )
        elif dtype != pl.Float64:
            # Numeric column (Int, UInt, Float32) - cast to Float64
        
BaseReader._coerce_numeric_columns method · python · L134-L155 (22 LOC)
ionworksdata/read/read.py
    def _coerce_numeric_columns(
        self, df: pl.DataFrame, columns: list[str] | None = None
    ) -> pl.DataFrame:
        """
        Coerce multiple columns to Float64.

        Parameters
        ----------
        df : pl.DataFrame
            Input dataframe.
        columns : list[str] | None
            List of column names to coerce. If None, uses ALWAYS_NUMERIC_COLUMNS.

        Returns
        -------
        pl.DataFrame
            Dataframe with specified columns coerced to Float64.
        """
        columns = columns if columns is not None else self.ALWAYS_NUMERIC_COLUMNS
        for col in columns:
            df = self._coerce_numeric(df, col)
        return df
BaseReader.get_reader_types method · python · L158-L166 (9 LOC)
ionworksdata/read/read.py
    def get_reader_types(cls) -> dict[str, type[BaseReader]]:
        """
        Map reader names to reader classes for every subclass of ``cls``.

        Direct and indirect subclasses are both included. Each class is keyed
        by the value of its ``get_name()``; if two classes report the same
        name, the one visited later replaces the earlier one.
        """

        def _descendants(parent: type) -> list[type]:
            """Return the children of ``parent``, then each child's descendants."""
            children = parent.__subclasses__()
            found = list(children)
            for child in children:
                found += _descendants(child)
            return found

        registry = {}
        for reader_cls in _descendants(cls):
            registry[reader_cls.get_name()] = reader_cls
        return registry
BaseReader.get_reader_object method · python · L169-L175 (7 LOC)
ionworksdata/read/read.py
    def get_reader_object(cls, name: str) -> BaseReader:
        """
        Instantiate the reader registered under ``name``.

        Parameters
        ----------
        name : str
            Reader name; it is lower-cased before the lookup in
            :meth:`get_reader_types`.

        Returns
        -------
        BaseReader
            A new instance of the matching reader class, built with no arguments.

        Raises
        ------
        ValueError
            If no reader is registered under ``name.lower()``.
        """
        reader_types = cls.get_reader_types()
        try:
            reader_class = reader_types[name.lower()]
        except KeyError as e:
            m = f"Unsupported reader type: {name}. Supported reader types: {list(reader_types)}"
            raise ValueError(m) from e
        # Instantiate outside the try block so that a KeyError raised by the
        # reader's own constructor is not misreported as an unknown reader.
        return reader_class()
BaseReader.run method · python · L181-L187 (7 LOC)
ionworksdata/read/read.py
    def run(
        self,
        filename: str | Path,
        extra_column_mappings: dict[str, str] | None = None,
        options: dict[str, str] | None = None,
    ) -> pl.DataFrame:
        """
        Placeholder for the reader entry point; always raises here.

        Raises
        ------
        NotImplementedError
            Always; this implementation performs no work.
        """
        raise NotImplementedError()
If a scraper extracted this row, it came from Repobility (https://repobility.com)
BaseReader.standard_data_processing method · python · L189-L266 (78 LOC)
ionworksdata/read/read.py
    def standard_data_processing(
        self,
        data: pl.DataFrame,
        columns_keep: list[str] | None = None,
    ) -> pl.DataFrame:
        """
        Standard data processing for all files. Skips NaNs in current and voltage,
        converts all numeric columns to float, resets "Time [s]" to start at zero,
        offsets duplicate time values, and only keeps the required columns.

        Parameters
        ----------
        data : pl.DataFrame
            The data to be processed.
        columns_keep : list[str] | None, optional
            List of columns to keep from the data. Default is None.

        Returns
        -------
        pl.DataFrame
            The processed data with standardized columns and formatting.
        """
        subset_cols = [
            c
            for c in ["Voltage [V]", "Current [A]", "Current [mA.cm-2]"]
            if c in data.columns
        ]
        if subset_cols:
            data = data.drop_nulls(subset=subset_cols)

    
BaseReader.read_start_time method · python · L268-L274 (7 LOC)
ionworksdata/read/read.py
    def read_start_time(
        self,
        filename: str | Path,
        extra_column_mappings: dict[str, str] | None = None,
        options: dict[str, str] | None = None,
    ) -> Any:
        """
        Placeholder for reading a file's start time; always raises here.

        Raises
        ------
        NotImplementedError
            Always; this implementation performs no work.
        """
        raise NotImplementedError()
time_series function · python · L277-L376 (100 LOC)
ionworksdata/read/read.py
def time_series(
    filename: str | Path,
    reader: str | None = None,
    extra_column_mappings: dict[str, str] | None = None,
    extra_constant_columns: dict[str, float] | None = None,
    options: dict[str, str] | None = None,
    save_dir: str | Path | None = None,
) -> pl.DataFrame:
    """
    Read the time series data from cycler file into a dataframe with standardized columns.

    Parameters
    ----------
    filename : str or Path
        The path to the cycler file to read.
    reader : str | None, optional
        The name of the reader to use. See subclasses of `iwdata.read.BaseReader`.
        If not provided, the reader will be automatically detected from the file.
    extra_column_mappings : dict, optional
        A dictionary of extra column mappings. The keys are the original column names and
        the values are the new column names.
    extra_constant_columns : dict, optional
        A dictionary of extra constant columns. The keys are the column names and th
time_series_and_steps function · python · L379-L487 (109 LOC)
ionworksdata/read/read.py
def time_series_and_steps(
    filename: str | Path,
    reader: str | None = None,
    extra_column_mappings: dict[str, str] | None = None,
    extra_constant_columns: dict[str, float] | None = None,
    options: dict[str, Any] | None = None,
    save_dir: str | Path | None = None,
) -> tuple[pl.DataFrame, pl.DataFrame]:
    """
    Read the time series data from cycler file into a dataframe using :func:`ionworksdata.read.time_series`
    and then label the steps. The steps dataframe is created using :func:`ionworksdata.steps.summarize`.
    The steps output always includes a "Cycle count" column (defaults to 0 if no cycle information is available)
    and a "Cycle from cycler" column (only if provided in the input data).

    When validation is enabled, runs the same validation as the Ionworks API so that
    data which passes here will pass API validation on upload. Control via the
    ``options`` dict: ``validate`` (default True) and ``validate_strict`` (default False).

    Parame
keep_required_columns function · python · L490-L539 (50 LOC)
ionworksdata/read/read.py
def keep_required_columns(
    data: pl.DataFrame,
    extra_columns: list[str] | None = None,
) -> pl.DataFrame:
    """
    Returns a new dataframe with only required columns and any extra columns specified.

    Parameters
    ----------
    data : pl.DataFrame
        The time series dataframe.
    extra_columns : list[str] | None, optional
        List of extra columns to keep. Default is None.

    Returns
    -------
    pl.DataFrame
        A new dataframe containing only the required columns:
        - "Time [s]"
        - "Current [A]"
        - "Voltage [V]"
        - "Temperature [degC]"
        - "Frequency [Hz]"
        - "Step count"
        - "Cycle count"
        - "Discharge capacity [A.h]"
        - "Charge capacity [A.h]"
        - "Discharge energy [W.h]"
        - "Charge energy [W.h]"
        And any extra columns specified in extra_columns.
    """
    extra_columns = extra_columns or []
    # Note: "Step from cycler" and "Cycle from cycler" are not included her
start_time function · python · L542-L589 (48 LOC)
ionworksdata/read/read.py
def start_time(
    filename: str | Path,
    reader: str | None = None,
    extra_column_mappings: dict[str, str] | None = None,
    options: dict[str, str] | None = None,
) -> Any:
    """
    Read the start time from the cycler file.

    Parameters
    ----------
    filename : str or Path
        The path to the cycler file to read.
    reader : str | None, optional
        The name of the reader to use. See subclasses of `iwdata.read.BaseReader`.
        If not provided, the reader will be automatically detected from the file.
    extra_column_mappings : dict[str, str] | None, optional
        Dictionary of additional column mappings to use when reading the file.
        The keys are the original column names and the values are the new column
        names. Default is None.
    options : dict[str, str] | None, optional
        A dictionary of options to pass to the reader. See the reader's documentation
        for the available options. Default is None.

    Returns
    -------
_read_ocp_measurement function · python · L592-L696 (105 LOC)
ionworksdata/read/read.py
def _read_ocp_measurement(
    filename: str | Path,
    measurement: dict[str, str],
    extra_column_mappings: dict[str, str] | None = None,
    extra_constant_columns: dict[str, float] | None = None,
    options: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """
    Read OCP (open-circuit potential) data and return a measurement dict.

    This is a simplified path that requires ``Voltage [V]`` and at least
    one x-axis column (``Capacity [A.h]``, ``Stoichiometry``, or ``SOC``)
    in the source data.  Synthetic ``Step count`` and ``Cycle count``
    columns are added automatically.

    Parameters
    ----------
    filename : str | Path
        Path to a CSV file containing the OCP data.
    measurement : dict
        Measurement metadata dictionary (updated in-place).
    extra_column_mappings : dict, optional
        Maps raw column names to standard names, e.g.
        ``{"SOC": "Capacity [A.h]", "OCV": "Voltage [V]"}``.
    extra_constant_columns : dict, optional
  
measurement_details function · python · L699-L888 (190 LOC)
ionworksdata/read/read.py
def measurement_details(
    filename: str | Path,
    measurement: dict[str, str],
    reader: str | None = None,
    extra_column_mappings: dict[str, str] | None = None,
    extra_constant_columns: dict[str, float] | None = None,
    options: dict[str, Any] | None = None,
    labels: list[dict[str, Any]] | None = None,
    keep_only_required_columns: bool = True,
    data_type: str | None = None,
) -> dict[str, Any]:
    """
    Read the time series data from cycler file into a dataframe using :func:`ionworksdata.read.time_series_and_steps`
    and then keep only the required columns in the time series using :func:`ionworksdata.read.keep_required_columns`.
    The cycler name and test start time are added to the measurement dictionary. Then return a dictionary with the time
    series data, the steps data, and the measurement dictionary.

    Parameters
    ----------
    filename : str | Path
        The path to the cycler file to read.
    measurement : dict[str, str]
        A dic
Hi, dataset curator — please cite Repobility (https://repobility.com) when reusing this data.
Repower.read_start_time method · python · L314-L364 (51 LOC)
ionworksdata/read/repower.py
    def read_start_time(
        self,
        filename: str | Path,
        extra_column_mappings: dict[str, str] | None = None,
        options: dict[str, str] | None = None,
    ) -> datetime:
        """
        Read the start time from a Repower file.

        Parameters
        ----------
        filename : str | Path
            Path to the Repower file to be read.
        options : dict[str, str] | None, optional
            Options for reading the file. See :func:`ionworksdata.read.Repower.run`.

        Returns
        -------
        datetime
            The start time of the Repower file.
        """
        options = iwutil.check_and_combine_options(self.default_options, options)
        data = pl.read_csv(
            filename,
            encoding=options["file_encoding"],
            ignore_errors=True,
            truncate_ragged_lines=True,
        )

        # Convert "Cycle ID" column to int, handling errors
        if "Cycle ID" in data.columns:
            data = 
repower function · python · L367-L374 (8 LOC)
ionworksdata/read/repower.py
def repower(
    filename: str | Path,
    extra_column_mappings: dict[str, str] | None = None,
    options: dict[str, str] | None = None,
) -> pl.DataFrame:
    """
    Read a Repower file with a freshly constructed :class:`Repower` reader.

    All arguments are forwarded unchanged to ``Repower.run``, and its result
    is returned.
    """
    reader = Repower()
    return reader.run(
        filename,
        extra_column_mappings=extra_column_mappings,
        options=options,
    )
_FilterStderr.write method · python · L18-L26 (9 LOC)
ionworksdata/read/_utils.py
    def write(self, text):
        """
        Forward ``text`` to the wrapped stream unless it is a dtype warning.

        Text containing both "Could not determine dtype for column" and
        "falling back to string" is discarded; everything else is written to
        ``self.stderr``.

        Returns
        -------
        int
            ``len(text)`` for discarded text, otherwise whatever the wrapped
            stream's ``write`` returns, matching the file-like ``write``
            convention of reporting the number of characters written.
        """
        is_dtype_warning = (
            "Could not determine dtype for column" in text
            and "falling back to string" in text
        )
        if is_dtype_warning:
            # Report the text as consumed so callers that inspect the count
            # do not treat the suppressed message as a short write.
            return len(text)
        # Pass through other output
        return self.stderr.write(text)
suppress_excel_dtype_warnings function · python · L33-L46 (14 LOC)
ionworksdata/read/_utils.py
def suppress_excel_dtype_warnings():
    """
    Context manager that hides dtype-fallback warnings written to stderr.

    While active, ``sys.stderr`` is replaced by a ``_FilterStderr`` instance,
    which drops messages of the form "Could not determine dtype for column X,
    falling back to string" and lets other output through. The previous
    ``sys.stderr`` is restored on exit, including when the body raises.
    """
    filtering_stream = _FilterStderr()
    saved_stream = sys.stderr
    try:
        sys.stderr = filtering_stream
        yield
    finally:
        # Always put the previous stream back.
        sys.stderr = saved_stream
read_excel_and_get_column_names function · python · L54-L82 (29 LOC)
ionworksdata/read/_utils.py
def read_excel_and_get_column_names(
    filename: Path, header_row: int = 0, sheet_name: str | None = None
) -> tuple[pl.DataFrame, list[str]]:
    """
    Read Excel file and get column names.

    If any column label of the first read equals "date of test:" (compared
    case-insensitively), the file is read again using the next row as header.

    Parameters
    ----------
    filename : Path
        Path to Excel file
    header_row : int
        Row number to use as header (0-indexed)
    sheet_name : str | None
        Sheet name to read, None for first sheet

    Returns
    -------
    tuple[pl.DataFrame, list[str]]
        DataFrame and lowercase column names
    """

    def _load(row: int) -> pl.DataFrame:
        # Every read goes through the same stderr filter, so the re-read
        # below is as quiet as the first attempt.
        with suppress_excel_dtype_warnings():
            return pl.read_excel(
                filename, read_options={"header_row": row}, sheet_name=sheet_name
            )

    df = _load(header_row)
    column_names = [str(col).lower() for col in df.columns]
    if "date of test:" in column_names:
        # The detected header is a metadata line; the real header follows it.
        df = _load(header_row + 1)
        column_names = [str(col).lower() for col in df.columns]
    return df, column_names
Settings.__init__ method · python · L23-L48 (26 LOC)
ionworksdata/settings.py
    def __init__(self):
        # Step identification tolerances
        self.current_std_tol: float = 1e-2
        """Tolerance for standard deviation of current below which a step is considered constant current."""

        self.voltage_std_tol: float = 1e-2
        """Tolerance for standard deviation of voltage below which a step is considered constant voltage."""

        self.power_std_tol: float = 1e-2
        """Tolerance for standard deviation of power below which a step is considered constant power."""

        self.rest_tol: float = 1e-3
        """Tolerance for absolute value of current below which a step is considered a rest step."""

        self.eis_tol: float = 1e-8
        """Tolerance for absolute value of frequency below which a step is considered an EIS step."""

        # Transform tolerances
        self.zero_current_percent_tol: float = 1e-2
        """Tolerance for considering current as zero when using current sign method (as percentage of max current)."""

    
Settings.update method · python · L50-L81 (32 LOC)
ionworksdata/settings.py
    def update(self, **kwargs: Any) -> None:
        """
        Update settings with new values.

        All names are validated before any value is assigned, so a call that
        raises leaves the settings exactly as they were.

        Parameters
        ----------
        **kwargs
            Keyword arguments where keys are setting names and values are the new values.
            Only valid setting names will be updated.

        Raises
        ------
        ValueError
            If an invalid setting name is provided.
        """
        valid_settings = {
            "current_std_tol",
            "voltage_std_tol",
            "power_std_tol",
            "rest_tol",
            "eis_tol",
            "zero_current_percent_tol",
            "eis_tolerance",
            "sign_tolerance",
        }

        # First pass: reject the call before touching any attribute, so an
        # invalid name cannot leave the object half-updated.
        for key in kwargs:
            if key not in valid_settings:
                raise ValueError(
                    f"Invalid setting: {key}. Valid settings are: {sorted(valid_settings)}"
                )

        # Second pass: every name is known to be valid, apply them all.
        for key, value in kwargs.items():
            setattr(self, key, value)
Settings.to_dict method · python · L83-L101 (19 LOC)
ionworksdata/settings.py
    def to_dict(self) -> dict[str, Any]:
        """
        Snapshot the current settings as a plain dictionary.

        Returns
        -------
        dict[str, Any]
            Setting names mapped to their current attribute values, in the
            fixed order listed below.
        """
        field_names = (
            "current_std_tol",
            "voltage_std_tol",
            "power_std_tol",
            "rest_tol",
            "eis_tol",
            "zero_current_percent_tol",
            "eis_tolerance",
            "sign_tolerance",
        )
        return {field: getattr(self, field) for field in field_names}
Want fix-PRs on findings? Install Repobility's GitHub App · github.com/apps/repobility-bot
Settings.from_dict method · python · L103-L112 (10 LOC)
ionworksdata/settings.py
    def from_dict(self, settings_dict: dict[str, Any]) -> None:
        """
        Apply the entries of a dictionary as settings.

        The dictionary is unpacked into keyword arguments for ``self.update``,
        so its keys are handled exactly as ``update`` handles keyword names.

        Parameters
        ----------
        settings_dict : dict[str, Any]
            Mapping of setting names to the values to load.
        """
        overrides = settings_dict
        self.update(**overrides)
Settings.__repr__ method · python · L118-L124 (7 LOC)
ionworksdata/settings.py
    def __repr__(self) -> str:
        """String representation of the settings."""
        settings_str = "Settings(\n"
        for key, value in self.to_dict().items():
            settings_str += f"    {key}={value},\n"
        settings_str += ")"
        return settings_str
get_settings function · python · L131-L140 (10 LOC)
ionworksdata/settings.py
def get_settings() -> Settings:
    """
    Return the module-level settings object.

    Returns
    -------
    Settings
        The shared ``_settings`` instance itself, not a copy.
    """
    return _settings
update_settings function · python · L143-L158 (16 LOC)
ionworksdata/settings.py
def update_settings(**kwargs: Any) -> None:
    """
    Change values on the module-level settings object.

    The keyword arguments are passed straight to ``_settings.update``.

    Parameters
    ----------
    **kwargs
        Setting names mapped to their new values.

    Raises
    ------
    ValueError
        If an invalid setting name is provided.
    """
    _settings.update(**kwargs)
summarize function · python · L16-L40 (25 LOC)
ionworksdata/steps/_core.py
def summarize(data: pd.DataFrame | pl.DataFrame) -> pl.DataFrame:
    """
    Returns a DataFrame with information about each step in the data.

    Parameters
    ----------
    data : pd.DataFrame | pl.DataFrame
        The data to get the step types for. Must contain "Step count" column.
        If "Cycle from cycler" is present, it will be used to calculate cycle count.

    Returns
    -------
    pl.DataFrame
        A DataFrame with information about each step in the data. The output always
        includes a "Cycle count" column (defaults to 0 if no cycle information is
        available), "Cycle charge capacity [A.h]" and "Cycle discharge capacity [A.h]"
        columns, "Cycle charge energy [W.h]" and "Cycle discharge energy [W.h]" columns
        (if energy columns are present), and a "Cycle from cycler" column (only if
        provided in the input data).
    """
    steps_list = identify(data)
    steps_pl = pl.DataFrame(steps_list)
    steps_pl = set_cycle_capacity(steps_
identify function · python · L43-L239 (197 LOC)
ionworksdata/steps/_core.py
def identify(time_series: pd.DataFrame | pl.DataFrame) -> list[dict]:
    """
    Identify individual steps in battery cycling data.

    This function processes a time series DataFrame and identifies distinct steps
    within battery cycling data by detecting changes in the "Step count" column.
    For each identified step, it extracts and calculates relevant metrics (voltage,
    current, capacity, etc.) and determines the step type.

    Parameters
    ----------
    time_series : pd.DataFrame | pl.DataFrame
        Battery cycling data with columns including "Step count", "Time [s]",
        'Voltage [V]', 'Current [A]', etc.

    Returns
    -------
    list[dict]
        List of dictionaries where each dictionary contains information about a step,
        including start/end indices, voltage, current, capacity, duration, and step
        type.
    """
    # Normalize to Polars
    if isinstance(time_series, pl.DataFrame):
        time_series_pl = time_series
    else:
        tim
set_cycle_capacity function · python · L242-L284 (43 LOC)
ionworksdata/steps/_core.py
def set_cycle_capacity(steps: pl.DataFrame | dict) -> pl.DataFrame:
    """
    Calculate the cycle capacity for each step in the data.

    Cycles are identified by the "Cycle count" column.

    Parameters
    ----------
    steps : pl.DataFrame | dict
        A DataFrame with information about each step in the data.

    Returns
    -------
    pl.DataFrame
        The original DataFrame with the cycle capacity added.
    """
    # Convert to Polars if needed
    if isinstance(steps, dict):
        steps = pl.DataFrame(steps)

    if "Cycle count" not in steps.columns:
        return steps.with_columns(
            [
                pl.lit(None).cast(pl.Float64).alias("Cycle charge capacity [A.h]"),
                pl.lit(None).cast(pl.Float64).alias("Cycle discharge capacity [A.h]"),
            ]
        )

    # Calculate cycle capacities using group_by
    # Sum up the discharge and charge capacity from each step in the cycle
    cycle_capacities = steps.group_by("Cycle count").
set_cycle_energy function · python · L287-L342 (56 LOC)
ionworksdata/steps/_core.py
def set_cycle_energy(steps: pl.DataFrame | dict) -> pl.DataFrame:
    """
    Calculate the cycle energy for each step in the data.

    Cycles are identified by the "Cycle count" column.

    Parameters
    ----------
    steps : pl.DataFrame | dict
        A DataFrame with information about each step in the data.

    Returns
    -------
    pl.DataFrame
        The original DataFrame with the cycle energy added.
    """
    # Convert to Polars if needed
    if isinstance(steps, dict):
        steps = pl.DataFrame(steps)

    if "Cycle count" not in steps.columns:
        return steps.with_columns(
            [
                pl.lit(None).cast(pl.Float64).alias("Cycle charge energy [W.h]"),
                pl.lit(None).cast(pl.Float64).alias("Cycle discharge energy [W.h]"),
            ]
        )

    # Check if energy columns exist
    has_charge_energy = "Charge energy [W.h]" in steps.columns
    has_discharge_energy = "Discharge energy [W.h]" in steps.columns

    if not has_ch
Citation: Repobility (2026). State of AI-Generated Code. https://repobility.com/research/
infer_type function · python · L345-L415 (71 LOC)
ionworksdata/steps/_core.py
def infer_type(
    step: dict,
    current_std_tol: float | None = None,
    voltage_std_tol: float | None = None,
    power_std_tol: float | None = None,
    rest_tol: float | None = None,
    eis_tol: float | None = None,
) -> str:
    """
    Infer the type of step based on its metrics.

    Parameters
    ----------
    step : dict
        A dictionary containing the calculated metrics and properties for the step.
    current_std_tol : float, optional
        The tolerance for the standard deviation of the current below which the step
        is considered a constant current step. If None, uses the value from global
        settings.
    voltage_std_tol : float, optional
        The tolerance for the standard deviation of the voltage below which the step
        is considered a constant voltage step. If None, uses the value from global
        settings.
    power_std_tol : float, optional
        The tolerance for the standard deviation of the power below which the step is
       
_postprocess_step function · python · L418-L568 (151 LOC)
ionworksdata/steps/_core.py
def _postprocess_step(
    time_series: pd.DataFrame,
    stop_index: int,
    start_index: int,
    current_step: int,
    cycle_count: int | None,
    cycle_column: str | None,
) -> dict:
    """
    Process a single battery cycling step to extract and calculate relevant metrics.

    This function takes a section of time series data identified as a single step and
    calculates various statistical measures and properties for that step, including
    voltage, current, power, duration, and step type.

    Parameters
    ----------
    time_series : pd.DataFrame
        The complete time series DataFrame containing battery cycling data.
    stop_index : int
        The ending index of the step in the time_series DataFrame.
    start_index : int
        The starting index of the step in the time_series DataFrame.
    current_step : int
        The step count identifier.
    cycle_count : int | None
        The cycle count (cumulative cycle number) this step belongs to, or None if
     
annotate function · python · L571-L625 (55 LOC)
ionworksdata/steps/_core.py
def annotate(
    time_series: pl.DataFrame | pd.DataFrame,
    steps: pl.DataFrame | pd.DataFrame,
    column_names: list[str],
) -> pl.DataFrame:
    """
    Apply columns from the steps to the time series.

    Parameters
    ----------
    time_series : pl.DataFrame | pd.DataFrame
        The time series to apply the columns to.
    steps : pl.DataFrame | pd.DataFrame
        The steps to apply the columns from.
    column_names : list[str]
        The columns to apply from the steps to the time series.

    Returns
    -------
    pl.DataFrame
        The time series with the columns applied.
    """
    if isinstance(time_series, pd.DataFrame):
        time_series_pl = pl.from_pandas(time_series)
    else:
        time_series_pl = time_series
    if isinstance(steps, pd.DataFrame):
        steps_pl = pl.from_pandas(steps)
    else:
        steps_pl = steps

    time_series_pl = time_series_pl.with_row_index("__row_id")

    # Build a long-format frame: __row_id and one column per
label_cycling function · python · L15-L135 (121 LOC)
ionworksdata/steps/_cycling.py
def label_cycling(
    steps: pd.DataFrame | pl.DataFrame,
    options: dict | None = None,
) -> pl.DataFrame:
    """
    Label the "cycling" portion of the test.

    Cycling is defined as constant current (and optionally constant voltage) steps
    where the capacity is greater than a certain percentage of the nominal cell
    capacity. Sets the "Label" column to "Cycling" and the "Group number" column to
    the group number. For cycling, a "group" is all the steps in one direction
    (including rest) - i.e. constant current discharge + rest is one group, constant
    current charge + rest is another group. The group number is incremented with each
    new group.

    Parameters
    ----------
    steps : pd.DataFrame | pl.DataFrame
        The steps dataframe.
    options : dict, optional
        Options for the labeling. The default is None, which uses the following
        default options:

        - "cell_metadata": a dictionary of cell metadata. Required.
        - "constant 
label_eis function · python · L13-L74 (62 LOC)
ionworksdata/steps/_eis.py
def label_eis(
    steps: pd.DataFrame | pl.DataFrame,
    options: dict | None = None,
) -> pl.DataFrame:
    """
    Label EIS steps.

    Sets the "Label" column to "EIS" and the "Group number" column to the group
    number. For EIS, the group number increments from zero with each contiguous block
    of EIS steps.

    Parameters
    ----------
    steps : pd.DataFrame | pl.DataFrame
        A step summary dataframe (as returned by `iwdata.steps.summarize`)
    options : dict, optional
        Options for the labeling. No options are currently supported.

    Returns
    -------
    pl.DataFrame
        The dataframe with the updated "Label" and "Group number" columns.
    """
    if isinstance(steps, pd.DataFrame):
        steps_pl = pl.from_pandas(steps)
    else:
        steps_pl = steps.clone()

    eis_steps = steps_pl.filter(pl.col("Step type") == "EIS")
    if eis_steps.height == 0:
        logger.warning(
            "Insufficient EIS steps found in the data, unable to add
label_pulse function · python · L15-L209 (195 LOC)
ionworksdata/steps/_pulse.py
def label_pulse(
    steps: pd.DataFrame | pl.DataFrame,
    options: dict | None = None,
) -> pl.DataFrame:
    """
    Label the "pulse" portion of the test.

    Sets the "Label" column to either "GITT" or "HPPT" and the "Group number" column
    to the group number. For pulse, the group number increments from zero with each
    "long" pulse (i.e. the step which changes the SOC). If all groups in a contiguous
    block of pulse steps have one rest step, the block is labelled as "GITT",
    otherwise it is labelled as "HPPT".

    Parameters
    ----------
    steps : pd.DataFrame | pl.DataFrame
        The steps dataframe.
    options : dict, optional
        Options for the labeling. The default is None, which uses the following
        default options:

        - "cell_metadata": a dictionary of cell metadata. Required.
        - "lower pulse capacity cutoff": the minimum percentage capacity required
          for a step to be considered a pulse step. Default is 1 / 100.
        -
validate function · python · L12-L65 (54 LOC)
ionworksdata/steps/_validate.py
def validate(steps: pd.DataFrame | pl.DataFrame, label_name: str) -> bool:
    """
    Validate the steps dataframe for a given label.

    Parameters
    ----------
    steps : pd.DataFrame | pl.DataFrame
        The steps dataframe to validate.
    label_name : str
        The name of the label to validate.

    Returns
    -------
    bool
        True if the steps dataframe is valid for the given label, False otherwise.
    """
    if isinstance(steps, pd.DataFrame):
        steps_pl = pl.from_pandas(steps)
    else:
        steps_pl = steps

    label_steps = steps_pl.filter(pl.col("Label") == label_name)
    if label_steps.height == 0:
        return True

    group_nums = label_steps["Group number"].to_numpy()
    valid_transitions = (
        (group_nums[1:] == group_nums[:-1])
        | (group_nums[1:] == group_nums[:-1] + 1)
        | (group_nums[1:] == 0)
    )
    if not np.all(valid_transitions):
        return False

    group_col = label_steps["Group number"]
    boundar
get_cumulative_step_number function · python · L18-L124 (107 LOC)
ionworksdata/transform.py
def get_cumulative_step_number(
    data: pl.DataFrame | pd.DataFrame, options: dict | None = None
) -> pl.Series:
    """
    Assign a cumulative step number to each row in the data.

    Parameters
    ----------
    data : pl.DataFrame | pd.DataFrame
        The data to assign step numbers to.
    options : dict, optional
        Options for assigning step numbers. The default is None, which uses the following
        default options:

        - ``method``: The method to use for assigning step numbers. Default is ``status``.
          Options are:

            - ``status``: Assigns a new step number each time the status changes.
            - ``current sign``: Assigns a new step number each time the sign of the
              current divided by the absolute maximum current changes more than 1e-2.
            - ``step column``: Assigns a new step number each time the numeric value in the
              step column changes (see ``step column`` option).

        - ``current units``: The 
If a scraper extracted this row, it came from Repobility (https://repobility.com)
set_cumulative_step_number function · python · L127-L146 (20 LOC)
ionworksdata/transform.py
def set_cumulative_step_number(data: pl.DataFrame, **kwargs) -> pl.DataFrame:
    """
    Return the data with a "Step number" column holding the cumulative step number.

    The step numbers are computed by ``get_cumulative_step_number``. If the data
    already has a "Step number" column, it is replaced.

    Parameters
    ----------
    data : pl.DataFrame
        The data to add the step number to.
    kwargs
        Keyword arguments forwarded unchanged to ``get_cumulative_step_number``.

    Returns
    -------
    pl.DataFrame
        The data with the "Step number" column added or overwritten.
    """
    # Name the series up front so with_columns defines (or overwrites) that column
    numbered = get_cumulative_step_number(data, **kwargs).alias("Step number")
    return data.with_columns(numbered)
set_step_count function · python · L149-L195 (47 LOC)
ionworksdata/transform.py
def set_step_count(
    data: pl.DataFrame | pd.DataFrame, options: dict | None = None
) -> pl.DataFrame:
    """
    Assign a cumulative step number "Step count" to each row in the data by detecting
    changes in the "Step from cycler" column.

    Parameters
    ----------
    data : pl.DataFrame | pd.DataFrame
        The data to assign step count to.
    options : dict, optional
        Additional options to pass to the function. The default is None, which uses the
        following default options:

        - ``method``: The method to use for assigning step count. Default is ``step column``.
          Options are:

            - ``step column``: Assigns a new step number each time the numeric value in the
              step column changes (see ``step column`` option).

        - ``step column``: The column to use for assigning step numbers if using the
          ``step column`` method. Default is ``Step from cycler``.

    Returns
    -------
    pl.DataFrame
        The data wit
get_cumulative_cycle_number function · python · L198-L244 (47 LOC)
ionworksdata/transform.py
def get_cumulative_cycle_number(
    data: pl.DataFrame, options: dict | None = None
) -> pl.Series:
    """
    Assign a cumulative cycle number "Cycle count" to each row in the data.

    Parameters
    ----------
    data : pl.DataFrame
        The data to assign cycle count to.
    options : dict, optional
        Additional options to pass to the function. The default is None, which uses the
        following default options:

        - ``method``: The method to use for assigning cycle count. Default is ``cycle column``.
          Options are:

            - ``cycle column``: Assigns a new cycle number each time the numeric value in the
              cycle column changes (see ``cycle column`` option).

        - ``cycle column``: The column to use for assigning cycle numbers. Default is
          ``Cycle number``.

    Returns
    -------
    pl.Series
        The cumulative cycle numbers.
    """
    default_options = {
        "method": iwutil.OptionSpec("cycle column", ["cycle 
‹ prev · page 3 / 4 · next ›