Adapters

This page documents the adapter modules under jumper_extension.adapters that implement reporting, visualization, session management, and script writing. High‑level usage is described in the Public API and Jupyter API sections; the content below is generated directly from the Python code.

Data

`NodeDataStore`

Single source of truth for per-node hardware metadata and time-series data.

register_node(info) stores both the NodeInfo and the corresponding PerformanceData container under the same node key.

Access patterns

- `store.hardware` – Dict[str, NodeInfo] (metadata)
- `store.view(level)` – aggregate DataFrame across all nodes
- `store.view(level, node=n)` – single-node DataFrame
- `store.add_sample(node, level, row)` – append one flat-dict sample

Source code in jumper_extension/adapters/data/node.py

class NodeDataStore:
    """Single source of truth for per-node hardware metadata and time-series data.

    ``register_node(info)`` stores both the ``NodeInfo`` and the corresponding
    ``PerformanceData`` container under the same node key.

    Access patterns
    ---------------
    ``store.hardware``                     – Dict[str, NodeInfo] (metadata)
    ``store.view(level)``                  – aggregate DataFrame across all nodes
    ``store.view(level, node=n)``          – single-node DataFrame
    ``store.add_sample(node, level, row)`` – append one flat-dict sample
    """

    def __init__(self) -> None:
        # Both dicts are keyed by ``info.node`` and are filled together by
        # register_node().
        self._info: Dict[str, NodeInfo] = {}
        self._nodes: Dict[str, PerformanceData] = {}

    # ------------------------------------------------------------------ #
    # Registration                                                        #
    # ------------------------------------------------------------------ #

    def register_node(self, info: "NodeInfo") -> None:
        """Store *info* and create a fresh ``PerformanceData`` for ``info.node``.

        Registering the same node key again replaces both entries.
        """
        self._info[info.node] = info
        self._nodes[info.node] = PerformanceData()

    @property
    def hardware(self) -> Dict[str, "NodeInfo"]:
        """The internal node-key -> ``NodeInfo`` mapping (not a copy)."""
        return self._info

    def node_names(self) -> List[str]:
        """Return the registered node keys in registration order."""
        return list(self._info.keys())

    @property
    def levels(self) -> List[str]:
        """Level names taken from the first registered node.

        Falls back to ``get_available_levels()`` while no node is registered.
        """
        if not self._nodes:
            return get_available_levels()
        return list(next(iter(self._nodes.values())).levels)

    # ------------------------------------------------------------------ #
    # Writing                                                             #
    # ------------------------------------------------------------------ #

    def add_sample(self, node: str, level: str, row: dict) -> None:
        """Append *row* to *node*'s data at *level*; unknown nodes are ignored."""
        perf_data = self._nodes.get(node)
        if perf_data is None:
            return
        perf_data.add_sample(level, row)

    def init_node_schema(
        self, node: str, columns_by_level: Dict[str, List[str]]
    ) -> None:
        """Store per-level column lists so view() can return a correctly shaped
        empty DataFrame before the first sample arrives."""
        perf_data = self._nodes.get(node)
        if perf_data is None:
            return
        # Shallow copy so later changes to the caller's dict do not leak in.
        perf_data._schema_columns = dict(columns_by_level)

    def load_frames(self, node: str, frames: Dict[str, pd.DataFrame]) -> None:
        """Inject pre-loaded DataFrames into a registered node's data container.

        Used by offline (imported) monitors to populate data without going
        through the live add_sample() path.
        """
        perf_data = self._nodes.get(node)
        if perf_data is None:
            return
        for level, df in frames.items():
            perf_data._rows[level] = df.to_dict("records")

    # ------------------------------------------------------------------ #
    # Reading                                                             #
    # ------------------------------------------------------------------ #

    def view(
        self,
        level: str = "process",
        node: Optional[str] = None,
        slice_=None,
        cell_history=None,
    ) -> pd.DataFrame:
        """Return the samples recorded at *level* as a DataFrame.

        Args:
            level: Level name forwarded to the per-node containers.
            node: When given, only that node's data is returned.
            slice_: Forwarded unchanged to the per-node ``view``.
            cell_history: Forwarded to the per-node ``view``; on the
                aggregate path it is used to add a ``cell_index`` column.

        Returns:
            An empty DataFrame when no node is registered or *node* is
            unknown; the single node's view when exactly one node is
            registered; otherwise the cross-node aggregate.
        """
        if not self._nodes:
            return pd.DataFrame()

        if node is not None:
            perf_data = self._nodes.get(node)
            if perf_data is None:
                return pd.DataFrame()
            return perf_data.view(level=level, slice_=slice_, cell_history=cell_history)

        if len(self._nodes) == 1:
            return next(iter(self._nodes.values())).view(
                level=level, slice_=slice_, cell_history=cell_history
            )

        # NOTE(review): slice_ is not applied on the multi-node path —
        # confirm whether callers expect it to be.
        return self._aggregate(level, cell_history)

    def _aggregate(self, level: str, cell_history=None) -> pd.DataFrame:
        """Combine all nodes' *level* frames into a single frame.

        Nodes without data are skipped. The remaining frames are truncated to
        the shortest one and combined row by row (by position, not by time).
        Columns not handled by the ``_aggregate_*`` helpers keep the values
        of the first frame.
        """
        node_dfs: Dict[str, pd.DataFrame] = {}
        for n, perf in self._nodes.items():
            df = perf.view(level)
            if not df.empty:
                node_dfs[n] = df

        if not node_dfs:
            return pd.DataFrame()
        if len(node_dfs) == 1:
            df = next(iter(node_dfs.values()))
            # ``is not None`` rather than truthiness: a history object that
            # defines __len__ is falsy while empty, which would make this path
            # disagree with the multi-frame path below.
            if cell_history is not None:
                return self._attach_cell_index(df, cell_history)
            return df

        # Every frame kept above is non-empty, so min_len is at least 1.
        min_len = min(len(df) for df in node_dfs.values())

        frames = [
            df.iloc[:min_len].reset_index(drop=True)
            for df in node_dfs.values()
        ]
        result = frames[0].copy()

        self._aggregate_memory(frames, result)
        self._aggregate_io(frames, result)
        self._aggregate_cpu(frames, result)
        self._aggregate_gpu(frames, result)

        if cell_history is not None:
            result = self._attach_cell_index(result, cell_history)
        return result

    def _aggregate_memory(
        self, frames: List[pd.DataFrame], result: pd.DataFrame
    ) -> None:
        """Sum ``memory`` across frames into *result* if every frame has it."""
        if all("memory" in f.columns for f in frames):
            result["memory"] = sum(f["memory"] for f in frames)

    def _aggregate_io(
        self, frames: List[pd.DataFrame], result: pd.DataFrame
    ) -> None:
        """Sum each I/O counter column that is present in every frame."""
        for col in ("io_read", "io_write", "io_read_count", "io_write_count"):
            if all(col in f.columns for f in frames):
                result[col] = sum(f[col] for f in frames)

    def _aggregate_cpu(
        self, frames: List[pd.DataFrame], result: pd.DataFrame
    ) -> None:
        """Combine CPU utilisation: mean of avg, min of min, max of max.

        Each statistic is only combined when all frames carry it. The
        per-core ``cpu_util_<n>`` columns are dropped from *result*.
        """
        if all("cpu_util_avg" in f.columns for f in frames):
            result["cpu_util_avg"] = sum(f["cpu_util_avg"] for f in frames) / len(frames)
        if all("cpu_util_min" in f.columns for f in frames):
            result["cpu_util_min"] = pd.concat(
                [f["cpu_util_min"] for f in frames], axis=1
            ).min(axis=1)
        if all("cpu_util_max" in f.columns for f in frames):
            result["cpu_util_max"] = pd.concat(
                [f["cpu_util_max"] for f in frames], axis=1
            ).max(axis=1)
        drop = [c for c in result.columns if re.match(r"cpu_util_\d+$", c)]
        result.drop(columns=drop, errors="ignore", inplace=True)

    def _aggregate_gpu(
        self, frames: List[pd.DataFrame], result: pd.DataFrame
    ) -> None:
        """Combine GPU util/band/mem statistics across frames.

        Unlike the CPU helper, a statistic is combined over whichever frames
        have the column, so it does not need to be present in all of them.
        The per-device ``gpu_<metric>_<n>`` columns are dropped from *result*.
        """
        for metric in ("util", "band", "mem"):
            avg_col = f"gpu_{metric}_avg"
            vals = [f[avg_col] for f in frames if avg_col in f.columns]
            if vals:
                result[avg_col] = sum(vals) / len(vals)

            min_col = f"gpu_{metric}_min"
            vals = [f[min_col] for f in frames if min_col in f.columns]
            if vals:
                result[min_col] = pd.concat(vals, axis=1).min(axis=1)

            max_col = f"gpu_{metric}_max"
            vals = [f[max_col] for f in frames if max_col in f.columns]
            if vals:
                result[max_col] = pd.concat(vals, axis=1).max(axis=1)

        drop = [c for c in result.columns if re.match(r"gpu_(util|band|mem)_\d+$", c)]
        result.drop(columns=drop, errors="ignore", inplace=True)

    def _attach_cell_index(self, df: pd.DataFrame, cell_history) -> pd.DataFrame:
        """Return a copy of *df* with a ``cell_index`` column.

        A row gets the index of a cell when its ``time`` lies within that
        cell's ``[start_time, end_time]``; rows outside every cell keep
        ``pd.NA``. If intervals overlap, the later row of
        ``cell_history.data`` wins.
        """
        result = df.copy()
        result["cell_index"] = pd.NA
        times = result["time"].to_numpy()
        for row in cell_history.data.itertuples(index=False):
            mask = (times >= row.start_time) & (times <= row.end_time)
            result.loc[mask, "cell_index"] = row.cell_index
        return result

    # ------------------------------------------------------------------ #
    # Export / load (delegate to primary node)                           #
    # ------------------------------------------------------------------ #

    def export(
        self,
        filename: str = "performance_data.csv",
        level: str = "process",
        cell_history=None,
    ) -> None:
        """Write ``view(level, cell_history=...)`` to *filename*.

        The format is the lower-cased file extension. A filename without an
        extension is written as CSV and gets ``.csv`` appended. Nothing is
        written when there are no nodes, the view is empty, or the first
        node has no writer registered for the format.
        """
        if not self._nodes:
            return
        df = self.view(level=level, cell_history=cell_history)
        if df.empty:
            return
        first = next(iter(self._nodes.values()))
        _, ext = os.path.splitext(filename)
        fmt = ext.lower().lstrip(".")
        if not fmt:
            # No extension given: default to CSV and make the name say so.
            fmt = "csv"
            filename += ".csv"
        writer = first._file_writers.get(fmt)
        if writer:
            writer(filename, df)

    def load(self, filename: str) -> Optional[pd.DataFrame]:
        """Delegate to the first registered node's ``load``; None without nodes."""
        if not self._nodes:
            return None
        return next(iter(self._nodes.values())).load(filename)

`init_node_schema(node, columns_by_level)`

Store per-level column lists so view() can return a correctly shaped empty DataFrame before the first sample arrives.

Source code in jumper_extension/adapters/data/node.py

def init_node_schema(
    self, node: str, columns_by_level: Dict[str, List[str]]
) -> None:
    """Remember which columns each level of *node* will carry.

    With the schema stored on the node's container, view() can hand back a
    correctly shaped empty DataFrame before any sample has been recorded.
    Unknown nodes are ignored.
    """
    container = self._nodes.get(node)
    if container is not None:
        # Shallow copy: later edits to the caller's dict must not leak in.
        container._schema_columns = dict(columns_by_level)

`load_frames(node, frames)`

Inject pre-loaded DataFrames into a registered node's data container.

Used by offline (imported) monitors to populate data without going through the live add_sample() path.

Source code in jumper_extension/adapters/data/node.py

def load_frames(self, node: str, frames: Dict[str, pd.DataFrame]) -> None:
    """Populate a registered node's container from ready-made DataFrames.

    Offline (imported) monitors use this instead of replaying every sample
    through add_sample(). Each frame replaces the node's rows for its level
    with the frame's records. Unknown nodes are ignored.
    """
    container = self._nodes.get(node)
    if container is not None:
        for level_name, frame in frames.items():
            container._rows[level_name] = frame.to_dict("records")

`aggregate_node_info(hardware)`

Return a synthetic NodeInfo aggregating all nodes in hardware.

CPUs/GPUs are summed, gpu_memory takes the max, memory_limits are summed per level, gpu_name is taken from the first node that has one. Used by reporter/service/session to get a single summary view.

Source code in jumper_extension/adapters/data/node.py

def aggregate_node_info(hardware: Dict[str, NodeInfo]) -> NodeInfo:
    """Collapse every node in *hardware* into one synthetic ``NodeInfo``.

    The result is named ``"aggregate"``. CPU and GPU counts are summed,
    ``gpu_memory`` is the maximum over the nodes, ``memory_limits`` are
    summed per level (a node lacking a level contributes 0.0), ``gpu_name``
    is the first non-empty name and ``cpu_handles`` are concatenated in node
    order. An empty mapping yields an all-zero ``NodeInfo``.
    Used by reporter/service/session to get a single summary view.
    """
    members = list(hardware.values())
    if not members:
        return NodeInfo(
            node="aggregate",
            num_cpus=0,
            num_system_cpus=0,
            num_gpus=0,
            gpu_memory=0.0,
            gpu_name="",
            memory_limits={},
        )

    # Union of the level names known to any node.
    level_names = {lvl for member in members for lvl in member.memory_limits}

    total_cpus = sum(member.num_cpus for member in members)
    total_system_cpus = sum(member.num_system_cpus for member in members)
    total_gpus = sum(member.num_gpus for member in members)
    peak_gpu_memory = max(member.gpu_memory for member in members)

    first_gpu_name = ""
    for member in members:
        if member.gpu_name:
            first_gpu_name = member.gpu_name
            break

    summed_limits = {}
    for lvl in level_names:
        summed_limits[lvl] = sum(
            member.memory_limits.get(lvl, 0.0) for member in members
        )

    handles = []
    for member in members:
        handles.extend(member.cpu_handles)

    return NodeInfo(
        node="aggregate",
        num_cpus=total_cpus,
        num_system_cpus=total_system_cpus,
        num_gpus=total_gpus,
        gpu_memory=peak_gpu_memory,
        gpu_name=first_gpu_name,
        memory_limits=summed_limits,
        cpu_handles=handles,
    )

Cell history and analysis

`CellHistory`

Source code in jumper_extension/adapters/cell_history.py

class CellHistory:
    """Record of executed cells with their source and timing.

    ``data`` holds one row per finished cell. ``start_time``/``end_time`` are
    ``time.perf_counter()`` readings (only differences are meaningful), while
    ``wallclock_start_time``/``wallclock_end_time`` are ``time.time()``
    epoch timestamps.
    """

    def __init__(self):
        self._columns = [
            "cell_index",
            "raw_cell",
            "start_time",
            "end_time",
            "duration",
            "wallclock_start_time",
            "wallclock_end_time",
        ]
        self.data = pd.DataFrame(columns=self._columns)
        # File extension -> pandas reader, handed to the loader in load().
        self.file_readers = {
            "json": pd.read_json,
            "csv": pd.read_csv,
        }
        # Dict describing the cell currently running, or None between cells.
        self.current_cell = None

    def start_cell(self, raw_cell: str, cell_magics: List[str]):
        """Begin timing a cell; its index is the number of recorded cells."""
        self.current_cell = {
            "cell_index": len(self.data),
            "cell_magics": cell_magics,
            "raw_cell": raw_cell,
            "start_time": time.perf_counter(),
            "end_time": None,
            "duration": None,
            "wallclock_start_time": time.time(),
            "wallclock_end_time": None,
        }

    def end_cell(self, result):
        """Finish the running cell and append it to ``data``.

        Does nothing when no cell was started. *result* is accepted but not
        used.
        """
        if self.current_cell:
            self.current_cell["end_time"] = time.perf_counter()
            self.current_cell["duration"] = (
                self.current_cell["end_time"] - self.current_cell["start_time"]
            )
            self.current_cell["wallclock_end_time"] = time.time()

            new_row = pd.DataFrame([self.current_cell])
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", FutureWarning)
                warnings.simplefilter("ignore", pd.errors.PerformanceWarning)
                self.data = pd.concat([self.data, new_row], ignore_index=True)
            self.current_cell = None

    def view(self, start=None, end=None):
        """Return all recorded cells, or the positional slice ``[start:end]``."""
        if start is None and end is None:
            return self.data
        return self.data.iloc[start:end]

    def print(self):
        """Print index, duration and source of every recorded cell."""
        for _, cell in self.data.iterrows():
            print(
                f"Cell #{int(cell['cell_index'])} - Duration: "
                f"{cell['duration']:.2f}s"
            )
            print("-" * 40)
            print(cell["raw_cell"])
            print("=" * 40)

    def _itable_rows(self) -> List[dict]:
        """Build one display dict per recorded cell for show_itable()."""
        rows = []
        for _, row in self.data.iterrows():
            duration = row["end_time"] - row["start_time"]
            rows.append(
                {
                    "Cell index": row["cell_index"],
                    "Duration (s)": f"{duration:.2f}",
                    # Clock times must come from the wall-clock columns:
                    # start_time/end_time are perf_counter readings, not
                    # epoch timestamps, so localtime() on them is meaningless.
                    "Start Time": time.strftime(
                        "%H:%M:%S", time.localtime(row["wallclock_start_time"])
                    ),
                    "End Time": time.strftime(
                        "%H:%M:%S", time.localtime(row["wallclock_end_time"])
                    ),
                    "Code": row["raw_cell"].replace("\n", "<br>"),
                }
            )
        return rows

    def show_itable(self):
        """Display the history as an interactive table.

        Logs a warning and returns when no cell has been recorded.
        """
        if self.data.empty:
            logger.warning(
                EXTENSION_ERROR_MESSAGES[ExtensionErrorCode.NO_CELL_HISTORY]
            )
            return

        df = pd.DataFrame(self._itable_rows())

        # To avoid warnings about a non-documented 'escape' option in a notebook
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=SyntaxWarning, module="itables\\.typing")
            show(
                df,
                layout={"topStart": "search", "topEnd": None},
                columnDefs=[
                    {"targets": [4], "className": "dt-left"}
                ],  # 4 - "Code" index
                escape=False,
            )

    def export(self, filename="cell_history.json"):
        """Write the history to *filename* as JSON or CSV.

        The format is the lower-cased file extension. A filename without an
        extension is written as CSV and gets ``.csv`` appended. An empty
        history or an unsupported extension only logs a warning.
        """
        if self.data.empty:
            logger.warning(
                EXTENSION_ERROR_MESSAGES[ExtensionErrorCode.NO_CELL_HISTORY]
            )
            return

        # Determine format from filename extension
        _, ext = os.path.splitext(filename)
        fmt = ext.lower().lstrip(".")

        # Default to csv if no extension provided
        if not fmt:
            fmt = "csv"
            filename += ".csv"

        if fmt == "json":
            with open(filename, "w") as f:
                json.dump(self.data.to_dict("records"), f, indent=2)
        elif fmt == "csv":
            self.data.to_csv(filename, index=False)
        else:
            logger.warning(
                EXTENSION_ERROR_MESSAGES[
                    ExtensionErrorCode.UNSUPPORTED_FORMAT
                ].format(
                    format=fmt,
                    supported_formats=", ".join(["json", "csv"]),
                )
            )
            return

        logger.info(
            EXTENSION_INFO_MESSAGES[ExtensionInfoCode.EXPORT_SUCCESS].format(
                filename=filename
            )
        )

    def load(self, filename: str) -> Optional[pd.DataFrame]:
        """Load cell history from CSV or JSON file.

        The loaded frame is returned; ``self.data`` is not modified here.

        Returns:
            DataFrame if successful, None otherwise
        """
        return load_dataframe_from_file(
            filename,
            self.file_readers,
            self._columns,
            entity_name="cell history",
        )

    def __len__(self):
        """Number of recorded (finished) cells."""
        return len(self.data)

`load(filename)`

Load cell history from CSV or JSON file.

Returns:

Type	Description
`Optional[DataFrame]`	DataFrame if successful, None otherwise

Source code in jumper_extension/adapters/cell_history.py

def load(self, filename: str) -> Optional[pd.DataFrame]:
    """Read a stored cell history back from a CSV or JSON file.

    Reading is delegated to ``load_dataframe_from_file`` together with this
    object's reader table and expected column list.

    Returns:
        The loaded DataFrame, or None if loading did not succeed
    """
    loaded = load_dataframe_from_file(
        filename,
        self.file_readers,
        self._columns,
        entity_name="cell history",
    )
    return loaded

`PerformanceAnalyzer`

Performance analyzer to determine workload type using relative thresholds.

Inspired by `JobLabeller` from: https://gitlab.hrz.tu-chemnitz.de/pika/pika-server/-/blob/619d62926cd85f8c20589c75aba0c6e2c51087e1/src/post_processing/post_processing.py#L711

Source code in jumper_extension/adapters/analyzer.py

class PerformanceAnalyzer:
    """
    Performance analyzer to determine workload type using relative thresholds.

    Inspired by `JobLabeller` from:
    https://gitlab.hrz.tu-chemnitz.de/pika/pika-server/-/blob/
    619d62926cd85f8c20589c75aba0c6e2c51087e1/
    src/post_processing/post_processing.py#L711
    """
    # Default thresholds
    DEFAULT_THRESHOLDS = {
        'memory_ratio': 0.8,  # memory limit 0.80
        'cpu_ratio': 0.7,  # CPU capacity 0.70
        'gpu_util_ratio': 0.8,  # GPU utilization
        'gpu_memory_ratio': 0.8,  # GPU memory

        # --- thresholds for "GPU idle" detection
        # minimum memory usage required to treat GPU as allocated
        'gpu_alloc_min_mem_gb': 0.1,
        # minimum GPU utilization to treat GPU in idle state
        'gpu_util_idle_threshold': 0.05,
        # minimum overall usage fraction required to trigger the tag
        'gpu_alloc_min_fraction': 0.5,

    }

    def __init__(
        self,
        thresholds: Optional[Dict[str, float]] = None,
    ):
        """
        Initialize analyzer with relative thresholds

        Args:
            thresholds: Custom threshold values (uses defaults if None).
                Given keys override the matching defaults; the rest keep
                their default values.
        """
        self.thresholds = {**self.DEFAULT_THRESHOLDS, **(thresholds or {})}

    def analyze_cell_performance(
        self,
        perfdata: "pd.DataFrame",
        memory_limit: float,
        gpu_memory_limit: Optional[float] = None,
    ) -> List[TagScore]:
        """
        Analyze cell performance and determine tags

        Args:
            perfdata: DataFrame with performance data
            memory_limit: System memory limit in GB
            gpu_memory_limit: GPU memory limit in GB (if available)

        Returns:
            List[TagScore]: Ranked performance tags for the cell. The
            "GPU allocated but not used" tag, when detected, comes first;
            a single NORMAL tag with score 0.0 is returned when nothing
            else applies.
        """

        logger.debug(f"{memory_limit = }")
        logger.debug(f"{gpu_memory_limit = }")

        # Compute normalized metrics
        metrics = self._compute_metrics(perfdata, gpu_memory_limit)

        # Calculate resource utilization ratios
        ratios = self._calculate_utilization_ratios(metrics, memory_limit, gpu_memory_limit)

        # Create the ranked tags list
        ranked_tags = self._create_ranked_tags(ratios)

        # Detect "GPU allocated but not used" and prepend if applicable
        gpu_unused_tag = self._detect_gpu_allocated_but_not_used(perfdata, gpu_memory_limit)
        if gpu_unused_tag is not None:
            # Prepend the GPU allocated but not used as this is the most important tag
            ranked_tags = [gpu_unused_tag] + ranked_tags

        logger.debug(f"{ranked_tags = }")

        return ranked_tags if ranked_tags else [TagScore(PerformanceTag.NORMAL, 0.0)]

    @staticmethod
    def _compute_metrics(
        perfdata: "pd.DataFrame",
        gpu_memory_limit: Optional[float],
    ) -> Dict[str, float]:
        """Compute raw performance metrics

        Each metric is the column mean and is only present when its column
        exists. The GPU memory metric additionally requires a truthy
        *gpu_memory_limit*.
        """
        metrics = {}

        # CPU metrics
        if 'cpu_util_avg' in perfdata.columns:
            metrics['cpu_avg'] = perfdata['cpu_util_avg'].mean()

        # Memory metrics
        if 'memory' in perfdata.columns:
            metrics['memory_avg_gb'] = perfdata['memory'].mean()

        # GPU metrics
        if 'gpu_util_avg' in perfdata.columns:
            metrics['gpu_util_avg'] = perfdata['gpu_util_avg'].mean()

        if 'gpu_mem_avg' in perfdata.columns and gpu_memory_limit:
            metrics['gpu_memory_avg_gb'] = perfdata['gpu_mem_avg'].mean()

        return metrics

    def _calculate_utilization_ratios(self, metrics: Dict[str, float],
                                      memory_limit: float,
                                      gpu_memory_limit: Optional[float]) -> Dict[str, float]:
        """Calculate utilization ratios relative to system limits

        Returns a dict with the keys ``memory``, ``cpu``, ``gpu_util`` and
        ``gpu_memory``, each clamped to 0.0-1.0. Missing metrics count as 0.
        """
        ratios = {}

        # Memory ratio (current usage / limit)
        memory_avg = metrics.get('memory_avg_gb', 0)
        ratios['memory'] = self._safe_ratio(memory_avg, memory_limit)

        # CPU ratio (utilization / 100%)
        cpu_avg = metrics.get('cpu_avg', 0)
        ratios['cpu'] = self._safe_ratio(cpu_avg, 100.0)

        # GPU utilization ratio
        gpu_util = metrics.get('gpu_util_avg', 0)
        ratios['gpu_util'] = self._safe_ratio(gpu_util, 100.0)

        # GPU memory ratio
        if gpu_memory_limit and gpu_memory_limit > 0:
            gpu_memory = metrics.get('gpu_memory_avg_gb', 0)
            ratios['gpu_memory'] = self._safe_ratio(gpu_memory, gpu_memory_limit)
        else:
            ratios['gpu_memory'] = 0.0

        # Lazy %-args: the dict is only rendered when DEBUG is enabled.
        logger.debug("ratios: %s", ratios)

        return ratios

    @staticmethod
    def _safe_ratio(measured: float, maximum: float) -> float:
        """Safely calculate ratio with error handling

        Returns ``measured / maximum`` clamped to 0.0-1.0, or 0.0 when either
        value is None, *maximum* is not positive, or the division fails.
        """
        try:
            if maximum is None or maximum <= 0 or measured is None:
                return 0.0
            return min(1.0, max(0.0, measured / maximum))
        except (TypeError, ZeroDivisionError):
            return 0.0

    def _create_ranked_tags(self, ratios: Dict[str, float]) -> List[TagScore]:
        """Create the ranked list of tags based on ratios (0.0-1.0 scale)

        A resource is tagged when its ratio reaches the threshold stored
        under ``<resource>_ratio``; tags are ordered by descending ratio.
        """

        # Sort by descending ratios
        sorted_ratios = sorted(ratios.items(), key=lambda x: x[1], reverse=True)

        # Create ranked tags for resources that exceed the minimum threshold
        tag_mapping = {
            'cpu': PerformanceTag.CPU_BOUND,
            'memory': PerformanceTag.MEMORY_BOUND,
            'gpu_util': PerformanceTag.GPU_UTIL_BOUND,
            'gpu_memory': PerformanceTag.GPU_MEMORY_BOUND,
        }

        ranked_tags = []

        for resource, ratio in sorted_ratios:
            threshold_key = f'{resource}_ratio'
            threshold = self.thresholds.get(threshold_key, 0.0)
            if ratio >= threshold:
                tag = tag_mapping.get(resource)
                if tag:
                    ranked_tags.append(TagScore(tag, ratio))

        return ranked_tags

    def _detect_gpu_allocated_but_not_used(
        self,
        perfdata: "pd.DataFrame",
        gpu_memory_limit: Optional[float],
    ) -> Optional[TagScore]:
        """
        Detect case when GPU memory is allocated but GPU compute utilization stays idle
        for a significant fraction of measurement time.

        Returns:
            A GPU_ALLOCATED_BUT_NOT_USED tag scored with the fraction of all
            samples that are both allocated and idle, when that fraction
            reaches ``gpu_alloc_min_fraction``; otherwise None.
        """
        # must have GPU columns and a GPU present
        if gpu_memory_limit is None:
            return None
        if 'gpu_mem_avg' not in perfdata.columns or 'gpu_util_avg' not in perfdata.columns:
            return None
        if perfdata.empty:
            return None

        memory_threshold_gb = max(float(self.thresholds.get('gpu_alloc_min_mem_gb', 0.1)), 0.0)
        utilization_idle_threshold = float(self.thresholds.get('gpu_util_idle_threshold', 0.05))  # 0..1
        min_fraction = float(self.thresholds.get('gpu_alloc_min_fraction', 0.5))        # 0..1

        # allocation considered if memory usage exceeds memory_threshold_gb
        mask_allocated = perfdata['gpu_mem_avg'] > memory_threshold_gb
        if mask_allocated.sum() == 0:
            return None

        # idle if util ≤ util_idle_thr * 100 (%)
        mask_idle = perfdata['gpu_util_avg'] <= (utilization_idle_threshold * 100.0)

        mask_allocated_and_idle = mask_allocated & mask_idle
        frac = float(mask_allocated_and_idle.mean())

        # Lazy %-args keep the emitted text unchanged while skipping the
        # rendering of whole Series unless DEBUG logging is enabled.
        logger.debug("GPU idle check:")
        logger.debug("gpu_mem_avg:\n%s", perfdata['gpu_mem_avg'])
        logger.debug("mask_allocated:\n%s\n", mask_allocated)
        logger.debug("gpu_util_avg:\n%s", perfdata['gpu_util_avg'])
        logger.debug("mask_idle:\n%s\n", mask_idle)
        logger.debug("GPU not used min_fraction = %r", min_fraction)
        logger.debug("GPU not used frac = %r", frac)

        if frac >= min_fraction:
            return TagScore(PerformanceTag.GPU_ALLOCATED_BUT_NOT_USED, frac)
        return None

`__init__(thresholds=None)`

Initialize analyzer with relative thresholds

Parameters:

Name	Type	Description	Default
`thresholds`	`Optional[Dict[str, float]]`	Custom threshold values (uses defaults if None)	`None`

Source code in jumper_extension/adapters/analyzer.py

def __init__(
    self,
    thresholds: Optional[Dict[str, float]] = None,
):
    """
    Set up the analyzer's relative thresholds

    Args:
        thresholds: Overrides applied on top of ``DEFAULT_THRESHOLDS``
            (the defaults are used unchanged if None)
    """
    # Start from a copy so the class-level defaults are never mutated.
    merged = dict(self.DEFAULT_THRESHOLDS)
    if thresholds:
        merged.update(thresholds)
    self.thresholds = merged

`analyze_cell_performance(perfdata, memory_limit, gpu_memory_limit=None)`

Analyze cell performance and determine tags

Parameters:

Name	Type	Description	Default
`perfdata`	`DataFrame`	DataFrame with performance data	required
`memory_limit`	`float`	System memory limit in GB	required
`gpu_memory_limit`	`Optional[float]`	GPU memory limit in GB (if available)	`None`

Returns:

Type	Description
`List[TagScore]`	Ranked performance tags for the cell

Source code in jumper_extension/adapters/analyzer.py

def analyze_cell_performance(
    self,
    perfdata: "pd.DataFrame",
    memory_limit: float,
    gpu_memory_limit: Optional[float] = None,
) -> List[TagScore]:
    """
    Classify a cell's workload from its performance samples

    Args:
        perfdata: DataFrame with performance data
        memory_limit: System memory limit in GB
        gpu_memory_limit: GPU memory limit in GB (if available)

    Returns:
        List[TagScore]: Ranked performance tags for the cell; a single
        NORMAL tag with score 0.0 when no other tag applies
    """
    logger.debug(f"{memory_limit = }")
    logger.debug(f"{gpu_memory_limit = }")

    # Raw metrics -> ratios against the limits -> threshold-ranked tags.
    raw_metrics = self._compute_metrics(perfdata, gpu_memory_limit)
    utilization = self._calculate_utilization_ratios(
        raw_metrics, memory_limit, gpu_memory_limit
    )
    ranked_tags = self._create_ranked_tags(utilization)

    # An allocated-but-idle GPU is the most important finding, so it is
    # placed ahead of every ratio-based tag.
    idle_gpu_tag = self._detect_gpu_allocated_but_not_used(perfdata, gpu_memory_limit)
    if idle_gpu_tag is not None:
        ranked_tags = [idle_gpu_tag, *ranked_tags]

    logger.debug(f"{ranked_tags = }")

    if ranked_tags:
        return ranked_tags
    return [TagScore(PerformanceTag.NORMAL, 0.0)]

`PerformanceTag`

Bases: Enum

Performance tags for classifying cells

Source code in jumper_extension/adapters/analyzer.py

class PerformanceTag(Enum):
    """Labels used to classify a cell's workload; ``str(tag)`` is its value."""

    # No resource reached its threshold.
    NORMAL = "normal"

    # One label per resource ratio that can reach its threshold.
    CPU_BOUND = "cpu-bound"
    MEMORY_BOUND = "memory-bound"
    GPU_UTIL_BOUND = "gpu-util-bound"
    GPU_MEMORY_BOUND = "gpu-memory-bound"

    # GPU memory is in use while GPU utilization stays idle.
    GPU_ALLOCATED_BUT_NOT_USED = "gpu-allocated-but-not-used"

    def __str__(self) -> str:
        # Render as the bare label rather than "PerformanceTag.NAME".
        return self._value_

`TagScore` `dataclass`

Tag with its score for ranking

Source code in jumper_extension/adapters/analyzer.py

@dataclass()
class TagScore:
    """A performance tag paired with the score used to rank it."""

    # The classification label.
    tag: PerformanceTag
    # Ranking weight attached to the tag.
    score: float