Parse a DataFrame's test_title column into structured columns.
Adds test_file, test_name, test_opcode, test_params, and
block_limit_million columns. Titles that don't match the
<file>.py__<test_name> shape get empty parsed columns and are
returned in the second tuple element so the caller can warn.
Parameters:
| Name | Type | Description | Default |
|------|------|-------------|---------|
| df | DataFrame | DataFrame with a test_title column. Not mutated. | required |
Returns:
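| Type | Description |
|------|-------------|
| DataFrame | First element of the returned tuple: the parsed DataFrame. |
| list[str] | Second element of the returned tuple: the list of titles that did not match the fixture shape. |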
Source code in src/benchmarkoor_fetch/parse/titles.py
def parse_test_titles(df: pd.DataFrame) -> tuple[pd.DataFrame, list[str]]:
    """Split each `test_title` into structured columns on a copy of `df`.

    The returned frame gains `test_file`, `test_name`, `test_params`,
    `test_opcode`, and `block_limit_million`. A title that does not fit the
    `<file>.py__<test_name>` shape gets empty strings in the parsed string
    columns and is reported back in the second tuple element so the caller
    can warn about it. The block limit is pulled from the full title with a
    separate pattern and is `<NA>` when nothing numeric is found.

    Args:
        df: DataFrame with a `test_title` column. Not mutated.

    Returns:
        Tuple of (parsed DataFrame, list of titles that did not match the
        fixture shape).
    """
    result = df.copy()
    title_text = result["test_title"].astype(str)

    fields = title_text.str.extract(_TITLE_RE)
    # A row counts as unparsed when the title pattern captured no test_file.
    is_unmatched = fields["test_file"].isna()

    # Missing captures become empty strings; object dtype keeps plain `str`.
    for column in ("test_file", "test_name", "test_params"):
        result[column] = fields[column].fillna("").astype(object)

    opcode_values = []
    name_param_pairs = zip(result["test_name"], result["test_params"], strict=True)
    for test_name, test_params in name_param_pairs:
        opcode = _compute_opcode(test_name, test_params)
        # Normalise any falsy result (e.g. None) to the empty string.
        opcode_values.append(opcode if opcode else "")
    result["test_opcode"] = pd.Series(opcode_values, index=result.index, dtype=object)

    # First capture group of the block-limit pattern; non-numeric -> <NA>.
    limit_text = title_text.str.extract(_BLOCK_LIMIT_RE)[0]
    limit_numbers = pd.to_numeric(limit_text, errors="coerce")
    result["block_limit_million"] = limit_numbers.astype("Int64")

    unmatched_titles: list[str] = title_text[is_unmatched].tolist()
    return result, unmatched_titles
|