`scripts.step_1c_fit_hill_curves`¶

Fit Hill functions to dTAG-13 dose–response curves of the reporter–repressor.

This module

1) Loads FCS files for dose–response replicates (R1–R3) and NFC. 2) Applies boundary and singlet gates; subtracts NFC-derived backgrounds. 3) Maps plate indices to dTAG-13 concentrations; constructs day-4 steady state. 4) Computes fold-change relative to NTC and normalizes BFP (repressor) levels. 5) Fits Hill curves (fc vs normalized BFP) to estimate K (EC50-like) and Hill n. 6) Saves per-plasmid parameters (CSV) and diagnostic plots (PDFs).

Outputs

plots/Hill_dCas9.pdf plots/Hill_CasRx.pdf plots/Hill-HDAC4-dCas9.pdf plots/Hill-KRAB-dCas9.pdf plots/Hill-KRAB-Split-dCas9.pdf parameters/Hill_parameters.csv

apply_boundary_gate ¶

apply_boundary_gate(df)

Apply coarse FSC/SSC boundary gate.

Parameters¶

df : pd.DataFrame Event table with FSC-A and SSC-A.

Returns¶

pd.DataFrame Subset of events within the rectangle gate.

Source code in scripts/step_1c_fit_hill_curves.py

def apply_boundary_gate(df: pd.DataFrame) -> pd.DataFrame:
    """
    Apply coarse FSC/SSC boundary gate.

    Parameters
    ----------
    df : pd.DataFrame
        Event table with FSC-A and SSC-A.

    Returns
    -------
    pd.DataFrame
        Subset of events within the rectangle gate.
    """
    m = (  # build mask within numeric limits
            (df[CH_FSC_A] >= BOUND_MIN[CH_FSC_A]) & (df[CH_FSC_A] <= BOUND_MAX[CH_FSC_A]) &
            (df[CH_SSC_A] >= BOUND_MIN[CH_SSC_A]) & (df[CH_SSC_A] <= BOUND_MAX[CH_SSC_A])
    )
    out = df.loc[m]  # filter events
    print(f"[gate] boundary: kept {len(out):,}/{len(df):,}")  # debug
    return out  # return gated events

apply_singlet_gate ¶

apply_singlet_gate(df)

Keep singlets using FSC-H/FSC-A ratio window.

Parameters¶

df : pd.DataFrame Event table with FSC-H and FSC-A.

Returns¶

pd.DataFrame Events passing the singlet gate.

Source code in scripts/step_1c_fit_hill_curves.py

def apply_singlet_gate(df: pd.DataFrame) -> pd.DataFrame:
    """
    Keep singlets using FSC-H/FSC-A ratio window.

    Parameters
    ----------
    df : pd.DataFrame
        Event table with FSC-H and FSC-A.

    Returns
    -------
    pd.DataFrame
        Events passing the singlet gate.
    """
    ratio = df[CH_FSC_H] / (df[CH_FSC_A].replace(0, np.nan))  # robust ratio
    m = (ratio >= SINGLET_RATIO_LOW) & (ratio <= SINGLET_RATIO_HIGH)  # in-window
    out = df.loc[m]  # filter
    print(f"[gate] singlet : kept {len(out):,}/{len(df):,}")  # debug
    return out  # singlets only

compute_nfc_background ¶

compute_nfc_background(nfc_dir)

Compute NFC background medians (mean of first 3 files).

Parameters¶

nfc_dir : str Directory of NFC .fcs files.

Returns¶

(float, float) Tuple (mBFP_neg, mmCherry_neg).

Source code in scripts/step_1c_fit_hill_curves.py

def compute_nfc_background(nfc_dir: str):
    """
    Compute NFC background medians (mean of first 3 files).

    Parameters
    ----------
    nfc_dir : str
        Directory of NFC .fcs files.

    Returns
    -------
    (float, float)
        Tuple (mBFP_neg, mmCherry_neg).
    """
    df = load_flowset_medians(nfc_dir)  # NFC medians
    print(f"[NFC] files={len(df)}; using first 3 for background")  # debug
    mBFP_neg = float(df.iloc[:3][CH_BFP].mean())  # BFP background
    mmCherry_neg = float(df.iloc[:3][CH_mCh].mean())  # mCherry background
    print(f"[NFC] mBFP_neg={mBFP_neg:.6g}, mmCherry_neg={mmCherry_neg:.6g}")  # debug
    return mBFP_neg, mmCherry_neg  # return tuple

fit_hill ¶

fit_hill(R_vals, y_vals, start=(0.1, 1.0))

Fit Hill function to data using nonlinear least squares.

Parameters¶

R_vals : array-like Normalized repressor levels (predictor). y_vals : array-like Fold-change (response). start : (float, float), optional Initial guesses (K, n), by default (0.1, 1.0).

Returns¶

(np.ndarray, np.ndarray | None) (popt, pcov) where popt=[K, n]; pcov is covariance matrix or None.

Raises¶

RuntimeError If fewer than 3 finite data points are available.

Source code in scripts/step_1c_fit_hill_curves.py

def fit_hill(R_vals, y_vals, start=(0.1, 1.0)):
    """
    Fit Hill function to data using nonlinear least squares.

    Parameters
    ----------
    R_vals : array-like
        Normalized repressor levels (predictor).
    y_vals : array-like
        Fold-change (response).
    start : (float, float), optional
        Initial guesses (K, n), by default (0.1, 1.0).

    Returns
    -------
    (np.ndarray, np.ndarray | None)
        (popt, pcov) where popt=[K, n]; pcov is covariance matrix or None.

    Raises
    ------
    RuntimeError
        If fewer than 3 finite data points are available.
    """
    R_vals = np.asarray(R_vals, dtype=float)  # to float array
    y_vals = np.asarray(y_vals, dtype=float)  # to float array
    m = np.isfinite(R_vals) & np.isfinite(y_vals)  # finite mask
    R_vals, y_vals = R_vals[m], y_vals[m]  # filter
    print(f"[fit] hill points used: {len(R_vals)}")  # debug
    if len(R_vals) < 3:  # guard
        raise RuntimeError("Not enough points for Hill fit.")
    popt, pcov = curve_fit(hill_func, R_vals, y_vals,  # LM fit (like nlsLM)
                           p0=np.array(start, float), maxfev=20000)
    print(f"[fit] K={popt[0]:.6g}, n={popt[1]:.6g}")  # debug
    return popt, pcov  # params + covariance

hill_func ¶

hill_func(R, K, n)

Hill repression function

y = K^n / (K^n + R^n)

Parameters¶

R : array-like Repressor proxy (normalized BFP). K : float Half-maximal constant. n : float Hill coefficient.

Returns¶

np.ndarray Predicted normalized reporter level.

Source code in scripts/step_1c_fit_hill_curves.py

def hill_func(R, K, n):
    """
    Hill repression function:
      y = K^n / (K^n + R^n)

    Parameters
    ----------
    R : array-like
        Repressor proxy (normalized BFP).
    K : float
        Half-maximal constant.
    n : float
        Hill coefficient.

    Returns
    -------
    np.ndarray
        Predicted normalized reporter level.
    """
    return (K ** n) / (K ** n + np.power(R, n))  # vectorized equation

load_flowset_medians ¶

load_flowset_medians(folder)

Load all .fcs files under a folder and compute gated medians.

Parameters¶

folder : str Directory path containing .fcs files.

Returns¶

pd.DataFrame One row per file with medians and '__filename'.

Raises¶

FileNotFoundError If no .fcs files were found.

Source code in scripts/step_1c_fit_hill_curves.py

def load_flowset_medians(folder: str) -> pd.DataFrame:
    """
    Load all .fcs files under a folder and compute gated medians.

    Parameters
    ----------
    folder : str
        Directory path containing .fcs files.

    Returns
    -------
    pd.DataFrame
        One row per file with medians and '__filename'.

    Raises
    ------
    FileNotFoundError
        If no .fcs files were found.
    """
    files = sorted(glob.glob(os.path.join(folder, "*.fcs")))  # discover FCS files
    print(f"[scan] {folder} → {len(files)} files")  # debug
    if not files:  # guard
        raise FileNotFoundError(f"No FCS files found in: {folder}")
    out = pd.DataFrame([median_channels_for_file(f) for f in files])  # compute medians
    print(f"[table] medians shape={out.shape}")  # debug
    return out  # return table

load_replicate ¶

load_replicate(folder, mBFP_neg, mmCherry_neg)

Load one replicate folder: medians → background subtraction → token parsing.

Parameters¶

folder : str Path to replicate folder with .fcs files. mBFP_neg : float NFC background for BFP. mmCherry_neg : float NFC background for mCherry.

Returns¶

pd.DataFrame Columns: [BV421-A, PE-A, plasmid, guide, dTAG]

Source code in scripts/step_1c_fit_hill_curves.py

def load_replicate(folder: str, mBFP_neg: float, mmCherry_neg: float) -> pd.DataFrame:
    """
    Load one replicate folder: medians → background subtraction → token parsing.

    Parameters
    ----------
    folder : str
        Path to replicate folder with .fcs files.
    mBFP_neg : float
        NFC background for BFP.
    mmCherry_neg : float
        NFC background for mCherry.

    Returns
    -------
    pd.DataFrame
        Columns: [BV421-A, PE-A, plasmid, guide, dTAG]
    """
    raw = load_flowset_medians(folder).copy()  # per-file medians
    print(f"[rep] {folder} shape={raw.shape}")  # debug
    raw[CH_BFP] = raw[CH_BFP] - mBFP_neg  # subtract BFP background
    raw[CH_mCh] = raw[CH_mCh] - mmCherry_neg  # subtract mCh background
    print(f"[rep] bg-sub head:\n{raw[[CH_BFP, CH_mCh]].head().to_string(index=False)}")  # preview
    parsed = raw["__filename"].apply(parse_filename_tokens).tolist()  # parse tokens
    parsed_df = pd.DataFrame(parsed, columns=["plasmid", "guide", "dTAG_token"])  # to frame
    df = pd.concat([raw[[CH_BFP, CH_mCh]], parsed_df], axis=1)  # combine
    df["dTAG"] = df["dTAG_token"].map(DTAG_MAP).astype(float)  # map to concentration
    df = df.drop(columns=["dTAG_token"])  # drop helper
    print(f"[rep] combined shape={df.shape}")  # debug
    return df  # return replicate table

median_channels_for_file ¶

median_channels_for_file(fpath)

Compute per-file medians after boundary + singlet gating.

Parameters¶

fpath : str Path to an .fcs file.

Returns¶

pd.Series Numeric medians per channel with '__filename' set to basename (sans extension).

Raises¶

ValueError If required channels are missing.

Source code in scripts/step_1c_fit_hill_curves.py

def median_channels_for_file(fpath: str) -> pd.Series:
    """
    Compute per-file medians after boundary + singlet gating.

    Parameters
    ----------
    fpath : str
        Path to an .fcs file.

    Returns
    -------
    pd.Series
        Numeric medians per channel with '__filename' set to basename (sans extension).

    Raises
    ------
    ValueError
        If required channels are missing.
    """
    print(f"[read] {os.path.basename(fpath)}")  # which file
    sample = FCMeasurement(ID=os.path.basename(fpath), datafile=fpath).data  # read FCS
    print(f"[read] events={len(sample):,}, cols={len(sample.columns)}")  # size
    for ch in (CH_FSC_A, CH_SSC_A, CH_FSC_H, CH_BFP, CH_mCh):  # channel check
        if ch not in sample.columns:
            raise ValueError(f"Channel '{ch}' not found in: {fpath}")
    gated = apply_boundary_gate(sample)  # boundary gate
    if len(gated) == 0:  # fallback
        print("[gate] boundary empty → using raw sample")
        gated = sample
    singlets = apply_singlet_gate(gated)  # singlet gate
    if len(singlets) == 0:  # fallback
        print("[gate] singlet empty  → using boundary-gated")
        singlets = gated
    s = singlets.median(numeric_only=True)  # medians
    print(f"[median] BFP~{s.get(CH_BFP, np.nan):.3g} | mCh~{s.get(CH_mCh, np.nan):.3g}")  # preview
    s["__filename"] = os.path.splitext(os.path.basename(fpath))[0]  # filename stem
    return s  # return series

parse_filename_tokens ¶

parse_filename_tokens(name)

Extract plasmid, guide, and plate-index token (for dTAG concentration).

Parameters¶

name : str Basename of file without extension.

Returns¶

(str, str, str) (plasmid, guide, dtag_idx_token)

Source code in scripts/step_1c_fit_hill_curves.py

def parse_filename_tokens(name: str):
    """
    Extract plasmid, guide, and plate-index token (for dTAG concentration).

    Parameters
    ----------
    name : str
        Basename of file without extension.

    Returns
    -------
    (str, str, str)
        (plasmid, guide, dtag_idx_token)
    """
    parts = FILENAME_SPLIT_RE.split(name)  # tokens
    plasmid = parts[0] if len(parts) > 0 else ""  # plasmid code
    guide = parts[1] if len(parts) > 1 else ""  # guide label 'G'/'N'
    dtag_token = parts[2] if len(parts) > 2 else ""  # token like 'dTAG11'
    m = re.search(r"([A-Za-z]+)(\d+)$", dtag_token)  # split alpha+digits
    dtag_idx = m.group(2) if m else dtag_token  # keep index digits
    return plasmid, guide, dtag_idx  # return tokens

save_hill_plot ¶

save_hill_plot(df, fit_params, out_pdf)

Save Hill fit plot (fc vs normalized BFP) to PDF.

Parameters¶

df : pd.DataFrame Data with columns 'norm.bfp' and 'fc'. fit_params : (float, float) Tuple (K, n) of fitted parameters. out_pdf : str Output PDF filename (saved under OUT_PATH).

Source code in scripts/step_1c_fit_hill_curves.py

def save_hill_plot(df: pd.DataFrame, fit_params, out_pdf: str):
    """
    Save Hill fit plot (fc vs normalized BFP) to PDF.

    Parameters
    ----------
    df : pd.DataFrame
        Data with columns 'norm.bfp' and 'fc'.
    fit_params : (float, float)
        Tuple (K, n) of fitted parameters.
    out_pdf : str
        Output PDF filename (saved under OUT_PATH).
    """
    K, n = fit_params  # unpack params
    tmin, tmax = float(df["norm.bfp"].min()), float(df["norm.bfp"].max())  # x-range
    curve = pd.DataFrame({"norm.bfp": np.linspace(tmin, tmax, 200)})  # grid
    curve["fc_fit"] = hill_func(curve["norm.bfp"], K, n)  # predictions
    p = (  # build plot
            ggplot(df, aes("norm.bfp", "fc"))
            + geom_point(size=0.8, alpha=0.4)
            + geom_line(data=curve, mapping=aes(x="norm.bfp", y="fc_fit"))
            + coord_cartesian(ylim=(0, 1))
            + labs(x="Normalized repressor level", y="Normalized reporter level")
            + scale_y_continuous()
            + theme_castuner_like()
    )
    out_path = os.path.join(OUT_PATH, out_pdf)  # destination
    p.save(out_path, width=PLOT_W, height=PLOT_H, units="in")  # write PDF
    print(f"[plot] saved: {out_path}")  # debug

theme_castuner_like ¶

theme_castuner_like()

Return a clean plot theme resembling the original R plots.

Source code in scripts/step_1c_fit_hill_curves.py

def theme_castuner_like():
    """Return a clean plot theme resembling the original R plots."""
    return theme_classic()

scripts.step_1c_fit_hill_curves¶

apply_boundary_gate ¶

Parameters¶

Returns¶

apply_singlet_gate ¶

Parameters¶

Returns¶

compute_nfc_background ¶

Parameters¶

Returns¶

fit_hill ¶

Parameters¶

Returns¶

Raises¶

hill_func ¶

Parameters¶

Returns¶

load_flowset_medians ¶

Parameters¶

Returns¶

Raises¶

load_replicate ¶

Parameters¶

Returns¶

median_channels_for_file ¶

Parameters¶

Returns¶

Raises¶

parse_filename_tokens ¶

Parameters¶

Returns¶

save_hill_plot ¶

Parameters¶

theme_castuner_like ¶

`scripts.step_1c_fit_hill_curves`¶