scripts.step_1c_fit_hill_curves

Fit Hill functions to dTAG-13 dose–response curves of the reporter–repressor.

This module

1) Loads FCS files for dose–response replicates (R1–R3) and NFC. 2) Applies boundary and singlet gates; subtracts NFC-derived backgrounds. 3) Maps plate indices to dTAG-13 concentrations; constructs day-4 steady state. 4) Computes fold-change relative to NTC and normalizes BFP (repressor) levels. 5) Fits Hill curves (fc vs normalized BFP) to estimate K (EC50-like) and Hill n. 6) Saves per-plasmid parameters (CSV) and diagnostic plots (PDFs).

Outputs

plots/Hill_dCas9.pdf plots/Hill_CasRx.pdf plots/Hill-HDAC4-dCas9.pdf plots/Hill-KRAB-dCas9.pdf plots/Hill-KRAB-Split-dCas9.pdf parameters/Hill_parameters.csv

apply_boundary_gate

apply_boundary_gate(df)

Apply coarse FSC/SSC boundary gate.

Parameters

df : pd.DataFrame Event table with FSC-A and SSC-A.

Returns

pd.DataFrame Subset of events within the rectangle gate.

Source code in scripts/step_1c_fit_hill_curves.py
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
def apply_boundary_gate(df: pd.DataFrame) -> pd.DataFrame:
    """
    Apply coarse FSC/SSC boundary gate.

    Parameters
    ----------
    df : pd.DataFrame
        Event table with FSC-A and SSC-A.

    Returns
    -------
    pd.DataFrame
        Subset of events within the rectangle gate.
    """
    m = (  # build mask within numeric limits
            (df[CH_FSC_A] >= BOUND_MIN[CH_FSC_A]) & (df[CH_FSC_A] <= BOUND_MAX[CH_FSC_A]) &
            (df[CH_SSC_A] >= BOUND_MIN[CH_SSC_A]) & (df[CH_SSC_A] <= BOUND_MAX[CH_SSC_A])
    )
    out = df.loc[m]  # filter events
    print(f"[gate] boundary: kept {len(out):,}/{len(df):,}")  # debug
    return out  # return gated events

apply_singlet_gate

apply_singlet_gate(df)

Keep singlets using FSC-H/FSC-A ratio window.

Parameters

df : pd.DataFrame Event table with FSC-H and FSC-A.

Returns

pd.DataFrame Events passing the singlet gate.

Source code in scripts/step_1c_fit_hill_curves.py
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
def apply_singlet_gate(df: pd.DataFrame) -> pd.DataFrame:
    """
    Keep singlets using FSC-H/FSC-A ratio window.

    Parameters
    ----------
    df : pd.DataFrame
        Event table with FSC-H and FSC-A.

    Returns
    -------
    pd.DataFrame
        Events passing the singlet gate.
    """
    ratio = df[CH_FSC_H] / (df[CH_FSC_A].replace(0, np.nan))  # robust ratio
    m = (ratio >= SINGLET_RATIO_LOW) & (ratio <= SINGLET_RATIO_HIGH)  # in-window
    out = df.loc[m]  # filter
    print(f"[gate] singlet : kept {len(out):,}/{len(df):,}")  # debug
    return out  # singlets only

compute_nfc_background

compute_nfc_background(nfc_dir)

Compute NFC background medians (mean of first 3 files).

Parameters

nfc_dir : str Directory of NFC .fcs files.

Returns

(float, float) Tuple (mBFP_neg, mmCherry_neg).

Source code in scripts/step_1c_fit_hill_curves.py
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
def compute_nfc_background(nfc_dir: str):
    """
    Compute NFC background medians (mean of first 3 files).

    Parameters
    ----------
    nfc_dir : str
        Directory of NFC .fcs files.

    Returns
    -------
    (float, float)
        Tuple (mBFP_neg, mmCherry_neg).
    """
    df = load_flowset_medians(nfc_dir)  # NFC medians
    print(f"[NFC] files={len(df)}; using first 3 for background")  # debug
    mBFP_neg = float(df.iloc[:3][CH_BFP].mean())  # BFP background
    mmCherry_neg = float(df.iloc[:3][CH_mCh].mean())  # mCherry background
    print(f"[NFC] mBFP_neg={mBFP_neg:.6g}, mmCherry_neg={mmCherry_neg:.6g}")  # debug
    return mBFP_neg, mmCherry_neg  # return tuple

fit_hill

fit_hill(R_vals, y_vals, start=(0.1, 1.0))

Fit Hill function to data using nonlinear least squares.

Parameters

R_vals : array-like Normalized repressor levels (predictor). y_vals : array-like Fold-change (response). start : (float, float), optional Initial guesses (K, n), by default (0.1, 1.0).

Returns

(np.ndarray, np.ndarray | None) (popt, pcov) where popt=[K, n]; pcov is covariance matrix or None.

Raises

RuntimeError If fewer than 3 finite data points are available.

Source code in scripts/step_1c_fit_hill_curves.py
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
def fit_hill(R_vals, y_vals, start=(0.1, 1.0)):
    """
    Fit Hill function to data using nonlinear least squares.

    Parameters
    ----------
    R_vals : array-like
        Normalized repressor levels (predictor).
    y_vals : array-like
        Fold-change (response).
    start : (float, float), optional
        Initial guesses (K, n), by default (0.1, 1.0).

    Returns
    -------
    (np.ndarray, np.ndarray | None)
        (popt, pcov) where popt=[K, n]; pcov is covariance matrix or None.

    Raises
    ------
    RuntimeError
        If fewer than 3 finite data points are available.
    """
    R_vals = np.asarray(R_vals, dtype=float)  # to float array
    y_vals = np.asarray(y_vals, dtype=float)  # to float array
    m = np.isfinite(R_vals) & np.isfinite(y_vals)  # finite mask
    R_vals, y_vals = R_vals[m], y_vals[m]  # filter
    print(f"[fit] hill points used: {len(R_vals)}")  # debug
    if len(R_vals) < 3:  # guard
        raise RuntimeError("Not enough points for Hill fit.")
    popt, pcov = curve_fit(hill_func, R_vals, y_vals,  # LM fit (like nlsLM)
                           p0=np.array(start, float), maxfev=20000)
    print(f"[fit] K={popt[0]:.6g}, n={popt[1]:.6g}")  # debug
    return popt, pcov  # params + covariance

hill_func

hill_func(R, K, n)
Hill repression function

y = K^n / (K^n + R^n)

Parameters

R : array-like Repressor proxy (normalized BFP). K : float Half-maximal constant. n : float Hill coefficient.

Returns

np.ndarray Predicted normalized reporter level.

Source code in scripts/step_1c_fit_hill_curves.py
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
def hill_func(R, K, n):
    """
    Hill repression function:
      y = K^n / (K^n + R^n)

    Parameters
    ----------
    R : array-like
        Repressor proxy (normalized BFP).
    K : float
        Half-maximal constant.
    n : float
        Hill coefficient.

    Returns
    -------
    np.ndarray
        Predicted normalized reporter level.
    """
    return (K ** n) / (K ** n + np.power(R, n))  # vectorized equation

load_flowset_medians

load_flowset_medians(folder)

Load all .fcs files under a folder and compute gated medians.

Parameters

folder : str Directory path containing .fcs files.

Returns

pd.DataFrame One row per file with medians and '__filename'.

Raises

FileNotFoundError If no .fcs files were found.

Source code in scripts/step_1c_fit_hill_curves.py
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
def load_flowset_medians(folder: str) -> pd.DataFrame:
    """
    Load all .fcs files under a folder and compute gated medians.

    Parameters
    ----------
    folder : str
        Directory path containing .fcs files.

    Returns
    -------
    pd.DataFrame
        One row per file with medians and '__filename'.

    Raises
    ------
    FileNotFoundError
        If no .fcs files were found.
    """
    files = sorted(glob.glob(os.path.join(folder, "*.fcs")))  # discover FCS files
    print(f"[scan] {folder}{len(files)} files")  # debug
    if not files:  # guard
        raise FileNotFoundError(f"No FCS files found in: {folder}")
    out = pd.DataFrame([median_channels_for_file(f) for f in files])  # compute medians
    print(f"[table] medians shape={out.shape}")  # debug
    return out  # return table

load_replicate

load_replicate(folder, mBFP_neg, mmCherry_neg)

Load one replicate folder: medians → background subtraction → token parsing.

Parameters

folder : str Path to replicate folder with .fcs files. mBFP_neg : float NFC background for BFP. mmCherry_neg : float NFC background for mCherry.

Returns

pd.DataFrame Columns: [BV421-A, PE-A, plasmid, guide, dTAG]

Source code in scripts/step_1c_fit_hill_curves.py
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
def load_replicate(folder: str, mBFP_neg: float, mmCherry_neg: float) -> pd.DataFrame:
    """
    Load one replicate folder: medians → background subtraction → token parsing.

    Parameters
    ----------
    folder : str
        Path to replicate folder with .fcs files.
    mBFP_neg : float
        NFC background for BFP.
    mmCherry_neg : float
        NFC background for mCherry.

    Returns
    -------
    pd.DataFrame
        Columns: [BV421-A, PE-A, plasmid, guide, dTAG]
    """
    raw = load_flowset_medians(folder).copy()  # per-file medians
    print(f"[rep] {folder} shape={raw.shape}")  # debug
    raw[CH_BFP] = raw[CH_BFP] - mBFP_neg  # subtract BFP background
    raw[CH_mCh] = raw[CH_mCh] - mmCherry_neg  # subtract mCh background
    print(f"[rep] bg-sub head:\n{raw[[CH_BFP, CH_mCh]].head().to_string(index=False)}")  # preview
    parsed = raw["__filename"].apply(parse_filename_tokens).tolist()  # parse tokens
    parsed_df = pd.DataFrame(parsed, columns=["plasmid", "guide", "dTAG_token"])  # to frame
    df = pd.concat([raw[[CH_BFP, CH_mCh]], parsed_df], axis=1)  # combine
    df["dTAG"] = df["dTAG_token"].map(DTAG_MAP).astype(float)  # map to concentration
    df = df.drop(columns=["dTAG_token"])  # drop helper
    print(f"[rep] combined shape={df.shape}")  # debug
    return df  # return replicate table

median_channels_for_file

median_channels_for_file(fpath)

Compute per-file medians after boundary + singlet gating.

Parameters

fpath : str Path to an .fcs file.

Returns

pd.Series Numeric medians per channel with '__filename' set to basename (sans extension).

Raises

ValueError If required channels are missing.

Source code in scripts/step_1c_fit_hill_curves.py
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
def median_channels_for_file(fpath: str) -> pd.Series:
    """
    Compute per-file medians after boundary + singlet gating.

    Parameters
    ----------
    fpath : str
        Path to an .fcs file.

    Returns
    -------
    pd.Series
        Numeric medians per channel with '__filename' set to basename (sans extension).

    Raises
    ------
    ValueError
        If required channels are missing.
    """
    print(f"[read] {os.path.basename(fpath)}")  # which file
    sample = FCMeasurement(ID=os.path.basename(fpath), datafile=fpath).data  # read FCS
    print(f"[read] events={len(sample):,}, cols={len(sample.columns)}")  # size
    for ch in (CH_FSC_A, CH_SSC_A, CH_FSC_H, CH_BFP, CH_mCh):  # channel check
        if ch not in sample.columns:
            raise ValueError(f"Channel '{ch}' not found in: {fpath}")
    gated = apply_boundary_gate(sample)  # boundary gate
    if len(gated) == 0:  # fallback
        print("[gate] boundary empty → using raw sample")
        gated = sample
    singlets = apply_singlet_gate(gated)  # singlet gate
    if len(singlets) == 0:  # fallback
        print("[gate] singlet empty  → using boundary-gated")
        singlets = gated
    s = singlets.median(numeric_only=True)  # medians
    print(f"[median] BFP~{s.get(CH_BFP, np.nan):.3g} | mCh~{s.get(CH_mCh, np.nan):.3g}")  # preview
    s["__filename"] = os.path.splitext(os.path.basename(fpath))[0]  # filename stem
    return s  # return series

parse_filename_tokens

parse_filename_tokens(name)

Extract plasmid, guide, and plate-index token (for dTAG concentration).

Parameters

name : str Basename of file without extension.

Returns

(str, str, str) (plasmid, guide, dtag_idx_token)

Source code in scripts/step_1c_fit_hill_curves.py
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
def parse_filename_tokens(name: str):
    """
    Extract plasmid, guide, and plate-index token (for dTAG concentration).

    Parameters
    ----------
    name : str
        Basename of file without extension.

    Returns
    -------
    (str, str, str)
        (plasmid, guide, dtag_idx_token)
    """
    parts = FILENAME_SPLIT_RE.split(name)  # tokens
    plasmid = parts[0] if len(parts) > 0 else ""  # plasmid code
    guide = parts[1] if len(parts) > 1 else ""  # guide label 'G'/'N'
    dtag_token = parts[2] if len(parts) > 2 else ""  # token like 'dTAG11'
    m = re.search(r"([A-Za-z]+)(\d+)$", dtag_token)  # split alpha+digits
    dtag_idx = m.group(2) if m else dtag_token  # keep index digits
    return plasmid, guide, dtag_idx  # return tokens

save_hill_plot

save_hill_plot(df, fit_params, out_pdf)

Save Hill fit plot (fc vs normalized BFP) to PDF.

Parameters

df : pd.DataFrame Data with columns 'norm.bfp' and 'fc'. fit_params : (float, float) Tuple (K, n) of fitted parameters. out_pdf : str Output PDF filename (saved under OUT_PATH).

Source code in scripts/step_1c_fit_hill_curves.py
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
def save_hill_plot(df: pd.DataFrame, fit_params, out_pdf: str):
    """
    Save Hill fit plot (fc vs normalized BFP) to PDF.

    Parameters
    ----------
    df : pd.DataFrame
        Data with columns 'norm.bfp' and 'fc'.
    fit_params : (float, float)
        Tuple (K, n) of fitted parameters.
    out_pdf : str
        Output PDF filename (saved under OUT_PATH).
    """
    K, n = fit_params  # unpack params
    tmin, tmax = float(df["norm.bfp"].min()), float(df["norm.bfp"].max())  # x-range
    curve = pd.DataFrame({"norm.bfp": np.linspace(tmin, tmax, 200)})  # grid
    curve["fc_fit"] = hill_func(curve["norm.bfp"], K, n)  # predictions
    p = (  # build plot
            ggplot(df, aes("norm.bfp", "fc"))
            + geom_point(size=0.8, alpha=0.4)
            + geom_line(data=curve, mapping=aes(x="norm.bfp", y="fc_fit"))
            + coord_cartesian(ylim=(0, 1))
            + labs(x="Normalized repressor level", y="Normalized reporter level")
            + scale_y_continuous()
            + theme_castuner_like()
    )
    out_path = os.path.join(OUT_PATH, out_pdf)  # destination
    p.save(out_path, width=PLOT_W, height=PLOT_H, units="in")  # write PDF
    print(f"[plot] saved: {out_path}")  # debug

theme_castuner_like

theme_castuner_like()

Return a clean plot theme resembling the original R plots.

Source code in scripts/step_1c_fit_hill_curves.py
 98
 99
100
def theme_castuner_like():
    """Return a clean plot theme resembling the original R plots."""
    return theme_classic()