Experiment 023 — No-Till vs Tilled 16S Sampling

Rendered from exp-023-notill-sampling.ipynb

Experiment 023 — No-Till vs Tilled 16S Sampling

Extends Exp 004 (sequencing noise / rarefaction) to compare sampling strategies for no-till (high diversity) vs tilled (low diversity) soil microbiome communities. Answers:

Does the saturation depth differ between soil management regimes?
Does aggregate stability affect effective sampling?
What is the minimum depth to reliably distinguish the two communities?

Method:

Generate two synthetic communities: no-till (150 genera, high evenness) and tilled (100 genera, lower evenness / more dominant species)
Run rarefaction curves for both at multiple depths
Compare Shannon diversity convergence depths
Compare Chao1 richness estimation accuracy at various depths
Determine minimum depth to reliably distinguish the two communities

Cross-spring: wetSpring (16S microbiome pipeline).

Domain: Soil Science Faculty: Cross-spring (wetSpring) Reference: Cross-spring: wetSpring 16S pipeline

Data source: control/notill_sampling/notill_sampling.py + benchmark_*.json

This notebook is the publication-grade Python baseline for Experiment 023. The identical computations are validated in Rust (see validate_* binary) and delegated to barraCuda for GPU acceleration.

import json
import math
import sys
from pathlib import Path

import numpy as np
import matplotlib.pyplot as plt

# Wire path to groundSpring control/ for common utilities
CONTROL = Path('..') / '..' / 'control'
sys.path.insert(0, str(CONTROL))
from common import *  # noqa: F403 — validation harness

# Load benchmark data
benchmark_path = CONTROL / 'notill_sampling' / 'benchmark_notill_sampling.json'
with open(benchmark_path) as f:
    benchmark = json.load(f)

PASS_COLOR = '#2ecc71'
FAIL_COLOR = '#e74c3c'
INFO_COLOR = '#3498db'

print(f'Loaded benchmark: benchmark_notill_sampling.json')
print(f'Provenance: {benchmark.get("_provenance", {})}')

Model Implementation

def generate_community(
    n_genera: int,
    log_normal_mu: float,
    log_normal_sigma: float,
    seed: int = 42,
) -> dict:
    """Generate a synthetic community from log-normal abundance distribution.

    Uses log-normal to produce realistic rank-abundance curves.
    Higher sigma -> more uneven (dominant species); lower sigma -> higher evenness.
    """
    rng = np.random.default_rng(seed)
    raw = rng.lognormal(mean=log_normal_mu, sigma=log_normal_sigma, size=n_genera)
    raw = np.maximum(raw, 1e-12)  # avoid zeros
    abundances = raw / raw.sum()
    return {
        "n_genera": n_genera,
        "true_abundances": abundances,
    }

def compute_shannon(counts: np.ndarray) -> float:
    """Shannon diversity index H' = -Sigma(p_i ln p_i)."""
    total = counts.sum()
    if total == 0:
        return 0.0
    proportions = counts[counts > 0] / total
    return float(-np.sum(proportions * np.log(proportions)))


def chao1(counts: np.ndarray) -> float:
    """Chao1 non-parametric richness estimator.

    S_chao1 = S_obs + f1^2 / (2*f2)
    When f2=0 and f1>0: S_obs + f1*(f1-1)/2  (bias-corrected, Chao 1984).
    """
    s_obs = int(np.sum(counts > 0))
    f1 = int(np.sum(counts == 1))
    f2 = int(np.sum(counts == 2))

    if f2 > 0:
        return s_obs + f1**2 / (2 * f2)
    if f1 > 0:
        return s_obs + f1 * (f1 - 1) / 2
    return float(s_obs)

def rarefaction_at_depth(
    community: dict,
    depth: int,
    n_replicates: int,
    base_seed: int,
) -> dict:
    """Run rarefaction at a specific sequencing depth."""
    rng = np.random.default_rng(base_seed + depth)
    true_abund = community["true_abundances"]

    shannon_values: list[float] = []
    chao1_values: list[float] = []

    for _ in range(n_replicates):
        counts = rng.multinomial(depth, true_abund)
        shannon_values.append(compute_shannon(counts))
        chao1_values.append(chao1(counts))

    return {
        "depth": depth,
        "shannon_mean": float(np.mean(shannon_values)),
        "shannon_std": float(np.std(shannon_values)),
        "chao1_mean": float(np.mean(chao1_values)),
        "chao1_std": float(np.std(chao1_values)),
    }

def find_saturation_depth(
    rarefaction_results: list[dict],
    true_shannon: float,
    threshold_pct: float = 5.0,
) -> int:
    """Find depth where Shannon stabilizes within threshold_pct of true."""
    for result in rarefaction_results:
        obs_h = result["shannon_mean"]
        if true_shannon > 0:
            pct_diff = abs(obs_h - true_shannon) / true_shannon * 100
            if pct_diff <= threshold_pct:
                return int(result["depth"])
    return -1

Validation

Initialization

print("groundSpring Exp 023: No-Till vs Tilled 16S Sampling Design")
print("  Cross-spring: wetSpring (16S microbiome pipeline)")

communities_config = benchmark["communities"]
rarefaction_config = benchmark["rarefaction"]
expected = benchmark["expected"]

depths = rarefaction_config["depths"]
n_replicates = rarefaction_config["n_replicates"]
base_seed = rarefaction_config["seed"]

Synthetic Communities

notill_cfg = communities_config["notill"]
tilled_cfg = communities_config["tilled"]

if "abundances" in notill_cfg:
    notill = {"n_genera": notill_cfg["n_genera"], "true_abundances": np.array(notill_cfg["abundances"])}
else:
    notill = generate_community(
        notill_cfg["n_genera"],
        notill_cfg["log_normal_mu"],
        notill_cfg["log_normal_sigma"],
        seed=base_seed,
    )
if "abundances" in tilled_cfg:
    tilled = {"n_genera": tilled_cfg["n_genera"], "true_abundances": np.array(tilled_cfg["abundances"])}
else:
    tilled = generate_community(
        tilled_cfg["n_genera"],
        tilled_cfg["log_normal_mu"],
        tilled_cfg["log_normal_sigma"],
        seed=base_seed + 1000,
    )

True Shannon from high-resolution counts

notill_true_shannon = compute_shannon(
    (notill["true_abundances"] * 1e8).astype(np.int64)
)
tilled_true_shannon = compute_shannon(
    (tilled["true_abundances"] * 1e8).astype(np.int64)
)

print(f"  No-till: {notill['n_genera']} genera, true Shannon = {notill_true_shannon:.4f}")
print(f"  Tilled:  {tilled['n_genera']} genera, true Shannon = {tilled_true_shannon:.4f}")

Rarefaction Analysis

notill_results = []
tilled_results = []

for depth in depths:
    nr = rarefaction_at_depth(notill, depth, n_replicates, base_seed)
    tr = rarefaction_at_depth(tilled, depth, n_replicates, base_seed)
    notill_results.append(nr)
    tilled_results.append(tr)

    print(f"\n  Depth {depth:>6d} reads:")
    print(
        f"    No-till Shannon: {nr['shannon_mean']:.4f} +/- {nr['shannon_std']:.4f}, "
        f"Chao1: {nr['chao1_mean']:.1f} +/- {nr['chao1_std']:.1f}"
    )
    print(
        f"    Tilled  Shannon: {tr['shannon_mean']:.4f} +/- {tr['shannon_std']:.4f}, "
        f"Chao1: {tr['chao1_mean']:.1f} +/- {tr['chao1_std']:.1f}"
    )

Validate Expected Patterns

depth_to_notill = {r["depth"]: r for r in notill_results}
depth_to_tilled = {r["depth"]: r for r in tilled_results}

high_depth = max(depths)
notill_shannon_high = depth_to_notill[high_depth]["shannon_mean"]
tilled_shannon_high = depth_to_tilled[high_depth]["shannon_mean"]

check_true(
    "No-till has higher diversity than tilled at high depth",
    notill_shannon_high > tilled_shannon_high,
)

check_range(
    "No-till Shannon at high depth",
    notill_shannon_high,
    expected["notill_shannon_range"][0],
    expected["notill_shannon_range"][1],
)

check_range(
    "Tilled Shannon at high depth",
    tilled_shannon_high,
    expected["tilled_shannon_range"][0],
    expected["tilled_shannon_range"][1],
)

notill_chao1_high = depth_to_notill[high_depth]["chao1_mean"]
tilled_chao1_high = depth_to_tilled[high_depth]["chao1_mean"]
check_true(
    "No-till Chao1 higher than tilled at high depth",
    notill_chao1_high > tilled_chao1_high,
)

if 1000 in depth_to_notill:
    notill_shannon_1k = depth_to_notill[1000]["shannon_mean"]
    tilled_shannon_1k = depth_to_tilled[1000]["shannon_mean"]
    check_true(
        "Communities distinguishable at 1000 reads (no-till Shannon > tilled)",
        notill_shannon_1k > tilled_shannon_1k,
    )

sat_notill = find_saturation_depth(notill_results, notill_true_shannon)
sat_tilled = find_saturation_depth(tilled_results, tilled_true_shannon)

print("\n  Saturation depth (5% convergence):")
print(f"    No-till: {sat_notill} reads")
print(f"    Tilled:  {sat_tilled} reads")

check_range(
    "No-till saturation depth in expected range",
    sat_notill,
    expected["saturation_depth_notill_range"][0],
    expected["saturation_depth_notill_range"][1],
)

check_range(
    "Tilled saturation depth in expected range",
    sat_tilled,
    expected["saturation_depth_tilled_range"][0],
    expected["saturation_depth_tilled_range"][1],
)

Key Findings

print(f"\n{'=' * 72}")

Distinguishable at 1000 reads: yes

print(f"4. No-till Chao1 ({notill_chao1_high:.1f}) > Tilled Chao1 ({tilled_chao1_high:.1f})")

# Results: {pass_count()}/{total_count()} checks passed
print_summary("Exp 023: No-Till vs Tilled Sampling Design")

Visualization

# Publication-grade summary chart for Exp 023
fig, ax = plt.subplots(figsize=(8, 4))

p, f_count, t = pass_count(), fail_count(), total_count()
ax.barh(['Pass', 'Fail'], [p, f_count], color=[PASS_COLOR, FAIL_COLOR])
ax.set_xlim(0, max(t * 1.15, 1))
ax.set_title('Exp 023: No-Till vs Tilled 16S Sampling — Validation Results')
ax.set_xlabel('Check Count')
for i, v in enumerate([p, f_count]):
    if v > 0:
        ax.text(v + 0.3, i, str(v), va='center', fontweight='bold')

plt.tight_layout()
plt.savefig(f'/tmp/groundspring_exp023.png', dpi=150, bbox_inches='tight')
plt.show()
print(f'\nResult: {p}/{t} PASS, {f_count}/{t} FAIL')

Provenance & Summary

Field	Value
Experiment	023 — No-Till vs Tilled 16S Sampling
Domain	Soil Science
Reference	Cross-spring: wetSpring 16S pipeline
Faculty	Cross-spring (wetSpring)
Python baseline	`control/notill_sampling/notill_sampling.py`
Benchmark JSON	`control/notill_sampling/benchmark_notill_sampling.json`
Rust validator	`validate_*` binary (exit-code protocol)
Rust speedup	See benchmark comparison notebook
License	AGPL-3.0-or-later

Provenance chain: Python baseline → Rust validation → barraCuda GPU → metalForge cross-substrate → primal IPC composition

See primals.eco for rendered lab notebooks.