E2 236 Foundations of ML¶
Lab 3 Optimization¶
Part A — Robust Regression with Huber Loss via Convex Optimization¶
Learning objectives
- Derive and implement the Huber loss and its subgradient
- Solve robust regression using:
  - Subgradient descent (from scratch)
  - CVX / cvxpy (convex solver)
  - Iteratively Reweighted Least Squares (IRLS)
- Diagnose convexity and compare methods on synthetic data
Instructions: Complete every cell marked # YOUR CODE HERE. Do not change test assertions.
Imports and Setup¶
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
np.random.seed(42)
%matplotlib inline
plt.rcParams.update({'figure.dpi':110, 'font.size':11})
# Optional: install cvxpy if not present
# !pip install cvxpy --quiet
try:
    import cvxpy as cp
    CVXPY = True
    print("cvxpy available ✓")
except ImportError:
    CVXPY = False
    print("cvxpy not found — CVX section will be skipped")
1. Data Generation¶
We generate a synthetic regression dataset with deliberate outliers to motivate robust regression.
Q1 — Complete the function below:
- Draw $n$ inlier points: $y_i = \mathbf{x}_i^\top \boldsymbol{\beta}^* + \varepsilon_i$, where $\varepsilon_i \sim \mathcal{N}(0, \sigma^2)$ and $\mathbf{x}_i \sim \mathcal{N}(\mathbf{0}, I)$.
- Replace n_outliers randomly selected $y$-values with large noise drawn from $\mathcal{N}(0, (10\sigma)^2)$.
def generate_data(n=150, d=5, sigma=0.5, n_outliers=20, seed=0):
"""
Generate a regression dataset with Gaussian outliers.
Returns
-------
X : ndarray (n, d) design matrix (NO bias column)
y : ndarray (n,) noisy targets
beta_star: ndarray (d,) true coefficient vector
outlier_idx: ndarray (int) indices of corrupted observations
"""
# YOUR CODE HERE
raise NotImplementedError
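A possible reference sketch is shown below (not necessarily the intended solution). It assumes $\boldsymbol{\beta}^*$ is drawn once from a standard normal and that corrupted targets are replaced outright by draws from $\mathcal{N}(0, (10\sigma)^2)$; how the outliers are injected is a modelling choice.
# Reference sketch only: assumes beta* ~ N(0, I) and that corrupted targets are
# replaced outright by draws from N(0, (10*sigma)^2); adapt as needed.
def generate_data_sketch(n=150, d=5, sigma=0.5, n_outliers=20, seed=0):
    rng = np.random.default_rng(seed)
    beta_star = rng.standard_normal(d)                  # true coefficients (assumed ~ N(0, I))
    X = rng.standard_normal((n, d))                     # design matrix, no bias column
    y = X @ beta_star + sigma * rng.standard_normal(n)  # inlier targets
    outlier_idx = rng.choice(n, size=n_outliers, replace=False)
    y[outlier_idx] = 10 * sigma * rng.standard_normal(n_outliers)  # corrupted targets
    return X, y, beta_star, outlier_idx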
# Generate and inspect
X, y, beta_star, outlier_idx = generate_data()
print(f"X shape: {X.shape}, y shape: {y.shape}")
print(f"True beta: {beta_star}")
print(f"Number of outliers: {len(outlier_idx)}")
2. Loss Functions¶
Huber loss¶
$$\mathcal{L}_{\text{Huber}}(\boldsymbol{\beta}) = \frac{1}{n}\sum_{i=1}^n h_\delta(y_i - \mathbf{x}_i^\top \boldsymbol{\beta})$$
where the per-sample Huber function is
$$h_\delta(r) = \begin{cases} \tfrac{1}{2}r^2 & |r| \le \delta \\ \delta\bigl(|r| - \tfrac{1}{2}\delta\bigr) & |r| > \delta \end{cases}$$
Q2.1 — Implement the loss function.
Q2.2 — Show that $h_\delta$ is convex and that its derivative/subgradient w.r.t. $r$ is
$$h_\delta'(r) = \begin{cases} r & |r| \le \delta \\ \delta \operatorname{sign}(r) & |r| > \delta \end{cases}$$
(Written answer — no code required for Q2.2.)
def huber_loss(beta, X, y, delta=1.0):
"""
Mean Huber loss.
Parameters
----------
beta : ndarray (d,)
X : ndarray (n, d)
y : ndarray (n,)
delta : float — transition point
Returns
-------
loss : float
"""
# YOUR CODE HERE
raise NotImplementedError
def huber_gradient(beta, X, y, delta=1.0):
"""
Gradient of the mean Huber loss w.r.t. beta.
Returns
-------
grad : ndarray (d,)
"""
# YOUR CODE HERE
raise NotImplementedError
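For reference, one possible implementation is sketched below. The sanity checks that follow also call an ols_loss helper that does not appear in this section; if it is not defined elsewhere in your notebook, the mean-squared-error version here (the exact scaling is an assumption) will satisfy them.
# ols_loss is referenced by the checks below but not defined in this section;
# the 1/2 * mean-squared-error convention used here is an assumption.
def ols_loss(beta, X, y):
    r = y - X @ beta
    return 0.5 * np.mean(r ** 2)

# Sketches of the Huber loss and its gradient (one possible implementation).
def huber_loss_sketch(beta, X, y, delta=1.0):
    r = y - X @ beta
    quad = 0.5 * r ** 2                        # |r| <= delta branch
    lin = delta * (np.abs(r) - 0.5 * delta)    # |r| >  delta branch
    return float(np.mean(np.where(np.abs(r) <= delta, quad, lin)))

def huber_gradient_sketch(beta, X, y, delta=1.0):
    r = y - X @ beta
    dh = np.clip(r, -delta, delta)             # h'(r): r if |r| <= delta, delta*sign(r) otherwise
    return -X.T @ dh / len(y)                  # chain rule: d r_i / d beta = -x_i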
# --- Sanity checks (do not modify) ---
beta0 = np.zeros(X.shape[1])
assert np.isscalar(ols_loss(beta0, X, y)), "OLS loss must return a scalar"
assert np.isscalar(huber_loss(beta0, X, y)), "Huber loss must return a scalar"
assert huber_gradient(beta0, X, y).shape == beta0.shape, "Gradient shape mismatch"
print("Loss function checks passed ✓")
# Q2.3 — Plot h_delta(r) for delta in {0.5, 1.0, 2.0} and compare with |r|
r = np.linspace(-4, 4, 400)
# YOUR CODE HERE
# Plot h_delta(r) and |r| on the same axes
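One way to fill in this cell (styling choices are arbitrary):
# Sketch: per-sample Huber function for several deltas, compared with |r|
def h_delta(r, delta):
    return np.where(np.abs(r) <= delta, 0.5 * r ** 2, delta * (np.abs(r) - 0.5 * delta))

plt.figure(figsize=(6, 4))
for delta in [0.5, 1.0, 2.0]:
    plt.plot(r, h_delta(r, delta), label=fr"Huber, $\delta={delta}$")
plt.plot(r, np.abs(r), 'k--', label=r"$|r|$")
plt.xlabel(r"residual $r$")
plt.ylabel(r"$h_\delta(r)$")
plt.legend()
plt.title("Huber loss vs absolute loss")
plt.show()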
Q2.2 — Convexity argument (written):
[Your answer here — show h_δ is convex by checking the second derivative or epigraph.]
3. Subgradient Descent¶
Subgradient descent updates: $$\boldsymbol{\beta}^{(t+1)} = \boldsymbol{\beta}^{(t)} - \eta_t \, \nabla_\beta \mathcal{L}_{\text{Huber}}(\boldsymbol{\beta}^{(t)})$$
We use a diminishing step-size schedule: $\eta_t = \eta_0 / \sqrt{t+1}$.
Q3 — Implement subgradient descent for Huber regression. Return the parameter history and loss history so we can analyse convergence.
def subgradient_descent_huber(X, y, delta=1.0, eta0=0.1, n_iter=500):
"""
Subgradient descent for Huber regression.
Parameters
----------
X : ndarray (n, d)
y : ndarray (n,)
delta : float Huber threshold
eta0 : float initial step-size
n_iter : int number of iterations
Returns
-------
beta_hist : ndarray (n_iter+1, d) parameter history (including init)
loss_hist : ndarray (n_iter,) Huber loss at each step
"""
# YOUR CODE HERE
raise NotImplementedError
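One possible sketch of the update loop, assuming a zero initialisation and the completed huber_loss / huber_gradient from Section 2 (both assumptions, not requirements):
# Sketch: diminishing-step subgradient descent with eta_t = eta0 / sqrt(t + 1)
def subgradient_descent_huber_sketch(X, y, delta=1.0, eta0=0.1, n_iter=500):
    beta = np.zeros(X.shape[1])
    beta_hist = [beta.copy()]
    loss_hist = []
    for t in range(n_iter):
        g = huber_gradient(beta, X, y, delta)        # (sub)gradient at the current iterate
        beta = beta - eta0 / np.sqrt(t + 1) * g      # diminishing step size
        beta_hist.append(beta.copy())
        loss_hist.append(huber_loss(beta, X, y, delta))
    return np.array(beta_hist), np.array(loss_hist)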
beta_hist_sgd, loss_hist_sgd = subgradient_descent_huber(X, y)
print(f"Final Huber loss (subgradient): {loss_hist_sgd[-1]:.4f}")
# Q3.1 — Plot the convergence curve (loss vs iteration)
# YOUR CODE HERE
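A minimal convergence plot could be:
plt.figure(figsize=(6, 4))
plt.plot(loss_hist_sgd)
plt.xlabel("iteration")
plt.ylabel("Huber loss")
plt.title("Subgradient descent convergence")
plt.show()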
4. Solving with CVXPY (Convex Solver)¶
Q4 — Formulate Huber regression as a CVXPY problem and solve it.
Hint: cp.huber(expr, M=delta) returns the element-wise Huber function.
def solve_huber_cvxpy(X, y, delta=1.0):
"""
Solve Huber regression using CVXPY.
Returns
-------
beta_cvx : ndarray (d,) or None if cvxpy unavailable
"""
if not CVXPY:
print("cvxpy not available — skipping")
return None
# YOUR CODE HERE
# Hint: call prob.solve() with NO solver argument —
# CVXPY will automatically select the best installed solver.
# After solving, check prob.status before returning beta.value.
raise NotImplementedError
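A possible formulation is sketched below. Note that CVXPY's cp.huber(r, M) equals $r^2$ for $|r| \le M$ and $2M|r| - M^2$ otherwise, i.e. twice the $h_\delta$ defined above, so the objective is divided by $2n$ to match; the extra constant factor does not change the minimiser.
# Sketch: minimise the mean Huber penalty on the residuals with CVXPY
def solve_huber_cvxpy_sketch(X, y, delta=1.0):
    if not CVXPY:
        return None
    n, d = X.shape
    beta = cp.Variable(d)
    # cp.huber is 2 * h_delta, hence the division by 2*n
    objective = cp.Minimize(cp.sum(cp.huber(y - X @ beta, M=delta)) / (2 * n))
    prob = cp.Problem(objective)
    prob.solve()                       # let CVXPY pick an installed solver
    if prob.status not in ("optimal", "optimal_inaccurate"):
        print(f"Solver status: {prob.status}")
        return None
    return beta.value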
beta_cvx = solve_huber_cvxpy(X, y)
if beta_cvx is not None:
print(f"CVXPY solution: {beta_cvx}")
print(f"True beta: {beta_star}")
CVXPY solution: [ 0.462  0.543  1.338 -1.775  1.732]
True beta: [ 0.468  0.527  1.375 -1.815  1.739]
5. IRLS for Huber Regression¶
Iteratively Reweighted Least Squares (IRLS) exploits the fact that the gradient of the Huber loss can be written as
$$\nabla \mathcal{L} = -\frac{1}{n} X^\top W(\boldsymbol{\beta})(\mathbf{y} - X\boldsymbol{\beta})$$
where $W = \operatorname{diag}(w_i)$ with
$$w_i = \begin{cases} 1 & |r_i| \le \delta \\ \delta / |r_i| & |r_i| > \delta \end{cases}$$
At each iteration, solve the weighted least squares problem:
$$\boldsymbol{\beta}^{(t+1)} = (X^\top W^{(t)} X)^{-1} X^\top W^{(t)} \mathbf{y}$$
Q5 — Implement IRLS.
def irls_huber(X, y, delta=1.0, n_iter=50, tol=1e-6):
"""
IRLS for Huber regression.
Returns
-------
beta : ndarray (d,)
loss_hist : list of floats
"""
# YOUR CODE HERE
raise NotImplementedError
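One possible IRLS loop is sketched below; stopping on the change in successive iterates is an assumption, and the loss history is recorded with the completed huber_loss from Section 2.
# Sketch: weighted least squares with Huber weights, iterated until the
# coefficient update falls below tol
def irls_huber_sketch(X, y, delta=1.0, n_iter=50, tol=1e-6):
    beta = np.zeros(X.shape[1])
    loss_hist = []
    for _ in range(n_iter):
        r = y - X @ beta
        absr = np.maximum(np.abs(r), 1e-12)            # guard against division by zero
        w = np.minimum(1.0, delta / absr)              # Huber weights w_i
        XtW = X.T * w                                  # X^T W without forming diag(w)
        beta_new = np.linalg.solve(XtW @ X, XtW @ y)   # weighted least-squares step
        loss_hist.append(huber_loss(beta_new, X, y, delta))
        if np.linalg.norm(beta_new - beta) < tol:
            beta = beta_new
            break
        beta = beta_new
    return beta, loss_hist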
beta_irls, loss_hist_irls = irls_huber(X, y)
print(f"IRLS solution: {beta_irls}")
print(f"True beta: {beta_star}")
print(f"Final Huber loss (IRLS): {loss_hist_irls[-1]:.4f}")
IRLS solution: [ 0.462  0.543  1.338 -1.775  1.732]
True beta: [ 0.468  0.527  1.375 -1.815  1.739]
Final Huber loss (IRLS): 0.5309
6. Comparison¶
Q6.1 — Fill in the comparison table below.
Q6.2 — On a single figure, plot:
- The OLS fit (closed-form)
- Huber fit (IRLS or subgradient)
- Data points, highlighting outliers
Use the first feature of $X$ for a 2-D scatter plot (fix remaining features at 0).
Q6.3 (written) — When would you prefer IRLS over the subgradient method, and vice versa?
# OLS closed-form
beta_ols = np.linalg.lstsq(X, y, rcond=None)[0]
# Q6.1 — Compute errors
for name, beta_hat in [('OLS', beta_ols), ('Huber-SGD', beta_hist_sgd[-1]),
                       ('Huber-IRLS', beta_irls)]:
    mse = np.mean((beta_hat - beta_star)**2)
    loss = ols_loss(beta_hat, X, y)
    print(f"{name:12s} | beta MSE: {mse:.4f} | OLS loss: {loss:.4f}")
# Q6.2 — 2-D plot
# YOUR CODE HERE
Method       | beta MSE
----------------------------------------
Huber-SGD    | 0.0197
Huber-IRLS   | 0.0007
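A sketch of one possible Q6.2 figure: each fitted model is evaluated along the first feature with the remaining features held at 0, as suggested above.
# Sketch: data and fitted lines along the first feature (other features fixed at 0)
grid = np.linspace(X[:, 0].min(), X[:, 0].max(), 100)
X_grid = np.zeros((grid.size, X.shape[1]))
X_grid[:, 0] = grid

inliers = np.setdiff1d(np.arange(len(y)), outlier_idx)
plt.figure(figsize=(6, 4))
plt.scatter(X[inliers, 0], y[inliers], s=15, label="inliers")
plt.scatter(X[outlier_idx, 0], y[outlier_idx], s=30, c='red', marker='x', label="outliers")
plt.plot(grid, X_grid @ beta_ols, label="OLS fit")
plt.plot(grid, X_grid @ beta_irls, label="Huber fit (IRLS)")
plt.xlabel("$x_1$")
plt.ylabel("$y$")
plt.legend()
plt.title("OLS vs Huber fit")
plt.show()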
Q6.3 — Written answer:
[Your answer here]
7. Effect of Delta¶
Q7 — Train IRLS for $\delta \in \{0.1, 0.5, 1.0, 2.0, 5.0\}$ and plot the final $\|\hat{\boldsymbol{\beta}} - \boldsymbol{\beta}^*\|_2$ vs $\delta$.
Q7 (written) — Explain what happens as $\delta \to 0$ and $\delta \to \infty$.
# YOUR CODE HERE
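One possible sweep, reusing irls_huber once it is completed:
# Sketch: coefficient error as a function of the Huber threshold delta
deltas = [0.1, 0.5, 1.0, 2.0, 5.0]
errors = []
for delta in deltas:
    beta_d, _ = irls_huber(X, y, delta=delta)
    errors.append(np.linalg.norm(beta_d - beta_star))

plt.figure(figsize=(6, 4))
plt.plot(deltas, errors, 'o-')
plt.xscale('log')
plt.xlabel(r"$\delta$")
plt.ylabel(r"$\|\hat{\beta} - \beta^*\|_2$")
plt.title("Effect of the Huber threshold")
plt.show()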
Q7 — Written answer:
[Your answer here]