i asked an llm to test it on the digits dataset and [here](<a href="https://imgur.com/a/XAt0VRU" rel="nofollow">https://imgur.com/a/XAt0VRU</a>) are the results.<p>```
import numpy as np
import time
import matplotlib.pyplot as plt
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.datasets import load_digits
from sklearn.metrics import pairwise_distances
from sklearn.manifold import TSNE<p>try:
import umap
HAS_UMAP = True
except ImportError:
HAS_UMAP = False
print("Warning: 'umap-learn' not installed. Comparison will be skipped.")<p>class SineLandmarkReduction(BaseEstimator, TransformerMixin):
def __init__(self, n_components=2, n_landmarks=50,
mode='data_derived', # 'sine' or 'data_derived'
distance_warping=1.0,
random_state=42):
self.n_components = n_components
self.n_landmarks = n_landmarks
self.mode = mode
self.p = distance_warping
self.random_state = random_state
self.rng = np.random.RandomState(random_state)<p><pre><code> def _generate_sine_landmarks(self, n_features):
"""Generates the high-dim 'sine skeleton'."""
a = self.rng.uniform(0.5, 2.0, n_features)
omega = self.rng.uniform(0.5, 1.5, n_features)
phi = self.rng.uniform(0, 2 * np.pi, n_features)
t = np.linspace(0, 2 * np.pi, self.n_landmarks)
L_high = (a[:, None] * np.sin(omega[:, None] * t + phi[:, None])).T
return L_high
def fit(self, X, y=None):
self.scaler = StandardScaler()
X_scaled = self.scaler.fit_transform(X)
n_samples, n_features = X_scaled.shape
if self.mode == 'sine':
self.L_high = self._generate_sine_landmarks(n_features)
l_min, l_max = self.L_high.min(), self.L_high.max()
x_min, x_max = X_scaled.min(), X_scaled.max()
self.L_high = (self.L_high - l_min) / (l_max - l_min) * (x_max - x_min) + x_min
else: # 'data_derived'
indices = self.rng.choice(n_samples, self.n_landmarks, replace=False)
self.L_high = X_scaled[indices].copy()
self.pca_landmarks = PCA(n_components=self.n_components)
self.L_low = self.pca_landmarks.fit_transform(self.L_high)
self.L0_low = self.L_low[0]
self.L_others_low = self.L_low[1:]
self.A = 2 * (self.L_others_low - self.L0_low)
self.A_pinv = np.linalg.pinv(self.A)
self.L0_sq_norm = np.sum(self.L0_low**2)
self.Li_sq_norms = np.sum(self.L_others_low**2, axis=1)
return self
def transform(self, X):
X_scaled = self.scaler.transform(X)
D = pairwise_distances(X_scaled, self.L_high, metric='euclidean')
if self.p != 1.0:
D = np.power(D, self.p)
D_sq = D**2
d0_sq = D_sq[:, 0:1]
di_sq = D_sq[:, 1:]
term_dist = d0_sq - di_sq
term_geom = self.Li_sq_norms - self.L0_sq_norm
B = term_dist + term_geom
Y = np.dot(self.A_pinv, B.T).T
return Y
</code></pre>
if HAS_UMAP:
digits = load_digits()
X = digits.data
y = digits.target<p><pre><code> print(f"Dataset: Digits (N={X.shape[0]}, D={X.shape[1]})")
print("-" * 40)
start = time.time()
umap_reducer = umap.UMAP(n_neighbors=15, min_dist=0.1, random_state=42)
X_umap = umap_reducer.fit_transform(X)
umap_time = time.time() - start
print(f"UMAP Time: {umap_time:.4f} s")
start = time.time()
tsne_reducer = TSNE(n_components=2, perplexity=30, random_state=42)
X_tsne = tsne_reducer.fit_transform(X)
tsne_time = time.time() - start
print(f"t-SNE Time: {tsne_time:.4f} s")
start = time.time()
slr = SineLandmarkReduction(n_landmarks=50, mode='data_derived', distance_warping=0.5)
X_slr = slr.fit_transform(X)
slr_time = time.time() - start
print(f"SLR Time: {slr_time:.4f} s")
print("-" * 40)
print(f"SLR vs UMAP Speedup: {umap_time / slr_time:.1f}x")
print(f"SLR vs t-SNE Speedup: {tsne_time / slr_time:.1f}x")
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
sc1 = axes[0].scatter(X_umap[:, 0], X_umap[:, 1], c=y, cmap='Spectral', s=5, alpha=0.7)
axes[0].set_title(f"UMAP\nTime: {umap_time:.3f}s")
axes[0].axis('off')
sc2 = axes[1].scatter(X_tsne[:, 0], X_tsne[:, 1], c=y, cmap='Spectral', s=5, alpha=0.7)
axes[1].set_title(f"t-SNE\nTime: {tsne_time:.3f}s")
axes[1].axis('off')
sc3 = axes[2].scatter(X_slr[:, 0], X_slr[:, 1], c=y, cmap='Spectral', s=5, alpha=0.7)
axes[2].set_title(f"Sine Landmark Reduction (SLR)\nTime: {slr_time:.3f}s")
axes[2].axis('off')
plt.tight_layout()
plt.savefig('comparison_plot.png', dpi=150, bbox_inches='tight')
print("\nPlot saved to comparison_plot.png")
plt.show()
</code></pre>
else:
print("Please install umap-learn to run the comparison: pip install umap-learn")
```