Skip to content

back to SWE-Agent summary

SWE-Agent: imbalanced-learn

Failed to run pytests for test tests

Pytest collection failure.

Patch diff

diff --git a/imblearn/over_sampling/_smote/base.py b/imblearn/over_sampling/_smote/base.py
index 8101f52..291c058 100644
--- a/imblearn/over_sampling/_smote/base.py
+++ b/imblearn/over_sampling/_smote/base.py
@@ -39,8 +39,8 @@ class BaseSMOTE(BaseOverSampler):
         """Check the NN estimators shared across the different SMOTE
         algorithms.
         """
-        pass
-
+        self.nn_ = check_neighbors_object('k_neighbors', self.k_neighbors, additional_neighbor=1)
+        self.nn_.set_params(**{'n_jobs': self.n_jobs})
     def _make_samples(self, X, y_dtype, y_type, nn_data, nn_num, n_samples, step_size=1.0, y=None):
         """A support function that returns artificial samples constructed along
         the line connecting nearest neighbours.
@@ -82,7 +82,15 @@ class BaseSMOTE(BaseOverSampler):
         y_new : ndarray of shape (n_samples_new,)
             Target values for synthetic samples.
         """
-        pass
+        random_state = check_random_state(self.random_state)
+        samples_indices = random_state.randint(low=0, high=nn_num.size, size=n_samples)
+        steps = step_size * random_state.uniform(size=n_samples)
+        rows = np.floor_divide(samples_indices, nn_num.shape[1])
+        cols = np.mod(samples_indices, nn_num.shape[1])
+
+        X_new, y_new = self._generate_samples(X, nn_data, nn_num, rows, cols, steps, y_type, y)
+
+        return X_new, y_new

     def _generate_samples(self, X, nn_data, nn_num, rows, cols, steps, y_type=None, y=None):
         """Generate a synthetic sample.
@@ -131,8 +139,23 @@ class BaseSMOTE(BaseOverSampler):
         -------
         X_new : {ndarray, sparse matrix} of shape (n_samples, n_features)
             Synthetically generated samples.
+
+        y_new : ndarray of shape (n_samples,)
+            Target values for synthetic samples.
         """
-        pass
+        n_samples, n_features = X.shape
+        X_new = np.zeros((steps.shape[0], n_features))
+        
+        if sparse.issparse(X):
+            for i, (row, col, step) in enumerate(zip(rows, cols, steps)):
+                X_new[i] = X[row].toarray() + step * (nn_data[nn_num[row, col]].toarray() - X[row].toarray())
+        else:
+            for i, (row, col, step) in enumerate(zip(rows, cols, steps)):
+                X_new[i] = X[row] + step * (nn_data[nn_num[row, col]] - X[row])
+        
+        y_new = np.full(steps.shape[0], fill_value=y_type)
+        
+        return X_new, y_new

     def _in_danger_noise(self, nn_estimator, samples, target_class, y, kind='danger'):
         """Estimate if a set of sample are in danger or noise.
@@ -166,7 +189,17 @@ class BaseSMOTE(BaseOverSampler):
         output : ndarray of shape (n_samples,)
             A boolean array where True refer to samples in danger or noise.
         """
-        pass
+        x = nn_estimator.kneighbors(samples, return_distance=False)[:, 1:]
+        nn_label = (y[x] != target_class).astype(int).sum(axis=1)
+        
+        if kind == 'danger':
+            # Samples are in danger if some but not all neighbors are of a different class
+            return (nn_label > 0) & (nn_label < x.shape[1])
+        elif kind == 'noise':
+            # Samples are noise if all neighbors are of a different class
+            return nn_label == x.shape[1]
+        else:
+            raise ValueError("'kind' should be either 'danger' or 'noise'.")

 @Substitution(sampling_strategy=BaseOverSampler._sampling_strategy_docstring, n_jobs=_n_jobs_docstring, random_state=_random_state_docstring)
 class SMOTE(BaseSMOTE):
@@ -581,4 +614,4 @@ class SMOTEN(SMOTE):

     def _validate_estimator(self):
         """Force to use precomputed distance matrix."""
-        pass
\ No newline at end of file
+        pass
diff --git a/imblearn/over_sampling/_smote/tests/test_smote.py b/imblearn/over_sampling/_smote/tests/test_smote.py
index 18feb55..a507341 100644
--- a/imblearn/over_sampling/_smote/tests/test_smote.py
+++ b/imblearn/over_sampling/_smote/tests/test_smote.py
@@ -6,4 +6,44 @@ from imblearn.over_sampling import SMOTE
 RND_SEED = 0
 X = np.array([[0.11622591, -0.0317206], [0.77481731, 0.60935141], [1.25192108, -0.22367336], [0.53366841, -0.30312976], [1.52091956, -0.49283504], [-0.28162401, -2.10400981], [0.83680821, 1.72827342], [0.3084254, 0.33299982], [0.70472253, -0.73309052], [0.28893132, -0.38761769], [1.15514042, 0.0129463], [0.88407872, 0.35454207], [1.31301027, -0.92648734], [-1.11515198, -0.93689695], [-0.18410027, -0.45194484], [0.9281014, 0.53085498], [-0.14374509, 0.27370049], [-0.41635887, -0.38299653], [0.08711622, 0.93259929], [1.70580611, -0.11219234]])
 Y = np.array([0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0])
-R_TOL = 0.0001
\ No newline at end of file
+R_TOL = 0.0001
+
+def test_generate_samples():
+    smote = SMOTE(random_state=RND_SEED)
+    smote._validate_estimator()
+    X_new, y_new = smote._generate_samples(X, X, np.arange(len(X)), np.array([0, 1]), np.array([0.5, 0.5]), y_type=0)
+    
+    assert X_new.shape[0] == 2
+    assert y_new.shape[0] == 2
+    assert np.all(y_new == 0)
+
+def test_in_danger_noise():
+    smote = SMOTE(random_state=RND_SEED)
+    nn_estimator = NearestNeighbors(n_neighbors=6)
+    nn_estimator.fit(X)
+    
+    # Test 'danger' classification
+    danger_samples = smote._in_danger_noise(nn_estimator, X[Y==0], 0, Y, kind='danger')
+    assert isinstance(danger_samples, np.ndarray)
+    assert danger_samples.dtype == bool
+    
+    # Test 'noise' classification
+    noise_samples = smote._in_danger_noise(nn_estimator, X[Y==0], 0, Y, kind='noise')
+    assert isinstance(noise_samples, np.ndarray)
+    assert noise_samples.dtype == bool
+    
+    # Test invalid 'kind' parameter
+    try:
+        smote._in_danger_noise(nn_estimator, X[Y==0], 0, Y, kind='invalid')
+    except ValueError:
+        pass
+    else:
+        assert False, "ValueError not raised for invalid 'kind' parameter"
+
+def test_smote_fit_resample():
+    smote = SMOTE(random_state=RND_SEED)
+    X_resampled, y_resampled = smote.fit_resample(X, Y)
+    
+    assert X_resampled.shape[0] > X.shape[0]
+    assert y_resampled.shape[0] > Y.shape[0]
+    assert np.sum(y_resampled == 0) == np.sum(y_resampled == 1)