"""Solutions and unit tests for problems 2882 and 2883.

This module was reconstructed from a patch that added two test files:

* src/test/java/g2801_2900/s2882_drop_duplicate_rows/solution_test.py
* src/test/java/g2801_2900/s2883_drop_missing_data/solution_test.py

Each section below holds the solution function from one of those files
together with its ``unittest`` test case class.
"""

import unittest

import pandas as pd
from pandas.testing import assert_frame_equal


# ---------------------------------------------------------------------------
# s2882_drop_duplicate_rows/solution_test.py
# ---------------------------------------------------------------------------

def dropDuplicateEmails(customers: pd.DataFrame) -> pd.DataFrame:
    """Return ``customers`` keeping only the first row for each email.

    Args:
        customers: Frame that contains an ``email`` column.

    Returns:
        A new frame with the first occurrence of every distinct ``email``
        value. Surviving rows keep their original index labels, and the
        frame passed in by the caller is left unmodified.
    """
    # No ``inplace=True`` here: mutating the argument would silently change
    # the caller's data as a side effect of asking for a filtered view.
    return customers.drop_duplicates(subset=['email'], keep='first')


class TestDropDuplicateEmails(unittest.TestCase):
    """Checks for :func:`dropDuplicateEmails`."""

    def test_no_duplicates(self):
        """Distinct emails: every row is returned."""
        data = {
            'customer_id': [1, 2, 3],
            'name': ['Ella', 'David', 'Zachary'],
            'email': [
                'emily@example.com',
                'michael@example.com',
                'sarah@example.com',
            ],
        }
        customers = pd.DataFrame(data)
        expected = pd.DataFrame(data)

        result = dropDuplicateEmails(customers).reset_index(drop=True)
        expected = expected.reset_index(drop=True)

        assert_frame_equal(result, expected)

    def test_with_duplicates(self):
        """A repeated email keeps the first row (id 4) and drops id 5."""
        data = {
            'customer_id': [1, 2, 3, 4, 5, 6],
            'name': ['Ella', 'David', 'Zachary', 'Alice', 'Finn', 'Violet'],
            'email': [
                'emily@example.com',
                'michael@example.com',
                'sarah@example.com',
                'john@example.com',
                'john@example.com',
                'alice@example.com',
            ],
        }
        customers = pd.DataFrame(data)

        expected_data = {
            'customer_id': [1, 2, 3, 4, 6],
            'name': ['Ella', 'David', 'Zachary', 'Alice', 'Violet'],
            'email': [
                'emily@example.com',
                'michael@example.com',
                'sarah@example.com',
                'john@example.com',
                'alice@example.com',
            ],
        }
        expected = pd.DataFrame(expected_data)

        # The dropped row leaves a gap in the index, so renumber before
        # comparing against the freshly built expectation.
        result = dropDuplicateEmails(customers).reset_index(drop=True)
        expected = expected.reset_index(drop=True)

        assert_frame_equal(result, expected)

    def test_empty_dataframe(self):
        """A frame with columns but no rows stays empty."""
        customers = pd.DataFrame(columns=['customer_id', 'name', 'email'])
        expected = customers.copy()

        result = dropDuplicateEmails(customers).reset_index(drop=True)
        expected = expected.reset_index(drop=True)

        assert_frame_equal(result, expected)

    def test_single_row(self):
        """A single row cannot be a duplicate and is returned as is."""
        data = {
            'customer_id': [1],
            'name': ['Ella'],
            'email': ['emily@example.com'],
        }
        customers = pd.DataFrame(data)
        expected = pd.DataFrame(data)

        result = dropDuplicateEmails(customers).reset_index(drop=True)
        expected = expected.reset_index(drop=True)

        assert_frame_equal(result, expected)

    def test_input_not_mutated(self):
        """The caller's frame still contains its duplicates afterwards."""
        customers = pd.DataFrame({
            'customer_id': [1, 2],
            'name': ['Ella', 'David'],
            'email': ['dup@example.com', 'dup@example.com'],
        })
        snapshot = customers.copy()

        dropDuplicateEmails(customers)

        assert_frame_equal(customers, snapshot)


# ---------------------------------------------------------------------------
# s2883_drop_missing_data/solution_test.py
# ---------------------------------------------------------------------------

def dropMissingData(students: pd.DataFrame) -> pd.DataFrame:
    """Return ``students`` without the rows whose ``name`` is missing.

    Args:
        students: Frame that contains a ``name`` column.

    Returns:
        A new frame holding only the rows with a non-missing ``name``.
        Surviving rows keep their original index labels, and the frame
        passed in by the caller is left unmodified.
    """
    # ``pd.DataFrame(...)`` is kept so that any input the constructor
    # accepts keeps working. ``subset`` is given as a list because a bare
    # label is only accepted by newer pandas releases.
    return pd.DataFrame(students).dropna(subset=['name'])


class TestDropMissingData(unittest.TestCase):
    """Checks for :func:`dropMissingData`."""

    def test_no_missing_data(self):
        """No missing names: every row is returned."""
        data = {
            'student_id': [32, 779, 849],
            'name': ['Piper', 'Georgia', 'Willow'],
            'age': [5, 20, 14],
        }
        students = pd.DataFrame(data)
        expected = pd.DataFrame(data)

        result = dropMissingData(students).reset_index(drop=True)
        expected = expected.reset_index(drop=True)

        assert_frame_equal(result, expected)

    def test_with_missing_data(self):
        """The row whose name is ``None`` (id 217) is removed."""
        data = {
            'student_id': [32, 217, 779, 849],
            'name': ['Piper', None, 'Georgia', 'Willow'],
            'age': [5, 19, 20, 14],
        }
        students = pd.DataFrame(data)

        expected_data = {
            'student_id': [32, 779, 849],
            'name': ['Piper', 'Georgia', 'Willow'],
            'age': [5, 20, 14],
        }
        expected = pd.DataFrame(expected_data)

        result = dropMissingData(students).reset_index(drop=True)
        expected = expected.reset_index(drop=True)

        assert_frame_equal(result, expected)

    def test_empty_dataframe(self):
        """A frame with columns but no rows stays empty."""
        students = pd.DataFrame(columns=['student_id', 'name', 'age'])
        expected = students.copy()

        result = dropMissingData(students).reset_index(drop=True)
        expected = expected.reset_index(drop=True)

        assert_frame_equal(result, expected)

    def test_all_missing_data(self):
        """Every name missing: the result has the columns but no rows."""
        data = {
            'student_id': [217, 301],
            'name': [None, None],
            'age': [19, 21],
        }
        students = pd.DataFrame(data)

        expected = pd.DataFrame(columns=['student_id', 'name', 'age'])

        result = dropMissingData(students).reset_index(drop=True)
        expected = expected.reset_index(drop=True)

        # The filtered frame keeps the input's column dtypes while the
        # hand-built empty frame does not, so only compare the contents.
        assert_frame_equal(result, expected, check_dtype=False)

    def test_single_row_with_missing_name(self):
        """A lone row with a missing name yields an empty result."""
        data = {
            'student_id': [217],
            'name': [None],
            'age': [19],
        }
        students = pd.DataFrame(data)

        expected = pd.DataFrame(columns=['student_id', 'name', 'age'])

        result = dropMissingData(students).reset_index(drop=True)
        expected = expected.reset_index(drop=True)

        # Same dtype caveat as in test_all_missing_data.
        assert_frame_equal(result, expected, check_dtype=False)

    def test_input_not_mutated(self):
        """The caller's frame still contains its missing-name row."""
        students = pd.DataFrame({
            'student_id': [32, 217],
            'name': ['Piper', None],
            'age': [5, 19],
        })
        snapshot = students.copy()

        dropMissingData(students)

        assert_frame_equal(students, snapshot)


if __name__ == '__main__':
    unittest.main()