|
13 | 13 | from io import StringIO
|
14 | 14 |
|
15 | 15 | import numpy as np
|
| 16 | +import pandas as pd |
16 | 17 | import scipy.sparse as sp
|
17 | 18 |
|
18 | 19 | from Orange.data import Variable, ContinuousVariable, DiscreteVariable, \
|
@@ -698,6 +699,117 @@ def varcls_modified(self, name):
|
698 | 699 | var.have_time = 1
|
699 | 700 | return var
|
700 | 701 |
|
def test_additional_formats(self):
    """Check parsing of sample strings with TimeVariable.ADDITIONAL_FORMATS.

    Each fixture pairs an expected ``datetime`` with a mapping from a key
    of ``TimeVariable.ADDITIONAL_FORMATS`` to sample inputs.  Every sample
    is parsed with each format listed in the first element of the entry
    stored under that key, and at least one of the results must equal the
    expected value.
    """
    fixtures = (
        # dates only
        (
            datetime(2022, 2, 7),
            {
                "2021-11-25": ("2022-02-07",),
                "25.11.2021": (
                    "07.02.2022",
                    "07. 02. 2022",
                    "7.2.2022",
                    "7. 2. 2022",
                ),
                "25.11.21": ("07.02.22", "07. 02. 22", "7.2.22", "7. 2. 22"),
                "11/25/2021": ("02/07/2022", "2/7/2022"),
                "11/25/21": ("02/07/22", "2/7/22"),
                "20211125": ("20220207",),
            },
        ),
        # dates with times that include seconds
        (
            datetime(2022, 2, 7, 10, 11, 12),
            {
                "2021-11-25 00:00:00": (
                    "2022-02-07 10:11:12",
                    "2022-02-07 10:11:12.00",
                ),
                "25.11.2021 00:00:00": (
                    "07.02.2022 10:11:12",
                    "07. 02. 2022 10:11:12",
                    "7.2.2022 10:11:12",
                    "7. 2. 2022 10:11:12",
                    "07.02.2022 10:11:12.00",
                    "07. 02. 2022 10:11:12.00",
                    "7.2.2022 10:11:12.00",
                    "7. 2. 2022 10:11:12.00",
                ),
                "25.11.21 00:00:00": (
                    "07.02.22 10:11:12",
                    "07. 02. 22 10:11:12",
                    "7.2.22 10:11:12",
                    "7. 2. 22 10:11:12",
                    "07.02.22 10:11:12.00",
                    "07. 02. 22 10:11:12.00",
                    "7.2.22 10:11:12.00",
                    "7. 2. 22 10:11:12.00",
                ),
                "11/25/2021 00:00:00": (
                    "02/07/2022 10:11:12",
                    "2/7/2022 10:11:12",
                    "02/07/2022 10:11:12.00",
                    "2/7/2022 10:11:12.00",
                ),
                "11/25/21 00:00:00": (
                    "02/07/22 10:11:12",
                    "2/7/22 10:11:12",
                    "02/07/22 10:11:12.00",
                    "2/7/22 10:11:12.00",
                ),
                "20211125000000": ("20220207101112", "20220207101112.00"),
            },
        ),
        # dates with times given without seconds
        (
            datetime(2022, 2, 7, 10, 11, 0),
            {
                "2021-11-25 00:00:00": ("2022-02-07 10:11",),
                "25.11.2021 00:00:00": (
                    "07.02.2022 10:11",
                    "07. 02. 2022 10:11",
                    "7.2.2022 10:11",
                    "7. 2. 2022 10:11",
                ),
                "25.11.21 00:00:00": (
                    "07.02.22 10:11",
                    "07. 02. 22 10:11",
                    "7.2.22 10:11",
                    "7. 2. 22 10:11",
                ),
                "11/25/2021 00:00:00": ("02/07/2022 10:11", "2/7/2022 10:11"),
                "11/25/21 00:00:00": ("02/07/22 10:11", "2/7/22 10:11"),
                "20211125000000": ("202202071011",),
            },
        ),
        # times only; the expected value carries the date 1900-01-01
        (
            datetime(1900, 1, 1, 10, 11, 12),
            {
                "00:00:00": ("10:11:12", "10:11:12.00"),
                "000000": ("101112", "101112.00"),
            },
        ),
        # times only, without seconds
        (
            datetime(1900, 1, 1, 10, 11, 0),
            {
                "00:00:00": ("10:11",),
            },
        ),
        # year only; note that the sample is an int, not a string
        (
            datetime(2022, 1, 1),
            {
                "2021": (2022,),
            },
        ),
        # day and month only; the expected value carries the year 1900
        (
            datetime(1900, 2, 7),
            {
                "11-25": ("02-07",),
                "25.11.": ("07.02.", "07. 02.", "7.2.", "7. 2."),
                "11/25": ("02/07", "2/7"),
            },
        ),
    )

    for wanted, samples_by_key in fixtures:
        for format_key, samples in samples_by_key.items():
            # First element of the registered entry holds the format strings.
            formats = TimeVariable.ADDITIONAL_FORMATS[format_key][0]
            for sample in samples:
                results = []
                for fmt in formats:
                    # errors="coerce": a format that does not match gives
                    # a null value instead of raising.
                    results.append(
                        pd.to_datetime(sample, format=fmt, errors="coerce")
                    )

                # At least one format must produce the expected value.
                hits = (res == wanted for res in results)
                self.assertTrue(any(hits))

                # Intended check: formats yield only the expected value or
                # a null value.
                # NOTE(review): with any() this assertion is implied by the
                # one above; the stated intent would need all(). Confirm
                # that every registered format satisfies it before
                # tightening the check.
                hits_or_null = (
                    res == wanted or pd.isnull(res) for res in results
                )
                self.assertTrue(any(hits_or_null))
| 812 | + |
701 | 813 |
|
702 | 814 | PickleContinuousVariable = create_pickling_tests(
|
703 | 815 | "PickleContinuousVariable",
|
|
0 commit comments