diff --git a/community/py3-niaarmts/APKBUILD b/community/py3-niaarmts/APKBUILD index 383a75c393e..81aad40d8c4 100644 --- a/community/py3-niaarmts/APKBUILD +++ b/community/py3-niaarmts/APKBUILD @@ -2,7 +2,7 @@ # Maintainer: Iztok Fister, Jr. pkgname=py3-niaarmts pkgver=0.2.6 -pkgrel=0 +pkgrel=1 pkgdesc="Time series numerical association rule mining variants" url="https://github.com/firefly-cpp/NiaARMTS" arch="noarch" @@ -11,7 +11,9 @@ depends="python3 py3-matplotlib py3-niapy py3-numpy py3-pandas" checkdepends="py3-pytest-xdist" makedepends="py3-gpep517 py3-poetry-core" subpackages="$pkgname-doc $pkgname-pyc" -source="https://github.com/firefly-cpp/NiaARMTS/archive/$pkgver/niaarmts-$pkgver.tar.gz" +source="https://github.com/firefly-cpp/NiaARMTS/archive/$pkgver/niaarmts-$pkgver.tar.gz + pandas.patch + " builddir="$srcdir/NiaARMTS-$pkgver" build() { @@ -37,4 +39,5 @@ package() { sha512sums=" 59f94b4791fa8fae3cc9cb2d16a0c5bea326906d714e126165497112de9b99f0fb87a7403aa09a79eacb09282916bba3b3870a9a65dd29d504bc5a92959e95cf niaarmts-0.2.6.tar.gz +246abac646f0a42e7e5298ee891c08d16721ffa760ef3bbc413790deda12e30d28d2976dffd17a03611877625029fd1940115e6ec9b659f9e38cc08741f7cd44 pandas.patch " diff --git a/community/py3-niaarmts/pandas.patch b/community/py3-niaarmts/pandas.patch new file mode 100644 index 00000000000..da39f5f9f25 --- /dev/null +++ b/community/py3-niaarmts/pandas.patch @@ -0,0 +1,105 @@ +diff --git a/niaarmts/feature.py b/niaarmts/feature.py +index 106bdfb..e6af172 100644 +--- a/niaarmts/feature.py ++++ b/niaarmts/feature.py +@@ -1,5 +1,10 @@ + import pandas as pd +-import numpy as np ++from pandas.api.types import ( ++ is_datetime64_any_dtype, ++ is_numeric_dtype, ++ is_object_dtype, ++ is_string_dtype, ++) + + class Feature: + def __init__(self, data: pd.DataFrame): +@@ -22,7 +27,7 @@ class Feature: + for column in self.data.columns: + col_data = self.data[column] + +- if np.issubdtype(col_data.dtype, np.number): ++ if is_numeric_dtype(col_data.dtype): + summary[column] = { + 'type': 'Numerical', + 'min': col_data.min(), +@@ -30,13 +35,13 @@ class Feature: + 'mean': col_data.mean(), + 'std_dev': col_data.std(), + } +- elif col_data.dtype == 'object': ++ elif self._is_categorical(col_data): + summary[column] = { + 'type': 'Categorical', + 'unique_classes': col_data.nunique(), + 'classes': col_data.unique() + } +- elif np.issubdtype(col_data.dtype, np.datetime64): ++ elif is_datetime64_any_dtype(col_data.dtype): + summary[column] = { + 'type': 'Datetime', + 'min': col_data.min(), +@@ -55,7 +60,7 @@ class Feature: + + :return: A list of numerical feature names. + """ +- numerical_features = [col for col in self.data.columns if np.issubdtype(self.data[col].dtype, np.number)] ++ numerical_features = [col for col in self.data.columns if is_numeric_dtype(self.data[col].dtype)] + return numerical_features + + def get_categorical_features(self): +@@ -64,7 +69,7 @@ class Feature: + + :return: A list of categorical feature names. + """ +- categorical_features = [col for col in self.data.columns if self.data[col].dtype == 'object'] ++ categorical_features = [col for col in self.data.columns if self._is_categorical(self.data[col])] + return categorical_features + + def get_datetime_features(self): +@@ -73,7 +78,7 @@ class Feature: + + :return: A list of datetime feature names. + """ +- datetime_features = [col for col in self.data.columns if np.issubdtype(self.data[col].dtype, np.datetime64)] ++ datetime_features = [col for col in self.data.columns if is_datetime64_any_dtype(self.data[col].dtype)] + return datetime_features + + def get_feature_stats(self, feature_name: str): +@@ -87,7 +92,7 @@ class Feature: + raise ValueError(f"Feature '{feature_name}' not found in the dataset.") + + col_data = self.data[feature_name] +- if np.issubdtype(col_data.dtype, np.number): ++ if is_numeric_dtype(col_data.dtype): + return { + 'type': 'Numerical', + 'min': col_data.min(), +@@ -95,13 +100,13 @@ class Feature: + 'mean': col_data.mean(), + 'std_dev': col_data.std(), + } +- elif col_data.dtype == 'object': ++ elif self._is_categorical(col_data): + return { + 'type': 'Categorical', + 'unique_classes': col_data.nunique(), + 'classes': col_data.unique(), + } +- elif np.issubdtype(col_data.dtype, np.datetime64): ++ elif is_datetime64_any_dtype(col_data.dtype): + return { + 'type': 'Datetime', + 'min': col_data.min(), +@@ -111,3 +116,11 @@ class Feature: + return { + 'type': 'Unknown', + } ++ ++ @staticmethod ++ def _is_categorical(series: pd.Series): ++ return ( ++ is_string_dtype(series.dtype) ++ or is_object_dtype(series.dtype) ++ or isinstance(series.dtype, pd.CategoricalDtype) ++ ) and not is_datetime64_any_dtype(series.dtype)