
List of publications and preprints by Tom Hanika
2023
- Stumme, G., Dürrschnabel, D., Hanika, T.: Towards Ordinal Data Science, https://doi.org/10.48550/arXiv.2307.09477, (2023).
@misc{DBLP:journals/corr/abs-2307-09477,
author = {Stumme, Gerd and D{\"u}rrschnabel, Dominik and Hanika, Tom},
journal = {CoRR},
keywords = {publist},
title = {Towards Ordinal Data Science},
volume = {abs/2307.09477},
year = 2023
}
- Hanika, T., Hirth, J.: Conceptual views on tree ensemble classifiers. International Journal of Approximate Reasoning. 159, 108930 (2023).
@article{HANIKA2023108930,
abstract = {Random Forests and related tree-based methods are popular for supervised learning from table based data. Apart from their ease of parallelization, their classification performance is also superior. However, this performance, especially parallelizability, is offset by the loss of explainability. Statistical methods are often used to compensate for this disadvantage. Yet, their ability for local explanations, and in particular for global explanations, is limited. In the present work we propose an algebraic method, rooted in lattice theory, for the (global) explanation of tree ensembles. In detail, we introduce two novel conceptual views on tree ensemble classifiers and demonstrate their explanatory capabilities on Random Forests that were trained with standard parameters.},
author = {Hanika, Tom and Hirth, Johannes},
journal = {International Journal of Approximate Reasoning},
keywords = {xai},
pages = 108930,
title = {Conceptual views on tree ensemble classifiers},
volume = 159,
year = 2023
}
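The abstract above describes conceptual views of tree ensembles rooted in formal concept analysis. As a hedged illustration only (not the paper's construction), the snippet below turns a Random Forest trained with standard parameters into a binary object/attribute incidence table, i.e. the kind of formal context on which such views are defined; the dataset and the per-tree prediction predicates are assumptions chosen for the example.
```python
# Illustrative sketch, NOT the method of Hanika & Hirth (2023): it only shows
# how a trained forest induces a binary formal context (objects = samples,
# attributes = "tree t predicts class c"). Dataset and predicates are assumed.
import numpy as np
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier

X, y = load_iris(return_X_y=True)
forest = RandomForestClassifier(random_state=0).fit(X, y)  # standard parameters

# One column per (tree, class) pair; entry is 1 iff that tree assigns that class.
per_tree = np.stack([tree.predict(X) for tree in forest.estimators_], axis=1)
incidence = np.concatenate(
    [(per_tree == c).astype(int) for c in range(len(forest.classes_))], axis=1
)
print(incidence.shape)  # (n_samples, n_trees * n_classes): a binary formal context
```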
- Stubbemann, M., Hille, T., Hanika, T.: Selecting Features by their Resilience to the Curse of Dimensionality (2023).
@article{stubbemann2023selecting,
author = {Stubbemann, Maximilian and Hille, Tobias and Hanika, Tom},
keywords = {selecting},
title = {Selecting Features by their Resilience to the Curse of Dimensionality},
year = 2023
}
- Hirth, J., Horn, V., Stumme, G., Hanika, T.: Ordinal Motifs in Lattices, http://arxiv.org/abs/2304.04827, (2023).
@misc{hirth2023ordinal,
abstract = {Lattices are a commonly used structure for the representation and analysis of relational and ontological knowledge. In particular, the analysis of these requires a decomposition of a large and high-dimensional lattice into a set of understandably large parts. With the present work we propose /ordinal motifs/ as analytical units of meaning. We study these ordinal substructures (or standard scales) through (full) scale-measures of formal contexts from the field of formal concept analysis. We show that the underlying decision problems are NP-complete and provide results on how one can incrementally identify ordinal motifs to save computational effort. Accompanying our theoretical results, we demonstrate how ordinal motifs can be leveraged to retrieve basic meaning from a medium sized ordinal data set.},
author = {Hirth, Johannes and Horn, Viktoria and Stumme, Gerd and Hanika, Tom},
keywords = {publist},
title = {Ordinal Motifs in Lattices},
year = 2023
}
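To give a flavour of what recognising an ordinal motif involves (the paper's scale-measure machinery is far more general, and the underlying decision problems are shown to be NP-complete), the toy check below, written under assumptions of our own choosing, tests whether a binary formal context is an ordinal scale, i.e. whether its object intents form a strict chain under inclusion.
```python
# Toy sketch, not the paper's algorithm: a subcontext is treated as an
# "ordinal motif" here iff its rows' attribute sets form a strict inclusion
# chain. Naively testing all subcontexts this way blows up combinatorially,
# which is consistent with the NP-completeness results stated in the abstract.
import numpy as np

def is_ordinal_motif(incidence: np.ndarray) -> bool:
    """True iff the object intents form a strictly increasing chain."""
    intents = sorted((frozenset(np.flatnonzero(row)) for row in incidence), key=len)
    return all(a < b for a, b in zip(intents, intents[1:]))  # strict inclusion

chain = np.array([[1, 0, 0],
                  [1, 1, 0],
                  [1, 1, 1]])
print(is_ordinal_motif(chain))                 # True: a 3-chain (ordinal scale)
print(is_ordinal_motif(np.eye(3, dtype=int)))  # False: an antichain (nominal scale)
```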
- Stubbemann, M., Hanika, T., Schneider, F.M.: Intrinsic Dimension for Large-Scale Geometric Learning. Transactions on Machine Learning Research. (2023).
@article{stubbemann2022intrinsic,
abstract = {The concept of dimension is essential to grasp the complexity of data. A naive approach to determine the dimension of a dataset is based on the number of attributes. More sophisticated methods derive a notion of intrinsic dimension (ID) that employs more complex feature functions, e.g., distances between data points. Yet, many of these approaches are based on empirical observations, cannot cope with the geometric character of contemporary datasets, and do lack an axiomatic foundation. A different approach was proposed by V. Pestov, who links the intrinsic dimension axiomatically to the mathematical concentration of measure phenomenon. First methods to compute this and related notions for ID were computationally intractable for large-scale real-world datasets. In the present work, we derive a computationally feasible method for determining said axiomatic ID functions. Moreover, we demonstrate how the geometric properties of complex data are accounted for in our modeling. In particular, we propose a principle way to incorporate neighborhood information, as in graph data, into the ID. This allows for new insights into common graph learning procedures, which we illustrate by experiments on the Open Graph Benchmark.},
author = {Stubbemann, Maximilian and Hanika, Tom and Schneider, Friedrich Martin},
journal = {Transactions on Machine Learning Research},
keywords = {publist},
title = {Intrinsic Dimension for Large-Scale Geometric Learning},
year = 2023
}
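The paper derives a computationally feasible version of Pestov's axiomatic, concentration-based intrinsic dimension; the sketch below is not that method. It only illustrates the underlying intuition, namely that distance distributions concentrate in high dimension, via the classical distance-distribution proxy of Chávez et al., id = mu^2 / (2 sigma^2) over pairwise distances; sample sizes and dimensions are arbitrary choices.
```python
# Distance-distribution proxy for intrinsic dimension (Chávez et al.), used
# here only as a stand-in for the axiomatic ID functions of the paper.
import numpy as np
from scipy.spatial.distance import pdist

def distance_based_id(X: np.ndarray) -> float:
    d = pdist(X)                          # all pairwise Euclidean distances
    return d.mean() ** 2 / (2 * d.var())  # concentrated distances => high ID

rng = np.random.default_rng(0)
for dim in (2, 16, 128):
    X = rng.standard_normal((500, dim))
    # For i.i.d. Gaussians the estimate grows roughly like the ambient dimension.
    print(dim, round(distance_based_id(X), 1))
```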