diff --git a/TODO b/TODO index 512ce34..35d58bf 100644 --- a/TODO +++ b/TODO @@ -3,5 +3,6 @@ - [ ] Add docstrings to constants. Simply add "///" instead of "//" ? Find how to transfert comments from doxygen xml to breathe - [x] Welcome page: better integrate code lines (`pip install...`). Also move section "Note for developpers" into a more appropriated section ? - [ ] Move objects description from "tutorials.rst" into "Usage" section -- [ ] Ensure that warnings are raise is non-existing keyword arguments are used in functions (e.g. Nbiteration in place of NbIteration) +- [ ] Ensure that warnings are raised if non-existing keyword arguments are used in functions (e.g. Nbiteration in place of NbIteration) - [ ] Have wrapper documentation be transferred to python module. +- [ ] Replace hard-coded relative paths by Path().parent etc. diff --git a/conda/environment.yml b/conda/environment.yml index fe410f9..c269c5d 100644 --- a/conda/environment.yml +++ b/conda/environment.yml @@ -7,6 +7,7 @@ dependencies: - setuptools_scm - cmake - boost + - doxygen - matplotlib-base - pip - pip: diff --git a/doc/api/index.rst b/doc/api/index.rst index 7571889..385e054 100644 --- a/doc/api/index.rst +++ b/doc/api/index.rst @@ -1,5 +1,6 @@ API Reference ============= + .. toctree:: :maxdepth: 2 :caption: AML API: diff --git a/doc/user/autosum.rst b/doc/user/autosum.rst index bff366b..5608ed3 100644 --- a/doc/user/autosum.rst +++ b/doc/user/autosum.rst @@ -6,6 +6,16 @@ Reference guide .. contents:: +C++ binding guide +================= +.. automodule:: openalea.stat_tool._stat_tool + :members: + :undoc-members: + :inherited-members: + :show-inheritance: + :private-members: + + Data structures =================== diff --git a/pyproject.toml b/pyproject.toml index 8ca35f1..73dd863 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -89,7 +89,7 @@ doc = [ # section specific to conda-only distributed package (not used by pip yet) [tool.conda.environment] channels = ["openalea3", "conda-forge"] -dependencies = ["boost", "matplotlib-base", "doxygen"] +dependencies = ["boost", "matplotlib-base"] [project.urls] Repository = "https://github.com/openalea/stat_tool" diff --git a/src/cpp/stat_tool/discrete_parametric_process.cpp b/src/cpp/stat_tool/discrete_parametric_process.cpp index d9dd26a..84ae362 100644 --- a/src/cpp/stat_tool/discrete_parametric_process.cpp +++ b/src/cpp/stat_tool/discrete_parametric_process.cpp @@ -138,6 +138,7 @@ DiscreteParametricProcess::DiscreteParametricProcess(int inb_state , DiscretePar * \brief Copy of a DiscreteParametricProcess object. * * \param[in] process reference on a DiscreteParametricProcess object. + * \param[in] mass_copy flag on copying or not probabilities (mass) */ /*--------------------------------------------------------------*/ diff --git a/src/cpp/stat_tool/distribution.cpp b/src/cpp/stat_tool/distribution.cpp index 240cad5..0395458 100644 --- a/src/cpp/stat_tool/distribution.cpp +++ b/src/cpp/stat_tool/distribution.cpp @@ -583,10 +583,6 @@ ostream& Distribution::ascii_characteristic_print(ostream &os , bool shape , boo /*--------------------------------------------------------------*/ /** * \brief Set seed of random generator - * - * \param[in] seed - * - * \ */ /*--------------------------------------------------------------*/ diff --git a/src/cpp/stat_tool/vectors.cpp b/src/cpp/stat_tool/vectors.cpp index e120bea..94ad8a1 100644 --- a/src/cpp/stat_tool/vectors.cpp +++ b/src/cpp/stat_tool/vectors.cpp @@ -3506,7 +3506,7 @@ Vectors* Vectors::remove_variable_1() const * * \param[in] error reference on a StatError object, * \param[in] nb_summed_variable number of variables to be summed, - * \param[in] variable variable indices. + * \param[in] ivariable variable indices. * * \return Vectors object. */ diff --git a/src/openalea/stat_tool/compound.py b/src/openalea/stat_tool/compound.py index 3337009..adc7004 100644 --- a/src/openalea/stat_tool/compound.py +++ b/src/openalea/stat_tool/compound.py @@ -36,8 +36,8 @@ def Compound(*args, **kargs): elementary distribution or from an ASCII file. A compound (or stopped-sum) distribution is defined as the distribution - of the sum of n independent and identically distributed random variables :math:`X_i` - where `n` is the value taken by the random variable `N`. The distribution of N is referred + of the sum of `n` independent and identically distributed random variables :math:`X_i` + where `n` is the value taken by the random variable `N`. The distribution of :math:`N` is referred to as the sum distribution while the distribution of the :math:`X_i` is referred to as the elementary distribution. diff --git a/src/openalea/stat_tool/convolution.py b/src/openalea/stat_tool/convolution.py index 514496f..133211f 100644 --- a/src/openalea/stat_tool/convolution.py +++ b/src/openalea/stat_tool/convolution.py @@ -39,8 +39,8 @@ def Convolution(*args): """Construction of an object of type convolution from elementary distributions or from an ASCII file. - The distribution of the sum of independent random variables is the - convolution of the distributions of these elementary random variables. + The convolution of independent random variables is the distribution + of their sum. :Parameters: * dist1, dist2, ...(distribution, mixture, convolution, compound) - @@ -64,9 +64,9 @@ def Convolution(*args): :include-source: from openalea.stat_tool import * - sum_dist = Binomial(0,10,0.5) - dist = Binomial(0,15,0.2) - c = Convolution(sum_dist, dist) + dist1 = Binomial(0,10,0.5) + dist2 = Binomial(0,15,0.2) + c = Convolution(dist1, dist2) c.plot() diff --git a/src/openalea/stat_tool/distribution.py b/src/openalea/stat_tool/distribution.py index 20171b6..ac5db9c 100644 --- a/src/openalea/stat_tool/distribution.py +++ b/src/openalea/stat_tool/distribution.py @@ -117,7 +117,7 @@ def Distribution(utype, *args): elif utype in ["P", "POISSON"]: result = Poisson(*args) elif utype in ["M", "MULTINOMIAL"]: - raise NotImplementedError("Multinomial not yet implemented") + result = Multinomial(*args) elif utype in ["NB", "NEGATIVE_BINOMIAL"]: result = NegativeBinomial(*args) elif utype in ["U", "UNIFORM"]: @@ -135,7 +135,7 @@ def Binomial(inf_bound, sup_bound=I_DEFAULT, Construction of a binomial distribution :param float inf_bound: lower bound to the range of possible values (shift parameter) - :param float sup_bound: upper bound to the range of possilbe values + :param float sup_bound: upper bound to the range of possible values :param float proba: probability of `success` .. plot:: @@ -166,7 +166,7 @@ def Binomial(inf_bound, sup_bound=I_DEFAULT, def Poisson(inf_bound, param=D_DEFAULT): """ - Construction of a poisson distribution + Construction of a Poisson distribution :Parameters: * `inf_bound` (int) : lower bound to the range of possible values @@ -195,8 +195,12 @@ def Poisson(inf_bound, param=D_DEFAULT): def NegativeBinomial(inf_bound, param=D_DEFAULT, proba=D_DEFAULT): - """ + r""" Construction of a negative binomial distribution + The negative binomial distribution has the following parameterization: + + .. math:: + P(X-inf\_bound=i) = \frac{\Gamma(param+i)}{i! \Gamma(param)} p^{param} (1-p)^i :Parameters: * inf_bound (int) : lower bound to the range of possible values @@ -209,7 +213,7 @@ def NegativeBinomial(inf_bound, param=D_DEFAULT, :include-source: from openalea.stat_tool.distribution import NegativeBinomial - b = NegativeBinomial(1,2 ,.5) + b = NegativeBinomial(1,2,.5) b.plot(legend_size=12) """ @@ -233,7 +237,7 @@ def Uniform(inf_bound, sup_bound=I_DEFAULT): :Parameters: * inf_bound (int) : lower bound to the range of possible values (shift parameter) - * sup_bound (int) : upper bound to the range of possilbe values + * sup_bound (int) : upper bound to the range of possible values .. plot:: :width: 50% @@ -260,9 +264,27 @@ def Uniform(inf_bound, sup_bound=I_DEFAULT): inf_bound, sup_bound, param, proba, cumul_threshold) -def Multinomial(): - """to be done""" - raise NotImplementedError("Multinomial not yet implemented") +def Multinomial(probabilities): + r""" + + Construction of a categorical distribution. + A categorical distribution is implemented as a particular case + of the multinomial distribution :math:`{\mathcal{M}}(1; p_1, \ldots, p_K)` + + :Parameters: + * probabilities (list): list of probabilities :math:`p_1,\ldots,p_K` + + .. plot:: + :width: 50% + :include-source: + + from openalea.stat_tool.distribution import Multinomial + m = Multinomial([0.1, 0.6, .3]) + from openalea.stat_tool import Histogram + Histogram([m.simulation() for i in range(100)]).plot() + """ + + return _Distribution(probabilities) # Extend _DiscreteParametricModel interface.extend_class( _DiscreteParametricModel, interface.StatInterface) diff --git a/src/wrapper/export_distribution.cpp b/src/wrapper/export_distribution.cpp index bc76959..38793e9 100644 --- a/src/wrapper/export_distribution.cpp +++ b/src/wrapper/export_distribution.cpp @@ -50,6 +50,49 @@ class DistributionWrap { public: + static boost::shared_ptr distribution_from_mass(const boost::python::list& mass) + { + StatError error; + Distribution *d = NULL; + int nb_values = boost::python::len(mass); + double cumul = 0.; + bool status = true; + double *probs = NULL; + + probs = new double[nb_values]; + + for (int i = 0; i < nb_values; i++) { + extract x(mass[i]); + if (x.check()) { + probs[i] = x(); + if ((probs[i] < 0) || (probs[i] > 1)) { + status = false; + error.update(STAT_error[STATR_VALUE]); + ostringstream correction_message; + correction_message << " should be between 0 and 1."; + error.correction_update(STAT_word[STATW_PROBABILITY], + (correction_message.str()).c_str(), + 1, i+1);} + else + cumul += probs[i];} + else { + status = false; + error.update(STAT_error[STATR_VARIABLE_TYPE]);}} + if ((status) && ((cumul < CUMUL_THRESHOLD) || (cumul > 1.))) { + status = false; + error.update(STAT_parsing[STATP_PROBABILITY_SUM]); + } + + if (!status) { + stat_tool::wrap_util::throw_error(error); + return NULL; + } else { + d = new Distribution(nb_values, probs); + delete [] probs; + return boost::shared_ptr(d); + } + } + static MultiPlotSet* get_plotable_dists(const Distribution &p, const boost::python::list& dist_list) @@ -83,6 +126,15 @@ class DistributionWrap { return ret; } + static double + mass(const Distribution &p, int v) + { + if (v < p.offset | v >= p.nb_value) + return 0; + else + return p.mass[v]; + } + static MultiPlotSet* get_plotable(const Distribution &p) { @@ -118,6 +170,7 @@ void class_distribution() .def(init()) .def(init()) .def(init >()) + .def("__init__", make_constructor(DistributionWrap::distribution_from_mass)) .def(self_ns::str(self)) // __str__ .def( self == self ) @@ -140,6 +193,7 @@ void class_distribution() .def_readonly("get_nb_parameter", &Distribution::nb_parameter, "number of unknown parameters") + .def("mass", WRAP::mass, "return probability of a given value") .def("simulation", &Distribution::simulation, "simulate one realization") // no tested. is it useful ? diff --git a/test/test_compound_functional.py b/test/test_compound_functional.py index 1badf2f..a0b7488 100644 --- a/test/test_compound_functional.py +++ b/test/test_compound_functional.py @@ -25,28 +25,33 @@ Plot, Shift ) -# read data file def test(): - # cdist1 = Compound("data/compound1.cd") - # Plot(cdist1) - # - # chisto1 = Simulate(cdist1, 200) - # Plot(chisto1) - # - # histo30 = ExtractHistogram(chisto1, "Sum") - # Plot(histo30) - # - # histo30e = ExtractHistogram(chisto1, "Elementary") - # Plot(histo30e) - # - # - # cdist2 = Estimate(chisto1, "COMPOUND", - # ExtractDistribution(cdist1, "Elementary"), - # "Sum", MinInfBound=0) - # - # histo31 = ExtractHistogram(ExtractData(cdist2), "Sum") - # histo32 = ToHistogram(ExtractDistribution(cdist2, "Sum")) - # Plot(histo31, histo32) + # read data file + from pathlib import Path + this_dir = Path(__file__).parent if '__file__' in globals() else Path(".").resolve() + data_dir = this_dir.parent / "test" + data_path = data_dir / "data/compound1.cd" + cdist1 = Compound(str(data_path)) + + Plot(cdist1) + + chisto1 = Simulate(cdist1, 200) + Plot(chisto1) + + histo30 = ExtractHistogram(chisto1, "Sum") + Plot(histo30) + + histo30e = ExtractHistogram(chisto1, "Elementary") + Plot(histo30e) + + + cdist2 = Estimate(chisto1, "COMPOUND", + ExtractDistribution(cdist1, "Elementary"), + "Sum", MinInfBound=0) + + histo31 = ExtractHistogram(ExtractData(cdist2), "Sum") + histo32 = ToHistogram(ExtractDistribution(cdist2, "Sum")) + Plot(histo31, histo32) peup1 = Histogram(get_shared_data("peup1.his")) mixt4 = Estimate(peup1, "MIXTURE", "B", "NB") diff --git a/test/test_distribution.py b/test/test_distribution.py index 1bb93eb..fab47bd 100644 --- a/test/test_distribution.py +++ b/test/test_distribution.py @@ -196,18 +196,58 @@ def test_neg_binomial(self): m = d.simulate(1000).extract_model() assert isinstance(m, _DiscreteParametricModel) - def test_multimodial(self): - """multinomial not yet implemented""" + def test_multinomial(self): + """Test multinomial (i.e. categorical) distribution""" + set_seed(0) + p = [0.1, 0.6, 0.3] + d = Multinomial(p) + for i in range(len(p)): + assert p[i] == d.mass(i) + assert(Histogram([d.simulation() for i in range(100)])) + try: + d = Multinomial([0.1, 0.6, 0.29]) + except: + assert True + else: + assert False, "Bad sum for probabilties" + try: + d = Multinomial([0.1, 0.6, 0.31]) + except: + assert True + else: + assert False, "Bad sum for probabilties" + try: + d = Multinomial([]) + except: + assert True + else: + assert False, "Bad sum for probabilties" + try: + d = Multinomial([0.1, 0.6, "a"]) + except: + assert True + else: + assert False, "Bad type for probabilties" try: - Distribution("MULTINOMIAL", 10) - assert False + d = Multinomial([-0.1, 0.6, 0.4, 0.1]) except: assert True + else: + assert False, "Bad value for probabilties" try: - d = Multinomial() - assert False + d = Multinomial([1.1]) except: assert True + else: + assert False, "Bad value for probabilties" + + def test_mass(self): + """test mass function""" + dist = Binomial(1,15,0.2) + assert dist.mass(0) == 0 + assert dist.mass(16) == 0 + for i in range(1,16): + assert dist.mass(i) def test_simulation(self): set_seed(0) diff --git a/test/test_regression_functional.py b/test/test_regression_functional.py index 6bb9e2c..4141356 100644 --- a/test/test_regression_functional.py +++ b/test/test_regression_functional.py @@ -36,6 +36,10 @@ # ######################################################################### """ +try: + from .tools import DISABLE_PLOT +except ImportError: + from tools import DISABLE_PLOT from openalea.stat_tool import Vectors from openalea.stat_tool import ExtractHistogram @@ -60,7 +64,11 @@ def test(): pass else: return - vec1 = Vectors(Sequences("../examples/Sample/Sequences/hetre.seq")) + from pathlib import Path + this_dir = Path(__file__).parent if '__file__' in globals() else Path(".").resolve() + data_dir = this_dir.parent / "examples" + data_path = data_dir / "Sample/Sequences/hetre.seq" + vec1 = Vectors(Sequences(str(data_path))) Plot(ExtractHistogram(ValueSelect(vec1, 1, 0), 4), ExtractHistogram(ValueSelect(vec1, 1, 1), 4),