Source code for fivpy.srs

"""Module to perform forest inventories using Simple Random Sampling
"""

import math
import logging
import pandas as pd
from scipy import stats


logging.basicConfig(format='%(levelname)s - %(message)s', level=logging.DEBUG)


[docs]class RandomSampling: """Class to create a forest inventory object which uses Simple Random Sampling to estimate total wood volume""" def __init__(self, dataframe, unit_area, sampling_area, significance=95, sampling_error=20, deg_free=None): self.dataframe = dataframe self.unit_area = unit_area self.sampling_area = sampling_area self.significance = significance self.sampling_error = sampling_error self.deg_free = deg_free self.n_sampled_units = self.dataframe.groupby("units").count().shape[0] self._add_vol() self._get_deg_free() self.mean = self.dataframe.groupby("units").sum().mean()[["volume"]] self.std = self.dataframe.groupby("units").sum().std()[["volume"]] self.var_coef = self.std / self.mean * 100 self.total_units = self.sampling_area / self.unit_area self.t_value = stats.t.ppf(1 - (1 - (self.significance/100)) / 2, self.deg_free)
[docs] def _add_vol(self): try: self.get_vol() logging.warning('Adding colum with Volume.') except KeyError: if 'volume' in self.dataframe.columns: logging.warning('Volume column already exists.') return self.dataframe logging.warning('Adding columns with DBH and Volume.') self.dataframe["dbh"] = self.dataframe["cbh"] / math.pi self.get_vol() return self.dataframe
[docs] def get_vol(self, beta_0=0.000074, beta_1=1.707348, beta_2=1.16873): """Add a column with volume info to the dataframe Args: beta_0 (float, optional): first parameter. Defaults to 0.000074. beta_1 (float, optional): second parameter. Defaults to 1.707348. beta_2 (float, optional): third parameter. Defaults to 1.16873. Returns: dataframe (pd.DataFrame): A DataFrame containing volume info. Examples -------- >>> instance.get_vol() ### To change the equation used to calculate volume >>> instance.get_vol(beta_0=0.000025, beta_1=1.4568, beta_2=1.2563) """ self.dataframe["volume"] = beta_0 * (self.dataframe["dbh"] ** beta_1)\ * (self.dataframe["height"] ** beta_2) return self.dataframe
[docs] def _get_deg_free(self): # Get the degrees of freedom if self.deg_free is None: self.deg_free = self.n_sampled_units - 1 logging.warning("Degrees of freedom not informed." "Using default value: %s", self.deg_free) return self.deg_free
[docs] def get_sample_size(self): """Returns the estimated required sample size to perform the definitive inventory Returns: sample size (int): the required sample size Examples -------- >>> instance.get_sample_size() """ dividend_sample_size = ((self.t_value ** 2) * (self.var_coef ** 2)) divisor_sample_size = (self.sampling_error ** 2) + (((self.t_value\ ** 2) * (self.var_coef ** 2)) / self.total_units) sample_size = dividend_sample_size / divisor_sample_size return math.ceil(sample_size)
[docs] def srs_inventory(self): """Performs the Simple Random Sampling inventory Raises: ValueError: If the required sample size is greater than the number of units sampled, it is not possible to perform the inventory. Returns: dataframe (pd.DataFrame): A DataFrame containing the inventory results. Examples -------- >>> instance.srs_inventory() """ if self.get_sample_size() > self.n_sampled_units: raise ValueError("Required sample size is greater than " "the number of sampled units " f"{self.get_sample_size()}. " "It is necessary to obtain more data") logging.info("Calculating final inventory.") var = self.dataframe.groupby("units").sum().var()[["volume"]] # standardized error of mean - sem sem = math.sqrt(((var / self.n_sampled_units) * (1 -\ (self.n_sampled_units / self.total_units)))) total_vol = self.total_units * self.mean sample_error = sem * self.t_value sample_error_perc = (sample_error / self.mean) * 100 # CI true mean ic_up = self.mean + sample_error ic_lo = self.mean - sample_error # CI prod by ha mean_ha = self.mean * (1 / self.unit_area) ic_pop_up = (self.mean + sample_error) * (1 / self.unit_area) ic_pop_lo = (self.mean - sample_error) * (1 / self.unit_area) # CI vol total pop ic_pop_total = self.total_units * self.mean ic_pop_up_total = (self.total_units * self.mean) + \ (self.total_units * sample_error) ic_pop_lo_total = (self.total_units * self.mean) - \ (self.total_units * sample_error) t_value_uni = stats.t.ppf(1 - ((1 - (self.significance/100)) * 2) \ / 2, self.deg_free) # Minimum Reliable Estimate - mre mre = self.mean - (t_value_uni * sem) mre_population = (self.mean - (t_value_uni * sem)) * self.total_units results = {"Variables": ["Ym", "StdDev", "CV", "S²", "N samples", "N Total", "MeanErr", "Total V", "Err", "E(%)", "CI Supe", "CI Infe", "Ym/ha", "CI Supe ha", "CI Infe ha", "Ym/total", "CI Supe total", "CI Infe total", "MRE", "MRE Pop"], "Data": [self.mean.item(), self.std.item(), self.var_coef.item(), var.item(), self.deg_free, self.total_units, sem, total_vol.item(), sample_error, sample_error_perc.item(), ic_up.item(), ic_lo.item(), mean_ha.item(), ic_pop_up.item(), ic_pop_lo.item(), ic_pop_total.item(), ic_pop_up_total.item(), ic_pop_lo_total.item(), mre.item(), mre_population.item()]} return pd.DataFrame(data=results)