Forecasting

UANL

#+Name forecasting_uanl

import matplotlib.pyplot as plt
import statsmodels.api as sm
import numbers
import pandas as pd
from tabulate import tabulate
from statsmodels.stats.outliers_influence import summary_table
from typing import Tuple, Dict
import numpy as np


def print_tabulate(df: pd.DataFrame):
    """Print ``df`` to stdout as an org-mode table, using its column names as headers."""
    rendered = tabulate(df, headers=df.columns, tablefmt="orgtbl")
    print(rendered)

def transform_variable(df: pd.DataFrame, x:str)->pd.Series:
    """Return a numeric series that can stand in for column ``x`` in a regression.

    Args:
        df: Frame holding the column.
        x: Name of the column to inspect.

    Returns:
        ``df[x]`` itself when its first value is a number. Otherwise (for
        example a column of date strings) a new series holding the row
        positions ``0 .. len(df) - 1`` on a default index. An empty column
        yields an empty positional series.
    """
    column = df[x]
    # Read the first row by position: ``column[0]`` is a label lookup and
    # raises KeyError whenever the index has no label 0 (after ``tail()``,
    # a boolean filter, ...).
    if len(column) > 0 and isinstance(column.iloc[0], numbers.Number):
        return column
    return pd.Series(range(len(column)))

def linear_regression(df: pd.DataFrame, x:str, y: str)->Dict[str, float]:
    """Fit ``y ~ const + x`` by ordinary least squares and report the fit.

    The regressor is ``transform_variable(df, x)``, so a non-numeric ``x``
    column is replaced by row positions. The coefficient table of the
    statsmodels summary is printed with ``print_tabulate``.

    Args:
        df: Frame holding both columns.
        x: Name of the explanatory column.
        y: Name of the response column.

    Returns:
        Dict with the values shown in the summary tables (i.e. rounded as
        displayed there): ``m`` (slope), ``b`` (intercept), ``r2``,
        ``r2_adj``, and ``low_band`` / ``hi_band``, the bounds of the
        ``[0.025``, ``0.975]`` interval of the intercept (first table row).
    """
    # Local import keeps the block self-contained; read_html is given a
    # file-like object because passing literal HTML strings is deprecated.
    from io import StringIO

    fixed_x = transform_variable(df, x)
    model = sm.OLS(df[y], sm.add_constant(fixed_x)).fit()
    # Build the summary once: every call recomputes the diagnostics and
    # repeats their small-sample warnings.
    summary = model.summary()
    coef_table = pd.read_html(StringIO(summary.tables[1].as_html()), header=0, index_col=0)[0]
    fit_table = pd.read_html(StringIO(summary.tables[0].as_html()), header=None, index_col=None)[0]
    print_tabulate(coef_table)
    coef = coef_table['coef']
    # Row 0 of the coefficient table is the constant, row 1 the regressor.
    # Positional access goes through .iloc because the table is indexed by
    # term name, not by integers.
    return {
        'm': coef.values[1],
        'b': coef.values[0],
        'r2': fit_table.values[0][3],
        'r2_adj': fit_table.values[1][3],
        'low_band': coef_table['[0.025'].iloc[0],
        'hi_band': coef_table['0.975]'].iloc[0],
    }

def plt_lr(df: pd.DataFrame, x:str, y: str, m: float, b: float, r2: float, r2_adj: float, low_band: float, hi_band: float, colors: Tuple[str,str]):
    """Scatter ``y`` against ``x`` and overlay the regression line with a shaded band.

    The line is ``m * value + b`` and the band runs from ``m * value + low_band``
    to ``m * value + hi_band``, where ``value`` iterates over
    ``transform_variable(df, x)``. ``colors[0]`` paints the line and
    ``colors[1]`` the band. ``r2`` and ``r2_adj`` are accepted so the dict
    returned by ``linear_regression`` can be splatted in; they are not drawn.
    """
    regressor = transform_variable(df, x)
    df.plot(x=x, y=y, kind='scatter')

    fitted = []
    lower = []
    upper = []
    for _, value in regressor.items():
        fitted.append(m * value + b)
        lower.append(m * value + low_band)
        upper.append(m * value + hi_band)

    plt.plot(df[x], fitted, color=colors[0])
    plt.fill_between(df[x], lower, upper, alpha=0.2, color=colors[1])





# Average the net salary per date, fit a linear trend over the dates and
# save the scatter plot with the fitted line and band.
df = pd.read_csv("csv/typed_uanl.csv")
df_by_sal = (
    df.groupby("Fecha")
    .aggregate(
        sueldo_mensual=pd.NamedAgg(column="Sueldo Neto", aggfunc=pd.DataFrame.mean)
    )
)
# Turn the "Fecha" group keys back into a regular column.
df_by_sal.reset_index(inplace=True)

a = linear_regression(df_by_sal, "Fecha", "sueldo_mensual")
plt_lr(
    df=df_by_sal,
    x="Fecha",
    y="sueldo_mensual",
    colors=('red', 'orange'),
    **a,
)

plt.xticks(rotation=90)
plt.savefig('img/lr_sueldo_mensual_Fecha_m.png')
plt.close()

	coef	std err	t	P>|t|	[0.025	0.975]
const	10800	555.67	19.444	0	9652.1	12000
0	175.686	41.398	4.244	0	89.832	261.541

forecasting

#+Name forecasting

import matplotlib.pyplot as plt
import statsmodels.api as sm
import numbers
import pandas as pd
from tabulate import tabulate
from statsmodels.stats.outliers_influence import summary_table
from typing import Tuple, Dict
import numpy as np


def print_tabulate(df: pd.DataFrame):
    """Write ``df`` to stdout formatted as an org-mode table headed by its column names."""
    table_text = tabulate(df, headers=df.columns, tablefmt="orgtbl")
    print(table_text)

def transform_variable(df: pd.DataFrame, x:str)->pd.Series:
    """Return a numeric series that can stand in for column ``x`` in a regression.

    Args:
        df: Frame holding the column.
        x: Name of the column to inspect.

    Returns:
        ``df[x]`` itself when its first value is a number. Otherwise (for
        example a column of date strings) a new series holding the row
        positions ``0 .. len(df) - 1`` on a default index. An empty column
        yields an empty positional series.
    """
    column = df[x]
    # Read the first row by position: a label lookup such as
    # ``column[df.index[0]]`` returns a Series (not a scalar) when that label
    # is duplicated, which would make a numeric column look non-numeric.
    if len(column) > 0 and isinstance(column.iloc[0], numbers.Number):
        return column
    return pd.Series(range(len(column)))

def linear_regression(df: pd.DataFrame, x:str, y: str)->Dict[str, float]:
    """Fit ``y ~ const + x`` by ordinary least squares and report the fit.

    The regressor is ``transform_variable(df, x)``, so a non-numeric ``x``
    column is replaced by row positions. The response is passed as a plain
    list, so it is matched to the regressor by position rather than by index
    label.

    Args:
        df: Frame holding both columns.
        x: Name of the explanatory column.
        y: Name of the response column.

    Returns:
        Dict with the values shown in the summary tables (i.e. rounded as
        displayed there): ``m`` (slope), ``b`` (intercept), ``r2``,
        ``r2_adj``, and ``low_band`` / ``hi_band``, the bounds of the
        ``[0.025``, ``0.975]`` interval of the intercept (first table row).
    """
    # Local import keeps the block self-contained; read_html is given a
    # file-like object because passing literal HTML strings is deprecated.
    from io import StringIO

    fixed_x = transform_variable(df, x)
    model = sm.OLS(list(df[y]), sm.add_constant(fixed_x)).fit()
    # Build the summary once: every call recomputes the diagnostics and
    # repeats their small-sample warnings.
    summary = model.summary()
    coef_table = pd.read_html(StringIO(summary.tables[1].as_html()), header=0, index_col=0)[0]
    fit_table = pd.read_html(StringIO(summary.tables[0].as_html()), header=None, index_col=None)[0]
    coef = coef_table['coef']
    # Row 0 of the coefficient table is the constant, row 1 the regressor.
    # Positional access goes through .iloc because the table is indexed by
    # term name, not by integers.
    return {
        'm': coef.values[1],
        'b': coef.values[0],
        'r2': fit_table.values[0][3],
        'r2_adj': fit_table.values[1][3],
        'low_band': coef_table['[0.025'].iloc[0],
        'hi_band': coef_table['0.975]'].iloc[0],
    }

def plt_lr(df: pd.DataFrame, x:str, y: str, m: float, b: float, r2: float, r2_adj: float, low_band: float, hi_band: float, colors: Tuple[str,str]):
    """Draw the regression line and its shaded band on the current matplotlib figure.

    The line is ``m * value + b`` and the band runs from ``m * value + low_band``
    to ``m * value + hi_band``, where ``value`` iterates over
    ``transform_variable(df, x)``. ``colors[0]`` paints the line and
    ``colors[1]`` the band. No scatter plot is drawn here. ``r2`` and
    ``r2_adj`` are accepted so the dict returned by ``linear_regression`` can
    be splatted in; they are not drawn.
    """
    regressor = transform_variable(df, x)

    fitted = []
    lower = []
    upper = []
    for _, value in regressor.items():
        fitted.append(m * value + b)
        lower.append(m * value + low_band)
        upper.append(m * value + hi_band)

    plt.plot(df[x], fitted, color=colors[0])
    plt.fill_between(df[x], lower, upper, alpha=0.2, color=colors[1])


def normalize_distribution(dist: np.ndarray, n: int)->np.ndarray:
    """Rescale ``dist`` to non-negative whole numbers that sum to roughly ``n``.

    The values are shifted so the minimum sits just above zero, scaled so
    they sum to ``n``, and rounded element-wise. Because of the rounding the
    result need not sum to exactly ``n``.

    Args:
        dist: Samples to rescale; any array-like of any dimensionality.
        n: Target total of the rescaled values.

    Returns:
        Float array of rounded values with the same shape as ``dist``.
    """
    values = np.asarray(dist)
    # np.min reduces over every axis in native code; the builtin ``min``
    # iterates in Python and fails on arrays with more than one dimension.
    # The tiny offset keeps the sum strictly positive, so the division below
    # is safe even when all inputs are equal.
    shifted = values - np.min(values) + 0.000001
    scaled = (shifted / np.sum(shifted)) * n
    return np.round(scaled)

begin_date = '2016-01-01'
end_date = '2023-01-01'

# Synthetic daily sales: standard-normal noise rescaled by
# normalize_distribution towards a total of 100 per day, written to disk and
# read back so the rest of the script works from the CSV file.
date_range = pd.date_range(start=begin_date, end=end_date, freq='1D')
norm_dist = np.random.standard_normal(len(date_range))
sales = normalize_distribution(norm_dist, 100*len(date_range))
df = pd.DataFrame({'Fecha': date_range, 'ventas': sales}) # type: pd.DataFrame
df.to_csv('csv/sales.csv', index=False)

full_df = pd.read_csv('csv/sales.csv')
print(f"full -> mean: {np.mean(full_df['ventas'])}, sd: {np.std(full_df['ventas'])}")
df = full_df.tail(50)
x = "Fecha"
y= "ventas"
# NOTE(review): "Fecha" is compared as read from the CSV (no date parsing is
# requested above), which presumably means ISO-formatted strings — confirm.
full_df[full_df["Fecha"]>'2022-12-01'].plot(x=x,y=y, kind='scatter')
plt.xticks(rotation=90)
plt.savefig('img/full_ventas_Fecha_m.png')
plt.close()

# Overlay three fits on the scatter of the last 50 days: all 50 days, the
# last 5 days, and the Thursdays only.
df.plot(x=x,y=y, kind='scatter')
a = linear_regression(df, x,y)
plt_lr(df=df, x=x, y=y, colors=('red', 'orange'), **a)
a = linear_regression(df.tail(5), x,y)
plt_lr(df=df.tail(5), x=x, y=y, colors=('red', 'orange'), **a)
# pandas numbers weekdays Monday=0 .. Sunday=6, so Thursday ("jueves") is 3;
# the previous value 1 selected Tuesdays.
df_j = df[pd.to_datetime(df[x]).dt.dayofweek == 3]
print_tabulate(df_j)
a = linear_regression(df_j, x,y)
plt_lr(df=df_j, x=x, y=y, colors=('blue', 'blue'), **a)
#
plt.xticks(rotation=90)
plt.savefig('img/lr_ventas_Fecha_m.png')
plt.close()

# Second window: 2019-11-11 (inclusive) up to 2020-01-02 (exclusive).
df2 = full_df.loc[(pd.to_datetime(full_df[x])>='2019-11-11') & (pd.to_datetime(full_df[x]) < '2020-01-02')]
dfs = [
    ('50D', df),
    ('10D', df.tail(10)),
    ('5D', df.tail(5)),
    ('jueves', df[pd.to_datetime(df[x]).dt.dayofweek == 3]),
    ('50D-1Y', df2),
    ('10D-Y', df2.tail(10)),
    ('5D-Y', df2.tail(5)),
    ('jueves-Y', df2[pd.to_datetime(df2[x]).dt.dayofweek == 3]),
]
lrs = [(title, linear_regression(_df,x=x,y=y), len(_df)) for title, _df in dfs]
# The regressor for the non-numeric "Fecha" column is the row position
# 0 .. size - 1, so evaluating the line at `size` gives the value one step
# past the end of each window.
lrs_p = [(title, lr_dict["m"]*size  + lr_dict["b"], lr_dict) for title, lr_dict, size in lrs]
print(lrs_p)

full -> mean: 99.99413604378421, sd: 29.088195349566618 /usr/local/lib64/python3.9/site-packages/statsmodels/stats/stattools.py:74: ValueWarning: omni_normtest is not valid with less than 8 observations; 5 samples were given. warn(“omni_normtest is not valid with less than 8 observations; %i ” /usr/local/lib64/python3.9/site-packages/statsmodels/stats/stattools.py:74: ValueWarning: omni_normtest is not valid with less than 8 observations; 5 samples were given. warn(“omni_normtest is not valid with less than 8 observations; %i ” /usr/local/lib64/python3.9/site-packages/statsmodels/stats/stattools.py:74: ValueWarning: omni_normtest is not valid with less than 8 observations; 5 samples were given. warn(“omni_normtest is not valid with less than 8 observations; %i ”

	Fecha	ventas
2510	2022-11-15	98
2517	2022-11-22	100
2524	2022-11-29	95
2531	2022-12-06	130
2538	2022-12-13	129
2545	2022-12-20	115
2552	2022-12-27	74

/usr/local/lib64/python3.9/site-packages/statsmodels/stats/stattools.py:74: ValueWarning: omni_normtest is not valid with less than 8 observations; 7 samples were given. warn(“omni_normtest is not valid with less than 8 observations; %i ” /usr/local/lib64/python3.9/site-packages/statsmodels/stats/stattools.py:74: ValueWarning: omni_normtest is not valid with less than 8 observations; 7 samples were given. warn(“omni_normtest is not valid with less than 8 observations; %i ” /usr/local/lib64/python3.9/site-packages/statsmodels/stats/stattools.py:74: ValueWarning: omni_normtest is not valid with less than 8 observations; 7 samples were given. warn(“omni_normtest is not valid with less than 8 observations; %i ” /usr/local/lib64/python3.9/site-packages/scipy/stats/_stats_py.py:1769: UserWarning: kurtosistest only valid for n>=20 … continuing anyway, n=10 warnings.warn(“kurtosistest only valid for n>=20 … continuing ” /usr/local/lib64/python3.9/site-packages/scipy/stats/_stats_py.py:1769: UserWarning: kurtosistest only valid for n>=20 … continuing anyway, n=10 warnings.warn(“kurtosistest only valid for n>=20 … continuing ” /usr/local/lib64/python3.9/site-packages/scipy/stats/_stats_py.py:1769: UserWarning: kurtosistest only valid for n>=20 … continuing anyway, n=10 warnings.warn(“kurtosistest only valid for n>=20 … continuing ” /usr/local/lib64/python3.9/site-packages/statsmodels/stats/stattools.py:74: ValueWarning: omni_normtest is not valid with less than 8 observations; 5 samples were given. warn(“omni_normtest is not valid with less than 8 observations; %i ” /usr/local/lib64/python3.9/site-packages/statsmodels/stats/stattools.py:74: ValueWarning: omni_normtest is not valid with less than 8 observations; 5 samples were given. warn(“omni_normtest is not valid with less than 8 observations; %i ” /usr/local/lib64/python3.9/site-packages/statsmodels/stats/stattools.py:74: ValueWarning: omni_normtest is not valid with less than 8 observations; 5 samples were given. 
warn(“omni_normtest is not valid with less than 8 observations; %i ” /usr/local/lib64/python3.9/site-packages/statsmodels/stats/stattools.py:74: ValueWarning: omni_normtest is not valid with less than 8 observations; 7 samples were given. warn(“omni_normtest is not valid with less than 8 observations; %i ” /usr/local/lib64/python3.9/site-packages/statsmodels/stats/stattools.py:74: ValueWarning: omni_normtest is not valid with less than 8 observations; 7 samples were given. warn(“omni_normtest is not valid with less than 8 observations; %i ” /usr/local/lib64/python3.9/site-packages/statsmodels/stats/stattools.py:74: ValueWarning: omni_normtest is not valid with less than 8 observations; 7 samples were given. warn(“omni_normtest is not valid with less than 8 observations; %i ” /usr/local/lib64/python3.9/site-packages/scipy/stats/_stats_py.py:1769: UserWarning: kurtosistest only valid for n>=20 … continuing anyway, n=10 warnings.warn(“kurtosistest only valid for n>=20 … continuing ” /usr/local/lib64/python3.9/site-packages/scipy/stats/_stats_py.py:1769: UserWarning: kurtosistest only valid for n>=20 … continuing anyway, n=10 warnings.warn(“kurtosistest only valid for n>=20 … continuing ” /usr/local/lib64/python3.9/site-packages/scipy/stats/_stats_py.py:1769: UserWarning: kurtosistest only valid for n>=20 … continuing anyway, n=10 warnings.warn(“kurtosistest only valid for n>=20 … continuing ” /usr/local/lib64/python3.9/site-packages/statsmodels/stats/stattools.py:74: ValueWarning: omni_normtest is not valid with less than 8 observations; 5 samples were given. warn(“omni_normtest is not valid with less than 8 observations; %i ” /usr/local/lib64/python3.9/site-packages/statsmodels/stats/stattools.py:74: ValueWarning: omni_normtest is not valid with less than 8 observations; 5 samples were given. 
warn(“omni_normtest is not valid with less than 8 observations; %i ” /usr/local/lib64/python3.9/site-packages/statsmodels/stats/stattools.py:74: ValueWarning: omni_normtest is not valid with less than 8 observations; 5 samples were given. warn(“omni_normtest is not valid with less than 8 observations; %i ” /usr/local/lib64/python3.9/site-packages/scipy/stats/_stats_py.py:1769: UserWarning: kurtosistest only valid for n>=20 … continuing anyway, n=8 warnings.warn(“kurtosistest only valid for n>=20 … continuing ” /usr/local/lib64/python3.9/site-packages/scipy/stats/_stats_py.py:1769: UserWarning: kurtosistest only valid for n>=20 … continuing anyway, n=8 warnings.warn(“kurtosistest only valid for n>=20 … continuing ” /usr/local/lib64/python3.9/site-packages/scipy/stats/_stats_py.py:1769: UserWarning: kurtosistest only valid for n>=20 … continuing anyway, n=8 warnings.warn(“kurtosistest only valid for n>=20 … continuing ” [(‘50D’, 96.7985, {‘m’: -0.2557, ‘b’: 109.5835, ‘r2’: 0.016, ‘r2_adj’: -0.004, ‘low_band’: 93.258, ‘hi_band’: 125.909}), (‘10D’, 105.7336, {‘m’: 1.897, ‘b’: 86.7636, ‘r2’: 0.023, ‘r2_adj’: -0.099, ‘low_band’: 33.207, ‘hi_band’: 140.32}), (‘5D’, 128.0, {‘m’: 7.4, ‘b’: 91.0, ‘r2’: 0.154, ‘r2_adj’: -0.128, ‘low_band’: 12.875, ‘hi_band’: 169.125}), (‘jueves’, 104.7144, {‘m’: -0.2857, ‘b’: 106.7143, ‘r2’: 0.001, ‘r2_adj’: -0.199, ‘low_band’: 68.112, ‘hi_band’: 145.317}), (‘50D-1Y’, 104.0542, {‘m’: 0.0391, ‘b’: 102.021, ‘r2’: 0.001, ‘r2_adj’: -0.019, ‘low_band’: 87.626, ‘hi_band’: 116.416}), (‘10D-Y’, 97.6, {‘m’: -1.0, ‘b’: 107.6, ‘r2’: 0.006, ‘r2_adj’: -0.118, ‘low_band’: 50.622, ‘hi_band’: 164.578}), (‘5D-Y’, 96.3, {‘m’: 0.3, ‘b’: 94.8, ‘r2’: 0.0, ‘r2_adj’: -0.333, ‘low_band’: 4.514, ‘hi_band’: 185.086}), (‘jueves-Y’, 109.2859, {‘m’: 1.4524, ‘b’: 97.6667, ‘r2’: 0.019, ‘r2_adj’: -0.145, ‘low_band’: 53.559, ‘hi_band’: 141.774})]

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Forecasting

UANL

forecasting

FilesExpand file tree

forecasting.org

Latest commit

History

forecasting.org

File metadata and controls

Forecasting

UANL

forecasting