1919#################################
2020
2121import pandas as pd
22- from os import listdir , getenv
22+ from os import getenv
2323from exchange_calendars import get_calendar
2424from dotenv import load_dotenv
2525from arcticdb import Arctic
2626import warnings
2727
2828warnings .filterwarnings ('ignore' )
29- dotenv_path = '/root/.env'
30- load_dotenv (dotenv_path )
3129
32- endpoint = getenv ("ENDPOINT" )
33- db = getenv ("DB" )
34- access_key = getenv ("ACCESS_KEY" )
35- secret_key = getenv ("SECRET_KEY" )
36-
37- ac = Arctic (f's3s://{ endpoint } :{ db } ?access={ access_key } &secret={ secret_key } ' )
def _get_arctic_connection():
    """Build an Arctic client from credentials stored in /root/.env.

    Loads the dotenv file, reads ENDPOINT, DB, ACCESS_KEY and SECRET_KEY
    from the environment, and raises if any of them is missing.

    Returns:
        Arctic: a connected arcticdb client for the configured S3 store.

    Raises:
        ValueError: when one or more required variables are unset/empty.
    """
    load_dotenv('/root/.env')
    config = {name: getenv(name)
              for name in ("ENDPOINT", "DB", "ACCESS_KEY", "SECRET_KEY")}
    # Fail fast with one clear message rather than a cryptic connection error.
    if not all(config.values()):
        raise ValueError("Faltan variables de entorno. Revisa tu archivo .env (ENDPOINT, DB, ACCESS_KEY, SECRET_KEY)")
    uri = 's3s://{ENDPOINT}:{DB}?access={ACCESS_KEY}&secret={SECRET_KEY}'.format(**config)
    return Arctic(uri)
3840
3941
4042def bse_data (environ ,
@@ -51,73 +53,75 @@ def bse_data(environ,
5153
5254 symbols = ['XLE.US' ,'XLF.US' ,'XLI.US' ,'XLK.US' ,'XLP.US' ,'XLU.US' ,'XLV.US' ,'XLY.US' ,'XLB.US' ,'XLC.US' ,'ITA.US' ]
5355 if not symbols :
54-
5556 raise ValueError ("No se han encontrado TICKERS en el QA DATALAKE" )
56-
57- divs_splits = {"divs" : pd .DataFrame (columns = ["sid" ,"amount" ,"ex_date" ,"record_date" ,"declared_date" ,"pay_date" ,]),"splits" : pd .DataFrame (columns = ["sid" , "ratio" , "effective_date" ]),}
58-
57+
58+ divs_splits = {
59+ "divs" : pd .DataFrame (columns = ["sid" ,"amount" ,"ex_date" ,"record_date" ,"declared_date" ,"pay_date" ]),
60+ "splits" : pd .DataFrame (columns = ["sid" , "ratio" , "effective_date" ]),
61+ }
62+
5963 metadata = pd .DataFrame (columns = ('start_date' ,'end_date' ,'auto_close_date' ,'symbol' ,'exchange' ))
6064 sessions = calendar .sessions_in_range (start_session , end_session )
6165
62- daily_bar_writer .write (process_stocks (symbols , sessions , metadata , divs_splits ))
66+ ac = _get_arctic_connection ()
67+ daily_bar_writer .write (process_stocks (ac , symbols , sessions , metadata , divs_splits ))
6368
6469 metadata ["exchange" ] = "QAX"
6570 exchange = {'exchange' : 'QAX' , 'canonical_name' : 'QUANTARMY BACKTEST' , 'country_code' : 'US' }
66- exchange_df = pd .DataFrame (exchange , index = [0 ])
67-
68-
71+ exchange_df = pd .DataFrame (exchange , index = [0 ])
6972
7073 divs_splits ["divs" ]["sid" ] = divs_splits ["divs" ]["sid" ].astype (int )
7174 divs_splits ["splits" ]["sid" ] = divs_splits ["splits" ]["sid" ].astype (int )
72- daily_bar_writer .write (process_stocks (symbols , sessions , metadata , divs_splits ))
7375 asset_db_writer .write (equities = metadata , exchanges = exchange_df )
7476 adjustment_writer .write (splits = divs_splits ["splits" ], dividends = divs_splits ["divs" ])
7577
def process_stocks(ac, symbols, sessions, metadata, divs_splits):
    """Yield (sid, OHLCV DataFrame) pairs for the daily-bar writer.

    For each symbol: read its price history from the Arctic prices library,
    trim to 2010 onwards, align it to NYSE trading sessions, forward-fill
    gaps, record one metadata row, and append any dividend / split rows to
    the shared accumulators.

    Args:
        ac: connected arcticdb ``Arctic`` client.
        symbols: list of ticker strings (e.g. ``'XLE.US'``).
        sessions: bundle-wide session index from the caller. Kept for
            interface compatibility; per-symbol sessions are recomputed
            from the NYSE calendar below.
        metadata: DataFrame mutated in place — one row per sid with
            (start_date, end_date, auto_close_date, symbol, exchange).
        divs_splits: dict with ``'divs'`` and ``'splits'`` DataFrames,
            mutated in place.

    Yields:
        tuple: ``(sid, df)`` where ``df`` is the session-aligned OHLCV frame.

    Raises:
        ValueError: if a symbol has no price data on/after 2010.
    """
    my_cal = get_calendar('NYSE')
    # Hoist library handles out of the loop: one lookup per library,
    # not one per symbol.
    prices = ac.get_library('prices.etfs.us.stable')
    divs_lib = ac.get_library('divs.etfs.us.stable')
    splits_lib = ac.get_library('splits.etfs.us.stable')

    for sid, symbol in enumerate(symbols):
        print('[QA DATALAKE] Loading {}...'.format(symbol))
        df = prices.read(symbol).data
        df = df['2010':]  # drop pre-2010 history
        if df.empty:
            # Fail loudly with the offending ticker instead of letting
            # df.index[0] raise an opaque IndexError.
            raise ValueError('No price data on/after 2010 for {}'.format(symbol))
        start_date = df.index[0]
        end_date = df.index[-1]
        # Restrict to this symbol's own trading sessions, then reindex so
        # every session has a row and forward-fill the holes.
        sym_sessions = my_cal.sessions_in_range(start_date, end_date)
        df = df[df.index.isin(sym_sessions)]
        # NOTE(review): assumes the price index is tz-naive, matching the
        # sessions once stripped of tz — confirm against the datalake schema.
        df = df.reindex(sym_sessions.tz_localize(None))[start_date:end_date]
        df = df.ffill()
        df.dropna(inplace=True)
        # Auto-close one calendar day after the last available bar.
        ac_date = end_date + pd.Timedelta(days=1)
        metadata.loc[sid] = start_date, end_date, ac_date, symbol, 'QAX'

        if divs_lib.has_symbol(symbol):
            data_divs = divs_lib.read(symbol).data.reset_index()
            div = pd.DataFrame({
                'ex_date': data_divs['date'],
                'record_date': data_divs['recordDate'],
                'declared_date': data_divs['declarationDate'],
                'pay_date': data_divs['paymentDate'],
                'amount': data_divs['value'],
                'sid': sid,
            })
            # ignore_index=True renumbers 0..n-1, identical to manually
            # extending the accumulator's RangeIndex.
            divs_splits['divs'] = pd.concat([divs_splits['divs'], div],
                                            axis=0, ignore_index=True)
            print('[QA DATALAKE] DIVS loaded for', symbol)

        if splits_lib.has_symbol(symbol):
            data_splits = splits_lib.read(symbol).data.reset_index()
            split = pd.DataFrame({
                'effective_date': data_splits['date'],
                'ratio': data_splits['split'],
                'sid': sid,
            })
            divs_splits['splits'] = pd.concat([divs_splits['splits'], split],
                                              axis=0, ignore_index=True)
            print('[QA DATALAKE] SPLITS loaded for', symbol)

        yield sid, df
0 commit comments