Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ jobs:
test:
name: Test code
runs-on: ubuntu-latest

env:
ASIC_FILE_CONFIG_PATH: tests/TEST_ASIC_FILE_CONFIG.jsonl

strategy:
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
# Do not ignore test files
!tests/**/*.tx*
!tests/**/*.csv

ASIC_FILE_CONFIG.jsonl
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down
45 changes: 33 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,28 @@ Interfaz de línea de comandos para descargar los archivos de las publicaciones
> **Requiere acceso al FTP del ASIC en `xmftps.xm.com.co`**

------
## 📝 Archivo de configuración necesario
El sistema requiere que definas la variable de entorno `ASIC_FILE_CONFIG_PATH` apuntando a la ruta del archivo .jsonl que contiene la configuración de los tipos de archivos soportados.
Este archivo debe contener objetos JSON en formato newline-delimited (un JSON por línea).

#### Ejemplo de variable de entorno
```txt
# Windows
$Env:ASIC_FILE_CONFIG_PATH = "C:\Users\Usuario\Documents\asic_file_config.jsonl"

# Linux
export ASIC_FILE_CONFIG_PATH="/home/Usuario/asic_file_config.jsonl"

# Mac
export ASIC_FILE_CONFIG_PATH="/Users/Usuario/asic_file_config.jsonl"
```
### 📋 Estructura esperada:
```json
{"code":"adem", "visibility": "public","name_pattern":"(?P<kind>adem)(?P<name_month>[0-9]{2})(?P<name_day>[0-9]{2}).(?P<ext_versioned>[a-zA-Z0-9]+)", "location_pattern":"/RUTA/PUBLICA/DEL/FTP/(?P<location_year>[0-9]{4})-(?P<location_month>[0-9]{2})/","description":"Los archivos de demanda comercial"}
```


------
## Ejemplos

Antes de poder usar los comandos que usan el servidor FTP de XM, debes proveer la información de autenticación (se recomienda usar variables de entorno).
Expand All @@ -28,24 +49,24 @@ Listing latest published settlements by ASIC in the last 20 days
2022-06:TXR -- published: 2022-07-05
```

1. Listar los archivos publicados para los meses de mayo y junio de 2022 con version de liquidación .tx3:
2. Listar los archivos publicados para los meses de mayo y junio de 2022 con versión de liquidación .tx3:

```txt
> asic list --month 2022-06 --month 2022-05 --version .tx3
\INFORMACION_XM\PUBLICOK\SIC\COMERCIA\2022-05\adem0501.Tx3
\INFORMACION_XM\PUBLICOK\SIC\COMERCIA\2022-05\adem0502.Tx3
\INFORMACION_XM\PUBLICOK\SIC\COMERCIA\2022-05\adem0503.Tx3
\INFORMACION_XM\PUBLICOK\SIC\COMERCIA\2022-05\adem0504.Tx3
\RUTA\PUBLICA\DEL\FTP\2022-05\adem0501.Tx3
\RUTA\PUBLICA\DEL\FTP\2022-05\adem0502.Tx3
\RUTA\PUBLICA\DEL\FTP\2022-05\adem0503.Tx3
\RUTA\PUBLICA\DEL\FTP\2022-05\adem0504.Tx3
...
\INFORMACION_XM\PUBLICOK\SIC\COMERCIA\2022-05\pep0530.tx3
\INFORMACION_XM\PUBLICOK\SIC\COMERCIA\2022-05\pep0531.tx3
\INFORMACION_XM\PUBLICOK\SIC\COMERCIA\2022-05\sntie05.tx3
\INFORMACION_XM\PUBLICOK\SIC\COMERCIA\2022-05\afac05.tx3
\INFORMACION_XM\PUBLICOK\SIC\COMERCIA\2022-05\trsm05.tx3
\INFORMACION_XM\PUBLICOK\SIC\COMERCIA\2022-05\ldcbmr05.tx3
\RUTA\PUBLICA\DEL\FTP\2022-05\pep0530.tx3
\RUTA\PUBLICA\DEL\FTP\2022-05\pep0531.tx3
\RUTA\PUBLICA\DEL\FTP\2022-05\sntie05.tx3
\RUTA\PUBLICA\DEL\FTP\2022-05\afac05.tx3
\RUTA\PUBLICA\DEL\FTP\2022-05\trsm05.tx3
\RUTA\PUBLICA\DEL\FTP\2022-05\ldcbmr05.tx3
```

1. Descargar los archivos publicados para los meses de mayo y junio de 2022 con version de liquidación .tx3 a la carpeta local `./asic-files/`:
3. Descargar los archivos publicados para los meses de mayo y junio de 2022 con versión de liquidación .tx3 a la carpeta local `./asic-files/`:

```txt
> asic download --month 2022-06 --month 2022-05 --version .tx3 asic-files
Expand Down
33 changes: 24 additions & 9 deletions src/asic/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def main(
ftps_port: int = typer.Option(default=210, envvar="ASIC_FTPS_PORT"),
ftps_user: str = typer.Option(..., envvar="ASIC_FTPS_USER", prompt=True),
ftps_password: str = typer.Option(..., envvar="ASIC_FTPS_PASSWORD", prompt=True),
agent: str = typer.Option(..., envvar="ASIC_AGENT", prompt=True),
agent: str = typer.Option(default=None, envvar="ASIC_AGENT", help="Agent's asic code, required for private files"),
):
"""
FTP authentication info should be provided as environment variables (ASIC_FTP_*)
Expand Down Expand Up @@ -111,6 +111,9 @@ def months_callback(values: list[str]) -> list[str]:


def file_kinds_callback(values: list[str]) -> list[str]:
if values is None:
raise typer.BadParameter(SUPPORTED_FILE_KINDS_ERROR_MESSAGE)

files = sorted(
{validate_file_kind(v) for v in values},
reverse=True,
Expand Down Expand Up @@ -165,7 +168,7 @@ def list_files(
callback=months_callback,
help=YEAR_MONTH_MATCH_ERROR_MESSAGE,
),
agent: Optional[str] = typer.Option(...,
agent: Optional[str] = typer.Option(default=None,
envvar="ASIC_AGENT",
prompt=True,
help="Agent's asic code, required for private files"),
Expand Down Expand Up @@ -245,13 +248,16 @@ def download(
is_preprocessing_required: bool = typer.Option(
False, "--prepro", help="Preprocess each file after donwload"
),
prepocessed_dir: bool = typer.Option(
False, "--prepro-dirs", help="Create directories for preprocessed files if not present"
),
months: list[str] = typer.Option(
...,
"--month",
callback=months_callback,
help=YEAR_MONTH_MATCH_ERROR_MESSAGE,
),
agent: Optional[str] = typer.Option(...,
agent: Optional[str] = typer.Option(default=None,
envvar="ASIC_AGENT",
prompt=True,
help="Agent's asic code, required for private files"),
Expand Down Expand Up @@ -309,7 +315,7 @@ def download(

logger.info(f"Total files to download: {len(file_list)}")

for f in rich.progress.track(file_list, description="Dowloading files..."):
for f in rich.progress.track(file_list, description="Downloading files..."):
logger.info(f"File: {f.path}")
remote = f
local = destination / str(f.path)[1:] # hack to remove root anchor
Expand Down Expand Up @@ -347,10 +353,19 @@ def download(

preprocessed = f.preprocess(local)
write_to = preprocessed_path.with_suffix(".csv")
preprocessed.to_csv(
write_to,
index=False,
encoding="utf-8-sig",
)
try:
if prepocessed_dir:
os.makedirs(preprocessed_path.parent, exist_ok=True)
preprocessed.to_csv(
write_to,
index=False,
encoding="utf-8-sig",
)
except Exception as e:
if "Cannot save file into a non-existent directory: " in str(e):
raise FileNotFoundError(f"{e}. Use the '--prepro-dirs' flag to create the folder")

raise e


ftps.quit()
10 changes: 9 additions & 1 deletion src/asic/config.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import importlib.resources
import json as json
import pathlib
import re
from enum import Enum
from typing import Annotated
import os

from pydantic import BaseModel, StringConstraints

Expand Down Expand Up @@ -122,7 +124,13 @@ def load_asic_file_config() -> dict[str, ASICFileConfig]:
"""Return a list of ASIC file configurations"""
# This is a stream-like object. If you want the actual info, call
# stream.read()
resource = importlib.resources.files("asic").joinpath("data/ASIC_FILE_CONFIG.jsonl")
path_env = os.getenv("ASIC_FILE_CONFIG_PATH")
if path_env is None:
raise ValueError("\nASIC_FILE_CONFIG_PATH environment variable not set, you can create the file with the following structure:\n"
"""{"code":"adem", "visibility": "public","name_pattern":"(?P<kind>adem)(?P<name_month>[0-9]{2})(?P<name_day>[0-9]{2}).(?P<ext_versioned>[a-zA-Z0-9]+)", "location_pattern":"/RUTA/PUBLICA/DEL/FTP/(?P<location_year>[0-9]{4})-(?P<location_month>[0-9]{2})/","description":"Los archivos de demanda comercial"}\n"""
"then save your file as .jsonl and set the file path in the environment variable")

resource = pathlib.Path(path_env)
lines = []
with resource.open("r") as src:
for line in src:
Expand Down
21 changes: 0 additions & 21 deletions src/asic/data/ASIC_FILE_CONFIG.jsonl

This file was deleted.

21 changes: 3 additions & 18 deletions src/asic/files/definitions/adem.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
# Third party imports
import pandas as pd

from asic import ASIC_FILE_CONFIG
from asic.files.file import AsicFile, FileKind, VisibilityEnum

# Local application imports
Expand Down Expand Up @@ -54,8 +55,8 @@ class ADEM(AsicFile):
kind = FileKind.ADEM
visibility = VisibilityEnum.PUBLIC
name_pattern = "(?P<kind>adem)(?P<name_month>[0-9]{2})(?P<name_day>[0-9]{2}).(?P<ext_versioned>[tT]{1}[xX]{1}[a-zA-Z0-9]+)"
location_pattern = "/informacion_xm/publicok/sic/comercia/(?P<location_year>[0-9]{4})-(?P<location_month>[0-9]{2})/"
location = "/informacion_xm/publicok/sic/comercia/{location_year:04}-{location_month:02}/"
location_pattern = ASIC_FILE_CONFIG[kind].location_pattern
location = ASIC_FILE_CONFIG[kind].location_template
description = "Los archivos de demanda comercial"

_format = FORMAT
Expand Down Expand Up @@ -149,19 +150,3 @@ def preprocess(self, target: Path | BytesIO | StringIO) -> pd.DataFrame:
return_cols = ["FECHA_HORA", "AGENTE", "DMRE_VALOR", "PRRE_VALOR"]
return total[return_cols]


if __name__ == "__main__":
import pathlib

path = pathlib.Path(
"./borrar/informacion_xm/PublicoK/SIC/COMERCIA/2023-10/adem1001.Tx2"
)
purepath = pathlib.PureWindowsPath("/") / pathlib.PureWindowsPath(
path.as_posix()
).relative_to("./borrar")
file = ADEM.from_remote_path(purepath)
print(file)
data = file.read(path)
print(data.head(10))
prepro_data = file.preprocess(path)
print(prepro_data.head(10))
22 changes: 3 additions & 19 deletions src/asic/files/definitions/aenc.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
# Third party imports
import pandas as pd

from asic import ASIC_FILE_CONFIG
from asic.files.file import AsicFile, FileKind, VisibilityEnum

# Local application imports
Expand Down Expand Up @@ -53,8 +54,8 @@ class AENC(AsicFile):
kind = FileKind.AENC
visibility = VisibilityEnum.AGENT
name_pattern = "(?P<kind>aenc)(?P<name_month>[0-9]{2})(?P<name_day>[0-9]{2}).(?P<ext_versioned>[a-zA-Z0-9]+)"
location_pattern = "/informacion_xm/USUARIOSK/(?P<location_agent>[a-zA-Z]{4})/SIC/COMERCIA/(?P<location_year>[0-9]{4})-(?P<location_month>[0-9]{2})/"
location = "/informacion_xm/usuariosk/{location_agent}/sic/comercia/{location_year:04}-{location_month:02}/"
location_pattern = ASIC_FILE_CONFIG[kind].location_pattern
location = ASIC_FILE_CONFIG[kind].location_template
description = "Los archivos de demanda de agente por frontera"
# path = None
# year = None
Expand Down Expand Up @@ -152,20 +153,3 @@ def preprocess(self, target: Path | BytesIO | StringIO) -> pd.DataFrame:
"VALOR",
]
return total[return_cols]


if __name__ == "__main__":
import pathlib

path = pathlib.Path(
"./borrar/informacion_xm/UsuariosK/enbc/SIC/COMERCIA/2023-10/aenc1001.Tx2"
)
purepath = pathlib.PureWindowsPath("/") / pathlib.PureWindowsPath(
path.as_posix()
).relative_to("./borrar")
file = AENC.from_remote_path(purepath)
print(file)
data = file.read(path)
print(data.head(10))
prepro_data = file.preprocess(path)
print(prepro_data.head(10))
5 changes: 3 additions & 2 deletions src/asic/files/definitions/afac.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import pandas as pd

from asic import ASIC_FILE_CONFIG
from asic.files.file import AsicFile, FileKind, VisibilityEnum

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -137,8 +138,8 @@ class AFAC(AsicFile):
kind = FileKind.AFAC
visibility = VisibilityEnum.PUBLIC
name_pattern = "(?P<kind>afac)(?P<name_month>[0-9]{2}).(?P<ext_versioned>[a-zA-Z0-9]+)"
location_pattern = "/informacion_xm/publicok/sic/comercia/(?P<location_year>[0-9]{4})-(?P<location_month>[0-9]{2})/"
location = "/informacion_xm/publicok/sic/comercia/{location_year:04}-{location_month:02}/"
location_pattern = ASIC_FILE_CONFIG[kind].location_pattern
location = ASIC_FILE_CONFIG[kind].location_template
description = "Muestra para cada uno de los agentes, todos los conceptos de la liquidación del Mercado Colombiano, con los cuales se pueden consolidar las Compras y Ventas Totales del Agente para un proceso de liquidación o ajuste mensual."
_format = FORMAT

Expand Down
5 changes: 3 additions & 2 deletions src/asic/files/definitions/balcttos.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
# Third party imports
import pandas as pd

from asic import ASIC_FILE_CONFIG
from asic.files.file import AsicFile, FileKind, VisibilityEnum

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -53,8 +54,8 @@ class BALCTTOS(AsicFile):
kind = FileKind.BALCTTOS
visibility = VisibilityEnum.AGENT
name_pattern = "(?P<kind>BalCttos)(?P<name_month>[0-9]{2})(?P<name_day>[0-9]{2}).(?P<ext_versioned>[a-zA-Z0-9]+)"
location_pattern = "/informacion_xm/USUARIOSK/(?P<location_agent>[a-zA-Z]{4})/SIC/COMERCIA/(?P<location_year>[0-9]{4})-(?P<location_month>[0-9]{2})/"
location = "/informacion_xm/usuariosk/{location_agent}/sic/comercia/{location_year:04}-{location_month:02}/"
location_pattern = ASIC_FILE_CONFIG[kind].location_pattern
location = ASIC_FILE_CONFIG[kind].location_template
description = "Los archivos de despacho de demanda por mercados R y NR, Nacional, TIE e Internacional"

_format = FORMAT
Expand Down
Loading