Global Architecture

Authors:

Cao Tri DO <caotri.do88@gmail.com>

Version:

2025-09

Objectives

This article is intended to provide a comprehensive overview of the global architecture of the solution.

Simple architecture workflow diagram

        graph TD
    A["📦 Data Source (Kaggle / Local)"] -->|📤 Upload| B["🗄️ Databricks Volume (Raw Data)"]
    B -->|"🧹 Cleaning & Preprocessing<br>(scripts/run_cleanup_data.py, data/cleanup.py)"| C["🧪 Data Processing (Feature Engineering)"]
    C -->|✂️ Train / Test Split| D["🧠 MLflow Experiment<br>(train_register_model.py)"]
    D -->|📊 Track Experiments & Metrics| E["📈 MLflow Tracking Server"]
    E -->|🏷️ Model Versioning| F["📚 MLflow Model Registry"]
    F -->|🚀 Deployment & Integration| G["☁️ Databricks Workspace (Dev / Acc / Prod)"]
    G -->|📊 Monitoring & Reporting| H["📉 Dashboard / Visualization (visualization/)"]

    subgraph "🧰 Tooling & Environment"
        I["🧩 Devbox + UV + Taskfile (Reproducibility & Environments)"]
        J["⚙️ GitHub / GitLab CI-CD (Automated CI/CD)"]
        K["🧼 Pre-commit / Ruff / Commitizen (Code Quality & Standardization)"]
    end

    I --> G
    J --> G
    K --> G

    style A fill:#e6f7ff,stroke:#007acc,stroke-width:2px
    style B fill:#e6f7ff,stroke:#007acc,stroke-width:2px
    style C fill:#e6f7ff,stroke:#007acc,stroke-width:2px
    style D fill:#fff2cc,stroke:#f1c232,stroke-width:2px
    style E fill:#fff2cc,stroke:#f1c232,stroke-width:2px
    style F fill:#d9ead3,stroke:#6aa84f,stroke-width:2px
    style G fill:#d9ead3,stroke:#6aa84f,stroke-width:2px
    style H fill:#d9ead3,stroke:#6aa84f,stroke-width:2px
    style I fill:#f9cb9c,stroke:#e69138,stroke-width:2px
    style J fill:#f9cb9c,stroke:#e69138,stroke-width:2px
    style K fill:#f9cb9c,stroke:#e69138,stroke-width:2px
    

Simple class architecture workflow diagram

        flowchart TB
%% Local Dev Environment
subgraph "Local Dev Environment"
    DEV_SCRIPTS["Taskfile & Devbox (Taskfile.yml, devbox.json)"]:::compute
    DEV_SCRIPTS -->|runs| ING_SCRIPTS
end

%% Databricks Workspace
subgraph "Databricks Workspace"
    direction TB
    RAW_DATA["External Raw Data (data/raw)"]:::data
    click RAW_DATA "https://github.com/end-to-end-mlops-databricks-4/marvelous-databricks-course-malganis35/tree/main/data/raw"

    ING_SCRIPTS["Ingestion Scripts (scripts/*.py & notebooks/utils/run_upload_data.py)"]:::compute
    click ING_SCRIPTS "https://github.com/end-to-end-mlops-databricks-4/marvelous-databricks-course-malganis35/blob/main/scripts/00.process_initial_data.py"
    click ING_SCRIPTS "https://github.com/end-to-end-mlops-databricks-4/marvelous-databricks-course-malganis35/blob/main/scripts/01.process_new_data.py"
    click ING_SCRIPTS "https://github.com/end-to-end-mlops-databricks-4/marvelous-databricks-course-malganis35/blob/main/notebooks/utils/run_upload_data.py"

    VOLUME["Databricks Volume"]:::data

    PROC_JOBS["Data Processing Jobs (notebooks/process_data.py, notebooks/process_new_data.py)"]:::compute
    click PROC_JOBS "https://github.com/end-to-end-mlops-databricks-4/marvelous-databricks-course-malganis35/blob/main/notebooks/process_data.py"
    click PROC_JOBS "https://github.com/end-to-end-mlops-databricks-4/marvelous-databricks-course-malganis35/blob/main/notebooks/process_new_data.py"

    DELTA["Delta Tables (data/interim & data/processed)"]:::data
    click DELTA "https://github.com/end-to-end-mlops-databricks-4/marvelous-databricks-course-malganis35/tree/main/data/interim"
    click DELTA "https://github.com/end-to-end-mlops-databricks-4/marvelous-databricks-course-malganis35/tree/main/data/processed"

    FEAT_ENG["Feature Engineering (src/.../data_processor.py)"]:::compute
    click FEAT_ENG "https://github.com/end-to-end-mlops-databricks-4/marvelous-databricks-course-malganis35/blob/main/src/hotel_reservation/feature/data_processor.py"

    FEATURE_STORE["Feature Store Tables"]:::data
    TRAIN_JOBS["Model Training Jobs (notebooks/train_register_*.py, scripts/02*register*.py)"]:::compute
    click TRAIN_JOBS "https://github.com/end-to-end-mlops-databricks-4/marvelous-databricks-course-malganis35/blob/main/notebooks/train_register_basic_model.py"
    click TRAIN_JOBS "https://github.com/end-to-end-mlops-databricks-4/marvelous-databricks-course-malganis35/blob/main/notebooks/train_register_custom_model.py"
    click TRAIN_JOBS "https://github.com/end-to-end-mlops-databricks-4/marvelous-databricks-course-malganis35/blob/main/notebooks/train_register_fe_model.py"
    click TRAIN_JOBS "https://github.com/end-to-end-mlops-databricks-4/marvelous-databricks-course-malganis35/blob/main/scripts/02.train_register_model.py"
    click TRAIN_JOBS "https://github.com/end-to-end-mlops-databricks-4/marvelous-databricks-course-malganis35/blob/main/scripts/02.b.train_register_custom_model.py"

    MLFLOW_TRACKING["MLflow Tracking Server"]:::data
    click MLFLOW_TRACKING "https://github.com/end-to-end-mlops-databricks-4/marvelous-databricks-course-malganis35/blob/main/notebooks/utils/run_create_mlflow_workspace.py"
    click MLFLOW_TRACKING "https://github.com/end-to-end-mlops-databricks-4/marvelous-databricks-course-malganis35/blob/main/notebooks/utils/run_cleanup_mlflow_experiments.py"

    MODEL_REGISTRY["Model Registry & Unity Catalog"]:::registry

    DEPLOY_JOB["Deployment Job (databricks.yml + resources/*.yml)"]:::compute
    click DEPLOY_JOB "https://github.com/end-to-end-mlops-databricks-4/marvelous-databricks-course-malganis35/blob/main/databricks.yml"

    SERVING["Databricks Serving Endpoint"]:::registry
    click SERVING "https://github.com/end-to-end-mlops-databricks-4/marvelous-databricks-course-malganis35/blob/main/src/hotel_reservation/serving/model_serving.py"

    %% Flows
    RAW_DATA -->|CSV upload| ING_SCRIPTS -->|Databricks API| VOLUME
    VOLUME -->|Delta write| PROC_JOBS -->|writes| DELTA
    DELTA -->|Feature lookup| FEAT_ENG -->|registers| FEATURE_STORE
    FEATURE_STORE -->|training data| TRAIN_JOBS -->|"MLflow SDK"| MLFLOW_TRACKING
    MLFLOW_TRACKING -->|"registers"| MODEL_REGISTRY -->|deploys| DEPLOY_JOB -->|bundle deploy| SERVING
end

%% Client Apps
CLIENT_APP["Streamlit Apps (app/app.py, app/app_monitoring.py)"]:::app
click CLIENT_APP "https://github.com/end-to-end-mlops-databricks-4/marvelous-databricks-course-malganis35/blob/main/app/app.py"
click CLIENT_APP "https://github.com/end-to-end-mlops-databricks-4/marvelous-databricks-course-malganis35/blob/main/app/app_monitoring.py"

FUNC_TESTS["Functional Tests (tests/functional/*.py, example.http)"]:::app
click FUNC_TESTS "https://github.com/end-to-end-mlops-databricks-4/marvelous-databricks-course-malganis35/blob/main/tests/functional/basic_model_serving_call.py"
click FUNC_TESTS "https://github.com/end-to-end-mlops-databricks-4/marvelous-databricks-course-malganis35/blob/main/tests/functional/custom_model_serving_call.py"
click FUNC_TESTS "https://github.com/end-to-end-mlops-databricks-4/marvelous-databricks-course-malganis35/blob/main/tests/functional/example.http"

CLIENT_APP -->|HTTP request| SERVING
FUNC_TESTS -->|HTTP call| SERVING

%% Monitoring
MON_JOBS["Model Monitoring Jobs"]:::compute
MON_DASH["Monitoring Dashboard (src/.../monitoring.py, notebooks/create_monitoring.py)"]:::app
click MON_DASH "https://github.com/end-to-end-mlops-databricks-4/marvelous-databricks-course-malganis35/blob/main/src/hotel_reservation/visualization/monitoring.py"
click MON_DASH "https://github.com/end-to-end-mlops-databricks-4/marvelous-databricks-course-malganis35/blob/main/notebooks/create_monitoring.py"
click MON_DASH "https://github.com/end-to-end-mlops-databricks-4/marvelous-databricks-course-malganis35/blob/main/notebooks/refresh_monitoring.py"

SERVING -->|logs| MON_JOBS -->|updates| MON_DASH

%% CI/CD Pipeline
subgraph "CI/CD Pipeline"
    direction TB
    CI["CI/CD Pipelines (.github/workflows, .gitlab-ci.yml)"]:::ci
    click CI "https://github.com/end-to-end-mlops-databricks-4/marvelous-databricks-course-malganis35/blob/main/.github/workflows/ci.yml"
    click CI "https://github.com/end-to-end-mlops-databricks-4/marvelous-databricks-course-malganis35/blob/main/.github/workflows/cd.yml"
    click CI "https://github.com/end-to-end-mlops-databricks-4/marvelous-databricks-course-malganis35/blob/main/.gitlab/.gitlab-ci.yml"
    CI -->|runs tests, docs, deploy| DEPLOY_JOB
end

%% Configuration & Utils
subgraph "Configuration & Utilities"
    ENV["Env Config & Loader (project_config.yml, .env.template)"]:::data
    click ENV "https://github.com/end-to-end-mlops-databricks-4/marvelous-databricks-course-malganis35/blob/main/project_config.yml"
    click ENV "https://github.com/end-to-end-mlops-databricks-4/marvelous-databricks-course-malganis35/blob/main/.env.template"
    click ENV "https://github.com/end-to-end-mlops-databricks-4/marvelous-databricks-course-malganis35/blob/main/src/hotel_reservation/utils/env_loader.py"
    click ENV "https://github.com/end-to-end-mlops-databricks-4/marvelous-databricks-course-malganis35/blob/main/src/hotel_reservation/utils/config.py"

    DB_UTILS["Databricks Utils (src/.../databricks_utils.py)"]:::compute
    click DB_UTILS "https://github.com/end-to-end-mlops-databricks-4/marvelous-databricks-course-malganis35/blob/main/src/hotel_reservation/utils/databricks_utils.py"
    click DB_UTILS "https://github.com/end-to-end-mlops-databricks-4/marvelous-databricks-course-malganis35/blob/main/src/hotel_reservation/data/databricks_utils.py"
end

ENV --> ING_SCRIPTS
ENV --> TRAIN_JOBS
ENV --> DEPLOY_JOB

classDef data fill:#a8d5a2,stroke:#333,stroke-width:1px
classDef compute fill:#add8e6,stroke:#333,stroke-width:1px
classDef registry fill:#ffa500,stroke:#333,stroke-width:1px
classDef app fill:#d8bfd8,stroke:#333,stroke-width:1px
classDef ci fill:#d3d3d3,stroke:#333,stroke-width:1px