# Example 4: Fact Table (Star Schema)
# Runnable version of docs/examples/canonical/04_fact_table.md
#
# This example builds a complete star schema:
# - dim_customer (dimension pattern with SCD1)
# - dim_product (dimension pattern with SCD1)
# - dim_date (date_dimension pattern - generated with 366 rows)
# - fact_sales (fact pattern with FK lookups to all dimensions)
#
# Key demonstration: Orphan handling - customer_id=999 maps to customer_sk=0

# Project identifier for this example configuration.
project: sales_star_schema

# Named storage locations; nodes refer to them through `connection:` keys.
connections:
  # Input side: the pipelines read customers.csv, products.csv and
  # orders.csv from here.
  # NOTE(review): base_path is relative - confirm whether it resolves against
  # this file's directory or the working directory.
  source:
    type: local
    base_path: ../sample_data
  # Output side: dimension and fact parquet files are written here, and the
  # story/system sections below also point at it.
  gold:
    type: local
    base_path: ./data/gold

# NOTE(review): presumably where run reports ("stories") are stored - confirm.
story:
  connection: gold
  path: stories

# NOTE(review): presumably framework bookkeeping/state storage - confirm.
system:
  connection: gold
  path: _system

pipelines:
  # ==========================================
  # PIPELINE 1: Build all dimensions
  # Writes dim_customer, dim_product and dim_date as parquet files on the
  # gold connection; the build_facts pipeline reads those files back.
  # ==========================================
  - pipeline: build_dimensions
    layer: gold
    nodes:
      # ------------------------------------------
      # Customer Dimension (SCD1)
      # customers.csv -> dim_customer.parquet
      # ------------------------------------------
      - name: dim_customer
        # Input: customers.csv (header row enabled) on the source connection.
        read:
          connection: source
          format: csv
          path: customers.csv
          options:
            header: true

        # Dimension pattern: customer_id is the natural key and customer_sk
        # the surrogate key column.
        pattern:
          type: dimension
          params:
            natural_key: customer_id
            surrogate_key: customer_sk
            scd_type: 1
            # Attribute columns listed for change tracking.
            track_cols:
              - name
              - email
              - tier
              - city
            # The file header says orphaned fact rows (customer_id=999)
            # resolve to customer_sk=0.
            # NOTE(review): presumably this flag creates that sk=0 row -
            # confirm.
            unknown_member: true

        # Output: dim_customer.parquet on gold, written in overwrite mode.
        write:
          connection: gold
          format: parquet
          path: dim_customer.parquet
          mode: overwrite
      
      # ------------------------------------------
      # Product Dimension (SCD1)
      # products.csv -> dim_product.parquet
      # ------------------------------------------
      - name: dim_product
        # Input: products.csv (header row enabled) on the source connection.
        read:
          connection: source
          format: csv
          path: products.csv
          options:
            header: true

        # Dimension pattern: product_id is the natural key and product_sk
        # the surrogate key column.
        pattern:
          type: dimension
          params:
            natural_key: product_id
            surrogate_key: product_sk
            scd_type: 1
            # Attribute columns listed for change tracking.
            track_cols:
              - name
              - category
              - price
            # NOTE(review): presumably adds the row that unmatched product_id
            # values in the fact table fall back to - confirm.
            unknown_member: true

        # Output: dim_product.parquet on gold, written in overwrite mode.
        write:
          connection: gold
          format: parquet
          path: dim_product.parquet
          mode: overwrite
      
      # ------------------------------------------
      # Date Dimension (Generated)
      # No read step: the rows come from the date_dimension pattern itself.
      # ------------------------------------------
      - name: dim_date
        pattern:
          type: date_dimension
          params:
            # Dates are quoted so they stay strings instead of being parsed
            # as YAML timestamps.
            # NOTE(review): both bounds are assumed inclusive - confirm.
            start_date: "2025-01-01"
            end_date: "2025-12-31"
            # NOTE(review): 1 presumably means January, i.e. the fiscal year
            # equals the calendar year - confirm.
            fiscal_year_start_month: 1
            # NOTE(review): the file header states 366 rows, but 2025 has
            # 365 days; the extra row is presumably this unknown member -
            # confirm.
            unknown_member: true

        # Output: dim_date.parquet on gold, written in overwrite mode.
        write:
          connection: gold
          format: parquet
          path: dim_date.parquet
          mode: overwrite

  # ==========================================
  # PIPELINE 2: Build fact table
  # Reads the dimension parquet files from the gold connection and writes
  # fact_sales.parquet next to them.
  # NOTE(review): no dependency on build_dimensions is declared here, so that
  # pipeline presumably has to run first - confirm.
  # ==========================================
  - pipeline: build_facts
    layer: gold
    nodes:
      # Load the three dimension files from gold into context so the
      # fact_sales node can resolve its foreign keys against them; the node
      # names match the dimension_table values used by fact_sales.
      - name: dim_customer
        read:
          connection: gold
          format: parquet
          path: dim_customer.parquet

      - name: dim_product
        read:
          connection: gold
          format: parquet
          path: dim_product.parquet

      - name: dim_date
        read:
          connection: gold
          format: parquet
          path: dim_date.parquet
      
      # ------------------------------------------
      # Sales Fact Table
      # orders.csv + dimension lookups -> fact_sales.parquet
      # ------------------------------------------
      - name: fact_sales
        # The three dimension-loading nodes of this pipeline come first.
        depends_on:
          - dim_customer
          - dim_product
          - dim_date
        # Input: orders.csv (header row enabled) on the source connection.
        read:
          connection: source
          format: csv
          path: orders.csv
          options:
            header: true

        pattern:
          type: fact
          params:
            # Grain columns: order_id plus line_item_id.
            grain:
              - order_id
              - line_item_id

            # Foreign-key lookups. Each entry names a column of the orders
            # data (source_column), the dimension node to look it up in
            # (dimension_table), the column to match there (dimension_key)
            # and the surrogate key column to use (surrogate_key).
            dimensions:
              - source_column: customer_id
                dimension_table: dim_customer
                dimension_key: customer_id
                surrogate_key: customer_sk

              - source_column: product_id
                dimension_table: dim_product
                dimension_key: product_id
                surrogate_key: product_sk

              # NOTE(review): order_date is read from CSV; confirm its type
              # lines up with dim_date.full_date so the lookup can match.
              - source_column: order_date
                dimension_table: dim_date
                dimension_key: full_date
                surrogate_key: date_sk

            # Keys without a dimension match go to the unknown member; the
            # file header's example is customer_id=999 -> customer_sk=0.
            orphan_handling: unknown

            # Numeric columns carried into the fact table as measures.
            measures:
              - quantity
              - amount

            # NOTE(review): presumably adds a load-timestamp column and a
            # source-system column with the value below - confirm.
            audit:
              load_timestamp: true
              source_system: "orders_csv"

        # Output: fact_sales.parquet on gold, written in overwrite mode.
        write:
          connection: gold
          format: parquet
          path: fact_sales.parquet
          mode: overwrite
