Defining your own ColumnMeta attributes

In this notebook, we will see how to define your own ColumnMeta attributes. This is useful when you want to attach metadata to your columns that is not already covered by the attributes of the ColumnMeta class.

[1]:
from dataclasses import dataclass
from typing import Annotated
from pyspark.sql.types import LongType, StringType
from typedspark import ColumnMeta, Schema
from typedspark._core.column import Column


@dataclass
class MyColumnMeta(ColumnMeta):
    """Column metadata extended with a custom ``primary_key`` attribute.

    Subclassing ``ColumnMeta`` keeps its existing attributes (such as
    ``comment``) available while adding a new one.
    """

    # Marks the column as (part of) the primary key; off unless set explicitly.
    primary_key: bool = False


# Example schema: only `id` carries metadata; `name` and `age` are plain columns.
class Persons(Schema):
    # Annotated[...] pairs the column type with a MyColumnMeta instance, which
    # supplies both the inherited `comment` and the custom `primary_key` value.
    id: Annotated[
        Column[LongType],
        MyColumnMeta(
            comment="Identifies the person",
            primary_key=True,
        ),
    ]
    # No metadata attached to the remaining columns.
    name: Column[StringType]
    age: Column[LongType]


# Display the metadata per column: the custom `primary_key` attribute shows up
# next to `comment` for `id`, while columns without metadata map to an empty dict.
Persons.get_metadata()
[1]:
{'id': {'comment': 'Identifies the person', 'primary_key': True},
 'name': {},
 'age': {}}