{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "a006aaea",
   "metadata": {},
   "source": [
    "# Handling StructType columns in notebooks\n",
    "\n",
    "First, let us make some example data again."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "5442cec0",
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyspark.sql import SparkSession\n",
    "\n",
    "spark = SparkSession.Builder().config(\"spark.ui.showConsoleProgress\", \"false\").getOrCreate()\n",
    "spark.sparkContext.setLogLevel(\"ERROR\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "420da6e1",
   "metadata": {},
   "outputs": [],
   "source": [
    "from typedspark import Schema, Column, StructType, create_partially_filled_dataset, load_table\n",
    "from pyspark.sql.types import IntegerType\n",
    "\n",
    "\n",
    "class Values(Schema):\n",
    "    a: Column[IntegerType]\n",
    "    b: Column[IntegerType]\n",
    "\n",
    "\n",
    "class Container(Schema):\n",
    "    values: Column[StructType[Values]]\n",
    "\n",
    "\n",
    "create_partially_filled_dataset(\n",
    "    spark,\n",
    "    Container,\n",
    "    {\n",
    "        Container.values: create_partially_filled_dataset(\n",
    "            spark,\n",
    "            Values,\n",
    "            {Values.a: [1, 2, 3]},\n",
    "        ).collect(),\n",
    "    },\n",
    ").createOrReplaceTempView(\"structtype_table\")\n",
    "\n",
    "container, ContainerSchema = load_table(spark, \"structtype_table\", \"Container\")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "67cdf490",
   "metadata": {},
   "source": [
    "Like before, we can show the schema simply by running:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "34205e88",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\n",
       "from pyspark.sql.types import IntegerType\n",
       "\n",
       "from typedspark import Column, Schema, StructType\n",
       "\n",
       "\n",
       "class Container(Schema):\n",
       "    values: Column[StructType[Values]]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ContainerSchema"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "7a6fac57",
   "metadata": {},
   "source": [
    "We can show the `StructType` schema using:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "e88e06a9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\n",
       "from pyspark.sql.types import IntegerType\n",
       "\n",
       "from typedspark import Column, Schema\n",
       "\n",
       "\n",
       "class Values(Schema):\n",
       "    a: Column[IntegerType]\n",
       "    b: Column[IntegerType]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ContainerSchema.values.dtype.schema"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "f51beb47",
   "metadata": {},
   "source": [
    "We can also use this in queries, for example:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "397c575b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+---------+\n",
      "|   values|\n",
      "+---------+\n",
      "|{2, NULL}|\n",
      "|{3, NULL}|\n",
      "+---------+\n",
      "\n"
     ]
    }
   ],
   "source": [
    "container.filter(ContainerSchema.values.dtype.schema.a > 1).show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "typedspark",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}