{"id":1196,"date":"2026-01-03T20:55:12","date_gmt":"2026-01-03T20:55:12","guid":{"rendered":"https:\/\/ranaghazzi.com\/?page_id=1196"},"modified":"2026-05-29T00:59:55","modified_gmt":"2026-05-29T00:59:55","slug":"ibm-using-databricks-pyspar","status":"publish","type":"page","link":"https:\/\/ranaghazzi.com\/?page_id=1196","title":{"rendered":"IBM Stocks ETL \u2013 Bronze Layer"},"content":{"rendered":"<p><style>\n    .light-font-container, .light-font-container p, .light-font-container h2, .light-font-container li {<br \/>\n        font-weight: #FFFFFF !important;<br \/>\n    }<br \/>\n<\/style>\n<\/p>\n<div class=\"light-font-container\" style=\"background-color: #2b85d9; padding: 40px; border-radius: 15px;\">\n\n\n<div style=\"height:13px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n\n\n<h2 class=\"wp-block-heading has-text-align-left has-contrast-color has-text-color has-background has-link-color has-large-font-size wp-elements-d8f284f2e6e654c887295605a5957300\" style=\"background-color:#2b85d9\">Tools: Databricks | Pyspark | Pandas|<code> Numpy<\/code> |&nbsp;<code>delta.tables<\/code><\/h2>\n\n\n\n<div style=\"height:24px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n\n\n<div class=\"wp-block-group alignwide is-content-justification-center is-nowrap is-layout-flex wp-container-core-group-is-layout-d5e1b05c wp-block-group-is-layout-flex\">\n<div class=\"wp-block-buttons is-content-justification-left is-layout-flex wp-container-core-buttons-is-layout-3b8eca09 wp-block-buttons-is-layout-flex\">\n<div class=\"wp-block-button\"><a class=\"wp-block-button__link has-background has-large-font-size has-custom-font-size wp-element-button\" href=\"https:\/\/github.com\/Ranoush-USA\/Databricks_IBM\/blob\/first_upload\/IBM\/resources\/notebooks\/IBM_autoLoader_bronze.ipynb.ipynb\" style=\"background-color:#6ad92b96\"><strong>GitHub<\/strong><\/a><\/div>\n\n\n\n<div class=\"wp-block-button\"><a class=\"wp-block-button__link has-background 
has-large-font-size has-custom-font-size wp-element-button\" href=\"https:\/\/Ranoush-USA.github.io\/Databricks_IBM\/\" style=\"background-color:#2bcdd9\" target=\"_blank\" rel=\"noreferrer noopener\">    <strong>Diagram<\/strong>      <\/a><\/div>\n<\/div>\n\n\n\n<div style=\"height:100px;width:0px\" aria-hidden=\"true\" class=\"wp-block-spacer wp-container-content-6388d5dc\"><\/div>\n<\/div>\n\n\n\n<h2 class=\"wp-block-heading has-contrast-color has-text-color has-link-color has-x-large-font-size wp-elements-3c937b15830dbe38fa7a627b2d1c8cf3\">Description:<\/h2>\n\n\n\n<div class=\"wp-block-group has-global-padding is-layout-constrained wp-block-group-is-layout-constrained\">\n<div class=\"wp-block-group has-global-padding is-layout-constrained wp-block-group-is-layout-constrained\">\n<p class=\"has-contrast-color has-text-color has-background has-link-color has-large-font-size wp-elements-e40c6c9c6df76dd90e9eff2238957ceb wp-block-paragraph\" style=\"background-color:#2b85d9\">This project is an automated, scheduled ETL pipeline built in Databricks that ingests IBM daily stock data from an external API and processes it through a two-layer Delta Lake architecture (Bronze \u2192 Silver).<\/p>\n\n\n\n<div class=\"wp-block-group has-contrast-color has-text-color has-background has-link-color has-large-font-size wp-elements-0e5a81d3016ae9f9ce377fdecf2e2ef1 has-global-padding is-layout-constrained wp-block-group-is-layout-constrained\" style=\"background-color:#2b85d9\">\n<p class=\"has-large-font-size wp-block-paragraph\" style=\"padding-right:var(--wp--preset--spacing--50);padding-left:var(--wp--preset--spacing--50)\">The pipeline connects to a financial API that returns the latest 100 trading days of IBM stock data in JSON format \u2014 including open, high, low, close prices, and volume \u2014 updated every two to three days. 
Rather than reloading the full dataset on each run, it implements a Change Data Capture (CDC) approach that processes only new or changed records, keeping dashboards current without redundant data movement.<\/p>\n\n\n\n<div style=\"height:31px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n\n\n<p class=\"has-contrast-color has-text-color wp-block-paragraph\" style=\"padding-right:var(--wp--preset--spacing--50);padding-left:var(--wp--preset--spacing--50)\">The <strong>Bronze layer<\/strong> serves as the raw ingestion and long-term historical archive. On each scheduled run, it compares incoming data against the existing table using a date watermark and appends only new records \u2014 ensuring no data is ever overwritten or lost as the 100-day API window shifts forward over time.<\/p>\n\n\n\n<div class=\"wp-block-group has-global-padding is-layout-constrained wp-block-group-is-layout-constrained\">\n<h2 class=\"wp-block-heading\">IBM_ETL Job &#8211; Complete Workflow Description<\/h2>\n\n\n\n<p class=\"wp-block-paragraph\"><a href=\"https:\/\/github.com\/Ranoush-USA\/Databricks_IBM\/tree\/first_upload\/IBM#ibm_etl-job---complete-workflow-description\"><\/a><\/p>\n\n\n\n<p class=\"has-large-font-size wp-block-paragraph\" style=\"padding-right:var(--wp--preset--spacing--50);padding-left:var(--wp--preset--spacing--50)\">This is a&nbsp;<strong>3-stage sequential ETL pipeline<\/strong>&nbsp;for processing IBM stock market data with email notifications and performance optimization.<\/p>\n\n\n\n<h3 class=\"wp-block-heading has-contrast-color has-text-color has-link-color has-x-large-font-size wp-elements-b5b5f1d6be0470ad09b84956f447a4f1\"><strong>Job Configuration<\/strong><a href=\"https:\/\/github.com\/Ranoush-USA\/Databricks_IBM\/tree\/first_upload\/IBM#job-configuration\"><\/a><\/h3>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>Name:<\/strong>\u00a0IBM_ETL<\/li>\n\n\n\n<li><strong>Execution Mode:<\/strong>\u00a0Queue-enabled (allows multiple runs to 
queue if previous run is still active)<\/li>\n\n\n\n<li><strong>Performance Target:<\/strong>\u00a0PERFORMANCE_OPTIMIZED (uses faster compute resources)<\/li>\n\n\n\n<li><strong>Notifications:<\/strong>\u00a0Sends emails\u00a0on both success and failure<\/li>\n<\/ul>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h3 class=\"wp-block-heading has-contrast-color has-text-color has-link-color has-x-large-font-size wp-elements-a5e5c335daaaf87bbb3de0693731fdb7\"><strong>Task 1: API-Ingestion<\/strong><a href=\"https:\/\/github.com\/Ranoush-USA\/Databricks_IBM\/tree\/first_upload\/IBM#task-1-api-ingestion\"><\/a><\/h3>\n\n\n\n<p class=\"has-large-font-size wp-block-paragraph\" style=\"padding-right:var(--wp--preset--spacing--50);padding-left:var(--wp--preset--spacing--50)\"><strong>Notebook:<\/strong>&nbsp;IBM_landing<br><strong>Dependencies:<\/strong>&nbsp;None (first task)<\/p>\n\n\n\n<p class=\"has-large-font-size wp-block-paragraph\" style=\"padding-right:var(--wp--preset--spacing--50);padding-left:var(--wp--preset--spacing--50)\"><strong>What it does:<\/strong><\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li>Loads configuration from\u00a0config_Parms\u00a0(catalog, schemas, API key)<\/li>\n\n\n\n<li>Calls Alpha Vantage API to fetch IBM daily stock data (Open, High, Low, Close, Volume)<\/li>\n\n\n\n<li>Transforms API JSON response into pandas DataFrame<\/li>\n\n\n\n<li><strong>Incremental Logic:<\/strong>\u00a0Queries existing\u00a0workspace.bronze.ibm\u00a0table to find the latest date<\/li>\n\n\n\n<li>Filters only NEW records (dates newer than what exists in bronze)<\/li>\n\n\n\n<li>Writes new records as\u00a0Parquet files\u00a0to landing zone:\u00a0\/Volumes\/workspace\/bronze\/landing_zone\/ibm_landing\/<\/li>\n<\/ol>\n\n\n\n<p class=\"has-large-font-size wp-block-paragraph\" style=\"padding-right:var(--wp--preset--spacing--50);padding-left:var(--wp--preset--spacing--50)\"><strong>Output:<\/strong>&nbsp;Parquet files ready for streaming 
ingestion<\/p>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h3 class=\"wp-block-heading has-contrast-color has-text-color has-link-color has-x-large-font-size wp-elements-b3b2dd7d70f579648647834c426214b6\"><strong>Task 2: Auto_Loader_bronze<\/strong><a href=\"https:\/\/github.com\/Ranoush-USA\/Databricks_IBM\/tree\/first_upload\/IBM#task-2-auto_loader_bronze\"><\/a><\/h3>\n\n\n\n<p class=\"has-large-font-size wp-block-paragraph\" style=\"padding-right:var(--wp--preset--spacing--50);padding-left:var(--wp--preset--spacing--50)\"><strong>Notebook:<\/strong>&nbsp;IBM_autoLoader_bronze<br><strong>Dependencies:<\/strong>&nbsp;Waits for API-Ingestion to complete<\/p>\n\n\n\n<p class=\"has-large-font-size wp-block-paragraph\" style=\"padding-right:var(--wp--preset--spacing--50);padding-left:var(--wp--preset--spacing--50)\"><strong>What it does:<\/strong><\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li>Loads configuration from\u00a0config_Parms<\/li>\n\n\n\n<li>Uses\u00a0Auto Loader (cloudFiles)\u00a0to automatically detect new Parquet files in landing zone<\/li>\n\n\n\n<li>Streams data with:\n<ul class=\"wp-block-list\">\n<li>Schema location:\u00a0\/Volumes\/workspace\/bronze\/schemas\/ibm_stream<\/li>\n\n\n\n<li>Checkpoint location:\u00a0\/Volumes\/workspace\/bronze\/checkpoints\/ibm_stream<\/li>\n\n\n\n<li>Merge schema enabled (handles schema evolution)<\/li>\n<\/ul>\n<\/li>\n\n\n\n<li>Writes streaming data to\u00a0Delta table:\u00a0workspace.bronze.ibm<\/li>\n\n\n\n<li>Uses\u00a0trigger(availableNow=True)\u00a0for micro-batch processing (processes all available data then stops)<\/li>\n<\/ol>\n\n\n\n<p class=\"has-large-font-size wp-block-paragraph\" style=\"padding-right:var(--wp--preset--spacing--50);padding-left:var(--wp--preset--spacing--50)\"><strong>Output:<\/strong>&nbsp;Raw data in bronze Delta table with exactly-once processing semantics<\/p>\n\n\n\n<p class=\"has-contrast-color has-text-color has-link-color has-x-large-font-size 
wp-elements-d969cd2bd51fab200311ba80fb8308a1 wp-block-paragraph\"><strong><a href=\"https:\/\/ranaghazzi.com\/?page_id=1129\">Task 3: Silver_Merge<\/a><\/strong><\/p>\n<\/div>\n<\/div>\n<\/div>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n<\/div>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n","protected":false},"excerpt":{"rendered":"<p>Tools: Databricks | Pyspark | Pandas| Numpy |&nbsp;delta.tables Description: This project is an automated, scheduled ETL pipeline built in Databricks that ingests IBM daily stock data from an external API and processes it through a two-layer Delta Lake architecture (Bronze \u2192 Silver). The pipeline connects to a financial API that returns the latest 100 trading [&hellip;]<\/p>\n","protected":false},"author":2,"featured_media":0,"parent":30,"menu_order":0,"comment_status":"closed","ping_status":"closed","template":"","meta":{"footnotes":""},"class_list":["post-1196","page","type-page","status-publish","hentry"],"yoast_head":"<!-- This site is optimized with the Yoast SEO plugin v27.2 - https:\/\/yoast.com\/product\/yoast-seo-wordpress\/ -->\n<title>IBM Stocks ETL \u2013 Bronze Layer - Rana Nasri Ghazzi<\/title>\n<meta name=\"description\" content=\"Browse real-world data projects by Rana Ghazzi, covering data cleaning, analysis, and storytelling with Python, SQL, and Tableau Explore Rana Ghazzi&#039;s data analytics portfolio \u2014 dashboards, visualizations, and insights built with Tableau, Power BI &amp; Python\" \/>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/ranaghazzi.com\/?page_id=1196\" \/>\n<meta property=\"og:locale\" content=\"en_US\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"IBM Stocks ETL \u2013 Bronze Layer - Rana Nasri Ghazzi\" \/>\n<meta property=\"og:description\" content=\"Browse real-world data 
projects by Rana Ghazzi, covering data cleaning, analysis, and storytelling with Python, SQL, and Tableau Explore Rana Ghazzi&#039;s data analytics portfolio \u2014 dashboards, visualizations, and insights built with Tableau, Power BI &amp; Python\" \/>\n<meta property=\"og:url\" content=\"https:\/\/ranaghazzi.com\/?page_id=1196\" \/>\n<meta property=\"og:site_name\" content=\"Rana Nasri Ghazzi\" \/>\n<meta property=\"article:modified_time\" content=\"2026-05-29T00:59:55+00:00\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:label1\" content=\"Est. reading time\" \/>\n\t<meta name=\"twitter:data1\" content=\"2 minutes\" \/>\n<script type=\"application\/ld+json\" class=\"yoast-schema-graph\">{\"@context\":\"https:\/\/schema.org\",\"@graph\":[{\"@type\":\"WebPage\",\"@id\":\"https:\/\/ranaghazzi.com\/?page_id=1196\",\"url\":\"https:\/\/ranaghazzi.com\/?page_id=1196\",\"name\":\"IBM Stocks ETL \u2013 Bronze Layer - Rana Nasri Ghazzi\",\"isPartOf\":{\"@id\":\"https:\/\/ranaghazzi.com\/#website\"},\"datePublished\":\"2026-01-03T20:55:12+00:00\",\"dateModified\":\"2026-05-29T00:59:55+00:00\",\"description\":\"Browse real-world data projects by Rana Ghazzi, covering data cleaning, analysis, and storytelling with Python, SQL, and Tableau Explore Rana Ghazzi's data analytics portfolio \u2014 dashboards, visualizations, and insights built with Tableau, Power BI & 
Python\",\"breadcrumb\":{\"@id\":\"https:\/\/ranaghazzi.com\/?page_id=1196#breadcrumb\"},\"inLanguage\":\"en-US\",\"potentialAction\":[{\"@type\":\"ReadAction\",\"target\":[\"https:\/\/ranaghazzi.com\/?page_id=1196\"]}]},{\"@type\":\"BreadcrumbList\",\"@id\":\"https:\/\/ranaghazzi.com\/?page_id=1196#breadcrumb\",\"itemListElement\":[{\"@type\":\"ListItem\",\"position\":1,\"name\":\"Home\",\"item\":\"https:\/\/ranaghazzi.com\/\"},{\"@type\":\"ListItem\",\"position\":2,\"name\":\"Home\",\"item\":\"https:\/\/ranaghazzi.com\/\"},{\"@type\":\"ListItem\",\"position\":3,\"name\":\"Projects\",\"item\":\"https:\/\/ranaghazzi.com\/?page_id=30\"},{\"@type\":\"ListItem\",\"position\":4,\"name\":\"IBM Stocks ETL \u2013 Bronze Layer\"}]},{\"@type\":\"WebSite\",\"@id\":\"https:\/\/ranaghazzi.com\/#website\",\"url\":\"https:\/\/ranaghazzi.com\/\",\"name\":\"Rana Nasri Ghazzi\",\"description\":\"Turning Data into Decisions\",\"publisher\":{\"@id\":\"https:\/\/ranaghazzi.com\/#\/schema\/person\/d8ee34f53cb0df9faaf816fb5363a4cc\"},\"potentialAction\":[{\"@type\":\"SearchAction\",\"target\":{\"@type\":\"EntryPoint\",\"urlTemplate\":\"https:\/\/ranaghazzi.com\/?s={search_term_string}\"},\"query-input\":{\"@type\":\"PropertyValueSpecification\",\"valueRequired\":true,\"valueName\":\"search_term_string\"}}],\"inLanguage\":\"en-US\"},{\"@type\":[\"Person\",\"Organization\"],\"@id\":\"https:\/\/ranaghazzi.com\/#\/schema\/person\/d8ee34f53cb0df9faaf816fb5363a4cc\",\"name\":\"Rana Ghazzi\",\"image\":{\"@type\":\"ImageObject\",\"inLanguage\":\"en-US\",\"@id\":\"https:\/\/ranaghazzi.com\/wp-content\/uploads\/2025\/11\/logo.png\",\"url\":\"https:\/\/ranaghazzi.com\/wp-content\/uploads\/2025\/11\/logo.png\",\"contentUrl\":\"https:\/\/ranaghazzi.com\/wp-content\/uploads\/2025\/11\/logo.png\",\"width\":1024,\"height\":1024,\"caption\":\"Rana Ghazzi\"},\"logo\":{\"@id\":\"https:\/\/ranaghazzi.com\/wp-content\/uploads\/2025\/11\/logo.png\"}}]}<\/script>\n<!-- \/ Yoast SEO plugin. 
-->","yoast_head_json":{"title":"IBM Stocks ETL \u2013 Bronze Layer - Rana Nasri Ghazzi","description":"Browse real-world data projects by Rana Ghazzi, covering data cleaning, analysis, and storytelling with Python, SQL, and Tableau Explore Rana Ghazzi's data analytics portfolio \u2014 dashboards, visualizations, and insights built with Tableau, Power BI & Python","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/ranaghazzi.com\/?page_id=1196","og_locale":"en_US","og_type":"article","og_title":"IBM Stocks ETL \u2013 Bronze Layer - Rana Nasri Ghazzi","og_description":"Browse real-world data projects by Rana Ghazzi, covering data cleaning, analysis, and storytelling with Python, SQL, and Tableau Explore Rana Ghazzi's data analytics portfolio \u2014 dashboards, visualizations, and insights built with Tableau, Power BI & Python","og_url":"https:\/\/ranaghazzi.com\/?page_id=1196","og_site_name":"Rana Nasri Ghazzi","article_modified_time":"2026-05-29T00:59:55+00:00","twitter_card":"summary_large_image","twitter_misc":{"Est. 
reading time":"2 minutes"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"WebPage","@id":"https:\/\/ranaghazzi.com\/?page_id=1196","url":"https:\/\/ranaghazzi.com\/?page_id=1196","name":"IBM Stocks ETL \u2013 Bronze Layer - Rana Nasri Ghazzi","isPartOf":{"@id":"https:\/\/ranaghazzi.com\/#website"},"datePublished":"2026-01-03T20:55:12+00:00","dateModified":"2026-05-29T00:59:55+00:00","description":"Browse real-world data projects by Rana Ghazzi, covering data cleaning, analysis, and storytelling with Python, SQL, and Tableau Explore Rana Ghazzi's data analytics portfolio \u2014 dashboards, visualizations, and insights built with Tableau, Power BI & Python","breadcrumb":{"@id":"https:\/\/ranaghazzi.com\/?page_id=1196#breadcrumb"},"inLanguage":"en-US","potentialAction":[{"@type":"ReadAction","target":["https:\/\/ranaghazzi.com\/?page_id=1196"]}]},{"@type":"BreadcrumbList","@id":"https:\/\/ranaghazzi.com\/?page_id=1196#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"Home","item":"https:\/\/ranaghazzi.com\/"},{"@type":"ListItem","position":2,"name":"Home","item":"https:\/\/ranaghazzi.com\/"},{"@type":"ListItem","position":3,"name":"Projects","item":"https:\/\/ranaghazzi.com\/?page_id=30"},{"@type":"ListItem","position":4,"name":"IBM Stocks ETL \u2013 Bronze Layer"}]},{"@type":"WebSite","@id":"https:\/\/ranaghazzi.com\/#website","url":"https:\/\/ranaghazzi.com\/","name":"Rana Nasri Ghazzi","description":"Turning Data into Decisions","publisher":{"@id":"https:\/\/ranaghazzi.com\/#\/schema\/person\/d8ee34f53cb0df9faaf816fb5363a4cc"},"potentialAction":[{"@type":"SearchAction","target":{"@type":"EntryPoint","urlTemplate":"https:\/\/ranaghazzi.com\/?s={search_term_string}"},"query-input":{"@type":"PropertyValueSpecification","valueRequired":true,"valueName":"search_term_string"}}],"inLanguage":"en-US"},{"@type":["Person","Organization"],"@id":"https:\/\/ranaghazzi.com\/#\/schema\/person\/d8ee34f53cb0df9faaf816fb5363a4cc","name":"Rana 
Ghazzi","image":{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/ranaghazzi.com\/wp-content\/uploads\/2025\/11\/logo.png","url":"https:\/\/ranaghazzi.com\/wp-content\/uploads\/2025\/11\/logo.png","contentUrl":"https:\/\/ranaghazzi.com\/wp-content\/uploads\/2025\/11\/logo.png","width":1024,"height":1024,"caption":"Rana Ghazzi"},"logo":{"@id":"https:\/\/ranaghazzi.com\/wp-content\/uploads\/2025\/11\/logo.png"}}]}},"_hostinger_reach_plugin_has_subscription_block":false,"_hostinger_reach_plugin_is_elementor":false,"_links":{"self":[{"href":"https:\/\/ranaghazzi.com\/index.php?rest_route=\/wp\/v2\/pages\/1196","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/ranaghazzi.com\/index.php?rest_route=\/wp\/v2\/pages"}],"about":[{"href":"https:\/\/ranaghazzi.com\/index.php?rest_route=\/wp\/v2\/types\/page"}],"author":[{"embeddable":true,"href":"https:\/\/ranaghazzi.com\/index.php?rest_route=\/wp\/v2\/users\/2"}],"replies":[{"embeddable":true,"href":"https:\/\/ranaghazzi.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=1196"}],"version-history":[{"count":139,"href":"https:\/\/ranaghazzi.com\/index.php?rest_route=\/wp\/v2\/pages\/1196\/revisions"}],"predecessor-version":[{"id":5116,"href":"https:\/\/ranaghazzi.com\/index.php?rest_route=\/wp\/v2\/pages\/1196\/revisions\/5116"}],"up":[{"embeddable":true,"href":"https:\/\/ranaghazzi.com\/index.php?rest_route=\/wp\/v2\/pages\/30"}],"wp:attachment":[{"href":"https:\/\/ranaghazzi.com\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=1196"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}