[{"data":1,"prerenderedAt":426},["ShallowReactive",2],{"navigation":3,"\u002Fblog\u002Fapache-airflow-part-1-why-and-goals":204,"\u002Fblog\u002Fapache-airflow-part-1-why-and-goals-surround":421},[4,8,12,16,20,24,28,32,36,40,44,48,52,56,60,64,68,72,76,80,84,88,92,96,100,104,108,112,116,120,124,128,132,136,140,144,148,152,156,160,164,168,172,176,180,184,188,192,196,200],{"title":5,"path":6,"stem":7},"You do not have time to not have tests","\u002Fblog\u002Fyou-do-not-have-time-to-not-have-tests","2.blog\u002F20211217.you-do-not-have-time-to-not-have-tests",{"title":9,"path":10,"stem":11},"Migrate Vue 2 with Vuetify and Jest to Vite and Vitest","\u002Fblog\u002Fmigrate-vue-2-with-vuetify-and-jest-to-vite-and-vitest","2.blog\u002F20220109.migrate-vue-2-with-vuetify-and-jest-to-vite-and-vitest",{"title":13,"path":14,"stem":15},"I am a Dark Matter Developer","\u002Fblog\u002Fi-am-a-dark-matter-developer","2.blog\u002F20220626.i-am-a-dark-matter-developer",{"title":17,"path":18,"stem":19},"Why using Conventional commits is useful","\u002Fblog\u002Fusing-conventional-commits","2.blog\u002F20240623.using-conventional-commits",{"title":21,"path":22,"stem":23},"Why you should make a toolbox repository","\u002Fblog\u002Fwhy-you-should-make-a-toolbox-repository","2.blog\u002F20240630.Why-you-should-make-a-toolbox-repository",{"title":25,"path":26,"stem":27},"Apache Airflow Part 1 - Why and Goals for a near Serverless ELT","\u002Fblog\u002Fapache-airflow-part-1-why-and-goals","2.blog\u002F20240710.apache-airflow-part-1-why-and-goals",{"title":29,"path":30,"stem":31},"Oh My Zsh on your server","\u002Fblog\u002Foh-my-zsh-on-your-server","2.blog\u002F20240711.oh-my-zsh-on-your-server",{"title":33,"path":34,"stem":35},"Fire tablet and YouTube Kids","\u002Fblog\u002Ffire-tablet-and-youtube-kids","2.blog\u002F20240714.fire-tablet-and-youtube-kids",{"title":37,"path":38,"stem":39},"Using Ollama and Continue as a GitHub Copilot Alternative","\u002Fblog\u002Fusing-ollama-and-continue-as-github-copilot-alternative","2.blog\u002F20240723.using-ollama-and-continue-as-github-copilot-alternative",{"title":41,"path":42,"stem":43},"Debugging Local Packages Made Easy with pnpm","\u002Fblog\u002Fdebugging-local-packages-with-pnpm-link","2.blog\u002F20250422.debugging local-packages-with-pnpm-link",{"title":45,"path":46,"stem":47},"Two Weeks with Cloudflare AI and Tools","\u002Fblog\u002Ftwo-weeks-with-cloudflare-ai-and-tools","2.blog\u002F20250509.two-weeks-with-cloudflare-aI-and-tools",{"title":49,"path":50,"stem":51},"Adding Prompts to VS Code - How I Learned to Stop Worrying and Love AI Context","\u002Fblog\u002Fadding-prompts-to-vscode","2.blog\u002F20250528.adding-prompts-to-vscode",{"title":53,"path":54,"stem":55},"My Best Practices","\u002Fblog\u002Fmy-best-practicies","2.blog\u002F20250607.my-best-practicies",{"title":57,"path":58,"stem":59},"Creating my own CLI Tool - Towles Tool","\u002Fblog\u002Ftowles-tool","2.blog\u002F20250607.towles-tool",{"title":61,"path":62,"stem":63},"Software Development Best Practices & ITIL","\u002Fblog\u002Fsoftware-engineering-and-itil-best-practices","2.blog\u002F20250612.software-engineering-and-itil-best-practices",{"title":65,"path":66,"stem":67},"Voice to Text","\u002Fblog\u002Fvoice-to-text","2.blog\u002F20250622.voice-to-text",{"title":69,"path":70,"stem":71},"Setting Up ComfyUI - A Better Alternative to Fooocus","\u002Fblog\u002Fcomfy-ui-setup","2.blog\u002F20250628.comfy-ui-setup",{"title":73,"path":74,"stem":75},"Voice to System","\u002Fblog\u002Fvoice-to-system","2.blog\u002F20250705.voice-to-system",{"title":77,"path":78,"stem":79},"Tips for Claude Code","\u002Fblog\u002Ftips-for-claude-code","2.blog\u002F20250713.tips-for-claude-code",{"title":81,"path":82,"stem":83},"Review That AI Code: Why I Read Every Line Generated Code","\u002Fblog\u002Freview-that-ai-code","2.blog\u002F20250720.review-that-ai-code",{"title":85,"path":86,"stem":87},"My Context Engineering Journey: From Dev Scripts to AI Collaboration","\u002Fblog\u002F20250803-1.my-context-engineering-journey","2.blog\u002F20250803-1.my-context-engineering-journey",{"title":89,"path":90,"stem":91},"Context Engineering at Scale: Enterprise Lessons and the Future of Development","\u002Fblog\u002F20250803-2.context-engineering-at-scale","2.blog\u002F20250803-2.context-engineering-at-scale",{"title":93,"path":94,"stem":95},"Check That Your Tools and Linters Do Not Burn Tokens","\u002Fblog\u002Fcheck-that-your-tools-and-linters-do-not-burn-tokens","2.blog\u002F20250806.check-that-your-tools-and-linters-do-not-burn-tokens",{"title":97,"path":98,"stem":99},"Markdown + AI: The Communication Protocol That Changes Everything","\u002Fblog\u002Fmarkdown-plus-ai-the-communication-protocol-that-changes-everything","2.blog\u002F20250814.markdown-plus-ai-the-communication-protocol-that-changes-everything",{"title":101,"path":102,"stem":103},"Finally: Type-Safe AI in Production (And Why I'm Here For It)","\u002Fblog\u002Ffinally-type-safe-ai-in-production-and-why-im-here-for-it","2.blog\u002F20250819.finally-type-safe-ai-in-production-and-why-im-here-for-it",{"title":105,"path":106,"stem":107},"Dotfiles: Masterpiece or Late Stage Picasso?","\u002Fblog\u002Fdotfiles-masterpiece-or-late-stage-picasso","2.blog\u002F20250822.dotfiles-masterpiece-or-late-stage-picasso",{"title":109,"path":110,"stem":111},"Beyond API Wrappers: Building State-Driven MCP Servers for Long-Horizon Agent Orchestration","\u002Fblog\u002Fbeyond-api-wrappers-mcp-servers","2.blog\u002F20250907.beyond-api-wrappers-mcp-servers",{"title":113,"path":114,"stem":115},"Why Vertical Integration Wins: A Software Engineer's Case for Owning Your Stack","\u002Fblog\u002Fwhy-i-bought-tesla-model-3-vertical-integration","2.blog\u002F20250928.why-i-bought-tesla-model-3-vertical-integration",{"title":117,"path":118,"stem":119},"The Min-Maxer's Trifecta: Building Tools for the Game You Actually Play","\u002Fblog\u002Fmin-maxer-trifecta","2.blog\u002F20251004.min-maxer-trifecta",{"title":121,"path":122,"stem":123},"Read The Source: Learning by Cutting Out The Middleman and RTFM","\u002Fblog\u002Fread-the-source","2.blog\u002F20251010.read-the-source",{"title":125,"path":126,"stem":127},"The Exponential Shift: Why AI Progress Feels Different Now","\u002Fblog\u002Fthe-exponential-shift","2.blog\u002F20251015.the-exponential-shift",{"title":129,"path":130,"stem":131},"Plan Mode for Your Problems, Edit Mode for Claude's","\u002Fblog\u002Fplan-mode-problems-edit-mode-solutions","2.blog\u002F20251019.plan-mode-problems-edit-mode-solutions",{"title":133,"path":134,"stem":135},"AWS Aurora DSQL Looked Perfect Until I Needed the Connection String","\u002Fblog\u002Faws-aurora-dsql-postgres-serverless-authentication","2.blog\u002F20251028.aws-aurora-dsql-postgres-serverless-authentication",{"title":137,"path":138,"stem":139},"Switchback: Browser History for Your Thoughts","\u002Fblog\u002Fswitchback-second-order-reasoning","2.blog\u002F20251205.switchback-second-order-reasoning",{"title":141,"path":142,"stem":143},"AI Pairing: Notes to Self","\u002Fblog\u002Fai-pairing-notes-to-self","2.blog\u002F20251216.ai-pairing-notes-to-self",{"title":145,"path":146,"stem":147},"I've Been Sleeping on Zellij","\u002Fblog\u002Fsleeping-on-zellij","2.blog\u002F20251229.sleeping-on-zellij",{"title":149,"path":150,"stem":151},"Implementing a Ralph Wiggum Loop: The Secret is Session Markers","\u002Fblog\u002Fimplementing-ralph-wiggum-loop-for-autonomous-ai-coding","2.blog\u002F20260114.implementing-ralph-wiggum-loop-for-autonomous-ai-coding",{"title":153,"path":154,"stem":155},"Goodhart's Law Ate My Context Window","\u002Fblog\u002Fgoodharts-law-ate-my-context-window","2.blog\u002F20260119.goodharts-law-ate-my-context-window",{"title":157,"path":158,"stem":159},"Claude Code's Hidden Multi-Agent System Is Real","\u002Fblog\u002Fclaude-code-hidden-multi-agent-system","2.blog\u002F20260124.claude-code-hidden-multi-agent-system",{"title":161,"path":162,"stem":163},"Free Printable Math Sheets for Kids — Number Chart, Skip Counting, Multiplication, and More","\u002Fblog\u002Ffree-printable-number-chart-and-coin-sheets","2.blog\u002F20260214.free-printable-number-chart-and-coin-sheets",{"title":165,"path":166,"stem":167},"We Are Near the End of the Exponential","\u002Fblog\u002Fnear-the-end-of-the-exponential","2.blog\u002F20260214.near-the-end-of-the-exponential",{"title":169,"path":170,"stem":171},"Free Printable Language Arts Sheets for Kids — Sight Words, Parts of Speech, Homophones, and More","\u002Fblog\u002Ffree-printable-sight-words-and-grammar-sheets","2.blog\u002F20260215.free-printable-sight-words-and-grammar-sheets",{"title":173,"path":174,"stem":175},"Interactive Code Execution with Artifacts","\u002Fblog\u002Finteractive-code-execution-with-artifacts","2.blog\u002F20260215.interactive-code-execution-with-artifacts",{"title":177,"path":178,"stem":179},"Free Printable Telling Time Worksheet for Kids — Clock Reference & Practice Sheet","\u002Fblog\u002Ffree-printable-telling-time-worksheet","2.blog\u002F20260216.free-printable-telling-time-worksheet",{"title":181,"path":182,"stem":183},"Claude Code Skills: Teaching AI Your Playbook","\u002Fblog\u002Fclaude-code-skills-guide","2.blog\u002F20260221.claude-code-skills-guide",{"title":185,"path":186,"stem":187},"Building a Multi-Agent Loan Approval System with Human-in-the-Loop","\u002Fblog\u002Fmulti-agent-loan-approval-human-in-the-loop","2.blog\u002F20260225.multi-agent-loan-approval-human-in-the-loop",{"title":189,"path":190,"stem":191},"The Inception of AI Infrastructure: Bottlenecks All the Way Down","\u002Fblog\u002Fbiggest-bottleneck-scaling-ai-compute","2.blog\u002F20260313.biggest-bottleneck-scaling-ai-compute",{"title":193,"path":194,"stem":195},"What I Tell Teams About Claude Code","\u002Fblog\u002Fwhat-i-tell-teams-about-claude-code","2.blog\u002F20260314.what-i-tell-teams-about-claude-code",{"title":197,"path":198,"stem":199},"The Hardest Part of AI Isn't the AI","\u002Fblog\u002Fthe-hardest-part-of-ai-isnt-the-ai","2.blog\u002F20260327.the-hardest-part-of-ai-isnt-the-ai",{"title":201,"path":202,"stem":203},"Claude Code Hooks: The Capability I Left on the Table","\u002Fblog\u002Fclaude-code-hooks-capability-left-on-the-table","2.blog\u002F20260401.claude-code-hooks-capability-left-on-the-table",{"id":205,"title":25,"authors":206,"badge":212,"body":214,"date":410,"description":411,"extension":412,"image":413,"meta":416,"navigation":417,"path":26,"seo":418,"status":419,"stem":27,"__hash__":420},"posts\u002F2.blog\u002F20240710.apache-airflow-part-1-why-and-goals.md",[207],{"name":208,"to":209,"avatar":210},"Chris Towles","https:\u002F\u002Ftwitter.com\u002FChris_Towles",{"src":211},"\u002Fimages\u002Fctowles-profile-512x512.png",{"label":213},"airflow",{"type":215,"value":216,"toc":402},"minimark",[217,229,232,237,244,259,263,266,308,311,315,318,388,392,399],[218,219,220,221,228],"p",{},"At work, at ",[222,223,227],"a",{"href":224,"rel":225},"https:\u002F\u002Fwww.geaerospace.com\u002F",[226],"nofollow","GE Aerospace",", I work around supporting data ingestion into a Datalake. I'm not going to go into the details here, but I would love to use Airflow instead of the current stack we have today.",[218,230,231],{},"I've done a few proofs of concept with Airflow in the past. It is a solid solution, and with the hype of AI these days, quick and reliable data ingestion has never been more critical.",[233,234,236],"h2",{"id":235},"why-apache-airflow","Why Apache Airflow?",[218,238,239],{},[240,241],"img",{"alt":242,"src":243},"Apache Airflow","\u002Fimages\u002Fblog\u002Fairflow-logo.png",[245,246,247,251],"ul",{},[248,249,250],"li",{},"It's open source",[248,252,253,254],{},"Hugely popular and used by many companies.\n",[245,255,256],{},[248,257,258],{},"Features and integrations are available with nearly everything",[233,260,262],{"id":261},"why-not-other-solutions","Why not other solutions?",[218,264,265],{},"Over the last few years every provider seems to be reducing their on-premises options to only their hosted solution.",[245,267,268,276,284,292,300],{},[248,269,270,275],{},[222,271,274],{"href":272,"rel":273},"https:\u002F\u002Fwww.talend.com\u002Fproducts\u002Ftalend-open-studio\u002F",[226],"Talend"," and other features are already being deprecated",[248,277,278,283],{},[222,279,282],{"href":280,"rel":281},"https:\u002F\u002Ffivetran.com",[226],"Fivetran"," moving to their own cloud based solution.",[248,285,286,291],{},[222,287,290],{"href":288,"rel":289},"https:\u002F\u002Fdocs.databricks.com\u002Fen\u002Fresources\u002Fsupported-regions.html",[226],"Databricks"," not in us-gov-east-1 as of (2024-07-10)",[248,293,294,299],{},[222,295,298],{"href":296,"rel":297},"https:\u002F\u002Fdocs.prefect.io\u002Flatest\u002Fguides\u002Fhost\u002F",[226],"Prefect"," - lots of cloud-only features, like audit logs, Workspaces, and Automation",[248,301,302,307],{},[222,303,306],{"href":304,"rel":305},"https:\u002F\u002Fgithub.com\u002Fdagster-io\u002Fdagster\u002Fissues\u002F2219",[226],"Dagster"," - Dagit lacks any authentication when self-hosted.",[218,309,310],{},"And I get why—for small to midsize companies, it's easier to just deploy a cloud-based solution. But when you have a really large and\u002For regulation-heavy environment, it's more important to be able to self-host and manage your own data. No shipping it off to a third party and trusting them with your data.",[233,312,314],{"id":313},"my-goals","My goals",[218,316,317],{},"I am however going to put a few constraints in place around how I want to use Airflow.",[245,319,320,329,337,345,362,370],{},[248,321,322,323,328],{},"Can't use ",[222,324,327],{"href":325,"rel":326},"https:\u002F\u002Faws.amazon.com\u002Fmwaa\u002F",[226],"AWS MWAA"," (it's not offered in AWS US Gov East)",[248,330,331,332],{},"Local Development\n",[245,333,334],{},[248,335,336],{},"Changes to a job must be able to be tested locally before being pushed",[248,338,339,340],{},"CI\u002FCD Pipeline\n",[245,341,342],{},[248,343,344],{},"Changes to a job must be able to be deployed to production via a CI\u002FCD pipeline.",[248,346,347,348],{},"Job Management\n",[245,349,350,353,356,359],{},[248,351,352],{},"Offer Web UI to view and re-trigger jobs",[248,354,355],{},"Prefer code-first and config rather than UI-based.",[248,357,358],{},"Time to modify a job should be less than 5 minutes.",[248,360,361],{},"Time to create a job should be less than 30 minutes.",[248,363,364,365],{},"Cost\n",[245,366,367],{},[248,368,369],{},"I'd like as close to zero cost as possible, ideally spinning down to near no resource usage when no jobs running.",[248,371,372,373],{},"Maintenance\n",[245,374,375],{},[248,376,377,378],{},"Would like to be able to deploy new versions of the Airflow container on ECS.\n",[245,379,380],{},[248,381,382,383],{},"This isn't a hard requirement—could use an EC2 instance and update in place, but that's another box to maintain long term.\n",[245,384,385],{},[248,386,387],{},"If instead it was just ECS pointing at an RDS database, you could restore the DB from snapshot and test a deployment before release to production.",[233,389,391],{"id":390},"repo","Repo",[218,393,394,395],{},"Just started with a repo at: ",[222,396,397],{"href":397,"rel":398},"https:\u002F\u002Fgithub.com\u002FChrisTowles\u002Fairflow-playground",[226],[218,400,401],{},"I don't usually post Proofs of Concept like this publicly, but I'm doing it on my own time, so let's see how this goes and where it takes me.",{"title":403,"searchDepth":404,"depth":404,"links":405},"",2,[406,407,408,409],{"id":235,"depth":404,"text":236},{"id":261,"depth":404,"text":262},{"id":313,"depth":404,"text":314},{"id":390,"depth":404,"text":391},"2024-07-10","Reasons I want to use Airflow for a Proof of Concept near Serverless ELT","md",{"src":414,"alt":415},"\u002Fimages\u002Fblog\u002Fairflow-part-1-blog-image.png","A laptop with a data analytics",{},true,{"title":25,"description":411},"published","pRdrsfV0D6PBLbFdk4FfgwUGTziJc5Za3Wj5U4F5LI8",[422,424],{"title":21,"path":22,"stem":23,"description":423,"status":419,"children":-1},"the perfect place to put everything that doesn't belong anywhere else",{"title":29,"path":30,"stem":31,"description":425,"status":419,"children":-1},"Why I'm installing it on servers I ssh into a lot.",1776221196452]