[{"data":1,"prerenderedAt":2264},["ShallowReactive",2],{"/en-us/blog/tags/git/":3,"navigation-en-us":19,"banner-en-us":437,"footer-en-us":452,"git-tag-page-en-us":663},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"content":8,"config":10,"_id":12,"_type":13,"title":14,"_source":15,"_file":16,"_stem":17,"_extension":18},"/en-us/blog/tags/git","tags",false,"",{"tag":9,"tagSlug":9},"git",{"template":11},"BlogTag","content:en-us:blog:tags:git.yml","yaml","Git","content","en-us/blog/tags/git.yml","en-us/blog/tags/git","yml",{"_path":20,"_dir":21,"_draft":6,"_partial":6,"_locale":7,"data":22,"_id":433,"_type":13,"title":434,"_source":15,"_file":435,"_stem":436,"_extension":18},"/shared/en-us/main-navigation","en-us",{"logo":23,"freeTrial":28,"sales":33,"login":38,"items":43,"search":374,"minimal":405,"duo":424},{"config":24},{"href":25,"dataGaName":26,"dataGaLocation":27},"/","gitlab logo","header",{"text":29,"config":30},"Get free trial",{"href":31,"dataGaName":32,"dataGaLocation":27},"https://gitlab.com/-/trial_registrations/new?glm_source=about.gitlab.com&glm_content=default-saas-trial/","free trial",{"text":34,"config":35},"Talk to sales",{"href":36,"dataGaName":37,"dataGaLocation":27},"/sales/","sales",{"text":39,"config":40},"Sign in",{"href":41,"dataGaName":42,"dataGaLocation":27},"https://gitlab.com/users/sign_in/","sign in",[44,88,184,189,295,355],{"text":45,"config":46,"cards":48,"footer":71},"Platform",{"dataNavLevelOne":47},"platform",[49,55,63],{"title":45,"description":50,"link":51},"The most comprehensive AI-powered DevSecOps Platform",{"text":52,"config":53},"Explore our Platform",{"href":54,"dataGaName":47,"dataGaLocation":27},"/platform/",{"title":56,"description":57,"link":58},"GitLab Duo (AI)","Build software faster with AI at every stage of development",{"text":59,"config":60},"Meet GitLab Duo",{"href":61,"dataGaName":62,"dataGaLocation":27},"/gitlab-duo/","gitlab duo ai",{"title":64,"description":65,"link":66},"Why GitLab","10 reasons why 
Enterprises choose GitLab",{"text":67,"config":68},"Learn more",{"href":69,"dataGaName":70,"dataGaLocation":27},"/why-gitlab/","why gitlab",{"title":72,"items":73},"Get started with",[74,79,84],{"text":75,"config":76},"Platform Engineering",{"href":77,"dataGaName":78,"dataGaLocation":27},"/solutions/platform-engineering/","platform engineering",{"text":80,"config":81},"Developer Experience",{"href":82,"dataGaName":83,"dataGaLocation":27},"/developer-experience/","Developer experience",{"text":85,"config":86},"MLOps",{"href":87,"dataGaName":85,"dataGaLocation":27},"/topics/devops/the-role-of-ai-in-devops/",{"text":89,"left":90,"config":91,"link":93,"lists":97,"footer":166},"Product",true,{"dataNavLevelOne":92},"solutions",{"text":94,"config":95},"View all Solutions",{"href":96,"dataGaName":92,"dataGaLocation":27},"/solutions/",[98,123,145],{"title":99,"description":100,"link":101,"items":106},"Automation","CI/CD and automation to accelerate deployment",{"config":102},{"icon":103,"href":104,"dataGaName":105,"dataGaLocation":27},"AutomatedCodeAlt","/solutions/delivery-automation/","automated software delivery",[107,111,115,119],{"text":108,"config":109},"CI/CD",{"href":110,"dataGaLocation":27,"dataGaName":108},"/solutions/continuous-integration/",{"text":112,"config":113},"AI-Assisted Development",{"href":61,"dataGaLocation":27,"dataGaName":114},"AI assisted development",{"text":116,"config":117},"Source Code Management",{"href":118,"dataGaLocation":27,"dataGaName":116},"/solutions/source-code-management/",{"text":120,"config":121},"Automated Software Delivery",{"href":104,"dataGaLocation":27,"dataGaName":122},"Automated software delivery",{"title":124,"description":125,"link":126,"items":131},"Security","Deliver code faster without compromising security",{"config":127},{"href":128,"dataGaName":129,"dataGaLocation":27,"icon":130},"/solutions/security-compliance/","security and compliance","ShieldCheckLight",[132,135,140],{"text":133,"config":134},"Security & 
Compliance",{"href":128,"dataGaLocation":27,"dataGaName":133},{"text":136,"config":137},"Software Supply Chain Security",{"href":138,"dataGaLocation":27,"dataGaName":139},"/solutions/supply-chain/","Software supply chain security",{"text":141,"config":142},"Compliance & Governance",{"href":143,"dataGaLocation":27,"dataGaName":144},"/solutions/continuous-software-compliance/","Compliance and governance",{"title":146,"link":147,"items":152},"Measurement",{"config":148},{"icon":149,"href":150,"dataGaName":151,"dataGaLocation":27},"DigitalTransformation","/solutions/visibility-measurement/","visibility and measurement",[153,157,161],{"text":154,"config":155},"Visibility & Measurement",{"href":150,"dataGaLocation":27,"dataGaName":156},"Visibility and Measurement",{"text":158,"config":159},"Value Stream Management",{"href":160,"dataGaLocation":27,"dataGaName":158},"/solutions/value-stream-management/",{"text":162,"config":163},"Analytics & Insights",{"href":164,"dataGaLocation":27,"dataGaName":165},"/solutions/analytics-and-insights/","Analytics and insights",{"title":167,"items":168},"GitLab for",[169,174,179],{"text":170,"config":171},"Enterprise",{"href":172,"dataGaLocation":27,"dataGaName":173},"/enterprise/","enterprise",{"text":175,"config":176},"Small Business",{"href":177,"dataGaLocation":27,"dataGaName":178},"/small-business/","small business",{"text":180,"config":181},"Public Sector",{"href":182,"dataGaLocation":27,"dataGaName":183},"/solutions/public-sector/","public sector",{"text":185,"config":186},"Pricing",{"href":187,"dataGaName":188,"dataGaLocation":27,"dataNavLevelOne":188},"/pricing/","pricing",{"text":190,"config":191,"link":193,"lists":197,"feature":282},"Resources",{"dataNavLevelOne":192},"resources",{"text":194,"config":195},"View all resources",{"href":196,"dataGaName":192,"dataGaLocation":27},"/resources/",[198,231,254],{"title":199,"items":200},"Getting 
started",[201,206,211,216,221,226],{"text":202,"config":203},"Install",{"href":204,"dataGaName":205,"dataGaLocation":27},"/install/","install",{"text":207,"config":208},"Quick start guides",{"href":209,"dataGaName":210,"dataGaLocation":27},"/get-started/","quick setup checklists",{"text":212,"config":213},"Learn",{"href":214,"dataGaLocation":27,"dataGaName":215},"https://university.gitlab.com/","learn",{"text":217,"config":218},"Product documentation",{"href":219,"dataGaName":220,"dataGaLocation":27},"https://docs.gitlab.com/","product documentation",{"text":222,"config":223},"Best practice videos",{"href":224,"dataGaName":225,"dataGaLocation":27},"/getting-started-videos/","best practice videos",{"text":227,"config":228},"Integrations",{"href":229,"dataGaName":230,"dataGaLocation":27},"/integrations/","integrations",{"title":232,"items":233},"Discover",[234,239,244,249],{"text":235,"config":236},"Customer success stories",{"href":237,"dataGaName":238,"dataGaLocation":27},"/customers/","customer success stories",{"text":240,"config":241},"Blog",{"href":242,"dataGaName":243,"dataGaLocation":27},"/blog/","blog",{"text":245,"config":246},"Remote",{"href":247,"dataGaName":248,"dataGaLocation":27},"https://handbook.gitlab.com/handbook/company/culture/all-remote/","remote",{"text":250,"config":251},"TeamOps",{"href":252,"dataGaName":253,"dataGaLocation":27},"/teamops/","teamops",{"title":255,"items":256},"Connect",[257,262,267,272,277],{"text":258,"config":259},"GitLab 
Services",{"href":260,"dataGaName":261,"dataGaLocation":27},"/services/","services",{"text":263,"config":264},"Community",{"href":265,"dataGaName":266,"dataGaLocation":27},"/community/","community",{"text":268,"config":269},"Forum",{"href":270,"dataGaName":271,"dataGaLocation":27},"https://forum.gitlab.com/","forum",{"text":273,"config":274},"Events",{"href":275,"dataGaName":276,"dataGaLocation":27},"/events/","events",{"text":278,"config":279},"Partners",{"href":280,"dataGaName":281,"dataGaLocation":27},"/partners/","partners",{"backgroundColor":283,"textColor":284,"text":285,"image":286,"link":290},"#2f2a6b","#fff","Insights for the future of software development",{"altText":287,"config":288},"the source promo card",{"src":289},"/images/navigation/the-source-promo-card.svg",{"text":291,"config":292},"Read the latest",{"href":293,"dataGaName":294,"dataGaLocation":27},"/the-source/","the source",{"text":296,"config":297,"lists":299},"Company",{"dataNavLevelOne":298},"company",[300],{"items":301},[302,307,313,315,320,325,330,335,340,345,350],{"text":303,"config":304},"About",{"href":305,"dataGaName":306,"dataGaLocation":27},"/company/","about",{"text":308,"config":309,"footerGa":312},"Jobs",{"href":310,"dataGaName":311,"dataGaLocation":27},"/jobs/","jobs",{"dataGaName":311},{"text":273,"config":314},{"href":275,"dataGaName":276,"dataGaLocation":27},{"text":316,"config":317},"Leadership",{"href":318,"dataGaName":319,"dataGaLocation":27},"/company/team/e-group/","leadership",{"text":321,"config":322},"Team",{"href":323,"dataGaName":324,"dataGaLocation":27},"/company/team/","team",{"text":326,"config":327},"Handbook",{"href":328,"dataGaName":329,"dataGaLocation":27},"https://handbook.gitlab.com/","handbook",{"text":331,"config":332},"Investor relations",{"href":333,"dataGaName":334,"dataGaLocation":27},"https://ir.gitlab.com/","investor relations",{"text":336,"config":337},"Trust Center",{"href":338,"dataGaName":339,"dataGaLocation":27},"/security/","trust 
center",{"text":341,"config":342},"AI Transparency Center",{"href":343,"dataGaName":344,"dataGaLocation":27},"/ai-transparency-center/","ai transparency center",{"text":346,"config":347},"Newsletter",{"href":348,"dataGaName":349,"dataGaLocation":27},"/company/contact/","newsletter",{"text":351,"config":352},"Press",{"href":353,"dataGaName":354,"dataGaLocation":27},"/press/","press",{"text":356,"config":357,"lists":358},"Contact us",{"dataNavLevelOne":298},[359],{"items":360},[361,364,369],{"text":34,"config":362},{"href":36,"dataGaName":363,"dataGaLocation":27},"talk to sales",{"text":365,"config":366},"Get help",{"href":367,"dataGaName":368,"dataGaLocation":27},"/support/","get help",{"text":370,"config":371},"Customer portal",{"href":372,"dataGaName":373,"dataGaLocation":27},"https://customers.gitlab.com/customers/sign_in/","customer portal",{"close":375,"login":376,"suggestions":383},"Close",{"text":377,"link":378},"To search repositories and projects, login to",{"text":379,"config":380},"gitlab.com",{"href":41,"dataGaName":381,"dataGaLocation":382},"search login","search",{"text":384,"default":385},"Suggestions",[386,388,392,394,398,402],{"text":56,"config":387},{"href":61,"dataGaName":56,"dataGaLocation":382},{"text":389,"config":390},"Code Suggestions (AI)",{"href":391,"dataGaName":389,"dataGaLocation":382},"/solutions/code-suggestions/",{"text":108,"config":393},{"href":110,"dataGaName":108,"dataGaLocation":382},{"text":395,"config":396},"GitLab on AWS",{"href":397,"dataGaName":395,"dataGaLocation":382},"/partners/technology-partners/aws/",{"text":399,"config":400},"GitLab on Google Cloud",{"href":401,"dataGaName":399,"dataGaLocation":382},"/partners/technology-partners/google-cloud-platform/",{"text":403,"config":404},"Why GitLab?",{"href":69,"dataGaName":403,"dataGaLocation":382},{"freeTrial":406,"mobileIcon":411,"desktopIcon":416,"secondaryButton":419},{"text":407,"config":408},"Start free 
trial",{"href":409,"dataGaName":32,"dataGaLocation":410},"https://gitlab.com/-/trials/new/","nav",{"altText":412,"config":413},"Gitlab Icon",{"src":414,"dataGaName":415,"dataGaLocation":410},"/images/brand/gitlab-logo-tanuki.svg","gitlab icon",{"altText":412,"config":417},{"src":418,"dataGaName":415,"dataGaLocation":410},"/images/brand/gitlab-logo-type.svg",{"text":420,"config":421},"Get Started",{"href":422,"dataGaName":423,"dataGaLocation":410},"https://gitlab.com/-/trial_registrations/new?glm_source=about.gitlab.com/compare/gitlab-vs-github/","get started",{"freeTrial":425,"mobileIcon":429,"desktopIcon":431},{"text":426,"config":427},"Learn more about GitLab Duo",{"href":61,"dataGaName":428,"dataGaLocation":410},"gitlab duo",{"altText":412,"config":430},{"src":414,"dataGaName":415,"dataGaLocation":410},{"altText":412,"config":432},{"src":418,"dataGaName":415,"dataGaLocation":410},"content:shared:en-us:main-navigation.yml","Main Navigation","shared/en-us/main-navigation.yml","shared/en-us/main-navigation",{"_path":438,"_dir":21,"_draft":6,"_partial":6,"_locale":7,"title":439,"button":440,"image":444,"config":447,"_id":449,"_type":13,"_source":15,"_file":450,"_stem":451,"_extension":18},"/shared/en-us/banner","is now in public beta!",{"text":67,"config":441},{"href":442,"dataGaName":443,"dataGaLocation":27},"/gitlab-duo/agent-platform/","duo banner",{"config":445},{"src":446},"https://res.cloudinary.com/about-gitlab-com/image/upload/v1753720689/somrf9zaunk0xlt7ne4x.svg",{"layout":448},"release","content:shared:en-us:banner.yml","shared/en-us/banner.yml","shared/en-us/banner",{"_path":453,"_dir":21,"_draft":6,"_partial":6,"_locale":7,"data":454,"_id":659,"_type":13,"title":660,"_source":15,"_file":661,"_stem":662,"_extension":18},"/shared/en-us/main-footer",{"text":455,"source":456,"edit":462,"contribute":467,"config":472,"items":477,"minimal":651},"Git is a trademark of Software Freedom Conservancy and our use of 'GitLab' is under 
license",{"text":457,"config":458},"View page source",{"href":459,"dataGaName":460,"dataGaLocation":461},"https://gitlab.com/gitlab-com/marketing/digital-experience/about-gitlab-com/","page source","footer",{"text":463,"config":464},"Edit this page",{"href":465,"dataGaName":466,"dataGaLocation":461},"https://gitlab.com/gitlab-com/marketing/digital-experience/about-gitlab-com/-/blob/main/content/","web ide",{"text":468,"config":469},"Please contribute",{"href":470,"dataGaName":471,"dataGaLocation":461},"https://gitlab.com/gitlab-com/marketing/digital-experience/about-gitlab-com/-/blob/main/CONTRIBUTING.md/","please contribute",{"twitter":473,"facebook":474,"youtube":475,"linkedin":476},"https://twitter.com/gitlab","https://www.facebook.com/gitlab","https://www.youtube.com/channel/UCnMGQ8QHMAnVIsI3xJrihhg","https://www.linkedin.com/company/gitlab-com",[478,501,558,587,621],{"title":45,"links":479,"subMenu":484},[480],{"text":481,"config":482},"DevSecOps platform",{"href":54,"dataGaName":483,"dataGaLocation":461},"devsecops platform",[485],{"title":185,"links":486},[487,491,496],{"text":488,"config":489},"View plans",{"href":187,"dataGaName":490,"dataGaLocation":461},"view plans",{"text":492,"config":493},"Why Premium?",{"href":494,"dataGaName":495,"dataGaLocation":461},"/pricing/premium/","why premium",{"text":497,"config":498},"Why Ultimate?",{"href":499,"dataGaName":500,"dataGaLocation":461},"/pricing/ultimate/","why ultimate",{"title":502,"links":503},"Solutions",[504,509,512,514,519,524,528,531,535,540,542,545,548,553],{"text":505,"config":506},"Digital transformation",{"href":507,"dataGaName":508,"dataGaLocation":461},"/topics/digital-transformation/","digital transformation",{"text":133,"config":510},{"href":128,"dataGaName":511,"dataGaLocation":461},"security & compliance",{"text":122,"config":513},{"href":104,"dataGaName":105,"dataGaLocation":461},{"text":515,"config":516},"Agile 
development",{"href":517,"dataGaName":518,"dataGaLocation":461},"/solutions/agile-delivery/","agile delivery",{"text":520,"config":521},"Cloud transformation",{"href":522,"dataGaName":523,"dataGaLocation":461},"/topics/cloud-native/","cloud transformation",{"text":525,"config":526},"SCM",{"href":118,"dataGaName":527,"dataGaLocation":461},"source code management",{"text":108,"config":529},{"href":110,"dataGaName":530,"dataGaLocation":461},"continuous integration & delivery",{"text":532,"config":533},"Value stream management",{"href":160,"dataGaName":534,"dataGaLocation":461},"value stream management",{"text":536,"config":537},"GitOps",{"href":538,"dataGaName":539,"dataGaLocation":461},"/solutions/gitops/","gitops",{"text":170,"config":541},{"href":172,"dataGaName":173,"dataGaLocation":461},{"text":543,"config":544},"Small business",{"href":177,"dataGaName":178,"dataGaLocation":461},{"text":546,"config":547},"Public sector",{"href":182,"dataGaName":183,"dataGaLocation":461},{"text":549,"config":550},"Education",{"href":551,"dataGaName":552,"dataGaLocation":461},"/solutions/education/","education",{"text":554,"config":555},"Financial services",{"href":556,"dataGaName":557,"dataGaLocation":461},"/solutions/finance/","financial 
services",{"title":190,"links":559},[560,562,564,566,569,571,573,575,577,579,581,583,585],{"text":202,"config":561},{"href":204,"dataGaName":205,"dataGaLocation":461},{"text":207,"config":563},{"href":209,"dataGaName":210,"dataGaLocation":461},{"text":212,"config":565},{"href":214,"dataGaName":215,"dataGaLocation":461},{"text":217,"config":567},{"href":219,"dataGaName":568,"dataGaLocation":461},"docs",{"text":240,"config":570},{"href":242,"dataGaName":243,"dataGaLocation":461},{"text":235,"config":572},{"href":237,"dataGaName":238,"dataGaLocation":461},{"text":245,"config":574},{"href":247,"dataGaName":248,"dataGaLocation":461},{"text":258,"config":576},{"href":260,"dataGaName":261,"dataGaLocation":461},{"text":250,"config":578},{"href":252,"dataGaName":253,"dataGaLocation":461},{"text":263,"config":580},{"href":265,"dataGaName":266,"dataGaLocation":461},{"text":268,"config":582},{"href":270,"dataGaName":271,"dataGaLocation":461},{"text":273,"config":584},{"href":275,"dataGaName":276,"dataGaLocation":461},{"text":278,"config":586},{"href":280,"dataGaName":281,"dataGaLocation":461},{"title":296,"links":588},[589,591,593,595,597,599,601,605,610,612,614,616],{"text":303,"config":590},{"href":305,"dataGaName":298,"dataGaLocation":461},{"text":308,"config":592},{"href":310,"dataGaName":311,"dataGaLocation":461},{"text":316,"config":594},{"href":318,"dataGaName":319,"dataGaLocation":461},{"text":321,"config":596},{"href":323,"dataGaName":324,"dataGaLocation":461},{"text":326,"config":598},{"href":328,"dataGaName":329,"dataGaLocation":461},{"text":331,"config":600},{"href":333,"dataGaName":334,"dataGaLocation":461},{"text":602,"config":603},"Sustainability",{"href":604,"dataGaName":602,"dataGaLocation":461},"/sustainability/",{"text":606,"config":607},"Diversity, inclusion and belonging (DIB)",{"href":608,"dataGaName":609,"dataGaLocation":461},"/diversity-inclusion-belonging/","Diversity, inclusion and 
belonging",{"text":336,"config":611},{"href":338,"dataGaName":339,"dataGaLocation":461},{"text":346,"config":613},{"href":348,"dataGaName":349,"dataGaLocation":461},{"text":351,"config":615},{"href":353,"dataGaName":354,"dataGaLocation":461},{"text":617,"config":618},"Modern Slavery Transparency Statement",{"href":619,"dataGaName":620,"dataGaLocation":461},"https://handbook.gitlab.com/handbook/legal/modern-slavery-act-transparency-statement/","modern slavery transparency statement",{"title":622,"links":623},"Contact Us",[624,627,629,631,636,641,646],{"text":625,"config":626},"Contact an expert",{"href":36,"dataGaName":37,"dataGaLocation":461},{"text":365,"config":628},{"href":367,"dataGaName":368,"dataGaLocation":461},{"text":370,"config":630},{"href":372,"dataGaName":373,"dataGaLocation":461},{"text":632,"config":633},"Status",{"href":634,"dataGaName":635,"dataGaLocation":461},"https://status.gitlab.com/","status",{"text":637,"config":638},"Terms of use",{"href":639,"dataGaName":640,"dataGaLocation":461},"/terms/","terms of use",{"text":642,"config":643},"Privacy statement",{"href":644,"dataGaName":645,"dataGaLocation":461},"/privacy/","privacy statement",{"text":647,"config":648},"Cookie preferences",{"dataGaName":649,"dataGaLocation":461,"id":650,"isOneTrustButton":90},"cookie preferences","ot-sdk-btn",{"items":652},[653,655,657],{"text":637,"config":654},{"href":639,"dataGaName":640,"dataGaLocation":461},{"text":642,"config":656},{"href":644,"dataGaName":645,"dataGaLocation":461},{"text":647,"config":658},{"dataGaName":649,"dataGaLocation":461,"id":650,"isOneTrustButton":90},"content:shared:en-us:main-footer.yml","Main 
Footer","shared/en-us/main-footer.yml","shared/en-us/main-footer",{"allPosts":664,"featuredPost":2244,"totalPagesCount":2262,"initialPosts":2263},[665,692,713,735,757,784,805,827,846,866,887,907,927,948,967,989,1009,1029,1050,1071,1090,1108,1128,1148,1168,1187,1207,1228,1249,1268,1288,1309,1329,1349,1370,1390,1409,1429,1449,1469,1490,1509,1529,1549,1569,1588,1611,1630,1649,1669,1688,1708,1727,1745,1764,1784,1802,1821,1841,1859,1878,1898,1917,1938,1957,1977,1997,2015,2035,2054,2074,2093,2112,2130,2148,2167,2185,2203,2224],{"_path":666,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":667,"content":675,"config":685,"_id":688,"_type":13,"title":689,"_source":15,"_file":690,"_stem":691,"_extension":18},"/en-us/blog/a-beginners-guide-to-the-git-reftable-format",{"title":668,"description":669,"ogTitle":668,"ogDescription":669,"noIndex":6,"ogImage":670,"ogUrl":671,"ogSiteName":672,"ogType":673,"canonicalUrls":671,"schema":674},"A beginner's guide to the Git reftable format","In Git 2.45.0, GitLab upstreamed the reftable backend to Git, which completely changes how references are stored. Get an in-depth look at the inner workings of this new format.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749664595/Blog/Hero%20Images/blog-image-template-1800x945__9_.png","https://about.gitlab.com/blog/a-beginners-guide-to-the-git-reftable-format","https://about.gitlab.com","article","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"A beginner's guide to the Git reftable format\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Patrick Steinhardt\"}],\n        \"datePublished\": \"2024-05-30\",\n      }",{"title":668,"description":669,"authors":676,"heroImage":670,"date":678,"body":679,"category":680,"tags":681},[677],"Patrick Steinhardt","2024-05-30","Until recently, the \"files\" format was the only way for Git to store references. 
With the [release of Git 2.45.0](https://about.gitlab.com/blog/whats-new-in-git-2-45-0/), Git can now store references in a \"reftable\" format. This new format is a binary format that is quite a bit more complex, but that complexity allows it to address several shortcomings of the \"files\" format. The design goals for the \"reftable\" format include:\n\n- Make the lookup of a single reference and iteration through ranges of references as efficient and fast as possible.\n- Support for consistent reads of references so that Git never reads an in-between state when an update to multiple references has been applied only partially.\n- Support for atomic writes such that updating multiple references can be implemented as an all-or-nothing operation.\n- Efficient storage of both refs and the reflog.\n\nIn this article, we will go under the hood of the \"reftable\" format to see exactly how it works.\n\n## How Git stores references\n\nBefore we dive into the details of the \"reftable\" format, let's quickly recap how Git has historically stored references. If you are already familiar with this, you can skip this section.\n\nA Git repository keeps track of two important data structures:\n\n- [Objects](https://git-scm.com/book/en/v2/Git-Internals-Git-Objects), which contain the actual data of your repository. This includes commits, the directory tree structure, and the blobs that contain your source code. Objects point to each other, forming an object graph. Furthermore, each object has an object ID that uniquely identifies the object.\n\n- References, such as branches and tags, which are pointers into the object graph so that you can give objects names that are easier to remember and keep track of different tracks of your development history. For example, a repository may contain a `main` branch, which is a reference named `refs/heads/main` that points to a specific commit.\n\nReferences are stored in the reference database. 
Until Git 2.45.0, there was only the \"files\" database format. In this format, every reference is stored as a normal file that contains either one of the following:\n\n- A regular reference that contains the object ID of the commit it points to.\n- A symbolic reference that contains the name of another reference, similar to how a symbolic link points to another file.\n\nAt regular intervals, these references get packed into a single `packed-refs` file to make lookups more efficient.\n\nThe following examples should give an idea of how the \"files\" format operates:\n\n```shell\n$ git init .\n$ git commit --allow-empty --message \"Initial commit\"\n[main (root-commit) 6917c17] Initial commit\n\n# HEAD is a symbolic reference pointing to refs/heads/main.\n$ cat .git/HEAD\nref: refs/heads/main\n\n# refs/heads/main is a regular reference pointing to a commit.\n$ cat .git/refs/heads/main\n6917c178cfc3c50215a82cf959204e9934af24c8\n\n# git-pack-refs(1) packs these references into the packed-refs file.\n$ git pack-refs --all\n$ cat .git/packed-refs\n# pack-refs with: peeled fully-peeled sorted\n6917c178cfc3c50215a82cf959204e9934af24c8 refs/heads/main\n```\n\n## High-level structure of reftables\n\nAssuming that you've got Git 2.45.0 or newer installed, you can create a repository with the \"reftable\" format by using the `--ref-format=reftable` switch:\n\n```shell\n$ git init --ref-format=reftable .\nInitialized empty Git repository in /tmp/repo/.git/\n$ git rev-parse --show-ref-format\nreftable\n\n# Irrelevant files have been removed for ease of understanding.\n$ tree .git\n.git\n├── config\n├── HEAD\n├── index\n├── objects\n├── refs\n│   └── heads\n└── reftable\n\t├── 0x000000000001-0x000000000002-40a482a9.ref\n\t└── tables.list\n\n4 directories, 6 files\n```\n\nFirst, looking at the repository configuration, you will see it has an `extension.refstorage` key:\n\n```shell\n$ cat .git/config\n[core]\n    repositoryformatversion = 1\n    filemode = true\n    bare = false\n 
   logallrefupdates = true\n[extensions]\n    refstorage = reftable\n```\n\nThis configuration indicates to Git that the repository has been initialized with the \"reftable\" format and tells Git to use the \"reftable\" backend to access it.\n\nWeirdly enough, the repository still has a few files that look as if the \"files\" backend was in use:\n\n- `HEAD` would usually be a symbolic reference pointing to your currently checked-out branch. While it is not used by the \"reftable\" backend, it is required for Git clients to detect the directory as a Git repository. Therefore, when using the \"reftable\" format, `HEAD` is a stub with contents `ref: refs/heads/.invalid`.\n\n- `refs/heads` is a file with contents `this repository uses the reftable format`. Git clients that do not know about the \"reftable\" format would usually expect this path to be a directory. Consequently, creating this path as a file intentionally causes such older Git clients to fail if they tried to access the repository with the \"files\" backend.\n\nThe actual references are stored in the `reftable/` directory:\n\n```shell\n$ tree .git/reftable\n.git/reftable/\n├── 0x000000000001-0x000000000001-794bd722.ref\n└── tables.list\n\n$ cat .git/reftable/tables.list\n0x000000000001-0x000000000001-794bd722.ref\n```\n\nThere are two files here:\n\n- `0x000000000001-0x000000000001-794bd722.ref` is a table containing references and the reflog data in a binary format.\n\n- `tables.list` is, well, a list of tables. In the current state of the repository, the file contains a single line, which is the name of the table. 
This file tracks the current set of active tables in the \"reftable\" database and is updated whenever new tables get added to the repository.\n\nUpdating a reference creates a new table:\n\n```shell\n$ git commit --allow-empty --message \"Initial commit\"\n[main (root-commit) 1472a58] Initial commit\n\n$ tree .git/reftable\n.git/reftable/\n├── 0x000000000001-0x000000000002-eb87d12b.ref\n└── tables.list\n\n$ cat .git/reftable/tables.list\n0x000000000001-0x000000000002-eb87d12b.ref\n```\n\nAs you can see, the previous table has been replaced with a new one. Furthermore, the `tables.list` file has been updated to contain the new table.\n\n## The structure of a table\n\nAs mentioned earlier, the actual data of the reference database is contained in tables. Roughly speaking, a table is split up into multiple sections:\n\n- The \"header\" contains metadata about the table. Along with some other information, this includes the version of the format, the block size, and the hash function used by the repository (for example, SHA1 or SHA256).\n- The \"ref\" section contains your references. These records have a key that equals the reference name and point to either an object ID for regular references, or to another reference for symbolic references.\n- The \"obj\" section contains reverse mapping from object IDs to the references that point to those object IDs. These allow Git to efficiently look up which references point to a given object ID.\n- The \"log\" section contains your reflog entries. These records have a key that equals the reference name plus an index that represents the number of the log entry. 
Furthermore, they contain the old and new object IDs as well as the message for that reflog entry.\n- The \"footer\" contains offsets to the various sections.\n\n![long table with all the reftable sections](https://res.cloudinary.com/about-gitlab-com/image/upload/v1749675179/Blog/Content%20Images/Frame_1_-_Reftable_overview.svg)\n\nEach of the section types are structured in a similar manner. Sections contain a set of records that are sorted by each record's key. For example, when you have two ref records `refs/heads/aaaaa` and `refs/heads/bbb`, you have two ref records with these reference names as their respective keys, and `refs/heads/aaaaa` would come before `refs/heads/bbb`.\n\nFurthermore, each section is divided into blocks of a fixed length. This block length is encoded in the header and serves two purposes:\n\n- Given the start of the section as well as the block size, the reader implicitly knows where each of the blocks starts. This allows Git to easily seek into the middle of a section without reading preceding blocks, which enables binary searches over blocks to speed up the lookup of records.\n- It ensures that the reader knows how much data to read from the disk at a time. Consequently, the block size is by default set to 4KiB, which is the most common sector size for hard disks. The maximum block size is 16MB.\n\nWhen we peek into, for example, a \"ref\" section, it looks roughly like the following graphic. Note how its records are ordered lexicographically inside the blocks, but also across the blocks.\n\n![reference block uncompressed](https://res.cloudinary.com/about-gitlab-com/image/upload/v1749675179/Blog/Content%20Images/Frame_2_-_Ref_block_uncompressed.svg)\n\nEquipped with the current information, we can locate a record by using the following steps:\n\n1. Perform a binary search over the blocks by looking at the keys of their respective first records, identifying the block that must contain our record.\n\n2. 
Perform a linear search over the records in that block.\n\nBoth of these steps are still somewhat inefficient. If we have many blocks we may have to read logarithmically many of them in our binary search to find the desired one. And when blocks contain many records, we potentially have to read all of them during the linear search.\n\nThe \"reftable\" format has additional built-in mechanisms to address these performance concerns. We will touch on these over the next few sections.\n\n### Prefix compression\n\nAs you may have noticed, all of the record keys share the same prefix `refs/`. This is a common thing in Git:\n\n- All branches start with `refs/heads/`.\n- All tags start with `refs/tags/`.\n\nTherefore, we expect that subsequent records will most likely share a significant prefix of their key. This is a good opportunity to save some precious disk space. Because we know that most keys will share a common prefix, it makes sense to optimize for this.\n\nThe optimization uses prefix compression. Every record encodes a prefix length that tells the reader how many bytes to reuse from the key of the preceding record. If we have two records, `refs/heads/a` and `refs/heads/b`, the latter can be encoded by specifying a prefix length of 11 and then only storing the suffix `b`. The reader will then take the first 11 bytes of `refs/heads/a`, which is `refs/heads/`, and append the suffix `b` to it.\n\n![prefix compression](https://res.cloudinary.com/about-gitlab-com/image/upload/v1749675179/Blog/Content%20Images/Frame_3_-_Ref_block_prefix_compression.svg)\n\n### Restart points\n\nAs explained earlier, the best way to search for a reference in a block with our current understanding of the \"reftable\" format is to do a linear search. This is because records do not have a fixed length, so it is impossible for us to tell where records would start without scanning through the block from the beginning. 
Also, even if records were of fixed length, we would not be able to seek into the middle of a block because the prefix compression also requires us to read preceding records.\n\nDoing a linear search would be quite inefficient because blocks may contain hundreds or even thousands of records. To address this issue, the \"reftable\" format encodes so-called restart points into every block. Restart points are uncompressed records where the prefix compression is reset. Consequently, records at restart points always contain their full key and it becomes possible to directly seek to and read the record without having to read preceding records. These restart points are listed in the footer of each block.\n\nEquipped with this information, we can avoid performing a linear search over the block. Instead, we can now do a binary search over the restart points where we search for the first restart point with a key larger than the sought-after key. From there, it follows that the desired record must be located in the section spanning from the _preceding_ restart point to the identified one.\n\nThus, our initial procedure to look up a record (binary search for the block, linear search for the record) is now:\n\n1. Perform a binary search over the blocks, identifying the block that must contain our record.\n\n2. Perform a binary search over the restart points, identifying the sub-section of the block that must contain our record.\n\n3. Perform a linear search over the records in that sub-section.\n\n![Linear search for a record](https://res.cloudinary.com/about-gitlab-com/image/upload/v1749675179/Blog/Content%20Images/Frame_4_-_Restart_points.svg)\n\n### Indices\n\nWhile the search for records inside a block is now reasonably efficient, it's still inefficient to locate the block itself. A binary search may be reasonably performant when you have a couple of blocks, but repositories with millions of references may have hundreds or even thousands of blocks. 
Without any additional data structure, this would cause logarithmically many disk seeks on average.\n\nTo avoid this, every section may be followed by an index section that provides an efficient way to look up a block. Each index record holds the following information:\n\n- The location of the block that it is indexing.\n- The key of the last record of the block that it is indexing.\n\nWith three or fewer blocks, a binary search will always require, at most, two disk reads to find the desired target block. This is the same number of reads we would have to do with an index: one to read the index itself and one to read the desired block. Consequently, indices are only written when they would actually save some reads, which is the case with four or more indexed blocks.\n\nNow the question is: What happens when the index itself becomes so large that it spans over multiple blocks? You might have guessed it: We write another index that indexes the index. These multi-level indices really only become necessary once you have repositories with hundreds of thousands of references.\n\nEquipped with these indices, we can now make the procedure to look up records even more efficient:\n1. Determine whether there is an index by looking at the footer of the table.\n\t- If there is one, perform a binary search over the index to find the desired block. This block may point into an index block itself, in which case we need to repeat this step until we hit a record of the desired type.\n\t- Otherwise, perform a binary search over the blocks as we did before.\n2. Perform a binary search over the restart points, identifying the sub-section of the block that must contain our record.\n3. Perform a linear search over the records in that sub-section.\n\n## Multiple tables\n\nUp to this point, we have only discussed how to read a _single_ table. 
But as the name `tables.list` indicates, you can actually have a list of tables in your \"reftable\" database.\n\nEvery time you update a reference in your repository, a new table is written and appended to `tables.list`. Thus, you will eventually end up with multiple tables:\n\n```shell\n$ tree .git/reftable/\n.git/reftable/\n├── 0x000000000001-0x000000000007-8dcd8a77.ref\n├── 0x000000000008-0x000000000008-30e0f6f6.ref\n└── tables.list\n\n$ cat .git/reftable/tables.list\n0x000000000001-0x000000000007-8dcd8a77.ref\n0x000000000008-0x000000000008-30e0f6f6.ref\n```\n\nReading the actual state of a repository requires us to merge these multiple tables into a single virtual table.\n\nYou might be wondering: If a table is written for each reference update and the same reference is updated multiple times, how does the \"reftable\" format know the most up-to-date value of a given reference? Intuitively, one could assume the value would be the one from the newest table containing the reference.\n\nIn fact, every single record has a so-called update index that encodes the \"priority\" of a record. For example, if two ref records with the same name exist, then the one with the higher update index overrides the one with the lower update index.\n\nThese update indices are visible in the file structure above. The long hex strings (for example `0x000000000001`) are the update indices, where the left-hand side of the table name is the minimum update index contained in the table and the right-hand side is the maximum update index.\n\nMerging the tables then happens via a [priority queue](https://en.wikipedia.org/wiki/Priority_queue) that is ordered by the key of the ref record as well as its update index. Assuming we want to scan through all ref records, we would:\n\n1. 
For every table, add its first record to the priority queue.\n\n![Adding first record to the priority queue](https://res.cloudinary.com/about-gitlab-com/image/upload/v1749675179/Blog/Content%20Images/Frame_5_-_Priority_queue_1.svg)\n\n2. Yield the head of the priority queue. Because the queue is ordered by update index, it must be the most up-to-date version. Add the next item from that table to the priority queue.\n\n![Yielding the head of the priority queue](https://res.cloudinary.com/about-gitlab-com/image/upload/v1749675179/Blog/Content%20Images/Frame_6_-_Priority_queue_2.svg)\n\n3. Drop all records from the queue that have the same name. These records are shadowed, which means that they will not be shown. For each table for which we are dropping records, add the next record to the priority queue.\n\n![Dropping all records from queue that have the same name](https://res.cloudinary.com/about-gitlab-com/image/upload/v1749675179/Blog/Content%20Images/Frame_7_-_Priority_queue_3.svg)\n\nNow we can rinse and repeat to read records for other keys.\n\nTables may contain special \"tombstone\" records that mark a record as having been deleted. This allows us to delete records without having to rewrite all tables to not contain the record anymore.\n\n### Auto-compaction\n\nWhile the idea behind the priority queue is simple enough, it would be rather inefficient to merge together hundreds or even only dozens of tables in this way. So while it is true that every update to your references appends a new table to your `tables.list` file, it is only part of the story.\n\nThe other part is auto-compaction: After a new table has been appended to the list of tables, the \"reftable\" backend checks whether some of the tables should be merged. This is done by using a simple heuristic: We check whether the list of tables forms a [geometric sequence](https://en.wikipedia.org/wiki/Geometric_progression) with the file sizes. 
Every table `n` must be at least twice as large as the next-most-recent table `n + 1`. If that geometric sequence is violated, the backend will compact tables so that the geometric sequence is restored.\n\nOver time, this will lead to structures that look like the following:\n\n```shell\n$ du --apparent-size .git/reftable/*\n429    .git/reftable/0x000000000001-0x00000000bd7c-d9819000.ref\n101    .git/reftable/0x00000000bd7d-0x00000000c5ac-c34b88a4.ref\n32    .git/reftable/0x00000000c5ad-0x00000000cc6c-60391f53.ref\n8    .git/reftable/0x00000000cc6d-0x00000000cdc1-61c30db1.ref\n3    .git/reftable/0x00000000cdc2-0x00000000ce67-d9b55a96.ref\n1    .git/reftable/0x00000000ce68-0x00000000ce6b-44721696.ref\n1    .git/reftable/tables.list\n```\n\nNote how for every single table, the property `size(n) > size(n+1) * 2` holds.\n\nOne of the consequences of auto-compaction is that the \"reftable\" backend maintains itself. We no longer have to run `git pack-refs` in a repository.\n\n## Want to learn more?\n\nYou should now have a good understanding of how the new \"reftable\" format works under the hood. 
If you want to dive even deeper into the format, you can refer to the [technical documentation](https://git-scm.com/docs/reftable) provided by the Git project.\n\n> Read our [Git 2.45.0 recap](https://about.gitlab.com/blog/whats-new-in-git-2-45-0/) to find out what else is in this version of Git.","open-source",[9,682,683,684],"tutorial","open source","performance",{"slug":686,"featured":90,"template":687},"a-beginners-guide-to-the-git-reftable-format","BlogPost","content:en-us:blog:a-beginners-guide-to-the-git-reftable-format.yml","A Beginners Guide To The Git Reftable Format","en-us/blog/a-beginners-guide-to-the-git-reftable-format.yml","en-us/blog/a-beginners-guide-to-the-git-reftable-format",{"_path":693,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":694,"content":700,"config":707,"_id":709,"_type":13,"title":710,"_source":15,"_file":711,"_stem":712,"_extension":18},"/en-us/blog/appsflyer-moves-to-gitlab",{"title":695,"description":696,"ogTitle":695,"ogDescription":696,"noIndex":6,"ogImage":697,"ogUrl":698,"ogSiteName":672,"ogType":673,"canonicalUrls":698,"schema":699},"Why AppsFlyer moved from Bitbucket to GitLab","AppsFlyer migrated from Mercurial to Git so the company could move to GitLab's open source, self-managed solution.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749681252/Blog/Hero%20Images/gitlab-appsflyer.png","https://about.gitlab.com/blog/appsflyer-moves-to-gitlab","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"Why AppsFlyer moved from Bitbucket to GitLab\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Sara Kassabian\"}],\n        \"datePublished\": \"2020-04-27\",\n      }",{"title":695,"description":696,"authors":701,"heroImage":697,"date":703,"body":704,"category":705,"tags":706},[702],"Sara Kassabian","2020-04-27","\n\nResearch shows that [more users are choosing GitLab](/why-gitlab/) as their preferred [version control 
system](/topics/version-control/). In an [analysis by The New Stack of the results of a 2018 and 2019 Jetbrains developer survey](https://thenewstack.io/i-dont-git-it-tracking-the-source-collaboration-market/), there was an increase in the number of users in the study sample that selected GitLab as their version control system of choice between 2018-2019. In that same period, GitHub had a small decrease in users and Bitbucket had a more substantial decline in the number of users. \n\nOne of the most significant barriers to making the move from a version control system such as Bitbucket or GitHub to GitLab is the data migration process. We have an entire data import team at GitLab that is dedicated to making this process as seamless as possible, but let’s face it, migrating data is always going to be painful. Fortunately, other companies have paved the way to make the migration process a bit more bearable.\n\nAppsFlyer is one of those companies that took the plunge and migrated its entire system from Bitbucket to GitLab, and the team has lived to tell the tale. [Elad Leev](https://www.linkedin.com/in/elad-leev/?originalSubdomain=il), platform engineer at AppsFlyer, explains how the engineering organization managed the migration in a presentation at GitLab Commit San Francisco.\n\n\u003C!-- blank line -->\n\u003Cfigure class=\"video_container\">\n  \u003Ciframe src=\"https://www.youtube-nocookie.com/embed/z_6tVRCyJxs\" frameborder=\"0\" allowfullscreen=\"true\"> \u003C/iframe>\n\u003C/figure>\n\u003C!-- blank line -->\n\n## Why AppsFlyer chose GitLab\n\nBefore showing how AppsFlyer moved to GitLab, it’s valuable to review the business case for _why_ AppsFlyer chose GitLab over other version control systems.\n\nAppsFlyer is a large engineering organization that has more than 300 developers on-staff. Demand for the company's services grew, which translated into more than one million incoming HTTP requests per second or up to 90 billion events per day. 
AppsFlyer needed to move off the hosted solution it was using with Bitbucket because repositories could be accessed by the public too easily and because latency issues caused some builds to fail. And Bitbucket had restrictions – no more than 1000 calls/hour – and that was an easy target for the growing company to exceed.\n\nAppsFlyer tried moving from the Bitbucket-hosted solution to the closed-source, self-managed option but it was a black box. If there was a bug, it was impossible to know if it was due to their configuration or because something was wrong with the product.\n\nThe company considered GitHub Enterprise, but, like Bitbucket, it is also closed-source and was too expensive for a lower ROI. In the end, they chose GitLab because of our growth and [commitment to transparency](https://handbook.gitlab.com/handbook/values/#transparency) – our default to public and the open issue tracker made it the right fit for AppsFlyer.\n\n## Migrating from Mercurial to Git\n\nIn order to convert from Bitbucket to GitLab, AppsFlyer first needed to convert from Mercurial to Git because GitLab runs on Git.\n\nWhen [Bitbucket first launched in 2008, it only supported Mercurial repos](https://Bitbucket.org/blog/sunsetting-mercurial-support-in-Bitbucket). Notably, Bitbucket is actually going to be migrating from Mercurial to Git beginning as of June 1. 
So whether or not you’re using GitLab, there is no time like the present to transition your repositories to Git, [the version control tool chosen by almost 90% of developers](https://insights.stackoverflow.com/survey/2018#work-_-version-control).\n\nOne of the most complicated parts of the process for AppsFlyer was getting the code from Mercurial to Git, because there isn’t an immediate way to transfer from one version control tool to another.\n\nElad said AppsFlyer needed to save history, commits, tags and, with AppsFlyer being a rapidly growing start-up, to execute the transition as quickly as possible.\n\nThe AppsFlyer devs found a tool called [Fast-Export](https://github.com/frej/fast-export) which basically migrates code from Mercurial to Git and had success on a few different repositories. But could it scale effectively to migrate all the code in the organization?\n\nNext, the team worked with the R&D organization to create a self-service, Fast-Export wrapper to help with the migration from Mercurial to Git at scale. 
The Fast-Export wrapper had a few characteristics that made it work:\n\n*   It was a one-liner, so it was easy-to-use\n*   It was idiot-proof, meaning nobody could make a catastrophic mistake\n*   It used a Slack channel to keep everyone in sync\n*   It was safe, meaning you cannot override somebody’s repository by mistake\n\nThe end-to-end process is fairly straightforward, beginning with checking for the repository in GitLab and logging it into the Slack channel once the repo migration is complete.\n\n![Illustration showing AppsFlyer's Mercurial to Git migration process](https://about.gitlab.com/images/blogimages/mercurial_git.jpg){: .shadow.medium.center}\nThe Mercurial to Git migration process using the fast-export wrapper created by AppsFlyer.\n{: .note.text-center}\n\n“It's really, really important to close the old repository to writes in Bitbucket service because it happened to us more than once: A developer used this tool to migrate his repository from Bitbucket to GitLab, but other developers didn't know that the repository was moved,” says Elad.\n\nThe migration from Mercurial to Git came with a few added benefits, including the opportunity to clean up old repositories; greater transparency across teams into the GitLab migration; and increased developer trust.\n\n## Education helps\n\nDocumentation was also a large part of the migration to Git. AppsFlyer used Guru to carefully document internal processes and identified two courses on Pluralsight to help devs. There is also the entertaining cheat sheet – “[Oh Shit, Git!](https://ohshitgit.com/)” ([here is a profanity-free version](https://dangitgit.com/en)) which Elad created to share some edge cases with Git that he encountered through his work.\n\n## Now, moving to GitLab is pretty easy\n\nOnce your source is in Git, it is pretty simple to upload your data into any version control system using a data importer. 
We have detailed instructions on how to import your data from a different version control system, such as [migrating from Bitbucket to GitLab](https://docs.gitlab.com/ee/user/project/import/bitbucket_server.html), which is what AppsFlyer did.\n\n## Perks of working with an open source, self-managed solution\n\nA self-managed product that is closed-source means the user will always rely on an external vendor when it comes to managing their codebase, and we believe that having end-to-end visibility is essential when it comes to self-hosting. One of the main perks of working with an open source, self-managed version control system such as GitLab is that your team has the flexibility to build upon your existing codebase. Here are a few examples:\n\n*   AppsFlyer created another small, one-liner tool (BB2GL) that connects with Slack to help with data migration. Then, they took it a step further and connected the one-liner repository to Slack.\n*   Set deadlines: AppsFlyer created a scheduled task list that checks all the repos in Bitbucket and all the repos in GitLab to see which projects have been moved from Bitbucket to GitLab and posts a reminder on Slack for the teams.\n*   Created an in-house API wrapper which helped cut-down on code that was duplicative but written in different languages. The API wrapper helped create one location for all the GitLab metadata.\n*   The in-house API Wrapper is updated using GitLab System Hooks. Read Elad’s [in-depth Medium article to learn more about System Hooks](https://medium.com/appsflyer/gitlab-the-magic-of-system-hooks-f38c4f7ca8e7).\n\nIt’s been two years since AppsFlyer made the switch to GitLab, and it’s helped the company’s growth considerably, says Elad. Some team members have abandoned the Atlassian project management tools they used before to switch to GitLab.\n\nBut no product is perfect. There are two bugs that AppsFlyer encountered and raised with GitLab support. 
One of them has been resolved, [one is still pending](https://gitlab.com/gitlab-org/gitlab/-/issues/14130). This level of visibility into bugs wouldn’t be possible without features like the [public issue tracker](https://docs.gitlab.com/ee/user/project/issues/), which promote transparency and collaboration between GitLab users and internal GitLab teams.\n","news",[9,683,266],{"slug":708,"featured":6,"template":687},"appsflyer-moves-to-gitlab","content:en-us:blog:appsflyer-moves-to-gitlab.yml","Appsflyer Moves To Gitlab","en-us/blog/appsflyer-moves-to-gitlab.yml","en-us/blog/appsflyer-moves-to-gitlab",{"_path":714,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":715,"content":721,"config":729,"_id":731,"_type":13,"title":732,"_source":15,"_file":733,"_stem":734,"_extension":18},"/en-us/blog/automating-boring-git-operations-gitlab-ci",{"title":716,"description":717,"ogTitle":716,"ogDescription":717,"noIndex":6,"ogImage":718,"ogUrl":719,"ogSiteName":672,"ogType":673,"canonicalUrls":719,"schema":720},"GitBot – automating boring Git operations with CI","Guest author Kristian Larsson shares how he automates some common Git operations, like rebase, using GitLab CI.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749672374/Blog/Hero%20Images/gitbot-automate-git-operations.jpg","https://about.gitlab.com/blog/automating-boring-git-operations-gitlab-ci","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"GitBot – automating boring Git operations with CI\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Kristian Larsson\"}],\n        \"datePublished\": \"2017-11-02\",\n      }",{"title":716,"description":717,"authors":722,"heroImage":718,"date":724,"body":725,"category":726,"tags":727},[723],"Kristian Larsson","2017-11-02","\n\nGit is super useful for anyone doing a bit of development work or just trying to\nkeep track of a bunch of text files. 
However, as your project grows you might\nfind yourself doing lots of boring repetitive work just around Git itself. At\nleast that’s what happened to me and so I automated some boring Git stuff using our\n[continuous integration (CI) system](/solutions/continuous-integration/).\n\n\u003C!-- more -->\n\nThere are probably all sorts of use cases for automating various Git operations\nbut I’ll talk about a few that I’ve encountered. We’re using GitLab and [GitLab\nCI](/solutions/continuous-integration/) so that’s what my examples\nwill include, but most of the concepts should apply to other systems as well.\n\n## Automatic rebase\n\nWe have some Git repos with source code that we receive from vendors, who we can think\nof as our `upstream`. We don’t actually share a Git repo with the vendor but\nrather we get a tar ball every now and then. The tar ball is extracted into a\nGit repository, on the `master` branch which thus tracks the software as it is\nreceived from upstream. In a perfect world the software we receive would be\nfeature complete and bug free and so we would be done, but that’s usually not\nthe case. We do find bugs and if they are blocking we might decide to implement\na patch to fix them ourselves. The same is true for new features where we might\nnot want to wait for the vendor to implement it.\n\nThe result is that we have some local patches to apply. We commit such patches\nto a separate branch, commonly named `ts` (for TeraStream), to keep them\nseparate from the official software. Whenever a new software version is released,\nwe extract its content to `master` and then rebase our `ts` branch onto `master`\nso we get all the new official features together with our patches. 
Once we’ve\nimplemented something we usually send it upstream to the vendor for inclusion.\nSometimes they include our patches verbatim so that the next version of the code\nwill include our exact patch, in which case a rebase will simply skip our patch.\nOther times there are slight or major (it might be a completely different design)\nchanges to the patch and then someone typically needs to sort out the patches\nmanually. Mostly though, rebasing works just fine and we don’t end up with conflicts.\n\nNow, this whole rebasing process gets a tad boring and repetitive after a while,\nespecially considering we have a dozen of repositories with the setup described\nabove. What I recently did was to automate this using our CI system.\n\nThe workflow thus looks like:\n\n- human extracts zip file, git add + git commit on master + git push\n- CI runs for `master` branch\n   - clones a copy of itself into a new working directory\n   - checks out `ts` branch (the one with our patches) in working directory\n   - rebases `ts` onto `master`\n   - push `ts` back to `origin`\n- this event will now trigger a CI build for the `ts` branch\n- when CI runs for the `ts` branch, it will compile, test and save the binary output as “build artifacts”, which can be included in other repositories\n- GitLab CI, which is what we use, has a CI_PIPELINE_ID that we use to version built container images or artifacts\n\nTo do this, all you need is a few lines in a .gitlab-ci.yml file, essentially;\n\n```\nstages:\n  - build\n  - git-robot\n\n... build jobs ...\n\ngit-rebase-ts:\n  stage: git-robot\n  only:\n    - master\n  allow_failure: true\n  before_script:\n    - 'which ssh-agent || ( apt-get update -y && apt-get install openssh-client -y )'\n    - eval $(ssh-agent -s)\n    - ssh-add \u003C(echo \"$GIT_SSH_PRIV_KEY\")\n    - git config --global user.email \"kll@dev.terastrm.net\"\n    - git config --global user.name \"Mr. 
Robot\"\n    - mkdir -p ~/.ssh\n    - cat gitlab-known-hosts >> ~/.ssh/known_hosts\n  script:\n    - git clone git@gitlab.dev.terastrm.net:${CI_PROJECT_PATH}.git\n    - cd ${CI_PROJECT_NAME}\n    - git checkout ts\n    - git rebase master\n    - git push --force origin ts\n  ```\n\nWe’ll go through the Yaml file a few lines at a time. Some basic knowledge about GitLab CI is assumed.\n\nThis first part lists the stages of our pipeline.\n\n```\nstages:\n  - build\n  - git-robot\n  ```\n\nWe have two stages, first the `build` stage, which does whatever you want it to\ndo (ours compiles stuff, runs a few unit tests and packages it all up), then the\n`git-robot` stage which is where we perform the rebase.\n\nThen there’s:\n\n```\ngit-rebase-ts:\n  stage: git-robot\n  only:\n    - master\n  allow_failure: true\n  ```\n\nWe define the stage in which we run followed by the only statement which limits\nCI jobs to run only on the specified branch(es), in this case `master`.\n\n`allow_failure` simply allows the CI job to fail but still passing the pipeline.\n\nSince we are going to clone a copy of ourselves (the repository checked out in\nCI) we need SSH and SSH keys set up. We’ll use ssh-agent with a password-less key\nto authenticate. Generate a key using ssh-keygen, for example:\n\n```\nssh-keygen\n\nkll@machine ~ $ ssh-keygen -f foo\nGenerating public/private rsa key pair.\nEnter passphrase (empty for no passphrase):\nEnter same passphrase again:\nYour identification has been saved in foo.\nYour public key has been saved in foo.pub.\nThe key fingerprint is:\nSHA256:6s15MZJ1/kUsDU/PF2WwRGA963m6ZSwHvEJJdsRzmaA kll@machine\nThe key's randomart image is:\n+---[RSA 2048]----+\n|            o**.*|\n|           ..o**o|\n|           Eo o%o|\n|          .o.+o O|\n|        So oo.o+.|\n|       .o o.. o+o|\n|      .  . o..o+=|\n|     . o ..  .o= |\n|      . +.    .. 
|\n+----[SHA256]-----+\nkll@machine ~ $\n```\n\nAdd the public key as a deploy key under Project Settings\n\u003Ci class=\"fas fa-arrow-right\" aria-hidden=\"true\">\u003C/i> Repository \u003Ci class=\"fas fa-arrow-right\" aria-hidden=\"true\">\u003C/i>\nDeploy Keys. Make sure you enable write access or you won’t be able to have your\nGit robot push commits. We then need to hand over the private key so that it can\nbe accessed from within the CI job. We’ll use a secret environment variable for\nthat, which you can define under Project Settings\n\u003Ci class=\"fas fa-arrow-right\" aria-hidden=\"true\">\u003C/i> Pipelines \u003Ci class=\"fas fa-arrow-right\" aria-hidden=\"true\">\u003C/i>\nEnvironment variables). I’ll use the environment variable GIT_SSH_PRIV_KEY for this.\n\nNext part is the before_script:\n\n```\n  before_script:\n    - 'which ssh-agent || ( apt-get update -y && apt-get install openssh-client -y )'\n    - eval $(ssh-agent -s)\n    - ssh-add \u003C(echo \"$GIT_SSH_PRIV_KEY\")\n    - git config --global user.email \"kll@dev.terastrm.net\"\n    - git config --global user.name \"Mr. Robot\"\n    - mkdir -p ~/.ssh\n    - cat gitlab-known-hosts >> ~/.ssh/known_hosts\n  ```\n\nFirst ssh-agent is installed if it isn’t already. We then start up ssh-agent and\nadd the key stored in the environment variable GIT_SSH_PRIV_KEY (which we set up\npreviously). The Git user information is set and we finally create .ssh and add\nthe known host information about our GitLab server to our known_hosts file. You\ncan generate the gitlab-known-hosts file using the following command:\n\n```\nssh-keyscan my-gitlab-machine >> gitlab-known-hosts\n```\n\nAs the name implies, the before_script is run before the main `script` part and\nthe ssh-agent we started in the before_script will also continue to run for the\nduration of the job. 
The ssh-agent information is stored in some environment\nvariables which are carried across from the before_script into the main script,\nenabling it to work. It’s also possible to put this SSH setup in the main script,\nI just thought it looked cleaner splitting it up between before_script and script.\nNote however that it appears that after_script behaves differently so while it’s\npossible to pass environment vars from before_script to script, they do not\nappear to be passed to after_script. Thus, if you want to do Git magic in the\nafter_script you also need to perform the SSH setup in the after_script.\n\nThis brings us to the main script. In GitLab CI we already have a checked-out\nclone of our project but that was automatically checked out by the CI system\nthrough the use of magic (it actually happens in a container previous to the one\nwe are operating in, that has some special credentials) so we can’t really use\nit, besides, checking out other branches and stuff would be really weird as it\ndisrupts the code we are using to do this, since that’s available in the Git\nrepository that’s checked out. It’s all rather meta.\n\nAnyway, we’ll be checking out a new Git repository where we’ll do our work, then\nchange the current directory to the newly checked-out repository, after which\nwe’ll check out the `ts` branch, do the rebase and push it back to the origin remote.\n\n```\n    - git clone git@gitlab.dev.terastrm.net:${CI_PROJECT_PATH}.git\n    - cd ${CI_PROJECT_NAME}\n    - git checkout ts\n    - git rebase master\n    - git push --force origin ts\n  ```\n\n… and that’s it. We’ve now automated the rebasing of a branch in our config file. 
Occasionally it\nwill fail due to problems rebasing (most commonly merge conflicts) but then you\ncan just step in and do the above steps manually and be interactively prompted\non how to handle conflicts.\n\n## Automatic merge requests\n\nAll the repositories I mentioned in the previous section are NEDs, a form of\ndriver for how to communicate with a certain type of device, for Cisco NSO (a\nnetwork orchestration system). We package up Cisco NSO, together with these NEDs\nand our own service code, in a container image. The build of that image is\nperformed in CI and we use a repository called `nso-ts` to control that work.\n\nThe NEDs are compiled in CI from their own repository and the binaries are saved\nas build artifacts. Those artifacts can then be pulled in the CI build of `nso-ts`.\nThe reference to which artifact to include is the name of the NED as well as the\nbuild version. The version number of the NED is nothing more than the pipeline\nid (which you’ll access in CI as ${CI_PIPELINE_ID}) and by including a specific\nversion of the NED, rather than just use “latest” we gain a much more consistent\nand reproducible build.\n\nWhenever a NED is updated a new build is run that produces new binary artifacts.\nWe probably want to use the new version but not before we test it out in CI. The\nactual versions of NEDs to use is stored in a file in the `nso-ts` repository and\nfollows a simple format, like this:\n\n```\nned-iosxr-yang=1234\nned-junos-yang=4567\n...\n```\n\nThus, updating the version to use is a simple job to just rewrite this text file\nand replace the version number with a given CI_PIPELINE_ID version number. Again,\nwhile NED updates are more seldom than updates to `nso-ts`, they do occur and\nhandling it is bloody boring. 
Enter automation!\n\n```\ngit-open-mr:\n  image: gitlab.dev.terastrm.net:4567/terastream/cisco-nso/ci-cisco-nso:4.2.3\n  stage: git-robot\n  only:\n    - ts\n  tags:\n    - no-docker\n  allow_failure: true\n  before_script:\n    - 'which ssh-agent || ( apt-get update -y && apt-get install openssh-client -y )'\n    - eval $(ssh-agent -s)\n    - ssh-add \u003C(echo \"$GIT_SSH_PRIV_KEY\")\n    - git config --global user.email \"kll@dev.terastrm.net\"\n    - git config --global user.name \"Mr. Robot\"\n    - mkdir -p ~/.ssh\n    - cat gitlab-known-hosts >> ~/.ssh/known_hosts\n  script:\n    - git clone git@gitlab.dev.terastrm.net:TeraStream/nso-ts.git\n    - cd nso-ts\n    - git checkout -b robot-update-${CI_PROJECT_NAME}-${CI_PIPELINE_ID}\n    - for LIST_FILE in $(ls ../ned-package-list.* | xargs -n1 basename); do NED_BUILD=$(cat ../${LIST_FILE}); sed -i packages/${LIST_FILE} -e \"s/^${CI_PROJECT_NAME}.*/${CI_PROJECT_NAME}=${NED_BUILD}/\"; done\n    - git diff\n    - git commit -a -m \"Use ${CI_PROJECT_NAME} artifacts from pipeline ${CI_PIPELINE_ID}\"\n    - git push origin robot-update-${CI_PROJECT_NAME}-${CI_PIPELINE_ID}\n    - HOST=${CI_PROJECT_URL} CI_COMMIT_REF_NAME=robot-update-${CI_PROJECT_NAME}-${CI_PIPELINE_ID} CI_PROJECT_NAME=TeraStream/nso-ts GITLAB_USER_ID=${GITLAB_USER_ID} PRIVATE_TOKEN=${PRIVATE_TOKEN} ../open-mr.sh\n```\n\nSo this time around we check out a Git repository into a separate working\ndirectory again, it’s just that it’s not the same Git repository as we are\nrunning on simply because we are trying to do changes to a repository that is\nusing the output of the repository we are running on. It doesn’t make much of a\ndifference in terms of our process. 
At the end, once we’ve modified the files we\nare interested in, we also open up a merge request on the target repository.\nHere we can see the MR (which is merged already) to use a new version of the\nNED `ned-snabbaftr-yang`.\n\n\u003Cimg src=\"/images/blogimages/gitbot-ned-update-mr.png\" alt=\"MR using new version of NED\" style=\"width: 700px;\"/>{: .shadow}\n\nWhat we end up with is that whenever there is a new version of a NED, a single merge\nrequest is opened on our `nso-ts` repository to start using the new NED. That\nmerge request is using changes on a new branch and CI will obviously run for\n`nso-ts` on this new branch, which will then test all of our code using the new\nversion of the NED. We get a form of version pinning, with the form of explicit\nchanges that it entails, yet it’s a rather convenient and non-cumbersome\nenvironment to work with thanks to all the automation.\n\n## Getting fancy\n\nWhile automatically opening an MR is sweet… we can do ~~better~~fancier. Our `nso-ts`\nrepository is based on Cisco NSO (Tail-F NCS), or actually the `nso-ts` Docker\nimage is based on a `cisco-nso` Docker image that we build in a separate\nrepository. We put the version of NSO as the tag of the `cisco-nso` Docker\nimage, so `cisco-nso:4.2.3` means Cisco NSO 4.2.3. This is what the `nso-ts`\nDockerfile will use in its `FROM` line.\n\nUpgrading to a new version of NCS is thus just a matter of rewriting the tag…\nbut what version of NCS should we use? There’s 4.2.4, 4.3.3, 4.4.2 and 4.4.3\navailable and I’m sure there’s some other version that will pop up its evil\nhead soon enough. How do I know which version to pick? And will our current code\nwork with the new version?\n\nTo help myself in the choice of NCS version I implemented a script that gets the\nREADME file of a new NCS version and cross references the list of fixed issues\nwith the issues that we currently have open in the Tail-F issue tracker. 
The\noutput of this is included in the merge request description so when I look at\nthe merge request I immediately know what bugs are fixed or new features are\nimplemented by moving to a specific version. Having this automatically generated\nfor us is… well, it’s just damn convenient. Together with actually testing our\ncode with the new version of NCS gives us confidence that an upgrade will be smooth.\n\nHere are the merge requests currently opened by our GitBot:\n\n\u003Cimg src=\"/images/blogimages/automate-git-merge-requests.png\" alt=\"Merge requests automated by Git bot\" style=\"width: 700px;\"/>{: .shadow}\n\nWe can see how the system have generated MRs to move to all the different\nversions of NSO currently available. As we are currently on NSO v4.2.3 there’s\nno underlying branch for that one leading to an errored build. For the other\nversions though, there is a branch per version that executes the CI pipeline to\nmake sure all our code runs with this version of NSO.\n\nAs there have been a few commits today, these branches are behind by six commits\nbut will be rebased this night so we get an up-to-date picture if they work or\nnot with our latest code.\n\n\u003Cimg src=\"/images/blogimages/automate-git-commits.png\" alt=\"Commits\" style=\"width: 700px;\"/>{: .shadow}\n\nIf we go back and look at one of these merge requests, we can see how the\ndescription includes information about what issues that we currently have open\nwith Cisco / Tail-F would be solved by moving to this version.\n\n\u003Cimg src=\"/images/blogimages/automate-git-mr-description.png\" alt=\"Merge request descriptions\" style=\"width: 700px;\"/>{: .shadow}\n\nThis is from v4.2.4 and as we are currently on v4.2.3 we can see that there are\nonly a few fixed issues.\n\nIf we instead look at v4.4.3 we can see that the list is significantly longer.\n\n\u003Cimg src=\"/images/blogimages/automate-git-mr-description-list.png\" alt=\"Merge request descriptions\" style=\"width: 
700px;\"/>{: .shadow}\n\nPretty sweet, huh? :)\n\nAs this involves a bit more code I’ve put the relevant files in a [GitHub gist](https://gist.github.com/plajjan/42592665afd5ae045ee36220e19919aa).\n\n## This is the end\n\nIf you are reading this, chances are you already have your reasons for why you\nwant to automate some Git operations. Hopefully I’ve provided some inspiration\nfor how to do it.\n\nIf not or if you just want to discuss the topic in general or have more specific\nquestions about our setup, please do reach out to me on [Twitter](https://twitter.com/plajjan).\n\n_[This post](http://plajjan.github.io/automating-git/) was originally published on [plajjan.github.io](http://plajjan.github.io/)._\n\n## About the Guest Author\n\nKristian Larsson is a network automation systems architect at Deutsche Telekom.\nHe is working on automating virtually all aspects of running TeraStream, the\ndesign for Deutsche Telekom's next generation fixed network, using robust and\nfault tolerant software. He is active in the IETF as well as being a\nrepresenting member in OpenConfig. 
Previous to joining Deutsche Telekom,\nKristian was the IP & opto network architect for Tele2's international backbone\nnetwork.\n\n\"[BB-8 in action](https://unsplash.com/photos/C8VWyZhcIIU) by [Joseph Chan](https://unsplash.com/@yulokchan) on Unsplash\n{: .note}\n","engineering",[108,728,9],"user stories",{"slug":730,"featured":6,"template":687},"automating-boring-git-operations-gitlab-ci","content:en-us:blog:automating-boring-git-operations-gitlab-ci.yml","Automating Boring Git Operations Gitlab Ci","en-us/blog/automating-boring-git-operations-gitlab-ci.yml","en-us/blog/automating-boring-git-operations-gitlab-ci",{"_path":736,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":737,"content":743,"config":751,"_id":753,"_type":13,"title":754,"_source":15,"_file":755,"_stem":756,"_extension":18},"/en-us/blog/beginner-git-guide",{"title":738,"description":739,"ogTitle":738,"ogDescription":739,"noIndex":6,"ogImage":740,"ogUrl":741,"ogSiteName":672,"ogType":673,"canonicalUrls":741,"schema":742},"A guide to Git for beginners","Our senior developer evangelist answers newbie questions about Git.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749681222/Blog/Hero%20Images/git-15th-anniversary-cover.png","https://about.gitlab.com/blog/beginner-git-guide","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"A guide to Git for beginners\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Brendan O'Leary\"}],\n        \"datePublished\": \"2020-04-13\",\n      }",{"title":738,"description":739,"authors":744,"heroImage":740,"date":746,"body":747,"category":748,"tags":749},[745],"Brendan O'Leary","2020-04-13","\n\n_If you're just learning about software development, or are brand new to open source, it won't be long before you encounter Git, a source code management tool and arguably one of the most successful open source projects ever. 
We asked senior developer evangelist [Brendan O'Leary](/company/team/#brendan) to fill in the background on Git's history and successes in honor of its 15th anniversary._\n\n## What is source code management?\n\nBefore you start in software engineering it's important to understand the concept of [source code management](/solutions/source-code-management/). In its simplest form software is a bunch of text files and if I'm using those by myself it's not a big deal. But when multiple people use multiple files it gets out of hand and you need some way to manage it all. Humans can't necessarily manage all of that easily: If you're working with files A and C and I'm working with C and D, you need a way to bring all the changes we've made together without overriding anything or causing any conflict. A computer can more easily figure that out, and in a nutshell, that's what source code management is.\n\n## Why the term Git?\n\nThere are several different urban legends about this. Linus Torvalds who wrote it is a pretty gruff person [with some acknowledged sharp edges](https://www.newyorker.com/science/elements/after-years-of-abusive-e-mails-the-creator-of-linux-steps-aside). And so the story suggests he actually named it after himself, as in the British slang word, [“git”](https://www.merriam-webster.com/dictionary/git). That may be apocryphal. Also, it's a three-letter combo, meaning it's short and didn't conflict with any existing Unix commands. Now people say it stands for “Global Information Tracker” or “GD Idiot Truckload of...” if you're mad at it.\n\n## But wait. Who is Linus Torvalds?\n\nLinus Torvalds is a Finnish-American software engineer who developed the [Linux kernel](https://www.howtogeek.com/howto/31632/what-is-the-linux-kernel-and-what-does-it-do/) and then invented Git 15 years ago. 
Torvalds has been quoted as saying he's more \"famous\" for Linux but that over time, Git will [end up being more important](https://www.techrepublic.com/article/linus-torvalds-git-proved-i-could-be-more-than-a-one-hit-wonder/). Torvalds is also widely seen as the godfather of the open source movement.\n\n## Can you explain the rationale behind the cult following of open source?\n\nTorvalds himself has a cult following and open source has been around for a very long time, long before Git was invented. But open source wasn't widely accepted and in some cases, companies were actively hostile to the concept. Torvalds wanted to create a project everyone could contribute to and Git was born (literally developed by Torvalds over a weekend 15 years ago). Git solved a problem that was common across all types of software development and it not only welcomed contributions from users, it _needed_ contributions to grow. The idea of a practical solution everyone could contribute to created a kind of zeitgeist, and today open source is widely embraced as a result.\n\n## How could I explain Git to my neighbor?\n\nWe tend to talk about Git as a tree but I really don't know if that is the best analogy for it. It's a tree in the sense that it makes branches, but then those branches come back together and that doesn't happen in a tree.\n\nInstead, I'd say Git is like a time machine. The whole history of everything that happened on any branch in alternate timelines is brought back together magically. Nothing is lost or changed and you can look backward and move forward. It's magic.\n\n## What's the most important thing I should know about Git?\n\nThat's easy: You can't break it!\n\nBecause it's a magical time machine you really can't do anything to it that can't be fixed. So I always tell beginners to relax and play around with your copy. 
No matter how many mistakes you make you can't break it in a way that's not fixable.\n\nHave no fear.\n","insights",[9,683,750],"careers",{"slug":752,"featured":6,"template":687},"beginner-git-guide","content:en-us:blog:beginner-git-guide.yml","Beginner Git Guide","en-us/blog/beginner-git-guide.yml","en-us/blog/beginner-git-guide",{"_path":758,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":759,"content":765,"config":778,"_id":780,"_type":13,"title":781,"_source":15,"_file":782,"_stem":783,"_extension":18},"/en-us/blog/building-gitlab-with-gitlab-a-multi-region-service-to-deliver-ai-features",{"title":760,"description":761,"ogTitle":760,"ogDescription":761,"noIndex":6,"ogImage":762,"ogUrl":763,"ogSiteName":672,"ogType":673,"canonicalUrls":763,"schema":764},"Building GitLab with GitLab: A multi-region service to deliver AI features","Discover how we built our first multi-region deployment for teams at GitLab using the platform's many features, helping create a frictionless developer experience for GitLab Duo users.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1750098664/Blog/Hero%20Images/Blog/Hero%20Images/building-gitlab-with-gitlab-no-type_building-gitlab-with-gitlab-no-type.png_1750098663794.png","https://about.gitlab.com/blog/building-gitlab-with-gitlab-a-multi-region-service-to-deliver-ai-features","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"Building GitLab with GitLab: A multi-region service to deliver AI features\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Chance Feick\"},{\"@type\":\"Person\",\"name\":\"Sam Wiskow\"}],\n        \"datePublished\": \"2024-09-12\",\n      }",{"title":760,"description":761,"authors":766,"heroImage":762,"date":769,"body":770,"category":726,"tags":771},[767,768],"Chance Feick","Sam Wiskow","2024-09-12","For GitLab Duo, real-time AI-powered capabilities like [Code 
Suggestions](https://about.gitlab.com/solutions/code-suggestions/) need low-latency response times for a frictionless developer experience. Users don’t want to interrupt their flow and wait for a code suggestion to show up. To ensure GitLab Duo can provide the right suggestion at the right time and meet high performance standards for critical AI infrastructure, GitLab recently launched our first multi-region service to deliver AI features.\n\nIn this article, we will cover the benefits of multi-region services, how we built an internal platform codenamed ‘Runway’ for provisioning and deploying multi-region services using GitLab features, and the lessons learned migrating to multi-region in production.\n\n## Background on the project\n\nRunway is GitLab’s internal platform as a service (PaaS) for provisioning, deploying, and operating containerized services. Runway's purpose is to enable GitLab service owners to self-serve infrastructure needs with production readiness out of the box, so application developers can focus on providing value to customers. As part of [our corporate value of dogfooding](https://handbook.gitlab.com/handbook/values/#results), the first iteration was built in 2023 by the Infrastructure department on top of core GitLab capabilities, such as continuous integration/continuous delivery ([CI/CD](https://about.gitlab.com/topics/ci-cd/)), environments, and deployments.\n\nBy establishing automated GitOps best practices, Runway services use infrastructure as code (IaC), merge requests (MRs), and CI/CD by default.\n\nGitLab Duo is primarily powered by [AI Gateway](https://gitlab.com/gitlab-org/modelops/applied-ml/code-suggestions/ai-assist), a satellite service written in Python outside of GitLab’s modular monolith written in Ruby. 
In cloud computing, a region is a geographical location of data centers operated by cloud providers.\n\n## Defining a multi-region strategy\n\nDeploying in a single region is a good starting point for most services, but can come with downsides when you are trying to reach a global audience. Users who are geographically far from where your service is deployed may experience different levels of service and responsiveness than those who are closer. This can lead to a poor user experience, even if your service is well built in all other respects.\n\nFor AI Gateway, it was important to meet global customers wherever they are located, whether on GitLab.com or self-managed instances using Cloud Connector. When a developer is deciding to accept or reject a code suggestion, milliseconds matter and can define the user experience.\n\n### Goals\n\nMulti-region deployments require more infrastructure complexity, but for use cases where latency is a core component of the user experience, the benefits often outweigh the downsides. First, multi-region deployments offer increased responsiveness to the user. By serving requests from locations closest to end users, latency can be significantly reduced. Second, multi-region deployments provide greater availability. With fault tolerance, services can fail over during a regional outage. There is a much lower chance of a service failing completely, meaning users should not be interrupted even in partial failures.\n\nBased on our goals for performance and availability, we used this opportunity to create a scalable multi-region strategy in Runway, which is built leveraging GitLab features.\n\n### Architecture\n\nIn SaaS platforms, GitLab.com’s infrastructure is hosted on Google Cloud Platform (GCP). As a result, Runway’s first supported platform runtime is Cloud Run. 
The initial workloads deployed on Runway are stateless satellite services (e.g., AI Gateway), so Cloud Run services are a good fit that provide a clear migration path to more complex and flexible platform runtimes, e.g. Kubernetes.\n\nBuilding Runway on top of GCP Cloud Run using GitLab has allowed us to iterate and tease out the right level of abstractions for service owners as part of a platform play in the Infrastructure department.\n\nTo serve traffic from multiple regions in Cloud Run, the multi-region deployment strategy must support global load balancing, and the provisioning and configuration of regional resources. Here’s a simplified diagram of the proposed architecture in GCP:\n\n![simplified diagram of the proposed architecture in GCP](https://res.cloudinary.com/about-gitlab-com/image/upload/v1750098671/Blog/Content%20Images/Blog/Content%20Images/image7_aHR0cHM6_1750098671612.png)\n\nBy replicating Cloud Run services across multiple regions and configuring the existing global load balancing with serverless network endpoint group (NEG) backends, we’re able to serve traffic from multiple regions. For the remainder of the article, we’ll focus less on specifics of Cloud Run and more on how we’re building with GitLab.\n\n## Building a multi-region platform with GitLab\n\nNow that you have context about Runway, let's walk through how to build a multi-region platform using GitLab features.\n\n### Provision\n\nWhen building an internal platform, the first challenge is provisioning infrastructure for a service. In Runway, Provisioner is the component that is responsible for maintaining a service inventory and managing IaC for GCP resources using Terraform.\n\nTo provision a service, an application developer will open an MR to add a service project to the inventory using git, and Provisioner will create required resources, such as service accounts and identity and access management policies. 
When building this functionality with GitLab, Runway leverages [OpenID Connect (OIDC) with GPC Workload Identity Federation](https://docs.gitlab.com/ee/ci/cloud\\_services/google\\_cloud/) for managing IaC.\n\nAdditionally, Provisioner will create a deployment project for each service project. The purpose of creating separate projects for deployments is to ensure the [principle of least privilege](https://about.gitlab.com/blog/the-ultimate-guide-to-least-privilege-access-with-gitlab/) by authenticating as a GCP service account with restricted permissions. Runway leverages the [Projects API](https://docs.gitlab.com/ee/api/projects.html) for creating projects with [Terraform provider](https://registry.terraform.io/providers/gitlabhq/gitlab/latest/docs).\n\nFinally, Provisioner defines variables in the deployment project for the service account, so that deployment CI jobs can authenticate to GCP. Runway leverages [CI/CD variables](https://docs.gitlab.com/ee/ci/variables/) and [Job Token allowlist](https://docs.gitlab.com/ee/ci/jobs/ci\\_job\\_token.html\\#add-a-group-or-project-to-the-job-token-allowlist) to handle authentication and authorization.\n\nHere’s a simplified example of provisioning a multi-region service in the service inventory:\n\n```\n{\n  \"inventory\": [\n    {\n      \"name\": \"example-service\",\n      \"project_id\": 46267196,\n      \"regions\": [\n        \"europe-west1\",\n        \"us-east1\",\n        \"us-west1\"\n      ]\n    }\n  ]\n}\n```\n\nOnce provisioned, a deployment project and necessary infrastructure will be created for a service.\n\n### Configure\n\nAfter a service is provisioned, the next challenge is the configuration for a service. 
In Runway, [Reconciler](https://gitlab.com/gitlab-com/gl-infra/platform/runway/runwayctl) is a component that is responsible for configuring and deploying services by aligning the actual state with the desired state using Golang and Terraform.\n\nHere’s a simplified example of an application developer configuring GitLab CI/CD in their service project:\n\n```\n# .gitlab-ci.yml\nstages:\n  - validate\n  - runway_staging\n  - runway_production\n\ninclude:\n  - project: 'gitlab-com/gl-infra/platform/runway/runwayctl'\n    file: 'ci-tasks/service-project/runway.yml'\n    inputs:\n      runway_service_id: example-service\n      image: \"$CI_REGISTRY_IMAGE/${CI_PROJECT_NAME}:${CI_COMMIT_SHORT_SHA}\"\n      runway_version: v3.22.0\n\n# omitted for brevity\n```\n\nRunway provides sane default values for configuration that are based on our experience in delivering stable and reliable features to customers. Additionally, service owners can configure infrastructure using a service manifest file hosted in a service project. The service manifest uses JSON Schema for validation. When building this functionality with GitLab, Runway leverages [Pages](https://docs.gitlab.com/ee/user/project/pages/) for schema documentation.\n\nTo deliver this part of the platform, Runway leverages [CI/CD templates](https://docs.gitlab.com/ee/development/cicd/templates.html), [Releases](https://docs.gitlab.com/ee/user/project/releases/), and [Container Registry](https://docs.gitlab.com/ee/user/packages/container\\_registry/) for integrating with service projects.\n\nHere’s a simplified example of a service manifest:\n\n```\n# .runway/runway-production.yml\napiVersion: runway/v1\nkind: RunwayService\nspec:\n container_port: 8181\n regions:\n   - us-east1\n   - us-west1\n   - europe-west1\n\n# omitted for brevity\n```\n\nFor multi-region services, Runway injects an environment variable into the container instance runtime, e.g. 
RUNWAY\\_REGION, so application developers have the context to make any downstream dependencies regionally-aware, e.g. Vertex AI API.\n\nOnce configured, a service project will be integrated with a deployment project.\n\n### Deploy\n\nAfter a service project is configured, the next challenge is deploying a service. In Runway, Reconciler handles this by triggering a deployment job in the deployment project when an MR is merged to the main branch. When building this functionality with GitLab, Runway leverages [Trigger Pipelines](https://docs.gitlab.com/ee/ci/triggers/) and [Multi-Project Pipelines](https://docs.gitlab.com/ee/ci/pipelines/downstream\\_pipelines.html\\#multi-project-pipelines) to trigger jobs from service project to deployment project.\n\n![trigger jobs from service project to deployment project](https://res.cloudinary.com/about-gitlab-com/image/upload/v1750098672/Blog/Content%20Images/Blog/Content%20Images/image5_aHR0cHM6_1750098671612.png)\n\nOnce a pipeline is running in a deployment project, it will be deployed to an environment. By default, Runway will provision staging and production environments for all services. At this point, Reconciler will apply any Terraform resource changes for infrastructure. When building this functionality with GitLab, Runway leverages [Environments/Deployments](https://docs.gitlab.com/ee/ci/environments/) and [GitLab-managed Terraform state](https://docs.gitlab.com/ee/user/infrastructure/iac/terraform\\_state.html) for each service.\n\n![Reconciler applies any Terraform resource changes for infrastructure](https://res.cloudinary.com/about-gitlab-com/image/upload/v1750098672/Blog/Content%20Images/Blog/Content%20Images/image1_aHR0cHM6_1750098671614.png)\n\nRunway provides default application metrics for services. Additionally, custom metrics can be used by enabling a sidecar container with OpenTelemetry Collector configured to scrape Prometheus and remote write to Mimir. 
By providing observability out of the box, Runway is able to bake monitoring into CI/CD pipelines.\n\nExample scenarios include gradual rollouts for blue/green deployments, preventing promotions to production when staging is broken, or automatically rolling back to previous revision when elevated error rates occur in production.\n\n![Runway bakes monitoring into CI/CD pipelines](https://res.cloudinary.com/about-gitlab-com/image/upload/v1750098672/Blog/Content%20Images/Blog/Content%20Images/image2_aHR0cHM6_1750098671615.png)\n\nOnce deployed, environments will serve the latest revision of a service. At this point, you should have a good understanding of some of the challenges that will be encountered, and how to solve them with GitLab features.\n\n## Migrating to multi-region in production\n\nAfter extending Runway components to support multi-region in Cloud Run, the final challenge was migrating from AI Gateway’s single-region deployment in production with zero downtime. Today, teams using Runway to deploy their services can self-serve on regions making a multi-region deployment just as simple as a single-region deployment. \n\nWe were able to iterate on building multi-region functionality without impacting existing infrastructure by using semantic versioning for Runway. Next, we’ll share some learnings from the migration that may inform how to operate services for an internal multi-region platform.\n\n### Dry run deployments\n\nIn Runway, Reconciler will apply Terraform changes in CI/CD. The trade-off is that plans cannot be verified in advance, which could risk inadvertently destroying or misconfiguring production infrastructure. 
To solve this problem, Runway will perform a “dry run” deployment for MRs.\n\n![\"Dry run\" deployment](https://res.cloudinary.com/about-gitlab-com/image/upload/v1750098672/Blog/Content%20Images/Blog/Content%20Images/image6_aHR0cHM6_1750098671616.png)\n\nFor migrating AI Gateway, dry run deployments increased confidence and helped mitigate risk of downtime during rollout. When building an internal platform with GitLab, we recommend supporting dry run deployments from the start.\n\n### Regional observability\n\nIn Runway, existing observability was aggregated by assuming a single-region deployment. To solve this problem, Runway observability was retrofitted to include a new region label for Prometheus metrics.\n\nOnce metrics were retrofitted, we were able to introduce service level indicators (SLIs) for both regional Cloud Run services and global load balancing. Here’s an example dashboard screenshot for a general Runway service:\n\n![dashboard screenshot for a general Runway service](https://res.cloudinary.com/about-gitlab-com/image/upload/v1750098672/Blog/Content%20Images/Blog/Content%20Images/image3_aHR0cHM6_1750098671617.png)\n\n***Note:** Data is not actual production data and is only for illustration purposes.*\n\nAdditionally, we were able to update our service level objectives (SLOs) to support regions. As a result, service owners could be alerted when a specific region experiences an elevated error rate, or increase in response times.\n\n![screenshot of alerts](https://res.cloudinary.com/about-gitlab-com/image/upload/v1750098672/Blog/Content%20Images/Blog/Content%20Images/image4_aHR0cHM6_1750098671617.png)\n\n***Note:** Data is not actual production data and is only for illustration purposes.*\n\nFor migrating AI Gateway, regional observability increased confidence and helped provide more visibility into new infrastructure. 
When building an internal platform with GitLab, we recommend supporting regional observability from the start.\n\n### Self-service regions\n\nThe Infrastructure department successfully performed the initial migration of multi-region support for AI Gateway in production with zero downtime. Given the risk associated with rolling out a large infrastructure migration, it was important to ensure the service continued working as expected.\n\nShortly afterwards, service owners began self-serving additional regions to meet the growth of customers. At the time of writing, [GitLab Duo](https://about.gitlab.com/gitlab-duo/) is available in six regions around the globe and counting. Service owners are able to configure the desired regions, and Runway will provide guardrails along the way in a scalable solution.\n\nAdditionally, three other internal services have already started using multi-region functionality on Runway. Application developers have entirely self-served functionality, which validates that we’ve provided a good platform experience for service owners. For a platform play, a scalable solution like Runway is considered a good outcome since the Infrastructure department is no longer a blocker.\n\n## What’s next for Runway\n\nBased on how quickly we could iterate to provide results for customers, the SaaS Platforms department has continued to invest in Runway. We’ve grown the Runway team with additional contributors, started evolving the platform runtime (e.g. 
Google Kubernetes Engine), and continue dogfooding with tighter integration in the product.\n\nIf you’re interested in learning more, feel free to check out [https://gitlab.com/gitlab-com/gl-infra/platform/runway](https://gitlab.com/gitlab-com/gl-infra/platform/runway).\n\n## More Building GitLab with GitLab\n- [Why there is no MLOps without DevSecOps](https://about.gitlab.com/blog/there-is-no-mlops-without-devsecops/)\n- [Stress-testing Product Analytics](https://about.gitlab.com/blog/building-gitlab-with-gitlab-stress-testing-product-analytics/)\n- [Web API Fuzz Testing](https://about.gitlab.com/blog/building-gitlab-with-gitlab-api-fuzzing-workflow/)\n- [How GitLab.com inspired Dedicated](https://about.gitlab.com/blog/building-gitlab-with-gitlabcom-how-gitlab-inspired-dedicated/)\n- [Expanding our security certification portfolio](https://about.gitlab.com/blog/building-gitlab-with-gitlab-expanding-our-security-certification-portfolio/)\n",[108,772,773,774,682,684,775,9,776,777],"CD","CI","inside GitLab","google","DevSecOps","AI/ML",{"slug":779,"featured":90,"template":687},"building-gitlab-with-gitlab-a-multi-region-service-to-deliver-ai-features","content:en-us:blog:building-gitlab-with-gitlab-a-multi-region-service-to-deliver-ai-features.yml","Building Gitlab With Gitlab A Multi Region Service To Deliver Ai Features","en-us/blog/building-gitlab-with-gitlab-a-multi-region-service-to-deliver-ai-features.yml","en-us/blog/building-gitlab-with-gitlab-a-multi-region-service-to-deliver-ai-features",{"_path":785,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":786,"content":792,"config":799,"_id":801,"_type":13,"title":802,"_source":15,"_file":803,"_stem":804,"_extension":18},"/en-us/blog/cascading-merge-requests-with-gitlab-flow",{"title":787,"description":788,"ogTitle":787,"ogDescription":788,"noIndex":6,"ogImage":789,"ogUrl":790,"ogSiteName":672,"ogType":673,"canonicalUrls":790,"schema":791},"How to adopt a cascading merge request strategy with GitLab 
Flow","This tutorial explains how to consolidate updates in a single branch and propagate them to other branches using ucascade bot.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749679851/Blog/Hero%20Images/cascade.jpg","https://about.gitlab.com/blog/cascading-merge-requests-with-gitlab-flow","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"How to adopt a cascading merge request strategy with GitLab Flow\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Madou Coulibaly\"}],\n        \"datePublished\": \"2023-08-31\",\n      }",{"title":787,"description":788,"authors":793,"heroImage":789,"date":795,"body":796,"category":726,"tags":797},[794],"Madou Coulibaly","2023-08-31","\nGit offers a range of branching strategies and workflows that can be utilized to enhance organization, efficiency, and code quality. Employing a well-defined workflow helps foster a successful and streamlined development process. By implementing the [release branches using GitLab Flow](https://docs.gitlab.com/ee/topics/gitlab_flow.html#release-branches-with-gitlab-flow), you can effectively handle multiple product releases. However, when it comes to fixing bugs, it often becomes necessary to apply the fix across various stable branches such as `main`,  `stable-1.0`, `stable-1.1`, and `stable-2.0`. The process of applying the fix to multiple locations can be time-consuming, as it involves the manual creation of multiple merge requests.\n\nBy consolidating updates in a single branch and propagating them to other branches, the cascading merge approach establishes a central source of truth, reducing confusion and maintaining consistency. 
In this blogpost, we will guide you through setting up this approach for your GitLab project using [ucascade bot](https://github.com/unblu/ucascade).\n\n## Getting started\nTo get started, you'll need the following prerequisites:\n\n### Environment\n  - a GitLab project that implemented [Release Branches Strategy](https://docs.gitlab.com/ee/topics/gitlab_flow.html#release-branches-with-gitlab-flow)\n  - a Kubernetes cluster\n\n### CLI\n  - git\n  - kubectl\n  - docker\n\n### Project access tokens\nFollow the instructions on the [Project access tokens page](https://docs.gitlab.com/ee/user/project/settings/project_access_tokens.html#create-a-project-access-token) to create two project access tokens –`ucascade` and `ucascade-approver` – with the API scope in your GitLab project.\n\n![project access tokens](https://about.gitlab.com/images/blogimages/2023-06-22-cascading-merge-requests-with-gitlab-flow/pat.png){: .shadow.medium}\n\n## Deploy ucascade bot on Kubernetes\nFirst, create the `bots-fleet` namespace on Kubernetes.\n\n```\nkubectl create namespace bots-fleet\n```\n\nThen, create the `cascading-merge-secret` secret that contains the GitLab project access tokens created previously.\n\n```\nkubectl create secret generic cascading-merge-secret -n bots-fleet \\\n--from-literal=gitlab-host=https://gitlab.com \\\n--from-literal=gitlab-api-token=\u003CUCASCADE_PROJECT_ACCESS_TOKEN> \\\n--from-literal=gitlab-api-token-approver=\u003CAPPROVER_BOT_PROJECT_ACCESS_TOKEN>\n```\n\nOnce done, (fork and) clone the [Cascading Merge repository](https://gitlab.com/madou-stories/bots-fleet/cascading-merge) that contains the Kubernetes manifests for the bot and replace the `host` field in the `kube/ingress.yaml` file according to your Kubernetes domain.\n\n```yaml\napiVersion: networking.k8s.io/v1\nkind: Ingress\nmetadata:\n  annotations:\n    kubernetes.io/ingress.class: nginx\n  name: ucascade\n  namespace: bots-fleet\nspec:\n  rules:\n  - host: 
ucascade.\u003CKUBERNETES_BASED_DOMAIN>\n    http:\n      paths:\n      - backend:\n          service:\n            name: ucascade\n            port:\n              number: 80\n        path: /\n        pathType: Prefix\n\n``` \n\nNow, you are ready to deploy the `ucascade` bot.\n\n```\nkubectl apply -f kube/\n```\n\nYou should see the following resources deployed on Kubernetes:\n\n![ucascade-k8s](https://about.gitlab.com/images/blogimages/2023-06-22-cascading-merge-requests-with-gitlab-flow/ucascade-k8s.png){: .shadow.medium}\n\n**Note:** The `ucascade` image is based on the [ucascade-bot](https://github.com/unblu/ucascade-bot) and is located in the [Container Registry](https://gitlab.com/madou-stories/bots-fleet/cascading-merge/container_registry) of the Cascading Merge repository.\n{: .note}\n\n## Create a GitLab webhook\nFollow the instructions on [the Webhooks page](https://docs.gitlab.com/ee/user/project/integrations/webhooks.html#configure-a-webhook-in-gitlab) to create a webhook with the following variables: \n  - **URL**: `\u003CUCASCADE_INGRESS_URL>/ucascade/merge-request`\n  - **Trigger**: `Merge request events`\n\n![webhook](https://about.gitlab.com/images/blogimages/2023-06-22-cascading-merge-requests-with-gitlab-flow/webhook.png){: .shadow.medium}\n\n## Configure your Cascading Merge rule\nCreate a file called ucascade.json at the root level of your GitLab project as defined in [configuration file](https://unblu.github.io/ucascade/tech-docs/11_ucascade-configuration-file.html#_configuration_file) and matched with your release definition.\n\n![configuration](https://about.gitlab.com/images/blogimages/2023-06-22-cascading-merge-requests-with-gitlab-flow/configuration.png){: .shadow.medium}\n\n## Testing the Cascading Merge\nNow create a branch and an MR from your default branch, make a change, and merge it. The ucascade bot will propagate the change to all other release branches by automatically creating cascading MRs. 
The following video demonstrates the process:\n\n\u003Cfigure class=\"video_container\">\n  \u003Ciframe src=\"https://www.youtube.com/embed/Ej7xf8axWMs\" title=\"Cascading Merge Approach\"\n  frameborder=\"0\" allowfullscreen=\"true\"> \u003C/iframe>\n\u003C/figure>\n\n# Additional resources\nFind more information about the `ucascade` bot in the [ucascade documentation](https://unblu.github.io/ucascade/index.html).\n\n_Special thank you to Jérémie Bresson for authoring and open sourcing this amazing bot!_\n",[108,798,9,682],"code review",{"slug":800,"featured":90,"template":687},"cascading-merge-requests-with-gitlab-flow","content:en-us:blog:cascading-merge-requests-with-gitlab-flow.yml","Cascading Merge Requests With Gitlab Flow","en-us/blog/cascading-merge-requests-with-gitlab-flow.yml","en-us/blog/cascading-merge-requests-with-gitlab-flow",{"_path":806,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":807,"content":813,"config":821,"_id":823,"_type":13,"title":824,"_source":15,"_file":825,"_stem":826,"_extension":18},"/en-us/blog/celebrating-17-years-of-git",{"title":808,"description":809,"ogTitle":808,"ogDescription":809,"noIndex":6,"ogImage":810,"ogUrl":811,"ogSiteName":672,"ogType":673,"canonicalUrls":811,"schema":812},"Celebrating 17 years of Git","Here's the history, tips, tricks and even a mea culpa to help celebrate the 17th anniversary of Git.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749679424/Blog/Hero%20Images/gitbirthday.jpg","https://about.gitlab.com/blog/celebrating-17-years-of-git","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"Celebrating 17 years of Git\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Valerie Silverthorne\"}],\n        \"datePublished\": \"2022-04-07\",\n      }",{"title":808,"description":809,"authors":814,"heroImage":810,"date":816,"body":817,"category":748,"tags":818},[815],"Valerie 
Silverthorne","2022-04-07","\n\nSeventeen years ago, the Linux community embraced Git as its universal open source version control solution. Created by Linus Torvalds, Git replaced BitKeeper, a proprietary but free-of-charge option that worked, to a point, until it didn’t (and ultimately started costing a fee).\n\nIn the years since, there’s been little to no agreement on what the term “Git” actually means but there’s no disputing its rockstar status in the DevOps world. Tens of millions developers rely on Git’s fast and seamless branching capabilities every single day. In fact, 85% of DevOps professionals who took our [2021 Global DevSecOps Survey](/developer-survey/) said they use Git for source control.\n\nSo, to honor this anniversary, we share our favorite Git tips and tricks and look back at the origins of its name, its 15th anniversary celebration, and even a declaration from one of our own who was certain Git would _never be in his toolkit_. No, really.\n\n## The origin of the name Git\n\nThere’s not much quirky or charming about the world of DevOps, but the theories around the origin of the name Git may be an exception. Torvalds claimed to have named Linux after himself, and he said Git (British slang for “jerk”) was no different. “I’m an egotistical b*stard, and I name all my projects after myself,” he [said at the time](https://git-scm.com/book/en/v2/Getting-Started-A-Short-History-of-Git). \n\nThe source code’s README takes the story in a different direction: Git is easy to pronounce, not used by UNIX, and could sound like “get.” It could be [British shade-throwing](http://www.peevish.co.uk/slang/english-slang/g.htm?qa=150&ss360SearchTerm=git#git), or it could stand for “global information tracker” (the choice of those happily working with a functioning tool). And for those frustrated with Git, there’s also “goddamn idiotic truckload of sh*t.”\n\n## Tips and tricks for better Git\n\nIs it possible to improve on a tool that so many use every single day? 
Actually, it is, starting with 15 ways [to get a better Git workflow](/blog/15-git-tips-improve-workflow/). Learn how to:\n\n- autocomplete commands\n- use Git blame more efficiently\n- reset files\n- understand the plugins\n\nAlso, Git can help [keep merge requests tidy and humming along](/blog/start-using-git/).\n\nFor an exhaustive look at how GitLab uses Git internally, including .gitconfig on steroids, the lowdown on aliases, and command line tips, we’ve [gathered a life-changing list](/blog/git-tips-and-tricks/). Also, here’s our take on [why (and how) to keep your Git history clean](/blog/keeping-git-commit-history-clean/) and how to do it using [interactive rebase](/blog/keep-git-history-clean-with-interactive-rebase/).\n\n## Remembering the 15th anniversary celebrations\n\nLandmark anniversaries always make people reflect, and Git’s 15th in 2020 was no exception. Not only was there [an actual party – Git Merge 2020](/blog/git-merge-fifteen-year-git-party/), our staff developer evangelist Brendan O’Leary admitted the unthinkable: Back in the day, he was [never ever going to use Git](https://www.computerweekly.com/blog/Open-Source-Insider/GitLab-guru-15-years-later-were-still-learning). Brendan, who obviously has learned his lesson, also teamed up with GitHub’s distinguished software engineer Jeff King to talk about [Git’s impact on software development](https://www.infoq.com/news/2020/04/git-fifteen-anniversary-qa/).\n\n## Practical Git\n\nAlthough there’s a lot to learn about Git, Brendan and other developers consistently stress the simplicity is what sets it apart. 
So here are three of our most bookmarked pages of straightforward Git advice:\n\n[6 common Git mistakes and how to fix them](/blog/git-happens/)\n[Understand the new Git branch default name](/blog/new-git-default-branch-name/) \n[A guide to Git for beginners](/blog/beginner-git-guide/)\n\nSo make sure to raise a glass to 17 years of Git and its many benefits.\n",[9,819,820],"DevOps","collaboration",{"slug":822,"featured":6,"template":687},"celebrating-17-years-of-git","content:en-us:blog:celebrating-17-years-of-git.yml","Celebrating 17 Years Of Git","en-us/blog/celebrating-17-years-of-git.yml","en-us/blog/celebrating-17-years-of-git",{"_path":828,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":829,"content":835,"config":840,"_id":842,"_type":13,"title":843,"_source":15,"_file":844,"_stem":845,"_extension":18},"/en-us/blog/celebrating-gits-20th-anniversary-with-creator-linus-torvalds",{"title":830,"description":831,"ogTitle":830,"ogDescription":831,"noIndex":6,"ogImage":832,"ogUrl":833,"ogSiteName":672,"ogType":673,"canonicalUrls":833,"schema":834},"Celebrating Git's 20th anniversary with creator Linus Torvalds","Discover the origins of the open-source version control system, why he handed over the reins a few months in, and what he thinks about adding new programming languages to Git.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749662510/Blog/Hero%20Images/git-20-years-opt1.png","https://about.gitlab.com/blog/celebrating-gits-20th-anniversary-with-creator-linus-torvalds","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"Celebrating Git's 20th anniversary with creator Linus Torvalds\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Patrick Steinhardt\"}],\n        \"datePublished\": \"2025-04-07\",\n      }",{"title":830,"description":831,"authors":836,"heroImage":832,"date":837,"body":838,"category":680,"tags":839},[677],"2025-04-07","The Git version 
control system was first released on April 7, 2005, by the father of the Linux kernel, Linus Torvalds. To mark the 20th anniversary of this important project that is nowadays used by almost every single developer, I interviewed Linus about the history of Git, why he handed over maintainership of Git, and what he considers to be its most important milestones.\n\n**In 2005, you were already the maintainer of the thriving Linux kernel. Why did you decide to start a new version control system?**\n\nSo, I got into it from really despising version control.\n\nI had used the traditional version control systems (CVS/RCS/SCCS) both as an end user (i.e., tracking open source projects like [GCC](https://gcc.gnu.org/)) and as a developer (we used CVS at Transmeta for everything) and absolutely hated the experience with a passion.\n\n\u003Cimg src=\"https://about.gitlab.com/images/blogimages/linustorvalds.png\" align=\"left\" width=\"200px\" style=\"padding-right: 20px; padding-bottom: 10px\"/>\n\nAnd yes, back then most projects that used CVS had probably moved to [SVN](https://subversion.apache.org/), but honestly, I always felt that SVN was just \"lipstick on a pig.\" It was just CVS in another form, with some UI improvements, but none of the fundamentals fixed, and a few new problems added.\n\nThe problems with CVS and its ilk are too many to even list, and, happily, they have largely become irrelevant and younger developers have probably never even had to deal with any of it. 
I absolutely refused to deal with it for the kernel, even though a few subsystems (notably the networking side) were actually using CVS to track their code back in the '90s.\n\nAnyway, back then I lived in the Bay Area, and Larry McVoy, who I knew from other projects (mainly [lmbench](https://www.usenix.org/legacy/publications/library/proceedings/sd96/full_papers/mcvoy.pdf)), had started BitMover, which had a new version control model called BitKeeper, or BK, for short.\n\nBK wasn't open source, but Larry liked open source projects and really felt that the lack of version control was holding the kernel back. He wasn't wrong, but the traditional source code managers (SCMs) really didn't work for me at all. Larry spent some time showing me and David Miller (networking maintainer and existing CVS user) what BitKeeper could do.\n\nBK wasn't perfect, and it was based on Source Code Control System (SCCS) like so many other traditional SCMs were, and thus had the same broken \"history per file\" model that everybody else had, and that causes huge and fundamental issues with file renaming and deletion.\n\nBut BK also wasn't just that \"lipstick\" thing. It may have used SCCS at a low level, but on a higher level it fixed some really fundamental things, and did proper distributed development, and had a real global – not per-file – history that made merging code from different trees actually work.\n\nWith CVS, creating branches and merging them was something you had to plan and discuss with people, and were major events. With BK, every repository was a branch. We take that for granted now, and Git obviously took it much further by having many branches *per* repository, but even the much more limited BK model was really a big deal at the time.\n\nAgain, BK wasn't perfect. 
As mentioned, it did do per-file history, which really is a big fundamental problem that makes renaming and file merging simply not work reliably, and inevitably causes chaos and pain (for CVS people, think Attic, shudder). And it had some scalability issues, too, but those took a while to become more than a bit problematic.\n\nBut the biggest problem with BK was the licensing, and while over the years (we used BK from 2002 to 2005) a lot of kernel maintainers did end up switching over to it, it was always a bit of a friction point. And that friction came to a head in late 2004, and the use of BK for the kernel basically became untenable a few months later.\n\nI was in the situation that for three years I'd finally used source control that worked, and it really had solved a lot of problems. There was no way I was going back to the days before source control, but in the years we'd been using BK, nothing better had really come out of the open source community.\n\nSure, people knew that CVS and SVN didn't work well, and there were projects that tried alternate approaches, but some of those approaches were even worse (basically amounting to \"fancy patch tracking\"), or had some good ideas but in the process making up some entirely new horrible design mistakes ([Monotone](https://www.monotone.ca/)).\n\nSo, I looked around for a while, and decided that I didn't have any options – I had to write my own.\n\nNow, technically, it actually did take only a few days to make the first version of Git, and hey, it's all there in the Git commit history. It's easy enough to see how it goes from pretty much zero to being usable enough that I started applying patches from others a week later (and being actively used for the kernel a few days after that).\n\nBut that ignores the fact that I had been *thinking* about the problem for a while by then. Writing code is easy. Getting a good design is what matters. 
So there was a fair amount of background to those few days that is pretty important, and that part doesn't show up in the history.\n\nAnd hey, that first version was very, very rough, and didn't do a lot that was to come later. But you can definitely already see much of the core design in those first few days.\n\n**Can you give us a short recount of the first days and weeks of how the Git project was started?**\n\nI had basically decided that I will stop kernel development until I had an alternative that worked for me. The main goals were to be distributed and high performance, and be something you could absolutely rely on to catch any corruption.\n\nBut I really do want to stress that I wasn't interested in SCMs, per se. I was interested in the end result, not in the process. So Git was never like the kernel for me: I do Linux because I think kernels are interesting - I did Git because I had to.\n\nWhich then directly segues into your next question.\n\n**You handed over the maintainership of Git to Junio Hamano after a couple of months, and Junio is still the maintainer. Why did you hand over maintainership and what made you pick Junio?**\n\nHanding over maintainership was not a hard choice. It was very much: \"The moment somebody else comes along that I can trust to keep it going, I'll go back to doing just the kernel.\"\n\nWhich is not to say that I just threw things over the wall and prayed for the best. I ended up maintaining Git for something like four months because I felt I needed to find somebody who would stick around, and had that hard-to-explain quality of \"GoodTaste\"(TM).\n\nJunio had been one of the very early people involved (he literally showed up the first week of development), but it's not like I just said, \"Tag, you're it.\"  It takes a while to see who sticks around, and who writes code and makes decisions that make sense.\n\nAnd I think Junio has been exemplary. 
I get much too much credit for the few months I spent on Git - particularly in light of the 20th anniversary. I'll take credit for getting the core design right, and getting the project started, but it really is Junio who has led the project (not to belittle the hundreds of other people involved, but still).\n\n**The initial version of the Mercurial version control system was released only 12 days after the initial version of Git, on April 19, 2005. Many people claim that Mercurial's user experience was superior over Git's, but nowadays Git is significantly more popular. Why do you think that Git has won over Mercurial?**\n\nOh, a big part of it is obviously just network effects, and SCMs have very strong network effects. It's why CVS survived as long as it did despite its limitations.\n\nSo, the fact that the kernel used Git (and then at some point it got to be very popular in the Ruby on Rails community, and then it took off everywhere).\n\nBut I really do think that the design of Git is superior. The core model is both very simple and very powerful, and I think that made it easier to translate into other environments. JGit was an early example of that, but you obviously have implementations like the MSgit virtual filesystem, etc.\n\nAnd while Git was famously somewhat hard to use early on, I really do think that some of that comes from having done things \"right,\" where people coming from other environments found Git non-intuitive because Git really did a few hard decisions that a traditional SCM person would never have done.\n\n**The Git project has not stood still since you handed maintainership over to Junio, and its community is always busy working on new features. What do you think the most important milestones were after you have left the project?**\n\nThat's really hard for me to say, mainly because I obviously made Git work for me, and so the things *I* use have worked from pretty much Day One. 
Just as an obvious example: Making Git work on Windows was obviously a huge step for other people, but it affected *me* not at all ;)\n\nThere's obviously all the infrastructure within Git itself to make it a lot easier to use, but I think most of the big milestones have all been around people taking the Git infrastructure and building things around it. Those often end up feeding back into Git features, of course, but, at the same time, the milestone is about something external.\n\nTo give an obvious example: All the big Git hosting sites were big milestones. Making Git be distributed was what made those so much easier to do, but the *milestone* was how then the hosting made it so easy for users to use Git for various projects.\n\n**If you had the capacity to work on Git full time again, would there be anything that you would like to implement?**\n\nAbsolutely not. Git did everything I really needed from very early on – my use is actually fairly limited, and I only really care about one project.\n\nAnd I say \"absolutely not\" because I refer you to that earlier answer: I was never really interested in SCMs at all to begin with. I think a large reason for why Git ended up being so different - mostly in good ways - from other SCMs was that I approached it more like I would a distributed journaling filesystem, not really a traditional SCM.\n\n**Is there any feature or design decision in Git that you have come to regret in retrospect?**\n\nDesign decisions? No. I still think the high-level design is just very good, and you can discuss various Git concepts without ever getting into the nitty-gritty complexity of actual implementation.\n\nAnd I think that's important in a project. You need a certain high-level design principle to guide the conceptual direction of a project.\n\nSometimes people take that too far, and think that the high-level design means that the implementation must then slavishly follow some core principle. 
And that's wrong, too – the *implementation* will have lots of nasty corner cases because reality is hard and people want odd things, but there needs to be some kind of top-level design that you can point to and reason about at a high level before you get your hands dirty with the nasty reality.\n\nAnd I think Git has a good balance of that. A very straightforward object store design (call them \"structured Merkle trees\" if you are a CS person, or you might just think of them as a \"content addressable storage\" if you are a filesystem person). That core design is there – but at the same time, it's realistically just a very tiny part of the actual code. Most of the *code* is about all the things you can do with the core design, but that basic clarity of design still gives the project some kind of high-level structure.\n\nIt's the same kind of high-level structure that Unix itself had, whether you said \"everything is a file\" or you were talking about process handling. There are a few \"concepts\" that drive the design, but then 99% of the code is about the ugly harsh details of what you build on top of that to make it all useful in the real world.\n\nI have two mantras in technology: \"If I have seen further, it is by standing on the shoulders of giants\" (Newton) and \"Genius is 1% inspiration and 99% perspiration\" (Edison).\n\nBut talking about the 99% perspiration: While I am very happy with the big design, there are certainly various details that I would have done differently if I were to do Git today.\n\nBut honestly, they aren't that important. What's much more important is all the *good* details that have been done over the last two decades.\n\n**The Linux kernel has started to use Rust as a programming language for some of its subsystems. 
Do you think it makes sense to start using such newer programming languages like this in Git?**\n\nI suspect that when it comes to Git, there's less reason to try to mix languages, which is always somewhat painful.\n\nIn the kernel, the end result is one single kernel binary – even if much of it can be loaded dynamically as modules, it is still linked together into effectively one single binary.\n\nAnd that makes using multiple languages more complex. But, on the other hand, the kernel also has more reason to worry about memory safety and, thus, look at newer languages.\n\nIn Git, if somebody wants to write parts of it in Rust or another language, I suspect it makes much more sense to just go for a separate implementation rather than try to mix languages in one binary.\n\nMuch of the Git core ideas are simple enough that just having parallel implementations of the core likely isn't too painful, and then you can target particular problem spaces where a different language makes more sense.\n\nAnd we've seen that in Git already, of course: That's exactly what JGit is. The use of a different language was due to a different web-based environment where that language choice was much more natural.\n\nI know that there are already Rust implementations of some of the core Git functionality, and I think the situation is similar: I suspect they make more sense in specific situations than in some kind of overall \"let's convert things to Rust\" kind of way.\n\nSo for anybody who is interested in implementing things in Rust, I'd suggest looking for target areas where the advantages of Rust are more obvious. I don't think C has actually been all that problematic in the standard Git source base.\n\n**New version control systems are popping up every couple of years. 
Do you think that Git will stay relevant in the future?**\n\nI already mentioned the network effects in SCMs, and I think that means that to replace Git you have to be not just slightly better, you have to be enormously better. Or so compatible that you effectively are just a new implementation of Git.\n\nAnd I do think the SCM situation has changed – Git doesn't have the kinds of huge gaping fundamental problems that SCMs had before Git. So being \"enormously better\" is fairly hard.\n\nSo, yes, I would expect Git to stay relevant for the foreseeable future, with people working on improvements *around* Git rather than replacements.\n\n*Note: This interview has been edited for length and clarity.*\n\n> Take a [journey with us through Git's 20-year history](https://about.gitlab.com/blog/journey-through-gits-20-year-history/).\n\n## Learn more about Git\n\n- [What's new in Git 2.49.0?](https://about.gitlab.com/blog/whats-new-in-git-2-49-0/)  \n- [What’s new in Git 2.48.0?](https://about.gitlab.com/blog/whats-new-in-git-2-48-0/)  \n- [A beginner's guide to the Git reftable format](https://about.gitlab.com/blog/a-beginners-guide-to-the-git-reftable-format/)\n- [Git project](https://git-scm.com/)",[683,9],{"slug":841,"featured":90,"template":687},"celebrating-gits-20th-anniversary-with-creator-linus-torvalds","content:en-us:blog:celebrating-gits-20th-anniversary-with-creator-linus-torvalds.yml","Celebrating Gits 20th Anniversary With Creator Linus 
Torvalds","en-us/blog/celebrating-gits-20th-anniversary-with-creator-linus-torvalds.yml","en-us/blog/celebrating-gits-20th-anniversary-with-creator-linus-torvalds",{"_path":847,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":848,"content":854,"config":860,"_id":862,"_type":13,"title":863,"_source":15,"_file":864,"_stem":865,"_extension":18},"/en-us/blog/compose-readers-and-writers-in-golang-applications",{"title":849,"description":850,"ogTitle":849,"ogDescription":850,"noIndex":6,"ogImage":851,"ogUrl":852,"ogSiteName":672,"ogType":673,"canonicalUrls":852,"schema":853},"Compose Readers and Writers in Golang applications","GitLab streams terabytes of Git data every hour using Golang abstractions of I/O implementations. Learn how to compose Readers and Writers in Golang apps.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1750099464/Blog/Hero%20Images/Blog/Hero%20Images/AdobeStock_639935439_3oqldo5Yt5wPonEJYZOLTM_1750099464124.jpg","https://about.gitlab.com/blog/compose-readers-and-writers-in-golang-applications","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"Compose Readers and Writers in Golang applications\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Igor Drozdov\"}],\n        \"datePublished\": \"2024-02-15\",\n      }",{"title":849,"description":850,"authors":855,"heroImage":851,"date":857,"body":858,"category":726,"tags":859},[856],"Igor Drozdov","2024-02-15","Every hour, GitLab transfers terabytes of Git data between a server and a client. It is hard or even impossible to handle this amount of traffic unless it is done efficiently in a streaming fashion. Git data is served by Gitaly (Git server), GitLab Shell (Git via SSH), and Workhorse (Git via HTTP(S)). 
These services are implemented using Go - the language that conveniently provides abstractions to efficiently deal with I/O operations.\n\nGolang's [`io`](https://pkg.go.dev/io) package provides [`Reader`](https://pkg.go.dev/io#Reader) and [`Writer`](https://pkg.go.dev/io#Writer) interfaces to abstract the functionality of I/O implementations into public interfaces.\n\n`Reader` is the interface that wraps the basic `Read` method:\n\n```go\ntype Reader interface {\n\tRead(p []byte) (n int, err error)\n}\n```\n\n`Writer` is the interface that wraps the basic `Write` method.\n\n```go\ntype Writer interface {\n\tWrite(p []byte) (n int, err error)\n}\n```\n\nFor example, [`os`](https://pkg.go.dev/os) package provides an implementation of reading a file. `File` type implements `Reader` and `Writer` interfaces by defining basic [`Read`](https://pkg.go.dev/os#File.Read) and [`Write`](https://pkg.go.dev/os#File.Write) functions.\n\nIn this blog post, you'll learn how to compose Readers and Writers in Golang applications.\n\nFirst, let's read from a file and write its content to [`os.Stdout`](https://cs.opensource.google/go/go/+/master:src/os/file.go;l=66?q=Stdout&ss=go%2Fgo).\n\n```go\nfunc main() {\n\tfile, err := os.Open(\"data.txt\")\n\tif err != nil {\n\t\tlog.Fatal(err)\n\t}\n\tdefer file.Close()\n\n\tp := make([]byte, 32 * 1024)\n\tfor {\n\t\tn, err := file.Read(p)\n\n\t\t_, errW := os.Stdout.Write(p[:n])\n\t\tif errW != nil {\n\t\t\tlog.Fatal(errW)\n\t\t}\n\n\t\tif err != nil {\n\t\t\tif errors.Is(err, io.EOF) {\n\t\t\t\tbreak\n\t\t\t}\n\n\t\t\tlog.Fatal(err)\n\t\t}\n\t}\n}\n```\n\nEach call of the `Read` function fills the buffer `p` with the content from the file, i.e. 
the file is being consumed in chunks (up to `32KB`) instead of being fully loaded into the memory.\n\nTo simplify this widely used pattern, `io` package conveniently provides [`Copy`](https://pkg.go.dev/io#Copy) function that allows passing content from any `Reader` to any `Writer` and also [handles](https://cs.opensource.google/go/go/+/refs/tags/go1.21.0:src/io/io.go;l=433) additional edge cases.\n\n```go\nfunc main() {\n\tfile, err := os.Open(\"data.txt\")\n\tif err != nil {\n\t\tlog.Fatal(err)\n\t}\n\tdefer file.Close()\n\n\tif _, err := io.Copy(os.Stdout, file); err != nil {\n\t\tlog.Fatal(err)\n\t}\n}\n```\n\n`Reader` and `Writer` interfaces are used across the whole Golang ecosystem because they facilitate reading and writing content in a streaming fashion. Therefore, gluing together the Readers and Writers with the functions that expect these interfaces as arguments is a frequent problem to solve. Sometimes it's as straightforward as passing content from a Reader into a Writer, but sometimes the content written into a Writer must be represented as a Reader or the content from a reader must be sent into multiple Writers. 
Let's have a closer look into different use cases and the examples of solving these types of problems in the `GitLab` codebase.\n\n## Reader -> Writer\n\n**Problem**\n\nWe need to pass content from a Reader into a Writer.\n\n![readers and writers - image 1](https://res.cloudinary.com/about-gitlab-com/image/upload/v1750099495/Blog/Content%20Images/Blog/Content%20Images/image1_aHR0cHM6_1750099494917.png)\n\n**Solution**\n\nThe problem can be solved by using [`io.Copy`](https://pkg.go.dev/io#Copy).\n\n```go\nfunc Copy(dst Writer, src Reader) (written int64, err error)\n```\n\n**Example**\n\n[`InfoRefs*`](https://gitlab.com/gitlab-org/gitlab/blob/57aafb6a886d05c15dd0fa372fb4f008bec014ea/workhorse/internal/gitaly/smarthttp.go#L18-35) Gitaly RPCs return a `Reader` and we want to [stream](https://gitlab.com/gitlab-org/gitlab/blob/57aafb6a886d05c15dd0fa372fb4f008bec014ea/workhorse/internal/git/info-refs.go#L78-80) its content to a user via HTTP response:\n\n```go\nfunc handleGetInfoRefsWithGitaly(ctx context.Context, responseWriter *HttpResponseWriter, a *api.Response, rpc, gitProtocol, encoding string) error {\n        ...\n        infoRefsResponseReader, err := smarthttp.InfoRefsResponseReader(ctx, &a.Repository, rpc, gitConfigOptions(a), gitProtocol)\n        ...\n        if _, err = io.Copy(w, infoRefsResponseReader); err != nil {\n            return err\n        }\n        ...\n}\n```\n\n## Reader -> Multiple Writers\n\n**Problem**\n\nWe need to pass content from a Reader into multiple Writers.\n\n![readers and writers - image 3](https://res.cloudinary.com/about-gitlab-com/image/upload/v1750099495/Blog/Content%20Images/Blog/Content%20Images/image3_aHR0cHM6_1750099494917.png)\n\n**Solution**\n\nThe `io` package provides [`io.MultiWriter`](https://pkg.go.dev/io#MultiWriter) function that _converts_ multiple Writers into a single one. 
When its `Write` function is called, the content is copied to all the Writers ([implementation](https://cs.opensource.google/go/go/+/refs/tags/go1.21.0:src/io/multi.go;l=127)).\n\n```go\nfunc MultiWriter(writers ...Writer) Writer\n```\n\n**Example**\n\nGiven we want to [build](https://gitlab.com/gitlab-org/gitlab/blob/57aafb6a886d05c15dd0fa372fb4f008bec014ea/workhorse/internal/upload/destination/multi_hash.go#L13-18) `md5`, `sha1`, `sha256` and `sha512` hashes from the same content. [`Hash`](https://pkg.go.dev/hash#Hash) type is a `Writer`. Using `io.MultiWriter`, we define [`multiHash`](https://gitlab.com/gitlab-org/gitlab/blob/57aafb6a886d05c15dd0fa372fb4f008bec014ea/workhorse/internal/upload/destination/multi_hash.go#L43-61) Writer. After the content is [written](https://gitlab.com/gitlab-org/gitlab/blob/57aafb6a886d05c15dd0fa372fb4f008bec014ea/workhorse/internal/upload/destination/destination.go#L124-125) to the `multiHash`, we [calculate](https://gitlab.com/gitlab-org/gitlab/blob/57aafb6a886d05c15dd0fa372fb4f008bec014ea/workhorse/internal/upload/destination/multi_hash.go#L63-70) the hashes of all these functions in a single run.\n\nThe simplified version of the example is:\n\n```go\npackage main\n\nimport (\n\t\"crypto/sha1\"\n\t\"crypto/sha256\"\n\t\"fmt\"\n\t\"io\"\n\t\"log\"\n)\n\nfunc main() {\n\ts1 := sha1.New()\n\ts256 := sha256.New()\n\n\tw := io.MultiWriter(s1, s256)\n\tif _, err := w.Write([]byte(\"content\")); err != nil {\n\t\tlog.Fatal(err)\n\t}\n\n\tfmt.Println(s1.Sum(nil))\n\tfmt.Println(s256.Sum(nil))\n}\n```\n\nFor simplicity, we just call `Write` function on a Writer, but when content comes from a Reader, then `io.Copy` can be used as well:\n\n```go\n_, err := io.Copy(io.MultiWriter(s1, s256), reader)\n```\n\n## Multiple Readers -> Reader\n\n**Problem**\n\nWe have multiple Readers and need to sequentially read from them.\n\n![readers and writers - image 
4](https://res.cloudinary.com/about-gitlab-com/image/upload/v1750099495/Blog/Content%20Images/Blog/Content%20Images/image4_aHR0cHM6_1750099494919.png)\n\n**Solution**\n\nThe `io` package provides [`io.MultiReader`](https://pkg.go.dev/io#MultiReader) function that _converts_ multiple Readers into a single one. The Readers are read in the passed order.\n\n```go\nfunc MultiReader(readers ...Reader) Reader\n```\n\nThen this Reader can be used in any function that accepts `Reader` as an argument.\n\n**Example**\n\nWorkhorse [reads](https://gitlab.com/gitlab-org/gitlab/blob/d97ce3baab7fbf459728ce18766fefd3abb8892f/workhorse/cmd/gitlab-resize-image/png/reader.go#L26-38) the first `N` bytes of an image to detect whether it's a PNG file and _puts them back_ by building a Reader from multiple Readers:\n\n```go\nfunc NewReader(r io.Reader) (io.Reader, error) {\n\tmagicBytes, err := readMagic(r)\n\tif err != nil {\n\t\treturn nil, err\n\t}\n\n\tif string(magicBytes) != pngMagic {\n\t\tdebug(\"Not a PNG - read file unchanged\")\n\t\treturn io.MultiReader(bytes.NewReader(magicBytes), r), nil\n\t}\n\n\treturn io.MultiReader(bytes.NewReader(magicBytes), &Reader{underlying: r}), nil\n}\n```\n\n## Multiple Readers -> Multiple Writers\n\n**Problem**\n\nWe need to pass content from multiple Readers into multiple Writers.\n\n![readers and writers - image 6](https://res.cloudinary.com/about-gitlab-com/image/upload/v1750099495/Blog/Content%20Images/Blog/Content%20Images/image6_aHR0cHM6_1750099494921.png)\n\n**Solution**\n\nThe solutions above can be generalized on the many-to-many use case.\n\n```go\n_, err := io.Copy(io.MultiWriter(w1, w2, w3), io.MultiReader(r1, r2, r3))\n```\n\n## Reader -> Reader + Writer\n\n**Problem**\n\nWe need to read content from a Reader or pass the Reader to a function and simultaneously write the content into a Writer.\n\n![readers and writers - image 
2](https://res.cloudinary.com/about-gitlab-com/image/upload/v1750099495/Blog/Content%20Images/Blog/Content%20Images/image2_aHR0cHM6_1750099494923.png)\n\n**Solution**\n\nThe `io` package provides [io.TeeReader](https://pkg.go.dev/io#TeeReader) function that accepts a Reader to read from, a Writer to write to, and returns a Reader that can be processed further.\n\n```go\nfunc TeeReader(r Reader, w Writer) Reader\n```\n\nThe [implementation](https://cs.opensource.google/go/go/+/refs/tags/go1.21.4:src/io/io.go;l=610) of the functionality is straightforward. The passed `Reader` and `Writer` are stored in a structure that is a `Reader` itself:\n\n```go\nfunc TeeReader(r Reader, w Writer) Reader {\n\treturn &teeReader{r, w}\n}\n\ntype teeReader struct {\n\tr Reader\n\tw Writer\n}\n```\n\nThe `Read` function implemented for the structure delegates the `Read` to the passed `Reader` and also performs a `Write` to the passed `Writer`:\n\n```\nfunc (t *teeReader) Read(p []byte) (n int, err error) {\n\tn, err = t.r.Read(p)\n\tif n > 0 {\n\t\tif n, err := t.w.Write(p[:n]); err != nil {\n\t\t\treturn n, err\n\t\t}\n\t}\n\treturn\n}\n```\n\n**Example 1**\n\nWe already touched hashing topic in the `Multiple Writers -> Writer` section and `io.TeeReader` is [used](https://gitlab.com/gitlab-org/gitlab/blob/d97ce3baab7fbf459728ce18766fefd3abb8892f/workhorse/internal/upload/destination/destination.go#L124-125) to provide a Writer to create a hash from content. The returned Reader can be further used to upload content to object storage.\n\n**Example 2**\n\nWorkhorse uses `io.TeeReader` to [implement](https://gitlab.com/gitlab-org/gitlab/blob/d97ce3baab7fbf459728ce18766fefd3abb8892f/workhorse/internal/dependencyproxy/dependencyproxy.go#L57-101) Dependency Proxy [functionality](https://docs.gitlab.com/ee/user/packages/dependency_proxy/). Dependency Proxy caches requested upstream images in the object storage. 
The not-yet-cached use case has the following behavior:\n\n- A user performs an HTTP request.\n- The upstream image is fetched using [`net/http`](https://pkg.go.dev/net/http) and [`http.Response`](https://pkg.go.dev/net/http#Response) provides its content via `Body` field, which is [`io.ReadCloser`](https://pkg.go.dev/io#ReadCloser) (basically an `io.Reader`).\n- We need to send this content back to the user by writing it into [`http.ResponseWriter`](https://pkg.go.dev/net/http#ResponseWriter) (basically an `io.Writer`).\n- We need to simultaneously upload the content to object storage by performing an [`http.Request`](https://pkg.go.dev/net/http#NewRequest) (a function that accepts an `io.Reader`).\n\nAs a result, `io.TeeReader` can be used to glue these primitives together:\n\n```go\nfunc (p *Injector) Inject(w http.ResponseWriter, r *http.Request, sendData string) {\n\t// Fetch upstream data via HTTP\n\tdependencyResponse, err := p.fetchUrl(r.Context(), sendData)\n\t...\n\t// Create a tee reader. Each Read will read from dependencyResponse.Body and simultaneously\n        // perform a Write to w writer\n\tteeReader := io.TeeReader(dependencyResponse.Body, w)\n\t// Pass the tee reader as the body of an HTTP request to upload it to object storage\n\tsaveFileRequest, err := http.NewRequestWithContext(r.Context(), \"POST\", r.URL.String()+\"/upload\", teeReader)\n\t...\n\tnrw := &nullResponseWriter{header: make(http.Header)}\n\tp.uploadHandler.ServeHTTP(nrw, saveFileRequest)\n\t...\n```\n\n## Writer -> Reader\n\n**Problem**\n\nWe have a function that accepts a Writer, and we are interested in the content that the function would write into the Writer. 
We want to intercept the content and represent it as a Reader to further process it in a streaming fashion.\n\n![readers and writers - image 5](https://res.cloudinary.com/about-gitlab-com/image/upload/v1750099495/Blog/Content%20Images/Blog/Content%20Images/image5_aHR0cHM6_1750099494924.png)\n\n**Solution**\n\nThe `io` package provides [`io.Pipe`](https://pkg.go.dev/io#Pipe) function that returns a Reader and a Writer:\n\n```go\nfunc Pipe() (*PipeReader, *PipeWriter)\n```\n\nThe Writer can be used to be passed to the function that accepts a Writer. All the content that has been written into it will be accessible via the reader, i.e. a synchronous in-memory pipe is created that can be used to connect code expecting an `io.Reader` with code expecting an `io.Writer`.\n\n**Example 1**\n\nFor [LSIF](https://lsif.dev/) file [transformation](https://gitlab.com/gitlab-org/gitlab/blob/d97ce3baab7fbf459728ce18766fefd3abb8892f/workhorse/internal/lsif_transformer/parser/parser.go#L68-72) for code navigation we need to:\n\n- [Read](https://gitlab.com/gitlab-org/gitlab/blob/d97ce3baab7fbf459728ce18766fefd3abb8892f/workhorse/internal/lsif_transformer/parser/parser.go#L48-51) content of a zip file.\n- Transform the content and [serialize](https://gitlab.com/gitlab-org/gitlab/blob/d97ce3baab7fbf459728ce18766fefd3abb8892f/workhorse/internal/lsif_transformer/parser/docs.go#L97-112) it into [`zip.Writer`](https://pkg.go.dev/archive/zip#Writer).\n- [Represent](https://gitlab.com/gitlab-org/gitlab/blob/d97ce3baab7fbf459728ce18766fefd3abb8892f/workhorse/internal/lsif_transformer/parser/parser.go#L68-72) the new compressed content as a Reader to be further processed in a streaming fashion.\n\nThe [`zip.NewWriter`](https://pkg.go.dev/archive/zip#NewWriter) function accepts a Writer to which it will write the compressed content. It is handy when we need to pass an open file descriptor to the function to save the content to the file. 
However, when we need to pass the compressed content via an HTTP request, we need to represent the data as a Reader.\n\n```go\n// The `io.Pipe()` creates a reader and a writer.\npr, pw := io.Pipe()\n\n// The writer is passed to `parser.transform` function which will write\n// the transformed compressed content into it\n// The writing should happen asynchronously in a goroutine because each `Write` to\n// the `PipeWriter` blocks until it has satisfied one or more `Read`s from the `PipeReader`.\ngo parser.transform(pw)\n\n// Everything that has been written into it is now accessible via the reader.\nparser := &Parser{\n\tDocs: docs,\n\tpr:   pr,\n}\n\n// pr is a reader that can be used to read all the data written to the pw writer\nreturn parser, nil\n```\n\n**Example 2**\n\nFor Geo setups [GitLab Shell](https://gitlab.com/gitlab-org/gitlab-shell) proxies all `git push` operations to secondary and redirects them to primary.\n\n- GitLab Shell establishes an SSH connection and defines [`ReadWriter`](https://gitlab.com/gitlab-org/gitlab-shell/blob/7898d8e69daf51a7b6e01052c4516ca70893a2d4/internal/command/readwriter/readwriter.go#L6-7) struct that has `In` field of `io.Reader` type to read data from a user and `Out` field of `io.Writer` type to send response to the user.\n- GitLab Shell performs an HTTP request to `/info/refs` and sends `response.Body` of type `io.Reader` to the user using [`io.Copy`](https://gitlab.com/gitlab-org/gitlab-shell/blob/7898d8e69daf51a7b6e01052c4516ca70893a2d4/internal/command/githttp/push.go#L60)\n- The user reacts to this response by sending data to `In` and GitLab Shell needs to read this data, convert it to a request expected by Git HTTP, and send it as an HTTP request to `/git-receive-pack`. 
This is where `io.Pipe` becomes useful.\n\n```go\nfunc (c *PushCommand) requestReceivePack(ctx context.Context, client *git.Client) error {\n\t// Define pipeReader and pipeWriter and use pipeWriter to collect all the data\n\t//sent by the user converted to a format expected by Git HTTP.\n\tpipeReader, pipeWriter := io.Pipe()\n\t// The writing happens asynchronously because it's a blocking operation\n\tgo c.readFromStdin(pipeWriter)\n\n\t// pipeReader can be passed as io.Reader and used to read all the data written to pipeWriter\n\tresponse, err := client.ReceivePack(ctx, pipeReader)\n\t...\n\t_, err = io.Copy(c.ReadWriter.Out, response.Body)\n\t...\n}\n\nfunc (c *PushCommand) readFromStdin(pw *io.PipeWriter) {\n\tvar needsPackData bool\n\n\t// Scanner reads the user input line by line\n\tscanner := pktline.NewScanner(c.ReadWriter.In)\n\tfor scanner.Scan() {\n\t\tline := scanner.Bytes()\n\t\t// And writes it to the pipe writer\n\t\tpw.Write(line)\n\t\t...\n\t}\n\n\t// The data that hasn't been processed by a scanner is copied if necessary\n\tif needsPackData {\n\t\tio.Copy(pw, c.ReadWriter.In)\n\t}\n\n\t// Close the pipe writer to signify EOF for the pipe reader\n\tpw.Close()\n}\n```\n\n## Try Golang\n\nGolang provides elegant patterns designed to efficiently process data in a streaming fashion. 
The patterns can be used to address new challenges or refactor the existing performance issues associated with high memory consumption.\n\n> Learn more about [GitLab and Golang](https://docs.gitlab.com/ee/development/go_guide/).\n",[682,9,774,684],{"slug":861,"featured":6,"template":687},"compose-readers-and-writers-in-golang-applications","content:en-us:blog:compose-readers-and-writers-in-golang-applications.yml","Compose Readers And Writers In Golang Applications","en-us/blog/compose-readers-and-writers-in-golang-applications.yml","en-us/blog/compose-readers-and-writers-in-golang-applications",{"_path":867,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":868,"content":874,"config":881,"_id":883,"_type":13,"title":884,"_source":15,"_file":885,"_stem":886,"_extension":18},"/en-us/blog/contributions-to-git-2-42-release",{"title":869,"description":870,"ogTitle":869,"ogDescription":870,"noIndex":6,"ogImage":871,"ogUrl":872,"ogSiteName":672,"ogType":673,"canonicalUrls":872,"schema":873},"Git 2.42 release: Here are four of our contributions in detail","Find out how GitLab's Git team helped improve Git 2.42.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749667792/Blog/Hero%20Images/git-241.jpg","https://about.gitlab.com/blog/contributions-to-git-2-42-release","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"Git 2.42 release: Here are four of our contributions in detail\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Christian Couder\"}],\n        \"datePublished\": \"2023-10-12\",\n      }",{"title":869,"description":870,"authors":875,"heroImage":871,"date":877,"body":878,"category":879,"tags":880},[876],"Christian Couder","2023-10-12","\n\n[Git 2.42](https://gitlab.com/gitlab-org/git/-/raw/master/Documentation/RelNotes/2.42.0.txt)\nwas officially released on August 21, 2023, and included some\nimprovements from GitLab's Git team. 
Git is the foundation of\nrepository data at GitLab. GitLab's Git team works on new features, performance improvements, documentation improvements,\nand growing the Git community. Often our contributions to Git have a\nlot to do with the way we integrate Git into our services at\nGitLab.\n\nWe previously shared [some of our improvements that were included in the Git 2.41 release](https://about.gitlab.com/blog/contributions-to-latest-git-release/). Here are some highlights from the Git 2.42 release, and a\nwindow into how we use Git on the server side at GitLab.\n\n## 1. Prevent certain refs from being packed\n\n### Write-ahead logging\nIn [Gitaly](https://docs.gitlab.com/ee/administration/gitaly/), we\nwant to use a [write-ahead log](https://gitlab.com/groups/gitlab-org/-/epics/8911)\nto replicate Git operations on different machines.\n\nThis means that the Git objects and references that should be changed\nby a Git operation are first kept in a log entry. Then, when all the\nmachines have agreed that the operation should proceed, the log entry\nis applied so the corresponding Git objects and references are\nactually added to the repositories on all the machines.\n\n### Need for temporary references\nBetween the time when a specific log entry is first written and when\nit is applied, other log entries could be applied which could remove\nsome objects and references. It could happen that these objects and\nreferences are needed to apply the specific log entry though.\n\nSo when we log an entry, we have to make sure that all the objects and\nreferences that it needs to be properly applied will not be removed\nuntil that entry is either actually applied or discarded.\n\nThe best way to make sure things are kept in Git is to create new Git\nreferences pointing to these things. So we decided to use temporary\nreferences for that purpose. 
They would be created when a log entry is\nwritten, and then deleted when that entry is either applied or\ndiscarded.\n\n### Packed-refs performance\nGit can store references in \"loose\" files, with one reference per\nfile, or in the `packed-refs` file, which contains many of them. The\n`git pack-refs` command is used to pack some references from \"loose\"\nfiles into the `packed-refs` file.\n\nFor reading a lot of references, the `packed-refs` file is very\nefficient, but for writing or deleting a single reference, it is not\nso efficient as rewriting the whole `packed-refs` file is required.\n\nAs temporary references are to be created and then deleted soon after,\nstoring them in the `packed-refs` file would not be efficient. It\nwould be better to store them in \"loose\" files.\n\nThe `git pack-refs` command had no way to be told precisely which refs\nshould be packed or not though. By default it would repack all the\ntags (which are refs in `refs/tags/`) and all the refs that are\nalready packed. With the `--all` option one could tell it to repack\nall the refs except the hidden refs, broken refs, and symbolic refs,\nbut that was the only thing that could be controlled.\n\n### Improving `git pack-refs`\nWe decided to improve `git pack-refs` by adding two new options to it:\n  - `--include \u003Cpattern>` which can be used to specify which refs should be packed\n  - `--exclude \u003Cpattern>` which can be used to specify which refs should not be packed\n\n[John Cai](https://gitlab.com/jcaigitlab), Gitaly:Git team engineering manager, implemented these options.\n\nFor example, if the refs managed by the write-ahead log are in\n`refs/wal/`, it's now possible to exclude them from being moved into\nthe `packed-refs` file by using:\n\n```\n$ git pack-refs --exclude \"refs/wal/*\"\n```\n\nDetails of the patch series, including discussions, can be found\n[here](https://lore.kernel.org/git/pull.1501.git.git.1683215331910.gitgitgadget@gmail.com/).\n\n## 2. 
Get machine-readable output from `git cat-file --batch`\n\n### Efficiently retrieving Git object information\nIn GitLab, we often retrieve Git object information. For example, when a\nuser navigates into the files and directories in a repository, we need\nto get the content of the corresponding Git blobs and trees so that\nwe can show it.\n\nIn Gitaly, we use `git cat-file` to retrieve Git object information\nfrom a Git repository. As it's a frequent operation, it needs to be\nperformed efficiently, so we use the batch modes of `git cat-file`\navailable through the `--batch`, `--batch-check` and `--batch-command`\noptions.\n\nIn these modes, a pointer to a Git object can be repeatedly sent to\nthe standard input, called 'stdin', of a `git cat-file` command, while\nthe corresponding object information is read from the standard output,\ncalled 'stdout', of the command. This way we don't need to launch a\nnew `git cat-file` command for each object.\n\nGitLab can keep, for example, a `git cat-file --batch-command` process\nrunning in the background while feeding it commands like\n`info \u003Cobject>` or `contents \u003Cobject>` through its stdin to\nget either information about an object or its content.\n\n### Newlines in stdin, stdout, and filenames\nThe commands or pointers to Git objects that are sent through stdin\nshould be delimited using newline characters, and in the same way `git\ncat-file` will use newline characters to delimit the information from\ndifferent Git objects in its output. This is a common shell practice\nto make it easy to chain commands together. For example, one can\neasily get the size (in bytes) of the last three commits on the current\nbranch using the following:\n\n```\n$ git log -3 --format='%H' | git cat-file --batch-check='%(objectsize)'\n285\n646\n428\n```\n\nSometimes, though, the pointer to a Git object can contain a filename\nor a directory name, as such a pointer is allowed to be in the form\n`\u003Cbranch>:\u003Cpath>`. 
For example `HEAD:Documentation` is a valid\npointer to the blob or the tree corresponding to the `Documentation`\npath on the current branch.\n\nThis used to be an issue because on some systems newline characters\nare allowed in file or directory names. So the `-z` option was\nintroduced last year in Git 2.38 to allow users to change the input\ndelimiter in batch modes to the NUL character.\n\n### Error output\nWhen the `-z` option was introduced, it wasn't considered useful to\nchange the output delimiter to be also the NUL character. This is\nbecause only tree objects can contain paths and the internal format\nof tree objects already uses NUL characters to delimit paths.\n\nUnfortunately, it was overlooked that in case of an error the pointer\nto the object is displayed in the error message:\n\n```\n$ echo 'HEAD:does-not-exist' | git cat-file --batch\nHEAD:does-not-exist missing\n```\n\nAs the error messages are printed along with the regular output of the\ncommand on stdout, passing in an invalid pointer with a number of\nnewline characters in it could make it very difficult to parse the\noutput.\n\n### -Z comes to the rescue\n[Toon Claes](https://gitlab.com/toon), Gitaly senior engineer, initially worked on a\npatch to just quote the pointer in the error message, but it was\ndecided in the Git mailing list discussions related to the patch that\nit would be better to just create a new `-Z` option. This option would\nchange both the input and the output delimiter to the NUL character,\nwhile the old `-z` option would be deprecated over time.\n\nSo [Patrick Steinhardt](https://gitlab.com/pks-gitlab), Gitaly staff engineer, implemented that new `-Z` option.\n\nDetails of the patch series, including discussions, can be found\n[here](https://lore.kernel.org/git/20221209150048.2400648-1-toon@iotcl.com/)\nand [here](https://lore.kernel.org/git/cover.1685710884.git.ps@pks.im/).\n\n## 3. 
Pass pseudo-options to `git rev-list --stdin`\n\n### Computing sizes\nIn GitLab, we need to have different ways to compute the size of Git\nrelated content. For example, we need to know:\n  - how much disk space a repository is using\n  - how big a specific Git object is\n  - how much additional space on a repository is required by a\n    specific set of revisions (and the objects they reference)\n\nKnowing \"how much disk space a repository is using\" is useful to\nenforce repository-related quotas and is easy to get using regular\nshell and OS features.\n\nSize information about a specific Git object is useful to enforce\nquotas related to maximum file size. It can be obtained using, for\nexample, `git cat-file -s \u003Cobject>` or\n`echo \u003Cobject> | git cat-file --batch-check='%(objectsize)'`\nas already seen above.\n\nComputing the space required by a set of revisions is useful, too, as\nforks can share Git content in what we call\n\"[pool repositories](https://docs.gitlab.com/ee/development/git_object_deduplication.html),\"\nand we want to discriminate how much content belongs to each forked\nrepository. Fortunately, `git rev-list` has a `--disk-usage` option\nfor this purpose.\n\n### Passing arguments to `git rev-list`\n`git rev-list` can take a number of different arguments and has a lot\nof different options. It's a fundamental command to traverse commit\ngraphs, and it should be flexible enough to fulfill a lot of different\nuser needs.\n\nWhen repositories grow, they often store a lot of references and a lot\nof files and directories, so there is often the need to pass a big\nnumber of references or paths as arguments to the\ncommand. References and paths can be quite long though.\n\nTo avoid hitting platform limits related to command line length, long\nago, a `--stdin` mode was added that allowed users to pass revisions\nand paths through stdin, instead of as command line\narguments. 
However, when that was implemented, it was not considered\nnecessary to allow options or pseudo-options, like `--not`,\n`--glob=...`, or `--all` to be passed through stdin.\n\nThis appeared to be a problem for GitLab, as for computing sizes for\nforked repositories we needed some of the pseudo-options, and it would\nhave been intricate and possibly buggy to pass some of them and their\narguments as arguments on the command line while others were passed\nthrough stdin.\n\n### Allowing pseudo-options\nTo fix this issue, Patrick Steinhardt implemented a small patch series to\nallow pseudo-options through stdin.\n\nWith it, in Git 2.42, one can now pass pseudo-options, like `--not`,\n`--glob=...`, or `--all` through stdin when the `--stdin` mode is used.\n\nDetails of the patch series, including discussions, can be found\n[here](https://lore.kernel.org/git/cover.1686744685.git.ps@pks.im/).\n\n## 4. Code and test improvements\nWhile looking at some Git code, we are often tempted to modify nearby\ncode, either to change only its style when the code is ancient and it\nwould look better using Git's current code style, or to refactor it to\nmake it cleaner. 
This is why we sometimes send small patch series that\ndon't have a real GitLab related purpose.\n\nIn Git 2.42, examples of style code improvements we made are the\n[part1](https://lore.kernel.org/git/pull.1513.git.git.1684440205.gitgitgadget@gmail.com/)\nand\n[part2](https://lore.kernel.org/git/pull.1514.git.git.1684599239.gitgitgadget@gmail.com/)\ntest code modernization patches from John Cai.\n\nAnd [here](https://lore.kernel.org/git/cover.1684324059.git.ps@pks.im/) is\nan example of a refactoring to cleanup some code by Patrick Steinhardt.\n","product",[9,705,683,266],{"slug":882,"featured":6,"template":687},"contributions-to-git-2-42-release","content:en-us:blog:contributions-to-git-2-42-release.yml","Contributions To Git 2 42 Release","en-us/blog/contributions-to-git-2-42-release.yml","en-us/blog/contributions-to-git-2-42-release",{"_path":888,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":889,"content":894,"config":901,"_id":903,"_type":13,"title":904,"_source":15,"_file":905,"_stem":906,"_extension":18},"/en-us/blog/contributions-to-latest-git-release",{"title":890,"description":891,"ogTitle":890,"ogDescription":891,"noIndex":6,"ogImage":871,"ogUrl":892,"ogSiteName":672,"ogType":673,"canonicalUrls":892,"schema":893},"Git 2.41 release - Here are five of our contributions in detail","Find out how GitLab's Git team helped improve the latest version of Git.","https://about.gitlab.com/blog/contributions-to-latest-git-release","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"Git 2.41 release - Here are five of our contributions in detail\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"John Cai\"}],\n        \"datePublished\": \"2023-06-20\",\n      }",{"title":890,"description":891,"authors":895,"heroImage":871,"date":897,"body":898,"category":899,"tags":900},[896],"John Cai","2023-06-20","\n[Git 
2.41](https://gitlab.com/gitlab-org/git/-/raw/master/Documentation/RelNotes/2.41.0.txt)\nwas officially released on June 1, 2023, and included some improvements from GitLab's Git team. Git is the foundation of\nrepository data at GitLab. GitLab's Git team works on everything from new\nfeatures, performance improvements, documentation improvements, and growing the Git\ncommunity. Often our contributions to Git have a lot to do with the way we integrate Git into\nour services at GitLab. Here are some highlights from this latest Git release,\nand a window into how we use Git on the server side at GitLab.\n\n## 1. Machine-parseable fetch output\nWhen `git-fetch` is run, the output is a familiar for users of Git and looks\nsomething like this:\n\n```bash\n> git fetch\nremote: Enumerating objects: 296, done.\nremote: Counting objects: 100% (189/189), done.\nremote: Compressing objects: 100% (103/103), done.\nremote: Total 296 (delta 132), reused 84 (delta 84), pack-reused 107\nReceiving objects: 100% (296/296), 184.46 KiB | 11.53 MiB/s, done.\nResolving deltas: 100% (173/173), completed with 42 local objects.\nFrom https://gitlab.com/gitlab-org/gitaly\n   cfd146b4d..a69cf20ce  master                                                                             -> origin/master\n   3a877b8f3..854f25045  15-11-stable                                                                       -> origin/15-11-stable\n * [new branch]          5316-check-metrics-and-decide-if-need-to-context-cancel-the-running-git-process-in -> origin/5316-check-metrics-and-decide-if-need-to-context-cancel-the-running-git-process-in\n + bdd3c05a2...0bcf6f9d4 blanet_default_branch_opt                                                          -> origin/blanet_default_branch_opt  (forced update)\n * [new branch]          jt-object-pool-disconnect-refactor                                                 -> origin/jt-object-pool-disconnect-refactor\n + f2447981c...34e06e106 
jt-replicate-repository-alternates                                                 -> origin/jt-replicate-repository-alternates  (forced update)\n * [new branch]          kn-logrus-update                                                                   -> origin/kn-logrus-update\n + 05cea76f3...258543674 kn-smarthttp-docs                                                                  -> origin/kn-smarthttp-docs  (forced update)\n * [new branch]          pks-git-pseudorevision-validation                                                  -> origin/pks-git-pseudorevision-validation\n + 2e8d0ccd5...bf4ed8a52 pks-storage-repository                                                             -> origin/pks-storage-repository  (forced update)\n * [new branch]          qmnguyen0711/expose-another-port-for-pack-rpcs                                     -> origin/qmnguyen0711/expose-another-port-for-pack-rpcs\n + 82473046f...8e23e474c use_head_reference\n```\n\nThe problem with this output is that it's not meant for machines to parse.\n\nBut why would it be useful to make this output parseable by machines? To understand\nthis, we need to back up a little bit and talk about Gitaly Cluster. [Gitaly Cluster](https://docs.gitlab.com/ee/administration/gitaly/#gitaly-cluster)\nis a service at GitLab that provides high availability of Git repositories by\nreplicating repository writes to replica nodes. Each time a write comes in which\nchanges a Git repository (for example, a push that updates a reference) the write goes to\nthe primary node, and to all replica nodes before the write can succeed. A\nvoting mechanism takes place where the nodes vote on what its updated\nvalue for the reference would be. This vote succeeds when a quorum of replica\nnodes have successfully written the ref, and the write succeeds.\n\nOne of our remote procedure calls (RPCs) in Gitaly runs `git-fetch(1)` for repository mirroring. 
By\ndefault, when `git-fetch(1)` is run, it will update any references that are able\nto be fast-forwarded and fail on any reference that has since diverged will not\nbe updated.\n\nAs mentioned above, whenever there is an operation that modifies a repository, there\nis a voting mechanism that ensures the same modification is made to all replica nodes.\nTo dive in even a little deeper, our voting mechanism leverages Git's reference transaction hook,\nwhich runs an executable once per reference transaction. `git-fetch(1)` by default will\nstart a reference transaction per reference it updates. A fetch that updates hundreds or\neven thousand of references would thus vote once per reference that gets updated.\n\nIn the following sequence diagram, we are only showing one Gitaly node, but for a Gitaly Cluster\nwith, let's say, three nodes, what happens with the Gitaly primary also happens in\nthe replicas.\n\n```mermaid\nsequenceDiagram\n    participant user\n    participant GitlabUI as Gitlab UI\n    participant p as Praefect\n    participant g0 as Gitaly (primary)\n    participant git as Git\n    user->>GitlabUI: mirror my repository\n    GitlabUI->>p: FetchRemote\n    activate p\n    p->>g0: FetchRemote\n    activate g0\n    g0->>git: fetch-remote\n    activate git\n    git->>g0: vote on refs/heads/branch1 update\n    g0->>p: vote on refs/heads/branch1 update\n    git->>g0: vote on refs/heads/branch2 update\n    g0->>p: vote on refs/heads/branch2 update\n    git->>g0: vote on refs/heads/branch3 update\n    g0->>p: vote on refs/heads/branch3 update\n    deactivate git\n    note over p: vote succeeds\n    p->>GitlabUI: success\n    deactivate g0\n    deactivate p\n\n```\n\nThis is inefficient. Ideally we would want to vote once per batch of references\nupdated from one `git-fetch(1)` call. There is an option `--atomic` in\n`git-fetch(1)` that will open one reference transaction for all references\nupdated by `git-fetch(1)`. 
However, when `--atomic` is used, a `git-fetch` call will fail if any references have since diverged. This is not how we want repository mirroring to work. We actually want `git-fetch` to update whichever refs it can.\n\nSo, that means we cannot use the `--atomic` flag and are thus stuck voting per reference we update.\n\n### Solution: Handle the reference update ourselves\nThe way we are solving this inefficiency is to handle the reference update\nourselves. Instead of relying on `git-fetch(1)` to both fetch the objects and\nupdate all the references, we can use the `--dry-run` option of `git-fetch(1)`\nto first fetch the objects into a quarantine directory. Then if we can know\nwhich references *would* be updated, we can start a reference transaction\nourselves with `git-update-ref(1)` and update all the refs in one transaction,\nhence triggering a single vote only.\n\n```mermaid\n\nsequenceDiagram\n    participant user\n    participant Gitlab UI\n    participant p as Praefect\n    participant g0 as Gitaly (primary)\n    participant git as Git\n    user->>Gitlab UI: mirror my repository\n    Gitlab UI->>p: FetchRemote\n    activate p\n    p->>g0: FetchRemote\n    g0->>git: fetch-remote --dry-run --porcelain\n    activate git\n    note over git: objects are fetched into a quarantine directory\n    git->>g0: branch1, branch2, branch3 will be updated\n    deactivate git\n    g0->>git: update-ref\n    activate git\n    note over git: update branch1, branch2, branch3 in a single transaction\n    git->>g0: reference transaction hook\n    deactivate git\n    g0->>p: vote on ref updates\n    note over p: vote succeeds\n    p->>Gitlab UI: success\n    deactivate p\n\n```\n\nA requirement for this however, is that we would be able to parse the output of\n`git-fetch(1)` to tell which refs will be updated and to what values. 
Currently\nin `--dry-run`, `git-fetch(1)`'s output cannot be parsed by a machine.\n\n[Patrick Steinhardt](https://gitlab.com/pks-gitlab), Staff Backend Engineer, Gitaly, added a  `--porcelain` [option to git-fetch](https://git-scm.com/docs/git-fetch#Documentation/git-fetch.txt---porcelain)\nthat causes `git-fetch(1)` to gives its output in a machine-parseable format.\n\n```\n> git fetch --porcelain --dry-run --quiet\n* cd7ec0e2505463855d04f0a685d53af604079bdf 023a4cca58ac713090df15015a2efeadc73be522 refs/remotes/origin/master\n* 0000000000000000000000000000000000000000 b4a007671bd331f1c6f5857aa9a6ab95d500b412 refs/remotes/origin/alejguer-improve-readabiliy-geo\n  2314938437eb962dadd6a88f45d463f8ed2c7cec 3d3e36fa40e9b87b90ef31f80c63c767d0ef3638 refs/remotes/origin/ali/document-keyless-container-signing\n+ c8107330f8d5a938f6349743310db030ca5159e6 e155670196e4974659304c79e670b238192bce08 refs/remotes/origin/fc-add-failed-jobs-in-mr-part-2\n+ 9ec873de405b3c5078ad1c073711a222e7734337 eb7947e37d05460a94c988bf1f408f96228dd50d refs/remotes/origin/fc-mvc-details-page\n* 0000000000000000000000000000000000000000 36d214774f39d3c3d0569df8befd2b46d22ea94b refs/remotes/origin/group-runner-docs\n+ b357bfdec53b96e76582ac5dd64deb2d35dbe697 7b85d775b1a46ea94e0b241aa0b6aa37ae2e0b69 refs/remotes/origin/jwanjohi-add-abuse-training-data-table\n+ c9beb0b9c0b933903c12393acaa2c4447bb9035f fd13eda262c67a48495a0695659fea10b32e7e02 refs/remotes/origin/jy-permissions-blueprint\n+ 9ecf5a7fb7ca39a6a4296e569af0ddff1058a830 3341369e650c931c46d9880f3b781dc1e21c9f75 refs/remotes/origin/kassio/spike-pages-review-apps\n```\n\nThis change allows us to be much more efficient when mirroring repositories.\n\nDetails of the patch series, including discussions can be found [here](https://lore.kernel.org/git/cover.1683721293.git.ps@pks.im/).\n\n## 2. 
A new way to read Git attribute files\n[Git attribute](https://docs.gitlab.com/ee/user/project/git_attributes.html) is\na way to define attributes in a Git repository such as syntax highlighting. Until now, Git only read `.gitattribute` files in the wokrtree or the\n`.git/info/attributes` files. On Gitaly servers, we store repositories on disk\nas [bare\nrepositories](https://git-scm.com/docs/git-clone#Documentation/git-clone.txt---bare).\nThis means that on the server we don't keep worktrees around. To\nsupport gitattributes on GitLab then, we use a workaround whereby when the user\nchanges attributes on the default branch, we copy the contents of the blob\n`HEAD:.gitattribute` to the `info/attributes` file.\n\n\u003Cpre class=\"mermaid\">\nflowchart TD\n  A[User A] -->|edit HEAD:.gitattributes\u003Cbr/>git push| B[Gitaly]\n  B --> |copy HEAD:.gitattributes\u003Cbr/>to info/attributes| C[info/attributes file]\n  D[GitLab UI] --> |Display code with syntax highlighting| B\n  B -.->|how should I do syntax highlighting?\u003Cbr/>Read info/attributes file| C   \n\u003C/pre>\n\n### Solution: New git option to read attribute files directly\nTo get rid of this extra step of copying a blob to `info/attributes`,\nI added a new git\n[option](https://git-scm.com/docs/git#Documentation/git.txt---attr-sourcelttree-ishgt)\n`--attr-source=\u003Ctree>` whereby a caller can pass in a tree from which Git will\nread the attributes file directly. This way Git can read the attributes blob directly\nwithout a worktree and without having to copy the contents to `info/attributes` each time it changes.\n\n\u003Cpre class=\"mermaid\">\nflowchart TD\n    A[User A] -->|edit HEAD:.gitattributes\u003Cbr/>git push| B[Gitaly]\n    D[GitLab UI] --> |Display code with syntax highlighting|B\n    B --> |Directly read the HEAD:.gitattributes blob|B\n\u003C/pre>\n\nHaving this feature in Git allows us to simplify this process a lot. 
We no longer\nhave to manually copy over the contents to a separate file. Internally, this\nallows us to delete two RPCs, reducing complexity and improving performance.\n\nDetails of this patch series, including discussions can be found [here](https://lore.kernel.org/git/pull.1470.v6.git.git.1683346530487.gitgitgadget@gmail.com/).\n\n## 3. Bug fix in commit-graph generation numbers\nA regression for truncated commit-graph generation numbers is a bug that we have been hitting for\nspecific repositories, corrupting the commit-graph. The [commit\ngraph](https://git-scm.com/docs/commit-graph) is an important Git optimization\nthat speeds up commit graph walks. Commit graph walks happen whenever Git has to\nwalk through commit history. Any time we display commit history in the UI, for\ninstance, it  will trigger a commit graph walk. Keeping these fast is crucial to a\nsnappy browsing experience.\n\n### Solution: A patch series to fix the bug\nPatrick submitted a patch series to fix the regression for truncated commit-graph generation numbers bug \nDetails of this patch series, including discussions can be found [here](https://lore.kernel.org/git/f8a0a869e8b0882f05cac49d78f49ba3553d3c44.1679904401.git.ps@pks.im/).\n\n## 4. Fix for stale lockfiles in `git-receive-pack`\n`git-receive-pack(1)` is a Git command that handles the server-side of pushes. When `git push` is run\nagainst a GitLab server, Gitaly will handle the `ssh` or `http` request and\nspawn a `git-receive-pack(1)` process behind the scenes to handle the push.\n\n`git-receive-pack(1)` will write a lockfile when processing packfiles in order\nto prevent a race condition where a concurrent garbage-collecting process tries\nto delete the new packfile that is not yet being referenced by anything.\n\nWhen the `git-receive-pack(1)` process dies prematurely for whatever reason, this\nlockfile was being left around instead of being cleaned up. 
Busy repositories\nthat received many pushes a day could grow in size quickly due to the\naccumulation of these lockfiles.\n\n### Solution: A patch series to clean up unused lockfiles\nPatrick fixed this by submitting a patch series that allows `git-receive-pack(1)` to clean up its unused lockfiles. This allows GitLab to save space on its servers from having to keep useless lockfiles around.\n\nDetails of this patch series, including discussions can be found [here](https://lore.kernel.org/git/e1ee1d8026a361bc58d16bc741e2b347ada7a53e.1678431076.git.ps@pks.im/).\n\n## 5. Fixed geometric repacking with alternate object databases\n[Geometric repacking](https://git-scm.com/docs/git-repack#Documentation/git-repack.txt---geometricltfactorgt)\nis a repacking strategy where instead of packing everything into on giant pack\neach time, several packs are kept around according to a geometric progression\nbased on object size.\n\nThis is useful for large and very busy repositories so that housekeeping doesn't\nhave to pack all of its objects into a giant pack each time.\n\nUnfortunately, geometric repacking had various corner case bugs when an\nalternate object database was involved. At GitLab, we leverage the Git\nalternates mechanism to save space in the case of forks. A fork of a repository\nshares most files. Instead of keeping a second copy of all the data, when we\ncreate a fork, we can deduplicate this data by having both the source\nrepository, as well as the fork repository share objects by pointing to a third\nrepository. This means that only one copy of a blob needs to be kept around\nrather than two.\n\nGeometric repacking bugs prevented it from working in an object database that\nwas connected to an alternate object database.\n\n### Solution: A patch series\nThese bugs have been fixed via a patch series from Patrick. 
This\nhelps us as we improve our implementation of object pools in Gitaly.\n\nDetails of this patch series, including discussions can be found [here](https://lore.kernel.org/git/cover.1681452028.git.ps@pks.im/).\n","devsecops",[9,705,683,266],{"slug":902,"featured":6,"template":687},"contributions-to-latest-git-release","content:en-us:blog:contributions-to-latest-git-release.yml","Contributions To Latest Git Release","en-us/blog/contributions-to-latest-git-release.yml","en-us/blog/contributions-to-latest-git-release",{"_path":908,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":909,"content":915,"config":921,"_id":923,"_type":13,"title":924,"_source":15,"_file":925,"_stem":926,"_extension":18},"/en-us/blog/git-command-line-on-windows-with-git-bash",{"title":910,"description":911,"ogTitle":910,"ogDescription":911,"noIndex":6,"ogImage":912,"ogUrl":913,"ogSiteName":672,"ogType":673,"canonicalUrls":913,"schema":914},"Git command line on Windows with Git Bash","Learn about Git Bash, how it works, how to install it, and the main commands you need to know.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749660028/Blog/Hero%20Images/blog-image-template-1800x945__25_.png","https://about.gitlab.com/blog/git-command-line-on-windows-with-git-bash","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"Git command line on Windows with Git Bash\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"GitLab\"}],\n        \"datePublished\": \"2024-12-16\",\n      }",{"title":910,"description":911,"authors":916,"heroImage":912,"date":918,"body":919,"category":680,"tags":920},[917],"GitLab","2024-12-16","Git commands allow developers to manage different versions of code and collaborate as a team. If you're in a Windows environment, you may have heard of Git Bash, a Bash terminal emulator that includes a Windows-friendly version of Git. 
Discover everything you need to know about installing Git Bash in this guide.\n\n## How does Git Bash work?   \nGit Bash is an application that you can install on Windows operating systems using Git for Windows. This application acts as an emulator to use the [Git version control tool](https://about.gitlab.com/topics/version-control/what-is-git-version-control/#what-is-git) on a Bash command terminal.\n\nBash is an acronym for Bourne Again SHell. SHell refers to the command terminal application of an operating system (OS). Bourne Again SHell is actually an upgraded version of Bourne SHell (also referred to as shell sh), the command line interface for UNIX developed by Stephen Bourne in 1977.  \n\nBash is the default shell for Linux and MacOS operating systems. With Git Bash, Windows users can install Bash, run Bash commands and use Git commands.\n\n## How to install Git Bash   \n\nTo download Git Bash, it is necessary to install Git for Windows. To do this, go to the official [Git for Windows](https://gitforwindows.org/) website and click \"Download\" to install the full Git package. When the download is complete, open the .exe file and begin the installation.  \n\nTo install Git Bash on Windows, please follow these step-by-step instructions:\n\n1. Open the .exe file and click **Next**. Select the appropriate folder for the installation.  \n2. Accept the terms of use and click **Next** to start the installation.  \n3. In this step, select the components to install. The pre-selected settings are relevant, but you can change them according to your preferences. Click **Next** again.  \n4. Then, choose the editor you prefer to use with Git. The tool recognizes editors already installed on your computer.  \n5. A window is displayed with three settings of the PATH environment. Depending on your needs, choose whether Git should only be used by Git Bash or if you want to use it from other third-party software.  \n6. 
Finally, keep the default settings by clicking **Next** and install Git Bash by clicking **Install**.\n\n## What are Bash commands?   \nFirst of all, the `pwd` (Print Working Directory) command allows you to view the absolute path. This means that it displays the path of the folder we are in at the time of typing the command.  \n**Remember:** When you open the Git Bash terminal, you are in a folder on your computer. Usually, this is the folder with your username.  \n\nThe `ls` command gives access to the list of files present in the current folder. You can also add options to the `ls` command with a dash `-`. For example, the `-l` option after `ls` lists the contents of a folder with more information about each file.\n\nBash also has a `cd` (Change Directory) command to move around your computer. To indicate the directory you want to go to, please specify the relative or absolute path after `cd`. The relative path is the location relative to the current directory while the absolute path is its location relative to the root folder.\n\n## How to use Git Bash with GitLab   \nUsing Git Bash with [GitLab](https://about.gitlab.com/) is like using the terminal emulator with another source code management platform. In order to push and retrieve your changes from GitLab, add the URL of your GitLab remote repository with the command: `git remote add origin \u003Crepository_url>`.\n\nIf your project is private, Git Bash asks you to authenticate yourself. Enter your credentials when the terminal requests your username and password. If you're having trouble logging in, check your authorization settings directly in GitLab.\n\nThen use the basic Git commands like `git clone`, `git commit`, `git push`, `git branch`, as well as `git checkout`, to name a few. To learn more, visit our [Git Cheat Sheet](https://about.gitlab.com/images/press/git-cheat-sheet.pdf).\n\n## Git Bash FAQ   \n**Are Git Bash and GitLab compatible?**\n\nYes. 
Using Git Bash with GitLab is similar to working with another source code management platform. Be sure to set up GitLab as a remote repository and authenticate yourself during the initial setup.\n\n**Why use Git Bash?**\n\nGit Bash acts as a terminal emulator to use the Git and Bash commands in a Windows environment.  \n\n**What's the point of a shell?**\n\nUsing a shell allows you to automate tasks through scripts, effectively control your computer and benefit from direct access to system functions.\n\n## Read more\n- [What is Git version control?](https://about.gitlab.com/topics/version-control/what-is-git-version-control/)\n- [What's new in Git 2.47.0?](https://about.gitlab.com/blog/whats-new-in-git-2-47-0/)\n- [Git pull vs. git fetch: What's the difference?](https://about.gitlab.com/blog/git-pull-vs-git-fetch-whats-the-difference/)",[9,683],{"slug":922,"featured":6,"template":687},"git-command-line-on-windows-with-git-bash","content:en-us:blog:git-command-line-on-windows-with-git-bash.yml","Git Command Line On Windows With Git Bash","en-us/blog/git-command-line-on-windows-with-git-bash.yml","en-us/blog/git-command-line-on-windows-with-git-bash",{"_path":928,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":929,"content":935,"config":942,"_id":944,"_type":13,"title":945,"_source":15,"_file":946,"_stem":947,"_extension":18},"/en-us/blog/git-fetch-performance-2021-part-2",{"title":930,"description":931,"ogTitle":930,"ogDescription":931,"noIndex":6,"ogImage":932,"ogUrl":933,"ogSiteName":672,"ogType":673,"canonicalUrls":933,"schema":934},"Git fetch performance improvements in 2021, Part 2 ","Looking back at the server-side performance improvements we made in 2021 for Git fetch.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749663383/Blog/Hero%20Images/tanuki-bg-full.png","https://about.gitlab.com/blog/git-fetch-performance-2021-part-2","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n     
   \"headline\": \"Git fetch performance improvements in 2021, Part 2 \",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Jacob Vosmaer\"}],\n        \"datePublished\": \"2022-02-07\",\n      }",{"title":930,"description":931,"authors":936,"heroImage":932,"date":938,"body":939,"category":726,"tags":940},[937],"Jacob Vosmaer","2022-02-07","\nIn [Part 1](/blog/git-fetch-performance/) of this two-part series, we looked at how much server-side Git fetch performance, especially for CI, has improved in GitLab in 2021. Now, we will discuss how we achieved this.\n\n## Recap of Part 1\n-   In December 2019, we set up custom CI fetch caching automation for\n   `gitlab-org/gitlab`, which we internally called \"the CI pre-clone\n   script\".\n-   In December 2020, we encountered some production incidents on GitLab.com,\n   which highlighted that the CI pre-clone script had become critical\n   infrastructure but, at the same time, it had not yet matured beyond\n   a custom one-off solution.\n-   Over the course of 2021, we built an alternative caching solution\n   for CI Git fetch traffic called the pack-objects cache. In Part 1,\n   we discussed a benchmark simulating CI fetch traffic which shows\n   that the pack-objects cache combined with other efficiency\n   improvements reduced GitLab server CPU consumption 9x compared to\n   the baseline of December 2020.\n\n## The pack-objects cache\n\nAs discussed in Part 1, what we realized through the\nproduction incidents in December 2020 was that the CI pre-clone script\nfor `gitlab-org/gitlab` had become a critical piece of infrastructure.\nAt the same time, it benefited only one Git repository on GitLab.com,\nand it was not very robust. It would be much better to have an\nintegrated solution that benefits all repositories. 
We achieved this\ngoal by building the [pack-objects cache](https://docs.gitlab.com/ee/administration/gitaly/configure_gitaly.html#pack-objects-cache).\n\nThe name \"pack-objects cache\" refers to `git pack-objects`, which is\nthe Git [subcommand](https://git-scm.com/docs/git-pack-objects) that\nimplements the [packfile](https://git-scm.com/book/en/v2/Git-Internals-Packfiles) compression algorithm. As this [Git commit message from Jeff King](https://gitlab.com/gitlab-org/gitlab-git/-/commit/20b20a22f8f7c1420e259c97ef790cb93091f475) explains, `git pack-objects` is a good candidate for a CI fetch cache.\n\n> You may want to insert a caching layer around\n> pack-objects; it is the most CPU- and memory-intensive\n> part of serving a fetch, and its output is a pure\n> function of its input, making it an ideal place to\n> consolidate identical requests.\n\nThe pack-objects cache is GitLab's take on this \"caching layer\". It\ndeduplicates identical Git fetch requests that arrive within a short\ntime window.\n\nAt a high level, when serving a fetch, we buffer the output of `git\npack-objects` into a temporary file. If an identical request comes in,\nwe serve it from the buffer file instead of creating a new `git\npack-objects` process. After 5 minutes, we delete the buffer file. If\nyou want to know more about how exactly the cache is implemented, you\ncan look at the implementation\n([1](https://gitlab.com/gitlab-org/gitaly/-/blob/v14.6.3/internal/gitaly/service/hook/pack_objects.go),\n[2](https://gitlab.com/gitlab-org/gitaly/-/tree/v14.6.3/internal/streamcache)).\n\n![Architecture diagram](https://about.gitlab.com/images/blogimages/git-fetch-2021/pack-objects-cache-architecture.jpg)\n\nBecause the amount of space used by the cache files is bounded roughly\nby the eviction window (5 minutes) multiplied by the maximum network bandwidth\nof the Gitaly server, we don't have to worry about the cache using a\nlot of storage. 
In fact, on GitLab.com, we store the cache files on the\nsame disks that hold the repository data. We leave a safety margin of\nfree space on these disks at all times anyway, and the cache fits in\nthat safety margin comfortably.\n\nSimilarly, we also don't notice the increase disk input/output\noperations per second (IOPS) used by the cache on GitLab.com. There\nare two reasons for this. First of all, whenever we _read_ data from\nthe cache, it is usually still in the Linux page cache, so it gets\nserved from RAM. The cache barely does any disk read I/O operations.\nSecond, although the cache does do _write_ operations, these fit\ncomfortably within the maximum sustained IOPS rate supported by the\nGoogle Compute Engine persistent disks we use.\n\nThis leads us to a disadvantage of the pack-objects cache, which is\nthat it really does write a lot of data to disk. On GitLab.com, we saw\nthe disk write throughput jump up by an order of magnitude. You can\nsee this in the graph below, which shows disk writes for a single\nGitaly server with a busy, large repository on it: (the GitLab [company\nwebsite](https://gitlab.com/gitlab-com/www-gitlab-com)). You can\nclearly see the number of bytes written to disk per second jump up when we\nturned the cache on.\n\n![increased disk writes with cache enabled](https://about.gitlab.com/images/blogimages/git-fetch-2021/cache-disk-writes.jpg)\n\nThis increase in disk writes is not a problem for our infrastructure because we have the\nspare capacity, but we were not sure we could assume the same for all\nother GitLab installations in the world. Because of this, we decided\nto leave the pack-objects cache off by default.\n\nThis was a difficult decision because we think almost all GitLab\ninstallations would benefit from having this cache enabled. 
One of the\nreasons we are writing this blog post is to raise awareness that this\nfeature is available, so that self-managed GitLab administrators can\nopt in to using it.\n\nAgain, on the positive side, the cache did not introduce a new\npoint of failure on GitLab.com. If the `gitaly` service is running,\nand if the repository storage disk is available, then the cache is\navailable. There are no external dependencies. And if `gitaly` is not\nrunning, or the repository storage disk is unavailable, then the whole\nGitaly server is unavailable anyway.\n\nAnd finally, cache capacity grows naturally with the number of Gitaly\nservers. Because the cache is completely local to each Gitaly server,\nwe do not have to worry about whether the cache keeps working as we\ngrow GitLab.com.\n\nThe pack-objects cache was introduced in GitLab 13.11. In GitLab 14.5,\nwe made it a lot more efficient by optimizing its transport using Unix\nsockets\n([1](https://gitlab.com/gitlab-org/gitaly/-/merge_requests/3758),\n[2](https://gitlab.com/gitlab-org/gitaly/-/merge_requests/3759)). If\nyou want to [try out the pack-objects cache](https://docs.gitlab.com/ee/administration/gitaly/configure_gitaly.html#pack-objects-cache) on\nyour self-managed GitLab instance, we recommend that you upgrade to\nGitLab 14.5 or newer first.\n\n## Improved RPC transport for Git HTTP\n\nAfter we built the pack-objects cache, we were able to generate a much\nhigher volume of Git fetch responses on a single Gitaly server.\nHowever, we then found out that the RPC transport between the HTTP\nfront-end (GitLab Workhorse) and the Gitaly server became a\nbottleneck. We tried disabling the CI pre-clone script of\n`gitlab-org/gitlab` in April 2021 but we quickly had to turn it back\non because the increased volume of Git fetch data transfer was slowing\ndown the rest of Gitaly.\n\nThe fetch traffic was acting as a noisy neighbor to all the other\ntraffic on `gitlab-org/gitlab`. 
For each GitLab.com Gitaly server, we\nhave a request latency\n[SLI](https://sre.google/sre-book/service-level-objectives/). This is\na metric that observes request latencies for a selection of RPCs that\nwe expect to be fast, and it tracks how many requests for these RPCs\nare \"fast enough\". If the percentage of fast-enough requests drops\nbelow a certain threshold, we know we have a problem.\n\nWhen we disabled the pre-clone script, the network traffic to the\nGitaly server hosting `gitlab-org/gitlab` went up, as expected. What\nwent wrong was that the percentage of fast-enough requests started to\ndrop. This was not because the server had to serve up more data: The\nRPCs that serve the Git fetch data do not count towards the latency\nSLI.\n\nBelow you see two graphs from the day we tried disabling the CI\npre-clone script. First, see how the network traffic off of the Gitaly\nserver increased once we disabled the CI pre-clone script. This is\nbecause instead of pulling most of the data from object storage, and\nonly some of the data from Gitaly, the CI runners now started pulling\nall of the Git data they needed from Gitaly.\n\n![network peaks](https://about.gitlab.com/images/blogimages/git-fetch-2021/no-script-network-annotated.png)\n\nNow consider our Gitaly request latency SLI for this particular\nserver. For historical reasons, we call this \"Apdex\" in our dashboards.\nRecall that this SLI tracks the percentage of fast-enough requests from\na selection of Gitaly RPCs. The ideal number would be 100%. 
In the\ntime window where the CI pre-clone script was disabled, this graph\nspent more time below 99%, and it even dipped below 96% several times.\n\n![latency drops](https://about.gitlab.com/images/blogimages/git-fetch-2021/no-script-latency-annotated.png)\n\nEven though we could not explain what was going on, the latency SLI dips\nwere clear evidence that disabling the CI pre-clone script slowed down\nunrelated requests to this Gitaly server, to a point which is\nunacceptable. This was a setback for our plan to replace the CI pre-clone script.\n\nBecause we did not want to just give up, we set aside some time to try\nand understand what the bottleneck was, and if it could be\ncircumvented. The bad news is that we did not come up with a\nsatisfactory answer about what the bottleneck is. But the good news is\nthat we were able to circumvent it.\n\nBy building a simplified [prototype alternate RPC\ntransport](https://gitlab.com/gitlab-com/gl-infra/scalability/-/issues/1046),\nwe were able to find out that with the pack-objects cache, the\nhardware we run on and Git itself were able to serve up much more\ntraffic than we were able to get out of GitLab. We [never got to the\nbottom](https://gitlab.com/gitlab-com/gl-infra/scalability/-/issues/1024)\nof what was causing all the overhead but a likely suspect is the fact\nthat gRPC-Go allocates memory for each message it sends, and with Git\nfetch traffic we send a lot of messages. Gitaly was spending a lot of\ntime doing garbage collection.\n\nWe then had to decide how to improve the situation. Because we were\nuncertain if we could fix the apparent bottleneck in gRPC, and because\nwe were certain that we could go faster by not sending the Git fetch data\nthrough gRPC in the first place, we chose to do the latter. We created\nmodified versions of the RPCs that carry the bulk of the Git fetch\ndata. On the surface, the new versions are still gRPC methods. 
But\nduring a call, each will establish a side channel, and use that for\nthe bulk data transfer.\n\n![side channel diagram](https://about.gitlab.com/images/blogimages/git-fetch-2021/sidechannel.png)\n\nThis way we avoided making major changes to the structure of Gitaly:\nit is still a gRPC server application. Logging, metrics,\nauthentication, and other middleware work as normal on the optimized\nRPCs. But most of the data transfer happens on either Unix sockets (for localhost RPC calls) or [Yamux streams](https://github.com/hashicorp/yamux/) (for the regular RPC calls).\n\nBecause we have 6x more Git HTTP traffic than Git SSH traffic on\nGitLab.com, we decided to initially only optimize the transport for\nGit HTTP traffic. We are still working on [doing the same for Git\nSSH](https://gitlab.com/groups/gitlab-com/gl-infra/-/epics/652) because, even though Git HTTP efficiency is more important for\nGitLab.com than that of Git SSH, we know that for some self-managed\nGitLab instances it is the other way around.\n\nThe new server-side RPC transport for Git HTTP was released in GitLab\n14.5. There is no configuration required for this improved transport.\nRegardless of whether you use the pack-objects cache on your GitLab\ninstance, Gitaly, Workhorse, and Praefect all use less CPU to handle\nGit HTTP fetch requests now.\n\nThe payoff for this work came in October 2021 when we disabled the CI\npre-clone script for `gitlab-org/gitlab`, which did not cause any\nnoisy neighbor problems this time. We have had no issues since then\nserving the Git fetch traffic for that project.\n\n## Improvements to Git itself\n\nAside from the pack-objects cache and the new RPC transport between\nWorkhorse and Gitaly, we also saw some improvements because of changes\nin Git itself. 
We discovered a few inefficiencies which we\nreported to the Git mailing list and helped get fixed.\n\nOur main repository `gitlab-org/gitlab` has hundreds of thousands of [Git\nreferences](https://git-scm.com/book/en/v2/Git-Internals-Git-References). Looking at CPU profiles, we [noticed](https://gitlab.com/groups/gitlab-com/gl-infra/-/epics/400) that a lot of Git\nfetch time was spent on the server iterating over these references.\nThese references were not even being sent back to the client; Git was\njust scanning through all of them on the server twice for each CI job.\n\nIn both cases, the problem could be fixed by doing a scan over a\nsubset instead of a scan across all references. These two problems got fixed\n([1](https://gitlab.com/gitlab-org/gitlab-git/-/commit/b3970c702cb0acc0551d88a5f34ad4ad2e2a6d39), [2](https://gitlab.com/gitlab-org/gitlab-git/-/commit/be18153b975844f8792b03e337f1a4c86fe87531)) in Git 2.31.0, released in March 2021.\n\nLater on, we found a different problem, also in the reference-related\nworkload of Git fetch. As part of the fetch protocol, the server sends\na list of references to the client so that the client can update its\nlocal branches etc. It turned out that for each reference, Git was\ndoing 1 or 2 `write` system calls on the server. This led to [a lot of\noverhead](https://gitlab.com/gitlab-com/gl-infra/scalability/-/issues/1257), and this was made worse by our old RPC transport which could\nend up sending 1 RPC message per advertised Git reference.\n\nThis problem got fixed in Git itself by changing the functions that\nwrite the references to [use buffered\nIO](https://gitlab.com/gitlab-org/gitlab-git/-/commit/70afef5cdf29b5159f18df1b93722055f78740f8).\nThis change landed in Git 2.34.0, released in November 2021. 
Ahead of\nthat, it got shipped in GitLab 14.4 as a custom Git patch.\n\nFinally, we discovered that increasing the copy buffer size used by\n`git upload-pack` to relay `git pack-objects` output made both `git\nupload-pack` and [every link in the chain after\nit](https://gitlab.com/gitlab-org/gitaly/-/merge_requests/4224) more\nefficient. This got fixed in Git by [increasing the buffer\nsize](https://gitlab.com/gitlab-org/gitlab-git/-/commit/55a9651d26a6b88c68445e7d6c9f511d1207cbd8).\nThis change is part of Git 2.35.0 and is included in GitLab 14.7, both\nof which were released in January 2022.\n\n## Summary\n\nIn Part 1, we showed that GitLab server performance when service CI Git fetch traffic has improved a lot in 2021. In this post, we explained that the improvements are due to:\n\n- The pack-objects cache\n- A more efficient Git data transport between server-side GitLab components\n- Efficiency improvements in Git itself\n\n## Thanks\n\nMany people have contributed to the work described in this blog post.\nI would like to specifically thank Quang-Minh Nguyen and Sean McGivern\nfrom the Scalability team, and Patrick Steinhardt and Sami Hiltunen\nfrom the Gitaly team.\n\n## Related content\n\n- Improvements to the client-side performance of `git fetch` (although GitLab is a server application, it sometimes acts as a Git client): [mirror fetches](https://gitlab.com/gitlab-org/git/-/issues/95), [fetches into repositories with many references](https://gitlab.com/gitlab-org/git/-/issues/94)\n- Improvements to server-side Git push performance: [consistency check improvements](https://gitlab.com/gitlab-org/git/-/issues/92)\n",[9,941,684],"production",{"slug":943,"featured":6,"template":687},"git-fetch-performance-2021-part-2","content:en-us:blog:git-fetch-performance-2021-part-2.yml","Git Fetch Performance 2021 Part 
2","en-us/blog/git-fetch-performance-2021-part-2.yml","en-us/blog/git-fetch-performance-2021-part-2",{"_path":949,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":950,"content":956,"config":961,"_id":963,"_type":13,"title":964,"_source":15,"_file":965,"_stem":966,"_extension":18},"/en-us/blog/git-fetch-performance",{"title":951,"description":952,"ogTitle":951,"ogDescription":952,"noIndex":6,"ogImage":953,"ogUrl":954,"ogSiteName":672,"ogType":673,"canonicalUrls":954,"schema":955},"How we made Git fetch performance improvements in 2021, part 1","Our Scalability team tackled a server CPU utilization issue. Here's the first part of a detailed look at performance improvements we made for Git fetch.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749663397/Blog/Hero%20Images/logoforblogpost.jpg","https://about.gitlab.com/blog/git-fetch-performance","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"How we made Git fetch performance improvements in 2021, part 1\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Jacob Vosmaer\"}],\n        \"datePublished\": \"2022-01-20\",\n      }",{"title":951,"description":952,"authors":957,"heroImage":953,"date":958,"body":959,"category":726,"tags":960},[937],"2022-01-20","\nIn this post we look back on a series of projects from the Scalability\nteam that improved GitLab server-side efficiency for serving Git fetch\ntraffic. In the benchmark described below we saw a 9x reduction in\nGitLab server CPU utilization. Most of the performance comes from the\nGitaly pack-objects cache, which has proven very effective at reducing\nthe Gitaly server load caused by highly concurrent CI pipelines.\n\nThese changes are not user-visible but they benefit the stability and\navailability of GitLab.com. 
If you manage a GitLab instance\nyourself you may want to [enable the pack-objects\ncache](https://docs.gitlab.com/ee/administration/gitaly/configure_gitaly.html#pack-objects-cache)\non your instance too.\n\nWe discuss how we achieved these improvements in [part 2](/blog/git-fetch-performance-2021-part-2/).\n\n## Background\n\nWithin the GitLab application, Gitaly is the component that acts as a\nremote procedure call (RPC) server for Git repositories. On\nGitLab.com, repositories are stored on persistent disks attached to\ndedicated Gitaly servers, and the rest of the application accesses\nrepositories by making RPC calls to Gitaly.\n\nIn 2020 we encountered several incidents on GitLab.com caused by the fact that\nour Gitaly server infrastructure [could not\nhandle](https://gitlab.com/gitlab-com/gl-infra/production/-/issues/3013)\nthe Git fetch traffic generated by CI on our own main repository,\n[`gitlab-org/gitlab`](https://gitlab.com/gitlab-org/gitlab). The only reason the situation at the time worked\nwas because we had a custom CI caching solution for\n`gitlab-org/gitlab` only, commonly referred to as the \"CI pre-clone\nscript\".\n\n### The CI pre-clone script\n\nThe CI pre-clone script was an implementation of the [clone bundle CI\nfetching\nstrategy](https://www.kernel.org/best-way-to-do-linux-clones-for-your-ci.html).\nWe had originally set up the CI pre-clone script one year earlier, in\n[December 2019](https://gitlab.com/gitlab-org/gitlab/-/issues/39134).\nIt consisted of two parts.\n\n1.   A CI cron job that would clone `gitlab-org/gitlab`, pack up the\n   result into a tarball, and upload it to a known Google Cloud\n   Storage bucket.\n1.   A shell script snippet, stored in the `gitlab-org/gitlab` project settings, that was\n   injected into each `gitlab-org/gitlab` CI job. 
This shell script\n   would download and extract the latest tarball from the known URL.\n   After that the CI job did an incremental Git fetch, relative to the\n   tarball contents, to retrieve the actual CI pipeline commit.\n\nThis system was very effective. Our CI pipelines run against shallow\nGit clones of `gitlab-org/gitlab`, which require over 100MB of data to\nbe transfered per CI job. Because of the CI pre-clone script, the\namount of Git data per job was closer to 1MB. The rest of the data was\nalready there because of the tarball. The amount of repository data\ndownloaded by each CI job stayed the same, but only 1% of this data\nhad to come from a Gitaly server. This saved a lot of computation and\nbandwidth on the Gitaly server hosting `gitlab-org/gitlab`.\n\nAlthough this solution worked well, it had a number of downsides.\n\n1.   It was not part of the application and required per-project manual\n   set-up and maintenance.\n1.   It did not work for forks of `gitlab-org/gitlab`.\n1.   It had to be maintained in two places: the project that created the\n   tarball and the project settings of `gitlab-org/gitlab`.\n1.   We had no version control for the download script; this was just\n   text stored in the project's CI settings.\n1.   The download script was fragile. We had one case where we added an\n   `exit` statement in the wrong place, and all `gitlab-org/gitlab`\n   builds started silently using stale checkouts left behind by other\n   pipelines.\n1.   In case of a Google Cloud Storage outage, the full uncached traffic\n   would saturate the Gitaly server hosting `gitlab-org/gitlab`. Such\n   outages are rare but they do happen.\n1.   
A user who would want to copy our solution would have to set up\n   their own Google Cloud Storage bucket and pay the bills for it.\n\nThe biggest issue really was that one year on, the CI pre-clone script\nhad not evolved from a custom one-off solution into an easy to use\nfeature for everyone.\n\nWe solved this problem by building the pack-objects cache, which we\nwill describe in more detail in the next blog post. Unlike the CI pre-clone script,\nwhich was a separate component, the pack-objects cache sits inside\nGitaly. It is always on, for all repositories and all users on\nGitLab.com. If you run your own GitLab server you can also use the\npack-objects cache, but you do have to [turn it on\nfirst](https://docs.gitlab.com/ee/administration/gitaly/configure_gitaly.html#pack-objects-cache).\n\n## Performance comparison\n\nTo illustrate what has changed we have created a benchmark. We set up a GitLab\nserver with a clone of `gitlab-org/gitlab` on it, and we configured a\nclient machine to perform 20 simultaneous shallow clones of the same commit using Git HTTP.[^ssh] This\nsimulates having a CI pipeline with 20 parallel jobs. The pack data is\nabout 87MB so in terms of bandwidth, we are transferring `20 * 87 =\n1740MB` of data.\n\n[^ssh]: As of GitLab 14.6, Git HTTP is 3x more CPU-efficient on the server than Git SSH. We are working on [improving the efficiency of Git SSH in GitLab](https://gitlab.com/groups/gitlab-com/gl-infra/-/epics/652). We prioritized optimizing Git HTTP because that is what GitLab CI uses.\n\nWe did this experiment with two GitLab servers. Both were Google\nCompute Engine `c2-standard-8` virtual machines with 8 CPU cores and\n32GB RAM. 
The operating system was Ubuntu 20.04 and we installed\nGitLab using our Omnibus packages.\n\n### Before\n\n- GitLab FOSS 13.7.9 (released December 2020)\n- Default Omnibus configuration\n\nThe 30-second [Perf flamegraph](https://www.brendangregg.com/FlameGraphs/cpuflamegraphs.html) below was captured at 99Hz across all CPU's.\n\n![Flamegraph of GitLab 13.7 performance](https://about.gitlab.com/images/blogimages/git-fetch-2021/before.jpg)\n\nSource: [SVG](/images/blogimages/git-fetch-2021/before.svg)\n\n### After\n\n- GitLab FOSS 14.6.1 (released December 2021)\n- One extra setting in `/etc/gitlab/gitlab.rb`:\n\n```ruby\ngitaly['pack_objects_cache_enabled'] = true\n```\n\n![Flamegraph of GitLab 14.6 performance with\ncache](https://about.gitlab.com/images/blogimages/git-fetch-2021/after.jpg)\n\nSource: [SVG](/images/blogimages/git-fetch-2021/after.svg)\n\n### Analysis\n\nServer CPU profile distribution:\n\n|Value|Before|After\n|---|---|---|\n|Benchmark run time|27s|7.5s|\n|`git` profile samples|18 552|923|\n|`gitaly` samples (Git RPC server process)|1 247|331|\n|`gitaly-hooks` samples (pack-objects cache client)||258|\n|`gitlab-workhorse` samples (application HTTP frontend)|1 057|237|\n|`nginx` samples (main HTTP frontend)|474|251|\n|Total CPU busy samples|21 720|2 328|\n|CPU utilization during benchmark|100%|40%|\n\n### Conclusion\n\nCompared to GitLab 13.6 (December 2020), GitLab 14.6 (December 2021) plus the\npack-objects cache makes the CI fetch benchmark in this post run 3.6x faster.\nAverage server CPU utilization during the benchmark dropped from 100%\nto 40%.\n\nStay tuned for part 2 of this blog post, in which we will go over the\nchanges we made to make this happen.\n\n## Related content\n\n- [Gitaly pack-objects cache documentation](https://docs.gitlab.com/ee/administration/gitaly/configure_gitaly.html#pack-objects-cache)\n- [Epic to improve Git SSH efficiency in 
GitLab](https://gitlab.com/groups/gitlab-com/gl-infra/-/epics/652)\n",[9,941,684],{"slug":962,"featured":6,"template":687},"git-fetch-performance","content:en-us:blog:git-fetch-performance.yml","Git Fetch Performance","en-us/blog/git-fetch-performance.yml","en-us/blog/git-fetch-performance",{"_path":968,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":969,"content":975,"config":983,"_id":985,"_type":13,"title":986,"_source":15,"_file":987,"_stem":988,"_extension":18},"/en-us/blog/git-for-business-processes",{"title":970,"description":971,"ogTitle":970,"ogDescription":971,"noIndex":6,"ogImage":972,"ogUrl":973,"ogSiteName":672,"ogType":673,"canonicalUrls":973,"schema":974},"How we use Git as the blockchain for process changes","Git can be useful for more than just coding and operations. It can help you run your entire business – here's how we do it.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749679971/Blog/Hero%20Images/git-blockchain.jpg","https://about.gitlab.com/blog/git-for-business-processes","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"How we use Git as the blockchain for process changes\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Aricka Flowers\"}],\n        \"datePublished\": \"2019-01-15\",\n      }",{"title":970,"description":971,"authors":976,"heroImage":972,"date":978,"body":979,"category":980,"tags":981},[977],"Aricka Flowers","2019-01-15","\n\nGit may have started out as a way to collaborate on code, but there’s no denying that it has crept into the operations side of things. But does it stop there? 
We don’t think so.\n\nJust like [blockchain technology](https://blockgeeks.com/guides/what-is-blockchain-technology/) was originally created for cryptocurrency, but is now seen as a revolutionary way to share, store and update [all kinds of data](https://www.fool.com/investing/2018/04/11/20-real-world-uses-for-blockchain-technology.aspx), we see – and use – Git in much the same way.\n\nIn addition to version controlling code and the environment in which it lives, Git can also be used at a high level to facilitate the way a company actually functions, according to our CEO [Sid Sijbrandij](/company/team/#sytses).\n\nHe says GitLab is a prime example of how it can be done.\n\n## How we use Git to run GitLab, the company\n\n\"We’re not just trying to version our code and operations, we're also trying to version all the processes we have at the company, and we do that for a whole slew of reasons,\" says Sid. \"If you write your processes down, it's easier to change and for someone to propose a change. If it's all stored in people's heads, how are you going to change it? You'll have to create a presentation and make sure everyone reads it. But if it’s written down, it's faster to make a change and you're better able to communicate the context for it.\"\n\n### How Git has helped us to scale\n\nUsing Git to implement procedural changes within the company has helped GitLab shoulder growing pains, thanks to our [handbook](/handbook/).\n\n\"Although we're not a perfect company by any means, we've been able to scale really rapidly, onboard people and get them started with the work they have to do,\" Sid says. \"And I think our handbook and how we describe things is an important part of that. It's exciting to see it grow. 
The handbook is now over 2,000 pages, so people can't read everything anymore, but they can read the parts that are relevant to them, and it's really helping with organizational changes that are happening between different departments.\"\n\nSid admits running a business with Git collaboration can seem like a daunting task, especially for companies that did not start out functioning that way. But he urges business leaders to give the process a chance, pointing to a number of companies that are adopting Git as a way to make procedural changes, including O’Reilly Media and several law firms.\n\n## Two tips for adopting Git to run your business\n\n### 1. Evangelize from the top down\n\n\"First of all, this is super hard. It's unnatural and it requires constant campaigning from the top of the company,\" Sid said. \"The natural state is for all the documentation to get out of date, and for people to send each other emails and PowerPoints about the change they want to make without looking at the rest of the changes.\"\n\n### 2. Make processes easier to change\n\n\"What you frequently find in companies is that there's the official process, and then the process that people really use. You can prevent that by making processes easier to change. The reality is people are changing processes in a company every single day, and they have to make those changes quickly. So the harder you make it, the more diversions there will be between reality and what's in the handbook. Instead, empower everyone in the organization to make those changes and do so quickly. 
That is one of the most important things you can do.\"\n\n\"Our handbook is [Creative Commons](https://creativecommons.org/licenses/by-sa/4.0/), so feel free to use that as a starting point for anything that you do.\" [Tweet us](http://twitter.com/gitlab) if you do borrow from or adapt our handbook – we'd love to hear about it.\n\n[Cover image](https://unsplash.com/photos/mf-o1E7omzk) by [chuttersnap](https://unsplash.com/@chuttersnap) on Unsplash\n{: .note}\n","culture",[820,9,774,683,982],"workflow",{"slug":984,"featured":6,"template":687},"git-for-business-processes","content:en-us:blog:git-for-business-processes.yml","Git For Business Processes","en-us/blog/git-for-business-processes.yml","en-us/blog/git-for-business-processes",{"_path":990,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":991,"content":997,"config":1003,"_id":1005,"_type":13,"title":1006,"_source":15,"_file":1007,"_stem":1008,"_extension":18},"/en-us/blog/git-happens",{"title":992,"description":993,"ogTitle":992,"ogDescription":993,"noIndex":6,"ogImage":994,"ogUrl":995,"ogSiteName":672,"ogType":673,"canonicalUrls":995,"schema":996},"Git happens! 6 Common Git mistakes and how to fix them","Whether you added the wrong file, committed directly to master, or some other mishap, we've got you covered.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749678743/Blog/Hero%20Images/fix-common-git-mistakes.jpg","https://about.gitlab.com/blog/git-happens","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"Git happens! 
6 Common Git mistakes and how to fix them\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Sam Beckham\"}],\n        \"datePublished\": \"2018-08-08\",\n      }",{"title":992,"description":993,"authors":998,"heroImage":994,"date":1000,"body":1001,"category":726,"tags":1002},[999],"Sam Beckham","2018-08-08","\nWe all make mistakes, especially when working with something as complex as Git. But remember, Git happens!\n\n## What is Git?\n\nGit is free and open-source software for distributed code management and version control. It is distributed under the GNU General Public License Version 2. Git tracks changes in any set of files and is usually used for coordinating work among programmers collaboratively developing source code during software development. \n\nGit was created and released in 2005 by Linus Torvalds, who also developed Linux. The impetus for Git (which is an altering of the word “get”) was to generate an open-source version control system that performed better for the requirements of Linux kernel development. Available open-source systems at the time were not able to meet the [large-scale collaborative performance effort](https://www.techtarget.com/searchitoperations/definition/Git) required.\n\n## Benefits of using Git\n\nBesides delivering superior performance, Git also provides support for a distributed workflow and safeguards against corruption. There are several other benefits, such as:\n\n- superior performance when it comes to version control systems\n- the ability for simultaneous development because everyone has their own local copy of code and can work on it in tandem. \n- faster releases\n- security\n- flexibility\n- built-in integration \n- strong community support\n\nIf you're brand-new to Git, you can learn [how to start using Git on the command line](https://docs.gitlab.com/ee/gitlab-basics/start-using-git.html). Here's how we can fix six of the most common Git mistakes.\n\n## 1. Oops... 
I spelled that last commit message wrong\n\nAfter a good few hours of [coding](/solutions/source-code-management/), it's easy for a spelling error to sneak into your commit messages.\nLuckily, there's a simple fix.\n\n```bash\ngit commit --amend\n```\n\nThis will open up your editor and allow you to make a change to that last commit message.\nNo one needs to know you spelled, \"addded\" with three \"d\"s.\n\n## 2. Oops... I forgot to add a file to that last commit\n\nAnother common Git pitfall is committing too early. You missed a file, forgot to save it, or\nneed to make a minor change for the last commit to make sense. `--amend` is your friend\nonce again.\n\nAdd that missed file then run that trusty command.\n\n```bash\ngit add missed-file.txt\ngit commit --amend\n```\n\nAt this point, you can either amend the commit message or just save it to keep it the same.\n\n## 3. Oops... I added a file I didn't want in the repo\n\nBut what if you do the exact opposite? What if you added a file that you didn't want to commit?\nA rogue ENV file, a build directory, a picture of your cat that you accidentally saved to the wrong folder?\nIt's all fixable.\n\nIf all you did was stage the file and you haven't committed it yet, it's as simple as resetting that staged file:\n\n```bash\ngit reset /assets/img/misty-and-pepper.jpg\n```\n\nIf you've gone as far as committing that change, you need to run an extra step before:\n\n```bash\ngit reset --soft HEAD~1\ngit reset /assets/img/misty-and-pepper.jpg\nrm /assets/img/misty-and-pepper.jpg\ngit commit\n```\n\nThis will undo the commit, remove the image, then add a new commit in its place.\n\n## 4. Oops... 
I committed all those changes to the master branch\n\nSo you're working on a new feature and in your haste, you forgot to open a new branch for it.\nYou've already committed a load of files and now them commits are all sitting on the master branch.\nLuckily, [GitLab can prevent you from pushing directly to master](/blog/keeping-your-code-protected/).\nSo we can roll back all these changes to a new branch with the following three commands:\n\n*Note: Make sure you commit or stash your changes first, or all will be lost!*\n\n```bash\ngit branch future-brunch\ngit reset HEAD~ --hard\ngit checkout future-brunch\n```\n\nThis creates a new branch, then rolls back the master branch to where it was before you made\n changes, before finally checking out your new branch with all your previous changes intact.\n\n## 5. Oops... I made a spelling mistake in my branch name\n\nThe keen-eyed among you will notice a slight spelling error in my last example. It's almost\n3:00 PM and I haven't had lunch yet, so in my hunger, I've named our new branch `future-brunch`.\nDelicious.\n\nWe rename this branch in a similar way to how we rename a file with the `mv` command: by\n moving it to a new location with the correct name.\n\n```bash\ngit branch -m future-brunch feature-branch\n```\n\nIf you've already pushed this branch, there are a couple of extra steps required. We need to\ndelete the old branch from the remote and push up the new one:\n\n```bash\ngit push origin --delete future-brunch\ngit push origin feature-branch\n```\n\n## 6. Oops... I did it again\n\nThis command is for when everything has gone wrong. 
When you've copy-pasted one too\nmany solutions from Stack Overflow and your repo is in a worse state than it was when you started.\nWe've all been there.\n\n`git reflog` shows you a list of all the things you've done.\nIt then allows you to use Git's magical time-traveling skills to go back to any point in the past.\nI should note, this is a last resort thing and should not be used lightly.\nTo get this list, type:\n\n```bash\ngit reflog\n```\n\nEvery step we took, every move we made, Git was watching us.\nRunning that on our project gives us this:\n\n```bash\n3ff8691 (HEAD -> feature-branch) HEAD@{0}: Branch: renamed refs/heads/future-brunch to refs/heads/feature-branch\n3ff8691 (HEAD -> feature-branch) HEAD@{2}: checkout: moving from master to future-brunch\n2b7e508 (master) HEAD@{3}: reset: moving to HEAD~\n3ff8691 (HEAD -> feature-branch) HEAD@{4}: commit: Adds the client logo\n2b7e508 (master) HEAD@{5}: reset: moving to HEAD~1\n37a632d HEAD@{6}: commit: Adds the client logo to the project\n2b7e508 (master) HEAD@{7}: reset: moving to HEAD\n2b7e508 (master) HEAD@{8}: commit (amend): Added contributing info to the site\ndfa27a2 HEAD@{9}: reset: moving to HEAD\ndfa27a2 HEAD@{10}: commit (amend): Added contributing info to the site\n700d0b5 HEAD@{11}: commit: Addded contributing info to the site\nefba795 HEAD@{12}: commit (initial): Initial commit\n```\n\nTake note of the left-most column, as this is the index.\nIf you want to go back to any point in the history, run the below command, replacing `{index}` with that reference, e.g. `dfa27a2`.\n\n```bash\ngit reset HEAD@{index}\n```\n\nSo there you have six ways to get out of the most common Gitfalls.\n\n## More common Git problems \n\nThere are a number of tips for fixing common git problems. For starters, here are a couple of common ones: to indicate the end of command options for command line utilities, try using the double dash (--). 
If you want to undo a change, use git reset.\n\n- If you have a commit that is only in your local repository, you can amend it with the git commit — amend command.\n- Sometimes, you might find yourself adding files that you didn’t mean to commit. Git rm will remove it from both your staging area, as well as your file system. However, if that’s not the solution you were looking for, make sure you only remove the staged version and add the file to your .gitignore so you don’t make the same mistake again. \n- To fix a typo in a commit message or to add a file, use: git - amend.\n- If you want to remove files from staging before committing, use [“git restore”](https://medium.com/@basitalkaff/common-git-problems-and-how-to-fix-them-878ef750a015) to reset the pointer back to the last commit ID.\n- If you have a change of heart and want to remove changes from a commit before pushing and reverting back, use “git reset \u003Cspecific commit ID we want to go back>.”\n- Faulty commits sometimes make their way into the central repository. When that happens, instead of creating additional revert commits, just apply the necessary changes and use the --no-commit/-n option.\nInstead of having to reinvent the wheel, use the reuse recorded resolution feature to fix repetitive merge conflicts. Add \"git config --global rerere.enabled true\" to your global config to enable it for all projects. \n\nIf you prefer, you can manually create the directory: \n\n.git/rr-cache to enable it for each project.\n\n## How to prevent problems with your git repository\n\nIt’s important to consider git repository security for web projects. Why? When you deploy a [web page from a git repository](https://www.techtarget.com/searchsecurity/answer/How-can-developers-avoid-a-Git-repository-security-risk), you could also make the directory and its contents accessible. 
This gives an attacker the ability to access the metadata from URLs such as https://example.org/git/config.\n\nIf a git repository is checked out using HTTP authentication where the username and password to access the repository are incorporated as part of the URL, that can create an especially unsafe situation. Because this information is stored in the .git/config file, an attacker has direct access to credentials for the repository.\n\nTo avoid these risks and improve the security of a git repository, developers should refrain from using direct git checkouts on web deployments. Instead, they should copy files to the web root directory without the .git directory metadata. Alternatively, access to the .git directory can be bypassed in the server configuration. It's also a good idea to avoid storing passwords and secret tokens right in repositories.\n\nSome suggestions to [stop git repositories from getting too big](https://stackoverflow.com/questions/58679210/how-to-stop-git-repositories-from-getting-too-big): avoid cluttering the repository with large numbers of files, don’t include binary or office files that require huge commits in the number of lines edited, and from time to time, use commands like\ngit reflog expire --all --expire=now git gc --prune=now --aggressive.\n\nHere is an approach for [fixing a corrupted git repository](https://stackoverflow.com/questions/18678853/how-can-i-fix-a-corrupted-git-repository).\n\n## Some common git commands\n\nThere are hundreds of git commands programmers can use to change and track projects. 
Some of the [more common ones](https://shortcut.com/blog/common-git-commands-that-you-should-memorize#:~:text=13%20common%20Git%20commands%20that%20you%20should%20consider,compare%20unstaged%20files%20before%20committing%20...%20More%20items) are:\n\n**Create a new repository for storing code/making changes:** \n\nA new project requires a repository where your code is stored and changes can be made.\nCommand:\n\ngit init\n\nOr change a current directory into a Git repo using:\n\ngit init \u003Cdirectory>\n\n**Configure local and global values:**\n\nCommand:\n\ngit config --global user.email \u003Cyour-email> or git config -\n\n**Use cloning to get source code from your remote repo**\n\nWhen working on an existing project, you can use the clone command to create a copy of your remote rep in GitLab and make changes without overwriting the master version.\n\nWhen this command is used, you will get access to a copy of the source code on your local machine and make changes to it without compromising the master.\n\nTo download your project, use this:\n\ngit clone \u003Crepo URL>\n\n**Create a local workspace:**\n\nWhen collaborating with other developers on a project, using branches lets you modify and reference copies of the same portions of source code and merge them at a later point. 
This avoids a situation where developers are making changes to the same code at the same time, creating errors and broken code/features.\n\n[To create a new local branch](https://shortcut.com/blog/common-git-commands-that-you-should-memorize#:~:text=13%20common%20Git%20commands%20that%20you%20should%20consider,compare%20unstaged%20files%20before%20committing%20...%20More%20items):\n\ngit branch \u003Cbranch-name>\n\nPush this local branch to the remote repo with the following:\n\ngit push -u \u003Cremote> \u003Cbranch name>\n\nView existing branches on the remote repo with the following:\n\ngit branch or git branch—list\n\nAnd delete a branch with:\n\ngit branch -d \u003Cbranch-name> \n\n**Switch branches, inspect files and commits:**\n\nWith git checkout, you can move between the master branch and your copies locally, and it can be used to inspect the file and [commit history](/blog/keeping-git-commit-history-clean/). You will start out with the local clone of your master branch by default. You’ll need to run the command to switch between branches to make changes to a different local branch. One thing to note: make sure that you commit or stash any in-progress changes before switching; otherwise, you could encounter errors.\n\nCommand:\n\ngit checkout \u003Cname of your branch>\n\nOr create a new branch and switch to it with one command:\n\ngit checkout -b \u003Cname-of-your-branch>\n\nHave some Git tips of your own? 
Let us know in the comments below, we'd love to hear them.\n\nPhoto by [Pawel Janiak](https://unsplash.com/photos/WtRuYJ2EPMA?utm_source=unsplash&utm_medium=referral&utm_content=creditCopyText) on [Unsplash](https://unsplash.com/search/photos/mistake?utm_source=unsplash&utm_medium=referral&utm_content=creditCopyText)\n{: .note}\n",[9],{"slug":1004,"featured":6,"template":687},"git-happens","content:en-us:blog:git-happens.yml","Git Happens","en-us/blog/git-happens.yml","en-us/blog/git-happens",{"_path":1010,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1011,"content":1017,"config":1023,"_id":1025,"_type":13,"title":1026,"_source":15,"_file":1027,"_stem":1028,"_extension":18},"/en-us/blog/git-merge-fifteen-year-git-party",{"title":1012,"description":1013,"ogTitle":1012,"ogDescription":1013,"noIndex":6,"ogImage":1014,"ogUrl":1015,"ogSiteName":672,"ogType":673,"canonicalUrls":1015,"schema":1016},"Git Merge 2020: a celebration of Git","A look at Git Merge 2020 and a look forward to the next decade of remote, async, and powerful source code management.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749681154/Blog/Hero%20Images/GitLab-sponsoring-Git-Merge.jpg","https://about.gitlab.com/blog/git-merge-fifteen-year-git-party","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"Git Merge 2020: a celebration of Git\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Jordi Mon\"}],\n        \"datePublished\": \"2020-03-25\",\n      }",{"title":1012,"description":1013,"authors":1018,"heroImage":1014,"date":1020,"body":1021,"category":680,"tags":1022},[1019],"Jordi Mon","2020-03-25","\n\nAlmost 15 years ago [Linus Torvalds](https://www.linkedin.com/in/linustorvalds/) came out of retirement and released a project – Git – that would be adopted by millions who would in turn contribute over time to what is arguably the world's most powerful distributed version control 
system.\n\n![Git Merge 2020 kicking off](https://about.gitlab.com/images/blogimages/git-merge-2020/Entrance_Git_Merge.gif){: .center}\n\nIn early March, Git was celebrated at [Git Merge 2020](https://git-merge.com/#schedule/), an event that was sponsored by GitHub, GitLab and the [Software Freedom Conservancy (SFC)](https://sfconservancy.org/). A fair share of GitLab team members attended and actively participated in the birthday celebration. We thought we'd share a look at what we liked most.\n\n![Happy birthday Git](https://about.gitlab.com/images/blogimages/git-merge-2020/15_years_of_Git.jpg){: .shadow.medium.center}\n\n## Git police, stop! Open that trunk\n\nThere were lots of bad jokes like that one, but fortunately the content was much better than the jokes. Our users often say the one thing they like about GitLab is it makes Git understandable to them. It's nice to have validation every now and then and that is precisely what we felt during the talk titled **The Zen of Git** in which [Tianyu Pu](https://twitter.com/tianyupu), a software developer at Booking.com, explained in beautifully crafted slides how Git's internals work. By knowing how Git works she is able to approach Git less fearfully and be more productive using it in a day-to-day workflow. Judging by the warm round of applauses received when she finished her talk, we would argue she definitely achieved her goal. The clarity with which she presented each concept was encouraging so we suggest reading through [her deck](https://speakerdeck.com/tianyupu/the-zen-of-git).\n\n![Git 15 year life](https://about.gitlab.com/images/blogimages/git-merge-2020/Git_timeline.jpg){: .shadow.medium.center}\n\n[Ed Thomson](https://twitter.com/ethomson), co-maintainer of libgit2 and a GitHub employee, received some laughter from the audience the minute he was up on stage. His talk was about how lightweight, short-living branches merged fast into trunk – or master, as you wish (more terrible jokes!). 
He outlined great ideas to keep some sanity in your development branching model. To make this even more compelling, why not a Git workflow alignment chart?\n\n![Ed Thomson's Git workflow alignment chart](https://about.gitlab.com/images/blogimages/git-merge-2020/Git-workflow-alignment-chart.png){: .shadow.medium.center}\n\nEd suggested that pairing this pattern with [continuous delivery practices](/topics/continuous-delivery/) would make a perfect combo. Git flow, however, didn’t get the best of Ed's talk but it is noteworthy that Git flow’s author [Vincent Driessen](https://twitter.com/nvie) shared some timely advice [on his blog](https://nvie.com/posts/a-successful-git-branching-model/) while Git Merge was taking place:\n\n> If your team is doing continuous delivery of software, I would suggest to\n> adopt a much simpler workflow instead of trying to\n> shoehorn git-flow into your team.\n\nBut if there was a star that day, it certainly was [Derrick Stolee](https://twitter.com/stolee?lang=en) from Microsoft. Derrick and his team have recently released [Scalar](https://devblogs.microsoft.com/devops/introducing-scalar/). Barebones Git or Git in combination with the VFS protocol can still struggle when handling large repos like the one hosting Windows' source code. Scalar is an open source project aimed at accelerating Git's workflow regardless of the size of the repos.\n\nI asked Derrick how he and his team combined the request from his employer Microsoft and the larger goals of the Git community which may not be in alignment. For him the answer is simple: Microsoft thinks of Scalar as a good solution for clients and internal teams. The company believes giving Scalar to Git will only make it better since most of the community members are Git veterans and will be able to improve the feature. 
When designing Scalar Derrick's team always had Git's architecture in mind and the plan is to contribute it to [Git's client](https://devblogs.microsoft.com/devops/introducing-scalar/#git-future). I believe this speaks volumes about Derrick's team's ability to solve a complex problem but also at the same time care about the larger community and Git's design. This is just one example of how enterprises and the larger Git community are getting together and making Git perform better and in more use cases.\n\nAnd Scalar does not only just apply to Window's repo, Office's repo or video game repos. It is having a real-world and timely impact. This [repo](https://github.com/FoldingAtHome/coronavirus/issues/41#issuecomment-602186402) that is collecting real-time datasets to help with the COVID-19 pandemic is getting bigger every minute thanks to the input that many, including [some GitLab teams](https://about.gitlab.com/handbook/engineering/#foldinghome-and-covid-19), are offering. However, it needs technology like Scalar to handle it. \n\nAt the end of our chat Derrick asked me if I knew about the Japanese principle of [Ikigai](https://en.wikipedia.org/wiki/Ikigai):\n\n> Try to find something for your professional career that is fulfilling, something you are good at, something the world needs and something you'll get paid for.\n\nIt's true that contributing features to Git that are useful in such dire times must be a reason to be part of the Git community.\n\n## Work in the open: companies collaborating for the good of Git\n\nScalar isn't the only recent addition to Git – Partial Clone was contributed to Git by [Jeff Hostetler](https://twitter.com/jeffhostetler) from Microsoft and Jonathan Tan from Google. In Derrick's opinion, both of them came from different perspectives to solve the same problem. Had they not collaborated on their approach – even with the community's input – they wouldn't have arrived at the same successful feature that Partial Clone is now. 
Another very recent example of this same collaboration is some of the updates [Git v2.26 comes with](https://raw.githubusercontent.com/git/git/master/Documentation/RelNotes/2.26.0.txt). And [Peff](https://github.com/peff) from GitHub and [Christian Couder](https://gitlab.com/chriscool) from GitLab contributed changes to the way Git handles packfiles.\n\n## GitLab experts all over: to 15 more years!\n\nOverall we found a lot of validation in GitLab's own work, not only upstream to Git with new features like the ones already mentioned, but also downstream to our users. GitLab gets better at making Git more easily usable and proposes development workflows, like [GitLab Flow](https://docs.gitlab.com/ee/topics/gitlab_flow.html), that allow our users to be fast and productive while keeping a neat code base. GitLab is making [Partial Clone](https://about.gitlab.com/blog/partial-clone-for-massive-repositories/) progressively more stable across any GitLab instance. (If you are already using partial clone, or would like to help us test partial clone on a large project, please get in touch with [James Ramsay](mailto:jramsay@gitlab.com), the group manager, product for Create at GitLab, me [Jordi Mon](mailto:jmon@gitlab.com) or your account manager.)\n\n![GitLab team having fun](https://about.gitlab.com/images/blogimages/git-merge-2020/GitLab_working_together.jpg){: .shadow.medium.center}\n\nWhile our very own [James Ramsay](https://gitlab.com/jramsay) participated in an expert panel in [last year's event](https://github.blog/wp-content/uploads/2019/02/190201_GithubBrussels2019_0330.jpg?resize=1024%2C683?w=1024), this year [Zeger-Jan van de Weg](https://gitlab.com/zj-gitlab) was on stage for a stump the experts panel.\n\n{::options parse_block_html=\"false\" /}\n\n\u003Cdiv class=\"center\">\n\n\u003Cblockquote class=\"twitter-tweet\">\u003Cp lang=\"en\" dir=\"ltr\">\u003Ca href=\"https://twitter.com/gitlab?ref_src=twsrc%5Etfw\">@gitlab\u003C/a>’s \u003Ca 
href=\"https://twitter.com/ZJvandeWeg?ref_src=twsrc%5Etfw\">@ZJvandeWeg\u003C/a> on a stump the experts panel at \u003Ca href=\"https://twitter.com/hashtag/GitMerge?src=hash&amp;ref_src=twsrc%5Etfw\">#GitMerge\u003C/a> \u003Ca href=\"https://t.co/jfgC5ZxzWa\">pic.twitter.com/jfgC5ZxzWa\u003C/a>\u003C/p>&mdash; Ray Paik (@rspaik) \u003Ca href=\"https://twitter.com/rspaik/status/1235333465203183618?ref_src=twsrc%5Etfw\">March 4, 2020\u003C/a>\u003C/blockquote> \u003Cscript async src=\"https://platform.twitter.com/widgets.js\" charset=\"utf-8\">\u003C/script>\n\n\u003C/div>\n\n\u003Cdiv class=\"center\">\n\n\u003Cblockquote class=\"twitter-tweet\">\u003Cp lang=\"en\" dir=\"ltr\">Had a great time meeting Git community members at \u003Ca href=\"https://twitter.com/hashtag/GitMerge?src=hash&amp;ref_src=twsrc%5Etfw\">#GitMerge\u003C/a> 2020 yesterday! It was awesome being there as part of the \u003Ca href=\"https://twitter.com/gitlab?ref_src=twsrc%5Etfw\">@gitlab\u003C/a> team and coming together with folk from \u003Ca href=\"https://twitter.com/github?ref_src=twsrc%5Etfw\">@github\u003C/a> \u003Ca href=\"https://twitter.com/Google?ref_src=twsrc%5Etfw\">@Google\u003C/a> \u003Ca href=\"https://twitter.com/conservancy?ref_src=twsrc%5Etfw\">@conservancy\u003C/a>, and many others, to collaborate and then celebrate Git’s upcoming 15th anniversary! \u003Ca href=\"https://t.co/crXr6iT5qI\">pic.twitter.com/crXr6iT5qI\u003C/a>\u003C/p>&mdash; Nuritzi Sanchez (@1nuritzi) \u003Ca href=\"https://twitter.com/1nuritzi/status/1235655639554117637?ref_src=twsrc%5Etfw\">March 5, 2020\u003C/a>\u003C/blockquote> \u003Cscript async src=\"https://platform.twitter.com/widgets.js\" charset=\"utf-8\">\u003C/script>\n\n\u003C/div>\n\nMingling around with the rest of the community was hands down the best part of Git Merge 2020. 
It was so much fun to be part of a welcoming, inclusive community.\n\n![GitLab's team having fun](https://about.gitlab.com/images/blogimages/git-merge-2020/GitLab_team_chilling_out.jpg){: .shadow.medium.center}\n\nFor all these reasons and more we would love our involvement to be ever-growing with Git Merge. That's why we look forward to Git Merge 2021! 15 years have passed and Git is still in its best moment.\n",[276,9,683],{"slug":1024,"featured":6,"template":687},"git-merge-fifteen-year-git-party","content:en-us:blog:git-merge-fifteen-year-git-party.yml","Git Merge Fifteen Year Git Party","en-us/blog/git-merge-fifteen-year-git-party.yml","en-us/blog/git-merge-fifteen-year-git-party",{"_path":1030,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1031,"content":1037,"config":1044,"_id":1046,"_type":13,"title":1047,"_source":15,"_file":1048,"_stem":1049,"_extension":18},"/en-us/blog/git-not-just-for-developers",{"title":1032,"description":1033,"ogTitle":1032,"ogDescription":1033,"noIndex":6,"ogImage":1034,"ogUrl":1035,"ogSiteName":672,"ogType":673,"canonicalUrls":1035,"schema":1036},"Git: Not just for developers","How one company helps video editors, developers, and project managers to collaborate on interactive video, by leveraging the power of open source.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749670464/Blog/Hero%20Images/gitlab-loves-open-source.jpg","https://about.gitlab.com/blog/git-not-just-for-developers","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"Git: Not just for developers\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Opher Vishnia\"},{\"@type\":\"Person\",\"name\":\"Roy Taragan\"}],\n        \"datePublished\": \"2018-05-24\",\n      }",{"title":1032,"description":1033,"authors":1038,"heroImage":1034,"date":1041,"body":1042,"category":680,"tags":1043},[1039,1040],"Opher Vishnia","Roy Taragan","2018-05-24","\nIn this post I’d 
like to tell you about how, at [Eko](https://helloeko.com/), we’re using GitLab CE to allow professionals from different disciplines, such as video editors, designers, and software engineers, to collaborate on creating and publishing Interactive Video projects using the Eko platform.\n\nEko is a unique company. I know practically every company says that about itself, but for us that’s doubly true in that both our platform as well as our users, and our users of users, take part and actively contribute to creative, experimental ideas and technology. At the core of what we do is an exciting new medium called Interactive Video, which enhances storytelling by bridging the gap between the creator and the viewer. The projects themselves are somewhere between a TV show and a video game. These embody a range of creativity - from the [official music video for Bob Dylan’s “Like a Rolling Stone,”](https://helloeko.com/mindblown/beats-and-rhymes?publisherID=gitlab) through choose-your-own-adventure style comedies and high-caliber movie studio productions like #WarGames.\n\n[![Bob Dylan's Like a Rolling Stone video](https://about.gitlab.com/images/blogimages/eko_mind_blown.png)](https://helloeko.com/mindblown/beats-and-rhymes?publisherID=gitlab)\n\nOur development body creates all the technology for both viewing and authoring these experiences, which are created by small indies as well as big studios and production houses. At the end of the day though, all of these projects, regardless of whether they’re playing on desktop, mobile, or the Xbox, are built with web technologies and run in a browser. Each project is served as a web app, consisting of HTML, JavaScript and CSS files, as well as its video, audio and image assets.\n\nTo create these projects, Eko offers a web-based, drag-and-drop interface called Eko Studio. 
This software provides project creators with an easy interface for uploading and assembling video, connecting the different videos to each other, creating GUI to define the underlying creativity and finally publishing the finished product.\n\n![Eko Studio](https://about.gitlab.com/images/blogimages/eko-guest-post/eko-studio.png)\n\nIn cases where extra logic and functionality is required, such that isn’t yet covered by the set of features in Eko Studio, we offer the Eko SDK, which enables developers to extend the Studio’s functionality by writing their own custom JS and CSS code.\n\nThe interesting thing about the creation process of our Interactive Video projects is because of their scope and multi-disciplinary nature, different people with different roles all work on the same project at the same time. For example, a video editor might upload a new scene, a project manager would change the SEO copy and a developer might implement new GUI or functionality. One of the challenges we faced at Eko is that all of this needs to be synchronised and shared by all. The experience needs to be fluid and cohesive for all types of users, regardless of their role.\n\n![Eko Studio commits](https://about.gitlab.com/images/blogimages/eko-guest-post/eko-studio-commits.png)\n\n## Using open source to enable collaboration\n\nSo what type of software allows for multiple people to work on the same project without stepping on each other’s toes? Git, of course! With that in mind we set out to find how can we use Git as a backend that could serve our creators, developers and non-developers alike.\n\nIn Eko Studio, users can activate the feature that allows extending a project with code. Behind the scenes, the studio then employs GitLab’s API to create a new repository, generates all the code reflecting the current state of the project, and pushes it as the *initial commit*. 
From this point forward, each time a preview or published version of the project is generated, the process will begin by first pulling the latest version of the code from the repo. Using GitLab’s webhook for push events combined with Firebase, any time a commit is pushed to the repository, the user in Eko Studio is notified and the UI is updated accordingly. The user in Eko Studio can see all the commits (also fetched using the GitLab API) listed as versions, and can revert to an earlier version.\n\n>The less tech-savvy users aren’t even fully aware that by editing the project or adding content they are in fact publishing commits in the project repo\n\nThe cool thing here though, is that the Eko Studio itself acts as a Git client behind the scenes. The less tech-savvy users aren’t even fully aware that by editing the project or adding content they are in fact publishing commits in the project repo. The studio interface makes this completely transparent for them. Changes to the project made in Eko Studio are translated into Git commits in the project repo. Over on the dev side though, the software engineers use the Git interface itself using their favorite code editor and Git client.\n\n![Eko Studio code panel](https://about.gitlab.com/images/blogimages/eko-guest-post/eko-studio-code-panel.png)\n\nThe fact that GitLab is open source enabled us to custom tailor a solution for our users with minimal changes, leveraging APIs and webhooks to connect our own infrastructure. The readily available AMI meant that we can easily spool up our own GitLab CE instances without a complicated setup process. 
While our use case is very specific, the fact we’ve been able to use GitLab CE with minimal effort to implement our platform and tools for creating Interactive Video definitely highlights the flexibility and capabilities of GitLab!\n",[9,820,683],{"slug":1045,"featured":6,"template":687},"git-not-just-for-developers","content:en-us:blog:git-not-just-for-developers.yml","Git Not Just For Developers","en-us/blog/git-not-just-for-developers.yml","en-us/blog/git-not-just-for-developers",{"_path":1051,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1052,"content":1058,"config":1065,"_id":1067,"_type":13,"title":1068,"_source":15,"_file":1069,"_stem":1070,"_extension":18},"/en-us/blog/git-performance-on-nfs",{"title":1053,"description":1054,"ogTitle":1053,"ogDescription":1054,"noIndex":6,"ogImage":1055,"ogUrl":1056,"ogSiteName":672,"ogType":673,"canonicalUrls":1056,"schema":1057},"What we're doing to fix Gitaly NFS performance regressions","How we're improving our Git IO patterns to fix performance regressions when running Gitaly on NFS.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749670065/Blog/Hero%20Images/git-performance-nfs.jpg","https://about.gitlab.com/blog/git-performance-on-nfs","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"What we're doing to fix Gitaly NFS performance regressions\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"James Ramsay\"},{\"@type\":\"Person\",\"name\":\"Zeger-Jan van de Weg\"}],\n        \"datePublished\": \"2019-07-08\",\n      }",{"title":1053,"description":1054,"authors":1059,"heroImage":1055,"date":1062,"body":1063,"category":726,"tags":1064},[1060,1061],"James Ramsay","Zeger-Jan van de Weg","2019-07-08","\nFrom the start, Gitaly, GitLab's service that is the interface to our Git data,\nfocused on removing the dependency on NFS. 
We achieved this task at the end\nof the summer 2018, when the [NFS drives were unmounted on GitLab.com][gitaly-nfs-blog].\nThe migration was geared towards improving the availability of Git data at\nGitLab and correctness, that is: fixing bugs. To an extent, performance\nwas an afterthought. By rewriting most of the RPCs in Go there were side effects\nthat positively improved performance, but conversely there were also occasions\nwhere performance wasn't addressed immediately, but rather added to the backlog\nfor the next iteration.\n\nSince releasing Gitaly 1.0, and updating GitLab to use Gitaly instead of Rugged\nfor all Git operations, we have observed severe performance regressions for\nlarge GitLab instances when using NFS. To address these performance problems in\nGitLab 11.9, we added [feature flags][feature-flag-docs] to enable\nRugged implementations that improve performance for affected GitLab instances.\nThese have been back ported to 11.5-11.8.\n\n### So what's the problem?\n\nWhile the migration was under way, there were noticeable performance regressions.\nIn most cases, these were so-called N + 1 access patterns. One example was the\n[pipeline index view](https://gitlab.com/gitlab-org/gitlab-ce/pipelines/), where\neach pipeline runs on a commit. On that page, GitLab used to call the `FindCommit`\nRPC for each pipeline. To improve performance, a new RPC was added;\n`ListCommitsByOid`. In which case, the object IDs for the commits were collected\nfirst, once request was made to Gitaly to get all the commits and return them to\ncontinue rendering the view.\n\nThis approach was, and still is, successful. However, detecting these N + 1\nqueries is hard. When GitLab is run for development as part of the GDK, or\nduring testing, a special N + 1 detector will raise an error if an N + 1\noccurred. This approach has several shortcomings, for one; most tests will only\ntest the behavior of one entity, not 20. 
This reduces the likelihood of the\nerror being raised. There is also a way to silence N + 1 errors, for example:\n\n```ruby\nproject = Project.find(1)\n\nGitalyClient.allow_n_plus_1 do\n  project.pipelines.last(20).each do |pipeline|\n    project.repository.find_commit(pipeline.sha)\n  end\nend\n\n# The better solution would be\n\nshas = project.pipelines.last(20).select(&:sha)\nrepository.list_commits_by_oid(shas)\n```\n\nWhatever happened in that block would not be counted. For each of these blocks\nissues were created and added to [an epic][epic-nplus1], however, little\nprogress was made by the teams who had bypassed these checks in this way. This\nwas primarily because these performance issues were not a big\nproblem for GitLab.com, despite the fact they had become a problem for our customers.\n\nThe detected N + 1 issues included a lot of Git object read operations, for\nexample the `FindCommit` RPC. This is especially bad because this requires a\nnew Git process to be invoked to fetch each commit. If a millisecond later\nanother request comes in for the same repository, Gitaly will invoke Git again\nand Git will do all this work again. Before the migration and when GitLab.com\nwas still using NFS, GitLab leveraged Rugged, and used memoization to keep around\nthe Rugged Repository until the Rails request was done. This allowed Rugged to\nload part of the Git repository into memory for faster access for subsequent\nrequests. This property was not recreated in Gitaly for some time.\n\n## Enter cat-file cache\n\nIn GitLab 12.1, Gitaly will cache a repository per Rails session to recreate this\nbehavior with a feature called ['cat-file' cache](https://gitlab.com/gitlab-org/gitaly/merge_requests/1203).\nTo explain how this cache works and its name, it should be noted that objects\nin Git are compressed using [zlib][zlib]. 
This means that a commit object\nisn't packed and can be located on disk, it seemingly contains garbage:\n\n```\n# This example is an empty .gitkeep file\n$ cat .git/objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391\nxKOR0`\n```\n\nNow cat-file will query for the object, and when using the `-p` flag pretty print\nit. In the following example, the current [Gitaly license][gitaly-mit].\n\n```\n$ git cat-file -p c7344c56da804e88a0bca979a53e1ec1c8b6021e\nThe MIT License (MIT)\n... ommitted\n```\n\nCat-file has another flag, `--batch`, which allows for multiple objects to be\nrequested to the same process through STDIN.\n\n```\n$ git cat-file --batch\nc7344c56da804e88a0bca979a53e1ec1c8b6021e\nc7344c56da804e88a0bca979a53e1ec1c8b6021e blob 1083\nThe MIT License (MIT)\n\n... ommitted\n```\n\nInspecting the Git process using [strace][strace] allows us to inspect how Git\namortizes expensive operations to improve performance. The output on STDOUT and\nthe strace are available [as a snippet](https://gitlab.com/snippets/1858975).\n\nThe process is reading the first input from STDIN, or file descriptor 0, at\n[line 141](https://gitlab.com/snippets/1858975#L141). It starts writing the output\nabout [40 syscalls later](https://gitlab.com/snippets/1858975#L180). In between\nthere are two important operations performed: an\n[mmap of the pack file index](https://gitlab.com/snippets/1858975#L167), and\nanother [mmap of the pack file itself](https://gitlab.com/snippets/1858975#L177).\nThese operations store part of these files in memory, so that they are available\nthe next time they are needed.\n\nIn the snippet, we've requested the same blob on the same process again. 
This a\nsyntactic follow-up request, but even when the next request would've been `HEAD`\nGit would have to do a considerable amount less work to come up with the object\nthat `HEAD` deferences to.\n\nKeeping a cat-file process around for subsequent requests was shipped in\nGitLab 11.11 behind the `gitaly_catfile-cache` feature flag, and will be\n[enabled by default][remove-ff] in GitLab 12.1.\n\n### Next steps\n\nThe `cat-file` cache is one of many improvements being made to improve Git IO\npatterns in GitLab, to mitigate slow IO when using NFS and improve performance\nof GitLab. Particularly, progress has been made in GitLab 11.11, and continues\nto be made in eliminating the worst N + 1 access patterns from GitLab. You can\nfollow [gitlab-org&1190][epic-worst-io] for\nthe full plan and progress.\n\nThe Gitaly team's highest priority is\n[automatically enabling Rugged][automatic-rugged]\nfor GitLab servers using NFS to immediately mitigate the performance\nregressions until performance improvements are sufficiently complete in GitLab\nand Gitaly, allowing Rugged to again be removed.\n\nIn the future, we will remove the need for NFS with\n[High Availability for Gitaly][ha-epic], providing both performance and\navailability, and eliminating the burden of maintaining an NFS cluster.\n\nCover image by [Jannes Glas](https://unsplash.com/@jannesglas) on [Unsplash](https://unsplash.com/photos/P6iOpqQpwwU)\n{: .note}\n\n[automatic-rugged]: https://gitlab.com/gitlab-org/gitlab-ce/issues/60931\n[epic-nplus1]: https://gitlab.com/groups/gitlab-org/-/epics/827\n[epic-worst-io]: https://gitlab.com/groups/gitlab-org/-/epics/1190\n[feature-flag-docs]: https://docs.gitlab.com/ee/administration/nfs.html#improving-nfs-performance-with-gitlab\n[gitaly-mit]: https://gitlab.com/gitlab-org/gitaly/blob/1b09f13374be5b272d40b3b089372adae2801f81/LICENSE\n[gitaly-nfs-blog]: /2018/09/12/the-road-to-gitaly-1-0/\n[ha-epic]: https://gitlab.com/groups/gitlab-org/-/epics/842\n[remove-ff]: 
https://gitlab.com/gitlab-org/gitaly/issues/1671\n[strace]: https://strace.io/\n[zlib]: https://www.zlib.net/\n",[9,684],{"slug":1066,"featured":6,"template":687},"git-performance-on-nfs","content:en-us:blog:git-performance-on-nfs.yml","Git Performance On Nfs","en-us/blog/git-performance-on-nfs.yml","en-us/blog/git-performance-on-nfs",{"_path":1072,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1073,"content":1078,"config":1084,"_id":1086,"_type":13,"title":1087,"_source":15,"_file":1088,"_stem":1089,"_extension":18},"/en-us/blog/git-protocol-v2-enabled-for-ssh-on-gitlab-dot-com",{"title":1074,"description":1075,"ogTitle":1074,"ogDescription":1075,"noIndex":6,"ogImage":953,"ogUrl":1076,"ogSiteName":672,"ogType":673,"canonicalUrls":1076,"schema":1077},"Git Protocol v2 now enabled for SSH on GitLab.com","Fetch faster using Git Protocol v2 – here's how.","https://about.gitlab.com/blog/git-protocol-v2-enabled-for-ssh-on-gitlab-dot-com","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"Git Protocol v2 now enabled for SSH on GitLab.com\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"James Ramsay\"}],\n        \"datePublished\": \"2018-12-10\",\n      }",{"title":1074,"description":1075,"authors":1079,"heroImage":953,"date":1080,"body":1081,"category":726,"tags":1082},[1060],"2018-12-10","\n\nGitLab added support for [Git Protocol v2 over HTTP and SSH in GitLab 11.4](/releases/2018/10/22/gitlab-11-4-released/#git-protocol-v2), and enabled Protocol v2 over HTTP on GitLab.com, but not for SSH. On Nov. 23, we enabled [Git Protocol v2 over SSH on GitLab.com](https://gitlab.com/gitlab-com/gl-infra/infrastructure/issues/5244). You can view Git Protocol usage on our [public dashboard](https://dashboards.gitlab.com/d/pqlQq0xik/git-protocol-versions?refresh=5m&orgId=1).\n\nGit Protocol v2 is supported from Git v2.18.0 and is opt-in. 
To enable globally, run `git config --global protocol.version 2`.\n\n## What Git Protocol v2 means for you\n\nGit 2.18 introduced support for Protocol v2, which defines how clones, fetches, and pushes are communicated between the client (your computer) and the server (GitLab). The new [wire protocol](https://www.kernel.org/pub/software/scm/git/docs/technical/pack-protocol.html) improves the performance of fetch commands and enables future protocol improvements. [Read more about Protocol v2](https://opensource.googleblog.com/2018/05/introducing-git-protocol-version-2.html) in the release post by the author of the change.\n\nTo see the reduction in network traffic with Protocol v2 you can run the commands below:\n\n```\n# Original Git wire protocol\nGIT_TRACE_PACKET=1 git -c protocol.version=0 ls-remote git@gitlab.com:gitlab-org/gitlab-ce.git master\n\n# New Git wire protocol v2\nGIT_TRACE_PACKET=1 git -c protocol.version=2 ls-remote git@gitlab.com:gitlab-org/gitlab-ce.git master\n```\n\nIn moving from Protocol v0 to v2, on this repo the number of lines (\"packets\") sent behind the scenes drops from over 36,000 to fewer than 30.\n",[9,941,1083],"releases",{"slug":1085,"featured":6,"template":687},"git-protocol-v2-enabled-for-ssh-on-gitlab-dot-com","content:en-us:blog:git-protocol-v2-enabled-for-ssh-on-gitlab-dot-com.yml","Git Protocol V2 Enabled For Ssh On Gitlab Dot Com","en-us/blog/git-protocol-v2-enabled-for-ssh-on-gitlab-dot-com.yml","en-us/blog/git-protocol-v2-enabled-for-ssh-on-gitlab-dot-com",{"_path":1091,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1092,"content":1097,"config":1102,"_id":1104,"_type":13,"title":1105,"_source":15,"_file":1106,"_stem":1107,"_extension":18},"/en-us/blog/git-pull-vs-git-fetch-whats-the-difference",{"title":1093,"description":1094,"ogTitle":1093,"ogDescription":1094,"noIndex":6,"ogImage":912,"ogUrl":1095,"ogSiteName":672,"ogType":673,"canonicalUrls":1095,"schema":1096},"Git pull vs. git fetch: What's the difference? 
","Git pull is a Git command that performs both git fetch and git merge simultaneously. This article outlines the characteristics and appropriate uses of each.","https://about.gitlab.com/blog/git-pull-vs-git-fetch-whats-the-difference","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"Git pull vs. git fetch: What's the difference? \",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"GitLab\"}],\n        \"datePublished\": \"2024-09-24\",\n      }",{"title":1093,"description":1094,"authors":1098,"heroImage":912,"date":1099,"body":1100,"category":680,"tags":1101},[917],"2024-09-24","The Git command is very popular as a [distributed version control system](https://about.gitlab.com/topics/version-control/benefits-distributed-version-control-system/) and is used when synchronization with a remote repository is necessary. The developer needs to choose the appropriate commands based on the project's needs. In this article, we will explain the basics and differences between git fetch and git pull, and provide a detailed explanation of their respective use cases. \n\nTable of contents \n- [Git fetch and git pull basics](#git-fetch-and-git-pull-basics)\n- [What is git fetch?](#what-is-git-fetch%3F)\n- [What is git pull?](#what-is-git-pull%3F)\n- [When to use git fetch](#when-to-use-git-fetch)\n- [When to use git pull](#when-to-use-git-pull)\n- [Git fetch and git pull FAQs](#git-fetch-and-git-pull-faqs)\n\n## Git fetch and git pull basics \n\nGit fetch and git pull are both Git commands used to retrieve update information from a remote repository. So, how do they differ? Git fetch downloads the changes from the remote repository to the local repository but does not make any changes to the current working directory. Since the changes are not merged into the local branch, you can check the changes from the remote repository without interrupting your current work. 
On the other hand, git pull retrieves the latest changes from the remote repository like git fetch, but it also automatically merges those changes into the current branch. In contrast to git fetch, git pull directly applies the changes from the remote repository to the local working directory.\n\n## What is git fetch? \nThe git fetch command retrieves the latest commit history from the remote repository, but it does not affect the local working directory. Even after fetching remote changes, they are not reflected in the local branch. It is primarily used when you want to retrieve the latest status from the remote repository and review the changes before they are reflected in the local repository. To apply the retrieved changes to the local branch, you need to manually run git merge or [git rebase](https://docs.gitlab.com/ee/topics/git/git_rebase.html).\n\n## What is git pull? \nThe git pull command combines `git fetch` and `git merge` (or `git rebase`) into a single command. This allows you to fetch changes from the remote repository and automatically integrate them into the current local branch. \n\nWhile git fetch retrieves changes from the remote repository without applying them to the local branch, running git pull automatically integrates the changes from the remote repository into the local branch. \n\nGit pull is suitable for quickly reflecting remote changes in the local branch, but it can lead to conflicts, so caution is needed, especially when working with multiple people. \n\n## When to use git fetch \nGit fetch is a command used to retrieve the latest information from a remote repository. The retrieved information is not directly reflected in the local branch. Using git pull will reflect all remote branches, including incorrect or problematic ones, in the local branch. 
\n\nWhen changes are made simultaneously on both remote and local branches, or when there are new users on the team, it is safer to use git fetch to retrieve the remote branch contents first and then perform merge or rebase. \n\n## When to use git pull \nGit pull is a command that performs more processes compared to git fetch. Git pull can perform both git fetch and additionally execute git merge or git rebase. For this reason, git pull is recommended when you want to quickly reflect changes from the remote repository in the local branch. \n\n## Git fetch and git pull FAQs\n\n### What is the difference between git pull and git fetch? \nGit pull is a command that performs git fetch followed by git merge or git rebase. While git fetch does not affect the local repository, git pull automatically synchronizes changes from the remote repository with the local repository. \n\n### What precautions should be taken when using git pull? \nWhen executing git pull, there may be conflicts between remote and local changes. Merge conflicts are particularly likely to occur, so if conflicts arise, they need to be resolved manually. Additionally, using git pull --rebase allows you to incorporate the latest changes while performing a rebase. \n\n### What is git fetch used for? \nGit fetch is useful for checking and retrieving the latest status of the remote repository. However, the changes retrieved are not automatically reflected in the local branch; git fetch is used to synchronize the local and remote repositories. 
\n\n## Read more\n- [What's new in Git 2.46](https://about.gitlab.com/blog/whats-new-in-git-2-46-0/)\n- [Learn Git](https://docs.gitlab.com/ee/topics/git/)\n- [Learn about GitLab Gitaly](https://docs.gitlab.com/ee/administration/gitaly/)",[9,683],{"slug":1103,"featured":6,"template":687},"git-pull-vs-git-fetch-whats-the-difference","content:en-us:blog:git-pull-vs-git-fetch-whats-the-difference.yml","Git Pull Vs Git Fetch Whats The Difference","en-us/blog/git-pull-vs-git-fetch-whats-the-difference.yml","en-us/blog/git-pull-vs-git-fetch-whats-the-difference",{"_path":1109,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1110,"content":1115,"config":1122,"_id":1124,"_type":13,"title":1125,"_source":15,"_file":1126,"_stem":1127,"_extension":18},"/en-us/blog/git-ransom-campaign-incident-report-atlassian-bitbucket-github-gitlab",{"title":1111,"description":1112,"ogTitle":1111,"ogDescription":1112,"noIndex":6,"ogImage":953,"ogUrl":1113,"ogSiteName":672,"ogType":673,"canonicalUrls":1113,"schema":1114},"Git ransom campaign incident report","This is a coordinated effort to help educate and inform users on secure best practices relating to the recent Git ransomware incident.","https://about.gitlab.com/blog/git-ransom-campaign-incident-report-atlassian-bitbucket-github-gitlab","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"Git ransom campaign incident report\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Atlassian Bitbucket, GitHub, GitLab\"}],\n        \"datePublished\": \"2019-05-14\",\n      }",{"title":1111,"description":1112,"authors":1116,"heroImage":953,"date":1118,"body":1119,"category":298,"tags":1120},[1117],"Atlassian Bitbucket, GitHub, GitLab","2019-05-14","\n\nToday, Atlassian Bitbucket, GitHub, and GitLab are issuing a joint blog post, in a coordinated effort to help educate and inform users of the three platforms on secure best practices relating to the recent 
Git ransomware incident. Though there is no evidence Atlassian Bitbucket, GitHub, or GitLab products were compromised in any way, we believe it’s important to help the software development community better understand and collectively take steps to protect against this threat.\n\nOn Thursday, May 2, the security teams of Atlassian Bitbucket, GitHub, and GitLab learned of a series of user account compromises across all three platforms. These account compromises resulted in a number of public and private repositories being held for ransom by an unknown actor. Each of the teams investigated and assessed that all account compromises were the result of unintentional user credential leakage by users or other third parties, likely on systems external to Bitbucket, GitHub, or GitLab.\n\nThe security and support teams of all three companies have taken and continue to take steps to notify, protect, and help affected users recover from these events. Further, the security teams of all three companies are also collaborating closely to further investigate these events in the interest of the greater Git community. At this time, we are confident that we understand how the account compromises and subsequent ransom events were conducted. This coordinated blog post will outline the details of the ransom event, provide additional information on how our organizations protect users, and arm users with information on recovering from this event and preventing others.\n\n## Event details\n\nOn the evening of May 2 (UTC), all three companies began responding to reports that user repositories, both public and private, were being wiped and replaced with a single file containing the following ransom note:\n\n> To recover your lost data and avoid leaking it: Send us 0.1 Bitcoin (BTC) to our Bitcoin address 1ES14c7qLb5CYhLMUekctxLgc1FV2Ti9DA and contact us by Email at admin@gitsbackup.com with your Git login and a Proof of Payment. 
If you are unsure if we have your data, contact us and we will send you a proof. Your code is downloaded and backed up on our servers. If we dont receive your payment in the next 10 Days, we will make your code public or use them otherwise.\n\nThrough immediate independent investigations, all three companies observed that user accounts were compromised using legitimate credentials including passwords, app passwords, API keys, and personal access tokens. Subsequently, the bad actor performed command line Git pushes to repositories accessible to these accounts at very high rates, indicating automated methods. These pushes overwrote the repository contents with the ransom note above and erased the commit history of the remote repository. Incident responders from each of the three companies began collaborating to protect users, share intelligence, and identify the source of the activity. All three companies notified the affected users and temporarily suspended or reset those accounts in order to prevent further malicious activity.\n\nDuring the course of the investigation, we identified a third-party credential dump being hosted by the same hosting provider where the account compromise activity had originated. That credential dump comprised roughly one third of the accounts affected by the ransom campaign. All three companies acted to invalidate the credentials contained in that public dump.\n\nFurther investigation showed that continuous scanning for publicly exposed `.git/config` and other environment files has been and continues to be conducted by the same IP address that conducted the account compromises, as recently as May 10. These files can contain sensitive credentials and personal access tokens if care is not taken to prevent their inclusion, and they should not be publicly accessible in repositories or on web servers. 
This [problem](https://en.internetwache.org/dont-publicly-expose-git-or-how-we-downloaded-your-websites-sourcecode-an-analysis-of-alexas-1m-28-07-2015/) is [not](https://laravel-news.com/psa-hide-your-gitconfig-directory) a new one. More information on the `.git` directory and the `.git/config` file is available [here](https://git-scm.com/docs/gitrepository-layout) and [here](https://git-scm.com/docs/git-config#_configuration_file). Additional IPs residing on the same hosting provider are also exhibiting similar scanning behavior. We are confident that this activity is the source of at least a portion of the compromised credentials.\n\nKnown ransom activity ceased on May 2. All known affected users have had credentials reset or revoked, and all known affected users have been notified by all three companies.\n\n## How to protect yourself\n\nEnable multi-factor authentication on your software development platform of choice.\n- [Bitbucket](https://confluence.atlassian.com/bitbucket/two-step-verification-777023203.html)\n- [GitHub](https://help.github.com/en/articles/securing-your-account-with-two-factor-authentication-2fa)\n- [GitLab](https://docs.gitlab.com/ee/user/profile/account/two_factor_authentication.html)\n\nUse strong and unique passwords for every service.\nStrong and unique passwords prevent credential reuse if a third party experiences a breach and leaks credentials.\nUse a password manager (if approved by your organization) to make this easier!\n\nUnderstand the risks associated with the use of personal access tokens.\nPersonal access tokens, used via Git or the API, circumvent multi-factor authentication.\nTokens have may have read/write access to repositories depending on scope and should be treated like passwords.\nIf you enter your token into the clone URL when cloning or adding a remote, Git writes it to your `.git/config` file in plain text, which may carry a security risk if the `.git/config` file is publicly exposed.\nWhen working with the API, 
use tokens as environment variables instead of hardcoding them into your programs.\n\nDo not expose `.git` directories and `.git/config` files containing credentials or tokens in public repositories or on web servers.\n[Information on securing `.git/config` files on popular web servers is available here](https://en.internetwache.org/dont-publicly-expose-git-or-how-we-downloaded-your-websites-sourcecode-an-analysis-of-alexas-1m-28-07-2015/).\n\n## How to recover an affected repository\n\nIf you have a full, current copy of the repository on your computer, you can force push to the current HEAD of your local copy using:\n`git push origin HEAD:master --force`.\n\nOtherwise, you can still clone the repository and make use of:\n[`git reflog`](https://git-scm.com/docs/git-reflog) or\n[`git fsck`](https://git-scm.com/docs/git-fsck) to find your last commit and change the `HEAD`.\n\nAdditional assistance on Git usage is available in the following resources:\n- [Git documentation](https://git-scm.com/doc)\n- [How to move `HEAD`](https://stackoverflow.com/questions/34519665/how-to-move-head-back-to-a-previous-location-detached-head-undo-commits/34519716#34519716)\n- [Use `git fsck` to recover a deleted branch](https://opensolitude.com/2012/02/29/recover-git-branch.html)\n\nShould you require additional assistance recovering your repository contents, please refer to the following:\n- Bitbucket:\nPlease contact Bitbucket Support by filing a request at [support.atlassian.com/contact/#/](https://support.atlassian.com/contact/#/) and selecting “Bitbucket Cloud” when prompted for a product.\n- GitHub:\nPlease contact GitHub Support at [github.com/contact](https://github.com/contact).\n- GitLab:\nPlease contact GitLab Support via [support.gitlab.com](https://support.gitlab.com/).\n\n## What the software development platform community is doing to protect users\n\nAll three platforms provide robust multi-factor authentication options:\n- 
[Bitbucket](https://confluence.atlassian.com/bitbucket/two-step-verification-777023203.html)\n- [GitHub](https://help.github.com/en/articles/securing-your-account-with-two-factor-authentication-2fa)\n- [GitLab](https://docs.gitlab.com/ee/user/profile/account/two_factor_authentication.html)\n\n**Bitbucket** provides the ability for admins to require two-factor authentication (2FA) and the ability to restrict access to users on certain IP addresses ([IP Whitelisting](https://confluence.atlassian.com/bitbucket/control-access-to-your-private-content-862621261.html)) on their Premium plan.\n\n**GitHub** provides [token scanning](https://help.github.com/en/articles/about-token-scanning) to notify a variety of service providers if secrets are published to public GitHub repositories. GitHub also provides [extensive guidance on preventing unauthorized account access](https://help.github.com/en/articles/preventing-unauthorized-access). We encourage all users to [enable two-factor authentication](https://help.github.com/en/articles/about-two-factor-authentication).\n\n**GitLab** provides secrets detection in 11.9 as part of the [SAST functionality](/releases/2019/03/22/gitlab-11-9-released/#detect-secrets-and-credentials-in-the-repository). 
We also encourage users to [enable 2FA here](https://docs.gitlab.com/ee/user/profile/account/two_factor_authentication.html), and set up [SSH keys](https://docs.gitlab.com/ee/ssh/).\n\nThanks to the security and support teams of Atlassian Bitbucket, GitHub, and GitLab, including the following individuals for their contributions to this investigation and blog post: Mark Adams, Ethan Dodge, Sean McLucas, Elisabeth Nagy, Gary Sackett, Andrew Wurster (Atlassian Bitbucket); Matt Anderson, Howard Draper, Jay Swan, John Swanson (GitHub); Paul Harrison, Anthony Saba, Jayson Salazar, Jan Urbanc, Kathy Wang (GitLab).\n",[9,705,1121],"security",{"slug":1123,"featured":6,"template":687},"git-ransom-campaign-incident-report-atlassian-bitbucket-github-gitlab","content:en-us:blog:git-ransom-campaign-incident-report-atlassian-bitbucket-github-gitlab.yml","Git Ransom Campaign Incident Report Atlassian Bitbucket Github Gitlab","en-us/blog/git-ransom-campaign-incident-report-atlassian-bitbucket-github-gitlab.yml","en-us/blog/git-ransom-campaign-incident-report-atlassian-bitbucket-github-gitlab",{"_path":1129,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1130,"content":1136,"config":1142,"_id":1144,"_type":13,"title":1145,"_source":15,"_file":1146,"_stem":1147,"_extension":18},"/en-us/blog/git-resources-for-visual-learners",{"title":1131,"description":1132,"ogTitle":1131,"ogDescription":1132,"noIndex":6,"ogImage":1133,"ogUrl":1134,"ogSiteName":672,"ogType":673,"canonicalUrls":1134,"schema":1135},"5 Git resources for visual learners","Learning Git is not commonplace in code instruction, yet it is essential for modern software development. 
These sites get you started.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749668161/Blog/Hero%20Images/armycyberschool.jpg","https://about.gitlab.com/blog/git-resources-for-visual-learners","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"5 Git resources for visual learners\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"PJ Metz\"}],\n        \"datePublished\": \"2022-09-14\",\n      }",{"title":1131,"description":1132,"authors":1137,"heroImage":1133,"date":1139,"body":1140,"category":680,"tags":1141},[1138],"PJ Metz","2022-09-14","\n[Git](https://Git-scm.com/doc) is free and open source version control and has become the industry standard for keeping track of changes in software. A recent [survey](https://www.jetbrains.com/lp/devecosystem-2021/) by JetBrains states that 93% of developers surveyed use Git for source control.  Even though it’s used by almost every software developer, it’s still not ubiquitously taught as part of coding courses. Many people end up learning Git either on the job or on their own.\n\nWe’ve gathered a list of sites to learn Git, whether you’re brand-new to it or you need to fine-tune your skills. These five resources are largely focused on visual learning and use either video-based tools or an interactive website or game. \n\n\n**1. [Oh My Git](https://ohmyGit.org/)**\n\nOh My Git is a gamified way of learning Git commands that includes a visualization of what effect your actions have on the repository. It’s card-based for early beginners. Think of it like Hearthstone or Magic the Gathering, but better for learning. It can also be played by using the command line as well. Start playing today! \n\n**2. 
[Git for Computer Scientists](https://eagain.net/articles/git-for-computer-scientists/)**\n\nI love the abstract for this site: “Quick introduction to Git internals for people who are not scared by words like Directed Acyclic Graph.” This website has lots of helpful graphs for people who aren’t necessarily working explicitly in software, and is intended for a specific audience of computer scientists; be aware before heading in. \n\n**3. [Learn Git Branching](https://Github.com/pcottle/learnGitBranching#learnGitbranching)**\n\nSometimes, the complicated part of Git is understanding what is actually happening when you’re creating or working with multiple branches. This visualization tool helpfully creates a real-time display of changes to commit trees. \n\n**4. [Explain Git with D3](https://onlywei.github.io/explain-git-with-d3/)**\n\nThis is such a great resource and one that everyone should have bookmarked. This website lets you type commands in a CLI and immediately see graphs representing what you did on the right. It has an open playground mode where you can just do whatever you like as well as structured lessons for common Git commands. If you use Git but it just feels like magic, then this is a great website for deepening your understanding of what Git does. \n\n**5. [Git for ages 4 and up](https://youtu.be/1ffBJ4sVUb4?t=125)**\n\nThis is a fantastic video of Michael G. Schwern at Linux conf.au in 2013. Using children's toys, Michael gives us a great example of what exactly goes on in Git. It’s an entertaining video with important basics and concepts for anyone struggling to understand Git. \n\n\u003C!-- blank line -->\n\u003Cfigure class=\"video_container\">\n  \u003Ciframe src=\"https://www.youtube.com/embed/1ffBJ4sVUb4\" frameborder=\"0\" allowfullscreen=\"true\"> \u003C/iframe>\n\u003C/figure>\n\u003C!-- blank line -->\n\n## Did you know? 
\n\nGitLab offers free Ultimate tier licenses to qualifying educational institutions when used for learning, teaching, or research? Learn more [here](/solutions/education/).\n",[9,683,266],{"slug":1143,"featured":6,"template":687},"git-resources-for-visual-learners","content:en-us:blog:git-resources-for-visual-learners.yml","Git Resources For Visual Learners","en-us/blog/git-resources-for-visual-learners.yml","en-us/blog/git-resources-for-visual-learners",{"_path":1149,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1150,"content":1156,"config":1162,"_id":1164,"_type":13,"title":1165,"_source":15,"_file":1166,"_stem":1167,"_extension":18},"/en-us/blog/git-wars-switching-to-gitlab",{"title":1151,"description":1152,"ogTitle":1151,"ogDescription":1152,"noIndex":6,"ogImage":1153,"ogUrl":1154,"ogSiteName":672,"ogType":673,"canonicalUrls":1154,"schema":1155},"Git Wars: Why I'm switching to GitLab","New GitLab user Christopher Watson puts us through our paces and weighs up his Git hosting options.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749680411/Blog/Hero%20Images/git-wars-switching-to-gitlab.jpg","https://about.gitlab.com/blog/git-wars-switching-to-gitlab","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"Git Wars: Why I'm switching to GitLab\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Christopher Watson\"}],\n        \"datePublished\": \"2017-07-19\",\n      }",{"title":1151,"description":1152,"authors":1157,"heroImage":1153,"date":1159,"body":1160,"category":726,"tags":1161},[1158],"Christopher Watson","2017-07-19","\n\nIt’s a well-known fact: GitHub has the market share when it comes to Git hosting, with Bitbucket following close behind due to their “unlimited private repositories” policy. But what if I told you that those weren’t your only options?\n\n\u003C!-- more -->\n\nNow I have nothing against GitHub. 
It’s a great tool and I’ve been using it for years. It’s built primarily in a framework I love, Ruby on Rails, and its design is one we’ve all come to tolerate (if not love). With GitHub controlling most of the market share, most open source projects have also found a home there.\n\nAll of this being said, I’d be lying if I said that GitHub didn’t have its downsides. If you want private repositories, it’s going to cost you a pretty penny at $25 per month for your first five users, and then $9 per user after that. For comparison's sake, if you have 10 users in your organization it’s going to cost you $80 a month, and that’s a pretty small team. GitHub also has a sordid history when it comes to communicating with the community, implementing new features, and updating its somewhat dated look.\n\nBitbucket is another beast altogether. Created by the tech conglomerate Atlassian, Bitbucket is their answer to GitHub. Its claim to fame is that you can have unlimited private repositories for free…as long as your team has no more than five people.\n\nThis is actually the reason I first started using Bitbucket. However, if you do have more than five people, you’re going to pay, and the quality that you pay for isn’t all that great. I’ve worked on a number of projects on Bitbucket where my team was plagued by slow pulls/pushes, 503 errors when attempting to view a repo, and just overall jankyness. Besides that, their design also leaves a lot to be desired. The new design looks better, but also somehow makes things even more confusing (someone needs to learn the difference between UI and UX). To be completely honest I don’t have anything good to say about Bitbucket, so I’m going to continue.\n\n### So what’s this GitLab thing? Is it the answer to all of our Git hosting woes?\n\nWell yes and no. GitLab is a very good product, but it’s not perfect. It certainly isn’t as fast as GitHub when it comes to pushing and pulling repos. 
That being said, here are my reasons for switching to GitLab for my personal projects: GitLab is a Git hosting solution with a very large toolset and, objectively, a beautifully designed website (could it still use some work? Yes, but I digress). It is completely free for unlimited users, unlimited private repositories, and full access to most of the awesome features they provide.\n\nYes, they still have paid tiers for the [enterprise](/enterprise/). You can’t expect them to keep an awesome project like this going without some kind of monetization, but for us little guys you’ll most likely never have to pay. That has got to be music to your ears.\n\n### So you said it has “awesome features.” To what are you referring?\n\nWell, there’s quite a list. Let’s take a look:\n\n1. **Syntax themes!** In case you didn’t get that, I’ll say it again: syntax themes! This has been something that I have been waiting a long time for GitHub to come out with, but GitLab beat them to the punch. We’re still probably a long way away from having custom themes, but the ability to have a dark theme when checking diffs in the browser is awesome.\n\n1. **Registry:** GitLab also has a built-in Docker registry for your projects. This is an amazingly powerful feature for those that want to keep their containers off of the public registry at hub.docker.com, but don’t want to pay for a private service.\n\n1. **Pipelines/GitLab CI:** Continuous integration is a huge time saver and a great way to make sure a pull request isn’t going to break your app. GitLab saves you from having to use an external CI service by having their own CI built right in. Not to say you can’t use an external CI if you want; GitLab has integrations for Jenkins, Bamboo, and much more.\n\n1. **3rd Party Integrations:** As mentioned above, GitLab has 3rd party integrations for several services such as CI, code coverage, messaging, etc. 
Their Slack integration is great for notifying your team when stuff has been merged into master. I will be honest though, I am sure GitHub has more integrations.\n\n1. **All the features that make GitHub great:** GitLab also ships with Wikis, Markdown-based readmes, etc. You don’t really lose any features by switching, but you gain a ton.\n\n![screengrab](https://about.gitlab.com/images/blogimages/git-wars-2.png){: .shadow}\u003Cbr>\n\n### So if it’s so great, why isn’t everyone using it?\n\nThere are a couple of answers to that question. First off, you have the market share factor. GitHub was one of the first Git hosting providers to market and they’ve managed to hold onto that. That means that if you want people to contribute to your project, it helps to have it on GitHub because chances are the people that you want to contribute have an account.\n\nThe other answer is related. Comfortability. People are simply comfortable with the tool they know and a lot of people aren’t like me (willing to throw everything out the window because I truly believe that the better product should get my business). This is the same reason so many people are still using Atlassian products. It’s definitely not because of their user interfaces.\n\n### So where should I go from here?\n\nThat depends on you. If you’re comfortable getting to know a new way of doing things, I’d suggest you take a look at GitLab. It really is worth the time you’ll put into it.\n\nIf you’re already *comfortable* then go ahead and stick with what you know, but at least now you know that there are alternatives.\n\n## About the Author\n\n[Chris Watson](https://twitter.com/idev0urer) is a freelance full-stack developer who occasionally enjoys sharing some of his many opinions with the world. 
He and his wife currently reside in sunny Arizona.\n\n_This post was originally published on [blog.cwatsondev.com](https://blog.cwatsondev.com/git-wars-why-im-switching-to-gitlab/)._\n\n“[paper battle](https://www.flickr.com/photos/die_ani/9024130/)” by [anika](https://www.flickr.com/photos/die_ani/) is licensed under [CC0 1.0](https://creativecommons.org/publicdomain/zero/1.0/)\n{: .note}\n",[9,683,728],{"slug":1163,"featured":6,"template":687},"git-wars-switching-to-gitlab","content:en-us:blog:git-wars-switching-to-gitlab.yml","Git Wars Switching To Gitlab","en-us/blog/git-wars-switching-to-gitlab.yml","en-us/blog/git-wars-switching-to-gitlab",{"_path":1169,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1170,"content":1176,"config":1181,"_id":1183,"_type":13,"title":1184,"_source":15,"_file":1185,"_stem":1186,"_extension":18},"/en-us/blog/github-free-for-teams",{"title":1171,"description":1172,"ogTitle":1171,"ogDescription":1172,"noIndex":6,"ogImage":1173,"ogUrl":1174,"ogSiteName":672,"ogType":673,"canonicalUrls":1174,"schema":1175},"#GitChallenge: Compare GitLab to GitHub and earn swag","Send us a review of GitLab and GitHub and get swag.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749681235/Blog/Hero%20Images/hero-blog-gitlab-github.jpg","https://about.gitlab.com/blog/github-free-for-teams","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"#GitChallenge: Compare GitLab to GitHub and earn swag\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"GitLab\"}],\n        \"datePublished\": \"2020-04-14\",\n      }",{"title":1171,"description":1172,"authors":1177,"heroImage":1173,"date":1178,"body":1179,"category":705,"tags":1180},[917],"2020-04-14","\n\nAre you up for a challenge? Compare GitLab and GitHub! 
If you send us a link to your review on Twitter by tagging @gitlab and #GitChallenge we’ll send you some swag for giving us a try.\n{: .alert .alert-gitlab-purple}\n\nToday, GitHub announced [free private repositories with unlimited collaborators](https://github.blog/2020-04-14-github-is-now-free-for-teams/). This is great news for developers worldwide. GitHub also announced that they are lowering the price of their paid Team product to the same price as [GitLab’s Bronze/Starter](/pricing/premium/) offering: $4 per month per user.\n\nAt GitLab, we’ve offered free private repositories as part of our Core/Free product from the start. We also recently made 18 additional features open source, which will help teams collaborate more effectively in a single product, and we’ve been steadily gaining market share in the version control space, with users switching from BitBucket and GitHub to GitLab.\n\n## What your team loses when you go from GitHub Pro to Free\n\nWhen you go from GitHub Pro to GitHub Free, you lose some features that are already free and available to all users on GitLab and Gitlab.com:\n\n*   Protected branches in private repos\n*   Draft PRs in private repos\n*   GitHub Pages in private repos (using one)\n*   Wikis in private repos\n\n## What your team gains by using GitLab Bronze/Starter\n\nWith GitLab, you get even more features than GitHub Team. When there are multiple users on the same team, use [GitLab Bronze](/pricing/#gitlab-com)/[Starter](/pricing/#self-managed):\n\n*   Code owners in private repos\n*   Multiple issue assignees in private repos\n*   Multiple PR assignees in private repos\n*   Code review automatic assignment in private repos\n*   Standard support\n\n## GitLab is more complete\n\nGitLab is a [complete DevOps platform](/topics/devops/), delivered as a single application. 
Here is a visual comparison:\n\n![Comparing_GitLab_GitHub](https://about.gitlab.com/images/blogimages/gitlab-github-comparison.jpg){: .shadow}\n\n## Take the #GitChallenge\n\nIt has never been a better time to compare DevOps tools and find the best ones for you.\n\nCompare GitLab (get your [free trial here](/free-trial/)) and GitHub! You can:\n* Record a video and post it on social media\n* Write a blog or Medium post\n* Post your review on one of the many review sites like [G2](https://www.g2.com/products/gitlab/reviews)\n\nAfter you finish your review, send us a link on Twitter by tagging @gitlab and #GitChallenge, and we’ll send you some swag for giving us the feedback!\n",[819,9],{"slug":1182,"featured":6,"template":687},"github-free-for-teams","content:en-us:blog:github-free-for-teams.yml","Github Free For Teams","en-us/blog/github-free-for-teams.yml","en-us/blog/github-free-for-teams",{"_path":1188,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1189,"content":1195,"config":1201,"_id":1203,"_type":13,"title":1204,"_source":15,"_file":1205,"_stem":1206,"_extension":18},"/en-us/blog/gitlab-for-the-non-technical",{"title":1190,"description":1191,"ogTitle":1190,"ogDescription":1191,"noIndex":6,"ogImage":1192,"ogUrl":1193,"ogSiteName":672,"ogType":673,"canonicalUrls":1193,"schema":1194},"GitLab 101 – a primer for the non-technical","If a set-in-her-ways English major can conquer the GitLab product and culture, you can too. 
Here’s what you need to know.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749678544/Blog/Hero%20Images/gitlab101.jpg","https://about.gitlab.com/blog/gitlab-for-the-non-technical","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"GitLab 101 – a primer for the non-technical\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Valerie Silverthorne\"}],\n        \"datePublished\": \"2019-08-02\",\n      }",{"title":1190,"description":1191,"authors":1196,"heroImage":1192,"date":1197,"body":1198,"category":980,"tags":1199},[815],"2019-08-02","\nI am living proof it’s possible to work at GitLab and not be particularly technical, or even particularly quick about learning technical things. Three months ago I joined the company having never used the tool and with no idea what a merge request or an issue was. I’d never touched Git or pushed a commit, and I certainly had never owned a laptop with Docker on it.\n\nIf you’re like me, fear not. Here’s everything you need to know to jump right in.\n\n## It’s an issue\n\nLet’s start with the thing that confused me the most in the first weeks – issues. An [issue](/handbook/communication/#issues) is something you create if you want to start an initiative, or simply keep track of an idea. Derived from the software development space (obviously), it’s like the starting point in any work-related conversation. Have a great idea for a new GitLab feature? Open an issue. Have an idea for a marketing campaign? Start an issue. Anyone can chime in on your issue and it becomes a place to not only have a conversation but also to keep track of the conversation. At GitLab we call all that “chiming in” collaboration. 
[Collaboration](https://handbook.gitlab.com/handbook/values/#collaboration) is central to the company’s culture and our mission [“everyone can contribute.”](/blog/how-do-you-contribute/) Issues are sort of the file folders we store all that collaboration in. (And, because you might hear this term and wonder about it, as I did...an [“epic”](https://docs.gitlab.com/ee/user/group/epics/) is a collection of related issues, sort of how a filing cabinet holds file folders, to use a very old school analogy.)\n\n## Lanes merge ahead\n\nA [merge request](/handbook/communication/#start-with-a-merge-request) is a formalized way to request something (usually in the [GitLab handbook](/handbook/) or [blog](/blog/)) be created or changed. Creating a merge request triggers GitLab.com to rebuild the entire website (which is both cool and sort of scary the first few times you do it). When you submit a merge request you’ll get a message that says the pipeline is running, meaning the process of rebuilding the entire website has begun. That’s not a small undertaking, so it can take 15 minutes, or more, for your merge request to go through. If it does go through, you’ll get a message that says “passed with warnings!” Ignore the “warnings” – builds always pass with warnings. These warnings are usually not relevant if you're not contributing code. The key thing is it passed. (Speaking from personal experience, refreshing the page or simply staring at the “pipeline running” message doesn’t actually make it go faster.)\n\nNotice the term is merge *request.* That means once it’s passed you’ll need to ask someone who has magical merging powers to actually merge it (usually your manager). You do that by assigning the request to them (top right of the MR form) and leaving them a comment asking them to do so.\n\n## All aboard\n\nYou’ll get a big [onboarding](/handbook/people-group/general-onboarding/) issue on day one. Do not panic. Take your time. 
And realize that some of what you’re doing will only make sense in a month, or even a few months (like all that time I spent downloading Git).\n\nMost of the onboarding tasks are very straightforward and helpful. But ultimately you’ll have to add yourself to the [team page](/company/team/), creating your first merge request in the process. Anything involving the team page can be very tricky because it is based on `.yml` files (cranky, touchy things that are pronounced a little like the vegetable, “yaml”) so do not be afraid to ask for help. The #mr-buddies, #git-help, or #questions channels in Slack can be great resources. You’ll want to remember to use “command F” to search through the hundreds of files on the team page to find your entry.\n\nDon’t worry – no matter how much of a struggle it is to add yourself to the team page, you’re unlikely to actually “break” anything on [about.gitlab.com](/). (I’ll freely admit it took me *several days* to accomplish this one task… )\n\n## Communication\n\nIn an all-remote company, communication is vital. But *how* to communicate at GitLab doesn’t necessarily come naturally to someone like me who came from an email and phone call culture. Our communication methods are [spelled out in the handbook](/handbook/communication/#introduction), but here’s the quick version: You want to communicate primarily within GitLab. That means within an issue – tag someone with their GitLab “handle” (@vsilverthorne as an example) – in the discussion box. Or the same thing can happen in a merge request. Whoever you tag will get a notification in their To-do list on GitLab, and may also be notified via email. But speaking as someone who’s been pointed in the right direction after using Slack or email instead of GitLab, trust me when I say _within_ GitLab is the first and best way to communicate.\n\nIf it’s urgent, [Slack](/handbook/communication/#slack) can be a good choice. 
Slack is also a great place to ask questions, chit-chat with colleagues and/or share common interests. GitLab has lots of groups on Slack for everything from crafty people to gardeners. Email is the last choice because much of the company checks it only occasionally.\n\n## Meetup IRL or virtually\n\nThe [video call on Zoom](/handbook/communication/#video-calls) is another key GitLab practice and although I was a little skeptical it could be more effective than a phone call, I’m now a convert. Not only do you get to know people better because you can see them, the ability to screen share is invaluable, particularly when you’re learning something new. I never feel “camera ready” though, so if you feel that way, you’re far from alone. Luckily, there is a function on Zoom called \"Touch up my appearance.\" It's like FaceTune for the workplace instead of Instagram. Just go into Zoom>Preferences>Video and under My Video check \"Touch up my appearance.\" This way your dark circles won't be making an appearance in the latest video on [GitLab Unfiltered](https://www.youtube.com/channel/UCMtZ0sc1HHNtGGWZFDRTh5A).\n\nIf meetups are possible in real life, I’d suggest those too. At an all-remote company you do have to put time and energy into feeling like you’re part of the team.\n\nAre there other challenges you’ve encountered when you were brand new to GitLab that would have been helped by a clearer or more detailed explanation? 
Let us know and we’ll update this blog post (and the handbook).\n\nCover image by [Charlotte Karlsen](https://unsplash.com/@charlottemsk?utm_source=unsplash&utm_medium=referral&utm_content=creditCopyText) on [Unsplash](https://unsplash.com)\n{: .note}\n",[774,9,1200],"remote work",{"slug":1202,"featured":6,"template":687},"gitlab-for-the-non-technical","content:en-us:blog:gitlab-for-the-non-technical.yml","Gitlab For The Non Technical","en-us/blog/gitlab-for-the-non-technical.yml","en-us/blog/gitlab-for-the-non-technical",{"_path":1208,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1209,"content":1215,"config":1222,"_id":1224,"_type":13,"title":1225,"_source":15,"_file":1226,"_stem":1227,"_extension":18},"/en-us/blog/gitlab-kubernetes-agent-on-gitlab-com",{"title":1210,"description":1211,"ogTitle":1210,"ogDescription":1211,"noIndex":6,"ogImage":1212,"ogUrl":1213,"ogSiteName":672,"ogType":673,"canonicalUrls":1213,"schema":1214},"A new era of Kubernetes integrations on GitLab.com","The GitLab Agent for Kubernetes enables secure deployments from GitLab SaaS to your Kubernetes cluster and provides deep integrations of your cluster to GitLab.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749681920/Blog/Hero%20Images/kubernetes.png","https://about.gitlab.com/blog/gitlab-kubernetes-agent-on-gitlab-com","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"A new era of Kubernetes integrations on GitLab.com\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Viktor Nagy\"}],\n        \"datePublished\": \"2021-02-22\",\n      }",{"title":1210,"description":1211,"authors":1216,"heroImage":1212,"date":1218,"body":1219,"category":705,"tags":1220},[1217],"Viktor Nagy","2021-02-22","\n\nThe GitLab Agent for Kubernetes (\"Agent\", for short) provides a secure connection between a GitLab instance and a Kubernetes cluster and allows pull-based deployments to receive alerts 
based on the network policies. We released the first version of the Agent back in September on self-managed GitLab instances. We are happy to announce that the Agent is available on GitLab SaaS, GitLab.com, and has many more features coming soon.\n\nIf you run into any issues with the Agent or would like to provide feedback, please, [contribute in the Agent epic](https://gitlab.com/groups/gitlab-org/-/epics/3329).\n{: .alert .alert-warning}\n\n## Why a new era?\n\nBefore, the recommended way to attach a cluster to GitLab was to provide the cluster certificates and to open up the Kube API to GitLab.com. To get the most out of the integrations, we recommended attaching the cluster with `cluster-admin` rights, so GitLab could provision new namespaces and create review apps. But many users found this to be overly risky and instead rolled out custom integrations that were often built around the GitLab Runner. We want to simplify and support security-minded users with the GitLab Agent for Kubernetes and provide them with a safe, reliable, and future-proof integration solution between GitLab and their clusters. The GitLab Agent provides a secure connection between the cluster and GitLab. Access rights can be controlled with the Agent more tightly by our users, and we consider it to be the basis for future Kubernetes integrations with GitLab.\n\nWhen Kubernetes was just starting to get popular, our initial approach served new Kubernetes users well. At the same time, providing `cluster-admin` rights is not an option for many current users with experienced Site Reliability Engineers (SREs) and Platform Engineers on board. In the past few years, thanks to the certificate-based integrations, we have learned a lot about the needs of GitLab users, and we are leveraging these learnings with the Agent.\n\n## How does the Agent work?\n\nThe Agent provides a permanent connection using websockets or gRPC between a Kubernetes cluster and a GitLab instance. 
Since we want to keep the cluster-side component minimal and lightweight, we imagine multiple Agents being installed into the same cluster with different access levels. Still, this integration is complex. To understand how the Agent works, let me first introduce its major components. The whole Agent experience is made possible primarily by two components that we call `agentk` and `kas` (short for GitLab Agent Server). `agentk` is the cluster-side component that has to be deployed in the cluster, while `kas` is the GitLab server-side component that is managed alongside GitLab. Since we want to keep the cluster-side component as slim as possible, `kas` is responsible for much of the heavy lifting.\n\nThe Agent is configured in code, then registered with GitLab through an access token. Once installed in the cluster, `agentk` receives the access token and the `kas` endpoint and authenticates itself with GitLab. Subsequently, it retrieves its own configuration from GitLab, and keeps a connection open between `kas` and the cluster. This way both the agent and GitLab can send messages and receive information from the other party through a secure connection. This approach also allows a Kubernetes cluster sitting behind a firewall to be securely integrated with GitLab.com.\n\n## Getting started\n\n### About the Agent's availability\n\nIf you would like to try out the Agent on GitLab.com, `kas` is already installed and is managed by our SRE team. Before making the Agent generally available, we want to make sure that Agent-based workflows won't harm the performance of GitLab.com. This is why, at this time, `kas` is only available for select customers and projects. If you would like to try it out, [reach out to me](/company/team/#nagyv-gitlab) in e-mail or by mentioning me in an issue with your project ID, and we will authorize your project.\n\nGitLab's `kas` instance is available at `wss://kas.gitlab.com`. 
You will have to provide this value together with a registered agent access token when you deploy `agentk` to your cluster. You can [follow the installation instructions from our documentation](https://docs.gitlab.com/ee/user/clusters/agent/#define-a-configuration-repository) starting with defining a configuration repository.\n\n### How deployments work\n\nIf you prefer a video walk-through, we demonstrate how pull-based deployments work with the Agent below.\n\n\u003Cfigure class=\"video_container\">\n  \u003Ciframe src=\"https://www.youtube-nocookie.com/embed/17O_ARVaRGo\" frameborder=\"0\" allowfullscreen=\"true\"> \u003C/iframe>\n\u003C/figure>\n\nFor deployments, we share some codebase with ArgoCD since this part of the Agent is built on the [gitops-engine](https://github.com/argoproj/gitops-engine/). The `gitops-engine` provides a simple tool to keep git repositories synced with cluster resources. The Agent is configured in code. What we call the \"agent configuration project\" references the repositories containing the Kubernetes manifests which are the resource definitions describing the expected state of your cluster. Whenever these manifests change, the Agent automatically pulls the new configuration and applies it in the cluster.\n\n#### An example using Helm\n\nToday, the GitLab Agent for Kubernetes only supports pull-based deployments, but we are working on connecting it with GitLab CI to also provide push-based deployment support. So far, we have created a simple example repository that shows how someone might use the Agent together with Helm to install the GitLab Runner in their cluster.\n\nOne critique of Helm is that you might get different deployments without changing anything in the code you manage. We want to make sure that your manifest projects reflect what is expected to be deployed in your cluster. 
This is why we recommend that you use GitLab CI to generate and commit the final Kubernetes manifests from your preferred templating tool, and let the Agent take care of deploying the rendered templates. We follow this pattern in the example repository too.\n\n### Kubernetes network security alerts\n\nIn [GitLab 13.9](/releases/2021/02/22/gitlab-13-9-released/) we are [shipping an integration with Cilium built on top of the Agent](/releases/2021/02/22/gitlab-13-9-released/#configmap-support-for-kubernetes-agent-server). The integration provides a simple way to generate network policy-related alerts and to surface those alerts in GitLab. Watch the video below for a demo:\n\n\u003Cfigure class=\"video_container\">\n  \u003Ciframe src=\"https://www.youtube-nocookie.com/embed/mFpXUvcAT1g\" frameborder=\"0\" allowfullscreen=\"true\"> \u003C/iframe>\n\u003C/figure>\n\n## Ongoing developments\n\nWhile we think that the Agent can already bring great value to Silver and Gold-level GitLab users, we are working constantly to build even more features on top of it.\n\nOur primary focus now is to make the Agent generally available on GitLab.com SaaS. We are also working on a set of features that allows a user to connect GitLab CI with clusters securely using the Agent. 
This allows existing push-based deployments to start easily using the Agent and the integrations coming with it.\n\nWe are excited to see how you will benefit from the Agent and what amazing things you will build with it.\n\n## Read more on Kubernetes:\n\n- [How to install and use the GitLab Kubernetes Operator](/blog/gko-on-ocp/)\n\n- [Threat modeling the Kubernetes Agent: from MVC to continuous improvement](/blog/threat-modeling-kubernetes-agent/)\n\n- [How to deploy the Agent with limited permissions](/blog/setting-up-the-k-agent/)\n\n- [Understand Kubernetes terminology from namespaces to pods](/blog/kubernetes-terminology/)\n\n- [What we learned after a year of GitLab.com on Kubernetes](/blog/year-of-kubernetes/)\n",[1221,9,774],"kubernetes",{"slug":1223,"featured":6,"template":687},"gitlab-kubernetes-agent-on-gitlab-com","content:en-us:blog:gitlab-kubernetes-agent-on-gitlab-com.yml","Gitlab Kubernetes Agent On Gitlab Com","en-us/blog/gitlab-kubernetes-agent-on-gitlab-com.yml","en-us/blog/gitlab-kubernetes-agent-on-gitlab-com",{"_path":1229,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1230,"content":1236,"config":1243,"_id":1245,"_type":13,"title":1246,"_source":15,"_file":1247,"_stem":1248,"_extension":18},"/en-us/blog/gitlab-now-supports-sha256-repositories",{"title":1231,"description":1232,"ogTitle":1231,"ogDescription":1232,"noIndex":6,"ogImage":1233,"ogUrl":1234,"ogSiteName":672,"ogType":673,"canonicalUrls":1234,"schema":1235},"GitLab now supports SHA256 repositories","Try this experimental security feature to create test projects.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749667390/Blog/Hero%20Images/blog-image-template-1800x945__19_.png","https://about.gitlab.com/blog/gitlab-now-supports-sha256-repositories","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"GitLab now supports SHA256 repositories\",\n        \"author\": 
[{\"@type\":\"Person\",\"name\":\"John Cai\"}],\n        \"datePublished\": \"2024-08-19\",\n      }",{"title":1231,"description":1232,"authors":1237,"heroImage":1233,"date":1238,"body":1239,"category":1240,"tags":1241},[896],"2024-08-19","Previously, we announced how GitLab [supports SHA256 repositories on\nthe backend in Gitaly](https://about.gitlab.com/blog/sha256-support-in-gitaly/). Now, we've added the ability to create new GitLab projects with the SHA256 hashing algorithm.\n\nYou can do so on the project creation page under “Experimental settings.”\n\n**Note: This feature is experimental and should only be used to create test projects.**\n\nWhile experimenting with this security feature, if you find any anomalies in the application,\nplease help us out and [file an issue with your feedback](https://gitlab.com/gitlab-org/gitlab/-/issues/new?issuable_template=SHA256%20Bug).\n","bulletin-board",[1121,9,183,1242],"features",{"slug":1244,"featured":6,"template":687},"gitlab-now-supports-sha256-repositories","content:en-us:blog:gitlab-now-supports-sha256-repositories.yml","Gitlab Now Supports Sha256 Repositories","en-us/blog/gitlab-now-supports-sha256-repositories.yml","en-us/blog/gitlab-now-supports-sha256-repositories",{"_path":1250,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1251,"content":1257,"config":1262,"_id":1264,"_type":13,"title":1265,"_source":15,"_file":1266,"_stem":1267,"_extension":18},"/en-us/blog/gitlabs-contributions-to-git-2-44-0",{"title":1252,"description":1253,"ogTitle":1252,"ogDescription":1253,"noIndex":6,"ogImage":1254,"ogUrl":1255,"ogSiteName":672,"ogType":673,"canonicalUrls":1255,"schema":1256},"GitLab's contributions to Git 2.44.0","Find out the topics that GitLab’s Git team – as well as the wider community – contributed to the latest Git release, including fast scripted rebases via 
git-replay.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749666069/Blog/Hero%20Images/AdobeStock_639935439.jpg","https://about.gitlab.com/blog/gitlabs-contributions-to-git-2-44-0","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"GitLab's contributions to Git 2.44.0\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Patrick Steinhardt\"}],\n        \"datePublished\": \"2024-02-26\",\n      }",{"title":1252,"description":1253,"authors":1258,"heroImage":1254,"date":1259,"body":1260,"category":680,"tags":1261},[677],"2024-02-26","The Git project recently released [Git 2.44.0](https://git-scm.com/downloads). In this blog post, we will highlight the contributions made by GitLab's Git team, as well as those from the wider Git community.\n\n## Fast scripted rebases via `git-replay`\n\nThe `git-rebase` command can be used to reapply a set of commits onto a different base commit. This can be quite useful when you have a feature branch where the main branch it was originally created from has advanced since creating the feature branch.\n\nIn this case, `git-rebase` can be used to reapply all commits of the feature branch onto the new commits of the main branch.\n\nSuppose you have the following commit history with the main development branch `main` and your feature branch `feature`:\n\n![main and feature branch](https://res.cloudinary.com/about-gitlab-com/image/upload/v1749678099/Blog/Content%20Images/Screenshot_2024-02-20_at_2.15.37_PM.png)\n\nYou have originally created your feature branch from `m-2`, but since then the `main` branch has gained two additional commits. 
Now `git-rebase` can be used to reapply your commits `f-1` and `f-2` on top of the newest commit `m-4`:\n\n![applying git-rebase](https://res.cloudinary.com/about-gitlab-com/image/upload/v1749678099/Blog/Content%20Images/Screenshot_2024-02-20_at_2.16.28_PM.png)\n\nYou can see this functionality in GitLab when you create a merge request. When you want to reapply the commits of your merge request onto new commits in the target branch, all you have to do is [to create a comment that contains the `/rebase` command](https://docs.gitlab.com/ee/topics/git/git_rebase.html#rebase-from-the-ui). The magic then happens behind the scenes.\n\nThere is one problem though: `git-rebase` only works on repositories that have a worktree (a directory where a branch, tag or commit has been checked out). The repositories we host at GitLab are “bare” repositories, which don’t have a worktree. This means that the files and directories tracked by your commits are only tracked as Git objects in the `.git` directory of the repository. This is mostly done to save precious disk space and speed up operations.\n\nIn the past, we used [libgit2](https://libgit2.org/) to implement rebases. But for various reasons, we decided to remove this dependency in favor of only using Git commands to access Git repositories. But this created a problem for\nus because we could neither use libgit2 nor `git-rebase` to perform rebases. While we could create an ad-hoc worktree to use `git-rebase`, this would have been prohibitively expensive in large monorepos.\n\nLuckily, [Elijah Newren](https://www.linkedin.com/in/elijah-newren-0a41665/) has upstreamed a new merge algorithm called `merge-ort` in Git 2.33. Despite being significantly faster than the old `recursive` merge strategy in almost all cases, it also has the added benefit that it can perform merges in-memory. 
In practice, this also allows us to perform such rebases in-memory.\n\nEnter `git-replay`, which is a new command that does essentially the same thing as `git-rebase` but in-memory, thus not requiring a worktree anymore. This is an\nimportant building block to allow us to develop faster rebasing of merge requests in the future.\n\nYou may ask: Why a new command instead of updating `git-rebase`? The problem here was that `git-rebase` is essentially a user-focused command (also called a\n\"porcelain\" command in Git). Thus it performs several actions that are not required by a script at all, like, for example, executing hooks or checking out files into the worktree. The new `git-replay` command is a script-focused\ncommand (also called a \"plumbing\" command in Git) and has a different set of advantages and drawbacks. Furthermore, besides doing rebases, we plan to use it to do cherry-picks and reverts in the future, too.\n\nThis topic was a joint effort by [Elijah Newren](https://www.linkedin.com/in/elijah-newren-0a41665/) and\n[Christian Couder](https://www.gitlab.com/chriscool).\n\n## Commit-graph object existence checks\n\nYou may know that each commit can have an arbitrary number of parents:\n\n- The first commit in your repository has no parents. This is the \"root\" commit.\n- Normal commits have a single parent.\n- Merge commits have at least two, but sometimes even more than two parents.\n\nThis parent relationship is part of what forms the basis of Git's object model and establishes the object graph. If you want to traverse this object graph, Git must look up an entry point commit and from there walk the parent chain of commits.\n\nTo fully traverse history from the newest to the oldest commit, you must look up and parse all commit objects in between. Because repositories can consist of hundreds of thousands or even millions of such commits, this can be\nquite an expensive operation. 
But users of such repositories still want to be able to, for example, search for a specific commit that changes a specific file\nwithout waiting several minutes for the search to complete.\n\nThe Git project introduced a commit-graph data structure a long time ago that essentially caches a lot of the parsed information in a more accessible data structure. This commit-graph encodes the parent-child relation and some additional information, like, for example, a [bloom filter](https://en.wikipedia.org/wiki/Bloom_filter) of changed\nfiles.\n\nThis commit-graph is usually updated automatically during repository housekeeping. Because housekeeping only runs every so often, the commit-graph can be missing entries for recently added commits. This is perfectly fine and expected to happen, and Git knows to instead look up and parse the commit object in such a case.\n\nNow, the reverse case also theoretically exists: The commit-graph contains cached information of an object that does not exist anymore because it has been deleted without regenerating the commit-graph. The consequence would\nbe that lookups of this commit succeed even though they really shouldn't. To avoid this, in Git 2.43.0, we upstreamed a change into Git that detects commits\nthat exist in the commit-graph but no longer in the object database.\n\nThis change requires us to do an existence check for every commit that we parse via the commit-graph. Naturally, this change leads to a performance regression, which was measured to be about 30% in the worst case. This was\ndeemed acceptable though, because it is better to return the correct result slowly than to return the wrong result quickly. Furthermore, the commit-graph still results in a significant performance improvement compared to not using the commit-graph at all. 
To give users an escape hatch in case they do not want this performance regression, we also introduced a `GIT_COMMIT_GRAPH_PARANOIA` environment variable that can be used to disable this check.\n\nAfter this change was merged and released though, we heard of cases where the impact was even worse than 30%: counting the number of commits via `git rev-list --count` in the Linux repository regressed by about 100%. After some\ndiscussion upstream, we changed the default so that we no longer verify commit existence for the commit-graph to speed up such queries again. Because repository housekeeping should ensure that commit-graphs are consistent, this change should stop us from needlessly pessimizing this uncommon case.\n\nThis change was implemented by\n[Patrick Steinhardt](https://gitlab.com/pks-gitlab).\n\n## Making Git ready for a new ref backend\n\nA common theme among our customers is that large monorepos with many refs create significant performance problems with many workloads. The range of problems here are manyfold, but the more refs a repository has, the more pronounced the problems become.\n\nMany of the issues are inherent limitations of the way Git stores refs. The so-called `files` ref backend uses a combination of two mechanisms:\n- \"Loose refs\" are simple files that contain the object ID they point to.\n- \"Packed refs\" are a single file that contains a collection of refs.\n\nWhenever you update or create a ref, Git creates them as a loose ref. Every once in a while, repository housekeeping then compresses all loose refs into the `packed-refs` file and deletes the corresponding loose refs. 
A typical repo looks as follows:\n\n```shell\n $ git init --ref-format=files repo\nInitialized empty Git repository in /tmp/repo/.git/\n $ cd repo/\n $ git commit --allow-empty --message \"initial commit\"\n $ tree .git/\n.git/\n├── config\n├── HEAD\n├── index\n└── refs\n\t├── heads\n\t│   └── main\n\t└── tags\n $ cat .git/HEAD\nref: refs/heads/main\n $ cat .git/refs/heads/main\nbf1814060ed3a88bd457ac4dca055d000ffe4482\n\n $ git pack-refs --all\n $ cat .git/packed-refs\n# pack-refs with: peeled fully-peeled sorted\nbf1814060ed3a88bd457ac4dca055d000ffe4482 refs/heads/main\n```\n\nWhile this model has served the Git project quite well, relying on a filesystem like this has several limitations:\n- Deleting a single ref requires you to rewrite the `packed-refs` file, which can be gigabytes in size.\n- It is impossible to do atomic reads because you cannot atomically scan multiple files at once when a concurrent writer may modify some refs.\n- It is impossible to do atomic writes because creating or updating several refs requires you to write to several files.\n- Housekeeping via `git-pack-refs` does not scale well because of its all-into-one repacking nature.\n- The storage format of both loose and packed refs is inefficient and wastes disk space.\n- Filesystem-specific behavior can be weird and may restrict which refs can be created. For example, Case-insensitivity on filesystems like FAT32 can cause issues, when trying to create two refs with the same name that only differ in their case.\n\nSeveral years ago, [Shawn Pearce](https://sfconservancy.org/blog/2018/jan/30/shawn-pearce/) had proposed the \"reftable\" format as an alternative new format to store refs in a repository. 
This new format was supposed to help with most or all of the above issues and is essentially a\nbinary format specifically catered towards storing references in Git.\n\nThis new \"reftable\" format has already been implemented by\n[JGit](https://www.eclipse.org/jgit/) and is used extensively by the [Gerrit project](https://www.gerritcodereview.com/). And, in 2021, [Han-Wen Nienhuys](https://www.linkedin.com/pub/dir/han-wen/nienhuys) upstreamed a library to read and write reftables into the Git project. What is still missing though is the backend that ties together the reftable library and\nGit, and unfortunately progress has stalled here. As we experience much of the pain that the reftable format is supposed to address, we decided to take over the work from Han-Wen and continue the upstreaming process.\n\nBefore we can upstream the reftable backend itself though, we first had to prepare several parts of Git for such a new backend. While the Git project already has a concept of different ref backends, the boundaries were very blurry because until now there only exists a single \"files\" backend.\n\nThe biggest contribution by GitLab in this release was thus a joint effort to prepare all the parts of Git for the new backend that were crossing boundaries:\n- Some commands used to read or write refs directly via the filesystem without going through the ref backend.\n- The ref databases of worktrees created via `git-worktree` were initialized ad-hoc instead of going through the ref backend.\n- Cloning a repository created the ref database with the wrong object format when using SHA256. This did not matter with the \"files\" backend because the format was not stored anywhere by the ref backend itself. 
But because the reftable backend encodes the format into its binary format, this was a problem.\n- Many tests read or write refs via the filesystem directly.\n- We invested quite some time already into bug fixing and performance optimizations for the reftable library itself.\n- We introduced a new `refStorage` extension that tells Git in which format the repository stores its refs. This can be changed when creating a new repository by specifying `--ref-format` flag in `git-init` or `git-clone`. For now, only the “files” format is supported.\n\nThe overarching goal was to get the work-in-progress reftable backend into a state where it passes the complete test suite. And even though the reftable backend is not yet part of Git 2.44.0, I am happy to report that we have\nsucceeded in this goal: Overall, we have contributed more than 150 patches to realize it. Given the current state, we expect that the new reftable backend will become available with Git v2.45.0.\n\nWe will not cover the new reftable format in this post because it is out of scope, but stay tuned for more details soon!\n\nThis project was a joint effort by\n[John Cai](https://gitlab.com/jcaigitlab),\n[Justin Tobler](https://gitlab.com/justintobler),\n[Karthik Nayak](https://gitlab.com/knayakgl),\n[Stan Hu](https://gitlab.com/stanhu),\n[Toon Claes](https://gitlab.com/toon),\nand [Patrick Steinhardt](https://gitlab.com/pks-gitlab), who has led the effort. Credit also goes to\n[Shawn Pearce](https://sfconservancy.org/blog/2018/jan/30/shawn-pearce/) as original inventor of the format and [Han-Wen Nienhuys](https://www.linkedin.com/pub/dir/han-wen/nienhuys) as the\nauthor of the reftable library.\n\n## Support for GitLab CI\n\nAs all the preparations for the new `reftable` backend demonstrate, we have significantly increased our investments into the long-term vision and health of\nthe Git project. 
And because a very important part of our product depends on the Git project to remain healthy, we want to continue investing into the Git project like this.\n\nFor us, this means that it was high time to improve our own workflows in the context of the Git project. Naturally, we were already using GitLab CI as part of the process instead of the GitHub Workflows support that existed in\nthe Git project. But we were using a [`.gitlab-ci.yml` definition](https://docs.gitlab.com/ee/ci/yaml/) that was not part of the upstream repository and instead maintained outside the Git project.\n\nWhile this worked reasonably well, there were two significant downsides:\n- Test coverage was significantly lower than that of the GitHub Workflows definition. Notably, we did not test on macOS, had no static analysis, and didn't test with non-default settings. This often led to failures in the GitHub Workflows pipeline that we could have detected earlier if we had better CI integration.\n- Other potential contributors to Git who may already be using GitLab on a daily basis didn't have easy access to a GitLab CI pipeline.\n\nTherefore, we decided to upstream a new GitLab CI definition that integrates with the preexisting CI infrastructure that the Git project already had. 
Because we reuse a lot of pre-existing infrastructure, this ensures that both GitLab CI and GitHub Workflows run tests mostly in the same way.\n\nAnother benefit of GitLab CI support is that, for the first time, we now also exercise an architecture other than `x86_64` or `i686`: the [macOS runners we provide at GitLab.com](https://docs.gitlab.com/ee/ci/runners/saas/macos_saas_runner.html) use an Apple M1, which is based on the `arm64` architecture.\n\nThis change was contributed by [Patrick Steinhardt](https://gitlab.com/pks-gitlab).\n\n## More to come\n\nThis blog post gives just a glimpse into what has happened in the Git project, which lies at the heart of [source code management](https://about.gitlab.com/solutions/source-code-management/) at GitLab. Stay tuned for more insights into future contributions and the reftable backend in particular!",[9,683,773],{"slug":1263,"featured":6,"template":687},"gitlabs-contributions-to-git-2-44-0","content:en-us:blog:gitlabs-contributions-to-git-2-44-0.yml","Gitlabs Contributions To Git 2 44 0","en-us/blog/gitlabs-contributions-to-git-2-44-0.yml","en-us/blog/gitlabs-contributions-to-git-2-44-0",{"_path":1269,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1270,"content":1276,"config":1282,"_id":1284,"_type":13,"title":1285,"_source":15,"_file":1286,"_stem":1287,"_extension":18},"/en-us/blog/goldman-sachs-partners-with-gitlab-for-next-gen-platform-strategies",{"title":1271,"description":1272,"ogTitle":1271,"ogDescription":1272,"noIndex":6,"ogImage":1273,"ogUrl":1274,"ogSiteName":672,"ogType":673,"canonicalUrls":1274,"schema":1275},"Goldman Sachs partners with GitLab for next-gen platform strategies","Goldman Sachs’ George Grant shares how partnering with GitLab has modernized the development ecosystem.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749671845/Blog/Hero%20Images/serverless-ops-blog.jpg","https://about.gitlab.com/blog/goldman-sachs-partners-with-gitlab-for-next-gen-platform-strategies","\n     
                   {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"Goldman Sachs partners with GitLab for next-gen platform strategies\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Brein Matturro\"}],\n        \"datePublished\": \"2020-01-24\"\n      }",{"title":1271,"description":1272,"authors":1277,"heroImage":1273,"date":1279,"body":1280,"category":748,"tags":1281},[1278],"Brein Matturro","2020-01-24","\n\nMost people know Goldman Sachs as the global investment banking giant, but over the past few years the company has branched out to some pretty modern applications that go beyond the standard financial firm. At GitLab Commit Brooklyn 2019, [George Grant](https://www.linkedin.com/in/george-grant-21a9624), who runs the US SDLC engineering team at Goldman Sachs, explained how they’ve partnered with GitLab to help transform not only their development but the company as a whole.\n\n“It means we have to be a lot more nimble than we were in the past,” Grant says. “Now that we’re developing things that run on people’s iPhones, you need to have a different sort of infrastructure to do that.” The SDLC engineering team drives strategies for the development team, including legacy products, but also newer platforms like budgeting applications and the latest Apple credit card. The team is at the center of every business move within the organization.\n\n## Getting past the “dark times”\nGoldman Sachs has about 10 [SDLCs running](/platform/), having grown organically into its own ecosystem over the years for various purposes. “Many of the things that we have at GS were designed in house – it’s our own workflow, our own tools doing code reviews, surrounding a minimum amount of external tools. 
Everything that’s involved in it is very tightly coupled with everything else,” Grant says.\n\nThe deployments, the issue tracker, the builds, and the testing are all linked together in order for everything to be controlled in one environment, including regulatory and compliance. This workflow is comfortable and controlled for users, but not ideal. “The problem is, it is sort of simultaneously its greatest strength and greatest weakness because the tightness of the coupling of the components makes it very difficult to replace any of the ones,” Grant says. If any part of the environment needs to be updated or switched out, it impacts all the others.\n\n\n\nThe engineering team started researching a new strategic direction, primarily looking for a modern Git-based solution. The goal was to find a tool that could alleviate developers’ SDLC workload and provide critical strategies for [cloud and Kubernetes](/2017/11/30/containers-kubernetes-basics/), allowing people to move away from the legacy stack. “You actually want to have something that gives you the freedom to innovate, but still have that control level around it.”\n\n## Creating a roadmap with GitLab\nGoldman Sachs chose GitLab as a way to move to the cloud, as an automation tool and to ultimately become the center of the ecosystem. “We didn’t want GitLab to be an island,” Grant says. Within the first two weeks of introducing GitLab, there were over 1600 users, underscoring the push for a new strategic platform.\n\nGitLab users can be innovative without restrictions. Each user group continues to work in their own world of tooling, but in a highly regulated environment. Reduced cycle times are another benefit, according to Grant. “We have one team that used to only be able to do a release every two weeks. Now they can do one and do another one five minutes later if they want to,” he says.\n\nFor an experienced company, the ability to integrate with legacy tools is important. 
On top of that, GS is embracing DevOps and QA metrics now that they have end-to-end visibility within the ecosystem. The transparency of GitLab allows Goldman Sachs to have input. “We have new ideas and new ways that we want to use the product to drive it strategically within GS,” Grant says.\n\n## Goldman Sachs and GitLab: Better together\nGoldman Sachs and GitLab have established a partnership. “The proof is in the pudding, as they say, and Goldman Sachs was very, very happy to become an investor in GitLab,” Grant says. As users of the tool, Goldman Sachs found it to be a natural investment opportunity. Bottom line, he says, people are demanding to use it more often. “We believe it is the strategic platform to take us into the future.”\n\nTo learn more about Goldman Sachs’ implementation strategies, watch George Grant’s presentation from GitLab Commit Brooklyn 2019.\n\u003C!-- blank line -->\n\u003Cfigure class=\"video_container\">\n  \u003Ciframe src=\"https://www.youtube.com/embed/Bu3nrxPy1-E\" frameborder=\"0\" allowfullscreen=\"true\"> \u003C/iframe>\n\u003C/figure>\n\u003C!-- blank line -->\n\nPhoto by [Tomasz Frankowski](https://unsplash.com/@sunlifter?utm_source=unsplash&utm_medium=referral&utm_content=creditCopyText) on [Unsplash](https://unsplash.com/?utm_source=unsplash&utm_medium=referral&utm_content=creditCopyText)\n{: .note}\n",[728,108,819,9,266],{"slug":1283,"featured":6,"template":687},"goldman-sachs-partners-with-gitlab-for-next-gen-platform-strategies","content:en-us:blog:goldman-sachs-partners-with-gitlab-for-next-gen-platform-strategies.yml","Goldman Sachs Partners With Gitlab For Next Gen Platform 
Strategies","en-us/blog/goldman-sachs-partners-with-gitlab-for-next-gen-platform-strategies.yml","en-us/blog/goldman-sachs-partners-with-gitlab-for-next-gen-platform-strategies",{"_path":1289,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1290,"content":1296,"config":1303,"_id":1305,"_type":13,"title":1306,"_source":15,"_file":1307,"_stem":1308,"_extension":18},"/en-us/blog/google-summer-of-code-2024-contribute-to-gitlab-and-git-to-prepare",{"title":1291,"description":1292,"ogTitle":1291,"ogDescription":1292,"noIndex":6,"ogImage":1293,"ogUrl":1294,"ogSiteName":672,"ogType":673,"canonicalUrls":1294,"schema":1295},"Google Summer of Code 2024: Contribute to GitLab and Git to prepare","Learning how to contribute to GitLab and Git can help you get ready to apply for Google's program for open source development.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749663000/Blog/Hero%20Images/tanukilifecycle.png","https://about.gitlab.com/blog/google-summer-of-code-2024-contribute-to-gitlab-and-git-to-prepare","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"Google Summer of Code 2024: Contribute to GitLab and Git to prepare\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Nick Veenhof\"},{\"@type\":\"Person\",\"name\":\"Christian Couder\"}],\n        \"datePublished\": \"2023-12-20\"\n      }",{"title":1291,"description":1292,"authors":1297,"heroImage":1293,"date":1299,"body":1300,"category":680,"tags":1301},[1298,876],"Nick Veenhof","2023-12-20","Google Summer of Code ([GSoC](https://summerofcode.withgoogle.com/)), a program that helps bring new contributors into open source software development, is just around the corner. So now is the time to start learning [how to contribute to GitLab](https://about.gitlab.com/community/contribute/) or Git and prepare ideas for GSOC 2024. 
GitLab has participated in GSOC for more than five years of the program's 20-year history, and the mentorship opportunity aligns well with our \"[Everyone can contribute](https://handbook.gitlab.com/handbook/company/mission/)\" mission.\n\nIn 2023, GitLab team members mentored GSoC contributors working on GitLab and Git open source projects throughout the 12-week program. One example was the “Unify ref-filter formats with other --pretty formats” Git project. \n\n## Implementing new formatting options for Git commands\n\nKousik Sanagavarapu was selected as a 2023 GSOC contributor and was mentored by [Christian Couder](https://gitlab.com/chriscool), staff backend engineer on the GitLab Gitaly::Git team.\n\nKousik’s work focused on implementing some [new formatting options for Git commands](https://summerofcode.withgoogle.com/programs/2023/projects/rck3kmq2) like `git branch`, `git tag` and `git for-each-ref`. These commands use a formatting mechanism called the “ref-filter” format. The formatting options Kousik worked on were already available for other commands like `git log`, that use a different formatting mechanism called the “pretty” format. So the work involved porting these options from the “pretty” format to the “ref-filter” format.\n\nThanks to Kousik’s work, it’s now possible to use a number of new placeholders like %(signature), %(authoremail:mailmap), or %(describe) in the --format option of `git branch`, `git tag`, and `git for-each-ref` to get more information about the commits that branches, tags, or refs in general point to. [Read the documentation](https://git-scm.com/docs/git-for-each-ref/2.43.0#_field_names) for a description of these placeholders.\n\nThese improvements are available in the recently released Git 2.43.\n\n## How GSOC works\n\nOpen source organizations who participate – such as GitLab and Git – have to propose projects and provide mentors. 
Selected contributors are helped by the mentors and paid by Google during 12 or more weeks while they work on their projects. Contributors are evaluated three times by mentors: after a “Community Bonding” period, in the middle of the coding period, and after the coding period for a final evaluation.  \n\n## How to participate as a contributor\n\nTo apply to become a contributor for GSOC 2024, check out the [GSoC website](https://summerofcode.withgoogle.com/) and the [Google Open Source blog](https://opensource.googleblog.com). Interested parties should register [when selected organizations are announced](https://opensource.googleblog.com/2023/02/mentor-organizations-announced-for.html), which will happen in a few months. \n\nContributors will then be selected by the mentors after they have made a small contribution and after they have prepared an application document that details how they plan to achieve the proposed project they want to work on.\n\nProspective contributors can start learning about GitLab or Git right now to be fully ready to make a small contribution and prepare an application. [As Google says](https://opensource.googleblog.com/2023/02/mentor-organizations-announced-for.html), “The most successful applications come from contributors who start preparing now.” \n\nGitLab has a lot of documentation and tutorials [to learn how to contribute](https://about.gitlab.com/community/contribute/), while Git has a [Hacking Git page](https://git.github.io/Hacking-Git/) with a lot of helpful links.\n\n## How GitLab team members participate\n\nGitLab participates in GSOC as an open source organization and team members from different functional areas volunteer to mentor contributors and propose projects for them to work on.  
\n\nIn 2023, GitLab team members mentored contributors on a number of GitLab-related projects, including  Pajamas Migration with the GitLab Foundations Team and improving the documentation for the contributor journey to GitLab.\n\n## How Git developers participate\n\nThe Git project also participates in GSoC as an open source organization, and Git developers who are interested in mentoring propose projects, and then select GSoC contributors.\n\nLast summer, in addition to the \"Unify ref-filter formats with other --pretty formats\" project, Git developers proposed the \"[More Sparse Index integrations](https://summerofcode.withgoogle.com/programs/2023/projects/Rkbc1Abe)\" project.\n\n## Mentoring and GitLab \n\nGitLab’s mission is “Everyone can contribute” and we understand that helping potential contributors through mentoring can achieve this goal. In addition to participating in external programs like GSOC and [Outreachy](https://about.gitlab.com/blog/outreachy-sponsorship-winter-2020/), GitLab has internal mentoring programs, including a [CEO Shadow program](https://handbook.gitlab.com/handbook/ceo/shadow/) and a [Mentorship program for women](https://handbook.gitlab.com/handbook/company/culture/inclusion/tmrg-gitlab-women/mentorship-program/).\n\nLearn more about [mentoring at GitLab](https://handbook.gitlab.com/handbook/people-group/learning-and-development/mentor/).",[683,266,1302,9,820],"contributors",{"slug":1304,"featured":6,"template":687},"google-summer-of-code-2024-contribute-to-gitlab-and-git-to-prepare","content:en-us:blog:google-summer-of-code-2024-contribute-to-gitlab-and-git-to-prepare.yml","Google Summer Of Code 2024 Contribute To Gitlab And Git To 
Prepare","en-us/blog/google-summer-of-code-2024-contribute-to-gitlab-and-git-to-prepare.yml","en-us/blog/google-summer-of-code-2024-contribute-to-gitlab-and-git-to-prepare",{"_path":1310,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1311,"content":1317,"config":1323,"_id":1325,"_type":13,"title":1326,"_source":15,"_file":1327,"_stem":1328,"_extension":18},"/en-us/blog/high-availability-git-storage-with-praefect",{"title":1312,"description":1313,"ogTitle":1312,"ogDescription":1313,"noIndex":6,"ogImage":1314,"ogUrl":1315,"ogSiteName":672,"ogType":673,"canonicalUrls":1315,"schema":1316},"Meet Praefect: The traffic manager making your Git data highly available","This router and transaction manager ensures there are multiple copies of each Git repository available in the event of an outage – no NFS required.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749669204/Blog/Hero%20Images/traffic-intersection.jpg","https://about.gitlab.com/blog/high-availability-git-storage-with-praefect","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"Meet Praefect: The traffic manager making your Git data highly available\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Zeger-Jan van de Weg\"}],\n        \"datePublished\": \"2021-01-21\"\n      }",{"title":1312,"description":1313,"authors":1318,"heroImage":1314,"date":1319,"body":1320,"category":726,"tags":1321},[1061],"2021-01-21","\nAs critical software projects grow, scaling infrastructure to make the service [highly available](https://en.wikipedia.org/wiki/High_availability) is key. At GitLab, our biggest struggle in scaling was right in our name: Git.\n\n## The trouble with scaling Git\n\nGit is software that is distributed, but not usually run in a ‘highly available cluster,’ which is what GitLab needs. 
At first, we solved this with a [boring solution](https://handbook.gitlab.com/handbook/values/#boring-solutions), NFS – which exposes a shared filesystem across multiple machines and generally worked. As we’d soon find out, most NFS appliances were for bulk storage and not fast enough. This led to problems with GitLab’s Git access being slow.\n\nTo solve the speed problem we built [Gitaly, our service that provides high-level RPC access to Git repositories](https://docs.gitlab.com/ee/administration/gitaly/). \n\nWhen we started with [Gitaly v1.0](/blog/the-road-to-gitaly-1-0/), our goal was to remove the need for a network-attached filesystem access for Git data. When that was complete, the next problem to tackle was that all your data is only stored once. So, if you have a server down, or your hard disk dies, or something happens to this one copy, you're in deep trouble until a backup is restored. This is an issue for GitLab.com, but it’s also a big risk for our customers and community.\n\nBack at our [Summit in Cape Town](/company/culture/contribute/previous/#summit-in-cape-town-south-africa) in 2018, the Gitaly team (at the time, that was [Jacob Vosmaer](/company/team/?department=all#jacobvosmaer-gitlab) and me) and some other engineers discussed pursuing a fault-tolerant, highly available system for Git data. For about a month we went back and forth about how we would go about it – ranging from wild ideas to smaller iterations towards what we want. The challenge here was that the ultimate aim is always going to be 100% availability, but you’re never going to make that. So let's aim for a lot of nines (three nines being 99.9%, five being 99.999%, etc.) Ideally, we'd be able to iterate to 10 nines if we wanted to. 
\n\nEventually we chose the design of a proxy: introduce a new component in the GitLab architecture, which is Praefect, and then route all the traffic through it to Gitaly storage nodes to provide a [Gitaly Cluster](https://docs.gitlab.com/ee/administration/gitaly/praefect.html). Praefect inspects the request and tries to route it to the right Gitaly backend, checks that Gitaly is up, makes sure the copies of your data are up to date, and so on. \n\n## First iteration: Eventual consistency\n\nTo cut the scope, for our first iterations we settled on eventual consistency, which is fairly common – we even use it for some GitLab features. With Git data, if we are behind a minute, it's not a big deal because at GitLab at least 90% of operations on our Git data are just reads, compared to a very small volume of writes. If I run `git pull` and I'm one commit behind master, that's not ideal, but not a deal breaker in most cases. \n\nWith eventual consistency, each repository gets three copies: one primary and two secondary. We replicate your data from the primary to the other copies, so that if your primary is inaccessible, we can at least give you read access to the secondary copies until we recover the primary. There’s a chance the secondaries are one or two commits behind your primary, but it’s better than no access.\n\nWe rolled this out in [13.0](/releases/2020/05/22/gitlab-13-0-released/#gitaly-cluster-for-high-availability-git-storage) as generally available. \n\n## Strong consistency\n\nThe next stage was to work on strong consistency, where all of your three copies are always up to date. \n\nWhen you write to your Git repository, there’s a moment where Praefect says, “OK, I'm going to update branch A from #abc to #cbd.” If all three copies agree on the updates, then Praefect tells everyone to apply this update and now, almost at the same moment in time, they'll update the data to the same thing. 
Now you've got three copies that are up to date.\n\nSo, if one copy is offline for some reason – let’s say a network partition, or the disk is corrupted – we can serve from the other two copies. Then the data remains available, and you have more time to recover the third copy as an admin. Effectively, while you always have a designated primary, it's actually more like having _three_ primaries, because they are all in the same state. \n\nIf the default state of a system is consistent it requires maintaining this consistency on each mutation to the data that's performed. All possible requests to Gitaly are grouped into two classes: mutators and accessors. Meaning that there was a risk we had to migrate each mutator RPC individually. That would've been a major effort, and if possible, we wanted to push this problem to Git. Gitaly uses Git for the majority of write operations, and was thus the largest common denominator.\n\nSo Git had to become aware of transactions, which ideally isn't part of Git. There are more areas where it would be nice if Git was aware of business logic, but if we're honest with ourselves, it's not really Git's concern: authentication and authorization. At GitLab we use [Git Hooks](https://git-scm.com/docs/githooks.html#_hooks) for that. So the idea [applied and contributed](https://public-inbox.org/git/1de96b96e3448c8f7e7974f7c082fd08d2d14e96.1592475610.git.ps@pks.im/T/#m9ae42f583968aa1d8ca43bd3007333cf51a618cc) (thanks, [Patrick Steinhardt](/company/team/#pks-gitlab)!) was the same: when events happen with Git, execute a hook and allow Gitaly to execute business logic. Through the exit code of the hook, Git is signaled on how to proceed. In Git, these events are updates of any reference (for example, branches or tags). When this happens Git will then allow Gitaly to participate in a [three-phase commit](https://en.wikipedia.org/wiki/Three-phase_commit_protocol) transaction by communicating back to Praefect, and enforce consistency. 
So we got that released in Git, fixed a bug, and now we’re [rolling it out to almost all write requests](https://gitlab.com/gitlab-org/git/-/issues/79).\n\n## A defensible cost increase\n\nNow strong consistency is great, but we are effectively asking our customers, “Instead of one copy, why don't you triple your storage costs and your server costs and whatnot, and you have zero benefits unless something goes wrong.” That wasn't really appealing for most customers, but now we’ve sweetened the deal with increased performance and making the cost increase more manageable. \n\nSo, if you have three copies of your data that are up to date, then all of them could serve any request that doesn't mutate the data, right? Because you know they're up to date. Right now, [Pavlo](/company/team/?department=gitaly-team#8bitlife) is working on [read distribution, which we are making generally available in 13.8](https://gitlab.com/gitlab-com/www-gitlab-com/-/merge_requests/71960) (coming Jan. 22, 2021). [We rolled it out briefly before](https://gitlab.com/gitlab-com/www-gitlab-com/-/merge_requests/58694), but it didn’t scale as expected, so we’ve worked with QA to mitigate that.\n\nRight now, Praefect is rolled out to a very limited subset of projects on GitLab.com, because running it is expensive already. When I first proposed rolling it out for everyone, it was very quick to calculate that that will triple our Gitaly Clusters – not within the budget at all! So we're trying to iterate towards that goal. The first step is to work on allowing a [variable replication factor](https://docs.gitlab.com/ee/administration/gitaly/praefect.html#variable-replication-factor). 
It can be expensive to store a lot of data multiple times, so why don't we make it so that you can store some repositories three times and some just one time, and you don't get the guarantees and the availability of those with three copies.\n\n## Challenges and lessons learned\n\nSo we have Praefect, this new component, but it's not installed by default on GitLab Omnibus –\nyou have to enable it yourself. The [GitLab Development Kit](https://gitlab.com/gitlab-org/gitlab-development-kit) uses it as well as the tests on GitLab.com, for GitLab projects, but that wasn’t always the case. When you have an optional part in your architecture, if you’re debugging or talking with customers, there is the additional mental burden of verifying what the architecture looks like. Without it, you can make much quicker assumptions on what's going on and why it's working or why it isn't. Officially, we have deprecated NFS, so it makes sense to make it a required component so we can depend on it being there.\n\nAlso, as we add more features to Praefect, if it’s still optional then some customers get those added benefits and some don’t.\n\n### We should have put it in production sooner\n\nOur first iteration was just proxying the traffic, doing nothing with it, and verifying that it works. We didn't put it in production because it offered nothing to the community. But, it includes new components in your architecture, which our SREs need to know about, and there were a couple of bugs we found out much later. I was hesitant to put something in production that didn't offer anything in return, but if we’d been a little more aggressive with putting it out there – even just for a small subset of projects – we would understand more quickly what we're running, what was working, and what wasn't. \n\n### Applying big architectural changes takes time\n\nIf you ask customers to make giant architectural changes, it's going to take longer than you think. 
When we released Praefect and Gitaly Clusters in 13.0, it was fairly rough around the edges and some things weren't working as you would expect, but it was a good time to release because now, six months later, we see customers finally starting to implement it. They want to validate, try it out on a subset, and then finally roll it out for their whole GitLab instance. While that took longer than I expected, it's cool to see the numbers going up now, and adoption is growing quite rapidly.\n\n## More than just a traffic manager\n\nPraefect does much more than just inspect the traffic. If Gitaly goes down, ideally you want to notice that before you actually fire a request, which Praefect does. It does failover, so if one fails and it was designated as a primary, then it fails over to a secondary, which is now designated as a primary. \n\nI'm really excited for the next few years and the kind of things we are planning to build in Praefect and what that will deliver to GitLab.com and our customers and community. Where before we didn’t have very granular control over what we were doing or why we were doing it, now we can intercept and optimize.\n\n## What's next\n\nWe're shipping [HA Distributed Reads](https://gitlab.com/gitlab-org/gitaly/-/issues/3334) in GitLab 13.8 (Jan. 22, 2021). For 13.9, we're shooting for [strong consistency in the Gitaly Cluster](https://gitlab.com/groups/gitlab-org/-/epics/1189) and [variable replication factor](https://gitlab.com/groups/gitlab-org/-/epics/3372).\n\nFor GitLab self-managed users, consider enabling Praefect if you have high availability requirements. 
Visit our [Gitaly Clusters documentation](https://docs.gitlab.com/ee/administration/gitaly/praefect.html) to get started.\n\n_Major thanks to [Rebecca Dodd](/company/team#rebecca) who contributed to this post._\n\nCover image by [Yoel J Gonzalez](https://unsplash.com/@yoeljgonzalez?utm_source=unsplash&amp;utm_medium=referral&amp;utm_content=creditCopyText\") on [Unsplash](https://unsplash.com/s/photos/traffic?utm_source=unsplash&amp;utm_medium=referral&amp;utm_content=creditCopyText)\n{: .note}\n",[1322,9,774,230,683],"testing",{"slug":1324,"featured":6,"template":687},"high-availability-git-storage-with-praefect","content:en-us:blog:high-availability-git-storage-with-praefect.yml","High Availability Git Storage With Praefect","en-us/blog/high-availability-git-storage-with-praefect.yml","en-us/blog/high-availability-git-storage-with-praefect",{"_path":1330,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1331,"content":1337,"config":1343,"_id":1345,"_type":13,"title":1346,"_source":15,"_file":1347,"_stem":1348,"_extension":18},"/en-us/blog/how-to-use-oci-images-as-the-source-of-truth-for-continuous-delivery",{"title":1332,"description":1333,"ogTitle":1332,"ogDescription":1333,"noIndex":6,"ogImage":1334,"ogUrl":1335,"ogSiteName":672,"ogType":673,"canonicalUrls":1335,"schema":1336},"How to use OCI images as the source of truth for continuous delivery","Discover the benefits of using Open Container Initiative images as part of GitOps workflows and the many features GitLab offers to simplify deployments to Kubernetes.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1750097601/Blog/Hero%20Images/Blog/Hero%20Images/REFERENCE%20-%20Use%20this%20page%20as%20a%20reference%20for%20thumbnail%20sizes_76Tn5jFmEHY5LFj8RdDjNY_1750097600692.png","https://about.gitlab.com/blog/how-to-use-oci-images-as-the-source-of-truth-for-continuous-delivery","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        
\"headline\": \"How to use OCI images as the source of truth for continuous delivery\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Daniel Helfand\"}],\n        \"datePublished\": \"2025-02-19\",\n      }",{"title":1332,"description":1333,"authors":1338,"heroImage":1334,"date":1340,"body":1341,"category":680,"tags":1342},[1339],"Daniel Helfand","2025-02-19","Is [GitOps](https://about.gitlab.com/topics/gitops/) still GitOps if you are not using a git repository as your deployment artifact? While git remains central to GitOps workflows, storing infrastructure definitions as Open Container Initiative (OCI) artifacts in container registries has seen a rise in adoption as the source for GitOps deployments. In this article, we will dive deeper into the ideas behind this trend and how GitLab features support this enhancement to GitOps workflows.\n\n## What is GitOps?\n\nThe [OpenGitOps](https://opengitops.dev/) project has defined [four principles](https://opengitops.dev/#principles) for the practice of GitOps:\n- A [system managed by GitOps](https://github.com/open-gitops/documents/blob/v1.0.0/GLOSSARY.md#software-system) must have its [desired state expressed declaratively](https://github.com/open-gitops/documents/blob/v1.0.0/GLOSSARY.md#declarative-description).\n- Desired state is stored in a way that enforces immutability and versioning, and retains a complete version history.\n- Software agents automatically pull the desired state declarations from the source.\n- Software agents [continuously](https://github.com/open-gitops/documents/blob/v1.0.0/GLOSSARY.md#continuous) observe actual system state and [attempt to apply the desired state](https://github.com/open-gitops/documents/blob/v1.0.0/GLOSSARY.md#reconciliation).\n\nAn example of GitOps is storing the Kubernetes manifests for a microservice in a GitLab project. 
Those Kubernetes resources are then continuously reconciled by a [controller](https://kubernetes.io/docs/concepts/architecture/controller/) running on the Kubernetes cluster where the microservice is deployed to. This allows engineers to manage infrastructure using the same workflows as working with regular code, such as opening merge requests to make and review changes and versioning changes. GitOps also has operational benefits such as [preventing configuration drift](https://about.gitlab.com/topics/gitops/#cicd) and helps engineers audit what changes led to certain outcomes with deployments.\n\n## Benefits and limitations of git in GitOps workflows\n\nWhile git is an essential piece of GitOps workflows, git repositories were not designed to be deployed by GitOps controllers. Git does provide the ability for engineers to collaborate on infrastructure changes and audit these changes later on, but controllers do not need to download an entire git repository for a successful deployment. GitOps controllers simply need the infrastructure defined for a particular environment.\n\nAdditionally, an important piece of the deployment process is to [sign and verify deployments](https://docs.sigstore.dev/about/overview/#why-cryptographic-signing) to assure deployment changes to an environment are coming from a trusted source. While git commits can be signed and verified by GitOps controllers, commits may also capture other details not related to the deployment itself (e.g., documentation changes, updates to other environments, and git repository restructuring) or not enough of the deployment picture as a deployment may consist of multiple commits. This again feels like a case this git feature wasn’t designed for.\n\nAnother challenging aspect of git in GitOps workflows is that it can sometimes lead to more automation than expected. Soon after merging a change to the watched branch, it will be deployed. There are no controls in the process outside of git. 
How can you make sure that nothing gets deployed on a Friday late afternoon? What if teams responsible for deployment do not have permissions to merge changes in certain GitLab projects? Using OCI images adds a pipeline into the process, including all the delivery control features, like [approvals or deploy freezes](https://docs.gitlab.com/ee/ci/environments/protected_environments.html).\n\n## OCI images\n\nThe [Open Container Initiative](https://opencontainers.org/) has helped to define standards around container formats. While most engineers are familiar with building Dockerfiles into container images, many may not be as familiar with storing Kubernetes manifests in a container registry. Because [GitLab’s Container Registry](https://docs.gitlab.com/ee/user/packages/container_registry/) is OCI compliant, it allows for users to push Kubernetes manifests for a particular environment to a container registry. GitOps controllers, such as [Flux CD](https://about.gitlab.com/blog/why-did-we-choose-to-integrate-fluxcd-with-gitlab/), can use the manifests stored in this OCI artifact instead of needing to clone an entire git repository.\n\nOften in GitOps workflows, a git repository can include the infrastructure definitions for all environments that a microservice will be deployed to. By packaging the Kubernetes manifests for only a specific environment, Flux CD can download the minimum files needed to carry out a deployment to a specific environment.\n\n### Security benefits of using OCI artifacts\n\nAs mentioned previously, signing and verifying the artifacts to be deployed to an environment adds an additional layer of security for software projects. After Kubernetes manifests are pushed to a container registry, a tool like [Sigstore Cosign](https://docs.sigstore.dev/quickstart/quickstart-cosign/) can be used to sign the OCI image with a private key that can be securely stored in a GitLab project as a [CI/CD variable](https://docs.gitlab.com/ee/ci/variables/). 
Flux CD can then use a public key stored on a Kubernetes cluster to verify that a deployment is coming from a trusted source.\n\n## Using GitLab to push and sign OCI images\n\nGitLab offers many features that help simplify the process of packaging, signing, and deploying OCI images. A common way to structure GitLab projects with GitOps workflows is to have separate GitLab projects for microservices’ code and a single infrastructure repository for all microservices. If an application is composed of `n` microservices, this would require having `n +1` GitLab projects for an application.\n\nThe artifact produced by a code project is usually a container image that will be used to package the application. The infrastructure or delivery project will contain the Kubernetes manifests defining all the resources required to scale and serve traffic to each microservice. The artifact produced by this project is usually an OCI image used to deploy the application and other manifests to Kubernetes.\n\nIn this setup, separation of environments is handled by defining Kubernetes manifests in separate folders. These folders represent environments (e.g., development, staging, and production) that will host the application. When changes are made to the code project and a new container image is pushed, all that needs to be done to deploy these changes via GitLab’s integration with Flux CD is to edit the manifests under the environment folder to include the new image reference and open a merge request. Once that merge request is reviewed, approved, and merged, the delivery project’s CI/CD job will push a new OCI image that Flux CD will pick up and deploy to the new environment.\n\n![OCI images - flow chart](https://res.cloudinary.com/about-gitlab-com/image/upload/v1750097611/Blog/Content%20Images/Blog/Content%20Images/image1_aHR0cHM6_1750097611046.png)\n\nSigning an OCI image is as simple as including Cosign in your project’s CI/CD job. 
You can simply generate a new public and private key with Cosign by running the commands below locally. Just make sure to log in to your GitLab instance with the [glab CLI](https://gitlab.com/gitlab-org/cli/#installation) and replace the [`PROJECT_ID`] for the Cosign command with your [delivery project’s ID](https://docs.gitlab.com/ee/user/project/working_with_projects.html#access-a-project-by-using-the-project-id).\n\n```\nglab auth login\ncosign generate-key-pair gitlab://[PROJECT_ID]\n```\n\nOnce the cosign command runs successfully, you can see the Cosign keys added to your project under the CI/CD variables section under the key names `COSIGN_PUBLIC_KEY` and `COSIGN_PRIVATE_KEY`.\n\n### Example CI/CD job\n\nA GitLab CI/CD job for pushing an OCI image will look something like the following:\n\n```yaml\nfrontend-deploy:\n  rules:\n  - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH\n    changes:\n      paths:\n      - manifests/dev/frontend-dev.yaml\n  trigger:\n    include:\n      - component: gitlab.com/components/fluxcd/oci-artifact@0.3.1\n        inputs:\n          version: 0.3.1\n          kubernetes_agent_reference: gitlab-da/projects/tanuki-bank/flux-config:dev\n          registry_image_url: \"oci://$CI_REGISTRY_IMAGE/frontend\"\n          image_tag: dev\n          manifest_path: ./manifests/dev/frontend-dev.yaml\n          flux_oci_repo_name: frontend\n          flux_oci_namespace_name: frontend-dev\n          signing_private_key: \"$COSIGN_PRIVATE_KEY\"\n```\n\nThe [GitLab CI/CD Catalog](https://about.gitlab.com/blog/ci-cd-catalog-goes-ga-no-more-building-pipelines-from-scratch/) offers a GitLab-maintained [CI/CD component for working with OCI artifacts and Flux CD](https://gitlab.com/explore/catalog/components/fluxcd). 
This component allows development teams to push Kubernetes manifests as OCI images to GitLab’s Container Registry or an external container registry, sign the OCI image using Cosign, and immediately reconcile the newly pushed image via Flux CD.\n\nIn the example above, the Flux CD `component` is included in a `.gitlab-ci.yml` file of a GitLab project. Using the component’s `inputs`, users can define what registry to push the image to (i.e., `registry_image_url` and `image tag`), the file path to Kubernetes manifests that will be pushed (i.e., `manifest_path`), the cosign private key used to sign images (i.e., `signing_private_key`), and the Kubernetes namespace and Flux CD [OCIRepository](https://fluxcd.io/flux/components/source/ocirepositories/) name needed to sync updates to an environment (i.e., `flux_oci_namespace_name` and `flux_oci_repo_name`).\n\nThe `kubernetes_agent_reference` allows GitLab CI/CD jobs to inherit the `kubeconfig` needed to access a Kubernetes cluster without needing to store a `kubeconfig` CI/CD variable in each GitLab project. By setting up the [GitLab agent for Kubernetes](https://docs.gitlab.com/ee/user/clusters/agent/), you can configure all GitLab projects’ CI/CD jobs in a [GitLab group](https://docs.gitlab.com/ee/user/group/) to inherit permissions to deploy to the Kubernetes cluster.\n\nThe agent for Kubernetes context is typically configured wherever you configure the GitLab Agent for Kubernetes in your GitLab group. It is typically recommended that this be done in the project where Flux CD is managed. More information on configuring the agent for CI/CD access can be found in our [CI/CD workflow documentation](https://docs.gitlab.com/ee/user/clusters/agent/ci_cd_workflow.html).\n\nThe variables `$COSIGN_PRIVATE_KEY`, `$FLUX_OCI_REPO_NAME`, and `$FRONTEND_DEV_NAMESPACE` are values stored as CI/CD variables to easily access and mask these sensitive pieces of data in CI/CD logs. 
The `$CI_REGISTRY_IMAGE` is a variable that GitLab jobs have available by default that specifies the GitLab project’s container registry.\n\n### Deploy OCI images\n\nUsing [Flux CD with your GitLab projects](https://docs.gitlab.com/ee/user/clusters/agent/gitops/flux_tutorial.html), you can automate deployments and signing verification for your microservice’s environments. Once Flux CD is configured to sync from a GitLab project, you could add the following Kubernetes [custom resource definitions](https://kubernetes.io/docs/concepts/extend-kubernetes/api-extension/custom-resources/) to your project to sync your pushed OCI image.\n\n```yaml\napiVersion: v1\nkind: Namespace\nmetadata:\n  name: frontend-dev\n  labels:\n    name: frontend-dev\n---\napiVersion: bitnami.com/v1alpha1\nkind: SealedSecret\nmetadata:\n  name: cosign-public-key\n  namespace: frontend-dev\nspec:\n  encryptedData:\n    cosign.pub: AgAKgLf4VbVzJOmr6++k81LlFayx88AELaUQFNOaXmBF4G+fBfBYeABl0skNvMAa1UrPVNSfMIHgFoYHoO96g576a+epk6V6glOI+++XvYbfsygof3GGxe0nL5Qh2b3ge0fNpyd0kTPSjTj0YUhRhKtMGMRSRw1jrwhNcGxCHK+Byibs52v8Np49KsIkeZKbzLdgYABkrv+k0j7hQM+jR180NpG+2UiRvaXpPuogxkbj61FEqWGrJHk8IVyfl3eh+YhoXxOHGDqko6SUC+bUZPDBlU6yKegO0/8Zq3hwulrSEsEjzRZNK+RFVMOLWWuC6h+WGpYhAMcsZPwjjJ/y29KLNa/YeqkN/cdk488QyEFc6ehCxzhH67HxIn2PDa+KkEOTv2TuycGF+Q00jKIizXF+IwLx/oRb3pTCF0AoAY8D8N3Ey+KfkOjsBON7gGID8GbQiJqX2IgIZxFMk0JRzxbRKOEqn+guLd5Shj7CD1a1Mkk0DxBdbqrGv2XNYUaFPI7xd3rZXUJZlnv+fsmwswsiGWRuXwim45HScWzQnfgLAe7tv3spVEGeaO5apl6d89uN21PBQnfE/zyugB//7ZW9tSp6+CSMyc5HynxI8diafqiwKPgvzLmVWRnkvxJijoXicRr3sCo5RudZPSlnjfd7CKdhwEVvLl7dRR4e/XBMdxCzk1p52Pl+3/kJR+LJii5+iwOpYrpVltSZdzc/3qRd19yMpc9PWpXYi7HxTb24EOQ25i21eDJY1ceplDN6bRtop2quzkjlwVeE2i4cEsX/YG8QBtQbop/3fjiAjKaED3QH3Ul0PECS9ARTScSkcOL3I00Xpp8DyD+xH0/i9wCBRDmH3yKX18C8VrMq02ALSnlP7WCVVjCPzubqKx2LPZRxK9EG0fylwv/vWQzTUUwfbPQZsd4c75bSTsTvxqp/UcFaXA==\n  template:\n    metadata:\n      name: cosign-public-key\n      namespace: frontend-dev\n---\napiVersion: 
source.toolkit.fluxcd.io/v1beta2\nkind: OCIRepository\nmetadata:\n    name: frontend\n    namespace: frontend-dev\nspec:\n    interval: 1m\n    url: oci://registry.gitlab.com/gitlab-da/projects/tanuki-bank/tanuki-bank-delivery/frontend\n    ref:\n        tag: dev\n    verify:\n      provider: cosign\n      secretRef:\n        name: cosign-public-key\n---\napiVersion: kustomize.toolkit.fluxcd.io/v1\nkind: Kustomization\nmetadata:\n    name: frontend\n    namespace: frontend-dev\nspec:\n    interval: 1m\n    targetNamespace: frontend-dev\n    path: \".\"\n    sourceRef:\n        kind: OCIRepository\n        name: frontend\n    prune: true\n```\n\nThe [`Kustomization`](https://fluxcd.io/flux/components/kustomize/kustomizations/) resource allows for further customization of Kubernetes manifests and also specifies which namespace to deploy resources to. The `OCIRepository` resource for Flux CD allows users to specify the OCI image repository reference and tag to regularly sync from. Additionally, you will notice the `verify.provider` and `verify.secretRef` properties. These fields allow you to verify that the OCI image deployed to the cluster was signed by the corresponding Cosign private key used in the earlier CI/CD job.\n\nThe public key needs to be stored in a [Kubernetes secret](https://kubernetes.io/docs/concepts/configuration/secret/) that will need to be present in the same namespace as the `OCIRepository` resource. To have this secret managed by Flux CD and not store the secret in plain text, you can consider using [SealedSecrets](https://fluxcd.io/flux/guides/sealed-secrets/) to encrypt the value and have it be decrypted cluster side by a controller.\n\nFor a simpler approach not requiring SealedSecrets, you can [deploy the secret via a GitLab CI/CD](https://docs.gitlab.com/ee/user/clusters/agent/getting_started_deployments.html) job using the [`kubectl CLI`](https://kubernetes.io/docs/reference/kubectl/). 
In the non-sealed secret approach, you would simply remove the SealedSecret included above and run the job to deploy the public key secret before running the job to push the new OCI image. This will make sure the secret is stored securely in GitLab and make sure the secret can be accessed on the cluster by the OCIRepository. While this approach is a bit simpler, just note this is not a suitable approach for managing secrets in production.\n\n## The benefits of OCI, GitLab, and GitOps\n\nOCI artifacts allow for GitOps teams to take deployments even further with added security benefits and allowing for deployments to be minimal. Users still gain all the benefits offered by git as far as having a source of truth for infrastructure and collaborating on projects. OCI images add a packaging approach that improves the deployment aspect of GitOps.\n\nGitLab continues to learn from our customers and the cloud native community on building experiences that help simplify GitOps workflows. To get started using some of the features mentioned in this blog, you can sign up for a [60-day free trial of GitLab Ultimate](https://about.gitlab.com/free-trial/). 
We would also love to hear from users about their experiences with these tools, and you can provide feedback in the [community forum](https://forum.gitlab.com/t/oci-images-as-source-of-truth-for-gitops-with-gitlab/120965).\n",[108,683,1221,536,9,682],{"slug":1344,"featured":6,"template":687},"how-to-use-oci-images-as-the-source-of-truth-for-continuous-delivery","content:en-us:blog:how-to-use-oci-images-as-the-source-of-truth-for-continuous-delivery.yml","How To Use Oci Images As The Source Of Truth For Continuous Delivery","en-us/blog/how-to-use-oci-images-as-the-source-of-truth-for-continuous-delivery.yml","en-us/blog/how-to-use-oci-images-as-the-source-of-truth-for-continuous-delivery",{"_path":1350,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1351,"content":1357,"config":1364,"_id":1366,"_type":13,"title":1367,"_source":15,"_file":1368,"_stem":1369,"_extension":18},"/en-us/blog/how-we-decreased-gitlab-repo-backup-times-from-48-hours-to-41-minutes",{"title":1352,"description":1353,"ogTitle":1352,"ogDescription":1353,"noIndex":6,"ogImage":1354,"ogUrl":1355,"ogSiteName":672,"ogType":673,"canonicalUrls":1355,"schema":1356},"How we decreased GitLab repo backup times from 48 hours to 41 minutes","Learn how we tracked a performance bottleneck to a 15-year-old Git function and fixed it, leading to enhanced efficiency that supports more robust backup strategies and can reduce risk.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1750097166/Blog/Hero%20Images/Blog/Hero%20Images/REFERENCE%20-%20display%20preview%20for%20blog%20images%20%282%29_2pKf8RsKzAaThmQfqHIaa7_1750097166565.png","https://about.gitlab.com/blog/how-we-decreased-gitlab-repo-backup-times-from-48-hours-to-41-minutes","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"How we decreased GitLab repo backup times from 48 hours to 41 minutes\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Karthik 
Nayak\"},{\"@type\":\"Person\",\"name\":\"Manuel Kraft\"}],\n        \"datePublished\": \"2025-06-05\",\n      }",{"title":1352,"description":1353,"authors":1358,"heroImage":1354,"date":1361,"body":1362,"category":726,"tags":1363},[1359,1360],"Karthik Nayak","Manuel Kraft","2025-06-05","Repository backups are a critical component of any robust disaster recovery strategy. However, as repositories grow in size, the process of creating reliable backups becomes increasingly challenging.  Our own [Rails repository](https://gitlab.com/gitlab-org/gitlab) was taking 48 hours to back up — forcing impossible choices between backup frequency and system performance. We wanted to tackle this issue for our customers and for our own users internally. \n\nUltimately, we traced the issue to a 15-year-old Git function with O(N²) complexity and fixed it with an algorithmic change, __reducing backup times exponentially__. The result: lower costs, reduced risk, and backup strategies that actually scale with your codebase.\n\nThis turned out to be a Git scalability issue that affects anyone with large repositories. Here's how we tracked it down and fixed it. \n\n## Backup at scale\n\nFirst, let's look at the problem. As organizations scale their repositories and backups grow more complex, here are some of the challenges they can face:\n\n* **Time-prohibitive backups:** For very large repositories, creating a repository backup could take several hours, which can hinder the ability to schedule regular backups. 
\n* **Resource intensity:** Extended backup processes can consume substantial server resources, potentially impacting other operations.\n* **Backup windows:** Finding adequate maintenance windows for such lengthy processes can be difficult for teams running 24/7 operations.\n* **Increased failure risk:** Long-running processes are more susceptible to interruptions from network issues, server restarts, and system errors, which can force teams to restart the entire very long backup process from scratch.\n* **Race conditions:** Because it takes a long time to create a backup, the repository might have changed a lot during the process, potentially creating an invalid backup or interrupting the backup because objects are no longer available.\n\nThese challenges can lead to compromising on backup frequency or completeness – an unacceptable trade-off when it comes to data protection. Extended backup windows can force customers into workarounds. Some might adopt external tooling, while others might reduce backup frequency, resulting in potential inconsistent data protection strategies across organizations.\n\nNow, let's dig into how we identified a performance bottleneck, found a resolution, and deployed it to help cut backup times.\n\n## The technical challenge\n\nGitLab's repository backup functionality relies on the [`git bundle create`](https://git-scm.com/docs/git-bundle) command, which captures a complete snapshot of a repository, including all objects and references like branches and tags. This bundle serves as a restoration point for recreating the repository in its exact state.\n\nHowever, the implementation of the command suffered from poor scalability related to reference count, creating a performance bottleneck. As repositories accumulated more references, processing time increased exponentially. 
In our largest repositories containing millions of references, backup operations could extend beyond 48 hours.\n\n### Root cause analysis\n\nTo identify the root cause of this performance bottleneck, we analyzed a flame graph of the command during execution.\n\n![Flame graph showing command during execution](https://res.cloudinary.com/about-gitlab-com/image/upload/v1750097176/Blog/Content%20Images/Blog/Content%20Images/image1_aHR0cHM6_1750097176388.jpg)\n\nA flame graph displays the execution path of a command through its stack trace. Each bar corresponds to a function in the code, with the bar's width indicating how much time the command spent executing within that particular function.\n\nWhen examining the flame graph of `git bundle create` running on a repository with 10,000 references, approximately 80% of the execution time is consumed by the `object_array_remove_duplicates()` function. This function was introduced to Git in the [commit b2a6d1c686](https://gitlab.com/gitlab-org/git/-/commit/b2a6d1c686) (bundle: allow the same ref to be given more than once, 2009-01-17).\n\nTo understand this change, it's important to know that `git bundle create` allows users to specify which references to include in the bundle. For complete repository bundles, the `--all` flag packages all references.\n\nThe commit addressed a problem where users providing duplicate references through the command line – such as `git bundle create main.bundle main main` - would create a bundle without properly handling the duplicated main reference. Unbundling this bundle in a Git repository would break, because it tries to write the same ref twice. The code to avoid duplication uses nested `for` loops that iterate through all references to identify duplicates. 
This O(N²) algorithm becomes a significant performance bottleneck in repositories with large reference counts, consuming substantial processing time.\n\n### The fix: From O(N²) to efficient mapping\n\nTo resolve this performance issue, we contributed an upstream fix to Git that replaces the nested loops with a map data structure. Each reference is added to the map, which automatically ensures only a single copy of each reference is retained for processing.\n\nThis change dramatically enhances the performance of `git bundle create` and enables much better scalability in repositories with large reference counts. Benchmark testing on a repository with 10,000 references demonstrates a 6x performance improvement.\n\n```shell\nBenchmark 1: bundle (refcount = 100000, revision = master)\n  Time (mean ± σ): \t14.653 s ±  0.203 s\t[User: 13.940 s, System: 0.762 s]\n  Range (min … max):   14.237 s … 14.920 s\t10 runs\n\nBenchmark 2: bundle (refcount = 100000, revision = HEAD)\n  Time (mean ± σ):  \t2.394 s ±  0.023 s\t[User: 1.684 s, System: 0.798 s]\n  Range (min … max):\t2.364 s …  2.425 s\t10 runs\n\nSummary\n  bundle (refcount = 100000, revision = HEAD) ran\n\t6.12 ± 0.10 times faster than bundle (refcount = 100000, revision = master)\n```\n\nThe patch was accepted and [merged](https://gitlab.com/gitlab-org/git/-/commit/bb74c0abbc31da35be52999569ea481ebd149d1d) into upstream Git. 
At GitLab, we backported this fix to ensure our customers could benefit immediately, without waiting for the next Git release.\n\n## The result: Dramatically decreased backup times\n\nThe performance gains from this improvement have been nothing short of transformative:\n\n* **From 48 hours to 41 minutes:** Creating a backup of our largest repository (`gitlab-org/gitlab`) now takes just 1.4% of the original time.\n* **Consistent performance:** The improvement scales reliably across repository sizes.\n* **Resource efficiency:** We significantly reduced server load during backup operations.\n* **Broader applicability:** While backup creation sees the most dramatic improvement, all bundle-based operations that operate on many references benefit.\n\n## What this means for GitLab customers\n\nFor GitLab customers, this enhancement delivers immediate and tangible benefits on how organizations approach repository backup and disaster recovery planning:\n* **Transformed backup strategies**   \n  * Enterprise teams can establish comprehensive nightly schedules without impacting development workflows or requiring extensive backup windows.   \n  * Backups can now run seamlessly in the background during nightly schedules, instead of needing to be dedicated and lengthy.  \n* **Enhanced business continuity**  \n  * With backup times reduced from days to minutes, organizations significantly minimize their recovery point objectives (RPO). This translates to reduced business risk – in a disaster scenario, you're potentially recovering hours of work instead of days.  \n* **Reduced operational overhead**   \n  * Less server resource consumption and shorter maintenance windows.  \n  * Shorter backup windows mean reduced compute costs, especially in cloud environments, where extended processing time translates directly to higher bills.  \n* **Future-proofed infrastructure**   \n  * Growing repositories no longer force difficult choices between backup frequency and system performance.   
\n  * As your codebase expands, your backup strategy can scale seamlessly alongside it\n\nOrganizations can now implement more robust backup strategies without compromising on performance or completeness. What was once a challenging trade-off has become a straightforward operational practice.\n\nStarting with the [GitLab 18.0](https://about.gitlab.com/releases/2025/05/15/gitlab-18-0-released/) release, all GitLab customers regardless of their license tier can already fully take advantage of these improvements for their [backup](https://docs.gitlab.com/administration/backup_restore/backup_gitlab/) strategy and execution. There is no further change in configuration required.\n\n## What's next\n\nThis breakthrough is part of our ongoing commitment to scalable, enterprise-grade Git infrastructure. While the improvement of 48 hours to 41 minutes for backup creation time represents a significant milestone, we continue to identify and address performance bottlenecks throughout our stack.\n\nWe're particularly proud that this enhancement was contributed upstream to the Git project, benefiting not just GitLab users but the broader Git community. This collaborative approach to development ensures that improvements are thoroughly reviewed, widely tested, and available to all.\n\n> Deep infrastructure work like this is how we approach performance at GitLab. Join the GitLab 18 virtual launch event to see what other fundamental improvements we're shipping. 
[Register today!](https://about.gitlab.com/eighteen/)",[9,683,879,684,481],{"slug":1365,"featured":90,"template":687},"how-we-decreased-gitlab-repo-backup-times-from-48-hours-to-41-minutes","content:en-us:blog:how-we-decreased-gitlab-repo-backup-times-from-48-hours-to-41-minutes.yml","How We Decreased Gitlab Repo Backup Times From 48 Hours To 41 Minutes","en-us/blog/how-we-decreased-gitlab-repo-backup-times-from-48-hours-to-41-minutes.yml","en-us/blog/how-we-decreased-gitlab-repo-backup-times-from-48-hours-to-41-minutes",{"_path":1371,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1372,"content":1378,"config":1384,"_id":1386,"_type":13,"title":1387,"_source":15,"_file":1388,"_stem":1389,"_extension":18},"/en-us/blog/how-we-spent-two-weeks-hunting-an-nfs-bug",{"title":1373,"description":1374,"ogTitle":1373,"ogDescription":1374,"noIndex":6,"ogImage":1375,"ogUrl":1376,"ogSiteName":672,"ogType":673,"canonicalUrls":1376,"schema":1377},"How we spent two weeks hunting an NFS bug in the Linux kernel","Here's an in-depth recap of debugging a GitLab issue that culminated in a patch for the Linux kernel.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749672173/Blog/Hero%20Images/nfs-bug-hunt-detective.jpg","https://about.gitlab.com/blog/how-we-spent-two-weeks-hunting-an-nfs-bug","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"How we spent two weeks hunting an NFS bug in the Linux kernel\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Stan Hu\"}],\n        \"datePublished\": \"2018-11-14\",\n      }",{"title":1373,"description":1374,"authors":1379,"heroImage":1375,"date":1381,"body":1382,"category":726,"tags":1383},[1380],"Stan Hu","2018-11-14","\n\nUPDATE 2019-08-06: This bug has now been resolved in the following\ndistributions:\n\n* [Red Hat Enterprise Linux 7](https://access.redhat.com/errata/RHSA-2019:2029)\n* 
[Ubuntu](https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1802585)\n* Linux mainline: Backported to [4.14-stable](https://lkml.org/lkml/2019/8/2/562) and [4.19-stable](https://lkml.org/lkml/2019/8/2/639)\n\nOn Sep. 14, the GitLab support team escalated a critical\nproblem encountered by one of our customers: GitLab would run fine for a\nwhile, but after some time users encountered errors. When attempting to\nclone certain repositories via Git, users would see an opaque `Stale\nfile error` message. The error message persisted for a long time,\nblocking employees from being able to work, unless a system\nadministrator intervened manually by running `ls` in the directory\nitself.\n\nThus launched an investigation into the inner workings of Git and the\nNetwork File System (NFS). The investigation uncovered a bug with the\nLinux v4.0 NFS client and culiminated with a [kernel patch that was written by\nTrond Myklebust](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?h=be189f7e7f03de35887e5a85ddcf39b91b5d7fc1)\nand [merged in the latest mainline Linux kernel](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?h=c7a2c49ea6c9eebbe44ff2c08b663b2905ee2c13)\non Oct. 26.\n\nThis post describes the journey of investigating the issue and\ndetails the thought process and tools by which we tracked down the\nbug. It was inspired by the fine detective work in [How I spent two\nweeks hunting a memory leak in Ruby](http://www.be9.io/2015/09/21/memory-leak/)\nby Oleg Dashevskii.\n\nMore importantly, this experience exemplifies how open source software\ndebugging has become a team sport that involves expertise across\nmultiple people, companies, and locations. 
The GitLab motto \"[everyone can\ncontribute](/company/mission/#mission)\" applies not only to GitLab itself, but also to other open\nsource projects, such as the Linux kernel.\n\n## Reproducing the bug\n\nWhile we have run NFS on GitLab.com for many years, we have stopped\nusing it to access repository data across our application\nmachines. Instead, we have [abstracted all Git calls to\nGitaly](/blog/the-road-to-gitaly-1-0/).\nStill, NFS remains a supported configuration for our customers who\nmanage their own installation of GitLab, but we had never seen the exact\nproblem described by the customer before.\n\n[Our customer gave us a few important clues](https://gitlab.com/gitlab-org/gitlab-ce/issues/51437):\n\n1. The full error message read, `fatal: Couldn't read ./packed-refs: Stale file handle`.\n2. The error seemed to start when they started a manual Git garbage\ncollection run via `git gc`.\n3. The error would go away if a system administrator ran `ls` in the\ndirectory.\n4. The error also would go away after `git gc` process ended.\n\nThe first two items seemed obviously related. When you push to a branch\nin Git, Git creates a loose reference, a fancy name for a file that\npoints your branch name to the commit. For example, a push to `master`\nwill create a file called `refs/heads/master` in the repository:\n\n```bash\n$ cat refs/heads/master\n2e33a554576d06d9e71bfd6814ee9ba3a7838963\n```\n\n`git gc` has several jobs, but one of them is to collect these loose\nreferences (refs) and bundle them up into a single file called\n`packed-refs`. This makes things a bit faster by eliminating the need to\nread lots of little files in favor of reading one large one. 
For\nexample, after running `git gc`, an example `packed-refs` might look\nlike:\n\n```\n# pack-refs with: peeled fully-peeled sorted\n564c3424d6f9175cf5f2d522e10d20d781511bf1 refs/heads/10-8-stable\nedb037cbc85225261e8ede5455be4aad771ba3bb refs/heads/11-0-stable\n94b9323033693af247128c8648023fe5b53e80f9 refs/heads/11-1-stable\n2e33a554576d06d9e71bfd6814ee9ba3a7838963 refs/heads/master\n```\n\nHow exactly is this `packed-refs` file created? To answer that, we ran\n`strace git gc` with a loose ref present. Here are the pertinent lines\nfrom that:\n\n```\n28705 open(\"/tmp/libgit2/.git/packed-refs.lock\", O_RDWR|O_CREAT|O_EXCL|O_CLOEXEC, 0666) = 3\n28705 open(\".git/packed-refs\", O_RDONLY) = 3\n28705 open(\"/tmp/libgit2/.git/packed-refs.new\", O_RDWR|O_CREAT|O_EXCL|O_CLOEXEC, 0666) = 4\n28705 rename(\"/tmp/libgit2/.git/packed-refs.new\", \"/tmp/libgit2/.git/packed-refs\") = 0\n28705 unlink(\"/tmp/libgit2/.git/packed-refs.lock\") = 0\n```\n\nThe system calls showed that `git gc` did the following:\n\n1. Open `packed-refs.lock`. This tells other processes that `packed-refs` is locked and cannot be changed.\n1. Open `packed-refs.new`.\n1. Write loose refs to `packed-refs.new`.\n1. Rename `packed-refs.new` to `packed-refs`.\n1. Remove `packed-refs.lock`.\n1. Remove loose refs.\n\nThe fourth step is the key here: the rename where Git puts `packed-refs`\ninto action. In addition to collecting loose refs, `git gc` also\nperforms a more expensive task of scanning for unused objects and\nremoving them. This task can take over an hour for large\nrepositories.\n\nThat made us wonder: for a large repository, does `git gc` keep the file\nopen while it's running this sweep? 
Looking at the `strace` logs and\nprobing the process with `lsof`, we found that it did the following:\n\n![Git Garbage Collection](https://about.gitlab.com/images/blogimages/nfs-debug/git-gc-diagram.svg)\n\nNotice that `packed-refs` is closed only at the end, after the potentially\nlong `Garbage collect objects` step takes place.\n\nThat made us wonder: how does NFS behave when one node has `packed-refs`\nopen while another renames over that file?\n\nTo experiment, we asked the customer to run the following experiment on\ntwo different machines (Alice and Bob):\n\n1. On the shared NFS volume, create two files: `test1.txt` and\n`test2.txt` with different contents to make it easy to distinguish them:\n\n    ```bash\n    alice $ echo \"1 - Old file\" > /path/to/nfs/test1.txt\n    alice $ echo \"2 - New file\" > /path/to/nfs/test2.txt\n    ```\n\n2. On machine Alice, keep a file open to `test1.txt`:\n\n    ```bash\n     alice $ irb\n     irb(main):001:0> File.open('/path/to/nfs/test1.txt')\n    ```\n\n3. On machine Alice, show the contents of `test1.txt` continuously:\n\n    ```bash\n    alice $ while true; do cat test1.txt; done\n    ```\n\n4. Then on machine Bob, run:\n\n    ```bash\n    bob $ mv -f test2.txt test1.txt\n    ```\n\nThis last step emulates what `git gc` does with `packed-refs` by\noverwriting the existing file.\n\nOn the customer's machine, the result looked something like:\n\n```\n1 - Old file\n1 - Old file\n1 - Old file\ncat: test1.txt: Stale file handle\n```\n\nBingo! We seemed to reproduce the problem in a controlled way. However,\nthe same experiment using a Linux NFS server did not have this\nproblem. The result was what you would expect: the new contents were\npicked up after the rename:\n\n```\n1 - Old file\n1 - Old file\n1 - Old file\n2 - New file  \u003C--- RENAME HAPPENED\n2 - New file\n2 - New file\n```\n\nWhy the difference in behavior? 
It turns out that the customer was using\nan [Isilon NFS\nappliance](https://www.dellemc.com/en-us/storage/isilon/index.htm) that\nonly supported NFS v4.0. By switching the mount parameters to v4.0 via\nthe `vers=4.0` parameter in `/etc/fstab`, the test revealed a different\nresult with the Linux NFS server:\n\n```\n1 - Old file\n1 - Old file\n1 - Old file\n1 - Old file \u003C--- RENAME HAPPENED\n1 - Old file\n1 - Old file\n```\n\nInstead of a `Stale file handle`, the Linux NFS v4.0 server showed stale\n*contents*. It turns out this difference in behavior can be explained by\nthe NFS spec. From [RFC\n3010](https://tools.ietf.org/html/rfc3010#page-153):\n\n> A filehandle may or may not become stale or expire on a rename.\n> However, server implementors are strongly encouraged to attempt to keep\n> file handles from becoming stale or expiring in this fashion.\n\nIn other words, NFS servers can choose how to behave if a file is\nrenamed; it's perfectly valid for any NFS server to return a `Stale file\nerror` when that happens. We surmised that even though the results were\ndifferent, the problem was likely related to the same issue. We\nsuspected some cache validation issue because running `ls` in the\ndirectory would \"clear\" the error. Now that we had a reproducible test\ncase, we asked the experts: the Linux NFS maintainers.\n\n## False path: NFS server delegations\n\nWith a clear set of reproduction steps, I [sent an email to the Linux\nNFS mailing list](https://marc.info/?l=linux-nfs&m=153721785231614&w=2)\ndescribing what we had found. Over the week, I went back and forth with\nBruce Fields, the Linux NFS server maintainer, who suggested this was a\nNFS bug and that it would be useful to look at the network traffic. He\nthought there might be an issue with NFS server delegations.\n\n### What is an NFS server delegation?\n\nIn a nutshell, NFS v4 introduced server delegations as a way to speed up file access. 
A server can\ndelegate read or write access to a client so that the client doesn't\nhave to keep asking the server whether that file has changed by another\nclient. In simpler terms, a write delegation is akin to someone lending\nyou a notebook and saying, \"Go ahead and write in here, and I'll take it\nback when I'm ready.\" Instead of having to ask to borrow the notebook\nevery time you want to write a new paragraph, you have free rein until\nthe owner reclaims the notebook. In NFS terms, this reclamation process\nis called a delegation recall.\n\nIndeed, a bug in the NFS delegation recall might explain the `Stale file\nhandle` problem. Remember that in the earlier experiment, Alice had\nan open file to `test1.txt` when it was replaced by `test2.txt` later.\nIt's possible that the server failed to recall the delegation on\n`test1.txt`, resulting in an incorrect state. To check whether this was\nan issue, we turned to `tcpdump` to capture NFS traffic and used\nWireshark to visualize it.\n\n[Wireshark](https://www.wireshark.org/) is a wonderful open source tool\nfor analyzing network traffic, and it's especially good for viewing NFS\nin action. We captured a trace using the following command on the NFS server:\n\n```\ntcpdump -s 0 -w /tmp/nfs.pcap port 2049\n```\n\nThis command captures all NFS traffic, which typically is on TCP port 2049.\nBecause our experiment worked properly with NFS v4.1 but did not\n with NFS v4.0, we could compare and contrast how NFS behaved\nin a non-working and a working case. With Wireshark, we saw the\nfollowing behavior:\n\n### NFS v4.0 (stale file case)\n\n![NFS v4.0 flow](https://about.gitlab.com/images/blogimages/nfs-debug/nfs-4.0-flow.svg)\n\nIn this diagram, we can see in step 1 Alice opens `test1.txt` and gets\nback an NFS file handle along with a `stateid` of 0x3000. 
When Bob\nattempts to rename the file, the NFS server tells to Bob to retry via\nthe `NFS4ERR_DELAY` message while it recalls the delegation from Alice\nvia the `CB_RECALL` message (step 3). Alice then returns her delegation\nvia `DELEGRETURN` (step 4), and then Bob attempts to send another\n`RENAME` message (step 5). The `RENAME` completes in both cases, but\nAlice continues to read using the same file handle.\n\n### NFS v4.1 (working case)\n\n![NFS v4.1 flow](https://about.gitlab.com/images/blogimages/nfs-debug/nfs-4.1-flow.svg)\n\nThe main difference happens at the bottom at step 6. Notice in NFS v4.0\n(the stale file case), Alice attempts to reuse the same `stateid`. In\nNFS v4.1 (working case), Alice performs an additional `LOOKUP` and\n`OPEN`, which causes the server to return a different `stateid`. In v4.0,\nthese extra messages are never sent. This explains why Alice continues\nto see stale content because she uses the old file handle.\n\nWhat makes Alice decide to do the extra `LOOKUP`? The delegation recall\nseemed to work fine, but perhaps there was still an issue, such as a\nmissing invalidation step. To rule that out, we disabled NFS delegations\nby issuing this command on the NFS server itself:\n\n```sh\necho 0 > /proc/sys/fs/leases-enable\n```\n\nWe repeated the experiment, but the problem persisted. All this\nconvinced us this wasn't a NFS server issue or a problem with NFS\ndelegations; it was a problem that led us to look into the NFS client\nwithin the kernel.\n\n## Digging deeper: the Linux NFS client\n\nThe first question we had to answer for the NFS maintainers:\n\n### Was this problem still in the latest upstream kernel?\n\nThe issue occurred with both CentOS 7.2 and Ubuntu 16.04 kernels, which\nused versions 3.10.0-862.11.6 and 4.4.0-130, respectively. 
However, both\nthose kernels lagged the most recent kernel, which was 4.19-rc2 at the\ntime.\n\nWe deployed a new Ubuntu 16.04 virtual machine on Google Cloud Platform\n(GCP), cloned the latest Linux kernel, and set up a kernel development\nenvironment. After generating a `.config` file via `make menuconfig`, we\nchecked two items:\n\n1. The NFS driver was compiled as a module (`CONFIG_NFSD=m`).\n2. The [required GCP kernel settings](https://cloud.google.com/compute/docs/images/building-custom-os)\nwere set properly.\n\nJust as a geneticist would use fruit flies to study evolution in\nreal time, the first item allowed us to make quick changes in the NFS\nclient without having to reboot the kernel. The second item was required\nto ensure that the kernel would actually boot after it was\ninstalled. Fortunately, the default kernel settings had all the settings\nright out of the box.\n\nWith our custom kernel, we verified that the stale file problem still\nexisted in the latest version. That begged a number of questions:\n\n1. Where exactly was this problem happening?\n2. Why was this problem happening with NFS v4.0 but not in v4.1?\n\nTo answer these questions, we began to investigate the NFS [source\ncode](/solutions/source-code-management/). Since we didn't have a kernel debugger available, we sprinkled the\nsource code with two main types of calls:\n\n1. `pr_info()` ([what used to be `printk`](https://lwn.net/Articles/487437/)).\n2. 
`dump_stack()`: This would show the stack trace of the current function call.\n\nFor example, one of the first things we did was hook into the\n`nfs4_file_open()` function in `fs/nfs/nfs4file.c`:\n\n```c\nstatic int\nnfs4_file_open(struct inode *inode, struct file *filp)\n{\n...\n        pr_info(\"nfs4_file_open start\\n\");\n        dump_stack();\n```\n\nAdmittedly, we could have [activated the `dprintk` messages with the\nLinux dynamic\ndebug](https://www.kernel.org/doc/html/v4.15/admin-guide/dynamic-debug-howto.html)\nor used\n[`rpcdebug`](https://www.thegeekdiary.com/how-to-enable-nfs-debug-logging-using-rpcdebug/),\nbut it was nice to be able to add our own messages to verify changes\nwere being made.\n\nEvery time we made changes, we recompiled the module and reinstalled it\ninto the kernel via the commands:\n\n```sh\nmake modules\nsudo umount /mnt/nfs-test\nsudo rmmod nfsv4\nsudo rmmod nfs\nsudo insmod fs/nfs/nfs.ko\nsudo mount -a\n```\n\nWith our NFS module installed, repeating the experiments would print\nmessages that would help us understand the NFS code a bit more. For\nexample, you can see exactly what happens when an application calls `open()`:\n\n```\nSep 24 20:20:38 test-kernel kernel: [ 1145.233460] Call Trace:\nSep 24 20:20:38 test-kernel kernel: [ 1145.233462]  dump_stack+0x8e/0xd5\nSep 24 20:20:38 test-kernel kernel: [ 1145.233480]  nfs4_file_open+0x56/0x2a0 [nfsv4]\nSep 24 20:20:38 test-kernel kernel: [ 1145.233488]  ? nfs42_clone_file_range+0x1c0/0x1c0 [nfsv4]\nSep 24 20:20:38 test-kernel kernel: [ 1145.233490]  do_dentry_open+0x1f6/0x360\nSep 24 20:20:38 test-kernel kernel: [ 1145.233492]  vfs_open+0x2f/0x40\nSep 24 20:20:38 test-kernel kernel: [ 1145.233493]  path_openat+0x2e8/0x1690\nSep 24 20:20:38 test-kernel kernel: [ 1145.233496]  ? mem_cgroup_try_charge+0x8b/0x190\nSep 24 20:20:38 test-kernel kernel: [ 1145.233497]  do_filp_open+0x9b/0x110\nSep 24 20:20:38 test-kernel kernel: [ 1145.233499]  ? 
__check_object_size+0xb8/0x1b0\nSep 24 20:20:38 test-kernel kernel: [ 1145.233501]  ? __alloc_fd+0x46/0x170\nSep 24 20:20:38 test-kernel kernel: [ 1145.233503]  do_sys_open+0x1ba/0x250\nSep 24 20:20:38 test-kernel kernel: [ 1145.233505]  ? do_sys_open+0x1ba/0x250\nSep 24 20:20:38 test-kernel kernel: [ 1145.233507]  __x64_sys_openat+0x20/0x30\nSep 24 20:20:38 test-kernel kernel: [ 1145.233508]  do_syscall_64+0x65/0x130\n```\n\nWhat are the `do_dentry_open` and `vfs_open` calls above? Linux has a\n[virtual filesystem\n(VFS)](https://www.kernel.org/doc/Documentation/filesystems/vfs.txt), an\nabstraction layer which provides a common interface for all\nfilesystems. The VFS documentation explains:\n\n> The VFS implements the open(2), stat(2), chmod(2), and similar system\n> calls. The pathname argument that is passed to them is used by the VFS\n> to search through the directory entry cache (also known as the dentry\n> cache or dcache). This provides a very fast look-up mechanism to\n> translate a pathname (filename) into a specific dentry. Dentries live\n> in RAM and are never saved to disc: they exist only for performance.\n\n### This gave us a clue: what if this was a problem with the dentry cache?\n\nWe noticed a lot of dentry cache validation was done in\n`fs/nfs/dir.c`. In particular, `nfs4_lookup_revalidate()` sounded\npromising. 
As an experiment, we hacked that function to bail\nout early:\n\n\n```diff\ndiff --git a/fs/nfs/dir.c b/fs/nfs/dir.c\nindex 8bfaa658b2c1..ad479bfeb669 100644\n--- a/fs/nfs/dir.c\n+++ b/fs/nfs/dir.c\n@@ -1159,6 +1159,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)\n        trace_nfs_lookup_revalidate_enter(dir, dentry, flags);\n        error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);\n        trace_nfs_lookup_revalidate_exit(dir, dentry, flags, error);\n+       goto out_bad;\n        if (error == -ESTALE || error == -ENOENT)\n                goto out_bad;\n        if (error)\n```\n\nThat made the stale file problem in our experiment go away! Now we were onto something.\n\nTo answer, \"Why does this problem not happen in NFS v4.1?\", we added\n`pr_info()` calls to every `if` block in that function. After running our\nexperiments with NFS v4.0 and v4.1, we found this special condition being run\nin the v4.1 case:\n\n```c\n        if (NFS_SB(dentry->d_sb)->caps & NFS_CAP_ATOMIC_OPEN_V1) {\n          goto no_open;\n        }\n```\n\nWhat is `NFS_CAP_ATOMIC_OPEN_V1`? We saw [this kernel\npatch](https://patchwork.kernel.org/patch/2300511/) mentioned this was\nan NFS v4.1-specific feature, and the code in `fs/nfs/nfs4proc.c`\nconfirmed that this flag was a capability present in v4.1 but not in v4.0:\n\n```c\nstatic const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = {\n        .minor_version = 1,\n        .init_caps = NFS_CAP_READDIRPLUS\n                | NFS_CAP_ATOMIC_OPEN\n                | NFS_CAP_POSIX_LOCK\n                | NFS_CAP_STATEID_NFSV41\n                | NFS_CAP_ATOMIC_OPEN_V1\n```\n\nThat explained the difference in behavior: in the v4.1 case, the `goto\nno_open` would cause more validation to happen in\n`nfs_lookup_revalidate()`, but in v4.0, the `nfs4_lookup_revalidate()`\nwould return earlier. 
Now, how do we actually solve the problem?\n\n## The solution\n\nI reported the [findings to the NFS mailing\nlist](https://marc.info/?l=linux-nfs&m=153782129412452&w=2) and proposed\n[a naive patch](https://marc.info/?l=linux-nfs&m=153807208928650&w=2). A\nweek after the report, Trond Myklebust sent a [patch series to the list\nfixing this bug and found another related issue for NFS\nv4.1](https://marc.info/?l=linux-nfs&m=153816500525563&w=2).\n\nIt turns out the fix for the NFS v4.0 bug was deeper in the code base\nthan we had looked. Trond summarized it well in the\n[patch](https://marc.info/?l=linux-nfs&m=153816500525564&w=2):\n\n> We need to ensure that inode and dentry revalidation occurs correctly\n> on reopen of a file that is already open. Currently, we can end up not\n> revalidating either in the case of NFSv4.0, due to the 'cached open'\n> path.  Let's fix that by ensuring that we only do cached open for the\n> special cases of open recovery and delegation return.\n\nWe confirmed that this fix made the stale file problem go away and filed\nbug reports with\n[Ubuntu](https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1802585)\nand [RedHat](https://bugzilla.redhat.com/show_bug.cgi?id=1648482).\n\nKnowing full well that kernel changes may take a while to make it to\nstable releases, we also added a [workaround in\nGitaly](https://gitlab.com/gitlab-org/gitaly/merge_requests/924) to deal\nwith this issue. We did experiments to test that calling `stat()` on the\n`packed-refs` file appears to cause the kernel to revalidate the dentry\ncache for the renamed file. For simplicity, this is implemented in\nGitaly regardless of whether the filesystem is NFS; we only do this once\nbefore Gitaly \"opens\" a repository, and there are already other `stat()`\ncalls that check for other files.\n\n## What we learned\n\nA bug can be anywhere in your software stack, and sometimes you have to\nlook beyond your application to find it. 
Having helpful partners in the\nopen source world makes that job much easier.\n\nWe are extremely grateful to Trond Myklebust for fixing the problem, and\nBruce Fields for responding to questions and helping us understand\nNFS. Their responsiveness and professionalism truly reflects the best of\nthe open source community.\n\nPhoto by [dynamosquito](https://www.flickr.com/photos/dynamosquito) on [Flickr](https://www.flickr.com/photos/dynamosquito/4265771518)\n{: .note}\n",[266,9,774,683],{"slug":1385,"featured":6,"template":687},"how-we-spent-two-weeks-hunting-an-nfs-bug","content:en-us:blog:how-we-spent-two-weeks-hunting-an-nfs-bug.yml","How We Spent Two Weeks Hunting An Nfs Bug","en-us/blog/how-we-spent-two-weeks-hunting-an-nfs-bug.yml","en-us/blog/how-we-spent-two-weeks-hunting-an-nfs-bug",{"_path":1391,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1392,"content":1398,"config":1403,"_id":1405,"_type":13,"title":1406,"_source":15,"_file":1407,"_stem":1408,"_extension":18},"/en-us/blog/journey-through-gits-20-year-history",{"title":1393,"description":1394,"ogTitle":1393,"ogDescription":1394,"noIndex":6,"ogImage":1395,"ogUrl":1396,"ogSiteName":672,"ogType":673,"canonicalUrls":1396,"schema":1397},"Journey through Git's 20-year history","Follow along as we reminisce about the first commit, the unique aspects of the earliest releases, and the confusion sparked by an update to the git-push(1) default behavior.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1750097380/Blog/Hero%20Images/Blog/Hero%20Images/git-20-years-opt2_TWNsNk8KH43b3jP0KLD0U_1750097380123.png","https://about.gitlab.com/blog/journey-through-gits-20-year-history","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"Journey through Git's 20-year history\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Patrick Steinhardt\"}],\n        \"datePublished\": \"2025-04-14\",\n      
}",{"title":1393,"description":1394,"authors":1399,"heroImage":1395,"date":1400,"body":1401,"category":680,"tags":1402},[677],"2025-04-14","The Git project has just turned 20 years old. A lot has happened during these years, and while the conceptual design of Git hasn't changed significantly since its inception, the way users interact with the tool has changed quite significantly. We at GitLab are proud to build on top of this critical piece of software and to be part of its history.\n\nJoin us on a journey through Git's history to explore how it has evolved over the years.\n\n## The first commit\n\nThe first commit was made on April 7, 2005, by Linus Torvalds, the creator of the Linux kernel: `e83c5163316 (Initial revision\nof \"git\", the information manager from hell, 2005-04-07)`.\n\nAs we can see, this\ncommit does not contain a lot of files:\n\n```shell\n$ git ls-tree e83c5163316\n100644 blob a6bba79ba1f46a1bbf7773449c3bd2bb9bf48e8b\tMakefile\n100644 blob 27577f76849c09d3405397244eb3d8ae1d11b0f3\tREADME\n100644 blob 98a32a9ad39883c6d05a000a68511d4b1ee2b3c7\tcache.h\n100644 blob 74a0a234dd346fff51c773aa57d82fc4b83a8557\tcat-file.c\n100644 blob 840307af0cfaab31555795ce7175d5e9c9f981a0\tcommit-tree.c\n100644 blob 25dc13fe101b219f74007f3194b787dd99e863da\tinit-db.c\n100644 blob c924a6e0fc4c36bad6f23cb87ee59518c771f936\tread-cache.c\n100644 blob 1b47742d8cbc0d98903777758b7b519980e7499e\tread-tree.c\n100644 blob b8522886a15db861508fb6d03d4d88d6de912a4b\tshow-diff.c\n100644 blob 5085a5cb53ee52e1886ff6d46c609bdb2fc6d6cd\tupdate-cache.c\n100644 blob 921f981353229db0c56103a52609d35aff16f41b\twrite-tree.c\n```\n\nIn addition to build infrastructure, the first commit provides seven top-level commands:\n\n- `init-db` to initialize a new Git repository\n- `update-cache` to add files to the index\n- `write-tree` to take what is in the index and create a new tree from it\n- `read-tree` to read a tree object\n- `commit-tree` to create a commit from a tree\n- `cat-file` to 
read a specific object into a temporary file\n\nNote that the `git` command itself did not yet exist at this point in time.\nInstead, these commands had to be executed directly.\n\nAs example, let's create a\nnew repository:\n\n```shell\n$ mkdir repo\n$ cd repo\n$ init-db\ndefaulting to private storage area\n$ ls -a\n.  ..  .dircache\n```\n\nThat looks quite unfamiliar: There is no `.git` directory, but there is a\n`.dircache` directory. And where was the private storage area?\n\nThe early design of Git distinguished between a \"shared\" and \"private\" object\nstorage area. This object storage area was where all of your Git objects went. For example, your\ncommits and blobs.\n\nBy default, `init-db` created a private object storage area that was only used for\nthe managed directory that it was created in. A \"shared\" object storage area, on\nthe other hand, shared object content across multiple managed directories so\nthat the same object did not need to be stored twice.\n\n### Create a commit\n\nSo, now that we have a repository, how did we create a commit? Well, it isn't as\neasy as today's `git add . && git commit`. Instead, you had to:\n\n1. Update the index by calling `update-cache` for every file that you want to\n   add.\n1. Write a new tree by calling `write-tree`, which takes everything you have\n   added to the index.\n1. Set up environment variables to tell Git who you are.\n1. Write a commit object by calling `commit-tree`.\n\nLet’s create a commit in the repository:\n\n```shell\n$ echo content-1 >file-a\n$ update-cache file-a\n$ echo content-2 >file-b\n$ update-cache file-b\n$ write-tree\n3f143dfb48f2d84936626e2e5402e1f10c2050fb\n$ export COMMITTER_NAME=\"Patrick Steinhardt\"\n$ export COMMITER_EMAIL=ps@pks.im\n$ echo \"commit message\" | commit-tree 3f143dfb48f2d84936626e2e5402e1f10c2050fb\nCommitting initial tree 3f143dfb48f2d84936626e2e5402e1f10c2050fb\n5f8e928066c03cebe5fd0a0cc1b93d058155b969\n```\n\nThis isn't exactly ergonomic, but it works! 
Let's have a look at the generated\ncommit:\n\n```shell\n$ cat-file 5f8e928066c03cebe5fd0a0cc1b93d058155b969\ntemp_git_file_rlTXtE: commit\n$ cat temp_git_file_rlTXtE\ntree 3f143dfb48f2d84936626e2e5402e1f10c2050fb\nauthor Patrick Steinhardt \u003Cps@pks.im> Wed Mar 26 13:10:16 2025\ncommitter Patrick Steinhardt \u003Cps@pks.im> Wed Mar 26 13:10:16 2025\n\ncommit message\n```\n\nNote that `cat-file` didn't print the contents directly, but instead wrote\nit into a temporary file first. But the contents of the file looked exactly how a\nmodern commit would look.\n\n### Making changes\n\nNow that we have files, how do we get their status? You might have guessed it:\nthis could be done with `show-diff`:\n\n```shell\n$ show-diff\nfile-a: ok\nfile-b: ok\n\n$ echo modified-content >file-a\n$ show-diff\n--- -\t2025-03-26 13:14:53.457611094 +0100\n+++ file-a\t2025-03-26 13:14:52.230085756 +0100\n@@ -1 +1 @@\n-content-1\n+modified-content\nfile-a:  46d8be14cdec97aac6a769fdbce4db340e888bf8\nfile-b: ok\n```\n\nAmazingly, `show-diff` even knew to already generate diffs between the old and\nnew state of modified files! Funny enough though, Git achieved this by simply\nexecuting the diff(1) Unix tool.\n\nIn summary, all of this was still rather bare-bones, but it performed all of the\nnecessary duties to track history. There were still many limitations:\n\n- There was no easy way yet to switch between commits.\n- There was no way to show logs.\n- There were no branches, tags, or even references. Users were expected to manually\n  keep track of object IDs.\n- There was no way to synchronize two repositories with one another. Instead,\n  users were expected to use rsync(1) to synchronize the `.dircache` directories.\n- There was no way to perform merges.\n\n## Git 0.99\n\nThe first test release of Git was Version 0.99. This release came only two months after\nthe initial commit, but already contained 1,076 commits. There had been almost 50\ndifferent developers involved. 
The most frequent committer at this point was\nLinus himself, but he was closely followed by Junio Hamano, the current maintainer.\n\nA lot of things had changed since the initial commit:\n\n- Git started to track different development branches by using references, which\n  in most cases removes the need to manually track object IDs.\n- There was a new remote protocol that allows two repositories to exchange\n  objects with one another.\n- The `.dircache` directory was renamed to `.git`.\n- It became possible to merge single files with one another.\n\nThe most important visible change, though, was the introduction of\nthe top-level `git` command and its subcommands. Interestingly, this release\nalso created the notion of \"plumbing\" and \"porcelain\" commands:\n\n- \"Plumbing\" tools are the low-level commands that access the underlying Git\n  repository.\n- \"Porcelain\" tools are shell scripts that wrap the plumbing commands to provide\n  a nicer, high-level user interface.\n\nThis split still exists nowadays as documented in\n[`git(1)`](https://git-scm.com/docs/git#_high_level_commands_porcelain), but because \nmost porcelain tools have been rewritten from shell scripts to C, the line between these two\ncategories has started to blur significantly.\n\n## Linus hands over maintainership\n\nLinus never started Git out of love for version control systems, but because there was a need to replace BitKeeper for Linux kernel development. As such, he never planned to keep maintaining Git forever. The intent was to maintain it until someone trustworthy stepped up.\n\nThat someone was Junio Hamano. Junio got involved in Git about a week after Linus’s first commit and already had a couple of hundred commits in the history after the Git 0.99 release. So, on July 26, 2005, [Linus made Junio the new maintainer of the Git project](https://lore.kernel.org/git/Pine.LNX.4.58.0507262004320.3227@g5.osdl.org/). 
While Linus has continued to contribute to Git, his involvement with the project faded over time, which is only natural considering that he is quite busy as head of the Linux project.\n\nJunio is still leading the Git project today.\n\n## Git 1.0\n\nThe first major release of Git happened on December 21, 2005, by\nJunio. Interestingly enough, there had been 34 releases between Version 0.99\nand Version 1.0: 0.99.1 to 0.99.7, 0.99.7a to 0.99.7d, 0.99.8 to 0.99.8g, and\n0.99.9 up to 0.99.9n.\n\nOne of the more important milestones since 0.99 was probably the addition of the `git-merge(1)`\ncommand that allows one to merge two trees with one another. This is in stark\ncontrast to before, where one had to basically script the merges file by file.\n\n### Remotes\n\nAnother significant change was the introduction of shorthand notation for\nremote repositories. While Git already knew how to talk to remote repositories,\nusers always had to specify the URL to fetch from every single time they wanted\nto fetch changes from it. This was quite unfriendly to the users, because, typically, they wanted to interact with the same remote over and over again.\n\nYou may know about how remotes work now, but the mechanism that existed at  \nthis point in time was still significantly different. There was no `git-remote(1)`  \ncommand that you could use to manage your remotes. Remotes weren't even stored  \nin your `.git/config` file. In fact, when remotes were first introduced in  \nVersion 0.99.2, Git didn't even *have* config files.\n\nInstead, you had to configure remotes by writing a file into the  \n`.git/branches` directory, which nowadays feels somewhat counterintuitive. But  \nthe mechanism still works today:\n\n```shell\n$ git init repo --\nInitialized empty Git repository in /tmp/repo/.git/\n$ cd repo\n$ mkdir .git/branches\n$ echo https://gitlab.com/git-scm/git.git >.git/branches/origin\n$ git fetch origin refs/heads/master\n```\n\nBut that isn't all! 
The directory was soon renamed in Git Version 0.99.5 to \"remotes\", so there are a total of three different ways to configure remotes in a modern Git client.\n\nMost of you have probably never used either `.git/branches` nor `.git/remotes`,  \nand both of these mechanisms have been deprecated since 2005 and 2011,  \nrespectively. Furthermore, these directories will finally be removed in Git 3.0.\n\n## Git branding\n\nIn 2007, the first Git logo was created. It’s arguable if you can call it a logo, because it only consisted of three red minus signs above three green plus signs, reflecting what the output of `git diff` looks like:\n\n![three red minus signs above three green plus signs, reflecting what the output of `git diff`](https://res.cloudinary.com/about-gitlab-com/image/upload/v1750097388/Blog/Content%20Images/Blog/Content%20Images/image3_aHR0cHM6_1750097387927.png)\n\nA bit later, in 2008, the website [git-scm.com](https://git-scm.com) was launched:\n\n![landing page for git-scm.com in 2006](https://res.cloudinary.com/about-gitlab-com/image/upload/v1750097388/Blog/Content%20Images/Blog/Content%20Images/image4_aHR0cHM6_1750097387930.png)\n\nIn 2012, the Git website was [revamped](https://lore.kernel.org/git/CAP2yMaJy=1c3b4F72h6jL_454+0ydEQNXYiC6E-ZeQQgE0PcVA@mail.gmail.com/) by Scott Chacon and Jason Long. It looks pretty similar to how it looks today:\n\n![git website revamped in 2012](https://res.cloudinary.com/about-gitlab-com/image/upload/v1750097388/Blog/Content%20Images/Blog/Content%20Images/image2_aHR0cHM6_1750097387932.png)\n\nThis site redesign sports the new red-orange logo designed by Jason Long; the same logo that's currently used:\n\n![git logo](https://res.cloudinary.com/about-gitlab-com/image/upload/v1750097388/Blog/Content%20Images/Blog/Content%20Images/image1_aHR0cHM6_1750097387934.png)\n\n## Git 2.0\n\nGit already started to look a lot like modern Git at the 1.0 release, so we\nare going to do a big historical jump to Git 2.0. 
This version was\nreleased around 10 years after Git 1.0 and was the first release that\nintentionally contained backwards-incompatible changes in central workflows.\n\n### `git-push(1)` default behavior\n\nThe change that arguably caused the most confusion in this release was the\nupdated default behavior of `git-push(1)`.\n\nThere are a couple of different actions that Git could take when you push\ninto a remote repository and don’t specify exactly what you want to push:\n\n- Git could refuse to do anything, asking you to provide more information of\n  what exactly you want to push.\n- Git could push the currently checked out branch.\n- Git could push the currently checked out branch, but only if it knows that it\n  has an equivalent on the remote side.\n- Git could push all of your branches that have an equivalent on the remote side.\n\nThe behavior of modern Git is the so-called \"simple\" strategy, which is the third\noption above. But before Git 2.0, the default behavior was the \"matching\"\nstrategy, which is the last option.\n\nThe “matching” strategy was significantly more risky. You always had to make sure that you\nwere fine with pushing all of your local branches that have an equivalent on the\nremote side before pushing. Otherwise, you might have ended up\npushing changes unintentionally. As such, it was decided to change the strategy\nto \"simple\" to reduce the risk and help out Git beginners.\n\n### `git-add(1)`\n\nAnother big change was the default behavior of `git-add(1)` when it comes to  \ntracked files that have been deleted. Before Git 2.0, `git-add(1)` wouldn't  \nstage deleted files automatically, but you instead had to manually add each  \ndeleted file by using `git-rm(1)` to make them part of a commit. 
With Git 2.0, this behavior was changed so that `git-add(1)` also adds deleted files to the index.\n\n## Celebrating the Git community\n\nI won’t bore you with the details around how Git works nowadays – you probably use it daily anyway, and, if you don’t, there are many tutorials out there that can help you get started. Instead, let’s celebrate the Git community, which has ensured that Git works as well as it does 20 years later.\n\nOver time, Git has:\n\n- Accumulated 56,721 commits as of the Git 2.49 release.\n- Received contributions from more than 2,000 different individuals.\n- Published 60 major releases.\n\nThe Git project also has a steady influx of new contributors by taking part in [Google Summer of Code](https://summerofcode.withgoogle.com/) and [Outreachy](https://www.outreachy.org/). New contributors like these are what will ensure that the Git project will remain healthy in the long term.\n\nAs such, let me extend a big thank you to all contributors. It is your contributions that have made Git possible.\n\n## Going forward\n\nIt should be an uncontroversial take to say that Git has essentially won the competition of version control systems. It has significant market share, and it isn't easy to find open source projects that are using a version control system other than Git. So it has clearly done a lot of things right.\n\nThat being said, its development hasn't stood still, and there are still many challenges ahead of Git. 
On the one hand, we have technical challenges:\n- modernization of an aging code base  \n- scaling with the ever-growing size of monorepos  \n- handling large binary files better\n\nAnd on the other hand, there are problems of a more social type:\n- improving the usability of Git  \n- fostering the Git community so that the project remains healthy in the long  \n  term\n\nThere always remains work to be done and we at GitLab are proud to be part  \nof these efforts to make sure that Git continues to be a great version control  \nsystem for the next 20 years.\n\n## Read more about Git\n\n- [Celebrating Git's 20th anniversary with creator Linus Torvalds](https://about.gitlab.com/blog/celebrating-gits-20th-anniversary-with-creator-linus-torvalds/)\n- [What's new in Git 2.49.0?](https://about.gitlab.com/blog/whats-new-in-git-2-49-0/)  \n- [What’s new in Git 2.48.0?](https://about.gitlab.com/blog/whats-new-in-git-2-48-0/)  \n- [A beginner's guide to the Git reftable format](https://about.gitlab.com/blog/a-beginners-guide-to-the-git-reftable-format/)",[683,9],{"slug":1404,"featured":90,"template":687},"journey-through-gits-20-year-history","content:en-us:blog:journey-through-gits-20-year-history.yml","Journey Through Gits 20 Year History","en-us/blog/journey-through-gits-20-year-history.yml","en-us/blog/journey-through-gits-20-year-history",{"_path":1410,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1411,"content":1417,"config":1423,"_id":1425,"_type":13,"title":1426,"_source":15,"_file":1427,"_stem":1428,"_extension":18},"/en-us/blog/just-commit-launch",{"title":1412,"description":1413,"ogTitle":1412,"ogDescription":1413,"noIndex":6,"ogImage":1414,"ogUrl":1415,"ogSiteName":672,"ogType":673,"canonicalUrls":1415,"schema":1416},"Let’s talk about commitment","What possibilities could you unlock by just making the choice, committing, and moving 
forward?","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749671258/Blog/Hero%20Images/just-commit-blog-cover.png","https://about.gitlab.com/blog/just-commit-launch","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"Let’s talk about commitment\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Todd Barr\"}],\n        \"datePublished\": \"2019-02-18\",\n      }",{"title":1412,"description":1413,"authors":1418,"heroImage":1414,"date":1420,"body":1421,"category":748,"tags":1422},[1419],"Todd Barr","2019-02-18","\n\nWe’re now solidly into 2019. Commitments you made to yourself, your health, your productivity, your career, your budget, or whatever the case may be – they’re probably becoming harder to keep. This pattern of making resolutions, being on our best behavior for a while, falling off the wagon, returning to our ways, then starting the whole process over in the new year is all too familiar.\n\nWith [50 percent of digital transformation efforts stalled in 2018](https://mktg.forrester.com/predictions-2019), you’ve likely experienced your own version of this at work, and are probably even somewhere in that cycle right now.\n\nThe thing is, commitment unlocks new potential. You often don’t get to the good stuff until you make that commitment – whether it’s committing to months of training and discipline, then experiencing the euphoria of completing your first marathon, or committing to your partner and building a life together.\n\nIn the software space, making that commitment can be the difference between paying lip service to DevOps transformation and actually realizing its promises. Making big changes, especially at an organizational level, is daunting. The trick is to commit to the process, not just to the goal. 
[Focusing on the processes and behaviors that support the goal is key to success](https://www.scienceofpeople.com/goal-setting/), so having a clear plan of attack rather than an abstract objective to achieve is what makes all the difference.\n\nHere at GitLab, we committed to being [all-remote](/company/culture/all-remote/) – allowing us to hire the best people, no matter where in the world they might be or at what times they choose to work. We went all in on [asynchronous communication](/handbook/communication/#internal-communication), conscientiously documenting everything so we could collaborate across time zones and borders. We committed to a monthly release cycle, a decision which has seen us ship, to date, 88 consecutive new releases, allowing us to work with a short feedback loop and make small adjustments and iterations along the way. It was our commitment to the process, to having a single vision and steadily marching toward it, that enabled us to build a single application for the entire DevOps lifecycle with an all-remote team.\n\nSo this is what we’re asking you to do! Just commit. To software modernization. To faster cycle times. To secure apps. And because commitment is easier when you have a plan, and accountability, we’re here to support you on the journey. Over the coming weeks, we will be rolling out a series of blog posts and guides to help you make meaningful, lasting change in your organization. From tips and success stories on how to modernize your application architecture, to finally getting on top of technical debt, and building more secure applications, we’re working with our experts, customers, and community to help you along the way.\n\nObviously, commit has a double meaning for us. Git unlocked a whole new way to collaborate on software with the humble commit. Now, at GitLab, committing unlocks a whole lot more value – faster time to market, more secure code, more modern applications. We’re asking you to just commit to these. 
[Are you up for the challenge?](/blog/application-modernization-best-practices/)\n\n## #JustCommit\nSo, you're committing to starting something new this year. Hooray! 🎉 It's always easier to stick to something with a buddy – tell us your commitments by tweeting us [@gitlab](https://twitter.com/gitlab) using #JustCommit, and we'll do our best to help (and enter you into our swag giveaway)! The [giveaway](/community/sweepstakes/) lasts through April, but we want to keep you committing all year long.\n",[819,9,774],{"slug":1424,"featured":6,"template":687},"just-commit-launch","content:en-us:blog:just-commit-launch.yml","Just Commit Launch","en-us/blog/just-commit-launch.yml","en-us/blog/just-commit-launch",{"_path":1430,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1431,"content":1437,"config":1443,"_id":1445,"_type":13,"title":1446,"_source":15,"_file":1447,"_stem":1448,"_extension":18},"/en-us/blog/keep-git-history-clean-with-interactive-rebase",{"title":1432,"description":1433,"ogTitle":1432,"ogDescription":1433,"noIndex":6,"ogImage":1434,"ogUrl":1435,"ogSiteName":672,"ogType":673,"canonicalUrls":1435,"schema":1436},"How to keep your Git history clean with interactive rebase","Interactive rebase is one of Git’s most versatile tools. 
Here's how to use it to correct commit messages, fix mistakes, and more.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749662593/Blog/Hero%20Images/title-image.png","https://about.gitlab.com/blog/keep-git-history-clean-with-interactive-rebase","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"How to keep your Git history clean with interactive rebase\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Tobias Günther\"}],\n        \"datePublished\": \"2020-11-23\",\n      }",{"title":1432,"description":1433,"authors":1438,"heroImage":1434,"date":1440,"body":1441,"category":726,"tags":1442},[1439],"Tobias Günther","2020-11-23","\n## What is interactive rebase? \n\nInteractive [rebase](/solutions/source-code-management/), or Git rebase interactive, is sometimes called the \"Swiss Army Knife\" of Git – because it contains so many different tools, for so many different use cases! However, there's one main, overarching use case: _cleaning up your local commit history_.\n\nMind the word \"local\": it should only be used for cleaning up your own, local commit history, for example before integrating one of your feature branches into a team branch. In contrast, it should NOT be used on commit history that has already been pushed and shared on a remote repository. Interactive rebase is one of those tools that \"rewrite\" Git history – and you shouldn't do this on commits that have already been shared with others.\n\nWith this little warning message out of the way, let's look at some practical examples! 
\n\nNote: for easier visualization of the scenarios and workflows in this post, I’ve been using the [\"Tower\" Git desktop GUI](https://www.git-tower.com/?utm_source=gitlab&utm_medium=guestpost&utm_campaign=interactive-rebase) in some of my screenshots.\n{: .note}\n\n## Correcting an old commit message with Git rebase interactive\n\nSometimes you notice a typo in an **old commit message** – or you've forgotten to mention something in the description that is noteworthy. If we were talking about the _very last_ commit, we could have simply used the `--amend` option of the `git commit` command. But for older commits you will have to use interactive rebase to change them after the fact.\n\nHere's an example of a commit message gone horribly wrong that we want to correct:\n\n![A bad commit message that needs correction](https://about.gitlab.com/images/blogimages/how-to-keep-your-git-history-clean-with-interactive-rebase/bad-commit-message@2x.png){: .shadow.medium.center}\nA bad commit message that needs correction\n{: .note.text-center}\n\nThe first step in _any_ Git interactive rebase session is to **determine what part of commit history you want to manipulate**. To again take the above example: in order to change this bad commit we have to start the session at its _parent_ commit.\n\n![Starting our interactive rebase session](https://about.gitlab.com/images/blogimages/how-to-keep-your-git-history-clean-with-interactive-rebase/start-at-parent-commit@2x.png){: .shadow.medium.center}\nStarting our interactive rebase session\n{: .note.text-center}\n\nWe can now feed this starting commit's hash to the Git rebase interactive command:\n\n```\n$ git rebase -i 0023cddd\n```\n\nAn editor window will now open, containing a list of the commits that you just selected for manipulation. 
And don't be surprised because they are in _reverse order_: in an interactive rebase session, Git will reapply the old commits, item after item – which means that reversing the order is correct from Git's perspective.\n\n![Editor window with the selected commits](https://about.gitlab.com/images/blogimages/how-to-keep-your-git-history-clean-with-interactive-rebase/editor-window-start-ir@2x.png){: .shadow.medium.center}\nEditor window with the selected commits\n{: .note.text-center}\n\nOne other important thing to note about this editor window: _you don't perform the actual manipulations here_! Or, in this concrete example, you do NOT go ahead and change the commit message here. Instead, you only mark the commit you want to change with an action keyword. In our case, because we want to change a commit’s message, we mark the line with \"reword\". If you then save and close this editor window, a new one will open, containing the old commit’s message. Now is the time to finally make your changes:\n\n![Finally, we can make our changes](https://about.gitlab.com/images/blogimages/how-to-keep-your-git-history-clean-with-interactive-rebase/correct-commit-message.gif){: .shadow.medium.center}\nFinally, we can make our changes\n{: .note.text-center}\n\nAfter saving and closing once more, the interactive rebase session is complete and our old commit message has been corrected!\n\n## Combining multiple commits into one using interactive rebase\n\nAnother use case for interactive rebase is when you want to **combine multiple old commits into one**. Although, of course, the golden rule of version control applies: in most situations, it's beneficial to create more and smaller commits instead of a few big ones. 
However, as with everything, we might find that we have overdone this and now want to meld two or more old commits into a single one.\n\nTo make a concrete example, let's say we want to combine the following selected commits into a single one:\n\n![Let's combine multiple commits into one](https://about.gitlab.com/images/blogimages/how-to-keep-your-git-history-clean-with-interactive-rebase/squash-selected-commits@2x.png){: .shadow.medium.center}\nLet's combine multiple commits into one\n{: .note.text-center}\n\nJust like in our first case, we begin by starting the interactive rebase session at least at the parent commit of the one we want to manipulate.\n\n```\n$ git rebase -i 2b504bee\n```\n\nAgain, an editor window will open, listing that part of our commit history that we want to manipulate:\n\n![Marking lines with \"squash\"](https://about.gitlab.com/images/blogimages/how-to-keep-your-git-history-clean-with-interactive-rebase/squash-mark-commit@2x.png){: .shadow.medium.center}\nMarking lines with \"squash\"\n{: .note.text-center}\n\nThe action keyword we are going to use here is called \"squash.\" And there's only one important piece of information you need to know about squash in order to use it: _the line we mark up with the \"squash\" keyword will be combined with the line directly above_. 
That’s why, as you can see in my screenshot above, I’ve marked line #2 with \"squash\" in order to combine it with line #1.\n\nWe can now save and close the editor window and again watch as a new window appears: we are now asked to provide a commit message for the new commit that is created when combining those two old ones.\n\n![Entering a new message for the new, squashed commit](https://about.gitlab.com/images/blogimages/how-to-keep-your-git-history-clean-with-interactive-rebase/squash-enter-new-message@2x.png){: .shadow.medium.center}\nEntering a new message for the new, squashed commit\n{: .note.text-center}\n\nAfter saving and closing this editor window, you will see that a new commit was created that contains the changesets of both old commits. Voila!\n\n## Fixing a mistake with interactive rebase\n\nAnother use case for interactive rebase is when you found a mistake in one of your earlier commits. And it doesn't matter what exactly you messed up: you could have forgotten to add a certain change, should have deleted a file, or simply introduced a typo...\n\nThe natural tendency, in such a situation, is to simply create a new commit that corrects the mistake. But on the other hand, this will mess up our commit history: making an original commit, and then adding a \"band-aid\" commit just to fix some mistakes… that’s a messy way of working. Your commit history will soon become hard to understand, because it's littered with all those little \"quick fix commits\"!\n\nThis is where \"fixup,\" one of the tools that come with interactive rebase, comes in very handy. 
Fixup takes this \"quick fix\" commit, applies its changes to the original commit (thereby correcting it), and then gets rid of the band-aid commit:\n\n![How \"fixup\" works](https://about.gitlab.com/images/blogimages/how-to-keep-your-git-history-clean-with-interactive-rebase/diagram-fixup.png){: .medium.center}\nHow \"fixup\" works\n{: .note.text-center}\n\nAfter we're done, it looks as if there had never been a problem with our original commit! So let's walk through this using a practical example. \n\nThe first step is to do whatever is necessary to fix the problem: this could mean adding a new file, making changes to existing ones, deleting obsolete files... you \"just\" need to produce the changes that correct the mistake.\n\nThe next step is to commit these changes to the repository – but with a little extra: when making the commit, we are going to use the `--fixup` flag and tell Git the commit hash of our bad commit:\n\n```\n$ git add corrections.txt\n$ git commit --fixup 2b504bee\n```\n\nWhen you now take a look at the commit history, you will see that a pretty ordinary-looking commit has been created – probably not the magic and fireworks you would have expected. But if you take a closer look, you will see that something’s going on: the new commit has automatically been prepended with \"fixup!\" and the commit subject of our bad commit.\n\n![The original commit and the fix commit](https://about.gitlab.com/images/blogimages/how-to-keep-your-git-history-clean-with-interactive-rebase/fixup_create-fix-commit@2x.png){: .shadow.medium.center}\nThe original commit and the fix commit\n{: .note.text-center}\n\nThe third step now is to start the interactive rebase session. Again, we choose the parent of our bad commit as the starting point...\n\n```\n$ git rebase -i 0023cddd --autosquash\n```\n\n... and as the second part of the secret sauce, we are using the `--autosquash` flag. 
This option makes sure that we don't have to do _anything_ in the editor window that is now open. Take a close look at the situation:\n\n![Our fix commit is marked \"fixup\" and sorted to the right position](https://about.gitlab.com/images/blogimages/how-to-keep-your-git-history-clean-with-interactive-rebase/fixup_editor@2x.png){: .shadow.medium.center}\nOur fix commit is marked \"fixup\" and sorted to the right position\n{: .note.text-center}\n\nYou will see that Git automatically did two things for us:\n1. It marked our band-aid commit as \"fixup.\"\n2. It re-ordered the lines so that our band-aid commit appears directly below our bad commit. This is because fixup works exactly like squash in that it _combines with the line above_.\n\nIn other words: there's nothing left to do for us but save and close the editor window.\n\nLet's take another look at the commit history:\n\n![A happy ending!](https://about.gitlab.com/images/blogimages/how-to-keep-your-git-history-clean-with-interactive-rebase/fixup_final-corrected@2x.png){: .shadow.medium.center}\nA happy ending!\n{: .note.text-center}\n\nNot only does our originally bad commit now contain the changes from our band-aid commit. But on top of that, the ugly band-aid commit has disappeared from the commit history! Everything is nice and clean, just as if there had never been a problem!\n\n## Discover the power of Git rebase interactive\n\nThere are lots of use cases for interactive rebase – and most of them in the department of “fixing mistakes”. For an overview of other useful things you can do, I recommend the _free_ [\"First Aid Kit for Git\"](https://www.git-tower.com/learn/git/first-aid-kit?utm_source=gitlab&utm_medium=guestpost&utm_campaign=interactive-rebase): it’s a collection of short videos (2-3 min per episode) that help you learn to undo mistakes using interactive rebase and other Git tools.\n\nEditor's note: I had to use interactive rebase when reviewing this very post! 
One of my commits included an image that was greater than 1MB which is against the rules for GitLab website project. I had to go back and fix that commit to include a correctly sized image instead. Thanks for the lesson, universe! 😁\n{: .note}\n\n## More Git tips and tricks\n\n- [15 Git tips to improve your workflow](/blog/15-git-tips-improve-workflow/)\n- [How Git Partial Clone lets you fetch only the large file you need](/blog/partial-clone-for-massive-repositories/)\n- [Git happens! 6 Common Git mistakes and how to fix them](/blog/git-happens/)\n\n### About the guest author\n\n_[Tobias Günther](https://twitter.com/gntr) is the CEO of [Tower](https://www.git-tower.com/?utm_source=gitlab&utm_medium=guestpost&utm_campaign=interactive-rebase), the popular Git desktop client that helps more than 100,000 developers around the world to be more productive with Git._\n\nCover image by [David Taljat](https://www.pexels.com/@david-taljat-3748658) on [Pexels](https://www.pexels.com/photo/yellow-and-blue-line-on-gray-asphalt-road-5690623/)\n{: .note}\n",[9,682],{"slug":1444,"featured":6,"template":687},"keep-git-history-clean-with-interactive-rebase","content:en-us:blog:keep-git-history-clean-with-interactive-rebase.yml","Keep Git History Clean With Interactive Rebase","en-us/blog/keep-git-history-clean-with-interactive-rebase.yml","en-us/blog/keep-git-history-clean-with-interactive-rebase",{"_path":1450,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1451,"content":1457,"config":1463,"_id":1465,"_type":13,"title":1466,"_source":15,"_file":1467,"_stem":1468,"_extension":18},"/en-us/blog/keeping-git-commit-history-clean",{"title":1452,"description":1453,"ogTitle":1452,"ogDescription":1453,"noIndex":6,"ogImage":1454,"ogUrl":1455,"ogSiteName":672,"ogType":673,"canonicalUrls":1455,"schema":1456},"How (and why!) 
to keep your Git commit history clean","Git commit history is very easy to mess up, here's how you can fix it!","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749659457/Blog/Hero%20Images/keep-git-commit-history-clean.jpg","https://about.gitlab.com/blog/keeping-git-commit-history-clean","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"How (and why!) to keep your Git commit history clean\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Kushal Pandya\"}],\n        \"datePublished\": \"2018-06-07\",\n      }",{"title":1452,"description":1453,"authors":1458,"heroImage":1454,"date":1460,"body":1461,"category":726,"tags":1462},[1459],"Kushal Pandya","2018-06-07","\n\nGit commits are one of the key parts of a [Git repository](/solutions/source-code-management/), and more so, the _commit message_ is a life log for the repository. As the project/repository evolves over time (new features getting added, bugs being fixed, architecture being refactored), commit messages are the place where one can see what was changed and how. So it's important that these messages reflect the underlying change in a short, precise manner.\n\n## Why a meaningful Git commit history is important\n\nWhat does Git commit do? Git commit messages are the fingerprints that you leave on the code you touch. Any code that you commit today, a year from now when you look at the same change; you would be thankful for a clear, meaningful commit message that you wrote, and it will also make the lives of your fellow developers easier. When Git commits are isolated based on context, a bug which was introduced by a single commit becomes quicker to find, and the easier it is to revert the commit which caused the bug in the first place.\n\nWhile working on a large project, we often deal with a lot of moving parts that are updated, added or removed. 
Ensuring that commit messages are maintained in such cases could be tricky, especially when development spans across days, weeks, or even months. So to simplify the effort of maintaining concise commit history, this article will use some of the common situations that a developer might face while working on a Git repository.\n\n- [Situation 1: I need to change the most recent commit](#situation-1-i-need-to-change-the-most-recent-commit)\n- [Situation 2: I need to change a specific commit](#situation-2-i-need-to-change-a-specific-commit)\n- [Situation 3: I need to add, remove, or combine commits](#situation-3-i-need-to-add-remove-or-combine-commits)\n- [Situation 4: My commit history doesn't make sense, I need a fresh start!](#situation-4-my-commit-history-doesnt-make-sense-i-need-a-fresh-start)\n\nBut before we dive in, let's quickly go through what a typical development workflow looks like in our hypothetical Ruby application.\n\n**Note:** This article assumes that you are aware about basics of Git, how branches work, how to add uncommitted changes of a branch to stage and how to commit the changes. If you're unsure of these flows, [our documentation](https://docs.gitlab.com/ee/topics/git/index.html) is a great starting point.\n\n## A day in the life\n\nHere, we are working on a small Ruby on Rails project where we need to add a navigation view on the homepage and that involves updating and adding several files. 
Following is a step by step breakdown of the entire flow:\n\n- You start working on a feature by updating a single file; let's call it `application_controller.rb`\n- This feature requires you to also update a view: `index.html.haml`\n- You added a partial which is used in index page: `_navigation.html.haml`\n- Styles for the page also need to be updated to reflect the partial we added: `styles.css.scss`\n- Feature is now ready with the desired changes, time to also update tests; files to be updated are as follows:\n  - `application_controller_spec.rb`\n  - `navigation_spec.rb`\n- Tests are updated and passing as expected, now time to commit the changes!\n\nSince all the files belong to different territories of the architecture, we commit the changes isolated from each other to ensure that each commit represents a certain context and is made in a certain order. I usually prefer backend -> frontend order where most backend-centric change is committed first, followed by the middle layer and then by frontend-centric changes in the Git list commits.\n\n\n1.  `application_controller.rb` & `application_controller_spec.rb`; **Add routes for navigation**.\n2.  `_navigation.html.haml` &  `navigation_spec.rb`; **Page Navigation View**.\n3.  `index.html.haml`; **Render navigation partial**.\n4.  `styles.css.scss`; **Add styles for navigation**.\n\nNow that we have our changes committed, we create a merge request with the branch. Once you have merge request open, it typically gets reviewed by your peer before the changes are merged into repo's `master` branch. Now let's learn what different situations we may end up with during code review.\n\n## Situation 1: How to change the most recent Git commit\n\nImagine a case where the reviewer looked at `styles.css.scss` and suggested a change. In such a case, it is very simple to do the change as the stylesheet changes are part of **last** commit on your branch. 
Here's how we can handle this;\n\n- You directly do the necessary changes to `styles.css.scss` in your current branch.\n- Once you're done with the changes, add these changes to stage; run `git add styles.css.scss`.\n- Once changes are staged, we need to _add_ these changes to our last commit; run `git commit --amend`.\n  -  **Command breakdown**: Here, we're asking the `git commit` command to _amend_ whatever changes are present in stage to the most recent commit.\n- This will open your last commit in your Git-defined text editor which has the commit message **Add styles for navigation**.\n- Since we only updated the CSS declaration, we don't need to alter the commit message. At this point, you can just save and exit the text editor that Git opened for you and your changes will be reflected in the commit.\n\nSince you modified an existing Git commit, these changes are required to be _force pushed_ to your remote repo using `git push --force-with-lease \u003Cremote_name> \u003Cbranch_name>`. This command will override the commit `Add styles for navigation` on remote repo with updated commit that we just made in our local repo.\n\nOne thing to keep in mind while force pushing branches is that if you are working on the same branch with multiple people, force pushing may cause trouble for other users when they try to normally push their changes on a remote branch that has new commits force pushed. Hence, use this feature wisely. You can learn more about Git force push options [here](https://git-scm.com/docs/git-push#git-push---no-force-with-lease).\n\n## Situation 2: How to change a specific Git commit changes\n\nIn the previous situation, the Git commit change was rather simple as we had to modify only our last Git commit, but imagine if the reviewer suggested changing something in `_navigation.html.haml`. In this case, it is the second commit from the top, so changing it won't be as direct as it was in the first situation. 
Let's see how we can handle this:\n\nWhenever a commit is made in a branch, it is identified by a unique SHA-1 hash string. Think of it as a unique ID that separates one commit from another. You can view all the previous commits, along with their SHA-1 hashes in a branch by running the `git log` command. With this, you would see an output that looks somewhat as follows and is a list of commits, where the most recent commits are at the top;\n\n```\ncommit aa0a35a867ed2094da60042062e8f3d6000e3952 (HEAD -> add-page-navigation)\nAuthor: Kushal Pandya \u003Ckushal@gitlab.com>\nDate: Wed May 2 15:24:02 2018 +0530\n\n    Add styles for navigation\n\ncommit c22a3fa0c5cdc175f2b8232b9704079d27c619d0\nAuthor: Kushal Pandya \u003Ckushal@gitlab.com>\nDate: Wed May 2 08:42:52 2018 +0000\n\n    Render navigation partial\n\ncommit 4155df1cdc7be01c98b0773497ff65c22ba1549f\nAuthor: Kushal Pandya \u003Ckushal@gitlab.com>\nDate: Wed May 2 08:42:51 2018 +0000\n\n    Page Navigation View\n\ncommit 8d74af102941aa0b51e1a35b8ad731284e4b5a20\nAuthor: Kushal Pandya \u003Ckushal@gitlab.com>\nDate: Wed May 2 08:12:20 2018 +0000\n\n    Add routes for navigation\n```\n\nThis is where `git rebase` command comes into play. Whenever we wish to edit a specific commit with `git rebase`, we need to first rebase our branch by moving back HEAD to the point right _before_ the commit we wish to edit. 
In our case, we need to change the commit that reads `Page Navigation View`.\n\n![Commit Log](https://about.gitlab.com/images/blogimages/keeping-git-commit-history-clean/GitRebase.png){: .shadow.center.medium}\n\nHere, notice the hash of commit which is right before the commit we want to modify; copy the hash and perform the following steps:\n\n- Rebase the branch to move to commit before our target commit; run `git rebase -i 8d74af102941aa0b51e1a35b8ad731284e4b5a20`\n  -  **Git command breakdown**: Here we're running Git's `rebase` command with _interactive_ mode with provided SHA-1 hash as commit to rebase to.\n- This will run rebase command for Git in interactive mode and will open your text editor showing all of your commits that came _after_ the commit you rebased to. It will look somewhat like this:\n\n```\npick 4155df1cdc7 Page Navigation View\npick c22a3fa0c5c Render navigation partial\npick aa0a35a867e Add styles for navigation\n\n# Rebase 8d74af10294..aa0a35a867e onto 8d74af10294 (3 commands)\n#\n# Commands:\n# p, pick = use commit\n# r, reword = use commit, but edit the commit message\n# e, edit = use commit, but stop for amending\n# s, squash = use commit, but meld into previous commit\n# f, fixup = like \"squash\", but discard this commit's log message\n# x, exec = run command (the rest of the line) using shell\n# d, drop = remove Git commit\n#\n# These lines can be re-ordered; they are executed from top to bottom.\n#\n# If you remove a line here THAT COMMIT WILL BE LOST.\n#\n# However, if you remove everything, the rebase will be aborted.\n#\n# Note that empty commits are commented out\n```\n\nNotice how each commit has a word `pick` in front of it, and in the contents below, there are all possible keywords we can use. Since we want to _edit_ a commit, we need to change `pick 4155df1cdc7 Page Navigation View` to `edit 4155df1cdc7 Page Navigation View`. 
Save the changes and exit editor.\n\nNow your branch is rebased to the point in time right before the commit you made which included `_navigation.html.haml`. Open the file and perform desired changes as per the review feedback. Once you're done with the changes, stage them by running `git add _navigation.html.haml`.\n\nSince we have staged the changes, it is time to move branch HEAD back to the commit we originally had (while also including the new changes we added), run `git rebase --continue`, this will open your default editor in the terminal and show you the commit message that we edited during rebase; `Page Navigation View`. You can change this message if you wish, but we would leave it as it is for now, so save and exit the editor. At this point, Git will replay all the commits that followed after the commit you just edited and now branch `HEAD` is back to the top commit we originally had, and it also includes the new changes you made to one of the commits.\n\nSince we again modified a commit that's already present in remote repo, we need force push this branch again using `git push --force-with-lease \u003Cremote_name> \u003Cbranch_name>`.\n\n## Situation 3: How to add, remove, or combine Git commits\n\nA common situation is when you've made several commits just to fix something previously committed. Now let's reduce them as much as we can, combining them with the original commits.\n\nAll you need to do is start the interactive rebase as you would in the other scenarios.\n\n```\npick 4155df1cdc7 Page Navigation View\npick c22a3fa0c5c Render navigation partial\npick aa0a35a867e Add styles for navigation\npick 62e858a322 Fix a typo\npick 5c25eb48c8 Ops another fix\npick 7f0718efe9 Fix 2\npick f0ffc19ef7 Argh Another fix!\n```\n\nNow imagine you want to combine all those fixes into `c22a3fa0c5c Render navigation partial`. You just need to:\n\n1. Move the fixes up so that they are right below the commit you want to keep in the end.\n2. 
Change `pick` to `squash` or `fixup` for each of the fixes.\n\n*Note:* `squash` keeps the git fix commit messages in the description. `fixup` will forget the commit messages of the fixes and keep the original.\n\nYou'll end up with something like this:\n\n```\npick 4155df1cdc7 Page Navigation View\npick c22a3fa0c5c Render navigation partial\nfixup 62e858a322 Fix a typo\nfixup 5c25eb48c8 Ops another fix\nfixup 7f0718efe9 Fix 2\nfixup f0ffc19ef7 Argh Another fix!\npick aa0a35a867e Add styles for navigation\n```\n\nSave the changes, exit the editor, and you're done! This is the resulting history:\n\n```\npick 4155df1cdc7 Page Navigation View\npick 96373c0bcf Render navigation partial\npick aa0a35a867e Add styles for navigation\n```\n\nAs before, all you need to do now is `git push --force-with-lease \u003Cremote_name> \u003Cbranch_name>` and the changes are up.\n\nIf you want to remove a Git commit from branch altogether, instead of `squash` or `fixup`, just write `drop` or simply delete that line.\n\n### How to avoid Git commit conflicts\n\nTo avoid conflicts, make sure the commits you're moving up the timeline aren't touching the same files touched by the commits left after them.\n\n```\npick 4155df1cdc7 Page Navigation View\npick c22a3fa0c5c Render navigation partial\nfixup 62e858a322 Fix a typo                 # this changes styles.css\nfixup 5c25eb48c8 Ops another fix            # this changes image/logo.svg\nfixup 7f0718efe9 Fix 2                      # this changes styles.css\nfixup f0ffc19ef7 Argh Another fix!          
# this changes styles.css\npick aa0a35a867e Add styles for navigation  # this changes index.html (no conflict)\n```\n\n### Pro-tip: Quick Git commit `fixup`s\n\nIf you know exactly which commit you want to fixup, when committing you don't have to waste brain cycles thinking of good temporary names for \"Fix 1\", \"Fix 2\", ..., \"Fix 42\".\n\n**Step 1: Meet `--fixup`**\n\nAfter you've staged the changes fixing whatever it is that needs fixing, just Git commit all the changes like this:\n\n```\ngit commit --fixup c22a3fa0c5c\n```\n(Note that this is the hash for the commit `c22a3fa0c5c Render navigation partial`)\n\nThis will generate this commit message: `fixup! Render navigation partial`.\n\n**Step 2: And the sidekick `--autosquash`**\n\nEasy interactive rebase. You can have `git` place the `fixup`s automatically in the right place.\n\n`git rebase -i 4155df1cdc7 --autosquash`\n\nHistory will be shown like so:\n```\npick 4155df1cdc7 Page Navigation View\npick c22a3fa0c5c Render navigation partial\nfixup 62e858a322 Fix a typo\nfixup 5c25eb48c8 Ops another fix\nfixup 7f0718efe9 Fix 2\nfixup f0ffc19ef7 Argh Another fix!\npick aa0a35a867e Add styles for navigation\n```\n\nReady for you to just review and proceed.\n\nIf you're feeling adventurous you can do a non-interactive rebase `git rebase --autosquash`, but only if you like living dangerously, as you'll have no opportunity to review the squashes being made before they're applied.\n\n## Situation 4: My Git commit history doesn't make sense, I need a fresh start!\n\nIf we're working on a large feature, it is common to have several fixup and review-feedback changes that are being committed frequently. Instead of constantly rebasing the branch, we can leave the cleaning up of Git commits until the end of development.\n\nThis is where creating patch files is extremely handy. 
In fact, patch files were the primary way of sharing code over email while collaborating on large open source projects before Git-based services like GitLab were available to developers. Imagine you have one such branch (eg; `add-page-navigation`) where there are tons of commits that don't convey the underlying changes clearly. Here's how you can create a patch file for all the changes you made in this branch:\n\n- The first step to create the patch file is to make sure that your branch has all the changes present from `master` branch and has no conflicts with the same.\n- You can run `git rebase master` or `git merge master` while you're checked out in `add-page-navigation` branch to get all the changes from `master` on to your branch.\n- Now create the patch file; run `git diff master add-page-navigation > ~/add_page_navigation.patch`.\n  -  **Command breakdown**: Here we're using Git's _diff_ feature, and asking for a diff between `master` branch and `add-page-navigation` branch, and _redirecting_ the output (via `>` symbol) to a file named `add_page_navigation.patch` in our user home directory (typically `~/` in *nix operating systems).\n- You can specify any path you wish to keep this file in and the file name and extension could be anything you want.\n- Once the command is run and you don't see any errors, the patch file is generated.\n- Now checkout `master` branch; run `git checkout master`.\n- Delete the branch `add-page-navigation` from local repo; run `git branch -D add-page-navigation`. 
Remember, we already have changes of this branch in a created patch file.\n- Now create a new branch with the same name (while `master` is checked out); run `git checkout -b add-page-navigation`.\n- At this point, this is a fresh branch and doesn't have any of your changes.\n- Finally, apply your changes from the patch file; `git apply ~/add_page_navigation.patch`.\n- Here, all of your changes are applied in a branch and they will appear as uncommitted, as if all your modification where done, but none of the modifications were actually committed in the branch.\n- Now you can go ahead and commit individual files or files grouped by area of impact in the order you want with concise commit messages.\n\nAs with previous situations, we basically modified the whole branch, so it is time to force push!\n\n## Git commit history: Conclusion\n\nWhile we have covered most common and basic situations that arise in a day-to-day workflow with Git, rewriting Git history is a vast topic and as you get familiar with above tips, you can learn more advanced concepts around the subject in the [Git Official Documentation](https://git-scm.com/book/en/v2/Git-Tools-Rewriting-History). 
Happy git'ing!\n\nPhoto by [pan xiaozhen](https://unsplash.com/photos/pj-BrFZ9eAA?utm_source=unsplash&utm_medium=referral&utm_content=creditCopyText) on [Unsplash](https://unsplash.com/search/photos/clean?utm_source=unsplash&utm_medium=referral&utm_content=creditCopyText)\n{: .note}\n",[9,982],{"slug":1464,"featured":6,"template":687},"keeping-git-commit-history-clean","content:en-us:blog:keeping-git-commit-history-clean.yml","Keeping Git Commit History Clean","en-us/blog/keeping-git-commit-history-clean.yml","en-us/blog/keeping-git-commit-history-clean",{"_path":1470,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1471,"content":1477,"config":1484,"_id":1486,"_type":13,"title":1487,"_source":15,"_file":1488,"_stem":1489,"_extension":18},"/en-us/blog/meltano-functional-group-update-post",{"title":1472,"description":1473,"ogTitle":1472,"ogDescription":1473,"noIndex":6,"ogImage":1474,"ogUrl":1475,"ogSiteName":672,"ogType":673,"canonicalUrls":1475,"schema":1476},"New Meltano personas, priorities, and updates from the team","There's a lot going on — here are some of the highlights on user research, dogfooding Meltano, embedding engineers, and hiring!","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749678847/Blog/Hero%20Images/meltano-fgu.jpg","https://about.gitlab.com/blog/meltano-functional-group-update-post","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"New Meltano personas, priorities, and updates from the team\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Jacob Schatz\"}],\n        \"datePublished\": \"2018-10-08\",\n      }",{"title":1472,"description":1473,"authors":1478,"heroImage":1474,"date":1480,"body":1481,"category":726,"tags":1482},[1479],"Jacob Schatz","2018-10-08","\nJacob Schatz here, Staff Engineer for [Meltano](https://gitlab.com/meltano)! We've been heads down working on improving Meltano, and figured it was time for an update. 
We've had some great conversations that have helped us identify two general personas. Our team is also growing, and we're ready for frontend contributions, but more on that later.\n\nWe've been conducting interviews to zero in on what our users will want, what they're currently doing, and what tools they're using. Over the course of those conversations, we saw two main scenarios emerge. People either wanted a command line interface (CLI) or a graphical user interface (GUI). The GUIs that exist are painful to use, and not very intuitive. In both scenarios, people we spoke with are frustrated. This goes back to the original reason [we decided to create Meltano](/blog/hey-data-teams-we-are-working-on-a-tool-just-for-you/) — our data team members were relying on frustrating and expensive toolsets with poor UIs.\n\n### What are the Meltano personas?\n\nOur conversations revealed two general types of users:\n* Users who have engineers on staff\n* Users who do not have engineers on staff, or their engineers do not have bandwidth to help them\n\nThe Data team at GitLab, for example, has data engineers on staff who are willing, able, and happy to write Python. We won't be able to write every extractor and loader, so our users can follow our [specifications](https://gitlab.com/meltano/specifications), which are based off of the [Singer specifications](https://github.com/singer-io/getting-started). We want to make that as easy as possible, so Meltano can be the glue between all these different pieces.\n\nFor the other teams who don’t have the technical resources, we want to make it as if they had engineers on staff. Ideally, they'll just need to click a couple of buttons, run extract, load and transform with the extractors and loaders that we already have. 
Hopefully in the future the community can contribute more to these types of different extractors and loaders.\n\nYou can check out our updated [readme](https://gitlab.com/meltano/meltano/blob/master/README.md) with more info about Meltano and our personas. We're working iteratively, so if you have a different setup or scenario to share, we want to hear from you about your experience! Get in touch with us and tell us about your struggles or successes with your data team.\n\n### What’s next?\n\nWe're focused on our own CLI and GUI, and continuing to build more extractors and loaders (or [\"taps and targets\"](https://www.singer.io/)). We will be the glue that ties everything together. While current Singer taps and targets support extracting and loading, we'll be supporting much more, like removal of PII. Our CLI will support all of this from one configuration. We also want the CLI to have a really nice user experience, so I'm working with GitLab UX to help make it happen.\n\nAs always, we’re looking for contributors! In the [Dashboard project](https://gitlab.com/meltano/dashboard) you’ll see the Chart.js library that I’m building to make really nice dashboards for Meltano. Although we've had a ton of great Python contributions, we haven’t had as many contributors to the frontend, so we’d love your help there.\n\n### In other news\nThere's a lot going on, here are some of the highlights!\n\n#### Dogfooding\nIn my experience, unless one experiences the direct results of the code they write, and feel the pain their users feel when they hit a bug, one might not correctly solve the problem. Currently, we fulfill the data team's requests, but if something doesn't work they merely report back to us, without us experiencing the pain ourselves. We're changing how we work in order to imprint the idea that if something is broken, it's the Meltano team's responsibility. 
We’re all investigating every single pipeline failure, regardless of whose “fault” it is, because these suggest that it may be a poor user experience.\n\n#### Embedded engineers\nIn order to dogfood better, we've taken a data engineer from the data team, and an engineer from the Meltano team. They split their work 50/50 so each does half of their usual work and half of each other's work. It's already made a huge difference by giving us more eyes and ears on lots of issues, and allowing the engineers to approach problems from a different angle. Another added benefit is that every Meltano engineer gets direct exposure and experience from the data team, to make them better data scientists as well product engineers.\n\nThat's all for now, get in touch with us in our [issue tracker](https://gitlab.com/groups/meltano/-/boards), and tweet us [@meltanodata](https://twitter.com/meltanodata)!\n\nCover [image](https://unsplash.com/photos/2FPjlAyMQTA) by [John Schnobrich](https://unsplash.com/@johnschno) on Unsplash\n{: .note}\n\n[Emily von Hoffmann](https://about.gitlab.com/company/team/#emvonhoffmann) contributed to this post.\n{: .note}\n",[1483,1242,9,774,982],"demo",{"slug":1485,"featured":6,"template":687},"meltano-functional-group-update-post","content:en-us:blog:meltano-functional-group-update-post.yml","Meltano Functional Group Update Post","en-us/blog/meltano-functional-group-update-post.yml","en-us/blog/meltano-functional-group-update-post",{"_path":1491,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1492,"content":1498,"config":1503,"_id":1505,"_type":13,"title":1506,"_source":15,"_file":1507,"_stem":1508,"_extension":18},"/en-us/blog/microsoft-acquires-github",{"title":1493,"description":1494,"ogTitle":1493,"ogDescription":1494,"noIndex":6,"ogImage":1495,"ogUrl":1496,"ogSiteName":672,"ogType":673,"canonicalUrls":1496,"schema":1497},"Congratulations GitHub on the acquisition by Microsoft","The acquisition of GitHub by Microsoft is validation of the growing 
influence of software developers in the world.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749680101/Blog/Hero%20Images/github-news-cover.png","https://about.gitlab.com/blog/microsoft-acquires-github","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"Congratulations GitHub on the acquisition by Microsoft\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"GitLab\"}],\n        \"datePublished\": \"2018-06-03\",\n      }",{"title":1493,"description":1494,"authors":1499,"heroImage":1495,"date":1500,"body":1501,"category":680,"tags":1502},[917],"2018-06-03","\n\nCongratulations to GitHub on their [acquisition by Microsoft](https://www.bloomberg.com/news/articles/2018-06-03/microsoft-is-said-to-have-agreed-to-acquire-coding-site-github)! This is validation of the growing influence of software developers in the world, and the importance of [modern DevOps](/topics/devops/). The software community owes a lot to GitHub, and that includes the GitLab community. GitLab was first developed on GitHub and found its first contributors through it.\n\n## Code collaboration before GitHub\n\nOver the years, code collaboration has come a long way. Many developers will remember how code was often hosted on private websites, FTP servers, email, and IRC. We used to stuff a floppy disk or CD-ROM with code and mail it back and forth, or send patches to newsgroups or mailing lists in order to share and work on code together. It was a painful, error-prone time.\n\nGit, the [version control system](/topics/version-control/) used by GitHub, GitLab, and others, was first introduced in 2005. It allowed developers to work asynchronously, across the globe, on the same code. 
GitWeb went a step further, with its web interface for browsing a Git repository, including viewing contents of files, commit messages, and more.\n\nSourceForge offered the first glimpse of modern code collaboration by offering a central location to host and manage free, open source projects. Despite limited functionality and a cumbersome UI, SourceForge started bringing developers together in one place.\n\nEach step along the way improved the developer experience, allowed more people to contribute, and sped up the software development lifecycle.\n\n## A common place for code\n\nGitHub launched in 2008. While Git version control was a starting point for better code collaboration, GitHub made it even easier. By applying modern communication features inspired by social media sites, GitHub empowered social coding. It provided the first truly accessible UI to manage and review feature branches, and the ability to merge them with one-click “Pull Requests.” As a result, open source projects flocked to GitHub as a place to not only host code, but to grow a community as well.\n\n\u003Cdiv class=\"row\">\n\u003Cdiv class=\"col-md-6 col-sm-12\">\n\u003Cimg src=\"/images/blogimages/git-instaweb.png\" alt=\"GitWeb user interface\">\n\u003C/div>\n\u003Cdiv class=\"col-md-6 col-sm-12\">\n\u003Cimg src=\"/images/blogimages/github-ui.png\" alt=\"GitHub user interface\">\n\u003C/div>\n\u003Cdiv class=\"col-md-12 text-center\" style=\"margin-top: 5px\">\n\u003Cem>\u003Csmall>GitHub’s UI made it easier to manage and review feature branches compared to its predecessor, GitWeb.\u003C/small>\u003C/em>\n\u003C/div>\n\u003C/div>\n\n## What does the Microsoft acquisition mean for the industry?\n\nThe growing influence of software developers cannot be overstated. 
Developers are the [new kingmakers](https://thenewkingmakers.com/) and their influence within organizations is growing along with their value.\n\nGitHub has earned mindshare within the developer community, and Microsoft’s acquisition is certainly an attempt to garner and cultivate that mindshare. However, the long-term strategic implication seems to be that Microsoft wants to use GitHub as a means to drive Azure adoption.\n\nDeveloper tools have a high capacity for driving cloud usage. Once you have your application code hosted, the natural next step is to need a place to deploy it. Today, Microsoft fosters cloud adoption by tightly coupling Azure, its cloud service, together with Microsoft Visual Studio Team Services (VSTS), its set of development tools. Microsoft will likely integrate GitHub into VSTS in order to take advantage of the strong tie with Azure.\n\n> *“The way developers produce, deliver and maintain code has changed significantly in the last ten years and we applaud GitHub for being a driving force supporting the vast independent developer community through this evolution. This acquisition affirms the global importance of software developers and their influence in the enterprise. Microsoft likely acquired GitHub so it could more closely integrate it with Microsoft Visual Studio Team Services (VSTS) and ultimately help drive compute usage for Azure.” - [Sid Sijbrandij](/company/team/#sytses), GitLab CEO*\n\n## How does this relate to GitLab?\n\nWe applaud GitHub on its accomplishments and congratulate Microsoft on its acquisition. While we admire what's been done, our strategy differs in two key areas. First, instead of integrating multiple tools together, we believe a [single application](/handbook/product/single-application/), built from the ground up to support the entire DevOps lifecycle, is a better experience leading to a faster cycle time. 
Second, it’s important to us that the [core of our product always remain open source](/blog/gitlab-is-open-core-github-is-closed-source/) itself as well. Being “open core” means everyone can build the tools together. Having it all in a single application means everyone can use the same tool to collaborate together. We see the next evolution of software development as a world where everyone can contribute.\n",[683,705,9],{"slug":1504,"featured":6,"template":687},"microsoft-acquires-github","content:en-us:blog:microsoft-acquires-github.yml","Microsoft Acquires Github","en-us/blog/microsoft-acquires-github.yml","en-us/blog/microsoft-acquires-github",{"_path":1510,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1511,"content":1517,"config":1523,"_id":1525,"_type":13,"title":1526,"_source":15,"_file":1527,"_stem":1528,"_extension":18},"/en-us/blog/migrating-your-version-control-to-git",{"title":1512,"description":1513,"ogTitle":1512,"ogDescription":1513,"noIndex":6,"ogImage":1514,"ogUrl":1515,"ogSiteName":672,"ogType":673,"canonicalUrls":1515,"schema":1516},"Migrating your version control to Git? Here’s what you need to know","Change is hard, but moving to Git doesn’t have to be if you read these tips.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749681731/Blog/Hero%20Images/migrategit.jpg","https://about.gitlab.com/blog/migrating-your-version-control-to-git","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"Migrating your version control to Git? 
Here’s what you need to know\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Suri Patel\"}],\n        \"datePublished\": \"2020-11-12\",\n      }",{"title":1512,"description":1513,"authors":1518,"heroImage":1514,"date":1520,"body":1521,"category":748,"tags":1522},[1519],"Suri Patel","2020-11-12","\n\nDeciding to make sweeping changes that’ll affect your entire organization is a nerve-wracking experience, because until you see the change in action, you don’t know whether it’ll be a success or a disaster. Migrating from one [version control](/topics/version-control/) to Git is just that type of change that can make team members and leaders feel overwhelmed. However, advanced knowledge helps teams prepare and transition more smoothly. Here are a few tips to help you make the change.\n\n## Keep your previous version control system\n\nIf you perform a tip migration and copy over only the most recent commits, teams will still need access to the previous version control to consult project history. Set the old version control system to read-only and place a breadcrumb trail in Git to help developers find the information they need in the previous version control. Retaining the old version control preserves history and enables new team members to find important information, which may be lost as veteran contributors move to different roles or forget code specifics.\n\n## Clone your previous version control\n\nBefore making a sudden shift to a new version control, create a mirror of your previous system to test out whether your current processes work with Git or you need to make adjustments. Continuous integration, code review, security testing, and release processes should all be tested with the clone so that the complications can be remedied before the entire workflow breaks down.\n\n## Invest in learning Git\n\nAlthough Git is the most popular and widely-used version control, it’s also known for its initial degree of difficulty. 
Developers who are new to Git may struggle with the command line and find [branching](https://learngitbranching.js.org/) tedious and confusing. Despite Git’s learning curve, its positive impact on productivity and code quality is worth the trouble, and teams can cope with these challenges by investing in training or identifying Git experts within the team to coach others. Team members may find it easier to work with a [GUI](https://git-scm.com/downloads/guis) rather than the command line, so using a strong tool could help ease the transition.\n\n## Identify a branching strategy\n\n![A diagram of colorful blocks representing code with connecting lines to represent branches and the flow](https://about.gitlab.com/images/blogimages/illustration_feature-branches.png){: .shadow.small.left.wrap-text}\n\nBefore [migrating to Git](https://git-scm.com/book/en/v2/Git-and-Other-Systems-Migrating-to-Git), it’s imperative to select a branching strategy and train the team on its specifics. Git is a distributed version control system and offers unparalleled workflow flexibility, which can either streamline or convolute development depending on whether a team identifies a single branching strategy. Without a strategy, team members could interfere with each other’s work and ship unfinished features. Collaborating through a single workflow keeps the codebase clean and helps team members maintain velocity. Git enables teams to approach software development through a variety of workflows to meet specific needs. Some branching strategies, such as [GitLab Flow](https://docs.gitlab.com/ee/topics/gitlab_flow.html), are more straightforward than others, so it’s important to research your team’s needs before deciding.\n\n## Read more about Git\n\nAccording to the [2020 DevSecOps Survey](/developer-survey/), Git is the choice for source control for 92% of the survey takers, with just 2% using no source control and even smaller percentages using Azure DevOps Server and Subversion. 
Here are few additional posts to help you get the most out of Git.\n\n- [15 Git tips to improve workflow](/blog/15-git-tips-improve-workflow/)\n- [How Git Partial Clone lets you fetch only the large file you need](/blog/partial-clone-for-massive-repositories/)\n- [A guide to Git for beginners](/blog/beginner-git-guide/) \n\nCover image by [Belinda Fewings](https://unsplash.com/@bel2000a?utm_source=unsplash&utm_medium=referral&utm_content=creditCopyText) on [Unsplash](https://unsplash.com/photos/1Spvd7ktFX4)\n{: .note}\n",[9,982,820],{"slug":1524,"featured":6,"template":687},"migrating-your-version-control-to-git","content:en-us:blog:migrating-your-version-control-to-git.yml","Migrating Your Version Control To Git","en-us/blog/migrating-your-version-control-to-git.yml","en-us/blog/migrating-your-version-control-to-git",{"_path":1530,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1531,"content":1537,"config":1543,"_id":1545,"_type":13,"title":1546,"_source":15,"_file":1547,"_stem":1548,"_extension":18},"/en-us/blog/move-to-distributed-vcs",{"title":1532,"description":1533,"ogTitle":1532,"ogDescription":1533,"noIndex":6,"ogImage":1534,"ogUrl":1535,"ogSiteName":672,"ogType":673,"canonicalUrls":1535,"schema":1536},"Why you should switch to distributed version control","We share a few reasons why high-performing software development teams use distributed version control systems over centralized version control.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749681766/Blog/Hero%20Images/distributedvcs.jpg","https://about.gitlab.com/blog/move-to-distributed-vcs","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"Why you should move from centralized version control to distributed version control\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Suri Patel\"}],\n        \"datePublished\": \"2020-11-19\",\n      
}",{"title":1538,"description":1533,"authors":1539,"heroImage":1534,"date":1540,"body":1541,"category":748,"tags":1542},"Why you should move from centralized version control to distributed version control",[1519],"2020-11-19","\n\nDistributed version control has the power to increase collaboration and streamline development, but many teams are still using a centralized [version control system](/topics/version-control/) that prevents them from reaching their full development potential. If your team uses a centralized version control system, velocity, code quality, and collaboration aren’t at the same levels of high-performing teams that consistently deliver valuable products at rapid speeds. Using a [version control](/topics/version-control/) system isn’t enough to stay competitive in today’s market - you have to use the best tools available.\n\n## What is version control?\n\nVersion control lets software development teams build up communication and collaboration while continuously making and tracking changes to source code. Sometimes called code revision control, version control exists as a safety net to protect the source code while giving the development team the flexibility to experiment without worrying about causing damage or creating code conflicts. A version control system can be local, centralized, or distributed based on organizational needs.\n\n## Centralized version control: A relic from the past\n\nA centralized version control system relies on a central server where developers commit changes. Users like centralized systems, because they’re simple to set up and provide admins with workflow controls. 
Centralized vcs, like Subversion, CVS, and Perforce, solve the age-old problem of manually storing multiple copies on a hard drive, but the few benefits don’t outweigh what’s at risk from relying on a [single server](https://git-scm.com/book/en/v2/Getting-Started-About-Version-Control).\n\nIf the only copy of a project becomes corrupted or goes down, developers are unable to access the code or retrieve previous versions. Also, remote commits are extremely slow, because users must commit through a network to the central repository, which can slow down systems. Users must also be in network to push changes, limiting where and when developers can commit. Merging and branching are also difficult and confusing, since contributors have to track merges and branch as a single check-in.\n\n## Distributed version control: The key to rapid software development\n\nUnlike a centralized version control system, a distributed version control doesn’t have a single point of failure, because developers clone repositories on their distributed version control workstations, creating multiple backup copies. If the [source code](/solutions/source-code-management/) is corrupted, teams can use any developer’s clone as a backup, increasing security since there’s little risk of losing a project’s entire history. \n\nAlso, because there are local copies, developers can commit offline, which offers flexibility in their personal workflow and prevents having to commit as a giant changeset. Distributed version control, such as Git, Bazaar, and Mercurial, offers fast [branching](/topics/version-control/what-is-git-workflow/), because there’s no communication with a remote server - everything is done on a local drive.\n\nAre you ready for a quick look at Git, the most popular distributed version control system? 
[Brendan O’Leary](/company/team/#brendan), senior developer evangelist, explains Git basics to help teams get started in the video below.\n\n\u003C!-- blank line -->\n\u003Cfigure class=\"video_container\">\n  \u003Ciframe src=\"https://www.youtube-nocookie.com/embed/9oDNBuive-g\" frameborder=\"0\" allowfullscreen=\"true\"> \u003C/iframe>\n\u003C/figure>\n\u003C!-- blank line -->\n\nThe biggest challenge to switching to a distributed version control system is the learning curve. Teams will be able to ship higher quality code at new speeds using a distributed version control.\n\n## Core benefits of a distributed version control system\n\nA [distributed version control system](/topics/version-control/benefits-distributed-version-control-system/) is like each team member having a second set of hands to catch problems, introduce fast fixes, and execute fast merging with fewer conflicts. Additionally, it makes the collaboration process hyper-efficient, thereby letting DevOps teams work asynchronously. Version control empowers teams to collaborate and streamline software development to resolve pain points and create a centralized location for code.\n\n## Popular distributed version control systems (e.g. Git)\n\nThe three most well-known options are Git, SVN, and Mercurial. The most popular of these options is Git, which is an open-source distributed system that is used for any size software project. 
\n\nGit offers tons of features and benefits, including:\n\n* Strong support for non-linear development.\n\n* Works with popular protocols/systems including HTTP, FTP, and SSH.\n\n* Offers GIT GUI, which allows for fast re-scan, state change, sign off, commit & push the code quickly with low friction.\n\n* It can handle any size project.\n\n* Can function across platforms.\n\n* Toolkit-based design.\n\n* Rapid and efficient performance.\n\n* Code changes are easily tracked and managed.\n\nWhen choosing a version control system, make sure to evaluate all options to find the best fit for your team.\n\nCover image by [Hans-Peter Gauster](https://unsplash.com/@sloppyperfectionist?utm_source=unsplash&utm_medium=referral&utm_content=creditCopyText) on [Unsplash](https://unsplash.com/photos/3y1zF4hIPCg)\n{: .note}\n",[9,982,820],{"slug":1544,"featured":6,"template":687},"move-to-distributed-vcs","content:en-us:blog:move-to-distributed-vcs.yml","Move To Distributed Vcs","en-us/blog/move-to-distributed-vcs.yml","en-us/blog/move-to-distributed-vcs",{"_path":1550,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1551,"content":1557,"config":1563,"_id":1565,"_type":13,"title":1566,"_source":15,"_file":1567,"_stem":1568,"_extension":18},"/en-us/blog/new-git-default-branch-name",{"title":1552,"description":1553,"ogTitle":1552,"ogDescription":1553,"noIndex":6,"ogImage":1554,"ogUrl":1555,"ogSiteName":672,"ogType":673,"canonicalUrls":1555,"schema":1556},"The new Git default branch name","Why we're joining the Git community and updating \"master\" to \"main\".","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749681942/Blog/Hero%20Images/git_gitlab.jpg","https://about.gitlab.com/blog/new-git-default-branch-name","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"The new Git default branch name\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Daniel Gruesso\"}],\n        
\"datePublished\": \"2021-03-10\",\n      }",{"title":1552,"description":1553,"authors":1558,"heroImage":1554,"date":1560,"body":1561,"category":705,"tags":1562},[1559],"Daniel Gruesso","2021-03-10","\n\n**Note:** We have added release 13.12 to our release schedule. 13.12 is due to ship on 2021-05-22. This will cause the next major release 14.0 to ship in June instead of May as originally stated. The below content has been updated to reflect the new release date for 14.0 of 2021-06-22.\n{: .note}\n\nEvery Git repository has an initial or original branch, which is the first branch to be created when a new repository is generated. Historically, the default name for this initial branch was `master`. This term came from Bitkeeper, a predecessor to Git. Bitkeeper referred to the source of truth as the \"master repository\" and other copies as \"slave repositories\". This shows how common master/slave references have been in technology, and the difficulty in knowing how the term master should be interpreted.\n\n## A more descriptive and inclusive name\n\nThe Git project maintainers, in coordination with the broader community, have been listening to the development community’s feedback on determining a more descriptive and inclusive name for the default or primary branch and offering users options to change the name of the default branch name (usually `master`) of their repository. There have already been significant steps toward this goal:\n\n* Git 2.28.0, released in July 2020, introduced the `init.defaultBranch` configuration option, which allows Git users to define and configure a default branch name other than `master`. [In June 2020, the Software Freedom Conservancy released a statement](https://sfconservancy.org/news/2020/jun/23/gitbranchname/) summarizing why the default term `master` is offensive to many users because of its history.\n\n* Git maintainers are actively working toward a permanent change for the default name from `master` to `main`. 
You can read more about this change on [this post from the Git mailing list](https://lore.kernel.org/git/pull.656.v4.git.1593009996.gitgitgadget@gmail.com/). A large part of this effort involves [updating all existing tests](https://lore.kernel.org/git/xmqqa6vf437i.fsf@gitster.c.googlers.com/T/#t) in the Git project.\n\n## Changing GitLab's default branch name\n\nGitLab has already introduced changes that allow users to change the default branch name both at the [instance level](https://docs.gitlab.com/ee/user/project/repository/branches/default.html) (for self-managed users) and at the [group level](https://docs.gitlab.com/ee/user/project/repository/branches/default.html) (for both SaaS and self-managed users). To further align with the community, we will make the following changes in the upcoming releases:\n\n1. **Default branch name changes for GitLab.com and self-managed users**: When creating a new project in GitLab, the default branch name will change from `master` to `main`. We are planning to implement this change in two phases:\n\n    * Phase 1 (GitLab.com): We plan to update GitLab's underlying Git version to 2.31.0 as part of GitLab’s 13.11 release, shipping on April 22, 2021. We will additionally change the default branch name on GitLab.com from `master` to `main` on May 24th, 2021.\n\n    * Phase 2 (Self-managed): We will change the default branch name from `master` to `main` for self-managed GitLab as part for our next major release 14.0, shipping on June 22, 2021. Any project created in GitLab post 14.0 will use the new default. This does not affect existing projects.\n\n    - **Action required**: When upgrading a self-managed instance to GitLab 14.0, new projects created via the [GitLab GUI](/pricing/feature-comparison/) will use `main` as the default branch name. 
If your project is running CI/CD jobs configured in another project, hard-coded references to the `master` branch will need to be updated to use either `main` or to use a default branch name [CI/CD variable](https://docs.gitlab.com/ee/ci/variables/predefined_variables.html), such as `CI_DEFAULT_BRANCH`. You can continue to use `master` by configuring a [custom default branch name](https://docs.gitlab.com/ee/user/group/#custom-initial-branch-name).\n\n1. **Default branch name change for the GitLab project (and its related projects)**: The [GitLab project](https://gitlab.com/gitlab-org/gitlab) and its related projects will change their default branch name from `master` to `main`. Some projects have already been updated (for example [GitLab Shell](https://gitlab.com/gitlab-org/gitlab-shell)) and we plan to update the rest between now and the release of 14.0 on June 22, 2021.\n\n    - **Action required**: If during the switchover you created a contribution for the GitLab project and are still using the `master` branch locally, you will need to rebase your changes prior to pushing to consume the new default branch name of `main`.\n\nPlease note this **will only affect new projects** created in GitLab. Existing projects will not be affected. 
You can learn more about the change in the [default branch rename epic](https://gitlab.com/groups/gitlab-org/-/epics/3600).\n\nWe welcome the use of `main` as a more inclusive default branch name and are happy to see that our peers at [GitHub](https://github.blog/changelog/2020-10-01-the-default-branch-for-newly-created-repositories-is-now-main/) and [Atlassian](https://bitbucket.org/blog/moving-away-from-master-as-the-default-name-for-branches-in-git) also support this change.\n",[9],{"slug":1564,"featured":6,"template":687},"new-git-default-branch-name","content:en-us:blog:new-git-default-branch-name.yml","New Git Default Branch Name","en-us/blog/new-git-default-branch-name.yml","en-us/blog/new-git-default-branch-name",{"_path":1570,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1571,"content":1577,"config":1582,"_id":1584,"_type":13,"title":1585,"_source":15,"_file":1586,"_stem":1587,"_extension":18},"/en-us/blog/optimize-gitops-workflow",{"title":1572,"description":1573,"ogTitle":1572,"ogDescription":1573,"noIndex":6,"ogImage":1574,"ogUrl":1575,"ogSiteName":672,"ogType":673,"canonicalUrls":1575,"schema":1576},"Optimize GitOps workflow with version control from GitLab","A GitOps workflow improves development, operations and business processes and GitLab’s CI plays a vital role.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749673081/Blog/Hero%20Images/gitops-image-unsplash.jpg","https://about.gitlab.com/blog/optimize-gitops-workflow","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"Optimize GitOps workflow with version control from GitLab\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Brein Matturro\"}],\n        \"datePublished\": \"2019-10-28\",\n      }",{"title":1572,"description":1573,"authors":1578,"heroImage":1574,"date":1579,"body":1580,"category":680,"tags":1581},[1278],"2019-10-28","\nGitOps is a way for IT operations to manage changes 
across infrastructure and development teams. At GitLab\nConnect in Denver, [Tyler Sparks](https://www.linkedin.com/in/sparksconcept/), principal engineer and\nowner of Sparks Concept, presented a talk on why GitOps is a productive workflow and how\nusing GitLab can increase communication and version control.\n\n[GitOps](/topics/gitops/) uses infrastructure as code but with processes in place on top of it, including extensive use of\nmerge requests for everything from policy to infrastructure changes. “Success for most companies and\nengineering groups is based on the interactions of a large, complex, distributed system,” Tyler says.\nThe goal of GitOps is to incorporate Git beyond development and operations teams, improving the\nbusiness as a whole with the right tool. “It's a really cool way that GitLab integrates and it's a way to\nshift things left in your organization.”\n\n## The Git in GitOps\n\n“Git is the single source of truth. You shouldn’t be able to make any change outside of Git,” Tyler says. This creates one clean transaction between teams. Git establishes a unified location for anything from security, infrastructure changes, deployments, process changes, and even the integration of other tools. “Git is serving as the glue to make these safe transitions so that you can move faster as a team,” Tyler says.\n\nCreating that interaction between groups is often elaborate and difficult to manage. “Anyone building software these days is finding it more and more complex...everything is changing, the landscape is constantly changing,” Tyler says. Services are being run on stacks upon stacks and there is a lot of risk involved in maintenance. A tool, like [GitLab CI](/solutions/continuous-integration/), simplifies the processes and grants visibility.\n\n## GitOps best practices\n\nIn a GitOps workflow, where one simple change can impact three different teams, a strong [version control is imperative for communication](/topics/version-control/). 
Between disparate tools and poorly defined handoffs, the solution is to move into one repository for all tools and teams. With one overarching repository, “You can have a bunch of parallel workstreams running safely… you will have minimum viable change and a way to observe it,” Tyler says.\n\nWith GitLab’s version control system in place, teams can see what’s going on to work together and to know what change is going to impact where. “GitLab CI is one of the original products that made it possible to start to take an integrative view of the system,” Tyler says. “This is the penultimate way to [promote collaboration](/topics/gitops/gitops-gitlab-collaboration/) and to break down silos within an organization. GitLab is a tool that helps with that.”\n\nGitLab’s version control not only safeguards the infrastructure, but ultimately trickles throughout the entire enterprise. “As companies adopt GitLab, they’re not just more successful with their technology...it really comes down to how they’re functioning as a group,” Tyler says. “GitLab encourages some really good practices around development and how teams interact.”\n\n>“That’s why GitLab is the clear winner...They’re not just leading Gartner and Forrester because they paid somebody off. 
They’re actually an amazing tool.” Tyler Sparks, principal engineer and owner of Sparks Concept\n\nLearn more about GitOps best practices and Tyler’s work with GitLab CI in his presentation below:\n\n\u003Cfigure class=\"video_container\">\n  \u003Ciframe src=\"https://www.youtube.com/embed/5ykRuaZvY-E\" frameborder=\"0\" allowfullscreen=\"true\"> \u003C/iframe>\n\u003C/figure>\n\nCover image by [David Rangel](https://unsplash.com/@rangel) on [Unsplash](https://unsplash.com)\n{: .note}\n",[9,684,230,728],{"slug":1583,"featured":6,"template":687},"optimize-gitops-workflow","content:en-us:blog:optimize-gitops-workflow.yml","Optimize Gitops Workflow","en-us/blog/optimize-gitops-workflow.yml","en-us/blog/optimize-gitops-workflow",{"_path":1589,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1590,"content":1596,"config":1605,"_id":1607,"_type":13,"title":1608,"_source":15,"_file":1609,"_stem":1610,"_extension":18},"/en-us/blog/outreachy-sponsorship-winter-2020",{"title":1591,"description":1592,"ogTitle":1591,"ogDescription":1592,"noIndex":6,"ogImage":1593,"ogUrl":1594,"ogSiteName":672,"ogType":673,"canonicalUrls":1594,"schema":1595},"Technology internships meet open source in Outreachy","Inside Outreachy technology internships, where participants work on Git.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749664041/Blog/Hero%20Images/open-devops.png","https://about.gitlab.com/blog/outreachy-sponsorship-winter-2020","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"Technology internships meet open source in Outreachy\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Joey Salazar\"},{\"@type\":\"Person\",\"name\":\"Charvi Mendiratta\"},{\"@type\":\"Person\",\"name\":\"Nuritzi Sanchez\"},{\"@type\":\"Person\",\"name\":\"Christian Couder\"}],\n        \"datePublished\": \"2021-04-15\",\n      
}",{"title":1591,"description":1592,"authors":1597,"heroImage":1593,"date":1601,"body":1602,"category":680,"tags":1603},[1598,1599,1600,876],"Joey Salazar","Charvi Mendiratta","Nuritzi Sanchez","2021-04-15","\n\nAs an enthusiastic participant in the [open source](/blog/innersourcing-using-the-open-source-workflow-to-improve-collaboration-within-an-organization/) community, we were excited to participate in the [Outreachy technology internships program](https://www.outreachy.org/) again this year, which focuses on women and underrepresented groups. It's a way GitLab can give back, and as a bonus, Outreachy's principles intersect with [our Diversity, Inclusion and Belonging value](https://handbook.gitlab.com/handbook/values/#diversity-inclusion).\n\n## About the Outreachy program\n\nInitially, Outreachy began as the Open Source Program for Women (OPW) at [GNOME](https://www.gnome.org/about-us/). The program was successful and grew quickly. Today, Outreachy has grown into the largest global technology internships program that provides opportunities for women and underrepresented groups to work on open source projects.\n\nCurrently, Outreachy is independently organized with the help of many volunteers, or sponsored help. For example, [Cindy Pallares](/company/team/#cindy) is a GitLab employee and helps with organizing Outreachy as a site reliability engineer.\n\nOutreachy is a paid technology internship program that runs twice a year for three months. During that time, interns can work in areas like programming, user experience, documentation, illustration and graphic design, or data science. In this technology internship program, participants work remotely with experienced mentors from prominent FOSS communities like Git, Mozilla, Linux kernel, GNOME, Wikimedia, and many others.\n\nOne of the benefits of the Outreachy technology internship is that the interns do not need to be students. 
It's a great opportunity for people who are coming back into the workforce after a hiatus, or who are navigating a career change into tech. This technology internship program is unique because it incorporates skill sets beyond engineering – which creates a broader range of skill sets represented in the open source world. The Outreachy internship is remote, making it more relevant than ever during the pandemic by helping interns gain experience working on an all-remote team.\n\nGitLab is one of the organizations that sponsors the Outreachy technology internship program, and we hope that by sharing our experience we can encourage more tech organizations to join us in participating in Outreachy as [corporate sponsors](https://www.outreachy.org/sponsor/).\n\n## Outreachy interns work on Git\n\nMore than 90% of the professional applications created today are built using open source components, according to a [2020 Tidelift survey](https://cdn2.hubspot.net/hubfs/4008838/Resources/The-Tidelift-guide-to-managed-open-source.pdf?utm_source=hs_automation&utm_medium=email&utm_content=66640714). One of the fundamental open source technologies we leverage at GitLab is the [Git project](https://git-scm.com/), so we chose to sponsor an Outreachy intern to work there.\n\n> GitLab sponsors an Outreachy intern to work on one of the most critical open source technologies that it relies on: The Git project.\n\n[Christian Couder](/company/team/#chriscool), senior backend software engineer at GitLab, who works on Git full-time, introduced the [GitLab Developer Relations team](/handbook/marketing/developer-relations/) to the Outreachy opportunity during the winter of 2017-2018 round. An experienced mentor for other programs like Google Summer of Code, Christian thought that it would be great to mentor an intern through the Outreachy program as well. 
Since the number of mentored interns and the need to sponsor them increased over the years, GitLab has sponsored an Outreachy intern for the Git project since winter 2019-2020.\n\nOutreachy at Git works similarly to the [Google Summer of Code (GSoC) program](https://summerofcode.withgoogle.com/). Git participates in GSoC in the summer and Outreachy in the winter. These programs consist of the Git project finding mentors and project ideas for individual participants to work on. Then there is a selection step, which includes working on a micro-project (a small code-related change), as part of the application process, and writing a proposal for a project to work on during the internship. After the interns are announced, they begin to work on their projects. Typically, Git tries to provide two mentors per intern to provide the best possible experience for the mentee.\n\n> The mentors used to be long-time Git developers, but more and more Outreachy and GSoC alumni have returned to the program as mentors, indicating the power of these programs.\n\nThe mentors volunteer some time each week to help their mentees by answering questions, providing suggestions, reviewing contributions, etc. Contributions still have to be sent by participants to the Git mailing list as patches. Then, other experienced Git contributors review the contributions before they are integrated into the Git code base by [Junio Hamano](https://www.linkedin.com/in/gitster), the Git maintainer.\n\n## Meet the Outreachy interns\n\nWe met with the Outreachy interns at Git to learn more about their experience participating in the winter 2020-2021 Outreachy technology internship program. 
In the next section, the Outreachy interns shared what the experience was like, in their own words.\n\n### Charvi Mendiratta: A self-taught programmer with an interest in robotics\n\n_This section was written by Charvi._\n\n> I am a recent graduate from the electronics field in India, a self-taught programmer with internship experiences working on mobile robotics projects, and I aim to pursue a career as a software developer. - [Charvi](https://charvi-077.github.io/about/)\n\nIt turned out to be difficult to find a job as a software developer because of my background in electronics and because I lacked professional programming skills. Also, there are very few job opportunities for recent graduates in software engineering roles, especially those related to robotics.\n\nDue to these challenges, I decided to try out open source in parallel with brushing up my skills. I supposed that open source contributions would be the best way to get hands-on experience with projects that required real-life problem solving skills, and I wanted to learn to convert my code into deployable software. That's why I decided to apply to the Outreachy program.\n\nBesides wanting to learn more about creating enterprise-grade code, I have always been interested in being part of the open source community. I first learned about open source work culture from my college programming community. I remember the old days when I attended an open source event called '[Software Freedom Day](http://www.softwarefreedomday.in/)' at my university. That's where I first learned about different open source programs like Outreachy.\n\n> Over the course of my three month internship, I worked on cleaning up and improving the Git interactive rebase, which is a useful git command to rewrite or modify the commit history. 
- Charvi\n\n#### About Charvi's Outreachy project\n\nMy work on Git's interactive rebase, which was mentored by Christian and [Phillip Wood](https://git.github.io/rev_news/2019/11/20/edition-57/), will help users who want to rework their commits and make it easier for users to improve the quality of their contributions. When teams practice code review, for example, it's very useful to rework commits to make them better or easier to understand before a reviewer steps in, and to fix them when reviews point to problems.\n\nFirst, I added the options '-c' and '-C' to the present `fixup` command in the interactive rebase. The `fixup` command adds the functionality to edit the commit message of the specific commit listed in the interactive rebase (see [merged patches](https://lore.kernel.org/git/20210129182050.26143-1-charvi077@gmail.com/)). This work is based on the [original patch series](https://github.com/phillipwood/git/commits/wip/rebase-amend), started by Phillip.\n\nThen, I worked on the [follow-up patches](https://lore.kernel.org/git/20210210113650.19715-1-charvi077@gmail.com/) and introduced some improvements after discussing the user interface of the added options with the Git community. Next, I worked on adding the new feature to `git commit --fixup` that allows to prepare the \"amend!\" and \"reword!\" commit, as an alternative to the present `fixup!` commit. It works with `git commit --autosquash` and will help to fix-up the content and commit message of the specific commit from the command line (see [merged patches](https://lore.kernel.org/git/20210315075435.18229-1-charvi077@gmail.com/)).\n\n### Joey Salazar: An engineer with international experience\n\n_This section was written by Joey._\n\n> As a female engineer from Costa Rica, who graduated in China through a full scholarship, it has been a challenge to find opportunities with mentoring for my transition from IT into programming. 
- [Joey](https://about.me/gomezsalazar-jogebeth)\n\nEven though I worked five years in IT (OS, networking, and storage), and was certified in Linux+ and CCNA through self-learning before beginning my software engineering studies, most companies and organizations seem eager to hire mid-senior level developers. Very few seem willing to invest in helping people get to that level, or in finding ways to build on any preexisting IT experience. As an open source advocate, it was through my research of open source technologies and the open source space that I came across community groups such as [WomenWhoCode](https://www.womenwhocode.com/), which was where I learned about Outreachy.\n\n#### About Joey's Outreachy project\n\n> My favorite thing to work on, probably because of my [background in privacy advocacy](https://www.techdirt.com/articles/20200622/08142044757/long-past-time-to-encrypt-entire-dns.shtml), was adding the foundations of HTTPS connection support for the Git protocol by following up on [a patch](https://gitlab.com/wireshark/wireshark/-/merge_requests/1946) started (and shared by) long-time Wireshark developer, [Richard Sharpe](https://sharkfestus.wireshark.org/bios/richard-sharpe). –  Joey\n\nMy work on Git protocol support in [Wireshark](https://www.wireshark.org/), which was mentored by Git developers employed by Google, [Emily Shaffer](https://nasamuffin.dev) and Jonathan Nieder, will help users debugging Git or any Git using software (like GitLab). This work helps production teams or developers understand what's going on between Git clients and servers, so they can better troubleshoot or optimize how Git works. This project will help demystify Git and its inner workings in the tech community.\n\nAs Wireshark is \"the world’s foremost and widely-used network protocol analyzer\", improving the way it dissects and presents the Git protocol to the user is helpful and important. 
Traffic interception and analysis is part of many user's workflows – from students, to researchers and advocates. For a few years, Git's dissector in Wireshark was bare-bones, and supported only raw traffic transmitted over regular TCP transport – my work is helping to change that.\n\nBy starting with [base functionality](https://gitlab.com/wireshark/wireshark/-/merge_requests/1922) and building on top of other member's work, Joey and her mentors added parsing of the multiplexing ([sideband](https://gitlab.com/wireshark/wireshark/-/merge_requests/1313)) version in use (if any) to Wireshark's dissector for the Git protocol. Next, they [added parsing for the specific version](https://gitlab.com/wireshark/wireshark/-/merge_requests/1714) of the Git protocol that is used, following up on [an MR to parse the Git protocol version](https://gitlab.com/wireshark/wireshark/-/merge_requests/805), did some refactoring on [an MR to refactor Git packet line dissector](https://gitlab.com/wireshark/wireshark/-/merge_requests/1942), and began the foundations for Git protocol's [testing suite](https://gitlab.com/wireshark/wireshark/-/merge_requests/2142).\n\nToday the Git dissector now includes more functionality and error handling, as well as HTTPS transport support – all of which was done through GitLab's platform.\n\n## Outreachy mentor shares experience\n\n_Christian, the Outreachy mentor and GitLab team member who worked with Joey and Charvi, shares what the experience was like in his own words._\n\nThere are many rewarding parts to being a mentor. I really enjoy seeing mentees gain confidence over the weeks in their abilities to contribute significantly by themselves.\n\n> Since Git is used by more than 80% of the developers in the world, I hope that the Outreachy interns get the feeling that they can improve things even in small ways for millions of people and that their work can have a global impact. 
- Christian\n\nI also really enjoy it when former mentees want to continue contributing to the Git community after their internship. Outreachy alumni contributions can take many forms. Sometimes they continue to contribute on the same topic as their project, sometimes they participate in related discussions, even 10 years later. One of our mentees was recently hired to work full-time on Git. And it is of course great when they want to become mentors, so they can give back to the program and increase the number of people who can get mentored.\n\nIt's great too that Outreachy, Google, and sometimes the Git project itself all provide funds for former mentees to come to in-person Git events or open source-related conferences. Meeting mentees in-person is very rewarding. At in-person events, the interns can also meet a number of Git-related companies and people, and of course, learn even more about Git and open source. For some of them, it was the first time they traveled outside of their country or could visit a different continent.\n\n#### Mentorship comes with challenges\n\nThe most challenging part of being a mentor is the fact that the Git codebase is getting bigger and more complex as Git evolves and gains features all the time.\n\nThis makes it hard for participants to stay on track when the internship starts. They sometimes have to trust that following the process we suggest will lead them to better and better understanding until they can find their own way and become autonomous.\n\n## Outreach interns share their key takeaways\n\nWe asked Joey and Charvi to share some of the ways that the Outreachy technology internship has impacted them.\n\n### Joey has a better understanding of herself\n\n_This section was written by Joey._\n\n> My Outreachy internship helped me better define the type of team and community that I'd like to join and which will benefit the most from the wide range of skills that I can offer. 
– Joey\n\nOutreachy was an amazing help, not only in technical areas, but also with soft skills. For example, I formed a solid understanding of Git. Now I can use `git cherry-pick` and `git rebase`, as well as squash, comfortably since I understand better what they do, and how. Those Git commands gave me lots of trouble when I was a junior developer for [BIND](https://en.wikipedia.org/wiki/BIND), and now they don't give me trouble anymore. I also reinforced fundamentals in C -- implementing pointers and references without panic and knowing about vtables -- and I learned how to write test cases in Python.\n\nA crowning achievement was finding balance between patience and impatience, and between autonomy and guidance.\n\n### Charvi has fallen in love with the open source world\n\n_This section was written by Charvi._\n\n> Outreachy helped me start my open source journey. - Charvi\n\nI have always been fascinated with the open source work culture as a way to learn, share, and grow. I finally got wonderful working experience too, since both Outreachy and the Git project are prestigious organizations.\n\nI learned a lot throughout the entire internship, starting from when the Outreachy contribution period began before I qualified for the internship. On the technical side, I enhanced my C programming and debugging skills, learned to write neat code, learned about shell scripts, and developed a deeper understanding of Git commands and about the Git project workflow.\n\nApart from this, my internship helped me improve my communication skills, make connections with amazing software developers, and  become more confident in myself. I am sincerely thankful for the Outreachy program, Git community, and my mentors, Christian and Phillip. It was an amazing learning journey.\n\n## So what's next?\n\nNow that the Outreachy internship has concluded, both Joey and Charvi are ready to leverage their skills and experience working on the Git project to future work in FOSS. 
Learn more about [Charvi's experience](https://charvi-077.github.io/about/) and [Joey's experience](https://about.me/gomezsalazar-jogebeth) by following the links.\n\n## GitLab's continued internship opportunities\n\nGitLab is proud to have sponsored and mentored an intern for the Git project during the most recent round of Outreachy technology internships. We hope to someday qualify for our own Outreachy interns to work on the [GitLab FOSS project](https://gitlab.com/gitlab-org/gitlab-foss) (which celebrates 10 years in October 2021).\n\nThis summer, GitLab will also be participating for the first time in [Google Summer of Code 2021](https://summerofcode.withgoogle.com/organizations/4961424868114432/). We look forward to mentoring engineering students through that technology internship program.\n\nIn addition to participating in these two great technology internship programs, GitLab held its first [engineering internship program](/handbook/engineering/internships/) in 2020 with great success. 
As a result, GitLab will continue to hire interns for various projects and teams in an ongoing fashion, with a specific [focus on recruiting interns from underrepresented groups in engineering](/handbook/engineering/internships/#recruitment).\n\nWe look forward to supporting these programs that help foster diversity in open source and the wider tech industry, and are excited for the year ahead!\n",[683,9,1604],"design",{"slug":1606,"featured":6,"template":687},"outreachy-sponsorship-winter-2020","content:en-us:blog:outreachy-sponsorship-winter-2020.yml","Outreachy Sponsorship Winter 2020","en-us/blog/outreachy-sponsorship-winter-2020.yml","en-us/blog/outreachy-sponsorship-winter-2020",{"_path":1612,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1613,"content":1619,"config":1624,"_id":1626,"_type":13,"title":1627,"_source":15,"_file":1628,"_stem":1629,"_extension":18},"/en-us/blog/partial-clone-for-massive-repositories",{"title":1614,"description":1615,"ogTitle":1614,"ogDescription":1615,"noIndex":6,"ogImage":1616,"ogUrl":1617,"ogSiteName":672,"ogType":673,"canonicalUrls":1617,"schema":1618},"How Git Partial Clone lets you fetch only the large file you need","Work faster with this experimental Partial Clone feature for huge Git repositories, saving you time, bandwidth, and storage, one large file at a time.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749681131/Blog/Hero%20Images/partial-clone-for-massive-repositories.jpg","https://about.gitlab.com/blog/partial-clone-for-massive-repositories","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"How Git Partial Clone lets you fetch only the large file you need\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"James Ramsay\"}],\n        \"datePublished\": \"2020-03-13\",\n      
}",{"title":1614,"description":1615,"authors":1620,"heroImage":1616,"date":1621,"body":1622,"category":680,"tags":1623},[1060],"2020-03-13","\n\nThe Git project began nearly 15 years ago, on [April 7,\n2005](https://marc.info/?l=linux-kernel&m=111288700902396), and is now the\n[version control system](/topics/version-control/) of choice for developers. Yet, there are certain types of projects that\noften do not use Git, particularly projects that have many large binary files,\nsuch as video games. One reason projects with large binary files don't use Git\nis because, when a Git repository is cloned, Git will download every version of\nevery file in the repo. For most use cases, downloading this history is a\nuseful feature, but it slows cloning and fetching for projects with large binary\nfiles, assuming the project even fits on your computer.\n\n## What is Partial Clone?\n\nPartial Clone is a new feature of Git that replaces [Git\nLFS](https://git-lfs.github.com/) and makes working with very large repositories\nbetter by teaching Git how to work without downloading every file. Partial Clone\nhas been\n[years](https://public-inbox.org/git/xmqqeg4o27zw.fsf@gitster.mtv.corp.google.com/)\nin the making, with code contributions from GitLab, GitHub, Microsoft and\nGoogle. Today it is experimentally available in Git and GitLab, and can be\nenabled by administrators\n([docs](https://docs.gitlab.com/ee/topics/git/partial_clone.html)).\n\nPartial Clone speeds up fetching and cloning because less data is\ntransferred, and reduces disk usage on your local computer. For example, cloning\n[`gitlab-com/www-gitlab-com`](https://gitlab.com/gitlab-com/www-gitlab-com)\nusing Partial Clone (`--filter=blob:none`) is at least 50% faster, and transfers\n70% less data.\n\nNote: Partial Clone is one specific performance optimization for very large\nrepositories. 
[Sparse\nCheckout](https://github.blog/2020-01-17-bring-your-monorepo-down-to-size-with-sparse-checkout/)\nis a related optimization that is particularly focused on repositories with\ntremendously large numbers of files and revisions such as\n[Windows](https://devblogs.microsoft.com/bharry/the-largest-git-repo-on-the-planet/)\ncode base.\n\n## A brief history of large files\n\n\"What about Git LFS?\" you may ask. Doesn't LFS stand for \"large file storage\"?\n\nPreviously, extra tools were required to store large files in Git. In 2010,\n[git-annex](https://git-annex.branchable.com/) was released, and five years\nlater in 2015, [Git LFS](https://git-lfs.github.com/) was released. Both\ngit-annex and Git LFS added large file support to Git in a similar way: Instead\nof storing a large file in Git, store a pointer file that links to the large\nfile. Then, when someone needs a large file, they can download it on-demand\nusing the pointer.\n\nThe criticism of this approach is that there are now two places to store files,\nin Git or in Git LFS. Which means that everyone must remember that big files need\nto go in Git LFS to keep the Git repo small and fast. There are downsides to\nthis approach. Besides being susceptible to human error, the pointer encodes\ndecisions based on bandwidth and file type into the structure of the repository\nthat influence all the people using the repository. Our assumptions about\nbandwidth and storage are likely to change over time, and vary by the location,\nbut decisions encoded in the repository are not flexible. Administrators and\ndevelopers alike benefit from flexibility in where to store large files, and\nwhich files to download.\n\nPartial Clone solves these problems by removing the need for two classes of\nstorage, and special pointers. 
Let's walk through an example to understand how.\n\n## How to get started with Partial Clone\n\nLet's continue to use `gitlab-com/www-gitlab-com` as an example project, since\nit has quite a lot of images. For a larger repository, like a video game with\ndetailed textures and models that could take up a lot of disk space, the benefits will be even more significant.\n\nInstead of a vanilla `git clone`, we will include a filter spec which controls\nwhat is excluded when fetching data. In this situation, we just want to exclude\nlarge binary files. I've included `--no-checkout` so we can more clearly observe\nwhat is happening.\n\n```bash\ngit clone --filter=blob:none --no-checkout git@gitlab.com/gitlab-com/www-gitlab-com.git\n# Cloning into 'www-gitlab-com'...\n# remote: Enumerating objects: 624541, done.\n# remote: Counting objects: 100% (624541/624541), done.\n# remote: Compressing objects: 100% (151886/151886), done.\n# remote: Total 624541 (delta 432983), reused 622339 (delta 430843), pack-reused 0\n# Receiving objects: 100% (624541/624541), 74.61 MiB | 8.14 MiB/s, done.\n# Resolving deltas: 100% (432983/432983), done.\n\n```\n\nAbove we explicitly told Git not to checkout the default branch. Normally\n`checkout` doesn't require fetching any data from the server, because we have\neverything locally. 
When using Partial Clone, since we are deliberately not downloading everything, Git will need to fetch any missing files when doing a\ncheckout.\n\n```bash\ngit checkout master\n# remote: Enumerating objects: 12080, done.\n# remote: Counting objects: 100% (12080/12080), done.\n# remote: Compressing objects: 100% (11640/11640), done.\n# remote: Total 12080 (delta 442), reused 9773 (delta 409), pack-reused 0\n# Receiving objects: 100% (12080/12080), 1.10 GiB | 8.49 MiB/s, done.\n# Resolving deltas: 100% (442/442), done.\n# Updating files: 100% (12342/12342), done.\n# Filtering content: 100% (3/3), 131.24 MiB | 4.73 MiB/s, done.\n```\n\nIf we checkout a different branch or commit, we'll need to download more missing\nfiles.\n\n```bash\ngit checkout 92d1f39b60f957d0bc3c5621bb3e17a3984bdf72\n# remote: Enumerating objects: 1968, done.\n# remote: Counting objects: 100% (1968/1968), done.\n# remote: Compressing objects: 100% (1953/1953), done.\n# remote: Total 1968 (delta 23), reused 1623 (delta 15), pack-reused 0\n# Receiving objects: 100% (1968/1968), 327.44 MiB | 8.83 MiB/s, done.\n# Resolving deltas: 100% (23/23), done.\n# Updating files: 100% (2255/2255), done.\n# Note: switching to '92d1f39b60f957d0bc3c5621bb3e17a3984bdf72'.\n```\n\nGit remembers the filter spec we provided when cloning the repository so that\nfetching updates will also exclude large files until we need them.\n\n```bash\ngit config remote.origin.promisor\n# true\n\ngit config remote.origin.partialclonefilter\n# blob:none\n```\n\nWhen committing changes, you simply commit binary files like you would any other\nfile. There is no extra tool to install or configure, no need to treat big files\ndifferently to small files.\n\n## Network and Storage\n\nIf you are already using [Git LFS](https://git-lfs.github.com/) today, you might\nbe aware that large files are stored and transferred differently to regular Git\nobjects. 
On GitLab.com, Git LFS objects are stored in object storage (like AWS\nS3) rather than fast attached storage (like SSD), and transferred over HTTP even\nwhen using SSH for regular Git objects. Using object storage has the advantage\nof reducing storage costs for large binary files, while using simpler HTTP\nrequests for large downloads allows the possibility of resumable and parallel\ndownloads.\n\nPartial Clone\n[already](https://public-inbox.org/git/20190625134039.21707-1-chriscool@tuxfamily.org/)\nsupports more than one remote, and work is underway to allow large files to be\nstored in a different location such as object storage. Unlike Git LFS, however,\nthe repository or instance administrator will be able to choose which objects\nshould be stored where, and change this configuration over time if needed.\n\nFollow the epic for [improved large file\nstorage](https://gitlab.com/groups/gitlab-org/-/epics/1487) to learn more and\nfollow our progress.\n\n## Performance\n\nWhen fetching new objects from the Git server using a [filter\nspec](https://github.com/git/git/blob/v2.25.0/Documentation/rev-list-options.txt#L735)\n to exclude objects from the response, Git will check each object and exclude\n any that match the filter spec. In [Git\n 2.25](https://raw.githubusercontent.com/git/git/master/Documentation/RelNotes/2.25.0.txt),\n the most recent version, filtering has not been optimized for performance.\n\n[Jeff King (Peff)](https://github.com/peff/) (GitHub) recently\n[contributed](https://public-inbox.org/git/20200214182147.GA654525@coredump.intra.peff.net/)\nperformance improvements for blob size filtering, which will likely be included\nin [Git 2.26](https://gitlab.com/gitlab-org/gitaly/issues/2497), and our plan is\nto include it in GitLab 12.10 release.\n\nOptimizing the sparse filter spec option (`--filter:sparse`), which filters\nbased on file path is more complex because blobs, which contain the file\ncontent, do not include file path information. 
The directory structure of a\nrepository is stored in tree objects.\n\nFollow the epic for [Partial Clone performance\nimprovements](https://gitlab.com/groups/gitlab-org/-/epics/1671) to learn more\nand follow our progress.\n\n## Usability\n\nOne of the drawbacks of Git LFS was that it required installing an additional\ntool. In comparison, Partial Clone does not require any additional tools.\nHowever, it does require learning new options and configurations, such as to\nclone using the `--filter` option.\n\nWe want to make it easy for people get their work done, who simply desire Git to\njust work. They shouldn't need to work out which is the optimal blob size filter\nspec for a project? Or what even is a filter spec?  While Partial Clone remains\nexperimental, we haven't made any changes to the GitLab interface to highlight\nPartial Clone, but we are investigating this and welcome your feedback. Please\njoin the conversation on this\n[issue](https://gitlab.com/gitlab-org/gitlab/issues/207744).\n\n## File locking and tool integrations\n\nAny conversation of large binary files, particularly in regards to video\ngames is incomplete without discussing file locking and tooling integrations.\n\nUnlike plain text [source code](/solutions/source-code-management/), resolving conflicts between different versions of\na binary file is often impossible. To prevent conflicts in binary file editing,\nan exclusive file lock is used, meaning only one person at a time can edit a\nsingle file, regardless of branches. If conflicts can't be resolved, allowing multiple\nversions of a individual file to be created in parallel on different branches is a bug, not\na feature. 
GitLab already has basic file locking support, but it is really only\nuseful for plain text because it only applies to the default branch, and is not\nintegrated with any local tools.\n\nLocal tooling integrations are important for binary asset workflows, to\nautomatically propagate file locks to the local development environment, and to\nallow artists to work on assets without needing to use Git from the command\nline. Propagating file locks quickly to local development environments is also\nimportant because it prevents work from being wasted before it even happens.\n\nFollow the [file locking](https://gitlab.com/groups/gitlab-org/-/epics/1488) and\n[integrations](https://gitlab.com/groups/gitlab-org/-/epics/2704) epics for more\ninformation about what we're working on.\n\n## Conclusion\n\nLarge files are necessary for many projects, and Git will soon support this\nnatively, without the need for extra tools. Although Partial Clone is still an\nexperimental feature, we are making improvements with every release and the\nfeature is now ready for testing.\n\nThank you to the Git community for your work over the past years on improving\nsupport for enormous repositories. 
Particularly, thank you to [Jeff\nKing](https://github.com/peff/) (GitHub) and [Christian\nCouder](https://about.gitlab.com/company/team/#chriscool) (senior backend\nengineer on Gitaly at GitLab) for your early experimentation with Partial Clone,\nJonathan Tan (Google) and [Jeff Hostetler](https://github.com/jeffhostetler)\n(Microsoft) for contributing the [first\nimplementation](https://public-inbox.org/git/cover.1506714999.git.jonathantanmy@google.com/)\nof Partial Clone and promisor remotes, and the many others who've also\ncontributed.\n\nIf you are already using Partial Clone, or would like to help us test Partial\nClone on a large project, please get in touch with me, [James\nRamsay](https://about.gitlab.com/company/team/#jramsay) (group manager, product\nfor Create at GitLab), [Jordi\nMon](https://about.gitlab.com/company/team/#jordi_mon) (senior product marketing\nmanager for Dev at GitLab), or your account manager.\n\nFor more information on Partial Clone, check out [the documentation](https://docs.gitlab.com/ee/topics/git/partial_clone.html).\n\nCover image by [Simon Boxus](https://unsplash.com/@simonlerouge) on\n[Unsplash](https://unsplash.com/photos/4ftI4lCcByM)\n{: .note}\n",[9,684],{"slug":1625,"featured":6,"template":687},"partial-clone-for-massive-repositories","content:en-us:blog:partial-clone-for-massive-repositories.yml","Partial Clone For Massive Repositories","en-us/blog/partial-clone-for-massive-repositories.yml","en-us/blog/partial-clone-for-massive-repositories",{"_path":1631,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1632,"content":1638,"config":1643,"_id":1645,"_type":13,"title":1646,"_source":15,"_file":1647,"_stem":1648,"_extension":18},"/en-us/blog/pre-commit-post-deploy-is-dead",{"title":1633,"description":1634,"ogTitle":1633,"ogDescription":1634,"noIndex":6,"ogImage":1635,"ogUrl":1636,"ogSiteName":672,"ogType":673,"canonicalUrls":1636,"schema":1637},"Pre-commit and post-deploy code reviews are dead","In a world with Git, 
pre-commit and post-deploy code reviews are relics that can be eliminated from your workflow.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749678861/Blog/Hero%20Images/pre-commit.jpg","https://about.gitlab.com/blog/pre-commit-post-deploy-is-dead","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"Pre-commit and post-deploy code reviews are dead\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Aricka Flowers\"}],\n        \"datePublished\": \"2019-01-31\",\n      }",{"title":1633,"description":1634,"authors":1639,"heroImage":1635,"date":1640,"body":1641,"category":726,"tags":1642},[977],"2019-01-31","\nPre-commit and post-deploy reviews have been the industry standard for ensuring that code is functioning as intended. But with Git around, are these methods still needed?\n\nLet’s take a step back and look at how they work.\n\n### Pre-commit reviews require that code is checked for bugs before it is committed\n\nOur CEO [Sid Sijbrandij](/company/team/#sytses) says pre-commit reviews makes sense because new code is evaluated before it is introduced into the code base. But with distributed version control, he says, you can essentially [do the same thing on Git branches](https://docs.gitlab.com/ee/topics/gitlab_flow.html). Prior to Git, branches were too pricey to use regularly in [version control systems](/topics/version-control/) like Subversion.\n\n### Post-deploy reviews periodically check for areas of improvement in the code base\n\nPost-deploy reviews are typically done on a periodic basis as a way to check certain areas of the code base and decide if improvements can be made. This method doesn’t make sense, according to Sid, because \"The code has already proven itself in production ... 
so you’re reluctant to make changes to it.\" Additionally, the idea of occasionally reviewing your code base is not really needed:\n\n\"If there's technical debt in there, at least it's not affecting other code,\" Sid explains. \"There's a certain interest you pay on technical debt, and it has to do with how much it spreads the technical debt to your code base. Code that is not doing much, meaning it's being executed but it's not changing much, well at least it's not influencing other code. You're always going to have tech debt, and you're always going to have a limited time during which you can review and fix things. Focus on the code that's active, that's probably the best place to focus.\"\n\n### Git branches are more efficient\n\nUsing Git branches to ensure that code is safe to introduce into the code base improves efficiencies when compared to pre-commit and post-deploy reviews, says Sid, who finds the former to be hard to track.\n\n\"Pre-commit code reviews were a bit awkward because you didn't have a good way to refer to it. It was in the tool, but you didn't have a SHA or definite way to refer to that version. And it was hard to know what CI it ran against because there wasn't a SHA. So by doing it post-commit, you have it in versions and it's much easier to see what you referred to. But with code review after deploy, the mindset was, 'If it works, you move on.'\n\n> \"If you change it, there's extra risk; if you don't change it, it's extra tech debt – and you always have to choose between the two.\"\n\n\"You're not going to be as vigilant to technical debt building up and it's harder to request that someone change something that’s working. If you change it, there's extra risk; if you don't change it, it's extra tech debt – and you always have to choose between the two. 
With pre-deploy code reviews, you don't have to make that choice …  [With what we have now], I think pre-commit and post-deploy code reviews are dead, and code should be reviewed on a branch before it's deployed to production.\"\n\nWhat do you think: Are pre-commit and post-deploy reviews a thing of the past? Tweet us @GitLab!\n{: .alert .alert-gitlab-purple.text-center}\n\nPhoto by [Caspar Camille Rubin](https://unsplash.com/photos/fPkvU7RDmCo?utm_source=unsplash&utm_medium=referral&utm_content=creditCopyText) on Unsplash\n{: .note}\n",[798,9,982],{"slug":1644,"featured":6,"template":687},"pre-commit-post-deploy-is-dead","content:en-us:blog:pre-commit-post-deploy-is-dead.yml","Pre Commit Post Deploy Is Dead","en-us/blog/pre-commit-post-deploy-is-dead.yml","en-us/blog/pre-commit-post-deploy-is-dead",{"_path":1650,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1651,"content":1657,"config":1663,"_id":1665,"_type":13,"title":1666,"_source":15,"_file":1667,"_stem":1668,"_extension":18},"/en-us/blog/proximus-customer-story-clearcase-to-gitlab",{"title":1652,"description":1653,"ogTitle":1652,"ogDescription":1653,"noIndex":6,"ogImage":1654,"ogUrl":1655,"ogSiteName":672,"ogType":673,"canonicalUrls":1655,"schema":1656},"Proximus shares its #movingtoGitLab story","Moving to GitLab resulted in an 80 percent drop in support tickets and an increase in developer productivity.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749678603/Blog/Hero%20Images/traffic-at-sunset.jpg","https://about.gitlab.com/blog/proximus-customer-story-clearcase-to-gitlab","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"Proximus shares its #movingtoGitLab story\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Bert Van Eyck\"}],\n        \"datePublished\": \"2019-06-07\",\n      
}",{"title":1652,"description":1653,"authors":1658,"heroImage":1654,"date":1660,"body":1661,"category":748,"tags":1662},[1659],"Bert Van Eyck","2019-06-07","\n[Proximus](https://www.proximus.com/) is a telecommunication company providing services to residential, enterprise, and public users. We are the leading provider of telephony, internet, television, and network-based ICT services in Belgium, with more than 2 million customers.\n\n## Our road to GitLab\n\nThe technical divisions of Proximus deliver a big part of the applications and systems required for delivering the best possible service to our end users. It includes all types of capabilities such as network construction, network maintenance, product ordering, product selling, billing, etc.\nSome examples of our development include:\n\n- Our website, [Proximus.be](https://www.proximus.be), on which users can find product info, support info and so much more.\n- A mobile app where everyone can check their usage, products, bills, etc.\n- Television interface.\n- A television app.\n\nTo ensure a performant and stable working environment for our developers, we have been working for several years to create a CI/CD DevOps workflow.\n\nThe first complete chain started in 2014 and used tools like ClearCase, Jenkins, Nexus, etc. By 2015 we had about 200 applications which were using our end-to-end chain to build and deploy in all different environments.\n\nIn 2016, to continue to improve our delivery chain, we considered switching ClearCase to Git. Despite ClearCase being a powerful tool, we noticed that the learning curve and the ease of use of ClearCase was not optimal. 
Also some of the tools we used were starting to lose compatibility.\n\nWe quickly came across GitLab and decided to try our first setup with [GitLab CE](/blog/gitlab-tiers/) in mid-2016.\n\n## The evolution of GitLab inside Proximus\n\nOur first implementation of Gitlab was rapidly a real success and the popularity of GitLab was increasing exponentially within our developer community. So, we decided to set up a corporate GitLab CE server at Proximus and to promote the creation of all new applications using our existing CI/CD chain with GitLab as source code management.\nIn just one year of using GitLab, we grew to 325 projects and about 600 users.\n\nBecause GitLab was becoming a big part of our tool set, we switched to GitLab EE in Q2 of 2017. This allowed us to use more features of GitLab such as: LDAP groups, push rules, mirror repositories, etc.\nAnd of course, with the enterprise edition you also receive additional support. With the enterprise edition we also started moving applications from ClearCase to GitLab.\n\nWe were also investigating and testing other features to expand our use of GitLab in the meantime:\n\n- Some projects have started using GitLab CI to build.\n- Integration with Jira has been implemented.\n- Currently experimenting with a first setup of GitLab’s global search function in combination with Elasticsearch.\n\nBy the end of 2018 we had grown to almost 1,000 users and 1,700 projects.\n\n## Challenges\n\nOur biggest challenge was to maintain and ensure a stable environment while growing rapidly. When we started using GitLab CI we encountered some issues with the large number of pipelines and jobs being created, which were consuming a lot of our resources. 
But [as of GitLab 11.6 a feature has been provided to remove pipelines with their job logs when using API](/releases/2018/12/22/gitlab-11-6-released/#pipelines-can-now-be-deleted-by-project-maintainers-using-api), which helped a lot.\n\n## Results\n\nSince we started using GitLab, we have been able to provide our developers with faster setup and support. Another very noticeable side effect of switching to GitLab was the significant drop in the number of support tickets created by the developers. Our first full year of using GitLab inside our CI/CD setup resulted in **80 percent** fewer tickets.\n\nEven in 2018, after our total number of users had grown to almost 1,000, the number of projects had multiplied by five and we migrated 75 applications to GitLab. We still had **65 percent** fewer tickets.\n\nIn the future, we will continue looking into expanding our GitLab environment and we hope to continue the positive evolution together with the support of GitLab.\n",[266,9,230,728,683],{"slug":1664,"featured":6,"template":687},"proximus-customer-story-clearcase-to-gitlab","content:en-us:blog:proximus-customer-story-clearcase-to-gitlab.yml","Proximus Customer Story Clearcase To Gitlab","en-us/blog/proximus-customer-story-clearcase-to-gitlab.yml","en-us/blog/proximus-customer-story-clearcase-to-gitlab",{"_path":1670,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1671,"content":1677,"config":1682,"_id":1684,"_type":13,"title":1685,"_source":15,"_file":1686,"_stem":1687,"_extension":18},"/en-us/blog/rearchitecting-git-object-database-mainentance-for-scale",{"title":1672,"description":1673,"ogTitle":1672,"ogDescription":1673,"noIndex":6,"ogImage":1674,"ogUrl":1675,"ogSiteName":672,"ogType":673,"canonicalUrls":1675,"schema":1676},"Why and how we rearchitected Git object database maintenance for scale","Go in-depth into improvements to maintenance of the Git object database for reduced overhead and increased 
efficiency.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749664413/Blog/Hero%20Images/speedlights.png","https://about.gitlab.com/blog/rearchitecting-git-object-database-mainentance-for-scale","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"Why and how we rearchitected Git object database maintenance for scale\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Patrick Steinhardt\"}],\n        \"datePublished\": \"2023-11-02\",\n      }",{"title":1672,"description":1673,"authors":1678,"heroImage":1674,"date":1679,"body":1680,"category":726,"tags":1681},[677],"2023-11-02","\n[Gitaly](/direction/gitaly/#gitaly-1), the service that is responsible for providing access to Git repositories in GitLab, needs to ensure that the repositories are maintained regularly. Regular maintenance ensures:\n\n- fast access to these repostiories for users\n- reduced resource usage for servers\n\nHowever, repository maintenance is quite expensive by itself and especially so for large monorepos.\n\nIn [a past blog post](/blog/scaling-repository-maintenance/), we discussed how we revamped the foundations of repository maintenance so that we can iterate on the exact maintenance strategy more readily. 
This blog post will go through improved maintenance strategies for objects hosted in a Git repository, which was enabled by that groundwork.\n\n- [The object database](#the-object-database)\n- [The old way of packing objects](#the-old-way-of-packing-objects)\n- [All-into-one repacks](#all-into-one-repacks)\n- [Deletion of unreachable objects](#deletion-of-unreachable-objects)\n- [Reachability checks](#reachability-checks)\n- [The new way of packing objects](#the-new-way-of-packing-objects)\n- [Cruft packs](#cruft-packs)\n- [More efficient incremental repacks](#more-efficient-incremental-repacks)\n- [Geometric repacking](#geometric-repacking)\n- [Real-world results](#real-world-results)\n\n## The object database\n\nWhenever a user makes changes in a Git repository, these changes come in the form of new objects written into the repository. Typically, any such object is written into the repository as a so-called \"loose object,\" which is a separate file that contains the compressed contents of the object itself with a header that identifies the type of the object.\n\nTo demonstrate this, in the following example we use\n[`git-hash-object(1)`](https://www.git-scm.com/docs/git-hash-object) to write a new blob into the repository:\n\n```shell\n $ git init --bare repository.git\nInitialized empty Git repository in /tmp/repository.git/\n $ cd repository.git/\n $ echo \"contents\" | git hash-object -w --stdin\n12f00e90b6ef79117ce6e650416b8cf517099b78\n $ tree objects\nobjects\n├── 12\n│   └── f00e90b6ef79117ce6e650416b8cf517099b78\n├── info\n└── pack\n\n4 directories, 1 file\n```\n\nAs you can see, the new object was written into the repository and stored as a separate file in the objects database.\n\nOver time, many of these loose objects will accumulate in the repository. Larger repositories tend to have millions of objects, and storing all of them as separate files is going to be inefficient. 
To ensure that the repository can be served efficiently to our users and to keep the load on servers low, Git will regularly compress loose objects into packfiles. We can compress loose objects manually by using, for example, [`git-pack-objects(1)`](https://www.git-scm.com/docs/git-pack-objects):\n\n```shell\n $ git pack-objects --pack-loose-unreachable ./objects/pack/pack \u003C/dev/null\nEnumerating objects: 1, done.\nCounting objects: 100% (1/1), done.\nWriting objects: 100% (1/1), done.\nTotal 1 (delta 0), reused 0 (delta 0), pack-reused 0\n7ce39d49d7ddbbbbea66ac3d5134e6089210feef\n $ tree objects\n objects/\n├── 12\n│   └── f00e90b6ef79117ce6e650416b8cf517099b78\n├── info\n│   └── packs\n└── pack\n    ├── pack-7ce39d49d7ddbbbbea66ac3d5134e6089210feef.idx\n    └── pack-7ce39d49d7ddbbbbea66ac3d5134e6089210feef.pack\n```\n\nThe loose object was compressed into a packfile (`.pack`) with a packfile index (`.idx`) that is used to efficiently access objects in that packfile.\n\nHowever, the loose object still exists. To remove it, we can execute [`git-prune-packed(1)`](https://www.git-scm.com/docs/git-prune-packed) to delete all objects that have been packed already:\n\n```shell\n $ git prune-packed\n $ tree objects/\nobjects/\n├── info\n│   └── packs\n└── pack\n    ├── pack-7ce39d49d7ddbbbbea66ac3d5134e6089210feef.idx\n    └── pack-7ce39d49d7ddbbbbea66ac3d5134e6089210feef.pack\n```\n\nFor end users of Git, all of this happens automatically because Git calls `git gc --auto` regularly. This command uses heuristics to figure out what needs to be optimized and whether loose objects need to be compressed into packfiles. This command is unsuitable for the server side because:\n\n- The command does not scale well enough in its current form. The Git project must be more conservative about changing defaults because they support a lot of different use cases. 
Because we know about the specific needs that we have at GitLab, we can adopt new features that allow for more efficient maintenance more readily.\n- The command does not provide an easy way to observe what exactly it is doing, so we cannot provide meaningful metrics.\n- The command does not allow us to fully control all its exact inner workings and so is not flexible enough.\n\nTherefore, Gitaly uses its own maintenance strategy to maintain Git repositories, of which maintaining the object database is one part.\n\n## The old way of packing objects\n\nAny maintenance strategy to pack objects must ensure the following three things to keep a repository efficient and effective with disk space:\n\n- Loose objects must be compressed into packfiles.\n- Packfiles must be merged into larger packfiles.\n- Objects that are not reachable anymore must be deleted eventually.\n\nPrevious to GitLab 16.0, Gitaly used the following three heuristics to ensure that those three things happened:\n\n- If the number of packfiles in the repository exceeds a certain threshold, Gitaly rewrote all packfiles into a single new packfile. Any objects that were unreachable were put into loose files so that they could be deleted after a certain grace period.\n- If the number of loose objects exceeded a certain threshold, Gitaly compressed all reachable loose objects into a new packfile.\n- If the number of loose objects that are older than the grace period for object deletion exceeded a certain threshold, Gitaly deleted those objects.\n\nWhile these heuristics satisfy all three requirements, they have several downsides, especially in large monorepos that contain gigabytes of data.\n\n### All-into-one repacks\n\nFirst and foremost, the first heuristic requires us to do all-into-one repacks where all packfiles are regularly compressed into a single packfile. In Git repositories with high activity levels, we usually create lots of packfiles during normal operations. 
But because we need to limit the maximum number of packfiles in a repository, we need to regularly do these complete rewrites of all objects.\n\nUnfortunately, doing such an all-into-one repack can be prohibitively expensive in large monorepos. The repacks may allocate large amounts of memory and typically keep multiple CPU cores busy during the repack, which can require hours of time to complete.\n\nSo, ideally, we want to avoid these all-into-one repacks to the best extent possible.\n\n### Deletion of unreachable objects\n\nTo avoid certain race conditions, Gitaly and Git enforce a grace period before an unreachable object is eligible for deletion. This grace period is tracked using the access time of such an unreachable object: If the last access time of the object is earlier than the grace period, the unreachable object can be deleted.\n\nTo track the access time of a single object, the object must exist as a loose object. This means that all objects that are pending deletion will be evictedfrom any packfile they were previously part of and become loose objects.\n\nBecause the grace period we have in place for Gitaly is 14 days, large monorepos tend to grow a large number of such loose object that are pending deletion. This has two effects:\n\n- The number of loose objects overall grows, which makes object lookup less efficient.\n- Loose objects are stored a lot less efficiently than packed objects, which means that the disk space required for the objects that are pending deletion is signficantly higher than if those objects were stored in their packed form.\n\nIdeally, we would be able to store unreachable objects in packed format while still being able to store their last access times separately.\n\n### Reachability checks\n\nCompressing loose objects into a new packfile is done by using an incremental repack. 
Git will compute the reachability of all objects in the repository and then pack all loose objects that are reachable into a new packfile.\n\nTo determine reachability of an object, we have to perform a complete graph walk. Starting at all objects that are directly referenced, we walk down any links that those objects have to any other objects. Once we reach the root of the object graph, we have then split all objects into two sets, which are the reachable and unreachable objects.\n\nThis operation can be quite expensive and the larger the repository and the more objects it contains, the more expensive this computation gets. As mentioned above though, objects which are about to be deleted need to be stored\nas loose objects such that we can track their last access time. So if our incremental repack compressed all loose objects into a packfile regardless of their reachability, then this would impact our ability to track the grace\nperiod per object.\n\nThe ideal solution here would avoid doing reachability checks altogether while still being able to track the grace period of unreachable objects which are pending deletion individually.\n\n## The new way of packing objects\n\nOver the past two years, the Git project has shipped multiple mechanisms that allow us to address all of these painpoints we had with our old strategy. These new mechanisms come in two different forms:\n\n- Geometric repacking allows us to merge multiple packfiles without having to rewrite all packfiles into one. This feature was introduced in [Git v2.32.0](https://gitlab.com/gitlab-org/git/-/commit/2744383cbda9bbbe4219bd3532757ae6d28460e1).\n- Cruft packs allow us to store objects that are pending deletion in compressed format in a packfile. 
This feature was introduced in [Git v2.37.0](https://gitlab.com/gitlab-org/git/-/commit/a50036da1a39806a8ae1aba2e2f2fea6f7fb8e08).\n\nThe Gitaly team has reworked the object database maintenance strategy to make use of these new features.\n\n### Cruft packs\nPrevious to Git v2.37.0, pruning objects with a grace period required Git to first unpack packed objects into loose objects. We did this so that we can track the per-object access times for unreachable objects that are pending deletion as explained above. This is inefficient though as it potentially requires us to keep a lot of unreachable objects in loose format until they can be deleted after the grace period.\n\nWith Git v2.37.0, [git-repack(1)](https://www.git-scm.com/docs/git-repack) learned to write [cruft packs](https://git-scm.com/docs/cruft-packs). While a cruft pack looks just like a normal pack, it also has an accompanying\n`.mtimes` file:\n\n```shell\n$ tree objects/\nobjects/\n├── info\n│   └── packs\n└── pack\n    ├── pack-7ce39d49d7ddbbbbea66ac3d5134e6089210feef.idx\n    ├── pack-7ce39d49d7ddbbbbea66ac3d5134e6089210feef.mtimes\n    └── pack-7ce39d49d7ddbbbbea66ac3d5134e6089210feef.pack\n```\n\nThis file contains per-object timestamps that record when the object was last accessed. With this, we can continue to track per-object grace periods while storing the objects in a more efficient way compared to loose objects.\n\nIn Gitaly, we [started to make use of cruft packs](https://gitlab.com/gitlab-org/gitaly/-/merge_requests/5454) in GitLab 15.10 and made the feature generally available in GitLab 15.11. 
Cruft packs allow us to store objects that are pending deletion more efficiently and with less impact on the overall performance of the repository.\n\n### More efficient incremental repacks\n\nCruft packs also let us fix the issue that we had to do reachability checks when doing incremental repacks.\n\nPreviously, we had to always ensure reachability when packing loose objects so that we don't pack objects that are pending deletion. But now that any such object would be stored as part of a cruft pack and not as a loose pack anymore, we can instead compress all loose files into a packfile. This change was [introduced into Gitaly](https://gitlab.com/gitlab-org/gitaly/-/merge_requests/5660) with GitLab 16.0.\n\nIn an artificial benchmark with the Linux repository, compressing all loose objects into a packfile led to more than a 90-fold speedup, dropping from almost 13 seconds to 174 milliseconds.\n\n### Geometric repacking\n\nLast but not least, we still have the issue that we need to perform regular all-into-one repacks when we have too many packfiles in the repository.\n\nGit v2.32.0 introduced a new \"geometric\" repacking strategy for the [git-repack(1)](https://www.git-scm.com/docs/git-repack) command that will merge multiple packfiles into a single, larger packfile, that we can use to solve this issue.\n\nThis new \"geometric\" strategy tries to ensure that existing packfiles in the repository form a [geometric sequence](https://en.wikipedia.org/wiki/Geometric_progression) where each successive packfile contains at least `n` times as many objects as the preceding packfile. If the sequence isn't maintained, Git will determine a slice of packfiles that it must repack to maintain the sequence again. With this process, we can limit the number of packfiles that exist in the repository without having to repack all objects into a single packfile regularly.\n\nThe following figures demonstrate geometric repacking with a factor of two.\n\n1. 
We notice that the two smallest packfiles do not form a geometric sequence as they both contain two objects each.\n\n![Geometrically repacking packfiles, initial](https://about.gitlab.com/images/blogimages/2023-10-09-repository-scaling-odb-maintenance/geometric-repacking-1.png)\n\n1. We identify the smallest slice of packfiles that need to be repacked in order to restore the geometric sequence. Merging the smallest two packfiles would lead to a packfile with four objects. This would not be sufficient to restore the geometric sequence as the next-biggest packfile contains four objects, as well.\n\nInstead, we need to merge the smallest three packfiles into a new packfile that contains eight objects in total. As `8 × 2 ≤ 16` the geometric sequence is restored.\n\n![Geometrically repacking packfiles, combining](https://about.gitlab.com/images/blogimages/2023-10-09-repository-scaling-odb-maintenance/geometric-repacking-2.png)\n\n3. We merge those packfiles into a new packfile.\n\n![Geometrically repacking packfiles, final](https://about.gitlab.com/images/blogimages/2023-10-09-repository-scaling-odb-maintenance/geometric-repacking-3.png)\n\nOriginally, we introduced this new feature [into Gitaly in GitLab 15.11](https://gitlab.com/gitlab-org/gitaly/-/merge_requests/5590).\n\nUnfortunately, we had to quickly revert this new mode. It turned out that the geometric strategy was not ready to handle Git repositories that had an alternate object database connected to them. Because we make use of this feature to [deduplicate objects across forks](https://docs.gitlab.com/ee/development/git_object_deduplication.html), the new repacking strategy led to problems.\n\nAs active contributors to the Git project, we set out to fix these limitations in git-repack(1) itself. 
This led to an [upstream patch series](http://public-inbox.org/git/a07ed50feeec4bfc3e9736bf493b9876896bcdd2.1680606445.git.ps@pks.im/T/#u) that fixed a bunch of limitations around alternate object directories when doing geometric repacks in Git that was then released with Git v2.41.\n\nWith these fixes upstream, we were then able to\n[reintroduce the change](https://gitlab.com/gitlab-org/gitaly/-/merge_requests/5607) and [globally enable our new geometric repacking strategy](https://gitlab.com/gitlab-org/gitaly/-/merge_requests/5745) with GitLab 16.0.\n\n## Real-world results\n\nAll of this is kind of dry and deeply technical. What about the real-world results?\n\nThe following graphs show the global time we spent repacking objects across all projects hosted on GitLab.com.\n\n![Time spent optimizing repositories globally](https://about.gitlab.com/images/blogimages/2023-10-09-repository-scaling-odb-maintenance/global-optimization.png)\n\nThe initial rollout was on April 26 and progressed until April 28. As you can see, there was first a significant increase in repacking time. But after the initial dust settles, we can see that globally the time we spent repacking repositories roughly decreased by almost 20%.\n\nIn the two weeks before we enabled the feature, during weekdays and at peak times we were usually spending around 2.6 days per 12 hours repacking. In the two weeks after the feature was enabled, we spent around 2.12 days per 12 hours\nrepacking objects.\n\nThis is a success by itself already, but the more important question is how it would impact large monorepos, which are significantly harder to keep well-maintained due to their sheer size. Fortunately, the effect of the new housekeeping strategy was a lot more significant here. 
The following graph shows the time we spent performing housekeeping tasks in our own `gitlab-org` and `gitlab-com` groups, which host some of the most active repositories that have caused issues in the past:\n\n![Time spent optimizing repositories in GitLab groups](https://about.gitlab.com/images/blogimages/2023-10-09-repository-scaling-odb-maintenance/gitlab-groups-optimization.png)\n\nIn summary, we have observed the following improvements:\n\n|                                                        | Before              | After                | Change |\n| ------------------------------------------------------ | ------------------- | -------------------- | ------ |\n| Global accumulated repacking time                      | ~5.2 hours/hour     | ~4.2 hours/hour      | -20%   |\n| Large repositories of gitlab-org and gitlab-com groups | ~0.7-1.0 hours/hour | 0.12-0.15 hours/hour | -80%   |\n\nWe have heard of other customers that saw similar improvements in highly active large monorepositories.\n\n## Manually enable geometric repacking\n\nWhile the new geometric repacking strategy has been default-enabled starting with GitLab 16.0, it was introduced with GitLab 15.11. If you want to use the\nnew geometric repacking mode, you can opt in by setting the\n`gitaly_geometric_repacking` feature flag. 
You can do so via the `gitlab-rails`\nconsole:\n\n```\nFeature.enable(:gitaly_geometric_repacking)\n```\n",[9,941,684,683],{"slug":1683,"featured":6,"template":687},"rearchitecting-git-object-database-mainentance-for-scale","content:en-us:blog:rearchitecting-git-object-database-mainentance-for-scale.yml","Rearchitecting Git Object Database Mainentance For Scale","en-us/blog/rearchitecting-git-object-database-mainentance-for-scale.yml","en-us/blog/rearchitecting-git-object-database-mainentance-for-scale",{"_path":1689,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1690,"content":1696,"config":1702,"_id":1704,"_type":13,"title":1705,"_source":15,"_file":1706,"_stem":1707,"_extension":18},"/en-us/blog/rebase-in-real-life",{"title":1691,"description":1692,"ogTitle":1691,"ogDescription":1692,"noIndex":6,"ogImage":1693,"ogUrl":1694,"ogSiteName":672,"ogType":673,"canonicalUrls":1694,"schema":1695},"How to use Git rebase in real life","From fixup to autosquash here are real world ways to leverage Git rebase.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749682486/Blog/Hero%20Images/rebase-in-real-life.jpg","https://about.gitlab.com/blog/rebase-in-real-life","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"How to use Git rebase in real life\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Toon Claes\"}],\n        \"datePublished\": \"2022-11-08\",\n      }",{"title":1691,"description":1692,"authors":1697,"heroImage":1693,"date":1699,"body":1700,"category":726,"tags":1701},[1698],"Toon Claes","2022-11-08","\n\nMy colleague [Chris](/company/team/#chriscool) recently wrote about [how to take advantage of Git\nrebase](/blog/take-advantage-of-git-rebase/). 
In this post we'll\nexplain how you can take these techniques, and apply them to daily developer life.\n\n## Fixup\n\nImagine you have created a merge request, and there are some pipeline failures\nand some comments from reviews, and suddenly your [commit history](/blog/keeping-git-commit-history-clean/) looks something\nlike this:\n\n```shell\n$ git log --oneline\n\n8f8ef5af (HEAD -> my-change) More CI fixes\ne4fb7935 Apply suggestion from reviewer\nc1a1bec6 Apply suggestion from reviewer\n673222be Make linter happy\na0c30577 Fix CI failure for X\n5ff160db Implement feature Y\nf68080e3 Implement feature X\n3cdbc201 (origin/main, origin/HEAD, main) Merge branch 'other-change' into 'main'\n...\n```\n\nIn this example there are 2 commits implementing feature X and Y, followed by a\nhandful of commits that aren't useful on their own. We used the fixup feature of\nGit rebase to get rid of them.\n\n### Finding the commit\n\nThe idea of this technique is to integrate the changes of these follow-up\ncommits into the commits that introduced each feature. This means for each\nfollow-up commit we need to determine which commit they belong to.\n\nBased on the filename you may already know which commits belong together, but if\nyou don't you can use git-blame to find the commit.\n\n```shell\ngit blame \u003Crevision> -L\u003Cstart>,\u003Cend> \u003Cfilename>\n```\n\nWith the option `-L` we'll specify a range of a line numbers we're interested in.\nHere `\u003Cend>` cannot be omitted, but it can be the same as `\u003Cstart>`. You can\nomit `\u003Crevision>`, but you probably shouldn't because you want to skip over the\ncommits you want to rebase away. 
Your command will look something like this:\n\n```shell\n$ git blame 5ff160db -L22,22 app/model/user.rb\n\nf68080e3 22) scope :admins, -> { where(admin: true) }\n```\n\nThis tells us line `22` was touched by `f68080e3 Implement feature X`.\n\nNow repeat this step until you know the commit for each of the commits you want\nto rebase out.\n\n### Interactive rebase\n\nThe next step is to start the interactive rebase:\n\n```shell\n$ git rebase -i main\n```\n\nHere you're presented with the list of instructions in your `$EDITOR`:\n\n``` text\npick 8f8ef5af More CI fixes\npick e4fb7935 Apply suggestion from reviewer\npick c1a1bec6 Apply suggestion from reviewer\npick 673222be Make linter happy\npick a0c30577 Fix CI failure for X\npick 5ff160db Implement feature Y\npick f68080e3 Implement feature X\n```\n\nNow you'll need to change these instructions to something like this:\n\n```text\nfixup 8f8ef5af More CI fixes\nfixup e4fb7935 Apply suggestion from reviewer\nfixup 673222be Make linter happy\npick 5ff160db Implement feature Y\nfixup c1a1bec6 Apply suggestion from reviewer\nfixup a0c30577 Fix CI failure for X\npick f68080e3 Implement feature X\n```\n\nAs you can see I've reordered the commits, and I've changed some occurrences of\n`pick` to `fixup`.\n\nThe Git rebase will process this list bottom-to-top. It takes each line with\n`pick` and uses its commit message. On each line starting with `fixup` it\nintegrates the changes into the commit below. When you've saved this file and\nclosed your `$EDITOR`, the Git history will look something like this:\n\n```shell\n$ git log --oneline\n\ne880c726 (HEAD -> my-change) Implement feature Y\ne088ea06 Implement feature X\n3cdbc201 (origin/main, origin/HEAD, main) Merge branch 'other-change' into 'main'\n...\n```\n\n## Autosquash\n\nUsing autosquash can be an alternative technique to the above. 
First we'll\nuncommit all the commits we want to get rid of.\n\n```shell\ngit checkout f68080e3\n```\n\nNow all changes only exist in your working tree, and are gone from the commit\nhistory. You can use `git add` or `git add -p` to stage all changes related to\n`e088ea06 Implement feature X`. Instead of running `git commit` or `git commit -m`\nwe'll use the `--fixup` option:\n\n```shell\n$ git commit --fixup e088ea06\n```\n\nNow the history will look something like:\n\n```shell\n$ git log --oneline\n\ne744646b (HEAD -> my-change) fixup! Implement feature X\n5ff160db Implement feature Y\nf68080e3 Implement feature X\n3cdbc201 (origin/main, origin/HEAD, main) Merge branch 'other-change' into 'main'\n...\n```\n\nAll remaining changes should now belong to `5ff160db Implement feature Y` so we\ncan run:\n\n```shell\n$ git add .\n\n$ git commit --fixup 5ff160db\n\n$ git log --oneline\n\n18c0fff9 (HEAD -> my-change) fixup! Implement feature Y\ne744646b fixup! Implement feature X\n5ff160db Implement feature Y\nf68080e3 Implement feature X\n3cdbc201 (origin/main, origin/HEAD, main) Merge branch 'other-change' into 'main'\n...\n```\n\nYou can now review the `fixup!` commits and if you're happy with it, run:\n\n```shell\n$ git rebase -i --autosquash main\n```\n\nYou see we provide the extra option `--autosquash`. This option will look for\n`fixup!` commits and automatically reorder those and set their instruction to\n`fixup`. Normally there's nothing for you to be done now, and you can just close\nthe instruction list in your editor. 
If you type `git log` now you'll see the\n`fixup!` commits are gone.\n\n## Alternatives\n\nFinally, there are some tools that allow you to _absorb_ commits more easily, for\nexample:\n\n* [lib.rs/crates/git-absorb](https://lib.rs/crates/git-absorb)\n* [github.com/MrFlynn/git-absorb](https://github.com/MrFlynn/git-absorb)\n* [gitlab.com/bertoldia/git-absorb](https://gitlab.com/bertoldia/git-absorb)\n* [github.com/tummychow/git-absorb](https://github.com/tummychow/git-absorb)\n* [github.com/torbiak/git-autofixup](https://github.com/torbiak/git-autofixup)\n\n[Cover image](https://unsplash.com/photos/qAShc5SV83M) by [Yung Chang](https://unsplash.com/@yungnoma) on [Unsplash](https://unsplash.com/).\n{: .note}\n",[9,982,682],{"slug":1703,"featured":6,"template":687},"rebase-in-real-life","content:en-us:blog:rebase-in-real-life.yml","Rebase In Real Life","en-us/blog/rebase-in-real-life.yml","en-us/blog/rebase-in-real-life",{"_path":1709,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1710,"content":1716,"config":1721,"_id":1723,"_type":13,"title":1724,"_source":15,"_file":1725,"_stem":1726,"_extension":18},"/en-us/blog/redbox-on-demand-delivers-with-gitlab",{"title":1711,"description":1712,"ogTitle":1711,"ogDescription":1712,"noIndex":6,"ogImage":1713,"ogUrl":1714,"ogSiteName":672,"ogType":673,"canonicalUrls":1714,"schema":1715},"Redbox delivers On Demand with GitLab","Redbox's Joel Vasallo and Nicholas Konieczko explain how they ‘deliver software like a fox’ with GitLab.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749673064/Blog/Hero%20Images/redbox-blog-jannes-glas-unsplash.jpg","https://about.gitlab.com/blog/redbox-on-demand-delivers-with-gitlab","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"Redbox delivers On Demand with GitLab\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Brein Matturro\"}],\n        \"datePublished\": \"2019-10-01\",\n      
}",{"title":1711,"description":1712,"authors":1717,"heroImage":1713,"date":1718,"body":1719,"category":680,"tags":1720},[1278],"2019-10-01","\nAt GitLab Connect Chicago, Redbox's [Joel Vasallo](https://www.linkedin.com/in/joelvasallo) and [Nicholas Konieczko](https://www.linkedin.com/in/nick-konieczko-42895354) presented a talk called “Delivering software like a fox.” Redbox, primarily known for providing movie and video game rentals via automated retail kiosks, has recently expanded to provide streaming services.\n\nRedbox On Demand is the company's newest streaming platform, built on .NET Core in containers on Linux in the cloud. The video retail company had a few goals in mind when building its latest platform. Joel, cloud DevOps manager, and Nicholas, mobile applications manager, share their three main objectives and how GitLab provides the tool that ensures success.\n\n## Goal #1: Modernize software development processes\n\nThe mobile and development teams wanted to be able to create the platform using the latest technology in order to provide the best product for the customer. “[There was] nothing wrong with the way they were done, but in the sense that the world has really come a long way from traditional Windows servers... in a data center running .NET frameworks and stuff like that, we really wanted to empower developers to use containers,” Joel says.\n\n**Outcome**: The mobile and development teams currently use GitLab CI, leveraging Fastlane. The power of GitLab and its ability to work along with other tools helped to modernize software development processes.\n\n## Goal #2: Speed up delivery to the cloud\n\nProviding an on-demand service means that the application has to actually be ready at the very moment of demand. Being new to the streaming arena, it was important for Redbox to move to the cloud. “We also wanted to leverage the power of the cloud and have the scaling perspective of the cloud. 
We wanted to be in the cloud, as everyone wants to be nowadays. We also wanted to ensure that our features go out the door faster because, in the streaming business, it's all about being first to market with your features,” Joel says.\n\n**Outcome**: The teams now use GitLab CI along with Spinnaker. “We wanted to increase software delivery and do what's best for the teams. I don't want to dictate what developers should do in their day-to-day workflow,” Joel says.\n\n## Goal #3: Empower developers to own their applications\n\nThe hope was that a developer would be able to deploy code to production at any time with a single click of a button. Developers would then have the ability to just write the code and a working tool will be able to pick up the errors. “Code goes out the door based on an approval process. Developers are able to own and operate their code, essentially,” Joel says.\n\n**Outcome**: The objective was achieved, according to Joel. “Ultimately, developers own their own apps. GitLab Enterprise allowed teams to own their own verticals as well as Spinnaker, which allows them to deploy it to whatever cloud provider that they so choose.”\n\nTo learn more about how GitLab helped the mobile and development teams achieve their platform goals (and more), watch the presentation below.\n\n\u003C!-- blank line -->\n\u003Cfigure class=\"video_container\">\n  \u003Ciframe src=\"https://www.youtube.com/embed/3eG8Muorafo\" frameborder=\"0\" allowfullscreen=\"true\"> \u003C/iframe>\n\u003C/figure>\n\u003C!-- blank line -->\n\n## Key takeaways\n\n### Putting the version in version control\n\n“There was a disparate amount of Git and source control tools. Namely, we had an internal Git server, which... I don't know what it was actually running. We had GitHub.com. We had Team Foundation Server. We had Azure DevOps. So all this stuff... Teams were really all over the place. They all had their source code. Getting access to things was just a nightmare.\n\n“So what did we do? 
Let's get another version control system into the mix. We need a fifth one. So we picked GitLab. Honestly, GitLab was the most tried and true solution from our perspective. It has support for a few things, like on-prem, also in the cloud as well on the .com offering. But, more than that, at the end of the day it let developers control their namespace within a large organization.” – _Joel Vasallo_\n\n### GitLab works for mobile development\n\n“The mobile teams were the first to get to try out GitLab.com. It's simple. It's extremely easy to get started. There's a lot of documentation out there, all the things I love. It's very cost effective. We were able to get a free trial running, get repos open, test out different things, different features, to see if it could work for our teams.\" – _Nick Konieczko_\n\n### Yes, you can use Jenkins too\n\n“This is, honestly, one of the best things about GitLab, is they just want us to be successful. Batteries are included. There's a lot of great tools in there, such as GitLab CI, the GitLab Issue Board... and GitLab's Artifact Repository. It's built into the platform. GitLab's pipelines with the CI/CD process, all of this comes in. But if you don't want to use it, it'll adapt to your business model.\n\n“For example, my team uses Jenkins. We can still use GitLab. There's no blocking event where it says, ‘Oh, you're using Jenkins. You can't talk to us. Error. Blocked.’ No, we use Jira. We type ‘Jira, take us into GitLab’ all the time. We have JFrog Artifactory. We also use Spinnaker for our software delivery. 
Again, it transforms to what you need as a business, and that's the thing that I really appreciate, being on the DevOps side.” – _Joel Vasallo_\n\nCover image by [Jannes Glas](https://unsplash.com/@jannesglas) on [Unsplash](https://www.unsplash.com)\n{: .note}\n",[728,684,9,230],{"slug":1722,"featured":6,"template":687},"redbox-on-demand-delivers-with-gitlab","content:en-us:blog:redbox-on-demand-delivers-with-gitlab.yml","Redbox On Demand Delivers With Gitlab","en-us/blog/redbox-on-demand-delivers-with-gitlab.yml","en-us/blog/redbox-on-demand-delivers-with-gitlab",{"_path":1728,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1729,"content":1732,"config":1739,"_id":1741,"_type":13,"title":1742,"_source":15,"_file":1743,"_stem":1744,"_extension":18},"/en-us/blog/reduce-the-load-on-gitlab-gitaly-with-bundle-uri",{"noIndex":6,"title":1730,"description":1731},"Reduce the load on GitLab Gitaly with bundle URI","Discover what the bundle URI Git feature is, how it is integrated into Gitaly, configuration best practices, and how GitLab users can benefit from it.",{"title":1730,"description":1731,"heroImage":1733,"date":1734,"body":1735,"category":879,"tags":1736,"authors":1737},"https://res.cloudinary.com/about-gitlab-com/image/upload/v1750099013/Blog/Hero%20Images/Blog/Hero%20Images/blog-image-template-1800x945%20%2814%29_6VTUA8mUhOZNDaRVNPeKwl_1750099012960.png","2025-06-24","Gitaly plays a vital role in the GitLab ecosystem — it is the server\ncomponent that handles all Git operations. Every push and pull made to/from\na repository is handled by Gitaly, which has direct access to the disk where\nthe actual repositories are stored. As a result, when Gitaly is under heavy\nload, some operations like CI/CD pipelines and browsing a repository in the\nGitLab UI can become quite slow. 
This is particularly true when serving\nclones and fetches for large and busy monorepos, which can consume large\namounts of CPU and memory.\n\n\n[Bundle URI](https://docs.gitlab.com/administration/gitaly/bundle_uris/) takes significant load off of Gitaly servers during clones by allowing Git to pre-download a bundled repository from object storage before calling the Gitaly servers to fetch the remaining objects.\n\n\nHere is a graph that shows the difference between clones without and with bundle URI.\n\n\n![Graph that shows the difference between clones without and with bundle URI](https://res.cloudinary.com/about-gitlab-com/image/upload/v1750705069/rvbm4ru1w58msd6zv4x7.png)\n\n\nThis graph shows the results of a small test we ran on an isolated GitLab installation, with Gitaly running on a machine with 2 CPUs. We wanted to test bundle URI with a large repository, so we pushed the [GitLab repository](https://gitlab.com/gitlab-org/gitlab) to the instance. We also generated a bundle beforehand.\n\n\nThe big CPU spike is from when we performed a single clone of the GitLab repository with bundle URI disabled. It's quite noticeable. A little later, we turned on bundle URI and launched three concurrent clones of the GitLab repository. Sure enough, turning on bundle URI provides massive performance gain. We can't even distinguish the CPU usage of the three clones from normal usage.\n\n\n## Configure Gitaly to use bundle URI\n\n\nTo enable bundle URI on your GitLab installation, there are a couple of things you need to configure.\n\n\n### Create a cloud bucket\n\n\nBundles need to be stored somewhere. The ideal place is in a cloud storage bucket. Gitaly uses the [gocloud.dev](https://pkg.go.dev/gocloud.dev) library to read and write from cloud storage. Any cloud storage solution supported by this library can be used. 
Once you have a cloud bucket URL, you can add it in the Gitaly configuration here:\n\n\n```toml\n[bundle_uri]\ngo_cloud_url = \"\u003Cbucket-uri>\"\n```\n\n\nIt must be noted that Gitaly does not manage the lifecycle of the bundles stored in the bucket. To avoid cost issues, object lifecycle policies must be enabled on the bucket in order to delete unused or old objects.\n\n\n### Enable the feature flags\n\n\nThere are two feature flags to enable:\n\n\n- `gitaly_bundle_generation` enables [auto-generation](#auto-generated) of bundles.\n\n\n- `gitaly_bundle_uri` makes Gitaly advertise bundle URIs when they are available (either manually created or auto-generated) and allows the user to [manually](#manual) generate bundles.\n\n\nThese feature flags can be enabled at-large on a GitLab installation, or per repository. See the [documentation on how to enable a GitLab feature behind a feature flag](https://docs.gitlab.com/administration/feature_flags/#how-to-enable-and-disable-features-behind-flags).\n\n\n### How to generate bundles\n\n\nGitaly offers two ways for users to use bundle URI: a [manual](#manual) way and an [auto-generated](#auto-generated) way.\n\n\n#### Manual\n\n\nIt is possible to create a bundle manually by connecting over SSH with the Gitaly node that stores the repository you want to create a bundle for, and run the following command:\n\n```shell\nsudo -u git -- /opt/gitlab/embedded/bin/gitaly bundle-uri \n--config=\u003Cconfig-file>\n--storage=\u003Cstorage-name>\n--repository=\u003Crelative-path>\n```\n\nThis command will create a bundle for the given repository and store it into the bucket configured above. 
When a subsequent `git clone` request reaches Gitaly for the same repository, the bundle URI mechanism described above comes into play.
Cloning a repository during a CI/CD pipeline is the same as cloning a repository from your terminal, except that the Git client in this case is the GitLab Runner. Thus, we need to configure the GitLab Runner in such a way that it can use bundle URI.\n\n\n**1. Update the helper-image**\n\n\nThe first thing to do to configure the GitLab Runner is to [overwrite the helper-image](https://docs.gitlab.com/runner/configuration/advanced-configuration/#override-the-helper-image) that your GitLab Runner instances use. The `helper-image` is the image that is used to run the process of cloning a repository before the job starts. To use bundle URI, the image needs the following:\n\n\n- Git Version 2.49.0 or later\n\n\n- [`GitLab Runner helper`](https://gitlab.com/gitlab-org/gitlab-runner/-/tree/main/apps/gitlab-runner-helper?ref_type=heads) Version 18.1.0 or later\n\n\nThe helper-images can be found [here](https://gitlab.com/gitlab-org/gitlab-runner/container_registry/1472754?orderBy=PUBLISHED_AT&sort=desc&search[]=v18.1.0). 
Select an image that corresponds to the OS distribution and the architecture you use for your GitLab Runner instances, and verify that the image satisfies the requirements.\n\n\nAt the time of writing, the `alpine-edge-\u003Carch>-v18.1.0*` tag meets all requirements.\n\nYou can validate the image meets all requirements with:\n\n```shell\ndocker run -it \u003Cimage:tag>\n$ git version ## must be 2.49.0 or newer\n$ gitlab-runner-helper -v ## must be 18.0 or newer\n```\n\nIf you do not find an image that meets the requirements, you can also use the helper-image as a base image and install the requirements yourself in a custom-built image that you can host on [GitLab Container Registry](https://docs.gitlab.com/user/packages/container_registry/).\n\n\nOnce you have found the image you need, you must configure your GitLab Runner instances to use it by updating your `config.toml` file:\n\n\n```toml\n[[runners]]\n (...)\n executor = \"docker\"\n [runners.docker]\n    (...)\n    helper_image = \"image:tag\" ## \u003C-- put the image name and tag here\n```\n\n\nOnce the configuration is changed, you must restart the runners for the new configuration to take effect.\n\n\n**2. Turn on the feature flag**\n\n\nNext, you must enable the `FF_USE_GIT_NATIVE_CLONE` [GitLab Runner feature flags](https://docs.gitlab.com/runner/configuration/feature-flags/) in your `.gitlab-ci.yml` file. To do that, simply add it as a variable and set to `true` :\n\n```yaml\nvariables:\n  FF_USE_GIT_NATIVE_CLONE: \"true\"\n```\n\n\nThe `GIT_STRATEGY` must also be [set to `clone`](\u003Chttps://docs.gitlab.com/ci/runners/configure_runners/#git-strategy>), as Git bundle URI only works with `clone` commands.\n\n\n## How bundle URI works\n\n\nWhen a user clones a repository with the `git clone` command, a process called [`git-receive-pack`](https://git-scm.com/docs/git-receive-pack) is launched on the client's machine. 
This process communicates with the remote repository's server (it can be over HTTP/S, SSH, etc.) and asks to start a [`git-upload-pack`](https://git-scm.com/docs/git-receive-pack) process. Those two processes then exchange information using the Git protocol (it must be noted that bundle URI is only supported with [Git protocol v2](https://git-scm.com/docs/protocol-v2)). The capabilities both processes support and the references and objects the client needs are among the information exchanged. Once the Git server has determined which objects to send to the client, it must package them into a packfile, which, depending on the size of the data it must process, can consume a good amount of resources.\n\n\nWhere does bundle URI fit into this interaction? If bundle URI is advertised as a capability from the `upload-pack` process and the client supports bundle URI, the Git client will ask the server if it knows about any bundle URIs. The server sends those URIs back and the client downloads those bundles.\n\n\nHere is a diagram that shows those interactions:\n\n\n```mermaid\n\nsequenceDiagram\n\n\n    participant receive as Client\n\n\n    participant upload as Server\n\n\n    participant cloud as File server\n\n\n    receive ->> upload: issue git-upload-pack\n\n\n    upload -->> receive: list of server capabilities\n\n\n    opt if bundle URI is advertised as a capability\n\n\n    receive ->> upload: request bundle URI\n\n\n    upload -->> receive: bundle URI\n\n\n    receive ->> cloud: download bundle at URI\n\n\n    cloud -->> receive: bundle file\n\n\n    receive ->> receive: clone from bundle\n\n\n    end\n\n\n    receive ->> upload: requests missing references and objects\n\n\n    upload -->> receive: packfile data\n\n```\n\n\nAs such, Git [bundle URI](https://git-scm.com/docs/bundle-uri) is a mechanism by which, during a `git clone`, a Git server can advertise the URI of a bundle for the repository being cloned by the Git client. 
When that is the case, the Git client can clone the repository from the bundle and request from the Git server only the missing references or objects that were not part of the bundle. This mechanism really helps to alleviate pressure from the Git server.\n\n\n## Alternatives\n\n\nGitLab also has a feature [Pack-objects cache](https://docs.gitlab.com/administration/gitaly/configure_gitaly/#pack-objects-cache). This feature works slightly differently than bundle URI. When the server packs objects together into a so-called packfile, this feature will keep that file in the cache. When another client needs the same set of objects, it doesn't need to repack them, but it can just send the same packfile again.\n\n\nThe feature is only beneficial when many clients request the exact same set of objects. In a repository that is quick-changing, this feature might not give any improvements. With bundle URI, it doesn't matter if the bundle is slightly out-of-date because the client can request missing objects after downloading the bundle and apply those changes on top. 
Also, bundle URI in Gitaly stores the bundles on external storage, whereas the Pack-objects Cache stores them on the Gitaly node
\"datePublished\": \"2023-03-20\",\n      }",{"title":1748,"description":1749,"authors":1754,"heroImage":1750,"date":1755,"body":1756,"category":726,"tags":1757},[677],"2023-03-20","\n\nUsers get the most from [Gitaly](/direction/gitaly/#gitaly-1), the service responsible for the storage and maintenance of all Git repositories in GitLab, when traffic hitting it is efficiently handled. Therefore, we must ensure our Git repositories remain in a well-optimized state. When it comes to Git monorepositories, this maintenance can be a complex task that can cause a lot of overhead by itself because repository housekeeping becomes more expensive the larger the repositories get. This blog post explains in depth what we have done over the past few GitLab releases to rework our approach to repository housekeeping for better scaling and to maintain an optimized state to deliver the best peformance for our users.\n\n## The challenge with Git monorepository maintenance\n\nTo ensure that Git repositories remain performant, Git regularly runs a set of\nmaintenance tasks. On the client side, this usually happens by automatically\nrunning `git-gc(1)` periodically, which:\n\n- Compresses revisions into a `packed-refs` file.\n- Compresses objects into `packfiles`.\n- Prunes objects that aren't reachable by any of the revisions and that have\n  not been used for a while.\n- Generates and updates data structures like `commit-graphs` that help to speed\n  up queries against the Git repository.\n\nGit periodically runs `git gc --auto` automatically in the background, which\nanalyzes your repository and only performs maintenance tasks if required.\n\nAt GitLab, we can't use this infrastructure because it does not give us enough\ncontrol over which maintenance tasks are executed at what point in time.\nFurthermore, it does not give us full control over exactly which data\nstructures we opt in to. 
Instead, we have implemented our own maintenance\nstrategies that are specific to how GitLab works and catered to our specific\nneeds. Unfortunately, the way GitLab implemented repository maintenance has\nbeen limiting us for quite a while by now.\n\n- It is unsuitable for large monorepositories.\n- It does not give us the ability to easily iterate on the employed maintenance\n  strategy.\n\nThis post explains our previous maintenance strategy and its problems as well as\nhow we revamped the architecture to allow us to iterate faster and more\nefficiently maintain repositories.\n\n## Our previous repository maintenance strategy\n\nIn the early days of GitLab, most of the application ran on a single server.\nOn this single server, GitLab directly accessed Git repositories. For various\nreasons, this architecture limited us, so we created the standalone Gitaly\nserver that provides a gRPC API to access Git repositories.\n\nTo migrate to exclusively accessing Git repository data using Gitaly we:\n\n- Migrated all the logic that was previously contained in the Rails\n   application to Gitaly.\n- Created Gitaly RPCs and updated Rails to not execute the logic directly, but\n   instead invoke the newly-implemented RPC.\n\nWhile this was the easiest way to tackle the huge task back then, the end\nresult was that there were still quite a few areas in the Rails codebase that\nrelied on knowing how the Git repositories were stored on disk.\n\nOne such area was repository maintenance. In an ideal world, the Rails server\nwould not need to know about the on-disk state of a Git repository. Instead,\nthe Rails server would only care about the data it wants to get out of the\nrepository or commit to it. 
Because of the Gitaly migration path we took,\nthe Rails application was still responsible for executing fine-grained\nrepository maintenance by calling certain RPCs:\n\n- `Cleanup` to delete stale, temporary files that have accumulated\n- `RepackIncremental` and `RepackFull` to either pack all loose objects into a\n  new packfile or alternatively to repack all packfiles into a single one\n- `PackRefs` to compress all references into a single `packed-refs` file\n- `WriteCommitGraph` to update the commit-graph\n- `GarbageCollect` to perform various different tasks\n\nThese low-level details of repository maintenance were being managed by the\nclient. But because clients didn't have any information on the on-disk state of\nthe repository, they could not even determine which of these maintenance tasks\nhad to be executed in the first place. Instead, we had a very simple heuristic:\nEvery few pushes, we ran one of the above RPCs to perform one of the maintenance\ntasks. While this heuristic worked, it wasn't great for the following reasons:\n\n- Repositories can be modified without using pushes at all. So if users only\n  use the Web IDE to commit to repositories, they may not get repacked at all.\n- Because repository maintenance is controlled by the client, Gitaly can't\n  assume a specific repository state.\n- The threshold for executing housekeeping tasks is set globally across all\n  projects rather than on a per-project basis. Consequently, no matter\n  whether you have a tiny repository or a huge monorepository, we would use the\n  same intervals for executing maintenance tasks. 
As you may imagine though,\n  doing a full repack of a Git repository that is only a few dozen megabytes in\n  size is a few orders of magnitudes faster than repacking a monorepository\n  that is multiple gigabytes in size.\n- Specific types of Git repositories hosted by Gitaly need special care and we\n  required Gitaly clients to know about these.\n- Repository maintenance was inefficient overall. Clients do not know about the\n  on-disk state of repositories. Consequently, they had no choice except to\n  repeatedly ask Gitaly to optimize specific data structures without knowing\n  whether this was required in the first place.\n\n## Heuristical maintenance strategy\n\nIt was clear that we needed to change the strategy we used for repository\nmaintenance. Most importantly, we wanted to:\n\n- Make Gitaly the single source of truth for how we maintain repositories.\n  Clients should not need to worry about low-level specifics, and Gitaly should\n  be able to easily iterate on the strategy.\n- Make the default maintenance strategy work for repositories of all sizes.\n- Make the maintenance strategy work for repositories of all types. A client\n  should not need to worry about which maintenance tasks must be executed for\n  what repository type.\n- Avoid optimizing data structures that already are in an optimal state.\n- Improve visibility into the optimizations we perform.\n\nAs mentioned in the introduction, Git periodically runs `git gc --auto`. This\ncommand inspects the repository's state and performs optimizations only when it\nfinds that the repository is in a sufficiently bad state to warrant the cost.\nWhile using this command directly in the context of Gitaly does not give us\nenough flexibility, it did serve as the inspiration for our new architecture.\n\nInstead of providing fine-grained RPCs to maintain various parts of a Git\nrepository, we now only provide a single RPC `OptimizeRepository` that works as\na black-box to the caller. This RPC call:\n\n1. 
Cleans up stale data in the repository if there is any.\n1. Analyzes the on-disk state of the repository.\n1. Depending on this on-disk state, performs only these maintenance tasks that\n   are deemed to be necessary.\n\nBecause we can analyze and use the on-disk state of the repository, we can be\nfar more intelligent about repository maintenance compared to the previous\nstrategy where we optimized some bits of the repository every few pushes.\n\n### Packing objects\n\nIn the old-style repository maintenance, the client would call either\n`RepackIncremental` or `RepackFull`. This would either: Pack all loose objects into a new `packfile` or repack all objects into a single `packfile`.\n\nBy default, we would perform a full repack every five repacks. While this may be\na good default for small repositories, it gets prohibitively expensive for huge\nmonorepositories where a full repack may easily take several minutes.\n\nThe new heuristical maintenance strategy instead scales the allowed number of\n`packfiles` by the total size of all combined `packfiles`. As a result, the\nlarger the repository becomes, the less frequently we perform a full repack.\n\n### Pruning objects\n\nIn the past, clients would periodically call `GarbageCollect`. In addition to\nrepacking objects, this RPC would also prune any objects that are unreachable\nand that haven't been accessed for a specific grace period.\n\nThe new heuristical maintenance strategy scans through all loose objects that\nexist in the repository. 
If the number of loose objects that have a modification\ntime older than two weeks exceeds a certain threshold, it spawns the\n`git prune` command to prune these objects.\n\n### Packing references\n\nIn the past, clients would call `PackRefs` to repack references into the\n`packed-refs` file.\n\nBecause the time to compress references scales with the size of the\n`packed-refs` file, the new heuristical maintenance strategy takes into account\nboth the size of the `packed-refs` file and the number of loose references that\nexist in the repository. If a ratio between these two figures is exceeded, we\ncompress the loose references.\n\n### Auxiliary data structures\n\nThere are auxiliary data structures like `commit-graphs` that are used by Git\nto speed up various queries. With the new heuristical maintenance strategy,\nGitaly now automatically updates these as required, either when they are\ndeemed to be out-of-date, or when they are missing altogether.\n\n### Heuristical maintenance strategy rollout\n\nWe rolled out this new heuristical maintenance strategy to GitLab.com in March 2022. Initially, we only rolled it out for\n[`gitlab-org/gitlab`](https://gitlab.com/gitlab-org/gitlab), which is a\nrepository where maintenance performed particularly poorly in the past. You can\nsee the impact of the rollout in the following graph:\n\n![Latency of OptimizeRepository for gitlab-org/gitlab](https://about.gitlab.com/images/blogimages/repo-housekeeping-gitlab-org-gitlab-latency.png)\n\nIn this graph, you can see that:\n\n1. Until March 19, we used the legacy fine-grained RPC calls. We spent most\n   of the time in `RepackFull`, followed by `RepackIncremental` and `GarbageCollect`.\n1. Because March 19 and 20 occurred on a weekend, nothing much happens with\n   housekeeping.\n1. Early on March 21 we switched `gitlab-org/gitlab` to use heuristical\n   housekeeping using `OptimizeRepository`. Initially, there didn't seem to be\n   much of an improvement. 
There wasn't much difference in how much time we\n   spent maintaining this repository compared to the past.\n\n   However, this was caused by an inefficient heuristic. Instead of only pruning\n   objects when there were stale ones, we always pruned objects when we saw that\n   there were too many loose objects.\n1. We deployed a fix for this bug on March 22, which led to a significant drop in\n   time spent optimizing this repository compared to before.\n\nThis demonstrated two things:\n\n- We're easily able to iterate on the heuristics that we have in Gitaly.\n- Using the heuristics saves a lot of compute time as we don't unnecessarily\n  optimize anymore.\n\nWe have subsequently rolled this out to all of GitLab.com, starting on March\n29, 2022, with similar improvements. With this change, we more than halved the CPU\nload when performing repository optimizations.\n\n## Observability\n\nWhile it is great that `OptimizeRepository` has managed to save us a lot of\ncompute power, one goal was to improve visibility into repository housekeeping.\nMore specifically, we wanted to:\n\n- Gain visibility on the global level to see what optimizations are performed\n  across all of our repositories.\n- Gain visibility on the repository level to know what state a specific\n  repository is in.\n\nIn order to improve global visibility, we expose a set of Prometheus metrics that\nallow us to observe important details about our repository maintenance. 
The\nfollowing graphs show the optimizations performed in a 30-minute window of our\nproduction systems on GitLab.com.\n\n- The optimizations, which are being performed in general.\n\n  ![Repository optimization metrics for GitLab.com](https://about.gitlab.com/images/blogimages/repo-housekeeping-metrics-optimizations.png)\n\n- The average latency it takes to perform each of these optimizations.\n\n  ![Repository optimization metrics for GitLab.com](https://about.gitlab.com/images/blogimages/repo-housekeeping-metrics-latencies.png)\n\n- What kind of stale data we are cleaning up.\n\n  ![Repository optimization metrics for GitLab.com](https://about.gitlab.com/images/blogimages/repo-housekeeping-metrics-cleanups.png)\n\nTo improve visibility into the state each repository is in we have started to\nlog structured data that includes all the relevant bits. A subset of the\ninformation it exposes is:\n\n- The number of loose objects and their sizes.\n- The number of `packfiles` and their combined size.\n- The number of loose references.\n- The size of the `packed-refs` file.\n- Information about `commit-graphs`, bitmaps and other auxiliary data\n  structures.\n\nThis information is also exposed through Prometheus metrics:\n\n![Repository state metrics for GitLab.com](https://about.gitlab.com/images/blogimages/repo-state-metrics.png)\n\nThese graphs expose important metrics of the on-disk state of our repositories:\n\n- The top panel shows which data structures exist.\n- The heatmaps on the left show how large specific data structures are.\n- The heatmaps on the right show how many of these data structures we have.\n\nCombining both the global and per-repository information allows us to easily\nobserve how repository maintenance behaves during normal operations. 
But more\nimportantly, it gives us meaningful data when rolling out new features that\nchange the way repositories are maintained.\n\n## Manually enabling heuristical housekeeping\n\nWhile the heuristical housekeeping is enabled by default starting with GitLab\n15.9, it has already been introduced with GitLab 14.10. If you want to use the\nnew housekeeping strategy before upgrading to 15.9, you can opt in by\nsetting the `optimized_housekeeping` [feature flag](https://docs.gitlab.com/ee/administration/feature_flags.html#how-to-enable-and-disable-features-behind-flags).\nYou can do so via the `gitlab-rails` console:\n\n```\nFeature.enable(:optimized_housekeeping)\n```\n\n## Future improvements\n\nWhile the new heuristical optimization strategy has been successfully\nbattle-tested for a while now for GitLab.com, at the time of writing this\nblog post, it still wasn't enabled by default for self-deployed installations.\nThis has finally changed with GitLab 15.8, where we have default-enabled the new\nheuristical maintenance strategy.\n\nWe are not done yet, though. 
Now that Gitaly is the only source of truth for how\nrepositories are optimized, we are tracking improvements to our maintenance\nstrategy in [epic 7443](https://gitlab.com/groups/gitlab-org/-/epics/7443):\n\n- [Multi-pack indices](https://git-scm.com/docs/multi-pack-index) and geometric\n  repacking will help us to further reduce the time spent repacking objects.\n- [Cruft packs](https://git-scm.com/docs/cruft-packs) will help us to further\n  reduce the time spent pruning objects and reduce the overall size of\n  unreachable objects.\n- Gitaly will automatically run housekeeping tasks when receiving mutating RPC\n  calls so that clients don't have to call `OptimizeRepository` at all anymore.\n\nSo stay tuned!\n\n",[9,941,684],{"slug":1759,"featured":6,"template":687},"scaling-repository-maintenance","content:en-us:blog:scaling-repository-maintenance.yml","Scaling Repository Maintenance","en-us/blog/scaling-repository-maintenance.yml","en-us/blog/scaling-repository-maintenance",{"_path":1765,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1766,"content":1772,"config":1778,"_id":1780,"_type":13,"title":1781,"_source":15,"_file":1782,"_stem":1783,"_extension":18},"/en-us/blog/setting-up-the-k-agent",{"title":1767,"description":1768,"ogTitle":1767,"ogDescription":1768,"noIndex":6,"ogImage":1769,"ogUrl":1770,"ogSiteName":672,"ogType":673,"canonicalUrls":1770,"schema":1771},"How to deploy the GitLab Agent for Kubernetes with limited permissions"," Learn how to deploy the GitLab Agent for Kubernetes with Limited Permissions.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749668655/Blog/Hero%20Images/seabass-creatives-U3m4_cKbUfc-unsplash.jpg","https://about.gitlab.com/blog/setting-up-the-k-agent","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"How to deploy the GitLab Agent for Kubernetes with limited permissions\",\n        \"author\": 
[{\"@type\":\"Person\",\"name\":\"Fernando Diaz\"}],\n        \"datePublished\": \"2021-09-10\",\n      }",{"title":1767,"description":1768,"authors":1773,"heroImage":1769,"date":1775,"body":1776,"category":899,"tags":1777},[1774],"Fernando Diaz","2021-09-10","\n\nThe [GitLab Agent for Kubernetes (`agentk`)](https://gitlab.com/gitlab-org/cluster-integration/gitlab-agent) is an active in-cluster component for solving GitLab and Kubernetes integration tasks in a secure and cloud-native way. The `agentk` communicates to the GitLab Agent Server (KAS) to perform [GitOps](https://about.gitlab.com/topics/gitops/) operations.\n\nIn many examples, we see the agent being deployed with global-level permissions on your Kubernetes cluster. There are use cases where we want to reduce the scope of what agentk has access to. In this guide I will provide information on deploying agentk on your cluster, limiting what namespaces it can access, as well as using it to deploy your applications.\n\nPrefer a video? Watch the walkthrough below to learn how to deploy agentk to your cluster:\n\n\u003Ciframe width=\"560\" height=\"315\" src=\"https://www.youtube-nocookie.com/embed/Sr3X5-O9HWA\" title=\"YouTube video player\" frameborder=\"0\" allow=\"accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture\" allowfullscreen>\u003C/iframe>\n\n## How it works\n\nAnytime a developer performs changes to a manifest file managed within GitLab, the agentk will apply these changes to the Kubernetes cluster.\n\n![Kagent flowchart](https://about.gitlab.com/images/blogimages/kagent-limited/1.png){: .shadow.medium}\nHow a change to a manifest file in GitLab is applied to the Kubernetes cluster.\n{: .note.text-center}\n\nThe `agentk` and the KAS use bidirectional streaming to allow the connection acceptor (the gRPC server, GitLab Agent Server) to act as a client. 
The connection acceptor sends requests as gRPC replies.\n\n![Bidirectional streaming flowchart](https://about.gitlab.com/images/blogimages/kagent-limited/2.png){: .shadow.medium}\nHow bidirectional streaming with agentk works.\n{: .note.text-center}\n\n- GitLab RoR is the main GitLab application. It uses gRPC to talk to kas.\n\n- `agentk` is the GitLab Agent for Kubernetes. It keeps a connection established to a\nkas instance, waiting for requests to process. It may also actively send information\nabout things happening in the cluster.\n\n- KAS is the GitLab Agent Server, and is responsible for:\n  - Accepting requests from agentk\n  - Authentication of requests from agentk by querying GitLab RoR\n  - Fetching the agent's configuration file from a corresponding Git repository by querying Gitaly\n  - Matching incoming requests from GitLab RoR with existing connections from the right agentk, forwarding requests to it, and forwarding responses back\n  - Polling manifest repositories for GitOps support by communicating with Gitaly\n\n## How to deploy the GitLab Agent\n\nIn order to deploy the agent, we require the following:\n\n- Kubernetes cluster (I am using Google Kubernetes Engine, or GKE)\n- The GitLab project which will hold the agentk configuration and deployment manifest, you can import [Simple Agent K](https://gitlab.com/tech-marketing/devsecops/kubernetes-agent/simple-agent-k) which includes an application and CICD configured\n\n**Note:** The agentk configuration file and deployment manifests can be located in different projects. It just depends how you want to organize the GitOps workflow.\n\n**1. 
Create `.gitlab/agent/agent-name/config.yaml` directory in your project** and replace `agent-name` with whatever you want to name your agent.\n\n  ```\n  gitops:\n    manifest_projects:\n    - id: \"Your Project ID\"\n      paths:\n      - glob: '/manifests/*.{yaml,yml,json}'\n  ```\n\n  Remember to replace `Your Project ID` with the projectID of your project, seen below:\n\n   ![Replace projectID for your project](https://about.gitlab.com/images/blogimages/kagent-limited/3.png){: .shadow.medium}\n   Fill in the projectID section with your information.\n   {: .note.text-center}\n\n  **Note:** You can also use the path to the project in GitLab, i.e., mygroup/mysub/myproject.\n\n**2. Create agent record in GitLab**\n\n  A GitLab Rails Agent record is used to associate the cluster with the configuration repository project.\n\n  - Go to **Infrastructure > Kubernetes** tab\n\n   ![Click Kubernetes cluster tab](https://about.gitlab.com/images/blogimages/kagent-limited/4.png){: .shadow.medium}\n   Click the Kubernetes cluster tab in GitLab.\n   {: .note.text-center}\n\n  - Click on the **GitLab Agent managed clusters** tab\n\n   ![Click GitLab Agent tab](https://about.gitlab.com/images/blogimages/kagent-limited/5.png){: .shadow.medium}\n   What the GitLab Agent tab looks like\n   {: .note.text-center}\n\n  - Click the **Install a new GitLab Agent** button\n\n   ![Click Install new GitLab Agent button](https://about.gitlab.com/images/blogimages/kagent-limited/5.png){: .shadow.medium}\n   What the \"Install new GitLab agent\" button looks like.\n   {: .note.text-center}\n\n  - Select your agent\n\n   ![How to select your agent in GitLab](https://about.gitlab.com/images/blogimages/kagent-limited/6.png){: .shadow.medium}\n   How to select your agent in GitLab\n   {: .note.text-center}\n\n  - Save the provided token\n\n   ![How to save your provided token](https://about.gitlab.com/images/blogimages/kagent-limited/7.png){: .shadow.medium}\n   Click here to save your provided 
token.\n   {: .note.text-center}\n\n**3. Open a Terminal window**\n\n**4. Scope kubectl to your cluster**\n\n  ```\n  $ gcloud container clusters get-credentials fern-gitops-2 --zone us-central1-c --project group-cs-9b54eb\n\n  Fetching cluster endpoint and auth data.\n  kubeconfig entry generated for fern-gitops-2.\n  ```\n\n**5. Create the namespace for the Kubernetes agent**\n\n  ```\n  $ kubectl create ns gitlab-kubernetes-agent\n\n  namespace/gitlab-kubernetes-agent created\n  ```\n\n**6. Create agent secret**\n\n  This secret is used to store the token needed to configure the agent.\n\n  ```\n  $ kubectl create secret generic -n gitlab-kubernetes-agent gitlab-kubernetes-agent-token --from-literal=token='YOUR_AGENT_TOKEN'\n\n  secret/gitlab-kubernetes-agent-token created\n  ```\n\n**7. Apply the agentk deployment with limited access**\n\n  In this deployment below, we will create the following:\n\n### Namespaces\n\n  - **gitlab-kubernetes-agent**: Where the agent will be deployed\n  - **dude**: A namespace where agentk has permission to deploy\n  - **naww**: A namespace where the agentk has no permissions\n\n### Service accounts\n\n  - **gitlab-kubernetes-agent**: Service account used for running agentk\n\n### Deployments\n\n  - **gitlab-kubernetes-agent**: The actual agentk client application\n\n### Cluster roles and bindings\n\n  - **gitlab-kubernetes-agent-write-cm:** Permission for agentk to write all configmaps on the cluster\n  - **gitlab-kubernetes-agent-read-cm:** Permission for agentk to read all configmaps on the cluster\n\n### Roles and bindings\n\n  - **gitlab-kubernetes-agent-write**: Permission for agentk to write all resources on gitlab-kubernetes-agent ns\n  - **gitlab-kubernetes-agent-read**: Permission for agentk to read all resources on gitlab-kubernetes-agent ns\n  - **gitlab-kubernetes-agent-write-dude**: Permission for agentk to write all resources on dude ns\n  - **gitlab-kubernetes-agent-read-dude**: Permission for agentk to read all 
resources on dude ns\n\nThe next step is to create the deployment file `agentk.yaml`:\n\n  ```\n  apiVersion: v1\n  kind: Namespace\n  metadata:\n    name: dude\n  ---\n  apiVersion: v1\n  kind: Namespace\n  metadata:\n    name: naww\n  ---\n  apiVersion: v1\n  kind: ServiceAccount\n  metadata:\n    name: gitlab-kubernetes-agent\n    namespace: gitlab-kubernetes-agent\n  ---\n  apiVersion: apps/v1\n  kind: Deployment\n  metadata:\n    name: gitlab-kubernetes-agent\n    namespace: gitlab-kubernetes-agent\n  spec:\n    replicas: 1\n    selector:\n      matchLabels:\n        app: gitlab-kubernetes-agent\n    template:\n      metadata:\n        labels:\n          app: gitlab-kubernetes-agent\n        namespace: gitlab-kubernetes-agent\n      spec:\n        serviceAccountName: gitlab-kubernetes-agent\n        containers:\n        - name: agent\n          image: \"registry.gitlab.com/gitlab-org/cluster-integration/gitlab-agent/agentk:stable\"\n          args:\n          - --token-file=/config/token\n          - --kas-address\n          - wss://kas.gitlab.com # for GitLab.com users, use this KAS.\n          volumeMounts:\n          - name: token-volume\n            mountPath: /config\n        volumes:\n        - name: token-volume\n          secret:\n            secretName: gitlab-kubernetes-agent-token\n    strategy:\n      type: RollingUpdate\n      rollingUpdate:\n        maxSurge: 0\n        maxUnavailable: 1\n  ---\n  apiVersion: rbac.authorization.k8s.io/v1\n  kind: ClusterRole\n  metadata:\n    name: gitlab-kubernetes-agent-write-cm\n  rules:\n  - resources:\n    - 'configmaps'\n    apiGroups:\n    - ''\n    verbs:\n    - create\n    - update\n    - delete\n    - patch\n  ---\n  apiVersion: rbac.authorization.k8s.io/v1\n  kind: ClusterRoleBinding\n  metadata:\n    name: gitlab-kubernetes-agent-write-binding-cm\n  roleRef:\n    name: gitlab-kubernetes-agent-write-cm\n    kind: ClusterRole\n    apiGroup: rbac.authorization.k8s.io\n  subjects:\n  - name: 
gitlab-kubernetes-agent\n    kind: ServiceAccount\n    namespace: gitlab-kubernetes-agent\n  ---\n  apiVersion: rbac.authorization.k8s.io/v1\n  kind: ClusterRole\n  metadata:\n    name: gitlab-kubernetes-agent-read-cm\n  rules:\n  - resources:\n    - 'configmaps'\n    apiGroups:\n    - ''\n    verbs:\n    - get\n    - list\n    - watch\n  ---\n  apiVersion: rbac.authorization.k8s.io/v1\n  kind: ClusterRoleBinding\n  metadata:\n    name: gitlab-kubernetes-agent-read-binding-cm\n  roleRef:\n    name: gitlab-kubernetes-agent-read-cm\n    kind: ClusterRole\n    apiGroup: rbac.authorization.k8s.io\n  subjects:\n  - name: gitlab-kubernetes-agent\n    kind: ServiceAccount\n    namespace: gitlab-kubernetes-agent\n  ---\n  apiVersion: rbac.authorization.k8s.io/v1\n  kind: Role\n  metadata:\n    namespace: gitlab-kubernetes-agent\n    name: gitlab-kubernetes-agent-write\n  rules:\n  - resources:\n    - '*'\n    apiGroups:\n    - '*'\n    verbs:\n    - create\n    - update\n    - delete\n    - patch\n  ---\n  apiVersion: rbac.authorization.k8s.io/v1\n  kind: RoleBinding\n  metadata:\n    namespace: gitlab-kubernetes-agent\n    name: gitlab-kubernetes-agent-write-binding\n  roleRef:\n    name: gitlab-kubernetes-agent-write\n    kind: Role\n    apiGroup: rbac.authorization.k8s.io\n  subjects:\n  - name: gitlab-kubernetes-agent\n    kind: ServiceAccount\n    namespace: gitlab-kubernetes-agent\n  ---\n  apiVersion: rbac.authorization.k8s.io/v1\n  kind: Role\n  metadata:\n    namespace: gitlab-kubernetes-agent\n    name: gitlab-kubernetes-agent-read\n  rules:\n  - resources:\n    - '*'\n    apiGroups:\n    - '*'\n    verbs:\n    - get\n    - list\n    - watch\n  ---\n  apiVersion: rbac.authorization.k8s.io/v1\n  kind: RoleBinding\n  metadata:\n    namespace: gitlab-kubernetes-agent\n    name: gitlab-kubernetes-agent-read-binding\n  roleRef:\n    name: gitlab-kubernetes-agent-read\n    kind: Role\n    apiGroup: rbac.authorization.k8s.io\n  subjects:\n  - name: 
gitlab-kubernetes-agent\n    kind: ServiceAccount\n    namespace: gitlab-kubernetes-agent\n  ---\n  apiVersion: rbac.authorization.k8s.io/v1\n  kind: Role\n  metadata:\n    namespace: dude\n    name: gitlab-kubernetes-agent-write-dude\n  rules:\n  - resources:\n    - '*'\n    apiGroups:\n    - '*'\n    verbs:\n    - create\n    - update\n    - delete\n    - patch\n  ---\n  apiVersion: rbac.authorization.k8s.io/v1\n  kind: RoleBinding\n  metadata:\n    namespace: dude\n    name: gitlab-kubernetes-agent-write-binding-dude\n  roleRef:\n    name: gitlab-kubernetes-agent-write-dude\n    kind: Role\n    apiGroup: rbac.authorization.k8s.io\n  subjects:\n  - name: gitlab-kubernetes-agent\n    kind: ServiceAccount\n    namespace: gitlab-kubernetes-agent\n  ---\n  apiVersion: rbac.authorization.k8s.io/v1\n  kind: Role\n  metadata:\n    namespace: dude\n    name: gitlab-kubernetes-agent-read-dude\n  rules:\n  - resources:\n    - '*'\n    apiGroups:\n    - '*'\n    verbs:\n    - get\n    - list\n    - watch\n  ---\n  apiVersion: rbac.authorization.k8s.io/v1\n  kind: RoleBinding\n  metadata:\n    namespace: dude\n    name: gitlab-kubernetes-agent-read-binding-dude\n  roleRef:\n    name: gitlab-kubernetes-agent-read-dude\n    kind: Role\n    apiGroup: rbac.authorization.k8s.io\n  subjects:\n  - name: gitlab-kubernetes-agent\n    kind: ServiceAccount\n    namespace: gitlab-kubernetes-agent\n  ```\n\nNow we can apply the deployment with the following command:\n\n  ```\n  $ kubectl apply -f k-agent.yaml\n\n  namespace/dude created\n  namespace/naww created\n  serviceaccount/gitlab-kubernetes-agent created\n  deployment.apps/gitlab-kubernetes-agent created\n  clusterrole.rbac.authorization.k8s.io/gitlab-kubernetes-agent-write-cm created\n  clusterrolebinding.rbac.authorization.k8s.io/gitlab-kubernetes-agent-write-binding-cm created\n  clusterrole.rbac.authorization.k8s.io/gitlab-kubernetes-agent-read-cm created\n  
clusterrolebinding.rbac.authorization.k8s.io/gitlab-kubernetes-agent-read-binding-cm created\n  role.rbac.authorization.k8s.io/gitlab-kubernetes-agent-write created\n  rolebinding.rbac.authorization.k8s.io/gitlab-kubernetes-agent-write-binding created\n  role.rbac.authorization.k8s.io/gitlab-kubernetes-agent-read created\n  rolebinding.rbac.authorization.k8s.io/gitlab-kubernetes-agent-read-binding created\n  role.rbac.authorization.k8s.io/gitlab-kubernetes-agent-write-dude created\n  rolebinding.rbac.authorization.k8s.io/gitlab-kubernetes-agent-write-binding-dude created\n  role.rbac.authorization.k8s.io/gitlab-kubernetes-agent-read-dude created\n  rolebinding.rbac.authorization.k8s.io/gitlab-kubernetes-agent-read-binding-dude created\n  ```\n\n  **Note:** You see we are giving permissions to the gitlab-kubernetes-agent on the `dude` namespace, but not on the `naww` namespace. Currently, permissions for ConfigMaps are necessary but the scope can be reduced.\n\n**8. Make sure agentk is running**\n\n  ```\n  $ kubectl get pods -n gitlab-kubernetes-agent\n\n  NAME                            READY   STATUS    RESTARTS   AGE\n  gitlab-agent-58869d96bd-nqqnf   1/1     Running   0          10s\n  ```\n\nNow that the agentk is deployed, it can start managing our Kubernetes deployments.\n\n## Managing deployments\n\nNow let's go back to the GitLab UI, and add some applications to deploy using GitOps.\n\n**1. Open the Web IDE and create a manifest folder in your project root**\n\n**2. 
Add a manifest file for what you want to deploy on the `dude` namespace, name it `dude.yaml`**\n\n  ```\n  apiVersion: apps/v1\n  kind: Deployment\n  metadata:\n    name: nginx-deployment-dude\n    namespace: dude  # Can be any namespace managed by you that the agent has access to.\n  spec:\n    selector:\n      matchLabels:\n        app: nginx\n    replicas: 1\n    template:\n      metadata:\n        labels:\n          app: nginx\n      spec:\n        containers:\n        - name: nginx\n          image: nginx:1.14.2\n          ports:\n          - containerPort: 80\n  ```\n\n**3. Add a manifest file for what you want to deploy on the `naww` namespace and name it `naww.yaml`**\n\n  ```\n  apiVersion: apps/v1\n  kind: Deployment\n  metadata:\n    name: nginx-deployment-naww\n    namespace: naww  # Can be any namespace managed by you that the agent has access to.\n  spec:\n    selector:\n      matchLabels:\n        app: nginx\n    replicas: 1\n    template:\n      metadata:\n        labels:\n          app: nginx\n      spec:\n        containers:\n        - name: nginx\n          image: nginx:1.14.2\n          ports:\n          - containerPort: 80\n  ```\n\n**4. Commit changes and wait for the pipeline to run**\n\n**5. Check dude namespace**\n\n  ```\n  $ kubectl get pods -n dude\n\n  NAME                                     READY   STATUS    RESTARTS   AGE\n  nginx-deployment-dude-66b6c48dd5-rpxx2   1/1     Running   0          6m22s\n  ```\n\n  Notice that the application has deployed.\n\n**6. Check naww namespace**\n\n  ```\n  $ kubectl get pods -n naww\n\n  No resources found in naww namespace.\n  ```\n\n  Notice there is nothing on there.\n\n**7. 
Look at the k-agent logs**\n\n  ```\n  $ kubectl get pods -n gitlab-kubernetes-agent\n\n  NAME                            READY   STATUS    RESTARTS   AGE\n  gitlab-agent-58869d96bd-nqqnf   1/1     Running   0          10s\n\n  $ kubectl logs gitlab-agent-58869d96bd-nqqnf -n gitlab-kubernetes-agent\n\n  {\"level\":\"info\",\"time\":\"2021-08-19T19:17:26.088Z\",\"msg\":\"Feature status change\",\"feature_name\":\"tunnel\",\"feature_status\":true}\n  {\"level\":\"info\",\"time\":\"2021-08-19T19:17:26.088Z\",\"msg\":\"Observability endpoint is up\",\"mod_name\":\"observability\",\"net_network\":\"tcp\",\"net_address\":\"[::]:8080\"}\n  {\"level\":\"info\",\"time\":\"2021-08-19T19:17:26.375Z\",\"msg\":\"Starting synchronization worker\",\"mod_name\":\"gitops\",\"project_id\":\"devsecops/gitops-project\"}\n  ...\n  ```\n\n  You should see logs as follows:\n\n  Application successfully deployed to `dude`\n\n  ```\n  {\"level\":\"info\",\"time\":\"2021-08-20T22:03:57.561Z\",\"msg\":\"Synchronizing objects\",\"mod_name\":\"gitops\",\"project_id\":\"29010173\",\"agent_id\":711,\"commit_id\":\"221499beaf2dcf267cd40324235570001e928817\"}\n  {\"eventType\":\"resourceStatus\",\"group\":\"apps\",\"kind\":\"Deployment\",\"message\":\"Deployment is available. 
Replicas: 1\",\"name\":\"nginx-deployment-dude\",\"namespace\":\"dude\",\"status\":\"Current\",\"timestamp\":\"2021-08-20T22:03:58Z\",\"type\":\"status\"}\n  ```\n\n  Application failed to deploy to `naww`\n\n  ```\n  {\"eventType\":\"resourceStatus\",\"group\":\"apps\",\"kind\":\"Deployment\",\"message\":\"\",\"name\":\"nginx-deployment-naww\",\"namespace\":\"naww\",\"status\":\"Unknown\",\"timestamp\":\"2021-08-20T22:03:29Z\",\"type\":\"status\"}\n  {\"level\":\"warn\",\"time\":\"2021-08-20T22:03:30.015Z\",\"msg\":\"Synchronization failed\",\"mod_name\":\"gitops\",\"project_id\":\"29010173\",\"agent_id\":711,\"commit_id\":\"221499beaf2dcf267cd40324235570001e928817\",\"error\":\"1 resources failed\"}\n  ```\n\nWe can see that deployments only happen on the `dude` namespace because that is all the k-agent has access to. You can add access to other namespaces by creating [Roles and RoleBindings](https://kubernetes.io/docs/reference/access-authn-authz/rbac/) for each namespace like we did for the `dude` namespace.\n\n## Securing GitOps workflow on Kubernetes\n\nNow you have seen how you can create a more restrictive GitOps workflow, allowing you to meet your security needs.\n\nThanks for reading! I hope this guide brings you one step forward into using and securing your GitOps workflow on Kubernetes. 
For more information see the [GitLab Agent documentation](https://docs.gitlab.com/ee/user/clusters/agent/).\n\nPhoto by \u003Ca href=\"https://unsplash.com/@sebbb?utm_source=unsplash&utm_medium=referral&utm_content=creditCopyText\">seabass creatives\u003C/a> on \u003Ca href=\"https://unsplash.com/s/photos/limited?utm_source=unsplash&utm_medium=referral&utm_content=creditCopyText\">Unsplash\u003C/a>\n{: .note}\n\n## Read more on Kubernetes:\n\n- [How to install and use the GitLab Kubernetes Operator](/blog/gko-on-ocp/)\n\n- [Threat modeling the Kubernetes Agent: from MVC to continuous improvement](/blog/threat-modeling-kubernetes-agent/)\n\n- [A new era of Kubernetes integrations on GitLab.com](/blog/gitlab-kubernetes-agent-on-gitlab-com/)\n\n- [Understand Kubernetes terminology from namespaces to pods](/blog/kubernetes-terminology/)\n\n- [What we learned after a year of GitLab.com on Kubernetes](/blog/year-of-kubernetes/)\n",[9,536,1121],{"slug":1779,"featured":6,"template":687},"setting-up-the-k-agent","content:en-us:blog:setting-up-the-k-agent.yml","Setting Up The K Agent","en-us/blog/setting-up-the-k-agent.yml","en-us/blog/setting-up-the-k-agent",{"_path":1785,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1786,"content":1791,"config":1796,"_id":1798,"_type":13,"title":1799,"_source":15,"_file":1800,"_stem":1801,"_extension":18},"/en-us/blog/sha256-support-in-gitaly",{"title":1787,"description":1788,"ogTitle":1787,"ogDescription":1788,"noIndex":6,"ogImage":871,"ogUrl":1789,"ogSiteName":672,"ogType":673,"canonicalUrls":1789,"schema":1790},"GitLab Gitaly project now supports the SHA 256 hashing algorithm","Gitaly now supports SHA-256 repositories. 
Here's why it matters.","https://about.gitlab.com/blog/sha256-support-in-gitaly","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"GitLab Gitaly project now supports the SHA 256 hashing algorithm\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"John Cai\"}],\n        \"datePublished\": \"2023-08-28\",\n      }",{"title":1787,"description":1788,"authors":1792,"heroImage":871,"date":1793,"body":1794,"category":705,"tags":1795},[896],"2023-08-28","\nWe've taken a huge step in SHA-256 support in GitLab: The [Gitaly](https://gitlab.com/gitlab-org/gitaly) project now fully supports SHA-256 repositories. While there is [still some work](https://gitlab.com/groups/gitlab-org/-/epics/10981) we need to do in other parts of the GitLab application before SHA-256 repositories can be used, this milestone is important.\n\n## What is SHA-256?\nSHA-256 is a [hashing algorithm](https://about.gitlab.com/handbook/security/cryptographic-standard.html#algorithmic-standards). Given an input of data, it produces a fixed-length hash of 64 characters with hexadecimal digits. Git uses hashing algorithms to generate IDs for commits and other Git objects such as blobs,\ntrees, and tags.\n\nGit uses the SHA-1 algorithm by default. If you've ever used Git, you know that\ncommit IDs are a bunch of hexademical digits. 
A `git log` command yields\nsomething like the following:\n\n```\ncommit bcd64dba39c90daee2e1e8d9015809b992174e34 (HEAD -> main, origin/main, origin/HEAD)\nAuthor: John Cai \u003Cjcai@gitlab.com>\nDate:   Wed Jul 26 13:41:34 2023 -0400\n\n    Fix README.md\n```\n\nThe `bcd64dba39c90daee2e1e8d9015809b992174e34` is the ID of the commit and is a\n40-character hash generated by using the SHA-1 hashing algorithm.\n\nIn SHA-256 repositories, everything is the same except, instead of a 40-character\nID, it's now a 64-character ID:\n\n```\ncommit e60501431d52f6d06b4749cf205b0dd09141ea0b3155a45b9246df24eee9b97b (HEAD -> master)\nAuthor: John Cai \u003Cjcai@gitlab.com>\nDate:   Fri Jul 7 12:56:52 2023 -0400\n\n    Fix README.md\n```\n\n### Why SHA-256?\nSHA-1, which has been the algorithm that has been used until now in Git, is\ninsecure. In 2017, [Google was able to produce a hash collision](https://security.googleblog.com/2017/02/announcing-first-sha1-collision.html). While the Git project is not yet impacted by these kinds of attacks due to the\nway it stores objects, it is only a matter of time until new attacks on SHA-1\nwill be found that would also impact Git.\n\nFederal regulations such as NIST and CISA [guidelines](https://csrc.nist.gov/projects/hash-functions/nist-policy-on-hash-functions),\nwhich [FedRamp](https://www.fedramp.gov/) enforces, set a due date in 2030 to\nstop using SHA-1, and encourage agencies to move away from it sooner if\npossible.\n\nIn addition, SHA-256 has been labeled experimental in the Git project for a long time,\nbut as of Git 2.42.0, the project has decided to [remove the experimental label](https://github.com/git/git/blob/master/Documentation/RelNotes/2.42.0.txt#L41-L45).\n\n### What does this mean for developers?\nFrom a usability perspective, SHA-256 and SHA-1 repositories really don't have a\nsignificant difference. For personal projects, SHA-1 is probably fine. 
However,\ncompanies and organizations are likely to switch to using SHA-256 repositories\nfor security reasons.\n\n### See SHA-256 in action\nIf you have `sha256sum(1)` installed, you can generate such a hash on the command line:\n\n```\n> printf '%s' \"please hash this data\" | sha256sum\n62f73749b40cc70f453320e1ffc37e405ba50474b5db68ad436e64b61fbb8cf0  -\n```\n\nWe can also see this in action in a Git repository. Let's create a repository,\nadd an initial commit, and inspect the contents of the commit object. **Note:** If\nyou try this yourself, the commit IDs will be different because the date of the\ncommit is part of the hash calculation.\n\n```\n> git init test-repo\n> cd test-repo\n> echo \"This is a README\" >README.md\n> git add .\n> git commit -m \"README\"\n[main (root-commit) 328b61f] README\n 1 file changed, 1 insertion(+)\n create mode 100644 README.md\n> zlib-flate -uncompress \u003C ./git/objects/32/8b61f2449205870f69b5981f58bd8cdbb22f95\ncommit 159tree 09303be712bd8e923f9b227c8522257fa32ca7dc\nauthor John Cai \u003Cjcai@gitlab.com> 1688748132 -0400\ncommitter John Cai \u003Cjcai@gitlab.com> 1688748132 -0400\n\nREADME\n```\n\nIn the last step, we uncompress the actual commit file on disk. Git zlib compresses object\nfiles before storing them on disk.\n\n`zlib-flate(1)` is a utility that comes packaed with `qpdf` that uncompresses zlib compressed files.\n\nNow, if we feed this data back into the SHA-1 algorithm, we get a predictable result:\n\n```\n> zlib-flate -uncompress \u003C .git/objects/32/8b61f2449205870f69b5981f58bd8cdbb22f95 | sha1sum\n328b61f2449205870f69b5981f58bd8cdbb22f95  -\n```\n\nAs we can see, the result of this is the commit ID.\n\nThe recommendation by NIST was to replace SHA-1 with SHA-2 or SHA-3. 
The\nGit project has [undergone this effort](https://git-scm.com/docs/hash-function-transition/),\nand the current state of the feature is that it's fully usable in Git and no\nlonger deemed experimental.\n\nIn fact, you can create and use repositories with SHA-256 as the hashing algorithm\nto see it in action on your local machine:\n\n```\n> git init --object-format=sha256 test-repo\n> cd test-repo\n> echo \"This is a README\" >README.md\n> git add .\n> git commit -m \"README\"\n[main (root-commit) e605014] README\n 1 file changed, 1 insertion(+)\n create mode 100644 README.md\n> git log\ncommit e60501431d52f6d06b4749cf205b0dd09141ea0b3155a45b9246df24eee9b97b (HEAD -> master)\nAuthor: John Cai \u003Cjcai@gitlab.com>\nDate:   Fri Jul 7 12:56:52 2023 -0400\n\n    README\n\n```\n\n",[9,705,683,266],{"slug":1797,"featured":90,"template":687},"sha256-support-in-gitaly","content:en-us:blog:sha256-support-in-gitaly.yml","Sha256 Support In Gitaly","en-us/blog/sha256-support-in-gitaly.yml","en-us/blog/sha256-support-in-gitaly",{"_path":1803,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1804,"content":1810,"config":1815,"_id":1817,"_type":13,"title":1818,"_source":15,"_file":1819,"_stem":1820,"_extension":18},"/en-us/blog/speed-up-your-monorepo-workflow-in-git",{"title":1805,"description":1806,"ogTitle":1805,"ogDescription":1806,"noIndex":6,"ogImage":1807,"ogUrl":1808,"ogSiteName":672,"ogType":673,"canonicalUrls":1808,"schema":1809},"Speed up your monorepo workflow in Git","Tap into the features that can reap huge savings in the long run for any developer team.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749665560/Blog/Hero%20Images/speedmonorepo.jpg","https://about.gitlab.com/blog/speed-up-your-monorepo-workflow-in-git","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"Speed up your monorepo workflow in Git\",\n        \"author\": 
[{\"@type\":\"Person\",\"name\":\"John Cai\"}],\n        \"datePublished\": \"2022-09-06\",\n      }",{"title":1805,"description":1806,"authors":1811,"heroImage":1807,"date":1812,"body":1813,"category":726,"tags":1814},[896],"2022-09-06","\n\nMonorepos have grown in popularity in recent years. For many of us, they are a\npart of our daily Git workflows. The trouble is working with them can be slow. Speeding up\na developer's workflow can reap huge savings in the long run for any team.\n\nFirst, a word about monorepos. What does it mean for a repository to be a\nmonorepo anyway? Well, it depends who you ask and the definition has become\nmore flexible over time, but here are a few.\n\n## Characteristics of monorepos\n\nMonorepos have the following characteristics.\n\n### Multiple sub-projects\n\nThe typical definition of \"monorepo\" is a repository that contains multiple sub-projects. For instance, let's imagine a repository with a web-facing front end,\na backend, an iOS app directory, and an android app directory:\n\n```\nawesome-app/\n|\n|--backend/\n|\n|--web-frontend/\n|\n|--app-ios/\n|\n|--app-android/\n\n```\n\n`awesome-app` is a single repository:\n\n```\ngit clone https://my-favorite-git-hosting-service.com/awesome-app.git\n```\n\nThe [Chromium](https://github.com/chromium/chromium) repository is a good\nexample of this.\n\n### Large files\n\nRepositories can also grow to be very large if large files are checked in. In\nsome cases, binaries or other large assets such as images are checked into the\nrepository to have their history tracked. Other times, large files are inadvertently \nintroduced into the repository. 
The way Git history works, even if these files are\nimmediately removed, the single version that was checked in remains.\n\n### Old projects with deep histories\n\nWhile Git is very good at compressing text files, when a Git repository has a deep history,\nthe need to keep all versions of a file around can cause the size of the repository to be huge.\n\nThe [Linux](https://github.com/torvalds/linux) repository is a good example of this.\n\nFor instance, the Linux project's first Git commit is from [April 2005](https://github.com/torvalds/linux/commit/1da177e4c3f41524e886b7f1b8a0c1fc7321cac2).\n\nAnd a `git rev-list --all --count` gives us 1,120,826 commits! That's a lot of\nhistory! Getting into Git internals a little bit, Git keeps a commit object, and a\ntree object for each commit, as well as a copy of the files at that snapshot\nin history. This means a deep Git history means a lot of Git data.\n\n## Speeding up your Git workflow\n\nHere are some features to help speed up your Git workflow.\n\n### Sparse checkout\n\n[git sparse checkout](https://git-scm.com/docs/git-sparse-checkout) reduces the\nnumber of files you check out to a subset of the repository. (NOTE: This feature\nin Git is still marked experimental.) 
This is especially useful in the case of\n[many sub-projects in a repository](#multiple-sub-projects).\n\nTaking our [example](#multiple-sub-projects) of a monorepo with multiple\nsub-projects, let's say that as a front-end web developer I only need to make\nchanges to `web-frontend/`.\n\n```sh\n> git clone --no-checkout https://my-favorite-git-hosting-service.com/awesome-app.git\n> cd awesome-app\n> git sparse-checkout set web-frontend\n> git checkout\nYour branch is up to date with 'origin/master'.\n> ls\n> web-frontend README.md\n```\n\nOr, if you've already checked out a worktree, sparse checkout can be used to remove\nfiles from the worktree.\n\n\n```sh\n> git clone https://my-favorite-git-hosting-service.com/awesome-app.git\n> cd awesome-app\n> ls\n> backend web-frontend app-ios app-android README.md\n> git sparse-checkout set web-frontend\nUpdating files: 100% (103452/103452), done.\n> ls\n> web-frontend README.md\n```\n\nSparse checkout will only include the directories indicated, plus all files\ndirectly under the root repository directory.\n\nThis way, we only checkout the directories that we need, saving both space locally\nand time since each time `git pull` is done, only files that are checked out will\nneed to be updated.\n\nMore information can be found in the [docs](https://git-scm.com/docs/git-sparse-checkout)\nfor sparse checkout.\n\n### Partial clone\n\n[git partial clone](https://docs.gitlab.com/ee/topics/git/partial_clone.html#:~:text=Partial%20clone%20is%20a%20performance,0%20or%20later%20is%20required) has a similar goal to sparse checkout in reducing the number\nof files in your local Git repository. It provides the option to filter out\ncertain types of files when cloning.\n\nPartial clone is used by passing the `--filter` option to `git-clone`.\n\n```sh\ngit clone --filter=blob:limit=10m\n```\n\nThis will exclude any files over 10 megabytes from being copied to the local\nrepository. 
A full list of supported filters are included in the\n[docs for git-rev-list](https://git-scm.com/docs/git-rev-list#Documentation/git-rev-list.txt",[9,982,682],{"slug":1816,"featured":6,"template":687},"speed-up-your-monorepo-workflow-in-git","content:en-us:blog:speed-up-your-monorepo-workflow-in-git.yml","Speed Up Your Monorepo Workflow In Git","en-us/blog/speed-up-your-monorepo-workflow-in-git.yml","en-us/blog/speed-up-your-monorepo-workflow-in-git",{"_path":1822,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1823,"content":1829,"config":1835,"_id":1837,"_type":13,"title":1838,"_source":15,"_file":1839,"_stem":1840,"_extension":18},"/en-us/blog/start-using-git",{"title":1824,"description":1825,"ogTitle":1824,"ogDescription":1825,"noIndex":6,"ogImage":1826,"ogUrl":1827,"ogSiteName":672,"ogType":673,"canonicalUrls":1827,"schema":1828},"How to tidy up your merge requests with Git","Here's how to use a Git feature that saves a lot of time and cleans up your MRs.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749672243/Blog/Hero%20Images/git-tricks-cover-image.png","https://about.gitlab.com/blog/start-using-git","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"How to tidy up your merge requests with Git\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Ronald van Zon\"}],\n        \"datePublished\": \"2019-02-07\",\n      }",{"title":1824,"description":1825,"authors":1830,"heroImage":1826,"date":1832,"body":1833,"category":726,"tags":1834},[1831],"Ronald van Zon","2019-02-07","\n\nI've worked on a lot of open source projects and one thing they all have in common is\nwhen you create a merge request (or pull request) they will often ask, \"Can you clean up your request?\"\nbecause commits like *fix typo* should not be included in a Git history.\n\nNow there are a few ways of cleaning up commits and I'll show you what I have found to be the easiest 
way.\n\nBelow is an example scenario where I use a feature of Git that has saved me a lot of time.\nI have a tiny project seen in the image below.\n\n![Git Project](https://about.gitlab.com/images/blogimages/start-using-git/git_demo_project.png){: .shadow.medium.center}\n\nNow I like to run my `main.py` in a test environment to see if it works as expected.\nI like to do that by configuring a `.gitlab-ci.yml` to run `main.py`.\nAlthough this is extremely easy, for this example I made sure I increased the number of commits\nto illustrate my example a bit more clearly. So after some time my commit history looks like this:\n\n![Git Project](https://about.gitlab.com/images/blogimages/start-using-git/git_commits_bad.png){: .shadow.medium.center}\n\nHere you can see my first three commits add `README.md`, `main.py` and `.gitlab-ci.yml`.\nA few commits update my `gitlab-ci` file, trying some stuff out, and fixing typos.\nThere's also a commit that cleans up my `gitlab-ci` and two more to fix and clean up `main.py`.\n\nNow some of you might see this and think, \"Looks good,\" while others might want to scream at me\nfor making a mess out of my commits.\n\nHow do we fix it?\n\n## How to consolidate your commits\n\nFirst, let's revert the last two commits using [reset](https://docs.gitlab.com/ee/gitlab-basics/start-using-git.html#unstage-all-changes-that-have-been-added-to-the-staging-area).\nWe don't want to lose our changes so we will use `git reset --soft HEAD~2`.\n`--soft` will keep our changes of the files and `HEAD~2` tells Git the two commits from `HEAD` position should be reverted.\n\nWe create a new commit, `git commit --fixup 6c29979`. This will create a commit called `fixup! Add main Python file`.\nWhen we run `git rebase -i --autosquash 24d214a` we can see below that our `fixup` commit has been moved below\nthe commit we referenced with the tag *6c29979*. 
I could save this and the fixup will be merged into the commit above.\n\n![Git Project](https://about.gitlab.com/images/blogimages/start-using-git/git_rebase_1.png){: .shadow.medium.center}\n\nBut if we look at the commits below the *fixup*, we see that all the commits are related to the *.gitlab-ci.yml*\nand by making a small change here, we can clean up my commits in a single go. We will change the *pick* to *fixup* for all\ncommits but `Add default gitlab-ci` (shown in the image below) and we will save this.\n\n![Git Project](https://about.gitlab.com/images/blogimages/start-using-git/git_rebase_2.png){: .shadow.medium.center}\n\nChecking our Git log, we see that our long list of commits has been reduced to just three. There is a big change that\nyou should be aware of: because I have just rewritten my Git history I will have to use `git push --force` to update\nany *remote repository*.\n\n![Git Project](https://about.gitlab.com/images/blogimages/start-using-git/git_commits_good.png){: .shadow.medium.center}\n\nThis looks a lot better now; only the relevant commits are left. But could we have prevented this while working on this\nfeature? The answer is yes.\n\nWe could have used `git commit --amend` to add almost every commit behind *19d8353 Add default gitlab-ci*.\nThis wouldn't require any new commit for any changes that we were making to our `.gitlab-ci.yml` file. We would have ended\nup with the following and we already know how to handle the *fixup*.\n\n![Git Project](https://about.gitlab.com/images/blogimages/start-using-git/git_commits_alternative.png){: .shadow.medium.center}\n\nSomething to keep in mind when using features that rewrite the history of your Git repository: If you already\npushed your previous commits to a *remote repository* you will have to use `git push --force` to overwrite the\nhistory of the *remote repository*. 
Bad use of this could cause serious problems, so be careful!\nIf you run into trouble, a useful guide that could help you recover from this is [git push --force and how to deal with it](https://evilmartians.com/chronicles/git-push",[728,982,9],{"slug":1836,"featured":6,"template":687},"start-using-git","content:en-us:blog:start-using-git.yml","Start Using Git","en-us/blog/start-using-git.yml","en-us/blog/start-using-git",{"_path":1842,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1843,"content":1848,"config":1853,"_id":1855,"_type":13,"title":1856,"_source":15,"_file":1857,"_stem":1858,"_extension":18},"/en-us/blog/take-advantage-of-git-rebase",{"title":1844,"description":1845,"ogTitle":1844,"ogDescription":1845,"noIndex":6,"ogImage":1807,"ogUrl":1846,"ogSiteName":672,"ogType":673,"canonicalUrls":1846,"schema":1847},"Take advantage of Git rebase","Tap into the Git rebase features to improve your workflow.","https://about.gitlab.com/blog/take-advantage-of-git-rebase","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"Take advantage of Git rebase\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Christian Couder\"}],\n        \"datePublished\": \"2022-10-06\",\n      }",{"title":1844,"description":1845,"authors":1849,"heroImage":1807,"date":1850,"body":1851,"category":726,"tags":1852},[876],"2022-10-06","\n\nThese days, developers spend a lot of time reviewing merge requests\nand taking these reviews into account to improve the code. We'll discuss how\n[Git rebase](https://git-scm.com/docs/git-rebase) can help in\nspeeding up these review cycles. But first, let's take a look at some\nworkflow considerations.\n\n## Different ways to rework a merge request\n\nA developer who worked on some code changes and created a merge\nrequest with these changes will often have to rework them. Why does\nthis happen? 
Tests can fail, bugs are found, or reviewers suggest\nimprovements and find shortcomings.\n\n### Simple but messy method: add more commits\n\nOne way to rework the code changes is to make more changes in some new\ncommits on top of the branch that was used to create the merge\nrequest, and then push the branch again to update the merge\nrequest.\n\nWhen a number of commits have been added in this way, the merge\nrequest becomes problematic:\n\n- It's difficult to review by looking at all the changes together.\n- It's difficult to review the commits separately as they may contain different unrelated changes, or even multiple reworks of the same code.\n\nReviewers find it easier to review changes split into a number of small,\nself-contained commits that can be reviewed individually.\n\n### Pro method: rebase!\n\nA better method to prepare or rework a merge request is to always\nensure that each commit contains small, self-contained, easy-to-review\nchanges.\n\nThis means that all the commits in the branch may need reworking\ninstead of stacking on yet more commits. This approach might seem much\nmore complex and tedious, but `git rebase` comes to the rescue!\n\n## Rework your commits with `git rebase`\n\nIf your goal is to build a merge request from a series of small,\nself-contained commits, your branch may need significant rework before its\ncommits are good enough. When the commits are ready, you can push the branch\nand update or create a merge request with this branch.\n\n### Start an interactive rebase\n\nIf your branch is based on `main`, the command to rework your branch\nis:\n\n```plaintext\ngit rebase -i main\n```\n\nI encourage you to create [a Git alias](https://git-scm.com/book/en/v2/Git-Basics-Git-Aliases),\nor a shell alias or function for this command right away, as you will\nuse it very often.\n\nThe `-i` option passed to `git rebase` is an alias for\n`--interactive`. 
It starts\n[an 'interactive' rebase](https://git-scm.com/docs/git-rebase#Documentation/git-rebase.txt---interactive)\nwhich will open your editor. In it, you will find a list of the\ncommits in your branch followed by commented-out lines beginning with\n`#`. The list of commits looks like this:\n\n```plaintext\npick 1aac632db2 first commit subject\npick a385014ad4 second commit subject\npick 6af12a88cf other commit subject\npick 5cd121e2a1 last commit subject\n```\n\nThese lines are instructions for how `git rebase` should handle these\ncommits. The commits are listed in chronological order, with the\noldest commit at the top. (This order is the opposite of the default\n`git log` order.) What do these lines contain?\n\n- An instruction (here, `pick`) that tells Git what action to take\n- An abbreviated commit ID\n- A commit subject to help you identify the commit contents\n\n### Edit the instruction list\n\nYou can edit these instructions! When you quit your text editor, `git rebase`\nreads the instructions you've just edited, and performs them\nin sequence to recreate your branch the way you want.\n\nAfter the instructions for all commits, a set of commented-out lines\nexplain how to edit the instruction lines, and how each instruction\nwill change your branch:\n\n- If you **delete a commit's entire instruction line** from the list,\n  that commit won't be recreated.\n- If you **reorder the instruction lines**, the commits will be\n  recreated in the order you specify.\n- If you **change the action** from `pick` to something else, such as\n  `squash` or `reword`, Git performs the action you specify on that\n  commit.\n- You can even **add new instruction lines** before, after, or between\n  existing lines.\n\nIf the comment lines aren't enough, more information about what you\ncan do and how it works is available in:\n\n- The [Git Tools - Rewriting History](https://git-scm.com/book/en/v2/Git-Tools-Rewriting-History)\n  section of the \"Pro Git\" book\n- The 
[Interactive mode](https://git-scm.com/docs/git-rebase#_interactive_mode)\n  section of the `git rebase` documentation\n\n### Continue or abort the rebase\n\nAn interactive rebase can stop if there is a conflict (as a regular\nrebase would) or if you used an instruction like `edit` in the\ninstruction line. This allows you to make some changes, like splitting\nthe current commit into two commits, or fixing the rebase conflict if\nthere is one. You can then either:\n\n- Continue the interactive rebase with `git rebase --continue`.\n- Abort the rebase altogether with `git rebase --abort`.\n\n(These `git rebase` options also work when a regular, non-interactive\nrebase stops.)\n\n## Further tips and benefits\n\n### Try different instructions\n\nI recommend you try out the different instructions you can use in\neach instruction line, especially `reword`, `edit`, `squash`, and `fixup`. You'll\nsoon want to use the abbreviated versions of these instructions: `r`,\n`e`, `s`, and `f`.\n\n### Run shell commands in your rebase\n\nYou might also have noticed an `exec \u003Ccommand>` instruction that\nallows you to run any shell command at any point in the interactive rebase.\nI've found it more useful for non-interactive rebases, such as:\n\n```plaintext\ngit rebase --exec 'make test' main\n```\n\n(It's not an interactive rebase because it doesn't contain the `-i` flag.)\n\nThe `--exec \u003Ccommand>` flag allows you to run any shell command after\neach rebased commit, stopping if the shell command fails (which is\nsignaled by a non zero exit code).\n\n### Test all your commits\n\nPassing a command to build your software and run its tests, like\n`make test`, to `--exec` will check that each commit in your branch\nbuilds correctly and passes your tests.\n\nIf `make test` fails, the rebase stops. 
You can then fix the current\ncommit right away, and continue the rebase to test the next\ncommits.\n\nChecking each commit builds cleanly and passes all the tests ensures\nyour code base is always in a good state. It's especially useful if\nyou want to take advantage of\n[Git bisect](https://git-scm.com/docs/git-bisect) when you encounter\nregressions.\n\n## Conclusion\n\nIn Git, a rebase is a very versatile and useful tool to rework\ncommits. Use it to achieve a workflow with high-quality changes\nproposed in high-quality commits and merge requests. It makes your\ndevelopers and reviewers more efficient. Code reviews and debugging also become easier and more effective.\n\n**EDIT:** Check out our [follow-up post on how you can apply this is real life](/blog/rebase-in-real-life/).\n",[9,982,1083,682],{"slug":1854,"featured":6,"template":687},"take-advantage-of-git-rebase","content:en-us:blog:take-advantage-of-git-rebase.yml","Take Advantage Of Git Rebase","en-us/blog/take-advantage-of-git-rebase.yml","en-us/blog/take-advantage-of-git-rebase",{"_path":1860,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1861,"content":1867,"config":1872,"_id":1874,"_type":13,"title":1875,"_source":15,"_file":1876,"_stem":1877,"_extension":18},"/en-us/blog/the-contributions-we-made-to-the-git-2-43-release",{"title":1862,"description":1863,"ogTitle":1862,"ogDescription":1863,"noIndex":6,"ogImage":1864,"ogUrl":1865,"ogSiteName":672,"ogType":673,"canonicalUrls":1865,"schema":1866},"The contributions we made to the Git 2.43 release","Git 2.43 included some improvements from GitLab's Git team. 
Here are some highlights from the work the team has done on Git and why it matters.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749659507/Blog/Hero%20Images/AdobeStock_623844718.jpg","https://about.gitlab.com/blog/the-contributions-we-made-to-the-git-2-43-release","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"The contributions we made to the Git 2.43 release\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"John Cai\"}],\n        \"datePublished\": \"2024-01-11\",\n      }",{"title":1862,"description":1863,"authors":1868,"heroImage":1864,"date":1869,"body":1870,"category":680,"tags":1871},[896],"2024-01-11","[Git 2.43](https://gitlab.com/gitlab-org/git/-/raw/master/Documentation/RelNotes/2.43.0.txt)\nwas officially released on November 20, 2023, and included some improvements from GitLab's Git team. Here are some highlights from the work our team has\ndone on Git and why it matters.\n\n## Segmenting objects across packfiles\n\nIn Git 2.43, [Christian Couder](https://about.gitlab.com/company/team/#chriscool)\nadded a `--filter` option to the `git repack` command. Supported filter (see the\n[filter-spec docs](https://git-scm.com/docs/git-rev-list#Documentation/git-rev-list.txt---filterltfilter-specgt)) can be added to the `git repack --filter` flag. This will cause the filtered out objects to be\npacked into a separate packfile.\n\nA `--filter-to` option was also added. Providing this option will cause Git to write the filtered packfile to the specified location on the filesystem.\n\n### Why it matters\n\nGitaly servers host Git repositories and incur storage costs. In many repositories however, not all the objects need to be accessed all the time. 
Allowing Git to\noffload some repository data onto a different packfile paves the way for storage optimizations whereby we can choose to segment the Git repository data and place\ncertain kinds of objects on cheaper storage such as slower disks or object storage.\n\n## Checking object existence\n\nIn Git, to check the existence of an object one would have to rely on Git returning an error if it couldn’t find an object. However, to date, there has not been a generic way in Git to check the existence of an object. There were certain edge cases that were not handled well by the underlying Git code. For example, if a reference exists as a symbolic reference, but its target branch does not exist.\n\n[Patrick Steinhardt](https://about.gitlab.com/company/team/#pks-gitlab) added the `--exists` option to `git show` as a generic way to check for object existence.\n\n### Why it matters\n\nThe Gitaly team has started work to upstream the [reftable backend](https://gitlab.com/groups/gitlab-org/-/epics/11652) into the Git project. This new flag enables consistent validation of object existence to fix a number of tests to work with the reftables backend.\n\n## Find missing commit objects \n\n`git rev-list`'s `--missing` option provides information about objects that are referenced but are missing from a repository. Up to this release however, this option only worked with blobs and trees. Missing commits would cause `git rev-list` to fail with a fatal error.\n\nIn Git 2.43, [Karthik Nayak](https://about.gitlab.com/company/team/#knayakgl)\nextended the `--missing` option to work with commit objects.\n\n### Why it matters\n\nGitaly's next-generation repository replication implementation relies on a [write\nahead log](https://gitlab.com/groups/gitlab-org/-/epics/8911) (WAL) that logs every write to a repository.\n\nThe upcoming WAL creates separate log entries per transaction – as such, some transactions contain reference updates. 
In these transactions, it is necessary to identify new git objects being added to the repository. The WAL implementation uses a quarantine directory to stage these new objects. \n\nWe can now use git-rev-list(1) along with the --missing flag, to identify all the objects that are newly added and required and also boundary commits that connect the quarantine directory to the main object directory.\n\n## Read gitattributes from HEAD in bare repos\n\nStarting in 2.43, [John Cai](https://about.gitlab.com/company/team/#jcaigitlab)\nmade a change that allows [Git attributes](https://git-scm.com/docs/gitattributes) to start to read attributes from the tree that HEAD points to by default, in bare repositories.\n\n### Why it matters\n\nTo reduce some tech debt around how git attributes are read in a repository, we added the ability to pass a tree object directly to Git through the [`--attr-source` flag](https://git-scm.com/docs/git#Documentation/git.txt---attr-sourcelttree-ishgt).\n\nPassing in `HEAD` to `--attr-source` would fail however, when `HEAD` pointed to and unborn branch, Gitaly would have needed to use a separate call to check if `HEAD` were unborn before passing it in.\n\nThis change not only causes Git to read attributes from `HEAD` by default, which means we don't need to pass in anything, but also silently ignores it if `HEAD` is unborn, which is the behavior we want in Gitaly. This way, we don't need to make any code changes in Gitaly for this to work.\n\nThis leads to simplification on the Gitaly side, as we seek to remove some [technical debt around gitattributes](https://gitlab.com/groups/gitlab-org/-/epics/9006)\nput in during a time when Git lacked support around reading gitattributes in bare repositories.\n\n## Bug fixes\n\n[Patrick Steinhardt](https://about.gitlab.com/company/team/#pks-gitlab) fixed a bug in `git rev-list –stdin`.  
\n\nSteinhardt also addressed an existing issue in [commit-graphs](https://git-scm.com/docs/commit-graph) whereby commits parsed from the commit-graph weren’t always checked for existence. A `GIT_COMMIT_GRAPH_PARANOIA` environment variable can now be turned on to always check for object existence.",[266,683,9],{"slug":1873,"featured":6,"template":687},"the-contributions-we-made-to-the-git-2-43-release","content:en-us:blog:the-contributions-we-made-to-the-git-2-43-release.yml","The Contributions We Made To The Git 2 43 Release","en-us/blog/the-contributions-we-made-to-the-git-2-43-release.yml","en-us/blog/the-contributions-we-made-to-the-git-2-43-release",{"_path":1879,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1880,"content":1886,"config":1892,"_id":1894,"_type":13,"title":1895,"_source":15,"_file":1896,"_stem":1897,"_extension":18},"/en-us/blog/the-road-to-gitaly-1-0",{"title":1881,"description":1882,"ogTitle":1881,"ogDescription":1882,"noIndex":6,"ogImage":1883,"ogUrl":1884,"ogSiteName":672,"ogType":673,"canonicalUrls":1884,"schema":1885},"GitLab no longer requires NFS: The road to Gitaly v1.0","How we went from vertical to horizontal scaling without depending on NFS by creating our own Git RPC service.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749670092/Blog/Hero%20Images/road-to-gitaly.jpg","https://about.gitlab.com/blog/the-road-to-gitaly-1-0","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"The road to Gitaly v1.0 (aka, why GitLab doesn't require NFS for storing Git data anymore)\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Zeger-Jan van de Weg\"}],\n        \"datePublished\": \"2018-09-12\",\n      }",{"title":1887,"description":1882,"authors":1888,"heroImage":1883,"date":1889,"body":1890,"category":726,"tags":1891},"The road to Gitaly v1.0 (aka, why GitLab doesn't require NFS for storing Git data anymore)",[1061],"2018-09-12","\nIn 
the early days of [GitLab.com](https://gitlab.com), most of the application,\nincluding Rails worker processes, Sidekiq background processes, and Git storage,\nall ran on a single server. A single server is easy to deploy to and maintain.\nThe same structure is what most smaller GitLab instances still use for their\nself-managed [Omnibus](https://docs.gitlab.com/omnibus/) installation. Scaling\nis done vertically, meaning; adding more RAM, CPU, and disk space.\n\n## Moving from vertical to horizontal scaling\n\nSoon we ran out of options to continue scaling the system vertically, and we had\nto move to scaling horizontally by adding new servers. To have the repositories\navailable on all the nodes, NFS (Network File System) was used to mount these to each application\nserver and background workers. NFS is a well-known technology for sharing file\nsystems across a network. For each server, each storage node needed to be\nmounted. The advantage: GitLab.com could keep adding more servers and scale. However NFS\nhad multiple disadvantages too: the visibility is decreased to what type of file\nsystem operation is performed. Even worse, one NFS storage node's outage impacted\nthe whole site, and took the whole site down. On the other hand, Git operations\ncan be quite CPU/IOPS intensive too, so we began a balancing act between adding more nodes,\nand thus reducing reliability, versus scaling nodes vertically.\n\n## Considering NFS alternatives\n\nOver two years ago, we started to look for alternatives. One of the first ideas\nwas to remove the dependency on NFS with [Ceph](https://ceph.com/).\nCeph is a distributed file system that was meant to replace NFS in an\narchitecture like ours. Like NFS, this would solve our scaling problem on the\nsystem level, meaning that little to no changes would be required to GitLab as\nan application. However, running a Ceph cluster in the cloud didn't have the\nperformance characteristics that were required. 
Briefly we flirted with the idea\nof [moving away from the cloud][no-cloud], but this would have had major implications\nfor our own infrastructure team, and given that many of our customers _do_ run in\nthe cloud, [we decided to stay in the cloud][yes-cloud].\n\n[no-cloud]: /blog/why-choose-bare-metal/\n[yes-cloud]: /2017/03/02/why-we-are-not-leaving-the-cloud/\n\n## Introducing Gitaly\n\nSo it was clear that the application needed to be redesigned, and a new service\nwould be introduced to handle all Git requests. We named it\n[Gitaly](https://gitlab.com/gitlab-org/gitaly).\n\n![Gitaly Architecture Diagram](https://about.gitlab.com/images/gitaly_arch.png){: .large.center}\n*\u003Csmall>The planned architecture at the project start\u003C/small>*\n\nAs the diagram shows, the new Git server would have a number of distinct clients.\nTo make sure the protocol for the server and its clients is well defined,\n[Protocol Buffers][protobuf] was used. The client calls are handled by\nleveraging [gRPC][grpc]. Combined, they allowed us to iteratively add RPCs and\nmove away from NFS, in favor of an HTTP boundary. With the technologies chosen,\nthe migration started. The ultimate goal: v1.0, meaning no disk access was\nrequired to the Git storage nodes for [GitLab.com](https://gitlab.com).\n\nShipping such an architectural change should not influence the performance, nor\nthe stability of the self-managed installations of GitLab, so for each RPC a [feature\nflag](https://docs.gitlab.com/ee/development/feature_flags/index.html) gated the use of it. When the RPC had gone through a series of tests on both\ncorrectness and performance impact, the gate was removed. 
To determine stability we used\n[Prometheus](https://docs.gitlab.com/ee/administration/monitoring/prometheus/) for monitoring and the ELK stack for sifting through massive numbers of structured log messages.\n\nThe server was written in Go, while the application is a large Rails monolith.\nRails had a great amount of code that was still very valuable. This code got\nextracted to the `lib/gitlab/git` directory, allowing easier vendoring. The idea\nwas to start a sidecar next to the Go server, reusing the old code. About once a week the\ncode would be re-vendored. This allowed Ruby developers on other teams to\nwrite code once, and ship it. Bonus points could be earned if [the boilerplate code][gitaly-ruby]\nwas written to call the same function in Ruby!\n\n[protobuf]: https://developers.google.com/protocol-buffers/\n[gitaly-ruby]: https://gitlab.com/gitlab-org/gitaly/blob/232c26309a8e9bef61262ccd04a8f0ba75e13d73/doc/beginners_guide.md#gitaly-ruby-boilerplate\n[grpc]: https://grpc.io/\n\nThe new service wasn't all sunshine and rainbows though, at times it felt like\nthe improved visibility was hurting our ability to ship. For example, it became\nclear that the illusion of an attached disk created\n[N + 1 problems][rails-eager-loading]. And even though this is a well-known problem\nin Ruby on Rails, the tools to combat it are all tailored toward using it with\nActiveRecord, Rails' ORM.\n\n[rails-eager-loading]:https://guides.rubyonrails.org/active_record_querying.html#eager-loading-associations\n\n## Nearing v1.0\n\nWith each RPC introduced, v1.0 was getting closer and closer. But how could we be\nsure everything was migrated before unmounting all NFS mount points? A trip\nswitch got introduced, guarding the details required to get to the full path of each\nrepository. Without this data there was no way to execute any Git operation\nthrough NFS. Luckily, the trip switch never went off, so now it was clear NFS\nwasn't being used. 
The next step was unmounting on our staging environment! Again, this was very\nuneventful. Leaving the volumes unmounted for a full week, and not seeing any\nindication of unexpected errors, the logical next step was our production instance.\n\nDays later we started rolling out these changes to production: first the\nbackground workers were unmounted, than we moved onto higher impact services. At\nthe end of the day, all drives were unmounted without customer impact.\n\n## What's next?\n\nSo, where is this v1.0 tag? We didn't tag it, and I don't think we will. v1.0 is\na state for our Git infrastructure, and a goal for the team, rather than the code base.\nThat being said, the next mental goal is allowing all customers to run without NFS.\nAt the time of writing, some features like administrative tasks, aren't using Gitaly just\nyet. These are slated for [v1.1][gitaly-11], and our next objective.\n\nWant to know more about our Gitaly journey? Read about [how we're making your Git data highly available with Praefect](/blog/high-availability-git-storage-with-praefect/) and [how a fix in Go 1.9 sped up our Gitaly service by 30x](/blog/how-a-fix-in-go-19-sped-up-our-gitaly-service-by-30x/).\n{: .alert .alert-info .text-center}\n\n[gitaly-11]: https://gitlab.com/groups/gitlab-org/-/epics/288\n\nPhoto by [Jason Hafso](https://unsplash.com/photos/8Sjcc4vExpg) on Unsplash\n{: .note}\n",[774,9,684],{"slug":1893,"featured":6,"template":687},"the-road-to-gitaly-1-0","content:en-us:blog:the-road-to-gitaly-1-0.yml","The Road To Gitaly 1 
0","en-us/blog/the-road-to-gitaly-1-0.yml","en-us/blog/the-road-to-gitaly-1-0",{"_path":1899,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1900,"content":1906,"config":1911,"_id":1913,"_type":13,"title":1914,"_source":15,"_file":1915,"_stem":1916,"_extension":18},"/en-us/blog/top-10-technical-articles-of-2022",{"title":1901,"description":1902,"ogTitle":1901,"ogDescription":1902,"noIndex":6,"ogImage":1903,"ogUrl":1904,"ogSiteName":672,"ogType":673,"canonicalUrls":1904,"schema":1905},"Top 10 technical articles of 2022","Let’s review our fantastic year of how-to guides. From fixing failed pipelines to making the best use of GitOps, we have you covered with our in-depth tutorials.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749663736/Blog/Hero%20Images/a-deep-dive-into-the-security-analyst-persona.jpg","https://about.gitlab.com/blog/top-10-technical-articles-of-2022","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"Top 10 technical articles of 2022\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Valerie Silverthorne\"}],\n        \"datePublished\": \"2022-12-08\",\n      }",{"title":1901,"description":1902,"authors":1907,"heroImage":1903,"date":1908,"body":1909,"category":726,"tags":1910},[815],"2022-12-08","\nWith 2022 coming to a close, we wanted to ensure everyone gets one more chance to explore our top 10 technical blog posts of the year. Roll up your sleeves and enjoy our most-viewed how-to articles and don’t forget to bookmark them for next year!\n\n## 1. Failed pipeline?\n\nWe have *all* been there, and not much is more frustrating than that red X. Staff Developer Evangelist [Brendan O’Leary](/company/team/#brendan) offers his best advice on troubleshooting the “why?” of a GitLab failed pipeline – it starts with keeping the right perspective. 
So many factors are involved in code development that it’s critical to ask all of the questions: Is it the code? Is it the test? Is it a vulnerability, etc.?\n\n[How to troubleshoot a GitLab pipeline failure](https://docs.gitlab.com/ci/debugging/)\n\n## 2. Why Git Rebase is your BFF\n\nWith code review increasingly important to successful DevOps, Senior Backend Engineer (Gitaly) [Christian Couder](/company/team/#chriscool) thinks devs might be forgetting a secret weapon in their IDE: Git Rebase. Learn how to rework commits with Git Rebase, including expert tips to try different instructions like ‘reword’, ‘edit’, and ‘squash’.\n\n[Take advantage of Git Rebase](/blog/take-advantage-of-git-rebase/)\n\n## 3. Alert fatigue is real\n\nFollow along with Senior Site Reliability Engineer [Steve Azzopardi](/company/team/#steveazz) as he lays out a GitLab investigation into annoying, time-consuming (and customer-facing) 502 errors in the GitLab Pages logs. To uncover the problem, Azzopardi and team had to unearth some red herrings along the way, but ultimately discovered the importance of PID 1 in a container.\n\n[How we reduced 502 errors by caring about PID 1 in containers](/blog/how-we-removed-all-502-errors-by-caring-about-pid-1-in-kubernetes/)\n\n## 4. More pipelines = less complexity\n\nCI/CD is at the heart of most modern DevOps practices, but that doesn’t mean it’s a “set it and forget it.” Staff Backend Engineer Fabio Pittino acknowledges the complexity challenges of CI/CD and suggests the solution is choosing the right pipelines for the job. Understand the differences between parent-child and multi-project pipelines to streamline your CI/CD efforts.\n\n[Breaking down CI/CD complexity with parent-child and multi-project pipelines](/blog/parent-child-vs-multi-project-pipelines/)\n\n## 5. Hacking and bug bounties\n\nHow did a Swedish web developer go from zero to number seven on our HackerOne Top 10 list in just over a year? 
Johan Carlsson offers a detailed look at how and why he started looking for bugs in GitLab in his spare time, and how others can jump into hacking, too.\n\n[Want to start hacking? Here’s how to quickly dive in](/blog/cracking-our-bug-bounty-top-10/)\n\n## 6. Gitlab… on an iPad\n\nYes, you can code on an M1-chip-based iPad, and Staff Developer Evangelist Brendan O’Leary walks through all the necessary steps to get GitLab running using GitPod.\n\n[How to code, build, and deploy from an iPad using GitLab and GitPod](/blog/how-to-code-build-and-deploy-from-an-ipad-using-gitlab-and-gitpod/)\n\n## 7. Speed up database changes\n\nMany DevOps teams have mastered speedy application code changes but have struggled to make database updates equally streamlined. In this step-by-step guide, you’ll learn how to apply DevOps principles to database change management.\n\n[How to bring DevOps to the database with GitLab and Liquibase](/blog/how-to-bring-devops-to-the-database-with-gitlab-and-liquibase/)\n\n## 8. A primer on IaC security\n\nInfrastructure as Code (IaC) is an increasingly popular solution for DevOps teams, and with good reason: It’s an efficient and low-resource solution. But, as Senior Developer Evangelist [Michael Friedrich](/company/team/#dnsmichi) explains, it’s also ripe with potential security vulnerabilities. Friedrich takes an exhaustive look at the threats, tools, integrations, and strategies that make IaC a safer choice.\n\n[Fantastic Infrastructure as Code security attacks and how to find them](/blog/fantastic-infrastructure-as-code-security-attacks-and-how-to-find-them/)\n\n## 9. Everything you need to know about GitOps\n\nWant to know how to make GitLab work with GitOps? 
Senior Product Manager (Configure) [Viktor Nagy](/company/team/#nagyv-gitlab) created an eight-part tutorial covering everything GitLab and GitOps, culminating in how to make a GitLab agent for Kubernetes self-managing.\n\n[The ultimate guide to GitOps with GitLab](/blog/the-ultimate-guide-to-gitops-with-gitlab/)\n\n## 10. The skinny on static site generators\n\nDevs will get the most out of GitLab Pages by choosing the right static site generator (SSG). Developer Evangelist [Fatima Sarah Khalid](/company/team/#sugaroverflow) reviews six options and has also created a toolkit to help make the SSG evaluation process easier.\n\n[How to choose the right static site generator](/blog/comparing-static-site-generators/)\n\n",[819,682,9],{"slug":1912,"featured":6,"template":687},"top-10-technical-articles-of-2022","content:en-us:blog:top-10-technical-articles-of-2022.yml","Top 10 Technical Articles Of 2022","en-us/blog/top-10-technical-articles-of-2022.yml","en-us/blog/top-10-technical-articles-of-2022",{"_path":1918,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1919,"content":1925,"config":1932,"_id":1934,"_type":13,"title":1935,"_source":15,"_file":1936,"_stem":1937,"_extension":18},"/en-us/blog/tracking-down-missing-tcp-keepalives",{"title":1920,"description":1921,"ogTitle":1920,"ogDescription":1921,"noIndex":6,"ogImage":1922,"ogUrl":1923,"ogSiteName":672,"ogType":673,"canonicalUrls":1923,"schema":1924},"Tracking TCP Keepalives: Lessons in Docker, Golang & GitLab","An in-depth recap of debugging a bug in the Docker client library.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749680874/Blog/Hero%20Images/network.jpg","https://about.gitlab.com/blog/tracking-down-missing-tcp-keepalives","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"What tracking down missing TCP Keepalives taught me about Docker, Golang, and GitLab\",\n        \"author\": 
[{\"@type\":\"Person\",\"name\":\"Stan Hu\"}],\n        \"datePublished\": \"2019-11-15\",\n      }",{"title":1926,"description":1921,"authors":1927,"heroImage":1922,"date":1928,"body":1929,"category":726,"tags":1930},"What tracking down missing TCP Keepalives taught me about Docker, Golang, and GitLab",[1380],"2019-11-15","\n\nThis blog post was originally published on the GitLab Unfiltered blog. It was reviewed and republished on 2019-12-03.\n{: .alert .alert-info .note}\n\nWhat began as failure in a GitLab static analysis check led to a\ndizzying investigation that uncovered a subtle [bug in the Docker client\nlibrary code](https://github.com/docker/for-linux/issues/853) used by\nthe GitLab Runner. We ultimately worked around the problem by upgrading\nthe Go compiler, but in the process we uncovered an unexpected change in\nthe Go TCP keepalive defaults that fixed an issue with Docker and GitLab\nCI.\n\nThis investigation started on October 23, when backend engineer [Luke\nDuncalfe](/company/team/#.luke) mentioned, \"I'm seeing\n[`static-analysis` failures with no output](https://gitlab.com/gitlab-org/gitlab/-/jobs/331174397).\nIs there something wrong with this job?\" He opened [a GitLab\nissue](https://gitlab.com/gitlab-org/gitlab/issues/34951) to discuss.\n\nWhen Luke ran the static analysis check locally on his laptop, he saw\nuseful debugging output when the test failed. For example, an extraneous\nnewline would accurately be reported by Rubocop. However, when the same\ntest ran in GitLab's automated test infrastructure, the test failed\nquietly:\n\n![Failed job](https://about.gitlab.com/images/blogimages/docker-tcp-keepalive-debug/job-failure.png){: .shadow.center}\n\nNotice how the job log did not include any clues after the `bin/rake\nlint:all` step. 
This made it difficult to determine whether a real\nproblem existed, or whether this was just a flaky test.\n\nIn the ensuing days, numerous team members reported the same problem.\nNothing kills productivity like silent test failures.\n\n## Was something wrong with the test itself?\n\nIn the past, we had seen that if that specific test generated enough\nerrors, [the output buffer would fill up, and the continuous integration\n(CI) job would lock\nindefinitely](https://gitlab.com/gitlab-org/gitlab-foss/issues/61432). We\nthought we had [fixed that issue months\nago](https://gitlab.com/gitlab-org/gitlab-foss/merge_requests/28402). Upon\nfurther review, that fix seemed to eliminate any chance of a thread\ndeadlock.\n\nDid we have to flush the buffer? No, because the Linux kernel will do\nthat for an exiting process already.\n\n## Was there a change in how CI logs were handled?\n\nWhen a test runs in GitLab CI, the [GitLab\nRunner](https://gitlab.com/gitlab-org/gitlab-runner/) launches a Docker\ncontainer that runs commands specified by a `.gitlab-ci.yml` inside the\nproject repository. As the job runs, the runner streams the output to\nthe GitLab API via PATCH requests. The GitLab backend saves this data\ninto a file. The following sequence diagram shows how this works:\n\n```plantuml\n== Get a job! ==\nRunner -> GitLab: POST /api/v4/jobs/request\nGitLab -> Runner: 201 Job was scheduled\n\n== Job sends logs (1 of 2) ==\nRunner -> GitLab: PATCH /api/v4/job/:id/trace\nGitLab -> File: Save to disk\nGitLab -> Runner: 202 Accepted\n\n== Job sends logs (2 of 2) ==\nRunner -> GitLab: PATCH /api/v4/job/:id/trace\nGitLab -> File: Save to disk\nGitLab -> Runner: 202 Accepted\n```\n\n[Henrich Lee Yu](/company/team/#engwan) mentioned\nthat we had recently [disabled a feature flag that changed how GitLab\nhandled CI job\nlogs](https://docs.gitlab.com/ee/administration/job_logs.html#new-incremental-logging-architecture). 
[The\ntiming seemed to line\nup](https://gitlab.com/gitlab-org/gitlab/issues/34951#note_236723888).\n\nThis feature, called live CI traces, eliminates the need for a shared\nPOSIX filesystem (e.g., NFS) when saving job logs to disk by:\n\n1. Streaming data into memory via Redis\n2. Persisting the data in the database (PostgreSQL)\n3. Archiving the final data into object storage\n\nWhen this flag is enabled, the flow of CI job logs looks something like\nthe following:\n\n```plantuml\n== Get a job! ==\nRunner -> GitLab: POST /api/v4/jobs/request\nGitLab -> Runner: 201 Job was scheduled\n\n== Job sends logs ==\nRunner -> GitLab: PATCH /api/v4/job/:id/trace\nGitLab -> Redis: Save chunk\nGitLab -> Runner: 202 Accepted\n...\n== Copy 128 KB chunks from Redis to database ==\nGitLab -> Redis: GET gitlab:ci:trace:id:chunks:0\nGitLab -> PostgreSQL: INSERT INTO ci_build_trace_chunks\n...\n== Job finishes ==\n\nRunner -> GitLab: PUT /api/v4/job/:id\nGitLab -> Runner: 200 Job was updated\n\n== Archive trace to object storage ==\n```\n\nLooking at the flow diagram above, we see that this approach has more\nsteps. After receiving data from the runner, something could have gone\nwrong with handling a chunk of data. However, we still had many\nquestions:\n\n1. Did the runners send the right data in the first place?\n1. Did GitLab drop a chunk of data somewhere?\n1. Did this new feature actually have anything to do with the problem?\n1. Are they really making another Gremlins movie?\n\n## Reproducing the bug: Simplify the `.gitlab-ci.yml`\n\nTo help answer those questions, we simplified the `.gitlab-ci.yml` to\nrun only the `static-analysis` step. We inserted a known Rubocop error,\nreplacing a `eq` with `eql`. We first ran this test on a separate GitLab\ninstance with a private runner. 
No luck there – the job showed the right\noutput:\n\n```\nOffenses:\n\nee/spec/models/project_spec.rb:55:42: C: RSpec/BeEql: Prefer be over eql.\n        expect(described_class.count).to eql(2)\n                                         ^^^\n\n12669 files inspected, 1 offense detected\n```\n\nHowever, we repeated the test on our staging server and found that we\nreproduced the original problem. In addition, the live CI trace feature\nflag had been activated on staging. Since the problem occurred with and\nwithout the feature, we could eliminate that feature as a possible\ncause.\n\nPerhaps something with the GitLab server environment caused a\nproblem. For example, could the load balancers be rate-limiting the\nrunners? As an experiment, we pointed a private runner at the staging\nserver and re-ran the test. This time, it succeeded: the output was\nshown. That seemed to suggest that the problem had more to do with the\nrunner than with the server.\n\n## Docker Machine vs. Docker\n\nOne key difference between the two tests: One runner used a shared,\nautoscaled runner using a [Docker\nMachine](https://docs.docker.com/machine/overview/) executor, and the\nprivate runner used a [Docker\nexecutor](https://docs.gitlab.com/runner/executors/docker.html).\n\nWhat does Docker Machine do exactly? The following diagram may help\nillustrate:\n\n![Docker Machine](https://docs.docker.com/machine/img/machine.png){: .medium.center}\n\nThe top-left shows a local Docker instance. When you run Docker from the\ncommand-line interface (e.g., `docker attach my-container`), the program\njust makes [REST calls to the Docker Engine\nAPI](https://docs.docker.com/engine/api/v1.40/).\n\nThe rest of the diagram shows how Docker Machine fits into the\npicture. Docker Machine is an entirely separate program. The GitLab\nRunner shells out to `docker-machine` to create and destroy virtual\nmachines using cloud-specific (e.g. Amazon, Google, etc.) drivers. 
Once\na machine is running, the runner then uses the Docker Engine API to run,\nwatch, and stop containers.\n\nNote that this API is used securely over an HTTPS connection. This is an\nimportant difference between the Docker Machine executor and Docker\nexecutor: The former needs to communicate across the network, while the\nlatter can either use a local TCP socket or UNIX domain socket.\n\n## Google Cloud Platform timeouts\n\nWe've known for a while that Google Cloud [has a 10-minute idle\ntimeout](https://cloud.google.com/compute/docs/troubleshooting/general-tips),\nwhich has caused issues in the past:\n\n> Note that idle connections are tracked for a maximum of 10 minutes,\n> after which their traffic is subject to firewall rules, including the\n> implied deny ingress rule. If your instance initiates or accepts\n> long-lived connections with an external host, you should adjust TCP\n> keep-alive settings on your Compute Engine instances to less than 600\n> seconds to ensure that connections are refreshed before the timeout\n> occurs.\n\nWas the problem caused by this timeout? With the Docker Machine\nexecutor, we found that we could reproduce the problem with a simple\n`.gitlab-ci.yml`:\n\n```yaml\nimage: \"busybox:latest\"\n\ntest:\n  script:\n    - date\n    - sleep 601\n    - echo \"Hello world!\"\n    - date\n    - exit 1\n```\n\nThis would reproduce the failure, where we would never see the `Hello\nworld!` output. Changing the `sleep 601` to `sleep 599` would make the\nproblem go away. Hurrah! All we have to do is tweak the system TCP\nkeepalives, right? Google provided these sensible settings:\n\n```sh\nsudo /sbin/sysctl -w net.ipv4.tcp_keepalive_time=60 net.ipv4.tcp_keepalive_intvl=60 net.ipv4.tcp_keepalive_probes=5\n```\n\nHowever, enabling these kernel-level settings didn't solve the\nproblem. Were keepalives even being sent? 
Or was there some other issue?\nWe turned our attention to network traces.\n\n## Eavesdropping on Docker traffic\n\nIn order to understand what was happening, we needed to be able to\nmonitor the network communication between the runner and the Docker\ncontainer. But how exactly does the GitLab Runner stream data from a\nDocker container to the GitLab server?  The following diagram\nillustrates the flow:\n\n```plantuml\nRunner -> Docker: POST /containers/name/attach\nDocker -> Runner: \u003Ccontainer output>\nDocker -> Runner: \u003Ccontainer output>\nRunner -> GitLab: PATCH /api/v4/job/:id/trace\nGitLab -> File: Save to disk\nGitLab -> Runner: 202 Accepted\n```\n\nFirst, the runner makes a [POST request to attach to the container\noutput](https://docs.docker.com/engine/api/v1.40/#operation/ContainerAttach).\nAs soon as a process running in the container outputs some data, Docker\nwill transmit the data over this HTTPS stream. The runner then copies\nthis data to GitLab via the PATCH request.\n\nHowever, as mentioned earlier, traffic between a GitLab Runner and the\nremote Docker machine is encrypted over HTTPS on port 2376. Was there an\neasy way to disable HTTPS? Searching through the code of Docker Machine,\nwe found that it did not appear to be supported out of the box.\n\nSince we couldn't disable HTTPS, we had two ways to eavesdrop:\n\n1. Use a man-in-the-middle proxy (e.g. [mitmproxy](https://mitmproxy.org/))\n1. 
Record the traffic and decrypt the traffic later using the private keys\n\n## Ok, let's be the man-in-the-middle!\n\nThe first seemed more straightforward, since [we already had experience\ndoing this with the Docker\nclient](https://docs.gitlab.com/ee/administration/packages/container_registry.html#running-the-docker-daemon-with-a-proxy).\n\nHowever, after [defining the proxy variables for GitLab\nRunner](https://docs.gitlab.com/runner/configuration/proxy.html#adding-proxy-variables-to-the-runner-config),\nwe found we were only able to intercept the GitLab API calls with\n`mitmproxy`. The Docker API calls still went directly to the remote\nhost. Something wasn't obeying the proxy configuration, but we didn't\ninvestigate further. We tried the second approach.\n\n## Decrypting TLS data\n\nTo decrypt TLS data, we would need to obtain the encryption keys. Where\nwere these located for a newly-created system with `docker-machine`? It\nturns out `docker-machine` worked in the following way:\n\n1. Call the Google Cloud API to create a new machine\n1. Create a `/root/.docker/machine/machines/:machine_name` directory\n1. Generate a new SSH keypair\n1. Install the SSH key on the server\n1. Generate a new TLS certificate and key\n1. Install and configure Docker on the newly-created machine with TLS certificates\n\nAs long as the machine runs, the directory will contain the information\nneeded to decode this traffic. We ran `tcpdump` and saved the private keys.\n\nOur first attempt at decoding the traffic failed. Wireshark could not\ndecode the encrypted traffic, although general TCP traffic could still\nbe seen. Researching more, we found out why: If the encrypted traffic\nused a [Diffie-Hellman key\nexchange](https://en.wikipedia.org/wiki/Diffie%E2%80%93Hellman_key_exchange),\nhaving the private keys would not suffice! 
This is by design, a property\ncalled [perfect forward\nsecrecy](https://en.m.wikipedia.org/wiki/Forward_secrecy).\n\nTo get around that limitation, we modified the GitLab Runner to disable\ncipher suites that used the Diffie-Hellman key exchange:\n\n```diff\ndiff --git a/vendor/github.com/docker/go-connections/tlsconfig/config_client_ciphers.go b/vendor/github.com/docker/go-connections/tlsconfig/config_client_ciphers.go\nindex 6b4c6a7c0..a3f86d756 100644\n",[266,9,536,773,775,773,1931,1322,1242],"AWS",{"slug":1933,"featured":6,"template":687},"tracking-down-missing-tcp-keepalives","content:en-us:blog:tracking-down-missing-tcp-keepalives.yml","Tracking Down Missing Tcp Keepalives","en-us/blog/tracking-down-missing-tcp-keepalives.yml","en-us/blog/tracking-down-missing-tcp-keepalives",{"_path":1939,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1940,"content":1945,"config":1951,"_id":1953,"_type":13,"title":1954,"_source":15,"_file":1955,"_stem":1956,"_extension":18},"/en-us/blog/tuto-mac-m1-gitlab-ci",{"title":1941,"description":1942,"ogTitle":1941,"ogDescription":1942,"noIndex":6,"ogImage":953,"ogUrl":1943,"ogSiteName":672,"ogType":673,"canonicalUrls":1943,"schema":1944},"How to use Scaleway to self-host your GitLab Runners","Learn how to set up GitLab CI for your iOS and macOS projects using a hosted Mac mini M1.","https://about.gitlab.com/blog/tuto-mac-m1-gitlab-ci","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"How to use Scaleway to self-host your GitLab Runners\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Benedikt Rollik\"}],\n        \"datePublished\": \"2021-06-07\",\n      }",{"title":1941,"description":1942,"authors":1946,"heroImage":953,"date":1948,"body":1949,"category":726,"tags":1950},[1947],"Benedikt Rollik","2021-06-07","\nGitLab's complete DevOps platform comes with built-in continuous integration (CI) and continuous delivery (CD) via [GitLab 
CI/CD](https://docs.gitlab.com/ee/ci/). GitLab CI/CD is a great solution to increase developer productivity and motivation to write higher-quality code without sacrificing speed. It runs a series of tests every time a commit is pushed, providing immediate visibility into the results of changes in the codebase. While it is not a hassle to set up a CI using Linux-based machines, iOS and macOS developers may find it is more complicated to have access to a Mac that is connected and available 24 hours a day.\n\nGitLab Runners, provided on GitLab.com, are the engine that executes CI workflows. Due to various requirements, some users may opt to self-host runners on public cloud VMs. This is super easy if the build VM OS requirement is Linux-based since there are several low-cost public cloud Linux-based VM solutions. However, iOS and macOS developers may find fewer options for public cloud-delivered macOS based systems.\n\nIn this blog post tutorial, you will learn how to set up CI for iOS and macOS application development using a Scaleway Virtual Instance running the [GitLab application](https://www.scaleway.com/en/docs/install-gitlab-with-dbaas/) and a GitLab Runner that runs on a Scaleway-hosted [Mac mini M1](https://www.scaleway.com/en/hello-m1/). To complete this tutorial most successfully, we assume that you have some experience creating Xcode and GitLab projects, as well as some experiences using a Terminal and git.\n\n> **Requirements**\n>\n- You have an account and are logged into [console.scaleway.com](https://console.scaleway.com)\n- You have [configured your SSH Key](https://www.scaleway.com/en/docs/configure-new-ssh-key/)\n- You have a Virtual Instance running the GitLab InstantApp\n- **Note:** We assume you have already deployed a Virtual Instance running the GitLab InstantApp. If not, [deploy GitLab](https://www.scaleway.com/en/docs/install-gitlab-with-dbaas/) before continuing with this tutorial.\n\n### Deploying the Mac mini M1\n\n1. 
Log into your [Scaleway console](https://console.scaleway.com) and click on **Apple silicon** in the **Compute** section of the sidebar.\n\n   ![Orga_dashboard](https://about.gitlab.com/images/blogimages/scaleway-blog/Orga_dashboard.png){: .shadow.medium}\n   Click on the \"Apple silicon\" in the Scaleway console.\n   {: .note.text-center}\n\n1. The Apple silicon M1 as-a-Service splash screen displays. Click **Create a Mac mini M1**.\n1. Enter the details for your Mac mini M1:\n\n   - Select the geographic region in which your Mac mini M1 will be deployed.\n   - Choose the macOS version you want to run on the Mac mini M1.\n   - Select the hardware configuration for your Mac mini M1.\n   - Enter a name for your Mac mini M1.\n\n1. Click **Create a Mac mini M1** to launch the installation of your Apple silicon M1 as-a-Service.\n\n   ![M1_creation](https://about.gitlab.com/images/blogimages/scaleway-blog/M1_creation.png){: .shadow.medium}\n   Click \"Create a Mac mini M1\" to launch.\n   {: .note.text-center}\n\n1. Once deployed click **VNC** from the Mac mini M1 Overview page to launch the remote desktop client.\n\n1. Launch the **App Store** and install the **Xcode development environment** on your Mac mini M1.\n\n### Setting-up the Homebrew package manager\n\n[Homebrew](https://brew.sh/) is a package manager for macOS. It can be used to manage the software installed on your Mac. We use it to install `gitlab-runner` on your Mac mini M1.\n\n1. Click on the Terminal icon to open a new **Terminal**.\n\n1. 
Copy-paste the following code in the terminal application and press **Enter** to install Homebrew and the Xcode command line tools:\n\n   ```sh\n   /bin/bash -c \"$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)\"\n   ```\n\nLeave the terminal window open since it is required for the next step.\n\n#### Installing the GitLab Runner\n\nThe GitLab Runner is an application installed on a different computer than your GitLab host and runs jobs in a pipeline. It executes the build task on your Mac mini M1 for the code you push to your GitLab instance.\n\n1. Make sure you are still in the **Terminal** application. If you closed it after installing Homebrew, open a new one.\n\n1. Run the following command to install `gitlab-runner`:\n\n   ```\n   brew install gitlab-runner\n   ```\n\n### Configuring the Runner in GitLab\n\n   > **Note:** You require a Virtual Instance running the [GitLab InstantApp](https://www.scaleway.com/en/docs/how-to-use-the-gitlab-instant-apps/) for the following steps.\n\n1. GitLab Runner requires a registration token for the link between your GitLab Instance and the Runner. Open the GitLab web interface of your Virtual Instance and log into it.\n\n1. Select the project you want to use in GitLab with the Runner. If you don't have a project yet, click **+** > **Create Project** and fill in the required information about the project.\n\n1. On the projects overview page, click **Settings** > **CI/CD** to view the Continuous Integration settings.\n\n1. On the Continuous Integration settings page, click **Expand** in the **Runner** section to view the required information to link GitLab with your Runner.\n\n1. Scroll down to retrieve the GitLab Instance URL and the registration token.\n\n1. 
Run the following command in the Terminal application on your Mac to launch the configuration wizard for your GitLab Runner:\n\n   ```\n   gitlab-runner register\n   ```\n\n   Enter the required information as follows:\n\n   ```\n   Runtime platform                                    arch=arm64 os=darwin pid=810 revision=2ebc4dc4 version=13.9.0\n   WARNING: Running in user-mode.\n   WARNING: Use sudo for system-mode:\n   WARNING: $ sudo gitlab-runner...\n\n   Enter the GitLab instance URL (for example, https://gitlab.com/):\n   http://163.172.141.212/   \u003C- Enter the URL of your GitLab instance\n   Enter the registration token:\n   1mWBwzWAZSL7-pR18K3Y  \u003C- Enter the registration token for your Runner\n   Enter a description for the runner:\n   [306a20a2-2e01-4f2e-bc76-a004d35d9962]: Mac mini M1  \u003C- Enter a description for your Runner\n   Enter tags for the runner (comma-separated):\n   Mac, mini, M1, dev, xcode  \u003C- Optionally, enter tags for the runner\n   Registering runner... succeeded                     runner=1mWBwzWA\n   Enter an executor: shell, virtualbox, docker+machine, custom, docker, docker-ssh, kubernetes, parallels, ssh, docker-ssh+machine:\n   shell  \u003C- Enter the \"shell\" executor for the runner\n   Runner registered successfully. Feel free to start it, but if it's running already the config should be automatically reloaded!\n   ```\n\n1. Reload the CI/CD configuration page of your GitLab instance. The runner is now linked to your project and displays as available.\n\n   > **Note:** If you have several projects in a GitLab group, you can configure the Runner at the group-level. Runners available at the group-level are available for all projects within said group.\n\n### Configuring CI for your project\n\nGitLab stores the configuration of the CI in a file called `.gitlab-ci.yml`. This file should be in the folder you created for your project. 
Typically this is the same directory where your Xcode project file (`ProjectName.xcodeproj`) is located. The GitLab CI configuration file is written in [YAML](https://yaml.org/).\n\nInside the configuration file you can specify information like:\n\n* The scripts you want to run.\n* Other configuration files and templates you want to include.\n* Dependencies and caches.\n* The commands you want to run in sequence and those you want to run in parallel.\n* The location to deploy your application to.\n* Whether you want to run the scripts automatically or trigger any of them manually.\n\n1. Open a text editor on your local computer and create the `.gitlab-ci.yml` file as in the following example.\n\n   ```\n   stages:\n     - build\n     - test\n\n   build-code-job:\n     stage: build\n     script:\n       - echo \"Check the ruby version, then build some Ruby project files:\"\n       - ruby -v\n       - rake\n\n   test-code-job1:\n     stage: test\n     script:\n       - echo \"If the files are built successfully, test some files with one command:\"\n       - rake test1\n   ```\n\n1. Save the file and make a new commit to add it to your repository.\n\n1. Push the commit to GitLab. The CI will automatically launch the tasks on your Runner.\n\nFor more information on the GitLab CI configuration file, refer to the [official documentation](https://docs.gitlab.com/ee/ci/yaml/gitlab_ci_yaml.html).\n\n### Speed up development with Scaleway and GitLab\n\nHaving a dedicated Mac available for executing your CI jobs can reduce your development team's cycle time. In this tutorial, we covered configuring a dedicated Mac mini M1 to host a GitLab Runner. If you want to learn more about the Mac mini M1 as-a-Service, refer to our [product documentation](https://www.scaleway.com/en/docs/apple-silicon-as-a-service-quickstart/).\nWe invite the GitLab community to start building on Scaleway today with a €10 voucher to use on dozens of products & services. 
Find out more [here.](https://www.scaleway.com/en/gitlab-m1/)\n\n\u003Chr>\n\n_Mac mini, macOS are trademarks of Apple Inc., registered in the U.S. and other countries and regions. IOS is a trademark or registered trademark of Cisco in the U.S. and other countries and is used by Apple under license. Scaleway is not affiliated with Apple Inc._\n",[9,1931,1931],{"slug":1952,"featured":6,"template":687},"tuto-mac-m1-gitlab-ci","content:en-us:blog:tuto-mac-m1-gitlab-ci.yml","Tuto Mac M1 Gitlab Ci","en-us/blog/tuto-mac-m1-gitlab-ci.yml","en-us/blog/tuto-mac-m1-gitlab-ci",{"_path":1958,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1959,"content":1965,"config":1971,"_id":1973,"_type":13,"title":1974,"_source":15,"_file":1975,"_stem":1976,"_extension":18},"/en-us/blog/tutorial-automated-release-and-release-notes-with-gitlab",{"title":1960,"description":1961,"ogTitle":1960,"ogDescription":1961,"noIndex":6,"ogImage":1962,"ogUrl":1963,"ogSiteName":672,"ogType":673,"canonicalUrls":1963,"schema":1964},"Tutorial: Automate releases and release notes with GitLab","With the GitLab Changelog API, you can automate the generation of release artifacts, release notes, and a comprehensive changelog detailing all user-centric software modifications.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749659978/Blog/Hero%20Images/automation.png","https://about.gitlab.com/blog/tutorial-automated-release-and-release-notes-with-gitlab","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"Tutorial: Automate releases and release notes with GitLab\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Ben Ridley\"}],\n        \"datePublished\": \"2023-11-01\",\n      }",{"title":1960,"description":1961,"authors":1966,"heroImage":1962,"date":1968,"body":1969,"category":879,"tags":1970,"updatedDate":1361},[1967],"Ben Ridley","2023-11-01","***2025 update** - The Changelog API has continued to evolve 
and now has some great new capabilities we don’t cover in this blog, such as the ability to provide custom changelogs with templated values from your commit history. [Discover more in the official Changelogs docs.](https://docs.gitlab.com/user/project/changelogs/)*\n\nWhen you develop software that users rely on, effective communication about changes with each release is essential. By keeping users informed about new features and any modifications or removals, you ensure they maximize the software's benefits and avoid encountering unpleasant surprises during upgrades.\n\nHistorically, creating release notes and maintaining a changelog has been a laborious task, requiring developers to monitor changes externally or release managers to sift through merge histories. With the GitLab Changelog API, you can use the rich history provided in our git repository to easily create release notes and maintain a changelog.\n\nIn this tutorial, we'll delve into automating releases with GitLab, covering the generation of release artifacts, release notes, and a comprehensive changelog detailing all user-centric software modifications.\n\n## Releases in GitLab\nFirst, let's explore how releases work in GitLab.\n\nIn GitLab, a release is a specific version of your code, identified by a git tag, that includes details about changes since the last release (and release notes) and any related artifacts built from that version of the code, such as Docker images, installation packages, and documentation.\n\nYou can create and track releases in GitLab using the UI by calling our Release API or by defining a special `release` job inside a CI pipeline. In this tutorial, we'll use the `release` job in a CI/CD pipeline, which allows us to extend the automation we're using in our pipelines for testing, code scanning, etc. 
to also perform automated releases.\n\nTo automate our releases, we first need to answer this question: Where are we going to get the information on changes made for our release notes and our changelog? The answer: Our git repository, which provides us with a rich history of development activity through commit messages and merge commit history. Let's see if we can leverage this rich history to automatically create our notes and changelogs.\n\n## Introducing commit trailers\n[Commit trailers](https://git-scm.com/docs/git-interpret-trailers) are structured entries in your git commits, created by adding simple `\u003CHEADER>:\u003CBODY>` format messages to the end of your commit. The `git` CLI tool can then parse and extract these for use in other systems. An example you might have already used is `git commit --sign-off` to sign off on a commit. This is implemented by adding a `Signed-off-by: \u003CYour Name>` trailer to the commit. We can add any arbitrary structured data here, which makes it a great place to store information that could be useful for our changelog.\n\nIn fact, if we use a `Changelog: \u003Cadded/changed/removed>` trailer in our commits, the GitLab Changelog API will parse these and use them to create a changelog for us automatically!\n\nLet's see this in action by making some changes to a real codebase and performing a release, and generating release notes and changelog entries.\n\n## Our example project\nFor the purposes of this blog, I'm using a simple Python web app repository. Let's pretend Version 1.0.0 of the application was just released and is the current version of the code. 
I've also created a 1.0.0 release in GitLab, which I did manually because we haven't created our automated release pipeline yet:\n\n![A screenshot of the GitLab UI showing a release for Version 1.0.0](https://about.gitlab.com/images/blogimages/2023-08-22-automated-release-and-release-notes-with-gitlab/1-0-release.png)\n\n## Making our changes\nWe're in rapid development mode, so we're going to be working on releasing Version 2.0.0 of our application today. As part of our 2.0.0 release, we're going to be adding a new feature to our app: A chatbot! And we're also going to be removing the quantum blockchain feature, because we only needed that for our first venture capital funding round. Also, we're going to be adding an automated release job to our CI/CD pipeline for our 2.0.0 release.\n\nFirst, let's remove unneeded features. I've created a merge request that contains the necessary removals. Importantly, we need to ensure we have a commit message that includes the `Changelog: removed` trailer. There's a few ways to do this, such as including it directly in a commit, or performing an interactive rebase and adding it using the CLI. But I think the easiest way in our situation is to leave it until the end and then use the `Edit commit message` button in GitLab to add the trailer to the merge commit like so:\n\n![A screenshot the GitLab UI showing a merge request removing unused features](https://about.gitlab.com/images/blogimages/2023-08-22-automated-release-and-release-notes-with-gitlab/remove-unused-features-mr.png)\n\nIf you use this method, you can also change the merge commit title to something more succinct. I've changed the title of my merge commit to 'Remove Unused Features', as this is what will appear in the changelog entry.\n\nNext, let's add some new functionality for the 2.0.0 release. 
Again, all we need to do is open another merge request that includes our new features and then edit the merge commit to include the `Changelog: added` trailer and edit the commit title to be more succinct:\n\n![A screenshot of the GitLab UI showing a merge request to add new functionality](https://about.gitlab.com/images/blogimages/2023-08-22-automated-release-and-release-notes-with-gitlab/add-chatbot-mr.png)\n\nNow we're pretty much ready to release 2.0.0. But we don't want to create our release manually this time. So before our release we're going to add some jobs to our `.gitlab-ci.yml` file that will perform the release for us automatically, and generate the respective release notes and changelog entries, when we tag our code with a new version like `2.0.0`.\n\n**Note:** If you want to enforce changelog trailers, consider using something like [Danger to perform automated checks for MR conventions](https://docs.gitlab.com/ee/development/dangerbot.html).\n\n## Building an automated release pipeline\nFor our pipeline to work, we need to create a project access token that will allow us to call GitLab's API to generate changelog entries. [Create a project access token with the API scope](https://docs.gitlab.com/ee/user/project/settings/project_access_tokens.html#create-a-project-access-token), and then [store the token as a CI/CD variable](https://docs.gitlab.com/ee/ci/variables/#define-a-cicd-variable-in-the-ui) called `CI_API_TOKEN`. 
We'll reference this variable to authenticate to the API.\n\nNext, we're going to add two new jobs to our `gitlab-ci.yml` file:\n```yaml\nprepare_job:\n  stage: prepare\n  image: alpine:latest\n  rules:\n  - if: '$CI_COMMIT_TAG =~ /^v?\\d+\\.\\d+\\.\\d+$/'\n  script:\n    - apk add curl jq\n    - 'curl -H \"PRIVATE-TOKEN: $CI_API_TOKEN\" \"$CI_API_V4_URL/projects/$CI_PROJECT_ID/repository/changelog?version=$CI_COMMIT_TAG\" | jq -r .notes > release_notes.md'\n  artifacts:\n    paths:\n    - release_notes.md\n\nrelease_job:\n  stage: release\n  image: registry.gitlab.com/gitlab-org/release-cli:latest\n  needs:\n    - job: prepare_job\n      artifacts: true\n  rules:\n  - if: '$CI_COMMIT_TAG =~ /^v?\\d+\\.\\d+\\.\\d+$/'\n  script:\n    - echo \"Creating release\"\n  release:\n    name: 'Release $CI_COMMIT_TAG'\n    description: release_notes.md\n    tag_name: '$CI_COMMIT_TAG'\n    ref: '$CI_COMMIT_SHA'\n    assets:\n      links:\n        - name: 'Container Image $CI_COMMIT_TAG'\n          url: \"https://$CI_REGISTRY_IMAGE/$CI_COMMIT_REF_SLUG:$CI_COMMIT_SHA\"\n```\n\nIn the above configuration, the `prepare_job` uses `curl` and `jq` to call the GitLab Changelog API endpoint and then passes this to our `release_job` to actually create the release. To break it down further:\n- We use the project access token created earlier to call the GitLab Changelog API, which performs the generation of the release notes and we store this as an artifact.\n- We're using the `$CI_COMMIT_TAG` variable as the version. For this to work, we need to be using semantic versioning for our tags (something like `2.0.0` for example), so you'll notice I've also restricted the release job using a `rules` section that checks for a semantic version tag.\n\t- Semantic versioning is required for the GitLab Changelog API to work. It uses this format to find the most recent release to compare to our current release.\n- We use the official `release-cli` image from GitLab. 
The release-cli is required to use the `release` keyword in a job.\n- We use the `release` keyword to create a release in GitLab. This is a special job keyword reserved for creating a release and populating the required fields.\n- We can pass a file as an argument to the `description` of the release. In our case, it's the file we generated in the `prepare_job`, which was passed to this job as an artifact.\n- We've also included our container image that is being built earlier in the pipeline as a release asset. You can attach any assets you'd like from your build process, such as binaries or documentation by providing a URL to wherever you've uploaded them earlier in the pipeline.\n\n## Performing an automated release\nWith this setup, all we need to do to perform a release is push a tag to our repository that follows our versioning scheme. You can simply push a tag using the CLI, this example uses GitLab's UI to create a tag on the main branch. Create a tag by selecting Code -> Tags -> New Tag on the sidebar:\n![A screenshot of the GitLab UI illustrating how to create a tag](https://about.gitlab.com/images/blogimages/2023-08-22-automated-release-and-release-notes-with-gitlab/create-2-tag.png)\n\nOn creation, our pipelines will start to execute. The GitLab Changelog API will automatically generate release notes for us as markdown, which contains all the changes between this release and the previous release. 
Here's the resulting markdown generated in our example:\n\n```md\n## 2.0.0 (2023-08-25)\n\n### added (1 change)\n\n- [Add ChatBot](gl-demo-ultimate-bridley/super-devsecops-incorporated/simply-notes-release-demo@0c3601a45af617c5481322bfce4d71db1f911b02) ([merge request](gl-demo-ultimate-bridley/super-devsecops-incorporated/simply-notes-release-demo!4))\n\n### removed (1 change)\n\n- [Remove Unused Features](gl-demo-ultimate-bridley/super-devsecops-incorporated/simply-notes-release-demo@463d453c5ae0f4fc611ea969e5442e3298bf0d8a) ([merge request](gl-demo-ultimate-bridley/super-devsecops-incorporated/simply-notes-release-demo!3))\n```\n\nAs you can see, GitLab has extracted the entries for our release notes automatically using our git commit trailers. In addition, it's helpfully provided links back to the merge request so readers can see more details and discussion around the changes.\n\nAnd now, our final release:\n![The GitLab release UI showing a release for version 2.0.0](https://about.gitlab.com/images/blogimages/2023-08-22-automated-release-and-release-notes-with-gitlab/2-0-release.png)\n\n## Creating the changelog\nNext, we want to update our changelog (which is basically a collated history of all your release notes). You can use a `POST` request to the changelog API endpoint we used earlier to do this.\n\nYou can do this as part of your release pipeline if you like, for example by adding this to the `script` section of your prepare job:\n```sh\ncurl -H \"PRIVATE-TOKEN: $CI_API_TOKEN\" -X POST \"$CI_API_V4_URL/projects/$CI_PROJECT_ID/repository/changelog?version=$CI_COMMIT_TAG\"\n```\n\n**Note that this will actually modify the repository.** It will create a commit to add the latest notes to a `CHANGELOG.md` file:\n![A screenshot of the repository which shows a commit updating the changelog file](https://about.gitlab.com/images/blogimages/2023-08-22-automated-release-and-release-notes-with-gitlab/changelog-api-commit.png)\n\nAnd we are done! 
By utilizing the rich history provided by `git` with some handy commit trailers, we can leverage GitLab's powerful API and CI/CD pipelines to automate our release process and generate release notes for us.\n\n> If you’d like to explore the project we used for this article, [you can find the project at this link](https://gitlab.com/gitlab-learn-labs/sample-projects/release-automation-demo).\n",[682,773,108,819,776,9],{"slug":1972,"featured":6,"template":687},"tutorial-automated-release-and-release-notes-with-gitlab","content:en-us:blog:tutorial-automated-release-and-release-notes-with-gitlab.yml","Tutorial Automated Release And Release Notes With Gitlab","en-us/blog/tutorial-automated-release-and-release-notes-with-gitlab.yml","en-us/blog/tutorial-automated-release-and-release-notes-with-gitlab",{"_path":1978,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1979,"content":1985,"config":1991,"_id":1993,"_type":13,"title":1994,"_source":15,"_file":1995,"_stem":1996,"_extension":18},"/en-us/blog/tyranny-of-the-clock",{"title":1980,"description":1981,"ogTitle":1980,"ogDescription":1981,"noIndex":6,"ogImage":1982,"ogUrl":1983,"ogSiteName":672,"ogType":673,"canonicalUrls":1983,"schema":1984},"6 Lessons we learned when debugging a scaling problem on GitLab.com","Get a closer look at how we investigated errors originating from scheduled jobs, and how we stumbled upon \"the tyranny of the clock.\"","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749667913/Blog/Hero%20Images/clocks.jpg","https://about.gitlab.com/blog/tyranny-of-the-clock","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"6 Lessons we learned when debugging a scaling problem on GitLab.com\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Craig Miskell\"}],\n        \"datePublished\": \"2019-08-27\",\n      
}",{"title":1980,"description":1981,"authors":1986,"heroImage":1982,"date":1988,"body":1989,"category":726,"tags":1990},[1987],"Craig Miskell","2019-08-27","\nHere is a story of a scaling problem on GitLab.com: How we found it, wrestled with it, and ultimately resolved it. And how we discovered the tyranny of the clock.\n\n## The problem\n\nWe started receiving reports from customers that they were intermittently seeing errors on Git pulls from GitLab.com, typically from CI jobs or similar automated systems. The reported error message was usually:\n```\nssh_exchange_identification: connection closed by remote host\nfatal: Could not read from remote repository\n```\nTo make things more difficult, the error message was intermittent and apparently unpredictable. We weren't able to reproduce it on demand, nor identify any clear indication of what was happening in graphs or logs. The error message wasn't particularly helpful either; the SSH client was being told the connection had gone away, but that could be due to anything: a flaky client or VM, a firewall we don't control, an ISP doing something strange, or an application problem at our end. We deal with a *lot* of connections to Git-over-SSH, in the order of ~26 million a day, or 300/s average, so trying to pick out a small number of failing ones out of that firehose of data was going to be difficult. It's a good thing we like a challenge.\n\n## The first clue\n\nWe got in touch with one of our customers (thanks Hubert Hölzl from Atalanda) who was seeing the problem several times a day, which gave us a foothold. Hubert was able to supply the relevant public IP address, which meant we could run some packet captures on our frontend HAproxy nodes, to attempt to isolate the problem from a smaller data set than 'All of the SSH traffic.' 
Even better, they were using the [alternate-ssh port](/blog/gitlab-dot-com-now-supports-an-alternate-git-plus-ssh-port/) which means we only had two HAProxy servers to look at, not 16.\n\nTrawling through these packet traces was still not fun; despite the constraints, there was ~500MB of packet capture from about 6.5 hours. We found the short-running connections, in which the TCP connection was established, the client sent a version string identifier, and then our HAProxy immediately tore down the connection with a proper TCP FIN sequence. This was the first great clue. It told us that it was definitely the GitLab.com end that was closing the connection, not something in between the client and us, meaning this was a problem we could debug.\n\n### Lesson #1: In Wireshark, the Statistics menu has a wealth of useful tools that I'd never really noticed until this endeavor.\n\nIn particular, 'Conversations' shows you a basic breakdown of time, packets, and bytes for each TCP connection in the capture, which you can sort. I *should* have used this at the start, instead of trawling through the captures manually. In hindsight, connections with small packet counts was what I was looking for, and the Conversations view shows this easily. I was then able to use this feature to find other instances, and verify that the first instance I found was not just an unusual outlier.\n\n## Diving into logs\n\nSo what was causing HAProxy to tear down the connection to the client? It certainly seemed unlikely that it was doing it arbitrarily, and there must be a deeper reason; another layer of [turtles](https://en.wikipedia.org/wiki/Turtles_all_the_way_down), if you will. The HAProxy logs seemed like the next place to check. Ours are stored/available in GCP BigQuery, which is handy because there's a lot of them, and we needed to slice 'n dice them in lots of different ways. 
But first, we were able to identify the log entry for one of the incidents from the packet capture, based on time and TCP ports, which was a major breakthrough. The most interesting detail in that entry was the `t_state` (Termination State) attribute, which was `SD`. From the HAProxy documentation:\n```\n    S: aborted by the server, or the server explicitly refused it\n    D: the session was in the DATA phase.\n```\n`D` is pretty clear; the TCP connection had been properly established, and data was being sent, which matched the packet capture evidence. The `S` means HAProxy received an RST, or an ICMP failure message from the backend. There was no immediate clue as to which case was occurring or possible causes. It could be anything from a networking issue (e.g. glitch or congestion) to an application-level problem. Using BigQuery to aggregate by the Git backends, it was clear it wasn't specific to any VM. We needed more information.\n\nSide note: It turned out that logs with `SD` weren't unique to the problem we were seeing. On the alternate-ssh port we get a lot of scanning for HTTPS, which leads to `SD` being logged when the SSH server sees a TLS ClientHello message while expecting an SSH greeting. This created a brief detour in our investigation.\n\nOn capturing some traffic between HAProxy and the Git server and using the Wireshark statistics tools again, it was quickly obvious that SSHD on the Git server was tearing down the connection with a TCP FIN-ACK immediately after the TCP three-way handshake; HAProxy still hadn't sent the first data packet but was about to, and when it did very shortly after, the Git server responded with a TCP RST. And thus we had the reason for HAProxy to log a connection failure with `SD`. 
SSH was closing the connection, apparently deliberately and cleanly, with the RST being just an artifact of the SSH server receiving a packet after the FIN-ACK, and doesn't mean anything else here.\n\n## An illuminating graph\n\nWhile watching and analyzing the `SD` logs in BigQuery, it became apparent that there was quite a bit of clustering going on in the time dimension, with spikes in the first 10 seconds after the top of each minute, peaking at about 5-6 seconds past:\n\n![Connection errors grouped by second](https://gitlab.com/gitlab-com/gl-infra/infrastructure/uploads/72cd1b763c51781fa4224495f059afb5/image.png){: .shadow.medium.center}\nConnection errors, grouped by second-of-the-minute\n{: .note.text-center}\n\nThis graph is created from data collated over a number of hours, so the fact that the pattern is so substantial suggests the cause is consistent across minutes and hours, and possibly even worse at specific times of the day. Even more interesting, the average spike is 3x the base load, which means we have a fun scaling problem and simply provisioning 'more resource' in terms of VMs to meet the peak loads would potentially be prohibitively expensive. This also suggested that we were hitting some hard limit, and was our first clue to an underlying systemic problem, which I have called \"the tyranny of the clock.\"\n\nCron, or similar scheduling systems, often don't have sub-minute accuracy, and if they do, it isn't used very often because humans prefer to think about things in round numbers. Consequently, jobs will run at the start of the minute or hour or at other nice round numbers. 
If they take a couple of seconds to do any preparations before they do a `git fetch` from GitLab.com, this would explain the connection pattern with increases a few seconds into the minute, and thus the increase in errors around those times.\n\n### Lesson #2: Apparently a lot of people have time synchronization (via NTP or otherwise) set up properly.\n\nIf they hadn't, this problem wouldn't have emerged so clearly. Yay for NTP!\n\nSo what could be causing SSH to drop the connection?\n\n## Getting close\n\nLooking through the documentation for SSHD, we found MaxStartups, which controls the maximum number of connections that can be in the pre-authenticated state. At the top of the minute, under the stampeding herd of scheduled jobs from around the internet, it seems plausible that we were exceeding the connections limit. MaxStartups actually has three numbers: the low watermark (the number at which it starts dropping connections), a percentage of connections to (randomly) drop for any connections above the low watermark, and an absolute maximum above which all new connections are dropped. The default is 10:30:100, and our setting at this time was 100:30:200, so clearly we had increased the connections in the past. Perhaps it was time to increase it again.\n\nSomewhat annoyingly, the version of openssh on our servers is 7.2, and the only way to see that MaxStartups is being breached in that version is to turn on Debug level logging. This is an absolute firehose of data, so we carefully turned it on for a short period on only one server. Thankfully within a couple of minutes it was obvious that MaxStartups was being breached, and connections were being dropped early as a result.\n\nIt turns out that OpenSSH 7.6 (the version that comes with Ubuntu 18.04) has better logging about MaxStartups; it only requires Verbose logging to get it. 
While not ideal, it's better than Debug level.\n\n### Lesson #3: It is polite to log interesting information at default levels and deliberately dropping a connection for any reason is definitely interesting to system administrators.\n\nSo now that we have a cause for the problem, how can we address it? We can bump MaxStartups, but what will that cost? Definitely a small bit of memory, but would it cause any untoward downstream effects? We could only speculate, so we had to just try it. We bumped the value to 150:30:300 (a 50% increase). This had a great positive effect, and no visible negative effect (such as increased CPU load):\n\n![Before and after graph](https://gitlab.com/gitlab-com/gl-infra/production/uploads/047a4859caafc6681c9d034c202418b9/image.png){: .shadow.medium.center}\n\nBefore and after bumping MaxStartups by 50%\n{: .note.text-center}\n\nNote the substantial reduction after 01:15. We've clearly eliminated a large proportion of the errors, although a non-trivial amount remained. Interestingly, these are clustered around round numbers: the top of the hour, every 30 minutes, 15 minutes, and 10 minutes. Clearly the tyranny of the clock continues. The top of the hour saw the biggest peaks, which seems reasonable in hindsight; a lot of people will simply schedule their jobs to run every hour at 0 minutes past the hour. This finding was more evidence that confirms our theory that it was scheduled jobs causing the spikes, and that we were on the right path with this error being due to a numerical limit.\n\nDelightfully, there were no obvious negative effects. CPU usage on the SSH servers stayed about the same and didn't cause any noticeable increase in load. Even though we were unleashing more connections that would previously have been dropped, and doing so at the busiest times. 
This was promising.\n\n## Rate limiting\n\nAt this point we weren't keen on simply bumping MaxStartups higher; while our 50% increase to-date had worked, it felt pretty crude to keep on pushing this arbitrarily higher. Surely there was something else we could do.\n\nMy search took me to the HAProxy layer that we have in front of the SSH servers. HAProxy has a nice 'rate-limit sessions' option for its frontend listeners. When configured, it constrains the new TCP connections per-second that the frontend will pass through to backends, and leaves additional incoming connections on the TCP socket. If the incoming rate exceeds the limit (measured every millisecond) the new connections are simply delayed. The TCP client (SSH in this case) simply sees a delay before the TCP connection is established, which is delightfully graceful, in my opinion. As long as the overall rate never spiked too high above the limit for too long, we'd be fine.\n\nThe next question was what number we should use. This is complicated by the fact that we have 27 SSH backends, and 18 HAProxy frontends (16 main, two alt-ssh), and the frontends don't coordinate amongst themselves for this rate limiting. We also had to take into account how long it takes a new SSH session to make it past authentication: Assuming MaxStartups of 150, if the auth phase took two seconds we could only send 75 new sessions per second to each backend. The [note on the issue](https://gitlab.com/gitlab-com/gl-infra/infrastructure/issues/7168#note_191678023) has the derivation of the math, and I won't recount it in detail here, except to note that there are four quantities needed to calculate the rate-limit: the counts of both server types, the value of MaxStartups, and `T`, which is how long the SSH session takes to auth. `T` is critical, but we could only estimate it. You might speculate how well I did at this estimate, but that would spoil the story. 
I went with two seconds for now, and came to a rate limit per frontend of approximately 112.5, and rounded down to 110.\n\nWe deployed. Everything was happy, yes? Errors tended to zero, and children danced happily in the streets? Well, not so much. This change had no visible effect on the error rates. I will be honest here, and say I was rather distressed. We had missed something important, or misunderstood the problem space entirely.\n\nSo we went back to logs (and eventually the HAProxy metrics), and were able to verify that the rate limiting was at least working to limit to the number we specified, and that historically this number had been higher, so we were successfully constraining the rate at which connections were being dispatched. But clearly the rate was still too high, and not only that, it wasn't even *close* enough to the right number to have a measurable impact. Looking at the selection of backends (as logged by HAproxy) showed an oddity: At the top of the hour, the backend connections were not evenly distributed across all the SSH servers. In the sample time chosen, it varied from 30 to 121 in a given second, meaning our load balancing wasn't very balanced. Reviewing the configuration showed we were using `balance source`, so that a given client IP address would always connect to the same backend. This might be good if you needed session stickiness, but this is SSH and we have no such need. It was deliberately chosen some time ago, but there was no record as to why. We couldn't come up with a good reason to keep it, so we tried changing to leastconn, which distributes new incoming connections to the backend with the least number of current connections. 
This was the result, of the CPU usage on our SSH (Git) fleet:\n\n![Leastconn before and after](https://gitlab.com/gitlab-com/gl-infra/infrastructure/uploads/b006877c1e45ad0255a316a96750402c/before-after-leastconn-change.png){: .shadow.medium.center}\n\nBefore and after turning on leastconn\n{: .note.text-center}\n\nClearly leastconn was a good idea. The two low-usage lines are our [Canary](/handbook/engineering/infrastructure/library/canary/) servers and can be ignored, but the spread on the others before the change was 2:1 (30% to 60%), so clearly some of our backends were much busier than others due to the source IP hashing. This was surprising to me; it seemed reasonable to expect the range of client IPs to be sufficient to spread the load much more evenly, but apparently a few large outliers were enough to skew the usage significantly.\n\n### Lesson #4: When you choose specific non-default settings, leave a comment or link to documentation/issues as to why, future people will thank you.\n\n This transparency is [one of GitLab's core values](https://handbook.gitlab.com/handbook/values/#say-why-not-just-what).\n\nTurning on leastconn also helped reduce the error rates, so it is something we wanted to continue with. In the spirit of experimenting, we dropped the rate limit lower to 100, which further reduced the error rate, suggesting that perhaps the initial estimate for `T` was wrong. But if so, it was too small, leading to the rate limit being too high, and even 100/s felt pretty low and we weren't keen to drop it further. Unfortunately for some operational reasons these two changes were just an experiment, and we had to roll back to `balance source` and rate limit of 100.\n\nWith the rate limit as low as we were comfortable with, and leastconn insufficient, we tried increasing MaxStartups: first to 200 with some effect, then to 250. 
Lo, the errors all but disappeared, and nothing bad happened.\n\n### Lesson #5: As scary as it looks, MaxStartups appears to have very little performance impact even if it's raised much higher than the default.\n\nThis is probably a large and powerful lever we can pull in future, if necessary. It's possible we might notice problems if it gets into the thousands or tens of thousands, but we're a long way from that.\n\nWhat does this say about my estimate for `T`, the time to establish and authenticate an SSH session? Reverse engineering the equation, knowing that 200 wasn't quite enough for MaxStartups, and 250 is enough, we could calculate that `T` is probably between 2.7 and 3.4 seconds. So the estimate of two seconds wasn't far off, but the actual value was definitely higher than expected. We'll come back to this a bit later.\n\n## Final steps\n\nLooking at the logs again in hindsight, and after some contemplation, we discovered that we could identify this specific failure with t_state being `SD` and b_read (bytes read by client) of 0. As noted above, we handle approximately 26-28 million SSH connections per day. It was unpleasant to discover that at the worst of the problem, roughly 1.5% of those connections were being dropped badly. Clearly the problem was bigger than we had realised at the start. There was nothing about this that we couldn't have identified earlier (right back when we discovered that t_state=\"SD\" was indicative of the issue), but we didn't think to do so, and we should have. It might have increased how much effort we put in.\n\n### Lesson #6: Measure the actual rate of your errors as early as possible.\n\nWe might have put a higher priority on this earlier had we realized the extent of the problem, although it was still dependent on knowing the identifying characteristic.\n\nOn the plus side, after our bumps to MaxStartups and rate limiting, the error rate was down to 0.001%, or a few thousand per day. 
This was better, but still higher than we liked. After we unblocked some other operational matters, we were able to formally deploy the leastconn change, and the errors were eliminated entirely. We could breathe easy again.\n\n## Further work\n\nClearly the SSH authentication phase is still taking quite a while, perhaps up to 3.4 seconds. GitLab can use [AuthorizedKeysCommand](https://docs.gitlab.com/ee/administration/operations/fast_ssh_key_lookup.html) to look up the SSH key directly in the database. This is critical for speedy operations when you have a large number of users, otherwise SSHD has to sequentially read a very large `authorized_keys` file to look up the public key of the user, and this doesn't scale well. We implement the lookup with a little bit of ruby that calls an internal HTTP API. [Stan Hu](/company/team/#stanhu), engineering fellow and our resident source of GitLab knowledge, identified that the unicorn instances on the Git/SSH servers were experiencing substantial queuing. This could be a significant contributor to the ~3-second pre-authentication stage, and therefore something we need to look at further, so investigations continue. We may increase the number of unicorn (or puma) workers on these nodes, so there's always a worker available for SSH. However, that isn't without risk, so we will need to be careful and measure well. Work continues, but slower now that the core user problem has been mitigated. We may eventually be able to reduce MaxStartups, although given the lack of negative impact it seems to have, there's little need. It would make everyone more comfortable if OpenSSH let us see how close we were to hitting MaxStartups at any point, rather than having to go in blind and only find out we were close when the limit is breached and connections are dropped.\n\nWe also need to alert when we see HAProxy logs that indicate the problem is occurring, because in practice there's no reason it should ever happen. 
If it does, we need to increase MaxStartups further, or if resources are constrained, add more Git/SSH nodes.\n\n## Conclusion\n\nComplex systems have complex interactions, and there is often more than one lever that can be used to control various bottlenecks. It's good to know what tools are available because they often have trade-offs. Assumptions and estimates can also be risky. In hindsight, I would have attempted to get a much better measurement of how long authentication takes, so that my `T` estimate was better.\n\nBut the biggest lesson is that when large numbers of people schedule jobs at round numbers on the clock, it leads to really interesting scaling problems for centralized service providers like GitLab. If you're one of them, you might like to consider putting in a random sleep of maybe 30 seconds at the start, or pick a random time during the hour *and* put in the random sleep, just to be polite and fight the tyranny of the clock.\n\nCover image by [Jon Tyson](https://unsplash.com/@jontyson) on [Unsplash](https://unsplash.com)\n{: .note}\n",[9,684,941],{"slug":1992,"featured":6,"template":687},"tyranny-of-the-clock","content:en-us:blog:tyranny-of-the-clock.yml","Tyranny Of The Clock","en-us/blog/tyranny-of-the-clock.yml","en-us/blog/tyranny-of-the-clock",{"_path":1998,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":1999,"content":2004,"config":2009,"_id":2011,"_type":13,"title":2012,"_source":15,"_file":2013,"_stem":2014,"_extension":18},"/en-us/blog/ultimate-git-guide",{"title":2000,"description":2001,"ogTitle":2000,"ogDescription":2001,"noIndex":6,"ogImage":740,"ogUrl":2002,"ogSiteName":672,"ogType":673,"canonicalUrls":2002,"schema":2003},"Our ultimate guide to Git","Open source pioneer Git is 15 years old. 
Here is our guide to making the most of it.","https://about.gitlab.com/blog/ultimate-git-guide","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"Our ultimate guide to Git\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Valerie Silverthorne\"}],\n        \"datePublished\": \"2020-04-20\",\n      }",{"title":2000,"description":2001,"authors":2005,"heroImage":740,"date":2006,"body":2007,"category":680,"tags":2008},[815],"2020-04-20","\n\n_Git, a [source code management](/solutions/source-code-management/) tool and arguably the most famous open source software project, turned 15 in April 2020. That’s a milestone no matter how you look at it, and not surprisingly our team has a lot to say about Git. From a look back at the past to newbie-friendly explanations, we’ve pulled together the ultimate guide to Git (as told by GitLab)._\n\n## Meet Git\n\nIf you’re just getting started with software development, you’ll have questions. Luckily, we have answers including background on developer Linus Torvalds in [\"A beginner’s guide to Git\"](/blog/beginner-git-guide/).\n\n![Linus Torvalds](https://about.gitlab.com/images/blogimages/linustorvalds.png){: .shadow.small.center}\n\nThe godfather of Git, Linus Torvalds.\n{: .note.text-center}\n\n## Get more out of Git\n\nWe all spend a ton of time working with Git so it makes sense to polish up your workflow so it shines. We’ve [got the lowdown](/blog/15-git-tips-improve-workflow/) on Git blame, .gitignore, how to pull frequently, and more.\n\n## Missed Git Merge?\n\nNot everyone was lucky enough to attend the actual, in-person Git birthday party. 
Here’s our [first-person account](/blog/git-merge-fifteen-year-git-party/) of the festivities, complete with lots of pictures.\n\n![birthday balloons](https://about.gitlab.com/images/blogimages/balloons.jpg){: .shadow.small.center}\n\n## Why Git flow doesn’t always go with the flow\n\nYou can have too much of a good thing, and if you doubt that, perhaps it’s because you haven’t yet encountered Git flow. Although designed to streamline development it ends up creating extra effort – too many branches and too much task switching. Never fear, though, [we have a solution](/blog/what-is-gitlab-flow/).\n\n## Git goes (really) big\n\nWhen Git was invented 15 years ago, video streaming (and gaming) weren’t even on the horizon. Git can handle those huge files but there’s one hiccup: You can’t just download the one you need, Git insists you download all of them. Enter Git Partial Clone which speeds up the process so you can just grab the file you need. [Here’s how it works](/blog/partial-clone-for-massive-repositories/).\n\n## GitLab and GitHub on Git\n\nOur senior developer evangelist [Brendan O’Leary](/company/team/#brendan) did a bit of a point counter-point about Git and its past and future with GitHub’s distinguished software engineer [Jeff King](https://www.linkedin.com/in/pefflinkedin/) on [infoq.com](https://www.infoq.com/news/2020/04/git-fifteen-anniversary-qa/).\n\n## Never say never\n\nBrendan also admitted that 15 years ago, he was never ever going to use Git. Ahem. Feel free to enjoy [his mea culpa](https://www.computerweekly.com/blog/Open-Source-Insider/GitLab-guru-15-years-later-were-still-learning).\n\n## Dive into GitOps\n\nYou’ve heard the term, now is the time to understand what [GitOps](/solutions/gitops/) means and how it can work – well – in real world applications. 
Here’s what you need to know about [continuous delivery to production](/blog/why-gitops-should-be-workflow-of-choice/).\n\nImage by [Adi Gold](https://unsplash.com/@adigold1) on [Unsplash](https://www.unsplash.com)\n{: .note}\n",[9,683,1242],{"slug":2010,"featured":6,"template":687},"ultimate-git-guide","content:en-us:blog:ultimate-git-guide.yml","Ultimate Git Guide","en-us/blog/ultimate-git-guide.yml","en-us/blog/ultimate-git-guide",{"_path":2016,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":2017,"content":2023,"config":2029,"_id":2031,"_type":13,"title":2032,"_source":15,"_file":2033,"_stem":2034,"_extension":18},"/en-us/blog/using-the-gitlab-ci-slash-cd-for-smart-home-configuration-management",{"title":2018,"description":2019,"ogTitle":2018,"ogDescription":2019,"noIndex":6,"ogImage":2020,"ogUrl":2021,"ogSiteName":672,"ogType":673,"canonicalUrls":2021,"schema":2022},"How to simplify your smart home configuration with GitLab CI/CD","How to use GitLab pipelines to automatically test and deploy new home-assistant configurations, wherever you are.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749678717/Blog/Hero%20Images/ci-smart-home-configuration.jpg","https://about.gitlab.com/blog/using-the-gitlab-ci-slash-cd-for-smart-home-configuration-management","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"How to simplify your smart home configuration with GitLab CI/CD\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Mario de la Ossa\"}],\n        \"datePublished\": \"2018-08-02\",\n      }",{"title":2018,"description":2019,"authors":2024,"heroImage":2020,"date":2026,"body":2027,"category":726,"tags":2028},[2025],"Mario de la Ossa","2018-08-02","\nSo you've read all about the [Internet of Things](https://en.wikipedia.org/wiki/Internet_of_things) and all the cool stuff you can do with it – from setting up timers for your lights to [making your 
breakfast](/blog/introducing-auto-breakfast-from-gitlab/) – and now you're itching to get started? Great!\n\nIf you're a power user, you've probably settled on using [Home Assistant](https://www.home-assistant.io/) as your smart home hub, but this choice has a few pitfalls:\n\n- It's annoying to SSH into the server itself to change configuration. Wouldn't you like to use your favorite local editor instead?\n- How do you keep your configuration backed up?\n- How do you protect yourself from accidentally messing up the configuration?\n\nIn this guide we'll show you how to fix these annoyances yourself, thanks to Git and the power of [GitLab Pipelines](https://docs.gitlab.com/ee/ci/pipelines/index.html)! We will set up a pipeline that will check your home-assistant configuration and deploy it to your home-assistant install, giving you the power to deploy changes from anywhere in the world with a simple `git push`!\nDid you go on vacation and forget you wanted your lights to [turn on and off randomly to make it seem like someone's home](https://community.home-assistant.io/t/set-random-time-for-random-automatic-turning-off-lights-mimic-someone-is-home/3524)? No worries! Just open GitLab's [Web IDE](https://docs.gitlab.com/ee/user/project/web_ide/) and make your changes from your hotel room.\n\nBy the end of this tutorial you'll have:\n\n- Automatic configuration backups thanks to `git`. You'll be able to see the history of every change you've made and revert changes easily.\n- Automatic configuration testing via GitLab pipelines. 
Never again will a simple typo have you scratching your head, wondering why things don't work!\n- An easy way to push changes to your Home Assistant configuration without having to SSH into the server.\n\n## Requirements\n\nIn this guide we'll be assuming a few things:\n\n- You installed Home Assistant using the Docker image\n- The server Home Assistant runs in is accessible from the internet via SSH (or you're using a self-managed GitLab installation in the same network)\n\n## Set up your server\n\n1.   Navigate to your Home Assistant configuration folder.\n1.   Create a new file called `.gitignore` with the following content:\n\n     ```\n     *.db\n     *.log\n     ```\n\n1.   Initialize the Git repo\n\n     ```bash\n     git init\n     git add .\n     git commit -m 'Initial commit'\n     ```\n1.   [Create a new GitLab project](https://gitlab.com/projects/new) and push to it\n\n     ```bash\n     git remote add origin YOUR_PROJECT_HERE\n     git push -u origin master\n     ```\n\nWith this you now have a backup of your Home Assistant configuration. 
Let's now set up the GitLab pipeline!\n\n## Setting up the pipeline\n\nWe have a few goals for the [CI/CD pipeline](/topics/ci-cd/):\n- Test the new configuration to ensure it's valid\n- Deploy the new configuration to the Home Assistant server\n- Bonus: Notify us of a successful deployment, since the default is to only notify for failures\n\n[The complete `.gitlab-ci.yml` can be found here.](https://gitlab.com/mdelaossa/hass-via-cicd/blob/master/.gitlab-ci.yml)\n{: .note}\n[General documentation for how to configure jobs can be found here.](https://docs.gitlab.com/ee/ci/yaml/)\n{: .note}\n\nWe will be using the following stages in our pipeline:\n- test: Will test the Home Assistant configuration to ensure it is valid\n- deploy: Will update the Home Assistant configuration in the server and restart Home Assistant\n- notify: Will send a push notification with success/failure state\n\nSince these aren't default pipeline stages we need to declare them in our `.gitlab-ci.yml` like so:\n\n```yaml\nstages:\n  - test\n  - deploy\n  - notify\n```\n\n### Automating configuration testing\n\nSince GitLab CI/CD [supports Docker images](https://docs.gitlab.com/ee/ci/docker/using_docker_images.html) and Home Assistant is available as a Docker image, this is a fairly straightforward stage to add.\n\nAdd this to your `.gitlab-ci.yml` file:\n\n```yaml\ntest:\n  stage: test\n  image: homeassistant/amd64-homeassistant\n  script:\n    - hass --script check_config -c .\n```\n\nWith this we are creating a job called `test` which will run in the `test` stage. We're using the `homeassistant/amd64-homeassistant` image because it exposes the `hass` command globally so we can use the built-in configuration checking command on our committed files. 
That's it!\n\nFeel free to commit and push this change to test it out!\n\n```bash\ngit add .\ngit commit -m 'Added testing stage to GitLab pipeline'\ngit push\n```\n\nYou'll now see that a pipeline gets created whenever you push:\n\n![HASS Test pipeline success](https://about.gitlab.com/images/blogimages/hass-cicd/pipeline-pass-1.png){: .shadow.center.large}\n\nIf your configuration contains any errors, they'll be shown in the `Failed Jobs` view of the pipeline and you'll get an email notifying you of the failure:\n\n![HASS Test pipeline failure](https://about.gitlab.com/images/blogimages/hass-cicd/pipeline-fail-1.png){: .shadow.center.large}\n\n### Automating deployments\n\nNow that we have automated testing, let's add another stage that will deploy our new configuration if the tests pass!\n\n\"Deploying\" in this case will consist of:\n- SSHing into the server\n- Doing a `git pull` to pull down changes from the repo\n- Restart the Home Assistant Docker image\n\n#### Preparing the server (and GitLab) for SSH access\n\nSince we will be using SSH we need to prepare our server first. We'll follow [these instructions from the GitLab documentation](https://docs.gitlab.com/ee/ci/ssh_keys/).\nWe will also set some [CI/CD Variables](https://gitlab.com/help/ci/variables/README#variables).\n\n1.   Generate a new SSH key pair. It's OK to save them to the current folder as you'll delete them later anyway.\n\n     ```bash\n     ssh-keygen -t rsa -C \"hass-deploy\" -b 4096\n     ```\n\n1.   On the server that runs Home Assistant, save the contents of the public key (the file ending in `.pub`) to `/home/user_running_hass/.ssh/authorized_keys`\n1.   Go to your GitLab project's CI/CD variables (inside Settings). Add the contents of the private key file to a variable named `SSH_PRIVATE_KEY`. 
- `DEPLOY_USER`: the user running HASS that the runner will SSH into the server as to perform the deploy
configuration files are kept)\n- We update the configuration with a `git pull`, since this directory is a Git repo\n- We restart Home Assistant (in this case the Docker image was created with the name `home-assistant`. Please use the name of your container)\n\nNote: If you did not create `DEPLOY_USER` and `DEPLOY_HOST` variables on GitLab, please replace the proper values in the script\n{: .note}\n\nNow let's commit and push this new stage to GitLab!\n```bash\ngit add .\ngit commit -m 'Added deploy stage to GitLab pipeline'\ngit push\n```\n\nWith this new stage added, you can now edit your configuration from anywhere (including the GitLab Web IDE!) and be confident that these changes will be pushed to your Home Assistant server if there are no issues with the configuration.\nThere's no longer a need to figure out how to connect directly to your Home Assistant server to make the edits you need.\n\n### Bonus: Successful deployment notifications\n\nYou'll notice that if the configuration is wrong or an error occurs during the deployment, you will get an email notification, but what about when everything runs successfully?\n\nWe have two options:\n\n1. Enable the `Pipeline Emails` integration and set it to notify on every pipeline\n2. Add a new stage called `notify` and use it to send push notifications to your phone\n\nWhile email is really nice, there's something really satisfying about getting push notification for your services, so let's set things up using [Pushover](https://pushover.net/).\nYou'll need to create an 'Application' and add the token you get to a GitLab variable called `PUSHOVER_API_TOKEN`. 
You'll also need to add your user key to a variable called `PUSHOVER_USER_TOKEN`.\n\nSince we'd like a different notification depending on whether our pipeline passed or failed, we will be adding two jobs to the `notify` stage:\n\n```yaml\nnotify_success:\n  stage: notify\n  allow_failure: true\n  only:\n    - master\n  script:\n    - curl -s --form-string \"token=$PUSHOVER_API_TOKEN\" --form-string \"user=$PUSHOVER_USER_TOKEN\" --form-string \"message=New Hass config deployed successfully!\" https://api.pushover.net/1/messages.json\n\nnotify_fail:\n  stage: notify\n  allow_failure: true\n  only:\n    - master\n  when: on_failure\n  script:\n    - curl -s --form-string \"token=$PUSHOVER_API_TOKEN\" --form-string \"user=$PUSHOVER_USER_TOKEN\" --form-string \"message=New Hass config failed. Please check for errors\" https://api.pushover.net/1/messages.json\n```\n\nOur first job, `notify_success`, runs when the stage before it (`deploy`) completes successfully. This is the default for GitLab. Our `notify_fail` job on the other hand has `when: on_failure` set, which means it will _only_ run when the stage before it fails. We also set `allow_failure: true` on both these jobs so that we aren't notified of a failed pipeline if for some reason the notification commands fail. We also set the `only: - master` option since deploys only happen on the master branch.\n\nWe are using Pushover's API to send the message we want in the `script` area.\n\nWith this final stage in place, your pipeline should now look like this:\n\n![HASS pipeline overview](https://about.gitlab.com/images/blogimages/hass-cicd/pipeline-final-1.png){: .shadow.center.large}\n\n### Enjoy!\n\nThere you have it! Now you can edit your Home Assistant configuration from anywhere you'd like, using your favorite editor, by following three simple steps:\n\n1. `git clone PATH_TO_REPO` (if you have not cloned it before)\n2. Edit the configuration\n3. 
`git push -u remote master`\n\n[Photo](https://unsplash.com/photos/9TF54VdG0ws?utm_source=unsplash&utm_medium=referral&utm_content=creditCopyText) by Kevin Bhagat on [Unsplash](https://unsplash.com/search/photos/smart-home?utm_source=unsplash&utm_medium=referral&utm_content=creditCopyText)\n{: .note}\n",[773,9],{"slug":2030,"featured":6,"template":687},"using-the-gitlab-ci-slash-cd-for-smart-home-configuration-management","content:en-us:blog:using-the-gitlab-ci-slash-cd-for-smart-home-configuration-management.yml","Using The Gitlab Ci Slash Cd For Smart Home Configuration Management","en-us/blog/using-the-gitlab-ci-slash-cd-for-smart-home-configuration-management.yml","en-us/blog/using-the-gitlab-ci-slash-cd-for-smart-home-configuration-management",{"_path":2036,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":2037,"content":2043,"config":2048,"_id":2050,"_type":13,"title":2051,"_source":15,"_file":2052,"_stem":2053,"_extension":18},"/en-us/blog/what-is-git-the-ultimate-guide-to-gits-role-and-functionality",{"title":2038,"description":2039,"ogTitle":2038,"ogDescription":2039,"noIndex":6,"ogImage":2040,"ogUrl":2041,"ogSiteName":672,"ogType":673,"canonicalUrls":2041,"schema":2042},"What is Git? The ultimate guide to Git's role and functionality","Want to complete your projects with Git? Discover all of Git's benefits and features in our comprehensive guide.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749673991/Blog/Hero%20Images/Git.jpg","https://about.gitlab.com/blog/what-is-git-the-ultimate-guide-to-gits-role-and-functionality","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"What is Git? 
The ultimate guide to Git's role and functionality\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"GitLab\"}],\n        \"datePublished\": \"2024-11-14\",\n      }",{"title":2038,"description":2039,"authors":2044,"heroImage":2040,"date":2045,"body":2046,"category":680,"tags":2047},[917],"2024-11-14","Git is a must-have tool in the world of modern software development. In this comprehensive guide, we explain in detail what the Git tool is, its role in source code versioning, and how it works. Whether you're a beginner or an expert, this guide will give you a deep understanding of Git and its many features.\n\n## What is Git?\n\nGit is a source control tool that has quickly become a must-have in the software development ecosystem. Git's ability to meticulously track project changes makes it an essential tool for developers aiming to efficiently manage their projects. Therefore, mastering Git has become a vital skill for anyone aiming to excel in the field of software development.\n\n### What is version control?\n\n[Version control](https://about.gitlab.com/topics/version-control/what-is-git-version-control/) enables you to track changes to a software's source code. Thus, a software's delivered version consists of a set of specific versions of each of its components and source code files. For example, an icon might have only been changed twice, while a code file might have undergone several dozen changes over time.\n\n## What are Git's features?\n\nIn development, maintaining rigorous management of changes to a software's source code is important. Without this, ensuring the consistency and reliability of development teams' work is impossible. Fine-tuned change management can also make it easier to identify the source of a problem. Similarly, it reduces the risk of conflicts and file overwriting. 
Indeed, Git facilitates and streamlines software versioning precisely for this purpose.\n\nTo better understand Git and how it works, below we've outlined some of the key features that make it easy to optimize source code management as well as collaboration across teams.\n\n### Visualization of your project history\n\nIn the software development ecosystem, [the commit history](https://about.gitlab.com/blog/keeping-git-commit-history-clean/) is a core pillar for tracking project progress on Git. That's why Git offers developers a detailed history of all changes made to the   \nsource code.\n\nFor each new commit, the following are recorded:\n\n* specific changes made to project files\n* an explanatory message from the developer who made the change\n\nThese elements help improve the development team's communication and mission, allowing them to more quickly understand the ins and outs of each change made to the code.\n\nIn addition to monitoring project developments, this history allows you to go back if necessary, cancel part of the changes or, conversely, fetch only part of the changes from one branch to another. This function therefore plays an essential role in maintaining the transparency, consistency, and quality of a project's source code in Git, as well as collaboration within the development team and operational efficiency to solve problems.\n\nCheck out our tutorial on [how to create your first Git commit](https://docs.gitlab.com/ee/tutorials/make_first_git_commit/).\n\n### Greater autonomy for teams\n\nAnother essential feature of the Git tool is [distributed development](https://git-scm.com/about/distributed). Thanks to its decentralized structure, Git allows development teams to work simultaneously on the same project. Each team member has their own copy of the project, where each of their changes can be versioned. This allows them to work autonomously on specific features while reducing conflict or overwriting risks. 
This approach offers great flexibility for developers who can then explore different ideas or experiment with new features without interfering with their colleagues' work.\n\nDistributed development also enhances resilience to server failures. Thus, even in the event of a failure, each person has a copy on which they can continue to work offline. Changes can then be synchronized once the server is available again, thereby reducing the risk of work disruption for development teams and update constraints for operational teams.\n\n### Optimizing development workflows\n\nOne of Git's most powerful features is the ability to [manage branches and their mergers (branching and merging)](https://git-scm.com/about/branching-and-merging). These allow teams to work in parallel in a collaborative and organized way. Each new code addition or bug fix can be independently developed and tested to ensure reliability. Developers can then simply merge changes into the project's main branch.\n\nBy adopting this approach, teams can track code evolution, collaborate easily and efficiently, reduce conflicts between different versions, and ensure continuous integration of developed features.\n\nUsing these two features, teams can develop projects continuously and in an agile manner while regularly deploying new code versions. This practice greatly facilitates change management while reducing the risk of errors.\n\n## What are Git's benefits?\n\nTo thoroughly understand Git, it's important to fully recognize the benefits it offers to your development teams:\n\n* **Decentralized version management:** With Git, each developer has a complete copy of the project history, allowing them to work independently.  
It also ensures a minimal footprint on the hard disk and during network exchanges.
This active participation from individuals and companies in the Git community ensures the regular addition of new features and improvements through continuous updates.\n\n## What are Git's main commands?\n\nThe open-source Git project offers a wide variety of commands to make teamwork easier.  \nHere are some of the most commonly used commands.\n\n* **git init:** Initialize a new Git repository.  \n* **git clone \\[url\\]:** Clone an existing repository.  \n* **git add \\[file\\]:** Add a file to the index.  \n* **git commit:** Validate changes made.  \n* **git commit \\-m \"message\":** Validate changes with a message.  \n* **git status:** View the status of files in the working directory.  \n* **git push:** Send changes to remote repository.  \n* **git pull:** Fetch changes from the remote repository and merge them with the local repository.\n\nWhile these commands are essential to getting started with Git, it's important to note that there are plenty of other commands. See the [list of Git commands](https://git-scm.com/docs).\n\n## Git and GitLab\n\nGitLab is a collaborative open-source development platform covering all stages of the DevSecOps lifecycle and providing a Git server for efficient team collaboration.\n\nBeyond source code management, GitLab offers a complete suite enabling continuous integration and distribution, deliverables management, security and incident management, as well as all associated traceability, real-time task planning and tracking, deployment monitoring, software versioning, and the associated document space.\n\n## Git FAQs\n\n### Why use Git?\n\nGit is all about efficiency. Git's decentralized system based on branching and merging features allows development teams to work on the same project without interfering with others' work or, more importantly, creating version conflicts.\n\n### Is Git software?\n\nGit is an open-source project. Therefore, it's free and open to everyone. 
However, you need to [install Git](https://docs.gitlab.com/ee/topics/git/how_to_install_git/) on your device before you can start working.\n\n### What is a branch in Git?\n\nIn Git, a branch is a pointer to a change history. Thus, each main branch points to the last commit performed on it. It is therefore possible to have many parallel branches, each with its own history but the same root.\n\n### What is a commit?\n\nIn Git, a commit is a record of changes to a software's source code. Each commit is accompanied by an explanatory message that traces the history of all changes. This makes project tracking easier, and there's always the option to revert to earlier, functional versions if there's a problem.\n\n### What is the benefit of branches in Git?\n\nDeveloping features in branches allows developers to work simultaneously on several distinct features. In addition, this avoids compromising the main branch with unstable code. Moreover, implementing branches in Git is significantly more lightweight than in other version control systems.",[9,776,683],{"slug":2049,"featured":6,"template":687},"what-is-git-the-ultimate-guide-to-gits-role-and-functionality","content:en-us:blog:what-is-git-the-ultimate-guide-to-gits-role-and-functionality.yml","What Is Git The Ultimate Guide To Gits Role And Functionality","en-us/blog/what-is-git-the-ultimate-guide-to-gits-role-and-functionality.yml","en-us/blog/what-is-git-the-ultimate-guide-to-gits-role-and-functionality",{"_path":2055,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":2056,"content":2062,"config":2068,"_id":2070,"_type":13,"title":2071,"_source":15,"_file":2072,"_stem":2073,"_extension":18},"/en-us/blog/what-is-gitflow",{"title":2057,"description":2058,"ogTitle":2057,"ogDescription":2058,"noIndex":6,"ogImage":2059,"ogUrl":2060,"ogSiteName":672,"ogType":673,"canonicalUrls":2060,"schema":2061},"What is GitFlow?","This article introduces the differences between GitFlow and GitLab Flow, explains what GitFlow is, how it 
However, with [GitLab Flow](https://about.gitlab.com/topics/version-control/what-is-gitlab-flow/), work can begin directly on the `main` branch.
such as creating feature branches off the `develop` branch and the `main` branch, preparing `release` branches, and eventually merging into `main`.
main\n2. develop \n3. feature\n4. release \n5. hotfix\n\nWhen using GitFlow for code development, you work with the main branch and various supporting branches. There are two primary long-lived branches: the main branch for production-ready code, and the develop branch for integrating source code under development. Codes are stabilized in the `develop` branch, prepared to be released, and then merged into the main branch when ready. Supporting branches, such as feature, release, and hotfix branches, are created to handle specific development tasks. \n\n### GitLab Flow's workflow\n\nGitLab Flow streamlines development by preventing the overhead associated with releases, tagging, merging, and more. \n\nGitLab Flow is a simplified alternative to GitFlow, combining feature-driven development with issue tracking capabilities. Using GitLab Flow enables simple, straightforward, and efficient workflows. GitLab Flow incorporates best practices to help software development teams release features smoothly. \n\nGitLab Flow is the workflow used in GitLab's own development. It involves branches such as the `main` branch; a pre-release testing branch, `pre-production`; a branch for managing released code, `production`; and branches for feature development or bug fixes like `feature``hotfix`. Teams can add as many pre-production branches as they need. For example, creating flows such as from `main` to test, from test to approval, and from approval to production. \n\nWhile teams create feature branches, they also manage production branches. Once the main branch is ready for deployment, it will be merged into the production branch and released. GitLab Flow can also be utilized with release branches. Teams needing public APIs must manage different versions; GitLab Flow facilitates this by allowing the creation of individually manageable branches like `v1` and `v2`, making it convenient to revert to `v1` if bugs are detected during code review. 
\n\n## Benefits of using GitFlow and its features\n\n### 1: Rapid handling of bug fixes\n\nOne benefit of using GitFlow is the ability to quickly handle bug fixes in the production environment. GitFlow is employed as a Git (distributed version control system) workflow, particularly by large teams engaged in complex software development. \n\n### 2: Ensured testing\n\nWhen releasing software from a release branch, you can allocate time for users to test in a staging environment. This can occur independently of ongoing code development. Furthermore, as commits flow downstream through different stages, it helps ensure testing across all relevant environments. \n\n### 3: Streamlined software development process\n\nUsing GitFlow allows you to leverage Git to its full potential. This, in turn, helps streamline the software development process. \n\n### 4: More efficient collaboration, conflict resolution, and continuous delivery\n\nImplementing GitFlow enhances collaboration efficiency. Merge conflicts can be resolved quickly, enabling continuous delivery. \n\n## GitFlow example\nThe diagram below illustrates an example configuration of GitFlow. It should help clarify the overall flow, including the different branches and their structure.  \n\n![GitFlow example](https://res.cloudinary.com/about-gitlab-com/image/upload/v1749673714/Blog/Content%20Images/AdobeStock_569852816.jpg)\n\n## GitLab Flow and GitFlow FAQ \n\n### Q: What is Git Feature Flow? \n\nA: It is one of the proposed development workflows that utilize Git. Git Feature Flow is suitable for handling simpler development requirements. \n\n### Q: Is GitLab Flow worth using? \n\nA: Yes. GitLab Flow reduces the overhead associated with activities like releasing, tagging, and merging. These can be common issues encountered in other Git workflows. For more details, see [these GitLab Flow best practices](https://about.gitlab.com/topics/version-control/what-are-gitlab-flow-best-practices/). 
\n\n### Q: How should I choose between GitLab Flow and GitFlow? \n\nA: Git Flow, due to its structure, is well-suited for large projects with clearly defined development stages. GitLab Flow, being more agile, is better suited for projects that prioritize continuous delivery and rapid releases. \n\n## Get started with GitLab\n\nStart your [free, 60-day trial of GitLab Ultimate and GitLab Duo Enterprise](https://about.gitlab.com/free-trial/) today!\n",[9,683],{"slug":2069,"featured":6,"template":687},"what-is-gitflow","content:en-us:blog:what-is-gitflow.yml","What Is Gitflow","en-us/blog/what-is-gitflow.yml","en-us/blog/what-is-gitflow",{"_path":2075,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":2076,"content":2082,"config":2087,"_id":2089,"_type":13,"title":2090,"_source":15,"_file":2091,"_stem":2092,"_extension":18},"/en-us/blog/what-is-gitlab-flow",{"title":2077,"description":2078,"ogTitle":2077,"ogDescription":2078,"noIndex":6,"ogImage":2079,"ogUrl":2080,"ogSiteName":672,"ogType":673,"canonicalUrls":2080,"schema":2081},"The problem with Git flow","Learn why Git flow complicates the lifecycle and discover an alternative to streamline development.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749681121/Blog/Hero%20Images/whatisgitlabflow.jpg","https://about.gitlab.com/blog/what-is-gitlab-flow","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"The problem with Git flow\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Suri Patel\"}],\n        \"datePublished\": \"2020-03-05\",\n      }",{"title":2077,"description":2078,"authors":2083,"heroImage":2079,"date":2084,"body":2085,"category":680,"tags":2086},[1519],"2020-03-05","\n  \u003Cscript type=\"application/ld+json\">\n  {\n    \"@context\": \"https://schema.org\",\n    \"@type\": \"BlogPosting\",\n    \"mainEntityOfPage\": {\n      \"@type\": \"WebPage\",\n      \"@id\": 
\"https://about.gitlab.com/blog/what-is-gitlab-flow/\"\n    },\n    \"headline\": \"The problem with Git flow\",\n    \"description\": \"Learn why Git flow complicates the lifecycle and discover an alternative to streamline development.\",\n    \"image\": \"https://about.gitlab.com/images/blogimages/whatisgitlabflow.jpg\",\n    \"author\": {\n      \"@type\": \"Organization\",\n      \"name\": \"GitLab\"\n    },\n    \"publisher\": {\n      \"@type\": \"Organization\",\n      \"name\": \"\",\n      \"logo\": {\n        \"@type\": \"ImageObject\",\n        \"url\": \"\"\n      }\n    },\n    \"datePublished\": \"2020-03-05\"\n  }\n  \u003C/script>\n\nSometimes, you can have too much of a good thing. That’s certainly true with [Git flow](https://nvie.com/posts/a-successful-git-branching-model/), a well-known software development workflow that offers several options but can bog down users.\n\nWe developed [GitLab Flow](/topics/version-control/what-is-gitlab-flow/) as the solution to eliminate messy complexity and streamline the development process. [GitLab Flow](/topics/version-control/what-is-gitlab-flow/) brings issue tracking to the Git workflow, simplifying the process and removing confusion.\n\n## The problem with Git flow\n\nTo understand how GitLab Flow works, it’s helpful to start by looking at the problems it tries to solve. In Git flow, there are two main pain points, both of which involve unnecessary branch switching.\n\nGit flow forces developers to use the `develop` branch rather than the `master` or default branch. Because most tools default to using the master, there’s a significant amount of branch switching involved. 
Another frustrating aspect is the use of
In this workflow, commits only flow downstream, ensuring that everything is tested in all environments.\n\nGitLab Flow prevents the overhead of releasing, tagging, and merging that accompanies Git flow.\n\nGitLab Flow in a nutshell:\n- All features and fixes first go to master\n- Allows for `production` or `stable` branches\n- Bug fixes/hotfix patches are cherry-picked from master\n\nRead more on here [GitLab Flow best practicies](/topics/version-control/what-are-gitlab-flow-best-practices/)\n\n## Breaking down the 10 stages of software development\n\nGitLab Flow is a way to move from the idea stage to production, all while keeping everyone informed and productive. We identified [10 key stages](/topics/version-control/what-is-gitlab-flow/#stages-of-software-development) of the development process that must happen in order for software to get into production. GitLab Flow makes it easy to account for all of them, while continuing to provide full visibility into the development lifecycle.\n\nBroadly speaking, GitLab Flow is broken down into three main areas: `feature` branch, `production` branch, and `release` branch.\n\nA `feature` branch is where the serious development work occurs. A developer creates a feature or bug fix branch and does all the work there rather than on a master branch. Once the work is complete, the developer creates a merge request to merge the work into the master branch.\n\nThe `production` branch is essentially a monolith – a single long-running production `release` branch rather than individual branches. It’s possible to create a tag for each deployable version to keep track of those details easily.\n\nThe last piece, the `release` branch, is key if you release software to customers. With every new release, you’ll create a stable branch from master and decide on a tag. 
If you need to do a patch release, be sure to cherry-pick critical bug fixes first, and don’t commit them directly to the stable or long-lived branch.\n\n## Follow the rules\n\nWant to get the most out of GitLab Flow? Our CEO [Sid Sijbrandij](/company/team/#sytses) came up with [11 rules teams should always follow to achieve maximum efficiency](/topics/version-control/what-are-gitlab-flow-best-practices/). The article is worth a read in its entirety, but here are a few rules that are timely reminders of the importance of testing, even in a [CI environment](/solutions/continuous-integration/):\n\n* **Test all commits**: Don’t wait to test until everything has been merged into `master`. Test commits along the way to catch problems earlier in the process.\n* **And run _all_ tests on all the commits**, even if you have to run tests in parallel.\n* **Code reviews > merging into `master`.** Why wait? \"Don’t test everything at the end of the week,\" Sid writes. \"Do it on the spot, because you'll be more likely to catch things that could cause problems, and others will also be working to come up with solutions.\"\n\n## Take a deep dive\n\nTake a look at GitLab Flow in action! 
🍿\n\n\u003C!-- blank line -->\n\u003Cfigure class=\"video_container\">\n  \u003Ciframe src=\"https://www.youtube-nocookie.com/embed/InKNIvky2KE\" frameborder=\"0\" allowfullscreen=\"true\"> \u003C/iframe>\n\u003C/figure>\n\u003C!-- blank line -->\n\n\n\nCover image by [Fabio Bracht](https://unsplash.com/@bracht?utm_source=unsplash&utm_medium=referral&utm_content=creditCopyText) on [Unsplash](https://unsplash.com/photos/_z0DiiaIhB4)\n{: .note}\n",[9,982,798],{"slug":2088,"featured":6,"template":687},"what-is-gitlab-flow","content:en-us:blog:what-is-gitlab-flow.yml","What Is Gitlab Flow","en-us/blog/what-is-gitlab-flow.yml","en-us/blog/what-is-gitlab-flow",{"_path":2094,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":2095,"content":2098,"config":2106,"_id":2108,"_type":13,"title":2109,"_source":15,"_file":2110,"_stem":2111,"_extension":18},"/en-us/blog/what-s-new-in-git-2-50-0",{"noIndex":6,"title":2096,"description":2097},"What’s new in Git 2.50.0?","Here are contributions from GitLab's Git team and the Git community such as the git-diff-pairs(1) command and git-rev-list(1) option to perform batched reference updates.",{"title":2096,"description":2099,"authors":2100,"heroImage":2102,"body":2103,"date":2104,"category":680,"tags":2105},"Here are contributions from GitLab's Git team and the Git community such as the git-diff-pairs(1) command and git-update-ref(1) option to perform batched reference updates.",[2101],"Justin Tobler","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749663087/Blog/Hero%20Images/git3-cover.png","The Git project recently released [Git Version 2.50.0](https://lore.kernel.org/git/xmqq1prj1umb.fsf@gitster.g/T/#u). Let's look at a few notable highlights from this release, which includes contributions from the Git team at GitLab and also the wider Git community.\n## New git-diff-pairs(1) command\n\nDiffs are at the heart of every code review and show all the changes made\nbetween two revisions. 
GitLab shows diffs in various places, but the most\ncommon place is a merge request's [\"Changes\" tab](https://docs.gitlab.com/user/project/merge_requests/changes/).\nBehind the scenes, diff generation is powered by\n[`git-diff(1)`](https://git-scm.com/docs/git-diff). For example:\n\n```shell\n$ git diff HEAD~1 HEAD\n```\n\nThis command returns the full diff for all changed files. This might pose a scalability challenge because the number of files changed between a set of revisions could be very large and cause the command to reach self-imposed timeouts for the GitLab backend. For large change sets, it would be better if\nthere were a way to break diff computation into smaller, more digestible chunks.\n\nOne way this can be achieved is by using\n[`git-diff-tree(1)`](https://git-scm.com/docs/git-diff-tree) to retrieve info\nabout all the changed files:\n\n```shell\n$ git diff-tree -r -M --abbrev HEAD~ HEAD\n:100644 100644 c9adfed339 99acf81487 M      Documentation/RelNotes/2.50.0.adoc\n:100755 100755 1047b8d11d 208e91a17f M      GIT-VERSION-GEN\n```\n\nGit refers to this output as the [\"raw\" format](https://git-scm.com/docs/git-diff-tree#_raw_output_format).\nIn short, each line of output lists filepairs and the accompanying metadata\nabout what has changed between the start and end revisions. Compared to\ngenerating the \"patch\" output for large changes, this process is relatively\nquick and provides a summary of everything that changed. This command can optionally perform rename detection by  appending the `-M` flag to check if identified changes were due to a file rename.\n\nWith this information, we could use `git-diff(1)` to compute each of the\nfilepair diffs individually. 
For example, we can provide the blob IDs\ndirectly:\n\n```shell\n$ git diff 1047b8d11de767d290170979a9a20de1f5692e26 208e91a17f04558ca66bc19d73457ca64d5385f\n```\n\nWe can repeat this process for each of the filepairs, but spinning up a\nseparate Git process for each individual file diff is not very efficient.\nFurthermore, when using blob IDs, the diff loses some contextual information\nsuch as the change status, and file modes which are stored in with the parent\ntree object. What we really want is a mechanism to feed \"raw\" filepair info and\ngenerate the corresponding patch output.\n\nWith the 2.50 release, Git has a new built-in command named\n[`git-diff-pairs(1)`](https://git-scm.com/docs/git-diff-pairs). This command\naccepts \"raw\" formatted filepair info as input on stdin to determine exactly which patches to output. The following example showcases how this command could be\nused:\n\n```shell\n$ git diff-tree -r -z -M HEAD~ HEAD | git diff-pairs -z\n```\n\nWhen used in this manner, the resulting output is identical to using `git-diff(1)`.\nBy having a separate command to generate patch output, the \"raw\" output from\n`git-diff-tree(1)` can be broken up into smaller batches of filepairs and fed to separate\n`git-diff-pairs(1)` processes. This solves the previously mentioned scalability\nconcern because diffs no longer have to be computed all at once. Future GitLab\nreleases could build upon this mechanism to improve diff\ngeneration performance, especially in cases where large change sets are\nconcerned. For more information on this change, check out the corresponding\n[mailing-list thread](https://lore.kernel.org/git/20250228213346.1335224-1-jltobler@gmail.com/).\n\n_This project was led by [Justin Tobler](https://gitlab.com/justintobler)._\n\n## Batched reference updates\n\nGit provides the [`git-update-ref(1)`](https://git-scm.com/docs/git-update-ref)\ncommand to perform reference updates. 
When used with the `--stdin` flag,\nmultiple reference updates can be batched together in a single transaction by\nspecifying instructions for each reference update to be performed on stdin.\nBulk updating references in this manner also provides atomic behavior whereby a\nsingle reference update failure results in an aborted transaction and no\nreferences being updated. Here is an example showcasing this behavior:\n\n```shell\n# Create repository with three empty commits and branch named \"foo\"\n$ git init\n$ git commit --allow-empty -m 1\n$ git commit --allow-empty -m 2\n$ git commit --allow-empty -m 3\n$ git branch foo\n\n# Print out the commit IDs\n$ git rev-list HEAD\ncf469bdf5436ea1ded57670b5f5a0797f72f1afc\n5a74cd330f04b96ce0666af89682d4d7580c354c\n5a6b339a8ebffde8c0590553045403dbda831518\n\n# Attempt to create a new reference and update existing reference in transaction.\n# Update is expected to fail because the specified old object ID doesn’t match.\n$ git update-ref --stdin \u003C\u003CEOF\n> create refs/heads/bar cf469bdf5436ea1ded57670b5f5a0797f72f1afc\n> update refs/heads/foo 5a6b339a8ebffde8c0590553045403dbda831518 5a74cd330f04b96ce0666af89682d4d7580c354c\n> EOF\nfatal: cannot lock ref 'refs/heads/foo': is at cf469bdf5436ea1ded57670b5f5a0797f72f1afc but expected 5a74cd330f04b96ce0666af89682d4d7580c354c\n\n# The \"bar\" reference was not created.\n$ git switch bar\nfatal: invalid reference: bar\n```\n\nCompared to updating many references individually, updating in bulk is also\nmuch more efficient. While this works well, there might be certain\ncircumstances where it is okay for a subset of the requested reference updates\nto fail, but we still want to take advantage of the efficiency gains of bulk\nupdates.\n\nWith this release, `git-update-ref(1)` has the new `--batch-updates` option,\nwhich allows the updates to proceed even when one or more reference updates\nfails. 
In this mode, individual failures are reported in the following format:\n\n```text\nrejected SP (\u003Cold-oid> | \u003Cold-target>) SP (\u003Cnew-oid> | \u003Cnew-target>) SP \u003Crejection-reason> LF\n```\n\nThis allows successful reference updates to proceed while providing context to\nwhich updates were rejected and for what reason. Using the same example\nrepository from the previous example:\n\n```shell\n# Attempt to create a new reference and update existing reference in transaction.\n$ git update-ref --stdin --batch-updates \u003C\u003CEOF\n> create refs/heads/bar cf469bdf5436ea1ded57670b5f5a0797f72f1afc\n> update refs/heads/foo 5a6b339a8ebffde8c0590553045403dbda831518 5a74cd330f04b96ce0666af89682d4d7580c354c\n> EOF\nrejected refs/heads/foo 5a6b339a8ebffde8c0590553045403dbda831518 5a74cd330f04b96ce0666af89682d4d7580c354c incorrect old value provided\n\n# The \"bar\" reference was created even though the update to \"foo\" was rejected.\n$ git switch bar\nSwitched to branch 'bar'\n```\n\nThis time, with the `--batch-updates` option, the reference creation succeeded\neven though the update didn't work. This patch series lays the groundwork for\nfuture performance improvements in `git-fetch(1)` and `git-receive-pack(1)`\nwhen references are updated in bulk. For more information, check the\n[mailing-list thread](https://lore.kernel.org/git/20250408085120.614893-1-karthik.188@gmail.com/)\n\n_This project was led by [Karthik Nayak](https://gitlab.com/knayakgl)._\n\n## New filter option for git-cat-file(1)\n\nWith [`git-cat-file(1)`](https://git-scm.com/docs/git-cat-file), it is possible\nto print info for all objects contained in the repository via the\n`--batch–all-objects` option. 
For example:\n\n```shell\n# Setup simple repository.\n$ git init\n$ echo foo >foo\n$ git add foo\n$ git commit -m init\n\n# Create an unreachable object.\n$ git commit --amend --no-edit\n\n# Use git-cat-file(1) to print info about all objects including unreachable objects.\n$ git cat-file --batch-all-objects --batch-check='%(objecttype) %(objectname)'\ncommit 0b07e71d14897f218f23d9a6e39605b466454ece\ntree 205f6b799e7d5c2524468ca006a0131aa57ecce7\nblob 257cc5642cb1a054f08cc83f2d943e56fd3ebe99\ncommit c999f781fd7214b3caab82f560ffd079ddad0115\n```\n\nIn some situations, a user might want to search through all objects in the\nrepository, but only output a subset based on some specified attribute. For\nexample, if we wanted to see only the objects that are commits, we could use\n`grep(1)`:\n\n```shell\n$ git cat-file --batch-all-objects --batch-check='%(objecttype) %(objectname)' | grep ^commit\ncommit 0b07e71d14897f218f23d9a6e39605b466454ece\ncommit c999f781fd7214b3caab82f560ffd079ddad0115\n```\n\nWhile this works, one downside with filtering the output is that\n`git-cat-file(1)` still has to traverse all the objects in the repository, even\nthe ones that the user is not interested in. This can be rather inefficient.\n\nWith this release, `git-cat-file(1)` now has the `--filter` option, which only\nshows objects matching the specified criteria. This is similar to the option of\nthe same name for `git-rev-list(1)`, but with only a subset of the filters\nsupported. The supported filters are `blob:none`, `blob:limit=`, as well as\n`object:type=`. 
Similar to the previous example, objects can be filtered by\ntype with Git directly:\n\n```shell\n$ git cat-file --batch-all-objects --batch-check='%(objecttype) %(objectname)' --filter='object:type=commit'\ncommit 0b07e71d14897f218f23d9a6e39605b466454ece\ncommit c999f781fd7214b3caab82f560ffd079ddad0115\n```\n\nNot only is it convenient for Git to handle the processing, for large\nrepositories with many objects, it is also potentially more efficient. If a\nrepository has bitmap indices, it becomes possible for Git to efficiently\nlookup objects of a specific type, and thus avoid scanning through the\npackfile, which leads to a significant speedup. Benchmarks conducted on the\n[Chromium repository](https://github.com/chromium/chromium.git) show\nsignificant improvements:\n\n```text\nBenchmark 1: git cat-file --batch-check --batch-all-objects --unordered --buffer --no-filter\n   Time (mean ± σ):     82.806 s ±  6.363 s    [User: 30.956 s, System: 8.264 s]\n   Range (min … max):   73.936 s … 89.690 s    10 runs\n\nBenchmark 2: git cat-file --batch-check --batch-all-objects --unordered --buffer --filter=object:type=tag\n   Time (mean ± σ):      20.8 ms ±   1.3 ms    [User: 6.1 ms, System: 14.5 ms]\n   Range (min … max):    18.2 ms …  23.6 ms    127 runs\n\nBenchmark 3: git cat-file --batch-check --batch-all-objects --unordered --buffer --filter=object:type=commit\n   Time (mean ± σ):      1.551 s ±  0.008 s    [User: 1.401 s, System: 0.147 s]\n   Range (min … max):    1.541 s …  1.566 s    10 runs\n\nBenchmark 4: git cat-file --batch-check --batch-all-objects --unordered --buffer --filter=object:type=tree\n   Time (mean ± σ):     11.169 s ±  0.046 s    [User: 10.076 s, System: 1.063 s]\n   Range (min … max):   11.114 s … 11.245 s    10 runs\n\nBenchmark 5: git cat-file --batch-check --batch-all-objects --unordered --buffer --filter=object:type=blob\n   Time (mean ± σ):     67.342 s ±  3.368 s    [User: 20.318 s, System: 7.787 s]\n   Range (min … max):   62.836 s … 
73.618 s    10 runs\n\nBenchmark 6: git cat-file --batch-check --batch-all-objects --unordered --buffer --filter=blob:none\n   Time (mean ± σ):     13.032 s ±  0.072 s    [User: 11.638 s, System: 1.368 s]\n   Range (min … max):   12.960 s … 13.199 s    10 runs\n\nSummary\n   git cat-file --batch-check --batch-all-objects --unordered --buffer --filter=object:type=tag\n    74.75 ± 4.61 times faster than git cat-file --batch-check --batch-all-objects --unordered --buffer --filter=object:type=commit\n   538.17 ± 33.17 times faster than git cat-file --batch-check --batch-all-objects --unordered --buffer --filter=object:type=tree\n   627.98 ± 38.77 times faster than git cat-file --batch-check --batch-all-objects --unordered --buffer --filter=blob:none\n  3244.93 ± 257.23 times faster than git cat-file --batch-check --batch-all-objects --unordered --buffer --filter=object:type=blob\n  3990.07 ± 392.72 times faster than git cat-file --batch-check --batch-all-objects --unordered --buffer --no-filter\n```\n\nInterestingly, these results indicate that the computation time now scales with\nthe number of objects for a given type instead of the number of total objects\nin the packfile. The original mailing-list thread can be found\n[here](https://lore.kernel.org/git/20250221-pks-cat-file-object-type-filter-v1-0-0852530888e2@pks.im/).\n\n_This project was led by [Patrick Steinhardt](https://gitlab.com/pks-gitlab)._\n\n## Improved performance when generating bundles\n\nGit provides a means to generate an archive of a repository which contains a\nspecified set of references and accompanying reachable objects via the\n[`git-bundle(1)`](https://git-scm.com/docs/git-bundle) command. This operation\nis used by GitLab to generate repository backups and also as part of the\n[bundle-URI](https://git-scm.com/docs/bundle-uri) mechanism.\n\nFor large repositories containing millions of references, this operation can\ntake hours or even days. 
For example, with the main GitLab repository\n([gitlab-org/gitlab](https://gitlab.com/gitlab-org/gitlab)), backup times were\naround 48 hours. Investigation revealed there was a performance bottleneck due\nto how Git was performing a check to avoid duplicated references being included\nin the bundle. The implementation used a nested `for` loop to iterate and\ncompare all listed references, leading to O(N^2) time complexity. This scales\nvery poorly as the number of references in a repository increases.\n\nIn this release, this issue was addressed by replacing the nested loops with a\nmap data structure leading to a significant speedup. The following benchmark\nthe performance improvement for creating a bundle with a repository containing\n100,000 references:\n\n```text\nBenchmark 1: bundle (refcount = 100000, revision = master)\n  Time (mean ± σ):     14.653 s ±  0.203 s    [User: 13.940 s, System: 0.762 s]\n  Range (min … max):   14.237 s … 14.920 s    10 runs\n\nBenchmark 2: bundle (refcount = 100000, revision = HEAD)\n  Time (mean ± σ):      2.394 s ±  0.023 s    [User: 1.684 s, System: 0.798 s]\n  Range (min … max):    2.364 s …  2.425 s    10 runs\n\nSummary\n  bundle (refcount = 100000, revision = HEAD) ran\n    6.12 ± 0.10 times faster than bundle (refcount = 100000, revision = master)\n```\n\nTo learn more, check out our blog post\n[How we decreased GitLab repo backup times from 48 hours to 41 minutes](https://about.gitlab.com/blog/how-we-decreased-gitlab-repo-backup-times-from-48-hours-to-41-minutes/).\nYou can also find the original mailing list thread\n[here](https://lore.kernel.org/git/20250401-488-generating-bundles-with-many-references-has-non-linear-performance-v1-0-6d23b2d96557@gmail.com/).\n\n_This project was led by [Karthik Nayak](https://gitlab.com/knayakgl)._\n\n## Better bundle URI unbundling\n\nThrough the [bundle URI](https://git-scm.com/docs/bundle-uri) mechanism in Git,\nlocations to fetch bundles from can be provided to clients with the 
goal to\nhelp speed up clones and fetches. When a client downloads a bundle, references\nunder `refs/heads/*` are copied from the bundle into the repository along with\ntheir accompanying objects. A bundle might contain additional references\noutside of `refs/heads/*` such as `refs/tags/*`, which are simply ignored when\nusing bundle URI on clone.\n\nIn Git 2.50, this restriction is lifted, and all references\nmatching `refs/*` contained in the downloaded bundle are copied.\n[Scott Chacon](https://github.com/schacon), who contributed this functionality,\ndemonstrates the difference when cloning\n[gitlab-org/gitlab-foss](https://gitlab.com/gitlab-org/gitlab-foss):\n\n```shell\n$ git-v2.49 clone --bundle-uri=gitlab-base.bundle https://gitlab.com/gitlab-org/gitlab-foss.git gl-2.49\nCloning into 'gl2.49'...\nremote: Enumerating objects: 1092703, done.\nremote: Counting objects: 100% (973405/973405), done.\nremote: Compressing objects: 100% (385827/385827), done.\nremote: Total 959773 (delta 710976), reused 766809 (delta 554276), pack-reused 0 (from 0)\nReceiving objects: 100% (959773/959773), 366.94 MiB | 20.87 MiB/s, done.\nResolving deltas: 100% (710976/710976), completed with 9081 local objects.\nChecking objects: 100% (4194304/4194304), done.\nChecking connectivity: 959668, done.\nUpdating files: 100% (59972/59972), done.\n\n$ git-v2.50 clone --bundle-uri=gitlab-base.bundle https://gitlab.com/gitlab-org/gitlab-foss.git gl-2.50\nCloning into 'gl-2.50'...\nremote: Enumerating objects: 65538, done.\nremote: Counting objects: 100% (56054/56054), done.\nremote: Compressing objects: 100% (28950/28950), done.\nremote: Total 43877 (delta 27401), reused 25170 (delta 13546), pack-reused 0 (from 0)\nReceiving objects: 100% (43877/43877), 40.42 MiB | 22.27 MiB/s, done.\nResolving deltas: 100% (27401/27401), completed with 8564 local objects.\nUpdating files: 100% (59972/59972), done.\n```\n\nComparing these results, we see that Git 2.50 fetches 43,887 objects\n(40.42 MiB) 
after the bundle was extracted whereas Git 2.49 fetches a\ntotal of 959,773 objects (366.94 MiB). Git 2.50 fetches roughly 95% fewer\nobjects and 90% less data, which benefits both the client and the server. The\nserver needs to process a lot less data to the client and the client needs to\ndownload and extract less data. In the example provided by Scott this led to a\nspeedup of 25%.\n\nTo learn more, check out the corresponding\n[mailing-list thread](https://lore.kernel.org/git/pull.1897.git.git.1740489585344.gitgitgadget@gmail.com/).\n\n_This patch series was contributed by [Scott Chacon](https://github.com/schacon)._\n\n## Read more\n\nThis article highlighted just a few of the contributions made by GitLab and\nthe wider Git community for this latest release. You can learn about these from\nthe [official release announcement](https://lore.kernel.org/git/xmqq1prj1umb.fsf@gitster.g/) of the Git project. Also, check\nout our [previous Git release blog posts](https://about.gitlab.com/blog/tags/git/)\nto see other past highlights of contributions from GitLab team members.\n","2025-06-16",[9,683,266],{"featured":90,"template":687,"slug":2107},"what-s-new-in-git-2-50-0","content:en-us:blog:what-s-new-in-git-2-50-0.yml","What S New In Git 2 50 0","en-us/blog/what-s-new-in-git-2-50-0.yml","en-us/blog/what-s-new-in-git-2-50-0",{"_path":2113,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":2114,"content":2119,"config":2124,"_id":2126,"_type":13,"title":2127,"_source":15,"_file":2128,"_stem":2129,"_extension":18},"/en-us/blog/whats-new-in-git-2-45-0",{"title":2115,"description":2116,"ogTitle":2115,"ogDescription":2116,"noIndex":6,"ogImage":1864,"ogUrl":2117,"ogSiteName":672,"ogType":673,"canonicalUrls":2117,"schema":2118},"What’s new in Git 2.45.0?","Here are some highlights of contributions from GitLab's Git team and the wider Git community to the latest Git release, including reftables and better tooling for 
references.\n","https://about.gitlab.com/blog/whats-new-in-git-2-45-0","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"What’s new in Git 2.45.0?\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Patrick Steinhardt\"}],\n        \"datePublished\": \"2024-04-30\",\n      }",{"title":2115,"description":2116,"authors":2120,"heroImage":1864,"date":2121,"body":2122,"category":680,"tags":2123},[677],"2024-04-30","The Git project recently released [Git Version 2.45.0](https://lore.kernel.org/git/xmqq8r0ww0sj.fsf@gitster.g/). Let's look at the highlights of this release, which includes contributions from GitLab's Git team and the wider Git community.\n\n## Reftables: A new backend for storing references\n\nEvery Git repository needs to track two basic data structures:\n- The object graph that stores the data of your files, the directory structure, commit messages, and tags.\n- References that are pointers into that object graph to associate specific objects with a more accessible name. For example, a branch is a reference whose name starts with a `refs/heads/` prefix.\n\nThe on-disk format of how references are stored in a repository has remained largely unchanged since Git’s inception and is referred to as  the \"files\" format. Whenever you create a reference, Git creates a so-called \"loose reference\" that is a plain file in your Git repository whose path matches the ref name. For example:\n\n```shell\n$ git init .\nInitialized empty Git repository in /tmp/repo/.git/\n\n# Updating a reference will cause Git to create a \"loose ref\". 
This loose ref is\n# a simple file which contains the object ID of the commit.\n$ git commit --allow-empty --message \"Initial commit\"\n[main (root-commit) c70f266] Initial commit\n$ cat .git/refs/heads/main\nc70f26689975782739ef9666af079535b12b5946\n\n# Creating a second reference will end up with a second loose ref.\n$ git branch feature\n$ cat .git/refs/heads/feature\nc70f26689975782739ef9666af079535b12b5946\n$ tree .git/refs\n.git/refs/\n├── heads\n│   ├── feature\n│   └── main\n└── tags\n\n3 directories, 2 files\n```\n\nEvery once in a while, Git packs those references into a \"packed\"\nfile format so that it becomes more efficient to look up references. For example:\n\n```shell\n# Packing references will create \"packed\" references, which are a sorted list of\n# references. The loose reference does not exist anymore.\n$ git pack-refs --all\n$ cat .git/refs/heads/main\ncat: .git/refs/heads/main: No such file or directory\n$ cat .git/packed-refs\n# pack-refs with: peeled fully-peeled sorted\nc70f26689975782739ef9666af079535b12b5946 refs/heads/feature\nc70f26689975782739ef9666af079535b12b5946 refs/heads/main\n```\n\nWhile this format is rather simple, it has limitations:\n- In large mono repos with many references, we started to hit scalability issues. Deleting references is especially inefficient because the entire “packed-refs” file must be rewritten to drop the deleted reference. 
In our largest repositories, this can lead to rewriting multiple gigabytes of data on every reference deletion.\n- It is impossible to perform an atomic read of references without blocking concurrent writers because you have to read multiple files to figure out all references.\n- It is impossible to perform an atomic write because it requires you to create or update multiple files, which cannot be done in a single step.\n- Housekeeping of references does not scale well because you have to rewrite the full \"packed-refs\" file.\n- Because loose references use the filesystem path as their name, they are subject to filesystem-specific behavior. For example, case-insensitive file systems cannot store references for which only the case differs.\n\nTo address these issues, Git v2.45.0 introduces a new \"reftable\" backend, which uses a new binary format to store references. This new backend has been in development for a very long time. It was initially proposed by [Shawn Pearce](https://sfconservancy.org/blog/2018/jan/30/shawn-pearce/) in July 2017 and was initially implemented in [JGit](https://www.eclipse.org/jgit/). It is used extensively by the [Gerrit project](https://www.gerritcodereview.com/). In 2021, [Han-Wen Nienhuys](https://hanwen.home.xs4all.nl/) upstreamed the library into Git that allows it to read and write the [reftable format](https://git-scm.com/docs/reftable).\n\nThe new \"reftable\" backend that we upstreamed in Git v2.45.0 now finally brings together the reftable library and Git such that it is possible to use the new format as storage backend in your Git repositories.\n\nAssuming that you run at least Git v2.45.0, you can create new repositories with the \"reftable\" format by passing the `--ref-format=reftable` switch to either `git-init(1)` or `git-clone(1)`. 
For example:\n\n```shell\n$ git init --ref-format=reftable .\nInitialized empty Git repository in /tmp/repo/.git/\n$ git rev-parse --show-ref-format\nreftable\n$ find -type f .git/reftable/\n.git/reftable/0x000000000001-0x000000000001-01b5e47d.ref\n.git/reftable/tables.list\n\n$ git commit --allow-empty --message \"Initial commit\"\n$ find -type f .git/reftable/\n.git/reftable/0x000000000001-0x000000000001-01b5e47d.ref\n.git/reftable/0x000000000002-0x000000000002-87006b81.ref\n.git/reftable/tables.list\n```\n\nAs you can see, the references are now stored in `.git/reftable` instead of in the `.git/refs` directory. The references and the reference logs are stored in “tables,” which are the files ending with `.ref`, whereas the `tables.list` file contains the list of all tables that are currently active. The technical details of how this work will be explained in a separate blog post. Stay tuned!\n\nThe “reftable” backend is supposed to be a  drop-in replacement for the “files” backend. Hence, from a user’s perspective, everything should just work the same.\n\nThis project was led by [Patrick Steinhardt](https://gitlab.com/pks-gitlab). Credit also goes to Shawn Pearce as original inventor of the format and Han-Wen Nienhuys as the author of the reftable library.\n\n## Better tooling for references\n\nWhile the \"reftable\" format solves many of the issues we have, it also\nintroduces some new issues. One of the most important issues is accessibility of the data it contains.\n\nWith the \"files\" backend, you can, in the worst case, use your regular Unix tools to inspect the state of references. Both the \"packed\" and the \"loose\" references contain human-readable data that one can easily make sense of. This is different with the \"reftable\" format, which is a binary format. 
Therefore, Git needs to provide all the necessary tooling to extract data from the new \"reftable\" format.\n\n### Listing all references\n\nThe first problem we had is that it is basically impossible to learn about all the references that a repository knows about. This is somewhat puzzling at first: you can create and modify references via Git, but it cannot exhaustively list all references that it knows about?\n\nIndeed, the \"files\" backend can't. While it can trivially list all \"normal\"\nreferences that start with the `refs/` prefix, Git also uses so-called\n[pseudo refs](https://git-scm.com/docs/gitglossary#Documentation/gitglossary.txt-aiddefpseudorefapseudoref). These files live directly in the root of the Git directory and would be files like, for example, `.git/MERGE_HEAD`. The problem here is that those pseudo refs live next to other files that Git stores like, for example, `.git/config`.\n\nWhile some pseudo refs are well-known and thus easy to identify, there is\nin theory no limit to what references Git can write. Nothing stops you from\ncreating a reference called \"foobar\".\n\nFor example:\n\n```shell\n$ git update-ref foobar HEAD\n$ cat .git/foobar\nf32633d4d7da32ccc3827e90ecdc10570927c77d\n```\n\nNow the problem that the \"files\" backend has is that it can only enumerate\nreferences by scanning through directories. So to figure out that\n`.git/foobar` is in fact a reference, Git would have to open the file and check whether it is formatted like a reference or not.\n\nOn the other hand, the \"reftable\" backend trivially knows about all references that it contains: They are encoded in its data structures, so all it needs to do is to decode those references and return them. 
But because of the restrictions of the \"files\" backend, there is no tooling that would allow you to learn about all references that exist.\n\nTo address the issue, we upstreamed a new flag to `git-for-each-ref(1)` called `--include-root-refs`, which will cause it to also list all references that exist in the root of the reference naming hierarchy. For example:\n\n```shell\n$ git for-each-ref --include-root-refs\nf32633d4d7da32ccc3827e90ecdc10570927c77d commit    HEAD\nf32633d4d7da32ccc3827e90ecdc10570927c77d commit    MERGE_HEAD\nf32633d4d7da32ccc3827e90ecdc10570927c77d commit    refs/heads/main\n```\n\nFor the \"files\" backend, this new flag is handled on a best-effort basis where we include all references that match a known pseudo ref name. For the \"reftable\" backend, we can simply list all references known to it.\n\nThis project was led by [Karthik Nayak](https://gitlab.com/knayakgl).\n\n### Listing all reflogs\n\nWhenever you update branches, Git, by default, tracks those branch updates in a so-called reflog. This reflog allows you to roll back changes to that branch in case you performed an unintended change and can thus be a very helpful tool.\n\nWith the \"files\" backend, those logs are stored in your `.git/logs` directory:\n\n```shell\n$ find -type f .git/logs/\n.git/logs/HEAD\n.git/logs/refs/heads/main\n```\n\nIn fact, listing files in this directory is the only way for you to learn what references actually have a reflog in the first place. This is a problem for the \"reftable\" backend, which stores those logs together with the references. Consequently, there doesn't exist any way for you to learn about which reflogs exist in the repository at all anymore when you use the \"reftable\" format.\n\nThis is not really the fault of the \"reftable\" format though, but an omission in the tooling that Git provides. 
To address the omission, we introduced a new `list` subcommand for `git-reflog(1)` that allows you to list all existing reflogs:\n\n```shell\n$ git reflog list\nHEAD\nrefs/heads/main\n```\n\nThis project was led by [Patrick Steinhardt](https://gitlab.com/pks-gitlab).\n\n### More efficient packing of references\n\nTo stay efficient, Git repositories need regular maintenance. Usually,\nthis maintenance is triggered by various Git commands that write data into the Git repositories by executing `git maintenance run --auto`. This command \nonly optimizes data structures that actually need to be optimized so that Git doesn’t waste compute resources.\n\nOne data structure that gets optimized by Git's maintenance is the reference\ndatabase, which is done by executing `git pack-refs --all`. For the \"files\"\nbackend, this means that all references get repacked into the \"packed-refs\" file and the loose references get deleted, whereas for the \"reftable\" backend all the tables will get merged into a single table.\n\nFor the \"files\" backend, we cannot reasonably do much better. Given that we have to rewrite the whole \"packed-refs\" file anyway, it makes sense that we would want to pack _all_ loose references.\n\nBut for the \"reftable\" backend this is suboptimal as the \"reftable\" backend is self-optimizing. Whenever Git appends a new table to the \"reftable\" backend, it will perform auto-compaction and merge tables together as needed. Consequently, the reference database should always be in a well-optimized state and thus merging all tables together is a wasted effort.\n\nIn Git v2.45.0, we thus introduced a new `git pack-refs --auto` mode, which asks the reference backend to optimize on an as-needed basis. While the \"files\" backend continues to work the same even with the `--auto` flag set, the \"reftable\" backend will use the same heuristics as it already uses for its auto-compaction. 
In practice, this should be a no-op in most cases.\n\nFurthermore, `git maintenance run --auto` has been adapted to pass the `-tauto` flag to `git-pack-refs(1)` to make use of this new mode by default.\n\nThis project was led by [Patrick Steinhardt](https://gitlab.com/pks-gitlab).\n\n## Read more\n\nThis blog post put a heavy focus on the new \"reftable\" backend, which allows us to scale better in large repositories with many references, as well as related tooling that we have introduced alongside it to make it work well. There, of course, have been various performance improvements, bug fixes and smaller features introduced with this Git release by the wider Git community, as well. You can learn about these from the [official release announcement](https://lore.kernel.org/git/xmqq8r0ww0sj.fsf@gitster.g/) of the Git project.\n\n## GitLab's previous Git release contributions\n* [GitLab's contributions to Git 2.44.0](https://about.gitlab.com/blog/gitlabs-contributions-to-git-2-44-0/)\n* [GitLab's contributions to Git 2.43.0](https://about.gitlab.com/blog/the-contributions-we-made-to-the-git-2-43-release/)\n* [GitLab's contributions to Git 2.42.0](https://about.gitlab.com/blog/contributions-to-git-2-42-release/)\n* [GitLab's contributions to Git 2.41.0](https://about.gitlab.com/blog/contributions-to-latest-git-release/)\n",[9,266],{"slug":2125,"featured":6,"template":687},"whats-new-in-git-2-45-0","content:en-us:blog:whats-new-in-git-2-45-0.yml","Whats New In Git 2 45 0","en-us/blog/whats-new-in-git-2-45-0.yml","en-us/blog/whats-new-in-git-2-45-0",{"_path":2131,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":2132,"content":2137,"config":2142,"_id":2144,"_type":13,"title":2145,"_source":15,"_file":2146,"_stem":2147,"_extension":18},"/en-us/blog/whats-new-in-git-2-46-0",{"title":2133,"description":2134,"ogTitle":2133,"ogDescription":2134,"noIndex":6,"ogImage":912,"ogUrl":2135,"ogSiteName":672,"ogType":673,"canonicalUrls":2135,"schema":2136},"What’s new in Git 
2.46.0?","Here are highlights of release contributions from GitLab's Git team and the wider Git community, including reference backend migration tooling and transactional symbolic reference updates.","https://about.gitlab.com/blog/whats-new-in-git-2-46-0","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"What’s new in Git 2.46.0?\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Justin Tobler\"}],\n        \"datePublished\": \"2024-07-29\",\n      }",{"title":2133,"description":2134,"authors":2138,"heroImage":912,"date":2139,"body":2140,"category":680,"tags":2141},[2101],"2024-07-29","The Git project recently released [Git v2.46.0](https://lore.kernel.org/git/xmqqzfq0i0qa.fsf@gitster.g/T/#u). Let's look at a few notable highlights from this release, which includes contributions from GitLab's Git team and the wider Git community.\n\n## Tooling to migrate reference backends\n\nIn the previous [Git 2.45.0](https://gitlab.com/gitlab-org/git/-/raw/master/Documentation/RelNotes/2.45.0.txt?ref_type=heads)\nrelease, the reftables format was introduced as a new backend for storing\nreferences. This new reference format solves some challenges that large\nrepositories face as the number of references scales. If you are not yet\nfamiliar with the reftables backend, check out our previous [Git release blog post](https://about.gitlab.com/blog/whats-new-in-git-2-45-0/) where the feature was introduced and our beginner’s guide to [learn more about how reftables work](https://about.gitlab.com/blog/a-beginners-guide-to-the-git-reftable-format/).\n\nThe reftable backend has a different on-disk format than the pre-existing files backend. Consequently, to use reftables on an existing repository requires a conversion between the different formats. To accomplish this, a new git-refs(1) command has been introduced with the `migrate` subcommand to perform reference backend migrations. 
Below is an example of how this command can be used.\n\n```shell\n# Initialize a new repository as “bare” so it does not contain reflogs.\n$ git init --bare .\n$ git commit --allow-empty -m \"init\"\n# Populate repository with references in the files backend.\n$ git branch foo\n$ git branch bar\n$ tree .git/refs\n.git/refs\n├── heads\n│   ├── bar\n│   ├── foo\n│   ├── main\n└── tags\n# Perform reference migration to reftables format.\n$ git refs migrate --ref-format=reftable\n# Check that reftables backend is now in use.\n$ tree .git/reftable\n.git/reftable\n├── 0x000000000001-0x000000000001-a3451eed.ref\n└── tables.list\n# Check the repository config to see the updated `refstorage` format.\n$ cat config\n[core]\n        repositoryformatversion = 1\n        filemode = true\n        bare = true\n        ignorecase = true\n        precomposeunicode = true\n[extensions]\n        refstorage = reftable\n```\n\nOnce a repository has been migrated, the on-disk format is changed to begin\nusing the reftable backend. Git operations on the repository continue to\nfunction and interact with remotes the same as before. The migration only\naffects how references are stored internally for the repository. If you wish to go back to the files reference backend, you can accomplish this with the same command by instead specifying `--ref-format=files`.\n\nThe migration tooling currently has some notable limitations. The reflogs in a repository are a component of a reference backend and would also require\nmigration between formats. Unfortunately, the tooling is not yet capable of\nconverting reflogs between the files and reftables backends. Also, a repository with worktrees essentially has multiple ref stores and the migration tool is not yet capable of handling this scenario. Therefore, if a repository contains reflogs or worktrees, reference migration is currently unavailable. 
These limitations may be overcome in future versions.\n\nBecause a bare Git repository does not have reflogs, it is easier to migrate. To migrate a standard non-bare repository, reflogs must be pruned first. Therefore, any repository without reflogs or worktrees can be migrated. With these limitations in mind, this tool can be used to begin taking advantage of the reftables backend in your existing repositories.\n\nThis project was led by [Patrick Steinhardt](https://gitlab.com/pks-gitlab).\n\n## Transactional symref updates\n\nThe [git-update-ref(1)](https://git-scm.com/docs/git-update-ref) command\nperforms reference updates in a Git repository. These reference updates can also be performed atomically in bulk with transactions by using\n`git update-ref --stdin` and passing update-ref instructions on stdin. Below is an example of how this is done.\n\n```shell\n$ git init .\n$ git branch -m main\n$ git commit --allow-empty -m \"foo\" && git commit --allow-empty -m \"bar\"\n# Retrieve the object ID of the two commits created.\n$ git rev-parse main~ main\n567aac2b3d1fbf0bd2433f669eb0b82a0348775e\n3b13462a9a42e0a3130b9cbc472ab479d3ef0631\n# Start transaction, provide update-ref instructions, and commit.\n$ git update-ref --stdin \u003C\u003CEOF\n> start\n> create refs/heads/new-ref 3b13462a9a42e0a3130b9cbc472ab479d3ef0631\n> update refs/heads/main 567aac2b3d1fbf0bd2433f669eb0b82a0348775e\n> commit\n> EOF\n$ git for-each-ref\n567aac2b3d1fbf0bd2433f669eb0b82a0348775e commit refs/heads/main\n3b13462a9a42e0a3130b9cbc472ab479d3ef0631 commit refs/heads/my-ref\n```\n\nFrom this example, once the transaction is committed, a new branch is created pointing to the “bar” commit and the main branch is updated to point to the previous “foo” commit. Committing the transaction performs the specified reference updates atomically. 
If an individual reference update fails, the transaction is aborted and no reference updates are performed.\n\nA notable absence here is instructions to support symref updates in these\ntransactions. If a user wants to update a symref along with other references\natomically in the same transaction, there is no tooling to do so. In this\nrelease, the `symref-create`, `symref-update`, `symref-delete`, and\n`symref-verify` instructions are introduced to provide this functionality.\n\n```shell\n# Create a symref that will be updated during the next operation.\n$ git symbolic-ref refs/heads/symref refs/heads/main\n# The --no-deref flag is required to ensure the symref itself is updated.\n$ git update-ref --stdin --no-deref \u003C\u003CEOF\n> start\n> symref-create refs/heads/new-symref refs/heads/main\n> symref-update refs/heads/symref refs/heads/new-ref\n> commit\n> EOF\n$ git symbolic-ref refs/heads/symref\nrefs/heads/new-ref\n$ git symbolic-ref refs/heads/new-symref\nrefs/heads/main\n```\n\nFrom the above example, a new symbolic reference is created and another is\nupdated in a transaction. These new symref instructions can be used in\ncombination with the pre-existing instructions to perform all manner of\nreference updates now in a single transaction. Check out the\n[documentation](https://git-scm.com/docs/git-update-ref) for more information regarding each of these new instructions.\n\nThis project was led by [Karthik Nayak](https://gitlab.com/knayakgl).\n\n## UX improvements for git-config(1)\n\nThe git-config(1) command allows repository and global options to be viewed and configured. The modes used to interact with configuration can be selected explicitly using flags or determined implicitly based on the number of arguments provided to the command. 
For example:\n\n```shell\n$ git config --list\n# Explicit retrieval of username configuration\n$ git config --get user.name\n# Implicit retrieval of username configuration\n$ git config user.name\n# Explicit setting of username configuration\n$ git config --set user.name \"Sidney Jones\"\n# Implicit setting of username configuration\n$ git config user.name \"Sidney Jones\"\n# An optional third argument is also accepted. What do you think this does?\n$ git config \u003Cname> [\u003Cvalue> [\u003Cvalue-pattern>]]\n```\n\nOverall, the [git-config(1)](https://git-scm.com/docs/git-config) user\ninterface is not consistent with how other more modern Git commands work where you usually use subcommands. For example, `git remote list`. This release introduces `list`, `get`, `set`, `unset`, `rename-section`, `remove-section`, and `edit` as subcommands for use with the config command while also keeping the old-style syntax available. This change aims to improve user experience by adapting the config command to follow more UI practices and better conform to other commands within Git. For example:\n\n```shell\n$ git config list\n$ git config get user.name\n$ git config set user.name \"Sidney Jones\"\n```\n\nThis project was led by [Patrick Steinhardt](https://gitlab.com/pks-gitlab).\n\n## Addressed performance regression\n\nGit operations that leverage attributes rely on reading `.gitattributes` files found in the repository’s working-tree. This is problematic for bare Git repositories because by definition they lack a working-tree. To get around this, Git has the `attr.tree` configuration that allows a source tree to be specified and used to lookup attributes from.\n\nIn Git release 2.43.0, Git started using the tree of `HEAD` as the source of Git attributes for bare repositories by default. Unfortunately, the additional overhead due to scanning for Git attributes files had severe performance impacts. 
This is because, when `attr.tree` is set, each attribute lookup requires walking the source tree to check for an associated `.gitattributes` file. The larger and deeper the source tree of the repository is, the more pronounced the performance regression becomes. For example, benchmarks run on the linux.git repository showed\ngit-pack-objects(1) taking 1.68 times longer to complete. This could lead to slowdowns when performing clones or fetches.\n\n```\n# attr.tree set to HEAD as done by default in Git version 2.43.0.\nBenchmark 1: git -c attr.tree=HEAD pack-objects --all --stdout \u003C/dev/null >/dev/null\n  Time (mean ± σ):     133.807 s ±  4.866 s    [User: 129.034 s, System: 6.671 s]\n  Range (min … max):   128.447 s … 137.945 s    3 runs\n\n# attr.tree is set to an empty tree to disable attribute lookup as done in Git versions prior to 2.43.0.\nBenchmark 2: git -c attr.tree=4b825dc642cb6eb9a060e54bf8d69288fbee4904 pack-objects --all --stdout \u003C/dev/null >/dev/null\n  Time (mean ± σ):     79.442 s ±  0.822 s    [User: 77.500 s, System: 6.056 s]\n  Range (min … max):   78.583 s … 80.221 s    3 runs\n```\n\nSome of the most notable Git commands that were affected were `clone`, `pull`, `fetch`, and `diff` when, as previously mentioned, used on repositories with large or deep trees. Consequently, the `attr.tree` configuration was partially reverted to no longer be set to `HEAD` by default to address the performance regression. To learn more, check out this\n[thread](https://lore.kernel.org/git/CAKOHPAn1btewYTdLYWpW+fOaXMY+JQZsLCQxUSwoUqnnFN_ohA@mail.gmail.com/) on the mailing list.\n\n## Unit-test migration\n\nHistorically, testing in the Git project has been done via end-to-end tests\nimplemented as shell scripts. The Git project has relatively recently\nintroduced a unit-testing framework written in C. 
This new testing framework\nbrings opportunities for more in-depth testing of low-level implementation\ndetails at the individual function call level and helps complement the existing end-to-end tests. There are some existing end-to-end tests that are a better fit as unit-tests and thus are good candidates to be ported.\n\nThis year, GitLab is again helping mentor [Google Summer of Code (GSoC)](https://summerofcode.withgoogle.com/) contributors working in the Git project. Thanks to efforts from these ongoing GSoC projects and also the wider Git community, some existing tests are being refactored and migrated to the unit-testing framework. During this last release cycle, there have been several contributions towards the goal of improving the testing in the Git project. To follow development progress for these GSoC contributor projects, check out [Chandra’s](https://chand-ra.github.io/) and [Ghanshyam’s](https://spectre10.github.io/posts/) blogs.\n\n## Bundle URI fixes\n\nUsually when a client fetches from a remote repository, all required objects\nare sent in a packfile computed by the remote server. To avoid some of this\ncomputation, servers can opt to advertise prebuilt “bundles” stored separately from the remote server which contain sets of references and objects that the client may need. The client can fetch these bundles first through a mechanism called [bundle-uri](https://git-scm.com/docs/bundle-uri).\n\nThanks to [Xing Xin](https://lore.kernel.org/git/pull.1730.git.1715742069966.gitgitgadget@gmail.com/), an issue was identified and fixed where Git, despite having downloaded some bundles, was still downloading everything from the remote as if there were no bundles. This was due to Git not correctly discovering all the downloaded bundles, which resulted in having to fetch the consecutive ones from the remote. 
With this fixed, remotes using the bundle-uri mechanism can avoid having to perform redundant work and improve performance.\n\n## Read more\n\nThis article highlighted just a few of the contributions made by GitLab and\nthe wider Git community for this latest release. You can learn about these from the [official release announcement](https://lore.kernel.org/git/xmqqzfq0i0qa.fsf@gitster.g/T/#u) of the Git project. Also, check out our [previous Git release blog posts](https://about.gitlab.com/blog/tags/git/) to see other past highlights of contributions from GitLab team members.",[9,683,266],{"slug":2143,"featured":90,"template":687},"whats-new-in-git-2-46-0","content:en-us:blog:whats-new-in-git-2-46-0.yml","Whats New In Git 2 46 0","en-us/blog/whats-new-in-git-2-46-0.yml","en-us/blog/whats-new-in-git-2-46-0",{"_path":2149,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":2150,"content":2156,"config":2161,"_id":2163,"_type":13,"title":2164,"_source":15,"_file":2165,"_stem":2166,"_extension":18},"/en-us/blog/whats-new-in-git-2-47-0",{"title":2151,"description":2152,"ogTitle":2151,"ogDescription":2152,"noIndex":6,"ogImage":2153,"ogUrl":2154,"ogSiteName":672,"ogType":673,"canonicalUrls":2154,"schema":2155},"What's new in Git 2.47.0?","Learn about the latest version of Git, including new global variables to configure reference and object hash formats. 
Discover contributions from GitLab's Git team and the wider Git community.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749663691/Blog/Hero%20Images/AdobeStock_752438815.jpg","https://about.gitlab.com/blog/whats-new-in-git-2-47-0","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"What's new in Git 2.47.0?\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Justin Tobler\"}],\n        \"datePublished\": \"2024-10-07\",\n      }",{"title":2151,"description":2152,"authors":2157,"heroImage":2153,"date":2158,"body":2159,"category":680,"tags":2160},[2101],"2024-10-07","The Git project recently released [Git v2.47.0](https://lore.kernel.org/git/xmqqa5fg9bsz.fsf@gitster.g/).\nLet's look at a few notable highlights from this release, which includes\ncontributions from GitLab's Git team and the wider Git community.\n\n## New global configuration options\n\nIf you have been following recent Git releases, you are probably familiar with the new \"reftable\" reference backend that became available with\n[Git version 2.45](https://about.gitlab.com/blog/whats-new-in-git-2-45-0/). Check out our [Beginner's guide to the Git reftable format](https://about.gitlab.com/blog/a-beginners-guide-to-the-git-reftable-format/) to learn more. Previously, in order to initialize a repository with the \"reftable\" format, the `--ref-format` option needed to be passed to git-init(1):\n\n```sh\n$ git init --ref-format reftable\n```\n\nWith the 2.47 release, Git now has the `init.defaultRefFormat` configuration\noption, which tells Git which reference backend to use when initializing a\nrepository. This can be used to override the default \"files\" backend and begin using the \"reftable\" backend. 
To configure, execute the following:\n\n```sh\n$ git config set --global init.defaultRefFormat reftable\n```\n\nAs some of you may know, the object hash format used by Git repositories is\nalso configurable. By default, repositories are initialized to use the SHA-1\nobject format. An alternative is the SHA-256 format, which is more secure and future-proof. You can read more about this in one of our\n[previous blog posts on SHA-256 support in Gitaly](https://about.gitlab.com/blog/sha256-support-in-gitaly/#what-is-sha-256%3F). A SHA-256 repository can be created by passing the `--object-format` option to git-init(1):\n\n```sh\n$ git init --object-format sha256\n```\n\nIn this Git release another configuration option, `init.defaultObjectFormat`, has been added. This option tells Git which object format to use by default when initializing a repository. To configure, execute the following:\n\n```sh\n$ git config set --global init.defaultObjectFormat sha256\n```\n\nSomething to note, SHA-256 repositories are not interoperable with SHA-1\nrepositories and not all forges support hosting SHA-256 repositories. GitLab\nrecently announced [experimental support for SHA-256 repositories](https://about.gitlab.com/blog/gitlab-now-supports-sha256-repositories/) if you want to try it out.\n\nThese options provide a useful mechanism to begin using these repository\nfeatures without having to consciously think about it every time you initialize a new repository.\n\nThis project was led by [Patrick Steinhardt](https://gitlab.com/pks-gitlab).\n\n## New subcommand for git-refs(1)\n\nIn the previous Git release, the [git-refs(1)](https://git-scm.com/docs/git-refs) command was introduced to provide low-level access to references in a\nrepository and provided the \"migrate\" subcommand to convert between reference backends. This release adds a new \"verify\" subcommand which allows the user to check the reference database for consistency. 
To verify the consistency of a repository, we often execute [git-fsck(1)](https://git-scm.com/docs/git-fsck).\n\nNotably, this command does not explicitly verify the reference database of the repository though. With the introduction of the \"reftable\" reference format, which is a binary format and thus harder to inspect manually, it is now even more important that tooling be established to fill this gap. Let's set up a repository with an invalid reference to demonstrate:\n\n```sh\n# The \"files\" backend is used so we can easily create an invalid reference.\n$ git init --ref-format files\n$ git commit --allow-empty -m \"init\"\n# A lone '@' is not a valid reference name.\n$ cp .git/refs/heads/main .git/refs/heads/@\n$ git refs verify\nerror: refs/heads/@: badRefName: invalid refname format\n```\n\nWe can see the invalid reference was detected and an error message printed to the user. While this tooling is not something the end-user will likely run, it is particularly useful on the server side to ensure repositories remain consistent. Eventually, the goal is to integrate this command as part of git-fsck(1) to provide a unified way to execute repository consistency checks.\n\nThis project was led by Jialuo She as part of the Google Summer of Code. To\nlearn more, you can read Jialuo's [GSoC report](https://luolibrary.com/2024/08/25/GSoC-Final-Report/).\n\n## Ongoing reftables work\n\nThis release also includes fixes for some bugs found in the \"reftable\" backend. One of these bugs is particularly interesting and revolves around how table compaction was being performed.\n\nAs you may recall, the reftable backend consists of a series of tables\ncontaining the state of all the references in the repository. Each atomic set of reference changes results in a new table being written and recorded in the \"tables.list\" file. To reduce the number of tables present, after each reference update, the tables are compacted to follow a geometric sequence by file size. 
After the tables are compacted, the \"tables.list\" file is updated to reflect the new on-disk state of the reftables.\n\nBy design, concurrent table writes and compaction is allowed. Synchronization at certain points is controlled through the use of lock files. For example, when compaction is starting the \"tables.list\" file is initially locked so the file can be consistently read and the tables requiring compaction can also be locked. Since the actual table compaction can take a while the lock is released, allowing concurrent writes to proceed. This is safe because concurrent writers know that they must not modify the now-locked tables which are about to be compacted. When the newly compacted tables have finished being written, the \"tables.list\" file is locked again and this time it is updated to reflect the new table state.\n\nThere is a problem though: What happens if a concurrent reference update writes a new table to the \"tables.list\" in the middle of table compaction after the initial lock was released, but before the new list file was written? If this race were to occur, the compacting process would not know about the new table and consequently rewrite the \"tables.list\" file without the new table. This effectively drops the concurrent update and could result in references not being added, updated, or removed as expected.\n\nLuckily, the fix to remediate this problem is rather straightforward. When the compacting process acquires the lock to write to the \"tables.list\" it must first check if any updates to the file have occurred and reload the file. Doing so ensures any concurrent table updates are also reflected appropriately. 
For more information on this fix, check out the corresponding\n[mailing-list thread](https://lore.kernel.org/git/cover.1722435214.git.ps@pks.im/).\n\nThis project was led by [Patrick Steinhardt](https://gitlab.com/pks-gitlab).\n\n## Fixes for git-maintenance(1)\n\nAs a repository grows, it is important that it is properly maintained. By\ndefault, Git executes\n[git-maintenance(1)](https://git-scm.com/docs/git-maintenance) after certain\noperations to keep the repository healthy. To avoid performing unnecessary\nmaintenance, the `--auto` option is specified which uses defined heuristics to determine whether maintenance tasks should be run. The command can be\nconfigured to perform various different maintenance tasks, but by default, it simply executes [git-gc(1)](https://git-scm.com/docs/git-gc) in the background and allows the user to carry on with their business.\n\nThis works as expected until maintenance is configured to perform non-default maintenance tasks. When this happens the configured maintenance tasks are performed in the foreground and the initial maintenance process doesn't exit until all tasks complete. Only the \"gc\" task detaches into the background as expected. It turns out this was because git-gc(1), when run with `--auto`, was accidentally detaching itself, and other maintenance tasks had no means to do so. This had the potential to slow down certain Git commands as auto-maintenance had to run to completion before they could exit.\n\nThis release addresses this issue by teaching git-maintenance(1) the `--detach` option, which allows the whole git-maintenance(1) process to run in the background instead of individual tasks. The auto-maintenance performed by Git was also updated to use this new option. 
For more information on this fix, check out the [mailing-list thread](https://lore.kernel.org/git/cover.1723533091.git.ps@pks.im/).\n\nA little earlier it was mentioned that the auto-maintenance uses a set of\nheuristics to determine whether or not certain maintenance operations should be performed. Unfortunately for the \"files\" reference backend, when\n[git-pack-refs(1)](https://git-scm.com/docs/git-pack-refs) executes with the\n`--auto` option, there is no such heuristic and loose references are\nunconditionally packed into a \"packed-refs\" file. For repositories with many\nreferences, rewriting the \"packed-refs\" file can be quite time-consuming.\n\nThis release also introduces a heuristic that decides whether it should pack\nloose references in the \"files\" backend. This heuristic takes into account the size of the existing \"packed-refs\" file and the number of loose references present in the repository. The larger the \"packed-refs\" file gets, the higher the threshold for the number of loose references before reference packing occurs. This effectively makes reference packing in the \"files\" backend less aggressive while still keeping the repository in a maintained state. Check out the [mailing-list thread](https://lore.kernel.org/git/cover.1725280479.git.ps@pks.im/)\nfor more info.\n\nThis project was led by [Patrick Steinhardt](https://gitlab.com/pks-gitlab).\n\n## Code refactoring and maintainability improvements\n\nIn addition to functional changes, there is also work being done to refactor\nand clean up the code. These improvements are also valuable because they help move the project closer toward the longstanding goal of libifying its internal components. To read more, here is a recent\n[update thread](https://lore.kernel.org/git/eoy2sjhnul57g6crprxi3etgeuacjmgxpl4yllstih7woyuebm@bd62ib3fi2ju/) regarding libification.\n\nOne area of improvement has been around resolving memory leaks. The Git project has quite a few memory leaks. 
For the most part, these leaks don't cause much trouble because usually a Git process only runs for a short amount of time and the system cleans up after, but in the context of libification it becomes something that should be addressed. Tests in the project can be compiled with a leak sanitizer to detect leaks, but due to the presence of existing leaks, it is difficult to validate and enforce that new changes do not introduce new leaks. There has been an ongoing effort to fix all memory leaks surfaced by existing tests in the project. Leak-free tests are subsequently marked with `TEST_PASSES_SANITIZE_LEAK=true` to indicate that they are expected to be free of leaks going forward. Prior to this release, the project had 223 test files containing memory leaks. This has now been whittled down to just 60 in this release.\n\nAnother ongoing effort has been to reduce the use of global variables\nthroughout the project. One such notorious global variable is `the_repository`, which contains the state of the repository being operated on and is referenced all over the project. This release comes with a number of patches that remove uses of `the_repository` in favor of directly passing the value where needed. Subsystems in the Git project that still depend on `the_repository` have `USE_THE_REPOSITORY_VARIABLE` defined allowing the global to be used. Now the refs, config, and path subsystems no longer rely on its use.\n\nThis project was led by [Patrick Steinhardt](https://gitlab.com/pks-gitlab)\nwith the help of [John Cai](https://gitlab.com/jcaigitlab) and\n[Jeff King](https://github.com/peff).\n\n## Read more\n\nThis blog post highlighted just a few of the contributions made by GitLab and the wider Git community for this latest release. You can learn about these from the [official release announcement](https://lore.kernel.org/git/xmqqa5fg9bsz.fsf@gitster.g/)\nof the Git project. 
Also, check out our [previous Git release blog posts](https://about.gitlab.com/blog/tags/git/)\nto see other past highlights of contributions from GitLab team members.\n\n- [What’s new in Git 2.46.0?](https://about.gitlab.com/blog/whats-new-in-git-2-46-0/)\n- [What's new in Git 2.45.0?](https://about.gitlab.com/blog/whats-new-in-git-2-45-0/)\n- [A beginner's guide to the Git reftable format](https://about.gitlab.com/blog/a-beginners-guide-to-the-git-reftable-format/)\n- [Git pull vs. git fetch: What's the difference?](https://about.gitlab.com/blog/git-pull-vs-git-fetch-whats-the-difference/)",[9,683,266],{"slug":2162,"featured":90,"template":687},"whats-new-in-git-2-47-0","content:en-us:blog:whats-new-in-git-2-47-0.yml","Whats New In Git 2 47 0","en-us/blog/whats-new-in-git-2-47-0.yml","en-us/blog/whats-new-in-git-2-47-0",{"_path":2168,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":2169,"content":2174,"config":2179,"_id":2181,"_type":13,"title":2182,"_source":15,"_file":2183,"_stem":2184,"_extension":18},"/en-us/blog/whats-new-in-git-2-48-0",{"title":2170,"description":2171,"ogTitle":2170,"ogDescription":2171,"noIndex":6,"ogImage":2153,"ogUrl":2172,"ogSiteName":672,"ogType":673,"canonicalUrls":2172,"schema":2173},"What’s new in Git 2.48.0?","Learn about the latest version of Git, including a new build system and optimization in the new reftable backend. 
Discover contributions from GitLab's Git team and the Git community.","https://about.gitlab.com/blog/whats-new-in-git-2-48-0","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"What’s new in Git 2.48.0?\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Christian Couder\"}],\n        \"datePublished\": \"2025-01-10\",\n      }",{"title":2170,"description":2171,"authors":2175,"heroImage":2153,"date":2176,"body":2177,"category":680,"tags":2178},[876],"2025-01-10","The Git project recently released [Git 2.48.0](https://lore.kernel.org/git/xmqqplku7cvm.fsf@gitster.g/). Let's look at a few notable highlights from this release, which includes contributions from GitLab's Git team and the wider Git community.\n\n## Meson build system\n\nFor a long time, Git could be built using either a [Makefile](https://en.wikipedia.org/wiki/GNU_Make)-based build system or an [Autoconf](https://en.wikipedia.org/wiki/Autoconf)-based build system. Git developers have been using mostly the Makefile-based build system, so\n[the Autoconf-based build system has lagged behind](https://lore.kernel.org/git/GV1PR02MB848925A79A9DD733848182D58D662@GV1PR02MB8489.eurprd02.prod.outlook.com/) in features and maintenance. Another issue was that a lot of Windows\ndevelopers use integrated development environments (IDEs) that don’t\nhave good support for Makefile- and Autoconf-based build systems.\n\nIn 2020, support for building Git using [CMake](https://cmake.org/) was added. CMake added better Windows support and IDE integration, especially for Visual\nStudio. Some modern build system features like out-of-source builds were also included.\n\nRecently, it appeared the CMake support was also lagging\nbehind and that it might never be a good option to replace the two other\nbuild systems. 
So [Patrick Steinhardt](https://gitlab.com/pks-gitlab), GitLab Git Engineering Manager, implemented support for the [Meson](https://mesonbuild.com/) build\nsystem with the goal of eventually replacing the Autoconf-, CMake-, and\nmaybe the Makefile-based build systems.\n\nThe new Meson-based build system has the following advantages:\n* Allows users to easily find the available build options, something which is difficult with Makefiles and CMake\n* Has a simple syntax compared to Autoconf and CMake\n* Supports many different operating systems, compilers, and IDEs\n* Supports modern build system features like out-of-source builds\n\nHere is an example of how it can actually be used to build Git:\n\n```shell\n$ cd git             \t# go into the root of Git's source code\n$ meson setup build/ \t# setup \"build\" as a build directory\n$ cd build           \t# go into the \"build\" directory\n$ meson compile      \t# actually build Git\n$ meson test         \t# test the new build\n$ meson install      \t# install the new build\n\n```\n\nMultiple build directories can be set up using `meson setup \u003Cbuild_dir>`, and the configuration of the build inside a build directory can be viewed or changed by running `meson configure` inside the build directory.\n\nMore information on how to build Git using Meson can be found at the top of the [`meson.build` file](https://gitlab.com/gitlab-org/git/-/blob/master/meson.build) in the Git code repository. 
A\n[comparison of the different build systems](https://gitlab.com/gitlab-org/git/-/blob/master/Documentation/technical/build-systems.txt) for Git is available as part of Git's technical documentation.\n\nThis project was led by [Patrick Steinhardt](https://gitlab.com/pks-gitlab).\n\n## Git is now memory-leak-free (as exercised by the test suite)\n\nIn our Git release blog post about the previous Git 2.47.0 release, we\ntalked about our [ongoing effort to fix all memory leaks](https://about.gitlab.com/blog/whats-new-in-git-2-47-0/#code-refactoring-and-maintainability-improvements) surfaced by existing tests in the project. We said that prior to the Git 2.47.0 release, the project had 223 test files containing memory\nleaks, and that this had been whittled down to just 60.\n\nWe are pleased to report that the memory leaks in all 60 remaining test files have been resolved. As a result, Git, as exercised by the test suite, is now free of memory leaks. This is an important step towards the longstanding goal of “libifying” Git internal components (which means converting those components into internal libraries). It will also help with optimizing Git for memory usage.\n\nNow, any newly added test must be leak-free by default. 
It's still\npossible to have leaking tests, but the authors will have to use an\nescape hatch for that and provide good arguments why their test cannot\nbe made leak free.\n\nThis project was led by [Patrick Steinhardt](https://gitlab.com/pks-gitlab).\n\n## Improved bundle URI checks\n\nIn our Git release blog post about the Git 2.46.0 release, we talked\nabout some [bundle URI fixes](https://about.gitlab.com/blog/whats-new-in-git-2-46-0/#bundle-uri-fixes)\nby [Xing Xin](https://lore.kernel.org/git/pull.1730.git.1715742069966.gitgitgadget@gmail.com/).\nAfter those fixes, Xing Xin worked on making it possible for [fetches using bundles to be fully checked](https://lore.kernel.org/git/pull.1730.v8.git.1718770053.gitgitgadget@gmail.com/)\nusing the [fsck](https://git-scm.com/docs/git-fsck) mechanism like regular fetches.\n\nWhen validating regular fetches, it's possible to specify\n[different severities](https://git-scm.com/docs/git-fsck#Documentation/git-fsck.txt-fsckltmsg-idgt) for [different fsck issues](https://git-scm.com/docs/git-fsck#_fsck_messages)\nto have fine-grained handling of what is accepted and what is rejected in a specific repository. 
This wasn't possible for fetches using bundles previously.\n\nTo further increase the usefulness and safety of [bundle-uri](https://git-scm.com/docs/bundle-uri), we [addressed this problem](https://lore.kernel.org/git/20241121204119.1440773-1-jltobler@gmail.com/) so that the different severities specified for different fsck issues\nare now used when checking fetches using bundles, too.\n\nThis project was led by [Justin Tobler](https://gitlab.com/justintobler).\n\n## Add reference consistency checks\n\nIn our Git release blog post about the Git 2.47.0 release, we mentioned Jialuo She's work on\n[adding a new 'verify' subcommand](https://about.gitlab.com/blog/whats-new-in-git-2-47-0/#new-subcommand-for-git-refs(1)) to git-refs(1) which was part of the\n[Google Summer of Code 2024](https://summerofcode.withgoogle.com/archive/2024/projects/ukm4PTEF) (GSoC 2024).\n\nIn that blog post, we said that eventually the goal was to integrate this new subcommand as part of git-fsck(1) to provide a unified way to execute repository consistency checks. Jialuo She has decided to work on that after his GSoC was over.\n\nThe result from [this effort](https://lore.kernel.org/git/ZrtrT1CPI4YUf5db@ArchLinux/)\nis that git-fsck(1) can now detect and handle a number of reference-related issues, like when the content of a reference is bad, when a symbolic link is used as a symbolic reference, or when the target of a symbolic reference doesn't point to a valid reference. 
We still need to call `git refs verify` as part of git-fsck(1), and have the former perform all non-backend-specific checks that the latter currently does, but we are closer to our end goal of a unified way to execute all refs consistency checks.\n\nThis project was led by Jialuo She.\n\n## Iterator reuse in reftables\n\nIn the [Git 2.45.0](https://gitlab.com/gitlab-org/git/-/raw/master/Documentation/RelNotes/2.45.0.txt) release, the 'reftables' format was introduced as a new backend for storing references (mostly branches and tags). If you are not yet\nfamiliar with the reftables backend, check out our previous [Git release blog post](https://about.gitlab.com/blog/whats-new-in-git-2-45-0/) where the feature was introduced and our beginner’s guide to [learn more about how reftables work](https://about.gitlab.com/blog/a-beginners-guide-to-the-git-reftable-format/).\n\nSince that release, we continued to improve this backend, and we recently focused on improving its performance by [reusing some internal iterators](https://lore.kernel.org/git/cover.1730732881.git.ps@pks.im/) when reading random references. Before these changes, reading a single reference required us to create a whole new iterator, seek it to the correct location in the respective tables, and then read the next value from it, which can be quite inefficient when reading many references in quick succession. After the change we now only create a single iterator and reuse it to read multiple references, thus saving some overhead.\n\nThe result of this work is increased performance in a number of reftables-related use cases, especially a 7% speedup when creating many references in a transaction that performs many random reads. 
Furthermore, this creates the possibility for more optimizations as we can continue to reuse more state kept in the iterators.\n\nThis project was led by [Patrick Steinhardt](https://gitlab.com/pks-gitlab).\n\n## Support for reflogs in `git-refs migrate`\n\nAfter the 'reftables' backend was introduced in Git 2.45.0 (see the section above), we worked on tooling to migrate reference backends in Git 2.46.0, which consisted of adding a new `migrate` subcommand to git-refs(1).\n\nOur article about Git 2.46.0 [talked about this work](https://about.gitlab.com/blog/whats-new-in-git-2-46-0/#tooling-to-migrate-reference-backends) and mentioned some limitations that still existed. In particular, the article said:\n\n\"The reflogs in a repository are a component of a reference backend and would also require migration between formats. Unfortunately, the tooling is not yet capable of converting reflogs between the files and reftables backends.\"\n\nWe are pleased to report that we have [lifted this limitation in Git 2.48.0](https://lore.kernel.org/git/20241216-320-git-refs-migrate-reflogs-v4-0-d7cd3f197453@gmail.com/).\nReflogs can now also be migrated with `git refs migrate`. The migration tool is not yet capable of handling a repository with multiple worktrees, but this is the only limitation left. If you\ndon't use worktrees, you can already take advantage of the reftables backend in your existing repositories.\n\nThis project was led by [Karthik Nayak](https://gitlab.com/knayakgl).\n\n## Ref-filter optimization\n\nThe 'ref-filter' subsystem is some formatting code used by commands like `git for-each-ref`, `git branch` and `git tag` to sort, filter, format, and display information related to Git references.\n\nAs repositories grow, they can contain a huge number of references. 
This is why there is work not only on improving backends that store references, like the reftables backend (see above), but\nalso on optimizing formatting code, like the 'ref-filter' subsystem.\n\nWe recently [found a way](https://lore.kernel.org/git/d23c3e3ee7fdb49fcd05b4f2e52dd2a1cfdc10f2.1729510342.git.ps@pks.im/)\nto avoid temporarily buffering references and iterating several times on them in the ref-filter code when they should be processed in the same sorting order as the order the backends provide them. This results in memory savings and makes certain commands up to 770 times faster in some\ncases.\n\nThis project was led by [Patrick Steinhardt](https://gitlab.com/pks-gitlab).\n\n## Read more\n\nThis blog post highlighted just a few of the contributions made by GitLab and the wider Git community for this latest release. You can learn about these from the official release announcement of the Git project. Also, check out [our previous Git release blog posts](https://about.gitlab.com/blog/tags/git/) to see other past highlights of contributions from GitLab team members.\n\n- [What’s new in Git 2.47.0?](https://about.gitlab.com/blog/whats-new-in-git-2-47-0/)\n- [What’s new in Git 2.46.0?](https://about.gitlab.com/blog/whats-new-in-git-2-46-0/)\n- [What’s new in Git 2.45.0](https://about.gitlab.com/blog/whats-new-in-git-2-45-0/)\n- [A beginner's guide to the Git reftable format](https://about.gitlab.com/blog/a-beginners-guide-to-the-git-reftable-format/)\n",[9,683,266],{"slug":2180,"featured":90,"template":687},"whats-new-in-git-2-48-0","content:en-us:blog:whats-new-in-git-2-48-0.yml","Whats New In Git 2 48 
0","en-us/blog/whats-new-in-git-2-48-0.yml","en-us/blog/whats-new-in-git-2-48-0",{"_path":2186,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":2187,"content":2192,"config":2197,"_id":2199,"_type":13,"title":2200,"_source":15,"_file":2201,"_stem":2202,"_extension":18},"/en-us/blog/whats-new-in-git-2-49-0",{"title":2188,"description":2189,"ogTitle":2188,"ogDescription":2189,"noIndex":6,"ogImage":2102,"ogUrl":2190,"ogSiteName":672,"ogType":673,"canonicalUrls":2190,"schema":2191},"What's new in Git 2.49.0?","Learn about the latest version of Git, including improved performance thanks to zlib-ng, a new name-hashing algorithm, and git-backfill(1).","https://about.gitlab.com/blog/whats-new-in-git-2-49-0","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"What's new in Git 2.49.0?\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Toon Claes\"}],\n        \"datePublished\": \"2025-03-14\",\n      }",{"title":2188,"description":2189,"authors":2193,"heroImage":2102,"date":2194,"body":2195,"category":680,"tags":2196},[1698],"2025-03-14","The Git project recently released [Git 2.49.0](https://lore.kernel.org/git/xmqqfrjfilc8.fsf@gitster.g/). 
Let's look at a few notable highlights from this release, which includes contributions from GitLab's Git team and the wider Git community.\n\nWhat's covered:\n- [git-backfill(1) and the new path-walk API](#git-backfill(1)-and-the-new-path-walk-api)\n- [Introduction of zlib-ng](#introduction-of-zlib-ng)\n- [Continued iteration on Meson](#continued-iteration-on-meson)\n- [Deprecation of .git/branches/ and .git/remotes/](#deprecation-of-.gitbranches%2F-and-.git%2Fremotes%2F)\n- [Rust bindings for libgit](#rust-bindings-for-libgit)\n- [New name-hashing algorithm](#new-name-hashing-algorithm)\n- [Promisor remote capability](#promisor-remote-capability)\n- [Thin clone using `--revision`](#thin-clone-using---revision)\n\n## git-backfill(1) and the new path-walk API\n\nWhen you [`git-clone(1)`](https://git-scm.com/docs/git-clone) a Git repository,\nyou can pass it the\n[`--filter`](https://git-scm.com/docs/git-clone#Documentation/git-clone.txt-code--filterltfilter-specgtcode)\noption. Using this option allows you to create a _partial clone_. In a partial\nclone the server only sends a subset of reachable objects according to the given\nobject filter. For example, creating a clone with `--filter=blob:none` will not\nfetch any blobs (file contents) from the server and create a _blobless clone_.\n\nBlobless clones have all the reachable commits and trees, but no blobs. When you\nperform an operation like\n[`git-checkout(1)`](https://git-scm.com/docs/git-checkout), Git will download\nthe missing blobs to complete that operation. 
For some operations, like\n[`git-blame(1)`](https://git-scm.com/docs/git-blame), this might result in\ndownloading objects one by one, which will slow down the command drastically.\nThis performance degradation occurs because `git-blame(1)` must traverse the\ncommit history to identify which specific blobs it needs, then request each\nmissing blob from the server separately.\n\nIn Git 2.49, a new subcommand `git-backfill(1)` is introduced, which can be\nused to download missing blobs in a blobless partial clone.\n\nUnder the hood, the `git-backfill(1)` command leverages the new path-walk API, which is different from how Git generally iterates over commits. Rather than iterating over the commits one at a time and recursively visiting the trees and blobs associated with each commit, the path-walk API does traversal by path. For each path, it adds a list of associated tree objects to a stack. This stack is then processed in a depth-first order. So, instead of processing every object in commit `1` before moving to commit `2`, it will process all versions of file `A` across all commits before moving to file `B`. This approach greatly improves performance in scenarios where grouping by path is essential.\n\nLet me demonstrate its use by making a blobless clone of [`gitlab-org/git`](https://gitlab.com/gitlab-org/git):\n\n```shell\n$ git clone --filter=blob:none --bare --no-tags git@gitlab.com:gitlab-org/git.git\nCloning into bare repository 'git.git'...\nremote: Enumerating objects: 245904, done.\nremote: Counting objects: 100% (1736/1736), done.\nremote: Compressing objects: 100% (276/276), done.\nremote: Total 245904 (delta 1591), reused 1547 (delta 1459), pack-reused 244168 (from 1)\nReceiving objects: 100% (245904/245904), 59.35 MiB | 15.96 MiB/s, done.\nResolving deltas: 100% (161482/161482), done.\n```\n\nAbove, we use `--bare` to ensure Git doesn't need to download any blobs to check\nout an initial branch. 
We can verify this clone does not contain any blobs:\n\n```sh\n$ git cat-file --batch-all-objects --batch-check='%(objecttype)' | sort | uniq -c\n  83977 commit\n 161927 tree\n```\n\nIf you want to see the contents of a file in the repository, Git has to download it:\n\n```sh\n$ git cat-file -p HEAD:README.md\nremote: Enumerating objects: 1, done.\nremote: Total 1 (delta 0), reused 0 (delta 0), pack-reused 1 (from 1)\nReceiving objects: 100% (1/1), 1.64 KiB | 1.64 MiB/s, done.\n\n[![Build status](https://github.com/git/git/workflows/CI/badge.svg)](https://github.com/git/git/actions?query=branch%3Amaster+event%3Apush)\n\nGit - fast, scalable, distributed revision control system\n=========================================================\n\nGit is a fast, scalable, distributed revision control system with an\nunusually rich command set that provides both high-level operations\nand full access to internals.\n\n[snip]\n```\n\nAs you can see above, Git first talks to the remote repository to download the blob before\nit can display it.\n\nWhen you would like to `git-blame(1)` that file, it needs to download a lot\nmore:\n\n```sh\n$ git blame HEAD README.md\nremote: Enumerating objects: 1, done.\nremote: Counting objects: 100% (1/1), done.\nremote: Total 1 (delta 0), reused 0 (delta 0), pack-reused 0 (from 0)\nReceiving objects: 100% (1/1), 1.64 KiB | 1.64 MiB/s, done.\nremote: Enumerating objects: 1, done.\nremote: Counting objects: 100% (1/1), done.\nremote: Total 1 (delta 0), reused 0 (delta 0), pack-reused 0 (from 0)\nReceiving objects: 100% (1/1), 1.64 KiB | 1.64 MiB/s, done.\nremote: Enumerating objects: 1, done.\nremote: Counting objects: 100% (1/1), done.\nremote: Total 1 (delta 0), reused 0 (delta 0), pack-reused 0 (from 0)\nReceiving objects: 100% (1/1), 1.64 KiB | 1.64 MiB/s, done.\nremote: Enumerating objects: 1, done.\n\n[snip]\n\ndf7375d772 README.md (Ævar Arnfjörð Bjarmason 2021-11-23 17:29:09 +0100  1) [![Build 
status](https://github.com/git/git/workflows/CI/badge.svg)](https://github.com/git/git/actions?query=branch%3Amaster+event%3Apush)\n5f7864663b README.md (Johannes Schindelin \t2019-01-29 06:19:32 -0800  2)\n28513c4f56 README.md (Matthieu Moy        \t2016-02-25 09:37:29 +0100  3) Git - fast, scalable, distributed revision control system\n28513c4f56 README.md (Matthieu Moy        \t2016-02-25 09:37:29 +0100  4) =========================================================\n556b6600b2 README\t(Nicolas Pitre       \t2007-01-17 13:04:39 -0500  5)\n556b6600b2 README\t(Nicolas Pitre       \t2007-01-17 13:04:39 -0500  6) Git is a fast, scalable, distributed revision control system with an\n556b6600b2 README\t(Nicolas Pitre       \t2007-01-17 13:04:39 -0500  7) unusually rich command set that provides both high-level operations\n556b6600b2 README\t(Nicolas Pitre       \t2007-01-17 13:04:39 -0500  8) and full access to internals.\n556b6600b2 README\t(Nicolas Pitre       \t2007-01-17 13:04:39 -0500  9)\n\n[snip]\n```\n\nWe've truncated the output, but as you can see, Git goes to the server for each\nrevision of that file separately. That's really inefficient. 
With\n`git-backfill(1)` we can ask Git to download all blobs:\n\n```shell\n$ git backfill\nremote: Enumerating objects: 50711, done.\nremote: Counting objects: 100% (15438/15438), done.\nremote: Compressing objects: 100% (708/708), done.\nremote: Total 50711 (delta 15154), reused 14730 (delta 14730), pack-reused 35273 (from 1)\nReceiving objects: 100% (50711/50711), 11.62 MiB | 12.28 MiB/s, done.\nResolving deltas: 100% (49154/49154), done.\nremote: Enumerating objects: 50017, done.\nremote: Counting objects: 100% (10826/10826), done.\nremote: Compressing objects: 100% (634/634), done.\nremote: Total 50017 (delta 10580), reused 10192 (delta 10192), pack-reused 39191 (from 1)\nReceiving objects: 100% (50017/50017), 12.17 MiB | 12.33 MiB/s, done.\nResolving deltas: 100% (48301/48301), done.\nremote: Enumerating objects: 47303, done.\nremote: Counting objects: 100% (7311/7311), done.\nremote: Compressing objects: 100% (618/618), done.\nremote: Total 47303 (delta 7021), reused 6693 (delta 6693), pack-reused 39992 (from 1)\nReceiving objects: 100% (47303/47303), 40.84 MiB | 15.26 MiB/s, done.\nResolving deltas: 100% (43788/43788), done.\n```\n\nThis backfills all blobs, turning the blobless clone into a full clone:\n\n```shell\n$ git cat-file --batch-all-objects --batch-check='%(objecttype)' | sort | uniq -c\n 148031 blob\n  83977 commit\n 161927 tree\n```\n\nThis [project](https://lore.kernel.org/git/pull.1820.v3.git.1738602667.gitgitgadget@gmail.com/)\nwas led by [Derrick Stolee](https://stolee.dev/) and was merged with\n[e565f37553](https://gitlab.com/gitlab-org/git/-/commit/e565f3755342caf1d21e22359eaf09ec11d8c0ae).\n\n## Introduction of zlib-ng\n\nAll objects in the `.git/` folder are compressed by Git using [`zlib`](https://zlib.net/). `zlib` is the reference implementation for the [RFC\n1950](https://datatracker.ietf.org/doc/html/rfc1950): ZLIB Compressed Data\nFormat. 
Created in 1995, `zlib` has a long history and is incredibly\nportable, even supporting many systems that predate the Internet. Because of its\nwide support of architectures and compilers, it has limitations in what it is\ncapable of.\n\nThe fork [`zlib-ng`](https://github.com/zlib-ng/zlib-ng) was created to\naccommodate the limitations. `zlib-ng` aims to be optimized for modern\nsystems. This fork drops support for legacy systems and instead brings in\npatches for Intel optimizations, some Cloudflare optimizations, and a couple\nother smaller patches.\n\nThe `zlib-ng` library itself provides a compatibility layer for `zlib`. The\ncompatibility later allows `zlib-ng` to be a drop-in replacement for `zlib`, but\nthat layer is not available on all Linux distributions. In Git 2.49:\n\n- A compatibility layer was added to the Git project.\n- Build options were added to both to the [`Makefile`](https://gitlab.com/gitlab-org/git/-/blob/b9d6f64393275b505937a8621a6cc4875adde8e0/Makefile#L186-187)\n  and [Meson Build file](https://gitlab.com/gitlab-org/git/-/blob/b9d6f64393275b505937a8621a6cc4875adde8e0/meson.build#L795-811).\n\nThese additions make it easier to benefit from the performance improvements of\n`zlib-ng`.\n\nIn local benchmarks, we've seen a ~25% speedup when using `zlib-ng` instead of `zlib`. And we're in the process of rolling out these changes to\nGitLab.com, too.\n\nIf you want to benefit from the gains of `zlib-ng`, first verify if Git\non your machine is already using `zlib-ng` by running\n`git version --build-options`:\n\n```shell\n$ git version --build-options\ngit version 2.47.1\ncpu: x86_64\nno commit associated with this build\nsizeof-long: 8\nsizeof-size_t: 8\nshell-path: /bin/sh\nlibcurl: 8.6.0\nOpenSSL: OpenSSL 3.2.2 4 Jun 2024\nzlib: 1.3.1.zlib-ng\n```\n\nIf the last line includes `zlib-ng` then your Git is already built\nusing the faster `zlib` variant. 
If not, you can either:\n\n- Ask the maintainer of the Git package you are using to include `zlib-ng` support.\n- Build Git yourself from source.\n\nThese [changes](https://gitlab.com/gitlab-org/git/-/commit/9d0e81e2ae3bd7f6d8a655be53c2396d7af3d2b0)\nwere [introduced](https://lore.kernel.org/git/20250128-b4-pks-compat-drop-uncompress2-v4-0-129bc36ae8f5@pks.im/)\nby [Patrick Steinhardt](https://gitlab.com/pks-gitlab).\n\n## Continued iteration on Meson\n\nIn our article about the Git 2.48 release,\nwe touched on [the introduction of the Meson build system](https://about.gitlab.com/blog/whats-new-in-git-2-48-0/#meson-build-system). [Meson](https://en.wikipedia.org/wiki/Meson_(software)) is\na build automation tool used by the Git project that at some point might replace [Autoconf](https://en.wikipedia.org/wiki/Autoconf),\n[CMake](https://en.wikipedia.org/wiki/CMake), and maybe even\n[Make](https://en.wikipedia.org/wiki/Make_(software)).\n\nDuring this release cycle, work continued on using Meson, adding various missing\nfeatures and stabilization fixes:\n\n  - [Improved test coverage for\n\tCI](https://lore.kernel.org/git/20250122-b4-pks-meson-additions-v3-0-5a51eb5d3dcd@pks.im/)\n\twas merged in\n\t[72f1ddfbc9](https://gitlab.com/gitlab-org/git/-/commit/72f1ddfbc95b47c6011bb423e6947418d1d72709).\n  - [Bits and pieces to use Meson in `contrib/`](https://lore.kernel.org/git/20250219-b4-pks-meson-contrib-v2-0-1ba5d7fde0b9@pks.im/)\n\twere merged in\n\t[2a1530a953](https://gitlab.com/gitlab-org/git/-/commit/2a1530a953cc4d2ae62416db86c545c7ccb73ace).\n  - [Assorted fixes and improvements to the build procedure based on\n\tmeson](https://lore.kernel.org/git/20250226-b4-pks-meson-improvements-v3-0-60c77cf673ae@pks.im/)\n\twere merged in\n\t[ab09eddf60](https://gitlab.com/gitlab-org/git/-/commit/ab09eddf601501290b5c719574fbe6c02314631f).\n  - [Making Meson aware of 
building\n\t`git-subtree(1)`](https://lore.kernel.org/git/20250117-b4-pks-build-subtree-v1-0-03c2ed6cc42e@pks.im/)\n\twas merged in\n\t[3ddeb7f337](https://gitlab.com/gitlab-org/git/-/commit/3ddeb7f3373ae0e309d9df62ada24375afa456c7).\n  - [Learn Meson to generate HTML documentation\n\tpages](https://lore.kernel.org/git/20241227-b4-pks-meson-docs-v2-0-f61e63edbfa1@pks.im/)\n\twas merged in\n\t[1b4e9a5f8b](https://gitlab.com/gitlab-org/git/-/commit/1b4e9a5f8b5f048972c21fe8acafe0404096f694).\n\nAll these efforts were carried out by [Patrick Steinhardt](https://gitlab.com/pks-gitlab).\n\n## Deprecation of .git/branches/ and .git/remotes/\n\nYou are probably aware of the existence of the `.git` directory, and what is\ninside. But have you ever heard about the sub-directories `.git/branches/` and\n`.git/remotes/`? As you might know, reference to branches are stored in\n`.git/refs/heads/`, so that's not what `.git/branches/` is for, and what about\n`.git/remotes/`?\n\nWay back in 2005, [`.git/branches/`](https://git-scm.com/docs/git-fetch#_named_file_in_git_dirbranches)\nwas introduced to store a shorthand name for a remote, and a few months later they were\nmoved to [`.git/remotes/`](https://git-scm.com/docs/git-fetch#_named_file_in_git_dirremotes).\nIn [2006](https://lore.kernel.org/git/Pine.LNX.4.63.0604301520460.2646@wbgn013.biozentrum.uni-wuerzburg.de/),\n[`git-config(1)`](https://git-scm.com/docs/git-config) learned to store\n[remotes](https://git-scm.com/docs/git-config#Documentation/git-config.txt-remoteltnamegturl).\nThis has become the standard way to configure remotes and, in 2011, the\ndirectories `.git/branches/` and `.git/remotes/` were\n[documented](https://gitlab.com/git-scm/git/-/commit/3d3d282146e13f2d7f055ad056956fd8e5d7ed29#e615263aaf131d42be8b0d0888ebd3fec954c6c9_132_124)\nas being \"legacy\" and no longer used in modern repositories.\n\nIn 2024, the document [BreakingChanges](https://git-scm.com/docs/BreakingChanges)\nwas started to outline breaking 
changes for the next major version of Git\n(v3.0). While this release is not planned to happen any time soon, this document\nkeeps track of changes that are expected to be part of that release.\nIn [8ccc75c245](https://gitlab.com/git-scm/git/-/commit/8ccc75c2452b5814d2445d60d54266293ca48674),\nthe use of the directories `.git/branches/` and `.git/remotes/` was added to\nthis document and that officially marks as them deprecated and to be removed in\nGit 3.0.\n\nThanks to [Patrick Steinhardt](https://gitlab.com/pks-gitlab) for\n[formalizing this deprecation](https://lore.kernel.org/git/20250122-pks-remote-branches-deprecation-v4-5-5cbf5b28afd5@pks.im/).\n\n## Rust bindings for libgit\n\nWhen compiling Git, an internal library `libgit.a` is made. This library\ncontains some of the core functionality of Git.\n\nWhile this library (and most of Git) is written in C, in Git 2.49 bindings were\nadded to make some of these functions available in Rust. To achieve this, two\nnew Cargo packages were created: `libgit-sys` and `libgit-rs`. These packages\nlive in the [`contrib/`](https://gitlab.com/gitlab-org/git/-/tree/master/contrib) subdirectory in the Git source tree.\n\nIt's pretty\n[common](https://doc.rust-lang.org/cargo/reference/build-scripts.html#-sys-packages)\nto split out a library into two packages when a [Foreign Function\nInterface](https://en.wikipedia.org/wiki/Foreign_function_interface) is used.\nThe `libgit-sys` package provides the pure interface to C functions and links to\nthe native `libgit.a` library. The package `libgit-rs` provides a high-level\ninterface to the functions in `libgit-sys` with a feel that is more idiomatic to\nRust.\n\nSo far, the functionality in these Rust packages is very limited. 
It only\nprovides an interface to interact with the `git-config(1)`.\n\nThis initiative was led by [Josh Steadmon](https://lore.kernel.org/git/8793ff64a7f6c4c04dd03b71162a85849feda944.1738187176.git.steadmon@google.com/) and was merged with [a4af0b6288](https://gitlab.com/gitlab-org/git/-/commit/a4af0b6288e25eb327ae9018cee09def9e43f1cd).\n\n## New name-hashing algorithm\n\nThe Git object database in `.git/` stores most of its data in packfiles. And\npackfiles are also used to submit objects between Git server and client over the\nwire.\n\nYou can read all about the format at\n[`gitformat-pack(5)`](https://git-scm.com/docs/gitformat-pack). One important\naspect of the packfiles is delta-compression. With delta-compression not every\nobject is stored as-is, but some objects are saved as a _delta_ of another\n_base_. So instead of saving the full contents of the objects, changes compared\nto another object are stored.\n\nWithout going into the details how these deltas are calculated or stored, you\ncan imagine that it is important group files together that are very similar. In\nv2.48 and earlier, Git looked at the last 16 characters of the path name to\ndetermine whether blobs might be similar. This algorithm is named version `1`.\n\nIn Git 2.49, version `2` is available. This is an iteration on version `1`, but\nmodified so the effect of the parent directory is reduced. 
You can specify the\nname-hash algorithm version you want to use with option `--name-hash-version` of\n[`git-repack(1)`](https://git-scm.com/docs/git-repack).\n\n[Derrick Stolee](https://stolee.dev/), who drove this project, did some\ncomparison in resulting packfile size after running `git repack -adf\n--name-hash-version=\u003Cn>`:\n\n| Repo                                          \t| Version 1 size   | Version 2 size |\n|---------------------------------------------------|-----------|---------|\n| [fluentui](https://github.com/microsoft/fluentui) | 440 MB \t| 161 MB   |\n| Repo B                                        \t| 6,248 MB   | 856 MB   |\n| Repo C                                        \t| 37,278 MB  | 6,921 MB |\n| Repo D                                        \t| 131,204 MB | 7,463 MB |\n\nYou can read more of the details in the [patch\nset](https://lore.kernel.org/git/pull.1823.v4.git.1738004554.gitgitgadget@gmail.com/),\nwhich is merged in\n[aae91a86fb](https://gitlab.com/gitlab-org/git/-/commit/aae91a86fb2a71ff89a71b63ccec3a947b26ca51).\n\n## Promisor remote capability\n\nIt's known that Git isn't great in dealing with large files. There are some\nsolutions to this problem, like [Git LFS](https://git-lfs.com/), but there are\nstill some shortcomings. To give a few:\n\n- With Git LFS the user has to configure which files to put in LFS. The server has\n  no control about that and has to serve all files.\n- Whenever a file is committed to the repository, there is no way to get it out\n  again without rewriting history. This is annoying, especially for large files,\n  because they are stuck for eternity.\n- Users cannot change their mind on which files to put into Git LFS.\n- A tool like Git LFS requires significant effort to set up, learn, and use\n  correctly.\n\nFor some time, Git has had the concept of promisor remotes. 
This feature can be used to deal with large files, and in Git 2.49 this feature took a step forward.\n\nThe idea for the new “promisor-remote” capability is relatively simple: Instead of sending all\nobjects itself, a Git server can tell to the Git client \"Hey, go download these\nobjects from _XYZ_\". _XYZ_ would be a promisor remote.\n\nGit 2.49 enables the server to advertise the information of the promisor remote\nto the client. This change is an extension to\n[`gitprotocol-v2`](https://git-scm.com/docs/gitprotocol-v2). While the server\nand the client are transmitting data to each other, the server can send  names and URLs of the promisor remotes it knows\nabout.\n\nSo far, the client is not using the promisor remote info it gets from the server during clone, so all\nobjects are still transmitted from the remote the clone initiated from. We are planning to continue work on this feature, making it use promisor remote info from the server, and making it easier to use.\n\nThis [patch\nset](https://lore.kernel.org/git/20250218113204.2847463-1-christian.couder@gmail.com/)\nwas submitted by [Christian Couder](https://gitlab.com/chriscool) and merged\nwith\n[2c6fd30198](https://gitlab.com/gitlab-org/git/-/commit/2c6fd30198187c928cbf927802556908c381799c).\n\n## Thin clone using `--revision`\n\nA new `--revision` option was added to\n[`git-clone(1)`](https://git-scm.com/docs/git-clone). This enables you to create\na thin clone of a repository that only contains the history of the given\nrevision. The option is similar to `--branch`, but accepts a ref name (like\n`refs/heads/main`, `refs/tags/v1.0`, and `refs/merge-requests/123`) or a\nhexadecimal commit object ID. The difference to `--branch` is that it does not\ncreate a tracking branch and detaches `HEAD`. This means it's not suited if you\nwant to contribute back to that branch.\n\nYou can use `--revision` in combination with `--depth` to create a very minimal\nclone. A suggested use-case is for automated testing. 
When you have a CI system\nthat needs to check out a branch (or any reference) to perform autonomous\ntesting on the source code, having a minimal clone is all you need.\n\nThis\n[change](https://gitlab.com/gitlab-org/git/-/commit/5785d9143bcb3ef19452a83bc2e870ff3d5ed95a)\nwas\n[driven](https://lore.kernel.org/git/20250206-toon-clone-refs-v7-0-4622b7392202@iotcl.com/)\nby [Toon Claes](https://gitlab.com/toon).\n\n# Read more\n- [What’s new in Git 2.48.0?](https://about.gitlab.com/blog/whats-new-in-git-2-48-0/)\n- [What’s new in Git 2.47.0?](https://about.gitlab.com/blog/whats-new-in-git-2-47-0/)\n- [What’s new in Git 2.46.0?](https://about.gitlab.com/blog/whats-new-in-git-2-46-0/)",[266,683,9],{"slug":2198,"featured":90,"template":687},"whats-new-in-git-2-49-0","content:en-us:blog:whats-new-in-git-2-49-0.yml","Whats New In Git 2 49 0","en-us/blog/whats-new-in-git-2-49-0.yml","en-us/blog/whats-new-in-git-2-49-0",{"_path":2204,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":2205,"content":2211,"config":2218,"_id":2220,"_type":13,"title":2221,"_source":15,"_file":2222,"_stem":2223,"_extension":18},"/en-us/blog/whats-next-for-gitlab-ci",{"title":2206,"description":2207,"ogTitle":2206,"ogDescription":2207,"noIndex":6,"ogImage":2208,"ogUrl":2209,"ogSiteName":672,"ogType":673,"canonicalUrls":2209,"schema":2210},"From 2/3 of Git market to next-Gen CI system & auto DevOps","GitLab first became the standard for self hosting git with two-thirds of the market, then became the next generation CI system, and the next step is creating Auto DevOps.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749679783/Blog/Hero%20Images/whats-next-for-gitlab-ci.jpg","https://about.gitlab.com/blog/whats-next-for-gitlab-ci","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"From 2/3 of the self-managed Git market, to the next-generation CI system, to Auto DevOps\",\n        \"author\": 
[{\"@type\":\"Person\",\"name\":\"Sid Sijbrandij\"}],\n        \"datePublished\": \"2017-06-29\",\n      }",{"title":2212,"description":2207,"authors":2213,"heroImage":2208,"date":2215,"body":2216,"category":748,"tags":2217},"From 2/3 of the self-managed Git market, to the next-generation CI system, to Auto DevOps",[2214],"Sid Sijbrandij","2017-06-29","\n\nGitLab has transformed from offering just version control to becoming the first integrated product for DevOps. With GitLab you can go all the way from chatting about an idea to measuring it in production without spending time on configuring a bunch of tools. The version control part of GitLab is now used by 2/3 of the market that self host Git. The continuous integration (CI) part of GitLab is now the most popular next generation CI system. Today we introduce the future direction of GitLab: Auto DevOps.\n\n\u003C!-- more -->\n\nWhen we [announced our master plan in September of 2016](/blog/gitlab-master-plan/), we gave our vision for a tool that changes the way developers create software. Before the end of 2016 we [completed the master plan](/releases/2016/12/22/gitlab-8-15-released/) and introduced Auto Deploy. Auto Deploy evolved and sparked a vision for a more integrated DevOps experience. Today we have a video to present that vision of Auto DevOps.\n\n## GitLab has 2/3 market share in the self-managed Git market\n\nWith more than 100,000 organizations self-hosting GitLab, we have the largest share of companies who choose to host their own code. We’re estimated to have two-thirds of the single tenant market. 
When [Bitrise surveyed](http://blog.bitrise.io/2017/01/27/state-of-app-development-in-2016.html#self-hosted) ten thousand developers who build apps regularly on their platform, they found that 67 percent of self-managed apps prefer GitLab’s on-premise solution.\n\n![Image via Bitrise blog](https://about.gitlab.com/images/blogimages/bitrise-self-hosted-chart.png){: .shadow}\u003Cbr>\n\nSimilarly, in their survey of roughly one thousand development teams, [BuddyBuild found](https://www.buddybuild.com/blog/source-code-hosting#selfhosted) that 79% of mobile developers who host their own code have chosen GitLab:\n\n![Image via buddybuild blog](https://about.gitlab.com/images/blogimages/buddybuild-self-hosted-chart.png){: .shadow}\u003Cbr>\n\nIn their articles, both Bitrise and BuddyBuild note that few organizations use self-managed instances. We think there is a selection effect since both of them are SaaS-only offerings. Based on our experience, in large organizations (over 750 people), it is still more common to self host your Git server (frequently on a cloud service like AWS or GCP) than to use a SaaS service.\n\n## GitLab CI is the most popular next-generation CI system\n\nOur commitment to seamless integration extends to CI. Integrated [CI/CD](/topics/ci-cd/) is both more time and resource efficient than a set of distinct tools, and allows developers greater control over their build pipeline, so they can spot issues early and address them at a relatively low cost. Tighter integration between different stages of the development process makes it easier to cross-reference code, tests, and deployments while discussing them, allowing you to see the full context and iterate much more rapidly. 
We've heard from customers like [Ticketmaster](/blog/continuous-integration-ticketmaster/) that adopting GitLab CI can transform the entire software development lifecycle (SDLC), in their case helping the Ticketmaster mobile development team deliver on the longstanding goal of weekly releases. As more and more companies look to embrace CI as part of their development methodology, having CI fully integrated into their overall SDLC solution will ensure these companies are able to realize the full potential of CI. You can read more about the benefits of integrated CI in our white paper, [Scaling Continuous Integration](http://get.gitlab.com/scaled-ci-cd/).\n\nIn his post on [building Heroku CI](https://blog.heroku.com/building-tools-for-developers-heroku-ci), Heroku’s Ike DeLorenzo noted that GitLab CI is “clearly the biggest mover in activity on Stack Overflow,” with more popularity than both Travis CI and CircleCI:\n\n![Image via Heroku blog](https://about.gitlab.com/images/blogimages/heroku-questions-chart.png){: .shadow}\u003Cbr>\n\nWhile the use of Jenkins for CI is still higher than any other solution, we see more and more organizations moving from Jenkins, because upgrading their Jenkins server is a brittle process. The last two big things that GitLab CI lacked were scheduled builds (contributed to [GitLab 9.2](/releases/2017/05/22/gitlab-9-2-released/)) and cross-project builds (released in [GitLab 9.3 on June 22](/releases/2017/06/22/gitlab-9-3-released/)).\n\n## Auto DevOps is next\n\nWe want to [deliver more of idea to production](https://gitlab.com/gitlab-org/gitlab-ce/issues/32639) and continue to make the flow even better. [Our direction](/direction/#ci--cd) is to fully automate DevOps with the concept of [Auto DevOps](https://gitlab.com/gitlab-org/gitlab-ee/issues/2517). In a cloud-native world, developers have many projects, and it doesn't make sense to have to set up their tools for every one of them. 
With help from the wider community we'll ensure that everything works out of the box, from code quality metrics to Review Apps, and from metrics to autoscaling.\n\nWatch our Head of Product Mark Pundsack demonstrate our Auto DevOps vision, including Auto Create, Auto Build, Auto CI, Auto Deploy, Auto Code Quality, and Auto Review Apps:\n\n\u003Ciframe width=\"560\" height=\"315\" src=\"https://www.youtube.com/embed/KGrJguM361c?rel=0\" frameborder=\"0\" allowfullscreen>\u003C/iframe>\n\nWe couldn't have built GitLab into the tool and company it is today without the contributions of the wider community, and the feedback from our customers. We're excited to see what you build with GitLab.\n\nHave thoughts about Auto DevOps? Comment on this blog post or on [the issue for Auto DevOps](https://gitlab.com/gitlab-org/gitlab-ee/issues/2517). Interested in what your team can do with GitLab Enterprise Edition? [Sign up for a free trial](/free-trial/) and let us know what you think.\n",[9,819,705,108],{"slug":2219,"featured":6,"template":687},"whats-next-for-gitlab-ci","content:en-us:blog:whats-next-for-gitlab-ci.yml","Whats Next For Gitlab Ci","en-us/blog/whats-next-for-gitlab-ci.yml","en-us/blog/whats-next-for-gitlab-ci",{"_path":2225,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":2226,"content":2232,"config":2238,"_id":2240,"_type":13,"title":2241,"_source":15,"_file":2242,"_stem":2243,"_extension":18},"/en-us/blog/working-on-two-git-branches-at-the-same-time",{"title":2227,"description":2228,"ogTitle":2227,"ogDescription":2228,"noIndex":6,"ogImage":2229,"ogUrl":2230,"ogSiteName":672,"ogType":673,"canonicalUrls":2230,"schema":2231},"How to work on two Git branches at the same time","Watch the demo on how using the GitLab Web IDE and your local dev environment to work on two branches at once can help save 
time.","https://res.cloudinary.com/about-gitlab-com/image/upload/v1749678782/Blog/Hero%20Images/working-on-two-git-branches-at-the-same-time.jpg","https://about.gitlab.com/blog/working-on-two-git-branches-at-the-same-time","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"How to work on two Git branches at the same time\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"William Chia\"}],\n        \"datePublished\": \"2018-10-03\",\n      }",{"title":2227,"description":2228,"authors":2233,"heroImage":2229,"date":2235,"body":2236,"category":726,"tags":2237},[2234],"William Chia","2018-10-03","\nI was recently using both my local development environment and the GitLab [Web IDE](/blog/introducing-gitlab-s-integrated-development-environment/), and found a really nice workflow for working with two Git branches simultaneously.\n\n### The problem\n\nIn this scenario, you’re doing development work on one branch, in one part of your codebase, and then likely documenting your process in another place. I really don’t want all of this in one merge request, because I don’t want to delay shipping the development work if [the docs](https://docs.gitlab.com) aren’t done. I want to be able to get it live so that others can see it, give feedback on each individual component, and iterate on it. At the same time, I don’t want to delay too long on documenting the process, because I want the docs to be as accurate and reproducible as possible.\n\n### The fix\n\nWhile doing my development work in my local development environment, I created another merge request for the documentation using the [Web IDE](https://docs.gitlab.com/ee/user/project/web_ide/), essentially working on two different Git branches at the same time, using two different editors.\n\nIn my quick example below, you can see a merge request to add Jenkins content to our [DevOps tools](/competition/) page. 
I’ve checked out this branch locally, and I have it open in my Atom editor. I’ve been doing some work by updating `features.yml`, as well as a Markdown file and a Haml file. All of these changes are related to one merge request. While I’m committing changes locally to the comparison page, I’m documenting each step in my Web IDE in a separate tab, to make sure my instructions are precise, helpful, and completed in real time.\n\n### Watch the demo\n\n\u003C!-- blank line -->\n\u003Cfigure class=\"video_container\">\n  \u003Ciframe src=\"https://www.youtube.com/embed/uV3ycYnwhBc\" frameborder=\"0\" allowfullscreen=\"true\"> \u003C/iframe>\n\u003C/figure>\n\u003C!-- blank line -->\n\nYou can see what we've got planned for the Web IDE in 2019 in our post about [our product vision for DevOps Create](/blog/create-vision/).\n\nWhat are other ways the Web IDE has come in handy for you? Let us know by tweeting us [@gitlab](https://twitter.com/gitlab)!\n\nCover [photo](https://unsplash.com/photos/3y1zF4hIPCg) by [Hans-Peter Gauster](https://unsplash.com/photos/3y1zF4hIPCg) on Unsplash\n{: .note}\n",[1483,1242,9,774,982],{"slug":2239,"featured":6,"template":687},"working-on-two-git-branches-at-the-same-time","content:en-us:blog:working-on-two-git-branches-at-the-same-time.yml","Working On Two Git Branches At The Same Time","en-us/blog/working-on-two-git-branches-at-the-same-time.yml","en-us/blog/working-on-two-git-branches-at-the-same-time",{"_path":2245,"_dir":243,"_draft":6,"_partial":6,"_locale":7,"seo":2246,"content":2251,"config":2256,"_id":2258,"_type":13,"title":2259,"_source":15,"_file":2260,"_stem":2261,"_extension":18},"/en-us/blog/15-git-tips-improve-workflow",{"title":2247,"description":2248,"ogTitle":2247,"ogDescription":2248,"noIndex":6,"ogImage":740,"ogUrl":2249,"ogSiteName":672,"ogType":673,"canonicalUrls":2249,"schema":2250},"15 Git tips to improve your workflow","Learn how to compare commits, delete stale branches, and write aliases to save you some time. 
It's time to dust off your command line and Git busy!","https://about.gitlab.com/blog/15-git-tips-improve-workflow","\n                        {\n        \"@context\": \"https://schema.org\",\n        \"@type\": \"Article\",\n        \"headline\": \"15 Git tips to improve your workflow\",\n        \"author\": [{\"@type\":\"Person\",\"name\":\"Suri Patel\"}],\n        \"datePublished\": \"2020-04-07\",\n      }",{"title":2247,"description":2248,"authors":2252,"heroImage":740,"date":2253,"body":2254,"category":680,"tags":2255},[1519],"2020-04-07","\n\nThis year, [Git](https://git-scm.com/) celebrates its 15th anniversary, and we’ve been excitedly posting some thoughts about its creation and impact — from sharing our experience at [Git Merge 2020](/blog/git-merge-fifteen-year-git-party/), discussing [the problem with Git flow](/blog/what-is-gitlab-flow/), or highlighting the newest Git feature [Partial Clone](/blog/partial-clone-for-massive-repositories/).\n\nWhether you’re just getting started with Git, or you know your way around a command line, it’s always nice to brush up on your skills, which is why we’ve gathered 15 methods to improve your Git-based workflow.\n\n### 1. Git aliases\n\nOne of the most impactful ways to improve your daily workflow is to create aliases for common commands to save you some time in the terminal.\n\nYou can use the following commands to create aliases for the most-used Git commands, `checkout`, `commit` and `branch`.\n\n```\ngit config --global alias.co checkout\ngit config --global alias.ci commit\ngit config --global alias.br branch\n```\n\nInstead of typing `git checkout master`, you only need to type `git co master`.\n\nYou could also edit these commands or add more by modifying the `~/.gitconfig` file directly:\n\n```\n[alias]\n    co = checkout\n    ci = commit\n    br = branch\n```\n\n### 2. 
See the repository status in your terminal’s prompt\n\nIf you’d like to visualize the status of your repository, you can run `git-prompt.sh`\n(you can [download it](https://github.com/git/git/blob/master/contrib/completion/git-prompt.sh) and follow the\ninstructions to use it in your system). If you're using Linux\nand have installed Git with your package manager, it may already be\npresent on your system, likely under `/etc/bash_completion.d/`.\n\nYou can replace your standard shell prompt with something a bit more exciting:\n\n![Git shell prompt](https://about.gitlab.com/images/blogimages/git-tricks/git-shell-info.png){: .shadow}\n\n_Taken from oh-my-zsh's [themes wiki](https://github.com/robbyrussell/oh-my-zsh/wiki/Themes#kafeitu)._\n\n### 3. Compare commits from the command line\n\nA simple way to compare the differences between commits or versions of the same file is to use the `git diff` command.\n\nIf you want to compare the same file between different commits, you run the following:\n\n```\n$ git diff $start_commit..$end_commit -- path/to/file\n```\n\nIf you want to compare the changes between two commits:\n\n```\n$ git diff $start_commit..$end_commit\n```\n\nThese commands will open the diff view inside the terminal, but if you prefer to use a more visual tool to compare your diffs, you can use `git difftool`. [Meld](https://meldmerge.org/) is a useful viewer/editor to visually compare diffs.\n\nTo configure Meld:\n\n```\n$ git config --global diff.tool git-meld\n```\n\nTo start viewing the diffs:\n\n```\n$ git difftool $start_commit..$end_commit -- path/to/file\n# or\n$ git difftool $start_commit..$end_commit\n```\n\n### 4. Stashing uncommitted changes\n\nIf you’re ever working on a feature and need to do an emergency fix on the project, you could run into a problem. You don’t want to commit an unfinished feature, and you also don’t want to lose current changes. 
The solution is to temporarily remove these changes with the Git stash command:\n\n```\n$ git stash\n```\n\nThe git stash command hides changes, giving you a clean working directory and the ability to switch to a new branch to make updates, without having to commit a meaningless snapshot in order to save the current state.\n\nOnce you’re done working on a fix and want to revisit your previous changes, you can run:\n\n```\n$ git stash pop\n```\n\nAnd your changes will be recovered. 🎉\n\nIf you no longer need those changes and want to clear the stash stack, you can do so with:\n\n```\n$ git stash drop\n```\n\n### 5. Pull frequently\n\nIf you’re using [GitLab Flow](/solutions/gitlab-flow/), then you’re working\non feature branches. Depending on how long your feature takes to implement, there might be several changes made to the master branch. In order to avoid major conflicts, you should frequently pull the changes from the master branch to your branch to resolve any conflicts as soon as possible and to make merging your branch to master easier.\n\n### 6. Autocomplete commands\n\nUsing [completion scripts](https://github.com/git/git/tree/master/contrib/completion), you can quickly create the commands for `bash`, `tcsh` and `zsh`. 
If you want to type `git pull`, you can type just the first letter with `git p` followed by \u003Ckbd>Tab\u003C/kbd> will show the following:\n\n```\npack-objects   -- create packed archive of objects\npack-redundant -- find redundant pack files\npack-refs      -- pack heads and tags for efficient repository access\nparse-remote   -- routines to help parsing remote repository access parameters\npatch-id       -- compute unique ID for a patch\nprune          -- prune all unreachable objects from the object database\nprune-packed   -- remove extra objects that are already in pack files\npull           -- fetch from and merge with another repository or local branch\npush           -- update remote refs along with associated objects\n```\n\nTo show all available commands, type `git` in your terminal followed by\n\u003Ckbd>Tab\u003C/kbd>+ \u003Ckbd>Tab\u003C/kbd>.\n\n### 7. Set a global `.gitignore`\n\nIf you want to avoid committing files like `.DS_Store` or Vim `swp` files,\nyou can set up a global `.gitignore` file.\n\nCreate the file:\n\n```bash\ntouch ~/.gitignore\n```\n\nThen run:\n\n```bash\ngit config --global core.excludesFile ~/.gitignore\n```\n\nOr manually add the following to your `~/.gitconfig`:\n\n```ini\n[core]\n  excludesFile = ~/.gitignore\n```\nYou can create a list of the things you want Git to ignore. To learn more, visit the [gitignore documentation](https://git-scm.com/docs/gitignore).\n\n### 8. Enable Git’s autosquash feature by default\n\nAutosquash makes it easier to squash commits during an interactive rebase. It can be enabled for each rebase using `git rebase -i --autosquash`, but it's easier to turn it on by default.\n\n```bash\ngit config --global rebase.autosquash true\n```\n\nOr manually add the following to your `~/.gitconfig`:\n\n```ini\n[rebase]\n  autosquash = true\n```\n\n### 9. 
Delete local branches that have been removed from remote on fetch/pull\n\nYou likely have stale branches in your local repository that no longer exist in the remote one. To delete them in each fetch/pull, run:\n\n```bash\ngit config --global fetch.prune true\n```\n\nOr manually add the following to your `~/.gitconfig`:\n\n```ini\n[fetch]\n  prune = true\n```\n\n### 10. Use Git blame more efficiently\n\nGit blame is a handy way to discover who changed a line in a file. Depending on what you want to show, you can pass different flags:\n\n```\n$ git blame -w  # ignores white space\n$ git blame -M  # ignores moving text\n$ git blame -C  # ignores moving text into other files\n```\n\n### 11. Add an alias to check out merge requests locally\n\nA [merge request](https://docs.gitlab.com/ee/user/project/merge_requests/) contains all the history from a repository, and the additional\ncommits added to the branch associated with the MR. You can check out a public merge request locally even if the source project is a fork (even a private fork) of the target project.\n\nTo check out a merge request locally, add the following alias to your `~/.gitconfig`:\n\n```\n[alias]\n  mr = !sh -c 'git fetch $1 merge-requests/$2/head:mr-$1-$2 && git checkout mr-$1-$2' -\n```\n\nNow you can check out a merge request from any repository and any remote. For example, to check out the merge request with ID 5 as shown in GitLab\nfrom the `upstream` remote, run:\n\n```\ngit mr upstream 5\n```\n\nThis will fetch the merge request into a local `mr-upstream-5` branch and check\nit out. In the above example, `upstream` is the remote that points to GitLab\nwhich you can find out by running `git remote -v`.\n\n### 12. An alias of `HEAD`\n\nBreaking news: `@` is the same as `HEAD`. Using it during a rebase is a lifesaver:\n\n```bash\ngit rebase -i @~2\n```\n\n### 13. 
Resetting files\n\nYou’re modifying your code when you suddenly realize that the changes you made are not great, and you’d like to reset them. Rather than clicking undo on everything you edited, you can reset your files to the HEAD of the branch:\n\n```\n$ git reset --hard HEAD\n```\n\nOr if you want to reset a single file:\n\n```\n$ git checkout HEAD -- path/to/file\n```\n\nNow, if you already committed your changes, but still want to revert back, you can use:\n\n```\n$ git reset --soft HEAD~1\n```\n\n### 14. The `git-open` plugin\n\nIf you’d like to quickly visit the website that hosts the repository you’re on, you’ll need `git-open`.\n\n[Install it](https://github.com/paulirish/git-open#installation) and take it for a spin by cloning a repository from\n[GitLab.com](https://gitlab.com/explore). From your terminal, navigate to the\nrepository and run `git open` to be transferred to the project’s page on\nGitLab.com.\n\nThe plugin works by default for projects hosted on GitLab.com, but you can also use it\nwith your own GitLab instances. In that case, set up the domain name with:\n\n```bash\ngit config gitopen.gitlab.domain git.example.com\n```\n\nYou can open different remotes and branches if they have been set up. You can learn more by checking out the [examples section](https://github.com/paulirish/git-open#examples).\n\n### 15. The `git-extras` plugin\n\nIf you want to elevate Git with more commands, try out the\n[`git-extras` plugin](https://github.com/tj/git-extras), which includes `git info` (show\ninformation about the repository) and `git effort` (number of commits per file).\n\n## Learn more about Git\n\nWe’re excited to announce that [Brendan O’Leary](/company/team/#brendan), senior developer evangelist, will create 15 videos to celebrate Git's anniversary over the next several months. He’ll focus on a variety of topics, from rebasing and merging to cherry-picking and branching. Take a look at the first video in the series. 
🍿\n\n\u003C!-- blank line -->\n\u003Cfigure class=\"video_container\">\n  \u003Ciframe src=\"https://www.youtube-nocookie.com/embed/9oDNBuive-g\" frameborder=\"0\" allowfullscreen=\"true\"> \u003C/iframe>\n\u003C/figure>\n\u003C!-- blank line -->\n\nCover image by [Brooke Lark](https://unsplash.com/@brookelark?utm_source=unsplash&utm_medium=referral&utm_content=creditCopyText) on [Unsplash](https://unsplash.com/s/photos/birthday?utm_source=unsplash&utm_medium=referral&utm_content=creditCopyText)\n{: .note}\n",[9,982,798],{"slug":2257,"featured":6,"template":687},"15-git-tips-improve-workflow","content:en-us:blog:15-git-tips-improve-workflow.yml","15 Git Tips Improve Workflow","en-us/blog/15-git-tips-improve-workflow.yml","en-us/blog/15-git-tips-improve-workflow",9,[665,692,713,735,757,784,805,827,846],1753981641681]